Diffstat (limited to 'compiler')
64 files changed, 8938 insertions, 644 deletions
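The dex_to_dex_compiler and compiler_driver hunks below collapse the dex-to-dex compilation levels from three to two: kRequired disappears, debuggable compilations now map to kDontDexToDexCompile, and the PerformOptimizations() predicate is dropped because reaching DexCompiler::Compile() at all now implies kOptimize (hence the DCHECK_EQ). A minimal standalone sketch of the resulting level selection, with simplified stand-in types rather than the actual driver API:

    #include <iostream>

    // Simplified stand-ins; the real types live in dex_to_dex_compiler.h.
    enum class DexToDexCompilationLevel {
      kDontDexToDexCompile,  // Only meaning wrt image time interpretation.
      kOptimize              // Perform peep-hole optimizations.
    };

    // With kRequired gone, a method is either quickened (class verified and
    // the compilation is not debuggable) or left untouched.
    DexToDexCompilationLevel GetLevel(bool debuggable, bool class_verified) {
      if (debuggable) {
        // Class definitions may change under a debugger; skip quickening.
        return DexToDexCompilationLevel::kDontDexToDexCompile;
      }
      return class_verified ? DexToDexCompilationLevel::kOptimize
                            : DexToDexCompilationLevel::kDontDexToDexCompile;
    }

    int main() {
      std::cout << (GetLevel(false, true) == DexToDexCompilationLevel::kOptimize)  // 1
                << (GetLevel(true, true) == DexToDexCompilationLevel::kOptimize)   // 0
                << '\n';
    }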
diff --git a/compiler/Android.bp b/compiler/Android.bp index d57f301ff9..312fc7b35a 100644 --- a/compiler/Android.bp +++ b/compiler/Android.bp @@ -106,7 +106,9 @@ art_cc_defaults { "linker/arm/relative_patcher_arm_base.cc", "linker/arm/relative_patcher_thumb2.cc", "optimizing/code_generator_arm.cc", + "optimizing/code_generator_vector_arm.cc", "optimizing/code_generator_arm_vixl.cc", + "optimizing/code_generator_vector_arm_vixl.cc", "optimizing/dex_cache_array_fixups_arm.cc", "optimizing/instruction_simplifier_arm.cc", "optimizing/instruction_simplifier_shared.cc", @@ -126,6 +128,7 @@ art_cc_defaults { "jni/quick/arm64/calling_convention_arm64.cc", "linker/arm64/relative_patcher_arm64.cc", "optimizing/code_generator_arm64.cc", + "optimizing/code_generator_vector_arm64.cc", "optimizing/scheduler_arm64.cc", "optimizing/instruction_simplifier_arm64.cc", "optimizing/intrinsics_arm64.cc", @@ -139,6 +142,7 @@ art_cc_defaults { "jni/quick/mips/calling_convention_mips.cc", "linker/mips/relative_patcher_mips.cc", "optimizing/code_generator_mips.cc", + "optimizing/code_generator_vector_mips.cc", "optimizing/dex_cache_array_fixups_mips.cc", "optimizing/intrinsics_mips.cc", "optimizing/pc_relative_fixups_mips.cc", @@ -151,6 +155,7 @@ art_cc_defaults { "jni/quick/mips64/calling_convention_mips64.cc", "linker/mips64/relative_patcher_mips64.cc", "optimizing/code_generator_mips64.cc", + "optimizing/code_generator_vector_mips64.cc", "optimizing/intrinsics_mips64.cc", "utils/mips64/assembler_mips64.cc", "utils/mips64/managed_register_mips64.cc", @@ -162,6 +167,7 @@ art_cc_defaults { "linker/x86/relative_patcher_x86.cc", "linker/x86/relative_patcher_x86_base.cc", "optimizing/code_generator_x86.cc", + "optimizing/code_generator_vector_x86.cc", "optimizing/intrinsics_x86.cc", "optimizing/pc_relative_fixups_x86.cc", "optimizing/x86_memory_gen.cc", @@ -176,6 +182,7 @@ art_cc_defaults { "linker/x86_64/relative_patcher_x86_64.cc", "optimizing/intrinsics_x86_64.cc", "optimizing/code_generator_x86_64.cc", + "optimizing/code_generator_vector_x86_64.cc", "utils/x86_64/assembler_x86_64.cc", "utils/x86_64/jni_macro_assembler_x86_64.cc", "utils/x86_64/managed_register_x86_64.cc", @@ -391,6 +398,7 @@ art_cc_test { mips64: { srcs: [ "linker/mips64/relative_patcher_mips64_test.cc", + "utils/mips64/managed_register_mips64_test.cc", ], }, x86: { diff --git a/compiler/dex/dex_to_dex_compiler.cc b/compiler/dex/dex_to_dex_compiler.cc index 808e28c9ea..538fe93793 100644 --- a/compiler/dex/dex_to_dex_compiler.cc +++ b/compiler/dex/dex_to_dex_compiler.cc @@ -70,10 +70,6 @@ class DexCompiler { return *unit_.GetDexFile(); } - bool PerformOptimizations() const { - return dex_to_dex_compilation_level_ >= DexToDexCompilationLevel::kOptimize; - } - // Compiles a RETURN-VOID into a RETURN-VOID-BARRIER within a constructor where // a barrier is required. 
void CompileReturnVoid(Instruction* inst, uint32_t dex_pc); @@ -114,7 +110,7 @@ class DexCompiler { }; void DexCompiler::Compile() { - DCHECK_GE(dex_to_dex_compilation_level_, DexToDexCompilationLevel::kRequired); + DCHECK_EQ(dex_to_dex_compilation_level_, DexToDexCompilationLevel::kOptimize); const DexFile::CodeItem* code_item = unit_.GetCodeItem(); const uint16_t* insns = code_item->insns_; const uint32_t insns_size = code_item->insns_size_in_code_units_; @@ -221,7 +217,7 @@ void DexCompiler::CompileReturnVoid(Instruction* inst, uint32_t dex_pc) { } Instruction* DexCompiler::CompileCheckCast(Instruction* inst, uint32_t dex_pc) { - if (!kEnableCheckCastEllision || !PerformOptimizations()) { + if (!kEnableCheckCastEllision) { return inst; } if (!driver_.IsSafeCast(&unit_, dex_pc)) { @@ -254,7 +250,7 @@ void DexCompiler::CompileInstanceFieldAccess(Instruction* inst, uint32_t dex_pc, Instruction::Code new_opcode, bool is_put) { - if (!kEnableQuickening || !PerformOptimizations()) { + if (!kEnableQuickening) { return; } uint32_t field_idx = inst->VRegC_22c(); @@ -279,7 +275,7 @@ void DexCompiler::CompileInstanceFieldAccess(Instruction* inst, void DexCompiler::CompileInvokeVirtual(Instruction* inst, uint32_t dex_pc, Instruction::Code new_opcode, bool is_range) { - if (!kEnableQuickening || !PerformOptimizations()) { + if (!kEnableQuickening) { return; } uint32_t method_idx = is_range ? inst->VRegB_3rc() : inst->VRegB_35c(); diff --git a/compiler/dex/dex_to_dex_compiler.h b/compiler/dex/dex_to_dex_compiler.h index 00c596d60e..87ddb395ad 100644 --- a/compiler/dex/dex_to_dex_compiler.h +++ b/compiler/dex/dex_to_dex_compiler.h @@ -34,8 +34,7 @@ namespace optimizer { enum class DexToDexCompilationLevel { kDontDexToDexCompile, // Only meaning wrt image time interpretation. - kRequired, // Dex-to-dex compilation required for correctness. - kOptimize // Perform required transformation and peep-hole optimizations. + kOptimize // Perform peep-hole optimizations. }; std::ostream& operator<<(std::ostream& os, const DexToDexCompilationLevel& rhs); diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc index 995098799c..e823f67d3c 100644 --- a/compiler/driver/compiler_driver.cc +++ b/compiler/driver/compiler_driver.cc @@ -532,16 +532,13 @@ static optimizer::DexToDexCompilationLevel GetDexToDexCompilationLevel( if (driver.GetCompilerOptions().GetDebuggable()) { // We are debuggable so definitions of classes might be changed. We don't want to do any // optimizations that could break that. - max_level = optimizer::DexToDexCompilationLevel::kRequired; + max_level = optimizer::DexToDexCompilationLevel::kDontDexToDexCompile; } if (klass->IsVerified()) { // Class is verified so we can enable DEX-to-DEX compilation for performance. return max_level; - } else if (klass->ShouldVerifyAtRuntime()) { - // Class verification has soft-failed. Anyway, ensure at least correctness. - return optimizer::DexToDexCompilationLevel::kRequired; } else { - // Class verification has failed: do not run DEX-to-DEX compilation. + // Class verification has failed: do not run DEX-to-DEX optimizations. return optimizer::DexToDexCompilationLevel::kDontDexToDexCompile; } } @@ -611,7 +608,7 @@ static void CompileMethod(Thread* self, dex_file, (verified_method != nullptr) ? 
dex_to_dex_compilation_level - : optimizer::DexToDexCompilationLevel::kRequired); + : optimizer::DexToDexCompilationLevel::kDontDexToDexCompile); } } else if ((access_flags & kAccNative) != 0) { // Are we extracting only and have support for generic JNI down calls? diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc index d156644484..d129249d63 100644 --- a/compiler/image_writer.cc +++ b/compiler/image_writer.cc @@ -1338,21 +1338,20 @@ mirror::Object* ImageWriter::TryAssignBinSlot(WorkStack& work_stack, // live. if (as_klass->ShouldHaveImt()) { ImTable* imt = as_klass->GetImt(target_ptr_size_); - for (size_t i = 0; i < ImTable::kSize; ++i) { - ArtMethod* imt_method = imt->Get(i, target_ptr_size_); - DCHECK(imt_method != nullptr); - if (imt_method->IsRuntimeMethod() && - !IsInBootImage(imt_method) && - !NativeRelocationAssigned(imt_method)) { - AssignMethodOffset(imt_method, kNativeObjectRelocationTypeRuntimeMethod, oat_index); + if (TryAssignImTableOffset(imt, oat_index)) { + // Since imt's can be shared only do this the first time to not double count imt method + // fixups. + for (size_t i = 0; i < ImTable::kSize; ++i) { + ArtMethod* imt_method = imt->Get(i, target_ptr_size_); + DCHECK(imt_method != nullptr); + if (imt_method->IsRuntimeMethod() && + !IsInBootImage(imt_method) && + !NativeRelocationAssigned(imt_method)) { + AssignMethodOffset(imt_method, kNativeObjectRelocationTypeRuntimeMethod, oat_index); + } } } } - - if (as_klass->ShouldHaveImt()) { - ImTable* imt = as_klass->GetImt(target_ptr_size_); - TryAssignImTableOffset(imt, oat_index); - } } else if (obj->IsClassLoader()) { // Register the class loader if it has a class table. // The fake boot class loader should not get registered and we should end up with only one @@ -1386,10 +1385,10 @@ bool ImageWriter::NativeRelocationAssigned(void* ptr) const { return native_object_relocations_.find(ptr) != native_object_relocations_.end(); } -void ImageWriter::TryAssignImTableOffset(ImTable* imt, size_t oat_index) { +bool ImageWriter::TryAssignImTableOffset(ImTable* imt, size_t oat_index) { // No offset, or already assigned. if (imt == nullptr || IsInBootImage(imt) || NativeRelocationAssigned(imt)) { - return; + return false; } // If the method is a conflict method we also want to assign the conflict table offset. ImageInfo& image_info = GetImageInfo(oat_index); @@ -1401,6 +1400,7 @@ void ImageWriter::TryAssignImTableOffset(ImTable* imt, size_t oat_index) { image_info.bin_slot_sizes_[kBinImTable], kNativeObjectRelocationTypeIMTable}); image_info.bin_slot_sizes_[kBinImTable] += size; + return true; } void ImageWriter::TryAssignConflictTableOffset(ImtConflictTable* table, size_t oat_index) { @@ -1499,8 +1499,7 @@ class ImageWriter::VisitReferencesVisitor { ALWAYS_INLINE void operator() (ObjPtr<mirror::Class> klass ATTRIBUTE_UNUSED, ObjPtr<mirror::Reference> ref) const REQUIRES_SHARED(Locks::mutator_lock_) { - ref->SetReferent</*kTransactionActive*/false>( - VisitReference(ref->GetReferent<kWithoutReadBarrier>())); + operator()(ref, mirror::Reference::ReferentOffset(), /* is_static */ false); } private: @@ -1658,7 +1657,7 @@ void ImageWriter::CalculateNewObjectOffsets() { // Calculate size of the dex cache arrays slot and prepare offsets. PrepareDexCacheArraySlots(); - // Calculate the sizes of the intern tables and class tables. + // Calculate the sizes of the intern tables, class tables, and fixup tables. for (ImageInfo& image_info : image_infos_) { // Calculate how big the intern table will be after being serialized. 
InternTable* const intern_table = image_info.intern_table_.get(); @@ -1666,6 +1665,7 @@ void ImageWriter::CalculateNewObjectOffsets() { if (intern_table->StrongSize() != 0u) { image_info.intern_table_bytes_ = intern_table->WriteToMemory(nullptr); } + // Calculate the size of the class table. ReaderMutexLock mu(self, *Locks::classlinker_classes_lock_); DCHECK_EQ(image_info.class_table_->NumReferencedZygoteClasses(), 0u); @@ -1718,8 +1718,6 @@ void ImageWriter::CalculateNewObjectOffsets() { // Transform each object's bin slot into an offset which will be used to do the final copy. heap->VisitObjects(UnbinObjectsIntoOffsetCallback, this); - // DCHECK_EQ(image_end_, GetBinSizeSum(kBinMirrorCount) + image_objects_offset_begin_); - size_t i = 0; for (ImageInfo& image_info : image_infos_) { image_info.image_roots_address_ = PointerToLowMemUInt32(GetImageAddress(image_roots[i].Get())); @@ -1733,8 +1731,6 @@ void ImageWriter::CalculateNewObjectOffsets() { ImageInfo& image_info = GetImageInfo(relocation.oat_index); relocation.offset += image_info.bin_slot_offsets_[bin_type]; } - - // Note that image_info.image_end_ is left at end of used mirror object section. } size_t ImageWriter::ImageInfo::CreateImageSections(ImageSection* out_sections) const { @@ -1776,7 +1772,6 @@ size_t ImageWriter::ImageInfo::CreateImageSections(ImageSection* out_sections) c ImageSection* dex_cache_arrays_section = &out_sections[ImageHeader::kSectionDexCacheArrays]; *dex_cache_arrays_section = ImageSection(bin_slot_offsets_[kBinDexCacheArray], bin_slot_sizes_[kBinDexCacheArray]); - // Round up to the alignment the string table expects. See HashSet::WriteToMemory. size_t cur_pos = RoundUp(dex_cache_arrays_section->End(), sizeof(uint64_t)); // Calculate the size of the interned strings. 
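TryAssignImTableOffset now reports whether the IMT got a fresh relocation entry, and TryAssignBinSlot walks the table's runtime methods only in that case; since IMTs can be shared across classes, this keeps a shared table's method fixups from being counted twice. The pattern in isolation, with hypothetical names:

    #include <cstddef>
    #include <iostream>
    #include <unordered_map>

    // Each native object gets at most one relocation entry; TryAssign returns
    // true only on first insertion, so the caller can gate per-object
    // follow-up work on it.
    class RelocationMap {
     public:
      bool TryAssign(const void* ptr, size_t size) {
        auto result = offsets_.emplace(ptr, next_offset_);
        if (!result.second) {
          return false;  // Already assigned: a table shared with another class.
        }
        next_offset_ += size;
        return true;
      }

     private:
      std::unordered_map<const void*, size_t> offsets_;
      size_t next_offset_ = 0;
    };

    int main() {
      RelocationMap map;
      int shared_imt = 0;  // Stand-in for an ImTable reachable from two classes.
      std::cout << map.TryAssign(&shared_imt, 64);  // 1: first visit does the work.
      std::cout << map.TryAssign(&shared_imt, 64);  // 0: later visits skip it.
      std::cout << '\n';
    }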
@@ -1868,18 +1863,18 @@ class ImageWriter::FixupRootVisitor : public RootVisitor { explicit FixupRootVisitor(ImageWriter* image_writer) : image_writer_(image_writer) { } - void VisitRoots(mirror::Object*** roots, size_t count, const RootInfo& info ATTRIBUTE_UNUSED) + void VisitRoots(mirror::Object*** roots ATTRIBUTE_UNUSED, + size_t count ATTRIBUTE_UNUSED, + const RootInfo& info ATTRIBUTE_UNUSED) OVERRIDE REQUIRES_SHARED(Locks::mutator_lock_) { - for (size_t i = 0; i < count; ++i) { - *roots[i] = image_writer_->GetImageAddress(*roots[i]); - } + LOG(FATAL) << "Unsupported"; } void VisitRoots(mirror::CompressedReference<mirror::Object>** roots, size_t count, const RootInfo& info ATTRIBUTE_UNUSED) OVERRIDE REQUIRES_SHARED(Locks::mutator_lock_) { for (size_t i = 0; i < count; ++i) { - roots[i]->Assign(image_writer_->GetImageAddress(roots[i]->AsMirrorPtr())); + image_writer_->CopyReference(roots[i], roots[i]->AsMirrorPtr()); } } @@ -1890,7 +1885,9 @@ class ImageWriter::FixupRootVisitor : public RootVisitor { void ImageWriter::CopyAndFixupImTable(ImTable* orig, ImTable* copy) { for (size_t i = 0; i < ImTable::kSize; ++i) { ArtMethod* method = orig->Get(i, target_ptr_size_); - copy->Set(i, NativeLocationInImage(method), target_ptr_size_); + void** address = reinterpret_cast<void**>(copy->AddressOfElement(i, target_ptr_size_)); + CopyAndFixupPointer(address, method); + DCHECK_EQ(copy->Get(i, target_ptr_size_), NativeLocationInImage(method)); } } @@ -1899,10 +1896,13 @@ void ImageWriter::CopyAndFixupImtConflictTable(ImtConflictTable* orig, ImtConfli for (size_t i = 0; i < count; ++i) { ArtMethod* interface_method = orig->GetInterfaceMethod(i, target_ptr_size_); ArtMethod* implementation_method = orig->GetImplementationMethod(i, target_ptr_size_); - copy->SetInterfaceMethod(i, target_ptr_size_, NativeLocationInImage(interface_method)); - copy->SetImplementationMethod(i, - target_ptr_size_, - NativeLocationInImage(implementation_method)); + CopyAndFixupPointer(copy->AddressOfInterfaceMethod(i, target_ptr_size_), interface_method); + CopyAndFixupPointer(copy->AddressOfImplementationMethod(i, target_ptr_size_), + implementation_method); + DCHECK_EQ(copy->GetInterfaceMethod(i, target_ptr_size_), + NativeLocationInImage(interface_method)); + DCHECK_EQ(copy->GetImplementationMethod(i, target_ptr_size_), + NativeLocationInImage(implementation_method)); } } @@ -1921,8 +1921,9 @@ void ImageWriter::CopyAndFixupNativeData(size_t oat_index) { switch (relocation.type) { case kNativeObjectRelocationTypeArtField: { memcpy(dest, pair.first, sizeof(ArtField)); - reinterpret_cast<ArtField*>(dest)->SetDeclaringClass( - GetImageAddress(reinterpret_cast<ArtField*>(pair.first)->GetDeclaringClass().Ptr())); + CopyReference( + reinterpret_cast<ArtField*>(dest)->GetDeclaringClassAddressWithoutBarrier(), + reinterpret_cast<ArtField*>(pair.first)->GetDeclaringClass().Ptr()); break; } case kNativeObjectRelocationTypeRuntimeMethod: @@ -2039,8 +2040,10 @@ void ImageWriter::CopyAndFixupObjectsCallback(Object* obj, void* arg) { reinterpret_cast<ImageWriter*>(arg)->CopyAndFixupObject(obj); } -void ImageWriter::FixupPointerArray(mirror::Object* dst, mirror::PointerArray* arr, - mirror::Class* klass, Bin array_type) { +void ImageWriter::FixupPointerArray(mirror::Object* dst, + mirror::PointerArray* arr, + mirror::Class* klass, + Bin array_type) { CHECK(klass->IsArrayClass()); CHECK(arr->IsIntArray() || arr->IsLongArray()) << klass->PrettyClass() << " " << arr; // Fixup int and long pointers for the ArtMethod or ArtField arrays. 
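The hunks that follow, together with the new helpers at the end of the image_writer.cc diff, funnel every reference and native pointer write through CopyReference and CopyAndFixupPointer, so relocation and target pointer width are handled in one place. The width-dispatched store at the heart of CopyAndFixupPointer, as a portable sketch (the real code stores through reinterpret_cast rather than memcpy):

    #include <cstdint>
    #include <cstring>
    #include <iostream>

    enum class PointerSize { k32 = 4, k64 = 8 };

    // Writes a relocated pointer value with the *target* pointer width, which
    // may differ from the host's: 32-bit targets get a zero-extended low-mem
    // value, 64-bit targets the full address.
    void StoreTargetPointer(void* target, uint64_t relocated, PointerSize size) {
      if (size == PointerSize::k32) {
        uint32_t value32 = static_cast<uint32_t>(relocated);
        std::memcpy(target, &value32, sizeof(value32));
      } else {
        std::memcpy(target, &relocated, sizeof(relocated));
      }
    }

    int main() {
      uint32_t slot = 0;
      StoreTargetPointer(&slot, 0x1000, PointerSize::k32);
      std::cout << std::hex << slot << '\n';  // 1000
    }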
@@ -2049,7 +2052,7 @@ void ImageWriter::FixupPointerArray(mirror::Object* dst, mirror::PointerArray* a auto* dest_array = down_cast<mirror::PointerArray*>(dst); for (size_t i = 0, count = num_elements; i < count; ++i) { void* elem = arr->GetElementPtrSize<void*>(i, target_ptr_size_); - if (elem != nullptr && !IsInBootImage(elem)) { + if (kIsDebugBuild && elem != nullptr && !IsInBootImage(elem)) { auto it = native_object_relocations_.find(elem); if (UNLIKELY(it == native_object_relocations_.end())) { if (it->second.IsArtMethodRelocation()) { @@ -2065,12 +2068,9 @@ void ImageWriter::FixupPointerArray(mirror::Object* dst, mirror::PointerArray* a << Class::PrettyClass(field->GetDeclaringClass()); } UNREACHABLE(); - } else { - ImageInfo& image_info = GetImageInfo(it->second.oat_index); - elem = image_info.image_begin_ + it->second.offset; } } - dest_array->SetElementPtrSize<false, true>(i, elem, target_ptr_size_); + CopyAndFixupPointer(dest_array->ElementAddress(i, target_ptr_size_), elem); } } @@ -2118,22 +2118,19 @@ class ImageWriter::FixupVisitor { void operator()(ObjPtr<Object> obj, MemberOffset offset, bool is_static ATTRIBUTE_UNUSED) const - REQUIRES(Locks::mutator_lock_, Locks::heap_bitmap_lock_) { + REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(Locks::heap_bitmap_lock_) { ObjPtr<Object> ref = obj->GetFieldObject<Object, kVerifyNone>(offset); - // Use SetFieldObjectWithoutWriteBarrier to avoid card marking since we are writing to the - // image. - copy_->SetFieldObjectWithoutWriteBarrier<false, true, kVerifyNone>( - offset, - image_writer_->GetImageAddress(ref.Ptr())); + // Copy the reference and record the fixup if necessary. + image_writer_->CopyReference( + copy_->GetFieldObjectReferenceAddr<kVerifyNone>(offset), + ref.Ptr()); } // java.lang.ref.Reference visitor. 
void operator()(ObjPtr<mirror::Class> klass ATTRIBUTE_UNUSED, ObjPtr<mirror::Reference> ref) const REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(Locks::heap_bitmap_lock_) { - copy_->SetFieldObjectWithoutWriteBarrier<false, true, kVerifyNone>( - mirror::Reference::ReferentOffset(), - image_writer_->GetImageAddress(ref->GetReferent())); + operator()(ref, mirror::Reference::ReferentOffset(), /* is_static */ false); } protected: @@ -2211,7 +2208,10 @@ class ImageWriter::NativeLocationVisitor { explicit NativeLocationVisitor(ImageWriter* image_writer) : image_writer_(image_writer) {} template <typename T> - T* operator()(T* ptr) const REQUIRES_SHARED(Locks::mutator_lock_) { + T* operator()(T* ptr, void** dest_addr = nullptr) const REQUIRES_SHARED(Locks::mutator_lock_) { + if (dest_addr != nullptr) { + image_writer_->CopyAndFixupPointer(dest_addr, ptr); + } return image_writer_->NativeLocationInImage(ptr); } @@ -2274,10 +2274,10 @@ void ImageWriter::FixupObject(Object* orig, Object* copy) { } } - -class ImageAddressVisitor { +class ImageWriter::ImageAddressVisitorForDexCacheArray { public: - explicit ImageAddressVisitor(ImageWriter* image_writer) : image_writer_(image_writer) {} + explicit ImageAddressVisitorForDexCacheArray(ImageWriter* image_writer) + : image_writer_(image_writer) {} template <typename T> T* operator()(T* ptr) const REQUIRES_SHARED(Locks::mutator_lock_) { @@ -2288,9 +2288,9 @@ class ImageAddressVisitor { ImageWriter* const image_writer_; }; - void ImageWriter::FixupDexCache(mirror::DexCache* orig_dex_cache, mirror::DexCache* copy_dex_cache) { + ImageAddressVisitorForDexCacheArray fixup_visitor(this); // Though the DexCache array fields are usually treated as native pointers, we set the full // 64-bit values here, clearing the top 32 bits for 32-bit targets. The zero-extension is // done by casting to the unsigned type uintptr_t before casting to int64_t, i.e. 
@@ -2300,8 +2300,7 @@ void ImageWriter::FixupDexCache(mirror::DexCache* orig_dex_cache, copy_dex_cache->SetFieldPtrWithSize<false>(mirror::DexCache::StringsOffset(), NativeLocationInImage(orig_strings), PointerSize::k64); - orig_dex_cache->FixupStrings(NativeCopyLocation(orig_strings, orig_dex_cache), - ImageAddressVisitor(this)); + orig_dex_cache->FixupStrings(NativeCopyLocation(orig_strings, orig_dex_cache), fixup_visitor); } mirror::TypeDexCacheType* orig_types = orig_dex_cache->GetResolvedTypes(); if (orig_types != nullptr) { @@ -2309,7 +2308,7 @@ void ImageWriter::FixupDexCache(mirror::DexCache* orig_dex_cache, NativeLocationInImage(orig_types), PointerSize::k64); orig_dex_cache->FixupResolvedTypes(NativeCopyLocation(orig_types, orig_dex_cache), - ImageAddressVisitor(this)); + fixup_visitor); } ArtMethod** orig_methods = orig_dex_cache->GetResolvedMethods(); if (orig_methods != nullptr) { @@ -2333,7 +2332,8 @@ void ImageWriter::FixupDexCache(mirror::DexCache* orig_dex_cache, for (size_t i = 0, num = orig_dex_cache->NumResolvedFields(); i != num; ++i) { mirror::FieldDexCachePair orig = mirror::DexCache::GetNativePairPtrSize(orig_fields, i, target_ptr_size_); - mirror::FieldDexCachePair copy(NativeLocationInImage(orig.object), orig.index); + mirror::FieldDexCachePair copy = orig; + copy.object = NativeLocationInImage(orig.object); mirror::DexCache::SetNativePairPtrSize(copy_fields, i, copy, target_ptr_size_); } } @@ -2343,7 +2343,7 @@ void ImageWriter::FixupDexCache(mirror::DexCache* orig_dex_cache, NativeLocationInImage(orig_method_types), PointerSize::k64); orig_dex_cache->FixupResolvedMethodTypes(NativeCopyLocation(orig_method_types, orig_dex_cache), - ImageAddressVisitor(this)); + fixup_visitor); } GcRoot<mirror::CallSite>* orig_call_sites = orig_dex_cache->GetResolvedCallSites(); if (orig_call_sites != nullptr) { @@ -2351,7 +2351,7 @@ void ImageWriter::FixupDexCache(mirror::DexCache* orig_dex_cache, NativeLocationInImage(orig_call_sites), PointerSize::k64); orig_dex_cache->FixupResolvedCallSites(NativeCopyLocation(orig_call_sites, orig_dex_cache), - ImageAddressVisitor(this)); + fixup_visitor); } // Remove the DexFile pointers. They will be fixed up when the runtime loads the oat file. 
Leaving @@ -2459,7 +2459,8 @@ void ImageWriter::CopyAndFixupMethod(ArtMethod* orig, memcpy(copy, orig, ArtMethod::Size(target_ptr_size_)); - copy->SetDeclaringClass(GetImageAddress(orig->GetDeclaringClassUnchecked())); + CopyReference(copy->GetDeclaringClassAddressWithoutBarrier(), orig->GetDeclaringClassUnchecked()); + ArtMethod** orig_resolved_methods = orig->GetDexCacheResolvedMethods(target_ptr_size_); copy->SetDexCacheResolvedMethods(NativeLocationInImage(orig_resolved_methods), target_ptr_size_); @@ -2571,7 +2572,7 @@ size_t ImageWriter::GetOatIndex(mirror::Object* obj) const { return GetDefaultOatIndex(); } auto it = oat_index_map_.find(obj); - DCHECK(it != oat_index_map_.end()); + DCHECK(it != oat_index_map_.end()) << obj; return it->second; } @@ -2672,4 +2673,31 @@ ImageWriter::ImageInfo::ImageInfo() : intern_table_(new InternTable), class_table_(new ClassTable) {} +void ImageWriter::CopyReference(mirror::HeapReference<mirror::Object>* dest, + ObjPtr<mirror::Object> src) { + dest->Assign(GetImageAddress(src.Ptr())); +} + +void ImageWriter::CopyReference(mirror::CompressedReference<mirror::Object>* dest, + ObjPtr<mirror::Object> src) { + dest->Assign(GetImageAddress(src.Ptr())); +} + +void ImageWriter::CopyAndFixupPointer(void** target, void* value) { + void* new_value = value; + if (value != nullptr && !IsInBootImage(value)) { + auto it = native_object_relocations_.find(value); + CHECK(it != native_object_relocations_.end()) << value; + const NativeObjectRelocation& relocation = it->second; + ImageInfo& image_info = GetImageInfo(relocation.oat_index); + new_value = reinterpret_cast<void*>(image_info.image_begin_ + relocation.offset); + } + if (target_ptr_size_ == PointerSize::k32) { + *reinterpret_cast<uint32_t*>(target) = PointerToLowMemUInt32(new_value); + } else { + *reinterpret_cast<uint64_t*>(target) = reinterpret_cast<uintptr_t>(new_value); + } +} + + } // namespace art diff --git a/compiler/image_writer.h b/compiler/image_writer.h index 16aff61dab..39113c8143 100644 --- a/compiler/image_writer.h +++ b/compiler/image_writer.h @@ -38,8 +38,9 @@ #include "image.h" #include "lock_word.h" #include "mem_map.h" -#include "oat_file.h" #include "mirror/dex_cache.h" +#include "obj_ptr.h" +#include "oat_file.h" #include "os.h" #include "safe_map.h" #include "utils.h" @@ -317,6 +318,12 @@ class ImageWriter FINAL { // Number of image class table bytes. size_t class_table_bytes_ = 0; + // Number of object fixup bytes. + size_t object_fixup_bytes_ = 0; + + // Number of pointer fixup bytes. + size_t pointer_fixup_bytes_ = 0; + // Intern table associated with this image for serialization. std::unique_ptr<InternTable> intern_table_; @@ -464,7 +471,8 @@ class ImageWriter FINAL { size_t oat_index) REQUIRES_SHARED(Locks::mutator_lock_); - void TryAssignImTableOffset(ImTable* imt, size_t oat_index) REQUIRES_SHARED(Locks::mutator_lock_); + // Return true if imt was newly inserted. + bool TryAssignImTableOffset(ImTable* imt, size_t oat_index) REQUIRES_SHARED(Locks::mutator_lock_); // Assign the offset for an IMT conflict table. Does nothing if the table already has a native // relocation. @@ -534,6 +542,14 @@ class ImageWriter FINAL { // Return true if there already exists a native allocation for an object. 
bool NativeRelocationAssigned(void* ptr) const; + void CopyReference(mirror::HeapReference<mirror::Object>* dest, ObjPtr<mirror::Object> src) + REQUIRES_SHARED(Locks::mutator_lock_); + + void CopyReference(mirror::CompressedReference<mirror::Object>* dest, ObjPtr<mirror::Object> src) + REQUIRES_SHARED(Locks::mutator_lock_); + + void CopyAndFixupPointer(void** target, void* value); + const CompilerDriver& compiler_driver_; // Beginning target image address for the first image. @@ -608,9 +624,11 @@ class ImageWriter FINAL { class FixupRootVisitor; class FixupVisitor; class GetRootsVisitor; + class ImageAddressVisitorForDexCacheArray; class NativeLocationVisitor; class PruneClassesVisitor; class PruneClassLoaderClassesVisitor; + class RegisterBootClassPathClassesVisitor; class VisitReferencesVisitor; DISALLOW_COPY_AND_ASSIGN(ImageWriter); diff --git a/compiler/optimizing/bounds_check_elimination.cc b/compiler/optimizing/bounds_check_elimination.cc index 2ee4db923a..476906a768 100644 --- a/compiler/optimizing/bounds_check_elimination.cc +++ b/compiler/optimizing/bounds_check_elimination.cc @@ -528,7 +528,8 @@ class BCEVisitor : public HGraphVisitor { has_dom_based_dynamic_bce_(false), initial_block_size_(graph->GetBlocks().size()), side_effects_(side_effects), - induction_range_(induction_analysis) {} + induction_range_(induction_analysis), + next_(nullptr) {} void VisitBasicBlock(HBasicBlock* block) OVERRIDE { DCHECK(!IsAddedBlock(block)); @@ -1618,8 +1619,8 @@ class BCEVisitor : public HGraphVisitor { void InsertDeoptInLoop(HLoopInformation* loop, HBasicBlock* block, HInstruction* condition) { HInstruction* suspend = loop->GetSuspendCheck(); block->InsertInstructionBefore(condition, block->GetLastInstruction()); - HDeoptimize* deoptimize = - new (GetGraph()->GetArena()) HDeoptimize(condition, suspend->GetDexPc()); + HDeoptimize* deoptimize = new (GetGraph()->GetArena()) HDeoptimize( + GetGraph()->GetArena(), condition, HDeoptimize::Kind::kBCE, suspend->GetDexPc()); block->InsertInstructionBefore(deoptimize, block->GetLastInstruction()); if (suspend->HasEnvironment()) { deoptimize->CopyEnvironmentFromWithLoopPhiAdjustment( @@ -1631,8 +1632,8 @@ class BCEVisitor : public HGraphVisitor { void InsertDeoptInBlock(HBoundsCheck* bounds_check, HInstruction* condition) { HBasicBlock* block = bounds_check->GetBlock(); block->InsertInstructionBefore(condition, bounds_check); - HDeoptimize* deoptimize = - new (GetGraph()->GetArena()) HDeoptimize(condition, bounds_check->GetDexPc()); + HDeoptimize* deoptimize = new (GetGraph()->GetArena()) HDeoptimize( + GetGraph()->GetArena(), condition, HDeoptimize::Kind::kBCE, bounds_check->GetDexPc()); block->InsertInstructionBefore(deoptimize, bounds_check); deoptimize->CopyEnvironmentFrom(bounds_check->GetEnvironment()); } diff --git a/compiler/optimizing/cha_guard_optimization.cc b/compiler/optimizing/cha_guard_optimization.cc index fe423012ca..048073e37a 100644 --- a/compiler/optimizing/cha_guard_optimization.cc +++ b/compiler/optimizing/cha_guard_optimization.cc @@ -36,7 +36,8 @@ class CHAGuardVisitor : HGraphVisitor { : HGraphVisitor(graph), block_has_cha_guard_(GetGraph()->GetBlocks().size(), 0, - graph->GetArena()->Adapter(kArenaAllocCHA)) { + graph->GetArena()->Adapter(kArenaAllocCHA)), + instruction_iterator_(nullptr) { number_of_guards_to_visit_ = GetGraph()->GetNumberOfCHAGuards(); DCHECK_NE(number_of_guards_to_visit_, 0u); // Will recount number of guards during guard optimization. 
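Both deoptimization sites above now construct HDeoptimize with the arena plus an explicit kind (kBCE here, kInline in the CHA-guard hunk that follows), so every guard records which optimization introduced it. A hypothetical reduction of that constructor shape:

    #include <cstdint>
    #include <iostream>

    struct Deoptimize {
      enum class Kind { kBCE, kInline };
      Deoptimize(Kind kind, uint32_t dex_pc) : kind(kind), dex_pc(dex_pc) {}
      const Kind kind;        // Which optimization created this guard.
      const uint32_t dex_pc;  // Where to resume interpretation on deopt.
    };

    int main() {
      Deoptimize deopt(Deoptimize::Kind::kBCE, 42);
      std::cout << "kind=" << (deopt.kind == Deoptimize::Kind::kBCE ? "BCE" : "inline")
                << " dex_pc=" << deopt.dex_pc << '\n';
    }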
@@ -201,8 +202,8 @@ bool CHAGuardVisitor::HoistGuard(HShouldDeoptimizeFlag* flag, HInstruction* suspend = loop_info->GetSuspendCheck(); // Need a new deoptimize instruction that copies the environment // of the suspend instruction for the loop. - HDeoptimize* deoptimize = - new (GetGraph()->GetArena()) HDeoptimize(compare, suspend->GetDexPc()); + HDeoptimize* deoptimize = new (GetGraph()->GetArena()) HDeoptimize( + GetGraph()->GetArena(), compare, HDeoptimize::Kind::kInline, suspend->GetDexPc()); pre_header->InsertInstructionBefore(deoptimize, pre_header->GetLastInstruction()); deoptimize->CopyEnvironmentFromWithLoopPhiAdjustment( suspend->GetEnvironment(), loop_info->GetHeader()); diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index d735b27090..d7cc577580 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -1134,7 +1134,7 @@ class ReadBarrierForHeapReferenceSlowPathARM : public SlowPathCodeARM { instruction_->IsArrayGet() || instruction_->IsInstanceOf() || instruction_->IsCheckCast() || - (instruction_->IsInvokeVirtual()) && instruction_->GetLocations()->Intrinsified()) + (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified())) << "Unexpected instruction in read barrier for heap reference slow path: " << instruction_->DebugName(); // The read barrier instrumentation of object ArrayGet diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index 28cc942dfb..d463830ff6 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -1150,7 +1150,7 @@ class ReadBarrierForHeapReferenceSlowPathARM64 : public SlowPathCodeARM64 { instruction_->IsArrayGet() || instruction_->IsInstanceOf() || instruction_->IsCheckCast() || - (instruction_->IsInvokeVirtual()) && instruction_->GetLocations()->Intrinsified()) + (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified())) << "Unexpected instruction in read barrier for heap reference slow path: " << instruction_->DebugName(); // The read barrier instrumentation of object ArrayGet @@ -3281,7 +3281,7 @@ void InstructionCodeGeneratorARM64::GenerateDivRemWithAnyConstant(HBinaryOperati void InstructionCodeGeneratorARM64::GenerateDivRemIntegral(HBinaryOperation* instruction) { DCHECK(instruction->IsDiv() || instruction->IsRem()); Primitive::Type type = instruction->GetResultType(); - DCHECK(type == Primitive::kPrimInt || Primitive::kPrimLong); + DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong); LocationSummary* locations = instruction->GetLocations(); Register out = OutputRegister(instruction); diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h index 7471cd5f12..10d8b841f8 100644 --- a/compiler/optimizing/code_generator_arm64.h +++ b/compiler/optimizing/code_generator_arm64.h @@ -318,6 +318,11 @@ class InstructionCodeGeneratorARM64 : public InstructionCodeGenerator { void GenerateDivRemIntegral(HBinaryOperation* instruction); void HandleGoto(HInstruction* got, HBasicBlock* successor); + vixl::aarch64::MemOperand CreateVecMemRegisters( + HVecMemoryOperation* instruction, + Location* reg_loc, + bool is_load); + Arm64Assembler* const assembler_; CodeGeneratorARM64* const codegen_; diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc index a1c3da9e9c..cce412b314 100644 --- 
a/compiler/optimizing/code_generator_arm_vixl.cc +++ b/compiler/optimizing/code_generator_arm_vixl.cc @@ -1175,7 +1175,7 @@ class ReadBarrierForHeapReferenceSlowPathARMVIXL : public SlowPathCodeARMVIXL { instruction_->IsArrayGet() || instruction_->IsInstanceOf() || instruction_->IsCheckCast() || - (instruction_->IsInvokeVirtual()) && instruction_->GetLocations()->Intrinsified()) + (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified())) << "Unexpected instruction in read barrier for heap reference slow path: " << instruction_->DebugName(); // The read barrier instrumentation of object ArrayGet diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc index 5f02a52417..287891feae 100644 --- a/compiler/optimizing/code_generator_mips.cc +++ b/compiler/optimizing/code_generator_mips.cc @@ -461,6 +461,536 @@ class DeoptimizationSlowPathMIPS : public SlowPathCodeMIPS { DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathMIPS); }; +class ArraySetSlowPathMIPS : public SlowPathCodeMIPS { + public: + explicit ArraySetSlowPathMIPS(HInstruction* instruction) : SlowPathCodeMIPS(instruction) {} + + void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + LocationSummary* locations = instruction_->GetLocations(); + __ Bind(GetEntryLabel()); + SaveLiveRegisters(codegen, locations); + + InvokeRuntimeCallingConvention calling_convention; + HParallelMove parallel_move(codegen->GetGraph()->GetArena()); + parallel_move.AddMove( + locations->InAt(0), + Location::RegisterLocation(calling_convention.GetRegisterAt(0)), + Primitive::kPrimNot, + nullptr); + parallel_move.AddMove( + locations->InAt(1), + Location::RegisterLocation(calling_convention.GetRegisterAt(1)), + Primitive::kPrimInt, + nullptr); + parallel_move.AddMove( + locations->InAt(2), + Location::RegisterLocation(calling_convention.GetRegisterAt(2)), + Primitive::kPrimNot, + nullptr); + codegen->GetMoveResolver()->EmitNativeCode(¶llel_move); + + CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen); + mips_codegen->InvokeRuntime(kQuickAputObject, instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>(); + RestoreLiveRegisters(codegen, locations); + __ B(GetExitLabel()); + } + + const char* GetDescription() const OVERRIDE { return "ArraySetSlowPathMIPS"; } + + private: + DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathMIPS); +}; + +// Slow path marking an object reference `ref` during a read +// barrier. The field `obj.field` in the object `obj` holding this +// reference does not get updated by this slow path after marking (see +// ReadBarrierMarkAndUpdateFieldSlowPathMIPS below for that). +// +// This means that after the execution of this slow path, `ref` will +// always be up-to-date, but `obj.field` may not; i.e., after the +// flip, `ref` will be a to-space reference, but `obj.field` will +// probably still be a from-space reference (unless it gets updated by +// another thread, or if another thread installed another object +// reference (different from `ref`) in `obj.field`). +// +// If `entrypoint` is a valid location it is assumed to already be +// holding the entrypoint. The case where the entrypoint is passed in +// is for the GcRoot read barrier. 
+class ReadBarrierMarkSlowPathMIPS : public SlowPathCodeMIPS { + public: + ReadBarrierMarkSlowPathMIPS(HInstruction* instruction, + Location ref, + Location entrypoint = Location::NoLocation()) + : SlowPathCodeMIPS(instruction), ref_(ref), entrypoint_(entrypoint) { + DCHECK(kEmitCompilerReadBarrier); + } + + const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathMIPS"; } + + void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + LocationSummary* locations = instruction_->GetLocations(); + Register ref_reg = ref_.AsRegister<Register>(); + DCHECK(locations->CanCall()); + DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg; + DCHECK(instruction_->IsInstanceFieldGet() || + instruction_->IsStaticFieldGet() || + instruction_->IsArrayGet() || + instruction_->IsArraySet() || + instruction_->IsLoadClass() || + instruction_->IsLoadString() || + instruction_->IsInstanceOf() || + instruction_->IsCheckCast() || + (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()) || + (instruction_->IsInvokeStaticOrDirect() && instruction_->GetLocations()->Intrinsified())) + << "Unexpected instruction in read barrier marking slow path: " + << instruction_->DebugName(); + + __ Bind(GetEntryLabel()); + // No need to save live registers; it's taken care of by the + // entrypoint. Also, there is no need to update the stack mask, + // as this runtime call will not trigger a garbage collection. + CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen); + DCHECK((V0 <= ref_reg && ref_reg <= T7) || + (S2 <= ref_reg && ref_reg <= S7) || + (ref_reg == FP)) << ref_reg; + // "Compact" slow path, saving two moves. + // + // Instead of using the standard runtime calling convention (input + // and output in A0 and V0 respectively): + // + // A0 <- ref + // V0 <- ReadBarrierMark(A0) + // ref <- V0 + // + // we just use rX (the register containing `ref`) as input and output + // of a dedicated entrypoint: + // + // rX <- ReadBarrierMarkRegX(rX) + // + if (entrypoint_.IsValid()) { + mips_codegen->ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction_, this); + DCHECK_EQ(entrypoint_.AsRegister<Register>(), T9); + __ Jalr(entrypoint_.AsRegister<Register>()); + __ NopIfNoReordering(); + } else { + int32_t entry_point_offset = + CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kMipsPointerSize>(ref_reg - 1); + // This runtime call does not require a stack map. + mips_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, + instruction_, + this, + /* direct */ false); + } + __ B(GetExitLabel()); + } + + private: + // The location (register) of the marked object reference. + const Location ref_; + + // The location of the entrypoint if already loaded. + const Location entrypoint_; + + DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathMIPS); +}; + +// Slow path marking an object reference `ref` during a read barrier, +// and if needed, atomically updating the field `obj.field` in the +// object `obj` holding this reference after marking (contrary to +// ReadBarrierMarkSlowPathMIPS above, which never tries to update +// `obj.field`). +// +// This means that after the execution of this slow path, both `ref` +// and `obj.field` will be up-to-date; i.e., after the flip, both will +// hold the same to-space reference (unless another thread installed +// another object reference (different from `ref`) in `obj.field`). 
+class ReadBarrierMarkAndUpdateFieldSlowPathMIPS : public SlowPathCodeMIPS { + public: + ReadBarrierMarkAndUpdateFieldSlowPathMIPS(HInstruction* instruction, + Location ref, + Register obj, + Location field_offset, + Register temp1) + : SlowPathCodeMIPS(instruction), + ref_(ref), + obj_(obj), + field_offset_(field_offset), + temp1_(temp1) { + DCHECK(kEmitCompilerReadBarrier); + } + + const char* GetDescription() const OVERRIDE { + return "ReadBarrierMarkAndUpdateFieldSlowPathMIPS"; + } + + void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + LocationSummary* locations = instruction_->GetLocations(); + Register ref_reg = ref_.AsRegister<Register>(); + DCHECK(locations->CanCall()); + DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg; + // This slow path is only used by the UnsafeCASObject intrinsic. + DCHECK((instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified())) + << "Unexpected instruction in read barrier marking and field updating slow path: " + << instruction_->DebugName(); + DCHECK(instruction_->GetLocations()->Intrinsified()); + DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kUnsafeCASObject); + DCHECK(field_offset_.IsRegisterPair()) << field_offset_; + + __ Bind(GetEntryLabel()); + + // Save the old reference. + // Note that we cannot use AT or TMP to save the old reference, as those + // are used by the code that follows, but we need the old reference after + // the call to the ReadBarrierMarkRegX entry point. + DCHECK_NE(temp1_, AT); + DCHECK_NE(temp1_, TMP); + __ Move(temp1_, ref_reg); + + // No need to save live registers; it's taken care of by the + // entrypoint. Also, there is no need to update the stack mask, + // as this runtime call will not trigger a garbage collection. + CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen); + DCHECK((V0 <= ref_reg && ref_reg <= T7) || + (S2 <= ref_reg && ref_reg <= S7) || + (ref_reg == FP)) << ref_reg; + // "Compact" slow path, saving two moves. + // + // Instead of using the standard runtime calling convention (input + // and output in A0 and V0 respectively): + // + // A0 <- ref + // V0 <- ReadBarrierMark(A0) + // ref <- V0 + // + // we just use rX (the register containing `ref`) as input and output + // of a dedicated entrypoint: + // + // rX <- ReadBarrierMarkRegX(rX) + // + int32_t entry_point_offset = + CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kMipsPointerSize>(ref_reg - 1); + // This runtime call does not require a stack map. + mips_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, + instruction_, + this, + /* direct */ false); + + // If the new reference is different from the old reference, + // update the field in the holder (`*(obj_ + field_offset_)`). + // + // Note that this field could also hold a different object, if + // another thread had concurrently changed it. In that case, the + // the compare-and-set (CAS) loop below would abort, leaving the + // field as-is. + MipsLabel done; + __ Beq(temp1_, ref_reg, &done); + + // Update the the holder's field atomically. This may fail if + // mutator updates before us, but it's OK. This is achieved + // using a strong compare-and-set (CAS) operation with relaxed + // memory synchronization ordering, where the expected value is + // the old reference and the desired value is the new reference. + + // Convenience aliases. 
+ Register base = obj_; + // The UnsafeCASObject intrinsic uses a register pair as field + // offset ("long offset"), of which only the low part contains + // data. + Register offset = field_offset_.AsRegisterPairLow<Register>(); + Register expected = temp1_; + Register value = ref_reg; + Register tmp_ptr = TMP; // Pointer to actual memory. + Register tmp = AT; // Value in memory. + + __ Addu(tmp_ptr, base, offset); + + if (kPoisonHeapReferences) { + __ PoisonHeapReference(expected); + // Do not poison `value` if it is the same register as + // `expected`, which has just been poisoned. + if (value != expected) { + __ PoisonHeapReference(value); + } + } + + // do { + // tmp = [r_ptr] - expected; + // } while (tmp == 0 && failure([r_ptr] <- r_new_value)); + + bool is_r6 = mips_codegen->GetInstructionSetFeatures().IsR6(); + MipsLabel loop_head, exit_loop; + __ Bind(&loop_head); + if (is_r6) { + __ LlR6(tmp, tmp_ptr); + } else { + __ LlR2(tmp, tmp_ptr); + } + __ Bne(tmp, expected, &exit_loop); + __ Move(tmp, value); + if (is_r6) { + __ ScR6(tmp, tmp_ptr); + } else { + __ ScR2(tmp, tmp_ptr); + } + __ Beqz(tmp, &loop_head); + __ Bind(&exit_loop); + + if (kPoisonHeapReferences) { + __ UnpoisonHeapReference(expected); + // Do not unpoison `value` if it is the same register as + // `expected`, which has just been unpoisoned. + if (value != expected) { + __ UnpoisonHeapReference(value); + } + } + + __ Bind(&done); + __ B(GetExitLabel()); + } + + private: + // The location (register) of the marked object reference. + const Location ref_; + // The register containing the object holding the marked object reference field. + const Register obj_; + // The location of the offset of the marked reference field within `obj_`. + Location field_offset_; + + const Register temp1_; + + DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkAndUpdateFieldSlowPathMIPS); +}; + +// Slow path generating a read barrier for a heap reference. +class ReadBarrierForHeapReferenceSlowPathMIPS : public SlowPathCodeMIPS { + public: + ReadBarrierForHeapReferenceSlowPathMIPS(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index) + : SlowPathCodeMIPS(instruction), + out_(out), + ref_(ref), + obj_(obj), + offset_(offset), + index_(index) { + DCHECK(kEmitCompilerReadBarrier); + // If `obj` is equal to `out` or `ref`, it means the initial object + // has been overwritten by (or after) the heap object reference load + // to be instrumented, e.g.: + // + // __ LoadFromOffset(kLoadWord, out, out, offset); + // codegen_->GenerateReadBarrierSlow(instruction, out_loc, out_loc, out_loc, offset); + // + // In that case, we have lost the information about the original + // object, and the emitted read barrier cannot work properly. 
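The ll/sc loop above amounts to a strong compare-and-set with relaxed memory ordering: it retries on spurious sc failure and gives up as soon as the loaded value no longer equals the old reference. The same operation expressed with std::atomic (a sketch only; the real code targets a raw heap slot and handles poisoned references):

    #include <atomic>
    #include <cstdint>
    #include <iostream>

    // Swing `field` from the pre-marking reference to the marked one, but only
    // if no other thread changed the field in the meantime.
    bool UpdateFieldIfUnchanged(std::atomic<uint32_t>* field,
                                uint32_t old_ref,
                                uint32_t new_ref) {
      uint32_t expected = old_ref;
      return field->compare_exchange_strong(expected, new_ref,
                                            std::memory_order_relaxed);
    }

    int main() {
      std::atomic<uint32_t> field{0x100};
      std::cout << UpdateFieldIfUnchanged(&field, 0x100, 0x200);  // 1: updated.
      std::cout << UpdateFieldIfUnchanged(&field, 0x100, 0x300);  // 0: field already moved.
      std::cout << '\n';
    }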
+ DCHECK(!obj.Equals(out)) << "obj=" << obj << " out=" << out; + DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref; + } + + void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen); + LocationSummary* locations = instruction_->GetLocations(); + Register reg_out = out_.AsRegister<Register>(); + DCHECK(locations->CanCall()); + DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out)); + DCHECK(instruction_->IsInstanceFieldGet() || + instruction_->IsStaticFieldGet() || + instruction_->IsArrayGet() || + instruction_->IsInstanceOf() || + instruction_->IsCheckCast() || + (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified())) + << "Unexpected instruction in read barrier for heap reference slow path: " + << instruction_->DebugName(); + + __ Bind(GetEntryLabel()); + SaveLiveRegisters(codegen, locations); + + // We may have to change the index's value, but as `index_` is a + // constant member (like other "inputs" of this slow path), + // introduce a copy of it, `index`. + Location index = index_; + if (index_.IsValid()) { + // Handle `index_` for HArrayGet and UnsafeGetObject/UnsafeGetObjectVolatile intrinsics. + if (instruction_->IsArrayGet()) { + // Compute the actual memory offset and store it in `index`. + Register index_reg = index_.AsRegister<Register>(); + DCHECK(locations->GetLiveRegisters()->ContainsCoreRegister(index_reg)); + if (codegen->IsCoreCalleeSaveRegister(index_reg)) { + // We are about to change the value of `index_reg` (see the + // calls to art::mips::MipsAssembler::Sll and + // art::mips::MipsAssembler::Addiu32 below), but it has + // not been saved by the previous call to + // art::SlowPathCode::SaveLiveRegisters, as it is a + // callee-save register -- + // art::SlowPathCode::SaveLiveRegisters does not consider + // callee-save registers, as it has been designed with the + // assumption that callee-save registers are supposed to be + // handled by the called function. So, as a callee-save + // register, `index_reg` _would_ eventually be saved onto + // the stack, but it would be too late: we would have + // changed its value earlier. Therefore, we manually save + // it here into another freely available register, + // `free_reg`, chosen of course among the caller-save + // registers (as a callee-save `free_reg` register would + // exhibit the same problem). + // + // Note we could have requested a temporary register from + // the register allocator instead; but we prefer not to, as + // this is a slow path, and we know we can find a + // caller-save register that is available. + Register free_reg = FindAvailableCallerSaveRegister(codegen); + __ Move(free_reg, index_reg); + index_reg = free_reg; + index = Location::RegisterLocation(index_reg); + } else { + // The initial register stored in `index_` has already been + // saved in the call to art::SlowPathCode::SaveLiveRegisters + // (as it is not a callee-save register), so we can freely + // use it. + } + // Shifting the index value contained in `index_reg` by the scale + // factor (2) cannot overflow in practice, as the runtime is + // unable to allocate object arrays with a size larger than + // 2^26 - 1 (that is, 2^28 - 4 bytes). 
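The instructions that follow fold the index register into a byte offset in place, index_reg = (index_reg << 2) + offset_, and the comment's no-overflow argument can be checked numerically (the data offset below is illustrative):

    #include <cstdint>
    #include <iostream>

    int main() {
      const uint32_t kMaxElements = (1u << 26) - 1;  // Runtime object-array size limit.
      const uint32_t kDataOffset = 12;               // Illustrative array header offset.
      // TIMES_4 scaling is a left shift by 2 (4-byte heap references).
      uint64_t max_byte_offset =
          (static_cast<uint64_t>(kMaxElements) << 2) + kDataOffset;
      std::cout << max_byte_offset << " fits in int32: "
                << (max_byte_offset < (1u << 31)) << '\n';  // prints "... fits in int32: 1"
    }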
+ __ Sll(index_reg, index_reg, TIMES_4); + static_assert( + sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), + "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); + __ Addiu32(index_reg, index_reg, offset_); + } else { + // In the case of the UnsafeGetObject/UnsafeGetObjectVolatile + // intrinsics, `index_` is not shifted by a scale factor of 2 + // (as in the case of ArrayGet), as it is actually an offset + // to an object field within an object. + DCHECK(instruction_->IsInvoke()) << instruction_->DebugName(); + DCHECK(instruction_->GetLocations()->Intrinsified()); + DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) || + (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile)) + << instruction_->AsInvoke()->GetIntrinsic(); + DCHECK_EQ(offset_, 0U); + DCHECK(index_.IsRegisterPair()); + // UnsafeGet's offset location is a register pair, the low + // part contains the correct offset. + index = index_.ToLow(); + } + } + + // We're moving two or three locations to locations that could + // overlap, so we need a parallel move resolver. + InvokeRuntimeCallingConvention calling_convention; + HParallelMove parallel_move(codegen->GetGraph()->GetArena()); + parallel_move.AddMove(ref_, + Location::RegisterLocation(calling_convention.GetRegisterAt(0)), + Primitive::kPrimNot, + nullptr); + parallel_move.AddMove(obj_, + Location::RegisterLocation(calling_convention.GetRegisterAt(1)), + Primitive::kPrimNot, + nullptr); + if (index.IsValid()) { + parallel_move.AddMove(index, + Location::RegisterLocation(calling_convention.GetRegisterAt(2)), + Primitive::kPrimInt, + nullptr); + codegen->GetMoveResolver()->EmitNativeCode(¶llel_move); + } else { + codegen->GetMoveResolver()->EmitNativeCode(¶llel_move); + __ LoadConst32(calling_convention.GetRegisterAt(2), offset_); + } + mips_codegen->InvokeRuntime(kQuickReadBarrierSlow, + instruction_, + instruction_->GetDexPc(), + this); + CheckEntrypointTypes< + kQuickReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t>(); + mips_codegen->Move32(out_, calling_convention.GetReturnLocation(Primitive::kPrimNot)); + + RestoreLiveRegisters(codegen, locations); + __ B(GetExitLabel()); + } + + const char* GetDescription() const OVERRIDE { return "ReadBarrierForHeapReferenceSlowPathMIPS"; } + + private: + Register FindAvailableCallerSaveRegister(CodeGenerator* codegen) { + size_t ref = static_cast<int>(ref_.AsRegister<Register>()); + size_t obj = static_cast<int>(obj_.AsRegister<Register>()); + for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) { + if (i != ref && + i != obj && + !codegen->IsCoreCalleeSaveRegister(i) && + !codegen->IsBlockedCoreRegister(i)) { + return static_cast<Register>(i); + } + } + // We shall never fail to find a free caller-save register, as + // there are more than two core caller-save registers on MIPS + // (meaning it is possible to find one which is different from + // `ref` and `obj`). + DCHECK_GT(codegen->GetNumberOfCoreCallerSaveRegisters(), 2u); + LOG(FATAL) << "Could not find a free caller-save register"; + UNREACHABLE(); + } + + const Location out_; + const Location ref_; + const Location obj_; + const uint32_t offset_; + // An additional location containing an index to an array. + // Only used for HArrayGet and the UnsafeGetObject & + // UnsafeGetObjectVolatile intrinsics. 
+ const Location index_; + + DISALLOW_COPY_AND_ASSIGN(ReadBarrierForHeapReferenceSlowPathMIPS); +}; + +// Slow path generating a read barrier for a GC root. +class ReadBarrierForRootSlowPathMIPS : public SlowPathCodeMIPS { + public: + ReadBarrierForRootSlowPathMIPS(HInstruction* instruction, Location out, Location root) + : SlowPathCodeMIPS(instruction), out_(out), root_(root) { + DCHECK(kEmitCompilerReadBarrier); + } + + void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + LocationSummary* locations = instruction_->GetLocations(); + Register reg_out = out_.AsRegister<Register>(); + DCHECK(locations->CanCall()); + DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out)); + DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString()) + << "Unexpected instruction in read barrier for GC root slow path: " + << instruction_->DebugName(); + + __ Bind(GetEntryLabel()); + SaveLiveRegisters(codegen, locations); + + InvokeRuntimeCallingConvention calling_convention; + CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen); + mips_codegen->Move32(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), root_); + mips_codegen->InvokeRuntime(kQuickReadBarrierForRootSlow, + instruction_, + instruction_->GetDexPc(), + this); + CheckEntrypointTypes<kQuickReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*>(); + mips_codegen->Move32(out_, calling_convention.GetReturnLocation(Primitive::kPrimNot)); + + RestoreLiveRegisters(codegen, locations); + __ B(GetExitLabel()); + } + + const char* GetDescription() const OVERRIDE { return "ReadBarrierForRootSlowPathMIPS"; } + + private: + const Location out_; + const Location root_; + + DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathMIPS); +}; + CodeGeneratorMIPS::CodeGeneratorMIPS(HGraph* graph, const MipsInstructionSetFeatures& isa_features, const CompilerOptions& compiler_options, @@ -1310,10 +1840,26 @@ void CodeGeneratorMIPS::InvokeRuntime(QuickEntrypointEnum entrypoint, uint32_t dex_pc, SlowPathCode* slow_path) { ValidateInvokeRuntime(entrypoint, instruction, slow_path); + GenerateInvokeRuntime(GetThreadOffset<kMipsPointerSize>(entrypoint).Int32Value(), + IsDirectEntrypoint(entrypoint)); + if (EntrypointRequiresStackMap(entrypoint)) { + RecordPcInfo(instruction, dex_pc, slow_path); + } +} + +void CodeGeneratorMIPS::InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset, + HInstruction* instruction, + SlowPathCode* slow_path, + bool direct) { + ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction, slow_path); + GenerateInvokeRuntime(entry_point_offset, direct); +} + +void CodeGeneratorMIPS::GenerateInvokeRuntime(int32_t entry_point_offset, bool direct) { bool reordering = __ SetReorder(false); - __ LoadFromOffset(kLoadWord, T9, TR, GetThreadOffset<kMipsPointerSize>(entrypoint).Int32Value()); + __ LoadFromOffset(kLoadWord, T9, TR, entry_point_offset); __ Jalr(T9); - if (IsDirectEntrypoint(entrypoint)) { + if (direct) { // Reserve argument space on stack (for $a0-$a3) for // entrypoints that directly reference native implementations. // Called function may use this space to store $a0-$a3 regs. @@ -1323,9 +1869,6 @@ void CodeGeneratorMIPS::InvokeRuntime(QuickEntrypointEnum entrypoint, __ Nop(); // In delay slot. 
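The InvokeRuntime refactor above extracts the call emission into GenerateInvokeRuntime so that the new slow paths can reach an entrypoint by raw thread offset via InvokeRuntimeWithoutRecordingPcInfo, skipping the stack map that ordinary runtime calls must record. Reduced to plain functions (function names as in the diff, bodies purely illustrative):

    #include <cstdint>
    #include <iostream>

    // Shared emission path: load the entrypoint from the thread register, call it.
    static void GenerateInvokeRuntime(int32_t entry_point_offset, bool direct) {
      std::cout << "lw T9, " << entry_point_offset << "(TR); jalr T9"
                << (direct ? " (direct entrypoint)" : "") << '\n';
    }

    // Ordinary runtime call: may need a stack map recorded after the call.
    void InvokeRuntime(int32_t offset, bool direct, bool requires_stack_map) {
      GenerateInvokeRuntime(offset, direct);
      if (requires_stack_map) {
        std::cout << "RecordPcInfo\n";
      }
    }

    // Read-barrier style call: never records a stack map.
    void InvokeRuntimeWithoutRecordingPcInfo(int32_t offset, bool direct) {
      GenerateInvokeRuntime(offset, direct);
    }

    int main() {
      InvokeRuntime(0x1a0, /* direct */ false, /* requires_stack_map */ true);
      InvokeRuntimeWithoutRecordingPcInfo(0x2b0, /* direct */ false);
    }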
} __ SetReorder(reordering); - if (EntrypointRequiresStackMap(entrypoint)) { - RecordPcInfo(instruction, dex_pc, slow_path); - } } void InstructionCodeGeneratorMIPS::GenerateClassInitializationCheck(SlowPathCodeMIPS* slow_path, @@ -1885,14 +2428,31 @@ void InstructionCodeGeneratorMIPS::VisitAnd(HAnd* instruction) { } void LocationsBuilderMIPS::VisitArrayGet(HArrayGet* instruction) { + Primitive::Type type = instruction->GetType(); + bool object_array_get_with_read_barrier = + kEmitCompilerReadBarrier && (type == Primitive::kPrimNot); LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); + new (GetGraph()->GetArena()) LocationSummary(instruction, + object_array_get_with_read_barrier + ? LocationSummary::kCallOnSlowPath + : LocationSummary::kNoCall); locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); - if (Primitive::IsFloatingPointType(instruction->GetType())) { + if (Primitive::IsFloatingPointType(type)) { locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); } else { - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + // The output overlaps in the case of an object array get with + // read barriers enabled: we do not want the move to overwrite the + // array's location, as we need it to emit the read barrier. + locations->SetOut(Location::RequiresRegister(), + object_array_get_with_read_barrier + ? Location::kOutputOverlap + : Location::kNoOutputOverlap); + } + // We need a temporary register for the read barrier marking slow + // path in CodeGeneratorMIPS::GenerateArrayLoadWithBakerReadBarrier. + if (object_array_get_with_read_barrier && kUseBakerReadBarrier) { + locations->AddTemp(Location::RequiresRegister()); } } @@ -1905,7 +2465,9 @@ static auto GetImplicitNullChecker(HInstruction* instruction, CodeGeneratorMIPS* void InstructionCodeGeneratorMIPS::VisitArrayGet(HArrayGet* instruction) { LocationSummary* locations = instruction->GetLocations(); - Register obj = locations->InAt(0).AsRegister<Register>(); + Location obj_loc = locations->InAt(0); + Register obj = obj_loc.AsRegister<Register>(); + Location out_loc = locations->Out(); Location index = locations->InAt(1); uint32_t data_offset = CodeGenerator::GetArrayDataOffset(instruction); auto null_checker = GetImplicitNullChecker(instruction, codegen_); @@ -1915,7 +2477,7 @@ void InstructionCodeGeneratorMIPS::VisitArrayGet(HArrayGet* instruction) { instruction->IsStringCharAt(); switch (type) { case Primitive::kPrimBoolean: { - Register out = locations->Out().AsRegister<Register>(); + Register out = out_loc.AsRegister<Register>(); if (index.IsConstant()) { size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset; @@ -1928,7 +2490,7 @@ void InstructionCodeGeneratorMIPS::VisitArrayGet(HArrayGet* instruction) { } case Primitive::kPrimByte: { - Register out = locations->Out().AsRegister<Register>(); + Register out = out_loc.AsRegister<Register>(); if (index.IsConstant()) { size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset; @@ -1941,7 +2503,7 @@ void InstructionCodeGeneratorMIPS::VisitArrayGet(HArrayGet* instruction) { } case Primitive::kPrimShort: { - Register out = locations->Out().AsRegister<Register>(); + Register out = out_loc.AsRegister<Register>(); if (index.IsConstant()) { size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset; 
@@ -1955,7 +2517,7 @@ void InstructionCodeGeneratorMIPS::VisitArrayGet(HArrayGet* instruction) { } case Primitive::kPrimChar: { - Register out = locations->Out().AsRegister<Register>(); + Register out = out_loc.AsRegister<Register>(); if (maybe_compressed_char_at) { uint32_t count_offset = mirror::String::CountOffset().Uint32Value(); __ LoadFromOffset(kLoadWord, TMP, obj, count_offset, null_checker); @@ -2008,10 +2570,9 @@ void InstructionCodeGeneratorMIPS::VisitArrayGet(HArrayGet* instruction) { break; } - case Primitive::kPrimInt: - case Primitive::kPrimNot: { + case Primitive::kPrimInt: { DCHECK_EQ(sizeof(mirror::HeapReference<mirror::Object>), sizeof(int32_t)); - Register out = locations->Out().AsRegister<Register>(); + Register out = out_loc.AsRegister<Register>(); if (index.IsConstant()) { size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset; @@ -2024,8 +2585,53 @@ void InstructionCodeGeneratorMIPS::VisitArrayGet(HArrayGet* instruction) { break; } + case Primitive::kPrimNot: { + static_assert( + sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), + "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); + // /* HeapReference<Object> */ out = + // *(obj + data_offset + index * sizeof(HeapReference<Object>)) + if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + Location temp = locations->GetTemp(0); + // Note that a potential implicit null check is handled in this + // CodeGeneratorMIPS::GenerateArrayLoadWithBakerReadBarrier call. + codegen_->GenerateArrayLoadWithBakerReadBarrier(instruction, + out_loc, + obj, + data_offset, + index, + temp, + /* needs_null_check */ true); + } else { + Register out = out_loc.AsRegister<Register>(); + if (index.IsConstant()) { + size_t offset = + (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset; + __ LoadFromOffset(kLoadWord, out, obj, offset, null_checker); + // If read barriers are enabled, emit read barriers other than + // Baker's using a slow path (and also unpoison the loaded + // reference, if heap poisoning is enabled). + codegen_->MaybeGenerateReadBarrierSlow(instruction, out_loc, out_loc, obj_loc, offset); + } else { + __ Sll(TMP, index.AsRegister<Register>(), TIMES_4); + __ Addu(TMP, obj, TMP); + __ LoadFromOffset(kLoadWord, out, TMP, data_offset, null_checker); + // If read barriers are enabled, emit read barriers other than + // Baker's using a slow path (and also unpoison the loaded + // reference, if heap poisoning is enabled). 
+ codegen_->MaybeGenerateReadBarrierSlow(instruction, + out_loc, + out_loc, + obj_loc, + data_offset, + index); + } + } + break; + } + case Primitive::kPrimLong: { - Register out = locations->Out().AsRegisterPairLow<Register>(); + Register out = out_loc.AsRegisterPairLow<Register>(); if (index.IsConstant()) { size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset; @@ -2039,7 +2645,7 @@ void InstructionCodeGeneratorMIPS::VisitArrayGet(HArrayGet* instruction) { } case Primitive::kPrimFloat: { - FRegister out = locations->Out().AsFpuRegister<FRegister>(); + FRegister out = out_loc.AsFpuRegister<FRegister>(); if (index.IsConstant()) { size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset; @@ -2053,7 +2659,7 @@ void InstructionCodeGeneratorMIPS::VisitArrayGet(HArrayGet* instruction) { } case Primitive::kPrimDouble: { - FRegister out = locations->Out().AsFpuRegister<FRegister>(); + FRegister out = out_loc.AsFpuRegister<FRegister>(); if (index.IsConstant()) { size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset; @@ -2070,11 +2676,6 @@ void InstructionCodeGeneratorMIPS::VisitArrayGet(HArrayGet* instruction) { LOG(FATAL) << "Unreachable type " << instruction->GetType(); UNREACHABLE(); } - - if (type == Primitive::kPrimNot) { - Register out = locations->Out().AsRegister<Register>(); - __ MaybeUnpoisonHeapReference(out); - } } void LocationsBuilderMIPS::VisitArrayLength(HArrayLength* instruction) { @@ -2116,23 +2717,28 @@ Location LocationsBuilderMIPS::FpuRegisterOrConstantForStore(HInstruction* instr } void LocationsBuilderMIPS::VisitArraySet(HArraySet* instruction) { - bool needs_runtime_call = instruction->NeedsTypeCheck(); + Primitive::Type value_type = instruction->GetComponentType(); + + bool needs_write_barrier = + CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue()); + bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck(); + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary( instruction, - needs_runtime_call ? LocationSummary::kCallOnMainOnly : LocationSummary::kNoCall); - if (needs_runtime_call) { - InvokeRuntimeCallingConvention calling_convention; - locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); - locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); - locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2))); + may_need_runtime_call_for_type_check ? + LocationSummary::kCallOnSlowPath : + LocationSummary::kNoCall); + + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); + if (Primitive::IsFloatingPointType(instruction->InputAt(2)->GetType())) { + locations->SetInAt(2, FpuRegisterOrConstantForStore(instruction->InputAt(2))); } else { - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); - if (Primitive::IsFloatingPointType(instruction->InputAt(2)->GetType())) { - locations->SetInAt(2, FpuRegisterOrConstantForStore(instruction->InputAt(2))); - } else { - locations->SetInAt(2, RegisterOrZeroConstant(instruction->InputAt(2))); - } + locations->SetInAt(2, RegisterOrZeroConstant(instruction->InputAt(2))); + } + if (needs_write_barrier) { + // Temporary register for the write barrier. 
+ locations->AddTemp(Location::RequiresRegister()); // Possibly used for ref. poisoning too. } } @@ -2142,7 +2748,7 @@ void InstructionCodeGeneratorMIPS::VisitArraySet(HArraySet* instruction) { Location index = locations->InAt(1); Location value_location = locations->InAt(2); Primitive::Type value_type = instruction->GetComponentType(); - bool needs_runtime_call = locations->WillCall(); + bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck(); bool needs_write_barrier = CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue()); auto null_checker = GetImplicitNullChecker(instruction, codegen_); @@ -2186,9 +2792,27 @@ void InstructionCodeGeneratorMIPS::VisitArraySet(HArraySet* instruction) { break; } - case Primitive::kPrimInt: + case Primitive::kPrimInt: { + uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value(); + if (index.IsConstant()) { + data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4; + } else { + __ Sll(base_reg, index.AsRegister<Register>(), TIMES_4); + __ Addu(base_reg, obj, base_reg); + } + if (value_location.IsConstant()) { + int32_t value = CodeGenerator::GetInt32ValueOf(value_location.GetConstant()); + __ StoreConstToOffset(kStoreWord, value, base_reg, data_offset, TMP, null_checker); + } else { + Register value = value_location.AsRegister<Register>(); + __ StoreToOffset(kStoreWord, value, base_reg, data_offset, null_checker); + } + break; + } + case Primitive::kPrimNot: { - if (!needs_runtime_call) { + if (value_location.IsConstant()) { + // Just setting null. uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value(); if (index.IsConstant()) { data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4; @@ -2196,48 +2820,110 @@ void InstructionCodeGeneratorMIPS::VisitArraySet(HArraySet* instruction) { __ Sll(base_reg, index.AsRegister<Register>(), TIMES_4); __ Addu(base_reg, obj, base_reg); } - if (value_location.IsConstant()) { - int32_t value = CodeGenerator::GetInt32ValueOf(value_location.GetConstant()); - __ StoreConstToOffset(kStoreWord, value, base_reg, data_offset, TMP, null_checker); - DCHECK(!needs_write_barrier); - } else { - Register value = value_location.AsRegister<Register>(); - if (kPoisonHeapReferences && needs_write_barrier) { - // Note that in the case where `value` is a null reference, - // we do not enter this block, as a null reference does not - // need poisoning. - DCHECK_EQ(value_type, Primitive::kPrimNot); - // Use Sw() instead of StoreToOffset() in order to be able to - // hold the poisoned reference in AT and thus avoid allocating - // yet another temporary register. 
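Context for the poisoning in this removed block (and in the replacement code below): when kPoisonHeapReferences is set, ART stores compressed heap references in negated form, so a stray dereference of a poisoned value faults. A hedged sketch, assuming negation is indeed the poisoning function on this target:

    #include <cstdint>

    constexpr bool kPoisonHeapReferences = true;  // assumed build configuration

    // Negation is an involution in two's complement, -(-x) == x, so the same
    // operation both poisons and unpoisons a 32-bit compressed reference.
    inline uint32_t PoisonHeapReference(uint32_t ref) { return 0u - ref; }
    inline uint32_t UnpoisonHeapReference(uint32_t ref) { return 0u - ref; }

    inline uint32_t MaybeUnpoisonHeapReference(uint32_t ref) {
      return kPoisonHeapReferences ? UnpoisonHeapReference(ref) : ref;
    }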
- if (index.IsConstant()) { - if (!IsInt<16>(static_cast<int32_t>(data_offset))) { - int16_t low = Low16Bits(data_offset); - uint32_t high = data_offset - low; - __ Addiu32(TMP, obj, high); - base_reg = TMP; - data_offset = low; - } - } else { - DCHECK(IsInt<16>(static_cast<int32_t>(data_offset))); - } - __ PoisonHeapReference(AT, value); - __ Sw(AT, base_reg, data_offset); - null_checker(); + int32_t value = CodeGenerator::GetInt32ValueOf(value_location.GetConstant()); + DCHECK_EQ(value, 0); + __ StoreConstToOffset(kStoreWord, value, base_reg, data_offset, TMP, null_checker); + DCHECK(!needs_write_barrier); + DCHECK(!may_need_runtime_call_for_type_check); + break; + } + + DCHECK(needs_write_barrier); + Register value = value_location.AsRegister<Register>(); + Register temp1 = locations->GetTemp(0).AsRegister<Register>(); + Register temp2 = TMP; // Doesn't need to survive slow path. + uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); + uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); + uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); + MipsLabel done; + SlowPathCodeMIPS* slow_path = nullptr; + + if (may_need_runtime_call_for_type_check) { + slow_path = new (GetGraph()->GetArena()) ArraySetSlowPathMIPS(instruction); + codegen_->AddSlowPath(slow_path); + if (instruction->GetValueCanBeNull()) { + MipsLabel non_zero; + __ Bnez(value, &non_zero); + uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value(); + if (index.IsConstant()) { + data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4; } else { - __ StoreToOffset(kStoreWord, value, base_reg, data_offset, null_checker); - } - if (needs_write_barrier) { - DCHECK_EQ(value_type, Primitive::kPrimNot); - codegen_->MarkGCCard(obj, value, instruction->GetValueCanBeNull()); + __ Sll(base_reg, index.AsRegister<Register>(), TIMES_4); + __ Addu(base_reg, obj, base_reg); } + __ StoreToOffset(kStoreWord, value, base_reg, data_offset, null_checker); + __ B(&done); + __ Bind(&non_zero); } + + // Note that when read barriers are enabled, the type checks + // are performed without read barriers. This is fine, even in + // the case where a class object is in the from-space after + // the flip, as a comparison involving such a type would not + // produce a false positive; it may of course produce a false + // negative, in which case we would take the ArraySet slow + // path. + + // /* HeapReference<Class> */ temp1 = obj->klass_ + __ LoadFromOffset(kLoadWord, temp1, obj, class_offset, null_checker); + __ MaybeUnpoisonHeapReference(temp1); + + // /* HeapReference<Class> */ temp1 = temp1->component_type_ + __ LoadFromOffset(kLoadWord, temp1, temp1, component_offset); + // /* HeapReference<Class> */ temp2 = value->klass_ + __ LoadFromOffset(kLoadWord, temp2, value, class_offset); + // If heap poisoning is enabled, no need to unpoison `temp1` + // nor `temp2`, as we are comparing two poisoned references. + + if (instruction->StaticTypeOfArrayIsObjectArray()) { + MipsLabel do_put; + __ Beq(temp1, temp2, &do_put); + // If heap poisoning is enabled, the `temp1` reference has + // not been unpoisoned yet; unpoison it now. + __ MaybeUnpoisonHeapReference(temp1); + + // /* HeapReference<Class> */ temp1 = temp1->super_class_ + __ LoadFromOffset(kLoadWord, temp1, temp1, super_offset); + // If heap poisoning is enabled, no need to unpoison + // `temp1`, as we are comparing against null below. 
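Lifted out of the assembly, the type check completed just below (the Beq/Bnez against the slow-path label) reduces to the following sketch; the struct layouts are illustrative stand-ins, not ART's mirror types:

    // Illustrative stand-ins for the mirror:: types referenced above.
    struct Class { const Class* super_class; const Class* component_type; };
    struct Object { const Class* klass; };

    // Returns true when the store may proceed without the ArraySet slow path.
    bool ArrayStoreFastPath(const Object* array, const Object* value,
                            bool static_type_is_object_array) {
      const Class* component = array->klass->component_type;   // temp1
      const Class* value_klass = value->klass;                  // temp2
      if (component == value_klass) {
        return true;  // exact match: do the put
      }
      if (static_type_is_object_array) {
        // Object[] case: also accept when the component type's superclass is
        // null, i.e. the component type is java.lang.Object.
        return component->super_class == nullptr;
      }
      return false;  // defer to the slow path
    }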
+ __ Bnez(temp1, slow_path->GetEntryLabel()); + __ Bind(&do_put); + } else { + __ Bne(temp1, temp2, slow_path->GetEntryLabel()); + } + } + + Register source = value; + if (kPoisonHeapReferences) { + // Note that in the case where `value` is a null reference, + // we do not enter this block, as a null reference does not + // need poisoning. + __ Move(temp1, value); + __ PoisonHeapReference(temp1); + source = temp1; + } + + uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value(); + if (index.IsConstant()) { + data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4; } else { - DCHECK_EQ(value_type, Primitive::kPrimNot); - // Note: if heap poisoning is enabled, pAputObject takes care - // of poisoning the reference. - codegen_->InvokeRuntime(kQuickAputObject, instruction, instruction->GetDexPc()); - CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>(); + __ Sll(base_reg, index.AsRegister<Register>(), TIMES_4); + __ Addu(base_reg, obj, base_reg); + } + __ StoreToOffset(kStoreWord, source, base_reg, data_offset); + + if (!may_need_runtime_call_for_type_check) { + codegen_->MaybeRecordImplicitNullCheck(instruction); + } + + codegen_->MarkGCCard(obj, value, instruction->GetValueCanBeNull()); + + if (done.IsLinked()) { + __ Bind(&done); + } + + if (slow_path != nullptr) { + __ Bind(slow_path->GetExitLabel()); } break; } @@ -2327,6 +3013,23 @@ void InstructionCodeGeneratorMIPS::VisitBoundsCheck(HBoundsCheck* instruction) { __ Bgeu(index, length, slow_path->GetEntryLabel()); } +// Temp is used for read barrier. +static size_t NumberOfInstanceOfTemps(TypeCheckKind type_check_kind) { + if (kEmitCompilerReadBarrier && + (kUseBakerReadBarrier || + type_check_kind == TypeCheckKind::kAbstractClassCheck || + type_check_kind == TypeCheckKind::kClassHierarchyCheck || + type_check_kind == TypeCheckKind::kArrayObjectCheck)) { + return 1; + } + return 0; +} + +// Extra temp is used for read barrier. +static size_t NumberOfCheckCastTemps(TypeCheckKind type_check_kind) { + return 1 + NumberOfInstanceOfTemps(type_check_kind); +} + void LocationsBuilderMIPS::VisitCheckCast(HCheckCast* instruction) { LocationSummary::CallKind call_kind = LocationSummary::kNoCall; bool throws_into_catch = instruction->CanThrowIntoCatchBlock(); @@ -2337,7 +3040,7 @@ void LocationsBuilderMIPS::VisitCheckCast(HCheckCast* instruction) { case TypeCheckKind::kAbstractClassCheck: case TypeCheckKind::kClassHierarchyCheck: case TypeCheckKind::kArrayObjectCheck: - call_kind = throws_into_catch + call_kind = (throws_into_catch || kEmitCompilerReadBarrier) ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall; // In fact, call on a fatal (non-returning) slow path. 
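VisitBoundsCheck above gets away with a single Bgeu because an unsigned comparison folds both failure cases into one branch, equivalently:

    #include <cstdint>

    // A negative index wraps to a huge unsigned value, so one unsigned compare
    // covers both index < 0 and index >= length, which is what Bgeu exploits.
    bool IndexOutOfBounds(int32_t index, int32_t length) {
      return static_cast<uint32_t>(index) >= static_cast<uint32_t>(length);
    }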
break; @@ -2351,15 +3054,20 @@ void LocationsBuilderMIPS::VisitCheckCast(HCheckCast* instruction) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind); locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RequiresRegister()); - locations->AddTemp(Location::RequiresRegister()); + locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind)); } void InstructionCodeGeneratorMIPS::VisitCheckCast(HCheckCast* instruction) { TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); LocationSummary* locations = instruction->GetLocations(); - Register obj = locations->InAt(0).AsRegister<Register>(); + Location obj_loc = locations->InAt(0); + Register obj = obj_loc.AsRegister<Register>(); Register cls = locations->InAt(1).AsRegister<Register>(); - Register temp = locations->GetTemp(0).AsRegister<Register>(); + Location temp_loc = locations->GetTemp(0); + Register temp = temp_loc.AsRegister<Register>(); + const size_t num_temps = NumberOfCheckCastTemps(type_check_kind); + DCHECK_LE(num_temps, 2u); + Location maybe_temp2_loc = (num_temps >= 2) ? locations->GetTemp(1) : Location::NoLocation(); const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); @@ -2396,8 +3104,12 @@ void InstructionCodeGeneratorMIPS::VisitCheckCast(HCheckCast* instruction) { case TypeCheckKind::kExactCheck: case TypeCheckKind::kArrayCheck: { // /* HeapReference<Class> */ temp = obj->klass_ - __ LoadFromOffset(kLoadWord, temp, obj, class_offset); - __ MaybeUnpoisonHeapReference(temp); + GenerateReferenceLoadTwoRegisters(instruction, + temp_loc, + obj_loc, + class_offset, + maybe_temp2_loc, + kWithoutReadBarrier); // Jump to slow path for throwing the exception or doing a // more involved array check. __ Bne(temp, cls, slow_path->GetEntryLabel()); @@ -2406,15 +3118,22 @@ void InstructionCodeGeneratorMIPS::VisitCheckCast(HCheckCast* instruction) { case TypeCheckKind::kAbstractClassCheck: { // /* HeapReference<Class> */ temp = obj->klass_ - __ LoadFromOffset(kLoadWord, temp, obj, class_offset); - __ MaybeUnpoisonHeapReference(temp); + GenerateReferenceLoadTwoRegisters(instruction, + temp_loc, + obj_loc, + class_offset, + maybe_temp2_loc, + kWithoutReadBarrier); // If the class is abstract, we eagerly fetch the super class of the // object to avoid doing a comparison we know will fail. MipsLabel loop; __ Bind(&loop); // /* HeapReference<Class> */ temp = temp->super_class_ - __ LoadFromOffset(kLoadWord, temp, temp, super_offset); - __ MaybeUnpoisonHeapReference(temp); + GenerateReferenceLoadOneRegister(instruction, + temp_loc, + super_offset, + maybe_temp2_loc, + kWithoutReadBarrier); // If the class reference currently in `temp` is null, jump to the slow path to throw the // exception. __ Beqz(temp, slow_path->GetEntryLabel()); @@ -2425,15 +3144,22 @@ void InstructionCodeGeneratorMIPS::VisitCheckCast(HCheckCast* instruction) { case TypeCheckKind::kClassHierarchyCheck: { // /* HeapReference<Class> */ temp = obj->klass_ - __ LoadFromOffset(kLoadWord, temp, obj, class_offset); - __ MaybeUnpoisonHeapReference(temp); + GenerateReferenceLoadTwoRegisters(instruction, + temp_loc, + obj_loc, + class_offset, + maybe_temp2_loc, + kWithoutReadBarrier); // Walk over the class hierarchy to find a match. 
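The loop emitted next is the assembly form of a plain superclass walk; roughly, with an illustrative stand-in type:

    struct Class { const Class* super_class; };

    // Follow super_class links until `cls` is found or the chain ends in null.
    bool IsSubclassOf(const Class* klass, const Class* cls) {
      for (const Class* k = klass; k != nullptr; k = k->super_class) {
        if (k == cls) {
          return true;  // Beq(temp, cls, &done)
        }
      }
      return false;  // null reached: throw via the slow path
    }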
MipsLabel loop; __ Bind(&loop); __ Beq(temp, cls, &done); // /* HeapReference<Class> */ temp = temp->super_class_ - __ LoadFromOffset(kLoadWord, temp, temp, super_offset); - __ MaybeUnpoisonHeapReference(temp); + GenerateReferenceLoadOneRegister(instruction, + temp_loc, + super_offset, + maybe_temp2_loc, + kWithoutReadBarrier); // If the class reference currently in `temp` is null, jump to the slow path to throw the // exception. Otherwise, jump to the beginning of the loop. __ Bnez(temp, &loop); @@ -2443,14 +3169,21 @@ void InstructionCodeGeneratorMIPS::VisitCheckCast(HCheckCast* instruction) { case TypeCheckKind::kArrayObjectCheck: { // /* HeapReference<Class> */ temp = obj->klass_ - __ LoadFromOffset(kLoadWord, temp, obj, class_offset); - __ MaybeUnpoisonHeapReference(temp); + GenerateReferenceLoadTwoRegisters(instruction, + temp_loc, + obj_loc, + class_offset, + maybe_temp2_loc, + kWithoutReadBarrier); // Do an exact check. __ Beq(temp, cls, &done); // Otherwise, we need to check that the object's class is a non-primitive array. // /* HeapReference<Class> */ temp = temp->component_type_ - __ LoadFromOffset(kLoadWord, temp, temp, component_offset); - __ MaybeUnpoisonHeapReference(temp); + GenerateReferenceLoadOneRegister(instruction, + temp_loc, + component_offset, + maybe_temp2_loc, + kWithoutReadBarrier); // If the component type is null, jump to the slow path to throw the exception. __ Beqz(temp, slow_path->GetEntryLabel()); // Otherwise, the object is indeed an array, further check that this component @@ -2477,11 +3210,19 @@ void InstructionCodeGeneratorMIPS::VisitCheckCast(HCheckCast* instruction) { // Avoid read barriers to improve performance of the fast path. We can not get false // positives by doing this. // /* HeapReference<Class> */ temp = obj->klass_ - __ LoadFromOffset(kLoadWord, temp, obj, class_offset); - __ MaybeUnpoisonHeapReference(temp); + GenerateReferenceLoadTwoRegisters(instruction, + temp_loc, + obj_loc, + class_offset, + maybe_temp2_loc, + kWithoutReadBarrier); // /* HeapReference<Class> */ temp = temp->iftable_ - __ LoadFromOffset(kLoadWord, temp, temp, iftable_offset); - __ MaybeUnpoisonHeapReference(temp); + GenerateReferenceLoadTwoRegisters(instruction, + temp_loc, + temp_loc, + iftable_offset, + maybe_temp2_loc, + kWithoutReadBarrier); // Iftable is never null. __ Lw(TMP, temp, array_length_offset); // Loop through the iftable and check if any class matches. @@ -5032,8 +5773,15 @@ void LocationsBuilderMIPS::HandleFieldGet(HInstruction* instruction, const Field Primitive::Type field_type = field_info.GetFieldType(); bool is_wide = (field_type == Primitive::kPrimLong) || (field_type == Primitive::kPrimDouble); bool generate_volatile = field_info.IsVolatile() && is_wide; + bool object_field_get_with_read_barrier = + kEmitCompilerReadBarrier && (field_type == Primitive::kPrimNot); LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary( - instruction, generate_volatile ? LocationSummary::kCallOnMainOnly : LocationSummary::kNoCall); + instruction, + generate_volatile + ? LocationSummary::kCallOnMainOnly + : (object_field_get_with_read_barrier + ? 
LocationSummary::kCallOnSlowPath + : LocationSummary::kNoCall)); locations->SetInAt(0, Location::RequiresRegister()); if (generate_volatile) { @@ -5054,7 +5802,18 @@ void LocationsBuilderMIPS::HandleFieldGet(HInstruction* instruction, const Field if (Primitive::IsFloatingPointType(instruction->GetType())) { locations->SetOut(Location::RequiresFpuRegister()); } else { - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + // The output overlaps in the case of an object field get with + // read barriers enabled: we do not want the move to overwrite the + // object's location, as we need it to emit the read barrier. + locations->SetOut(Location::RequiresRegister(), + object_field_get_with_read_barrier + ? Location::kOutputOverlap + : Location::kNoOutputOverlap); + } + if (object_field_get_with_read_barrier && kUseBakerReadBarrier) { + // We need a temporary register for the read barrier marking slow + // path in CodeGeneratorMIPS::GenerateFieldLoadWithBakerReadBarrier. + locations->AddTemp(Location::RequiresRegister()); } } } @@ -5064,7 +5823,9 @@ void InstructionCodeGeneratorMIPS::HandleFieldGet(HInstruction* instruction, uint32_t dex_pc) { Primitive::Type type = field_info.GetFieldType(); LocationSummary* locations = instruction->GetLocations(); - Register obj = locations->InAt(0).AsRegister<Register>(); + Location obj_loc = locations->InAt(0); + Register obj = obj_loc.AsRegister<Register>(); + Location dst_loc = locations->Out(); LoadOperandType load_type = kLoadUnsignedByte; bool is_volatile = field_info.IsVolatile(); uint32_t offset = field_info.GetFieldOffset().Uint32Value(); @@ -5107,40 +5868,61 @@ void InstructionCodeGeneratorMIPS::HandleFieldGet(HInstruction* instruction, CheckEntrypointTypes<kQuickA64Load, int64_t, volatile const int64_t*>(); if (type == Primitive::kPrimDouble) { // FP results are returned in core registers. Need to move them. - Location out = locations->Out(); - if (out.IsFpuRegister()) { - __ Mtc1(locations->GetTemp(1).AsRegister<Register>(), out.AsFpuRegister<FRegister>()); + if (dst_loc.IsFpuRegister()) { + __ Mtc1(locations->GetTemp(1).AsRegister<Register>(), dst_loc.AsFpuRegister<FRegister>()); __ MoveToFpuHigh(locations->GetTemp(2).AsRegister<Register>(), - out.AsFpuRegister<FRegister>()); + dst_loc.AsFpuRegister<FRegister>()); } else { - DCHECK(out.IsDoubleStackSlot()); + DCHECK(dst_loc.IsDoubleStackSlot()); __ StoreToOffset(kStoreWord, locations->GetTemp(1).AsRegister<Register>(), SP, - out.GetStackIndex()); + dst_loc.GetStackIndex()); __ StoreToOffset(kStoreWord, locations->GetTemp(2).AsRegister<Register>(), SP, - out.GetStackIndex() + 4); + dst_loc.GetStackIndex() + 4); } } } else { - if (!Primitive::IsFloatingPointType(type)) { + if (type == Primitive::kPrimNot) { + // /* HeapReference<Object> */ dst = *(obj + offset) + if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + Location temp_loc = locations->GetTemp(0); + // Note that a potential implicit null check is handled in this + // CodeGeneratorMIPS::GenerateFieldLoadWithBakerReadBarrier call. 
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction, + dst_loc, + obj, + offset, + temp_loc, + /* needs_null_check */ true); + if (is_volatile) { + GenerateMemoryBarrier(MemBarrierKind::kLoadAny); + } + } else { + __ LoadFromOffset(kLoadWord, dst_loc.AsRegister<Register>(), obj, offset, null_checker); + if (is_volatile) { + GenerateMemoryBarrier(MemBarrierKind::kLoadAny); + } + // If read barriers are enabled, emit read barriers other than + // Baker's using a slow path (and also unpoison the loaded + // reference, if heap poisoning is enabled). + codegen_->MaybeGenerateReadBarrierSlow(instruction, dst_loc, dst_loc, obj_loc, offset); + } + } else if (!Primitive::IsFloatingPointType(type)) { Register dst; if (type == Primitive::kPrimLong) { - DCHECK(locations->Out().IsRegisterPair()); - dst = locations->Out().AsRegisterPairLow<Register>(); + DCHECK(dst_loc.IsRegisterPair()); + dst = dst_loc.AsRegisterPairLow<Register>(); } else { - DCHECK(locations->Out().IsRegister()); - dst = locations->Out().AsRegister<Register>(); + DCHECK(dst_loc.IsRegister()); + dst = dst_loc.AsRegister<Register>(); } __ LoadFromOffset(load_type, dst, obj, offset, null_checker); - if (type == Primitive::kPrimNot) { - __ MaybeUnpoisonHeapReference(dst); - } } else { - DCHECK(locations->Out().IsFpuRegister()); - FRegister dst = locations->Out().AsFpuRegister<FRegister>(); + DCHECK(dst_loc.IsFpuRegister()); + FRegister dst = dst_loc.AsFpuRegister<FRegister>(); if (type == Primitive::kPrimFloat) { __ LoadSFromOffset(dst, obj, offset, null_checker); } else { @@ -5149,7 +5931,9 @@ void InstructionCodeGeneratorMIPS::HandleFieldGet(HInstruction* instruction, } } - if (is_volatile) { + // Memory barriers, in the case of references, are handled in the + // previous switch statement. + if (is_volatile && (type != Primitive::kPrimNot)) { GenerateMemoryBarrier(MemBarrierKind::kLoadAny); } } @@ -5290,7 +6074,6 @@ void InstructionCodeGeneratorMIPS::HandleFieldSet(HInstruction* instruction, } } - // TODO: memory barriers? if (needs_write_barrier) { Register src = value_location.AsRegister<Register>(); codegen_->MarkGCCard(obj, src, value_can_be_null); @@ -5320,14 +6103,133 @@ void InstructionCodeGeneratorMIPS::VisitInstanceFieldSet(HInstanceFieldSet* inst instruction->GetValueCanBeNull()); } -void InstructionCodeGeneratorMIPS::GenerateGcRootFieldLoad( - HInstruction* instruction ATTRIBUTE_UNUSED, - Location root, - Register obj, - uint32_t offset) { +void InstructionCodeGeneratorMIPS::GenerateReferenceLoadOneRegister( + HInstruction* instruction, + Location out, + uint32_t offset, + Location maybe_temp, + ReadBarrierOption read_barrier_option) { + Register out_reg = out.AsRegister<Register>(); + if (read_barrier_option == kWithReadBarrier) { + CHECK(kEmitCompilerReadBarrier); + DCHECK(maybe_temp.IsRegister()) << maybe_temp; + if (kUseBakerReadBarrier) { + // Load with fast path based Baker's read barrier. + // /* HeapReference<Object> */ out = *(out + offset) + codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction, + out, + out_reg, + offset, + maybe_temp, + /* needs_null_check */ false); + } else { + // Load with slow path based read barrier. + // Save the value of `out` into `maybe_temp` before overwriting it + // in the following move operation, as we will need it for the + // read barrier below. 
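The copy is needed because this load is destructive: `out` serves as both the holder address and the destination. A sketch of the non-Baker path, with stand-in types, a no-op barrier stub, and the field offset fixed at 0 for brevity:

    struct Object { Object* field; };

    // Stand-in for the slow-path barrier; the real code branches to a
    // ReadBarrierForHeapReference slow path that calls artReadBarrierSlow.
    Object* ReadBarrierSlowStub(Object* ref, Object* holder, unsigned offset) {
      (void)holder;
      (void)offset;
      return ref;  // illustrative no-op
    }

    Object* LoadReferenceOneRegister(Object* out) {
      Object* holder = out;  // __ Move(maybe_temp, out_reg)
      out = out->field;      // out <- *(out + offset)
      return ReadBarrierSlowStub(out, holder, /* offset */ 0u);
    }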
+ __ Move(maybe_temp.AsRegister<Register>(), out_reg); + // /* HeapReference<Object> */ out = *(out + offset) + __ LoadFromOffset(kLoadWord, out_reg, out_reg, offset); + codegen_->GenerateReadBarrierSlow(instruction, out, out, maybe_temp, offset); + } + } else { + // Plain load with no read barrier. + // /* HeapReference<Object> */ out = *(out + offset) + __ LoadFromOffset(kLoadWord, out_reg, out_reg, offset); + __ MaybeUnpoisonHeapReference(out_reg); + } +} + +void InstructionCodeGeneratorMIPS::GenerateReferenceLoadTwoRegisters( + HInstruction* instruction, + Location out, + Location obj, + uint32_t offset, + Location maybe_temp, + ReadBarrierOption read_barrier_option) { + Register out_reg = out.AsRegister<Register>(); + Register obj_reg = obj.AsRegister<Register>(); + if (read_barrier_option == kWithReadBarrier) { + CHECK(kEmitCompilerReadBarrier); + if (kUseBakerReadBarrier) { + DCHECK(maybe_temp.IsRegister()) << maybe_temp; + // Load with fast path based Baker's read barrier. + // /* HeapReference<Object> */ out = *(obj + offset) + codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction, + out, + obj_reg, + offset, + maybe_temp, + /* needs_null_check */ false); + } else { + // Load with slow path based read barrier. + // /* HeapReference<Object> */ out = *(obj + offset) + __ LoadFromOffset(kLoadWord, out_reg, obj_reg, offset); + codegen_->GenerateReadBarrierSlow(instruction, out, out, obj, offset); + } + } else { + // Plain load with no read barrier. + // /* HeapReference<Object> */ out = *(obj + offset) + __ LoadFromOffset(kLoadWord, out_reg, obj_reg, offset); + __ MaybeUnpoisonHeapReference(out_reg); + } +} + +void InstructionCodeGeneratorMIPS::GenerateGcRootFieldLoad(HInstruction* instruction, + Location root, + Register obj, + uint32_t offset, + ReadBarrierOption read_barrier_option) { Register root_reg = root.AsRegister<Register>(); - if (kEmitCompilerReadBarrier) { - UNIMPLEMENTED(FATAL) << "for read barrier"; + if (read_barrier_option == kWithReadBarrier) { + DCHECK(kEmitCompilerReadBarrier); + if (kUseBakerReadBarrier) { + // Fast path implementation of art::ReadBarrier::BarrierForRoot when + // Baker's read barrier are used: + // + // root = obj.field; + // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg() + // if (temp != null) { + // root = temp(root) + // } + + // /* GcRoot<mirror::Object> */ root = *(obj + offset) + __ LoadFromOffset(kLoadWord, root_reg, obj, offset); + static_assert( + sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>), + "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> " + "have different sizes."); + static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t), + "art::mirror::CompressedReference<mirror::Object> and int32_t " + "have different sizes."); + + // Slow path marking the GC root `root`. + Location temp = Location::RegisterLocation(T9); + SlowPathCodeMIPS* slow_path = + new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathMIPS( + instruction, + root, + /*entrypoint*/ temp); + codegen_->AddSlowPath(slow_path); + + // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg() + const int32_t entry_point_offset = + CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kMipsPointerSize>(root.reg() - 1); + // Loading the entrypoint does not require a load acquire since it is only changed when + // threads are suspended or running a checkpoint. 
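The shape of this root load is worth restating: the per-register mark entrypoint doubles as the is-marking flag, so the null test on the loaded entrypoint replaces a separate GetIsGcMarking check. A sketch with stand-in types:

    struct Object;
    using MarkFn = Object* (*)(Object*);  // stand-in for pReadBarrierMarkRegXX

    Object* LoadGcRoot(Object** root_slot, MarkFn entrypoint /* may be null */) {
      Object* root = *root_slot;    // plain GC root load
      if (entrypoint != nullptr) {  // Bnez(temp, slow_path): GC is marking
        root = entrypoint(root);    // slow path marks/forwards the root
      }
      return root;
    }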
+ __ LoadFromOffset(kLoadWord, temp.AsRegister<Register>(), TR, entry_point_offset); + // The entrypoint is null when the GC is not marking, this prevents one load compared to + // checking GetIsGcMarking. + __ Bnez(temp.AsRegister<Register>(), slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); + } else { + // GC root loaded through a slow path for read barriers other + // than Baker's. + // /* GcRoot<mirror::Object>* */ root = obj + offset + __ Addiu32(root_reg, obj, offset); + // /* mirror::Object* */ root = root->Read() + codegen_->GenerateReadBarrierForRootSlow(instruction, root, root); + } } else { // Plain GC root load with no read barrier. // /* GcRoot<mirror::Object> */ root = *(obj + offset) @@ -5337,6 +6239,226 @@ void InstructionCodeGeneratorMIPS::GenerateGcRootFieldLoad( } } +void CodeGeneratorMIPS::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction, + Location ref, + Register obj, + uint32_t offset, + Location temp, + bool needs_null_check) { + DCHECK(kEmitCompilerReadBarrier); + DCHECK(kUseBakerReadBarrier); + + // /* HeapReference<Object> */ ref = *(obj + offset) + Location no_index = Location::NoLocation(); + ScaleFactor no_scale_factor = TIMES_1; + GenerateReferenceLoadWithBakerReadBarrier(instruction, + ref, + obj, + offset, + no_index, + no_scale_factor, + temp, + needs_null_check); +} + +void CodeGeneratorMIPS::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction, + Location ref, + Register obj, + uint32_t data_offset, + Location index, + Location temp, + bool needs_null_check) { + DCHECK(kEmitCompilerReadBarrier); + DCHECK(kUseBakerReadBarrier); + + static_assert( + sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), + "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); + // /* HeapReference<Object> */ ref = + // *(obj + data_offset + index * sizeof(HeapReference<Object>)) + ScaleFactor scale_factor = TIMES_4; + GenerateReferenceLoadWithBakerReadBarrier(instruction, + ref, + obj, + data_offset, + index, + scale_factor, + temp, + needs_null_check); +} + +void CodeGeneratorMIPS::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction, + Location ref, + Register obj, + uint32_t offset, + Location index, + ScaleFactor scale_factor, + Location temp, + bool needs_null_check, + bool always_update_field) { + DCHECK(kEmitCompilerReadBarrier); + DCHECK(kUseBakerReadBarrier); + + // In slow path based read barriers, the read barrier call is + // inserted after the original load. However, in fast path based + // Baker's read barriers, we need to perform the load of + // mirror::Object::monitor_ *before* the original reference load. + // This load-load ordering is required by the read barrier. + // The fast path/slow path (for Baker's algorithm) should look like: + // + // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState(); + // lfence; // Load fence or artificial data dependency to prevent load-load reordering + // HeapReference<Object> ref = *src; // Original reference load. + // bool is_gray = (rb_state == ReadBarrier::GrayState()); + // if (is_gray) { + // ref = ReadBarrier::Mark(ref); // Performed by runtime entrypoint slow path. + // } + // + // Note: the original implementation in ReadBarrier::Barrier is + // slightly more complex as it performs additional checks that we do + // not do here for performance reasons. 
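Rendered as C++, the fast path described in that comment looks roughly as follows; the lock-word bit position and the mark entrypoint are assumptions for illustration, not ART's exact definitions:

    #include <atomic>
    #include <cstdint>

    struct Object;
    Object* ReadBarrierMark(Object* ref);  // stand-in for the mark entrypoint

    constexpr uint32_t kReadBarrierStateShift = 28;  // assumed bit position
    constexpr uint32_t kGrayState = 1;

    Object* BakerLoad(const std::atomic<uint32_t>* monitor, Object* const* src) {
      // Read the lock word *before* the reference; the Sync(0) emitted below
      // is the load-load barrier this ordering requires.
      uint32_t rb_state =
          (monitor->load(std::memory_order_relaxed) >> kReadBarrierStateShift) & 1u;
      std::atomic_thread_fence(std::memory_order_acquire);  // models Sync(0)
      Object* ref = *src;                                   // original load
      if (rb_state == kGrayState) {
        ref = ReadBarrierMark(ref);  // runtime slow path marks the reference
      }
      return ref;
    }

    // The emitted code tests "gray" without masking: shift the state bit into
    // the sign bit and branch on less-than-zero (Sll by 31 - shift, then Bltz).
    bool IsGray(uint32_t lock_word) {
      return static_cast<int32_t>(lock_word << (31 - kReadBarrierStateShift)) < 0;
    }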
+ + Register ref_reg = ref.AsRegister<Register>(); + Register temp_reg = temp.AsRegister<Register>(); + uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value(); + + // /* int32_t */ monitor = obj->monitor_ + __ LoadFromOffset(kLoadWord, temp_reg, obj, monitor_offset); + if (needs_null_check) { + MaybeRecordImplicitNullCheck(instruction); + } + // /* LockWord */ lock_word = LockWord(monitor) + static_assert(sizeof(LockWord) == sizeof(int32_t), + "art::LockWord and int32_t have different sizes."); + + __ Sync(0); // Barrier to prevent load-load reordering. + + // The actual reference load. + if (index.IsValid()) { + // Load types involving an "index": ArrayGet, + // UnsafeGetObject/UnsafeGetObjectVolatile and UnsafeCASObject + // intrinsics. + // /* HeapReference<Object> */ ref = *(obj + offset + (index << scale_factor)) + if (index.IsConstant()) { + size_t computed_offset = + (index.GetConstant()->AsIntConstant()->GetValue() << scale_factor) + offset; + __ LoadFromOffset(kLoadWord, ref_reg, obj, computed_offset); + } else { + // Handle the special case of the + // UnsafeGetObject/UnsafeGetObjectVolatile and UnsafeCASObject + // intrinsics, which use a register pair as index ("long + // offset"), of which only the low part contains data. + Register index_reg = index.IsRegisterPair() + ? index.AsRegisterPairLow<Register>() + : index.AsRegister<Register>(); + __ Sll(TMP, index_reg, scale_factor); + __ Addu(TMP, obj, TMP); + __ LoadFromOffset(kLoadWord, ref_reg, TMP, offset); + } + } else { + // /* HeapReference<Object> */ ref = *(obj + offset) + __ LoadFromOffset(kLoadWord, ref_reg, obj, offset); + } + + // Object* ref = ref_addr->AsMirrorPtr() + __ MaybeUnpoisonHeapReference(ref_reg); + + // Slow path marking the object `ref` when it is gray. + SlowPathCodeMIPS* slow_path; + if (always_update_field) { + // ReadBarrierMarkAndUpdateFieldSlowPathMIPS only supports address + // of the form `obj + field_offset`, where `obj` is a register and + // `field_offset` is a register pair (of which only the lower half + // is used). Thus `offset` and `scale_factor` above are expected + // to be null in this code path. + DCHECK_EQ(offset, 0u); + DCHECK_EQ(scale_factor, ScaleFactor::TIMES_1); + slow_path = new (GetGraph()->GetArena()) + ReadBarrierMarkAndUpdateFieldSlowPathMIPS(instruction, + ref, + obj, + /* field_offset */ index, + temp_reg); + } else { + slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathMIPS(instruction, ref); + } + AddSlowPath(slow_path); + + // if (rb_state == ReadBarrier::GrayState()) + // ref = ReadBarrier::Mark(ref); + // Given the numeric representation, it's enough to check the low bit of the + // rb_state. We do that by shifting the bit into the sign bit (31) and + // performing a branch on less than zero. + static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0"); + static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1"); + static_assert(LockWord::kReadBarrierStateSize == 1, "Expecting 1-bit read barrier state size"); + __ Sll(temp_reg, temp_reg, 31 - LockWord::kReadBarrierStateShift); + __ Bltz(temp_reg, slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); +} + +void CodeGeneratorMIPS::GenerateReadBarrierSlow(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index) { + DCHECK(kEmitCompilerReadBarrier); + + // Insert a slow path based read barrier *after* the reference load. 
+ // + // If heap poisoning is enabled, the unpoisoning of the loaded + // reference will be carried out by the runtime within the slow + // path. + // + // Note that `ref` currently does not get unpoisoned (when heap + // poisoning is enabled), which is alright as the `ref` argument is + // not used by the artReadBarrierSlow entry point. + // + // TODO: Unpoison `ref` when it is used by artReadBarrierSlow. + SlowPathCodeMIPS* slow_path = new (GetGraph()->GetArena()) + ReadBarrierForHeapReferenceSlowPathMIPS(instruction, out, ref, obj, offset, index); + AddSlowPath(slow_path); + + __ B(slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); +} + +void CodeGeneratorMIPS::MaybeGenerateReadBarrierSlow(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index) { + if (kEmitCompilerReadBarrier) { + // Baker's read barriers shall be handled by the fast path + // (CodeGeneratorMIPS::GenerateReferenceLoadWithBakerReadBarrier). + DCHECK(!kUseBakerReadBarrier); + // If heap poisoning is enabled, unpoisoning will be taken care of + // by the runtime within the slow path. + GenerateReadBarrierSlow(instruction, out, ref, obj, offset, index); + } else if (kPoisonHeapReferences) { + __ UnpoisonHeapReference(out.AsRegister<Register>()); + } +} + +void CodeGeneratorMIPS::GenerateReadBarrierForRootSlow(HInstruction* instruction, + Location out, + Location root) { + DCHECK(kEmitCompilerReadBarrier); + + // Insert a slow path based read barrier *after* the GC root load. + // + // Note that GC roots are not affected by heap poisoning, so we do + // not need to do anything special for this here. + SlowPathCodeMIPS* slow_path = + new (GetGraph()->GetArena()) ReadBarrierForRootSlowPathMIPS(instruction, out, root); + AddSlowPath(slow_path); + + __ B(slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); +} + void LocationsBuilderMIPS::VisitInstanceOf(HInstanceOf* instruction) { LocationSummary::CallKind call_kind = LocationSummary::kNoCall; TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); @@ -5345,7 +6467,8 @@ void LocationsBuilderMIPS::VisitInstanceOf(HInstanceOf* instruction) { case TypeCheckKind::kAbstractClassCheck: case TypeCheckKind::kClassHierarchyCheck: case TypeCheckKind::kArrayObjectCheck: - call_kind = LocationSummary::kNoCall; + call_kind = + kEmitCompilerReadBarrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall; break; case TypeCheckKind::kArrayCheck: case TypeCheckKind::kUnresolvedCheck: @@ -5360,14 +6483,20 @@ void LocationsBuilderMIPS::VisitInstanceOf(HInstanceOf* instruction) { // The output does overlap inputs. // Note that TypeCheckSlowPathMIPS uses this register too. 
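MaybeGenerateReadBarrierSlow, defined just above, encodes a three-way policy that is easy to lose in the assembly; a compact restatement under assumed configuration flags:

    #include <cstdint>

    constexpr bool kEmitCompilerReadBarrier = false;  // assumed configuration
    constexpr bool kPoisonHeapReferences = true;      // assumed configuration

    uint32_t MaybeReadBarrier(uint32_t loaded_ref) {
      if (kEmitCompilerReadBarrier) {
        // Non-Baker read barriers branch unconditionally to the
        // ReadBarrierForHeapReference slow path; unpoisoning then happens in
        // the runtime.
      } else if (kPoisonHeapReferences) {
        loaded_ref = 0u - loaded_ref;  // just unpoison (negation, see earlier sketch)
      }
      return loaded_ref;
    }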
locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); + locations->AddRegisterTemps(NumberOfInstanceOfTemps(type_check_kind)); } void InstructionCodeGeneratorMIPS::VisitInstanceOf(HInstanceOf* instruction) { TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); LocationSummary* locations = instruction->GetLocations(); - Register obj = locations->InAt(0).AsRegister<Register>(); + Location obj_loc = locations->InAt(0); + Register obj = obj_loc.AsRegister<Register>(); Register cls = locations->InAt(1).AsRegister<Register>(); - Register out = locations->Out().AsRegister<Register>(); + Location out_loc = locations->Out(); + Register out = out_loc.AsRegister<Register>(); + const size_t num_temps = NumberOfInstanceOfTemps(type_check_kind); + DCHECK_LE(num_temps, 1u); + Location maybe_temp_loc = (num_temps >= 1) ? locations->GetTemp(0) : Location::NoLocation(); uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); @@ -5385,8 +6514,12 @@ void InstructionCodeGeneratorMIPS::VisitInstanceOf(HInstanceOf* instruction) { switch (type_check_kind) { case TypeCheckKind::kExactCheck: { // /* HeapReference<Class> */ out = obj->klass_ - __ LoadFromOffset(kLoadWord, out, obj, class_offset); - __ MaybeUnpoisonHeapReference(out); + GenerateReferenceLoadTwoRegisters(instruction, + out_loc, + obj_loc, + class_offset, + maybe_temp_loc, + kCompilerReadBarrierOption); // Classes must be equal for the instanceof to succeed. __ Xor(out, out, cls); __ Sltiu(out, out, 1); @@ -5395,15 +6528,22 @@ void InstructionCodeGeneratorMIPS::VisitInstanceOf(HInstanceOf* instruction) { case TypeCheckKind::kAbstractClassCheck: { // /* HeapReference<Class> */ out = obj->klass_ - __ LoadFromOffset(kLoadWord, out, obj, class_offset); - __ MaybeUnpoisonHeapReference(out); + GenerateReferenceLoadTwoRegisters(instruction, + out_loc, + obj_loc, + class_offset, + maybe_temp_loc, + kCompilerReadBarrierOption); // If the class is abstract, we eagerly fetch the super class of the // object to avoid doing a comparison we know will fail. MipsLabel loop; __ Bind(&loop); // /* HeapReference<Class> */ out = out->super_class_ - __ LoadFromOffset(kLoadWord, out, out, super_offset); - __ MaybeUnpoisonHeapReference(out); + GenerateReferenceLoadOneRegister(instruction, + out_loc, + super_offset, + maybe_temp_loc, + kCompilerReadBarrierOption); // If `out` is null, we use it for the result, and jump to `done`. __ Beqz(out, &done); __ Bne(out, cls, &loop); @@ -5413,15 +6553,22 @@ void InstructionCodeGeneratorMIPS::VisitInstanceOf(HInstanceOf* instruction) { case TypeCheckKind::kClassHierarchyCheck: { // /* HeapReference<Class> */ out = obj->klass_ - __ LoadFromOffset(kLoadWord, out, obj, class_offset); - __ MaybeUnpoisonHeapReference(out); + GenerateReferenceLoadTwoRegisters(instruction, + out_loc, + obj_loc, + class_offset, + maybe_temp_loc, + kCompilerReadBarrierOption); // Walk over the class hierarchy to find a match. MipsLabel loop, success; __ Bind(&loop); __ Beq(out, cls, &success); // /* HeapReference<Class> */ out = out->super_class_ - __ LoadFromOffset(kLoadWord, out, out, super_offset); - __ MaybeUnpoisonHeapReference(out); + GenerateReferenceLoadOneRegister(instruction, + out_loc, + super_offset, + maybe_temp_loc, + kCompilerReadBarrierOption); __ Bnez(out, &loop); // If `out` is null, we use it for the result, and jump to `done`. 
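The kExactCheck arm above materializes the boolean without a branch; the Xor/Sltiu pair is equivalent to:

    #include <cstdint>

    // Xor(out, out, cls) leaves 0 iff the operands were equal;
    // Sltiu(out, out, 1) then computes "unsigned less than 1", i.e. "== 0".
    int32_t ClassesEqual(uint32_t out, uint32_t cls) {
      uint32_t x = out ^ cls;
      return (x < 1u) ? 1 : 0;
    }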
__ B(&done); @@ -5432,15 +6579,22 @@ void InstructionCodeGeneratorMIPS::VisitInstanceOf(HInstanceOf* instruction) { case TypeCheckKind::kArrayObjectCheck: { // /* HeapReference<Class> */ out = obj->klass_ - __ LoadFromOffset(kLoadWord, out, obj, class_offset); - __ MaybeUnpoisonHeapReference(out); + GenerateReferenceLoadTwoRegisters(instruction, + out_loc, + obj_loc, + class_offset, + maybe_temp_loc, + kCompilerReadBarrierOption); // Do an exact check. MipsLabel success; __ Beq(out, cls, &success); // Otherwise, we need to check that the object's class is a non-primitive array. // /* HeapReference<Class> */ out = out->component_type_ - __ LoadFromOffset(kLoadWord, out, out, component_offset); - __ MaybeUnpoisonHeapReference(out); + GenerateReferenceLoadOneRegister(instruction, + out_loc, + component_offset, + maybe_temp_loc, + kCompilerReadBarrierOption); // If `out` is null, we use it for the result, and jump to `done`. __ Beqz(out, &done); __ LoadFromOffset(kLoadUnsignedHalfword, out, out, primitive_offset); @@ -5455,8 +6609,12 @@ void InstructionCodeGeneratorMIPS::VisitInstanceOf(HInstanceOf* instruction) { case TypeCheckKind::kArrayCheck: { // No read barrier since the slow path will retry upon failure. // /* HeapReference<Class> */ out = obj->klass_ - __ LoadFromOffset(kLoadWord, out, obj, class_offset); - __ MaybeUnpoisonHeapReference(out); + GenerateReferenceLoadTwoRegisters(instruction, + out_loc, + obj_loc, + class_offset, + maybe_temp_loc, + kWithoutReadBarrier); DCHECK(locations->OnlyCallsOnSlowPath()); slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathMIPS(instruction, /* is_fatal */ false); @@ -5627,9 +6785,6 @@ static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorMIPS* codegen HLoadString::LoadKind CodeGeneratorMIPS::GetSupportedLoadStringKind( HLoadString::LoadKind desired_string_load_kind) { - if (kEmitCompilerReadBarrier) { - UNIMPLEMENTED(FATAL) << "for read barrier"; - } // We disable PC-relative load on pre-R6 when there is an irreducible loop, as the optimization // is incompatible with it. // TODO: Create as many MipsDexCacheArraysBase instructions as needed for methods @@ -5665,9 +6820,6 @@ HLoadString::LoadKind CodeGeneratorMIPS::GetSupportedLoadStringKind( HLoadClass::LoadKind CodeGeneratorMIPS::GetSupportedLoadClassKind( HLoadClass::LoadKind desired_class_load_kind) { - if (kEmitCompilerReadBarrier) { - UNIMPLEMENTED(FATAL) << "for read barrier"; - } // We disable PC-relative load on pre-R6 when there is an irreducible loop, as the optimization // is incompatible with it. bool has_irreducible_loops = GetGraph()->HasIrreducibleLoops(); @@ -5916,12 +7068,13 @@ void LocationsBuilderMIPS::VisitLoadClass(HLoadClass* cls) { CodeGenerator::CreateLoadClassRuntimeCallLocationSummary( cls, Location::RegisterLocation(calling_convention.GetRegisterAt(0)), - Location::RegisterLocation(V0)); + calling_convention.GetReturnLocation(Primitive::kPrimNot)); return; } DCHECK(!cls->NeedsAccessCheck()); - LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || kEmitCompilerReadBarrier) + const bool requires_read_barrier = kEmitCompilerReadBarrier && !cls->IsInBootImage(); + LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || requires_read_barrier) ? 
LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall; LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(cls, call_kind); @@ -5976,6 +7129,9 @@ void InstructionCodeGeneratorMIPS::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAF break; } + const ReadBarrierOption read_barrier_option = cls->IsInBootImage() + ? kWithoutReadBarrier + : kCompilerReadBarrierOption; bool generate_null_check = false; switch (load_kind) { case HLoadClass::LoadKind::kReferrersClass: { @@ -5985,11 +7141,13 @@ void InstructionCodeGeneratorMIPS::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAF GenerateGcRootFieldLoad(cls, out_loc, base_or_current_method_reg, - ArtMethod::DeclaringClassOffset().Int32Value()); + ArtMethod::DeclaringClassOffset().Int32Value(), + read_barrier_option); break; } case HLoadClass::LoadKind::kBootImageLinkTimeAddress: DCHECK(codegen_->GetCompilerOptions().IsBootImage()); + DCHECK_EQ(read_barrier_option, kWithoutReadBarrier); __ LoadLiteral(out, base_or_current_method_reg, codegen_->DeduplicateBootImageTypeLiteral(cls->GetDexFile(), @@ -5997,6 +7155,7 @@ void InstructionCodeGeneratorMIPS::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAF break; case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: { DCHECK(codegen_->GetCompilerOptions().IsBootImage()); + DCHECK_EQ(read_barrier_option, kWithoutReadBarrier); CodeGeneratorMIPS::PcRelativePatchInfo* info = codegen_->NewPcRelativeTypePatch(cls->GetDexFile(), cls->GetTypeIndex()); bool reordering = __ SetReorder(false); @@ -6006,7 +7165,7 @@ void InstructionCodeGeneratorMIPS::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAF break; } case HLoadClass::LoadKind::kBootImageAddress: { - DCHECK(!kEmitCompilerReadBarrier); + DCHECK_EQ(read_barrier_option, kWithoutReadBarrier); uint32_t address = dchecked_integral_cast<uint32_t>( reinterpret_cast<uintptr_t>(cls->GetClass().Get())); DCHECK_NE(address, 0u); @@ -6020,7 +7179,7 @@ void InstructionCodeGeneratorMIPS::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAF codegen_->NewTypeBssEntryPatch(cls->GetDexFile(), cls->GetTypeIndex()); bool reordering = __ SetReorder(false); codegen_->EmitPcRelativeAddressPlaceholderHigh(info, out, base_or_current_method_reg); - GenerateGcRootFieldLoad(cls, out_loc, out, /* placeholder */ 0x5678); + GenerateGcRootFieldLoad(cls, out_loc, out, /* placeholder */ 0x5678, read_barrier_option); __ SetReorder(reordering); generate_null_check = true; break; @@ -6032,7 +7191,7 @@ void InstructionCodeGeneratorMIPS::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAF bool reordering = __ SetReorder(false); __ Bind(&info->high_label); __ Lui(out, /* placeholder */ 0x1234); - GenerateGcRootFieldLoad(cls, out_loc, out, /* placeholder */ 0x5678); + GenerateGcRootFieldLoad(cls, out_loc, out, /* placeholder */ 0x5678, read_barrier_option); __ SetReorder(reordering); break; } @@ -6165,7 +7324,11 @@ void InstructionCodeGeneratorMIPS::VisitLoadString(HLoadString* load) NO_THREAD_ codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex()); bool reordering = __ SetReorder(false); codegen_->EmitPcRelativeAddressPlaceholderHigh(info, out, base_or_current_method_reg); - GenerateGcRootFieldLoad(load, out_loc, out, /* placeholder */ 0x5678); + GenerateGcRootFieldLoad(load, + out_loc, + out, + /* placeholder */ 0x5678, + kCompilerReadBarrierOption); __ SetReorder(reordering); SlowPathCodeMIPS* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathMIPS(load); codegen_->AddSlowPath(slow_path); @@ -6181,7 +7344,11 @@ void 
InstructionCodeGeneratorMIPS::VisitLoadString(HLoadString* load) NO_THREAD_ bool reordering = __ SetReorder(false); __ Bind(&info->high_label); __ Lui(out, /* placeholder */ 0x1234); - GenerateGcRootFieldLoad(load, out_loc, out, /* placeholder */ 0x5678); + GenerateGcRootFieldLoad(load, + out_loc, + out, + /* placeholder */ 0x5678, + kCompilerReadBarrierOption); __ SetReorder(reordering); return; } diff --git a/compiler/optimizing/code_generator_mips.h b/compiler/optimizing/code_generator_mips.h index 98fee24a74..3875c4bdba 100644 --- a/compiler/optimizing/code_generator_mips.h +++ b/compiler/optimizing/code_generator_mips.h @@ -241,6 +241,38 @@ class InstructionCodeGeneratorMIPS : public InstructionCodeGenerator { uint32_t dex_pc, bool value_can_be_null); void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info, uint32_t dex_pc); + + // Generate a heap reference load using one register `out`: + // + // out <- *(out + offset) + // + // while honoring heap poisoning and/or read barriers (if any). + // + // Location `maybe_temp` is used when generating a read barrier and + // shall be a register in that case; it may be an invalid location + // otherwise. + void GenerateReferenceLoadOneRegister(HInstruction* instruction, + Location out, + uint32_t offset, + Location maybe_temp, + ReadBarrierOption read_barrier_option); + // Generate a heap reference load using two different registers + // `out` and `obj`: + // + // out <- *(obj + offset) + // + // while honoring heap poisoning and/or read barriers (if any). + // + // Location `maybe_temp` is used when generating a Baker's (fast + // path) read barrier and shall be a register in that case; it may + // be an invalid location otherwise. + void GenerateReferenceLoadTwoRegisters(HInstruction* instruction, + Location out, + Location obj, + uint32_t offset, + Location maybe_temp, + ReadBarrierOption read_barrier_option); + // Generate a GC root reference load: // // root <- *(obj + offset) @@ -249,7 +281,9 @@ class InstructionCodeGeneratorMIPS : public InstructionCodeGenerator { void GenerateGcRootFieldLoad(HInstruction* instruction, Location root, Register obj, - uint32_t offset); + uint32_t offset, + ReadBarrierOption read_barrier_option); + void GenerateIntCompare(IfCondition cond, LocationSummary* locations); // When the function returns `false` it means that the condition holds if `dst` is non-zero // and doesn't hold if `dst` is zero. If it returns `true`, the roles of zero and non-zero @@ -353,6 +387,91 @@ class CodeGeneratorMIPS : public CodeGenerator { void EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) OVERRIDE; void EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) OVERRIDE; + // Fast path implementation of ReadBarrier::Barrier for a heap + // reference field load when Baker's read barriers are used. + void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction, + Location ref, + Register obj, + uint32_t offset, + Location temp, + bool needs_null_check); + // Fast path implementation of ReadBarrier::Barrier for a heap + // reference array load when Baker's read barriers are used. + void GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction, + Location ref, + Register obj, + uint32_t data_offset, + Location index, + Location temp, + bool needs_null_check); + + // Factored implementation, used by GenerateFieldLoadWithBakerReadBarrier, + // GenerateArrayLoadWithBakerReadBarrier and some intrinsics. 
+ // + // Load the object reference located at the address + // `obj + offset + (index << scale_factor)`, held by object `obj`, into + // `ref`, and mark it if needed. + // + // If `always_update_field` is true, the value of the reference is + // atomically updated in the holder (`obj`). + void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction, + Location ref, + Register obj, + uint32_t offset, + Location index, + ScaleFactor scale_factor, + Location temp, + bool needs_null_check, + bool always_update_field = false); + + // Generate a read barrier for a heap reference within `instruction` + // using a slow path. + // + // A read barrier for an object reference read from the heap is + // implemented as a call to the artReadBarrierSlow runtime entry + // point, which is passed the values in locations `ref`, `obj`, and + // `offset`: + // + // mirror::Object* artReadBarrierSlow(mirror::Object* ref, + // mirror::Object* obj, + // uint32_t offset); + // + // The `out` location contains the value returned by + // artReadBarrierSlow. + // + // When `index` is provided (i.e. for array accesses), the offset + // value passed to artReadBarrierSlow is adjusted to take `index` + // into account. + void GenerateReadBarrierSlow(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index = Location::NoLocation()); + + // If read barriers are enabled, generate a read barrier for a heap + // reference using a slow path. If heap poisoning is enabled, also + // unpoison the reference in `out`. + void MaybeGenerateReadBarrierSlow(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index = Location::NoLocation()); + + // Generate a read barrier for a GC root within `instruction` using + // a slow path. + // + // A read barrier for an object reference GC root is implemented as + // a call to the artReadBarrierForRootSlow runtime entry point, + // which is passed the value in location `root`: + // + // mirror::Object* artReadBarrierForRootSlow(GcRoot<mirror::Object>* root); + // + // The `out` location contains the value returned by + // artReadBarrierForRootSlow. + void GenerateReadBarrierForRootSlow(HInstruction* instruction, Location out, Location root); + void MarkGCCard(Register object, Register value, bool value_can_be_null); // Register allocation. @@ -400,6 +519,15 @@ class CodeGeneratorMIPS : public CodeGenerator { uint32_t dex_pc, SlowPathCode* slow_path = nullptr) OVERRIDE; + // Generate code to invoke a runtime entry point, but do not record + // PC-related information in a stack map. 
+ void InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset, + HInstruction* instruction, + SlowPathCode* slow_path, + bool direct); + + void GenerateInvokeRuntime(int32_t entry_point_offset, bool direct); + ParallelMoveResolver* GetMoveResolver() OVERRIDE { return &move_resolver_; } bool NeedsTwoRegisters(Primitive::Type type) const OVERRIDE { diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc index c82533bc7d..78b31e9e86 100644 --- a/compiler/optimizing/code_generator_mips64.cc +++ b/compiler/optimizing/code_generator_mips64.cc @@ -407,6 +407,528 @@ class DeoptimizationSlowPathMIPS64 : public SlowPathCodeMIPS64 { DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathMIPS64); }; +class ArraySetSlowPathMIPS64 : public SlowPathCodeMIPS64 { + public: + explicit ArraySetSlowPathMIPS64(HInstruction* instruction) : SlowPathCodeMIPS64(instruction) {} + + void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + LocationSummary* locations = instruction_->GetLocations(); + __ Bind(GetEntryLabel()); + SaveLiveRegisters(codegen, locations); + + InvokeRuntimeCallingConvention calling_convention; + HParallelMove parallel_move(codegen->GetGraph()->GetArena()); + parallel_move.AddMove( + locations->InAt(0), + Location::RegisterLocation(calling_convention.GetRegisterAt(0)), + Primitive::kPrimNot, + nullptr); + parallel_move.AddMove( + locations->InAt(1), + Location::RegisterLocation(calling_convention.GetRegisterAt(1)), + Primitive::kPrimInt, + nullptr); + parallel_move.AddMove( + locations->InAt(2), + Location::RegisterLocation(calling_convention.GetRegisterAt(2)), + Primitive::kPrimNot, + nullptr); + codegen->GetMoveResolver()->EmitNativeCode(&parallel_move); + + CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen); + mips64_codegen->InvokeRuntime(kQuickAputObject, instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>(); + RestoreLiveRegisters(codegen, locations); + __ Bc(GetExitLabel()); + } + + const char* GetDescription() const OVERRIDE { return "ArraySetSlowPathMIPS64"; } + + private: + DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathMIPS64); +}; + +// Slow path marking an object reference `ref` during a read +// barrier. The field `obj.field` in the object `obj` holding this +// reference does not get updated by this slow path after marking (see +// ReadBarrierMarkAndUpdateFieldSlowPathMIPS64 below for that). +// +// This means that after the execution of this slow path, `ref` will +// always be up-to-date, but `obj.field` may not; i.e., after the +// flip, `ref` will be a to-space reference, but `obj.field` will +// probably still be a from-space reference (unless it gets updated by +// another thread, or if another thread installed another object +// reference (different from `ref`) in `obj.field`). +// +// If `entrypoint` is a valid location it is assumed to already be +// holding the entrypoint. The case where the entrypoint is passed in +// is for the GcRoot read barrier.
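The to-space/from-space behaviour documented above can be modeled outside ART. Below is a minimal standalone sketch (plain C++; the forwarding map is a stand-in for the collector's internal state, and none of these names are ART APIs) showing that marking refreshes `ref` while the holder's field keeps its stale from-space value:

    #include <cassert>
    #include <unordered_map>

    struct Obj { Obj* field = nullptr; };

    // Stand-in for the collector's from-space -> to-space forwarding state.
    std::unordered_map<Obj*, Obj*> forwarding;

    // Toy analogue of a ReadBarrierMark entrypoint: returns the to-space
    // copy of `ref`, but never touches the field the reference came from.
    Obj* Mark(Obj* ref) {
      auto it = forwarding.find(ref);
      return (it != forwarding.end()) ? it->second : ref;
    }

    int main() {
      Obj from_space, to_space, holder;
      holder.field = &from_space;
      forwarding[&from_space] = &to_space;

      Obj* ref = holder.field;                // original reference load
      ref = Mark(ref);                        // slow path: `ref` is now up-to-date...
      assert(ref == &to_space);
      assert(holder.field == &from_space);    // ...but the holder still points to from-space
      return 0;
    }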
+class ReadBarrierMarkSlowPathMIPS64 : public SlowPathCodeMIPS64 { + public: + ReadBarrierMarkSlowPathMIPS64(HInstruction* instruction, + Location ref, + Location entrypoint = Location::NoLocation()) + : SlowPathCodeMIPS64(instruction), ref_(ref), entrypoint_(entrypoint) { + DCHECK(kEmitCompilerReadBarrier); + } + + const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathMIPS64"; } + + void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + LocationSummary* locations = instruction_->GetLocations(); + GpuRegister ref_reg = ref_.AsRegister<GpuRegister>(); + DCHECK(locations->CanCall()); + DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg; + DCHECK(instruction_->IsInstanceFieldGet() || + instruction_->IsStaticFieldGet() || + instruction_->IsArrayGet() || + instruction_->IsArraySet() || + instruction_->IsLoadClass() || + instruction_->IsLoadString() || + instruction_->IsInstanceOf() || + instruction_->IsCheckCast() || + (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()) || + (instruction_->IsInvokeStaticOrDirect() && instruction_->GetLocations()->Intrinsified())) + << "Unexpected instruction in read barrier marking slow path: " + << instruction_->DebugName(); + + __ Bind(GetEntryLabel()); + // No need to save live registers; it's taken care of by the + // entrypoint. Also, there is no need to update the stack mask, + // as this runtime call will not trigger a garbage collection. + CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen); + DCHECK((V0 <= ref_reg && ref_reg <= T2) || + (S2 <= ref_reg && ref_reg <= S7) || + (ref_reg == S8)) << ref_reg; + // "Compact" slow path, saving two moves. + // + // Instead of using the standard runtime calling convention (input + // and output in A0 and V0 respectively): + // + // A0 <- ref + // V0 <- ReadBarrierMark(A0) + // ref <- V0 + // + // we just use rX (the register containing `ref`) as input and output + // of a dedicated entrypoint: + // + // rX <- ReadBarrierMarkRegX(rX) + // + if (entrypoint_.IsValid()) { + mips64_codegen->ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction_, this); + DCHECK_EQ(entrypoint_.AsRegister<GpuRegister>(), T9); + __ Jalr(entrypoint_.AsRegister<GpuRegister>()); + __ Nop(); + } else { + int32_t entry_point_offset = + CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kMips64PointerSize>(ref_reg - 1); + // This runtime call does not require a stack map. + mips64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, + instruction_, + this); + } + __ Bc(GetExitLabel()); + } + + private: + // The location (register) of the marked object reference. + const Location ref_; + + // The location of the entrypoint if already loaded. + const Location entrypoint_; + + DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathMIPS64); +}; + +// Slow path marking an object reference `ref` during a read barrier, +// and if needed, atomically updating the field `obj.field` in the +// object `obj` holding this reference after marking (contrary to +// ReadBarrierMarkSlowPathMIPS64 above, which never tries to update +// `obj.field`). +// +// This means that after the execution of this slow path, both `ref` +// and `obj.field` will be up-to-date; i.e., after the flip, both will +// hold the same to-space reference (unless another thread installed +// another object reference (different from `ref`) in `obj.field`).
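A portable analogue of this mark-and-update step, with std::atomic standing in for the LL/SC loop the slow path emits (all names here are illustrative, not ART code):

    #include <atomic>
    #include <cassert>

    struct Obj {};

    // Stand-in for the holder's field at `obj + field_offset`.
    std::atomic<Obj*> field;

    // After marking, install the possibly-moved reference, but only if the
    // field still holds the old one; a concurrent store by another thread
    // makes the CAS fail, which is fine (the field is already newer).
    void UpdateFieldAfterMark(Obj* old_ref, Obj* new_ref) {
      if (old_ref == new_ref) {
        return;  // mirrors the Beqc(temp1_, ref_reg, &done) early exit below
      }
      Obj* expected = old_ref;
      // Strong CAS with relaxed ordering, as described above.
      field.compare_exchange_strong(expected, new_ref, std::memory_order_relaxed);
    }

    int main() {
      Obj a, b;
      field.store(&a, std::memory_order_relaxed);
      UpdateFieldAfterMark(&a, &b);
      assert(field.load(std::memory_order_relaxed) == &b);
      return 0;
    }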
+class ReadBarrierMarkAndUpdateFieldSlowPathMIPS64 : public SlowPathCodeMIPS64 { + public: + ReadBarrierMarkAndUpdateFieldSlowPathMIPS64(HInstruction* instruction, + Location ref, + GpuRegister obj, + Location field_offset, + GpuRegister temp1) + : SlowPathCodeMIPS64(instruction), + ref_(ref), + obj_(obj), + field_offset_(field_offset), + temp1_(temp1) { + DCHECK(kEmitCompilerReadBarrier); + } + + const char* GetDescription() const OVERRIDE { + return "ReadBarrierMarkAndUpdateFieldSlowPathMIPS64"; + } + + void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + LocationSummary* locations = instruction_->GetLocations(); + GpuRegister ref_reg = ref_.AsRegister<GpuRegister>(); + DCHECK(locations->CanCall()); + DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg; + // This slow path is only used by the UnsafeCASObject intrinsic. + DCHECK((instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified())) + << "Unexpected instruction in read barrier marking and field updating slow path: " + << instruction_->DebugName(); + DCHECK(instruction_->GetLocations()->Intrinsified()); + DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kUnsafeCASObject); + DCHECK(field_offset_.IsRegister()) << field_offset_; + + __ Bind(GetEntryLabel()); + + // Save the old reference. + // Note that we cannot use AT or TMP to save the old reference, as those + // are used by the code that follows, but we need the old reference after + // the call to the ReadBarrierMarkRegX entry point. + DCHECK_NE(temp1_, AT); + DCHECK_NE(temp1_, TMP); + __ Move(temp1_, ref_reg); + + // No need to save live registers; it's taken care of by the + // entrypoint. Also, there is no need to update the stack mask, + // as this runtime call will not trigger a garbage collection. + CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen); + DCHECK((V0 <= ref_reg && ref_reg <= T2) || + (S2 <= ref_reg && ref_reg <= S7) || + (ref_reg == S8)) << ref_reg; + // "Compact" slow path, saving two moves. + // + // Instead of using the standard runtime calling convention (input + // and output in A0 and V0 respectively): + // + // A0 <- ref + // V0 <- ReadBarrierMark(A0) + // ref <- V0 + // + // we just use rX (the register containing `ref`) as input and output + // of a dedicated entrypoint: + // + // rX <- ReadBarrierMarkRegX(rX) + // + int32_t entry_point_offset = + CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kMips64PointerSize>(ref_reg - 1); + // This runtime call does not require a stack map. + mips64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, + instruction_, + this); + + // If the new reference is different from the old reference, + // update the field in the holder (`*(obj_ + field_offset_)`). + // + // Note that this field could also hold a different object, if + // another thread had concurrently changed it. In that case, the + // compare-and-set (CAS) loop below would abort, leaving the + // field as-is. + Mips64Label done; + __ Beqc(temp1_, ref_reg, &done); + + // Update the holder's field atomically. This may fail if another + // thread updates the field before us, but that's OK. This is achieved + // using a strong compare-and-set (CAS) operation with relaxed + // memory synchronization ordering, where the expected value is + // the old reference and the desired value is the new reference. + + // Convenience aliases.
+ GpuRegister base = obj_; + GpuRegister offset = field_offset_.AsRegister<GpuRegister>(); + GpuRegister expected = temp1_; + GpuRegister value = ref_reg; + GpuRegister tmp_ptr = TMP; // Pointer to actual memory. + GpuRegister tmp = AT; // Value in memory. + + __ Daddu(tmp_ptr, base, offset); + + if (kPoisonHeapReferences) { + __ PoisonHeapReference(expected); + // Do not poison `value` if it is the same register as + // `expected`, which has just been poisoned. + if (value != expected) { + __ PoisonHeapReference(value); + } + } + + // do { + // tmp = [r_ptr] - expected; + // } while (tmp == 0 && failure([r_ptr] <- r_new_value)); + + Mips64Label loop_head, exit_loop; + __ Bind(&loop_head); + __ Ll(tmp, tmp_ptr); + // The LL instruction sign-extends the 32-bit value, but + // 32-bit references must be zero-extended. Zero-extend `tmp`. + __ Dext(tmp, tmp, 0, 32); + __ Bnec(tmp, expected, &exit_loop); + __ Move(tmp, value); + __ Sc(tmp, tmp_ptr); + __ Beqzc(tmp, &loop_head); + __ Bind(&exit_loop); + + if (kPoisonHeapReferences) { + __ UnpoisonHeapReference(expected); + // Do not unpoison `value` if it is the same register as + // `expected`, which has just been unpoisoned. + if (value != expected) { + __ UnpoisonHeapReference(value); + } + } + + __ Bind(&done); + __ Bc(GetExitLabel()); + } + + private: + // The location (register) of the marked object reference. + const Location ref_; + // The register containing the object holding the marked object reference field. + const GpuRegister obj_; + // The location of the offset of the marked reference field within `obj_`. + Location field_offset_; + + const GpuRegister temp1_; + + DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkAndUpdateFieldSlowPathMIPS64); +}; + +// Slow path generating a read barrier for a heap reference. +class ReadBarrierForHeapReferenceSlowPathMIPS64 : public SlowPathCodeMIPS64 { + public: + ReadBarrierForHeapReferenceSlowPathMIPS64(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index) + : SlowPathCodeMIPS64(instruction), + out_(out), + ref_(ref), + obj_(obj), + offset_(offset), + index_(index) { + DCHECK(kEmitCompilerReadBarrier); + // If `obj` is equal to `out` or `ref`, it means the initial object + // has been overwritten by (or after) the heap object reference load + // to be instrumented, e.g.: + // + // __ LoadFromOffset(kLoadWord, out, out, offset); + // codegen_->GenerateReadBarrierSlow(instruction, out_loc, out_loc, out_loc, offset); + // + // In that case, we have lost the information about the original + // object, and the emitted read barrier cannot work properly. 
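The aliasing hazard behind those constructor checks can be seen with registers modeled as plain variables (illustrative only; not ART code):

    #include <cassert>

    struct Obj { Obj* field = nullptr; };

    int main() {
      Obj holder, target;
      holder.field = &target;

      // Two registers: after `out = obj->field`, `obj` still names the
      // holder, so a slow-path barrier can be fed (ref, obj, offset).
      Obj* obj = &holder;
      Obj* out = obj->field;
      assert(obj == &holder && out == &target);

      // One register: `out = out->field` destroys the holder's address
      // before the barrier can use it; nothing is left to pass as `obj`.
      // This is the pattern the DCHECKs below reject.
      Obj* out_and_obj = &holder;
      out_and_obj = out_and_obj->field;  // holder's address is gone
      assert(out_and_obj == &target);
      return 0;
    }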
+ DCHECK(!obj.Equals(out)) << "obj=" << obj << " out=" << out; + DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref; + } + + void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen); + LocationSummary* locations = instruction_->GetLocations(); + Primitive::Type type = Primitive::kPrimNot; + GpuRegister reg_out = out_.AsRegister<GpuRegister>(); + DCHECK(locations->CanCall()); + DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out)); + DCHECK(instruction_->IsInstanceFieldGet() || + instruction_->IsStaticFieldGet() || + instruction_->IsArrayGet() || + instruction_->IsInstanceOf() || + instruction_->IsCheckCast() || + (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified())) + << "Unexpected instruction in read barrier for heap reference slow path: " + << instruction_->DebugName(); + + __ Bind(GetEntryLabel()); + SaveLiveRegisters(codegen, locations); + + // We may have to change the index's value, but as `index_` is a + // constant member (like other "inputs" of this slow path), + // introduce a copy of it, `index`. + Location index = index_; + if (index_.IsValid()) { + // Handle `index_` for HArrayGet and UnsafeGetObject/UnsafeGetObjectVolatile intrinsics. + if (instruction_->IsArrayGet()) { + // Compute the actual memory offset and store it in `index`. + GpuRegister index_reg = index_.AsRegister<GpuRegister>(); + DCHECK(locations->GetLiveRegisters()->ContainsCoreRegister(index_reg)); + if (codegen->IsCoreCalleeSaveRegister(index_reg)) { + // We are about to change the value of `index_reg` (see the + // calls to art::mips64::Mips64Assembler::Sll and + // art::mips64::Mips64Assembler::Addiu32 below), but it has + // not been saved by the previous call to + // art::SlowPathCode::SaveLiveRegisters, as it is a + // callee-save register -- + // art::SlowPathCode::SaveLiveRegisters does not consider + // callee-save registers, as it has been designed with the + // assumption that callee-save registers are supposed to be + // handled by the called function. So, as a callee-save + // register, `index_reg` _would_ eventually be saved onto + // the stack, but it would be too late: we would have + // changed its value earlier. Therefore, we manually save + // it here into another freely available register, + // `free_reg`, chosen of course among the caller-save + // registers (as a callee-save `free_reg` register would + // exhibit the same problem). + // + // Note we could have requested a temporary register from + // the register allocator instead; but we prefer not to, as + // this is a slow path, and we know we can find a + // caller-save register that is available. + GpuRegister free_reg = FindAvailableCallerSaveRegister(codegen); + __ Move(free_reg, index_reg); + index_reg = free_reg; + index = Location::RegisterLocation(index_reg); + } else { + // The initial register stored in `index_` has already been + // saved in the call to art::SlowPathCode::SaveLiveRegisters + // (as it is not a callee-save register), so we can freely + // use it. + } + // Shifting the index value contained in `index_reg` by the scale + // factor (2) cannot overflow in practice, as the runtime is + // unable to allocate object arrays with a size larger than + // 2^26 - 1 (that is, 2^28 - 4 bytes).
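The bound quoted in that comment can be restated as a compile-time check; the constants below are taken from the comment itself, not from the ART headers:

    #include <cstdint>

    constexpr int64_t kMaxObjArrayLength = (INT64_C(1) << 26) - 1;  // 2^26 - 1 elements
    constexpr int64_t kHeapRefSize = 4;                             // 32-bit HeapReference
    constexpr int64_t kMaxDataBytes = kMaxObjArrayLength * kHeapRefSize;

    static_assert(kMaxDataBytes == (INT64_C(1) << 28) - 4,
                  "matches the 2^28 - 4 bytes quoted above");
    // Even after adding a small data offset, the scaled index stays well
    // below 2^31, so the 32-bit Sll + Addiu32 sequence cannot overflow.
    static_assert(kMaxDataBytes + 4096 < (INT64_C(1) << 31), "fits in int32_t");

    int main() { return 0; }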
+ __ Sll(index_reg, index_reg, TIMES_4); + static_assert( + sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), + "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); + __ Addiu32(index_reg, index_reg, offset_); + } else { + // In the case of the UnsafeGetObject/UnsafeGetObjectVolatile + // intrinsics, `index_` is not shifted by a scale factor of 2 + // (as in the case of ArrayGet), as it is actually an offset + // to an object field within an object. + DCHECK(instruction_->IsInvoke()) << instruction_->DebugName(); + DCHECK(instruction_->GetLocations()->Intrinsified()); + DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) || + (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile)) + << instruction_->AsInvoke()->GetIntrinsic(); + DCHECK_EQ(offset_, 0U); + DCHECK(index_.IsRegister()); + } + } + + // We're moving two or three locations to locations that could + // overlap, so we need a parallel move resolver. + InvokeRuntimeCallingConvention calling_convention; + HParallelMove parallel_move(codegen->GetGraph()->GetArena()); + parallel_move.AddMove(ref_, + Location::RegisterLocation(calling_convention.GetRegisterAt(0)), + Primitive::kPrimNot, + nullptr); + parallel_move.AddMove(obj_, + Location::RegisterLocation(calling_convention.GetRegisterAt(1)), + Primitive::kPrimNot, + nullptr); + if (index.IsValid()) { + parallel_move.AddMove(index, + Location::RegisterLocation(calling_convention.GetRegisterAt(2)), + Primitive::kPrimInt, + nullptr); + codegen->GetMoveResolver()->EmitNativeCode(&parallel_move); + } else { + codegen->GetMoveResolver()->EmitNativeCode(&parallel_move); + __ LoadConst32(calling_convention.GetRegisterAt(2), offset_); + } + mips64_codegen->InvokeRuntime(kQuickReadBarrierSlow, + instruction_, + instruction_->GetDexPc(), + this); + CheckEntrypointTypes< + kQuickReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t>(); + mips64_codegen->MoveLocation(out_, calling_convention.GetReturnLocation(type), type); + + RestoreLiveRegisters(codegen, locations); + __ Bc(GetExitLabel()); + } + + const char* GetDescription() const OVERRIDE { + return "ReadBarrierForHeapReferenceSlowPathMIPS64"; + } + + private: + GpuRegister FindAvailableCallerSaveRegister(CodeGenerator* codegen) { + size_t ref = static_cast<size_t>(ref_.AsRegister<GpuRegister>()); + size_t obj = static_cast<size_t>(obj_.AsRegister<GpuRegister>()); + for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) { + if (i != ref && + i != obj && + !codegen->IsCoreCalleeSaveRegister(i) && + !codegen->IsBlockedCoreRegister(i)) { + return static_cast<GpuRegister>(i); + } + } + // We shall never fail to find a free caller-save register, as + // there are more than two core caller-save registers on MIPS64 + // (meaning it is possible to find one which is different from + // `ref` and `obj`). + DCHECK_GT(codegen->GetNumberOfCoreCallerSaveRegisters(), 2u); + LOG(FATAL) << "Could not find a free caller-save register"; + UNREACHABLE(); + } + + const Location out_; + const Location ref_; + const Location obj_; + const uint32_t offset_; + // An additional location containing an index to an array. + // Only used for HArrayGet and the UnsafeGetObject & + // UnsafeGetObjectVolatile intrinsics. + const Location index_; + + DISALLOW_COPY_AND_ASSIGN(ReadBarrierForHeapReferenceSlowPathMIPS64); +}; + +// Slow path generating a read barrier for a GC root.
+class ReadBarrierForRootSlowPathMIPS64 : public SlowPathCodeMIPS64 { + public: + ReadBarrierForRootSlowPathMIPS64(HInstruction* instruction, Location out, Location root) + : SlowPathCodeMIPS64(instruction), out_(out), root_(root) { + DCHECK(kEmitCompilerReadBarrier); + } + + void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + LocationSummary* locations = instruction_->GetLocations(); + Primitive::Type type = Primitive::kPrimNot; + GpuRegister reg_out = out_.AsRegister<GpuRegister>(); + DCHECK(locations->CanCall()); + DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out)); + DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString()) + << "Unexpected instruction in read barrier for GC root slow path: " + << instruction_->DebugName(); + + __ Bind(GetEntryLabel()); + SaveLiveRegisters(codegen, locations); + + InvokeRuntimeCallingConvention calling_convention; + CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen); + mips64_codegen->MoveLocation(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), + root_, + Primitive::kPrimNot); + mips64_codegen->InvokeRuntime(kQuickReadBarrierForRootSlow, + instruction_, + instruction_->GetDexPc(), + this); + CheckEntrypointTypes<kQuickReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*>(); + mips64_codegen->MoveLocation(out_, calling_convention.GetReturnLocation(type), type); + + RestoreLiveRegisters(codegen, locations); + __ Bc(GetExitLabel()); + } + + const char* GetDescription() const OVERRIDE { return "ReadBarrierForRootSlowPathMIPS64"; } + + private: + const Location out_; + const Location root_; + + DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathMIPS64); +}; + CodeGeneratorMIPS64::CodeGeneratorMIPS64(HGraph* graph, const Mips64InstructionSetFeatures& isa_features, const CompilerOptions& compiler_options, @@ -1140,23 +1662,32 @@ void CodeGeneratorMIPS64::InvokeRuntime(QuickEntrypointEnum entrypoint, uint32_t dex_pc, SlowPathCode* slow_path) { ValidateInvokeRuntime(entrypoint, instruction, slow_path); - __ LoadFromOffset(kLoadDoubleword, - T9, - TR, - GetThreadOffset<kMips64PointerSize>(entrypoint).Int32Value()); - __ Jalr(T9); - __ Nop(); + GenerateInvokeRuntime(GetThreadOffset<kMips64PointerSize>(entrypoint).Int32Value()); if (EntrypointRequiresStackMap(entrypoint)) { RecordPcInfo(instruction, dex_pc, slow_path); } } +void CodeGeneratorMIPS64::InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset, + HInstruction* instruction, + SlowPathCode* slow_path) { + ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction, slow_path); + GenerateInvokeRuntime(entry_point_offset); +} + +void CodeGeneratorMIPS64::GenerateInvokeRuntime(int32_t entry_point_offset) { + __ LoadFromOffset(kLoadDoubleword, T9, TR, entry_point_offset); + __ Jalr(T9); + __ Nop(); +} + void InstructionCodeGeneratorMIPS64::GenerateClassInitializationCheck(SlowPathCodeMIPS64* slow_path, GpuRegister class_reg) { __ LoadFromOffset(kLoadWord, TMP, class_reg, mirror::Class::StatusOffset().Int32Value()); __ LoadConst32(AT, mirror::Class::kStatusInitialized); __ Bltc(TMP, AT, slow_path->GetEntryLabel()); - // TODO: barrier needed? + // Even if the initialized flag is set, we need to ensure consistent memory ordering. 
+ __ Sync(0); __ Bind(slow_path->GetExitLabel()); } @@ -1447,14 +1978,31 @@ void InstructionCodeGeneratorMIPS64::VisitAnd(HAnd* instruction) { } void LocationsBuilderMIPS64::VisitArrayGet(HArrayGet* instruction) { + Primitive::Type type = instruction->GetType(); + bool object_array_get_with_read_barrier = + kEmitCompilerReadBarrier && (type == Primitive::kPrimNot); LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); + new (GetGraph()->GetArena()) LocationSummary(instruction, + object_array_get_with_read_barrier + ? LocationSummary::kCallOnSlowPath + : LocationSummary::kNoCall); locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); - if (Primitive::IsFloatingPointType(instruction->GetType())) { + if (Primitive::IsFloatingPointType(type)) { locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); } else { - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + // The output overlaps in the case of an object array get with + // read barriers enabled: we do not want the move to overwrite the + // array's location, as we need it to emit the read barrier. + locations->SetOut(Location::RequiresRegister(), + object_array_get_with_read_barrier + ? Location::kOutputOverlap + : Location::kNoOutputOverlap); + } + // We need a temporary register for the read barrier marking slow + // path in CodeGeneratorMIPS64::GenerateArrayLoadWithBakerReadBarrier. + if (object_array_get_with_read_barrier && kUseBakerReadBarrier) { + locations->AddTemp(Location::RequiresRegister()); } } @@ -1467,7 +2015,9 @@ static auto GetImplicitNullChecker(HInstruction* instruction, CodeGeneratorMIPS6 void InstructionCodeGeneratorMIPS64::VisitArrayGet(HArrayGet* instruction) { LocationSummary* locations = instruction->GetLocations(); - GpuRegister obj = locations->InAt(0).AsRegister<GpuRegister>(); + Location obj_loc = locations->InAt(0); + GpuRegister obj = obj_loc.AsRegister<GpuRegister>(); + Location out_loc = locations->Out(); Location index = locations->InAt(1); uint32_t data_offset = CodeGenerator::GetArrayDataOffset(instruction); auto null_checker = GetImplicitNullChecker(instruction, codegen_); @@ -1477,7 +2027,7 @@ void InstructionCodeGeneratorMIPS64::VisitArrayGet(HArrayGet* instruction) { instruction->IsStringCharAt(); switch (type) { case Primitive::kPrimBoolean: { - GpuRegister out = locations->Out().AsRegister<GpuRegister>(); + GpuRegister out = out_loc.AsRegister<GpuRegister>(); if (index.IsConstant()) { size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset; @@ -1490,7 +2040,7 @@ void InstructionCodeGeneratorMIPS64::VisitArrayGet(HArrayGet* instruction) { } case Primitive::kPrimByte: { - GpuRegister out = locations->Out().AsRegister<GpuRegister>(); + GpuRegister out = out_loc.AsRegister<GpuRegister>(); if (index.IsConstant()) { size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset; @@ -1503,7 +2053,7 @@ void InstructionCodeGeneratorMIPS64::VisitArrayGet(HArrayGet* instruction) { } case Primitive::kPrimShort: { - GpuRegister out = locations->Out().AsRegister<GpuRegister>(); + GpuRegister out = out_loc.AsRegister<GpuRegister>(); if (index.IsConstant()) { size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset; @@ -1517,7 +2067,7 @@ void InstructionCodeGeneratorMIPS64::VisitArrayGet(HArrayGet* instruction) { } case 
Primitive::kPrimChar: { - GpuRegister out = locations->Out().AsRegister<GpuRegister>(); + GpuRegister out = out_loc.AsRegister<GpuRegister>(); if (maybe_compressed_char_at) { uint32_t count_offset = mirror::String::CountOffset().Uint32Value(); __ LoadFromOffset(kLoadWord, TMP, obj, count_offset, null_checker); @@ -1570,10 +2120,9 @@ void InstructionCodeGeneratorMIPS64::VisitArrayGet(HArrayGet* instruction) { break; } - case Primitive::kPrimInt: - case Primitive::kPrimNot: { + case Primitive::kPrimInt: { DCHECK_EQ(sizeof(mirror::HeapReference<mirror::Object>), sizeof(int32_t)); - GpuRegister out = locations->Out().AsRegister<GpuRegister>(); + GpuRegister out = out_loc.AsRegister<GpuRegister>(); LoadOperandType load_type = (type == Primitive::kPrimNot) ? kLoadUnsignedWord : kLoadWord; if (index.IsConstant()) { size_t offset = @@ -1587,8 +2136,53 @@ void InstructionCodeGeneratorMIPS64::VisitArrayGet(HArrayGet* instruction) { break; } + case Primitive::kPrimNot: { + static_assert( + sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), + "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); + // /* HeapReference<Object> */ out = + // *(obj + data_offset + index * sizeof(HeapReference<Object>)) + if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + Location temp = locations->GetTemp(0); + // Note that a potential implicit null check is handled in this + // CodeGeneratorMIPS64::GenerateArrayLoadWithBakerReadBarrier call. + codegen_->GenerateArrayLoadWithBakerReadBarrier(instruction, + out_loc, + obj, + data_offset, + index, + temp, + /* needs_null_check */ true); + } else { + GpuRegister out = out_loc.AsRegister<GpuRegister>(); + if (index.IsConstant()) { + size_t offset = + (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset; + __ LoadFromOffset(kLoadUnsignedWord, out, obj, offset, null_checker); + // If read barriers are enabled, emit read barriers other than + // Baker's using a slow path (and also unpoison the loaded + // reference, if heap poisoning is enabled). + codegen_->MaybeGenerateReadBarrierSlow(instruction, out_loc, out_loc, obj_loc, offset); + } else { + __ Sll(TMP, index.AsRegister<GpuRegister>(), TIMES_4); + __ Addu(TMP, obj, TMP); + __ LoadFromOffset(kLoadUnsignedWord, out, TMP, data_offset, null_checker); + // If read barriers are enabled, emit read barriers other than + // Baker's using a slow path (and also unpoison the loaded + // reference, if heap poisoning is enabled). 
+ codegen_->MaybeGenerateReadBarrierSlow(instruction, + out_loc, + out_loc, + obj_loc, + data_offset, + index); + } + } + break; + } + case Primitive::kPrimLong: { - GpuRegister out = locations->Out().AsRegister<GpuRegister>(); + GpuRegister out = out_loc.AsRegister<GpuRegister>(); if (index.IsConstant()) { size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset; @@ -1602,7 +2196,7 @@ void InstructionCodeGeneratorMIPS64::VisitArrayGet(HArrayGet* instruction) { } case Primitive::kPrimFloat: { - FpuRegister out = locations->Out().AsFpuRegister<FpuRegister>(); + FpuRegister out = out_loc.AsFpuRegister<FpuRegister>(); if (index.IsConstant()) { size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset; @@ -1616,7 +2210,7 @@ void InstructionCodeGeneratorMIPS64::VisitArrayGet(HArrayGet* instruction) { } case Primitive::kPrimDouble: { - FpuRegister out = locations->Out().AsFpuRegister<FpuRegister>(); + FpuRegister out = out_loc.AsFpuRegister<FpuRegister>(); if (index.IsConstant()) { size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset; @@ -1633,11 +2227,6 @@ void InstructionCodeGeneratorMIPS64::VisitArrayGet(HArrayGet* instruction) { LOG(FATAL) << "Unreachable type " << instruction->GetType(); UNREACHABLE(); } - - if (type == Primitive::kPrimNot) { - GpuRegister out = locations->Out().AsRegister<GpuRegister>(); - __ MaybeUnpoisonHeapReference(out); - } } void LocationsBuilderMIPS64::VisitArrayLength(HArrayLength* instruction) { @@ -1679,23 +2268,28 @@ Location LocationsBuilderMIPS64::FpuRegisterOrConstantForStore(HInstruction* ins } void LocationsBuilderMIPS64::VisitArraySet(HArraySet* instruction) { - bool needs_runtime_call = instruction->NeedsTypeCheck(); + Primitive::Type value_type = instruction->GetComponentType(); + + bool needs_write_barrier = + CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue()); + bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck(); + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary( instruction, - needs_runtime_call ? LocationSummary::kCallOnMainOnly : LocationSummary::kNoCall); - if (needs_runtime_call) { - InvokeRuntimeCallingConvention calling_convention; - locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); - locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); - locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2))); + may_need_runtime_call_for_type_check ? + LocationSummary::kCallOnSlowPath : + LocationSummary::kNoCall); + + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); + if (Primitive::IsFloatingPointType(instruction->InputAt(2)->GetType())) { + locations->SetInAt(2, FpuRegisterOrConstantForStore(instruction->InputAt(2))); } else { - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); - if (Primitive::IsFloatingPointType(instruction->InputAt(2)->GetType())) { - locations->SetInAt(2, FpuRegisterOrConstantForStore(instruction->InputAt(2))); - } else { - locations->SetInAt(2, RegisterOrZeroConstant(instruction->InputAt(2))); - } + locations->SetInAt(2, RegisterOrZeroConstant(instruction->InputAt(2))); + } + if (needs_write_barrier) { + // Temporary register for the write barrier. 
+ locations->AddTemp(Location::RequiresRegister()); // Possibly used for ref. poisoning too. } } @@ -1705,7 +2299,7 @@ void InstructionCodeGeneratorMIPS64::VisitArraySet(HArraySet* instruction) { Location index = locations->InAt(1); Location value_location = locations->InAt(2); Primitive::Type value_type = instruction->GetComponentType(); - bool needs_runtime_call = locations->WillCall(); + bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck(); bool needs_write_barrier = CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue()); auto null_checker = GetImplicitNullChecker(instruction, codegen_); @@ -1749,68 +2343,138 @@ void InstructionCodeGeneratorMIPS64::VisitArraySet(HArraySet* instruction) { break; } - case Primitive::kPrimInt: + case Primitive::kPrimInt: { + uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value(); + if (index.IsConstant()) { + data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4; + } else { + __ Dsll(base_reg, index.AsRegister<GpuRegister>(), TIMES_4); + __ Daddu(base_reg, obj, base_reg); + } + if (value_location.IsConstant()) { + int32_t value = CodeGenerator::GetInt32ValueOf(value_location.GetConstant()); + __ StoreConstToOffset(kStoreWord, value, base_reg, data_offset, TMP, null_checker); + } else { + GpuRegister value = value_location.AsRegister<GpuRegister>(); + __ StoreToOffset(kStoreWord, value, base_reg, data_offset, null_checker); + } + break; + } + case Primitive::kPrimNot: { - if (!needs_runtime_call) { + if (value_location.IsConstant()) { + // Just setting null. uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value(); if (index.IsConstant()) { data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4; } else { - DCHECK(index.IsRegister()) << index; __ Dsll(base_reg, index.AsRegister<GpuRegister>(), TIMES_4); __ Daddu(base_reg, obj, base_reg); } - if (value_location.IsConstant()) { - int32_t value = CodeGenerator::GetInt32ValueOf(value_location.GetConstant()); - __ StoreConstToOffset(kStoreWord, value, base_reg, data_offset, TMP, null_checker); - DCHECK(!needs_write_barrier); - } else { - GpuRegister value = value_location.AsRegister<GpuRegister>(); - if (kPoisonHeapReferences && needs_write_barrier) { - // Note that in the case where `value` is a null reference, - // we do not enter this block, as a null reference does not - // need poisoning. - DCHECK_EQ(value_type, Primitive::kPrimNot); - // Use Sw() instead of StoreToOffset() in order to be able to - // hold the poisoned reference in AT and thus avoid allocating - // yet another temporary register. - if (index.IsConstant()) { - if (!IsInt<16>(static_cast<int32_t>(data_offset))) { - int16_t low16 = Low16Bits(data_offset); - // For consistency with StoreToOffset() and such treat data_offset as int32_t. - uint64_t high48 = static_cast<uint64_t>(static_cast<int32_t>(data_offset)) - low16; - int16_t upper16 = High16Bits(high48); - // Allow the full [-2GB,+2GB) range in case `low16` is negative and needs a - // compensatory 64KB added, which may push `high48` above 2GB and require - // the dahi instruction. - int16_t higher16 = High32Bits(high48) + ((upper16 < 0) ? 
1 : 0); - __ Daui(TMP, obj, upper16); - if (higher16 != 0) { - __ Dahi(TMP, higher16); - } - base_reg = TMP; - data_offset = low16; - } - } else { - DCHECK(IsInt<16>(static_cast<int32_t>(data_offset))); - } - __ PoisonHeapReference(AT, value); - __ Sw(AT, base_reg, data_offset); - null_checker(); + int32_t value = CodeGenerator::GetInt32ValueOf(value_location.GetConstant()); + DCHECK_EQ(value, 0); + __ StoreConstToOffset(kStoreWord, value, base_reg, data_offset, TMP, null_checker); + DCHECK(!needs_write_barrier); + DCHECK(!may_need_runtime_call_for_type_check); + break; + } + + DCHECK(needs_write_barrier); + GpuRegister value = value_location.AsRegister<GpuRegister>(); + GpuRegister temp1 = locations->GetTemp(0).AsRegister<GpuRegister>(); + GpuRegister temp2 = TMP; // Doesn't need to survive slow path. + uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); + uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); + uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); + Mips64Label done; + SlowPathCodeMIPS64* slow_path = nullptr; + + if (may_need_runtime_call_for_type_check) { + slow_path = new (GetGraph()->GetArena()) ArraySetSlowPathMIPS64(instruction); + codegen_->AddSlowPath(slow_path); + if (instruction->GetValueCanBeNull()) { + Mips64Label non_zero; + __ Bnezc(value, &non_zero); + uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value(); + if (index.IsConstant()) { + data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4; } else { - __ StoreToOffset(kStoreWord, value, base_reg, data_offset, null_checker); - } - if (needs_write_barrier) { - DCHECK_EQ(value_type, Primitive::kPrimNot); - codegen_->MarkGCCard(obj, value, instruction->GetValueCanBeNull()); + __ Dsll(base_reg, index.AsRegister<GpuRegister>(), TIMES_4); + __ Daddu(base_reg, obj, base_reg); } + __ StoreToOffset(kStoreWord, value, base_reg, data_offset, null_checker); + __ Bc(&done); + __ Bind(&non_zero); + } + + // Note that when read barriers are enabled, the type checks + // are performed without read barriers. This is fine, even in + // the case where a class object is in the from-space after + // the flip, as a comparison involving such a type would not + // produce a false positive; it may of course produce a false + // negative, in which case we would take the ArraySet slow + // path. + + // /* HeapReference<Class> */ temp1 = obj->klass_ + __ LoadFromOffset(kLoadUnsignedWord, temp1, obj, class_offset, null_checker); + __ MaybeUnpoisonHeapReference(temp1); + + // /* HeapReference<Class> */ temp1 = temp1->component_type_ + __ LoadFromOffset(kLoadUnsignedWord, temp1, temp1, component_offset); + // /* HeapReference<Class> */ temp2 = value->klass_ + __ LoadFromOffset(kLoadUnsignedWord, temp2, value, class_offset); + // If heap poisoning is enabled, no need to unpoison `temp1` + // nor `temp2`, as we are comparing two poisoned references. + + if (instruction->StaticTypeOfArrayIsObjectArray()) { + Mips64Label do_put; + __ Beqc(temp1, temp2, &do_put); + // If heap poisoning is enabled, the `temp1` reference has + // not been unpoisoned yet; unpoison it now. + __ MaybeUnpoisonHeapReference(temp1); + + // /* HeapReference<Class> */ temp1 = temp1->super_class_ + __ LoadFromOffset(kLoadUnsignedWord, temp1, temp1, super_offset); + // If heap poisoning is enabled, no need to unpoison + // `temp1`, as we are comparing against null below. 
+ __ Bnezc(temp1, slow_path->GetEntryLabel()); + __ Bind(&do_put); + } else { + __ Bnec(temp1, temp2, slow_path->GetEntryLabel()); } + } + + GpuRegister source = value; + if (kPoisonHeapReferences) { + // Note that in the case where `value` is a null reference, + // we do not enter this block, as a null reference does not + // need poisoning. + __ Move(temp1, value); + __ PoisonHeapReference(temp1); + source = temp1; + } + + uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value(); + if (index.IsConstant()) { + data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4; } else { - DCHECK_EQ(value_type, Primitive::kPrimNot); - // Note: if heap poisoning is enabled, pAputObject takes care - // of poisoning the reference. - codegen_->InvokeRuntime(kQuickAputObject, instruction, instruction->GetDexPc()); - CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>(); + __ Dsll(base_reg, index.AsRegister<GpuRegister>(), TIMES_4); + __ Daddu(base_reg, obj, base_reg); + } + __ StoreToOffset(kStoreWord, source, base_reg, data_offset); + + if (!may_need_runtime_call_for_type_check) { + codegen_->MaybeRecordImplicitNullCheck(instruction); + } + + codegen_->MarkGCCard(obj, value, instruction->GetValueCanBeNull()); + + if (done.IsLinked()) { + __ Bind(&done); + } + + if (slow_path != nullptr) { + __ Bind(slow_path->GetExitLabel()); } break; } @@ -1900,6 +2564,23 @@ void InstructionCodeGeneratorMIPS64::VisitBoundsCheck(HBoundsCheck* instruction) __ Bgeuc(index, length, slow_path->GetEntryLabel()); } +// Temp is used for read barrier. +static size_t NumberOfInstanceOfTemps(TypeCheckKind type_check_kind) { + if (kEmitCompilerReadBarrier && + (kUseBakerReadBarrier || + type_check_kind == TypeCheckKind::kAbstractClassCheck || + type_check_kind == TypeCheckKind::kClassHierarchyCheck || + type_check_kind == TypeCheckKind::kArrayObjectCheck)) { + return 1; + } + return 0; +} + +// Extra temp is used for read barrier. +static size_t NumberOfCheckCastTemps(TypeCheckKind type_check_kind) { + return 1 + NumberOfInstanceOfTemps(type_check_kind); +} + void LocationsBuilderMIPS64::VisitCheckCast(HCheckCast* instruction) { LocationSummary::CallKind call_kind = LocationSummary::kNoCall; bool throws_into_catch = instruction->CanThrowIntoCatchBlock(); @@ -1910,7 +2591,7 @@ void LocationsBuilderMIPS64::VisitCheckCast(HCheckCast* instruction) { case TypeCheckKind::kAbstractClassCheck: case TypeCheckKind::kClassHierarchyCheck: case TypeCheckKind::kArrayObjectCheck: - call_kind = throws_into_catch + call_kind = (throws_into_catch || kEmitCompilerReadBarrier) ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall; // In fact, call on a fatal (non-returning) slow path. 
break; @@ -1924,15 +2605,20 @@ void LocationsBuilderMIPS64::VisitCheckCast(HCheckCast* instruction) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind); locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RequiresRegister()); - locations->AddTemp(Location::RequiresRegister()); + locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind)); } void InstructionCodeGeneratorMIPS64::VisitCheckCast(HCheckCast* instruction) { TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); LocationSummary* locations = instruction->GetLocations(); - GpuRegister obj = locations->InAt(0).AsRegister<GpuRegister>(); + Location obj_loc = locations->InAt(0); + GpuRegister obj = obj_loc.AsRegister<GpuRegister>(); GpuRegister cls = locations->InAt(1).AsRegister<GpuRegister>(); - GpuRegister temp = locations->GetTemp(0).AsRegister<GpuRegister>(); + Location temp_loc = locations->GetTemp(0); + GpuRegister temp = temp_loc.AsRegister<GpuRegister>(); + const size_t num_temps = NumberOfCheckCastTemps(type_check_kind); + DCHECK_LE(num_temps, 2u); + Location maybe_temp2_loc = (num_temps >= 2) ? locations->GetTemp(1) : Location::NoLocation(); const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); @@ -1969,8 +2655,12 @@ void InstructionCodeGeneratorMIPS64::VisitCheckCast(HCheckCast* instruction) { case TypeCheckKind::kExactCheck: case TypeCheckKind::kArrayCheck: { // /* HeapReference<Class> */ temp = obj->klass_ - __ LoadFromOffset(kLoadUnsignedWord, temp, obj, class_offset); - __ MaybeUnpoisonHeapReference(temp); + GenerateReferenceLoadTwoRegisters(instruction, + temp_loc, + obj_loc, + class_offset, + maybe_temp2_loc, + kWithoutReadBarrier); // Jump to slow path for throwing the exception or doing a // more involved array check. __ Bnec(temp, cls, slow_path->GetEntryLabel()); @@ -1979,15 +2669,22 @@ void InstructionCodeGeneratorMIPS64::VisitCheckCast(HCheckCast* instruction) { case TypeCheckKind::kAbstractClassCheck: { // /* HeapReference<Class> */ temp = obj->klass_ - __ LoadFromOffset(kLoadUnsignedWord, temp, obj, class_offset); - __ MaybeUnpoisonHeapReference(temp); + GenerateReferenceLoadTwoRegisters(instruction, + temp_loc, + obj_loc, + class_offset, + maybe_temp2_loc, + kWithoutReadBarrier); // If the class is abstract, we eagerly fetch the super class of the // object to avoid doing a comparison we know will fail. Mips64Label loop; __ Bind(&loop); // /* HeapReference<Class> */ temp = temp->super_class_ - __ LoadFromOffset(kLoadUnsignedWord, temp, temp, super_offset); - __ MaybeUnpoisonHeapReference(temp); + GenerateReferenceLoadOneRegister(instruction, + temp_loc, + super_offset, + maybe_temp2_loc, + kWithoutReadBarrier); // If the class reference currently in `temp` is null, jump to the slow path to throw the // exception. __ Beqzc(temp, slow_path->GetEntryLabel()); @@ -1998,15 +2695,22 @@ void InstructionCodeGeneratorMIPS64::VisitCheckCast(HCheckCast* instruction) { case TypeCheckKind::kClassHierarchyCheck: { // /* HeapReference<Class> */ temp = obj->klass_ - __ LoadFromOffset(kLoadUnsignedWord, temp, obj, class_offset); - __ MaybeUnpoisonHeapReference(temp); + GenerateReferenceLoadTwoRegisters(instruction, + temp_loc, + obj_loc, + class_offset, + maybe_temp2_loc, + kWithoutReadBarrier); // Walk over the class hierarchy to find a match. 
Mips64Label loop; __ Bind(&loop); __ Beqc(temp, cls, &done); // /* HeapReference<Class> */ temp = temp->super_class_ - __ LoadFromOffset(kLoadUnsignedWord, temp, temp, super_offset); - __ MaybeUnpoisonHeapReference(temp); + GenerateReferenceLoadOneRegister(instruction, + temp_loc, + super_offset, + maybe_temp2_loc, + kWithoutReadBarrier); // If the class reference currently in `temp` is null, jump to the slow path to throw the // exception. Otherwise, jump to the beginning of the loop. __ Bnezc(temp, &loop); @@ -2016,14 +2720,21 @@ void InstructionCodeGeneratorMIPS64::VisitCheckCast(HCheckCast* instruction) { case TypeCheckKind::kArrayObjectCheck: { // /* HeapReference<Class> */ temp = obj->klass_ - __ LoadFromOffset(kLoadUnsignedWord, temp, obj, class_offset); - __ MaybeUnpoisonHeapReference(temp); + GenerateReferenceLoadTwoRegisters(instruction, + temp_loc, + obj_loc, + class_offset, + maybe_temp2_loc, + kWithoutReadBarrier); // Do an exact check. __ Beqc(temp, cls, &done); // Otherwise, we need to check that the object's class is a non-primitive array. // /* HeapReference<Class> */ temp = temp->component_type_ - __ LoadFromOffset(kLoadUnsignedWord, temp, temp, component_offset); - __ MaybeUnpoisonHeapReference(temp); + GenerateReferenceLoadOneRegister(instruction, + temp_loc, + component_offset, + maybe_temp2_loc, + kWithoutReadBarrier); // If the component type is null, jump to the slow path to throw the exception. __ Beqzc(temp, slow_path->GetEntryLabel()); // Otherwise, the object is indeed an array, further check that this component @@ -2050,11 +2761,19 @@ void InstructionCodeGeneratorMIPS64::VisitCheckCast(HCheckCast* instruction) { // Avoid read barriers to improve performance of the fast path. We can not get false // positives by doing this. // /* HeapReference<Class> */ temp = obj->klass_ - __ LoadFromOffset(kLoadUnsignedWord, temp, obj, class_offset); - __ MaybeUnpoisonHeapReference(temp); + GenerateReferenceLoadTwoRegisters(instruction, + temp_loc, + obj_loc, + class_offset, + maybe_temp2_loc, + kWithoutReadBarrier); // /* HeapReference<Class> */ temp = temp->iftable_ - __ LoadFromOffset(kLoadUnsignedWord, temp, temp, iftable_offset); - __ MaybeUnpoisonHeapReference(temp); + GenerateReferenceLoadTwoRegisters(instruction, + temp_loc, + temp_loc, + iftable_offset, + maybe_temp2_loc, + kWithoutReadBarrier); // Iftable is never null. __ Lw(TMP, temp, array_length_offset); // Loop through the iftable and check if any class matches. @@ -3270,14 +3989,31 @@ void CodeGeneratorMIPS64::GenerateNop() { } void LocationsBuilderMIPS64::HandleFieldGet(HInstruction* instruction, - const FieldInfo& field_info ATTRIBUTE_UNUSED) { - LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); + const FieldInfo& field_info) { + Primitive::Type field_type = field_info.GetFieldType(); + bool object_field_get_with_read_barrier = + kEmitCompilerReadBarrier && (field_type == Primitive::kPrimNot); + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary( + instruction, + object_field_get_with_read_barrier + ? 
LocationSummary::kCallOnSlowPath + : LocationSummary::kNoCall); locations->SetInAt(0, Location::RequiresRegister()); if (Primitive::IsFloatingPointType(instruction->GetType())) { locations->SetOut(Location::RequiresFpuRegister()); } else { - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + // The output overlaps in the case of an object field get with + // read barriers enabled: we do not want the move to overwrite the + // object's location, as we need it to emit the read barrier. + locations->SetOut(Location::RequiresRegister(), + object_field_get_with_read_barrier + ? Location::kOutputOverlap + : Location::kNoOutputOverlap); + } + if (object_field_get_with_read_barrier && kUseBakerReadBarrier) { + // We need a temporary register for the read barrier marking slow + // path in CodeGeneratorMIPS64::GenerateFieldLoadWithBakerReadBarrier. + locations->AddTemp(Location::RequiresRegister()); } } @@ -3285,8 +4021,11 @@ void InstructionCodeGeneratorMIPS64::HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info) { Primitive::Type type = field_info.GetFieldType(); LocationSummary* locations = instruction->GetLocations(); - GpuRegister obj = locations->InAt(0).AsRegister<GpuRegister>(); + Location obj_loc = locations->InAt(0); + GpuRegister obj = obj_loc.AsRegister<GpuRegister>(); + Location dst_loc = locations->Out(); LoadOperandType load_type = kLoadUnsignedByte; + bool is_volatile = field_info.IsVolatile(); uint32_t offset = field_info.GetFieldOffset().Uint32Value(); auto null_checker = GetImplicitNullChecker(instruction, codegen_); @@ -3319,19 +4058,46 @@ void InstructionCodeGeneratorMIPS64::HandleFieldGet(HInstruction* instruction, UNREACHABLE(); } if (!Primitive::IsFloatingPointType(type)) { - DCHECK(locations->Out().IsRegister()); - GpuRegister dst = locations->Out().AsRegister<GpuRegister>(); - __ LoadFromOffset(load_type, dst, obj, offset, null_checker); + DCHECK(dst_loc.IsRegister()); + GpuRegister dst = dst_loc.AsRegister<GpuRegister>(); + if (type == Primitive::kPrimNot) { + // /* HeapReference<Object> */ dst = *(obj + offset) + if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + Location temp_loc = locations->GetTemp(0); + // Note that a potential implicit null check is handled in this + // CodeGeneratorMIPS64::GenerateFieldLoadWithBakerReadBarrier call. + codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction, + dst_loc, + obj, + offset, + temp_loc, + /* needs_null_check */ true); + if (is_volatile) { + GenerateMemoryBarrier(MemBarrierKind::kLoadAny); + } + } else { + __ LoadFromOffset(kLoadUnsignedWord, dst, obj, offset, null_checker); + if (is_volatile) { + GenerateMemoryBarrier(MemBarrierKind::kLoadAny); + } + // If read barriers are enabled, emit read barriers other than + // Baker's using a slow path (and also unpoison the loaded + // reference, if heap poisoning is enabled). + codegen_->MaybeGenerateReadBarrierSlow(instruction, dst_loc, dst_loc, obj_loc, offset); + } + } else { + __ LoadFromOffset(load_type, dst, obj, offset, null_checker); + } } else { - DCHECK(locations->Out().IsFpuRegister()); - FpuRegister dst = locations->Out().AsFpuRegister<FpuRegister>(); + DCHECK(dst_loc.IsFpuRegister()); + FpuRegister dst = dst_loc.AsFpuRegister<FpuRegister>(); __ LoadFpuFromOffset(load_type, dst, obj, offset, null_checker); } - // TODO: memory barrier? 
- if (type == Primitive::kPrimNot) { - GpuRegister dst = locations->Out().AsRegister<GpuRegister>(); - __ MaybeUnpoisonHeapReference(dst); + // Memory barriers, in the case of references, are handled above, + // where the reference itself is loaded. + if (is_volatile && (type != Primitive::kPrimNot)) { + GenerateMemoryBarrier(MemBarrierKind::kLoadAny); } } @@ -3355,6 +4121,7 @@ void InstructionCodeGeneratorMIPS64::HandleFieldSet(HInstruction* instruction, GpuRegister obj = locations->InAt(0).AsRegister<GpuRegister>(); Location value_location = locations->InAt(1); StoreOperandType store_type = kStoreByte; + bool is_volatile = field_info.IsVolatile(); uint32_t offset = field_info.GetFieldOffset().Uint32Value(); bool needs_write_barrier = CodeGenerator::StoreNeedsWriteBarrier(type, instruction->InputAt(1)); auto null_checker = GetImplicitNullChecker(instruction, codegen_); @@ -3382,6 +4149,10 @@ void InstructionCodeGeneratorMIPS64::HandleFieldSet(HInstruction* instruction, UNREACHABLE(); } + if (is_volatile) { + GenerateMemoryBarrier(MemBarrierKind::kAnyStore); + } + if (value_location.IsConstant()) { int64_t value = CodeGenerator::GetInt64ValueOf(value_location.GetConstant()); __ StoreConstToOffset(store_type, value, obj, offset, TMP, null_checker); @@ -3405,12 +4176,16 @@ void InstructionCodeGeneratorMIPS64::HandleFieldSet(HInstruction* instruction, __ StoreFpuToOffset(store_type, src, obj, offset, null_checker); } } - // TODO: memory barriers? + if (needs_write_barrier) { DCHECK(value_location.IsRegister()); GpuRegister src = value_location.AsRegister<GpuRegister>(); codegen_->MarkGCCard(obj, src, value_can_be_null); } + + if (is_volatile) { + GenerateMemoryBarrier(MemBarrierKind::kAnyAny); + } } void LocationsBuilderMIPS64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) { @@ -3429,14 +4204,134 @@ void InstructionCodeGeneratorMIPS64::VisitInstanceFieldSet(HInstanceFieldSet* in HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull()); } +void InstructionCodeGeneratorMIPS64::GenerateReferenceLoadOneRegister( + HInstruction* instruction, + Location out, + uint32_t offset, + Location maybe_temp, + ReadBarrierOption read_barrier_option) { + GpuRegister out_reg = out.AsRegister<GpuRegister>(); + if (read_barrier_option == kWithReadBarrier) { + CHECK(kEmitCompilerReadBarrier); + DCHECK(maybe_temp.IsRegister()) << maybe_temp; + if (kUseBakerReadBarrier) { + // Load with fast path based Baker's read barrier. + // /* HeapReference<Object> */ out = *(out + offset) + codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction, + out, + out_reg, + offset, + maybe_temp, + /* needs_null_check */ false); + } else { + // Load with slow path based read barrier. + // Save the value of `out` into `maybe_temp` before it is + // overwritten by the reference load below, as we will need it + // for the read barrier. + __ Move(maybe_temp.AsRegister<GpuRegister>(), out_reg); + // /* HeapReference<Object> */ out = *(out + offset) + __ LoadFromOffset(kLoadUnsignedWord, out_reg, out_reg, offset); + codegen_->GenerateReadBarrierSlow(instruction, out, out, maybe_temp, offset); + } + } else { + // Plain load with no read barrier.
+ // /* HeapReference<Object> */ out = *(out + offset) + __ LoadFromOffset(kLoadUnsignedWord, out_reg, out_reg, offset); + __ MaybeUnpoisonHeapReference(out_reg); + } +} + +void InstructionCodeGeneratorMIPS64::GenerateReferenceLoadTwoRegisters( + HInstruction* instruction, + Location out, + Location obj, + uint32_t offset, + Location maybe_temp, + ReadBarrierOption read_barrier_option) { + GpuRegister out_reg = out.AsRegister<GpuRegister>(); + GpuRegister obj_reg = obj.AsRegister<GpuRegister>(); + if (read_barrier_option == kWithReadBarrier) { + CHECK(kEmitCompilerReadBarrier); + if (kUseBakerReadBarrier) { + DCHECK(maybe_temp.IsRegister()) << maybe_temp; + // Load with fast path based Baker's read barrier. + // /* HeapReference<Object> */ out = *(obj + offset) + codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction, + out, + obj_reg, + offset, + maybe_temp, + /* needs_null_check */ false); + } else { + // Load with slow path based read barrier. + // /* HeapReference<Object> */ out = *(obj + offset) + __ LoadFromOffset(kLoadUnsignedWord, out_reg, obj_reg, offset); + codegen_->GenerateReadBarrierSlow(instruction, out, out, obj, offset); + } + } else { + // Plain load with no read barrier. + // /* HeapReference<Object> */ out = *(obj + offset) + __ LoadFromOffset(kLoadUnsignedWord, out_reg, obj_reg, offset); + __ MaybeUnpoisonHeapReference(out_reg); + } +} + void InstructionCodeGeneratorMIPS64::GenerateGcRootFieldLoad( - HInstruction* instruction ATTRIBUTE_UNUSED, + HInstruction* instruction, Location root, GpuRegister obj, - uint32_t offset) { + uint32_t offset, + ReadBarrierOption read_barrier_option) { GpuRegister root_reg = root.AsRegister<GpuRegister>(); - if (kEmitCompilerReadBarrier) { - UNIMPLEMENTED(FATAL) << "for read barrier"; + if (read_barrier_option == kWithReadBarrier) { + DCHECK(kEmitCompilerReadBarrier); + if (kUseBakerReadBarrier) { + // Fast path implementation of art::ReadBarrier::BarrierForRoot when + // Baker's read barriers are used: + // + // root = obj.field; + // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg() + // if (temp != null) { + // root = temp(root) + // } + + // /* GcRoot<mirror::Object> */ root = *(obj + offset) + __ LoadFromOffset(kLoadUnsignedWord, root_reg, obj, offset); + static_assert( + sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>), + "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> " + "have different sizes."); + static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t), + "art::mirror::CompressedReference<mirror::Object> and int32_t " + "have different sizes."); + + // Slow path marking the GC root `root`. + Location temp = Location::RegisterLocation(T9); + SlowPathCodeMIPS64* slow_path = + new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathMIPS64( + instruction, + root, + /*entrypoint*/ temp); + codegen_->AddSlowPath(slow_path); + + // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg() + const int32_t entry_point_offset = + CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kMips64PointerSize>(root.reg() - 1); + // Loading the entrypoint does not require a load acquire since it is only changed when + // threads are suspended or running a checkpoint. + __ LoadFromOffset(kLoadDoubleword, temp.AsRegister<GpuRegister>(), TR, entry_point_offset); + // The entrypoint is null when the GC is not marking; this saves one load compared to + // checking GetIsGcMarking.
+ __ Bnezc(temp.AsRegister<GpuRegister>(), slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); + } else { + // GC root loaded through a slow path for read barriers other + // than Baker's. + // /* GcRoot<mirror::Object>* */ root = obj + offset + __ Daddiu64(root_reg, obj, static_cast<int32_t>(offset)); + // /* mirror::Object* */ root = root->Read() + codegen_->GenerateReadBarrierForRootSlow(instruction, root, root); + } } else { // Plain GC root load with no read barrier. // /* GcRoot<mirror::Object> */ root = *(obj + offset) @@ -3446,6 +4341,219 @@ void InstructionCodeGeneratorMIPS64::GenerateGcRootFieldLoad( } } +void CodeGeneratorMIPS64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction, + Location ref, + GpuRegister obj, + uint32_t offset, + Location temp, + bool needs_null_check) { + DCHECK(kEmitCompilerReadBarrier); + DCHECK(kUseBakerReadBarrier); + + // /* HeapReference<Object> */ ref = *(obj + offset) + Location no_index = Location::NoLocation(); + ScaleFactor no_scale_factor = TIMES_1; + GenerateReferenceLoadWithBakerReadBarrier(instruction, + ref, + obj, + offset, + no_index, + no_scale_factor, + temp, + needs_null_check); +} + +void CodeGeneratorMIPS64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction, + Location ref, + GpuRegister obj, + uint32_t data_offset, + Location index, + Location temp, + bool needs_null_check) { + DCHECK(kEmitCompilerReadBarrier); + DCHECK(kUseBakerReadBarrier); + + static_assert( + sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), + "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); + // /* HeapReference<Object> */ ref = + // *(obj + data_offset + index * sizeof(HeapReference<Object>)) + ScaleFactor scale_factor = TIMES_4; + GenerateReferenceLoadWithBakerReadBarrier(instruction, + ref, + obj, + data_offset, + index, + scale_factor, + temp, + needs_null_check); +} + +void CodeGeneratorMIPS64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction, + Location ref, + GpuRegister obj, + uint32_t offset, + Location index, + ScaleFactor scale_factor, + Location temp, + bool needs_null_check, + bool always_update_field) { + DCHECK(kEmitCompilerReadBarrier); + DCHECK(kUseBakerReadBarrier); + + // In slow path based read barriers, the read barrier call is + // inserted after the original load. However, in fast path based + // Baker's read barriers, we need to perform the load of + // mirror::Object::monitor_ *before* the original reference load. + // This load-load ordering is required by the read barrier. + // The fast path/slow path (for Baker's algorithm) should look like: + // + // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState(); + // lfence; // Load fence or artificial data dependency to prevent load-load reordering + // HeapReference<Object> ref = *src; // Original reference load. + // bool is_gray = (rb_state == ReadBarrier::GrayState()); + // if (is_gray) { + // ref = ReadBarrier::Mark(ref); // Performed by runtime entrypoint slow path. + // } + // + // Note: the original implementation in ReadBarrier::Barrier is + // slightly more complex as it performs additional checks that we do + // not do here for performance reasons. 
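An aside on the gray-value test this fast path ends with (the Sll/Bltzc pair further below): the lock word's 1-bit read barrier state is shifted into the sign bit so a single branch-on-negative can route gray objects to the mark slow path. A minimal standalone C++ sketch of that test; the shift constant here is an assumed illustrative value standing in for LockWord::kReadBarrierStateShift, which the patch only uses symbolically.

#include <cstdint>
#include <cstdio>

// Assumed for illustration only: a 32-bit lock word with a 1-bit read
// barrier state at this shift (mirrors LockWord::kReadBarrierStateShift).
constexpr uint32_t kReadBarrierStateShift = 28;

// Mirrors the emitted Sll/Bltzc pair: shift the state bit into bit 31,
// then "is negative" stands in for the branch to the mark slow path.
bool IsGray(uint32_t monitor) {
  return static_cast<int32_t>(monitor << (31 - kReadBarrierStateShift)) < 0;
}

int main() {
  // Prints "0 1": a white lock word is not gray, one with the state bit set is.
  std::printf("%d %d\n", IsGray(0u), IsGray(1u << kReadBarrierStateShift));
  return 0;
}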
+ + GpuRegister ref_reg = ref.AsRegister<GpuRegister>(); + GpuRegister temp_reg = temp.AsRegister<GpuRegister>(); + uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value(); + + // /* int32_t */ monitor = obj->monitor_ + __ LoadFromOffset(kLoadWord, temp_reg, obj, monitor_offset); + if (needs_null_check) { + MaybeRecordImplicitNullCheck(instruction); + } + // /* LockWord */ lock_word = LockWord(monitor) + static_assert(sizeof(LockWord) == sizeof(int32_t), + "art::LockWord and int32_t have different sizes."); + + __ Sync(0); // Barrier to prevent load-load reordering. + + // The actual reference load. + if (index.IsValid()) { + // Load types involving an "index": ArrayGet, + // UnsafeGetObject/UnsafeGetObjectVolatile and UnsafeCASObject + // intrinsics. + // /* HeapReference<Object> */ ref = *(obj + offset + (index << scale_factor)) + if (index.IsConstant()) { + size_t computed_offset = + (index.GetConstant()->AsIntConstant()->GetValue() << scale_factor) + offset; + __ LoadFromOffset(kLoadUnsignedWord, ref_reg, obj, computed_offset); + } else { + GpuRegister index_reg = index.AsRegister<GpuRegister>(); + __ Dsll(TMP, index_reg, scale_factor); + __ Daddu(TMP, obj, TMP); + __ LoadFromOffset(kLoadUnsignedWord, ref_reg, TMP, offset); + } + } else { + // /* HeapReference<Object> */ ref = *(obj + offset) + __ LoadFromOffset(kLoadUnsignedWord, ref_reg, obj, offset); + } + + // Object* ref = ref_addr->AsMirrorPtr() + __ MaybeUnpoisonHeapReference(ref_reg); + + // Slow path marking the object `ref` when it is gray. + SlowPathCodeMIPS64* slow_path; + if (always_update_field) { + // ReadBarrierMarkAndUpdateFieldSlowPathMIPS64 only supports addresses + // of the form `obj + field_offset`, where `obj` is a register and + // `field_offset` is a register. Thus `offset` is expected to be zero and + // `scale_factor` to be TIMES_1 in this code path. + DCHECK_EQ(offset, 0u); + DCHECK_EQ(scale_factor, ScaleFactor::TIMES_1); + slow_path = new (GetGraph()->GetArena()) + ReadBarrierMarkAndUpdateFieldSlowPathMIPS64(instruction, + ref, + obj, + /* field_offset */ index, + temp_reg); + } else { + slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathMIPS64(instruction, ref); + } + AddSlowPath(slow_path); + + // if (rb_state == ReadBarrier::GrayState()) + // ref = ReadBarrier::Mark(ref); + // Given the numeric representation, it's enough to check the low bit of the + // rb_state. We do that by shifting the bit into the sign bit (31) and + // performing a branch on less than zero. + static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0"); + static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1"); + static_assert(LockWord::kReadBarrierStateSize == 1, "Expecting 1-bit read barrier state size"); + __ Sll(temp_reg, temp_reg, 31 - LockWord::kReadBarrierStateShift); + __ Bltzc(temp_reg, slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); +} + +void CodeGeneratorMIPS64::GenerateReadBarrierSlow(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index) { + DCHECK(kEmitCompilerReadBarrier); + + // Insert a slow path based read barrier *after* the reference load. + // + // If heap poisoning is enabled, the unpoisoning of the loaded + // reference will be carried out by the runtime within the slow + // path. + // + // Note that `ref` currently does not get unpoisoned (when heap + // poisoning is enabled), which is alright as the `ref` argument is + // not used by the artReadBarrierSlow entry point.
+ // + // TODO: Unpoison `ref` when it is used by artReadBarrierSlow. + SlowPathCodeMIPS64* slow_path = new (GetGraph()->GetArena()) + ReadBarrierForHeapReferenceSlowPathMIPS64(instruction, out, ref, obj, offset, index); + AddSlowPath(slow_path); + + __ Bc(slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); +} + +void CodeGeneratorMIPS64::MaybeGenerateReadBarrierSlow(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index) { + if (kEmitCompilerReadBarrier) { + // Baker's read barriers shall be handled by the fast path + // (CodeGeneratorMIPS64::GenerateReferenceLoadWithBakerReadBarrier). + DCHECK(!kUseBakerReadBarrier); + // If heap poisoning is enabled, unpoisoning will be taken care of + // by the runtime within the slow path. + GenerateReadBarrierSlow(instruction, out, ref, obj, offset, index); + } else if (kPoisonHeapReferences) { + __ UnpoisonHeapReference(out.AsRegister<GpuRegister>()); + } +} + +void CodeGeneratorMIPS64::GenerateReadBarrierForRootSlow(HInstruction* instruction, + Location out, + Location root) { + DCHECK(kEmitCompilerReadBarrier); + + // Insert a slow path based read barrier *after* the GC root load. + // + // Note that GC roots are not affected by heap poisoning, so we do + // not need to do anything special for this here. + SlowPathCodeMIPS64* slow_path = + new (GetGraph()->GetArena()) ReadBarrierForRootSlowPathMIPS64(instruction, out, root); + AddSlowPath(slow_path); + + __ Bc(slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); +} + void LocationsBuilderMIPS64::VisitInstanceOf(HInstanceOf* instruction) { LocationSummary::CallKind call_kind = LocationSummary::kNoCall; TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); @@ -3454,7 +4562,8 @@ void LocationsBuilderMIPS64::VisitInstanceOf(HInstanceOf* instruction) { case TypeCheckKind::kAbstractClassCheck: case TypeCheckKind::kClassHierarchyCheck: case TypeCheckKind::kArrayObjectCheck: - call_kind = LocationSummary::kNoCall; + call_kind = + kEmitCompilerReadBarrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall; break; case TypeCheckKind::kArrayCheck: case TypeCheckKind::kUnresolvedCheck: @@ -3469,14 +4578,20 @@ void LocationsBuilderMIPS64::VisitInstanceOf(HInstanceOf* instruction) { // The output does overlap inputs. // Note that TypeCheckSlowPathMIPS64 uses this register too. locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); + locations->AddRegisterTemps(NumberOfInstanceOfTemps(type_check_kind)); } void InstructionCodeGeneratorMIPS64::VisitInstanceOf(HInstanceOf* instruction) { TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); LocationSummary* locations = instruction->GetLocations(); - GpuRegister obj = locations->InAt(0).AsRegister<GpuRegister>(); + Location obj_loc = locations->InAt(0); + GpuRegister obj = obj_loc.AsRegister<GpuRegister>(); GpuRegister cls = locations->InAt(1).AsRegister<GpuRegister>(); - GpuRegister out = locations->Out().AsRegister<GpuRegister>(); + Location out_loc = locations->Out(); + GpuRegister out = out_loc.AsRegister<GpuRegister>(); + const size_t num_temps = NumberOfInstanceOfTemps(type_check_kind); + DCHECK_LE(num_temps, 1u); + Location maybe_temp_loc = (num_temps >= 1) ? 
locations->GetTemp(0) : Location::NoLocation(); uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); @@ -3494,8 +4609,12 @@ void InstructionCodeGeneratorMIPS64::VisitInstanceOf(HInstanceOf* instruction) { switch (type_check_kind) { case TypeCheckKind::kExactCheck: { // /* HeapReference<Class> */ out = obj->klass_ - __ LoadFromOffset(kLoadUnsignedWord, out, obj, class_offset); - __ MaybeUnpoisonHeapReference(out); + GenerateReferenceLoadTwoRegisters(instruction, + out_loc, + obj_loc, + class_offset, + maybe_temp_loc, + kCompilerReadBarrierOption); // Classes must be equal for the instanceof to succeed. __ Xor(out, out, cls); __ Sltiu(out, out, 1); @@ -3504,15 +4623,22 @@ void InstructionCodeGeneratorMIPS64::VisitInstanceOf(HInstanceOf* instruction) { case TypeCheckKind::kAbstractClassCheck: { // /* HeapReference<Class> */ out = obj->klass_ - __ LoadFromOffset(kLoadUnsignedWord, out, obj, class_offset); - __ MaybeUnpoisonHeapReference(out); + GenerateReferenceLoadTwoRegisters(instruction, + out_loc, + obj_loc, + class_offset, + maybe_temp_loc, + kCompilerReadBarrierOption); // If the class is abstract, we eagerly fetch the super class of the // object to avoid doing a comparison we know will fail. Mips64Label loop; __ Bind(&loop); // /* HeapReference<Class> */ out = out->super_class_ - __ LoadFromOffset(kLoadUnsignedWord, out, out, super_offset); - __ MaybeUnpoisonHeapReference(out); + GenerateReferenceLoadOneRegister(instruction, + out_loc, + super_offset, + maybe_temp_loc, + kCompilerReadBarrierOption); // If `out` is null, we use it for the result, and jump to `done`. __ Beqzc(out, &done); __ Bnec(out, cls, &loop); @@ -3522,15 +4648,22 @@ void InstructionCodeGeneratorMIPS64::VisitInstanceOf(HInstanceOf* instruction) { case TypeCheckKind::kClassHierarchyCheck: { // /* HeapReference<Class> */ out = obj->klass_ - __ LoadFromOffset(kLoadUnsignedWord, out, obj, class_offset); - __ MaybeUnpoisonHeapReference(out); + GenerateReferenceLoadTwoRegisters(instruction, + out_loc, + obj_loc, + class_offset, + maybe_temp_loc, + kCompilerReadBarrierOption); // Walk over the class hierarchy to find a match. Mips64Label loop, success; __ Bind(&loop); __ Beqc(out, cls, &success); // /* HeapReference<Class> */ out = out->super_class_ - __ LoadFromOffset(kLoadUnsignedWord, out, out, super_offset); - __ MaybeUnpoisonHeapReference(out); + GenerateReferenceLoadOneRegister(instruction, + out_loc, + super_offset, + maybe_temp_loc, + kCompilerReadBarrierOption); __ Bnezc(out, &loop); // If `out` is null, we use it for the result, and jump to `done`. __ Bc(&done); @@ -3541,15 +4674,22 @@ void InstructionCodeGeneratorMIPS64::VisitInstanceOf(HInstanceOf* instruction) { case TypeCheckKind::kArrayObjectCheck: { // /* HeapReference<Class> */ out = obj->klass_ - __ LoadFromOffset(kLoadUnsignedWord, out, obj, class_offset); - __ MaybeUnpoisonHeapReference(out); + GenerateReferenceLoadTwoRegisters(instruction, + out_loc, + obj_loc, + class_offset, + maybe_temp_loc, + kCompilerReadBarrierOption); // Do an exact check. Mips64Label success; __ Beqc(out, cls, &success); // Otherwise, we need to check that the object's class is a non-primitive array. 
// /* HeapReference<Class> */ out = out->component_type_ - __ LoadFromOffset(kLoadUnsignedWord, out, out, component_offset); - __ MaybeUnpoisonHeapReference(out); + GenerateReferenceLoadOneRegister(instruction, + out_loc, + component_offset, + maybe_temp_loc, + kCompilerReadBarrierOption); // If `out` is null, we use it for the result, and jump to `done`. __ Beqzc(out, &done); __ LoadFromOffset(kLoadUnsignedHalfword, out, out, primitive_offset); @@ -3564,8 +4704,12 @@ void InstructionCodeGeneratorMIPS64::VisitInstanceOf(HInstanceOf* instruction) { case TypeCheckKind::kArrayCheck: { // No read barrier since the slow path will retry upon failure. // /* HeapReference<Class> */ out = obj->klass_ - __ LoadFromOffset(kLoadUnsignedWord, out, obj, class_offset); - __ MaybeUnpoisonHeapReference(out); + GenerateReferenceLoadTwoRegisters(instruction, + out_loc, + obj_loc, + class_offset, + maybe_temp_loc, + kWithoutReadBarrier); DCHECK(locations->OnlyCallsOnSlowPath()); slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathMIPS64(instruction, /* is_fatal */ false); @@ -3735,9 +4879,6 @@ static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorMIPS64* codeg HLoadString::LoadKind CodeGeneratorMIPS64::GetSupportedLoadStringKind( HLoadString::LoadKind desired_string_load_kind) { - if (kEmitCompilerReadBarrier) { - UNIMPLEMENTED(FATAL) << "for read barrier"; - } bool fallback_load = false; switch (desired_string_load_kind) { case HLoadString::LoadKind::kBootImageLinkTimeAddress: @@ -3765,9 +4906,6 @@ HLoadString::LoadKind CodeGeneratorMIPS64::GetSupportedLoadStringKind( HLoadClass::LoadKind CodeGeneratorMIPS64::GetSupportedLoadClassKind( HLoadClass::LoadKind desired_class_load_kind) { - if (kEmitCompilerReadBarrier) { - UNIMPLEMENTED(FATAL) << "for read barrier"; - } bool fallback_load = false; switch (desired_class_load_kind) { case HLoadClass::LoadKind::kInvalid: @@ -3960,7 +5098,8 @@ void LocationsBuilderMIPS64::VisitLoadClass(HLoadClass* cls) { } DCHECK(!cls->NeedsAccessCheck()); - LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || kEmitCompilerReadBarrier) + const bool requires_read_barrier = kEmitCompilerReadBarrier && !cls->IsInBootImage(); + LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || requires_read_barrier) ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall; LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(cls, call_kind); @@ -3989,6 +5128,9 @@ void InstructionCodeGeneratorMIPS64::VisitLoadClass(HLoadClass* cls) NO_THREAD_S current_method_reg = locations->InAt(0).AsRegister<GpuRegister>(); } + const ReadBarrierOption read_barrier_option = cls->IsInBootImage() + ? 
kWithoutReadBarrier + : kCompilerReadBarrierOption; bool generate_null_check = false; switch (load_kind) { case HLoadClass::LoadKind::kReferrersClass: @@ -3998,10 +5140,12 @@ void InstructionCodeGeneratorMIPS64::VisitLoadClass(HLoadClass* cls) NO_THREAD_S GenerateGcRootFieldLoad(cls, out_loc, current_method_reg, - ArtMethod::DeclaringClassOffset().Int32Value()); + ArtMethod::DeclaringClassOffset().Int32Value(), + read_barrier_option); break; case HLoadClass::LoadKind::kBootImageLinkTimeAddress: DCHECK(codegen_->GetCompilerOptions().IsBootImage()); + DCHECK_EQ(read_barrier_option, kWithoutReadBarrier); __ LoadLiteral(out, kLoadUnsignedWord, codegen_->DeduplicateBootImageTypeLiteral(cls->GetDexFile(), @@ -4009,6 +5153,7 @@ void InstructionCodeGeneratorMIPS64::VisitLoadClass(HLoadClass* cls) NO_THREAD_S break; case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: { DCHECK(codegen_->GetCompilerOptions().IsBootImage()); + DCHECK_EQ(read_barrier_option, kWithoutReadBarrier); CodeGeneratorMIPS64::PcRelativePatchInfo* info = codegen_->NewPcRelativeTypePatch(cls->GetDexFile(), cls->GetTypeIndex()); codegen_->EmitPcRelativeAddressPlaceholderHigh(info, AT); @@ -4016,7 +5161,7 @@ void InstructionCodeGeneratorMIPS64::VisitLoadClass(HLoadClass* cls) NO_THREAD_S break; } case HLoadClass::LoadKind::kBootImageAddress: { - DCHECK(!kEmitCompilerReadBarrier); + DCHECK_EQ(read_barrier_option, kWithoutReadBarrier); uint32_t address = dchecked_integral_cast<uint32_t>( reinterpret_cast<uintptr_t>(cls->GetClass().Get())); DCHECK_NE(address, 0u); @@ -4029,7 +5174,7 @@ void InstructionCodeGeneratorMIPS64::VisitLoadClass(HLoadClass* cls) NO_THREAD_S CodeGeneratorMIPS64::PcRelativePatchInfo* info = codegen_->NewTypeBssEntryPatch(cls->GetDexFile(), cls->GetTypeIndex()); codegen_->EmitPcRelativeAddressPlaceholderHigh(info, out); - GenerateGcRootFieldLoad(cls, out_loc, out, /* placeholder */ 0x5678); + GenerateGcRootFieldLoad(cls, out_loc, out, /* placeholder */ 0x5678, read_barrier_option); generate_null_check = true; break; } @@ -4039,7 +5184,7 @@ void InstructionCodeGeneratorMIPS64::VisitLoadClass(HLoadClass* cls) NO_THREAD_S codegen_->DeduplicateJitClassLiteral(cls->GetDexFile(), cls->GetTypeIndex(), cls->GetClass())); - GenerateGcRootFieldLoad(cls, out_loc, out, 0); + GenerateGcRootFieldLoad(cls, out_loc, out, 0, read_barrier_option); break; case HLoadClass::LoadKind::kDexCacheViaMethod: case HLoadClass::LoadKind::kInvalid: @@ -4136,7 +5281,11 @@ void InstructionCodeGeneratorMIPS64::VisitLoadString(HLoadString* load) NO_THREA CodeGeneratorMIPS64::PcRelativePatchInfo* info = codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex()); codegen_->EmitPcRelativeAddressPlaceholderHigh(info, out); - GenerateGcRootFieldLoad(load, out_loc, out, /* placeholder */ 0x5678); + GenerateGcRootFieldLoad(load, + out_loc, + out, + /* placeholder */ 0x5678, + kCompilerReadBarrierOption); SlowPathCodeMIPS64* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathMIPS64(load); codegen_->AddSlowPath(slow_path); __ Beqzc(out, slow_path->GetEntryLabel()); @@ -4149,7 +5298,7 @@ void InstructionCodeGeneratorMIPS64::VisitLoadString(HLoadString* load) NO_THREA codegen_->DeduplicateJitStringLiteral(load->GetDexFile(), load->GetStringIndex(), load->GetString())); - GenerateGcRootFieldLoad(load, out_loc, out, 0); + GenerateGcRootFieldLoad(load, out_loc, out, 0, kCompilerReadBarrierOption); return; default: break; diff --git a/compiler/optimizing/code_generator_mips64.h b/compiler/optimizing/code_generator_mips64.h index 
6040dc9492..fd1a174608 100644 --- a/compiler/optimizing/code_generator_mips64.h +++ b/compiler/optimizing/code_generator_mips64.h @@ -237,6 +237,38 @@ class InstructionCodeGeneratorMIPS64 : public InstructionCodeGenerator { const FieldInfo& field_info, bool value_can_be_null); void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info); + + // Generate a heap reference load using one register `out`: + // + // out <- *(out + offset) + // + // while honoring heap poisoning and/or read barriers (if any). + // + // Location `maybe_temp` is used when generating a read barrier and + // shall be a register in that case; it may be an invalid location + // otherwise. + void GenerateReferenceLoadOneRegister(HInstruction* instruction, + Location out, + uint32_t offset, + Location maybe_temp, + ReadBarrierOption read_barrier_option); + // Generate a heap reference load using two different registers + // `out` and `obj`: + // + // out <- *(obj + offset) + // + // while honoring heap poisoning and/or read barriers (if any). + // + // Location `maybe_temp` is used when generating a Baker's (fast + // path) read barrier and shall be a register in that case; it may + // be an invalid location otherwise. + void GenerateReferenceLoadTwoRegisters(HInstruction* instruction, + Location out, + Location obj, + uint32_t offset, + Location maybe_temp, + ReadBarrierOption read_barrier_option); + // Generate a GC root reference load: // // root <- *(obj + offset) @@ -245,7 +277,9 @@ class InstructionCodeGeneratorMIPS64 : public InstructionCodeGenerator { void GenerateGcRootFieldLoad(HInstruction* instruction, Location root, GpuRegister obj, - uint32_t offset); + uint32_t offset, + ReadBarrierOption read_barrier_option); + void GenerateTestAndBranch(HInstruction* instruction, size_t condition_input_index, Mips64Label* true_target, @@ -316,6 +350,91 @@ class CodeGeneratorMIPS64 : public CodeGenerator { void EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) OVERRIDE; void EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) OVERRIDE; + // Fast path implementation of ReadBarrier::Barrier for a heap + // reference field load when Baker's read barriers are used. + void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction, + Location ref, + GpuRegister obj, + uint32_t offset, + Location temp, + bool needs_null_check); + // Fast path implementation of ReadBarrier::Barrier for a heap + // reference array load when Baker's read barriers are used. + void GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction, + Location ref, + GpuRegister obj, + uint32_t data_offset, + Location index, + Location temp, + bool needs_null_check); + + // Factored implementation, used by GenerateFieldLoadWithBakerReadBarrier, + // GenerateArrayLoadWithBakerReadBarrier and some intrinsics. + // + // Load the object reference located at the address + // `obj + offset + (index << scale_factor)`, held by object `obj`, into + // `ref`, and mark it if needed. + // + // If `always_update_field` is true, the value of the reference is + // atomically updated in the holder (`obj`). + void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction, + Location ref, + GpuRegister obj, + uint32_t offset, + Location index, + ScaleFactor scale_factor, + Location temp, + bool needs_null_check, + bool always_update_field = false); + + // Generate a read barrier for a heap reference within `instruction` + // using a slow path. 
+ // + // A read barrier for an object reference read from the heap is + // implemented as a call to the artReadBarrierSlow runtime entry + // point, which is passed the values in locations `ref`, `obj`, and + // `offset`: + // + // mirror::Object* artReadBarrierSlow(mirror::Object* ref, + // mirror::Object* obj, + // uint32_t offset); + // + // The `out` location contains the value returned by + // artReadBarrierSlow. + // + // When `index` is provided (i.e. for array accesses), the offset + // value passed to artReadBarrierSlow is adjusted to take `index` + // into account. + void GenerateReadBarrierSlow(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index = Location::NoLocation()); + + // If read barriers are enabled, generate a read barrier for a heap + // reference using a slow path. If heap poisoning is enabled, also + // unpoison the reference in `out`. + void MaybeGenerateReadBarrierSlow(HInstruction* instruction, + Location out, + Location ref, + Location obj, + uint32_t offset, + Location index = Location::NoLocation()); + + // Generate a read barrier for a GC root within `instruction` using + // a slow path. + // + // A read barrier for an object reference GC root is implemented as + // a call to the artReadBarrierForRootSlow runtime entry point, + // which is passed the value in location `root`: + // + // mirror::Object* artReadBarrierForRootSlow(GcRoot<mirror::Object>* root); + // + // The `out` location contains the value returned by + // artReadBarrierForRootSlow. + void GenerateReadBarrierForRootSlow(HInstruction* instruction, Location out, Location root); + void MarkGCCard(GpuRegister object, GpuRegister value, bool value_can_be_null); // Register allocation. @@ -366,6 +485,14 @@ class CodeGeneratorMIPS64 : public CodeGenerator { uint32_t dex_pc, SlowPathCode* slow_path = nullptr) OVERRIDE; + // Generate code to invoke a runtime entry point, but do not record + // PC-related information in a stack map. + void InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset, + HInstruction* instruction, + SlowPathCode* slow_path); + + void GenerateInvokeRuntime(int32_t entry_point_offset); + ParallelMoveResolver* GetMoveResolver() OVERRIDE { return &move_resolver_; } bool NeedsTwoRegisters(Primitive::Type type ATTRIBUTE_UNUSED) const OVERRIDE { return false; } diff --git a/compiler/optimizing/code_generator_vector_arm.cc b/compiler/optimizing/code_generator_vector_arm.cc new file mode 100644 index 0000000000..ba2b2cb2c9 --- /dev/null +++ b/compiler/optimizing/code_generator_vector_arm.cc @@ -0,0 +1,235 @@ +/* + * Copyright (C) 2017 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "code_generator_arm.h" + +namespace art { +namespace arm { + +// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy. 
+#define __ down_cast<ArmAssembler*>(GetAssembler())-> // NOLINT + +void LocationsBuilderARM::VisitVecReplicateScalar(HVecReplicateScalar* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void InstructionCodeGeneratorARM::VisitVecReplicateScalar(HVecReplicateScalar* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void LocationsBuilderARM::VisitVecSetScalars(HVecSetScalars* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void InstructionCodeGeneratorARM::VisitVecSetScalars(HVecSetScalars* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void LocationsBuilderARM::VisitVecSumReduce(HVecSumReduce* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void InstructionCodeGeneratorARM::VisitVecSumReduce(HVecSumReduce* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +// Helper to set up locations for vector unary operations. +static void CreateVecUnOpLocations(ArenaAllocator* arena, HVecUnaryOperation* instruction) { + LocationSummary* locations = new (arena) LocationSummary(instruction); + switch (instruction->GetPackedType()) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: + DCHECK(locations); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +void LocationsBuilderARM::VisitVecCnv(HVecCnv* instruction) { + CreateVecUnOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorARM::VisitVecCnv(HVecCnv* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void LocationsBuilderARM::VisitVecNeg(HVecNeg* instruction) { + CreateVecUnOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorARM::VisitVecNeg(HVecNeg* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void LocationsBuilderARM::VisitVecNot(HVecNot* instruction) { + CreateVecUnOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorARM::VisitVecNot(HVecNot* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +// Helper to set up locations for vector binary operations. 
+static void CreateVecBinOpLocations(ArenaAllocator* arena, HVecBinaryOperation* instruction) { + LocationSummary* locations = new (arena) LocationSummary(instruction); + switch (instruction->GetPackedType()) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: + DCHECK(locations); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +void LocationsBuilderARM::VisitVecAdd(HVecAdd* instruction) { + CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorARM::VisitVecAdd(HVecAdd* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void LocationsBuilderARM::VisitVecSub(HVecSub* instruction) { + CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorARM::VisitVecSub(HVecSub* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void LocationsBuilderARM::VisitVecMul(HVecMul* instruction) { + CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorARM::VisitVecMul(HVecMul* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void LocationsBuilderARM::VisitVecDiv(HVecDiv* instruction) { + CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorARM::VisitVecDiv(HVecDiv* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void LocationsBuilderARM::VisitVecAnd(HVecAnd* instruction) { + CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorARM::VisitVecAnd(HVecAnd* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void LocationsBuilderARM::VisitVecAndNot(HVecAndNot* instruction) { + CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorARM::VisitVecAndNot(HVecAndNot* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void LocationsBuilderARM::VisitVecOr(HVecOr* instruction) { + CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorARM::VisitVecOr(HVecOr* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void LocationsBuilderARM::VisitVecXor(HVecXor* instruction) { + CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorARM::VisitVecXor(HVecXor* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +// Helper to set up locations for vector shift operations. 
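One point the shift stubs below leave implicit: HVecShr and HVecUShr stay distinct because arithmetic and logical right shifts disagree on negative lanes (the ARM64 backend further down maps them to Sshr and Ushr respectively). A minimal scalar sketch of that per-lane difference, using hypothetical helper names rather than anything from the patch:

#include <cstdint>
#include <cstdio>

// One-lane model: HVecShr maps to an arithmetic shift (sign bits shift in),
// HVecUShr to a logical shift (zero bits shift in). Helper names are made up.
int8_t ShrLane(int8_t lane, int amount) {   // Sshr-style
  return static_cast<int8_t>(lane >> amount);
}
int8_t UShrLane(int8_t lane, int amount) {  // Ushr-style
  return static_cast<int8_t>(static_cast<uint8_t>(lane) >> amount);
}

int main() {
  // Prints "-4 60": the same bit pattern 0xF0 shifted right by 2 both ways.
  std::printf("%d %d\n", ShrLane(-16, 2), UShrLane(-16, 2));
  return 0;
}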
+static void CreateVecShiftLocations(ArenaAllocator* arena, HVecBinaryOperation* instruction) { + LocationSummary* locations = new (arena) LocationSummary(instruction); + switch (instruction->GetPackedType()) { + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimLong: + DCHECK(locations); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +void LocationsBuilderARM::VisitVecShl(HVecShl* instruction) { + CreateVecShiftLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorARM::VisitVecShl(HVecShl* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void LocationsBuilderARM::VisitVecShr(HVecShr* instruction) { + CreateVecShiftLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorARM::VisitVecShr(HVecShr* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void LocationsBuilderARM::VisitVecUShr(HVecUShr* instruction) { + CreateVecShiftLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorARM::VisitVecUShr(HVecUShr* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void LocationsBuilderARM::VisitVecLoad(HVecLoad* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void InstructionCodeGeneratorARM::VisitVecLoad(HVecLoad* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void LocationsBuilderARM::VisitVecStore(HVecStore* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void InstructionCodeGeneratorARM::VisitVecStore(HVecStore* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +#undef __ + +} // namespace arm +} // namespace art diff --git a/compiler/optimizing/code_generator_vector_arm64.cc b/compiler/optimizing/code_generator_vector_arm64.cc new file mode 100644 index 0000000000..96d00210b8 --- /dev/null +++ b/compiler/optimizing/code_generator_vector_arm64.cc @@ -0,0 +1,641 @@ +/* + * Copyright (C) 2017 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "code_generator_arm64.h" +#include "mirror/array-inl.h" + +using namespace vixl::aarch64; // NOLINT(build/namespaces) + +namespace art { +namespace arm64 { + +using helpers::DRegisterFrom; +using helpers::HeapOperand; +using helpers::InputRegisterAt; +using helpers::Int64ConstantFrom; +using helpers::XRegisterFrom; + +#define __ GetVIXLAssembler()-> + +void LocationsBuilderARM64::VisitVecReplicateScalar(HVecReplicateScalar* instruction) { + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction); + switch (instruction->GetPackedType()) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresFpuRegister()); + break; + case Primitive::kPrimFloat: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +void InstructionCodeGeneratorARM64::VisitVecReplicateScalar(HVecReplicateScalar* instruction) { + LocationSummary* locations = instruction->GetLocations(); + FPRegister dst = DRegisterFrom(locations->Out()); + switch (instruction->GetPackedType()) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ Dup(dst.V8B(), InputRegisterAt(instruction, 0)); + break; + case Primitive::kPrimChar: + case Primitive::kPrimShort: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ Dup(dst.V4H(), InputRegisterAt(instruction, 0)); + break; + case Primitive::kPrimInt: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ Dup(dst.V2S(), InputRegisterAt(instruction, 0)); + break; + case Primitive::kPrimFloat: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ Dup(dst.V2S(), DRegisterFrom(locations->InAt(0)).V2S(), 0); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +void LocationsBuilderARM64::VisitVecSetScalars(HVecSetScalars* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void InstructionCodeGeneratorARM64::VisitVecSetScalars(HVecSetScalars* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void LocationsBuilderARM64::VisitVecSumReduce(HVecSumReduce* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void InstructionCodeGeneratorARM64::VisitVecSumReduce(HVecSumReduce* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +// Helper to set up locations for vector unary operations. +static void CreateVecUnOpLocations(ArenaAllocator* arena, HVecUnaryOperation* instruction) { + LocationSummary* locations = new (arena) LocationSummary(instruction); + switch (instruction->GetPackedType()) { + case Primitive::kPrimBoolean: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister(), + instruction->IsVecNot() ? 
Location::kOutputOverlap + : Location::kNoOutputOverlap); + break; + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimFloat: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +void LocationsBuilderARM64::VisitVecCnv(HVecCnv* instruction) { + CreateVecUnOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorARM64::VisitVecCnv(HVecCnv* instruction) { + LocationSummary* locations = instruction->GetLocations(); + FPRegister src = DRegisterFrom(locations->InAt(0)); + FPRegister dst = DRegisterFrom(locations->Out()); + Primitive::Type from = instruction->GetInputType(); + Primitive::Type to = instruction->GetResultType(); + if (from == Primitive::kPrimInt && to == Primitive::kPrimFloat) { + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ Scvtf(dst.V2S(), src.V2S()); + } else { + LOG(FATAL) << "Unsupported SIMD type"; + } +} + +void LocationsBuilderARM64::VisitVecNeg(HVecNeg* instruction) { + CreateVecUnOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorARM64::VisitVecNeg(HVecNeg* instruction) { + LocationSummary* locations = instruction->GetLocations(); + FPRegister src = DRegisterFrom(locations->InAt(0)); + FPRegister dst = DRegisterFrom(locations->Out()); + switch (instruction->GetPackedType()) { + case Primitive::kPrimByte: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ Neg(dst.V8B(), src.V8B()); + break; + case Primitive::kPrimChar: + case Primitive::kPrimShort: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ Neg(dst.V4H(), src.V4H()); + break; + case Primitive::kPrimInt: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ Neg(dst.V2S(), src.V2S()); + break; + case Primitive::kPrimFloat: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ Fneg(dst.V2S(), src.V2S()); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +void LocationsBuilderARM64::VisitVecNot(HVecNot* instruction) { + CreateVecUnOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorARM64::VisitVecNot(HVecNot* instruction) { + LocationSummary* locations = instruction->GetLocations(); + FPRegister src = DRegisterFrom(locations->InAt(0)); + FPRegister dst = DRegisterFrom(locations->Out()); + switch (instruction->GetPackedType()) { + case Primitive::kPrimBoolean: // special case boolean-not + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ Movi(dst.V8B(), 1); + __ Eor(dst.V8B(), dst.V8B(), src.V8B()); + break; + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + __ Not(dst.V8B(), src.V8B()); // lanes do not matter + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +// Helper to set up locations for vector binary operations. 
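An aside on the boolean case of VisitVecNot above: NEON has no boolean negation, so the Movi/Eor pair flips each 0/1 lane by XOR-ing it with 1. A one-lane scalar sketch of the same identity, with a hypothetical helper name:

#include <cstdint>
#include <cstdio>

// One-lane model of the boolean VecNot: the Movi materializes the vector of
// 1s, the Eor applies the XOR that flips 0 <-> 1.
uint8_t BoolNotLane(uint8_t lane) {
  return lane ^ 1u;
}

int main() {
  std::printf("%u %u\n", BoolNotLane(0), BoolNotLane(1));  // prints "1 0"
  return 0;
}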
+static void CreateVecBinOpLocations(ArenaAllocator* arena, HVecBinaryOperation* instruction) { + LocationSummary* locations = new (arena) LocationSummary(instruction); + switch (instruction->GetPackedType()) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimFloat: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +void LocationsBuilderARM64::VisitVecAdd(HVecAdd* instruction) { + CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorARM64::VisitVecAdd(HVecAdd* instruction) { + LocationSummary* locations = instruction->GetLocations(); + FPRegister lhs = DRegisterFrom(locations->InAt(0)); + FPRegister rhs = DRegisterFrom(locations->InAt(1)); + FPRegister dst = DRegisterFrom(locations->Out()); + switch (instruction->GetPackedType()) { + case Primitive::kPrimByte: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ Add(dst.V8B(), lhs.V8B(), rhs.V8B()); + break; + case Primitive::kPrimChar: + case Primitive::kPrimShort: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ Add(dst.V4H(), lhs.V4H(), rhs.V4H()); + break; + case Primitive::kPrimInt: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ Add(dst.V2S(), lhs.V2S(), rhs.V2S()); + break; + case Primitive::kPrimFloat: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ Fadd(dst.V2S(), lhs.V2S(), rhs.V2S()); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +void LocationsBuilderARM64::VisitVecSub(HVecSub* instruction) { + CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorARM64::VisitVecSub(HVecSub* instruction) { + LocationSummary* locations = instruction->GetLocations(); + FPRegister lhs = DRegisterFrom(locations->InAt(0)); + FPRegister rhs = DRegisterFrom(locations->InAt(1)); + FPRegister dst = DRegisterFrom(locations->Out()); + switch (instruction->GetPackedType()) { + case Primitive::kPrimByte: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ Sub(dst.V8B(), lhs.V8B(), rhs.V8B()); + break; + case Primitive::kPrimChar: + case Primitive::kPrimShort: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ Sub(dst.V4H(), lhs.V4H(), rhs.V4H()); + break; + case Primitive::kPrimInt: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ Sub(dst.V2S(), lhs.V2S(), rhs.V2S()); + break; + case Primitive::kPrimFloat: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ Fsub(dst.V2S(), lhs.V2S(), rhs.V2S()); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +void LocationsBuilderARM64::VisitVecMul(HVecMul* instruction) { + CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorARM64::VisitVecMul(HVecMul* instruction) { + LocationSummary* locations = instruction->GetLocations(); + FPRegister lhs = DRegisterFrom(locations->InAt(0)); + FPRegister rhs = DRegisterFrom(locations->InAt(1)); + FPRegister dst = DRegisterFrom(locations->Out()); + switch (instruction->GetPackedType()) { + case Primitive::kPrimByte: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ Mul(dst.V8B(), lhs.V8B(), rhs.V8B()); + break; + case Primitive::kPrimChar: + case Primitive::kPrimShort: + DCHECK_EQ(4u, 
instruction->GetVectorLength()); + __ Mul(dst.V4H(), lhs.V4H(), rhs.V4H()); + break; + case Primitive::kPrimInt: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ Mul(dst.V2S(), lhs.V2S(), rhs.V2S()); + break; + case Primitive::kPrimFloat: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ Fmul(dst.V2S(), lhs.V2S(), rhs.V2S()); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +void LocationsBuilderARM64::VisitVecDiv(HVecDiv* instruction) { + CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorARM64::VisitVecDiv(HVecDiv* instruction) { + LocationSummary* locations = instruction->GetLocations(); + FPRegister lhs = DRegisterFrom(locations->InAt(0)); + FPRegister rhs = DRegisterFrom(locations->InAt(1)); + FPRegister dst = DRegisterFrom(locations->Out()); + switch (instruction->GetPackedType()) { + case Primitive::kPrimFloat: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ Fdiv(dst.V2S(), lhs.V2S(), rhs.V2S()); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +void LocationsBuilderARM64::VisitVecAnd(HVecAnd* instruction) { + CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorARM64::VisitVecAnd(HVecAnd* instruction) { + LocationSummary* locations = instruction->GetLocations(); + FPRegister lhs = DRegisterFrom(locations->InAt(0)); + FPRegister rhs = DRegisterFrom(locations->InAt(1)); + FPRegister dst = DRegisterFrom(locations->Out()); + switch (instruction->GetPackedType()) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimFloat: + __ And(dst.V8B(), lhs.V8B(), rhs.V8B()); // lanes do not matter + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +void LocationsBuilderARM64::VisitVecAndNot(HVecAndNot* instruction) { + LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId(); +} + +void InstructionCodeGeneratorARM64::VisitVecAndNot(HVecAndNot* instruction) { + LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId(); +} + +void LocationsBuilderARM64::VisitVecOr(HVecOr* instruction) { + CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorARM64::VisitVecOr(HVecOr* instruction) { + LocationSummary* locations = instruction->GetLocations(); + FPRegister lhs = DRegisterFrom(locations->InAt(0)); + FPRegister rhs = DRegisterFrom(locations->InAt(1)); + FPRegister dst = DRegisterFrom(locations->Out()); + switch (instruction->GetPackedType()) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimFloat: + __ Orr(dst.V8B(), lhs.V8B(), rhs.V8B()); // lanes do not matter + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +void LocationsBuilderARM64::VisitVecXor(HVecXor* instruction) { + CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorARM64::VisitVecXor(HVecXor* instruction) { + LocationSummary* locations = instruction->GetLocations(); + FPRegister lhs = DRegisterFrom(locations->InAt(0)); + FPRegister rhs = DRegisterFrom(locations->InAt(1)); + FPRegister dst = DRegisterFrom(locations->Out()); + switch (instruction->GetPackedType()) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case 
Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimFloat: + __ Eor(dst.V8B(), lhs.V8B(), rhs.V8B()); // lanes do not matter + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +// Helper to set up locations for vector shift operations. +static void CreateVecShiftLocations(ArenaAllocator* arena, HVecBinaryOperation* instruction) { + LocationSummary* locations = new (arena) LocationSummary(instruction); + switch (instruction->GetPackedType()) { + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); + locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +void LocationsBuilderARM64::VisitVecShl(HVecShl* instruction) { + CreateVecShiftLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorARM64::VisitVecShl(HVecShl* instruction) { + LocationSummary* locations = instruction->GetLocations(); + FPRegister lhs = DRegisterFrom(locations->InAt(0)); + FPRegister dst = DRegisterFrom(locations->Out()); + int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue(); + switch (instruction->GetPackedType()) { + case Primitive::kPrimByte: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ Shl(dst.V8B(), lhs.V8B(), value); + break; + case Primitive::kPrimChar: + case Primitive::kPrimShort: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ Shl(dst.V4H(), lhs.V4H(), value); + break; + case Primitive::kPrimInt: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ Shl(dst.V2S(), lhs.V2S(), value); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +void LocationsBuilderARM64::VisitVecShr(HVecShr* instruction) { + CreateVecShiftLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorARM64::VisitVecShr(HVecShr* instruction) { + LocationSummary* locations = instruction->GetLocations(); + FPRegister lhs = DRegisterFrom(locations->InAt(0)); + FPRegister dst = DRegisterFrom(locations->Out()); + int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue(); + switch (instruction->GetPackedType()) { + case Primitive::kPrimByte: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ Sshr(dst.V8B(), lhs.V8B(), value); + break; + case Primitive::kPrimChar: + case Primitive::kPrimShort: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ Sshr(dst.V4H(), lhs.V4H(), value); + break; + case Primitive::kPrimInt: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ Sshr(dst.V2S(), lhs.V2S(), value); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +void LocationsBuilderARM64::VisitVecUShr(HVecUShr* instruction) { + CreateVecShiftLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorARM64::VisitVecUShr(HVecUShr* instruction) { + LocationSummary* locations = instruction->GetLocations(); + FPRegister lhs = DRegisterFrom(locations->InAt(0)); + FPRegister dst = DRegisterFrom(locations->Out()); + int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue(); + switch (instruction->GetPackedType()) { + case Primitive::kPrimByte: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ Ushr(dst.V8B(), lhs.V8B(), value); + 
break; + case Primitive::kPrimChar: + case Primitive::kPrimShort: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ Ushr(dst.V4H(), lhs.V4H(), value); + break; + case Primitive::kPrimInt: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ Ushr(dst.V2S(), lhs.V2S(), value); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +// Helper to set up locations for vector memory operations. +static void CreateVecMemLocations(ArenaAllocator* arena, + HVecMemoryOperation* instruction, + bool is_load) { + LocationSummary* locations = new (arena) LocationSummary(instruction); + switch (instruction->GetPackedType()) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimFloat: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); + if (is_load) { + locations->SetOut(Location::RequiresFpuRegister()); + } else { + locations->SetInAt(2, Location::RequiresFpuRegister()); + } + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +// Helper to set up registers and address for vector memory operations. +MemOperand InstructionCodeGeneratorARM64::CreateVecMemRegisters( + HVecMemoryOperation* instruction, + Location* reg_loc, + bool is_load) { + LocationSummary* locations = instruction->GetLocations(); + Register base = InputRegisterAt(instruction, 0); + Location index = locations->InAt(1); + *reg_loc = is_load ? locations->Out() : locations->InAt(2); + + Primitive::Type packed_type = instruction->GetPackedType(); + uint32_t offset = mirror::Array::DataOffset(Primitive::ComponentSize(packed_type)).Uint32Value(); + size_t shift = Primitive::ComponentSizeShift(packed_type); + + UseScratchRegisterScope temps(GetVIXLAssembler()); + Register temp = temps.AcquireSameSizeAs(base); + if (index.IsConstant()) { + offset += Int64ConstantFrom(index) << shift; + __ Add(temp, base, offset); + } else { + if (instruction->InputAt(0)->IsIntermediateAddress()) { + temp = base; + } else { + __ Add(temp, base, offset); + } + __ Add(temp.X(), temp.X(), Operand(XRegisterFrom(index), LSL, shift)); + } + return HeapOperand(temp); +} + +void LocationsBuilderARM64::VisitVecLoad(HVecLoad* instruction) { + CreateVecMemLocations(GetGraph()->GetArena(), instruction, /*is_load*/ true); +} + +void InstructionCodeGeneratorARM64::VisitVecLoad(HVecLoad* instruction) { + Location reg_loc = Location::NoLocation(); + MemOperand mem = CreateVecMemRegisters(instruction, &reg_loc, /*is_load*/ true); + FPRegister reg = DRegisterFrom(reg_loc); + switch (instruction->GetPackedType()) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ Ld1(reg.V8B(), mem); + break; + case Primitive::kPrimChar: + case Primitive::kPrimShort: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ Ld1(reg.V4H(), mem); + break; + case Primitive::kPrimInt: + case Primitive::kPrimFloat: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ Ld1(reg.V2S(), mem); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +void LocationsBuilderARM64::VisitVecStore(HVecStore* instruction) { + CreateVecMemLocations(GetGraph()->GetArena(), instruction, /*is_load*/ false); +} + +void InstructionCodeGeneratorARM64::VisitVecStore(HVecStore* instruction) { + Location reg_loc = Location::NoLocation(); + MemOperand mem =
CreateVecMemRegisters(instruction, &reg_loc, /*is_load*/ false); + FPRegister reg = DRegisterFrom(reg_loc); + switch (instruction->GetPackedType()) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ St1(reg.V8B(), mem); + break; + case Primitive::kPrimChar: + case Primitive::kPrimShort: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ St1(reg.V4H(), mem); + break; + case Primitive::kPrimInt: + case Primitive::kPrimFloat: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ St1(reg.V2S(), mem); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +#undef __ + +} // namespace arm64 +} // namespace art diff --git a/compiler/optimizing/code_generator_vector_arm_vixl.cc b/compiler/optimizing/code_generator_vector_arm_vixl.cc new file mode 100644 index 0000000000..171198902d --- /dev/null +++ b/compiler/optimizing/code_generator_vector_arm_vixl.cc @@ -0,0 +1,235 @@ +/* + * Copyright (C) 2017 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "code_generator_arm_vixl.h" + +namespace art { +namespace arm { + +// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy. +#define __ reinterpret_cast<ArmVIXLAssembler*>(GetAssembler())->GetVIXLAssembler()-> // NOLINT + +void LocationsBuilderARMVIXL::VisitVecReplicateScalar(HVecReplicateScalar* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void InstructionCodeGeneratorARMVIXL::VisitVecReplicateScalar(HVecReplicateScalar* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void LocationsBuilderARMVIXL::VisitVecSetScalars(HVecSetScalars* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void InstructionCodeGeneratorARMVIXL::VisitVecSetScalars(HVecSetScalars* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void LocationsBuilderARMVIXL::VisitVecSumReduce(HVecSumReduce* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void InstructionCodeGeneratorARMVIXL::VisitVecSumReduce(HVecSumReduce* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +// Helper to set up locations for vector unary operations.
+static void CreateVecUnOpLocations(ArenaAllocator* arena, HVecUnaryOperation* instruction) { + LocationSummary* locations = new (arena) LocationSummary(instruction); + switch (instruction->GetPackedType()) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: + DCHECK(locations); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +void LocationsBuilderARMVIXL::VisitVecCnv(HVecCnv* instruction) { + CreateVecUnOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorARMVIXL::VisitVecCnv(HVecCnv* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void LocationsBuilderARMVIXL::VisitVecNeg(HVecNeg* instruction) { + CreateVecUnOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorARMVIXL::VisitVecNeg(HVecNeg* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void LocationsBuilderARMVIXL::VisitVecNot(HVecNot* instruction) { + CreateVecUnOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorARMVIXL::VisitVecNot(HVecNot* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +// Helper to set up locations for vector binary operations. +static void CreateVecBinOpLocations(ArenaAllocator* arena, HVecBinaryOperation* instruction) { + LocationSummary* locations = new (arena) LocationSummary(instruction); + switch (instruction->GetPackedType()) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: + DCHECK(locations); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +void LocationsBuilderARMVIXL::VisitVecAdd(HVecAdd* instruction) { + CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorARMVIXL::VisitVecAdd(HVecAdd* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void LocationsBuilderARMVIXL::VisitVecSub(HVecSub* instruction) { + CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorARMVIXL::VisitVecSub(HVecSub* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void LocationsBuilderARMVIXL::VisitVecMul(HVecMul* instruction) { + CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorARMVIXL::VisitVecMul(HVecMul* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void LocationsBuilderARMVIXL::VisitVecDiv(HVecDiv* instruction) { + CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorARMVIXL::VisitVecDiv(HVecDiv* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void LocationsBuilderARMVIXL::VisitVecAnd(HVecAnd* instruction) { + CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorARMVIXL::VisitVecAnd(HVecAnd* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void LocationsBuilderARMVIXL::VisitVecAndNot(HVecAndNot* instruction) { + CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorARMVIXL::VisitVecAndNot(HVecAndNot* instruction) { + LOG(FATAL) << "No SIMD for " << 
instruction->GetId(); +} + +void LocationsBuilderARMVIXL::VisitVecOr(HVecOr* instruction) { + CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorARMVIXL::VisitVecOr(HVecOr* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void LocationsBuilderARMVIXL::VisitVecXor(HVecXor* instruction) { + CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorARMVIXL::VisitVecXor(HVecXor* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +// Helper to set up locations for vector shift operations. +static void CreateVecShiftLocations(ArenaAllocator* arena, HVecBinaryOperation* instruction) { + LocationSummary* locations = new (arena) LocationSummary(instruction); + switch (instruction->GetPackedType()) { + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimLong: + DCHECK(locations); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +void LocationsBuilderARMVIXL::VisitVecShl(HVecShl* instruction) { + CreateVecShiftLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorARMVIXL::VisitVecShl(HVecShl* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void LocationsBuilderARMVIXL::VisitVecShr(HVecShr* instruction) { + CreateVecShiftLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorARMVIXL::VisitVecShr(HVecShr* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void LocationsBuilderARMVIXL::VisitVecUShr(HVecUShr* instruction) { + CreateVecShiftLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorARMVIXL::VisitVecUShr(HVecUShr* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void LocationsBuilderARMVIXL::VisitVecLoad(HVecLoad* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void InstructionCodeGeneratorARMVIXL::VisitVecLoad(HVecLoad* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void LocationsBuilderARMVIXL::VisitVecStore(HVecStore* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void InstructionCodeGeneratorARMVIXL::VisitVecStore(HVecStore* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +#undef __ + +} // namespace arm +} // namespace art diff --git a/compiler/optimizing/code_generator_vector_mips.cc b/compiler/optimizing/code_generator_vector_mips.cc new file mode 100644 index 0000000000..6f5fe0d2a4 --- /dev/null +++ b/compiler/optimizing/code_generator_vector_mips.cc @@ -0,0 +1,235 @@ +/* + * Copyright (C) 2017 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "code_generator_mips.h" + +namespace art { +namespace mips { + +// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy. +#define __ down_cast<MipsAssembler*>(GetAssembler())-> // NOLINT + +void LocationsBuilderMIPS::VisitVecReplicateScalar(HVecReplicateScalar* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void InstructionCodeGeneratorMIPS::VisitVecReplicateScalar(HVecReplicateScalar* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void LocationsBuilderMIPS::VisitVecSetScalars(HVecSetScalars* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void InstructionCodeGeneratorMIPS::VisitVecSetScalars(HVecSetScalars* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void LocationsBuilderMIPS::VisitVecSumReduce(HVecSumReduce* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void InstructionCodeGeneratorMIPS::VisitVecSumReduce(HVecSumReduce* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +// Helper to set up locations for vector unary operations. +static void CreateVecUnOpLocations(ArenaAllocator* arena, HVecUnaryOperation* instruction) { + LocationSummary* locations = new (arena) LocationSummary(instruction); + switch (instruction->GetPackedType()) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: + DCHECK(locations); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +void LocationsBuilderMIPS::VisitVecCnv(HVecCnv* instruction) { + CreateVecUnOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorMIPS::VisitVecCnv(HVecCnv* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void LocationsBuilderMIPS::VisitVecNeg(HVecNeg* instruction) { + CreateVecUnOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorMIPS::VisitVecNeg(HVecNeg* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void LocationsBuilderMIPS::VisitVecNot(HVecNot* instruction) { + CreateVecUnOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorMIPS::VisitVecNot(HVecNot* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +// Helper to set up locations for vector binary operations. 
+static void CreateVecBinOpLocations(ArenaAllocator* arena, HVecBinaryOperation* instruction) { + LocationSummary* locations = new (arena) LocationSummary(instruction); + switch (instruction->GetPackedType()) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: + DCHECK(locations); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +void LocationsBuilderMIPS::VisitVecAdd(HVecAdd* instruction) { + CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorMIPS::VisitVecAdd(HVecAdd* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void LocationsBuilderMIPS::VisitVecSub(HVecSub* instruction) { + CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorMIPS::VisitVecSub(HVecSub* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void LocationsBuilderMIPS::VisitVecMul(HVecMul* instruction) { + CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorMIPS::VisitVecMul(HVecMul* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void LocationsBuilderMIPS::VisitVecDiv(HVecDiv* instruction) { + CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorMIPS::VisitVecDiv(HVecDiv* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void LocationsBuilderMIPS::VisitVecAnd(HVecAnd* instruction) { + CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorMIPS::VisitVecAnd(HVecAnd* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void LocationsBuilderMIPS::VisitVecAndNot(HVecAndNot* instruction) { + CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorMIPS::VisitVecAndNot(HVecAndNot* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void LocationsBuilderMIPS::VisitVecOr(HVecOr* instruction) { + CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorMIPS::VisitVecOr(HVecOr* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void LocationsBuilderMIPS::VisitVecXor(HVecXor* instruction) { + CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorMIPS::VisitVecXor(HVecXor* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +// Helper to set up locations for vector shift operations. 
+static void CreateVecShiftLocations(ArenaAllocator* arena, HVecBinaryOperation* instruction) { + LocationSummary* locations = new (arena) LocationSummary(instruction); + switch (instruction->GetPackedType()) { + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimLong: + DCHECK(locations); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +void LocationsBuilderMIPS::VisitVecShl(HVecShl* instruction) { + CreateVecShiftLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorMIPS::VisitVecShl(HVecShl* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void LocationsBuilderMIPS::VisitVecShr(HVecShr* instruction) { + CreateVecShiftLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorMIPS::VisitVecShr(HVecShr* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void LocationsBuilderMIPS::VisitVecUShr(HVecUShr* instruction) { + CreateVecShiftLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorMIPS::VisitVecUShr(HVecUShr* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void LocationsBuilderMIPS::VisitVecLoad(HVecLoad* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void InstructionCodeGeneratorMIPS::VisitVecLoad(HVecLoad* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void LocationsBuilderMIPS::VisitVecStore(HVecStore* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void InstructionCodeGeneratorMIPS::VisitVecStore(HVecStore* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +#undef __ + +} // namespace mips +} // namespace art diff --git a/compiler/optimizing/code_generator_vector_mips64.cc b/compiler/optimizing/code_generator_vector_mips64.cc new file mode 100644 index 0000000000..2ee7ac91cf --- /dev/null +++ b/compiler/optimizing/code_generator_vector_mips64.cc @@ -0,0 +1,235 @@ +/* + * Copyright (C) 2017 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "code_generator_mips64.h" + +namespace art { +namespace mips64 { + +// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy. 
+#define __ down_cast<Mips64Assembler*>(GetAssembler())-> // NOLINT + +void LocationsBuilderMIPS64::VisitVecReplicateScalar(HVecReplicateScalar* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void InstructionCodeGeneratorMIPS64::VisitVecReplicateScalar(HVecReplicateScalar* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void LocationsBuilderMIPS64::VisitVecSetScalars(HVecSetScalars* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void InstructionCodeGeneratorMIPS64::VisitVecSetScalars(HVecSetScalars* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void LocationsBuilderMIPS64::VisitVecSumReduce(HVecSumReduce* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void InstructionCodeGeneratorMIPS64::VisitVecSumReduce(HVecSumReduce* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +// Helper to set up locations for vector unary operations. +static void CreateVecUnOpLocations(ArenaAllocator* arena, HVecUnaryOperation* instruction) { + LocationSummary* locations = new (arena) LocationSummary(instruction); + switch (instruction->GetPackedType()) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: + DCHECK(locations); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +void LocationsBuilderMIPS64::VisitVecCnv(HVecCnv* instruction) { + CreateVecUnOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorMIPS64::VisitVecCnv(HVecCnv* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void LocationsBuilderMIPS64::VisitVecNeg(HVecNeg* instruction) { + CreateVecUnOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorMIPS64::VisitVecNeg(HVecNeg* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void LocationsBuilderMIPS64::VisitVecNot(HVecNot* instruction) { + CreateVecUnOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorMIPS64::VisitVecNot(HVecNot* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +// Helper to set up locations for vector binary operations. 
+static void CreateVecBinOpLocations(ArenaAllocator* arena, HVecBinaryOperation* instruction) { + LocationSummary* locations = new (arena) LocationSummary(instruction); + switch (instruction->GetPackedType()) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: + DCHECK(locations); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +void LocationsBuilderMIPS64::VisitVecAdd(HVecAdd* instruction) { + CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorMIPS64::VisitVecAdd(HVecAdd* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void LocationsBuilderMIPS64::VisitVecSub(HVecSub* instruction) { + CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorMIPS64::VisitVecSub(HVecSub* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void LocationsBuilderMIPS64::VisitVecMul(HVecMul* instruction) { + CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorMIPS64::VisitVecMul(HVecMul* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void LocationsBuilderMIPS64::VisitVecDiv(HVecDiv* instruction) { + CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorMIPS64::VisitVecDiv(HVecDiv* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void LocationsBuilderMIPS64::VisitVecAnd(HVecAnd* instruction) { + CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorMIPS64::VisitVecAnd(HVecAnd* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void LocationsBuilderMIPS64::VisitVecAndNot(HVecAndNot* instruction) { + CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorMIPS64::VisitVecAndNot(HVecAndNot* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void LocationsBuilderMIPS64::VisitVecOr(HVecOr* instruction) { + CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorMIPS64::VisitVecOr(HVecOr* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void LocationsBuilderMIPS64::VisitVecXor(HVecXor* instruction) { + CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorMIPS64::VisitVecXor(HVecXor* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +// Helper to set up locations for vector shift operations. 
+static void CreateVecShiftLocations(ArenaAllocator* arena, HVecBinaryOperation* instruction) { + LocationSummary* locations = new (arena) LocationSummary(instruction); + switch (instruction->GetPackedType()) { + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimLong: + DCHECK(locations); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +void LocationsBuilderMIPS64::VisitVecShl(HVecShl* instruction) { + CreateVecShiftLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorMIPS64::VisitVecShl(HVecShl* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void LocationsBuilderMIPS64::VisitVecShr(HVecShr* instruction) { + CreateVecShiftLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorMIPS64::VisitVecShr(HVecShr* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void LocationsBuilderMIPS64::VisitVecUShr(HVecUShr* instruction) { + CreateVecShiftLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorMIPS64::VisitVecUShr(HVecUShr* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void LocationsBuilderMIPS64::VisitVecLoad(HVecLoad* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void InstructionCodeGeneratorMIPS64::VisitVecLoad(HVecLoad* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void LocationsBuilderMIPS64::VisitVecStore(HVecStore* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void InstructionCodeGeneratorMIPS64::VisitVecStore(HVecStore* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +#undef __ + +} // namespace mips64 +} // namespace art diff --git a/compiler/optimizing/code_generator_vector_x86.cc b/compiler/optimizing/code_generator_vector_x86.cc new file mode 100644 index 0000000000..4f3988ee2e --- /dev/null +++ b/compiler/optimizing/code_generator_vector_x86.cc @@ -0,0 +1,767 @@ +/* + * Copyright (C) 2017 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "code_generator_x86.h" +#include "mirror/array-inl.h" + +namespace art { +namespace x86 { + +// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy. +#define __ down_cast<X86Assembler*>(GetAssembler())-> // NOLINT + +void LocationsBuilderX86::VisitVecReplicateScalar(HVecReplicateScalar* instruction) { + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction); + switch (instruction->GetPackedType()) { + case Primitive::kPrimLong: + // Long needs extra temporary to load the register pair. 
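+// (On 32-bit x86 a long occupies a core register pair; each 32-bit half is moved into an XMM register with movd and the halves are then interleaved, so replication needs the extra XMM temporary requested here.)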
+ locations->AddTemp(Location::RequiresFpuRegister()); + FALLTHROUGH_INTENDED; + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresFpuRegister()); + break; + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::SameAsFirstInput()); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +void InstructionCodeGeneratorX86::VisitVecReplicateScalar(HVecReplicateScalar* instruction) { + LocationSummary* locations = instruction->GetLocations(); + XmmRegister reg = locations->Out().AsFpuRegister<XmmRegister>(); + switch (instruction->GetPackedType()) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + DCHECK_EQ(16u, instruction->GetVectorLength()); + __ movd(reg, locations->InAt(0).AsRegister<Register>()); + __ punpcklbw(reg, reg); + __ punpcklwd(reg, reg); + __ pshufd(reg, reg, Immediate(0)); + break; + case Primitive::kPrimChar: + case Primitive::kPrimShort: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ movd(reg, locations->InAt(0).AsRegister<Register>()); + __ punpcklwd(reg, reg); + __ pshufd(reg, reg, Immediate(0)); + break; + case Primitive::kPrimInt: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ movd(reg, locations->InAt(0).AsRegister<Register>()); + __ pshufd(reg, reg, Immediate(0)); + break; + case Primitive::kPrimLong: { + XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ movd(reg, locations->InAt(0).AsRegisterPairLow<Register>()); + __ movd(tmp, locations->InAt(0).AsRegisterPairHigh<Register>()); + __ punpckldq(reg, tmp); + __ punpcklqdq(reg, reg); + break; + } + case Primitive::kPrimFloat: + DCHECK(locations->InAt(0).Equals(locations->Out())); + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ shufps(reg, reg, Immediate(0)); + break; + case Primitive::kPrimDouble: + DCHECK(locations->InAt(0).Equals(locations->Out())); + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ shufpd(reg, reg, Immediate(0)); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +void LocationsBuilderX86::VisitVecSetScalars(HVecSetScalars* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void InstructionCodeGeneratorX86::VisitVecSetScalars(HVecSetScalars* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void LocationsBuilderX86::VisitVecSumReduce(HVecSumReduce* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void InstructionCodeGeneratorX86::VisitVecSumReduce(HVecSumReduce* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +// Helper to set up locations for vector unary operations. 
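+// (The summary built here tells the register allocator that a unary operation keeps both its packed input and its packed result in XMM registers.)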
+static void CreateVecUnOpLocations(ArenaAllocator* arena, HVecUnaryOperation* instruction) { + LocationSummary* locations = new (arena) LocationSummary(instruction); + switch (instruction->GetPackedType()) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimLong: + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister()); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +void LocationsBuilderX86::VisitVecCnv(HVecCnv* instruction) { + CreateVecUnOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorX86::VisitVecCnv(HVecCnv* instruction) { + LocationSummary* locations = instruction->GetLocations(); + XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>(); + XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); + Primitive::Type from = instruction->GetInputType(); + Primitive::Type to = instruction->GetResultType(); + if (from == Primitive::kPrimInt && to == Primitive::kPrimFloat) { + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ cvtdq2ps(dst, src); + } else { + LOG(FATAL) << "Unsupported SIMD type"; + } +} + +void LocationsBuilderX86::VisitVecNeg(HVecNeg* instruction) { + CreateVecUnOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorX86::VisitVecNeg(HVecNeg* instruction) { + LocationSummary* locations = instruction->GetLocations(); + XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>(); + XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); + switch (instruction->GetPackedType()) { + case Primitive::kPrimByte: + DCHECK_EQ(16u, instruction->GetVectorLength()); + __ pxor(dst, dst); + __ psubb(dst, src); + break; + case Primitive::kPrimChar: + case Primitive::kPrimShort: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ pxor(dst, dst); + __ psubw(dst, src); + break; + case Primitive::kPrimInt: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ pxor(dst, dst); + __ psubd(dst, src); + break; + case Primitive::kPrimLong: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ pxor(dst, dst); + __ psubq(dst, src); + break; + case Primitive::kPrimFloat: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ xorps(dst, dst); + __ subps(dst, src); + break; + case Primitive::kPrimDouble: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ xorpd(dst, dst); + __ subpd(dst, src); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +void LocationsBuilderX86::VisitVecNot(HVecNot* instruction) { + CreateVecUnOpLocations(GetGraph()->GetArena(), instruction); + // Boolean-not requires a temporary to construct the 16 x one. 
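+// (Packed booleans hold 0 or 1 in each byte lane, so boolean-not is computed as x ^ 1 per lane; the vector of sixteen 1s is formed as 0 - (all ones) via pxor/pcmpeqb/psubb before the final pxor with the source.)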
+ if (instruction->GetPackedType() == Primitive::kPrimBoolean) { + instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister()); + } +} + +void InstructionCodeGeneratorX86::VisitVecNot(HVecNot* instruction) { + LocationSummary* locations = instruction->GetLocations(); + XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>(); + XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); + switch (instruction->GetPackedType()) { + case Primitive::kPrimBoolean: { // special case boolean-not + DCHECK_EQ(16u, instruction->GetVectorLength()); + XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); + __ pxor(dst, dst); + __ pcmpeqb(tmp, tmp); // all ones + __ psubb(dst, tmp); // 16 x one + __ pxor(dst, src); + break; + } + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimLong: + DCHECK_LE(2u, instruction->GetVectorLength()); + DCHECK_LE(instruction->GetVectorLength(), 16u); + __ pcmpeqb(dst, dst); // all ones + __ pxor(dst, src); + break; + case Primitive::kPrimFloat: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ pcmpeqb(dst, dst); // all ones + __ xorps(dst, src); + break; + case Primitive::kPrimDouble: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ pcmpeqb(dst, dst); // all ones + __ xorpd(dst, src); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +// Helper to set up locations for vector binary operations. +static void CreateVecBinOpLocations(ArenaAllocator* arena, HVecBinaryOperation* instruction) { + LocationSummary* locations = new (arena) LocationSummary(instruction); + switch (instruction->GetPackedType()) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimLong: + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RequiresFpuRegister()); + locations->SetOut(Location::SameAsFirstInput()); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +void LocationsBuilderX86::VisitVecAdd(HVecAdd* instruction) { + CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorX86::VisitVecAdd(HVecAdd* instruction) { + LocationSummary* locations = instruction->GetLocations(); + DCHECK(locations->InAt(0).Equals(locations->Out())); + XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); + XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); + switch (instruction->GetPackedType()) { + case Primitive::kPrimByte: + DCHECK_EQ(16u, instruction->GetVectorLength()); + __ paddb(dst, src); + break; + case Primitive::kPrimChar: + case Primitive::kPrimShort: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ paddw(dst, src); + break; + case Primitive::kPrimInt: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ paddd(dst, src); + break; + case Primitive::kPrimLong: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ paddq(dst, src); + break; + case Primitive::kPrimFloat: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ addps(dst, src); + break; + case Primitive::kPrimDouble: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ addpd(dst, src); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +void LocationsBuilderX86::VisitVecSub(HVecSub* instruction) { + 
CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorX86::VisitVecSub(HVecSub* instruction) { + LocationSummary* locations = instruction->GetLocations(); + DCHECK(locations->InAt(0).Equals(locations->Out())); + XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); + XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); + switch (instruction->GetPackedType()) { + case Primitive::kPrimByte: + DCHECK_EQ(16u, instruction->GetVectorLength()); + __ psubb(dst, src); + break; + case Primitive::kPrimChar: + case Primitive::kPrimShort: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ psubw(dst, src); + break; + case Primitive::kPrimInt: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ psubd(dst, src); + break; + case Primitive::kPrimLong: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ psubq(dst, src); + break; + case Primitive::kPrimFloat: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ subps(dst, src); + break; + case Primitive::kPrimDouble: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ subpd(dst, src); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +void LocationsBuilderX86::VisitVecMul(HVecMul* instruction) { + CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorX86::VisitVecMul(HVecMul* instruction) { + LocationSummary* locations = instruction->GetLocations(); + DCHECK(locations->InAt(0).Equals(locations->Out())); + XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); + XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); + switch (instruction->GetPackedType()) { + case Primitive::kPrimChar: + case Primitive::kPrimShort: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ pmullw(dst, src); + break; + case Primitive::kPrimInt: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ pmulld(dst, src); + break; + case Primitive::kPrimFloat: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ mulps(dst, src); + break; + case Primitive::kPrimDouble: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ mulpd(dst, src); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +void LocationsBuilderX86::VisitVecDiv(HVecDiv* instruction) { + CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorX86::VisitVecDiv(HVecDiv* instruction) { + LocationSummary* locations = instruction->GetLocations(); + DCHECK(locations->InAt(0).Equals(locations->Out())); + XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); + XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); + switch (instruction->GetPackedType()) { + case Primitive::kPrimFloat: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ divps(dst, src); + break; + case Primitive::kPrimDouble: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ divpd(dst, src); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +void LocationsBuilderX86::VisitVecAnd(HVecAnd* instruction) { + CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorX86::VisitVecAnd(HVecAnd* instruction) { + LocationSummary* locations = instruction->GetLocations(); + DCHECK(locations->InAt(0).Equals(locations->Out())); + XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); + XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); + switch (instruction->GetPackedType()) { + case 
Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimLong: + DCHECK_LE(2u, instruction->GetVectorLength()); + DCHECK_LE(instruction->GetVectorLength(), 16u); + __ pand(dst, src); + break; + case Primitive::kPrimFloat: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ andps(dst, src); + break; + case Primitive::kPrimDouble: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ andpd(dst, src); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +void LocationsBuilderX86::VisitVecAndNot(HVecAndNot* instruction) { + CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorX86::VisitVecAndNot(HVecAndNot* instruction) { + LocationSummary* locations = instruction->GetLocations(); + DCHECK(locations->InAt(0).Equals(locations->Out())); + XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); + XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); + switch (instruction->GetPackedType()) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimLong: + DCHECK_LE(2u, instruction->GetVectorLength()); + DCHECK_LE(instruction->GetVectorLength(), 16u); + __ pandn(dst, src); + break; + case Primitive::kPrimFloat: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ andnps(dst, src); + break; + case Primitive::kPrimDouble: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ andnpd(dst, src); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +void LocationsBuilderX86::VisitVecOr(HVecOr* instruction) { + CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorX86::VisitVecOr(HVecOr* instruction) { + LocationSummary* locations = instruction->GetLocations(); + DCHECK(locations->InAt(0).Equals(locations->Out())); + XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); + XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); + switch (instruction->GetPackedType()) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimLong: + DCHECK_LE(2u, instruction->GetVectorLength()); + DCHECK_LE(instruction->GetVectorLength(), 16u); + __ por(dst, src); + break; + case Primitive::kPrimFloat: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ orps(dst, src); + break; + case Primitive::kPrimDouble: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ orpd(dst, src); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +void LocationsBuilderX86::VisitVecXor(HVecXor* instruction) { + CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorX86::VisitVecXor(HVecXor* instruction) { + LocationSummary* locations = instruction->GetLocations(); + DCHECK(locations->InAt(0).Equals(locations->Out())); + XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); + XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); + switch (instruction->GetPackedType()) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimLong: + DCHECK_LE(2u, instruction->GetVectorLength()); + 
DCHECK_LE(instruction->GetVectorLength(), 16u); + __ pxor(dst, src); + break; + case Primitive::kPrimFloat: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ xorps(dst, src); + break; + case Primitive::kPrimDouble: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ xorpd(dst, src); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +// Helper to set up locations for vector shift operations. +static void CreateVecShiftLocations(ArenaAllocator* arena, HVecBinaryOperation* instruction) { + LocationSummary* locations = new (arena) LocationSummary(instruction); + switch (instruction->GetPackedType()) { + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimLong: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); + locations->SetOut(Location::SameAsFirstInput()); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +void LocationsBuilderX86::VisitVecShl(HVecShl* instruction) { + CreateVecShiftLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorX86::VisitVecShl(HVecShl* instruction) { + LocationSummary* locations = instruction->GetLocations(); + DCHECK(locations->InAt(0).Equals(locations->Out())); + int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue(); + XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); + switch (instruction->GetPackedType()) { + case Primitive::kPrimChar: + case Primitive::kPrimShort: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ psllw(dst, Immediate(static_cast<uint8_t>(value))); + break; + case Primitive::kPrimInt: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ pslld(dst, Immediate(static_cast<uint8_t>(value))); + break; + case Primitive::kPrimLong: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ psllq(dst, Immediate(static_cast<uint8_t>(value))); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +void LocationsBuilderX86::VisitVecShr(HVecShr* instruction) { + CreateVecShiftLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorX86::VisitVecShr(HVecShr* instruction) { + LocationSummary* locations = instruction->GetLocations(); + DCHECK(locations->InAt(0).Equals(locations->Out())); + int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue(); + XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); + switch (instruction->GetPackedType()) { + case Primitive::kPrimChar: + case Primitive::kPrimShort: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ psraw(dst, Immediate(static_cast<uint8_t>(value))); + break; + case Primitive::kPrimInt: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ psrad(dst, Immediate(static_cast<uint8_t>(value))); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +void LocationsBuilderX86::VisitVecUShr(HVecUShr* instruction) { + CreateVecShiftLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorX86::VisitVecUShr(HVecUShr* instruction) { + LocationSummary* locations = instruction->GetLocations(); + DCHECK(locations->InAt(0).Equals(locations->Out())); + int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue(); + XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); + switch (instruction->GetPackedType()) { + case Primitive::kPrimChar: + case 
Primitive::kPrimShort: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ psrlw(dst, Immediate(static_cast<uint8_t>(value))); + break; + case Primitive::kPrimInt: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ psrld(dst, Immediate(static_cast<uint8_t>(value))); + break; + case Primitive::kPrimLong: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ psrlq(dst, Immediate(static_cast<uint8_t>(value))); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +// Helper to set up locations for vector memory operations. +static void CreateVecMemLocations(ArenaAllocator* arena, + HVecMemoryOperation* instruction, + bool is_load) { + LocationSummary* locations = new (arena) LocationSummary(instruction); + switch (instruction->GetPackedType()) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimLong: + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); + if (is_load) { + locations->SetOut(Location::RequiresFpuRegister()); + } else { + locations->SetInAt(2, Location::RequiresFpuRegister()); + } + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +// Helper to set up registers and address for vector memory operations. +static Address CreateVecMemRegisters(HVecMemoryOperation* instruction, + Location* reg_loc, + bool is_load) { + LocationSummary* locations = instruction->GetLocations(); + Location base = locations->InAt(0); + Location index = locations->InAt(1); + *reg_loc = is_load ? locations->Out() : locations->InAt(2); + size_t size = Primitive::ComponentSize(instruction->GetPackedType()); + uint32_t offset = mirror::Array::DataOffset(size).Uint32Value(); + ScaleFactor scale = TIMES_1; + switch (size) { + case 2: scale = TIMES_2; break; + case 4: scale = TIMES_4; break; + case 8: scale = TIMES_8; break; + default: break; + } + return CodeGeneratorX86::ArrayAddress(base.AsRegister<Register>(), index, scale, offset); +} + +void LocationsBuilderX86::VisitVecLoad(HVecLoad* instruction) { + CreateVecMemLocations(GetGraph()->GetArena(), instruction, /*is_load*/ true); +} + +void InstructionCodeGeneratorX86::VisitVecLoad(HVecLoad* instruction) { + Location reg_loc = Location::NoLocation(); + Address address = CreateVecMemRegisters(instruction, &reg_loc, /*is_load*/ true); + XmmRegister reg = reg_loc.AsFpuRegister<XmmRegister>(); + bool is_aligned16 = instruction->GetAlignment().IsAlignedAt(16); + switch (instruction->GetPackedType()) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimLong: + DCHECK_LE(2u, instruction->GetVectorLength()); + DCHECK_LE(instruction->GetVectorLength(), 16u); + is_aligned16 ? __ movdqa(reg, address) : __ movdqu(reg, address); + break; + case Primitive::kPrimFloat: + DCHECK_EQ(4u, instruction->GetVectorLength()); + is_aligned16 ? __ movaps(reg, address) : __ movups(reg, address); + break; + case Primitive::kPrimDouble: + DCHECK_EQ(2u, instruction->GetVectorLength()); + is_aligned16 ? __ movapd(reg, address) : __ movupd(reg, address); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +void LocationsBuilderX86::VisitVecStore(HVecStore* instruction) { + CreateVecMemLocations(GetGraph()->GetArena(), instruction, /*is_load*/ false); +} + +void InstructionCodeGeneratorX86::VisitVecStore(HVecStore* instruction) { + Location reg_loc = Location::NoLocation(); + Address address = CreateVecMemRegisters(instruction, &reg_loc, /*is_load*/ false); + XmmRegister reg = reg_loc.AsFpuRegister<XmmRegister>(); + bool is_aligned16 = instruction->GetAlignment().IsAlignedAt(16); + switch (instruction->GetPackedType()) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimLong: + DCHECK_LE(2u, instruction->GetVectorLength()); + DCHECK_LE(instruction->GetVectorLength(), 16u); + is_aligned16 ? __ movdqa(address, reg) : __ movdqu(address, reg); + break; + case Primitive::kPrimFloat: + DCHECK_EQ(4u, instruction->GetVectorLength()); + is_aligned16 ? __ movaps(address, reg) : __ movups(address, reg); + break; + case Primitive::kPrimDouble: + DCHECK_EQ(2u, instruction->GetVectorLength()); + is_aligned16 ? __ movapd(address, reg) : __ movupd(address, reg); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +#undef __ + +} // namespace x86 +} // namespace art
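The x86 vector load and store above choose between the aligned and unaligned packed-move encodings from the instruction's 16-byte alignment guarantee. A rough standalone model of that mnemonic selection (illustrative only; the strings stand in for the assembler calls emitted through the __ macro):

  #include <cstdio>
  #include <string>

  enum class PackedKind { kIntegral, kFloat, kDouble };

  // Mirrors the ternaries in VisitVecLoad/VisitVecStore: the aligned forms
  // (movdqa/movaps/movapd) fault on an address that is not 16-byte aligned
  // but can be cheaper, while the unaligned forms are always safe.
  std::string PackedMoveMnemonic(PackedKind kind, bool aligned16) {
    switch (kind) {
      case PackedKind::kFloat:  return aligned16 ? "movaps" : "movups";
      case PackedKind::kDouble: return aligned16 ? "movapd" : "movupd";
      default:                  return aligned16 ? "movdqa" : "movdqu";
    }
  }

  int main() {
    std::printf("%s\n", PackedMoveMnemonic(PackedKind::kFloat, false).c_str());  // movups
    return 0;
  }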
diff --git a/compiler/optimizing/code_generator_vector_x86_64.cc b/compiler/optimizing/code_generator_vector_x86_64.cc new file mode 100644 index 0000000000..b1c1494f6b --- /dev/null +++ b/compiler/optimizing/code_generator_vector_x86_64.cc @@ -0,0 +1,760 @@ +/* + * Copyright (C) 2017 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "code_generator_x86_64.h" +#include "mirror/array-inl.h" + +namespace art { +namespace x86_64 { + +// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
+#define __ down_cast<X86_64Assembler*>(GetAssembler())-> // NOLINT + +void LocationsBuilderX86_64::VisitVecReplicateScalar(HVecReplicateScalar* instruction) { + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction); + switch (instruction->GetPackedType()) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimLong: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresFpuRegister()); + break; + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::SameAsFirstInput()); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +void InstructionCodeGeneratorX86_64::VisitVecReplicateScalar(HVecReplicateScalar* instruction) { + LocationSummary* locations = instruction->GetLocations(); + XmmRegister reg = locations->Out().AsFpuRegister<XmmRegister>(); + switch (instruction->GetPackedType()) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + DCHECK_EQ(16u, instruction->GetVectorLength()); + __ movd(reg, locations->InAt(0).AsRegister<CpuRegister>()); + __ punpcklbw(reg, reg); + __ punpcklwd(reg, reg); + __ pshufd(reg, reg, Immediate(0)); + break; + case Primitive::kPrimChar: + case Primitive::kPrimShort: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ movd(reg, locations->InAt(0).AsRegister<CpuRegister>()); + __ punpcklwd(reg, reg); + __ pshufd(reg, reg, Immediate(0)); + break; + case Primitive::kPrimInt: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ movd(reg, locations->InAt(0).AsRegister<CpuRegister>()); + __ pshufd(reg, reg, Immediate(0)); + break; + case Primitive::kPrimLong: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ movd(reg, locations->InAt(0).AsRegister<CpuRegister>()); // is 64-bit + __ punpcklqdq(reg, reg); + break; + case Primitive::kPrimFloat: + DCHECK(locations->InAt(0).Equals(locations->Out())); + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ shufps(reg, reg, Immediate(0)); + break; + case Primitive::kPrimDouble: + DCHECK(locations->InAt(0).Equals(locations->Out())); + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ shufpd(reg, reg, Immediate(0)); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +void LocationsBuilderX86_64::VisitVecSetScalars(HVecSetScalars* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void InstructionCodeGeneratorX86_64::VisitVecSetScalars(HVecSetScalars* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void LocationsBuilderX86_64::VisitVecSumReduce(HVecSumReduce* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void InstructionCodeGeneratorX86_64::VisitVecSumReduce(HVecSumReduce* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +// Helper to set up locations for vector unary operations. 
+static void CreateVecUnOpLocations(ArenaAllocator* arena, HVecUnaryOperation* instruction) { + LocationSummary* locations = new (arena) LocationSummary(instruction); + switch (instruction->GetPackedType()) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimLong: + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister()); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +void LocationsBuilderX86_64::VisitVecCnv(HVecCnv* instruction) { + CreateVecUnOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorX86_64::VisitVecCnv(HVecCnv* instruction) { + LocationSummary* locations = instruction->GetLocations(); + XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>(); + XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); + Primitive::Type from = instruction->GetInputType(); + Primitive::Type to = instruction->GetResultType(); + if (from == Primitive::kPrimInt && to == Primitive::kPrimFloat) { + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ cvtdq2ps(dst, src); + } else { + LOG(FATAL) << "Unsupported SIMD type"; + } +} + +void LocationsBuilderX86_64::VisitVecNeg(HVecNeg* instruction) { + CreateVecUnOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorX86_64::VisitVecNeg(HVecNeg* instruction) { + LocationSummary* locations = instruction->GetLocations(); + XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>(); + XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); + switch (instruction->GetPackedType()) { + case Primitive::kPrimByte: + DCHECK_EQ(16u, instruction->GetVectorLength()); + __ pxor(dst, dst); + __ psubb(dst, src); + break; + case Primitive::kPrimChar: + case Primitive::kPrimShort: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ pxor(dst, dst); + __ psubw(dst, src); + break; + case Primitive::kPrimInt: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ pxor(dst, dst); + __ psubd(dst, src); + break; + case Primitive::kPrimLong: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ pxor(dst, dst); + __ psubq(dst, src); + break; + case Primitive::kPrimFloat: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ xorps(dst, dst); + __ subps(dst, src); + break; + case Primitive::kPrimDouble: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ xorpd(dst, dst); + __ subpd(dst, src); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +void LocationsBuilderX86_64::VisitVecNot(HVecNot* instruction) { + CreateVecUnOpLocations(GetGraph()->GetArena(), instruction); + // Boolean-not requires a temporary to construct the 16 x one. 
+ if (instruction->GetPackedType() == Primitive::kPrimBoolean) { + instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister()); + } +} + +void InstructionCodeGeneratorX86_64::VisitVecNot(HVecNot* instruction) { + LocationSummary* locations = instruction->GetLocations(); + XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>(); + XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); + switch (instruction->GetPackedType()) { + case Primitive::kPrimBoolean: { // special case boolean-not + DCHECK_EQ(16u, instruction->GetVectorLength()); + XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); + __ pxor(dst, dst); + __ pcmpeqb(tmp, tmp); // all ones + __ psubb(dst, tmp); // 16 x one + __ pxor(dst, src); + break; + } + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimLong: + DCHECK_LE(2u, instruction->GetVectorLength()); + DCHECK_LE(instruction->GetVectorLength(), 16u); + __ pcmpeqb(dst, dst); // all ones + __ pxor(dst, src); + break; + case Primitive::kPrimFloat: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ pcmpeqb(dst, dst); // all ones + __ xorps(dst, src); + break; + case Primitive::kPrimDouble: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ pcmpeqb(dst, dst); // all ones + __ xorpd(dst, src); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +// Helper to set up locations for vector binary operations. +static void CreateVecBinOpLocations(ArenaAllocator* arena, HVecBinaryOperation* instruction) { + LocationSummary* locations = new (arena) LocationSummary(instruction); + switch (instruction->GetPackedType()) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimLong: + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RequiresFpuRegister()); + locations->SetOut(Location::SameAsFirstInput()); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +void LocationsBuilderX86_64::VisitVecAdd(HVecAdd* instruction) { + CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorX86_64::VisitVecAdd(HVecAdd* instruction) { + LocationSummary* locations = instruction->GetLocations(); + DCHECK(locations->InAt(0).Equals(locations->Out())); + XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); + XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); + switch (instruction->GetPackedType()) { + case Primitive::kPrimByte: + DCHECK_EQ(16u, instruction->GetVectorLength()); + __ paddb(dst, src); + break; + case Primitive::kPrimChar: + case Primitive::kPrimShort: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ paddw(dst, src); + break; + case Primitive::kPrimInt: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ paddd(dst, src); + break; + case Primitive::kPrimLong: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ paddq(dst, src); + break; + case Primitive::kPrimFloat: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ addps(dst, src); + break; + case Primitive::kPrimDouble: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ addpd(dst, src); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +void LocationsBuilderX86_64::VisitVecSub(HVecSub* instruction) { + 
CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorX86_64::VisitVecSub(HVecSub* instruction) { + LocationSummary* locations = instruction->GetLocations(); + DCHECK(locations->InAt(0).Equals(locations->Out())); + XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); + XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); + switch (instruction->GetPackedType()) { + case Primitive::kPrimByte: + DCHECK_EQ(16u, instruction->GetVectorLength()); + __ psubb(dst, src); + break; + case Primitive::kPrimChar: + case Primitive::kPrimShort: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ psubw(dst, src); + break; + case Primitive::kPrimInt: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ psubd(dst, src); + break; + case Primitive::kPrimLong: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ psubq(dst, src); + break; + case Primitive::kPrimFloat: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ subps(dst, src); + break; + case Primitive::kPrimDouble: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ subpd(dst, src); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +void LocationsBuilderX86_64::VisitVecMul(HVecMul* instruction) { + CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorX86_64::VisitVecMul(HVecMul* instruction) { + LocationSummary* locations = instruction->GetLocations(); + DCHECK(locations->InAt(0).Equals(locations->Out())); + XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); + XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); + switch (instruction->GetPackedType()) { + case Primitive::kPrimChar: + case Primitive::kPrimShort: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ pmullw(dst, src); + break; + case Primitive::kPrimInt: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ pmulld(dst, src); + break; + case Primitive::kPrimFloat: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ mulps(dst, src); + break; + case Primitive::kPrimDouble: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ mulpd(dst, src); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +void LocationsBuilderX86_64::VisitVecDiv(HVecDiv* instruction) { + CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorX86_64::VisitVecDiv(HVecDiv* instruction) { + LocationSummary* locations = instruction->GetLocations(); + DCHECK(locations->InAt(0).Equals(locations->Out())); + XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); + XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); + switch (instruction->GetPackedType()) { + case Primitive::kPrimFloat: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ divps(dst, src); + break; + case Primitive::kPrimDouble: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ divpd(dst, src); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +void LocationsBuilderX86_64::VisitVecAnd(HVecAnd* instruction) { + CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorX86_64::VisitVecAnd(HVecAnd* instruction) { + LocationSummary* locations = instruction->GetLocations(); + DCHECK(locations->InAt(0).Equals(locations->Out())); + XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); + XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); + switch (instruction->GetPackedType()) { + 
case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimLong: + DCHECK_LE(2u, instruction->GetVectorLength()); + DCHECK_LE(instruction->GetVectorLength(), 16u); + __ pand(dst, src); + break; + case Primitive::kPrimFloat: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ andps(dst, src); + break; + case Primitive::kPrimDouble: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ andpd(dst, src); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +void LocationsBuilderX86_64::VisitVecAndNot(HVecAndNot* instruction) { + CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorX86_64::VisitVecAndNot(HVecAndNot* instruction) { + LocationSummary* locations = instruction->GetLocations(); + DCHECK(locations->InAt(0).Equals(locations->Out())); + XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); + XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); + switch (instruction->GetPackedType()) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimLong: + DCHECK_LE(2u, instruction->GetVectorLength()); + DCHECK_LE(instruction->GetVectorLength(), 16u); + __ pandn(dst, src); + break; + case Primitive::kPrimFloat: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ andnps(dst, src); + break; + case Primitive::kPrimDouble: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ andnpd(dst, src); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +void LocationsBuilderX86_64::VisitVecOr(HVecOr* instruction) { + CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorX86_64::VisitVecOr(HVecOr* instruction) { + LocationSummary* locations = instruction->GetLocations(); + DCHECK(locations->InAt(0).Equals(locations->Out())); + XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); + XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); + switch (instruction->GetPackedType()) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimLong: + DCHECK_LE(2u, instruction->GetVectorLength()); + DCHECK_LE(instruction->GetVectorLength(), 16u); + __ por(dst, src); + break; + case Primitive::kPrimFloat: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ orps(dst, src); + break; + case Primitive::kPrimDouble: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ orpd(dst, src); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +void LocationsBuilderX86_64::VisitVecXor(HVecXor* instruction) { + CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorX86_64::VisitVecXor(HVecXor* instruction) { + LocationSummary* locations = instruction->GetLocations(); + DCHECK(locations->InAt(0).Equals(locations->Out())); + XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); + XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); + switch (instruction->GetPackedType()) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimLong: + DCHECK_LE(2u, instruction->GetVectorLength()); + 
DCHECK_LE(instruction->GetVectorLength(), 16u); + __ pxor(dst, src); + break; + case Primitive::kPrimFloat: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ xorps(dst, src); + break; + case Primitive::kPrimDouble: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ xorpd(dst, src); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +// Helper to set up locations for vector shift operations. +static void CreateVecShiftLocations(ArenaAllocator* arena, HVecBinaryOperation* instruction) { + LocationSummary* locations = new (arena) LocationSummary(instruction); + switch (instruction->GetPackedType()) { + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimLong: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); + locations->SetOut(Location::SameAsFirstInput()); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +void LocationsBuilderX86_64::VisitVecShl(HVecShl* instruction) { + CreateVecShiftLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorX86_64::VisitVecShl(HVecShl* instruction) { + LocationSummary* locations = instruction->GetLocations(); + DCHECK(locations->InAt(0).Equals(locations->Out())); + int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue(); + XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); + switch (instruction->GetPackedType()) { + case Primitive::kPrimChar: + case Primitive::kPrimShort: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ psllw(dst, Immediate(static_cast<int8_t>(value))); + break; + case Primitive::kPrimInt: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ pslld(dst, Immediate(static_cast<int8_t>(value))); + break; + case Primitive::kPrimLong: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ psllq(dst, Immediate(static_cast<int8_t>(value))); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +void LocationsBuilderX86_64::VisitVecShr(HVecShr* instruction) { + CreateVecShiftLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorX86_64::VisitVecShr(HVecShr* instruction) { + LocationSummary* locations = instruction->GetLocations(); + DCHECK(locations->InAt(0).Equals(locations->Out())); + int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue(); + XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); + switch (instruction->GetPackedType()) { + case Primitive::kPrimChar: + case Primitive::kPrimShort: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ psraw(dst, Immediate(static_cast<int8_t>(value))); + break; + case Primitive::kPrimInt: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ psrad(dst, Immediate(static_cast<int8_t>(value))); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +void LocationsBuilderX86_64::VisitVecUShr(HVecUShr* instruction) { + CreateVecShiftLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorX86_64::VisitVecUShr(HVecUShr* instruction) { + LocationSummary* locations = instruction->GetLocations(); + DCHECK(locations->InAt(0).Equals(locations->Out())); + int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue(); + XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); + switch (instruction->GetPackedType()) { + case 
Primitive::kPrimChar: + case Primitive::kPrimShort: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ psrlw(dst, Immediate(static_cast<int8_t>(value))); + break; + case Primitive::kPrimInt: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ psrld(dst, Immediate(static_cast<int8_t>(value))); + break; + case Primitive::kPrimLong: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ psrlq(dst, Immediate(static_cast<int8_t>(value))); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +// Helper to set up locations for vector memory operations. +static void CreateVecMemLocations(ArenaAllocator* arena, + HVecMemoryOperation* instruction, + bool is_load) { + LocationSummary* locations = new (arena) LocationSummary(instruction); + switch (instruction->GetPackedType()) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimLong: + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); + if (is_load) { + locations->SetOut(Location::RequiresFpuRegister()); + } else { + locations->SetInAt(2, Location::RequiresFpuRegister()); + } + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +// Helper to set up registers and address for vector memory operations. +static Address CreateVecMemRegisters(HVecMemoryOperation* instruction, + Location* reg_loc, + bool is_load) { + LocationSummary* locations = instruction->GetLocations(); + Location base = locations->InAt(0); + Location index = locations->InAt(1); + *reg_loc = is_load ? locations->Out() : locations->InAt(2); + size_t size = Primitive::ComponentSize(instruction->GetPackedType()); + uint32_t offset = mirror::Array::DataOffset(size).Uint32Value(); + ScaleFactor scale = TIMES_1; + switch (size) { + case 2: scale = TIMES_2; break; + case 4: scale = TIMES_4; break; + case 8: scale = TIMES_8; break; + default: break; + } + return CodeGeneratorX86_64::ArrayAddress(base.AsRegister<CpuRegister>(), index, scale, offset); +} + +void LocationsBuilderX86_64::VisitVecLoad(HVecLoad* instruction) { + CreateVecMemLocations(GetGraph()->GetArena(), instruction, /*is_load*/ true); +} + +void InstructionCodeGeneratorX86_64::VisitVecLoad(HVecLoad* instruction) { + Location reg_loc = Location::NoLocation(); + Address address = CreateVecMemRegisters(instruction, &reg_loc, /*is_load*/ true); + XmmRegister reg = reg_loc.AsFpuRegister<XmmRegister>(); + bool is_aligned16 = instruction->GetAlignment().IsAlignedAt(16); + switch (instruction->GetPackedType()) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimLong: + DCHECK_LE(2u, instruction->GetVectorLength()); + DCHECK_LE(instruction->GetVectorLength(), 16u); + is_aligned16 ? __ movdqa(reg, address) : __ movdqu(reg, address); + break; + case Primitive::kPrimFloat: + DCHECK_EQ(4u, instruction->GetVectorLength()); + is_aligned16 ? __ movaps(reg, address) : __ movups(reg, address); + break; + case Primitive::kPrimDouble: + DCHECK_EQ(2u, instruction->GetVectorLength()); + is_aligned16 ?
__ movapd(reg, address) : __ movupd(reg, address); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +void LocationsBuilderX86_64::VisitVecStore(HVecStore* instruction) { + CreateVecMemLocations(GetGraph()->GetArena(), instruction, /*is_load*/ false); +} + +void InstructionCodeGeneratorX86_64::VisitVecStore(HVecStore* instruction) { + Location reg_loc = Location::NoLocation(); + Address address = CreateVecMemRegisters(instruction, &reg_loc, /*is_load*/ false); + XmmRegister reg = reg_loc.AsFpuRegister<XmmRegister>(); + bool is_aligned16 = instruction->GetAlignment().IsAlignedAt(16); + switch (instruction->GetPackedType()) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimLong: + DCHECK_LE(2u, instruction->GetVectorLength()); + DCHECK_LE(instruction->GetVectorLength(), 16u); + is_aligned16 ? __ movdqa(address, reg) : __ movdqu(address, reg); + break; + case Primitive::kPrimFloat: + DCHECK_EQ(4u, instruction->GetVectorLength()); + is_aligned16 ? __ movaps(address, reg) : __ movups(address, reg); + break; + case Primitive::kPrimDouble: + DCHECK_EQ(2u, instruction->GetVectorLength()); + is_aligned16 ? __ movapd(address, reg) : __ movupd(address, reg); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +#undef __ + +} // namespace x86_64 +} // namespace art diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index 4db4796985..80776e8b78 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -723,7 +723,7 @@ class ReadBarrierForHeapReferenceSlowPathX86 : public SlowPathCode { instruction_->IsArrayGet() || instruction_->IsInstanceOf() || instruction_->IsCheckCast() || - (instruction_->IsInvokeVirtual()) && instruction_->GetLocations()->Intrinsified()) + (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified())) << "Unexpected instruction in read barrier for heap reference slow path: " << instruction_->DebugName(); diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index 2ffc398287..49f099f6a9 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -744,7 +744,7 @@ class ReadBarrierForHeapReferenceSlowPathX86_64 : public SlowPathCode { instruction_->IsArrayGet() || instruction_->IsInstanceOf() || instruction_->IsCheckCast() || - (instruction_->IsInvokeVirtual()) && instruction_->GetLocations()->Intrinsified()) + (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified())) << "Unexpected instruction in read barrier for heap reference slow path: " << instruction_->DebugName(); @@ -3660,7 +3660,7 @@ void InstructionCodeGeneratorX86_64::GenerateDivRemWithAnyConstant(HBinaryOperat void InstructionCodeGeneratorX86_64::GenerateDivRemIntegral(HBinaryOperation* instruction) { DCHECK(instruction->IsDiv() || instruction->IsRem()); Primitive::Type type = instruction->GetResultType(); - DCHECK(type == Primitive::kPrimInt || Primitive::kPrimLong); + DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong); bool is_div = instruction->IsDiv(); LocationSummary* locations = instruction->GetLocations(); diff --git a/compiler/optimizing/codegen_test_utils.h b/compiler/optimizing/codegen_test_utils.h index cd954043f5..31cd204c9f 100644 ---
a/compiler/optimizing/codegen_test_utils.h +++ b/compiler/optimizing/codegen_test_utils.h @@ -74,7 +74,6 @@ class CodegenTargetConfig { } private: - CodegenTargetConfig() {} InstructionSet isa_; CreateCodegenFn create_codegen_; }; diff --git a/compiler/optimizing/common_arm.h b/compiler/optimizing/common_arm.h index e184745520..01304ac35b 100644 --- a/compiler/optimizing/common_arm.h +++ b/compiler/optimizing/common_arm.h @@ -66,6 +66,11 @@ inline vixl::aarch32::SRegister LowSRegisterFrom(Location location) { return vixl::aarch32::SRegister(location.AsFpuRegisterPairLow<vixl::aarch32::SRegister>()); } +inline vixl::aarch32::SRegister HighSRegisterFrom(Location location) { + DCHECK(location.IsFpuRegisterPair()) << location; + return vixl::aarch32::SRegister(location.AsFpuRegisterPairHigh<vixl::aarch32::SRegister>()); +} + inline vixl::aarch32::Register RegisterFrom(Location location) { DCHECK(location.IsRegister()) << location; return vixl::aarch32::Register(location.reg()); diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc index 0dfae11465..cc3c143b15 100644 --- a/compiler/optimizing/graph_visualizer.cc +++ b/compiler/optimizing/graph_visualizer.cc @@ -505,6 +505,10 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor { StartAttributeStream("kind") << (try_boundary->IsEntry() ? "entry" : "exit"); } + void VisitDeoptimize(HDeoptimize* deoptimize) OVERRIDE { + StartAttributeStream("kind") << deoptimize->GetKind(); + } + #if defined(ART_ENABLE_CODEGEN_arm) || defined(ART_ENABLE_CODEGEN_arm64) void VisitMultiplyAccumulate(HMultiplyAccumulate* instruction) OVERRIDE { StartAttributeStream("kind") << instruction->GetOpKind(); diff --git a/compiler/optimizing/induction_var_analysis_test.cc b/compiler/optimizing/induction_var_analysis_test.cc index 82ee93d5c2..9516ccb385 100644 --- a/compiler/optimizing/induction_var_analysis_test.cc +++ b/compiler/optimizing/induction_var_analysis_test.cc @@ -29,7 +29,21 @@ namespace art { */ class InductionVarAnalysisTest : public CommonCompilerTest { public: - InductionVarAnalysisTest() : pool_(), allocator_(&pool_) { + InductionVarAnalysisTest() + : pool_(), + allocator_(&pool_), + iva_(nullptr), + entry_(nullptr), + return_(nullptr), + exit_(nullptr), + parameter_(nullptr), + constant0_(nullptr), + constant1_(nullptr), + constant2_(nullptr), + constant7_(nullptr), + constant100_(nullptr), + constantm1_(nullptr), + float_constant0_(nullptr) { graph_ = CreateGraph(&allocator_); } diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc index f7331452c6..79cd7048a5 100644 --- a/compiler/optimizing/inliner.cc +++ b/compiler/optimizing/inliner.cc @@ -63,7 +63,7 @@ static constexpr size_t kMaximumNumberOfCumulatedDexRegisters = 64; static constexpr size_t kMaximumNumberOfRecursiveCalls = 4; // Controls the use of inline caches in AOT mode. -static constexpr bool kUseAOTInlineCaches = false; +static constexpr bool kUseAOTInlineCaches = true; // We check for line numbers to make sure the DepthString implementation // aligns the output nicely. 
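
The inliner hunks that follow add ResolveMethodFromInlineCache, which guards the newly enabled AOT inline caches against untrusted profile data. A standalone sketch of that guard, using hypothetical Class/Method stand-ins for mirror::Class and ArtMethod:

    #include <cassert>

    // A cached receiver class taken from a profile must be a subtype of the
    // declaring class of the method named in the bytecode; otherwise the cache
    // entry is bogus and must not be used to devirtualize.
    struct Class {
      const Class* super = nullptr;
      bool IsAssignableFrom(const Class* k) const {
        for (const Class* c = k; c != nullptr; c = c->super) {
          if (c == this) return true;
        }
        return false;
      }
    };

    struct Method { const Class* declaring_class = nullptr; };

    const Method* ResolveFromCache(const Class* cached_klass, const Method* resolved) {
      if (!resolved->declaring_class->IsAssignableFrom(cached_klass)) {
        return nullptr;  // corrupt or stale profile entry: caller bails out
      }
      return resolved;   // the real code then does the virtual/interface lookup
    }

    int main() {
      Class base, derived, unrelated;
      derived.super = &base;
      Method m{&base};
      assert(ResolveFromCache(&derived, &m) == &m);
      assert(ResolveFromCache(&unrelated, &m) == nullptr);
      return 0;
    }
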
@@ -672,6 +672,32 @@ HInstanceFieldGet* HInliner::BuildGetReceiverClass(ClassLinker* class_linker, return result; } +static ArtMethod* ResolveMethodFromInlineCache(Handle<mirror::Class> klass, + ArtMethod* resolved_method, + HInstruction* invoke_instruction, + PointerSize pointer_size) + REQUIRES_SHARED(Locks::mutator_lock_) { + if (Runtime::Current()->IsAotCompiler()) { + // We can get unrelated types when working with profiles (corruption, + // system updates, or anyone being able to write to it). So first check if the class + // actually implements the declaring class of the method that is being + // called in bytecode. + // Note: the lookup methods used below require assignable types. + if (!resolved_method->GetDeclaringClass()->IsAssignableFrom(klass.Get())) { + return nullptr; + } + } + + if (invoke_instruction->IsInvokeInterface()) { + resolved_method = klass->FindVirtualMethodForInterface(resolved_method, pointer_size); + } else { + DCHECK(invoke_instruction->IsInvokeVirtual()); + resolved_method = klass->FindVirtualMethodForVirtual(resolved_method, pointer_size); + } + DCHECK(resolved_method != nullptr); + return resolved_method; +} + bool HInliner::TryInlineMonomorphicCall(HInvoke* invoke_instruction, ArtMethod* resolved_method, Handle<mirror::ObjectArray<mirror::Class>> classes) { @@ -690,20 +716,20 @@ bool HInliner::TryInlineMonomorphicCall(HInvoke* invoke_instruction, ClassLinker* class_linker = caller_compilation_unit_.GetClassLinker(); PointerSize pointer_size = class_linker->GetImagePointerSize(); - if (invoke_instruction->IsInvokeInterface()) { - resolved_method = GetMonomorphicType(classes)->FindVirtualMethodForInterface( - resolved_method, pointer_size); - } else { - DCHECK(invoke_instruction->IsInvokeVirtual()); - resolved_method = GetMonomorphicType(classes)->FindVirtualMethodForVirtual( - resolved_method, pointer_size); - } + Handle<mirror::Class> monomorphic_type = handles_->NewHandle(GetMonomorphicType(classes)); + resolved_method = ResolveMethodFromInlineCache( + monomorphic_type, resolved_method, invoke_instruction, pointer_size); + LOG_NOTE() << "Try inline monomorphic call to " << resolved_method->PrettyMethod(); - DCHECK(resolved_method != nullptr); + if (resolved_method == nullptr) { + // Bogus AOT profile, bail.
+ DCHECK(Runtime::Current()->IsAotCompiler()); + return false; + } + HInstruction* receiver = invoke_instruction->InputAt(0); HInstruction* cursor = invoke_instruction->GetPrevious(); HBasicBlock* bb_cursor = invoke_instruction->GetBlock(); - Handle<mirror::Class> monomorphic_type = handles_->NewHandle(GetMonomorphicType(classes)); if (!TryInlineAndReplace(invoke_instruction, resolved_method, ReferenceTypeInfo::Create(monomorphic_type, /* is_exact */ true), @@ -742,7 +768,8 @@ void HInliner::AddCHAGuard(HInstruction* invoke_instruction, HShouldDeoptimizeFlag(graph_->GetArena(), dex_pc); HInstruction* compare = new (graph_->GetArena()) HNotEqual( deopt_flag, graph_->GetIntConstant(0, dex_pc)); - HInstruction* deopt = new (graph_->GetArena()) HDeoptimize(compare, dex_pc); + HInstruction* deopt = new (graph_->GetArena()) HDeoptimize( + graph_->GetArena(), compare, HDeoptimize::Kind::kInline, dex_pc); if (cursor != nullptr) { bb_cursor->InsertInstructionAfter(deopt_flag, cursor); @@ -806,9 +833,16 @@ HInstruction* HInliner::AddTypeGuard(HInstruction* receiver, bb_cursor->InsertInstructionAfter(compare, load_class); if (with_deoptimization) { HDeoptimize* deoptimize = new (graph_->GetArena()) HDeoptimize( - compare, invoke_instruction->GetDexPc()); + graph_->GetArena(), + compare, + receiver, + HDeoptimize::Kind::kInline, + invoke_instruction->GetDexPc()); bb_cursor->InsertInstructionAfter(deoptimize, compare); deoptimize->CopyEnvironmentFrom(invoke_instruction->GetEnvironment()); + DCHECK_EQ(invoke_instruction->InputAt(0), receiver); + receiver->ReplaceUsesDominatedBy(deoptimize, deoptimize); + deoptimize->SetReferenceTypeInfo(receiver->GetReferenceTypeInfo()); } return compare; } @@ -835,11 +869,14 @@ bool HInliner::TryInlinePolymorphicCall(HInvoke* invoke_instruction, ArtMethod* method = nullptr; Handle<mirror::Class> handle = handles_->NewHandle(classes->Get(i)); - if (invoke_instruction->IsInvokeInterface()) { - method = handle->FindVirtualMethodForInterface(resolved_method, pointer_size); - } else { - DCHECK(invoke_instruction->IsInvokeVirtual()); - method = handle->FindVirtualMethodForVirtual(resolved_method, pointer_size); + method = ResolveMethodFromInlineCache( + handle, resolved_method, invoke_instruction, pointer_size); + if (method == nullptr) { + DCHECK(Runtime::Current()->IsAotCompiler()); + // AOT profile is bogus. This loop expects to iterate over all entries, + // so just continue. + all_targets_inlined = false; + continue; } HInstruction* receiver = invoke_instruction->InputAt(0); @@ -884,7 +921,7 @@ bool HInliner::TryInlinePolymorphicCall(HInvoke* invoke_instruction, } invoke_instruction->GetBlock()->RemoveInstruction(invoke_instruction); // Because the inline cache data can be populated concurrently, we force the end of the - iteration. Otherhwise, we could see a new receiver type. + iteration. Otherwise, we could see a new receiver type.
break; } else { CreateDiamondPatternForPolymorphicInline(compare, return_replacement, invoke_instruction); @@ -1083,13 +1120,19 @@ bool HInliner::TryInlinePolymorphicCallToSameTarget( CreateDiamondPatternForPolymorphicInline(compare, return_replacement, invoke_instruction); } else { HDeoptimize* deoptimize = new (graph_->GetArena()) HDeoptimize( - compare, invoke_instruction->GetDexPc()); + graph_->GetArena(), + compare, + receiver, + HDeoptimize::Kind::kInline, + invoke_instruction->GetDexPc()); bb_cursor->InsertInstructionAfter(deoptimize, compare); deoptimize->CopyEnvironmentFrom(invoke_instruction->GetEnvironment()); if (return_replacement != nullptr) { invoke_instruction->ReplaceWith(return_replacement); } + receiver->ReplaceUsesDominatedBy(deoptimize, deoptimize); invoke_instruction->GetBlock()->RemoveInstruction(invoke_instruction); + deoptimize->SetReferenceTypeInfo(receiver->GetReferenceTypeInfo()); } // Run type propagation to get the guard typed. diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc index 17421fc364..60790e5b84 100644 --- a/compiler/optimizing/instruction_simplifier.cc +++ b/compiler/optimizing/instruction_simplifier.cc @@ -2132,6 +2132,9 @@ void InstructionSimplifierVisitor::VisitDeoptimize(HDeoptimize* deoptimize) { if (cond->IsConstant()) { if (cond->AsIntConstant()->IsFalse()) { // Never deopt: instruction can be removed. + if (deoptimize->GuardsAnInput()) { + deoptimize->ReplaceWith(deoptimize->GuardedInput()); + } deoptimize->GetBlock()->RemoveInstruction(deoptimize); } else { // Always deopt. diff --git a/compiler/optimizing/intrinsics_arm_vixl.cc b/compiler/optimizing/intrinsics_arm_vixl.cc index b25bad7170..0d933eaf82 100644 --- a/compiler/optimizing/intrinsics_arm_vixl.cc +++ b/compiler/optimizing/intrinsics_arm_vixl.cc @@ -39,6 +39,7 @@ using helpers::Int32ConstantFrom; using helpers::LocationFrom; using helpers::LowRegisterFrom; using helpers::LowSRegisterFrom; +using helpers::HighSRegisterFrom; using helpers::OutputDRegister; using helpers::OutputSRegister; using helpers::OutputRegister; @@ -794,6 +795,58 @@ void IntrinsicCodeGeneratorARMVIXL::VisitMathRint(HInvoke* invoke) { __ Vrintn(F64, F64, OutputDRegister(invoke), InputDRegisterAt(invoke, 0)); } +void IntrinsicLocationsBuilderARMVIXL::VisitMathRoundFloat(HInvoke* invoke) { + if (features_.HasARMv8AInstructions()) { + LocationSummary* locations = new (arena_) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresRegister()); + locations->AddTemp(Location::RequiresFpuRegister()); + } +} + +void IntrinsicCodeGeneratorARMVIXL::VisitMathRoundFloat(HInvoke* invoke) { + DCHECK(codegen_->GetInstructionSetFeatures().HasARMv8AInstructions()); + + ArmVIXLAssembler* assembler = GetAssembler(); + vixl32::SRegister in_reg = InputSRegisterAt(invoke, 0); + vixl32::Register out_reg = OutputRegister(invoke); + vixl32::SRegister temp1 = LowSRegisterFrom(invoke->GetLocations()->GetTemp(0)); + vixl32::SRegister temp2 = HighSRegisterFrom(invoke->GetLocations()->GetTemp(0)); + vixl32::Label done; + vixl32::Label* final_label = codegen_->GetFinalLabel(invoke, &done); + + // Round to nearest integer, ties away from zero. + __ Vcvta(S32, F32, temp1, in_reg); + __ Vmov(out_reg, temp1); + + // For positive, zero or NaN inputs, rounding is done. + __ Cmp(out_reg, 0); + __ B(ge, final_label, /* far_target */ false); + + // Handle input < 0 cases. 
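
Worked through in scalar form, the negative-case fixup that the next comments and instructions implement looks like this (a hypothetical model, not ART code; NaN handling is omitted since the ge-branch above already dispatched it):

    #include <cassert>
    #include <cmath>
    #include <cstdint>

    // Models Vcvta/Vrinta (round to nearest, ties away from zero) plus the
    // IT-block fixup: Java's Math.round rounds ties toward positive infinity.
    int32_t RoundFloat(float in) {
      float rounded = std::roundf(in);              // vrinta: ties away from zero
      int32_t out = static_cast<int32_t>(rounded);  // vcvta + vmov
      if (out >= 0) return out;                     // positive/zero: done
      if (in - rounded == 0.5f) out += 1;           // negative tie: round up
      return out;
    }

    int main() {
      assert(RoundFloat(2.5f) == 3);    // ties-away == ties-up for positives
      assert(RoundFloat(-2.5f) == -2);  // vcvta alone would give -3
      assert(RoundFloat(-2.4f) == -2);
      assert(RoundFloat(-2.6f) == -3);
      return 0;
    }
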
+ // If input is negative but not a tie, previous result (round to nearest) is valid. + // If input is a negative tie, change rounding direction to positive infinity, out_reg += 1. + __ Vrinta(F32, F32, temp1, in_reg); + __ Vmov(temp2, 0.5); + __ Vsub(F32, temp1, in_reg, temp1); + __ Vcmp(F32, temp1, temp2); + __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR); + { + // Use ExactAssemblyScope here because we are using IT. + ExactAssemblyScope it_scope(assembler->GetVIXLAssembler(), + 2 * kMaxInstructionSizeInBytes, + CodeBufferCheckScope::kMaximumSize); + __ it(eq); + __ add(eq, out_reg, out_reg, 1); + } + + if (done.IsReferenced()) { + __ Bind(&done); + } +} + void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPeekByte(HInvoke* invoke) { CreateIntToIntLocations(arena_, invoke); } @@ -3100,7 +3153,6 @@ void IntrinsicCodeGeneratorARMVIXL::VisitIntegerValueOf(HInvoke* invoke) { } UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathRoundDouble) // Could be done by changing rounding mode, maybe? -UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathRoundFloat) // Could be done by changing rounding mode, maybe? UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeCASLong) // High register pressure. UNIMPLEMENTED_INTRINSIC(ARMVIXL, SystemArrayCopyChar) UNIMPLEMENTED_INTRINSIC(ARMVIXL, IntegerHighestOneBit) diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc index bf85b1989e..b67793c4ed 100644 --- a/compiler/optimizing/intrinsics_mips.cc +++ b/compiler/optimizing/intrinsics_mips.cc @@ -1514,21 +1514,31 @@ void IntrinsicCodeGeneratorMIPS::VisitThreadCurrentThread(HInvoke* invoke) { Thread::PeerOffset<kMipsPointerSize>().Int32Value()); } -static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) { - bool can_call = - invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject || - invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile; +static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, + HInvoke* invoke, + Primitive::Type type) { + bool can_call = kEmitCompilerReadBarrier && + (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject || + invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile); LocationSummary* locations = new (arena) LocationSummary(invoke, - can_call ? - LocationSummary::kCallOnSlowPath : - LocationSummary::kNoCall, + (can_call + ? LocationSummary::kCallOnSlowPath + : LocationSummary::kNoCall), kIntrinsified); locations->SetInAt(0, Location::NoLocation()); // Unused receiver. locations->SetInAt(1, Location::RequiresRegister()); locations->SetInAt(2, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + locations->SetOut(Location::RequiresRegister(), + (can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap)); + if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + // We need a temporary register for the read barrier marking slow + // path in InstructionCodeGeneratorMIPS::GenerateReferenceLoadWithBakerReadBarrier. + locations->AddTemp(Location::RequiresRegister()); + } } +// Note that the caller must supply a properly aligned memory address. +// If they do not, the behavior is undefined (atomicity not guaranteed, exception may occur). static void GenUnsafeGet(HInvoke* invoke, Primitive::Type type, bool is_volatile, @@ -1539,49 +1549,109 @@ static void GenUnsafeGet(HInvoke* invoke, (type == Primitive::kPrimLong) || (type == Primitive::kPrimNot)) << type; MipsAssembler* assembler = codegen->GetAssembler(); + // Target register.
+ Location trg_loc = locations->Out(); // Object pointer. - Register base = locations->InAt(1).AsRegister<Register>(); + Location base_loc = locations->InAt(1); + Register base = base_loc.AsRegister<Register>(); // The "offset" argument is passed as a "long". Since this code is for // a 32-bit processor, we can only use 32-bit addresses, so we only // need the low 32-bits of offset. - Register offset_lo = invoke->GetLocations()->InAt(2).AsRegisterPairLow<Register>(); + Location offset_loc = locations->InAt(2); + Register offset_lo = offset_loc.AsRegisterPairLow<Register>(); - __ Addu(TMP, base, offset_lo); - if (is_volatile) { - __ Sync(0); + if (!(kEmitCompilerReadBarrier && kUseBakerReadBarrier && (type == Primitive::kPrimNot))) { + __ Addu(TMP, base, offset_lo); } - if (type == Primitive::kPrimLong) { - Register trg_lo = locations->Out().AsRegisterPairLow<Register>(); - Register trg_hi = locations->Out().AsRegisterPairHigh<Register>(); - if (is_R6) { - __ Lw(trg_lo, TMP, 0); - __ Lw(trg_hi, TMP, 4); - } else { - __ Lwr(trg_lo, TMP, 0); - __ Lwl(trg_lo, TMP, 3); - __ Lwr(trg_hi, TMP, 4); - __ Lwl(trg_hi, TMP, 7); + switch (type) { + case Primitive::kPrimLong: { + Register trg_lo = trg_loc.AsRegisterPairLow<Register>(); + Register trg_hi = trg_loc.AsRegisterPairHigh<Register>(); + CHECK(!is_volatile); // TODO: support atomic 8-byte volatile loads. + if (is_R6) { + __ Lw(trg_lo, TMP, 0); + __ Lw(trg_hi, TMP, 4); + } else { + __ Lwr(trg_lo, TMP, 0); + __ Lwl(trg_lo, TMP, 3); + __ Lwr(trg_hi, TMP, 4); + __ Lwl(trg_hi, TMP, 7); + } + break; } - } else { - Register trg = locations->Out().AsRegister<Register>(); - if (is_R6) { - __ Lw(trg, TMP, 0); - } else { - __ Lwr(trg, TMP, 0); - __ Lwl(trg, TMP, 3); + case Primitive::kPrimInt: { + Register trg = trg_loc.AsRegister<Register>(); + if (is_R6) { + __ Lw(trg, TMP, 0); + } else { + __ Lwr(trg, TMP, 0); + __ Lwl(trg, TMP, 3); + } + if (is_volatile) { + __ Sync(0); + } + break; } - if (type == Primitive::kPrimNot) { - __ MaybeUnpoisonHeapReference(trg); + case Primitive::kPrimNot: { + Register trg = trg_loc.AsRegister<Register>(); + if (kEmitCompilerReadBarrier) { + if (kUseBakerReadBarrier) { + Location temp = locations->GetTemp(0); + codegen->GenerateReferenceLoadWithBakerReadBarrier(invoke, + trg_loc, + base, + /* offset */ 0U, + /* index */ offset_loc, + TIMES_1, + temp, + /* needs_null_check */ false); + if (is_volatile) { + __ Sync(0); + } + } else { + if (is_R6) { + __ Lw(trg, TMP, 0); + } else { + __ Lwr(trg, TMP, 0); + __ Lwl(trg, TMP, 3); + } + if (is_volatile) { + __ Sync(0); + } + codegen->GenerateReadBarrierSlow(invoke, + trg_loc, + trg_loc, + base_loc, + /* offset */ 0U, + /* index */ offset_loc); + } + } else { + if (is_R6) { + __ Lw(trg, TMP, 0); + } else { + __ Lwr(trg, TMP, 0); + __ Lwl(trg, TMP, 3); + } + if (is_volatile) { + __ Sync(0); + } + __ MaybeUnpoisonHeapReference(trg); + } + break; } + + default: + LOG(FATAL) << "Unexpected type " << type; + UNREACHABLE(); } } // int sun.misc.Unsafe.getInt(Object o, long offset) void IntrinsicLocationsBuilderMIPS::VisitUnsafeGet(HInvoke* invoke) { - CreateIntIntIntToIntLocations(arena_, invoke); + CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt); } void IntrinsicCodeGeneratorMIPS::VisitUnsafeGet(HInvoke* invoke) { @@ -1590,7 +1660,7 @@ void IntrinsicCodeGeneratorMIPS::VisitUnsafeGet(HInvoke* invoke) { // int sun.misc.Unsafe.getIntVolatile(Object o, long offset) void IntrinsicLocationsBuilderMIPS::VisitUnsafeGetVolatile(HInvoke* invoke) { - 
CreateIntIntIntToIntLocations(arena_, invoke); + CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt); } void IntrinsicCodeGeneratorMIPS::VisitUnsafeGetVolatile(HInvoke* invoke) { @@ -1599,25 +1669,16 @@ void IntrinsicCodeGeneratorMIPS::VisitUnsafeGetVolatile(HInvoke* invoke) { // long sun.misc.Unsafe.getLong(Object o, long offset) void IntrinsicLocationsBuilderMIPS::VisitUnsafeGetLong(HInvoke* invoke) { - CreateIntIntIntToIntLocations(arena_, invoke); + CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong); } void IntrinsicCodeGeneratorMIPS::VisitUnsafeGetLong(HInvoke* invoke) { GenUnsafeGet(invoke, Primitive::kPrimLong, /* is_volatile */ false, IsR6(), codegen_); } -// long sun.misc.Unsafe.getLongVolatile(Object o, long offset) -void IntrinsicLocationsBuilderMIPS::VisitUnsafeGetLongVolatile(HInvoke* invoke) { - CreateIntIntIntToIntLocations(arena_, invoke); -} - -void IntrinsicCodeGeneratorMIPS::VisitUnsafeGetLongVolatile(HInvoke* invoke) { - GenUnsafeGet(invoke, Primitive::kPrimLong, /* is_volatile */ true, IsR6(), codegen_); -} - // Object sun.misc.Unsafe.getObject(Object o, long offset) void IntrinsicLocationsBuilderMIPS::VisitUnsafeGetObject(HInvoke* invoke) { - CreateIntIntIntToIntLocations(arena_, invoke); + CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot); } void IntrinsicCodeGeneratorMIPS::VisitUnsafeGetObject(HInvoke* invoke) { @@ -1626,7 +1687,7 @@ void IntrinsicCodeGeneratorMIPS::VisitUnsafeGetObject(HInvoke* invoke) { // Object sun.misc.Unsafe.getObjectVolatile(Object o, long offset) void IntrinsicLocationsBuilderMIPS::VisitUnsafeGetObjectVolatile(HInvoke* invoke) { - CreateIntIntIntToIntLocations(arena_, invoke); + CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot); } void IntrinsicCodeGeneratorMIPS::VisitUnsafeGetObjectVolatile(HInvoke* invoke) { @@ -1643,6 +1704,8 @@ static void CreateIntIntIntIntToVoidLocations(ArenaAllocator* arena, HInvoke* in locations->SetInAt(3, Location::RequiresRegister()); } +// Note that the caller must supply a properly aligned memory address. +// If they do not, the behavior is undefined (atomicity not guaranteed, exception may occur). static void GenUnsafePut(LocationSummary* locations, Primitive::Type type, bool is_volatile, @@ -1681,7 +1744,7 @@ static void GenUnsafePut(LocationSummary* locations, } else { Register value_lo = locations->InAt(3).AsRegisterPairLow<Register>(); Register value_hi = locations->InAt(3).AsRegisterPairHigh<Register>(); - + CHECK(!is_volatile); // TODO: support atomic 8-byte volatile stores. 
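
The CHECK just above exists because a 64-bit access on MIPS32 is emitted as two 32-bit words, which cannot be made atomic, so the volatile long variants are routed to the unimplemented list instead. Relatedly, the pre-R6 paths in these hunks pair Lwr/Lwl (offsets 0 and 3) to read a word that may be unaligned; a rough little-endian model, hypothetical and only for intuition:

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    // Models "Lwr trg, addr, 0; Lwl trg, addr, 3" on little-endian MIPS:
    // the pair fills one register from four possibly unaligned bytes.
    uint32_t LoadWordUnaligned(const uint8_t* addr) {
      uint32_t value;
      std::memcpy(&value, addr, sizeof(value));  // byte-wise, no alignment needed
      return value;
    }

    int main() {
      uint8_t buf[8] = {0x00, 0x78, 0x56, 0x34, 0x12, 0x00, 0x00, 0x00};
      assert(LoadWordUnaligned(buf + 1) == 0x12345678u);  // misaligned by one
      return 0;
    }
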
if (is_R6) { __ Sw(value_lo, TMP, 0); __ Sw(value_hi, TMP, 4); @@ -1815,50 +1878,71 @@ void IntrinsicCodeGeneratorMIPS::VisitUnsafePutLongOrdered(HInvoke* invoke) { codegen_); } -// void sun.misc.Unsafe.putLongVolatile(Object o, long offset, long x) -void IntrinsicLocationsBuilderMIPS::VisitUnsafePutLongVolatile(HInvoke* invoke) { - CreateIntIntIntIntToVoidLocations(arena_, invoke); -} - -void IntrinsicCodeGeneratorMIPS::VisitUnsafePutLongVolatile(HInvoke* invoke) { - GenUnsafePut(invoke->GetLocations(), - Primitive::kPrimLong, - /* is_volatile */ true, - /* is_ordered */ false, - IsR6(), - codegen_); -} - -static void CreateIntIntIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) { +static void CreateIntIntIntIntIntToIntPlusTemps(ArenaAllocator* arena, HInvoke* invoke) { + bool can_call = kEmitCompilerReadBarrier && + kUseBakerReadBarrier && + (invoke->GetIntrinsic() == Intrinsics::kUnsafeCASObject); LocationSummary* locations = new (arena) LocationSummary(invoke, - LocationSummary::kNoCall, + (can_call + ? LocationSummary::kCallOnSlowPath + : LocationSummary::kNoCall), kIntrinsified); locations->SetInAt(0, Location::NoLocation()); // Unused receiver. locations->SetInAt(1, Location::RequiresRegister()); locations->SetInAt(2, Location::RequiresRegister()); locations->SetInAt(3, Location::RequiresRegister()); locations->SetInAt(4, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister()); + + // Temporary register used in CAS by (Baker) read barrier. + if (can_call) { + locations->AddTemp(Location::RequiresRegister()); + } } -static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGeneratorMIPS* codegen) { +// Note that the caller must supply a properly aligned memory address. +// If they do not, the behavior is undefined (atomicity not guaranteed, exception may occur). +static void GenCas(HInvoke* invoke, Primitive::Type type, CodeGeneratorMIPS* codegen) { MipsAssembler* assembler = codegen->GetAssembler(); + LocationSummary* locations = invoke->GetLocations(); bool isR6 = codegen->GetInstructionSetFeatures().IsR6(); Register base = locations->InAt(1).AsRegister<Register>(); - Register offset_lo = locations->InAt(2).AsRegisterPairLow<Register>(); + Location offset_loc = locations->InAt(2); + Register offset_lo = offset_loc.AsRegisterPairLow<Register>(); Register expected = locations->InAt(3).AsRegister<Register>(); Register value = locations->InAt(4).AsRegister<Register>(); - Register out = locations->Out().AsRegister<Register>(); + Location out_loc = locations->Out(); + Register out = out_loc.AsRegister<Register>(); DCHECK_NE(base, out); DCHECK_NE(offset_lo, out); DCHECK_NE(expected, out); if (type == Primitive::kPrimNot) { - // Mark card for object assuming new value is stored. + // The only read barrier implementation supporting the + // UnsafeCASObject intrinsic is the Baker-style read barriers. + DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier); + + // Mark card for object assuming new value is stored. Worst case we will mark an unchanged + // object and scan the receiver at the next GC for nothing. bool value_can_be_null = true; // TODO: Worth finding out this information? codegen->MarkGCCard(base, value, value_can_be_null); + + if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + Location temp = locations->GetTemp(0); + // Need to make sure the reference stored in the field is a to-space + // one before attempting the CAS or the CAS could fail incorrectly. 
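
To make the cited failure mode concrete: under a concurrent copying collector, the field can still hold a from-space pointer while the caller's expected value is the to-space copy, so a raw CAS on logically equal references would fail. A schematic sketch (hypothetical Obj/forwarding model, not ART's actual lock-word encoding):

    #include <atomic>
    #include <cassert>

    struct Obj { Obj* forward = nullptr; };  // set once GC copies the object

    // Heal the field to the to-space reference, then attempt the real CAS.
    bool CasWithBarrier(std::atomic<Obj*>& field, Obj* expected, Obj* value) {
      Obj* cur = field.load();
      if (cur != nullptr && cur->forward != nullptr) {
        field.compare_exchange_strong(cur, cur->forward);  // from- to to-space
      }
      return field.compare_exchange_strong(expected, value);
    }

    int main() {
      Obj from_space, to_space, replacement;
      from_space.forward = &to_space;        // GC moved the object
      std::atomic<Obj*> field(&from_space);  // field not yet updated
      // A raw CAS with expected == &to_space would fail here; the healing
      // step rewrites the field first so the CAS succeeds.
      assert(CasWithBarrier(field, &to_space, &replacement));
      return 0;
    }
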
+ codegen->GenerateReferenceLoadWithBakerReadBarrier( + invoke, + out_loc, // Unused, used only as a "temporary" within the read barrier. + base, + /* offset */ 0u, + /* index */ offset_loc, + ScaleFactor::TIMES_1, + temp, + /* needs_null_check */ false, + /* always_update_field */ true); + } } MipsLabel loop_head, exit_loop; @@ -1926,20 +2010,30 @@ static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGenerat // boolean sun.misc.Unsafe.compareAndSwapInt(Object o, long offset, int expected, int x) void IntrinsicLocationsBuilderMIPS::VisitUnsafeCASInt(HInvoke* invoke) { - CreateIntIntIntIntIntToIntLocations(arena_, invoke); + CreateIntIntIntIntIntToIntPlusTemps(arena_, invoke); } void IntrinsicCodeGeneratorMIPS::VisitUnsafeCASInt(HInvoke* invoke) { - GenCas(invoke->GetLocations(), Primitive::kPrimInt, codegen_); + GenCas(invoke, Primitive::kPrimInt, codegen_); } // boolean sun.misc.Unsafe.compareAndSwapObject(Object o, long offset, Object expected, Object x) void IntrinsicLocationsBuilderMIPS::VisitUnsafeCASObject(HInvoke* invoke) { - CreateIntIntIntIntIntToIntLocations(arena_, invoke); + // The only read barrier implementation supporting the + // UnsafeCASObject intrinsic is the Baker-style read barriers. + if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) { + return; + } + + CreateIntIntIntIntIntToIntPlusTemps(arena_, invoke); } void IntrinsicCodeGeneratorMIPS::VisitUnsafeCASObject(HInvoke* invoke) { - GenCas(invoke->GetLocations(), Primitive::kPrimNot, codegen_); + // The only read barrier implementation supporting the + // UnsafeCASObject intrinsic is the Baker-style read barriers. + DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier); + + GenCas(invoke, Primitive::kPrimNot, codegen_); } // int java.lang.String.compareTo(String anotherString) @@ -2664,6 +2758,8 @@ UNIMPLEMENTED_INTRINSIC(MIPS, MathCeil) UNIMPLEMENTED_INTRINSIC(MIPS, MathFloor) UNIMPLEMENTED_INTRINSIC(MIPS, MathRint) UNIMPLEMENTED_INTRINSIC(MIPS, MathRoundDouble) +UNIMPLEMENTED_INTRINSIC(MIPS, UnsafeGetLongVolatile); +UNIMPLEMENTED_INTRINSIC(MIPS, UnsafePutLongVolatile); UNIMPLEMENTED_INTRINSIC(MIPS, UnsafeCASLong) UNIMPLEMENTED_INTRINSIC(MIPS, ReferenceGetReferent) diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc index 1ee89cf127..6098767aae 100644 --- a/compiler/optimizing/intrinsics_mips64.cc +++ b/compiler/optimizing/intrinsics_mips64.cc @@ -1151,16 +1151,31 @@ void IntrinsicCodeGeneratorMIPS64::VisitThreadCurrentThread(HInvoke* invoke) { Thread::PeerOffset<kMips64PointerSize>().Int32Value()); } -static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) { +static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, + HInvoke* invoke, + Primitive::Type type) { + bool can_call = kEmitCompilerReadBarrier && + (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject || + invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile); LocationSummary* locations = new (arena) LocationSummary(invoke, - LocationSummary::kNoCall, + (can_call + ? LocationSummary::kCallOnSlowPath + : LocationSummary::kNoCall), kIntrinsified); locations->SetInAt(0, Location::NoLocation()); // Unused receiver. locations->SetInAt(1, Location::RequiresRegister()); locations->SetInAt(2, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + locations->SetOut(Location::RequiresRegister(), + (can_call ? 
Location::kOutputOverlap : Location::kNoOutputOverlap)); + if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + // We need a temporary register for the read barrier marking slow + // path in InstructionCodeGeneratorMIPS64::GenerateReferenceLoadWithBakerReadBarrier. + locations->AddTemp(Location::RequiresRegister()); + } } +// Note that the caller must supply a properly aligned memory address. +// If they do not, the behavior is undefined (atomicity not guaranteed, exception may occur). static void GenUnsafeGet(HInvoke* invoke, Primitive::Type type, bool is_volatile, @@ -1168,30 +1183,71 @@ static void GenUnsafeGet(HInvoke* invoke, LocationSummary* locations = invoke->GetLocations(); DCHECK((type == Primitive::kPrimInt) || (type == Primitive::kPrimLong) || - (type == Primitive::kPrimNot)); + (type == Primitive::kPrimNot)) << type; Mips64Assembler* assembler = codegen->GetAssembler(); + // Target register. + Location trg_loc = locations->Out(); + GpuRegister trg = trg_loc.AsRegister<GpuRegister>(); // Object pointer. - GpuRegister base = locations->InAt(1).AsRegister<GpuRegister>(); + Location base_loc = locations->InAt(1); + GpuRegister base = base_loc.AsRegister<GpuRegister>(); // Long offset. - GpuRegister offset = locations->InAt(2).AsRegister<GpuRegister>(); - GpuRegister trg = locations->Out().AsRegister<GpuRegister>(); + Location offset_loc = locations->InAt(2); + GpuRegister offset = offset_loc.AsRegister<GpuRegister>(); - __ Daddu(TMP, base, offset); - if (is_volatile) { - __ Sync(0); + if (!(kEmitCompilerReadBarrier && kUseBakerReadBarrier && (type == Primitive::kPrimNot))) { + __ Daddu(TMP, base, offset); } + switch (type) { + case Primitive::kPrimLong: + __ Ld(trg, TMP, 0); + if (is_volatile) { + __ Sync(0); + } + break; + case Primitive::kPrimInt: __ Lw(trg, TMP, 0); + if (is_volatile) { + __ Sync(0); + } break; case Primitive::kPrimNot: - __ Lwu(trg, TMP, 0); - __ MaybeUnpoisonHeapReference(trg); - break; - - case Primitive::kPrimLong: - __ Ld(trg, TMP, 0); + if (kEmitCompilerReadBarrier) { + if (kUseBakerReadBarrier) { + Location temp = locations->GetTemp(0); + codegen->GenerateReferenceLoadWithBakerReadBarrier(invoke, + trg_loc, + base, + /* offset */ 0U, + /* index */ offset_loc, + TIMES_1, + temp, + /* needs_null_check */ false); + if (is_volatile) { + __ Sync(0); + } + } else { + __ Lwu(trg, TMP, 0); + if (is_volatile) { + __ Sync(0); + } + codegen->GenerateReadBarrierSlow(invoke, + trg_loc, + trg_loc, + base_loc, + /* offset */ 0U, + /* index */ offset_loc); + } + } else { + __ Lwu(trg, TMP, 0); + if (is_volatile) { + __ Sync(0); + } + __ MaybeUnpoisonHeapReference(trg); + } break; default: @@ -1202,7 +1258,7 @@ static void GenUnsafeGet(HInvoke* invoke, // int sun.misc.Unsafe.getInt(Object o, long offset) void IntrinsicLocationsBuilderMIPS64::VisitUnsafeGet(HInvoke* invoke) { - CreateIntIntIntToIntLocations(arena_, invoke); + CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt); } void IntrinsicCodeGeneratorMIPS64::VisitUnsafeGet(HInvoke* invoke) { @@ -1211,7 +1267,7 @@ void IntrinsicCodeGeneratorMIPS64::VisitUnsafeGet(HInvoke* invoke) { // int sun.misc.Unsafe.getIntVolatile(Object o, long offset) void IntrinsicLocationsBuilderMIPS64::VisitUnsafeGetVolatile(HInvoke* invoke) { - CreateIntIntIntToIntLocations(arena_, invoke); + CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt); } void IntrinsicCodeGeneratorMIPS64::VisitUnsafeGetVolatile(HInvoke* invoke) { @@ -1220,7 +1276,7 @@ void 
IntrinsicCodeGeneratorMIPS64::VisitUnsafeGetVolatile(HInvoke* invoke) { // long sun.misc.Unsafe.getLong(Object o, long offset) void IntrinsicLocationsBuilderMIPS64::VisitUnsafeGetLong(HInvoke* invoke) { - CreateIntIntIntToIntLocations(arena_, invoke); + CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong); } void IntrinsicCodeGeneratorMIPS64::VisitUnsafeGetLong(HInvoke* invoke) { @@ -1229,7 +1285,7 @@ void IntrinsicCodeGeneratorMIPS64::VisitUnsafeGetLong(HInvoke* invoke) { // long sun.misc.Unsafe.getLongVolatile(Object o, long offset) void IntrinsicLocationsBuilderMIPS64::VisitUnsafeGetLongVolatile(HInvoke* invoke) { - CreateIntIntIntToIntLocations(arena_, invoke); + CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong); } void IntrinsicCodeGeneratorMIPS64::VisitUnsafeGetLongVolatile(HInvoke* invoke) { @@ -1238,7 +1294,7 @@ void IntrinsicCodeGeneratorMIPS64::VisitUnsafeGetLongVolatile(HInvoke* invoke) { // Object sun.misc.Unsafe.getObject(Object o, long offset) void IntrinsicLocationsBuilderMIPS64::VisitUnsafeGetObject(HInvoke* invoke) { - CreateIntIntIntToIntLocations(arena_, invoke); + CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot); } void IntrinsicCodeGeneratorMIPS64::VisitUnsafeGetObject(HInvoke* invoke) { @@ -1247,7 +1303,7 @@ void IntrinsicCodeGeneratorMIPS64::VisitUnsafeGetObject(HInvoke* invoke) { // Object sun.misc.Unsafe.getObjectVolatile(Object o, long offset) void IntrinsicLocationsBuilderMIPS64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) { - CreateIntIntIntToIntLocations(arena_, invoke); + CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot); } void IntrinsicCodeGeneratorMIPS64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) { @@ -1264,6 +1320,8 @@ static void CreateIntIntIntIntToVoid(ArenaAllocator* arena, HInvoke* invoke) { locations->SetInAt(3, Location::RequiresRegister()); } +// Note that the caller must supply a properly aligned memory address. +// If they do not, the behavior is undefined (atomicity not guaranteed, exception may occur). static void GenUnsafePut(LocationSummary* locations, Primitive::Type type, bool is_volatile, @@ -1429,35 +1487,70 @@ void IntrinsicCodeGeneratorMIPS64::VisitUnsafePutLongVolatile(HInvoke* invoke) { codegen_); } -static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena, HInvoke* invoke) { +static void CreateIntIntIntIntIntToIntPlusTemps(ArenaAllocator* arena, HInvoke* invoke) { + bool can_call = kEmitCompilerReadBarrier && + kUseBakerReadBarrier && + (invoke->GetIntrinsic() == Intrinsics::kUnsafeCASObject); LocationSummary* locations = new (arena) LocationSummary(invoke, - LocationSummary::kNoCall, + (can_call + ? LocationSummary::kCallOnSlowPath + : LocationSummary::kNoCall), kIntrinsified); locations->SetInAt(0, Location::NoLocation()); // Unused receiver. locations->SetInAt(1, Location::RequiresRegister()); locations->SetInAt(2, Location::RequiresRegister()); locations->SetInAt(3, Location::RequiresRegister()); locations->SetInAt(4, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister()); + + // Temporary register used in CAS by (Baker) read barrier. + if (can_call) { + locations->AddTemp(Location::RequiresRegister()); + } } -static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGeneratorMIPS64* codegen) { +// Note that the caller must supply a properly aligned memory address. +// If they do not, the behavior is undefined (atomicity not guaranteed, exception may occur). 
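
GenCas, rewritten below to take the HInvoke, emits an LL/SC retry loop around the comparison. A scalar model of that loop using std::atomic, where compare_exchange_weak's allowed spurious failure plays the role of a failing SC (hypothetical, for intuition only):

    #include <atomic>
    #include <cassert>
    #include <cstdint>

    // Shape of the emitted loop:
    //   loop_head: ll   out, [addr]            // load-linked
    //              bne  out, expected, exit    // genuine mismatch
    //              sc   value, [addr]          // store-conditional
    //              beqz ..., loop_head         // SC failed: retry
    bool CasInt(std::atomic<int32_t>* addr, int32_t expected, int32_t value) {
      int32_t observed = expected;
      while (!addr->compare_exchange_weak(observed, value)) {
        if (observed != expected) return false;  // bne taken
        observed = expected;                     // spurious SC failure: retry
      }
      return true;
    }

    int main() {
      std::atomic<int32_t> x(41);
      assert(CasInt(&x, 41, 42) && x.load() == 42);
      assert(!CasInt(&x, 41, 43) && x.load() == 42);
      return 0;
    }
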
+static void GenCas(HInvoke* invoke, Primitive::Type type, CodeGeneratorMIPS64* codegen) { Mips64Assembler* assembler = codegen->GetAssembler(); + LocationSummary* locations = invoke->GetLocations(); GpuRegister base = locations->InAt(1).AsRegister<GpuRegister>(); - GpuRegister offset = locations->InAt(2).AsRegister<GpuRegister>(); + Location offset_loc = locations->InAt(2); + GpuRegister offset = offset_loc.AsRegister<GpuRegister>(); GpuRegister expected = locations->InAt(3).AsRegister<GpuRegister>(); GpuRegister value = locations->InAt(4).AsRegister<GpuRegister>(); - GpuRegister out = locations->Out().AsRegister<GpuRegister>(); + Location out_loc = locations->Out(); + GpuRegister out = out_loc.AsRegister<GpuRegister>(); DCHECK_NE(base, out); DCHECK_NE(offset, out); DCHECK_NE(expected, out); if (type == Primitive::kPrimNot) { - // Mark card for object assuming new value is stored. + // The only read barrier implementation supporting the + // UnsafeCASObject intrinsic is the Baker-style read barriers. + DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier); + + // Mark card for object assuming new value is stored. Worst case we will mark an unchanged + // object and scan the receiver at the next GC for nothing. bool value_can_be_null = true; // TODO: Worth finding out this information? codegen->MarkGCCard(base, value, value_can_be_null); + + if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + Location temp = locations->GetTemp(0); + // Need to make sure the reference stored in the field is a to-space + // one before attempting the CAS or the CAS could fail incorrectly. + codegen->GenerateReferenceLoadWithBakerReadBarrier( + invoke, + out_loc, // Unused, used only as a "temporary" within the read barrier. + base, + /* offset */ 0u, + /* index */ offset_loc, + ScaleFactor::TIMES_1, + temp, + /* needs_null_check */ false, + /* always_update_field */ true); + } } Mips64Label loop_head, exit_loop; @@ -1521,29 +1614,39 @@ static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGenerat // boolean sun.misc.Unsafe.compareAndSwapInt(Object o, long offset, int expected, int x) void IntrinsicLocationsBuilderMIPS64::VisitUnsafeCASInt(HInvoke* invoke) { - CreateIntIntIntIntIntToInt(arena_, invoke); + CreateIntIntIntIntIntToIntPlusTemps(arena_, invoke); } void IntrinsicCodeGeneratorMIPS64::VisitUnsafeCASInt(HInvoke* invoke) { - GenCas(invoke->GetLocations(), Primitive::kPrimInt, codegen_); + GenCas(invoke, Primitive::kPrimInt, codegen_); } // boolean sun.misc.Unsafe.compareAndSwapLong(Object o, long offset, long expected, long x) void IntrinsicLocationsBuilderMIPS64::VisitUnsafeCASLong(HInvoke* invoke) { - CreateIntIntIntIntIntToInt(arena_, invoke); + CreateIntIntIntIntIntToIntPlusTemps(arena_, invoke); } void IntrinsicCodeGeneratorMIPS64::VisitUnsafeCASLong(HInvoke* invoke) { - GenCas(invoke->GetLocations(), Primitive::kPrimLong, codegen_); + GenCas(invoke, Primitive::kPrimLong, codegen_); } // boolean sun.misc.Unsafe.compareAndSwapObject(Object o, long offset, Object expected, Object x) void IntrinsicLocationsBuilderMIPS64::VisitUnsafeCASObject(HInvoke* invoke) { - CreateIntIntIntIntIntToInt(arena_, invoke); + // The only read barrier implementation supporting the + // UnsafeCASObject intrinsic is the Baker-style read barriers. 
+ if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) { + return; + } + + CreateIntIntIntIntIntToIntPlusTemps(arena_, invoke); } void IntrinsicCodeGeneratorMIPS64::VisitUnsafeCASObject(HInvoke* invoke) { - GenCas(invoke->GetLocations(), Primitive::kPrimNot, codegen_); + // The only read barrier implementation supporting the + // UnsafeCASObject intrinsic is the Baker-style read barriers. + DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier); + + GenCas(invoke, Primitive::kPrimNot, codegen_); } // int java.lang.String.compareTo(String anotherString) diff --git a/compiler/optimizing/licm_test.cc b/compiler/optimizing/licm_test.cc index 5bcfa4c98b..8d15f78cce 100644 --- a/compiler/optimizing/licm_test.cc +++ b/compiler/optimizing/licm_test.cc @@ -28,7 +28,18 @@ namespace art { */ class LICMTest : public CommonCompilerTest { public: - LICMTest() : pool_(), allocator_(&pool_) { + LICMTest() + : pool_(), + allocator_(&pool_), + entry_(nullptr), + loop_preheader_(nullptr), + loop_header_(nullptr), + loop_body_(nullptr), + return_(nullptr), + exit_(nullptr), + parameter_(nullptr), + int_constant_(nullptr), + float_constant_(nullptr) { graph_ = CreateGraph(&allocator_); } diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc index 8df513f410..42ed04dfa3 100644 --- a/compiler/optimizing/loop_optimization.cc +++ b/compiler/optimizing/loop_optimization.cc @@ -16,11 +16,21 @@ #include "loop_optimization.h" +#include "arch/instruction_set.h" +#include "arch/arm/instruction_set_features_arm.h" +#include "arch/arm64/instruction_set_features_arm64.h" +#include "arch/mips/instruction_set_features_mips.h" +#include "arch/mips64/instruction_set_features_mips64.h" +#include "arch/x86/instruction_set_features_x86.h" +#include "arch/x86_64/instruction_set_features_x86_64.h" #include "driver/compiler_driver.h" #include "linear_order.h" namespace art { +// Enables vectorization (SIMDization) in the loop optimizer. +static constexpr bool kEnableVectorization = true; + // Remove the instruction from the graph. A bit more elaborate than the usual // instruction removal, since there may be a cycle in the use structure. static void RemoveFromCycle(HInstruction* instruction) { @@ -53,6 +63,19 @@ static bool IsEarlyExit(HLoopInformation* loop_info) { return false; } +// Test vector restrictions. +static bool HasVectorRestrictions(uint64_t restrictions, uint64_t tested) { + return (restrictions & tested) != 0; +} + +// Inserts an instruction. +static HInstruction* Insert(HBasicBlock* block, HInstruction* instruction) { + DCHECK(block != nullptr); + DCHECK(instruction != nullptr); + block->InsertInstructionBefore(instruction, block->GetLastInstruction()); + return instruction; +} + // // Class methods. // @@ -64,11 +87,15 @@ HLoopOptimization::HLoopOptimization(HGraph* graph, compiler_driver_(compiler_driver), induction_range_(induction_analysis), loop_allocator_(nullptr), + global_allocator_(graph_->GetArena()), top_loop_(nullptr), last_loop_(nullptr), iset_(nullptr), induction_simplication_count_(0), - simplified_(false) { + simplified_(false), + vector_length_(0), + vector_refs_(nullptr), + vector_map_(nullptr) { } void HLoopOptimization::Run() { @@ -81,15 +108,13 @@ void HLoopOptimization::Run() { // Phase-local allocator that draws from the global pool. Since the allocator // itself resides on the stack, it is destructed on exiting Run(), which // implies its underlying memory is released immediately. 
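
The comment above describes the allocator split that the hunk below tightens: scratch structures go in a stack-scoped, phase-local arena, while anything that outlives the pass (new HIR) must come from the global arena. A minimal sketch of the pattern with hypothetical types:

    #include <cstddef>
    #include <vector>

    struct ArenaPool { /* owns reusable memory chunks shared by all passes */ };

    class Arena {
     public:
      explicit Arena(ArenaPool* pool) : pool_(pool) {}
      ~Arena() {}  // would return every chunk to pool_ in one shot
      void* Alloc(size_t n) {
        chunks_.emplace_back(n);
        return chunks_.back().data();
      }
     private:
      ArenaPool* pool_;
      std::vector<std::vector<char>> chunks_;  // stand-in for bump allocation
    };

    void RunPass(ArenaPool* shared_pool, Arena* global_arena) {
      Arena phase_local(shared_pool);         // destroyed when the pass returns
      void* scratch = phase_local.Alloc(64);  // temporary sets/maps live here
      void* hir = global_arena->Alloc(32);    // new IR must outlive the pass
      (void)scratch; (void)hir;
    }
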
-  ArenaAllocator allocator(graph_->GetArena()->GetArenaPool());
+  ArenaAllocator allocator(global_allocator_->GetArenaPool());
   loop_allocator_ = &allocator;
   // Perform loop optimizations.
   LocalRun();
   if (top_loop_ == nullptr) {
-    // All loops have been eliminated.
-    graph_->SetHasLoops(false);
+    graph_->SetHasLoops(false);  // no more loops
   }
   // Detach.
@@ -111,18 +136,29 @@ void HLoopOptimization::LocalRun() {
   }
   // Traverse the loop hierarchy inner-to-outer and optimize. Traversal can use
-  // a temporary set that stores instructions using the phase-local allocator.
+  // temporary data structures using the phase-local allocator. All new HIR
+  // should use the global allocator.
   if (top_loop_ != nullptr) {
     ArenaSet<HInstruction*> iset(loop_allocator_->Adapter(kArenaAllocLoopOptimization));
+    ArenaSet<ArrayReference> refs(loop_allocator_->Adapter(kArenaAllocLoopOptimization));
+    ArenaSafeMap<HInstruction*, HInstruction*> map(
+        std::less<HInstruction*>(), loop_allocator_->Adapter(kArenaAllocLoopOptimization));
+    // Attach.
     iset_ = &iset;
+    vector_refs_ = &refs;
+    vector_map_ = &map;
+    // Traverse.
     TraverseLoopsInnerToOuter(top_loop_);
-    iset_ = nullptr;  // detach
+    // Detach.
+    iset_ = nullptr;
+    vector_refs_ = nullptr;
+    vector_map_ = nullptr;
   }
 }

 void HLoopOptimization::AddLoop(HLoopInformation* loop_info) {
   DCHECK(loop_info != nullptr);
-  LoopNode* node = new (loop_allocator_) LoopNode(loop_info);  // phase-local allocator
+  LoopNode* node = new (loop_allocator_) LoopNode(loop_info);
   if (last_loop_ == nullptr) {
     // First loop.
     DCHECK(top_loop_ == nullptr);
@@ -170,7 +206,7 @@ void HLoopOptimization::RemoveLoop(LoopNode* node) {
 void HLoopOptimization::TraverseLoopsInnerToOuter(LoopNode* node) {
   for ( ; node != nullptr; node = node->next) {
     // Visit inner loops first.
-    int current_induction_simplification_count = induction_simplication_count_;
+    uint32_t current_induction_simplification_count = induction_simplication_count_;
     if (node->inner != nullptr) {
       TraverseLoopsInnerToOuter(node->inner);
     }
@@ -179,7 +215,7 @@ void HLoopOptimization::TraverseLoopsInnerToOuter(LoopNode* node) {
     if (current_induction_simplification_count != induction_simplication_count_) {
       induction_range_.ReVisit(node->loop_info);
     }
-    // Repeat simplifications in the body of this loop until no more changes occur.
+    // Repeat simplifications in the loop-body until no more changes occur.
     // Note that since each simplification consists of eliminating code (without
     // introducing new code), this process is always finite.
     do {
@@ -187,13 +223,17 @@ void HLoopOptimization::TraverseLoopsInnerToOuter(LoopNode* node) {
       SimplifyInduction(node);
       SimplifyBlocks(node);
     } while (simplified_);
-    // Simplify inner loop.
+    // Optimize inner loop.
     if (node->inner == nullptr) {
-      SimplifyInnerLoop(node);
+      OptimizeInnerLoop(node);
     }
   }
 }

+//
+// Optimization.
+//
+
 void HLoopOptimization::SimplifyInduction(LoopNode* node) {
   HBasicBlock* header = node->loop_info->GetHeader();
   HBasicBlock* preheader = node->loop_info->GetPreHeader();
@@ -204,13 +244,9 @@ void HLoopOptimization::SimplifyInduction(LoopNode* node) {
   // for (int i = 0; i < 10; i++, k++) { .... no k .... } return k;
   for (HInstructionIterator it(header->GetPhis()); !it.Done(); it.Advance()) {
     HPhi* phi = it.Current()->AsPhi();
-    iset_->clear();
-    int32_t use_count = 0;
-    if (IsPhiInduction(phi) &&
-        IsOnlyUsedAfterLoop(node->loop_info, phi, /*collect_loop_uses*/ false, &use_count) &&
-        // No uses, or no early-exit with proper replacement.
- (use_count == 0 || - (!IsEarlyExit(node->loop_info) && TryReplaceWithLastValue(phi, preheader)))) { + iset_->clear(); // prepare phi induction + if (TrySetPhiInduction(phi, /*restrict_uses*/ true) && + TryAssignLastValue(node->loop_info, phi, preheader, /*collect_loop_uses*/ false)) { for (HInstruction* i : *iset_) { RemoveFromCycle(i); } @@ -256,49 +292,47 @@ void HLoopOptimization::SimplifyBlocks(LoopNode* node) { } } -bool HLoopOptimization::SimplifyInnerLoop(LoopNode* node) { +void HLoopOptimization::OptimizeInnerLoop(LoopNode* node) { HBasicBlock* header = node->loop_info->GetHeader(); HBasicBlock* preheader = node->loop_info->GetPreHeader(); // Ensure loop header logic is finite. - int64_t tc = 0; - if (!induction_range_.IsFinite(node->loop_info, &tc)) { - return false; + int64_t trip_count = 0; + if (!induction_range_.IsFinite(node->loop_info, &trip_count)) { + return; } + // Ensure there is only a single loop-body (besides the header). HBasicBlock* body = nullptr; for (HBlocksInLoopIterator it(*node->loop_info); !it.Done(); it.Advance()) { if (it.Current() != header) { if (body != nullptr) { - return false; + return; } body = it.Current(); } } // Ensure there is only a single exit point. if (header->GetSuccessors().size() != 2) { - return false; + return; } HBasicBlock* exit = (header->GetSuccessors()[0] == body) ? header->GetSuccessors()[1] : header->GetSuccessors()[0]; // Ensure exit can only be reached by exiting loop. if (exit->GetPredecessors().size() != 1) { - return false; + return; } // Detect either an empty loop (no side effects other than plain iteration) or // a trivial loop (just iterating once). Replace subsequent index uses, if any, // with the last value and remove the loop, possibly after unrolling its body. HInstruction* phi = header->GetFirstPhi(); - iset_->clear(); - int32_t use_count = 0; - if (IsEmptyHeader(header)) { + iset_->clear(); // prepare phi induction + if (TrySetSimpleLoopHeader(header)) { bool is_empty = IsEmptyBody(body); - if ((is_empty || tc == 1) && - IsOnlyUsedAfterLoop(node->loop_info, phi, /*collect_loop_uses*/ true, &use_count) && - // No uses, or proper replacement. - (use_count == 0 || TryReplaceWithLastValue(phi, preheader))) { + if ((is_empty || trip_count == 1) && + TryAssignLastValue(node->loop_info, phi, preheader, /*collect_loop_uses*/ true)) { if (!is_empty) { - // Unroll the loop body, which sees initial value of the index. + // Unroll the loop-body, which sees initial value of the index. phi->ReplaceWith(phi->InputAt(0)); preheader->MergeInstructionsWith(body); } @@ -308,28 +342,649 @@ bool HLoopOptimization::SimplifyInnerLoop(LoopNode* node) { header->RemoveDominatedBlock(exit); header->DisconnectAndDelete(); preheader->AddSuccessor(exit); - preheader->AddInstruction(new (graph_->GetArena()) HGoto()); // global allocator + preheader->AddInstruction(new (global_allocator_) HGoto()); preheader->AddDominatedBlock(exit); exit->SetDominator(preheader); RemoveLoop(node); // update hierarchy + return; + } + } + + // Vectorize loop, if possible and valid. + if (kEnableVectorization) { + iset_->clear(); // prepare phi induction + if (TrySetSimpleLoopHeader(header) && + CanVectorize(node, body, trip_count) && + TryAssignLastValue(node->loop_info, phi, preheader, /*collect_loop_uses*/ true)) { + Vectorize(node, body, exit, trip_count); + graph_->SetHasSIMD(true); // flag SIMD usage + return; + } + } +} + +// +// Loop vectorization. The implementation is based on the book by Aart J.C. Bik: +// "The Software Vectorization Handbook. 
Applying Multimedia Extensions for Maximum Performance."
+// Intel Press, June, 2004 (http://www.aartbik.com/).
+//
+
+bool HLoopOptimization::CanVectorize(LoopNode* node, HBasicBlock* block, int64_t trip_count) {
+  // Reset vector bookkeeping.
+  vector_length_ = 0;
+  vector_refs_->clear();
+  vector_runtime_test_a_ =
+  vector_runtime_test_b_ = nullptr;
+
+  // Phis in the loop-body prevent vectorization.
+  if (!block->GetPhis().IsEmpty()) {
+    return false;
+  }
+
+  // Scan the loop-body, starting a right-hand-side tree traversal at each left-hand-side
+  // occurrence, which allows passing attributes down the use tree.
+  for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
+    if (!VectorizeDef(node, it.Current(), /*generate_code*/ false)) {
+      return false;  // failure to vectorize a left-hand-side
+    }
+  }
+
+  // Heuristics. Does vectorization seem profitable?
+  // TODO: refine
+  if (vector_length_ == 0) {
+    return false;  // nothing found
+  } else if (0 < trip_count && trip_count < vector_length_) {
+    return false;  // insufficient iterations
+  }
+
+  // Data dependence analysis. Find each pair of references with same type, where
+  // at least one is a write. Each such pair denotes a possible data dependence.
+  // This analysis exploits the property that differently typed arrays cannot be
+  // aliased, as well as the property that references either point to the same
+  // array or to two completely disjoint arrays, i.e., no partial aliasing.
+  // Other than a few simple heuristics, no detailed subscript analysis is done.
+  for (auto i = vector_refs_->begin(); i != vector_refs_->end(); ++i) {
+    for (auto j = i; ++j != vector_refs_->end(); ) {
+      if (i->type == j->type && (i->lhs || j->lhs)) {
+        // Found same-typed a[i+x] vs. b[i+y], where at least one is a write.
+        HInstruction* a = i->base;
+        HInstruction* b = j->base;
+        HInstruction* x = i->offset;
+        HInstruction* y = j->offset;
+        if (a == b) {
+          // Found a[i+x] vs. a[i+y]. Accept if x == y (loop-independent data dependence).
+          // Conservatively assume a loop-carried data dependence otherwise, and reject.
+          if (x != y) {
+            return false;
+          }
+        } else {
+          // Found a[i+x] vs. b[i+y]. Accept if x == y (at worst loop-independent data dependence).
+          // Conservatively assume a potential loop-carried data dependence otherwise, avoided by
+          // generating an explicit a != b disambiguation runtime test on the two references.
+          if (x != y) {
+            // For now, we reject after one test to avoid excessive overhead.
+            if (vector_runtime_test_a_ != nullptr) {
+              return false;
+            }
+            vector_runtime_test_a_ = a;
+            vector_runtime_test_b_ = b;
+          }
+        }
+      }
+    }
+  }
+
+  // Success!
+  return true;
+}
+
+void HLoopOptimization::Vectorize(LoopNode* node,
+                                  HBasicBlock* block,
+                                  HBasicBlock* exit,
+                                  int64_t trip_count) {
+  Primitive::Type induc_type = Primitive::kPrimInt;
+  HBasicBlock* header = node->loop_info->GetHeader();
+  HBasicBlock* preheader = node->loop_info->GetPreHeader();
+
+  // A cleanup is needed for any unknown trip count or for a known trip count
+  // with remainder iterations after vectorization.
+  bool needs_cleanup = trip_count == 0 || (trip_count % vector_length_) != 0;
+
+  // Adjust vector bookkeeping.
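+  // Worked example of the cleanup decision (a sketch, not code in the patch):
+  //   trip_count = 10, vector_length_ = 4:
+  //     10 % 4 = 2 != 0, so needs_cleanup is true;
+  //     the vector loop below covers iterations [0,8), the cleanup loop [8,10).
+  //   trip_count = 0 (statically unknown): a cleanup loop is always generated.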
+  iset_->clear();  // prepare phi induction
+  bool is_simple_loop_header = TrySetSimpleLoopHeader(header);  // fills iset_
+  DCHECK(is_simple_loop_header);
+
+  // Generate preheader:
+  // stc = <trip-count>;
+  // vtc = stc - stc % VL;
+  HInstruction* stc = induction_range_.GenerateTripCount(node->loop_info, graph_, preheader);
+  HInstruction* vtc = stc;
+  if (needs_cleanup) {
+    DCHECK(IsPowerOfTwo(vector_length_));
+    HInstruction* rem = Insert(
+        preheader, new (global_allocator_) HAnd(induc_type,
+                                                stc,
+                                                graph_->GetIntConstant(vector_length_ - 1)));
+    vtc = Insert(preheader, new (global_allocator_) HSub(induc_type, stc, rem));
+  }
+
+  // Generate runtime disambiguation test:
+  // vtc = a != b ? vtc : 0;
+  if (vector_runtime_test_a_ != nullptr) {
+    HInstruction* rt = Insert(
+        preheader,
+        new (global_allocator_) HNotEqual(vector_runtime_test_a_, vector_runtime_test_b_));
+    vtc = Insert(preheader,
+                 new (global_allocator_) HSelect(rt, vtc, graph_->GetIntConstant(0), kNoDexPc));
+    needs_cleanup = true;
+  }
+
+  // Generate vector loop:
+  // for (i = 0; i < vtc; i += VL)
+  //    <vectorized-loop-body>
+  vector_mode_ = kVector;
+  GenerateNewLoop(node,
+                  block,
+                  graph_->TransformLoopForVectorization(header, block, exit),
+                  graph_->GetIntConstant(0),
+                  vtc,
+                  graph_->GetIntConstant(vector_length_));
+  HLoopInformation* vloop = vector_header_->GetLoopInformation();
+
+  // Generate cleanup loop, if needed:
+  // for ( ; i < stc; i += 1)
+  //    <loop-body>
+  if (needs_cleanup) {
+    vector_mode_ = kSequential;
+    GenerateNewLoop(node,
+                    block,
+                    graph_->TransformLoopForVectorization(vector_header_, vector_body_, exit),
+                    vector_phi_,
+                    stc,
+                    graph_->GetIntConstant(1));
+  }
+
+  // Remove the original loop by disconnecting the body block
+  // and removing all instructions from the header.
+  block->DisconnectAndDelete();
+  while (!header->GetFirstInstruction()->IsGoto()) {
+    header->RemoveInstruction(header->GetFirstInstruction());
+  }
+  // Update loop hierarchy: the old header now resides in the
+  // same outer loop as the old preheader.
+  header->SetLoopInformation(preheader->GetLoopInformation());  // outward
+  node->loop_info = vloop;
+}
+
+void HLoopOptimization::GenerateNewLoop(LoopNode* node,
+                                        HBasicBlock* block,
+                                        HBasicBlock* new_preheader,
+                                        HInstruction* lo,
+                                        HInstruction* hi,
+                                        HInstruction* step) {
+  Primitive::Type induc_type = Primitive::kPrimInt;
+  // Prepare new loop.
+  vector_map_->clear();
+  vector_preheader_ = new_preheader;
+  vector_header_ = vector_preheader_->GetSingleSuccessor();
+  vector_body_ = vector_header_->GetSuccessors()[1];
+  vector_phi_ = new (global_allocator_) HPhi(global_allocator_,
+                                             kNoRegNumber,
+                                             0,
+                                             HPhi::ToPhiType(induc_type));
+  // Generate header.
+  // for (i = lo; i < hi; i += step)
+  //    <loop-body>
+  HInstruction* cond = new (global_allocator_) HAboveOrEqual(vector_phi_, hi);
+  vector_header_->AddPhi(vector_phi_);
+  vector_header_->AddInstruction(cond);
+  vector_header_->AddInstruction(new (global_allocator_) HIf(cond));
+  // Suspend check and environment.
+  HInstruction* suspend = vector_header_->GetFirstInstruction();
+  suspend->CopyEnvironmentFromWithLoopPhiAdjustment(
+      node->loop_info->GetSuspendCheck()->GetEnvironment(), vector_header_);
+  // Generate body.
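+  // The header synthesized above gives the new loop this shape (sketch):
+  //
+  //     i = lo
+  //   header:
+  //     suspend_check
+  //     if (i >= hi) goto exit    // HAboveOrEqual: unsigned compare, safe since
+  //   body:                       // i counts up from a non-negative lo
+  //     <instructions filled in below>
+  //     i += step; goto header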
+ for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { + bool vectorized_def = VectorizeDef(node, it.Current(), /*generate_code*/ true); + DCHECK(vectorized_def); + } + for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { + auto i = vector_map_->find(it.Current()); + if (i != vector_map_->end() && !i->second->IsInBlock()) { + Insert(vector_body_, i->second); // lays out in original order + if (i->second->NeedsEnvironment()) { + i->second->CopyEnvironmentFromWithLoopPhiAdjustment( + suspend->GetEnvironment(), vector_header_); + } + } + } + // Finalize increment and phi. + HInstruction* inc = new (global_allocator_) HAdd(induc_type, vector_phi_, step); + vector_phi_->AddInput(lo); + vector_phi_->AddInput(Insert(vector_body_, inc)); +} + +// TODO: accept reductions at left-hand-side, mixed-type store idioms, etc. +bool HLoopOptimization::VectorizeDef(LoopNode* node, + HInstruction* instruction, + bool generate_code) { + // Accept a left-hand-side array base[index] for + // (1) supported vector type, + // (2) loop-invariant base, + // (3) unit stride index, + // (4) vectorizable right-hand-side value. + uint64_t restrictions = kNone; + if (instruction->IsArraySet()) { + Primitive::Type type = instruction->AsArraySet()->GetComponentType(); + HInstruction* base = instruction->InputAt(0); + HInstruction* index = instruction->InputAt(1); + HInstruction* value = instruction->InputAt(2); + HInstruction* offset = nullptr; + if (TrySetVectorType(type, &restrictions) && + node->loop_info->IsDefinedOutOfTheLoop(base) && + induction_range_.IsUnitStride(index, &offset) && + VectorizeUse(node, value, generate_code, type, restrictions)) { + if (generate_code) { + GenerateVecSub(index, offset); + GenerateVecMem(instruction, vector_map_->Get(index), vector_map_->Get(value), type); + } else { + vector_refs_->insert(ArrayReference(base, offset, type, /*lhs*/ true)); + } return true; } + return false; + } + // Branch back okay. + if (instruction->IsGoto()) { + return true; + } + // Otherwise accept only expressions with no effects outside the immediate loop-body. + // Note that actual uses are inspected during right-hand-side tree traversal. + return !IsUsedOutsideLoop(node->loop_info, instruction) && !instruction->DoesAnyWrite(); +} + +// TODO: more operations and intrinsics, detect saturation arithmetic, etc. +bool HLoopOptimization::VectorizeUse(LoopNode* node, + HInstruction* instruction, + bool generate_code, + Primitive::Type type, + uint64_t restrictions) { + // Accept anything for which code has already been generated. + if (generate_code) { + if (vector_map_->find(instruction) != vector_map_->end()) { + return true; + } + } + // Continue the right-hand-side tree traversal, passing in proper + // types and vector restrictions along the way. During code generation, + // all new nodes are drawn from the global allocator. + if (node->loop_info->IsDefinedOutOfTheLoop(instruction)) { + // Accept invariant use, using scalar expansion. + if (generate_code) { + GenerateVecInv(instruction, type); + } + return true; + } else if (instruction->IsArrayGet()) { + // Accept a right-hand-side array base[index] for + // (1) exact matching vector type, + // (2) loop-invariant base, + // (3) unit stride index, + // (4) vectorizable right-hand-side value. 
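+  // For example (sketch), in
+  //   for (int i = 0; i < n; i++) { a[i] = b[i] + 1; }
+  // VectorizeDef accepts the store a[i] = ..., and VectorizeUse then walks the
+  // right-hand side, accepting the load b[i] here and the loop-invariant
+  // constant 1 through scalar expansion (GenerateVecInv).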
+ HInstruction* base = instruction->InputAt(0); + HInstruction* index = instruction->InputAt(1); + HInstruction* offset = nullptr; + if (type == instruction->GetType() && + node->loop_info->IsDefinedOutOfTheLoop(base) && + induction_range_.IsUnitStride(index, &offset)) { + if (generate_code) { + GenerateVecSub(index, offset); + GenerateVecMem(instruction, vector_map_->Get(index), nullptr, type); + } else { + vector_refs_->insert(ArrayReference(base, offset, type, /*lhs*/ false)); + } + return true; + } + } else if (instruction->IsTypeConversion()) { + // Accept particular type conversions. + HTypeConversion* conversion = instruction->AsTypeConversion(); + HInstruction* opa = conversion->InputAt(0); + Primitive::Type from = conversion->GetInputType(); + Primitive::Type to = conversion->GetResultType(); + if ((to == Primitive::kPrimByte || + to == Primitive::kPrimChar || + to == Primitive::kPrimShort) && from == Primitive::kPrimInt) { + // Accept a "narrowing" type conversion from a "wider" computation for + // (1) conversion into final required type, + // (2) vectorizable operand, + // (3) "wider" operations cannot bring in higher order bits. + if (to == type && VectorizeUse(node, opa, generate_code, type, restrictions | kNoHiBits)) { + if (generate_code) { + if (vector_mode_ == kVector) { + vector_map_->Put(instruction, vector_map_->Get(opa)); // operand pass-through + } else { + GenerateVecOp(instruction, vector_map_->Get(opa), nullptr, type); + } + } + return true; + } + } else if (to == Primitive::kPrimFloat && from == Primitive::kPrimInt) { + DCHECK_EQ(to, type); + // Accept int to float conversion for + // (1) supported int, + // (2) vectorizable operand. + if (TrySetVectorType(from, &restrictions) && + VectorizeUse(node, opa, generate_code, from, restrictions)) { + if (generate_code) { + GenerateVecOp(instruction, vector_map_->Get(opa), nullptr, type); + } + return true; + } + } + return false; + } else if (instruction->IsNeg() || instruction->IsNot() || instruction->IsBooleanNot()) { + // Accept unary operator for vectorizable operand. + HInstruction* opa = instruction->InputAt(0); + if (VectorizeUse(node, opa, generate_code, type, restrictions)) { + if (generate_code) { + GenerateVecOp(instruction, vector_map_->Get(opa), nullptr, type); + } + return true; + } + } else if (instruction->IsAdd() || instruction->IsSub() || + instruction->IsMul() || instruction->IsDiv() || + instruction->IsAnd() || instruction->IsOr() || instruction->IsXor()) { + // Deal with vector restrictions. + if ((instruction->IsMul() && HasVectorRestrictions(restrictions, kNoMul)) || + (instruction->IsDiv() && HasVectorRestrictions(restrictions, kNoDiv))) { + return false; + } + // Accept binary operator for vectorizable operands. + HInstruction* opa = instruction->InputAt(0); + HInstruction* opb = instruction->InputAt(1); + if (VectorizeUse(node, opa, generate_code, type, restrictions) && + VectorizeUse(node, opb, generate_code, type, restrictions)) { + if (generate_code) { + GenerateVecOp(instruction, vector_map_->Get(opa), vector_map_->Get(opb), type); + } + return true; + } + } else if (instruction->IsShl() || instruction->IsShr() || instruction->IsUShr()) { + // Deal with vector restrictions. 
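+  // For instance (sketch): for byte data reached through a narrowing conversion,
+  // kNoHiBits is set, and an arithmetic shift such as
+  //   (byte) ((b[i] + c[i]) >> 1)
+  // is rejected below: a byte lane no longer holds the high-order bits of the
+  // wider int sum that the shift would bring down.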
+ if ((HasVectorRestrictions(restrictions, kNoShift)) || + (instruction->IsShr() && HasVectorRestrictions(restrictions, kNoShr))) { + return false; // unsupported instruction + } else if ((instruction->IsShr() || instruction->IsUShr()) && + HasVectorRestrictions(restrictions, kNoHiBits)) { + return false; // hibits may impact lobits; TODO: we can do better! + } + // Accept shift operator for vectorizable/invariant operands. + // TODO: accept symbolic, albeit loop invariant shift factors. + HInstruction* opa = instruction->InputAt(0); + HInstruction* opb = instruction->InputAt(1); + if (VectorizeUse(node, opa, generate_code, type, restrictions) && opb->IsIntConstant()) { + if (generate_code) { + // Make sure shift factor only looks at lower bits, as defined for sequential shifts. + // Note that even the narrower SIMD shifts do the right thing after that. + int32_t mask = (instruction->GetType() == Primitive::kPrimLong) + ? kMaxLongShiftDistance + : kMaxIntShiftDistance; + HInstruction* s = graph_->GetIntConstant(opb->AsIntConstant()->GetValue() & mask); + GenerateVecOp(instruction, vector_map_->Get(opa), s, type); + } + return true; + } + } else if (instruction->IsInvokeStaticOrDirect()) { + // TODO: coming soon. + return false; } return false; } -bool HLoopOptimization::IsPhiInduction(HPhi* phi) { +bool HLoopOptimization::TrySetVectorType(Primitive::Type type, uint64_t* restrictions) { + const InstructionSetFeatures* features = compiler_driver_->GetInstructionSetFeatures(); + switch (compiler_driver_->GetInstructionSet()) { + case kArm: + case kThumb2: + return false; + case kArm64: + // Allow vectorization for all ARM devices, because Android assumes that + // ARMv8 AArch64 always supports advanced SIMD. For now, only D registers + // (64-bit vectors) not Q registers (128-bit vectors). + switch (type) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + *restrictions |= kNoDiv; + return TrySetVectorLength(8); + case Primitive::kPrimChar: + case Primitive::kPrimShort: + *restrictions |= kNoDiv; + return TrySetVectorLength(4); + case Primitive::kPrimInt: + *restrictions |= kNoDiv; + return TrySetVectorLength(2); + case Primitive::kPrimFloat: + return TrySetVectorLength(2); + default: + return false; + } + case kX86: + case kX86_64: + // Allow vectorization for SSE4-enabled X86 devices only (128-bit vectors). + if (features->AsX86InstructionSetFeatures()->HasSSE4_1()) { + switch (type) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + *restrictions |= kNoMul | kNoDiv | kNoShift; + return TrySetVectorLength(16); + case Primitive::kPrimChar: + case Primitive::kPrimShort: + *restrictions |= kNoDiv; + return TrySetVectorLength(8); + case Primitive::kPrimInt: + *restrictions |= kNoDiv; + return TrySetVectorLength(4); + case Primitive::kPrimLong: + *restrictions |= kNoMul | kNoDiv | kNoShr; + return TrySetVectorLength(2); + case Primitive::kPrimFloat: + return TrySetVectorLength(4); + case Primitive::kPrimDouble: + return TrySetVectorLength(2); + default: + break; + } // switch type + } + return false; + case kMips: + case kMips64: + // TODO: implement MIPS SIMD. + return false; + default: + return false; + } // switch instruction set +} + +bool HLoopOptimization::TrySetVectorLength(uint32_t length) { + DCHECK(IsPowerOfTwo(length) && length >= 2u); + // First time set? 
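+  // The lane counts chosen above follow bytes-per-vector-register divided by
+  // component size (sketch):
+  //   ARM64 D register,  8 bytes:  8 byte lanes, 4 char/short lanes, 2 int/float lanes
+  //   x86 SSE4.1 XMM,   16 bytes: 16 byte lanes, 8 char/short lanes,
+  //                               4 int/float lanes, 2 long/double lanes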
+ if (vector_length_ == 0) { + vector_length_ = length; + } + // Different types are acceptable within a loop-body, as long as all the corresponding vector + // lengths match exactly to obtain a uniform traversal through the vector iteration space + // (idiomatic exceptions to this rule can be handled by further unrolling sub-expressions). + return vector_length_ == length; +} + +void HLoopOptimization::GenerateVecInv(HInstruction* org, Primitive::Type type) { + if (vector_map_->find(org) == vector_map_->end()) { + // In scalar code, just use a self pass-through for scalar invariants + // (viz. expression remains itself). + if (vector_mode_ == kSequential) { + vector_map_->Put(org, org); + return; + } + // In vector code, explicit scalar expansion is needed. + HInstruction* vector = new (global_allocator_) HVecReplicateScalar( + global_allocator_, org, type, vector_length_); + vector_map_->Put(org, Insert(vector_preheader_, vector)); + } +} + +void HLoopOptimization::GenerateVecSub(HInstruction* org, HInstruction* offset) { + if (vector_map_->find(org) == vector_map_->end()) { + HInstruction* subscript = vector_phi_; + if (offset != nullptr) { + subscript = new (global_allocator_) HAdd(Primitive::kPrimInt, subscript, offset); + if (org->IsPhi()) { + Insert(vector_body_, subscript); // lacks layout placeholder + } + } + vector_map_->Put(org, subscript); + } +} + +void HLoopOptimization::GenerateVecMem(HInstruction* org, + HInstruction* opa, + HInstruction* opb, + Primitive::Type type) { + HInstruction* vector = nullptr; + if (vector_mode_ == kVector) { + // Vector store or load. + if (opb != nullptr) { + vector = new (global_allocator_) HVecStore( + global_allocator_, org->InputAt(0), opa, opb, type, vector_length_); + } else { + vector = new (global_allocator_) HVecLoad( + global_allocator_, org->InputAt(0), opa, type, vector_length_); + } + } else { + // Scalar store or load. + DCHECK(vector_mode_ == kSequential); + if (opb != nullptr) { + vector = new (global_allocator_) HArraySet(org->InputAt(0), opa, opb, type, kNoDexPc); + } else { + vector = new (global_allocator_) HArrayGet(org->InputAt(0), opa, type, kNoDexPc); + } + } + vector_map_->Put(org, vector); +} + +#define GENERATE_VEC(x, y) \ + if (vector_mode_ == kVector) { \ + vector = (x); \ + } else { \ + DCHECK(vector_mode_ == kSequential); \ + vector = (y); \ + } \ + break; + +void HLoopOptimization::GenerateVecOp(HInstruction* org, + HInstruction* opa, + HInstruction* opb, + Primitive::Type type) { + if (vector_mode_ == kSequential) { + // Scalar code follows implicit integral promotion. 
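+    // E.g. (sketch): in the sequential cleanup loop, a byte-typed addition is
+    // recreated as an int HAdd, matching the Java rule that byte + byte is
+    // computed in int; only the store narrows the result back.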
+ if (type == Primitive::kPrimBoolean || + type == Primitive::kPrimByte || + type == Primitive::kPrimChar || + type == Primitive::kPrimShort) { + type = Primitive::kPrimInt; + } + } + HInstruction* vector = nullptr; + switch (org->GetKind()) { + case HInstruction::kNeg: + DCHECK(opb == nullptr); + GENERATE_VEC( + new (global_allocator_) HVecNeg(global_allocator_, opa, type, vector_length_), + new (global_allocator_) HNeg(type, opa)); + case HInstruction::kNot: + DCHECK(opb == nullptr); + GENERATE_VEC( + new (global_allocator_) HVecNot(global_allocator_, opa, type, vector_length_), + new (global_allocator_) HNot(type, opa)); + case HInstruction::kBooleanNot: + DCHECK(opb == nullptr); + GENERATE_VEC( + new (global_allocator_) HVecNot(global_allocator_, opa, type, vector_length_), + new (global_allocator_) HBooleanNot(opa)); + case HInstruction::kTypeConversion: + DCHECK(opb == nullptr); + GENERATE_VEC( + new (global_allocator_) HVecCnv(global_allocator_, opa, type, vector_length_), + new (global_allocator_) HTypeConversion(type, opa, kNoDexPc)); + case HInstruction::kAdd: + GENERATE_VEC( + new (global_allocator_) HVecAdd(global_allocator_, opa, opb, type, vector_length_), + new (global_allocator_) HAdd(type, opa, opb)); + case HInstruction::kSub: + GENERATE_VEC( + new (global_allocator_) HVecSub(global_allocator_, opa, opb, type, vector_length_), + new (global_allocator_) HSub(type, opa, opb)); + case HInstruction::kMul: + GENERATE_VEC( + new (global_allocator_) HVecMul(global_allocator_, opa, opb, type, vector_length_), + new (global_allocator_) HMul(type, opa, opb)); + case HInstruction::kDiv: + GENERATE_VEC( + new (global_allocator_) HVecDiv(global_allocator_, opa, opb, type, vector_length_), + new (global_allocator_) HDiv(type, opa, opb, kNoDexPc)); + case HInstruction::kAnd: + GENERATE_VEC( + new (global_allocator_) HVecAnd(global_allocator_, opa, opb, type, vector_length_), + new (global_allocator_) HAnd(type, opa, opb)); + case HInstruction::kOr: + GENERATE_VEC( + new (global_allocator_) HVecOr(global_allocator_, opa, opb, type, vector_length_), + new (global_allocator_) HOr(type, opa, opb)); + case HInstruction::kXor: + GENERATE_VEC( + new (global_allocator_) HVecXor(global_allocator_, opa, opb, type, vector_length_), + new (global_allocator_) HXor(type, opa, opb)); + case HInstruction::kShl: + GENERATE_VEC( + new (global_allocator_) HVecShl(global_allocator_, opa, opb, type, vector_length_), + new (global_allocator_) HShl(type, opa, opb)); + case HInstruction::kShr: + GENERATE_VEC( + new (global_allocator_) HVecShr(global_allocator_, opa, opb, type, vector_length_), + new (global_allocator_) HShr(type, opa, opb)); + case HInstruction::kUShr: + GENERATE_VEC( + new (global_allocator_) HVecUShr(global_allocator_, opa, opb, type, vector_length_), + new (global_allocator_) HUShr(type, opa, opb)); + case HInstruction::kInvokeStaticOrDirect: { + // TODO: coming soon. + break; + } + default: + break; + } // switch + CHECK(vector != nullptr) << "Unsupported SIMD operator"; + vector_map_->Put(org, vector); +} + +#undef GENERATE_VEC + +// +// Helpers. +// + +bool HLoopOptimization::TrySetPhiInduction(HPhi* phi, bool restrict_uses) { + DCHECK(iset_->empty()); ArenaSet<HInstruction*>* set = induction_range_.LookupCycle(phi); if (set != nullptr) { - DCHECK(iset_->empty()); for (HInstruction* i : *set) { // Check that, other than instructions that are no longer in the graph (removed earlier) - // each instruction is removable and, other than the phi, uses are contained in the cycle. 
+ // each instruction is removable and, when restrict uses are requested, other than for phi, + // all uses are contained within the cycle. if (!i->IsInBlock()) { continue; } else if (!i->IsRemovable()) { return false; - } else if (i != phi) { + } else if (i != phi && restrict_uses) { for (const HUseListNode<HInstruction*>& use : i->GetUses()) { if (set->find(use.GetUser()) == set->end()) { return false; @@ -348,10 +1003,12 @@ bool HLoopOptimization::IsPhiInduction(HPhi* phi) { // c: Condition(phi, bound) // i: If(c) // TODO: Find a less pattern matching approach? -bool HLoopOptimization::IsEmptyHeader(HBasicBlock* block) { +bool HLoopOptimization::TrySetSimpleLoopHeader(HBasicBlock* block) { DCHECK(iset_->empty()); HInstruction* phi = block->GetFirstPhi(); - if (phi != nullptr && phi->GetNext() == nullptr && IsPhiInduction(phi->AsPhi())) { + if (phi != nullptr && + phi->GetNext() == nullptr && + TrySetPhiInduction(phi->AsPhi(), /*restrict_uses*/ false)) { HInstruction* s = block->GetFirstInstruction(); if (s != nullptr && s->IsSuspendCheck()) { HInstruction* c = s->GetNext(); @@ -369,14 +1026,24 @@ bool HLoopOptimization::IsEmptyHeader(HBasicBlock* block) { } bool HLoopOptimization::IsEmptyBody(HBasicBlock* block) { - if (block->GetFirstPhi() == nullptr) { - for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { - HInstruction* instruction = it.Current(); - if (!instruction->IsGoto() && iset_->find(instruction) == iset_->end()) { - return false; - } + if (!block->GetPhis().IsEmpty()) { + return false; + } + for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { + HInstruction* instruction = it.Current(); + if (!instruction->IsGoto() && iset_->find(instruction) == iset_->end()) { + return false; + } + } + return true; +} + +bool HLoopOptimization::IsUsedOutsideLoop(HLoopInformation* loop_info, + HInstruction* instruction) { + for (const HUseListNode<HInstruction*>& use : instruction->GetUses()) { + if (use.GetUser()->GetBlock()->GetLoopInformation() != loop_info) { + return true; } - return true; } return false; } @@ -438,6 +1105,19 @@ bool HLoopOptimization::TryReplaceWithLastValue(HInstruction* instruction, HBasi return false; } +bool HLoopOptimization::TryAssignLastValue(HLoopInformation* loop_info, + HInstruction* instruction, + HBasicBlock* block, + bool collect_loop_uses) { + // Assigning the last value is always successful if there are no uses. + // Otherwise, it succeeds in a no early-exit loop by generating the + // proper last value assignment. + int32_t use_count = 0; + return IsOnlyUsedAfterLoop(loop_info, instruction, collect_loop_uses, &use_count) && + (use_count == 0 || + (!IsEarlyExit(loop_info) && TryReplaceWithLastValue(instruction, block))); +} + void HLoopOptimization::RemoveDeadInstructions(const HInstructionList& list) { for (HBackwardInstructionIterator i(list); !i.Done(); i.Advance()) { HInstruction* instruction = i.Current(); diff --git a/compiler/optimizing/loop_optimization.h b/compiler/optimizing/loop_optimization.h index 0b798fc7a9..16f7691af2 100644 --- a/compiler/optimizing/loop_optimization.h +++ b/compiler/optimizing/loop_optimization.h @@ -27,7 +27,8 @@ class CompilerDriver; /** * Loop optimizations. Builds a loop hierarchy and applies optimizations to - * the detected nested loops, such as removal of dead induction and empty loops. + * the detected nested loops, such as removal of dead induction and empty loops + * and inner loop vectorization. 
*/ class HLoopOptimization : public HOptimization { public: @@ -50,34 +51,105 @@ class HLoopOptimization : public HOptimization { inner(nullptr), previous(nullptr), next(nullptr) {} - HLoopInformation* const loop_info; + HLoopInformation* loop_info; LoopNode* outer; LoopNode* inner; LoopNode* previous; LoopNode* next; }; - void LocalRun(); + /* + * Vectorization restrictions (bit mask). + */ + enum VectorRestrictions { + kNone = 0, // no restrictions + kNoMul = 1, // no multiplication + kNoDiv = 2, // no division + kNoShift = 4, // no shift + kNoShr = 8, // no arithmetic shift right + kNoHiBits = 16, // "wider" operations cannot bring in higher order bits + }; + + /* + * Vectorization mode during synthesis + * (sequential peeling/cleanup loop or vector loop). + */ + enum VectorMode { + kSequential, + kVector + }; + + /* + * Representation of a unit-stride array reference. + */ + struct ArrayReference { + ArrayReference(HInstruction* b, HInstruction* o, Primitive::Type t, bool l) + : base(b), offset(o), type(t), lhs(l) { } + bool operator<(const ArrayReference& other) const { + return + (base < other.base) || + (base == other.base && + (offset < other.offset || (offset == other.offset && + (type < other.type || + (type == other.type && lhs < other.lhs))))); + } + HInstruction* base; // base address + HInstruction* offset; // offset + i + Primitive::Type type; // component type + bool lhs; // def/use + }; + // Loop setup and traversal. + void LocalRun(); void AddLoop(HLoopInformation* loop_info); void RemoveLoop(LoopNode* node); - void TraverseLoopsInnerToOuter(LoopNode* node); - // Simplification. + // Optimization. void SimplifyInduction(LoopNode* node); void SimplifyBlocks(LoopNode* node); - bool SimplifyInnerLoop(LoopNode* node); + void OptimizeInnerLoop(LoopNode* node); + + // Vectorization analysis and synthesis. + bool CanVectorize(LoopNode* node, HBasicBlock* block, int64_t trip_count); + void Vectorize(LoopNode* node, HBasicBlock* block, HBasicBlock* exit, int64_t trip_count); + void GenerateNewLoop(LoopNode* node, + HBasicBlock* block, + HBasicBlock* new_preheader, + HInstruction* lo, + HInstruction* hi, + HInstruction* step); + bool VectorizeDef(LoopNode* node, HInstruction* instruction, bool generate_code); + bool VectorizeUse(LoopNode* node, + HInstruction* instruction, + bool generate_code, + Primitive::Type type, + uint64_t restrictions); + bool TrySetVectorType(Primitive::Type type, /*out*/ uint64_t* restrictions); + bool TrySetVectorLength(uint32_t length); + void GenerateVecInv(HInstruction* org, Primitive::Type type); + void GenerateVecSub(HInstruction* org, HInstruction* off); + void GenerateVecMem(HInstruction* org, + HInstruction* opa, + HInstruction* opb, + Primitive::Type type); + void GenerateVecOp(HInstruction* org, HInstruction* opa, HInstruction* opb, Primitive::Type type); // Helpers. 
-  bool IsPhiInduction(HPhi* phi);
-  bool IsEmptyHeader(HBasicBlock* block);
+  bool TrySetPhiInduction(HPhi* phi, bool restrict_uses);
+  bool TrySetSimpleLoopHeader(HBasicBlock* block);
   bool IsEmptyBody(HBasicBlock* block);
   bool IsOnlyUsedAfterLoop(HLoopInformation* loop_info,
                            HInstruction* instruction,
                            bool collect_loop_uses,
                            /*out*/ int32_t* use_count);
+  bool IsUsedOutsideLoop(HLoopInformation* loop_info,
+                         HInstruction* instruction);
   bool TryReplaceWithLastValue(HInstruction* instruction, HBasicBlock* block);
+  bool TryAssignLastValue(HLoopInformation* loop_info,
+                          HInstruction* instruction,
+                          HBasicBlock* block,
+                          bool collect_loop_uses);
   void RemoveDeadInstructions(const HInstructionList& list);

   // Compiler driver (to query ISA features).
@@ -90,6 +162,9 @@ class HLoopOptimization : public HOptimization {
   // through this allocator is immediately released when the loop optimizer is done.
   ArenaAllocator* loop_allocator_;

+  // Global heap memory allocator. Used to build HIR.
+  ArenaAllocator* global_allocator_;
+
   // Entries into the loop hierarchy representation. The hierarchy resides
   // in phase-local heap memory.
   LoopNode* top_loop_;
@@ -102,11 +177,33 @@ class HLoopOptimization : public HOptimization {
   // Counter that tracks how many induction cycles have been simplified. Useful
   // to trigger incremental updates of induction variable analysis of outer loops
   // when the induction of inner loops has changed.
-  int32_t induction_simplication_count_;
+  uint32_t induction_simplication_count_;

   // Flag that tracks if any simplifications have occurred.
   bool simplified_;

+  // Number of "lanes" for selected packed type.
+  uint32_t vector_length_;
+
+  // Set of array references in the vector loop.
+  // Contents reside in phase-local heap memory.
+  ArenaSet<ArrayReference>* vector_refs_;
+
+  // Mapping used during vectorization synthesis for both the scalar peeling/cleanup
+  // loop (vector_mode_ is kSequential) and the actual vector loop (vector_mode_ is
+  // kVector). The data structure maps original instructions into the new instructions.
+  // Contents reside in phase-local heap memory.
+  ArenaSafeMap<HInstruction*, HInstruction*>* vector_map_;
+
+  // Temporary vectorization bookkeeping.
+  HBasicBlock* vector_preheader_;  // preheader of the new loop
+  HBasicBlock* vector_header_;  // header of the new loop
+  HBasicBlock* vector_body_;  // body of the new loop
+  HInstruction* vector_runtime_test_a_;
+  HInstruction* vector_runtime_test_b_;  // defines a != b runtime test
+  HPhi* vector_phi_;  // the Phi representing the normalized loop index
+  VectorMode vector_mode_;  // selects synthesis mode
+
   friend class LoopOptimizationTest;

   DISALLOW_COPY_AND_ASSIGN(HLoopOptimization);
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index ec706e6694..5617e4bfcb 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -1088,6 +1088,19 @@ void HInstruction::ReplaceWith(HInstruction* other) {
   DCHECK(env_uses_.empty());
 }

+void HInstruction::ReplaceUsesDominatedBy(HInstruction* dominator, HInstruction* replacement) {
+  const HUseList<HInstruction*>& uses = GetUses();
+  for (auto it = uses.begin(), end = uses.end(); it != end; /* ++it below */) {
+    HInstruction* user = it->GetUser();
+    size_t index = it->GetIndex();
+    // Increment `it` now because `*it` may disappear thanks to user->ReplaceInput().
+ ++it; + if (dominator->StrictlyDominates(user)) { + user->ReplaceInput(replacement, index); + } + } +} + void HInstruction::ReplaceInput(HInstruction* replacement, size_t index) { HUserRecord<HInstruction*> input_use = InputRecordAt(index); if (input_use.GetInstruction() == replacement) { @@ -1323,6 +1336,18 @@ std::ostream& operator<<(std::ostream& os, const ComparisonBias& rhs) { } } +std::ostream& operator<<(std::ostream& os, const HDeoptimize::Kind& rhs) { + switch (rhs) { + case HDeoptimize::Kind::kBCE: + return os << "bce"; + case HDeoptimize::Kind::kInline: + return os << "inline"; + default: + LOG(FATAL) << "Unknown Deoptimization kind: " << static_cast<int>(rhs); + UNREACHABLE(); + } +} + bool HCondition::IsBeforeWhenDisregardMoves(HInstruction* instruction) const { return this == instruction->GetPreviousDisregardingMoves(); } @@ -2315,6 +2340,66 @@ void HGraph::TransformLoopHeaderForBCE(HBasicBlock* header) { new_pre_header, old_pre_header, /* replace_if_back_edge */ false); } +HBasicBlock* HGraph::TransformLoopForVectorization(HBasicBlock* header, + HBasicBlock* body, + HBasicBlock* exit) { + DCHECK(header->IsLoopHeader()); + HLoopInformation* loop = header->GetLoopInformation(); + + // Add new loop blocks. + HBasicBlock* new_pre_header = new (arena_) HBasicBlock(this, header->GetDexPc()); + HBasicBlock* new_header = new (arena_) HBasicBlock(this, header->GetDexPc()); + HBasicBlock* new_body = new (arena_) HBasicBlock(this, header->GetDexPc()); + AddBlock(new_pre_header); + AddBlock(new_header); + AddBlock(new_body); + + // Set up control flow. + header->ReplaceSuccessor(exit, new_pre_header); + new_pre_header->AddSuccessor(new_header); + new_header->AddSuccessor(exit); + new_header->AddSuccessor(new_body); + new_body->AddSuccessor(new_header); + + // Set up dominators. + header->ReplaceDominatedBlock(exit, new_pre_header); + new_pre_header->SetDominator(header); + new_pre_header->dominated_blocks_.push_back(new_header); + new_header->SetDominator(new_pre_header); + new_header->dominated_blocks_.push_back(new_body); + new_body->SetDominator(new_header); + new_header->dominated_blocks_.push_back(exit); + exit->SetDominator(new_header); + + // Fix reverse post order. + size_t index_of_header = IndexOfElement(reverse_post_order_, header); + MakeRoomFor(&reverse_post_order_, 2, index_of_header); + reverse_post_order_[++index_of_header] = new_pre_header; + reverse_post_order_[++index_of_header] = new_header; + size_t index_of_body = IndexOfElement(reverse_post_order_, body); + MakeRoomFor(&reverse_post_order_, 1, index_of_body - 1); + reverse_post_order_[index_of_body] = new_body; + + // Add gotos and suspend check (client must add conditional in header and copy environment). + new_pre_header->AddInstruction(new (arena_) HGoto()); + HSuspendCheck* suspend_check = new (arena_) HSuspendCheck(header->GetDexPc()); + new_header->AddInstruction(suspend_check); + new_body->AddInstruction(new (arena_) HGoto()); + + // Update loop information. 
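+  // The control flow built above (sketch):
+  //
+  //   header --> new_pre_header --> new_header --> exit
+  //                                    |     ^
+  //                                    v     |
+  //                                    new_body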
+ new_header->AddBackEdge(new_body); + new_header->GetLoopInformation()->SetSuspendCheck(suspend_check); + new_header->GetLoopInformation()->Populate(); + new_pre_header->SetLoopInformation(loop->GetPreHeader()->GetLoopInformation()); // outward + HLoopInformationOutwardIterator it(*new_header); + for (it.Advance(); !it.Done(); it.Advance()) { + it.Current()->Add(new_pre_header); + it.Current()->Add(new_header); + it.Current()->Add(new_body); + } + return new_pre_header; +} + static void CheckAgainstUpperBound(ReferenceTypeInfo rti, ReferenceTypeInfo upper_bound_rti) REQUIRES_SHARED(Locks::mutator_lock_) { if (rti.IsValid()) { diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index fb0c889792..52a02c2285 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -400,6 +400,12 @@ class HGraph : public ArenaObject<kArenaAllocGraph> { // put deoptimization instructions, etc. void TransformLoopHeaderForBCE(HBasicBlock* header); + // Adds a new loop directly after the loop with the given header and exit. + // Returns the new preheader. + HBasicBlock* TransformLoopForVectorization(HBasicBlock* header, + HBasicBlock* body, + HBasicBlock* exit); + // Removes `block` from the graph. Assumes `block` has been disconnected from // other blocks and has no instructions or phis. void DeleteDeadEmptyBlock(HBasicBlock* block); @@ -1363,6 +1369,25 @@ class HLoopInformationOutwardIterator : public ValueObject { M(TypeConversion, Instruction) \ M(UShr, BinaryOperation) \ M(Xor, BinaryOperation) \ + M(VecReplicateScalar, VecUnaryOperation) \ + M(VecSetScalars, VecUnaryOperation) \ + M(VecSumReduce, VecUnaryOperation) \ + M(VecCnv, VecUnaryOperation) \ + M(VecNeg, VecUnaryOperation) \ + M(VecNot, VecUnaryOperation) \ + M(VecAdd, VecBinaryOperation) \ + M(VecSub, VecBinaryOperation) \ + M(VecMul, VecBinaryOperation) \ + M(VecDiv, VecBinaryOperation) \ + M(VecAnd, VecBinaryOperation) \ + M(VecAndNot, VecBinaryOperation) \ + M(VecOr, VecBinaryOperation) \ + M(VecXor, VecBinaryOperation) \ + M(VecShl, VecBinaryOperation) \ + M(VecShr, VecBinaryOperation) \ + M(VecUShr, VecBinaryOperation) \ + M(VecLoad, VecMemoryOperation) \ + M(VecStore, VecMemoryOperation) \ /* * Instructions, shared across several (not all) architectures. @@ -1424,7 +1449,11 @@ class HLoopInformationOutwardIterator : public ValueObject { M(Constant, Instruction) \ M(UnaryOperation, Instruction) \ M(BinaryOperation, Instruction) \ - M(Invoke, Instruction) + M(Invoke, Instruction) \ + M(VecOperation, Instruction) \ + M(VecUnaryOperation, VecOperation) \ + M(VecBinaryOperation, VecOperation) \ + M(VecMemoryOperation, VecOperation) #define FOR_EACH_INSTRUCTION(M) \ FOR_EACH_CONCRETE_INSTRUCTION(M) \ @@ -2081,6 +2110,7 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> { void SetLocations(LocationSummary* locations) { locations_ = locations; } void ReplaceWith(HInstruction* instruction); + void ReplaceUsesDominatedBy(HInstruction* dominator, HInstruction* replacement); void ReplaceInput(HInstruction* replacement, size_t index); // This is almost the same as doing `ReplaceWith()`. But in this helper, the @@ -2944,28 +2974,97 @@ class HTryBoundary FINAL : public HTemplateInstruction<0> { }; // Deoptimize to interpreter, upon checking a condition. 
-class HDeoptimize FINAL : public HTemplateInstruction<1> { +class HDeoptimize FINAL : public HVariableInputSizeInstruction { public: + enum class Kind { + kBCE, + kInline, + kLast = kInline + }; + + // Use this constructor when the `HDeoptimize` acts as a barrier, where no code can move + // across. + HDeoptimize(ArenaAllocator* arena, HInstruction* cond, Kind kind, uint32_t dex_pc) + : HVariableInputSizeInstruction( + SideEffects::All(), + dex_pc, + arena, + /* number_of_inputs */ 1, + kArenaAllocMisc) { + SetPackedFlag<kFieldCanBeMoved>(false); + SetPackedField<DeoptimizeKindField>(kind); + SetRawInputAt(0, cond); + } + + // Use this constructor when the `HDeoptimize` guards an instruction, and any user + // that relies on the deoptimization to pass should have its input be the `HDeoptimize` + // instead of `guard`. // We set CanTriggerGC to prevent any intermediate address to be live // at the point of the `HDeoptimize`. - HDeoptimize(HInstruction* cond, uint32_t dex_pc) - : HTemplateInstruction(SideEffects::CanTriggerGC(), dex_pc) { + HDeoptimize(ArenaAllocator* arena, + HInstruction* cond, + HInstruction* guard, + Kind kind, + uint32_t dex_pc) + : HVariableInputSizeInstruction( + SideEffects::CanTriggerGC(), + dex_pc, + arena, + /* number_of_inputs */ 2, + kArenaAllocMisc) { + SetPackedFlag<kFieldCanBeMoved>(true); + SetPackedField<DeoptimizeKindField>(kind); SetRawInputAt(0, cond); + SetRawInputAt(1, guard); } - bool CanBeMoved() const OVERRIDE { return true; } - bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE { - return true; + bool CanBeMoved() const OVERRIDE { return GetPackedFlag<kFieldCanBeMoved>(); } + + bool InstructionDataEquals(const HInstruction* other) const OVERRIDE { + return (other->CanBeMoved() == CanBeMoved()) && (other->AsDeoptimize()->GetKind() == GetKind()); } + bool NeedsEnvironment() const OVERRIDE { return true; } + bool CanThrow() const OVERRIDE { return true; } + Kind GetKind() const { return GetPackedField<DeoptimizeKindField>(); } + + Primitive::Type GetType() const OVERRIDE { + return GuardsAnInput() ? GuardedInput()->GetType() : Primitive::kPrimVoid; + } + + bool GuardsAnInput() const { + return InputCount() == 2; + } + + HInstruction* GuardedInput() const { + DCHECK(GuardsAnInput()); + return InputAt(1); + } + + void RemoveGuard() { + RemoveInputAt(1); + } + DECLARE_INSTRUCTION(Deoptimize); private: + static constexpr size_t kFieldCanBeMoved = kNumberOfGenericPackedBits; + static constexpr size_t kFieldDeoptimizeKind = kNumberOfGenericPackedBits + 1; + static constexpr size_t kFieldDeoptimizeKindSize = + MinimumBitsToStore(static_cast<size_t>(Kind::kLast)); + static constexpr size_t kNumberOfDeoptimizePackedBits = + kFieldDeoptimizeKind + kFieldDeoptimizeKindSize; + static_assert(kNumberOfDeoptimizePackedBits <= kMaxNumberOfPackedBits, + "Too many packed fields."); + using DeoptimizeKindField = BitField<Kind, kFieldDeoptimizeKind, kFieldDeoptimizeKindSize>; + DISALLOW_COPY_AND_ASSIGN(HDeoptimize); }; +std::ostream& operator<<(std::ostream& os, const HDeoptimize::Kind& rhs); + // Represents a should_deoptimize flag. Currently used for CHA-based devirtualization. // The compiled code checks this flag value in a guard before devirtualized call and // if it's true, starts to do deoptimization. 
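+// Usage sketch for the guarding form of HDeoptimize above (`condition`, `guard`
+// and `deopt` are hypothetical names, not part of this change): users that rely
+// on the deoptimization take the HDeoptimize itself as input, since it forwards
+// the guarded value's type:
+//
+//   HDeoptimize* deopt = new (arena) HDeoptimize(
+//       arena, condition, guard, HDeoptimize::Kind::kInline, dex_pc);
+//   block->InsertInstructionBefore(deopt, use_site);
+//   guard->ReplaceUsesDominatedBy(deopt, deopt);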
@@ -6619,6 +6718,8 @@ class HParallelMove FINAL : public HTemplateInstruction<0> { } // namespace art +#include "nodes_vector.h" + #if defined(ART_ENABLE_CODEGEN_arm) || defined(ART_ENABLE_CODEGEN_arm64) #include "nodes_shared.h" #endif diff --git a/compiler/optimizing/nodes_vector.h b/compiler/optimizing/nodes_vector.h new file mode 100644 index 0000000000..9f9b918f17 --- /dev/null +++ b/compiler/optimizing/nodes_vector.h @@ -0,0 +1,585 @@ +/* + * Copyright (C) 2017 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_OPTIMIZING_NODES_VECTOR_H_ +#define ART_COMPILER_OPTIMIZING_NODES_VECTOR_H_ + +// This #include should never be used by compilation, because this header file (nodes_vector.h) +// is included in the header file nodes.h itself. However it gives editing tools better context. +#include "nodes.h" + +namespace art { + +// Memory alignment, represented as an offset relative to a base, where 0 <= offset < base, +// and base is a power of two. For example, the value Alignment(16, 0) means memory is +// perfectly aligned at a 16-byte boundary, whereas the value Alignment(16, 4) means +// memory is always exactly 4 bytes above such a boundary. +class Alignment { + public: + Alignment(size_t base, size_t offset) : base_(base), offset_(offset) { + DCHECK_LT(offset, base); + DCHECK(IsPowerOfTwo(base)); + } + + // Returns true if memory is "at least" aligned at the given boundary. + // Assumes requested base is power of two. + bool IsAlignedAt(size_t base) const { + DCHECK_NE(0u, base); + DCHECK(IsPowerOfTwo(base)); + return ((offset_ | base_) & (base - 1u)) == 0; + } + + std::string ToString() const { + return "ALIGN(" + std::to_string(base_) + "," + std::to_string(offset_) + ")"; + } + + private: + size_t base_; + size_t offset_; +}; + +// +// Definitions of abstract vector operations in HIR. +// + +// Abstraction of a vector operation, i.e., an operation that performs +// GetVectorLength() x GetPackedType() operations simultaneously. +class HVecOperation : public HVariableInputSizeInstruction { + public: + HVecOperation(ArenaAllocator* arena, + Primitive::Type packed_type, + SideEffects side_effects, + size_t number_of_inputs, + size_t vector_length, + uint32_t dex_pc) + : HVariableInputSizeInstruction(side_effects, + dex_pc, + arena, + number_of_inputs, + kArenaAllocVectorNode), + vector_length_(vector_length) { + SetPackedField<TypeField>(packed_type); + DCHECK_LT(1u, vector_length); + } + + // Returns the number of elements packed in a vector. + size_t GetVectorLength() const { + return vector_length_; + } + + // Returns the number of bytes in a full vector. + size_t GetVectorNumberOfBytes() const { + return vector_length_ * Primitive::ComponentSize(GetPackedType()); + } + + // Returns the type of the vector operation: a SIMD operation looks like a FPU location. + // TODO: we could introduce SIMD types in HIR. 
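+  // E.g. (sketch): a 4 x int32 vector occupies one 128-bit SIMD register; the
+  // register allocator models that as a floating-point location, so returning
+  // kPrimDouble below keeps vector values out of core registers.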
+ Primitive::Type GetType() const OVERRIDE { + return Primitive::kPrimDouble; + } + + // Returns the true component type packed in a vector. + Primitive::Type GetPackedType() const { + return GetPackedField<TypeField>(); + } + + DECLARE_ABSTRACT_INSTRUCTION(VecOperation); + + private: + // Additional packed bits. + static constexpr size_t kFieldType = HInstruction::kNumberOfGenericPackedBits; + static constexpr size_t kFieldTypeSize = + MinimumBitsToStore(static_cast<size_t>(Primitive::kPrimLast)); + static constexpr size_t kNumberOfVectorOpPackedBits = kFieldType + kFieldTypeSize; + static_assert(kNumberOfVectorOpPackedBits <= kMaxNumberOfPackedBits, "Too many packed fields."); + using TypeField = BitField<Primitive::Type, kFieldType, kFieldTypeSize>; + + const size_t vector_length_; + + DISALLOW_COPY_AND_ASSIGN(HVecOperation); +}; + +// Abstraction of a unary vector operation. +class HVecUnaryOperation : public HVecOperation { + public: + HVecUnaryOperation(ArenaAllocator* arena, + Primitive::Type packed_type, + size_t vector_length, + uint32_t dex_pc) + : HVecOperation(arena, + packed_type, + SideEffects::None(), + /*number_of_inputs*/ 1, + vector_length, + dex_pc) { } + DECLARE_ABSTRACT_INSTRUCTION(VecUnaryOperation); + private: + DISALLOW_COPY_AND_ASSIGN(HVecUnaryOperation); +}; + +// Abstraction of a binary vector operation. +class HVecBinaryOperation : public HVecOperation { + public: + HVecBinaryOperation(ArenaAllocator* arena, + Primitive::Type packed_type, + size_t vector_length, + uint32_t dex_pc) + : HVecOperation(arena, + packed_type, + SideEffects::None(), + /*number_of_inputs*/ 2, + vector_length, + dex_pc) { } + DECLARE_ABSTRACT_INSTRUCTION(VecBinaryOperation); + private: + DISALLOW_COPY_AND_ASSIGN(HVecBinaryOperation); +}; + +// Abstraction of a vector operation that references memory, with an alignment. +// The Android runtime guarantees at least "component size" alignment for array +// elements and, thus, vectors. +class HVecMemoryOperation : public HVecOperation { + public: + HVecMemoryOperation(ArenaAllocator* arena, + Primitive::Type packed_type, + SideEffects side_effects, + size_t number_of_inputs, + size_t vector_length, + uint32_t dex_pc) + : HVecOperation(arena, packed_type, side_effects, number_of_inputs, vector_length, dex_pc), + alignment_(Primitive::ComponentSize(packed_type), 0) { } + + void SetAlignment(Alignment alignment) { alignment_ = alignment; } + + Alignment GetAlignment() const { return alignment_; } + + DECLARE_ABSTRACT_INSTRUCTION(VecMemoryOperation); + + private: + Alignment alignment_; + + DISALLOW_COPY_AND_ASSIGN(HVecMemoryOperation); +}; + +// +// Definitions of concrete vector operations in HIR. +// + +// Replicates the given scalar into a vector, +// viz. replicate(x) = [ x, .. , x ]. +class HVecReplicateScalar FINAL : public HVecUnaryOperation { + public: + HVecReplicateScalar(ArenaAllocator* arena, + HInstruction* scalar, + Primitive::Type packed_type, + size_t vector_length, + uint32_t dex_pc = kNoDexPc) + : HVecUnaryOperation(arena, packed_type, vector_length, dex_pc) { + SetRawInputAt(0, scalar); + } + DECLARE_INSTRUCTION(VecReplicateScalar); + private: + DISALLOW_COPY_AND_ASSIGN(HVecReplicateScalar); +}; + +// Assigns the given scalar elements to a vector, +// viz. set( array(x1, .., xn) ) = [ x1, .. , xn ]. 
+class HVecSetScalars FINAL : public HVecUnaryOperation {
+ public:
+  HVecSetScalars(ArenaAllocator* arena,
+                 HInstruction** scalars,  // array
+                 Primitive::Type packed_type,
+                 size_t vector_length,
+                 uint32_t dex_pc = kNoDexPc)
+      : HVecUnaryOperation(arena, packed_type, vector_length, dex_pc) {
+    for (size_t i = 0; i < vector_length; i++) {
+      SetRawInputAt(i, scalars[i]);  // the i-th scalar becomes the i-th input
+    }
+  }
+  DECLARE_INSTRUCTION(VecSetScalars);
+ private:
+  DISALLOW_COPY_AND_ASSIGN(HVecSetScalars);
+};
+
+// Sum-reduces the given vector into a shorter vector (m < n) or scalar (m = 1),
+// viz. sum-reduce[ x1, .. , xn ] = [ y1, .., ym ], where yi = sum_j x_j.
+class HVecSumReduce FINAL : public HVecUnaryOperation {
+ public:
+  HVecSumReduce(ArenaAllocator* arena,
+                HInstruction* input,
+                Primitive::Type packed_type,
+                size_t vector_length,
+                uint32_t dex_pc = kNoDexPc)
+      : HVecUnaryOperation(arena, packed_type, vector_length, dex_pc) {
+    DCHECK(input->IsVecOperation());
+    DCHECK_EQ(input->AsVecOperation()->GetPackedType(), packed_type);
+    SetRawInputAt(0, input);
+  }
+
+  // TODO: probably integral promotion
+  Primitive::Type GetType() const OVERRIDE { return GetPackedType(); }
+
+  DECLARE_INSTRUCTION(VecSumReduce);
+ private:
+  DISALLOW_COPY_AND_ASSIGN(HVecSumReduce);
+};
+
+// Converts every component in the vector,
+// viz. cnv[ x1, .. , xn ] = [ cnv(x1), .. , cnv(xn) ].
+class HVecCnv FINAL : public HVecUnaryOperation {
+ public:
+  HVecCnv(ArenaAllocator* arena,
+          HInstruction* input,
+          Primitive::Type packed_type,
+          size_t vector_length,
+          uint32_t dex_pc = kNoDexPc)
+      : HVecUnaryOperation(arena, packed_type, vector_length, dex_pc) {
+    DCHECK(input->IsVecOperation());
+    DCHECK_NE(input->AsVecOperation()->GetPackedType(), packed_type);  // actual convert
+    SetRawInputAt(0, input);
+  }
+
+  Primitive::Type GetInputType() const { return InputAt(0)->AsVecOperation()->GetPackedType(); }
+  Primitive::Type GetResultType() const { return GetPackedType(); }
+
+  DECLARE_INSTRUCTION(VecCnv);
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(HVecCnv);
+};
+
+// Negates every component in the vector,
+// viz. neg[ x1, .. , xn ] = [ -x1, .. , -xn ].
+class HVecNeg FINAL : public HVecUnaryOperation {
+ public:
+  HVecNeg(ArenaAllocator* arena,
+          HInstruction* input,
+          Primitive::Type packed_type,
+          size_t vector_length,
+          uint32_t dex_pc = kNoDexPc)
+      : HVecUnaryOperation(arena, packed_type, vector_length, dex_pc) {
+    DCHECK(input->IsVecOperation());
+    DCHECK_EQ(input->AsVecOperation()->GetPackedType(), packed_type);
+    SetRawInputAt(0, input);
+  }
+  DECLARE_INSTRUCTION(VecNeg);
+ private:
+  DISALLOW_COPY_AND_ASSIGN(HVecNeg);
+};
+
+// Bitwise- or boolean-nots every component in the vector,
+// viz. not[ x1, .. , xn ] = [ ~x1, .. , ~xn ], or
+//      not[ x1, .. , xn ] = [ !x1, .. , !xn ] for boolean.
+class HVecNot FINAL : public HVecUnaryOperation {
+ public:
+  HVecNot(ArenaAllocator* arena,
+          HInstruction* input,
+          Primitive::Type packed_type,
+          size_t vector_length,
+          uint32_t dex_pc = kNoDexPc)
+      : HVecUnaryOperation(arena, packed_type, vector_length, dex_pc) {
+    DCHECK(input->IsVecOperation());
+    SetRawInputAt(0, input);
+  }
+  DECLARE_INSTRUCTION(VecNot);
+ private:
+  DISALLOW_COPY_AND_ASSIGN(HVecNot);
+};
+
+// Adds every component in the two vectors,
+// viz. [ x1, .. , xn ] + [ y1, .. , yn ] = [ x1 + y1, .. , xn + yn ].
+class HVecAdd FINAL : public HVecBinaryOperation { + public: + HVecAdd(ArenaAllocator* arena, + HInstruction* left, + HInstruction* right, + Primitive::Type packed_type, + size_t vector_length, + uint32_t dex_pc = kNoDexPc) + : HVecBinaryOperation(arena, packed_type, vector_length, dex_pc) { + DCHECK(left->IsVecOperation() && right->IsVecOperation()); + DCHECK_EQ(left->AsVecOperation()->GetPackedType(), packed_type); + DCHECK_EQ(right->AsVecOperation()->GetPackedType(), packed_type); + SetRawInputAt(0, left); + SetRawInputAt(1, right); + } + DECLARE_INSTRUCTION(VecAdd); + private: + DISALLOW_COPY_AND_ASSIGN(HVecAdd); +}; + +// Subtracts every component in the two vectors, +// viz. [ x1, .. , xn ] - [ y1, .. , yn ] = [ x1 - y1, .. , xn - yn ]. +class HVecSub FINAL : public HVecBinaryOperation { + public: + HVecSub(ArenaAllocator* arena, + HInstruction* left, + HInstruction* right, + Primitive::Type packed_type, + size_t vector_length, + uint32_t dex_pc = kNoDexPc) + : HVecBinaryOperation(arena, packed_type, vector_length, dex_pc) { + DCHECK(left->IsVecOperation() && right->IsVecOperation()); + DCHECK_EQ(left->AsVecOperation()->GetPackedType(), packed_type); + DCHECK_EQ(right->AsVecOperation()->GetPackedType(), packed_type); + SetRawInputAt(0, left); + SetRawInputAt(1, right); + } + DECLARE_INSTRUCTION(VecSub); + private: + DISALLOW_COPY_AND_ASSIGN(HVecSub); +}; + +// Multiplies every component in the two vectors, +// viz. [ x1, .. , xn ] * [ y1, .. , yn ] = [ x1 * y1, .. , xn * yn ]. +class HVecMul FINAL : public HVecBinaryOperation { + public: + HVecMul(ArenaAllocator* arena, + HInstruction* left, + HInstruction* right, + Primitive::Type packed_type, + size_t vector_length, + uint32_t dex_pc = kNoDexPc) + : HVecBinaryOperation(arena, packed_type, vector_length, dex_pc) { + DCHECK(left->IsVecOperation() && right->IsVecOperation()); + DCHECK_EQ(left->AsVecOperation()->GetPackedType(), packed_type); + DCHECK_EQ(right->AsVecOperation()->GetPackedType(), packed_type); + SetRawInputAt(0, left); + SetRawInputAt(1, right); + } + DECLARE_INSTRUCTION(VecMul); + private: + DISALLOW_COPY_AND_ASSIGN(HVecMul); +}; + +// Divides every component in the two vectors, +// viz. [ x1, .. , xn ] / [ y1, .. , yn ] = [ x1 / y1, .. , xn / yn ]. +class HVecDiv FINAL : public HVecBinaryOperation { + public: + HVecDiv(ArenaAllocator* arena, + HInstruction* left, + HInstruction* right, + Primitive::Type packed_type, + size_t vector_length, + uint32_t dex_pc = kNoDexPc) + : HVecBinaryOperation(arena, packed_type, vector_length, dex_pc) { + DCHECK(left->IsVecOperation() && right->IsVecOperation()); + DCHECK_EQ(left->AsVecOperation()->GetPackedType(), packed_type); + DCHECK_EQ(right->AsVecOperation()->GetPackedType(), packed_type); + SetRawInputAt(0, left); + SetRawInputAt(1, right); + } + DECLARE_INSTRUCTION(VecDiv); + private: + DISALLOW_COPY_AND_ASSIGN(HVecDiv); +}; + +// Bitwise-ands every component in the two vectors, +// viz. [ x1, .. , xn ] & [ y1, .. , yn ] = [ x1 & y1, .. , xn & yn ]. 
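+// For example, [ 0b1100, 0b1010 ] & [ 0b1010, 0b0110 ] = [ 0b1000, 0b0010 ].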
+class HVecAnd FINAL : public HVecBinaryOperation { + public: + HVecAnd(ArenaAllocator* arena, + HInstruction* left, + HInstruction* right, + Primitive::Type packed_type, + size_t vector_length, + uint32_t dex_pc = kNoDexPc) + : HVecBinaryOperation(arena, packed_type, vector_length, dex_pc) { + DCHECK(left->IsVecOperation() && right->IsVecOperation()); + SetRawInputAt(0, left); + SetRawInputAt(1, right); + } + DECLARE_INSTRUCTION(VecAnd); + private: + DISALLOW_COPY_AND_ASSIGN(HVecAnd); +}; + +// Bitwise-and-nots every component in the two vectors, +// viz. [ x1, .. , xn ] and-not [ y1, .. , yn ] = [ ~x1 & y1, .. , ~xn & yn ]. +class HVecAndNot FINAL : public HVecBinaryOperation { + public: + HVecAndNot(ArenaAllocator* arena, + HInstruction* left, + HInstruction* right, + Primitive::Type packed_type, + size_t vector_length, + uint32_t dex_pc = kNoDexPc) + : HVecBinaryOperation(arena, packed_type, vector_length, dex_pc) { + DCHECK(left->IsVecOperation() && right->IsVecOperation()); + SetRawInputAt(0, left); + SetRawInputAt(1, right); + } + DECLARE_INSTRUCTION(VecAndNot); + private: + DISALLOW_COPY_AND_ASSIGN(HVecAndNot); +}; + +// Bitwise-ors every component in the two vectors, +// viz. [ x1, .. , xn ] | [ y1, .. , yn ] = [ x1 | y1, .. , xn | yn ]. +class HVecOr FINAL : public HVecBinaryOperation { + public: + HVecOr(ArenaAllocator* arena, + HInstruction* left, + HInstruction* right, + Primitive::Type packed_type, + size_t vector_length, + uint32_t dex_pc = kNoDexPc) + : HVecBinaryOperation(arena, packed_type, vector_length, dex_pc) { + DCHECK(left->IsVecOperation() && right->IsVecOperation()); + SetRawInputAt(0, left); + SetRawInputAt(1, right); + } + DECLARE_INSTRUCTION(VecOr); + private: + DISALLOW_COPY_AND_ASSIGN(HVecOr); +}; + +// Bitwise-xors every component in the two vectors, +// viz. [ x1, .. , xn ] ^ [ y1, .. , yn ] = [ x1 ^ y1, .. , xn ^ yn ]. +class HVecXor FINAL : public HVecBinaryOperation { + public: + HVecXor(ArenaAllocator* arena, + HInstruction* left, + HInstruction* right, + Primitive::Type packed_type, + size_t vector_length, + uint32_t dex_pc = kNoDexPc) + : HVecBinaryOperation(arena, packed_type, vector_length, dex_pc) { + DCHECK(left->IsVecOperation() && right->IsVecOperation()); + SetRawInputAt(0, left); + SetRawInputAt(1, right); + } + DECLARE_INSTRUCTION(VecXor); + private: + DISALLOW_COPY_AND_ASSIGN(HVecXor); +}; + +// Logically shifts every component in the vector left by the given distance, +// viz. [ x1, .. , xn ] << d = [ x1 << d, .. , xn << d ]. +class HVecShl FINAL : public HVecBinaryOperation { + public: + HVecShl(ArenaAllocator* arena, + HInstruction* left, + HInstruction* right, + Primitive::Type packed_type, + size_t vector_length, + uint32_t dex_pc = kNoDexPc) + : HVecBinaryOperation(arena, packed_type, vector_length, dex_pc) { + DCHECK(left->IsVecOperation()); + DCHECK_EQ(left->AsVecOperation()->GetPackedType(), packed_type); + SetRawInputAt(0, left); + SetRawInputAt(1, right); + } + DECLARE_INSTRUCTION(VecShl); + private: + DISALLOW_COPY_AND_ASSIGN(HVecShl); +}; + +// Arithmetically shifts every component in the vector right by the given distance, +// viz. [ x1, .. , xn ] >> d = [ x1 >> d, .. , xn >> d ]. 
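+// For example, [ -8, 8, -1, 1 ] >> 1 = [ -4, 4, -1, 0 ], since the sign bit is replicated.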
+class HVecShr FINAL : public HVecBinaryOperation {
+ public:
+  HVecShr(ArenaAllocator* arena,
+          HInstruction* left,
+          HInstruction* right,
+          Primitive::Type packed_type,
+          size_t vector_length,
+          uint32_t dex_pc = kNoDexPc)
+      : HVecBinaryOperation(arena, packed_type, vector_length, dex_pc) {
+    DCHECK(left->IsVecOperation());
+    DCHECK_EQ(left->AsVecOperation()->GetPackedType(), packed_type);
+    SetRawInputAt(0, left);
+    SetRawInputAt(1, right);
+  }
+  DECLARE_INSTRUCTION(VecShr);
+ private:
+  DISALLOW_COPY_AND_ASSIGN(HVecShr);
+};
+
+// Logically shifts every component in the vector right by the given distance,
+// viz. [ x1, .. , xn ] >>> d = [ x1 >>> d, .. , xn >>> d ].
+class HVecUShr FINAL : public HVecBinaryOperation {
+ public:
+  HVecUShr(ArenaAllocator* arena,
+           HInstruction* left,
+           HInstruction* right,
+           Primitive::Type packed_type,
+           size_t vector_length,
+           uint32_t dex_pc = kNoDexPc)
+      : HVecBinaryOperation(arena, packed_type, vector_length, dex_pc) {
+    DCHECK(left->IsVecOperation());
+    DCHECK_EQ(left->AsVecOperation()->GetPackedType(), packed_type);
+    SetRawInputAt(0, left);
+    SetRawInputAt(1, right);
+  }
+  DECLARE_INSTRUCTION(VecUShr);
+ private:
+  DISALLOW_COPY_AND_ASSIGN(HVecUShr);
+};
+
+// Loads a vector from memory, viz. load(mem, 1)
+// yields the vector [ mem(1), .. , mem(n) ].
+class HVecLoad FINAL : public HVecMemoryOperation {
+ public:
+  HVecLoad(ArenaAllocator* arena,
+           HInstruction* base,
+           HInstruction* index,
+           Primitive::Type packed_type,
+           size_t vector_length,
+           uint32_t dex_pc = kNoDexPc)
+      : HVecMemoryOperation(arena,
+                            packed_type,
+                            SideEffects::ArrayReadOfType(packed_type),
+                            /*number_of_inputs*/ 2,
+                            vector_length,
+                            dex_pc) {
+    SetRawInputAt(0, base);
+    SetRawInputAt(1, index);
+  }
+  DECLARE_INSTRUCTION(VecLoad);
+ private:
+  DISALLOW_COPY_AND_ASSIGN(HVecLoad);
+};
+
+// Stores a vector to memory, viz. store(mem, 1, [ x1, .. , xn ])
+// sets mem(1) = x1, .. , mem(n) = xn.
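+// For example, store(mem, 1, [ 5, 6, 7, 8 ]) leaves mem(1) = 5, .. , mem(4) = 8.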
+class HVecStore FINAL : public HVecMemoryOperation { + public: + HVecStore(ArenaAllocator* arena, + HInstruction* base, + HInstruction* index, + HInstruction* value, + Primitive::Type packed_type, + size_t vector_length, + uint32_t dex_pc = kNoDexPc) + : HVecMemoryOperation(arena, + packed_type, + SideEffects::ArrayWriteOfType(packed_type), + /*number_of_inputs*/ 3, + vector_length, + dex_pc) { + DCHECK(value->IsVecOperation()); + DCHECK_EQ(value->AsVecOperation()->GetPackedType(), packed_type); + SetRawInputAt(0, base); + SetRawInputAt(1, index); + SetRawInputAt(2, value); + } + DECLARE_INSTRUCTION(VecStore); + private: + DISALLOW_COPY_AND_ASSIGN(HVecStore); +}; + +} // namespace art + +#endif // ART_COMPILER_OPTIMIZING_NODES_VECTOR_H_ diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index 3c6d2d64a9..eb88fdee84 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -454,6 +454,8 @@ static bool IsInstructionSetSupported(InstructionSet instruction_set) { static bool InstructionSetSupportsReadBarrier(InstructionSet instruction_set) { return instruction_set == kArm64 || instruction_set == kThumb2 + || instruction_set == kMips + || instruction_set == kMips64 || instruction_set == kX86 || instruction_set == kX86_64; } diff --git a/compiler/optimizing/prepare_for_register_allocation.cc b/compiler/optimizing/prepare_for_register_allocation.cc index efbaf6c221..66bfea9860 100644 --- a/compiler/optimizing/prepare_for_register_allocation.cc +++ b/compiler/optimizing/prepare_for_register_allocation.cc @@ -40,6 +40,14 @@ void PrepareForRegisterAllocation::VisitDivZeroCheck(HDivZeroCheck* check) { check->ReplaceWith(check->InputAt(0)); } +void PrepareForRegisterAllocation::VisitDeoptimize(HDeoptimize* deoptimize) { + if (deoptimize->GuardsAnInput()) { + // Replace the uses with the actual guarded instruction. 
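+    // After this point the deoptimize node only consumes its condition; the
+    // value it used to guard flows to its former users directly.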
+ deoptimize->ReplaceWith(deoptimize->GuardedInput()); + deoptimize->RemoveGuard(); + } +} + void PrepareForRegisterAllocation::VisitBoundsCheck(HBoundsCheck* check) { check->ReplaceWith(check->InputAt(0)); if (check->IsStringCharAt()) { diff --git a/compiler/optimizing/prepare_for_register_allocation.h b/compiler/optimizing/prepare_for_register_allocation.h index c128227654..7ffbe44ef6 100644 --- a/compiler/optimizing/prepare_for_register_allocation.h +++ b/compiler/optimizing/prepare_for_register_allocation.h @@ -44,6 +44,7 @@ class PrepareForRegisterAllocation : public HGraphDelegateVisitor { void VisitClinitCheck(HClinitCheck* check) OVERRIDE; void VisitCondition(HCondition* condition) OVERRIDE; void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE; + void VisitDeoptimize(HDeoptimize* deoptimize) OVERRIDE; bool CanMoveClinitCheck(HInstruction* input, HInstruction* user) const; bool CanEmitConditionAt(HCondition* condition, HInstruction* user) const; diff --git a/compiler/optimizing/reference_type_propagation.cc b/compiler/optimizing/reference_type_propagation.cc index 6e332ca59b..d5637b9b75 100644 --- a/compiler/optimizing/reference_type_propagation.cc +++ b/compiler/optimizing/reference_type_propagation.cc @@ -310,8 +310,8 @@ static void BoundTypeForClassCheck(HInstruction* check) { BoundTypeIn(receiver, trueBlock, /* start_instruction */ nullptr, class_rti); } else { DCHECK(check->IsDeoptimize()); - if (compare->IsEqual()) { - BoundTypeIn(receiver, check->GetBlock(), check, class_rti); + if (compare->IsEqual() && check->AsDeoptimize()->GuardsAnInput()) { + check->SetReferenceTypeInfo(class_rti); } } } diff --git a/compiler/optimizing/reference_type_propagation_test.cc b/compiler/optimizing/reference_type_propagation_test.cc index 84a4bab1a9..0b49ce1a4c 100644 --- a/compiler/optimizing/reference_type_propagation_test.cc +++ b/compiler/optimizing/reference_type_propagation_test.cc @@ -29,7 +29,7 @@ namespace art { */ class ReferenceTypePropagationTest : public CommonCompilerTest { public: - ReferenceTypePropagationTest() : pool_(), allocator_(&pool_) { + ReferenceTypePropagationTest() : pool_(), allocator_(&pool_), propagation_(nullptr) { graph_ = CreateGraph(&allocator_); } diff --git a/compiler/optimizing/scheduler.h b/compiler/optimizing/scheduler.h index ab0dad4300..9236a0e4fa 100644 --- a/compiler/optimizing/scheduler.h +++ b/compiler/optimizing/scheduler.h @@ -315,7 +315,10 @@ class SchedulingLatencyVisitor : public HGraphDelegateVisitor { // This class and its sub-classes will never be used to drive a visit of an // `HGraph` but only to visit `HInstructions` one at a time, so we do not need // to pass a valid graph to `HGraphDelegateVisitor()`. - SchedulingLatencyVisitor() : HGraphDelegateVisitor(nullptr) {} + SchedulingLatencyVisitor() + : HGraphDelegateVisitor(nullptr), + last_visited_latency_(0), + last_visited_internal_latency_(0) {} void VisitInstruction(HInstruction* instruction) OVERRIDE { LOG(FATAL) << "Error visiting " << instruction->DebugName() << ". 
" @@ -413,6 +416,7 @@ class HScheduler { selector_(selector), only_optimize_loop_blocks_(true), scheduling_graph_(this, arena), + cursor_(nullptr), candidates_(arena_->Adapter(kArenaAllocScheduler)) {} virtual ~HScheduler() {} diff --git a/compiler/optimizing/ssa_liveness_analysis.cc b/compiler/optimizing/ssa_liveness_analysis.cc index 36ee5a903a..b538a89a06 100644 --- a/compiler/optimizing/ssa_liveness_analysis.cc +++ b/compiler/optimizing/ssa_liveness_analysis.cc @@ -470,7 +470,12 @@ bool LiveInterval::SameRegisterKind(Location other) const { } size_t LiveInterval::NumberOfSpillSlotsNeeded() const { - // TODO: detect vector operation. + // For a SIMD operation, compute the number of needed spill slots. + // TODO: do through vector type? + HInstruction* definition = GetParent()->GetDefinedBy(); + if (definition != nullptr && definition->IsVecOperation()) { + return definition->AsVecOperation()->GetVectorNumberOfBytes() / kVRegSize; + } // Return number of needed spill slots based on type. return (type_ == Primitive::kPrimLong || type_ == Primitive::kPrimDouble) ? 2 : 1; } diff --git a/compiler/optimizing/ssa_liveness_analysis_test.cc b/compiler/optimizing/ssa_liveness_analysis_test.cc index 1916c73ca4..a1016d1d47 100644 --- a/compiler/optimizing/ssa_liveness_analysis_test.cc +++ b/compiler/optimizing/ssa_liveness_analysis_test.cc @@ -189,13 +189,14 @@ TEST_F(SsaLivenessAnalysisTest, TestDeoptimize) { // Use HAboveOrEqual+HDeoptimize as the bounds check. HInstruction* ae = new (&allocator_) HAboveOrEqual(index, length); block->AddInstruction(ae); - HInstruction* deoptimize = new(&allocator_) HDeoptimize(ae, /* dex_pc */ 0u); + HInstruction* deoptimize = + new(&allocator_) HDeoptimize(&allocator_, ae, HDeoptimize::Kind::kBCE, /* dex_pc */ 0u); block->AddInstruction(deoptimize); HEnvironment* deoptimize_env = new (&allocator_) HEnvironment(&allocator_, - /* number_of_vregs */ 5, - /* method */ nullptr, - /* dex_pc */ 0u, - deoptimize); + /* number_of_vregs */ 5, + /* method */ nullptr, + /* dex_pc */ 0u, + deoptimize); deoptimize_env->CopyFrom(args); deoptimize->SetRawEnvironment(deoptimize_env); HInstruction* array_set = diff --git a/compiler/utils/assembler_test.h b/compiler/utils/assembler_test.h index d265a44092..f655994bd3 100644 --- a/compiler/utils/assembler_test.h +++ b/compiler/utils/assembler_test.h @@ -309,7 +309,7 @@ class AssemblerTest : public testing::Test { template <typename RegType, typename ImmType> std::string RepeatTemplatedRegisterImmBits(void (Ass::*f)(RegType, ImmType), int imm_bits, - const std::vector<Reg*> registers, + const std::vector<RegType*> registers, std::string (AssemblerTest::*GetName)(const RegType&), const std::string& fmt, int bias) { @@ -573,6 +573,19 @@ class AssemblerTest : public testing::Test { } template <typename ImmType> + std::string RepeatVIb(void (Ass::*f)(VecReg, ImmType), + int imm_bits, + std::string fmt, + int bias = 0) { + return RepeatTemplatedRegisterImmBits<VecReg, ImmType>(f, + imm_bits, + GetVectorRegisters(), + &AssemblerTest::GetVecRegName, + fmt, + bias); + } + + template <typename ImmType> std::string RepeatVRIb(void (Ass::*f)(VecReg, Reg, ImmType), int imm_bits, const std::string& fmt, diff --git a/compiler/utils/atomic_method_ref_map-inl.h b/compiler/utils/atomic_method_ref_map-inl.h index d71c2fe997..ad3a099eb6 100644 --- a/compiler/utils/atomic_method_ref_map-inl.h +++ b/compiler/utils/atomic_method_ref_map-inl.h @@ -42,7 +42,7 @@ template <typename T> inline bool AtomicMethodRefMap<T>::Get(MethodReference ref, T* out) const 
{ const ElementArray* const array = GetArray(ref.dex_file); if (array == nullptr) { - return kInsertResultInvalidDexFile; + return false; } *out = (*array)[ref.dex_method_index].LoadRelaxed(); return true; diff --git a/compiler/utils/mips64/assembler_mips64.cc b/compiler/utils/mips64/assembler_mips64.cc index 8a5ae754df..0cff44d830 100644 --- a/compiler/utils/mips64/assembler_mips64.cc +++ b/compiler/utils/mips64/assembler_mips64.cc @@ -252,6 +252,22 @@ void Mips64Assembler::EmitMsaMI10(int s10, Emit(encoding); } +void Mips64Assembler::EmitMsaI10(int operation, + int df, + int i10, + VectorRegister wd, + int minor_opcode) { + CHECK_NE(wd, kNoVectorRegister); + CHECK(IsUint<10>(i10)) << i10; + uint32_t encoding = static_cast<uint32_t>(kMsaMajorOpcode) << kOpcodeShift | + operation << kMsaOperationShift | + df << kDfShift | + i10 << kI10Shift | + static_cast<uint32_t>(wd) << kWdShift | + minor_opcode; + Emit(encoding); +} + void Mips64Assembler::EmitMsa2R(int operation, int df, VectorRegister ws, @@ -1581,6 +1597,30 @@ void Mips64Assembler::FillD(VectorRegister wd, GpuRegister rs) { EmitMsa2R(0xc0, 0x3, static_cast<VectorRegister>(rs), wd, 0x1e); } +void Mips64Assembler::LdiB(VectorRegister wd, int imm8) { + CHECK(HasMsa()); + CHECK(IsInt<8>(imm8)) << imm8; + EmitMsaI10(0x6, 0x0, imm8 & kMsaS10Mask, wd, 0x7); +} + +void Mips64Assembler::LdiH(VectorRegister wd, int imm10) { + CHECK(HasMsa()); + CHECK(IsInt<10>(imm10)) << imm10; + EmitMsaI10(0x6, 0x1, imm10 & kMsaS10Mask, wd, 0x7); +} + +void Mips64Assembler::LdiW(VectorRegister wd, int imm10) { + CHECK(HasMsa()); + CHECK(IsInt<10>(imm10)) << imm10; + EmitMsaI10(0x6, 0x2, imm10 & kMsaS10Mask, wd, 0x7); +} + +void Mips64Assembler::LdiD(VectorRegister wd, int imm10) { + CHECK(HasMsa()); + CHECK(IsInt<10>(imm10)) << imm10; + EmitMsaI10(0x6, 0x3, imm10 & kMsaS10Mask, wd, 0x7); +} + void Mips64Assembler::LdB(VectorRegister wd, GpuRegister rs, int offset) { CHECK(HasMsa()); CHECK(IsInt<10>(offset)) << offset; @@ -1661,6 +1701,7 @@ void Mips64Assembler::Addiu32(GpuRegister rt, GpuRegister rs, int32_t value) { } } +// TODO: don't use rtmp, use daui, dahi, dati. 
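+// (On MIPS64R6, daui, dahi and dati add an immediate at bit positions 16, 32
+// and 48 respectively, which should allow materializing a 64-bit constant
+// without a scratch register.)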
void Mips64Assembler::Daddiu64(GpuRegister rt, GpuRegister rs, int64_t value, GpuRegister rtmp) { if (IsInt<16>(value)) { Daddiu(rt, rs, value); diff --git a/compiler/utils/mips64/assembler_mips64.h b/compiler/utils/mips64/assembler_mips64.h index a8035b6da4..666c6935a1 100644 --- a/compiler/utils/mips64/assembler_mips64.h +++ b/compiler/utils/mips64/assembler_mips64.h @@ -734,6 +734,10 @@ class Mips64Assembler FINAL : public Assembler, public JNIMacroAssembler<Pointer void FillW(VectorRegister wd, GpuRegister rs); void FillD(VectorRegister wd, GpuRegister rs); + void LdiB(VectorRegister wd, int imm8); + void LdiH(VectorRegister wd, int imm10); + void LdiW(VectorRegister wd, int imm10); + void LdiD(VectorRegister wd, int imm10); void LdB(VectorRegister wd, GpuRegister rs, int offset); void LdH(VectorRegister wd, GpuRegister rs, int offset); void LdW(VectorRegister wd, GpuRegister rs, int offset); @@ -1457,6 +1461,7 @@ class Mips64Assembler FINAL : public Assembler, public JNIMacroAssembler<Pointer void EmitMsaBIT(int operation, int df_m, VectorRegister ws, VectorRegister wd, int minor_opcode); void EmitMsaELM(int operation, int df_n, VectorRegister ws, VectorRegister wd, int minor_opcode); void EmitMsaMI10(int s10, GpuRegister rs, VectorRegister wd, int minor_opcode, int df); + void EmitMsaI10(int operation, int df, int i10, VectorRegister wd, int minor_opcode); void EmitMsa2R(int operation, int df, VectorRegister ws, VectorRegister wd, int minor_opcode); void EmitMsa2RF(int operation, int df, VectorRegister ws, VectorRegister wd, int minor_opcode); diff --git a/compiler/utils/mips64/assembler_mips64_test.cc b/compiler/utils/mips64/assembler_mips64_test.cc index cadbe27819..f2e3b1610c 100644 --- a/compiler/utils/mips64/assembler_mips64_test.cc +++ b/compiler/utils/mips64/assembler_mips64_test.cc @@ -2836,6 +2836,22 @@ TEST_F(AssemblerMIPS64Test, FillD) { DriverStr(RepeatVR(&mips64::Mips64Assembler::FillD, "fill.d ${reg1}, ${reg2}"), "fill.d"); } +TEST_F(AssemblerMIPS64Test, LdiB) { + DriverStr(RepeatVIb(&mips64::Mips64Assembler::LdiB, -8, "ldi.b ${reg}, {imm}"), "ldi.b"); +} + +TEST_F(AssemblerMIPS64Test, LdiH) { + DriverStr(RepeatVIb(&mips64::Mips64Assembler::LdiH, -10, "ldi.h ${reg}, {imm}"), "ldi.h"); +} + +TEST_F(AssemblerMIPS64Test, LdiW) { + DriverStr(RepeatVIb(&mips64::Mips64Assembler::LdiW, -10, "ldi.w ${reg}, {imm}"), "ldi.w"); +} + +TEST_F(AssemblerMIPS64Test, LdiD) { + DriverStr(RepeatVIb(&mips64::Mips64Assembler::LdiD, -10, "ldi.d ${reg}, {imm}"), "ldi.d"); +} + TEST_F(AssemblerMIPS64Test, LdB) { DriverStr(RepeatVRIb(&mips64::Mips64Assembler::LdB, -10, "ld.b ${reg1}, {imm}(${reg2})"), "ld.b"); } diff --git a/compiler/utils/mips64/constants_mips64.h b/compiler/utils/mips64/constants_mips64.h index 5ae9c73589..bc8e40b437 100644 --- a/compiler/utils/mips64/constants_mips64.h +++ b/compiler/utils/mips64/constants_mips64.h @@ -66,6 +66,7 @@ enum InstructionFields { kWdShift = 6, kWdBits = 5, kS10Shift = 16, + kI10Shift = 11, kS10MinorShift = 2, kBranchOffsetMask = 0x0000ffff, diff --git a/compiler/utils/mips64/managed_register_mips64.cc b/compiler/utils/mips64/managed_register_mips64.cc index dea396e4a7..42d061ec15 100644 --- a/compiler/utils/mips64/managed_register_mips64.cc +++ b/compiler/utils/mips64/managed_register_mips64.cc @@ -26,6 +26,11 @@ bool Mips64ManagedRegister::Overlaps(const Mips64ManagedRegister& other) const { CHECK(IsValidManagedRegister()); CHECK(other.IsValidManagedRegister()); if (Equals(other)) return true; + if (IsFpuRegister() && other.IsVectorRegister()) { 
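+    // With MSA, FPU register Fn is the low 64 bits of vector register Wn,
+    // so the two overlap exactly when their indices match.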
+ return (AsFpuRegister() == other.AsOverlappingFpuRegister()); + } else if (IsVectorRegister() && other.IsFpuRegister()) { + return (AsVectorRegister() == other.AsOverlappingVectorRegister()); + } return false; } @@ -36,6 +41,8 @@ void Mips64ManagedRegister::Print(std::ostream& os) const { os << "GPU: " << static_cast<int>(AsGpuRegister()); } else if (IsFpuRegister()) { os << "FpuRegister: " << static_cast<int>(AsFpuRegister()); + } else if (IsVectorRegister()) { + os << "VectorRegister: " << static_cast<int>(AsVectorRegister()); } else { os << "??: " << RegId(); } diff --git a/compiler/utils/mips64/managed_register_mips64.h b/compiler/utils/mips64/managed_register_mips64.h index c9f95569cf..3980199b1e 100644 --- a/compiler/utils/mips64/managed_register_mips64.h +++ b/compiler/utils/mips64/managed_register_mips64.h @@ -30,11 +30,27 @@ const int kNumberOfGpuAllocIds = kNumberOfGpuRegisters; const int kNumberOfFpuRegIds = kNumberOfFpuRegisters; const int kNumberOfFpuAllocIds = kNumberOfFpuRegisters; -const int kNumberOfRegIds = kNumberOfGpuRegIds + kNumberOfFpuRegIds; -const int kNumberOfAllocIds = kNumberOfGpuAllocIds + kNumberOfFpuAllocIds; - -// An instance of class 'ManagedRegister' represents a single GPU register (enum -// Register) or a double precision FP register (enum FpuRegister) +const int kNumberOfVecRegIds = kNumberOfVectorRegisters; +const int kNumberOfVecAllocIds = kNumberOfVectorRegisters; + +const int kNumberOfRegIds = kNumberOfGpuRegIds + kNumberOfFpuRegIds + kNumberOfVecRegIds; +const int kNumberOfAllocIds = kNumberOfGpuAllocIds + kNumberOfFpuAllocIds + kNumberOfVecAllocIds; + +// Register ids map: +// [0..R[ core registers (enum GpuRegister) +// [R..F[ floating-point registers (enum FpuRegister) +// [F..W[ MSA vector registers (enum VectorRegister) +// where +// R = kNumberOfGpuRegIds +// F = R + kNumberOfFpuRegIds +// W = F + kNumberOfVecRegIds + +// An instance of class 'ManagedRegister' represents a single Mips64 register. +// A register can be one of the following: +// * core register (enum GpuRegister) +// * floating-point register (enum FpuRegister) +// * MSA vector register (enum VectorRegister) +// // 'ManagedRegister::NoRegister()' provides an invalid register. // There is a one-to-one mapping between ManagedRegister and register id. class Mips64ManagedRegister : public ManagedRegister { @@ -49,6 +65,21 @@ class Mips64ManagedRegister : public ManagedRegister { return static_cast<FpuRegister>(id_ - kNumberOfGpuRegIds); } + constexpr VectorRegister AsVectorRegister() const { + CHECK(IsVectorRegister()); + return static_cast<VectorRegister>(id_ - (kNumberOfGpuRegIds + kNumberOfFpuRegisters)); + } + + constexpr FpuRegister AsOverlappingFpuRegister() const { + CHECK(IsValidManagedRegister()); + return static_cast<FpuRegister>(AsVectorRegister()); + } + + constexpr VectorRegister AsOverlappingVectorRegister() const { + CHECK(IsValidManagedRegister()); + return static_cast<VectorRegister>(AsFpuRegister()); + } + constexpr bool IsGpuRegister() const { CHECK(IsValidManagedRegister()); return (0 <= id_) && (id_ < kNumberOfGpuRegIds); @@ -60,6 +91,12 @@ class Mips64ManagedRegister : public ManagedRegister { return (0 <= test) && (test < kNumberOfFpuRegIds); } + constexpr bool IsVectorRegister() const { + CHECK(IsValidManagedRegister()); + const int test = id_ - (kNumberOfGpuRegIds + kNumberOfFpuRegIds); + return (0 <= test) && (test < kNumberOfVecRegIds); + } + void Print(std::ostream& os) const; // Returns true if the two managed-registers ('this' and 'other') overlap. 
@@ -77,6 +114,11 @@ class Mips64ManagedRegister : public ManagedRegister { return FromRegId(r + kNumberOfGpuRegIds); } + static constexpr Mips64ManagedRegister FromVectorRegister(VectorRegister r) { + CHECK_NE(r, kNoVectorRegister); + return FromRegId(r + kNumberOfGpuRegIds + kNumberOfFpuRegIds); + } + private: constexpr bool IsValidManagedRegister() const { return (0 <= id_) && (id_ < kNumberOfRegIds); diff --git a/compiler/utils/mips64/managed_register_mips64_test.cc b/compiler/utils/mips64/managed_register_mips64_test.cc new file mode 100644 index 0000000000..8b72d7e61d --- /dev/null +++ b/compiler/utils/mips64/managed_register_mips64_test.cc @@ -0,0 +1,480 @@ +/* + * Copyright (C) 2017 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "managed_register_mips64.h" +#include "globals.h" +#include "gtest/gtest.h" + +namespace art { +namespace mips64 { + +TEST(Mips64ManagedRegister, NoRegister) { + Mips64ManagedRegister reg = ManagedRegister::NoRegister().AsMips64(); + EXPECT_TRUE(reg.IsNoRegister()); + EXPECT_FALSE(reg.Overlaps(reg)); +} + +TEST(Mips64ManagedRegister, GpuRegister) { + Mips64ManagedRegister reg = Mips64ManagedRegister::FromGpuRegister(ZERO); + EXPECT_FALSE(reg.IsNoRegister()); + EXPECT_TRUE(reg.IsGpuRegister()); + EXPECT_FALSE(reg.IsFpuRegister()); + EXPECT_FALSE(reg.IsVectorRegister()); + EXPECT_EQ(ZERO, reg.AsGpuRegister()); + + reg = Mips64ManagedRegister::FromGpuRegister(AT); + EXPECT_FALSE(reg.IsNoRegister()); + EXPECT_TRUE(reg.IsGpuRegister()); + EXPECT_FALSE(reg.IsFpuRegister()); + EXPECT_FALSE(reg.IsVectorRegister()); + EXPECT_EQ(AT, reg.AsGpuRegister()); + + reg = Mips64ManagedRegister::FromGpuRegister(V0); + EXPECT_FALSE(reg.IsNoRegister()); + EXPECT_TRUE(reg.IsGpuRegister()); + EXPECT_FALSE(reg.IsFpuRegister()); + EXPECT_FALSE(reg.IsVectorRegister()); + EXPECT_EQ(V0, reg.AsGpuRegister()); + + reg = Mips64ManagedRegister::FromGpuRegister(A0); + EXPECT_FALSE(reg.IsNoRegister()); + EXPECT_TRUE(reg.IsGpuRegister()); + EXPECT_FALSE(reg.IsFpuRegister()); + EXPECT_FALSE(reg.IsVectorRegister()); + EXPECT_EQ(A0, reg.AsGpuRegister()); + + reg = Mips64ManagedRegister::FromGpuRegister(A7); + EXPECT_FALSE(reg.IsNoRegister()); + EXPECT_TRUE(reg.IsGpuRegister()); + EXPECT_FALSE(reg.IsFpuRegister()); + EXPECT_FALSE(reg.IsVectorRegister()); + EXPECT_EQ(A7, reg.AsGpuRegister()); + + reg = Mips64ManagedRegister::FromGpuRegister(T0); + EXPECT_FALSE(reg.IsNoRegister()); + EXPECT_TRUE(reg.IsGpuRegister()); + EXPECT_FALSE(reg.IsFpuRegister()); + EXPECT_FALSE(reg.IsVectorRegister()); + EXPECT_EQ(T0, reg.AsGpuRegister()); + + reg = Mips64ManagedRegister::FromGpuRegister(T3); + EXPECT_FALSE(reg.IsNoRegister()); + EXPECT_TRUE(reg.IsGpuRegister()); + EXPECT_FALSE(reg.IsFpuRegister()); + EXPECT_FALSE(reg.IsVectorRegister()); + EXPECT_EQ(T3, reg.AsGpuRegister()); + + reg = Mips64ManagedRegister::FromGpuRegister(S0); + EXPECT_FALSE(reg.IsNoRegister()); + EXPECT_TRUE(reg.IsGpuRegister()); + EXPECT_FALSE(reg.IsFpuRegister()); + 
EXPECT_FALSE(reg.IsVectorRegister()); + EXPECT_EQ(S0, reg.AsGpuRegister()); + + reg = Mips64ManagedRegister::FromGpuRegister(GP); + EXPECT_FALSE(reg.IsNoRegister()); + EXPECT_TRUE(reg.IsGpuRegister()); + EXPECT_FALSE(reg.IsFpuRegister()); + EXPECT_FALSE(reg.IsVectorRegister()); + EXPECT_EQ(GP, reg.AsGpuRegister()); + + reg = Mips64ManagedRegister::FromGpuRegister(SP); + EXPECT_FALSE(reg.IsNoRegister()); + EXPECT_TRUE(reg.IsGpuRegister()); + EXPECT_FALSE(reg.IsFpuRegister()); + EXPECT_FALSE(reg.IsVectorRegister()); + EXPECT_EQ(SP, reg.AsGpuRegister()); + + reg = Mips64ManagedRegister::FromGpuRegister(RA); + EXPECT_FALSE(reg.IsNoRegister()); + EXPECT_TRUE(reg.IsGpuRegister()); + EXPECT_FALSE(reg.IsFpuRegister()); + EXPECT_FALSE(reg.IsVectorRegister()); + EXPECT_EQ(RA, reg.AsGpuRegister()); +} + +TEST(Mips64ManagedRegister, FpuRegister) { + Mips64ManagedRegister reg = Mips64ManagedRegister::FromFpuRegister(F0); + Mips64ManagedRegister vreg = Mips64ManagedRegister::FromVectorRegister(W0); + EXPECT_FALSE(reg.IsNoRegister()); + EXPECT_FALSE(reg.IsGpuRegister()); + EXPECT_TRUE(reg.IsFpuRegister()); + EXPECT_FALSE(reg.IsVectorRegister()); + EXPECT_TRUE(reg.Overlaps(vreg)); + EXPECT_EQ(F0, reg.AsFpuRegister()); + EXPECT_EQ(W0, reg.AsOverlappingVectorRegister()); + EXPECT_TRUE(reg.Equals(Mips64ManagedRegister::FromFpuRegister(F0))); + + reg = Mips64ManagedRegister::FromFpuRegister(F1); + vreg = Mips64ManagedRegister::FromVectorRegister(W1); + EXPECT_FALSE(reg.IsNoRegister()); + EXPECT_FALSE(reg.IsGpuRegister()); + EXPECT_TRUE(reg.IsFpuRegister()); + EXPECT_FALSE(reg.IsVectorRegister()); + EXPECT_TRUE(reg.Overlaps(vreg)); + EXPECT_EQ(F1, reg.AsFpuRegister()); + EXPECT_EQ(W1, reg.AsOverlappingVectorRegister()); + EXPECT_TRUE(reg.Equals(Mips64ManagedRegister::FromFpuRegister(F1))); + + reg = Mips64ManagedRegister::FromFpuRegister(F20); + vreg = Mips64ManagedRegister::FromVectorRegister(W20); + EXPECT_FALSE(reg.IsNoRegister()); + EXPECT_FALSE(reg.IsGpuRegister()); + EXPECT_TRUE(reg.IsFpuRegister()); + EXPECT_FALSE(reg.IsVectorRegister()); + EXPECT_TRUE(reg.Overlaps(vreg)); + EXPECT_EQ(F20, reg.AsFpuRegister()); + EXPECT_EQ(W20, reg.AsOverlappingVectorRegister()); + EXPECT_TRUE(reg.Equals(Mips64ManagedRegister::FromFpuRegister(F20))); + + reg = Mips64ManagedRegister::FromFpuRegister(F31); + vreg = Mips64ManagedRegister::FromVectorRegister(W31); + EXPECT_FALSE(reg.IsNoRegister()); + EXPECT_FALSE(reg.IsGpuRegister()); + EXPECT_TRUE(reg.IsFpuRegister()); + EXPECT_FALSE(reg.IsVectorRegister()); + EXPECT_TRUE(reg.Overlaps(vreg)); + EXPECT_EQ(F31, reg.AsFpuRegister()); + EXPECT_EQ(W31, reg.AsOverlappingVectorRegister()); + EXPECT_TRUE(reg.Equals(Mips64ManagedRegister::FromFpuRegister(F31))); +} + +TEST(Mips64ManagedRegister, VectorRegister) { + Mips64ManagedRegister reg = Mips64ManagedRegister::FromVectorRegister(W0); + Mips64ManagedRegister freg = Mips64ManagedRegister::FromFpuRegister(F0); + EXPECT_FALSE(reg.IsNoRegister()); + EXPECT_FALSE(reg.IsGpuRegister()); + EXPECT_FALSE(reg.IsFpuRegister()); + EXPECT_TRUE(reg.IsVectorRegister()); + EXPECT_TRUE(reg.Overlaps(freg)); + EXPECT_EQ(W0, reg.AsVectorRegister()); + EXPECT_EQ(F0, reg.AsOverlappingFpuRegister()); + EXPECT_TRUE(reg.Equals(Mips64ManagedRegister::FromVectorRegister(W0))); + + reg = Mips64ManagedRegister::FromVectorRegister(W2); + freg = Mips64ManagedRegister::FromFpuRegister(F2); + EXPECT_FALSE(reg.IsNoRegister()); + EXPECT_FALSE(reg.IsGpuRegister()); + EXPECT_FALSE(reg.IsFpuRegister()); + EXPECT_TRUE(reg.IsVectorRegister()); + 
EXPECT_TRUE(reg.Overlaps(freg)); + EXPECT_EQ(W2, reg.AsVectorRegister()); + EXPECT_EQ(F2, reg.AsOverlappingFpuRegister()); + EXPECT_TRUE(reg.Equals(Mips64ManagedRegister::FromVectorRegister(W2))); + + reg = Mips64ManagedRegister::FromVectorRegister(W13); + freg = Mips64ManagedRegister::FromFpuRegister(F13); + EXPECT_FALSE(reg.IsNoRegister()); + EXPECT_FALSE(reg.IsGpuRegister()); + EXPECT_FALSE(reg.IsFpuRegister()); + EXPECT_TRUE(reg.IsVectorRegister()); + EXPECT_TRUE(reg.Overlaps(freg)); + EXPECT_EQ(W13, reg.AsVectorRegister()); + EXPECT_EQ(F13, reg.AsOverlappingFpuRegister()); + EXPECT_TRUE(reg.Equals(Mips64ManagedRegister::FromVectorRegister(W13))); + + reg = Mips64ManagedRegister::FromVectorRegister(W29); + freg = Mips64ManagedRegister::FromFpuRegister(F29); + EXPECT_FALSE(reg.IsNoRegister()); + EXPECT_FALSE(reg.IsGpuRegister()); + EXPECT_FALSE(reg.IsFpuRegister()); + EXPECT_TRUE(reg.IsVectorRegister()); + EXPECT_TRUE(reg.Overlaps(freg)); + EXPECT_EQ(W29, reg.AsVectorRegister()); + EXPECT_EQ(F29, reg.AsOverlappingFpuRegister()); + EXPECT_TRUE(reg.Equals(Mips64ManagedRegister::FromVectorRegister(W29))); +} + +TEST(Mips64ManagedRegister, Equals) { + ManagedRegister no_reg = ManagedRegister::NoRegister(); + EXPECT_TRUE(no_reg.Equals(Mips64ManagedRegister::NoRegister())); + EXPECT_FALSE(no_reg.Equals(Mips64ManagedRegister::FromGpuRegister(ZERO))); + EXPECT_FALSE(no_reg.Equals(Mips64ManagedRegister::FromGpuRegister(A1))); + EXPECT_FALSE(no_reg.Equals(Mips64ManagedRegister::FromGpuRegister(S2))); + EXPECT_FALSE(no_reg.Equals(Mips64ManagedRegister::FromFpuRegister(F0))); + EXPECT_FALSE(no_reg.Equals(Mips64ManagedRegister::FromVectorRegister(W0))); + + Mips64ManagedRegister reg_ZERO = Mips64ManagedRegister::FromGpuRegister(ZERO); + EXPECT_FALSE(reg_ZERO.Equals(Mips64ManagedRegister::NoRegister())); + EXPECT_TRUE(reg_ZERO.Equals(Mips64ManagedRegister::FromGpuRegister(ZERO))); + EXPECT_FALSE(reg_ZERO.Equals(Mips64ManagedRegister::FromGpuRegister(A1))); + EXPECT_FALSE(reg_ZERO.Equals(Mips64ManagedRegister::FromGpuRegister(S2))); + EXPECT_FALSE(reg_ZERO.Equals(Mips64ManagedRegister::FromFpuRegister(F0))); + EXPECT_FALSE(reg_ZERO.Equals(Mips64ManagedRegister::FromVectorRegister(W0))); + + Mips64ManagedRegister reg_A1 = Mips64ManagedRegister::FromGpuRegister(A1); + EXPECT_FALSE(reg_A1.Equals(Mips64ManagedRegister::NoRegister())); + EXPECT_FALSE(reg_A1.Equals(Mips64ManagedRegister::FromGpuRegister(ZERO))); + EXPECT_FALSE(reg_A1.Equals(Mips64ManagedRegister::FromGpuRegister(A0))); + EXPECT_TRUE(reg_A1.Equals(Mips64ManagedRegister::FromGpuRegister(A1))); + EXPECT_FALSE(reg_A1.Equals(Mips64ManagedRegister::FromGpuRegister(S2))); + EXPECT_FALSE(reg_A1.Equals(Mips64ManagedRegister::FromFpuRegister(F0))); + EXPECT_FALSE(reg_A1.Equals(Mips64ManagedRegister::FromVectorRegister(W0))); + + Mips64ManagedRegister reg_S2 = Mips64ManagedRegister::FromGpuRegister(S2); + EXPECT_FALSE(reg_S2.Equals(Mips64ManagedRegister::NoRegister())); + EXPECT_FALSE(reg_S2.Equals(Mips64ManagedRegister::FromGpuRegister(ZERO))); + EXPECT_FALSE(reg_S2.Equals(Mips64ManagedRegister::FromGpuRegister(A1))); + EXPECT_FALSE(reg_S2.Equals(Mips64ManagedRegister::FromGpuRegister(S1))); + EXPECT_TRUE(reg_S2.Equals(Mips64ManagedRegister::FromGpuRegister(S2))); + EXPECT_FALSE(reg_S2.Equals(Mips64ManagedRegister::FromFpuRegister(F0))); + EXPECT_FALSE(reg_S2.Equals(Mips64ManagedRegister::FromVectorRegister(W0))); + + Mips64ManagedRegister reg_F0 = Mips64ManagedRegister::FromFpuRegister(F0); + 
EXPECT_FALSE(reg_F0.Equals(Mips64ManagedRegister::NoRegister())); + EXPECT_FALSE(reg_F0.Equals(Mips64ManagedRegister::FromGpuRegister(ZERO))); + EXPECT_FALSE(reg_F0.Equals(Mips64ManagedRegister::FromGpuRegister(A1))); + EXPECT_FALSE(reg_F0.Equals(Mips64ManagedRegister::FromGpuRegister(S2))); + EXPECT_TRUE(reg_F0.Equals(Mips64ManagedRegister::FromFpuRegister(F0))); + EXPECT_FALSE(reg_F0.Equals(Mips64ManagedRegister::FromFpuRegister(F1))); + EXPECT_FALSE(reg_F0.Equals(Mips64ManagedRegister::FromFpuRegister(F31))); + EXPECT_FALSE(reg_F0.Equals(Mips64ManagedRegister::FromVectorRegister(W0))); + + Mips64ManagedRegister reg_F31 = Mips64ManagedRegister::FromFpuRegister(F31); + EXPECT_FALSE(reg_F31.Equals(Mips64ManagedRegister::NoRegister())); + EXPECT_FALSE(reg_F31.Equals(Mips64ManagedRegister::FromGpuRegister(ZERO))); + EXPECT_FALSE(reg_F31.Equals(Mips64ManagedRegister::FromGpuRegister(A1))); + EXPECT_FALSE(reg_F31.Equals(Mips64ManagedRegister::FromGpuRegister(S2))); + EXPECT_FALSE(reg_F31.Equals(Mips64ManagedRegister::FromFpuRegister(F0))); + EXPECT_FALSE(reg_F31.Equals(Mips64ManagedRegister::FromFpuRegister(F1))); + EXPECT_TRUE(reg_F31.Equals(Mips64ManagedRegister::FromFpuRegister(F31))); + EXPECT_FALSE(reg_F31.Equals(Mips64ManagedRegister::FromVectorRegister(W0))); + + Mips64ManagedRegister reg_W0 = Mips64ManagedRegister::FromVectorRegister(W0); + EXPECT_FALSE(reg_W0.Equals(Mips64ManagedRegister::NoRegister())); + EXPECT_FALSE(reg_W0.Equals(Mips64ManagedRegister::FromGpuRegister(ZERO))); + EXPECT_FALSE(reg_W0.Equals(Mips64ManagedRegister::FromGpuRegister(A1))); + EXPECT_FALSE(reg_W0.Equals(Mips64ManagedRegister::FromGpuRegister(S1))); + EXPECT_FALSE(reg_W0.Equals(Mips64ManagedRegister::FromFpuRegister(F0))); + EXPECT_TRUE(reg_W0.Equals(Mips64ManagedRegister::FromVectorRegister(W0))); + EXPECT_FALSE(reg_W0.Equals(Mips64ManagedRegister::FromVectorRegister(W1))); + EXPECT_FALSE(reg_W0.Equals(Mips64ManagedRegister::FromVectorRegister(W31))); + + Mips64ManagedRegister reg_W31 = Mips64ManagedRegister::FromVectorRegister(W31); + EXPECT_FALSE(reg_W31.Equals(Mips64ManagedRegister::NoRegister())); + EXPECT_FALSE(reg_W31.Equals(Mips64ManagedRegister::FromGpuRegister(ZERO))); + EXPECT_FALSE(reg_W31.Equals(Mips64ManagedRegister::FromGpuRegister(A1))); + EXPECT_FALSE(reg_W31.Equals(Mips64ManagedRegister::FromGpuRegister(S1))); + EXPECT_FALSE(reg_W31.Equals(Mips64ManagedRegister::FromFpuRegister(F0))); + EXPECT_FALSE(reg_W31.Equals(Mips64ManagedRegister::FromVectorRegister(W0))); + EXPECT_FALSE(reg_W31.Equals(Mips64ManagedRegister::FromVectorRegister(W1))); + EXPECT_TRUE(reg_W31.Equals(Mips64ManagedRegister::FromVectorRegister(W31))); +} + +TEST(Mips64ManagedRegister, Overlaps) { + Mips64ManagedRegister reg = Mips64ManagedRegister::FromFpuRegister(F0); + Mips64ManagedRegister reg_o = Mips64ManagedRegister::FromVectorRegister(W0); + EXPECT_TRUE(reg.Overlaps(reg_o)); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(ZERO))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(A0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(S0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(RA))); + EXPECT_EQ(F0, reg_o.AsOverlappingFpuRegister()); + EXPECT_EQ(W0, reg.AsOverlappingVectorRegister()); + EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F4))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F16))); + 
EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F31))); + EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W4))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W16))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W31))); + + reg = Mips64ManagedRegister::FromFpuRegister(F4); + reg_o = Mips64ManagedRegister::FromVectorRegister(W4); + EXPECT_TRUE(reg.Overlaps(reg_o)); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(ZERO))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(A0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(S0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(RA))); + EXPECT_EQ(F4, reg_o.AsOverlappingFpuRegister()); + EXPECT_EQ(W4, reg.AsOverlappingVectorRegister()); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F0))); + EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F4))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F16))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F31))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W0))); + EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W4))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W16))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W31))); + + reg = Mips64ManagedRegister::FromFpuRegister(F16); + reg_o = Mips64ManagedRegister::FromVectorRegister(W16); + EXPECT_TRUE(reg.Overlaps(reg_o)); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(ZERO))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(A0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(S0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(RA))); + EXPECT_EQ(F16, reg_o.AsOverlappingFpuRegister()); + EXPECT_EQ(W16, reg.AsOverlappingVectorRegister()); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F4))); + EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F16))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F31))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W4))); + EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W16))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W31))); + + reg = Mips64ManagedRegister::FromFpuRegister(F31); + reg_o = Mips64ManagedRegister::FromVectorRegister(W31); + EXPECT_TRUE(reg.Overlaps(reg_o)); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(ZERO))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(A0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(S0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(RA))); + EXPECT_EQ(F31, reg_o.AsOverlappingFpuRegister()); + EXPECT_EQ(W31, reg.AsOverlappingVectorRegister()); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F4))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F16))); + EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F31))); + 
EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W4))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W16))); + EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W31))); + + reg = Mips64ManagedRegister::FromVectorRegister(W0); + reg_o = Mips64ManagedRegister::FromFpuRegister(F0); + EXPECT_TRUE(reg.Overlaps(reg_o)); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(ZERO))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(A0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(S0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(RA))); + EXPECT_EQ(W0, reg_o.AsOverlappingVectorRegister()); + EXPECT_EQ(F0, reg.AsOverlappingFpuRegister()); + EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F4))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F16))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F31))); + EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W4))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W16))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W31))); + + reg = Mips64ManagedRegister::FromVectorRegister(W4); + reg_o = Mips64ManagedRegister::FromFpuRegister(F4); + EXPECT_TRUE(reg.Overlaps(reg_o)); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(ZERO))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(A0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(S0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(RA))); + EXPECT_EQ(W4, reg_o.AsOverlappingVectorRegister()); + EXPECT_EQ(F4, reg.AsOverlappingFpuRegister()); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F0))); + EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F4))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F16))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F31))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W0))); + EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W4))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W16))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W31))); + + reg = Mips64ManagedRegister::FromVectorRegister(W16); + reg_o = Mips64ManagedRegister::FromFpuRegister(F16); + EXPECT_TRUE(reg.Overlaps(reg_o)); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(ZERO))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(A0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(S0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(RA))); + EXPECT_EQ(W16, reg_o.AsOverlappingVectorRegister()); + EXPECT_EQ(F16, reg.AsOverlappingFpuRegister()); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F4))); + EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F16))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F31))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W0))); + 
EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W4))); + EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W16))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W31))); + + reg = Mips64ManagedRegister::FromVectorRegister(W31); + reg_o = Mips64ManagedRegister::FromFpuRegister(F31); + EXPECT_TRUE(reg.Overlaps(reg_o)); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(ZERO))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(A0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(S0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(RA))); + EXPECT_EQ(W31, reg_o.AsOverlappingVectorRegister()); + EXPECT_EQ(F31, reg.AsOverlappingFpuRegister()); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F4))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F16))); + EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F31))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W4))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W16))); + EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W31))); + + reg = Mips64ManagedRegister::FromGpuRegister(ZERO); + EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(ZERO))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(A0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(S0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(RA))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F4))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F16))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F31))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W4))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W16))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W31))); + + reg = Mips64ManagedRegister::FromGpuRegister(A0); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(ZERO))); + EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(A0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(S0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(RA))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F4))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F16))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F31))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W4))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W16))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W31))); + + reg = Mips64ManagedRegister::FromGpuRegister(S0); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(ZERO))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(A0))); + 
EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(S0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(RA))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F4))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F16))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F31))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W4))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W16))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W31))); + + reg = Mips64ManagedRegister::FromGpuRegister(RA); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(ZERO))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(A0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(S0))); + EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(RA))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F4))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F16))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F31))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W0))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W4))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W16))); + EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W31))); +} + +} // namespace mips64 +} // namespace art diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc index 5307dc09d9..9c934b7f39 100644 --- a/compiler/utils/x86/assembler_x86.cc +++ b/compiler/utils/x86/assembler_x86.cc @@ -1221,6 +1221,24 @@ void X86Assembler::por(XmmRegister dst, XmmRegister src) { } +void X86Assembler::pavgb(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0xE0); + EmitXmmRegisterOperand(dst, src); +} + + +void X86Assembler::pavgw(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitUint8(0x0F); + EmitUint8(0xE3); + EmitXmmRegisterOperand(dst, src); +} + + void X86Assembler::pcmpeqb(XmmRegister dst, XmmRegister src) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitUint8(0x66); diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h index f52cf16c8b..b87522a017 100644 --- a/compiler/utils/x86/assembler_x86.h +++ b/compiler/utils/x86/assembler_x86.h @@ -495,6 +495,9 @@ class X86Assembler FINAL : public Assembler { void orps(XmmRegister dst, XmmRegister src); void por(XmmRegister dst, XmmRegister src); + void pavgb(XmmRegister dst, XmmRegister src); // no addr variant (for now) + void pavgw(XmmRegister dst, XmmRegister src); + void pcmpeqb(XmmRegister dst, XmmRegister src); void pcmpeqw(XmmRegister dst, XmmRegister src); void pcmpeqd(XmmRegister dst, XmmRegister src); diff --git a/compiler/utils/x86/assembler_x86_test.cc b/compiler/utils/x86/assembler_x86_test.cc index 23049079e0..a01eb6dc23 100644 --- a/compiler/utils/x86/assembler_x86_test.cc +++ b/compiler/utils/x86/assembler_x86_test.cc @@ -605,6 +605,14 @@ TEST_F(AssemblerX86Test, POr) { 
DriverStr(RepeatFF(&x86::X86Assembler::por, "por %{reg2}, %{reg1}"), "por"); } +TEST_F(AssemblerX86Test, PAvgB) { + DriverStr(RepeatFF(&x86::X86Assembler::pavgb, "pavgb %{reg2}, %{reg1}"), "pavgb"); +} + +TEST_F(AssemblerX86Test, PAvgW) { + DriverStr(RepeatFF(&x86::X86Assembler::pavgw, "pavgw %{reg2}, %{reg1}"), "pavgw"); +} + TEST_F(AssemblerX86Test, PCmpeqB) { DriverStr(RepeatFF(&x86::X86Assembler::pcmpeqb, "pcmpeqb %{reg2}, %{reg1}"), "cmpeqb"); } diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc index d20a6965c3..488c75de41 100644 --- a/compiler/utils/x86_64/assembler_x86_64.cc +++ b/compiler/utils/x86_64/assembler_x86_64.cc @@ -1427,6 +1427,24 @@ void X86_64Assembler::por(XmmRegister dst, XmmRegister src) { EmitXmmRegisterOperand(dst.LowBits(), src); } +void X86_64Assembler::pavgb(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0xE0); + EmitXmmRegisterOperand(dst.LowBits(), src); +} + +void X86_64Assembler::pavgw(XmmRegister dst, XmmRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x66); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0xE3); + EmitXmmRegisterOperand(dst.LowBits(), src); +} + void X86_64Assembler::pcmpeqb(XmmRegister dst, XmmRegister src) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitUint8(0x66); diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h index 08e17e81e5..fc2b117f71 100644 --- a/compiler/utils/x86_64/assembler_x86_64.h +++ b/compiler/utils/x86_64/assembler_x86_64.h @@ -523,6 +523,9 @@ class X86_64Assembler FINAL : public Assembler { void orps(XmmRegister dst, XmmRegister src); void por(XmmRegister dst, XmmRegister src); + void pavgb(XmmRegister dst, XmmRegister src); // no addr variant (for now) + void pavgw(XmmRegister dst, XmmRegister src); + void pcmpeqb(XmmRegister dst, XmmRegister src); void pcmpeqw(XmmRegister dst, XmmRegister src); void pcmpeqd(XmmRegister dst, XmmRegister src); diff --git a/compiler/utils/x86_64/assembler_x86_64_test.cc b/compiler/utils/x86_64/assembler_x86_64_test.cc index 20062fdb07..4adf210e47 100644 --- a/compiler/utils/x86_64/assembler_x86_64_test.cc +++ b/compiler/utils/x86_64/assembler_x86_64_test.cc @@ -1293,6 +1293,14 @@ TEST_F(AssemblerX86_64Test, Por) { DriverStr(RepeatFF(&x86_64::X86_64Assembler::por, "por %{reg2}, %{reg1}"), "por"); } +TEST_F(AssemblerX86_64Test, Pavgb) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::pavgb, "pavgb %{reg2}, %{reg1}"), "pavgb"); +} + +TEST_F(AssemblerX86_64Test, Pavgw) { + DriverStr(RepeatFF(&x86_64::X86_64Assembler::pavgw, "pavgw %{reg2}, %{reg1}"), "pavgw"); +} + TEST_F(AssemblerX86_64Test, PCmpeqb) { DriverStr(RepeatFF(&x86_64::X86_64Assembler::pcmpeqb, "pcmpeqb %{reg2}, %{reg1}"), "pcmpeqb"); } diff --git a/compiler/verifier_deps_test.cc b/compiler/verifier_deps_test.cc index 4bfc84990d..fa7e98586c 100644 --- a/compiler/verifier_deps_test.cc +++ b/compiler/verifier_deps_test.cc @@ -18,21 +18,21 @@ #include "verifier/verifier_deps.h" #include "class_linker.h" -#include "compiler/common_compiler_test.h" -#include "compiler/dex/verification_results.h" -#include "compiler/dex/verified_method.h" -#include "compiler/driver/compiler_options.h" -#include "compiler/driver/compiler_driver.h" -#include "compiler/utils/atomic_method_ref_map-inl.h" +#include "common_compiler_test.h" #include "compiler_callbacks.h" +#include 
"dex/verification_results.h" +#include "dex/verified_method.h" #include "dex_file.h" #include "dex_file_types.h" +#include "driver/compiler_options.h" +#include "driver/compiler_driver.h" #include "handle_scope-inl.h" #include "verifier/method_verifier-inl.h" #include "mirror/class_loader.h" #include "runtime.h" #include "thread.h" #include "scoped_thread_state_change-inl.h" +#include "utils/atomic_method_ref_map-inl.h" namespace art { namespace verifier { |