Diffstat (limited to 'compiler')
-rw-r--r--  compiler/Android.bp  8
-rw-r--r--  compiler/dex/dex_to_dex_compiler.cc  12
-rw-r--r--  compiler/dex/dex_to_dex_compiler.h  3
-rw-r--r--  compiler/driver/compiler_driver.cc  9
-rw-r--r--  compiler/image_writer.cc  154
-rw-r--r--  compiler/image_writer.h  22
-rw-r--r--  compiler/optimizing/bounds_check_elimination.cc  11
-rw-r--r--  compiler/optimizing/cha_guard_optimization.cc  7
-rw-r--r--  compiler/optimizing/code_generator_arm.cc  2
-rw-r--r--  compiler/optimizing/code_generator_arm64.cc  4
-rw-r--r--  compiler/optimizing/code_generator_arm64.h  5
-rw-r--r--  compiler/optimizing/code_generator_arm_vixl.cc  2
-rw-r--r--  compiler/optimizing/code_generator_mips.cc  1493
-rw-r--r--  compiler/optimizing/code_generator_mips.h  130
-rw-r--r--  compiler/optimizing/code_generator_mips64.cc  1477
-rw-r--r--  compiler/optimizing/code_generator_mips64.h  129
-rw-r--r--  compiler/optimizing/code_generator_vector_arm.cc  235
-rw-r--r--  compiler/optimizing/code_generator_vector_arm64.cc  641
-rw-r--r--  compiler/optimizing/code_generator_vector_arm_vixl.cc  235
-rw-r--r--  compiler/optimizing/code_generator_vector_mips.cc  235
-rw-r--r--  compiler/optimizing/code_generator_vector_mips64.cc  235
-rw-r--r--  compiler/optimizing/code_generator_vector_x86.cc  767
-rw-r--r--  compiler/optimizing/code_generator_vector_x86_64.cc  760
-rw-r--r--  compiler/optimizing/code_generator_x86.cc  2
-rw-r--r--  compiler/optimizing/code_generator_x86_64.cc  4
-rw-r--r--  compiler/optimizing/codegen_test_utils.h  1
-rw-r--r--  compiler/optimizing/common_arm.h  5
-rw-r--r--  compiler/optimizing/graph_visualizer.cc  4
-rw-r--r--  compiler/optimizing/induction_var_analysis_test.cc  16
-rw-r--r--  compiler/optimizing/inliner.cc  83
-rw-r--r--  compiler/optimizing/instruction_simplifier.cc  3
-rw-r--r--  compiler/optimizing/intrinsics_arm_vixl.cc  54
-rw-r--r--  compiler/optimizing/intrinsics_mips.cc  242
-rw-r--r--  compiler/optimizing/intrinsics_mips64.cc  173
-rw-r--r--  compiler/optimizing/licm_test.cc  13
-rw-r--r--  compiler/optimizing/loop_optimization.cc  776
-rw-r--r--  compiler/optimizing/loop_optimization.h  115
-rw-r--r--  compiler/optimizing/nodes.cc  85
-rw-r--r--  compiler/optimizing/nodes.h  115
-rw-r--r--  compiler/optimizing/nodes_vector.h  585
-rw-r--r--  compiler/optimizing/optimizing_compiler.cc  2
-rw-r--r--  compiler/optimizing/prepare_for_register_allocation.cc  8
-rw-r--r--  compiler/optimizing/prepare_for_register_allocation.h  1
-rw-r--r--  compiler/optimizing/reference_type_propagation.cc  4
-rw-r--r--  compiler/optimizing/reference_type_propagation_test.cc  2
-rw-r--r--  compiler/optimizing/scheduler.h  6
-rw-r--r--  compiler/optimizing/ssa_liveness_analysis.cc  7
-rw-r--r--  compiler/optimizing/ssa_liveness_analysis_test.cc  11
-rw-r--r--  compiler/utils/assembler_test.h  15
-rw-r--r--  compiler/utils/atomic_method_ref_map-inl.h  2
-rw-r--r--  compiler/utils/mips64/assembler_mips64.cc  41
-rw-r--r--  compiler/utils/mips64/assembler_mips64.h  5
-rw-r--r--  compiler/utils/mips64/assembler_mips64_test.cc  16
-rw-r--r--  compiler/utils/mips64/constants_mips64.h  1
-rw-r--r--  compiler/utils/mips64/managed_register_mips64.cc  7
-rw-r--r--  compiler/utils/mips64/managed_register_mips64.h  52
-rw-r--r--  compiler/utils/mips64/managed_register_mips64_test.cc  480
-rw-r--r--  compiler/utils/x86/assembler_x86.cc  18
-rw-r--r--  compiler/utils/x86/assembler_x86.h  3
-rw-r--r--  compiler/utils/x86/assembler_x86_test.cc  8
-rw-r--r--  compiler/utils/x86_64/assembler_x86_64.cc  18
-rw-r--r--  compiler/utils/x86_64/assembler_x86_64.h  3
-rw-r--r--  compiler/utils/x86_64/assembler_x86_64_test.cc  8
-rw-r--r--  compiler/verifier_deps_test.cc  12
64 files changed, 8938 insertions, 644 deletions
diff --git a/compiler/Android.bp b/compiler/Android.bp
index d57f301ff9..312fc7b35a 100644
--- a/compiler/Android.bp
+++ b/compiler/Android.bp
@@ -106,7 +106,9 @@ art_cc_defaults {
"linker/arm/relative_patcher_arm_base.cc",
"linker/arm/relative_patcher_thumb2.cc",
"optimizing/code_generator_arm.cc",
+ "optimizing/code_generator_vector_arm.cc",
"optimizing/code_generator_arm_vixl.cc",
+ "optimizing/code_generator_vector_arm_vixl.cc",
"optimizing/dex_cache_array_fixups_arm.cc",
"optimizing/instruction_simplifier_arm.cc",
"optimizing/instruction_simplifier_shared.cc",
@@ -126,6 +128,7 @@ art_cc_defaults {
"jni/quick/arm64/calling_convention_arm64.cc",
"linker/arm64/relative_patcher_arm64.cc",
"optimizing/code_generator_arm64.cc",
+ "optimizing/code_generator_vector_arm64.cc",
"optimizing/scheduler_arm64.cc",
"optimizing/instruction_simplifier_arm64.cc",
"optimizing/intrinsics_arm64.cc",
@@ -139,6 +142,7 @@ art_cc_defaults {
"jni/quick/mips/calling_convention_mips.cc",
"linker/mips/relative_patcher_mips.cc",
"optimizing/code_generator_mips.cc",
+ "optimizing/code_generator_vector_mips.cc",
"optimizing/dex_cache_array_fixups_mips.cc",
"optimizing/intrinsics_mips.cc",
"optimizing/pc_relative_fixups_mips.cc",
@@ -151,6 +155,7 @@ art_cc_defaults {
"jni/quick/mips64/calling_convention_mips64.cc",
"linker/mips64/relative_patcher_mips64.cc",
"optimizing/code_generator_mips64.cc",
+ "optimizing/code_generator_vector_mips64.cc",
"optimizing/intrinsics_mips64.cc",
"utils/mips64/assembler_mips64.cc",
"utils/mips64/managed_register_mips64.cc",
@@ -162,6 +167,7 @@ art_cc_defaults {
"linker/x86/relative_patcher_x86.cc",
"linker/x86/relative_patcher_x86_base.cc",
"optimizing/code_generator_x86.cc",
+ "optimizing/code_generator_vector_x86.cc",
"optimizing/intrinsics_x86.cc",
"optimizing/pc_relative_fixups_x86.cc",
"optimizing/x86_memory_gen.cc",
@@ -176,6 +182,7 @@ art_cc_defaults {
"linker/x86_64/relative_patcher_x86_64.cc",
"optimizing/intrinsics_x86_64.cc",
"optimizing/code_generator_x86_64.cc",
+ "optimizing/code_generator_vector_x86_64.cc",
"utils/x86_64/assembler_x86_64.cc",
"utils/x86_64/jni_macro_assembler_x86_64.cc",
"utils/x86_64/managed_register_x86_64.cc",
@@ -391,6 +398,7 @@ art_cc_test {
mips64: {
srcs: [
"linker/mips64/relative_patcher_mips64_test.cc",
+ "utils/mips64/managed_register_mips64_test.cc",
],
},
x86: {
diff --git a/compiler/dex/dex_to_dex_compiler.cc b/compiler/dex/dex_to_dex_compiler.cc
index 808e28c9ea..538fe93793 100644
--- a/compiler/dex/dex_to_dex_compiler.cc
+++ b/compiler/dex/dex_to_dex_compiler.cc
@@ -70,10 +70,6 @@ class DexCompiler {
return *unit_.GetDexFile();
}
- bool PerformOptimizations() const {
- return dex_to_dex_compilation_level_ >= DexToDexCompilationLevel::kOptimize;
- }
-
// Compiles a RETURN-VOID into a RETURN-VOID-BARRIER within a constructor where
// a barrier is required.
void CompileReturnVoid(Instruction* inst, uint32_t dex_pc);
@@ -114,7 +110,7 @@ class DexCompiler {
};
void DexCompiler::Compile() {
- DCHECK_GE(dex_to_dex_compilation_level_, DexToDexCompilationLevel::kRequired);
+ DCHECK_EQ(dex_to_dex_compilation_level_, DexToDexCompilationLevel::kOptimize);
const DexFile::CodeItem* code_item = unit_.GetCodeItem();
const uint16_t* insns = code_item->insns_;
const uint32_t insns_size = code_item->insns_size_in_code_units_;
@@ -221,7 +217,7 @@ void DexCompiler::CompileReturnVoid(Instruction* inst, uint32_t dex_pc) {
}
Instruction* DexCompiler::CompileCheckCast(Instruction* inst, uint32_t dex_pc) {
- if (!kEnableCheckCastEllision || !PerformOptimizations()) {
+ if (!kEnableCheckCastEllision) {
return inst;
}
if (!driver_.IsSafeCast(&unit_, dex_pc)) {
@@ -254,7 +250,7 @@ void DexCompiler::CompileInstanceFieldAccess(Instruction* inst,
uint32_t dex_pc,
Instruction::Code new_opcode,
bool is_put) {
- if (!kEnableQuickening || !PerformOptimizations()) {
+ if (!kEnableQuickening) {
return;
}
uint32_t field_idx = inst->VRegC_22c();
@@ -279,7 +275,7 @@ void DexCompiler::CompileInstanceFieldAccess(Instruction* inst,
void DexCompiler::CompileInvokeVirtual(Instruction* inst, uint32_t dex_pc,
Instruction::Code new_opcode, bool is_range) {
- if (!kEnableQuickening || !PerformOptimizations()) {
+ if (!kEnableQuickening) {
return;
}
uint32_t method_idx = is_range ? inst->VRegB_3rc() : inst->VRegB_35c();
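Taken together, these hunks leave the helpers gated only by their compile-time flags: DexCompiler::Compile() is now entered solely at the kOptimize level (per the DCHECK_EQ above), so the PerformOptimizations() predicate is gone. A minimal standalone sketch of the resulting shape, using only names visible in this diff (bodies are illustrative):

    #include <cassert>

    enum class DexToDexCompilationLevel { kDontDexToDexCompile, kOptimize };
    constexpr bool kEnableQuickening = true;

    void CompileInvokeVirtual() {
      if (!kEnableQuickening) {  // The only remaining gate per helper.
        return;
      }
      // ... quicken the invoke ...
    }

    void Compile(DexToDexCompilationLevel level) {
      // Was DCHECK_GE(level, kRequired); kRequired no longer exists.
      assert(level == DexToDexCompilationLevel::kOptimize);
      CompileInvokeVirtual();
    }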
diff --git a/compiler/dex/dex_to_dex_compiler.h b/compiler/dex/dex_to_dex_compiler.h
index 00c596d60e..87ddb395ad 100644
--- a/compiler/dex/dex_to_dex_compiler.h
+++ b/compiler/dex/dex_to_dex_compiler.h
@@ -34,8 +34,7 @@ namespace optimizer {
enum class DexToDexCompilationLevel {
kDontDexToDexCompile, // Only meaning wrt image time interpretation.
- kRequired, // Dex-to-dex compilation required for correctness.
- kOptimize // Perform required transformation and peep-hole optimizations.
+ kOptimize // Perform peep-hole optimizations.
};
std::ostream& operator<<(std::ostream& os, const DexToDexCompilationLevel& rhs);
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index 995098799c..e823f67d3c 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -532,16 +532,13 @@ static optimizer::DexToDexCompilationLevel GetDexToDexCompilationLevel(
if (driver.GetCompilerOptions().GetDebuggable()) {
// We are debuggable so definitions of classes might be changed. We don't want to do any
// optimizations that could break that.
- max_level = optimizer::DexToDexCompilationLevel::kRequired;
+ max_level = optimizer::DexToDexCompilationLevel::kDontDexToDexCompile;
}
if (klass->IsVerified()) {
// Class is verified so we can enable DEX-to-DEX compilation for performance.
return max_level;
- } else if (klass->ShouldVerifyAtRuntime()) {
- // Class verification has soft-failed. Anyway, ensure at least correctness.
- return optimizer::DexToDexCompilationLevel::kRequired;
} else {
- // Class verification has failed: do not run DEX-to-DEX compilation.
+ // Class verification has failed: do not run DEX-to-DEX optimizations.
return optimizer::DexToDexCompilationLevel::kDontDexToDexCompile;
}
}
@@ -611,7 +608,7 @@ static void CompileMethod(Thread* self,
dex_file,
(verified_method != nullptr)
? dex_to_dex_compilation_level
- : optimizer::DexToDexCompilationLevel::kRequired);
+ : optimizer::DexToDexCompilationLevel::kDontDexToDexCompile);
}
} else if ((access_flags & kAccNative) != 0) {
// Are we extracting only and have support for generic JNI down calls?
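With kRequired removed, GetDexToDexCompilationLevel reduces to a two-way choice. A hedged standalone sketch of the logic in this hunk (the checks that feed max_level are elided):

    enum class DexToDexCompilationLevel { kDontDexToDexCompile, kOptimize };

    DexToDexCompilationLevel Select(bool debuggable, bool verified,
                                    DexToDexCompilationLevel max_level) {
      if (debuggable) {
        // Class definitions may change while debugging; previously this
        // fell back to kRequired, now it skips dex-to-dex entirely.
        return DexToDexCompilationLevel::kDontDexToDexCompile;
      }
      return verified ? max_level  // kOptimize for verified classes.
                      : DexToDexCompilationLevel::kDontDexToDexCompile;
    }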
diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc
index d156644484..d129249d63 100644
--- a/compiler/image_writer.cc
+++ b/compiler/image_writer.cc
@@ -1338,21 +1338,20 @@ mirror::Object* ImageWriter::TryAssignBinSlot(WorkStack& work_stack,
// live.
if (as_klass->ShouldHaveImt()) {
ImTable* imt = as_klass->GetImt(target_ptr_size_);
- for (size_t i = 0; i < ImTable::kSize; ++i) {
- ArtMethod* imt_method = imt->Get(i, target_ptr_size_);
- DCHECK(imt_method != nullptr);
- if (imt_method->IsRuntimeMethod() &&
- !IsInBootImage(imt_method) &&
- !NativeRelocationAssigned(imt_method)) {
- AssignMethodOffset(imt_method, kNativeObjectRelocationTypeRuntimeMethod, oat_index);
+ if (TryAssignImTableOffset(imt, oat_index)) {
+ // Since ImTables can be shared, only do this the first time, to avoid
+ // double counting IMT method fixups.
+ for (size_t i = 0; i < ImTable::kSize; ++i) {
+ ArtMethod* imt_method = imt->Get(i, target_ptr_size_);
+ DCHECK(imt_method != nullptr);
+ if (imt_method->IsRuntimeMethod() &&
+ !IsInBootImage(imt_method) &&
+ !NativeRelocationAssigned(imt_method)) {
+ AssignMethodOffset(imt_method, kNativeObjectRelocationTypeRuntimeMethod, oat_index);
+ }
}
}
}
-
- if (as_klass->ShouldHaveImt()) {
- ImTable* imt = as_klass->GetImt(target_ptr_size_);
- TryAssignImTableOffset(imt, oat_index);
- }
} else if (obj->IsClassLoader()) {
// Register the class loader if it has a class table.
// The fake boot class loader should not get registered and we should end up with only one
@@ -1386,10 +1385,10 @@ bool ImageWriter::NativeRelocationAssigned(void* ptr) const {
return native_object_relocations_.find(ptr) != native_object_relocations_.end();
}
-void ImageWriter::TryAssignImTableOffset(ImTable* imt, size_t oat_index) {
+bool ImageWriter::TryAssignImTableOffset(ImTable* imt, size_t oat_index) {
// No offset, or already assigned.
if (imt == nullptr || IsInBootImage(imt) || NativeRelocationAssigned(imt)) {
- return;
+ return false;
}
// If the method is a conflict method we also want to assign the conflict table offset.
ImageInfo& image_info = GetImageInfo(oat_index);
@@ -1401,6 +1400,7 @@ void ImageWriter::TryAssignImTableOffset(ImTable* imt, size_t oat_index) {
image_info.bin_slot_sizes_[kBinImTable],
kNativeObjectRelocationTypeIMTable});
image_info.bin_slot_sizes_[kBinImTable] += size;
+ return true;
}
void ImageWriter::TryAssignConflictTableOffset(ImtConflictTable* table, size_t oat_index) {
@@ -1499,8 +1499,7 @@ class ImageWriter::VisitReferencesVisitor {
ALWAYS_INLINE void operator() (ObjPtr<mirror::Class> klass ATTRIBUTE_UNUSED,
ObjPtr<mirror::Reference> ref) const
REQUIRES_SHARED(Locks::mutator_lock_) {
- ref->SetReferent</*kTransactionActive*/false>(
- VisitReference(ref->GetReferent<kWithoutReadBarrier>()));
+ operator()(ref, mirror::Reference::ReferentOffset(), /* is_static */ false);
}
private:
@@ -1658,7 +1657,7 @@ void ImageWriter::CalculateNewObjectOffsets() {
// Calculate size of the dex cache arrays slot and prepare offsets.
PrepareDexCacheArraySlots();
- // Calculate the sizes of the intern tables and class tables.
+ // Calculate the sizes of the intern tables, class tables, and fixup tables.
for (ImageInfo& image_info : image_infos_) {
// Calculate how big the intern table will be after being serialized.
InternTable* const intern_table = image_info.intern_table_.get();
@@ -1666,6 +1665,7 @@ void ImageWriter::CalculateNewObjectOffsets() {
if (intern_table->StrongSize() != 0u) {
image_info.intern_table_bytes_ = intern_table->WriteToMemory(nullptr);
}
+
// Calculate the size of the class table.
ReaderMutexLock mu(self, *Locks::classlinker_classes_lock_);
DCHECK_EQ(image_info.class_table_->NumReferencedZygoteClasses(), 0u);
@@ -1718,8 +1718,6 @@ void ImageWriter::CalculateNewObjectOffsets() {
// Transform each object's bin slot into an offset which will be used to do the final copy.
heap->VisitObjects(UnbinObjectsIntoOffsetCallback, this);
- // DCHECK_EQ(image_end_, GetBinSizeSum(kBinMirrorCount) + image_objects_offset_begin_);
-
size_t i = 0;
for (ImageInfo& image_info : image_infos_) {
image_info.image_roots_address_ = PointerToLowMemUInt32(GetImageAddress(image_roots[i].Get()));
@@ -1733,8 +1731,6 @@ void ImageWriter::CalculateNewObjectOffsets() {
ImageInfo& image_info = GetImageInfo(relocation.oat_index);
relocation.offset += image_info.bin_slot_offsets_[bin_type];
}
-
- // Note that image_info.image_end_ is left at end of used mirror object section.
}
size_t ImageWriter::ImageInfo::CreateImageSections(ImageSection* out_sections) const {
@@ -1776,7 +1772,6 @@ size_t ImageWriter::ImageInfo::CreateImageSections(ImageSection* out_sections) c
ImageSection* dex_cache_arrays_section = &out_sections[ImageHeader::kSectionDexCacheArrays];
*dex_cache_arrays_section = ImageSection(bin_slot_offsets_[kBinDexCacheArray],
bin_slot_sizes_[kBinDexCacheArray]);
-
// Round up to the alignment the string table expects. See HashSet::WriteToMemory.
size_t cur_pos = RoundUp(dex_cache_arrays_section->End(), sizeof(uint64_t));
// Calculate the size of the interned strings.
@@ -1868,18 +1863,18 @@ class ImageWriter::FixupRootVisitor : public RootVisitor {
explicit FixupRootVisitor(ImageWriter* image_writer) : image_writer_(image_writer) {
}
- void VisitRoots(mirror::Object*** roots, size_t count, const RootInfo& info ATTRIBUTE_UNUSED)
+ void VisitRoots(mirror::Object*** roots ATTRIBUTE_UNUSED,
+ size_t count ATTRIBUTE_UNUSED,
+ const RootInfo& info ATTRIBUTE_UNUSED)
OVERRIDE REQUIRES_SHARED(Locks::mutator_lock_) {
- for (size_t i = 0; i < count; ++i) {
- *roots[i] = image_writer_->GetImageAddress(*roots[i]);
- }
+ LOG(FATAL) << "Unsupported";
}
void VisitRoots(mirror::CompressedReference<mirror::Object>** roots, size_t count,
const RootInfo& info ATTRIBUTE_UNUSED)
OVERRIDE REQUIRES_SHARED(Locks::mutator_lock_) {
for (size_t i = 0; i < count; ++i) {
- roots[i]->Assign(image_writer_->GetImageAddress(roots[i]->AsMirrorPtr()));
+ image_writer_->CopyReference(roots[i], roots[i]->AsMirrorPtr());
}
}
@@ -1890,7 +1885,9 @@ class ImageWriter::FixupRootVisitor : public RootVisitor {
void ImageWriter::CopyAndFixupImTable(ImTable* orig, ImTable* copy) {
for (size_t i = 0; i < ImTable::kSize; ++i) {
ArtMethod* method = orig->Get(i, target_ptr_size_);
- copy->Set(i, NativeLocationInImage(method), target_ptr_size_);
+ void** address = reinterpret_cast<void**>(copy->AddressOfElement(i, target_ptr_size_));
+ CopyAndFixupPointer(address, method);
+ DCHECK_EQ(copy->Get(i, target_ptr_size_), NativeLocationInImage(method));
}
}
@@ -1899,10 +1896,13 @@ void ImageWriter::CopyAndFixupImtConflictTable(ImtConflictTable* orig, ImtConfli
for (size_t i = 0; i < count; ++i) {
ArtMethod* interface_method = orig->GetInterfaceMethod(i, target_ptr_size_);
ArtMethod* implementation_method = orig->GetImplementationMethod(i, target_ptr_size_);
- copy->SetInterfaceMethod(i, target_ptr_size_, NativeLocationInImage(interface_method));
- copy->SetImplementationMethod(i,
- target_ptr_size_,
- NativeLocationInImage(implementation_method));
+ CopyAndFixupPointer(copy->AddressOfInterfaceMethod(i, target_ptr_size_), interface_method);
+ CopyAndFixupPointer(copy->AddressOfImplementationMethod(i, target_ptr_size_),
+ implementation_method);
+ DCHECK_EQ(copy->GetInterfaceMethod(i, target_ptr_size_),
+ NativeLocationInImage(interface_method));
+ DCHECK_EQ(copy->GetImplementationMethod(i, target_ptr_size_),
+ NativeLocationInImage(implementation_method));
}
}
@@ -1921,8 +1921,9 @@ void ImageWriter::CopyAndFixupNativeData(size_t oat_index) {
switch (relocation.type) {
case kNativeObjectRelocationTypeArtField: {
memcpy(dest, pair.first, sizeof(ArtField));
- reinterpret_cast<ArtField*>(dest)->SetDeclaringClass(
- GetImageAddress(reinterpret_cast<ArtField*>(pair.first)->GetDeclaringClass().Ptr()));
+ CopyReference(
+ reinterpret_cast<ArtField*>(dest)->GetDeclaringClassAddressWithoutBarrier(),
+ reinterpret_cast<ArtField*>(pair.first)->GetDeclaringClass().Ptr());
break;
}
case kNativeObjectRelocationTypeRuntimeMethod:
@@ -2039,8 +2040,10 @@ void ImageWriter::CopyAndFixupObjectsCallback(Object* obj, void* arg) {
reinterpret_cast<ImageWriter*>(arg)->CopyAndFixupObject(obj);
}
-void ImageWriter::FixupPointerArray(mirror::Object* dst, mirror::PointerArray* arr,
- mirror::Class* klass, Bin array_type) {
+void ImageWriter::FixupPointerArray(mirror::Object* dst,
+ mirror::PointerArray* arr,
+ mirror::Class* klass,
+ Bin array_type) {
CHECK(klass->IsArrayClass());
CHECK(arr->IsIntArray() || arr->IsLongArray()) << klass->PrettyClass() << " " << arr;
// Fixup int and long pointers for the ArtMethod or ArtField arrays.
@@ -2049,7 +2052,7 @@ void ImageWriter::FixupPointerArray(mirror::Object* dst, mirror::PointerArray* a
auto* dest_array = down_cast<mirror::PointerArray*>(dst);
for (size_t i = 0, count = num_elements; i < count; ++i) {
void* elem = arr->GetElementPtrSize<void*>(i, target_ptr_size_);
- if (elem != nullptr && !IsInBootImage(elem)) {
+ if (kIsDebugBuild && elem != nullptr && !IsInBootImage(elem)) {
auto it = native_object_relocations_.find(elem);
if (UNLIKELY(it == native_object_relocations_.end())) {
if (it->second.IsArtMethodRelocation()) {
@@ -2065,12 +2068,9 @@ void ImageWriter::FixupPointerArray(mirror::Object* dst, mirror::PointerArray* a
<< Class::PrettyClass(field->GetDeclaringClass());
}
UNREACHABLE();
- } else {
- ImageInfo& image_info = GetImageInfo(it->second.oat_index);
- elem = image_info.image_begin_ + it->second.offset;
}
}
- dest_array->SetElementPtrSize<false, true>(i, elem, target_ptr_size_);
+ CopyAndFixupPointer(dest_array->ElementAddress(i, target_ptr_size_), elem);
}
}
@@ -2118,22 +2118,19 @@ class ImageWriter::FixupVisitor {
void operator()(ObjPtr<Object> obj, MemberOffset offset, bool is_static ATTRIBUTE_UNUSED) const
- REQUIRES(Locks::mutator_lock_, Locks::heap_bitmap_lock_) {
+ REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(Locks::heap_bitmap_lock_) {
ObjPtr<Object> ref = obj->GetFieldObject<Object, kVerifyNone>(offset);
- // Use SetFieldObjectWithoutWriteBarrier to avoid card marking since we are writing to the
- // image.
- copy_->SetFieldObjectWithoutWriteBarrier<false, true, kVerifyNone>(
- offset,
- image_writer_->GetImageAddress(ref.Ptr()));
+ // Copy the reference and record the fixup if necessary.
+ image_writer_->CopyReference(
+ copy_->GetFieldObjectReferenceAddr<kVerifyNone>(offset),
+ ref.Ptr());
}
// java.lang.ref.Reference visitor.
void operator()(ObjPtr<mirror::Class> klass ATTRIBUTE_UNUSED,
ObjPtr<mirror::Reference> ref) const
REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(Locks::heap_bitmap_lock_) {
- copy_->SetFieldObjectWithoutWriteBarrier<false, true, kVerifyNone>(
- mirror::Reference::ReferentOffset(),
- image_writer_->GetImageAddress(ref->GetReferent()));
+ operator()(ref, mirror::Reference::ReferentOffset(), /* is_static */ false);
}
protected:
@@ -2211,7 +2208,10 @@ class ImageWriter::NativeLocationVisitor {
explicit NativeLocationVisitor(ImageWriter* image_writer) : image_writer_(image_writer) {}
template <typename T>
- T* operator()(T* ptr) const REQUIRES_SHARED(Locks::mutator_lock_) {
+ T* operator()(T* ptr, void** dest_addr = nullptr) const REQUIRES_SHARED(Locks::mutator_lock_) {
+ if (dest_addr != nullptr) {
+ image_writer_->CopyAndFixupPointer(dest_addr, ptr);
+ }
return image_writer_->NativeLocationInImage(ptr);
}
@@ -2274,10 +2274,10 @@ void ImageWriter::FixupObject(Object* orig, Object* copy) {
}
}
-
-class ImageAddressVisitor {
+class ImageWriter::ImageAddressVisitorForDexCacheArray {
public:
- explicit ImageAddressVisitor(ImageWriter* image_writer) : image_writer_(image_writer) {}
+ explicit ImageAddressVisitorForDexCacheArray(ImageWriter* image_writer)
+ : image_writer_(image_writer) {}
template <typename T>
T* operator()(T* ptr) const REQUIRES_SHARED(Locks::mutator_lock_) {
@@ -2288,9 +2288,9 @@ class ImageAddressVisitor {
ImageWriter* const image_writer_;
};
-
void ImageWriter::FixupDexCache(mirror::DexCache* orig_dex_cache,
mirror::DexCache* copy_dex_cache) {
+ ImageAddressVisitorForDexCacheArray fixup_visitor(this);
// Though the DexCache array fields are usually treated as native pointers, we set the full
// 64-bit values here, clearing the top 32 bits for 32-bit targets. The zero-extension is
// done by casting to the unsigned type uintptr_t before casting to int64_t, i.e.
@@ -2300,8 +2300,7 @@ void ImageWriter::FixupDexCache(mirror::DexCache* orig_dex_cache,
copy_dex_cache->SetFieldPtrWithSize<false>(mirror::DexCache::StringsOffset(),
NativeLocationInImage(orig_strings),
PointerSize::k64);
- orig_dex_cache->FixupStrings(NativeCopyLocation(orig_strings, orig_dex_cache),
- ImageAddressVisitor(this));
+ orig_dex_cache->FixupStrings(NativeCopyLocation(orig_strings, orig_dex_cache), fixup_visitor);
}
mirror::TypeDexCacheType* orig_types = orig_dex_cache->GetResolvedTypes();
if (orig_types != nullptr) {
@@ -2309,7 +2308,7 @@ void ImageWriter::FixupDexCache(mirror::DexCache* orig_dex_cache,
NativeLocationInImage(orig_types),
PointerSize::k64);
orig_dex_cache->FixupResolvedTypes(NativeCopyLocation(orig_types, orig_dex_cache),
- ImageAddressVisitor(this));
+ fixup_visitor);
}
ArtMethod** orig_methods = orig_dex_cache->GetResolvedMethods();
if (orig_methods != nullptr) {
@@ -2333,7 +2332,8 @@ void ImageWriter::FixupDexCache(mirror::DexCache* orig_dex_cache,
for (size_t i = 0, num = orig_dex_cache->NumResolvedFields(); i != num; ++i) {
mirror::FieldDexCachePair orig =
mirror::DexCache::GetNativePairPtrSize(orig_fields, i, target_ptr_size_);
- mirror::FieldDexCachePair copy(NativeLocationInImage(orig.object), orig.index);
+ mirror::FieldDexCachePair copy = orig;
+ copy.object = NativeLocationInImage(orig.object);
mirror::DexCache::SetNativePairPtrSize(copy_fields, i, copy, target_ptr_size_);
}
}
@@ -2343,7 +2343,7 @@ void ImageWriter::FixupDexCache(mirror::DexCache* orig_dex_cache,
NativeLocationInImage(orig_method_types),
PointerSize::k64);
orig_dex_cache->FixupResolvedMethodTypes(NativeCopyLocation(orig_method_types, orig_dex_cache),
- ImageAddressVisitor(this));
+ fixup_visitor);
}
GcRoot<mirror::CallSite>* orig_call_sites = orig_dex_cache->GetResolvedCallSites();
if (orig_call_sites != nullptr) {
@@ -2351,7 +2351,7 @@ void ImageWriter::FixupDexCache(mirror::DexCache* orig_dex_cache,
NativeLocationInImage(orig_call_sites),
PointerSize::k64);
orig_dex_cache->FixupResolvedCallSites(NativeCopyLocation(orig_call_sites, orig_dex_cache),
- ImageAddressVisitor(this));
+ fixup_visitor);
}
// Remove the DexFile pointers. They will be fixed up when the runtime loads the oat file. Leaving
@@ -2459,7 +2459,8 @@ void ImageWriter::CopyAndFixupMethod(ArtMethod* orig,
memcpy(copy, orig, ArtMethod::Size(target_ptr_size_));
- copy->SetDeclaringClass(GetImageAddress(orig->GetDeclaringClassUnchecked()));
+ CopyReference(copy->GetDeclaringClassAddressWithoutBarrier(), orig->GetDeclaringClassUnchecked());
+
ArtMethod** orig_resolved_methods = orig->GetDexCacheResolvedMethods(target_ptr_size_);
copy->SetDexCacheResolvedMethods(NativeLocationInImage(orig_resolved_methods), target_ptr_size_);
@@ -2571,7 +2572,7 @@ size_t ImageWriter::GetOatIndex(mirror::Object* obj) const {
return GetDefaultOatIndex();
}
auto it = oat_index_map_.find(obj);
- DCHECK(it != oat_index_map_.end());
+ DCHECK(it != oat_index_map_.end()) << obj;
return it->second;
}
@@ -2672,4 +2673,31 @@ ImageWriter::ImageInfo::ImageInfo()
: intern_table_(new InternTable),
class_table_(new ClassTable) {}
+void ImageWriter::CopyReference(mirror::HeapReference<mirror::Object>* dest,
+ ObjPtr<mirror::Object> src) {
+ dest->Assign(GetImageAddress(src.Ptr()));
+}
+
+void ImageWriter::CopyReference(mirror::CompressedReference<mirror::Object>* dest,
+ ObjPtr<mirror::Object> src) {
+ dest->Assign(GetImageAddress(src.Ptr()));
+}
+
+void ImageWriter::CopyAndFixupPointer(void** target, void* value) {
+ void* new_value = value;
+ if (value != nullptr && !IsInBootImage(value)) {
+ auto it = native_object_relocations_.find(value);
+ CHECK(it != native_object_relocations_.end()) << value;
+ const NativeObjectRelocation& relocation = it->second;
+ ImageInfo& image_info = GetImageInfo(relocation.oat_index);
+ new_value = reinterpret_cast<void*>(image_info.image_begin_ + relocation.offset);
+ }
+ if (target_ptr_size_ == PointerSize::k32) {
+ *reinterpret_cast<uint32_t*>(target) = PointerToLowMemUInt32(new_value);
+ } else {
+ *reinterpret_cast<uint64_t*>(target) = reinterpret_cast<uintptr_t>(new_value);
+ }
+}
+
+
} // namespace art
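A recurring pattern in this file: reference and pointer writes into the image copy are funneled through the new CopyReference/CopyAndFixupPointer helpers instead of ad-hoc GetImageAddress/SetElementPtrSize calls, so every fixup passes through one place. The width-sensitive store at the end of CopyAndFixupPointer can be read in isolation; a standalone sketch (names simplified, and the assert stands in for what PointerToLowMemUInt32 checks):

    #include <cassert>
    #include <cstdint>

    // Store `new_value` with the *target* image's pointer width, so a
    // 64-bit host can still emit a correct 32-bit image.
    void StoreTargetPointer(void** target, void* new_value, bool target_is_32bit) {
      uintptr_t v = reinterpret_cast<uintptr_t>(new_value);
      if (target_is_32bit) {
        assert(v <= UINT32_MAX);  // PointerToLowMemUInt32 enforces this.
        *reinterpret_cast<uint32_t*>(target) = static_cast<uint32_t>(v);
      } else {
        *reinterpret_cast<uint64_t*>(target) = static_cast<uint64_t>(v);
      }
    }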
diff --git a/compiler/image_writer.h b/compiler/image_writer.h
index 16aff61dab..39113c8143 100644
--- a/compiler/image_writer.h
+++ b/compiler/image_writer.h
@@ -38,8 +38,9 @@
#include "image.h"
#include "lock_word.h"
#include "mem_map.h"
-#include "oat_file.h"
#include "mirror/dex_cache.h"
+#include "obj_ptr.h"
+#include "oat_file.h"
#include "os.h"
#include "safe_map.h"
#include "utils.h"
@@ -317,6 +318,12 @@ class ImageWriter FINAL {
// Number of image class table bytes.
size_t class_table_bytes_ = 0;
+ // Number of object fixup bytes.
+ size_t object_fixup_bytes_ = 0;
+
+ // Number of pointer fixup bytes.
+ size_t pointer_fixup_bytes_ = 0;
+
// Intern table associated with this image for serialization.
std::unique_ptr<InternTable> intern_table_;
@@ -464,7 +471,8 @@ class ImageWriter FINAL {
size_t oat_index)
REQUIRES_SHARED(Locks::mutator_lock_);
- void TryAssignImTableOffset(ImTable* imt, size_t oat_index) REQUIRES_SHARED(Locks::mutator_lock_);
+ // Return true if imt was newly inserted.
+ bool TryAssignImTableOffset(ImTable* imt, size_t oat_index) REQUIRES_SHARED(Locks::mutator_lock_);
// Assign the offset for an IMT conflict table. Does nothing if the table already has a native
// relocation.
@@ -534,6 +542,14 @@ class ImageWriter FINAL {
// Return true if there already exists a native allocation for an object.
bool NativeRelocationAssigned(void* ptr) const;
+ void CopyReference(mirror::HeapReference<mirror::Object>* dest, ObjPtr<mirror::Object> src)
+ REQUIRES_SHARED(Locks::mutator_lock_);
+
+ void CopyReference(mirror::CompressedReference<mirror::Object>* dest, ObjPtr<mirror::Object> src)
+ REQUIRES_SHARED(Locks::mutator_lock_);
+
+ void CopyAndFixupPointer(void** target, void* value);
+
const CompilerDriver& compiler_driver_;
// Beginning target image address for the first image.
@@ -608,9 +624,11 @@ class ImageWriter FINAL {
class FixupRootVisitor;
class FixupVisitor;
class GetRootsVisitor;
+ class ImageAddressVisitorForDexCacheArray;
class NativeLocationVisitor;
class PruneClassesVisitor;
class PruneClassLoaderClassesVisitor;
+ class RegisterBootClassPathClassesVisitor;
class VisitReferencesVisitor;
DISALLOW_COPY_AND_ASSIGN(ImageWriter);
diff --git a/compiler/optimizing/bounds_check_elimination.cc b/compiler/optimizing/bounds_check_elimination.cc
index 2ee4db923a..476906a768 100644
--- a/compiler/optimizing/bounds_check_elimination.cc
+++ b/compiler/optimizing/bounds_check_elimination.cc
@@ -528,7 +528,8 @@ class BCEVisitor : public HGraphVisitor {
has_dom_based_dynamic_bce_(false),
initial_block_size_(graph->GetBlocks().size()),
side_effects_(side_effects),
- induction_range_(induction_analysis) {}
+ induction_range_(induction_analysis),
+ next_(nullptr) {}
void VisitBasicBlock(HBasicBlock* block) OVERRIDE {
DCHECK(!IsAddedBlock(block));
@@ -1618,8 +1619,8 @@ class BCEVisitor : public HGraphVisitor {
void InsertDeoptInLoop(HLoopInformation* loop, HBasicBlock* block, HInstruction* condition) {
HInstruction* suspend = loop->GetSuspendCheck();
block->InsertInstructionBefore(condition, block->GetLastInstruction());
- HDeoptimize* deoptimize =
- new (GetGraph()->GetArena()) HDeoptimize(condition, suspend->GetDexPc());
+ HDeoptimize* deoptimize = new (GetGraph()->GetArena()) HDeoptimize(
+ GetGraph()->GetArena(), condition, HDeoptimize::Kind::kBCE, suspend->GetDexPc());
block->InsertInstructionBefore(deoptimize, block->GetLastInstruction());
if (suspend->HasEnvironment()) {
deoptimize->CopyEnvironmentFromWithLoopPhiAdjustment(
@@ -1631,8 +1632,8 @@ class BCEVisitor : public HGraphVisitor {
void InsertDeoptInBlock(HBoundsCheck* bounds_check, HInstruction* condition) {
HBasicBlock* block = bounds_check->GetBlock();
block->InsertInstructionBefore(condition, bounds_check);
- HDeoptimize* deoptimize =
- new (GetGraph()->GetArena()) HDeoptimize(condition, bounds_check->GetDexPc());
+ HDeoptimize* deoptimize = new (GetGraph()->GetArena()) HDeoptimize(
+ GetGraph()->GetArena(), condition, HDeoptimize::Kind::kBCE, bounds_check->GetDexPc());
block->InsertInstructionBefore(deoptimize, bounds_check);
deoptimize->CopyEnvironmentFrom(bounds_check->GetEnvironment());
}
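Both this pass and cha_guard_optimization.cc below switch to an HDeoptimize constructor that additionally takes the arena allocator and a Kind tag. The call shape, inferred only from what these hunks show (illustrative fragment, not self-contained):

    // kBCE here; the CHA guard pass below uses HDeoptimize::Kind::kInline.
    HDeoptimize* deoptimize = new (graph->GetArena()) HDeoptimize(
        graph->GetArena(),         // allocator, new argument
        condition,                 // guard condition, as before
        HDeoptimize::Kind::kBCE,   // reason for deoptimizing, new argument
        dex_pc);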
diff --git a/compiler/optimizing/cha_guard_optimization.cc b/compiler/optimizing/cha_guard_optimization.cc
index fe423012ca..048073e37a 100644
--- a/compiler/optimizing/cha_guard_optimization.cc
+++ b/compiler/optimizing/cha_guard_optimization.cc
@@ -36,7 +36,8 @@ class CHAGuardVisitor : HGraphVisitor {
: HGraphVisitor(graph),
block_has_cha_guard_(GetGraph()->GetBlocks().size(),
0,
- graph->GetArena()->Adapter(kArenaAllocCHA)) {
+ graph->GetArena()->Adapter(kArenaAllocCHA)),
+ instruction_iterator_(nullptr) {
number_of_guards_to_visit_ = GetGraph()->GetNumberOfCHAGuards();
DCHECK_NE(number_of_guards_to_visit_, 0u);
// Will recount number of guards during guard optimization.
@@ -201,8 +202,8 @@ bool CHAGuardVisitor::HoistGuard(HShouldDeoptimizeFlag* flag,
HInstruction* suspend = loop_info->GetSuspendCheck();
// Need a new deoptimize instruction that copies the environment
// of the suspend instruction for the loop.
- HDeoptimize* deoptimize =
- new (GetGraph()->GetArena()) HDeoptimize(compare, suspend->GetDexPc());
+ HDeoptimize* deoptimize = new (GetGraph()->GetArena()) HDeoptimize(
+ GetGraph()->GetArena(), compare, HDeoptimize::Kind::kInline, suspend->GetDexPc());
pre_header->InsertInstructionBefore(deoptimize, pre_header->GetLastInstruction());
deoptimize->CopyEnvironmentFromWithLoopPhiAdjustment(
suspend->GetEnvironment(), loop_info->GetHeader());
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index d735b27090..d7cc577580 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -1134,7 +1134,7 @@ class ReadBarrierForHeapReferenceSlowPathARM : public SlowPathCodeARM {
instruction_->IsArrayGet() ||
instruction_->IsInstanceOf() ||
instruction_->IsCheckCast() ||
- (instruction_->IsInvokeVirtual()) && instruction_->GetLocations()->Intrinsified())
+ (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
<< "Unexpected instruction in read barrier for heap reference slow path: "
<< instruction_->DebugName();
// The read barrier instrumentation of object ArrayGet
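This parenthesization fix (repeated in the ARM64 and ARM VIXL files below) does not change what the DCHECK evaluates: && already binds tighter than ||, so the old and new forms group identically. The added parentheses only make the intended pairing of IsInvokeVirtual() with Intrinsified() explicit to readers and to -Wparentheses-style warnings. The precedence fact itself:

    static_assert((false || true && false) == (false || (true && false)),
                  "&& binds tighter than ||; the fix is cosmetic");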
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 28cc942dfb..d463830ff6 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -1150,7 +1150,7 @@ class ReadBarrierForHeapReferenceSlowPathARM64 : public SlowPathCodeARM64 {
instruction_->IsArrayGet() ||
instruction_->IsInstanceOf() ||
instruction_->IsCheckCast() ||
- (instruction_->IsInvokeVirtual()) && instruction_->GetLocations()->Intrinsified())
+ (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
<< "Unexpected instruction in read barrier for heap reference slow path: "
<< instruction_->DebugName();
// The read barrier instrumentation of object ArrayGet
@@ -3281,7 +3281,7 @@ void InstructionCodeGeneratorARM64::GenerateDivRemWithAnyConstant(HBinaryOperati
void InstructionCodeGeneratorARM64::GenerateDivRemIntegral(HBinaryOperation* instruction) {
DCHECK(instruction->IsDiv() || instruction->IsRem());
Primitive::Type type = instruction->GetResultType();
- DCHECK(type == Primitive::kPrimInt || Primitive::kPrimLong);
+ DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong);
LocationSummary* locations = instruction->GetLocations();
Register out = OutputRegister(instruction);
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index 7471cd5f12..10d8b841f8 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -318,6 +318,11 @@ class InstructionCodeGeneratorARM64 : public InstructionCodeGenerator {
void GenerateDivRemIntegral(HBinaryOperation* instruction);
void HandleGoto(HInstruction* got, HBasicBlock* successor);
+ vixl::aarch64::MemOperand CreateVecMemRegisters(
+ HVecMemoryOperation* instruction,
+ Location* reg_loc,
+ bool is_load);
+
Arm64Assembler* const assembler_;
CodeGeneratorARM64* const codegen_;
diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc
index a1c3da9e9c..cce412b314 100644
--- a/compiler/optimizing/code_generator_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_arm_vixl.cc
@@ -1175,7 +1175,7 @@ class ReadBarrierForHeapReferenceSlowPathARMVIXL : public SlowPathCodeARMVIXL {
instruction_->IsArrayGet() ||
instruction_->IsInstanceOf() ||
instruction_->IsCheckCast() ||
- (instruction_->IsInvokeVirtual()) && instruction_->GetLocations()->Intrinsified())
+ (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
<< "Unexpected instruction in read barrier for heap reference slow path: "
<< instruction_->DebugName();
// The read barrier instrumentation of object ArrayGet
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index 5f02a52417..287891feae 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -461,6 +461,536 @@ class DeoptimizationSlowPathMIPS : public SlowPathCodeMIPS {
DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathMIPS);
};
+class ArraySetSlowPathMIPS : public SlowPathCodeMIPS {
+ public:
+ explicit ArraySetSlowPathMIPS(HInstruction* instruction) : SlowPathCodeMIPS(instruction) {}
+
+ void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ LocationSummary* locations = instruction_->GetLocations();
+ __ Bind(GetEntryLabel());
+ SaveLiveRegisters(codegen, locations);
+
+ InvokeRuntimeCallingConvention calling_convention;
+ HParallelMove parallel_move(codegen->GetGraph()->GetArena());
+ parallel_move.AddMove(
+ locations->InAt(0),
+ Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
+ Primitive::kPrimNot,
+ nullptr);
+ parallel_move.AddMove(
+ locations->InAt(1),
+ Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
+ Primitive::kPrimInt,
+ nullptr);
+ parallel_move.AddMove(
+ locations->InAt(2),
+ Location::RegisterLocation(calling_convention.GetRegisterAt(2)),
+ Primitive::kPrimNot,
+ nullptr);
+ codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
+
+ CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen);
+ mips_codegen->InvokeRuntime(kQuickAputObject, instruction_, instruction_->GetDexPc(), this);
+ CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>();
+ RestoreLiveRegisters(codegen, locations);
+ __ B(GetExitLabel());
+ }
+
+ const char* GetDescription() const OVERRIDE { return "ArraySetSlowPathMIPS"; }
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathMIPS);
+};
+
+// Slow path marking an object reference `ref` during a read
+// barrier. The field `obj.field` in the object `obj` holding this
+// reference does not get updated by this slow path after marking (see
+// ReadBarrierMarkAndUpdateFieldSlowPathMIPS below for that).
+//
+// This means that after the execution of this slow path, `ref` will
+// always be up-to-date, but `obj.field` may not; i.e., after the
+// flip, `ref` will be a to-space reference, but `obj.field` will
+// probably still be a from-space reference (unless it gets updated by
+// another thread, or if another thread installed another object
+// reference (different from `ref`) in `obj.field`).
+//
+// If `entrypoint` is a valid location it is assumed to already be
+// holding the entrypoint. The case where the entrypoint is passed in
+// is for the GcRoot read barrier.
+class ReadBarrierMarkSlowPathMIPS : public SlowPathCodeMIPS {
+ public:
+ ReadBarrierMarkSlowPathMIPS(HInstruction* instruction,
+ Location ref,
+ Location entrypoint = Location::NoLocation())
+ : SlowPathCodeMIPS(instruction), ref_(ref), entrypoint_(entrypoint) {
+ DCHECK(kEmitCompilerReadBarrier);
+ }
+
+ const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathMIPS"; }
+
+ void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ LocationSummary* locations = instruction_->GetLocations();
+ Register ref_reg = ref_.AsRegister<Register>();
+ DCHECK(locations->CanCall());
+ DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg;
+ DCHECK(instruction_->IsInstanceFieldGet() ||
+ instruction_->IsStaticFieldGet() ||
+ instruction_->IsArrayGet() ||
+ instruction_->IsArraySet() ||
+ instruction_->IsLoadClass() ||
+ instruction_->IsLoadString() ||
+ instruction_->IsInstanceOf() ||
+ instruction_->IsCheckCast() ||
+ (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()) ||
+ (instruction_->IsInvokeStaticOrDirect() && instruction_->GetLocations()->Intrinsified()))
+ << "Unexpected instruction in read barrier marking slow path: "
+ << instruction_->DebugName();
+
+ __ Bind(GetEntryLabel());
+ // No need to save live registers; it's taken care of by the
+ // entrypoint. Also, there is no need to update the stack mask,
+ // as this runtime call will not trigger a garbage collection.
+ CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen);
+ DCHECK((V0 <= ref_reg && ref_reg <= T7) ||
+ (S2 <= ref_reg && ref_reg <= S7) ||
+ (ref_reg == FP)) << ref_reg;
+ // "Compact" slow path, saving two moves.
+ //
+ // Instead of using the standard runtime calling convention (input
+ // and output in A0 and V0 respectively):
+ //
+ // A0 <- ref
+ // V0 <- ReadBarrierMark(A0)
+ // ref <- V0
+ //
+ // we just use rX (the register containing `ref`) as input and output
+ // of a dedicated entrypoint:
+ //
+ // rX <- ReadBarrierMarkRegX(rX)
+ //
+ if (entrypoint_.IsValid()) {
+ mips_codegen->ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction_, this);
+ DCHECK_EQ(entrypoint_.AsRegister<Register>(), T9);
+ __ Jalr(entrypoint_.AsRegister<Register>());
+ __ NopIfNoReordering();
+ } else {
+ int32_t entry_point_offset =
+ CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kMipsPointerSize>(ref_reg - 1);
+ // This runtime call does not require a stack map.
+ mips_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset,
+ instruction_,
+ this,
+ /* direct */ false);
+ }
+ __ B(GetExitLabel());
+ }
+
+ private:
+ // The location (register) of the marked object reference.
+ const Location ref_;
+
+ // The location of the entrypoint if already loaded.
+ const Location entrypoint_;
+
+ DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathMIPS);
+};
+
+// Slow path marking an object reference `ref` during a read barrier,
+// and if needed, atomically updating the field `obj.field` in the
+// object `obj` holding this reference after marking (contrary to
+// ReadBarrierMarkSlowPathMIPS above, which never tries to update
+// `obj.field`).
+//
+// This means that after the execution of this slow path, both `ref`
+// and `obj.field` will be up-to-date; i.e., after the flip, both will
+// hold the same to-space reference (unless another thread installed
+// another object reference (different from `ref`) in `obj.field`).
+class ReadBarrierMarkAndUpdateFieldSlowPathMIPS : public SlowPathCodeMIPS {
+ public:
+ ReadBarrierMarkAndUpdateFieldSlowPathMIPS(HInstruction* instruction,
+ Location ref,
+ Register obj,
+ Location field_offset,
+ Register temp1)
+ : SlowPathCodeMIPS(instruction),
+ ref_(ref),
+ obj_(obj),
+ field_offset_(field_offset),
+ temp1_(temp1) {
+ DCHECK(kEmitCompilerReadBarrier);
+ }
+
+ const char* GetDescription() const OVERRIDE {
+ return "ReadBarrierMarkAndUpdateFieldSlowPathMIPS";
+ }
+
+ void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ LocationSummary* locations = instruction_->GetLocations();
+ Register ref_reg = ref_.AsRegister<Register>();
+ DCHECK(locations->CanCall());
+ DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg;
+ // This slow path is only used by the UnsafeCASObject intrinsic.
+ DCHECK((instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
+ << "Unexpected instruction in read barrier marking and field updating slow path: "
+ << instruction_->DebugName();
+ DCHECK(instruction_->GetLocations()->Intrinsified());
+ DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kUnsafeCASObject);
+ DCHECK(field_offset_.IsRegisterPair()) << field_offset_;
+
+ __ Bind(GetEntryLabel());
+
+ // Save the old reference.
+ // Note that we cannot use AT or TMP to save the old reference, as those
+ // are used by the code that follows, but we need the old reference after
+ // the call to the ReadBarrierMarkRegX entry point.
+ DCHECK_NE(temp1_, AT);
+ DCHECK_NE(temp1_, TMP);
+ __ Move(temp1_, ref_reg);
+
+ // No need to save live registers; it's taken care of by the
+ // entrypoint. Also, there is no need to update the stack mask,
+ // as this runtime call will not trigger a garbage collection.
+ CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen);
+ DCHECK((V0 <= ref_reg && ref_reg <= T7) ||
+ (S2 <= ref_reg && ref_reg <= S7) ||
+ (ref_reg == FP)) << ref_reg;
+ // "Compact" slow path, saving two moves.
+ //
+ // Instead of using the standard runtime calling convention (input
+ // and output in A0 and V0 respectively):
+ //
+ // A0 <- ref
+ // V0 <- ReadBarrierMark(A0)
+ // ref <- V0
+ //
+ // we just use rX (the register containing `ref`) as input and output
+ // of a dedicated entrypoint:
+ //
+ // rX <- ReadBarrierMarkRegX(rX)
+ //
+ int32_t entry_point_offset =
+ CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kMipsPointerSize>(ref_reg - 1);
+ // This runtime call does not require a stack map.
+ mips_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset,
+ instruction_,
+ this,
+ /* direct */ false);
+
+ // If the new reference is different from the old reference,
+ // update the field in the holder (`*(obj_ + field_offset_)`).
+ //
+ // Note that this field could also hold a different object, if
+ // another thread had concurrently changed it. In that case, the
+ // the compare-and-set (CAS) loop below would abort, leaving the
+ // field as-is.
+ MipsLabel done;
+ __ Beq(temp1_, ref_reg, &done);
+
+ // Update the holder's field atomically. This may fail if the
+ // mutator updates it before us, but that's OK. This is achieved
+ // using a strong compare-and-set (CAS) operation with relaxed
+ // memory synchronization ordering, where the expected value is
+ // the old reference and the desired value is the new reference.
+
+ // Convenience aliases.
+ Register base = obj_;
+ // The UnsafeCASObject intrinsic uses a register pair as field
+ // offset ("long offset"), of which only the low part contains
+ // data.
+ Register offset = field_offset_.AsRegisterPairLow<Register>();
+ Register expected = temp1_;
+ Register value = ref_reg;
+ Register tmp_ptr = TMP; // Pointer to actual memory.
+ Register tmp = AT; // Value in memory.
+
+ __ Addu(tmp_ptr, base, offset);
+
+ if (kPoisonHeapReferences) {
+ __ PoisonHeapReference(expected);
+ // Do not poison `value` if it is the same register as
+ // `expected`, which has just been poisoned.
+ if (value != expected) {
+ __ PoisonHeapReference(value);
+ }
+ }
+
+ // do {
+ //   tmp = [tmp_ptr] - expected;
+ // } while (tmp == 0 && failure([tmp_ptr] <- value));
+
+ bool is_r6 = mips_codegen->GetInstructionSetFeatures().IsR6();
+ MipsLabel loop_head, exit_loop;
+ __ Bind(&loop_head);
+ if (is_r6) {
+ __ LlR6(tmp, tmp_ptr);
+ } else {
+ __ LlR2(tmp, tmp_ptr);
+ }
+ __ Bne(tmp, expected, &exit_loop);
+ __ Move(tmp, value);
+ if (is_r6) {
+ __ ScR6(tmp, tmp_ptr);
+ } else {
+ __ ScR2(tmp, tmp_ptr);
+ }
+ __ Beqz(tmp, &loop_head);
+ __ Bind(&exit_loop);
+
+ if (kPoisonHeapReferences) {
+ __ UnpoisonHeapReference(expected);
+ // Do not unpoison `value` if it is the same register as
+ // `expected`, which has just been unpoisoned.
+ if (value != expected) {
+ __ UnpoisonHeapReference(value);
+ }
+ }
+
+ __ Bind(&done);
+ __ B(GetExitLabel());
+ }
+
+ private:
+ // The location (register) of the marked object reference.
+ const Location ref_;
+ // The register containing the object holding the marked object reference field.
+ const Register obj_;
+ // The location of the offset of the marked reference field within `obj_`.
+ Location field_offset_;
+
+ const Register temp1_;
+
+ DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkAndUpdateFieldSlowPathMIPS);
+};
+
+// Slow path generating a read barrier for a heap reference.
+class ReadBarrierForHeapReferenceSlowPathMIPS : public SlowPathCodeMIPS {
+ public:
+ ReadBarrierForHeapReferenceSlowPathMIPS(HInstruction* instruction,
+ Location out,
+ Location ref,
+ Location obj,
+ uint32_t offset,
+ Location index)
+ : SlowPathCodeMIPS(instruction),
+ out_(out),
+ ref_(ref),
+ obj_(obj),
+ offset_(offset),
+ index_(index) {
+ DCHECK(kEmitCompilerReadBarrier);
+ // If `obj` is equal to `out` or `ref`, it means the initial object
+ // has been overwritten by (or after) the heap object reference load
+ // to be instrumented, e.g.:
+ //
+ // __ LoadFromOffset(kLoadWord, out, out, offset);
+ // codegen_->GenerateReadBarrierSlow(instruction, out_loc, out_loc, out_loc, offset);
+ //
+ // In that case, we have lost the information about the original
+ // object, and the emitted read barrier cannot work properly.
+ DCHECK(!obj.Equals(out)) << "obj=" << obj << " out=" << out;
+ DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref;
+ }
+
+ void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen);
+ LocationSummary* locations = instruction_->GetLocations();
+ Register reg_out = out_.AsRegister<Register>();
+ DCHECK(locations->CanCall());
+ DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out));
+ DCHECK(instruction_->IsInstanceFieldGet() ||
+ instruction_->IsStaticFieldGet() ||
+ instruction_->IsArrayGet() ||
+ instruction_->IsInstanceOf() ||
+ instruction_->IsCheckCast() ||
+ (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
+ << "Unexpected instruction in read barrier for heap reference slow path: "
+ << instruction_->DebugName();
+
+ __ Bind(GetEntryLabel());
+ SaveLiveRegisters(codegen, locations);
+
+ // We may have to change the index's value, but as `index_` is a
+ // constant member (like other "inputs" of this slow path),
+ // introduce a copy of it, `index`.
+ Location index = index_;
+ if (index_.IsValid()) {
+ // Handle `index_` for HArrayGet and UnsafeGetObject/UnsafeGetObjectVolatile intrinsics.
+ if (instruction_->IsArrayGet()) {
+ // Compute the actual memory offset and store it in `index`.
+ Register index_reg = index_.AsRegister<Register>();
+ DCHECK(locations->GetLiveRegisters()->ContainsCoreRegister(index_reg));
+ if (codegen->IsCoreCalleeSaveRegister(index_reg)) {
+ // We are about to change the value of `index_reg` (see the
+ // calls to art::mips::MipsAssembler::Sll and
+ // art::mips::MipsAssembler::Addiu32 below), but it has
+ // not been saved by the previous call to
+ // art::SlowPathCode::SaveLiveRegisters, as it is a
+ // callee-save register --
+ // art::SlowPathCode::SaveLiveRegisters does not consider
+ // callee-save registers, as it has been designed with the
+ // assumption that callee-save registers are supposed to be
+ // handled by the called function. So, as a callee-save
+ // register, `index_reg` _would_ eventually be saved onto
+ // the stack, but it would be too late: we would have
+ // changed its value earlier. Therefore, we manually save
+ // it here into another freely available register,
+ // `free_reg`, chosen of course among the caller-save
+ // registers (as a callee-save `free_reg` register would
+ // exhibit the same problem).
+ //
+ // Note we could have requested a temporary register from
+ // the register allocator instead; but we prefer not to, as
+ // this is a slow path, and we know we can find a
+ // caller-save register that is available.
+ Register free_reg = FindAvailableCallerSaveRegister(codegen);
+ __ Move(free_reg, index_reg);
+ index_reg = free_reg;
+ index = Location::RegisterLocation(index_reg);
+ } else {
+ // The initial register stored in `index_` has already been
+ // saved in the call to art::SlowPathCode::SaveLiveRegisters
+ // (as it is not a callee-save register), so we can freely
+ // use it.
+ }
+ // Shifting the index value contained in `index_reg` by the scale
+ // factor (2) cannot overflow in practice, as the runtime is
+ // unable to allocate object arrays with a size larger than
+ // 2^26 - 1 (that is, 2^28 - 4 bytes).
+ __ Sll(index_reg, index_reg, TIMES_4);
+ static_assert(
+ sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
+ "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
+ __ Addiu32(index_reg, index_reg, offset_);
+ } else {
+ // In the case of the UnsafeGetObject/UnsafeGetObjectVolatile
+ // intrinsics, `index_` is not shifted by a scale factor of 2
+ // (as in the case of ArrayGet), as it is actually an offset
+ // to an object field within an object.
+ DCHECK(instruction_->IsInvoke()) << instruction_->DebugName();
+ DCHECK(instruction_->GetLocations()->Intrinsified());
+ DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) ||
+ (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile))
+ << instruction_->AsInvoke()->GetIntrinsic();
+ DCHECK_EQ(offset_, 0U);
+ DCHECK(index_.IsRegisterPair());
+ // UnsafeGet's offset location is a register pair; the low
+ // part contains the correct offset.
+ index = index_.ToLow();
+ }
+ }
+
+ // We're moving two or three locations to locations that could
+ // overlap, so we need a parallel move resolver.
+ InvokeRuntimeCallingConvention calling_convention;
+ HParallelMove parallel_move(codegen->GetGraph()->GetArena());
+ parallel_move.AddMove(ref_,
+ Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
+ Primitive::kPrimNot,
+ nullptr);
+ parallel_move.AddMove(obj_,
+ Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
+ Primitive::kPrimNot,
+ nullptr);
+ if (index.IsValid()) {
+ parallel_move.AddMove(index,
+ Location::RegisterLocation(calling_convention.GetRegisterAt(2)),
+ Primitive::kPrimInt,
+ nullptr);
+ codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
+ } else {
+ codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
+ __ LoadConst32(calling_convention.GetRegisterAt(2), offset_);
+ }
+ mips_codegen->InvokeRuntime(kQuickReadBarrierSlow,
+ instruction_,
+ instruction_->GetDexPc(),
+ this);
+ CheckEntrypointTypes<
+ kQuickReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t>();
+ mips_codegen->Move32(out_, calling_convention.GetReturnLocation(Primitive::kPrimNot));
+
+ RestoreLiveRegisters(codegen, locations);
+ __ B(GetExitLabel());
+ }
+
+ const char* GetDescription() const OVERRIDE { return "ReadBarrierForHeapReferenceSlowPathMIPS"; }
+
+ private:
+ Register FindAvailableCallerSaveRegister(CodeGenerator* codegen) {
+ size_t ref = static_cast<int>(ref_.AsRegister<Register>());
+ size_t obj = static_cast<int>(obj_.AsRegister<Register>());
+ for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) {
+ if (i != ref &&
+ i != obj &&
+ !codegen->IsCoreCalleeSaveRegister(i) &&
+ !codegen->IsBlockedCoreRegister(i)) {
+ return static_cast<Register>(i);
+ }
+ }
+ // We shall never fail to find a free caller-save register, as
+ // there are more than two core caller-save registers on MIPS
+ // (meaning it is possible to find one which is different from
+ // `ref` and `obj`).
+ DCHECK_GT(codegen->GetNumberOfCoreCallerSaveRegisters(), 2u);
+ LOG(FATAL) << "Could not find a free caller-save register";
+ UNREACHABLE();
+ }
+
+ const Location out_;
+ const Location ref_;
+ const Location obj_;
+ const uint32_t offset_;
+ // An additional location containing an index to an array.
+ // Only used for HArrayGet and the UnsafeGetObject &
+ // UnsafeGetObjectVolatile intrinsics.
+ const Location index_;
+
+ DISALLOW_COPY_AND_ASSIGN(ReadBarrierForHeapReferenceSlowPathMIPS);
+};
+
+// Slow path generating a read barrier for a GC root.
+class ReadBarrierForRootSlowPathMIPS : public SlowPathCodeMIPS {
+ public:
+ ReadBarrierForRootSlowPathMIPS(HInstruction* instruction, Location out, Location root)
+ : SlowPathCodeMIPS(instruction), out_(out), root_(root) {
+ DCHECK(kEmitCompilerReadBarrier);
+ }
+
+ void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ LocationSummary* locations = instruction_->GetLocations();
+ Register reg_out = out_.AsRegister<Register>();
+ DCHECK(locations->CanCall());
+ DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out));
+ DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString())
+ << "Unexpected instruction in read barrier for GC root slow path: "
+ << instruction_->DebugName();
+
+ __ Bind(GetEntryLabel());
+ SaveLiveRegisters(codegen, locations);
+
+ InvokeRuntimeCallingConvention calling_convention;
+ CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen);
+ mips_codegen->Move32(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), root_);
+ mips_codegen->InvokeRuntime(kQuickReadBarrierForRootSlow,
+ instruction_,
+ instruction_->GetDexPc(),
+ this);
+ CheckEntrypointTypes<kQuickReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*>();
+ mips_codegen->Move32(out_, calling_convention.GetReturnLocation(Primitive::kPrimNot));
+
+ RestoreLiveRegisters(codegen, locations);
+ __ B(GetExitLabel());
+ }
+
+ const char* GetDescription() const OVERRIDE { return "ReadBarrierForRootSlowPathMIPS"; }
+
+ private:
+ const Location out_;
+ const Location root_;
+
+ DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathMIPS);
+};
+
CodeGeneratorMIPS::CodeGeneratorMIPS(HGraph* graph,
const MipsInstructionSetFeatures& isa_features,
const CompilerOptions& compiler_options,
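The ll/sc loop in ReadBarrierMarkAndUpdateFieldSlowPathMIPS above implements what its comment describes as a strong compare-and-set with relaxed memory synchronization ordering. The same semantics in portable C++, purely as an illustration (the real code operates on raw heap words and, when kPoisonHeapReferences is set, on poisoned values):

    #include <atomic>
    #include <cstdint>

    // Install the marked (to-space) reference only if the field still holds
    // the old (from-space) one; if another thread changed the field in the
    // meantime, its value is deliberately left alone.
    void UpdateFieldIfUnchanged(std::atomic<uint32_t>* field,
                                uint32_t old_ref, uint32_t new_ref) {
      uint32_t expected = old_ref;
      field->compare_exchange_strong(expected, new_ref,
                                     std::memory_order_relaxed);
    }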
@@ -1310,10 +1840,26 @@ void CodeGeneratorMIPS::InvokeRuntime(QuickEntrypointEnum entrypoint,
uint32_t dex_pc,
SlowPathCode* slow_path) {
ValidateInvokeRuntime(entrypoint, instruction, slow_path);
+ GenerateInvokeRuntime(GetThreadOffset<kMipsPointerSize>(entrypoint).Int32Value(),
+ IsDirectEntrypoint(entrypoint));
+ if (EntrypointRequiresStackMap(entrypoint)) {
+ RecordPcInfo(instruction, dex_pc, slow_path);
+ }
+}
+
+void CodeGeneratorMIPS::InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
+ HInstruction* instruction,
+ SlowPathCode* slow_path,
+ bool direct) {
+ ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction, slow_path);
+ GenerateInvokeRuntime(entry_point_offset, direct);
+}
+
+void CodeGeneratorMIPS::GenerateInvokeRuntime(int32_t entry_point_offset, bool direct) {
bool reordering = __ SetReorder(false);
- __ LoadFromOffset(kLoadWord, T9, TR, GetThreadOffset<kMipsPointerSize>(entrypoint).Int32Value());
+ __ LoadFromOffset(kLoadWord, T9, TR, entry_point_offset);
__ Jalr(T9);
- if (IsDirectEntrypoint(entrypoint)) {
+ if (direct) {
+ // Reserve argument space on the stack (for $a0-$a3) for
+ // entrypoints that directly reference native implementations.
+ // The called function may use this space to store the $a0-$a3
+ // registers.
@@ -1323,9 +1869,6 @@ void CodeGeneratorMIPS::InvokeRuntime(QuickEntrypointEnum entrypoint,
__ Nop(); // In delay slot.
}
__ SetReorder(reordering);
- if (EntrypointRequiresStackMap(entrypoint)) {
- RecordPcInfo(instruction, dex_pc, slow_path);
- }
}
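// In MIPS assembly terms, the sequence emitted above is roughly (with
// reordering disabled, so the delay slot is filled explicitly):
//
//   lw    $t9, <entry_point_offset>($tr)  # $tr = Thread register (TR).
//   jalr  $t9
//   nop                                   # Delay slot for non-direct
//                                         # entrypoints; direct ones start
//                                         # reserving $a0-$a3 space instead.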
void InstructionCodeGeneratorMIPS::GenerateClassInitializationCheck(SlowPathCodeMIPS* slow_path,
@@ -1885,14 +2428,31 @@ void InstructionCodeGeneratorMIPS::VisitAnd(HAnd* instruction) {
}
void LocationsBuilderMIPS::VisitArrayGet(HArrayGet* instruction) {
+ Primitive::Type type = instruction->GetType();
+ bool object_array_get_with_read_barrier =
+ kEmitCompilerReadBarrier && (type == Primitive::kPrimNot);
LocationSummary* locations =
- new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+ new (GetGraph()->GetArena()) LocationSummary(instruction,
+ object_array_get_with_read_barrier
+ ? LocationSummary::kCallOnSlowPath
+ : LocationSummary::kNoCall);
locations->SetInAt(0, Location::RequiresRegister());
locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
- if (Primitive::IsFloatingPointType(instruction->GetType())) {
+ if (Primitive::IsFloatingPointType(type)) {
locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
} else {
- locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+ // The output overlaps in the case of an object array get with
+ // read barriers enabled: we do not want the move to overwrite the
+ // array's location, as we need it to emit the read barrier.
+ locations->SetOut(Location::RequiresRegister(),
+ object_array_get_with_read_barrier
+ ? Location::kOutputOverlap
+ : Location::kNoOutputOverlap);
+ }
+ // We need a temporary register for the read barrier marking slow
+ // path in CodeGeneratorMIPS::GenerateArrayLoadWithBakerReadBarrier.
+ if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
+ locations->AddTemp(Location::RequiresRegister());
}
}
@@ -1905,7 +2465,9 @@ static auto GetImplicitNullChecker(HInstruction* instruction, CodeGeneratorMIPS*
void InstructionCodeGeneratorMIPS::VisitArrayGet(HArrayGet* instruction) {
LocationSummary* locations = instruction->GetLocations();
- Register obj = locations->InAt(0).AsRegister<Register>();
+ Location obj_loc = locations->InAt(0);
+ Register obj = obj_loc.AsRegister<Register>();
+ Location out_loc = locations->Out();
Location index = locations->InAt(1);
uint32_t data_offset = CodeGenerator::GetArrayDataOffset(instruction);
auto null_checker = GetImplicitNullChecker(instruction, codegen_);
@@ -1915,7 +2477,7 @@ void InstructionCodeGeneratorMIPS::VisitArrayGet(HArrayGet* instruction) {
instruction->IsStringCharAt();
switch (type) {
case Primitive::kPrimBoolean: {
- Register out = locations->Out().AsRegister<Register>();
+ Register out = out_loc.AsRegister<Register>();
if (index.IsConstant()) {
size_t offset =
(index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset;
@@ -1928,7 +2490,7 @@ void InstructionCodeGeneratorMIPS::VisitArrayGet(HArrayGet* instruction) {
}
case Primitive::kPrimByte: {
- Register out = locations->Out().AsRegister<Register>();
+ Register out = out_loc.AsRegister<Register>();
if (index.IsConstant()) {
size_t offset =
(index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset;
@@ -1941,7 +2503,7 @@ void InstructionCodeGeneratorMIPS::VisitArrayGet(HArrayGet* instruction) {
}
case Primitive::kPrimShort: {
- Register out = locations->Out().AsRegister<Register>();
+ Register out = out_loc.AsRegister<Register>();
if (index.IsConstant()) {
size_t offset =
(index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset;
@@ -1955,7 +2517,7 @@ void InstructionCodeGeneratorMIPS::VisitArrayGet(HArrayGet* instruction) {
}
case Primitive::kPrimChar: {
- Register out = locations->Out().AsRegister<Register>();
+ Register out = out_loc.AsRegister<Register>();
if (maybe_compressed_char_at) {
uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
__ LoadFromOffset(kLoadWord, TMP, obj, count_offset, null_checker);
@@ -2008,10 +2570,9 @@ void InstructionCodeGeneratorMIPS::VisitArrayGet(HArrayGet* instruction) {
break;
}
- case Primitive::kPrimInt:
- case Primitive::kPrimNot: {
+ case Primitive::kPrimInt: {
DCHECK_EQ(sizeof(mirror::HeapReference<mirror::Object>), sizeof(int32_t));
- Register out = locations->Out().AsRegister<Register>();
+ Register out = out_loc.AsRegister<Register>();
if (index.IsConstant()) {
size_t offset =
(index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
@@ -2024,8 +2585,53 @@ void InstructionCodeGeneratorMIPS::VisitArrayGet(HArrayGet* instruction) {
break;
}
+ case Primitive::kPrimNot: {
+ static_assert(
+ sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
+ "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
+ // /* HeapReference<Object> */ out =
+ // *(obj + data_offset + index * sizeof(HeapReference<Object>))
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ Location temp = locations->GetTemp(0);
+ // Note that a potential implicit null check is handled in this
+ // CodeGeneratorMIPS::GenerateArrayLoadWithBakerReadBarrier call.
+ codegen_->GenerateArrayLoadWithBakerReadBarrier(instruction,
+ out_loc,
+ obj,
+ data_offset,
+ index,
+ temp,
+ /* needs_null_check */ true);
+ } else {
+ Register out = out_loc.AsRegister<Register>();
+ if (index.IsConstant()) {
+ size_t offset =
+ (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
+ __ LoadFromOffset(kLoadWord, out, obj, offset, null_checker);
+ // If read barriers are enabled, emit read barriers other than
+ // Baker's using a slow path (and also unpoison the loaded
+ // reference, if heap poisoning is enabled).
+ codegen_->MaybeGenerateReadBarrierSlow(instruction, out_loc, out_loc, obj_loc, offset);
+ } else {
+ __ Sll(TMP, index.AsRegister<Register>(), TIMES_4);
+ __ Addu(TMP, obj, TMP);
+ __ LoadFromOffset(kLoadWord, out, TMP, data_offset, null_checker);
+ // If read barriers are enabled, emit read barriers other than
+ // Baker's using a slow path (and also unpoison the loaded
+ // reference, if heap poisoning is enabled).
+ codegen_->MaybeGenerateReadBarrierSlow(instruction,
+ out_loc,
+ out_loc,
+ obj_loc,
+ data_offset,
+ index);
+ }
+ }
+ break;
+ }
+
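+ // For reference, the non-constant-index path above computes the element
+ // address as obj + (index << TIMES_4) + data_offset, i.e. roughly:
+ //
+ //   sll   $tmp, $index, 2      # index * sizeof(HeapReference<Object>)
+ //   addu  $tmp, $obj, $tmp
+ //   lw    $out, data_offset($tmp)
+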
case Primitive::kPrimLong: {
- Register out = locations->Out().AsRegisterPairLow<Register>();
+ Register out = out_loc.AsRegisterPairLow<Register>();
if (index.IsConstant()) {
size_t offset =
(index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset;
@@ -2039,7 +2645,7 @@ void InstructionCodeGeneratorMIPS::VisitArrayGet(HArrayGet* instruction) {
}
case Primitive::kPrimFloat: {
- FRegister out = locations->Out().AsFpuRegister<FRegister>();
+ FRegister out = out_loc.AsFpuRegister<FRegister>();
if (index.IsConstant()) {
size_t offset =
(index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
@@ -2053,7 +2659,7 @@ void InstructionCodeGeneratorMIPS::VisitArrayGet(HArrayGet* instruction) {
}
case Primitive::kPrimDouble: {
- FRegister out = locations->Out().AsFpuRegister<FRegister>();
+ FRegister out = out_loc.AsFpuRegister<FRegister>();
if (index.IsConstant()) {
size_t offset =
(index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset;
@@ -2070,11 +2676,6 @@ void InstructionCodeGeneratorMIPS::VisitArrayGet(HArrayGet* instruction) {
LOG(FATAL) << "Unreachable type " << instruction->GetType();
UNREACHABLE();
}
-
- if (type == Primitive::kPrimNot) {
- Register out = locations->Out().AsRegister<Register>();
- __ MaybeUnpoisonHeapReference(out);
- }
}
void LocationsBuilderMIPS::VisitArrayLength(HArrayLength* instruction) {
@@ -2116,23 +2717,28 @@ Location LocationsBuilderMIPS::FpuRegisterOrConstantForStore(HInstruction* instr
}
void LocationsBuilderMIPS::VisitArraySet(HArraySet* instruction) {
- bool needs_runtime_call = instruction->NeedsTypeCheck();
+ Primitive::Type value_type = instruction->GetComponentType();
+
+ bool needs_write_barrier =
+ CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
+ bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck();
+
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(
instruction,
- needs_runtime_call ? LocationSummary::kCallOnMainOnly : LocationSummary::kNoCall);
- if (needs_runtime_call) {
- InvokeRuntimeCallingConvention calling_convention;
- locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
- locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
- locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
+ may_need_runtime_call_for_type_check
+ ? LocationSummary::kCallOnSlowPath
+ : LocationSummary::kNoCall);
+
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
+ if (Primitive::IsFloatingPointType(instruction->InputAt(2)->GetType())) {
+ locations->SetInAt(2, FpuRegisterOrConstantForStore(instruction->InputAt(2)));
} else {
- locations->SetInAt(0, Location::RequiresRegister());
- locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
- if (Primitive::IsFloatingPointType(instruction->InputAt(2)->GetType())) {
- locations->SetInAt(2, FpuRegisterOrConstantForStore(instruction->InputAt(2)));
- } else {
- locations->SetInAt(2, RegisterOrZeroConstant(instruction->InputAt(2)));
- }
+ locations->SetInAt(2, RegisterOrZeroConstant(instruction->InputAt(2)));
+ }
+ if (needs_write_barrier) {
+ // Temporary register for the write barrier.
+ locations->AddTemp(Location::RequiresRegister()); // Possibly used for ref. poisoning too.
}
}
@@ -2142,7 +2748,7 @@ void InstructionCodeGeneratorMIPS::VisitArraySet(HArraySet* instruction) {
Location index = locations->InAt(1);
Location value_location = locations->InAt(2);
Primitive::Type value_type = instruction->GetComponentType();
- bool needs_runtime_call = locations->WillCall();
+ bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck();
bool needs_write_barrier =
CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
auto null_checker = GetImplicitNullChecker(instruction, codegen_);
@@ -2186,9 +2792,27 @@ void InstructionCodeGeneratorMIPS::VisitArraySet(HArraySet* instruction) {
break;
}
- case Primitive::kPrimInt:
+ case Primitive::kPrimInt: {
+ uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
+ if (index.IsConstant()) {
+ data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4;
+ } else {
+ __ Sll(base_reg, index.AsRegister<Register>(), TIMES_4);
+ __ Addu(base_reg, obj, base_reg);
+ }
+ if (value_location.IsConstant()) {
+ int32_t value = CodeGenerator::GetInt32ValueOf(value_location.GetConstant());
+ __ StoreConstToOffset(kStoreWord, value, base_reg, data_offset, TMP, null_checker);
+ } else {
+ Register value = value_location.AsRegister<Register>();
+ __ StoreToOffset(kStoreWord, value, base_reg, data_offset, null_checker);
+ }
+ break;
+ }
+
case Primitive::kPrimNot: {
- if (!needs_runtime_call) {
+ if (value_location.IsConstant()) {
+ // Just setting null.
uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
if (index.IsConstant()) {
data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4;
@@ -2196,48 +2820,110 @@ void InstructionCodeGeneratorMIPS::VisitArraySet(HArraySet* instruction) {
__ Sll(base_reg, index.AsRegister<Register>(), TIMES_4);
__ Addu(base_reg, obj, base_reg);
}
- if (value_location.IsConstant()) {
- int32_t value = CodeGenerator::GetInt32ValueOf(value_location.GetConstant());
- __ StoreConstToOffset(kStoreWord, value, base_reg, data_offset, TMP, null_checker);
- DCHECK(!needs_write_barrier);
- } else {
- Register value = value_location.AsRegister<Register>();
- if (kPoisonHeapReferences && needs_write_barrier) {
- // Note that in the case where `value` is a null reference,
- // we do not enter this block, as a null reference does not
- // need poisoning.
- DCHECK_EQ(value_type, Primitive::kPrimNot);
- // Use Sw() instead of StoreToOffset() in order to be able to
- // hold the poisoned reference in AT and thus avoid allocating
- // yet another temporary register.
- if (index.IsConstant()) {
- if (!IsInt<16>(static_cast<int32_t>(data_offset))) {
- int16_t low = Low16Bits(data_offset);
- uint32_t high = data_offset - low;
- __ Addiu32(TMP, obj, high);
- base_reg = TMP;
- data_offset = low;
- }
- } else {
- DCHECK(IsInt<16>(static_cast<int32_t>(data_offset)));
- }
- __ PoisonHeapReference(AT, value);
- __ Sw(AT, base_reg, data_offset);
- null_checker();
+ int32_t value = CodeGenerator::GetInt32ValueOf(value_location.GetConstant());
+ DCHECK_EQ(value, 0);
+ __ StoreConstToOffset(kStoreWord, value, base_reg, data_offset, TMP, null_checker);
+ DCHECK(!needs_write_barrier);
+ DCHECK(!may_need_runtime_call_for_type_check);
+ break;
+ }
+
+ DCHECK(needs_write_barrier);
+ Register value = value_location.AsRegister<Register>();
+ Register temp1 = locations->GetTemp(0).AsRegister<Register>();
+ Register temp2 = TMP; // Doesn't need to survive slow path.
+ uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
+ uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
+ uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
+ MipsLabel done;
+ SlowPathCodeMIPS* slow_path = nullptr;
+
+ if (may_need_runtime_call_for_type_check) {
+ slow_path = new (GetGraph()->GetArena()) ArraySetSlowPathMIPS(instruction);
+ codegen_->AddSlowPath(slow_path);
+ if (instruction->GetValueCanBeNull()) {
+ MipsLabel non_zero;
+ __ Bnez(value, &non_zero);
+ uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
+ if (index.IsConstant()) {
+ data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4;
} else {
- __ StoreToOffset(kStoreWord, value, base_reg, data_offset, null_checker);
- }
- if (needs_write_barrier) {
- DCHECK_EQ(value_type, Primitive::kPrimNot);
- codegen_->MarkGCCard(obj, value, instruction->GetValueCanBeNull());
+ __ Sll(base_reg, index.AsRegister<Register>(), TIMES_4);
+ __ Addu(base_reg, obj, base_reg);
}
+ __ StoreToOffset(kStoreWord, value, base_reg, data_offset, null_checker);
+ __ B(&done);
+ __ Bind(&non_zero);
}
+
+ // Note that when read barriers are enabled, the type checks
+ // are performed without read barriers. This is fine, even in
+ // the case where a class object is in the from-space after
+ // the flip, as a comparison involving such a type would not
+ // produce a false positive; it may of course produce a false
+ // negative, in which case we would take the ArraySet slow
+ // path.
+
+ // /* HeapReference<Class> */ temp1 = obj->klass_
+ __ LoadFromOffset(kLoadWord, temp1, obj, class_offset, null_checker);
+ __ MaybeUnpoisonHeapReference(temp1);
+
+ // /* HeapReference<Class> */ temp1 = temp1->component_type_
+ __ LoadFromOffset(kLoadWord, temp1, temp1, component_offset);
+ // /* HeapReference<Class> */ temp2 = value->klass_
+ __ LoadFromOffset(kLoadWord, temp2, value, class_offset);
+ // If heap poisoning is enabled, there is no need to unpoison
+ // `temp1` or `temp2`, as we are comparing two poisoned references.
+
+ if (instruction->StaticTypeOfArrayIsObjectArray()) {
+ MipsLabel do_put;
+ __ Beq(temp1, temp2, &do_put);
+ // If heap poisoning is enabled, the `temp1` reference has
+ // not been unpoisoned yet; unpoison it now.
+ __ MaybeUnpoisonHeapReference(temp1);
+
+ // /* HeapReference<Class> */ temp1 = temp1->super_class_
+ __ LoadFromOffset(kLoadWord, temp1, temp1, super_offset);
+ // If heap poisoning is enabled, there is no need to unpoison
+ // `temp1`, as we are comparing against null below.
+ __ Bnez(temp1, slow_path->GetEntryLabel());
+ __ Bind(&do_put);
+ } else {
+ __ Bne(temp1, temp2, slow_path->GetEntryLabel());
+ }
+ }
+
+ Register source = value;
+ if (kPoisonHeapReferences) {
+ // Note that it is safe to reach this block even when `value` may
+ // still be null here: poisoning negates the reference, and the
+ // negation of null is null, so a null reference is stored unchanged.
+ __ Move(temp1, value);
+ __ PoisonHeapReference(temp1);
+ source = temp1;
+ }
+
+ uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
+ if (index.IsConstant()) {
+ data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4;
} else {
- DCHECK_EQ(value_type, Primitive::kPrimNot);
- // Note: if heap poisoning is enabled, pAputObject takes care
- // of poisoning the reference.
- codegen_->InvokeRuntime(kQuickAputObject, instruction, instruction->GetDexPc());
- CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>();
+ __ Sll(base_reg, index.AsRegister<Register>(), TIMES_4);
+ __ Addu(base_reg, obj, base_reg);
+ }
+ __ StoreToOffset(kStoreWord, source, base_reg, data_offset);
+
+ if (!may_need_runtime_call_for_type_check) {
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
+ }
+
+ codegen_->MarkGCCard(obj, value, instruction->GetValueCanBeNull());
+
+ if (done.IsLinked()) {
+ __ Bind(&done);
+ }
+
+ if (slow_path != nullptr) {
+ __ Bind(slow_path->GetExitLabel());
}
break;
}
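// In outline, the reference store above behaves as follows (a sketch; the
// type-check block is only emitted when the type check may be needed):
//
//   if (value can be null && value == null) { store null; goto done; }
//   temp1 = obj->klass_->component_type_;  temp2 = value->klass_;
//   if (temp1 != temp2 &&
//       !(array static type is Object[] && temp1->super_class_ == null)) {
//     goto ArraySetSlowPathMIPS;  // Let the runtime redo the type check.
//   }
//   store (possibly poisoned) value; mark the GC card;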
@@ -2327,6 +3013,23 @@ void InstructionCodeGeneratorMIPS::VisitBoundsCheck(HBoundsCheck* instruction) {
__ Bgeu(index, length, slow_path->GetEntryLabel());
}
+// Temp is used for read barrier.
+static size_t NumberOfInstanceOfTemps(TypeCheckKind type_check_kind) {
+ if (kEmitCompilerReadBarrier &&
+ (kUseBakerReadBarrier ||
+ type_check_kind == TypeCheckKind::kAbstractClassCheck ||
+ type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
+ type_check_kind == TypeCheckKind::kArrayObjectCheck)) {
+ return 1;
+ }
+ return 0;
+}
+
+// Extra temp is used for read barrier.
+static size_t NumberOfCheckCastTemps(TypeCheckKind type_check_kind) {
+ return 1 + NumberOfInstanceOfTemps(type_check_kind);
+}
+
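+// For example, a kClassHierarchyCheck CheckCast with read barriers enabled
+// uses 1 + 1 = 2 temps, while the same check without read barriers uses a
+// single temp.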
void LocationsBuilderMIPS::VisitCheckCast(HCheckCast* instruction) {
LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
bool throws_into_catch = instruction->CanThrowIntoCatchBlock();
@@ -2337,7 +3040,7 @@ void LocationsBuilderMIPS::VisitCheckCast(HCheckCast* instruction) {
case TypeCheckKind::kAbstractClassCheck:
case TypeCheckKind::kClassHierarchyCheck:
case TypeCheckKind::kArrayObjectCheck:
- call_kind = throws_into_catch
+ call_kind = (throws_into_catch || kEmitCompilerReadBarrier)
? LocationSummary::kCallOnSlowPath
: LocationSummary::kNoCall; // In fact, call on a fatal (non-returning) slow path.
break;
@@ -2351,15 +3054,20 @@ void LocationsBuilderMIPS::VisitCheckCast(HCheckCast* instruction) {
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
locations->SetInAt(0, Location::RequiresRegister());
locations->SetInAt(1, Location::RequiresRegister());
- locations->AddTemp(Location::RequiresRegister());
+ locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind));
}
void InstructionCodeGeneratorMIPS::VisitCheckCast(HCheckCast* instruction) {
TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
LocationSummary* locations = instruction->GetLocations();
- Register obj = locations->InAt(0).AsRegister<Register>();
+ Location obj_loc = locations->InAt(0);
+ Register obj = obj_loc.AsRegister<Register>();
Register cls = locations->InAt(1).AsRegister<Register>();
- Register temp = locations->GetTemp(0).AsRegister<Register>();
+ Location temp_loc = locations->GetTemp(0);
+ Register temp = temp_loc.AsRegister<Register>();
+ const size_t num_temps = NumberOfCheckCastTemps(type_check_kind);
+ DCHECK_LE(num_temps, 2u);
+ Location maybe_temp2_loc = (num_temps >= 2) ? locations->GetTemp(1) : Location::NoLocation();
const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
@@ -2396,8 +3104,12 @@ void InstructionCodeGeneratorMIPS::VisitCheckCast(HCheckCast* instruction) {
case TypeCheckKind::kExactCheck:
case TypeCheckKind::kArrayCheck: {
// /* HeapReference<Class> */ temp = obj->klass_
- __ LoadFromOffset(kLoadWord, temp, obj, class_offset);
- __ MaybeUnpoisonHeapReference(temp);
+ GenerateReferenceLoadTwoRegisters(instruction,
+ temp_loc,
+ obj_loc,
+ class_offset,
+ maybe_temp2_loc,
+ kWithoutReadBarrier);
// Jump to slow path for throwing the exception or doing a
// more involved array check.
__ Bne(temp, cls, slow_path->GetEntryLabel());
@@ -2406,15 +3118,22 @@ void InstructionCodeGeneratorMIPS::VisitCheckCast(HCheckCast* instruction) {
case TypeCheckKind::kAbstractClassCheck: {
// /* HeapReference<Class> */ temp = obj->klass_
- __ LoadFromOffset(kLoadWord, temp, obj, class_offset);
- __ MaybeUnpoisonHeapReference(temp);
+ GenerateReferenceLoadTwoRegisters(instruction,
+ temp_loc,
+ obj_loc,
+ class_offset,
+ maybe_temp2_loc,
+ kWithoutReadBarrier);
// If the class is abstract, we eagerly fetch the super class of the
// object to avoid doing a comparison we know will fail.
MipsLabel loop;
__ Bind(&loop);
// /* HeapReference<Class> */ temp = temp->super_class_
- __ LoadFromOffset(kLoadWord, temp, temp, super_offset);
- __ MaybeUnpoisonHeapReference(temp);
+ GenerateReferenceLoadOneRegister(instruction,
+ temp_loc,
+ super_offset,
+ maybe_temp2_loc,
+ kWithoutReadBarrier);
// If the class reference currently in `temp` is null, jump to the slow path to throw the
// exception.
__ Beqz(temp, slow_path->GetEntryLabel());
@@ -2425,15 +3144,22 @@ void InstructionCodeGeneratorMIPS::VisitCheckCast(HCheckCast* instruction) {
case TypeCheckKind::kClassHierarchyCheck: {
// /* HeapReference<Class> */ temp = obj->klass_
- __ LoadFromOffset(kLoadWord, temp, obj, class_offset);
- __ MaybeUnpoisonHeapReference(temp);
+ GenerateReferenceLoadTwoRegisters(instruction,
+ temp_loc,
+ obj_loc,
+ class_offset,
+ maybe_temp2_loc,
+ kWithoutReadBarrier);
// Walk over the class hierarchy to find a match.
MipsLabel loop;
__ Bind(&loop);
__ Beq(temp, cls, &done);
// /* HeapReference<Class> */ temp = temp->super_class_
- __ LoadFromOffset(kLoadWord, temp, temp, super_offset);
- __ MaybeUnpoisonHeapReference(temp);
+ GenerateReferenceLoadOneRegister(instruction,
+ temp_loc,
+ super_offset,
+ maybe_temp2_loc,
+ kWithoutReadBarrier);
// If the class reference currently in `temp` is null, jump to the slow path to throw the
// exception. Otherwise, jump to the beginning of the loop.
__ Bnez(temp, &loop);
@@ -2443,14 +3169,21 @@ void InstructionCodeGeneratorMIPS::VisitCheckCast(HCheckCast* instruction) {
case TypeCheckKind::kArrayObjectCheck: {
// /* HeapReference<Class> */ temp = obj->klass_
- __ LoadFromOffset(kLoadWord, temp, obj, class_offset);
- __ MaybeUnpoisonHeapReference(temp);
+ GenerateReferenceLoadTwoRegisters(instruction,
+ temp_loc,
+ obj_loc,
+ class_offset,
+ maybe_temp2_loc,
+ kWithoutReadBarrier);
// Do an exact check.
__ Beq(temp, cls, &done);
// Otherwise, we need to check that the object's class is a non-primitive array.
// /* HeapReference<Class> */ temp = temp->component_type_
- __ LoadFromOffset(kLoadWord, temp, temp, component_offset);
- __ MaybeUnpoisonHeapReference(temp);
+ GenerateReferenceLoadOneRegister(instruction,
+ temp_loc,
+ component_offset,
+ maybe_temp2_loc,
+ kWithoutReadBarrier);
// If the component type is null, jump to the slow path to throw the exception.
__ Beqz(temp, slow_path->GetEntryLabel());
// Otherwise, the object is indeed an array, further check that this component
@@ -2477,11 +3210,19 @@ void InstructionCodeGeneratorMIPS::VisitCheckCast(HCheckCast* instruction) {
// Avoid read barriers to improve the performance of the fast path. We cannot get false
// positives by doing this.
// /* HeapReference<Class> */ temp = obj->klass_
- __ LoadFromOffset(kLoadWord, temp, obj, class_offset);
- __ MaybeUnpoisonHeapReference(temp);
+ GenerateReferenceLoadTwoRegisters(instruction,
+ temp_loc,
+ obj_loc,
+ class_offset,
+ maybe_temp2_loc,
+ kWithoutReadBarrier);
// /* HeapReference<Class> */ temp = temp->iftable_
- __ LoadFromOffset(kLoadWord, temp, temp, iftable_offset);
- __ MaybeUnpoisonHeapReference(temp);
+ GenerateReferenceLoadTwoRegisters(instruction,
+ temp_loc,
+ temp_loc,
+ iftable_offset,
+ maybe_temp2_loc,
+ kWithoutReadBarrier);
// Iftable is never null.
__ Lw(TMP, temp, array_length_offset);
// Loop through the iftable and check if any class matches.
@@ -5032,8 +5773,15 @@ void LocationsBuilderMIPS::HandleFieldGet(HInstruction* instruction, const Field
Primitive::Type field_type = field_info.GetFieldType();
bool is_wide = (field_type == Primitive::kPrimLong) || (field_type == Primitive::kPrimDouble);
bool generate_volatile = field_info.IsVolatile() && is_wide;
+ bool object_field_get_with_read_barrier =
+ kEmitCompilerReadBarrier && (field_type == Primitive::kPrimNot);
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(
- instruction, generate_volatile ? LocationSummary::kCallOnMainOnly : LocationSummary::kNoCall);
+ instruction,
+ generate_volatile
+ ? LocationSummary::kCallOnMainOnly
+ : (object_field_get_with_read_barrier
+ ? LocationSummary::kCallOnSlowPath
+ : LocationSummary::kNoCall));
locations->SetInAt(0, Location::RequiresRegister());
if (generate_volatile) {
@@ -5054,7 +5802,18 @@ void LocationsBuilderMIPS::HandleFieldGet(HInstruction* instruction, const Field
if (Primitive::IsFloatingPointType(instruction->GetType())) {
locations->SetOut(Location::RequiresFpuRegister());
} else {
- locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+ // The output overlaps in the case of an object field get with
+ // read barriers enabled: we do not want the move to overwrite the
+ // object's location, as we need it to emit the read barrier.
+ locations->SetOut(Location::RequiresRegister(),
+ object_field_get_with_read_barrier
+ ? Location::kOutputOverlap
+ : Location::kNoOutputOverlap);
+ }
+ if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
+ // We need a temporary register for the read barrier marking slow
+ // path in CodeGeneratorMIPS::GenerateFieldLoadWithBakerReadBarrier.
+ locations->AddTemp(Location::RequiresRegister());
}
}
}
@@ -5064,7 +5823,9 @@ void InstructionCodeGeneratorMIPS::HandleFieldGet(HInstruction* instruction,
uint32_t dex_pc) {
Primitive::Type type = field_info.GetFieldType();
LocationSummary* locations = instruction->GetLocations();
- Register obj = locations->InAt(0).AsRegister<Register>();
+ Location obj_loc = locations->InAt(0);
+ Register obj = obj_loc.AsRegister<Register>();
+ Location dst_loc = locations->Out();
LoadOperandType load_type = kLoadUnsignedByte;
bool is_volatile = field_info.IsVolatile();
uint32_t offset = field_info.GetFieldOffset().Uint32Value();
@@ -5107,40 +5868,61 @@ void InstructionCodeGeneratorMIPS::HandleFieldGet(HInstruction* instruction,
CheckEntrypointTypes<kQuickA64Load, int64_t, volatile const int64_t*>();
if (type == Primitive::kPrimDouble) {
// FP results are returned in core registers. Need to move them.
- Location out = locations->Out();
- if (out.IsFpuRegister()) {
- __ Mtc1(locations->GetTemp(1).AsRegister<Register>(), out.AsFpuRegister<FRegister>());
+ if (dst_loc.IsFpuRegister()) {
+ __ Mtc1(locations->GetTemp(1).AsRegister<Register>(), dst_loc.AsFpuRegister<FRegister>());
__ MoveToFpuHigh(locations->GetTemp(2).AsRegister<Register>(),
- out.AsFpuRegister<FRegister>());
+ dst_loc.AsFpuRegister<FRegister>());
} else {
- DCHECK(out.IsDoubleStackSlot());
+ DCHECK(dst_loc.IsDoubleStackSlot());
__ StoreToOffset(kStoreWord,
locations->GetTemp(1).AsRegister<Register>(),
SP,
- out.GetStackIndex());
+ dst_loc.GetStackIndex());
__ StoreToOffset(kStoreWord,
locations->GetTemp(2).AsRegister<Register>(),
SP,
- out.GetStackIndex() + 4);
+ dst_loc.GetStackIndex() + 4);
}
}
} else {
- if (!Primitive::IsFloatingPointType(type)) {
+ if (type == Primitive::kPrimNot) {
+ // /* HeapReference<Object> */ dst = *(obj + offset)
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ Location temp_loc = locations->GetTemp(0);
+ // Note that a potential implicit null check is handled in this
+ // CodeGeneratorMIPS::GenerateFieldLoadWithBakerReadBarrier call.
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction,
+ dst_loc,
+ obj,
+ offset,
+ temp_loc,
+ /* needs_null_check */ true);
+ if (is_volatile) {
+ GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
+ }
+ } else {
+ __ LoadFromOffset(kLoadWord, dst_loc.AsRegister<Register>(), obj, offset, null_checker);
+ if (is_volatile) {
+ GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
+ }
+ // If read barriers are enabled, emit read barriers other than
+ // Baker's using a slow path (and also unpoison the loaded
+ // reference, if heap poisoning is enabled).
+ codegen_->MaybeGenerateReadBarrierSlow(instruction, dst_loc, dst_loc, obj_loc, offset);
+ }
+ } else if (!Primitive::IsFloatingPointType(type)) {
Register dst;
if (type == Primitive::kPrimLong) {
- DCHECK(locations->Out().IsRegisterPair());
- dst = locations->Out().AsRegisterPairLow<Register>();
+ DCHECK(dst_loc.IsRegisterPair());
+ dst = dst_loc.AsRegisterPairLow<Register>();
} else {
- DCHECK(locations->Out().IsRegister());
- dst = locations->Out().AsRegister<Register>();
+ DCHECK(dst_loc.IsRegister());
+ dst = dst_loc.AsRegister<Register>();
}
__ LoadFromOffset(load_type, dst, obj, offset, null_checker);
- if (type == Primitive::kPrimNot) {
- __ MaybeUnpoisonHeapReference(dst);
- }
} else {
- DCHECK(locations->Out().IsFpuRegister());
- FRegister dst = locations->Out().AsFpuRegister<FRegister>();
+ DCHECK(dst_loc.IsFpuRegister());
+ FRegister dst = dst_loc.AsFpuRegister<FRegister>();
if (type == Primitive::kPrimFloat) {
__ LoadSFromOffset(dst, obj, offset, null_checker);
} else {
@@ -5149,7 +5931,9 @@ void InstructionCodeGeneratorMIPS::HandleFieldGet(HInstruction* instruction,
}
}
- if (is_volatile) {
+ // Memory barriers, in the case of references, are handled above in
+ // the kPrimNot branch.
+ if (is_volatile && (type != Primitive::kPrimNot)) {
GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
}
}
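// For example, a volatile reference field get with Baker read barriers emits
// its kLoadAny barrier immediately after GenerateFieldLoadWithBakerReadBarrier()
// inside the kPrimNot branch above, so control never reaches the barrier here
// for that type.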
@@ -5290,7 +6074,6 @@ void InstructionCodeGeneratorMIPS::HandleFieldSet(HInstruction* instruction,
}
}
- // TODO: memory barriers?
if (needs_write_barrier) {
Register src = value_location.AsRegister<Register>();
codegen_->MarkGCCard(obj, src, value_can_be_null);
@@ -5320,14 +6103,133 @@ void InstructionCodeGeneratorMIPS::VisitInstanceFieldSet(HInstanceFieldSet* inst
instruction->GetValueCanBeNull());
}
-void InstructionCodeGeneratorMIPS::GenerateGcRootFieldLoad(
- HInstruction* instruction ATTRIBUTE_UNUSED,
- Location root,
- Register obj,
- uint32_t offset) {
+void InstructionCodeGeneratorMIPS::GenerateReferenceLoadOneRegister(
+ HInstruction* instruction,
+ Location out,
+ uint32_t offset,
+ Location maybe_temp,
+ ReadBarrierOption read_barrier_option) {
+ Register out_reg = out.AsRegister<Register>();
+ if (read_barrier_option == kWithReadBarrier) {
+ CHECK(kEmitCompilerReadBarrier);
+ DCHECK(maybe_temp.IsRegister()) << maybe_temp;
+ if (kUseBakerReadBarrier) {
+ // Load with fast path based Baker's read barrier.
+ // /* HeapReference<Object> */ out = *(out + offset)
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction,
+ out,
+ out_reg,
+ offset,
+ maybe_temp,
+ /* needs_null_check */ false);
+ } else {
+ // Load with slow path based read barrier.
+ // Save the value of `out` into `maybe_temp` before overwriting it
+ // in the following move operation, as we will need it for the
+ // read barrier below.
+ __ Move(maybe_temp.AsRegister<Register>(), out_reg);
+ // /* HeapReference<Object> */ out = *(out + offset)
+ __ LoadFromOffset(kLoadWord, out_reg, out_reg, offset);
+ codegen_->GenerateReadBarrierSlow(instruction, out, out, maybe_temp, offset);
+ }
+ } else {
+ // Plain load with no read barrier.
+ // /* HeapReference<Object> */ out = *(out + offset)
+ __ LoadFromOffset(kLoadWord, out_reg, out_reg, offset);
+ __ MaybeUnpoisonHeapReference(out_reg);
+ }
+}
+
+void InstructionCodeGeneratorMIPS::GenerateReferenceLoadTwoRegisters(
+ HInstruction* instruction,
+ Location out,
+ Location obj,
+ uint32_t offset,
+ Location maybe_temp,
+ ReadBarrierOption read_barrier_option) {
+ Register out_reg = out.AsRegister<Register>();
+ Register obj_reg = obj.AsRegister<Register>();
+ if (read_barrier_option == kWithReadBarrier) {
+ CHECK(kEmitCompilerReadBarrier);
+ if (kUseBakerReadBarrier) {
+ DCHECK(maybe_temp.IsRegister()) << maybe_temp;
+ // Load with fast path based Baker's read barrier.
+ // /* HeapReference<Object> */ out = *(obj + offset)
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction,
+ out,
+ obj_reg,
+ offset,
+ maybe_temp,
+ /* needs_null_check */ false);
+ } else {
+ // Load with slow path based read barrier.
+ // /* HeapReference<Object> */ out = *(obj + offset)
+ __ LoadFromOffset(kLoadWord, out_reg, obj_reg, offset);
+ codegen_->GenerateReadBarrierSlow(instruction, out, out, obj, offset);
+ }
+ } else {
+ // Plain load with no read barrier.
+ // /* HeapReference<Object> */ out = *(obj + offset)
+ __ LoadFromOffset(kLoadWord, out_reg, obj_reg, offset);
+ __ MaybeUnpoisonHeapReference(out_reg);
+ }
+}
+
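+// Since the one-register variant above clobbers `out` with *(out + offset),
+// its non-Baker path first saves `out` into `maybe_temp`. A typical call
+// site, as used by VisitInstanceOf() later in this file:
+//
+//   // /* HeapReference<Class> */ out = out->super_class_
+//   GenerateReferenceLoadOneRegister(instruction,
+//                                    out_loc,
+//                                    super_offset,
+//                                    maybe_temp_loc,
+//                                    kCompilerReadBarrierOption);
+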
+void InstructionCodeGeneratorMIPS::GenerateGcRootFieldLoad(HInstruction* instruction,
+ Location root,
+ Register obj,
+ uint32_t offset,
+ ReadBarrierOption read_barrier_option) {
Register root_reg = root.AsRegister<Register>();
- if (kEmitCompilerReadBarrier) {
- UNIMPLEMENTED(FATAL) << "for read barrier";
+ if (read_barrier_option == kWithReadBarrier) {
+ DCHECK(kEmitCompilerReadBarrier);
+ if (kUseBakerReadBarrier) {
+ // Fast path implementation of art::ReadBarrier::BarrierForRoot when
+ // Baker's read barriers are used:
+ //
+ // root = obj.field;
+ // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
+ // if (temp != null) {
+ // root = temp(root)
+ // }
+
+ // /* GcRoot<mirror::Object> */ root = *(obj + offset)
+ __ LoadFromOffset(kLoadWord, root_reg, obj, offset);
+ static_assert(
+ sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>),
+ "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> "
+ "have different sizes.");
+ static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t),
+ "art::mirror::CompressedReference<mirror::Object> and int32_t "
+ "have different sizes.");
+
+ // Slow path marking the GC root `root`.
+ Location temp = Location::RegisterLocation(T9);
+ SlowPathCodeMIPS* slow_path =
+ new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathMIPS(
+ instruction,
+ root,
+ /*entrypoint*/ temp);
+ codegen_->AddSlowPath(slow_path);
+
+ // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
+ const int32_t entry_point_offset =
+ CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kMipsPointerSize>(root.reg() - 1);
+ // Loading the entrypoint does not require a load acquire since it is only changed when
+ // threads are suspended or running a checkpoint.
+ __ LoadFromOffset(kLoadWord, temp.AsRegister<Register>(), TR, entry_point_offset);
+ // The entrypoint is null when the GC is not marking; this saves one load compared to
+ // checking GetIsGcMarking.
+ __ Bnez(temp.AsRegister<Register>(), slow_path->GetEntryLabel());
+ __ Bind(slow_path->GetExitLabel());
+ } else {
+ // GC root loaded through a slow path for read barriers other
+ // than Baker's.
+ // /* GcRoot<mirror::Object>* */ root = obj + offset
+ __ Addiu32(root_reg, obj, offset);
+ // /* mirror::Object* */ root = root->Read()
+ codegen_->GenerateReadBarrierForRootSlow(instruction, root, root);
+ }
} else {
// Plain GC root load with no read barrier.
// /* GcRoot<mirror::Object> */ root = *(obj + offset)
@@ -5337,6 +6239,226 @@ void InstructionCodeGeneratorMIPS::GenerateGcRootFieldLoad(
}
}
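// To summarize the three GC root load flavors above (sketch):
//  - Baker: plain lw of the root, then a branch to ReadBarrierMarkSlowPathMIPS
//    when the per-register mark entrypoint is non-null (i.e. the GC is marking).
//  - Non-Baker read barrier: compute the root address into `root` and go
//    through the root slow path unconditionally.
//  - No read barrier: plain lw of the root.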
+void CodeGeneratorMIPS::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
+ Location ref,
+ Register obj,
+ uint32_t offset,
+ Location temp,
+ bool needs_null_check) {
+ DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(kUseBakerReadBarrier);
+
+ // /* HeapReference<Object> */ ref = *(obj + offset)
+ Location no_index = Location::NoLocation();
+ ScaleFactor no_scale_factor = TIMES_1;
+ GenerateReferenceLoadWithBakerReadBarrier(instruction,
+ ref,
+ obj,
+ offset,
+ no_index,
+ no_scale_factor,
+ temp,
+ needs_null_check);
+}
+
+void CodeGeneratorMIPS::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
+ Location ref,
+ Register obj,
+ uint32_t data_offset,
+ Location index,
+ Location temp,
+ bool needs_null_check) {
+ DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(kUseBakerReadBarrier);
+
+ static_assert(
+ sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
+ "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
+ // /* HeapReference<Object> */ ref =
+ // *(obj + data_offset + index * sizeof(HeapReference<Object>))
+ ScaleFactor scale_factor = TIMES_4;
+ GenerateReferenceLoadWithBakerReadBarrier(instruction,
+ ref,
+ obj,
+ data_offset,
+ index,
+ scale_factor,
+ temp,
+ needs_null_check);
+}
+
+void CodeGeneratorMIPS::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
+ Location ref,
+ Register obj,
+ uint32_t offset,
+ Location index,
+ ScaleFactor scale_factor,
+ Location temp,
+ bool needs_null_check,
+ bool always_update_field) {
+ DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(kUseBakerReadBarrier);
+
+ // In slow path based read barriers, the read barrier call is
+ // inserted after the original load. However, in fast path based
+ // Baker's read barriers, we need to perform the load of
+ // mirror::Object::monitor_ *before* the original reference load.
+ // This load-load ordering is required by the read barrier.
+ // The fast path/slow path (for Baker's algorithm) should look like:
+ //
+ // uint32_t rb_state = LockWord(obj->monitor_).ReadBarrierState();
+ // lfence; // Load fence or artificial data dependency to prevent load-load reordering
+ // HeapReference<Object> ref = *src; // Original reference load.
+ // bool is_gray = (rb_state == ReadBarrier::GrayState());
+ // if (is_gray) {
+ // ref = ReadBarrier::Mark(ref); // Performed by runtime entrypoint slow path.
+ // }
+ //
+ // Note: the original implementation in ReadBarrier::Barrier is
+ // slightly more complex as it performs additional checks that we do
+ // not do here for performance reasons.
+
+ Register ref_reg = ref.AsRegister<Register>();
+ Register temp_reg = temp.AsRegister<Register>();
+ uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
+
+ // /* int32_t */ monitor = obj->monitor_
+ __ LoadFromOffset(kLoadWord, temp_reg, obj, monitor_offset);
+ if (needs_null_check) {
+ MaybeRecordImplicitNullCheck(instruction);
+ }
+ // /* LockWord */ lock_word = LockWord(monitor)
+ static_assert(sizeof(LockWord) == sizeof(int32_t),
+ "art::LockWord and int32_t have different sizes.");
+
+ __ Sync(0); // Barrier to prevent load-load reordering.
+
+ // The actual reference load.
+ if (index.IsValid()) {
+ // Load types involving an "index": ArrayGet,
+ // UnsafeGetObject/UnsafeGetObjectVolatile and UnsafeCASObject
+ // intrinsics.
+ // /* HeapReference<Object> */ ref = *(obj + offset + (index << scale_factor))
+ if (index.IsConstant()) {
+ size_t computed_offset =
+ (index.GetConstant()->AsIntConstant()->GetValue() << scale_factor) + offset;
+ __ LoadFromOffset(kLoadWord, ref_reg, obj, computed_offset);
+ } else {
+ // Handle the special case of the
+ // UnsafeGetObject/UnsafeGetObjectVolatile and UnsafeCASObject
+ // intrinsics, which use a register pair as index ("long
+ // offset"), of which only the low part contains data.
+ Register index_reg = index.IsRegisterPair()
+ ? index.AsRegisterPairLow<Register>()
+ : index.AsRegister<Register>();
+ __ Sll(TMP, index_reg, scale_factor);
+ __ Addu(TMP, obj, TMP);
+ __ LoadFromOffset(kLoadWord, ref_reg, TMP, offset);
+ }
+ } else {
+ // /* HeapReference<Object> */ ref = *(obj + offset)
+ __ LoadFromOffset(kLoadWord, ref_reg, obj, offset);
+ }
+
+ // Object* ref = ref_addr->AsMirrorPtr()
+ __ MaybeUnpoisonHeapReference(ref_reg);
+
+ // Slow path marking the object `ref` when it is gray.
+ SlowPathCodeMIPS* slow_path;
+ if (always_update_field) {
+ // ReadBarrierMarkAndUpdateFieldSlowPathMIPS only supports address
+ // of the form `obj + field_offset`, where `obj` is a register and
+ // `field_offset` is a register pair (of which only the lower half
+ // is used). Thus `offset` is expected to be zero and `scale_factor`
+ // to be TIMES_1 in this code path.
+ DCHECK_EQ(offset, 0u);
+ DCHECK_EQ(scale_factor, ScaleFactor::TIMES_1);
+ slow_path = new (GetGraph()->GetArena())
+ ReadBarrierMarkAndUpdateFieldSlowPathMIPS(instruction,
+ ref,
+ obj,
+ /* field_offset */ index,
+ temp_reg);
+ } else {
+ slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathMIPS(instruction, ref);
+ }
+ AddSlowPath(slow_path);
+
+ // if (rb_state == ReadBarrier::GrayState())
+ // ref = ReadBarrier::Mark(ref);
+ // Given the numeric representation, it's enough to check the low bit of the
+ // rb_state. We do that by shifting the bit into the sign bit (31) and
+ // performing a branch on less than zero.
+ static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
+ static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
+ static_assert(LockWord::kReadBarrierStateSize == 1, "Expecting 1-bit read barrier state size");
+ __ Sll(temp_reg, temp_reg, 31 - LockWord::kReadBarrierStateShift);
+ __ Bltz(temp_reg, slow_path->GetEntryLabel());
+ __ Bind(slow_path->GetExitLabel());
+}
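+
+// Worked example of the gray check above: assuming
+// LockWord::kReadBarrierStateShift is 28 (its usual value in this tree),
+// `sll $temp, $temp, 3` moves the 1-bit rb_state into bit 31, so the
+// `bltz` is taken exactly when rb_state == ReadBarrier::GrayState().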
+
+void CodeGeneratorMIPS::GenerateReadBarrierSlow(HInstruction* instruction,
+ Location out,
+ Location ref,
+ Location obj,
+ uint32_t offset,
+ Location index) {
+ DCHECK(kEmitCompilerReadBarrier);
+
+ // Insert a slow path based read barrier *after* the reference load.
+ //
+ // If heap poisoning is enabled, the unpoisoning of the loaded
+ // reference will be carried out by the runtime within the slow
+ // path.
+ //
+ // Note that `ref` currently does not get unpoisoned (when heap
+ // poisoning is enabled), which is alright as the `ref` argument is
+ // not used by the artReadBarrierSlow entry point.
+ //
+ // TODO: Unpoison `ref` when it is used by artReadBarrierSlow.
+ SlowPathCodeMIPS* slow_path = new (GetGraph()->GetArena())
+ ReadBarrierForHeapReferenceSlowPathMIPS(instruction, out, ref, obj, offset, index);
+ AddSlowPath(slow_path);
+
+ __ B(slow_path->GetEntryLabel());
+ __ Bind(slow_path->GetExitLabel());
+}
+
+void CodeGeneratorMIPS::MaybeGenerateReadBarrierSlow(HInstruction* instruction,
+ Location out,
+ Location ref,
+ Location obj,
+ uint32_t offset,
+ Location index) {
+ if (kEmitCompilerReadBarrier) {
+ // Baker's read barriers shall be handled by the fast path
+ // (CodeGeneratorMIPS::GenerateReferenceLoadWithBakerReadBarrier).
+ DCHECK(!kUseBakerReadBarrier);
+ // If heap poisoning is enabled, unpoisoning will be taken care of
+ // by the runtime within the slow path.
+ GenerateReadBarrierSlow(instruction, out, ref, obj, offset, index);
+ } else if (kPoisonHeapReferences) {
+ __ UnpoisonHeapReference(out.AsRegister<Register>());
+ }
+}
+
+void CodeGeneratorMIPS::GenerateReadBarrierForRootSlow(HInstruction* instruction,
+ Location out,
+ Location root) {
+ DCHECK(kEmitCompilerReadBarrier);
+
+ // Insert a slow path based read barrier *after* the GC root load.
+ //
+ // Note that GC roots are not affected by heap poisoning, so we do
+ // not need to do anything special for this here.
+ SlowPathCodeMIPS* slow_path =
+ new (GetGraph()->GetArena()) ReadBarrierForRootSlowPathMIPS(instruction, out, root);
+ AddSlowPath(slow_path);
+
+ __ B(slow_path->GetEntryLabel());
+ __ Bind(slow_path->GetExitLabel());
+}
+
void LocationsBuilderMIPS::VisitInstanceOf(HInstanceOf* instruction) {
LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
@@ -5345,7 +6467,8 @@ void LocationsBuilderMIPS::VisitInstanceOf(HInstanceOf* instruction) {
case TypeCheckKind::kAbstractClassCheck:
case TypeCheckKind::kClassHierarchyCheck:
case TypeCheckKind::kArrayObjectCheck:
- call_kind = LocationSummary::kNoCall;
+ call_kind =
+ kEmitCompilerReadBarrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall;
break;
case TypeCheckKind::kArrayCheck:
case TypeCheckKind::kUnresolvedCheck:
@@ -5360,14 +6483,20 @@ void LocationsBuilderMIPS::VisitInstanceOf(HInstanceOf* instruction) {
// The output overlaps the inputs.
// Note that TypeCheckSlowPathMIPS uses this register too.
locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+ locations->AddRegisterTemps(NumberOfInstanceOfTemps(type_check_kind));
}
void InstructionCodeGeneratorMIPS::VisitInstanceOf(HInstanceOf* instruction) {
TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
LocationSummary* locations = instruction->GetLocations();
- Register obj = locations->InAt(0).AsRegister<Register>();
+ Location obj_loc = locations->InAt(0);
+ Register obj = obj_loc.AsRegister<Register>();
Register cls = locations->InAt(1).AsRegister<Register>();
- Register out = locations->Out().AsRegister<Register>();
+ Location out_loc = locations->Out();
+ Register out = out_loc.AsRegister<Register>();
+ const size_t num_temps = NumberOfInstanceOfTemps(type_check_kind);
+ DCHECK_LE(num_temps, 1u);
+ Location maybe_temp_loc = (num_temps >= 1) ? locations->GetTemp(0) : Location::NoLocation();
uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
@@ -5385,8 +6514,12 @@ void InstructionCodeGeneratorMIPS::VisitInstanceOf(HInstanceOf* instruction) {
switch (type_check_kind) {
case TypeCheckKind::kExactCheck: {
// /* HeapReference<Class> */ out = obj->klass_
- __ LoadFromOffset(kLoadWord, out, obj, class_offset);
- __ MaybeUnpoisonHeapReference(out);
+ GenerateReferenceLoadTwoRegisters(instruction,
+ out_loc,
+ obj_loc,
+ class_offset,
+ maybe_temp_loc,
+ kCompilerReadBarrierOption);
// Classes must be equal for the instanceof to succeed.
__ Xor(out, out, cls);
__ Sltiu(out, out, 1);
@@ -5395,15 +6528,22 @@ void InstructionCodeGeneratorMIPS::VisitInstanceOf(HInstanceOf* instruction) {
case TypeCheckKind::kAbstractClassCheck: {
// /* HeapReference<Class> */ out = obj->klass_
- __ LoadFromOffset(kLoadWord, out, obj, class_offset);
- __ MaybeUnpoisonHeapReference(out);
+ GenerateReferenceLoadTwoRegisters(instruction,
+ out_loc,
+ obj_loc,
+ class_offset,
+ maybe_temp_loc,
+ kCompilerReadBarrierOption);
// If the class is abstract, we eagerly fetch the super class of the
// object to avoid doing a comparison we know will fail.
MipsLabel loop;
__ Bind(&loop);
// /* HeapReference<Class> */ out = out->super_class_
- __ LoadFromOffset(kLoadWord, out, out, super_offset);
- __ MaybeUnpoisonHeapReference(out);
+ GenerateReferenceLoadOneRegister(instruction,
+ out_loc,
+ super_offset,
+ maybe_temp_loc,
+ kCompilerReadBarrierOption);
// If `out` is null, we use it for the result, and jump to `done`.
__ Beqz(out, &done);
__ Bne(out, cls, &loop);
@@ -5413,15 +6553,22 @@ void InstructionCodeGeneratorMIPS::VisitInstanceOf(HInstanceOf* instruction) {
case TypeCheckKind::kClassHierarchyCheck: {
// /* HeapReference<Class> */ out = obj->klass_
- __ LoadFromOffset(kLoadWord, out, obj, class_offset);
- __ MaybeUnpoisonHeapReference(out);
+ GenerateReferenceLoadTwoRegisters(instruction,
+ out_loc,
+ obj_loc,
+ class_offset,
+ maybe_temp_loc,
+ kCompilerReadBarrierOption);
// Walk over the class hierarchy to find a match.
MipsLabel loop, success;
__ Bind(&loop);
__ Beq(out, cls, &success);
// /* HeapReference<Class> */ out = out->super_class_
- __ LoadFromOffset(kLoadWord, out, out, super_offset);
- __ MaybeUnpoisonHeapReference(out);
+ GenerateReferenceLoadOneRegister(instruction,
+ out_loc,
+ super_offset,
+ maybe_temp_loc,
+ kCompilerReadBarrierOption);
__ Bnez(out, &loop);
// If `out` is null, we use it for the result, and jump to `done`.
__ B(&done);
@@ -5432,15 +6579,22 @@ void InstructionCodeGeneratorMIPS::VisitInstanceOf(HInstanceOf* instruction) {
case TypeCheckKind::kArrayObjectCheck: {
// /* HeapReference<Class> */ out = obj->klass_
- __ LoadFromOffset(kLoadWord, out, obj, class_offset);
- __ MaybeUnpoisonHeapReference(out);
+ GenerateReferenceLoadTwoRegisters(instruction,
+ out_loc,
+ obj_loc,
+ class_offset,
+ maybe_temp_loc,
+ kCompilerReadBarrierOption);
// Do an exact check.
MipsLabel success;
__ Beq(out, cls, &success);
// Otherwise, we need to check that the object's class is a non-primitive array.
// /* HeapReference<Class> */ out = out->component_type_
- __ LoadFromOffset(kLoadWord, out, out, component_offset);
- __ MaybeUnpoisonHeapReference(out);
+ GenerateReferenceLoadOneRegister(instruction,
+ out_loc,
+ component_offset,
+ maybe_temp_loc,
+ kCompilerReadBarrierOption);
// If `out` is null, we use it for the result, and jump to `done`.
__ Beqz(out, &done);
__ LoadFromOffset(kLoadUnsignedHalfword, out, out, primitive_offset);
@@ -5455,8 +6609,12 @@ void InstructionCodeGeneratorMIPS::VisitInstanceOf(HInstanceOf* instruction) {
case TypeCheckKind::kArrayCheck: {
// No read barrier since the slow path will retry upon failure.
// /* HeapReference<Class> */ out = obj->klass_
- __ LoadFromOffset(kLoadWord, out, obj, class_offset);
- __ MaybeUnpoisonHeapReference(out);
+ GenerateReferenceLoadTwoRegisters(instruction,
+ out_loc,
+ obj_loc,
+ class_offset,
+ maybe_temp_loc,
+ kWithoutReadBarrier);
DCHECK(locations->OnlyCallsOnSlowPath());
slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathMIPS(instruction,
/* is_fatal */ false);
@@ -5627,9 +6785,6 @@ static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorMIPS* codegen
HLoadString::LoadKind CodeGeneratorMIPS::GetSupportedLoadStringKind(
HLoadString::LoadKind desired_string_load_kind) {
- if (kEmitCompilerReadBarrier) {
- UNIMPLEMENTED(FATAL) << "for read barrier";
- }
// We disable PC-relative load on pre-R6 when there is an irreducible loop, as the optimization
// is incompatible with it.
// TODO: Create as many MipsDexCacheArraysBase instructions as needed for methods
@@ -5665,9 +6820,6 @@ HLoadString::LoadKind CodeGeneratorMIPS::GetSupportedLoadStringKind(
HLoadClass::LoadKind CodeGeneratorMIPS::GetSupportedLoadClassKind(
HLoadClass::LoadKind desired_class_load_kind) {
- if (kEmitCompilerReadBarrier) {
- UNIMPLEMENTED(FATAL) << "for read barrier";
- }
// We disable PC-relative load on pre-R6 when there is an irreducible loop, as the optimization
// is incompatible with it.
bool has_irreducible_loops = GetGraph()->HasIrreducibleLoops();
@@ -5916,12 +7068,13 @@ void LocationsBuilderMIPS::VisitLoadClass(HLoadClass* cls) {
CodeGenerator::CreateLoadClassRuntimeCallLocationSummary(
cls,
Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
- Location::RegisterLocation(V0));
+ calling_convention.GetReturnLocation(Primitive::kPrimNot));
return;
}
DCHECK(!cls->NeedsAccessCheck());
- LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || kEmitCompilerReadBarrier)
+ const bool requires_read_barrier = kEmitCompilerReadBarrier && !cls->IsInBootImage();
+ LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || requires_read_barrier)
? LocationSummary::kCallOnSlowPath
: LocationSummary::kNoCall;
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(cls, call_kind);
@@ -5976,6 +7129,9 @@ void InstructionCodeGeneratorMIPS::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAF
break;
}
+ const ReadBarrierOption read_barrier_option = cls->IsInBootImage()
+ ? kWithoutReadBarrier
+ : kCompilerReadBarrierOption;
bool generate_null_check = false;
switch (load_kind) {
case HLoadClass::LoadKind::kReferrersClass: {
@@ -5985,11 +7141,13 @@ void InstructionCodeGeneratorMIPS::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAF
GenerateGcRootFieldLoad(cls,
out_loc,
base_or_current_method_reg,
- ArtMethod::DeclaringClassOffset().Int32Value());
+ ArtMethod::DeclaringClassOffset().Int32Value(),
+ read_barrier_option);
break;
}
case HLoadClass::LoadKind::kBootImageLinkTimeAddress:
DCHECK(codegen_->GetCompilerOptions().IsBootImage());
+ DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
__ LoadLiteral(out,
base_or_current_method_reg,
codegen_->DeduplicateBootImageTypeLiteral(cls->GetDexFile(),
@@ -5997,6 +7155,7 @@ void InstructionCodeGeneratorMIPS::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAF
break;
case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: {
DCHECK(codegen_->GetCompilerOptions().IsBootImage());
+ DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
CodeGeneratorMIPS::PcRelativePatchInfo* info =
codegen_->NewPcRelativeTypePatch(cls->GetDexFile(), cls->GetTypeIndex());
bool reordering = __ SetReorder(false);
@@ -6006,7 +7165,7 @@ void InstructionCodeGeneratorMIPS::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAF
break;
}
case HLoadClass::LoadKind::kBootImageAddress: {
- DCHECK(!kEmitCompilerReadBarrier);
+ DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
uint32_t address = dchecked_integral_cast<uint32_t>(
reinterpret_cast<uintptr_t>(cls->GetClass().Get()));
DCHECK_NE(address, 0u);
@@ -6020,7 +7179,7 @@ void InstructionCodeGeneratorMIPS::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAF
codegen_->NewTypeBssEntryPatch(cls->GetDexFile(), cls->GetTypeIndex());
bool reordering = __ SetReorder(false);
codegen_->EmitPcRelativeAddressPlaceholderHigh(info, out, base_or_current_method_reg);
- GenerateGcRootFieldLoad(cls, out_loc, out, /* placeholder */ 0x5678);
+ GenerateGcRootFieldLoad(cls, out_loc, out, /* placeholder */ 0x5678, read_barrier_option);
__ SetReorder(reordering);
generate_null_check = true;
break;
@@ -6032,7 +7191,7 @@ void InstructionCodeGeneratorMIPS::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAF
bool reordering = __ SetReorder(false);
__ Bind(&info->high_label);
__ Lui(out, /* placeholder */ 0x1234);
- GenerateGcRootFieldLoad(cls, out_loc, out, /* placeholder */ 0x5678);
+ GenerateGcRootFieldLoad(cls, out_loc, out, /* placeholder */ 0x5678, read_barrier_option);
__ SetReorder(reordering);
break;
}
@@ -6165,7 +7324,11 @@ void InstructionCodeGeneratorMIPS::VisitLoadString(HLoadString* load) NO_THREAD_
codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex());
bool reordering = __ SetReorder(false);
codegen_->EmitPcRelativeAddressPlaceholderHigh(info, out, base_or_current_method_reg);
- GenerateGcRootFieldLoad(load, out_loc, out, /* placeholder */ 0x5678);
+ GenerateGcRootFieldLoad(load,
+ out_loc,
+ out,
+ /* placeholder */ 0x5678,
+ kCompilerReadBarrierOption);
__ SetReorder(reordering);
SlowPathCodeMIPS* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathMIPS(load);
codegen_->AddSlowPath(slow_path);
@@ -6181,7 +7344,11 @@ void InstructionCodeGeneratorMIPS::VisitLoadString(HLoadString* load) NO_THREAD_
bool reordering = __ SetReorder(false);
__ Bind(&info->high_label);
__ Lui(out, /* placeholder */ 0x1234);
- GenerateGcRootFieldLoad(load, out_loc, out, /* placeholder */ 0x5678);
+ GenerateGcRootFieldLoad(load,
+ out_loc,
+ out,
+ /* placeholder */ 0x5678,
+ kCompilerReadBarrierOption);
__ SetReorder(reordering);
return;
}
diff --git a/compiler/optimizing/code_generator_mips.h b/compiler/optimizing/code_generator_mips.h
index 98fee24a74..3875c4bdba 100644
--- a/compiler/optimizing/code_generator_mips.h
+++ b/compiler/optimizing/code_generator_mips.h
@@ -241,6 +241,38 @@ class InstructionCodeGeneratorMIPS : public InstructionCodeGenerator {
uint32_t dex_pc,
bool value_can_be_null);
void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info, uint32_t dex_pc);
+
+ // Generate a heap reference load using one register `out`:
+ //
+ // out <- *(out + offset)
+ //
+ // while honoring heap poisoning and/or read barriers (if any).
+ //
+ // Location `maybe_temp` is used when generating a read barrier and
+ // shall be a register in that case; it may be an invalid location
+ // otherwise.
+ void GenerateReferenceLoadOneRegister(HInstruction* instruction,
+ Location out,
+ uint32_t offset,
+ Location maybe_temp,
+ ReadBarrierOption read_barrier_option);
+ // Generate a heap reference load using two different registers
+ // `out` and `obj`:
+ //
+ // out <- *(obj + offset)
+ //
+ // while honoring heap poisoning and/or read barriers (if any).
+ //
+ // Location `maybe_temp` is used when generating a Baker's (fast
+ // path) read barrier and shall be a register in that case; it may
+ // be an invalid location otherwise.
+ void GenerateReferenceLoadTwoRegisters(HInstruction* instruction,
+ Location out,
+ Location obj,
+ uint32_t offset,
+ Location maybe_temp,
+ ReadBarrierOption read_barrier_option);
+
// Generate a GC root reference load:
//
// root <- *(obj + offset)
@@ -249,7 +281,9 @@ class InstructionCodeGeneratorMIPS : public InstructionCodeGenerator {
void GenerateGcRootFieldLoad(HInstruction* instruction,
Location root,
Register obj,
- uint32_t offset);
+ uint32_t offset,
+ ReadBarrierOption read_barrier_option);
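+ //
+ // With Baker read barriers, this expands to roughly (sketch; the mark
+ // entrypoint is installed in the thread only while the GC is marking):
+ //
+ //   root = *(obj + offset)
+ //   temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
+ //   if (temp != nullptr) {
+ //     root = temp(root)  // Mark the GC root.
+ //   }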
+
void GenerateIntCompare(IfCondition cond, LocationSummary* locations);
// When the function returns `false` it means that the condition holds if `dst` is non-zero
// and doesn't hold if `dst` is zero. If it returns `true`, the roles of zero and non-zero
@@ -353,6 +387,91 @@ class CodeGeneratorMIPS : public CodeGenerator {
void EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) OVERRIDE;
void EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) OVERRIDE;
+ // Fast path implementation of ReadBarrier::Barrier for a heap
+ // reference field load when Baker's read barriers are used.
+ void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
+ Location ref,
+ Register obj,
+ uint32_t offset,
+ Location temp,
+ bool needs_null_check);
+ // Fast path implementation of ReadBarrier::Barrier for a heap
+ // reference array load when Baker's read barriers are used.
+ void GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
+ Location ref,
+ Register obj,
+ uint32_t data_offset,
+ Location index,
+ Location temp,
+ bool needs_null_check);
+
+ // Factored implementation, used by GenerateFieldLoadWithBakerReadBarrier,
+ // GenerateArrayLoadWithBakerReadBarrier and some intrinsics.
+ //
+ // Load the object reference located at the address
+ // `obj + offset + (index << scale_factor)`, held by object `obj`, into
+ // `ref`, and mark it if needed.
+ //
+ // If `always_update_field` is true, the value of the reference is
+ // atomically updated in the holder (`obj`).
+ void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
+ Location ref,
+ Register obj,
+ uint32_t offset,
+ Location index,
+ ScaleFactor scale_factor,
+ Location temp,
+ bool needs_null_check,
+ bool always_update_field = false);
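+ //
+ // A sketch of the Baker fast path this emits (simplified; heap
+ // poisoning, null checks and exact register usage omitted):
+ //
+ //   uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
+ //   // Ordering so the reference load cannot be reordered before the
+ //   // lock word load.
+ //   ref = *(obj + offset + (index << scale_factor));  // Original load.
+ //   if (rb_state == ReadBarrier::GrayState()) {
+ //     ref = ReadBarrierMarkRegX(ref);  // Entry point slow path.
+ //   }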
+
+ // Generate a read barrier for a heap reference within `instruction`
+ // using a slow path.
+ //
+ // A read barrier for an object reference read from the heap is
+ // implemented as a call to the artReadBarrierSlow runtime entry
+ // point, which is passed the values in locations `ref`, `obj`, and
+ // `offset`:
+ //
+ // mirror::Object* artReadBarrierSlow(mirror::Object* ref,
+ // mirror::Object* obj,
+ // uint32_t offset);
+ //
+ // The `out` location contains the value returned by
+ // artReadBarrierSlow.
+ //
+ // When `index` is provided (i.e. for array accesses), the offset
+ // value passed to artReadBarrierSlow is adjusted to take `index`
+ // into account.
+ void GenerateReadBarrierSlow(HInstruction* instruction,
+ Location out,
+ Location ref,
+ Location obj,
+ uint32_t offset,
+ Location index = Location::NoLocation());
+
+ // If read barriers are enabled, generate a read barrier for a heap
+ // reference using a slow path. If heap poisoning is enabled, also
+ // unpoison the reference in `out`.
+ void MaybeGenerateReadBarrierSlow(HInstruction* instruction,
+ Location out,
+ Location ref,
+ Location obj,
+ uint32_t offset,
+ Location index = Location::NoLocation());
+
+ // Generate a read barrier for a GC root within `instruction` using
+ // a slow path.
+ //
+ // A read barrier for an object reference GC root is implemented as
+ // a call to the artReadBarrierForRootSlow runtime entry point,
+ // which is passed the value in location `root`:
+ //
+ // mirror::Object* artReadBarrierForRootSlow(GcRoot<mirror::Object>* root);
+ //
+ // The `out` location contains the value returned by
+ // artReadBarrierForRootSlow.
+ void GenerateReadBarrierForRootSlow(HInstruction* instruction, Location out, Location root);
+
void MarkGCCard(Register object, Register value, bool value_can_be_null);
// Register allocation.
@@ -400,6 +519,15 @@ class CodeGeneratorMIPS : public CodeGenerator {
uint32_t dex_pc,
SlowPathCode* slow_path = nullptr) OVERRIDE;
+ // Generate code to invoke a runtime entry point, but do not record
+ // PC-related information in a stack map.
+ void InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
+ HInstruction* instruction,
+ SlowPathCode* slow_path,
+ bool direct);
+
+ void GenerateInvokeRuntime(int32_t entry_point_offset, bool direct);
+
ParallelMoveResolver* GetMoveResolver() OVERRIDE { return &move_resolver_; }
bool NeedsTwoRegisters(Primitive::Type type) const OVERRIDE {
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index c82533bc7d..78b31e9e86 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -407,6 +407,528 @@ class DeoptimizationSlowPathMIPS64 : public SlowPathCodeMIPS64 {
DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathMIPS64);
};
+class ArraySetSlowPathMIPS64 : public SlowPathCodeMIPS64 {
+ public:
+ explicit ArraySetSlowPathMIPS64(HInstruction* instruction) : SlowPathCodeMIPS64(instruction) {}
+
+ void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ LocationSummary* locations = instruction_->GetLocations();
+ __ Bind(GetEntryLabel());
+ SaveLiveRegisters(codegen, locations);
+
+ InvokeRuntimeCallingConvention calling_convention;
+ HParallelMove parallel_move(codegen->GetGraph()->GetArena());
+ parallel_move.AddMove(
+ locations->InAt(0),
+ Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
+ Primitive::kPrimNot,
+ nullptr);
+ parallel_move.AddMove(
+ locations->InAt(1),
+ Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
+ Primitive::kPrimInt,
+ nullptr);
+ parallel_move.AddMove(
+ locations->InAt(2),
+ Location::RegisterLocation(calling_convention.GetRegisterAt(2)),
+ Primitive::kPrimNot,
+ nullptr);
+ codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
+
+ CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen);
+ mips64_codegen->InvokeRuntime(kQuickAputObject, instruction_, instruction_->GetDexPc(), this);
+ CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>();
+ RestoreLiveRegisters(codegen, locations);
+ __ Bc(GetExitLabel());
+ }
+
+ const char* GetDescription() const OVERRIDE { return "ArraySetSlowPathMIPS64"; }
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathMIPS64);
+};
+
+// Slow path marking an object reference `ref` during a read
+// barrier. The field `obj.field` in the object `obj` holding this
+// reference does not get updated by this slow path after marking (see
+// ReadBarrierMarkAndUpdateFieldSlowPathMIPS64 below for that).
+//
+// This means that after the execution of this slow path, `ref` will
+// always be up-to-date, but `obj.field` may not; i.e., after the
+// flip, `ref` will be a to-space reference, but `obj.field` will
+// probably still be a from-space reference (unless it gets updated by
+// another thread, or if another thread installed another object
+// reference (different from `ref`) in `obj.field`).
+//
+// If `entrypoint` is a valid location it is assumed to already be
+// holding the entrypoint. The case where the entrypoint is passed in
+// is for the GcRoot read barrier.
+class ReadBarrierMarkSlowPathMIPS64 : public SlowPathCodeMIPS64 {
+ public:
+ ReadBarrierMarkSlowPathMIPS64(HInstruction* instruction,
+ Location ref,
+ Location entrypoint = Location::NoLocation())
+ : SlowPathCodeMIPS64(instruction), ref_(ref), entrypoint_(entrypoint) {
+ DCHECK(kEmitCompilerReadBarrier);
+ }
+
+ const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathMIPS64"; }
+
+ void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ LocationSummary* locations = instruction_->GetLocations();
+ GpuRegister ref_reg = ref_.AsRegister<GpuRegister>();
+ DCHECK(locations->CanCall());
+ DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg;
+ DCHECK(instruction_->IsInstanceFieldGet() ||
+ instruction_->IsStaticFieldGet() ||
+ instruction_->IsArrayGet() ||
+ instruction_->IsArraySet() ||
+ instruction_->IsLoadClass() ||
+ instruction_->IsLoadString() ||
+ instruction_->IsInstanceOf() ||
+ instruction_->IsCheckCast() ||
+ (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()) ||
+ (instruction_->IsInvokeStaticOrDirect() && instruction_->GetLocations()->Intrinsified()))
+ << "Unexpected instruction in read barrier marking slow path: "
+ << instruction_->DebugName();
+
+ __ Bind(GetEntryLabel());
+ // No need to save live registers; it's taken care of by the
+ // entrypoint. Also, there is no need to update the stack mask,
+ // as this runtime call will not trigger a garbage collection.
+ CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen);
+ DCHECK((V0 <= ref_reg && ref_reg <= T2) ||
+ (S2 <= ref_reg && ref_reg <= S7) ||
+ (ref_reg == S8)) << ref_reg;
+ // "Compact" slow path, saving two moves.
+ //
+ // Instead of using the standard runtime calling convention (input
+ // and output in A0 and V0 respectively):
+ //
+ // A0 <- ref
+ // V0 <- ReadBarrierMark(A0)
+ // ref <- V0
+ //
+ // we just use rX (the register containing `ref`) as input and output
+ // of a dedicated entrypoint:
+ //
+ // rX <- ReadBarrierMarkRegX(rX)
+ //
+ if (entrypoint_.IsValid()) {
+ mips64_codegen->ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction_, this);
+ DCHECK_EQ(entrypoint_.AsRegister<GpuRegister>(), T9);
+ __ Jalr(entrypoint_.AsRegister<GpuRegister>());
+ __ Nop();
+ } else {
+ int32_t entry_point_offset =
+ CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kMips64PointerSize>(ref_reg - 1);
+ // This runtime call does not require a stack map.
+ mips64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset,
+ instruction_,
+ this);
+ }
+ __ Bc(GetExitLabel());
+ }
+
+ private:
+ // The location (register) of the marked object reference.
+ const Location ref_;
+
+ // The location of the entrypoint if already loaded.
+ const Location entrypoint_;
+
+ DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathMIPS64);
+};
+
+// Slow path marking an object reference `ref` during a read barrier,
+// and if needed, atomically updating the field `obj.field` in the
+// object `obj` holding this reference after marking (contrary to
+// ReadBarrierMarkSlowPathMIPS64 above, which never tries to update
+// `obj.field`).
+//
+// This means that after the execution of this slow path, both `ref`
+// and `obj.field` will be up-to-date; i.e., after the flip, both will
+// hold the same to-space reference (unless another thread installed
+// another object reference (different from `ref`) in `obj.field`).
+class ReadBarrierMarkAndUpdateFieldSlowPathMIPS64 : public SlowPathCodeMIPS64 {
+ public:
+ ReadBarrierMarkAndUpdateFieldSlowPathMIPS64(HInstruction* instruction,
+ Location ref,
+ GpuRegister obj,
+ Location field_offset,
+ GpuRegister temp1)
+ : SlowPathCodeMIPS64(instruction),
+ ref_(ref),
+ obj_(obj),
+ field_offset_(field_offset),
+ temp1_(temp1) {
+ DCHECK(kEmitCompilerReadBarrier);
+ }
+
+ const char* GetDescription() const OVERRIDE {
+ return "ReadBarrierMarkAndUpdateFieldSlowPathMIPS64";
+ }
+
+ void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ LocationSummary* locations = instruction_->GetLocations();
+ GpuRegister ref_reg = ref_.AsRegister<GpuRegister>();
+ DCHECK(locations->CanCall());
+ DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg;
+ // This slow path is only used by the UnsafeCASObject intrinsic.
+ DCHECK((instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
+ << "Unexpected instruction in read barrier marking and field updating slow path: "
+ << instruction_->DebugName();
+ DCHECK(instruction_->GetLocations()->Intrinsified());
+ DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kUnsafeCASObject);
+ DCHECK(field_offset_.IsRegister()) << field_offset_;
+
+ __ Bind(GetEntryLabel());
+
+ // Save the old reference.
+ // Note that we cannot use AT or TMP to save the old reference, as those
+ // are used by the code that follows, but we need the old reference after
+ // the call to the ReadBarrierMarkRegX entry point.
+ DCHECK_NE(temp1_, AT);
+ DCHECK_NE(temp1_, TMP);
+ __ Move(temp1_, ref_reg);
+
+ // No need to save live registers; it's taken care of by the
+ // entrypoint. Also, there is no need to update the stack mask,
+ // as this runtime call will not trigger a garbage collection.
+ CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen);
+ DCHECK((V0 <= ref_reg && ref_reg <= T2) ||
+ (S2 <= ref_reg && ref_reg <= S7) ||
+ (ref_reg == S8)) << ref_reg;
+ // "Compact" slow path, saving two moves.
+ //
+ // Instead of using the standard runtime calling convention (input
+ // and output in A0 and V0 respectively):
+ //
+ // A0 <- ref
+ // V0 <- ReadBarrierMark(A0)
+ // ref <- V0
+ //
+ // we just use rX (the register containing `ref`) as input and output
+ // of a dedicated entrypoint:
+ //
+ // rX <- ReadBarrierMarkRegX(rX)
+ //
+ int32_t entry_point_offset =
+ CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kMips64PointerSize>(ref_reg - 1);
+ // This runtime call does not require a stack map.
+ mips64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset,
+ instruction_,
+ this);
+
+ // If the new reference is different from the old reference,
+ // update the field in the holder (`*(obj_ + field_offset_)`).
+ //
+ // Note that this field could also hold a different object, if
+ // another thread had concurrently changed it. In that case, the
+ // compare-and-set (CAS) loop below would abort, leaving the
+ // field as-is.
+ Mips64Label done;
+ __ Beqc(temp1_, ref_reg, &done);
+
+ // Update the holder's field atomically. This may fail if the
+ // mutator updates it before us, but that is OK. This is achieved
+ // using a strong compare-and-set (CAS) operation with relaxed
+ // memory synchronization ordering, where the expected value is
+ // the old reference and the desired value is the new reference.
+
+ // Convenience aliases.
+ GpuRegister base = obj_;
+ GpuRegister offset = field_offset_.AsRegister<GpuRegister>();
+ GpuRegister expected = temp1_;
+ GpuRegister value = ref_reg;
+ GpuRegister tmp_ptr = TMP; // Pointer to actual memory.
+ GpuRegister tmp = AT; // Value in memory.
+
+ __ Daddu(tmp_ptr, base, offset);
+
+ if (kPoisonHeapReferences) {
+ __ PoisonHeapReference(expected);
+ // Do not poison `value` if it is the same register as
+ // `expected`, which has just been poisoned.
+ if (value != expected) {
+ __ PoisonHeapReference(value);
+ }
+ }
+
+ // do {
+ // tmp = [r_ptr];
+ // } while (tmp == expected && failure([r_ptr] <- r_new_value));
+
+ Mips64Label loop_head, exit_loop;
+ __ Bind(&loop_head);
+ __ Ll(tmp, tmp_ptr);
+ // The LL instruction sign-extends the 32-bit value, but
+ // 32-bit references must be zero-extended. Zero-extend `tmp`.
+ __ Dext(tmp, tmp, 0, 32);
+ __ Bnec(tmp, expected, &exit_loop);
+ __ Move(tmp, value);
+ __ Sc(tmp, tmp_ptr);
+ __ Beqzc(tmp, &loop_head);
+ __ Bind(&exit_loop);
+
+ if (kPoisonHeapReferences) {
+ __ UnpoisonHeapReference(expected);
+ // Do not unpoison `value` if it is the same register as
+ // `expected`, which has just been unpoisoned.
+ if (value != expected) {
+ __ UnpoisonHeapReference(value);
+ }
+ }
+
+ __ Bind(&done);
+ __ Bc(GetExitLabel());
+ }
+
+ private:
+ // The location (register) of the marked object reference.
+ const Location ref_;
+ // The register containing the object holding the marked object reference field.
+ const GpuRegister obj_;
+ // The location of the offset of the marked reference field within `obj_`.
+ Location field_offset_;
+
+ const GpuRegister temp1_;
+
+ DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkAndUpdateFieldSlowPathMIPS64);
+};
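+
+// The LL/SC loop above acts as a strong compare-and-set with relaxed memory
+// ordering; a C++ analogue of its effect (sketch, ignoring heap poisoning
+// and the zero-extension of 32-bit references):
+//
+//   std::atomic<uint32_t>* field =
+//       reinterpret_cast<std::atomic<uint32_t>*>(obj + field_offset);
+//   uint32_t expected32 = expected;
+//   field->compare_exchange_strong(expected32, value,
+//                                  std::memory_order_relaxed);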
+
+// Slow path generating a read barrier for a heap reference.
+class ReadBarrierForHeapReferenceSlowPathMIPS64 : public SlowPathCodeMIPS64 {
+ public:
+ ReadBarrierForHeapReferenceSlowPathMIPS64(HInstruction* instruction,
+ Location out,
+ Location ref,
+ Location obj,
+ uint32_t offset,
+ Location index)
+ : SlowPathCodeMIPS64(instruction),
+ out_(out),
+ ref_(ref),
+ obj_(obj),
+ offset_(offset),
+ index_(index) {
+ DCHECK(kEmitCompilerReadBarrier);
+ // If `obj` is equal to `out` or `ref`, it means the initial object
+ // has been overwritten by (or after) the heap object reference load
+ // to be instrumented, e.g.:
+ //
+ // __ LoadFromOffset(kLoadUnsignedWord, out, out, offset);
+ // codegen_->GenerateReadBarrierSlow(instruction, out_loc, out_loc, out_loc, offset);
+ //
+ // In that case, we have lost the information about the original
+ // object, and the emitted read barrier cannot work properly.
+ DCHECK(!obj.Equals(out)) << "obj=" << obj << " out=" << out;
+ DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref;
+ }
+
+ void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen);
+ LocationSummary* locations = instruction_->GetLocations();
+ Primitive::Type type = Primitive::kPrimNot;
+ GpuRegister reg_out = out_.AsRegister<GpuRegister>();
+ DCHECK(locations->CanCall());
+ DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out));
+ DCHECK(instruction_->IsInstanceFieldGet() ||
+ instruction_->IsStaticFieldGet() ||
+ instruction_->IsArrayGet() ||
+ instruction_->IsInstanceOf() ||
+ instruction_->IsCheckCast() ||
+ (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
+ << "Unexpected instruction in read barrier for heap reference slow path: "
+ << instruction_->DebugName();
+
+ __ Bind(GetEntryLabel());
+ SaveLiveRegisters(codegen, locations);
+
+ // We may have to change the index's value, but as `index_` is a
+ // constant member (like the other "inputs" of this slow path),
+ // we introduce a copy of it, `index`.
+ Location index = index_;
+ if (index_.IsValid()) {
+ // Handle `index_` for HArrayGet and UnsafeGetObject/UnsafeGetObjectVolatile intrinsics.
+ if (instruction_->IsArrayGet()) {
+ // Compute the actual memory offset and store it in `index`.
+ GpuRegister index_reg = index_.AsRegister<GpuRegister>();
+ DCHECK(locations->GetLiveRegisters()->ContainsCoreRegister(index_reg));
+ if (codegen->IsCoreCalleeSaveRegister(index_reg)) {
+ // We are about to change the value of `index_reg` (see the
+ // calls to art::mips64::Mips64Assembler::Sll and
+ // art::mips64::Mips64Assembler::Addiu32 below), but it has
+ // not been saved by the previous call to
+ // art::SlowPathCode::SaveLiveRegisters, as it is a
+ // callee-save register --
+ // art::SlowPathCode::SaveLiveRegisters does not consider
+ // callee-save registers, as it has been designed with the
+ // assumption that callee-save registers are supposed to be
+ // handled by the called function. So, as a callee-save
+ // register, `index_reg` _would_ eventually be saved onto
+ // the stack, but it would be too late: we would have
+ // changed its value earlier. Therefore, we manually save
+ // it here into another freely available register,
+ // `free_reg`, chosen of course among the caller-save
+ // registers (as a callee-save `free_reg` register would
+ // exhibit the same problem).
+ //
+ // Note we could have requested a temporary register from
+ // the register allocator instead; but we prefer not to, as
+ // this is a slow path, and we know we can find a
+ // caller-save register that is available.
+ GpuRegister free_reg = FindAvailableCallerSaveRegister(codegen);
+ __ Move(free_reg, index_reg);
+ index_reg = free_reg;
+ index = Location::RegisterLocation(index_reg);
+ } else {
+ // The initial register stored in `index_` has already been
+ // saved in the call to art::SlowPathCode::SaveLiveRegisters
+ // (as it is not a callee-save register), so we can freely
+ // use it.
+ }
+ // Shifting the index value contained in `index_reg` by the scale
+ // factor (2) cannot overflow in practice, as the runtime is
+ // unable to allocate object arrays with a size larger than
+ // 2^26 - 1 (that is, 2^28 - 4 bytes).
+ __ Sll(index_reg, index_reg, TIMES_4);
+ static_assert(
+ sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
+ "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
+ __ Addiu32(index_reg, index_reg, offset_);
+ } else {
+ // In the case of the UnsafeGetObject/UnsafeGetObjectVolatile
+ // intrinsics, `index_` is not shifted by a scale factor of 2
+ // (as in the case of ArrayGet), as it is actually an offset
+ // to an object field within an object.
+ DCHECK(instruction_->IsInvoke()) << instruction_->DebugName();
+ DCHECK(instruction_->GetLocations()->Intrinsified());
+ DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) ||
+ (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile))
+ << instruction_->AsInvoke()->GetIntrinsic();
+ DCHECK_EQ(offset_, 0U);
+ DCHECK(index_.IsRegister());
+ }
+ }
+
+ // We're moving two or three locations to locations that could
+ // overlap, so we need a parallel move resolver.
+ InvokeRuntimeCallingConvention calling_convention;
+ HParallelMove parallel_move(codegen->GetGraph()->GetArena());
+ parallel_move.AddMove(ref_,
+ Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
+ Primitive::kPrimNot,
+ nullptr);
+ parallel_move.AddMove(obj_,
+ Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
+ Primitive::kPrimNot,
+ nullptr);
+ if (index.IsValid()) {
+ parallel_move.AddMove(index,
+ Location::RegisterLocation(calling_convention.GetRegisterAt(2)),
+ Primitive::kPrimInt,
+ nullptr);
+ codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
+ } else {
+ codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
+ __ LoadConst32(calling_convention.GetRegisterAt(2), offset_);
+ }
+ mips64_codegen->InvokeRuntime(kQuickReadBarrierSlow,
+ instruction_,
+ instruction_->GetDexPc(),
+ this);
+ CheckEntrypointTypes<
+ kQuickReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t>();
+ mips64_codegen->MoveLocation(out_, calling_convention.GetReturnLocation(type), type);
+
+ RestoreLiveRegisters(codegen, locations);
+ __ Bc(GetExitLabel());
+ }
+
+ const char* GetDescription() const OVERRIDE {
+ return "ReadBarrierForHeapReferenceSlowPathMIPS64";
+ }
+
+ private:
+ GpuRegister FindAvailableCallerSaveRegister(CodeGenerator* codegen) {
+ size_t ref = static_cast<size_t>(ref_.AsRegister<GpuRegister>());
+ size_t obj = static_cast<size_t>(obj_.AsRegister<GpuRegister>());
+ for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) {
+ if (i != ref &&
+ i != obj &&
+ !codegen->IsCoreCalleeSaveRegister(i) &&
+ !codegen->IsBlockedCoreRegister(i)) {
+ return static_cast<GpuRegister>(i);
+ }
+ }
+ // We shall never fail to find a free caller-save register, as
+ // there are more than two core caller-save registers on MIPS64
+ // (meaning it is possible to find one which is different from
+ // `ref` and `obj`).
+ DCHECK_GT(codegen->GetNumberOfCoreCallerSaveRegisters(), 2u);
+ LOG(FATAL) << "Could not find a free caller-save register";
+ UNREACHABLE();
+ }
+
+ const Location out_;
+ const Location ref_;
+ const Location obj_;
+ const uint32_t offset_;
+ // An additional location containing an index to an array.
+ // Only used for HArrayGet and the UnsafeGetObject &
+ // UnsafeGetObjectVolatile intrinsics.
+ const Location index_;
+
+ DISALLOW_COPY_AND_ASSIGN(ReadBarrierForHeapReferenceSlowPathMIPS64);
+};
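+
+// For an ArrayGet, the `offset` argument passed to artReadBarrierSlow is
+// materialized above as (sketch):
+//
+//   adjusted = (index << TIMES_4) + data_offset;
+//   artReadBarrierSlow(ref, obj, adjusted);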
+
+// Slow path generating a read barrier for a GC root.
+class ReadBarrierForRootSlowPathMIPS64 : public SlowPathCodeMIPS64 {
+ public:
+ ReadBarrierForRootSlowPathMIPS64(HInstruction* instruction, Location out, Location root)
+ : SlowPathCodeMIPS64(instruction), out_(out), root_(root) {
+ DCHECK(kEmitCompilerReadBarrier);
+ }
+
+ void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ LocationSummary* locations = instruction_->GetLocations();
+ Primitive::Type type = Primitive::kPrimNot;
+ GpuRegister reg_out = out_.AsRegister<GpuRegister>();
+ DCHECK(locations->CanCall());
+ DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out));
+ DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString())
+ << "Unexpected instruction in read barrier for GC root slow path: "
+ << instruction_->DebugName();
+
+ __ Bind(GetEntryLabel());
+ SaveLiveRegisters(codegen, locations);
+
+ InvokeRuntimeCallingConvention calling_convention;
+ CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen);
+ mips64_codegen->MoveLocation(Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
+ root_,
+ Primitive::kPrimNot);
+ mips64_codegen->InvokeRuntime(kQuickReadBarrierForRootSlow,
+ instruction_,
+ instruction_->GetDexPc(),
+ this);
+ CheckEntrypointTypes<kQuickReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*>();
+ mips64_codegen->MoveLocation(out_, calling_convention.GetReturnLocation(type), type);
+
+ RestoreLiveRegisters(codegen, locations);
+ __ Bc(GetExitLabel());
+ }
+
+ const char* GetDescription() const OVERRIDE { return "ReadBarrierForRootSlowPathMIPS64"; }
+
+ private:
+ const Location out_;
+ const Location root_;
+
+ DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathMIPS64);
+};
+
CodeGeneratorMIPS64::CodeGeneratorMIPS64(HGraph* graph,
const Mips64InstructionSetFeatures& isa_features,
const CompilerOptions& compiler_options,
@@ -1140,23 +1662,32 @@ void CodeGeneratorMIPS64::InvokeRuntime(QuickEntrypointEnum entrypoint,
uint32_t dex_pc,
SlowPathCode* slow_path) {
ValidateInvokeRuntime(entrypoint, instruction, slow_path);
- __ LoadFromOffset(kLoadDoubleword,
- T9,
- TR,
- GetThreadOffset<kMips64PointerSize>(entrypoint).Int32Value());
- __ Jalr(T9);
- __ Nop();
+ GenerateInvokeRuntime(GetThreadOffset<kMips64PointerSize>(entrypoint).Int32Value());
if (EntrypointRequiresStackMap(entrypoint)) {
RecordPcInfo(instruction, dex_pc, slow_path);
}
}
+void CodeGeneratorMIPS64::InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
+ HInstruction* instruction,
+ SlowPathCode* slow_path) {
+ ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction, slow_path);
+ GenerateInvokeRuntime(entry_point_offset);
+}
+
+void CodeGeneratorMIPS64::GenerateInvokeRuntime(int32_t entry_point_offset) {
+ __ LoadFromOffset(kLoadDoubleword, T9, TR, entry_point_offset);
+ __ Jalr(T9);
+ __ Nop();
+}
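+
+// The resulting call sequence is (sketch; TR is the ART thread register,
+// S1 on MIPS64):
+//
+//   ld   t9, entry_point_offset(TR)
+//   jalr t9
+//   nop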
+
void InstructionCodeGeneratorMIPS64::GenerateClassInitializationCheck(SlowPathCodeMIPS64* slow_path,
GpuRegister class_reg) {
__ LoadFromOffset(kLoadWord, TMP, class_reg, mirror::Class::StatusOffset().Int32Value());
__ LoadConst32(AT, mirror::Class::kStatusInitialized);
__ Bltc(TMP, AT, slow_path->GetEntryLabel());
- // TODO: barrier needed?
+ // Even if the initialized flag is set, we need to ensure consistent memory ordering.
+ __ Sync(0);
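+ // In effect, the check above is (sketch):
+ //
+ //   if (class_reg->status_ < mirror::Class::kStatusInitialized)
+ //     goto slow_path;  // Let the runtime initialize the class.
+ //   sync  // Acquire: the initializing thread's stores become visible.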
__ Bind(slow_path->GetExitLabel());
}
@@ -1447,14 +1978,31 @@ void InstructionCodeGeneratorMIPS64::VisitAnd(HAnd* instruction) {
}
void LocationsBuilderMIPS64::VisitArrayGet(HArrayGet* instruction) {
+ Primitive::Type type = instruction->GetType();
+ bool object_array_get_with_read_barrier =
+ kEmitCompilerReadBarrier && (type == Primitive::kPrimNot);
LocationSummary* locations =
- new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+ new (GetGraph()->GetArena()) LocationSummary(instruction,
+ object_array_get_with_read_barrier
+ ? LocationSummary::kCallOnSlowPath
+ : LocationSummary::kNoCall);
locations->SetInAt(0, Location::RequiresRegister());
locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
- if (Primitive::IsFloatingPointType(instruction->GetType())) {
+ if (Primitive::IsFloatingPointType(type)) {
locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
} else {
- locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+ // The output overlaps in the case of an object array get with
+ // read barriers enabled: we do not want the move to overwrite the
+ // array's location, as we need it to emit the read barrier.
+ locations->SetOut(Location::RequiresRegister(),
+ object_array_get_with_read_barrier
+ ? Location::kOutputOverlap
+ : Location::kNoOutputOverlap);
+ }
+ // We need a temporary register for the read barrier marking slow
+ // path in CodeGeneratorMIPS64::GenerateArrayLoadWithBakerReadBarrier.
+ if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
+ locations->AddTemp(Location::RequiresRegister());
}
}
@@ -1467,7 +2015,9 @@ static auto GetImplicitNullChecker(HInstruction* instruction, CodeGeneratorMIPS6
void InstructionCodeGeneratorMIPS64::VisitArrayGet(HArrayGet* instruction) {
LocationSummary* locations = instruction->GetLocations();
- GpuRegister obj = locations->InAt(0).AsRegister<GpuRegister>();
+ Location obj_loc = locations->InAt(0);
+ GpuRegister obj = obj_loc.AsRegister<GpuRegister>();
+ Location out_loc = locations->Out();
Location index = locations->InAt(1);
uint32_t data_offset = CodeGenerator::GetArrayDataOffset(instruction);
auto null_checker = GetImplicitNullChecker(instruction, codegen_);
@@ -1477,7 +2027,7 @@ void InstructionCodeGeneratorMIPS64::VisitArrayGet(HArrayGet* instruction) {
instruction->IsStringCharAt();
switch (type) {
case Primitive::kPrimBoolean: {
- GpuRegister out = locations->Out().AsRegister<GpuRegister>();
+ GpuRegister out = out_loc.AsRegister<GpuRegister>();
if (index.IsConstant()) {
size_t offset =
(index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset;
@@ -1490,7 +2040,7 @@ void InstructionCodeGeneratorMIPS64::VisitArrayGet(HArrayGet* instruction) {
}
case Primitive::kPrimByte: {
- GpuRegister out = locations->Out().AsRegister<GpuRegister>();
+ GpuRegister out = out_loc.AsRegister<GpuRegister>();
if (index.IsConstant()) {
size_t offset =
(index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset;
@@ -1503,7 +2053,7 @@ void InstructionCodeGeneratorMIPS64::VisitArrayGet(HArrayGet* instruction) {
}
case Primitive::kPrimShort: {
- GpuRegister out = locations->Out().AsRegister<GpuRegister>();
+ GpuRegister out = out_loc.AsRegister<GpuRegister>();
if (index.IsConstant()) {
size_t offset =
(index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset;
@@ -1517,7 +2067,7 @@ void InstructionCodeGeneratorMIPS64::VisitArrayGet(HArrayGet* instruction) {
}
case Primitive::kPrimChar: {
- GpuRegister out = locations->Out().AsRegister<GpuRegister>();
+ GpuRegister out = out_loc.AsRegister<GpuRegister>();
if (maybe_compressed_char_at) {
uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
__ LoadFromOffset(kLoadWord, TMP, obj, count_offset, null_checker);
@@ -1570,10 +2120,9 @@ void InstructionCodeGeneratorMIPS64::VisitArrayGet(HArrayGet* instruction) {
break;
}
- case Primitive::kPrimInt:
- case Primitive::kPrimNot: {
+ case Primitive::kPrimInt: {
DCHECK_EQ(sizeof(mirror::HeapReference<mirror::Object>), sizeof(int32_t));
- GpuRegister out = locations->Out().AsRegister<GpuRegister>();
+ GpuRegister out = out_loc.AsRegister<GpuRegister>();
LoadOperandType load_type = (type == Primitive::kPrimNot) ? kLoadUnsignedWord : kLoadWord;
if (index.IsConstant()) {
size_t offset =
@@ -1587,8 +2136,53 @@ void InstructionCodeGeneratorMIPS64::VisitArrayGet(HArrayGet* instruction) {
break;
}
+ case Primitive::kPrimNot: {
+ static_assert(
+ sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
+ "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
+ // /* HeapReference<Object> */ out =
+ // *(obj + data_offset + index * sizeof(HeapReference<Object>))
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ Location temp = locations->GetTemp(0);
+ // Note that a potential implicit null check is handled in this
+ // CodeGeneratorMIPS64::GenerateArrayLoadWithBakerReadBarrier call.
+ codegen_->GenerateArrayLoadWithBakerReadBarrier(instruction,
+ out_loc,
+ obj,
+ data_offset,
+ index,
+ temp,
+ /* needs_null_check */ true);
+ } else {
+ GpuRegister out = out_loc.AsRegister<GpuRegister>();
+ if (index.IsConstant()) {
+ size_t offset =
+ (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
+ __ LoadFromOffset(kLoadUnsignedWord, out, obj, offset, null_checker);
+ // If read barriers are enabled, emit read barriers other than
+ // Baker's using a slow path (and also unpoison the loaded
+ // reference, if heap poisoning is enabled).
+ codegen_->MaybeGenerateReadBarrierSlow(instruction, out_loc, out_loc, obj_loc, offset);
+ } else {
+ __ Sll(TMP, index.AsRegister<GpuRegister>(), TIMES_4);
+ __ Addu(TMP, obj, TMP);
+ __ LoadFromOffset(kLoadUnsignedWord, out, TMP, data_offset, null_checker);
+ // If read barriers are enabled, emit read barriers other than
+ // Baker's using a slow path (and also unpoison the loaded
+ // reference, if heap poisoning is enabled).
+ codegen_->MaybeGenerateReadBarrierSlow(instruction,
+ out_loc,
+ out_loc,
+ obj_loc,
+ data_offset,
+ index);
+ }
+ }
+ break;
+ }
+
case Primitive::kPrimLong: {
- GpuRegister out = locations->Out().AsRegister<GpuRegister>();
+ GpuRegister out = out_loc.AsRegister<GpuRegister>();
if (index.IsConstant()) {
size_t offset =
(index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset;
@@ -1602,7 +2196,7 @@ void InstructionCodeGeneratorMIPS64::VisitArrayGet(HArrayGet* instruction) {
}
case Primitive::kPrimFloat: {
- FpuRegister out = locations->Out().AsFpuRegister<FpuRegister>();
+ FpuRegister out = out_loc.AsFpuRegister<FpuRegister>();
if (index.IsConstant()) {
size_t offset =
(index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
@@ -1616,7 +2210,7 @@ void InstructionCodeGeneratorMIPS64::VisitArrayGet(HArrayGet* instruction) {
}
case Primitive::kPrimDouble: {
- FpuRegister out = locations->Out().AsFpuRegister<FpuRegister>();
+ FpuRegister out = out_loc.AsFpuRegister<FpuRegister>();
if (index.IsConstant()) {
size_t offset =
(index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset;
@@ -1633,11 +2227,6 @@ void InstructionCodeGeneratorMIPS64::VisitArrayGet(HArrayGet* instruction) {
LOG(FATAL) << "Unreachable type " << instruction->GetType();
UNREACHABLE();
}
-
- if (type == Primitive::kPrimNot) {
- GpuRegister out = locations->Out().AsRegister<GpuRegister>();
- __ MaybeUnpoisonHeapReference(out);
- }
}
void LocationsBuilderMIPS64::VisitArrayLength(HArrayLength* instruction) {
@@ -1679,23 +2268,28 @@ Location LocationsBuilderMIPS64::FpuRegisterOrConstantForStore(HInstruction* ins
}
void LocationsBuilderMIPS64::VisitArraySet(HArraySet* instruction) {
- bool needs_runtime_call = instruction->NeedsTypeCheck();
+ Primitive::Type value_type = instruction->GetComponentType();
+
+ bool needs_write_barrier =
+ CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
+ bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck();
+
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(
instruction,
- needs_runtime_call ? LocationSummary::kCallOnMainOnly : LocationSummary::kNoCall);
- if (needs_runtime_call) {
- InvokeRuntimeCallingConvention calling_convention;
- locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
- locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
- locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
+ may_need_runtime_call_for_type_check ?
+ LocationSummary::kCallOnSlowPath :
+ LocationSummary::kNoCall);
+
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
+ if (Primitive::IsFloatingPointType(instruction->InputAt(2)->GetType())) {
+ locations->SetInAt(2, FpuRegisterOrConstantForStore(instruction->InputAt(2)));
} else {
- locations->SetInAt(0, Location::RequiresRegister());
- locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
- if (Primitive::IsFloatingPointType(instruction->InputAt(2)->GetType())) {
- locations->SetInAt(2, FpuRegisterOrConstantForStore(instruction->InputAt(2)));
- } else {
- locations->SetInAt(2, RegisterOrZeroConstant(instruction->InputAt(2)));
- }
+ locations->SetInAt(2, RegisterOrZeroConstant(instruction->InputAt(2)));
+ }
+ if (needs_write_barrier) {
+ // Temporary register for the write barrier.
+ locations->AddTemp(Location::RequiresRegister()); // Possibly used for ref. poisoning too.
}
}
@@ -1705,7 +2299,7 @@ void InstructionCodeGeneratorMIPS64::VisitArraySet(HArraySet* instruction) {
Location index = locations->InAt(1);
Location value_location = locations->InAt(2);
Primitive::Type value_type = instruction->GetComponentType();
- bool needs_runtime_call = locations->WillCall();
+ bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck();
bool needs_write_barrier =
CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
auto null_checker = GetImplicitNullChecker(instruction, codegen_);
@@ -1749,68 +2343,138 @@ void InstructionCodeGeneratorMIPS64::VisitArraySet(HArraySet* instruction) {
break;
}
- case Primitive::kPrimInt:
+ case Primitive::kPrimInt: {
+ uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
+ if (index.IsConstant()) {
+ data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4;
+ } else {
+ __ Dsll(base_reg, index.AsRegister<GpuRegister>(), TIMES_4);
+ __ Daddu(base_reg, obj, base_reg);
+ }
+ if (value_location.IsConstant()) {
+ int32_t value = CodeGenerator::GetInt32ValueOf(value_location.GetConstant());
+ __ StoreConstToOffset(kStoreWord, value, base_reg, data_offset, TMP, null_checker);
+ } else {
+ GpuRegister value = value_location.AsRegister<GpuRegister>();
+ __ StoreToOffset(kStoreWord, value, base_reg, data_offset, null_checker);
+ }
+ break;
+ }
+
case Primitive::kPrimNot: {
- if (!needs_runtime_call) {
+ if (value_location.IsConstant()) {
+ // Just setting null.
uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
if (index.IsConstant()) {
data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4;
} else {
- DCHECK(index.IsRegister()) << index;
__ Dsll(base_reg, index.AsRegister<GpuRegister>(), TIMES_4);
__ Daddu(base_reg, obj, base_reg);
}
- if (value_location.IsConstant()) {
- int32_t value = CodeGenerator::GetInt32ValueOf(value_location.GetConstant());
- __ StoreConstToOffset(kStoreWord, value, base_reg, data_offset, TMP, null_checker);
- DCHECK(!needs_write_barrier);
- } else {
- GpuRegister value = value_location.AsRegister<GpuRegister>();
- if (kPoisonHeapReferences && needs_write_barrier) {
- // Note that in the case where `value` is a null reference,
- // we do not enter this block, as a null reference does not
- // need poisoning.
- DCHECK_EQ(value_type, Primitive::kPrimNot);
- // Use Sw() instead of StoreToOffset() in order to be able to
- // hold the poisoned reference in AT and thus avoid allocating
- // yet another temporary register.
- if (index.IsConstant()) {
- if (!IsInt<16>(static_cast<int32_t>(data_offset))) {
- int16_t low16 = Low16Bits(data_offset);
- // For consistency with StoreToOffset() and such treat data_offset as int32_t.
- uint64_t high48 = static_cast<uint64_t>(static_cast<int32_t>(data_offset)) - low16;
- int16_t upper16 = High16Bits(high48);
- // Allow the full [-2GB,+2GB) range in case `low16` is negative and needs a
- // compensatory 64KB added, which may push `high48` above 2GB and require
- // the dahi instruction.
- int16_t higher16 = High32Bits(high48) + ((upper16 < 0) ? 1 : 0);
- __ Daui(TMP, obj, upper16);
- if (higher16 != 0) {
- __ Dahi(TMP, higher16);
- }
- base_reg = TMP;
- data_offset = low16;
- }
- } else {
- DCHECK(IsInt<16>(static_cast<int32_t>(data_offset)));
- }
- __ PoisonHeapReference(AT, value);
- __ Sw(AT, base_reg, data_offset);
- null_checker();
+ int32_t value = CodeGenerator::GetInt32ValueOf(value_location.GetConstant());
+ DCHECK_EQ(value, 0);
+ __ StoreConstToOffset(kStoreWord, value, base_reg, data_offset, TMP, null_checker);
+ DCHECK(!needs_write_barrier);
+ DCHECK(!may_need_runtime_call_for_type_check);
+ break;
+ }
+
+ DCHECK(needs_write_barrier);
+ GpuRegister value = value_location.AsRegister<GpuRegister>();
+ GpuRegister temp1 = locations->GetTemp(0).AsRegister<GpuRegister>();
+ GpuRegister temp2 = TMP; // Doesn't need to survive slow path.
+ uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
+ uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
+ uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
+ Mips64Label done;
+ SlowPathCodeMIPS64* slow_path = nullptr;
+
+ if (may_need_runtime_call_for_type_check) {
+ slow_path = new (GetGraph()->GetArena()) ArraySetSlowPathMIPS64(instruction);
+ codegen_->AddSlowPath(slow_path);
+ if (instruction->GetValueCanBeNull()) {
+ Mips64Label non_zero;
+ __ Bnezc(value, &non_zero);
+ uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
+ if (index.IsConstant()) {
+ data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4;
} else {
- __ StoreToOffset(kStoreWord, value, base_reg, data_offset, null_checker);
- }
- if (needs_write_barrier) {
- DCHECK_EQ(value_type, Primitive::kPrimNot);
- codegen_->MarkGCCard(obj, value, instruction->GetValueCanBeNull());
+ __ Dsll(base_reg, index.AsRegister<GpuRegister>(), TIMES_4);
+ __ Daddu(base_reg, obj, base_reg);
}
+ __ StoreToOffset(kStoreWord, value, base_reg, data_offset, null_checker);
+ __ Bc(&done);
+ __ Bind(&non_zero);
+ }
+
+ // Note that when read barriers are enabled, the type checks
+ // are performed without read barriers. This is fine, even in
+ // the case where a class object is in the from-space after
+ // the flip, as a comparison involving such a type would not
+ // produce a false positive; it may of course produce a false
+ // negative, in which case we would take the ArraySet slow
+ // path.
+
+ // /* HeapReference<Class> */ temp1 = obj->klass_
+ __ LoadFromOffset(kLoadUnsignedWord, temp1, obj, class_offset, null_checker);
+ __ MaybeUnpoisonHeapReference(temp1);
+
+ // /* HeapReference<Class> */ temp1 = temp1->component_type_
+ __ LoadFromOffset(kLoadUnsignedWord, temp1, temp1, component_offset);
+ // /* HeapReference<Class> */ temp2 = value->klass_
+ __ LoadFromOffset(kLoadUnsignedWord, temp2, value, class_offset);
+ // If heap poisoning is enabled, no need to unpoison `temp1`
+ // nor `temp2`, as we are comparing two poisoned references.
+
+ if (instruction->StaticTypeOfArrayIsObjectArray()) {
+ Mips64Label do_put;
+ __ Beqc(temp1, temp2, &do_put);
+ // If heap poisoning is enabled, the `temp1` reference has
+ // not been unpoisoned yet; unpoison it now.
+ __ MaybeUnpoisonHeapReference(temp1);
+
+ // /* HeapReference<Class> */ temp1 = temp1->super_class_
+ __ LoadFromOffset(kLoadUnsignedWord, temp1, temp1, super_offset);
+ // If heap poisoning is enabled, no need to unpoison
+ // `temp1`, as we are comparing against null below.
+ __ Bnezc(temp1, slow_path->GetEntryLabel());
+ __ Bind(&do_put);
+ } else {
+ __ Bnec(temp1, temp2, slow_path->GetEntryLabel());
}
+ }
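+
+ // In effect, the check above is (sketch; a null `value` has already
+ // been stored and branched around this code):
+ //
+ //   Class* component = obj->klass_->component_type_;
+ //   if (value->klass_ != component) {
+ //     if (!static_type_is_object_array ||
+ //         component->super_class_ != null) {  // component != Object.
+ //       goto slow_path;  // pAputObject performs the full check.
+ //     }
+ //   }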
+
+ GpuRegister source = value;
+ if (kPoisonHeapReferences) {
+ // Note that in the case where `value` is a null reference,
+ // we do not enter this block, as a null reference does not
+ // need poisoning.
+ __ Move(temp1, value);
+ __ PoisonHeapReference(temp1);
+ source = temp1;
+ }
+
+ uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
+ if (index.IsConstant()) {
+ data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4;
} else {
- DCHECK_EQ(value_type, Primitive::kPrimNot);
- // Note: if heap poisoning is enabled, pAputObject takes care
- // of poisoning the reference.
- codegen_->InvokeRuntime(kQuickAputObject, instruction, instruction->GetDexPc());
- CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>();
+ __ Dsll(base_reg, index.AsRegister<GpuRegister>(), TIMES_4);
+ __ Daddu(base_reg, obj, base_reg);
+ }
+ __ StoreToOffset(kStoreWord, source, base_reg, data_offset);
+
+ if (!may_need_runtime_call_for_type_check) {
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
+ }
+
+ codegen_->MarkGCCard(obj, value, instruction->GetValueCanBeNull());
+
+ if (done.IsLinked()) {
+ __ Bind(&done);
+ }
+
+ if (slow_path != nullptr) {
+ __ Bind(slow_path->GetExitLabel());
}
break;
}
@@ -1900,6 +2564,23 @@ void InstructionCodeGeneratorMIPS64::VisitBoundsCheck(HBoundsCheck* instruction)
__ Bgeuc(index, length, slow_path->GetEntryLabel());
}
+// Temp is used for read barrier.
+static size_t NumberOfInstanceOfTemps(TypeCheckKind type_check_kind) {
+ if (kEmitCompilerReadBarrier &&
+ (kUseBakerReadBarrier ||
+ type_check_kind == TypeCheckKind::kAbstractClassCheck ||
+ type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
+ type_check_kind == TypeCheckKind::kArrayObjectCheck)) {
+ return 1;
+ }
+ return 0;
+}
+
+// Extra temp is used for read barrier.
+static size_t NumberOfCheckCastTemps(TypeCheckKind type_check_kind) {
+ return 1 + NumberOfInstanceOfTemps(type_check_kind);
+}
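+
+// For example, with Baker read barriers enabled, every InstanceOf kind above
+// uses one marking temp (and thus CheckCast uses two); with read barriers
+// disabled they need 0 and 1 temps, respectively.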
+
void LocationsBuilderMIPS64::VisitCheckCast(HCheckCast* instruction) {
LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
bool throws_into_catch = instruction->CanThrowIntoCatchBlock();
@@ -1910,7 +2591,7 @@ void LocationsBuilderMIPS64::VisitCheckCast(HCheckCast* instruction) {
case TypeCheckKind::kAbstractClassCheck:
case TypeCheckKind::kClassHierarchyCheck:
case TypeCheckKind::kArrayObjectCheck:
- call_kind = throws_into_catch
+ call_kind = (throws_into_catch || kEmitCompilerReadBarrier)
? LocationSummary::kCallOnSlowPath
: LocationSummary::kNoCall; // In fact, call on a fatal (non-returning) slow path.
break;
@@ -1924,15 +2605,20 @@ void LocationsBuilderMIPS64::VisitCheckCast(HCheckCast* instruction) {
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
locations->SetInAt(0, Location::RequiresRegister());
locations->SetInAt(1, Location::RequiresRegister());
- locations->AddTemp(Location::RequiresRegister());
+ locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind));
}
void InstructionCodeGeneratorMIPS64::VisitCheckCast(HCheckCast* instruction) {
TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
LocationSummary* locations = instruction->GetLocations();
- GpuRegister obj = locations->InAt(0).AsRegister<GpuRegister>();
+ Location obj_loc = locations->InAt(0);
+ GpuRegister obj = obj_loc.AsRegister<GpuRegister>();
GpuRegister cls = locations->InAt(1).AsRegister<GpuRegister>();
- GpuRegister temp = locations->GetTemp(0).AsRegister<GpuRegister>();
+ Location temp_loc = locations->GetTemp(0);
+ GpuRegister temp = temp_loc.AsRegister<GpuRegister>();
+ const size_t num_temps = NumberOfCheckCastTemps(type_check_kind);
+ DCHECK_LE(num_temps, 2u);
+ Location maybe_temp2_loc = (num_temps >= 2) ? locations->GetTemp(1) : Location::NoLocation();
const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
@@ -1969,8 +2655,12 @@ void InstructionCodeGeneratorMIPS64::VisitCheckCast(HCheckCast* instruction) {
case TypeCheckKind::kExactCheck:
case TypeCheckKind::kArrayCheck: {
// /* HeapReference<Class> */ temp = obj->klass_
- __ LoadFromOffset(kLoadUnsignedWord, temp, obj, class_offset);
- __ MaybeUnpoisonHeapReference(temp);
+ GenerateReferenceLoadTwoRegisters(instruction,
+ temp_loc,
+ obj_loc,
+ class_offset,
+ maybe_temp2_loc,
+ kWithoutReadBarrier);
// Jump to slow path for throwing the exception or doing a
// more involved array check.
__ Bnec(temp, cls, slow_path->GetEntryLabel());
@@ -1979,15 +2669,22 @@ void InstructionCodeGeneratorMIPS64::VisitCheckCast(HCheckCast* instruction) {
case TypeCheckKind::kAbstractClassCheck: {
// /* HeapReference<Class> */ temp = obj->klass_
- __ LoadFromOffset(kLoadUnsignedWord, temp, obj, class_offset);
- __ MaybeUnpoisonHeapReference(temp);
+ GenerateReferenceLoadTwoRegisters(instruction,
+ temp_loc,
+ obj_loc,
+ class_offset,
+ maybe_temp2_loc,
+ kWithoutReadBarrier);
// If the class is abstract, we eagerly fetch the super class of the
// object to avoid doing a comparison we know will fail.
Mips64Label loop;
__ Bind(&loop);
// /* HeapReference<Class> */ temp = temp->super_class_
- __ LoadFromOffset(kLoadUnsignedWord, temp, temp, super_offset);
- __ MaybeUnpoisonHeapReference(temp);
+ GenerateReferenceLoadOneRegister(instruction,
+ temp_loc,
+ super_offset,
+ maybe_temp2_loc,
+ kWithoutReadBarrier);
// If the class reference currently in `temp` is null, jump to the slow path to throw the
// exception.
__ Beqzc(temp, slow_path->GetEntryLabel());
@@ -1998,15 +2695,22 @@ void InstructionCodeGeneratorMIPS64::VisitCheckCast(HCheckCast* instruction) {
case TypeCheckKind::kClassHierarchyCheck: {
// /* HeapReference<Class> */ temp = obj->klass_
- __ LoadFromOffset(kLoadUnsignedWord, temp, obj, class_offset);
- __ MaybeUnpoisonHeapReference(temp);
+ GenerateReferenceLoadTwoRegisters(instruction,
+ temp_loc,
+ obj_loc,
+ class_offset,
+ maybe_temp2_loc,
+ kWithoutReadBarrier);
// Walk over the class hierarchy to find a match.
Mips64Label loop;
__ Bind(&loop);
__ Beqc(temp, cls, &done);
// /* HeapReference<Class> */ temp = temp->super_class_
- __ LoadFromOffset(kLoadUnsignedWord, temp, temp, super_offset);
- __ MaybeUnpoisonHeapReference(temp);
+ GenerateReferenceLoadOneRegister(instruction,
+ temp_loc,
+ super_offset,
+ maybe_temp2_loc,
+ kWithoutReadBarrier);
// If the class reference currently in `temp` is null, jump to the slow path to throw the
// exception. Otherwise, jump to the beginning of the loop.
__ Bnezc(temp, &loop);
@@ -2016,14 +2720,21 @@ void InstructionCodeGeneratorMIPS64::VisitCheckCast(HCheckCast* instruction) {
case TypeCheckKind::kArrayObjectCheck: {
// /* HeapReference<Class> */ temp = obj->klass_
- __ LoadFromOffset(kLoadUnsignedWord, temp, obj, class_offset);
- __ MaybeUnpoisonHeapReference(temp);
+ GenerateReferenceLoadTwoRegisters(instruction,
+ temp_loc,
+ obj_loc,
+ class_offset,
+ maybe_temp2_loc,
+ kWithoutReadBarrier);
// Do an exact check.
__ Beqc(temp, cls, &done);
// Otherwise, we need to check that the object's class is a non-primitive array.
// /* HeapReference<Class> */ temp = temp->component_type_
- __ LoadFromOffset(kLoadUnsignedWord, temp, temp, component_offset);
- __ MaybeUnpoisonHeapReference(temp);
+ GenerateReferenceLoadOneRegister(instruction,
+ temp_loc,
+ component_offset,
+ maybe_temp2_loc,
+ kWithoutReadBarrier);
// If the component type is null, jump to the slow path to throw the exception.
__ Beqzc(temp, slow_path->GetEntryLabel());
// Otherwise, the object is indeed an array, further check that this component
@@ -2050,11 +2761,19 @@ void InstructionCodeGeneratorMIPS64::VisitCheckCast(HCheckCast* instruction) {
// Avoid read barriers to improve the performance of the fast path. We cannot get false
// positives by doing this.
// /* HeapReference<Class> */ temp = obj->klass_
- __ LoadFromOffset(kLoadUnsignedWord, temp, obj, class_offset);
- __ MaybeUnpoisonHeapReference(temp);
+ GenerateReferenceLoadTwoRegisters(instruction,
+ temp_loc,
+ obj_loc,
+ class_offset,
+ maybe_temp2_loc,
+ kWithoutReadBarrier);
// /* HeapReference<Class> */ temp = temp->iftable_
- __ LoadFromOffset(kLoadUnsignedWord, temp, temp, iftable_offset);
- __ MaybeUnpoisonHeapReference(temp);
+ GenerateReferenceLoadTwoRegisters(instruction,
+ temp_loc,
+ temp_loc,
+ iftable_offset,
+ maybe_temp2_loc,
+ kWithoutReadBarrier);
// Iftable is never null.
__ Lw(TMP, temp, array_length_offset);
// Loop through the iftable and check if any class matches.
@@ -3270,14 +3989,31 @@ void CodeGeneratorMIPS64::GenerateNop() {
}
void LocationsBuilderMIPS64::HandleFieldGet(HInstruction* instruction,
- const FieldInfo& field_info ATTRIBUTE_UNUSED) {
- LocationSummary* locations =
- new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+ const FieldInfo& field_info) {
+ Primitive::Type field_type = field_info.GetFieldType();
+ bool object_field_get_with_read_barrier =
+ kEmitCompilerReadBarrier && (field_type == Primitive::kPrimNot);
+ LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(
+ instruction,
+ object_field_get_with_read_barrier
+ ? LocationSummary::kCallOnSlowPath
+ : LocationSummary::kNoCall);
locations->SetInAt(0, Location::RequiresRegister());
if (Primitive::IsFloatingPointType(instruction->GetType())) {
locations->SetOut(Location::RequiresFpuRegister());
} else {
- locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+ // The output overlaps in the case of an object field get with
+ // read barriers enabled: we do not want the move to overwrite the
+ // object's location, as we need it to emit the read barrier.
+ locations->SetOut(Location::RequiresRegister(),
+ object_field_get_with_read_barrier
+ ? Location::kOutputOverlap
+ : Location::kNoOutputOverlap);
+ }
+ if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
+ // We need a temporary register for the read barrier marking slow
+ // path in CodeGeneratorMIPS64::GenerateFieldLoadWithBakerReadBarrier.
+ locations->AddTemp(Location::RequiresRegister());
}
}
@@ -3285,8 +4021,11 @@ void InstructionCodeGeneratorMIPS64::HandleFieldGet(HInstruction* instruction,
const FieldInfo& field_info) {
Primitive::Type type = field_info.GetFieldType();
LocationSummary* locations = instruction->GetLocations();
- GpuRegister obj = locations->InAt(0).AsRegister<GpuRegister>();
+ Location obj_loc = locations->InAt(0);
+ GpuRegister obj = obj_loc.AsRegister<GpuRegister>();
+ Location dst_loc = locations->Out();
LoadOperandType load_type = kLoadUnsignedByte;
+ bool is_volatile = field_info.IsVolatile();
uint32_t offset = field_info.GetFieldOffset().Uint32Value();
auto null_checker = GetImplicitNullChecker(instruction, codegen_);
@@ -3319,19 +4058,46 @@ void InstructionCodeGeneratorMIPS64::HandleFieldGet(HInstruction* instruction,
UNREACHABLE();
}
if (!Primitive::IsFloatingPointType(type)) {
- DCHECK(locations->Out().IsRegister());
- GpuRegister dst = locations->Out().AsRegister<GpuRegister>();
- __ LoadFromOffset(load_type, dst, obj, offset, null_checker);
+ DCHECK(dst_loc.IsRegister());
+ GpuRegister dst = dst_loc.AsRegister<GpuRegister>();
+ if (type == Primitive::kPrimNot) {
+ // /* HeapReference<Object> */ dst = *(obj + offset)
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ Location temp_loc = locations->GetTemp(0);
+ // Note that a potential implicit null check is handled in this
+ // CodeGeneratorMIPS64::GenerateFieldLoadWithBakerReadBarrier call.
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction,
+ dst_loc,
+ obj,
+ offset,
+ temp_loc,
+ /* needs_null_check */ true);
+ if (is_volatile) {
+ GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
+ }
+ } else {
+ __ LoadFromOffset(kLoadUnsignedWord, dst, obj, offset, null_checker);
+ if (is_volatile) {
+ GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
+ }
+ // If read barriers are enabled, emit read barriers other than
+ // Baker's using a slow path (and also unpoison the loaded
+ // reference, if heap poisoning is enabled).
+ codegen_->MaybeGenerateReadBarrierSlow(instruction, dst_loc, dst_loc, obj_loc, offset);
+ }
+ } else {
+ __ LoadFromOffset(load_type, dst, obj, offset, null_checker);
+ }
} else {
- DCHECK(locations->Out().IsFpuRegister());
- FpuRegister dst = locations->Out().AsFpuRegister<FpuRegister>();
+ DCHECK(dst_loc.IsFpuRegister());
+ FpuRegister dst = dst_loc.AsFpuRegister<FpuRegister>();
__ LoadFpuFromOffset(load_type, dst, obj, offset, null_checker);
}
- // TODO: memory barrier?
- if (type == Primitive::kPrimNot) {
- GpuRegister dst = locations->Out().AsRegister<GpuRegister>();
- __ MaybeUnpoisonHeapReference(dst);
+ // Memory barriers, in the case of references, are handled by the
+ // reference load code above.
+ if (is_volatile && (type != Primitive::kPrimNot)) {
+ GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
}
}
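For reference, the volatile-get sequence above (plain load, then a kLoadAny barrier) matches C++ acquire semantics; a minimal sketch with std::atomic, names chosen for illustration:

    #include <atomic>
    #include <cstdint>

    int32_t LoadVolatileField(const std::atomic<int32_t>& field) {
      int32_t value = field.load(std::memory_order_relaxed);  // LoadFromOffset
      std::atomic_thread_fence(std::memory_order_acquire);    // kLoadAny
      return value;
    }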
@@ -3355,6 +4121,7 @@ void InstructionCodeGeneratorMIPS64::HandleFieldSet(HInstruction* instruction,
GpuRegister obj = locations->InAt(0).AsRegister<GpuRegister>();
Location value_location = locations->InAt(1);
StoreOperandType store_type = kStoreByte;
+ bool is_volatile = field_info.IsVolatile();
uint32_t offset = field_info.GetFieldOffset().Uint32Value();
bool needs_write_barrier = CodeGenerator::StoreNeedsWriteBarrier(type, instruction->InputAt(1));
auto null_checker = GetImplicitNullChecker(instruction, codegen_);
@@ -3382,6 +4149,10 @@ void InstructionCodeGeneratorMIPS64::HandleFieldSet(HInstruction* instruction,
UNREACHABLE();
}
+ if (is_volatile) {
+ GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
+ }
+
if (value_location.IsConstant()) {
int64_t value = CodeGenerator::GetInt64ValueOf(value_location.GetConstant());
__ StoreConstToOffset(store_type, value, obj, offset, TMP, null_checker);
@@ -3405,12 +4176,16 @@ void InstructionCodeGeneratorMIPS64::HandleFieldSet(HInstruction* instruction,
__ StoreFpuToOffset(store_type, src, obj, offset, null_checker);
}
}
- // TODO: memory barriers?
+
if (needs_write_barrier) {
DCHECK(value_location.IsRegister());
GpuRegister src = value_location.AsRegister<GpuRegister>();
codegen_->MarkGCCard(obj, src, value_can_be_null);
}
+
+ if (is_volatile) {
+ GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
+ }
}
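Likewise, the volatile-set sequence (kAnyStore barrier, store, kAnyAny barrier) can be modeled with fences; a sketch, not ART code:

    #include <atomic>
    #include <cstdint>

    void StoreVolatileField(std::atomic<int32_t>& field, int32_t value) {
      std::atomic_thread_fence(std::memory_order_release);  // kAnyStore
      field.store(value, std::memory_order_relaxed);        // StoreToOffset
      std::atomic_thread_fence(std::memory_order_seq_cst);  // kAnyAny
    }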
void LocationsBuilderMIPS64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
@@ -3429,14 +4204,134 @@ void InstructionCodeGeneratorMIPS64::VisitInstanceFieldSet(HInstanceFieldSet* in
HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull());
}
+void InstructionCodeGeneratorMIPS64::GenerateReferenceLoadOneRegister(
+ HInstruction* instruction,
+ Location out,
+ uint32_t offset,
+ Location maybe_temp,
+ ReadBarrierOption read_barrier_option) {
+ GpuRegister out_reg = out.AsRegister<GpuRegister>();
+ if (read_barrier_option == kWithReadBarrier) {
+ CHECK(kEmitCompilerReadBarrier);
+ DCHECK(maybe_temp.IsRegister()) << maybe_temp;
+ if (kUseBakerReadBarrier) {
+ // Load with fast path based Baker's read barrier.
+ // /* HeapReference<Object> */ out = *(out + offset)
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction,
+ out,
+ out_reg,
+ offset,
+ maybe_temp,
+ /* needs_null_check */ false);
+ } else {
+ // Load with slow path based read barrier.
+ // Save the value of `out` into `maybe_temp` before overwriting it
+ // in the following move operation, as we will need it for the
+ // read barrier below.
+ __ Move(maybe_temp.AsRegister<GpuRegister>(), out_reg);
+ // /* HeapReference<Object> */ out = *(out + offset)
+ __ LoadFromOffset(kLoadUnsignedWord, out_reg, out_reg, offset);
+ codegen_->GenerateReadBarrierSlow(instruction, out, out, maybe_temp, offset);
+ }
+ } else {
+ // Plain load with no read barrier.
+ // /* HeapReference<Object> */ out = *(out + offset)
+ __ LoadFromOffset(kLoadUnsignedWord, out_reg, out_reg, offset);
+ __ MaybeUnpoisonHeapReference(out_reg);
+ }
+}
+
+void InstructionCodeGeneratorMIPS64::GenerateReferenceLoadTwoRegisters(
+ HInstruction* instruction,
+ Location out,
+ Location obj,
+ uint32_t offset,
+ Location maybe_temp,
+ ReadBarrierOption read_barrier_option) {
+ GpuRegister out_reg = out.AsRegister<GpuRegister>();
+ GpuRegister obj_reg = obj.AsRegister<GpuRegister>();
+ if (read_barrier_option == kWithReadBarrier) {
+ CHECK(kEmitCompilerReadBarrier);
+ if (kUseBakerReadBarrier) {
+ DCHECK(maybe_temp.IsRegister()) << maybe_temp;
+ // Load with fast path based Baker's read barrier.
+ // /* HeapReference<Object> */ out = *(obj + offset)
+ codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction,
+ out,
+ obj_reg,
+ offset,
+ maybe_temp,
+ /* needs_null_check */ false);
+ } else {
+ // Load with slow path based read barrier.
+ // /* HeapReference<Object> */ out = *(obj + offset)
+ __ LoadFromOffset(kLoadUnsignedWord, out_reg, obj_reg, offset);
+ codegen_->GenerateReadBarrierSlow(instruction, out, out, obj, offset);
+ }
+ } else {
+ // Plain load with no read barrier.
+ // /* HeapReference<Object> */ out = *(obj + offset)
+ __ LoadFromOffset(kLoadUnsignedWord, out_reg, obj_reg, offset);
+ __ MaybeUnpoisonHeapReference(out_reg);
+ }
+}
+
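The contrast between the two helpers, in scalar pseudo-C++ (barriers and unpoisoning elided; references modeled as plain pointers):

    #include <cstddef>

    // One-register form: the holder register is overwritten by its own field.
    void LoadOneRegister(void** out, size_t offset) {
      char* obj = static_cast<char*>(*out);            // `out` holds the object...
      *out = *reinterpret_cast<void**>(obj + offset);  // ...and then the field
    }

    // Two-register form: the holder stays live in `obj`.
    void LoadTwoRegisters(void** out, void* obj, size_t offset) {
      *out = *reinterpret_cast<void**>(static_cast<char*>(obj) + offset);
    }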
void InstructionCodeGeneratorMIPS64::GenerateGcRootFieldLoad(
- HInstruction* instruction ATTRIBUTE_UNUSED,
+ HInstruction* instruction,
Location root,
GpuRegister obj,
- uint32_t offset) {
+ uint32_t offset,
+ ReadBarrierOption read_barrier_option) {
GpuRegister root_reg = root.AsRegister<GpuRegister>();
- if (kEmitCompilerReadBarrier) {
- UNIMPLEMENTED(FATAL) << "for read barrier";
+ if (read_barrier_option == kWithReadBarrier) {
+ DCHECK(kEmitCompilerReadBarrier);
+ if (kUseBakerReadBarrier) {
+ // Fast path implementation of art::ReadBarrier::BarrierForRoot when
+      // Baker's read barriers are used:
+ //
+ // root = obj.field;
+ // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
+ // if (temp != null) {
+ // root = temp(root)
+ // }
+
+ // /* GcRoot<mirror::Object> */ root = *(obj + offset)
+ __ LoadFromOffset(kLoadUnsignedWord, root_reg, obj, offset);
+ static_assert(
+ sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>),
+ "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> "
+ "have different sizes.");
+ static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t),
+ "art::mirror::CompressedReference<mirror::Object> and int32_t "
+ "have different sizes.");
+
+ // Slow path marking the GC root `root`.
+ Location temp = Location::RegisterLocation(T9);
+ SlowPathCodeMIPS64* slow_path =
+ new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathMIPS64(
+ instruction,
+ root,
+ /*entrypoint*/ temp);
+ codegen_->AddSlowPath(slow_path);
+
+ // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
+ const int32_t entry_point_offset =
+ CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kMips64PointerSize>(root.reg() - 1);
+ // Loading the entrypoint does not require a load acquire since it is only changed when
+ // threads are suspended or running a checkpoint.
+ __ LoadFromOffset(kLoadDoubleword, temp.AsRegister<GpuRegister>(), TR, entry_point_offset);
+      // The entrypoint is null when the GC is not marking; this avoids a
+      // load compared to checking GetIsGcMarking.
+ __ Bnezc(temp.AsRegister<GpuRegister>(), slow_path->GetEntryLabel());
+ __ Bind(slow_path->GetExitLabel());
+ } else {
+ // GC root loaded through a slow path for read barriers other
+ // than Baker's.
+ // /* GcRoot<mirror::Object>* */ root = obj + offset
+ __ Daddiu64(root_reg, obj, static_cast<int32_t>(offset));
+ // /* mirror::Object* */ root = root->Read()
+ codegen_->GenerateReadBarrierForRootSlow(instruction, root, root);
+ }
} else {
// Plain GC root load with no read barrier.
// /* GcRoot<mirror::Object> */ root = *(obj + offset)
@@ -3446,6 +4341,219 @@ void InstructionCodeGeneratorMIPS64::GenerateGcRootFieldLoad(
}
}
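In plain C++, the Baker root-load control flow above reduces to a null check on the marking entrypoint (hypothetical stand-ins; the real entrypoint lives in a per-thread, per-register table):

    using MarkFn = void* (*)(void*);

    void* LoadGcRoot(void** root_addr, MarkFn mark_entrypoint) {
      void* root = *root_addr;           // LoadFromOffset(kLoadUnsignedWord, ...)
      if (mark_entrypoint != nullptr) {  // Bnezc(temp, slow_path->GetEntryLabel())
        root = mark_entrypoint(root);    // ReadBarrierMarkSlowPathMIPS64
      }
      return root;
    }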
+void CodeGeneratorMIPS64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
+ Location ref,
+ GpuRegister obj,
+ uint32_t offset,
+ Location temp,
+ bool needs_null_check) {
+ DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(kUseBakerReadBarrier);
+
+ // /* HeapReference<Object> */ ref = *(obj + offset)
+ Location no_index = Location::NoLocation();
+ ScaleFactor no_scale_factor = TIMES_1;
+ GenerateReferenceLoadWithBakerReadBarrier(instruction,
+ ref,
+ obj,
+ offset,
+ no_index,
+ no_scale_factor,
+ temp,
+ needs_null_check);
+}
+
+void CodeGeneratorMIPS64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
+ Location ref,
+ GpuRegister obj,
+ uint32_t data_offset,
+ Location index,
+ Location temp,
+ bool needs_null_check) {
+ DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(kUseBakerReadBarrier);
+
+ static_assert(
+ sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
+ "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
+ // /* HeapReference<Object> */ ref =
+ // *(obj + data_offset + index * sizeof(HeapReference<Object>))
+ ScaleFactor scale_factor = TIMES_4;
+ GenerateReferenceLoadWithBakerReadBarrier(instruction,
+ ref,
+ obj,
+ data_offset,
+ index,
+ scale_factor,
+ temp,
+ needs_null_check);
+}
+
+void CodeGeneratorMIPS64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
+ Location ref,
+ GpuRegister obj,
+ uint32_t offset,
+ Location index,
+ ScaleFactor scale_factor,
+ Location temp,
+ bool needs_null_check,
+ bool always_update_field) {
+ DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(kUseBakerReadBarrier);
+
+ // In slow path based read barriers, the read barrier call is
+ // inserted after the original load. However, in fast path based
+ // Baker's read barriers, we need to perform the load of
+ // mirror::Object::monitor_ *before* the original reference load.
+ // This load-load ordering is required by the read barrier.
+ // The fast path/slow path (for Baker's algorithm) should look like:
+ //
+  //   uint32_t rb_state = LockWord(obj->monitor_).ReadBarrierState();
+ // lfence; // Load fence or artificial data dependency to prevent load-load reordering
+ // HeapReference<Object> ref = *src; // Original reference load.
+ // bool is_gray = (rb_state == ReadBarrier::GrayState());
+ // if (is_gray) {
+ // ref = ReadBarrier::Mark(ref); // Performed by runtime entrypoint slow path.
+ // }
+ //
+ // Note: the original implementation in ReadBarrier::Barrier is
+ // slightly more complex as it performs additional checks that we do
+ // not do here for performance reasons.
+
+ GpuRegister ref_reg = ref.AsRegister<GpuRegister>();
+ GpuRegister temp_reg = temp.AsRegister<GpuRegister>();
+ uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
+
+ // /* int32_t */ monitor = obj->monitor_
+ __ LoadFromOffset(kLoadWord, temp_reg, obj, monitor_offset);
+ if (needs_null_check) {
+ MaybeRecordImplicitNullCheck(instruction);
+ }
+ // /* LockWord */ lock_word = LockWord(monitor)
+ static_assert(sizeof(LockWord) == sizeof(int32_t),
+ "art::LockWord and int32_t have different sizes.");
+
+ __ Sync(0); // Barrier to prevent load-load reordering.
+
+ // The actual reference load.
+ if (index.IsValid()) {
+ // Load types involving an "index": ArrayGet,
+ // UnsafeGetObject/UnsafeGetObjectVolatile and UnsafeCASObject
+ // intrinsics.
+ // /* HeapReference<Object> */ ref = *(obj + offset + (index << scale_factor))
+ if (index.IsConstant()) {
+ size_t computed_offset =
+ (index.GetConstant()->AsIntConstant()->GetValue() << scale_factor) + offset;
+ __ LoadFromOffset(kLoadUnsignedWord, ref_reg, obj, computed_offset);
+ } else {
+ GpuRegister index_reg = index.AsRegister<GpuRegister>();
+ __ Dsll(TMP, index_reg, scale_factor);
+ __ Daddu(TMP, obj, TMP);
+ __ LoadFromOffset(kLoadUnsignedWord, ref_reg, TMP, offset);
+ }
+ } else {
+ // /* HeapReference<Object> */ ref = *(obj + offset)
+ __ LoadFromOffset(kLoadUnsignedWord, ref_reg, obj, offset);
+ }
+
+ // Object* ref = ref_addr->AsMirrorPtr()
+ __ MaybeUnpoisonHeapReference(ref_reg);
+
+ // Slow path marking the object `ref` when it is gray.
+ SlowPathCodeMIPS64* slow_path;
+ if (always_update_field) {
+ // ReadBarrierMarkAndUpdateFieldSlowPathMIPS64 only supports address
+ // of the form `obj + field_offset`, where `obj` is a register and
+    // `field_offset` is a register. Thus `offset` and `scale_factor`
+    // above are expected to be zero (0 and TIMES_1) in this code path.
+ DCHECK_EQ(offset, 0u);
+ DCHECK_EQ(scale_factor, ScaleFactor::TIMES_1);
+ slow_path = new (GetGraph()->GetArena())
+ ReadBarrierMarkAndUpdateFieldSlowPathMIPS64(instruction,
+ ref,
+ obj,
+ /* field_offset */ index,
+ temp_reg);
+ } else {
+ slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathMIPS64(instruction, ref);
+ }
+ AddSlowPath(slow_path);
+
+ // if (rb_state == ReadBarrier::GrayState())
+ // ref = ReadBarrier::Mark(ref);
+ // Given the numeric representation, it's enough to check the low bit of the
+ // rb_state. We do that by shifting the bit into the sign bit (31) and
+ // performing a branch on less than zero.
+ static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
+ static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
+ static_assert(LockWord::kReadBarrierStateSize == 1, "Expecting 1-bit read barrier state size");
+ __ Sll(temp_reg, temp_reg, 31 - LockWord::kReadBarrierStateShift);
+ __ Bltzc(temp_reg, slow_path->GetEntryLabel());
+ __ Bind(slow_path->GetExitLabel());
+}
+
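The gray check above avoids an explicit compare: the read barrier state is a single lock-word bit, so shifting it into bit 31 lets Bltzc (branch on negative) test it. A standalone sketch; the shift constant is assumed for illustration, the real value comes from LockWord:

    #include <cstdint>

    bool IsGray(uint32_t lock_word) {
      constexpr int kReadBarrierStateShift = 28;  // assumption, see LockWord
      int32_t shifted =
          static_cast<int32_t>(lock_word << (31 - kReadBarrierStateShift));
      return shifted < 0;  // Sll + Bltzc
    }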
+void CodeGeneratorMIPS64::GenerateReadBarrierSlow(HInstruction* instruction,
+ Location out,
+ Location ref,
+ Location obj,
+ uint32_t offset,
+ Location index) {
+ DCHECK(kEmitCompilerReadBarrier);
+
+ // Insert a slow path based read barrier *after* the reference load.
+ //
+ // If heap poisoning is enabled, the unpoisoning of the loaded
+ // reference will be carried out by the runtime within the slow
+ // path.
+ //
+ // Note that `ref` currently does not get unpoisoned (when heap
+ // poisoning is enabled), which is alright as the `ref` argument is
+ // not used by the artReadBarrierSlow entry point.
+ //
+ // TODO: Unpoison `ref` when it is used by artReadBarrierSlow.
+ SlowPathCodeMIPS64* slow_path = new (GetGraph()->GetArena())
+ ReadBarrierForHeapReferenceSlowPathMIPS64(instruction, out, ref, obj, offset, index);
+ AddSlowPath(slow_path);
+
+ __ Bc(slow_path->GetEntryLabel());
+ __ Bind(slow_path->GetExitLabel());
+}
+
+void CodeGeneratorMIPS64::MaybeGenerateReadBarrierSlow(HInstruction* instruction,
+ Location out,
+ Location ref,
+ Location obj,
+ uint32_t offset,
+ Location index) {
+ if (kEmitCompilerReadBarrier) {
+ // Baker's read barriers shall be handled by the fast path
+ // (CodeGeneratorMIPS64::GenerateReferenceLoadWithBakerReadBarrier).
+ DCHECK(!kUseBakerReadBarrier);
+ // If heap poisoning is enabled, unpoisoning will be taken care of
+ // by the runtime within the slow path.
+ GenerateReadBarrierSlow(instruction, out, ref, obj, offset, index);
+ } else if (kPoisonHeapReferences) {
+ __ UnpoisonHeapReference(out.AsRegister<GpuRegister>());
+ }
+}
+
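The three configurations this helper distinguishes, summarized as a sketch (illustrative flags standing in for the real build-time constants):

    void MaybeReadBarrier(bool emit_read_barrier, bool poison_heap_references) {
      if (emit_read_barrier) {
        // non-Baker slow-path barrier; the runtime unpoisons in the slow path
      } else if (poison_heap_references) {
        // no read barrier: just unpoison the loaded reference
      }  // otherwise: nothing to emit
    }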
+void CodeGeneratorMIPS64::GenerateReadBarrierForRootSlow(HInstruction* instruction,
+ Location out,
+ Location root) {
+ DCHECK(kEmitCompilerReadBarrier);
+
+ // Insert a slow path based read barrier *after* the GC root load.
+ //
+ // Note that GC roots are not affected by heap poisoning, so we do
+ // not need to do anything special for this here.
+ SlowPathCodeMIPS64* slow_path =
+ new (GetGraph()->GetArena()) ReadBarrierForRootSlowPathMIPS64(instruction, out, root);
+ AddSlowPath(slow_path);
+
+ __ Bc(slow_path->GetEntryLabel());
+ __ Bind(slow_path->GetExitLabel());
+}
+
void LocationsBuilderMIPS64::VisitInstanceOf(HInstanceOf* instruction) {
LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
@@ -3454,7 +4562,8 @@ void LocationsBuilderMIPS64::VisitInstanceOf(HInstanceOf* instruction) {
case TypeCheckKind::kAbstractClassCheck:
case TypeCheckKind::kClassHierarchyCheck:
case TypeCheckKind::kArrayObjectCheck:
- call_kind = LocationSummary::kNoCall;
+ call_kind =
+ kEmitCompilerReadBarrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall;
break;
case TypeCheckKind::kArrayCheck:
case TypeCheckKind::kUnresolvedCheck:
@@ -3469,14 +4578,20 @@ void LocationsBuilderMIPS64::VisitInstanceOf(HInstanceOf* instruction) {
// The output does overlap the inputs.
// Note that TypeCheckSlowPathMIPS64 uses this register too.
locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+ locations->AddRegisterTemps(NumberOfInstanceOfTemps(type_check_kind));
}
void InstructionCodeGeneratorMIPS64::VisitInstanceOf(HInstanceOf* instruction) {
TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
LocationSummary* locations = instruction->GetLocations();
- GpuRegister obj = locations->InAt(0).AsRegister<GpuRegister>();
+ Location obj_loc = locations->InAt(0);
+ GpuRegister obj = obj_loc.AsRegister<GpuRegister>();
GpuRegister cls = locations->InAt(1).AsRegister<GpuRegister>();
- GpuRegister out = locations->Out().AsRegister<GpuRegister>();
+ Location out_loc = locations->Out();
+ GpuRegister out = out_loc.AsRegister<GpuRegister>();
+ const size_t num_temps = NumberOfInstanceOfTemps(type_check_kind);
+ DCHECK_LE(num_temps, 1u);
+ Location maybe_temp_loc = (num_temps >= 1) ? locations->GetTemp(0) : Location::NoLocation();
uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
@@ -3494,8 +4609,12 @@ void InstructionCodeGeneratorMIPS64::VisitInstanceOf(HInstanceOf* instruction) {
switch (type_check_kind) {
case TypeCheckKind::kExactCheck: {
// /* HeapReference<Class> */ out = obj->klass_
- __ LoadFromOffset(kLoadUnsignedWord, out, obj, class_offset);
- __ MaybeUnpoisonHeapReference(out);
+ GenerateReferenceLoadTwoRegisters(instruction,
+ out_loc,
+ obj_loc,
+ class_offset,
+ maybe_temp_loc,
+ kCompilerReadBarrierOption);
// Classes must be equal for the instanceof to succeed.
__ Xor(out, out, cls);
__ Sltiu(out, out, 1);
@@ -3504,15 +4623,22 @@ void InstructionCodeGeneratorMIPS64::VisitInstanceOf(HInstanceOf* instruction) {
case TypeCheckKind::kAbstractClassCheck: {
// /* HeapReference<Class> */ out = obj->klass_
- __ LoadFromOffset(kLoadUnsignedWord, out, obj, class_offset);
- __ MaybeUnpoisonHeapReference(out);
+ GenerateReferenceLoadTwoRegisters(instruction,
+ out_loc,
+ obj_loc,
+ class_offset,
+ maybe_temp_loc,
+ kCompilerReadBarrierOption);
// If the class is abstract, we eagerly fetch the super class of the
// object to avoid doing a comparison we know will fail.
Mips64Label loop;
__ Bind(&loop);
// /* HeapReference<Class> */ out = out->super_class_
- __ LoadFromOffset(kLoadUnsignedWord, out, out, super_offset);
- __ MaybeUnpoisonHeapReference(out);
+ GenerateReferenceLoadOneRegister(instruction,
+ out_loc,
+ super_offset,
+ maybe_temp_loc,
+ kCompilerReadBarrierOption);
// If `out` is null, we use it for the result, and jump to `done`.
__ Beqzc(out, &done);
__ Bnec(out, cls, &loop);
@@ -3522,15 +4648,22 @@ void InstructionCodeGeneratorMIPS64::VisitInstanceOf(HInstanceOf* instruction) {
case TypeCheckKind::kClassHierarchyCheck: {
// /* HeapReference<Class> */ out = obj->klass_
- __ LoadFromOffset(kLoadUnsignedWord, out, obj, class_offset);
- __ MaybeUnpoisonHeapReference(out);
+ GenerateReferenceLoadTwoRegisters(instruction,
+ out_loc,
+ obj_loc,
+ class_offset,
+ maybe_temp_loc,
+ kCompilerReadBarrierOption);
// Walk over the class hierarchy to find a match.
Mips64Label loop, success;
__ Bind(&loop);
__ Beqc(out, cls, &success);
// /* HeapReference<Class> */ out = out->super_class_
- __ LoadFromOffset(kLoadUnsignedWord, out, out, super_offset);
- __ MaybeUnpoisonHeapReference(out);
+ GenerateReferenceLoadOneRegister(instruction,
+ out_loc,
+ super_offset,
+ maybe_temp_loc,
+ kCompilerReadBarrierOption);
__ Bnezc(out, &loop);
// If `out` is null, we use it for the result, and jump to `done`.
__ Bc(&done);
@@ -3541,15 +4674,22 @@ void InstructionCodeGeneratorMIPS64::VisitInstanceOf(HInstanceOf* instruction) {
case TypeCheckKind::kArrayObjectCheck: {
// /* HeapReference<Class> */ out = obj->klass_
- __ LoadFromOffset(kLoadUnsignedWord, out, obj, class_offset);
- __ MaybeUnpoisonHeapReference(out);
+ GenerateReferenceLoadTwoRegisters(instruction,
+ out_loc,
+ obj_loc,
+ class_offset,
+ maybe_temp_loc,
+ kCompilerReadBarrierOption);
// Do an exact check.
Mips64Label success;
__ Beqc(out, cls, &success);
// Otherwise, we need to check that the object's class is a non-primitive array.
// /* HeapReference<Class> */ out = out->component_type_
- __ LoadFromOffset(kLoadUnsignedWord, out, out, component_offset);
- __ MaybeUnpoisonHeapReference(out);
+ GenerateReferenceLoadOneRegister(instruction,
+ out_loc,
+ component_offset,
+ maybe_temp_loc,
+ kCompilerReadBarrierOption);
// If `out` is null, we use it for the result, and jump to `done`.
__ Beqzc(out, &done);
__ LoadFromOffset(kLoadUnsignedHalfword, out, out, primitive_offset);
@@ -3564,8 +4704,12 @@ void InstructionCodeGeneratorMIPS64::VisitInstanceOf(HInstanceOf* instruction) {
case TypeCheckKind::kArrayCheck: {
// No read barrier since the slow path will retry upon failure.
// /* HeapReference<Class> */ out = obj->klass_
- __ LoadFromOffset(kLoadUnsignedWord, out, obj, class_offset);
- __ MaybeUnpoisonHeapReference(out);
+ GenerateReferenceLoadTwoRegisters(instruction,
+ out_loc,
+ obj_loc,
+ class_offset,
+ maybe_temp_loc,
+ kWithoutReadBarrier);
DCHECK(locations->OnlyCallsOnSlowPath());
slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathMIPS64(instruction,
/* is_fatal */ false);
@@ -3735,9 +4879,6 @@ static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorMIPS64* codeg
HLoadString::LoadKind CodeGeneratorMIPS64::GetSupportedLoadStringKind(
HLoadString::LoadKind desired_string_load_kind) {
- if (kEmitCompilerReadBarrier) {
- UNIMPLEMENTED(FATAL) << "for read barrier";
- }
bool fallback_load = false;
switch (desired_string_load_kind) {
case HLoadString::LoadKind::kBootImageLinkTimeAddress:
@@ -3765,9 +4906,6 @@ HLoadString::LoadKind CodeGeneratorMIPS64::GetSupportedLoadStringKind(
HLoadClass::LoadKind CodeGeneratorMIPS64::GetSupportedLoadClassKind(
HLoadClass::LoadKind desired_class_load_kind) {
- if (kEmitCompilerReadBarrier) {
- UNIMPLEMENTED(FATAL) << "for read barrier";
- }
bool fallback_load = false;
switch (desired_class_load_kind) {
case HLoadClass::LoadKind::kInvalid:
@@ -3960,7 +5098,8 @@ void LocationsBuilderMIPS64::VisitLoadClass(HLoadClass* cls) {
}
DCHECK(!cls->NeedsAccessCheck());
- LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || kEmitCompilerReadBarrier)
+ const bool requires_read_barrier = kEmitCompilerReadBarrier && !cls->IsInBootImage();
+ LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || requires_read_barrier)
? LocationSummary::kCallOnSlowPath
: LocationSummary::kNoCall;
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(cls, call_kind);
@@ -3989,6 +5128,9 @@ void InstructionCodeGeneratorMIPS64::VisitLoadClass(HLoadClass* cls) NO_THREAD_S
current_method_reg = locations->InAt(0).AsRegister<GpuRegister>();
}
+ const ReadBarrierOption read_barrier_option = cls->IsInBootImage()
+ ? kWithoutReadBarrier
+ : kCompilerReadBarrierOption;
bool generate_null_check = false;
switch (load_kind) {
case HLoadClass::LoadKind::kReferrersClass:
@@ -3998,10 +5140,12 @@ void InstructionCodeGeneratorMIPS64::VisitLoadClass(HLoadClass* cls) NO_THREAD_S
GenerateGcRootFieldLoad(cls,
out_loc,
current_method_reg,
- ArtMethod::DeclaringClassOffset().Int32Value());
+ ArtMethod::DeclaringClassOffset().Int32Value(),
+ read_barrier_option);
break;
case HLoadClass::LoadKind::kBootImageLinkTimeAddress:
DCHECK(codegen_->GetCompilerOptions().IsBootImage());
+ DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
__ LoadLiteral(out,
kLoadUnsignedWord,
codegen_->DeduplicateBootImageTypeLiteral(cls->GetDexFile(),
@@ -4009,6 +5153,7 @@ void InstructionCodeGeneratorMIPS64::VisitLoadClass(HLoadClass* cls) NO_THREAD_S
break;
case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: {
DCHECK(codegen_->GetCompilerOptions().IsBootImage());
+ DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
CodeGeneratorMIPS64::PcRelativePatchInfo* info =
codegen_->NewPcRelativeTypePatch(cls->GetDexFile(), cls->GetTypeIndex());
codegen_->EmitPcRelativeAddressPlaceholderHigh(info, AT);
@@ -4016,7 +5161,7 @@ void InstructionCodeGeneratorMIPS64::VisitLoadClass(HLoadClass* cls) NO_THREAD_S
break;
}
case HLoadClass::LoadKind::kBootImageAddress: {
- DCHECK(!kEmitCompilerReadBarrier);
+ DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
uint32_t address = dchecked_integral_cast<uint32_t>(
reinterpret_cast<uintptr_t>(cls->GetClass().Get()));
DCHECK_NE(address, 0u);
@@ -4029,7 +5174,7 @@ void InstructionCodeGeneratorMIPS64::VisitLoadClass(HLoadClass* cls) NO_THREAD_S
CodeGeneratorMIPS64::PcRelativePatchInfo* info =
codegen_->NewTypeBssEntryPatch(cls->GetDexFile(), cls->GetTypeIndex());
codegen_->EmitPcRelativeAddressPlaceholderHigh(info, out);
- GenerateGcRootFieldLoad(cls, out_loc, out, /* placeholder */ 0x5678);
+ GenerateGcRootFieldLoad(cls, out_loc, out, /* placeholder */ 0x5678, read_barrier_option);
generate_null_check = true;
break;
}
@@ -4039,7 +5184,7 @@ void InstructionCodeGeneratorMIPS64::VisitLoadClass(HLoadClass* cls) NO_THREAD_S
codegen_->DeduplicateJitClassLiteral(cls->GetDexFile(),
cls->GetTypeIndex(),
cls->GetClass()));
- GenerateGcRootFieldLoad(cls, out_loc, out, 0);
+ GenerateGcRootFieldLoad(cls, out_loc, out, 0, read_barrier_option);
break;
case HLoadClass::LoadKind::kDexCacheViaMethod:
case HLoadClass::LoadKind::kInvalid:
@@ -4136,7 +5281,11 @@ void InstructionCodeGeneratorMIPS64::VisitLoadString(HLoadString* load) NO_THREA
CodeGeneratorMIPS64::PcRelativePatchInfo* info =
codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex());
codegen_->EmitPcRelativeAddressPlaceholderHigh(info, out);
- GenerateGcRootFieldLoad(load, out_loc, out, /* placeholder */ 0x5678);
+ GenerateGcRootFieldLoad(load,
+ out_loc,
+ out,
+ /* placeholder */ 0x5678,
+ kCompilerReadBarrierOption);
SlowPathCodeMIPS64* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathMIPS64(load);
codegen_->AddSlowPath(slow_path);
__ Beqzc(out, slow_path->GetEntryLabel());
@@ -4149,7 +5298,7 @@ void InstructionCodeGeneratorMIPS64::VisitLoadString(HLoadString* load) NO_THREA
codegen_->DeduplicateJitStringLiteral(load->GetDexFile(),
load->GetStringIndex(),
load->GetString()));
- GenerateGcRootFieldLoad(load, out_loc, out, 0);
+ GenerateGcRootFieldLoad(load, out_loc, out, 0, kCompilerReadBarrierOption);
return;
default:
break;
diff --git a/compiler/optimizing/code_generator_mips64.h b/compiler/optimizing/code_generator_mips64.h
index 6040dc9492..fd1a174608 100644
--- a/compiler/optimizing/code_generator_mips64.h
+++ b/compiler/optimizing/code_generator_mips64.h
@@ -237,6 +237,38 @@ class InstructionCodeGeneratorMIPS64 : public InstructionCodeGenerator {
const FieldInfo& field_info,
bool value_can_be_null);
void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
+
+ // Generate a heap reference load using one register `out`:
+ //
+ // out <- *(out + offset)
+ //
+ // while honoring heap poisoning and/or read barriers (if any).
+ //
+ // Location `maybe_temp` is used when generating a read barrier and
+ // shall be a register in that case; it may be an invalid location
+ // otherwise.
+ void GenerateReferenceLoadOneRegister(HInstruction* instruction,
+ Location out,
+ uint32_t offset,
+ Location maybe_temp,
+ ReadBarrierOption read_barrier_option);
+ // Generate a heap reference load using two different registers
+ // `out` and `obj`:
+ //
+ // out <- *(obj + offset)
+ //
+ // while honoring heap poisoning and/or read barriers (if any).
+ //
+ // Location `maybe_temp` is used when generating a Baker's (fast
+ // path) read barrier and shall be a register in that case; it may
+ // be an invalid location otherwise.
+ void GenerateReferenceLoadTwoRegisters(HInstruction* instruction,
+ Location out,
+ Location obj,
+ uint32_t offset,
+ Location maybe_temp,
+ ReadBarrierOption read_barrier_option);
+
// Generate a GC root reference load:
//
// root <- *(obj + offset)
@@ -245,7 +277,9 @@ class InstructionCodeGeneratorMIPS64 : public InstructionCodeGenerator {
void GenerateGcRootFieldLoad(HInstruction* instruction,
Location root,
GpuRegister obj,
- uint32_t offset);
+ uint32_t offset,
+ ReadBarrierOption read_barrier_option);
+
void GenerateTestAndBranch(HInstruction* instruction,
size_t condition_input_index,
Mips64Label* true_target,
@@ -316,6 +350,91 @@ class CodeGeneratorMIPS64 : public CodeGenerator {
void EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) OVERRIDE;
void EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) OVERRIDE;
+ // Fast path implementation of ReadBarrier::Barrier for a heap
+ // reference field load when Baker's read barriers are used.
+ void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
+ Location ref,
+ GpuRegister obj,
+ uint32_t offset,
+ Location temp,
+ bool needs_null_check);
+ // Fast path implementation of ReadBarrier::Barrier for a heap
+ // reference array load when Baker's read barriers are used.
+ void GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
+ Location ref,
+ GpuRegister obj,
+ uint32_t data_offset,
+ Location index,
+ Location temp,
+ bool needs_null_check);
+
+ // Factored implementation, used by GenerateFieldLoadWithBakerReadBarrier,
+ // GenerateArrayLoadWithBakerReadBarrier and some intrinsics.
+ //
+ // Load the object reference located at the address
+ // `obj + offset + (index << scale_factor)`, held by object `obj`, into
+ // `ref`, and mark it if needed.
+ //
+ // If `always_update_field` is true, the value of the reference is
+ // atomically updated in the holder (`obj`).
+ void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
+ Location ref,
+ GpuRegister obj,
+ uint32_t offset,
+ Location index,
+ ScaleFactor scale_factor,
+ Location temp,
+ bool needs_null_check,
+ bool always_update_field = false);
+
+ // Generate a read barrier for a heap reference within `instruction`
+ // using a slow path.
+ //
+ // A read barrier for an object reference read from the heap is
+ // implemented as a call to the artReadBarrierSlow runtime entry
+ // point, which is passed the values in locations `ref`, `obj`, and
+ // `offset`:
+ //
+ // mirror::Object* artReadBarrierSlow(mirror::Object* ref,
+ // mirror::Object* obj,
+ // uint32_t offset);
+ //
+ // The `out` location contains the value returned by
+ // artReadBarrierSlow.
+ //
+ // When `index` is provided (i.e. for array accesses), the offset
+ // value passed to artReadBarrierSlow is adjusted to take `index`
+ // into account.
+ void GenerateReadBarrierSlow(HInstruction* instruction,
+ Location out,
+ Location ref,
+ Location obj,
+ uint32_t offset,
+ Location index = Location::NoLocation());
+
+ // If read barriers are enabled, generate a read barrier for a heap
+ // reference using a slow path. If heap poisoning is enabled, also
+ // unpoison the reference in `out`.
+ void MaybeGenerateReadBarrierSlow(HInstruction* instruction,
+ Location out,
+ Location ref,
+ Location obj,
+ uint32_t offset,
+ Location index = Location::NoLocation());
+
+ // Generate a read barrier for a GC root within `instruction` using
+ // a slow path.
+ //
+ // A read barrier for an object reference GC root is implemented as
+ // a call to the artReadBarrierForRootSlow runtime entry point,
+ // which is passed the value in location `root`:
+ //
+ // mirror::Object* artReadBarrierForRootSlow(GcRoot<mirror::Object>* root);
+ //
+ // The `out` location contains the value returned by
+ // artReadBarrierForRootSlow.
+ void GenerateReadBarrierForRootSlow(HInstruction* instruction, Location out, Location root);
+
void MarkGCCard(GpuRegister object, GpuRegister value, bool value_can_be_null);
// Register allocation.
@@ -366,6 +485,14 @@ class CodeGeneratorMIPS64 : public CodeGenerator {
uint32_t dex_pc,
SlowPathCode* slow_path = nullptr) OVERRIDE;
+ // Generate code to invoke a runtime entry point, but do not record
+ // PC-related information in a stack map.
+ void InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
+ HInstruction* instruction,
+ SlowPathCode* slow_path);
+
+ void GenerateInvokeRuntime(int32_t entry_point_offset);
+
ParallelMoveResolver* GetMoveResolver() OVERRIDE { return &move_resolver_; }
bool NeedsTwoRegisters(Primitive::Type type ATTRIBUTE_UNUSED) const OVERRIDE { return false; }
diff --git a/compiler/optimizing/code_generator_vector_arm.cc b/compiler/optimizing/code_generator_vector_arm.cc
new file mode 100644
index 0000000000..ba2b2cb2c9
--- /dev/null
+++ b/compiler/optimizing/code_generator_vector_arm.cc
@@ -0,0 +1,235 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "code_generator_arm.h"
+
+namespace art {
+namespace arm {
+
+// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
+#define __ down_cast<ArmAssembler*>(GetAssembler())-> // NOLINT
+
+void LocationsBuilderARM::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void InstructionCodeGeneratorARM::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderARM::VisitVecSetScalars(HVecSetScalars* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void InstructionCodeGeneratorARM::VisitVecSetScalars(HVecSetScalars* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderARM::VisitVecSumReduce(HVecSumReduce* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void InstructionCodeGeneratorARM::VisitVecSumReduce(HVecSumReduce* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+// Helper to set up locations for vector unary operations.
+static void CreateVecUnOpLocations(ArenaAllocator* arena, HVecUnaryOperation* instruction) {
+ LocationSummary* locations = new (arena) LocationSummary(instruction);
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ case Primitive::kPrimFloat:
+ case Primitive::kPrimDouble:
+ DCHECK(locations);
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderARM::VisitVecCnv(HVecCnv* instruction) {
+ CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARM::VisitVecCnv(HVecCnv* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderARM::VisitVecNeg(HVecNeg* instruction) {
+ CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARM::VisitVecNeg(HVecNeg* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderARM::VisitVecNot(HVecNot* instruction) {
+ CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARM::VisitVecNot(HVecNot* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+// Helper to set up locations for vector binary operations.
+static void CreateVecBinOpLocations(ArenaAllocator* arena, HVecBinaryOperation* instruction) {
+ LocationSummary* locations = new (arena) LocationSummary(instruction);
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ case Primitive::kPrimFloat:
+ case Primitive::kPrimDouble:
+ DCHECK(locations);
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderARM::VisitVecAdd(HVecAdd* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARM::VisitVecAdd(HVecAdd* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderARM::VisitVecSub(HVecSub* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARM::VisitVecSub(HVecSub* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderARM::VisitVecMul(HVecMul* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARM::VisitVecMul(HVecMul* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderARM::VisitVecDiv(HVecDiv* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARM::VisitVecDiv(HVecDiv* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderARM::VisitVecAnd(HVecAnd* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARM::VisitVecAnd(HVecAnd* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderARM::VisitVecAndNot(HVecAndNot* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARM::VisitVecAndNot(HVecAndNot* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderARM::VisitVecOr(HVecOr* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARM::VisitVecOr(HVecOr* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderARM::VisitVecXor(HVecXor* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARM::VisitVecXor(HVecXor* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+// Helper to set up locations for vector shift operations.
+static void CreateVecShiftLocations(ArenaAllocator* arena, HVecBinaryOperation* instruction) {
+ LocationSummary* locations = new (arena) LocationSummary(instruction);
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimByte:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ case Primitive::kPrimLong:
+ DCHECK(locations);
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderARM::VisitVecShl(HVecShl* instruction) {
+ CreateVecShiftLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARM::VisitVecShl(HVecShl* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderARM::VisitVecShr(HVecShr* instruction) {
+ CreateVecShiftLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARM::VisitVecShr(HVecShr* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderARM::VisitVecUShr(HVecUShr* instruction) {
+ CreateVecShiftLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARM::VisitVecUShr(HVecUShr* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderARM::VisitVecLoad(HVecLoad* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void InstructionCodeGeneratorARM::VisitVecLoad(HVecLoad* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderARM::VisitVecStore(HVecStore* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void InstructionCodeGeneratorARM::VisitVecStore(HVecStore* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+#undef __
+
+} // namespace arm
+} // namespace art
diff --git a/compiler/optimizing/code_generator_vector_arm64.cc b/compiler/optimizing/code_generator_vector_arm64.cc
new file mode 100644
index 0000000000..96d00210b8
--- /dev/null
+++ b/compiler/optimizing/code_generator_vector_arm64.cc
@@ -0,0 +1,641 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "code_generator_arm64.h"
+#include "mirror/array-inl.h"
+
+using namespace vixl::aarch64; // NOLINT(build/namespaces)
+
+namespace art {
+namespace arm64 {
+
+using helpers::DRegisterFrom;
+using helpers::HeapOperand;
+using helpers::InputRegisterAt;
+using helpers::Int64ConstantFrom;
+using helpers::XRegisterFrom;
+
+#define __ GetVIXLAssembler()->
+
+void LocationsBuilderARM64::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
+ LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetOut(Location::RequiresFpuRegister());
+ break;
+ case Primitive::kPrimFloat:
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+void InstructionCodeGeneratorARM64::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ FPRegister dst = DRegisterFrom(locations->Out());
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ Dup(dst.V8B(), InputRegisterAt(instruction, 0));
+ break;
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ Dup(dst.V4H(), InputRegisterAt(instruction, 0));
+ break;
+ case Primitive::kPrimInt:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ Dup(dst.V2S(), InputRegisterAt(instruction, 0));
+ break;
+ case Primitive::kPrimFloat:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ Dup(dst.V2S(), DRegisterFrom(locations->InAt(0)).V2S(), 0);
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
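Dup broadcasts one scalar into every lane of the destination; for the kPrimInt case above (two 32-bit lanes of a D register) the effect, modeled in scalar code:

    #include <array>
    #include <cstdint>

    std::array<int32_t, 2> DupV2S(int32_t src) {
      return {src, src};  // one copy per 32-bit lane of the 64-bit D register
    }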
+void LocationsBuilderARM64::VisitVecSetScalars(HVecSetScalars* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void InstructionCodeGeneratorARM64::VisitVecSetScalars(HVecSetScalars* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderARM64::VisitVecSumReduce(HVecSumReduce* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void InstructionCodeGeneratorARM64::VisitVecSumReduce(HVecSumReduce* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+// Helper to set up locations for vector unary operations.
+static void CreateVecUnOpLocations(ArenaAllocator* arena, HVecUnaryOperation* instruction) {
+ LocationSummary* locations = new (arena) LocationSummary(instruction);
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimBoolean:
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetOut(Location::RequiresFpuRegister(),
+ instruction->IsVecNot() ? Location::kOutputOverlap
+ : Location::kNoOutputOverlap);
+ break;
+ case Primitive::kPrimByte:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ case Primitive::kPrimFloat:
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderARM64::VisitVecCnv(HVecCnv* instruction) {
+ CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARM64::VisitVecCnv(HVecCnv* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ FPRegister src = DRegisterFrom(locations->InAt(0));
+ FPRegister dst = DRegisterFrom(locations->Out());
+ Primitive::Type from = instruction->GetInputType();
+ Primitive::Type to = instruction->GetResultType();
+ if (from == Primitive::kPrimInt && to == Primitive::kPrimFloat) {
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ Scvtf(dst.V2S(), src.V2S());
+ } else {
+ LOG(FATAL) << "Unsupported SIMD type";
+ }
+}
+
+void LocationsBuilderARM64::VisitVecNeg(HVecNeg* instruction) {
+ CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARM64::VisitVecNeg(HVecNeg* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ FPRegister src = DRegisterFrom(locations->InAt(0));
+ FPRegister dst = DRegisterFrom(locations->Out());
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimByte:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ Neg(dst.V8B(), src.V8B());
+ break;
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ Neg(dst.V4H(), src.V4H());
+ break;
+ case Primitive::kPrimInt:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ Neg(dst.V2S(), src.V2S());
+ break;
+ case Primitive::kPrimFloat:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ Fneg(dst.V2S(), src.V2S());
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderARM64::VisitVecNot(HVecNot* instruction) {
+ CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARM64::VisitVecNot(HVecNot* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ FPRegister src = DRegisterFrom(locations->InAt(0));
+ FPRegister dst = DRegisterFrom(locations->Out());
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimBoolean: // special case boolean-not
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ Movi(dst.V8B(), 1);
+ __ Eor(dst.V8B(), dst.V8B(), src.V8B());
+ break;
+ case Primitive::kPrimByte:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ __ Not(dst.V8B(), src.V8B()); // lanes do not matter
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
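The boolean special case relies on boolean lanes holding only 0 or 1, so XOR with 1 is a logical not; per lane:

    #include <cstdint>

    uint8_t BooleanNotLane(uint8_t lane) {
      return lane ^ 1;  // Movi(1) + Eor: 0 -> 1, 1 -> 0
    }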
+// Helper to set up locations for vector binary operations.
+static void CreateVecBinOpLocations(ArenaAllocator* arena, HVecBinaryOperation* instruction) {
+ LocationSummary* locations = new (arena) LocationSummary(instruction);
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ case Primitive::kPrimFloat:
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetInAt(1, Location::RequiresFpuRegister());
+ locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderARM64::VisitVecAdd(HVecAdd* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARM64::VisitVecAdd(HVecAdd* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ FPRegister lhs = DRegisterFrom(locations->InAt(0));
+ FPRegister rhs = DRegisterFrom(locations->InAt(1));
+ FPRegister dst = DRegisterFrom(locations->Out());
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimByte:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ Add(dst.V8B(), lhs.V8B(), rhs.V8B());
+ break;
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ Add(dst.V4H(), lhs.V4H(), rhs.V4H());
+ break;
+ case Primitive::kPrimInt:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ Add(dst.V2S(), lhs.V2S(), rhs.V2S());
+ break;
+ case Primitive::kPrimFloat:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ Fadd(dst.V2S(), lhs.V2S(), rhs.V2S());
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderARM64::VisitVecSub(HVecSub* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARM64::VisitVecSub(HVecSub* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ FPRegister lhs = DRegisterFrom(locations->InAt(0));
+ FPRegister rhs = DRegisterFrom(locations->InAt(1));
+ FPRegister dst = DRegisterFrom(locations->Out());
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimByte:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ Sub(dst.V8B(), lhs.V8B(), rhs.V8B());
+ break;
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ Sub(dst.V4H(), lhs.V4H(), rhs.V4H());
+ break;
+ case Primitive::kPrimInt:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ Sub(dst.V2S(), lhs.V2S(), rhs.V2S());
+ break;
+ case Primitive::kPrimFloat:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ Fsub(dst.V2S(), lhs.V2S(), rhs.V2S());
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderARM64::VisitVecMul(HVecMul* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARM64::VisitVecMul(HVecMul* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ FPRegister lhs = DRegisterFrom(locations->InAt(0));
+ FPRegister rhs = DRegisterFrom(locations->InAt(1));
+ FPRegister dst = DRegisterFrom(locations->Out());
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimByte:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ Mul(dst.V8B(), lhs.V8B(), rhs.V8B());
+ break;
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ Mul(dst.V4H(), lhs.V4H(), rhs.V4H());
+ break;
+ case Primitive::kPrimInt:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ Mul(dst.V2S(), lhs.V2S(), rhs.V2S());
+ break;
+ case Primitive::kPrimFloat:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ Fmul(dst.V2S(), lhs.V2S(), rhs.V2S());
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderARM64::VisitVecDiv(HVecDiv* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARM64::VisitVecDiv(HVecDiv* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ FPRegister lhs = DRegisterFrom(locations->InAt(0));
+ FPRegister rhs = DRegisterFrom(locations->InAt(1));
+ FPRegister dst = DRegisterFrom(locations->Out());
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimFloat:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ Fdiv(dst.V2S(), lhs.V2S(), rhs.V2S());
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderARM64::VisitVecAnd(HVecAnd* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARM64::VisitVecAnd(HVecAnd* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ FPRegister lhs = DRegisterFrom(locations->InAt(0));
+ FPRegister rhs = DRegisterFrom(locations->InAt(1));
+ FPRegister dst = DRegisterFrom(locations->Out());
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ case Primitive::kPrimFloat:
+ __ And(dst.V8B(), lhs.V8B(), rhs.V8B()); // lanes do not matter
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderARM64::VisitVecAndNot(HVecAndNot* instruction) {
+ LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
+}
+
+void InstructionCodeGeneratorARM64::VisitVecAndNot(HVecAndNot* instruction) {
+ LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
+}
+
+void LocationsBuilderARM64::VisitVecOr(HVecOr* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARM64::VisitVecOr(HVecOr* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ FPRegister lhs = DRegisterFrom(locations->InAt(0));
+ FPRegister rhs = DRegisterFrom(locations->InAt(1));
+ FPRegister dst = DRegisterFrom(locations->Out());
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ case Primitive::kPrimFloat:
+ __ Orr(dst.V8B(), lhs.V8B(), rhs.V8B()); // lanes do not matter
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderARM64::VisitVecXor(HVecXor* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARM64::VisitVecXor(HVecXor* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ FPRegister lhs = DRegisterFrom(locations->InAt(0));
+ FPRegister rhs = DRegisterFrom(locations->InAt(1));
+ FPRegister dst = DRegisterFrom(locations->Out());
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ case Primitive::kPrimFloat:
+ __ Eor(dst.V8B(), lhs.V8B(), rhs.V8B()); // lanes do not matter
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+// Helper to set up locations for vector shift operations.
+static void CreateVecShiftLocations(ArenaAllocator* arena, HVecBinaryOperation* instruction) {
+ LocationSummary* locations = new (arena) LocationSummary(instruction);
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimByte:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
+ locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderARM64::VisitVecShl(HVecShl* instruction) {
+ CreateVecShiftLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARM64::VisitVecShl(HVecShl* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ FPRegister lhs = DRegisterFrom(locations->InAt(0));
+ FPRegister dst = DRegisterFrom(locations->Out());
+ int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimByte:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ Shl(dst.V8B(), lhs.V8B(), value);
+ break;
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ Shl(dst.V4H(), lhs.V4H(), value);
+ break;
+ case Primitive::kPrimInt:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ Shl(dst.V2S(), lhs.V2S(), value);
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderARM64::VisitVecShr(HVecShr* instruction) {
+ CreateVecShiftLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARM64::VisitVecShr(HVecShr* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ FPRegister lhs = DRegisterFrom(locations->InAt(0));
+ FPRegister dst = DRegisterFrom(locations->Out());
+ int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimByte:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ Sshr(dst.V8B(), lhs.V8B(), value);
+ break;
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ Sshr(dst.V4H(), lhs.V4H(), value);
+ break;
+ case Primitive::kPrimInt:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ Sshr(dst.V2S(), lhs.V2S(), value);
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderARM64::VisitVecUShr(HVecUShr* instruction) {
+ CreateVecShiftLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARM64::VisitVecUShr(HVecUShr* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ FPRegister lhs = DRegisterFrom(locations->InAt(0));
+ FPRegister dst = DRegisterFrom(locations->Out());
+ int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimByte:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ Ushr(dst.V8B(), lhs.V8B(), value);
+ break;
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ Ushr(dst.V4H(), lhs.V4H(), value);
+ break;
+ case Primitive::kPrimInt:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ Ushr(dst.V2S(), lhs.V2S(), value);
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+// Helper to set up locations for vector memory operations.
+static void CreateVecMemLocations(ArenaAllocator* arena,
+ HVecMemoryOperation* instruction,
+ bool is_load) {
+ LocationSummary* locations = new (arena) LocationSummary(instruction);
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ case Primitive::kPrimFloat:
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
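+      // A load defines the vector register; a store consumes it as a third input.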
+ if (is_load) {
+ locations->SetOut(Location::RequiresFpuRegister());
+ } else {
+ locations->SetInAt(2, Location::RequiresFpuRegister());
+ }
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+// Helper to set up registers and address for vector memory operations.
+MemOperand InstructionCodeGeneratorARM64::CreateVecMemRegisters(
+ HVecMemoryOperation* instruction,
+ Location* reg_loc,
+ bool is_load) {
+ LocationSummary* locations = instruction->GetLocations();
+ Register base = InputRegisterAt(instruction, 0);
+ Location index = locations->InAt(1);
+ *reg_loc = is_load ? locations->Out() : locations->InAt(2);
+
+ Primitive::Type packed_type = instruction->GetPackedType();
+ uint32_t offset = mirror::Array::DataOffset(Primitive::ComponentSize(packed_type)).Uint32Value();
+ size_t shift = Primitive::ComponentSizeShift(packed_type);
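+  // Compute the effective address base + data offset + (index << shift),
+  // folding a constant index directly into the offset.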
+
+ UseScratchRegisterScope temps(GetVIXLAssembler());
+ Register temp = temps.AcquireSameSizeAs(base);
+ if (index.IsConstant()) {
+ offset += Int64ConstantFrom(index) << shift;
+ __ Add(temp, base, offset);
+ } else {
+ if (instruction->InputAt(0)->IsIntermediateAddress()) {
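+      // An intermediate address already includes the data offset in the base.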
+ temp = base;
+ } else {
+ __ Add(temp, base, offset);
+ }
+ __ Add(temp.X(), temp.X(), Operand(XRegisterFrom(index), LSL, shift));
+ }
+ return HeapOperand(temp);
+}
+
+void LocationsBuilderARM64::VisitVecLoad(HVecLoad* instruction) {
+ CreateVecMemLocations(GetGraph()->GetArena(), instruction, /*is_load*/ true);
+}
+
+void InstructionCodeGeneratorARM64::VisitVecLoad(HVecLoad* instruction) {
+ Location reg_loc = Location::NoLocation();
+ MemOperand mem = CreateVecMemRegisters(instruction, &reg_loc, /*is_load*/ true);
+ FPRegister reg = DRegisterFrom(reg_loc);
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ Ld1(reg.V8B(), mem);
+ break;
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ Ld1(reg.V4H(), mem);
+ break;
+ case Primitive::kPrimInt:
+ case Primitive::kPrimFloat:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ Ld1(reg.V2S(), mem);
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderARM64::VisitVecStore(HVecStore* instruction) {
+ CreateVecMemLocations(GetGraph()->GetArena(), instruction, /*is_load*/ false);
+}
+
+void InstructionCodeGeneratorARM64::VisitVecStore(HVecStore* instruction) {
+ Location reg_loc = Location::NoLocation();
+ MemOperand mem = CreateVecMemRegisters(instruction, &reg_loc, /*is_load*/ false);
+ FPRegister reg = DRegisterFrom(reg_loc);
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ St1(reg.V8B(), mem);
+ break;
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ St1(reg.V4H(), mem);
+ break;
+ case Primitive::kPrimInt:
+ case Primitive::kPrimFloat:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ St1(reg.V2S(), mem);
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+#undef __
+
+} // namespace arm64
+} // namespace art
diff --git a/compiler/optimizing/code_generator_vector_arm_vixl.cc b/compiler/optimizing/code_generator_vector_arm_vixl.cc
new file mode 100644
index 0000000000..171198902d
--- /dev/null
+++ b/compiler/optimizing/code_generator_vector_arm_vixl.cc
@@ -0,0 +1,235 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "code_generator_arm_vixl.h"
+
+namespace art {
+namespace arm {
+
+// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
+#define __ reinterpret_cast<ArmVIXLAssembler*>(GetAssembler())->GetVIXLAssembler()-> // NOLINT
+
+void LocationsBuilderARMVIXL::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderARMVIXL::VisitVecSetScalars(HVecSetScalars* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitVecSetScalars(HVecSetScalars* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderARMVIXL::VisitVecSumReduce(HVecSumReduce* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitVecSumReduce(HVecSumReduce* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+// Helper to set up locations for vector unary operations.
+static void CreateVecUnOpLocations(ArenaAllocator* arena, HVecUnaryOperation* instruction) {
+ LocationSummary* locations = new (arena) LocationSummary(instruction);
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ case Primitive::kPrimFloat:
+ case Primitive::kPrimDouble:
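+      // No constraints are set: the summary is allocated only to keep the
+      // graph consistent until ARM SIMD code generation is implemented.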
+ DCHECK(locations);
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderARMVIXL::VisitVecCnv(HVecCnv* instruction) {
+ CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitVecCnv(HVecCnv* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderARMVIXL::VisitVecNeg(HVecNeg* instruction) {
+ CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitVecNeg(HVecNeg* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderARMVIXL::VisitVecNot(HVecNot* instruction) {
+ CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitVecNot(HVecNot* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+// Helper to set up locations for vector binary operations.
+static void CreateVecBinOpLocations(ArenaAllocator* arena, HVecBinaryOperation* instruction) {
+ LocationSummary* locations = new (arena) LocationSummary(instruction);
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ case Primitive::kPrimFloat:
+ case Primitive::kPrimDouble:
+ DCHECK(locations);
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderARMVIXL::VisitVecAdd(HVecAdd* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitVecAdd(HVecAdd* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderARMVIXL::VisitVecSub(HVecSub* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitVecSub(HVecSub* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderARMVIXL::VisitVecMul(HVecMul* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitVecMul(HVecMul* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderARMVIXL::VisitVecDiv(HVecDiv* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitVecDiv(HVecDiv* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderARMVIXL::VisitVecAnd(HVecAnd* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitVecAnd(HVecAnd* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderARMVIXL::VisitVecAndNot(HVecAndNot* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitVecAndNot(HVecAndNot* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderARMVIXL::VisitVecOr(HVecOr* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitVecOr(HVecOr* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderARMVIXL::VisitVecXor(HVecXor* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitVecXor(HVecXor* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+// Helper to set up locations for vector shift operations.
+static void CreateVecShiftLocations(ArenaAllocator* arena, HVecBinaryOperation* instruction) {
+ LocationSummary* locations = new (arena) LocationSummary(instruction);
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimByte:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ case Primitive::kPrimLong:
+ DCHECK(locations);
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderARMVIXL::VisitVecShl(HVecShl* instruction) {
+ CreateVecShiftLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitVecShl(HVecShl* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderARMVIXL::VisitVecShr(HVecShr* instruction) {
+ CreateVecShiftLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitVecShr(HVecShr* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderARMVIXL::VisitVecUShr(HVecUShr* instruction) {
+ CreateVecShiftLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitVecUShr(HVecUShr* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderARMVIXL::VisitVecLoad(HVecLoad* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitVecLoad(HVecLoad* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderARMVIXL::VisitVecStore(HVecStore* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitVecStore(HVecStore* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+#undef __
+
+} // namespace arm
+} // namespace art
diff --git a/compiler/optimizing/code_generator_vector_mips.cc b/compiler/optimizing/code_generator_vector_mips.cc
new file mode 100644
index 0000000000..6f5fe0d2a4
--- /dev/null
+++ b/compiler/optimizing/code_generator_vector_mips.cc
@@ -0,0 +1,235 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "code_generator_mips.h"
+
+namespace art {
+namespace mips {
+
+// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
+#define __ down_cast<MipsAssembler*>(GetAssembler())-> // NOLINT
+
+void LocationsBuilderMIPS::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void InstructionCodeGeneratorMIPS::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderMIPS::VisitVecSetScalars(HVecSetScalars* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void InstructionCodeGeneratorMIPS::VisitVecSetScalars(HVecSetScalars* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderMIPS::VisitVecSumReduce(HVecSumReduce* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void InstructionCodeGeneratorMIPS::VisitVecSumReduce(HVecSumReduce* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+// Helper to set up locations for vector unary operations.
+static void CreateVecUnOpLocations(ArenaAllocator* arena, HVecUnaryOperation* instruction) {
+ LocationSummary* locations = new (arena) LocationSummary(instruction);
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ case Primitive::kPrimFloat:
+ case Primitive::kPrimDouble:
+ DCHECK(locations);
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderMIPS::VisitVecCnv(HVecCnv* instruction) {
+ CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorMIPS::VisitVecCnv(HVecCnv* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderMIPS::VisitVecNeg(HVecNeg* instruction) {
+ CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorMIPS::VisitVecNeg(HVecNeg* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderMIPS::VisitVecNot(HVecNot* instruction) {
+ CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorMIPS::VisitVecNot(HVecNot* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+// Helper to set up locations for vector binary operations.
+static void CreateVecBinOpLocations(ArenaAllocator* arena, HVecBinaryOperation* instruction) {
+ LocationSummary* locations = new (arena) LocationSummary(instruction);
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ case Primitive::kPrimFloat:
+ case Primitive::kPrimDouble:
+ DCHECK(locations);
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderMIPS::VisitVecAdd(HVecAdd* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorMIPS::VisitVecAdd(HVecAdd* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderMIPS::VisitVecSub(HVecSub* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorMIPS::VisitVecSub(HVecSub* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderMIPS::VisitVecMul(HVecMul* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorMIPS::VisitVecMul(HVecMul* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderMIPS::VisitVecDiv(HVecDiv* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorMIPS::VisitVecDiv(HVecDiv* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderMIPS::VisitVecAnd(HVecAnd* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorMIPS::VisitVecAnd(HVecAnd* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderMIPS::VisitVecAndNot(HVecAndNot* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorMIPS::VisitVecAndNot(HVecAndNot* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderMIPS::VisitVecOr(HVecOr* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorMIPS::VisitVecOr(HVecOr* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderMIPS::VisitVecXor(HVecXor* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorMIPS::VisitVecXor(HVecXor* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+// Helper to set up locations for vector shift operations.
+static void CreateVecShiftLocations(ArenaAllocator* arena, HVecBinaryOperation* instruction) {
+ LocationSummary* locations = new (arena) LocationSummary(instruction);
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimByte:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ case Primitive::kPrimLong:
+ DCHECK(locations);
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderMIPS::VisitVecShl(HVecShl* instruction) {
+ CreateVecShiftLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorMIPS::VisitVecShl(HVecShl* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderMIPS::VisitVecShr(HVecShr* instruction) {
+ CreateVecShiftLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorMIPS::VisitVecShr(HVecShr* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderMIPS::VisitVecUShr(HVecUShr* instruction) {
+ CreateVecShiftLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorMIPS::VisitVecUShr(HVecUShr* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderMIPS::VisitVecLoad(HVecLoad* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void InstructionCodeGeneratorMIPS::VisitVecLoad(HVecLoad* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderMIPS::VisitVecStore(HVecStore* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void InstructionCodeGeneratorMIPS::VisitVecStore(HVecStore* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+#undef __
+
+} // namespace mips
+} // namespace art
diff --git a/compiler/optimizing/code_generator_vector_mips64.cc b/compiler/optimizing/code_generator_vector_mips64.cc
new file mode 100644
index 0000000000..2ee7ac91cf
--- /dev/null
+++ b/compiler/optimizing/code_generator_vector_mips64.cc
@@ -0,0 +1,235 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "code_generator_mips64.h"
+
+namespace art {
+namespace mips64 {
+
+// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
+#define __ down_cast<Mips64Assembler*>(GetAssembler())-> // NOLINT
+
+void LocationsBuilderMIPS64::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void InstructionCodeGeneratorMIPS64::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderMIPS64::VisitVecSetScalars(HVecSetScalars* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void InstructionCodeGeneratorMIPS64::VisitVecSetScalars(HVecSetScalars* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderMIPS64::VisitVecSumReduce(HVecSumReduce* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void InstructionCodeGeneratorMIPS64::VisitVecSumReduce(HVecSumReduce* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+// Helper to set up locations for vector unary operations.
+static void CreateVecUnOpLocations(ArenaAllocator* arena, HVecUnaryOperation* instruction) {
+ LocationSummary* locations = new (arena) LocationSummary(instruction);
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ case Primitive::kPrimFloat:
+ case Primitive::kPrimDouble:
+ DCHECK(locations);
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderMIPS64::VisitVecCnv(HVecCnv* instruction) {
+ CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorMIPS64::VisitVecCnv(HVecCnv* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderMIPS64::VisitVecNeg(HVecNeg* instruction) {
+ CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorMIPS64::VisitVecNeg(HVecNeg* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderMIPS64::VisitVecNot(HVecNot* instruction) {
+ CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorMIPS64::VisitVecNot(HVecNot* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+// Helper to set up locations for vector binary operations.
+static void CreateVecBinOpLocations(ArenaAllocator* arena, HVecBinaryOperation* instruction) {
+ LocationSummary* locations = new (arena) LocationSummary(instruction);
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ case Primitive::kPrimFloat:
+ case Primitive::kPrimDouble:
+ DCHECK(locations);
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderMIPS64::VisitVecAdd(HVecAdd* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorMIPS64::VisitVecAdd(HVecAdd* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderMIPS64::VisitVecSub(HVecSub* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorMIPS64::VisitVecSub(HVecSub* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderMIPS64::VisitVecMul(HVecMul* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorMIPS64::VisitVecMul(HVecMul* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderMIPS64::VisitVecDiv(HVecDiv* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorMIPS64::VisitVecDiv(HVecDiv* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderMIPS64::VisitVecAnd(HVecAnd* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorMIPS64::VisitVecAnd(HVecAnd* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderMIPS64::VisitVecAndNot(HVecAndNot* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorMIPS64::VisitVecAndNot(HVecAndNot* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderMIPS64::VisitVecOr(HVecOr* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorMIPS64::VisitVecOr(HVecOr* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderMIPS64::VisitVecXor(HVecXor* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorMIPS64::VisitVecXor(HVecXor* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+// Helper to set up locations for vector shift operations.
+static void CreateVecShiftLocations(ArenaAllocator* arena, HVecBinaryOperation* instruction) {
+ LocationSummary* locations = new (arena) LocationSummary(instruction);
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimByte:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ case Primitive::kPrimLong:
+ DCHECK(locations);
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderMIPS64::VisitVecShl(HVecShl* instruction) {
+ CreateVecShiftLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorMIPS64::VisitVecShl(HVecShl* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderMIPS64::VisitVecShr(HVecShr* instruction) {
+ CreateVecShiftLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorMIPS64::VisitVecShr(HVecShr* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderMIPS64::VisitVecUShr(HVecUShr* instruction) {
+ CreateVecShiftLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorMIPS64::VisitVecUShr(HVecUShr* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderMIPS64::VisitVecLoad(HVecLoad* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void InstructionCodeGeneratorMIPS64::VisitVecLoad(HVecLoad* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderMIPS64::VisitVecStore(HVecStore* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void InstructionCodeGeneratorMIPS64::VisitVecStore(HVecStore* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+#undef __
+
+} // namespace mips64
+} // namespace art
diff --git a/compiler/optimizing/code_generator_vector_x86.cc b/compiler/optimizing/code_generator_vector_x86.cc
new file mode 100644
index 0000000000..4f3988ee2e
--- /dev/null
+++ b/compiler/optimizing/code_generator_vector_x86.cc
@@ -0,0 +1,767 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "code_generator_x86.h"
+#include "mirror/array-inl.h"
+
+namespace art {
+namespace x86 {
+
+// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
+#define __ down_cast<X86Assembler*>(GetAssembler())-> // NOLINT
+
+void LocationsBuilderX86::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
+ LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimLong:
+      // Long needs an extra temporary to load the register pair.
+ locations->AddTemp(Location::RequiresFpuRegister());
+ FALLTHROUGH_INTENDED;
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetOut(Location::RequiresFpuRegister());
+ break;
+ case Primitive::kPrimFloat:
+ case Primitive::kPrimDouble:
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetOut(Location::SameAsFirstInput());
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+void InstructionCodeGeneratorX86::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ XmmRegister reg = locations->Out().AsFpuRegister<XmmRegister>();
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ DCHECK_EQ(16u, instruction->GetVectorLength());
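+      // Duplicate the byte: widen it by interleaving twice, then broadcast
+      // the low 32-bit lane across the register.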
+ __ movd(reg, locations->InAt(0).AsRegister<Register>());
+ __ punpcklbw(reg, reg);
+ __ punpcklwd(reg, reg);
+ __ pshufd(reg, reg, Immediate(0));
+ break;
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ movd(reg, locations->InAt(0).AsRegister<Register>());
+ __ punpcklwd(reg, reg);
+ __ pshufd(reg, reg, Immediate(0));
+ break;
+ case Primitive::kPrimInt:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ movd(reg, locations->InAt(0).AsRegister<Register>());
+ __ pshufd(reg, reg, Immediate(0));
+ break;
+ case Primitive::kPrimLong: {
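+      // On x86 a long lives in a 32-bit register pair: move both halves into
+      // XMM registers, interleave them into one 64-bit lane, then duplicate it.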
+ XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ movd(reg, locations->InAt(0).AsRegisterPairLow<Register>());
+ __ movd(tmp, locations->InAt(0).AsRegisterPairHigh<Register>());
+ __ punpckldq(reg, tmp);
+ __ punpcklqdq(reg, reg);
+ break;
+ }
+ case Primitive::kPrimFloat:
+ DCHECK(locations->InAt(0).Equals(locations->Out()));
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ shufps(reg, reg, Immediate(0));
+ break;
+ case Primitive::kPrimDouble:
+ DCHECK(locations->InAt(0).Equals(locations->Out()));
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ shufpd(reg, reg, Immediate(0));
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderX86::VisitVecSetScalars(HVecSetScalars* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void InstructionCodeGeneratorX86::VisitVecSetScalars(HVecSetScalars* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderX86::VisitVecSumReduce(HVecSumReduce* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void InstructionCodeGeneratorX86::VisitVecSumReduce(HVecSumReduce* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+// Helper to set up locations for vector unary operations.
+static void CreateVecUnOpLocations(ArenaAllocator* arena, HVecUnaryOperation* instruction) {
+ LocationSummary* locations = new (arena) LocationSummary(instruction);
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ case Primitive::kPrimLong:
+ case Primitive::kPrimFloat:
+ case Primitive::kPrimDouble:
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetOut(Location::RequiresFpuRegister());
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderX86::VisitVecCnv(HVecCnv* instruction) {
+ CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorX86::VisitVecCnv(HVecCnv* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
+ XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+ Primitive::Type from = instruction->GetInputType();
+ Primitive::Type to = instruction->GetResultType();
+ if (from == Primitive::kPrimInt && to == Primitive::kPrimFloat) {
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ cvtdq2ps(dst, src);
+ } else {
+ LOG(FATAL) << "Unsupported SIMD type";
+ }
+}
+
+void LocationsBuilderX86::VisitVecNeg(HVecNeg* instruction) {
+ CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorX86::VisitVecNeg(HVecNeg* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
+ XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimByte:
+ DCHECK_EQ(16u, instruction->GetVectorLength());
+ __ pxor(dst, dst);
+ __ psubb(dst, src);
+ break;
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ pxor(dst, dst);
+ __ psubw(dst, src);
+ break;
+ case Primitive::kPrimInt:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ pxor(dst, dst);
+ __ psubd(dst, src);
+ break;
+ case Primitive::kPrimLong:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ pxor(dst, dst);
+ __ psubq(dst, src);
+ break;
+ case Primitive::kPrimFloat:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ xorps(dst, dst);
+ __ subps(dst, src);
+ break;
+ case Primitive::kPrimDouble:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ xorpd(dst, dst);
+ __ subpd(dst, src);
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderX86::VisitVecNot(HVecNot* instruction) {
+ CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
+  // Boolean-not requires a temporary to construct a vector of 16 x one.
+ if (instruction->GetPackedType() == Primitive::kPrimBoolean) {
+ instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister());
+ }
+}
+
+void InstructionCodeGeneratorX86::VisitVecNot(HVecNot* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
+ XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimBoolean: { // special case boolean-not
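+      // Booleans are stored as 0/1 bytes, so logical-not is computed as
+      // x ^ 1: build a vector of 16 ones (0 - (-1) per byte) and XOR it in.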
+ DCHECK_EQ(16u, instruction->GetVectorLength());
+ XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
+ __ pxor(dst, dst);
+ __ pcmpeqb(tmp, tmp); // all ones
+ __ psubb(dst, tmp); // 16 x one
+ __ pxor(dst, src);
+ break;
+ }
+ case Primitive::kPrimByte:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ case Primitive::kPrimLong:
+ DCHECK_LE(2u, instruction->GetVectorLength());
+ DCHECK_LE(instruction->GetVectorLength(), 16u);
+ __ pcmpeqb(dst, dst); // all ones
+ __ pxor(dst, src);
+ break;
+ case Primitive::kPrimFloat:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ pcmpeqb(dst, dst); // all ones
+ __ xorps(dst, src);
+ break;
+ case Primitive::kPrimDouble:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ pcmpeqb(dst, dst); // all ones
+ __ xorpd(dst, src);
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+// Helper to set up locations for vector binary operations.
+static void CreateVecBinOpLocations(ArenaAllocator* arena, HVecBinaryOperation* instruction) {
+ LocationSummary* locations = new (arena) LocationSummary(instruction);
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ case Primitive::kPrimLong:
+ case Primitive::kPrimFloat:
+ case Primitive::kPrimDouble:
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetInAt(1, Location::RequiresFpuRegister());
+ locations->SetOut(Location::SameAsFirstInput());
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderX86::VisitVecAdd(HVecAdd* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorX86::VisitVecAdd(HVecAdd* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ DCHECK(locations->InAt(0).Equals(locations->Out()));
+ XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
+ XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimByte:
+ DCHECK_EQ(16u, instruction->GetVectorLength());
+ __ paddb(dst, src);
+ break;
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ paddw(dst, src);
+ break;
+ case Primitive::kPrimInt:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ paddd(dst, src);
+ break;
+ case Primitive::kPrimLong:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ paddq(dst, src);
+ break;
+ case Primitive::kPrimFloat:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ addps(dst, src);
+ break;
+ case Primitive::kPrimDouble:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ addpd(dst, src);
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderX86::VisitVecSub(HVecSub* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorX86::VisitVecSub(HVecSub* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ DCHECK(locations->InAt(0).Equals(locations->Out()));
+ XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
+ XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimByte:
+ DCHECK_EQ(16u, instruction->GetVectorLength());
+ __ psubb(dst, src);
+ break;
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ psubw(dst, src);
+ break;
+ case Primitive::kPrimInt:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ psubd(dst, src);
+ break;
+ case Primitive::kPrimLong:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ psubq(dst, src);
+ break;
+ case Primitive::kPrimFloat:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ subps(dst, src);
+ break;
+ case Primitive::kPrimDouble:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ subpd(dst, src);
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderX86::VisitVecMul(HVecMul* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorX86::VisitVecMul(HVecMul* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ DCHECK(locations->InAt(0).Equals(locations->Out()));
+ XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
+ XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ pmullw(dst, src);
+ break;
+ case Primitive::kPrimInt:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ pmulld(dst, src);
+ break;
+ case Primitive::kPrimFloat:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ mulps(dst, src);
+ break;
+ case Primitive::kPrimDouble:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ mulpd(dst, src);
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderX86::VisitVecDiv(HVecDiv* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorX86::VisitVecDiv(HVecDiv* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ DCHECK(locations->InAt(0).Equals(locations->Out()));
+ XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
+ XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimFloat:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ divps(dst, src);
+ break;
+ case Primitive::kPrimDouble:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ divpd(dst, src);
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderX86::VisitVecAnd(HVecAnd* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorX86::VisitVecAnd(HVecAnd* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ DCHECK(locations->InAt(0).Equals(locations->Out()));
+ XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
+ XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ case Primitive::kPrimLong:
+ DCHECK_LE(2u, instruction->GetVectorLength());
+ DCHECK_LE(instruction->GetVectorLength(), 16u);
+ __ pand(dst, src);
+ break;
+ case Primitive::kPrimFloat:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ andps(dst, src);
+ break;
+ case Primitive::kPrimDouble:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ andpd(dst, src);
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderX86::VisitVecAndNot(HVecAndNot* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorX86::VisitVecAndNot(HVecAndNot* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ DCHECK(locations->InAt(0).Equals(locations->Out()));
+ XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
+ XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ case Primitive::kPrimLong:
+ DCHECK_LE(2u, instruction->GetVectorLength());
+ DCHECK_LE(instruction->GetVectorLength(), 16u);
+ __ pandn(dst, src);
+ break;
+ case Primitive::kPrimFloat:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ andnps(dst, src);
+ break;
+ case Primitive::kPrimDouble:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ andnpd(dst, src);
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderX86::VisitVecOr(HVecOr* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorX86::VisitVecOr(HVecOr* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ DCHECK(locations->InAt(0).Equals(locations->Out()));
+ XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
+ XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ case Primitive::kPrimLong:
+ DCHECK_LE(2u, instruction->GetVectorLength());
+ DCHECK_LE(instruction->GetVectorLength(), 16u);
+ __ por(dst, src);
+ break;
+ case Primitive::kPrimFloat:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ orps(dst, src);
+ break;
+ case Primitive::kPrimDouble:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ orpd(dst, src);
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderX86::VisitVecXor(HVecXor* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorX86::VisitVecXor(HVecXor* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ DCHECK(locations->InAt(0).Equals(locations->Out()));
+ XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
+ XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ case Primitive::kPrimLong:
+ DCHECK_LE(2u, instruction->GetVectorLength());
+ DCHECK_LE(instruction->GetVectorLength(), 16u);
+ __ pxor(dst, src);
+ break;
+ case Primitive::kPrimFloat:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ xorps(dst, src);
+ break;
+ case Primitive::kPrimDouble:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ xorpd(dst, src);
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+// Helper to set up locations for vector shift operations.
+static void CreateVecShiftLocations(ArenaAllocator* arena, HVecBinaryOperation* instruction) {
+ LocationSummary* locations = new (arena) LocationSummary(instruction);
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ case Primitive::kPrimLong:
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
+ locations->SetOut(Location::SameAsFirstInput());
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderX86::VisitVecShl(HVecShl* instruction) {
+ CreateVecShiftLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorX86::VisitVecShl(HVecShl* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ DCHECK(locations->InAt(0).Equals(locations->Out()));
+ int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
+ XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ psllw(dst, Immediate(static_cast<uint8_t>(value)));
+ break;
+ case Primitive::kPrimInt:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ pslld(dst, Immediate(static_cast<uint8_t>(value)));
+ break;
+ case Primitive::kPrimLong:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ psllq(dst, Immediate(static_cast<uint8_t>(value)));
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderX86::VisitVecShr(HVecShr* instruction) {
+ CreateVecShiftLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorX86::VisitVecShr(HVecShr* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ DCHECK(locations->InAt(0).Equals(locations->Out()));
+ int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
+ XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ psraw(dst, Immediate(static_cast<uint8_t>(value)));
+ break;
+ case Primitive::kPrimInt:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ psrad(dst, Immediate(static_cast<uint8_t>(value)));
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderX86::VisitVecUShr(HVecUShr* instruction) {
+ CreateVecShiftLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorX86::VisitVecUShr(HVecUShr* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ DCHECK(locations->InAt(0).Equals(locations->Out()));
+ int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
+ XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ psrlw(dst, Immediate(static_cast<uint8_t>(value)));
+ break;
+ case Primitive::kPrimInt:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ psrld(dst, Immediate(static_cast<uint8_t>(value)));
+ break;
+ case Primitive::kPrimLong:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ psrlq(dst, Immediate(static_cast<uint8_t>(value)));
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+// Helper to set up locations for vector memory operations.
+static void CreateVecMemLocations(ArenaAllocator* arena,
+ HVecMemoryOperation* instruction,
+ bool is_load) {
+ LocationSummary* locations = new (arena) LocationSummary(instruction);
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ case Primitive::kPrimLong:
+ case Primitive::kPrimFloat:
+ case Primitive::kPrimDouble:
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
+ if (is_load) {
+ locations->SetOut(Location::RequiresFpuRegister());
+ } else {
+ locations->SetInAt(2, Location::RequiresFpuRegister());
+ }
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+// Helper to set up registers and address for vector memory operations.
+static Address CreateVecMemRegisters(HVecMemoryOperation* instruction,
+ Location* reg_loc,
+ bool is_load) {
+ LocationSummary* locations = instruction->GetLocations();
+ Location base = locations->InAt(0);
+ Location index = locations->InAt(1);
+ *reg_loc = is_load ? locations->Out() : locations->InAt(2);
+ size_t size = Primitive::ComponentSize(instruction->GetPackedType());
+ uint32_t offset = mirror::Array::DataOffset(size).Uint32Value();
+ ScaleFactor scale = TIMES_1;
+ switch (size) {
+ case 2: scale = TIMES_2; break;
+ case 4: scale = TIMES_4; break;
+ case 8: scale = TIMES_8; break;
+ default: break;
+ }
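+  // ArrayAddress forms base + index * scale + data offset, folding a
+  // constant index into the displacement.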
+ return CodeGeneratorX86::ArrayAddress(base.AsRegister<Register>(), index, scale, offset);
+}
+
+void LocationsBuilderX86::VisitVecLoad(HVecLoad* instruction) {
+ CreateVecMemLocations(GetGraph()->GetArena(), instruction, /*is_load*/ true);
+}
+
+void InstructionCodeGeneratorX86::VisitVecLoad(HVecLoad* instruction) {
+ Location reg_loc = Location::NoLocation();
+ Address address = CreateVecMemRegisters(instruction, &reg_loc, /*is_load*/ true);
+ XmmRegister reg = reg_loc.AsFpuRegister<XmmRegister>();
+ bool is_aligned16 = instruction->GetAlignment().IsAlignedAt(16);
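+  // Use the aligned 16-byte moves only when the access is known to be
+  // 16-byte aligned; otherwise fall back to the unaligned forms.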
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ case Primitive::kPrimLong:
+ DCHECK_LE(2u, instruction->GetVectorLength());
+ DCHECK_LE(instruction->GetVectorLength(), 16u);
+ is_aligned16 ? __ movdqa(reg, address) : __ movdqu(reg, address);
+ break;
+ case Primitive::kPrimFloat:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ is_aligned16 ? __ movaps(reg, address) : __ movups(reg, address);
+ break;
+ case Primitive::kPrimDouble:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ is_aligned16 ? __ movapd(reg, address) : __ movupd(reg, address);
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderX86::VisitVecStore(HVecStore* instruction) {
+ CreateVecMemLocations(GetGraph()->GetArena(), instruction, /*is_load*/ false);
+}
+
+void InstructionCodeGeneratorX86::VisitVecStore(HVecStore* instruction) {
+ Location reg_loc = Location::NoLocation();
+ Address address = CreateVecMemRegisters(instruction, &reg_loc, /*is_load*/ false);
+ XmmRegister reg = reg_loc.AsFpuRegister<XmmRegister>();
+ bool is_aligned16 = instruction->GetAlignment().IsAlignedAt(16);
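+  // Same alignment-based instruction selection as VisitVecLoad above.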
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ case Primitive::kPrimLong:
+ DCHECK_LE(2u, instruction->GetVectorLength());
+ DCHECK_LE(instruction->GetVectorLength(), 16u);
+ is_aligned16 ? __ movdqa(address, reg) : __ movdqu(address, reg);
+ break;
+ case Primitive::kPrimFloat:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ is_aligned16 ? __ movaps(address, reg) : __ movups(address, reg);
+ break;
+ case Primitive::kPrimDouble:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ is_aligned16 ? __ movapd(address, reg) : __ movupd(address, reg);
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+#undef __
+
+} // namespace x86
+} // namespace art
diff --git a/compiler/optimizing/code_generator_vector_x86_64.cc b/compiler/optimizing/code_generator_vector_x86_64.cc
new file mode 100644
index 0000000000..b1c1494f6b
--- /dev/null
+++ b/compiler/optimizing/code_generator_vector_x86_64.cc
@@ -0,0 +1,760 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "code_generator_x86_64.h"
+#include "mirror/array-inl.h"
+
+namespace art {
+namespace x86_64 {
+
+// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
+#define __ down_cast<X86_64Assembler*>(GetAssembler())-> // NOLINT
+
+void LocationsBuilderX86_64::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
+ LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ case Primitive::kPrimLong:
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetOut(Location::RequiresFpuRegister());
+ break;
+ case Primitive::kPrimFloat:
+ case Primitive::kPrimDouble:
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetOut(Location::SameAsFirstInput());
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+void InstructionCodeGeneratorX86_64::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ XmmRegister reg = locations->Out().AsFpuRegister<XmmRegister>();
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ DCHECK_EQ(16u, instruction->GetVectorLength());
+ __ movd(reg, locations->InAt(0).AsRegister<CpuRegister>());
+ __ punpcklbw(reg, reg);
+ __ punpcklwd(reg, reg);
+ __ pshufd(reg, reg, Immediate(0));
+ break;
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ movd(reg, locations->InAt(0).AsRegister<CpuRegister>());
+ __ punpcklwd(reg, reg);
+ __ pshufd(reg, reg, Immediate(0));
+ break;
+ case Primitive::kPrimInt:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ movd(reg, locations->InAt(0).AsRegister<CpuRegister>());
+ __ pshufd(reg, reg, Immediate(0));
+ break;
+ case Primitive::kPrimLong:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ movd(reg, locations->InAt(0).AsRegister<CpuRegister>()); // is 64-bit
+ __ punpcklqdq(reg, reg);
+ break;
+ case Primitive::kPrimFloat:
+ DCHECK(locations->InAt(0).Equals(locations->Out()));
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ shufps(reg, reg, Immediate(0));
+ break;
+ case Primitive::kPrimDouble:
+ DCHECK(locations->InAt(0).Equals(locations->Out()));
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ shufpd(reg, reg, Immediate(0));
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
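
The integral replicate cases widen the scalar step by step: movd places it in lane 0, each punpckl* doubles its footprint, and pshufd copies dword 0 across the register. A sketch of the byte case as SSE2 intrinsics, assuming <immintrin.h> (BroadcastByte is an illustrative name; SSSE3's pshufb or AVX2's vpbroadcastb could do this in one instruction):

    #include <immintrin.h>
    #include <cstdint>

    // movd + punpcklbw + punpcklwd + pshufd(0), as emitted above for kPrimByte.
    __m128i BroadcastByte(int8_t value) {
      __m128i v = _mm_cvtsi32_si128(value);  // byte in lane 0
      v = _mm_unpacklo_epi8(v, v);           // duplicate byte into a 16-bit pair
      v = _mm_unpacklo_epi16(v, v);          // duplicate pair into a 32-bit group
      return _mm_shuffle_epi32(v, 0);        // replicate dword 0 into all lanes
    }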
+
+void LocationsBuilderX86_64::VisitVecSetScalars(HVecSetScalars* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void InstructionCodeGeneratorX86_64::VisitVecSetScalars(HVecSetScalars* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderX86_64::VisitVecSumReduce(HVecSumReduce* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void InstructionCodeGeneratorX86_64::VisitVecSumReduce(HVecSumReduce* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+// Helper to set up locations for vector unary operations.
+static void CreateVecUnOpLocations(ArenaAllocator* arena, HVecUnaryOperation* instruction) {
+ LocationSummary* locations = new (arena) LocationSummary(instruction);
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ case Primitive::kPrimLong:
+ case Primitive::kPrimFloat:
+ case Primitive::kPrimDouble:
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetOut(Location::RequiresFpuRegister());
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderX86_64::VisitVecCnv(HVecCnv* instruction) {
+ CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorX86_64::VisitVecCnv(HVecCnv* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
+ XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+ Primitive::Type from = instruction->GetInputType();
+ Primitive::Type to = instruction->GetResultType();
+ if (from == Primitive::kPrimInt && to == Primitive::kPrimFloat) {
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ cvtdq2ps(dst, src);
+ } else {
+ LOG(FATAL) << "Unsupported SIMD type";
+ }
+}
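
Only the packed int-to-float conversion is vectorized here, since it maps to a single instruction; every other pairing falls through to the fatal default. The intrinsics equivalent, assuming <immintrin.h>:

    #include <immintrin.h>

    __m128 CnvIntToFloat(__m128i v) {
      return _mm_cvtepi32_ps(v);  // cvtdq2ps: four int32 lanes to four floats
    }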
+
+void LocationsBuilderX86_64::VisitVecNeg(HVecNeg* instruction) {
+ CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorX86_64::VisitVecNeg(HVecNeg* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
+ XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimByte:
+ DCHECK_EQ(16u, instruction->GetVectorLength());
+ __ pxor(dst, dst);
+ __ psubb(dst, src);
+ break;
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ pxor(dst, dst);
+ __ psubw(dst, src);
+ break;
+ case Primitive::kPrimInt:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ pxor(dst, dst);
+ __ psubd(dst, src);
+ break;
+ case Primitive::kPrimLong:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ pxor(dst, dst);
+ __ psubq(dst, src);
+ break;
+ case Primitive::kPrimFloat:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ xorps(dst, dst);
+ __ subps(dst, src);
+ break;
+ case Primitive::kPrimDouble:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ xorpd(dst, dst);
+ __ subpd(dst, src);
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
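
SSE has no packed negate instruction, so each case zeroes the destination and subtracts the source. The same idea as intrinsics, assuming <immintrin.h> (note that 0.0f - (+0.0f) yields +0.0f, matching the generated code's behavior on signed zero):

    #include <immintrin.h>

    __m128i NegInts(__m128i v) {
      return _mm_sub_epi32(_mm_setzero_si128(), v);  // pxor + psubd: 0 - v
    }

    __m128 NegFloats(__m128 v) {
      return _mm_sub_ps(_mm_setzero_ps(), v);        // xorps + subps: 0.0f - v
    }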
+
+void LocationsBuilderX86_64::VisitVecNot(HVecNot* instruction) {
+ CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
+ // Boolean-not requires a temporary to construct the 16 x one.
+ if (instruction->GetPackedType() == Primitive::kPrimBoolean) {
+ instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister());
+ }
+}
+
+void InstructionCodeGeneratorX86_64::VisitVecNot(HVecNot* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
+ XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimBoolean: { // special case boolean-not
+ DCHECK_EQ(16u, instruction->GetVectorLength());
+ XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
+ __ pxor(dst, dst);
+ __ pcmpeqb(tmp, tmp); // all ones
+ __ psubb(dst, tmp); // 16 x one
+ __ pxor(dst, src);
+ break;
+ }
+ case Primitive::kPrimByte:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ case Primitive::kPrimLong:
+ DCHECK_LE(2u, instruction->GetVectorLength());
+ DCHECK_LE(instruction->GetVectorLength(), 16u);
+ __ pcmpeqb(dst, dst); // all ones
+ __ pxor(dst, src);
+ break;
+ case Primitive::kPrimFloat:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ pcmpeqb(dst, dst); // all ones
+ __ xorps(dst, src);
+ break;
+ case Primitive::kPrimDouble:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ pcmpeqb(dst, dst); // all ones
+ __ xorpd(dst, src);
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
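
Bitwise not is likewise synthesized: xor against an all-ones register, obtained by pcmpeqb of a register with itself. The boolean case instead xors against sixteen 0x01 bytes so that only the low bit of each lane flips and values stay in {0, 1}. A sketch, assuming <immintrin.h>:

    #include <immintrin.h>

    __m128i NotBits(__m128i src) {
      __m128i all_ones = _mm_cmpeq_epi8(src, src);  // 0xFF in every byte
      return _mm_xor_si128(all_ones, src);          // ~src
    }

    __m128i NotBooleans(__m128i src) {
      __m128i all_ones = _mm_cmpeq_epi8(src, src);
      __m128i one_bytes = _mm_sub_epi8(_mm_setzero_si128(), all_ones);  // 16 x 0x01
      return _mm_xor_si128(one_bytes, src);         // flips 0x00 <-> 0x01 only
    }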
+
+// Helper to set up locations for vector binary operations.
+static void CreateVecBinOpLocations(ArenaAllocator* arena, HVecBinaryOperation* instruction) {
+ LocationSummary* locations = new (arena) LocationSummary(instruction);
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ case Primitive::kPrimLong:
+ case Primitive::kPrimFloat:
+ case Primitive::kPrimDouble:
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetInAt(1, Location::RequiresFpuRegister());
+ locations->SetOut(Location::SameAsFirstInput());
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderX86_64::VisitVecAdd(HVecAdd* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorX86_64::VisitVecAdd(HVecAdd* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ DCHECK(locations->InAt(0).Equals(locations->Out()));
+ XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
+ XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimByte:
+ DCHECK_EQ(16u, instruction->GetVectorLength());
+ __ paddb(dst, src);
+ break;
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ paddw(dst, src);
+ break;
+ case Primitive::kPrimInt:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ paddd(dst, src);
+ break;
+ case Primitive::kPrimLong:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ paddq(dst, src);
+ break;
+ case Primitive::kPrimFloat:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ addps(dst, src);
+ break;
+ case Primitive::kPrimDouble:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ addpd(dst, src);
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderX86_64::VisitVecSub(HVecSub* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorX86_64::VisitVecSub(HVecSub* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ DCHECK(locations->InAt(0).Equals(locations->Out()));
+ XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
+ XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimByte:
+ DCHECK_EQ(16u, instruction->GetVectorLength());
+ __ psubb(dst, src);
+ break;
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ psubw(dst, src);
+ break;
+ case Primitive::kPrimInt:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ psubd(dst, src);
+ break;
+ case Primitive::kPrimLong:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ psubq(dst, src);
+ break;
+ case Primitive::kPrimFloat:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ subps(dst, src);
+ break;
+ case Primitive::kPrimDouble:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ subpd(dst, src);
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderX86_64::VisitVecMul(HVecMul* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorX86_64::VisitVecMul(HVecMul* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ DCHECK(locations->InAt(0).Equals(locations->Out()));
+ XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
+ XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ pmullw(dst, src);
+ break;
+ case Primitive::kPrimInt:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ pmulld(dst, src);
+ break;
+ case Primitive::kPrimFloat:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ mulps(dst, src);
+ break;
+ case Primitive::kPrimDouble:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ mulpd(dst, src);
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
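
The type coverage here is narrower than for add/sub: pmullw (16-bit lanes) is baseline SSE2, pmulld (32-bit lanes) requires SSE4.1, and SSE has no packed 64x64-bit low multiply, hence no kPrimLong case. A sketch, assuming <immintrin.h> and an SSE4.1 target:

    #include <immintrin.h>

    __m128i MulShorts(__m128i a, __m128i b) {
      return _mm_mullo_epi16(a, b);  // pmullw (SSE2)
    }

    __m128i MulInts(__m128i a, __m128i b) {
      return _mm_mullo_epi32(a, b);  // pmulld (SSE4.1)
    }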
+
+void LocationsBuilderX86_64::VisitVecDiv(HVecDiv* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorX86_64::VisitVecDiv(HVecDiv* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ DCHECK(locations->InAt(0).Equals(locations->Out()));
+ XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
+ XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimFloat:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ divps(dst, src);
+ break;
+ case Primitive::kPrimDouble:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ divpd(dst, src);
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderX86_64::VisitVecAnd(HVecAnd* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorX86_64::VisitVecAnd(HVecAnd* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ DCHECK(locations->InAt(0).Equals(locations->Out()));
+ XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
+ XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ case Primitive::kPrimLong:
+ DCHECK_LE(2u, instruction->GetVectorLength());
+ DCHECK_LE(instruction->GetVectorLength(), 16u);
+ __ pand(dst, src);
+ break;
+ case Primitive::kPrimFloat:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ andps(dst, src);
+ break;
+ case Primitive::kPrimDouble:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ andpd(dst, src);
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderX86_64::VisitVecAndNot(HVecAndNot* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorX86_64::VisitVecAndNot(HVecAndNot* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ DCHECK(locations->InAt(0).Equals(locations->Out()));
+ XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
+ XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ case Primitive::kPrimLong:
+ DCHECK_LE(2u, instruction->GetVectorLength());
+ DCHECK_LE(instruction->GetVectorLength(), 16u);
+ __ pandn(dst, src);
+ break;
+ case Primitive::kPrimFloat:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ andnps(dst, src);
+ break;
+ case Primitive::kPrimDouble:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ andnpd(dst, src);
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
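
One pitfall worth noting: pandn negates its first operand, so pandn(dst, src) computes ~dst & src rather than dst & ~src. The intrinsic has the same shape, assuming <immintrin.h>:

    #include <immintrin.h>

    __m128i AndNot(__m128i a, __m128i b) {
      return _mm_andnot_si128(a, b);  // pandn: (~a) & b, not a & ~b
    }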
+
+void LocationsBuilderX86_64::VisitVecOr(HVecOr* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorX86_64::VisitVecOr(HVecOr* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ DCHECK(locations->InAt(0).Equals(locations->Out()));
+ XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
+ XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ case Primitive::kPrimLong:
+ DCHECK_LE(2u, instruction->GetVectorLength());
+ DCHECK_LE(instruction->GetVectorLength(), 16u);
+ __ por(dst, src);
+ break;
+ case Primitive::kPrimFloat:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ orps(dst, src);
+ break;
+ case Primitive::kPrimDouble:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ orpd(dst, src);
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderX86_64::VisitVecXor(HVecXor* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorX86_64::VisitVecXor(HVecXor* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ DCHECK(locations->InAt(0).Equals(locations->Out()));
+ XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
+ XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ case Primitive::kPrimLong:
+ DCHECK_LE(2u, instruction->GetVectorLength());
+ DCHECK_LE(instruction->GetVectorLength(), 16u);
+ __ pxor(dst, src);
+ break;
+ case Primitive::kPrimFloat:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ xorps(dst, src);
+ break;
+ case Primitive::kPrimDouble:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ xorpd(dst, src);
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+// Helper to set up locations for vector shift operations.
+static void CreateVecShiftLocations(ArenaAllocator* arena, HVecBinaryOperation* instruction) {
+ LocationSummary* locations = new (arena) LocationSummary(instruction);
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ case Primitive::kPrimLong:
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
+ locations->SetOut(Location::SameAsFirstInput());
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
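
The shift amount is pinned to a constant location because the generators below use the SSE shift-by-immediate encodings, folding the HIR constant straight into the instruction. Note also that SSE2 has no arithmetic right shift for 64-bit lanes (no psraq), which is why VisitVecShr below omits kPrimLong. A sketch with a fixed count standing in for the HIR constant, assuming <immintrin.h>:

    #include <immintrin.h>

    __m128i ShlInts(__m128i v)   { return _mm_slli_epi32(v, 3); }  // pslld xmm, 3
    __m128i ShrInts(__m128i v)   { return _mm_srai_epi32(v, 3); }  // psrad xmm, 3
    __m128i UShrLongs(__m128i v) { return _mm_srli_epi64(v, 3); }  // psrlq xmm, 3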
+
+void LocationsBuilderX86_64::VisitVecShl(HVecShl* instruction) {
+ CreateVecShiftLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorX86_64::VisitVecShl(HVecShl* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ DCHECK(locations->InAt(0).Equals(locations->Out()));
+ int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
+ XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ psllw(dst, Immediate(static_cast<int8_t>(value)));
+ break;
+ case Primitive::kPrimInt:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ pslld(dst, Immediate(static_cast<int8_t>(value)));
+ break;
+ case Primitive::kPrimLong:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ psllq(dst, Immediate(static_cast<int8_t>(value)));
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderX86_64::VisitVecShr(HVecShr* instruction) {
+ CreateVecShiftLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorX86_64::VisitVecShr(HVecShr* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ DCHECK(locations->InAt(0).Equals(locations->Out()));
+ int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
+ XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ psraw(dst, Immediate(static_cast<int8_t>(value)));
+ break;
+ case Primitive::kPrimInt:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ psrad(dst, Immediate(static_cast<int8_t>(value)));
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderX86_64::VisitVecUShr(HVecUShr* instruction) {
+ CreateVecShiftLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorX86_64::VisitVecUShr(HVecUShr* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ DCHECK(locations->InAt(0).Equals(locations->Out()));
+ int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
+ XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ psrlw(dst, Immediate(static_cast<int8_t>(value)));
+ break;
+ case Primitive::kPrimInt:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ psrld(dst, Immediate(static_cast<int8_t>(value)));
+ break;
+ case Primitive::kPrimLong:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ psrlq(dst, Immediate(static_cast<int8_t>(value)));
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+// Helper to set up locations for vector memory operations.
+static void CreateVecMemLocations(ArenaAllocator* arena,
+ HVecMemoryOperation* instruction,
+ bool is_load) {
+ LocationSummary* locations = new (arena) LocationSummary(instruction);
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ case Primitive::kPrimLong:
+ case Primitive::kPrimFloat:
+ case Primitive::kPrimDouble:
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
+ if (is_load) {
+ locations->SetOut(Location::RequiresFpuRegister());
+ } else {
+ locations->SetInAt(2, Location::RequiresFpuRegister());
+ }
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+// Helper to set up registers and address for vector memory operations.
+static Address CreateVecMemRegisters(HVecMemoryOperation* instruction,
+ Location* reg_loc,
+ bool is_load) {
+ LocationSummary* locations = instruction->GetLocations();
+ Location base = locations->InAt(0);
+ Location index = locations->InAt(1);
+ *reg_loc = is_load ? locations->Out() : locations->InAt(2);
+ size_t size = Primitive::ComponentSize(instruction->GetPackedType());
+ uint32_t offset = mirror::Array::DataOffset(size).Uint32Value();
+ ScaleFactor scale = TIMES_1;
+ switch (size) {
+ case 2: scale = TIMES_2; break;
+ case 4: scale = TIMES_4; break;
+ case 8: scale = TIMES_8; break;
+ default: break;
+ }
+ return CodeGeneratorX86_64::ArrayAddress(base.AsRegister<CpuRegister>(), index, scale, offset);
+}
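
The returned Address encodes base + index * scale + data_offset, where data_offset skips the array header to reach element 0. In scalar terms (illustrative names, not ART's API):

    #include <cstddef>
    #include <cstdint>

    // Scalar model of the SIB-form address built above.
    uintptr_t VecElementAddress(uintptr_t array_base,
                                size_t index,
                                size_t component_size,  // 1, 2, 4 or 8 bytes
                                uint32_t data_offset) { // array header size
      return array_base + data_offset + index * component_size;
    }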
+
+void LocationsBuilderX86_64::VisitVecLoad(HVecLoad* instruction) {
+ CreateVecMemLocations(GetGraph()->GetArena(), instruction, /*is_load*/ true);
+}
+
+void InstructionCodeGeneratorX86_64::VisitVecLoad(HVecLoad* instruction) {
+ Location reg_loc = Location::NoLocation();
+ Address address = CreateVecMemRegisters(instruction, &reg_loc, /*is_load*/ true);
+ XmmRegister reg = reg_loc.AsFpuRegister<XmmRegister>();
+ bool is_aligned16 = instruction->GetAlignment().IsAlignedAt(16);
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ case Primitive::kPrimLong:
+ DCHECK_LE(2u, instruction->GetVectorLength());
+ DCHECK_LE(instruction->GetVectorLength(), 16u);
+ is_aligned16 ? __ movdqa(reg, address) : __ movdqu(reg, address);
+ break;
+ case Primitive::kPrimFloat:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ is_aligned16 ? __ movaps(reg, address) : __ movups(reg, address);
+ break;
+ case Primitive::kPrimDouble:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ is_aligned16 ? __ movapd(reg, address) : __ movupd(reg, address);
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderX86_64::VisitVecStore(HVecStore* instruction) {
+ CreateVecMemLocations(GetGraph()->GetArena(), instruction, /*is_load*/ false);
+}
+
+void InstructionCodeGeneratorX86_64::VisitVecStore(HVecStore* instruction) {
+ Location reg_loc = Location::NoLocation();
+ Address address = CreateVecMemRegisters(instruction, &reg_loc, /*is_load*/ false);
+ XmmRegister reg = reg_loc.AsFpuRegister<XmmRegister>();
+ bool is_aligned16 = instruction->GetAlignment().IsAlignedAt(16);
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ case Primitive::kPrimLong:
+ DCHECK_LE(2u, instruction->GetVectorLength());
+ DCHECK_LE(instruction->GetVectorLength(), 16u);
+ is_aligned16 ? __ movdqa(address, reg) : __ movdqu(address, reg);
+ break;
+ case Primitive::kPrimFloat:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ is_aligned16 ? __ movaps(address, reg) : __ movups(address, reg);
+ break;
+ case Primitive::kPrimDouble:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ is_aligned16 ? __ movapd(address, reg) : __ movupd(address, reg);
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+#undef __
+
+} // namespace x86_64
+} // namespace art
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 4db4796985..80776e8b78 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -723,7 +723,7 @@ class ReadBarrierForHeapReferenceSlowPathX86 : public SlowPathCode {
instruction_->IsArrayGet() ||
instruction_->IsInstanceOf() ||
instruction_->IsCheckCast() ||
- (instruction_->IsInvokeVirtual()) && instruction_->GetLocations()->Intrinsified())
+ (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
<< "Unexpected instruction in read barrier for heap reference slow path: "
<< instruction_->DebugName();
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 2ffc398287..49f099f6a9 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -744,7 +744,7 @@ class ReadBarrierForHeapReferenceSlowPathX86_64 : public SlowPathCode {
instruction_->IsArrayGet() ||
instruction_->IsInstanceOf() ||
instruction_->IsCheckCast() ||
- (instruction_->IsInvokeVirtual()) && instruction_->GetLocations()->Intrinsified())
+ (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
<< "Unexpected instruction in read barrier for heap reference slow path: "
<< instruction_->DebugName();
@@ -3660,7 +3660,7 @@ void InstructionCodeGeneratorX86_64::GenerateDivRemWithAnyConstant(HBinaryOperat
void InstructionCodeGeneratorX86_64::GenerateDivRemIntegral(HBinaryOperation* instruction) {
DCHECK(instruction->IsDiv() || instruction->IsRem());
Primitive::Type type = instruction->GetResultType();
- DCHECK(type == Primitive::kPrimInt || Primitive::kPrimLong);
+ DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong);
bool is_div = instruction->IsDiv();
LocationSummary* locations = instruction->GetLocations();
diff --git a/compiler/optimizing/codegen_test_utils.h b/compiler/optimizing/codegen_test_utils.h
index cd954043f5..31cd204c9f 100644
--- a/compiler/optimizing/codegen_test_utils.h
+++ b/compiler/optimizing/codegen_test_utils.h
@@ -74,7 +74,6 @@ class CodegenTargetConfig {
}
private:
- CodegenTargetConfig() {}
InstructionSet isa_;
CreateCodegenFn create_codegen_;
};
diff --git a/compiler/optimizing/common_arm.h b/compiler/optimizing/common_arm.h
index e184745520..01304ac35b 100644
--- a/compiler/optimizing/common_arm.h
+++ b/compiler/optimizing/common_arm.h
@@ -66,6 +66,11 @@ inline vixl::aarch32::SRegister LowSRegisterFrom(Location location) {
return vixl::aarch32::SRegister(location.AsFpuRegisterPairLow<vixl::aarch32::SRegister>());
}
+inline vixl::aarch32::SRegister HighSRegisterFrom(Location location) {
+ DCHECK(location.IsFpuRegisterPair()) << location;
+ return vixl::aarch32::SRegister(location.AsFpuRegisterPairHigh<vixl::aarch32::SRegister>());
+}
+
inline vixl::aarch32::Register RegisterFrom(Location location) {
DCHECK(location.IsRegister()) << location;
return vixl::aarch32::Register(location.reg());
diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc
index 0dfae11465..cc3c143b15 100644
--- a/compiler/optimizing/graph_visualizer.cc
+++ b/compiler/optimizing/graph_visualizer.cc
@@ -505,6 +505,10 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor {
StartAttributeStream("kind") << (try_boundary->IsEntry() ? "entry" : "exit");
}
+ void VisitDeoptimize(HDeoptimize* deoptimize) OVERRIDE {
+ StartAttributeStream("kind") << deoptimize->GetKind();
+ }
+
#if defined(ART_ENABLE_CODEGEN_arm) || defined(ART_ENABLE_CODEGEN_arm64)
void VisitMultiplyAccumulate(HMultiplyAccumulate* instruction) OVERRIDE {
StartAttributeStream("kind") << instruction->GetOpKind();
diff --git a/compiler/optimizing/induction_var_analysis_test.cc b/compiler/optimizing/induction_var_analysis_test.cc
index 82ee93d5c2..9516ccb385 100644
--- a/compiler/optimizing/induction_var_analysis_test.cc
+++ b/compiler/optimizing/induction_var_analysis_test.cc
@@ -29,7 +29,21 @@ namespace art {
*/
class InductionVarAnalysisTest : public CommonCompilerTest {
public:
- InductionVarAnalysisTest() : pool_(), allocator_(&pool_) {
+ InductionVarAnalysisTest()
+ : pool_(),
+ allocator_(&pool_),
+ iva_(nullptr),
+ entry_(nullptr),
+ return_(nullptr),
+ exit_(nullptr),
+ parameter_(nullptr),
+ constant0_(nullptr),
+ constant1_(nullptr),
+ constant2_(nullptr),
+ constant7_(nullptr),
+ constant100_(nullptr),
+ constantm1_(nullptr),
+ float_constant0_(nullptr) {
graph_ = CreateGraph(&allocator_);
}
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index f7331452c6..79cd7048a5 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -63,7 +63,7 @@ static constexpr size_t kMaximumNumberOfCumulatedDexRegisters = 64;
static constexpr size_t kMaximumNumberOfRecursiveCalls = 4;
// Controls the use of inline caches in AOT mode.
-static constexpr bool kUseAOTInlineCaches = false;
+static constexpr bool kUseAOTInlineCaches = true;
// We check for line numbers to make sure the DepthString implementation
// aligns the output nicely.
@@ -672,6 +672,32 @@ HInstanceFieldGet* HInliner::BuildGetReceiverClass(ClassLinker* class_linker,
return result;
}
+static ArtMethod* ResolveMethodFromInlineCache(Handle<mirror::Class> klass,
+ ArtMethod* resolved_method,
+ HInstruction* invoke_instruction,
+ PointerSize pointer_size)
+ REQUIRES_SHARED(Locks::mutator_lock_) {
+ if (Runtime::Current()->IsAotCompiler()) {
+ // We can get unrelated types when working with profiles (corruption,
+ // system updates, or anyone can write to it). So first check if the class
+ // actually implements the declaring class of the method that is being
+ // called in bytecode.
+ // Note: the lookup methods used below require assignable types.
+ if (!resolved_method->GetDeclaringClass()->IsAssignableFrom(klass.Get())) {
+ return nullptr;
+ }
+ }
+
+ if (invoke_instruction->IsInvokeInterface()) {
+ resolved_method = klass->FindVirtualMethodForInterface(resolved_method, pointer_size);
+ } else {
+ DCHECK(invoke_instruction->IsInvokeVirtual());
+ resolved_method = klass->FindVirtualMethodForVirtual(resolved_method, pointer_size);
+ }
+ DCHECK(resolved_method != nullptr);
+ return resolved_method;
+}
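
In scalar pseudo-C++ the new AOT guard reduces to the following sketch (Class, Method and ResolveFromCache are hypothetical stand-ins, not ART's types; the interface case and the actual vtable lookup are elided):

    // Hypothetical sketch of the assignability guard in ResolveMethodFromInlineCache.
    struct Class {
      const Class* super = nullptr;
      bool IsAssignableFrom(const Class* k) const {
        for (; k != nullptr; k = k->super) {
          if (k == this) return true;  // single-inheritance walk only
        }
        return false;
      }
    };
    struct Method { const Class* declaring_class; };

    const Method* ResolveFromCache(const Class* profiled_klass,
                                   const Method* resolved,
                                   bool is_aot) {
      if (is_aot &&
          !resolved->declaring_class->IsAssignableFrom(profiled_klass)) {
        return nullptr;  // untrusted profile entry: class cannot receive this call
      }
      // ... virtual/interface lookup on profiled_klass, as in the real code.
      return resolved;
    }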
+
bool HInliner::TryInlineMonomorphicCall(HInvoke* invoke_instruction,
ArtMethod* resolved_method,
Handle<mirror::ObjectArray<mirror::Class>> classes) {
@@ -690,20 +716,20 @@ bool HInliner::TryInlineMonomorphicCall(HInvoke* invoke_instruction,
ClassLinker* class_linker = caller_compilation_unit_.GetClassLinker();
PointerSize pointer_size = class_linker->GetImagePointerSize();
- if (invoke_instruction->IsInvokeInterface()) {
- resolved_method = GetMonomorphicType(classes)->FindVirtualMethodForInterface(
- resolved_method, pointer_size);
- } else {
- DCHECK(invoke_instruction->IsInvokeVirtual());
- resolved_method = GetMonomorphicType(classes)->FindVirtualMethodForVirtual(
- resolved_method, pointer_size);
- }
+ Handle<mirror::Class> monomorphic_type = handles_->NewHandle(GetMonomorphicType(classes));
+ resolved_method = ResolveMethodFromInlineCache(
+ monomorphic_type, resolved_method, invoke_instruction, pointer_size);
+
LOG_NOTE() << "Try inline monomorphic call to " << resolved_method->PrettyMethod();
- DCHECK(resolved_method != nullptr);
+ if (resolved_method == nullptr) {
+ // Bogus AOT profile, bail.
+ DCHECK(Runtime::Current()->IsAotCompiler());
+ return false;
+ }
+
HInstruction* receiver = invoke_instruction->InputAt(0);
HInstruction* cursor = invoke_instruction->GetPrevious();
HBasicBlock* bb_cursor = invoke_instruction->GetBlock();
- Handle<mirror::Class> monomorphic_type = handles_->NewHandle(GetMonomorphicType(classes));
if (!TryInlineAndReplace(invoke_instruction,
resolved_method,
ReferenceTypeInfo::Create(monomorphic_type, /* is_exact */ true),
@@ -742,7 +768,8 @@ void HInliner::AddCHAGuard(HInstruction* invoke_instruction,
HShouldDeoptimizeFlag(graph_->GetArena(), dex_pc);
HInstruction* compare = new (graph_->GetArena()) HNotEqual(
deopt_flag, graph_->GetIntConstant(0, dex_pc));
- HInstruction* deopt = new (graph_->GetArena()) HDeoptimize(compare, dex_pc);
+ HInstruction* deopt = new (graph_->GetArena()) HDeoptimize(
+ graph_->GetArena(), compare, HDeoptimize::Kind::kInline, dex_pc);
if (cursor != nullptr) {
bb_cursor->InsertInstructionAfter(deopt_flag, cursor);
@@ -806,9 +833,16 @@ HInstruction* HInliner::AddTypeGuard(HInstruction* receiver,
bb_cursor->InsertInstructionAfter(compare, load_class);
if (with_deoptimization) {
HDeoptimize* deoptimize = new (graph_->GetArena()) HDeoptimize(
- compare, invoke_instruction->GetDexPc());
+ graph_->GetArena(),
+ compare,
+ receiver,
+ HDeoptimize::Kind::kInline,
+ invoke_instruction->GetDexPc());
bb_cursor->InsertInstructionAfter(deoptimize, compare);
deoptimize->CopyEnvironmentFrom(invoke_instruction->GetEnvironment());
+ DCHECK_EQ(invoke_instruction->InputAt(0), receiver);
+ receiver->ReplaceUsesDominatedBy(deoptimize, deoptimize);
+ deoptimize->SetReferenceTypeInfo(receiver->GetReferenceTypeInfo());
}
return compare;
}
@@ -835,11 +869,14 @@ bool HInliner::TryInlinePolymorphicCall(HInvoke* invoke_instruction,
ArtMethod* method = nullptr;
Handle<mirror::Class> handle = handles_->NewHandle(classes->Get(i));
- if (invoke_instruction->IsInvokeInterface()) {
- method = handle->FindVirtualMethodForInterface(resolved_method, pointer_size);
- } else {
- DCHECK(invoke_instruction->IsInvokeVirtual());
- method = handle->FindVirtualMethodForVirtual(resolved_method, pointer_size);
+ method = ResolveMethodFromInlineCache(
+ handle, resolved_method, invoke_instruction, pointer_size);
+ if (method == nullptr) {
+ DCHECK(Runtime::Current()->IsAotCompiler());
+ // AOT profile is bogus. This loop expects to iterate over all entries,
+ // so just continue.
+ all_targets_inlined = false;
+ continue;
}
HInstruction* receiver = invoke_instruction->InputAt(0);
@@ -884,7 +921,7 @@ bool HInliner::TryInlinePolymorphicCall(HInvoke* invoke_instruction,
}
invoke_instruction->GetBlock()->RemoveInstruction(invoke_instruction);
// Because the inline cache data can be populated concurrently, we force the end of the
- // iteration. Otherhwise, we could see a new receiver type.
+ // iteration. Otherwise, we could see a new receiver type.
break;
} else {
CreateDiamondPatternForPolymorphicInline(compare, return_replacement, invoke_instruction);
@@ -1083,13 +1120,19 @@ bool HInliner::TryInlinePolymorphicCallToSameTarget(
CreateDiamondPatternForPolymorphicInline(compare, return_replacement, invoke_instruction);
} else {
HDeoptimize* deoptimize = new (graph_->GetArena()) HDeoptimize(
- compare, invoke_instruction->GetDexPc());
+ graph_->GetArena(),
+ compare,
+ receiver,
+ HDeoptimize::Kind::kInline,
+ invoke_instruction->GetDexPc());
bb_cursor->InsertInstructionAfter(deoptimize, compare);
deoptimize->CopyEnvironmentFrom(invoke_instruction->GetEnvironment());
if (return_replacement != nullptr) {
invoke_instruction->ReplaceWith(return_replacement);
}
+ receiver->ReplaceUsesDominatedBy(deoptimize, deoptimize);
invoke_instruction->GetBlock()->RemoveInstruction(invoke_instruction);
+ deoptimize->SetReferenceTypeInfo(receiver->GetReferenceTypeInfo());
}
// Run type propagation to get the guard typed.
diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc
index 17421fc364..60790e5b84 100644
--- a/compiler/optimizing/instruction_simplifier.cc
+++ b/compiler/optimizing/instruction_simplifier.cc
@@ -2132,6 +2132,9 @@ void InstructionSimplifierVisitor::VisitDeoptimize(HDeoptimize* deoptimize) {
if (cond->IsConstant()) {
if (cond->AsIntConstant()->IsFalse()) {
// Never deopt: instruction can be removed.
+ if (deoptimize->GuardsAnInput()) {
+ deoptimize->ReplaceWith(deoptimize->GuardedInput());
+ }
deoptimize->GetBlock()->RemoveInstruction(deoptimize);
} else {
// Always deopt.
diff --git a/compiler/optimizing/intrinsics_arm_vixl.cc b/compiler/optimizing/intrinsics_arm_vixl.cc
index b25bad7170..0d933eaf82 100644
--- a/compiler/optimizing/intrinsics_arm_vixl.cc
+++ b/compiler/optimizing/intrinsics_arm_vixl.cc
@@ -39,6 +39,7 @@ using helpers::Int32ConstantFrom;
using helpers::LocationFrom;
using helpers::LowRegisterFrom;
using helpers::LowSRegisterFrom;
+using helpers::HighSRegisterFrom;
using helpers::OutputDRegister;
using helpers::OutputSRegister;
using helpers::OutputRegister;
@@ -794,6 +795,58 @@ void IntrinsicCodeGeneratorARMVIXL::VisitMathRint(HInvoke* invoke) {
__ Vrintn(F64, F64, OutputDRegister(invoke), InputDRegisterAt(invoke, 0));
}
+void IntrinsicLocationsBuilderARMVIXL::VisitMathRoundFloat(HInvoke* invoke) {
+ if (features_.HasARMv8AInstructions()) {
+ LocationSummary* locations = new (arena_) LocationSummary(invoke,
+ LocationSummary::kNoCall,
+ kIntrinsified);
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetOut(Location::RequiresRegister());
+ locations->AddTemp(Location::RequiresFpuRegister());
+ }
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitMathRoundFloat(HInvoke* invoke) {
+ DCHECK(codegen_->GetInstructionSetFeatures().HasARMv8AInstructions());
+
+ ArmVIXLAssembler* assembler = GetAssembler();
+ vixl32::SRegister in_reg = InputSRegisterAt(invoke, 0);
+ vixl32::Register out_reg = OutputRegister(invoke);
+ vixl32::SRegister temp1 = LowSRegisterFrom(invoke->GetLocations()->GetTemp(0));
+ vixl32::SRegister temp2 = HighSRegisterFrom(invoke->GetLocations()->GetTemp(0));
+ vixl32::Label done;
+ vixl32::Label* final_label = codegen_->GetFinalLabel(invoke, &done);
+
+ // Round to nearest integer, ties away from zero.
+ __ Vcvta(S32, F32, temp1, in_reg);
+ __ Vmov(out_reg, temp1);
+
+ // For positive, zero or NaN inputs, rounding is done.
+ __ Cmp(out_reg, 0);
+ __ B(ge, final_label, /* far_target */ false);
+
+ // Handle input < 0 cases.
+ // If input is negative but not a tie, previous result (round to nearest) is valid.
+ // If input is a negative tie, round toward positive infinity instead: out_reg += 1.
+ __ Vrinta(F32, F32, temp1, in_reg);
+ __ Vmov(temp2, 0.5);
+ __ Vsub(F32, temp1, in_reg, temp1);
+ __ Vcmp(F32, temp1, temp2);
+ __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR);
+ {
+ // Use ExactAsemblyScope here because we are using IT.
+ ExactAssemblyScope it_scope(assembler->GetVIXLAssembler(),
+ 2 * kMaxInstructionSizeInBytes,
+ CodeBufferCheckScope::kMaximumSize);
+ __ it(eq);
+ __ add(eq, out_reg, out_reg, 1);
+ }
+
+ if (done.IsReferenced()) {
+ __ Bind(&done);
+ }
+}
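
A scalar model of the sequence, for reference: vcvta converts with ties rounded away from zero, while Java's Math.round rounds half up, so only negative ties need the +1 correction. A sketch with NaN and overflow handling elided:

    #include <cmath>
    #include <cstdint>

    // Scalar model of the vcvta + vrinta fixup above.
    int32_t MathRoundFloatModel(float in) {
      int32_t out = static_cast<int32_t>(std::lround(in));  // ties away from zero
      if (out >= 0) {
        return out;                    // non-negative results already match Java
      }
      if (in - std::round(in) == 0.5f) {
        ++out;                         // negative tie, e.g. -2.5f -> -2, not -3
      }
      return out;
    }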
+
void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPeekByte(HInvoke* invoke) {
CreateIntToIntLocations(arena_, invoke);
}
@@ -3100,7 +3153,6 @@ void IntrinsicCodeGeneratorARMVIXL::VisitIntegerValueOf(HInvoke* invoke) {
}
UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathRoundDouble) // Could be done by changing rounding mode, maybe?
-UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathRoundFloat) // Could be done by changing rounding mode, maybe?
UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeCASLong) // High register pressure.
UNIMPLEMENTED_INTRINSIC(ARMVIXL, SystemArrayCopyChar)
UNIMPLEMENTED_INTRINSIC(ARMVIXL, IntegerHighestOneBit)
diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc
index bf85b1989e..b67793c4ed 100644
--- a/compiler/optimizing/intrinsics_mips.cc
+++ b/compiler/optimizing/intrinsics_mips.cc
@@ -1514,21 +1514,31 @@ void IntrinsicCodeGeneratorMIPS::VisitThreadCurrentThread(HInvoke* invoke) {
Thread::PeerOffset<kMipsPointerSize>().Int32Value());
}
-static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
- bool can_call =
- invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject ||
- invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile;
+static void CreateIntIntIntToIntLocations(ArenaAllocator* arena,
+ HInvoke* invoke,
+ Primitive::Type type) {
+ bool can_call = kEmitCompilerReadBarrier &&
+ (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject ||
+ invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile);
LocationSummary* locations = new (arena) LocationSummary(invoke,
- can_call ?
- LocationSummary::kCallOnSlowPath :
- LocationSummary::kNoCall,
+ (can_call
+ ? LocationSummary::kCallOnSlowPath
+ : LocationSummary::kNoCall),
kIntrinsified);
locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
locations->SetInAt(1, Location::RequiresRegister());
locations->SetInAt(2, Location::RequiresRegister());
- locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+ locations->SetOut(Location::RequiresRegister(),
+ (can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap));
+ if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ // We need a temporary register for the read barrier marking slow
+ // path in InstructionCodeGeneratorMIPS::GenerateReferenceLoadWithBakerReadBarrier.
+ locations->AddTemp(Location::RequiresRegister());
+ }
}
+// Note that the caller must supply a properly aligned memory address.
+// If the address is unaligned, the behavior is undefined (atomicity is not
+// guaranteed and an exception may occur).
static void GenUnsafeGet(HInvoke* invoke,
Primitive::Type type,
bool is_volatile,
@@ -1539,49 +1549,109 @@ static void GenUnsafeGet(HInvoke* invoke,
(type == Primitive::kPrimLong) ||
(type == Primitive::kPrimNot)) << type;
MipsAssembler* assembler = codegen->GetAssembler();
+ // Target register.
+ Location trg_loc = locations->Out();
// Object pointer.
- Register base = locations->InAt(1).AsRegister<Register>();
+ Location base_loc = locations->InAt(1);
+ Register base = base_loc.AsRegister<Register>();
// The "offset" argument is passed as a "long". Since this code is for
// a 32-bit processor, we can only use 32-bit addresses, so we only
// need the low 32-bits of offset.
- Register offset_lo = invoke->GetLocations()->InAt(2).AsRegisterPairLow<Register>();
+ Location offset_loc = locations->InAt(2);
+ Register offset_lo = offset_loc.AsRegisterPairLow<Register>();
- __ Addu(TMP, base, offset_lo);
- if (is_volatile) {
- __ Sync(0);
+ if (!(kEmitCompilerReadBarrier && kUseBakerReadBarrier && (type == Primitive::kPrimNot))) {
+ __ Addu(TMP, base, offset_lo);
}
- if (type == Primitive::kPrimLong) {
- Register trg_lo = locations->Out().AsRegisterPairLow<Register>();
- Register trg_hi = locations->Out().AsRegisterPairHigh<Register>();
- if (is_R6) {
- __ Lw(trg_lo, TMP, 0);
- __ Lw(trg_hi, TMP, 4);
- } else {
- __ Lwr(trg_lo, TMP, 0);
- __ Lwl(trg_lo, TMP, 3);
- __ Lwr(trg_hi, TMP, 4);
- __ Lwl(trg_hi, TMP, 7);
+ switch (type) {
+ case Primitive::kPrimLong: {
+ Register trg_lo = trg_loc.AsRegisterPairLow<Register>();
+ Register trg_hi = trg_loc.AsRegisterPairHigh<Register>();
+ CHECK(!is_volatile); // TODO: support atomic 8-byte volatile loads.
+ if (is_R6) {
+ __ Lw(trg_lo, TMP, 0);
+ __ Lw(trg_hi, TMP, 4);
+ } else {
+ __ Lwr(trg_lo, TMP, 0);
+ __ Lwl(trg_lo, TMP, 3);
+ __ Lwr(trg_hi, TMP, 4);
+ __ Lwl(trg_hi, TMP, 7);
+ }
+ break;
}
- } else {
- Register trg = locations->Out().AsRegister<Register>();
- if (is_R6) {
- __ Lw(trg, TMP, 0);
- } else {
- __ Lwr(trg, TMP, 0);
- __ Lwl(trg, TMP, 3);
+ case Primitive::kPrimInt: {
+ Register trg = trg_loc.AsRegister<Register>();
+ if (is_R6) {
+ __ Lw(trg, TMP, 0);
+ } else {
+ __ Lwr(trg, TMP, 0);
+ __ Lwl(trg, TMP, 3);
+ }
+ if (is_volatile) {
+ __ Sync(0);
+ }
+ break;
}
- if (type == Primitive::kPrimNot) {
- __ MaybeUnpoisonHeapReference(trg);
+ case Primitive::kPrimNot: {
+ Register trg = trg_loc.AsRegister<Register>();
+ if (kEmitCompilerReadBarrier) {
+ if (kUseBakerReadBarrier) {
+ Location temp = locations->GetTemp(0);
+ codegen->GenerateReferenceLoadWithBakerReadBarrier(invoke,
+ trg_loc,
+ base,
+ /* offset */ 0U,
+ /* index */ offset_loc,
+ TIMES_1,
+ temp,
+ /* needs_null_check */ false);
+ if (is_volatile) {
+ __ Sync(0);
+ }
+ } else {
+ if (is_R6) {
+ __ Lw(trg, TMP, 0);
+ } else {
+ __ Lwr(trg, TMP, 0);
+ __ Lwl(trg, TMP, 3);
+ }
+ if (is_volatile) {
+ __ Sync(0);
+ }
+ codegen->GenerateReadBarrierSlow(invoke,
+ trg_loc,
+ trg_loc,
+ base_loc,
+ /* offset */ 0U,
+ /* index */ offset_loc);
+ }
+ } else {
+ if (is_R6) {
+ __ Lw(trg, TMP, 0);
+ } else {
+ __ Lwr(trg, TMP, 0);
+ __ Lwl(trg, TMP, 3);
+ }
+ if (is_volatile) {
+ __ Sync(0);
+ }
+ __ MaybeUnpoisonHeapReference(trg);
+ }
+ break;
}
+
+ default:
+ LOG(FATAL) << "Unexpected type " << type;
+ UNREACHABLE();
}
}
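
For context on the R6/pre-R6 split above: only MIPS32r6 guarantees that a plain lw tolerates unaligned addresses, so earlier revisions assemble the word from two partial loads. The portable C++ equivalent of the lwr/lwl pair is simply a memcpy load:

    #include <cstdint>
    #include <cstring>

    // Portable stand-in for "lwr reg, 0(p); lwl reg, 3(p)": a 32-bit load
    // from a possibly unaligned address.
    uint32_t LoadWordUnaligned(const void* p) {
      uint32_t v;
      std::memcpy(&v, p, sizeof(v));
      return v;
    }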
// int sun.misc.Unsafe.getInt(Object o, long offset)
void IntrinsicLocationsBuilderMIPS::VisitUnsafeGet(HInvoke* invoke) {
- CreateIntIntIntToIntLocations(arena_, invoke);
+ CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt);
}
void IntrinsicCodeGeneratorMIPS::VisitUnsafeGet(HInvoke* invoke) {
@@ -1590,7 +1660,7 @@ void IntrinsicCodeGeneratorMIPS::VisitUnsafeGet(HInvoke* invoke) {
// int sun.misc.Unsafe.getIntVolatile(Object o, long offset)
void IntrinsicLocationsBuilderMIPS::VisitUnsafeGetVolatile(HInvoke* invoke) {
- CreateIntIntIntToIntLocations(arena_, invoke);
+ CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt);
}
void IntrinsicCodeGeneratorMIPS::VisitUnsafeGetVolatile(HInvoke* invoke) {
@@ -1599,25 +1669,16 @@ void IntrinsicCodeGeneratorMIPS::VisitUnsafeGetVolatile(HInvoke* invoke) {
// long sun.misc.Unsafe.getLong(Object o, long offset)
void IntrinsicLocationsBuilderMIPS::VisitUnsafeGetLong(HInvoke* invoke) {
- CreateIntIntIntToIntLocations(arena_, invoke);
+ CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong);
}
void IntrinsicCodeGeneratorMIPS::VisitUnsafeGetLong(HInvoke* invoke) {
GenUnsafeGet(invoke, Primitive::kPrimLong, /* is_volatile */ false, IsR6(), codegen_);
}
-// long sun.misc.Unsafe.getLongVolatile(Object o, long offset)
-void IntrinsicLocationsBuilderMIPS::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
- CreateIntIntIntToIntLocations(arena_, invoke);
-}
-
-void IntrinsicCodeGeneratorMIPS::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
- GenUnsafeGet(invoke, Primitive::kPrimLong, /* is_volatile */ true, IsR6(), codegen_);
-}
-
// Object sun.misc.Unsafe.getObject(Object o, long offset)
void IntrinsicLocationsBuilderMIPS::VisitUnsafeGetObject(HInvoke* invoke) {
- CreateIntIntIntToIntLocations(arena_, invoke);
+ CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot);
}
void IntrinsicCodeGeneratorMIPS::VisitUnsafeGetObject(HInvoke* invoke) {
@@ -1626,7 +1687,7 @@ void IntrinsicCodeGeneratorMIPS::VisitUnsafeGetObject(HInvoke* invoke) {
// Object sun.misc.Unsafe.getObjectVolatile(Object o, long offset)
void IntrinsicLocationsBuilderMIPS::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
- CreateIntIntIntToIntLocations(arena_, invoke);
+ CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot);
}
void IntrinsicCodeGeneratorMIPS::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
@@ -1643,6 +1704,8 @@ static void CreateIntIntIntIntToVoidLocations(ArenaAllocator* arena, HInvoke* in
locations->SetInAt(3, Location::RequiresRegister());
}
+// Note that the caller must supply a properly aligned memory address.
+// If the address is unaligned, the behavior is undefined (atomicity is not
+// guaranteed and an exception may occur).
static void GenUnsafePut(LocationSummary* locations,
Primitive::Type type,
bool is_volatile,
@@ -1681,7 +1744,7 @@ static void GenUnsafePut(LocationSummary* locations,
} else {
Register value_lo = locations->InAt(3).AsRegisterPairLow<Register>();
Register value_hi = locations->InAt(3).AsRegisterPairHigh<Register>();
-
+ CHECK(!is_volatile); // TODO: support atomic 8-byte volatile stores.
if (is_R6) {
__ Sw(value_lo, TMP, 0);
__ Sw(value_hi, TMP, 4);
@@ -1815,50 +1878,71 @@ void IntrinsicCodeGeneratorMIPS::VisitUnsafePutLongOrdered(HInvoke* invoke) {
codegen_);
}
-// void sun.misc.Unsafe.putLongVolatile(Object o, long offset, long x)
-void IntrinsicLocationsBuilderMIPS::VisitUnsafePutLongVolatile(HInvoke* invoke) {
- CreateIntIntIntIntToVoidLocations(arena_, invoke);
-}
-
-void IntrinsicCodeGeneratorMIPS::VisitUnsafePutLongVolatile(HInvoke* invoke) {
- GenUnsafePut(invoke->GetLocations(),
- Primitive::kPrimLong,
- /* is_volatile */ true,
- /* is_ordered */ false,
- IsR6(),
- codegen_);
-}
-
-static void CreateIntIntIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
+static void CreateIntIntIntIntIntToIntPlusTemps(ArenaAllocator* arena, HInvoke* invoke) {
+ bool can_call = kEmitCompilerReadBarrier &&
+ kUseBakerReadBarrier &&
+ (invoke->GetIntrinsic() == Intrinsics::kUnsafeCASObject);
LocationSummary* locations = new (arena) LocationSummary(invoke,
- LocationSummary::kNoCall,
+ (can_call
+ ? LocationSummary::kCallOnSlowPath
+ : LocationSummary::kNoCall),
kIntrinsified);
locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
locations->SetInAt(1, Location::RequiresRegister());
locations->SetInAt(2, Location::RequiresRegister());
locations->SetInAt(3, Location::RequiresRegister());
locations->SetInAt(4, Location::RequiresRegister());
-
locations->SetOut(Location::RequiresRegister());
+
+ // Temporary register used in CAS by (Baker) read barrier.
+ if (can_call) {
+ locations->AddTemp(Location::RequiresRegister());
+ }
}
-static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGeneratorMIPS* codegen) {
+// Note that the caller must supply a properly aligned memory address.
+// If the address is unaligned, the behavior is undefined (atomicity is not
+// guaranteed and an exception may occur).
+static void GenCas(HInvoke* invoke, Primitive::Type type, CodeGeneratorMIPS* codegen) {
MipsAssembler* assembler = codegen->GetAssembler();
+ LocationSummary* locations = invoke->GetLocations();
bool isR6 = codegen->GetInstructionSetFeatures().IsR6();
Register base = locations->InAt(1).AsRegister<Register>();
- Register offset_lo = locations->InAt(2).AsRegisterPairLow<Register>();
+ Location offset_loc = locations->InAt(2);
+ Register offset_lo = offset_loc.AsRegisterPairLow<Register>();
Register expected = locations->InAt(3).AsRegister<Register>();
Register value = locations->InAt(4).AsRegister<Register>();
- Register out = locations->Out().AsRegister<Register>();
+ Location out_loc = locations->Out();
+ Register out = out_loc.AsRegister<Register>();
DCHECK_NE(base, out);
DCHECK_NE(offset_lo, out);
DCHECK_NE(expected, out);
if (type == Primitive::kPrimNot) {
- // Mark card for object assuming new value is stored.
+ // The only read barrier implementation supporting the
+ // UnsafeCASObject intrinsic is the Baker-style read barriers.
+ DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
+
+ // Mark card for object assuming new value is stored. Worst case, we will
+ // mark an unchanged object and scan the receiver at the next GC for nothing.
bool value_can_be_null = true; // TODO: Worth finding out this information?
codegen->MarkGCCard(base, value, value_can_be_null);
+
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ Location temp = locations->GetTemp(0);
+ // Need to make sure the reference stored in the field is a to-space
+ // one before attempting the CAS or the CAS could fail incorrectly.
+ codegen->GenerateReferenceLoadWithBakerReadBarrier(
+ invoke,
+ out_loc, // Not a real output; used only as a "temporary" within the read barrier.
+ base,
+ /* offset */ 0u,
+ /* index */ offset_loc,
+ ScaleFactor::TIMES_1,
+ temp,
+ /* needs_null_check */ false,
+ /* always_update_field */ true);
+ }
}
MipsLabel loop_head, exit_loop;
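
The retry loop built on loop_head/exit_loop implements the usual compare-and-swap contract via ll/sc. In portable C++ the integer case amounts to the following (a sketch; the real code additionally handles heap poisoning and, for references, the Baker read barrier above):

    #include <atomic>
    #include <cstdint>

    bool CompareAndSwapInt(std::atomic<int32_t>* addr,
                           int32_t expected,
                           int32_t value) {
      return addr->compare_exchange_strong(expected, value);
    }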
@@ -1926,20 +2010,30 @@ static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGenerat
// boolean sun.misc.Unsafe.compareAndSwapInt(Object o, long offset, int expected, int x)
void IntrinsicLocationsBuilderMIPS::VisitUnsafeCASInt(HInvoke* invoke) {
- CreateIntIntIntIntIntToIntLocations(arena_, invoke);
+ CreateIntIntIntIntIntToIntPlusTemps(arena_, invoke);
}
void IntrinsicCodeGeneratorMIPS::VisitUnsafeCASInt(HInvoke* invoke) {
- GenCas(invoke->GetLocations(), Primitive::kPrimInt, codegen_);
+ GenCas(invoke, Primitive::kPrimInt, codegen_);
}
// boolean sun.misc.Unsafe.compareAndSwapObject(Object o, long offset, Object expected, Object x)
void IntrinsicLocationsBuilderMIPS::VisitUnsafeCASObject(HInvoke* invoke) {
- CreateIntIntIntIntIntToIntLocations(arena_, invoke);
+ // The only read barrier implementation supporting the
+ // UnsafeCASObject intrinsic is the Baker-style read barriers.
+ if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
+ return;
+ }
+
+ CreateIntIntIntIntIntToIntPlusTemps(arena_, invoke);
}
void IntrinsicCodeGeneratorMIPS::VisitUnsafeCASObject(HInvoke* invoke) {
- GenCas(invoke->GetLocations(), Primitive::kPrimNot, codegen_);
+ // The only read barrier implementation supporting the
+ // UnsafeCASObject intrinsic is the Baker-style read barriers.
+ DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
+
+ GenCas(invoke, Primitive::kPrimNot, codegen_);
}
// int java.lang.String.compareTo(String anotherString)
@@ -2664,6 +2758,8 @@ UNIMPLEMENTED_INTRINSIC(MIPS, MathCeil)
UNIMPLEMENTED_INTRINSIC(MIPS, MathFloor)
UNIMPLEMENTED_INTRINSIC(MIPS, MathRint)
UNIMPLEMENTED_INTRINSIC(MIPS, MathRoundDouble)
+UNIMPLEMENTED_INTRINSIC(MIPS, UnsafeGetLongVolatile)
+UNIMPLEMENTED_INTRINSIC(MIPS, UnsafePutLongVolatile)
UNIMPLEMENTED_INTRINSIC(MIPS, UnsafeCASLong)
UNIMPLEMENTED_INTRINSIC(MIPS, ReferenceGetReferent)
diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc
index 1ee89cf127..6098767aae 100644
--- a/compiler/optimizing/intrinsics_mips64.cc
+++ b/compiler/optimizing/intrinsics_mips64.cc
@@ -1151,16 +1151,31 @@ void IntrinsicCodeGeneratorMIPS64::VisitThreadCurrentThread(HInvoke* invoke) {
Thread::PeerOffset<kMips64PointerSize>().Int32Value());
}
-static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
+static void CreateIntIntIntToIntLocations(ArenaAllocator* arena,
+ HInvoke* invoke,
+ Primitive::Type type) {
+ bool can_call = kEmitCompilerReadBarrier &&
+ (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject ||
+ invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile);
LocationSummary* locations = new (arena) LocationSummary(invoke,
- LocationSummary::kNoCall,
+ (can_call
+ ? LocationSummary::kCallOnSlowPath
+ : LocationSummary::kNoCall),
kIntrinsified);
locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
locations->SetInAt(1, Location::RequiresRegister());
locations->SetInAt(2, Location::RequiresRegister());
- locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+ locations->SetOut(Location::RequiresRegister(),
+ (can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap));
+ if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ // We need a temporary register for the read barrier marking slow
+ // path in InstructionCodeGeneratorMIPS64::GenerateReferenceLoadWithBakerReadBarrier.
+ locations->AddTemp(Location::RequiresRegister());
+ }
}
+// Note that the caller must supply a properly aligned memory address.
+// If the address is unaligned, the behavior is undefined (atomicity is not
+// guaranteed and an exception may occur).
static void GenUnsafeGet(HInvoke* invoke,
Primitive::Type type,
bool is_volatile,
@@ -1168,30 +1183,71 @@ static void GenUnsafeGet(HInvoke* invoke,
LocationSummary* locations = invoke->GetLocations();
DCHECK((type == Primitive::kPrimInt) ||
(type == Primitive::kPrimLong) ||
- (type == Primitive::kPrimNot));
+ (type == Primitive::kPrimNot)) << type;
Mips64Assembler* assembler = codegen->GetAssembler();
+ // Target register.
+ Location trg_loc = locations->Out();
+ GpuRegister trg = trg_loc.AsRegister<GpuRegister>();
// Object pointer.
- GpuRegister base = locations->InAt(1).AsRegister<GpuRegister>();
+ Location base_loc = locations->InAt(1);
+ GpuRegister base = base_loc.AsRegister<GpuRegister>();
// Long offset.
- GpuRegister offset = locations->InAt(2).AsRegister<GpuRegister>();
- GpuRegister trg = locations->Out().AsRegister<GpuRegister>();
+ Location offset_loc = locations->InAt(2);
+ GpuRegister offset = offset_loc.AsRegister<GpuRegister>();
- __ Daddu(TMP, base, offset);
- if (is_volatile) {
- __ Sync(0);
+ if (!(kEmitCompilerReadBarrier && kUseBakerReadBarrier && (type == Primitive::kPrimNot))) {
+ __ Daddu(TMP, base, offset);
}
+
switch (type) {
+ case Primitive::kPrimLong:
+ __ Ld(trg, TMP, 0);
+ if (is_volatile) {
+ __ Sync(0);
+ }
+ break;
+
case Primitive::kPrimInt:
__ Lw(trg, TMP, 0);
+ if (is_volatile) {
+ __ Sync(0);
+ }
break;
case Primitive::kPrimNot:
- __ Lwu(trg, TMP, 0);
- __ MaybeUnpoisonHeapReference(trg);
- break;
-
- case Primitive::kPrimLong:
- __ Ld(trg, TMP, 0);
+ if (kEmitCompilerReadBarrier) {
+ if (kUseBakerReadBarrier) {
+ Location temp = locations->GetTemp(0);
+ codegen->GenerateReferenceLoadWithBakerReadBarrier(invoke,
+ trg_loc,
+ base,
+ /* offset */ 0U,
+ /* index */ offset_loc,
+ TIMES_1,
+ temp,
+ /* needs_null_check */ false);
+ if (is_volatile) {
+ __ Sync(0);
+ }
+ } else {
+ __ Lwu(trg, TMP, 0);
+ if (is_volatile) {
+ __ Sync(0);
+ }
+ codegen->GenerateReadBarrierSlow(invoke,
+ trg_loc,
+ trg_loc,
+ base_loc,
+ /* offset */ 0U,
+ /* index */ offset_loc);
+ }
+ } else {
+ __ Lwu(trg, TMP, 0);
+ if (is_volatile) {
+ __ Sync(0);
+ }
+ __ MaybeUnpoisonHeapReference(trg);
+ }
break;
default:
@@ -1202,7 +1258,7 @@ static void GenUnsafeGet(HInvoke* invoke,
// int sun.misc.Unsafe.getInt(Object o, long offset)
void IntrinsicLocationsBuilderMIPS64::VisitUnsafeGet(HInvoke* invoke) {
- CreateIntIntIntToIntLocations(arena_, invoke);
+ CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt);
}
void IntrinsicCodeGeneratorMIPS64::VisitUnsafeGet(HInvoke* invoke) {
@@ -1211,7 +1267,7 @@ void IntrinsicCodeGeneratorMIPS64::VisitUnsafeGet(HInvoke* invoke) {
// int sun.misc.Unsafe.getIntVolatile(Object o, long offset)
void IntrinsicLocationsBuilderMIPS64::VisitUnsafeGetVolatile(HInvoke* invoke) {
- CreateIntIntIntToIntLocations(arena_, invoke);
+ CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt);
}
void IntrinsicCodeGeneratorMIPS64::VisitUnsafeGetVolatile(HInvoke* invoke) {
@@ -1220,7 +1276,7 @@ void IntrinsicCodeGeneratorMIPS64::VisitUnsafeGetVolatile(HInvoke* invoke) {
// long sun.misc.Unsafe.getLong(Object o, long offset)
void IntrinsicLocationsBuilderMIPS64::VisitUnsafeGetLong(HInvoke* invoke) {
- CreateIntIntIntToIntLocations(arena_, invoke);
+ CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong);
}
void IntrinsicCodeGeneratorMIPS64::VisitUnsafeGetLong(HInvoke* invoke) {
@@ -1229,7 +1285,7 @@ void IntrinsicCodeGeneratorMIPS64::VisitUnsafeGetLong(HInvoke* invoke) {
// long sun.misc.Unsafe.getLongVolatile(Object o, long offset)
void IntrinsicLocationsBuilderMIPS64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
- CreateIntIntIntToIntLocations(arena_, invoke);
+ CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong);
}
void IntrinsicCodeGeneratorMIPS64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
@@ -1238,7 +1294,7 @@ void IntrinsicCodeGeneratorMIPS64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
// Object sun.misc.Unsafe.getObject(Object o, long offset)
void IntrinsicLocationsBuilderMIPS64::VisitUnsafeGetObject(HInvoke* invoke) {
- CreateIntIntIntToIntLocations(arena_, invoke);
+ CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot);
}
void IntrinsicCodeGeneratorMIPS64::VisitUnsafeGetObject(HInvoke* invoke) {
@@ -1247,7 +1303,7 @@ void IntrinsicCodeGeneratorMIPS64::VisitUnsafeGetObject(HInvoke* invoke) {
// Object sun.misc.Unsafe.getObjectVolatile(Object o, long offset)
void IntrinsicLocationsBuilderMIPS64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
- CreateIntIntIntToIntLocations(arena_, invoke);
+ CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot);
}
void IntrinsicCodeGeneratorMIPS64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
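
The visitor bodies are elided by the diff context, but each one presumably forwards
to GenUnsafeGet with the element type and volatility baked in, following the usual
pattern:

    void IntrinsicCodeGeneratorMIPS64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
      GenUnsafeGet(invoke, Primitive::kPrimLong, /* is_volatile */ true, codegen_);
    }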
@@ -1264,6 +1320,8 @@ static void CreateIntIntIntIntToVoid(ArenaAllocator* arena, HInvoke* invoke) {
locations->SetInAt(3, Location::RequiresRegister());
}
+// Note that the caller must supply a properly aligned memory address.
+// If they do not, the behavior is undefined (atomicity is not guaranteed and an exception may occur).
static void GenUnsafePut(LocationSummary* locations,
Primitive::Type type,
bool is_volatile,
@@ -1429,35 +1487,70 @@ void IntrinsicCodeGeneratorMIPS64::VisitUnsafePutLongVolatile(HInvoke* invoke) {
codegen_);
}
-static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena, HInvoke* invoke) {
+static void CreateIntIntIntIntIntToIntPlusTemps(ArenaAllocator* arena, HInvoke* invoke) {
+ bool can_call = kEmitCompilerReadBarrier &&
+ kUseBakerReadBarrier &&
+ (invoke->GetIntrinsic() == Intrinsics::kUnsafeCASObject);
LocationSummary* locations = new (arena) LocationSummary(invoke,
- LocationSummary::kNoCall,
+ (can_call
+ ? LocationSummary::kCallOnSlowPath
+ : LocationSummary::kNoCall),
kIntrinsified);
locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
locations->SetInAt(1, Location::RequiresRegister());
locations->SetInAt(2, Location::RequiresRegister());
locations->SetInAt(3, Location::RequiresRegister());
locations->SetInAt(4, Location::RequiresRegister());
-
locations->SetOut(Location::RequiresRegister());
+
+ // Temporary register used in CAS by (Baker) read barrier.
+ if (can_call) {
+ locations->AddTemp(Location::RequiresRegister());
+ }
}
-static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGeneratorMIPS64* codegen) {
+// Note that the caller must supply a properly aligned memory address.
+// If they do not, the behavior is undefined (atomicity is not guaranteed and an exception may occur).
+static void GenCas(HInvoke* invoke, Primitive::Type type, CodeGeneratorMIPS64* codegen) {
Mips64Assembler* assembler = codegen->GetAssembler();
+ LocationSummary* locations = invoke->GetLocations();
GpuRegister base = locations->InAt(1).AsRegister<GpuRegister>();
- GpuRegister offset = locations->InAt(2).AsRegister<GpuRegister>();
+ Location offset_loc = locations->InAt(2);
+ GpuRegister offset = offset_loc.AsRegister<GpuRegister>();
GpuRegister expected = locations->InAt(3).AsRegister<GpuRegister>();
GpuRegister value = locations->InAt(4).AsRegister<GpuRegister>();
- GpuRegister out = locations->Out().AsRegister<GpuRegister>();
+ Location out_loc = locations->Out();
+ GpuRegister out = out_loc.AsRegister<GpuRegister>();
DCHECK_NE(base, out);
DCHECK_NE(offset, out);
DCHECK_NE(expected, out);
if (type == Primitive::kPrimNot) {
- // Mark card for object assuming new value is stored.
+ // The only read barrier implementation supporting the
+ // UnsafeCASObject intrinsic is the Baker-style read barriers.
+ DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
+
+ // Mark card for object assuming new value is stored. Worst case we will mark an unchanged
+ // object and scan the receiver at the next GC for nothing.
bool value_can_be_null = true; // TODO: Worth finding out this information?
codegen->MarkGCCard(base, value, value_can_be_null);
+
+ if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+ Location temp = locations->GetTemp(0);
+ // Need to make sure the reference stored in the field is a to-space
+ // one before attempting the CAS or the CAS could fail incorrectly.
+ codegen->GenerateReferenceLoadWithBakerReadBarrier(
+ invoke,
+ out_loc, // Unused, used only as a "temporary" within the read barrier.
+ base,
+ /* offset */ 0u,
+ /* index */ offset_loc,
+ ScaleFactor::TIMES_1,
+ temp,
+ /* needs_null_check */ false,
+ /* always_update_field */ true);
+ }
}
Mips64Label loop_head, exit_loop;
@@ -1521,29 +1614,39 @@ static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGenerat
// boolean sun.misc.Unsafe.compareAndSwapInt(Object o, long offset, int expected, int x)
void IntrinsicLocationsBuilderMIPS64::VisitUnsafeCASInt(HInvoke* invoke) {
- CreateIntIntIntIntIntToInt(arena_, invoke);
+ CreateIntIntIntIntIntToIntPlusTemps(arena_, invoke);
}
void IntrinsicCodeGeneratorMIPS64::VisitUnsafeCASInt(HInvoke* invoke) {
- GenCas(invoke->GetLocations(), Primitive::kPrimInt, codegen_);
+ GenCas(invoke, Primitive::kPrimInt, codegen_);
}
// boolean sun.misc.Unsafe.compareAndSwapLong(Object o, long offset, long expected, long x)
void IntrinsicLocationsBuilderMIPS64::VisitUnsafeCASLong(HInvoke* invoke) {
- CreateIntIntIntIntIntToInt(arena_, invoke);
+ CreateIntIntIntIntIntToIntPlusTemps(arena_, invoke);
}
void IntrinsicCodeGeneratorMIPS64::VisitUnsafeCASLong(HInvoke* invoke) {
- GenCas(invoke->GetLocations(), Primitive::kPrimLong, codegen_);
+ GenCas(invoke, Primitive::kPrimLong, codegen_);
}
// boolean sun.misc.Unsafe.compareAndSwapObject(Object o, long offset, Object expected, Object x)
void IntrinsicLocationsBuilderMIPS64::VisitUnsafeCASObject(HInvoke* invoke) {
- CreateIntIntIntIntIntToInt(arena_, invoke);
+ // The only read barrier implementation supporting the
+ // UnsafeCASObject intrinsic is the Baker-style read barriers.
+ if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
+ return;
+ }
+
+ CreateIntIntIntIntIntToIntPlusTemps(arena_, invoke);
}
void IntrinsicCodeGeneratorMIPS64::VisitUnsafeCASObject(HInvoke* invoke) {
- GenCas(invoke->GetLocations(), Primitive::kPrimNot, codegen_);
+ // The only read barrier implementation supporting the
+ // UnsafeCASObject intrinsic is the Baker-style read barriers.
+ DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
+
+ GenCas(invoke, Primitive::kPrimNot, codegen_);
}
// int java.lang.String.compareTo(String anotherString)
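
For readers unfamiliar with the MIPS64 CAS sequence: GenCas ends in a load-linked/
store-conditional loop that this hunk elides. A rough sketch of the emitted shape
for the 32-bit case, assuming the surrounding code follows the usual ART pattern
(TMP already holds base + offset; final result materialization omitted):

    __ Sync(0);                          // full barrier before the update
    __ Bind(&loop_head);
    __ Ll(out, TMP);                     // load-linked
    __ Bnec(out, expected, &exit_loop);  // current value differs: fail
    __ Move(out, value);
    __ Sc(out, TMP);                     // store-conditional; out = 1 on success
    __ Beqzc(out, &loop_head);           // reservation lost: retry
    __ Bind(&exit_loop);
    __ Sync(0);                          // full barrier after the update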
diff --git a/compiler/optimizing/licm_test.cc b/compiler/optimizing/licm_test.cc
index 5bcfa4c98b..8d15f78cce 100644
--- a/compiler/optimizing/licm_test.cc
+++ b/compiler/optimizing/licm_test.cc
@@ -28,7 +28,18 @@ namespace art {
*/
class LICMTest : public CommonCompilerTest {
public:
- LICMTest() : pool_(), allocator_(&pool_) {
+ LICMTest()
+ : pool_(),
+ allocator_(&pool_),
+ entry_(nullptr),
+ loop_preheader_(nullptr),
+ loop_header_(nullptr),
+ loop_body_(nullptr),
+ return_(nullptr),
+ exit_(nullptr),
+ parameter_(nullptr),
+ int_constant_(nullptr),
+ float_constant_(nullptr) {
graph_ = CreateGraph(&allocator_);
}
diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc
index 8df513f410..42ed04dfa3 100644
--- a/compiler/optimizing/loop_optimization.cc
+++ b/compiler/optimizing/loop_optimization.cc
@@ -16,11 +16,21 @@
#include "loop_optimization.h"
+#include "arch/instruction_set.h"
+#include "arch/arm/instruction_set_features_arm.h"
+#include "arch/arm64/instruction_set_features_arm64.h"
+#include "arch/mips/instruction_set_features_mips.h"
+#include "arch/mips64/instruction_set_features_mips64.h"
+#include "arch/x86/instruction_set_features_x86.h"
+#include "arch/x86_64/instruction_set_features_x86_64.h"
#include "driver/compiler_driver.h"
#include "linear_order.h"
namespace art {
+// Enables vectorization (SIMDization) in the loop optimizer.
+static constexpr bool kEnableVectorization = true;
+
// Remove the instruction from the graph. A bit more elaborate than the usual
// instruction removal, since there may be a cycle in the use structure.
static void RemoveFromCycle(HInstruction* instruction) {
@@ -53,6 +63,19 @@ static bool IsEarlyExit(HLoopInformation* loop_info) {
return false;
}
+// Test vector restrictions.
+static bool HasVectorRestrictions(uint64_t restrictions, uint64_t tested) {
+ return (restrictions & tested) != 0;
+}
+
+// Inserts an instruction.
+static HInstruction* Insert(HBasicBlock* block, HInstruction* instruction) {
+ DCHECK(block != nullptr);
+ DCHECK(instruction != nullptr);
+ block->InsertInstructionBefore(instruction, block->GetLastInstruction());
+ return instruction;
+}
+
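
A usage note, since Insert() is relied on throughout the synthesis code below: it
places the new instruction immediately before the block's last instruction, so a
preheader that ends in { ..., Goto } becomes { ..., v, Goto } and the control-flow
instruction stays last. An illustrative call (names hypothetical):

    HInstruction* v = Insert(preheader, new (global_allocator_) HAdd(type, a, b));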
//
// Class methods.
//
@@ -64,11 +87,15 @@ HLoopOptimization::HLoopOptimization(HGraph* graph,
compiler_driver_(compiler_driver),
induction_range_(induction_analysis),
loop_allocator_(nullptr),
+ global_allocator_(graph_->GetArena()),
top_loop_(nullptr),
last_loop_(nullptr),
iset_(nullptr),
induction_simplication_count_(0),
- simplified_(false) {
+ simplified_(false),
+ vector_length_(0),
+ vector_refs_(nullptr),
+ vector_map_(nullptr) {
}
void HLoopOptimization::Run() {
@@ -81,15 +108,13 @@ void HLoopOptimization::Run() {
// Phase-local allocator that draws from the global pool. Since the allocator
// itself resides on the stack, it is destructed on exiting Run(), which
// implies its underlying memory is released immediately.
- ArenaAllocator allocator(graph_->GetArena()->GetArenaPool());
+ ArenaAllocator allocator(global_allocator_->GetArenaPool());
loop_allocator_ = &allocator;
// Perform loop optimizations.
LocalRun();
-
if (top_loop_ == nullptr) {
- // All loops have been eliminated.
- graph_->SetHasLoops(false);
+ graph_->SetHasLoops(false); // no more loops
}
// Detach.
@@ -111,18 +136,29 @@ void HLoopOptimization::LocalRun() {
}
// Traverse the loop hierarchy inner-to-outer and optimize. Traversal can use
- // a temporary set that stores instructions using the phase-local allocator.
+ // temporary data structures using the phase-local allocator. All new HIR
+ // should use the global allocator.
if (top_loop_ != nullptr) {
ArenaSet<HInstruction*> iset(loop_allocator_->Adapter(kArenaAllocLoopOptimization));
+ ArenaSet<ArrayReference> refs(loop_allocator_->Adapter(kArenaAllocLoopOptimization));
+ ArenaSafeMap<HInstruction*, HInstruction*> map(
+ std::less<HInstruction*>(), loop_allocator_->Adapter(kArenaAllocLoopOptimization));
+ // Attach.
iset_ = &iset;
+ vector_refs_ = &refs;
+ vector_map_ = &map;
+ // Traverse.
TraverseLoopsInnerToOuter(top_loop_);
- iset_ = nullptr; // detach
+ // Detach.
+ iset_ = nullptr;
+ vector_refs_ = nullptr;
+ vector_map_ = nullptr;
}
}
void HLoopOptimization::AddLoop(HLoopInformation* loop_info) {
DCHECK(loop_info != nullptr);
- LoopNode* node = new (loop_allocator_) LoopNode(loop_info); // phase-local allocator
+ LoopNode* node = new (loop_allocator_) LoopNode(loop_info);
if (last_loop_ == nullptr) {
// First loop.
DCHECK(top_loop_ == nullptr);
@@ -170,7 +206,7 @@ void HLoopOptimization::RemoveLoop(LoopNode* node) {
void HLoopOptimization::TraverseLoopsInnerToOuter(LoopNode* node) {
for ( ; node != nullptr; node = node->next) {
// Visit inner loops first.
- int current_induction_simplification_count = induction_simplication_count_;
+ uint32_t current_induction_simplification_count = induction_simplication_count_;
if (node->inner != nullptr) {
TraverseLoopsInnerToOuter(node->inner);
}
@@ -179,7 +215,7 @@ void HLoopOptimization::TraverseLoopsInnerToOuter(LoopNode* node) {
if (current_induction_simplification_count != induction_simplication_count_) {
induction_range_.ReVisit(node->loop_info);
}
- // Repeat simplifications in the body of this loop until no more changes occur.
+ // Repeat simplifications in the loop-body until no more changes occur.
// Note that since each simplification consists of eliminating code (without
// introducing new code), this process is always finite.
do {
@@ -187,13 +223,17 @@ void HLoopOptimization::TraverseLoopsInnerToOuter(LoopNode* node) {
SimplifyInduction(node);
SimplifyBlocks(node);
} while (simplified_);
- // Simplify inner loop.
+ // Optimize inner loop.
if (node->inner == nullptr) {
- SimplifyInnerLoop(node);
+ OptimizeInnerLoop(node);
}
}
}
+//
+// Optimization.
+//
+
void HLoopOptimization::SimplifyInduction(LoopNode* node) {
HBasicBlock* header = node->loop_info->GetHeader();
HBasicBlock* preheader = node->loop_info->GetPreHeader();
@@ -204,13 +244,9 @@ void HLoopOptimization::SimplifyInduction(LoopNode* node) {
// for (int i = 0; i < 10; i++, k++) { .... no k .... } return k;
for (HInstructionIterator it(header->GetPhis()); !it.Done(); it.Advance()) {
HPhi* phi = it.Current()->AsPhi();
- iset_->clear();
- int32_t use_count = 0;
- if (IsPhiInduction(phi) &&
- IsOnlyUsedAfterLoop(node->loop_info, phi, /*collect_loop_uses*/ false, &use_count) &&
- // No uses, or no early-exit with proper replacement.
- (use_count == 0 ||
- (!IsEarlyExit(node->loop_info) && TryReplaceWithLastValue(phi, preheader)))) {
+ iset_->clear(); // prepare phi induction
+ if (TrySetPhiInduction(phi, /*restrict_uses*/ true) &&
+ TryAssignLastValue(node->loop_info, phi, preheader, /*collect_loop_uses*/ false)) {
for (HInstruction* i : *iset_) {
RemoveFromCycle(i);
}
@@ -256,49 +292,47 @@ void HLoopOptimization::SimplifyBlocks(LoopNode* node) {
}
}
-bool HLoopOptimization::SimplifyInnerLoop(LoopNode* node) {
+void HLoopOptimization::OptimizeInnerLoop(LoopNode* node) {
HBasicBlock* header = node->loop_info->GetHeader();
HBasicBlock* preheader = node->loop_info->GetPreHeader();
// Ensure loop header logic is finite.
- int64_t tc = 0;
- if (!induction_range_.IsFinite(node->loop_info, &tc)) {
- return false;
+ int64_t trip_count = 0;
+ if (!induction_range_.IsFinite(node->loop_info, &trip_count)) {
+ return;
}
+
// Ensure there is only a single loop-body (besides the header).
HBasicBlock* body = nullptr;
for (HBlocksInLoopIterator it(*node->loop_info); !it.Done(); it.Advance()) {
if (it.Current() != header) {
if (body != nullptr) {
- return false;
+ return;
}
body = it.Current();
}
}
// Ensure there is only a single exit point.
if (header->GetSuccessors().size() != 2) {
- return false;
+ return;
}
HBasicBlock* exit = (header->GetSuccessors()[0] == body)
? header->GetSuccessors()[1]
: header->GetSuccessors()[0];
// Ensure exit can only be reached by exiting loop.
if (exit->GetPredecessors().size() != 1) {
- return false;
+ return;
}
// Detect either an empty loop (no side effects other than plain iteration) or
// a trivial loop (just iterating once). Replace subsequent index uses, if any,
// with the last value and remove the loop, possibly after unrolling its body.
HInstruction* phi = header->GetFirstPhi();
- iset_->clear();
- int32_t use_count = 0;
- if (IsEmptyHeader(header)) {
+ iset_->clear(); // prepare phi induction
+ if (TrySetSimpleLoopHeader(header)) {
bool is_empty = IsEmptyBody(body);
- if ((is_empty || tc == 1) &&
- IsOnlyUsedAfterLoop(node->loop_info, phi, /*collect_loop_uses*/ true, &use_count) &&
- // No uses, or proper replacement.
- (use_count == 0 || TryReplaceWithLastValue(phi, preheader))) {
+ if ((is_empty || trip_count == 1) &&
+ TryAssignLastValue(node->loop_info, phi, preheader, /*collect_loop_uses*/ true)) {
if (!is_empty) {
- // Unroll the loop body, which sees initial value of the index.
+ // Unroll the loop-body, which sees initial value of the index.
phi->ReplaceWith(phi->InputAt(0));
preheader->MergeInstructionsWith(body);
}
@@ -308,28 +342,649 @@ bool HLoopOptimization::SimplifyInnerLoop(LoopNode* node) {
header->RemoveDominatedBlock(exit);
header->DisconnectAndDelete();
preheader->AddSuccessor(exit);
- preheader->AddInstruction(new (graph_->GetArena()) HGoto()); // global allocator
+ preheader->AddInstruction(new (global_allocator_) HGoto());
preheader->AddDominatedBlock(exit);
exit->SetDominator(preheader);
RemoveLoop(node); // update hierarchy
+ return;
+ }
+ }
+
+ // Vectorize loop, if possible and valid.
+ if (kEnableVectorization) {
+ iset_->clear(); // prepare phi induction
+ if (TrySetSimpleLoopHeader(header) &&
+ CanVectorize(node, body, trip_count) &&
+ TryAssignLastValue(node->loop_info, phi, preheader, /*collect_loop_uses*/ true)) {
+ Vectorize(node, body, exit, trip_count);
+ graph_->SetHasSIMD(true); // flag SIMD usage
+ return;
+ }
+ }
+}
+
+//
+// Loop vectorization. The implementation is based on the book by Aart J.C. Bik:
+// "The Software Vectorization Handbook. Applying Multimedia Extensions for Maximum Performance."
+// Intel Press, June, 2004 (http://www.aartbik.com/).
+//
+
+bool HLoopOptimization::CanVectorize(LoopNode* node, HBasicBlock* block, int64_t trip_count) {
+ // Reset vector bookkeeping.
+ vector_length_ = 0;
+ vector_refs_->clear();
+ vector_runtime_test_a_ =
+      vector_runtime_test_b_ = nullptr;
+
+ // Phis in the loop-body prevent vectorization.
+ if (!block->GetPhis().IsEmpty()) {
+ return false;
+ }
+
+ // Scan the loop-body, starting a right-hand-side tree traversal at each left-hand-side
+  // occurrence, which allows passing attributes down the use tree.
+ for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
+ if (!VectorizeDef(node, it.Current(), /*generate_code*/ false)) {
+ return false; // failure to vectorize a left-hand-side
+ }
+ }
+
+ // Heuristics. Does vectorization seem profitable?
+ // TODO: refine
+ if (vector_length_ == 0) {
+ return false; // nothing found
+ } else if (0 < trip_count && trip_count < vector_length_) {
+ return false; // insufficient iterations
+ }
+
+ // Data dependence analysis. Find each pair of references with same type, where
+ // at least one is a write. Each such pair denotes a possible data dependence.
+ // This analysis exploits the property that differently typed arrays cannot be
+ // aliased, as well as the property that references either point to the same
+ // array or to two completely disjoint arrays, i.e., no partial aliasing.
+  // Other than a few simple heuristics, no detailed subscript analysis is done.
+ for (auto i = vector_refs_->begin(); i != vector_refs_->end(); ++i) {
+ for (auto j = i; ++j != vector_refs_->end(); ) {
+ if (i->type == j->type && (i->lhs || j->lhs)) {
+ // Found same-typed a[i+x] vs. b[i+y], where at least one is a write.
+ HInstruction* a = i->base;
+ HInstruction* b = j->base;
+ HInstruction* x = i->offset;
+ HInstruction* y = j->offset;
+ if (a == b) {
+ // Found a[i+x] vs. a[i+y]. Accept if x == y (loop-independent data dependence).
+ // Conservatively assume a loop-carried data dependence otherwise, and reject.
+ if (x != y) {
+ return false;
+ }
+ } else {
+ // Found a[i+x] vs. b[i+y]. Accept if x == y (at worst loop-independent data dependence).
+ // Conservatively assume a potential loop-carried data dependence otherwise, avoided by
+ // generating an explicit a != b disambiguation runtime test on the two references.
+ if (x != y) {
+ // For now, we reject after one test to avoid excessive overhead.
+ if (vector_runtime_test_a_ != nullptr) {
+ return false;
+ }
+ vector_runtime_test_a_ = a;
+ vector_runtime_test_b_ = b;
+ }
+ }
+ }
+ }
+ }
+
+ // Success!
+ return true;
+}
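
To make the dependence rules above concrete, consider unit-stride references in a
loop over i (array names illustrative):

    // a[i] = a[i] + 1;      same base, x == y: loop-independent, accepted
    // a[i] = a[i + 1];      same base, x != y: assumed loop-carried, rejected
    // a[i] = b[i] + 1;      different bases, x == y: accepted as-is
    // a[i] = b[i + 1];      different bases, x != y: accepted once, guarded by
    //                       the generated a != b disambiguation runtime test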
+
+void HLoopOptimization::Vectorize(LoopNode* node,
+ HBasicBlock* block,
+ HBasicBlock* exit,
+ int64_t trip_count) {
+ Primitive::Type induc_type = Primitive::kPrimInt;
+ HBasicBlock* header = node->loop_info->GetHeader();
+ HBasicBlock* preheader = node->loop_info->GetPreHeader();
+
+ // A cleanup is needed for any unknown trip count or for a known trip count
+ // with remainder iterations after vectorization.
+ bool needs_cleanup = trip_count == 0 || (trip_count % vector_length_) != 0;
+
+ // Adjust vector bookkeeping.
+ iset_->clear(); // prepare phi induction
+ bool is_simple_loop_header = TrySetSimpleLoopHeader(header); // fills iset_
+ DCHECK(is_simple_loop_header);
+
+ // Generate preheader:
+ // stc = <trip-count>;
+ // vtc = stc - stc % VL;
+ HInstruction* stc = induction_range_.GenerateTripCount(node->loop_info, graph_, preheader);
+ HInstruction* vtc = stc;
+ if (needs_cleanup) {
+ DCHECK(IsPowerOfTwo(vector_length_));
+ HInstruction* rem = Insert(
+ preheader, new (global_allocator_) HAnd(induc_type,
+ stc,
+ graph_->GetIntConstant(vector_length_ - 1)));
+ vtc = Insert(preheader, new (global_allocator_) HSub(induc_type, stc, rem));
+ }
+
+ // Generate runtime disambiguation test:
+ // vtc = a != b ? vtc : 0;
+ if (vector_runtime_test_a_ != nullptr) {
+ HInstruction* rt = Insert(
+ preheader,
+ new (global_allocator_) HNotEqual(vector_runtime_test_a_, vector_runtime_test_b_));
+ vtc = Insert(preheader,
+ new (global_allocator_) HSelect(rt, vtc, graph_->GetIntConstant(0), kNoDexPc));
+ needs_cleanup = true;
+ }
+
+ // Generate vector loop:
+ // for (i = 0; i < vtc; i += VL)
+ // <vectorized-loop-body>
+ vector_mode_ = kVector;
+ GenerateNewLoop(node,
+ block,
+ graph_->TransformLoopForVectorization(header, block, exit),
+ graph_->GetIntConstant(0),
+ vtc,
+ graph_->GetIntConstant(vector_length_));
+ HLoopInformation* vloop = vector_header_->GetLoopInformation();
+
+ // Generate cleanup loop, if needed:
+ // for ( ; i < stc; i += 1)
+ // <loop-body>
+ if (needs_cleanup) {
+ vector_mode_ = kSequential;
+ GenerateNewLoop(node,
+ block,
+ graph_->TransformLoopForVectorization(vector_header_, vector_body_, exit),
+ vector_phi_,
+ stc,
+ graph_->GetIntConstant(1));
+ }
+
+ // Remove the original loop by disconnecting the body block
+ // and removing all instructions from the header.
+ block->DisconnectAndDelete();
+ while (!header->GetFirstInstruction()->IsGoto()) {
+ header->RemoveInstruction(header->GetFirstInstruction());
+ }
+ // Update loop hierarchy: the old header now resides in the
+ // same outer loop as the old preheader.
+ header->SetLoopInformation(preheader->GetLoopInformation()); // outward
+ node->loop_info = vloop;
+}
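
Taken together, the synthesis above rewrites the original loop into a main vector
loop plus an optional scalar cleanup loop; schematically, with VL the selected
vector length and stc/vtc as in the code:

    // stc = <trip-count>;
    // vtc = stc - (stc & (VL - 1));      // round down to a multiple of VL
    // vtc = (a != b) ? vtc : 0;          // optional runtime disambiguation
    // for (i = 0; i < vtc; i += VL) { <vectorized-body> }
    // for (     ; i < stc; i += 1)  { <scalar-body> }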
+
+void HLoopOptimization::GenerateNewLoop(LoopNode* node,
+ HBasicBlock* block,
+ HBasicBlock* new_preheader,
+ HInstruction* lo,
+ HInstruction* hi,
+ HInstruction* step) {
+ Primitive::Type induc_type = Primitive::kPrimInt;
+ // Prepare new loop.
+ vector_map_->clear();
+  vector_preheader_ = new_preheader;
+ vector_header_ = vector_preheader_->GetSingleSuccessor();
+ vector_body_ = vector_header_->GetSuccessors()[1];
+ vector_phi_ = new (global_allocator_) HPhi(global_allocator_,
+ kNoRegNumber,
+ 0,
+ HPhi::ToPhiType(induc_type));
+ // Generate header.
+ // for (i = lo; i < hi; i += step)
+ // <loop-body>
+ HInstruction* cond = new (global_allocator_) HAboveOrEqual(vector_phi_, hi);
+ vector_header_->AddPhi(vector_phi_);
+ vector_header_->AddInstruction(cond);
+ vector_header_->AddInstruction(new (global_allocator_) HIf(cond));
+ // Suspend check and environment.
+ HInstruction* suspend = vector_header_->GetFirstInstruction();
+ suspend->CopyEnvironmentFromWithLoopPhiAdjustment(
+ node->loop_info->GetSuspendCheck()->GetEnvironment(), vector_header_);
+ // Generate body.
+ for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
+ bool vectorized_def = VectorizeDef(node, it.Current(), /*generate_code*/ true);
+ DCHECK(vectorized_def);
+ }
+ for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
+ auto i = vector_map_->find(it.Current());
+ if (i != vector_map_->end() && !i->second->IsInBlock()) {
+ Insert(vector_body_, i->second); // lays out in original order
+ if (i->second->NeedsEnvironment()) {
+ i->second->CopyEnvironmentFromWithLoopPhiAdjustment(
+ suspend->GetEnvironment(), vector_header_);
+ }
+ }
+ }
+ // Finalize increment and phi.
+ HInstruction* inc = new (global_allocator_) HAdd(induc_type, vector_phi_, step);
+ vector_phi_->AddInput(lo);
+ vector_phi_->AddInput(Insert(vector_body_, inc));
+}
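
Note that the synthesized header tests the exit condition rather than the
continuation condition, and that HAboveOrEqual is an unsigned comparison; this is
safe here because the normalized index runs from lo up to hi without wrapping.
The generated shape is:

    // i = Phi(lo, i + step);
    // if (i >= hi) goto exit;   // HAboveOrEqual(i, hi); successor 0 is the exit
    // <body>
    // goto header;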
+
+// TODO: accept reductions at left-hand-side, mixed-type store idioms, etc.
+bool HLoopOptimization::VectorizeDef(LoopNode* node,
+ HInstruction* instruction,
+ bool generate_code) {
+ // Accept a left-hand-side array base[index] for
+ // (1) supported vector type,
+ // (2) loop-invariant base,
+ // (3) unit stride index,
+ // (4) vectorizable right-hand-side value.
+ uint64_t restrictions = kNone;
+ if (instruction->IsArraySet()) {
+ Primitive::Type type = instruction->AsArraySet()->GetComponentType();
+ HInstruction* base = instruction->InputAt(0);
+ HInstruction* index = instruction->InputAt(1);
+ HInstruction* value = instruction->InputAt(2);
+ HInstruction* offset = nullptr;
+ if (TrySetVectorType(type, &restrictions) &&
+ node->loop_info->IsDefinedOutOfTheLoop(base) &&
+ induction_range_.IsUnitStride(index, &offset) &&
+ VectorizeUse(node, value, generate_code, type, restrictions)) {
+ if (generate_code) {
+ GenerateVecSub(index, offset);
+ GenerateVecMem(instruction, vector_map_->Get(index), vector_map_->Get(value), type);
+ } else {
+ vector_refs_->insert(ArrayReference(base, offset, type, /*lhs*/ true));
+ }
return true;
}
+ return false;
+ }
+ // Branch back okay.
+ if (instruction->IsGoto()) {
+ return true;
+ }
+ // Otherwise accept only expressions with no effects outside the immediate loop-body.
+ // Note that actual uses are inspected during right-hand-side tree traversal.
+ return !IsUsedOutsideLoop(node->loop_info, instruction) && !instruction->DoesAnyWrite();
+}
+
+// TODO: more operations and intrinsics, detect saturation arithmetic, etc.
+bool HLoopOptimization::VectorizeUse(LoopNode* node,
+ HInstruction* instruction,
+ bool generate_code,
+ Primitive::Type type,
+ uint64_t restrictions) {
+ // Accept anything for which code has already been generated.
+ if (generate_code) {
+ if (vector_map_->find(instruction) != vector_map_->end()) {
+ return true;
+ }
+ }
+ // Continue the right-hand-side tree traversal, passing in proper
+ // types and vector restrictions along the way. During code generation,
+ // all new nodes are drawn from the global allocator.
+ if (node->loop_info->IsDefinedOutOfTheLoop(instruction)) {
+ // Accept invariant use, using scalar expansion.
+ if (generate_code) {
+ GenerateVecInv(instruction, type);
+ }
+ return true;
+ } else if (instruction->IsArrayGet()) {
+ // Accept a right-hand-side array base[index] for
+ // (1) exact matching vector type,
+ // (2) loop-invariant base,
+    // (3) unit stride index.
+ HInstruction* base = instruction->InputAt(0);
+ HInstruction* index = instruction->InputAt(1);
+ HInstruction* offset = nullptr;
+ if (type == instruction->GetType() &&
+ node->loop_info->IsDefinedOutOfTheLoop(base) &&
+ induction_range_.IsUnitStride(index, &offset)) {
+ if (generate_code) {
+ GenerateVecSub(index, offset);
+ GenerateVecMem(instruction, vector_map_->Get(index), nullptr, type);
+ } else {
+ vector_refs_->insert(ArrayReference(base, offset, type, /*lhs*/ false));
+ }
+ return true;
+ }
+ } else if (instruction->IsTypeConversion()) {
+ // Accept particular type conversions.
+ HTypeConversion* conversion = instruction->AsTypeConversion();
+ HInstruction* opa = conversion->InputAt(0);
+ Primitive::Type from = conversion->GetInputType();
+ Primitive::Type to = conversion->GetResultType();
+ if ((to == Primitive::kPrimByte ||
+ to == Primitive::kPrimChar ||
+ to == Primitive::kPrimShort) && from == Primitive::kPrimInt) {
+ // Accept a "narrowing" type conversion from a "wider" computation for
+ // (1) conversion into final required type,
+ // (2) vectorizable operand,
+ // (3) "wider" operations cannot bring in higher order bits.
+ if (to == type && VectorizeUse(node, opa, generate_code, type, restrictions | kNoHiBits)) {
+ if (generate_code) {
+ if (vector_mode_ == kVector) {
+ vector_map_->Put(instruction, vector_map_->Get(opa)); // operand pass-through
+ } else {
+ GenerateVecOp(instruction, vector_map_->Get(opa), nullptr, type);
+ }
+ }
+ return true;
+ }
+ } else if (to == Primitive::kPrimFloat && from == Primitive::kPrimInt) {
+ DCHECK_EQ(to, type);
+ // Accept int to float conversion for
+ // (1) supported int,
+ // (2) vectorizable operand.
+ if (TrySetVectorType(from, &restrictions) &&
+ VectorizeUse(node, opa, generate_code, from, restrictions)) {
+ if (generate_code) {
+ GenerateVecOp(instruction, vector_map_->Get(opa), nullptr, type);
+ }
+ return true;
+ }
+ }
+ return false;
+ } else if (instruction->IsNeg() || instruction->IsNot() || instruction->IsBooleanNot()) {
+ // Accept unary operator for vectorizable operand.
+ HInstruction* opa = instruction->InputAt(0);
+ if (VectorizeUse(node, opa, generate_code, type, restrictions)) {
+ if (generate_code) {
+ GenerateVecOp(instruction, vector_map_->Get(opa), nullptr, type);
+ }
+ return true;
+ }
+ } else if (instruction->IsAdd() || instruction->IsSub() ||
+ instruction->IsMul() || instruction->IsDiv() ||
+ instruction->IsAnd() || instruction->IsOr() || instruction->IsXor()) {
+ // Deal with vector restrictions.
+ if ((instruction->IsMul() && HasVectorRestrictions(restrictions, kNoMul)) ||
+ (instruction->IsDiv() && HasVectorRestrictions(restrictions, kNoDiv))) {
+ return false;
+ }
+ // Accept binary operator for vectorizable operands.
+ HInstruction* opa = instruction->InputAt(0);
+ HInstruction* opb = instruction->InputAt(1);
+ if (VectorizeUse(node, opa, generate_code, type, restrictions) &&
+ VectorizeUse(node, opb, generate_code, type, restrictions)) {
+ if (generate_code) {
+ GenerateVecOp(instruction, vector_map_->Get(opa), vector_map_->Get(opb), type);
+ }
+ return true;
+ }
+ } else if (instruction->IsShl() || instruction->IsShr() || instruction->IsUShr()) {
+ // Deal with vector restrictions.
+ if ((HasVectorRestrictions(restrictions, kNoShift)) ||
+ (instruction->IsShr() && HasVectorRestrictions(restrictions, kNoShr))) {
+ return false; // unsupported instruction
+ } else if ((instruction->IsShr() || instruction->IsUShr()) &&
+ HasVectorRestrictions(restrictions, kNoHiBits)) {
+ return false; // hibits may impact lobits; TODO: we can do better!
+ }
+ // Accept shift operator for vectorizable/invariant operands.
+ // TODO: accept symbolic, albeit loop invariant shift factors.
+ HInstruction* opa = instruction->InputAt(0);
+ HInstruction* opb = instruction->InputAt(1);
+ if (VectorizeUse(node, opa, generate_code, type, restrictions) && opb->IsIntConstant()) {
+ if (generate_code) {
+ // Make sure shift factor only looks at lower bits, as defined for sequential shifts.
+ // Note that even the narrower SIMD shifts do the right thing after that.
+ int32_t mask = (instruction->GetType() == Primitive::kPrimLong)
+ ? kMaxLongShiftDistance
+ : kMaxIntShiftDistance;
+ HInstruction* s = graph_->GetIntConstant(opb->AsIntConstant()->GetValue() & mask);
+ GenerateVecOp(instruction, vector_map_->Get(opa), s, type);
+ }
+ return true;
+ }
+ } else if (instruction->IsInvokeStaticOrDirect()) {
+ // TODO: coming soon.
+ return false;
}
return false;
}
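
As an example of the kNoHiBits flow above, assume byte arrays and a loop body like:

    // a[i] = (byte)(b[i] + 1);

The narrowing conversion byte <- int matches the store type, so the Add underneath
is traversed with kNoHiBits set; an Shr or UShr in the same expression would then
be rejected, since it could expose the high-order bits that the narrowing discards.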
-bool HLoopOptimization::IsPhiInduction(HPhi* phi) {
+bool HLoopOptimization::TrySetVectorType(Primitive::Type type, uint64_t* restrictions) {
+ const InstructionSetFeatures* features = compiler_driver_->GetInstructionSetFeatures();
+ switch (compiler_driver_->GetInstructionSet()) {
+ case kArm:
+ case kThumb2:
+ return false;
+ case kArm64:
+      // Allow vectorization for all ARM64 devices, because Android assumes that
+      // ARMv8 AArch64 always supports Advanced SIMD. For now, only D registers
+      // (64-bit vectors) are used, not Q registers (128-bit vectors).
+ switch (type) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ *restrictions |= kNoDiv;
+ return TrySetVectorLength(8);
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ *restrictions |= kNoDiv;
+ return TrySetVectorLength(4);
+ case Primitive::kPrimInt:
+ *restrictions |= kNoDiv;
+ return TrySetVectorLength(2);
+ case Primitive::kPrimFloat:
+ return TrySetVectorLength(2);
+ default:
+ return false;
+ }
+ case kX86:
+ case kX86_64:
+ // Allow vectorization for SSE4-enabled X86 devices only (128-bit vectors).
+ if (features->AsX86InstructionSetFeatures()->HasSSE4_1()) {
+ switch (type) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ *restrictions |= kNoMul | kNoDiv | kNoShift;
+ return TrySetVectorLength(16);
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ *restrictions |= kNoDiv;
+ return TrySetVectorLength(8);
+ case Primitive::kPrimInt:
+ *restrictions |= kNoDiv;
+ return TrySetVectorLength(4);
+ case Primitive::kPrimLong:
+ *restrictions |= kNoMul | kNoDiv | kNoShr;
+ return TrySetVectorLength(2);
+ case Primitive::kPrimFloat:
+ return TrySetVectorLength(4);
+ case Primitive::kPrimDouble:
+ return TrySetVectorLength(2);
+ default:
+ break;
+ } // switch type
+ }
+ return false;
+ case kMips:
+ case kMips64:
+ // TODO: implement MIPS SIMD.
+ return false;
+ default:
+ return false;
+ } // switch instruction set
+}
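
A hypothetical probe of the selection logic (names as in this file): on an ARM64
target with byte elements, the code above picks eight 8-bit lanes in a 64-bit D
register and records that packed integer division is unavailable:

    uint64_t restrictions = kNone;
    if (TrySetVectorType(Primitive::kPrimByte, &restrictions)) {
      DCHECK_EQ(8u, vector_length_);                        // 8 lanes x 8 bits
      DCHECK(HasVectorRestrictions(restrictions, kNoDiv));  // no packed divide
    }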
+
+bool HLoopOptimization::TrySetVectorLength(uint32_t length) {
+ DCHECK(IsPowerOfTwo(length) && length >= 2u);
+ // First time set?
+ if (vector_length_ == 0) {
+ vector_length_ = length;
+ }
+ // Different types are acceptable within a loop-body, as long as all the corresponding vector
+ // lengths match exactly to obtain a uniform traversal through the vector iteration space
+ // (idiomatic exceptions to this rule can be handled by further unrolling sub-expressions).
+ return vector_length_ == length;
+}
+
+void HLoopOptimization::GenerateVecInv(HInstruction* org, Primitive::Type type) {
+ if (vector_map_->find(org) == vector_map_->end()) {
+ // In scalar code, just use a self pass-through for scalar invariants
+ // (viz. expression remains itself).
+ if (vector_mode_ == kSequential) {
+ vector_map_->Put(org, org);
+ return;
+ }
+ // In vector code, explicit scalar expansion is needed.
+ HInstruction* vector = new (global_allocator_) HVecReplicateScalar(
+ global_allocator_, org, type, vector_length_);
+ vector_map_->Put(org, Insert(vector_preheader_, vector));
+ }
+}
+
+void HLoopOptimization::GenerateVecSub(HInstruction* org, HInstruction* offset) {
+ if (vector_map_->find(org) == vector_map_->end()) {
+ HInstruction* subscript = vector_phi_;
+ if (offset != nullptr) {
+ subscript = new (global_allocator_) HAdd(Primitive::kPrimInt, subscript, offset);
+ if (org->IsPhi()) {
+ Insert(vector_body_, subscript); // lacks layout placeholder
+ }
+ }
+ vector_map_->Put(org, subscript);
+ }
+}
+
+void HLoopOptimization::GenerateVecMem(HInstruction* org,
+ HInstruction* opa,
+ HInstruction* opb,
+ Primitive::Type type) {
+ HInstruction* vector = nullptr;
+ if (vector_mode_ == kVector) {
+ // Vector store or load.
+ if (opb != nullptr) {
+ vector = new (global_allocator_) HVecStore(
+ global_allocator_, org->InputAt(0), opa, opb, type, vector_length_);
+ } else {
+ vector = new (global_allocator_) HVecLoad(
+ global_allocator_, org->InputAt(0), opa, type, vector_length_);
+ }
+ } else {
+ // Scalar store or load.
+ DCHECK(vector_mode_ == kSequential);
+ if (opb != nullptr) {
+ vector = new (global_allocator_) HArraySet(org->InputAt(0), opa, opb, type, kNoDexPc);
+ } else {
+ vector = new (global_allocator_) HArrayGet(org->InputAt(0), opa, type, kNoDexPc);
+ }
+ }
+ vector_map_->Put(org, vector);
+}
+
+#define GENERATE_VEC(x, y) \
+ if (vector_mode_ == kVector) { \
+ vector = (x); \
+ } else { \
+ DCHECK(vector_mode_ == kSequential); \
+ vector = (y); \
+ } \
+ break;
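
Because GENERATE_VEC ends in break, the switch cases below need no break of their
own; for HInstruction::kAdd, for instance, the macro expands to roughly:

    if (vector_mode_ == kVector) {
      vector = new (global_allocator_) HVecAdd(
          global_allocator_, opa, opb, type, vector_length_);
    } else {
      DCHECK(vector_mode_ == kSequential);
      vector = new (global_allocator_) HAdd(type, opa, opb);
    }
    break;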
+
+void HLoopOptimization::GenerateVecOp(HInstruction* org,
+ HInstruction* opa,
+ HInstruction* opb,
+ Primitive::Type type) {
+ if (vector_mode_ == kSequential) {
+ // Scalar code follows implicit integral promotion.
+ if (type == Primitive::kPrimBoolean ||
+ type == Primitive::kPrimByte ||
+ type == Primitive::kPrimChar ||
+ type == Primitive::kPrimShort) {
+ type = Primitive::kPrimInt;
+ }
+ }
+ HInstruction* vector = nullptr;
+ switch (org->GetKind()) {
+ case HInstruction::kNeg:
+ DCHECK(opb == nullptr);
+ GENERATE_VEC(
+ new (global_allocator_) HVecNeg(global_allocator_, opa, type, vector_length_),
+ new (global_allocator_) HNeg(type, opa));
+ case HInstruction::kNot:
+ DCHECK(opb == nullptr);
+ GENERATE_VEC(
+ new (global_allocator_) HVecNot(global_allocator_, opa, type, vector_length_),
+ new (global_allocator_) HNot(type, opa));
+ case HInstruction::kBooleanNot:
+ DCHECK(opb == nullptr);
+ GENERATE_VEC(
+ new (global_allocator_) HVecNot(global_allocator_, opa, type, vector_length_),
+ new (global_allocator_) HBooleanNot(opa));
+ case HInstruction::kTypeConversion:
+ DCHECK(opb == nullptr);
+ GENERATE_VEC(
+ new (global_allocator_) HVecCnv(global_allocator_, opa, type, vector_length_),
+ new (global_allocator_) HTypeConversion(type, opa, kNoDexPc));
+ case HInstruction::kAdd:
+ GENERATE_VEC(
+ new (global_allocator_) HVecAdd(global_allocator_, opa, opb, type, vector_length_),
+ new (global_allocator_) HAdd(type, opa, opb));
+ case HInstruction::kSub:
+ GENERATE_VEC(
+ new (global_allocator_) HVecSub(global_allocator_, opa, opb, type, vector_length_),
+ new (global_allocator_) HSub(type, opa, opb));
+ case HInstruction::kMul:
+ GENERATE_VEC(
+ new (global_allocator_) HVecMul(global_allocator_, opa, opb, type, vector_length_),
+ new (global_allocator_) HMul(type, opa, opb));
+ case HInstruction::kDiv:
+ GENERATE_VEC(
+ new (global_allocator_) HVecDiv(global_allocator_, opa, opb, type, vector_length_),
+ new (global_allocator_) HDiv(type, opa, opb, kNoDexPc));
+ case HInstruction::kAnd:
+ GENERATE_VEC(
+ new (global_allocator_) HVecAnd(global_allocator_, opa, opb, type, vector_length_),
+ new (global_allocator_) HAnd(type, opa, opb));
+ case HInstruction::kOr:
+ GENERATE_VEC(
+ new (global_allocator_) HVecOr(global_allocator_, opa, opb, type, vector_length_),
+ new (global_allocator_) HOr(type, opa, opb));
+ case HInstruction::kXor:
+ GENERATE_VEC(
+ new (global_allocator_) HVecXor(global_allocator_, opa, opb, type, vector_length_),
+ new (global_allocator_) HXor(type, opa, opb));
+ case HInstruction::kShl:
+ GENERATE_VEC(
+ new (global_allocator_) HVecShl(global_allocator_, opa, opb, type, vector_length_),
+ new (global_allocator_) HShl(type, opa, opb));
+ case HInstruction::kShr:
+ GENERATE_VEC(
+ new (global_allocator_) HVecShr(global_allocator_, opa, opb, type, vector_length_),
+ new (global_allocator_) HShr(type, opa, opb));
+ case HInstruction::kUShr:
+ GENERATE_VEC(
+ new (global_allocator_) HVecUShr(global_allocator_, opa, opb, type, vector_length_),
+ new (global_allocator_) HUShr(type, opa, opb));
+ case HInstruction::kInvokeStaticOrDirect: {
+ // TODO: coming soon.
+ break;
+ }
+ default:
+ break;
+ } // switch
+ CHECK(vector != nullptr) << "Unsupported SIMD operator";
+ vector_map_->Put(org, vector);
+}
+
+#undef GENERATE_VEC
+
+//
+// Helpers.
+//
+
+bool HLoopOptimization::TrySetPhiInduction(HPhi* phi, bool restrict_uses) {
+ DCHECK(iset_->empty());
ArenaSet<HInstruction*>* set = induction_range_.LookupCycle(phi);
if (set != nullptr) {
- DCHECK(iset_->empty());
for (HInstruction* i : *set) {
// Check that, other than instructions that are no longer in the graph (removed earlier)
- // each instruction is removable and, other than the phi, uses are contained in the cycle.
+      // each instruction is removable and, when the restriction on uses is requested,
+      // that all uses of each instruction other than the phi are contained within the cycle.
if (!i->IsInBlock()) {
continue;
} else if (!i->IsRemovable()) {
return false;
- } else if (i != phi) {
+ } else if (i != phi && restrict_uses) {
for (const HUseListNode<HInstruction*>& use : i->GetUses()) {
if (set->find(use.GetUser()) == set->end()) {
return false;
@@ -348,10 +1003,12 @@ bool HLoopOptimization::IsPhiInduction(HPhi* phi) {
// c: Condition(phi, bound)
// i: If(c)
// TODO: Find a less pattern matching approach?
-bool HLoopOptimization::IsEmptyHeader(HBasicBlock* block) {
+bool HLoopOptimization::TrySetSimpleLoopHeader(HBasicBlock* block) {
DCHECK(iset_->empty());
HInstruction* phi = block->GetFirstPhi();
- if (phi != nullptr && phi->GetNext() == nullptr && IsPhiInduction(phi->AsPhi())) {
+ if (phi != nullptr &&
+ phi->GetNext() == nullptr &&
+ TrySetPhiInduction(phi->AsPhi(), /*restrict_uses*/ false)) {
HInstruction* s = block->GetFirstInstruction();
if (s != nullptr && s->IsSuspendCheck()) {
HInstruction* c = s->GetNext();
@@ -369,14 +1026,24 @@ bool HLoopOptimization::IsEmptyHeader(HBasicBlock* block) {
}
bool HLoopOptimization::IsEmptyBody(HBasicBlock* block) {
- if (block->GetFirstPhi() == nullptr) {
- for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
- HInstruction* instruction = it.Current();
- if (!instruction->IsGoto() && iset_->find(instruction) == iset_->end()) {
- return false;
- }
+ if (!block->GetPhis().IsEmpty()) {
+ return false;
+ }
+ for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
+ HInstruction* instruction = it.Current();
+ if (!instruction->IsGoto() && iset_->find(instruction) == iset_->end()) {
+ return false;
+ }
+ }
+ return true;
+}
+
+bool HLoopOptimization::IsUsedOutsideLoop(HLoopInformation* loop_info,
+ HInstruction* instruction) {
+ for (const HUseListNode<HInstruction*>& use : instruction->GetUses()) {
+ if (use.GetUser()->GetBlock()->GetLoopInformation() != loop_info) {
+ return true;
}
- return true;
}
return false;
}
@@ -438,6 +1105,19 @@ bool HLoopOptimization::TryReplaceWithLastValue(HInstruction* instruction, HBasi
return false;
}
+bool HLoopOptimization::TryAssignLastValue(HLoopInformation* loop_info,
+ HInstruction* instruction,
+ HBasicBlock* block,
+ bool collect_loop_uses) {
+ // Assigning the last value is always successful if there are no uses.
+ // Otherwise, it succeeds in a no early-exit loop by generating the
+ // proper last value assignment.
+ int32_t use_count = 0;
+ return IsOnlyUsedAfterLoop(loop_info, instruction, collect_loop_uses, &use_count) &&
+ (use_count == 0 ||
+ (!IsEarlyExit(loop_info) && TryReplaceWithLastValue(instruction, block)));
+}
+
void HLoopOptimization::RemoveDeadInstructions(const HInstructionList& list) {
for (HBackwardInstructionIterator i(list); !i.Done(); i.Advance()) {
HInstruction* instruction = i.Current();
diff --git a/compiler/optimizing/loop_optimization.h b/compiler/optimizing/loop_optimization.h
index 0b798fc7a9..16f7691af2 100644
--- a/compiler/optimizing/loop_optimization.h
+++ b/compiler/optimizing/loop_optimization.h
@@ -27,7 +27,8 @@ class CompilerDriver;
/**
* Loop optimizations. Builds a loop hierarchy and applies optimizations to
- * the detected nested loops, such as removal of dead induction and empty loops.
+ * the detected nested loops, such as removal of dead induction and empty loops,
+ * and inner-loop vectorization.
*/
class HLoopOptimization : public HOptimization {
public:
@@ -50,34 +51,105 @@ class HLoopOptimization : public HOptimization {
inner(nullptr),
previous(nullptr),
next(nullptr) {}
- HLoopInformation* const loop_info;
+ HLoopInformation* loop_info;
LoopNode* outer;
LoopNode* inner;
LoopNode* previous;
LoopNode* next;
};
- void LocalRun();
+ /*
+ * Vectorization restrictions (bit mask).
+ */
+ enum VectorRestrictions {
+ kNone = 0, // no restrictions
+ kNoMul = 1, // no multiplication
+ kNoDiv = 2, // no division
+ kNoShift = 4, // no shift
+ kNoShr = 8, // no arithmetic shift right
+ kNoHiBits = 16, // "wider" operations cannot bring in higher order bits
+ };
+
+ /*
+ * Vectorization mode during synthesis
+ * (sequential peeling/cleanup loop or vector loop).
+ */
+ enum VectorMode {
+ kSequential,
+ kVector
+ };
+
+ /*
+ * Representation of a unit-stride array reference.
+ */
+ struct ArrayReference {
+ ArrayReference(HInstruction* b, HInstruction* o, Primitive::Type t, bool l)
+ : base(b), offset(o), type(t), lhs(l) { }
+ bool operator<(const ArrayReference& other) const {
+ return
+ (base < other.base) ||
+ (base == other.base &&
+ (offset < other.offset || (offset == other.offset &&
+ (type < other.type ||
+ (type == other.type && lhs < other.lhs)))));
+ }
+ HInstruction* base; // base address
+ HInstruction* offset; // offset + i
+ Primitive::Type type; // component type
+ bool lhs; // def/use
+ };
+ // Loop setup and traversal.
+ void LocalRun();
void AddLoop(HLoopInformation* loop_info);
void RemoveLoop(LoopNode* node);
-
void TraverseLoopsInnerToOuter(LoopNode* node);
- // Simplification.
+ // Optimization.
void SimplifyInduction(LoopNode* node);
void SimplifyBlocks(LoopNode* node);
- bool SimplifyInnerLoop(LoopNode* node);
+ void OptimizeInnerLoop(LoopNode* node);
+
+ // Vectorization analysis and synthesis.
+ bool CanVectorize(LoopNode* node, HBasicBlock* block, int64_t trip_count);
+ void Vectorize(LoopNode* node, HBasicBlock* block, HBasicBlock* exit, int64_t trip_count);
+ void GenerateNewLoop(LoopNode* node,
+ HBasicBlock* block,
+ HBasicBlock* new_preheader,
+ HInstruction* lo,
+ HInstruction* hi,
+ HInstruction* step);
+ bool VectorizeDef(LoopNode* node, HInstruction* instruction, bool generate_code);
+ bool VectorizeUse(LoopNode* node,
+ HInstruction* instruction,
+ bool generate_code,
+ Primitive::Type type,
+ uint64_t restrictions);
+ bool TrySetVectorType(Primitive::Type type, /*out*/ uint64_t* restrictions);
+ bool TrySetVectorLength(uint32_t length);
+ void GenerateVecInv(HInstruction* org, Primitive::Type type);
+ void GenerateVecSub(HInstruction* org, HInstruction* off);
+ void GenerateVecMem(HInstruction* org,
+ HInstruction* opa,
+ HInstruction* opb,
+ Primitive::Type type);
+ void GenerateVecOp(HInstruction* org, HInstruction* opa, HInstruction* opb, Primitive::Type type);
// Helpers.
- bool IsPhiInduction(HPhi* phi);
- bool IsEmptyHeader(HBasicBlock* block);
+ bool TrySetPhiInduction(HPhi* phi, bool restrict_uses);
+ bool TrySetSimpleLoopHeader(HBasicBlock* block);
bool IsEmptyBody(HBasicBlock* block);
bool IsOnlyUsedAfterLoop(HLoopInformation* loop_info,
HInstruction* instruction,
bool collect_loop_uses,
/*out*/ int32_t* use_count);
+ bool IsUsedOutsideLoop(HLoopInformation* loop_info,
+ HInstruction* instruction);
bool TryReplaceWithLastValue(HInstruction* instruction, HBasicBlock* block);
+ bool TryAssignLastValue(HLoopInformation* loop_info,
+ HInstruction* instruction,
+ HBasicBlock* block,
+ bool collect_loop_uses);
void RemoveDeadInstructions(const HInstructionList& list);
// Compiler driver (to query ISA features).
@@ -90,6 +162,9 @@ class HLoopOptimization : public HOptimization {
// through this allocator is immediately released when the loop optimizer is done.
ArenaAllocator* loop_allocator_;
+ // Global heap memory allocator. Used to build HIR.
+ ArenaAllocator* global_allocator_;
+
// Entries into the loop hierarchy representation. The hierarchy resides
// in phase-local heap memory.
LoopNode* top_loop_;
@@ -102,11 +177,33 @@ class HLoopOptimization : public HOptimization {
// Counter that tracks how many induction cycles have been simplified. Useful
// to trigger incremental updates of induction variable analysis of outer loops
// when the induction of inner loops has changed.
- int32_t induction_simplication_count_;
+ uint32_t induction_simplication_count_;
// Flag that tracks if any simplifications have occurred.
bool simplified_;
+ // Number of "lanes" for selected packed type.
+ uint32_t vector_length_;
+
+ // Set of array references in the vector loop.
+ // Contents reside in phase-local heap memory.
+ ArenaSet<ArrayReference>* vector_refs_;
+
+  // Mapping used during vectorization synthesis for both the scalar peeling/cleanup
+  // loop (vector_mode_ is kSequential) and the actual vector loop (vector_mode_ is
+  // kVector). The data structure maps original instructions into the new instructions.
+  // Contents reside in phase-local heap memory.
+ ArenaSafeMap<HInstruction*, HInstruction*>* vector_map_;
+
+ // Temporary vectorization bookkeeping.
+ HBasicBlock* vector_preheader_; // preheader of the new loop
+ HBasicBlock* vector_header_; // header of the new loop
+ HBasicBlock* vector_body_; // body of the new loop
+ HInstruction* vector_runtime_test_a_;
+ HInstruction* vector_runtime_test_b_; // defines a != b runtime test
+ HPhi* vector_phi_; // the Phi representing the normalized loop index
+ VectorMode vector_mode_; // selects synthesis mode
+
friend class LoopOptimizationTest;
DISALLOW_COPY_AND_ASSIGN(HLoopOptimization);
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index ec706e6694..5617e4bfcb 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -1088,6 +1088,19 @@ void HInstruction::ReplaceWith(HInstruction* other) {
DCHECK(env_uses_.empty());
}
+void HInstruction::ReplaceUsesDominatedBy(HInstruction* dominator, HInstruction* replacement) {
+ const HUseList<HInstruction*>& uses = GetUses();
+ for (auto it = uses.begin(), end = uses.end(); it != end; /* ++it below */) {
+ HInstruction* user = it->GetUser();
+ size_t index = it->GetIndex();
+ // Increment `it` now because `*it` may disappear thanks to user->ReplaceInput().
+ ++it;
+ if (dominator->StrictlyDominates(user)) {
+ user->ReplaceInput(replacement, index);
+ }
+ }
+}
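
This helper pairs naturally with the guarded form of HDeoptimize introduced below:
after emitting a guard, a caller (the inliner, presumably) can redirect only the
dominated uses to the guard. A sketch, with bb, cond, receiver, and dex_pc as
placeholder names:

    HDeoptimize* deopt = new (graph->GetArena()) HDeoptimize(
        graph->GetArena(), cond, receiver, HDeoptimize::Kind::kInline, dex_pc);
    bb->InsertInstructionBefore(deopt, bb->GetLastInstruction());
    // Only uses strictly dominated by the guard now read the guarded value.
    receiver->ReplaceUsesDominatedBy(deopt, deopt);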
+
void HInstruction::ReplaceInput(HInstruction* replacement, size_t index) {
HUserRecord<HInstruction*> input_use = InputRecordAt(index);
if (input_use.GetInstruction() == replacement) {
@@ -1323,6 +1336,18 @@ std::ostream& operator<<(std::ostream& os, const ComparisonBias& rhs) {
}
}
+std::ostream& operator<<(std::ostream& os, const HDeoptimize::Kind& rhs) {
+ switch (rhs) {
+ case HDeoptimize::Kind::kBCE:
+ return os << "bce";
+ case HDeoptimize::Kind::kInline:
+ return os << "inline";
+ default:
+ LOG(FATAL) << "Unknown Deoptimization kind: " << static_cast<int>(rhs);
+ UNREACHABLE();
+ }
+}
+
bool HCondition::IsBeforeWhenDisregardMoves(HInstruction* instruction) const {
return this == instruction->GetPreviousDisregardingMoves();
}
@@ -2315,6 +2340,66 @@ void HGraph::TransformLoopHeaderForBCE(HBasicBlock* header) {
new_pre_header, old_pre_header, /* replace_if_back_edge */ false);
}
+HBasicBlock* HGraph::TransformLoopForVectorization(HBasicBlock* header,
+ HBasicBlock* body,
+ HBasicBlock* exit) {
+ DCHECK(header->IsLoopHeader());
+ HLoopInformation* loop = header->GetLoopInformation();
+
+ // Add new loop blocks.
+ HBasicBlock* new_pre_header = new (arena_) HBasicBlock(this, header->GetDexPc());
+ HBasicBlock* new_header = new (arena_) HBasicBlock(this, header->GetDexPc());
+ HBasicBlock* new_body = new (arena_) HBasicBlock(this, header->GetDexPc());
+ AddBlock(new_pre_header);
+ AddBlock(new_header);
+ AddBlock(new_body);
+
+ // Set up control flow.
+ header->ReplaceSuccessor(exit, new_pre_header);
+ new_pre_header->AddSuccessor(new_header);
+ new_header->AddSuccessor(exit);
+ new_header->AddSuccessor(new_body);
+ new_body->AddSuccessor(new_header);
+
+ // Set up dominators.
+ header->ReplaceDominatedBlock(exit, new_pre_header);
+ new_pre_header->SetDominator(header);
+ new_pre_header->dominated_blocks_.push_back(new_header);
+ new_header->SetDominator(new_pre_header);
+ new_header->dominated_blocks_.push_back(new_body);
+ new_body->SetDominator(new_header);
+ new_header->dominated_blocks_.push_back(exit);
+ exit->SetDominator(new_header);
+
+ // Fix reverse post order.
+ size_t index_of_header = IndexOfElement(reverse_post_order_, header);
+ MakeRoomFor(&reverse_post_order_, 2, index_of_header);
+ reverse_post_order_[++index_of_header] = new_pre_header;
+ reverse_post_order_[++index_of_header] = new_header;
+ size_t index_of_body = IndexOfElement(reverse_post_order_, body);
+ MakeRoomFor(&reverse_post_order_, 1, index_of_body - 1);
+ reverse_post_order_[index_of_body] = new_body;
+
+ // Add gotos and suspend check (client must add conditional in header and copy environment).
+ new_pre_header->AddInstruction(new (arena_) HGoto());
+ HSuspendCheck* suspend_check = new (arena_) HSuspendCheck(header->GetDexPc());
+ new_header->AddInstruction(suspend_check);
+ new_body->AddInstruction(new (arena_) HGoto());
+
+ // Update loop information.
+ new_header->AddBackEdge(new_body);
+ new_header->GetLoopInformation()->SetSuspendCheck(suspend_check);
+ new_header->GetLoopInformation()->Populate();
+ new_pre_header->SetLoopInformation(loop->GetPreHeader()->GetLoopInformation()); // outward
+ HLoopInformationOutwardIterator it(*new_header);
+ for (it.Advance(); !it.Done(); it.Advance()) {
+ it.Current()->Add(new_pre_header);
+ it.Current()->Add(new_header);
+ it.Current()->Add(new_body);
+ }
+ return new_pre_header;
+}
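
The resulting control flow, with the caller still responsible for adding the loop
condition in new_header and copying an environment onto the suspend check:

    // header --> new_pre_header --> new_header --> exit
    //                                  |    ^
    //                                  v    |
    //                                  new_body   (back edge to new_header)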
+
static void CheckAgainstUpperBound(ReferenceTypeInfo rti, ReferenceTypeInfo upper_bound_rti)
REQUIRES_SHARED(Locks::mutator_lock_) {
if (rti.IsValid()) {
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index fb0c889792..52a02c2285 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -400,6 +400,12 @@ class HGraph : public ArenaObject<kArenaAllocGraph> {
// put deoptimization instructions, etc.
void TransformLoopHeaderForBCE(HBasicBlock* header);
+ // Adds a new loop directly after the loop with the given header and exit.
+ // Returns the new preheader.
+ HBasicBlock* TransformLoopForVectorization(HBasicBlock* header,
+ HBasicBlock* body,
+ HBasicBlock* exit);
+
// Removes `block` from the graph. Assumes `block` has been disconnected from
// other blocks and has no instructions or phis.
void DeleteDeadEmptyBlock(HBasicBlock* block);
@@ -1363,6 +1369,25 @@ class HLoopInformationOutwardIterator : public ValueObject {
M(TypeConversion, Instruction) \
M(UShr, BinaryOperation) \
M(Xor, BinaryOperation) \
+ M(VecReplicateScalar, VecUnaryOperation) \
+ M(VecSetScalars, VecUnaryOperation) \
+ M(VecSumReduce, VecUnaryOperation) \
+ M(VecCnv, VecUnaryOperation) \
+ M(VecNeg, VecUnaryOperation) \
+ M(VecNot, VecUnaryOperation) \
+ M(VecAdd, VecBinaryOperation) \
+ M(VecSub, VecBinaryOperation) \
+ M(VecMul, VecBinaryOperation) \
+ M(VecDiv, VecBinaryOperation) \
+ M(VecAnd, VecBinaryOperation) \
+ M(VecAndNot, VecBinaryOperation) \
+ M(VecOr, VecBinaryOperation) \
+ M(VecXor, VecBinaryOperation) \
+ M(VecShl, VecBinaryOperation) \
+ M(VecShr, VecBinaryOperation) \
+ M(VecUShr, VecBinaryOperation) \
+ M(VecLoad, VecMemoryOperation) \
+ M(VecStore, VecMemoryOperation) \
/*
* Instructions, shared across several (not all) architectures.
@@ -1424,7 +1449,11 @@ class HLoopInformationOutwardIterator : public ValueObject {
M(Constant, Instruction) \
M(UnaryOperation, Instruction) \
M(BinaryOperation, Instruction) \
- M(Invoke, Instruction)
+ M(Invoke, Instruction) \
+ M(VecOperation, Instruction) \
+ M(VecUnaryOperation, VecOperation) \
+ M(VecBinaryOperation, VecOperation) \
+ M(VecMemoryOperation, VecOperation)
#define FOR_EACH_INSTRUCTION(M) \
FOR_EACH_CONCRETE_INSTRUCTION(M) \
@@ -2081,6 +2110,7 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> {
void SetLocations(LocationSummary* locations) { locations_ = locations; }
void ReplaceWith(HInstruction* instruction);
+ void ReplaceUsesDominatedBy(HInstruction* dominator, HInstruction* replacement);
void ReplaceInput(HInstruction* replacement, size_t index);
// This is almost the same as doing `ReplaceWith()`. But in this helper, the
@@ -2944,28 +2974,97 @@ class HTryBoundary FINAL : public HTemplateInstruction<0> {
};
// Deoptimize to interpreter, upon checking a condition.
-class HDeoptimize FINAL : public HTemplateInstruction<1> {
+class HDeoptimize FINAL : public HVariableInputSizeInstruction {
public:
+ enum class Kind {
+ kBCE,
+ kInline,
+ kLast = kInline
+ };
+
+ // Use this constructor when the `HDeoptimize` acts as a barrier, across which
+ // no code can be moved.
+ HDeoptimize(ArenaAllocator* arena, HInstruction* cond, Kind kind, uint32_t dex_pc)
+ : HVariableInputSizeInstruction(
+ SideEffects::All(),
+ dex_pc,
+ arena,
+ /* number_of_inputs */ 1,
+ kArenaAllocMisc) {
+ SetPackedFlag<kFieldCanBeMoved>(false);
+ SetPackedField<DeoptimizeKindField>(kind);
+ SetRawInputAt(0, cond);
+ }
+
+ // Use this constructor when the `HDeoptimize` guards an instruction, and any user
+ // that relies on the deoptimization to pass should have its input be the `HDeoptimize`
+ // instead of `guard`.
// We set CanTriggerGC to prevent any intermediate address from being live
// at the point of the `HDeoptimize`.
- HDeoptimize(HInstruction* cond, uint32_t dex_pc)
- : HTemplateInstruction(SideEffects::CanTriggerGC(), dex_pc) {
+ HDeoptimize(ArenaAllocator* arena,
+ HInstruction* cond,
+ HInstruction* guard,
+ Kind kind,
+ uint32_t dex_pc)
+ : HVariableInputSizeInstruction(
+ SideEffects::CanTriggerGC(),
+ dex_pc,
+ arena,
+ /* number_of_inputs */ 2,
+ kArenaAllocMisc) {
+ SetPackedFlag<kFieldCanBeMoved>(true);
+ SetPackedField<DeoptimizeKindField>(kind);
SetRawInputAt(0, cond);
+ SetRawInputAt(1, guard);
}
- bool CanBeMoved() const OVERRIDE { return true; }
- bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
- return true;
+ bool CanBeMoved() const OVERRIDE { return GetPackedFlag<kFieldCanBeMoved>(); }
+
+ bool InstructionDataEquals(const HInstruction* other) const OVERRIDE {
+ return (other->CanBeMoved() == CanBeMoved()) &&
+ (other->AsDeoptimize()->GetKind() == GetKind());
}
+
bool NeedsEnvironment() const OVERRIDE { return true; }
+
bool CanThrow() const OVERRIDE { return true; }
+ Kind GetKind() const { return GetPackedField<DeoptimizeKindField>(); }
+
+ Primitive::Type GetType() const OVERRIDE {
+ return GuardsAnInput() ? GuardedInput()->GetType() : Primitive::kPrimVoid;
+ }
+
+ bool GuardsAnInput() const {
+ return InputCount() == 2;
+ }
+
+ HInstruction* GuardedInput() const {
+ DCHECK(GuardsAnInput());
+ return InputAt(1);
+ }
+
+ void RemoveGuard() {
+ RemoveInputAt(1);
+ }
+
DECLARE_INSTRUCTION(Deoptimize);
private:
+ static constexpr size_t kFieldCanBeMoved = kNumberOfGenericPackedBits;
+ static constexpr size_t kFieldDeoptimizeKind = kNumberOfGenericPackedBits + 1;
+ static constexpr size_t kFieldDeoptimizeKindSize =
+ MinimumBitsToStore(static_cast<size_t>(Kind::kLast));
+ static constexpr size_t kNumberOfDeoptimizePackedBits =
+ kFieldDeoptimizeKind + kFieldDeoptimizeKindSize;
+ static_assert(kNumberOfDeoptimizePackedBits <= kMaxNumberOfPackedBits,
+ "Too many packed fields.");
+ using DeoptimizeKindField = BitField<Kind, kFieldDeoptimizeKind, kFieldDeoptimizeKindSize>;
+
DISALLOW_COPY_AND_ASSIGN(HDeoptimize);
};
+std::ostream& operator<<(std::ostream& os, const HDeoptimize::Kind& rhs);
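A sketch of the guarding form in use (arena, compare, receiver, cursor, and dex_pc assumed; this mirrors an inliner-style type guard rather than a prescribed API):

    HDeoptimize* deopt = new (arena) HDeoptimize(
        arena, compare, receiver, HDeoptimize::Kind::kInline, dex_pc);
    block->InsertInstructionBefore(deopt, cursor);
    // Users of `receiver` dominated by the guard now read the deoptimize
    // itself, so they cannot float above the check.
    receiver->ReplaceUsesDominatedBy(deopt, deopt);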
+
// Represents a should_deoptimize flag. Currently used for CHA-based devirtualization.
// The compiled code checks this flag value in a guard before devirtualized call and
// if it's true, starts to do deoptimization.
@@ -6619,6 +6718,8 @@ class HParallelMove FINAL : public HTemplateInstruction<0> {
} // namespace art
+#include "nodes_vector.h"
+
#if defined(ART_ENABLE_CODEGEN_arm) || defined(ART_ENABLE_CODEGEN_arm64)
#include "nodes_shared.h"
#endif
diff --git a/compiler/optimizing/nodes_vector.h b/compiler/optimizing/nodes_vector.h
new file mode 100644
index 0000000000..9f9b918f17
--- /dev/null
+++ b/compiler/optimizing/nodes_vector.h
@@ -0,0 +1,585 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_NODES_VECTOR_H_
+#define ART_COMPILER_OPTIMIZING_NODES_VECTOR_H_
+
+// This #include is never needed for compilation, because this header file
+// (nodes_vector.h) is only included from within the header file nodes.h itself.
+// However, it gives editing tools better context.
+#include "nodes.h"
+
+namespace art {
+
+// Memory alignment, represented as an offset relative to a base, where 0 <= offset < base,
+// and base is a power of two. For example, the value Alignment(16, 0) means memory is
+// perfectly aligned at a 16-byte boundary, whereas the value Alignment(16, 4) means
+// memory is always exactly 4 bytes above such a boundary.
+class Alignment {
+ public:
+ Alignment(size_t base, size_t offset) : base_(base), offset_(offset) {
+ DCHECK_LT(offset, base);
+ DCHECK(IsPowerOfTwo(base));
+ }
+
+ // Returns true if memory is "at least" aligned at the given boundary.
+ // Assumes the requested base is a power of two.
+ bool IsAlignedAt(size_t base) const {
+ DCHECK_NE(0u, base);
+ DCHECK(IsPowerOfTwo(base));
+ return ((offset_ | base_) & (base - 1u)) == 0;
+ }
+
+ std::string ToString() const {
+ return "ALIGN(" + std::to_string(base_) + "," + std::to_string(offset_) + ")";
+ }
+
+ private:
+ size_t base_;
+ size_t offset_;
+};
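A worked example of this encoding (values chosen for illustration only):

    Alignment a(16, 4);   // 4 bytes past a 16-byte boundary
    a.IsAlignedAt(4);     // true:  (4 | 16) & 3 == 0
    a.IsAlignedAt(8);     // false: (4 | 16) & 7 == 4
    Alignment b(16, 0);
    b.IsAlignedAt(16);    // true:  perfectly 16-byte aligned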
+
+//
+// Definitions of abstract vector operations in HIR.
+//
+
+// Abstraction of a vector operation, i.e., an operation that performs
+// GetVectorLength() x GetPackedType() operations simultaneously.
+class HVecOperation : public HVariableInputSizeInstruction {
+ public:
+ HVecOperation(ArenaAllocator* arena,
+ Primitive::Type packed_type,
+ SideEffects side_effects,
+ size_t number_of_inputs,
+ size_t vector_length,
+ uint32_t dex_pc)
+ : HVariableInputSizeInstruction(side_effects,
+ dex_pc,
+ arena,
+ number_of_inputs,
+ kArenaAllocVectorNode),
+ vector_length_(vector_length) {
+ SetPackedField<TypeField>(packed_type);
+ DCHECK_LT(1u, vector_length);
+ }
+
+ // Returns the number of elements packed in a vector.
+ size_t GetVectorLength() const {
+ return vector_length_;
+ }
+
+ // Returns the number of bytes in a full vector.
+ size_t GetVectorNumberOfBytes() const {
+ return vector_length_ * Primitive::ComponentSize(GetPackedType());
+ }
+
+ // Returns the type of the vector operation: a SIMD operation looks like an FPU location.
+ // TODO: we could introduce SIMD types in HIR.
+ Primitive::Type GetType() const OVERRIDE {
+ return Primitive::kPrimDouble;
+ }
+
+ // Returns the true component type packed in a vector.
+ Primitive::Type GetPackedType() const {
+ return GetPackedField<TypeField>();
+ }
+
+ DECLARE_ABSTRACT_INSTRUCTION(VecOperation);
+
+ private:
+ // Additional packed bits.
+ static constexpr size_t kFieldType = HInstruction::kNumberOfGenericPackedBits;
+ static constexpr size_t kFieldTypeSize =
+ MinimumBitsToStore(static_cast<size_t>(Primitive::kPrimLast));
+ static constexpr size_t kNumberOfVectorOpPackedBits = kFieldType + kFieldTypeSize;
+ static_assert(kNumberOfVectorOpPackedBits <= kMaxNumberOfPackedBits, "Too many packed fields.");
+ using TypeField = BitField<Primitive::Type, kFieldType, kFieldTypeSize>;
+
+ const size_t vector_length_;
+
+ DISALLOW_COPY_AND_ASSIGN(HVecOperation);
+};
+
+// Abstraction of a unary vector operation.
+class HVecUnaryOperation : public HVecOperation {
+ public:
+ HVecUnaryOperation(ArenaAllocator* arena,
+ Primitive::Type packed_type,
+ size_t vector_length,
+ uint32_t dex_pc)
+ : HVecOperation(arena,
+ packed_type,
+ SideEffects::None(),
+ /*number_of_inputs*/ 1,
+ vector_length,
+ dex_pc) { }
+ DECLARE_ABSTRACT_INSTRUCTION(VecUnaryOperation);
+ private:
+ DISALLOW_COPY_AND_ASSIGN(HVecUnaryOperation);
+};
+
+// Abstraction of a binary vector operation.
+class HVecBinaryOperation : public HVecOperation {
+ public:
+ HVecBinaryOperation(ArenaAllocator* arena,
+ Primitive::Type packed_type,
+ size_t vector_length,
+ uint32_t dex_pc)
+ : HVecOperation(arena,
+ packed_type,
+ SideEffects::None(),
+ /*number_of_inputs*/ 2,
+ vector_length,
+ dex_pc) { }
+ DECLARE_ABSTRACT_INSTRUCTION(VecBinaryOperation);
+ private:
+ DISALLOW_COPY_AND_ASSIGN(HVecBinaryOperation);
+};
+
+// Abstraction of a vector operation that references memory, with an alignment.
+// The Android runtime guarantees at least "component size" alignment for array
+// elements and, thus, vectors.
+class HVecMemoryOperation : public HVecOperation {
+ public:
+ HVecMemoryOperation(ArenaAllocator* arena,
+ Primitive::Type packed_type,
+ SideEffects side_effects,
+ size_t number_of_inputs,
+ size_t vector_length,
+ uint32_t dex_pc)
+ : HVecOperation(arena, packed_type, side_effects, number_of_inputs, vector_length, dex_pc),
+ alignment_(Primitive::ComponentSize(packed_type), 0) { }
+
+ void SetAlignment(Alignment alignment) { alignment_ = alignment; }
+
+ Alignment GetAlignment() const { return alignment_; }
+
+ DECLARE_ABSTRACT_INSTRUCTION(VecMemoryOperation);
+
+ private:
+ Alignment alignment_;
+
+ DISALLOW_COPY_AND_ASSIGN(HVecMemoryOperation);
+};
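For example, an int32 vector load defaults to Alignment(4, 0), i.e. only the element-size guarantee; an analysis that proves more may promote it (a sketch, assuming a 16-byte vector unit):

    HVecMemoryOperation* load = ...;       // e.g. an HVecLoad of kPrimInt
    load->GetAlignment().IsAlignedAt(16);  // false under the default
    load->SetAlignment(Alignment(16, 0));  // after proving full alignment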
+
+//
+// Definitions of concrete vector operations in HIR.
+//
+
+// Replicates the given scalar into a vector,
+// viz. replicate(x) = [ x, .. , x ].
+class HVecReplicateScalar FINAL : public HVecUnaryOperation {
+ public:
+ HVecReplicateScalar(ArenaAllocator* arena,
+ HInstruction* scalar,
+ Primitive::Type packed_type,
+ size_t vector_length,
+ uint32_t dex_pc = kNoDexPc)
+ : HVecUnaryOperation(arena, packed_type, vector_length, dex_pc) {
+ SetRawInputAt(0, scalar);
+ }
+ DECLARE_INSTRUCTION(VecReplicateScalar);
+ private:
+ DISALLOW_COPY_AND_ASSIGN(HVecReplicateScalar);
+};
+
+// Assigns the given scalar elements to a vector,
+// viz. set( array(x1, .., xn) ) = [ x1, .. , xn ].
+class HVecSetScalars FINAL : public HVecUnaryOperation {
+ public:
+ HVecSetScalars(ArenaAllocator* arena,
+ HInstruction** scalars, // array
+ Primitive::Type packed_type,
+ size_t vector_length,
+ uint32_t dex_pc = kNoDexPc)
+ : HVecUnaryOperation(arena, packed_type, vector_length, dex_pc) {
+ for (size_t i = 0; i < vector_length; i++) {
+ SetRawInputAt(i, scalars[i]);
+ }
+ }
+ DECLARE_INSTRUCTION(VecSetScalars);
+ private:
+ DISALLOW_COPY_AND_ASSIGN(HVecSetScalars);
+};
+
+// Sum-reduces the given vector into a shorter vector (m < n) or scalar (m = 1),
+// viz. sum-reduce[ x1, .. , xn ] = [ y1, .., ym ], where yi = sum_j x_j.
+class HVecSumReduce FINAL : public HVecUnaryOperation {
+ public:
+ HVecSumReduce(ArenaAllocator* arena,
+ HInstruction* input,
+ Primitive::Type packed_type,
+ size_t vector_length,
+ uint32_t dex_pc = kNoDexPc)
+ : HVecUnaryOperation(arena, packed_type, vector_length, dex_pc) {
+ DCHECK(input->IsVecOperation());
+ DCHECK_EQ(input->AsVecOperation()->GetPackedType(), packed_type);
+ SetRawInputAt(0, input);
+ }
+
+ // TODO: probably integral promotion
+ Primitive::Type GetType() const OVERRIDE { return GetPackedType(); }
+
+ DECLARE_INSTRUCTION(VecSumReduce);
+ private:
+ DISALLOW_COPY_AND_ASSIGN(HVecSumReduce);
+};
+
+// Converts every component in the vector,
+// viz. cnv[ x1, .. , xn ] = [ cnv(x1), .. , cnv(xn) ].
+class HVecCnv FINAL : public HVecUnaryOperation {
+ public:
+ HVecCnv(ArenaAllocator* arena,
+ HInstruction* input,
+ Primitive::Type packed_type,
+ size_t vector_length,
+ uint32_t dex_pc = kNoDexPc)
+ : HVecUnaryOperation(arena, packed_type, vector_length, dex_pc) {
+ DCHECK(input->IsVecOperation());
+ DCHECK_NE(input->AsVecOperation()->GetPackedType(), packed_type); // actual convert
+ SetRawInputAt(0, input);
+ }
+
+ Primitive::Type GetInputType() const { return InputAt(0)->AsVecOperation()->GetPackedType(); }
+ Primitive::Type GetResultType() const { return GetPackedType(); }
+
+ DECLARE_INSTRUCTION(VecCnv);
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(HVecCnv);
+};
+
+// Negates every component in the vector,
+// viz. neg[ x1, .. , xn ] = [ -x1, .. , -xn ].
+class HVecNeg FINAL : public HVecUnaryOperation {
+ public:
+ HVecNeg(ArenaAllocator* arena,
+ HInstruction* input,
+ Primitive::Type packed_type,
+ size_t vector_length,
+ uint32_t dex_pc = kNoDexPc)
+ : HVecUnaryOperation(arena, packed_type, vector_length, dex_pc) {
+ DCHECK(input->IsVecOperation());
+ DCHECK_EQ(input->AsVecOperation()->GetPackedType(), packed_type);
+ SetRawInputAt(0, input);
+ }
+ DECLARE_INSTRUCTION(VecNeg);
+ private:
+ DISALLOW_COPY_AND_ASSIGN(HVecNeg);
+};
+
+// Bitwise- or boolean-nots every component in the vector,
+// viz. not[ x1, .. , xn ] = [ ~x1, .. , ~xn ], or
+// not[ x1, .. , xn ] = [ !x1, .. , !xn ] for boolean.
+class HVecNot FINAL : public HVecUnaryOperation {
+ public:
+ HVecNot(ArenaAllocator* arena,
+ HInstruction* input,
+ Primitive::Type packed_type,
+ size_t vector_length,
+ uint32_t dex_pc = kNoDexPc)
+ : HVecUnaryOperation(arena, packed_type, vector_length, dex_pc) {
+ DCHECK(input->IsVecOperation());
+ SetRawInputAt(0, input);
+ }
+ DECLARE_INSTRUCTION(VecNot);
+ private:
+ DISALLOW_COPY_AND_ASSIGN(HVecNot);
+};
+
+// Adds every component in the two vectors,
+// viz. [ x1, .. , xn ] + [ y1, .. , yn ] = [ x1 + y1, .. , xn + yn ].
+class HVecAdd FINAL : public HVecBinaryOperation {
+ public:
+ HVecAdd(ArenaAllocator* arena,
+ HInstruction* left,
+ HInstruction* right,
+ Primitive::Type packed_type,
+ size_t vector_length,
+ uint32_t dex_pc = kNoDexPc)
+ : HVecBinaryOperation(arena, packed_type, vector_length, dex_pc) {
+ DCHECK(left->IsVecOperation() && right->IsVecOperation());
+ DCHECK_EQ(left->AsVecOperation()->GetPackedType(), packed_type);
+ DCHECK_EQ(right->AsVecOperation()->GetPackedType(), packed_type);
+ SetRawInputAt(0, left);
+ SetRawInputAt(1, right);
+ }
+ DECLARE_INSTRUCTION(VecAdd);
+ private:
+ DISALLOW_COPY_AND_ASSIGN(HVecAdd);
+};
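A construction sketch (arena and vector inputs v0/v1 assumed): a 4-lane int32 add packs 4 * 4 = 16 bytes into one SIMD value, which the HIR still types as kPrimDouble via HVecOperation::GetType():

    HVecAdd* add = new (arena) HVecAdd(
        arena, v0, v1, Primitive::kPrimInt, /* vector_length */ 4);
    size_t bytes = add->GetVectorNumberOfBytes();  // 16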
+
+// Subtracts every component in the two vectors,
+// viz. [ x1, .. , xn ] - [ y1, .. , yn ] = [ x1 - y1, .. , xn - yn ].
+class HVecSub FINAL : public HVecBinaryOperation {
+ public:
+ HVecSub(ArenaAllocator* arena,
+ HInstruction* left,
+ HInstruction* right,
+ Primitive::Type packed_type,
+ size_t vector_length,
+ uint32_t dex_pc = kNoDexPc)
+ : HVecBinaryOperation(arena, packed_type, vector_length, dex_pc) {
+ DCHECK(left->IsVecOperation() && right->IsVecOperation());
+ DCHECK_EQ(left->AsVecOperation()->GetPackedType(), packed_type);
+ DCHECK_EQ(right->AsVecOperation()->GetPackedType(), packed_type);
+ SetRawInputAt(0, left);
+ SetRawInputAt(1, right);
+ }
+ DECLARE_INSTRUCTION(VecSub);
+ private:
+ DISALLOW_COPY_AND_ASSIGN(HVecSub);
+};
+
+// Multiplies every component in the two vectors,
+// viz. [ x1, .. , xn ] * [ y1, .. , yn ] = [ x1 * y1, .. , xn * yn ].
+class HVecMul FINAL : public HVecBinaryOperation {
+ public:
+ HVecMul(ArenaAllocator* arena,
+ HInstruction* left,
+ HInstruction* right,
+ Primitive::Type packed_type,
+ size_t vector_length,
+ uint32_t dex_pc = kNoDexPc)
+ : HVecBinaryOperation(arena, packed_type, vector_length, dex_pc) {
+ DCHECK(left->IsVecOperation() && right->IsVecOperation());
+ DCHECK_EQ(left->AsVecOperation()->GetPackedType(), packed_type);
+ DCHECK_EQ(right->AsVecOperation()->GetPackedType(), packed_type);
+ SetRawInputAt(0, left);
+ SetRawInputAt(1, right);
+ }
+ DECLARE_INSTRUCTION(VecMul);
+ private:
+ DISALLOW_COPY_AND_ASSIGN(HVecMul);
+};
+
+// Divides every component in the two vectors,
+// viz. [ x1, .. , xn ] / [ y1, .. , yn ] = [ x1 / y1, .. , xn / yn ].
+class HVecDiv FINAL : public HVecBinaryOperation {
+ public:
+ HVecDiv(ArenaAllocator* arena,
+ HInstruction* left,
+ HInstruction* right,
+ Primitive::Type packed_type,
+ size_t vector_length,
+ uint32_t dex_pc = kNoDexPc)
+ : HVecBinaryOperation(arena, packed_type, vector_length, dex_pc) {
+ DCHECK(left->IsVecOperation() && right->IsVecOperation());
+ DCHECK_EQ(left->AsVecOperation()->GetPackedType(), packed_type);
+ DCHECK_EQ(right->AsVecOperation()->GetPackedType(), packed_type);
+ SetRawInputAt(0, left);
+ SetRawInputAt(1, right);
+ }
+ DECLARE_INSTRUCTION(VecDiv);
+ private:
+ DISALLOW_COPY_AND_ASSIGN(HVecDiv);
+};
+
+// Bitwise-ands every component in the two vectors,
+// viz. [ x1, .. , xn ] & [ y1, .. , yn ] = [ x1 & y1, .. , xn & yn ].
+class HVecAnd FINAL : public HVecBinaryOperation {
+ public:
+ HVecAnd(ArenaAllocator* arena,
+ HInstruction* left,
+ HInstruction* right,
+ Primitive::Type packed_type,
+ size_t vector_length,
+ uint32_t dex_pc = kNoDexPc)
+ : HVecBinaryOperation(arena, packed_type, vector_length, dex_pc) {
+ DCHECK(left->IsVecOperation() && right->IsVecOperation());
+ SetRawInputAt(0, left);
+ SetRawInputAt(1, right);
+ }
+ DECLARE_INSTRUCTION(VecAnd);
+ private:
+ DISALLOW_COPY_AND_ASSIGN(HVecAnd);
+};
+
+// Bitwise-and-nots every component in the two vectors,
+// viz. [ x1, .. , xn ] and-not [ y1, .. , yn ] = [ ~x1 & y1, .. , ~xn & yn ].
+class HVecAndNot FINAL : public HVecBinaryOperation {
+ public:
+ HVecAndNot(ArenaAllocator* arena,
+ HInstruction* left,
+ HInstruction* right,
+ Primitive::Type packed_type,
+ size_t vector_length,
+ uint32_t dex_pc = kNoDexPc)
+ : HVecBinaryOperation(arena, packed_type, vector_length, dex_pc) {
+ DCHECK(left->IsVecOperation() && right->IsVecOperation());
+ SetRawInputAt(0, left);
+ SetRawInputAt(1, right);
+ }
+ DECLARE_INSTRUCTION(VecAndNot);
+ private:
+ DISALLOW_COPY_AND_ASSIGN(HVecAndNot);
+};
+
+// Bitwise-ors every component in the two vectors,
+// viz. [ x1, .. , xn ] | [ y1, .. , yn ] = [ x1 | y1, .. , xn | yn ].
+class HVecOr FINAL : public HVecBinaryOperation {
+ public:
+ HVecOr(ArenaAllocator* arena,
+ HInstruction* left,
+ HInstruction* right,
+ Primitive::Type packed_type,
+ size_t vector_length,
+ uint32_t dex_pc = kNoDexPc)
+ : HVecBinaryOperation(arena, packed_type, vector_length, dex_pc) {
+ DCHECK(left->IsVecOperation() && right->IsVecOperation());
+ SetRawInputAt(0, left);
+ SetRawInputAt(1, right);
+ }
+ DECLARE_INSTRUCTION(VecOr);
+ private:
+ DISALLOW_COPY_AND_ASSIGN(HVecOr);
+};
+
+// Bitwise-xors every component in the two vectors,
+// viz. [ x1, .. , xn ] ^ [ y1, .. , yn ] = [ x1 ^ y1, .. , xn ^ yn ].
+class HVecXor FINAL : public HVecBinaryOperation {
+ public:
+ HVecXor(ArenaAllocator* arena,
+ HInstruction* left,
+ HInstruction* right,
+ Primitive::Type packed_type,
+ size_t vector_length,
+ uint32_t dex_pc = kNoDexPc)
+ : HVecBinaryOperation(arena, packed_type, vector_length, dex_pc) {
+ DCHECK(left->IsVecOperation() && right->IsVecOperation());
+ SetRawInputAt(0, left);
+ SetRawInputAt(1, right);
+ }
+ DECLARE_INSTRUCTION(VecXor);
+ private:
+ DISALLOW_COPY_AND_ASSIGN(HVecXor);
+};
+
+// Logically shifts every component in the vector left by the given distance,
+// viz. [ x1, .. , xn ] << d = [ x1 << d, .. , xn << d ].
+class HVecShl FINAL : public HVecBinaryOperation {
+ public:
+ HVecShl(ArenaAllocator* arena,
+ HInstruction* left,
+ HInstruction* right,
+ Primitive::Type packed_type,
+ size_t vector_length,
+ uint32_t dex_pc = kNoDexPc)
+ : HVecBinaryOperation(arena, packed_type, vector_length, dex_pc) {
+ DCHECK(left->IsVecOperation());
+ DCHECK_EQ(left->AsVecOperation()->GetPackedType(), packed_type);
+ SetRawInputAt(0, left);
+ SetRawInputAt(1, right);
+ }
+ DECLARE_INSTRUCTION(VecShl);
+ private:
+ DISALLOW_COPY_AND_ASSIGN(HVecShl);
+};
+
+// Arithmetically shifts every component in the vector right by the given distance,
+// viz. [ x1, .. , xn ] >> d = [ x1 >> d, .. , xn >> d ].
+class HVecShr FINAL : public HVecBinaryOperation {
+ public:
+ HVecShr(ArenaAllocator* arena,
+ HInstruction* left,
+ HInstruction* right,
+ Primitive::Type packed_type,
+ size_t vector_length,
+ uint32_t dex_pc = kNoDexPc)
+ : HVecBinaryOperation(arena, packed_type, vector_length, dex_pc) {
+ DCHECK(left->IsVecOperation());
+ DCHECK_EQ(left->AsVecOperation()->GetPackedType(), packed_type);
+ SetRawInputAt(0, left);
+ SetRawInputAt(1, right);
+ }
+ DECLARE_INSTRUCTION(VecShr);
+ private:
+ DISALLOW_COPY_AND_ASSIGN(HVecShr);
+};
+
+// Logically shifts every component in the vector right by the given distance,
+// viz. [ x1, .. , xn ] >>> d = [ x1 >>> d, .. , xn >>> d ].
+class HVecUShr FINAL : public HVecBinaryOperation {
+ public:
+ HVecUShr(ArenaAllocator* arena,
+ HInstruction* left,
+ HInstruction* right,
+ Primitive::Type packed_type,
+ size_t vector_length,
+ uint32_t dex_pc = kNoDexPc)
+ : HVecBinaryOperation(arena, packed_type, vector_length, dex_pc) {
+ DCHECK(left->IsVecOperation());
+ DCHECK_EQ(left->AsVecOperation()->GetPackedType(), packed_type);
+ SetRawInputAt(0, left);
+ SetRawInputAt(1, right);
+ }
+ DECLARE_INSTRUCTION(VecUShr);
+ private:
+ DISALLOW_COPY_AND_ASSIGN(HVecUShr);
+};
+
+// Loads a vector from memory, viz. load(mem, 1)
+// yields the vector [ mem(1), .. , mem(n) ].
+class HVecLoad FINAL : public HVecMemoryOperation {
+ public:
+ HVecLoad(ArenaAllocator* arena,
+ HInstruction* base,
+ HInstruction* index,
+ Primitive::Type packed_type,
+ size_t vector_length,
+ uint32_t dex_pc = kNoDexPc)
+ : HVecMemoryOperation(arena,
+ packed_type,
+ SideEffects::ArrayReadOfType(packed_type),
+ /*number_of_inputs*/ 2,
+ vector_length,
+ dex_pc) {
+ SetRawInputAt(0, base);
+ SetRawInputAt(1, index);
+ }
+ DECLARE_INSTRUCTION(VecLoad);
+ private:
+ DISALLOW_COPY_AND_ASSIGN(HVecLoad);
+};
+
+// Stores a vector to memory, viz. store(mem, 1, [x1, .. , xn] )
+// sets mem(1) = x1, .. , mem(n) = xn.
+class HVecStore FINAL : public HVecMemoryOperation {
+ public:
+ HVecStore(ArenaAllocator* arena,
+ HInstruction* base,
+ HInstruction* index,
+ HInstruction* value,
+ Primitive::Type packed_type,
+ size_t vector_length,
+ uint32_t dex_pc = kNoDexPc)
+ : HVecMemoryOperation(arena,
+ packed_type,
+ SideEffects::ArrayWriteOfType(packed_type),
+ /*number_of_inputs*/ 3,
+ vector_length,
+ dex_pc) {
+ DCHECK(value->IsVecOperation());
+ DCHECK_EQ(value->AsVecOperation()->GetPackedType(), packed_type);
+ SetRawInputAt(0, base);
+ SetRawInputAt(1, index);
+ SetRawInputAt(2, value);
+ }
+ DECLARE_INSTRUCTION(VecStore);
+ private:
+ DISALLOW_COPY_AND_ASSIGN(HVecStore);
+};
+
+} // namespace art
+
+#endif // ART_COMPILER_OPTIMIZING_NODES_VECTOR_H_
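Taken together, one vectorized iteration of a[i] += b[i] over int32 could be built roughly as follows (arena, array bases a and b, and index i assumed; loop control comes from TransformLoopForVectorization):

    HVecLoad* va = new (arena) HVecLoad(arena, a, i, Primitive::kPrimInt, 4);
    HVecLoad* vb = new (arena) HVecLoad(arena, b, i, Primitive::kPrimInt, 4);
    HVecAdd* sum = new (arena) HVecAdd(arena, va, vb, Primitive::kPrimInt, 4);
    HVecStore* st =
        new (arena) HVecStore(arena, a, i, sum, Primitive::kPrimInt, 4);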
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index 3c6d2d64a9..eb88fdee84 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -454,6 +454,8 @@ static bool IsInstructionSetSupported(InstructionSet instruction_set) {
static bool InstructionSetSupportsReadBarrier(InstructionSet instruction_set) {
return instruction_set == kArm64
|| instruction_set == kThumb2
+ || instruction_set == kMips
+ || instruction_set == kMips64
|| instruction_set == kX86
|| instruction_set == kX86_64;
}
diff --git a/compiler/optimizing/prepare_for_register_allocation.cc b/compiler/optimizing/prepare_for_register_allocation.cc
index efbaf6c221..66bfea9860 100644
--- a/compiler/optimizing/prepare_for_register_allocation.cc
+++ b/compiler/optimizing/prepare_for_register_allocation.cc
@@ -40,6 +40,14 @@ void PrepareForRegisterAllocation::VisitDivZeroCheck(HDivZeroCheck* check) {
check->ReplaceWith(check->InputAt(0));
}
+void PrepareForRegisterAllocation::VisitDeoptimize(HDeoptimize* deoptimize) {
+ if (deoptimize->GuardsAnInput()) {
+ // Replace the uses with the actual guarded instruction.
+ deoptimize->ReplaceWith(deoptimize->GuardedInput());
+ deoptimize->RemoveGuard();
+ }
+}
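In HIR terms the pass simply unhooks the guard (a before/after sketch, not literal compiler output):

    // Before:  d = Deoptimize(cond, guard)   // users read d
    // After:   d = Deoptimize(cond)          // users read guard again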
+
void PrepareForRegisterAllocation::VisitBoundsCheck(HBoundsCheck* check) {
check->ReplaceWith(check->InputAt(0));
if (check->IsStringCharAt()) {
diff --git a/compiler/optimizing/prepare_for_register_allocation.h b/compiler/optimizing/prepare_for_register_allocation.h
index c128227654..7ffbe44ef6 100644
--- a/compiler/optimizing/prepare_for_register_allocation.h
+++ b/compiler/optimizing/prepare_for_register_allocation.h
@@ -44,6 +44,7 @@ class PrepareForRegisterAllocation : public HGraphDelegateVisitor {
void VisitClinitCheck(HClinitCheck* check) OVERRIDE;
void VisitCondition(HCondition* condition) OVERRIDE;
void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE;
+ void VisitDeoptimize(HDeoptimize* deoptimize) OVERRIDE;
bool CanMoveClinitCheck(HInstruction* input, HInstruction* user) const;
bool CanEmitConditionAt(HCondition* condition, HInstruction* user) const;
diff --git a/compiler/optimizing/reference_type_propagation.cc b/compiler/optimizing/reference_type_propagation.cc
index 6e332ca59b..d5637b9b75 100644
--- a/compiler/optimizing/reference_type_propagation.cc
+++ b/compiler/optimizing/reference_type_propagation.cc
@@ -310,8 +310,8 @@ static void BoundTypeForClassCheck(HInstruction* check) {
BoundTypeIn(receiver, trueBlock, /* start_instruction */ nullptr, class_rti);
} else {
DCHECK(check->IsDeoptimize());
- if (compare->IsEqual()) {
- BoundTypeIn(receiver, check->GetBlock(), check, class_rti);
+ if (compare->IsEqual() && check->AsDeoptimize()->GuardsAnInput()) {
+ check->SetReferenceTypeInfo(class_rti);
}
}
}
diff --git a/compiler/optimizing/reference_type_propagation_test.cc b/compiler/optimizing/reference_type_propagation_test.cc
index 84a4bab1a9..0b49ce1a4c 100644
--- a/compiler/optimizing/reference_type_propagation_test.cc
+++ b/compiler/optimizing/reference_type_propagation_test.cc
@@ -29,7 +29,7 @@ namespace art {
*/
class ReferenceTypePropagationTest : public CommonCompilerTest {
public:
- ReferenceTypePropagationTest() : pool_(), allocator_(&pool_) {
+ ReferenceTypePropagationTest() : pool_(), allocator_(&pool_), propagation_(nullptr) {
graph_ = CreateGraph(&allocator_);
}
diff --git a/compiler/optimizing/scheduler.h b/compiler/optimizing/scheduler.h
index ab0dad4300..9236a0e4fa 100644
--- a/compiler/optimizing/scheduler.h
+++ b/compiler/optimizing/scheduler.h
@@ -315,7 +315,10 @@ class SchedulingLatencyVisitor : public HGraphDelegateVisitor {
// This class and its sub-classes will never be used to drive a visit of an
// `HGraph` but only to visit `HInstructions` one at a time, so we do not need
// to pass a valid graph to `HGraphDelegateVisitor()`.
- SchedulingLatencyVisitor() : HGraphDelegateVisitor(nullptr) {}
+ SchedulingLatencyVisitor()
+ : HGraphDelegateVisitor(nullptr),
+ last_visited_latency_(0),
+ last_visited_internal_latency_(0) {}
void VisitInstruction(HInstruction* instruction) OVERRIDE {
LOG(FATAL) << "Error visiting " << instruction->DebugName() << ". "
@@ -413,6 +416,7 @@ class HScheduler {
selector_(selector),
only_optimize_loop_blocks_(true),
scheduling_graph_(this, arena),
+ cursor_(nullptr),
candidates_(arena_->Adapter(kArenaAllocScheduler)) {}
virtual ~HScheduler() {}
diff --git a/compiler/optimizing/ssa_liveness_analysis.cc b/compiler/optimizing/ssa_liveness_analysis.cc
index 36ee5a903a..b538a89a06 100644
--- a/compiler/optimizing/ssa_liveness_analysis.cc
+++ b/compiler/optimizing/ssa_liveness_analysis.cc
@@ -470,7 +470,12 @@ bool LiveInterval::SameRegisterKind(Location other) const {
}
size_t LiveInterval::NumberOfSpillSlotsNeeded() const {
- // TODO: detect vector operation.
+ // For a SIMD operation, compute the number of needed spill slots.
+ // TODO: do through vector type?
+ HInstruction* definition = GetParent()->GetDefinedBy();
+ if (definition != nullptr && definition->IsVecOperation()) {
+ return definition->AsVecOperation()->GetVectorNumberOfBytes() / kVRegSize;
+ }
// Return number of needed spill slots based on type.
return (type_ == Primitive::kPrimLong || type_ == Primitive::kPrimDouble) ? 2 : 1;
}
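With kVRegSize at 4 bytes, a 128-bit SIMD value therefore reserves 16 / 4 = 4 contiguous slots, versus the usual 2 for long/double and 1 otherwise (a sketch reusing a 4 x int32 operation):

    size_t slots = vec_op->GetVectorNumberOfBytes() / kVRegSize;  // 16 / 4 == 4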
diff --git a/compiler/optimizing/ssa_liveness_analysis_test.cc b/compiler/optimizing/ssa_liveness_analysis_test.cc
index 1916c73ca4..a1016d1d47 100644
--- a/compiler/optimizing/ssa_liveness_analysis_test.cc
+++ b/compiler/optimizing/ssa_liveness_analysis_test.cc
@@ -189,13 +189,14 @@ TEST_F(SsaLivenessAnalysisTest, TestDeoptimize) {
// Use HAboveOrEqual+HDeoptimize as the bounds check.
HInstruction* ae = new (&allocator_) HAboveOrEqual(index, length);
block->AddInstruction(ae);
- HInstruction* deoptimize = new(&allocator_) HDeoptimize(ae, /* dex_pc */ 0u);
+ HInstruction* deoptimize =
+ new(&allocator_) HDeoptimize(&allocator_, ae, HDeoptimize::Kind::kBCE, /* dex_pc */ 0u);
block->AddInstruction(deoptimize);
HEnvironment* deoptimize_env = new (&allocator_) HEnvironment(&allocator_,
- /* number_of_vregs */ 5,
- /* method */ nullptr,
- /* dex_pc */ 0u,
- deoptimize);
+ /* number_of_vregs */ 5,
+ /* method */ nullptr,
+ /* dex_pc */ 0u,
+ deoptimize);
deoptimize_env->CopyFrom(args);
deoptimize->SetRawEnvironment(deoptimize_env);
HInstruction* array_set =
diff --git a/compiler/utils/assembler_test.h b/compiler/utils/assembler_test.h
index d265a44092..f655994bd3 100644
--- a/compiler/utils/assembler_test.h
+++ b/compiler/utils/assembler_test.h
@@ -309,7 +309,7 @@ class AssemblerTest : public testing::Test {
template <typename RegType, typename ImmType>
std::string RepeatTemplatedRegisterImmBits(void (Ass::*f)(RegType, ImmType),
int imm_bits,
- const std::vector<Reg*> registers,
+ const std::vector<RegType*> registers,
std::string (AssemblerTest::*GetName)(const RegType&),
const std::string& fmt,
int bias) {
@@ -573,6 +573,19 @@ class AssemblerTest : public testing::Test {
}
template <typename ImmType>
+ std::string RepeatVIb(void (Ass::*f)(VecReg, ImmType),
+ int imm_bits,
+ std::string fmt,
+ int bias = 0) {
+ return RepeatTemplatedRegisterImmBits<VecReg, ImmType>(f,
+ imm_bits,
+ GetVectorRegisters(),
+ &AssemblerTest::GetVecRegName,
+ fmt,
+ bias);
+ }
+
+ template <typename ImmType>
std::string RepeatVRIb(void (Ass::*f)(VecReg, Reg, ImmType),
int imm_bits,
const std::string& fmt,
diff --git a/compiler/utils/atomic_method_ref_map-inl.h b/compiler/utils/atomic_method_ref_map-inl.h
index d71c2fe997..ad3a099eb6 100644
--- a/compiler/utils/atomic_method_ref_map-inl.h
+++ b/compiler/utils/atomic_method_ref_map-inl.h
@@ -42,7 +42,7 @@ template <typename T>
inline bool AtomicMethodRefMap<T>::Get(MethodReference ref, T* out) const {
const ElementArray* const array = GetArray(ref.dex_file);
if (array == nullptr) {
- return kInsertResultInvalidDexFile;
+ return false;
}
*out = (*array)[ref.dex_method_index].LoadRelaxed();
return true;
diff --git a/compiler/utils/mips64/assembler_mips64.cc b/compiler/utils/mips64/assembler_mips64.cc
index 8a5ae754df..0cff44d830 100644
--- a/compiler/utils/mips64/assembler_mips64.cc
+++ b/compiler/utils/mips64/assembler_mips64.cc
@@ -252,6 +252,22 @@ void Mips64Assembler::EmitMsaMI10(int s10,
Emit(encoding);
}
+void Mips64Assembler::EmitMsaI10(int operation,
+ int df,
+ int i10,
+ VectorRegister wd,
+ int minor_opcode) {
+ CHECK_NE(wd, kNoVectorRegister);
+ CHECK(IsUint<10>(i10)) << i10;
+ uint32_t encoding = static_cast<uint32_t>(kMsaMajorOpcode) << kOpcodeShift |
+ operation << kMsaOperationShift |
+ df << kDfShift |
+ i10 << kI10Shift |
+ static_cast<uint32_t>(wd) << kWdShift |
+ minor_opcode;
+ Emit(encoding);
+}
+
void Mips64Assembler::EmitMsa2R(int operation,
int df,
VectorRegister ws,
@@ -1581,6 +1597,30 @@ void Mips64Assembler::FillD(VectorRegister wd, GpuRegister rs) {
EmitMsa2R(0xc0, 0x3, static_cast<VectorRegister>(rs), wd, 0x1e);
}
+void Mips64Assembler::LdiB(VectorRegister wd, int imm8) {
+ CHECK(HasMsa());
+ CHECK(IsInt<8>(imm8)) << imm8;
+ EmitMsaI10(0x6, 0x0, imm8 & kMsaS10Mask, wd, 0x7);
+}
+
+void Mips64Assembler::LdiH(VectorRegister wd, int imm10) {
+ CHECK(HasMsa());
+ CHECK(IsInt<10>(imm10)) << imm10;
+ EmitMsaI10(0x6, 0x1, imm10 & kMsaS10Mask, wd, 0x7);
+}
+
+void Mips64Assembler::LdiW(VectorRegister wd, int imm10) {
+ CHECK(HasMsa());
+ CHECK(IsInt<10>(imm10)) << imm10;
+ EmitMsaI10(0x6, 0x2, imm10 & kMsaS10Mask, wd, 0x7);
+}
+
+void Mips64Assembler::LdiD(VectorRegister wd, int imm10) {
+ CHECK(HasMsa());
+ CHECK(IsInt<10>(imm10)) << imm10;
+ EmitMsaI10(0x6, 0x3, imm10 & kMsaS10Mask, wd, 0x7);
+}
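Usage sketch for the new immediate loads (assembler and MSA support assumed; immediates must fit the signed ranges checked above):

    __ LdiB(mips64::W0, -1);    // every byte lane = 0xff (all-ones mask)
    __ LdiW(mips64::W1, 42);    // every 32-bit lane = 42
    __ LdiD(mips64::W2, -512);  // every 64-bit lane = -512 (10-bit minimum)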
+
void Mips64Assembler::LdB(VectorRegister wd, GpuRegister rs, int offset) {
CHECK(HasMsa());
CHECK(IsInt<10>(offset)) << offset;
@@ -1661,6 +1701,7 @@ void Mips64Assembler::Addiu32(GpuRegister rt, GpuRegister rs, int32_t value) {
}
}
+// TODO: don't use rtmp, use daui, dahi, dati.
void Mips64Assembler::Daddiu64(GpuRegister rt, GpuRegister rs, int64_t value, GpuRegister rtmp) {
if (IsInt<16>(value)) {
Daddiu(rt, rs, value);
diff --git a/compiler/utils/mips64/assembler_mips64.h b/compiler/utils/mips64/assembler_mips64.h
index a8035b6da4..666c6935a1 100644
--- a/compiler/utils/mips64/assembler_mips64.h
+++ b/compiler/utils/mips64/assembler_mips64.h
@@ -734,6 +734,10 @@ class Mips64Assembler FINAL : public Assembler, public JNIMacroAssembler<Pointer
void FillW(VectorRegister wd, GpuRegister rs);
void FillD(VectorRegister wd, GpuRegister rs);
+ void LdiB(VectorRegister wd, int imm8);
+ void LdiH(VectorRegister wd, int imm10);
+ void LdiW(VectorRegister wd, int imm10);
+ void LdiD(VectorRegister wd, int imm10);
void LdB(VectorRegister wd, GpuRegister rs, int offset);
void LdH(VectorRegister wd, GpuRegister rs, int offset);
void LdW(VectorRegister wd, GpuRegister rs, int offset);
@@ -1457,6 +1461,7 @@ class Mips64Assembler FINAL : public Assembler, public JNIMacroAssembler<Pointer
void EmitMsaBIT(int operation, int df_m, VectorRegister ws, VectorRegister wd, int minor_opcode);
void EmitMsaELM(int operation, int df_n, VectorRegister ws, VectorRegister wd, int minor_opcode);
void EmitMsaMI10(int s10, GpuRegister rs, VectorRegister wd, int minor_opcode, int df);
+ void EmitMsaI10(int operation, int df, int i10, VectorRegister wd, int minor_opcode);
void EmitMsa2R(int operation, int df, VectorRegister ws, VectorRegister wd, int minor_opcode);
void EmitMsa2RF(int operation, int df, VectorRegister ws, VectorRegister wd, int minor_opcode);
diff --git a/compiler/utils/mips64/assembler_mips64_test.cc b/compiler/utils/mips64/assembler_mips64_test.cc
index cadbe27819..f2e3b1610c 100644
--- a/compiler/utils/mips64/assembler_mips64_test.cc
+++ b/compiler/utils/mips64/assembler_mips64_test.cc
@@ -2836,6 +2836,22 @@ TEST_F(AssemblerMIPS64Test, FillD) {
DriverStr(RepeatVR(&mips64::Mips64Assembler::FillD, "fill.d ${reg1}, ${reg2}"), "fill.d");
}
+TEST_F(AssemblerMIPS64Test, LdiB) {
+ DriverStr(RepeatVIb(&mips64::Mips64Assembler::LdiB, -8, "ldi.b ${reg}, {imm}"), "ldi.b");
+}
+
+TEST_F(AssemblerMIPS64Test, LdiH) {
+ DriverStr(RepeatVIb(&mips64::Mips64Assembler::LdiH, -10, "ldi.h ${reg}, {imm}"), "ldi.h");
+}
+
+TEST_F(AssemblerMIPS64Test, LdiW) {
+ DriverStr(RepeatVIb(&mips64::Mips64Assembler::LdiW, -10, "ldi.w ${reg}, {imm}"), "ldi.w");
+}
+
+TEST_F(AssemblerMIPS64Test, LdiD) {
+ DriverStr(RepeatVIb(&mips64::Mips64Assembler::LdiD, -10, "ldi.d ${reg}, {imm}"), "ldi.d");
+}
+
TEST_F(AssemblerMIPS64Test, LdB) {
DriverStr(RepeatVRIb(&mips64::Mips64Assembler::LdB, -10, "ld.b ${reg1}, {imm}(${reg2})"), "ld.b");
}
diff --git a/compiler/utils/mips64/constants_mips64.h b/compiler/utils/mips64/constants_mips64.h
index 5ae9c73589..bc8e40b437 100644
--- a/compiler/utils/mips64/constants_mips64.h
+++ b/compiler/utils/mips64/constants_mips64.h
@@ -66,6 +66,7 @@ enum InstructionFields {
kWdShift = 6,
kWdBits = 5,
kS10Shift = 16,
+ kI10Shift = 11,
kS10MinorShift = 2,
kBranchOffsetMask = 0x0000ffff,
diff --git a/compiler/utils/mips64/managed_register_mips64.cc b/compiler/utils/mips64/managed_register_mips64.cc
index dea396e4a7..42d061ec15 100644
--- a/compiler/utils/mips64/managed_register_mips64.cc
+++ b/compiler/utils/mips64/managed_register_mips64.cc
@@ -26,6 +26,11 @@ bool Mips64ManagedRegister::Overlaps(const Mips64ManagedRegister& other) const {
CHECK(IsValidManagedRegister());
CHECK(other.IsValidManagedRegister());
if (Equals(other)) return true;
+ if (IsFpuRegister() && other.IsVectorRegister()) {
+ return (AsFpuRegister() == other.AsOverlappingFpuRegister());
+ } else if (IsVectorRegister() && other.IsFpuRegister()) {
+ return (AsVectorRegister() == other.AsOverlappingVectorRegister());
+ }
return false;
}
@@ -36,6 +41,8 @@ void Mips64ManagedRegister::Print(std::ostream& os) const {
os << "GPU: " << static_cast<int>(AsGpuRegister());
} else if (IsFpuRegister()) {
os << "FpuRegister: " << static_cast<int>(AsFpuRegister());
+ } else if (IsVectorRegister()) {
+ os << "VectorRegister: " << static_cast<int>(AsVectorRegister());
} else {
os << "??: " << RegId();
}
diff --git a/compiler/utils/mips64/managed_register_mips64.h b/compiler/utils/mips64/managed_register_mips64.h
index c9f95569cf..3980199b1e 100644
--- a/compiler/utils/mips64/managed_register_mips64.h
+++ b/compiler/utils/mips64/managed_register_mips64.h
@@ -30,11 +30,27 @@ const int kNumberOfGpuAllocIds = kNumberOfGpuRegisters;
const int kNumberOfFpuRegIds = kNumberOfFpuRegisters;
const int kNumberOfFpuAllocIds = kNumberOfFpuRegisters;
-const int kNumberOfRegIds = kNumberOfGpuRegIds + kNumberOfFpuRegIds;
-const int kNumberOfAllocIds = kNumberOfGpuAllocIds + kNumberOfFpuAllocIds;
-
-// An instance of class 'ManagedRegister' represents a single GPU register (enum
-// Register) or a double precision FP register (enum FpuRegister)
+const int kNumberOfVecRegIds = kNumberOfVectorRegisters;
+const int kNumberOfVecAllocIds = kNumberOfVectorRegisters;
+
+const int kNumberOfRegIds = kNumberOfGpuRegIds + kNumberOfFpuRegIds + kNumberOfVecRegIds;
+const int kNumberOfAllocIds = kNumberOfGpuAllocIds + kNumberOfFpuAllocIds + kNumberOfVecAllocIds;
+
+// Register ids map:
+// [0..R[ core registers (enum GpuRegister)
+// [R..F[ floating-point registers (enum FpuRegister)
+// [F..W[ MSA vector registers (enum VectorRegister)
+// where
+// R = kNumberOfGpuRegIds
+// F = R + kNumberOfFpuRegIds
+// W = F + kNumberOfVecRegIds
+
+// An instance of class 'ManagedRegister' represents a single Mips64 register.
+// A register can be one of the following:
+// * core register (enum GpuRegister)
+// * floating-point register (enum FpuRegister)
+// * MSA vector register (enum VectorRegister)
+//
// 'ManagedRegister::NoRegister()' provides an invalid register.
// There is a one-to-one mapping between ManagedRegister and register id.
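With 32 GPRs and 32 FPRs this works out to GPRs in [0, 32), FPRs in [32, 64), and MSA registers in [64, 96); illustrative arithmetic only:

    // FromVectorRegister(W5) gets id 32 + 32 + 5 == 69 and, per Overlaps(),
    // aliases FromFpuRegister(F5) (id 32 + 5 == 37).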
class Mips64ManagedRegister : public ManagedRegister {
@@ -49,6 +65,21 @@ class Mips64ManagedRegister : public ManagedRegister {
return static_cast<FpuRegister>(id_ - kNumberOfGpuRegIds);
}
+ constexpr VectorRegister AsVectorRegister() const {
+ CHECK(IsVectorRegister());
+ return static_cast<VectorRegister>(id_ - (kNumberOfGpuRegIds + kNumberOfFpuRegisters));
+ }
+
+ constexpr FpuRegister AsOverlappingFpuRegister() const {
+ CHECK(IsValidManagedRegister());
+ return static_cast<FpuRegister>(AsVectorRegister());
+ }
+
+ constexpr VectorRegister AsOverlappingVectorRegister() const {
+ CHECK(IsValidManagedRegister());
+ return static_cast<VectorRegister>(AsFpuRegister());
+ }
+
constexpr bool IsGpuRegister() const {
CHECK(IsValidManagedRegister());
return (0 <= id_) && (id_ < kNumberOfGpuRegIds);
@@ -60,6 +91,12 @@ class Mips64ManagedRegister : public ManagedRegister {
return (0 <= test) && (test < kNumberOfFpuRegIds);
}
+ constexpr bool IsVectorRegister() const {
+ CHECK(IsValidManagedRegister());
+ const int test = id_ - (kNumberOfGpuRegIds + kNumberOfFpuRegIds);
+ return (0 <= test) && (test < kNumberOfVecRegIds);
+ }
+
void Print(std::ostream& os) const;
// Returns true if the two managed-registers ('this' and 'other') overlap.
@@ -77,6 +114,11 @@ class Mips64ManagedRegister : public ManagedRegister {
return FromRegId(r + kNumberOfGpuRegIds);
}
+ static constexpr Mips64ManagedRegister FromVectorRegister(VectorRegister r) {
+ CHECK_NE(r, kNoVectorRegister);
+ return FromRegId(r + kNumberOfGpuRegIds + kNumberOfFpuRegIds);
+ }
+
private:
constexpr bool IsValidManagedRegister() const {
return (0 <= id_) && (id_ < kNumberOfRegIds);
diff --git a/compiler/utils/mips64/managed_register_mips64_test.cc b/compiler/utils/mips64/managed_register_mips64_test.cc
new file mode 100644
index 0000000000..8b72d7e61d
--- /dev/null
+++ b/compiler/utils/mips64/managed_register_mips64_test.cc
@@ -0,0 +1,480 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "managed_register_mips64.h"
+#include "globals.h"
+#include "gtest/gtest.h"
+
+namespace art {
+namespace mips64 {
+
+TEST(Mips64ManagedRegister, NoRegister) {
+ Mips64ManagedRegister reg = ManagedRegister::NoRegister().AsMips64();
+ EXPECT_TRUE(reg.IsNoRegister());
+ EXPECT_FALSE(reg.Overlaps(reg));
+}
+
+TEST(Mips64ManagedRegister, GpuRegister) {
+ Mips64ManagedRegister reg = Mips64ManagedRegister::FromGpuRegister(ZERO);
+ EXPECT_FALSE(reg.IsNoRegister());
+ EXPECT_TRUE(reg.IsGpuRegister());
+ EXPECT_FALSE(reg.IsFpuRegister());
+ EXPECT_FALSE(reg.IsVectorRegister());
+ EXPECT_EQ(ZERO, reg.AsGpuRegister());
+
+ reg = Mips64ManagedRegister::FromGpuRegister(AT);
+ EXPECT_FALSE(reg.IsNoRegister());
+ EXPECT_TRUE(reg.IsGpuRegister());
+ EXPECT_FALSE(reg.IsFpuRegister());
+ EXPECT_FALSE(reg.IsVectorRegister());
+ EXPECT_EQ(AT, reg.AsGpuRegister());
+
+ reg = Mips64ManagedRegister::FromGpuRegister(V0);
+ EXPECT_FALSE(reg.IsNoRegister());
+ EXPECT_TRUE(reg.IsGpuRegister());
+ EXPECT_FALSE(reg.IsFpuRegister());
+ EXPECT_FALSE(reg.IsVectorRegister());
+ EXPECT_EQ(V0, reg.AsGpuRegister());
+
+ reg = Mips64ManagedRegister::FromGpuRegister(A0);
+ EXPECT_FALSE(reg.IsNoRegister());
+ EXPECT_TRUE(reg.IsGpuRegister());
+ EXPECT_FALSE(reg.IsFpuRegister());
+ EXPECT_FALSE(reg.IsVectorRegister());
+ EXPECT_EQ(A0, reg.AsGpuRegister());
+
+ reg = Mips64ManagedRegister::FromGpuRegister(A7);
+ EXPECT_FALSE(reg.IsNoRegister());
+ EXPECT_TRUE(reg.IsGpuRegister());
+ EXPECT_FALSE(reg.IsFpuRegister());
+ EXPECT_FALSE(reg.IsVectorRegister());
+ EXPECT_EQ(A7, reg.AsGpuRegister());
+
+ reg = Mips64ManagedRegister::FromGpuRegister(T0);
+ EXPECT_FALSE(reg.IsNoRegister());
+ EXPECT_TRUE(reg.IsGpuRegister());
+ EXPECT_FALSE(reg.IsFpuRegister());
+ EXPECT_FALSE(reg.IsVectorRegister());
+ EXPECT_EQ(T0, reg.AsGpuRegister());
+
+ reg = Mips64ManagedRegister::FromGpuRegister(T3);
+ EXPECT_FALSE(reg.IsNoRegister());
+ EXPECT_TRUE(reg.IsGpuRegister());
+ EXPECT_FALSE(reg.IsFpuRegister());
+ EXPECT_FALSE(reg.IsVectorRegister());
+ EXPECT_EQ(T3, reg.AsGpuRegister());
+
+ reg = Mips64ManagedRegister::FromGpuRegister(S0);
+ EXPECT_FALSE(reg.IsNoRegister());
+ EXPECT_TRUE(reg.IsGpuRegister());
+ EXPECT_FALSE(reg.IsFpuRegister());
+ EXPECT_FALSE(reg.IsVectorRegister());
+ EXPECT_EQ(S0, reg.AsGpuRegister());
+
+ reg = Mips64ManagedRegister::FromGpuRegister(GP);
+ EXPECT_FALSE(reg.IsNoRegister());
+ EXPECT_TRUE(reg.IsGpuRegister());
+ EXPECT_FALSE(reg.IsFpuRegister());
+ EXPECT_FALSE(reg.IsVectorRegister());
+ EXPECT_EQ(GP, reg.AsGpuRegister());
+
+ reg = Mips64ManagedRegister::FromGpuRegister(SP);
+ EXPECT_FALSE(reg.IsNoRegister());
+ EXPECT_TRUE(reg.IsGpuRegister());
+ EXPECT_FALSE(reg.IsFpuRegister());
+ EXPECT_FALSE(reg.IsVectorRegister());
+ EXPECT_EQ(SP, reg.AsGpuRegister());
+
+ reg = Mips64ManagedRegister::FromGpuRegister(RA);
+ EXPECT_FALSE(reg.IsNoRegister());
+ EXPECT_TRUE(reg.IsGpuRegister());
+ EXPECT_FALSE(reg.IsFpuRegister());
+ EXPECT_FALSE(reg.IsVectorRegister());
+ EXPECT_EQ(RA, reg.AsGpuRegister());
+}
+
+TEST(Mips64ManagedRegister, FpuRegister) {
+ Mips64ManagedRegister reg = Mips64ManagedRegister::FromFpuRegister(F0);
+ Mips64ManagedRegister vreg = Mips64ManagedRegister::FromVectorRegister(W0);
+ EXPECT_FALSE(reg.IsNoRegister());
+ EXPECT_FALSE(reg.IsGpuRegister());
+ EXPECT_TRUE(reg.IsFpuRegister());
+ EXPECT_FALSE(reg.IsVectorRegister());
+ EXPECT_TRUE(reg.Overlaps(vreg));
+ EXPECT_EQ(F0, reg.AsFpuRegister());
+ EXPECT_EQ(W0, reg.AsOverlappingVectorRegister());
+ EXPECT_TRUE(reg.Equals(Mips64ManagedRegister::FromFpuRegister(F0)));
+
+ reg = Mips64ManagedRegister::FromFpuRegister(F1);
+ vreg = Mips64ManagedRegister::FromVectorRegister(W1);
+ EXPECT_FALSE(reg.IsNoRegister());
+ EXPECT_FALSE(reg.IsGpuRegister());
+ EXPECT_TRUE(reg.IsFpuRegister());
+ EXPECT_FALSE(reg.IsVectorRegister());
+ EXPECT_TRUE(reg.Overlaps(vreg));
+ EXPECT_EQ(F1, reg.AsFpuRegister());
+ EXPECT_EQ(W1, reg.AsOverlappingVectorRegister());
+ EXPECT_TRUE(reg.Equals(Mips64ManagedRegister::FromFpuRegister(F1)));
+
+ reg = Mips64ManagedRegister::FromFpuRegister(F20);
+ vreg = Mips64ManagedRegister::FromVectorRegister(W20);
+ EXPECT_FALSE(reg.IsNoRegister());
+ EXPECT_FALSE(reg.IsGpuRegister());
+ EXPECT_TRUE(reg.IsFpuRegister());
+ EXPECT_FALSE(reg.IsVectorRegister());
+ EXPECT_TRUE(reg.Overlaps(vreg));
+ EXPECT_EQ(F20, reg.AsFpuRegister());
+ EXPECT_EQ(W20, reg.AsOverlappingVectorRegister());
+ EXPECT_TRUE(reg.Equals(Mips64ManagedRegister::FromFpuRegister(F20)));
+
+ reg = Mips64ManagedRegister::FromFpuRegister(F31);
+ vreg = Mips64ManagedRegister::FromVectorRegister(W31);
+ EXPECT_FALSE(reg.IsNoRegister());
+ EXPECT_FALSE(reg.IsGpuRegister());
+ EXPECT_TRUE(reg.IsFpuRegister());
+ EXPECT_FALSE(reg.IsVectorRegister());
+ EXPECT_TRUE(reg.Overlaps(vreg));
+ EXPECT_EQ(F31, reg.AsFpuRegister());
+ EXPECT_EQ(W31, reg.AsOverlappingVectorRegister());
+ EXPECT_TRUE(reg.Equals(Mips64ManagedRegister::FromFpuRegister(F31)));
+}
+
+TEST(Mips64ManagedRegister, VectorRegister) {
+ Mips64ManagedRegister reg = Mips64ManagedRegister::FromVectorRegister(W0);
+ Mips64ManagedRegister freg = Mips64ManagedRegister::FromFpuRegister(F0);
+ EXPECT_FALSE(reg.IsNoRegister());
+ EXPECT_FALSE(reg.IsGpuRegister());
+ EXPECT_FALSE(reg.IsFpuRegister());
+ EXPECT_TRUE(reg.IsVectorRegister());
+ EXPECT_TRUE(reg.Overlaps(freg));
+ EXPECT_EQ(W0, reg.AsVectorRegister());
+ EXPECT_EQ(F0, reg.AsOverlappingFpuRegister());
+ EXPECT_TRUE(reg.Equals(Mips64ManagedRegister::FromVectorRegister(W0)));
+
+ reg = Mips64ManagedRegister::FromVectorRegister(W2);
+ freg = Mips64ManagedRegister::FromFpuRegister(F2);
+ EXPECT_FALSE(reg.IsNoRegister());
+ EXPECT_FALSE(reg.IsGpuRegister());
+ EXPECT_FALSE(reg.IsFpuRegister());
+ EXPECT_TRUE(reg.IsVectorRegister());
+ EXPECT_TRUE(reg.Overlaps(freg));
+ EXPECT_EQ(W2, reg.AsVectorRegister());
+ EXPECT_EQ(F2, reg.AsOverlappingFpuRegister());
+ EXPECT_TRUE(reg.Equals(Mips64ManagedRegister::FromVectorRegister(W2)));
+
+ reg = Mips64ManagedRegister::FromVectorRegister(W13);
+ freg = Mips64ManagedRegister::FromFpuRegister(F13);
+ EXPECT_FALSE(reg.IsNoRegister());
+ EXPECT_FALSE(reg.IsGpuRegister());
+ EXPECT_FALSE(reg.IsFpuRegister());
+ EXPECT_TRUE(reg.IsVectorRegister());
+ EXPECT_TRUE(reg.Overlaps(freg));
+ EXPECT_EQ(W13, reg.AsVectorRegister());
+ EXPECT_EQ(F13, reg.AsOverlappingFpuRegister());
+ EXPECT_TRUE(reg.Equals(Mips64ManagedRegister::FromVectorRegister(W13)));
+
+ reg = Mips64ManagedRegister::FromVectorRegister(W29);
+ freg = Mips64ManagedRegister::FromFpuRegister(F29);
+ EXPECT_FALSE(reg.IsNoRegister());
+ EXPECT_FALSE(reg.IsGpuRegister());
+ EXPECT_FALSE(reg.IsFpuRegister());
+ EXPECT_TRUE(reg.IsVectorRegister());
+ EXPECT_TRUE(reg.Overlaps(freg));
+ EXPECT_EQ(W29, reg.AsVectorRegister());
+ EXPECT_EQ(F29, reg.AsOverlappingFpuRegister());
+ EXPECT_TRUE(reg.Equals(Mips64ManagedRegister::FromVectorRegister(W29)));
+}
+
+TEST(Mips64ManagedRegister, Equals) {
+ ManagedRegister no_reg = ManagedRegister::NoRegister();
+ EXPECT_TRUE(no_reg.Equals(Mips64ManagedRegister::NoRegister()));
+ EXPECT_FALSE(no_reg.Equals(Mips64ManagedRegister::FromGpuRegister(ZERO)));
+ EXPECT_FALSE(no_reg.Equals(Mips64ManagedRegister::FromGpuRegister(A1)));
+ EXPECT_FALSE(no_reg.Equals(Mips64ManagedRegister::FromGpuRegister(S2)));
+ EXPECT_FALSE(no_reg.Equals(Mips64ManagedRegister::FromFpuRegister(F0)));
+ EXPECT_FALSE(no_reg.Equals(Mips64ManagedRegister::FromVectorRegister(W0)));
+
+ Mips64ManagedRegister reg_ZERO = Mips64ManagedRegister::FromGpuRegister(ZERO);
+ EXPECT_FALSE(reg_ZERO.Equals(Mips64ManagedRegister::NoRegister()));
+ EXPECT_TRUE(reg_ZERO.Equals(Mips64ManagedRegister::FromGpuRegister(ZERO)));
+ EXPECT_FALSE(reg_ZERO.Equals(Mips64ManagedRegister::FromGpuRegister(A1)));
+ EXPECT_FALSE(reg_ZERO.Equals(Mips64ManagedRegister::FromGpuRegister(S2)));
+ EXPECT_FALSE(reg_ZERO.Equals(Mips64ManagedRegister::FromFpuRegister(F0)));
+ EXPECT_FALSE(reg_ZERO.Equals(Mips64ManagedRegister::FromVectorRegister(W0)));
+
+ Mips64ManagedRegister reg_A1 = Mips64ManagedRegister::FromGpuRegister(A1);
+ EXPECT_FALSE(reg_A1.Equals(Mips64ManagedRegister::NoRegister()));
+ EXPECT_FALSE(reg_A1.Equals(Mips64ManagedRegister::FromGpuRegister(ZERO)));
+ EXPECT_FALSE(reg_A1.Equals(Mips64ManagedRegister::FromGpuRegister(A0)));
+ EXPECT_TRUE(reg_A1.Equals(Mips64ManagedRegister::FromGpuRegister(A1)));
+ EXPECT_FALSE(reg_A1.Equals(Mips64ManagedRegister::FromGpuRegister(S2)));
+ EXPECT_FALSE(reg_A1.Equals(Mips64ManagedRegister::FromFpuRegister(F0)));
+ EXPECT_FALSE(reg_A1.Equals(Mips64ManagedRegister::FromVectorRegister(W0)));
+
+ Mips64ManagedRegister reg_S2 = Mips64ManagedRegister::FromGpuRegister(S2);
+ EXPECT_FALSE(reg_S2.Equals(Mips64ManagedRegister::NoRegister()));
+ EXPECT_FALSE(reg_S2.Equals(Mips64ManagedRegister::FromGpuRegister(ZERO)));
+ EXPECT_FALSE(reg_S2.Equals(Mips64ManagedRegister::FromGpuRegister(A1)));
+ EXPECT_FALSE(reg_S2.Equals(Mips64ManagedRegister::FromGpuRegister(S1)));
+ EXPECT_TRUE(reg_S2.Equals(Mips64ManagedRegister::FromGpuRegister(S2)));
+ EXPECT_FALSE(reg_S2.Equals(Mips64ManagedRegister::FromFpuRegister(F0)));
+ EXPECT_FALSE(reg_S2.Equals(Mips64ManagedRegister::FromVectorRegister(W0)));
+
+ Mips64ManagedRegister reg_F0 = Mips64ManagedRegister::FromFpuRegister(F0);
+ EXPECT_FALSE(reg_F0.Equals(Mips64ManagedRegister::NoRegister()));
+ EXPECT_FALSE(reg_F0.Equals(Mips64ManagedRegister::FromGpuRegister(ZERO)));
+ EXPECT_FALSE(reg_F0.Equals(Mips64ManagedRegister::FromGpuRegister(A1)));
+ EXPECT_FALSE(reg_F0.Equals(Mips64ManagedRegister::FromGpuRegister(S2)));
+ EXPECT_TRUE(reg_F0.Equals(Mips64ManagedRegister::FromFpuRegister(F0)));
+ EXPECT_FALSE(reg_F0.Equals(Mips64ManagedRegister::FromFpuRegister(F1)));
+ EXPECT_FALSE(reg_F0.Equals(Mips64ManagedRegister::FromFpuRegister(F31)));
+ EXPECT_FALSE(reg_F0.Equals(Mips64ManagedRegister::FromVectorRegister(W0)));
+
+ Mips64ManagedRegister reg_F31 = Mips64ManagedRegister::FromFpuRegister(F31);
+ EXPECT_FALSE(reg_F31.Equals(Mips64ManagedRegister::NoRegister()));
+ EXPECT_FALSE(reg_F31.Equals(Mips64ManagedRegister::FromGpuRegister(ZERO)));
+ EXPECT_FALSE(reg_F31.Equals(Mips64ManagedRegister::FromGpuRegister(A1)));
+ EXPECT_FALSE(reg_F31.Equals(Mips64ManagedRegister::FromGpuRegister(S2)));
+ EXPECT_FALSE(reg_F31.Equals(Mips64ManagedRegister::FromFpuRegister(F0)));
+ EXPECT_FALSE(reg_F31.Equals(Mips64ManagedRegister::FromFpuRegister(F1)));
+ EXPECT_TRUE(reg_F31.Equals(Mips64ManagedRegister::FromFpuRegister(F31)));
+ EXPECT_FALSE(reg_F31.Equals(Mips64ManagedRegister::FromVectorRegister(W0)));
+
+ Mips64ManagedRegister reg_W0 = Mips64ManagedRegister::FromVectorRegister(W0);
+ EXPECT_FALSE(reg_W0.Equals(Mips64ManagedRegister::NoRegister()));
+ EXPECT_FALSE(reg_W0.Equals(Mips64ManagedRegister::FromGpuRegister(ZERO)));
+ EXPECT_FALSE(reg_W0.Equals(Mips64ManagedRegister::FromGpuRegister(A1)));
+ EXPECT_FALSE(reg_W0.Equals(Mips64ManagedRegister::FromGpuRegister(S1)));
+ EXPECT_FALSE(reg_W0.Equals(Mips64ManagedRegister::FromFpuRegister(F0)));
+ EXPECT_TRUE(reg_W0.Equals(Mips64ManagedRegister::FromVectorRegister(W0)));
+ EXPECT_FALSE(reg_W0.Equals(Mips64ManagedRegister::FromVectorRegister(W1)));
+ EXPECT_FALSE(reg_W0.Equals(Mips64ManagedRegister::FromVectorRegister(W31)));
+
+ Mips64ManagedRegister reg_W31 = Mips64ManagedRegister::FromVectorRegister(W31);
+ EXPECT_FALSE(reg_W31.Equals(Mips64ManagedRegister::NoRegister()));
+ EXPECT_FALSE(reg_W31.Equals(Mips64ManagedRegister::FromGpuRegister(ZERO)));
+ EXPECT_FALSE(reg_W31.Equals(Mips64ManagedRegister::FromGpuRegister(A1)));
+ EXPECT_FALSE(reg_W31.Equals(Mips64ManagedRegister::FromGpuRegister(S1)));
+ EXPECT_FALSE(reg_W31.Equals(Mips64ManagedRegister::FromFpuRegister(F0)));
+ EXPECT_FALSE(reg_W31.Equals(Mips64ManagedRegister::FromVectorRegister(W0)));
+ EXPECT_FALSE(reg_W31.Equals(Mips64ManagedRegister::FromVectorRegister(W1)));
+ EXPECT_TRUE(reg_W31.Equals(Mips64ManagedRegister::FromVectorRegister(W31)));
+}
+
+TEST(Mips64ManagedRegister, Overlaps) {
+ Mips64ManagedRegister reg = Mips64ManagedRegister::FromFpuRegister(F0);
+ Mips64ManagedRegister reg_o = Mips64ManagedRegister::FromVectorRegister(W0);
+ EXPECT_TRUE(reg.Overlaps(reg_o));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(ZERO)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(A0)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(S0)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(RA)));
+ EXPECT_EQ(F0, reg_o.AsOverlappingFpuRegister());
+ EXPECT_EQ(W0, reg.AsOverlappingVectorRegister());
+ EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F0)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F4)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F16)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F31)));
+ EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W0)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W4)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W16)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W31)));
+
+ reg = Mips64ManagedRegister::FromFpuRegister(F4);
+ reg_o = Mips64ManagedRegister::FromVectorRegister(W4);
+ EXPECT_TRUE(reg.Overlaps(reg_o));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(ZERO)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(A0)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(S0)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(RA)));
+ EXPECT_EQ(F4, reg_o.AsOverlappingFpuRegister());
+ EXPECT_EQ(W4, reg.AsOverlappingVectorRegister());
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F0)));
+ EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F4)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F16)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F31)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W0)));
+ EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W4)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W16)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W31)));
+
+ reg = Mips64ManagedRegister::FromFpuRegister(F16);
+ reg_o = Mips64ManagedRegister::FromVectorRegister(W16);
+ EXPECT_TRUE(reg.Overlaps(reg_o));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(ZERO)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(A0)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(S0)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(RA)));
+ EXPECT_EQ(F16, reg_o.AsOverlappingFpuRegister());
+ EXPECT_EQ(W16, reg.AsOverlappingVectorRegister());
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F0)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F4)));
+ EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F16)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F31)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W0)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W4)));
+ EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W16)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W31)));
+
+ reg = Mips64ManagedRegister::FromFpuRegister(F31);
+ reg_o = Mips64ManagedRegister::FromVectorRegister(W31);
+ EXPECT_TRUE(reg.Overlaps(reg_o));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(ZERO)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(A0)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(S0)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(RA)));
+ EXPECT_EQ(F31, reg_o.AsOverlappingFpuRegister());
+ EXPECT_EQ(W31, reg.AsOverlappingVectorRegister());
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F0)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F4)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F16)));
+ EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F31)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W0)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W4)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W16)));
+ EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W31)));
+
+ reg = Mips64ManagedRegister::FromVectorRegister(W0);
+ reg_o = Mips64ManagedRegister::FromFpuRegister(F0);
+ EXPECT_TRUE(reg.Overlaps(reg_o));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(ZERO)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(A0)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(S0)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(RA)));
+ EXPECT_EQ(W0, reg_o.AsOverlappingVectorRegister());
+ EXPECT_EQ(F0, reg.AsOverlappingFpuRegister());
+ EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F0)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F4)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F16)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F31)));
+ EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W0)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W4)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W16)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W31)));
+
+ reg = Mips64ManagedRegister::FromVectorRegister(W4);
+ reg_o = Mips64ManagedRegister::FromFpuRegister(F4);
+ EXPECT_TRUE(reg.Overlaps(reg_o));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(ZERO)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(A0)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(S0)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(RA)));
+ EXPECT_EQ(W4, reg_o.AsOverlappingVectorRegister());
+ EXPECT_EQ(F4, reg.AsOverlappingFpuRegister());
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F0)));
+ EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F4)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F16)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F31)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W0)));
+ EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W4)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W16)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W31)));
+
+ reg = Mips64ManagedRegister::FromVectorRegister(W16);
+ reg_o = Mips64ManagedRegister::FromFpuRegister(F16);
+ EXPECT_TRUE(reg.Overlaps(reg_o));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(ZERO)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(A0)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(S0)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(RA)));
+ EXPECT_EQ(W16, reg_o.AsOverlappingVectorRegister());
+ EXPECT_EQ(F16, reg.AsOverlappingFpuRegister());
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F0)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F4)));
+ EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F16)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F31)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W0)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W4)));
+ EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W16)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W31)));
+
+ reg = Mips64ManagedRegister::FromVectorRegister(W31);
+ reg_o = Mips64ManagedRegister::FromFpuRegister(F31);
+ EXPECT_TRUE(reg.Overlaps(reg_o));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(ZERO)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(A0)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(S0)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(RA)));
+ EXPECT_EQ(W31, reg_o.AsOverlappingVectorRegister());
+ EXPECT_EQ(F31, reg.AsOverlappingFpuRegister());
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F0)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F4)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F16)));
+ EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F31)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W0)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W4)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W16)));
+ EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W31)));
+
+ reg = Mips64ManagedRegister::FromGpuRegister(ZERO);
+ EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(ZERO)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(A0)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(S0)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(RA)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F0)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F4)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F16)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F31)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W0)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W4)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W16)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W31)));
+
+ reg = Mips64ManagedRegister::FromGpuRegister(A0);
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(ZERO)));
+ EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(A0)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(S0)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(RA)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F0)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F4)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F16)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F31)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W0)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W4)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W16)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W31)));
+
+ reg = Mips64ManagedRegister::FromGpuRegister(S0);
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(ZERO)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(A0)));
+ EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(S0)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(RA)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F0)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F4)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F16)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F31)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W0)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W4)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W16)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W31)));
+
+ reg = Mips64ManagedRegister::FromGpuRegister(RA);
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(ZERO)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(A0)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(S0)));
+ EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(RA)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F0)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F4)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F16)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F31)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W0)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W4)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W16)));
+ EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W31)));
+}
+
+} // namespace mips64
+} // namespace art
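
The overlap assertions above encode a single aliasing rule: on MIPS64 with MSA, vector register Wn shares its storage with FPU register Fn, while general-purpose registers never alias either bank. A minimal sketch of that predicate (an illustration under those assumptions, not ART's actual Mips64ManagedRegister implementation):

    enum class Kind { kGpu, kFpu, kVector };

    struct Reg {
      Kind kind;
      int id;  // index within the bank, e.g. 0 for F0 or W0
    };

    // Hypothetical stand-in for Mips64ManagedRegister::Overlaps().
    bool Overlaps(const Reg& a, const Reg& b) {
      if (a.kind == b.kind) {
        return a.id == b.id;  // same bank: overlap only on the same index
      }
      // Fn and Wn alias each other; GPRs alias nothing outside their bank.
      return a.kind != Kind::kGpu && b.kind != Kind::kGpu && a.id == b.id;
    }

Under this rule the AsOverlappingFpuRegister()/AsOverlappingVectorRegister() expectations follow directly: the conversion is the identity on the register index.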
diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc
index 5307dc09d9..9c934b7f39 100644
--- a/compiler/utils/x86/assembler_x86.cc
+++ b/compiler/utils/x86/assembler_x86.cc
@@ -1221,6 +1221,24 @@ void X86Assembler::por(XmmRegister dst, XmmRegister src) {
}
+void X86Assembler::pavgb(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitUint8(0x0F);
+ EmitUint8(0xE0);
+ EmitXmmRegisterOperand(dst, src);
+}
+
+
+void X86Assembler::pavgw(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitUint8(0x0F);
+ EmitUint8(0xE3);
+ EmitXmmRegisterOperand(dst, src);
+}
+
+
void X86Assembler::pcmpeqb(XmmRegister dst, XmmRegister src) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
EmitUint8(0x66);
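
The two new emitters encode PAVGB (66 0F E0 /r) and PAVGW (66 0F E3 /r), which compute a per-lane unsigned average rounded up. A scalar reference for the lane semantics (a sketch of the architectural behavior, not code from this change):

    #include <cstdint>

    // Each unsigned lane becomes (a + b + 1) >> 1, with the sum widened so it
    // cannot overflow before the shift.
    static inline uint8_t AvgU8(uint8_t a, uint8_t b) {
      return static_cast<uint8_t>((uint16_t{a} + uint16_t{b} + 1u) >> 1);
    }

    static inline uint16_t AvgU16(uint16_t a, uint16_t b) {
      return static_cast<uint16_t>((uint32_t{a} + uint32_t{b} + 1u) >> 1);
    }

PAVGB applies AvgU8 to all 16 byte lanes of the XMM operands; PAVGW applies AvgU16 to the 8 word lanes.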
diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h
index f52cf16c8b..b87522a017 100644
--- a/compiler/utils/x86/assembler_x86.h
+++ b/compiler/utils/x86/assembler_x86.h
@@ -495,6 +495,9 @@ class X86Assembler FINAL : public Assembler {
void orps(XmmRegister dst, XmmRegister src);
void por(XmmRegister dst, XmmRegister src);
+ void pavgb(XmmRegister dst, XmmRegister src); // no addr variant (for now)
+ void pavgw(XmmRegister dst, XmmRegister src);
+
void pcmpeqb(XmmRegister dst, XmmRegister src);
void pcmpeqw(XmmRegister dst, XmmRegister src);
void pcmpeqd(XmmRegister dst, XmmRegister src);
diff --git a/compiler/utils/x86/assembler_x86_test.cc b/compiler/utils/x86/assembler_x86_test.cc
index 23049079e0..a01eb6dc23 100644
--- a/compiler/utils/x86/assembler_x86_test.cc
+++ b/compiler/utils/x86/assembler_x86_test.cc
@@ -605,6 +605,14 @@ TEST_F(AssemblerX86Test, POr) {
DriverStr(RepeatFF(&x86::X86Assembler::por, "por %{reg2}, %{reg1}"), "por");
}
+TEST_F(AssemblerX86Test, PAvgB) {
+ DriverStr(RepeatFF(&x86::X86Assembler::pavgb, "pavgb %{reg2}, %{reg1}"), "pavgb");
+}
+
+TEST_F(AssemblerX86Test, PAvgW) {
+ DriverStr(RepeatFF(&x86::X86Assembler::pavgw, "pavgw %{reg2}, %{reg1}"), "pavgw");
+}
+
TEST_F(AssemblerX86Test, PCmpeqB) {
DriverStr(RepeatFF(&x86::X86Assembler::pcmpeqb, "pcmpeqb %{reg2}, %{reg1}"), "cmpeqb");
}
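
Note the operand order in the expected strings: the templates are AT&T syntax, so "%{reg2}, %{reg1}" lists the source before the destination, the reverse of the emitter's (dst, src) C++ signature. A hypothetical single iteration of the repeat-over-register-pairs harness, for illustration only:

    // assembler.pavgb(x86::XMM0, x86::XMM1) is expected to disassemble as:
    //   pavgb %xmm1, %xmm0
    // i.e. reg1 = destination XMM0, reg2 = source XMM1.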
diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc
index d20a6965c3..488c75de41 100644
--- a/compiler/utils/x86_64/assembler_x86_64.cc
+++ b/compiler/utils/x86_64/assembler_x86_64.cc
@@ -1427,6 +1427,24 @@ void X86_64Assembler::por(XmmRegister dst, XmmRegister src) {
EmitXmmRegisterOperand(dst.LowBits(), src);
}
+void X86_64Assembler::pavgb(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitOptionalRex32(dst, src);
+ EmitUint8(0x0F);
+ EmitUint8(0xE0);
+ EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
+void X86_64Assembler::pavgw(XmmRegister dst, XmmRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x66);
+ EmitOptionalRex32(dst, src);
+ EmitUint8(0x0F);
+ EmitUint8(0xE3);
+ EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
void X86_64Assembler::pcmpeqb(XmmRegister dst, XmmRegister src) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
EmitUint8(0x66);
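
The x86-64 versions differ from the 32-bit ones only in the optional REX prefix, needed to reach XMM8-XMM15, and in masking each register index to its low three bits for the ModR/M byte (the LowBits() calls). A rough re-derivation of the byte sequence (an assumed sketch, not ART's emitter):

    #include <cstdint>
    #include <vector>

    // Builds 66 [REX] 0F E0 ModRM for "pavgb xmmDst, xmmSrc" (register form).
    std::vector<uint8_t> EncodePavgb(int dst, int src) {
      std::vector<uint8_t> bytes;
      bytes.push_back(0x66);                                 // mandatory prefix
      uint8_t rex = 0x40 | ((dst >= 8) ? 0x04 : 0)           // REX.R extends reg
                         | ((src >= 8) ? 0x01 : 0);          // REX.B extends r/m
      if (rex != 0x40) bytes.push_back(rex);                 // emit only if needed
      bytes.push_back(0x0F);
      bytes.push_back(0xE0);                                 // PAVGB opcode
      bytes.push_back(0xC0 | ((dst & 7) << 3) | (src & 7));  // mod=11, reg=dst, rm=src
      return bytes;
    }

For example, pavgb %xmm1, %xmm0 encodes as 66 0F E0 C1, while pavgb %xmm9, %xmm8 becomes 66 45 0F E0 C1.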
diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h
index 08e17e81e5..fc2b117f71 100644
--- a/compiler/utils/x86_64/assembler_x86_64.h
+++ b/compiler/utils/x86_64/assembler_x86_64.h
@@ -523,6 +523,9 @@ class X86_64Assembler FINAL : public Assembler {
void orps(XmmRegister dst, XmmRegister src);
void por(XmmRegister dst, XmmRegister src);
+ void pavgb(XmmRegister dst, XmmRegister src); // no addr variant (for now)
+ void pavgw(XmmRegister dst, XmmRegister src);
+
void pcmpeqb(XmmRegister dst, XmmRegister src);
void pcmpeqw(XmmRegister dst, XmmRegister src);
void pcmpeqd(XmmRegister dst, XmmRegister src);
diff --git a/compiler/utils/x86_64/assembler_x86_64_test.cc b/compiler/utils/x86_64/assembler_x86_64_test.cc
index 20062fdb07..4adf210e47 100644
--- a/compiler/utils/x86_64/assembler_x86_64_test.cc
+++ b/compiler/utils/x86_64/assembler_x86_64_test.cc
@@ -1293,6 +1293,14 @@ TEST_F(AssemblerX86_64Test, Por) {
DriverStr(RepeatFF(&x86_64::X86_64Assembler::por, "por %{reg2}, %{reg1}"), "por");
}
+TEST_F(AssemblerX86_64Test, Pavgb) {
+ DriverStr(RepeatFF(&x86_64::X86_64Assembler::pavgb, "pavgb %{reg2}, %{reg1}"), "pavgb");
+}
+
+TEST_F(AssemblerX86_64Test, Pavgw) {
+ DriverStr(RepeatFF(&x86_64::X86_64Assembler::pavgw, "pavgw %{reg2}, %{reg1}"), "pavgw");
+}
+
TEST_F(AssemblerX86_64Test, PCmpeqb) {
DriverStr(RepeatFF(&x86_64::X86_64Assembler::pcmpeqb, "pcmpeqb %{reg2}, %{reg1}"), "pcmpeqb");
}
diff --git a/compiler/verifier_deps_test.cc b/compiler/verifier_deps_test.cc
index 4bfc84990d..fa7e98586c 100644
--- a/compiler/verifier_deps_test.cc
+++ b/compiler/verifier_deps_test.cc
@@ -18,21 +18,21 @@
#include "verifier/verifier_deps.h"
#include "class_linker.h"
-#include "compiler/common_compiler_test.h"
-#include "compiler/dex/verification_results.h"
-#include "compiler/dex/verified_method.h"
-#include "compiler/driver/compiler_options.h"
-#include "compiler/driver/compiler_driver.h"
-#include "compiler/utils/atomic_method_ref_map-inl.h"
+#include "common_compiler_test.h"
#include "compiler_callbacks.h"
+#include "dex/verification_results.h"
+#include "dex/verified_method.h"
#include "dex_file.h"
#include "dex_file_types.h"
+#include "driver/compiler_options.h"
+#include "driver/compiler_driver.h"
#include "handle_scope-inl.h"
#include "verifier/method_verifier-inl.h"
#include "mirror/class_loader.h"
#include "runtime.h"
#include "thread.h"
#include "scoped_thread_state_change-inl.h"
+#include "utils/atomic_method_ref_map-inl.h"
namespace art {
namespace verifier {