Add AVX support for packed add/sub instructions on x86
Test: ./test.py --host, test-art-host-gtest
Change-Id: I48d05e6f6befd54657d962119a543b27a8a51d71
Signed-off-by: Shalini Salomi Bodapati <shalini.salomi.bodapati@intel.com>
diff --git a/compiler/optimizing/code_generator_vector_x86.cc b/compiler/optimizing/code_generator_vector_x86.cc
index 0ee0035..c8964dd 100644
--- a/compiler/optimizing/code_generator_vector_x86.cc
+++ b/compiler/optimizing/code_generator_vector_x86.cc
@@ -473,6 +473,70 @@
}
}
+static void CreateVecTerOpLocations(ArenaAllocator* allocator, HVecOperation* instruction) {
+ LocationSummary* locations = new (allocator) LocationSummary(instruction);
+ switch (instruction->GetPackedType()) {  // All listed packed types share one layout.
+ case DataType::Type::kBool:
+ case DataType::Type::kUint8:
+ case DataType::Type::kInt8:
+ case DataType::Type::kUint16:
+ case DataType::Type::kInt16:
+ case DataType::Type::kInt32:
+ case DataType::Type::kInt64:
+ case DataType::Type::kFloat32:
+ case DataType::Type::kFloat64:
+ locations->SetInAt(0, Location::RequiresFpuRegister());  // Input 0: FPU register.
+ locations->SetInAt(1, Location::RequiresFpuRegister());  // Input 1: FPU register.
+ locations->SetOut(Location::RequiresFpuRegister());  // Output is not tied to input 0.
+ break;
+ default:  // Any packed type not listed above aborts.
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderX86::VisitVecAvxAdd(HVecAvxAdd* instruction) {
+ CreateVecTerOpLocations(GetGraph()->GetAllocator(), instruction);  // 2 FPU in, 1 FPU out.
+}
+
+void InstructionCodeGeneratorX86::VisitVecAvxAdd(HVecAvxAdd* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ XmmRegister src1 = locations->InAt(0).AsFpuRegister<XmmRegister>();  // Input 0.
+ XmmRegister src2 = locations->InAt(1).AsFpuRegister<XmmRegister>();  // Input 1.
+ XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();  // Output register.
+ switch (instruction->GetPackedType()) {  // Choose the add instruction by packed type.
+ case DataType::Type::kUint8:
+ case DataType::Type::kInt8:  // Signed and unsigned share one instruction.
+ DCHECK_EQ(16u, instruction->GetVectorLength());  // Debug check: 16 lanes.
+ __ vpaddb(dst, src1, src2);
+ break;
+ case DataType::Type::kUint16:
+ case DataType::Type::kInt16:  // Signed and unsigned share one instruction.
+ DCHECK_EQ(8u, instruction->GetVectorLength());  // Debug check: 8 lanes.
+ __ vpaddw(dst, src1, src2);
+ break;
+ case DataType::Type::kInt32:
+ DCHECK_EQ(4u, instruction->GetVectorLength());  // Debug check: 4 lanes.
+ __ vpaddd(dst, src1, src2);
+ break;
+ case DataType::Type::kInt64:
+ DCHECK_EQ(2u, instruction->GetVectorLength());  // Debug check: 2 lanes.
+ __ vpaddq(dst, src1, src2);
+ break;
+ case DataType::Type::kFloat32:
+ DCHECK_EQ(4u, instruction->GetVectorLength());  // Debug check: 4 lanes.
+ __ vaddps(dst, src1, src2);
+ break;
+ case DataType::Type::kFloat64:
+ DCHECK_EQ(2u, instruction->GetVectorLength());  // Debug check: 2 lanes.
+ __ vaddpd(dst, src1, src2);
+ break;
+ default:  // NOTE(review): kBool passes CreateVecTerOpLocations but aborts here.
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
void LocationsBuilderX86::VisitVecSaturationAdd(HVecSaturationAdd* instruction) {
CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}
@@ -574,6 +638,48 @@
}
}
+void LocationsBuilderX86::VisitVecAvxSub(HVecAvxSub* instruction) {
+ CreateVecTerOpLocations(GetGraph()->GetAllocator(), instruction);  // 2 FPU in, 1 FPU out.
+}
+
+void InstructionCodeGeneratorX86::VisitVecAvxSub(HVecAvxSub* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ XmmRegister src1 = locations->InAt(0).AsFpuRegister<XmmRegister>();  // Input 0.
+ XmmRegister src2 = locations->InAt(1).AsFpuRegister<XmmRegister>();  // Input 1.
+ XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();  // Output register.
+ switch (instruction->GetPackedType()) {  // Choose the sub instruction by packed type.
+ case DataType::Type::kUint8:
+ case DataType::Type::kInt8:  // Signed and unsigned share one instruction.
+ DCHECK_EQ(16u, instruction->GetVectorLength());  // Debug check: 16 lanes.
+ __ vpsubb(dst, src1, src2);
+ break;
+ case DataType::Type::kUint16:
+ case DataType::Type::kInt16:  // Signed and unsigned share one instruction.
+ DCHECK_EQ(8u, instruction->GetVectorLength());  // Debug check: 8 lanes.
+ __ vpsubw(dst, src1, src2);
+ break;
+ case DataType::Type::kInt32:
+ DCHECK_EQ(4u, instruction->GetVectorLength());  // Debug check: 4 lanes.
+ __ vpsubd(dst, src1, src2);
+ break;
+ case DataType::Type::kInt64:
+ DCHECK_EQ(2u, instruction->GetVectorLength());  // Debug check: 2 lanes.
+ __ vpsubq(dst, src1, src2);
+ break;
+ case DataType::Type::kFloat32:
+ DCHECK_EQ(4u, instruction->GetVectorLength());  // Debug check: 4 lanes.
+ __ vsubps(dst, src1, src2);
+ break;
+ case DataType::Type::kFloat64:
+ DCHECK_EQ(2u, instruction->GetVectorLength());  // Debug check: 2 lanes.
+ __ vsubpd(dst, src1, src2);
+ break;
+ default:  // NOTE(review): kBool passes CreateVecTerOpLocations but aborts here.
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
void LocationsBuilderX86::VisitVecSaturationSub(HVecSaturationSub* instruction) {
CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}