Add AVX support for packed mul/div instructions.
This is a follow-up to the patch below. It also folds the dedicated
HVecAvxAdd/HVecAvxSub paths introduced there into the regular
HVecAdd/HVecSub visitors, dispatching on the AVX feature flag at
code-generation time:
https://android-review.googlesource.com/c/platform/build/+/830841
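
With this change every vector add/sub/mul/div visitor dispatches on the
AVX feature flag: with AVX it emits the non-destructive three-operand
VEX encoding, so the output register may differ from both inputs; without
AVX it emits the destructive two-operand SSE encoding, and the register
allocator keeps InAt(0) aliased to Out(). A minimal stand-alone sketch of
that dispatch shape (CpuHasAvx and the Emit* helpers are illustrative
stand-ins, not ART APIs):

  #include <cstdio>
  #include <cstdlib>

  bool CpuHasAvx() { return true; }  // stand-in for GetInstructionSetFeatures().HasAVX()

  // AVX form: vaddps xmm<dst>, xmm<a>, xmm<b>; dst need not alias an input.
  void EmitVaddps(int dst, int a, int b) {
    std::printf("vaddps xmm%d, xmm%d, xmm%d\n", dst, a, b);
  }

  // SSE form: addps xmm<dst>, xmm<src>; adds into dst in place.
  void EmitAddps(int dst, int src) {
    std::printf("addps xmm%d, xmm%d\n", dst, src);
  }

  void VisitVecAddSketch(int dst, int other_src, int src) {
    bool cpu_has_avx = CpuHasAvx();
    // Mirrors the DCHECK in the visitors: without AVX, input 0 must have
    // been allocated to the output register.
    if (!cpu_has_avx && other_src != dst) std::abort();
    cpu_has_avx ? EmitVaddps(dst, other_src, src) : EmitAddps(dst, src);
  }

  int main() { VisitVecAddSketch(/*dst=*/0, /*other_src=*/0, /*src=*/1); }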
Test: ./test.py --host --64, test-art-host-gtest
Change-Id: Id2aa473035556ee230e66addeb69707df8530e75
Signed-off-by: Shalini Salomi Bodapati <shalini.salomi.bodapati@intel.com>
diff --git a/compiler/optimizing/code_generator_vector_x86.cc b/compiler/optimizing/code_generator_vector_x86.cc
index c8964dd..29a1354 100644
--- a/compiler/optimizing/code_generator_vector_x86.cc
+++ b/compiler/optimizing/code_generator_vector_x86.cc
@@ -431,48 +431,6 @@
}
}
-void LocationsBuilderX86::VisitVecAdd(HVecAdd* instruction) {
- CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
-}
-
-void InstructionCodeGeneratorX86::VisitVecAdd(HVecAdd* instruction) {
- LocationSummary* locations = instruction->GetLocations();
- DCHECK(locations->InAt(0).Equals(locations->Out()));
- XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
- XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
- switch (instruction->GetPackedType()) {
- case DataType::Type::kUint8:
- case DataType::Type::kInt8:
- DCHECK_EQ(16u, instruction->GetVectorLength());
- __ paddb(dst, src);
- break;
- case DataType::Type::kUint16:
- case DataType::Type::kInt16:
- DCHECK_EQ(8u, instruction->GetVectorLength());
- __ paddw(dst, src);
- break;
- case DataType::Type::kInt32:
- DCHECK_EQ(4u, instruction->GetVectorLength());
- __ paddd(dst, src);
- break;
- case DataType::Type::kInt64:
- DCHECK_EQ(2u, instruction->GetVectorLength());
- __ paddq(dst, src);
- break;
- case DataType::Type::kFloat32:
- DCHECK_EQ(4u, instruction->GetVectorLength());
- __ addps(dst, src);
- break;
- case DataType::Type::kFloat64:
- DCHECK_EQ(2u, instruction->GetVectorLength());
- __ addpd(dst, src);
- break;
- default:
- LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
- UNREACHABLE();
- }
-}
-
static void CreateVecTerOpLocations(ArenaAllocator* allocator, HVecOperation* instruction) {
LocationSummary* locations = new (allocator) LocationSummary(instruction);
switch (instruction->GetPackedType()) {
@@ -495,44 +453,50 @@
}
}
-void LocationsBuilderX86::VisitVecAvxAdd(HVecAvxAdd* instruction) {
+void LocationsBuilderX86::VisitVecAdd(HVecAdd* instruction) {
+ if (CpuHasAvxFeatureFlag()) {
CreateVecTerOpLocations(GetGraph()->GetAllocator(), instruction);
+ } else {
+ CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
+ }
}
-void InstructionCodeGeneratorX86::VisitVecAvxAdd(HVecAvxAdd* instruction) {
+void InstructionCodeGeneratorX86::VisitVecAdd(HVecAdd* instruction) {
+ bool cpu_has_avx = CpuHasAvxFeatureFlag();
LocationSummary* locations = instruction->GetLocations();
- XmmRegister src1 = locations->InAt(0).AsFpuRegister<XmmRegister>();
- XmmRegister src2 = locations->InAt(1).AsFpuRegister<XmmRegister>();
+ XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
+ XmmRegister other_src = locations->InAt(0).AsFpuRegister<XmmRegister>();
XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+ DCHECK(cpu_has_avx || other_src == dst);
switch (instruction->GetPackedType()) {
case DataType::Type::kUint8:
case DataType::Type::kInt8:
DCHECK_EQ(16u, instruction->GetVectorLength());
- __ vpaddb(dst, src1, src2);
+ cpu_has_avx ? __ vpaddb(dst, other_src, src) : __ paddb(dst, src);
break;
case DataType::Type::kUint16:
case DataType::Type::kInt16:
DCHECK_EQ(8u, instruction->GetVectorLength());
- __ vpaddw(dst, src1, src2);
+ cpu_has_avx ? __ vpaddw(dst, other_src, src) : __ paddw(dst, src);
break;
case DataType::Type::kInt32:
DCHECK_EQ(4u, instruction->GetVectorLength());
- __ vpaddd(dst, src1, src2);
+ cpu_has_avx ? __ vpaddd(dst, other_src, src) : __ paddd(dst, src);
break;
case DataType::Type::kInt64:
DCHECK_EQ(2u, instruction->GetVectorLength());
- __ vpaddq(dst, src1, src2);
+ cpu_has_avx ? __ vpaddq(dst, other_src, src) : __ paddq(dst, src);
break;
case DataType::Type::kFloat32:
DCHECK_EQ(4u, instruction->GetVectorLength());
- __ vaddps(dst, src1, src2);
+ cpu_has_avx ? __ vaddps(dst, other_src, src) : __ addps(dst, src);
break;
case DataType::Type::kFloat64:
DCHECK_EQ(2u, instruction->GetVectorLength());
- __ vaddpd(dst, src1, src2);
+ cpu_has_avx ? __ vaddpd(dst, other_src, src) : __ addpd(dst, src);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -597,85 +561,49 @@
}
void LocationsBuilderX86::VisitVecSub(HVecSub* instruction) {
- CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
-}
-
-void InstructionCodeGeneratorX86::VisitVecSub(HVecSub* instruction) {
- LocationSummary* locations = instruction->GetLocations();
- DCHECK(locations->InAt(0).Equals(locations->Out()));
- XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
- XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
- switch (instruction->GetPackedType()) {
- case DataType::Type::kUint8:
- case DataType::Type::kInt8:
- DCHECK_EQ(16u, instruction->GetVectorLength());
- __ psubb(dst, src);
- break;
- case DataType::Type::kUint16:
- case DataType::Type::kInt16:
- DCHECK_EQ(8u, instruction->GetVectorLength());
- __ psubw(dst, src);
- break;
- case DataType::Type::kInt32:
- DCHECK_EQ(4u, instruction->GetVectorLength());
- __ psubd(dst, src);
- break;
- case DataType::Type::kInt64:
- DCHECK_EQ(2u, instruction->GetVectorLength());
- __ psubq(dst, src);
- break;
- case DataType::Type::kFloat32:
- DCHECK_EQ(4u, instruction->GetVectorLength());
- __ subps(dst, src);
- break;
- case DataType::Type::kFloat64:
- DCHECK_EQ(2u, instruction->GetVectorLength());
- __ subpd(dst, src);
- break;
- default:
- LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
- UNREACHABLE();
+ if (CpuHasAvxFeatureFlag()) {
+ CreateVecTerOpLocations(GetGraph()->GetAllocator(), instruction);
+ } else {
+ CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}
}
-void LocationsBuilderX86::VisitVecAvxSub(HVecAvxSub* instruction) {
- CreateVecTerOpLocations(GetGraph()->GetAllocator(), instruction);
-}
-
-void InstructionCodeGeneratorX86::VisitVecAvxSub(HVecAvxSub* instruction) {
+void InstructionCodeGeneratorX86::VisitVecSub(HVecSub* instruction) {
+ bool cpu_has_avx = CpuHasAvxFeatureFlag();
LocationSummary* locations = instruction->GetLocations();
- XmmRegister src1 = locations->InAt(0).AsFpuRegister<XmmRegister>();
- XmmRegister src2 = locations->InAt(1).AsFpuRegister<XmmRegister>();
+ XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
+ XmmRegister other_src = locations->InAt(0).AsFpuRegister<XmmRegister>();
XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+ DCHECK(cpu_has_avx || other_src == dst);
switch (instruction->GetPackedType()) {
case DataType::Type::kUint8:
case DataType::Type::kInt8:
DCHECK_EQ(16u, instruction->GetVectorLength());
- __ vpsubb(dst, src1, src2);
+ cpu_has_avx ? __ vpsubb(dst, other_src, src) : __ psubb(dst, src);
break;
case DataType::Type::kUint16:
case DataType::Type::kInt16:
DCHECK_EQ(8u, instruction->GetVectorLength());
- __ vpsubw(dst, src1, src2);
+ cpu_has_avx ? __ vpsubw(dst, other_src, src) : __ psubw(dst, src);
break;
case DataType::Type::kInt32:
DCHECK_EQ(4u, instruction->GetVectorLength());
- __ vpsubd(dst, src1, src2);
+ cpu_has_avx ? __ vpsubd(dst, other_src, src) : __ psubd(dst, src);
break;
case DataType::Type::kInt64:
DCHECK_EQ(2u, instruction->GetVectorLength());
- __ vpsubq(dst, src1, src2);
+ cpu_has_avx ? __ vpsubq(dst, other_src, src) : __ psubq(dst, src);
break;
case DataType::Type::kFloat32:
DCHECK_EQ(4u, instruction->GetVectorLength());
- __ vsubps(dst, src1, src2);
+ cpu_has_avx ? __ vsubps(dst, other_src, src) : __ subps(dst, src);
break;
case DataType::Type::kFloat64:
DCHECK_EQ(2u, instruction->GetVectorLength());
- __ vsubpd(dst, src1, src2);
+ cpu_has_avx ? __ vsubpd(dst, other_src, src) : __ subpd(dst, src);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -713,31 +641,37 @@
}
void LocationsBuilderX86::VisitVecMul(HVecMul* instruction) {
- CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
+ if (CpuHasAvxFeatureFlag()) {
+ CreateVecTerOpLocations(GetGraph()->GetAllocator(), instruction);
+ } else {
+ CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
+ }
}
void InstructionCodeGeneratorX86::VisitVecMul(HVecMul* instruction) {
+ bool cpu_has_avx = CpuHasAvxFeatureFlag();
LocationSummary* locations = instruction->GetLocations();
- DCHECK(locations->InAt(0).Equals(locations->Out()));
XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
+ XmmRegister other_src = locations->InAt(0).AsFpuRegister<XmmRegister>();
XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+ DCHECK(cpu_has_avx || other_src == dst);
switch (instruction->GetPackedType()) {
case DataType::Type::kUint16:
case DataType::Type::kInt16:
DCHECK_EQ(8u, instruction->GetVectorLength());
- __ pmullw(dst, src);
+ cpu_has_avx ? __ vpmullw(dst, other_src, src) : __ pmullw(dst, src);
break;
case DataType::Type::kInt32:
DCHECK_EQ(4u, instruction->GetVectorLength());
- __ pmulld(dst, src);
+ cpu_has_avx ? __ vpmulld(dst, other_src, src) : __ pmulld(dst, src);
break;
case DataType::Type::kFloat32:
DCHECK_EQ(4u, instruction->GetVectorLength());
- __ mulps(dst, src);
+ cpu_has_avx ? __ vmulps(dst, other_src, src) : __ mulps(dst, src);
break;
case DataType::Type::kFloat64:
DCHECK_EQ(2u, instruction->GetVectorLength());
- __ mulpd(dst, src);
+ cpu_has_avx ? __ vmulpd(dst, other_src, src) : __ mulpd(dst, src);
break;
default:
LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
@@ -746,22 +680,28 @@
}
void LocationsBuilderX86::VisitVecDiv(HVecDiv* instruction) {
- CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
+ if (CpuHasAvxFeatureFlag()) {
+ CreateVecTerOpLocations(GetGraph()->GetAllocator(), instruction);
+ } else {
+ CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
+ }
}
void InstructionCodeGeneratorX86::VisitVecDiv(HVecDiv* instruction) {
+ bool cpu_has_avx = CpuHasAvxFeatureFlag();
LocationSummary* locations = instruction->GetLocations();
- DCHECK(locations->InAt(0).Equals(locations->Out()));
XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
+ XmmRegister other_src = locations->InAt(0).AsFpuRegister<XmmRegister>();
XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+ DCHECK(cpu_has_avx || other_src == dst);
switch (instruction->GetPackedType()) {
case DataType::Type::kFloat32:
DCHECK_EQ(4u, instruction->GetVectorLength());
- __ divps(dst, src);
+ cpu_has_avx ? __ vdivps(dst, other_src, src) : __ divps(dst, src);
break;
case DataType::Type::kFloat64:
DCHECK_EQ(2u, instruction->GetVectorLength());
- __ divpd(dst, src);
+ cpu_has_avx ? __ vdivpd(dst, other_src, src) : __ divpd(dst, src);
break;
default:
LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
diff --git a/compiler/optimizing/code_generator_vector_x86_64.cc b/compiler/optimizing/code_generator_vector_x86_64.cc
index c147659..f28268b 100644
--- a/compiler/optimizing/code_generator_vector_x86_64.cc
+++ b/compiler/optimizing/code_generator_vector_x86_64.cc
@@ -437,85 +437,49 @@
}
void LocationsBuilderX86_64::VisitVecAdd(HVecAdd* instruction) {
- CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
-}
-
-void InstructionCodeGeneratorX86_64::VisitVecAdd(HVecAdd* instruction) {
- LocationSummary* locations = instruction->GetLocations();
- DCHECK(locations->InAt(0).Equals(locations->Out()));
- XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
- XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
- switch (instruction->GetPackedType()) {
- case DataType::Type::kUint8:
- case DataType::Type::kInt8:
- DCHECK_EQ(16u, instruction->GetVectorLength());
- __ paddb(dst, src);
- break;
- case DataType::Type::kUint16:
- case DataType::Type::kInt16:
- DCHECK_EQ(8u, instruction->GetVectorLength());
- __ paddw(dst, src);
- break;
- case DataType::Type::kInt32:
- DCHECK_EQ(4u, instruction->GetVectorLength());
- __ paddd(dst, src);
- break;
- case DataType::Type::kInt64:
- DCHECK_EQ(2u, instruction->GetVectorLength());
- __ paddq(dst, src);
- break;
- case DataType::Type::kFloat32:
- DCHECK_EQ(4u, instruction->GetVectorLength());
- __ addps(dst, src);
- break;
- case DataType::Type::kFloat64:
- DCHECK_EQ(2u, instruction->GetVectorLength());
- __ addpd(dst, src);
- break;
- default:
- LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
- UNREACHABLE();
+ if (CpuHasAvxFeatureFlag()) {
+ CreateVecTerOpLocations(GetGraph()->GetAllocator(), instruction);
+ } else {
+ CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}
}
-void LocationsBuilderX86_64::VisitVecAvxAdd(HVecAvxAdd* instruction) {
- CreateVecTerOpLocations(GetGraph()->GetAllocator(), instruction);
-}
-
-void InstructionCodeGeneratorX86_64::VisitVecAvxAdd(HVecAvxAdd* instruction) {
+void InstructionCodeGeneratorX86_64::VisitVecAdd(HVecAdd* instruction) {
+ bool cpu_has_avx = CpuHasAvxFeatureFlag();
LocationSummary* locations = instruction->GetLocations();
- XmmRegister src1 = locations->InAt(0).AsFpuRegister<XmmRegister>();
- XmmRegister src2 = locations->InAt(1).AsFpuRegister<XmmRegister>();
+ XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
+ XmmRegister other_src = locations->InAt(0).AsFpuRegister<XmmRegister>();
XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+ DCHECK(cpu_has_avx || other_src == dst);
switch (instruction->GetPackedType()) {
case DataType::Type::kUint8:
case DataType::Type::kInt8:
DCHECK_EQ(16u, instruction->GetVectorLength());
- __ vpaddb(dst, src1, src2);
+ cpu_has_avx ? __ vpaddb(dst, other_src, src) : __ paddb(dst, src);
break;
case DataType::Type::kUint16:
case DataType::Type::kInt16:
DCHECK_EQ(8u, instruction->GetVectorLength());
- __ vpaddw(dst, src1, src2);
+ cpu_has_avx ? __ vpaddw(dst, other_src, src) : __ paddw(dst, src);
break;
case DataType::Type::kInt32:
DCHECK_EQ(4u, instruction->GetVectorLength());
- __ vpaddd(dst, src1, src2);
+ cpu_has_avx ? __ vpaddd(dst, other_src, src) : __ paddd(dst, src);
break;
case DataType::Type::kInt64:
DCHECK_EQ(2u, instruction->GetVectorLength());
- __ vpaddq(dst, src1, src2);
+ cpu_has_avx ? __ vpaddq(dst, other_src, src) : __ paddq(dst, src);
break;
case DataType::Type::kFloat32:
DCHECK_EQ(4u, instruction->GetVectorLength());
- __ vaddps(dst, src1, src2);
+ cpu_has_avx ? __ vaddps(dst, other_src, src) : __ addps(dst, src);
break;
case DataType::Type::kFloat64:
DCHECK_EQ(2u, instruction->GetVectorLength());
- __ vaddpd(dst, src1, src2);
+ cpu_has_avx ? __ vaddpd(dst, other_src, src) : __ addpd(dst, src);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -580,85 +544,49 @@
}
void LocationsBuilderX86_64::VisitVecSub(HVecSub* instruction) {
- CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
-}
-
-void InstructionCodeGeneratorX86_64::VisitVecSub(HVecSub* instruction) {
- LocationSummary* locations = instruction->GetLocations();
- DCHECK(locations->InAt(0).Equals(locations->Out()));
- XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
- XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
- switch (instruction->GetPackedType()) {
- case DataType::Type::kUint8:
- case DataType::Type::kInt8:
- DCHECK_EQ(16u, instruction->GetVectorLength());
- __ psubb(dst, src);
- break;
- case DataType::Type::kUint16:
- case DataType::Type::kInt16:
- DCHECK_EQ(8u, instruction->GetVectorLength());
- __ psubw(dst, src);
- break;
- case DataType::Type::kInt32:
- DCHECK_EQ(4u, instruction->GetVectorLength());
- __ psubd(dst, src);
- break;
- case DataType::Type::kInt64:
- DCHECK_EQ(2u, instruction->GetVectorLength());
- __ psubq(dst, src);
- break;
- case DataType::Type::kFloat32:
- DCHECK_EQ(4u, instruction->GetVectorLength());
- __ subps(dst, src);
- break;
- case DataType::Type::kFloat64:
- DCHECK_EQ(2u, instruction->GetVectorLength());
- __ subpd(dst, src);
- break;
- default:
- LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
- UNREACHABLE();
+ if (CpuHasAvxFeatureFlag()) {
+ CreateVecTerOpLocations(GetGraph()->GetAllocator(), instruction);
+ } else {
+ CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}
}
-void LocationsBuilderX86_64::VisitVecAvxSub(HVecAvxSub* instruction) {
- CreateVecTerOpLocations(GetGraph()->GetAllocator(), instruction);
-}
-
-void InstructionCodeGeneratorX86_64::VisitVecAvxSub(HVecAvxSub* instruction) {
+void InstructionCodeGeneratorX86_64::VisitVecSub(HVecSub* instruction) {
+ bool cpu_has_avx = CpuHasAvxFeatureFlag();
LocationSummary* locations = instruction->GetLocations();
- XmmRegister src1 = locations->InAt(0).AsFpuRegister<XmmRegister>();
- XmmRegister src2 = locations->InAt(1).AsFpuRegister<XmmRegister>();
+ XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
+ XmmRegister other_src = locations->InAt(0).AsFpuRegister<XmmRegister>();
XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+ DCHECK(cpu_has_avx || other_src == dst);
switch (instruction->GetPackedType()) {
case DataType::Type::kUint8:
case DataType::Type::kInt8:
DCHECK_EQ(16u, instruction->GetVectorLength());
- __ vpsubb(dst, src1, src2);
+ cpu_has_avx ? __ vpsubb(dst, other_src, src) : __ psubb(dst, src);
break;
case DataType::Type::kUint16:
case DataType::Type::kInt16:
DCHECK_EQ(8u, instruction->GetVectorLength());
- __ vpsubw(dst, src1, src2);
+ cpu_has_avx ? __ vpsubw(dst, other_src, src) : __ psubw(dst, src);
break;
case DataType::Type::kInt32:
DCHECK_EQ(4u, instruction->GetVectorLength());
- __ vpsubd(dst, src1, src2);
+ cpu_has_avx ? __ vpsubd(dst, other_src, src) : __ psubd(dst, src);
break;
case DataType::Type::kInt64:
DCHECK_EQ(2u, instruction->GetVectorLength());
- __ vpsubq(dst, src1, src2);
+ cpu_has_avx ? __ vpsubq(dst, other_src, src) : __ psubq(dst, src);
break;
case DataType::Type::kFloat32:
DCHECK_EQ(4u, instruction->GetVectorLength());
- __ vsubps(dst, src1, src2);
+ cpu_has_avx ? __ vsubps(dst, other_src, src) : __ subps(dst, src);
break;
case DataType::Type::kFloat64:
DCHECK_EQ(2u, instruction->GetVectorLength());
- __ vsubpd(dst, src1, src2);
+ cpu_has_avx ? __ vsubpd(dst, other_src, src) : __ subpd(dst, src);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -696,31 +624,37 @@
}
void LocationsBuilderX86_64::VisitVecMul(HVecMul* instruction) {
- CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
+ if (CpuHasAvxFeatureFlag()) {
+ CreateVecTerOpLocations(GetGraph()->GetAllocator(), instruction);
+ } else {
+ CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
+ }
}
void InstructionCodeGeneratorX86_64::VisitVecMul(HVecMul* instruction) {
+ bool cpu_has_avx = CpuHasAvxFeatureFlag();
LocationSummary* locations = instruction->GetLocations();
- DCHECK(locations->InAt(0).Equals(locations->Out()));
XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
+ XmmRegister other_src = locations->InAt(0).AsFpuRegister<XmmRegister>();
XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+ DCHECK(cpu_has_avx || other_src == dst);
switch (instruction->GetPackedType()) {
case DataType::Type::kUint16:
case DataType::Type::kInt16:
DCHECK_EQ(8u, instruction->GetVectorLength());
- __ pmullw(dst, src);
+ cpu_has_avx ? __ vpmullw(dst, other_src, src) : __ pmullw(dst, src);
break;
case DataType::Type::kInt32:
DCHECK_EQ(4u, instruction->GetVectorLength());
- __ pmulld(dst, src);
+      cpu_has_avx ? __ vpmulld(dst, other_src, src) : __ pmulld(dst, src);
break;
case DataType::Type::kFloat32:
DCHECK_EQ(4u, instruction->GetVectorLength());
- __ mulps(dst, src);
+ cpu_has_avx ? __ vmulps(dst, other_src, src) : __ mulps(dst, src);
break;
case DataType::Type::kFloat64:
DCHECK_EQ(2u, instruction->GetVectorLength());
- __ mulpd(dst, src);
+ cpu_has_avx ? __ vmulpd(dst, other_src, src) : __ mulpd(dst, src);
break;
default:
LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
@@ -729,22 +663,28 @@
}
void LocationsBuilderX86_64::VisitVecDiv(HVecDiv* instruction) {
- CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
+ if (CpuHasAvxFeatureFlag()) {
+ CreateVecTerOpLocations(GetGraph()->GetAllocator(), instruction);
+ } else {
+ CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
+ }
}
void InstructionCodeGeneratorX86_64::VisitVecDiv(HVecDiv* instruction) {
+ bool cpu_has_avx = CpuHasAvxFeatureFlag();
LocationSummary* locations = instruction->GetLocations();
- DCHECK(locations->InAt(0).Equals(locations->Out()));
XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
+ XmmRegister other_src = locations->InAt(0).AsFpuRegister<XmmRegister>();
XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+ DCHECK(cpu_has_avx || other_src == dst);
switch (instruction->GetPackedType()) {
case DataType::Type::kFloat32:
DCHECK_EQ(4u, instruction->GetVectorLength());
- __ divps(dst, src);
+ cpu_has_avx ? __ vdivps(dst, other_src, src) : __ divps(dst, src);
break;
case DataType::Type::kFloat64:
DCHECK_EQ(2u, instruction->GetVectorLength());
- __ divpd(dst, src);
+ cpu_has_avx ? __ vdivpd(dst, other_src, src) : __ divpd(dst, src);
break;
default:
LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index d8a54e5..7f7e3a5 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -8425,6 +8425,19 @@
LOG(FATAL) << "Unreachable";
}
+bool LocationsBuilderX86::CpuHasAvxFeatureFlag() {
+ return codegen_->GetInstructionSetFeatures().HasAVX();
+}
+bool LocationsBuilderX86::CpuHasAvx2FeatureFlag() {
+ return codegen_->GetInstructionSetFeatures().HasAVX2();
+}
+bool InstructionCodeGeneratorX86::CpuHasAvxFeatureFlag() {
+ return codegen_->GetInstructionSetFeatures().HasAVX();
+}
+bool InstructionCodeGeneratorX86::CpuHasAvx2FeatureFlag() {
+ return codegen_->GetInstructionSetFeatures().HasAVX2();
+}
+
#undef __
} // namespace x86
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index bbca764..368c584 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -175,6 +175,8 @@
void HandleShift(HBinaryOperation* instruction);
void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info);
void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
+ bool CpuHasAvxFeatureFlag();
+ bool CpuHasAvx2FeatureFlag();
CodeGeneratorX86* const codegen_;
InvokeDexCallingConventionVisitorX86 parameter_visitor_;
@@ -307,6 +309,8 @@
HBasicBlock* default_block);
void GenerateFPCompare(Location lhs, Location rhs, HInstruction* insn, bool is_double);
+ bool CpuHasAvxFeatureFlag();
+ bool CpuHasAvx2FeatureFlag();
X86Assembler* const assembler_;
CodeGeneratorX86* const codegen_;
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index b3d76a3..8067b9c 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -7667,6 +7667,22 @@
}
}
+bool LocationsBuilderX86_64::CpuHasAvxFeatureFlag() {
+ return codegen_->GetInstructionSetFeatures().HasAVX();
+}
+
+bool LocationsBuilderX86_64::CpuHasAvx2FeatureFlag() {
+ return codegen_->GetInstructionSetFeatures().HasAVX2();
+}
+
+bool InstructionCodeGeneratorX86_64::CpuHasAvxFeatureFlag() {
+ return codegen_->GetInstructionSetFeatures().HasAVX();
+}
+
+bool InstructionCodeGeneratorX86_64::CpuHasAvx2FeatureFlag() {
+ return codegen_->GetInstructionSetFeatures().HasAVX2();
+}
+
#undef __
} // namespace x86_64
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index a25c29f..d3b49ea 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -177,6 +177,8 @@
void HandleShift(HBinaryOperation* operation);
void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info);
void HandleFieldGet(HInstruction* instruction);
+ bool CpuHasAvxFeatureFlag();
+ bool CpuHasAvx2FeatureFlag();
CodeGeneratorX86_64* const codegen_;
InvokeDexCallingConventionVisitorX86_64 parameter_visitor_;
@@ -287,6 +289,9 @@
void HandleGoto(HInstruction* got, HBasicBlock* successor);
+ bool CpuHasAvxFeatureFlag();
+ bool CpuHasAvx2FeatureFlag();
+
X86_64Assembler* const assembler_;
CodeGeneratorX86_64* const codegen_;
diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc
index c6e7560..9914127 100644
--- a/compiler/optimizing/loop_optimization.cc
+++ b/compiler/optimizing/loop_optimization.cc
@@ -353,9 +353,6 @@
static HVecReduce::ReductionKind GetReductionKind(HVecOperation* reduction) {
if (reduction->IsVecAdd() ||
reduction->IsVecSub() ||
- #if defined(ART_ENABLE_CODEGEN_x86) || defined(ART_ENABLE_CODEGEN_x86_64)
- reduction->IsVecAvxSub() || reduction->IsVecAvxAdd() ||
- #endif
reduction->IsVecSADAccumulate() ||
reduction->IsVecDotProd()) {
return HVecReduce::kSum;
@@ -1943,34 +1940,10 @@
new (global_allocator_) HVecCnv(global_allocator_, opa, type, vector_length_, dex_pc),
new (global_allocator_) HTypeConversion(org_type, opa, dex_pc));
case HInstruction::kAdd:
- #if defined(ART_ENABLE_CODEGEN_x86) || defined(ART_ENABLE_CODEGEN_x86_64)
- if ((compiler_options_->GetInstructionSet() == InstructionSet::kX86 ||
- compiler_options_->GetInstructionSet() == InstructionSet::kX86_64) &&
- compiler_options_->GetInstructionSetFeatures()->AsX86InstructionSetFeatures()
- ->HasAVX2()) {
- GENERATE_VEC(
- new (global_allocator_) HVecAvxAdd(
- global_allocator_, opa, opb, type, vector_length_, dex_pc),
- new (global_allocator_) HAdd(org_type, opa, opb, dex_pc));
- UNREACHABLE(); // GENERATE_VEC ends with a "break".
- }
- #endif
GENERATE_VEC(
new (global_allocator_) HVecAdd(global_allocator_, opa, opb, type, vector_length_, dex_pc),
new (global_allocator_) HAdd(org_type, opa, opb, dex_pc));
case HInstruction::kSub:
- #if defined(ART_ENABLE_CODEGEN_x86) || defined(ART_ENABLE_CODEGEN_x86_64)
- if ((compiler_options_->GetInstructionSet() == InstructionSet::kX86 ||
- compiler_options_->GetInstructionSet() == InstructionSet::kX86_64) &&
- compiler_options_->GetInstructionSetFeatures()->AsX86InstructionSetFeatures()
- ->HasAVX2()) {
- GENERATE_VEC(
- new (global_allocator_) HVecAvxSub(
- global_allocator_, opa, opb, type, vector_length_, dex_pc),
- new (global_allocator_) HSub(org_type, opa, opb, dex_pc));
- UNREACHABLE(); // GENERATE_VEC ends with a "break".
- }
- #endif
GENERATE_VEC(
new (global_allocator_) HVecSub(global_allocator_, opa, opb, type, vector_length_, dex_pc),
new (global_allocator_) HSub(org_type, opa, opb, dex_pc));
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 3e6e211..25f9e3c 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -1540,9 +1540,7 @@
#if defined(ART_ENABLE_CODEGEN_x86) || defined(ART_ENABLE_CODEGEN_x86_64)
#define FOR_EACH_CONCRETE_INSTRUCTION_X86_COMMON(M) \
M(X86AndNot, Instruction) \
- M(X86MaskOrResetLeastSetBit, Instruction) \
- M(VecAvxSub, VecOperation) \
- M(VecAvxAdd, VecOperation)
+ M(X86MaskOrResetLeastSetBit, Instruction)
#else
#define FOR_EACH_CONCRETE_INSTRUCTION_X86_COMMON(M)
#endif
@@ -7874,7 +7872,6 @@
#endif
#if defined(ART_ENABLE_CODEGEN_x86) || defined(ART_ENABLE_CODEGEN_x86_64)
#include "nodes_x86.h"
-#include "nodes_vector_x86.h"
#endif
namespace art {
diff --git a/compiler/optimizing/nodes_vector_x86.h b/compiler/optimizing/nodes_vector_x86.h
deleted file mode 100644
index a8f576f..0000000
--- a/compiler/optimizing/nodes_vector_x86.h
+++ /dev/null
@@ -1,84 +0,0 @@
-/*
- * Copyright (C) 2018 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_COMPILER_OPTIMIZING_NODES_VECTOR_X86_H_
-#define ART_COMPILER_OPTIMIZING_NODES_VECTOR_X86_H_
-
-#include "nodes_vector.h"
-
-namespace art {
-
-class HVecAvxAdd final : public HVecOperation {
- public:
- HVecAvxAdd(ArenaAllocator* allocator,
- HInstruction* src1,
- HInstruction* src2,
- DataType::Type packed_type,
- size_t vector_length,
- uint32_t dex_pc)
- : HVecOperation(kVecAvxAdd,
- allocator,
- packed_type,
- SideEffects::None(),
- /* number_of_inputs */ 2,
- vector_length,
- dex_pc) {
- DCHECK(HasConsistentPackedTypes(src1, packed_type));
- DCHECK(HasConsistentPackedTypes(src2, packed_type));
- SetRawInputAt(0, src1);
- SetRawInputAt(1, src2);
- }
-
- bool CanBeMoved() const override { return true; }
-
- DECLARE_INSTRUCTION(VecAvxAdd);
-
- protected:
- DEFAULT_COPY_CONSTRUCTOR(VecAvxAdd);
-};
-
-class HVecAvxSub final : public HVecOperation {
- public:
- HVecAvxSub(ArenaAllocator* allocator,
- HInstruction* src1,
- HInstruction* src2,
- DataType::Type packed_type,
- size_t vector_length,
- uint32_t dex_pc)
- : HVecOperation(kVecAvxSub,
- allocator,
- packed_type,
- SideEffects::None(),
- /* number_of_inputs */ 2,
- vector_length,
- dex_pc) {
- DCHECK(HasConsistentPackedTypes(src1, packed_type));
- DCHECK(HasConsistentPackedTypes(src2, packed_type));
- SetRawInputAt(0, src1);
- SetRawInputAt(1, src2);
- }
-
- bool CanBeMoved() const override { return true; }
-
- DECLARE_INSTRUCTION(VecAvxSub);
-
- protected:
- DEFAULT_COPY_CONSTRUCTOR(VecAvxSub);
-};
-
-} // namespace art
-
-#endif // ART_COMPILER_OPTIMIZING_NODES_VECTOR_X86_H_