Implement Sum-of-Abs-Differences idiom recognition.
Rationale:
Using the SAD idiom yields a large speedup on loops that
compute a sum of absolute differences. Currently this
applies only to ARM64 (x86 lacks proper support).
Also includes some refinements around type conversions.
Speedup ExoPlayerAudio (golem run):
1.3x on ARM64
1.1x on x86
Test: test-art-host test-art-target
Bug: 64091002
Change-Id: Ia2b711d2bc23609a2ed50493dfe6719eedfe0130
diff --git a/compiler/optimizing/code_generator_vector_mips.cc b/compiler/optimizing/code_generator_vector_mips.cc
index c2fbf7f..0bedafc 100644
--- a/compiler/optimizing/code_generator_vector_mips.cc
+++ b/compiler/optimizing/code_generator_vector_mips.cc
@@ -826,21 +826,18 @@
LOG(FATAL) << "No SIMD for " << instruction->GetId();
}
-void LocationsBuilderMIPS::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) {
- LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instr);
- switch (instr->GetPackedType()) {
+// Helper to set up locations for vector accumulations.
+static void CreateVecAccumLocations(ArenaAllocator* arena, HVecOperation* instruction) {
+ LocationSummary* locations = new (arena) LocationSummary(instruction);
+ switch (instruction->GetPackedType()) {
case Primitive::kPrimByte:
case Primitive::kPrimChar:
case Primitive::kPrimShort:
case Primitive::kPrimInt:
case Primitive::kPrimLong:
- locations->SetInAt(
- HVecMultiplyAccumulate::kInputAccumulatorIndex, Location::RequiresFpuRegister());
- locations->SetInAt(
- HVecMultiplyAccumulate::kInputMulLeftIndex, Location::RequiresFpuRegister());
- locations->SetInAt(
- HVecMultiplyAccumulate::kInputMulRightIndex, Location::RequiresFpuRegister());
- DCHECK_EQ(HVecMultiplyAccumulate::kInputAccumulatorIndex, 0);
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetInAt(1, Location::RequiresFpuRegister());
+ locations->SetInAt(2, Location::RequiresFpuRegister());
locations->SetOut(Location::SameAsFirstInput());
break;
default:
@@ -849,18 +846,19 @@
}
}
-void InstructionCodeGeneratorMIPS::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) {
- LocationSummary* locations = instr->GetLocations();
- VectorRegister acc =
- VectorRegisterFrom(locations->InAt(HVecMultiplyAccumulate::kInputAccumulatorIndex));
- VectorRegister left =
- VectorRegisterFrom(locations->InAt(HVecMultiplyAccumulate::kInputMulLeftIndex));
- VectorRegister right =
- VectorRegisterFrom(locations->InAt(HVecMultiplyAccumulate::kInputMulRightIndex));
- switch (instr->GetPackedType()) {
+void LocationsBuilderMIPS::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) {
+ CreateVecAccumLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorMIPS::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ VectorRegister acc = VectorRegisterFrom(locations->InAt(0));
+ VectorRegister left = VectorRegisterFrom(locations->InAt(1));
+ VectorRegister right = VectorRegisterFrom(locations->InAt(2));
+ switch (instruction->GetPackedType()) {
case Primitive::kPrimByte:
- DCHECK_EQ(16u, instr->GetVectorLength());
- if (instr->GetOpKind() == HInstruction::kAdd) {
+ DCHECK_EQ(16u, instruction->GetVectorLength());
+ if (instruction->GetOpKind() == HInstruction::kAdd) {
__ MaddvB(acc, left, right);
} else {
__ MsubvB(acc, left, right);
@@ -868,24 +866,24 @@
break;
case Primitive::kPrimChar:
case Primitive::kPrimShort:
- DCHECK_EQ(8u, instr->GetVectorLength());
- if (instr->GetOpKind() == HInstruction::kAdd) {
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ if (instruction->GetOpKind() == HInstruction::kAdd) {
__ MaddvH(acc, left, right);
} else {
__ MsubvH(acc, left, right);
}
break;
case Primitive::kPrimInt:
- DCHECK_EQ(4u, instr->GetVectorLength());
- if (instr->GetOpKind() == HInstruction::kAdd) {
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ if (instruction->GetOpKind() == HInstruction::kAdd) {
__ MaddvW(acc, left, right);
} else {
__ MsubvW(acc, left, right);
}
break;
case Primitive::kPrimLong:
- DCHECK_EQ(2u, instr->GetVectorLength());
- if (instr->GetOpKind() == HInstruction::kAdd) {
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ if (instruction->GetOpKind() == HInstruction::kAdd) {
__ MaddvD(acc, left, right);
} else {
__ MsubvD(acc, left, right);
@@ -897,6 +895,15 @@
}
}
+void LocationsBuilderMIPS::VisitVecSADAccumulate(HVecSADAccumulate* instruction) {
+ CreateVecAccumLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorMIPS::VisitVecSADAccumulate(HVecSADAccumulate* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ // TODO: implement this, location helper already filled out (shared with MulAcc).
+}
+
// Helper to set up locations for vector memory operations.
static void CreateVecMemLocations(ArenaAllocator* arena,
HVecMemoryOperation* instruction,