Implement Sum-of-Abs-Differences idiom recognition.

Rationale:
Currently just on ARM64 (x86 lacks proper support),
using the SAD idiom yields great speedup on loops
that compute the sum-of-abs-difference operation.
Also includes some refinements around type conversions.

Speedup ExoPlayerAudio (golem run):
1.3x on ARM64
1.1x on x86

Test: test-art-host test-art-target

Bug: 64091002

Change-Id: Ia2b711d2bc23609a2ed50493dfe6719eedfe0130
diff --git a/compiler/optimizing/code_generator_vector_mips.cc b/compiler/optimizing/code_generator_vector_mips.cc
index c2fbf7f..0bedafc 100644
--- a/compiler/optimizing/code_generator_vector_mips.cc
+++ b/compiler/optimizing/code_generator_vector_mips.cc
@@ -826,21 +826,18 @@
   LOG(FATAL) << "No SIMD for " << instruction->GetId();
 }
 
-void LocationsBuilderMIPS::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) {
-  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instr);
-  switch (instr->GetPackedType()) {
+// Helper to set up locations for vector accumulations.
+static void CreateVecAccumLocations(ArenaAllocator* arena, HVecOperation* instruction) {
+  LocationSummary* locations = new (arena) LocationSummary(instruction);
+  switch (instruction->GetPackedType()) {
     case Primitive::kPrimByte:
     case Primitive::kPrimChar:
     case Primitive::kPrimShort:
     case Primitive::kPrimInt:
     case Primitive::kPrimLong:
-      locations->SetInAt(
-          HVecMultiplyAccumulate::kInputAccumulatorIndex, Location::RequiresFpuRegister());
-      locations->SetInAt(
-          HVecMultiplyAccumulate::kInputMulLeftIndex, Location::RequiresFpuRegister());
-      locations->SetInAt(
-          HVecMultiplyAccumulate::kInputMulRightIndex, Location::RequiresFpuRegister());
-      DCHECK_EQ(HVecMultiplyAccumulate::kInputAccumulatorIndex, 0);
+      locations->SetInAt(0, Location::RequiresFpuRegister());
+      locations->SetInAt(1, Location::RequiresFpuRegister());
+      locations->SetInAt(2, Location::RequiresFpuRegister());
       locations->SetOut(Location::SameAsFirstInput());
       break;
     default:
@@ -849,18 +846,19 @@
   }
 }
 
-void InstructionCodeGeneratorMIPS::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) {
-  LocationSummary* locations = instr->GetLocations();
-  VectorRegister acc =
-      VectorRegisterFrom(locations->InAt(HVecMultiplyAccumulate::kInputAccumulatorIndex));
-  VectorRegister left =
-      VectorRegisterFrom(locations->InAt(HVecMultiplyAccumulate::kInputMulLeftIndex));
-  VectorRegister right =
-      VectorRegisterFrom(locations->InAt(HVecMultiplyAccumulate::kInputMulRightIndex));
-  switch (instr->GetPackedType()) {
+void LocationsBuilderMIPS::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) {
+  CreateVecAccumLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorMIPS::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  VectorRegister acc = VectorRegisterFrom(locations->InAt(0));
+  VectorRegister left = VectorRegisterFrom(locations->InAt(1));
+  VectorRegister right = VectorRegisterFrom(locations->InAt(2));
+  switch (instruction->GetPackedType()) {
     case Primitive::kPrimByte:
-      DCHECK_EQ(16u, instr->GetVectorLength());
-      if (instr->GetOpKind() == HInstruction::kAdd) {
+      DCHECK_EQ(16u, instruction->GetVectorLength());
+      if (instruction->GetOpKind() == HInstruction::kAdd) {
         __ MaddvB(acc, left, right);
       } else {
         __ MsubvB(acc, left, right);
@@ -868,24 +866,24 @@
       break;
     case Primitive::kPrimChar:
     case Primitive::kPrimShort:
-      DCHECK_EQ(8u, instr->GetVectorLength());
-      if (instr->GetOpKind() == HInstruction::kAdd) {
+      DCHECK_EQ(8u, instruction->GetVectorLength());
+      if (instruction->GetOpKind() == HInstruction::kAdd) {
         __ MaddvH(acc, left, right);
       } else {
         __ MsubvH(acc, left, right);
       }
       break;
     case Primitive::kPrimInt:
-      DCHECK_EQ(4u, instr->GetVectorLength());
-      if (instr->GetOpKind() == HInstruction::kAdd) {
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      if (instruction->GetOpKind() == HInstruction::kAdd) {
         __ MaddvW(acc, left, right);
       } else {
         __ MsubvW(acc, left, right);
       }
       break;
     case Primitive::kPrimLong:
-      DCHECK_EQ(2u, instr->GetVectorLength());
-      if (instr->GetOpKind() == HInstruction::kAdd) {
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      if (instruction->GetOpKind() == HInstruction::kAdd) {
         __ MaddvD(acc, left, right);
       } else {
         __ MsubvD(acc, left, right);
@@ -897,6 +895,15 @@
   }
 }
 
+void LocationsBuilderMIPS::VisitVecSADAccumulate(HVecSADAccumulate* instruction) {
+  CreateVecAccumLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorMIPS::VisitVecSADAccumulate(HVecSADAccumulate* instruction) {
+  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+  // TODO: implement this, location helper already filled out (shared with MulAcc).
+}
+
 // Helper to set up locations for vector memory operations.
 static void CreateVecMemLocations(ArenaAllocator* arena,
                                   HVecMemoryOperation* instruction,