Implemented ABS vectorization.

Rationale:
This CL adds the concept of vectorizing intrinsics
to the ART vectorizer. More can follow (MIN, MAX, etc.).

Test: test-art-host, test-art-target (angler)
Change-Id: Ieed8aa83ec64c1250ac0578570249cce338b5d36
diff --git a/compiler/optimizing/code_generator_vector_x86.cc b/compiler/optimizing/code_generator_vector_x86.cc
index 4f3988e..8dabb4d 100644
--- a/compiler/optimizing/code_generator_vector_x86.cc
+++ b/compiler/optimizing/code_generator_vector_x86.cc
@@ -199,6 +199,46 @@
   }
 }
 
+void LocationsBuilderX86::VisitVecAbs(HVecAbs* instruction) {  // register needs of vector abs
+  CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
+  if (instruction->GetPackedType() == Primitive::kPrimInt) {  // int abs reads GetTemp(0)
+    instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister());
+  }
+}
+
+void InstructionCodeGeneratorX86::VisitVecAbs(HVecAbs* instruction) {  // dst = |src| per lane
+  LocationSummary* locations = instruction->GetLocations();
+  XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
+  XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimInt: {  // two's complement abs: (x ^ mask) - mask
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();  // added by builder
+      __ movaps(dst, src);   // dst = src
+      __ pxor(tmp, tmp);     // tmp = 0
+      __ pcmpgtd(tmp, dst);  // tmp = (0 > dst) ? all ones : 0, per 32-bit lane
+      __ pxor(dst, tmp);     // flip all bits of the negative lanes
+      __ psubd(dst, tmp);    // subtract -1 (add 1) in negative lanes, completing the negation
+      break;
+    }
+    case Primitive::kPrimFloat:  // abs by clearing the sign bit of each 32-bit lane
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      __ pcmpeqb(dst, dst);  // all ones; NOTE(review): assumes dst != src - confirm
+      __ psrld(dst, Immediate(1));  // 0x7fffffff in every 32-bit lane
+      __ andps(dst, src);  // dst = src with sign bits cleared
+      break;
+    case Primitive::kPrimDouble:  // abs by clearing the sign bit of each 64-bit lane
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      __ pcmpeqb(dst, dst);  // all ones; NOTE(review): assumes dst != src - confirm
+      __ psrlq(dst, Immediate(1));  // 0x7fffffffffffffff in every 64-bit lane
+      __ andpd(dst, src);  // dst = src with sign bits cleared
+      break;
+    default:  // no abs sequence is emitted for any other packed type
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
+}
+
 void LocationsBuilderX86::VisitVecNot(HVecNot* instruction) {
   CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
   // Boolean-not requires a temporary to construct the 16 x one.