Basic SIMD reduction support.

Rationale:
Enables vectorization of reductions of the form x += ... for very basic
(simple, same-type) constructs. Paves the way for more complex (narrower
and/or mixed-type) constructs, which will be handled by the next CL.
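
A minimal, hypothetical Java sketch of the kind of same-type reduction
loop this targets (method name and shape are illustrative, not taken
from this CL's tests):

  // Simple int sum reduction over an array. With this CL, the loop
  // optimizer can keep the partial sums in SIMD lanes and fold them
  // with a horizontal reduction (e.g. ADDV on ARM64, added below).
  static int sum(int[] a) {
    int x = 0;
    for (int i = 0; i < a.length; i++) {
      x += a[i];
    }
    return x;
  }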

This is a revert of Icb5d6c805516db0a1d911c3ede9a246ccef89a22
and thus a revert^2 of I2454778dd0ef1da915c178c7274e1cf33e271d0f
and thus a revert^3 of I1c1c87b6323e01442e8fbd94869ddc9e760ea1fc
and thus a revert^4 of I7880c135aee3ed0a39da9ae5b468cbf80e613766

Patch sets 1-2 show what needed to change.

Test: test-art-host test-art-target

Bug: 64091002
Change-Id: I647889e0da0959ca405b70081b79c7d3c9bcb2e9
diff --git a/compiler/optimizing/code_generator_vector_arm64.cc b/compiler/optimizing/code_generator_vector_arm64.cc
index 9095ecd..18a55c8 100644
--- a/compiler/optimizing/code_generator_vector_arm64.cc
+++ b/compiler/optimizing/code_generator_vector_arm64.cc
@@ -27,12 +27,13 @@
 using helpers::ARM64EncodableConstantOrRegister;
 using helpers::Arm64CanEncodeConstantAsImmediate;
 using helpers::DRegisterFrom;
-using helpers::VRegisterFrom;
 using helpers::HeapOperand;
 using helpers::InputRegisterAt;
 using helpers::Int64ConstantFrom;
-using helpers::XRegisterFrom;
+using helpers::OutputRegister;
+using helpers::VRegisterFrom;
 using helpers::WRegisterFrom;
+using helpers::XRegisterFrom;
 
 #define __ GetVIXLAssembler()->
 
@@ -127,20 +128,51 @@
   }
 }
 
-void LocationsBuilderARM64::VisitVecSetScalars(HVecSetScalars* instruction) {
-  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+void LocationsBuilderARM64::VisitVecExtractScalar(HVecExtractScalar* instruction) {
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimInt:
+    case Primitive::kPrimLong:
+      locations->SetInAt(0, Location::RequiresFpuRegister());
+      locations->SetOut(Location::RequiresRegister());
+      break;
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble:
+      locations->SetInAt(0, Location::RequiresFpuRegister());
+      locations->SetOut(Location::SameAsFirstInput());
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
 }
 
-void InstructionCodeGeneratorARM64::VisitVecSetScalars(HVecSetScalars* instruction) {
-  LOG(FATAL) << "No SIMD for " << instruction->GetId();
-}
-
-void LocationsBuilderARM64::VisitVecSumReduce(HVecSumReduce* instruction) {
-  LOG(FATAL) << "No SIMD for " << instruction->GetId();
-}
-
-void InstructionCodeGeneratorARM64::VisitVecSumReduce(HVecSumReduce* instruction) {
-  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+void InstructionCodeGeneratorARM64::VisitVecExtractScalar(HVecExtractScalar* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  VRegister src = VRegisterFrom(locations->InAt(0));
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimInt:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      __ Umov(OutputRegister(instruction), src.V4S(), 0);
+      break;
+    case Primitive::kPrimLong:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      __ Umov(OutputRegister(instruction), src.V2D(), 0);
+      break;
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble:
+      DCHECK_LE(2u, instruction->GetVectorLength());
+      DCHECK_LE(instruction->GetVectorLength(), 4u);
+      DCHECK(locations->InAt(0).Equals(locations->Out()));  // no code required
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
 }
 
 // Helper to set up locations for vector unary operations.
@@ -169,6 +201,46 @@
   }
 }
 
+void LocationsBuilderARM64::VisitVecReduce(HVecReduce* instruction) {
+  CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARM64::VisitVecReduce(HVecReduce* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  VRegister src = VRegisterFrom(locations->InAt(0));
+  VRegister dst = DRegisterFrom(locations->Out());
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimInt:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      switch (instruction->GetKind()) {
+        case HVecReduce::kSum:
+          __ Addv(dst.S(), src.V4S());
+          break;
+        case HVecReduce::kMin:
+          __ Sminv(dst.S(), src.V4S());
+          break;
+        case HVecReduce::kMax:
+          __ Smaxv(dst.S(), src.V4S());
+          break;
+      }
+      break;
+    case Primitive::kPrimLong:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      switch (instruction->GetKind()) {
+        case HVecReduce::kSum:
+          __ Addp(dst.D(), src.V2D());
+          break;
+        default:
+          LOG(FATAL) << "Unsupported SIMD min/max";
+          UNREACHABLE();
+      }
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
+}
+
 void LocationsBuilderARM64::VisitVecCnv(HVecCnv* instruction) {
   CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
 }
@@ -263,6 +335,7 @@
       break;
     default:
       LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
   }
 }
 
@@ -805,6 +878,77 @@
   }
 }
 
+void LocationsBuilderARM64::VisitVecSetScalars(HVecSetScalars* instruction) {
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
+
+  DCHECK_EQ(1u, instruction->InputCount());  // only one input currently implemented
+
+  HInstruction* input = instruction->InputAt(0);
+  bool is_zero = IsZeroBitPattern(input);
+
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimInt:
+    case Primitive::kPrimLong:
+      locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
+                                    : Location::RequiresRegister());
+      locations->SetOut(Location::RequiresFpuRegister());
+      break;
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble:
+      locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
+                                    : Location::RequiresFpuRegister());
+      locations->SetOut(Location::RequiresFpuRegister());
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
+}
+
+void InstructionCodeGeneratorARM64::VisitVecSetScalars(HVecSetScalars* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  VRegister dst = VRegisterFrom(locations->Out());
+
+  DCHECK_EQ(1u, instruction->InputCount());  // only one input currently implemented
+
+  // Zero out all other elements first.
+  __ Movi(dst.V16B(), 0);
+
+  // Shorthand for any type of zero.
+  if (IsZeroBitPattern(instruction->InputAt(0))) {
+    return;
+  }
+
+  // Set required elements.
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+      DCHECK_EQ(16u, instruction->GetVectorLength());
+      __ Mov(dst.V16B(), 0, InputRegisterAt(instruction, 0));
+      break;
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+      DCHECK_EQ(8u, instruction->GetVectorLength());
+      __ Mov(dst.V8H(), 0, InputRegisterAt(instruction, 0));
+      break;
+    case Primitive::kPrimInt:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      __ Mov(dst.V4S(), 0, InputRegisterAt(instruction, 0));
+      break;
+    case Primitive::kPrimLong:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      __ Mov(dst.V2D(), 0, InputRegisterAt(instruction, 0));
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
+}
+
 void LocationsBuilderARM64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instr);
   switch (instr->GetPackedType()) {