Revert "Basic SIMD reduction support."

This reverts commit 9879d0eac8fe2aae19ca6a4a2a83222d6383afc2.

Some builds hit the type check failure shown below. More time is needed to investigate it properly, so the change is being reverted for now.


dex2oatd F 08-30 21:14:29 210122 226218 
code_generator.cc:115] Check failed: CheckType(instruction->GetType(), locations->InAt(0)) PrimDouble C

Change-Id: I1c1c87b6323e01442e8fbd94869ddc9e760ea1fc
diff --git a/compiler/optimizing/code_generator_vector_x86.cc b/compiler/optimizing/code_generator_vector_x86.cc
index 57e7dc6..e7aec76 100644
--- a/compiler/optimizing/code_generator_vector_x86.cc
+++ b/compiler/optimizing/code_generator_vector_x86.cc
@@ -27,28 +27,22 @@
 
 void LocationsBuilderX86::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
-  HInstruction* input = instruction->InputAt(0);
-  bool is_zero = IsZeroBitPattern(input);
   switch (instruction->GetPackedType()) {
     case Primitive::kPrimLong:
-      // Long needs extra temporary to load from the register pair.
-      if (!is_zero) {
-        locations->AddTemp(Location::RequiresFpuRegister());
-      }
+      // Long needs extra temporary to load the register pair.
+      locations->AddTemp(Location::RequiresFpuRegister());
       FALLTHROUGH_INTENDED;
     case Primitive::kPrimBoolean:
     case Primitive::kPrimByte:
     case Primitive::kPrimChar:
     case Primitive::kPrimShort:
     case Primitive::kPrimInt:
-      locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
-                                    : Location::RequiresRegister());
+      locations->SetInAt(0, Location::RequiresRegister());
       locations->SetOut(Location::RequiresFpuRegister());
       break;
     case Primitive::kPrimFloat:
     case Primitive::kPrimDouble:
-      locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
-                                    : Location::RequiresFpuRegister());
+      locations->SetInAt(0, Location::RequiresFpuRegister());
       locations->SetOut(Location::SameAsFirstInput());
       break;
     default:
@@ -59,53 +53,46 @@
 
 void InstructionCodeGeneratorX86::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
   LocationSummary* locations = instruction->GetLocations();
-  XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
-
-  // Shorthand for any type of zero.
-  if (IsZeroBitPattern(instruction->InputAt(0))) {
-    __ xorps(dst, dst);
-    return;
-  }
-
+  XmmRegister reg = locations->Out().AsFpuRegister<XmmRegister>();
   switch (instruction->GetPackedType()) {
     case Primitive::kPrimBoolean:
     case Primitive::kPrimByte:
       DCHECK_EQ(16u, instruction->GetVectorLength());
-      __ movd(dst, locations->InAt(0).AsRegister<Register>());
-      __ punpcklbw(dst, dst);
-      __ punpcklwd(dst, dst);
-      __ pshufd(dst, dst, Immediate(0));
+      __ movd(reg, locations->InAt(0).AsRegister<Register>());
+      __ punpcklbw(reg, reg);
+      __ punpcklwd(reg, reg);
+      __ pshufd(reg, reg, Immediate(0));
       break;
     case Primitive::kPrimChar:
     case Primitive::kPrimShort:
       DCHECK_EQ(8u, instruction->GetVectorLength());
-      __ movd(dst, locations->InAt(0).AsRegister<Register>());
-      __ punpcklwd(dst, dst);
-      __ pshufd(dst, dst, Immediate(0));
+      __ movd(reg, locations->InAt(0).AsRegister<Register>());
+      __ punpcklwd(reg, reg);
+      __ pshufd(reg, reg, Immediate(0));
       break;
     case Primitive::kPrimInt:
       DCHECK_EQ(4u, instruction->GetVectorLength());
-      __ movd(dst, locations->InAt(0).AsRegister<Register>());
-      __ pshufd(dst, dst, Immediate(0));
+      __ movd(reg, locations->InAt(0).AsRegister<Register>());
+      __ pshufd(reg, reg, Immediate(0));
       break;
     case Primitive::kPrimLong: {
       XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
       DCHECK_EQ(2u, instruction->GetVectorLength());
-      __ movd(dst, locations->InAt(0).AsRegisterPairLow<Register>());
+      __ movd(reg, locations->InAt(0).AsRegisterPairLow<Register>());
       __ movd(tmp, locations->InAt(0).AsRegisterPairHigh<Register>());
-      __ punpckldq(dst, tmp);
-      __ punpcklqdq(dst, dst);
+      __ punpckldq(reg, tmp);
+      __ punpcklqdq(reg, reg);
       break;
     }
     case Primitive::kPrimFloat:
       DCHECK(locations->InAt(0).Equals(locations->Out()));
       DCHECK_EQ(4u, instruction->GetVectorLength());
-      __ shufps(dst, dst, Immediate(0));
+      __ shufps(reg, reg, Immediate(0));
       break;
     case Primitive::kPrimDouble:
       DCHECK(locations->InAt(0).Equals(locations->Out()));
       DCHECK_EQ(2u, instruction->GetVectorLength());
-      __ shufpd(dst, dst, Immediate(0));
+      __ shufpd(reg, reg, Immediate(0));
       break;
     default:
       LOG(FATAL) << "Unsupported SIMD type";
@@ -113,65 +100,20 @@
   }
 }
 
-void LocationsBuilderX86::VisitVecExtractScalar(HVecExtractScalar* instruction) {
-  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
-  switch (instruction->GetPackedType()) {
-    case Primitive::kPrimLong:
-      // Long needs extra temporary to store into the register pair.
-      locations->AddTemp(Location::RequiresFpuRegister());
-      FALLTHROUGH_INTENDED;
-    case Primitive::kPrimBoolean:
-    case Primitive::kPrimByte:
-    case Primitive::kPrimChar:
-    case Primitive::kPrimShort:
-    case Primitive::kPrimInt:
-      locations->SetInAt(0, Location::RequiresFpuRegister());
-      locations->SetOut(Location::RequiresRegister());
-      break;
-    case Primitive::kPrimFloat:
-    case Primitive::kPrimDouble:
-      locations->SetInAt(0, Location::RequiresFpuRegister());
-      locations->SetOut(Location::SameAsFirstInput());
-      break;
-    default:
-      LOG(FATAL) << "Unsupported SIMD type";
-      UNREACHABLE();
-  }
+void LocationsBuilderX86::VisitVecSetScalars(HVecSetScalars* instruction) {
+  LOG(FATAL) << "No SIMD for " << instruction->GetId();
 }
 
-void InstructionCodeGeneratorX86::VisitVecExtractScalar(HVecExtractScalar* instruction) {
-  LocationSummary* locations = instruction->GetLocations();
-  XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
-  switch (instruction->GetPackedType()) {
-    case Primitive::kPrimBoolean:
-    case Primitive::kPrimByte:
-    case Primitive::kPrimChar:
-    case Primitive::kPrimShort:  // TODO: up to here, and?
-      LOG(FATAL) << "Unsupported SIMD type";
-      UNREACHABLE();
-    case Primitive::kPrimInt:
-      DCHECK_LE(4u, instruction->GetVectorLength());
-      DCHECK_LE(instruction->GetVectorLength(), 16u);
-      __ movd(locations->Out().AsRegister<Register>(), src);
-      break;
-    case Primitive::kPrimLong: {
-      XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
-      DCHECK_EQ(2u, instruction->GetVectorLength());
-      __ movd(locations->Out().AsRegisterPairLow<Register>(), src);
-      __ pshufd(tmp, src, Immediate(1));
-      __ movd(locations->Out().AsRegisterPairHigh<Register>(), tmp);
-      break;
-    }
-    case Primitive::kPrimFloat:
-    case Primitive::kPrimDouble:
-      DCHECK_LE(2u, instruction->GetVectorLength());
-      DCHECK_LE(instruction->GetVectorLength(), 4u);
-      DCHECK(locations->InAt(0).Equals(locations->Out()));  // no code required
-      break;
-    default:
-      LOG(FATAL) << "Unsupported SIMD type";
-      UNREACHABLE();
-  }
+void InstructionCodeGeneratorX86::VisitVecSetScalars(HVecSetScalars* instruction) {
+  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderX86::VisitVecSumReduce(HVecSumReduce* instruction) {
+  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void InstructionCodeGeneratorX86::VisitVecSumReduce(HVecSumReduce* instruction) {
+  LOG(FATAL) << "No SIMD for " << instruction->GetId();
 }
 
 // Helper to set up locations for vector unary operations.
@@ -195,73 +137,6 @@
   }
 }
 
-void LocationsBuilderX86::VisitVecReduce(HVecReduce* instruction) {
-  CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
-  // Long reduction or min/max require a temporary.
-  if (instruction->GetPackedType() == Primitive::kPrimLong ||
-      instruction->GetKind() == HVecReduce::kMin ||
-      instruction->GetKind() == HVecReduce::kMax) {
-    instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister());
-  }
-}
-
-void InstructionCodeGeneratorX86::VisitVecReduce(HVecReduce* instruction) {
-  LocationSummary* locations = instruction->GetLocations();
-  XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
-  XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
-  switch (instruction->GetPackedType()) {
-    case Primitive::kPrimInt:
-      DCHECK_EQ(4u, instruction->GetVectorLength());
-      switch (instruction->GetKind()) {
-        case HVecReduce::kSum:
-          __ movaps(dst, src);
-          __ phaddd(dst, dst);
-          __ phaddd(dst, dst);
-          break;
-        case HVecReduce::kMin: {
-          XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
-          __ movaps(tmp, src);
-          __ movaps(dst, src);
-          __ psrldq(tmp, Immediate(8));
-          __ pminsd(dst, tmp);
-          __ psrldq(tmp, Immediate(4));
-          __ pminsd(dst, tmp);
-          break;
-        }
-        case HVecReduce::kMax: {
-          XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
-          __ movaps(tmp, src);
-          __ movaps(dst, src);
-          __ psrldq(tmp, Immediate(8));
-          __ pmaxsd(dst, tmp);
-          __ psrldq(tmp, Immediate(4));
-          __ pmaxsd(dst, tmp);
-          break;
-        }
-      }
-      break;
-    case Primitive::kPrimLong: {
-      DCHECK_EQ(2u, instruction->GetVectorLength());
-      XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
-      switch (instruction->GetKind()) {
-        case HVecReduce::kSum:
-          __ movaps(tmp, src);
-          __ movaps(dst, src);
-          __ punpckhqdq(tmp, tmp);
-          __ paddq(dst, tmp);
-          break;
-        case HVecReduce::kMin:
-        case HVecReduce::kMax:
-          LOG(FATAL) << "Unsupported SIMD type";
-      }
-      break;
-    }
-    default:
-      LOG(FATAL) << "Unsupported SIMD type";
-      UNREACHABLE();
-  }
-}
-
 void LocationsBuilderX86::VisitVecCnv(HVecCnv* instruction) {
   CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
 }
@@ -946,91 +821,6 @@
   }
 }
 
-void LocationsBuilderX86::VisitVecSetScalars(HVecSetScalars* instruction) {
-  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
-
-  DCHECK_EQ(1u, instruction->InputCount());  // only one input currently implemented
-
-  HInstruction* input = instruction->InputAt(0);
-  bool is_zero = IsZeroBitPattern(input);
-
-  switch (instruction->GetPackedType()) {
-    case Primitive::kPrimLong:
-      // Long needs extra temporary to load from register pairs.
-      if (!is_zero) {
-        locations->AddTemp(Location::RequiresFpuRegister());
-      }
-      FALLTHROUGH_INTENDED;
-    case Primitive::kPrimBoolean:
-    case Primitive::kPrimByte:
-    case Primitive::kPrimChar:
-    case Primitive::kPrimShort:
-    case Primitive::kPrimInt:
-      locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
-                                    : Location::RequiresRegister());
-      locations->SetOut(Location::RequiresFpuRegister());
-      break;
-    case Primitive::kPrimFloat:
-    case Primitive::kPrimDouble:
-      locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
-                                    : Location::RequiresFpuRegister());
-      locations->SetOut(Location::RequiresFpuRegister());
-      break;
-    default:
-      LOG(FATAL) << "Unsupported SIMD type";
-      UNREACHABLE();
-  }
-}
-
-void InstructionCodeGeneratorX86::VisitVecSetScalars(HVecSetScalars* instruction) {
-  LocationSummary* locations = instruction->GetLocations();
-  XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
-
-  DCHECK_EQ(1u, instruction->InputCount());  // only one input currently implemented
-
-  // Zero out all other elements first.
-  __ xorps(dst, dst);
-
-  // Shorthand for any type of zero.
-  if (IsZeroBitPattern(instruction->InputAt(0))) {
-    return;
-  }
-
-  // Set required elements.
-  switch (instruction->GetPackedType()) {
-    case Primitive::kPrimBoolean:
-    case Primitive::kPrimByte:
-    case Primitive::kPrimChar:
-    case Primitive::kPrimShort:  // TODO: up to here, and?
-      LOG(FATAL) << "Unsupported SIMD type";
-      UNREACHABLE();
-    case Primitive::kPrimInt:
-      DCHECK_EQ(4u, instruction->GetVectorLength());
-      __ movd(dst, locations->InAt(0).AsRegister<Register>());
-      break;
-    case Primitive::kPrimLong: {
-      XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
-      DCHECK_EQ(2u, instruction->GetVectorLength());
-      __ xorps(tmp, tmp);
-      __ movd(dst, locations->InAt(0).AsRegisterPairLow<Register>());
-      __ movd(tmp, locations->InAt(0).AsRegisterPairHigh<Register>());
-      __ punpckldq(dst, tmp);
-      break;
-    }
-    case Primitive::kPrimFloat:
-      DCHECK_EQ(4u, instruction->GetVectorLength());
-      __ movss(dst, locations->InAt(1).AsFpuRegister<XmmRegister>());
-      break;
-    case Primitive::kPrimDouble:
-      DCHECK_EQ(2u, instruction->GetVectorLength());
-      __ movsd(dst, locations->InAt(1).AsFpuRegister<XmmRegister>());
-      break;
-    default:
-      LOG(FATAL) << "Unsupported SIMD type";
-      UNREACHABLE();
-  }
-}
-
 void LocationsBuilderX86::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) {
   LOG(FATAL) << "No SIMD for " << instr->GetId();
 }
@@ -1078,7 +868,6 @@
     case 8: scale = TIMES_8; break;
     default: break;
   }
-  // Incorporate the string or array offset in the address computation.
   uint32_t offset = is_string_char_at
       ? mirror::String::ValueOffset().Uint32Value()
       : mirror::Array::DataOffset(size).Uint32Value();
@@ -1113,7 +902,7 @@
         __ testb(Address(locations->InAt(0).AsRegister<Register>(), count_offset), Immediate(1));
         __ j(kNotZero, &not_compressed);
         // Zero extend 8 compressed bytes into 8 chars.
-        __ movsd(reg, VecAddress(locations, 1, instruction->IsStringCharAt()));
+        __ movsd(reg, VecAddress(locations, 1, /*is_string_char_at*/ true));
         __ pxor(tmp, tmp);
         __ punpcklbw(reg, tmp);
         __ jmp(&done);