6 files changed, 51 insertions, 132 deletions
diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc
index 2f495fc15f..2452139d42 100644
--- a/compiler/optimizing/code_generator_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_arm_vixl.cc
@@ -3078,6 +3078,18 @@ void InstructionCodeGeneratorARMVIXL::VisitSelect(HSelect* select) {
   const Location first = locations->InAt(0);
   const Location out = locations->Out();
   const Location second = locations->InAt(1);
+
+  // In the unlucky case the output of this instruction overlaps
+  // with an input of an "emitted-at-use-site" condition, and
+  // the output of this instruction is not one of its inputs, we'll
+  // need to fallback to branches instead of conditional ARM instructions.
+  bool output_overlaps_with_condition_inputs =
+      !IsBooleanValueOrMaterializedCondition(condition) &&
+      !out.Equals(first) &&
+      !out.Equals(second) &&
+      (condition->GetLocations()->InAt(0).Equals(out) ||
+       condition->GetLocations()->InAt(1).Equals(out));
+  DCHECK(!output_overlaps_with_condition_inputs || condition->IsCondition());
   Location src;
 
   if (condition->IsIntConstant()) {
@@ -3091,7 +3103,7 @@ void InstructionCodeGeneratorARMVIXL::VisitSelect(HSelect* select) {
     return;
   }
 
-  if (!DataType::IsFloatingPointType(type)) {
+  if (!DataType::IsFloatingPointType(type) && !output_overlaps_with_condition_inputs) {
     bool invert = false;
 
     if (out.Equals(second)) {
@@ -3163,6 +3175,7 @@ void InstructionCodeGeneratorARMVIXL::VisitSelect(HSelect* select) {
   vixl32::Label* false_target = nullptr;
   vixl32::Label* true_target = nullptr;
   vixl32::Label select_end;
+  vixl32::Label other_case;
   vixl32::Label* const target = codegen_->GetFinalLabel(select, &select_end);
 
   if (out.Equals(second)) {
@@ -3173,12 +3186,21 @@ void InstructionCodeGeneratorARMVIXL::VisitSelect(HSelect* select) {
     src = second;
 
     if (!out.Equals(first)) {
-      codegen_->MoveLocation(out, first, type);
+      if (output_overlaps_with_condition_inputs) {
+        false_target = &other_case;
+      } else {
+        codegen_->MoveLocation(out, first, type);
+      }
     }
   }
 
   GenerateTestAndBranch(select, 2, true_target, false_target, /* far_target */ false);
   codegen_->MoveLocation(out, src, type);
+  if (output_overlaps_with_condition_inputs) {
+    __ B(target);
+    __ Bind(&other_case);
+    codegen_->MoveLocation(out, first, type);
+  }
 
   if (select_end.IsReferenced()) {
     __ Bind(&select_end);
@@ -3277,31 +3299,16 @@ void CodeGeneratorARMVIXL::GenerateConditionWithZero(IfCondition condition,
 void LocationsBuilderARMVIXL::HandleCondition(HCondition* cond) {
   LocationSummary* locations =
       new (GetGraph()->GetAllocator()) LocationSummary(cond, LocationSummary::kNoCall);
-  // Handle the long/FP comparisons made in instruction simplification.
-  switch (cond->InputAt(0)->GetType()) {
-    case DataType::Type::kInt64:
-      locations->SetInAt(0, Location::RequiresRegister());
-      locations->SetInAt(1, Location::RegisterOrConstant(cond->InputAt(1)));
-      if (!cond->IsEmittedAtUseSite()) {
-        locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
-      }
-      break;
-
-    case DataType::Type::kFloat32:
-    case DataType::Type::kFloat64:
-      locations->SetInAt(0, Location::RequiresFpuRegister());
-      locations->SetInAt(1, ArithmeticZeroOrFpuRegister(cond->InputAt(1)));
-      if (!cond->IsEmittedAtUseSite()) {
-        locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
-      }
-      break;
-
-    default:
-      locations->SetInAt(0, Location::RequiresRegister());
-      locations->SetInAt(1, Location::RegisterOrConstant(cond->InputAt(1)));
-      if (!cond->IsEmittedAtUseSite()) {
-        locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
-      }
+  const DataType::Type type = cond->InputAt(0)->GetType();
+  if (DataType::IsFloatingPointType(type)) {
+    locations->SetInAt(0, Location::RequiresFpuRegister());
+    locations->SetInAt(1, ArithmeticZeroOrFpuRegister(cond->InputAt(1)));
+  } else {
+    locations->SetInAt(0, Location::RequiresRegister());
+    locations->SetInAt(1, Location::RegisterOrConstant(cond->InputAt(1)));
+  }
+  if (!cond->IsEmittedAtUseSite()) {
+    locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
   }
 }
 
diff --git a/compiler/optimizing/code_generator_vector_arm64.cc b/compiler/optimizing/code_generator_vector_arm64.cc
index 174efdf115..1cfdf54816 100644
--- a/compiler/optimizing/code_generator_vector_arm64.cc
+++ b/compiler/optimizing/code_generator_vector_arm64.cc
@@ -1290,6 +1290,7 @@ void InstructionCodeGeneratorARM64::VisitVecLoad(HVecLoad* instruction) {
   Register scratch;
 
   switch (instruction->GetPackedType()) {
+    case DataType::Type::kInt16:  // (short) s.charAt(.) can yield HVecLoad/Int16/StringCharAt.
     case DataType::Type::kUint16:
       DCHECK_EQ(8u, instruction->GetVectorLength());
       // Special handling of compressed/uncompressed string load.
@@ -1321,7 +1322,6 @@ void InstructionCodeGeneratorARM64::VisitVecLoad(HVecLoad* instruction) {
     case DataType::Type::kBool:
     case DataType::Type::kUint8:
     case DataType::Type::kInt8:
-    case DataType::Type::kInt16:
     case DataType::Type::kInt32:
     case DataType::Type::kFloat32:
     case DataType::Type::kInt64:
diff --git a/compiler/optimizing/code_generator_vector_x86.cc b/compiler/optimizing/code_generator_vector_x86.cc
index f2ffccc887..4945328e2b 100644
--- a/compiler/optimizing/code_generator_vector_x86.cc
+++ b/compiler/optimizing/code_generator_vector_x86.cc
@@ -1141,6 +1141,7 @@ void InstructionCodeGeneratorX86::VisitVecLoad(HVecLoad* instruction) {
   XmmRegister reg = locations->Out().AsFpuRegister<XmmRegister>();
   bool is_aligned16 = instruction->GetAlignment().IsAlignedAt(16);
   switch (instruction->GetPackedType()) {
+    case DataType::Type::kInt16:  // (short) s.charAt(.) can yield HVecLoad/Int16/StringCharAt.
     case DataType::Type::kUint16:
       DCHECK_EQ(8u, instruction->GetVectorLength());
       // Special handling of compressed/uncompressed string load.
@@ -1168,7 +1169,6 @@ void InstructionCodeGeneratorX86::VisitVecLoad(HVecLoad* instruction) {
     case DataType::Type::kBool:
     case DataType::Type::kUint8:
     case DataType::Type::kInt8:
-    case DataType::Type::kInt16:
     case DataType::Type::kInt32:
     case DataType::Type::kInt64:
       DCHECK_LE(2u, instruction->GetVectorLength());
diff --git a/compiler/optimizing/code_generator_vector_x86_64.cc b/compiler/optimizing/code_generator_vector_x86_64.cc
index e2b0485f89..a77c7d6838 100644
--- a/compiler/optimizing/code_generator_vector_x86_64.cc
+++ b/compiler/optimizing/code_generator_vector_x86_64.cc
@@ -1114,6 +1114,7 @@ void InstructionCodeGeneratorX86_64::VisitVecLoad(HVecLoad* instruction) {
   XmmRegister reg = locations->Out().AsFpuRegister<XmmRegister>();
   bool is_aligned16 = instruction->GetAlignment().IsAlignedAt(16);
   switch (instruction->GetPackedType()) {
+    case DataType::Type::kInt16:  // (short) s.charAt(.) can yield HVecLoad/Int16/StringCharAt.
     case DataType::Type::kUint16:
       DCHECK_EQ(8u, instruction->GetVectorLength());
       // Special handling of compressed/uncompressed string load.
@@ -1141,7 +1142,6 @@ void InstructionCodeGeneratorX86_64::VisitVecLoad(HVecLoad* instruction) {
     case DataType::Type::kBool:
     case DataType::Type::kUint8:
     case DataType::Type::kInt8:
-    case DataType::Type::kInt16:
     case DataType::Type::kInt32:
     case DataType::Type::kInt64:
       DCHECK_LE(2u, instruction->GetVectorLength());
diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc
index 899496328e..9f278a9f4e 100644
--- a/compiler/optimizing/loop_optimization.cc
+++ b/compiler/optimizing/loop_optimization.cc
@@ -334,29 +334,12 @@ static bool IsAddConst(HInstruction* instruction,
 // Detect reductions of the following forms,
 //   x = x_phi + ..
 //   x = x_phi - ..
-//   x = max(x_phi, ..)
-//   x = min(x_phi, ..)
 static bool HasReductionFormat(HInstruction* reduction, HInstruction* phi) {
   if (reduction->IsAdd()) {
     return (reduction->InputAt(0) == phi && reduction->InputAt(1) != phi) ||
            (reduction->InputAt(0) != phi && reduction->InputAt(1) == phi);
   } else if (reduction->IsSub()) {
     return (reduction->InputAt(0) == phi && reduction->InputAt(1) != phi);
-  } else if (reduction->IsInvokeStaticOrDirect()) {
-    switch (reduction->AsInvokeStaticOrDirect()->GetIntrinsic()) {
-      case Intrinsics::kMathMinIntInt:
-      case Intrinsics::kMathMinLongLong:
-      case Intrinsics::kMathMinFloatFloat:
-      case Intrinsics::kMathMinDoubleDouble:
-      case Intrinsics::kMathMaxIntInt:
-      case Intrinsics::kMathMaxLongLong:
-      case Intrinsics::kMathMaxFloatFloat:
-      case Intrinsics::kMathMaxDoubleDouble:
-        return (reduction->InputAt(0) == phi && reduction->InputAt(1) != phi) ||
-               (reduction->InputAt(0) != phi && reduction->InputAt(1) == phi);
-      default:
-        return false;
-    }
   }
   return false;
 }
@@ -365,10 +348,6 @@ static bool HasReductionFormat(HInstruction* reduction, HInstruction* phi) {
 static HVecReduce::ReductionKind GetReductionKind(HVecOperation* reduction) {
   if (reduction->IsVecAdd() || reduction->IsVecSub() || reduction->IsVecSADAccumulate()) {
     return HVecReduce::kSum;
-  } else if (reduction->IsVecMin()) {
-    return HVecReduce::kMin;
-  } else if (reduction->IsVecMax()) {
-    return HVecReduce::kMax;
   }
   LOG(FATAL) << "Unsupported SIMD reduction " << reduction->GetId();
   UNREACHABLE();
@@ -1124,7 +1103,6 @@ bool HLoopOptimization::VectorizeDef(LoopNode* node,
   return !IsUsedOutsideLoop(node->loop_info, instruction) && !instruction->DoesAnyWrite();
 }
 
-// TODO: saturation arithmetic.
 bool HLoopOptimization::VectorizeUse(LoopNode* node,
                                      HInstruction* instruction,
                                      bool generate_code,
@@ -1331,43 +1309,6 @@ bool HLoopOptimization::VectorizeUse(LoopNode* node,
         }
         return false;
       }
-      case Intrinsics::kMathMinIntInt:
-      case Intrinsics::kMathMinLongLong:
-      case Intrinsics::kMathMinFloatFloat:
-      case Intrinsics::kMathMinDoubleDouble:
-      case Intrinsics::kMathMaxIntInt:
-      case Intrinsics::kMathMaxLongLong:
-      case Intrinsics::kMathMaxFloatFloat:
-      case Intrinsics::kMathMaxDoubleDouble: {
-        // Deal with vector restrictions.
-        HInstruction* opa = instruction->InputAt(0);
-        HInstruction* opb = instruction->InputAt(1);
-        HInstruction* r = opa;
-        HInstruction* s = opb;
-        bool is_unsigned = false;
-        if (HasVectorRestrictions(restrictions, kNoMinMax)) {
-          return false;
-        } else if (HasVectorRestrictions(restrictions, kNoHiBits) &&
-                   !IsNarrowerOperands(opa, opb, type, &r, &s, &is_unsigned)) {
-          return false;  // reject, unless all operands are same-extension narrower
-        }
-        // Accept MIN/MAX(x, y) for vectorizable operands.
-        DCHECK(r != nullptr);
-        DCHECK(s != nullptr);
-        if (generate_code && vector_mode_ != kVector) {  // de-idiom
-          r = opa;
-          s = opb;
-        }
-        if (VectorizeUse(node, r, generate_code, type, restrictions) &&
-            VectorizeUse(node, s, generate_code, type, restrictions)) {
-          if (generate_code) {
-            GenerateVecOp(
-                instruction, vector_map_->Get(r), vector_map_->Get(s), type, is_unsigned);
-          }
-          return true;
-        }
-        return false;
-      }
       default:
         return false;
     }  // switch
@@ -1426,7 +1367,7 @@ bool HLoopOptimization::TrySetVectorType(DataType::Type type, uint64_t* restrict
           *restrictions |= kNoDiv;
           return TrySetVectorLength(4);
         case DataType::Type::kInt64:
-          *restrictions |= kNoDiv | kNoMul | kNoMinMax;
+          *restrictions |= kNoDiv | kNoMul;
           return TrySetVectorLength(2);
         case DataType::Type::kFloat32:
           *restrictions |= kNoReduction;
@@ -1456,13 +1397,13 @@ bool HLoopOptimization::TrySetVectorType(DataType::Type type, uint64_t* restrict
             *restrictions |= kNoDiv | kNoSAD;
             return TrySetVectorLength(4);
           case DataType::Type::kInt64:
-            *restrictions |= kNoMul | kNoDiv | kNoShr | kNoAbs | kNoMinMax | kNoSAD;
+            *restrictions |= kNoMul | kNoDiv | kNoShr | kNoAbs | kNoSAD;
             return TrySetVectorLength(2);
           case DataType::Type::kFloat32:
-            *restrictions |= kNoMinMax | kNoReduction;  // minmax: -0.0 vs +0.0
+            *restrictions |= kNoReduction;
             return TrySetVectorLength(4);
           case DataType::Type::kFloat64:
-            *restrictions |= kNoMinMax | kNoReduction;  // minmax: -0.0 vs +0.0
+            *restrictions |= kNoReduction;
             return TrySetVectorLength(2);
           default:
             break;
@@ -1488,10 +1429,10 @@ bool HLoopOptimization::TrySetVectorType(DataType::Type type, uint64_t* restrict
             *restrictions |= kNoDiv;
             return TrySetVectorLength(2);
           case DataType::Type::kFloat32:
-            *restrictions |= kNoMinMax | kNoReduction;  // min/max(x, NaN)
+            *restrictions |= kNoReduction;
             return TrySetVectorLength(4);
           case DataType::Type::kFloat64:
-            *restrictions |= kNoMinMax | kNoReduction;  // min/max(x, NaN)
+            *restrictions |= kNoReduction;
             return TrySetVectorLength(2);
           default:
             break;
@@ -1517,10 +1458,10 @@ bool HLoopOptimization::TrySetVectorType(DataType::Type type, uint64_t* restrict
             *restrictions |= kNoDiv;
             return TrySetVectorLength(2);
           case DataType::Type::kFloat32:
-            *restrictions |= kNoMinMax | kNoReduction;  // min/max(x, NaN)
+            *restrictions |= kNoReduction;
             return TrySetVectorLength(4);
           case DataType::Type::kFloat64:
-            *restrictions |= kNoMinMax | kNoReduction;  // min/max(x, NaN)
+            *restrictions |= kNoReduction;
             return TrySetVectorLength(2);
           default:
             break;
@@ -1745,8 +1686,7 @@ HInstruction* HLoopOptimization::ReduceAndExtractIfNeeded(HInstruction* instruct
 void HLoopOptimization::GenerateVecOp(HInstruction* org,
                                       HInstruction* opa,
                                       HInstruction* opb,
-                                      DataType::Type type,
-                                      bool is_unsigned) {
+                                      DataType::Type type) {
   uint32_t dex_pc = org->GetDexPc();
   HInstruction* vector = nullptr;
   DataType::Type org_type = org->GetType();
@@ -1823,32 +1763,6 @@ void HLoopOptimization::GenerateVecOp(HInstruction* org,
             vector = new (global_allocator_)
                 HVecAbs(global_allocator_, opa, type, vector_length_, dex_pc);
             break;
-          case Intrinsics::kMathMinIntInt:
-          case Intrinsics::kMathMinLongLong:
-          case Intrinsics::kMathMinFloatFloat:
-          case Intrinsics::kMathMinDoubleDouble: {
-            vector = new (global_allocator_)
-                HVecMin(global_allocator_,
-                        opa,
-                        opb,
-                        HVecOperation::ToProperType(type, is_unsigned),
-                        vector_length_,
-                        dex_pc);
-            break;
-          }
-          case Intrinsics::kMathMaxIntInt:
-          case Intrinsics::kMathMaxLongLong:
-          case Intrinsics::kMathMaxFloatFloat:
-          case Intrinsics::kMathMaxDoubleDouble: {
-            vector = new (global_allocator_)
-                HVecMax(global_allocator_,
-                        opa,
-                        opb,
-                        HVecOperation::ToProperType(type, is_unsigned),
-                        vector_length_,
-                        dex_pc);
-            break;
-          }
           default:
             LOG(FATAL) << "Unsupported SIMD intrinsic " << org->GetId();
             UNREACHABLE();
diff --git a/compiler/optimizing/loop_optimization.h b/compiler/optimizing/loop_optimization.h
index a707ad1358..d70751037b 100644
--- a/compiler/optimizing/loop_optimization.h
+++ b/compiler/optimizing/loop_optimization.h
@@ -75,11 +75,10 @@ class HLoopOptimization : public HOptimization {
     kNoSignedHAdd    = 1 << 5,   // no signed halving add
     kNoUnroundedHAdd = 1 << 6,   // no unrounded halving add
     kNoAbs           = 1 << 7,   // no absolute value
-    kNoMinMax        = 1 << 8,   // no min/max
-    kNoStringCharAt  = 1 << 9,   // no StringCharAt
-    kNoReduction     = 1 << 10,  // no reduction
-    kNoSAD           = 1 << 11,  // no sum of absolute differences (SAD)
-    kNoWideSAD       = 1 << 12,  // no sum of absolute differences (SAD) with operand widening
+    kNoStringCharAt  = 1 << 8,   // no StringCharAt
+    kNoReduction     = 1 << 9,   // no reduction
+    kNoSAD           = 1 << 10,  // no sum of absolute differences (SAD)
+    kNoWideSAD       = 1 << 11,  // no sum of absolute differences (SAD) with operand widening
   };
 
   /*
@@ -173,8 +172,7 @@ class HLoopOptimization : public HOptimization {
   void GenerateVecOp(HInstruction* org,
                      HInstruction* opa,
                      HInstruction* opb,
-                     DataType::Type type,
-                     bool is_unsigned = false);
+                     DataType::Type type);
 
   // Vectorization idioms.
   bool VectorizeHalvingAddIdiom(LoopNode* node,