summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--compiler/optimizing/code_generator_vector_mips.cc174
-rw-r--r--compiler/optimizing/code_generator_vector_mips64.cc168
-rw-r--r--compiler/optimizing/loop_optimization.cc8
-rw-r--r--test/661-checker-simd-reduc/src/Main.java109
-rw-r--r--test/665-checker-simd-zero/src/Main.java48
5 files changed, 489 insertions, 18 deletions
diff --git a/compiler/optimizing/code_generator_vector_mips.cc b/compiler/optimizing/code_generator_vector_mips.cc
index c5a39ff882..7a8c0ad025 100644
--- a/compiler/optimizing/code_generator_vector_mips.cc
+++ b/compiler/optimizing/code_generator_vector_mips.cc
@@ -91,17 +91,61 @@ void InstructionCodeGeneratorMIPS::VisitVecReplicateScalar(HVecReplicateScalar*
}
void LocationsBuilderMIPS::VisitVecExtractScalar(HVecExtractScalar* instruction) {
- LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
+ switch (instruction->GetPackedType()) {
+ case DataType::Type::kBool:
+ case DataType::Type::kUint8:
+ case DataType::Type::kInt8:
+ case DataType::Type::kUint16:
+ case DataType::Type::kInt16:
+ case DataType::Type::kInt32:
+ case DataType::Type::kInt64:
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetOut(Location::RequiresRegister());
+ break;
+ case DataType::Type::kFloat32:
+ case DataType::Type::kFloat64:
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetOut(Location::SameAsFirstInput());
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
}
void InstructionCodeGeneratorMIPS::VisitVecExtractScalar(HVecExtractScalar* instruction) {
- LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ LocationSummary* locations = instruction->GetLocations();
+ VectorRegister src = VectorRegisterFrom(locations->InAt(0));
+ switch (instruction->GetPackedType()) {
+ case DataType::Type::kInt32:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ Copy_sW(locations->Out().AsRegister<Register>(), src, 0);
+ break;
+ case DataType::Type::kInt64:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ Mfc1(locations->Out().AsRegisterPairLow<Register>(),
+ locations->InAt(0).AsFpuRegister<FRegister>());
+ __ MoveFromFpuHigh(locations->Out().AsRegisterPairHigh<Register>(),
+ locations->InAt(0).AsFpuRegister<FRegister>());
+ break;
+ case DataType::Type::kFloat32:
+ case DataType::Type::kFloat64:
+ DCHECK_LE(2u, instruction->GetVectorLength());
+ DCHECK_LE(instruction->GetVectorLength(), 4u);
+ DCHECK(locations->InAt(0).Equals(locations->Out())); // no code required
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
}
// Helper to set up locations for vector unary operations.
static void CreateVecUnOpLocations(ArenaAllocator* allocator, HVecUnaryOperation* instruction) {
LocationSummary* locations = new (allocator) LocationSummary(instruction);
- switch (instruction->GetPackedType()) {
+ DataType::Type type = instruction->GetPackedType();
+ switch (type) {
case DataType::Type::kBool:
locations->SetInAt(0, Location::RequiresFpuRegister());
locations->SetOut(Location::RequiresFpuRegister(),
@@ -118,7 +162,8 @@ static void CreateVecUnOpLocations(ArenaAllocator* allocator, HVecUnaryOperation
case DataType::Type::kFloat64:
locations->SetInAt(0, Location::RequiresFpuRegister());
locations->SetOut(Location::RequiresFpuRegister(),
- (instruction->IsVecNeg() || instruction->IsVecAbs())
+ (instruction->IsVecNeg() || instruction->IsVecAbs() ||
+ (instruction->IsVecReduce() && type == DataType::Type::kInt64))
? Location::kOutputOverlap
: Location::kNoOutputOverlap);
break;
@@ -133,7 +178,54 @@ void LocationsBuilderMIPS::VisitVecReduce(HVecReduce* instruction) {
}
void InstructionCodeGeneratorMIPS::VisitVecReduce(HVecReduce* instruction) {
- LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ LocationSummary* locations = instruction->GetLocations();
+ VectorRegister src = VectorRegisterFrom(locations->InAt(0));
+ VectorRegister dst = VectorRegisterFrom(locations->Out());
+ VectorRegister tmp = static_cast<VectorRegister>(FTMP);
+ switch (instruction->GetPackedType()) {
+ case DataType::Type::kInt32:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ switch (instruction->GetKind()) {
+ case HVecReduce::kSum:
+ __ Hadd_sD(tmp, src, src);
+ __ IlvlD(dst, tmp, tmp);
+ __ AddvW(dst, dst, tmp);
+ break;
+ case HVecReduce::kMin:
+ __ IlvodW(tmp, src, src);
+ __ Min_sW(tmp, src, tmp);
+ __ IlvlW(dst, tmp, tmp);
+ __ Min_sW(dst, dst, tmp);
+ break;
+ case HVecReduce::kMax:
+ __ IlvodW(tmp, src, src);
+ __ Max_sW(tmp, src, tmp);
+ __ IlvlW(dst, tmp, tmp);
+ __ Max_sW(dst, dst, tmp);
+ break;
+ }
+ break;
+ case DataType::Type::kInt64:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ switch (instruction->GetKind()) {
+ case HVecReduce::kSum:
+ __ IlvlD(dst, src, src);
+ __ AddvD(dst, dst, src);
+ break;
+ case HVecReduce::kMin:
+ __ IlvlD(dst, src, src);
+ __ Min_sD(dst, dst, src);
+ break;
+ case HVecReduce::kMax:
+ __ IlvlD(dst, src, src);
+ __ Max_sD(dst, dst, src);
+ break;
+ }
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
}
void LocationsBuilderMIPS::VisitVecCnv(HVecCnv* instruction) {
@@ -831,11 +923,79 @@ void InstructionCodeGeneratorMIPS::VisitVecUShr(HVecUShr* instruction) {
}
void LocationsBuilderMIPS::VisitVecSetScalars(HVecSetScalars* instruction) {
- LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
+
+ DCHECK_EQ(1u, instruction->InputCount()); // only one input currently implemented
+
+ HInstruction* input = instruction->InputAt(0);
+ bool is_zero = IsZeroBitPattern(input);
+
+ switch (instruction->GetPackedType()) {
+ case DataType::Type::kBool:
+ case DataType::Type::kUint8:
+ case DataType::Type::kInt8:
+ case DataType::Type::kUint16:
+ case DataType::Type::kInt16:
+ case DataType::Type::kInt32:
+ case DataType::Type::kInt64:
+ locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
+ : Location::RequiresRegister());
+ locations->SetOut(Location::RequiresFpuRegister());
+ break;
+ case DataType::Type::kFloat32:
+ case DataType::Type::kFloat64:
+ locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
+ : Location::RequiresFpuRegister());
+ locations->SetOut(Location::RequiresFpuRegister());
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
}
void InstructionCodeGeneratorMIPS::VisitVecSetScalars(HVecSetScalars* instruction) {
- LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ LocationSummary* locations = instruction->GetLocations();
+ VectorRegister dst = VectorRegisterFrom(locations->Out());
+
+ DCHECK_EQ(1u, instruction->InputCount()); // only one input currently implemented
+
+ // Zero out all other elements first.
+ __ FillW(dst, ZERO);
+
+ // Shorthand for any type of zero.
+ if (IsZeroBitPattern(instruction->InputAt(0))) {
+ return;
+ }
+
+ // Set required elements.
+ switch (instruction->GetPackedType()) {
+ case DataType::Type::kBool:
+ case DataType::Type::kUint8:
+ case DataType::Type::kInt8:
+ DCHECK_EQ(16u, instruction->GetVectorLength());
+ __ InsertB(dst, locations->InAt(0).AsRegister<Register>(), 0);
+ break;
+ case DataType::Type::kUint16:
+ case DataType::Type::kInt16:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ InsertH(dst, locations->InAt(0).AsRegister<Register>(), 0);
+ break;
+ case DataType::Type::kInt32:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ InsertW(dst, locations->InAt(0).AsRegister<Register>(), 0);
+ break;
+ case DataType::Type::kInt64:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ Mtc1(locations->InAt(0).AsRegisterPairLow<Register>(),
+ locations->Out().AsFpuRegister<FRegister>());
+ __ MoveToFpuHigh(locations->InAt(0).AsRegisterPairHigh<Register>(),
+ locations->Out().AsFpuRegister<FRegister>());
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
}
// Helper to set up locations for vector accumulations.
diff --git a/compiler/optimizing/code_generator_vector_mips64.cc b/compiler/optimizing/code_generator_vector_mips64.cc
index e606df2158..0c59b7344a 100644
--- a/compiler/optimizing/code_generator_vector_mips64.cc
+++ b/compiler/optimizing/code_generator_vector_mips64.cc
@@ -94,17 +94,58 @@ void InstructionCodeGeneratorMIPS64::VisitVecReplicateScalar(HVecReplicateScalar
}
void LocationsBuilderMIPS64::VisitVecExtractScalar(HVecExtractScalar* instruction) {
- LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
+ switch (instruction->GetPackedType()) {
+ case DataType::Type::kBool:
+ case DataType::Type::kUint8:
+ case DataType::Type::kInt8:
+ case DataType::Type::kUint16:
+ case DataType::Type::kInt16:
+ case DataType::Type::kInt32:
+ case DataType::Type::kInt64:
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetOut(Location::RequiresRegister());
+ break;
+ case DataType::Type::kFloat32:
+ case DataType::Type::kFloat64:
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetOut(Location::SameAsFirstInput());
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
}
void InstructionCodeGeneratorMIPS64::VisitVecExtractScalar(HVecExtractScalar* instruction) {
- LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ LocationSummary* locations = instruction->GetLocations();
+ VectorRegister src = VectorRegisterFrom(locations->InAt(0));
+ switch (instruction->GetPackedType()) {
+ case DataType::Type::kInt32:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ Copy_sW(locations->Out().AsRegister<GpuRegister>(), src, 0);
+ break;
+ case DataType::Type::kInt64:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ Copy_sD(locations->Out().AsRegister<GpuRegister>(), src, 0);
+ break;
+ case DataType::Type::kFloat32:
+ case DataType::Type::kFloat64:
+ DCHECK_LE(2u, instruction->GetVectorLength());
+ DCHECK_LE(instruction->GetVectorLength(), 4u);
+ DCHECK(locations->InAt(0).Equals(locations->Out())); // no code required
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
}
// Helper to set up locations for vector unary operations.
static void CreateVecUnOpLocations(ArenaAllocator* allocator, HVecUnaryOperation* instruction) {
LocationSummary* locations = new (allocator) LocationSummary(instruction);
- switch (instruction->GetPackedType()) {
+ DataType::Type type = instruction->GetPackedType();
+ switch (type) {
case DataType::Type::kBool:
locations->SetInAt(0, Location::RequiresFpuRegister());
locations->SetOut(Location::RequiresFpuRegister(),
@@ -121,7 +162,8 @@ static void CreateVecUnOpLocations(ArenaAllocator* allocator, HVecUnaryOperation
case DataType::Type::kFloat64:
locations->SetInAt(0, Location::RequiresFpuRegister());
locations->SetOut(Location::RequiresFpuRegister(),
- (instruction->IsVecNeg() || instruction->IsVecAbs())
+ (instruction->IsVecNeg() || instruction->IsVecAbs() ||
+ (instruction->IsVecReduce() && type == DataType::Type::kInt64))
? Location::kOutputOverlap
: Location::kNoOutputOverlap);
break;
@@ -136,7 +178,54 @@ void LocationsBuilderMIPS64::VisitVecReduce(HVecReduce* instruction) {
}
void InstructionCodeGeneratorMIPS64::VisitVecReduce(HVecReduce* instruction) {
- LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ LocationSummary* locations = instruction->GetLocations();
+ VectorRegister src = VectorRegisterFrom(locations->InAt(0));
+ VectorRegister dst = VectorRegisterFrom(locations->Out());
+ VectorRegister tmp = static_cast<VectorRegister>(FTMP);
+ switch (instruction->GetPackedType()) {
+ case DataType::Type::kInt32:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ switch (instruction->GetKind()) {
+ case HVecReduce::kSum:
+ __ Hadd_sD(tmp, src, src);
+ __ IlvlD(dst, tmp, tmp);
+ __ AddvW(dst, dst, tmp);
+ break;
+ case HVecReduce::kMin:
+ __ IlvodW(tmp, src, src);
+ __ Min_sW(tmp, src, tmp);
+ __ IlvlW(dst, tmp, tmp);
+ __ Min_sW(dst, dst, tmp);
+ break;
+ case HVecReduce::kMax:
+ __ IlvodW(tmp, src, src);
+ __ Max_sW(tmp, src, tmp);
+ __ IlvlW(dst, tmp, tmp);
+ __ Max_sW(dst, dst, tmp);
+ break;
+ }
+ break;
+ case DataType::Type::kInt64:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ switch (instruction->GetKind()) {
+ case HVecReduce::kSum:
+ __ IlvlD(dst, src, src);
+ __ AddvD(dst, dst, src);
+ break;
+ case HVecReduce::kMin:
+ __ IlvlD(dst, src, src);
+ __ Min_sD(dst, dst, src);
+ break;
+ case HVecReduce::kMax:
+ __ IlvlD(dst, src, src);
+ __ Max_sD(dst, dst, src);
+ break;
+ }
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
}
void LocationsBuilderMIPS64::VisitVecCnv(HVecCnv* instruction) {
@@ -835,11 +924,76 @@ void InstructionCodeGeneratorMIPS64::VisitVecUShr(HVecUShr* instruction) {
}
void LocationsBuilderMIPS64::VisitVecSetScalars(HVecSetScalars* instruction) {
- LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
+
+ DCHECK_EQ(1u, instruction->InputCount()); // only one input currently implemented
+
+ HInstruction* input = instruction->InputAt(0);
+ bool is_zero = IsZeroBitPattern(input);
+
+ switch (instruction->GetPackedType()) {
+ case DataType::Type::kBool:
+ case DataType::Type::kUint8:
+ case DataType::Type::kInt8:
+ case DataType::Type::kUint16:
+ case DataType::Type::kInt16:
+ case DataType::Type::kInt32:
+ case DataType::Type::kInt64:
+ locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
+ : Location::RequiresRegister());
+ locations->SetOut(Location::RequiresFpuRegister());
+ break;
+ case DataType::Type::kFloat32:
+ case DataType::Type::kFloat64:
+ locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
+ : Location::RequiresFpuRegister());
+ locations->SetOut(Location::RequiresFpuRegister());
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
}
void InstructionCodeGeneratorMIPS64::VisitVecSetScalars(HVecSetScalars* instruction) {
- LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ LocationSummary* locations = instruction->GetLocations();
+ VectorRegister dst = VectorRegisterFrom(locations->Out());
+
+ DCHECK_EQ(1u, instruction->InputCount()); // only one input currently implemented
+
+ // Zero out all other elements first.
+ __ FillW(dst, ZERO);
+
+ // Shorthand for any type of zero.
+ if (IsZeroBitPattern(instruction->InputAt(0))) {
+ return;
+ }
+
+ // Set required elements.
+ switch (instruction->GetPackedType()) {
+ case DataType::Type::kBool:
+ case DataType::Type::kUint8:
+ case DataType::Type::kInt8:
+ DCHECK_EQ(16u, instruction->GetVectorLength());
+ __ InsertB(dst, locations->InAt(0).AsRegister<GpuRegister>(), 0);
+ break;
+ case DataType::Type::kUint16:
+ case DataType::Type::kInt16:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ InsertH(dst, locations->InAt(0).AsRegister<GpuRegister>(), 0);
+ break;
+ case DataType::Type::kInt32:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ InsertW(dst, locations->InAt(0).AsRegister<GpuRegister>(), 0);
+ break;
+ case DataType::Type::kInt64:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ InsertD(dst, locations->InAt(0).AsRegister<GpuRegister>(), 0);
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
}
// Helper to set up locations for vector accumulations.
diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc
index 6a4faaf438..8f84796ff4 100644
--- a/compiler/optimizing/loop_optimization.cc
+++ b/compiler/optimizing/loop_optimization.cc
@@ -1422,10 +1422,10 @@ bool HLoopOptimization::TrySetVectorType(DataType::Type type, uint64_t* restrict
*restrictions |= kNoDiv | kNoStringCharAt | kNoReduction | kNoSAD;
return TrySetVectorLength(8);
case DataType::Type::kInt32:
- *restrictions |= kNoDiv | kNoReduction | kNoSAD;
+ *restrictions |= kNoDiv | kNoSAD;
return TrySetVectorLength(4);
case DataType::Type::kInt64:
- *restrictions |= kNoDiv | kNoReduction | kNoSAD;
+ *restrictions |= kNoDiv | kNoSAD;
return TrySetVectorLength(2);
case DataType::Type::kFloat32:
*restrictions |= kNoMinMax | kNoReduction; // min/max(x, NaN)
@@ -1451,10 +1451,10 @@ bool HLoopOptimization::TrySetVectorType(DataType::Type type, uint64_t* restrict
*restrictions |= kNoDiv | kNoStringCharAt | kNoReduction | kNoSAD;
return TrySetVectorLength(8);
case DataType::Type::kInt32:
- *restrictions |= kNoDiv | kNoReduction | kNoSAD;
+ *restrictions |= kNoDiv | kNoSAD;
return TrySetVectorLength(4);
case DataType::Type::kInt64:
- *restrictions |= kNoDiv | kNoReduction | kNoSAD;
+ *restrictions |= kNoDiv | kNoSAD;
return TrySetVectorLength(2);
case DataType::Type::kFloat32:
*restrictions |= kNoMinMax | kNoReduction; // min/max(x, NaN)
diff --git a/test/661-checker-simd-reduc/src/Main.java b/test/661-checker-simd-reduc/src/Main.java
index 57abf5d29e..1add0f1026 100644
--- a/test/661-checker-simd-reduc/src/Main.java
+++ b/test/661-checker-simd-reduc/src/Main.java
@@ -81,6 +81,16 @@ public class Main {
/// CHECK-DAG: Add [<<I>>,<<Cons4>>] loop:<<Loop>> outer_loop:none
/// CHECK-DAG: <<Red:d\d+>> VecReduce [<<Phi>>] loop:none
/// CHECK-DAG: <<Extr:i\d+>> VecExtractScalar [<<Red>>] loop:none
+ //
+ /// CHECK-START-MIPS64: int Main.reductionInt(int[]) loop_optimization (after)
+ /// CHECK-DAG: <<Cons4:i\d+>> IntConstant 4 loop:none
+ /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [{{i\d+}}] loop:none
+ /// CHECK-DAG: <<Phi:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Load:d\d+>> VecLoad [{{l\d+}},<<I:i\d+>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: VecAdd [<<Phi>>,<<Load>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<I>>,<<Cons4>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Red:d\d+>> VecReduce [<<Phi>>] loop:none
+ /// CHECK-DAG: <<Extr:i\d+>> VecExtractScalar [<<Red>>] loop:none
private static int reductionInt(int[] x) {
int sum = 0;
for (int i = 0; i < x.length; i++) {
@@ -144,6 +154,25 @@ public class Main {
//
/// CHECK-EVAL: "<<Loop1>>" != "<<Loop2>>"
//
+ /// CHECK-START-MIPS64: int Main.reductionIntChain() loop_optimization (after)
+ /// CHECK-DAG: <<Cons4:i\d+>> IntConstant 4 loop:none
+ /// CHECK-DAG: <<Set1:d\d+>> VecSetScalars [{{i\d+}}] loop:none
+ /// CHECK-DAG: <<Phi1:d\d+>> Phi [<<Set1>>,{{d\d+}}] loop:<<Loop1:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Load1:d\d+>> VecLoad [{{l\d+}},<<I1:i\d+>>] loop:<<Loop1>> outer_loop:none
+ /// CHECK-DAG: VecAdd [<<Phi1>>,<<Load1>>] loop:<<Loop1>> outer_loop:none
+ /// CHECK-DAG: Add [<<I1>>,<<Cons4>>] loop:<<Loop1>> outer_loop:none
+ /// CHECK-DAG: <<Red1:d\d+>> VecReduce [<<Phi1>>] loop:none
+ /// CHECK-DAG: <<Extr1:i\d+>> VecExtractScalar [<<Red1>>] loop:none
+ /// CHECK-DAG: <<Set2:d\d+>> VecSetScalars [{{i\d+}}] loop:none
+ /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set2>>,{{d\d+}}] loop:<<Loop2:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Load2:d\d+>> VecLoad [{{l\d+}},<<I2:i\d+>>] loop:<<Loop2>> outer_loop:none
+ /// CHECK-DAG: VecAdd [<<Phi2>>,<<Load2>>] loop:<<Loop2>> outer_loop:none
+ /// CHECK-DAG: Add [<<I2>>,<<Cons4>>] loop:<<Loop2>> outer_loop:none
+ /// CHECK-DAG: <<Red2:d\d+>> VecReduce [<<Phi2>>] loop:none
+ /// CHECK-DAG: <<Extr2:i\d+>> VecExtractScalar [<<Red2>>] loop:none
+ //
+ /// CHECK-EVAL: "<<Loop1>>" != "<<Loop2>>"
+ //
// NOTE: pattern is robust with respect to vector loop unrolling and peeling.
private static int reductionIntChain() {
int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 };
@@ -189,6 +218,16 @@ public class Main {
/// CHECK-DAG: Add [<<I>>,<<Cons4>>] loop:<<Loop1>> outer_loop:none
/// CHECK-DAG: <<Red:d\d+>> VecReduce [<<Phi>>] loop:none
/// CHECK-DAG: <<Extr:i\d+>> VecExtractScalar [<<Red>>] loop:none
+ //
+ /// CHECK-START-MIPS64: int Main.reductionIntToLoop(int[]) loop_optimization (after)
+ /// CHECK-DAG: <<Cons4:i\d+>> IntConstant 4 loop:none
+ /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [{{i\d+}}] loop:none
+ /// CHECK-DAG: <<Phi:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop1:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Load:d\d+>> VecLoad [{{l\d+}},<<I:i\d+>>] loop:<<Loop1>> outer_loop:none
+ /// CHECK-DAG: VecAdd [<<Phi>>,<<Load>>] loop:<<Loop1>> outer_loop:none
+ /// CHECK-DAG: Add [<<I>>,<<Cons4>>] loop:<<Loop1>> outer_loop:none
+ /// CHECK-DAG: <<Red:d\d+>> VecReduce [<<Phi>>] loop:none
+ /// CHECK-DAG: <<Extr:i\d+>> VecExtractScalar [<<Red>>] loop:none
private static int reductionIntToLoop(int[] x) {
int r = 0;
for (int i = 0; i < 4; i++) {
@@ -220,6 +259,16 @@ public class Main {
/// CHECK-DAG: Add [<<I>>,<<Cons2>>] loop:<<Loop>> outer_loop:none
/// CHECK-DAG: <<Red:d\d+>> VecReduce [<<Phi>>] loop:none
/// CHECK-DAG: <<Extr:j\d+>> VecExtractScalar [<<Red>>] loop:none
+ //
+ /// CHECK-START-MIPS64: long Main.reductionLong(long[]) loop_optimization (after)
+ /// CHECK-DAG: <<Cons2:i\d+>> IntConstant 2 loop:none
+ /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [{{j\d+}}] loop:none
+ /// CHECK-DAG: <<Phi:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Load:d\d+>> VecLoad [{{l\d+}},<<I:i\d+>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: VecAdd [<<Phi>>,<<Load>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<I>>,<<Cons2>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Red:d\d+>> VecReduce [<<Phi>>] loop:none
+ /// CHECK-DAG: <<Extr:j\d+>> VecExtractScalar [<<Red>>] loop:none
private static long reductionLong(long[] x) {
long sum = 0;
for (int i = 0; i < x.length; i++) {
@@ -282,6 +331,16 @@ public class Main {
/// CHECK-DAG: Add [<<I>>,<<Cons4>>] loop:<<Loop>> outer_loop:none
/// CHECK-DAG: <<Red:d\d+>> VecReduce [<<Phi>>] loop:none
/// CHECK-DAG: <<Extr:i\d+>> VecExtractScalar [<<Red>>] loop:none
+ //
+ /// CHECK-START-MIPS64: int Main.reductionIntM1(int[]) loop_optimization (after)
+ /// CHECK-DAG: <<Cons4:i\d+>> IntConstant 4 loop:none
+ /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [{{i\d+}}] loop:none
+ /// CHECK-DAG: <<Phi:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Load:d\d+>> VecLoad [{{l\d+}},<<I:i\d+>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: VecAdd [<<Phi>>,<<Load>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<I>>,<<Cons4>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Red:d\d+>> VecReduce [<<Phi>>] loop:none
+ /// CHECK-DAG: <<Extr:i\d+>> VecExtractScalar [<<Red>>] loop:none
private static int reductionIntM1(int[] x) {
int sum = -1;
for (int i = 0; i < x.length; i++) {
@@ -310,6 +369,16 @@ public class Main {
/// CHECK-DAG: Add [<<I>>,<<Cons2>>] loop:<<Loop>> outer_loop:none
/// CHECK-DAG: <<Red:d\d+>> VecReduce [<<Phi>>] loop:none
/// CHECK-DAG: <<Extr:j\d+>> VecExtractScalar [<<Red>>] loop:none
+ //
+ /// CHECK-START-MIPS64: long Main.reductionLongM1(long[]) loop_optimization (after)
+ /// CHECK-DAG: <<Cons2:i\d+>> IntConstant 2 loop:none
+ /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [{{j\d+}}] loop:none
+ /// CHECK-DAG: <<Phi:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Load:d\d+>> VecLoad [{{l\d+}},<<I:i\d+>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: VecAdd [<<Phi>>,<<Load>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<I>>,<<Cons2>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Red:d\d+>> VecReduce [<<Phi>>] loop:none
+ /// CHECK-DAG: <<Extr:j\d+>> VecExtractScalar [<<Red>>] loop:none
private static long reductionLongM1(long[] x) {
long sum = -1L;
for (int i = 0; i < x.length; i++) {
@@ -371,6 +440,16 @@ public class Main {
/// CHECK-DAG: Add [<<I>>,<<Cons4>>] loop:<<Loop>> outer_loop:none
/// CHECK-DAG: <<Red:d\d+>> VecReduce [<<Phi>>] loop:none
/// CHECK-DAG: <<Extr:i\d+>> VecExtractScalar [<<Red>>] loop:none
+ //
+ /// CHECK-START-MIPS64: int Main.reductionMinusInt(int[]) loop_optimization (after)
+ /// CHECK-DAG: <<Cons4:i\d+>> IntConstant 4 loop:none
+ /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [{{i\d+}}] loop:none
+ /// CHECK-DAG: <<Phi:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Load:d\d+>> VecLoad [{{l\d+}},<<I:i\d+>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: VecSub [<<Phi>>,<<Load>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<I>>,<<Cons4>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Red:d\d+>> VecReduce [<<Phi>>] loop:none
+ /// CHECK-DAG: <<Extr:i\d+>> VecExtractScalar [<<Red>>] loop:none
private static int reductionMinusInt(int[] x) {
int sum = 0;
for (int i = 0; i < x.length; i++) {
@@ -399,6 +478,16 @@ public class Main {
/// CHECK-DAG: Add [<<I>>,<<Cons2>>] loop:<<Loop>> outer_loop:none
/// CHECK-DAG: <<Red:d\d+>> VecReduce [<<Phi>>] loop:none
/// CHECK-DAG: <<Extr:j\d+>> VecExtractScalar [<<Red>>] loop:none
+ //
+ /// CHECK-START-MIPS64: long Main.reductionMinusLong(long[]) loop_optimization (after)
+ /// CHECK-DAG: <<Cons2:i\d+>> IntConstant 2 loop:none
+ /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [{{j\d+}}] loop:none
+ /// CHECK-DAG: <<Phi:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Load:d\d+>> VecLoad [{{l\d+}},<<I:i\d+>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: VecSub [<<Phi>>,<<Load>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<I>>,<<Cons2>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Red:d\d+>> VecReduce [<<Phi>>] loop:none
+ /// CHECK-DAG: <<Extr:j\d+>> VecExtractScalar [<<Red>>] loop:none
private static long reductionMinusLong(long[] x) {
long sum = 0;
for (int i = 0; i < x.length; i++) {
@@ -461,6 +550,16 @@ public class Main {
/// CHECK-DAG: Add [<<I>>,<<Cons4>>] loop:<<Loop>> outer_loop:none
/// CHECK-DAG: <<Red:d\d+>> VecReduce [<<Phi>>] loop:none
/// CHECK-DAG: <<Extr:i\d+>> VecExtractScalar [<<Red>>] loop:none
+ //
+ /// CHECK-START-MIPS64: int Main.reductionMinInt(int[]) loop_optimization (after)
+ /// CHECK-DAG: <<Cons4:i\d+>> IntConstant 4 loop:none
+ /// CHECK-DAG: <<Set:d\d+>> VecReplicateScalar [{{i\d+}}] loop:none
+ /// CHECK-DAG: <<Phi:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Load:d\d+>> VecLoad [{{l\d+}},<<I:i\d+>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: VecMin [<<Phi>>,<<Load>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<I>>,<<Cons4>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Red:d\d+>> VecReduce [<<Phi>>] loop:none
+ /// CHECK-DAG: <<Extr:i\d+>> VecExtractScalar [<<Red>>] loop:none
private static int reductionMinInt(int[] x) {
int min = Integer.MAX_VALUE;
for (int i = 0; i < x.length; i++) {
@@ -531,6 +630,16 @@ public class Main {
/// CHECK-DAG: Add [<<I>>,<<Cons4>>] loop:<<Loop>> outer_loop:none
/// CHECK-DAG: <<Red:d\d+>> VecReduce [<<Phi>>] loop:none
/// CHECK-DAG: <<Extr:i\d+>> VecExtractScalar [<<Red>>] loop:none
+ //
+ /// CHECK-START-MIPS64: int Main.reductionMaxInt(int[]) loop_optimization (after)
+ /// CHECK-DAG: <<Cons4:i\d+>> IntConstant 4 loop:none
+ /// CHECK-DAG: <<Set:d\d+>> VecReplicateScalar [{{i\d+}}] loop:none
+ /// CHECK-DAG: <<Phi:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Load:d\d+>> VecLoad [{{l\d+}},<<I:i\d+>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: VecMax [<<Phi>>,<<Load>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<I>>,<<Cons4>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Red:d\d+>> VecReduce [<<Phi>>] loop:none
+ /// CHECK-DAG: <<Extr:i\d+>> VecExtractScalar [<<Red>>] loop:none
private static int reductionMaxInt(int[] x) {
int max = Integer.MIN_VALUE;
for (int i = 0; i < x.length; i++) {
diff --git a/test/665-checker-simd-zero/src/Main.java b/test/665-checker-simd-zero/src/Main.java
index 66eea642a4..6cd6d6465a 100644
--- a/test/665-checker-simd-zero/src/Main.java
+++ b/test/665-checker-simd-zero/src/Main.java
@@ -29,6 +29,12 @@ public class Main {
/// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<Zero>>] loop:none
/// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none
/// CHECK-DAG: VecStore [{{l\d+}},<<Phi>>,<<Repl>>] loop:<<Loop>> outer_loop:none
+ //
+ /// CHECK-START-MIPS64: void Main.zeroz(boolean[]) loop_optimization (after)
+ /// CHECK-DAG: <<Zero:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<Zero>>] loop:none
+ /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: VecStore [{{l\d+}},<<Phi>>,<<Repl>>] loop:<<Loop>> outer_loop:none
private static void zeroz(boolean[] x) {
for (int i = 0; i < x.length; i++) {
x[i] = false;
@@ -45,6 +51,12 @@ public class Main {
/// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<Zero>>] loop:none
/// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none
/// CHECK-DAG: VecStore [{{l\d+}},<<Phi>>,<<Repl>>] loop:<<Loop>> outer_loop:none
+ //
+ /// CHECK-START-MIPS64: void Main.zerob(byte[]) loop_optimization (after)
+ /// CHECK-DAG: <<Zero:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<Zero>>] loop:none
+ /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: VecStore [{{l\d+}},<<Phi>>,<<Repl>>] loop:<<Loop>> outer_loop:none
private static void zerob(byte[] x) {
for (int i = 0; i < x.length; i++) {
x[i] = 0;
@@ -61,6 +73,12 @@ public class Main {
/// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<Zero>>] loop:none
/// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none
/// CHECK-DAG: VecStore [{{l\d+}},<<Phi>>,<<Repl>>] loop:<<Loop>> outer_loop:none
+ //
+ /// CHECK-START-MIPS64: void Main.zeroc(char[]) loop_optimization (after)
+ /// CHECK-DAG: <<Zero:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<Zero>>] loop:none
+ /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: VecStore [{{l\d+}},<<Phi>>,<<Repl>>] loop:<<Loop>> outer_loop:none
private static void zeroc(char[] x) {
for (int i = 0; i < x.length; i++) {
x[i] = 0;
@@ -77,6 +95,12 @@ public class Main {
/// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<Zero>>] loop:none
/// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none
/// CHECK-DAG: VecStore [{{l\d+}},<<Phi>>,<<Repl>>] loop:<<Loop>> outer_loop:none
+ //
+ /// CHECK-START-MIPS64: void Main.zeros(short[]) loop_optimization (after)
+ /// CHECK-DAG: <<Zero:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<Zero>>] loop:none
+ /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: VecStore [{{l\d+}},<<Phi>>,<<Repl>>] loop:<<Loop>> outer_loop:none
private static void zeros(short[] x) {
for (int i = 0; i < x.length; i++) {
x[i] = 0;
@@ -93,6 +117,12 @@ public class Main {
/// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<Zero>>] loop:none
/// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none
/// CHECK-DAG: VecStore [{{l\d+}},<<Phi>>,<<Repl>>] loop:<<Loop>> outer_loop:none
+ //
+ /// CHECK-START-MIPS64: void Main.zeroi(int[]) loop_optimization (after)
+ /// CHECK-DAG: <<Zero:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<Zero>>] loop:none
+ /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: VecStore [{{l\d+}},<<Phi>>,<<Repl>>] loop:<<Loop>> outer_loop:none
private static void zeroi(int[] x) {
for (int i = 0; i < x.length; i++) {
x[i] = 0;
@@ -109,6 +139,12 @@ public class Main {
/// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<Zero>>] loop:none
/// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none
/// CHECK-DAG: VecStore [{{l\d+}},<<Phi>>,<<Repl>>] loop:<<Loop>> outer_loop:none
+ //
+ /// CHECK-START-MIPS64: void Main.zerol(long[]) loop_optimization (after)
+ /// CHECK-DAG: <<Zero:j\d+>> LongConstant 0 loop:none
+ /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<Zero>>] loop:none
+ /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: VecStore [{{l\d+}},<<Phi>>,<<Repl>>] loop:<<Loop>> outer_loop:none
private static void zerol(long[] x) {
for (int i = 0; i < x.length; i++) {
x[i] = 0;
@@ -125,6 +161,12 @@ public class Main {
/// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<Zero>>] loop:none
/// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none
/// CHECK-DAG: VecStore [{{l\d+}},<<Phi>>,<<Repl>>] loop:<<Loop>> outer_loop:none
+ //
+ /// CHECK-START-MIPS64: void Main.zerof(float[]) loop_optimization (after)
+ /// CHECK-DAG: <<Zero:f\d+>> FloatConstant 0 loop:none
+ /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<Zero>>] loop:none
+ /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: VecStore [{{l\d+}},<<Phi>>,<<Repl>>] loop:<<Loop>> outer_loop:none
private static void zerof(float[] x) {
for (int i = 0; i < x.length; i++) {
x[i] = 0;
@@ -141,6 +183,12 @@ public class Main {
/// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<Zero>>] loop:none
/// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none
/// CHECK-DAG: VecStore [{{l\d+}},<<Phi>>,<<Repl>>] loop:<<Loop>> outer_loop:none
+ //
+ /// CHECK-START-MIPS64: void Main.zerod(double[]) loop_optimization (after)
+ /// CHECK-DAG: <<Zero:d\d+>> DoubleConstant 0 loop:none
+ /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<Zero>>] loop:none
+ /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: VecStore [{{l\d+}},<<Phi>>,<<Repl>>] loop:<<Loop>> outer_loop:none
private static void zerod(double[] x) {
for (int i = 0; i < x.length; i++) {
x[i] = 0;