diff options
| -rw-r--r-- | compiler/optimizing/code_generator_vector_mips.cc | 174 | ||||
| -rw-r--r-- | compiler/optimizing/code_generator_vector_mips64.cc | 168 | ||||
| -rw-r--r-- | compiler/optimizing/loop_optimization.cc | 8 | ||||
| -rw-r--r-- | test/661-checker-simd-reduc/src/Main.java | 109 | ||||
| -rw-r--r-- | test/665-checker-simd-zero/src/Main.java | 48 |
5 files changed, 489 insertions, 18 deletions
diff --git a/compiler/optimizing/code_generator_vector_mips.cc b/compiler/optimizing/code_generator_vector_mips.cc index c5a39ff882..7a8c0ad025 100644 --- a/compiler/optimizing/code_generator_vector_mips.cc +++ b/compiler/optimizing/code_generator_vector_mips.cc @@ -91,17 +91,61 @@ void InstructionCodeGeneratorMIPS::VisitVecReplicateScalar(HVecReplicateScalar* } void LocationsBuilderMIPS::VisitVecExtractScalar(HVecExtractScalar* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); + switch (instruction->GetPackedType()) { + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: + case DataType::Type::kInt64: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresRegister()); + break; + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::SameAsFirstInput()); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } } void InstructionCodeGeneratorMIPS::VisitVecExtractScalar(HVecExtractScalar* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); + LocationSummary* locations = instruction->GetLocations(); + VectorRegister src = VectorRegisterFrom(locations->InAt(0)); + switch (instruction->GetPackedType()) { + case DataType::Type::kInt32: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ Copy_sW(locations->Out().AsRegister<Register>(), src, 0); + break; + case DataType::Type::kInt64: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ Mfc1(locations->Out().AsRegisterPairLow<Register>(), + locations->InAt(0).AsFpuRegister<FRegister>()); + __ MoveFromFpuHigh(locations->Out().AsRegisterPairHigh<Register>(), + locations->InAt(0).AsFpuRegister<FRegister>()); + break; + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + DCHECK_LE(2u, instruction->GetVectorLength()); + DCHECK_LE(instruction->GetVectorLength(), 4u); + DCHECK(locations->InAt(0).Equals(locations->Out())); // no code required + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } } // Helper to set up locations for vector unary operations. static void CreateVecUnOpLocations(ArenaAllocator* allocator, HVecUnaryOperation* instruction) { LocationSummary* locations = new (allocator) LocationSummary(instruction); - switch (instruction->GetPackedType()) { + DataType::Type type = instruction->GetPackedType(); + switch (type) { case DataType::Type::kBool: locations->SetInAt(0, Location::RequiresFpuRegister()); locations->SetOut(Location::RequiresFpuRegister(), @@ -118,7 +162,8 @@ static void CreateVecUnOpLocations(ArenaAllocator* allocator, HVecUnaryOperation case DataType::Type::kFloat64: locations->SetInAt(0, Location::RequiresFpuRegister()); locations->SetOut(Location::RequiresFpuRegister(), - (instruction->IsVecNeg() || instruction->IsVecAbs()) + (instruction->IsVecNeg() || instruction->IsVecAbs() || + (instruction->IsVecReduce() && type == DataType::Type::kInt64)) ? Location::kOutputOverlap : Location::kNoOutputOverlap); break; @@ -133,7 +178,54 @@ void LocationsBuilderMIPS::VisitVecReduce(HVecReduce* instruction) { } void InstructionCodeGeneratorMIPS::VisitVecReduce(HVecReduce* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); + LocationSummary* locations = instruction->GetLocations(); + VectorRegister src = VectorRegisterFrom(locations->InAt(0)); + VectorRegister dst = VectorRegisterFrom(locations->Out()); + VectorRegister tmp = static_cast<VectorRegister>(FTMP); + switch (instruction->GetPackedType()) { + case DataType::Type::kInt32: + DCHECK_EQ(4u, instruction->GetVectorLength()); + switch (instruction->GetKind()) { + case HVecReduce::kSum: + __ Hadd_sD(tmp, src, src); + __ IlvlD(dst, tmp, tmp); + __ AddvW(dst, dst, tmp); + break; + case HVecReduce::kMin: + __ IlvodW(tmp, src, src); + __ Min_sW(tmp, src, tmp); + __ IlvlW(dst, tmp, tmp); + __ Min_sW(dst, dst, tmp); + break; + case HVecReduce::kMax: + __ IlvodW(tmp, src, src); + __ Max_sW(tmp, src, tmp); + __ IlvlW(dst, tmp, tmp); + __ Max_sW(dst, dst, tmp); + break; + } + break; + case DataType::Type::kInt64: + DCHECK_EQ(2u, instruction->GetVectorLength()); + switch (instruction->GetKind()) { + case HVecReduce::kSum: + __ IlvlD(dst, src, src); + __ AddvD(dst, dst, src); + break; + case HVecReduce::kMin: + __ IlvlD(dst, src, src); + __ Min_sD(dst, dst, src); + break; + case HVecReduce::kMax: + __ IlvlD(dst, src, src); + __ Max_sD(dst, dst, src); + break; + } + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } } void LocationsBuilderMIPS::VisitVecCnv(HVecCnv* instruction) { @@ -831,11 +923,79 @@ void InstructionCodeGeneratorMIPS::VisitVecUShr(HVecUShr* instruction) { } void LocationsBuilderMIPS::VisitVecSetScalars(HVecSetScalars* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); + + DCHECK_EQ(1u, instruction->InputCount()); // only one input currently implemented + + HInstruction* input = instruction->InputAt(0); + bool is_zero = IsZeroBitPattern(input); + + switch (instruction->GetPackedType()) { + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: + case DataType::Type::kInt64: + locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant()) + : Location::RequiresRegister()); + locations->SetOut(Location::RequiresFpuRegister()); + break; + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant()) + : Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister()); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } } void InstructionCodeGeneratorMIPS::VisitVecSetScalars(HVecSetScalars* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); + LocationSummary* locations = instruction->GetLocations(); + VectorRegister dst = VectorRegisterFrom(locations->Out()); + + DCHECK_EQ(1u, instruction->InputCount()); // only one input currently implemented + + // Zero out all other elements first. + __ FillW(dst, ZERO); + + // Shorthand for any type of zero. + if (IsZeroBitPattern(instruction->InputAt(0))) { + return; + } + + // Set required elements. + switch (instruction->GetPackedType()) { + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + DCHECK_EQ(16u, instruction->GetVectorLength()); + __ InsertB(dst, locations->InAt(0).AsRegister<Register>(), 0); + break; + case DataType::Type::kUint16: + case DataType::Type::kInt16: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ InsertH(dst, locations->InAt(0).AsRegister<Register>(), 0); + break; + case DataType::Type::kInt32: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ InsertW(dst, locations->InAt(0).AsRegister<Register>(), 0); + break; + case DataType::Type::kInt64: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ Mtc1(locations->InAt(0).AsRegisterPairLow<Register>(), + locations->Out().AsFpuRegister<FRegister>()); + __ MoveToFpuHigh(locations->InAt(0).AsRegisterPairHigh<Register>(), + locations->Out().AsFpuRegister<FRegister>()); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } } // Helper to set up locations for vector accumulations. diff --git a/compiler/optimizing/code_generator_vector_mips64.cc b/compiler/optimizing/code_generator_vector_mips64.cc index e606df2158..0c59b7344a 100644 --- a/compiler/optimizing/code_generator_vector_mips64.cc +++ b/compiler/optimizing/code_generator_vector_mips64.cc @@ -94,17 +94,58 @@ void InstructionCodeGeneratorMIPS64::VisitVecReplicateScalar(HVecReplicateScalar } void LocationsBuilderMIPS64::VisitVecExtractScalar(HVecExtractScalar* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); + switch (instruction->GetPackedType()) { + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: + case DataType::Type::kInt64: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresRegister()); + break; + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::SameAsFirstInput()); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } } void InstructionCodeGeneratorMIPS64::VisitVecExtractScalar(HVecExtractScalar* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); + LocationSummary* locations = instruction->GetLocations(); + VectorRegister src = VectorRegisterFrom(locations->InAt(0)); + switch (instruction->GetPackedType()) { + case DataType::Type::kInt32: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ Copy_sW(locations->Out().AsRegister<GpuRegister>(), src, 0); + break; + case DataType::Type::kInt64: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ Copy_sD(locations->Out().AsRegister<GpuRegister>(), src, 0); + break; + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + DCHECK_LE(2u, instruction->GetVectorLength()); + DCHECK_LE(instruction->GetVectorLength(), 4u); + DCHECK(locations->InAt(0).Equals(locations->Out())); // no code required + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } } // Helper to set up locations for vector unary operations. static void CreateVecUnOpLocations(ArenaAllocator* allocator, HVecUnaryOperation* instruction) { LocationSummary* locations = new (allocator) LocationSummary(instruction); - switch (instruction->GetPackedType()) { + DataType::Type type = instruction->GetPackedType(); + switch (type) { case DataType::Type::kBool: locations->SetInAt(0, Location::RequiresFpuRegister()); locations->SetOut(Location::RequiresFpuRegister(), @@ -121,7 +162,8 @@ static void CreateVecUnOpLocations(ArenaAllocator* allocator, HVecUnaryOperation case DataType::Type::kFloat64: locations->SetInAt(0, Location::RequiresFpuRegister()); locations->SetOut(Location::RequiresFpuRegister(), - (instruction->IsVecNeg() || instruction->IsVecAbs()) + (instruction->IsVecNeg() || instruction->IsVecAbs() || + (instruction->IsVecReduce() && type == DataType::Type::kInt64)) ? Location::kOutputOverlap : Location::kNoOutputOverlap); break; @@ -136,7 +178,54 @@ void LocationsBuilderMIPS64::VisitVecReduce(HVecReduce* instruction) { } void InstructionCodeGeneratorMIPS64::VisitVecReduce(HVecReduce* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); + LocationSummary* locations = instruction->GetLocations(); + VectorRegister src = VectorRegisterFrom(locations->InAt(0)); + VectorRegister dst = VectorRegisterFrom(locations->Out()); + VectorRegister tmp = static_cast<VectorRegister>(FTMP); + switch (instruction->GetPackedType()) { + case DataType::Type::kInt32: + DCHECK_EQ(4u, instruction->GetVectorLength()); + switch (instruction->GetKind()) { + case HVecReduce::kSum: + __ Hadd_sD(tmp, src, src); + __ IlvlD(dst, tmp, tmp); + __ AddvW(dst, dst, tmp); + break; + case HVecReduce::kMin: + __ IlvodW(tmp, src, src); + __ Min_sW(tmp, src, tmp); + __ IlvlW(dst, tmp, tmp); + __ Min_sW(dst, dst, tmp); + break; + case HVecReduce::kMax: + __ IlvodW(tmp, src, src); + __ Max_sW(tmp, src, tmp); + __ IlvlW(dst, tmp, tmp); + __ Max_sW(dst, dst, tmp); + break; + } + break; + case DataType::Type::kInt64: + DCHECK_EQ(2u, instruction->GetVectorLength()); + switch (instruction->GetKind()) { + case HVecReduce::kSum: + __ IlvlD(dst, src, src); + __ AddvD(dst, dst, src); + break; + case HVecReduce::kMin: + __ IlvlD(dst, src, src); + __ Min_sD(dst, dst, src); + break; + case HVecReduce::kMax: + __ IlvlD(dst, src, src); + __ Max_sD(dst, dst, src); + break; + } + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } } void LocationsBuilderMIPS64::VisitVecCnv(HVecCnv* instruction) { @@ -835,11 +924,76 @@ void InstructionCodeGeneratorMIPS64::VisitVecUShr(HVecUShr* instruction) { } void LocationsBuilderMIPS64::VisitVecSetScalars(HVecSetScalars* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); + + DCHECK_EQ(1u, instruction->InputCount()); // only one input currently implemented + + HInstruction* input = instruction->InputAt(0); + bool is_zero = IsZeroBitPattern(input); + + switch (instruction->GetPackedType()) { + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: + case DataType::Type::kInt64: + locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant()) + : Location::RequiresRegister()); + locations->SetOut(Location::RequiresFpuRegister()); + break; + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant()) + : Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister()); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } } void InstructionCodeGeneratorMIPS64::VisitVecSetScalars(HVecSetScalars* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); + LocationSummary* locations = instruction->GetLocations(); + VectorRegister dst = VectorRegisterFrom(locations->Out()); + + DCHECK_EQ(1u, instruction->InputCount()); // only one input currently implemented + + // Zero out all other elements first. + __ FillW(dst, ZERO); + + // Shorthand for any type of zero. + if (IsZeroBitPattern(instruction->InputAt(0))) { + return; + } + + // Set required elements. + switch (instruction->GetPackedType()) { + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + DCHECK_EQ(16u, instruction->GetVectorLength()); + __ InsertB(dst, locations->InAt(0).AsRegister<GpuRegister>(), 0); + break; + case DataType::Type::kUint16: + case DataType::Type::kInt16: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ InsertH(dst, locations->InAt(0).AsRegister<GpuRegister>(), 0); + break; + case DataType::Type::kInt32: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ InsertW(dst, locations->InAt(0).AsRegister<GpuRegister>(), 0); + break; + case DataType::Type::kInt64: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ InsertD(dst, locations->InAt(0).AsRegister<GpuRegister>(), 0); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } } // Helper to set up locations for vector accumulations. diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc index 6a4faaf438..8f84796ff4 100644 --- a/compiler/optimizing/loop_optimization.cc +++ b/compiler/optimizing/loop_optimization.cc @@ -1422,10 +1422,10 @@ bool HLoopOptimization::TrySetVectorType(DataType::Type type, uint64_t* restrict *restrictions |= kNoDiv | kNoStringCharAt | kNoReduction | kNoSAD; return TrySetVectorLength(8); case DataType::Type::kInt32: - *restrictions |= kNoDiv | kNoReduction | kNoSAD; + *restrictions |= kNoDiv | kNoSAD; return TrySetVectorLength(4); case DataType::Type::kInt64: - *restrictions |= kNoDiv | kNoReduction | kNoSAD; + *restrictions |= kNoDiv | kNoSAD; return TrySetVectorLength(2); case DataType::Type::kFloat32: *restrictions |= kNoMinMax | kNoReduction; // min/max(x, NaN) @@ -1451,10 +1451,10 @@ bool HLoopOptimization::TrySetVectorType(DataType::Type type, uint64_t* restrict *restrictions |= kNoDiv | kNoStringCharAt | kNoReduction | kNoSAD; return TrySetVectorLength(8); case DataType::Type::kInt32: - *restrictions |= kNoDiv | kNoReduction | kNoSAD; + *restrictions |= kNoDiv | kNoSAD; return TrySetVectorLength(4); case DataType::Type::kInt64: - *restrictions |= kNoDiv | kNoReduction | kNoSAD; + *restrictions |= kNoDiv | kNoSAD; return TrySetVectorLength(2); case DataType::Type::kFloat32: *restrictions |= kNoMinMax | kNoReduction; // min/max(x, NaN) diff --git a/test/661-checker-simd-reduc/src/Main.java b/test/661-checker-simd-reduc/src/Main.java index 57abf5d29e..1add0f1026 100644 --- a/test/661-checker-simd-reduc/src/Main.java +++ b/test/661-checker-simd-reduc/src/Main.java @@ -81,6 +81,16 @@ public class Main { /// CHECK-DAG: Add [<<I>>,<<Cons4>>] loop:<<Loop>> outer_loop:none /// CHECK-DAG: <<Red:d\d+>> VecReduce [<<Phi>>] loop:none /// CHECK-DAG: <<Extr:i\d+>> VecExtractScalar [<<Red>>] loop:none + // + /// CHECK-START-MIPS64: int Main.reductionInt(int[]) loop_optimization (after) + /// CHECK-DAG: <<Cons4:i\d+>> IntConstant 4 loop:none + /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [{{i\d+}}] loop:none + /// CHECK-DAG: <<Phi:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Load:d\d+>> VecLoad [{{l\d+}},<<I:i\d+>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecAdd [<<Phi>>,<<Load>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<I>>,<<Cons4>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Red:d\d+>> VecReduce [<<Phi>>] loop:none + /// CHECK-DAG: <<Extr:i\d+>> VecExtractScalar [<<Red>>] loop:none private static int reductionInt(int[] x) { int sum = 0; for (int i = 0; i < x.length; i++) { @@ -144,6 +154,25 @@ public class Main { // /// CHECK-EVAL: "<<Loop1>>" != "<<Loop2>>" // + /// CHECK-START-MIPS64: int Main.reductionIntChain() loop_optimization (after) + /// CHECK-DAG: <<Cons4:i\d+>> IntConstant 4 loop:none + /// CHECK-DAG: <<Set1:d\d+>> VecSetScalars [{{i\d+}}] loop:none + /// CHECK-DAG: <<Phi1:d\d+>> Phi [<<Set1>>,{{d\d+}}] loop:<<Loop1:B\d+>> outer_loop:none + /// CHECK-DAG: <<Load1:d\d+>> VecLoad [{{l\d+}},<<I1:i\d+>>] loop:<<Loop1>> outer_loop:none + /// CHECK-DAG: VecAdd [<<Phi1>>,<<Load1>>] loop:<<Loop1>> outer_loop:none + /// CHECK-DAG: Add [<<I1>>,<<Cons4>>] loop:<<Loop1>> outer_loop:none + /// CHECK-DAG: <<Red1:d\d+>> VecReduce [<<Phi1>>] loop:none + /// CHECK-DAG: <<Extr1:i\d+>> VecExtractScalar [<<Red1>>] loop:none + /// CHECK-DAG: <<Set2:d\d+>> VecSetScalars [{{i\d+}}] loop:none + /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set2>>,{{d\d+}}] loop:<<Loop2:B\d+>> outer_loop:none + /// CHECK-DAG: <<Load2:d\d+>> VecLoad [{{l\d+}},<<I2:i\d+>>] loop:<<Loop2>> outer_loop:none + /// CHECK-DAG: VecAdd [<<Phi2>>,<<Load2>>] loop:<<Loop2>> outer_loop:none + /// CHECK-DAG: Add [<<I2>>,<<Cons4>>] loop:<<Loop2>> outer_loop:none + /// CHECK-DAG: <<Red2:d\d+>> VecReduce [<<Phi2>>] loop:none + /// CHECK-DAG: <<Extr2:i\d+>> VecExtractScalar [<<Red2>>] loop:none + // + /// CHECK-EVAL: "<<Loop1>>" != "<<Loop2>>" + // // NOTE: pattern is robust with respect to vector loop unrolling and peeling. private static int reductionIntChain() { int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 }; @@ -189,6 +218,16 @@ public class Main { /// CHECK-DAG: Add [<<I>>,<<Cons4>>] loop:<<Loop1>> outer_loop:none /// CHECK-DAG: <<Red:d\d+>> VecReduce [<<Phi>>] loop:none /// CHECK-DAG: <<Extr:i\d+>> VecExtractScalar [<<Red>>] loop:none + // + /// CHECK-START-MIPS64: int Main.reductionIntToLoop(int[]) loop_optimization (after) + /// CHECK-DAG: <<Cons4:i\d+>> IntConstant 4 loop:none + /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [{{i\d+}}] loop:none + /// CHECK-DAG: <<Phi:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop1:B\d+>> outer_loop:none + /// CHECK-DAG: <<Load:d\d+>> VecLoad [{{l\d+}},<<I:i\d+>>] loop:<<Loop1>> outer_loop:none + /// CHECK-DAG: VecAdd [<<Phi>>,<<Load>>] loop:<<Loop1>> outer_loop:none + /// CHECK-DAG: Add [<<I>>,<<Cons4>>] loop:<<Loop1>> outer_loop:none + /// CHECK-DAG: <<Red:d\d+>> VecReduce [<<Phi>>] loop:none + /// CHECK-DAG: <<Extr:i\d+>> VecExtractScalar [<<Red>>] loop:none private static int reductionIntToLoop(int[] x) { int r = 0; for (int i = 0; i < 4; i++) { @@ -220,6 +259,16 @@ public class Main { /// CHECK-DAG: Add [<<I>>,<<Cons2>>] loop:<<Loop>> outer_loop:none /// CHECK-DAG: <<Red:d\d+>> VecReduce [<<Phi>>] loop:none /// CHECK-DAG: <<Extr:j\d+>> VecExtractScalar [<<Red>>] loop:none + // + /// CHECK-START-MIPS64: long Main.reductionLong(long[]) loop_optimization (after) + /// CHECK-DAG: <<Cons2:i\d+>> IntConstant 2 loop:none + /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [{{j\d+}}] loop:none + /// CHECK-DAG: <<Phi:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Load:d\d+>> VecLoad [{{l\d+}},<<I:i\d+>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecAdd [<<Phi>>,<<Load>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<I>>,<<Cons2>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Red:d\d+>> VecReduce [<<Phi>>] loop:none + /// CHECK-DAG: <<Extr:j\d+>> VecExtractScalar [<<Red>>] loop:none private static long reductionLong(long[] x) { long sum = 0; for (int i = 0; i < x.length; i++) { @@ -282,6 +331,16 @@ public class Main { /// CHECK-DAG: Add [<<I>>,<<Cons4>>] loop:<<Loop>> outer_loop:none /// CHECK-DAG: <<Red:d\d+>> VecReduce [<<Phi>>] loop:none /// CHECK-DAG: <<Extr:i\d+>> VecExtractScalar [<<Red>>] loop:none + // + /// CHECK-START-MIPS64: int Main.reductionIntM1(int[]) loop_optimization (after) + /// CHECK-DAG: <<Cons4:i\d+>> IntConstant 4 loop:none + /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [{{i\d+}}] loop:none + /// CHECK-DAG: <<Phi:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Load:d\d+>> VecLoad [{{l\d+}},<<I:i\d+>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecAdd [<<Phi>>,<<Load>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<I>>,<<Cons4>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Red:d\d+>> VecReduce [<<Phi>>] loop:none + /// CHECK-DAG: <<Extr:i\d+>> VecExtractScalar [<<Red>>] loop:none private static int reductionIntM1(int[] x) { int sum = -1; for (int i = 0; i < x.length; i++) { @@ -310,6 +369,16 @@ public class Main { /// CHECK-DAG: Add [<<I>>,<<Cons2>>] loop:<<Loop>> outer_loop:none /// CHECK-DAG: <<Red:d\d+>> VecReduce [<<Phi>>] loop:none /// CHECK-DAG: <<Extr:j\d+>> VecExtractScalar [<<Red>>] loop:none + // + /// CHECK-START-MIPS64: long Main.reductionLongM1(long[]) loop_optimization (after) + /// CHECK-DAG: <<Cons2:i\d+>> IntConstant 2 loop:none + /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [{{j\d+}}] loop:none + /// CHECK-DAG: <<Phi:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Load:d\d+>> VecLoad [{{l\d+}},<<I:i\d+>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecAdd [<<Phi>>,<<Load>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<I>>,<<Cons2>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Red:d\d+>> VecReduce [<<Phi>>] loop:none + /// CHECK-DAG: <<Extr:j\d+>> VecExtractScalar [<<Red>>] loop:none private static long reductionLongM1(long[] x) { long sum = -1L; for (int i = 0; i < x.length; i++) { @@ -371,6 +440,16 @@ public class Main { /// CHECK-DAG: Add [<<I>>,<<Cons4>>] loop:<<Loop>> outer_loop:none /// CHECK-DAG: <<Red:d\d+>> VecReduce [<<Phi>>] loop:none /// CHECK-DAG: <<Extr:i\d+>> VecExtractScalar [<<Red>>] loop:none + // + /// CHECK-START-MIPS64: int Main.reductionMinusInt(int[]) loop_optimization (after) + /// CHECK-DAG: <<Cons4:i\d+>> IntConstant 4 loop:none + /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [{{i\d+}}] loop:none + /// CHECK-DAG: <<Phi:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Load:d\d+>> VecLoad [{{l\d+}},<<I:i\d+>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecSub [<<Phi>>,<<Load>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<I>>,<<Cons4>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Red:d\d+>> VecReduce [<<Phi>>] loop:none + /// CHECK-DAG: <<Extr:i\d+>> VecExtractScalar [<<Red>>] loop:none private static int reductionMinusInt(int[] x) { int sum = 0; for (int i = 0; i < x.length; i++) { @@ -399,6 +478,16 @@ public class Main { /// CHECK-DAG: Add [<<I>>,<<Cons2>>] loop:<<Loop>> outer_loop:none /// CHECK-DAG: <<Red:d\d+>> VecReduce [<<Phi>>] loop:none /// CHECK-DAG: <<Extr:j\d+>> VecExtractScalar [<<Red>>] loop:none + // + /// CHECK-START-MIPS64: long Main.reductionMinusLong(long[]) loop_optimization (after) + /// CHECK-DAG: <<Cons2:i\d+>> IntConstant 2 loop:none + /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [{{j\d+}}] loop:none + /// CHECK-DAG: <<Phi:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Load:d\d+>> VecLoad [{{l\d+}},<<I:i\d+>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecSub [<<Phi>>,<<Load>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<I>>,<<Cons2>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Red:d\d+>> VecReduce [<<Phi>>] loop:none + /// CHECK-DAG: <<Extr:j\d+>> VecExtractScalar [<<Red>>] loop:none private static long reductionMinusLong(long[] x) { long sum = 0; for (int i = 0; i < x.length; i++) { @@ -461,6 +550,16 @@ public class Main { /// CHECK-DAG: Add [<<I>>,<<Cons4>>] loop:<<Loop>> outer_loop:none /// CHECK-DAG: <<Red:d\d+>> VecReduce [<<Phi>>] loop:none /// CHECK-DAG: <<Extr:i\d+>> VecExtractScalar [<<Red>>] loop:none + // + /// CHECK-START-MIPS64: int Main.reductionMinInt(int[]) loop_optimization (after) + /// CHECK-DAG: <<Cons4:i\d+>> IntConstant 4 loop:none + /// CHECK-DAG: <<Set:d\d+>> VecReplicateScalar [{{i\d+}}] loop:none + /// CHECK-DAG: <<Phi:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Load:d\d+>> VecLoad [{{l\d+}},<<I:i\d+>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecMin [<<Phi>>,<<Load>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<I>>,<<Cons4>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Red:d\d+>> VecReduce [<<Phi>>] loop:none + /// CHECK-DAG: <<Extr:i\d+>> VecExtractScalar [<<Red>>] loop:none private static int reductionMinInt(int[] x) { int min = Integer.MAX_VALUE; for (int i = 0; i < x.length; i++) { @@ -531,6 +630,16 @@ public class Main { /// CHECK-DAG: Add [<<I>>,<<Cons4>>] loop:<<Loop>> outer_loop:none /// CHECK-DAG: <<Red:d\d+>> VecReduce [<<Phi>>] loop:none /// CHECK-DAG: <<Extr:i\d+>> VecExtractScalar [<<Red>>] loop:none + // + /// CHECK-START-MIPS64: int Main.reductionMaxInt(int[]) loop_optimization (after) + /// CHECK-DAG: <<Cons4:i\d+>> IntConstant 4 loop:none + /// CHECK-DAG: <<Set:d\d+>> VecReplicateScalar [{{i\d+}}] loop:none + /// CHECK-DAG: <<Phi:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Load:d\d+>> VecLoad [{{l\d+}},<<I:i\d+>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecMax [<<Phi>>,<<Load>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<I>>,<<Cons4>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Red:d\d+>> VecReduce [<<Phi>>] loop:none + /// CHECK-DAG: <<Extr:i\d+>> VecExtractScalar [<<Red>>] loop:none private static int reductionMaxInt(int[] x) { int max = Integer.MIN_VALUE; for (int i = 0; i < x.length; i++) { diff --git a/test/665-checker-simd-zero/src/Main.java b/test/665-checker-simd-zero/src/Main.java index 66eea642a4..6cd6d6465a 100644 --- a/test/665-checker-simd-zero/src/Main.java +++ b/test/665-checker-simd-zero/src/Main.java @@ -29,6 +29,12 @@ public class Main { /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<Zero>>] loop:none /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none /// CHECK-DAG: VecStore [{{l\d+}},<<Phi>>,<<Repl>>] loop:<<Loop>> outer_loop:none + // + /// CHECK-START-MIPS64: void Main.zeroz(boolean[]) loop_optimization (after) + /// CHECK-DAG: <<Zero:i\d+>> IntConstant 0 loop:none + /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<Zero>>] loop:none + /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: VecStore [{{l\d+}},<<Phi>>,<<Repl>>] loop:<<Loop>> outer_loop:none private static void zeroz(boolean[] x) { for (int i = 0; i < x.length; i++) { x[i] = false; @@ -45,6 +51,12 @@ public class Main { /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<Zero>>] loop:none /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none /// CHECK-DAG: VecStore [{{l\d+}},<<Phi>>,<<Repl>>] loop:<<Loop>> outer_loop:none + // + /// CHECK-START-MIPS64: void Main.zerob(byte[]) loop_optimization (after) + /// CHECK-DAG: <<Zero:i\d+>> IntConstant 0 loop:none + /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<Zero>>] loop:none + /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: VecStore [{{l\d+}},<<Phi>>,<<Repl>>] loop:<<Loop>> outer_loop:none private static void zerob(byte[] x) { for (int i = 0; i < x.length; i++) { x[i] = 0; @@ -61,6 +73,12 @@ public class Main { /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<Zero>>] loop:none /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none /// CHECK-DAG: VecStore [{{l\d+}},<<Phi>>,<<Repl>>] loop:<<Loop>> outer_loop:none + // + /// CHECK-START-MIPS64: void Main.zeroc(char[]) loop_optimization (after) + /// CHECK-DAG: <<Zero:i\d+>> IntConstant 0 loop:none + /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<Zero>>] loop:none + /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: VecStore [{{l\d+}},<<Phi>>,<<Repl>>] loop:<<Loop>> outer_loop:none private static void zeroc(char[] x) { for (int i = 0; i < x.length; i++) { x[i] = 0; @@ -77,6 +95,12 @@ public class Main { /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<Zero>>] loop:none /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none /// CHECK-DAG: VecStore [{{l\d+}},<<Phi>>,<<Repl>>] loop:<<Loop>> outer_loop:none + // + /// CHECK-START-MIPS64: void Main.zeros(short[]) loop_optimization (after) + /// CHECK-DAG: <<Zero:i\d+>> IntConstant 0 loop:none + /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<Zero>>] loop:none + /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: VecStore [{{l\d+}},<<Phi>>,<<Repl>>] loop:<<Loop>> outer_loop:none private static void zeros(short[] x) { for (int i = 0; i < x.length; i++) { x[i] = 0; @@ -93,6 +117,12 @@ public class Main { /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<Zero>>] loop:none /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none /// CHECK-DAG: VecStore [{{l\d+}},<<Phi>>,<<Repl>>] loop:<<Loop>> outer_loop:none + // + /// CHECK-START-MIPS64: void Main.zeroi(int[]) loop_optimization (after) + /// CHECK-DAG: <<Zero:i\d+>> IntConstant 0 loop:none + /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<Zero>>] loop:none + /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: VecStore [{{l\d+}},<<Phi>>,<<Repl>>] loop:<<Loop>> outer_loop:none private static void zeroi(int[] x) { for (int i = 0; i < x.length; i++) { x[i] = 0; @@ -109,6 +139,12 @@ public class Main { /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<Zero>>] loop:none /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none /// CHECK-DAG: VecStore [{{l\d+}},<<Phi>>,<<Repl>>] loop:<<Loop>> outer_loop:none + // + /// CHECK-START-MIPS64: void Main.zerol(long[]) loop_optimization (after) + /// CHECK-DAG: <<Zero:j\d+>> LongConstant 0 loop:none + /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<Zero>>] loop:none + /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: VecStore [{{l\d+}},<<Phi>>,<<Repl>>] loop:<<Loop>> outer_loop:none private static void zerol(long[] x) { for (int i = 0; i < x.length; i++) { x[i] = 0; @@ -125,6 +161,12 @@ public class Main { /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<Zero>>] loop:none /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none /// CHECK-DAG: VecStore [{{l\d+}},<<Phi>>,<<Repl>>] loop:<<Loop>> outer_loop:none + // + /// CHECK-START-MIPS64: void Main.zerof(float[]) loop_optimization (after) + /// CHECK-DAG: <<Zero:f\d+>> FloatConstant 0 loop:none + /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<Zero>>] loop:none + /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: VecStore [{{l\d+}},<<Phi>>,<<Repl>>] loop:<<Loop>> outer_loop:none private static void zerof(float[] x) { for (int i = 0; i < x.length; i++) { x[i] = 0; @@ -141,6 +183,12 @@ public class Main { /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<Zero>>] loop:none /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none /// CHECK-DAG: VecStore [{{l\d+}},<<Phi>>,<<Repl>>] loop:<<Loop>> outer_loop:none + // + /// CHECK-START-MIPS64: void Main.zerod(double[]) loop_optimization (after) + /// CHECK-DAG: <<Zero:d\d+>> DoubleConstant 0 loop:none + /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<Zero>>] loop:none + /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: VecStore [{{l\d+}},<<Phi>>,<<Repl>>] loop:<<Loop>> outer_loop:none private static void zerod(double[] x) { for (int i = 0; i < x.length; i++) { x[i] = 0; |