diff options
author | 2017-07-20 16:07:36 +0200 | |
---|---|---|
committer | 2017-08-14 10:16:34 +0200 | |
commit | bc5460b850a0fa2d8dcf6c8d36b0eb86f8fe46a8 (patch) | |
tree | 0db1314987cd0f24c7294c4ad540c7f28e2739d9 | |
parent | c1bb1cd339b2ebea9c4770fb4d61bacd7d77746f (diff) |
MIPS: Support MultiplyAccumulate for SIMD.
Moved support for multiply-accumulate from the arm64-specific
instruction simplifier to the general instruction simplifier.
Also extended the 550-checker-multiply-accumulate test with MIPS64 checks and long-array cases.
Test: test-art-host, test-art-target
Change-Id: If113f0f0d5cb48e8a76273c919cfa2f49fce667d
-rw-r--r-- | compiler/optimizing/code_generator_vector_mips.cc | 67 | ||||
-rw-r--r-- | compiler/optimizing/code_generator_vector_mips64.cc | 67 | ||||
-rw-r--r-- | compiler/optimizing/instruction_simplifier.cc | 86 | ||||
-rw-r--r-- | compiler/optimizing/instruction_simplifier_arm64.cc | 6 | ||||
-rw-r--r-- | compiler/optimizing/instruction_simplifier_arm64.h | 1 | ||||
-rw-r--r-- | compiler/optimizing/instruction_simplifier_shared.cc | 67 | ||||
-rw-r--r-- | compiler/optimizing/instruction_simplifier_shared.h | 1 | ||||
-rw-r--r-- | test/550-checker-multiply-accumulate/src/Main.java | 127 |
8 files changed, 335 insertions, 87 deletions
diff --git a/compiler/optimizing/code_generator_vector_mips.cc b/compiler/optimizing/code_generator_vector_mips.cc index ea36e90112..6bf28ab1a3 100644 --- a/compiler/optimizing/code_generator_vector_mips.cc +++ b/compiler/optimizing/code_generator_vector_mips.cc @@ -819,11 +819,74 @@ void InstructionCodeGeneratorMIPS::VisitVecUShr(HVecUShr* instruction) { } void LocationsBuilderMIPS::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) { - LOG(FATAL) << "No SIMD for " << instr->GetId(); + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instr); + switch (instr->GetPackedType()) { + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimLong: + locations->SetInAt( + HVecMultiplyAccumulate::kInputAccumulatorIndex, Location::RequiresFpuRegister()); + locations->SetInAt( + HVecMultiplyAccumulate::kInputMulLeftIndex, Location::RequiresFpuRegister()); + locations->SetInAt( + HVecMultiplyAccumulate::kInputMulRightIndex, Location::RequiresFpuRegister()); + DCHECK_EQ(HVecMultiplyAccumulate::kInputAccumulatorIndex, 0); + locations->SetOut(Location::SameAsFirstInput()); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } } void InstructionCodeGeneratorMIPS::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) { - LOG(FATAL) << "No SIMD for " << instr->GetId(); + LocationSummary* locations = instr->GetLocations(); + VectorRegister acc = + VectorRegisterFrom(locations->InAt(HVecMultiplyAccumulate::kInputAccumulatorIndex)); + VectorRegister left = + VectorRegisterFrom(locations->InAt(HVecMultiplyAccumulate::kInputMulLeftIndex)); + VectorRegister right = + VectorRegisterFrom(locations->InAt(HVecMultiplyAccumulate::kInputMulRightIndex)); + switch (instr->GetPackedType()) { + case Primitive::kPrimByte: + DCHECK_EQ(16u, instr->GetVectorLength()); + if (instr->GetOpKind() == HInstruction::kAdd) { + __ MaddvB(acc, left, right); + } else { + 
__ MsubvB(acc, left, right); + } + break; + case Primitive::kPrimChar: + case Primitive::kPrimShort: + DCHECK_EQ(8u, instr->GetVectorLength()); + if (instr->GetOpKind() == HInstruction::kAdd) { + __ MaddvH(acc, left, right); + } else { + __ MsubvH(acc, left, right); + } + break; + case Primitive::kPrimInt: + DCHECK_EQ(4u, instr->GetVectorLength()); + if (instr->GetOpKind() == HInstruction::kAdd) { + __ MaddvW(acc, left, right); + } else { + __ MsubvW(acc, left, right); + } + break; + case Primitive::kPrimLong: + DCHECK_EQ(2u, instr->GetVectorLength()); + if (instr->GetOpKind() == HInstruction::kAdd) { + __ MaddvD(acc, left, right); + } else { + __ MsubvD(acc, left, right); + } + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } } // Helper to set up locations for vector memory operations. diff --git a/compiler/optimizing/code_generator_vector_mips64.cc b/compiler/optimizing/code_generator_vector_mips64.cc index 0395db1df9..75bf7a7cbb 100644 --- a/compiler/optimizing/code_generator_vector_mips64.cc +++ b/compiler/optimizing/code_generator_vector_mips64.cc @@ -823,11 +823,74 @@ void InstructionCodeGeneratorMIPS64::VisitVecUShr(HVecUShr* instruction) { } void LocationsBuilderMIPS64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) { - LOG(FATAL) << "No SIMD for " << instr->GetId(); + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instr); + switch (instr->GetPackedType()) { + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimLong: + locations->SetInAt( + HVecMultiplyAccumulate::kInputAccumulatorIndex, Location::RequiresFpuRegister()); + locations->SetInAt( + HVecMultiplyAccumulate::kInputMulLeftIndex, Location::RequiresFpuRegister()); + locations->SetInAt( + HVecMultiplyAccumulate::kInputMulRightIndex, Location::RequiresFpuRegister()); + DCHECK_EQ(HVecMultiplyAccumulate::kInputAccumulatorIndex, 0); + 
locations->SetOut(Location::SameAsFirstInput()); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } } void InstructionCodeGeneratorMIPS64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) { - LOG(FATAL) << "No SIMD for " << instr->GetId(); + LocationSummary* locations = instr->GetLocations(); + VectorRegister acc = + VectorRegisterFrom(locations->InAt(HVecMultiplyAccumulate::kInputAccumulatorIndex)); + VectorRegister left = + VectorRegisterFrom(locations->InAt(HVecMultiplyAccumulate::kInputMulLeftIndex)); + VectorRegister right = + VectorRegisterFrom(locations->InAt(HVecMultiplyAccumulate::kInputMulRightIndex)); + switch (instr->GetPackedType()) { + case Primitive::kPrimByte: + DCHECK_EQ(16u, instr->GetVectorLength()); + if (instr->GetOpKind() == HInstruction::kAdd) { + __ MaddvB(acc, left, right); + } else { + __ MsubvB(acc, left, right); + } + break; + case Primitive::kPrimChar: + case Primitive::kPrimShort: + DCHECK_EQ(8u, instr->GetVectorLength()); + if (instr->GetOpKind() == HInstruction::kAdd) { + __ MaddvH(acc, left, right); + } else { + __ MsubvH(acc, left, right); + } + break; + case Primitive::kPrimInt: + DCHECK_EQ(4u, instr->GetVectorLength()); + if (instr->GetOpKind() == HInstruction::kAdd) { + __ MaddvW(acc, left, right); + } else { + __ MsubvW(acc, left, right); + } + break; + case Primitive::kPrimLong: + DCHECK_EQ(2u, instr->GetVectorLength()); + if (instr->GetOpKind() == HInstruction::kAdd) { + __ MaddvD(acc, left, right); + } else { + __ MsubvD(acc, left, right); + } + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } } // Helper to set up locations for vector memory operations. 
diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc index 5c79511bab..f2a829fa56 100644 --- a/compiler/optimizing/instruction_simplifier.cc +++ b/compiler/optimizing/instruction_simplifier.cc @@ -59,6 +59,7 @@ class InstructionSimplifierVisitor : public HGraphDelegateVisitor { bool TryDeMorganNegationFactoring(HBinaryOperation* op); bool TryHandleAssociativeAndCommutativeOperation(HBinaryOperation* instruction); bool TrySubtractionChainSimplification(HBinaryOperation* instruction); + bool TryCombineVecMultiplyAccumulate(HVecMul* mul); void VisitShift(HBinaryOperation* shift); @@ -98,6 +99,7 @@ class InstructionSimplifierVisitor : public HGraphDelegateVisitor { void VisitInstanceOf(HInstanceOf* instruction) OVERRIDE; void VisitInvoke(HInvoke* invoke) OVERRIDE; void VisitDeoptimize(HDeoptimize* deoptimize) OVERRIDE; + void VisitVecMul(HVecMul* instruction) OVERRIDE; bool CanEnsureNotNullAt(HInstruction* instr, HInstruction* at) const; @@ -243,6 +245,84 @@ bool InstructionSimplifierVisitor::TryDeMorganNegationFactoring(HBinaryOperation return false; } +bool InstructionSimplifierVisitor::TryCombineVecMultiplyAccumulate(HVecMul* mul) { + Primitive::Type type = mul->GetPackedType(); + InstructionSet isa = codegen_->GetInstructionSet(); + switch (isa) { + case kArm64: + if (!(type == Primitive::kPrimByte || + type == Primitive::kPrimChar || + type == Primitive::kPrimShort || + type == Primitive::kPrimInt)) { + return false; + } + break; + case kMips: + case kMips64: + if (!(type == Primitive::kPrimByte || + type == Primitive::kPrimChar || + type == Primitive::kPrimShort || + type == Primitive::kPrimInt || + type == Primitive::kPrimLong)) { + return false; + } + break; + default: + return false; + } + + ArenaAllocator* arena = mul->GetBlock()->GetGraph()->GetArena(); + + if (mul->HasOnlyOneNonEnvironmentUse()) { + HInstruction* use = mul->GetUses().front().GetUser(); + if (use->IsVecAdd() || use->IsVecSub()) { + // 
Replace code looking like + // VECMUL tmp, x, y + // VECADD/SUB dst, acc, tmp + // with + // VECMULACC dst, acc, x, y + // Note that we do not want to (unconditionally) perform the merge when the + // multiplication has multiple uses and it can be merged in all of them. + // Multiple uses could happen on the same control-flow path, and we would + // then increase the amount of work. In the future we could try to evaluate + // whether all uses are on different control-flow paths (using dominance and + // reverse-dominance information) and only perform the merge when they are. + HInstruction* accumulator = nullptr; + HVecBinaryOperation* binop = use->AsVecBinaryOperation(); + HInstruction* binop_left = binop->GetLeft(); + HInstruction* binop_right = binop->GetRight(); + // This is always true since the `HVecMul` has only one use (which is checked above). + DCHECK_NE(binop_left, binop_right); + if (binop_right == mul) { + accumulator = binop_left; + } else if (use->IsVecAdd()) { + DCHECK_EQ(binop_left, mul); + accumulator = binop_right; + } + + HInstruction::InstructionKind kind = + use->IsVecAdd() ? 
HInstruction::kAdd : HInstruction::kSub; + if (accumulator != nullptr) { + HVecMultiplyAccumulate* mulacc = + new (arena) HVecMultiplyAccumulate(arena, + kind, + accumulator, + mul->GetLeft(), + mul->GetRight(), + binop->GetPackedType(), + binop->GetVectorLength()); + + binop->GetBlock()->ReplaceAndRemoveInstructionWith(binop, mulacc); + DCHECK(!mul->HasUses()); + mul->GetBlock()->RemoveInstruction(mul); + return true; + } + } + } + + return false; +} + void InstructionSimplifierVisitor::VisitShift(HBinaryOperation* instruction) { DCHECK(instruction->IsShl() || instruction->IsShr() || instruction->IsUShr()); HInstruction* shift_amount = instruction->GetRight(); @@ -2301,4 +2381,10 @@ bool InstructionSimplifierVisitor::TrySubtractionChainSimplification( return true; } +void InstructionSimplifierVisitor::VisitVecMul(HVecMul* instruction) { + if (TryCombineVecMultiplyAccumulate(instruction)) { + RecordSimplification(); + } +} + } // namespace art diff --git a/compiler/optimizing/instruction_simplifier_arm64.cc b/compiler/optimizing/instruction_simplifier_arm64.cc index 311be1fb49..7c9bfb11b2 100644 --- a/compiler/optimizing/instruction_simplifier_arm64.cc +++ b/compiler/optimizing/instruction_simplifier_arm64.cc @@ -210,12 +210,6 @@ void InstructionSimplifierArm64Visitor::VisitXor(HXor* instruction) { } } -void InstructionSimplifierArm64Visitor::VisitVecMul(HVecMul* instruction) { - if (TryCombineVecMultiplyAccumulate(instruction, kArm64)) { - RecordSimplification(); - } -} - void InstructionSimplifierArm64Visitor::VisitVecLoad(HVecLoad* instruction) { if (!instruction->IsStringCharAt() && TryExtractVecArrayAccessAddress(instruction, instruction->GetIndex())) { diff --git a/compiler/optimizing/instruction_simplifier_arm64.h b/compiler/optimizing/instruction_simplifier_arm64.h index 8596f6ad40..4f16fc383d 100644 --- a/compiler/optimizing/instruction_simplifier_arm64.h +++ b/compiler/optimizing/instruction_simplifier_arm64.h @@ -74,7 +74,6 @@ class 
InstructionSimplifierArm64Visitor : public HGraphVisitor { void VisitTypeConversion(HTypeConversion* instruction) OVERRIDE; void VisitUShr(HUShr* instruction) OVERRIDE; void VisitXor(HXor* instruction) OVERRIDE; - void VisitVecMul(HVecMul* instruction) OVERRIDE; void VisitVecLoad(HVecLoad* instruction) OVERRIDE; void VisitVecStore(HVecStore* instruction) OVERRIDE; diff --git a/compiler/optimizing/instruction_simplifier_shared.cc b/compiler/optimizing/instruction_simplifier_shared.cc index d1bc4dadeb..7a759b9118 100644 --- a/compiler/optimizing/instruction_simplifier_shared.cc +++ b/compiler/optimizing/instruction_simplifier_shared.cc @@ -281,73 +281,6 @@ bool TryExtractArrayAccessAddress(HInstruction* access, return true; } -bool TryCombineVecMultiplyAccumulate(HVecMul* mul, InstructionSet isa) { - Primitive::Type type = mul->GetPackedType(); - switch (isa) { - case kArm64: - if (!(type == Primitive::kPrimByte || - type == Primitive::kPrimChar || - type == Primitive::kPrimShort || - type == Primitive::kPrimInt)) { - return false; - } - break; - default: - return false; - } - - ArenaAllocator* arena = mul->GetBlock()->GetGraph()->GetArena(); - - if (mul->HasOnlyOneNonEnvironmentUse()) { - HInstruction* use = mul->GetUses().front().GetUser(); - if (use->IsVecAdd() || use->IsVecSub()) { - // Replace code looking like - // VECMUL tmp, x, y - // VECADD/SUB dst, acc, tmp - // with - // VECMULACC dst, acc, x, y - // Note that we do not want to (unconditionally) perform the merge when the - // multiplication has multiple uses and it can be merged in all of them. - // Multiple uses could happen on the same control-flow path, and we would - // then increase the amount of work. In the future we could try to evaluate - // whether all uses are on different control-flow paths (using dominance and - // reverse-dominance information) and only perform the merge when they are. 
- HInstruction* accumulator = nullptr; - HVecBinaryOperation* binop = use->AsVecBinaryOperation(); - HInstruction* binop_left = binop->GetLeft(); - HInstruction* binop_right = binop->GetRight(); - // This is always true since the `HVecMul` has only one use (which is checked above). - DCHECK_NE(binop_left, binop_right); - if (binop_right == mul) { - accumulator = binop_left; - } else if (use->IsVecAdd()) { - DCHECK_EQ(binop_left, mul); - accumulator = binop_right; - } - - HInstruction::InstructionKind kind = - use->IsVecAdd() ? HInstruction::kAdd : HInstruction::kSub; - if (accumulator != nullptr) { - HVecMultiplyAccumulate* mulacc = - new (arena) HVecMultiplyAccumulate(arena, - kind, - accumulator, - mul->GetLeft(), - mul->GetRight(), - binop->GetPackedType(), - binop->GetVectorLength()); - - binop->GetBlock()->ReplaceAndRemoveInstructionWith(binop, mulacc); - DCHECK(!mul->HasUses()); - mul->GetBlock()->RemoveInstruction(mul); - return true; - } - } - } - - return false; -} - bool TryExtractVecArrayAccessAddress(HVecMemoryOperation* access, HInstruction* index) { if (index->IsConstant()) { // If index is constant the whole address calculation often can be done by LDR/STR themselves. 
diff --git a/compiler/optimizing/instruction_simplifier_shared.h b/compiler/optimizing/instruction_simplifier_shared.h index 371619fa2e..31e23833b1 100644 --- a/compiler/optimizing/instruction_simplifier_shared.h +++ b/compiler/optimizing/instruction_simplifier_shared.h @@ -58,7 +58,6 @@ bool TryExtractArrayAccessAddress(HInstruction* access, HInstruction* index, size_t data_offset); -bool TryCombineVecMultiplyAccumulate(HVecMul* mul, InstructionSet isa); bool TryExtractVecArrayAccessAddress(HVecMemoryOperation* access, HInstruction* index); } // namespace art diff --git a/test/550-checker-multiply-accumulate/src/Main.java b/test/550-checker-multiply-accumulate/src/Main.java index 6fd9cdd010..9e6fd3d96a 100644 --- a/test/550-checker-multiply-accumulate/src/Main.java +++ b/test/550-checker-multiply-accumulate/src/Main.java @@ -424,16 +424,29 @@ public class Main { return - (left * right); } - /// CHECK-START-ARM64: void Main.SimdMulAdd(int[], int[]) instruction_simplifier_arm64 (before) + /// CHECK-START-ARM64: void Main.SimdMulAdd(int[], int[]) instruction_simplifier$after_bce (before) /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none /// CHECK-DAG: VecMul loop:<<Loop>> outer_loop:none /// CHECK-DAG: VecAdd loop:<<Loop>> outer_loop:none - /// CHECK-START-ARM64: void Main.SimdMulAdd(int[], int[]) instruction_simplifier_arm64 (after) + /// CHECK-START-ARM64: void Main.SimdMulAdd(int[], int[]) instruction_simplifier$after_bce (after) /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none /// CHECK-DAG: VecMultiplyAccumulate kind:Add loop:<<Loop>> outer_loop:none - /// CHECK-START-ARM64: void Main.SimdMulAdd(int[], int[]) instruction_simplifier_arm64 (after) + /// CHECK-START-ARM64: void Main.SimdMulAdd(int[], int[]) instruction_simplifier$after_bce (after) + /// CHECK-NOT: VecMul + /// CHECK-NOT: VecAdd + + /// CHECK-START-MIPS64: void Main.SimdMulAdd(int[], int[]) instruction_simplifier$after_bce (before) + /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none + 
/// CHECK-DAG: VecMul loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecAdd loop:<<Loop>> outer_loop:none + + /// CHECK-START-MIPS64: void Main.SimdMulAdd(int[], int[]) instruction_simplifier$after_bce (after) + /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: VecMultiplyAccumulate kind:Add loop:<<Loop>> outer_loop:none + + /// CHECK-START-MIPS64: void Main.SimdMulAdd(int[], int[]) instruction_simplifier$after_bce (after) /// CHECK-NOT: VecMul /// CHECK-NOT: VecAdd public static void SimdMulAdd(int[] array1, int[] array2) { @@ -442,16 +455,47 @@ public class Main { } } - /// CHECK-START-ARM64: void Main.SimdMulSub(int[], int[]) instruction_simplifier_arm64 (before) + /// CHECK-START-MIPS64: void Main.SimdMulAddLong(long[], long[]) instruction_simplifier$after_bce (before) + /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: VecMul loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecAdd loop:<<Loop>> outer_loop:none + + /// CHECK-START-MIPS64: void Main.SimdMulAddLong(long[], long[]) instruction_simplifier$after_bce (after) + /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: VecMultiplyAccumulate kind:Add loop:<<Loop>> outer_loop:none + + /// CHECK-START-MIPS64: void Main.SimdMulAddLong(long[], long[]) instruction_simplifier$after_bce (after) + /// CHECK-NOT: VecMul + /// CHECK-NOT: VecAdd + public static void SimdMulAddLong(long[] array1, long[] array2) { + for (int j = 0; j < 100; j++) { + array2[j] += 12345 * array1[j]; + } + } + + /// CHECK-START-ARM64: void Main.SimdMulSub(int[], int[]) instruction_simplifier$after_bce (before) /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none /// CHECK-DAG: VecMul loop:<<Loop>> outer_loop:none /// CHECK-DAG: VecSub loop:<<Loop>> outer_loop:none - /// CHECK-START-ARM64: void Main.SimdMulSub(int[], int[]) instruction_simplifier_arm64 (after) + /// CHECK-START-ARM64: void Main.SimdMulSub(int[], int[]) instruction_simplifier$after_bce (after) /// CHECK-DAG: Phi 
loop:<<Loop:B\d+>> outer_loop:none /// CHECK-DAG: VecMultiplyAccumulate kind:Sub loop:<<Loop>> outer_loop:none - /// CHECK-START-ARM64: void Main.SimdMulSub(int[], int[]) instruction_simplifier_arm64 (after) + /// CHECK-START-ARM64: void Main.SimdMulSub(int[], int[]) instruction_simplifier$after_bce (after) + /// CHECK-NOT: VecMul + /// CHECK-NOT: VecSub + + /// CHECK-START-MIPS64: void Main.SimdMulSub(int[], int[]) instruction_simplifier$after_bce (before) + /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: VecMul loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecSub loop:<<Loop>> outer_loop:none + + /// CHECK-START-MIPS64: void Main.SimdMulSub(int[], int[]) instruction_simplifier$after_bce (after) + /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: VecMultiplyAccumulate kind:Sub loop:<<Loop>> outer_loop:none + + /// CHECK-START-MIPS64: void Main.SimdMulSub(int[], int[]) instruction_simplifier$after_bce (after) /// CHECK-NOT: VecMul /// CHECK-NOT: VecSub public static void SimdMulSub(int[] array1, int[] array2) { @@ -460,12 +504,38 @@ public class Main { } } - /// CHECK-START-ARM64: void Main.SimdMulMultipleUses(int[], int[]) instruction_simplifier_arm64 (before) + /// CHECK-START-MIPS64: void Main.SimdMulSubLong(long[], long[]) instruction_simplifier$after_bce (before) + /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: VecMul loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecSub loop:<<Loop>> outer_loop:none + + /// CHECK-START-MIPS64: void Main.SimdMulSubLong(long[], long[]) instruction_simplifier$after_bce (after) + /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: VecMultiplyAccumulate kind:Sub loop:<<Loop>> outer_loop:none + + /// CHECK-START-MIPS64: void Main.SimdMulSubLong(long[], long[]) instruction_simplifier$after_bce (after) + /// CHECK-NOT: VecMul + /// CHECK-NOT: VecSub + public static void SimdMulSubLong(long[] array1, long[] array2) { + for (int j = 0; j < 100; j++) 
{ + array2[j] -= 12345 * array1[j]; + } + } + + /// CHECK-START-ARM64: void Main.SimdMulMultipleUses(int[], int[]) instruction_simplifier$after_bce (before) + /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: VecMul loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecSub loop:<<Loop>> outer_loop:none + + /// CHECK-START-ARM64: void Main.SimdMulMultipleUses(int[], int[]) instruction_simplifier$after_bce (after) + /// CHECK-NOT: VecMultiplyAccumulate + + /// CHECK-START-MIPS64: void Main.SimdMulMultipleUses(int[], int[]) instruction_simplifier$after_bce (before) /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none /// CHECK-DAG: VecMul loop:<<Loop>> outer_loop:none /// CHECK-DAG: VecSub loop:<<Loop>> outer_loop:none - /// CHECK-START-ARM64: void Main.SimdMulMultipleUses(int[], int[]) instruction_simplifier_arm64 (after) + /// CHECK-START-MIPS64: void Main.SimdMulMultipleUses(int[], int[]) instruction_simplifier$after_bce (after) /// CHECK-NOT: VecMultiplyAccumulate public static void SimdMulMultipleUses(int[] array1, int[] array2) { for (int j = 0; j < 100; j++) { @@ -475,6 +545,21 @@ public class Main { } } + /// CHECK-START-MIPS64: void Main.SimdMulMultipleUsesLong(long[], long[]) instruction_simplifier$after_bce (before) + /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: VecMul loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecSub loop:<<Loop>> outer_loop:none + + /// CHECK-START-MIPS64: void Main.SimdMulMultipleUsesLong(long[], long[]) instruction_simplifier$after_bce (after) + /// CHECK-NOT: VecMultiplyAccumulate + public static void SimdMulMultipleUsesLong(long[] array1, long[] array2) { + for (int j = 0; j < 100; j++) { + long temp = 12345 * array1[j]; + array2[j] -= temp; + array1[j] = temp; + } + } + public static final int ARRAY_SIZE = 1000; public static void initArray(int[] array) { @@ -483,6 +568,12 @@ public class Main { } } + public static void initArrayLong(long[] array) { + for (int i = 0; i < ARRAY_SIZE; 
i++) { + array[i] = i; + } + } + public static int calcArraySum(int[] array) { int sum = 0; for (int i = 0; i < ARRAY_SIZE; i++) { @@ -491,19 +582,39 @@ public class Main { return sum; } + public static long calcArraySumLong(long[] array) { + long sum = 0; + for (int i = 0; i < ARRAY_SIZE; i++) { + sum += array[i]; + } + return sum; + } + public static void testSimdMultiplyAccumulate() { int[] array1 = new int[ARRAY_SIZE]; int[] array2 = new int[ARRAY_SIZE]; + long[] array3 = new long[ARRAY_SIZE]; + long[] array4 = new long[ARRAY_SIZE]; initArray(array1); initArray(array2); SimdMulSub(array1, array2); assertIntEquals(-60608250, calcArraySum(array2)); + initArrayLong(array3); + initArrayLong(array4); + SimdMulSubLong(array3, array4); + assertLongEquals(-60608250, calcArraySumLong(array4)); + initArray(array1); initArray(array2); SimdMulAdd(array1, array2); assertIntEquals(61607250, calcArraySum(array2)); + + initArrayLong(array3); + initArrayLong(array4); + SimdMulAddLong(array3, array4); + assertLongEquals(61607250, calcArraySumLong(array4)); } public static void main(String[] args) { |