MIPS: Support MultiplyAccumulate for SIMD.
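Moved the combining of VecMul + VecAdd/VecSub into VecMultiplyAccumulate
from the arm64-specific instruction simplifier to the general
instruction simplifier, and enabled it for MIPS and MIPS64.
Also extended the 550-checker-multiply-accumulate test with MIPS64
and long-vector cases.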
Test: test-art-host, test-art-target
Change-Id: If113f0f0d5cb48e8a76273c919cfa2f49fce667d
diff --git a/compiler/optimizing/code_generator_vector_mips.cc b/compiler/optimizing/code_generator_vector_mips.cc
index ea36e90..6bf28ab 100644
--- a/compiler/optimizing/code_generator_vector_mips.cc
+++ b/compiler/optimizing/code_generator_vector_mips.cc
@@ -819,11 +819,74 @@
}
void LocationsBuilderMIPS::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) {
- LOG(FATAL) << "No SIMD for " << instr->GetId();
+ LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instr);
+ switch (instr->GetPackedType()) {
+ case Primitive::kPrimByte:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ case Primitive::kPrimLong:
+ locations->SetInAt(
+ HVecMultiplyAccumulate::kInputAccumulatorIndex, Location::RequiresFpuRegister());
+ locations->SetInAt(
+ HVecMultiplyAccumulate::kInputMulLeftIndex, Location::RequiresFpuRegister());
+ locations->SetInAt(
+ HVecMultiplyAccumulate::kInputMulRightIndex, Location::RequiresFpuRegister());
+ DCHECK_EQ(HVecMultiplyAccumulate::kInputAccumulatorIndex, 0);
+ locations->SetOut(Location::SameAsFirstInput());
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
}
void InstructionCodeGeneratorMIPS::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) {
- LOG(FATAL) << "No SIMD for " << instr->GetId();
+ LocationSummary* locations = instr->GetLocations();
+ VectorRegister acc =
+ VectorRegisterFrom(locations->InAt(HVecMultiplyAccumulate::kInputAccumulatorIndex));
+ VectorRegister left =
+ VectorRegisterFrom(locations->InAt(HVecMultiplyAccumulate::kInputMulLeftIndex));
+ VectorRegister right =
+ VectorRegisterFrom(locations->InAt(HVecMultiplyAccumulate::kInputMulRightIndex));
+ switch (instr->GetPackedType()) {
+ case Primitive::kPrimByte:
+ DCHECK_EQ(16u, instr->GetVectorLength());
+ if (instr->GetOpKind() == HInstruction::kAdd) {
+ __ MaddvB(acc, left, right);
+ } else {
+ __ MsubvB(acc, left, right);
+ }
+ break;
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ DCHECK_EQ(8u, instr->GetVectorLength());
+ if (instr->GetOpKind() == HInstruction::kAdd) {
+ __ MaddvH(acc, left, right);
+ } else {
+ __ MsubvH(acc, left, right);
+ }
+ break;
+ case Primitive::kPrimInt:
+ DCHECK_EQ(4u, instr->GetVectorLength());
+ if (instr->GetOpKind() == HInstruction::kAdd) {
+ __ MaddvW(acc, left, right);
+ } else {
+ __ MsubvW(acc, left, right);
+ }
+ break;
+ case Primitive::kPrimLong:
+ DCHECK_EQ(2u, instr->GetVectorLength());
+ if (instr->GetOpKind() == HInstruction::kAdd) {
+ __ MaddvD(acc, left, right);
+ } else {
+ __ MsubvD(acc, left, right);
+ }
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
}
// Helper to set up locations for vector memory operations.
diff --git a/compiler/optimizing/code_generator_vector_mips64.cc b/compiler/optimizing/code_generator_vector_mips64.cc
index 0395db1..75bf7a7 100644
--- a/compiler/optimizing/code_generator_vector_mips64.cc
+++ b/compiler/optimizing/code_generator_vector_mips64.cc
@@ -823,11 +823,74 @@
}
void LocationsBuilderMIPS64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) {
- LOG(FATAL) << "No SIMD for " << instr->GetId();
+ LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instr);
+ switch (instr->GetPackedType()) {
+ case Primitive::kPrimByte:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ case Primitive::kPrimLong:
+ locations->SetInAt(
+ HVecMultiplyAccumulate::kInputAccumulatorIndex, Location::RequiresFpuRegister());
+ locations->SetInAt(
+ HVecMultiplyAccumulate::kInputMulLeftIndex, Location::RequiresFpuRegister());
+ locations->SetInAt(
+ HVecMultiplyAccumulate::kInputMulRightIndex, Location::RequiresFpuRegister());
+ DCHECK_EQ(HVecMultiplyAccumulate::kInputAccumulatorIndex, 0);
+ locations->SetOut(Location::SameAsFirstInput());
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
}
void InstructionCodeGeneratorMIPS64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) {
- LOG(FATAL) << "No SIMD for " << instr->GetId();
+ LocationSummary* locations = instr->GetLocations();
+ VectorRegister acc =
+ VectorRegisterFrom(locations->InAt(HVecMultiplyAccumulate::kInputAccumulatorIndex));
+ VectorRegister left =
+ VectorRegisterFrom(locations->InAt(HVecMultiplyAccumulate::kInputMulLeftIndex));
+ VectorRegister right =
+ VectorRegisterFrom(locations->InAt(HVecMultiplyAccumulate::kInputMulRightIndex));
+ switch (instr->GetPackedType()) {
+ case Primitive::kPrimByte:
+ DCHECK_EQ(16u, instr->GetVectorLength());
+ if (instr->GetOpKind() == HInstruction::kAdd) {
+ __ MaddvB(acc, left, right);
+ } else {
+ __ MsubvB(acc, left, right);
+ }
+ break;
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ DCHECK_EQ(8u, instr->GetVectorLength());
+ if (instr->GetOpKind() == HInstruction::kAdd) {
+ __ MaddvH(acc, left, right);
+ } else {
+ __ MsubvH(acc, left, right);
+ }
+ break;
+ case Primitive::kPrimInt:
+ DCHECK_EQ(4u, instr->GetVectorLength());
+ if (instr->GetOpKind() == HInstruction::kAdd) {
+ __ MaddvW(acc, left, right);
+ } else {
+ __ MsubvW(acc, left, right);
+ }
+ break;
+ case Primitive::kPrimLong:
+ DCHECK_EQ(2u, instr->GetVectorLength());
+ if (instr->GetOpKind() == HInstruction::kAdd) {
+ __ MaddvD(acc, left, right);
+ } else {
+ __ MsubvD(acc, left, right);
+ }
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
}
// Helper to set up locations for vector memory operations.
diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc
index 5c79511..f2a829f 100644
--- a/compiler/optimizing/instruction_simplifier.cc
+++ b/compiler/optimizing/instruction_simplifier.cc
@@ -59,6 +59,7 @@
bool TryDeMorganNegationFactoring(HBinaryOperation* op);
bool TryHandleAssociativeAndCommutativeOperation(HBinaryOperation* instruction);
bool TrySubtractionChainSimplification(HBinaryOperation* instruction);
+ bool TryCombineVecMultiplyAccumulate(HVecMul* mul);
void VisitShift(HBinaryOperation* shift);
@@ -98,6 +99,7 @@
void VisitInstanceOf(HInstanceOf* instruction) OVERRIDE;
void VisitInvoke(HInvoke* invoke) OVERRIDE;
void VisitDeoptimize(HDeoptimize* deoptimize) OVERRIDE;
+ void VisitVecMul(HVecMul* instruction) OVERRIDE;
bool CanEnsureNotNullAt(HInstruction* instr, HInstruction* at) const;
@@ -243,6 +245,84 @@
return false;
}
+bool InstructionSimplifierVisitor::TryCombineVecMultiplyAccumulate(HVecMul* mul) {
+ Primitive::Type type = mul->GetPackedType();
+ InstructionSet isa = codegen_->GetInstructionSet();
+ switch (isa) {
+ case kArm64:
+ if (!(type == Primitive::kPrimByte ||
+ type == Primitive::kPrimChar ||
+ type == Primitive::kPrimShort ||
+ type == Primitive::kPrimInt)) {
+ return false;
+ }
+ break;
+ case kMips:
+ case kMips64:
+ if (!(type == Primitive::kPrimByte ||
+ type == Primitive::kPrimChar ||
+ type == Primitive::kPrimShort ||
+ type == Primitive::kPrimInt ||
+ type == Primitive::kPrimLong)) {
+ return false;
+ }
+ break;
+ default:
+ return false;
+ }
+
+ ArenaAllocator* arena = mul->GetBlock()->GetGraph()->GetArena();
+
+ if (mul->HasOnlyOneNonEnvironmentUse()) {
+ HInstruction* use = mul->GetUses().front().GetUser();
+ if (use->IsVecAdd() || use->IsVecSub()) {
+ // Replace code looking like
+ // VECMUL tmp, x, y
+ // VECADD/SUB dst, acc, tmp
+ // with
+ // VECMULACC dst, acc, x, y
+ // Note that we do not want to (unconditionally) perform the merge when the
+ // multiplication has multiple uses and it can be merged in all of them.
+ // Multiple uses could happen on the same control-flow path, and we would
+ // then increase the amount of work. In the future we could try to evaluate
+ // whether all uses are on different control-flow paths (using dominance and
+ // reverse-dominance information) and only perform the merge when they are.
+ HInstruction* accumulator = nullptr;
+ HVecBinaryOperation* binop = use->AsVecBinaryOperation();
+ HInstruction* binop_left = binop->GetLeft();
+ HInstruction* binop_right = binop->GetRight();
+ // This is always true since the `HVecMul` has only one use (which is checked above).
+ DCHECK_NE(binop_left, binop_right);
+ if (binop_right == mul) {
+ accumulator = binop_left;
+ } else if (use->IsVecAdd()) {
+ DCHECK_EQ(binop_left, mul);
+ accumulator = binop_right;
+ }
+
+ HInstruction::InstructionKind kind =
+ use->IsVecAdd() ? HInstruction::kAdd : HInstruction::kSub;
+ if (accumulator != nullptr) {
+ HVecMultiplyAccumulate* mulacc =
+ new (arena) HVecMultiplyAccumulate(arena,
+ kind,
+ accumulator,
+ mul->GetLeft(),
+ mul->GetRight(),
+ binop->GetPackedType(),
+ binop->GetVectorLength());
+
+ binop->GetBlock()->ReplaceAndRemoveInstructionWith(binop, mulacc);
+ DCHECK(!mul->HasUses());
+ mul->GetBlock()->RemoveInstruction(mul);
+ return true;
+ }
+ }
+ }
+
+ return false;
+}
+
void InstructionSimplifierVisitor::VisitShift(HBinaryOperation* instruction) {
DCHECK(instruction->IsShl() || instruction->IsShr() || instruction->IsUShr());
HInstruction* shift_amount = instruction->GetRight();
@@ -2301,4 +2381,10 @@
return true;
}
+void InstructionSimplifierVisitor::VisitVecMul(HVecMul* instruction) {
+ if (TryCombineVecMultiplyAccumulate(instruction)) {
+ RecordSimplification();
+ }
+}
+
} // namespace art
diff --git a/compiler/optimizing/instruction_simplifier_arm64.cc b/compiler/optimizing/instruction_simplifier_arm64.cc
index 311be1f..7c9bfb1 100644
--- a/compiler/optimizing/instruction_simplifier_arm64.cc
+++ b/compiler/optimizing/instruction_simplifier_arm64.cc
@@ -210,12 +210,6 @@
}
}
-void InstructionSimplifierArm64Visitor::VisitVecMul(HVecMul* instruction) {
- if (TryCombineVecMultiplyAccumulate(instruction, kArm64)) {
- RecordSimplification();
- }
-}
-
void InstructionSimplifierArm64Visitor::VisitVecLoad(HVecLoad* instruction) {
if (!instruction->IsStringCharAt()
&& TryExtractVecArrayAccessAddress(instruction, instruction->GetIndex())) {
diff --git a/compiler/optimizing/instruction_simplifier_arm64.h b/compiler/optimizing/instruction_simplifier_arm64.h
index 8596f6a..4f16fc3 100644
--- a/compiler/optimizing/instruction_simplifier_arm64.h
+++ b/compiler/optimizing/instruction_simplifier_arm64.h
@@ -74,7 +74,6 @@
void VisitTypeConversion(HTypeConversion* instruction) OVERRIDE;
void VisitUShr(HUShr* instruction) OVERRIDE;
void VisitXor(HXor* instruction) OVERRIDE;
- void VisitVecMul(HVecMul* instruction) OVERRIDE;
void VisitVecLoad(HVecLoad* instruction) OVERRIDE;
void VisitVecStore(HVecStore* instruction) OVERRIDE;
diff --git a/compiler/optimizing/instruction_simplifier_shared.cc b/compiler/optimizing/instruction_simplifier_shared.cc
index d1bc4da..7a759b9 100644
--- a/compiler/optimizing/instruction_simplifier_shared.cc
+++ b/compiler/optimizing/instruction_simplifier_shared.cc
@@ -281,73 +281,6 @@
return true;
}
-bool TryCombineVecMultiplyAccumulate(HVecMul* mul, InstructionSet isa) {
- Primitive::Type type = mul->GetPackedType();
- switch (isa) {
- case kArm64:
- if (!(type == Primitive::kPrimByte ||
- type == Primitive::kPrimChar ||
- type == Primitive::kPrimShort ||
- type == Primitive::kPrimInt)) {
- return false;
- }
- break;
- default:
- return false;
- }
-
- ArenaAllocator* arena = mul->GetBlock()->GetGraph()->GetArena();
-
- if (mul->HasOnlyOneNonEnvironmentUse()) {
- HInstruction* use = mul->GetUses().front().GetUser();
- if (use->IsVecAdd() || use->IsVecSub()) {
- // Replace code looking like
- // VECMUL tmp, x, y
- // VECADD/SUB dst, acc, tmp
- // with
- // VECMULACC dst, acc, x, y
- // Note that we do not want to (unconditionally) perform the merge when the
- // multiplication has multiple uses and it can be merged in all of them.
- // Multiple uses could happen on the same control-flow path, and we would
- // then increase the amount of work. In the future we could try to evaluate
- // whether all uses are on different control-flow paths (using dominance and
- // reverse-dominance information) and only perform the merge when they are.
- HInstruction* accumulator = nullptr;
- HVecBinaryOperation* binop = use->AsVecBinaryOperation();
- HInstruction* binop_left = binop->GetLeft();
- HInstruction* binop_right = binop->GetRight();
- // This is always true since the `HVecMul` has only one use (which is checked above).
- DCHECK_NE(binop_left, binop_right);
- if (binop_right == mul) {
- accumulator = binop_left;
- } else if (use->IsVecAdd()) {
- DCHECK_EQ(binop_left, mul);
- accumulator = binop_right;
- }
-
- HInstruction::InstructionKind kind =
- use->IsVecAdd() ? HInstruction::kAdd : HInstruction::kSub;
- if (accumulator != nullptr) {
- HVecMultiplyAccumulate* mulacc =
- new (arena) HVecMultiplyAccumulate(arena,
- kind,
- accumulator,
- mul->GetLeft(),
- mul->GetRight(),
- binop->GetPackedType(),
- binop->GetVectorLength());
-
- binop->GetBlock()->ReplaceAndRemoveInstructionWith(binop, mulacc);
- DCHECK(!mul->HasUses());
- mul->GetBlock()->RemoveInstruction(mul);
- return true;
- }
- }
- }
-
- return false;
-}
-
bool TryExtractVecArrayAccessAddress(HVecMemoryOperation* access, HInstruction* index) {
if (index->IsConstant()) {
// If index is constant the whole address calculation often can be done by LDR/STR themselves.
diff --git a/compiler/optimizing/instruction_simplifier_shared.h b/compiler/optimizing/instruction_simplifier_shared.h
index 371619f..31e2383 100644
--- a/compiler/optimizing/instruction_simplifier_shared.h
+++ b/compiler/optimizing/instruction_simplifier_shared.h
@@ -58,7 +58,6 @@
HInstruction* index,
size_t data_offset);
-bool TryCombineVecMultiplyAccumulate(HVecMul* mul, InstructionSet isa);
bool TryExtractVecArrayAccessAddress(HVecMemoryOperation* access, HInstruction* index);
} // namespace art
diff --git a/test/550-checker-multiply-accumulate/src/Main.java b/test/550-checker-multiply-accumulate/src/Main.java
index 6fd9cdd..9e6fd3d 100644
--- a/test/550-checker-multiply-accumulate/src/Main.java
+++ b/test/550-checker-multiply-accumulate/src/Main.java
@@ -424,16 +424,29 @@
return - (left * right);
}
- /// CHECK-START-ARM64: void Main.SimdMulAdd(int[], int[]) instruction_simplifier_arm64 (before)
+ /// CHECK-START-ARM64: void Main.SimdMulAdd(int[], int[]) instruction_simplifier$after_bce (before)
/// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none
/// CHECK-DAG: VecMul loop:<<Loop>> outer_loop:none
/// CHECK-DAG: VecAdd loop:<<Loop>> outer_loop:none
- /// CHECK-START-ARM64: void Main.SimdMulAdd(int[], int[]) instruction_simplifier_arm64 (after)
+ /// CHECK-START-ARM64: void Main.SimdMulAdd(int[], int[]) instruction_simplifier$after_bce (after)
/// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none
/// CHECK-DAG: VecMultiplyAccumulate kind:Add loop:<<Loop>> outer_loop:none
- /// CHECK-START-ARM64: void Main.SimdMulAdd(int[], int[]) instruction_simplifier_arm64 (after)
+ /// CHECK-START-ARM64: void Main.SimdMulAdd(int[], int[]) instruction_simplifier$after_bce (after)
+ /// CHECK-NOT: VecMul
+ /// CHECK-NOT: VecAdd
+
+ /// CHECK-START-MIPS64: void Main.SimdMulAdd(int[], int[]) instruction_simplifier$after_bce (before)
+ /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: VecMul loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: VecAdd loop:<<Loop>> outer_loop:none
+
+ /// CHECK-START-MIPS64: void Main.SimdMulAdd(int[], int[]) instruction_simplifier$after_bce (after)
+ /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: VecMultiplyAccumulate kind:Add loop:<<Loop>> outer_loop:none
+
+ /// CHECK-START-MIPS64: void Main.SimdMulAdd(int[], int[]) instruction_simplifier$after_bce (after)
/// CHECK-NOT: VecMul
/// CHECK-NOT: VecAdd
public static void SimdMulAdd(int[] array1, int[] array2) {
@@ -442,16 +455,47 @@
}
}
- /// CHECK-START-ARM64: void Main.SimdMulSub(int[], int[]) instruction_simplifier_arm64 (before)
+ /// CHECK-START-MIPS64: void Main.SimdMulAddLong(long[], long[]) instruction_simplifier$after_bce (before)
+ /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: VecMul loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: VecAdd loop:<<Loop>> outer_loop:none
+
+ /// CHECK-START-MIPS64: void Main.SimdMulAddLong(long[], long[]) instruction_simplifier$after_bce (after)
+ /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: VecMultiplyAccumulate kind:Add loop:<<Loop>> outer_loop:none
+
+ /// CHECK-START-MIPS64: void Main.SimdMulAddLong(long[], long[]) instruction_simplifier$after_bce (after)
+ /// CHECK-NOT: VecMul
+ /// CHECK-NOT: VecAdd
+ public static void SimdMulAddLong(long[] array1, long[] array2) {
+ for (int j = 0; j < 100; j++) {
+ array2[j] += 12345 * array1[j];
+ }
+ }
+
+ /// CHECK-START-ARM64: void Main.SimdMulSub(int[], int[]) instruction_simplifier$after_bce (before)
/// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none
/// CHECK-DAG: VecMul loop:<<Loop>> outer_loop:none
/// CHECK-DAG: VecSub loop:<<Loop>> outer_loop:none
- /// CHECK-START-ARM64: void Main.SimdMulSub(int[], int[]) instruction_simplifier_arm64 (after)
+ /// CHECK-START-ARM64: void Main.SimdMulSub(int[], int[]) instruction_simplifier$after_bce (after)
/// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none
/// CHECK-DAG: VecMultiplyAccumulate kind:Sub loop:<<Loop>> outer_loop:none
- /// CHECK-START-ARM64: void Main.SimdMulSub(int[], int[]) instruction_simplifier_arm64 (after)
+ /// CHECK-START-ARM64: void Main.SimdMulSub(int[], int[]) instruction_simplifier$after_bce (after)
+ /// CHECK-NOT: VecMul
+ /// CHECK-NOT: VecSub
+
+ /// CHECK-START-MIPS64: void Main.SimdMulSub(int[], int[]) instruction_simplifier$after_bce (before)
+ /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: VecMul loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: VecSub loop:<<Loop>> outer_loop:none
+
+ /// CHECK-START-MIPS64: void Main.SimdMulSub(int[], int[]) instruction_simplifier$after_bce (after)
+ /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: VecMultiplyAccumulate kind:Sub loop:<<Loop>> outer_loop:none
+
+ /// CHECK-START-MIPS64: void Main.SimdMulSub(int[], int[]) instruction_simplifier$after_bce (after)
/// CHECK-NOT: VecMul
/// CHECK-NOT: VecSub
public static void SimdMulSub(int[] array1, int[] array2) {
@@ -460,12 +504,38 @@
}
}
- /// CHECK-START-ARM64: void Main.SimdMulMultipleUses(int[], int[]) instruction_simplifier_arm64 (before)
+ /// CHECK-START-MIPS64: void Main.SimdMulSubLong(long[], long[]) instruction_simplifier$after_bce (before)
/// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none
/// CHECK-DAG: VecMul loop:<<Loop>> outer_loop:none
/// CHECK-DAG: VecSub loop:<<Loop>> outer_loop:none
- /// CHECK-START-ARM64: void Main.SimdMulMultipleUses(int[], int[]) instruction_simplifier_arm64 (after)
+ /// CHECK-START-MIPS64: void Main.SimdMulSubLong(long[], long[]) instruction_simplifier$after_bce (after)
+ /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: VecMultiplyAccumulate kind:Sub loop:<<Loop>> outer_loop:none
+
+ /// CHECK-START-MIPS64: void Main.SimdMulSubLong(long[], long[]) instruction_simplifier$after_bce (after)
+ /// CHECK-NOT: VecMul
+ /// CHECK-NOT: VecSub
+ public static void SimdMulSubLong(long[] array1, long[] array2) {
+ for (int j = 0; j < 100; j++) {
+ array2[j] -= 12345 * array1[j];
+ }
+ }
+
+ /// CHECK-START-ARM64: void Main.SimdMulMultipleUses(int[], int[]) instruction_simplifier$after_bce (before)
+ /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: VecMul loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: VecSub loop:<<Loop>> outer_loop:none
+
+ /// CHECK-START-ARM64: void Main.SimdMulMultipleUses(int[], int[]) instruction_simplifier$after_bce (after)
+ /// CHECK-NOT: VecMultiplyAccumulate
+
+ /// CHECK-START-MIPS64: void Main.SimdMulMultipleUses(int[], int[]) instruction_simplifier$after_bce (before)
+ /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: VecMul loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: VecSub loop:<<Loop>> outer_loop:none
+
+ /// CHECK-START-MIPS64: void Main.SimdMulMultipleUses(int[], int[]) instruction_simplifier$after_bce (after)
/// CHECK-NOT: VecMultiplyAccumulate
public static void SimdMulMultipleUses(int[] array1, int[] array2) {
for (int j = 0; j < 100; j++) {
@@ -475,6 +545,21 @@
}
}
+ /// CHECK-START-MIPS64: void Main.SimdMulMultipleUsesLong(long[], long[]) instruction_simplifier$after_bce (before)
+ /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: VecMul loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: VecSub loop:<<Loop>> outer_loop:none
+
+ /// CHECK-START-MIPS64: void Main.SimdMulMultipleUsesLong(long[], long[]) instruction_simplifier$after_bce (after)
+ /// CHECK-NOT: VecMultiplyAccumulate
+ public static void SimdMulMultipleUsesLong(long[] array1, long[] array2) {
+ for (int j = 0; j < 100; j++) {
+ long temp = 12345 * array1[j];
+ array2[j] -= temp;
+ array1[j] = temp;
+ }
+ }
+
public static final int ARRAY_SIZE = 1000;
public static void initArray(int[] array) {
@@ -483,6 +568,12 @@
}
}
+ public static void initArrayLong(long[] array) {
+ for (int i = 0; i < ARRAY_SIZE; i++) {
+ array[i] = i;
+ }
+ }
+
public static int calcArraySum(int[] array) {
int sum = 0;
for (int i = 0; i < ARRAY_SIZE; i++) {
@@ -491,19 +582,39 @@
return sum;
}
+ public static long calcArraySumLong(long[] array) {
+ long sum = 0;
+ for (int i = 0; i < ARRAY_SIZE; i++) {
+ sum += array[i];
+ }
+ return sum;
+ }
+
public static void testSimdMultiplyAccumulate() {
int[] array1 = new int[ARRAY_SIZE];
int[] array2 = new int[ARRAY_SIZE];
+ long[] array3 = new long[ARRAY_SIZE];
+ long[] array4 = new long[ARRAY_SIZE];
initArray(array1);
initArray(array2);
SimdMulSub(array1, array2);
assertIntEquals(-60608250, calcArraySum(array2));
+ initArrayLong(array3);
+ initArrayLong(array4);
+ SimdMulSubLong(array3, array4);
+ assertLongEquals(-60608250, calcArraySumLong(array4));
+
initArray(array1);
initArray(array2);
SimdMulAdd(array1, array2);
assertIntEquals(61607250, calcArraySum(array2));
+
+ initArrayLong(array3);
+ initArrayLong(array4);
+ SimdMulAddLong(array3, array4);
+ assertLongEquals(61607250, calcArraySumLong(array4));
}
public static void main(String[] args) {