diff options
author | 2017-04-20 17:28:00 +0000 | |
---|---|---|
committer | 2017-04-20 17:28:01 +0000 | |
commit | f99f62f8e04aecbbe1615e242a19ac475f66e565 (patch) | |
tree | 43f4758d8462343395028fd634430da770a61b83 | |
parent | 4c408ca7262122729fc9b1e53ad439507bd2ec19 (diff) | |
parent | f34dd206d0073fb3949be872224420a8488f551f (diff) |
Merge "ARM64: Support MultiplyAccumulate for SIMD."
-rw-r--r-- | compiler/optimizing/code_generator_vector_arm.cc | 8 | ||||
-rw-r--r-- | compiler/optimizing/code_generator_vector_arm64.cc | 61 | ||||
-rw-r--r-- | compiler/optimizing/code_generator_vector_arm_vixl.cc | 8 | ||||
-rw-r--r-- | compiler/optimizing/code_generator_vector_mips.cc | 8 | ||||
-rw-r--r-- | compiler/optimizing/code_generator_vector_mips64.cc | 8 | ||||
-rw-r--r-- | compiler/optimizing/code_generator_vector_x86.cc | 8 | ||||
-rw-r--r-- | compiler/optimizing/code_generator_vector_x86_64.cc | 8 | ||||
-rw-r--r-- | compiler/optimizing/graph_visualizer.cc | 4 | ||||
-rw-r--r-- | compiler/optimizing/instruction_simplifier_arm64.cc | 6 | ||||
-rw-r--r-- | compiler/optimizing/instruction_simplifier_arm64.h | 1 | ||||
-rw-r--r-- | compiler/optimizing/instruction_simplifier_shared.cc | 66 | ||||
-rw-r--r-- | compiler/optimizing/instruction_simplifier_shared.h | 2 | ||||
-rw-r--r-- | compiler/optimizing/nodes.h | 1 | ||||
-rw-r--r-- | compiler/optimizing/nodes_vector.h | 57 | ||||
-rw-r--r-- | test/550-checker-multiply-accumulate/src/Main.java | 84 |
15 files changed, 330 insertions, 0 deletions
diff --git a/compiler/optimizing/code_generator_vector_arm.cc b/compiler/optimizing/code_generator_vector_arm.cc index 6e82123e56..f8552dcfc9 100644 --- a/compiler/optimizing/code_generator_vector_arm.cc +++ b/compiler/optimizing/code_generator_vector_arm.cc @@ -245,6 +245,14 @@ void InstructionCodeGeneratorARM::VisitVecUShr(HVecUShr* instruction) { LOG(FATAL) << "No SIMD for " << instruction->GetId(); } +void LocationsBuilderARM::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) { + LOG(FATAL) << "No SIMD for " << instr->GetId(); +} + +void InstructionCodeGeneratorARM::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) { + LOG(FATAL) << "No SIMD for " << instr->GetId(); +} + void LocationsBuilderARM::VisitVecLoad(HVecLoad* instruction) { LOG(FATAL) << "No SIMD for " << instruction->GetId(); } diff --git a/compiler/optimizing/code_generator_vector_arm64.cc b/compiler/optimizing/code_generator_vector_arm64.cc index 2dfccfff85..b3eb639142 100644 --- a/compiler/optimizing/code_generator_vector_arm64.cc +++ b/compiler/optimizing/code_generator_vector_arm64.cc @@ -681,6 +681,67 @@ void InstructionCodeGeneratorARM64::VisitVecUShr(HVecUShr* instruction) { } } +void LocationsBuilderARM64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) { + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instr); + switch (instr->GetPackedType()) { + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + locations->SetInAt( + HVecMultiplyAccumulate::kInputAccumulatorIndex, Location::RequiresFpuRegister()); + locations->SetInAt( + HVecMultiplyAccumulate::kInputMulLeftIndex, Location::RequiresFpuRegister()); + locations->SetInAt( + HVecMultiplyAccumulate::kInputMulRightIndex, Location::RequiresFpuRegister()); + DCHECK_EQ(HVecMultiplyAccumulate::kInputAccumulatorIndex, 0); + locations->SetOut(Location::SameAsFirstInput()); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +// Some early revisions of the Cortex-A53 have an erratum (835769) whereby it is possible for a +// 64-bit scalar multiply-accumulate instruction in AArch64 state to generate an incorrect result. +// However vector MultiplyAccumulate instruction is not affected. +void InstructionCodeGeneratorARM64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) { + LocationSummary* locations = instr->GetLocations(); + VRegister acc = VRegisterFrom(locations->InAt(HVecMultiplyAccumulate::kInputAccumulatorIndex)); + VRegister left = VRegisterFrom(locations->InAt(HVecMultiplyAccumulate::kInputMulLeftIndex)); + VRegister right = VRegisterFrom(locations->InAt(HVecMultiplyAccumulate::kInputMulRightIndex)); + switch (instr->GetPackedType()) { + case Primitive::kPrimByte: + DCHECK_EQ(16u, instr->GetVectorLength()); + if (instr->GetOpKind() == HInstruction::kAdd) { + __ Mla(acc.V16B(), left.V16B(), right.V16B()); + } else { + __ Mls(acc.V16B(), left.V16B(), right.V16B()); + } + break; + case Primitive::kPrimChar: + case Primitive::kPrimShort: + DCHECK_EQ(8u, instr->GetVectorLength()); + if (instr->GetOpKind() == HInstruction::kAdd) { + __ Mla(acc.V8H(), left.V8H(), right.V8H()); + } else { + __ Mls(acc.V8H(), left.V8H(), right.V8H()); + } + break; + case Primitive::kPrimInt: + DCHECK_EQ(4u, instr->GetVectorLength()); + if (instr->GetOpKind() == HInstruction::kAdd) { + __ Mla(acc.V4S(), left.V4S(), right.V4S()); + } else { + __ Mls(acc.V4S(), left.V4S(), right.V4S()); + } + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + } +} + // Helper to set up locations for vector memory operations. static void CreateVecMemLocations(ArenaAllocator* arena, HVecMemoryOperation* instruction, diff --git a/compiler/optimizing/code_generator_vector_arm_vixl.cc b/compiler/optimizing/code_generator_vector_arm_vixl.cc index 990178b31b..53f314ec40 100644 --- a/compiler/optimizing/code_generator_vector_arm_vixl.cc +++ b/compiler/optimizing/code_generator_vector_arm_vixl.cc @@ -245,6 +245,14 @@ void InstructionCodeGeneratorARMVIXL::VisitVecUShr(HVecUShr* instruction) { LOG(FATAL) << "No SIMD for " << instruction->GetId(); } +void LocationsBuilderARMVIXL::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) { + LOG(FATAL) << "No SIMD for " << instr->GetId(); +} + +void InstructionCodeGeneratorARMVIXL::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) { + LOG(FATAL) << "No SIMD for " << instr->GetId(); +} + void LocationsBuilderARMVIXL::VisitVecLoad(HVecLoad* instruction) { LOG(FATAL) << "No SIMD for " << instruction->GetId(); } diff --git a/compiler/optimizing/code_generator_vector_mips.cc b/compiler/optimizing/code_generator_vector_mips.cc index 8ea1ca7d90..c4a32252d9 100644 --- a/compiler/optimizing/code_generator_vector_mips.cc +++ b/compiler/optimizing/code_generator_vector_mips.cc @@ -245,6 +245,14 @@ void InstructionCodeGeneratorMIPS::VisitVecUShr(HVecUShr* instruction) { LOG(FATAL) << "No SIMD for " << instruction->GetId(); } +void LocationsBuilderMIPS::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) { + LOG(FATAL) << "No SIMD for " << instr->GetId(); +} + +void InstructionCodeGeneratorMIPS::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) { + LOG(FATAL) << "No SIMD for " << instr->GetId(); +} + void LocationsBuilderMIPS::VisitVecLoad(HVecLoad* instruction) { LOG(FATAL) << "No SIMD for " << instruction->GetId(); } diff --git a/compiler/optimizing/code_generator_vector_mips64.cc b/compiler/optimizing/code_generator_vector_mips64.cc index a484bb4774..50b95c17cb 100644 --- a/compiler/optimizing/code_generator_vector_mips64.cc +++ b/compiler/optimizing/code_generator_vector_mips64.cc @@ -245,6 +245,14 @@ void InstructionCodeGeneratorMIPS64::VisitVecUShr(HVecUShr* instruction) { LOG(FATAL) << "No SIMD for " << instruction->GetId(); } +void LocationsBuilderMIPS64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) { + LOG(FATAL) << "No SIMD for " << instr->GetId(); +} + +void InstructionCodeGeneratorMIPS64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) { + LOG(FATAL) << "No SIMD for " << instr->GetId(); +} + void LocationsBuilderMIPS64::VisitVecLoad(HVecLoad* instruction) { LOG(FATAL) << "No SIMD for " << instruction->GetId(); } diff --git a/compiler/optimizing/code_generator_vector_x86.cc b/compiler/optimizing/code_generator_vector_x86.cc index a86d060821..013b092b5a 100644 --- a/compiler/optimizing/code_generator_vector_x86.cc +++ b/compiler/optimizing/code_generator_vector_x86.cc @@ -730,6 +730,14 @@ void InstructionCodeGeneratorX86::VisitVecUShr(HVecUShr* instruction) { } } +void LocationsBuilderX86::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) { + LOG(FATAL) << "No SIMD for " << instr->GetId(); +} + +void InstructionCodeGeneratorX86::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) { + LOG(FATAL) << "No SIMD for " << instr->GetId(); +} + // Helper to set up locations for vector memory operations. static void CreateVecMemLocations(ArenaAllocator* arena, HVecMemoryOperation* instruction, diff --git a/compiler/optimizing/code_generator_vector_x86_64.cc b/compiler/optimizing/code_generator_vector_x86_64.cc index 696735367e..66f19a4376 100644 --- a/compiler/optimizing/code_generator_vector_x86_64.cc +++ b/compiler/optimizing/code_generator_vector_x86_64.cc @@ -719,6 +719,14 @@ void InstructionCodeGeneratorX86_64::VisitVecUShr(HVecUShr* instruction) { } } +void LocationsBuilderX86_64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) { + LOG(FATAL) << "No SIMD for " << instr->GetId(); +} + +void InstructionCodeGeneratorX86_64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) { + LOG(FATAL) << "No SIMD for " << instr->GetId(); +} + // Helper to set up locations for vector memory operations. static void CreateVecMemLocations(ArenaAllocator* arena, HVecMemoryOperation* instruction, diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc index 1b2b9f80ac..e5d94c3504 100644 --- a/compiler/optimizing/graph_visualizer.cc +++ b/compiler/optimizing/graph_visualizer.cc @@ -514,6 +514,10 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor { StartAttributeStream("rounded") << std::boolalpha << hadd->IsRounded() << std::noboolalpha; } + void VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) OVERRIDE { + StartAttributeStream("kind") << instruction->GetOpKind(); + } + #if defined(ART_ENABLE_CODEGEN_arm) || defined(ART_ENABLE_CODEGEN_arm64) void VisitMultiplyAccumulate(HMultiplyAccumulate* instruction) OVERRIDE { StartAttributeStream("kind") << instruction->GetOpKind(); diff --git a/compiler/optimizing/instruction_simplifier_arm64.cc b/compiler/optimizing/instruction_simplifier_arm64.cc index 73b7b2bd95..f16e3727c8 100644 --- a/compiler/optimizing/instruction_simplifier_arm64.cc +++ b/compiler/optimizing/instruction_simplifier_arm64.cc @@ -210,5 +210,11 @@ void InstructionSimplifierArm64Visitor::VisitXor(HXor* instruction) { } } +void InstructionSimplifierArm64Visitor::VisitVecMul(HVecMul* instruction) { + if (TryCombineVecMultiplyAccumulate(instruction, kArm64)) { + RecordSimplification(); + } +} + } // namespace arm64 } // namespace art diff --git a/compiler/optimizing/instruction_simplifier_arm64.h b/compiler/optimizing/instruction_simplifier_arm64.h index 65654f50f4..eec4e49792 100644 --- a/compiler/optimizing/instruction_simplifier_arm64.h +++ b/compiler/optimizing/instruction_simplifier_arm64.h @@ -74,6 +74,7 @@ class InstructionSimplifierArm64Visitor : public HGraphVisitor { void VisitTypeConversion(HTypeConversion* instruction) OVERRIDE; void VisitUShr(HUShr* instruction) OVERRIDE; void VisitXor(HXor* instruction) OVERRIDE; + void VisitVecMul(HVecMul* instruction) OVERRIDE; OptimizingCompilerStats* stats_; }; diff --git a/compiler/optimizing/instruction_simplifier_shared.cc b/compiler/optimizing/instruction_simplifier_shared.cc index c2b1374f62..7d1f146587 100644 --- a/compiler/optimizing/instruction_simplifier_shared.cc +++ b/compiler/optimizing/instruction_simplifier_shared.cc @@ -278,5 +278,71 @@ bool TryExtractArrayAccessAddress(HInstruction* access, return true; } +bool TryCombineVecMultiplyAccumulate(HVecMul* mul, InstructionSet isa) { + Primitive::Type type = mul->GetPackedType(); + switch (isa) { + case kArm64: + if (!(type == Primitive::kPrimByte || + type == Primitive::kPrimChar || + type == Primitive::kPrimShort || + type == Primitive::kPrimInt)) { + return false; + } + break; + default: + return false; + } + + ArenaAllocator* arena = mul->GetBlock()->GetGraph()->GetArena(); + + if (mul->HasOnlyOneNonEnvironmentUse()) { + HInstruction* use = mul->GetUses().front().GetUser(); + if (use->IsVecAdd() || use->IsVecSub()) { + // Replace code looking like + // VECMUL tmp, x, y + // VECADD/SUB dst, acc, tmp + // with + // VECMULACC dst, acc, x, y + // Note that we do not want to (unconditionally) perform the merge when the + // multiplication has multiple uses and it can be merged in all of them. + // Multiple uses could happen on the same control-flow path, and we would + // then increase the amount of work. In the future we could try to evaluate + // whether all uses are on different control-flow paths (using dominance and + // reverse-dominance information) and only perform the merge when they are. + HInstruction* accumulator = nullptr; + HVecBinaryOperation* binop = use->AsVecBinaryOperation(); + HInstruction* binop_left = binop->GetLeft(); + HInstruction* binop_right = binop->GetRight(); + // This is always true since the `HVecMul` has only one use (which is checked above). + DCHECK_NE(binop_left, binop_right); + if (binop_right == mul) { + accumulator = binop_left; + } else if (use->IsVecAdd()) { + DCHECK_EQ(binop_left, mul); + accumulator = binop_right; + } + + HInstruction::InstructionKind kind = + use->IsVecAdd() ? HInstruction::kAdd : HInstruction::kSub; + if (accumulator != nullptr) { + HVecMultiplyAccumulate* mulacc = + new (arena) HVecMultiplyAccumulate(arena, + kind, + accumulator, + mul->GetLeft(), + mul->GetRight(), + binop->GetPackedType(), + binop->GetVectorLength()); + + binop->GetBlock()->ReplaceAndRemoveInstructionWith(binop, mulacc); + DCHECK(!mul->HasUses()); + mul->GetBlock()->RemoveInstruction(mul); + return true; + } + } + } + + return false; +} } // namespace art diff --git a/compiler/optimizing/instruction_simplifier_shared.h b/compiler/optimizing/instruction_simplifier_shared.h index 83e3ffca57..2ea103a518 100644 --- a/compiler/optimizing/instruction_simplifier_shared.h +++ b/compiler/optimizing/instruction_simplifier_shared.h @@ -58,6 +58,8 @@ bool TryExtractArrayAccessAddress(HInstruction* access, HInstruction* index, size_t data_offset); +bool TryCombineVecMultiplyAccumulate(HVecMul* mul, InstructionSet isa); + } // namespace art #endif // ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_SHARED_H_ diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index 6be237e612..af953c8f99 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -1382,6 +1382,7 @@ class HLoopInformationOutwardIterator : public ValueObject { M(VecShl, VecBinaryOperation) \ M(VecShr, VecBinaryOperation) \ M(VecUShr, VecBinaryOperation) \ + M(VecMultiplyAccumulate, VecOperation) \ M(VecLoad, VecMemoryOperation) \ M(VecStore, VecMemoryOperation) \ diff --git a/compiler/optimizing/nodes_vector.h b/compiler/optimizing/nodes_vector.h index bff58d0910..450691c1ea 100644 --- a/compiler/optimizing/nodes_vector.h +++ b/compiler/optimizing/nodes_vector.h @@ -143,6 +143,10 @@ class HVecBinaryOperation : public HVecOperation { /*number_of_inputs*/ 2, vector_length, dex_pc) { } + + HInstruction* GetLeft() const { return InputAt(0); } + HInstruction* GetRight() const { return InputAt(1); } + DECLARE_ABSTRACT_INSTRUCTION(VecBinaryOperation); private: DISALLOW_COPY_AND_ASSIGN(HVecBinaryOperation); @@ -627,6 +631,59 @@ class HVecUShr FINAL : public HVecBinaryOperation { DISALLOW_COPY_AND_ASSIGN(HVecUShr); }; +// Multiplies every component in the two vectors, adds the result vector to the accumulator vector. +// viz. [ acc1, .., accn ] + [ x1, .. , xn ] * [ y1, .. , yn ] = +// [ acc1 + x1 * y1, .. , accn + xn * yn ]. +class HVecMultiplyAccumulate FINAL : public HVecOperation { + public: + HVecMultiplyAccumulate(ArenaAllocator* arena, + InstructionKind op, + HInstruction* accumulator, + HInstruction* mul_left, + HInstruction* mul_right, + Primitive::Type packed_type, + size_t vector_length, + uint32_t dex_pc = kNoDexPc) + : HVecOperation(arena, + packed_type, + SideEffects::None(), + /*number_of_inputs*/ 3, + vector_length, + dex_pc), + op_kind_(op) { + DCHECK(op == InstructionKind::kAdd || op == InstructionKind::kSub); + DCHECK(accumulator->IsVecOperation()); + DCHECK(mul_left->IsVecOperation() && mul_right->IsVecOperation()); + DCHECK_EQ(accumulator->AsVecOperation()->GetPackedType(), packed_type); + DCHECK_EQ(mul_left->AsVecOperation()->GetPackedType(), packed_type); + DCHECK_EQ(mul_right->AsVecOperation()->GetPackedType(), packed_type); + + SetRawInputAt(kInputAccumulatorIndex, accumulator); + SetRawInputAt(kInputMulLeftIndex, mul_left); + SetRawInputAt(kInputMulRightIndex, mul_right); + } + + static constexpr int kInputAccumulatorIndex = 0; + static constexpr int kInputMulLeftIndex = 1; + static constexpr int kInputMulRightIndex = 2; + + bool CanBeMoved() const OVERRIDE { return true; } + + bool InstructionDataEquals(const HInstruction* other) const OVERRIDE { + return op_kind_ == other->AsVecMultiplyAccumulate()->op_kind_; + } + + InstructionKind GetOpKind() const { return op_kind_; } + + DECLARE_INSTRUCTION(VecMultiplyAccumulate); + + private: + // Indicates if this is a MADD or MSUB. + const InstructionKind op_kind_; + + DISALLOW_COPY_AND_ASSIGN(HVecMultiplyAccumulate); +}; + // Loads a vector from memory, viz. load(mem, 1) // yield the vector [ mem(1), .. , mem(n) ]. class HVecLoad FINAL : public HVecMemoryOperation { diff --git a/test/550-checker-multiply-accumulate/src/Main.java b/test/550-checker-multiply-accumulate/src/Main.java index 09376a2054..810f0faaa6 100644 --- a/test/550-checker-multiply-accumulate/src/Main.java +++ b/test/550-checker-multiply-accumulate/src/Main.java @@ -424,6 +424,88 @@ public class Main { return - (left * right); } + /// CHECK-START-ARM64: void Main.SimdMulAdd(int[], int[]) instruction_simplifier_arm64 (before) + /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: VecMul loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecAdd loop:<<Loop>> outer_loop:none + + /// CHECK-START-ARM64: void Main.SimdMulAdd(int[], int[]) instruction_simplifier_arm64 (after) + /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: VecMultiplyAccumulate kind:Add loop:<<Loop>> outer_loop:none + + /// CHECK-START-ARM64: void Main.SimdMulAdd(int[], int[]) instruction_simplifier_arm64 (after) + /// CHECK-NOT: VecMull + /// CHECK-NOT: VecAdd + public static void SimdMulAdd(int[] array1, int[] array2) { + for (int j = 0; j < 100; j++) { + array2[j] += 12345 * array1[j]; + } + } + + /// CHECK-START-ARM64: void Main.SimdMulSub(int[], int[]) instruction_simplifier_arm64 (before) + /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: VecMul loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecSub loop:<<Loop>> outer_loop:none + + /// CHECK-START-ARM64: void Main.SimdMulSub(int[], int[]) instruction_simplifier_arm64 (after) + /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: VecMultiplyAccumulate kind:Sub loop:<<Loop>> outer_loop:none + + /// CHECK-START-ARM64: void Main.SimdMulSub(int[], int[]) instruction_simplifier_arm64 (after) + /// CHECK-NOT: VecMull + /// CHECK-NOT: VecSub + public static void SimdMulSub(int[] array1, int[] array2) { + for (int j = 0; j < 100; j++) { + array2[j] -= 12345 * array1[j]; + } + } + + /// CHECK-START-ARM64: void Main.SimdMulMultipleUses(int[], int[]) instruction_simplifier_arm64 (before) + /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: VecMul loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecSub loop:<<Loop>> outer_loop:none + + /// CHECK-START-ARM64: void Main.SimdMulMultipleUses(int[], int[]) instruction_simplifier_arm64 (after) + /// CHECK-NOT: VecMultiplyAccumulate + public static void SimdMulMultipleUses(int[] array1, int[] array2) { + for (int j = 0; j < 100; j++) { + int temp = 12345 * array1[j]; + array2[j] -= temp; + array1[j] = temp; + } + } + + public static final int ARRAY_SIZE = 1000; + + public static void initArray(int[] array) { + for (int i = 0; i < ARRAY_SIZE; i++) { + array[i] = i; + } + } + + public static int calcArraySum(int[] array) { + int sum = 0; + for (int i = 0; i < ARRAY_SIZE; i++) { + sum += array[i]; + } + return sum; + } + + public static void testSimdMultiplyAccumulate() { + int[] array1 = new int[ARRAY_SIZE]; + int[] array2 = new int[ARRAY_SIZE]; + + initArray(array1); + initArray(array2); + SimdMulSub(array1, array2); + assertIntEquals(-60608250, calcArraySum(array2)); + + initArray(array1); + initArray(array2); + SimdMulAdd(array1, array2); + assertIntEquals(61607250, calcArraySum(array2)); + } + public static void main(String[] args) { assertIntEquals(7, $opt$noinline$mulAdd(1, 2, 3)); assertLongEquals(-26, $opt$noinline$mulSub(4, 5, 6)); @@ -433,5 +515,7 @@ public class Main { assertLongEquals(-225, $opt$noinline$mulMinusOne(15, 16)); assertIntEquals(-306, $opt$noinline$mulNeg(17, 18)); assertLongEquals(-380, $opt$noinline$mulNeg(19, 20)); + + testSimdMultiplyAccumulate(); } } |