diff options
18 files changed, 1801 insertions, 22 deletions
diff --git a/compiler/optimizing/code_generator_vector_arm64.cc b/compiler/optimizing/code_generator_vector_arm64.cc index 43169ba7eb..e79a96bc2a 100644 --- a/compiler/optimizing/code_generator_vector_arm64.cc +++ b/compiler/optimizing/code_generator_vector_arm64.cc @@ -1277,6 +1277,74 @@ void InstructionCodeGeneratorARM64::VisitVecSADAccumulate(HVecSADAccumulate* ins } } +void LocationsBuilderARM64::VisitVecDotProd(HVecDotProd* instruction) { + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); + DCHECK(instruction->GetPackedType() == DataType::Type::kInt32); + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RequiresFpuRegister()); + locations->SetInAt(2, Location::RequiresFpuRegister()); + locations->SetOut(Location::SameAsFirstInput()); + + // For Int8 and Uint8 inputs we need a temp register to hold the widened 16-bit products. + if (DataType::Size(instruction->InputAt(1)->AsVecOperation()->GetPackedType()) == 1) { + locations->AddTemp(Location::RequiresFpuRegister()); + } +} + +void InstructionCodeGeneratorARM64::VisitVecDotProd(HVecDotProd* instruction) { + LocationSummary* locations = instruction->GetLocations(); + DCHECK(locations->InAt(0).Equals(locations->Out())); + VRegister acc = VRegisterFrom(locations->InAt(0)); + VRegister left = VRegisterFrom(locations->InAt(1)); + VRegister right = VRegisterFrom(locations->InAt(2)); + HVecOperation* a = instruction->InputAt(1)->AsVecOperation(); + HVecOperation* b = instruction->InputAt(2)->AsVecOperation(); + DCHECK_EQ(HVecOperation::ToSignedType(a->GetPackedType()), + HVecOperation::ToSignedType(b->GetPackedType())); + DCHECK_EQ(instruction->GetPackedType(), DataType::Type::kInt32); + DCHECK_EQ(4u, instruction->GetVectorLength()); + + size_t inputs_data_size = DataType::Size(a->GetPackedType()); + switch (inputs_data_size) { + case 1u: { + DCHECK_EQ(16u, a->GetVectorLength()); + VRegister tmp = VRegisterFrom(locations->GetTemp(0)); + if (instruction->IsZeroExtending()) { 
// TODO: Use Armv8.4-A UDOT instruction when it is available. + __ Umull(tmp.V8H(), left.V8B(), right.V8B()); + __ Uaddw(acc.V4S(), acc.V4S(), tmp.V4H()); + __ Uaddw2(acc.V4S(), acc.V4S(), tmp.V8H()); + + __ Umull2(tmp.V8H(), left.V16B(), right.V16B()); + __ Uaddw(acc.V4S(), acc.V4S(), tmp.V4H()); + __ Uaddw2(acc.V4S(), acc.V4S(), tmp.V8H()); + } else { + // TODO: Use Armv8.4-A SDOT instruction when it is available. + __ Smull(tmp.V8H(), left.V8B(), right.V8B()); + __ Saddw(acc.V4S(), acc.V4S(), tmp.V4H()); + __ Saddw2(acc.V4S(), acc.V4S(), tmp.V8H()); + + __ Smull2(tmp.V8H(), left.V16B(), right.V16B()); + __ Saddw(acc.V4S(), acc.V4S(), tmp.V4H()); + __ Saddw2(acc.V4S(), acc.V4S(), tmp.V8H()); + } + break; + } + case 2u: + DCHECK_EQ(8u, a->GetVectorLength()); + if (instruction->IsZeroExtending()) { + __ Umlal(acc.V4S(), left.V4H(), right.V4H()); + __ Umlal2(acc.V4S(), left.V8H(), right.V8H()); + } else { + __ Smlal(acc.V4S(), left.V4H(), right.V4H()); + __ Smlal2(acc.V4S(), left.V8H(), right.V8H()); + } + break; + default: + LOG(FATAL) << "Unsupported SIMD type size: " << inputs_data_size; + } +} + // Helper to set up locations for vector memory operations. 
static void CreateVecMemLocations(ArenaAllocator* allocator, HVecMemoryOperation* instruction, diff --git a/compiler/optimizing/code_generator_vector_arm_vixl.cc b/compiler/optimizing/code_generator_vector_arm_vixl.cc index 7b66b17983..62b6c4ea01 100644 --- a/compiler/optimizing/code_generator_vector_arm_vixl.cc +++ b/compiler/optimizing/code_generator_vector_arm_vixl.cc @@ -854,6 +854,14 @@ void InstructionCodeGeneratorARMVIXL::VisitVecSADAccumulate(HVecSADAccumulate* i } } +void LocationsBuilderARMVIXL::VisitVecDotProd(HVecDotProd* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void InstructionCodeGeneratorARMVIXL::VisitVecDotProd(HVecDotProd* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + // Return whether the vector memory access operation is guaranteed to be word-aligned (ARM word // size equals to 4). static bool IsWordAligned(HVecMemoryOperation* instruction) { diff --git a/compiler/optimizing/code_generator_vector_mips.cc b/compiler/optimizing/code_generator_vector_mips.cc index df0e1485d6..24f4fb2d7b 100644 --- a/compiler/optimizing/code_generator_vector_mips.cc +++ b/compiler/optimizing/code_generator_vector_mips.cc @@ -1274,6 +1274,14 @@ void InstructionCodeGeneratorMIPS::VisitVecSADAccumulate(HVecSADAccumulate* inst } } +void LocationsBuilderMIPS::VisitVecDotProd(HVecDotProd* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void InstructionCodeGeneratorMIPS::VisitVecDotProd(HVecDotProd* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + // Helper to set up locations for vector memory operations. 
static void CreateVecMemLocations(ArenaAllocator* allocator, HVecMemoryOperation* instruction, diff --git a/compiler/optimizing/code_generator_vector_mips64.cc b/compiler/optimizing/code_generator_vector_mips64.cc index de354b63a1..972c49ebb1 100644 --- a/compiler/optimizing/code_generator_vector_mips64.cc +++ b/compiler/optimizing/code_generator_vector_mips64.cc @@ -1272,6 +1272,14 @@ void InstructionCodeGeneratorMIPS64::VisitVecSADAccumulate(HVecSADAccumulate* in } } +void LocationsBuilderMIPS64::VisitVecDotProd(HVecDotProd* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void InstructionCodeGeneratorMIPS64::VisitVecDotProd(HVecDotProd* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + // Helper to set up locations for vector memory operations. static void CreateVecMemLocations(ArenaAllocator* allocator, HVecMemoryOperation* instruction, diff --git a/compiler/optimizing/code_generator_vector_x86.cc b/compiler/optimizing/code_generator_vector_x86.cc index 2502275b3a..c52ecc77c5 100644 --- a/compiler/optimizing/code_generator_vector_x86.cc +++ b/compiler/optimizing/code_generator_vector_x86.cc @@ -1143,6 +1143,14 @@ void InstructionCodeGeneratorX86::VisitVecSADAccumulate(HVecSADAccumulate* instr LOG(FATAL) << "No SIMD for " << instruction->GetId(); } +void LocationsBuilderX86::VisitVecDotProd(HVecDotProd* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void InstructionCodeGeneratorX86::VisitVecDotProd(HVecDotProd* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + // Helper to set up locations for vector memory operations. 
static void CreateVecMemLocations(ArenaAllocator* allocator, HVecMemoryOperation* instruction, diff --git a/compiler/optimizing/code_generator_vector_x86_64.cc b/compiler/optimizing/code_generator_vector_x86_64.cc index 4a67dafd8a..87d0106c3e 100644 --- a/compiler/optimizing/code_generator_vector_x86_64.cc +++ b/compiler/optimizing/code_generator_vector_x86_64.cc @@ -1116,6 +1116,14 @@ void InstructionCodeGeneratorX86_64::VisitVecSADAccumulate(HVecSADAccumulate* in LOG(FATAL) << "No SIMD for " << instruction->GetId(); } +void LocationsBuilderX86_64::VisitVecDotProd(HVecDotProd* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void InstructionCodeGeneratorX86_64::VisitVecDotProd(HVecDotProd* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + // Helper to set up locations for vector memory operations. static void CreateVecMemLocations(ArenaAllocator* allocator, HVecMemoryOperation* instruction, diff --git a/compiler/optimizing/data_type.h b/compiler/optimizing/data_type.h index 5ac6e46003..3cbcc9e0c3 100644 --- a/compiler/optimizing/data_type.h +++ b/compiler/optimizing/data_type.h @@ -231,6 +231,21 @@ class DataType { } } + static Type ToUnsigned(Type type) { + switch (type) { + case Type::kInt8: + return Type::kUint8; + case Type::kInt16: + return Type::kUint16; + case Type::kInt32: + return Type::kUint32; + case Type::kInt64: + return Type::kUint64; + default: + return type; + } + } + static const char* PrettyDescriptor(Type type); private: diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc index 31db8c205f..21f22af3c3 100644 --- a/compiler/optimizing/graph_visualizer.cc +++ b/compiler/optimizing/graph_visualizer.cc @@ -564,6 +564,14 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor { StartAttributeStream("kind") << instruction->GetOpKind(); } + void VisitVecDotProd(HVecDotProd* instruction) override { + VisitVecOperation(instruction); + DataType::Type 
arg_type = instruction->InputAt(1)->AsVecOperation()->GetPackedType(); + StartAttributeStream("type") << (instruction->IsZeroExtending() ? + DataType::ToUnsigned(arg_type) : + DataType::ToSigned(arg_type)); + } + #if defined(ART_ENABLE_CODEGEN_arm) || defined(ART_ENABLE_CODEGEN_arm64) void VisitMultiplyAccumulate(HMultiplyAccumulate* instruction) override { StartAttributeStream("kind") << instruction->GetOpKind(); diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc index 7d66155b39..12b180d5ff 100644 --- a/compiler/optimizing/loop_optimization.cc +++ b/compiler/optimizing/loop_optimization.cc @@ -351,7 +351,10 @@ static bool HasReductionFormat(HInstruction* reduction, HInstruction* phi) { // Translates vector operation to reduction kind. static HVecReduce::ReductionKind GetReductionKind(HVecOperation* reduction) { - if (reduction->IsVecAdd() || reduction->IsVecSub() || reduction->IsVecSADAccumulate()) { + if (reduction->IsVecAdd() || + reduction->IsVecSub() || + reduction->IsVecSADAccumulate() || + reduction->IsVecDotProd()) { return HVecReduce::kSum; } LOG(FATAL) << "Unsupported SIMD reduction " << reduction->GetId(); @@ -431,6 +434,23 @@ static void PeelByCount(HLoopInformation* loop_info, int count) { } } +// Returns the narrowest type among a, b and, for type conversions, their inputs. +static DataType::Type GetNarrowerType(HInstruction* a, HInstruction* b) { + DataType::Type type = a->GetType(); + if (DataType::Size(b->GetType()) < DataType::Size(type)) { + type = b->GetType(); + } + if (a->IsTypeConversion() && + DataType::Size(a->InputAt(0)->GetType()) < DataType::Size(type)) { + type = a->InputAt(0)->GetType(); + } + if (b->IsTypeConversion() && + DataType::Size(b->InputAt(0)->GetType()) < DataType::Size(type)) { + type = b->InputAt(0)->GetType(); + } + return type; +} + // // Public methods. 
// @@ -1289,6 +1309,7 @@ bool HLoopOptimization::VectorizeDef(LoopNode* node, DataType::Type type = instruction->GetType(); // Recognize SAD idiom or direct reduction. if (VectorizeSADIdiom(node, instruction, generate_code, type, restrictions) || + VectorizeDotProdIdiom(node, instruction, generate_code, type, restrictions) || (TrySetVectorType(type, &restrictions) && VectorizeUse(node, instruction, generate_code, type, restrictions))) { if (generate_code) { @@ -1531,11 +1552,11 @@ bool HLoopOptimization::TrySetVectorType(DataType::Type type, uint64_t* restrict case DataType::Type::kBool: case DataType::Type::kUint8: case DataType::Type::kInt8: - *restrictions |= kNoDiv | kNoReduction; + *restrictions |= kNoDiv | kNoReduction | kNoDotProd; return TrySetVectorLength(8); case DataType::Type::kUint16: case DataType::Type::kInt16: - *restrictions |= kNoDiv | kNoStringCharAt | kNoReduction; + *restrictions |= kNoDiv | kNoStringCharAt | kNoReduction | kNoDotProd; return TrySetVectorLength(4); case DataType::Type::kInt32: *restrictions |= kNoDiv | kNoWideSAD; @@ -1580,12 +1601,23 @@ bool HLoopOptimization::TrySetVectorType(DataType::Type type, uint64_t* restrict case DataType::Type::kBool: case DataType::Type::kUint8: case DataType::Type::kInt8: - *restrictions |= - kNoMul | kNoDiv | kNoShift | kNoAbs | kNoSignedHAdd | kNoUnroundedHAdd | kNoSAD; + *restrictions |= kNoMul | + kNoDiv | + kNoShift | + kNoAbs | + kNoSignedHAdd | + kNoUnroundedHAdd | + kNoSAD | + kNoDotProd; return TrySetVectorLength(16); case DataType::Type::kUint16: case DataType::Type::kInt16: - *restrictions |= kNoDiv | kNoAbs | kNoSignedHAdd | kNoUnroundedHAdd | kNoSAD; + *restrictions |= kNoDiv | + kNoAbs | + kNoSignedHAdd | + kNoUnroundedHAdd | + kNoSAD| + kNoDotProd; return TrySetVectorLength(8); case DataType::Type::kInt32: *restrictions |= kNoDiv | kNoSAD; @@ -1610,11 +1642,11 @@ bool HLoopOptimization::TrySetVectorType(DataType::Type type, uint64_t* restrict case DataType::Type::kBool: case 
DataType::Type::kUint8: case DataType::Type::kInt8: - *restrictions |= kNoDiv; + *restrictions |= kNoDiv | kNoDotProd; return TrySetVectorLength(16); case DataType::Type::kUint16: case DataType::Type::kInt16: - *restrictions |= kNoDiv | kNoStringCharAt; + *restrictions |= kNoDiv | kNoStringCharAt | kNoDotProd; return TrySetVectorLength(8); case DataType::Type::kInt32: *restrictions |= kNoDiv; @@ -1639,11 +1671,11 @@ bool HLoopOptimization::TrySetVectorType(DataType::Type type, uint64_t* restrict case DataType::Type::kBool: case DataType::Type::kUint8: case DataType::Type::kInt8: - *restrictions |= kNoDiv; + *restrictions |= kNoDiv | kNoDotProd; return TrySetVectorLength(16); case DataType::Type::kUint16: case DataType::Type::kInt16: - *restrictions |= kNoDiv | kNoStringCharAt; + *restrictions |= kNoDiv | kNoStringCharAt | kNoDotProd; return TrySetVectorLength(8); case DataType::Type::kInt32: *restrictions |= kNoDiv; @@ -2071,18 +2103,7 @@ bool HLoopOptimization::VectorizeSADIdiom(LoopNode* node, HInstruction* r = a; HInstruction* s = b; bool is_unsigned = false; - DataType::Type sub_type = a->GetType(); - if (DataType::Size(b->GetType()) < DataType::Size(sub_type)) { - sub_type = b->GetType(); - } - if (a->IsTypeConversion() && - DataType::Size(a->InputAt(0)->GetType()) < DataType::Size(sub_type)) { - sub_type = a->InputAt(0)->GetType(); - } - if (b->IsTypeConversion() && - DataType::Size(b->InputAt(0)->GetType()) < DataType::Size(sub_type)) { - sub_type = b->InputAt(0)->GetType(); - } + DataType::Type sub_type = GetNarrowerType(a, b); if (reduction_type != sub_type && (!IsNarrowerOperands(a, b, sub_type, &r, &s, &is_unsigned) || is_unsigned)) { return false; @@ -2123,6 +2144,75 @@ bool HLoopOptimization::VectorizeSADIdiom(LoopNode* node, return false; } +// Method recognises the following dot product idiom: +// q += a * b for operands a, b whose type is narrower than the reduction one. 
+// Provided that the operands have the same type or are promoted to a wider form. +// Since this may involve a vector length change, the idiom is handled by going directly +// to a dot product node (rather than relying on combining finer grained nodes later). +bool HLoopOptimization::VectorizeDotProdIdiom(LoopNode* node, + HInstruction* instruction, + bool generate_code, + DataType::Type reduction_type, + uint64_t restrictions) { + if (!instruction->IsAdd() || (reduction_type != DataType::Type::kInt32)) { + return false; + } + + HInstruction* q = instruction->InputAt(0); + HInstruction* v = instruction->InputAt(1); + if (!v->IsMul() || v->GetType() != reduction_type) { + return false; + } + + HInstruction* a = v->InputAt(0); + HInstruction* b = v->InputAt(1); + HInstruction* r = a; + HInstruction* s = b; + DataType::Type op_type = GetNarrowerType(a, b); + bool is_unsigned = false; + + if (!IsNarrowerOperands(a, b, op_type, &r, &s, &is_unsigned)) { + return false; + } + op_type = HVecOperation::ToProperType(op_type, is_unsigned); + + if (!TrySetVectorType(op_type, &restrictions) || + HasVectorRestrictions(restrictions, kNoDotProd)) { + return false; + } + + DCHECK(r != nullptr && s != nullptr); + // Accept dot product idiom for vectorizable operands. Vectorized code uses the shorthand + // idiomatic operation. Sequential code uses the original scalar expressions. 
+ if (generate_code && vector_mode_ != kVector) { // de-idiom + r = a; + s = b; + } + if (VectorizeUse(node, q, generate_code, op_type, restrictions) && + VectorizeUse(node, r, generate_code, op_type, restrictions) && + VectorizeUse(node, s, generate_code, op_type, restrictions)) { + if (generate_code) { + if (vector_mode_ == kVector) { + vector_map_->Put(instruction, new (global_allocator_) HVecDotProd( + global_allocator_, + vector_map_->Get(q), + vector_map_->Get(r), + vector_map_->Get(s), + reduction_type, + is_unsigned, + GetOtherVL(reduction_type, op_type, vector_length_), + kNoDexPc)); + MaybeRecordStat(stats_, MethodCompilationStat::kLoopVectorizedIdiom); + } else { + GenerateVecOp(v, vector_map_->Get(r), vector_map_->Get(s), reduction_type); + GenerateVecOp(instruction, vector_map_->Get(q), vector_map_->Get(v), reduction_type); + } + } + return true; + } + return false; +} + // // Vectorization heuristics. // diff --git a/compiler/optimizing/loop_optimization.h b/compiler/optimizing/loop_optimization.h index 2b202fda75..1a842c4bf3 100644 --- a/compiler/optimizing/loop_optimization.h +++ b/compiler/optimizing/loop_optimization.h @@ -82,6 +82,7 @@ class HLoopOptimization : public HOptimization { kNoReduction = 1 << 9, // no reduction kNoSAD = 1 << 10, // no sum of absolute differences (SAD) kNoWideSAD = 1 << 11, // no sum of absolute differences (SAD) with operand widening + kNoDotProd = 1 << 12, // no dot product }; /* @@ -217,6 +218,11 @@ class HLoopOptimization : public HOptimization { bool generate_code, DataType::Type type, uint64_t restrictions); + bool VectorizeDotProdIdiom(LoopNode* node, + HInstruction* instruction, + bool generate_code, + DataType::Type type, + uint64_t restrictions); // Vectorization heuristics. 
Alignment ComputeAlignment(HInstruction* offset, diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index 68f1a2406a..76887f9a5b 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -1453,6 +1453,7 @@ class HLoopInformationOutwardIterator : public ValueObject { M(VecSetScalars, VecOperation) \ M(VecMultiplyAccumulate, VecOperation) \ M(VecSADAccumulate, VecOperation) \ + M(VecDotProd, VecOperation) \ M(VecLoad, VecMemoryOperation) \ M(VecStore, VecMemoryOperation) \ diff --git a/compiler/optimizing/nodes_vector.h b/compiler/optimizing/nodes_vector.h index c7539f2846..597e399dd1 100644 --- a/compiler/optimizing/nodes_vector.h +++ b/compiler/optimizing/nodes_vector.h @@ -1021,6 +1021,66 @@ class HVecSADAccumulate final : public HVecOperation { DEFAULT_COPY_CONSTRUCTOR(VecSADAccumulate); }; +// Performs dot product of two vectors and adds the result to wider precision components in +// the accumulator. +// +// viz. DOT_PRODUCT([ a1, .. , am], [ x1, .. , xn ], [ y1, .. , yn ]) = +// [ a1 + sum(xi * yi), .. , am + sum(xj * yj) ], +// for m <= n, non-overlapping sums, +// for either both signed or both unsigned operands x, y. +// +// Notes: +// - packed type reflects the type of sum reduction, not the type of the operands. +// - IsZeroExtending() is used to determine the kind of signed/zero extension to be +// performed for the operands. +// +// TODO: Support types other than kInt32 for packed type. 
+class HVecDotProd final : public HVecOperation { + public: + HVecDotProd(ArenaAllocator* allocator, + HInstruction* accumulator, + HInstruction* left, + HInstruction* right, + DataType::Type packed_type, + bool is_zero_extending, + size_t vector_length, + uint32_t dex_pc) + : HVecOperation(kVecDotProd, + allocator, + packed_type, + SideEffects::None(), + /* number_of_inputs */ 3, + vector_length, + dex_pc) { + DCHECK(HasConsistentPackedTypes(accumulator, packed_type)); + DCHECK(DataType::IsIntegralType(packed_type)); + DCHECK(left->IsVecOperation()); + DCHECK(right->IsVecOperation()); + DCHECK_EQ(ToSignedType(left->AsVecOperation()->GetPackedType()), + ToSignedType(right->AsVecOperation()->GetPackedType())); + SetRawInputAt(0, accumulator); + SetRawInputAt(1, left); + SetRawInputAt(2, right); + SetPackedFlag<kFieldHDotProdIsZeroExtending>(is_zero_extending); + } + + bool IsZeroExtending() const { return GetPackedFlag<kFieldHDotProdIsZeroExtending>(); } + + bool CanBeMoved() const override { return true; } + + DECLARE_INSTRUCTION(VecDotProd); + + protected: + DEFAULT_COPY_CONSTRUCTOR(VecDotProd); + + private: + // Additional packed bits. + static constexpr size_t kFieldHDotProdIsZeroExtending = + HVecOperation::kNumberOfVectorOpPackedBits; + static constexpr size_t kNumberOfHDotProdPackedBits = kFieldHDotProdIsZeroExtending + 1; + static_assert(kNumberOfHDotProdPackedBits <= kMaxNumberOfPackedBits, "Too many packed fields."); +}; + // Loads a vector from memory, viz. load(mem, 1) // yield the vector [ mem(1), .. , mem(n) ]. 
class HVecLoad final : public HVecMemoryOperation { diff --git a/test/684-checker-simd-dotprod/expected.txt b/test/684-checker-simd-dotprod/expected.txt new file mode 100644 index 0000000000..b0aad4deb5 --- /dev/null +++ b/test/684-checker-simd-dotprod/expected.txt @@ -0,0 +1 @@ +passed diff --git a/test/684-checker-simd-dotprod/info.txt b/test/684-checker-simd-dotprod/info.txt new file mode 100644 index 0000000000..6c1efb6296 --- /dev/null +++ b/test/684-checker-simd-dotprod/info.txt @@ -0,0 +1 @@ +Functional tests on dot product idiom SIMD vectorization. diff --git a/test/684-checker-simd-dotprod/src/Main.java b/test/684-checker-simd-dotprod/src/Main.java new file mode 100644 index 0000000000..e0c87161dd --- /dev/null +++ b/test/684-checker-simd-dotprod/src/Main.java @@ -0,0 +1,31 @@ +/* + * Copyright (C) 2018 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import other.TestByte; +import other.TestCharShort; +import other.TestVarious; + +/** + * Tests for dot product idiom vectorization. 
+ */ +public class Main { + public static void main(String[] args) { + TestByte.run(); + TestCharShort.run(); + TestVarious.run(); + System.out.println("passed"); + } +} diff --git a/test/684-checker-simd-dotprod/src/other/TestByte.java b/test/684-checker-simd-dotprod/src/other/TestByte.java new file mode 100644 index 0000000000..9acfc59cc7 --- /dev/null +++ b/test/684-checker-simd-dotprod/src/other/TestByte.java @@ -0,0 +1,484 @@ +/* + * Copyright (C) 2018 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package other; + +/** + * Tests for dot product idiom vectorization: byte case. 
+ */ +public class TestByte { + + public static final int ARRAY_SIZE = 1024; + + /// CHECK-START: int other.TestByte.testDotProdSimple(byte[], byte[]) loop_optimization (before) + /// CHECK-DAG: <<Const0:i\d+>> IntConstant 0 loop:none + /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 loop:none + /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Const0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Phi2:i\d+>> Phi [<<Const1>>,{{i\d+}}] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get1:b\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get2:b\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Mul:i\d+>> Mul [<<Get1>>,<<Get2>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi2>>,<<Mul>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi1>>,<<Const1>>] loop:<<Loop>> outer_loop:none + + /// CHECK-START-{ARM64}: int other.TestByte.testDotProdSimple(byte[], byte[]) loop_optimization (after) + /// CHECK-DAG: <<Const0:i\d+>> IntConstant 0 loop:none + /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 loop:none + /// CHECK-DAG: <<Const16:i\d+>> IntConstant 16 loop:none + /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<Const1>>] loop:none + /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Const0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Load1:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Load2:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecDotProd [<<Phi2>>,<<Load1>>,<<Load2>>] type:Int8 loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi1>>,<<Const16>>] loop:<<Loop>> outer_loop:none + // + /// CHECK-DAG: <<Reduce:d\d+>> VecReduce [<<Phi2>>] loop:none + /// CHECK-DAG: VecExtractScalar [<<Reduce>>] loop:none + public static final int testDotProdSimple(byte[] a, byte[] b) { + int s = 1; + for (int i = 0; i < b.length; i++) { 
+ int temp = a[i] * b[i]; + s += temp; + } + return s - 1; + } + + /// CHECK-START: int other.TestByte.testDotProdComplex(byte[], byte[]) loop_optimization (before) + /// CHECK-DAG: <<Const0:i\d+>> IntConstant 0 loop:none + /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 loop:none + /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Const0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Phi2:i\d+>> Phi [<<Const1>>,{{i\d+}}] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get1:b\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<AddC1:i\d+>> Add [<<Get1>>,<<Const1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<TypeC1:b\d+>> TypeConversion [<<AddC1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get2:b\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<AddC2:i\d+>> Add [<<Get2>>,<<Const1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<TypeC2:b\d+>> TypeConversion [<<AddC2>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Mul:i\d+>> Mul [<<TypeC1>>,<<TypeC2>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi2>>,<<Mul>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi1>>,<<Const1>>] loop:<<Loop>> outer_loop:none + + /// CHECK-START-{ARM64}: int other.TestByte.testDotProdComplex(byte[], byte[]) loop_optimization (after) + /// CHECK-DAG: <<Const0:i\d+>> IntConstant 0 loop:none + /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 loop:none + /// CHECK-DAG: <<Const16:i\d+>> IntConstant 16 loop:none + /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<Const1>>] loop:none + /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<Const1>>] loop:none + /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Const0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Load1:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<VAdd1:d\d+>> VecAdd [<<Load1>>,<<Repl>>] loop:<<Loop>> 
outer_loop:none + /// CHECK-DAG: <<Load2:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<VAdd2:d\d+>> VecAdd [<<Load2>>,<<Repl>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecDotProd [<<Phi2>>,<<VAdd1>>,<<VAdd2>>] type:Int8 loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi1>>,<<Const16>>] loop:<<Loop>> outer_loop:none + // + /// CHECK-DAG: <<Reduce:d\d+>> VecReduce [<<Phi2>>] loop:none + /// CHECK-DAG: VecExtractScalar [<<Reduce>>] loop:none + public static final int testDotProdComplex(byte[] a, byte[] b) { + int s = 1; + for (int i = 0; i < b.length; i++) { + int temp = ((byte)(a[i] + 1)) * ((byte)(b[i] + 1)); + s += temp; + } + return s - 1; + } + + /// CHECK-START: int other.TestByte.testDotProdSimpleUnsigned(byte[], byte[]) loop_optimization (before) + /// CHECK-DAG: <<Const0:i\d+>> IntConstant 0 loop:none + /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 loop:none + /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Const0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Phi2:i\d+>> Phi [<<Const1>>,{{i\d+}}] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get1:a\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get2:a\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Mul:i\d+>> Mul [<<Get1>>,<<Get2>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi2>>,<<Mul>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi1>>,<<Const1>>] loop:<<Loop>> outer_loop:none + + /// CHECK-START-{ARM64}: int other.TestByte.testDotProdSimpleUnsigned(byte[], byte[]) loop_optimization (after) + /// CHECK-DAG: <<Const0:i\d+>> IntConstant 0 loop:none + /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 loop:none + /// CHECK-DAG: <<Const16:i\d+>> IntConstant 16 loop:none + /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<Const1>>] loop:none + /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Const0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Phi2:d\d+>> Phi 
[<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Load1:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Load2:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecDotProd [<<Phi2>>,<<Load1>>,<<Load2>>] type:Uint8 loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi1>>,<<Const16>>] loop:<<Loop>> outer_loop:none + // + /// CHECK-DAG: <<Reduce:d\d+>> VecReduce [<<Phi2>>] loop:none + /// CHECK-DAG: VecExtractScalar [<<Reduce>>] loop:none + public static final int testDotProdSimpleUnsigned(byte[] a, byte[] b) { + int s = 1; + for (int i = 0; i < b.length; i++) { + int temp = (a[i] & 0xff) * (b[i] & 0xff); + s += temp; + } + return s - 1; + } + + /// CHECK-START: int other.TestByte.testDotProdComplexUnsigned(byte[], byte[]) loop_optimization (before) + /// CHECK-DAG: <<Const0:i\d+>> IntConstant 0 loop:none + /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 loop:none + /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Const0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Phi2:i\d+>> Phi [<<Const1>>,{{i\d+}}] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get1:a\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<AddC:i\d+>> Add [<<Get1>>,<<Const1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<TypeC1:a\d+>> TypeConversion [<<AddC>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get2:a\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<AddGets:i\d+>> Add [<<Get2>>,<<Const1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<TypeC2:a\d+>> TypeConversion [<<AddGets>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Mul:i\d+>> Mul [<<TypeC1>>,<<TypeC2>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi2>>,<<Mul>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi1>>,<<Const1>>] loop:<<Loop>> outer_loop:none + + /// CHECK-START-{ARM64}: int other.TestByte.testDotProdComplexUnsigned(byte[], byte[]) 
loop_optimization (after) + /// CHECK-DAG: <<Const0:i\d+>> IntConstant 0 loop:none + /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 loop:none + /// CHECK-DAG: <<Const16:i\d+>> IntConstant 16 loop:none + /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<Const1>>] loop:none + /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<Const1>>] loop:none + /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Const0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Load1:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<VAdd1:d\d+>> VecAdd [<<Load1>>,<<Repl>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Load2:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<VAdd2:d\d+>> VecAdd [<<Load2>>,<<Repl>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecDotProd [<<Phi2>>,<<VAdd1>>,<<VAdd2>>] type:Uint8 loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi1>>,<<Const16>>] loop:<<Loop>> outer_loop:none + // + /// CHECK-DAG: <<Reduce:d\d+>> VecReduce [<<Phi2>>] loop:none + /// CHECK-DAG: VecExtractScalar [<<Reduce>>] loop:none + public static final int testDotProdComplexUnsigned(byte[] a, byte[] b) { + int s = 1; + for (int i = 0; i < b.length; i++) { + int temp = (((a[i] & 0xff) + 1) & 0xff) * (((b[i] & 0xff) + 1) & 0xff); + s += temp; + } + return s - 1; + } + + /// CHECK-START: int other.TestByte.testDotProdComplexUnsignedCastedToSigned(byte[], byte[]) loop_optimization (before) + /// CHECK-DAG: <<Const0:i\d+>> IntConstant 0 loop:none + /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 loop:none + /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Const0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Phi2:i\d+>> Phi [<<Const1>>,{{i\d+}}] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get1:a\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<AddC:i\d+>> Add [<<Get1>>,<<Const1>>] loop:<<Loop>> 
outer_loop:none + /// CHECK-DAG: <<TypeC1:b\d+>> TypeConversion [<<AddC>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get2:a\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<AddGets:i\d+>> Add [<<Get2>>,<<Const1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<TypeC2:b\d+>> TypeConversion [<<AddGets>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Mul:i\d+>> Mul [<<TypeC1>>,<<TypeC2>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi2>>,<<Mul>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi1>>,<<Const1>>] loop:<<Loop>> outer_loop:none + + /// CHECK-START-{ARM64}: int other.TestByte.testDotProdComplexUnsignedCastedToSigned(byte[], byte[]) loop_optimization (after) + /// CHECK-DAG: <<Const0:i\d+>> IntConstant 0 loop:none + /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 loop:none + /// CHECK-DAG: <<Const16:i\d+>> IntConstant 16 loop:none + /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<Const1>>] loop:none + /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<Const1>>] loop:none + /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Const0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Load1:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<VAdd1:d\d+>> VecAdd [<<Load1>>,<<Repl>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Load2:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<VAdd2:d\d+>> VecAdd [<<Load2>>,<<Repl>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecDotProd [<<Phi2>>,<<VAdd1>>,<<VAdd2>>] type:Int8 loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi1>>,<<Const16>>] loop:<<Loop>> outer_loop:none + // + /// CHECK-DAG: <<Reduce:d\d+>> VecReduce [<<Phi2>>] loop:none + /// CHECK-DAG: VecExtractScalar [<<Reduce>>] loop:none + public static final int testDotProdComplexUnsignedCastedToSigned(byte[] a, byte[] b) { + int s = 1; + for (int 
i = 0; i < b.length; i++) { + int temp = ((byte)((a[i] & 0xff) + 1)) * ((byte)((b[i] & 0xff) + 1)); + s += temp; + } + return s - 1; + } + + /// CHECK-START: int other.TestByte.testDotProdComplexSignedCastedToUnsigned(byte[], byte[]) loop_optimization (before) + /// CHECK-DAG: <<Const0:i\d+>> IntConstant 0 loop:none + /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 loop:none + /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Const0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Phi2:i\d+>> Phi [<<Const1>>,{{i\d+}}] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get1:b\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<AddC:i\d+>> Add [<<Get1>>,<<Const1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<TypeC1:a\d+>> TypeConversion [<<AddC>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get2:b\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<AddGets:i\d+>> Add [<<Get2>>,<<Const1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<TypeC2:a\d+>> TypeConversion [<<AddGets>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Mul:i\d+>> Mul [<<TypeC1>>,<<TypeC2>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi2>>,<<Mul>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi1>>,<<Const1>>] loop:<<Loop>> outer_loop:none + + /// CHECK-START-{ARM64}: int other.TestByte.testDotProdComplexSignedCastedToUnsigned(byte[], byte[]) loop_optimization (after) + /// CHECK-DAG: <<Const0:i\d+>> IntConstant 0 loop:none + /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 loop:none + /// CHECK-DAG: <<Const16:i\d+>> IntConstant 16 loop:none + /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<Const1>>] loop:none + /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<Const1>>] loop:none + /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Const0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Load1:d\d+>> VecLoad 
[{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<VAdd1:d\d+>> VecAdd [<<Load1>>,<<Repl>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Load2:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<VAdd2:d\d+>> VecAdd [<<Load2>>,<<Repl>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecDotProd [<<Phi2>>,<<VAdd1>>,<<VAdd2>>] type:Uint8 loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi1>>,<<Const16>>] loop:<<Loop>> outer_loop:none + // + /// CHECK-DAG: <<Reduce:d\d+>> VecReduce [<<Phi2>>] loop:none + /// CHECK-DAG: VecExtractScalar [<<Reduce>>] loop:none + public static final int testDotProdComplexSignedCastedToUnsigned(byte[] a, byte[] b) { + int s = 1; + for (int i = 0; i < b.length; i++) { + int temp = ((a[i] + 1) & 0xff) * ((b[i] + 1) & 0xff); + s += temp; + } + return s - 1; + } + + /// CHECK-START-{ARM64}: int other.TestByte.testDotProdSignedWidening(byte[], byte[]) loop_optimization (after) + /// CHECK-DAG: VecDotProd type:Int8 + public static final int testDotProdSignedWidening(byte[] a, byte[] b) { + int s = 1; + for (int i = 0; i < b.length; i++) { + int temp = ((short)(a[i])) * ((short)(b[i])); + s += temp; + } + return s - 1; + } + + /// CHECK-START-{ARM64}: int other.TestByte.testDotProdParamSigned(int, byte[]) loop_optimization (after) + /// CHECK-DAG: VecDotProd type:Int8 + public static final int testDotProdParamSigned(int x, byte[] b) { + int s = 1; + for (int i = 0; i < b.length; i++) { + int temp = (byte)(x) * b[i]; + s += temp; + } + return s - 1; + } + + /// CHECK-START-{ARM64}: int other.TestByte.testDotProdParamUnsigned(int, byte[]) loop_optimization (after) + /// CHECK-DAG: VecDotProd type:Uint8 + public static final int testDotProdParamUnsigned(int x, byte[] b) { + int s = 1; + for (int i = 0; i < b.length; i++) { + int temp = (x & 0xff) * (b[i] & 0xff); + s += temp; + } + return s - 1; + } + + // No DOTPROD cases. 
+
+  // The multiplier x is used as a full int here; testDotProdParamSigned narrows it with (byte).
+  /// CHECK-START: int other.TestByte.testDotProdIntParam(int, byte[]) loop_optimization (after)
+  /// CHECK-NOT: VecDotProd
+  public static final int testDotProdIntParam(int x, byte[] b) {
+    int s = 1;
+    for (int i = 0; i < b.length; i++) {
+      int temp = b[i] * (x);
+      s += temp;
+    }
+    return s - 1;
+  }
+
+  // Both byte operands are converted to char (a 16-bit type) before the multiply.
+  /// CHECK-START: int other.TestByte.testDotProdSignedToChar(byte[], byte[]) loop_optimization (after)
+  /// CHECK-NOT: VecDotProd
+  public static final int testDotProdSignedToChar(byte[] a, byte[] b) {
+    int s = 1;
+    for (int i = 0; i < b.length; i++) {
+      int temp = ((char)(a[i])) * ((char)(b[i]));
+      s += temp;
+    }
+    return s - 1;
+  }
+
+  // Cases when result of Mul is type-converted are not supported.
+
+  // Signed product narrowed to byte before accumulation.
+  /// CHECK-START: int other.TestByte.testDotProdSimpleCastedToSignedByte(byte[], byte[]) loop_optimization (after)
+  /// CHECK-NOT: VecDotProd
+  public static final int testDotProdSimpleCastedToSignedByte(byte[] a, byte[] b) {
+    int s = 1;
+    for (int i = 0; i < b.length; i++) {
+      byte temp = (byte)(a[i] * b[i]);
+      s += temp;
+    }
+    return s - 1;
+  }
+
+  // Signed product masked to its low 8 bits before accumulation.
+  /// CHECK-START: int other.TestByte.testDotProdSimpleCastedToUnsignedByte(byte[], byte[]) loop_optimization (after)
+  /// CHECK-NOT: VecDotProd
+  public static final int testDotProdSimpleCastedToUnsignedByte(byte[] a, byte[] b) {
+    int s = 1;
+    for (int i = 0; i < b.length; i++) {
+      s += (a[i] * b[i]) & 0xff;
+    }
+    return s - 1;
+  }
+
+  // Product of zero-extended operands narrowed to byte before accumulation.
+  /// CHECK-START: int other.TestByte.testDotProdSimpleUnsignedCastedToSignedByte(byte[], byte[]) loop_optimization (after)
+  /// CHECK-NOT: VecDotProd
+  public static final int testDotProdSimpleUnsignedCastedToSignedByte(byte[] a, byte[] b) {
+    int s = 1;
+    for (int i = 0; i < b.length; i++) {
+      byte temp = (byte)((a[i] & 0xff) * (b[i] & 0xff));
+      s += temp;
+    }
+    return s - 1;
+  }
+
+  // Product of zero-extended operands masked to its low 8 bits before accumulation.
+  /// CHECK-START: int other.TestByte.testDotProdSimpleUnsignedCastedToUnsignedByte(byte[], byte[]) loop_optimization (after)
+  /// CHECK-NOT: VecDotProd
+  public static final int testDotProdSimpleUnsignedCastedToUnsignedByte(byte[] a, byte[] b) {
+    int s = 1;
+    for (int i = 0; i < b.length; i++) {
+      s += ((a[i] & 0xff) * (b[i] & 0xff)) & 0xff;
+    }
+    return s - 1;
+  }
+
+  // Signed product narrowed to short before accumulation.
+  /// CHECK-START: int other.TestByte.testDotProdSimpleCastedToShort(byte[], byte[]) loop_optimization (after)
+  /// CHECK-NOT: VecDotProd
+  public static final int testDotProdSimpleCastedToShort(byte[] a, byte[] b) {
+    int s = 1;
+    for (int i = 0; i < b.length; i++) {
+      short temp = (short)(a[i] * b[i]);
+      s += temp;
+    }
+    return s - 1;
+  }
+
+  // Signed product converted to char before accumulation.
+  /// CHECK-START: int other.TestByte.testDotProdSimpleCastedToChar(byte[], byte[]) loop_optimization (after)
+  /// CHECK-NOT: VecDotProd
+  public static final int testDotProdSimpleCastedToChar(byte[] a, byte[] b) {
+    int s = 1;
+    for (int i = 0; i < b.length; i++) {
+      char temp = (char)(a[i] * b[i]);
+      s += temp;
+    }
+    return s - 1;
+  }
+
+  // Product of zero-extended operands narrowed to short before accumulation.
+  /// CHECK-START: int other.TestByte.testDotProdSimpleUnsignedCastedToShort(byte[], byte[]) loop_optimization (after)
+  /// CHECK-NOT: VecDotProd
+  public static final int testDotProdSimpleUnsignedCastedToShort(byte[] a, byte[] b) {
+    int s = 1;
+    for (int i = 0; i < b.length; i++) {
+      short temp = (short)((a[i] & 0xff) * (b[i] & 0xff));
+      s += temp;
+    }
+    return s - 1;
+  }
+
+  // Product of zero-extended operands converted to char before accumulation.
+  /// CHECK-START: int other.TestByte.testDotProdSimpleUnsignedCastedToChar(byte[], byte[]) loop_optimization (after)
+  /// CHECK-NOT: VecDotProd
+  public static final int testDotProdSimpleUnsignedCastedToChar(byte[] a, byte[] b) {
+    int s = 1;
+    for (int i = 0; i < b.length; i++) {
+      char temp = (char)((a[i] & 0xff) * (b[i] & 0xff));
+      s += temp;
+    }
+    return s - 1;
+  }
+
+  // Product widened to long; the compound assignment "s += temp" narrows the sum back to int.
+  /// CHECK-START: int other.TestByte.testDotProdSimpleUnsignedCastedToLong(byte[], byte[]) loop_optimization (after)
+  /// CHECK-NOT: VecDotProd
+  public static final int testDotProdSimpleUnsignedCastedToLong(byte[] a, byte[] b) {
+    int s = 1;
+    for (int i = 0; i < b.length; i++) {
+      long temp = (long)((a[i] & 0xff) * (b[i] & 0xff));
+      s += temp;
+    }
+    return s - 1;
+  }
+
+  // Mixed signedness: a[i] is zero-extended with & 0xff, b[i] is used as a signed byte.
+  /// CHECK-START: int other.TestByte.testDotProdUnsignedSigned(byte[], byte[]) loop_optimization (after)
+  /// CHECK-NOT: VecDotProd
+  public static final int testDotProdUnsignedSigned(byte[] a, byte[] b) {
+    int s = 1;
+    for (int i = 0; i < b.length; i++) {
+      int temp = (a[i] & 0xff) * b[i];
+      s += temp;
+    }
+    return s - 1;
+  }
+
+  // Throws an Error naming both values when result differs from expected.
+  private static void expectEquals(int expected, int result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  // Runs every kernel on (b1, b2) and compares against results, which must hold 21 values
+  // in the order of the calls below. The three *Param* kernels take -128 and b2 only.
+  private static void testDotProd(byte[] b1, byte[] b2, int[] results) {
+    expectEquals(results[0], testDotProdSimple(b1, b2));
+    expectEquals(results[1], testDotProdComplex(b1, b2));
+    expectEquals(results[2], testDotProdSimpleUnsigned(b1, b2));
+    expectEquals(results[3], testDotProdComplexUnsigned(b1, b2));
+    expectEquals(results[4], testDotProdComplexUnsignedCastedToSigned(b1, b2));
+    expectEquals(results[5], testDotProdComplexSignedCastedToUnsigned(b1, b2));
+    expectEquals(results[6], testDotProdSignedWidening(b1, b2));
+    expectEquals(results[7], testDotProdParamSigned(-128, b2));
+    expectEquals(results[8], testDotProdParamUnsigned(-128, b2));
+    expectEquals(results[9], testDotProdIntParam(-128, b2));
+    expectEquals(results[10], testDotProdSignedToChar(b1, b2));
+    expectEquals(results[11], testDotProdSimpleCastedToSignedByte(b1, b2));
+    expectEquals(results[12], testDotProdSimpleCastedToUnsignedByte(b1, b2));
+    expectEquals(results[13], testDotProdSimpleUnsignedCastedToSignedByte(b1, b2));
+    expectEquals(results[14], testDotProdSimpleUnsignedCastedToUnsignedByte(b1, b2));
+    expectEquals(results[15], testDotProdSimpleCastedToShort(b1, b2));
+    expectEquals(results[16], testDotProdSimpleCastedToChar(b1, b2));
+    expectEquals(results[17], testDotProdSimpleUnsignedCastedToShort(b1, b2));
+    expectEquals(results[18], testDotProdSimpleUnsignedCastedToChar(b1, b2));
+    expectEquals(results[19], testDotProdSimpleUnsignedCastedToLong(b1, b2));
+    expectEquals(results[20], testDotProdUnsignedSigned(b1, b2));
+  }
+
+  // Drives testDotProd with four 16-element input pairs built from the byte extremes
+  // (127 and -128) and zeros, each with its table of 21 expected results.
+  public static void run() {
+    byte[] b1_1 = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 127, 127, 127, 127 };
+    byte[] b2_1 = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 127, 127, 127, 127 };
+    int[] results_1 = { 64516, 65548, 64516, 65548, 65548, 65548, 64516, -65024, 65024, -65024,
+                        64516, 4, 4, 4, 4, 64516, 64516, 64516, 64516, 64516, 64516 };
+    testDotProd(b1_1, b2_1, results_1);
+
+    byte[] b1_2 = { 127, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 127, 127, 127, 127 };
+    byte[] b2_2 = { 127, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 127, 127, 127, 127 };
+    int[] results_2 = { 80645, 81931, 80645, 81931, 81931, 81931, 80645, -81280, 81280, -81280,
+                        80645, 5, 5, 5, 5, 80645, 80645, 80645, 80645, 80645, 80645 };
+    testDotProd(b1_2, b2_2, results_2);
+
+    byte[] b1_3 = { -128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -128, -128, -128, -128 };
+    byte[] b2_3 = { 127, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 127, 127, 127, 127 };
+    int[] results_3 = { -81280, 81291, 81280, 82571, 81291, 82571, -81280, -81280, 81280, -81280,
+                        41534080, -640, 640, -640, 640, -81280, 246400, 81280, 81280, 81280, 81280 };
+    testDotProd(b1_3, b2_3, results_3);
+
+    byte[] b1_4 = { -128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -128, -128, -128, -128 };
+    byte[] b2_4 = { -128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -128, -128, -128, -128 };
+    int[] results_4 = { 81920, 80656, 81920, 83216, 80656, 83216, 81920, 81920, 81920, 81920,
+                        -83804160, 0, 0, 0, 0, 81920, 81920, 81920, 81920, 81920, -81920 };
+    testDotProd(b1_4, b2_4, results_4);
+  }
+
+  public static void main(String[] args) {
+    run();
+  }
+}
diff --git a/test/684-checker-simd-dotprod/src/other/TestCharShort.java b/test/684-checker-simd-dotprod/src/other/TestCharShort.java
new file mode 100644
index 0000000000..9cb9db59b3
--- /dev/null
+++ b/test/684-checker-simd-dotprod/src/other/TestCharShort.java
@@ -0,0 +1,552 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, 
Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package other; + +/** + * Tests for dot product idiom vectorization: char and short case. + */ +public class TestCharShort { + + public static final int ARRAY_SIZE = 1024; + + /// CHECK-START: int other.TestCharShort.testDotProdSimple(short[], short[]) loop_optimization (before) + /// CHECK-DAG: <<Const0:i\d+>> IntConstant 0 loop:none + /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 loop:none + /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Const0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Phi2:i\d+>> Phi [<<Const1>>,{{i\d+}}] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get1:s\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get2:s\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Mul:i\d+>> Mul [<<Get1>>,<<Get2>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi2>>,<<Mul>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi1>>,<<Const1>>] loop:<<Loop>> outer_loop:none + + /// CHECK-START-{ARM64}: int other.TestCharShort.testDotProdSimple(short[], short[]) loop_optimization (after) + /// CHECK-DAG: <<Const0:i\d+>> IntConstant 0 loop:none + /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 loop:none + /// CHECK-DAG: <<Const8:i\d+>> IntConstant 8 loop:none + /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<Const1>>] loop:none + /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Const0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Phi2:d\d+>> Phi 
[<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Load1:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Load2:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecDotProd [<<Phi2>>,<<Load1>>,<<Load2>>] type:Int16 loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi1>>,<<Const8>>] loop:<<Loop>> outer_loop:none + // + /// CHECK-DAG: <<Reduce:d\d+>> VecReduce [<<Phi2>>] loop:none + /// CHECK-DAG: VecExtractScalar [<<Reduce>>] loop:none + public static final int testDotProdSimple(short[] a, short[] b) { + int s = 1; + for (int i = 0; i < b.length; i++) { + int temp = a[i] * b[i]; + s += temp; + } + return s - 1; + } + + /// CHECK-START: int other.TestCharShort.testDotProdComplex(short[], short[]) loop_optimization (before) + /// CHECK-DAG: <<Const0:i\d+>> IntConstant 0 loop:none + /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 loop:none + /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Const0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Phi2:i\d+>> Phi [<<Const1>>,{{i\d+}}] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get1:s\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<AddC1:i\d+>> Add [<<Get1>>,<<Const1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<TypeC1:s\d+>> TypeConversion [<<AddC1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get2:s\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<AddC2:i\d+>> Add [<<Get2>>,<<Const1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<TypeC2:s\d+>> TypeConversion [<<AddC2>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Mul:i\d+>> Mul [<<TypeC1>>,<<TypeC2>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi2>>,<<Mul>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi1>>,<<Const1>>] loop:<<Loop>> outer_loop:none + + /// CHECK-START-{ARM64}: int other.TestCharShort.testDotProdComplex(short[], short[]) loop_optimization (after) + 
/// CHECK-DAG: <<Const0:i\d+>> IntConstant 0 loop:none + /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 loop:none + /// CHECK-DAG: <<Const8:i\d+>> IntConstant 8 loop:none + /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<Const1>>] loop:none + /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<Const1>>] loop:none + /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Const0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Load1:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<VAdd1:d\d+>> VecAdd [<<Load1>>,<<Repl>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Load2:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<VAdd2:d\d+>> VecAdd [<<Load2>>,<<Repl>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecDotProd [<<Phi2>>,<<VAdd1>>,<<VAdd2>>] type:Int16 loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi1>>,<<Const8>>] loop:<<Loop>> outer_loop:none + // + /// CHECK-DAG: <<Reduce:d\d+>> VecReduce [<<Phi2>>] loop:none + /// CHECK-DAG: VecExtractScalar [<<Reduce>>] loop:none + public static final int testDotProdComplex(short[] a, short[] b) { + int s = 1; + for (int i = 0; i < b.length; i++) { + int temp = ((short)(a[i] + 1)) * ((short)(b[i] + 1)); + s += temp; + } + return s - 1; + } + + /// CHECK-START: int other.TestCharShort.testDotProdSimpleUnsigned(char[], char[]) loop_optimization (before) + /// CHECK-DAG: <<Const0:i\d+>> IntConstant 0 loop:none + /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 loop:none + /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Const0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Phi2:i\d+>> Phi [<<Const1>>,{{i\d+}}] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get1:c\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get2:c\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Mul:i\d+>> Mul [<<Get1>>,<<Get2>>] 
loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi2>>,<<Mul>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi1>>,<<Const1>>] loop:<<Loop>> outer_loop:none + + /// CHECK-START-{ARM64}: int other.TestCharShort.testDotProdSimpleUnsigned(char[], char[]) loop_optimization (after) + /// CHECK-DAG: <<Const0:i\d+>> IntConstant 0 loop:none + /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 loop:none + /// CHECK-DAG: <<Const8:i\d+>> IntConstant 8 loop:none + /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<Const1>>] loop:none + /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Const0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Load1:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Load2:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecDotProd [<<Phi2>>,<<Load1>>,<<Load2>>] type:Uint16 loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi1>>,<<Const8>>] loop:<<Loop>> outer_loop:none + // + /// CHECK-DAG: <<Reduce:d\d+>> VecReduce [<<Phi2>>] loop:none + /// CHECK-DAG: VecExtractScalar [<<Reduce>>] loop:none + public static final int testDotProdSimpleUnsigned(char[] a, char[] b) { + int s = 1; + for (int i = 0; i < b.length; i++) { + int temp = a[i] * b[i]; + s += temp; + } + return s - 1; + } + + /// CHECK-START: int other.TestCharShort.testDotProdComplexUnsigned(char[], char[]) loop_optimization (before) + /// CHECK-DAG: <<Const0:i\d+>> IntConstant 0 loop:none + /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 loop:none + /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Const0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Phi2:i\d+>> Phi [<<Const1>>,{{i\d+}}] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get1:c\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<AddC:i\d+>> Add [<<Get1>>,<<Const1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<TypeC1:c\d+>> 
TypeConversion [<<AddC>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get2:c\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<AddGets:i\d+>> Add [<<Get2>>,<<Const1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<TypeC2:c\d+>> TypeConversion [<<AddGets>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Mul:i\d+>> Mul [<<TypeC1>>,<<TypeC2>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi2>>,<<Mul>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi1>>,<<Const1>>] loop:<<Loop>> outer_loop:none + + /// CHECK-START-{ARM64}: int other.TestCharShort.testDotProdComplexUnsigned(char[], char[]) loop_optimization (after) + /// CHECK-DAG: <<Const0:i\d+>> IntConstant 0 loop:none + /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 loop:none + /// CHECK-DAG: <<Const8:i\d+>> IntConstant 8 loop:none + /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<Const1>>] loop:none + /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<Const1>>] loop:none + /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Const0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Load1:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<VAdd1:d\d+>> VecAdd [<<Load1>>,<<Repl>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Load2:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<VAdd2:d\d+>> VecAdd [<<Load2>>,<<Repl>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecDotProd [<<Phi2>>,<<VAdd1>>,<<VAdd2>>] type:Uint16 loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi1>>,<<Const8>>] loop:<<Loop>> outer_loop:none + // + /// CHECK-DAG: <<Reduce:d\d+>> VecReduce [<<Phi2>>] loop:none + /// CHECK-DAG: VecExtractScalar [<<Reduce>>] loop:none + public static final int testDotProdComplexUnsigned(char[] a, char[] b) { + int s = 1; + for (int i = 0; i < b.length; i++) { + int temp = ((char)(a[i] + 1)) * 
((char)(b[i] + 1)); + s += temp; + } + return s - 1; + } + + /// CHECK-START: int other.TestCharShort.testDotProdComplexUnsignedCastedToSigned(char[], char[]) loop_optimization (before) + /// CHECK-DAG: <<Const0:i\d+>> IntConstant 0 loop:none + /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 loop:none + /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Const0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Phi2:i\d+>> Phi [<<Const1>>,{{i\d+}}] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get1:c\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<AddC:i\d+>> Add [<<Get1>>,<<Const1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<TypeC1:s\d+>> TypeConversion [<<AddC>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get2:c\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<AddGets:i\d+>> Add [<<Get2>>,<<Const1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<TypeC2:s\d+>> TypeConversion [<<AddGets>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Mul:i\d+>> Mul [<<TypeC1>>,<<TypeC2>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi2>>,<<Mul>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi1>>,<<Const1>>] loop:<<Loop>> outer_loop:none + + /// CHECK-START-{ARM64}: int other.TestCharShort.testDotProdComplexUnsignedCastedToSigned(char[], char[]) loop_optimization (after) + /// CHECK-DAG: <<Const0:i\d+>> IntConstant 0 loop:none + /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 loop:none + /// CHECK-DAG: <<Const8:i\d+>> IntConstant 8 loop:none + /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<Const1>>] loop:none + /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<Const1>>] loop:none + /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Const0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Load1:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<VAdd1:d\d+>> VecAdd 
[<<Load1>>,<<Repl>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Load2:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<VAdd2:d\d+>> VecAdd [<<Load2>>,<<Repl>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecDotProd [<<Phi2>>,<<VAdd1>>,<<VAdd2>>] type:Int16 loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi1>>,<<Const8>>] loop:<<Loop>> outer_loop:none + // + /// CHECK-DAG: <<Reduce:d\d+>> VecReduce [<<Phi2>>] loop:none + /// CHECK-DAG: VecExtractScalar [<<Reduce>>] loop:none + public static final int testDotProdComplexUnsignedCastedToSigned(char[] a, char[] b) { + int s = 1; + for (int i = 0; i < b.length; i++) { + int temp = ((short)(a[i] + 1)) * ((short)(b[i] + 1)); + s += temp; + } + return s - 1; + } + + /// CHECK-START: int other.TestCharShort.testDotProdComplexSignedCastedToUnsigned(short[], short[]) loop_optimization (before) + /// CHECK-DAG: <<Const0:i\d+>> IntConstant 0 loop:none + /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 loop:none + /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Const0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Phi2:i\d+>> Phi [<<Const1>>,{{i\d+}}] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get1:s\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<AddC:i\d+>> Add [<<Get1>>,<<Const1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<TypeC1:c\d+>> TypeConversion [<<AddC>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get2:s\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<AddGets:i\d+>> Add [<<Get2>>,<<Const1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<TypeC2:c\d+>> TypeConversion [<<AddGets>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Mul:i\d+>> Mul [<<TypeC1>>,<<TypeC2>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi2>>,<<Mul>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi1>>,<<Const1>>] loop:<<Loop>> outer_loop:none + + /// CHECK-START-{ARM64}: int 
other.TestCharShort.testDotProdComplexSignedCastedToUnsigned(short[], short[]) loop_optimization (after) + /// CHECK-DAG: <<Const0:i\d+>> IntConstant 0 loop:none + /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 loop:none + /// CHECK-DAG: <<Const8:i\d+>> IntConstant 8 loop:none + /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<Const1>>] loop:none + /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<Const1>>] loop:none + /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Const0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Load1:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<VAdd1:d\d+>> VecAdd [<<Load1>>,<<Repl>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Load2:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<VAdd2:d\d+>> VecAdd [<<Load2>>,<<Repl>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecDotProd [<<Phi2>>,<<VAdd1>>,<<VAdd2>>] type:Uint16 loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi1>>,<<Const8>>] loop:<<Loop>> outer_loop:none + // + /// CHECK-DAG: <<Reduce:d\d+>> VecReduce [<<Phi2>>] loop:none + /// CHECK-DAG: VecExtractScalar [<<Reduce>>] loop:none + public static final int testDotProdComplexSignedCastedToUnsigned(short[] a, short[] b) { + int s = 1; + for (int i = 0; i < b.length; i++) { + int temp = ((char)(a[i] + 1)) * ((char)(b[i] + 1)); + s += temp; + } + return s - 1; + } + + /// CHECK-START-{ARM64}: int other.TestCharShort.testDotProdSignedToInt(short[], short[]) loop_optimization (after) + /// CHECK-DAG: VecDotProd type:Int16 + public static final int testDotProdSignedToInt(short[] a, short[] b) { + int s = 1; + for (int i = 0; i < b.length; i++) { + int temp = ((int)(a[i])) * ((int)(b[i])); + s += temp; + } + return s - 1; + } + + /// CHECK-START-{ARM64}: int other.TestCharShort.testDotProdParamSigned(int, short[]) loop_optimization (after) + /// CHECK-DAG: VecDotProd 
type:Int16 + public static final int testDotProdParamSigned(int x, short[] b) { + int s = 1; + for (int i = 0; i < b.length; i++) { + int temp = (short)(x) * b[i]; + s += temp; + } + return s - 1; + } + + /// CHECK-START-{ARM64}: int other.TestCharShort.testDotProdParamUnsigned(int, char[]) loop_optimization (after) + /// CHECK-DAG: VecDotProd type:Uint16 + public static final int testDotProdParamUnsigned(int x, char[] b) { + int s = 1; + for (int i = 0; i < b.length; i++) { + int temp = (char)(x) * b[i]; + s += temp; + } + return s - 1; + } + + /// CHECK-START: int other.TestCharShort.testDotProdIntParam(int, short[]) loop_optimization (after) + /// CHECK-NOT: VecDotProd + public static final int testDotProdIntParam(int x, short[] b) { + int s = 1; + for (int i = 0; i < b.length; i++) { + int temp = b[i] * (x); + s += temp; + } + return s - 1; + } + + /// CHECK-START-{ARM64}: int other.TestCharShort.testDotProdSignedToChar(short[], short[]) loop_optimization (after) + /// CHECK-DAG: VecDotProd type:Uint16 + public static final int testDotProdSignedToChar(short[] a, short[] b) { + int s = 1; + for (int i = 0; i < b.length; i++) { + int temp = ((char)(a[i])) * ((char)(b[i])); + s += temp; + } + return s - 1; + } + + // Cases when result of Mul is type-converted are not supported. 
+
+  // The product of two shorts is narrowed back to short before it is accumulated.
+  // No VecDotProd of any packed type may appear: a check restricted to type:Uint16 would
+  // not catch an Int16 dot product, which is the type short[] operands produce in
+  // testDotProdSimple. testDotProdSimpleCastedToShort below has the same loop body and
+  // asserts the same unrestricted form.
+  /// CHECK-START: int other.TestCharShort.testDotProdSimpleMulCastedToSigned(short[], short[]) loop_optimization (after)
+  /// CHECK-NOT: VecDotProd
+  public static final int testDotProdSimpleMulCastedToSigned(short[] a, short[] b) {
+    int s = 1;
+    for (int i = 0; i < b.length; i++) {
+      short temp = (short)(a[i] * b[i]);
+      s += temp;
+    }
+    return s - 1;
+  }
+
+
+  // Product of two shorts converted to char before accumulation.
+  /// CHECK-START: int other.TestCharShort.testDotProdSimpleMulCastedToUnsigned(short[], short[]) loop_optimization (after)
+  /// CHECK-NOT: VecDotProd
+  public static final int testDotProdSimpleMulCastedToUnsigned(short[] a, short[] b) {
+    int s = 1;
+    for (int i = 0; i < b.length; i++) {
+      char temp = (char)(a[i] * b[i]);
+      s += temp;
+    }
+    return s - 1;
+  }
+
+  // Product of two chars narrowed to short before accumulation.
+  /// CHECK-START: int other.TestCharShort.testDotProdSimpleUnsignedMulCastedToSigned(char[], char[]) loop_optimization (after)
+  /// CHECK-NOT: VecDotProd
+  public static final int testDotProdSimpleUnsignedMulCastedToSigned(char[] a, char[] b) {
+    int s = 1;
+    for (int i = 0; i < b.length; i++) {
+      short temp = (short)(a[i] * b[i]);
+      s += temp;
+    }
+    return s - 1;
+  }
+
+  // Product of two chars converted back to char before accumulation.
+  /// CHECK-START: int other.TestCharShort.testDotProdSimpleUnsignedMulCastedToUnsigned(char[], char[]) loop_optimization (after)
+  /// CHECK-NOT: VecDotProd
+  public static final int testDotProdSimpleUnsignedMulCastedToUnsigned(char[] a, char[] b) {
+    int s = 1;
+    for (int i = 0; i < b.length; i++) {
+      char temp = (char)(a[i] * b[i]);
+      s += temp;
+    }
+    return s - 1;
+  }
+
+  // Same loop body and parameter types as testDotProdSimpleMulCastedToSigned.
+  /// CHECK-START: int other.TestCharShort.testDotProdSimpleCastedToShort(short[], short[]) loop_optimization (after)
+  /// CHECK-NOT: VecDotProd
+  public static final int testDotProdSimpleCastedToShort(short[] a, short[] b) {
+    int s = 1;
+    for (int i = 0; i < b.length; i++) {
+      short temp = (short)(a[i] * b[i]);
+      s += temp;
+    }
+    return s - 1;
+  }
+
+  // Same loop body and parameter types as testDotProdSimpleMulCastedToUnsigned.
+  /// CHECK-START: int other.TestCharShort.testDotProdSimpleCastedToChar(short[], short[]) loop_optimization (after)
+  /// CHECK-NOT: VecDotProd
+  public static final int testDotProdSimpleCastedToChar(short[] a, short[] b) {
+    int s = 1;
+    for (int i = 0; i < b.length; i++) {
+      char temp = (char)(a[i] * b[i]);
+      s += temp;
+    }
+    return s - 1;
+  }
+
+  // Same loop body and parameter types as testDotProdSimpleUnsignedMulCastedToSigned.
+  /// CHECK-START: int other.TestCharShort.testDotProdSimpleUnsignedCastedToShort(char[], char[]) loop_optimization (after)
+  /// CHECK-NOT: VecDotProd
+  public static final int testDotProdSimpleUnsignedCastedToShort(char[] a, char[] b) {
+    int s = 1;
+    for (int i = 0; i < b.length; i++) {
+      short temp = (short)(a[i] * b[i]);
+      s += temp;
+    }
+    return s - 1;
+  }
+
+  // Same loop body and parameter types as testDotProdSimpleUnsignedMulCastedToUnsigned.
+  /// CHECK-START: int other.TestCharShort.testDotProdSimpleUnsignedCastedToChar(char[], char[]) loop_optimization (after)
+  /// CHECK-NOT: VecDotProd
+  public static final int testDotProdSimpleUnsignedCastedToChar(char[] a, char[] b) {
+    int s = 1;
+    for (int i = 0; i < b.length; i++) {
+      char temp = (char)(a[i] * b[i]);
+      s += temp;
+    }
+    return s - 1;
+  }
+
+  // Product widened to long; the compound assignment "s += temp" narrows the sum back to int.
+  /// CHECK-START: int other.TestCharShort.testDotProdSimpleUnsignedCastedToLong(char[], char[]) loop_optimization (after)
+  /// CHECK-NOT: VecDotProd
+  public static final int testDotProdSimpleUnsignedCastedToLong(char[] a, char[] b) {
+    int s = 1;
+    for (int i = 0; i < b.length; i++) {
+      long temp = (long)(a[i] * b[i]);
+      s += temp;
+    }
+    return s - 1;
+  }
+
+  // Narrowing conversions. 
+ + /// CHECK-START: int other.TestCharShort.testDotProdSignedNarrowerSigned(short[], short[]) loop_optimization (after) + /// CHECK-NOT: VecDotProd + public static final int testDotProdSignedNarrowerSigned(short[] a, short[] b) { + int s = 1; + for (int i = 0; i < b.length; i++) { + int temp = ((byte)(a[i])) * ((byte)(b[i])); + s += temp; + } + return s - 1; + } + + /// CHECK-START: int other.TestCharShort.testDotProdSignedNarrowerUnsigned(short[], short[]) loop_optimization (after) + /// CHECK-NOT: VecDotProd + public static final int testDotProdSignedNarrowerUnsigned(short[] a, short[] b) { + int s = 1; + for (int i = 0; i < b.length; i++) { + int temp = (a[i] & 0xff) * (b[i] & 0xff); + s += temp; + } + return s - 1; + } + + /// CHECK-START: int other.TestCharShort.testDotProdUnsignedNarrowerSigned(char[], char[]) loop_optimization (after) + /// CHECK-NOT: VecDotProd + public static final int testDotProdUnsignedNarrowerSigned(char[] a, char[] b) { + int s = 1; + for (int i = 0; i < b.length; i++) { + int temp = ((byte)(a[i])) * ((byte)(b[i])); + s += temp; + } + return s - 1; + } + + /// CHECK-START: int other.TestCharShort.testDotProdUnsignedNarrowerUnsigned(char[], char[]) loop_optimization (after) + /// CHECK-NOT: VecDotProd + public static final int testDotProdUnsignedNarrowerUnsigned(char[] a, char[] b) { + int s = 1; + for (int i = 0; i < b.length; i++) { + int temp = (a[i] & 0xff) * (b[i] & 0xff); + s += temp; + } + return s - 1; + } + + /// CHECK-START: int other.TestCharShort.testDotProdUnsignedSigned(char[], short[]) loop_optimization (after) + /// CHECK-NOT: VecDotProd + public static final int testDotProdUnsignedSigned(char[] a, short[] b) { + int s = 1; + for (int i = 0; i < b.length; i++) { + int temp = a[i] * b[i]; + s += temp; + } + return s - 1; + } + + private static void expectEquals(int expected, int result) { + if (expected != result) { + throw new Error("Expected: " + expected + ", found: " + result); + } + } + + private static void 
testDotProd(short[] s1, short[] s2, char[] c1, char[] c2, int[] results) { + expectEquals(results[0], testDotProdSimple(s1, s2)); + expectEquals(results[1], testDotProdComplex(s1, s2)); + expectEquals(results[2], testDotProdSimpleUnsigned(c1, c2)); + expectEquals(results[3], testDotProdComplexUnsigned(c1, c2)); + expectEquals(results[4], testDotProdComplexUnsignedCastedToSigned(c1, c2)); + expectEquals(results[5], testDotProdComplexSignedCastedToUnsigned(s1, s2)); + expectEquals(results[6], testDotProdSignedToInt(s1, s2)); + expectEquals(results[7], testDotProdParamSigned(-32768, s2)); + expectEquals(results[8], testDotProdParamUnsigned(-32768, c2)); + expectEquals(results[9], testDotProdIntParam(-32768, s2)); + expectEquals(results[10], testDotProdSignedToChar(s1, s2)); + expectEquals(results[11], testDotProdSimpleMulCastedToSigned(s1, s2)); + expectEquals(results[12], testDotProdSimpleMulCastedToUnsigned(s1, s2)); + expectEquals(results[13], testDotProdSimpleUnsignedMulCastedToSigned(c1, c2)); + expectEquals(results[14], testDotProdSimpleUnsignedMulCastedToUnsigned(c1, c2)); + expectEquals(results[15], testDotProdSimpleCastedToShort(s1, s2)); + expectEquals(results[16], testDotProdSimpleCastedToChar(s1, s2)); + expectEquals(results[17], testDotProdSimpleUnsignedCastedToShort(c1, c2)); + expectEquals(results[18], testDotProdSimpleUnsignedCastedToChar(c1, c2)); + expectEquals(results[19], testDotProdSimpleUnsignedCastedToLong(c1, c2)); + expectEquals(results[20], testDotProdSignedNarrowerSigned(s1, s2)); + expectEquals(results[21], testDotProdSignedNarrowerUnsigned(s1, s2)); + expectEquals(results[22], testDotProdUnsignedNarrowerSigned(c1, c2)); + expectEquals(results[23], testDotProdUnsignedNarrowerUnsigned(c1, c2)); + expectEquals(results[24], testDotProdUnsignedSigned(c1, s2)); + } + + public static void run() { + final short MAX_S = Short.MAX_VALUE; + final short MIN_S = Short.MAX_VALUE; + + short[] s1_1 = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, MAX_S, 
MAX_S }; + short[] s2_1 = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, MAX_S, MAX_S }; + char[] c1_1 = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, MAX_S, MAX_S }; + char[] c2_1 = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, MAX_S, MAX_S }; + int[] results_1 = { 2147352578, -2147483634, 2147352578, -2147483634, -2147483634, -2147483634, + 2147352578, -2147418112, 2147418112, -2147418112, 2147352578, + 2, 2, 2, 2, 2, 2, 2, 2, 2147352578, 2, 130050, 2, 130050, 2147352578 }; + testDotProd(s1_1, s2_1, c1_1, c2_1, results_1); + + short[] s1_2 = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, MAX_S, MAX_S, MAX_S, MAX_S }; + short[] s2_2 = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, MAX_S, MAX_S, MAX_S, MAX_S }; + char[] c1_2 = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, MAX_S, MAX_S, MAX_S, MAX_S }; + char[] c2_2 = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, MAX_S, MAX_S, MAX_S, MAX_S }; + int[] results_2 = { -262140, 12, -262140, 12, 12, 12, -262140, 131072, -131072, 131072, + -262140, 4, 4, 4, 4, 4, 4, 4, 4, -262140, 4, 260100, 4, 260100, -262140 }; + testDotProd(s1_2, s2_2, c1_2, c2_2, results_2); + + short[] s1_3 = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, MIN_S, MIN_S }; + short[] s2_3 = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, MAX_S, MAX_S }; + char[] c1_3 = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, MIN_S, MIN_S }; + char[] c2_3 = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, MAX_S, MAX_S }; + int[] results_3 = { 2147352578, -2147483634, 2147352578, -2147483634, -2147483634, + -2147483634, 2147352578, -2147418112, 2147418112, -2147418112, + 2147352578, 2, 2, 2, 2, 2, 2, 2, 2, 2147352578, 2, 130050, 2, + 130050, 2147352578}; + testDotProd(s1_3, s2_3, c1_3, c2_3, results_3); + + + short[] s1_4 = { MIN_S, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, MIN_S, MIN_S }; + short[] s2_4 = { MIN_S, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, MIN_S, MIN_S }; + char[] c1_4 = { MIN_S, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, MIN_S, MIN_S }; + char[] c2_4 = { MIN_S, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, MIN_S, MIN_S 
}; + int[] results_4 = { -1073938429, -1073741811, -1073938429, -1073741811, -1073741811, + -1073741811, -1073938429, 1073840128, -1073840128, 1073840128, + -1073938429, 3, 3, 3, 3, 3, 3, 3, 3, -1073938429, 3, 195075, 3, + 195075, -1073938429 }; + testDotProd(s1_4, s2_4, c1_4, c2_4, results_4); + } + + public static void main(String[] args) { + run(); + } +} diff --git a/test/684-checker-simd-dotprod/src/other/TestVarious.java b/test/684-checker-simd-dotprod/src/other/TestVarious.java new file mode 100644 index 0000000000..3f460982f2 --- /dev/null +++ b/test/684-checker-simd-dotprod/src/other/TestVarious.java @@ -0,0 +1,422 @@ +/* + * Copyright (C) 2018 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package other; + +/** + * Tests for dot product idiom vectorization. 
+ */ +public class TestVarious { + + /// CHECK-START: int other.TestVarious.testDotProdConstRight(byte[]) loop_optimization (before) + /// CHECK-DAG: <<Const0:i\d+>> IntConstant 0 loop:none + /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 loop:none + /// CHECK-DAG: <<Const89:i\d+>> IntConstant 89 loop:none + /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Const0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Phi2:i\d+>> Phi [<<Const1>>,{{i\d+}}] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get1:b\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Mul:i\d+>> Mul [<<Get1>>,<<Const89>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi2>>,<<Mul>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi1>>,<<Const1>>] loop:<<Loop>> outer_loop:none + + /// CHECK-START-{ARM64}: int other.TestVarious.testDotProdConstRight(byte[]) loop_optimization (after) + /// CHECK-DAG: <<Const0:i\d+>> IntConstant 0 loop:none + /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 loop:none + /// CHECK-DAG: <<Const16:i\d+>> IntConstant 16 loop:none + /// CHECK-DAG: <<Const89:i\d+>> IntConstant 89 loop:none + /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<Const1>>] loop:none + /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<Const89>>] loop:none + /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Const0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Load1:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecDotProd [<<Phi2>>,<<Load1>>,<<Repl>>] type:Int8 loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi1>>,<<Const16>>] loop:<<Loop>> outer_loop:none + // + /// CHECK-DAG: <<Reduce:d\d+>> VecReduce [<<Phi2>>] loop:none + /// CHECK-DAG: VecExtractScalar [<<Reduce>>] loop:none + public static final int testDotProdConstRight(byte[] b) { + int s = 1; + for (int i = 0; i < b.length; i++) { + int temp = b[i] * 89; + s += temp; + 
} + return s; + } + + /// CHECK-START: int other.TestVarious.testDotProdConstLeft(byte[]) loop_optimization (before) + /// CHECK-DAG: <<Const0:i\d+>> IntConstant 0 loop:none + /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 loop:none + /// CHECK-DAG: <<Const89:i\d+>> IntConstant 89 loop:none + /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Const0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Phi2:i\d+>> Phi [<<Const1>>,{{i\d+}}] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get1:a\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Mul:i\d+>> Mul [<<Get1>>,<<Const89>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi2>>,<<Mul>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi1>>,<<Const1>>] loop:<<Loop>> outer_loop:none + + /// CHECK-START-{ARM64}: int other.TestVarious.testDotProdConstLeft(byte[]) loop_optimization (after) + /// CHECK-DAG: <<Const0:i\d+>> IntConstant 0 loop:none + /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 loop:none + /// CHECK-DAG: <<Const16:i\d+>> IntConstant 16 loop:none + /// CHECK-DAG: <<Const89:i\d+>> IntConstant 89 loop:none + /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<Const1>>] loop:none + /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<Const89>>] loop:none + /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Const0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Load1:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecDotProd [<<Phi2>>,<<Load1>>,<<Repl>>] type:Uint8 loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi1>>,<<Const16>>] loop:<<Loop>> outer_loop:none + // + /// CHECK-DAG: <<Reduce:d\d+>> VecReduce [<<Phi2>>] loop:none + /// CHECK-DAG: VecExtractScalar [<<Reduce>>] loop:none + public static final int testDotProdConstLeft(byte[] b) { + int s = 1; + for (int i = 0; i < b.length; i++) { + int temp = 89 * (b[i] & 0xff); + s += temp; + } + 
return s; + } + + /// CHECK-START: int other.TestVarious.testDotProdLoopInvariantConvRight(byte[], int) loop_optimization (before) + /// CHECK-DAG: <<Param:i\d+>> ParameterValue loop:none + /// CHECK-DAG: <<Const0:i\d+>> IntConstant 0 loop:none + /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 loop:none + /// CHECK-DAG: <<ConstL:i\d+>> IntConstant 129 loop:none + /// CHECK-DAG: <<AddP:i\d+>> Add [<<Param>>,<<ConstL>>] loop:none + /// CHECK-DAG: <<TypeCnv:b\d+>> TypeConversion [<<AddP>>] loop:none + // + /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Const0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Phi2:i\d+>> Phi [<<Const1>>,{{i\d+}}] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get1:b\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Mul:i\d+>> Mul [<<Get1>>,<<TypeCnv>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi2>>,<<Mul>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi1>>,<<Const1>>] loop:<<Loop>> outer_loop:none + + /// CHECK-START-{ARM64}: int other.TestVarious.testDotProdLoopInvariantConvRight(byte[], int) loop_optimization (after) + /// CHECK-DAG: <<Param:i\d+>> ParameterValue loop:none + /// CHECK-DAG: <<Const0:i\d+>> IntConstant 0 loop:none + /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 loop:none + /// CHECK-DAG: <<Const16:i\d+>> IntConstant 16 loop:none + /// CHECK-DAG: <<ConstL:i\d+>> IntConstant 129 loop:none + /// CHECK-DAG: <<AddP:i\d+>> Add [<<Param>>,<<ConstL>>] loop:none + /// CHECK-DAG: <<TypeCnv:b\d+>> TypeConversion [<<AddP>>] loop:none + /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<Const1>>] loop:none + /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<TypeCnv>>] loop:none + // + /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Const0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Load1:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecDotProd 
[<<Phi2>>,<<Load1>>,<<Repl>>] type:Int8 loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi1>>,<<Const16>>] loop:<<Loop>> outer_loop:none + // + /// CHECK-DAG: <<Reduce:d\d+>> VecReduce [<<Phi2>>] loop:none + /// CHECK-DAG: VecExtractScalar [<<Reduce>>] loop:none + public static final int testDotProdLoopInvariantConvRight(byte[] b, int param) { + int s = 1; + for (int i = 0; i < b.length; i++) { + int temp = b[i] * ((byte)(param + 129)); + s += temp; + } + return s; + } + + /// CHECK-START: int other.TestVarious.testDotProdByteToChar(char[], char[]) loop_optimization (after) + /// CHECK-NOT: VecDotProd + public static final int testDotProdByteToChar(char[] a, char[] b) { + int s = 1; + for (int i = 0; i < b.length; i++) { + int temp = ((char)((byte)(a[i] + 129))) * b[i]; + s += temp; + } + return s; + } + + /// CHECK-START: int other.TestVarious.testDotProdMixedSize(byte[], short[]) loop_optimization (after) + /// CHECK-NOT: VecDotProd + public static final int testDotProdMixedSize(byte[] a, short[] b) { + int s = 1; + for (int i = 0; i < b.length; i++) { + int temp = a[i] * b[i]; + s += temp; + } + return s; + } + + /// CHECK-START: int other.TestVarious.testDotProdMixedSizeAndSign(byte[], char[]) loop_optimization (after) + /// CHECK-NOT: VecDotProd + public static final int testDotProdMixedSizeAndSign(byte[] a, char[] b) { + int s = 1; + for (int i = 0; i < b.length; i++) { + int temp = a[i] * b[i]; + s += temp; + } + return s; + } + + /// CHECK-START: int other.TestVarious.testDotProdInt32(int[], int[]) loop_optimization (before) + /// CHECK-DAG: <<Const0:i\d+>> IntConstant 0 loop:none + /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 loop:none + /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Const0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Phi2:i\d+>> Phi [<<Const1>>,{{i\d+}}] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get1:i\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get2:i\d+>> ArrayGet 
[{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Mul:i\d+>> Mul [<<Get1>>,<<Get2>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi2>>,<<Mul>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi1>>,<<Const1>>] loop:<<Loop>> outer_loop:none + + /// CHECK-START-{ARM64}: int other.TestVarious.testDotProdInt32(int[], int[]) loop_optimization (after) + /// CHECK-DAG: <<Const0:i\d+>> IntConstant 0 loop:none + /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 loop:none + /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<Const1>>] loop:none + /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Const0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Load1:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Load2:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Mul:d\d+>> VecMul [<<Load1>>,<<Load2>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecAdd [<<Phi2>>,<<Mul>>] loop:<<Loop>> outer_loop:none + // + /// CHECK-DAG: <<Reduce:d\d+>> VecReduce [<<Phi2>>] loop:none + /// CHECK-DAG: VecExtractScalar [<<Reduce>>] loop:none + public static final int testDotProdInt32(int[] a, int[] b) { + int s = 1; + for (int i = 0; i < b.length; i++) { + int temp = a[i] * b[i]; + s += temp; + } + return s; + } + + /// CHECK-START: int other.TestVarious.testDotProdBothSignedUnsigned1(byte[], byte[]) loop_optimization (before) + /// CHECK-DAG: <<Const0:i\d+>> IntConstant 0 loop:none + /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 loop:none + /// CHECK-DAG: <<Const2:i\d+>> IntConstant 2 loop:none + /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Const0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Phi2:i\d+>> Phi [<<Const1>>,{{i\d+}}] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Phi3:i\d+>> Phi [<<Const2>>,{{i\d+}}] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get1:b\d+>> ArrayGet [{{l\d+}},<<Phi1>>] 
loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get2:b\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Mul1:i\d+>> Mul [<<Get1>>,<<Get2>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi2>>,<<Mul1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<TypeC1:a\d+>> TypeConversion [<<Get1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<TypeC2:a\d+>> TypeConversion [<<Get2>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Mul2:i\d+>> Mul [<<TypeC1>>,<<TypeC2>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi3>>,<<Mul2>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi1>>,<<Const1>>] loop:<<Loop>> outer_loop:none + + /// CHECK-START-{ARM64}: int other.TestVarious.testDotProdBothSignedUnsigned1(byte[], byte[]) loop_optimization (after) + /// CHECK-DAG: <<Const0:i\d+>> IntConstant 0 loop:none + /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 loop:none + /// CHECK-DAG: <<Const2:i\d+>> IntConstant 2 loop:none + /// CHECK-DAG: <<Const16:i\d+>> IntConstant 16 loop:none + /// CHECK-DAG: <<Set1:d\d+>> VecSetScalars [<<Const1>>] loop:none + /// CHECK-DAG: <<Set2:d\d+>> VecSetScalars [<<Const2>>] loop:none + // + /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Const0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set1>>,{{d\d+}}] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Phi3:d\d+>> Phi [<<Set2>>,{{d\d+}}] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Load1:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Load2:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecDotProd [<<Phi2>>,<<Load1>>,<<Load2>>] type:Int8 loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecDotProd [<<Phi3>>,<<Load1>>,<<Load2>>] type:Uint8 loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi1>>,<<Const16>>] loop:<<Loop>> outer_loop:none + public static final int testDotProdBothSignedUnsigned1(byte[] a, byte[] b) { + int s1 = 1; + int s2 
= 2; + for (int i = 0; i < b.length; i++) { + byte a_val = a[i]; + byte b_val = b[i]; + s1 += a_val * b_val; + s2 += (a_val & 0xff) * (b_val & 0xff); + } + return s1 + s2; + } + + /// CHECK-START: int other.TestVarious.testDotProdBothSignedUnsigned2(byte[], byte[]) loop_optimization (before) + /// CHECK-DAG: <<Const0:i\d+>> IntConstant 0 loop:none + /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 loop:none + /// CHECK-DAG: <<Const2:i\d+>> IntConstant 2 loop:none + /// CHECK-DAG: <<Const42:i\d+>> IntConstant 42 loop:none + /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Const0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Phi2:i\d+>> Phi [<<Const1>>,{{i\d+}}] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Phi3:i\d+>> Phi [<<Const2>>,{{i\d+}}] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get1:b\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get2:a\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<TypeC1:a\d+>> TypeConversion [<<Get1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Mul1:i\d+>> Mul [<<Get2>>,<<TypeC1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi3>>,<<Mul1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Mul2:i\d+>> Mul [<<Get1>>,<<Const42>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi2>>,<<Mul2>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi1>>,<<Const1>>] loop:<<Loop>> outer_loop:none + + /// CHECK-START-{ARM64}: int other.TestVarious.testDotProdBothSignedUnsigned2(byte[], byte[]) loop_optimization (after) + /// CHECK-DAG: <<Const0:i\d+>> IntConstant 0 loop:none + /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 loop:none + /// CHECK-DAG: <<Const2:i\d+>> IntConstant 2 loop:none + /// CHECK-DAG: <<Const16:i\d+>> IntConstant 16 loop:none + /// CHECK-DAG: <<Const42:i\d+>> IntConstant 42 loop:none + /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<Const42>>] loop:none + /// CHECK-DAG: <<Set1:d\d+>> VecSetScalars [<<Const1>>] 
loop:none + /// CHECK-DAG: <<Set2:d\d+>> VecSetScalars [<<Const2>>] loop:none + // + /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Const0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set1>>,{{d\d+}}] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Phi3:d\d+>> Phi [<<Set2>>,{{d\d+}}] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Load1:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Load2:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecDotProd [<<Phi3>>,<<Load2>>,<<Load1>>] type:Uint8 loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecDotProd [<<Phi2>>,<<Load1>>,<<Repl>>] type:Int8 loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi1>>,<<Const16>>] loop:<<Loop>> outer_loop:none + public static final int testDotProdBothSignedUnsigned2(byte[] a, byte[] b) { + int s1 = 1; + int s2 = 2; + for (int i = 0; i < b.length; i++) { + byte a_val = a[i]; + byte b_val = b[i]; + s2 += (a_val & 0xff) * (b_val & 0xff); + s1 += a_val * 42; + } + return s1 + s2; + } + + /// CHECK-START: int other.TestVarious.testDotProdBothSignedUnsignedDoubleLoad(byte[], byte[]) loop_optimization (before) + /// CHECK-DAG: <<Const0:i\d+>> IntConstant 0 loop:none + /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 loop:none + /// CHECK-DAG: <<Const2:i\d+>> IntConstant 2 loop:none + /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Const0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Phi2:i\d+>> Phi [<<Const1>>,{{i\d+}}] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Phi3:i\d+>> Phi [<<Const2>>,{{i\d+}}] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<GetB1:b\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<GetB2:b\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Mul1:i\d+>> Mul [<<GetB1>>,<<GetB2>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi2>>,<<Mul1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: 
<<GetA1:a\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<GetA2:a\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Mul2:i\d+>> Mul [<<GetA1>>,<<GetA2>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi3>>,<<Mul2>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi1>>,<<Const1>>] loop:<<Loop>> outer_loop:none + + /// CHECK-START-{ARM64}: int other.TestVarious.testDotProdBothSignedUnsignedDoubleLoad(byte[], byte[]) loop_optimization (after) + /// CHECK-DAG: <<Const0:i\d+>> IntConstant 0 loop:none + /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 loop:none + /// CHECK-DAG: <<Const2:i\d+>> IntConstant 2 loop:none + /// CHECK-DAG: <<Const16:i\d+>> IntConstant 16 loop:none + /// CHECK-DAG: <<Set1:d\d+>> VecSetScalars [<<Const1>>] loop:none + /// CHECK-DAG: <<Set2:d\d+>> VecSetScalars [<<Const2>>] loop:none + // + /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Const0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set1>>,{{d\d+}}] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Phi3:d\d+>> Phi [<<Set2>>,{{d\d+}}] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Load1:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Load2:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecDotProd [<<Phi2>>,<<Load1>>,<<Load2>>] type:Int8 loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Load3:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Load4:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecDotProd [<<Phi3>>,<<Load3>>,<<Load4>>] type:Uint8 loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi1>>,<<Const16>>] loop:<<Loop>> outer_loop:none + public static final int testDotProdBothSignedUnsignedDoubleLoad(byte[] a, byte[] b) { + int s1 = 1; + int s2 = 2; + for (int i = 0; i < b.length; i++) { + s1 += a[i] * b[i]; + s2 += (a[i] & 0xff) * 
(b[i] & 0xff); + } + return s1 + s2; + } + + /// CHECK-START: int other.TestVarious.testDotProdBothSignedUnsignedChar(char[], char[]) loop_optimization (before) + /// CHECK-DAG: <<Const0:i\d+>> IntConstant 0 loop:none + /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 loop:none + /// CHECK-DAG: <<Const2:i\d+>> IntConstant 2 loop:none + /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Const0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Phi2:i\d+>> Phi [<<Const1>>,{{i\d+}}] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Phi3:i\d+>> Phi [<<Const2>>,{{i\d+}}] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get1:c\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get2:c\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<TypeS1:s\d+>> TypeConversion [<<Get1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<TypeS2:s\d+>> TypeConversion [<<Get2>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Mul1:i\d+>> Mul [<<TypeS1>>,<<TypeS2>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi3>>,<<Mul1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Mul2:i\d+>> Mul [<<Get1>>,<<Get2>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi2>>,<<Mul2>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi1>>,<<Const1>>] loop:<<Loop>> outer_loop:none + + /// CHECK-START-{ARM64}: int other.TestVarious.testDotProdBothSignedUnsignedChar(char[], char[]) loop_optimization (after) + /// CHECK-DAG: <<Const0:i\d+>> IntConstant 0 loop:none + /// CHECK-DAG: <<Const1:i\d+>> IntConstant 1 loop:none + /// CHECK-DAG: <<Const2:i\d+>> IntConstant 2 loop:none + /// CHECK-DAG: <<Const8:i\d+>> IntConstant 8 loop:none + /// CHECK-DAG: <<Set1:d\d+>> VecSetScalars [<<Const1>>] loop:none + /// CHECK-DAG: <<Set2:d\d+>> VecSetScalars [<<Const2>>] loop:none + // + /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Const0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set1>>,{{d\d+}}] 
loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Phi3:d\d+>> Phi [<<Set2>>,{{d\d+}}] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Load1:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Load2:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecDotProd [<<Phi3>>,<<Load1>>,<<Load2>>] type:Int16 loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecDotProd [<<Phi2>>,<<Load1>>,<<Load2>>] type:Uint16 loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi1>>,<<Const8>>] loop:<<Loop>> outer_loop:none + public static final int testDotProdBothSignedUnsignedChar(char[] a, char[] b) { + int s1 = 1; + int s2 = 2; + for (int i = 0; i < b.length; i++) { + char a_val = a[i]; + char b_val = b[i]; + s2 += ((short)a_val) * ((short)b_val); + s1 += a_val * b_val; + } + return s1 + s2; + } + + private static void expectEquals(int expected, int result) { + if (expected != result) { + throw new Error("Expected: " + expected + ", found: " + result); + } + } + + public static void run() { + final short MAX_S = Short.MAX_VALUE; + final short MIN_S = Short.MAX_VALUE; + + byte[] b1 = { -128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -128, -128, -128, -128 }; + byte[] b2 = { 127, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 127, 127, 127, 127 }; + + char[] c1 = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, MIN_S, MIN_S }; + char[] c2 = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, MIN_S, MIN_S }; + + int[] i1 = { -128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -128, -128, -128, -128 }; + int[] i2 = { 127, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 127, 127, 127, 127 }; + + short[] s1 = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, MIN_S, MIN_S }; + + expectEquals(56516, testDotProdConstRight(b2)); + expectEquals(56516, testDotProdConstLeft(b2)); + expectEquals(1271, testDotProdLoopInvariantConvRight(b2, 129)); + expectEquals(-8519423, testDotProdByteToChar(c1, c2)); + expectEquals(-8388351, testDotProdMixedSize(b1, s1)); + expectEquals(-8388351, 
testDotProdMixedSizeAndSign(b1, c2)); + expectEquals(-81279, testDotProdInt32(i1, i2)); + expectEquals(3, testDotProdBothSignedUnsigned1(b1, b2)); + expectEquals(54403, testDotProdBothSignedUnsigned2(b1, b2)); + expectEquals(3, testDotProdBothSignedUnsignedDoubleLoad(b1, b2)); + expectEquals(-262137, testDotProdBothSignedUnsignedChar(c1, c2)); + } + + public static void main(String[] args) { + run(); + } +} |