diff options
30 files changed, 2419 insertions, 237 deletions
diff --git a/compiler/optimizing/code_generator_vector_arm64.cc b/compiler/optimizing/code_generator_vector_arm64.cc index 18a55c8b09..3f576c82b3 100644 --- a/compiler/optimizing/code_generator_vector_arm64.cc +++ b/compiler/optimizing/code_generator_vector_arm64.cc @@ -949,20 +949,18 @@ void InstructionCodeGeneratorARM64::VisitVecSetScalars(HVecSetScalars* instructi } } -void LocationsBuilderARM64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) { - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instr); - switch (instr->GetPackedType()) { +// Helper to set up locations for vector accumulations. +static void CreateVecAccumLocations(ArenaAllocator* arena, HVecOperation* instruction) { + LocationSummary* locations = new (arena) LocationSummary(instruction); + switch (instruction->GetPackedType()) { case Primitive::kPrimByte: case Primitive::kPrimChar: case Primitive::kPrimShort: case Primitive::kPrimInt: - locations->SetInAt( - HVecMultiplyAccumulate::kInputAccumulatorIndex, Location::RequiresFpuRegister()); - locations->SetInAt( - HVecMultiplyAccumulate::kInputMulLeftIndex, Location::RequiresFpuRegister()); - locations->SetInAt( - HVecMultiplyAccumulate::kInputMulRightIndex, Location::RequiresFpuRegister()); - DCHECK_EQ(HVecMultiplyAccumulate::kInputAccumulatorIndex, 0); + case Primitive::kPrimLong: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RequiresFpuRegister()); + locations->SetInAt(2, Location::RequiresFpuRegister()); locations->SetOut(Location::SameAsFirstInput()); break; default: @@ -971,18 +969,25 @@ void LocationsBuilderARM64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* i } } +void LocationsBuilderARM64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) { + CreateVecAccumLocations(GetGraph()->GetArena(), instruction); +} + // Some early revisions of the Cortex-A53 have an erratum (835769) whereby it is possible for a // 64-bit scalar multiply-accumulate instruction in AArch64 state to generate an incorrect result. // However vector MultiplyAccumulate instruction is not affected. -void InstructionCodeGeneratorARM64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) { - LocationSummary* locations = instr->GetLocations(); - VRegister acc = VRegisterFrom(locations->InAt(HVecMultiplyAccumulate::kInputAccumulatorIndex)); - VRegister left = VRegisterFrom(locations->InAt(HVecMultiplyAccumulate::kInputMulLeftIndex)); - VRegister right = VRegisterFrom(locations->InAt(HVecMultiplyAccumulate::kInputMulRightIndex)); - switch (instr->GetPackedType()) { +void InstructionCodeGeneratorARM64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) { + LocationSummary* locations = instruction->GetLocations(); + VRegister acc = VRegisterFrom(locations->InAt(0)); + VRegister left = VRegisterFrom(locations->InAt(1)); + VRegister right = VRegisterFrom(locations->InAt(2)); + + DCHECK(locations->InAt(0).Equals(locations->Out())); + + switch (instruction->GetPackedType()) { case Primitive::kPrimByte: - DCHECK_EQ(16u, instr->GetVectorLength()); - if (instr->GetOpKind() == HInstruction::kAdd) { + DCHECK_EQ(16u, instruction->GetVectorLength()); + if (instruction->GetOpKind() == HInstruction::kAdd) { __ Mla(acc.V16B(), left.V16B(), right.V16B()); } else { __ Mls(acc.V16B(), left.V16B(), right.V16B()); @@ -990,16 +995,16 @@ void InstructionCodeGeneratorARM64::VisitVecMultiplyAccumulate(HVecMultiplyAccum break; case Primitive::kPrimChar: case Primitive::kPrimShort: - DCHECK_EQ(8u, instr->GetVectorLength()); - if (instr->GetOpKind() == HInstruction::kAdd) { + DCHECK_EQ(8u, instruction->GetVectorLength()); + if (instruction->GetOpKind() == HInstruction::kAdd) { __ Mla(acc.V8H(), left.V8H(), right.V8H()); } else { __ Mls(acc.V8H(), left.V8H(), right.V8H()); } break; case Primitive::kPrimInt: - DCHECK_EQ(4u, instr->GetVectorLength()); - if (instr->GetOpKind() == HInstruction::kAdd) { + DCHECK_EQ(4u, instruction->GetVectorLength()); + if (instruction->GetOpKind() == HInstruction::kAdd) { __ Mla(acc.V4S(), left.V4S(), right.V4S()); } else { __ Mls(acc.V4S(), left.V4S(), right.V4S()); @@ -1007,6 +1012,186 @@ void InstructionCodeGeneratorARM64::VisitVecMultiplyAccumulate(HVecMultiplyAccum break; default: LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +void LocationsBuilderARM64::VisitVecSADAccumulate(HVecSADAccumulate* instruction) { + CreateVecAccumLocations(GetGraph()->GetArena(), instruction); + // Some conversions require temporary registers. + LocationSummary* locations = instruction->GetLocations(); + HVecOperation* a = instruction->InputAt(1)->AsVecOperation(); + HVecOperation* b = instruction->InputAt(2)->AsVecOperation(); + DCHECK_EQ(a->GetPackedType(), b->GetPackedType()); + switch (a->GetPackedType()) { + case Primitive::kPrimByte: + switch (instruction->GetPackedType()) { + case Primitive::kPrimLong: + locations->AddTemp(Location::RequiresFpuRegister()); + locations->AddTemp(Location::RequiresFpuRegister()); + FALLTHROUGH_INTENDED; + case Primitive::kPrimInt: + locations->AddTemp(Location::RequiresFpuRegister()); + locations->AddTemp(Location::RequiresFpuRegister()); + break; + default: + break; + } + break; + case Primitive::kPrimChar: + case Primitive::kPrimShort: + if (instruction->GetPackedType() == Primitive::kPrimLong) { + locations->AddTemp(Location::RequiresFpuRegister()); + locations->AddTemp(Location::RequiresFpuRegister()); + } + break; + case Primitive::kPrimInt: + case Primitive::kPrimLong: + if (instruction->GetPackedType() == a->GetPackedType()) { + locations->AddTemp(Location::RequiresFpuRegister()); + } + break; + default: + break; + } +} + +void InstructionCodeGeneratorARM64::VisitVecSADAccumulate(HVecSADAccumulate* instruction) { + LocationSummary* locations = instruction->GetLocations(); + VRegister acc = VRegisterFrom(locations->InAt(0)); + VRegister left = VRegisterFrom(locations->InAt(1)); + VRegister right = VRegisterFrom(locations->InAt(2)); + + DCHECK(locations->InAt(0).Equals(locations->Out())); + + // Handle all feasible acc_T += sad(a_S, b_S) type combinations (T x S). + HVecOperation* a = instruction->InputAt(1)->AsVecOperation(); + HVecOperation* b = instruction->InputAt(2)->AsVecOperation(); + DCHECK_EQ(a->GetPackedType(), b->GetPackedType()); + switch (a->GetPackedType()) { + case Primitive::kPrimByte: + DCHECK_EQ(16u, a->GetVectorLength()); + switch (instruction->GetPackedType()) { + case Primitive::kPrimChar: + case Primitive::kPrimShort: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ Sabal(acc.V8H(), left.V8B(), right.V8B()); + __ Sabal2(acc.V8H(), left.V16B(), right.V16B()); + break; + case Primitive::kPrimInt: { + DCHECK_EQ(4u, instruction->GetVectorLength()); + VRegister tmp1 = VRegisterFrom(locations->GetTemp(0)); + VRegister tmp2 = VRegisterFrom(locations->GetTemp(1)); + __ Sxtl(tmp1.V8H(), left.V8B()); + __ Sxtl(tmp2.V8H(), right.V8B()); + __ Sabal(acc.V4S(), tmp1.V4H(), tmp2.V4H()); + __ Sabal2(acc.V4S(), tmp1.V8H(), tmp2.V8H()); + __ Sxtl2(tmp1.V8H(), left.V16B()); + __ Sxtl2(tmp2.V8H(), right.V16B()); + __ Sabal(acc.V4S(), tmp1.V4H(), tmp2.V4H()); + __ Sabal2(acc.V4S(), tmp1.V8H(), tmp2.V8H()); + break; + } + case Primitive::kPrimLong: { + DCHECK_EQ(2u, instruction->GetVectorLength()); + VRegister tmp1 = VRegisterFrom(locations->GetTemp(0)); + VRegister tmp2 = VRegisterFrom(locations->GetTemp(1)); + VRegister tmp3 = VRegisterFrom(locations->GetTemp(2)); + VRegister tmp4 = VRegisterFrom(locations->GetTemp(3)); + __ Sxtl(tmp1.V8H(), left.V8B()); + __ Sxtl(tmp2.V8H(), right.V8B()); + __ Sxtl(tmp3.V4S(), tmp1.V4H()); + __ Sxtl(tmp4.V4S(), tmp2.V4H()); + __ Sabal(acc.V2D(), tmp3.V2S(), tmp4.V2S()); + __ Sabal2(acc.V2D(), tmp3.V4S(), tmp4.V4S()); + __ Sxtl2(tmp3.V4S(), tmp1.V8H()); + __ Sxtl2(tmp4.V4S(), tmp2.V8H()); + __ Sabal(acc.V2D(), tmp3.V2S(), tmp4.V2S()); + __ Sabal2(acc.V2D(), tmp3.V4S(), tmp4.V4S()); + __ Sxtl2(tmp1.V8H(), left.V16B()); + __ Sxtl2(tmp2.V8H(), right.V16B()); + __ Sxtl(tmp3.V4S(), tmp1.V4H()); + __ Sxtl(tmp4.V4S(), tmp2.V4H()); + __ Sabal(acc.V2D(), tmp3.V2S(), tmp4.V2S()); + __ Sabal2(acc.V2D(), tmp3.V4S(), tmp4.V4S()); + __ Sxtl2(tmp3.V4S(), tmp1.V8H()); + __ Sxtl2(tmp4.V4S(), tmp2.V8H()); + __ Sabal(acc.V2D(), tmp3.V2S(), tmp4.V2S()); + __ Sabal2(acc.V2D(), tmp3.V4S(), tmp4.V4S()); + break; + } + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } + break; + case Primitive::kPrimChar: + case Primitive::kPrimShort: + DCHECK_EQ(8u, a->GetVectorLength()); + switch (instruction->GetPackedType()) { + case Primitive::kPrimInt: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ Sabal(acc.V4S(), left.V4H(), right.V4H()); + __ Sabal2(acc.V4S(), left.V8H(), right.V8H()); + break; + case Primitive::kPrimLong: { + DCHECK_EQ(2u, instruction->GetVectorLength()); + VRegister tmp1 = VRegisterFrom(locations->GetTemp(0)); + VRegister tmp2 = VRegisterFrom(locations->GetTemp(1)); + __ Sxtl(tmp1.V4S(), left.V4H()); + __ Sxtl(tmp2.V4S(), right.V4H()); + __ Sabal(acc.V2D(), tmp1.V2S(), tmp2.V2S()); + __ Sabal2(acc.V2D(), tmp1.V4S(), tmp2.V4S()); + __ Sxtl2(tmp1.V4S(), left.V8H()); + __ Sxtl2(tmp2.V4S(), right.V8H()); + __ Sabal(acc.V2D(), tmp1.V2S(), tmp2.V2S()); + __ Sabal2(acc.V2D(), tmp1.V4S(), tmp2.V4S()); + break; + } + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } + break; + case Primitive::kPrimInt: + DCHECK_EQ(4u, a->GetVectorLength()); + switch (instruction->GetPackedType()) { + case Primitive::kPrimInt: { + DCHECK_EQ(4u, instruction->GetVectorLength()); + VRegister tmp = VRegisterFrom(locations->GetTemp(0)); + __ Sub(tmp.V4S(), left.V4S(), right.V4S()); + __ Abs(tmp.V4S(), tmp.V4S()); + __ Add(acc.V4S(), acc.V4S(), tmp.V4S()); + break; + } + case Primitive::kPrimLong: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ Sabal(acc.V2D(), left.V2S(), right.V2S()); + __ Sabal2(acc.V2D(), left.V4S(), right.V4S()); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } + break; + case Primitive::kPrimLong: + DCHECK_EQ(2u, a->GetVectorLength()); + switch (instruction->GetPackedType()) { + case Primitive::kPrimLong: { + DCHECK_EQ(2u, instruction->GetVectorLength()); + VRegister tmp = VRegisterFrom(locations->GetTemp(0)); + __ Sub(tmp.V2D(), left.V2D(), right.V2D()); + __ Abs(tmp.V2D(), tmp.V2D()); + __ Add(acc.V2D(), acc.V2D(), tmp.V2D()); + break; + } + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; } } diff --git a/compiler/optimizing/code_generator_vector_arm_vixl.cc b/compiler/optimizing/code_generator_vector_arm_vixl.cc index 7a11dff41e..069054c2f5 100644 --- a/compiler/optimizing/code_generator_vector_arm_vixl.cc +++ b/compiler/optimizing/code_generator_vector_arm_vixl.cc @@ -629,12 +629,40 @@ void InstructionCodeGeneratorARMVIXL::VisitVecSetScalars(HVecSetScalars* instruc LOG(FATAL) << "No SIMD for " << instruction->GetId(); } -void LocationsBuilderARMVIXL::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) { - LOG(FATAL) << "No SIMD for " << instr->GetId(); +// Helper to set up locations for vector accumulations. +static void CreateVecAccumLocations(ArenaAllocator* arena, HVecOperation* instruction) { + LocationSummary* locations = new (arena) LocationSummary(instruction); + switch (instruction->GetPackedType()) { + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimLong: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RequiresFpuRegister()); + locations->SetInAt(2, Location::RequiresFpuRegister()); + locations->SetOut(Location::SameAsFirstInput()); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +void LocationsBuilderARMVIXL::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) { + CreateVecAccumLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorARMVIXL::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); } -void InstructionCodeGeneratorARMVIXL::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) { - LOG(FATAL) << "No SIMD for " << instr->GetId(); +void LocationsBuilderARMVIXL::VisitVecSADAccumulate(HVecSADAccumulate* instruction) { + CreateVecAccumLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorARMVIXL::VisitVecSADAccumulate(HVecSADAccumulate* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); } // Return whether the vector memory access operation is guaranteed to be word-aligned (ARM word diff --git a/compiler/optimizing/code_generator_vector_mips.cc b/compiler/optimizing/code_generator_vector_mips.cc index c2fbf7f04b..0bedafcc81 100644 --- a/compiler/optimizing/code_generator_vector_mips.cc +++ b/compiler/optimizing/code_generator_vector_mips.cc @@ -826,21 +826,18 @@ void InstructionCodeGeneratorMIPS::VisitVecSetScalars(HVecSetScalars* instructio LOG(FATAL) << "No SIMD for " << instruction->GetId(); } -void LocationsBuilderMIPS::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) { - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instr); - switch (instr->GetPackedType()) { +// Helper to set up locations for vector accumulations. +static void CreateVecAccumLocations(ArenaAllocator* arena, HVecOperation* instruction) { + LocationSummary* locations = new (arena) LocationSummary(instruction); + switch (instruction->GetPackedType()) { case Primitive::kPrimByte: case Primitive::kPrimChar: case Primitive::kPrimShort: case Primitive::kPrimInt: case Primitive::kPrimLong: - locations->SetInAt( - HVecMultiplyAccumulate::kInputAccumulatorIndex, Location::RequiresFpuRegister()); - locations->SetInAt( - HVecMultiplyAccumulate::kInputMulLeftIndex, Location::RequiresFpuRegister()); - locations->SetInAt( - HVecMultiplyAccumulate::kInputMulRightIndex, Location::RequiresFpuRegister()); - DCHECK_EQ(HVecMultiplyAccumulate::kInputAccumulatorIndex, 0); + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RequiresFpuRegister()); + locations->SetInAt(2, Location::RequiresFpuRegister()); locations->SetOut(Location::SameAsFirstInput()); break; default: @@ -849,18 +846,19 @@ void LocationsBuilderMIPS::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* in } } -void InstructionCodeGeneratorMIPS::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) { - LocationSummary* locations = instr->GetLocations(); - VectorRegister acc = - VectorRegisterFrom(locations->InAt(HVecMultiplyAccumulate::kInputAccumulatorIndex)); - VectorRegister left = - VectorRegisterFrom(locations->InAt(HVecMultiplyAccumulate::kInputMulLeftIndex)); - VectorRegister right = - VectorRegisterFrom(locations->InAt(HVecMultiplyAccumulate::kInputMulRightIndex)); - switch (instr->GetPackedType()) { +void LocationsBuilderMIPS::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) { + CreateVecAccumLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorMIPS::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) { + LocationSummary* locations = instruction->GetLocations(); + VectorRegister acc = VectorRegisterFrom(locations->InAt(0)); + VectorRegister left = VectorRegisterFrom(locations->InAt(1)); + VectorRegister right = VectorRegisterFrom(locations->InAt(2)); + switch (instruction->GetPackedType()) { case Primitive::kPrimByte: - DCHECK_EQ(16u, instr->GetVectorLength()); - if (instr->GetOpKind() == HInstruction::kAdd) { + DCHECK_EQ(16u, instruction->GetVectorLength()); + if (instruction->GetOpKind() == HInstruction::kAdd) { __ MaddvB(acc, left, right); } else { __ MsubvB(acc, left, right); @@ -868,24 +866,24 @@ void InstructionCodeGeneratorMIPS::VisitVecMultiplyAccumulate(HVecMultiplyAccumu break; case Primitive::kPrimChar: case Primitive::kPrimShort: - DCHECK_EQ(8u, instr->GetVectorLength()); - if (instr->GetOpKind() == HInstruction::kAdd) { + DCHECK_EQ(8u, instruction->GetVectorLength()); + if (instruction->GetOpKind() == HInstruction::kAdd) { __ MaddvH(acc, left, right); } else { __ MsubvH(acc, left, right); } break; case Primitive::kPrimInt: - DCHECK_EQ(4u, instr->GetVectorLength()); - if (instr->GetOpKind() == HInstruction::kAdd) { + DCHECK_EQ(4u, instruction->GetVectorLength()); + if (instruction->GetOpKind() == HInstruction::kAdd) { __ MaddvW(acc, left, right); } else { __ MsubvW(acc, left, right); } break; case Primitive::kPrimLong: - DCHECK_EQ(2u, instr->GetVectorLength()); - if (instr->GetOpKind() == HInstruction::kAdd) { + DCHECK_EQ(2u, instruction->GetVectorLength()); + if (instruction->GetOpKind() == HInstruction::kAdd) { __ MaddvD(acc, left, right); } else { __ MsubvD(acc, left, right); @@ -897,6 +895,15 @@ void InstructionCodeGeneratorMIPS::VisitVecMultiplyAccumulate(HVecMultiplyAccumu } } +void LocationsBuilderMIPS::VisitVecSADAccumulate(HVecSADAccumulate* instruction) { + CreateVecAccumLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorMIPS::VisitVecSADAccumulate(HVecSADAccumulate* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); + // TODO: implement this, location helper already filled out (shared with MulAcc). +} + // Helper to set up locations for vector memory operations. static void CreateVecMemLocations(ArenaAllocator* arena, HVecMemoryOperation* instruction, diff --git a/compiler/optimizing/code_generator_vector_mips64.cc b/compiler/optimizing/code_generator_vector_mips64.cc index 9d3a777c13..db31bdcc92 100644 --- a/compiler/optimizing/code_generator_vector_mips64.cc +++ b/compiler/optimizing/code_generator_vector_mips64.cc @@ -830,21 +830,18 @@ void InstructionCodeGeneratorMIPS64::VisitVecSetScalars(HVecSetScalars* instruct LOG(FATAL) << "No SIMD for " << instruction->GetId(); } -void LocationsBuilderMIPS64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) { - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instr); - switch (instr->GetPackedType()) { +// Helper to set up locations for vector accumulations. +static void CreateVecAccumLocations(ArenaAllocator* arena, HVecOperation* instruction) { + LocationSummary* locations = new (arena) LocationSummary(instruction); + switch (instruction->GetPackedType()) { case Primitive::kPrimByte: case Primitive::kPrimChar: case Primitive::kPrimShort: case Primitive::kPrimInt: case Primitive::kPrimLong: - locations->SetInAt( - HVecMultiplyAccumulate::kInputAccumulatorIndex, Location::RequiresFpuRegister()); - locations->SetInAt( - HVecMultiplyAccumulate::kInputMulLeftIndex, Location::RequiresFpuRegister()); - locations->SetInAt( - HVecMultiplyAccumulate::kInputMulRightIndex, Location::RequiresFpuRegister()); - DCHECK_EQ(HVecMultiplyAccumulate::kInputAccumulatorIndex, 0); + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RequiresFpuRegister()); + locations->SetInAt(2, Location::RequiresFpuRegister()); locations->SetOut(Location::SameAsFirstInput()); break; default: @@ -853,18 +850,19 @@ void LocationsBuilderMIPS64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* } } -void InstructionCodeGeneratorMIPS64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) { - LocationSummary* locations = instr->GetLocations(); - VectorRegister acc = - VectorRegisterFrom(locations->InAt(HVecMultiplyAccumulate::kInputAccumulatorIndex)); - VectorRegister left = - VectorRegisterFrom(locations->InAt(HVecMultiplyAccumulate::kInputMulLeftIndex)); - VectorRegister right = - VectorRegisterFrom(locations->InAt(HVecMultiplyAccumulate::kInputMulRightIndex)); - switch (instr->GetPackedType()) { +void LocationsBuilderMIPS64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) { + CreateVecAccumLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorMIPS64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) { + LocationSummary* locations = instruction->GetLocations(); + VectorRegister acc = VectorRegisterFrom(locations->InAt(0)); + VectorRegister left = VectorRegisterFrom(locations->InAt(1)); + VectorRegister right = VectorRegisterFrom(locations->InAt(2)); + switch (instruction->GetPackedType()) { case Primitive::kPrimByte: - DCHECK_EQ(16u, instr->GetVectorLength()); - if (instr->GetOpKind() == HInstruction::kAdd) { + DCHECK_EQ(16u, instruction->GetVectorLength()); + if (instruction->GetOpKind() == HInstruction::kAdd) { __ MaddvB(acc, left, right); } else { __ MsubvB(acc, left, right); @@ -872,24 +870,24 @@ void InstructionCodeGeneratorMIPS64::VisitVecMultiplyAccumulate(HVecMultiplyAccu break; case Primitive::kPrimChar: case Primitive::kPrimShort: - DCHECK_EQ(8u, instr->GetVectorLength()); - if (instr->GetOpKind() == HInstruction::kAdd) { + DCHECK_EQ(8u, instruction->GetVectorLength()); + if (instruction->GetOpKind() == HInstruction::kAdd) { __ MaddvH(acc, left, right); } else { __ MsubvH(acc, left, right); } break; case Primitive::kPrimInt: - DCHECK_EQ(4u, instr->GetVectorLength()); - if (instr->GetOpKind() == HInstruction::kAdd) { + DCHECK_EQ(4u, instruction->GetVectorLength()); + if (instruction->GetOpKind() == HInstruction::kAdd) { __ MaddvW(acc, left, right); } else { __ MsubvW(acc, left, right); } break; case Primitive::kPrimLong: - DCHECK_EQ(2u, instr->GetVectorLength()); - if (instr->GetOpKind() == HInstruction::kAdd) { + DCHECK_EQ(2u, instruction->GetVectorLength()); + if (instruction->GetOpKind() == HInstruction::kAdd) { __ MaddvD(acc, left, right); } else { __ MsubvD(acc, left, right); @@ -901,6 +899,15 @@ void InstructionCodeGeneratorMIPS64::VisitVecMultiplyAccumulate(HVecMultiplyAccu } } +void LocationsBuilderMIPS64::VisitVecSADAccumulate(HVecSADAccumulate* instruction) { + CreateVecAccumLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorMIPS64::VisitVecSADAccumulate(HVecSADAccumulate* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); + // TODO: implement this, location helper already filled out (shared with MulAcc). +} + // Helper to set up locations for vector memory operations. static void CreateVecMemLocations(ArenaAllocator* arena, HVecMemoryOperation* instruction, diff --git a/compiler/optimizing/code_generator_vector_x86.cc b/compiler/optimizing/code_generator_vector_x86.cc index 37190f8363..5a012e7298 100644 --- a/compiler/optimizing/code_generator_vector_x86.cc +++ b/compiler/optimizing/code_generator_vector_x86.cc @@ -51,7 +51,6 @@ void LocationsBuilderX86::VisitVecReplicateScalar(HVecReplicateScalar* instructi : Location::RequiresFpuRegister()); locations->SetOut(is_zero ? Location::RequiresFpuRegister() : Location::SameAsFirstInput()); - break; default: LOG(FATAL) << "Unsupported SIMD type"; @@ -1033,12 +1032,42 @@ void InstructionCodeGeneratorX86::VisitVecSetScalars(HVecSetScalars* instruction } } -void LocationsBuilderX86::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) { - LOG(FATAL) << "No SIMD for " << instr->GetId(); +// Helper to set up locations for vector accumulations. +static void CreateVecAccumLocations(ArenaAllocator* arena, HVecOperation* instruction) { + LocationSummary* locations = new (arena) LocationSummary(instruction); + switch (instruction->GetPackedType()) { + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimLong: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RequiresFpuRegister()); + locations->SetInAt(2, Location::RequiresFpuRegister()); + locations->SetOut(Location::SameAsFirstInput()); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +void LocationsBuilderX86::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) { + CreateVecAccumLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorX86::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) { + // TODO: pmaddwd? + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void LocationsBuilderX86::VisitVecSADAccumulate(HVecSADAccumulate* instruction) { + CreateVecAccumLocations(GetGraph()->GetArena(), instruction); } -void InstructionCodeGeneratorX86::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) { - LOG(FATAL) << "No SIMD for " << instr->GetId(); +void InstructionCodeGeneratorX86::VisitVecSADAccumulate(HVecSADAccumulate* instruction) { + // TODO: psadbw for unsigned? + LOG(FATAL) << "No SIMD for " << instruction->GetId(); } // Helper to set up locations for vector memory operations. diff --git a/compiler/optimizing/code_generator_vector_x86_64.cc b/compiler/optimizing/code_generator_vector_x86_64.cc index edd0209f10..3698b7fb85 100644 --- a/compiler/optimizing/code_generator_vector_x86_64.cc +++ b/compiler/optimizing/code_generator_vector_x86_64.cc @@ -1005,11 +1005,41 @@ void InstructionCodeGeneratorX86_64::VisitVecSetScalars(HVecSetScalars* instruct } } +// Helper to set up locations for vector accumulations. +static void CreateVecAccumLocations(ArenaAllocator* arena, HVecOperation* instruction) { + LocationSummary* locations = new (arena) LocationSummary(instruction); + switch (instruction->GetPackedType()) { + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimLong: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RequiresFpuRegister()); + locations->SetInAt(2, Location::RequiresFpuRegister()); + locations->SetOut(Location::SameAsFirstInput()); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + void LocationsBuilderX86_64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); + CreateVecAccumLocations(GetGraph()->GetArena(), instruction); } void InstructionCodeGeneratorX86_64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) { + // TODO: pmaddwd? + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void LocationsBuilderX86_64::VisitVecSADAccumulate(HVecSADAccumulate* instruction) { + CreateVecAccumLocations(GetGraph()->GetArena(), instruction); +} + +void InstructionCodeGeneratorX86_64::VisitVecSADAccumulate(HVecSADAccumulate* instruction) { + // TODO: psadbw for unsigned? LOG(FATAL) << "No SIMD for " << instruction->GetId(); } diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc index baa045390b..6f8743bd53 100644 --- a/compiler/optimizing/loop_optimization.cc +++ b/compiler/optimizing/loop_optimization.cc @@ -71,10 +71,13 @@ static bool IsEarlyExit(HLoopInformation* loop_info) { return false; } -// Detect a sign extension from the given type. Returns the promoted operand on success. +// Detect a sign extension in instruction from the given type. The to64 parameter +// denotes if result is long, and thus sign extension from int can be included. +// Returns the promoted operand on success. static bool IsSignExtensionAndGet(HInstruction* instruction, Primitive::Type type, - /*out*/ HInstruction** operand) { + /*out*/ HInstruction** operand, + bool to64 = false) { // Accept any already wider constant that would be handled properly by sign // extension when represented in the *width* of the given narrower data type // (the fact that char normally zero extends does not matter here). @@ -82,20 +85,24 @@ static bool IsSignExtensionAndGet(HInstruction* instruction, if (IsInt64AndGet(instruction, /*out*/ &value)) { switch (type) { case Primitive::kPrimByte: - if (std::numeric_limits<int8_t>::min() <= value && - std::numeric_limits<int8_t>::max() >= value) { + if (IsInt<8>(value)) { *operand = instruction; return true; } return false; case Primitive::kPrimChar: case Primitive::kPrimShort: - if (std::numeric_limits<int16_t>::min() <= value && - std::numeric_limits<int16_t>::max() <= value) { + if (IsInt<16>(value)) { *operand = instruction; return true; } return false; + case Primitive::kPrimInt: + if (IsInt<32>(value)) { + *operand = instruction; + return to64; + } + return false; default: return false; } @@ -110,40 +117,52 @@ static bool IsSignExtensionAndGet(HInstruction* instruction, case Primitive::kPrimShort: *operand = instruction; return true; + case Primitive::kPrimInt: + *operand = instruction; + return to64; default: return false; } } - // TODO: perhaps explicit conversions later too? - // (this may return something different from instruction) + // Explicit type conversion to long. + if (instruction->IsTypeConversion() && instruction->GetType() == Primitive::kPrimLong) { + return IsSignExtensionAndGet(instruction->InputAt(0), type, /*out*/ operand, /*to64*/ true); + } return false; } -// Detect a zero extension from the given type. Returns the promoted operand on success. +// Detect a zero extension in instruction from the given type. The to64 parameter +// denotes if result is long, and thus zero extension from int can be included. +// Returns the promoted operand on success. static bool IsZeroExtensionAndGet(HInstruction* instruction, Primitive::Type type, - /*out*/ HInstruction** operand) { + /*out*/ HInstruction** operand, + bool to64 = false) { // Accept any already wider constant that would be handled properly by zero // extension when represented in the *width* of the given narrower data type - // (the fact that byte/short normally sign extend does not matter here). + // (the fact that byte/short/int normally sign extend does not matter here). int64_t value = 0; if (IsInt64AndGet(instruction, /*out*/ &value)) { switch (type) { case Primitive::kPrimByte: - if (std::numeric_limits<uint8_t>::min() <= value && - std::numeric_limits<uint8_t>::max() >= value) { + if (IsUint<8>(value)) { *operand = instruction; return true; } return false; case Primitive::kPrimChar: case Primitive::kPrimShort: - if (std::numeric_limits<uint16_t>::min() <= value && - std::numeric_limits<uint16_t>::max() <= value) { + if (IsUint<16>(value)) { *operand = instruction; return true; } return false; + case Primitive::kPrimInt: + if (IsUint<32>(value)) { + *operand = instruction; + return to64; + } + return false; default: return false; } @@ -170,14 +189,21 @@ static bool IsZeroExtensionAndGet(HInstruction* instruction, (IsInt64AndGet(b, /*out*/ &mask) && (IsSignExtensionAndGet(a, type, /*out*/ operand) || IsZeroExtensionAndGet(a, type, /*out*/ operand)))) { switch ((*operand)->GetType()) { - case Primitive::kPrimByte: return mask == std::numeric_limits<uint8_t>::max(); + case Primitive::kPrimByte: + return mask == std::numeric_limits<uint8_t>::max(); case Primitive::kPrimChar: - case Primitive::kPrimShort: return mask == std::numeric_limits<uint16_t>::max(); + case Primitive::kPrimShort: + return mask == std::numeric_limits<uint16_t>::max(); + case Primitive::kPrimInt: + return mask == std::numeric_limits<uint32_t>::max() && to64; default: return false; } } } - // TODO: perhaps explicit conversions later too? + // Explicit type conversion to long. + if (instruction->IsTypeConversion() && instruction->GetType() == Primitive::kPrimLong) { + return IsZeroExtensionAndGet(instruction->InputAt(0), type, /*out*/ operand, /*to64*/ true); + } return false; } @@ -214,6 +240,55 @@ static bool IsNarrowerOperand(HInstruction* a, return false; } +// Compute relative vector length based on type difference. +static size_t GetOtherVL(Primitive::Type other_type, Primitive::Type vector_type, size_t vl) { + switch (other_type) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + switch (vector_type) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: return vl; + default: break; + } + return vl; + case Primitive::kPrimChar: + case Primitive::kPrimShort: + switch (vector_type) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: return vl >> 1; + case Primitive::kPrimChar: + case Primitive::kPrimShort: return vl; + default: break; + } + break; + case Primitive::kPrimInt: + switch (vector_type) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: return vl >> 2; + case Primitive::kPrimChar: + case Primitive::kPrimShort: return vl >> 1; + case Primitive::kPrimInt: return vl; + default: break; + } + break; + case Primitive::kPrimLong: + switch (vector_type) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: return vl >> 3; + case Primitive::kPrimChar: + case Primitive::kPrimShort: return vl >> 2; + case Primitive::kPrimInt: return vl >> 1; + case Primitive::kPrimLong: return vl; + default: break; + } + break; + default: + break; + } + LOG(FATAL) << "Unsupported idiom conversion"; + UNREACHABLE(); +} + // Detect up to two instructions a and b, and an acccumulated constant c. static bool IsAddConstHelper(HInstruction* instruction, /*out*/ HInstruction** a, @@ -260,16 +335,16 @@ static bool IsAddConst(HInstruction* instruction, } // Detect reductions of the following forms, -// under assumption phi has only *one* use: // x = x_phi + .. // x = x_phi - .. // x = max(x_phi, ..) // x = min(x_phi, ..) static bool HasReductionFormat(HInstruction* reduction, HInstruction* phi) { if (reduction->IsAdd()) { - return reduction->InputAt(0) == phi || reduction->InputAt(1) == phi; + return (reduction->InputAt(0) == phi && reduction->InputAt(1) != phi) || + (reduction->InputAt(0) != phi && reduction->InputAt(1) == phi); } else if (reduction->IsSub()) { - return reduction->InputAt(0) == phi; + return (reduction->InputAt(0) == phi && reduction->InputAt(1) != phi); } else if (reduction->IsInvokeStaticOrDirect()) { switch (reduction->AsInvokeStaticOrDirect()->GetIntrinsic()) { case Intrinsics::kMathMinIntInt: @@ -280,7 +355,8 @@ static bool HasReductionFormat(HInstruction* reduction, HInstruction* phi) { case Intrinsics::kMathMaxLongLong: case Intrinsics::kMathMaxFloatFloat: case Intrinsics::kMathMaxDoubleDouble: - return reduction->InputAt(0) == phi || reduction->InputAt(1) == phi; + return (reduction->InputAt(0) == phi && reduction->InputAt(1) != phi) || + (reduction->InputAt(0) != phi && reduction->InputAt(1) == phi); default: return false; } @@ -288,9 +364,9 @@ static bool HasReductionFormat(HInstruction* reduction, HInstruction* phi) { return false; } -// Translates operation to reduction kind. -static HVecReduce::ReductionKind GetReductionKind(HInstruction* reduction) { - if (reduction->IsVecAdd() || reduction->IsVecSub()) { +// Translates vector operation to reduction kind. +static HVecReduce::ReductionKind GetReductionKind(HVecOperation* reduction) { + if (reduction->IsVecAdd() || reduction->IsVecSub() || reduction->IsVecSADAccumulate()) { return HVecReduce::kSum; } else if (reduction->IsVecMin()) { return HVecReduce::kMin; @@ -720,7 +796,6 @@ void HLoopOptimization::Vectorize(LoopNode* node, HBasicBlock* block, HBasicBlock* exit, int64_t trip_count) { - Primitive::Type induc_type = Primitive::kPrimInt; HBasicBlock* header = node->loop_info->GetHeader(); HBasicBlock* preheader = node->loop_info->GetPreHeader(); @@ -739,6 +814,10 @@ void HLoopOptimization::Vectorize(LoopNode* node, vector_header_ = header; vector_body_ = block; + // Loop induction type. + Primitive::Type induc_type = main_phi->GetType(); + DCHECK(induc_type == Primitive::kPrimInt || induc_type == Primitive::kPrimLong) << induc_type; + // Generate dynamic loop peeling trip count, if needed, under the assumption // that the Android runtime guarantees at least "component size" alignment: // ptc = (ALIGN - (&a[initial] % ALIGN)) / type-size @@ -767,10 +846,10 @@ void HLoopOptimization::Vectorize(LoopNode* node, HInstruction* rem = Insert( preheader, new (global_allocator_) HAnd(induc_type, diff, - graph_->GetIntConstant(chunk - 1))); + graph_->GetConstant(induc_type, chunk - 1))); vtc = Insert(preheader, new (global_allocator_) HSub(induc_type, stc, rem)); } - vector_index_ = graph_->GetIntConstant(0); + vector_index_ = graph_->GetConstant(induc_type, 0); // Generate runtime disambiguation test: // vtc = a != b ? vtc : 0; @@ -779,7 +858,8 @@ void HLoopOptimization::Vectorize(LoopNode* node, preheader, new (global_allocator_) HNotEqual(vector_runtime_test_a_, vector_runtime_test_b_)); vtc = Insert(preheader, - new (global_allocator_) HSelect(rt, vtc, graph_->GetIntConstant(0), kNoDexPc)); + new (global_allocator_) + HSelect(rt, vtc, graph_->GetConstant(induc_type, 0), kNoDexPc)); needs_cleanup = true; } @@ -793,7 +873,7 @@ void HLoopOptimization::Vectorize(LoopNode* node, graph_->TransformLoopForVectorization(vector_header_, vector_body_, exit), vector_index_, ptc, - graph_->GetIntConstant(1), + graph_->GetConstant(induc_type, 1), kNoUnrollingFactor); } @@ -806,7 +886,7 @@ void HLoopOptimization::Vectorize(LoopNode* node, graph_->TransformLoopForVectorization(vector_header_, vector_body_, exit), vector_index_, vtc, - graph_->GetIntConstant(vector_length_), // increment per unroll + graph_->GetConstant(induc_type, vector_length_), // increment per unroll unroll); HLoopInformation* vloop = vector_header_->GetLoopInformation(); @@ -820,14 +900,20 @@ void HLoopOptimization::Vectorize(LoopNode* node, graph_->TransformLoopForVectorization(vector_header_, vector_body_, exit), vector_index_, stc, - graph_->GetIntConstant(1), + graph_->GetConstant(induc_type, 1), kNoUnrollingFactor); } // Link reductions to their final uses. for (auto i = reductions_->begin(); i != reductions_->end(); ++i) { if (i->first->IsPhi()) { - i->first->ReplaceWith(ReduceAndExtractIfNeeded(i->second)); + HInstruction* phi = i->first; + HInstruction* repl = ReduceAndExtractIfNeeded(i->second); + // Deal with regular uses. + for (const HUseListNode<HInstruction*>& use : phi->GetUses()) { + induction_range_.Replace(use.GetUser(), phi, repl); // update induction use + } + phi->ReplaceWith(repl); } } @@ -853,7 +939,7 @@ void HLoopOptimization::GenerateNewLoop(LoopNode* node, HInstruction* step, uint32_t unroll) { DCHECK(unroll == 1 || vector_mode_ == kVector); - Primitive::Type induc_type = Primitive::kPrimInt; + Primitive::Type induc_type = lo->GetType(); // Prepare new loop. vector_preheader_ = new_preheader, vector_header_ = vector_preheader_->GetSingleSuccessor(); @@ -942,8 +1028,10 @@ bool HLoopOptimization::VectorizeDef(LoopNode* node, auto redit = reductions_->find(instruction); if (redit != reductions_->end()) { Primitive::Type type = instruction->GetType(); - if (TrySetVectorType(type, &restrictions) && - VectorizeUse(node, instruction, generate_code, type, restrictions)) { + // Recognize SAD idiom or direct reduction. + if (VectorizeSADIdiom(node, instruction, generate_code, type, restrictions) || + (TrySetVectorType(type, &restrictions) && + VectorizeUse(node, instruction, generate_code, type, restrictions))) { if (generate_code) { HInstruction* new_red = vector_map_->Get(instruction); vector_permanent_map_->Put(new_red, vector_map_->Get(redit->second)); @@ -1029,14 +1117,20 @@ bool HLoopOptimization::VectorizeUse(LoopNode* node, HInstruction* opa = conversion->InputAt(0); Primitive::Type from = conversion->GetInputType(); Primitive::Type to = conversion->GetResultType(); - if ((to == Primitive::kPrimByte || - to == Primitive::kPrimChar || - to == Primitive::kPrimShort) && from == Primitive::kPrimInt) { - // Accept a "narrowing" type conversion from a "wider" computation for - // (1) conversion into final required type, - // (2) vectorizable operand, - // (3) "wider" operations cannot bring in higher order bits. - if (to == type && VectorizeUse(node, opa, generate_code, type, restrictions | kNoHiBits)) { + if (Primitive::IsIntegralType(from) && Primitive::IsIntegralType(to)) { + size_t size_vec = Primitive::ComponentSize(type); + size_t size_from = Primitive::ComponentSize(from); + size_t size_to = Primitive::ComponentSize(to); + // Accept an integral conversion + // (1a) narrowing into vector type, "wider" operations cannot bring in higher order bits, or + // (1b) widening from at least vector type, and + // (2) vectorizable operand. + if ((size_to < size_from && + size_to == size_vec && + VectorizeUse(node, opa, generate_code, type, restrictions | kNoHiBits)) || + (size_to >= size_from && + size_from >= size_vec && + VectorizeUse(node, opa, generate_code, type, restrictions))) { if (generate_code) { if (vector_mode_ == kVector) { vector_map_->Put(instruction, vector_map_->Get(opa)); // operand pass-through @@ -1088,7 +1182,7 @@ bool HLoopOptimization::VectorizeUse(LoopNode* node, return true; } } else if (instruction->IsShl() || instruction->IsShr() || instruction->IsUShr()) { - // Recognize vectorization idioms. + // Recognize halving add idiom. if (VectorizeHalvingAddIdiom(node, instruction, generate_code, type, restrictions)) { return true; } @@ -1181,7 +1275,8 @@ bool HLoopOptimization::VectorizeUse(LoopNode* node, return false; // reject, unless all operands are same-extension narrower } // Accept MIN/MAX(x, y) for vectorizable operands. - DCHECK(r != nullptr && s != nullptr); + DCHECK(r != nullptr); + DCHECK(s != nullptr); if (generate_code && vector_mode_ != kVector) { // de-idiom r = opa; s = opb; @@ -1232,11 +1327,11 @@ bool HLoopOptimization::TrySetVectorType(Primitive::Type type, uint64_t* restric switch (type) { case Primitive::kPrimBoolean: case Primitive::kPrimByte: - *restrictions |= kNoDiv | kNoReduction; + *restrictions |= kNoDiv; return TrySetVectorLength(16); case Primitive::kPrimChar: case Primitive::kPrimShort: - *restrictions |= kNoDiv | kNoReduction; + *restrictions |= kNoDiv; return TrySetVectorLength(8); case Primitive::kPrimInt: *restrictions |= kNoDiv; @@ -1261,17 +1356,17 @@ bool HLoopOptimization::TrySetVectorType(Primitive::Type type, uint64_t* restric case Primitive::kPrimBoolean: case Primitive::kPrimByte: *restrictions |= - kNoMul | kNoDiv | kNoShift | kNoAbs | kNoSignedHAdd | kNoUnroundedHAdd | kNoReduction; + kNoMul | kNoDiv | kNoShift | kNoAbs | kNoSignedHAdd | kNoUnroundedHAdd | kNoSAD; return TrySetVectorLength(16); case Primitive::kPrimChar: case Primitive::kPrimShort: - *restrictions |= kNoDiv | kNoAbs | kNoSignedHAdd | kNoUnroundedHAdd | kNoReduction; + *restrictions |= kNoDiv | kNoAbs | kNoSignedHAdd | kNoUnroundedHAdd | kNoSAD; return TrySetVectorLength(8); case Primitive::kPrimInt: - *restrictions |= kNoDiv; + *restrictions |= kNoDiv | kNoSAD; return TrySetVectorLength(4); case Primitive::kPrimLong: - *restrictions |= kNoMul | kNoDiv | kNoShr | kNoAbs | kNoMinMax; + *restrictions |= kNoMul | kNoDiv | kNoShr | kNoAbs | kNoMinMax | kNoSAD; return TrySetVectorLength(2); case Primitive::kPrimFloat: *restrictions |= kNoMinMax | kNoReduction; // minmax: -0.0 vs +0.0 @@ -1289,17 +1384,17 @@ bool HLoopOptimization::TrySetVectorType(Primitive::Type type, uint64_t* restric switch (type) { case Primitive::kPrimBoolean: case Primitive::kPrimByte: - *restrictions |= kNoDiv | kNoReduction; + *restrictions |= kNoDiv | kNoReduction | kNoSAD; return TrySetVectorLength(16); case Primitive::kPrimChar: case Primitive::kPrimShort: - *restrictions |= kNoDiv | kNoStringCharAt | kNoReduction; + *restrictions |= kNoDiv | kNoStringCharAt | kNoReduction | kNoSAD; return TrySetVectorLength(8); case Primitive::kPrimInt: - *restrictions |= kNoDiv | kNoReduction; + *restrictions |= kNoDiv | kNoReduction | kNoSAD; return TrySetVectorLength(4); case Primitive::kPrimLong: - *restrictions |= kNoDiv | kNoReduction; + *restrictions |= kNoDiv | kNoReduction | kNoSAD; return TrySetVectorLength(2); case Primitive::kPrimFloat: *restrictions |= kNoMinMax | kNoReduction; // min/max(x, NaN) @@ -1317,17 +1412,17 @@ bool HLoopOptimization::TrySetVectorType(Primitive::Type type, uint64_t* restric switch (type) { case Primitive::kPrimBoolean: case Primitive::kPrimByte: - *restrictions |= kNoDiv | kNoReduction; + *restrictions |= kNoDiv | kNoReduction | kNoSAD; return TrySetVectorLength(16); case Primitive::kPrimChar: case Primitive::kPrimShort: - *restrictions |= kNoDiv | kNoStringCharAt | kNoReduction; + *restrictions |= kNoDiv | kNoStringCharAt | kNoReduction | kNoSAD; return TrySetVectorLength(8); case Primitive::kPrimInt: - *restrictions |= kNoDiv | kNoReduction; + *restrictions |= kNoDiv | kNoReduction | kNoSAD; return TrySetVectorLength(4); case Primitive::kPrimLong: - *restrictions |= kNoDiv | kNoReduction; + *restrictions |= kNoDiv | kNoReduction | kNoSAD; return TrySetVectorLength(2); case Primitive::kPrimFloat: *restrictions |= kNoMinMax | kNoReduction; // min/max(x, NaN) @@ -1371,8 +1466,16 @@ void HLoopOptimization::GenerateVecInv(HInstruction* org, Primitive::Type type) if (it != vector_permanent_map_->end()) { vector = it->second; // reuse during unrolling } else { - vector = new (global_allocator_) HVecReplicateScalar( - global_allocator_, org, type, vector_length_); + // Generates ReplicateScalar( (optional_type_conv) org ). + HInstruction* input = org; + Primitive::Type input_type = input->GetType(); + if (type != input_type && (type == Primitive::kPrimLong || + input_type == Primitive::kPrimLong)) { + input = Insert(vector_preheader_, + new (global_allocator_) HTypeConversion(type, input, kNoDexPc)); + } + vector = new (global_allocator_) + HVecReplicateScalar(global_allocator_, input, type, vector_length_); vector_permanent_map_->Put(org, Insert(vector_preheader_, vector)); } vector_map_->Put(org, vector); @@ -1465,10 +1568,15 @@ void HLoopOptimization::GenerateVecReductionPhiInputs(HPhi* phi, HInstruction* r // Prepare the new initialization. if (vector_mode_ == kVector) { // Generate a [initial, 0, .., 0] vector. - new_init = Insert( - vector_preheader_, - new (global_allocator_) HVecSetScalars( - global_allocator_, &new_init, phi->GetType(), vector_length_, 1)); + HVecOperation* red_vector = new_red->AsVecOperation(); + size_t vector_length = red_vector->GetVectorLength(); + Primitive::Type type = red_vector->GetPackedType(); + new_init = Insert(vector_preheader_, + new (global_allocator_) HVecSetScalars(global_allocator_, + &new_init, + type, + vector_length, + 1)); } else { new_init = ReduceAndExtractIfNeeded(new_init); } @@ -1484,18 +1592,20 @@ HInstruction* HLoopOptimization::ReduceAndExtractIfNeeded(HInstruction* instruct if (instruction->IsPhi()) { HInstruction* input = instruction->InputAt(1); if (input->IsVecOperation()) { - Primitive::Type type = input->AsVecOperation()->GetPackedType(); + HVecOperation* input_vector = input->AsVecOperation(); + size_t vector_length = input_vector->GetVectorLength(); + Primitive::Type type = input_vector->GetPackedType(); + HVecReduce::ReductionKind kind = GetReductionKind(input_vector); HBasicBlock* exit = instruction->GetBlock()->GetSuccessors()[0]; // Generate a vector reduction and scalar extract // x = REDUCE( [x_1, .., x_n] ) // y = x_1 // along the exit of the defining loop. - HVecReduce::ReductionKind kind = GetReductionKind(input); HInstruction* reduce = new (global_allocator_) HVecReduce( - global_allocator_, instruction, type, vector_length_, kind); + global_allocator_, instruction, type, vector_length, kind); exit->InsertInstructionBefore(reduce, exit->GetFirstInstruction()); instruction = new (global_allocator_) HVecExtractScalar( - global_allocator_, reduce, type, vector_length_, 0); + global_allocator_, reduce, type, vector_length, 0); exit->InsertInstructionAfter(instruction, reduce); } } @@ -1516,27 +1626,19 @@ void HLoopOptimization::GenerateVecOp(HInstruction* org, HInstruction* opb, Primitive::Type type, bool is_unsigned) { - if (vector_mode_ == kSequential) { - // Non-converting scalar code follows implicit integral promotion. - if (!org->IsTypeConversion() && (type == Primitive::kPrimBoolean || - type == Primitive::kPrimByte || - type == Primitive::kPrimChar || - type == Primitive::kPrimShort)) { - type = Primitive::kPrimInt; - } - } HInstruction* vector = nullptr; + Primitive::Type org_type = org->GetType(); switch (org->GetKind()) { case HInstruction::kNeg: DCHECK(opb == nullptr); GENERATE_VEC( new (global_allocator_) HVecNeg(global_allocator_, opa, type, vector_length_), - new (global_allocator_) HNeg(type, opa)); + new (global_allocator_) HNeg(org_type, opa)); case HInstruction::kNot: DCHECK(opb == nullptr); GENERATE_VEC( new (global_allocator_) HVecNot(global_allocator_, opa, type, vector_length_), - new (global_allocator_) HNot(type, opa)); + new (global_allocator_) HNot(org_type, opa)); case HInstruction::kBooleanNot: DCHECK(opb == nullptr); GENERATE_VEC( @@ -1546,47 +1648,47 @@ void HLoopOptimization::GenerateVecOp(HInstruction* org, DCHECK(opb == nullptr); GENERATE_VEC( new (global_allocator_) HVecCnv(global_allocator_, opa, type, vector_length_), - new (global_allocator_) HTypeConversion(type, opa, kNoDexPc)); + new (global_allocator_) HTypeConversion(org_type, opa, kNoDexPc)); case HInstruction::kAdd: GENERATE_VEC( new (global_allocator_) HVecAdd(global_allocator_, opa, opb, type, vector_length_), - new (global_allocator_) HAdd(type, opa, opb)); + new (global_allocator_) HAdd(org_type, opa, opb)); case HInstruction::kSub: GENERATE_VEC( new (global_allocator_) HVecSub(global_allocator_, opa, opb, type, vector_length_), - new (global_allocator_) HSub(type, opa, opb)); + new (global_allocator_) HSub(org_type, opa, opb)); case HInstruction::kMul: GENERATE_VEC( new (global_allocator_) HVecMul(global_allocator_, opa, opb, type, vector_length_), - new (global_allocator_) HMul(type, opa, opb)); + new (global_allocator_) HMul(org_type, opa, opb)); case HInstruction::kDiv: GENERATE_VEC( new (global_allocator_) HVecDiv(global_allocator_, opa, opb, type, vector_length_), - new (global_allocator_) HDiv(type, opa, opb, kNoDexPc)); + new (global_allocator_) HDiv(org_type, opa, opb, kNoDexPc)); case HInstruction::kAnd: GENERATE_VEC( new (global_allocator_) HVecAnd(global_allocator_, opa, opb, type, vector_length_), - new (global_allocator_) HAnd(type, opa, opb)); + new (global_allocator_) HAnd(org_type, opa, opb)); case HInstruction::kOr: GENERATE_VEC( new (global_allocator_) HVecOr(global_allocator_, opa, opb, type, vector_length_), - new (global_allocator_) HOr(type, opa, opb)); + new (global_allocator_) HOr(org_type, opa, opb)); case HInstruction::kXor: GENERATE_VEC( new (global_allocator_) HVecXor(global_allocator_, opa, opb, type, vector_length_), - new (global_allocator_) HXor(type, opa, opb)); + new (global_allocator_) HXor(org_type, opa, opb)); case HInstruction::kShl: GENERATE_VEC( new (global_allocator_) HVecShl(global_allocator_, opa, opb, type, vector_length_), - new (global_allocator_) HShl(type, opa, opb)); + new (global_allocator_) HShl(org_type, opa, opb)); case HInstruction::kShr: GENERATE_VEC( new (global_allocator_) HVecShr(global_allocator_, opa, opb, type, vector_length_), - new (global_allocator_) HShr(type, opa, opb)); + new (global_allocator_) HShr(org_type, opa, opb)); case HInstruction::kUShr: GENERATE_VEC( new (global_allocator_) HVecUShr(global_allocator_, opa, opb, type, vector_length_), - new (global_allocator_) HUShr(type, opa, opb)); + new (global_allocator_) HUShr(org_type, opa, opb)); case HInstruction::kInvokeStaticOrDirect: { HInvokeStaticOrDirect* invoke = org->AsInvokeStaticOrDirect(); if (vector_mode_ == kVector) { @@ -1667,8 +1769,8 @@ void HLoopOptimization::GenerateVecOp(HInstruction* org, // // Method recognizes the following idioms: -// rounding halving add (a + b + 1) >> 1 for unsigned/signed operands a, b -// regular halving add (a + b) >> 1 for unsigned/signed operands a, b +// rounding halving add (a + b + 1) >> 1 for unsigned/signed operands a, b +// truncated halving add (a + b) >> 1 for unsigned/signed operands a, b // Provided that the operands are promoted to a wider form to do the arithmetic and // then cast back to narrower form, the idioms can be mapped into efficient SIMD // implementation that operates directly in narrower form (plus one extra bit). @@ -1712,7 +1814,8 @@ bool HLoopOptimization::VectorizeHalvingAddIdiom(LoopNode* node, } // Accept recognized halving add for vectorizable operands. Vectorized code uses the // shorthand idiomatic operation. Sequential code uses the original scalar expressions. - DCHECK(r != nullptr && s != nullptr); + DCHECK(r != nullptr); + DCHECK(s != nullptr); if (generate_code && vector_mode_ != kVector) { // de-idiom r = instruction->InputAt(0); s = instruction->InputAt(1); @@ -1741,6 +1844,88 @@ bool HLoopOptimization::VectorizeHalvingAddIdiom(LoopNode* node, return false; } +// Method recognizes the following idiom: +// q += ABS(a - b) for signed operands a, b +// Provided that the operands have the same type or are promoted to a wider form. +// Since this may involve a vector length change, the idiom is handled by going directly +// to a sad-accumulate node (rather than relying combining finer grained nodes later). +// TODO: unsigned SAD too? +bool HLoopOptimization::VectorizeSADIdiom(LoopNode* node, + HInstruction* instruction, + bool generate_code, + Primitive::Type reduction_type, + uint64_t restrictions) { + // Filter integral "q += ABS(a - b);" reduction, where ABS and SUB + // are done in the same precision (either int or long). + if (!instruction->IsAdd() || + (reduction_type != Primitive::kPrimInt && reduction_type != Primitive::kPrimLong)) { + return false; + } + HInstruction* q = instruction->InputAt(0); + HInstruction* v = instruction->InputAt(1); + HInstruction* a = nullptr; + HInstruction* b = nullptr; + if (v->IsInvokeStaticOrDirect() && + (v->AsInvokeStaticOrDirect()->GetIntrinsic() == Intrinsics::kMathAbsInt || + v->AsInvokeStaticOrDirect()->GetIntrinsic() == Intrinsics::kMathAbsLong)) { + HInstruction* x = v->InputAt(0); + if (x->IsSub() && x->GetType() == reduction_type) { + a = x->InputAt(0); + b = x->InputAt(1); + } + } + if (a == nullptr || b == nullptr) { + return false; + } + // Accept same-type or consistent sign extension for narrower-type on operands a and b. + // The same-type or narrower operands are called r (a or lower) and s (b or lower). + HInstruction* r = a; + HInstruction* s = b; + bool is_unsigned = false; + Primitive::Type sub_type = a->GetType(); + if (a->IsTypeConversion()) { + sub_type = a->InputAt(0)->GetType(); + } else if (b->IsTypeConversion()) { + sub_type = b->InputAt(0)->GetType(); + } + if (reduction_type != sub_type && + (!IsNarrowerOperands(a, b, sub_type, &r, &s, &is_unsigned) || is_unsigned)) { + return false; + } + // Try same/narrower type and deal with vector restrictions. + if (!TrySetVectorType(sub_type, &restrictions) || HasVectorRestrictions(restrictions, kNoSAD)) { + return false; + } + // Accept SAD idiom for vectorizable operands. Vectorized code uses the shorthand + // idiomatic operation. Sequential code uses the original scalar expressions. + DCHECK(r != nullptr); + DCHECK(s != nullptr); + if (generate_code && vector_mode_ != kVector) { // de-idiom + r = s = v->InputAt(0); + } + if (VectorizeUse(node, q, generate_code, sub_type, restrictions) && + VectorizeUse(node, r, generate_code, sub_type, restrictions) && + VectorizeUse(node, s, generate_code, sub_type, restrictions)) { + if (generate_code) { + if (vector_mode_ == kVector) { + vector_map_->Put(instruction, new (global_allocator_) HVecSADAccumulate( + global_allocator_, + vector_map_->Get(q), + vector_map_->Get(r), + vector_map_->Get(s), + reduction_type, + GetOtherVL(reduction_type, sub_type, vector_length_))); + MaybeRecordStat(stats_, MethodCompilationStat::kLoopVectorizedIdiom); + } else { + GenerateVecOp(v, vector_map_->Get(r), nullptr, reduction_type); + GenerateVecOp(instruction, vector_map_->Get(q), vector_map_->Get(v), reduction_type); + } + } + return true; + } + return false; +} + // // Vectorization heuristics. // diff --git a/compiler/optimizing/loop_optimization.h b/compiler/optimizing/loop_optimization.h index f34751815b..ae2ea76f47 100644 --- a/compiler/optimizing/loop_optimization.h +++ b/compiler/optimizing/loop_optimization.h @@ -75,6 +75,7 @@ class HLoopOptimization : public HOptimization { kNoMinMax = 1 << 8, // no min/max kNoStringCharAt = 1 << 9, // no StringCharAt kNoReduction = 1 << 10, // no reduction + kNoSAD = 1 << 11, // no sum of absolute differences (SAD) }; /* @@ -172,6 +173,11 @@ class HLoopOptimization : public HOptimization { bool generate_code, Primitive::Type type, uint64_t restrictions); + bool VectorizeSADIdiom(LoopNode* node, + HInstruction* instruction, + bool generate_code, + Primitive::Type type, + uint64_t restrictions); // Vectorization heuristics. bool IsVectorizationProfitable(int64_t trip_count); diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index a6d0da1c96..6bc5111de2 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -1396,6 +1396,7 @@ class HLoopInformationOutwardIterator : public ValueObject { M(VecUShr, VecBinaryOperation) \ M(VecSetScalars, VecOperation) \ M(VecMultiplyAccumulate, VecOperation) \ + M(VecSADAccumulate, VecOperation) \ M(VecLoad, VecMemoryOperation) \ M(VecStore, VecMemoryOperation) \ diff --git a/compiler/optimizing/nodes_vector.h b/compiler/optimizing/nodes_vector.h index c5e75a7ca4..1488b7086a 100644 --- a/compiler/optimizing/nodes_vector.h +++ b/compiler/optimizing/nodes_vector.h @@ -461,8 +461,8 @@ class HVecAdd FINAL : public HVecBinaryOperation { }; // Performs halving add on every component in the two vectors, viz. -// rounded [ x1, .. , xn ] hradd [ y1, .. , yn ] = [ (x1 + y1 + 1) >> 1, .. , (xn + yn + 1) >> 1 ] -// or [ x1, .. , xn ] hadd [ y1, .. , yn ] = [ (x1 + y1) >> 1, .. , (xn + yn ) >> 1 ] +// rounded [ x1, .. , xn ] hradd [ y1, .. , yn ] = [ (x1 + y1 + 1) >> 1, .. , (xn + yn + 1) >> 1 ] +// truncated [ x1, .. , xn ] hadd [ y1, .. , yn ] = [ (x1 + y1) >> 1, .. , (xn + yn ) >> 1 ] // for signed operands x, y (sign extension) or unsigned operands x, y (zero extension). class HVecHalvingAdd FINAL : public HVecBinaryOperation { public: @@ -810,8 +810,8 @@ class HVecUShr FINAL : public HVecBinaryOperation { // // Assigns the given scalar elements to a vector, -// viz. set( array(x1, .., xn) ) = [ x1, .. , xn ] if n == m, -// set( array(x1, .., xm) ) = [ x1, .. , xm, 0, .., 0 ] if m < n. +// viz. set( array(x1, .. , xn) ) = [ x1, .. , xn ] if n == m, +// set( array(x1, .. , xm) ) = [ x1, .. , xm, 0, .. , 0 ] if m < n. class HVecSetScalars FINAL : public HVecOperation { public: HVecSetScalars(ArenaAllocator* arena, @@ -842,9 +842,8 @@ class HVecSetScalars FINAL : public HVecOperation { DISALLOW_COPY_AND_ASSIGN(HVecSetScalars); }; -// Multiplies every component in the two vectors, adds the result vector to the accumulator vector. -// viz. [ acc1, .., accn ] + [ x1, .. , xn ] * [ y1, .. , yn ] = -// [ acc1 + x1 * y1, .. , accn + xn * yn ]. +// Multiplies every component in the two vectors, adds the result vector to the accumulator vector, +// viz. [ a1, .. , an ] + [ x1, .. , xn ] * [ y1, .. , yn ] = [ a1 + x1 * y1, .. , an + xn * yn ]. class HVecMultiplyAccumulate FINAL : public HVecOperation { public: HVecMultiplyAccumulate(ArenaAllocator* arena, @@ -866,15 +865,11 @@ class HVecMultiplyAccumulate FINAL : public HVecOperation { DCHECK(HasConsistentPackedTypes(accumulator, packed_type)); DCHECK(HasConsistentPackedTypes(mul_left, packed_type)); DCHECK(HasConsistentPackedTypes(mul_right, packed_type)); - SetRawInputAt(kInputAccumulatorIndex, accumulator); - SetRawInputAt(kInputMulLeftIndex, mul_left); - SetRawInputAt(kInputMulRightIndex, mul_right); + SetRawInputAt(0, accumulator); + SetRawInputAt(1, mul_left); + SetRawInputAt(2, mul_right); } - static constexpr int kInputAccumulatorIndex = 0; - static constexpr int kInputMulLeftIndex = 1; - static constexpr int kInputMulRightIndex = 2; - bool CanBeMoved() const OVERRIDE { return true; } bool InstructionDataEquals(const HInstruction* other) const OVERRIDE { @@ -894,6 +889,42 @@ class HVecMultiplyAccumulate FINAL : public HVecOperation { DISALLOW_COPY_AND_ASSIGN(HVecMultiplyAccumulate); }; +// Takes the absolute difference of two vectors, and adds the results to +// same-precision or wider-precision components in the accumulator, +// viz. SAD([ a1, .. , am ], [ x1, .. , xn ], [ y1, .. , yn ] = +// [ a1 + sum abs(xi-yi), .. , am + sum abs(xj-yj) ], +// for m <= n and non-overlapping sums. +class HVecSADAccumulate FINAL : public HVecOperation { + public: + HVecSADAccumulate(ArenaAllocator* arena, + HInstruction* accumulator, + HInstruction* sad_left, + HInstruction* sad_right, + Primitive::Type packed_type, + size_t vector_length, + uint32_t dex_pc = kNoDexPc) + : HVecOperation(arena, + packed_type, + SideEffects::None(), + /* number_of_inputs */ 3, + vector_length, + dex_pc) { + DCHECK(HasConsistentPackedTypes(accumulator, packed_type)); + DCHECK(sad_left->IsVecOperation()); + DCHECK(sad_right->IsVecOperation()); + DCHECK_EQ(sad_left->AsVecOperation()->GetPackedType(), + sad_right->AsVecOperation()->GetPackedType()); + SetRawInputAt(0, accumulator); + SetRawInputAt(1, sad_left); + SetRawInputAt(2, sad_right); + } + + DECLARE_INSTRUCTION(VecSADAccumulate); + + private: + DISALLOW_COPY_AND_ASSIGN(HVecSADAccumulate); +}; + // Loads a vector from memory, viz. load(mem, 1) // yield the vector [ mem(1), .. , mem(n) ]. class HVecLoad FINAL : public HVecMemoryOperation { diff --git a/test/651-checker-byte-simd-minmax/src/Main.java b/test/651-checker-byte-simd-minmax/src/Main.java index e018b56dd5..9643b90d15 100644 --- a/test/651-checker-byte-simd-minmax/src/Main.java +++ b/test/651-checker-byte-simd-minmax/src/Main.java @@ -165,6 +165,28 @@ public class Main { } } + /// CHECK-START: void Main.doitMin100(byte[], byte[]) loop_optimization (before) + /// CHECK-DAG: <<I100:i\d+>> IntConstant 100 loop:none + /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Get:b\d+>> ArrayGet loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Min:i\d+>> InvokeStaticOrDirect [<<Get>>,<<I100>>] intrinsic:MathMinIntInt loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Cnv:b\d+>> TypeConversion [<<Min>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>> outer_loop:none + // + /// CHECK-START-ARM64: void Main.doitMin100(byte[], byte[]) loop_optimization (after) + /// CHECK-DAG: <<I100:i\d+>> IntConstant 100 loop:none + /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<I100>>] loop:none + /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Get:d\d+>> VecLoad loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Min:d\d+>> VecMin [<<Get>>,<<Repl>>] unsigned:false loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecStore [{{l\d+}},<<Phi>>,<<Min>>] loop:<<Loop>> outer_loop:none + private static void doitMin100(byte[] x, byte[] y) { + int min = Math.min(x.length, y.length); + for (int i = 0; i < min; i++) { + x[i] = (byte) Math.min(y[i], 100); + } + } + public static void main(String[] args) { // Initialize cross-values for all possible values. int total = 256 * 256; @@ -202,6 +224,11 @@ public class Main { byte expected = (byte) Math.max(y[i] & 0xff, z[i] & 0xff); expectEquals(expected, x[i]); } + doitMin100(x, y); + for (int i = 0; i < total; i++) { + byte expected = (byte) Math.min(y[i], 100); + expectEquals(expected, x[i]); + } System.out.println("passed"); } diff --git a/test/651-checker-char-simd-minmax/src/Main.java b/test/651-checker-char-simd-minmax/src/Main.java index 57cad9b34a..8a0262cfcd 100644 --- a/test/651-checker-char-simd-minmax/src/Main.java +++ b/test/651-checker-char-simd-minmax/src/Main.java @@ -89,6 +89,28 @@ public class Main { } } + /// CHECK-START: void Main.doitMin100(char[], char[]) loop_optimization (before) + /// CHECK-DAG: <<I100:i\d+>> IntConstant 100 loop:none + /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Get:c\d+>> ArrayGet loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Min:i\d+>> InvokeStaticOrDirect [<<Get>>,<<I100>>] intrinsic:MathMinIntInt loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Cnv:c\d+>> TypeConversion [<<Min>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>> outer_loop:none + // + /// CHECK-START-ARM64: void Main.doitMin100(char[], char[]) loop_optimization (after) + /// CHECK-DAG: <<I100:i\d+>> IntConstant 100 loop:none + /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<I100>>] loop:none + /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Get:d\d+>> VecLoad loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Min:d\d+>> VecMin [<<Get>>,<<Repl>>] unsigned:true loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecStore [{{l\d+}},<<Phi>>,<<Min>>] loop:<<Loop>> outer_loop:none + private static void doitMin100(char[] x, char[] y) { + int min = Math.min(x.length, y.length); + for (int i = 0; i < min; i++) { + x[i] = (char) Math.min(y[i], 100); + } + } + public static void main(String[] args) { char[] interesting = { 0x0000, 0x0001, 0x007f, 0x0080, 0x0081, 0x00ff, @@ -124,6 +146,11 @@ public class Main { char expected = (char) Math.max(y[i], z[i]); expectEquals(expected, x[i]); } + doitMin100(x, y); + for (int i = 0; i < total; i++) { + char expected = (char) Math.min(y[i], 100); + expectEquals(expected, x[i]); + } System.out.println("passed"); } diff --git a/test/651-checker-short-simd-minmax/src/Main.java b/test/651-checker-short-simd-minmax/src/Main.java index 4f2a7a4440..ffbf73bd62 100644 --- a/test/651-checker-short-simd-minmax/src/Main.java +++ b/test/651-checker-short-simd-minmax/src/Main.java @@ -165,6 +165,28 @@ public class Main { } } + /// CHECK-START: void Main.doitMin100(short[], short[]) loop_optimization (before) + /// CHECK-DAG: <<I100:i\d+>> IntConstant 100 loop:none + /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Get:s\d+>> ArrayGet loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Min:i\d+>> InvokeStaticOrDirect [<<Get>>,<<I100>>] intrinsic:MathMinIntInt loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Cnv:s\d+>> TypeConversion [<<Min>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>> outer_loop:none + // + /// CHECK-START-ARM64: void Main.doitMin100(short[], short[]) loop_optimization (after) + /// CHECK-DAG: <<I100:i\d+>> IntConstant 100 loop:none + /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<I100>>] loop:none + /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Get:d\d+>> VecLoad loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Min:d\d+>> VecMin [<<Get>>,<<Repl>>] unsigned:false loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecStore [{{l\d+}},<<Phi>>,<<Min>>] loop:<<Loop>> outer_loop:none + private static void doitMin100(short[] x, short[] y) { + int min = Math.min(x.length, y.length); + for (int i = 0; i < min; i++) { + x[i] = (short) Math.min(y[i], 100); + } + } + public static void main(String[] args) { short[] interesting = { (short) 0x0000, (short) 0x0001, (short) 0x007f, @@ -216,6 +238,11 @@ public class Main { short expected = (short) Math.max(y[i] & 0xffff, z[i] & 0xffff); expectEquals(expected, x[i]); } + doitMin100(x, y); + for (int i = 0; i < total; i++) { + short expected = (short) Math.min(y[i], 100); + expectEquals(expected, x[i]); + } System.out.println("passed"); } diff --git a/test/656-checker-simd-opt/src/Main.java b/test/656-checker-simd-opt/src/Main.java index 091633ff34..39a126f5d3 100644 --- a/test/656-checker-simd-opt/src/Main.java +++ b/test/656-checker-simd-opt/src/Main.java @@ -92,7 +92,91 @@ public class Main { } } - public static void main(String[] args) { + /// CHECK-START: long Main.longInductionReduction(long[]) loop_optimization (before) + /// CHECK-DAG: <<L0:j\d+>> LongConstant 0 loop:none + /// CHECK-DAG: <<L1:j\d+>> LongConstant 1 loop:none + /// CHECK-DAG: <<I0:i\d+>> IntConstant 0 loop:none + /// CHECK-DAG: <<Get:j\d+>> ArrayGet [{{l\d+}},<<I0>>] loop:none + /// CHECK-DAG: <<Phi1:j\d+>> Phi [<<L0>>,<<Add1:j\d+>>] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Phi2:j\d+>> Phi [<<L1>>,<<Add2:j\d+>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Add2>> Add [<<Phi2>>,<<Get>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Add1>> Add [<<Phi1>>,<<L1>>] loop:<<Loop>> outer_loop:none + // + /// CHECK-START-ARM64: long Main.longInductionReduction(long[]) loop_optimization (after) + /// CHECK-DAG: <<L0:j\d+>> LongConstant 0 loop:none + /// CHECK-DAG: <<L1:j\d+>> LongConstant 1 loop:none + /// CHECK-DAG: <<L2:j\d+>> LongConstant 2 loop:none + /// CHECK-DAG: <<I0:i\d+>> IntConstant 0 loop:none + /// CHECK-DAG: <<Get:j\d+>> ArrayGet [{{l\d+}},<<I0>>] loop:none + /// CHECK-DAG: <<Rep:d\d+>> VecReplicateScalar [<<Get>>] loop:none + /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<L1>>] loop:none + /// CHECK-DAG: <<Phi1:j\d+>> Phi [<<L0>>,{{j\d+}}] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecAdd [<<Phi2>>,<<Rep>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi1>>,<<L2>>] loop:<<Loop>> outer_loop:none + static long longInductionReduction(long[] y) { + long x = 1; + for (long i = 0; i < 10; i++) { + x += y[0]; + } + return x; + } + + /// CHECK-START: void Main.intVectorLongInvariant(int[], long[]) loop_optimization (before) + /// CHECK-DAG: <<I0:i\d+>> IntConstant 0 loop:none + /// CHECK-DAG: <<I1:i\d+>> IntConstant 1 loop:none + /// CHECK-DAG: <<Get:j\d+>> ArrayGet [{{l\d+}},<<I0>>] loop:none + /// CHECK-DAG: <<Phi:i\d+>> Phi [<<I0>>,<<Add:i\d+>>] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Cnv:i\d+>> TypeConversion [<<Get>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Add>> Add [<<Phi>>,<<I1>>] loop:<<Loop>> outer_loop:none + // + /// CHECK-START-ARM64: void Main.intVectorLongInvariant(int[], long[]) loop_optimization (after) + /// CHECK-DAG: <<I0:i\d+>> IntConstant 0 loop:none + /// CHECK-DAG: <<I1:i\d+>> IntConstant 1 loop:none + /// CHECK-DAG: <<I4:i\d+>> IntConstant 4 loop:none + /// CHECK-DAG: <<Get:j\d+>> ArrayGet [{{l\d+}},<<I0>>] loop:none + /// CHECK-DAG: <<Cnv:i\d+>> TypeConversion [<<Get>>] loop:none + /// CHECK-DAG: <<Rep:d\d+>> VecReplicateScalar [<<Cnv>>] loop:none + /// CHECK-DAG: <<Phi:i\d+>> Phi [<<I0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: VecStore [{{l\d+}},<<Phi>>,<<Rep>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi>>,<<I4>>] loop:<<Loop>> outer_loop:none + static void intVectorLongInvariant(int[] x, long[] y) { + for (int i = 0; i < 100; i++) { + x[i] = (int) y[0]; + } + } + + /// CHECK-START: void Main.longCanBeDoneWithInt(int[], int[]) loop_optimization (before) + /// CHECK-DAG: <<I0:i\d+>> IntConstant 0 loop:none + /// CHECK-DAG: <<I1:i\d+>> IntConstant 1 loop:none + /// CHECK-DAG: <<L1:j\d+>> LongConstant 1 loop:none + /// CHECK-DAG: <<Phi:i\d+>> Phi [<<I0>>,<<Add:i\d+>>] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Get:i\d+>> ArrayGet [{{l\d+}},<<Phi>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Cnv1:j\d+>> TypeConversion [<<Get>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<AddL:j\d+>> Add [<<Cnv1>>,<<L1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Cnv2:i\d+>> TypeConversion [<<AddL>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: ArraySet [{{l\d+}},<<Phi>>,<<Cnv2>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Add>> Add [<<Phi>>,<<I1>>] loop:<<Loop>> outer_loop:none + // + /// CHECK-START-ARM64: void Main.longCanBeDoneWithInt(int[], int[]) loop_optimization (after) + /// CHECK-DAG: <<I0:i\d+>> IntConstant 0 loop:none + /// CHECK-DAG: <<I4:i\d+>> IntConstant 4 loop:none + /// CHECK-DAG: <<L1:j\d+>> LongConstant 1 loop:none + /// CHECK-DAG: <<Cnv:i\d+>> TypeConversion [<<L1>>] loop:none + /// CHECK-DAG: <<Rep:d\d+>> VecReplicateScalar [<<Cnv>>] loop:none + /// CHECK-DAG: <<Phi:i\d+>> Phi [<<I0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Load:d\d+>> VecLoad [{{l\d+}},<<Phi>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Add:d\d+>> VecAdd [<<Load>>,<<Rep>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: VecStore [{{l\d+}},<<Phi>>,<<Add>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi>>,<<I4>>] loop:<<Loop>> outer_loop:none + static void longCanBeDoneWithInt(int[] x, int[] y) { + for (int i = 0; i < 100; i++) { + x[i] = (int) (y[i] + 1L); + } + } + + static void testUnroll() { float[] x = new float[100]; float[] y = new float[100]; for (int i = 0; i < 100; i++) { @@ -104,51 +188,89 @@ public class Main { expectEquals(5.0f, x[i]); expectEquals(2.0f, y[i]); } - { - int[] a = new int[100]; - int[] b = new int[100]; - for (int i = 0; i < 100; i++) { - a[i] = 0; - b[i] = i; - } - stencil(a, b, 100); - for (int i = 1; i < 99; i++) { - int e = i + i + i; - expectEquals(e, a[i]); - expectEquals(i, b[i]); - } - } - { - int[] a = new int[100]; - int[] b = new int[100]; - for (int i = 0; i < 100; i++) { - a[i] = 0; - b[i] = i; - } - stencilSubInt(a, b, 100); - for (int i = 1; i < 99; i++) { - int e = i + i + i; - expectEquals(e, a[i]); - expectEquals(i, b[i]); - } - } - { - int[] a = new int[100]; - int[] b = new int[100]; - for (int i = 0; i < 100; i++) { - a[i] = 0; - b[i] = i; - } - stencilAddInt(a, b, 100); - for (int i = 1; i < 99; i++) { - int e = i + i + i; - expectEquals(e, a[i]); - expectEquals(i, b[i]); - } + } + + static void testStencil1() { + int[] a = new int[100]; + int[] b = new int[100]; + for (int i = 0; i < 100; i++) { + a[i] = 0; + b[i] = i; + } + stencil(a, b, 100); + for (int i = 1; i < 99; i++) { + int e = i + i + i; + expectEquals(e, a[i]); + expectEquals(i, b[i]); + } + } + + static void testStencil2() { + int[] a = new int[100]; + int[] b = new int[100]; + for (int i = 0; i < 100; i++) { + a[i] = 0; + b[i] = i; + } + stencilSubInt(a, b, 100); + for (int i = 1; i < 99; i++) { + int e = i + i + i; + expectEquals(e, a[i]); + expectEquals(i, b[i]); } + } + + static void testStencil3() { + int[] a = new int[100]; + int[] b = new int[100]; + for (int i = 0; i < 100; i++) { + a[i] = 0; + b[i] = i; + } + stencilAddInt(a, b, 100); + for (int i = 1; i < 99; i++) { + int e = i + i + i; + expectEquals(e, a[i]); + expectEquals(i, b[i]); + } + } + + static void testTypes() { + int[] a = new int[100]; + int[] b = new int[100]; + long[] l = { 3 }; + expectEquals(31, longInductionReduction(l)); + intVectorLongInvariant(a, l); + for (int i = 0; i < 100; i++) { + expectEquals(3, a[i]); + } + longCanBeDoneWithInt(b, a); + for (int i = 0; i < 100; i++) { + expectEquals(4, b[i]); + } + } + + public static void main(String[] args) { + testUnroll(); + testStencil1(); + testStencil2(); + testStencil3(); + testTypes(); System.out.println("passed"); } + private static void expectEquals(int expected, int result) { + if (expected != result) { + throw new Error("Expected: " + expected + ", found: " + result); + } + } + + private static void expectEquals(long expected, long result) { + if (expected != result) { + throw new Error("Expected: " + expected + ", found: " + result); + } + } + private static void expectEquals(float expected, float result) { if (expected != result) { throw new Error("Expected: " + expected + ", found: " + result); diff --git a/test/660-checker-simd-sad-byte/expected.txt b/test/660-checker-simd-sad-byte/expected.txt new file mode 100644 index 0000000000..b0aad4deb5 --- /dev/null +++ b/test/660-checker-simd-sad-byte/expected.txt @@ -0,0 +1 @@ +passed diff --git a/test/660-checker-simd-sad-byte/info.txt b/test/660-checker-simd-sad-byte/info.txt new file mode 100644 index 0000000000..b56c119129 --- /dev/null +++ b/test/660-checker-simd-sad-byte/info.txt @@ -0,0 +1 @@ +Functional tests on SAD vectorization. diff --git a/test/660-checker-simd-sad-byte/src/Main.java b/test/660-checker-simd-sad-byte/src/Main.java new file mode 100644 index 0000000000..72d1c24dbe --- /dev/null +++ b/test/660-checker-simd-sad-byte/src/Main.java @@ -0,0 +1,332 @@ +/* + * Copyright (C) 2017 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Tests for SAD (sum of absolute differences). + */ +public class Main { + + // TODO: lower precision still coming, b/64091002 + + private static byte sadByte2Byte(byte[] b1, byte[] b2) { + int min_length = Math.min(b1.length, b2.length); + byte sad = 0; + for (int i = 0; i < min_length; i++) { + sad += Math.abs(b1[i] - b2[i]); + } + return sad; + } + + private static byte sadByte2ByteAlt(byte[] b1, byte[] b2) { + int min_length = Math.min(b1.length, b2.length); + byte sad = 0; + for (int i = 0; i < min_length; i++) { + byte s = b1[i]; + byte p = b2[i]; + sad += s >= p ? s - p : p - s; + } + return sad; + } + + private static byte sadByte2ByteAlt2(byte[] b1, byte[] b2) { + int min_length = Math.min(b1.length, b2.length); + byte sad = 0; + for (int i = 0; i < min_length; i++) { + byte s = b1[i]; + byte p = b2[i]; + int x = s - p; + if (x < 0) x = -x; + sad += x; + } + return sad; + } + + private static short sadByte2Short(byte[] b1, byte[] b2) { + int min_length = Math.min(b1.length, b2.length); + short sad = 0; + for (int i = 0; i < min_length; i++) { + sad += Math.abs(b1[i] - b2[i]); + } + return sad; + } + + private static short sadByte2ShortAlt(byte[] b1, byte[] b2) { + int min_length = Math.min(b1.length, b2.length); + short sad = 0; + for (int i = 0; i < min_length; i++) { + byte s = b1[i]; + byte p = b2[i]; + sad += s >= p ? s - p : p - s; + } + return sad; + } + + private static short sadByte2ShortAlt2(byte[] b1, byte[] b2) { + int min_length = Math.min(b1.length, b2.length); + short sad = 0; + for (int i = 0; i < min_length; i++) { + byte s = b1[i]; + byte p = b2[i]; + int x = s - p; + if (x < 0) x = -x; + sad += x; + } + return sad; + } + + /// CHECK-START: int Main.sadByte2Int(byte[], byte[]) loop_optimization (before) + /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none + /// CHECK-DAG: <<Cons1:i\d+>> IntConstant 1 loop:none + /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Phi2:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get1:b\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get2:b\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Sub:i\d+>> Sub [<<Get1>>,<<Get2>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Intrin:i\d+>> InvokeStaticOrDirect [<<Sub>>] intrinsic:MathAbsInt loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi2>>,<<Intrin>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi1>>,<<Cons1>>] loop:<<Loop>> outer_loop:none + // + /// CHECK-START-ARM64: int Main.sadByte2Int(byte[], byte[]) loop_optimization (after) + /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none + /// CHECK-DAG: <<Cons16:i\d+>> IntConstant 16 loop:none + /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<Cons0>>] loop:none + /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Load1:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Load2:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<SAD:d\d+>> VecSADAccumulate [<<Phi2>>,<<Load1>>,<<Load2>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi1>>,<<Cons16>>] loop:<<Loop>> outer_loop:none + private static int sadByte2Int(byte[] b1, byte[] b2) { + int min_length = Math.min(b1.length, b2.length); + int sad = 0; + for (int i = 0; i < min_length; i++) { + sad += Math.abs(b1[i] - b2[i]); + } + return sad; + } + + /// CHECK-START: int Main.sadByte2IntAlt(byte[], byte[]) loop_optimization (before) + /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none + /// CHECK-DAG: <<Cons1:i\d+>> IntConstant 1 loop:none + /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Phi2:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get1:b\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get2:b\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Sub:i\d+>> Sub [<<Get2>>,<<Get1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Intrin:i\d+>> InvokeStaticOrDirect [<<Sub>>] intrinsic:MathAbsInt loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi2>>,<<Intrin>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi1>>,<<Cons1>>] loop:<<Loop>> outer_loop:none + // + /// CHECK-START-ARM64: int Main.sadByte2IntAlt(byte[], byte[]) loop_optimization (after) + /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none + /// CHECK-DAG: <<Cons16:i\d+>> IntConstant 16 loop:none + /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<Cons0>>] loop:none + /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Load1:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Load2:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<SAD:d\d+>> VecSADAccumulate [<<Phi2>>,<<Load2>>,<<Load1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi1>>,<<Cons16>>] loop:<<Loop>> outer_loop:none + private static int sadByte2IntAlt(byte[] b1, byte[] b2) { + int min_length = Math.min(b1.length, b2.length); + int sad = 0; + for (int i = 0; i < min_length; i++) { + byte s = b1[i]; + byte p = b2[i]; + sad += s >= p ? s - p : p - s; + } + return sad; + } + + /// CHECK-START: int Main.sadByte2IntAlt2(byte[], byte[]) loop_optimization (before) + /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none + /// CHECK-DAG: <<Cons1:i\d+>> IntConstant 1 loop:none + /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Phi2:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get1:b\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get2:b\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Sub:i\d+>> Sub [<<Get1>>,<<Get2>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Intrin:i\d+>> InvokeStaticOrDirect [<<Sub>>] intrinsic:MathAbsInt loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi2>>,<<Intrin>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi1>>,<<Cons1>>] loop:<<Loop>> outer_loop:none + // + /// CHECK-START-ARM64: int Main.sadByte2IntAlt2(byte[], byte[]) loop_optimization (after) + /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none + /// CHECK-DAG: <<Cons16:i\d+>> IntConstant 16 loop:none + /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<Cons0>>] loop:none + /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Load1:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Load2:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<SAD:d\d+>> VecSADAccumulate [<<Phi2>>,<<Load1>>,<<Load2>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi1>>,<<Cons16>>] loop:<<Loop>> outer_loop:none + private static int sadByte2IntAlt2(byte[] b1, byte[] b2) { + int min_length = Math.min(b1.length, b2.length); + int sad = 0; + for (int i = 0; i < min_length; i++) { + byte s = b1[i]; + byte p = b2[i]; + int x = s - p; + if (x < 0) x = -x; + sad += x; + } + return sad; + } + + /// CHECK-START: long Main.sadByte2Long(byte[], byte[]) loop_optimization (before) + /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none + /// CHECK-DAG: <<Cons1:i\d+>> IntConstant 1 loop:none + /// CHECK-DAG: <<ConsL:j\d+>> LongConstant 0 loop:none + /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Phi2:j\d+>> Phi [<<ConsL>>,{{j\d+}}] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get1:b\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get2:b\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Cnv1:j\d+>> TypeConversion [<<Get1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Cnv2:j\d+>> TypeConversion [<<Get2>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Sub:j\d+>> Sub [<<Cnv1>>,<<Cnv2>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Intrin:j\d+>> InvokeStaticOrDirect [<<Sub>>] intrinsic:MathAbsLong loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi2>>,<<Intrin>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi1>>,<<Cons1>>] loop:<<Loop>> outer_loop:none + // + /// CHECK-START-ARM64: long Main.sadByte2Long(byte[], byte[]) loop_optimization (after) + /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none + /// CHECK-DAG: <<Cons16:i\d+>> IntConstant 16 loop:none + /// CHECK-DAG: <<ConsL:j\d+>> LongConstant 0 loop:none + /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<ConsL>>] loop:none + /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Load1:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Load2:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<SAD:d\d+>> VecSADAccumulate [<<Phi2>>,<<Load1>>,<<Load2>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi1>>,<<Cons16>>] loop:<<Loop>> outer_loop:none + private static long sadByte2Long(byte[] b1, byte[] b2) { + int min_length = Math.min(b1.length, b2.length); + long sad = 0; + for (int i = 0; i < min_length; i++) { + long x = b1[i]; + long y = b2[i]; + sad += Math.abs(x - y); + } + return sad; + } + + /// CHECK-START: long Main.sadByte2LongAt1(byte[], byte[]) loop_optimization (before) + /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none + /// CHECK-DAG: <<Cons1:i\d+>> IntConstant 1 loop:none + /// CHECK-DAG: <<ConsL:j\d+>> LongConstant 1 loop:none + /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Phi2:j\d+>> Phi [<<ConsL>>,{{j\d+}}] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get1:b\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get2:b\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Cnv1:j\d+>> TypeConversion [<<Get1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Cnv2:j\d+>> TypeConversion [<<Get2>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Sub:j\d+>> Sub [<<Cnv1>>,<<Cnv2>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Intrin:j\d+>> InvokeStaticOrDirect [<<Sub>>] intrinsic:MathAbsLong loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi2>>,<<Intrin>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi1>>,<<Cons1>>] loop:<<Loop>> outer_loop:none + // + /// CHECK-START-ARM64: long Main.sadByte2LongAt1(byte[], byte[]) loop_optimization (after) + /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none + /// CHECK-DAG: <<Cons16:i\d+>> IntConstant 16 loop:none + /// CHECK-DAG: <<ConsL:j\d+>> LongConstant 1 loop:none + /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<ConsL>>] loop:none + /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Load1:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Load2:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<SAD:d\d+>> VecSADAccumulate [<<Phi2>>,<<Load1>>,<<Load2>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi1>>,<<Cons16>>] loop:<<Loop>> outer_loop:none + private static long sadByte2LongAt1(byte[] b1, byte[] b2) { + int min_length = Math.min(b1.length, b2.length); + long sad = 1; // starts at 1 + for (int i = 0; i < min_length; i++) { + long x = b1[i]; + long y = b2[i]; + sad += Math.abs(x - y); + } + return sad; + } + + public static void main(String[] args) { + // Cross-test the two most extreme values individually. + byte[] b1 = { 0, -128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; + byte[] b2 = { 0, 127, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; + expectEquals(-1, sadByte2Byte(b1, b2)); + expectEquals(-1, sadByte2Byte(b2, b1)); + expectEquals(-1, sadByte2ByteAlt(b1, b2)); + expectEquals(-1, sadByte2ByteAlt(b2, b1)); + expectEquals(-1, sadByte2ByteAlt2(b1, b2)); + expectEquals(-1, sadByte2ByteAlt2(b2, b1)); + expectEquals(255, sadByte2Short(b1, b2)); + expectEquals(255, sadByte2Short(b2, b1)); + expectEquals(255, sadByte2ShortAlt(b1, b2)); + expectEquals(255, sadByte2ShortAlt(b2, b1)); + expectEquals(255, sadByte2ShortAlt2(b1, b2)); + expectEquals(255, sadByte2ShortAlt2(b2, b1)); + expectEquals(255, sadByte2Int(b1, b2)); + expectEquals(255, sadByte2Int(b2, b1)); + expectEquals(255, sadByte2IntAlt(b1, b2)); + expectEquals(255, sadByte2IntAlt(b2, b1)); + expectEquals(255, sadByte2IntAlt2(b1, b2)); + expectEquals(255, sadByte2IntAlt2(b2, b1)); + expectEquals(255, sadByte2Long(b1, b2)); + expectEquals(255L, sadByte2Long(b2, b1)); + expectEquals(256L, sadByte2LongAt1(b1, b2)); + expectEquals(256L, sadByte2LongAt1(b2, b1)); + + // Use cross-values to test all cases. + // One for scalar cleanup. + int n = 256; + int m = n * n + 1; + int k = 0; + b1 = new byte[m]; + b2 = new byte[m]; + for (int i = 0; i < n; i++) { + for (int j = 0; j < n; j++) { + b1[k] = (byte) i; + b2[k] = (byte) j; + k++; + } + } + b1[k] = 10; + b2[k] = 2; + expectEquals(8, sadByte2Byte(b1, b2)); + expectEquals(8, sadByte2ByteAlt(b1, b2)); + expectEquals(8, sadByte2ByteAlt2(b1, b2)); + expectEquals(21768, sadByte2Short(b1, b2)); + expectEquals(21768, sadByte2ShortAlt(b1, b2)); + expectEquals(21768, sadByte2ShortAlt2(b1, b2)); + expectEquals(5592328, sadByte2Int(b1, b2)); + expectEquals(5592328, sadByte2IntAlt(b1, b2)); + expectEquals(5592328, sadByte2IntAlt2(b1, b2)); + expectEquals(5592328L, sadByte2Long(b1, b2)); + expectEquals(5592329L, sadByte2LongAt1(b1, b2)); + + System.out.println("passed"); + } + + private static void expectEquals(int expected, int result) { + if (expected != result) { + throw new Error("Expected: " + expected + ", found: " + result); + } + } + + private static void expectEquals(long expected, long result) { + if (expected != result) { + throw new Error("Expected: " + expected + ", found: " + result); + } + } +} diff --git a/test/660-checker-simd-sad-char/expected.txt b/test/660-checker-simd-sad-char/expected.txt new file mode 100644 index 0000000000..b0aad4deb5 --- /dev/null +++ b/test/660-checker-simd-sad-char/expected.txt @@ -0,0 +1 @@ +passed diff --git a/test/660-checker-simd-sad-char/info.txt b/test/660-checker-simd-sad-char/info.txt new file mode 100644 index 0000000000..b56c119129 --- /dev/null +++ b/test/660-checker-simd-sad-char/info.txt @@ -0,0 +1 @@ +Functional tests on SAD vectorization. diff --git a/test/660-checker-simd-sad-char/src/Main.java b/test/660-checker-simd-sad-char/src/Main.java new file mode 100644 index 0000000000..bb0c58f2e0 --- /dev/null +++ b/test/660-checker-simd-sad-char/src/Main.java @@ -0,0 +1,259 @@ +/* + * Copyright (C) 2017 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Tests for SAD (sum of absolute differences). + */ +public class Main { + + // TODO: lower precision still coming, b/64091002 + + // TODO: consider unsigned SAD too, b/64091002 + + private static char sadShort2Short(char[] s1, char[] s2) { + int min_length = Math.min(s1.length, s2.length); + char sad = 0; + for (int i = 0; i < min_length; i++) { + sad += Math.abs(s1[i] - s2[i]); + } + return sad; + } + + private static char sadShort2ShortAlt(char[] s1, char[] s2) { + int min_length = Math.min(s1.length, s2.length); + char sad = 0; + for (int i = 0; i < min_length; i++) { + char s = s1[i]; + char p = s2[i]; + sad += s >= p ? s - p : p - s; + } + return sad; + } + + private static char sadShort2ShortAlt2(char[] s1, char[] s2) { + int min_length = Math.min(s1.length, s2.length); + char sad = 0; + for (int i = 0; i < min_length; i++) { + char s = s1[i]; + char p = s2[i]; + int x = s - p; + if (x < 0) x = -x; + sad += x; + } + return sad; + } + + /// CHECK-START: int Main.sadShort2Int(char[], char[]) loop_optimization (before) + /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none + /// CHECK-DAG: <<Cons1:i\d+>> IntConstant 1 loop:none + /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Phi2:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get1:c\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get2:c\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Sub:i\d+>> Sub [<<Get1>>,<<Get2>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Intrin:i\d+>> InvokeStaticOrDirect [<<Sub>>] intrinsic:MathAbsInt loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi2>>,<<Intrin>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi1>>,<<Cons1>>] loop:<<Loop>> outer_loop:none + // + /// CHECK-START-ARM64: int Main.sadShort2Int(char[], char[]) loop_optimization (after) + /// CHECK-NOT: VecSADAccumulate + private static int sadShort2Int(char[] s1, char[] s2) { + int min_length = Math.min(s1.length, s2.length); + int sad = 0; + for (int i = 0; i < min_length; i++) { + sad += Math.abs(s1[i] - s2[i]); + } + return sad; + } + + /// CHECK-START: int Main.sadShort2IntAlt(char[], char[]) loop_optimization (before) + /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none + /// CHECK-DAG: <<Cons1:i\d+>> IntConstant 1 loop:none + /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Phi2:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get1:c\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get2:c\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Sub:i\d+>> Sub [<<Get2>>,<<Get1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Intrin:i\d+>> InvokeStaticOrDirect [<<Sub>>] intrinsic:MathAbsInt loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi2>>,<<Intrin>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi1>>,<<Cons1>>] loop:<<Loop>> outer_loop:none + // + /// CHECK-START-ARM64: int Main.sadShort2IntAlt(char[], char[]) loop_optimization (after) + /// CHECK-NOT: VecSADAccumulate + private static int sadShort2IntAlt(char[] s1, char[] s2) { + int min_length = Math.min(s1.length, s2.length); + int sad = 0; + for (int i = 0; i < min_length; i++) { + char s = s1[i]; + char p = s2[i]; + sad += s >= p ? s - p : p - s; + } + return sad; + } + + /// CHECK-START: int Main.sadShort2IntAlt2(char[], char[]) loop_optimization (before) + /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none + /// CHECK-DAG: <<Cons1:i\d+>> IntConstant 1 loop:none + /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Phi2:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get1:c\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get2:c\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Sub:i\d+>> Sub [<<Get1>>,<<Get2>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Intrin:i\d+>> InvokeStaticOrDirect [<<Sub>>] intrinsic:MathAbsInt loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi2>>,<<Intrin>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi1>>,<<Cons1>>] loop:<<Loop>> outer_loop:none + // + /// CHECK-START-ARM64: int Main.sadShort2IntAlt2(char[], char[]) loop_optimization (after) + /// CHECK-NOT: VecSADAccumulate + private static int sadShort2IntAlt2(char[] s1, char[] s2) { + int min_length = Math.min(s1.length, s2.length); + int sad = 0; + for (int i = 0; i < min_length; i++) { + char s = s1[i]; + char p = s2[i]; + int x = s - p; + if (x < 0) x = -x; + sad += x; + } + return sad; + } + + /// CHECK-START: long Main.sadShort2Long(char[], char[]) loop_optimization (before) + /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none + /// CHECK-DAG: <<Cons1:i\d+>> IntConstant 1 loop:none + /// CHECK-DAG: <<ConsL:j\d+>> LongConstant 0 loop:none + /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Phi2:j\d+>> Phi [<<ConsL>>,{{j\d+}}] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get1:c\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get2:c\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Cnv1:j\d+>> TypeConversion [<<Get1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Cnv2:j\d+>> TypeConversion [<<Get2>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Sub:j\d+>> Sub [<<Cnv1>>,<<Cnv2>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Intrin:j\d+>> InvokeStaticOrDirect [<<Sub>>] intrinsic:MathAbsLong loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi2>>,<<Intrin>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi1>>,<<Cons1>>] loop:<<Loop>> outer_loop:none + // + /// CHECK-START-ARM64: long Main.sadShort2Long(char[], char[]) loop_optimization (after) + /// CHECK-NOT: VecSADAccumulate + private static long sadShort2Long(char[] s1, char[] s2) { + int min_length = Math.min(s1.length, s2.length); + long sad = 0; + for (int i = 0; i < min_length; i++) { + long x = s1[i]; + long y = s2[i]; + sad += Math.abs(x - y); + } + return sad; + } + + /// CHECK-START: long Main.sadShort2LongAt1(char[], char[]) loop_optimization (before) + /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none + /// CHECK-DAG: <<Cons1:i\d+>> IntConstant 1 loop:none + /// CHECK-DAG: <<ConsL:j\d+>> LongConstant 1 loop:none + /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Phi2:j\d+>> Phi [<<ConsL>>,{{j\d+}}] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get1:c\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get2:c\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Cnv1:j\d+>> TypeConversion [<<Get1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Cnv2:j\d+>> TypeConversion [<<Get2>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Sub:j\d+>> Sub [<<Cnv1>>,<<Cnv2>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Intrin:j\d+>> InvokeStaticOrDirect [<<Sub>>] intrinsic:MathAbsLong loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi2>>,<<Intrin>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi1>>,<<Cons1>>] loop:<<Loop>> outer_loop:none + // + /// CHECK-START-ARM64: long Main.sadShort2LongAt1(char[], char[]) loop_optimization (after) + /// CHECK-NOT: VecSADAccumulate + private static long sadShort2LongAt1(char[] s1, char[] s2) { + int min_length = Math.min(s1.length, s2.length); + long sad = 1; // starts at 1 + for (int i = 0; i < min_length; i++) { + long x = s1[i]; + long y = s2[i]; + sad += Math.abs(x - y); + } + return sad; + } + + public static void main(String[] args) { + // Cross-test the two most extreme values individually. + char[] s1 = { 0, 0x8000, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; + char[] s2 = { 0, 0x7fff, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; + expectEquals(1, sadShort2Short(s1, s2)); + expectEquals(1, sadShort2Short(s2, s1)); + expectEquals(1, sadShort2ShortAlt(s1, s2)); + expectEquals(1, sadShort2ShortAlt(s2, s1)); + expectEquals(1, sadShort2ShortAlt2(s1, s2)); + expectEquals(1, sadShort2ShortAlt2(s2, s1)); + expectEquals(1, sadShort2Int(s1, s2)); + expectEquals(1, sadShort2Int(s2, s1)); + expectEquals(1, sadShort2IntAlt(s1, s2)); + expectEquals(1, sadShort2IntAlt(s2, s1)); + expectEquals(1, sadShort2IntAlt2(s1, s2)); + expectEquals(1, sadShort2IntAlt2(s2, s1)); + expectEquals(1L, sadShort2Long(s1, s2)); + expectEquals(1L, sadShort2Long(s2, s1)); + expectEquals(2L, sadShort2LongAt1(s1, s2)); + expectEquals(2L, sadShort2LongAt1(s2, s1)); + + // Use cross-values to test all cases. + char[] interesting = { + (char) 0x0000, + (char) 0x0001, + (char) 0x0002, + (char) 0x1234, + (char) 0x8000, + (char) 0x8001, + (char) 0x7fff, + (char) 0xffff + }; + int n = interesting.length; + int m = n * n + 1; + s1 = new char[m]; + s2 = new char[m]; + int k = 0; + for (int i = 0; i < n; i++) { + for (int j = 0; j < n; j++) { + s1[k] = interesting[i]; + s2[k] = interesting[j]; + k++; + } + } + s1[k] = 10; + s2[k] = 2; + expectEquals(56196, sadShort2Short(s1, s2)); + expectEquals(56196, sadShort2ShortAlt(s1, s2)); + expectEquals(56196, sadShort2ShortAlt2(s1, s2)); + expectEquals(1497988, sadShort2Int(s1, s2)); + expectEquals(1497988, sadShort2IntAlt(s1, s2)); + expectEquals(1497988, sadShort2IntAlt2(s1, s2)); + expectEquals(1497988L, sadShort2Long(s1, s2)); + expectEquals(1497989L, sadShort2LongAt1(s1, s2)); + + System.out.println("passed"); + } + + private static void expectEquals(int expected, int result) { + if (expected != result) { + throw new Error("Expected: " + expected + ", found: " + result); + } + } + + private static void expectEquals(long expected, long result) { + if (expected != result) { + throw new Error("Expected: " + expected + ", found: " + result); + } + } +} diff --git a/test/660-checker-simd-sad-int/expected.txt b/test/660-checker-simd-sad-int/expected.txt new file mode 100644 index 0000000000..b0aad4deb5 --- /dev/null +++ b/test/660-checker-simd-sad-int/expected.txt @@ -0,0 +1 @@ +passed diff --git a/test/660-checker-simd-sad-int/info.txt b/test/660-checker-simd-sad-int/info.txt new file mode 100644 index 0000000000..b56c119129 --- /dev/null +++ b/test/660-checker-simd-sad-int/info.txt @@ -0,0 +1 @@ +Functional tests on SAD vectorization. diff --git a/test/660-checker-simd-sad-int/src/Main.java b/test/660-checker-simd-sad-int/src/Main.java new file mode 100644 index 0000000000..0daeeddc69 --- /dev/null +++ b/test/660-checker-simd-sad-int/src/Main.java @@ -0,0 +1,248 @@ +/* + * Copyright (C) 2017 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Tests for SAD (sum of absolute differences). + */ +public class Main { + + /// CHECK-START: int Main.sadInt2Int(int[], int[]) loop_optimization (before) + /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none + /// CHECK-DAG: <<Cons1:i\d+>> IntConstant 1 loop:none + /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Phi2:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get1:i\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get2:i\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Sub:i\d+>> Sub [<<Get1>>,<<Get2>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Intrin:i\d+>> InvokeStaticOrDirect [<<Sub>>] intrinsic:MathAbsInt loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi2>>,<<Intrin>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi1>>,<<Cons1>>] loop:<<Loop>> outer_loop:none + // + /// CHECK-START-ARM64: int Main.sadInt2Int(int[], int[]) loop_optimization (after) + /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none + /// CHECK-DAG: <<Cons4:i\d+>> IntConstant 4 loop:none + /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<Cons0>>] loop:none + /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Load1:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Load2:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<SAD:d\d+>> VecSADAccumulate [<<Phi2>>,<<Load1>>,<<Load2>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi1>>,<<Cons4>>] loop:<<Loop>> outer_loop:none + private static int sadInt2Int(int[] x, int[] y) { + int min_length = Math.min(x.length, y.length); + int sad = 0; + for (int i = 0; i < min_length; i++) { + sad += Math.abs(x[i] - y[i]); + } + return sad; + } + + /// CHECK-START: int Main.sadInt2IntAlt(int[], int[]) loop_optimization (before) + /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none + /// CHECK-DAG: <<Cons1:i\d+>> IntConstant 1 loop:none + /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Phi2:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get1:i\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get2:i\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Sub1:i\d+>> Sub [<<Get2>>,<<Get1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Sub2:i\d+>> Sub [<<Get1>>,<<Get2>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Select:i\d+>> Select [<<Sub2>>,<<Sub1>>,{{z\d+}}] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi2>>,<<Select>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi1>>,<<Cons1>>] loop:<<Loop>> outer_loop:none + // + // No ABS? No SAD! + // + /// CHECK-START-ARM64: int Main.sadInt2IntAlt(int[], int[]) loop_optimization (after) + /// CHECK-NOT: VecSADAccumulate + private static int sadInt2IntAlt(int[] x, int[] y) { + int min_length = Math.min(x.length, y.length); + int sad = 0; + for (int i = 0; i < min_length; i++) { + int s = x[i]; + int p = y[i]; + sad += s >= p ? s - p : p - s; + } + return sad; + } + + /// CHECK-START: int Main.sadInt2IntAlt2(int[], int[]) loop_optimization (before) + /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none + /// CHECK-DAG: <<Cons1:i\d+>> IntConstant 1 loop:none + /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Phi2:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get1:i\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get2:i\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Sub:i\d+>> Sub [<<Get1>>,<<Get2>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Intrin:i\d+>> InvokeStaticOrDirect [<<Sub>>] intrinsic:MathAbsInt loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi2>>,<<Intrin>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi1>>,<<Cons1>>] loop:<<Loop>> outer_loop:none + // + /// CHECK-START-ARM64: int Main.sadInt2IntAlt2(int[], int[]) loop_optimization (after) + /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none + /// CHECK-DAG: <<Cons4:i\d+>> IntConstant 4 loop:none + /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<Cons0>>] loop:none + /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Load1:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Load2:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<SAD:d\d+>> VecSADAccumulate [<<Phi2>>,<<Load1>>,<<Load2>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi1>>,<<Cons4>>] loop:<<Loop>> outer_loop:none + private static int sadInt2IntAlt2(int[] x, int[] y) { + int min_length = Math.min(x.length, y.length); + int sad = 0; + for (int i = 0; i < min_length; i++) { + int s = x[i]; + int p = y[i]; + int m = s - p; + if (m < 0) m = -m; + sad += m; + } + return sad; + } + + /// CHECK-START: long Main.sadInt2Long(int[], int[]) loop_optimization (before) + /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none + /// CHECK-DAG: <<Cons1:i\d+>> IntConstant 1 loop:none + /// CHECK-DAG: <<ConsL:j\d+>> LongConstant 0 loop:none + /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Phi2:j\d+>> Phi [<<ConsL>>,{{j\d+}}] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get1:i\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get2:i\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Cnv1:j\d+>> TypeConversion [<<Get1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Cnv2:j\d+>> TypeConversion [<<Get2>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Sub:j\d+>> Sub [<<Cnv1>>,<<Cnv2>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Intrin:j\d+>> InvokeStaticOrDirect [<<Sub>>] intrinsic:MathAbsLong loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi2>>,<<Intrin>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi1>>,<<Cons1>>] loop:<<Loop>> outer_loop:none + // + /// CHECK-START-ARM64: long Main.sadInt2Long(int[], int[]) loop_optimization (after) + /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none + /// CHECK-DAG: <<Cons4:i\d+>> IntConstant 4 loop:none + /// CHECK-DAG: <<ConsL:j\d+>> LongConstant 0 loop:none + /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<ConsL>>] loop:none + /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Load1:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Load2:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<SAD:d\d+>> VecSADAccumulate [<<Phi2>>,<<Load1>>,<<Load2>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi1>>,<<Cons4>>] loop:<<Loop>> outer_loop:none + private static long sadInt2Long(int[] x, int[] y) { + int min_length = Math.min(x.length, y.length); + long sad = 0; + for (int i = 0; i < min_length; i++) { + long s = x[i]; + long p = y[i]; + sad += Math.abs(s - p); + } + return sad; + } + + /// CHECK-START: long Main.sadInt2LongAt1(int[], int[]) loop_optimization (before) + /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none + /// CHECK-DAG: <<Cons1:i\d+>> IntConstant 1 loop:none + /// CHECK-DAG: <<ConsL:j\d+>> LongConstant 1 loop:none + /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Phi2:j\d+>> Phi [<<ConsL>>,{{j\d+}}] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get1:i\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get2:i\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Cnv1:j\d+>> TypeConversion [<<Get1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Cnv2:j\d+>> TypeConversion [<<Get2>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Sub:j\d+>> Sub [<<Cnv1>>,<<Cnv2>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Intrin:j\d+>> InvokeStaticOrDirect [<<Sub>>] intrinsic:MathAbsLong loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi2>>,<<Intrin>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi1>>,<<Cons1>>] loop:<<Loop>> outer_loop:none + // + /// CHECK-START-ARM64: long Main.sadInt2LongAt1(int[], int[]) loop_optimization (after) + /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none + /// CHECK-DAG: <<Cons4:i\d+>> IntConstant 4 loop:none + /// CHECK-DAG: <<ConsL:j\d+>> LongConstant 1 loop:none + /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<ConsL>>] loop:none + /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Load1:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Load2:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<SAD:d\d+>> VecSADAccumulate [<<Phi2>>,<<Load1>>,<<Load2>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi1>>,<<Cons4>>] loop:<<Loop>> outer_loop:none + private static long sadInt2LongAt1(int[] x, int[] y) { + int min_length = Math.min(x.length, y.length); + long sad = 1; // starts at 1 + for (int i = 0; i < min_length; i++) { + long s = x[i]; + long p = y[i]; + sad += Math.abs(s - p); + } + return sad; + } + + public static void main(String[] args) { + // Cross-test the two most extreme values individually. + int[] x = { 0, Integer.MAX_VALUE, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; + int[] y = { 0, Integer.MIN_VALUE, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; + expectEquals(1, sadInt2Int(x, y)); + expectEquals(1, sadInt2Int(y, x)); + expectEquals(-1, sadInt2IntAlt(x, y)); + expectEquals(-1, sadInt2IntAlt(y, x)); + expectEquals(1, sadInt2IntAlt2(x, y)); + expectEquals(1, sadInt2IntAlt2(y, x)); + expectEquals(4294967295L, sadInt2Long(x, y)); + expectEquals(4294967295L, sadInt2Long(y, x)); + expectEquals(4294967296L, sadInt2LongAt1(x, y)); + expectEquals(4294967296L, sadInt2LongAt1(y, x)); + + // Use cross-values for the interesting values. + int[] interesting = { + 0x00000000, 0x00000001, 0x00007fff, 0x00008000, 0x00008001, 0x0000ffff, + 0x00010000, 0x00010001, 0x00017fff, 0x00018000, 0x00018001, 0x0001ffff, + 0x7fff0000, 0x7fff0001, 0x7fff7fff, 0x7fff8000, 0x7fff8001, 0x7fffffff, + 0x80000000, 0x80000001, 0x80007fff, 0x80008000, 0x80008001, 0x8000ffff, + 0x80010000, 0x80010001, 0x80017fff, 0x80018000, 0x80018001, 0x8001ffff, + 0xffff0000, 0xffff0001, 0xffff7fff, 0xffff8000, 0xffff8001, 0xffffffff + }; + int n = interesting.length; + int m = n * n + 1; + x = new int[m]; + y = new int[m]; + int k = 0; + for (int i = 0; i < n; i++) { + for (int j = 0; j < n; j++) { + x[k] = interesting[i]; + y[k] = interesting[j]; + k++; + } + } + x[k] = 10; + y[k] = 2; + expectEquals(8, sadInt2Int(x, y)); + expectEquals(-13762600, sadInt2IntAlt(x, y)); + expectEquals(8, sadInt2IntAlt2(x, y)); + expectEquals(2010030931928L, sadInt2Long(x, y)); + expectEquals(2010030931929L, sadInt2LongAt1(x, y)); + + System.out.println("passed"); + } + + private static void expectEquals(int expected, int result) { + if (expected != result) { + throw new Error("Expected: " + expected + ", found: " + result); + } + } + + private static void expectEquals(long expected, long result) { + if (expected != result) { + throw new Error("Expected: " + expected + ", found: " + result); + } + } +} diff --git a/test/660-checker-simd-sad-long/expected.txt b/test/660-checker-simd-sad-long/expected.txt new file mode 100644 index 0000000000..b0aad4deb5 --- /dev/null +++ b/test/660-checker-simd-sad-long/expected.txt @@ -0,0 +1 @@ +passed diff --git a/test/660-checker-simd-sad-long/info.txt b/test/660-checker-simd-sad-long/info.txt new file mode 100644 index 0000000000..b56c119129 --- /dev/null +++ b/test/660-checker-simd-sad-long/info.txt @@ -0,0 +1 @@ +Functional tests on SAD vectorization. diff --git a/test/660-checker-simd-sad-long/src/Main.java b/test/660-checker-simd-sad-long/src/Main.java new file mode 100644 index 0000000000..06f62bd031 --- /dev/null +++ b/test/660-checker-simd-sad-long/src/Main.java @@ -0,0 +1,209 @@ +/* + * Copyright (C) 2017 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Tests for SAD (sum of absolute differences). + */ +public class Main { + + /// CHECK-START: long Main.sadLong2Long(long[], long[]) loop_optimization (before) + /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none + /// CHECK-DAG: <<Cons1:i\d+>> IntConstant 1 loop:none + /// CHECK-DAG: <<ConsL:j\d+>> LongConstant 0 loop:none + /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Phi2:j\d+>> Phi [<<ConsL>>,{{j\d+}}] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get1:j\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get2:j\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Sub:j\d+>> Sub [<<Get1>>,<<Get2>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Intrin:j\d+>> InvokeStaticOrDirect [<<Sub>>] intrinsic:MathAbsLong loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi2>>,<<Intrin>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi1>>,<<Cons1>>] loop:<<Loop>> outer_loop:none + // + /// CHECK-START-ARM64: long Main.sadLong2Long(long[], long[]) loop_optimization (after) + /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none + /// CHECK-DAG: <<Cons2:i\d+>> IntConstant 2 loop:none + /// CHECK-DAG: <<ConsL:j\d+>> LongConstant 0 loop:none + /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<ConsL>>] loop:none + /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Load1:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Load2:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<SAD:d\d+>> VecSADAccumulate [<<Phi2>>,<<Load1>>,<<Load2>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi1>>,<<Cons2>>] loop:<<Loop>> outer_loop:none + private static long sadLong2Long(long[] x, long[] y) { + int min_length = Math.min(x.length, y.length); + long sad = 0; + for (int i = 0; i < min_length; i++) { + sad += Math.abs(x[i] - y[i]); + } + return sad; + } + + /// CHECK-START: long Main.sadLong2LongAlt(long[], long[]) loop_optimization (before) + /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none + /// CHECK-DAG: <<Cons1:i\d+>> IntConstant 1 loop:none + /// CHECK-DAG: <<ConsL:j\d+>> LongConstant 0 loop:none + /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Phi2:j\d+>> Phi [<<ConsL>>,{{j\d+}}] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get1:j\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get2:j\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Sub1:j\d+>> Sub [<<Get2>>,<<Get1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Sub2:j\d+>> Sub [<<Get1>>,<<Get2>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Select:j\d+>> Select [<<Sub2>>,<<Sub1>>,{{z\d+}}] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi2>>,<<Select>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi1>>,<<Cons1>>] loop:<<Loop>> outer_loop:none + // + // No ABS? No SAD! + // + /// CHECK-START: long Main.sadLong2LongAlt(long[], long[]) loop_optimization (after) + /// CHECK-NOT: VecSADAccumulate + private static long sadLong2LongAlt(long[] x, long[] y) { + int min_length = Math.min(x.length, y.length); + long sad = 0; + for (int i = 0; i < min_length; i++) { + long s = x[i]; + long p = y[i]; + sad += s >= p ? s - p : p - s; + } + return sad; + } + + /// CHECK-START: long Main.sadLong2LongAlt2(long[], long[]) loop_optimization (before) + /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none + /// CHECK-DAG: <<Cons1:i\d+>> IntConstant 1 loop:none + /// CHECK-DAG: <<ConsL:j\d+>> LongConstant 0 loop:none + /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Phi2:j\d+>> Phi [<<ConsL>>,{{j\d+}}] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get1:j\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get2:j\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Sub:j\d+>> Sub [<<Get1>>,<<Get2>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Intrin:j\d+>> InvokeStaticOrDirect [<<Sub>>] intrinsic:MathAbsLong loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi2>>,<<Intrin>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi1>>,<<Cons1>>] loop:<<Loop>> outer_loop:none + // + /// CHECK-START-ARM64: long Main.sadLong2LongAlt2(long[], long[]) loop_optimization (after) + /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none + /// CHECK-DAG: <<Cons2:i\d+>> IntConstant 2 loop:none + /// CHECK-DAG: <<ConsL:j\d+>> LongConstant 0 loop:none + /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<ConsL>>] loop:none + /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Load1:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Load2:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<SAD:d\d+>> VecSADAccumulate [<<Phi2>>,<<Load1>>,<<Load2>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi1>>,<<Cons2>>] loop:<<Loop>> outer_loop:none + private static long sadLong2LongAlt2(long[] x, long[] y) { + int min_length = Math.min(x.length, y.length); + long sad = 0; + for (int i = 0; i < min_length; i++) { + long s = x[i]; + long p = y[i]; + long m = s - p; + if (m < 0) m = -m; + sad += m; + } + return sad; + } + + /// CHECK-START: long Main.sadLong2LongAt1(long[], long[]) loop_optimization (before) + /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none + /// CHECK-DAG: <<Cons1:i\d+>> IntConstant 1 loop:none + /// CHECK-DAG: <<ConsL:j\d+>> LongConstant 1 loop:none + /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Phi2:j\d+>> Phi [<<ConsL>>,{{j\d+}}] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get1:j\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get2:j\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Sub:j\d+>> Sub [<<Get1>>,<<Get2>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Intrin:j\d+>> InvokeStaticOrDirect [<<Sub>>] intrinsic:MathAbsLong loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi2>>,<<Intrin>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi1>>,<<Cons1>>] loop:<<Loop>> outer_loop:none + // + /// CHECK-START-ARM64: long Main.sadLong2LongAt1(long[], long[]) loop_optimization (after) + /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none + /// CHECK-DAG: <<Cons2:i\d+>> IntConstant 2 loop:none + /// CHECK-DAG: <<ConsL:j\d+>> LongConstant 1 loop:none + /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<ConsL>>] loop:none + /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Load1:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Load2:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<SAD:d\d+>> VecSADAccumulate [<<Phi2>>,<<Load1>>,<<Load2>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi1>>,<<Cons2>>] loop:<<Loop>> outer_loop:none + private static long sadLong2LongAt1(long[] x, long[] y) { + int min_length = Math.min(x.length, y.length); + long sad = 1; // starts at 1 + for (int i = 0; i < min_length; i++) { + sad += Math.abs(x[i] - y[i]); + } + return sad; + } + + public static void main(String[] args) { + // Cross-test the two most extreme values individually. + long[] x = { 0, Long.MIN_VALUE, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; + long[] y = { 0, Long.MAX_VALUE, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; + expectEquals(1L, sadLong2Long(x, y)); + expectEquals(1L, sadLong2Long(y, x)); + expectEquals(-1L, sadLong2LongAlt(x, y)); + expectEquals(-1L, sadLong2LongAlt(y, x)); + expectEquals(1L, sadLong2LongAlt2(x, y)); + expectEquals(1L, sadLong2LongAlt2(y, x)); + expectEquals(2L, sadLong2LongAt1(x, y)); + expectEquals(2L, sadLong2LongAt1(y, x)); + + // Use cross-values for the interesting values. + long[] interesting = { + 0x0000000000000000L, 0x0000000000000001L, 0x000000007fffffffL, + 0x0000000080000000L, 0x0000000080000001L, 0x00000000ffffffffL, + 0x0000000100000000L, 0x0000000100000001L, 0x000000017fffffffL, + 0x0000000180000000L, 0x0000000180000001L, 0x00000001ffffffffL, + 0x7fffffff00000000L, 0x7fffffff00000001L, 0x7fffffff7fffffffL, + 0x7fffffff80000000L, 0x7fffffff80000001L, 0x7fffffffffffffffL, + 0x8000000000000000L, 0x8000000000000001L, 0x800000007fffffffL, + 0x8000000080000000L, 0x8000000080000001L, 0x80000000ffffffffL, + 0x8000000100000000L, 0x8000000100000001L, 0x800000017fffffffL, + 0x8000000180000000L, 0x8000000180000001L, 0x80000001ffffffffL, + 0xffffffff00000000L, 0xffffffff00000001L, 0xffffffff7fffffffL, + 0xffffffff80000000L, 0xffffffff80000001L, 0xffffffffffffffffL + }; + int n = interesting.length; + int m = n * n + 1; + x = new long[m]; + y = new long[m]; + int k = 0; + for (int i = 0; i < n; i++) { + for (int j = 0; j < n; j++) { + x[k] = interesting[i]; + y[k] = interesting[j]; + k++; + } + } + x[k] = 10; + y[k] = 2; + expectEquals(8L, sadLong2Long(x, y)); + expectEquals(-901943132200L, sadLong2LongAlt(x, y)); + expectEquals(8L, sadLong2LongAlt2(x, y)); + expectEquals(9L, sadLong2LongAt1(x, y)); + + System.out.println("passed"); + } + + private static void expectEquals(long expected, long result) { + if (expected != result) { + throw new Error("Expected: " + expected + ", found: " + result); + } + } +} diff --git a/test/660-checker-simd-sad-short/expected.txt b/test/660-checker-simd-sad-short/expected.txt new file mode 100644 index 0000000000..b0aad4deb5 --- /dev/null +++ b/test/660-checker-simd-sad-short/expected.txt @@ -0,0 +1 @@ +passed diff --git a/test/660-checker-simd-sad-short/info.txt b/test/660-checker-simd-sad-short/info.txt new file mode 100644 index 0000000000..b56c119129 --- /dev/null +++ b/test/660-checker-simd-sad-short/info.txt @@ -0,0 +1 @@ +Functional tests on SAD vectorization. diff --git a/test/660-checker-simd-sad-short/src/Main.java b/test/660-checker-simd-sad-short/src/Main.java new file mode 100644 index 0000000000..d94308e24d --- /dev/null +++ b/test/660-checker-simd-sad-short/src/Main.java @@ -0,0 +1,299 @@ +/* + * Copyright (C) 2017 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Tests for SAD (sum of absolute differences). + */ +public class Main { + + // TODO: lower precision still coming, b/64091002 + + private static short sadShort2Short(short[] s1, short[] s2) { + int min_length = Math.min(s1.length, s2.length); + short sad = 0; + for (int i = 0; i < min_length; i++) { + sad += Math.abs(s1[i] - s2[i]); + } + return sad; + } + + private static short sadShort2ShortAlt(short[] s1, short[] s2) { + int min_length = Math.min(s1.length, s2.length); + short sad = 0; + for (int i = 0; i < min_length; i++) { + short s = s1[i]; + short p = s2[i]; + sad += s >= p ? s - p : p - s; + } + return sad; + } + + private static short sadShort2ShortAlt2(short[] s1, short[] s2) { + int min_length = Math.min(s1.length, s2.length); + short sad = 0; + for (int i = 0; i < min_length; i++) { + short s = s1[i]; + short p = s2[i]; + int x = s - p; + if (x < 0) x = -x; + sad += x; + } + return sad; + } + + /// CHECK-START: int Main.sadShort2Int(short[], short[]) loop_optimization (before) + /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none + /// CHECK-DAG: <<Cons1:i\d+>> IntConstant 1 loop:none + /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Phi2:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get1:s\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get2:s\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Sub:i\d+>> Sub [<<Get1>>,<<Get2>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Intrin:i\d+>> InvokeStaticOrDirect [<<Sub>>] intrinsic:MathAbsInt loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi2>>,<<Intrin>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi1>>,<<Cons1>>] loop:<<Loop>> outer_loop:none + // + /// CHECK-START-ARM64: int Main.sadShort2Int(short[], short[]) loop_optimization (after) + /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none + /// CHECK-DAG: <<Cons8:i\d+>> IntConstant 8 loop:none + /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<Cons0>>] loop:none + /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Load1:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Load2:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<SAD:d\d+>> VecSADAccumulate [<<Phi2>>,<<Load1>>,<<Load2>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi1>>,<<Cons8>>] loop:<<Loop>> outer_loop:none + private static int sadShort2Int(short[] s1, short[] s2) { + int min_length = Math.min(s1.length, s2.length); + int sad = 0; + for (int i = 0; i < min_length; i++) { + sad += Math.abs(s1[i] - s2[i]); + } + return sad; + } + + /// CHECK-START: int Main.sadShort2IntAlt(short[], short[]) loop_optimization (before) + /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none + /// CHECK-DAG: <<Cons1:i\d+>> IntConstant 1 loop:none + /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Phi2:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get1:s\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get2:s\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Sub:i\d+>> Sub [<<Get2>>,<<Get1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Intrin:i\d+>> InvokeStaticOrDirect [<<Sub>>] intrinsic:MathAbsInt loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi2>>,<<Intrin>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi1>>,<<Cons1>>] loop:<<Loop>> outer_loop:none + // + /// CHECK-START-ARM64: int Main.sadShort2IntAlt(short[], short[]) loop_optimization (after) + /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none + /// CHECK-DAG: <<Cons8:i\d+>> IntConstant 8 loop:none + /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<Cons0>>] loop:none + /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Load1:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Load2:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<SAD:d\d+>> VecSADAccumulate [<<Phi2>>,<<Load2>>,<<Load1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi1>>,<<Cons8>>] loop:<<Loop>> outer_loop:none + private static int sadShort2IntAlt(short[] s1, short[] s2) { + int min_length = Math.min(s1.length, s2.length); + int sad = 0; + for (int i = 0; i < min_length; i++) { + short s = s1[i]; + short p = s2[i]; + sad += s >= p ? s - p : p - s; + } + return sad; + } + + /// CHECK-START: int Main.sadShort2IntAlt2(short[], short[]) loop_optimization (before) + /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none + /// CHECK-DAG: <<Cons1:i\d+>> IntConstant 1 loop:none + /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Phi2:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get1:s\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get2:s\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Sub:i\d+>> Sub [<<Get1>>,<<Get2>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Intrin:i\d+>> InvokeStaticOrDirect [<<Sub>>] intrinsic:MathAbsInt loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi2>>,<<Intrin>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi1>>,<<Cons1>>] loop:<<Loop>> outer_loop:none + // + /// CHECK-START-ARM64: int Main.sadShort2IntAlt2(short[], short[]) loop_optimization (after) + /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none + /// CHECK-DAG: <<Cons8:i\d+>> IntConstant 8 loop:none + /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<Cons0>>] loop:none + /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Load1:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Load2:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<SAD:d\d+>> VecSADAccumulate [<<Phi2>>,<<Load1>>,<<Load2>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi1>>,<<Cons8>>] loop:<<Loop>> outer_loop:none + private static int sadShort2IntAlt2(short[] s1, short[] s2) { + int min_length = Math.min(s1.length, s2.length); + int sad = 0; + for (int i = 0; i < min_length; i++) { + short s = s1[i]; + short p = s2[i]; + int x = s - p; + if (x < 0) x = -x; + sad += x; + } + return sad; + } + + /// CHECK-START: long Main.sadShort2Long(short[], short[]) loop_optimization (before) + /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none + /// CHECK-DAG: <<Cons1:i\d+>> IntConstant 1 loop:none + /// CHECK-DAG: <<ConsL:j\d+>> LongConstant 0 loop:none + /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Phi2:j\d+>> Phi [<<ConsL>>,{{j\d+}}] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get1:s\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get2:s\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Cnv1:j\d+>> TypeConversion [<<Get1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Cnv2:j\d+>> TypeConversion [<<Get2>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Sub:j\d+>> Sub [<<Cnv1>>,<<Cnv2>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Intrin:j\d+>> InvokeStaticOrDirect [<<Sub>>] intrinsic:MathAbsLong loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi2>>,<<Intrin>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi1>>,<<Cons1>>] loop:<<Loop>> outer_loop:none + // + /// CHECK-START-ARM64: long Main.sadShort2Long(short[], short[]) loop_optimization (after) + /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none + /// CHECK-DAG: <<Cons8:i\d+>> IntConstant 8 loop:none + /// CHECK-DAG: <<ConsL:j\d+>> LongConstant 0 loop:none + /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<ConsL>>] loop:none + /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Load1:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Load2:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<SAD:d\d+>> VecSADAccumulate [<<Phi2>>,<<Load1>>,<<Load2>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi1>>,<<Cons8>>] loop:<<Loop>> outer_loop:none + private static long sadShort2Long(short[] s1, short[] s2) { + int min_length = Math.min(s1.length, s2.length); + long sad = 0; + for (int i = 0; i < min_length; i++) { + long x = s1[i]; + long y = s2[i]; + sad += Math.abs(x - y); + } + return sad; + } + + /// CHECK-START: long Main.sadShort2LongAt1(short[], short[]) loop_optimization (before) + /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none + /// CHECK-DAG: <<Cons1:i\d+>> IntConstant 1 loop:none + /// CHECK-DAG: <<ConsL:j\d+>> LongConstant 1 loop:none + /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Phi2:j\d+>> Phi [<<ConsL>>,{{j\d+}}] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get1:s\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Get2:s\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Cnv1:j\d+>> TypeConversion [<<Get1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Cnv2:j\d+>> TypeConversion [<<Get2>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Sub:j\d+>> Sub [<<Cnv1>>,<<Cnv2>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Intrin:j\d+>> InvokeStaticOrDirect [<<Sub>>] intrinsic:MathAbsLong loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi2>>,<<Intrin>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi1>>,<<Cons1>>] loop:<<Loop>> outer_loop:none + // + /// CHECK-START-ARM64: long Main.sadShort2LongAt1(short[], short[]) loop_optimization (after) + /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none + /// CHECK-DAG: <<Cons8:i\d+>> IntConstant 8 loop:none + /// CHECK-DAG: <<ConsL:j\d+>> LongConstant 1 loop:none + /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<ConsL>>] loop:none + /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none + /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Load1:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<Load2:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: <<SAD:d\d+>> VecSADAccumulate [<<Phi2>>,<<Load1>>,<<Load2>>] loop:<<Loop>> outer_loop:none + /// CHECK-DAG: Add [<<Phi1>>,<<Cons8>>] loop:<<Loop>> outer_loop:none + private static long sadShort2LongAt1(short[] s1, short[] s2) { + int min_length = Math.min(s1.length, s2.length); + long sad = 1; // starts at 1 + for (int i = 0; i < min_length; i++) { + long x = s1[i]; + long y = s2[i]; + sad += Math.abs(x - y); + } + return sad; + } + + public static void main(String[] args) { + // Cross-test the two most extreme values individually. + short[] s1 = { 0, -32768, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; + short[] s2 = { 0, 32767, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; + expectEquals(-1, sadShort2Short(s1, s2)); + expectEquals(-1, sadShort2Short(s2, s1)); + expectEquals(-1, sadShort2ShortAlt(s1, s2)); + expectEquals(-1, sadShort2ShortAlt(s2, s1)); + expectEquals(-1, sadShort2ShortAlt2(s1, s2)); + expectEquals(-1, sadShort2ShortAlt2(s2, s1)); + expectEquals(65535, sadShort2Int(s1, s2)); + expectEquals(65535, sadShort2Int(s2, s1)); + expectEquals(65535, sadShort2IntAlt(s1, s2)); + expectEquals(65535, sadShort2IntAlt(s2, s1)); + expectEquals(65535, sadShort2IntAlt2(s1, s2)); + expectEquals(65535, sadShort2IntAlt2(s2, s1)); + expectEquals(65535L, sadShort2Long(s1, s2)); + expectEquals(65535L, sadShort2Long(s2, s1)); + expectEquals(65536L, sadShort2LongAt1(s1, s2)); + expectEquals(65536L, sadShort2LongAt1(s2, s1)); + + // Use cross-values to test all cases. + short[] interesting = { + (short) 0x0000, + (short) 0x0001, + (short) 0x0002, + (short) 0x1234, + (short) 0x8000, + (short) 0x8001, + (short) 0x7fff, + (short) 0xffff + }; + int n = interesting.length; + int m = n * n + 1; + s1 = new short[m]; + s2 = new short[m]; + int k = 0; + for (int i = 0; i < n; i++) { + for (int j = 0; j < n; j++) { + s1[k] = interesting[i]; + s2[k] = interesting[j]; + k++; + } + } + s1[k] = 10; + s2[k] = 2; + expectEquals(-18932, sadShort2Short(s1, s2)); + expectEquals(-18932, sadShort2ShortAlt(s1, s2)); + expectEquals(-18932, sadShort2ShortAlt2(s1, s2)); + expectEquals(1291788, sadShort2Int(s1, s2)); + expectEquals(1291788, sadShort2IntAlt(s1, s2)); + expectEquals(1291788, sadShort2IntAlt2(s1, s2)); + expectEquals(1291788L, sadShort2Long(s1, s2)); + expectEquals(1291789L, sadShort2LongAt1(s1, s2)); + + System.out.println("passed"); + } + + private static void expectEquals(int expected, int result) { + if (expected != result) { + throw new Error("Expected: " + expected + ", found: " + result); + } + } + + private static void expectEquals(long expected, long result) { + if (expected != result) { + throw new Error("Expected: " + expected + ", found: " + result); + } + } +} diff --git a/test/661-checker-simd-reduc/src/Main.java b/test/661-checker-simd-reduc/src/Main.java index 71eb3cde9c..bcfa968584 100644 --- a/test/661-checker-simd-reduc/src/Main.java +++ b/test/661-checker-simd-reduc/src/Main.java @@ -80,6 +80,101 @@ public class Main { return sum; } + /// CHECK-START: int Main.reductionIntChain() loop_optimization (before) + /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none + /// CHECK-DAG: <<Cons1:i\d+>> IntConstant 1 loop:none + /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons1>>,{{i\d+}}] loop:<<Loop1:B\d+>> outer_loop:none + /// CHECK-DAG: <<Phi2:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop1>> outer_loop:none + /// CHECK-DAG: <<Get1:i\d+>> ArrayGet [{{l\d+}},<<Phi2>>] loop:<<Loop1>> outer_loop:none + /// CHECK-DAG: Add [<<Phi1>>,<<Get1>>] loop:<<Loop1>> outer_loop:none + /// CHECK-DAG: Add [<<Phi2>>,<<Cons1>>] loop:<<Loop1>> outer_loop:none + /// CHECK-DAG: <<Phi3:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop2:B\d+>> outer_loop:none + /// CHECK-DAG: <<Phi4:i\d+>> Phi [<<Phi1>>,{{i\d+}}] loop:<<Loop2>> outer_loop:none + /// CHECK-DAG: <<Get2:i\d+>> ArrayGet [{{l\d+}},<<Phi3>>] loop:<<Loop2>> outer_loop:none + /// CHECK-DAG: Add [<<Phi4>>,<<Get2>>] loop:<<Loop2>> outer_loop:none + /// CHECK-DAG: Add [<<Phi3>>,<<Cons1>>] loop:<<Loop2>> outer_loop:none + /// CHECK-DAG: Return [<<Phi4>>] loop:none + // + /// CHECK-EVAL: "<<Loop1>>" != "<<Loop2>>" + // + /// CHECK-START-ARM64: int Main.reductionIntChain() loop_optimization (after) + /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none + /// CHECK-DAG: <<Cons1:i\d+>> IntConstant 1 loop:none + /// CHECK-DAG: <<Cons4:i\d+>> IntConstant 4 loop:none + /// CHECK-DAG: <<Set1:d\d+>> VecSetScalars [<<Cons1>>] loop:none + /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop1:B\d+>> outer_loop:none + /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set1>>,{{d\d+}}] loop:<<Loop1>> outer_loop:none + /// CHECK-DAG: <<Load1:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop1>> outer_loop:none + /// CHECK-DAG: VecAdd [<<Phi2>>,<<Load1>>] loop:<<Loop1>> outer_loop:none + /// CHECK-DAG: Add [<<Phi1>>,<<Cons4>>] loop:<<Loop1>> outer_loop:none + /// CHECK-DAG: <<Red1:d\d+>> VecReduce [<<Phi2>>] loop:none + /// CHECK-DAG: <<Extr1:i\d+>> VecExtractScalar [<<Red1>>] loop:none + /// CHECK-DAG: <<Set2:d\d+>> VecSetScalars [<<Extr1>>] loop:none + /// CHECK-DAG: <<Phi3:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop2:B\d+>> outer_loop:none + /// CHECK-DAG: <<Phi4:d\d+>> Phi [<<Set2>>,{{d\d+}}] loop:<<Loop2>> outer_loop:none + /// CHECK-DAG: <<Load2:d\d+>> VecLoad [{{l\d+}},<<Phi3>>] loop:<<Loop2>> outer_loop:none + /// CHECK-DAG: VecAdd [<<Phi4>>,<<Load2>>] loop:<<Loop2>> outer_loop:none + /// CHECK-DAG: Add [<<Phi3>>,<<Cons4>>] loop:<<Loop2>> outer_loop:none + /// CHECK-DAG: <<Red2:d\d+>> VecReduce [<<Phi4>>] loop:none + /// CHECK-DAG: <<Extr2:i\d+>> VecExtractScalar [<<Red2>>] loop:none + /// CHECK-DAG: Return [<<Extr2>>] loop:none + // + /// CHECK-EVAL: "<<Loop1>>" != "<<Loop2>>" + // + // NOTE: pattern is robust with respect to vector loop unrolling. + private static int reductionIntChain() { + int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 }; + int r = 1; + for (int i = 0; i < 16; i++) { + r += x[i]; + } + for (int i = 0; i < 16; i++) { + r += x[i]; + } + return r; + } + + /// CHECK-START: int Main.reductionIntToLoop(int[]) loop_optimization (before) + /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none + /// CHECK-DAG: <<Cons1:i\d+>> IntConstant 1 loop:none + /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop1:B\d+>> outer_loop:none + /// CHECK-DAG: <<Phi2:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop1>> outer_loop:none + /// CHECK-DAG: <<Get:i\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop1>> outer_loop:none + /// CHECK-DAG: Add [<<Phi2>>,<<Get>>] loop:<<Loop1>> outer_loop:none + /// CHECK-DAG: Add [<<Phi1>>,<<Cons1>>] loop:<<Loop1>> outer_loop:none + /// CHECK-DAG: <<Phi3:i\d+>> Phi [<<Phi2>>,{{i\d+}}] loop:<<Loop2:B\d+>> outer_loop:none + /// CHECK-DAG: <<Phi4:i\d+>> Phi [<<Phi2>>,{{i\d+}}] loop:<<Loop2>> outer_loop:none + // + /// CHECK-EVAL: "<<Loop1>>" != "<<Loop2>>" + // + /// CHECK-START-ARM64: int Main.reductionIntToLoop(int[]) loop_optimization (after) + /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none + /// CHECK-DAG: <<Cons1:i\d+>> IntConstant 1 loop:none + /// CHECK-DAG: <<Cons4:i\d+>> IntConstant 4 loop:none + /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<Cons0>>] loop:none + /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop1:B\d+>> outer_loop:none + /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop1>> outer_loop:none + /// CHECK-DAG: <<Load1:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop1>> outer_loop:none + /// CHECK-DAG: VecAdd [<<Phi2>>,<<Load1>>] loop:<<Loop1>> outer_loop:none + /// CHECK-DAG: Add [<<Phi1>>,<<Cons4>>] loop:<<Loop1>> outer_loop:none + /// CHECK-DAG: <<Red:d\d+>> VecReduce [<<Phi2>>] loop:none + /// CHECK-DAG: <<Extr:i\d+>> VecExtractScalar [<<Red>>] loop:none + /// CHECK-DAG: <<Phi3:i\d+>> Phi [<<Extr>>,{{i\d+}}] loop:<<Loop2:B\d+>> outer_loop:none + /// CHECK-DAG: <<Phi4:i\d+>> Phi [<<Extr>>,{{i\d+}}] loop:<<Loop2>> outer_loop:none + // + /// CHECK-EVAL: "<<Loop1>>" != "<<Loop2>>" + // + private static int reductionIntToLoop(int[] x) { + int r = 0; + for (int i = 0; i < 4; i++) { + r += x[i]; + } + for (int i = r; i < 16; i++) { + r += i; + } + return r; + } + /// CHECK-START: long Main.reductionLong(long[]) loop_optimization (before) /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none /// CHECK-DAG: <<Long0:j\d+>> LongConstant 0 loop:none @@ -468,10 +563,28 @@ public class Main { } // Test various reductions in loops. + int[] x0 = { 0, 0, 0, 0 }; + int[] x1 = { 0, 0, 0, 1 }; + int[] x2 = { 1, 1, 1, 1 }; expectEquals(-74, reductionByte(xb)); expectEquals(-27466, reductionShort(xs)); expectEquals(38070, reductionChar(xc)); expectEquals(365750, reductionInt(xi)); + expectEquals(273, reductionIntChain()); + expectEquals(120, reductionIntToLoop(x0)); + expectEquals(121, reductionIntToLoop(x1)); + expectEquals(118, reductionIntToLoop(x2)); + expectEquals(-1205, reductionIntToLoop(xi)); + expectEquals(365750L, reductionLong(xl)); + expectEquals(-75, reductionByteM1(xb)); + expectEquals(-27467, reductionShortM1(xs)); + expectEquals(38069, reductionCharM1(xc)); + expectEquals(365749, reductionIntM1(xi)); + expectEquals(365749L, reductionLongM1(xl)); + expectEquals(74, reductionMinusByte(xb)); + expectEquals(27466, reductionMinusShort(xs)); + expectEquals(27466, reductionMinusChar(xc)); + expectEquals(-365750, reductionMinusInt(xi)); expectEquals(365750L, reductionLong(xl)); expectEquals(-75, reductionByteM1(xb)); expectEquals(-27467, reductionShortM1(xs)); |