Implement Sum-of-Abs-Differences idiom recognition.
Rationale:
Using the SAD idiom yields a great speedup on loops
that compute the sum-of-abs-differences operation.
It is currently used on ARM64 only (x86 lacks proper support).
Also includes some refinements around type conversions.
Speedup ExoPlayerAudio (golem run):
1.3x on ARM64
1.1x on x86
Test: test-art-host test-art-target
Bug: 64091002
Change-Id: Ia2b711d2bc23609a2ed50493dfe6719eedfe0130
diff --git a/compiler/optimizing/code_generator_vector_arm64.cc b/compiler/optimizing/code_generator_vector_arm64.cc
index 18a55c8..3f576c8 100644
--- a/compiler/optimizing/code_generator_vector_arm64.cc
+++ b/compiler/optimizing/code_generator_vector_arm64.cc
@@ -949,20 +949,18 @@
}
}
-void LocationsBuilderARM64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) {
- LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instr);
- switch (instr->GetPackedType()) {
+// Helper to set up locations for vector accumulations.
+static void CreateVecAccumLocations(ArenaAllocator* arena, HVecOperation* instruction) {
+ LocationSummary* locations = new (arena) LocationSummary(instruction);
+ switch (instruction->GetPackedType()) {
case Primitive::kPrimByte:
case Primitive::kPrimChar:
case Primitive::kPrimShort:
case Primitive::kPrimInt:
- locations->SetInAt(
- HVecMultiplyAccumulate::kInputAccumulatorIndex, Location::RequiresFpuRegister());
- locations->SetInAt(
- HVecMultiplyAccumulate::kInputMulLeftIndex, Location::RequiresFpuRegister());
- locations->SetInAt(
- HVecMultiplyAccumulate::kInputMulRightIndex, Location::RequiresFpuRegister());
- DCHECK_EQ(HVecMultiplyAccumulate::kInputAccumulatorIndex, 0);
+ case Primitive::kPrimLong:
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetInAt(1, Location::RequiresFpuRegister());
+ locations->SetInAt(2, Location::RequiresFpuRegister());
locations->SetOut(Location::SameAsFirstInput());
break;
default:
@@ -971,18 +969,25 @@
}
}
+void LocationsBuilderARM64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) {
+ CreateVecAccumLocations(GetGraph()->GetArena(), instruction);
+}
+
// Some early revisions of the Cortex-A53 have an erratum (835769) whereby it is possible for a
// 64-bit scalar multiply-accumulate instruction in AArch64 state to generate an incorrect result.
// However vector MultiplyAccumulate instruction is not affected.
-void InstructionCodeGeneratorARM64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) {
- LocationSummary* locations = instr->GetLocations();
- VRegister acc = VRegisterFrom(locations->InAt(HVecMultiplyAccumulate::kInputAccumulatorIndex));
- VRegister left = VRegisterFrom(locations->InAt(HVecMultiplyAccumulate::kInputMulLeftIndex));
- VRegister right = VRegisterFrom(locations->InAt(HVecMultiplyAccumulate::kInputMulRightIndex));
- switch (instr->GetPackedType()) {
+void InstructionCodeGeneratorARM64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ VRegister acc = VRegisterFrom(locations->InAt(0));
+ VRegister left = VRegisterFrom(locations->InAt(1));
+ VRegister right = VRegisterFrom(locations->InAt(2));
+
+ DCHECK(locations->InAt(0).Equals(locations->Out()));
+
+ switch (instruction->GetPackedType()) {
case Primitive::kPrimByte:
- DCHECK_EQ(16u, instr->GetVectorLength());
- if (instr->GetOpKind() == HInstruction::kAdd) {
+ DCHECK_EQ(16u, instruction->GetVectorLength());
+ if (instruction->GetOpKind() == HInstruction::kAdd) {
__ Mla(acc.V16B(), left.V16B(), right.V16B());
} else {
__ Mls(acc.V16B(), left.V16B(), right.V16B());
@@ -990,16 +995,16 @@
break;
case Primitive::kPrimChar:
case Primitive::kPrimShort:
- DCHECK_EQ(8u, instr->GetVectorLength());
- if (instr->GetOpKind() == HInstruction::kAdd) {
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ if (instruction->GetOpKind() == HInstruction::kAdd) {
__ Mla(acc.V8H(), left.V8H(), right.V8H());
} else {
__ Mls(acc.V8H(), left.V8H(), right.V8H());
}
break;
case Primitive::kPrimInt:
- DCHECK_EQ(4u, instr->GetVectorLength());
- if (instr->GetOpKind() == HInstruction::kAdd) {
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ if (instruction->GetOpKind() == HInstruction::kAdd) {
__ Mla(acc.V4S(), left.V4S(), right.V4S());
} else {
__ Mls(acc.V4S(), left.V4S(), right.V4S());
@@ -1007,6 +1012,186 @@
break;
default:
LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderARM64::VisitVecSADAccumulate(HVecSADAccumulate* instruction) {
+ CreateVecAccumLocations(GetGraph()->GetArena(), instruction);
+ // Some conversions require temporary registers.
+ LocationSummary* locations = instruction->GetLocations();
+ HVecOperation* a = instruction->InputAt(1)->AsVecOperation();
+ HVecOperation* b = instruction->InputAt(2)->AsVecOperation();
+ DCHECK_EQ(a->GetPackedType(), b->GetPackedType());
+ switch (a->GetPackedType()) {
+ case Primitive::kPrimByte:
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimLong:
+ locations->AddTemp(Location::RequiresFpuRegister());
+ locations->AddTemp(Location::RequiresFpuRegister());
+ FALLTHROUGH_INTENDED;
+ case Primitive::kPrimInt:
+ locations->AddTemp(Location::RequiresFpuRegister());
+ locations->AddTemp(Location::RequiresFpuRegister());
+ break;
+ default:
+ break;
+ }
+ break;
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ if (instruction->GetPackedType() == Primitive::kPrimLong) {
+ locations->AddTemp(Location::RequiresFpuRegister());
+ locations->AddTemp(Location::RequiresFpuRegister());
+ }
+ break;
+ case Primitive::kPrimInt:
+ case Primitive::kPrimLong:
+ if (instruction->GetPackedType() == a->GetPackedType()) {
+ locations->AddTemp(Location::RequiresFpuRegister());
+ }
+ break;
+ default:
+ break;
+ }
+}
+
+void InstructionCodeGeneratorARM64::VisitVecSADAccumulate(HVecSADAccumulate* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ VRegister acc = VRegisterFrom(locations->InAt(0));
+ VRegister left = VRegisterFrom(locations->InAt(1));
+ VRegister right = VRegisterFrom(locations->InAt(2));
+
+ DCHECK(locations->InAt(0).Equals(locations->Out()));
+
+ // Handle all feasible acc_T += sad(a_S, b_S) type combinations (T x S).
+ HVecOperation* a = instruction->InputAt(1)->AsVecOperation();
+ HVecOperation* b = instruction->InputAt(2)->AsVecOperation();
+ DCHECK_EQ(a->GetPackedType(), b->GetPackedType());
+ switch (a->GetPackedType()) {
+ case Primitive::kPrimByte:
+ DCHECK_EQ(16u, a->GetVectorLength());
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ Sabal(acc.V8H(), left.V8B(), right.V8B());
+ __ Sabal2(acc.V8H(), left.V16B(), right.V16B());
+ break;
+ case Primitive::kPrimInt: {
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ VRegister tmp1 = VRegisterFrom(locations->GetTemp(0));
+ VRegister tmp2 = VRegisterFrom(locations->GetTemp(1));
+ __ Sxtl(tmp1.V8H(), left.V8B());
+ __ Sxtl(tmp2.V8H(), right.V8B());
+ __ Sabal(acc.V4S(), tmp1.V4H(), tmp2.V4H());
+ __ Sabal2(acc.V4S(), tmp1.V8H(), tmp2.V8H());
+ __ Sxtl2(tmp1.V8H(), left.V16B());
+ __ Sxtl2(tmp2.V8H(), right.V16B());
+ __ Sabal(acc.V4S(), tmp1.V4H(), tmp2.V4H());
+ __ Sabal2(acc.V4S(), tmp1.V8H(), tmp2.V8H());
+ break;
+ }
+ case Primitive::kPrimLong: {
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ VRegister tmp1 = VRegisterFrom(locations->GetTemp(0));
+ VRegister tmp2 = VRegisterFrom(locations->GetTemp(1));
+ VRegister tmp3 = VRegisterFrom(locations->GetTemp(2));
+ VRegister tmp4 = VRegisterFrom(locations->GetTemp(3));
+ __ Sxtl(tmp1.V8H(), left.V8B());
+ __ Sxtl(tmp2.V8H(), right.V8B());
+ __ Sxtl(tmp3.V4S(), tmp1.V4H());
+ __ Sxtl(tmp4.V4S(), tmp2.V4H());
+ __ Sabal(acc.V2D(), tmp3.V2S(), tmp4.V2S());
+ __ Sabal2(acc.V2D(), tmp3.V4S(), tmp4.V4S());
+ __ Sxtl2(tmp3.V4S(), tmp1.V8H());
+ __ Sxtl2(tmp4.V4S(), tmp2.V8H());
+ __ Sabal(acc.V2D(), tmp3.V2S(), tmp4.V2S());
+ __ Sabal2(acc.V2D(), tmp3.V4S(), tmp4.V4S());
+ __ Sxtl2(tmp1.V8H(), left.V16B());
+ __ Sxtl2(tmp2.V8H(), right.V16B());
+ __ Sxtl(tmp3.V4S(), tmp1.V4H());
+ __ Sxtl(tmp4.V4S(), tmp2.V4H());
+ __ Sabal(acc.V2D(), tmp3.V2S(), tmp4.V2S());
+ __ Sabal2(acc.V2D(), tmp3.V4S(), tmp4.V4S());
+ __ Sxtl2(tmp3.V4S(), tmp1.V8H());
+ __ Sxtl2(tmp4.V4S(), tmp2.V8H());
+ __ Sabal(acc.V2D(), tmp3.V2S(), tmp4.V2S());
+ __ Sabal2(acc.V2D(), tmp3.V4S(), tmp4.V4S());
+ break;
+ }
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+ break;
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ DCHECK_EQ(8u, a->GetVectorLength());
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimInt:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ Sabal(acc.V4S(), left.V4H(), right.V4H());
+ __ Sabal2(acc.V4S(), left.V8H(), right.V8H());
+ break;
+ case Primitive::kPrimLong: {
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ VRegister tmp1 = VRegisterFrom(locations->GetTemp(0));
+ VRegister tmp2 = VRegisterFrom(locations->GetTemp(1));
+ __ Sxtl(tmp1.V4S(), left.V4H());
+ __ Sxtl(tmp2.V4S(), right.V4H());
+ __ Sabal(acc.V2D(), tmp1.V2S(), tmp2.V2S());
+ __ Sabal2(acc.V2D(), tmp1.V4S(), tmp2.V4S());
+ __ Sxtl2(tmp1.V4S(), left.V8H());
+ __ Sxtl2(tmp2.V4S(), right.V8H());
+ __ Sabal(acc.V2D(), tmp1.V2S(), tmp2.V2S());
+ __ Sabal2(acc.V2D(), tmp1.V4S(), tmp2.V4S());
+ break;
+ }
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+ break;
+ case Primitive::kPrimInt:
+ DCHECK_EQ(4u, a->GetVectorLength());
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimInt: {
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ VRegister tmp = VRegisterFrom(locations->GetTemp(0));
+ __ Sub(tmp.V4S(), left.V4S(), right.V4S());
+ __ Abs(tmp.V4S(), tmp.V4S());
+ __ Add(acc.V4S(), acc.V4S(), tmp.V4S());
+ break;
+ }
+ case Primitive::kPrimLong:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ Sabal(acc.V2D(), left.V2S(), right.V2S());
+ __ Sabal2(acc.V2D(), left.V4S(), right.V4S());
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+ break;
+ case Primitive::kPrimLong:
+ DCHECK_EQ(2u, a->GetVectorLength());
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimLong: {
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ VRegister tmp = VRegisterFrom(locations->GetTemp(0));
+ __ Sub(tmp.V2D(), left.V2D(), right.V2D());
+ __ Abs(tmp.V2D(), tmp.V2D());
+ __ Add(acc.V2D(), acc.V2D(), tmp.V2D());
+ break;
+ }
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
}
}
diff --git a/compiler/optimizing/code_generator_vector_arm_vixl.cc b/compiler/optimizing/code_generator_vector_arm_vixl.cc
index 7a11dff..069054c 100644
--- a/compiler/optimizing/code_generator_vector_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_vector_arm_vixl.cc
@@ -629,12 +629,40 @@
LOG(FATAL) << "No SIMD for " << instruction->GetId();
}
-void LocationsBuilderARMVIXL::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) {
- LOG(FATAL) << "No SIMD for " << instr->GetId();
+// Helper to set up locations for vector accumulations.
+static void CreateVecAccumLocations(ArenaAllocator* arena, HVecOperation* instruction) {
+ LocationSummary* locations = new (arena) LocationSummary(instruction);
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimByte:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ case Primitive::kPrimLong:
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetInAt(1, Location::RequiresFpuRegister());
+ locations->SetInAt(2, Location::RequiresFpuRegister());
+ locations->SetOut(Location::SameAsFirstInput());
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
}
-void InstructionCodeGeneratorARMVIXL::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) {
- LOG(FATAL) << "No SIMD for " << instr->GetId();
+void LocationsBuilderARMVIXL::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) {
+ CreateVecAccumLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderARMVIXL::VisitVecSADAccumulate(HVecSADAccumulate* instruction) {
+ CreateVecAccumLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitVecSADAccumulate(HVecSADAccumulate* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
}
// Return whether the vector memory access operation is guaranteed to be word-aligned (ARM word
diff --git a/compiler/optimizing/code_generator_vector_mips.cc b/compiler/optimizing/code_generator_vector_mips.cc
index c2fbf7f..0bedafc 100644
--- a/compiler/optimizing/code_generator_vector_mips.cc
+++ b/compiler/optimizing/code_generator_vector_mips.cc
@@ -826,21 +826,18 @@
LOG(FATAL) << "No SIMD for " << instruction->GetId();
}
-void LocationsBuilderMIPS::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) {
- LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instr);
- switch (instr->GetPackedType()) {
+// Helper to set up locations for vector accumulations.
+static void CreateVecAccumLocations(ArenaAllocator* arena, HVecOperation* instruction) {
+ LocationSummary* locations = new (arena) LocationSummary(instruction);
+ switch (instruction->GetPackedType()) {
case Primitive::kPrimByte:
case Primitive::kPrimChar:
case Primitive::kPrimShort:
case Primitive::kPrimInt:
case Primitive::kPrimLong:
- locations->SetInAt(
- HVecMultiplyAccumulate::kInputAccumulatorIndex, Location::RequiresFpuRegister());
- locations->SetInAt(
- HVecMultiplyAccumulate::kInputMulLeftIndex, Location::RequiresFpuRegister());
- locations->SetInAt(
- HVecMultiplyAccumulate::kInputMulRightIndex, Location::RequiresFpuRegister());
- DCHECK_EQ(HVecMultiplyAccumulate::kInputAccumulatorIndex, 0);
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetInAt(1, Location::RequiresFpuRegister());
+ locations->SetInAt(2, Location::RequiresFpuRegister());
locations->SetOut(Location::SameAsFirstInput());
break;
default:
@@ -849,18 +846,19 @@
}
}
-void InstructionCodeGeneratorMIPS::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) {
- LocationSummary* locations = instr->GetLocations();
- VectorRegister acc =
- VectorRegisterFrom(locations->InAt(HVecMultiplyAccumulate::kInputAccumulatorIndex));
- VectorRegister left =
- VectorRegisterFrom(locations->InAt(HVecMultiplyAccumulate::kInputMulLeftIndex));
- VectorRegister right =
- VectorRegisterFrom(locations->InAt(HVecMultiplyAccumulate::kInputMulRightIndex));
- switch (instr->GetPackedType()) {
+void LocationsBuilderMIPS::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) {
+ CreateVecAccumLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorMIPS::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ VectorRegister acc = VectorRegisterFrom(locations->InAt(0));
+ VectorRegister left = VectorRegisterFrom(locations->InAt(1));
+ VectorRegister right = VectorRegisterFrom(locations->InAt(2));
+ switch (instruction->GetPackedType()) {
case Primitive::kPrimByte:
- DCHECK_EQ(16u, instr->GetVectorLength());
- if (instr->GetOpKind() == HInstruction::kAdd) {
+ DCHECK_EQ(16u, instruction->GetVectorLength());
+ if (instruction->GetOpKind() == HInstruction::kAdd) {
__ MaddvB(acc, left, right);
} else {
__ MsubvB(acc, left, right);
@@ -868,24 +866,24 @@
break;
case Primitive::kPrimChar:
case Primitive::kPrimShort:
- DCHECK_EQ(8u, instr->GetVectorLength());
- if (instr->GetOpKind() == HInstruction::kAdd) {
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ if (instruction->GetOpKind() == HInstruction::kAdd) {
__ MaddvH(acc, left, right);
} else {
__ MsubvH(acc, left, right);
}
break;
case Primitive::kPrimInt:
- DCHECK_EQ(4u, instr->GetVectorLength());
- if (instr->GetOpKind() == HInstruction::kAdd) {
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ if (instruction->GetOpKind() == HInstruction::kAdd) {
__ MaddvW(acc, left, right);
} else {
__ MsubvW(acc, left, right);
}
break;
case Primitive::kPrimLong:
- DCHECK_EQ(2u, instr->GetVectorLength());
- if (instr->GetOpKind() == HInstruction::kAdd) {
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ if (instruction->GetOpKind() == HInstruction::kAdd) {
__ MaddvD(acc, left, right);
} else {
__ MsubvD(acc, left, right);
@@ -897,6 +895,15 @@
}
}
+void LocationsBuilderMIPS::VisitVecSADAccumulate(HVecSADAccumulate* instruction) {
+ CreateVecAccumLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorMIPS::VisitVecSADAccumulate(HVecSADAccumulate* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ // TODO: implement this, location helper already filled out (shared with MulAcc).
+}
+
// Helper to set up locations for vector memory operations.
static void CreateVecMemLocations(ArenaAllocator* arena,
HVecMemoryOperation* instruction,
diff --git a/compiler/optimizing/code_generator_vector_mips64.cc b/compiler/optimizing/code_generator_vector_mips64.cc
index 9d3a777..db31bdc 100644
--- a/compiler/optimizing/code_generator_vector_mips64.cc
+++ b/compiler/optimizing/code_generator_vector_mips64.cc
@@ -830,21 +830,18 @@
LOG(FATAL) << "No SIMD for " << instruction->GetId();
}
-void LocationsBuilderMIPS64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) {
- LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instr);
- switch (instr->GetPackedType()) {
+// Helper to set up locations for vector accumulations.
+static void CreateVecAccumLocations(ArenaAllocator* arena, HVecOperation* instruction) {
+ LocationSummary* locations = new (arena) LocationSummary(instruction);
+ switch (instruction->GetPackedType()) {
case Primitive::kPrimByte:
case Primitive::kPrimChar:
case Primitive::kPrimShort:
case Primitive::kPrimInt:
case Primitive::kPrimLong:
- locations->SetInAt(
- HVecMultiplyAccumulate::kInputAccumulatorIndex, Location::RequiresFpuRegister());
- locations->SetInAt(
- HVecMultiplyAccumulate::kInputMulLeftIndex, Location::RequiresFpuRegister());
- locations->SetInAt(
- HVecMultiplyAccumulate::kInputMulRightIndex, Location::RequiresFpuRegister());
- DCHECK_EQ(HVecMultiplyAccumulate::kInputAccumulatorIndex, 0);
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetInAt(1, Location::RequiresFpuRegister());
+ locations->SetInAt(2, Location::RequiresFpuRegister());
locations->SetOut(Location::SameAsFirstInput());
break;
default:
@@ -853,18 +850,19 @@
}
}
-void InstructionCodeGeneratorMIPS64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) {
- LocationSummary* locations = instr->GetLocations();
- VectorRegister acc =
- VectorRegisterFrom(locations->InAt(HVecMultiplyAccumulate::kInputAccumulatorIndex));
- VectorRegister left =
- VectorRegisterFrom(locations->InAt(HVecMultiplyAccumulate::kInputMulLeftIndex));
- VectorRegister right =
- VectorRegisterFrom(locations->InAt(HVecMultiplyAccumulate::kInputMulRightIndex));
- switch (instr->GetPackedType()) {
+void LocationsBuilderMIPS64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) {
+ CreateVecAccumLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorMIPS64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ VectorRegister acc = VectorRegisterFrom(locations->InAt(0));
+ VectorRegister left = VectorRegisterFrom(locations->InAt(1));
+ VectorRegister right = VectorRegisterFrom(locations->InAt(2));
+ switch (instruction->GetPackedType()) {
case Primitive::kPrimByte:
- DCHECK_EQ(16u, instr->GetVectorLength());
- if (instr->GetOpKind() == HInstruction::kAdd) {
+ DCHECK_EQ(16u, instruction->GetVectorLength());
+ if (instruction->GetOpKind() == HInstruction::kAdd) {
__ MaddvB(acc, left, right);
} else {
__ MsubvB(acc, left, right);
@@ -872,24 +870,24 @@
break;
case Primitive::kPrimChar:
case Primitive::kPrimShort:
- DCHECK_EQ(8u, instr->GetVectorLength());
- if (instr->GetOpKind() == HInstruction::kAdd) {
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ if (instruction->GetOpKind() == HInstruction::kAdd) {
__ MaddvH(acc, left, right);
} else {
__ MsubvH(acc, left, right);
}
break;
case Primitive::kPrimInt:
- DCHECK_EQ(4u, instr->GetVectorLength());
- if (instr->GetOpKind() == HInstruction::kAdd) {
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ if (instruction->GetOpKind() == HInstruction::kAdd) {
__ MaddvW(acc, left, right);
} else {
__ MsubvW(acc, left, right);
}
break;
case Primitive::kPrimLong:
- DCHECK_EQ(2u, instr->GetVectorLength());
- if (instr->GetOpKind() == HInstruction::kAdd) {
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ if (instruction->GetOpKind() == HInstruction::kAdd) {
__ MaddvD(acc, left, right);
} else {
__ MsubvD(acc, left, right);
@@ -901,6 +899,15 @@
}
}
+void LocationsBuilderMIPS64::VisitVecSADAccumulate(HVecSADAccumulate* instruction) {
+ CreateVecAccumLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorMIPS64::VisitVecSADAccumulate(HVecSADAccumulate* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ // TODO: implement this, location helper already filled out (shared with MulAcc).
+}
+
// Helper to set up locations for vector memory operations.
static void CreateVecMemLocations(ArenaAllocator* arena,
HVecMemoryOperation* instruction,
diff --git a/compiler/optimizing/code_generator_vector_x86.cc b/compiler/optimizing/code_generator_vector_x86.cc
index 37190f8..5a012e7 100644
--- a/compiler/optimizing/code_generator_vector_x86.cc
+++ b/compiler/optimizing/code_generator_vector_x86.cc
@@ -51,7 +51,6 @@
: Location::RequiresFpuRegister());
locations->SetOut(is_zero ? Location::RequiresFpuRegister()
: Location::SameAsFirstInput());
-
break;
default:
LOG(FATAL) << "Unsupported SIMD type";
@@ -1033,12 +1032,42 @@
}
}
-void LocationsBuilderX86::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) {
- LOG(FATAL) << "No SIMD for " << instr->GetId();
+// Helper to set up locations for vector accumulations.
+static void CreateVecAccumLocations(ArenaAllocator* arena, HVecOperation* instruction) {
+ LocationSummary* locations = new (arena) LocationSummary(instruction);
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimByte:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ case Primitive::kPrimLong:
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetInAt(1, Location::RequiresFpuRegister());
+ locations->SetInAt(2, Location::RequiresFpuRegister());
+ locations->SetOut(Location::SameAsFirstInput());
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
}
-void InstructionCodeGeneratorX86::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) {
- LOG(FATAL) << "No SIMD for " << instr->GetId();
+void LocationsBuilderX86::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) {
+ CreateVecAccumLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorX86::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) {
+ // TODO: pmaddwd?
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderX86::VisitVecSADAccumulate(HVecSADAccumulate* instruction) {
+ CreateVecAccumLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorX86::VisitVecSADAccumulate(HVecSADAccumulate* instruction) {
+ // TODO: psadbw for unsigned?
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
}
// Helper to set up locations for vector memory operations.
diff --git a/compiler/optimizing/code_generator_vector_x86_64.cc b/compiler/optimizing/code_generator_vector_x86_64.cc
index edd0209..3698b7f 100644
--- a/compiler/optimizing/code_generator_vector_x86_64.cc
+++ b/compiler/optimizing/code_generator_vector_x86_64.cc
@@ -1005,11 +1005,41 @@
}
}
+// Helper to set up locations for vector accumulations.
+static void CreateVecAccumLocations(ArenaAllocator* arena, HVecOperation* instruction) {
+ LocationSummary* locations = new (arena) LocationSummary(instruction);
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimByte:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ case Primitive::kPrimLong:
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetInAt(1, Location::RequiresFpuRegister());
+ locations->SetInAt(2, Location::RequiresFpuRegister());
+ locations->SetOut(Location::SameAsFirstInput());
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
void LocationsBuilderX86_64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) {
- LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ CreateVecAccumLocations(GetGraph()->GetArena(), instruction);
}
void InstructionCodeGeneratorX86_64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) {
+ // TODO: pmaddwd?
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderX86_64::VisitVecSADAccumulate(HVecSADAccumulate* instruction) {
+ CreateVecAccumLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorX86_64::VisitVecSADAccumulate(HVecSADAccumulate* instruction) {
+ // TODO: psadbw for unsigned?
LOG(FATAL) << "No SIMD for " << instruction->GetId();
}
diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc
index baa0453..6f8743b 100644
--- a/compiler/optimizing/loop_optimization.cc
+++ b/compiler/optimizing/loop_optimization.cc
@@ -71,10 +71,13 @@
return false;
}
-// Detect a sign extension from the given type. Returns the promoted operand on success.
+// Detect a sign extension in instruction from the given type. The to64 parameter
+// denotes if result is long, and thus sign extension from int can be included.
+// Returns the promoted operand on success.
static bool IsSignExtensionAndGet(HInstruction* instruction,
Primitive::Type type,
- /*out*/ HInstruction** operand) {
+ /*out*/ HInstruction** operand,
+ bool to64 = false) {
// Accept any already wider constant that would be handled properly by sign
// extension when represented in the *width* of the given narrower data type
// (the fact that char normally zero extends does not matter here).
@@ -82,20 +85,24 @@
if (IsInt64AndGet(instruction, /*out*/ &value)) {
switch (type) {
case Primitive::kPrimByte:
- if (std::numeric_limits<int8_t>::min() <= value &&
- std::numeric_limits<int8_t>::max() >= value) {
+ if (IsInt<8>(value)) {
*operand = instruction;
return true;
}
return false;
case Primitive::kPrimChar:
case Primitive::kPrimShort:
- if (std::numeric_limits<int16_t>::min() <= value &&
- std::numeric_limits<int16_t>::max() <= value) {
+ if (IsInt<16>(value)) {
*operand = instruction;
return true;
}
return false;
+ case Primitive::kPrimInt:
+ if (IsInt<32>(value)) {
+ *operand = instruction;
+ return to64;
+ }
+ return false;
default:
return false;
}
@@ -110,40 +117,52 @@
case Primitive::kPrimShort:
*operand = instruction;
return true;
+ case Primitive::kPrimInt:
+ *operand = instruction;
+ return to64;
default:
return false;
}
}
- // TODO: perhaps explicit conversions later too?
- // (this may return something different from instruction)
+ // Explicit type conversion to long.
+ if (instruction->IsTypeConversion() && instruction->GetType() == Primitive::kPrimLong) {
+ return IsSignExtensionAndGet(instruction->InputAt(0), type, /*out*/ operand, /*to64*/ true);
+ }
return false;
}
-// Detect a zero extension from the given type. Returns the promoted operand on success.
+// Detect a zero extension in instruction from the given type. The to64 parameter
+// denotes if result is long, and thus zero extension from int can be included.
+// Returns the promoted operand on success.
static bool IsZeroExtensionAndGet(HInstruction* instruction,
Primitive::Type type,
- /*out*/ HInstruction** operand) {
+ /*out*/ HInstruction** operand,
+ bool to64 = false) {
// Accept any already wider constant that would be handled properly by zero
// extension when represented in the *width* of the given narrower data type
- // (the fact that byte/short normally sign extend does not matter here).
+ // (the fact that byte/short/int normally sign extend does not matter here).
int64_t value = 0;
if (IsInt64AndGet(instruction, /*out*/ &value)) {
switch (type) {
case Primitive::kPrimByte:
- if (std::numeric_limits<uint8_t>::min() <= value &&
- std::numeric_limits<uint8_t>::max() >= value) {
+ if (IsUint<8>(value)) {
*operand = instruction;
return true;
}
return false;
case Primitive::kPrimChar:
case Primitive::kPrimShort:
- if (std::numeric_limits<uint16_t>::min() <= value &&
- std::numeric_limits<uint16_t>::max() <= value) {
+ if (IsUint<16>(value)) {
*operand = instruction;
return true;
}
return false;
+ case Primitive::kPrimInt:
+ if (IsUint<32>(value)) {
+ *operand = instruction;
+ return to64;
+ }
+ return false;
default:
return false;
}
@@ -170,14 +189,21 @@
(IsInt64AndGet(b, /*out*/ &mask) && (IsSignExtensionAndGet(a, type, /*out*/ operand) ||
IsZeroExtensionAndGet(a, type, /*out*/ operand)))) {
switch ((*operand)->GetType()) {
- case Primitive::kPrimByte: return mask == std::numeric_limits<uint8_t>::max();
+ case Primitive::kPrimByte:
+ return mask == std::numeric_limits<uint8_t>::max();
case Primitive::kPrimChar:
- case Primitive::kPrimShort: return mask == std::numeric_limits<uint16_t>::max();
+ case Primitive::kPrimShort:
+ return mask == std::numeric_limits<uint16_t>::max();
+ case Primitive::kPrimInt:
+ return mask == std::numeric_limits<uint32_t>::max() && to64;
default: return false;
}
}
}
- // TODO: perhaps explicit conversions later too?
+ // Explicit type conversion to long.
+ if (instruction->IsTypeConversion() && instruction->GetType() == Primitive::kPrimLong) {
+ return IsZeroExtensionAndGet(instruction->InputAt(0), type, /*out*/ operand, /*to64*/ true);
+ }
return false;
}
@@ -214,6 +240,55 @@
return false;
}
+// Compute relative vector length based on type difference.
+static size_t GetOtherVL(Primitive::Type other_type, Primitive::Type vector_type, size_t vl) {
+ switch (other_type) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ switch (vector_type) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte: return vl;
+ default: break;
+ }
+ return vl;
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ switch (vector_type) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte: return vl >> 1;
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort: return vl;
+ default: break;
+ }
+ break;
+ case Primitive::kPrimInt:
+ switch (vector_type) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte: return vl >> 2;
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort: return vl >> 1;
+ case Primitive::kPrimInt: return vl;
+ default: break;
+ }
+ break;
+ case Primitive::kPrimLong:
+ switch (vector_type) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte: return vl >> 3;
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort: return vl >> 2;
+ case Primitive::kPrimInt: return vl >> 1;
+ case Primitive::kPrimLong: return vl;
+ default: break;
+ }
+ break;
+ default:
+ break;
+ }
+ LOG(FATAL) << "Unsupported idiom conversion";
+ UNREACHABLE();
+}
+
// Detect up to two instructions a and b, and an acccumulated constant c.
static bool IsAddConstHelper(HInstruction* instruction,
/*out*/ HInstruction** a,
@@ -260,16 +335,16 @@
}
// Detect reductions of the following forms,
-// under assumption phi has only *one* use:
// x = x_phi + ..
// x = x_phi - ..
// x = max(x_phi, ..)
// x = min(x_phi, ..)
static bool HasReductionFormat(HInstruction* reduction, HInstruction* phi) {
if (reduction->IsAdd()) {
- return reduction->InputAt(0) == phi || reduction->InputAt(1) == phi;
+ return (reduction->InputAt(0) == phi && reduction->InputAt(1) != phi) ||
+ (reduction->InputAt(0) != phi && reduction->InputAt(1) == phi);
} else if (reduction->IsSub()) {
- return reduction->InputAt(0) == phi;
+ return (reduction->InputAt(0) == phi && reduction->InputAt(1) != phi);
} else if (reduction->IsInvokeStaticOrDirect()) {
switch (reduction->AsInvokeStaticOrDirect()->GetIntrinsic()) {
case Intrinsics::kMathMinIntInt:
@@ -280,7 +355,8 @@
case Intrinsics::kMathMaxLongLong:
case Intrinsics::kMathMaxFloatFloat:
case Intrinsics::kMathMaxDoubleDouble:
- return reduction->InputAt(0) == phi || reduction->InputAt(1) == phi;
+ return (reduction->InputAt(0) == phi && reduction->InputAt(1) != phi) ||
+ (reduction->InputAt(0) != phi && reduction->InputAt(1) == phi);
default:
return false;
}
@@ -288,9 +364,9 @@
return false;
}
-// Translates operation to reduction kind.
-static HVecReduce::ReductionKind GetReductionKind(HInstruction* reduction) {
- if (reduction->IsVecAdd() || reduction->IsVecSub()) {
+// Translates vector operation to reduction kind.
+static HVecReduce::ReductionKind GetReductionKind(HVecOperation* reduction) {
+ if (reduction->IsVecAdd() || reduction->IsVecSub() || reduction->IsVecSADAccumulate()) {
return HVecReduce::kSum;
} else if (reduction->IsVecMin()) {
return HVecReduce::kMin;
@@ -720,7 +796,6 @@
HBasicBlock* block,
HBasicBlock* exit,
int64_t trip_count) {
- Primitive::Type induc_type = Primitive::kPrimInt;
HBasicBlock* header = node->loop_info->GetHeader();
HBasicBlock* preheader = node->loop_info->GetPreHeader();
@@ -739,6 +814,10 @@
vector_header_ = header;
vector_body_ = block;
+ // Loop induction type.
+ Primitive::Type induc_type = main_phi->GetType();
+ DCHECK(induc_type == Primitive::kPrimInt || induc_type == Primitive::kPrimLong) << induc_type;
+
// Generate dynamic loop peeling trip count, if needed, under the assumption
// that the Android runtime guarantees at least "component size" alignment:
// ptc = (ALIGN - (&a[initial] % ALIGN)) / type-size
@@ -767,10 +846,10 @@
HInstruction* rem = Insert(
preheader, new (global_allocator_) HAnd(induc_type,
diff,
- graph_->GetIntConstant(chunk - 1)));
+ graph_->GetConstant(induc_type, chunk - 1)));
vtc = Insert(preheader, new (global_allocator_) HSub(induc_type, stc, rem));
}
- vector_index_ = graph_->GetIntConstant(0);
+ vector_index_ = graph_->GetConstant(induc_type, 0);
// Generate runtime disambiguation test:
// vtc = a != b ? vtc : 0;
@@ -779,7 +858,8 @@
preheader,
new (global_allocator_) HNotEqual(vector_runtime_test_a_, vector_runtime_test_b_));
vtc = Insert(preheader,
- new (global_allocator_) HSelect(rt, vtc, graph_->GetIntConstant(0), kNoDexPc));
+ new (global_allocator_)
+ HSelect(rt, vtc, graph_->GetConstant(induc_type, 0), kNoDexPc));
needs_cleanup = true;
}
@@ -793,7 +873,7 @@
graph_->TransformLoopForVectorization(vector_header_, vector_body_, exit),
vector_index_,
ptc,
- graph_->GetIntConstant(1),
+ graph_->GetConstant(induc_type, 1),
kNoUnrollingFactor);
}
@@ -806,7 +886,7 @@
graph_->TransformLoopForVectorization(vector_header_, vector_body_, exit),
vector_index_,
vtc,
- graph_->GetIntConstant(vector_length_), // increment per unroll
+ graph_->GetConstant(induc_type, vector_length_), // increment per unroll
unroll);
HLoopInformation* vloop = vector_header_->GetLoopInformation();
@@ -820,14 +900,20 @@
graph_->TransformLoopForVectorization(vector_header_, vector_body_, exit),
vector_index_,
stc,
- graph_->GetIntConstant(1),
+ graph_->GetConstant(induc_type, 1),
kNoUnrollingFactor);
}
// Link reductions to their final uses.
for (auto i = reductions_->begin(); i != reductions_->end(); ++i) {
if (i->first->IsPhi()) {
- i->first->ReplaceWith(ReduceAndExtractIfNeeded(i->second));
+ HInstruction* phi = i->first;
+ HInstruction* repl = ReduceAndExtractIfNeeded(i->second);
+ // Deal with regular uses.
+ for (const HUseListNode<HInstruction*>& use : phi->GetUses()) {
+ induction_range_.Replace(use.GetUser(), phi, repl); // update induction use
+ }
+ phi->ReplaceWith(repl);
}
}
@@ -853,7 +939,7 @@
HInstruction* step,
uint32_t unroll) {
DCHECK(unroll == 1 || vector_mode_ == kVector);
- Primitive::Type induc_type = Primitive::kPrimInt;
+ Primitive::Type induc_type = lo->GetType();
// Prepare new loop.
vector_preheader_ = new_preheader,
vector_header_ = vector_preheader_->GetSingleSuccessor();
@@ -942,8 +1028,10 @@
auto redit = reductions_->find(instruction);
if (redit != reductions_->end()) {
Primitive::Type type = instruction->GetType();
- if (TrySetVectorType(type, &restrictions) &&
- VectorizeUse(node, instruction, generate_code, type, restrictions)) {
+ // Recognize SAD idiom or direct reduction.
+ if (VectorizeSADIdiom(node, instruction, generate_code, type, restrictions) ||
+ (TrySetVectorType(type, &restrictions) &&
+ VectorizeUse(node, instruction, generate_code, type, restrictions))) {
if (generate_code) {
HInstruction* new_red = vector_map_->Get(instruction);
vector_permanent_map_->Put(new_red, vector_map_->Get(redit->second));
@@ -1029,14 +1117,20 @@
HInstruction* opa = conversion->InputAt(0);
Primitive::Type from = conversion->GetInputType();
Primitive::Type to = conversion->GetResultType();
- if ((to == Primitive::kPrimByte ||
- to == Primitive::kPrimChar ||
- to == Primitive::kPrimShort) && from == Primitive::kPrimInt) {
- // Accept a "narrowing" type conversion from a "wider" computation for
- // (1) conversion into final required type,
- // (2) vectorizable operand,
- // (3) "wider" operations cannot bring in higher order bits.
- if (to == type && VectorizeUse(node, opa, generate_code, type, restrictions | kNoHiBits)) {
+ if (Primitive::IsIntegralType(from) && Primitive::IsIntegralType(to)) {
+ size_t size_vec = Primitive::ComponentSize(type);
+ size_t size_from = Primitive::ComponentSize(from);
+ size_t size_to = Primitive::ComponentSize(to);
+ // Accept an integral conversion
+ // (1a) narrowing into vector type, "wider" operations cannot bring in higher order bits, or
+ // (1b) widening from at least vector type, and
+ // (2) vectorizable operand.
+ if ((size_to < size_from &&
+ size_to == size_vec &&
+ VectorizeUse(node, opa, generate_code, type, restrictions | kNoHiBits)) ||
+ (size_to >= size_from &&
+ size_from >= size_vec &&
+ VectorizeUse(node, opa, generate_code, type, restrictions))) {
if (generate_code) {
if (vector_mode_ == kVector) {
vector_map_->Put(instruction, vector_map_->Get(opa)); // operand pass-through
@@ -1088,7 +1182,7 @@
return true;
}
} else if (instruction->IsShl() || instruction->IsShr() || instruction->IsUShr()) {
- // Recognize vectorization idioms.
+ // Recognize halving add idiom.
if (VectorizeHalvingAddIdiom(node, instruction, generate_code, type, restrictions)) {
return true;
}
@@ -1181,7 +1275,8 @@
return false; // reject, unless all operands are same-extension narrower
}
// Accept MIN/MAX(x, y) for vectorizable operands.
- DCHECK(r != nullptr && s != nullptr);
+ DCHECK(r != nullptr);
+ DCHECK(s != nullptr);
if (generate_code && vector_mode_ != kVector) { // de-idiom
r = opa;
s = opb;
@@ -1232,11 +1327,11 @@
switch (type) {
case Primitive::kPrimBoolean:
case Primitive::kPrimByte:
- *restrictions |= kNoDiv | kNoReduction;
+ *restrictions |= kNoDiv;
return TrySetVectorLength(16);
case Primitive::kPrimChar:
case Primitive::kPrimShort:
- *restrictions |= kNoDiv | kNoReduction;
+ *restrictions |= kNoDiv;
return TrySetVectorLength(8);
case Primitive::kPrimInt:
*restrictions |= kNoDiv;
@@ -1261,17 +1356,17 @@
case Primitive::kPrimBoolean:
case Primitive::kPrimByte:
*restrictions |=
- kNoMul | kNoDiv | kNoShift | kNoAbs | kNoSignedHAdd | kNoUnroundedHAdd | kNoReduction;
+ kNoMul | kNoDiv | kNoShift | kNoAbs | kNoSignedHAdd | kNoUnroundedHAdd | kNoSAD;
return TrySetVectorLength(16);
case Primitive::kPrimChar:
case Primitive::kPrimShort:
- *restrictions |= kNoDiv | kNoAbs | kNoSignedHAdd | kNoUnroundedHAdd | kNoReduction;
+ *restrictions |= kNoDiv | kNoAbs | kNoSignedHAdd | kNoUnroundedHAdd | kNoSAD;
return TrySetVectorLength(8);
case Primitive::kPrimInt:
- *restrictions |= kNoDiv;
+ *restrictions |= kNoDiv | kNoSAD;
return TrySetVectorLength(4);
case Primitive::kPrimLong:
- *restrictions |= kNoMul | kNoDiv | kNoShr | kNoAbs | kNoMinMax;
+ *restrictions |= kNoMul | kNoDiv | kNoShr | kNoAbs | kNoMinMax | kNoSAD;
return TrySetVectorLength(2);
case Primitive::kPrimFloat:
*restrictions |= kNoMinMax | kNoReduction; // minmax: -0.0 vs +0.0
@@ -1289,17 +1384,17 @@
switch (type) {
case Primitive::kPrimBoolean:
case Primitive::kPrimByte:
- *restrictions |= kNoDiv | kNoReduction;
+ *restrictions |= kNoDiv | kNoReduction | kNoSAD;
return TrySetVectorLength(16);
case Primitive::kPrimChar:
case Primitive::kPrimShort:
- *restrictions |= kNoDiv | kNoStringCharAt | kNoReduction;
+ *restrictions |= kNoDiv | kNoStringCharAt | kNoReduction | kNoSAD;
return TrySetVectorLength(8);
case Primitive::kPrimInt:
- *restrictions |= kNoDiv | kNoReduction;
+ *restrictions |= kNoDiv | kNoReduction | kNoSAD;
return TrySetVectorLength(4);
case Primitive::kPrimLong:
- *restrictions |= kNoDiv | kNoReduction;
+ *restrictions |= kNoDiv | kNoReduction | kNoSAD;
return TrySetVectorLength(2);
case Primitive::kPrimFloat:
*restrictions |= kNoMinMax | kNoReduction; // min/max(x, NaN)
@@ -1317,17 +1412,17 @@
switch (type) {
case Primitive::kPrimBoolean:
case Primitive::kPrimByte:
- *restrictions |= kNoDiv | kNoReduction;
+ *restrictions |= kNoDiv | kNoReduction | kNoSAD;
return TrySetVectorLength(16);
case Primitive::kPrimChar:
case Primitive::kPrimShort:
- *restrictions |= kNoDiv | kNoStringCharAt | kNoReduction;
+ *restrictions |= kNoDiv | kNoStringCharAt | kNoReduction | kNoSAD;
return TrySetVectorLength(8);
case Primitive::kPrimInt:
- *restrictions |= kNoDiv | kNoReduction;
+ *restrictions |= kNoDiv | kNoReduction | kNoSAD;
return TrySetVectorLength(4);
case Primitive::kPrimLong:
- *restrictions |= kNoDiv | kNoReduction;
+ *restrictions |= kNoDiv | kNoReduction | kNoSAD;
return TrySetVectorLength(2);
case Primitive::kPrimFloat:
*restrictions |= kNoMinMax | kNoReduction; // min/max(x, NaN)
@@ -1371,8 +1466,16 @@
if (it != vector_permanent_map_->end()) {
vector = it->second; // reuse during unrolling
} else {
- vector = new (global_allocator_) HVecReplicateScalar(
- global_allocator_, org, type, vector_length_);
+ // Generates ReplicateScalar( (optional_type_conv) org ).
+ HInstruction* input = org;
+ Primitive::Type input_type = input->GetType();
+ if (type != input_type && (type == Primitive::kPrimLong ||
+ input_type == Primitive::kPrimLong)) {
+ input = Insert(vector_preheader_,
+ new (global_allocator_) HTypeConversion(type, input, kNoDexPc));
+ }
+ vector = new (global_allocator_)
+ HVecReplicateScalar(global_allocator_, input, type, vector_length_);
vector_permanent_map_->Put(org, Insert(vector_preheader_, vector));
}
vector_map_->Put(org, vector);
@@ -1465,10 +1568,15 @@
// Prepare the new initialization.
if (vector_mode_ == kVector) {
// Generate a [initial, 0, .., 0] vector.
- new_init = Insert(
- vector_preheader_,
- new (global_allocator_) HVecSetScalars(
- global_allocator_, &new_init, phi->GetType(), vector_length_, 1));
+ HVecOperation* red_vector = new_red->AsVecOperation();
+ size_t vector_length = red_vector->GetVectorLength();
+ Primitive::Type type = red_vector->GetPackedType();
+ new_init = Insert(vector_preheader_,
+ new (global_allocator_) HVecSetScalars(global_allocator_,
+ &new_init,
+ type,
+ vector_length,
+ 1));
} else {
new_init = ReduceAndExtractIfNeeded(new_init);
}
@@ -1484,18 +1592,20 @@
if (instruction->IsPhi()) {
HInstruction* input = instruction->InputAt(1);
if (input->IsVecOperation()) {
- Primitive::Type type = input->AsVecOperation()->GetPackedType();
+ HVecOperation* input_vector = input->AsVecOperation();
+ size_t vector_length = input_vector->GetVectorLength();
+ Primitive::Type type = input_vector->GetPackedType();
+ HVecReduce::ReductionKind kind = GetReductionKind(input_vector);
HBasicBlock* exit = instruction->GetBlock()->GetSuccessors()[0];
// Generate a vector reduction and scalar extract
// x = REDUCE( [x_1, .., x_n] )
// y = x_1
// along the exit of the defining loop.
- HVecReduce::ReductionKind kind = GetReductionKind(input);
HInstruction* reduce = new (global_allocator_) HVecReduce(
- global_allocator_, instruction, type, vector_length_, kind);
+ global_allocator_, instruction, type, vector_length, kind);
exit->InsertInstructionBefore(reduce, exit->GetFirstInstruction());
instruction = new (global_allocator_) HVecExtractScalar(
- global_allocator_, reduce, type, vector_length_, 0);
+ global_allocator_, reduce, type, vector_length, 0);
exit->InsertInstructionAfter(instruction, reduce);
}
}
@@ -1516,27 +1626,19 @@
HInstruction* opb,
Primitive::Type type,
bool is_unsigned) {
- if (vector_mode_ == kSequential) {
- // Non-converting scalar code follows implicit integral promotion.
- if (!org->IsTypeConversion() && (type == Primitive::kPrimBoolean ||
- type == Primitive::kPrimByte ||
- type == Primitive::kPrimChar ||
- type == Primitive::kPrimShort)) {
- type = Primitive::kPrimInt;
- }
- }
HInstruction* vector = nullptr;
+ Primitive::Type org_type = org->GetType();
switch (org->GetKind()) {
case HInstruction::kNeg:
DCHECK(opb == nullptr);
GENERATE_VEC(
new (global_allocator_) HVecNeg(global_allocator_, opa, type, vector_length_),
- new (global_allocator_) HNeg(type, opa));
+ new (global_allocator_) HNeg(org_type, opa));
case HInstruction::kNot:
DCHECK(opb == nullptr);
GENERATE_VEC(
new (global_allocator_) HVecNot(global_allocator_, opa, type, vector_length_),
- new (global_allocator_) HNot(type, opa));
+ new (global_allocator_) HNot(org_type, opa));
case HInstruction::kBooleanNot:
DCHECK(opb == nullptr);
GENERATE_VEC(
@@ -1546,47 +1648,47 @@
DCHECK(opb == nullptr);
GENERATE_VEC(
new (global_allocator_) HVecCnv(global_allocator_, opa, type, vector_length_),
- new (global_allocator_) HTypeConversion(type, opa, kNoDexPc));
+ new (global_allocator_) HTypeConversion(org_type, opa, kNoDexPc));
case HInstruction::kAdd:
GENERATE_VEC(
new (global_allocator_) HVecAdd(global_allocator_, opa, opb, type, vector_length_),
- new (global_allocator_) HAdd(type, opa, opb));
+ new (global_allocator_) HAdd(org_type, opa, opb));
case HInstruction::kSub:
GENERATE_VEC(
new (global_allocator_) HVecSub(global_allocator_, opa, opb, type, vector_length_),
- new (global_allocator_) HSub(type, opa, opb));
+ new (global_allocator_) HSub(org_type, opa, opb));
case HInstruction::kMul:
GENERATE_VEC(
new (global_allocator_) HVecMul(global_allocator_, opa, opb, type, vector_length_),
- new (global_allocator_) HMul(type, opa, opb));
+ new (global_allocator_) HMul(org_type, opa, opb));
case HInstruction::kDiv:
GENERATE_VEC(
new (global_allocator_) HVecDiv(global_allocator_, opa, opb, type, vector_length_),
- new (global_allocator_) HDiv(type, opa, opb, kNoDexPc));
+ new (global_allocator_) HDiv(org_type, opa, opb, kNoDexPc));
case HInstruction::kAnd:
GENERATE_VEC(
new (global_allocator_) HVecAnd(global_allocator_, opa, opb, type, vector_length_),
- new (global_allocator_) HAnd(type, opa, opb));
+ new (global_allocator_) HAnd(org_type, opa, opb));
case HInstruction::kOr:
GENERATE_VEC(
new (global_allocator_) HVecOr(global_allocator_, opa, opb, type, vector_length_),
- new (global_allocator_) HOr(type, opa, opb));
+ new (global_allocator_) HOr(org_type, opa, opb));
case HInstruction::kXor:
GENERATE_VEC(
new (global_allocator_) HVecXor(global_allocator_, opa, opb, type, vector_length_),
- new (global_allocator_) HXor(type, opa, opb));
+ new (global_allocator_) HXor(org_type, opa, opb));
case HInstruction::kShl:
GENERATE_VEC(
new (global_allocator_) HVecShl(global_allocator_, opa, opb, type, vector_length_),
- new (global_allocator_) HShl(type, opa, opb));
+ new (global_allocator_) HShl(org_type, opa, opb));
case HInstruction::kShr:
GENERATE_VEC(
new (global_allocator_) HVecShr(global_allocator_, opa, opb, type, vector_length_),
- new (global_allocator_) HShr(type, opa, opb));
+ new (global_allocator_) HShr(org_type, opa, opb));
case HInstruction::kUShr:
GENERATE_VEC(
new (global_allocator_) HVecUShr(global_allocator_, opa, opb, type, vector_length_),
- new (global_allocator_) HUShr(type, opa, opb));
+ new (global_allocator_) HUShr(org_type, opa, opb));
case HInstruction::kInvokeStaticOrDirect: {
HInvokeStaticOrDirect* invoke = org->AsInvokeStaticOrDirect();
if (vector_mode_ == kVector) {
@@ -1667,8 +1769,8 @@
//
// Method recognizes the following idioms:
-// rounding halving add (a + b + 1) >> 1 for unsigned/signed operands a, b
-// regular halving add (a + b) >> 1 for unsigned/signed operands a, b
+// rounding halving add (a + b + 1) >> 1 for unsigned/signed operands a, b
+// truncated halving add (a + b) >> 1 for unsigned/signed operands a, b
// Provided that the operands are promoted to a wider form to do the arithmetic and
// then cast back to narrower form, the idioms can be mapped into efficient SIMD
// implementation that operates directly in narrower form (plus one extra bit).
@@ -1712,7 +1814,8 @@
}
// Accept recognized halving add for vectorizable operands. Vectorized code uses the
// shorthand idiomatic operation. Sequential code uses the original scalar expressions.
- DCHECK(r != nullptr && s != nullptr);
+ DCHECK(r != nullptr);
+ DCHECK(s != nullptr);
if (generate_code && vector_mode_ != kVector) { // de-idiom
r = instruction->InputAt(0);
s = instruction->InputAt(1);
@@ -1741,6 +1844,88 @@
return false;
}
+// Method recognizes the following idiom:
+// q += ABS(a - b) for signed operands a, b
+// Provided that the operands have the same type or are promoted to a wider form.
+// Since this may involve a vector length change, the idiom is handled by going directly
+// to a sad-accumulate node (rather than relying on combining finer grained nodes later).
+// Returns true if the idiom is matched and q and both operands are vectorizable. When
+// generate_code is set, vector mode maps the instruction to a new HVecSADAccumulate in
+// vector_map_, while sequential mode re-emits the scalar ABS and ADD through GenerateVecOp.
+// TODO: unsigned SAD too?
+bool HLoopOptimization::VectorizeSADIdiom(LoopNode* node,
+ HInstruction* instruction,
+ bool generate_code,
+ Primitive::Type reduction_type,
+ uint64_t restrictions) {
+ // Filter integral "q += ABS(a - b);" reduction, where ABS and SUB
+ // are done in the same precision (either int or long).
+ if (!instruction->IsAdd() ||
+ (reduction_type != Primitive::kPrimInt && reduction_type != Primitive::kPrimLong)) {
+ return false;
+ }
+ // Only the form with the accumulator q as first input and the ABS call v as
+ // second input of the ADD is matched.
+ HInstruction* q = instruction->InputAt(0);
+ HInstruction* v = instruction->InputAt(1);
+ HInstruction* a = nullptr;
+ HInstruction* b = nullptr;
+ if (v->IsInvokeStaticOrDirect() &&
+ (v->AsInvokeStaticOrDirect()->GetIntrinsic() == Intrinsics::kMathAbsInt ||
+ v->AsInvokeStaticOrDirect()->GetIntrinsic() == Intrinsics::kMathAbsLong)) {
+ HInstruction* x = v->InputAt(0);
+ // The argument of ABS must be a SUB computed in the reduction type.
+ if (x->IsSub() && x->GetType() == reduction_type) {
+ a = x->InputAt(0);
+ b = x->InputAt(1);
+ }
+ }
+ if (a == nullptr || b == nullptr) {
+ return false;
+ }
+ // Accept same-type or consistent sign extension for narrower-type on operands a and b.
+ // The same-type or narrower operands are called r (a or lower) and s (b or lower).
+ HInstruction* r = a;
+ HInstruction* s = b;
+ bool is_unsigned = false;
+ // Candidate operand type: the input type of whichever operand is an explicit type
+ // conversion (a is checked first), otherwise simply the type of a.
+ Primitive::Type sub_type = a->GetType();
+ if (a->IsTypeConversion()) {
+ sub_type = a->InputAt(0)->GetType();
+ } else if (b->IsTypeConversion()) {
+ sub_type = b->InputAt(0)->GetType();
+ }
+ // If the types differ, both operands must be narrower; unsigned (zero-extended)
+ // operands are rejected.
+ if (reduction_type != sub_type &&
+ (!IsNarrowerOperands(a, b, sub_type, &r, &s, &is_unsigned) || is_unsigned)) {
+ return false;
+ }
+ // Try same/narrower type and deal with vector restrictions.
+ if (!TrySetVectorType(sub_type, &restrictions) || HasVectorRestrictions(restrictions, kNoSAD)) {
+ return false;
+ }
+ // Accept SAD idiom for vectorizable operands. Vectorized code uses the shorthand
+ // idiomatic operation. Sequential code uses the original scalar expressions.
+ DCHECK(r != nullptr);
+ DCHECK(s != nullptr);
+ if (generate_code && vector_mode_ != kVector) { // de-idiom
+ // Both r and s now denote the original SUB (the input of ABS), so the scalar
+ // subtraction itself is what gets processed by VectorizeUse below.
+ r = s = v->InputAt(0);
+ }
+ if (VectorizeUse(node, q, generate_code, sub_type, restrictions) &&
+ VectorizeUse(node, r, generate_code, sub_type, restrictions) &&
+ VectorizeUse(node, s, generate_code, sub_type, restrictions)) {
+ if (generate_code) {
+ if (vector_mode_ == kVector) {
+ // The node is packed in reduction_type; its vector length is derived from the
+ // sub_type-based vector_length_ via GetOtherVL.
+ vector_map_->Put(instruction, new (global_allocator_) HVecSADAccumulate(
+ global_allocator_,
+ vector_map_->Get(q),
+ vector_map_->Get(r),
+ vector_map_->Get(s),
+ reduction_type,
+ GetOtherVL(reduction_type, sub_type, vector_length_)));
+ MaybeRecordStat(stats_, MethodCompilationStat::kLoopVectorizedIdiom);
+ } else {
+ // Scalar ABS on the mapped SUB, followed by the scalar ADD into q.
+ GenerateVecOp(v, vector_map_->Get(r), nullptr, reduction_type);
+ GenerateVecOp(instruction, vector_map_->Get(q), vector_map_->Get(v), reduction_type);
+ }
+ }
+ return true;
+ }
+ return false;
+}
+
//
// Vectorization heuristics.
//
diff --git a/compiler/optimizing/loop_optimization.h b/compiler/optimizing/loop_optimization.h
index f347518..ae2ea76 100644
--- a/compiler/optimizing/loop_optimization.h
+++ b/compiler/optimizing/loop_optimization.h
@@ -75,6 +75,7 @@
kNoMinMax = 1 << 8, // no min/max
kNoStringCharAt = 1 << 9, // no StringCharAt
kNoReduction = 1 << 10, // no reduction
+ kNoSAD = 1 << 11, // no sum of absolute differences (SAD)
};
/*
@@ -172,6 +173,11 @@
bool generate_code,
Primitive::Type type,
uint64_t restrictions);
+ bool VectorizeSADIdiom(LoopNode* node,
+ HInstruction* instruction,
+ bool generate_code,
+ Primitive::Type type,
+ uint64_t restrictions);
// Vectorization heuristics.
bool IsVectorizationProfitable(int64_t trip_count);
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index a6d0da1..6bc5111 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -1396,6 +1396,7 @@
M(VecUShr, VecBinaryOperation) \
M(VecSetScalars, VecOperation) \
M(VecMultiplyAccumulate, VecOperation) \
+ M(VecSADAccumulate, VecOperation) \
M(VecLoad, VecMemoryOperation) \
M(VecStore, VecMemoryOperation) \
diff --git a/compiler/optimizing/nodes_vector.h b/compiler/optimizing/nodes_vector.h
index c5e75a7..1488b70 100644
--- a/compiler/optimizing/nodes_vector.h
+++ b/compiler/optimizing/nodes_vector.h
@@ -461,8 +461,8 @@
};
// Performs halving add on every component in the two vectors, viz.
-// rounded [ x1, .. , xn ] hradd [ y1, .. , yn ] = [ (x1 + y1 + 1) >> 1, .. , (xn + yn + 1) >> 1 ]
-// or [ x1, .. , xn ] hadd [ y1, .. , yn ] = [ (x1 + y1) >> 1, .. , (xn + yn ) >> 1 ]
+// rounded [ x1, .. , xn ] hradd [ y1, .. , yn ] = [ (x1 + y1 + 1) >> 1, .. , (xn + yn + 1) >> 1 ]
+// truncated [ x1, .. , xn ] hadd [ y1, .. , yn ] = [ (x1 + y1) >> 1, .. , (xn + yn ) >> 1 ]
// for signed operands x, y (sign extension) or unsigned operands x, y (zero extension).
class HVecHalvingAdd FINAL : public HVecBinaryOperation {
public:
@@ -810,8 +810,8 @@
//
// Assigns the given scalar elements to a vector,
-// viz. set( array(x1, .., xn) ) = [ x1, .. , xn ] if n == m,
-// set( array(x1, .., xm) ) = [ x1, .. , xm, 0, .., 0 ] if m < n.
+// viz. set( array(x1, .. , xn) ) = [ x1, .. , xn ] if n == m,
+// set( array(x1, .. , xm) ) = [ x1, .. , xm, 0, .. , 0 ] if m < n.
class HVecSetScalars FINAL : public HVecOperation {
public:
HVecSetScalars(ArenaAllocator* arena,
@@ -842,9 +842,8 @@
DISALLOW_COPY_AND_ASSIGN(HVecSetScalars);
};
-// Multiplies every component in the two vectors, adds the result vector to the accumulator vector.
-// viz. [ acc1, .., accn ] + [ x1, .. , xn ] * [ y1, .. , yn ] =
-// [ acc1 + x1 * y1, .. , accn + xn * yn ].
+// Multiplies every component in the two vectors, adds the result vector to the accumulator vector,
+// viz. [ a1, .. , an ] + [ x1, .. , xn ] * [ y1, .. , yn ] = [ a1 + x1 * y1, .. , an + xn * yn ].
class HVecMultiplyAccumulate FINAL : public HVecOperation {
public:
HVecMultiplyAccumulate(ArenaAllocator* arena,
@@ -866,15 +865,11 @@
DCHECK(HasConsistentPackedTypes(accumulator, packed_type));
DCHECK(HasConsistentPackedTypes(mul_left, packed_type));
DCHECK(HasConsistentPackedTypes(mul_right, packed_type));
- SetRawInputAt(kInputAccumulatorIndex, accumulator);
- SetRawInputAt(kInputMulLeftIndex, mul_left);
- SetRawInputAt(kInputMulRightIndex, mul_right);
+ SetRawInputAt(0, accumulator);
+ SetRawInputAt(1, mul_left);
+ SetRawInputAt(2, mul_right);
}
- static constexpr int kInputAccumulatorIndex = 0;
- static constexpr int kInputMulLeftIndex = 1;
- static constexpr int kInputMulRightIndex = 2;
-
bool CanBeMoved() const OVERRIDE { return true; }
bool InstructionDataEquals(const HInstruction* other) const OVERRIDE {
@@ -894,6 +889,42 @@
DISALLOW_COPY_AND_ASSIGN(HVecMultiplyAccumulate);
};
+// Takes the absolute difference of two vectors, and adds the results to
+// same-precision or wider-precision components in the accumulator,
+// viz. SAD([ a1, .. , am ], [ x1, .. , xn ], [ y1, .. , yn ]) =
+// [ a1 + sum abs(xi-yi), .. , am + sum abs(xj-yj) ],
+// for m <= n and non-overlapping sums.
+// Inputs: 0 = accumulator, 1 = left operand, 2 = right operand.
+class HVecSADAccumulate FINAL : public HVecOperation {
+ public:
+ // `packed_type` and `vector_length` describe the accumulator (and the result).
+ HVecSADAccumulate(ArenaAllocator* arena,
+ HInstruction* accumulator,
+ HInstruction* sad_left,
+ HInstruction* sad_right,
+ Primitive::Type packed_type,
+ size_t vector_length,
+ uint32_t dex_pc = kNoDexPc)
+ : HVecOperation(arena,
+ packed_type,
+ SideEffects::None(),
+ /* number_of_inputs */ 3,
+ vector_length,
+ dex_pc) {
+ // Only the accumulator is checked against packed_type; the two operands are
+ // only required to be vector operations that agree with each other.
+ DCHECK(HasConsistentPackedTypes(accumulator, packed_type));
+ DCHECK(sad_left->IsVecOperation());
+ DCHECK(sad_right->IsVecOperation());
+ DCHECK_EQ(sad_left->AsVecOperation()->GetPackedType(),
+ sad_right->AsVecOperation()->GetPackedType());
+ SetRawInputAt(0, accumulator);
+ SetRawInputAt(1, sad_left);
+ SetRawInputAt(2, sad_right);
+ }
+
+ DECLARE_INSTRUCTION(VecSADAccumulate);
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(HVecSADAccumulate);
+};
+
// Loads a vector from memory, viz. load(mem, 1)
// yield the vector [ mem(1), .. , mem(n) ].
class HVecLoad FINAL : public HVecMemoryOperation {
diff --git a/test/651-checker-byte-simd-minmax/src/Main.java b/test/651-checker-byte-simd-minmax/src/Main.java
index e018b56..9643b90 100644
--- a/test/651-checker-byte-simd-minmax/src/Main.java
+++ b/test/651-checker-byte-simd-minmax/src/Main.java
@@ -165,6 +165,28 @@
}
}
+ /// CHECK-START: void Main.doitMin100(byte[], byte[]) loop_optimization (before)
+ /// CHECK-DAG: <<I100:i\d+>> IntConstant 100 loop:none
+ /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Get:b\d+>> ArrayGet loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Min:i\d+>> InvokeStaticOrDirect [<<Get>>,<<I100>>] intrinsic:MathMinIntInt loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Cnv:b\d+>> TypeConversion [<<Min>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>> outer_loop:none
+ //
+ /// CHECK-START-ARM64: void Main.doitMin100(byte[], byte[]) loop_optimization (after)
+ /// CHECK-DAG: <<I100:i\d+>> IntConstant 100 loop:none
+ /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<I100>>] loop:none
+ /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Get:d\d+>> VecLoad loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Min:d\d+>> VecMin [<<Get>>,<<Repl>>] unsigned:false loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: VecStore [{{l\d+}},<<Phi>>,<<Min>>] loop:<<Loop>> outer_loop:none
+ private static void doitMin100(byte[] x, byte[] y) {
+ int min = Math.min(x.length, y.length);
+ for (int i = 0; i < min; i++) {
+ x[i] = (byte) Math.min(y[i], 100);
+ }
+ }
+
public static void main(String[] args) {
// Initialize cross-values for all possible values.
int total = 256 * 256;
@@ -202,6 +224,11 @@
byte expected = (byte) Math.max(y[i] & 0xff, z[i] & 0xff);
expectEquals(expected, x[i]);
}
+ doitMin100(x, y);
+ for (int i = 0; i < total; i++) {
+ byte expected = (byte) Math.min(y[i], 100);
+ expectEquals(expected, x[i]);
+ }
System.out.println("passed");
}
diff --git a/test/651-checker-char-simd-minmax/src/Main.java b/test/651-checker-char-simd-minmax/src/Main.java
index 57cad9b..8a0262c 100644
--- a/test/651-checker-char-simd-minmax/src/Main.java
+++ b/test/651-checker-char-simd-minmax/src/Main.java
@@ -89,6 +89,28 @@
}
}
+ /// CHECK-START: void Main.doitMin100(char[], char[]) loop_optimization (before)
+ /// CHECK-DAG: <<I100:i\d+>> IntConstant 100 loop:none
+ /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Get:c\d+>> ArrayGet loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Min:i\d+>> InvokeStaticOrDirect [<<Get>>,<<I100>>] intrinsic:MathMinIntInt loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Cnv:c\d+>> TypeConversion [<<Min>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>> outer_loop:none
+ //
+ /// CHECK-START-ARM64: void Main.doitMin100(char[], char[]) loop_optimization (after)
+ /// CHECK-DAG: <<I100:i\d+>> IntConstant 100 loop:none
+ /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<I100>>] loop:none
+ /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Get:d\d+>> VecLoad loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Min:d\d+>> VecMin [<<Get>>,<<Repl>>] unsigned:true loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: VecStore [{{l\d+}},<<Phi>>,<<Min>>] loop:<<Loop>> outer_loop:none
+ private static void doitMin100(char[] x, char[] y) {
+ int min = Math.min(x.length, y.length);
+ for (int i = 0; i < min; i++) {
+ x[i] = (char) Math.min(y[i], 100);
+ }
+ }
+
public static void main(String[] args) {
char[] interesting = {
0x0000, 0x0001, 0x007f, 0x0080, 0x0081, 0x00ff,
@@ -124,6 +146,11 @@
char expected = (char) Math.max(y[i], z[i]);
expectEquals(expected, x[i]);
}
+ doitMin100(x, y);
+ for (int i = 0; i < total; i++) {
+ char expected = (char) Math.min(y[i], 100);
+ expectEquals(expected, x[i]);
+ }
System.out.println("passed");
}
diff --git a/test/651-checker-short-simd-minmax/src/Main.java b/test/651-checker-short-simd-minmax/src/Main.java
index 4f2a7a4..ffbf73b 100644
--- a/test/651-checker-short-simd-minmax/src/Main.java
+++ b/test/651-checker-short-simd-minmax/src/Main.java
@@ -165,6 +165,28 @@
}
}
+ /// CHECK-START: void Main.doitMin100(short[], short[]) loop_optimization (before)
+ /// CHECK-DAG: <<I100:i\d+>> IntConstant 100 loop:none
+ /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Get:s\d+>> ArrayGet loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Min:i\d+>> InvokeStaticOrDirect [<<Get>>,<<I100>>] intrinsic:MathMinIntInt loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Cnv:s\d+>> TypeConversion [<<Min>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>> outer_loop:none
+ //
+ /// CHECK-START-ARM64: void Main.doitMin100(short[], short[]) loop_optimization (after)
+ /// CHECK-DAG: <<I100:i\d+>> IntConstant 100 loop:none
+ /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<I100>>] loop:none
+ /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Get:d\d+>> VecLoad loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Min:d\d+>> VecMin [<<Get>>,<<Repl>>] unsigned:false loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: VecStore [{{l\d+}},<<Phi>>,<<Min>>] loop:<<Loop>> outer_loop:none
+ private static void doitMin100(short[] x, short[] y) {
+ int min = Math.min(x.length, y.length);
+ for (int i = 0; i < min; i++) {
+ x[i] = (short) Math.min(y[i], 100);
+ }
+ }
+
public static void main(String[] args) {
short[] interesting = {
(short) 0x0000, (short) 0x0001, (short) 0x007f,
@@ -216,6 +238,11 @@
short expected = (short) Math.max(y[i] & 0xffff, z[i] & 0xffff);
expectEquals(expected, x[i]);
}
+ doitMin100(x, y);
+ for (int i = 0; i < total; i++) {
+ short expected = (short) Math.min(y[i], 100);
+ expectEquals(expected, x[i]);
+ }
System.out.println("passed");
}
diff --git a/test/656-checker-simd-opt/src/Main.java b/test/656-checker-simd-opt/src/Main.java
index 091633f..39a126f 100644
--- a/test/656-checker-simd-opt/src/Main.java
+++ b/test/656-checker-simd-opt/src/Main.java
@@ -92,7 +92,91 @@
}
}
- public static void main(String[] args) {
+ /// CHECK-START: long Main.longInductionReduction(long[]) loop_optimization (before)
+ /// CHECK-DAG: <<L0:j\d+>> LongConstant 0 loop:none
+ /// CHECK-DAG: <<L1:j\d+>> LongConstant 1 loop:none
+ /// CHECK-DAG: <<I0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Get:j\d+>> ArrayGet [{{l\d+}},<<I0>>] loop:none
+ /// CHECK-DAG: <<Phi1:j\d+>> Phi [<<L0>>,<<Add1:j\d+>>] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:j\d+>> Phi [<<L1>>,<<Add2:j\d+>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Add2>> Add [<<Phi2>>,<<Get>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Add1>> Add [<<Phi1>>,<<L1>>] loop:<<Loop>> outer_loop:none
+ //
+ /// CHECK-START-ARM64: long Main.longInductionReduction(long[]) loop_optimization (after)
+ /// CHECK-DAG: <<L0:j\d+>> LongConstant 0 loop:none
+ /// CHECK-DAG: <<L1:j\d+>> LongConstant 1 loop:none
+ /// CHECK-DAG: <<L2:j\d+>> LongConstant 2 loop:none
+ /// CHECK-DAG: <<I0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Get:j\d+>> ArrayGet [{{l\d+}},<<I0>>] loop:none
+ /// CHECK-DAG: <<Rep:d\d+>> VecReplicateScalar [<<Get>>] loop:none
+ /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<L1>>] loop:none
+ /// CHECK-DAG: <<Phi1:j\d+>> Phi [<<L0>>,{{j\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: VecAdd [<<Phi2>>,<<Rep>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<L2>>] loop:<<Loop>> outer_loop:none
+ static long longInductionReduction(long[] y) {  // Returns 1 + 10 * y[0], built up one addition per iteration.
+ long x = 1;  // accumulator seed is 1, not 0
+ for (long i = 0; i < 10; i++) {  // long-typed induction variable, fixed trip count of 10
+ x += y[0];  // the addend does not depend on i
+ }
+ return x;
+ }
+
+ /// CHECK-START: void Main.intVectorLongInvariant(int[], long[]) loop_optimization (before)
+ /// CHECK-DAG: <<I0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<I1:i\d+>> IntConstant 1 loop:none
+ /// CHECK-DAG: <<Get:j\d+>> ArrayGet [{{l\d+}},<<I0>>] loop:none
+ /// CHECK-DAG: <<Phi:i\d+>> Phi [<<I0>>,<<Add:i\d+>>] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Cnv:i\d+>> TypeConversion [<<Get>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Add>> Add [<<Phi>>,<<I1>>] loop:<<Loop>> outer_loop:none
+ //
+ /// CHECK-START-ARM64: void Main.intVectorLongInvariant(int[], long[]) loop_optimization (after)
+ /// CHECK-DAG: <<I0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<I1:i\d+>> IntConstant 1 loop:none
+ /// CHECK-DAG: <<I4:i\d+>> IntConstant 4 loop:none
+ /// CHECK-DAG: <<Get:j\d+>> ArrayGet [{{l\d+}},<<I0>>] loop:none
+ /// CHECK-DAG: <<Cnv:i\d+>> TypeConversion [<<Get>>] loop:none
+ /// CHECK-DAG: <<Rep:d\d+>> VecReplicateScalar [<<Cnv>>] loop:none
+ /// CHECK-DAG: <<Phi:i\d+>> Phi [<<I0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: VecStore [{{l\d+}},<<Phi>>,<<Rep>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi>>,<<I4>>] loop:<<Loop>> outer_loop:none
+ static void intVectorLongInvariant(int[] x, long[] y) {  // Fills x[0..99] with y[0] narrowed to int.
+ for (int i = 0; i < 100; i++) {  // fixed bound: x needs at least 100 elements
+ x[i] = (int) y[0];  // the long operand is the same on every iteration
+ }
+ }
+
+ /// CHECK-START: void Main.longCanBeDoneWithInt(int[], int[]) loop_optimization (before)
+ /// CHECK-DAG: <<I0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<I1:i\d+>> IntConstant 1 loop:none
+ /// CHECK-DAG: <<L1:j\d+>> LongConstant 1 loop:none
+ /// CHECK-DAG: <<Phi:i\d+>> Phi [<<I0>>,<<Add:i\d+>>] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Get:i\d+>> ArrayGet [{{l\d+}},<<Phi>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Cnv1:j\d+>> TypeConversion [<<Get>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<AddL:j\d+>> Add [<<Cnv1>>,<<L1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Cnv2:i\d+>> TypeConversion [<<AddL>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: ArraySet [{{l\d+}},<<Phi>>,<<Cnv2>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Add>> Add [<<Phi>>,<<I1>>] loop:<<Loop>> outer_loop:none
+ //
+ /// CHECK-START-ARM64: void Main.longCanBeDoneWithInt(int[], int[]) loop_optimization (after)
+ /// CHECK-DAG: <<I0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<I4:i\d+>> IntConstant 4 loop:none
+ /// CHECK-DAG: <<L1:j\d+>> LongConstant 1 loop:none
+ /// CHECK-DAG: <<Cnv:i\d+>> TypeConversion [<<L1>>] loop:none
+ /// CHECK-DAG: <<Rep:d\d+>> VecReplicateScalar [<<Cnv>>] loop:none
+ /// CHECK-DAG: <<Phi:i\d+>> Phi [<<I0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Load:d\d+>> VecLoad [{{l\d+}},<<Phi>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Add:d\d+>> VecAdd [<<Load>>,<<Rep>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: VecStore [{{l\d+}},<<Phi>>,<<Add>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi>>,<<I4>>] loop:<<Loop>> outer_loop:none
+ static void longCanBeDoneWithInt(int[] x, int[] y) {  // Sets x[i] to y[i] + 1 for i in 0..99.
+ for (int i = 0; i < 100; i++) {  // fixed bound: both arrays need at least 100 elements
+ x[i] = (int) (y[i] + 1L);  // the 1L makes this a long add; the cast keeps only the low 32 bits
+ }
+ }
+
+ static void testUnroll() {
float[] x = new float[100];
float[] y = new float[100];
for (int i = 0; i < 100; i++) {
@@ -104,51 +188,89 @@
expectEquals(5.0f, x[i]);
expectEquals(2.0f, y[i]);
}
- {
- int[] a = new int[100];
- int[] b = new int[100];
- for (int i = 0; i < 100; i++) {
- a[i] = 0;
- b[i] = i;
- }
- stencil(a, b, 100);
- for (int i = 1; i < 99; i++) {
- int e = i + i + i;
- expectEquals(e, a[i]);
- expectEquals(i, b[i]);
- }
+ }
+
+ static void testStencil1() {  // Checks stencil(): interior a[i] must equal 3 * i and b must keep its values.
+ int[] a = new int[100];  // output array, zeroed below
+ int[] b = new int[100];  // input array, filled with the ramp 0..99 below
+ for (int i = 0; i < 100; i++) {
+ a[i] = 0;
+ b[i] = i;
}
- {
- int[] a = new int[100];
- int[] b = new int[100];
- for (int i = 0; i < 100; i++) {
- a[i] = 0;
- b[i] = i;
- }
- stencilSubInt(a, b, 100);
- for (int i = 1; i < 99; i++) {
- int e = i + i + i;
- expectEquals(e, a[i]);
- expectEquals(i, b[i]);
- }
+ stencil(a, b, 100);
+ for (int i = 1; i < 99; i++) {  // the first and last elements are not checked
+ int e = i + i + i;  // expected output for index i
+ expectEquals(e, a[i]);
+ expectEquals(i, b[i]);
}
- {
- int[] a = new int[100];
- int[] b = new int[100];
- for (int i = 0; i < 100; i++) {
- a[i] = 0;
- b[i] = i;
- }
- stencilAddInt(a, b, 100);
- for (int i = 1; i < 99; i++) {
- int e = i + i + i;
- expectEquals(e, a[i]);
- expectEquals(i, b[i]);
- }
+ }
+
+ static void testStencil2() {
+ int[] a = new int[100];
+ int[] b = new int[100];
+ for (int i = 0; i < 100; i++) {
+ a[i] = 0;
+ b[i] = i;
}
+ stencilSubInt(a, b, 100);
+ for (int i = 1; i < 99; i++) {
+ int e = i + i + i;
+ expectEquals(e, a[i]);
+ expectEquals(i, b[i]);
+ }
+ }
+
+ static void testStencil3() {
+ int[] a = new int[100];
+ int[] b = new int[100];
+ for (int i = 0; i < 100; i++) {
+ a[i] = 0;
+ b[i] = i;
+ }
+ stencilAddInt(a, b, 100);
+ for (int i = 1; i < 99; i++) {
+ int e = i + i + i;
+ expectEquals(e, a[i]);
+ expectEquals(i, b[i]);
+ }
+ }
+
+ static void testTypes() {
+ int[] a = new int[100];
+ int[] b = new int[100];
+ long[] l = { 3 };
+ expectEquals(31, longInductionReduction(l));
+ intVectorLongInvariant(a, l);
+ for (int i = 0; i < 100; i++) {
+ expectEquals(3, a[i]);
+ }
+ longCanBeDoneWithInt(b, a);
+ for (int i = 0; i < 100; i++) {
+ expectEquals(4, b[i]);
+ }
+ }
+
+ public static void main(String[] args) {  // Runs each test group in order; a failed expectEquals throws before "passed" prints.
+ testUnroll();
+ testStencil1();
+ testStencil2();
+ testStencil3();
+ testTypes();  // long/int mixed-type loops
System.out.println("passed");
}
+ private static void expectEquals(int expected, int result) {
+ if (expected != result) {
+ throw new Error("Expected: " + expected + ", found: " + result);
+ }
+ }
+
+ private static void expectEquals(long expected, long result) {
+ if (expected != result) {
+ throw new Error("Expected: " + expected + ", found: " + result);
+ }
+ }
+
private static void expectEquals(float expected, float result) {
if (expected != result) {
throw new Error("Expected: " + expected + ", found: " + result);
diff --git a/test/660-checker-simd-sad-byte/expected.txt b/test/660-checker-simd-sad-byte/expected.txt
new file mode 100644
index 0000000..b0aad4d
--- /dev/null
+++ b/test/660-checker-simd-sad-byte/expected.txt
@@ -0,0 +1 @@
+passed
diff --git a/test/660-checker-simd-sad-byte/info.txt b/test/660-checker-simd-sad-byte/info.txt
new file mode 100644
index 0000000..b56c119
--- /dev/null
+++ b/test/660-checker-simd-sad-byte/info.txt
@@ -0,0 +1 @@
+Functional tests on SAD vectorization.
diff --git a/test/660-checker-simd-sad-byte/src/Main.java b/test/660-checker-simd-sad-byte/src/Main.java
new file mode 100644
index 0000000..72d1c24
--- /dev/null
+++ b/test/660-checker-simd-sad-byte/src/Main.java
@@ -0,0 +1,332 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Tests for SAD (sum of absolute differences).
+ */
+public class Main {
+
+ // TODO: lower precision still coming, b/64091002
+
+ private static byte sadByte2Byte(byte[] b1, byte[] b2) {  // Sum of |b1[i] - b2[i]| kept in a byte accumulator.
+ int min_length = Math.min(b1.length, b2.length);  // shorter array bounds the loop
+ byte sad = 0;
+ for (int i = 0; i < min_length; i++) {
+ sad += Math.abs(b1[i] - b2[i]);  // difference and abs are int; += narrows the sum back to byte
+ }
+ return sad;
+ }
+
+ private static byte sadByte2ByteAlt(byte[] b1, byte[] b2) {  // Byte-accumulated SAD with abs written as a conditional.
+ int min_length = Math.min(b1.length, b2.length);  // shorter array bounds the loop
+ byte sad = 0;
+ for (int i = 0; i < min_length; i++) {
+ byte s = b1[i];
+ byte p = b2[i];
+ sad += s >= p ? s - p : p - s;  // picks whichever difference is non-negative; += narrows to byte
+ }
+ return sad;
+ }
+
+ private static byte sadByte2ByteAlt2(byte[] b1, byte[] b2) {  // Byte-accumulated SAD with abs written as negate-if-negative.
+ int min_length = Math.min(b1.length, b2.length);  // shorter array bounds the loop
+ byte sad = 0;
+ for (int i = 0; i < min_length; i++) {
+ byte s = b1[i];
+ byte p = b2[i];
+ int x = s - p;  // int difference of two bytes, so it cannot overflow
+ if (x < 0) x = -x;  // x now holds |s - p|
+ sad += x;  // += narrows the sum back to byte
+ }
+ return sad;
+ }
+
+ private static short sadByte2Short(byte[] b1, byte[] b2) {  // Sum of |b1[i] - b2[i]| kept in a short accumulator.
+ int min_length = Math.min(b1.length, b2.length);  // shorter array bounds the loop
+ short sad = 0;
+ for (int i = 0; i < min_length; i++) {
+ sad += Math.abs(b1[i] - b2[i]);  // difference and abs are int; += narrows the sum back to short
+ }
+ return sad;
+ }
+
+ private static short sadByte2ShortAlt(byte[] b1, byte[] b2) {  // Short-accumulated SAD with abs written as a conditional.
+ int min_length = Math.min(b1.length, b2.length);  // shorter array bounds the loop
+ short sad = 0;
+ for (int i = 0; i < min_length; i++) {
+ byte s = b1[i];
+ byte p = b2[i];
+ sad += s >= p ? s - p : p - s;  // picks whichever difference is non-negative; += narrows to short
+ }
+ return sad;
+ }
+
+ private static short sadByte2ShortAlt2(byte[] b1, byte[] b2) {  // Short-accumulated SAD with abs written as negate-if-negative.
+ int min_length = Math.min(b1.length, b2.length);  // shorter array bounds the loop
+ short sad = 0;
+ for (int i = 0; i < min_length; i++) {
+ byte s = b1[i];
+ byte p = b2[i];
+ int x = s - p;  // int difference of two bytes, so it cannot overflow
+ if (x < 0) x = -x;  // x now holds |s - p|
+ sad += x;  // += narrows the sum back to short
+ }
+ return sad;
+ }
+
+ /// CHECK-START: int Main.sadByte2Int(byte[], byte[]) loop_optimization (before)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons1:i\d+>> IntConstant 1 loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get1:b\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get2:b\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Sub:i\d+>> Sub [<<Get1>>,<<Get2>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Intrin:i\d+>> InvokeStaticOrDirect [<<Sub>>] intrinsic:MathAbsInt loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi2>>,<<Intrin>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons1>>] loop:<<Loop>> outer_loop:none
+ //
+ /// CHECK-START-ARM64: int Main.sadByte2Int(byte[], byte[]) loop_optimization (after)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons16:i\d+>> IntConstant 16 loop:none
+ /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<Cons0>>] loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Load1:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Load2:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<SAD:d\d+>> VecSADAccumulate [<<Phi2>>,<<Load1>>,<<Load2>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons16>>] loop:<<Loop>> outer_loop:none
+ private static int sadByte2Int(byte[] b1, byte[] b2) {  // Sum of |b1[i] - b2[i]| in an int accumulator.
+ int min_length = Math.min(b1.length, b2.length);  // shorter array bounds the loop
+ int sad = 0;
+ for (int i = 0; i < min_length; i++) {
+ sad += Math.abs(b1[i] - b2[i]);  // byte operands promote to int before the subtraction
+ }
+ return sad;
+ }
+
+ /// CHECK-START: int Main.sadByte2IntAlt(byte[], byte[]) loop_optimization (before)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons1:i\d+>> IntConstant 1 loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get1:b\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get2:b\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Sub:i\d+>> Sub [<<Get2>>,<<Get1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Intrin:i\d+>> InvokeStaticOrDirect [<<Sub>>] intrinsic:MathAbsInt loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi2>>,<<Intrin>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons1>>] loop:<<Loop>> outer_loop:none
+ //
+ /// CHECK-START-ARM64: int Main.sadByte2IntAlt(byte[], byte[]) loop_optimization (after)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons16:i\d+>> IntConstant 16 loop:none
+ /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<Cons0>>] loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Load1:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Load2:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<SAD:d\d+>> VecSADAccumulate [<<Phi2>>,<<Load2>>,<<Load1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons16>>] loop:<<Loop>> outer_loop:none
+ private static int sadByte2IntAlt(byte[] b1, byte[] b2) {  // Int-accumulated SAD with abs written as a conditional.
+ int min_length = Math.min(b1.length, b2.length);  // shorter array bounds the loop
+ int sad = 0;
+ for (int i = 0; i < min_length; i++) {
+ byte s = b1[i];
+ byte p = b2[i];
+ sad += s >= p ? s - p : p - s;  // picks whichever difference is non-negative
+ }
+ return sad;
+ }
+
+ /// CHECK-START: int Main.sadByte2IntAlt2(byte[], byte[]) loop_optimization (before)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons1:i\d+>> IntConstant 1 loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get1:b\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get2:b\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Sub:i\d+>> Sub [<<Get1>>,<<Get2>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Intrin:i\d+>> InvokeStaticOrDirect [<<Sub>>] intrinsic:MathAbsInt loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi2>>,<<Intrin>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons1>>] loop:<<Loop>> outer_loop:none
+ //
+ /// CHECK-START-ARM64: int Main.sadByte2IntAlt2(byte[], byte[]) loop_optimization (after)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons16:i\d+>> IntConstant 16 loop:none
+ /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<Cons0>>] loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Load1:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Load2:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<SAD:d\d+>> VecSADAccumulate [<<Phi2>>,<<Load1>>,<<Load2>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons16>>] loop:<<Loop>> outer_loop:none
+ private static int sadByte2IntAlt2(byte[] b1, byte[] b2) {  // Int-accumulated SAD with abs written as negate-if-negative.
+ int min_length = Math.min(b1.length, b2.length);  // shorter array bounds the loop
+ int sad = 0;
+ for (int i = 0; i < min_length; i++) {
+ byte s = b1[i];
+ byte p = b2[i];
+ int x = s - p;  // int difference of two bytes, so it cannot overflow
+ if (x < 0) x = -x;  // x now holds |s - p|
+ sad += x;
+ }
+ return sad;
+ }
+
+ /// CHECK-START: long Main.sadByte2Long(byte[], byte[]) loop_optimization (before)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons1:i\d+>> IntConstant 1 loop:none
+ /// CHECK-DAG: <<ConsL:j\d+>> LongConstant 0 loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:j\d+>> Phi [<<ConsL>>,{{j\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get1:b\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get2:b\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Cnv1:j\d+>> TypeConversion [<<Get1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Cnv2:j\d+>> TypeConversion [<<Get2>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Sub:j\d+>> Sub [<<Cnv1>>,<<Cnv2>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Intrin:j\d+>> InvokeStaticOrDirect [<<Sub>>] intrinsic:MathAbsLong loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi2>>,<<Intrin>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons1>>] loop:<<Loop>> outer_loop:none
+ //
+ /// CHECK-START-ARM64: long Main.sadByte2Long(byte[], byte[]) loop_optimization (after)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons16:i\d+>> IntConstant 16 loop:none
+ /// CHECK-DAG: <<ConsL:j\d+>> LongConstant 0 loop:none
+ /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<ConsL>>] loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Load1:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Load2:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<SAD:d\d+>> VecSADAccumulate [<<Phi2>>,<<Load1>>,<<Load2>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons16>>] loop:<<Loop>> outer_loop:none
+ private static long sadByte2Long(byte[] b1, byte[] b2) {  // Sum of |b1[i] - b2[i]| in a long accumulator.
+ int min_length = Math.min(b1.length, b2.length);  // shorter array bounds the loop
+ long sad = 0;
+ for (int i = 0; i < min_length; i++) {
+ long x = b1[i];  // each operand is widened to long before the subtraction
+ long y = b2[i];
+ sad += Math.abs(x - y);  // resolves to Math.abs(long)
+ }
+ return sad;
+ }
+
+ /// CHECK-START: long Main.sadByte2LongAt1(byte[], byte[]) loop_optimization (before)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons1:i\d+>> IntConstant 1 loop:none
+ /// CHECK-DAG: <<ConsL:j\d+>> LongConstant 1 loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:j\d+>> Phi [<<ConsL>>,{{j\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get1:b\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get2:b\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Cnv1:j\d+>> TypeConversion [<<Get1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Cnv2:j\d+>> TypeConversion [<<Get2>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Sub:j\d+>> Sub [<<Cnv1>>,<<Cnv2>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Intrin:j\d+>> InvokeStaticOrDirect [<<Sub>>] intrinsic:MathAbsLong loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi2>>,<<Intrin>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons1>>] loop:<<Loop>> outer_loop:none
+ //
+ /// CHECK-START-ARM64: long Main.sadByte2LongAt1(byte[], byte[]) loop_optimization (after)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons16:i\d+>> IntConstant 16 loop:none
+ /// CHECK-DAG: <<ConsL:j\d+>> LongConstant 1 loop:none
+ /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<ConsL>>] loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Load1:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Load2:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<SAD:d\d+>> VecSADAccumulate [<<Phi2>>,<<Load1>>,<<Load2>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons16>>] loop:<<Loop>> outer_loop:none
+ private static long sadByte2LongAt1(byte[] b1, byte[] b2) {  // Same as sadByte2Long but the result is offset by 1.
+ int min_length = Math.min(b1.length, b2.length);  // shorter array bounds the loop
+ long sad = 1; // starts at 1
+ for (int i = 0; i < min_length; i++) {
+ long x = b1[i];  // each operand is widened to long before the subtraction
+ long y = b2[i];
+ sad += Math.abs(x - y);  // resolves to Math.abs(long)
+ }
+ return sad;
+ }
+
+ public static void main(String[] args) {
+   // A lone extreme pair: -128 against 127 is a distance of 255 in either order.
+   final byte[] lo = { 0, -128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+   final byte[] hi = { 0, 127, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+   expectEquals(-1, sadByte2Byte(lo, hi));
+   expectEquals(-1, sadByte2Byte(hi, lo));
+   expectEquals(-1, sadByte2ByteAlt(lo, hi));
+   expectEquals(-1, sadByte2ByteAlt(hi, lo));
+   expectEquals(-1, sadByte2ByteAlt2(lo, hi));
+   expectEquals(-1, sadByte2ByteAlt2(hi, lo));
+   expectEquals(255, sadByte2Short(lo, hi));
+   expectEquals(255, sadByte2Short(hi, lo));
+   expectEquals(255, sadByte2ShortAlt(lo, hi));
+   expectEquals(255, sadByte2ShortAlt(hi, lo));
+   expectEquals(255, sadByte2ShortAlt2(lo, hi));
+   expectEquals(255, sadByte2ShortAlt2(hi, lo));
+   expectEquals(255, sadByte2Int(lo, hi));
+   expectEquals(255, sadByte2Int(hi, lo));
+   expectEquals(255, sadByte2IntAlt(lo, hi));
+   expectEquals(255, sadByte2IntAlt(hi, lo));
+   expectEquals(255, sadByte2IntAlt2(lo, hi));
+   expectEquals(255, sadByte2IntAlt2(hi, lo));
+   expectEquals(255, sadByte2Long(lo, hi));
+   expectEquals(255L, sadByte2Long(hi, lo));
+   expectEquals(256L, sadByte2LongAt1(lo, hi));
+   expectEquals(256L, sadByte2LongAt1(hi, lo));
+
+   // All 256 x 256 byte pairs, row value on the left and column value on
+   // the right, plus one trailing pair (10, 2) so that the scalar cleanup
+   // loop gets an element to process as well.
+   final int side = 256;
+   final byte[] rows = new byte[side * side + 1];
+   final byte[] cols = new byte[side * side + 1];
+   int pos = 0;
+   for (int row = 0; row < side; row++) {
+     for (int col = 0; col < side; col++, pos++) {
+       rows[pos] = (byte) row;
+       cols[pos] = (byte) col;
+     }
+   }
+   rows[pos] = 10;
+   cols[pos] = 2;
+   // The byte and short variants see the int total truncated to their width.
+   expectEquals(8, sadByte2Byte(rows, cols));
+   expectEquals(8, sadByte2ByteAlt(rows, cols));
+   expectEquals(8, sadByte2ByteAlt2(rows, cols));
+   expectEquals(21768, sadByte2Short(rows, cols));
+   expectEquals(21768, sadByte2ShortAlt(rows, cols));
+   expectEquals(21768, sadByte2ShortAlt2(rows, cols));
+   expectEquals(5592328, sadByte2Int(rows, cols));
+   expectEquals(5592328, sadByte2IntAlt(rows, cols));
+   expectEquals(5592328, sadByte2IntAlt2(rows, cols));
+   expectEquals(5592328L, sadByte2Long(rows, cols));
+   expectEquals(5592329L, sadByte2LongAt1(rows, cols));
+
+   System.out.println("passed");
+ }
+
+ private static void expectEquals(int expected, int result) {
+ if (expected != result) {
+ throw new Error("Expected: " + expected + ", found: " + result);
+ }
+ }
+
+ private static void expectEquals(long expected, long result) {
+ if (expected != result) {
+ throw new Error("Expected: " + expected + ", found: " + result);
+ }
+ }
+}
diff --git a/test/660-checker-simd-sad-char/expected.txt b/test/660-checker-simd-sad-char/expected.txt
new file mode 100644
index 0000000..b0aad4d
--- /dev/null
+++ b/test/660-checker-simd-sad-char/expected.txt
@@ -0,0 +1 @@
+passed
diff --git a/test/660-checker-simd-sad-char/info.txt b/test/660-checker-simd-sad-char/info.txt
new file mode 100644
index 0000000..b56c119
--- /dev/null
+++ b/test/660-checker-simd-sad-char/info.txt
@@ -0,0 +1 @@
+Functional tests on SAD vectorization.
diff --git a/test/660-checker-simd-sad-char/src/Main.java b/test/660-checker-simd-sad-char/src/Main.java
new file mode 100644
index 0000000..bb0c58f
--- /dev/null
+++ b/test/660-checker-simd-sad-char/src/Main.java
@@ -0,0 +1,259 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Tests for SAD (sum of absolute differences).
+ */
+public class Main {
+
+ // TODO: lower precision still coming, b/64091002
+
+ // TODO: consider unsigned SAD too, b/64091002
+
+ private static char sadShort2Short(char[] s1, char[] s2) {  // SAD over char data (despite "Short" in the name), kept in a char.
+ int min_length = Math.min(s1.length, s2.length);  // shorter array bounds the loop
+ char sad = 0;
+ for (int i = 0; i < min_length; i++) {
+ sad += Math.abs(s1[i] - s2[i]);  // chars promote to non-negative ints; += narrows the sum back to char
+ }
+ return sad;
+ }
+
+ private static char sadShort2ShortAlt(char[] s1, char[] s2) {  // Char-accumulated SAD with abs written as a conditional.
+ int min_length = Math.min(s1.length, s2.length);  // shorter array bounds the loop
+ char sad = 0;
+ for (int i = 0; i < min_length; i++) {
+ char s = s1[i];
+ char p = s2[i];
+ sad += s >= p ? s - p : p - s;  // picks whichever difference is non-negative; += narrows to char
+ }
+ return sad;
+ }
+
+ private static char sadShort2ShortAlt2(char[] s1, char[] s2) {  // Char-accumulated SAD with abs written as negate-if-negative.
+ int min_length = Math.min(s1.length, s2.length);  // shorter array bounds the loop
+ char sad = 0;
+ for (int i = 0; i < min_length; i++) {
+ char s = s1[i];
+ char p = s2[i];
+ int x = s - p;  // int difference of two chars, so it cannot overflow
+ if (x < 0) x = -x;  // x now holds |s - p|
+ sad += x;  // += narrows the sum back to char
+ }
+ return sad;
+ }
+
+ /// CHECK-START: int Main.sadShort2Int(char[], char[]) loop_optimization (before)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons1:i\d+>> IntConstant 1 loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get1:c\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get2:c\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Sub:i\d+>> Sub [<<Get1>>,<<Get2>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Intrin:i\d+>> InvokeStaticOrDirect [<<Sub>>] intrinsic:MathAbsInt loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi2>>,<<Intrin>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons1>>] loop:<<Loop>> outer_loop:none
+ //
+ /// CHECK-START-ARM64: int Main.sadShort2Int(char[], char[]) loop_optimization (after)
+ /// CHECK-NOT: VecSADAccumulate
+ private static int sadShort2Int(char[] s1, char[] s2) {  // Sum of |s1[i] - s2[i]| over char data in an int accumulator.
+ int min_length = Math.min(s1.length, s2.length);  // shorter array bounds the loop
+ int sad = 0;
+ for (int i = 0; i < min_length; i++) {
+ sad += Math.abs(s1[i] - s2[i]);  // char operands promote to non-negative ints before the subtraction
+ }
+ return sad;
+ }
+
+ /// CHECK-START: int Main.sadShort2IntAlt(char[], char[]) loop_optimization (before)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons1:i\d+>> IntConstant 1 loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get1:c\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get2:c\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Sub:i\d+>> Sub [<<Get2>>,<<Get1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Intrin:i\d+>> InvokeStaticOrDirect [<<Sub>>] intrinsic:MathAbsInt loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi2>>,<<Intrin>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons1>>] loop:<<Loop>> outer_loop:none
+ //
+ /// CHECK-START-ARM64: int Main.sadShort2IntAlt(char[], char[]) loop_optimization (after)
+ /// CHECK-NOT: VecSADAccumulate
+ private static int sadShort2IntAlt(char[] s1, char[] s2) {  // Int-accumulated SAD over char data, abs written as a conditional.
+ int min_length = Math.min(s1.length, s2.length);  // shorter array bounds the loop
+ int sad = 0;
+ for (int i = 0; i < min_length; i++) {
+ char s = s1[i];
+ char p = s2[i];
+ sad += s >= p ? s - p : p - s;  // picks whichever difference is non-negative
+ }
+ return sad;
+ }
+
+ /// CHECK-START: int Main.sadShort2IntAlt2(char[], char[]) loop_optimization (before)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons1:i\d+>> IntConstant 1 loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get1:c\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get2:c\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Sub:i\d+>> Sub [<<Get1>>,<<Get2>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Intrin:i\d+>> InvokeStaticOrDirect [<<Sub>>] intrinsic:MathAbsInt loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi2>>,<<Intrin>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons1>>] loop:<<Loop>> outer_loop:none
+ //
+ /// CHECK-START-ARM64: int Main.sadShort2IntAlt2(char[], char[]) loop_optimization (after)
+ /// CHECK-NOT: VecSADAccumulate
+ private static int sadShort2IntAlt2(char[] s1, char[] s2) {  // Int-accumulated SAD over char data, abs as negate-if-negative.
+ int min_length = Math.min(s1.length, s2.length);  // shorter array bounds the loop
+ int sad = 0;
+ for (int i = 0; i < min_length; i++) {
+ char s = s1[i];
+ char p = s2[i];
+ int x = s - p;  // int difference of two chars, so it cannot overflow
+ if (x < 0) x = -x;  // x now holds |s - p|
+ sad += x;
+ }
+ return sad;
+ }
+
+ /// CHECK-START: long Main.sadShort2Long(char[], char[]) loop_optimization (before)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons1:i\d+>> IntConstant 1 loop:none
+ /// CHECK-DAG: <<ConsL:j\d+>> LongConstant 0 loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:j\d+>> Phi [<<ConsL>>,{{j\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get1:c\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get2:c\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Cnv1:j\d+>> TypeConversion [<<Get1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Cnv2:j\d+>> TypeConversion [<<Get2>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Sub:j\d+>> Sub [<<Cnv1>>,<<Cnv2>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Intrin:j\d+>> InvokeStaticOrDirect [<<Sub>>] intrinsic:MathAbsLong loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi2>>,<<Intrin>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons1>>] loop:<<Loop>> outer_loop:none
+ //
+ /// CHECK-START-ARM64: long Main.sadShort2Long(char[], char[]) loop_optimization (after)
+ /// CHECK-NOT: VecSADAccumulate
+ private static long sadShort2Long(char[] s1, char[] s2) {  // Sum of |s1[i] - s2[i]| over char data in a long accumulator.
+ int min_length = Math.min(s1.length, s2.length);  // shorter array bounds the loop
+ long sad = 0;
+ for (int i = 0; i < min_length; i++) {
+ long x = s1[i];  // each char is widened to long before the subtraction
+ long y = s2[i];
+ sad += Math.abs(x - y);  // resolves to Math.abs(long)
+ }
+ return sad;
+ }
+
+ /// CHECK-START: long Main.sadShort2LongAt1(char[], char[]) loop_optimization (before)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons1:i\d+>> IntConstant 1 loop:none
+ /// CHECK-DAG: <<ConsL:j\d+>> LongConstant 1 loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:j\d+>> Phi [<<ConsL>>,{{j\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get1:c\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get2:c\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Cnv1:j\d+>> TypeConversion [<<Get1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Cnv2:j\d+>> TypeConversion [<<Get2>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Sub:j\d+>> Sub [<<Cnv1>>,<<Cnv2>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Intrin:j\d+>> InvokeStaticOrDirect [<<Sub>>] intrinsic:MathAbsLong loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi2>>,<<Intrin>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons1>>] loop:<<Loop>> outer_loop:none
+ //
+ /// CHECK-START-ARM64: long Main.sadShort2LongAt1(char[], char[]) loop_optimization (after)
+ /// CHECK-NOT: VecSADAccumulate
+ private static long sadShort2LongAt1(char[] s1, char[] s2) {  // Same as sadShort2Long but the result is offset by 1.
+ int min_length = Math.min(s1.length, s2.length);  // shorter array bounds the loop
+ long sad = 1; // starts at 1
+ for (int i = 0; i < min_length; i++) {
+ long x = s1[i];  // each char is widened to long before the subtraction
+ long y = s2[i];
+ sad += Math.abs(x - y);  // resolves to Math.abs(long)
+ }
+ return sad;
+ }
+
+ public static void main(String[] args) {
+   // 0x8000 and 0x7fff are adjacent as unsigned chars, so the distance is 1.
+   final char[] lo = { 0, 0x8000, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+   final char[] hi = { 0, 0x7fff, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+   expectEquals(1, sadShort2Short(lo, hi));
+   expectEquals(1, sadShort2Short(hi, lo));
+   expectEquals(1, sadShort2ShortAlt(lo, hi));
+   expectEquals(1, sadShort2ShortAlt(hi, lo));
+   expectEquals(1, sadShort2ShortAlt2(lo, hi));
+   expectEquals(1, sadShort2ShortAlt2(hi, lo));
+   expectEquals(1, sadShort2Int(lo, hi));
+   expectEquals(1, sadShort2Int(hi, lo));
+   expectEquals(1, sadShort2IntAlt(lo, hi));
+   expectEquals(1, sadShort2IntAlt(hi, lo));
+   expectEquals(1, sadShort2IntAlt2(lo, hi));
+   expectEquals(1, sadShort2IntAlt2(hi, lo));
+   expectEquals(1L, sadShort2Long(lo, hi));
+   expectEquals(1L, sadShort2Long(hi, lo));
+   expectEquals(2L, sadShort2LongAt1(lo, hi));
+   expectEquals(2L, sadShort2LongAt1(hi, lo));
+
+   // Pair every sample with every sample, then append one extra pair (10, 2).
+   final char[] samples = {
+     0x0000,
+     0x0001,
+     0x0002,
+     0x1234,
+     0x8000,
+     0x8001,
+     0x7fff,
+     0xffff
+   };
+   final int count = samples.length;
+   final char[] left = new char[count * count + 1];
+   final char[] right = new char[count * count + 1];
+   int pos = 0;
+   for (char a : samples) {
+     for (char b : samples) {
+       left[pos] = a;
+       right[pos] = b;
+       pos++;
+     }
+   }
+   left[pos] = 10;
+   right[pos] = 2;
+   // The char-accumulator variants see the int total modulo 65536.
+   expectEquals(56196, sadShort2Short(left, right));
+   expectEquals(56196, sadShort2ShortAlt(left, right));
+   expectEquals(56196, sadShort2ShortAlt2(left, right));
+   expectEquals(1497988, sadShort2Int(left, right));
+   expectEquals(1497988, sadShort2IntAlt(left, right));
+   expectEquals(1497988, sadShort2IntAlt2(left, right));
+   expectEquals(1497988L, sadShort2Long(left, right));
+   expectEquals(1497989L, sadShort2LongAt1(left, right));
+
+   System.out.println("passed");
+ }
+
+ private static void expectEquals(int expected, int result) {
+ if (expected != result) {
+ throw new Error("Expected: " + expected + ", found: " + result);
+ }
+ }
+
+ private static void expectEquals(long expected, long result) {
+ if (expected != result) {
+ throw new Error("Expected: " + expected + ", found: " + result);
+ }
+ }
+}
diff --git a/test/660-checker-simd-sad-int/expected.txt b/test/660-checker-simd-sad-int/expected.txt
new file mode 100644
index 0000000..b0aad4d
--- /dev/null
+++ b/test/660-checker-simd-sad-int/expected.txt
@@ -0,0 +1 @@
+passed
diff --git a/test/660-checker-simd-sad-int/info.txt b/test/660-checker-simd-sad-int/info.txt
new file mode 100644
index 0000000..b56c119
--- /dev/null
+++ b/test/660-checker-simd-sad-int/info.txt
@@ -0,0 +1 @@
+Functional tests on SAD vectorization.
diff --git a/test/660-checker-simd-sad-int/src/Main.java b/test/660-checker-simd-sad-int/src/Main.java
new file mode 100644
index 0000000..0daeedd
--- /dev/null
+++ b/test/660-checker-simd-sad-int/src/Main.java
@@ -0,0 +1,248 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Tests for SAD (sum of absolute differences).
+ */
+public class Main {
+
+ /// CHECK-START: int Main.sadInt2Int(int[], int[]) loop_optimization (before)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons1:i\d+>> IntConstant 1 loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get1:i\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get2:i\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Sub:i\d+>> Sub [<<Get1>>,<<Get2>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Intrin:i\d+>> InvokeStaticOrDirect [<<Sub>>] intrinsic:MathAbsInt loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi2>>,<<Intrin>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons1>>] loop:<<Loop>> outer_loop:none
+ //
+ /// CHECK-START-ARM64: int Main.sadInt2Int(int[], int[]) loop_optimization (after)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons4:i\d+>> IntConstant 4 loop:none
+ /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<Cons0>>] loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Load1:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Load2:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<SAD:d\d+>> VecSADAccumulate [<<Phi2>>,<<Load1>>,<<Load2>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons4>>] loop:<<Loop>> outer_loop:none
+ private static int sadInt2Int(int[] x, int[] y) { // sum of |x[i] - y[i]|, accumulated as int
+ int min_length = Math.min(x.length, y.length); // only the common prefix is compared
+ int sad = 0;
+ for (int i = 0; i < min_length; i++) {
+ sad += Math.abs(x[i] - y[i]); // int arithmetic may wrap; main() expects the wrapped sums
+ }
+ return sad;
+ }
+
+ /// CHECK-START: int Main.sadInt2IntAlt(int[], int[]) loop_optimization (before)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons1:i\d+>> IntConstant 1 loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get1:i\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get2:i\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Sub1:i\d+>> Sub [<<Get2>>,<<Get1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Sub2:i\d+>> Sub [<<Get1>>,<<Get2>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Select:i\d+>> Select [<<Sub2>>,<<Sub1>>,{{z\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi2>>,<<Select>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons1>>] loop:<<Loop>> outer_loop:none
+ //
+ // No ABS? No SAD!
+ //
+ /// CHECK-START-ARM64: int Main.sadInt2IntAlt(int[], int[]) loop_optimization (after)
+ /// CHECK-NOT: VecSADAccumulate
+ private static int sadInt2IntAlt(int[] x, int[] y) { // same sum, spelled as compare-and-select
+ int min_length = Math.min(x.length, y.length); // only the common prefix is compared
+ int sad = 0;
+ for (int i = 0; i < min_length; i++) {
+ int s = x[i];
+ int p = y[i];
+ sad += s >= p ? s - p : p - s; // on overflow differs from abs(s - p): main() expects -1, not 1
+ }
+ return sad;
+ }
+
+ /// CHECK-START: int Main.sadInt2IntAlt2(int[], int[]) loop_optimization (before)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons1:i\d+>> IntConstant 1 loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get1:i\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get2:i\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Sub:i\d+>> Sub [<<Get1>>,<<Get2>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Intrin:i\d+>> InvokeStaticOrDirect [<<Sub>>] intrinsic:MathAbsInt loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi2>>,<<Intrin>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons1>>] loop:<<Loop>> outer_loop:none
+ //
+ /// CHECK-START-ARM64: int Main.sadInt2IntAlt2(int[], int[]) loop_optimization (after)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons4:i\d+>> IntConstant 4 loop:none
+ /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<Cons0>>] loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Load1:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Load2:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<SAD:d\d+>> VecSADAccumulate [<<Phi2>>,<<Load1>>,<<Load2>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons4>>] loop:<<Loop>> outer_loop:none
+ private static int sadInt2IntAlt2(int[] x, int[] y) { // same sum, with a hand-written abs
+ int min_length = Math.min(x.length, y.length); // only the common prefix is compared
+ int sad = 0;
+ for (int i = 0; i < min_length; i++) {
+ int s = x[i];
+ int p = y[i];
+ int m = s - p;
+ if (m < 0) m = -m; // the (before) graph above already shows this as MathAbsInt
+ sad += m;
+ }
+ return sad;
+ }
+
+ /// CHECK-START: long Main.sadInt2Long(int[], int[]) loop_optimization (before)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons1:i\d+>> IntConstant 1 loop:none
+ /// CHECK-DAG: <<ConsL:j\d+>> LongConstant 0 loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:j\d+>> Phi [<<ConsL>>,{{j\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get1:i\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get2:i\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Cnv1:j\d+>> TypeConversion [<<Get1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Cnv2:j\d+>> TypeConversion [<<Get2>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Sub:j\d+>> Sub [<<Cnv1>>,<<Cnv2>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Intrin:j\d+>> InvokeStaticOrDirect [<<Sub>>] intrinsic:MathAbsLong loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi2>>,<<Intrin>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons1>>] loop:<<Loop>> outer_loop:none
+ //
+ /// CHECK-START-ARM64: long Main.sadInt2Long(int[], int[]) loop_optimization (after)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons4:i\d+>> IntConstant 4 loop:none
+ /// CHECK-DAG: <<ConsL:j\d+>> LongConstant 0 loop:none
+ /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<ConsL>>] loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Load1:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Load2:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<SAD:d\d+>> VecSADAccumulate [<<Phi2>>,<<Load1>>,<<Load2>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons4>>] loop:<<Loop>> outer_loop:none
+ private static long sadInt2Long(int[] x, int[] y) { // sum of |x[i] - y[i]|, computed in long
+ int min_length = Math.min(x.length, y.length); // only the common prefix is compared
+ long sad = 0;
+ for (int i = 0; i < min_length; i++) {
+ long s = x[i]; // widen first so the subtraction below cannot overflow
+ long p = y[i];
+ sad += Math.abs(s - p);
+ }
+ return sad;
+ }
+
+ /// CHECK-START: long Main.sadInt2LongAt1(int[], int[]) loop_optimization (before)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons1:i\d+>> IntConstant 1 loop:none
+ /// CHECK-DAG: <<ConsL:j\d+>> LongConstant 1 loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:j\d+>> Phi [<<ConsL>>,{{j\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get1:i\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get2:i\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Cnv1:j\d+>> TypeConversion [<<Get1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Cnv2:j\d+>> TypeConversion [<<Get2>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Sub:j\d+>> Sub [<<Cnv1>>,<<Cnv2>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Intrin:j\d+>> InvokeStaticOrDirect [<<Sub>>] intrinsic:MathAbsLong loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi2>>,<<Intrin>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons1>>] loop:<<Loop>> outer_loop:none
+ //
+ /// CHECK-START-ARM64: long Main.sadInt2LongAt1(int[], int[]) loop_optimization (after)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons4:i\d+>> IntConstant 4 loop:none
+ /// CHECK-DAG: <<ConsL:j\d+>> LongConstant 1 loop:none
+ /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<ConsL>>] loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Load1:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Load2:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<SAD:d\d+>> VecSADAccumulate [<<Phi2>>,<<Load1>>,<<Load2>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons4>>] loop:<<Loop>> outer_loop:none
+ private static long sadInt2LongAt1(int[] x, int[] y) { // as sadInt2Long, but seeded with 1
+ int min_length = Math.min(x.length, y.length); // only the common prefix is compared
+ long sad = 1; // starts at 1, so the reduction has a non-zero initial value
+ for (int i = 0; i < min_length; i++) {
+ long s = x[i]; // widen first so the subtraction below cannot overflow
+ long p = y[i];
+ sad += Math.abs(s - p);
+ }
+ return sad;
+ }
+
+ public static void main(String[] args) { // runs every SAD variant on fixed inputs; throws on mismatch
+ // Cross-test the two most extreme values individually.
+ int[] x = { 0, Integer.MAX_VALUE, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+ int[] y = { 0, Integer.MIN_VALUE, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+ expectEquals(1, sadInt2Int(x, y)); // MAX - MIN wraps to -1 in int, abs gives 1
+ expectEquals(1, sadInt2Int(y, x));
+ expectEquals(-1, sadInt2IntAlt(x, y)); // the select form keeps the wrapped -1
+ expectEquals(-1, sadInt2IntAlt(y, x));
+ expectEquals(1, sadInt2IntAlt2(x, y));
+ expectEquals(1, sadInt2IntAlt2(y, x));
+ expectEquals(4294967295L, sadInt2Long(x, y)); // exact distance 2^32 - 1, thanks to long math
+ expectEquals(4294967295L, sadInt2Long(y, x));
+ expectEquals(4294967296L, sadInt2LongAt1(x, y)); // same distance plus the initial 1
+ expectEquals(4294967296L, sadInt2LongAt1(y, x));
+
+ // Use cross-values for the interesting values.
+ int[] interesting = {
+ 0x00000000, 0x00000001, 0x00007fff, 0x00008000, 0x00008001, 0x0000ffff,
+ 0x00010000, 0x00010001, 0x00017fff, 0x00018000, 0x00018001, 0x0001ffff,
+ 0x7fff0000, 0x7fff0001, 0x7fff7fff, 0x7fff8000, 0x7fff8001, 0x7fffffff,
+ 0x80000000, 0x80000001, 0x80007fff, 0x80008000, 0x80008001, 0x8000ffff,
+ 0x80010000, 0x80010001, 0x80017fff, 0x80018000, 0x80018001, 0x8001ffff,
+ 0xffff0000, 0xffff0001, 0xffff7fff, 0xffff8000, 0xffff8001, 0xffffffff
+ };
+ int n = interesting.length;
+ int m = n * n + 1; // every (i, j) pair plus one trailing element
+ x = new int[m];
+ y = new int[m];
+ int k = 0;
+ for (int i = 0; i < n; i++) {
+ for (int j = 0; j < n; j++) {
+ x[k] = interesting[i];
+ y[k] = interesting[j];
+ k++;
+ }
+ }
+ x[k] = 10; // trailing pair contributes |10 - 2| = 8
+ y[k] = 2;
+ expectEquals(8, sadInt2Int(x, y)); // NOTE(review): equals the trailing 8, so cross terms presumably wrap to 0
+ expectEquals(-13762600, sadInt2IntAlt(x, y));
+ expectEquals(8, sadInt2IntAlt2(x, y));
+ expectEquals(2010030931928L, sadInt2Long(x, y));
+ expectEquals(2010030931929L, sadInt2LongAt1(x, y)); // previous total plus the initial 1
+
+ System.out.println("passed");
+ }
+
+ private static void expectEquals(int expected, int result) {
+ if (expected != result) {
+ throw new Error("Expected: " + expected + ", found: " + result);
+ }
+ }
+
+ private static void expectEquals(long expected, long result) {
+ if (expected != result) {
+ throw new Error("Expected: " + expected + ", found: " + result);
+ }
+ }
+}
diff --git a/test/660-checker-simd-sad-long/expected.txt b/test/660-checker-simd-sad-long/expected.txt
new file mode 100644
index 0000000..b0aad4d
--- /dev/null
+++ b/test/660-checker-simd-sad-long/expected.txt
@@ -0,0 +1 @@
+passed
diff --git a/test/660-checker-simd-sad-long/info.txt b/test/660-checker-simd-sad-long/info.txt
new file mode 100644
index 0000000..b56c119
--- /dev/null
+++ b/test/660-checker-simd-sad-long/info.txt
@@ -0,0 +1 @@
+Functional tests on SAD vectorization.
diff --git a/test/660-checker-simd-sad-long/src/Main.java b/test/660-checker-simd-sad-long/src/Main.java
new file mode 100644
index 0000000..06f62bd
--- /dev/null
+++ b/test/660-checker-simd-sad-long/src/Main.java
@@ -0,0 +1,209 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Tests for SAD (sum of absolute differences).
+ */
+public class Main {
+
+ /// CHECK-START: long Main.sadLong2Long(long[], long[]) loop_optimization (before)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons1:i\d+>> IntConstant 1 loop:none
+ /// CHECK-DAG: <<ConsL:j\d+>> LongConstant 0 loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:j\d+>> Phi [<<ConsL>>,{{j\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get1:j\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get2:j\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Sub:j\d+>> Sub [<<Get1>>,<<Get2>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Intrin:j\d+>> InvokeStaticOrDirect [<<Sub>>] intrinsic:MathAbsLong loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi2>>,<<Intrin>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons1>>] loop:<<Loop>> outer_loop:none
+ //
+ /// CHECK-START-ARM64: long Main.sadLong2Long(long[], long[]) loop_optimization (after)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons2:i\d+>> IntConstant 2 loop:none
+ /// CHECK-DAG: <<ConsL:j\d+>> LongConstant 0 loop:none
+ /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<ConsL>>] loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Load1:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Load2:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<SAD:d\d+>> VecSADAccumulate [<<Phi2>>,<<Load1>>,<<Load2>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons2>>] loop:<<Loop>> outer_loop:none
+ private static long sadLong2Long(long[] x, long[] y) { // sum of |x[i] - y[i]|, accumulated as long
+ int min_length = Math.min(x.length, y.length); // only the common prefix is compared
+ long sad = 0;
+ for (int i = 0; i < min_length; i++) {
+ sad += Math.abs(x[i] - y[i]); // long arithmetic may wrap; main() expects the wrapped sums
+ }
+ return sad;
+ }
+
+ /// CHECK-START: long Main.sadLong2LongAlt(long[], long[]) loop_optimization (before)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons1:i\d+>> IntConstant 1 loop:none
+ /// CHECK-DAG: <<ConsL:j\d+>> LongConstant 0 loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:j\d+>> Phi [<<ConsL>>,{{j\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get1:j\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get2:j\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Sub1:j\d+>> Sub [<<Get2>>,<<Get1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Sub2:j\d+>> Sub [<<Get1>>,<<Get2>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Select:j\d+>> Select [<<Sub2>>,<<Sub1>>,{{z\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi2>>,<<Select>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons1>>] loop:<<Loop>> outer_loop:none
+ //
+ // No ABS? No SAD!
+ //
+ /// CHECK-START: long Main.sadLong2LongAlt(long[], long[]) loop_optimization (after)
+ /// CHECK-NOT: VecSADAccumulate
+ private static long sadLong2LongAlt(long[] x, long[] y) { // same sum, spelled as compare-and-select
+ int min_length = Math.min(x.length, y.length); // only the common prefix is compared
+ long sad = 0;
+ for (int i = 0; i < min_length; i++) {
+ long s = x[i];
+ long p = y[i];
+ sad += s >= p ? s - p : p - s; // on overflow differs from abs(s - p): main() expects -1L, not 1L
+ }
+ return sad;
+ }
+
+ /// CHECK-START: long Main.sadLong2LongAlt2(long[], long[]) loop_optimization (before)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons1:i\d+>> IntConstant 1 loop:none
+ /// CHECK-DAG: <<ConsL:j\d+>> LongConstant 0 loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:j\d+>> Phi [<<ConsL>>,{{j\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get1:j\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get2:j\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Sub:j\d+>> Sub [<<Get1>>,<<Get2>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Intrin:j\d+>> InvokeStaticOrDirect [<<Sub>>] intrinsic:MathAbsLong loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi2>>,<<Intrin>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons1>>] loop:<<Loop>> outer_loop:none
+ //
+ /// CHECK-START-ARM64: long Main.sadLong2LongAlt2(long[], long[]) loop_optimization (after)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons2:i\d+>> IntConstant 2 loop:none
+ /// CHECK-DAG: <<ConsL:j\d+>> LongConstant 0 loop:none
+ /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<ConsL>>] loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Load1:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Load2:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<SAD:d\d+>> VecSADAccumulate [<<Phi2>>,<<Load1>>,<<Load2>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons2>>] loop:<<Loop>> outer_loop:none
+ private static long sadLong2LongAlt2(long[] x, long[] y) { // same sum, with a hand-written abs
+ int min_length = Math.min(x.length, y.length); // only the common prefix is compared
+ long sad = 0;
+ for (int i = 0; i < min_length; i++) {
+ long s = x[i];
+ long p = y[i];
+ long m = s - p;
+ if (m < 0) m = -m; // the (before) graph above already shows this as MathAbsLong
+ sad += m;
+ }
+ return sad;
+ }
+
+ /// CHECK-START: long Main.sadLong2LongAt1(long[], long[]) loop_optimization (before)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons1:i\d+>> IntConstant 1 loop:none
+ /// CHECK-DAG: <<ConsL:j\d+>> LongConstant 1 loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:j\d+>> Phi [<<ConsL>>,{{j\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get1:j\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get2:j\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Sub:j\d+>> Sub [<<Get1>>,<<Get2>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Intrin:j\d+>> InvokeStaticOrDirect [<<Sub>>] intrinsic:MathAbsLong loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi2>>,<<Intrin>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons1>>] loop:<<Loop>> outer_loop:none
+ //
+ /// CHECK-START-ARM64: long Main.sadLong2LongAt1(long[], long[]) loop_optimization (after)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons2:i\d+>> IntConstant 2 loop:none
+ /// CHECK-DAG: <<ConsL:j\d+>> LongConstant 1 loop:none
+ /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<ConsL>>] loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Load1:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Load2:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<SAD:d\d+>> VecSADAccumulate [<<Phi2>>,<<Load1>>,<<Load2>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons2>>] loop:<<Loop>> outer_loop:none
+ private static long sadLong2LongAt1(long[] x, long[] y) { // as sadLong2Long, but seeded with 1
+ int min_length = Math.min(x.length, y.length); // only the common prefix is compared
+ long sad = 1; // starts at 1, so the reduction has a non-zero initial value
+ for (int i = 0; i < min_length; i++) {
+ sad += Math.abs(x[i] - y[i]); // long arithmetic may wrap; main() expects the wrapped sums
+ }
+ return sad;
+ }
+
+ public static void main(String[] args) { // runs every SAD variant on fixed inputs; throws on mismatch
+ // Cross-test the two most extreme values individually.
+ long[] x = { 0, Long.MIN_VALUE, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+ long[] y = { 0, Long.MAX_VALUE, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+ expectEquals(1L, sadLong2Long(x, y)); // MIN - MAX wraps to 1 in long arithmetic
+ expectEquals(1L, sadLong2Long(y, x));
+ expectEquals(-1L, sadLong2LongAlt(x, y)); // the select form yields the wrapped MAX - MIN = -1
+ expectEquals(-1L, sadLong2LongAlt(y, x));
+ expectEquals(1L, sadLong2LongAlt2(x, y));
+ expectEquals(1L, sadLong2LongAlt2(y, x));
+ expectEquals(2L, sadLong2LongAt1(x, y)); // wrapped distance 1 plus the initial 1
+ expectEquals(2L, sadLong2LongAt1(y, x));
+
+ // Use cross-values for the interesting values.
+ long[] interesting = {
+ 0x0000000000000000L, 0x0000000000000001L, 0x000000007fffffffL,
+ 0x0000000080000000L, 0x0000000080000001L, 0x00000000ffffffffL,
+ 0x0000000100000000L, 0x0000000100000001L, 0x000000017fffffffL,
+ 0x0000000180000000L, 0x0000000180000001L, 0x00000001ffffffffL,
+ 0x7fffffff00000000L, 0x7fffffff00000001L, 0x7fffffff7fffffffL,
+ 0x7fffffff80000000L, 0x7fffffff80000001L, 0x7fffffffffffffffL,
+ 0x8000000000000000L, 0x8000000000000001L, 0x800000007fffffffL,
+ 0x8000000080000000L, 0x8000000080000001L, 0x80000000ffffffffL,
+ 0x8000000100000000L, 0x8000000100000001L, 0x800000017fffffffL,
+ 0x8000000180000000L, 0x8000000180000001L, 0x80000001ffffffffL,
+ 0xffffffff00000000L, 0xffffffff00000001L, 0xffffffff7fffffffL,
+ 0xffffffff80000000L, 0xffffffff80000001L, 0xffffffffffffffffL
+ };
+ int n = interesting.length;
+ int m = n * n + 1; // every (i, j) pair plus one trailing element
+ x = new long[m];
+ y = new long[m];
+ int k = 0;
+ for (int i = 0; i < n; i++) {
+ for (int j = 0; j < n; j++) {
+ x[k] = interesting[i];
+ y[k] = interesting[j];
+ k++;
+ }
+ }
+ x[k] = 10; // trailing pair contributes |10 - 2| = 8
+ y[k] = 2;
+ expectEquals(8L, sadLong2Long(x, y)); // NOTE(review): equals the trailing 8, so cross terms presumably wrap to 0
+ expectEquals(-901943132200L, sadLong2LongAlt(x, y));
+ expectEquals(8L, sadLong2LongAlt2(x, y));
+ expectEquals(9L, sadLong2LongAt1(x, y)); // previous total plus the initial 1
+
+ System.out.println("passed");
+ }
+
+ private static void expectEquals(long expected, long result) {
+ if (expected != result) {
+ throw new Error("Expected: " + expected + ", found: " + result);
+ }
+ }
+}
diff --git a/test/660-checker-simd-sad-short/expected.txt b/test/660-checker-simd-sad-short/expected.txt
new file mode 100644
index 0000000..b0aad4d
--- /dev/null
+++ b/test/660-checker-simd-sad-short/expected.txt
@@ -0,0 +1 @@
+passed
diff --git a/test/660-checker-simd-sad-short/info.txt b/test/660-checker-simd-sad-short/info.txt
new file mode 100644
index 0000000..b56c119
--- /dev/null
+++ b/test/660-checker-simd-sad-short/info.txt
@@ -0,0 +1 @@
+Functional tests on SAD vectorization.
diff --git a/test/660-checker-simd-sad-short/src/Main.java b/test/660-checker-simd-sad-short/src/Main.java
new file mode 100644
index 0000000..d94308e
--- /dev/null
+++ b/test/660-checker-simd-sad-short/src/Main.java
@@ -0,0 +1,299 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Tests for SAD (sum of absolute differences).
+ */
+public class Main {
+
+ // TODO: lower precision still coming, b/64091002
+
+ private static short sadShort2Short(short[] s1, short[] s2) { // SAD accumulated in a short
+ int min_length = Math.min(s1.length, s2.length); // only the common prefix is compared
+ short sad = 0;
+ for (int i = 0; i < min_length; i++) {
+ sad += Math.abs(s1[i] - s2[i]); // compound += narrows the int result back to short
+ }
+ return sad;
+ }
+
+ private static short sadShort2ShortAlt(short[] s1, short[] s2) { // ternary variant, short sum
+ int min_length = Math.min(s1.length, s2.length); // only the common prefix is compared
+ short sad = 0;
+ for (int i = 0; i < min_length; i++) {
+ short s = s1[i];
+ short p = s2[i];
+ sad += s >= p ? s - p : p - s; // larger minus smaller; += narrows the int back to short
+ }
+ return sad;
+ }
+
+ private static short sadShort2ShortAlt2(short[] s1, short[] s2) { // if-negate variant, short sum
+ int min_length = Math.min(s1.length, s2.length); // only the common prefix is compared
+ short sad = 0;
+ for (int i = 0; i < min_length; i++) {
+ short s = s1[i];
+ short p = s2[i];
+ int x = s - p; // computed in int, so the difference itself cannot wrap
+ if (x < 0) x = -x; // hand-written absolute value
+ sad += x; // compound += narrows the int back to short
+ }
+ return sad;
+ }
+
+ /// CHECK-START: int Main.sadShort2Int(short[], short[]) loop_optimization (before)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons1:i\d+>> IntConstant 1 loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get1:s\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get2:s\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Sub:i\d+>> Sub [<<Get1>>,<<Get2>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Intrin:i\d+>> InvokeStaticOrDirect [<<Sub>>] intrinsic:MathAbsInt loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi2>>,<<Intrin>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons1>>] loop:<<Loop>> outer_loop:none
+ //
+ /// CHECK-START-ARM64: int Main.sadShort2Int(short[], short[]) loop_optimization (after)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons8:i\d+>> IntConstant 8 loop:none
+ /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<Cons0>>] loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Load1:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Load2:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<SAD:d\d+>> VecSADAccumulate [<<Phi2>>,<<Load1>>,<<Load2>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons8>>] loop:<<Loop>> outer_loop:none
+ private static int sadShort2Int(short[] s1, short[] s2) { // SAD accumulated in an int
+ int min_length = Math.min(s1.length, s2.length); // only the common prefix is compared
+ int sad = 0;
+ for (int i = 0; i < min_length; i++) {
+ sad += Math.abs(s1[i] - s2[i]); // shorts promote to int, so the difference cannot wrap
+ }
+ return sad;
+ }
+
+ /// CHECK-START: int Main.sadShort2IntAlt(short[], short[]) loop_optimization (before)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons1:i\d+>> IntConstant 1 loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get1:s\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get2:s\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Sub:i\d+>> Sub [<<Get2>>,<<Get1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Intrin:i\d+>> InvokeStaticOrDirect [<<Sub>>] intrinsic:MathAbsInt loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi2>>,<<Intrin>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons1>>] loop:<<Loop>> outer_loop:none
+ //
+ /// CHECK-START-ARM64: int Main.sadShort2IntAlt(short[], short[]) loop_optimization (after)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons8:i\d+>> IntConstant 8 loop:none
+ /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<Cons0>>] loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Load1:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Load2:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<SAD:d\d+>> VecSADAccumulate [<<Phi2>>,<<Load2>>,<<Load1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons8>>] loop:<<Loop>> outer_loop:none
+ private static int sadShort2IntAlt(short[] s1, short[] s2) { // ternary variant, int sum
+ int min_length = Math.min(s1.length, s2.length); // only the common prefix is compared
+ int sad = 0;
+ for (int i = 0; i < min_length; i++) {
+ short s = s1[i];
+ short p = s2[i];
+ sad += s >= p ? s - p : p - s; // larger minus smaller, i.e. |s - p|
+ }
+ return sad;
+ }
+
+ /// CHECK-START: int Main.sadShort2IntAlt2(short[], short[]) loop_optimization (before)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons1:i\d+>> IntConstant 1 loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get1:s\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get2:s\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Sub:i\d+>> Sub [<<Get1>>,<<Get2>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Intrin:i\d+>> InvokeStaticOrDirect [<<Sub>>] intrinsic:MathAbsInt loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi2>>,<<Intrin>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons1>>] loop:<<Loop>> outer_loop:none
+ //
+ /// CHECK-START-ARM64: int Main.sadShort2IntAlt2(short[], short[]) loop_optimization (after)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons8:i\d+>> IntConstant 8 loop:none
+ /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<Cons0>>] loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Load1:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Load2:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<SAD:d\d+>> VecSADAccumulate [<<Phi2>>,<<Load1>>,<<Load2>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons8>>] loop:<<Loop>> outer_loop:none
+ private static int sadShort2IntAlt2(short[] s1, short[] s2) { // if-negate variant, int sum
+ int min_length = Math.min(s1.length, s2.length); // only the common prefix is compared
+ int sad = 0;
+ for (int i = 0; i < min_length; i++) {
+ short s = s1[i];
+ short p = s2[i];
+ int x = s - p; // computed in int, so the difference cannot wrap
+ if (x < 0) x = -x; // hand-written absolute value
+ sad += x;
+ }
+ return sad;
+ }
+
+ /// CHECK-START: long Main.sadShort2Long(short[], short[]) loop_optimization (before)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons1:i\d+>> IntConstant 1 loop:none
+ /// CHECK-DAG: <<ConsL:j\d+>> LongConstant 0 loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:j\d+>> Phi [<<ConsL>>,{{j\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get1:s\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get2:s\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Cnv1:j\d+>> TypeConversion [<<Get1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Cnv2:j\d+>> TypeConversion [<<Get2>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Sub:j\d+>> Sub [<<Cnv1>>,<<Cnv2>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Intrin:j\d+>> InvokeStaticOrDirect [<<Sub>>] intrinsic:MathAbsLong loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi2>>,<<Intrin>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons1>>] loop:<<Loop>> outer_loop:none
+ //
+ /// CHECK-START-ARM64: long Main.sadShort2Long(short[], short[]) loop_optimization (after)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons8:i\d+>> IntConstant 8 loop:none
+ /// CHECK-DAG: <<ConsL:j\d+>> LongConstant 0 loop:none
+ /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<ConsL>>] loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Load1:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Load2:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<SAD:d\d+>> VecSADAccumulate [<<Phi2>>,<<Load1>>,<<Load2>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons8>>] loop:<<Loop>> outer_loop:none
+ private static long sadShort2Long(short[] s1, short[] s2) { // SAD accumulated in a long
+ int min_length = Math.min(s1.length, s2.length); // only the common prefix is compared
+ long sad = 0;
+ for (int i = 0; i < min_length; i++) {
+ long x = s1[i]; // widen each operand to long before subtracting
+ long y = s2[i];
+ sad += Math.abs(x - y);
+ }
+ return sad;
+ }
+
+ /// CHECK-START: long Main.sadShort2LongAt1(short[], short[]) loop_optimization (before)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons1:i\d+>> IntConstant 1 loop:none
+ /// CHECK-DAG: <<ConsL:j\d+>> LongConstant 1 loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:j\d+>> Phi [<<ConsL>>,{{j\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get1:s\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get2:s\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Cnv1:j\d+>> TypeConversion [<<Get1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Cnv2:j\d+>> TypeConversion [<<Get2>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Sub:j\d+>> Sub [<<Cnv1>>,<<Cnv2>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Intrin:j\d+>> InvokeStaticOrDirect [<<Sub>>] intrinsic:MathAbsLong loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi2>>,<<Intrin>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons1>>] loop:<<Loop>> outer_loop:none
+ //
+ /// CHECK-START-ARM64: long Main.sadShort2LongAt1(short[], short[]) loop_optimization (after)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons8:i\d+>> IntConstant 8 loop:none
+ /// CHECK-DAG: <<ConsL:j\d+>> LongConstant 1 loop:none
+ /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<ConsL>>] loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Load1:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Load2:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<SAD:d\d+>> VecSADAccumulate [<<Phi2>>,<<Load1>>,<<Load2>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons8>>] loop:<<Loop>> outer_loop:none
+ private static long sadShort2LongAt1(short[] s1, short[] s2) { // long SAD with non-zero start
+ int min_length = Math.min(s1.length, s2.length); // only the common prefix is compared
+ long sad = 1; // starts at 1
+ for (int i = 0; i < min_length; i++) {
+ long x = s1[i]; // widen each operand to long before subtracting
+ long y = s2[i];
+ sad += Math.abs(x - y);
+ }
+ return sad;
+ }
+
+ public static void main(String[] args) { // runs every SAD variant against fixed expectations
+ // Cross-test the two most extreme values individually.
+ short[] s1 = { 0, -32768, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+ short[] s2 = { 0, 32767, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+ expectEquals(-1, sadShort2Short(s1, s2)); // 65535 narrowed to short
+ expectEquals(-1, sadShort2Short(s2, s1));
+ expectEquals(-1, sadShort2ShortAlt(s1, s2));
+ expectEquals(-1, sadShort2ShortAlt(s2, s1));
+ expectEquals(-1, sadShort2ShortAlt2(s1, s2));
+ expectEquals(-1, sadShort2ShortAlt2(s2, s1));
+ expectEquals(65535, sadShort2Int(s1, s2)); // |-32768 - 32767|
+ expectEquals(65535, sadShort2Int(s2, s1));
+ expectEquals(65535, sadShort2IntAlt(s1, s2));
+ expectEquals(65535, sadShort2IntAlt(s2, s1));
+ expectEquals(65535, sadShort2IntAlt2(s1, s2));
+ expectEquals(65535, sadShort2IntAlt2(s2, s1));
+ expectEquals(65535L, sadShort2Long(s1, s2));
+ expectEquals(65535L, sadShort2Long(s2, s1));
+ expectEquals(65536L, sadShort2LongAt1(s1, s2)); // 65535 plus the initial 1
+ expectEquals(65536L, sadShort2LongAt1(s2, s1));
+
+ // Use cross-values to test all cases.
+ short[] interesting = {
+ (short) 0x0000,
+ (short) 0x0001,
+ (short) 0x0002,
+ (short) 0x1234,
+ (short) 0x8000,
+ (short) 0x8001,
+ (short) 0x7fff,
+ (short) 0xffff
+ };
+ int n = interesting.length;
+ int m = n * n + 1; // every ordered pair plus one extra slot
+ s1 = new short[m];
+ s2 = new short[m];
+ int k = 0;
+ for (int i = 0; i < n; i++) {
+ for (int j = 0; j < n; j++) {
+ s1[k] = interesting[i];
+ s2[k] = interesting[j];
+ k++;
+ }
+ }
+ s1[k] = 10; // extra slot: contributes |10 - 2| = 8
+ s2[k] = 2;
+ expectEquals(-18932, sadShort2Short(s1, s2)); // 1291788 narrowed to short
+ expectEquals(-18932, sadShort2ShortAlt(s1, s2));
+ expectEquals(-18932, sadShort2ShortAlt2(s1, s2));
+ expectEquals(1291788, sadShort2Int(s1, s2));
+ expectEquals(1291788, sadShort2IntAlt(s1, s2));
+ expectEquals(1291788, sadShort2IntAlt2(s1, s2));
+ expectEquals(1291788L, sadShort2Long(s1, s2));
+ expectEquals(1291789L, sadShort2LongAt1(s1, s2)); // includes the initial 1
+
+ System.out.println("passed"); // reached only if no expectEquals threw
+ }
+
+ private static void expectEquals(int expected, int result) { // int overload; short args widen
+ if (expected != result) {
+ throw new Error("Expected: " + expected + ", found: " + result); // unchecked, no throws clause
+ }
+ }
+
+ private static void expectEquals(long expected, long result) { // long overload
+ if (expected != result) {
+ throw new Error("Expected: " + expected + ", found: " + result); // unchecked, no throws clause
+ }
+ }
+}
diff --git a/test/661-checker-simd-reduc/src/Main.java b/test/661-checker-simd-reduc/src/Main.java
index 71eb3cd..bcfa968 100644
--- a/test/661-checker-simd-reduc/src/Main.java
+++ b/test/661-checker-simd-reduc/src/Main.java
@@ -80,6 +80,101 @@
return sum;
}
+ /// CHECK-START: int Main.reductionIntChain() loop_optimization (before)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons1:i\d+>> IntConstant 1 loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons1>>,{{i\d+}}] loop:<<Loop1:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop1>> outer_loop:none
+ /// CHECK-DAG: <<Get1:i\d+>> ArrayGet [{{l\d+}},<<Phi2>>] loop:<<Loop1>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Get1>>] loop:<<Loop1>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi2>>,<<Cons1>>] loop:<<Loop1>> outer_loop:none
+ /// CHECK-DAG: <<Phi3:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop2:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi4:i\d+>> Phi [<<Phi1>>,{{i\d+}}] loop:<<Loop2>> outer_loop:none
+ /// CHECK-DAG: <<Get2:i\d+>> ArrayGet [{{l\d+}},<<Phi3>>] loop:<<Loop2>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi4>>,<<Get2>>] loop:<<Loop2>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi3>>,<<Cons1>>] loop:<<Loop2>> outer_loop:none
+ /// CHECK-DAG: Return [<<Phi4>>] loop:none
+ //
+ /// CHECK-EVAL: "<<Loop1>>" != "<<Loop2>>"
+ //
+ /// CHECK-START-ARM64: int Main.reductionIntChain() loop_optimization (after)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons1:i\d+>> IntConstant 1 loop:none
+ /// CHECK-DAG: <<Cons4:i\d+>> IntConstant 4 loop:none
+ /// CHECK-DAG: <<Set1:d\d+>> VecSetScalars [<<Cons1>>] loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop1:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set1>>,{{d\d+}}] loop:<<Loop1>> outer_loop:none
+ /// CHECK-DAG: <<Load1:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop1>> outer_loop:none
+ /// CHECK-DAG: VecAdd [<<Phi2>>,<<Load1>>] loop:<<Loop1>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons4>>] loop:<<Loop1>> outer_loop:none
+ /// CHECK-DAG: <<Red1:d\d+>> VecReduce [<<Phi2>>] loop:none
+ /// CHECK-DAG: <<Extr1:i\d+>> VecExtractScalar [<<Red1>>] loop:none
+ /// CHECK-DAG: <<Set2:d\d+>> VecSetScalars [<<Extr1>>] loop:none
+ /// CHECK-DAG: <<Phi3:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop2:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi4:d\d+>> Phi [<<Set2>>,{{d\d+}}] loop:<<Loop2>> outer_loop:none
+ /// CHECK-DAG: <<Load2:d\d+>> VecLoad [{{l\d+}},<<Phi3>>] loop:<<Loop2>> outer_loop:none
+ /// CHECK-DAG: VecAdd [<<Phi4>>,<<Load2>>] loop:<<Loop2>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi3>>,<<Cons4>>] loop:<<Loop2>> outer_loop:none
+ /// CHECK-DAG: <<Red2:d\d+>> VecReduce [<<Phi4>>] loop:none
+ /// CHECK-DAG: <<Extr2:i\d+>> VecExtractScalar [<<Red2>>] loop:none
+ /// CHECK-DAG: Return [<<Extr2>>] loop:none
+ //
+ /// CHECK-EVAL: "<<Loop1>>" != "<<Loop2>>"
+ //
+ // NOTE: pattern is robust with respect to vector loop unrolling.
+ private static int reductionIntChain() { // two back-to-back reductions into the same variable
+ int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 }; // sums to 136
+ int r = 1; // non-zero initial value
+ for (int i = 0; i < 16; i++) {
+ r += x[i];
+ }
+ for (int i = 0; i < 16; i++) { // continues from the first loop's result
+ r += x[i];
+ }
+ return r; // 1 + 136 + 136
+ }
+
+ /// CHECK-START: int Main.reductionIntToLoop(int[]) loop_optimization (before)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons1:i\d+>> IntConstant 1 loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop1:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop1>> outer_loop:none
+ /// CHECK-DAG: <<Get:i\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop1>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi2>>,<<Get>>] loop:<<Loop1>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons1>>] loop:<<Loop1>> outer_loop:none
+ /// CHECK-DAG: <<Phi3:i\d+>> Phi [<<Phi2>>,{{i\d+}}] loop:<<Loop2:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi4:i\d+>> Phi [<<Phi2>>,{{i\d+}}] loop:<<Loop2>> outer_loop:none
+ //
+ /// CHECK-EVAL: "<<Loop1>>" != "<<Loop2>>"
+ //
+ /// CHECK-START-ARM64: int Main.reductionIntToLoop(int[]) loop_optimization (after)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons1:i\d+>> IntConstant 1 loop:none
+ /// CHECK-DAG: <<Cons4:i\d+>> IntConstant 4 loop:none
+ /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<Cons0>>] loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop1:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop1>> outer_loop:none
+ /// CHECK-DAG: <<Load1:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop1>> outer_loop:none
+ /// CHECK-DAG: VecAdd [<<Phi2>>,<<Load1>>] loop:<<Loop1>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons4>>] loop:<<Loop1>> outer_loop:none
+ /// CHECK-DAG: <<Red:d\d+>> VecReduce [<<Phi2>>] loop:none
+ /// CHECK-DAG: <<Extr:i\d+>> VecExtractScalar [<<Red>>] loop:none
+ /// CHECK-DAG: <<Phi3:i\d+>> Phi [<<Extr>>,{{i\d+}}] loop:<<Loop2:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi4:i\d+>> Phi [<<Extr>>,{{i\d+}}] loop:<<Loop2>> outer_loop:none
+ //
+ /// CHECK-EVAL: "<<Loop1>>" != "<<Loop2>>"
+ //
+ private static int reductionIntToLoop(int[] x) { // reduction result feeds a second loop
+ int r = 0;
+ for (int i = 0; i < 4; i++) { // reads only x[0] through x[3]
+ r += x[i];
+ }
+ for (int i = r; i < 16; i++) { // first loop's sum seeds both the index and the accumulator
+ r += i;
+ }
+ return r;
+ }
+
/// CHECK-START: long Main.reductionLong(long[]) loop_optimization (before)
/// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
/// CHECK-DAG: <<Long0:j\d+>> LongConstant 0 loop:none
@@ -468,10 +563,28 @@
}
// Test various reductions in loops.
+ int[] x0 = { 0, 0, 0, 0 };
+ int[] x1 = { 0, 0, 0, 1 };
+ int[] x2 = { 1, 1, 1, 1 };
expectEquals(-74, reductionByte(xb));
expectEquals(-27466, reductionShort(xs));
expectEquals(38070, reductionChar(xc));
expectEquals(365750, reductionInt(xi));
+ expectEquals(273, reductionIntChain());
+ expectEquals(120, reductionIntToLoop(x0));
+ expectEquals(121, reductionIntToLoop(x1));
+ expectEquals(118, reductionIntToLoop(x2));
+ expectEquals(-1205, reductionIntToLoop(xi));
+ expectEquals(365750L, reductionLong(xl));
+ expectEquals(-75, reductionByteM1(xb));
+ expectEquals(-27467, reductionShortM1(xs));
+ expectEquals(38069, reductionCharM1(xc));
+ expectEquals(365749, reductionIntM1(xi));
+ expectEquals(365749L, reductionLongM1(xl));
+ expectEquals(74, reductionMinusByte(xb));
+ expectEquals(27466, reductionMinusShort(xs));
+ expectEquals(27466, reductionMinusChar(xc));
+ expectEquals(-365750, reductionMinusInt(xi));
expectEquals(365750L, reductionLong(xl));
expectEquals(-75, reductionByteM1(xb));
expectEquals(-27467, reductionShortM1(xs));