MIPS: Implement Sum-of-Abs-Differences
Test: test-art-host test-art-target
Change-Id: I32a3e21f96cdcbab2e108d71746670408deb901a
diff --git a/compiler/optimizing/code_generator_vector_mips.cc b/compiler/optimizing/code_generator_vector_mips.cc
index 384b642..3cf150a 100644
--- a/compiler/optimizing/code_generator_vector_mips.cc
+++ b/compiler/optimizing/code_generator_vector_mips.cc
@@ -1071,11 +1071,195 @@
void LocationsBuilderMIPS::VisitVecSADAccumulate(HVecSADAccumulate* instruction) {
CreateVecAccumLocations(GetGraph()->GetAllocator(), instruction);
+ LocationSummary* locations = instruction->GetLocations();
+ // Every SAD variant gets one FPU temporary on top of the locations set up by CreateVecAccumLocations.
+ locations->AddTemp(Location::RequiresFpuRegister());
+ // A second FPU temporary is added only for the input/accumulator type pairs selected by the switch below.
+ HVecOperation* a = instruction->InputAt(1)->AsVecOperation();
+ HVecOperation* b = instruction->InputAt(2)->AsVecOperation();
+ DCHECK_EQ(HVecOperation::ToSignedType(a->GetPackedType()),  // Both inputs must agree up to signedness.
+ HVecOperation::ToSignedType(b->GetPackedType()));
+ switch (a->GetPackedType()) {  // Only the first input is inspected; b matches it per the DCHECK above.
+ case DataType::Type::kInt32:
+ if (instruction->GetPackedType() == DataType::Type::kInt32) {
+ break;  // Int32 inputs into an Int32 accumulator: no second temporary.
+ }
+ FALLTHROUGH_INTENDED;  // Int32 inputs with any other accumulator type take the second temporary too.
+ case DataType::Type::kUint8:
+ case DataType::Type::kInt8:
+ case DataType::Type::kUint16:
+ case DataType::Type::kInt16:
+ locations->AddTemp(Location::RequiresFpuRegister());  // Becomes GetTemp(1) in the code generator.
+ break;
+ default:
+ break;  // Any other input type (e.g. kInt64) gets no second temporary.
+ }
}
void InstructionCodeGeneratorMIPS::VisitVecSADAccumulate(HVecSADAccumulate* instruction) {
- LOG(FATAL) << "No SIMD for " << instruction->GetId();
- // TODO: implement this, location helper already filled out (shared with MulAcc).
+ LocationSummary* locations = instruction->GetLocations();
+ VectorRegister acc = VectorRegisterFrom(locations->InAt(0));  // Accumulator input; same location as the output (checked below).
+ VectorRegister left = VectorRegisterFrom(locations->InAt(1));
+ VectorRegister right = VectorRegisterFrom(locations->InAt(2));
+ VectorRegister tmp = static_cast<VectorRegister>(FTMP);  // Filled from ZERO at the start of every case below.
+ VectorRegister tmp1 = VectorRegisterFrom(locations->GetTemp(0));  // First temporary added by the locations builder.
+
+ DCHECK(locations->InAt(0).Equals(locations->Out()));  // acc is updated in place by the Addv* emitted below.
+
+ // Emit acc_T += sad(a_S, b_S): the outer switch picks input type S, each inner switch picks accumulator type T.
+ HVecOperation* a = instruction->InputAt(1)->AsVecOperation();
+ HVecOperation* b = instruction->InputAt(2)->AsVecOperation();
+ DCHECK_EQ(HVecOperation::ToSignedType(a->GetPackedType()),
+ HVecOperation::ToSignedType(b->GetPackedType()));
+ switch (a->GetPackedType()) {
+ case DataType::Type::kUint8:  // NOTE(review): unsigned inputs share the signed (_s) sequences with kInt8 - confirm intended.
+ case DataType::Type::kInt8:
+ DCHECK_EQ(16u, a->GetVectorLength());
+ switch (instruction->GetPackedType()) {
+ case DataType::Type::kUint16:
+ case DataType::Type::kInt16: {
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ VectorRegister tmp2 = VectorRegisterFrom(locations->GetTemp(1));  // Second temporary, added by the locations builder for this input type.
+ __ FillB(tmp, ZERO);  // tmp: every byte lane filled from register ZERO.
+ __ Hadd_sH(tmp1, left, tmp);  // First pass: presumably widens one half of left's byte lanes to halfwords (HADD_S with a zero operand) - TODO confirm odd/even split in the MSA manual.
+ __ Hadd_sH(tmp2, right, tmp);  // Same lanes of right.
+ __ Asub_sH(tmp1, tmp1, tmp2);  // Presumably the lane-wise absolute difference (ASUB_S) - TODO confirm.
+ __ AddvH(acc, acc, tmp1);  // Accumulate the first half.
+ __ Hadd_sH(tmp1, tmp, left);  // Second pass: operands swapped, presumably selecting the other half of the byte lanes.
+ __ Hadd_sH(tmp2, tmp, right);
+ __ Asub_sH(tmp1, tmp1, tmp2);
+ __ AddvH(acc, acc, tmp1);  // Accumulate the second half.
+ break;
+ }
+ case DataType::Type::kInt32: {
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ VectorRegister tmp2 = VectorRegisterFrom(locations->GetTemp(1));
+ __ FillB(tmp, ZERO);
+ __ Hadd_sH(tmp1, left, tmp);  // Same two-pass scheme as the 16-bit accumulator case above.
+ __ Hadd_sH(tmp2, right, tmp);
+ __ Asub_sH(tmp1, tmp1, tmp2);
+ __ Hadd_sW(tmp1, tmp1, tmp1);  // Extra step: presumably folds pairs of halfword lanes into word lanes to match acc - TODO confirm.
+ __ AddvW(acc, acc, tmp1);
+ __ Hadd_sH(tmp1, tmp, left);
+ __ Hadd_sH(tmp2, tmp, right);
+ __ Asub_sH(tmp1, tmp1, tmp2);
+ __ Hadd_sW(tmp1, tmp1, tmp1);
+ __ AddvW(acc, acc, tmp1);
+ break;
+ }
+ case DataType::Type::kInt64: {
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ VectorRegister tmp2 = VectorRegisterFrom(locations->GetTemp(1));
+ __ FillB(tmp, ZERO);
+ __ Hadd_sH(tmp1, left, tmp);
+ __ Hadd_sH(tmp2, right, tmp);
+ __ Asub_sH(tmp1, tmp1, tmp2);
+ __ Hadd_sW(tmp1, tmp1, tmp1);  // Two folding steps here (halfword to word, then word to doubleword) before adding into the 64-bit lanes.
+ __ Hadd_sD(tmp1, tmp1, tmp1);
+ __ AddvD(acc, acc, tmp1);
+ __ Hadd_sH(tmp1, tmp, left);
+ __ Hadd_sH(tmp2, tmp, right);
+ __ Asub_sH(tmp1, tmp1, tmp2);
+ __ Hadd_sW(tmp1, tmp1, tmp1);
+ __ Hadd_sD(tmp1, tmp1, tmp1);
+ __ AddvD(acc, acc, tmp1);
+ break;
+ }
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+ break;
+ case DataType::Type::kUint16:
+ case DataType::Type::kInt16:
+ DCHECK_EQ(8u, a->GetVectorLength());
+ switch (instruction->GetPackedType()) {
+ case DataType::Type::kInt32: {
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ VectorRegister tmp2 = VectorRegisterFrom(locations->GetTemp(1));
+ __ FillH(tmp, ZERO);
+ __ Hadd_sW(tmp1, left, tmp);  // Same two-pass scheme as the byte cases, one size up (halfword inputs, word results).
+ __ Hadd_sW(tmp2, right, tmp);
+ __ Asub_sW(tmp1, tmp1, tmp2);
+ __ AddvW(acc, acc, tmp1);
+ __ Hadd_sW(tmp1, tmp, left);
+ __ Hadd_sW(tmp2, tmp, right);
+ __ Asub_sW(tmp1, tmp1, tmp2);
+ __ AddvW(acc, acc, tmp1);
+ break;
+ }
+ case DataType::Type::kInt64: {
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ VectorRegister tmp2 = VectorRegisterFrom(locations->GetTemp(1));
+ __ FillH(tmp, ZERO);
+ __ Hadd_sW(tmp1, left, tmp);
+ __ Hadd_sW(tmp2, right, tmp);
+ __ Asub_sW(tmp1, tmp1, tmp2);
+ __ Hadd_sD(tmp1, tmp1, tmp1);  // One folding step (word to doubleword) before adding into the 64-bit lanes.
+ __ AddvD(acc, acc, tmp1);
+ __ Hadd_sW(tmp1, tmp, left);
+ __ Hadd_sW(tmp2, tmp, right);
+ __ Asub_sW(tmp1, tmp1, tmp2);
+ __ Hadd_sD(tmp1, tmp1, tmp1);
+ __ AddvD(acc, acc, tmp1);
+ break;
+ }
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+ break;
+ case DataType::Type::kInt32:
+ DCHECK_EQ(4u, a->GetVectorLength());
+ switch (instruction->GetPackedType()) {
+ case DataType::Type::kInt32: {
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ FillW(tmp, ZERO);  // Same-width case: no widening, so only tmp and tmp1 are used.
+ __ SubvW(tmp1, left, right);  // Lane-wise left - right.
+ __ Add_aW(tmp1, tmp1, tmp);  // With tmp zero this presumably leaves |tmp1| (ADD_A adds absolute values) - TODO confirm.
+ __ AddvW(acc, acc, tmp1);
+ break;
+ }
+ case DataType::Type::kInt64: {
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ VectorRegister tmp2 = VectorRegisterFrom(locations->GetTemp(1));  // Added by the locations builder for Int32 inputs with a non-Int32 accumulator.
+ __ FillW(tmp, ZERO);
+ __ Hadd_sD(tmp1, left, tmp);  // Same two-pass scheme, with word inputs and doubleword results.
+ __ Hadd_sD(tmp2, right, tmp);
+ __ Asub_sD(tmp1, tmp1, tmp2);
+ __ AddvD(acc, acc, tmp1);
+ __ Hadd_sD(tmp1, tmp, left);
+ __ Hadd_sD(tmp2, tmp, right);
+ __ Asub_sD(tmp1, tmp1, tmp2);
+ __ AddvD(acc, acc, tmp1);
+ break;
+ }
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+ break;
+ case DataType::Type::kInt64: {
+ DCHECK_EQ(2u, a->GetVectorLength());
+ switch (instruction->GetPackedType()) {
+ case DataType::Type::kInt64: {
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ FillW(tmp, ZERO);  // Word-sized fill although the ops below are D-sized; NOTE(review): presumably equivalent since the fill value is zero.
+ __ SubvD(tmp1, left, right);  // Lane-wise left - right.
+ __ Add_aD(tmp1, tmp1, tmp);  // As in the Int32/Int32 case: presumably |tmp1| because tmp is zero.
+ __ AddvD(acc, acc, tmp1);
+ break;
+ }
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+ break;
+ }
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
}
// Helper to set up locations for vector memory operations.