MIPS: Basic SIMD reduction support.
Enables vectorization of reductions of the form x += ... for very basic
(simple, same-type) constructs on MIPS.
Note: Testing is done with checker parts of tests 661 and 665,
locally changed to cover MIPS32 cases. These changes can't
be included in this patch since MSA is not a default option.
Test: test-art-host test-art-target
Change-Id: Ia3b3646afecb76c2f00996a30923ca70302be57e
diff --git a/compiler/optimizing/code_generator_vector_mips.cc b/compiler/optimizing/code_generator_vector_mips.cc
index c5a39ff..7a8c0ad 100644
--- a/compiler/optimizing/code_generator_vector_mips.cc
+++ b/compiler/optimizing/code_generator_vector_mips.cc
@@ -91,17 +91,61 @@
}
void LocationsBuilderMIPS::VisitVecExtractScalar(HVecExtractScalar* instruction) {  // Chooses operand locations for HVecExtractScalar based on its packed type.
- LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
+ switch (instruction->GetPackedType()) {
+ case DataType::Type::kBool:
+ case DataType::Type::kUint8:
+ case DataType::Type::kInt8:
+ case DataType::Type::kUint16:
+ case DataType::Type::kInt16:
+ case DataType::Type::kInt32:
+ case DataType::Type::kInt64:
+ locations->SetInAt(0, Location::RequiresFpuRegister());  // Integral types: the vector input lives in an FPU register.
+ locations->SetOut(Location::RequiresRegister());  // The extracted scalar is requested via RequiresRegister().
+ break;
+ case DataType::Type::kFloat32:
+ case DataType::Type::kFloat64:
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetOut(Location::SameAsFirstInput());  // Floating-point: output shares the input's location.
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";  // Any other packed type is rejected.
+ UNREACHABLE();
+ }
}
void InstructionCodeGeneratorMIPS::VisitVecExtractScalar(HVecExtractScalar* instruction) {  // Emits code for HVecExtractScalar; only kInt32, kInt64, kFloat32 and kFloat64 have cases here.
- LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ LocationSummary* locations = instruction->GetLocations();
+ VectorRegister src = VectorRegisterFrom(locations->InAt(0));
+ switch (instruction->GetPackedType()) {
+ case DataType::Type::kInt32:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ Copy_sW(locations->Out().AsRegister<Register>(), src, 0);  // Index 0 selects the element to copy; NOTE(review): presumably a sign-extending word copy per the MSA mnemonic - confirm.
+ break;
+ case DataType::Type::kInt64:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ Mfc1(locations->Out().AsRegisterPairLow<Register>(),  // Low half of the output pair is read from the input viewed as an FRegister.
+ locations->InAt(0).AsFpuRegister<FRegister>());
+ __ MoveFromFpuHigh(locations->Out().AsRegisterPairHigh<Register>(),  // High half of the output pair is read from the same FRegister.
+ locations->InAt(0).AsFpuRegister<FRegister>());
+ break;
+ case DataType::Type::kFloat32:
+ case DataType::Type::kFloat64:
+ DCHECK_LE(2u, instruction->GetVectorLength());  // Together with the next check: vector length must be within [2, 4].
+ DCHECK_LE(instruction->GetVectorLength(), 4u);
+ DCHECK(locations->InAt(0).Equals(locations->Out())); // no code required
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";  // Also reached by kBool/kUint8/kInt8/kUint16/kInt16, which the locations builder accepts but which have no case here.
+ UNREACHABLE();
+ }
}
// Helper to set up locations for vector unary operations.
static void CreateVecUnOpLocations(ArenaAllocator* allocator, HVecUnaryOperation* instruction) {
LocationSummary* locations = new (allocator) LocationSummary(instruction);
- switch (instruction->GetPackedType()) {
+ DataType::Type type = instruction->GetPackedType();
+ switch (type) {
case DataType::Type::kBool:
locations->SetInAt(0, Location::RequiresFpuRegister());
locations->SetOut(Location::RequiresFpuRegister(),
@@ -118,7 +162,8 @@
case DataType::Type::kFloat64:
locations->SetInAt(0, Location::RequiresFpuRegister());
locations->SetOut(Location::RequiresFpuRegister(),
- (instruction->IsVecNeg() || instruction->IsVecAbs())
+ (instruction->IsVecNeg() || instruction->IsVecAbs() ||
+ (instruction->IsVecReduce() && type == DataType::Type::kInt64))
? Location::kOutputOverlap
: Location::kNoOutputOverlap);
break;
@@ -133,7 +178,54 @@
}
void InstructionCodeGeneratorMIPS::VisitVecReduce(HVecReduce* instruction) {  // Emits sum/min/max reduction sequences for kInt32 (4 lanes) and kInt64 (2 lanes) vectors.
- LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ LocationSummary* locations = instruction->GetLocations();
+ VectorRegister src = VectorRegisterFrom(locations->InAt(0));
+ VectorRegister dst = VectorRegisterFrom(locations->Out());
+ VectorRegister tmp = static_cast<VectorRegister>(FTMP);  // FTMP reinterpreted as a vector register; used as scratch by the kInt32 sequences.
+ switch (instruction->GetPackedType()) {
+ case DataType::Type::kInt32:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ switch (instruction->GetKind()) {
+ case HVecReduce::kSum:
+ __ Hadd_sD(tmp, src, src);  // NOTE(review): presumably pairwise-adds adjacent words into doublewords - confirm against the MSA manual.
+ __ IlvlD(dst, tmp, tmp);  // Interleave of tmp with itself at doubleword granularity, written to dst.
+ __ AddvW(dst, dst, tmp);  // Word-wise add combines the two partial results.
+ break;
+ case HVecReduce::kMin:
+ __ IlvodW(tmp, src, src);  // First step works at word granularity on src paired with itself.
+ __ Min_sW(tmp, src, tmp);  // Signed word-wise minimum of src and the shuffled copy.
+ __ IlvlW(dst, tmp, tmp);  // Second shuffle, this time of the partial result.
+ __ Min_sW(dst, dst, tmp);  // Final signed minimum of the two partial results.
+ break;
+ case HVecReduce::kMax:
+ __ IlvodW(tmp, src, src);  // Same shuffle/combine shape as kMin, using Max_sW instead.
+ __ Max_sW(tmp, src, tmp);
+ __ IlvlW(dst, tmp, tmp);
+ __ Max_sW(dst, dst, tmp);
+ break;
+ }
+ break;
+ case DataType::Type::kInt64:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ switch (instruction->GetKind()) {
+ case HVecReduce::kSum:
+ __ IlvlD(dst, src, src);  // dst is written before src is read again below, so dst and src must not alias.
+ __ AddvD(dst, dst, src);
+ break;
+ case HVecReduce::kMin:
+ __ IlvlD(dst, src, src);  // Same two-step shape as kSum, combining with signed doubleword minimum.
+ __ Min_sD(dst, dst, src);
+ break;
+ case HVecReduce::kMax:
+ __ IlvlD(dst, src, src);  // Same two-step shape as kSum, combining with signed doubleword maximum.
+ __ Max_sD(dst, dst, src);
+ break;
+ }
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";  // Only kInt32 and kInt64 reductions are implemented.
+ UNREACHABLE();
+ }
}
void LocationsBuilderMIPS::VisitVecCnv(HVecCnv* instruction) {
@@ -831,11 +923,79 @@
}
void LocationsBuilderMIPS::VisitVecSetScalars(HVecSetScalars* instruction) {  // Chooses operand locations for a single-input HVecSetScalars.
- LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
+
+ DCHECK_EQ(1u, instruction->InputCount()); // only one input currently implemented
+
+ HInstruction* input = instruction->InputAt(0);
+ bool is_zero = IsZeroBitPattern(input);  // True when IsZeroBitPattern() reports the input as zero; such inputs are kept as constants below.
+
+ switch (instruction->GetPackedType()) {
+ case DataType::Type::kBool:
+ case DataType::Type::kUint8:
+ case DataType::Type::kInt8:
+ case DataType::Type::kUint16:
+ case DataType::Type::kInt16:
+ case DataType::Type::kInt32:
+ case DataType::Type::kInt64:
+ locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())  // Integral types: constant location for zero, otherwise RequiresRegister().
+ : Location::RequiresRegister());
+ locations->SetOut(Location::RequiresFpuRegister());  // The resulting vector lives in an FPU register.
+ break;
+ case DataType::Type::kFloat32:
+ case DataType::Type::kFloat64:
+ locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())  // Floating-point: constant location for zero, otherwise an FPU register.
+ : Location::RequiresFpuRegister());
+ locations->SetOut(Location::RequiresFpuRegister());
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";  // Any other packed type is rejected.
+ UNREACHABLE();
+ }
}
void InstructionCodeGeneratorMIPS::VisitVecSetScalars(HVecSetScalars* instruction) {  // Emits code that zeroes the destination vector, then inserts the single scalar input at element 0.
- LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ LocationSummary* locations = instruction->GetLocations();
+ VectorRegister dst = VectorRegisterFrom(locations->Out());
+
+ DCHECK_EQ(1u, instruction->InputCount()); // only one input currently implemented
+
+ // Zero out all other elements first.
+ __ FillW(dst, ZERO);
+
+ // Shorthand for any type of zero.
+ if (IsZeroBitPattern(instruction->InputAt(0))) {
+ return;  // The zero fill above already produced the result, for every packed type.
+ }
+
+ // Set required elements.
+ switch (instruction->GetPackedType()) {
+ case DataType::Type::kBool:
+ case DataType::Type::kUint8:
+ case DataType::Type::kInt8:
+ DCHECK_EQ(16u, instruction->GetVectorLength());
+ __ InsertB(dst, locations->InAt(0).AsRegister<Register>(), 0);  // Insert the input register at byte element 0.
+ break;
+ case DataType::Type::kUint16:
+ case DataType::Type::kInt16:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ InsertH(dst, locations->InAt(0).AsRegister<Register>(), 0);  // Insert the input register at halfword element 0.
+ break;
+ case DataType::Type::kInt32:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ InsertW(dst, locations->InAt(0).AsRegister<Register>(), 0);  // Insert the input register at word element 0.
+ break;
+ case DataType::Type::kInt64:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ Mtc1(locations->InAt(0).AsRegisterPairLow<Register>(),  // Low half of the input pair is written to the output viewed as an FRegister.
+ locations->Out().AsFpuRegister<FRegister>());
+ __ MoveToFpuHigh(locations->InAt(0).AsRegisterPairHigh<Register>(),  // High half of the input pair is written to the same FRegister.
+ locations->Out().AsFpuRegister<FRegister>());
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";  // Also reached by non-zero kFloat32/kFloat64 inputs, which the locations builder accepts but which have no case here.
+ UNREACHABLE();
+ }
}
// Helper to set up locations for vector accumulations.