diff options
Diffstat (limited to 'compiler/optimizing')
7 files changed, 216 insertions, 79 deletions
diff --git a/compiler/optimizing/code_generator_vector_mips.cc b/compiler/optimizing/code_generator_vector_mips.cc index ea36e90112..6bf28ab1a3 100644 --- a/compiler/optimizing/code_generator_vector_mips.cc +++ b/compiler/optimizing/code_generator_vector_mips.cc @@ -819,11 +819,74 @@ void InstructionCodeGeneratorMIPS::VisitVecUShr(HVecUShr* instruction) { } void LocationsBuilderMIPS::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) { - LOG(FATAL) << "No SIMD for " << instr->GetId(); + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instr); + switch (instr->GetPackedType()) { + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimLong: + locations->SetInAt( + HVecMultiplyAccumulate::kInputAccumulatorIndex, Location::RequiresFpuRegister()); + locations->SetInAt( + HVecMultiplyAccumulate::kInputMulLeftIndex, Location::RequiresFpuRegister()); + locations->SetInAt( + HVecMultiplyAccumulate::kInputMulRightIndex, Location::RequiresFpuRegister()); + DCHECK_EQ(HVecMultiplyAccumulate::kInputAccumulatorIndex, 0); + locations->SetOut(Location::SameAsFirstInput()); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } } void InstructionCodeGeneratorMIPS::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) { - LOG(FATAL) << "No SIMD for " << instr->GetId(); + LocationSummary* locations = instr->GetLocations(); + VectorRegister acc = + VectorRegisterFrom(locations->InAt(HVecMultiplyAccumulate::kInputAccumulatorIndex)); + VectorRegister left = + VectorRegisterFrom(locations->InAt(HVecMultiplyAccumulate::kInputMulLeftIndex)); + VectorRegister right = + VectorRegisterFrom(locations->InAt(HVecMultiplyAccumulate::kInputMulRightIndex)); + switch (instr->GetPackedType()) { + case Primitive::kPrimByte: + DCHECK_EQ(16u, instr->GetVectorLength()); + if (instr->GetOpKind() == HInstruction::kAdd) { + __ MaddvB(acc, left, right); + } else { + __ MsubvB(acc, left, right); + } + break; + case Primitive::kPrimChar: + case Primitive::kPrimShort: + DCHECK_EQ(8u, instr->GetVectorLength()); + if (instr->GetOpKind() == HInstruction::kAdd) { + __ MaddvH(acc, left, right); + } else { + __ MsubvH(acc, left, right); + } + break; + case Primitive::kPrimInt: + DCHECK_EQ(4u, instr->GetVectorLength()); + if (instr->GetOpKind() == HInstruction::kAdd) { + __ MaddvW(acc, left, right); + } else { + __ MsubvW(acc, left, right); + } + break; + case Primitive::kPrimLong: + DCHECK_EQ(2u, instr->GetVectorLength()); + if (instr->GetOpKind() == HInstruction::kAdd) { + __ MaddvD(acc, left, right); + } else { + __ MsubvD(acc, left, right); + } + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } } // Helper to set up locations for vector memory operations. diff --git a/compiler/optimizing/code_generator_vector_mips64.cc b/compiler/optimizing/code_generator_vector_mips64.cc index 0395db1df9..75bf7a7cbb 100644 --- a/compiler/optimizing/code_generator_vector_mips64.cc +++ b/compiler/optimizing/code_generator_vector_mips64.cc @@ -823,11 +823,74 @@ void InstructionCodeGeneratorMIPS64::VisitVecUShr(HVecUShr* instruction) { } void LocationsBuilderMIPS64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) { - LOG(FATAL) << "No SIMD for " << instr->GetId(); + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instr); + switch (instr->GetPackedType()) { + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimLong: + locations->SetInAt( + HVecMultiplyAccumulate::kInputAccumulatorIndex, Location::RequiresFpuRegister()); + locations->SetInAt( + HVecMultiplyAccumulate::kInputMulLeftIndex, Location::RequiresFpuRegister()); + locations->SetInAt( + HVecMultiplyAccumulate::kInputMulRightIndex, Location::RequiresFpuRegister()); + DCHECK_EQ(HVecMultiplyAccumulate::kInputAccumulatorIndex, 0); + locations->SetOut(Location::SameAsFirstInput()); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } } void InstructionCodeGeneratorMIPS64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) { - LOG(FATAL) << "No SIMD for " << instr->GetId(); + LocationSummary* locations = instr->GetLocations(); + VectorRegister acc = + VectorRegisterFrom(locations->InAt(HVecMultiplyAccumulate::kInputAccumulatorIndex)); + VectorRegister left = + VectorRegisterFrom(locations->InAt(HVecMultiplyAccumulate::kInputMulLeftIndex)); + VectorRegister right = + VectorRegisterFrom(locations->InAt(HVecMultiplyAccumulate::kInputMulRightIndex)); + switch (instr->GetPackedType()) { + case Primitive::kPrimByte: + DCHECK_EQ(16u, instr->GetVectorLength()); + if (instr->GetOpKind() == HInstruction::kAdd) { + __ MaddvB(acc, left, right); + } else { + __ MsubvB(acc, left, right); + } + break; + case Primitive::kPrimChar: + case Primitive::kPrimShort: + DCHECK_EQ(8u, instr->GetVectorLength()); + if (instr->GetOpKind() == HInstruction::kAdd) { + __ MaddvH(acc, left, right); + } else { + __ MsubvH(acc, left, right); + } + break; + case Primitive::kPrimInt: + DCHECK_EQ(4u, instr->GetVectorLength()); + if (instr->GetOpKind() == HInstruction::kAdd) { + __ MaddvW(acc, left, right); + } else { + __ MsubvW(acc, left, right); + } + break; + case Primitive::kPrimLong: + DCHECK_EQ(2u, instr->GetVectorLength()); + if (instr->GetOpKind() == HInstruction::kAdd) { + __ MaddvD(acc, left, right); + } else { + __ MsubvD(acc, left, right); + } + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } } // Helper to set up locations for vector memory operations. diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc index 5c79511bab..f2a829fa56 100644 --- a/compiler/optimizing/instruction_simplifier.cc +++ b/compiler/optimizing/instruction_simplifier.cc @@ -59,6 +59,7 @@ class InstructionSimplifierVisitor : public HGraphDelegateVisitor { bool TryDeMorganNegationFactoring(HBinaryOperation* op); bool TryHandleAssociativeAndCommutativeOperation(HBinaryOperation* instruction); bool TrySubtractionChainSimplification(HBinaryOperation* instruction); + bool TryCombineVecMultiplyAccumulate(HVecMul* mul); void VisitShift(HBinaryOperation* shift); @@ -98,6 +99,7 @@ class InstructionSimplifierVisitor : public HGraphDelegateVisitor { void VisitInstanceOf(HInstanceOf* instruction) OVERRIDE; void VisitInvoke(HInvoke* invoke) OVERRIDE; void VisitDeoptimize(HDeoptimize* deoptimize) OVERRIDE; + void VisitVecMul(HVecMul* instruction) OVERRIDE; bool CanEnsureNotNullAt(HInstruction* instr, HInstruction* at) const; @@ -243,6 +245,84 @@ bool InstructionSimplifierVisitor::TryDeMorganNegationFactoring(HBinaryOperation return false; } +bool InstructionSimplifierVisitor::TryCombineVecMultiplyAccumulate(HVecMul* mul) { + Primitive::Type type = mul->GetPackedType(); + InstructionSet isa = codegen_->GetInstructionSet(); + switch (isa) { + case kArm64: + if (!(type == Primitive::kPrimByte || + type == Primitive::kPrimChar || + type == Primitive::kPrimShort || + type == Primitive::kPrimInt)) { + return false; + } + break; + case kMips: + case kMips64: + if (!(type == Primitive::kPrimByte || + type == Primitive::kPrimChar || + type == Primitive::kPrimShort || + type == Primitive::kPrimInt || + type == Primitive::kPrimLong)) { + return false; + } + break; + default: + return false; + } + + ArenaAllocator* arena = mul->GetBlock()->GetGraph()->GetArena(); + + if (mul->HasOnlyOneNonEnvironmentUse()) { + HInstruction* use = mul->GetUses().front().GetUser(); + if (use->IsVecAdd() || use->IsVecSub()) { + // Replace code looking like + // VECMUL tmp, x, y + // VECADD/SUB dst, acc, tmp + // with + // VECMULACC dst, acc, x, y + // Note that we do not want to (unconditionally) perform the merge when the + // multiplication has multiple uses and it can be merged in all of them. + // Multiple uses could happen on the same control-flow path, and we would + // then increase the amount of work. In the future we could try to evaluate + // whether all uses are on different control-flow paths (using dominance and + // reverse-dominance information) and only perform the merge when they are. + HInstruction* accumulator = nullptr; + HVecBinaryOperation* binop = use->AsVecBinaryOperation(); + HInstruction* binop_left = binop->GetLeft(); + HInstruction* binop_right = binop->GetRight(); + // This is always true since the `HVecMul` has only one use (which is checked above). + DCHECK_NE(binop_left, binop_right); + if (binop_right == mul) { + accumulator = binop_left; + } else if (use->IsVecAdd()) { + DCHECK_EQ(binop_left, mul); + accumulator = binop_right; + } + + HInstruction::InstructionKind kind = + use->IsVecAdd() ? HInstruction::kAdd : HInstruction::kSub; + if (accumulator != nullptr) { + HVecMultiplyAccumulate* mulacc = + new (arena) HVecMultiplyAccumulate(arena, + kind, + accumulator, + mul->GetLeft(), + mul->GetRight(), + binop->GetPackedType(), + binop->GetVectorLength()); + + binop->GetBlock()->ReplaceAndRemoveInstructionWith(binop, mulacc); + DCHECK(!mul->HasUses()); + mul->GetBlock()->RemoveInstruction(mul); + return true; + } + } + } + + return false; +} + void InstructionSimplifierVisitor::VisitShift(HBinaryOperation* instruction) { DCHECK(instruction->IsShl() || instruction->IsShr() || instruction->IsUShr()); HInstruction* shift_amount = instruction->GetRight(); @@ -2301,4 +2381,10 @@ bool InstructionSimplifierVisitor::TrySubtractionChainSimplification( return true; } +void InstructionSimplifierVisitor::VisitVecMul(HVecMul* instruction) { + if (TryCombineVecMultiplyAccumulate(instruction)) { + RecordSimplification(); + } +} + } // namespace art diff --git a/compiler/optimizing/instruction_simplifier_arm64.cc b/compiler/optimizing/instruction_simplifier_arm64.cc index 311be1fb49..7c9bfb11b2 100644 --- a/compiler/optimizing/instruction_simplifier_arm64.cc +++ b/compiler/optimizing/instruction_simplifier_arm64.cc @@ -210,12 +210,6 @@ void InstructionSimplifierArm64Visitor::VisitXor(HXor* instruction) { } } -void InstructionSimplifierArm64Visitor::VisitVecMul(HVecMul* instruction) { - if (TryCombineVecMultiplyAccumulate(instruction, kArm64)) { - RecordSimplification(); - } -} - void InstructionSimplifierArm64Visitor::VisitVecLoad(HVecLoad* instruction) { if (!instruction->IsStringCharAt() && TryExtractVecArrayAccessAddress(instruction, instruction->GetIndex())) { diff --git a/compiler/optimizing/instruction_simplifier_arm64.h b/compiler/optimizing/instruction_simplifier_arm64.h index 8596f6ad40..4f16fc383d 100644 --- a/compiler/optimizing/instruction_simplifier_arm64.h +++ b/compiler/optimizing/instruction_simplifier_arm64.h @@ -74,7 +74,6 @@ class InstructionSimplifierArm64Visitor : public HGraphVisitor { void VisitTypeConversion(HTypeConversion* instruction) OVERRIDE; void VisitUShr(HUShr* instruction) OVERRIDE; void VisitXor(HXor* instruction) OVERRIDE; - void VisitVecMul(HVecMul* instruction) OVERRIDE; void VisitVecLoad(HVecLoad* instruction) OVERRIDE; void VisitVecStore(HVecStore* instruction) OVERRIDE; diff --git a/compiler/optimizing/instruction_simplifier_shared.cc b/compiler/optimizing/instruction_simplifier_shared.cc index d1bc4dadeb..7a759b9118 100644 --- a/compiler/optimizing/instruction_simplifier_shared.cc +++ b/compiler/optimizing/instruction_simplifier_shared.cc @@ -281,73 +281,6 @@ bool TryExtractArrayAccessAddress(HInstruction* access, return true; } -bool TryCombineVecMultiplyAccumulate(HVecMul* mul, InstructionSet isa) { - Primitive::Type type = mul->GetPackedType(); - switch (isa) { - case kArm64: - if (!(type == Primitive::kPrimByte || - type == Primitive::kPrimChar || - type == Primitive::kPrimShort || - type == Primitive::kPrimInt)) { - return false; - } - break; - default: - return false; - } - - ArenaAllocator* arena = mul->GetBlock()->GetGraph()->GetArena(); - - if (mul->HasOnlyOneNonEnvironmentUse()) { - HInstruction* use = mul->GetUses().front().GetUser(); - if (use->IsVecAdd() || use->IsVecSub()) { - // Replace code looking like - // VECMUL tmp, x, y - // VECADD/SUB dst, acc, tmp - // with - // VECMULACC dst, acc, x, y - // Note that we do not want to (unconditionally) perform the merge when the - // multiplication has multiple uses and it can be merged in all of them. - // Multiple uses could happen on the same control-flow path, and we would - // then increase the amount of work. In the future we could try to evaluate - // whether all uses are on different control-flow paths (using dominance and - // reverse-dominance information) and only perform the merge when they are. - HInstruction* accumulator = nullptr; - HVecBinaryOperation* binop = use->AsVecBinaryOperation(); - HInstruction* binop_left = binop->GetLeft(); - HInstruction* binop_right = binop->GetRight(); - // This is always true since the `HVecMul` has only one use (which is checked above). - DCHECK_NE(binop_left, binop_right); - if (binop_right == mul) { - accumulator = binop_left; - } else if (use->IsVecAdd()) { - DCHECK_EQ(binop_left, mul); - accumulator = binop_right; - } - - HInstruction::InstructionKind kind = - use->IsVecAdd() ? HInstruction::kAdd : HInstruction::kSub; - if (accumulator != nullptr) { - HVecMultiplyAccumulate* mulacc = - new (arena) HVecMultiplyAccumulate(arena, - kind, - accumulator, - mul->GetLeft(), - mul->GetRight(), - binop->GetPackedType(), - binop->GetVectorLength()); - - binop->GetBlock()->ReplaceAndRemoveInstructionWith(binop, mulacc); - DCHECK(!mul->HasUses()); - mul->GetBlock()->RemoveInstruction(mul); - return true; - } - } - } - - return false; -} - bool TryExtractVecArrayAccessAddress(HVecMemoryOperation* access, HInstruction* index) { if (index->IsConstant()) { // If index is constant the whole address calculation often can be done by LDR/STR themselves. diff --git a/compiler/optimizing/instruction_simplifier_shared.h b/compiler/optimizing/instruction_simplifier_shared.h index 371619fa2e..31e23833b1 100644 --- a/compiler/optimizing/instruction_simplifier_shared.h +++ b/compiler/optimizing/instruction_simplifier_shared.h @@ -58,7 +58,6 @@ bool TryExtractArrayAccessAddress(HInstruction* access, HInstruction* index, size_t data_offset); -bool TryCombineVecMultiplyAccumulate(HVecMul* mul, InstructionSet isa); bool TryExtractVecArrayAccessAddress(HVecMemoryOperation* access, HInstruction* index); } // namespace art |