diff options
Diffstat (limited to 'compiler/optimizing/instruction_simplifier_x86.cc')
-rw-r--r-- | compiler/optimizing/instruction_simplifier_x86.cc | 149 |
1 files changed, 149 insertions, 0 deletions
diff --git a/compiler/optimizing/instruction_simplifier_x86.cc b/compiler/optimizing/instruction_simplifier_x86.cc new file mode 100644 index 0000000000..b3f67d6e84 --- /dev/null +++ b/compiler/optimizing/instruction_simplifier_x86.cc @@ -0,0 +1,149 @@ +/* + * Copyright (C) 2018 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "instruction_simplifier_x86.h" +#include "arch/x86/instruction_set_features_x86.h" +#include "mirror/array-inl.h" +#include "code_generator.h" + + +namespace art { + +namespace x86 { + +class InstructionSimplifierX86Visitor : public HGraphVisitor { + public: + InstructionSimplifierX86Visitor(HGraph* graph, + CodeGeneratorX86 *codegen, + OptimizingCompilerStats* stats) + : HGraphVisitor(graph), codegen_(codegen), stats_(stats) {} + + private: + void RecordSimplification() { + MaybeRecordStat(stats_, MethodCompilationStat::kInstructionSimplificationsArch); + } + + bool HasCpuFeatureFlag() { + return (codegen_->GetInstructionSetFeatures().HasAVX2()); + } + + /** + * This simplifier uses a special-purpose BB visitor. + * (1) No need to visit Phi nodes. + * (2) Since statements can be removed in a "forward" fashion, + * the visitor should test if each statement is still there. + */ + void VisitBasicBlock(HBasicBlock* block) OVERRIDE { + // TODO: fragile iteration, provide more robust iterators? + for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { + HInstruction* instruction = it.Current(); + if (instruction->IsInBlock()) { + instruction->Accept(this); + } + } + } + + bool TryGenerateVecMultiplyAccumulate(HVecMul* mul); + void VisitVecMul(HVecMul* instruction) OVERRIDE; + + CodeGeneratorX86* codegen_; + OptimizingCompilerStats* stats_; +}; + +/* generic expressions for FMA +a = (b * c) + a +a = (b * c) – a +*/ +bool InstructionSimplifierX86Visitor::TryGenerateVecMultiplyAccumulate(HVecMul* mul) { + if (!(mul->GetPackedType() == DataType::Type::kFloat32 || + mul->GetPackedType() == DataType::Type::kFloat64)) { + return false; + } + ArenaAllocator* allocator = mul->GetBlock()->GetGraph()->GetAllocator(); + if (mul->HasOnlyOneNonEnvironmentUse()) { + HInstruction* use = mul->GetUses().front().GetUser(); + if (use->IsVecAdd() || use->IsVecSub()) { + // Replace code looking like + // VECMUL tmp, x, y + // VECADD dst, acc, tmp or VECADD dst, tmp, acc + // or + // VECSUB dst, tmp, acc + // with + // VECMULACC dst, acc, x, y + + // Note that we do not want to (unconditionally) perform the merge when the + // multiplication has multiple uses and it can be merged in all of them. + // Multiple uses could happen on the same control-flow path, and we would + // then increase the amount of work. In the future we could try to evaluate + // whether all uses are on different control-flow paths (using dominance and + // reverse-dominance information) and only perform the merge when they are. + HInstruction* accumulator = nullptr; + HVecBinaryOperation* binop = use->AsVecBinaryOperation(); + HInstruction* binop_left = binop->GetLeft(); + HInstruction* binop_right = binop->GetRight(); + DCHECK_NE(binop_left, binop_right); + if (use->IsVecSub()) { + if (binop_left == mul) { + accumulator = binop_right; + } + } else { + // VecAdd + if (binop_right == mul) { + accumulator = binop_left; + } else { + DCHECK_EQ(binop_left, mul); + accumulator = binop_right; + } + } + HInstruction::InstructionKind kind = + use->IsVecAdd() ? HInstruction::kAdd : HInstruction::kSub; + + if (accumulator != nullptr) { + HVecMultiplyAccumulate* mulacc = + new (allocator) HVecMultiplyAccumulate(allocator, + kind, + accumulator, + mul->GetLeft(), + mul->GetRight(), + binop->GetPackedType(), + binop->GetVectorLength(), + binop->GetDexPc()); + binop->GetBlock()->ReplaceAndRemoveInstructionWith(binop, mulacc); + DCHECK(!mul->HasUses()); + mul->GetBlock()->RemoveInstruction(mul); + return true; + } + } + } + return false; +} + +void InstructionSimplifierX86Visitor::VisitVecMul(HVecMul* instruction) { + if (HasCpuFeatureFlag()) { + if (TryGenerateVecMultiplyAccumulate(instruction)) { + RecordSimplification(); + } + } +} + +bool InstructionSimplifierX86::Run() { + InstructionSimplifierX86Visitor visitor(graph_, codegen_, stats_); + visitor.VisitReversePostOrder(); + return true; +} + +} // namespace x86 +} // namespace art |