| /* |
| * Copyright (C) 2018 The Android Open Source Project |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| #include "instruction_simplifier_x86.h" |
| #include "arch/x86/instruction_set_features_x86.h" |
| #include "mirror/array-inl.h" |
| #include "code_generator.h" |
| |
| |
| namespace art { |
| |
| namespace x86 { |
| |
| class InstructionSimplifierX86Visitor : public HGraphVisitor { |
| public: |
| InstructionSimplifierX86Visitor(HGraph* graph, |
| CodeGeneratorX86 *codegen, |
| OptimizingCompilerStats* stats) |
| : HGraphVisitor(graph), codegen_(codegen), stats_(stats) {} |
| |
| private: |
| void RecordSimplification() { |
| MaybeRecordStat(stats_, MethodCompilationStat::kInstructionSimplificationsArch); |
| } |
| |
| bool HasCpuFeatureFlag() { |
| return (codegen_->GetInstructionSetFeatures().HasAVX2()); |
| } |
| |
| /** |
| * This simplifier uses a special-purpose BB visitor. |
| * (1) No need to visit Phi nodes. |
| * (2) Since statements can be removed in a "forward" fashion, |
| * the visitor should test if each statement is still there. |
| */ |
| void VisitBasicBlock(HBasicBlock* block) OVERRIDE { |
| // TODO: fragile iteration, provide more robust iterators? |
| for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { |
| HInstruction* instruction = it.Current(); |
| if (instruction->IsInBlock()) { |
| instruction->Accept(this); |
| } |
| } |
| } |
| |
| bool TryGenerateVecMultiplyAccumulate(HVecMul* mul); |
| void VisitVecMul(HVecMul* instruction) OVERRIDE; |
| |
| CodeGeneratorX86* codegen_; |
| OptimizingCompilerStats* stats_; |
| }; |
| |
/*
 * Generic expressions for FMA:
 *   a = (b * c) + a
 *   a = (b * c) - a
 */
| bool InstructionSimplifierX86Visitor::TryGenerateVecMultiplyAccumulate(HVecMul* mul) { |
| if (!(mul->GetPackedType() == DataType::Type::kFloat32 || |
| mul->GetPackedType() == DataType::Type::kFloat64)) { |
| return false; |
| } |
| ArenaAllocator* allocator = mul->GetBlock()->GetGraph()->GetAllocator(); |
| if (mul->HasOnlyOneNonEnvironmentUse()) { |
| HInstruction* use = mul->GetUses().front().GetUser(); |
| if (use->IsVecAdd() || use->IsVecSub()) { |
| // Replace code looking like |
| // VECMUL tmp, x, y |
| // VECADD dst, acc, tmp or VECADD dst, tmp, acc |
| // or |
| // VECSUB dst, tmp, acc |
| // with |
| // VECMULACC dst, acc, x, y |
| |
| // Note that we do not want to (unconditionally) perform the merge when the |
| // multiplication has multiple uses and it can be merged in all of them. |
| // Multiple uses could happen on the same control-flow path, and we would |
| // then increase the amount of work. In the future we could try to evaluate |
| // whether all uses are on different control-flow paths (using dominance and |
| // reverse-dominance information) and only perform the merge when they are. |
| HInstruction* accumulator = nullptr; |
| HVecBinaryOperation* binop = use->AsVecBinaryOperation(); |
| HInstruction* binop_left = binop->GetLeft(); |
| HInstruction* binop_right = binop->GetRight(); |
| DCHECK_NE(binop_left, binop_right); |
| if (use->IsVecSub()) { |
| if (binop_left == mul) { |
| accumulator = binop_right; |
| } |
| } else { |
| // VecAdd |
| if (binop_right == mul) { |
| accumulator = binop_left; |
| } else { |
| DCHECK_EQ(binop_left, mul); |
| accumulator = binop_right; |
| } |
| } |
| HInstruction::InstructionKind kind = |
| use->IsVecAdd() ? HInstruction::kAdd : HInstruction::kSub; |
| |
| if (accumulator != nullptr) { |
| HVecMultiplyAccumulate* mulacc = |
| new (allocator) HVecMultiplyAccumulate(allocator, |
| kind, |
| accumulator, |
| mul->GetLeft(), |
| mul->GetRight(), |
| binop->GetPackedType(), |
| binop->GetVectorLength(), |
| binop->GetDexPc()); |
| binop->GetBlock()->ReplaceAndRemoveInstructionWith(binop, mulacc); |
| DCHECK(!mul->HasUses()); |
| mul->GetBlock()->RemoveInstruction(mul); |
| return true; |
| } |
| } |
| } |
| return false; |
| } |
| |
| void InstructionSimplifierX86Visitor::VisitVecMul(HVecMul* instruction) { |
| if (HasCpuFeatureFlag()) { |
| if (TryGenerateVecMultiplyAccumulate(instruction)) { |
| RecordSimplification(); |
| } |
| } |
| } |
| |
| bool InstructionSimplifierX86::Run() { |
| InstructionSimplifierX86Visitor visitor(graph_, codegen_, stats_); |
| visitor.VisitReversePostOrder(); |
| return true; |
| } |
| |
| } // namespace x86 |
| } // namespace art |