| /* |
| * Copyright (C) 2016 The Android Open Source Project |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| #ifndef ART_COMPILER_OPTIMIZING_LOOP_OPTIMIZATION_H_ |
| #define ART_COMPILER_OPTIMIZING_LOOP_OPTIMIZATION_H_ |
| |
| #include "induction_var_range.h" |
| #include "nodes.h" |
| #include "optimization.h" |
| |
| namespace art { |
| |
| class CompilerDriver; |
| |
| /** |
| * Loop optimizations. Builds a loop hierarchy and applies optimizations to |
| * the detected nested loops, such as removal of dead induction and empty loops |
| * and inner loop vectorization. |
| */ |
| class HLoopOptimization : public HOptimization { |
| public: |
| HLoopOptimization(HGraph* graph, |
| CompilerDriver* compiler_driver, |
| HInductionVarAnalysis* induction_analysis); |
| |
| void Run() OVERRIDE; |
| |
| static constexpr const char* kLoopOptimizationPassName = "loop_optimization"; |
| |
| private: |
| /** |
| * A single loop inside the loop hierarchy representation. |
| */ |
| struct LoopNode : public ArenaObject<kArenaAllocLoopOptimization> { |
| explicit LoopNode(HLoopInformation* lp_info) |
| : loop_info(lp_info), |
| outer(nullptr), |
| inner(nullptr), |
| previous(nullptr), |
| next(nullptr) {} |
| HLoopInformation* loop_info; |
| LoopNode* outer; |
| LoopNode* inner; |
| LoopNode* previous; |
| LoopNode* next; |
| }; |
| |
| /* |
| * Vectorization restrictions (bit mask). |
| */ |
| enum VectorRestrictions { |
| kNone = 0, // no restrictions |
| kNoMul = 1, // no multiplication |
| kNoDiv = 2, // no division |
| kNoShift = 4, // no shift |
| kNoShr = 8, // no arithmetic shift right |
| kNoHiBits = 16, // "wider" operations cannot bring in higher order bits |
| kNoAbs = 32, // no absolute value |
| }; |
| |
| /* |
| * Vectorization mode during synthesis |
| * (sequential peeling/cleanup loop or vector loop). |
| */ |
| enum VectorMode { |
| kSequential, |
| kVector |
| }; |
| |
| /* |
| * Representation of a unit-stride array reference. |
| */ |
| struct ArrayReference { |
| ArrayReference(HInstruction* b, HInstruction* o, Primitive::Type t, bool l) |
| : base(b), offset(o), type(t), lhs(l) { } |
| bool operator<(const ArrayReference& other) const { |
| return |
| (base < other.base) || |
| (base == other.base && |
| (offset < other.offset || (offset == other.offset && |
| (type < other.type || |
| (type == other.type && lhs < other.lhs))))); |
| } |
| HInstruction* base; // base address |
| HInstruction* offset; // offset + i |
| Primitive::Type type; // component type |
| bool lhs; // def/use |
| }; |
| |
| // Loop setup and traversal. |
| void LocalRun(); |
| void AddLoop(HLoopInformation* loop_info); |
| void RemoveLoop(LoopNode* node); |
| void TraverseLoopsInnerToOuter(LoopNode* node); |
| |
| // Optimization. |
| void SimplifyInduction(LoopNode* node); |
| void SimplifyBlocks(LoopNode* node); |
| void OptimizeInnerLoop(LoopNode* node); |
| |
| // Vectorization analysis and synthesis. |
| bool CanVectorize(LoopNode* node, HBasicBlock* block, int64_t trip_count); |
| void Vectorize(LoopNode* node, HBasicBlock* block, HBasicBlock* exit, int64_t trip_count); |
| void GenerateNewLoop(LoopNode* node, |
| HBasicBlock* block, |
| HBasicBlock* new_preheader, |
| HInstruction* lo, |
| HInstruction* hi, |
| HInstruction* step); |
| bool VectorizeDef(LoopNode* node, HInstruction* instruction, bool generate_code); |
| bool VectorizeUse(LoopNode* node, |
| HInstruction* instruction, |
| bool generate_code, |
| Primitive::Type type, |
| uint64_t restrictions); |
| bool TrySetVectorType(Primitive::Type type, /*out*/ uint64_t* restrictions); |
| bool TrySetVectorLength(uint32_t length); |
| void GenerateVecInv(HInstruction* org, Primitive::Type type); |
| void GenerateVecSub(HInstruction* org, HInstruction* off); |
| void GenerateVecMem(HInstruction* org, |
| HInstruction* opa, |
| HInstruction* opb, |
| Primitive::Type type); |
| void GenerateVecOp(HInstruction* org, HInstruction* opa, HInstruction* opb, Primitive::Type type); |
| |
| // Helpers. |
| bool TrySetPhiInduction(HPhi* phi, bool restrict_uses); |
| bool TrySetSimpleLoopHeader(HBasicBlock* block); |
| bool IsEmptyBody(HBasicBlock* block); |
| bool IsOnlyUsedAfterLoop(HLoopInformation* loop_info, |
| HInstruction* instruction, |
| bool collect_loop_uses, |
| /*out*/ int32_t* use_count); |
| bool IsUsedOutsideLoop(HLoopInformation* loop_info, |
| HInstruction* instruction); |
| bool TryReplaceWithLastValue(HInstruction* instruction, HBasicBlock* block); |
| bool TryAssignLastValue(HLoopInformation* loop_info, |
| HInstruction* instruction, |
| HBasicBlock* block, |
| bool collect_loop_uses); |
| void RemoveDeadInstructions(const HInstructionList& list); |
| |
| // Compiler driver (to query ISA features). |
| const CompilerDriver* compiler_driver_; |
| |
| // Range information based on prior induction variable analysis. |
| InductionVarRange induction_range_; |
| |
| // Phase-local heap memory allocator for the loop optimizer. Storage obtained |
| // through this allocator is immediately released when the loop optimizer is done. |
| ArenaAllocator* loop_allocator_; |
| |
| // Global heap memory allocator. Used to build HIR. |
| ArenaAllocator* global_allocator_; |
| |
| // Entries into the loop hierarchy representation. The hierarchy resides |
| // in phase-local heap memory. |
| LoopNode* top_loop_; |
| LoopNode* last_loop_; |
| |
| // Temporary bookkeeping of a set of instructions. |
| // Contents reside in phase-local heap memory. |
| ArenaSet<HInstruction*>* iset_; |
| |
| // Counter that tracks how many induction cycles have been simplified. Useful |
| // to trigger incremental updates of induction variable analysis of outer loops |
| // when the induction of inner loops has changed. |
| uint32_t induction_simplication_count_; |
| |
| // Flag that tracks if any simplifications have occurred. |
| bool simplified_; |
| |
| // Number of "lanes" for selected packed type. |
| uint32_t vector_length_; |
| |
| // Set of array references in the vector loop. |
| // Contents reside in phase-local heap memory. |
| ArenaSet<ArrayReference>* vector_refs_; |
| |
| // Mapping used during vectorization synthesis for both the scalar peeling/cleanup |
| // loop (simd_ is false) and the actual vector loop (simd_ is true). The data |
| // structure maps original instructions into the new instructions. |
| // Contents reside in phase-local heap memory. |
| ArenaSafeMap<HInstruction*, HInstruction*>* vector_map_; |
| |
| // Temporary vectorization bookkeeping. |
| HBasicBlock* vector_preheader_; // preheader of the new loop |
| HBasicBlock* vector_header_; // header of the new loop |
| HBasicBlock* vector_body_; // body of the new loop |
| HInstruction* vector_runtime_test_a_; |
| HInstruction* vector_runtime_test_b_; // defines a != b runtime test |
| HPhi* vector_phi_; // the Phi representing the normalized loop index |
| VectorMode vector_mode_; // selects synthesis mode |
| |
| friend class LoopOptimizationTest; |
| |
| DISALLOW_COPY_AND_ASSIGN(HLoopOptimization); |
| }; |
| |
| } // namespace art |
| |
| #endif // ART_COMPILER_OPTIMIZING_LOOP_OPTIMIZATION_H_ |