Aart Bik | 281c681 | 2016-08-26 11:31:48 -0700 | [diff] [blame] | 1 | /* |
| 2 | * Copyright (C) 2016 The Android Open Source Project |
| 3 | * |
| 4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | * you may not use this file except in compliance with the License. |
| 6 | * You may obtain a copy of the License at |
| 7 | * |
| 8 | * http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | * |
| 10 | * Unless required by applicable law or agreed to in writing, software |
| 11 | * distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | * See the License for the specific language governing permissions and |
| 14 | * limitations under the License. |
| 15 | */ |
| 16 | |
| 17 | #ifndef ART_COMPILER_OPTIMIZING_LOOP_OPTIMIZATION_H_ |
| 18 | #define ART_COMPILER_OPTIMIZING_LOOP_OPTIMIZATION_H_ |
| 19 | |
Aart Bik | 281c681 | 2016-08-26 11:31:48 -0700 | [diff] [blame] | 20 | #include "induction_var_range.h" |
| 21 | #include "nodes.h" |
| 22 | #include "optimization.h" |
| 23 | |
| 24 | namespace art { |
| 25 | |
Aart Bik | 92685a8 | 2017-03-06 11:13:43 -0800 | [diff] [blame] | 26 | class CompilerDriver; |
| 27 | |
Aart Bik | 281c681 | 2016-08-26 11:31:48 -0700 | [diff] [blame] | 28 | /** |
| 29 | * Loop optimizations. Builds a loop hierarchy and applies optimizations to |
Aart Bik | f8f5a16 | 2017-02-06 15:35:29 -0800 | [diff] [blame] | 30 | * the detected nested loops, such as removal of dead induction and empty loops |
| 31 | * and inner loop vectorization. |
Aart Bik | 281c681 | 2016-08-26 11:31:48 -0700 | [diff] [blame] | 32 | */ |
| 33 | class HLoopOptimization : public HOptimization { |
| 34 | public: |
Aart Bik | 92685a8 | 2017-03-06 11:13:43 -0800 | [diff] [blame] | 35 | HLoopOptimization(HGraph* graph, |
| 36 | CompilerDriver* compiler_driver, |
| 37 | HInductionVarAnalysis* induction_analysis); |
Aart Bik | 281c681 | 2016-08-26 11:31:48 -0700 | [diff] [blame] | 38 | |
| 39 | void Run() OVERRIDE; |
| 40 | |
| 41 | static constexpr const char* kLoopOptimizationPassName = "loop_optimization"; |
| 42 | |
| 43 | private: |
| 44 | /** |
| 45 | * A single loop inside the loop hierarchy representation. |
| 46 | */ |
Aart Bik | 9620230 | 2016-10-04 17:33:56 -0700 | [diff] [blame] | 47 | struct LoopNode : public ArenaObject<kArenaAllocLoopOptimization> { |
Aart Bik | 281c681 | 2016-08-26 11:31:48 -0700 | [diff] [blame] | 48 | explicit LoopNode(HLoopInformation* lp_info) |
| 49 | : loop_info(lp_info), |
| 50 | outer(nullptr), |
| 51 | inner(nullptr), |
| 52 | previous(nullptr), |
| 53 | next(nullptr) {} |
Aart Bik | f8f5a16 | 2017-02-06 15:35:29 -0800 | [diff] [blame] | 54 | HLoopInformation* loop_info; |
Aart Bik | 281c681 | 2016-08-26 11:31:48 -0700 | [diff] [blame] | 55 | LoopNode* outer; |
| 56 | LoopNode* inner; |
| 57 | LoopNode* previous; |
| 58 | LoopNode* next; |
| 59 | }; |
| 60 | |
Aart Bik | f8f5a16 | 2017-02-06 15:35:29 -0800 | [diff] [blame] | 61 | /* |
| 62 | * Vectorization restrictions (bit mask). |
| 63 | */ |
| 64 | enum VectorRestrictions { |
Aart Bik | f3e61ee | 2017-04-12 17:09:20 -0700 | [diff] [blame] | 65 | kNone = 0, // no restrictions |
| 66 | kNoMul = 1, // no multiplication |
| 67 | kNoDiv = 2, // no division |
| 68 | kNoShift = 4, // no shift |
| 69 | kNoShr = 8, // no arithmetic shift right |
| 70 | kNoHiBits = 16, // "wider" operations cannot bring in higher order bits |
| 71 | kNoSignedHAdd = 32, // no signed halving add |
| 72 | kNoUnroundedHAdd = 64, // no unrounded halving add |
| 73 | kNoAbs = 128, // no absolute value |
Aart Bik | c8e93c7 | 2017-05-10 10:49:22 -0700 | [diff] [blame] | 74 | kNoMinMax = 256, // no min/max |
Aart Bik | f8f5a16 | 2017-02-06 15:35:29 -0800 | [diff] [blame] | 75 | }; |
Aart Bik | 9620230 | 2016-10-04 17:33:56 -0700 | [diff] [blame] | 76 | |
Aart Bik | f8f5a16 | 2017-02-06 15:35:29 -0800 | [diff] [blame] | 77 | /* |
| 78 | * Vectorization mode during synthesis |
| 79 | * (sequential peeling/cleanup loop or vector loop). |
| 80 | */ |
| 81 | enum VectorMode { |
| 82 | kSequential, |
| 83 | kVector |
| 84 | }; |
| 85 | |
| 86 | /* |
| 87 | * Representation of a unit-stride array reference. |
| 88 | */ |
| 89 | struct ArrayReference { |
| 90 | ArrayReference(HInstruction* b, HInstruction* o, Primitive::Type t, bool l) |
| 91 | : base(b), offset(o), type(t), lhs(l) { } |
| 92 | bool operator<(const ArrayReference& other) const { |
| 93 | return |
| 94 | (base < other.base) || |
| 95 | (base == other.base && |
| 96 | (offset < other.offset || (offset == other.offset && |
| 97 | (type < other.type || |
| 98 | (type == other.type && lhs < other.lhs))))); |
| 99 | } |
| 100 | HInstruction* base; // base address |
| 101 | HInstruction* offset; // offset + i |
| 102 | Primitive::Type type; // component type |
| 103 | bool lhs; // def/use |
| 104 | }; |
| 105 | |
| 106 | // Loop setup and traversal. |
| 107 | void LocalRun(); |
Aart Bik | 281c681 | 2016-08-26 11:31:48 -0700 | [diff] [blame] | 108 | void AddLoop(HLoopInformation* loop_info); |
| 109 | void RemoveLoop(LoopNode* node); |
Aart Bik | 281c681 | 2016-08-26 11:31:48 -0700 | [diff] [blame] | 110 | void TraverseLoopsInnerToOuter(LoopNode* node); |
| 111 | |
Aart Bik | f8f5a16 | 2017-02-06 15:35:29 -0800 | [diff] [blame] | 112 | // Optimization. |
Aart Bik | 281c681 | 2016-08-26 11:31:48 -0700 | [diff] [blame] | 113 | void SimplifyInduction(LoopNode* node); |
Aart Bik | 482095d | 2016-10-10 15:39:10 -0700 | [diff] [blame] | 114 | void SimplifyBlocks(LoopNode* node); |
Aart Bik | f8f5a16 | 2017-02-06 15:35:29 -0800 | [diff] [blame] | 115 | void OptimizeInnerLoop(LoopNode* node); |
| 116 | |
| 117 | // Vectorization analysis and synthesis. |
| 118 | bool CanVectorize(LoopNode* node, HBasicBlock* block, int64_t trip_count); |
| 119 | void Vectorize(LoopNode* node, HBasicBlock* block, HBasicBlock* exit, int64_t trip_count); |
| 120 | void GenerateNewLoop(LoopNode* node, |
| 121 | HBasicBlock* block, |
| 122 | HBasicBlock* new_preheader, |
| 123 | HInstruction* lo, |
| 124 | HInstruction* hi, |
| 125 | HInstruction* step); |
| 126 | bool VectorizeDef(LoopNode* node, HInstruction* instruction, bool generate_code); |
| 127 | bool VectorizeUse(LoopNode* node, |
| 128 | HInstruction* instruction, |
| 129 | bool generate_code, |
| 130 | Primitive::Type type, |
| 131 | uint64_t restrictions); |
| 132 | bool TrySetVectorType(Primitive::Type type, /*out*/ uint64_t* restrictions); |
| 133 | bool TrySetVectorLength(uint32_t length); |
| 134 | void GenerateVecInv(HInstruction* org, Primitive::Type type); |
| 135 | void GenerateVecSub(HInstruction* org, HInstruction* off); |
| 136 | void GenerateVecMem(HInstruction* org, |
| 137 | HInstruction* opa, |
| 138 | HInstruction* opb, |
| 139 | Primitive::Type type); |
| 140 | void GenerateVecOp(HInstruction* org, HInstruction* opa, HInstruction* opb, Primitive::Type type); |
Aart Bik | 281c681 | 2016-08-26 11:31:48 -0700 | [diff] [blame] | 141 | |
Aart Bik | f3e61ee | 2017-04-12 17:09:20 -0700 | [diff] [blame] | 142 | // Vectorization idioms. |
| 143 | bool VectorizeHalvingAddIdiom(LoopNode* node, |
| 144 | HInstruction* instruction, |
| 145 | bool generate_code, |
| 146 | Primitive::Type type, |
| 147 | uint64_t restrictions); |
| 148 | |
Aart Bik | 6b69e0a | 2017-01-11 10:20:43 -0800 | [diff] [blame] | 149 | // Helpers. |
Aart Bik | f8f5a16 | 2017-02-06 15:35:29 -0800 | [diff] [blame] | 150 | bool TrySetPhiInduction(HPhi* phi, bool restrict_uses); |
| 151 | bool TrySetSimpleLoopHeader(HBasicBlock* block); |
Aart Bik | cc42be0 | 2016-10-20 16:14:16 -0700 | [diff] [blame] | 152 | bool IsEmptyBody(HBasicBlock* block); |
Aart Bik | 482095d | 2016-10-10 15:39:10 -0700 | [diff] [blame] | 153 | bool IsOnlyUsedAfterLoop(HLoopInformation* loop_info, |
Aart Bik | 8c4a854 | 2016-10-06 11:36:57 -0700 | [diff] [blame] | 154 | HInstruction* instruction, |
Aart Bik | 6b69e0a | 2017-01-11 10:20:43 -0800 | [diff] [blame] | 155 | bool collect_loop_uses, |
Aart Bik | 8c4a854 | 2016-10-06 11:36:57 -0700 | [diff] [blame] | 156 | /*out*/ int32_t* use_count); |
Aart Bik | f8f5a16 | 2017-02-06 15:35:29 -0800 | [diff] [blame] | 157 | bool IsUsedOutsideLoop(HLoopInformation* loop_info, |
| 158 | HInstruction* instruction); |
Aart Bik | 807868e | 2016-11-03 17:51:43 -0700 | [diff] [blame] | 159 | bool TryReplaceWithLastValue(HInstruction* instruction, HBasicBlock* block); |
Aart Bik | f8f5a16 | 2017-02-06 15:35:29 -0800 | [diff] [blame] | 160 | bool TryAssignLastValue(HLoopInformation* loop_info, |
| 161 | HInstruction* instruction, |
| 162 | HBasicBlock* block, |
| 163 | bool collect_loop_uses); |
Aart Bik | 6b69e0a | 2017-01-11 10:20:43 -0800 | [diff] [blame] | 164 | void RemoveDeadInstructions(const HInstructionList& list); |
Aart Bik | 281c681 | 2016-08-26 11:31:48 -0700 | [diff] [blame] | 165 | |
Aart Bik | 92685a8 | 2017-03-06 11:13:43 -0800 | [diff] [blame] | 166 | // Compiler driver (to query ISA features). |
| 167 | const CompilerDriver* compiler_driver_; |
| 168 | |
Aart Bik | 9620230 | 2016-10-04 17:33:56 -0700 | [diff] [blame] | 169 | // Range information based on prior induction variable analysis. |
Aart Bik | 281c681 | 2016-08-26 11:31:48 -0700 | [diff] [blame] | 170 | InductionVarRange induction_range_; |
| 171 | |
| 172 | // Phase-local heap memory allocator for the loop optimizer. Storage obtained |
Aart Bik | 9620230 | 2016-10-04 17:33:56 -0700 | [diff] [blame] | 173 | // through this allocator is immediately released when the loop optimizer is done. |
Nicolas Geoffray | ebe1674 | 2016-10-05 09:55:42 +0100 | [diff] [blame] | 174 | ArenaAllocator* loop_allocator_; |
Aart Bik | 281c681 | 2016-08-26 11:31:48 -0700 | [diff] [blame] | 175 | |
Aart Bik | f8f5a16 | 2017-02-06 15:35:29 -0800 | [diff] [blame] | 176 | // Global heap memory allocator. Used to build HIR. |
| 177 | ArenaAllocator* global_allocator_; |
| 178 | |
Aart Bik | 9620230 | 2016-10-04 17:33:56 -0700 | [diff] [blame] | 179 | // Entries into the loop hierarchy representation. The hierarchy resides |
| 180 | // in phase-local heap memory. |
Aart Bik | 281c681 | 2016-08-26 11:31:48 -0700 | [diff] [blame] | 181 | LoopNode* top_loop_; |
| 182 | LoopNode* last_loop_; |
| 183 | |
Aart Bik | 8c4a854 | 2016-10-06 11:36:57 -0700 | [diff] [blame] | 184 | // Temporary bookkeeping of a set of instructions. |
| 185 | // Contents reside in phase-local heap memory. |
| 186 | ArenaSet<HInstruction*>* iset_; |
| 187 | |
Aart Bik | 482095d | 2016-10-10 15:39:10 -0700 | [diff] [blame] | 188 | // Counter that tracks how many induction cycles have been simplified. Useful |
| 189 | // to trigger incremental updates of induction variable analysis of outer loops |
| 190 | // when the induction of inner loops has changed. |
Aart Bik | f8f5a16 | 2017-02-06 15:35:29 -0800 | [diff] [blame] | 191 | uint32_t induction_simplication_count_; |
Aart Bik | 482095d | 2016-10-10 15:39:10 -0700 | [diff] [blame] | 192 | |
Aart Bik | df7822e | 2016-12-06 10:05:30 -0800 | [diff] [blame] | 193 | // Flag that tracks if any simplifications have occurred. |
| 194 | bool simplified_; |
| 195 | |
Aart Bik | f8f5a16 | 2017-02-06 15:35:29 -0800 | [diff] [blame] | 196 | // Number of "lanes" for selected packed type. |
| 197 | uint32_t vector_length_; |
| 198 | |
| 199 | // Set of array references in the vector loop. |
| 200 | // Contents reside in phase-local heap memory. |
| 201 | ArenaSet<ArrayReference>* vector_refs_; |
| 202 | |
| 203 | // Mapping used during vectorization synthesis for both the scalar peeling/cleanup |
| 204 | // loop (simd_ is false) and the actual vector loop (simd_ is true). The data |
| 205 | // structure maps original instructions into the new instructions. |
| 206 | // Contents reside in phase-local heap memory. |
| 207 | ArenaSafeMap<HInstruction*, HInstruction*>* vector_map_; |
| 208 | |
| 209 | // Temporary vectorization bookkeeping. |
| 210 | HBasicBlock* vector_preheader_; // preheader of the new loop |
| 211 | HBasicBlock* vector_header_; // header of the new loop |
| 212 | HBasicBlock* vector_body_; // body of the new loop |
| 213 | HInstruction* vector_runtime_test_a_; |
| 214 | HInstruction* vector_runtime_test_b_; // defines a != b runtime test |
| 215 | HPhi* vector_phi_; // the Phi representing the normalized loop index |
| 216 | VectorMode vector_mode_; // selects synthesis mode |
| 217 | |
Aart Bik | 281c681 | 2016-08-26 11:31:48 -0700 | [diff] [blame] | 218 | friend class LoopOptimizationTest; |
| 219 | |
| 220 | DISALLOW_COPY_AND_ASSIGN(HLoopOptimization); |
| 221 | }; |
| 222 | |
| 223 | } // namespace art |
| 224 | |
| 225 | #endif // ART_COMPILER_OPTIMIZING_LOOP_OPTIMIZATION_H_ |