Aart Bik | 281c681 | 2016-08-26 11:31:48 -0700 | [diff] [blame] | 1 | /* |
| 2 | * Copyright (C) 2016 The Android Open Source Project |
| 3 | * |
| 4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | * you may not use this file except in compliance with the License. |
| 6 | * You may obtain a copy of the License at |
| 7 | * |
| 8 | * http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | * |
| 10 | * Unless required by applicable law or agreed to in writing, software |
| 11 | * distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | * See the License for the specific language governing permissions and |
| 14 | * limitations under the License. |
| 15 | */ |
| 16 | |
| 17 | #ifndef ART_COMPILER_OPTIMIZING_LOOP_OPTIMIZATION_H_ |
| 18 | #define ART_COMPILER_OPTIMIZING_LOOP_OPTIMIZATION_H_ |
| 19 | |
Vladimír Marko | 434d968 | 2022-11-04 14:04:17 +0000 | [diff] [blame] | 20 | #include "base/macros.h" |
Vladimir Marko | ca6fff8 | 2017-10-03 14:49:14 +0100 | [diff] [blame] | 21 | #include "base/scoped_arena_allocator.h" |
| 22 | #include "base/scoped_arena_containers.h" |
Aart Bik | 281c681 | 2016-08-26 11:31:48 -0700 | [diff] [blame] | 23 | #include "induction_var_range.h" |
Artem Serov | 121f203 | 2017-10-23 19:19:06 +0100 | [diff] [blame] | 24 | #include "loop_analysis.h" |
Aart Bik | 281c681 | 2016-08-26 11:31:48 -0700 | [diff] [blame] | 25 | #include "nodes.h" |
| 26 | #include "optimization.h" |
Artem Serov | 121f203 | 2017-10-23 19:19:06 +0100 | [diff] [blame] | 27 | #include "superblock_cloner.h" |
Aart Bik | 281c681 | 2016-08-26 11:31:48 -0700 | [diff] [blame] | 28 | |
Vladimír Marko | 434d968 | 2022-11-04 14:04:17 +0000 | [diff] [blame] | 29 | namespace art HIDDEN { |
Aart Bik | 281c681 | 2016-08-26 11:31:48 -0700 | [diff] [blame] | 30 | |
Vladimir Marko | a043111 | 2018-06-25 09:32:54 +0100 | [diff] [blame] | 31 | class CompilerOptions; |
Artem Serov | cf43fb6 | 2018-02-15 14:43:48 +0000 | [diff] [blame] | 32 | class ArchNoOptsLoopHelper; |
Aart Bik | 92685a8 | 2017-03-06 11:13:43 -0800 | [diff] [blame] | 33 | |
Aart Bik | 281c681 | 2016-08-26 11:31:48 -0700 | [diff] [blame] | 34 | /** |
| 35 | * Loop optimizations. Builds a loop hierarchy and applies optimizations to |
Aart Bik | f8f5a16 | 2017-02-06 15:35:29 -0800 | [diff] [blame] | 36 | * the detected nested loops, such as removal of dead induction and empty loops |
| 37 | * and inner loop vectorization. |
Aart Bik | 281c681 | 2016-08-26 11:31:48 -0700 | [diff] [blame] | 38 | */ |
| 39 | class HLoopOptimization : public HOptimization { |
| 40 | public: |
Aart Bik | 92685a8 | 2017-03-06 11:13:43 -0800 | [diff] [blame] | 41 | HLoopOptimization(HGraph* graph, |
Artem Serov | c8150b5 | 2019-07-31 18:28:00 +0100 | [diff] [blame] | 42 | const CodeGenerator& codegen, // Needs info about the target. |
Aart Bik | b92cc33 | 2017-09-06 15:53:17 -0700 | [diff] [blame] | 43 | HInductionVarAnalysis* induction_analysis, |
Aart Bik | 2ca10eb | 2017-11-15 15:17:53 -0800 | [diff] [blame] | 44 | OptimizingCompilerStats* stats, |
| 45 | const char* name = kLoopOptimizationPassName); |
Aart Bik | 281c681 | 2016-08-26 11:31:48 -0700 | [diff] [blame] | 46 | |
Roland Levillain | bbc6e7e | 2018-08-24 16:58:47 +0100 | [diff] [blame] | 47 | bool Run() override; |
Aart Bik | 281c681 | 2016-08-26 11:31:48 -0700 | [diff] [blame] | 48 | |
// Name of this pass; also the default value of the constructor's `name` argument.
static constexpr const char* kLoopOptimizationPassName = "loop_optimization";

// Upper bound on the total number of instructions (trip_count * instruction_count)
// of a loop for which the removal of SuspendChecks from the loop header may still
// be performed.
static constexpr int64_t kMaxTotalInstRemoveSuspendCheck = 128;
| 55 | |
Aart Bik | 281c681 | 2016-08-26 11:31:48 -0700 | [diff] [blame] | 56 | private: |
| 57 | /** |
| 58 | * A single loop inside the loop hierarchy representation. |
| 59 | */ |
Aart Bik | 9620230 | 2016-10-04 17:33:56 -0700 | [diff] [blame] | 60 | struct LoopNode : public ArenaObject<kArenaAllocLoopOptimization> { |
Aart Bik | 281c681 | 2016-08-26 11:31:48 -0700 | [diff] [blame] | 61 | explicit LoopNode(HLoopInformation* lp_info) |
| 62 | : loop_info(lp_info), |
| 63 | outer(nullptr), |
| 64 | inner(nullptr), |
| 65 | previous(nullptr), |
Santiago Aboy Solanes | 0eca098 | 2022-04-08 18:00:48 +0100 | [diff] [blame] | 66 | next(nullptr), |
| 67 | try_catch_kind(TryCatchKind::kUnknown) {} |
| 68 | |
| 69 | enum class TryCatchKind { |
| 70 | kUnknown, |
| 71 | // Either if we have a try catch in the loop, or if the loop is inside of an outer try catch, |
| 72 | // we set `kHasTryCatch`. |
| 73 | kHasTryCatch, |
| 74 | kNoTryCatch |
| 75 | }; |
| 76 | |
Aart Bik | f8f5a16 | 2017-02-06 15:35:29 -0800 | [diff] [blame] | 77 | HLoopInformation* loop_info; |
Aart Bik | 281c681 | 2016-08-26 11:31:48 -0700 | [diff] [blame] | 78 | LoopNode* outer; |
| 79 | LoopNode* inner; |
| 80 | LoopNode* previous; |
| 81 | LoopNode* next; |
Santiago Aboy Solanes | 0eca098 | 2022-04-08 18:00:48 +0100 | [diff] [blame] | 82 | TryCatchKind try_catch_kind; |
Aart Bik | 281c681 | 2016-08-26 11:31:48 -0700 | [diff] [blame] | 83 | }; |
| 84 | |
/*
 * Restrictions on vectorization, expressed as a bit mask: apart from kNone,
 * each enumerator occupies a bit of its own.
 */
enum VectorRestrictions {
  kNone            = 0,        // no restrictions
  kNoMul           = 1 << 0,   // no multiplication
  kNoDiv           = 1 << 1,   // no division
  kNoShift         = 1 << 2,   // no shift
  kNoShr           = 1 << 3,   // no arithmetic shift right
  kNoHiBits        = 1 << 4,   // "wider" operations cannot bring in higher order bits
  kNoSignedHAdd    = 1 << 5,   // no signed halving add
  kNoUnsignedHAdd  = 1 << 6,   // no unsigned halving add
  kNoUnroundedHAdd = 1 << 7,   // no unrounded halving add
  kNoAbs           = 1 << 8,   // no absolute value
  kNoStringCharAt  = 1 << 9,   // no StringCharAt
  kNoReduction     = 1 << 10,  // no reduction
  kNoSAD           = 1 << 11,  // no sum of absolute differences (SAD)
  kNoWideSAD       = 1 << 12,  // no sum of absolute differences (SAD) with operand widening
  kNoDotProd       = 1 << 13,  // no dot product
};
Aart Bik | 9620230 | 2016-10-04 17:33:56 -0700 | [diff] [blame] | 105 | |
/*
 * Mode of the vectorizer during synthesis: either the sequential
 * peeling/cleanup loop or the vector loop.
 */
enum VectorMode {
  kSequential,  // sequential peeling/cleanup loop
  kVector       // vector loop
};
| 114 | |
| 115 | /* |
| 116 | * Representation of a unit-stride array reference. |
| 117 | */ |
| 118 | struct ArrayReference { |
Aart Bik | 38a3f21 | 2017-10-20 17:02:21 -0700 | [diff] [blame] | 119 | ArrayReference(HInstruction* b, HInstruction* o, DataType::Type t, bool l, bool c = false) |
| 120 | : base(b), offset(o), type(t), lhs(l), is_string_char_at(c) { } |
Aart Bik | f8f5a16 | 2017-02-06 15:35:29 -0800 | [diff] [blame] | 121 | bool operator<(const ArrayReference& other) const { |
| 122 | return |
| 123 | (base < other.base) || |
| 124 | (base == other.base && |
| 125 | (offset < other.offset || (offset == other.offset && |
| 126 | (type < other.type || |
Aart Bik | 38a3f21 | 2017-10-20 17:02:21 -0700 | [diff] [blame] | 127 | (type == other.type && |
| 128 | (lhs < other.lhs || |
| 129 | (lhs == other.lhs && |
| 130 | is_string_char_at < other.is_string_char_at))))))); |
Aart Bik | f8f5a16 | 2017-02-06 15:35:29 -0800 | [diff] [blame] | 131 | } |
Aart Bik | 38a3f21 | 2017-10-20 17:02:21 -0700 | [diff] [blame] | 132 | HInstruction* base; // base address |
| 133 | HInstruction* offset; // offset + i |
| 134 | DataType::Type type; // component type |
| 135 | bool lhs; // def/use |
| 136 | bool is_string_char_at; // compressed string read |
Aart Bik | f8f5a16 | 2017-02-06 15:35:29 -0800 | [diff] [blame] | 137 | }; |
| 138 | |
Aart Bik | b29f684 | 2017-07-28 15:58:41 -0700 | [diff] [blame] | 139 | // |
Aart Bik | f8f5a16 | 2017-02-06 15:35:29 -0800 | [diff] [blame] | 140 | // Loop setup and traversal. |
Aart Bik | b29f684 | 2017-07-28 15:58:41 -0700 | [diff] [blame] | 141 | // |
| 142 | |
Aart Bik | 2477320 | 2018-04-26 10:28:51 -0700 | [diff] [blame] | 143 | bool LocalRun(); |
Aart Bik | 281c681 | 2016-08-26 11:31:48 -0700 | [diff] [blame] | 144 | void AddLoop(HLoopInformation* loop_info); |
| 145 | void RemoveLoop(LoopNode* node); |
Aart Bik | 281c681 | 2016-08-26 11:31:48 -0700 | [diff] [blame] | 146 | |
Aart Bik | b29f684 | 2017-07-28 15:58:41 -0700 | [diff] [blame] | 147 | // Traverses all loops inner to outer to perform simplifications and optimizations. |
| 148 | // Returns true if loops nested inside current loop (node) have changed. |
| 149 | bool TraverseLoopsInnerToOuter(LoopNode* node); |
| 150 | |
Santiago Aboy Solanes | 0eca098 | 2022-04-08 18:00:48 +0100 | [diff] [blame] | 151 | // Calculates `node`'s `try_catch_kind` and sets it to: |
| 152 | // 1) kHasTryCatch if it has try catches (or if it's inside of an outer try catch) |
| 153 | // 2) kNoTryCatch otherwise. |
| 154 | void CalculateAndSetTryCatchKind(LoopNode* node); |
| 155 | |
Aart Bik | b29f684 | 2017-07-28 15:58:41 -0700 | [diff] [blame] | 156 | // |
Aart Bik | f8f5a16 | 2017-02-06 15:35:29 -0800 | [diff] [blame] | 157 | // Optimization. |
Aart Bik | b29f684 | 2017-07-28 15:58:41 -0700 | [diff] [blame] | 158 | // |
| 159 | |
Aart Bik | 281c681 | 2016-08-26 11:31:48 -0700 | [diff] [blame] | 160 | void SimplifyInduction(LoopNode* node); |
Aart Bik | 482095d | 2016-10-10 15:39:10 -0700 | [diff] [blame] | 161 | void SimplifyBlocks(LoopNode* node); |
Aart Bik | f8f5a16 | 2017-02-06 15:35:29 -0800 | [diff] [blame] | 162 | |
Artem Serov | 121f203 | 2017-10-23 19:19:06 +0100 | [diff] [blame] | 163 | // Performs optimizations specific to inner loop with finite header logic (empty loop removal, |
Aart Bik | b29f684 | 2017-07-28 15:58:41 -0700 | [diff] [blame] | 164 | // unrolling, vectorization). Returns true if anything changed. |
Artem Serov | 121f203 | 2017-10-23 19:19:06 +0100 | [diff] [blame] | 165 | bool TryOptimizeInnerLoopFinite(LoopNode* node); |
| 166 | |
| 167 | // Performs optimizations specific to inner loop. Returns true if anything changed. |
Aart Bik | b29f684 | 2017-07-28 15:58:41 -0700 | [diff] [blame] | 168 | bool OptimizeInnerLoop(LoopNode* node); |
| 169 | |
Artem Serov | 121f203 | 2017-10-23 19:19:06 +0100 | [diff] [blame] | 170 | // Tries to apply loop unrolling for branch penalty reduction and better instruction scheduling |
Artem Serov | 0e32908 | 2018-06-12 10:23:27 +0100 | [diff] [blame] | 171 | // opportunities. Returns whether transformation happened. 'generate_code' determines whether the |
| 172 | // optimization should be actually applied. |
| 173 | bool TryUnrollingForBranchPenaltyReduction(LoopAnalysisInfo* analysis_info, |
| 174 | bool generate_code = true); |
Artem Serov | 121f203 | 2017-10-23 19:19:06 +0100 | [diff] [blame] | 175 | |
Artem Serov | 72411e6 | 2017-10-19 16:18:07 +0100 | [diff] [blame] | 176 | // Tries to apply loop peeling for loop invariant exits elimination. Returns whether |
Artem Serov | 0e32908 | 2018-06-12 10:23:27 +0100 | [diff] [blame] | 177 | // transformation happened. 'generate_code' determines whether the optimization should be |
| 178 | // actually applied. |
| 179 | bool TryPeelingForLoopInvariantExitsElimination(LoopAnalysisInfo* analysis_info, |
| 180 | bool generate_code = true); |
| 181 | |
Artem Serov | 18ba1da | 2018-05-16 19:06:32 +0100 | [diff] [blame] | 182 | // Tries to perform whole loop unrolling for a small loop with a small trip count to eliminate |
| 183 | // the loop check overhead and to have more opportunities for inter-iteration optimizations. |
| 184 | // Returns whether transformation happened. 'generate_code' determines whether the optimization |
| 185 | // should be actually applied. |
| 186 | bool TryFullUnrolling(LoopAnalysisInfo* analysis_info, bool generate_code = true); |
| 187 | |
Stelios Ioannou | c54cc7c | 2021-07-09 17:06:03 +0100 | [diff] [blame] | 188 | // Tries to remove SuspendCheck for plain loops with a low trip count. The |
| 189 | // SuspendCheck in the codegen makes sure that the thread can be interrupted |
| 190 | // during execution for GC. Not being able to do so might decrease the |
| 191 | // responsiveness of GC when a very long loop or a long recursion is being |
| 192 | // executed. However, for plain loops with a small trip count, the removal of |
| 193 | // SuspendCheck should not affect the GC's responsiveness by a large margin. |
| 194 | // Consequently, since the thread won't be interrupted for plain loops, it is |
| 195 | // assumed that the performance might increase by removing SuspendCheck. |
| 196 | bool TryToRemoveSuspendCheckFromLoopHeader(LoopAnalysisInfo* analysis_info, |
| 197 | bool generate_code = true); |
| 198 | |
| 199 | // Tries to apply scalar loop optimizations. |
| 200 | bool TryLoopScalarOpts(LoopNode* node); |
Artem Serov | 72411e6 | 2017-10-19 16:18:07 +0100 | [diff] [blame] | 201 | |
Aart Bik | b29f684 | 2017-07-28 15:58:41 -0700 | [diff] [blame] | 202 | // |
Aart Bik | f8f5a16 | 2017-02-06 15:35:29 -0800 | [diff] [blame] | 203 | // Vectorization analysis and synthesis. |
Aart Bik | b29f684 | 2017-07-28 15:58:41 -0700 | [diff] [blame] | 204 | // |
| 205 | |
Aart Bik | 14a68b4 | 2017-06-08 14:06:58 -0700 | [diff] [blame] | 206 | bool ShouldVectorize(LoopNode* node, HBasicBlock* block, int64_t trip_count); |
Aart Bik | f8f5a16 | 2017-02-06 15:35:29 -0800 | [diff] [blame] | 207 | void Vectorize(LoopNode* node, HBasicBlock* block, HBasicBlock* exit, int64_t trip_count); |
| 208 | void GenerateNewLoop(LoopNode* node, |
| 209 | HBasicBlock* block, |
| 210 | HBasicBlock* new_preheader, |
| 211 | HInstruction* lo, |
| 212 | HInstruction* hi, |
Aart Bik | 14a68b4 | 2017-06-08 14:06:58 -0700 | [diff] [blame] | 213 | HInstruction* step, |
| 214 | uint32_t unroll); |
Aart Bik | f8f5a16 | 2017-02-06 15:35:29 -0800 | [diff] [blame] | 215 | bool VectorizeDef(LoopNode* node, HInstruction* instruction, bool generate_code); |
| 216 | bool VectorizeUse(LoopNode* node, |
| 217 | HInstruction* instruction, |
| 218 | bool generate_code, |
Vladimir Marko | 0ebe0d8 | 2017-09-21 22:50:39 +0100 | [diff] [blame] | 219 | DataType::Type type, |
Aart Bik | f8f5a16 | 2017-02-06 15:35:29 -0800 | [diff] [blame] | 220 | uint64_t restrictions); |
Aart Bik | 38a3f21 | 2017-10-20 17:02:21 -0700 | [diff] [blame] | 221 | uint32_t GetVectorSizeInBytes(); |
Vladimir Marko | 0ebe0d8 | 2017-09-21 22:50:39 +0100 | [diff] [blame] | 222 | bool TrySetVectorType(DataType::Type type, /*out*/ uint64_t* restrictions); |
Artem Serov | c8150b5 | 2019-07-31 18:28:00 +0100 | [diff] [blame] | 223 | bool TrySetVectorLengthImpl(uint32_t length); |
| 224 | |
| 225 | bool TrySetVectorLength(DataType::Type type, uint32_t length) { |
| 226 | bool res = TrySetVectorLengthImpl(length); |
| 227 | // Currently the vectorizer supports only the mode when full SIMD registers are used. |
Santiago Aboy Solanes | 872ec72 | 2022-02-18 14:10:25 +0000 | [diff] [blame] | 228 | DCHECK_IMPLIES(res, DataType::Size(type) * length == GetVectorSizeInBytes()); |
Artem Serov | c8150b5 | 2019-07-31 18:28:00 +0100 | [diff] [blame] | 229 | return res; |
| 230 | } |
| 231 | |
Vladimir Marko | 0ebe0d8 | 2017-09-21 22:50:39 +0100 | [diff] [blame] | 232 | void GenerateVecInv(HInstruction* org, DataType::Type type); |
Aart Bik | 14a68b4 | 2017-06-08 14:06:58 -0700 | [diff] [blame] | 233 | void GenerateVecSub(HInstruction* org, HInstruction* offset); |
Aart Bik | f8f5a16 | 2017-02-06 15:35:29 -0800 | [diff] [blame] | 234 | void GenerateVecMem(HInstruction* org, |
| 235 | HInstruction* opa, |
| 236 | HInstruction* opb, |
Aart Bik | 14a68b4 | 2017-06-08 14:06:58 -0700 | [diff] [blame] | 237 | HInstruction* offset, |
Vladimir Marko | 0ebe0d8 | 2017-09-21 22:50:39 +0100 | [diff] [blame] | 238 | DataType::Type type); |
Aart Bik | 0148de4 | 2017-09-05 09:25:01 -0700 | [diff] [blame] | 239 | void GenerateVecReductionPhi(HPhi* phi); |
| 240 | void GenerateVecReductionPhiInputs(HPhi* phi, HInstruction* reduction); |
| 241 | HInstruction* ReduceAndExtractIfNeeded(HInstruction* instruction); |
Aart Bik | 304c8a5 | 2017-05-23 11:01:13 -0700 | [diff] [blame] | 242 | void GenerateVecOp(HInstruction* org, |
| 243 | HInstruction* opa, |
| 244 | HInstruction* opb, |
Aart Bik | 3f08e9b | 2018-05-01 13:42:03 -0700 | [diff] [blame] | 245 | DataType::Type type); |
Aart Bik | 281c681 | 2016-08-26 11:31:48 -0700 | [diff] [blame] | 246 | |
Aart Bik | f3e61ee | 2017-04-12 17:09:20 -0700 | [diff] [blame] | 247 | // Vectorization idioms. |
Aart Bik | 29aa082 | 2018-03-08 11:28:00 -0800 | [diff] [blame] | 248 | bool VectorizeSaturationIdiom(LoopNode* node, |
| 249 | HInstruction* instruction, |
| 250 | bool generate_code, |
| 251 | DataType::Type type, |
| 252 | uint64_t restrictions); |
Aart Bik | f3e61ee | 2017-04-12 17:09:20 -0700 | [diff] [blame] | 253 | bool VectorizeHalvingAddIdiom(LoopNode* node, |
| 254 | HInstruction* instruction, |
| 255 | bool generate_code, |
Vladimir Marko | 0ebe0d8 | 2017-09-21 22:50:39 +0100 | [diff] [blame] | 256 | DataType::Type type, |
Aart Bik | f3e61ee | 2017-04-12 17:09:20 -0700 | [diff] [blame] | 257 | uint64_t restrictions); |
Aart Bik | dbbac8f | 2017-09-01 13:06:08 -0700 | [diff] [blame] | 258 | bool VectorizeSADIdiom(LoopNode* node, |
| 259 | HInstruction* instruction, |
| 260 | bool generate_code, |
Vladimir Marko | 0ebe0d8 | 2017-09-21 22:50:39 +0100 | [diff] [blame] | 261 | DataType::Type type, |
Aart Bik | dbbac8f | 2017-09-01 13:06:08 -0700 | [diff] [blame] | 262 | uint64_t restrictions); |
Artem Serov | aaac0e3 | 2018-08-07 00:52:22 +0100 | [diff] [blame] | 263 | bool VectorizeDotProdIdiom(LoopNode* node, |
| 264 | HInstruction* instruction, |
| 265 | bool generate_code, |
| 266 | DataType::Type type, |
| 267 | uint64_t restrictions); |
Aart Bik | f3e61ee | 2017-04-12 17:09:20 -0700 | [diff] [blame] | 268 | |
Aart Bik | 14a68b4 | 2017-06-08 14:06:58 -0700 | [diff] [blame] | 269 | // Vectorization heuristics. |
Aart Bik | 38a3f21 | 2017-10-20 17:02:21 -0700 | [diff] [blame] | 270 | Alignment ComputeAlignment(HInstruction* offset, |
| 271 | DataType::Type type, |
| 272 | bool is_string_char_at, |
| 273 | uint32_t peeling = 0); |
Artem Serov | 55ab7e8 | 2020-04-27 21:02:28 +0100 | [diff] [blame] | 274 | void SetAlignmentStrategy(const ScopedArenaVector<uint32_t>& peeling_votes, |
Aart Bik | 38a3f21 | 2017-10-20 17:02:21 -0700 | [diff] [blame] | 275 | const ArrayReference* peeling_candidate); |
| 276 | uint32_t MaxNumberPeeled(); |
Aart Bik | 14a68b4 | 2017-06-08 14:06:58 -0700 | [diff] [blame] | 277 | bool IsVectorizationProfitable(int64_t trip_count); |
Aart Bik | 14a68b4 | 2017-06-08 14:06:58 -0700 | [diff] [blame] | 278 | |
Aart Bik | b29f684 | 2017-07-28 15:58:41 -0700 | [diff] [blame] | 279 | // |
Aart Bik | 6b69e0a | 2017-01-11 10:20:43 -0800 | [diff] [blame] | 280 | // Helpers. |
Aart Bik | b29f684 | 2017-07-28 15:58:41 -0700 | [diff] [blame] | 281 | // |
| 282 | |
Aart Bik | f8f5a16 | 2017-02-06 15:35:29 -0800 | [diff] [blame] | 283 | bool TrySetPhiInduction(HPhi* phi, bool restrict_uses); |
Aart Bik | b29f684 | 2017-07-28 15:58:41 -0700 | [diff] [blame] | 284 | bool TrySetPhiReduction(HPhi* phi); |
| 285 | |
| 286 | // Detects loop header with a single induction (returned in main_phi), possibly |
| 287 | // other phis for reductions, but no other side effects. Returns true on success. |
| 288 | bool TrySetSimpleLoopHeader(HBasicBlock* block, /*out*/ HPhi** main_phi); |
| 289 | |
Aart Bik | cc42be0 | 2016-10-20 16:14:16 -0700 | [diff] [blame] | 290 | bool IsEmptyBody(HBasicBlock* block); |
Aart Bik | 482095d | 2016-10-10 15:39:10 -0700 | [diff] [blame] | 291 | bool IsOnlyUsedAfterLoop(HLoopInformation* loop_info, |
Aart Bik | 8c4a854 | 2016-10-06 11:36:57 -0700 | [diff] [blame] | 292 | HInstruction* instruction, |
Aart Bik | 6b69e0a | 2017-01-11 10:20:43 -0800 | [diff] [blame] | 293 | bool collect_loop_uses, |
Aart Bik | 38a3f21 | 2017-10-20 17:02:21 -0700 | [diff] [blame] | 294 | /*out*/ uint32_t* use_count); |
Aart Bik | f8f5a16 | 2017-02-06 15:35:29 -0800 | [diff] [blame] | 295 | bool IsUsedOutsideLoop(HLoopInformation* loop_info, |
| 296 | HInstruction* instruction); |
Nicolas Geoffray | 1a0a519 | 2017-06-22 11:56:01 +0100 | [diff] [blame] | 297 | bool TryReplaceWithLastValue(HLoopInformation* loop_info, |
| 298 | HInstruction* instruction, |
| 299 | HBasicBlock* block); |
Aart Bik | f8f5a16 | 2017-02-06 15:35:29 -0800 | [diff] [blame] | 300 | bool TryAssignLastValue(HLoopInformation* loop_info, |
| 301 | HInstruction* instruction, |
| 302 | HBasicBlock* block, |
| 303 | bool collect_loop_uses); |
Aart Bik | 6b69e0a | 2017-01-11 10:20:43 -0800 | [diff] [blame] | 304 | void RemoveDeadInstructions(const HInstructionList& list); |
Nicolas Geoffray | 1a0a519 | 2017-06-22 11:56:01 +0100 | [diff] [blame] | 305 | bool CanRemoveCycle(); // Whether the current 'iset_' is removable. |
Aart Bik | 281c681 | 2016-08-26 11:31:48 -0700 | [diff] [blame] | 306 | |
Artem Serov | 8ba4de1 | 2019-12-04 21:10:23 +0000 | [diff] [blame] | 307 | bool IsInPredicatedVectorizationMode() const { return predicated_vectorization_mode_; } |
| 308 | |
Vladimir Marko | a043111 | 2018-06-25 09:32:54 +0100 | [diff] [blame] | 309 | // Compiler options (to query ISA features). |
| 310 | const CompilerOptions* compiler_options_; |
Aart Bik | 92685a8 | 2017-03-06 11:13:43 -0800 | [diff] [blame] | 311 | |
Artem Serov | c8150b5 | 2019-07-31 18:28:00 +0100 | [diff] [blame] | 312 | // Cached target SIMD vector register size in bytes. |
| 313 | const size_t simd_register_size_; |
| 314 | |
Aart Bik | 9620230 | 2016-10-04 17:33:56 -0700 | [diff] [blame] | 315 | // Range information based on prior induction variable analysis. |
Aart Bik | 281c681 | 2016-08-26 11:31:48 -0700 | [diff] [blame] | 316 | InductionVarRange induction_range_; |
| 317 | |
| 318 | // Phase-local heap memory allocator for the loop optimizer. Storage obtained |
Aart Bik | 9620230 | 2016-10-04 17:33:56 -0700 | [diff] [blame] | 319 | // through this allocator is immediately released when the loop optimizer is done. |
Vladimir Marko | ca6fff8 | 2017-10-03 14:49:14 +0100 | [diff] [blame] | 320 | ScopedArenaAllocator* loop_allocator_; |
Aart Bik | 281c681 | 2016-08-26 11:31:48 -0700 | [diff] [blame] | 321 | |
Aart Bik | f8f5a16 | 2017-02-06 15:35:29 -0800 | [diff] [blame] | 322 | // Global heap memory allocator. Used to build HIR. |
| 323 | ArenaAllocator* global_allocator_; |
| 324 | |
Aart Bik | 9620230 | 2016-10-04 17:33:56 -0700 | [diff] [blame] | 325 | // Entries into the loop hierarchy representation. The hierarchy resides |
| 326 | // in phase-local heap memory. |
Aart Bik | 281c681 | 2016-08-26 11:31:48 -0700 | [diff] [blame] | 327 | LoopNode* top_loop_; |
| 328 | LoopNode* last_loop_; |
| 329 | |
Aart Bik | 8c4a854 | 2016-10-06 11:36:57 -0700 | [diff] [blame] | 330 | // Temporary bookkeeping of a set of instructions. |
| 331 | // Contents reside in phase-local heap memory. |
Vladimir Marko | ca6fff8 | 2017-10-03 14:49:14 +0100 | [diff] [blame] | 332 | ScopedArenaSet<HInstruction*>* iset_; |
Aart Bik | 8c4a854 | 2016-10-06 11:36:57 -0700 | [diff] [blame] | 333 | |
Aart Bik | b29f684 | 2017-07-28 15:58:41 -0700 | [diff] [blame] | 334 | // Temporary bookkeeping of reduction instructions. Mapping is two-fold: |
| 335 | // (1) reductions in the loop-body are mapped back to their phi definition, |
| 336 | // (2) phi definitions are mapped to their initial value (updated during |
| 337 | // code generation to feed the proper values into the new chain). |
| 338 | // Contents reside in phase-local heap memory. |
Vladimir Marko | ca6fff8 | 2017-10-03 14:49:14 +0100 | [diff] [blame] | 339 | ScopedArenaSafeMap<HInstruction*, HInstruction*>* reductions_; |
Aart Bik | 482095d | 2016-10-10 15:39:10 -0700 | [diff] [blame] | 340 | |
Aart Bik | df7822e | 2016-12-06 10:05:30 -0800 | [diff] [blame] | 341 | // Flag that tracks if any simplifications have occurred. |
| 342 | bool simplified_; |
| 343 | |
Artem Serov | 8ba4de1 | 2019-12-04 21:10:23 +0000 | [diff] [blame] | 344 | // Whether to use predicated loop vectorization (e.g. for arm64 SVE target). |
| 345 | bool predicated_vectorization_mode_; |
| 346 | |
Aart Bik | f8f5a16 | 2017-02-06 15:35:29 -0800 | [diff] [blame] | 347 | // Number of "lanes" for selected packed type. |
| 348 | uint32_t vector_length_; |
| 349 | |
| 350 | // Set of array references in the vector loop. |
| 351 | // Contents reside in phase-local heap memory. |
Vladimir Marko | ca6fff8 | 2017-10-03 14:49:14 +0100 | [diff] [blame] | 352 | ScopedArenaSet<ArrayReference>* vector_refs_; |
Aart Bik | f8f5a16 | 2017-02-06 15:35:29 -0800 | [diff] [blame] | 353 | |
Aart Bik | 38a3f21 | 2017-10-20 17:02:21 -0700 | [diff] [blame] | 354 | // Static or dynamic loop peeling for alignment. |
| 355 | uint32_t vector_static_peeling_factor_; |
| 356 | const ArrayReference* vector_dynamic_peeling_candidate_; |
Aart Bik | 14a68b4 | 2017-06-08 14:06:58 -0700 | [diff] [blame] | 357 | |
| 358 | // Dynamic data dependence test of the form a != b. |
| 359 | HInstruction* vector_runtime_test_a_; |
| 360 | HInstruction* vector_runtime_test_b_; |
| 361 | |
Aart Bik | f8f5a16 | 2017-02-06 15:35:29 -0800 | [diff] [blame] | 362 | // Mapping used during vectorization synthesis for both the scalar peeling/cleanup |
Aart Bik | 14a68b4 | 2017-06-08 14:06:58 -0700 | [diff] [blame] | 363 | // loop (mode is kSequential) and the actual vector loop (mode is kVector). The data |
Aart Bik | f8f5a16 | 2017-02-06 15:35:29 -0800 | [diff] [blame] | 364 | // structure maps original instructions into the new instructions. |
| 365 | // Contents reside in phase-local heap memory. |
Vladimir Marko | ca6fff8 | 2017-10-03 14:49:14 +0100 | [diff] [blame] | 366 | ScopedArenaSafeMap<HInstruction*, HInstruction*>* vector_map_; |
Aart Bik | f8f5a16 | 2017-02-06 15:35:29 -0800 | [diff] [blame] | 367 | |
Aart Bik | 0148de4 | 2017-09-05 09:25:01 -0700 | [diff] [blame] | 368 | // Permanent mapping used during vectorization synthesis. |
| 369 | // Contents reside in phase-local heap memory. |
Vladimir Marko | ca6fff8 | 2017-10-03 14:49:14 +0100 | [diff] [blame] | 370 | ScopedArenaSafeMap<HInstruction*, HInstruction*>* vector_permanent_map_; |
Aart Bik | 0148de4 | 2017-09-05 09:25:01 -0700 | [diff] [blame] | 371 | |
Aart Bik | f8f5a16 | 2017-02-06 15:35:29 -0800 | [diff] [blame] | 372 | // Temporary vectorization bookkeeping. |
Aart Bik | 14a68b4 | 2017-06-08 14:06:58 -0700 | [diff] [blame] | 373 | VectorMode vector_mode_; // synthesis mode |
Aart Bik | f8f5a16 | 2017-02-06 15:35:29 -0800 | [diff] [blame] | 374 | HBasicBlock* vector_preheader_; // preheader of the new loop |
| 375 | HBasicBlock* vector_header_; // header of the new loop |
| 376 | HBasicBlock* vector_body_; // body of the new loop |
Aart Bik | 14a68b4 | 2017-06-08 14:06:58 -0700 | [diff] [blame] | 377 | HInstruction* vector_index_; // normalized index of the new loop |
Aart Bik | f8f5a16 | 2017-02-06 15:35:29 -0800 | [diff] [blame] | 378 | |
Artem Serov | 121f203 | 2017-10-23 19:19:06 +0100 | [diff] [blame] | 379 | // Helper for target-specific behaviour for loop optimizations. |
Artem Serov | cf43fb6 | 2018-02-15 14:43:48 +0000 | [diff] [blame] | 380 | ArchNoOptsLoopHelper* arch_loop_helper_; |
Artem Serov | 121f203 | 2017-10-23 19:19:06 +0100 | [diff] [blame] | 381 | |
Aart Bik | 281c681 | 2016-08-26 11:31:48 -0700 | [diff] [blame] | 382 | friend class LoopOptimizationTest; |
| 383 | |
| 384 | DISALLOW_COPY_AND_ASSIGN(HLoopOptimization); |
| 385 | }; |
| 386 | |
| 387 | } // namespace art |
| 388 | |
| 389 | #endif // ART_COMPILER_OPTIMIZING_LOOP_OPTIMIZATION_H_ |