blob: 0c35f294d8d3501b583e2a681f9b1d74568d6279 [file] [log] [blame]
/*
 * Copyright (C) 2016 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16
17#ifndef ART_COMPILER_OPTIMIZING_LOOP_OPTIMIZATION_H_
18#define ART_COMPILER_OPTIMIZING_LOOP_OPTIMIZATION_H_
19
Vladimir Markoca6fff82017-10-03 14:49:14 +010020#include "base/scoped_arena_allocator.h"
21#include "base/scoped_arena_containers.h"
Aart Bik281c6812016-08-26 11:31:48 -070022#include "induction_var_range.h"
Artem Serov121f2032017-10-23 19:19:06 +010023#include "loop_analysis.h"
Aart Bik281c6812016-08-26 11:31:48 -070024#include "nodes.h"
25#include "optimization.h"
Artem Serov121f2032017-10-23 19:19:06 +010026#include "superblock_cloner.h"
Aart Bik281c6812016-08-26 11:31:48 -070027
Vladimir Marko0a516052019-10-14 13:00:44 +000028namespace art {
Aart Bik281c6812016-08-26 11:31:48 -070029
Vladimir Markoa0431112018-06-25 09:32:54 +010030class CompilerOptions;
Artem Serovcf43fb62018-02-15 14:43:48 +000031class ArchNoOptsLoopHelper;
Aart Bik92685a82017-03-06 11:13:43 -080032
/**
 * Loop optimizations. Builds a loop hierarchy and applies optimizations to
 * the detected nested loops, such as removal of dead induction and empty loops
 * and inner loop vectorization.
 */
38class HLoopOptimization : public HOptimization {
39 public:
Aart Bik92685a82017-03-06 11:13:43 -080040 HLoopOptimization(HGraph* graph,
Artem Serovc8150b52019-07-31 18:28:00 +010041 const CodeGenerator& codegen, // Needs info about the target.
Aart Bikb92cc332017-09-06 15:53:17 -070042 HInductionVarAnalysis* induction_analysis,
Aart Bik2ca10eb2017-11-15 15:17:53 -080043 OptimizingCompilerStats* stats,
44 const char* name = kLoopOptimizationPassName);
Aart Bik281c6812016-08-26 11:31:48 -070045
Roland Levillainbbc6e7e2018-08-24 16:58:47 +010046 bool Run() override;
Aart Bik281c6812016-08-26 11:31:48 -070047
48 static constexpr const char* kLoopOptimizationPassName = "loop_optimization";
49
50 private:
51 /**
52 * A single loop inside the loop hierarchy representation.
53 */
Aart Bik96202302016-10-04 17:33:56 -070054 struct LoopNode : public ArenaObject<kArenaAllocLoopOptimization> {
Aart Bik281c6812016-08-26 11:31:48 -070055 explicit LoopNode(HLoopInformation* lp_info)
56 : loop_info(lp_info),
57 outer(nullptr),
58 inner(nullptr),
59 previous(nullptr),
60 next(nullptr) {}
Aart Bikf8f5a162017-02-06 15:35:29 -080061 HLoopInformation* loop_info;
Aart Bik281c6812016-08-26 11:31:48 -070062 LoopNode* outer;
63 LoopNode* inner;
64 LoopNode* previous;
65 LoopNode* next;
66 };
67
Aart Bikf8f5a162017-02-06 15:35:29 -080068 /*
69 * Vectorization restrictions (bit mask).
70 */
71 enum VectorRestrictions {
Aart Bik0148de42017-09-05 09:25:01 -070072 kNone = 0, // no restrictions
73 kNoMul = 1 << 0, // no multiplication
74 kNoDiv = 1 << 1, // no division
75 kNoShift = 1 << 2, // no shift
76 kNoShr = 1 << 3, // no arithmetic shift right
77 kNoHiBits = 1 << 4, // "wider" operations cannot bring in higher order bits
78 kNoSignedHAdd = 1 << 5, // no signed halving add
79 kNoUnroundedHAdd = 1 << 6, // no unrounded halving add
80 kNoAbs = 1 << 7, // no absolute value
Aart Bik3f08e9b2018-05-01 13:42:03 -070081 kNoStringCharAt = 1 << 8, // no StringCharAt
82 kNoReduction = 1 << 9, // no reduction
83 kNoSAD = 1 << 10, // no sum of absolute differences (SAD)
84 kNoWideSAD = 1 << 11, // no sum of absolute differences (SAD) with operand widening
Artem Serovaaac0e32018-08-07 00:52:22 +010085 kNoDotProd = 1 << 12, // no dot product
Aart Bikf8f5a162017-02-06 15:35:29 -080086 };
Aart Bik96202302016-10-04 17:33:56 -070087
Aart Bikf8f5a162017-02-06 15:35:29 -080088 /*
89 * Vectorization mode during synthesis
90 * (sequential peeling/cleanup loop or vector loop).
91 */
92 enum VectorMode {
93 kSequential,
94 kVector
95 };
96
97 /*
98 * Representation of a unit-stride array reference.
99 */
100 struct ArrayReference {
Aart Bik38a3f212017-10-20 17:02:21 -0700101 ArrayReference(HInstruction* b, HInstruction* o, DataType::Type t, bool l, bool c = false)
102 : base(b), offset(o), type(t), lhs(l), is_string_char_at(c) { }
Aart Bikf8f5a162017-02-06 15:35:29 -0800103 bool operator<(const ArrayReference& other) const {
104 return
105 (base < other.base) ||
106 (base == other.base &&
107 (offset < other.offset || (offset == other.offset &&
108 (type < other.type ||
Aart Bik38a3f212017-10-20 17:02:21 -0700109 (type == other.type &&
110 (lhs < other.lhs ||
111 (lhs == other.lhs &&
112 is_string_char_at < other.is_string_char_at)))))));
Aart Bikf8f5a162017-02-06 15:35:29 -0800113 }
Aart Bik38a3f212017-10-20 17:02:21 -0700114 HInstruction* base; // base address
115 HInstruction* offset; // offset + i
116 DataType::Type type; // component type
117 bool lhs; // def/use
118 bool is_string_char_at; // compressed string read
Aart Bikf8f5a162017-02-06 15:35:29 -0800119 };
120
Aart Bikb29f6842017-07-28 15:58:41 -0700121 //
Aart Bikf8f5a162017-02-06 15:35:29 -0800122 // Loop setup and traversal.
Aart Bikb29f6842017-07-28 15:58:41 -0700123 //
124
Aart Bik24773202018-04-26 10:28:51 -0700125 bool LocalRun();
Aart Bik281c6812016-08-26 11:31:48 -0700126 void AddLoop(HLoopInformation* loop_info);
127 void RemoveLoop(LoopNode* node);
Aart Bik281c6812016-08-26 11:31:48 -0700128
Aart Bikb29f6842017-07-28 15:58:41 -0700129 // Traverses all loops inner to outer to perform simplifications and optimizations.
130 // Returns true if loops nested inside current loop (node) have changed.
131 bool TraverseLoopsInnerToOuter(LoopNode* node);
132
133 //
Aart Bikf8f5a162017-02-06 15:35:29 -0800134 // Optimization.
Aart Bikb29f6842017-07-28 15:58:41 -0700135 //
136
Aart Bik281c6812016-08-26 11:31:48 -0700137 void SimplifyInduction(LoopNode* node);
Aart Bik482095d2016-10-10 15:39:10 -0700138 void SimplifyBlocks(LoopNode* node);
Aart Bikf8f5a162017-02-06 15:35:29 -0800139
Artem Serov121f2032017-10-23 19:19:06 +0100140 // Performs optimizations specific to inner loop with finite header logic (empty loop removal,
Aart Bikb29f6842017-07-28 15:58:41 -0700141 // unrolling, vectorization). Returns true if anything changed.
Artem Serov121f2032017-10-23 19:19:06 +0100142 bool TryOptimizeInnerLoopFinite(LoopNode* node);
143
144 // Performs optimizations specific to inner loop. Returns true if anything changed.
Aart Bikb29f6842017-07-28 15:58:41 -0700145 bool OptimizeInnerLoop(LoopNode* node);
146
Artem Serov121f2032017-10-23 19:19:06 +0100147 // Tries to apply loop unrolling for branch penalty reduction and better instruction scheduling
Artem Serov0e329082018-06-12 10:23:27 +0100148 // opportunities. Returns whether transformation happened. 'generate_code' determines whether the
149 // optimization should be actually applied.
150 bool TryUnrollingForBranchPenaltyReduction(LoopAnalysisInfo* analysis_info,
151 bool generate_code = true);
Artem Serov121f2032017-10-23 19:19:06 +0100152
Artem Serov72411e62017-10-19 16:18:07 +0100153 // Tries to apply loop peeling for loop invariant exits elimination. Returns whether
Artem Serov0e329082018-06-12 10:23:27 +0100154 // transformation happened. 'generate_code' determines whether the optimization should be
155 // actually applied.
156 bool TryPeelingForLoopInvariantExitsElimination(LoopAnalysisInfo* analysis_info,
157 bool generate_code = true);
158
Artem Serov18ba1da2018-05-16 19:06:32 +0100159 // Tries to perform whole loop unrolling for a small loop with a small trip count to eliminate
160 // the loop check overhead and to have more opportunities for inter-iteration optimizations.
161 // Returns whether transformation happened. 'generate_code' determines whether the optimization
162 // should be actually applied.
163 bool TryFullUnrolling(LoopAnalysisInfo* analysis_info, bool generate_code = true);
164
Artem Serov0e329082018-06-12 10:23:27 +0100165 // Tries to apply scalar loop peeling and unrolling.
166 bool TryPeelingAndUnrolling(LoopNode* node);
Artem Serov72411e62017-10-19 16:18:07 +0100167
Aart Bikb29f6842017-07-28 15:58:41 -0700168 //
Aart Bikf8f5a162017-02-06 15:35:29 -0800169 // Vectorization analysis and synthesis.
Aart Bikb29f6842017-07-28 15:58:41 -0700170 //
171
Aart Bik14a68b42017-06-08 14:06:58 -0700172 bool ShouldVectorize(LoopNode* node, HBasicBlock* block, int64_t trip_count);
Aart Bikf8f5a162017-02-06 15:35:29 -0800173 void Vectorize(LoopNode* node, HBasicBlock* block, HBasicBlock* exit, int64_t trip_count);
174 void GenerateNewLoop(LoopNode* node,
175 HBasicBlock* block,
176 HBasicBlock* new_preheader,
177 HInstruction* lo,
178 HInstruction* hi,
Aart Bik14a68b42017-06-08 14:06:58 -0700179 HInstruction* step,
180 uint32_t unroll);
Aart Bikf8f5a162017-02-06 15:35:29 -0800181 bool VectorizeDef(LoopNode* node, HInstruction* instruction, bool generate_code);
182 bool VectorizeUse(LoopNode* node,
183 HInstruction* instruction,
184 bool generate_code,
Vladimir Marko0ebe0d82017-09-21 22:50:39 +0100185 DataType::Type type,
Aart Bikf8f5a162017-02-06 15:35:29 -0800186 uint64_t restrictions);
Aart Bik38a3f212017-10-20 17:02:21 -0700187 uint32_t GetVectorSizeInBytes();
Vladimir Marko0ebe0d82017-09-21 22:50:39 +0100188 bool TrySetVectorType(DataType::Type type, /*out*/ uint64_t* restrictions);
Artem Serovc8150b52019-07-31 18:28:00 +0100189 bool TrySetVectorLengthImpl(uint32_t length);
190
191 bool TrySetVectorLength(DataType::Type type, uint32_t length) {
192 bool res = TrySetVectorLengthImpl(length);
193 // Currently the vectorizer supports only the mode when full SIMD registers are used.
194 DCHECK(!res || (DataType::Size(type) * length == GetVectorSizeInBytes()));
195 return res;
196 }
197
Vladimir Marko0ebe0d82017-09-21 22:50:39 +0100198 void GenerateVecInv(HInstruction* org, DataType::Type type);
Aart Bik14a68b42017-06-08 14:06:58 -0700199 void GenerateVecSub(HInstruction* org, HInstruction* offset);
Aart Bikf8f5a162017-02-06 15:35:29 -0800200 void GenerateVecMem(HInstruction* org,
201 HInstruction* opa,
202 HInstruction* opb,
Aart Bik14a68b42017-06-08 14:06:58 -0700203 HInstruction* offset,
Vladimir Marko0ebe0d82017-09-21 22:50:39 +0100204 DataType::Type type);
Aart Bik0148de42017-09-05 09:25:01 -0700205 void GenerateVecReductionPhi(HPhi* phi);
206 void GenerateVecReductionPhiInputs(HPhi* phi, HInstruction* reduction);
207 HInstruction* ReduceAndExtractIfNeeded(HInstruction* instruction);
Aart Bik304c8a52017-05-23 11:01:13 -0700208 void GenerateVecOp(HInstruction* org,
209 HInstruction* opa,
210 HInstruction* opb,
Aart Bik3f08e9b2018-05-01 13:42:03 -0700211 DataType::Type type);
Aart Bik281c6812016-08-26 11:31:48 -0700212
Aart Bikf3e61ee2017-04-12 17:09:20 -0700213 // Vectorization idioms.
Aart Bik29aa0822018-03-08 11:28:00 -0800214 bool VectorizeSaturationIdiom(LoopNode* node,
215 HInstruction* instruction,
216 bool generate_code,
217 DataType::Type type,
218 uint64_t restrictions);
Aart Bikf3e61ee2017-04-12 17:09:20 -0700219 bool VectorizeHalvingAddIdiom(LoopNode* node,
220 HInstruction* instruction,
221 bool generate_code,
Vladimir Marko0ebe0d82017-09-21 22:50:39 +0100222 DataType::Type type,
Aart Bikf3e61ee2017-04-12 17:09:20 -0700223 uint64_t restrictions);
Aart Bikdbbac8f2017-09-01 13:06:08 -0700224 bool VectorizeSADIdiom(LoopNode* node,
225 HInstruction* instruction,
226 bool generate_code,
Vladimir Marko0ebe0d82017-09-21 22:50:39 +0100227 DataType::Type type,
Aart Bikdbbac8f2017-09-01 13:06:08 -0700228 uint64_t restrictions);
Artem Serovaaac0e32018-08-07 00:52:22 +0100229 bool VectorizeDotProdIdiom(LoopNode* node,
230 HInstruction* instruction,
231 bool generate_code,
232 DataType::Type type,
233 uint64_t restrictions);
Aart Bikf3e61ee2017-04-12 17:09:20 -0700234
Aart Bik14a68b42017-06-08 14:06:58 -0700235 // Vectorization heuristics.
Aart Bik38a3f212017-10-20 17:02:21 -0700236 Alignment ComputeAlignment(HInstruction* offset,
237 DataType::Type type,
238 bool is_string_char_at,
239 uint32_t peeling = 0);
240 void SetAlignmentStrategy(uint32_t peeling_votes[],
241 const ArrayReference* peeling_candidate);
242 uint32_t MaxNumberPeeled();
Aart Bik14a68b42017-06-08 14:06:58 -0700243 bool IsVectorizationProfitable(int64_t trip_count);
Aart Bik14a68b42017-06-08 14:06:58 -0700244
Aart Bikb29f6842017-07-28 15:58:41 -0700245 //
Aart Bik6b69e0a2017-01-11 10:20:43 -0800246 // Helpers.
Aart Bikb29f6842017-07-28 15:58:41 -0700247 //
248
Aart Bikf8f5a162017-02-06 15:35:29 -0800249 bool TrySetPhiInduction(HPhi* phi, bool restrict_uses);
Aart Bikb29f6842017-07-28 15:58:41 -0700250 bool TrySetPhiReduction(HPhi* phi);
251
252 // Detects loop header with a single induction (returned in main_phi), possibly
253 // other phis for reductions, but no other side effects. Returns true on success.
254 bool TrySetSimpleLoopHeader(HBasicBlock* block, /*out*/ HPhi** main_phi);
255
Aart Bikcc42be02016-10-20 16:14:16 -0700256 bool IsEmptyBody(HBasicBlock* block);
Aart Bik482095d2016-10-10 15:39:10 -0700257 bool IsOnlyUsedAfterLoop(HLoopInformation* loop_info,
Aart Bik8c4a8542016-10-06 11:36:57 -0700258 HInstruction* instruction,
Aart Bik6b69e0a2017-01-11 10:20:43 -0800259 bool collect_loop_uses,
Aart Bik38a3f212017-10-20 17:02:21 -0700260 /*out*/ uint32_t* use_count);
Aart Bikf8f5a162017-02-06 15:35:29 -0800261 bool IsUsedOutsideLoop(HLoopInformation* loop_info,
262 HInstruction* instruction);
Nicolas Geoffray1a0a5192017-06-22 11:56:01 +0100263 bool TryReplaceWithLastValue(HLoopInformation* loop_info,
264 HInstruction* instruction,
265 HBasicBlock* block);
Aart Bikf8f5a162017-02-06 15:35:29 -0800266 bool TryAssignLastValue(HLoopInformation* loop_info,
267 HInstruction* instruction,
268 HBasicBlock* block,
269 bool collect_loop_uses);
Aart Bik6b69e0a2017-01-11 10:20:43 -0800270 void RemoveDeadInstructions(const HInstructionList& list);
Nicolas Geoffray1a0a5192017-06-22 11:56:01 +0100271 bool CanRemoveCycle(); // Whether the current 'iset_' is removable.
Aart Bik281c6812016-08-26 11:31:48 -0700272
Vladimir Markoa0431112018-06-25 09:32:54 +0100273 // Compiler options (to query ISA features).
274 const CompilerOptions* compiler_options_;
Aart Bik92685a82017-03-06 11:13:43 -0800275
Artem Serovc8150b52019-07-31 18:28:00 +0100276 // Cached target SIMD vector register size in bytes.
277 const size_t simd_register_size_;
278
Aart Bik96202302016-10-04 17:33:56 -0700279 // Range information based on prior induction variable analysis.
Aart Bik281c6812016-08-26 11:31:48 -0700280 InductionVarRange induction_range_;
281
282 // Phase-local heap memory allocator for the loop optimizer. Storage obtained
Aart Bik96202302016-10-04 17:33:56 -0700283 // through this allocator is immediately released when the loop optimizer is done.
Vladimir Markoca6fff82017-10-03 14:49:14 +0100284 ScopedArenaAllocator* loop_allocator_;
Aart Bik281c6812016-08-26 11:31:48 -0700285
Aart Bikf8f5a162017-02-06 15:35:29 -0800286 // Global heap memory allocator. Used to build HIR.
287 ArenaAllocator* global_allocator_;
288
Aart Bik96202302016-10-04 17:33:56 -0700289 // Entries into the loop hierarchy representation. The hierarchy resides
290 // in phase-local heap memory.
Aart Bik281c6812016-08-26 11:31:48 -0700291 LoopNode* top_loop_;
292 LoopNode* last_loop_;
293
Aart Bik8c4a8542016-10-06 11:36:57 -0700294 // Temporary bookkeeping of a set of instructions.
295 // Contents reside in phase-local heap memory.
Vladimir Markoca6fff82017-10-03 14:49:14 +0100296 ScopedArenaSet<HInstruction*>* iset_;
Aart Bik8c4a8542016-10-06 11:36:57 -0700297
Aart Bikb29f6842017-07-28 15:58:41 -0700298 // Temporary bookkeeping of reduction instructions. Mapping is two-fold:
299 // (1) reductions in the loop-body are mapped back to their phi definition,
300 // (2) phi definitions are mapped to their initial value (updated during
301 // code generation to feed the proper values into the new chain).
302 // Contents reside in phase-local heap memory.
Vladimir Markoca6fff82017-10-03 14:49:14 +0100303 ScopedArenaSafeMap<HInstruction*, HInstruction*>* reductions_;
Aart Bik482095d2016-10-10 15:39:10 -0700304
Aart Bikdf7822e2016-12-06 10:05:30 -0800305 // Flag that tracks if any simplifications have occurred.
306 bool simplified_;
307
Aart Bikf8f5a162017-02-06 15:35:29 -0800308 // Number of "lanes" for selected packed type.
309 uint32_t vector_length_;
310
311 // Set of array references in the vector loop.
312 // Contents reside in phase-local heap memory.
Vladimir Markoca6fff82017-10-03 14:49:14 +0100313 ScopedArenaSet<ArrayReference>* vector_refs_;
Aart Bikf8f5a162017-02-06 15:35:29 -0800314
Aart Bik38a3f212017-10-20 17:02:21 -0700315 // Static or dynamic loop peeling for alignment.
316 uint32_t vector_static_peeling_factor_;
317 const ArrayReference* vector_dynamic_peeling_candidate_;
Aart Bik14a68b42017-06-08 14:06:58 -0700318
319 // Dynamic data dependence test of the form a != b.
320 HInstruction* vector_runtime_test_a_;
321 HInstruction* vector_runtime_test_b_;
322
Aart Bikf8f5a162017-02-06 15:35:29 -0800323 // Mapping used during vectorization synthesis for both the scalar peeling/cleanup
Aart Bik14a68b42017-06-08 14:06:58 -0700324 // loop (mode is kSequential) and the actual vector loop (mode is kVector). The data
Aart Bikf8f5a162017-02-06 15:35:29 -0800325 // structure maps original instructions into the new instructions.
326 // Contents reside in phase-local heap memory.
Vladimir Markoca6fff82017-10-03 14:49:14 +0100327 ScopedArenaSafeMap<HInstruction*, HInstruction*>* vector_map_;
Aart Bikf8f5a162017-02-06 15:35:29 -0800328
Aart Bik0148de42017-09-05 09:25:01 -0700329 // Permanent mapping used during vectorization synthesis.
330 // Contents reside in phase-local heap memory.
Vladimir Markoca6fff82017-10-03 14:49:14 +0100331 ScopedArenaSafeMap<HInstruction*, HInstruction*>* vector_permanent_map_;
Aart Bik0148de42017-09-05 09:25:01 -0700332
Aart Bikf8f5a162017-02-06 15:35:29 -0800333 // Temporary vectorization bookkeeping.
Aart Bik14a68b42017-06-08 14:06:58 -0700334 VectorMode vector_mode_; // synthesis mode
Aart Bikf8f5a162017-02-06 15:35:29 -0800335 HBasicBlock* vector_preheader_; // preheader of the new loop
336 HBasicBlock* vector_header_; // header of the new loop
337 HBasicBlock* vector_body_; // body of the new loop
Aart Bik14a68b42017-06-08 14:06:58 -0700338 HInstruction* vector_index_; // normalized index of the new loop
Aart Bikf8f5a162017-02-06 15:35:29 -0800339
Artem Serov121f2032017-10-23 19:19:06 +0100340 // Helper for target-specific behaviour for loop optimizations.
Artem Serovcf43fb62018-02-15 14:43:48 +0000341 ArchNoOptsLoopHelper* arch_loop_helper_;
Artem Serov121f2032017-10-23 19:19:06 +0100342
Aart Bik281c6812016-08-26 11:31:48 -0700343 friend class LoopOptimizationTest;
344
345 DISALLOW_COPY_AND_ASSIGN(HLoopOptimization);
346};
347
348} // namespace art
349
350#endif // ART_COMPILER_OPTIMIZING_LOOP_OPTIMIZATION_H_