blob: 49be8a3fb498e78a82c66c93b7d00e107035727b [file] [log] [blame]
/*
 * Copyright (C) 2016 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16
17#ifndef ART_COMPILER_OPTIMIZING_LOOP_OPTIMIZATION_H_
18#define ART_COMPILER_OPTIMIZING_LOOP_OPTIMIZATION_H_
19
#include <functional>

#include "induction_var_range.h"
#include "nodes.h"
#include "optimization.h"
23
24namespace art {
25
Aart Bik92685a82017-03-06 11:13:43 -080026class CompilerDriver;
27
Aart Bik281c6812016-08-26 11:31:48 -070028/**
29 * Loop optimizations. Builds a loop hierarchy and applies optimizations to
Aart Bikf8f5a162017-02-06 15:35:29 -080030 * the detected nested loops, such as removal of dead induction and empty loops
31 * and inner loop vectorization.
Aart Bik281c6812016-08-26 11:31:48 -070032 */
33class HLoopOptimization : public HOptimization {
34 public:
Aart Bik92685a82017-03-06 11:13:43 -080035 HLoopOptimization(HGraph* graph,
36 CompilerDriver* compiler_driver,
37 HInductionVarAnalysis* induction_analysis);
Aart Bik281c6812016-08-26 11:31:48 -070038
39 void Run() OVERRIDE;
40
41 static constexpr const char* kLoopOptimizationPassName = "loop_optimization";
42
43 private:
44 /**
45 * A single loop inside the loop hierarchy representation.
46 */
Aart Bik96202302016-10-04 17:33:56 -070047 struct LoopNode : public ArenaObject<kArenaAllocLoopOptimization> {
Aart Bik281c6812016-08-26 11:31:48 -070048 explicit LoopNode(HLoopInformation* lp_info)
49 : loop_info(lp_info),
50 outer(nullptr),
51 inner(nullptr),
52 previous(nullptr),
53 next(nullptr) {}
Aart Bikf8f5a162017-02-06 15:35:29 -080054 HLoopInformation* loop_info;
Aart Bik281c6812016-08-26 11:31:48 -070055 LoopNode* outer;
56 LoopNode* inner;
57 LoopNode* previous;
58 LoopNode* next;
59 };
60
Aart Bikf8f5a162017-02-06 15:35:29 -080061 /*
62 * Vectorization restrictions (bit mask).
63 */
64 enum VectorRestrictions {
Nicolas Geoffray982334c2017-09-02 12:54:16 +000065 kNone = 0, // no restrictions
66 kNoMul = 1, // no multiplication
67 kNoDiv = 2, // no division
68 kNoShift = 4, // no shift
69 kNoShr = 8, // no arithmetic shift right
70 kNoHiBits = 16, // "wider" operations cannot bring in higher order bits
71 kNoSignedHAdd = 32, // no signed halving add
72 kNoUnroundedHAdd = 64, // no unrounded halving add
73 kNoAbs = 128, // no absolute value
74 kNoMinMax = 256, // no min/max
75 kNoStringCharAt = 512, // no StringCharAt
Aart Bikf8f5a162017-02-06 15:35:29 -080076 };
Aart Bik96202302016-10-04 17:33:56 -070077
Aart Bikf8f5a162017-02-06 15:35:29 -080078 /*
79 * Vectorization mode during synthesis
80 * (sequential peeling/cleanup loop or vector loop).
81 */
82 enum VectorMode {
83 kSequential,
84 kVector
85 };
86
87 /*
88 * Representation of a unit-stride array reference.
89 */
90 struct ArrayReference {
91 ArrayReference(HInstruction* b, HInstruction* o, Primitive::Type t, bool l)
92 : base(b), offset(o), type(t), lhs(l) { }
93 bool operator<(const ArrayReference& other) const {
94 return
95 (base < other.base) ||
96 (base == other.base &&
97 (offset < other.offset || (offset == other.offset &&
98 (type < other.type ||
99 (type == other.type && lhs < other.lhs)))));
100 }
101 HInstruction* base; // base address
102 HInstruction* offset; // offset + i
103 Primitive::Type type; // component type
104 bool lhs; // def/use
105 };
106
Aart Bikb29f6842017-07-28 15:58:41 -0700107 //
Aart Bikf8f5a162017-02-06 15:35:29 -0800108 // Loop setup and traversal.
Aart Bikb29f6842017-07-28 15:58:41 -0700109 //
110
Aart Bikf8f5a162017-02-06 15:35:29 -0800111 void LocalRun();
Aart Bik281c6812016-08-26 11:31:48 -0700112 void AddLoop(HLoopInformation* loop_info);
113 void RemoveLoop(LoopNode* node);
Aart Bik281c6812016-08-26 11:31:48 -0700114
Aart Bikb29f6842017-07-28 15:58:41 -0700115 // Traverses all loops inner to outer to perform simplifications and optimizations.
116 // Returns true if loops nested inside current loop (node) have changed.
117 bool TraverseLoopsInnerToOuter(LoopNode* node);
118
119 //
Aart Bikf8f5a162017-02-06 15:35:29 -0800120 // Optimization.
Aart Bikb29f6842017-07-28 15:58:41 -0700121 //
122
Aart Bik281c6812016-08-26 11:31:48 -0700123 void SimplifyInduction(LoopNode* node);
Aart Bik482095d2016-10-10 15:39:10 -0700124 void SimplifyBlocks(LoopNode* node);
Aart Bikf8f5a162017-02-06 15:35:29 -0800125
Aart Bikb29f6842017-07-28 15:58:41 -0700126 // Performs optimizations specific to inner loop (empty loop removal,
127 // unrolling, vectorization). Returns true if anything changed.
128 bool OptimizeInnerLoop(LoopNode* node);
129
130 //
Aart Bikf8f5a162017-02-06 15:35:29 -0800131 // Vectorization analysis and synthesis.
Aart Bikb29f6842017-07-28 15:58:41 -0700132 //
133
Aart Bik14a68b42017-06-08 14:06:58 -0700134 bool ShouldVectorize(LoopNode* node, HBasicBlock* block, int64_t trip_count);
Aart Bikf8f5a162017-02-06 15:35:29 -0800135 void Vectorize(LoopNode* node, HBasicBlock* block, HBasicBlock* exit, int64_t trip_count);
136 void GenerateNewLoop(LoopNode* node,
137 HBasicBlock* block,
138 HBasicBlock* new_preheader,
139 HInstruction* lo,
140 HInstruction* hi,
Aart Bik14a68b42017-06-08 14:06:58 -0700141 HInstruction* step,
142 uint32_t unroll);
Aart Bikf8f5a162017-02-06 15:35:29 -0800143 bool VectorizeDef(LoopNode* node, HInstruction* instruction, bool generate_code);
144 bool VectorizeUse(LoopNode* node,
145 HInstruction* instruction,
146 bool generate_code,
147 Primitive::Type type,
148 uint64_t restrictions);
149 bool TrySetVectorType(Primitive::Type type, /*out*/ uint64_t* restrictions);
150 bool TrySetVectorLength(uint32_t length);
151 void GenerateVecInv(HInstruction* org, Primitive::Type type);
Aart Bik14a68b42017-06-08 14:06:58 -0700152 void GenerateVecSub(HInstruction* org, HInstruction* offset);
Aart Bikf8f5a162017-02-06 15:35:29 -0800153 void GenerateVecMem(HInstruction* org,
154 HInstruction* opa,
155 HInstruction* opb,
Aart Bik14a68b42017-06-08 14:06:58 -0700156 HInstruction* offset,
Aart Bikf8f5a162017-02-06 15:35:29 -0800157 Primitive::Type type);
Aart Bik304c8a52017-05-23 11:01:13 -0700158 void GenerateVecOp(HInstruction* org,
159 HInstruction* opa,
160 HInstruction* opb,
161 Primitive::Type type,
162 bool is_unsigned = false);
Aart Bik281c6812016-08-26 11:31:48 -0700163
Aart Bikf3e61ee2017-04-12 17:09:20 -0700164 // Vectorization idioms.
165 bool VectorizeHalvingAddIdiom(LoopNode* node,
166 HInstruction* instruction,
167 bool generate_code,
168 Primitive::Type type,
169 uint64_t restrictions);
170
Aart Bik14a68b42017-06-08 14:06:58 -0700171 // Vectorization heuristics.
172 bool IsVectorizationProfitable(int64_t trip_count);
Aart Bikb29f6842017-07-28 15:58:41 -0700173 void SetPeelingCandidate(const ArrayReference* candidate, int64_t trip_count);
Aart Bik14a68b42017-06-08 14:06:58 -0700174 uint32_t GetUnrollingFactor(HBasicBlock* block, int64_t trip_count);
175
Aart Bikb29f6842017-07-28 15:58:41 -0700176 //
Aart Bik6b69e0a2017-01-11 10:20:43 -0800177 // Helpers.
Aart Bikb29f6842017-07-28 15:58:41 -0700178 //
179
Aart Bikf8f5a162017-02-06 15:35:29 -0800180 bool TrySetPhiInduction(HPhi* phi, bool restrict_uses);
Aart Bikb29f6842017-07-28 15:58:41 -0700181 bool TrySetPhiReduction(HPhi* phi);
182
183 // Detects loop header with a single induction (returned in main_phi), possibly
184 // other phis for reductions, but no other side effects. Returns true on success.
185 bool TrySetSimpleLoopHeader(HBasicBlock* block, /*out*/ HPhi** main_phi);
186
Aart Bikcc42be02016-10-20 16:14:16 -0700187 bool IsEmptyBody(HBasicBlock* block);
Aart Bik482095d2016-10-10 15:39:10 -0700188 bool IsOnlyUsedAfterLoop(HLoopInformation* loop_info,
Aart Bik8c4a8542016-10-06 11:36:57 -0700189 HInstruction* instruction,
Aart Bik6b69e0a2017-01-11 10:20:43 -0800190 bool collect_loop_uses,
Aart Bik8c4a8542016-10-06 11:36:57 -0700191 /*out*/ int32_t* use_count);
Aart Bikf8f5a162017-02-06 15:35:29 -0800192 bool IsUsedOutsideLoop(HLoopInformation* loop_info,
193 HInstruction* instruction);
Nicolas Geoffray1a0a5192017-06-22 11:56:01 +0100194 bool TryReplaceWithLastValue(HLoopInformation* loop_info,
195 HInstruction* instruction,
196 HBasicBlock* block);
Aart Bikf8f5a162017-02-06 15:35:29 -0800197 bool TryAssignLastValue(HLoopInformation* loop_info,
198 HInstruction* instruction,
199 HBasicBlock* block,
200 bool collect_loop_uses);
Aart Bik6b69e0a2017-01-11 10:20:43 -0800201 void RemoveDeadInstructions(const HInstructionList& list);
Nicolas Geoffray1a0a5192017-06-22 11:56:01 +0100202 bool CanRemoveCycle(); // Whether the current 'iset_' is removable.
Aart Bik281c6812016-08-26 11:31:48 -0700203
Aart Bik92685a82017-03-06 11:13:43 -0800204 // Compiler driver (to query ISA features).
205 const CompilerDriver* compiler_driver_;
206
Aart Bik96202302016-10-04 17:33:56 -0700207 // Range information based on prior induction variable analysis.
Aart Bik281c6812016-08-26 11:31:48 -0700208 InductionVarRange induction_range_;
209
210 // Phase-local heap memory allocator for the loop optimizer. Storage obtained
Aart Bik96202302016-10-04 17:33:56 -0700211 // through this allocator is immediately released when the loop optimizer is done.
Nicolas Geoffrayebe16742016-10-05 09:55:42 +0100212 ArenaAllocator* loop_allocator_;
Aart Bik281c6812016-08-26 11:31:48 -0700213
Aart Bikf8f5a162017-02-06 15:35:29 -0800214 // Global heap memory allocator. Used to build HIR.
215 ArenaAllocator* global_allocator_;
216
Aart Bik96202302016-10-04 17:33:56 -0700217 // Entries into the loop hierarchy representation. The hierarchy resides
218 // in phase-local heap memory.
Aart Bik281c6812016-08-26 11:31:48 -0700219 LoopNode* top_loop_;
220 LoopNode* last_loop_;
221
Aart Bik8c4a8542016-10-06 11:36:57 -0700222 // Temporary bookkeeping of a set of instructions.
223 // Contents reside in phase-local heap memory.
224 ArenaSet<HInstruction*>* iset_;
225
Aart Bikb29f6842017-07-28 15:58:41 -0700226 // Temporary bookkeeping of reduction instructions. Mapping is two-fold:
227 // (1) reductions in the loop-body are mapped back to their phi definition,
228 // (2) phi definitions are mapped to their initial value (updated during
229 // code generation to feed the proper values into the new chain).
230 // Contents reside in phase-local heap memory.
231 ArenaSafeMap<HInstruction*, HInstruction*>* reductions_;
Aart Bik482095d2016-10-10 15:39:10 -0700232
Aart Bikdf7822e2016-12-06 10:05:30 -0800233 // Flag that tracks if any simplifications have occurred.
234 bool simplified_;
235
Aart Bikf8f5a162017-02-06 15:35:29 -0800236 // Number of "lanes" for selected packed type.
237 uint32_t vector_length_;
238
239 // Set of array references in the vector loop.
240 // Contents reside in phase-local heap memory.
241 ArenaSet<ArrayReference>* vector_refs_;
242
Aart Bik14a68b42017-06-08 14:06:58 -0700243 // Dynamic loop peeling candidate for alignment.
244 const ArrayReference* vector_peeling_candidate_;
245
246 // Dynamic data dependence test of the form a != b.
247 HInstruction* vector_runtime_test_a_;
248 HInstruction* vector_runtime_test_b_;
249
Aart Bikf8f5a162017-02-06 15:35:29 -0800250 // Mapping used during vectorization synthesis for both the scalar peeling/cleanup
Aart Bik14a68b42017-06-08 14:06:58 -0700251 // loop (mode is kSequential) and the actual vector loop (mode is kVector). The data
Aart Bikf8f5a162017-02-06 15:35:29 -0800252 // structure maps original instructions into the new instructions.
253 // Contents reside in phase-local heap memory.
254 ArenaSafeMap<HInstruction*, HInstruction*>* vector_map_;
255
256 // Temporary vectorization bookkeeping.
Aart Bik14a68b42017-06-08 14:06:58 -0700257 VectorMode vector_mode_; // synthesis mode
Aart Bikf8f5a162017-02-06 15:35:29 -0800258 HBasicBlock* vector_preheader_; // preheader of the new loop
259 HBasicBlock* vector_header_; // header of the new loop
260 HBasicBlock* vector_body_; // body of the new loop
Aart Bik14a68b42017-06-08 14:06:58 -0700261 HInstruction* vector_index_; // normalized index of the new loop
Aart Bikf8f5a162017-02-06 15:35:29 -0800262
Aart Bik281c6812016-08-26 11:31:48 -0700263 friend class LoopOptimizationTest;
264
265 DISALLOW_COPY_AND_ASSIGN(HLoopOptimization);
266};
267
268} // namespace art
269
270#endif // ART_COMPILER_OPTIMIZING_LOOP_OPTIMIZATION_H_