blob: afd5a5d6e79845515368562e8e8b9ca89aabdf2b [file] [log] [blame]
Artem Serov7f4aff62017-06-21 17:02:18 +01001/*
2 * Copyright (C) 2017 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#ifndef ART_COMPILER_OPTIMIZING_SUPERBLOCK_CLONER_H_
18#define ART_COMPILER_OPTIMIZING_SUPERBLOCK_CLONER_H_
19
20#include "base/arena_bit_vector.h"
21#include "base/arena_containers.h"
22#include "base/bit_vector-inl.h"
23#include "nodes.h"
24
25namespace art {
26
27static const bool kSuperblockClonerLogging = false;
Artem Serov7f4aff62017-06-21 17:02:18 +010028
29// Represents an edge between two HBasicBlocks.
30//
31// Note: objects of this class are small - pass them by value.
32class HEdge : public ArenaObject<kArenaAllocSuperblockCloner> {
33 public:
34 HEdge(HBasicBlock* from, HBasicBlock* to) : from_(from->GetBlockId()), to_(to->GetBlockId()) {
35 DCHECK_NE(to_, kInvalidBlockId);
36 DCHECK_NE(from_, kInvalidBlockId);
37 }
38 HEdge(uint32_t from, uint32_t to) : from_(from), to_(to) {
39 DCHECK_NE(to_, kInvalidBlockId);
40 DCHECK_NE(from_, kInvalidBlockId);
41 }
42 HEdge() : from_(kInvalidBlockId), to_(kInvalidBlockId) {}
43
44 uint32_t GetFrom() const { return from_; }
45 uint32_t GetTo() const { return to_; }
46
47 bool operator==(const HEdge& other) const {
48 return this->from_ == other.from_ && this->to_ == other.to_;
49 }
50
51 bool operator!=(const HEdge& other) const { return !operator==(other); }
52 void Dump(std::ostream& stream) const;
53
54 // Returns whether an edge represents a valid edge in CF graph: whether the from_ block
55 // has to_ block as a successor.
56 bool IsValid() const { return from_ != kInvalidBlockId && to_ != kInvalidBlockId; }
57
58 private:
59 // Predecessor block id.
60 uint32_t from_;
61 // Successor block id.
62 uint32_t to_;
63};
64
65// Returns whether a HEdge edge corresponds to an existing edge in the graph.
66inline bool IsEdgeValid(HEdge edge, HGraph* graph) {
67 if (!edge.IsValid()) {
68 return false;
69 }
70 uint32_t from = edge.GetFrom();
71 uint32_t to = edge.GetTo();
72 if (from >= graph->GetBlocks().size() || to >= graph->GetBlocks().size()) {
73 return false;
74 }
75
76 HBasicBlock* block_from = graph->GetBlocks()[from];
77 HBasicBlock* block_to = graph->GetBlocks()[to];
78 if (block_from == nullptr || block_to == nullptr) {
79 return false;
80 }
81
82 return block_from->HasSuccessor(block_to, 0);
83}
84
85// SuperblockCloner provides a feature of cloning subgraphs in a smart, high level way without
86// fine grain manipulation with IR; data flow and graph properties are resolved/adjusted
87// automatically. The clone transformation is defined by specifying a set of basic blocks to copy
88// and a set of rules how to treat edges, remap their successors. By using this approach such
89// optimizations as Branch Target Expansion, Loop Peeling, Loop Unrolling can be implemented.
90//
91// The idea of the transformation is based on "Superblock cloning" technique described in the book
92// "Engineering a Compiler. Second Edition", Keith D. Cooper, Linda Torczon, Rice University
93// Houston, Texas. 2nd edition, Morgan Kaufmann. The original paper is "The Superblock: An Efective
94// Technique for VLIW and Superscalar Compilation" by Hwu, W.M.W., Mahlke, S.A., Chen, W.Y. et al.
95// J Supercomput (1993) 7: 229. doi:10.1007/BF01205185.
96//
97// There are two states of the IR graph: original graph (before the transformation) and
98// copy graph (after).
99//
100// Before the transformation:
101// Defining a set of basic block to copy (orig_bb_set) partitions all of the edges in the original
102// graph into 4 categories/sets (use the following notation for edges: "(pred, succ)",
103// where pred, succ - basic blocks):
104// - internal - pred, succ are members of ‘orig_bb_set’.
105// - outside - pred, succ are not members of ‘orig_bb_set’.
106// - incoming - pred is not a member of ‘orig_bb_set’, succ is.
107// - outgoing - pred is a member of ‘orig_bb_set’, succ is not.
108//
109// Transformation:
110//
111// 1. Initial cloning:
112// 1.1. For each ‘orig_block’ in orig_bb_set create a copy ‘copy_block’; these new blocks
113// form ‘copy_bb_set’.
114// 1.2. For each edge (X, Y) from internal set create an edge (X_1, Y_1) where X_1, Y_1 are the
115// copies of X, Y basic blocks correspondingly; these new edges form ‘copy_internal’ edge
116// set.
117// 1.3. For each edge (X, Y) from outgoing set create an edge (X_1, Y_1) where X_1, Y_1 are the
118// copies of X, Y basic blocks correspondingly; these new edges form ‘copy_outgoing’ edge
119// set.
120// 2. Successors remapping.
121// 2.1. 'remap_orig_internal’ - set of edges (X, Y) from ‘orig_bb_set’ whose successors should
122// be remapped to copy nodes: ((X, Y) will be transformed into (X, Y_1)).
123// 2.2. ‘remap_copy_internal’ - set of edges (X_1, Y_1) from ‘copy_bb_set’ whose successors
124// should be remapped to copy nodes: (X_1, Y_1) will be transformed into (X_1, Y)).
125// 2.3. 'remap_incoming’ - set of edges (X, Y) from the ‘incoming’ edge set in the original graph
126// whose successors should be remapped to copies nodes: ((X, Y) will be transformed into
127// (X, Y_1)).
128// 3. Adjust control flow structures and relations (dominance, reverse post order, loops, etc).
129// 4. Fix/resolve data flow.
130// 5. Do cleanups (DCE, critical edges splitting, etc).
131//
132class SuperblockCloner : public ValueObject {
133 public:
134 // TODO: Investigate optimal types for the containers.
135 using HBasicBlockMap = ArenaSafeMap<HBasicBlock*, HBasicBlock*>;
136 using HInstructionMap = ArenaSafeMap<HInstruction*, HInstruction*>;
137 using HBasicBlockSet = ArenaBitVector;
138 using HEdgeSet = ArenaHashSet<HEdge>;
139
140 SuperblockCloner(HGraph* graph,
141 const HBasicBlockSet* orig_bb_set,
142 HBasicBlockMap* bb_map,
143 HInstructionMap* hir_map);
144
145 // Sets edge successor remapping info specified by corresponding edge sets.
146 void SetSuccessorRemappingInfo(const HEdgeSet* remap_orig_internal,
147 const HEdgeSet* remap_copy_internal,
148 const HEdgeSet* remap_incoming);
149
150 // Returns whether the specified subgraph is copyable.
151 // TODO: Start from small range of graph patterns then extend it.
152 bool IsSubgraphClonable() const;
153
Artem Serov02eebcf2017-12-13 19:48:31 +0000154 // Returns whether selected subgraph satisfies the criteria for fast data flow resolution
155 // when iterative DF algorithm is not required and dominators/instructions inputs can be
156 // trivially adjusted.
157 //
158 // TODO: formally describe the criteria.
159 //
160 // Loop peeling and unrolling satisfy the criteria.
161 bool IsFastCase() const;
162
Artem Serov7f4aff62017-06-21 17:02:18 +0100163 // Runs the copy algorithm according to the description.
164 void Run();
165
166 // Cleans up the graph after transformation: splits critical edges, recalculates control flow
167 // information (back-edges, dominators, loop info, etc), eliminates redundant phis.
168 void CleanUp();
169
170 // Returns a clone of a basic block (orig_block).
171 //
172 // - The copy block will have no successors/predecessors; they should be set up manually.
173 // - For each instruction in the orig_block a copy is created and inserted into the copy block;
174 // this correspondence is recorded in the map (old instruction, new instruction).
175 // - Graph HIR is not valid after this transformation: all of the HIRs have their inputs the
176 // same, as in the original block, PHIs do not reflect a correct correspondence between the
177 // value and predecessors (as the copy block has no predecessors by now), etc.
178 HBasicBlock* CloneBasicBlock(const HBasicBlock* orig_block);
179
180 // Creates a clone for each basic blocks in orig_bb_set adding corresponding entries into bb_map_
181 // and hir_map_.
182 void CloneBasicBlocks();
183
184 HInstruction* GetInstrCopy(HInstruction* orig_instr) const {
185 auto copy_input_iter = hir_map_->find(orig_instr);
186 DCHECK(copy_input_iter != hir_map_->end());
187 return copy_input_iter->second;
188 }
189
190 HBasicBlock* GetBlockCopy(HBasicBlock* orig_block) const {
191 HBasicBlock* block = bb_map_->Get(orig_block);
192 DCHECK(block != nullptr);
193 return block;
194 }
195
196 HInstruction* GetInstrOrig(HInstruction* copy_instr) const {
197 for (auto it : *hir_map_) {
198 if (it.second == copy_instr) {
199 return it.first;
200 }
201 }
202 return nullptr;
203 }
204
205 bool IsInOrigBBSet(uint32_t block_id) const {
206 return orig_bb_set_.IsBitSet(block_id);
207 }
208
209 bool IsInOrigBBSet(const HBasicBlock* block) const {
210 return IsInOrigBBSet(block->GetBlockId());
211 }
212
Artem Serov02eebcf2017-12-13 19:48:31 +0000213 // Returns the area (the most outer loop) in the graph for which control flow (back edges, loops,
214 // dominators) needs to be adjusted.
215 HLoopInformation* GetRegionToBeAdjusted() const {
216 return outer_loop_;
217 }
218
Artem Serov7f4aff62017-06-21 17:02:18 +0100219 private:
220 // Fills the 'exits' vector with the subgraph exits.
221 void SearchForSubgraphExits(ArenaVector<HBasicBlock*>* exits);
222
Artem Serov02eebcf2017-12-13 19:48:31 +0000223 // Finds and records information about the area in the graph for which control flow (back edges,
Artem Serov7f4aff62017-06-21 17:02:18 +0100224 // loops, dominators) needs to be adjusted.
225 void FindAndSetLocalAreaForAdjustments();
226
227 // Remaps edges' successors according to the info specified in the edges sets.
228 //
229 // Only edge successors/predecessors and phis' input records (to have a correspondence between
230 // a phi input record (not value) and a block's predecessor) are adjusted at this stage: neither
231 // phis' nor instructions' inputs values are resolved.
232 void RemapEdgesSuccessors();
233
Artem Serov02eebcf2017-12-13 19:48:31 +0000234 // Adjusts control flow (back edges, loops, dominators) for the local area defined by
Artem Serov7f4aff62017-06-21 17:02:18 +0100235 // FindAndSetLocalAreaForAdjustments.
236 void AdjustControlFlowInfo();
237
238 // Resolves Data Flow - adjusts phis' and instructions' inputs in order to have a valid graph in
239 // the SSA form.
240 void ResolveDataFlow();
241
242 //
243 // Helpers for CloneBasicBlock.
244 //
245
246 // Adjusts copy instruction's inputs: if the input of the original instruction is defined in the
247 // orig_bb_set, replaces it with a corresponding copy otherwise leaves it the same as original.
248 void ReplaceInputsWithCopies(HInstruction* copy_instr);
249
250 // Recursively clones the environment for the copy instruction. If the input of the original
251 // environment is defined in the orig_bb_set, replaces it with a corresponding copy otherwise
252 // leaves it the same as original.
253 void DeepCloneEnvironmentWithRemapping(HInstruction* copy_instr, const HEnvironment* orig_env);
254
255 //
256 // Helpers for RemapEdgesSuccessors.
257 //
258
259 // Remaps incoming or original internal edge to its copy, adjusts the phi inputs in orig_succ and
260 // copy_succ.
261 void RemapOrigInternalOrIncomingEdge(HBasicBlock* orig_block, HBasicBlock* orig_succ);
262
263 // Adds copy internal edge (from copy_block to copy_succ), updates phis in the copy_succ.
264 void AddCopyInternalEdge(HBasicBlock* orig_block, HBasicBlock* orig_succ);
265
266 // Remaps copy internal edge to its origin, adjusts the phi inputs in orig_succ.
267 void RemapCopyInternalEdge(HBasicBlock* orig_block, HBasicBlock* orig_succ);
268
269 //
270 // Local versions of control flow calculation/adjustment routines.
271 //
272
273 void FindBackEdgesLocal(HBasicBlock* entry_block, ArenaBitVector* local_set);
274 void RecalculateBackEdgesInfo(ArenaBitVector* outer_loop_bb_set);
275 GraphAnalysisResult AnalyzeLoopsLocally(ArenaBitVector* outer_loop_bb_set);
276 void CleanUpControlFlow();
277
278 //
279 // Helpers for ResolveDataFlow
280 //
281
282 // Resolves the inputs of the phi.
283 void ResolvePhi(HPhi* phi);
284
285 //
286 // Debug and logging methods.
287 //
288 void CheckInstructionInputsRemapping(HInstruction* orig_instr);
Artem Serov02eebcf2017-12-13 19:48:31 +0000289 bool CheckRemappingInfoIsValid();
290 void VerifyGraph();
291 void DumpInputSets();
Artem Serov7f4aff62017-06-21 17:02:18 +0100292
293 HBasicBlock* GetBlockById(uint32_t block_id) const {
294 DCHECK(block_id < graph_->GetBlocks().size());
295 HBasicBlock* block = graph_->GetBlocks()[block_id];
296 DCHECK(block != nullptr);
297 return block;
298 }
299
300 HGraph* const graph_;
301 ArenaAllocator* const arena_;
302
303 // Set of basic block in the original graph to be copied.
304 HBasicBlockSet orig_bb_set_;
305
306 // Sets of edges which require successors remapping.
307 const HEdgeSet* remap_orig_internal_;
308 const HEdgeSet* remap_copy_internal_;
309 const HEdgeSet* remap_incoming_;
310
311 // Correspondence map for blocks: (original block, copy block).
312 HBasicBlockMap* bb_map_;
313 // Correspondence map for instructions: (original HInstruction, copy HInstruction).
314 HInstructionMap* hir_map_;
Artem Serov02eebcf2017-12-13 19:48:31 +0000315 // Area in the graph for which control flow (back edges, loops, dominators) needs to be adjusted.
Artem Serov7f4aff62017-06-21 17:02:18 +0100316 HLoopInformation* outer_loop_;
317 HBasicBlockSet outer_loop_bb_set_;
318
319 ART_FRIEND_TEST(SuperblockClonerTest, AdjustControlFlowInfo);
Artem Serov02eebcf2017-12-13 19:48:31 +0000320 ART_FRIEND_TEST(SuperblockClonerTest, IsGraphConnected);
Artem Serov7f4aff62017-06-21 17:02:18 +0100321
322 DISALLOW_COPY_AND_ASSIGN(SuperblockCloner);
323};
324
Artem Serov02eebcf2017-12-13 19:48:31 +0000325// Helper class to perform loop peeling/unrolling.
326//
327// This helper should be used when correspondence map between original and copied
328// basic blocks/instructions are demanded.
329class PeelUnrollHelper : public ValueObject {
330 public:
331 explicit PeelUnrollHelper(HLoopInformation* info,
332 SuperblockCloner::HBasicBlockMap* bb_map,
333 SuperblockCloner::HInstructionMap* hir_map) :
334 loop_info_(info),
335 cloner_(info->GetHeader()->GetGraph(), &info->GetBlocks(), bb_map, hir_map) {
336 // For now do peeling/unrolling only for natural loops.
337 DCHECK(!info->IsIrreducible());
338 }
339
340 // Returns whether the loop can be peeled/unrolled (static function).
341 static bool IsLoopClonable(HLoopInformation* loop_info);
342
343 // Returns whether the loop can be peeled/unrolled.
344 bool IsLoopClonable() const { return cloner_.IsSubgraphClonable(); }
345
346 HBasicBlock* DoPeeling() { return DoPeelUnrollImpl(/* to_unroll */ false); }
347 HBasicBlock* DoUnrolling() { return DoPeelUnrollImpl(/* to_unroll */ true); }
348 HLoopInformation* GetRegionToBeAdjusted() const { return cloner_.GetRegionToBeAdjusted(); }
349
350 protected:
351 // Applies loop peeling/unrolling for the loop specified by 'loop_info'.
352 //
353 // Depending on 'do_unroll' either unrolls loop by 2 or peels one iteration from it.
354 HBasicBlock* DoPeelUnrollImpl(bool to_unroll);
355
356 private:
357 HLoopInformation* loop_info_;
358 SuperblockCloner cloner_;
359
360 DISALLOW_COPY_AND_ASSIGN(PeelUnrollHelper);
361};
362
363// Helper class to perform loop peeling/unrolling.
364//
365// This helper should be used when there is no need to get correspondence information between
366// original and copied basic blocks/instructions.
367class PeelUnrollSimpleHelper : public ValueObject {
368 public:
369 explicit PeelUnrollSimpleHelper(HLoopInformation* info);
370 bool IsLoopClonable() const { return helper_.IsLoopClonable(); }
371 HBasicBlock* DoPeeling() { return helper_.DoPeeling(); }
372 HBasicBlock* DoUnrolling() { return helper_.DoUnrolling(); }
373 HLoopInformation* GetRegionToBeAdjusted() const { return helper_.GetRegionToBeAdjusted(); }
374
375 private:
376 SuperblockCloner::HBasicBlockMap bb_map_;
377 SuperblockCloner::HInstructionMap hir_map_;
378 PeelUnrollHelper helper_;
379
380 DISALLOW_COPY_AND_ASSIGN(PeelUnrollSimpleHelper);
381};
382
383// Collects edge remapping info for loop peeling/unrolling for the loop specified by loop info.
384void CollectRemappingInfoForPeelUnroll(bool to_unroll,
385 HLoopInformation* loop_info,
386 SuperblockCloner::HEdgeSet* remap_orig_internal,
387 SuperblockCloner::HEdgeSet* remap_copy_internal,
388 SuperblockCloner::HEdgeSet* remap_incoming);
389
390// Returns whether blocks from 'work_set' are reachable from the rest of the graph.
391//
392// Returns whether such a set 'outer_entries' of basic blocks exists that:
393// - each block from 'outer_entries' is not from 'work_set'.
394// - each block from 'work_set' is reachable from at least one block from 'outer_entries'.
395//
396// After the function returns work_set contains only blocks from the original 'work_set'
397// which are unreachable from the rest of the graph.
398bool IsSubgraphConnected(SuperblockCloner::HBasicBlockSet* work_set, HGraph* graph);
399
400// Returns a common predecessor of loop1 and loop2 in the loop tree or nullptr if it is the whole
401// graph.
402HLoopInformation* FindCommonLoop(HLoopInformation* loop1, HLoopInformation* loop2);
Artem Serov7f4aff62017-06-21 17:02:18 +0100403} // namespace art
404
405namespace std {
406
407template <>
408struct hash<art::HEdge> {
409 size_t operator()(art::HEdge const& x) const noexcept {
410 // Use Cantor pairing function as the hash function.
Artem Serov02eebcf2017-12-13 19:48:31 +0000411 size_t a = x.GetFrom();
412 size_t b = x.GetTo();
Artem Serov7f4aff62017-06-21 17:02:18 +0100413 return (a + b) * (a + b + 1) / 2 + b;
414 }
415};
Artem Serov02eebcf2017-12-13 19:48:31 +0000416ostream& operator<<(ostream& os, const art::HEdge& e);
Artem Serov7f4aff62017-06-21 17:02:18 +0100417
418} // namespace std
419
420#endif // ART_COMPILER_OPTIMIZING_SUPERBLOCK_CLONER_H_