Artem Serov | 7f4aff6 | 2017-06-21 17:02:18 +0100 | [diff] [blame] | 1 | /* |
| 2 | * Copyright (C) 2017 The Android Open Source Project |
| 3 | * |
| 4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | * you may not use this file except in compliance with the License. |
| 6 | * You may obtain a copy of the License at |
| 7 | * |
| 8 | * http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | * |
| 10 | * Unless required by applicable law or agreed to in writing, software |
| 11 | * distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | * See the License for the specific language governing permissions and |
| 14 | * limitations under the License. |
| 15 | */ |
| 16 | |
| 17 | #ifndef ART_COMPILER_OPTIMIZING_SUPERBLOCK_CLONER_H_ |
| 18 | #define ART_COMPILER_OPTIMIZING_SUPERBLOCK_CLONER_H_ |
| 19 | |
| 20 | #include "base/arena_bit_vector.h" |
| 21 | #include "base/arena_containers.h" |
| 22 | #include "base/bit_vector-inl.h" |
| 23 | #include "nodes.h" |
| 24 | |
Vladimir Marko | 0a51605 | 2019-10-14 13:00:44 +0000 | [diff] [blame] | 25 | namespace art { |
Artem Serov | 7f4aff6 | 2017-06-21 17:02:18 +0100 | [diff] [blame] | 26 | |
Nicolas Geoffray | 256c94b | 2019-04-29 10:55:09 +0100 | [diff] [blame] | 27 | class InductionVarRange; |
| 28 | |
Artem Serov | 7f4aff6 | 2017-06-21 17:02:18 +0100 | [diff] [blame] | 29 | static const bool kSuperblockClonerLogging = false; |
Artem Serov | c8150b5 | 2019-07-31 18:28:00 +0100 | [diff] [blame] | 30 | static const bool kSuperblockClonerVerify = false; |
Artem Serov | 7f4aff6 | 2017-06-21 17:02:18 +0100 | [diff] [blame] | 31 | |
| 32 | // Represents an edge between two HBasicBlocks. |
| 33 | // |
| 34 | // Note: objects of this class are small - pass them by value. |
| 35 | class HEdge : public ArenaObject<kArenaAllocSuperblockCloner> { |
| 36 | public: |
| 37 | HEdge(HBasicBlock* from, HBasicBlock* to) : from_(from->GetBlockId()), to_(to->GetBlockId()) { |
| 38 | DCHECK_NE(to_, kInvalidBlockId); |
| 39 | DCHECK_NE(from_, kInvalidBlockId); |
| 40 | } |
| 41 | HEdge(uint32_t from, uint32_t to) : from_(from), to_(to) { |
| 42 | DCHECK_NE(to_, kInvalidBlockId); |
| 43 | DCHECK_NE(from_, kInvalidBlockId); |
| 44 | } |
| 45 | HEdge() : from_(kInvalidBlockId), to_(kInvalidBlockId) {} |
| 46 | |
| 47 | uint32_t GetFrom() const { return from_; } |
| 48 | uint32_t GetTo() const { return to_; } |
| 49 | |
| 50 | bool operator==(const HEdge& other) const { |
| 51 | return this->from_ == other.from_ && this->to_ == other.to_; |
| 52 | } |
| 53 | |
| 54 | bool operator!=(const HEdge& other) const { return !operator==(other); } |
| 55 | void Dump(std::ostream& stream) const; |
| 56 | |
| 57 | // Returns whether an edge represents a valid edge in CF graph: whether the from_ block |
| 58 | // has to_ block as a successor. |
| 59 | bool IsValid() const { return from_ != kInvalidBlockId && to_ != kInvalidBlockId; } |
| 60 | |
| 61 | private: |
| 62 | // Predecessor block id. |
| 63 | uint32_t from_; |
| 64 | // Successor block id. |
| 65 | uint32_t to_; |
| 66 | }; |
| 67 | |
| 68 | // Returns whether a HEdge edge corresponds to an existing edge in the graph. |
| 69 | inline bool IsEdgeValid(HEdge edge, HGraph* graph) { |
| 70 | if (!edge.IsValid()) { |
| 71 | return false; |
| 72 | } |
| 73 | uint32_t from = edge.GetFrom(); |
| 74 | uint32_t to = edge.GetTo(); |
| 75 | if (from >= graph->GetBlocks().size() || to >= graph->GetBlocks().size()) { |
| 76 | return false; |
| 77 | } |
| 78 | |
| 79 | HBasicBlock* block_from = graph->GetBlocks()[from]; |
| 80 | HBasicBlock* block_to = graph->GetBlocks()[to]; |
| 81 | if (block_from == nullptr || block_to == nullptr) { |
| 82 | return false; |
| 83 | } |
| 84 | |
| 85 | return block_from->HasSuccessor(block_to, 0); |
| 86 | } |
| 87 | |
| 88 | // SuperblockCloner provides a feature of cloning subgraphs in a smart, high level way without |
| 89 | // fine grain manipulation with IR; data flow and graph properties are resolved/adjusted |
| 90 | // automatically. The clone transformation is defined by specifying a set of basic blocks to copy |
| 91 | // and a set of rules how to treat edges, remap their successors. By using this approach such |
Artem Serov | 0f5b2bf | 2019-10-23 14:07:41 +0100 | [diff] [blame] | 92 | // optimizations as Branch Target Expansion, Loop Peeling, Loop Unrolling, Loop Versioning can be |
| 93 | // implemented. |
Artem Serov | 7f4aff6 | 2017-06-21 17:02:18 +0100 | [diff] [blame] | 94 | // |
| 95 | // The idea of the transformation is based on "Superblock cloning" technique described in the book |
| 96 | // "Engineering a Compiler. Second Edition", Keith D. Cooper, Linda Torczon, Rice University |
| 97 | // Houston, Texas. 2nd edition, Morgan Kaufmann. The original paper is "The Superblock: An Efective |
| 98 | // Technique for VLIW and Superscalar Compilation" by Hwu, W.M.W., Mahlke, S.A., Chen, W.Y. et al. |
| 99 | // J Supercomput (1993) 7: 229. doi:10.1007/BF01205185. |
| 100 | // |
| 101 | // There are two states of the IR graph: original graph (before the transformation) and |
| 102 | // copy graph (after). |
| 103 | // |
| 104 | // Before the transformation: |
| 105 | // Defining a set of basic block to copy (orig_bb_set) partitions all of the edges in the original |
| 106 | // graph into 4 categories/sets (use the following notation for edges: "(pred, succ)", |
| 107 | // where pred, succ - basic blocks): |
| 108 | // - internal - pred, succ are members of ‘orig_bb_set’. |
| 109 | // - outside - pred, succ are not members of ‘orig_bb_set’. |
| 110 | // - incoming - pred is not a member of ‘orig_bb_set’, succ is. |
| 111 | // - outgoing - pred is a member of ‘orig_bb_set’, succ is not. |
| 112 | // |
| 113 | // Transformation: |
| 114 | // |
| 115 | // 1. Initial cloning: |
| 116 | // 1.1. For each ‘orig_block’ in orig_bb_set create a copy ‘copy_block’; these new blocks |
| 117 | // form ‘copy_bb_set’. |
| 118 | // 1.2. For each edge (X, Y) from internal set create an edge (X_1, Y_1) where X_1, Y_1 are the |
| 119 | // copies of X, Y basic blocks correspondingly; these new edges form ‘copy_internal’ edge |
| 120 | // set. |
| 121 | // 1.3. For each edge (X, Y) from outgoing set create an edge (X_1, Y_1) where X_1, Y_1 are the |
| 122 | // copies of X, Y basic blocks correspondingly; these new edges form ‘copy_outgoing’ edge |
| 123 | // set. |
| 124 | // 2. Successors remapping. |
| 125 | // 2.1. 'remap_orig_internal’ - set of edges (X, Y) from ‘orig_bb_set’ whose successors should |
| 126 | // be remapped to copy nodes: ((X, Y) will be transformed into (X, Y_1)). |
| 127 | // 2.2. ‘remap_copy_internal’ - set of edges (X_1, Y_1) from ‘copy_bb_set’ whose successors |
| 128 | // should be remapped to copy nodes: (X_1, Y_1) will be transformed into (X_1, Y)). |
| 129 | // 2.3. 'remap_incoming’ - set of edges (X, Y) from the ‘incoming’ edge set in the original graph |
| 130 | // whose successors should be remapped to copies nodes: ((X, Y) will be transformed into |
| 131 | // (X, Y_1)). |
| 132 | // 3. Adjust control flow structures and relations (dominance, reverse post order, loops, etc). |
| 133 | // 4. Fix/resolve data flow. |
| 134 | // 5. Do cleanups (DCE, critical edges splitting, etc). |
| 135 | // |
| 136 | class SuperblockCloner : public ValueObject { |
| 137 | public: |
| 138 | // TODO: Investigate optimal types for the containers. |
| 139 | using HBasicBlockMap = ArenaSafeMap<HBasicBlock*, HBasicBlock*>; |
| 140 | using HInstructionMap = ArenaSafeMap<HInstruction*, HInstruction*>; |
| 141 | using HBasicBlockSet = ArenaBitVector; |
| 142 | using HEdgeSet = ArenaHashSet<HEdge>; |
| 143 | |
| 144 | SuperblockCloner(HGraph* graph, |
| 145 | const HBasicBlockSet* orig_bb_set, |
| 146 | HBasicBlockMap* bb_map, |
Nicolas Geoffray | 256c94b | 2019-04-29 10:55:09 +0100 | [diff] [blame] | 147 | HInstructionMap* hir_map, |
| 148 | InductionVarRange* induction_range); |
Artem Serov | 7f4aff6 | 2017-06-21 17:02:18 +0100 | [diff] [blame] | 149 | |
| 150 | // Sets edge successor remapping info specified by corresponding edge sets. |
| 151 | void SetSuccessorRemappingInfo(const HEdgeSet* remap_orig_internal, |
| 152 | const HEdgeSet* remap_copy_internal, |
| 153 | const HEdgeSet* remap_incoming); |
| 154 | |
| 155 | // Returns whether the specified subgraph is copyable. |
| 156 | // TODO: Start from small range of graph patterns then extend it. |
| 157 | bool IsSubgraphClonable() const; |
| 158 | |
Artem Serov | 02eebcf | 2017-12-13 19:48:31 +0000 | [diff] [blame] | 159 | // Returns whether selected subgraph satisfies the criteria for fast data flow resolution |
| 160 | // when iterative DF algorithm is not required and dominators/instructions inputs can be |
| 161 | // trivially adjusted. |
| 162 | // |
| 163 | // TODO: formally describe the criteria. |
| 164 | // |
Artem Serov | 0f5b2bf | 2019-10-23 14:07:41 +0100 | [diff] [blame] | 165 | // Loop peeling, unrolling and versioning satisfy the criteria. |
Artem Serov | 02eebcf | 2017-12-13 19:48:31 +0000 | [diff] [blame] | 166 | bool IsFastCase() const; |
| 167 | |
Artem Serov | 7f4aff6 | 2017-06-21 17:02:18 +0100 | [diff] [blame] | 168 | // Runs the copy algorithm according to the description. |
| 169 | void Run(); |
| 170 | |
| 171 | // Cleans up the graph after transformation: splits critical edges, recalculates control flow |
| 172 | // information (back-edges, dominators, loop info, etc), eliminates redundant phis. |
| 173 | void CleanUp(); |
| 174 | |
| 175 | // Returns a clone of a basic block (orig_block). |
| 176 | // |
| 177 | // - The copy block will have no successors/predecessors; they should be set up manually. |
| 178 | // - For each instruction in the orig_block a copy is created and inserted into the copy block; |
| 179 | // this correspondence is recorded in the map (old instruction, new instruction). |
| 180 | // - Graph HIR is not valid after this transformation: all of the HIRs have their inputs the |
| 181 | // same, as in the original block, PHIs do not reflect a correct correspondence between the |
| 182 | // value and predecessors (as the copy block has no predecessors by now), etc. |
| 183 | HBasicBlock* CloneBasicBlock(const HBasicBlock* orig_block); |
| 184 | |
| 185 | // Creates a clone for each basic blocks in orig_bb_set adding corresponding entries into bb_map_ |
| 186 | // and hir_map_. |
| 187 | void CloneBasicBlocks(); |
| 188 | |
| 189 | HInstruction* GetInstrCopy(HInstruction* orig_instr) const { |
| 190 | auto copy_input_iter = hir_map_->find(orig_instr); |
| 191 | DCHECK(copy_input_iter != hir_map_->end()); |
| 192 | return copy_input_iter->second; |
| 193 | } |
| 194 | |
| 195 | HBasicBlock* GetBlockCopy(HBasicBlock* orig_block) const { |
| 196 | HBasicBlock* block = bb_map_->Get(orig_block); |
| 197 | DCHECK(block != nullptr); |
| 198 | return block; |
| 199 | } |
| 200 | |
| 201 | HInstruction* GetInstrOrig(HInstruction* copy_instr) const { |
| 202 | for (auto it : *hir_map_) { |
| 203 | if (it.second == copy_instr) { |
| 204 | return it.first; |
| 205 | } |
| 206 | } |
| 207 | return nullptr; |
| 208 | } |
| 209 | |
| 210 | bool IsInOrigBBSet(uint32_t block_id) const { |
| 211 | return orig_bb_set_.IsBitSet(block_id); |
| 212 | } |
| 213 | |
| 214 | bool IsInOrigBBSet(const HBasicBlock* block) const { |
| 215 | return IsInOrigBBSet(block->GetBlockId()); |
| 216 | } |
| 217 | |
Artem Serov | 02eebcf | 2017-12-13 19:48:31 +0000 | [diff] [blame] | 218 | // Returns the area (the most outer loop) in the graph for which control flow (back edges, loops, |
| 219 | // dominators) needs to be adjusted. |
| 220 | HLoopInformation* GetRegionToBeAdjusted() const { |
| 221 | return outer_loop_; |
| 222 | } |
| 223 | |
Artem Serov | 7f4aff6 | 2017-06-21 17:02:18 +0100 | [diff] [blame] | 224 | private: |
| 225 | // Fills the 'exits' vector with the subgraph exits. |
Artem Serov | ca210e3 | 2017-12-15 13:43:20 +0000 | [diff] [blame] | 226 | void SearchForSubgraphExits(ArenaVector<HBasicBlock*>* exits) const; |
Artem Serov | 7f4aff6 | 2017-06-21 17:02:18 +0100 | [diff] [blame] | 227 | |
Artem Serov | 02eebcf | 2017-12-13 19:48:31 +0000 | [diff] [blame] | 228 | // Finds and records information about the area in the graph for which control flow (back edges, |
Artem Serov | 7f4aff6 | 2017-06-21 17:02:18 +0100 | [diff] [blame] | 229 | // loops, dominators) needs to be adjusted. |
| 230 | void FindAndSetLocalAreaForAdjustments(); |
| 231 | |
| 232 | // Remaps edges' successors according to the info specified in the edges sets. |
| 233 | // |
| 234 | // Only edge successors/predecessors and phis' input records (to have a correspondence between |
| 235 | // a phi input record (not value) and a block's predecessor) are adjusted at this stage: neither |
| 236 | // phis' nor instructions' inputs values are resolved. |
| 237 | void RemapEdgesSuccessors(); |
| 238 | |
Artem Serov | 02eebcf | 2017-12-13 19:48:31 +0000 | [diff] [blame] | 239 | // Adjusts control flow (back edges, loops, dominators) for the local area defined by |
Artem Serov | 7f4aff6 | 2017-06-21 17:02:18 +0100 | [diff] [blame] | 240 | // FindAndSetLocalAreaForAdjustments. |
| 241 | void AdjustControlFlowInfo(); |
| 242 | |
| 243 | // Resolves Data Flow - adjusts phis' and instructions' inputs in order to have a valid graph in |
| 244 | // the SSA form. |
| 245 | void ResolveDataFlow(); |
| 246 | |
| 247 | // |
Artem Serov | ca210e3 | 2017-12-15 13:43:20 +0000 | [diff] [blame] | 248 | // Helpers for live-outs processing and Subgraph-closed SSA. |
| 249 | // |
| 250 | // - live-outs - values which are defined inside the subgraph and have uses outside. |
| 251 | // - Subgraph-closed SSA - SSA form for which all the values defined inside the subgraph |
| 252 | // have no outside uses except for the phi-nodes in the subgraph exits. |
| 253 | // |
| 254 | // Note: now if the subgraph has live-outs it is only clonable if it has a single exit; this |
| 255 | // makes the subgraph-closed SSA form construction much easier. |
| 256 | // |
| 257 | // TODO: Support subgraphs with live-outs and multiple exits. |
| 258 | // |
| 259 | |
| 260 | // For each live-out value 'val' in the region puts a record <val, val> into the map. |
| 261 | // Returns whether all of the instructions in the subgraph are clonable. |
| 262 | bool CollectLiveOutsAndCheckClonable(HInstructionMap* live_outs_) const; |
| 263 | |
| 264 | // Constructs Subgraph-closed SSA; precondition - a subgraph has a single exit. |
| 265 | // |
| 266 | // For each live-out 'val' in 'live_outs_' map inserts a HPhi 'phi' into the exit node, updates |
| 267 | // the record in the map to <val, phi> and replaces all outside uses with this phi. |
| 268 | void ConstructSubgraphClosedSSA(); |
| 269 | |
| 270 | // Fixes the data flow for the live-out 'val' by adding a 'copy_val' input to the corresponding |
| 271 | // (<val, phi>) phi after the cloning is done. |
| 272 | void FixSubgraphClosedSSAAfterCloning(); |
| 273 | |
| 274 | // |
Artem Serov | 7f4aff6 | 2017-06-21 17:02:18 +0100 | [diff] [blame] | 275 | // Helpers for CloneBasicBlock. |
| 276 | // |
| 277 | |
| 278 | // Adjusts copy instruction's inputs: if the input of the original instruction is defined in the |
| 279 | // orig_bb_set, replaces it with a corresponding copy otherwise leaves it the same as original. |
| 280 | void ReplaceInputsWithCopies(HInstruction* copy_instr); |
| 281 | |
| 282 | // Recursively clones the environment for the copy instruction. If the input of the original |
| 283 | // environment is defined in the orig_bb_set, replaces it with a corresponding copy otherwise |
| 284 | // leaves it the same as original. |
| 285 | void DeepCloneEnvironmentWithRemapping(HInstruction* copy_instr, const HEnvironment* orig_env); |
| 286 | |
| 287 | // |
| 288 | // Helpers for RemapEdgesSuccessors. |
| 289 | // |
| 290 | |
| 291 | // Remaps incoming or original internal edge to its copy, adjusts the phi inputs in orig_succ and |
| 292 | // copy_succ. |
| 293 | void RemapOrigInternalOrIncomingEdge(HBasicBlock* orig_block, HBasicBlock* orig_succ); |
| 294 | |
| 295 | // Adds copy internal edge (from copy_block to copy_succ), updates phis in the copy_succ. |
| 296 | void AddCopyInternalEdge(HBasicBlock* orig_block, HBasicBlock* orig_succ); |
| 297 | |
| 298 | // Remaps copy internal edge to its origin, adjusts the phi inputs in orig_succ. |
| 299 | void RemapCopyInternalEdge(HBasicBlock* orig_block, HBasicBlock* orig_succ); |
| 300 | |
Artem Serov | 0f5b2bf | 2019-10-23 14:07:41 +0100 | [diff] [blame] | 301 | // Checks whether the edges remapping info corresponds to the subgraph versioning case: |
| 302 | // - none of the incoming edges are to be remapped (they are being duplicated). |
| 303 | // - none of the internal edges are to be remapped. |
| 304 | bool IsRemapInfoForVersioning() const; |
| 305 | |
| 306 | // Processes incoming edges for subgraph versioning case: for each incoming edge (X, Y) adds |
| 307 | // an edge (X, Y_1) where Y_1 = Copy(Y) and add corresponding phi input to copy phi. |
| 308 | // |
| 309 | // Note: such node X will now have two successors, its unconditional branch instruction |
| 310 | // will be invalid and should be adjusted to some conditional branch by the client code. |
| 311 | void CopyIncomingEdgesForVersioning(); |
| 312 | |
Artem Serov | 7f4aff6 | 2017-06-21 17:02:18 +0100 | [diff] [blame] | 313 | // |
| 314 | // Local versions of control flow calculation/adjustment routines. |
| 315 | // |
| 316 | |
| 317 | void FindBackEdgesLocal(HBasicBlock* entry_block, ArenaBitVector* local_set); |
| 318 | void RecalculateBackEdgesInfo(ArenaBitVector* outer_loop_bb_set); |
| 319 | GraphAnalysisResult AnalyzeLoopsLocally(ArenaBitVector* outer_loop_bb_set); |
| 320 | void CleanUpControlFlow(); |
| 321 | |
| 322 | // |
| 323 | // Helpers for ResolveDataFlow |
| 324 | // |
| 325 | |
| 326 | // Resolves the inputs of the phi. |
| 327 | void ResolvePhi(HPhi* phi); |
| 328 | |
Nicolas Geoffray | 256c94b | 2019-04-29 10:55:09 +0100 | [diff] [blame] | 329 | // Update induction range after when fixing SSA. |
| 330 | void UpdateInductionRangeInfoOf( |
| 331 | HInstruction* user, HInstruction* old_instruction, HInstruction* replacement); |
| 332 | |
Artem Serov | 7f4aff6 | 2017-06-21 17:02:18 +0100 | [diff] [blame] | 333 | // |
| 334 | // Debug and logging methods. |
| 335 | // |
| 336 | void CheckInstructionInputsRemapping(HInstruction* orig_instr); |
Artem Serov | 02eebcf | 2017-12-13 19:48:31 +0000 | [diff] [blame] | 337 | bool CheckRemappingInfoIsValid(); |
| 338 | void VerifyGraph(); |
| 339 | void DumpInputSets(); |
Artem Serov | 7f4aff6 | 2017-06-21 17:02:18 +0100 | [diff] [blame] | 340 | |
| 341 | HBasicBlock* GetBlockById(uint32_t block_id) const { |
| 342 | DCHECK(block_id < graph_->GetBlocks().size()); |
| 343 | HBasicBlock* block = graph_->GetBlocks()[block_id]; |
| 344 | DCHECK(block != nullptr); |
| 345 | return block; |
| 346 | } |
| 347 | |
| 348 | HGraph* const graph_; |
| 349 | ArenaAllocator* const arena_; |
| 350 | |
| 351 | // Set of basic block in the original graph to be copied. |
| 352 | HBasicBlockSet orig_bb_set_; |
| 353 | |
| 354 | // Sets of edges which require successors remapping. |
| 355 | const HEdgeSet* remap_orig_internal_; |
| 356 | const HEdgeSet* remap_copy_internal_; |
| 357 | const HEdgeSet* remap_incoming_; |
| 358 | |
| 359 | // Correspondence map for blocks: (original block, copy block). |
| 360 | HBasicBlockMap* bb_map_; |
| 361 | // Correspondence map for instructions: (original HInstruction, copy HInstruction). |
| 362 | HInstructionMap* hir_map_; |
Nicolas Geoffray | 256c94b | 2019-04-29 10:55:09 +0100 | [diff] [blame] | 363 | // As a result of cloning, the induction range analysis information can be invalidated |
| 364 | // and must be updated. If not null, the cloner updates it for changed instructions. |
| 365 | InductionVarRange* induction_range_; |
Artem Serov | 02eebcf | 2017-12-13 19:48:31 +0000 | [diff] [blame] | 366 | // Area in the graph for which control flow (back edges, loops, dominators) needs to be adjusted. |
Artem Serov | 7f4aff6 | 2017-06-21 17:02:18 +0100 | [diff] [blame] | 367 | HLoopInformation* outer_loop_; |
| 368 | HBasicBlockSet outer_loop_bb_set_; |
| 369 | |
Artem Serov | ca210e3 | 2017-12-15 13:43:20 +0000 | [diff] [blame] | 370 | HInstructionMap live_outs_; |
| 371 | |
Artem Serov | 7f4aff6 | 2017-06-21 17:02:18 +0100 | [diff] [blame] | 372 | ART_FRIEND_TEST(SuperblockClonerTest, AdjustControlFlowInfo); |
Artem Serov | 02eebcf | 2017-12-13 19:48:31 +0000 | [diff] [blame] | 373 | ART_FRIEND_TEST(SuperblockClonerTest, IsGraphConnected); |
Artem Serov | 7f4aff6 | 2017-06-21 17:02:18 +0100 | [diff] [blame] | 374 | |
| 375 | DISALLOW_COPY_AND_ASSIGN(SuperblockCloner); |
| 376 | }; |
| 377 | |
Artem Serov | 0f5b2bf | 2019-10-23 14:07:41 +0100 | [diff] [blame] | 378 | // Helper class to perform loop peeling/unrolling/versioning. |
Artem Serov | 02eebcf | 2017-12-13 19:48:31 +0000 | [diff] [blame] | 379 | // |
| 380 | // This helper should be used when correspondence map between original and copied |
| 381 | // basic blocks/instructions are demanded. |
Artem Serov | 0f5b2bf | 2019-10-23 14:07:41 +0100 | [diff] [blame] | 382 | class LoopClonerHelper : public ValueObject { |
Artem Serov | 02eebcf | 2017-12-13 19:48:31 +0000 | [diff] [blame] | 383 | public: |
Artem Serov | 0f5b2bf | 2019-10-23 14:07:41 +0100 | [diff] [blame] | 384 | LoopClonerHelper(HLoopInformation* info, |
Nicolas Geoffray | 256c94b | 2019-04-29 10:55:09 +0100 | [diff] [blame] | 385 | SuperblockCloner::HBasicBlockMap* bb_map, |
| 386 | SuperblockCloner::HInstructionMap* hir_map, |
| 387 | InductionVarRange* induction_range) : |
Artem Serov | 02eebcf | 2017-12-13 19:48:31 +0000 | [diff] [blame] | 388 | loop_info_(info), |
Nicolas Geoffray | 256c94b | 2019-04-29 10:55:09 +0100 | [diff] [blame] | 389 | cloner_(info->GetHeader()->GetGraph(), &info->GetBlocks(), bb_map, hir_map, induction_range) { |
Artem Serov | 0f5b2bf | 2019-10-23 14:07:41 +0100 | [diff] [blame] | 390 | // For now do transformations only for natural loops. |
Artem Serov | 02eebcf | 2017-12-13 19:48:31 +0000 | [diff] [blame] | 391 | DCHECK(!info->IsIrreducible()); |
| 392 | } |
| 393 | |
| 394 | // Returns whether the loop can be peeled/unrolled (static function). |
| 395 | static bool IsLoopClonable(HLoopInformation* loop_info); |
| 396 | |
| 397 | // Returns whether the loop can be peeled/unrolled. |
| 398 | bool IsLoopClonable() const { return cloner_.IsSubgraphClonable(); } |
| 399 | |
Artem Serov | 0f5b2bf | 2019-10-23 14:07:41 +0100 | [diff] [blame] | 400 | // Perform loop peeling. |
| 401 | // |
| 402 | // Control flow of an example (ignoring critical edges splitting). |
| 403 | // |
| 404 | // Before After |
| 405 | // |
| 406 | // |B| |B| |
| 407 | // | | |
| 408 | // v v |
| 409 | // |1| |1| |
| 410 | // | | |
| 411 | // v v |
| 412 | // |2|<-\ |2A| |
| 413 | // / \ / / \ |
| 414 | // v v/ / v |
| 415 | // |4| |3| / |3A| |
| 416 | // | / / |
| 417 | // v | v |
| 418 | // |E| \ |2|<-\ |
| 419 | // \ / \ / |
| 420 | // v v / |
| 421 | // |4| |3| |
| 422 | // | |
| 423 | // v |
| 424 | // |E| |
| 425 | HBasicBlock* DoPeeling() { |
| 426 | return DoLoopTransformationImpl(TransformationKind::kPeeling); |
| 427 | } |
| 428 | |
| 429 | // Perform loop unrolling. |
| 430 | // |
| 431 | // Control flow of an example (ignoring critical edges splitting). |
| 432 | // |
| 433 | // Before After |
| 434 | // |
| 435 | // |B| |B| |
| 436 | // | | |
| 437 | // v v |
| 438 | // |1| |1| |
| 439 | // | | |
| 440 | // v v |
| 441 | // |2|<-\ |2A|<-\ |
| 442 | // / \ / / \ \ |
| 443 | // v v/ / v \ |
| 444 | // |4| |3| / |3A| | |
| 445 | // | / / / |
| 446 | // v | v / |
| 447 | // |E| \ |2| / |
| 448 | // \ / \ / |
| 449 | // v v/ |
| 450 | // |4| |3| |
| 451 | // | |
| 452 | // v |
| 453 | // |E| |
| 454 | HBasicBlock* DoUnrolling() { |
| 455 | return DoLoopTransformationImpl(TransformationKind::kUnrolling); |
| 456 | } |
| 457 | |
| 458 | // Perform loop versioning. |
| 459 | // |
| 460 | // Control flow of an example (ignoring critical edges splitting). |
| 461 | // |
| 462 | // Before After |
| 463 | // |
| 464 | // |B| |B| |
| 465 | // | | |
| 466 | // v v |
| 467 | // |1| |1|_________ |
| 468 | // | | | |
| 469 | // v v v |
| 470 | // |2|<-\ |2|<-\ |2A|<-\ |
| 471 | // / \ / / \ / / \ / |
| 472 | // v v/ | v/ | v/ |
| 473 | // | |3| | |3| | |3A| |
| 474 | // | | __________| |
| 475 | // | || |
| 476 | // v vv |
| 477 | // |4| |4| |
| 478 | // | | |
| 479 | // v v |
| 480 | // |E| |E| |
| 481 | HBasicBlock* DoVersioning() { |
| 482 | return DoLoopTransformationImpl(TransformationKind::kVersioning); |
| 483 | } |
| 484 | |
Artem Serov | 02eebcf | 2017-12-13 19:48:31 +0000 | [diff] [blame] | 485 | HLoopInformation* GetRegionToBeAdjusted() const { return cloner_.GetRegionToBeAdjusted(); } |
| 486 | |
| 487 | protected: |
Artem Serov | 0f5b2bf | 2019-10-23 14:07:41 +0100 | [diff] [blame] | 488 | enum class TransformationKind { |
| 489 | kPeeling, |
| 490 | kUnrolling, |
| 491 | kVersioning, |
| 492 | }; |
| 493 | |
| 494 | // Applies a specific loop transformation to the loop. |
| 495 | HBasicBlock* DoLoopTransformationImpl(TransformationKind transformation); |
Artem Serov | 02eebcf | 2017-12-13 19:48:31 +0000 | [diff] [blame] | 496 | |
| 497 | private: |
| 498 | HLoopInformation* loop_info_; |
| 499 | SuperblockCloner cloner_; |
| 500 | |
Artem Serov | 0f5b2bf | 2019-10-23 14:07:41 +0100 | [diff] [blame] | 501 | DISALLOW_COPY_AND_ASSIGN(LoopClonerHelper); |
Artem Serov | 02eebcf | 2017-12-13 19:48:31 +0000 | [diff] [blame] | 502 | }; |
| 503 | |
Artem Serov | 0f5b2bf | 2019-10-23 14:07:41 +0100 | [diff] [blame] | 504 | // Helper class to perform loop peeling/unrolling/versioning. |
Artem Serov | 02eebcf | 2017-12-13 19:48:31 +0000 | [diff] [blame] | 505 | // |
| 506 | // This helper should be used when there is no need to get correspondence information between |
| 507 | // original and copied basic blocks/instructions. |
Artem Serov | 0f5b2bf | 2019-10-23 14:07:41 +0100 | [diff] [blame] | 508 | class LoopClonerSimpleHelper : public ValueObject { |
Artem Serov | 02eebcf | 2017-12-13 19:48:31 +0000 | [diff] [blame] | 509 | public: |
Artem Serov | 0f5b2bf | 2019-10-23 14:07:41 +0100 | [diff] [blame] | 510 | LoopClonerSimpleHelper(HLoopInformation* info, InductionVarRange* induction_range); |
Artem Serov | 02eebcf | 2017-12-13 19:48:31 +0000 | [diff] [blame] | 511 | bool IsLoopClonable() const { return helper_.IsLoopClonable(); } |
| 512 | HBasicBlock* DoPeeling() { return helper_.DoPeeling(); } |
| 513 | HBasicBlock* DoUnrolling() { return helper_.DoUnrolling(); } |
Artem Serov | 0f5b2bf | 2019-10-23 14:07:41 +0100 | [diff] [blame] | 514 | HBasicBlock* DoVersioning() { return helper_.DoVersioning(); } |
Artem Serov | 02eebcf | 2017-12-13 19:48:31 +0000 | [diff] [blame] | 515 | HLoopInformation* GetRegionToBeAdjusted() const { return helper_.GetRegionToBeAdjusted(); } |
| 516 | |
Artem Serov | 72411e6 | 2017-10-19 16:18:07 +0100 | [diff] [blame] | 517 | const SuperblockCloner::HBasicBlockMap* GetBasicBlockMap() const { return &bb_map_; } |
| 518 | const SuperblockCloner::HInstructionMap* GetInstructionMap() const { return &hir_map_; } |
| 519 | |
Artem Serov | 02eebcf | 2017-12-13 19:48:31 +0000 | [diff] [blame] | 520 | private: |
| 521 | SuperblockCloner::HBasicBlockMap bb_map_; |
| 522 | SuperblockCloner::HInstructionMap hir_map_; |
Artem Serov | 0f5b2bf | 2019-10-23 14:07:41 +0100 | [diff] [blame] | 523 | LoopClonerHelper helper_; |
Artem Serov | 02eebcf | 2017-12-13 19:48:31 +0000 | [diff] [blame] | 524 | |
Artem Serov | 0f5b2bf | 2019-10-23 14:07:41 +0100 | [diff] [blame] | 525 | DISALLOW_COPY_AND_ASSIGN(LoopClonerSimpleHelper); |
Artem Serov | 02eebcf | 2017-12-13 19:48:31 +0000 | [diff] [blame] | 526 | }; |
| 527 | |
| 528 | // Collects edge remapping info for loop peeling/unrolling for the loop specified by loop info. |
| 529 | void CollectRemappingInfoForPeelUnroll(bool to_unroll, |
| 530 | HLoopInformation* loop_info, |
| 531 | SuperblockCloner::HEdgeSet* remap_orig_internal, |
| 532 | SuperblockCloner::HEdgeSet* remap_copy_internal, |
| 533 | SuperblockCloner::HEdgeSet* remap_incoming); |
| 534 | |
| 535 | // Returns whether blocks from 'work_set' are reachable from the rest of the graph. |
| 536 | // |
| 537 | // Returns whether such a set 'outer_entries' of basic blocks exists that: |
| 538 | // - each block from 'outer_entries' is not from 'work_set'. |
| 539 | // - each block from 'work_set' is reachable from at least one block from 'outer_entries'. |
| 540 | // |
| 541 | // After the function returns work_set contains only blocks from the original 'work_set' |
| 542 | // which are unreachable from the rest of the graph. |
| 543 | bool IsSubgraphConnected(SuperblockCloner::HBasicBlockSet* work_set, HGraph* graph); |
| 544 | |
| 545 | // Returns a common predecessor of loop1 and loop2 in the loop tree or nullptr if it is the whole |
| 546 | // graph. |
| 547 | HLoopInformation* FindCommonLoop(HLoopInformation* loop1, HLoopInformation* loop2); |
Artem Serov | 7f4aff6 | 2017-06-21 17:02:18 +0100 | [diff] [blame] | 548 | } // namespace art |
| 549 | |
| 550 | namespace std { |
| 551 | |
| 552 | template <> |
| 553 | struct hash<art::HEdge> { |
| 554 | size_t operator()(art::HEdge const& x) const noexcept { |
| 555 | // Use Cantor pairing function as the hash function. |
Artem Serov | 02eebcf | 2017-12-13 19:48:31 +0000 | [diff] [blame] | 556 | size_t a = x.GetFrom(); |
| 557 | size_t b = x.GetTo(); |
Artem Serov | 7f4aff6 | 2017-06-21 17:02:18 +0100 | [diff] [blame] | 558 | return (a + b) * (a + b + 1) / 2 + b; |
| 559 | } |
| 560 | }; |
Artem Serov | 02eebcf | 2017-12-13 19:48:31 +0000 | [diff] [blame] | 561 | ostream& operator<<(ostream& os, const art::HEdge& e); |
Artem Serov | 7f4aff6 | 2017-06-21 17:02:18 +0100 | [diff] [blame] | 562 | |
| 563 | } // namespace std |
| 564 | |
| 565 | #endif // ART_COMPILER_OPTIMIZING_SUPERBLOCK_CLONER_H_ |