Artem Serov | 7f4aff6 | 2017-06-21 17:02:18 +0100 | [diff] [blame^] | 1 | /* |
| 2 | * Copyright (C) 2017 The Android Open Source Project |
| 3 | * |
| 4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | * you may not use this file except in compliance with the License. |
| 6 | * You may obtain a copy of the License at |
| 7 | * |
| 8 | * http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | * |
| 10 | * Unless required by applicable law or agreed to in writing, software |
| 11 | * distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | * See the License for the specific language governing permissions and |
| 14 | * limitations under the License. |
| 15 | */ |
| 16 | |
| 17 | #ifndef ART_COMPILER_OPTIMIZING_SUPERBLOCK_CLONER_H_ |
| 18 | #define ART_COMPILER_OPTIMIZING_SUPERBLOCK_CLONER_H_ |
| 19 | |
| 20 | #include "base/arena_bit_vector.h" |
| 21 | #include "base/arena_containers.h" |
| 22 | #include "base/bit_vector-inl.h" |
| 23 | #include "nodes.h" |
| 24 | |
| 25 | namespace art { |
| 26 | |
| 27 | static const bool kSuperblockClonerLogging = false; |
| 28 | static const bool kSuperblockClonerVerify = false; |
| 29 | |
| 30 | // Represents an edge between two HBasicBlocks. |
| 31 | // |
| 32 | // Note: objects of this class are small - pass them by value. |
| 33 | class HEdge : public ArenaObject<kArenaAllocSuperblockCloner> { |
| 34 | public: |
| 35 | HEdge(HBasicBlock* from, HBasicBlock* to) : from_(from->GetBlockId()), to_(to->GetBlockId()) { |
| 36 | DCHECK_NE(to_, kInvalidBlockId); |
| 37 | DCHECK_NE(from_, kInvalidBlockId); |
| 38 | } |
| 39 | HEdge(uint32_t from, uint32_t to) : from_(from), to_(to) { |
| 40 | DCHECK_NE(to_, kInvalidBlockId); |
| 41 | DCHECK_NE(from_, kInvalidBlockId); |
| 42 | } |
| 43 | HEdge() : from_(kInvalidBlockId), to_(kInvalidBlockId) {} |
| 44 | |
| 45 | uint32_t GetFrom() const { return from_; } |
| 46 | uint32_t GetTo() const { return to_; } |
| 47 | |
| 48 | bool operator==(const HEdge& other) const { |
| 49 | return this->from_ == other.from_ && this->to_ == other.to_; |
| 50 | } |
| 51 | |
| 52 | bool operator!=(const HEdge& other) const { return !operator==(other); } |
| 53 | void Dump(std::ostream& stream) const; |
| 54 | |
| 55 | // Returns whether an edge represents a valid edge in CF graph: whether the from_ block |
| 56 | // has to_ block as a successor. |
| 57 | bool IsValid() const { return from_ != kInvalidBlockId && to_ != kInvalidBlockId; } |
| 58 | |
| 59 | private: |
| 60 | // Predecessor block id. |
| 61 | uint32_t from_; |
| 62 | // Successor block id. |
| 63 | uint32_t to_; |
| 64 | }; |
| 65 | |
| 66 | // Returns whether a HEdge edge corresponds to an existing edge in the graph. |
| 67 | inline bool IsEdgeValid(HEdge edge, HGraph* graph) { |
| 68 | if (!edge.IsValid()) { |
| 69 | return false; |
| 70 | } |
| 71 | uint32_t from = edge.GetFrom(); |
| 72 | uint32_t to = edge.GetTo(); |
| 73 | if (from >= graph->GetBlocks().size() || to >= graph->GetBlocks().size()) { |
| 74 | return false; |
| 75 | } |
| 76 | |
| 77 | HBasicBlock* block_from = graph->GetBlocks()[from]; |
| 78 | HBasicBlock* block_to = graph->GetBlocks()[to]; |
| 79 | if (block_from == nullptr || block_to == nullptr) { |
| 80 | return false; |
| 81 | } |
| 82 | |
| 83 | return block_from->HasSuccessor(block_to, 0); |
| 84 | } |
| 85 | |
| 86 | // SuperblockCloner provides a feature of cloning subgraphs in a smart, high level way without |
| 87 | // fine grain manipulation with IR; data flow and graph properties are resolved/adjusted |
| 88 | // automatically. The clone transformation is defined by specifying a set of basic blocks to copy |
| 89 | // and a set of rules how to treat edges, remap their successors. By using this approach such |
| 90 | // optimizations as Branch Target Expansion, Loop Peeling, Loop Unrolling can be implemented. |
| 91 | // |
| 92 | // The idea of the transformation is based on "Superblock cloning" technique described in the book |
| 93 | // "Engineering a Compiler. Second Edition", Keith D. Cooper, Linda Torczon, Rice University |
| 94 | // Houston, Texas. 2nd edition, Morgan Kaufmann. The original paper is "The Superblock: An Efective |
| 95 | // Technique for VLIW and Superscalar Compilation" by Hwu, W.M.W., Mahlke, S.A., Chen, W.Y. et al. |
| 96 | // J Supercomput (1993) 7: 229. doi:10.1007/BF01205185. |
| 97 | // |
| 98 | // There are two states of the IR graph: original graph (before the transformation) and |
| 99 | // copy graph (after). |
| 100 | // |
| 101 | // Before the transformation: |
| 102 | // Defining a set of basic block to copy (orig_bb_set) partitions all of the edges in the original |
| 103 | // graph into 4 categories/sets (use the following notation for edges: "(pred, succ)", |
| 104 | // where pred, succ - basic blocks): |
| 105 | // - internal - pred, succ are members of ‘orig_bb_set’. |
| 106 | // - outside - pred, succ are not members of ‘orig_bb_set’. |
| 107 | // - incoming - pred is not a member of ‘orig_bb_set’, succ is. |
| 108 | // - outgoing - pred is a member of ‘orig_bb_set’, succ is not. |
| 109 | // |
| 110 | // Transformation: |
| 111 | // |
| 112 | // 1. Initial cloning: |
| 113 | // 1.1. For each ‘orig_block’ in orig_bb_set create a copy ‘copy_block’; these new blocks |
| 114 | // form ‘copy_bb_set’. |
| 115 | // 1.2. For each edge (X, Y) from internal set create an edge (X_1, Y_1) where X_1, Y_1 are the |
| 116 | // copies of X, Y basic blocks correspondingly; these new edges form ‘copy_internal’ edge |
| 117 | // set. |
| 118 | // 1.3. For each edge (X, Y) from outgoing set create an edge (X_1, Y_1) where X_1, Y_1 are the |
| 119 | // copies of X, Y basic blocks correspondingly; these new edges form ‘copy_outgoing’ edge |
| 120 | // set. |
| 121 | // 2. Successors remapping. |
| 122 | // 2.1. 'remap_orig_internal’ - set of edges (X, Y) from ‘orig_bb_set’ whose successors should |
| 123 | // be remapped to copy nodes: ((X, Y) will be transformed into (X, Y_1)). |
| 124 | // 2.2. ‘remap_copy_internal’ - set of edges (X_1, Y_1) from ‘copy_bb_set’ whose successors |
| 125 | // should be remapped to copy nodes: (X_1, Y_1) will be transformed into (X_1, Y)). |
| 126 | // 2.3. 'remap_incoming’ - set of edges (X, Y) from the ‘incoming’ edge set in the original graph |
| 127 | // whose successors should be remapped to copies nodes: ((X, Y) will be transformed into |
| 128 | // (X, Y_1)). |
| 129 | // 3. Adjust control flow structures and relations (dominance, reverse post order, loops, etc). |
| 130 | // 4. Fix/resolve data flow. |
| 131 | // 5. Do cleanups (DCE, critical edges splitting, etc). |
| 132 | // |
| 133 | class SuperblockCloner : public ValueObject { |
| 134 | public: |
| 135 | // TODO: Investigate optimal types for the containers. |
| 136 | using HBasicBlockMap = ArenaSafeMap<HBasicBlock*, HBasicBlock*>; |
| 137 | using HInstructionMap = ArenaSafeMap<HInstruction*, HInstruction*>; |
| 138 | using HBasicBlockSet = ArenaBitVector; |
| 139 | using HEdgeSet = ArenaHashSet<HEdge>; |
| 140 | |
| 141 | SuperblockCloner(HGraph* graph, |
| 142 | const HBasicBlockSet* orig_bb_set, |
| 143 | HBasicBlockMap* bb_map, |
| 144 | HInstructionMap* hir_map); |
| 145 | |
| 146 | // Sets edge successor remapping info specified by corresponding edge sets. |
| 147 | void SetSuccessorRemappingInfo(const HEdgeSet* remap_orig_internal, |
| 148 | const HEdgeSet* remap_copy_internal, |
| 149 | const HEdgeSet* remap_incoming); |
| 150 | |
| 151 | // Returns whether the specified subgraph is copyable. |
| 152 | // TODO: Start from small range of graph patterns then extend it. |
| 153 | bool IsSubgraphClonable() const; |
| 154 | |
| 155 | // Runs the copy algorithm according to the description. |
| 156 | void Run(); |
| 157 | |
| 158 | // Cleans up the graph after transformation: splits critical edges, recalculates control flow |
| 159 | // information (back-edges, dominators, loop info, etc), eliminates redundant phis. |
| 160 | void CleanUp(); |
| 161 | |
| 162 | // Returns a clone of a basic block (orig_block). |
| 163 | // |
| 164 | // - The copy block will have no successors/predecessors; they should be set up manually. |
| 165 | // - For each instruction in the orig_block a copy is created and inserted into the copy block; |
| 166 | // this correspondence is recorded in the map (old instruction, new instruction). |
| 167 | // - Graph HIR is not valid after this transformation: all of the HIRs have their inputs the |
| 168 | // same, as in the original block, PHIs do not reflect a correct correspondence between the |
| 169 | // value and predecessors (as the copy block has no predecessors by now), etc. |
| 170 | HBasicBlock* CloneBasicBlock(const HBasicBlock* orig_block); |
| 171 | |
| 172 | // Creates a clone for each basic blocks in orig_bb_set adding corresponding entries into bb_map_ |
| 173 | // and hir_map_. |
| 174 | void CloneBasicBlocks(); |
| 175 | |
| 176 | HInstruction* GetInstrCopy(HInstruction* orig_instr) const { |
| 177 | auto copy_input_iter = hir_map_->find(orig_instr); |
| 178 | DCHECK(copy_input_iter != hir_map_->end()); |
| 179 | return copy_input_iter->second; |
| 180 | } |
| 181 | |
| 182 | HBasicBlock* GetBlockCopy(HBasicBlock* orig_block) const { |
| 183 | HBasicBlock* block = bb_map_->Get(orig_block); |
| 184 | DCHECK(block != nullptr); |
| 185 | return block; |
| 186 | } |
| 187 | |
| 188 | HInstruction* GetInstrOrig(HInstruction* copy_instr) const { |
| 189 | for (auto it : *hir_map_) { |
| 190 | if (it.second == copy_instr) { |
| 191 | return it.first; |
| 192 | } |
| 193 | } |
| 194 | return nullptr; |
| 195 | } |
| 196 | |
| 197 | bool IsInOrigBBSet(uint32_t block_id) const { |
| 198 | return orig_bb_set_.IsBitSet(block_id); |
| 199 | } |
| 200 | |
| 201 | bool IsInOrigBBSet(const HBasicBlock* block) const { |
| 202 | return IsInOrigBBSet(block->GetBlockId()); |
| 203 | } |
| 204 | |
| 205 | private: |
| 206 | // Fills the 'exits' vector with the subgraph exits. |
| 207 | void SearchForSubgraphExits(ArenaVector<HBasicBlock*>* exits); |
| 208 | |
| 209 | // Finds and records information about the area in the graph for which control-flow (back edges, |
| 210 | // loops, dominators) needs to be adjusted. |
| 211 | void FindAndSetLocalAreaForAdjustments(); |
| 212 | |
| 213 | // Remaps edges' successors according to the info specified in the edges sets. |
| 214 | // |
| 215 | // Only edge successors/predecessors and phis' input records (to have a correspondence between |
| 216 | // a phi input record (not value) and a block's predecessor) are adjusted at this stage: neither |
| 217 | // phis' nor instructions' inputs values are resolved. |
| 218 | void RemapEdgesSuccessors(); |
| 219 | |
| 220 | // Adjusts control-flow (back edges, loops, dominators) for the local area defined by |
| 221 | // FindAndSetLocalAreaForAdjustments. |
| 222 | void AdjustControlFlowInfo(); |
| 223 | |
| 224 | // Resolves Data Flow - adjusts phis' and instructions' inputs in order to have a valid graph in |
| 225 | // the SSA form. |
| 226 | void ResolveDataFlow(); |
| 227 | |
| 228 | // |
| 229 | // Helpers for CloneBasicBlock. |
| 230 | // |
| 231 | |
| 232 | // Adjusts copy instruction's inputs: if the input of the original instruction is defined in the |
| 233 | // orig_bb_set, replaces it with a corresponding copy otherwise leaves it the same as original. |
| 234 | void ReplaceInputsWithCopies(HInstruction* copy_instr); |
| 235 | |
| 236 | // Recursively clones the environment for the copy instruction. If the input of the original |
| 237 | // environment is defined in the orig_bb_set, replaces it with a corresponding copy otherwise |
| 238 | // leaves it the same as original. |
| 239 | void DeepCloneEnvironmentWithRemapping(HInstruction* copy_instr, const HEnvironment* orig_env); |
| 240 | |
| 241 | // |
| 242 | // Helpers for RemapEdgesSuccessors. |
| 243 | // |
| 244 | |
| 245 | // Remaps incoming or original internal edge to its copy, adjusts the phi inputs in orig_succ and |
| 246 | // copy_succ. |
| 247 | void RemapOrigInternalOrIncomingEdge(HBasicBlock* orig_block, HBasicBlock* orig_succ); |
| 248 | |
| 249 | // Adds copy internal edge (from copy_block to copy_succ), updates phis in the copy_succ. |
| 250 | void AddCopyInternalEdge(HBasicBlock* orig_block, HBasicBlock* orig_succ); |
| 251 | |
| 252 | // Remaps copy internal edge to its origin, adjusts the phi inputs in orig_succ. |
| 253 | void RemapCopyInternalEdge(HBasicBlock* orig_block, HBasicBlock* orig_succ); |
| 254 | |
| 255 | // |
| 256 | // Local versions of control flow calculation/adjustment routines. |
| 257 | // |
| 258 | |
| 259 | void FindBackEdgesLocal(HBasicBlock* entry_block, ArenaBitVector* local_set); |
| 260 | void RecalculateBackEdgesInfo(ArenaBitVector* outer_loop_bb_set); |
| 261 | GraphAnalysisResult AnalyzeLoopsLocally(ArenaBitVector* outer_loop_bb_set); |
| 262 | void CleanUpControlFlow(); |
| 263 | |
| 264 | // |
| 265 | // Helpers for ResolveDataFlow |
| 266 | // |
| 267 | |
| 268 | // Resolves the inputs of the phi. |
| 269 | void ResolvePhi(HPhi* phi); |
| 270 | |
| 271 | // |
| 272 | // Debug and logging methods. |
| 273 | // |
| 274 | void CheckInstructionInputsRemapping(HInstruction* orig_instr); |
| 275 | |
| 276 | HBasicBlock* GetBlockById(uint32_t block_id) const { |
| 277 | DCHECK(block_id < graph_->GetBlocks().size()); |
| 278 | HBasicBlock* block = graph_->GetBlocks()[block_id]; |
| 279 | DCHECK(block != nullptr); |
| 280 | return block; |
| 281 | } |
| 282 | |
| 283 | HGraph* const graph_; |
| 284 | ArenaAllocator* const arena_; |
| 285 | |
| 286 | // Set of basic block in the original graph to be copied. |
| 287 | HBasicBlockSet orig_bb_set_; |
| 288 | |
| 289 | // Sets of edges which require successors remapping. |
| 290 | const HEdgeSet* remap_orig_internal_; |
| 291 | const HEdgeSet* remap_copy_internal_; |
| 292 | const HEdgeSet* remap_incoming_; |
| 293 | |
| 294 | // Correspondence map for blocks: (original block, copy block). |
| 295 | HBasicBlockMap* bb_map_; |
| 296 | // Correspondence map for instructions: (original HInstruction, copy HInstruction). |
| 297 | HInstructionMap* hir_map_; |
| 298 | // Area in the graph for which control-flow (back edges, loops, dominators) needs to be adjusted. |
| 299 | HLoopInformation* outer_loop_; |
| 300 | HBasicBlockSet outer_loop_bb_set_; |
| 301 | |
| 302 | ART_FRIEND_TEST(SuperblockClonerTest, AdjustControlFlowInfo); |
| 303 | |
| 304 | DISALLOW_COPY_AND_ASSIGN(SuperblockCloner); |
| 305 | }; |
| 306 | |
| 307 | } // namespace art |
| 308 | |
| 309 | namespace std { |
| 310 | |
| 311 | template <> |
| 312 | struct hash<art::HEdge> { |
| 313 | size_t operator()(art::HEdge const& x) const noexcept { |
| 314 | // Use Cantor pairing function as the hash function. |
| 315 | uint32_t a = x.GetFrom(); |
| 316 | uint32_t b = x.GetTo(); |
| 317 | return (a + b) * (a + b + 1) / 2 + b; |
| 318 | } |
| 319 | }; |
| 320 | |
| 321 | } // namespace std |
| 322 | |
| 323 | #endif // ART_COMPILER_OPTIMIZING_SUPERBLOCK_CLONER_H_ |