| /* |
| * Copyright (C) 2017 The Android Open Source Project |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| #ifndef ART_COMPILER_OPTIMIZING_SUPERBLOCK_CLONER_H_ |
| #define ART_COMPILER_OPTIMIZING_SUPERBLOCK_CLONER_H_ |
| |
| #include "base/arena_bit_vector.h" |
| #include "base/arena_containers.h" |
| #include "base/bit_vector-inl.h" |
| #include "base/macros.h" |
| #include "nodes.h" |
| |
| namespace art HIDDEN { |
| |
| class InductionVarRange; |
| |
| static const bool kSuperblockClonerLogging = false; |
| static const bool kSuperblockClonerVerify = false; |
| |
| // Represents an edge between two HBasicBlocks. |
| // |
| // Note: objects of this class are small - pass them by value. |
| class HEdge : public ArenaObject<kArenaAllocSuperblockCloner> { |
| public: |
| HEdge(HBasicBlock* from, HBasicBlock* to) : from_(from->GetBlockId()), to_(to->GetBlockId()) { |
| DCHECK_NE(to_, kInvalidBlockId); |
| DCHECK_NE(from_, kInvalidBlockId); |
| } |
| HEdge(uint32_t from, uint32_t to) : from_(from), to_(to) { |
| DCHECK_NE(to_, kInvalidBlockId); |
| DCHECK_NE(from_, kInvalidBlockId); |
| } |
| HEdge() : from_(kInvalidBlockId), to_(kInvalidBlockId) {} |
| |
| uint32_t GetFrom() const { return from_; } |
| uint32_t GetTo() const { return to_; } |
| |
| bool operator==(const HEdge& other) const { |
| return this->from_ == other.from_ && this->to_ == other.to_; |
| } |
| |
| bool operator!=(const HEdge& other) const { return !operator==(other); } |
| void Dump(std::ostream& stream) const; |
| |
| // Returns whether an edge represents a valid edge in CF graph: whether the from_ block |
| // has to_ block as a successor. |
| bool IsValid() const { return from_ != kInvalidBlockId && to_ != kInvalidBlockId; } |
| |
| private: |
| // Predecessor block id. |
| uint32_t from_; |
| // Successor block id. |
| uint32_t to_; |
| }; |
| |
| // Returns whether a HEdge edge corresponds to an existing edge in the graph. |
| inline bool IsEdgeValid(HEdge edge, HGraph* graph) { |
| if (!edge.IsValid()) { |
| return false; |
| } |
| uint32_t from = edge.GetFrom(); |
| uint32_t to = edge.GetTo(); |
| if (from >= graph->GetBlocks().size() || to >= graph->GetBlocks().size()) { |
| return false; |
| } |
| |
| HBasicBlock* block_from = graph->GetBlocks()[from]; |
| HBasicBlock* block_to = graph->GetBlocks()[to]; |
| if (block_from == nullptr || block_to == nullptr) { |
| return false; |
| } |
| |
| return block_from->HasSuccessor(block_to, 0); |
| } |
| |
| // SuperblockCloner provides a feature of cloning subgraphs in a smart, high level way without |
| // fine grain manipulation with IR; data flow and graph properties are resolved/adjusted |
| // automatically. The clone transformation is defined by specifying a set of basic blocks to copy |
| // and a set of rules how to treat edges, remap their successors. By using this approach such |
| // optimizations as Branch Target Expansion, Loop Peeling, Loop Unrolling, Loop Versioning can be |
| // implemented. |
| // |
| // The idea of the transformation is based on "Superblock cloning" technique described in the book |
| // "Engineering a Compiler. Second Edition", Keith D. Cooper, Linda Torczon, Rice University |
| // Houston, Texas. 2nd edition, Morgan Kaufmann. The original paper is "The Superblock: An Efective |
| // Technique for VLIW and Superscalar Compilation" by Hwu, W.M.W., Mahlke, S.A., Chen, W.Y. et al. |
| // J Supercomput (1993) 7: 229. doi:10.1007/BF01205185. |
| // |
| // There are two states of the IR graph: original graph (before the transformation) and |
| // copy graph (after). |
| // |
| // Before the transformation: |
| // Defining a set of basic block to copy (orig_bb_set) partitions all of the edges in the original |
| // graph into 4 categories/sets (use the following notation for edges: "(pred, succ)", |
| // where pred, succ - basic blocks): |
| // - internal - pred, succ are members of ‘orig_bb_set’. |
| // - outside - pred, succ are not members of ‘orig_bb_set’. |
| // - incoming - pred is not a member of ‘orig_bb_set’, succ is. |
| // - outgoing - pred is a member of ‘orig_bb_set’, succ is not. |
| // |
| // Transformation: |
| // |
| // 1. Initial cloning: |
| // 1.1. For each ‘orig_block’ in orig_bb_set create a copy ‘copy_block’; these new blocks |
| // form ‘copy_bb_set’. |
| // 1.2. For each edge (X, Y) from internal set create an edge (X_1, Y_1) where X_1, Y_1 are the |
| // copies of X, Y basic blocks correspondingly; these new edges form ‘copy_internal’ edge |
| // set. |
| // 1.3. For each edge (X, Y) from outgoing set create an edge (X_1, Y_1) where X_1, Y_1 are the |
| // copies of X, Y basic blocks correspondingly; these new edges form ‘copy_outgoing’ edge |
| // set. |
| // 2. Successors remapping. |
| // 2.1. 'remap_orig_internal’ - set of edges (X, Y) from ‘orig_bb_set’ whose successors should |
| // be remapped to copy nodes: ((X, Y) will be transformed into (X, Y_1)). |
| // 2.2. ‘remap_copy_internal’ - set of edges (X_1, Y_1) from ‘copy_bb_set’ whose successors |
| // should be remapped to copy nodes: (X_1, Y_1) will be transformed into (X_1, Y)). |
| // 2.3. 'remap_incoming’ - set of edges (X, Y) from the ‘incoming’ edge set in the original graph |
| // whose successors should be remapped to copies nodes: ((X, Y) will be transformed into |
| // (X, Y_1)). |
| // 3. Adjust control flow structures and relations (dominance, reverse post order, loops, etc). |
| // 4. Fix/resolve data flow. |
| // 5. Do cleanups (DCE, critical edges splitting, etc). |
| // |
| class SuperblockCloner : public ValueObject { |
| public: |
| // TODO: Investigate optimal types for the containers. |
| using HBasicBlockMap = ArenaSafeMap<HBasicBlock*, HBasicBlock*>; |
| using HInstructionMap = ArenaSafeMap<HInstruction*, HInstruction*>; |
| using HBasicBlockSet = ArenaBitVector; |
| using HEdgeSet = ArenaHashSet<HEdge>; |
| |
| SuperblockCloner(HGraph* graph, |
| const HBasicBlockSet* orig_bb_set, |
| HBasicBlockMap* bb_map, |
| HInstructionMap* hir_map, |
| InductionVarRange* induction_range); |
| |
| // Sets edge successor remapping info specified by corresponding edge sets. |
| void SetSuccessorRemappingInfo(const HEdgeSet* remap_orig_internal, |
| const HEdgeSet* remap_copy_internal, |
| const HEdgeSet* remap_incoming); |
| |
| // Returns whether the specified subgraph is copyable. |
| // TODO: Start from small range of graph patterns then extend it. |
| bool IsSubgraphClonable() const; |
| |
| // Returns whether selected subgraph satisfies the criteria for fast data flow resolution |
| // when iterative DF algorithm is not required and dominators/instructions inputs can be |
| // trivially adjusted. |
| // |
| // TODO: formally describe the criteria. |
| // |
| // Loop peeling, unrolling and versioning satisfy the criteria. |
| bool IsFastCase() const; |
| |
| // Runs the copy algorithm according to the description. |
| void Run(); |
| |
| // Cleans up the graph after transformation: splits critical edges, recalculates control flow |
| // information (back-edges, dominators, loop info, etc), eliminates redundant phis. |
| void CleanUp(); |
| |
| // Returns a clone of a basic block (orig_block). |
| // |
| // - The copy block will have no successors/predecessors; they should be set up manually. |
| // - For each instruction in the orig_block a copy is created and inserted into the copy block; |
| // this correspondence is recorded in the map (old instruction, new instruction). |
| // - Graph HIR is not valid after this transformation: all of the HIRs have their inputs the |
| // same, as in the original block, PHIs do not reflect a correct correspondence between the |
| // value and predecessors (as the copy block has no predecessors by now), etc. |
| HBasicBlock* CloneBasicBlock(const HBasicBlock* orig_block); |
| |
| // Creates a clone for each basic blocks in orig_bb_set adding corresponding entries into bb_map_ |
| // and hir_map_. |
| void CloneBasicBlocks(); |
| |
| HInstruction* GetInstrCopy(HInstruction* orig_instr) const { |
| auto copy_input_iter = hir_map_->find(orig_instr); |
| DCHECK(copy_input_iter != hir_map_->end()); |
| return copy_input_iter->second; |
| } |
| |
| HBasicBlock* GetBlockCopy(HBasicBlock* orig_block) const { |
| HBasicBlock* block = bb_map_->Get(orig_block); |
| DCHECK(block != nullptr); |
| return block; |
| } |
| |
| HInstruction* GetInstrOrig(HInstruction* copy_instr) const { |
| for (auto it : *hir_map_) { |
| if (it.second == copy_instr) { |
| return it.first; |
| } |
| } |
| return nullptr; |
| } |
| |
| bool IsInOrigBBSet(uint32_t block_id) const { |
| return orig_bb_set_.IsBitSet(block_id); |
| } |
| |
| bool IsInOrigBBSet(const HBasicBlock* block) const { |
| return IsInOrigBBSet(block->GetBlockId()); |
| } |
| |
| // Returns the area (the most outer loop) in the graph for which control flow (back edges, loops, |
| // dominators) needs to be adjusted. |
| HLoopInformation* GetRegionToBeAdjusted() const { |
| return outer_loop_; |
| } |
| |
| private: |
| // Fills the 'exits' vector with the subgraph exits. |
| void SearchForSubgraphExits(ArenaVector<HBasicBlock*>* exits) const; |
| |
| // Finds and records information about the area in the graph for which control flow (back edges, |
| // loops, dominators) needs to be adjusted. |
| void FindAndSetLocalAreaForAdjustments(); |
| |
| // Remaps edges' successors according to the info specified in the edges sets. |
| // |
| // Only edge successors/predecessors and phis' input records (to have a correspondence between |
| // a phi input record (not value) and a block's predecessor) are adjusted at this stage: neither |
| // phis' nor instructions' inputs values are resolved. |
| void RemapEdgesSuccessors(); |
| |
| // Adjusts control flow (back edges, loops, dominators) for the local area defined by |
| // FindAndSetLocalAreaForAdjustments. |
| void AdjustControlFlowInfo(); |
| |
| // Resolves Data Flow - adjusts phis' and instructions' inputs in order to have a valid graph in |
| // the SSA form. |
| void ResolveDataFlow(); |
| |
| // |
| // Helpers for live-outs processing and Subgraph-closed SSA. |
| // |
| // - live-outs - values which are defined inside the subgraph and have uses outside. |
| // - Subgraph-closed SSA - SSA form for which all the values defined inside the subgraph |
| // have no outside uses except for the phi-nodes in the subgraph exits. |
| // |
| // Note: now if the subgraph has live-outs it is only clonable if it has a single exit; this |
| // makes the subgraph-closed SSA form construction much easier. |
| // |
| // TODO: Support subgraphs with live-outs and multiple exits. |
| // |
| |
| // For each live-out value 'val' in the region puts a record <val, val> into the map. |
| // Returns whether all of the instructions in the subgraph are clonable. |
| bool CollectLiveOutsAndCheckClonable(HInstructionMap* live_outs_) const; |
| |
| // Constructs Subgraph-closed SSA; precondition - a subgraph has a single exit. |
| // |
| // For each live-out 'val' in 'live_outs_' map inserts a HPhi 'phi' into the exit node, updates |
| // the record in the map to <val, phi> and replaces all outside uses with this phi. |
| void ConstructSubgraphClosedSSA(); |
| |
| // Fixes the data flow for the live-out 'val' by adding a 'copy_val' input to the corresponding |
| // (<val, phi>) phi after the cloning is done. |
| void FixSubgraphClosedSSAAfterCloning(); |
| |
| // |
| // Helpers for CloneBasicBlock. |
| // |
| |
| // Adjusts copy instruction's inputs: if the input of the original instruction is defined in the |
| // orig_bb_set, replaces it with a corresponding copy otherwise leaves it the same as original. |
| void ReplaceInputsWithCopies(HInstruction* copy_instr); |
| |
| // Recursively clones the environment for the copy instruction. If the input of the original |
| // environment is defined in the orig_bb_set, replaces it with a corresponding copy otherwise |
| // leaves it the same as original. |
| void DeepCloneEnvironmentWithRemapping(HInstruction* copy_instr, const HEnvironment* orig_env); |
| |
| // |
| // Helpers for RemapEdgesSuccessors. |
| // |
| |
| // Remaps incoming or original internal edge to its copy, adjusts the phi inputs in orig_succ and |
| // copy_succ. |
| void RemapOrigInternalOrIncomingEdge(HBasicBlock* orig_block, HBasicBlock* orig_succ); |
| |
| // Adds copy internal edge (from copy_block to copy_succ), updates phis in the copy_succ. |
| void AddCopyInternalEdge(HBasicBlock* orig_block, HBasicBlock* orig_succ); |
| |
| // Remaps copy internal edge to its origin, adjusts the phi inputs in orig_succ. |
| void RemapCopyInternalEdge(HBasicBlock* orig_block, HBasicBlock* orig_succ); |
| |
| // Checks whether the edges remapping info corresponds to the subgraph versioning case: |
| // - none of the incoming edges are to be remapped (they are being duplicated). |
| // - none of the internal edges are to be remapped. |
| bool IsRemapInfoForVersioning() const; |
| |
| // Processes incoming edges for subgraph versioning case: for each incoming edge (X, Y) adds |
| // an edge (X, Y_1) where Y_1 = Copy(Y) and add corresponding phi input to copy phi. |
| // |
| // Note: such node X will now have two successors, its unconditional branch instruction |
| // will be invalid and should be adjusted to some conditional branch by the client code. |
| void CopyIncomingEdgesForVersioning(); |
| |
| // |
| // Local versions of control flow calculation/adjustment routines. |
| // |
| |
| void FindBackEdgesLocal(HBasicBlock* entry_block, ArenaBitVector* local_set); |
| void RecalculateBackEdgesInfo(ArenaBitVector* outer_loop_bb_set); |
| GraphAnalysisResult AnalyzeLoopsLocally(ArenaBitVector* outer_loop_bb_set); |
| void CleanUpControlFlow(); |
| |
| // |
| // Helpers for ResolveDataFlow |
| // |
| |
| // Resolves the inputs of the phi. |
| void ResolvePhi(HPhi* phi); |
| |
| // Update induction range after when fixing SSA. |
| void UpdateInductionRangeInfoOf( |
| HInstruction* user, HInstruction* old_instruction, HInstruction* replacement); |
| |
| // |
| // Debug and logging methods. |
| // |
| void CheckInstructionInputsRemapping(HInstruction* orig_instr); |
| bool CheckRemappingInfoIsValid(); |
| void VerifyGraph(); |
| void DumpInputSets(); |
| |
| HBasicBlock* GetBlockById(uint32_t block_id) const { |
| DCHECK(block_id < graph_->GetBlocks().size()); |
| HBasicBlock* block = graph_->GetBlocks()[block_id]; |
| DCHECK(block != nullptr); |
| return block; |
| } |
| |
| HGraph* const graph_; |
| ArenaAllocator* const arena_; |
| |
| // Set of basic block in the original graph to be copied. |
| HBasicBlockSet orig_bb_set_; |
| |
| // Sets of edges which require successors remapping. |
| const HEdgeSet* remap_orig_internal_; |
| const HEdgeSet* remap_copy_internal_; |
| const HEdgeSet* remap_incoming_; |
| |
| // Correspondence map for blocks: (original block, copy block). |
| HBasicBlockMap* bb_map_; |
| // Correspondence map for instructions: (original HInstruction, copy HInstruction). |
| HInstructionMap* hir_map_; |
| // As a result of cloning, the induction range analysis information can be invalidated |
| // and must be updated. If not null, the cloner updates it for changed instructions. |
| InductionVarRange* induction_range_; |
| // Area in the graph for which control flow (back edges, loops, dominators) needs to be adjusted. |
| HLoopInformation* outer_loop_; |
| HBasicBlockSet outer_loop_bb_set_; |
| |
| HInstructionMap live_outs_; |
| |
| ART_FRIEND_TEST(SuperblockClonerTest, AdjustControlFlowInfo); |
| ART_FRIEND_TEST(SuperblockClonerTest, IsGraphConnected); |
| |
| DISALLOW_COPY_AND_ASSIGN(SuperblockCloner); |
| }; |
| |
| // Helper class to perform loop peeling/unrolling/versioning. |
| // |
| // This helper should be used when correspondence map between original and copied |
| // basic blocks/instructions are demanded. |
| class LoopClonerHelper : public ValueObject { |
| public: |
| LoopClonerHelper(HLoopInformation* info, |
| SuperblockCloner::HBasicBlockMap* bb_map, |
| SuperblockCloner::HInstructionMap* hir_map, |
| InductionVarRange* induction_range) : |
| loop_info_(info), |
| cloner_(info->GetHeader()->GetGraph(), &info->GetBlocks(), bb_map, hir_map, induction_range) { |
| // For now do transformations only for natural loops. |
| DCHECK(!info->IsIrreducible()); |
| } |
| |
| // Returns whether the loop can be peeled/unrolled (static function). |
| static bool IsLoopClonable(HLoopInformation* loop_info); |
| |
| // Returns whether the loop can be peeled/unrolled. |
| bool IsLoopClonable() const { return cloner_.IsSubgraphClonable(); } |
| |
| // Perform loop peeling. |
| // |
| // Control flow of an example (ignoring critical edges splitting). |
| // |
| // Before After |
| // |
| // |B| |B| |
| // | | |
| // v v |
| // |1| |1| |
| // | | |
| // v v |
| // |2|<-\ |2A| |
| // / \ / / \ |
| // v v/ / v |
| // |4| |3| / |3A| |
| // | / / |
| // v | v |
| // |E| \ |2|<-\ |
| // \ / \ / |
| // v v / |
| // |4| |3| |
| // | |
| // v |
| // |E| |
| HBasicBlock* DoPeeling() { |
| return DoLoopTransformationImpl(TransformationKind::kPeeling); |
| } |
| |
| // Perform loop unrolling. |
| // |
| // Control flow of an example (ignoring critical edges splitting). |
| // |
| // Before After |
| // |
| // |B| |B| |
| // | | |
| // v v |
| // |1| |1| |
| // | | |
| // v v |
| // |2|<-\ |2A|<-\ |
| // / \ / / \ \ |
| // v v/ / v \ |
| // |4| |3| / |3A| | |
| // | / / / |
| // v | v / |
| // |E| \ |2| / |
| // \ / \ / |
| // v v/ |
| // |4| |3| |
| // | |
| // v |
| // |E| |
| HBasicBlock* DoUnrolling() { |
| return DoLoopTransformationImpl(TransformationKind::kUnrolling); |
| } |
| |
| // Perform loop versioning. |
| // |
| // Control flow of an example (ignoring critical edges splitting). |
| // |
| // Before After |
| // |
| // |B| |B| |
| // | | |
| // v v |
| // |1| |1|_________ |
| // | | | |
| // v v v |
| // |2|<-\ |2|<-\ |2A|<-\ |
| // / \ / / \ / / \ / |
| // v v/ | v/ | v/ |
| // | |3| | |3| | |3A| |
| // | | __________| |
| // | || |
| // v vv |
| // |4| |4| |
| // | | |
| // v v |
| // |E| |E| |
| HBasicBlock* DoVersioning() { |
| return DoLoopTransformationImpl(TransformationKind::kVersioning); |
| } |
| |
| HLoopInformation* GetRegionToBeAdjusted() const { return cloner_.GetRegionToBeAdjusted(); } |
| |
| protected: |
| enum class TransformationKind { |
| kPeeling, |
| kUnrolling, |
| kVersioning, |
| }; |
| |
| // Applies a specific loop transformation to the loop. |
| HBasicBlock* DoLoopTransformationImpl(TransformationKind transformation); |
| |
| private: |
| HLoopInformation* loop_info_; |
| SuperblockCloner cloner_; |
| |
| DISALLOW_COPY_AND_ASSIGN(LoopClonerHelper); |
| }; |
| |
| // Helper class to perform loop peeling/unrolling/versioning. |
| // |
| // This helper should be used when there is no need to get correspondence information between |
| // original and copied basic blocks/instructions. |
| class LoopClonerSimpleHelper : public ValueObject { |
| public: |
| LoopClonerSimpleHelper(HLoopInformation* info, InductionVarRange* induction_range); |
| bool IsLoopClonable() const { return helper_.IsLoopClonable(); } |
| HBasicBlock* DoPeeling() { return helper_.DoPeeling(); } |
| HBasicBlock* DoUnrolling() { return helper_.DoUnrolling(); } |
| HBasicBlock* DoVersioning() { return helper_.DoVersioning(); } |
| HLoopInformation* GetRegionToBeAdjusted() const { return helper_.GetRegionToBeAdjusted(); } |
| |
| const SuperblockCloner::HBasicBlockMap* GetBasicBlockMap() const { return &bb_map_; } |
| const SuperblockCloner::HInstructionMap* GetInstructionMap() const { return &hir_map_; } |
| |
| private: |
| SuperblockCloner::HBasicBlockMap bb_map_; |
| SuperblockCloner::HInstructionMap hir_map_; |
| LoopClonerHelper helper_; |
| |
| DISALLOW_COPY_AND_ASSIGN(LoopClonerSimpleHelper); |
| }; |
| |
| // Collects edge remapping info for loop peeling/unrolling for the loop specified by loop info. |
| void CollectRemappingInfoForPeelUnroll(bool to_unroll, |
| HLoopInformation* loop_info, |
| SuperblockCloner::HEdgeSet* remap_orig_internal, |
| SuperblockCloner::HEdgeSet* remap_copy_internal, |
| SuperblockCloner::HEdgeSet* remap_incoming); |
| |
| // Returns whether blocks from 'work_set' are reachable from the rest of the graph. |
| // |
| // Returns whether such a set 'outer_entries' of basic blocks exists that: |
| // - each block from 'outer_entries' is not from 'work_set'. |
| // - each block from 'work_set' is reachable from at least one block from 'outer_entries'. |
| // |
| // After the function returns work_set contains only blocks from the original 'work_set' |
| // which are unreachable from the rest of the graph. |
| bool IsSubgraphConnected(SuperblockCloner::HBasicBlockSet* work_set, HGraph* graph); |
| |
| // Returns a common predecessor of loop1 and loop2 in the loop tree or nullptr if it is the whole |
| // graph. |
| HLoopInformation* FindCommonLoop(HLoopInformation* loop1, HLoopInformation* loop2); |
| } // namespace art |
| |
| namespace std { |
| |
| template <> |
| struct hash<art::HEdge> { |
| size_t operator()(art::HEdge const& x) const noexcept { |
| // Use Cantor pairing function as the hash function. |
| size_t a = x.GetFrom(); |
| size_t b = x.GetTo(); |
| return (a + b) * (a + b + 1) / 2 + b; |
| } |
| }; |
| ostream& operator<<(ostream& os, const art::HEdge& e); |
| |
| } // namespace std |
| |
| #endif // ART_COMPILER_OPTIMIZING_SUPERBLOCK_CLONER_H_ |