| /* |
| * Copyright (C) 2020 The Android Open Source Project |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| #ifndef ART_COMPILER_OPTIMIZING_EXECUTION_SUBGRAPH_H_ |
| #define ART_COMPILER_OPTIMIZING_EXECUTION_SUBGRAPH_H_ |
| |
| #include <algorithm> |
| #include <sstream> |
| |
| #include "base/arena_allocator.h" |
| #include "base/arena_bit_vector.h" |
| #include "base/arena_containers.h" |
| #include "base/array_ref.h" |
| #include "base/bit_vector-inl.h" |
| #include "base/globals.h" |
| #include "base/iteration_range.h" |
| #include "base/mutex.h" |
| #include "base/scoped_arena_allocator.h" |
| #include "base/scoped_arena_containers.h" |
| #include "base/stl_util.h" |
| #include "base/transform_iterator.h" |
| #include "nodes.h" |
| |
| namespace art { |
| |
| // Helper for transforming blocks to block_ids. |
| class BlockToBlockIdTransformer { |
| public: |
| BlockToBlockIdTransformer(BlockToBlockIdTransformer&&) = default; |
| BlockToBlockIdTransformer(const BlockToBlockIdTransformer&) = default; |
| BlockToBlockIdTransformer() {} |
| |
| inline uint32_t operator()(const HBasicBlock* b) const { |
| return b->GetBlockId(); |
| } |
| }; |
| |
| // Helper for transforming block ids to blocks. |
| class BlockIdToBlockTransformer { |
| public: |
| BlockIdToBlockTransformer(BlockIdToBlockTransformer&&) = default; |
| BlockIdToBlockTransformer(const BlockIdToBlockTransformer&) = default; |
| explicit BlockIdToBlockTransformer(const HGraph* graph) : graph_(graph) {} |
| |
| inline const HGraph* GetGraph() const { |
| return graph_; |
| } |
| |
| inline HBasicBlock* GetBlock(uint32_t id) const { |
| DCHECK_LT(id, graph_->GetBlocks().size()) << graph_->PrettyMethod(); |
| HBasicBlock* blk = graph_->GetBlocks()[id]; |
| DCHECK(blk != nullptr); |
| return blk; |
| } |
| |
| inline HBasicBlock* operator()(uint32_t id) const { |
| return GetBlock(id); |
| } |
| |
| private: |
| const HGraph* const graph_; |
| }; |
| |
| class BlockIdFilterThunk { |
| public: |
| explicit BlockIdFilterThunk(const BitVector& i) : inner_(i) {} |
| BlockIdFilterThunk(BlockIdFilterThunk&& other) noexcept = default; |
| BlockIdFilterThunk(const BlockIdFilterThunk&) = default; |
| |
| bool operator()(const HBasicBlock* b) const { |
| return inner_.IsBitSet(b->GetBlockId()); |
| } |
| |
| private: |
| const BitVector& inner_; |
| }; |
| |
| // A representation of a particular section of the graph. The graph is split |
| // into an excluded and included area and is used to track escapes. |
| // |
| // This object is a view of the graph and is not updated as the graph is |
| // changed. |
| // |
| // This is implemented by removing various escape points from the subgraph using |
| // the 'RemoveBlock' function. Once all required blocks are removed one will |
| // 'Finalize' the subgraph. This will extend the removed area to include: |
| // (1) Any block which inevitably leads to (post-dominates) a removed block |
| // (2) any block which is between 2 removed blocks |
| // |
| // This allows us to create a set of 'ExcludedCohorts' which are the |
| // well-connected subsets of the graph made up of removed blocks. These cohorts |
| // have a set of entry and exit blocks which act as the boundary of the cohort. |
| // Since we removed blocks between 2 excluded blocks it is impossible for any |
| // cohort-exit block to reach any cohort-entry block. This means we can use the |
| // boundary between the cohort and the rest of the graph to insert |
| // materialization blocks for partial LSE. |
| // |
| // TODO We really should expand this to take into account where the object |
| // allocation takes place directly. Currently we always act as though it were |
| // allocated in the entry block. This is a massively simplifying assumption but |
| // means we can't partially remove objects that are repeatedly allocated in a |
| // loop. |
| class ExecutionSubgraph : public DeletableArenaObject<kArenaAllocLSA> { |
| public: |
| using BitVecBlockRange = |
| IterationRange<TransformIterator<BitVector::IndexIterator, BlockIdToBlockTransformer>>; |
| using FilteredBitVecBlockRange = IterationRange< |
| FilterIterator<ArenaVector<HBasicBlock*>::const_iterator, BlockIdFilterThunk>>; |
| |
| // A set of connected blocks which are connected and removed from the |
| // ExecutionSubgraph. See above comment for explanation. |
| class ExcludedCohort : public ArenaObject<kArenaAllocLSA> { |
| public: |
| ExcludedCohort(ExcludedCohort&&) = default; |
| ExcludedCohort(const ExcludedCohort&) = delete; |
| explicit ExcludedCohort(ScopedArenaAllocator* allocator, HGraph* graph) |
| : graph_(graph), |
| entry_blocks_(allocator, graph_->GetBlocks().size(), false, kArenaAllocLSA), |
| exit_blocks_(allocator, graph_->GetBlocks().size(), false, kArenaAllocLSA), |
| blocks_(allocator, graph_->GetBlocks().size(), false, kArenaAllocLSA) {} |
| |
| ~ExcludedCohort() = default; |
| |
| // All blocks in the cohort. |
| BitVecBlockRange Blocks() const { |
| return BlockIterRange(blocks_); |
| } |
| |
| // Blocks that have predecessors outside of the cohort. These blocks will |
| // need to have PHIs/control-flow added to create the escaping value. |
| BitVecBlockRange EntryBlocks() const { |
| return BlockIterRange(entry_blocks_); |
| } |
| |
| FilteredBitVecBlockRange EntryBlocksReversePostOrder() const { |
| return Filter(MakeIterationRange(graph_->GetReversePostOrder()), |
| BlockIdFilterThunk(entry_blocks_)); |
| } |
| |
| bool IsEntryBlock(const HBasicBlock* blk) const { |
| return entry_blocks_.IsBitSet(blk->GetBlockId()); |
| } |
| |
| // Blocks that have successors outside of the cohort. The successors of |
| // these blocks will need to have PHI's to restore state. |
| BitVecBlockRange ExitBlocks() const { |
| return BlockIterRange(exit_blocks_); |
| } |
| |
| bool operator==(const ExcludedCohort& other) const { |
| return blocks_.Equal(&other.blocks_); |
| } |
| |
| bool ContainsBlock(const HBasicBlock* blk) const { |
| return blocks_.IsBitSet(blk->GetBlockId()); |
| } |
| |
| // Returns true if there is a path from 'blk' to any block in this cohort. |
| // NB blocks contained within the cohort are not considered to be succeeded |
| // by the cohort (i.e. this function will return false). |
| bool SucceedsBlock(const HBasicBlock* blk) const { |
| if (ContainsBlock(blk)) { |
| return false; |
| } |
| auto idxs = entry_blocks_.Indexes(); |
| return std::any_of(idxs.begin(), idxs.end(), [&](uint32_t entry) -> bool { |
| return blk->GetGraph()->PathBetween(blk->GetBlockId(), entry); |
| }); |
| } |
| |
| // Returns true if there is a path from any block in this cohort to 'blk'. |
| // NB blocks contained within the cohort are not considered to be preceded |
| // by the cohort (i.e. this function will return false). |
| bool PrecedesBlock(const HBasicBlock* blk) const { |
| if (ContainsBlock(blk)) { |
| return false; |
| } |
| auto idxs = exit_blocks_.Indexes(); |
| return std::any_of(idxs.begin(), idxs.end(), [&](uint32_t exit) -> bool { |
| return blk->GetGraph()->PathBetween(exit, blk->GetBlockId()); |
| }); |
| } |
| |
| void Dump(std::ostream& os) const; |
| |
| private: |
| BitVecBlockRange BlockIterRange(const ArenaBitVector& bv) const { |
| auto indexes = bv.Indexes(); |
| BitVecBlockRange res = MakeTransformRange(indexes, BlockIdToBlockTransformer(graph_)); |
| return res; |
| } |
| |
| ExcludedCohort() = delete; |
| |
| HGraph* graph_; |
| ArenaBitVector entry_blocks_; |
| ArenaBitVector exit_blocks_; |
| ArenaBitVector blocks_; |
| |
| friend class ExecutionSubgraph; |
| friend class LoadStoreAnalysisTest; |
| }; |
| |
| // The number of successors we can track on a single block. Graphs which |
| // contain a block with a branching factor greater than this will not be |
| // analysed. This is used to both limit the memory usage of analysis to |
| // reasonable levels and ensure that the analysis will complete in a |
| // reasonable amount of time. It also simplifies the implementation somewhat |
| // to have a constant branching factor. |
| static constexpr uint32_t kMaxFilterableSuccessors = 8; |
| |
| // Instantiate a subgraph. The subgraph can be instantiated only if partial-escape |
| // analysis is desired (eg not when being used for instruction scheduling) and |
| // when the branching factor in the graph is not too high. These conditions |
| // are determined once and passed down for performance reasons. |
| ExecutionSubgraph(HGraph* graph, ScopedArenaAllocator* allocator); |
| |
| void Invalidate() { |
| valid_ = false; |
| } |
| |
| // A block is contained by the ExecutionSubgraph if it is reachable. This |
| // means it has not been removed explicitly or via pruning/concavity removal. |
| // Finalization is needed to call this function. |
| // See RemoveConcavity and Prune for more information. |
| bool ContainsBlock(const HBasicBlock* blk) const { |
| DCHECK(!finalized_ || !needs_prune_) << "finalized: " << finalized_; |
| if (!valid_) { |
| return false; |
| } |
| return !unreachable_blocks_.IsBitSet(blk->GetBlockId()); |
| } |
| |
| // Mark the block as removed from the subgraph. |
| void RemoveBlock(const HBasicBlock* to_remove); |
| |
| // Called when no more updates will be done to the subgraph. Calculate the |
| // final subgraph |
| void Finalize() { |
| Prune(); |
| RemoveConcavity(); |
| finalized_ = true; |
| } |
| |
| BitVecBlockRange UnreachableBlocks() const { |
| auto idxs = unreachable_blocks_.Indexes(); |
| return MakeTransformRange(idxs, BlockIdToBlockTransformer(graph_)); |
| } |
| |
| // Returns true if all allowed execution paths from start eventually reach the |
| // graph's exit block (or diverge). |
| bool IsValid() const { |
| return valid_; |
| } |
| |
| ArrayRef<const ExcludedCohort> GetExcludedCohorts() const { |
| DCHECK(!valid_ || !needs_prune_); |
| if (!valid_ || !unreachable_blocks_.IsAnyBitSet()) { |
| return ArrayRef<const ExcludedCohort>(); |
| } else { |
| return ArrayRef<const ExcludedCohort>(*excluded_list_); |
| } |
| } |
| |
| // Helper class to create reachable blocks iterator. |
| class ContainsFunctor { |
| public: |
| bool operator()(HBasicBlock* blk) const { |
| return subgraph_->ContainsBlock(blk); |
| } |
| |
| private: |
| explicit ContainsFunctor(const ExecutionSubgraph* subgraph) : subgraph_(subgraph) {} |
| const ExecutionSubgraph* const subgraph_; |
| friend class ExecutionSubgraph; |
| }; |
| // Returns an iterator over reachable blocks (filtered as we go). This is primarilly for testing. |
| IterationRange< |
| FilterIterator<typename ArenaVector<HBasicBlock*>::const_iterator, ContainsFunctor>> |
| ReachableBlocks() const { |
| return Filter(MakeIterationRange(graph_->GetBlocks()), ContainsFunctor(this)); |
| } |
| |
| static bool CanAnalyse(HGraph* graph) { |
| // If there are any blocks with more than kMaxFilterableSuccessors we can't |
| // analyse the graph. We avoid this case to prevent excessive memory and |
| // time usage while allowing a simpler algorithm with a fixed-width |
| // branching factor. |
| return std::all_of(graph->GetBlocks().begin(), graph->GetBlocks().end(), [](HBasicBlock* blk) { |
| return blk == nullptr || blk->GetSuccessors().size() <= kMaxFilterableSuccessors; |
| }); |
| } |
| |
| private: |
| std::bitset<kMaxFilterableSuccessors> GetAllowedSuccessors(const HBasicBlock* blk) const { |
| DCHECK(valid_); |
| return allowed_successors_[blk->GetBlockId()]; |
| } |
| |
| void LimitBlockSuccessors(const HBasicBlock* block, |
| std::bitset<kMaxFilterableSuccessors> allowed) { |
| needs_prune_ = true; |
| allowed_successors_[block->GetBlockId()] &= allowed; |
| } |
| |
| // Remove nodes which both precede and follow any exclusions. This ensures we don't need to deal |
| // with only conditionally materializing objects depending on if we already materialized them |
| // Ensure that for all blocks A, B, C: Unreachable(A) && Unreachable(C) && PathBetween(A, B) && |
| // PathBetween(A, C) implies Unreachable(B). This simplifies later transforms since it ensures |
| // that no execution can leave and then re-enter any exclusion. |
| void RemoveConcavity(); |
| |
| // Removes sink nodes. Sink nodes are nodes where there is no execution which |
| // avoids all removed nodes. |
| void Prune(); |
| |
| void RecalculateExcludedCohort(); |
| |
| HGraph* graph_; |
| ScopedArenaAllocator* allocator_; |
| // The map from block_id -> allowed-successors. |
| // This is the canonical representation of this subgraph. If a bit in the |
| // bitset is not set then the corresponding outgoing edge of that block is not |
| // considered traversable. |
| ScopedArenaVector<std::bitset<kMaxFilterableSuccessors>> allowed_successors_; |
| // Helper that holds which blocks we are able to reach. Only valid if |
| // 'needs_prune_ == false'. |
| ArenaBitVector unreachable_blocks_; |
| // A list of the excluded-cohorts of this subgraph. This is only valid when |
| // 'needs_prune_ == false' |
| std::optional<ScopedArenaVector<ExcludedCohort>> excluded_list_; |
| // Bool to hold if there is at least one known path from the start block to |
| // the end in this graph. Used to short-circuit computation. |
| bool valid_; |
| // True if the subgraph is consistent and can be queried. Modifying the |
| // subgraph clears this and requires a prune to restore. |
| bool needs_prune_; |
| // True if no more modification of the subgraph is permitted. |
| bool finalized_; |
| |
| friend class ExecutionSubgraphTest; |
| friend class LoadStoreAnalysisTest; |
| |
| DISALLOW_COPY_AND_ASSIGN(ExecutionSubgraph); |
| }; |
| |
| std::ostream& operator<<(std::ostream& os, const ExecutionSubgraph::ExcludedCohort& ex); |
| |
| } // namespace art |
| |
| #endif // ART_COMPILER_OPTIMIZING_EXECUTION_SUBGRAPH_H_ |