blob: 05855c30d407a627d7a7bee3d1cf166adce31244 [file] [log] [blame]
/*
* Copyright (C) 2020 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef ART_COMPILER_OPTIMIZING_EXECUTION_SUBGRAPH_H_
#define ART_COMPILER_OPTIMIZING_EXECUTION_SUBGRAPH_H_
#include <algorithm>
#include <sstream>
#include "base/arena_allocator.h"
#include "base/arena_bit_vector.h"
#include "base/arena_containers.h"
#include "base/array_ref.h"
#include "base/bit_vector-inl.h"
#include "base/globals.h"
#include "base/iteration_range.h"
#include "base/mutex.h"
#include "base/scoped_arena_allocator.h"
#include "base/scoped_arena_containers.h"
#include "base/stl_util.h"
#include "base/transform_iterator.h"
#include "nodes.h"
namespace art {
// Helper for transforming blocks to block_ids.
class BlockToBlockIdTransformer {
public:
BlockToBlockIdTransformer(BlockToBlockIdTransformer&&) = default;
BlockToBlockIdTransformer(const BlockToBlockIdTransformer&) = default;
BlockToBlockIdTransformer() {}
inline uint32_t operator()(const HBasicBlock* b) const {
return b->GetBlockId();
}
};
// Helper for transforming block ids to blocks.
class BlockIdToBlockTransformer {
public:
BlockIdToBlockTransformer(BlockIdToBlockTransformer&&) = default;
BlockIdToBlockTransformer(const BlockIdToBlockTransformer&) = default;
explicit BlockIdToBlockTransformer(const HGraph* graph) : graph_(graph) {}
inline const HGraph* GetGraph() const {
return graph_;
}
inline HBasicBlock* GetBlock(uint32_t id) const {
DCHECK_LT(id, graph_->GetBlocks().size()) << graph_->PrettyMethod();
HBasicBlock* blk = graph_->GetBlocks()[id];
DCHECK(blk != nullptr);
return blk;
}
inline HBasicBlock* operator()(uint32_t id) const {
return GetBlock(id);
}
private:
const HGraph* const graph_;
};
class BlockIdFilterThunk {
public:
explicit BlockIdFilterThunk(const BitVector& i) : inner_(i) {}
BlockIdFilterThunk(BlockIdFilterThunk&& other) noexcept = default;
BlockIdFilterThunk(const BlockIdFilterThunk&) = default;
bool operator()(const HBasicBlock* b) const {
return inner_.IsBitSet(b->GetBlockId());
}
private:
const BitVector& inner_;
};
// A representation of a particular section of the graph. The graph is split
// into an excluded and included area and is used to track escapes.
//
// This object is a view of the graph and is not updated as the graph is
// changed.
//
// This is implemented by removing various escape points from the subgraph using
// the 'RemoveBlock' function. Once all required blocks are removed one will
// 'Finalize' the subgraph. This will extend the removed area to include:
// (1) Any block which inevitably leads to (post-dominates) a removed block
// (2) any block which is between 2 removed blocks
//
// This allows us to create a set of 'ExcludedCohorts' which are the
// well-connected subsets of the graph made up of removed blocks. These cohorts
// have a set of entry and exit blocks which act as the boundary of the cohort.
// Since we removed blocks between 2 excluded blocks it is impossible for any
// cohort-exit block to reach any cohort-entry block. This means we can use the
// boundary between the cohort and the rest of the graph to insert
// materialization blocks for partial LSE.
//
// TODO We really should expand this to take into account where the object
// allocation takes place directly. Currently we always act as though it were
// allocated in the entry block. This is a massively simplifying assumption but
// means we can't partially remove objects that are repeatedly allocated in a
// loop.
class ExecutionSubgraph : public DeletableArenaObject<kArenaAllocLSA> {
public:
using BitVecBlockRange =
IterationRange<TransformIterator<BitVector::IndexIterator, BlockIdToBlockTransformer>>;
using FilteredBitVecBlockRange = IterationRange<
FilterIterator<ArenaVector<HBasicBlock*>::const_iterator, BlockIdFilterThunk>>;
// A set of connected blocks which are connected and removed from the
// ExecutionSubgraph. See above comment for explanation.
class ExcludedCohort : public ArenaObject<kArenaAllocLSA> {
public:
ExcludedCohort(ExcludedCohort&&) = default;
ExcludedCohort(const ExcludedCohort&) = delete;
explicit ExcludedCohort(ScopedArenaAllocator* allocator, HGraph* graph)
: graph_(graph),
entry_blocks_(allocator, graph_->GetBlocks().size(), false, kArenaAllocLSA),
exit_blocks_(allocator, graph_->GetBlocks().size(), false, kArenaAllocLSA),
blocks_(allocator, graph_->GetBlocks().size(), false, kArenaAllocLSA) {}
~ExcludedCohort() = default;
// All blocks in the cohort.
BitVecBlockRange Blocks() const {
return BlockIterRange(blocks_);
}
// Blocks that have predecessors outside of the cohort. These blocks will
// need to have PHIs/control-flow added to create the escaping value.
BitVecBlockRange EntryBlocks() const {
return BlockIterRange(entry_blocks_);
}
FilteredBitVecBlockRange EntryBlocksReversePostOrder() const {
return Filter(MakeIterationRange(graph_->GetReversePostOrder()),
BlockIdFilterThunk(entry_blocks_));
}
bool IsEntryBlock(const HBasicBlock* blk) const {
return entry_blocks_.IsBitSet(blk->GetBlockId());
}
// Blocks that have successors outside of the cohort. The successors of
// these blocks will need to have PHI's to restore state.
BitVecBlockRange ExitBlocks() const {
return BlockIterRange(exit_blocks_);
}
bool operator==(const ExcludedCohort& other) const {
return blocks_.Equal(&other.blocks_);
}
bool ContainsBlock(const HBasicBlock* blk) const {
return blocks_.IsBitSet(blk->GetBlockId());
}
// Returns true if there is a path from 'blk' to any block in this cohort.
// NB blocks contained within the cohort are not considered to be succeeded
// by the cohort (i.e. this function will return false).
bool SucceedsBlock(const HBasicBlock* blk) const {
if (ContainsBlock(blk)) {
return false;
}
auto idxs = entry_blocks_.Indexes();
return std::any_of(idxs.begin(), idxs.end(), [&](uint32_t entry) -> bool {
return blk->GetGraph()->PathBetween(blk->GetBlockId(), entry);
});
}
// Returns true if there is a path from any block in this cohort to 'blk'.
// NB blocks contained within the cohort are not considered to be preceded
// by the cohort (i.e. this function will return false).
bool PrecedesBlock(const HBasicBlock* blk) const {
if (ContainsBlock(blk)) {
return false;
}
auto idxs = exit_blocks_.Indexes();
return std::any_of(idxs.begin(), idxs.end(), [&](uint32_t exit) -> bool {
return blk->GetGraph()->PathBetween(exit, blk->GetBlockId());
});
}
void Dump(std::ostream& os) const;
private:
BitVecBlockRange BlockIterRange(const ArenaBitVector& bv) const {
auto indexes = bv.Indexes();
BitVecBlockRange res = MakeTransformRange(indexes, BlockIdToBlockTransformer(graph_));
return res;
}
ExcludedCohort() = delete;
HGraph* graph_;
ArenaBitVector entry_blocks_;
ArenaBitVector exit_blocks_;
ArenaBitVector blocks_;
friend class ExecutionSubgraph;
friend class LoadStoreAnalysisTest;
};
// The number of successors we can track on a single block. Graphs which
// contain a block with a branching factor greater than this will not be
// analysed. This is used to both limit the memory usage of analysis to
// reasonable levels and ensure that the analysis will complete in a
// reasonable amount of time. It also simplifies the implementation somewhat
// to have a constant branching factor.
static constexpr uint32_t kMaxFilterableSuccessors = 8;
// Instantiate a subgraph. The subgraph can be instantiated only if partial-escape
// analysis is desired (eg not when being used for instruction scheduling) and
// when the branching factor in the graph is not too high. These conditions
// are determined once and passed down for performance reasons.
ExecutionSubgraph(HGraph* graph, ScopedArenaAllocator* allocator);
void Invalidate() {
valid_ = false;
}
// A block is contained by the ExecutionSubgraph if it is reachable. This
// means it has not been removed explicitly or via pruning/concavity removal.
// Finalization is needed to call this function.
// See RemoveConcavity and Prune for more information.
bool ContainsBlock(const HBasicBlock* blk) const {
DCHECK(!finalized_ || !needs_prune_) << "finalized: " << finalized_;
if (!valid_) {
return false;
}
return !unreachable_blocks_.IsBitSet(blk->GetBlockId());
}
// Mark the block as removed from the subgraph.
void RemoveBlock(const HBasicBlock* to_remove);
// Called when no more updates will be done to the subgraph. Calculate the
// final subgraph
void Finalize() {
Prune();
RemoveConcavity();
finalized_ = true;
}
BitVecBlockRange UnreachableBlocks() const {
auto idxs = unreachable_blocks_.Indexes();
return MakeTransformRange(idxs, BlockIdToBlockTransformer(graph_));
}
// Returns true if all allowed execution paths from start eventually reach the
// graph's exit block (or diverge).
bool IsValid() const {
return valid_;
}
ArrayRef<const ExcludedCohort> GetExcludedCohorts() const {
DCHECK(!valid_ || !needs_prune_);
if (!valid_ || !unreachable_blocks_.IsAnyBitSet()) {
return ArrayRef<const ExcludedCohort>();
} else {
return ArrayRef<const ExcludedCohort>(*excluded_list_);
}
}
// Helper class to create reachable blocks iterator.
class ContainsFunctor {
public:
bool operator()(HBasicBlock* blk) const {
return subgraph_->ContainsBlock(blk);
}
private:
explicit ContainsFunctor(const ExecutionSubgraph* subgraph) : subgraph_(subgraph) {}
const ExecutionSubgraph* const subgraph_;
friend class ExecutionSubgraph;
};
// Returns an iterator over reachable blocks (filtered as we go). This is primarilly for testing.
IterationRange<
FilterIterator<typename ArenaVector<HBasicBlock*>::const_iterator, ContainsFunctor>>
ReachableBlocks() const {
return Filter(MakeIterationRange(graph_->GetBlocks()), ContainsFunctor(this));
}
static bool CanAnalyse(HGraph* graph) {
// If there are any blocks with more than kMaxFilterableSuccessors we can't
// analyse the graph. We avoid this case to prevent excessive memory and
// time usage while allowing a simpler algorithm with a fixed-width
// branching factor.
return std::all_of(graph->GetBlocks().begin(), graph->GetBlocks().end(), [](HBasicBlock* blk) {
return blk == nullptr || blk->GetSuccessors().size() <= kMaxFilterableSuccessors;
});
}
private:
std::bitset<kMaxFilterableSuccessors> GetAllowedSuccessors(const HBasicBlock* blk) const {
DCHECK(valid_);
return allowed_successors_[blk->GetBlockId()];
}
void LimitBlockSuccessors(const HBasicBlock* block,
std::bitset<kMaxFilterableSuccessors> allowed) {
needs_prune_ = true;
allowed_successors_[block->GetBlockId()] &= allowed;
}
// Remove nodes which both precede and follow any exclusions. This ensures we don't need to deal
// with only conditionally materializing objects depending on if we already materialized them
// Ensure that for all blocks A, B, C: Unreachable(A) && Unreachable(C) && PathBetween(A, B) &&
// PathBetween(A, C) implies Unreachable(B). This simplifies later transforms since it ensures
// that no execution can leave and then re-enter any exclusion.
void RemoveConcavity();
// Removes sink nodes. Sink nodes are nodes where there is no execution which
// avoids all removed nodes.
void Prune();
void RecalculateExcludedCohort();
HGraph* graph_;
ScopedArenaAllocator* allocator_;
// The map from block_id -> allowed-successors.
// This is the canonical representation of this subgraph. If a bit in the
// bitset is not set then the corresponding outgoing edge of that block is not
// considered traversable.
ScopedArenaVector<std::bitset<kMaxFilterableSuccessors>> allowed_successors_;
// Helper that holds which blocks we are able to reach. Only valid if
// 'needs_prune_ == false'.
ArenaBitVector unreachable_blocks_;
// A list of the excluded-cohorts of this subgraph. This is only valid when
// 'needs_prune_ == false'
std::optional<ScopedArenaVector<ExcludedCohort>> excluded_list_;
// Bool to hold if there is at least one known path from the start block to
// the end in this graph. Used to short-circuit computation.
bool valid_;
// True if the subgraph is consistent and can be queried. Modifying the
// subgraph clears this and requires a prune to restore.
bool needs_prune_;
// True if no more modification of the subgraph is permitted.
bool finalized_;
friend class ExecutionSubgraphTest;
friend class LoadStoreAnalysisTest;
DISALLOW_COPY_AND_ASSIGN(ExecutionSubgraph);
};
std::ostream& operator<<(std::ostream& os, const ExecutionSubgraph::ExcludedCohort& ex);
} // namespace art
#endif // ART_COMPILER_OPTIMIZING_EXECUTION_SUBGRAPH_H_