28 files changed, 4767 insertions(+), 58 deletions(-)
diff --git a/compiler/Android.bp b/compiler/Android.bp index e91700bb47..523aab6a25 100644 --- a/compiler/Android.bp +++ b/compiler/Android.bp @@ -48,6 +48,7 @@ art_cc_defaults { "optimizing/data_type.cc", "optimizing/dead_code_elimination.cc", "optimizing/escape.cc", + "optimizing/execution_subgraph.cc", "optimizing/graph_checker.cc", "optimizing/graph_visualizer.cc", "optimizing/gvn.cc", @@ -414,6 +415,7 @@ art_cc_test { "jni/jni_cfi_test.cc", "optimizing/codegen_test.cc", + "optimizing/execution_subgraph_test.cc", "optimizing/load_store_analysis_test.cc", "optimizing/load_store_elimination_test.cc", "optimizing/optimizing_cfi_test.cc", diff --git a/compiler/optimizing/execution_subgraph.cc b/compiler/optimizing/execution_subgraph.cc new file mode 100644 index 0000000000..5045e8db0b --- /dev/null +++ b/compiler/optimizing/execution_subgraph.cc @@ -0,0 +1,364 @@ +/* + * Copyright (C) 2020 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "execution_subgraph.h" + +#include <algorithm> +#include <unordered_set> + +#include "android-base/macros.h" +#include "base/arena_allocator.h" +#include "base/arena_bit_vector.h" +#include "base/globals.h" +#include "base/scoped_arena_allocator.h" +#include "nodes.h" + +namespace art { + +ExecutionSubgraph::ExecutionSubgraph(HGraph* graph, + bool analysis_possible, + ScopedArenaAllocator* allocator) + : graph_(graph), + allocator_(allocator), + allowed_successors_(analysis_possible ? graph_->GetBlocks().size() : 0, + ~(std::bitset<kMaxFilterableSuccessors> {}), + allocator_->Adapter(kArenaAllocLSA)), + unreachable_blocks_( + allocator_, analysis_possible ? graph_->GetBlocks().size() : 0, false, kArenaAllocLSA), + valid_(analysis_possible), + needs_prune_(false), + finalized_(false) { + if (valid_) { + DCHECK(std::all_of(graph->GetBlocks().begin(), graph->GetBlocks().end(), [](HBasicBlock* it) { + return it == nullptr || it->GetSuccessors().size() <= kMaxFilterableSuccessors; + })); + } +} + +void ExecutionSubgraph::RemoveBlock(const HBasicBlock* to_remove) { + if (!valid_) { + return; + } + uint32_t id = to_remove->GetBlockId(); + if (unreachable_blocks_.IsBitSet(id)) { + if (kIsDebugBuild) { + // This isn't really needed but it's good to have this so it functions as + // a DCHECK that we always call Prune after removing any block. + needs_prune_ = true; + } + return; + } + unreachable_blocks_.SetBit(id); + for (HBasicBlock* pred : to_remove->GetPredecessors()) { + std::bitset<kMaxFilterableSuccessors> allowed_successors {}; + // ZipCount iterates over both the successors and the index of them at the same time. + for (auto [succ, i] : ZipCount(MakeIterationRange(pred->GetSuccessors()))) { + if (succ != to_remove) { + allowed_successors.set(i); + } + } + LimitBlockSuccessors(pred, allowed_successors); + } +} + +// Removes sink nodes. 
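// A sink node is a block from which no allowed path reaches the exit block,
// so keeping it would describe executions that cannot actually complete. The
// walk below is an iterative DFS from the entry block over the currently
// allowed edges; an outgoing edge is kept only if some path through it
// reaches the exit (or closes a loop on the current path).
//
// Caller-side sketch only ('graph', 'escape_block' and 'alloc' are
// placeholder names, not part of this change):
//
//   ExecutionSubgraph esg(graph, ExecutionSubgraph::CanAnalyse(graph), alloc);
//   esg.RemoveBlock(escape_block);  // exclude each escape point
//   esg.Finalize();                 // runs Prune() and RemoveConcavity()
//   if (esg.IsValid()) {
//     // query ContainsBlock() / GetExcludedCohorts() ...
//   }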
+void ExecutionSubgraph::Prune() { + if (UNLIKELY(!valid_)) { + return; + } + needs_prune_ = false; + // This is the record of the edges that were both (1) explored and (2) reached + // the exit node. + { + // Allocator for temporary values. + ScopedArenaAllocator temporaries(graph_->GetArenaStack()); + ScopedArenaVector<std::bitset<kMaxFilterableSuccessors>> results( + graph_->GetBlocks().size(), temporaries.Adapter(kArenaAllocLSA)); + unreachable_blocks_.ClearAllBits(); + // TODO We should support infinite loops as well. + if (UNLIKELY(graph_->GetExitBlock() == nullptr)) { + // Infinite loop + valid_ = false; + return; + } + // Fills up the 'results' map with what we need to add to update + // allowed_successors in order to prune sink nodes. + bool start_reaches_end = false; + // This is basically a DFS of the graph with some edges skipped. + { + const size_t num_blocks = graph_->GetBlocks().size(); + constexpr ssize_t kUnvisitedSuccIdx = -1; + ArenaBitVector visiting(&temporaries, num_blocks, false, kArenaAllocLSA); + // How many of the successors of each block we have already examined. This + // has three states. + // (1) kUnvisitedSuccIdx: we have not examined any edges, + // (2) 0 <= val < # of successors: we have examined 'val' successors/are + // currently examining successors_[val], + // (3) kMaxFilterableSuccessors: We have examined all of the successors of + // the block (the 'result' is final). + ScopedArenaVector<ssize_t> last_succ_seen( + num_blocks, kUnvisitedSuccIdx, temporaries.Adapter(kArenaAllocLSA)); + // A stack of which blocks we are visiting in this DFS traversal. Does not + // include the current-block. Used with last_succ_seen to figure out which + // bits to set if we find a path to the end/loop. + ScopedArenaVector<uint32_t> current_path(temporaries.Adapter(kArenaAllocLSA)); + // Just ensure we have enough space. The allocator will be cleared shortly + // anyway so this is fast. + current_path.reserve(num_blocks); + // Current block we are examining. Modified only by 'push_block' and 'pop_block' + const HBasicBlock* cur_block = graph_->GetEntryBlock(); + // Used to note a recur where we will start iterating on 'blk' and save + // where we are. We must 'continue' immediately after this. + auto push_block = [&](const HBasicBlock* blk) { + DCHECK(std::find(current_path.cbegin(), current_path.cend(), cur_block->GetBlockId()) == + current_path.end()); + if (kIsDebugBuild) { + std::for_each(current_path.cbegin(), current_path.cend(), [&](auto id) { + DCHECK_GT(last_succ_seen[id], kUnvisitedSuccIdx) << id; + DCHECK_LT(last_succ_seen[id], static_cast<ssize_t>(kMaxFilterableSuccessors)) << id; + }); + } + current_path.push_back(cur_block->GetBlockId()); + visiting.SetBit(cur_block->GetBlockId()); + cur_block = blk; + }; + // Used to note that we have fully explored a block and should return back + // up. Sets cur_block appropriately. We must 'continue' immediately after + // calling this. + auto pop_block = [&]() { + if (UNLIKELY(current_path.empty())) { + // Should only happen if entry-blocks successors are exhausted. + DCHECK_GE(last_succ_seen[graph_->GetEntryBlock()->GetBlockId()], + static_cast<ssize_t>(graph_->GetEntryBlock()->GetSuccessors().size())); + cur_block = nullptr; + } else { + const HBasicBlock* last = graph_->GetBlocks()[current_path.back()]; + visiting.ClearBit(current_path.back()); + current_path.pop_back(); + cur_block = last; + } + }; + // Mark the current path as a path to the end. This is in contrast to paths + // that end in (eg) removed blocks. 
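// For every block on current_path, last_succ_seen[id] is the index of the
// successor edge the DFS took out of it, so results[id].set(last_succ_seen[id])
// records that this particular edge lies on a path that reaches the exit.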
+ auto propagate_true = [&]() { + for (uint32_t id : current_path) { + DCHECK_GT(last_succ_seen[id], kUnvisitedSuccIdx); + DCHECK_LT(last_succ_seen[id], static_cast<ssize_t>(kMaxFilterableSuccessors)); + results[id].set(last_succ_seen[id]); + } + }; + ssize_t num_entry_succ = graph_->GetEntryBlock()->GetSuccessors().size(); + // As long as the entry-block has not explored all successors we still have + // work to do. + const uint32_t entry_block_id = graph_->GetEntryBlock()->GetBlockId(); + while (num_entry_succ > last_succ_seen[entry_block_id]) { + DCHECK(cur_block != nullptr); + uint32_t id = cur_block->GetBlockId(); + DCHECK((current_path.empty() && cur_block == graph_->GetEntryBlock()) || + current_path.front() == graph_->GetEntryBlock()->GetBlockId()) + << "current path size: " << current_path.size() + << " cur_block id: " << cur_block->GetBlockId() << " entry id " + << graph_->GetEntryBlock()->GetBlockId(); + DCHECK(!visiting.IsBitSet(id)) + << "Somehow ended up in a loop! This should have been caught before now! " << id; + std::bitset<kMaxFilterableSuccessors>& result = results[id]; + if (cur_block == graph_->GetExitBlock()) { + start_reaches_end = true; + propagate_true(); + pop_block(); + continue; + } else if (last_succ_seen[id] == kMaxFilterableSuccessors) { + // Already fully explored. + if (result.any()) { + propagate_true(); + } + pop_block(); + continue; + } + // NB This is a pointer. Modifications modify the last_succ_seen. + ssize_t* cur_succ = &last_succ_seen[id]; + std::bitset<kMaxFilterableSuccessors> succ_bitmap = GetAllowedSuccessors(cur_block); + // Get next successor allowed. + while (++(*cur_succ) < static_cast<ssize_t>(kMaxFilterableSuccessors) && + !succ_bitmap.test(*cur_succ)) { + DCHECK_GE(*cur_succ, 0); + } + if (*cur_succ >= static_cast<ssize_t>(cur_block->GetSuccessors().size())) { + // No more successors. Mark that we've checked everything. Later visits + // to this node can use the existing data. + DCHECK_LE(*cur_succ, static_cast<ssize_t>(kMaxFilterableSuccessors)); + *cur_succ = kMaxFilterableSuccessors; + pop_block(); + continue; + } + const HBasicBlock* nxt = cur_block->GetSuccessors()[*cur_succ]; + DCHECK(nxt != nullptr) << "id: " << *cur_succ + << " max: " << cur_block->GetSuccessors().size(); + if (visiting.IsBitSet(nxt->GetBlockId())) { + // This is a loop. Mark it and continue on. Mark allowed-successor on + // this block's results as well. + result.set(*cur_succ); + propagate_true(); + } else { + // Not a loop yet. Recur. + push_block(nxt); + } + } + } + // If we can't reach the end then there is no path through the graph without + // hitting excluded blocks + if (UNLIKELY(!start_reaches_end)) { + valid_ = false; + return; + } + // Mark blocks we didn't see in the ReachesEnd flood-fill + for (const HBasicBlock* blk : graph_->GetBlocks()) { + if (blk != nullptr && + results[blk->GetBlockId()].none() && + blk != graph_->GetExitBlock() && + blk != graph_->GetEntryBlock()) { + // We never visited this block, must be unreachable. + unreachable_blocks_.SetBit(blk->GetBlockId()); + } + } + // write the new data. 
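// 'results' now holds, for each block id, exactly the outgoing edges that were
// seen on some path reaching the exit. Both containers store one
// std::bitset<kMaxFilterableSuccessors> per block id, so copying
// results.size() elements over allowed_successors_ installs the pruned edge
// filter directly.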
+ memcpy(allowed_successors_.data(), + results.data(), + results.size() * sizeof(std::bitset<kMaxFilterableSuccessors>)); + } + RecalculateExcludedCohort(); +} + +void ExecutionSubgraph::RemoveConcavity() { + if (UNLIKELY(!valid_)) { + return; + } + DCHECK(!needs_prune_); + for (const HBasicBlock* blk : graph_->GetBlocks()) { + if (blk == nullptr || unreachable_blocks_.IsBitSet(blk->GetBlockId())) { + continue; + } + uint32_t blkid = blk->GetBlockId(); + if (std::any_of(unreachable_blocks_.Indexes().begin(), + unreachable_blocks_.Indexes().end(), + [&](uint32_t skipped) { return graph_->PathBetween(skipped, blkid); }) && + std::any_of(unreachable_blocks_.Indexes().begin(), + unreachable_blocks_.Indexes().end(), + [&](uint32_t skipped) { return graph_->PathBetween(blkid, skipped); })) { + RemoveBlock(blk); + } + } + Prune(); +} + +void ExecutionSubgraph::RecalculateExcludedCohort() { + DCHECK(!needs_prune_); + excluded_list_.emplace(allocator_->Adapter(kArenaAllocLSA)); + ScopedArenaVector<ExcludedCohort>& res = excluded_list_.value(); + // Make a copy of unreachable_blocks_; + ArenaBitVector unreachable(allocator_, graph_->GetBlocks().size(), false, kArenaAllocLSA); + unreachable.Copy(&unreachable_blocks_); + // Split cohorts with union-find + while (unreachable.IsAnyBitSet()) { + res.emplace_back(allocator_, graph_); + ExcludedCohort& cohort = res.back(); + // We don't allocate except for the queue beyond here so create another arena to save memory. + ScopedArenaAllocator alloc(graph_->GetArenaStack()); + ScopedArenaQueue<const HBasicBlock*> worklist(alloc.Adapter(kArenaAllocLSA)); + // Select an arbitrary node + const HBasicBlock* first = graph_->GetBlocks()[unreachable.GetHighestBitSet()]; + worklist.push(first); + do { + // Flood-fill both forwards and backwards. + const HBasicBlock* cur = worklist.front(); + worklist.pop(); + if (!unreachable.IsBitSet(cur->GetBlockId())) { + // Already visited or reachable somewhere else. + continue; + } + unreachable.ClearBit(cur->GetBlockId()); + cohort.blocks_.SetBit(cur->GetBlockId()); + // don't bother filtering here, it's done next go-around + for (const HBasicBlock* pred : cur->GetPredecessors()) { + worklist.push(pred); + } + for (const HBasicBlock* succ : cur->GetSuccessors()) { + worklist.push(succ); + } + } while (!worklist.empty()); + } + // Figure out entry & exit nodes. 
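// A block is an entry block of its cohort if at least one of its predecessors
// lies outside the cohort, and an exit block if at least one of its successors
// does; a single block can be both.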
+ for (ExcludedCohort& cohort : res) { + DCHECK(cohort.blocks_.IsAnyBitSet()); + auto is_external = [&](const HBasicBlock* ext) -> bool { + return !cohort.blocks_.IsBitSet(ext->GetBlockId()); + }; + for (const HBasicBlock* blk : cohort.Blocks()) { + const auto& preds = blk->GetPredecessors(); + const auto& succs = blk->GetSuccessors(); + if (std::any_of(preds.cbegin(), preds.cend(), is_external)) { + cohort.entry_blocks_.SetBit(blk->GetBlockId()); + } + if (std::any_of(succs.cbegin(), succs.cend(), is_external)) { + cohort.exit_blocks_.SetBit(blk->GetBlockId()); + } + } + } +} + +std::ostream& operator<<(std::ostream& os, const ExecutionSubgraph::ExcludedCohort& ex) { + ex.Dump(os); + return os; +} + +void ExecutionSubgraph::ExcludedCohort::Dump(std::ostream& os) const { + auto dump = [&](BitVecBlockRange arr) { + os << "["; + bool first = true; + for (const HBasicBlock* b : arr) { + if (!first) { + os << ", "; + } + first = false; + os << b->GetBlockId(); + } + os << "]"; + }; + auto dump_blocks = [&]() { + os << "["; + bool first = true; + for (const HBasicBlock* b : Blocks()) { + if (!entry_blocks_.IsBitSet(b->GetBlockId()) && !exit_blocks_.IsBitSet(b->GetBlockId())) { + if (!first) { + os << ", "; + } + first = false; + os << b->GetBlockId(); + } + } + os << "]"; + }; + + os << "{ entry: "; + dump(EntryBlocks()); + os << ", interior: "; + dump_blocks(); + os << ", exit: "; + dump(ExitBlocks()); + os << "}"; +} + +} // namespace art diff --git a/compiler/optimizing/execution_subgraph.h b/compiler/optimizing/execution_subgraph.h new file mode 100644 index 0000000000..dac938ed62 --- /dev/null +++ b/compiler/optimizing/execution_subgraph.h @@ -0,0 +1,321 @@ +/* + * Copyright (C) 2020 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_OPTIMIZING_EXECUTION_SUBGRAPH_H_ +#define ART_COMPILER_OPTIMIZING_EXECUTION_SUBGRAPH_H_ + +#include <algorithm> +#include <sstream> + +#include "base/arena_allocator.h" +#include "base/arena_bit_vector.h" +#include "base/arena_containers.h" +#include "base/array_ref.h" +#include "base/bit_vector-inl.h" +#include "base/globals.h" +#include "base/iteration_range.h" +#include "base/scoped_arena_allocator.h" +#include "base/scoped_arena_containers.h" +#include "base/stl_util.h" +#include "base/transform_iterator.h" +#include "nodes.h" + +namespace art { + +// Helper for transforming block ids to blocks. 
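// Used with MakeTransformRange over BitVector::IndexIterator to turn a bit
// vector of block ids into an iterable range of HBasicBlock* (see the
// BitVecBlockRange alias below). Sketch, with 'bit_vector' and 'graph' as
// placeholder names:
//
//   auto blocks = MakeTransformRange(bit_vector.Indexes(),
//                                    BlockIdToBlockTransformer(graph));
//   for (const HBasicBlock* blk : blocks) { /* ... */ }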
+class BlockIdToBlockTransformer { + public: + BlockIdToBlockTransformer(BlockIdToBlockTransformer&&) = default; + BlockIdToBlockTransformer(const BlockIdToBlockTransformer&) = default; + explicit BlockIdToBlockTransformer(const HGraph* graph) : graph_(graph) {} + + inline const HGraph* GetGraph() const { + return graph_; + } + + inline HBasicBlock* GetBlock(uint32_t id) const { + DCHECK_LT(id, graph_->GetBlocks().size()) << graph_->PrettyMethod(); + HBasicBlock* blk = graph_->GetBlocks()[id]; + DCHECK(blk != nullptr); + return blk; + } + + inline HBasicBlock* operator()(uint32_t id) const { + return GetBlock(id); + } + + private: + const HGraph* const graph_; +}; + +// A representation of a particular section of the graph. The graph is split +// into an excluded and included area and is used to track escapes. +// +// This object is a view of the graph and is not updated as the graph is +// changed. +// +// This is implemented by removing various escape points from the subgraph using +// the 'RemoveBlock' function. Once all required blocks are removed one will +// 'Finalize' the subgraph. This will extend the removed area to include: +// (1) Any block which inevitably leads to (post-dominates) a removed block +// (2) any block which is between 2 removed blocks +// +// This allows us to create a set of 'ExcludedCohorts' which are the +// well-connected subsets of the graph made up of removed blocks. These cohorts +// have a set of entry and exit blocks which act as the boundary of the cohort. +// Since we removed blocks between 2 excluded blocks it is impossible for any +// cohort-exit block to reach any cohort-entry block. This means we can use the +// boundary between the cohort and the rest of the graph to insert +// materialization blocks for partial LSE. +class ExecutionSubgraph : public ArenaObject<kArenaAllocLSA> { + public: + using BitVecBlockRange = + IterationRange<TransformIterator<BitVector::IndexIterator, BlockIdToBlockTransformer>>; + + // A set of connected blocks which are connected and removed from the + // ExecutionSubgraph. See above comment for explanation. + class ExcludedCohort : public ArenaObject<kArenaAllocLSA> { + public: + ExcludedCohort(ExcludedCohort&&) = default; + ExcludedCohort(const ExcludedCohort&) = delete; + explicit ExcludedCohort(ScopedArenaAllocator* allocator, HGraph* graph) + : graph_(graph), + entry_blocks_(allocator, graph_->GetBlocks().size(), false, kArenaAllocLSA), + exit_blocks_(allocator, graph_->GetBlocks().size(), false, kArenaAllocLSA), + blocks_(allocator, graph_->GetBlocks().size(), false, kArenaAllocLSA) {} + + ~ExcludedCohort() = default; + + // All blocks in the cohort. + BitVecBlockRange Blocks() const { + return BlockIterRange(blocks_); + } + + // Blocks that have predecessors outside of the cohort. These blocks will + // need to have PHIs/control-flow added to create the escaping value. + BitVecBlockRange EntryBlocks() const { + return BlockIterRange(entry_blocks_); + } + + // Blocks that have successors outside of the cohort. The successors of + // these blocks will need to have PHI's to restore state. + BitVecBlockRange ExitBlocks() const { + return BlockIterRange(exit_blocks_); + } + + bool operator==(const ExcludedCohort& other) const { + return blocks_.Equal(&other.blocks_); + } + + bool ContainsBlock(const HBasicBlock* blk) const { + return blocks_.IsBitSet(blk->GetBlockId()); + } + + // Returns true if there is a path from 'blk' to any block in this cohort. 
+ // NB blocks contained within the cohort are not considered to be succeeded + // by the cohort (i.e. this function will return false). + bool SucceedsBlock(const HBasicBlock* blk) const { + if (ContainsBlock(blk)) { + return false; + } + auto idxs = entry_blocks_.Indexes(); + return std::any_of(idxs.begin(), idxs.end(), [&](uint32_t entry) -> bool { + return blk->GetGraph()->PathBetween(blk->GetBlockId(), entry); + }); + } + + // Returns true if there is a path from any block in this cohort to 'blk'. + // NB blocks contained within the cohort are not considered to be preceded + // by the cohort (i.e. this function will return false). + bool PrecedesBlock(const HBasicBlock* blk) const { + if (ContainsBlock(blk)) { + return false; + } + auto idxs = exit_blocks_.Indexes(); + return std::any_of(idxs.begin(), idxs.end(), [&](uint32_t exit) -> bool { + return blk->GetGraph()->PathBetween(exit, blk->GetBlockId()); + }); + } + + void Dump(std::ostream& os) const; + + private: + BitVecBlockRange BlockIterRange(const ArenaBitVector& bv) const { + auto indexes = bv.Indexes(); + BitVecBlockRange res = MakeTransformRange(indexes, BlockIdToBlockTransformer(graph_)); + return res; + } + + ExcludedCohort() = delete; + + HGraph* graph_; + ArenaBitVector entry_blocks_; + ArenaBitVector exit_blocks_; + ArenaBitVector blocks_; + + friend class ExecutionSubgraph; + friend class LoadStoreAnalysisTest; + }; + + // The number of successors we can track on a single block. Graphs which + // contain a block with a branching factor greater than this will not be + // analysed. This is used to both limit the memory usage of analysis to + // reasonable levels and ensure that the analysis will complete in a + // reasonable amount of time. It also simplifies the implementation somewhat + // to have a constant branching factor. + static constexpr uint32_t kMaxFilterableSuccessors = 8; + + // Instantiate a subgraph. analysis_possible controls whether or not to even + // attempt partial-escape analysis. It should be false if partial-escape + // analysis is not desired (eg when being used for instruction scheduling) or + // when the branching factor in the graph is too high. This is calculated once + // and passed down for performance reasons. + ExecutionSubgraph(HGraph* graph, bool analysis_possible, ScopedArenaAllocator* allocator); + + void Invalidate() { + valid_ = false; + } + + // A block is contained by the ExecutionSubgraph if it is reachable. This + // means it has not been removed explicitly or via pruning/concavity removal. + // Finalization is needed to call this function. + // See RemoveConcavity and Prune for more information. + bool ContainsBlock(const HBasicBlock* blk) const { + DCHECK(!finalized_ || !needs_prune_) << "finalized: " << finalized_; + if (!valid_) { + return false; + } + return !unreachable_blocks_.IsBitSet(blk->GetBlockId()); + } + + // Mark the block as removed from the subgraph. + void RemoveBlock(const HBasicBlock* to_remove); + + // Called when no more updates will be done to the subgraph. Calculate the + // final subgraph + void Finalize() { + Prune(); + RemoveConcavity(); + finalized_ = true; + } + + BitVecBlockRange UnreachableBlocks() const { + auto idxs = unreachable_blocks_.Indexes(); + return MakeTransformRange(idxs, BlockIdToBlockTransformer(graph_)); + } + + // Returns true if all allowed execution paths from start eventually reach the + // graph's exit block (or diverge). 
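// It is cleared by Invalidate(), when the graph has no exit block (infinite
// loop), when analysis_possible was false (e.g. a block exceeded
// kMaxFilterableSuccessors), or when pruning finds no remaining path from
// entry to exit.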
+ bool IsValid() const { + return valid_; + } + + ArrayRef<const ExcludedCohort> GetExcludedCohorts() const { + DCHECK(!valid_ || !needs_prune_); + if (!valid_ || !unreachable_blocks_.IsAnyBitSet()) { + return ArrayRef<const ExcludedCohort>(); + } else { + return ArrayRef<const ExcludedCohort>(*excluded_list_); + } + } + + // Helper class to create reachable blocks iterator. + class ContainsFunctor { + public: + bool operator()(HBasicBlock* blk) const { + return subgraph_->ContainsBlock(blk); + } + + private: + explicit ContainsFunctor(const ExecutionSubgraph* subgraph) : subgraph_(subgraph) {} + const ExecutionSubgraph* const subgraph_; + friend class ExecutionSubgraph; + }; + // Returns an iterator over reachable blocks (filtered as we go). This is primarilly for testing. + IterationRange< + FilterIterator<typename ArenaVector<HBasicBlock*>::const_iterator, ContainsFunctor>> + ReachableBlocks() const { + return Filter(MakeIterationRange(graph_->GetBlocks()), ContainsFunctor(this)); + } + + static bool CanAnalyse(HGraph* graph) { + // If there are any blocks with more than kMaxFilterableSuccessors we can't + // analyse the graph. We avoid this case to prevent excessive memory and + // time usage while allowing a simpler algorithm with a fixed-width + // branching factor. + return std::all_of(graph->GetBlocks().begin(), graph->GetBlocks().end(), [](HBasicBlock* blk) { + return blk == nullptr || blk->GetSuccessors().size() <= kMaxFilterableSuccessors; + }); + } + + private: + std::bitset<kMaxFilterableSuccessors> GetAllowedSuccessors(const HBasicBlock* blk) const { + DCHECK(valid_); + return allowed_successors_[blk->GetBlockId()]; + } + + void LimitBlockSuccessors(const HBasicBlock* block, + std::bitset<kMaxFilterableSuccessors> allowed) { + needs_prune_ = true; + allowed_successors_[block->GetBlockId()] &= allowed; + } + + // Remove nodes which both precede and follow any exclusions. This ensures we don't need to deal + // with only conditionally materializing objects depending on if we already materialized them + // Ensure that for all blocks A, B, C: Unreachable(A) && Unreachable(C) && PathBetween(A, B) && + // PathBetween(A, C) implies Unreachable(B). This simplifies later transforms since it ensures + // that no execution can leave and then re-enter any exclusion. + void RemoveConcavity(); + + // Removes sink nodes. Sink nodes are nodes where there is no execution which + // avoids all removed nodes. + void Prune(); + + void RecalculateExcludedCohort(); + + HGraph* graph_; + ScopedArenaAllocator* allocator_; + // The map from block_id -> allowed-successors. + // This is the canonical representation of this subgraph. If a bit in the + // bitset is not set then the corresponding outgoing edge of that block is not + // considered traversable. + ScopedArenaVector<std::bitset<kMaxFilterableSuccessors>> allowed_successors_; + // Helper that holds which blocks we are able to reach. Only valid if + // 'needs_prune_ == false'. + ArenaBitVector unreachable_blocks_; + // A list of the excluded-cohorts of this subgraph. This is only valid when + // 'needs_prune_ == false' + std::optional<ScopedArenaVector<ExcludedCohort>> excluded_list_; + // Bool to hold if there is at least one known path from the start block to + // the end in this graph. Used to short-circuit computation. + bool valid_; + // True if the subgraph is consistent and can be queried. Modifying the + // subgraph clears this and requires a prune to restore. 
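// In other words: needs_prune_ is set by RemoveBlock()/LimitBlockSuccessors()
// and cleared by Prune(); unreachable_blocks_ and the excluded cohorts may
// only be consulted while it is false.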
+ bool needs_prune_; + // True if no more modification of the subgraph is permitted. + bool finalized_; + + friend class ExecutionSubgraphTest; + friend class LoadStoreAnalysisTest; + + DISALLOW_COPY_AND_ASSIGN(ExecutionSubgraph); +}; + +std::ostream& operator<<(std::ostream& os, const ExecutionSubgraph::ExcludedCohort& ex); + +} // namespace art + +#endif // ART_COMPILER_OPTIMIZING_EXECUTION_SUBGRAPH_H_ diff --git a/compiler/optimizing/execution_subgraph_test.cc b/compiler/optimizing/execution_subgraph_test.cc new file mode 100644 index 0000000000..1fc00d9f6b --- /dev/null +++ b/compiler/optimizing/execution_subgraph_test.cc @@ -0,0 +1,831 @@ +/* + * Copyright (C) 2020 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "execution_subgraph_test.h" + +#include <array> +#include <sstream> +#include <string_view> +#include <unordered_map> +#include <unordered_set> + +#include "base/scoped_arena_allocator.h" +#include "base/stl_util.h" +#include "class_root.h" +#include "dex/dex_file_types.h" +#include "dex/method_reference.h" +#include "entrypoints/quick/quick_entrypoints_enum.h" +#include "execution_subgraph.h" +#include "gtest/gtest.h" +#include "handle.h" +#include "handle_scope.h" +#include "nodes.h" +#include "optimizing/data_type.h" +#include "optimizing_unit_test.h" +#include "scoped_thread_state_change.h" + +namespace art { + +using BlockSet = std::unordered_set<const HBasicBlock*>; + +// Helper that checks validity directly. +bool ExecutionSubgraphTestHelper::CalculateValidity(HGraph* graph, const ExecutionSubgraph* esg) { + bool reached_end = false; + std::queue<const HBasicBlock*> worklist; + std::unordered_set<const HBasicBlock*> visited; + worklist.push(graph->GetEntryBlock()); + while (!worklist.empty()) { + const HBasicBlock* cur = worklist.front(); + worklist.pop(); + if (visited.find(cur) != visited.end()) { + continue; + } else { + visited.insert(cur); + } + if (cur == graph->GetExitBlock()) { + reached_end = true; + continue; + } + bool has_succ = false; + for (const HBasicBlock* succ : cur->GetSuccessors()) { + DCHECK(succ != nullptr) << "Bad successors on block " << cur->GetBlockId(); + if (!esg->ContainsBlock(succ)) { + continue; + } + has_succ = true; + worklist.push(succ); + } + if (!has_succ) { + // We aren't at the end and have nowhere to go so fail. 
+ return false; + } + } + return reached_end; +} + +class ExecutionSubgraphTest : public OptimizingUnitTest { + public: + ExecutionSubgraphTest() : graph_(CreateGraph()) {} + + AdjacencyListGraph SetupFromAdjacencyList(const std::string_view entry_name, + const std::string_view exit_name, + const std::vector<AdjacencyListGraph::Edge>& adj) { + return AdjacencyListGraph(graph_, GetAllocator(), entry_name, exit_name, adj); + } + + bool IsValidSubgraph(const ExecutionSubgraph* esg) { + return ExecutionSubgraphTestHelper::CalculateValidity(graph_, esg); + } + + bool IsValidSubgraph(const ExecutionSubgraph& esg) { + return ExecutionSubgraphTestHelper::CalculateValidity(graph_, &esg); + } + + HGraph* graph_; +}; + +// Some comparators used by these tests to avoid having to deal with various set types. +template <typename BLKS, typename = std::enable_if_t<!std::is_same_v<BlockSet, BLKS>>> +bool operator==(const BlockSet& bs, const BLKS& sas) { + std::unordered_set<const HBasicBlock*> us(sas.begin(), sas.end()); + return bs == us; +} +template <typename BLKS, typename = std::enable_if_t<!std::is_same_v<BlockSet, BLKS>>> +bool operator==(const BLKS& sas, const BlockSet& bs) { + return bs == sas; +} +template <typename BLKS, typename = std::enable_if_t<!std::is_same_v<BlockSet, BLKS>>> +bool operator!=(const BlockSet& bs, const BLKS& sas) { + return !(bs == sas); +} +template <typename BLKS, typename = std::enable_if_t<!std::is_same_v<BlockSet, BLKS>>> +bool operator!=(const BLKS& sas, const BlockSet& bs) { + return !(bs == sas); +} + +// +-------+ +-------+ +// | right | <-- | entry | +// +-------+ +-------+ +// | | +// | | +// | v +// | + - - - - - + +// | ' removed ' +// | ' ' +// | ' +-------+ ' +// | ' | left | ' +// | ' +-------+ ' +// | ' ' +// | + - - - - - + +// | | +// | | +// | v +// | +-------+ +// +---------> | exit | +// +-------+ +TEST_F(ExecutionSubgraphTest, Basic) { + AdjacencyListGraph blks(SetupFromAdjacencyList( + "entry", + "exit", + { { "entry", "left" }, { "entry", "right" }, { "left", "exit" }, { "right", "exit" } })); + ASSERT_TRUE(ExecutionSubgraph::CanAnalyse(graph_)); + ExecutionSubgraph esg(graph_, true, GetScopedAllocator()); + esg.RemoveBlock(blks.Get("left")); + esg.Finalize(); + ASSERT_TRUE(esg.IsValid()); + ASSERT_TRUE(IsValidSubgraph(esg)); + std::unordered_set<const HBasicBlock*> contents(esg.ReachableBlocks().begin(), + esg.ReachableBlocks().end()); + + ASSERT_EQ(contents.size(), 3u); + ASSERT_TRUE(contents.find(blks.Get("left")) == contents.end()); + + ASSERT_TRUE(contents.find(blks.Get("right")) != contents.end()); + ASSERT_TRUE(contents.find(blks.Get("entry")) != contents.end()); + ASSERT_TRUE(contents.find(blks.Get("exit")) != contents.end()); + esg.RemoveBlock(blks.Get("right")); + esg.Finalize(); + std::unordered_set<const HBasicBlock*> contents_2(esg.ReachableBlocks().begin(), + esg.ReachableBlocks().end()); + ASSERT_EQ(contents_2.size(), 0u); +} + +// +-------+ +-------+ +// | right | <-- | entry | +// +-------+ +-------+ +// | | +// | | +// | v +// | + - - - - - - - - - - - - - - - - - - - -+ +// | ' indirectly_removed ' +// | ' ' +// | ' +-------+ +-----+ ' +// | ' | l1 | -------------------> | l1r | ' +// | ' +-------+ +-----+ ' +// | ' | | ' +// | ' | | ' +// | ' v | ' +// | ' +-------+ | ' +// | ' | l1l | | ' +// | ' +-------+ | ' +// | ' | | ' +// | ' | | ' +// | ' | | ' +// + - - - - - - - -+ | +- - - | | ' +// ' ' | +- v | ' +// ' +-----+ | +----------------+ | ' +// ' | l2r | <---------+-------------- | l2 (removed) | <-------------+ ' +// ' 
+-----+ | +----------------+ ' +// ' | ' | +- | ' +// ' | - - -+ | +- - - | - - - - - - - - - - - - - -+ +// ' | ' | ' | ' +// ' | ' | ' | ' +// ' | ' | ' v ' +// ' | ' | ' +-------+ ' +// ' | ' | ' | l2l | ' +// ' | ' | ' +-------+ ' +// ' | ' | ' | ' +// ' | ' | ' | ' +// ' | ' | ' | ' +// ' | - - -+ | +- - - | ' +// ' | ' | +- v ' +// ' | | +-------+ ' +// ' +---------------+-------------> | l3 | ' +// ' | +-------+ ' +// ' ' | +- ' +// + - - - - - - - -+ | +- - - - - - - - - + +// | | +// | | +// | v +// | +-------+ +// +-----------> | exit | +// +-------+ +TEST_F(ExecutionSubgraphTest, Propagation) { + AdjacencyListGraph blks(SetupFromAdjacencyList("entry", + "exit", + { { "entry", "l1" }, + { "l1", "l1l" }, + { "l1", "l1r" }, + { "l1l", "l2" }, + { "l1r", "l2" }, + { "l2", "l2l" }, + { "l2", "l2r" }, + { "l2l", "l3" }, + { "l2r", "l3" }, + { "l3", "exit" }, + { "entry", "right" }, + { "right", "exit" } })); + ASSERT_TRUE(ExecutionSubgraph::CanAnalyse(graph_)); + ExecutionSubgraph esg(graph_, /*analysis_possible=*/true, GetScopedAllocator()); + esg.RemoveBlock(blks.Get("l2")); + esg.Finalize(); + ASSERT_TRUE(esg.IsValid()); + ASSERT_TRUE(IsValidSubgraph(esg)); + std::unordered_set<const HBasicBlock*> contents(esg.ReachableBlocks().begin(), + esg.ReachableBlocks().end()); + + // ASSERT_EQ(contents.size(), 3u); + // Not present, no path through. + ASSERT_TRUE(contents.find(blks.Get("l1")) == contents.end()); + ASSERT_TRUE(contents.find(blks.Get("l2")) == contents.end()); + ASSERT_TRUE(contents.find(blks.Get("l3")) == contents.end()); + ASSERT_TRUE(contents.find(blks.Get("l1l")) == contents.end()); + ASSERT_TRUE(contents.find(blks.Get("l1r")) == contents.end()); + ASSERT_TRUE(contents.find(blks.Get("l2l")) == contents.end()); + ASSERT_TRUE(contents.find(blks.Get("l2r")) == contents.end()); + + // present, path through. + ASSERT_TRUE(contents.find(blks.Get("right")) != contents.end()); + ASSERT_TRUE(contents.find(blks.Get("entry")) != contents.end()); + ASSERT_TRUE(contents.find(blks.Get("exit")) != contents.end()); +} + +// +------------------------------------+ +// | | +// | +-------+ +-------+ | +// | | right | <-- | entry | | +// | +-------+ +-------+ | +// | | | | +// | | | | +// | | v | +// | | +-------+ +--------+ +// +----+---------> | l1 | --> | l1loop | +// | +-------+ +--------+ +// | | +// | | +// | v +// | +- - - - - -+ +// | ' removed ' +// | ' ' +// | ' +-------+ ' +// | ' | l2 | ' +// | ' +-------+ ' +// | ' ' +// | +- - - - - -+ +// | | +// | | +// | v +// | +-------+ +// +---------> | exit | +// +-------+ +TEST_F(ExecutionSubgraphTest, PropagationLoop) { + AdjacencyListGraph blks(SetupFromAdjacencyList("entry", + "exit", + { { "entry", "l1" }, + { "l1", "l2" }, + { "l1", "l1loop" }, + { "l1loop", "l1" }, + { "l2", "exit" }, + { "entry", "right" }, + { "right", "exit" } })); + ASSERT_TRUE(ExecutionSubgraph::CanAnalyse(graph_)); + ExecutionSubgraph esg(graph_, /*analysis_possible=*/true, GetScopedAllocator()); + esg.RemoveBlock(blks.Get("l2")); + esg.Finalize(); + ASSERT_TRUE(esg.IsValid()); + ASSERT_TRUE(IsValidSubgraph(esg)); + std::unordered_set<const HBasicBlock*> contents(esg.ReachableBlocks().begin(), + esg.ReachableBlocks().end()); + + ASSERT_EQ(contents.size(), 5u); + + // Not present, no path through. + ASSERT_TRUE(contents.find(blks.Get("l2")) == contents.end()); + + // present, path through. + // Since the loop can diverge we should leave it in the execution subgraph. 
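// (Prune() treats an edge that closes a loop on the current DFS path like an
// edge reaching the exit, which is what keeps l1/l1loop in the reachable set
// here.)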
+ ASSERT_TRUE(contents.find(blks.Get("l1")) != contents.end()); + ASSERT_TRUE(contents.find(blks.Get("l1loop")) != contents.end()); + ASSERT_TRUE(contents.find(blks.Get("right")) != contents.end()); + ASSERT_TRUE(contents.find(blks.Get("entry")) != contents.end()); + ASSERT_TRUE(contents.find(blks.Get("exit")) != contents.end()); +} + +// +--------------------------------+ +// | | +// | +-------+ +-------+ | +// | | right | <-- | entry | | +// | +-------+ +-------+ | +// | | | | +// | | | | +// | | v | +// | | +-------+ +--------+ +// +----+---------> | l1 | --> | l1loop | +// | +-------+ +--------+ +// | | +// | | +// | v +// | +-------+ +// | | l2 | +// | +-------+ +// | | +// | | +// | v +// | +-------+ +// +---------> | exit | +// +-------+ +TEST_F(ExecutionSubgraphTest, PropagationLoop2) { + AdjacencyListGraph blks(SetupFromAdjacencyList("entry", + "exit", + { { "entry", "l1" }, + { "l1", "l2" }, + { "l1", "l1loop" }, + { "l1loop", "l1" }, + { "l2", "exit" }, + { "entry", "right" }, + { "right", "exit" } })); + ASSERT_TRUE(ExecutionSubgraph::CanAnalyse(graph_)); + ExecutionSubgraph esg(graph_, /*analysis_possible=*/true, GetScopedAllocator()); + esg.RemoveBlock(blks.Get("l1")); + esg.Finalize(); + ASSERT_TRUE(esg.IsValid()); + ASSERT_TRUE(IsValidSubgraph(esg)); + std::unordered_set<const HBasicBlock*> contents(esg.ReachableBlocks().begin(), + esg.ReachableBlocks().end()); + + ASSERT_EQ(contents.size(), 3u); + + // Not present, no path through. + ASSERT_TRUE(contents.find(blks.Get("l1")) == contents.end()); + ASSERT_TRUE(contents.find(blks.Get("l1loop")) == contents.end()); + ASSERT_TRUE(contents.find(blks.Get("l2")) == contents.end()); + + // present, path through. + ASSERT_TRUE(contents.find(blks.Get("right")) != contents.end()); + ASSERT_TRUE(contents.find(blks.Get("entry")) != contents.end()); + ASSERT_TRUE(contents.find(blks.Get("exit")) != contents.end()); +} + +// +--------------------------------+ +// | | +// | +-------+ +-------+ | +// | | right | <-- | entry | | +// | +-------+ +-------+ | +// | | | | +// | | | | +// | | v | +// | | +-------+ +--------+ +// +----+---------> | l1 | --> | l1loop | +// | +-------+ +--------+ +// | | +// | | +// | v +// | +-------+ +// | | l2 | +// | +-------+ +// | | +// | | +// | v +// | +-------+ +// +---------> | exit | +// +-------+ +TEST_F(ExecutionSubgraphTest, PropagationLoop3) { + AdjacencyListGraph blks(SetupFromAdjacencyList("entry", + "exit", + { { "entry", "l1" }, + { "l1", "l2" }, + { "l1", "l1loop" }, + { "l1loop", "l1" }, + { "l2", "exit" }, + { "entry", "right" }, + { "right", "exit" } })); + ASSERT_TRUE(ExecutionSubgraph::CanAnalyse(graph_)); + ExecutionSubgraph esg(graph_, /*analysis_possible=*/true, GetScopedAllocator()); + esg.RemoveBlock(blks.Get("l1loop")); + esg.Finalize(); + ASSERT_TRUE(esg.IsValid()); + ASSERT_TRUE(IsValidSubgraph(esg)); + std::unordered_set<const HBasicBlock*> contents(esg.ReachableBlocks().begin(), + esg.ReachableBlocks().end()); + + ASSERT_EQ(contents.size(), 3u); + + // Not present, no path through. If we got to l1 loop then we must merge back + // with l1 and l2 so they're bad too. + ASSERT_TRUE(contents.find(blks.Get("l1loop")) == contents.end()); + ASSERT_TRUE(contents.find(blks.Get("l1")) == contents.end()); + ASSERT_TRUE(contents.find(blks.Get("l2")) == contents.end()); + + // present, path through. 
+ ASSERT_TRUE(contents.find(blks.Get("right")) != contents.end()); + ASSERT_TRUE(contents.find(blks.Get("entry")) != contents.end()); + ASSERT_TRUE(contents.find(blks.Get("exit")) != contents.end()); +} + +TEST_F(ExecutionSubgraphTest, Invalid) { + AdjacencyListGraph blks(SetupFromAdjacencyList( + "entry", + "exit", + { { "entry", "left" }, { "entry", "right" }, { "left", "exit" }, { "right", "exit" } })); + ASSERT_TRUE(ExecutionSubgraph::CanAnalyse(graph_)); + ExecutionSubgraph esg(graph_, /*analysis_possible=*/true, GetScopedAllocator()); + esg.RemoveBlock(blks.Get("left")); + esg.RemoveBlock(blks.Get("right")); + esg.Finalize(); + + ASSERT_FALSE(esg.IsValid()); + ASSERT_FALSE(IsValidSubgraph(esg)); + std::unordered_set<const HBasicBlock*> contents(esg.ReachableBlocks().begin(), + esg.ReachableBlocks().end()); + + ASSERT_EQ(contents.size(), 0u); +} +// Sibling branches are disconnected. +TEST_F(ExecutionSubgraphTest, Exclusions) { + AdjacencyListGraph blks(SetupFromAdjacencyList("entry", + "exit", + { { "entry", "a" }, + { "entry", "b" }, + { "entry", "c" }, + { "a", "exit" }, + { "b", "exit" }, + { "c", "exit" } })); + ASSERT_TRUE(ExecutionSubgraph::CanAnalyse(graph_)); + ExecutionSubgraph esg(graph_, /*analysis_possible=*/true, GetScopedAllocator()); + esg.RemoveBlock(blks.Get("a")); + esg.RemoveBlock(blks.Get("c")); + esg.Finalize(); + ASSERT_TRUE(esg.IsValid()); + ASSERT_TRUE(IsValidSubgraph(esg)); + std::unordered_set<const HBasicBlock*> contents(esg.ReachableBlocks().begin(), + esg.ReachableBlocks().end()); + + ASSERT_EQ(contents.size(), 3u); + // Not present, no path through. + ASSERT_TRUE(contents.find(blks.Get("a")) == contents.end()); + ASSERT_TRUE(contents.find(blks.Get("c")) == contents.end()); + + // present, path through. + ASSERT_TRUE(contents.find(blks.Get("entry")) != contents.end()); + ASSERT_TRUE(contents.find(blks.Get("exit")) != contents.end()); + ASSERT_TRUE(contents.find(blks.Get("b")) != contents.end()); + + ArrayRef<const ExecutionSubgraph::ExcludedCohort> exclusions(esg.GetExcludedCohorts()); + ASSERT_EQ(exclusions.size(), 2u); + std::unordered_set<const HBasicBlock*> exclude_a({ blks.Get("a") }); + std::unordered_set<const HBasicBlock*> exclude_c({ blks.Get("c") }); + ASSERT_TRUE(std::find_if(exclusions.cbegin(), + exclusions.cend(), + [&](const ExecutionSubgraph::ExcludedCohort& it) { + return it.Blocks() == exclude_a; + }) != exclusions.cend()); + ASSERT_TRUE(std::find_if(exclusions.cbegin(), + exclusions.cend(), + [&](const ExecutionSubgraph::ExcludedCohort& it) { + return it.Blocks() == exclude_c; + }) != exclusions.cend()); +} + +// Sibling branches are disconnected. 
+// +- - - - - - - - - - - - - - - - - - - - - - + +// ' remove_c ' +// ' ' +// ' +-----------+ ' +// ' | c_begin_2 | -------------------------+ ' +// ' +-----------+ | ' +// ' | ' +// +- - - - - - - - - - - - - - - - - - | ' +// ^ ' | ' +// | ' | ' +// | ' | ' +// + - - - - - -+ ' | ' +// ' remove_a ' ' | ' +// ' ' ' | ' +// ' +--------+ ' +-----------+ +---+' | ' +// ' | **a** | ' <-- | entry | --> | b |' | ' +// ' +--------+ ' +-----------+ +---+' | ' +// ' ' ' | ' +// + - - - - - -+ ' | ' +// | | | ' | ' +// | | | ' | ' +// | v | ' | ' +// | +- - - - - - - -+ | ' | ' +// | ' ' | ' | ' +// | ' +-----------+ ' | ' | ' +// | ' | c_begin_1 | ' | ' | ' +// | ' +-----------+ ' | ' | ' +// | ' | ' | ' | ' +// | ' | ' | ' | ' +// | ' | ' | ' | ' +// + - - - - - - - - -+ | + - - - | - - - - - - - + | ' | ' +// ' ' | + v ' | + | ' +// ' +---------+ | +-----------+ | | ' +// ' | c_end_2 | <-------+--------------- | **c_mid** | <-----------------+------+ ' +// ' +---------+ | +-----------+ | ' +// ' ' | + | ' | + ' +// + - - - - - - - - -+ | + - - - | - - - - - - - + | + - - - + +// | | ' | ' | +// | | ' | ' | +// | | ' v ' | +// | | ' +-----------+ ' | +// | | ' | c_end_1 | ' | +// | | ' +-----------+ ' | +// | | ' ' | +// | | +- - - - - - - -+ | +// | | | | +// | | | | +// | | v v +// | | +---------------------------------+ +// | +------------> | exit | +// | +---------------------------------+ +// | ^ +// +------------------------------------+ +TEST_F(ExecutionSubgraphTest, ExclusionExtended) { + AdjacencyListGraph blks(SetupFromAdjacencyList("entry", + "exit", + { { "entry", "a" }, + { "entry", "b" }, + { "entry", "c_begin_1" }, + { "entry", "c_begin_2" }, + { "c_begin_1", "c_mid" }, + { "c_begin_2", "c_mid" }, + { "c_mid", "c_end_1" }, + { "c_mid", "c_end_2" }, + { "a", "exit" }, + { "b", "exit" }, + { "c_end_1", "exit" }, + { "c_end_2", "exit" } })); + ASSERT_TRUE(ExecutionSubgraph::CanAnalyse(graph_)); + ExecutionSubgraph esg(graph_, /*analysis_possible=*/true, GetScopedAllocator()); + esg.RemoveBlock(blks.Get("a")); + esg.RemoveBlock(blks.Get("c_mid")); + esg.Finalize(); + ASSERT_TRUE(esg.IsValid()); + ASSERT_TRUE(IsValidSubgraph(esg)); + std::unordered_set<const HBasicBlock*> contents(esg.ReachableBlocks().begin(), + esg.ReachableBlocks().end()); + + ASSERT_EQ(contents.size(), 3u); + // Not present, no path through. + ASSERT_TRUE(contents.find(blks.Get("a")) == contents.end()); + ASSERT_TRUE(contents.find(blks.Get("c_begin_1")) == contents.end()); + ASSERT_TRUE(contents.find(blks.Get("c_begin_2")) == contents.end()); + ASSERT_TRUE(contents.find(blks.Get("c_mid")) == contents.end()); + ASSERT_TRUE(contents.find(blks.Get("c_end_1")) == contents.end()); + ASSERT_TRUE(contents.find(blks.Get("c_end_2")) == contents.end()); + + // present, path through. 
+ ASSERT_TRUE(contents.find(blks.Get("entry")) != contents.end()); + ASSERT_TRUE(contents.find(blks.Get("exit")) != contents.end()); + ASSERT_TRUE(contents.find(blks.Get("b")) != contents.end()); + + ArrayRef<const ExecutionSubgraph::ExcludedCohort> exclusions(esg.GetExcludedCohorts()); + ASSERT_EQ(exclusions.size(), 2u); + BlockSet exclude_a({ blks.Get("a") }); + BlockSet exclude_c({ blks.Get("c_begin_1"), + blks.Get("c_begin_2"), + blks.Get("c_mid"), + blks.Get("c_end_1"), + blks.Get("c_end_2") }); + ASSERT_TRUE(std::find_if(exclusions.cbegin(), + exclusions.cend(), + [&](const ExecutionSubgraph::ExcludedCohort& it) { + return it.Blocks() == exclude_a; + }) != exclusions.cend()); + ASSERT_TRUE( + std::find_if( + exclusions.cbegin(), exclusions.cend(), [&](const ExecutionSubgraph::ExcludedCohort& it) { + return it.Blocks() == exclude_c && + BlockSet({ blks.Get("c_begin_1"), blks.Get("c_begin_2") }) == it.EntryBlocks() && + BlockSet({ blks.Get("c_end_1"), blks.Get("c_end_2") }) == it.ExitBlocks(); + }) != exclusions.cend()); +} + +// ┌───────┐ ┌────────────┐ +// ┌─ │ right │ ◀── │ entry │ +// │ └───────┘ └────────────┘ +// │ │ +// │ │ +// │ ▼ +// │ ┌────────────┐ +// │ │ esc_top │ +// │ └────────────┘ +// │ │ +// │ │ +// │ ▼ +// │ ┌────────────┐ +// └──────────────▶ │ middle │ ─┐ +// └────────────┘ │ +// │ │ +// │ │ +// ▼ │ +// ┌────────────┐ │ +// │ esc_bottom │ │ +// └────────────┘ │ +// │ │ +// │ │ +// ▼ │ +// ┌────────────┐ │ +// │ exit │ ◀┘ +// └────────────┘ +TEST_F(ExecutionSubgraphTest, InAndOutEscape) { + AdjacencyListGraph blks(SetupFromAdjacencyList("entry", + "exit", + { { "entry", "esc_top" }, + { "entry", "right" }, + { "esc_top", "middle" }, + { "right", "middle" }, + { "middle", "exit" }, + { "middle", "esc_bottom" }, + { "esc_bottom", "exit" } })); + + ASSERT_TRUE(ExecutionSubgraph::CanAnalyse(graph_)); + ExecutionSubgraph esg(graph_, /*analysis_possible=*/true, GetScopedAllocator()); + esg.RemoveBlock(blks.Get("esc_top")); + esg.RemoveBlock(blks.Get("esc_bottom")); + esg.Finalize(); + + std::unordered_set<const HBasicBlock*> contents(esg.ReachableBlocks().begin(), + esg.ReachableBlocks().end()); + ASSERT_EQ(contents.size(), 0u); + ASSERT_FALSE(esg.IsValid()); + ASSERT_FALSE(IsValidSubgraph(esg)); + + ASSERT_EQ(contents.size(), 0u); +} + +// Test with max number of successors and no removals. +TEST_F(ExecutionSubgraphTest, BigNodes) { + std::vector<std::string> mid_blocks; + for (auto i : Range(ExecutionSubgraph::kMaxFilterableSuccessors)) { + std::ostringstream oss; + oss << "blk" << i; + mid_blocks.push_back(oss.str().c_str()); + } + ASSERT_EQ(mid_blocks.size(), ExecutionSubgraph::kMaxFilterableSuccessors); + std::vector<AdjacencyListGraph::Edge> edges; + for (const auto& mid : mid_blocks) { + edges.emplace_back("entry", mid); + edges.emplace_back(mid, "exit"); + } + AdjacencyListGraph blks(SetupFromAdjacencyList("entry", "exit", edges)); + ASSERT_TRUE(ExecutionSubgraph::CanAnalyse(graph_)); + ExecutionSubgraph esg(graph_, /*analysis_possible=*/true, GetScopedAllocator()); + esg.Finalize(); + ASSERT_TRUE(esg.IsValid()); + ASSERT_TRUE(IsValidSubgraph(esg)); + std::unordered_set<const HBasicBlock*> contents(esg.ReachableBlocks().begin(), + esg.ReachableBlocks().end()); + + for (const auto& mid : mid_blocks) { + EXPECT_TRUE(contents.find(blks.Get(mid)) != contents.end()) << mid; + } + // + 2 for entry and exit nodes. + ASSERT_EQ(contents.size(), ExecutionSubgraph::kMaxFilterableSuccessors + 2); +} + +// Test with max number of successors and some removals. 
+TEST_F(ExecutionSubgraphTest, BigNodesMissing) { + std::vector<std::string> mid_blocks; + for (auto i : Range(ExecutionSubgraph::kMaxFilterableSuccessors)) { + std::ostringstream oss; + oss << "blk" << i; + mid_blocks.push_back(oss.str()); + } + std::vector<AdjacencyListGraph::Edge> edges; + for (const auto& mid : mid_blocks) { + edges.emplace_back("entry", mid); + edges.emplace_back(mid, "exit"); + } + AdjacencyListGraph blks(SetupFromAdjacencyList("entry", "exit", edges)); + ASSERT_TRUE(ExecutionSubgraph::CanAnalyse(graph_)); + ExecutionSubgraph esg(graph_, /*analysis_possible=*/true, GetScopedAllocator()); + esg.RemoveBlock(blks.Get("blk2")); + esg.RemoveBlock(blks.Get("blk4")); + esg.Finalize(); + ASSERT_TRUE(esg.IsValid()); + ASSERT_TRUE(IsValidSubgraph(esg)); + std::unordered_set<const HBasicBlock*> contents(esg.ReachableBlocks().begin(), + esg.ReachableBlocks().end()); + + ASSERT_EQ(contents.size(), ExecutionSubgraph::kMaxFilterableSuccessors + 2 - 2); + + // Not present, no path through. + ASSERT_TRUE(contents.find(blks.Get("blk2")) == contents.end()); + ASSERT_TRUE(contents.find(blks.Get("blk4")) == contents.end()); +} + +// Test with max number of successors and all successors removed. +TEST_F(ExecutionSubgraphTest, BigNodesNoPath) { + std::vector<std::string> mid_blocks; + for (auto i : Range(ExecutionSubgraph::kMaxFilterableSuccessors)) { + std::ostringstream oss; + oss << "blk" << i; + mid_blocks.push_back(oss.str()); + } + std::vector<AdjacencyListGraph::Edge> edges; + for (const auto& mid : mid_blocks) { + edges.emplace_back("entry", mid); + edges.emplace_back(mid, "exit"); + } + AdjacencyListGraph blks(SetupFromAdjacencyList("entry", "exit", edges)); + ASSERT_TRUE(ExecutionSubgraph::CanAnalyse(graph_)); + ExecutionSubgraph esg(graph_, /*analysis_possible=*/true, GetScopedAllocator()); + for (const auto& mid : mid_blocks) { + esg.RemoveBlock(blks.Get(mid)); + } + esg.Finalize(); + ASSERT_FALSE(esg.IsValid()); + ASSERT_FALSE(IsValidSubgraph(esg)); +} + +// Test with max number of successors +TEST_F(ExecutionSubgraphTest, CanAnalyseBig) { + // Make an absurdly huge and well connected graph. This should be pretty worst-case scenario. + constexpr size_t kNumBlocks = ExecutionSubgraph::kMaxFilterableSuccessors + 1000; + std::vector<std::string> mid_blocks; + for (auto i : Range(kNumBlocks)) { + std::ostringstream oss; + oss << "blk" << i; + mid_blocks.push_back(oss.str()); + } + std::vector<AdjacencyListGraph::Edge> edges; + for (auto cur : Range(kNumBlocks)) { + for (auto nxt : + Range(cur + 1, + std::min(cur + ExecutionSubgraph::kMaxFilterableSuccessors + 1, kNumBlocks))) { + edges.emplace_back(mid_blocks[cur], mid_blocks[nxt]); + } + } + AdjacencyListGraph blks(SetupFromAdjacencyList(mid_blocks.front(), mid_blocks.back(), edges)); + ASSERT_TRUE(ExecutionSubgraph::CanAnalyse(graph_)); + + ExecutionSubgraph esg(graph_, /*analysis_possible=*/true, GetScopedAllocator()); + esg.Finalize(); + ASSERT_TRUE(esg.IsValid()); + ASSERT_TRUE(IsValidSubgraph(esg)); + std::unordered_set<const HBasicBlock*> contents(esg.ReachableBlocks().begin(), + esg.ReachableBlocks().end()); + + ASSERT_EQ(contents.size(), kNumBlocks); +} + +// Test with many successors +TEST_F(ExecutionSubgraphTest, CanAnalyseBig2) { + // Make an absurdly huge and well connected graph. This should be pretty worst-case scenario. 
+ constexpr size_t kNumBlocks = ExecutionSubgraph::kMaxFilterableSuccessors + 1000; + constexpr size_t kTestMaxSuccessors = ExecutionSubgraph::kMaxFilterableSuccessors - 1; + std::vector<std::string> mid_blocks; + for (auto i : Range(kNumBlocks)) { + std::ostringstream oss; + oss << "blk" << i; + mid_blocks.push_back(oss.str()); + } + std::vector<AdjacencyListGraph::Edge> edges; + for (auto cur : Range(kNumBlocks)) { + for (auto nxt : Range(cur + 1, std::min(cur + 1 + kTestMaxSuccessors, kNumBlocks))) { + edges.emplace_back(mid_blocks[cur], mid_blocks[nxt]); + } + } + edges.emplace_back(mid_blocks.front(), mid_blocks.back()); + AdjacencyListGraph blks(SetupFromAdjacencyList(mid_blocks.front(), mid_blocks.back(), edges)); + ASSERT_TRUE(ExecutionSubgraph::CanAnalyse(graph_)); + ExecutionSubgraph esg(graph_, /*analysis_possible=*/true, GetScopedAllocator()); + constexpr size_t kToRemoveIdx = kNumBlocks / 2; + HBasicBlock* remove_implicit = blks.Get(mid_blocks[kToRemoveIdx]); + for (HBasicBlock* pred : remove_implicit->GetPredecessors()) { + esg.RemoveBlock(pred); + } + esg.Finalize(); + EXPECT_TRUE(esg.IsValid()); + EXPECT_TRUE(IsValidSubgraph(esg)); + std::unordered_set<const HBasicBlock*> contents(esg.ReachableBlocks().begin(), + esg.ReachableBlocks().end()); + + // Only entry and exit. The middle ones should eliminate everything else. + EXPECT_EQ(contents.size(), 2u); + EXPECT_TRUE(contents.find(remove_implicit) == contents.end()); + EXPECT_TRUE(contents.find(blks.Get(mid_blocks.front())) != contents.end()); + EXPECT_TRUE(contents.find(blks.Get(mid_blocks.back())) != contents.end()); +} + +// Test with too many successors +TEST_F(ExecutionSubgraphTest, CanNotAnalyseBig) { + std::vector<std::string> mid_blocks; + for (auto i : Range(ExecutionSubgraph::kMaxFilterableSuccessors + 4)) { + std::ostringstream oss; + oss << "blk" << i; + mid_blocks.push_back(oss.str()); + } + std::vector<AdjacencyListGraph::Edge> edges; + for (const auto& mid : mid_blocks) { + edges.emplace_back("entry", mid); + edges.emplace_back(mid, "exit"); + } + AdjacencyListGraph blks(SetupFromAdjacencyList("entry", "exit", edges)); + ASSERT_FALSE(ExecutionSubgraph::CanAnalyse(graph_)); +} +} // namespace art diff --git a/compiler/optimizing/execution_subgraph_test.h b/compiler/optimizing/execution_subgraph_test.h new file mode 100644 index 0000000000..13cb2bc7c5 --- /dev/null +++ b/compiler/optimizing/execution_subgraph_test.h @@ -0,0 +1,38 @@ +/* + * Copyright (C) 2020 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef ART_COMPILER_OPTIMIZING_EXECUTION_SUBGRAPH_TEST_H_ +#define ART_COMPILER_OPTIMIZING_EXECUTION_SUBGRAPH_TEST_H_ + +#include "android-base/macros.h" + +namespace art { + +class HGraph; +class ExecutionSubgraph; + +class ExecutionSubgraphTestHelper { + public: + static bool CalculateValidity(HGraph* graph, const ExecutionSubgraph* subgraph); + + private: + ExecutionSubgraphTestHelper() = delete; + + DISALLOW_COPY_AND_ASSIGN(ExecutionSubgraphTestHelper); +}; +} // namespace art + +#endif // ART_COMPILER_OPTIMIZING_EXECUTION_SUBGRAPH_TEST_H_ diff --git a/compiler/optimizing/load_store_analysis.cc b/compiler/optimizing/load_store_analysis.cc index 7a67fc5b29..3daa6472de 100644 --- a/compiler/optimizing/load_store_analysis.cc +++ b/compiler/optimizing/load_store_analysis.cc @@ -88,6 +88,94 @@ static bool CanBinaryOpsAlias(const HBinaryOperation* idx1, return CanIntegerRangesOverlap(l1, h1, l2, h2); } +// Make sure we mark any writes/potential writes to heap-locations within partially +// escaped values as escaping. +void ReferenceInfo::PrunePartialEscapeWrites() { + if (!subgraph_.IsValid()) { + // All paths escape. + return; + } + HGraph* graph = reference_->GetBlock()->GetGraph(); + ArenaBitVector additional_exclusions( + allocator_, graph->GetBlocks().size(), false, kArenaAllocLSA); + for (const HUseListNode<HInstruction*>& use : reference_->GetUses()) { + const HInstruction* user = use.GetUser(); + const bool possible_exclusion = + !additional_exclusions.IsBitSet(user->GetBlock()->GetBlockId()) && + subgraph_.ContainsBlock(user->GetBlock()); + const bool is_written_to = + (user->IsUnresolvedInstanceFieldSet() || user->IsUnresolvedStaticFieldSet() || + user->IsInstanceFieldSet() || user->IsStaticFieldSet() || user->IsArraySet()) && + (reference_ == user->InputAt(0)); + if (possible_exclusion && is_written_to && + std::any_of(subgraph_.UnreachableBlocks().begin(), + subgraph_.UnreachableBlocks().end(), + [&](const HBasicBlock* excluded) -> bool { + return reference_->GetBlock()->GetGraph()->PathBetween(excluded, + user->GetBlock()); + })) { + // This object had memory written to it somewhere, if it escaped along + // some paths prior to the current block this write also counts as an + // escape. 
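// Treating the write as an escape is conservative: once the object has escaped
// on some incoming path, the stored value may be observable through that
// escape, so the writing block is collected here and removed from the
// no-escape subgraph after the loop.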
+ additional_exclusions.SetBit(user->GetBlock()->GetBlockId()); + } + } + if (UNLIKELY(additional_exclusions.IsAnyBitSet())) { + for (uint32_t exc : additional_exclusions.Indexes()) { + subgraph_.RemoveBlock(graph->GetBlocks()[exc]); + } + } +} + +bool HeapLocationCollector::InstructionEligibleForLSERemoval(HInstruction* inst) const { + if (inst->IsNewInstance()) { + return !inst->AsNewInstance()->NeedsChecks(); + } else if (inst->IsNewArray()) { + HInstruction* array_length = inst->AsNewArray()->GetLength(); + bool known_array_length = + array_length->IsIntConstant() && array_length->AsIntConstant()->GetValue() >= 0; + return known_array_length && + std::all_of(inst->GetUses().cbegin(), + inst->GetUses().cend(), + [&](const HUseListNode<HInstruction*>& user) { + if (user.GetUser()->IsArrayGet() || user.GetUser()->IsArraySet()) { + return user.GetUser()->InputAt(1)->IsIntConstant(); + } + return true; + }); + } else { + return false; + } +} + +void HeapLocationCollector::DumpReferenceStats(OptimizingCompilerStats* stats) { + if (stats == nullptr) { + return; + } + std::vector<bool> seen_instructions(GetGraph()->GetCurrentInstructionId(), false); + for (auto hl : heap_locations_) { + auto ri = hl->GetReferenceInfo(); + if (ri == nullptr || seen_instructions[ri->GetReference()->GetId()]) { + continue; + } + auto instruction = ri->GetReference(); + seen_instructions[instruction->GetId()] = true; + if (ri->IsSingletonAndRemovable()) { + if (InstructionEligibleForLSERemoval(instruction)) { + MaybeRecordStat(stats, MethodCompilationStat::kFullLSEPossible); + } + } + // TODO This is an estimate of the number of allocations we will be able + // to (partially) remove. As additional work is done this can be refined. + if (ri->IsPartialSingleton() && instruction->IsNewInstance() && + ri->GetNoEscapeSubgraph()->ContainsBlock(instruction->GetBlock()) && + !ri->GetNoEscapeSubgraph()->GetExcludedCohorts().empty() && + InstructionEligibleForLSERemoval(instruction)) { + MaybeRecordStat(stats, MethodCompilationStat::kPartialLSEPossible); + } + } +} + bool HeapLocationCollector::CanArrayElementsAlias(const HInstruction* idx1, const size_t vector_length1, const HInstruction* idx2, @@ -172,6 +260,7 @@ bool LoadStoreAnalysis::Run() { } heap_location_collector_.BuildAliasingMatrix(); + heap_location_collector_.DumpReferenceStats(stats_); return true; } diff --git a/compiler/optimizing/load_store_analysis.h b/compiler/optimizing/load_store_analysis.h index 882fe28cc7..5d2d841b37 100644 --- a/compiler/optimizing/load_store_analysis.h +++ b/compiler/optimizing/load_store_analysis.h @@ -17,29 +17,61 @@ #ifndef ART_COMPILER_OPTIMIZING_LOAD_STORE_ANALYSIS_H_ #define ART_COMPILER_OPTIMIZING_LOAD_STORE_ANALYSIS_H_ +#include "base/arena_allocator.h" +#include "base/arena_bit_vector.h" #include "base/bit_vector-inl.h" +#include "base/scoped_arena_allocator.h" #include "base/scoped_arena_containers.h" +#include "base/stl_util.h" #include "escape.h" +#include "execution_subgraph.h" #include "nodes.h" -#include "optimization.h" +#include "optimizing/optimizing_compiler_stats.h" namespace art { // A ReferenceInfo contains additional info about a reference such as // whether it's a singleton, returned, etc. 
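// It also records, as an ExecutionSubgraph, the blocks on which the reference
// is known not to escape; partial LSE queries this through
// GetNoEscapeSubgraph()/IsPartialSingleton() below.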
-class ReferenceInfo : public ArenaObject<kArenaAllocLSA> { +class ReferenceInfo : public DeletableArenaObject<kArenaAllocLSA> { public: - ReferenceInfo(HInstruction* reference, size_t pos) + ReferenceInfo(HInstruction* reference, + ScopedArenaAllocator* allocator, + size_t pos, + bool for_partial_elimination) : reference_(reference), position_(pos), is_singleton_(true), is_singleton_and_not_returned_(true), - is_singleton_and_not_deopt_visible_(true) { + is_singleton_and_not_deopt_visible_(true), + allocator_(allocator), + subgraph_(reference->GetBlock()->GetGraph(), for_partial_elimination, allocator_) { + // TODO We can do this in one pass. + // TODO NewArray is possible but will need to get a handle on how to deal with the dynamic loads + // for now just ignore it. + bool can_be_partial = + for_partial_elimination && (/* reference_->IsNewArray() || */ reference_->IsNewInstance()); + LambdaEscapeVisitor func([&](HInstruction* inst) { return HandleEscape(inst); }); + if (can_be_partial) { + VisitEscapes(reference_, func); + } CalculateEscape(reference_, nullptr, &is_singleton_, &is_singleton_and_not_returned_, &is_singleton_and_not_deopt_visible_); + if (can_be_partial) { + // This is to mark writes to partially escaped values as also part of the escaped subset. + // TODO We can avoid this if we have a 'ConditionalWrite' instruction. Will require testing + // to see if the additional branches are worth it. + PrunePartialEscapeWrites(); + subgraph_.Finalize(); + } else { + subgraph_.Invalidate(); + } + } + + const ExecutionSubgraph* GetNoEscapeSubgraph() const { + return &subgraph_; } HInstruction* GetReference() const { @@ -57,6 +89,14 @@ class ReferenceInfo : public ArenaObject<kArenaAllocLSA> { return is_singleton_; } + // This is a singleton and there are paths that don't escape the method + bool IsPartialSingleton() const { + auto ref = GetReference(); + // TODO NewArray is possible but will need to get a handle on how to deal with the dynamic loads + // for now just ignore it. + return (/* ref->IsNewArray() || */ ref->IsNewInstance()) && GetNoEscapeSubgraph()->IsValid(); + } + // Returns true if reference_ is a singleton and not returned to the caller or // used as an environment local of an HDeoptimize instruction. // The allocation and stores into reference_ may be eliminated for such cases. @@ -72,6 +112,15 @@ class ReferenceInfo : public ArenaObject<kArenaAllocLSA> { } private: + bool HandleEscape(HInstruction* escape) { + subgraph_.RemoveBlock(escape->GetBlock()); + return true; + } + + // Make sure we mark any writes/potential writes to heap-locations within partially + // escaped values as escaping. + void PrunePartialEscapeWrites(); + HInstruction* const reference_; const size_t position_; // position in HeapLocationCollector's ref_info_array_. @@ -82,6 +131,10 @@ class ReferenceInfo : public ArenaObject<kArenaAllocLSA> { // Is singleton and not used as an environment local of HDeoptimize. bool is_singleton_and_not_deopt_visible_; + ScopedArenaAllocator* allocator_; + + ExecutionSubgraph subgraph_; + DISALLOW_COPY_AND_ASSIGN(ReferenceInfo); }; @@ -174,23 +227,28 @@ class HeapLocationCollector : public HGraphVisitor { // aliasing matrix of 8 heap locations. 
static constexpr uint32_t kInitialAliasingMatrixBitVectorSize = 32; - explicit HeapLocationCollector(HGraph* graph, ScopedArenaAllocator* allocator) + HeapLocationCollector(HGraph* graph, + ScopedArenaAllocator* allocator, + bool for_partial_elimination) : HGraphVisitor(graph), allocator_(allocator), ref_info_array_(allocator->Adapter(kArenaAllocLSA)), heap_locations_(allocator->Adapter(kArenaAllocLSA)), - aliasing_matrix_(allocator, - kInitialAliasingMatrixBitVectorSize, - true, - kArenaAllocLSA), + aliasing_matrix_(allocator, kInitialAliasingMatrixBitVectorSize, true, kArenaAllocLSA), has_heap_stores_(false), has_volatile_(false), - has_monitor_operations_(false) { + has_monitor_operations_(false), + for_partial_elimination_(for_partial_elimination) { aliasing_matrix_.ClearAllBits(); } + ~HeapLocationCollector() { + CleanUp(); + } + void CleanUp() { heap_locations_.clear(); + STLDeleteContainerPointers(ref_info_array_.begin(), ref_info_array_.end()); ref_info_array_.clear(); } @@ -303,6 +361,11 @@ class HeapLocationCollector : public HGraphVisitor { return kHeapLocationNotFound; } + bool InstructionEligibleForLSERemoval(HInstruction* inst) const; + + // Get some estimated statistics based on our analysis. + void DumpReferenceStats(OptimizingCompilerStats* stats); + // Returns true if heap_locations_[index1] and heap_locations_[index2] may alias. bool MayAlias(size_t index1, size_t index2) const { if (index1 < index2) { @@ -417,7 +480,8 @@ class HeapLocationCollector : public HGraphVisitor { ReferenceInfo* ref_info = FindReferenceInfoOf(instruction); if (ref_info == nullptr) { size_t pos = ref_info_array_.size(); - ref_info = new (allocator_) ReferenceInfo(instruction, pos); + ref_info = + new (allocator_) ReferenceInfo(instruction, allocator_, pos, for_partial_elimination_); ref_info_array_.push_back(ref_info); } return ref_info; @@ -554,15 +618,25 @@ class HeapLocationCollector : public HGraphVisitor { // alias analysis and won't be as effective. bool has_volatile_; // If there are volatile field accesses. bool has_monitor_operations_; // If there are monitor operations. + bool for_partial_elimination_; DISALLOW_COPY_AND_ASSIGN(HeapLocationCollector); }; class LoadStoreAnalysis { public: - explicit LoadStoreAnalysis(HGraph* graph, ScopedArenaAllocator* local_allocator) - : graph_(graph), - heap_location_collector_(graph, local_allocator) {} + // for_elimination controls whether we should keep track of escapes at a per-block level for + // partial LSE. 
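+  // Note that even with for_elimination == true, per-block tracking is only
+  // enabled when ExecutionSubgraph::CanAnalyse(graph_) holds (see below).
+  // Typical use (as in the tests and load_store_elimination.cc):
+  //   ScopedArenaAllocator allocator(graph->GetArenaStack());
+  //   LoadStoreAnalysis lsa(graph, stats, &allocator, /*for_elimination=*/true);
+  //   lsa.Run();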
+ explicit LoadStoreAnalysis(HGraph* graph, + OptimizingCompilerStats* stats, + ScopedArenaAllocator* local_allocator, + bool for_elimination = true) + : graph_(graph), + stats_(stats), + heap_location_collector_( + graph, + local_allocator, + /*for_partial_elimination=*/for_elimination && ExecutionSubgraph::CanAnalyse(graph_)) {} const HeapLocationCollector& GetHeapLocationCollector() const { return heap_location_collector_; @@ -572,6 +646,7 @@ class LoadStoreAnalysis { private: HGraph* graph_; + OptimizingCompilerStats* stats_; HeapLocationCollector heap_location_collector_; DISALLOW_COPY_AND_ASSIGN(LoadStoreAnalysis); diff --git a/compiler/optimizing/load_store_analysis_test.cc b/compiler/optimizing/load_store_analysis_test.cc index c518f03fbe..b567a4f873 100644 --- a/compiler/optimizing/load_store_analysis_test.cc +++ b/compiler/optimizing/load_store_analysis_test.cc @@ -15,16 +15,49 @@ */ #include "load_store_analysis.h" -#include "nodes.h" -#include "optimizing_unit_test.h" +#include <array> +#include <string_view> +#include <unordered_map> +#include <unordered_set> + +#include "base/scoped_arena_allocator.h" +#include "class_root.h" +#include "dex/dex_file_types.h" +#include "dex/method_reference.h" +#include "entrypoints/quick/quick_entrypoints_enum.h" +#include "execution_subgraph.h" +#include "execution_subgraph_test.h" #include "gtest/gtest.h" +#include "handle.h" +#include "handle_scope.h" +#include "nodes.h" +#include "optimizing/data_type.h" +#include "optimizing_unit_test.h" +#include "scoped_thread_state_change.h" namespace art { class LoadStoreAnalysisTest : public OptimizingUnitTest { public: - LoadStoreAnalysisTest() : graph_(CreateGraph()) { } + LoadStoreAnalysisTest() : graph_(CreateGraph()) {} + + AdjacencyListGraph SetupFromAdjacencyList( + const std::string_view entry_name, + const std::string_view exit_name, + const std::vector<AdjacencyListGraph::Edge>& adj) { + return AdjacencyListGraph(graph_, GetAllocator(), entry_name, exit_name, adj); + } + + bool IsValidSubgraph(const ExecutionSubgraph* esg) { + return ExecutionSubgraphTestHelper::CalculateValidity(graph_, esg); + } + + bool IsValidSubgraph(const ExecutionSubgraph& esg) { + return ExecutionSubgraphTestHelper::CalculateValidity(graph_, &esg); + } + void CheckReachability(const AdjacencyListGraph& adj, + const std::vector<AdjacencyListGraph::Edge>& reach); HGraph* graph_; }; @@ -67,7 +100,8 @@ TEST_F(LoadStoreAnalysisTest, ArrayHeapLocations) { // Test HeapLocationCollector initialization. // Should be no heap locations, no operations on the heap. ScopedArenaAllocator allocator(graph_->GetArenaStack()); - HeapLocationCollector heap_location_collector(graph_, &allocator); + HeapLocationCollector heap_location_collector( + graph_, &allocator, /*for_partial_elimination=*/true); ASSERT_EQ(heap_location_collector.GetNumberOfHeapLocations(), 0U); ASSERT_FALSE(heap_location_collector.HasHeapStores()); @@ -164,7 +198,8 @@ TEST_F(LoadStoreAnalysisTest, FieldHeapLocations) { // Test HeapLocationCollector initialization. // Should be no heap locations, no operations on the heap. 
ScopedArenaAllocator allocator(graph_->GetArenaStack()); - HeapLocationCollector heap_location_collector(graph_, &allocator); + HeapLocationCollector heap_location_collector( + graph_, &allocator, /*for_partial_elimination=*/true); ASSERT_EQ(heap_location_collector.GetNumberOfHeapLocations(), 0U); ASSERT_FALSE(heap_location_collector.HasHeapStores()); @@ -244,7 +279,7 @@ TEST_F(LoadStoreAnalysisTest, ArrayIndexAliasingTest) { entry->AddInstruction(arr_set8); // array[i-(-1)] = c0 ScopedArenaAllocator allocator(graph_->GetArenaStack()); - LoadStoreAnalysis lsa(graph_, &allocator); + LoadStoreAnalysis lsa(graph_, nullptr, &allocator, /*for_elimination=*/false); lsa.Run(); const HeapLocationCollector& heap_location_collector = lsa.GetHeapLocationCollector(); @@ -411,7 +446,7 @@ TEST_F(LoadStoreAnalysisTest, ArrayAliasingTest) { entry->AddInstruction(vstore_i_add6_vlen2); ScopedArenaAllocator allocator(graph_->GetArenaStack()); - LoadStoreAnalysis lsa(graph_, &allocator); + LoadStoreAnalysis lsa(graph_, nullptr, &allocator, /*for_elimination=*/false); lsa.Run(); const HeapLocationCollector& heap_location_collector = lsa.GetHeapLocationCollector(); @@ -570,7 +605,7 @@ TEST_F(LoadStoreAnalysisTest, ArrayIndexCalculationOverflowTest) { entry->AddInstruction(arr_set_8); ScopedArenaAllocator allocator(graph_->GetArenaStack()); - LoadStoreAnalysis lsa(graph_, &allocator); + LoadStoreAnalysis lsa(graph_, nullptr, &allocator, /*for_elimination=*/false); lsa.Run(); const HeapLocationCollector& heap_location_collector = lsa.GetHeapLocationCollector(); @@ -660,7 +695,8 @@ TEST_F(LoadStoreAnalysisTest, TestHuntOriginalRef) { entry->AddInstruction(array_get4); ScopedArenaAllocator allocator(graph_->GetArenaStack()); - HeapLocationCollector heap_location_collector(graph_, &allocator); + HeapLocationCollector heap_location_collector( + graph_, &allocator, /*for_partial_elimination=*/true); heap_location_collector.VisitBasicBlock(entry); // Test that the HeapLocationCollector should be able to tell @@ -678,4 +714,976 @@ TEST_F(LoadStoreAnalysisTest, TestHuntOriginalRef) { ASSERT_EQ(loc1, loc4); } +void LoadStoreAnalysisTest::CheckReachability(const AdjacencyListGraph& adj, + const std::vector<AdjacencyListGraph::Edge>& reach) { + uint32_t cnt = 0; + for (HBasicBlock* blk : graph_->GetBlocks()) { + if (adj.HasBlock(blk)) { + for (HBasicBlock* other : graph_->GetBlocks()) { + if (other == nullptr) { + continue; + } + if (adj.HasBlock(other)) { + bool contains_edge = + std::find(reach.begin(), + reach.end(), + AdjacencyListGraph::Edge { adj.GetName(blk), adj.GetName(other) }) != + reach.end(); + if (graph_->PathBetween(blk, other)) { + cnt++; + EXPECT_TRUE(contains_edge) << "Unexpected edge found between " << adj.GetName(blk) + << " and " << adj.GetName(other); + } else { + EXPECT_FALSE(contains_edge) << "Expected edge not found between " << adj.GetName(blk) + << " and " << adj.GetName(other); + } + } else if (graph_->PathBetween(blk, other)) { + ADD_FAILURE() << "block " << adj.GetName(blk) + << " has path to non-adjacency-graph block id: " << other->GetBlockId(); + } + } + } else { + for (HBasicBlock* other : graph_->GetBlocks()) { + if (other == nullptr) { + continue; + } + EXPECT_FALSE(graph_->PathBetween(blk, other)) + << "Reachable blocks outside of adjacency-list"; + } + } + } + EXPECT_EQ(cnt, reach.size()); +} + +TEST_F(LoadStoreAnalysisTest, ReachabilityTest1) { + AdjacencyListGraph blks(SetupFromAdjacencyList( + "entry", + "exit", + { { "entry", "left" }, { "entry", "right" }, { "left", "exit" }, { 
"right", "exit" } })); + CheckReachability(blks, + { + { "entry", "left" }, + { "entry", "right" }, + { "entry", "exit" }, + { "right", "exit" }, + { "left", "exit" }, + }); +} + +TEST_F(LoadStoreAnalysisTest, ReachabilityTest2) { + AdjacencyListGraph blks(SetupFromAdjacencyList( + "entry", + "exit", + { { "entry", "loop-header" }, { "loop-header", "loop" }, { "loop", "loop-header" } })); + CheckReachability(blks, + { + { "entry", "loop-header" }, + { "entry", "loop" }, + { "loop-header", "loop-header" }, + { "loop-header", "loop" }, + { "loop", "loop-header" }, + { "loop", "loop" }, + }); +} + +TEST_F(LoadStoreAnalysisTest, ReachabilityTest3) { + AdjacencyListGraph blks(SetupFromAdjacencyList("entry", + "exit", + { { "entry", "loop-header" }, + { "loop-header", "loop" }, + { "loop", "loop-header" }, + { "entry", "right" }, + { "right", "exit" } })); + CheckReachability(blks, + { + { "entry", "loop-header" }, + { "entry", "loop" }, + { "entry", "right" }, + { "entry", "exit" }, + { "loop-header", "loop-header" }, + { "loop-header", "loop" }, + { "loop", "loop-header" }, + { "loop", "loop" }, + { "right", "exit" }, + }); +} + +static bool AreExclusionsIndependent(HGraph* graph, const ExecutionSubgraph* esg) { + auto excluded = esg->GetExcludedCohorts(); + if (excluded.size() < 2) { + return true; + } + for (auto first = excluded.begin(); first != excluded.end(); ++first) { + for (auto second = excluded.begin(); second != excluded.end(); ++second) { + if (first == second) { + continue; + } + for (const HBasicBlock* entry : first->EntryBlocks()) { + for (const HBasicBlock* exit : second->ExitBlocks()) { + if (graph->PathBetween(exit, entry)) { + return false; + } + } + } + } + } + return true; +} + +// // ENTRY +// obj = new Obj(); +// if (parameter_value) { +// // LEFT +// call_func(obj); +// } else { +// // RIGHT +// obj.field = 1; +// } +// // EXIT +// obj.field; +TEST_F(LoadStoreAnalysisTest, PartialEscape) { + AdjacencyListGraph blks(SetupFromAdjacencyList( + "entry", + "exit", + { { "entry", "left" }, { "entry", "right" }, { "left", "exit" }, { "right", "exit" } })); + HBasicBlock* entry = blks.Get("entry"); + HBasicBlock* left = blks.Get("left"); + HBasicBlock* right = blks.Get("right"); + HBasicBlock* exit = blks.Get("exit"); + + HInstruction* bool_value = new (GetAllocator()) + HParameterValue(graph_->GetDexFile(), dex::TypeIndex(1), 1, DataType::Type::kBool); + HInstruction* c0 = graph_->GetIntConstant(0); + HInstruction* cls = new (GetAllocator()) HLoadClass(graph_->GetCurrentMethod(), + dex::TypeIndex(10), + graph_->GetDexFile(), + ScopedNullHandle<mirror::Class>(), + false, + 0, + false); + HInstruction* new_inst = + new (GetAllocator()) HNewInstance(cls, + 0, + dex::TypeIndex(10), + graph_->GetDexFile(), + false, + QuickEntrypointEnum::kQuickAllocObjectInitialized); + HInstruction* if_inst = new (GetAllocator()) HIf(bool_value); + entry->AddInstruction(bool_value); + entry->AddInstruction(cls); + entry->AddInstruction(new_inst); + entry->AddInstruction(if_inst); + + HInstruction* call_left = new (GetAllocator()) + HInvokeStaticOrDirect(GetAllocator(), + 1, + DataType::Type::kVoid, + 0, + { nullptr, 0 }, + nullptr, + {}, + InvokeType::kStatic, + { nullptr, 0 }, + HInvokeStaticOrDirect::ClinitCheckRequirement::kNone); + HInstruction* goto_left = new (GetAllocator()) HGoto(); + call_left->AsInvoke()->SetRawInputAt(0, new_inst); + left->AddInstruction(call_left); + left->AddInstruction(goto_left); + + HInstruction* write_right = new (GetAllocator()) HInstanceFieldSet(new_inst, + c0, 
+ nullptr, + DataType::Type::kInt32, + MemberOffset(10), + false, + 0, + 0, + graph_->GetDexFile(), + 0); + HInstruction* goto_right = new (GetAllocator()) HGoto(); + right->AddInstruction(write_right); + right->AddInstruction(goto_right); + + HInstruction* read_final = new (GetAllocator()) HInstanceFieldGet(new_inst, + nullptr, + DataType::Type::kInt32, + MemberOffset(10), + false, + 0, + 0, + graph_->GetDexFile(), + 0); + exit->AddInstruction(read_final); + + std::unique_ptr<ScopedArenaAllocator> allocator( + new ScopedArenaAllocator(graph_->GetArenaStack())); + LoadStoreAnalysis lsa(graph_, nullptr, allocator.get(), /*for_elimination=*/true); + lsa.Run(); + + const HeapLocationCollector& heap_location_collector = lsa.GetHeapLocationCollector(); + ReferenceInfo* info = heap_location_collector.FindReferenceInfoOf(new_inst); + const ExecutionSubgraph* esg = info->GetNoEscapeSubgraph(); + + ASSERT_TRUE(esg->IsValid()); + ASSERT_TRUE(IsValidSubgraph(esg)); + ASSERT_TRUE(AreExclusionsIndependent(graph_, esg)); + std::unordered_set<const HBasicBlock*> contents(esg->ReachableBlocks().begin(), + esg->ReachableBlocks().end()); + + ASSERT_EQ(contents.size(), 3u); + ASSERT_TRUE(contents.find(blks.Get("left")) == contents.end()); + + ASSERT_TRUE(contents.find(blks.Get("right")) != contents.end()); + ASSERT_TRUE(contents.find(blks.Get("entry")) != contents.end()); + ASSERT_TRUE(contents.find(blks.Get("exit")) != contents.end()); +} + +// // ENTRY +// obj = new Obj(); +// if (parameter_value) { +// // LEFT +// call_func(obj); +// } else { +// // RIGHT +// obj.field = 1; +// } +// // EXIT +// obj.field2; +TEST_F(LoadStoreAnalysisTest, PartialEscape2) { + AdjacencyListGraph blks(SetupFromAdjacencyList( + "entry", + "exit", + { { "entry", "left" }, { "entry", "right" }, { "left", "exit" }, { "right", "exit" } })); + HBasicBlock* entry = blks.Get("entry"); + HBasicBlock* left = blks.Get("left"); + HBasicBlock* right = blks.Get("right"); + HBasicBlock* exit = blks.Get("exit"); + + HInstruction* bool_value = new (GetAllocator()) + HParameterValue(graph_->GetDexFile(), dex::TypeIndex(1), 1, DataType::Type::kBool); + HInstruction* c0 = graph_->GetIntConstant(0); + HInstruction* cls = new (GetAllocator()) HLoadClass(graph_->GetCurrentMethod(), + dex::TypeIndex(10), + graph_->GetDexFile(), + ScopedNullHandle<mirror::Class>(), + false, + 0, + false); + HInstruction* new_inst = + new (GetAllocator()) HNewInstance(cls, + 0, + dex::TypeIndex(10), + graph_->GetDexFile(), + false, + QuickEntrypointEnum::kQuickAllocObjectInitialized); + HInstruction* if_inst = new (GetAllocator()) HIf(bool_value); + entry->AddInstruction(bool_value); + entry->AddInstruction(cls); + entry->AddInstruction(new_inst); + entry->AddInstruction(if_inst); + + HInstruction* call_left = new (GetAllocator()) + HInvokeStaticOrDirect(GetAllocator(), + 1, + DataType::Type::kVoid, + 0, + { nullptr, 0 }, + nullptr, + {}, + InvokeType::kStatic, + { nullptr, 0 }, + HInvokeStaticOrDirect::ClinitCheckRequirement::kNone); + HInstruction* goto_left = new (GetAllocator()) HGoto(); + call_left->AsInvoke()->SetRawInputAt(0, new_inst); + left->AddInstruction(call_left); + left->AddInstruction(goto_left); + + HInstruction* write_right = new (GetAllocator()) HInstanceFieldSet(new_inst, + c0, + nullptr, + DataType::Type::kInt32, + MemberOffset(10), + false, + 0, + 0, + graph_->GetDexFile(), + 0); + HInstruction* goto_right = new (GetAllocator()) HGoto(); + right->AddInstruction(write_right); + right->AddInstruction(goto_right); + + HInstruction* read_final = new 
(GetAllocator()) HInstanceFieldGet(new_inst, + nullptr, + DataType::Type::kInt32, + MemberOffset(16), + false, + 0, + 0, + graph_->GetDexFile(), + 0); + exit->AddInstruction(read_final); + + ScopedArenaAllocator allocator(graph_->GetArenaStack()); + LoadStoreAnalysis lsa(graph_, nullptr, &allocator, /*for_elimination=*/true); + lsa.Run(); + + const HeapLocationCollector& heap_location_collector = lsa.GetHeapLocationCollector(); + ReferenceInfo* info = heap_location_collector.FindReferenceInfoOf(new_inst); + const ExecutionSubgraph* esg = info->GetNoEscapeSubgraph(); + + ASSERT_TRUE(esg->IsValid()); + ASSERT_TRUE(IsValidSubgraph(esg)); + ASSERT_TRUE(AreExclusionsIndependent(graph_, esg)); + std::unordered_set<const HBasicBlock*> contents(esg->ReachableBlocks().begin(), + esg->ReachableBlocks().end()); + + ASSERT_EQ(contents.size(), 3u); + ASSERT_TRUE(contents.find(blks.Get("left")) == contents.end()); + + ASSERT_TRUE(contents.find(blks.Get("right")) != contents.end()); + ASSERT_TRUE(contents.find(blks.Get("entry")) != contents.end()); + ASSERT_TRUE(contents.find(blks.Get("exit")) != contents.end()); +} + +// // ENTRY +// obj = new Obj(); +// obj.field = 10; +// if (parameter_value) { +// // LEFT +// call_func(obj); +// } else { +// // RIGHT +// obj.field = 20; +// } +// // EXIT +// obj.field; +TEST_F(LoadStoreAnalysisTest, PartialEscape3) { + AdjacencyListGraph blks(SetupFromAdjacencyList( + "entry", + "exit", + { { "entry", "left" }, { "entry", "right" }, { "left", "exit" }, { "right", "exit" } })); + HBasicBlock* entry = blks.Get("entry"); + HBasicBlock* left = blks.Get("left"); + HBasicBlock* right = blks.Get("right"); + HBasicBlock* exit = blks.Get("exit"); + + HInstruction* bool_value = new (GetAllocator()) + HParameterValue(graph_->GetDexFile(), dex::TypeIndex(1), 1, DataType::Type::kBool); + HInstruction* c10 = graph_->GetIntConstant(10); + HInstruction* c20 = graph_->GetIntConstant(20); + HInstruction* cls = new (GetAllocator()) HLoadClass(graph_->GetCurrentMethod(), + dex::TypeIndex(10), + graph_->GetDexFile(), + ScopedNullHandle<mirror::Class>(), + false, + 0, + false); + HInstruction* new_inst = + new (GetAllocator()) HNewInstance(cls, + 0, + dex::TypeIndex(10), + graph_->GetDexFile(), + false, + QuickEntrypointEnum::kQuickAllocObjectInitialized); + + HInstruction* write_entry = new (GetAllocator()) HInstanceFieldSet(new_inst, + c10, + nullptr, + DataType::Type::kInt32, + MemberOffset(10), + false, + 0, + 0, + graph_->GetDexFile(), + 0); + HInstruction* if_inst = new (GetAllocator()) HIf(bool_value); + entry->AddInstruction(bool_value); + entry->AddInstruction(cls); + entry->AddInstruction(new_inst); + entry->AddInstruction(write_entry); + entry->AddInstruction(if_inst); + + HInstruction* call_left = new (GetAllocator()) + HInvokeStaticOrDirect(GetAllocator(), + 1, + DataType::Type::kVoid, + 0, + { nullptr, 0 }, + nullptr, + {}, + InvokeType::kStatic, + { nullptr, 0 }, + HInvokeStaticOrDirect::ClinitCheckRequirement::kNone); + HInstruction* goto_left = new (GetAllocator()) HGoto(); + call_left->AsInvoke()->SetRawInputAt(0, new_inst); + left->AddInstruction(call_left); + left->AddInstruction(goto_left); + + HInstruction* write_right = new (GetAllocator()) HInstanceFieldSet(new_inst, + c20, + nullptr, + DataType::Type::kInt32, + MemberOffset(10), + false, + 0, + 0, + graph_->GetDexFile(), + 0); + HInstruction* goto_right = new (GetAllocator()) HGoto(); + right->AddInstruction(write_right); + right->AddInstruction(goto_right); + + HInstruction* read_final = new (GetAllocator()) 
HInstanceFieldGet(new_inst, + nullptr, + DataType::Type::kInt32, + MemberOffset(10), + false, + 0, + 0, + graph_->GetDexFile(), + 0); + exit->AddInstruction(read_final); + + ScopedArenaAllocator allocator(graph_->GetArenaStack()); + LoadStoreAnalysis lsa(graph_, nullptr, &allocator, /*for_elimination=*/true); + lsa.Run(); + + const HeapLocationCollector& heap_location_collector = lsa.GetHeapLocationCollector(); + ReferenceInfo* info = heap_location_collector.FindReferenceInfoOf(new_inst); + const ExecutionSubgraph* esg = info->GetNoEscapeSubgraph(); + + ASSERT_TRUE(esg->IsValid()); + ASSERT_TRUE(IsValidSubgraph(esg)); + ASSERT_TRUE(AreExclusionsIndependent(graph_, esg)); + std::unordered_set<const HBasicBlock*> contents(esg->ReachableBlocks().begin(), + esg->ReachableBlocks().end()); + + ASSERT_EQ(contents.size(), 3u); + ASSERT_TRUE(contents.find(blks.Get("left")) == contents.end()); + + ASSERT_TRUE(contents.find(blks.Get("right")) != contents.end()); + ASSERT_TRUE(contents.find(blks.Get("entry")) != contents.end()); + ASSERT_TRUE(contents.find(blks.Get("exit")) != contents.end()); +} + +// // ENTRY +// obj = new Obj(); +// if (parameter_value) { +// // LEFT +// call_func(obj); +// } else { +// // RIGHT +// obj.f1 = 0; +// } +// // EXIT +// // call_func prevents the elimination of this store. +// obj.f2 = 0; +TEST_F(LoadStoreAnalysisTest, TotalEscapeAdjacent) { + AdjacencyListGraph blks(SetupFromAdjacencyList( + "entry", + "exit", + { { "entry", "left" }, { "entry", "right" }, { "left", "exit" }, { "right", "exit" } })); + HBasicBlock* entry = blks.Get("entry"); + HBasicBlock* left = blks.Get("left"); + HBasicBlock* right = blks.Get("right"); + HBasicBlock* exit = blks.Get("exit"); + + HInstruction* bool_value = new (GetAllocator()) + HParameterValue(graph_->GetDexFile(), dex::TypeIndex(1), 1, DataType::Type::kBool); + HInstruction* c0 = graph_->GetIntConstant(0); + HInstruction* cls = new (GetAllocator()) HLoadClass(graph_->GetCurrentMethod(), + dex::TypeIndex(10), + graph_->GetDexFile(), + ScopedNullHandle<mirror::Class>(), + false, + 0, + false); + HInstruction* new_inst = + new (GetAllocator()) HNewInstance(cls, + 0, + dex::TypeIndex(10), + graph_->GetDexFile(), + false, + QuickEntrypointEnum::kQuickAllocObjectInitialized); + HInstruction* if_inst = new (GetAllocator()) HIf(bool_value); + entry->AddInstruction(bool_value); + entry->AddInstruction(cls); + entry->AddInstruction(new_inst); + entry->AddInstruction(if_inst); + + HInstruction* call_left = new (GetAllocator()) + HInvokeStaticOrDirect(GetAllocator(), + 1, + DataType::Type::kVoid, + 0, + { nullptr, 0 }, + nullptr, + {}, + InvokeType::kStatic, + { nullptr, 0 }, + HInvokeStaticOrDirect::ClinitCheckRequirement::kNone); + HInstruction* goto_left = new (GetAllocator()) HGoto(); + call_left->AsInvoke()->SetRawInputAt(0, new_inst); + left->AddInstruction(call_left); + left->AddInstruction(goto_left); + + HInstruction* write_right = new (GetAllocator()) HInstanceFieldSet(new_inst, + c0, + nullptr, + DataType::Type::kInt32, + MemberOffset(10), + false, + 0, + 0, + graph_->GetDexFile(), + 0); + HInstruction* goto_right = new (GetAllocator()) HGoto(); + right->AddInstruction(write_right); + right->AddInstruction(goto_right); + + HInstruction* write_final = new (GetAllocator()) HInstanceFieldSet(new_inst, + c0, + nullptr, + DataType::Type::kInt32, + MemberOffset(16), + false, + 0, + 0, + graph_->GetDexFile(), + 0); + exit->AddInstruction(write_final); + + ScopedArenaAllocator allocator(graph_->GetArenaStack()); + LoadStoreAnalysis 
lsa(graph_, nullptr, &allocator, /*for_elimination=*/true); + lsa.Run(); + + const HeapLocationCollector& heap_location_collector = lsa.GetHeapLocationCollector(); + ReferenceInfo* info = heap_location_collector.FindReferenceInfoOf(new_inst); + const ExecutionSubgraph* esg = info->GetNoEscapeSubgraph(); + + ASSERT_FALSE(esg->IsValid()) << esg->GetExcludedCohorts(); + ASSERT_FALSE(IsValidSubgraph(esg)); + std::unordered_set<const HBasicBlock*> contents(esg->ReachableBlocks().begin(), + esg->ReachableBlocks().end()); + + ASSERT_EQ(contents.size(), 0u); + ASSERT_TRUE(contents.find(blks.Get("left")) == contents.end()); + ASSERT_TRUE(contents.find(blks.Get("right")) == contents.end()); + ASSERT_TRUE(contents.find(blks.Get("entry")) == contents.end()); + ASSERT_TRUE(contents.find(blks.Get("exit")) == contents.end()); +} + +// // ENTRY +// obj = new Obj(); +// if (parameter_value) { +// // LEFT +// call_func(obj); +// } else { +// // RIGHT +// obj.f0 = 0; +// call_func2(obj); +// } +// // EXIT +// obj.f0; +TEST_F(LoadStoreAnalysisTest, TotalEscape) { + AdjacencyListGraph blks(SetupFromAdjacencyList( + "entry", + "exit", + { { "entry", "left" }, { "entry", "right" }, { "left", "exit" }, { "right", "exit" } })); + HBasicBlock* entry = blks.Get("entry"); + HBasicBlock* left = blks.Get("left"); + HBasicBlock* right = blks.Get("right"); + HBasicBlock* exit = blks.Get("exit"); + + HInstruction* bool_value = new (GetAllocator()) + HParameterValue(graph_->GetDexFile(), dex::TypeIndex(1), 1, DataType::Type::kBool); + HInstruction* c0 = graph_->GetIntConstant(0); + HInstruction* cls = new (GetAllocator()) HLoadClass(graph_->GetCurrentMethod(), + dex::TypeIndex(10), + graph_->GetDexFile(), + ScopedNullHandle<mirror::Class>(), + false, + 0, + false); + HInstruction* new_inst = + new (GetAllocator()) HNewInstance(cls, + 0, + dex::TypeIndex(10), + graph_->GetDexFile(), + false, + QuickEntrypointEnum::kQuickAllocObjectInitialized); + HInstruction* if_inst = new (GetAllocator()) HIf(bool_value); + entry->AddInstruction(bool_value); + entry->AddInstruction(cls); + entry->AddInstruction(new_inst); + entry->AddInstruction(if_inst); + + HInstruction* call_left = new (GetAllocator()) + HInvokeStaticOrDirect(GetAllocator(), + 1, + DataType::Type::kVoid, + 0, + { nullptr, 0 }, + nullptr, + {}, + InvokeType::kStatic, + { nullptr, 0 }, + HInvokeStaticOrDirect::ClinitCheckRequirement::kNone); + HInstruction* goto_left = new (GetAllocator()) HGoto(); + call_left->AsInvoke()->SetRawInputAt(0, new_inst); + left->AddInstruction(call_left); + left->AddInstruction(goto_left); + + HInstruction* call_right = new (GetAllocator()) + HInvokeStaticOrDirect(GetAllocator(), + 1, + DataType::Type::kVoid, + 0, + { nullptr, 0 }, + nullptr, + {}, + InvokeType::kStatic, + { nullptr, 0 }, + HInvokeStaticOrDirect::ClinitCheckRequirement::kNone); + HInstruction* write_right = new (GetAllocator()) HInstanceFieldSet(new_inst, + c0, + nullptr, + DataType::Type::kInt32, + MemberOffset(10), + false, + 0, + 0, + graph_->GetDexFile(), + 0); + HInstruction* goto_right = new (GetAllocator()) HGoto(); + call_right->AsInvoke()->SetRawInputAt(0, new_inst); + right->AddInstruction(write_right); + right->AddInstruction(call_right); + right->AddInstruction(goto_right); + + HInstruction* read_final = new (GetAllocator()) HInstanceFieldGet(new_inst, + nullptr, + DataType::Type::kInt32, + MemberOffset(10), + false, + 0, + 0, + graph_->GetDexFile(), + 0); + exit->AddInstruction(read_final); + + ScopedArenaAllocator allocator(graph_->GetArenaStack()); + 
LoadStoreAnalysis lsa(graph_, nullptr, &allocator, /*for_elimination=*/true); + lsa.Run(); + + const HeapLocationCollector& heap_location_collector = lsa.GetHeapLocationCollector(); + ReferenceInfo* info = heap_location_collector.FindReferenceInfoOf(new_inst); + const ExecutionSubgraph* esg = info->GetNoEscapeSubgraph(); + + ASSERT_FALSE(esg->IsValid()); + ASSERT_FALSE(IsValidSubgraph(esg)); + std::unordered_set<const HBasicBlock*> contents(esg->ReachableBlocks().begin(), + esg->ReachableBlocks().end()); + + ASSERT_EQ(contents.size(), 0u); + ASSERT_TRUE(contents.find(blks.Get("left")) == contents.end()); + ASSERT_TRUE(contents.find(blks.Get("right")) == contents.end()); + ASSERT_TRUE(contents.find(blks.Get("entry")) == contents.end()); + ASSERT_TRUE(contents.find(blks.Get("exit")) == contents.end()); +} + +// // ENTRY +// obj = new Obj(); +// obj.foo = 0; +// // EXIT +// return obj; +TEST_F(LoadStoreAnalysisTest, TotalEscape2) { + AdjacencyListGraph blks(SetupFromAdjacencyList("entry", "exit", { { "entry", "exit" } })); + HBasicBlock* entry = blks.Get("entry"); + HBasicBlock* exit = blks.Get("exit"); + + HInstruction* c0 = graph_->GetIntConstant(0); + HInstruction* cls = new (GetAllocator()) HLoadClass(graph_->GetCurrentMethod(), + dex::TypeIndex(10), + graph_->GetDexFile(), + ScopedNullHandle<mirror::Class>(), + false, + 0, + false); + HInstruction* new_inst = + new (GetAllocator()) HNewInstance(cls, + 0, + dex::TypeIndex(10), + graph_->GetDexFile(), + false, + QuickEntrypointEnum::kQuickAllocObjectInitialized); + + HInstruction* write_start = new (GetAllocator()) HInstanceFieldSet(new_inst, + c0, + nullptr, + DataType::Type::kInt32, + MemberOffset(10), + false, + 0, + 0, + graph_->GetDexFile(), + 0); + HInstruction* goto_inst = new (GetAllocator()) HGoto(); + entry->AddInstruction(cls); + entry->AddInstruction(new_inst); + entry->AddInstruction(write_start); + entry->AddInstruction(goto_inst); + + HInstruction* return_final = new (GetAllocator()) HReturn(new_inst); + exit->AddInstruction(return_final); + + ScopedArenaAllocator allocator(graph_->GetArenaStack()); + LoadStoreAnalysis lsa(graph_, nullptr, &allocator, /*for_elimination=*/true); + lsa.Run(); + + const HeapLocationCollector& heap_location_collector = lsa.GetHeapLocationCollector(); + ReferenceInfo* info = heap_location_collector.FindReferenceInfoOf(new_inst); + const ExecutionSubgraph* esg = info->GetNoEscapeSubgraph(); + + ASSERT_FALSE(esg->IsValid()); + ASSERT_FALSE(IsValidSubgraph(esg)); + std::unordered_set<const HBasicBlock*> contents(esg->ReachableBlocks().begin(), + esg->ReachableBlocks().end()); + + ASSERT_EQ(contents.size(), 0u); + ASSERT_TRUE(contents.find(blks.Get("entry")) == contents.end()); + ASSERT_TRUE(contents.find(blks.Get("exit")) == contents.end()); +} + +// // ENTRY +// obj = new Obj(); +// if (parameter_value) { +// // HIGH_LEFT +// call_func(obj); +// } else { +// // HIGH_RIGHT +// obj.f0 = 1; +// } +// // MID +// obj.f0 *= 2; +// if (parameter_value2) { +// // LOW_LEFT +// call_func(obj); +// } else { +// // LOW_RIGHT +// obj.f0 = 1; +// } +// // EXIT +// obj.f0 +TEST_F(LoadStoreAnalysisTest, DoubleDiamondEscape) { + AdjacencyListGraph blks(SetupFromAdjacencyList("entry", + "exit", + { { "entry", "high_left" }, + { "entry", "high_right" }, + { "low_left", "exit" }, + { "low_right", "exit" }, + { "high_right", "mid" }, + { "high_left", "mid" }, + { "mid", "low_left" }, + { "mid", "low_right" } })); + HBasicBlock* entry = blks.Get("entry"); + HBasicBlock* high_left = blks.Get("high_left"); + HBasicBlock* 
high_right = blks.Get("high_right"); + HBasicBlock* mid = blks.Get("mid"); + HBasicBlock* low_left = blks.Get("low_left"); + HBasicBlock* low_right = blks.Get("low_right"); + HBasicBlock* exit = blks.Get("exit"); + + HInstruction* bool_value1 = new (GetAllocator()) + HParameterValue(graph_->GetDexFile(), dex::TypeIndex(1), 1, DataType::Type::kBool); + HInstruction* bool_value2 = new (GetAllocator()) + HParameterValue(graph_->GetDexFile(), dex::TypeIndex(1), 2, DataType::Type::kBool); + HInstruction* c0 = graph_->GetIntConstant(0); + HInstruction* c2 = graph_->GetIntConstant(2); + HInstruction* cls = new (GetAllocator()) HLoadClass(graph_->GetCurrentMethod(), + dex::TypeIndex(10), + graph_->GetDexFile(), + ScopedNullHandle<mirror::Class>(), + false, + 0, + false); + HInstruction* new_inst = + new (GetAllocator()) HNewInstance(cls, + 0, + dex::TypeIndex(10), + graph_->GetDexFile(), + false, + QuickEntrypointEnum::kQuickAllocObjectInitialized); + HInstruction* if_inst = new (GetAllocator()) HIf(bool_value1); + entry->AddInstruction(bool_value1); + entry->AddInstruction(bool_value2); + entry->AddInstruction(cls); + entry->AddInstruction(new_inst); + entry->AddInstruction(if_inst); + + HInstruction* call_left = new (GetAllocator()) + HInvokeStaticOrDirect(GetAllocator(), + 1, + DataType::Type::kVoid, + 0, + { nullptr, 0 }, + nullptr, + {}, + InvokeType::kStatic, + { nullptr, 0 }, + HInvokeStaticOrDirect::ClinitCheckRequirement::kNone); + HInstruction* goto_left = new (GetAllocator()) HGoto(); + call_left->AsInvoke()->SetRawInputAt(0, new_inst); + high_left->AddInstruction(call_left); + high_left->AddInstruction(goto_left); + + HInstruction* write_right = new (GetAllocator()) HInstanceFieldSet(new_inst, + c0, + nullptr, + DataType::Type::kInt32, + MemberOffset(10), + false, + 0, + 0, + graph_->GetDexFile(), + 0); + HInstruction* goto_right = new (GetAllocator()) HGoto(); + high_right->AddInstruction(write_right); + high_right->AddInstruction(goto_right); + + HInstruction* read_mid = new (GetAllocator()) HInstanceFieldGet(new_inst, + nullptr, + DataType::Type::kInt32, + MemberOffset(10), + false, + 0, + 0, + graph_->GetDexFile(), + 0); + HInstruction* mul_mid = new (GetAllocator()) HMul(DataType::Type::kInt32, read_mid, c2); + HInstruction* write_mid = new (GetAllocator()) HInstanceFieldSet(new_inst, + mul_mid, + nullptr, + DataType::Type::kInt32, + MemberOffset(10), + false, + 0, + 0, + graph_->GetDexFile(), + 0); + HInstruction* if_mid = new (GetAllocator()) HIf(bool_value2); + mid->AddInstruction(read_mid); + mid->AddInstruction(mul_mid); + mid->AddInstruction(write_mid); + mid->AddInstruction(if_mid); + + HInstruction* call_low_left = new (GetAllocator()) + HInvokeStaticOrDirect(GetAllocator(), + 1, + DataType::Type::kVoid, + 0, + { nullptr, 0 }, + nullptr, + {}, + InvokeType::kStatic, + { nullptr, 0 }, + HInvokeStaticOrDirect::ClinitCheckRequirement::kNone); + HInstruction* goto_low_left = new (GetAllocator()) HGoto(); + call_low_left->AsInvoke()->SetRawInputAt(0, new_inst); + low_left->AddInstruction(call_low_left); + low_left->AddInstruction(goto_low_left); + + HInstruction* write_low_right = new (GetAllocator()) HInstanceFieldSet(new_inst, + c0, + nullptr, + DataType::Type::kInt32, + MemberOffset(10), + false, + 0, + 0, + graph_->GetDexFile(), + 0); + HInstruction* goto_low_right = new (GetAllocator()) HGoto(); + low_right->AddInstruction(write_low_right); + low_right->AddInstruction(goto_low_right); + + HInstruction* read_final = new (GetAllocator()) HInstanceFieldGet(new_inst, + 
nullptr, + DataType::Type::kInt32, + MemberOffset(10), + false, + 0, + 0, + graph_->GetDexFile(), + 0); + exit->AddInstruction(read_final); + + ScopedArenaAllocator allocator(graph_->GetArenaStack()); + LoadStoreAnalysis lsa(graph_, nullptr, &allocator, /*for_elimination=*/true); + lsa.Run(); + + const HeapLocationCollector& heap_location_collector = lsa.GetHeapLocationCollector(); + ReferenceInfo* info = heap_location_collector.FindReferenceInfoOf(new_inst); + const ExecutionSubgraph* esg = info->GetNoEscapeSubgraph(); + + ASSERT_FALSE(esg->IsValid()); + ASSERT_FALSE(IsValidSubgraph(esg)); + std::unordered_set<const HBasicBlock*> contents(esg->ReachableBlocks().begin(), + esg->ReachableBlocks().end()); + + ASSERT_EQ(contents.size(), 0u); +} + +// ┌───────┐ ┌────────────┐ +// ┌─ │ right │ ◀── │ entry │ +// │ └───────┘ └────────────┘ +// │ │ +// │ │ +// │ ▼ +// │ ┌────────────┐ +// │ │ esc_top │ +// │ └────────────┘ +// │ │ +// │ │ +// │ ▼ +// │ ┌────────────┐ +// └──────────────▶ │ middle │ ─┐ +// └────────────┘ │ +// │ │ +// │ │ +// ▼ │ +// ┌────────────┐ │ +// │ esc_bottom │ │ +// └────────────┘ │ +// │ │ +// │ │ +// ▼ │ +// ┌────────────┐ │ +// │ exit │ ◀┘ +// └────────────┘ +TEST_F(LoadStoreAnalysisTest, InAndOutEscape) { + AdjacencyListGraph blks(SetupFromAdjacencyList("entry", + "exit", + { { "entry", "esc_top" }, + { "entry", "right" }, + { "esc_top", "middle" }, + { "right", "middle" }, + { "middle", "exit" }, + { "middle", "esc_bottom" }, + { "esc_bottom", "exit" } })); + + ExecutionSubgraph esg(graph_, /*analysis_possible=*/true, GetScopedAllocator()); + esg.RemoveBlock(blks.Get("esc_top")); + esg.RemoveBlock(blks.Get("esc_bottom")); + esg.Finalize(); + + std::unordered_set<const HBasicBlock*> contents(esg.ReachableBlocks().begin(), + esg.ReachableBlocks().end()); + ASSERT_EQ(contents.size(), 0u); + ASSERT_FALSE(esg.IsValid()); + ASSERT_FALSE(IsValidSubgraph(esg)); + + ASSERT_EQ(contents.size(), 0u); +} + } // namespace art diff --git a/compiler/optimizing/load_store_elimination.cc b/compiler/optimizing/load_store_elimination.cc index 5f1582275d..dcccc5ba5b 100644 --- a/compiler/optimizing/load_store_elimination.cc +++ b/compiler/optimizing/load_store_elimination.cc @@ -16,15 +16,19 @@ #include "load_store_elimination.h" +#include "base/arena_allocator.h" #include "base/arena_bit_vector.h" #include "base/array_ref.h" #include "base/bit_vector-inl.h" +#include "base/bit_vector.h" #include "base/scoped_arena_allocator.h" #include "base/scoped_arena_containers.h" #include "escape.h" #include "load_store_analysis.h" -#include "optimizing/optimizing_compiler_stats.h" +#include "nodes.h" +#include "optimizing_compiler_stats.h" #include "reference_type_propagation.h" +#include "side_effects_analysis.h" /** * The general algorithm of load-store elimination (LSE). @@ -258,6 +262,7 @@ class LSEVisitor final : private HGraphDelegateVisitor { enum class Type { kInvalid, kUnknown, + kMergedUnknown, kDefault, kInstruction, kNeedsNonLoopPhi, @@ -282,6 +287,13 @@ class LSEVisitor final : private HGraphDelegateVisitor { return value; } + static Value MergedUnknown(const PhiPlaceholder* phi_placeholder) { + Value value; + value.type_ = Type::kMergedUnknown; + value.phi_placeholder_ = phi_placeholder; + return value; + } + // Default heap value after an allocation. // A heap location can be set to that value right after an allocation. 
static Value Default() { @@ -325,10 +337,18 @@ class LSEVisitor final : private HGraphDelegateVisitor { return type_ == Type::kInvalid; } - bool IsUnknown() const { + bool IsMergedUnknown() const { + return type_ == Type::kMergedUnknown; + } + + bool IsPureUnknown() const { return type_ == Type::kUnknown; } + bool IsUnknown() const { + return type_ == Type::kUnknown || type_ == Type::kMergedUnknown; + } + bool IsDefault() const { return type_ == Type::kDefault; } @@ -355,10 +375,25 @@ class LSEVisitor final : private HGraphDelegateVisitor { } const PhiPlaceholder* GetPhiPlaceholder() const { - DCHECK(NeedsPhi()); + DCHECK(NeedsPhi() || IsMergedUnknown()); return phi_placeholder_; } + uint32_t GetMergeBlockId() const { + DCHECK(IsMergedUnknown()) << this; + return phi_placeholder_->GetBlockId(); + } + + HBasicBlock* GetMergeBlock(const HGraph* graph) const { + DCHECK(IsMergedUnknown()) << this; + return graph->GetBlocks()[GetMergeBlockId()]; + } + + size_t GetHeapLocation() const { + DCHECK(IsMergedUnknown() || NeedsPhi()) << this; + return phi_placeholder_->GetHeapLocation(); + } + bool Equals(Value other) const { // Only valid values can be compared. DCHECK(IsValid()); @@ -369,9 +404,9 @@ class LSEVisitor final : private HGraphDelegateVisitor { (other.IsDefault() && IsInstruction() && IsZeroBitPattern(GetInstruction())); } else { // Note: Two unknown values are considered different. - return IsDefault() || - (IsInstruction() && GetInstruction() == other.GetInstruction()) || - (NeedsPhi() && GetPhiPlaceholder() == other.GetPhiPlaceholder()); + return IsDefault() || (IsInstruction() && GetInstruction() == other.GetInstruction()) || + (NeedsPhi() && GetPhiPlaceholder() == other.GetPhiPlaceholder()) || + (IsMergedUnknown() && GetPhiPlaceholder() == other.GetPhiPlaceholder()); } } @@ -379,7 +414,11 @@ class LSEVisitor final : private HGraphDelegateVisitor { return Equals(ForInstruction(instruction)); } + std::ostream& Dump(std::ostream& os) const; + private: + friend std::ostream& operator<<(std::ostream& os, const Value& v); + Type type_; union { HInstruction* instruction_; @@ -397,6 +436,20 @@ class LSEVisitor final : private HGraphDelegateVisitor { return PhiPlaceholderIndex(phi_placeholder.GetPhiPlaceholder()); } + bool IsPartialNoEscape(HBasicBlock* blk, size_t idx) { + auto* ri = heap_location_collector_.GetHeapLocation(idx)->GetReferenceInfo(); + auto* sg = ri->GetNoEscapeSubgraph(); + return ri->IsPartialSingleton() && + std::none_of(sg->GetExcludedCohorts().cbegin(), + sg->GetExcludedCohorts().cend(), + [&](const ExecutionSubgraph::ExcludedCohort& ex) -> bool { + // Make sure we haven't yet and never will escape. + return ex.PrecedesBlock(blk) || + ex.ContainsBlock(blk) || + ex.SucceedsBlock(blk); + }); + } + const PhiPlaceholder* GetPhiPlaceholder(uint32_t block_id, size_t idx) const { size_t phi_placeholders_begin = phi_placeholders_begin_for_block_[block_id]; const PhiPlaceholder* phi_placeholder = &phi_placeholders_[phi_placeholders_begin + idx]; @@ -545,7 +598,12 @@ class LSEVisitor final : private HGraphDelegateVisitor { // Keep the store referenced by the instruction, or all stores that feed a Phi placeholder. // This is necessary if the stored heap value can be observed. 
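+  // With partial LSE a merged-unknown value additionally records its Phi
+  // placeholder in kept_merged_unknowns_ so that stores feeding a partially
+  // escaped object are preserved.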
void KeepStores(Value value) { - if (value.IsUnknown()) { + if (value.IsPureUnknown()) { + return; + } + if (value.IsMergedUnknown()) { + kept_merged_unknowns_.SetBit(PhiPlaceholderIndex(value)); + phi_placeholders_to_search_for_kept_stores_.SetBit(PhiPlaceholderIndex(value)); return; } if (value.NeedsPhi()) { @@ -568,7 +626,9 @@ class LSEVisitor final : private HGraphDelegateVisitor { // We use this function when reading a location with unknown value and // therefore we cannot know what exact store wrote that unknown value. // But we can have a phi placeholder here marking multiple stores to keep. - DCHECK(!heap_values[i].stored_by.IsInstruction()); + DCHECK( + !heap_values[i].stored_by.IsInstruction() || + heap_location_collector_.GetHeapLocation(i)->GetReferenceInfo()->IsPartialSingleton()); KeepStores(heap_values[i].stored_by); heap_values[i].stored_by = Value::Unknown(); } else if (heap_location_collector_.MayAlias(i, loc_index)) { @@ -779,7 +839,8 @@ class LSEVisitor final : private HGraphDelegateVisitor { ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[block->GetBlockId()]; for (size_t i = 0u, size = heap_values.size(); i != size; ++i) { ReferenceInfo* ref_info = heap_location_collector_.GetHeapLocation(i)->GetReferenceInfo(); - if (!ref_info->IsSingletonAndRemovable()) { + if (!ref_info->IsSingletonAndRemovable() && + !(ref_info->IsPartialSingleton() && IsPartialNoEscape(block, i))) { KeepStores(heap_values[i].stored_by); heap_values[i].stored_by = Value::Unknown(); } @@ -953,11 +1014,44 @@ class LSEVisitor final : private HGraphDelegateVisitor { // The unknown heap value is used to mark Phi placeholders that cannot be replaced. ScopedArenaVector<Value> phi_placeholder_replacements_; + // Merged-unknowns that must have their predecessor values kept to ensure + // partially escaped values are written + ArenaBitVector kept_merged_unknowns_; + ScopedArenaVector<HInstruction*> singleton_new_instances_; + friend std::ostream& operator<<(std::ostream& os, const Value& v); + DISALLOW_COPY_AND_ASSIGN(LSEVisitor); }; +std::ostream& LSEVisitor::Value::Dump(std::ostream& os) const { + switch (type_) { + case Type::kDefault: + return os << "Default"; + case Type::kInstruction: + return os << "Instruction[id: " << instruction_->GetId() + << ", block: " << instruction_->GetBlock()->GetBlockId() << "]"; + case Type::kUnknown: + return os << "Unknown"; + case Type::kInvalid: + return os << "Invalid"; + case Type::kMergedUnknown: + return os << "MergedUnknown[block: " << phi_placeholder_->GetBlockId() + << ", heap_loc: " << phi_placeholder_->GetHeapLocation() << "]"; + case Type::kNeedsLoopPhi: + return os << "NeedsLoopPhi[block: " << phi_placeholder_->GetBlockId() + << ", heap_loc: " << phi_placeholder_->GetHeapLocation() << "]"; + case Type::kNeedsNonLoopPhi: + return os << "NeedsNonLoopPhi[block: " << phi_placeholder_->GetBlockId() + << ", heap_loc: " << phi_placeholder_->GetHeapLocation() << "]"; + } +} + +std::ostream& operator<<(std::ostream& os, const LSEVisitor::Value& v) { + return v.Dump(os); +} + ScopedArenaVector<LSEVisitor::PhiPlaceholder> LSEVisitor::CreatePhiPlaceholders( HGraph* graph, const HeapLocationCollector& heap_location_collector, @@ -1032,6 +1126,10 @@ LSEVisitor::LSEVisitor(HGraph* graph, phi_placeholder_replacements_(phi_placeholders_.size(), Value::Invalid(), allocator_.Adapter(kArenaAllocLSE)), + kept_merged_unknowns_(&allocator_, + /*start_bits=*/ phi_placeholders_.size(), + /*expandable=*/ false, + kArenaAllocLSE), 
singleton_new_instances_(allocator_.Adapter(kArenaAllocLSE)) { // Clear bit vectors. phi_placeholders_to_search_for_kept_stores_.ClearAllBits(); @@ -1049,18 +1147,21 @@ LSEVisitor::Value LSEVisitor::PrepareLoopValue(HBasicBlock* block, size_t idx) { uint32_t pre_header_block_id = loop_info->GetPreHeader()->GetBlockId(); Value pre_header_value = ReplacementOrValue(heap_values_for_[pre_header_block_id][idx].value); if (pre_header_value.IsUnknown()) { - return Value::Unknown(); + return pre_header_value; } if (kIsDebugBuild) { // Check that the reference indeed dominates this loop. HeapLocation* location = heap_location_collector_.GetHeapLocation(idx); HInstruction* ref = location->GetReferenceInfo()->GetReference(); - CHECK(ref->GetBlock() != block && ref->GetBlock()->Dominates(block)); + CHECK(ref->GetBlock() != block && ref->GetBlock()->Dominates(block)) + << GetGraph()->PrettyMethod(); // Check that the index, if defined inside the loop, tracks a default value // or a Phi placeholder requiring a loop Phi. HInstruction* index = location->GetIndex(); if (index != nullptr && loop_info->Contains(*index->GetBlock())) { - CHECK(pre_header_value.NeedsLoopPhi() || pre_header_value.Equals(Value::Default())); + CHECK(pre_header_value.NeedsLoopPhi() || pre_header_value.Equals(Value::Default())) + << GetGraph()->PrettyMethod() << " blk: " << block->GetBlockId() << " " + << pre_header_value; } } const PhiPlaceholder* phi_placeholder = GetPhiPlaceholder(block->GetBlockId(), idx); @@ -1115,14 +1216,21 @@ LSEVisitor::Value LSEVisitor::MergePredecessorValues(HBasicBlock* block, size_t Value merged_value = ReplacementOrValue(heap_values_for_[predecessors[0]->GetBlockId()][idx].value); for (size_t i = 1u, size = predecessors.size(); i != size; ++i) { - if (merged_value.IsUnknown()) { - break; - } Value pred_value = ReplacementOrValue(heap_values_for_[predecessors[i]->GetBlockId()][idx].value); - if (pred_value.IsUnknown()) { - merged_value = Value::Unknown(); - } else if (!pred_value.Equals(merged_value)) { + if (pred_value.Equals(merged_value) || + (pred_value.IsPureUnknown() && merged_value.IsPureUnknown())) { + // Value is the same. No need to update our merged value. + continue; + } else if (pred_value.IsUnknown() || merged_value.IsUnknown()) { + // If one is unknown and the other is a different type of unknown + const PhiPlaceholder* phi_placeholder = GetPhiPlaceholder(block->GetBlockId(), idx); + merged_value = Value::MergedUnknown(phi_placeholder); + // We know that at least one of the merge points is unknown (and both are + // not pure-unknowns since that's captured above). This means that the + // overall value needs to be a MergedUnknown. Just return that. + break; + } else { // There are conflicting known values. We may still be able to replace loads with a Phi. const PhiPlaceholder* phi_placeholder = GetPhiPlaceholder(block->GetBlockId(), idx); // Propagate the need for a new loop Phi from all predecessors. @@ -1247,7 +1355,8 @@ void LSEVisitor::MaterializeNonLoopPhis(const PhiPlaceholder* phi_placeholder, phi_inputs.clear(); for (HBasicBlock* predecessor : current_block->GetPredecessors()) { Value pred_value = ReplacementOrValue(heap_values_for_[predecessor->GetBlockId()][idx].value); - DCHECK(!pred_value.IsUnknown()); + DCHECK(!pred_value.IsUnknown()) + << "block " << current_block->GetBlockId() << " pred: " << predecessor->GetBlockId(); if (pred_value.NeedsNonLoopPhi()) { // We need to process the Phi placeholder first. 
work_queue.push_back(pred_value.GetPhiPlaceholder()); @@ -1287,8 +1396,10 @@ void LSEVisitor::VisitGetLocation(HInstruction* instruction, size_t idx) { } else if (record.value.IsUnknown()) { // Load isn't eliminated. Put the load as the value into the HeapLocation. // This acts like GVN but with better aliasing analysis. + Value old_value = record.value; record.value = Value::ForInstruction(instruction); KeepStoresIfAliasedToLocation(heap_values, idx); + KeepStores(old_value); } else if (record.value.NeedsLoopPhi()) { // We do not know yet if the value is known for all back edges. Record for future processing. loads_requiring_loop_phi_.insert(std::make_pair(instruction, record)); @@ -2047,8 +2158,22 @@ void LSEVisitor::SearchPhiPlaceholdersForKeptStores() { work_queue.push_back(index); } const ArenaVector<HBasicBlock*>& blocks = GetGraph()->GetBlocks(); + std::optional<ArenaBitVector> not_kept_stores; + if (stats_) { + not_kept_stores.emplace(GetGraph()->GetAllocator(), + kept_stores_.GetBitSizeOf(), + false, + ArenaAllocKind::kArenaAllocLSE); + } while (!work_queue.empty()) { - const PhiPlaceholder* phi_placeholder = &phi_placeholders_[work_queue.back()]; + uint32_t cur_phi_idx = work_queue.back(); + const PhiPlaceholder* phi_placeholder = &phi_placeholders_[cur_phi_idx]; + // Only writes to partial-escapes need to be specifically kept. + bool is_partial_kept_merged_unknown = + kept_merged_unknowns_.IsBitSet(cur_phi_idx) && + heap_location_collector_.GetHeapLocation(phi_placeholder->GetHeapLocation()) + ->GetReferenceInfo() + ->IsPartialSingleton(); work_queue.pop_back(); size_t idx = phi_placeholder->GetHeapLocation(); HBasicBlock* block = blocks[phi_placeholder->GetBlockId()]; @@ -2091,18 +2216,39 @@ void LSEVisitor::SearchPhiPlaceholdersForKeptStores() { if (!stored_by.IsUnknown() && (i == idx || may_alias(i))) { if (stored_by.NeedsPhi()) { size_t phi_placeholder_index = PhiPlaceholderIndex(stored_by); + if (is_partial_kept_merged_unknown) { + // Propagate merged-unknown keep since otherwise this might look + // like a partial escape we can remove. 
+ kept_merged_unknowns_.SetBit(phi_placeholder_index); + } if (!phi_placeholders_to_search_for_kept_stores_.IsBitSet(phi_placeholder_index)) { phi_placeholders_to_search_for_kept_stores_.SetBit(phi_placeholder_index); work_queue.push_back(phi_placeholder_index); } } else { DCHECK(IsStore(stored_by.GetInstruction())); - kept_stores_.SetBit(stored_by.GetInstruction()->GetId()); + ReferenceInfo* ri = heap_location_collector_.GetHeapLocation(i)->GetReferenceInfo(); + DCHECK(ri != nullptr) << "No heap value for " << stored_by.GetInstruction()->DebugName() + << " id: " << stored_by.GetInstruction()->GetId() << " block: " + << stored_by.GetInstruction()->GetBlock()->GetBlockId(); + if (!is_partial_kept_merged_unknown && IsPartialNoEscape(predecessor, idx)) { + if (not_kept_stores) { + not_kept_stores->SetBit(stored_by.GetInstruction()->GetId()); + } + } else { + kept_stores_.SetBit(stored_by.GetInstruction()->GetId()); + } } } } } } + if (not_kept_stores) { + // a - b := (a & ~b) + not_kept_stores->Subtract(&kept_stores_); + auto num_removed = not_kept_stores->NumSetBits(); + MaybeRecordStat(stats_, MethodCompilationStat::kPartialStoreRemoved, num_removed); + } } void LSEVisitor::UpdateValueRecordForStoreElimination(/*inout*/ValueRecord* value_record) { @@ -2273,6 +2419,7 @@ void LSEVisitor::Run() { if (!new_instance->HasNonEnvironmentUses()) { new_instance->RemoveEnvironmentUsers(); new_instance->GetBlock()->RemoveInstruction(new_instance); + MaybeRecordStat(stats_, MethodCompilationStat::kFullLSEAllocationRemoved); } } } @@ -2284,8 +2431,14 @@ bool LoadStoreElimination::Run() { // Skip this optimization. return false; } + // We need to be able to determine reachability. Clear it just to be safe but + // this should initially be empty. + graph_->ClearReachabilityInformation(); + // This is O(blocks^3) time complexity. It means we can query reachability in + // O(1) though. 
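+  // Reachability is computed once here so that the analysis can answer
+  // HGraph::PathBetween() queries (used when pruning partial-escape writes)
+  // in constant time.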
+ graph_->ComputeReachabilityInformation(); ScopedArenaAllocator allocator(graph_->GetArenaStack()); - LoadStoreAnalysis lsa(graph_, &allocator); + LoadStoreAnalysis lsa(graph_, stats_, &allocator, /*for_elimination=*/true); lsa.Run(); const HeapLocationCollector& heap_location_collector = lsa.GetHeapLocationCollector(); if (heap_location_collector.GetNumberOfHeapLocations() == 0) { diff --git a/compiler/optimizing/load_store_elimination_test.cc b/compiler/optimizing/load_store_elimination_test.cc index f71a4734a6..8196a292d7 100644 --- a/compiler/optimizing/load_store_elimination_test.cc +++ b/compiler/optimizing/load_store_elimination_test.cc @@ -27,6 +27,13 @@ namespace art { class LoadStoreEliminationTest : public OptimizingUnitTest { public: + AdjacencyListGraph SetupFromAdjacencyList( + const std::string_view entry_name, + const std::string_view exit_name, + const std::vector<AdjacencyListGraph::Edge>& adj) { + return AdjacencyListGraph(graph_, GetAllocator(), entry_name, exit_name, adj); + } + void PerformLSE() { graph_->BuildDominatorTree(); LoadStoreElimination lse(graph_, /*stats=*/ nullptr); @@ -1442,4 +1449,1182 @@ TEST_F(LoadStoreEliminationTest, ArrayMergeDefault) { EXPECT_TRUE(IsRemoved(alloc_w)); } +// // ENTRY +// obj = new Obj(); +// if (parameter_value) { +// // LEFT +// obj.field = 1; +// call_func(obj); +// foo_r = obj.field +// } else { +// // TO BE ELIMINATED +// obj.field = 2; +// // RIGHT +// // TO BE ELIMINATED +// foo_l = obj.field; +// } +// EXIT +// return PHI(foo_l, foo_r) +TEST_F(LoadStoreEliminationTest, PartialLoadElimination) { + InitGraph(); + AdjacencyListGraph blks(SetupFromAdjacencyList("entry", + "exit_REAL", + { { "entry", "left" }, + { "entry", "right" }, + { "left", "exit" }, + { "right", "exit" }, + { "exit", "exit_REAL" } })); + HBasicBlock* entry = blks.Get("entry"); + HBasicBlock* left = blks.Get("left"); + HBasicBlock* right = blks.Get("right"); + HBasicBlock* exit = blks.Get("exit"); + HInstruction* bool_value = new (GetAllocator()) + HParameterValue(graph_->GetDexFile(), dex::TypeIndex(1), 1, DataType::Type::kBool); + HInstruction* c1 = graph_->GetIntConstant(1); + HInstruction* c2 = graph_->GetIntConstant(2); + HInstruction* cls = new (GetAllocator()) HLoadClass(graph_->GetCurrentMethod(), + dex::TypeIndex(10), + graph_->GetDexFile(), + ScopedNullHandle<mirror::Class>(), + false, + 0, + false); + HInstruction* new_inst = + new (GetAllocator()) HNewInstance(cls, + 0, + dex::TypeIndex(10), + graph_->GetDexFile(), + false, + QuickEntrypointEnum::kQuickAllocObjectInitialized); + HInstruction* if_inst = new (GetAllocator()) HIf(bool_value); + entry->AddInstruction(bool_value); + entry->AddInstruction(cls); + entry->AddInstruction(new_inst); + entry->AddInstruction(if_inst); + ArenaVector<HInstruction*> current_locals({}, GetAllocator()->Adapter(kArenaAllocInstruction)); + ManuallyBuildEnvFor(cls, ¤t_locals); + new_inst->CopyEnvironmentFrom(cls->GetEnvironment()); + + HInstruction* write_left = new (GetAllocator()) HInstanceFieldSet(new_inst, + c1, + nullptr, + DataType::Type::kInt32, + MemberOffset(10), + false, + 0, + 0, + graph_->GetDexFile(), + 0); + HInstruction* call_left = new (GetAllocator()) + HInvokeStaticOrDirect(GetAllocator(), + 1, + DataType::Type::kVoid, + 0, + { nullptr, 0 }, + nullptr, + {}, + InvokeType::kStatic, + { nullptr, 0 }, + HInvokeStaticOrDirect::ClinitCheckRequirement::kNone); + HInstruction* read_left = new (GetAllocator()) HInstanceFieldGet(new_inst, + nullptr, + DataType::Type::kInt32, + MemberOffset(16), + false, + 
0, + 0, + graph_->GetDexFile(), + 0); + HInstruction* goto_left = new (GetAllocator()) HGoto(); + call_left->AsInvoke()->SetRawInputAt(0, new_inst); + left->AddInstruction(write_left); + left->AddInstruction(call_left); + left->AddInstruction(read_left); + left->AddInstruction(goto_left); + call_left->CopyEnvironmentFrom(cls->GetEnvironment()); + + HInstruction* write_right = new (GetAllocator()) HInstanceFieldSet(new_inst, + c2, + nullptr, + DataType::Type::kInt32, + MemberOffset(16), + false, + 0, + 0, + graph_->GetDexFile(), + 0); + HInstruction* read_right = new (GetAllocator()) HInstanceFieldGet(new_inst, + nullptr, + DataType::Type::kInt32, + MemberOffset(16), + false, + 0, + 0, + graph_->GetDexFile(), + 0); + HInstruction* goto_right = new (GetAllocator()) HGoto(); + right->AddInstruction(write_right); + right->AddInstruction(read_right); + right->AddInstruction(goto_right); + + HInstruction* phi_final = + new (GetAllocator()) HPhi(GetAllocator(), 12, 2, DataType::Type::kInt32); + phi_final->SetRawInputAt(0, read_left); + phi_final->SetRawInputAt(1, read_right); + HInstruction* return_exit = new (GetAllocator()) HReturn(phi_final); + exit->AddPhi(phi_final->AsPhi()); + exit->AddInstruction(return_exit); + + // PerformLSE expects this to be empty. + graph_->ClearDominanceInformation(); + PerformLSE(); + + ASSERT_TRUE(IsRemoved(read_right)); + ASSERT_FALSE(IsRemoved(read_left)); + ASSERT_FALSE(IsRemoved(phi_final)); + ASSERT_TRUE(phi_final->GetInputs()[1] == c2); + ASSERT_TRUE(phi_final->GetInputs()[0] == read_left); + ASSERT_TRUE(IsRemoved(write_right)); +} + +// // ENTRY +// obj = new Obj(); +// if (parameter_value) { +// // LEFT +// obj.field = 1; +// call_func(obj); +// // We don't know what obj.field is now we aren't able to eliminate the read below! +// } else { +// // DO NOT ELIMINATE +// obj.field = 2; +// // RIGHT +// } +// EXIT +// return obj.field +// TODO We eventually want to be able to eliminate the right write along with the final read but +// will need either new blocks or new instructions. 
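For intuition about the TODO above: by analogy with the PartialLoadElimination test earlier in this file, the eventual goal is to materialize the load only on the escaping path so that the merged read becomes a Phi and the store on the non-escaping path dies. A hedged sketch of that end state, in the same pseudocode style as the test comments (this patch does not perform this transformation; it only adds the analysis machinery for it):
// // ENTRY
// obj = new Obj();
// if (parameter_value) {
//   obj.field = 1;
//   call_func(obj);
//   tmp = obj.field;  // new load, inserted on the escaping path only
// } else {
//   // obj.field = 2 would no longer be needed here
// }
// EXIT
// return PHI(tmp, 2)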
+TEST_F(LoadStoreEliminationTest, PartialLoadPreserved) { + InitGraph(); + AdjacencyListGraph blks(SetupFromAdjacencyList("entry", + "exit_REAL", + { { "entry", "left" }, + { "entry", "right" }, + { "left", "exit" }, + { "right", "exit" }, + { "exit", "exit_REAL" } })); + HBasicBlock* entry = blks.Get("entry"); + HBasicBlock* left = blks.Get("left"); + HBasicBlock* right = blks.Get("right"); + HBasicBlock* exit = blks.Get("exit"); + HInstruction* bool_value = new (GetAllocator()) + HParameterValue(graph_->GetDexFile(), dex::TypeIndex(1), 1, DataType::Type::kBool); + HInstruction* c1 = graph_->GetIntConstant(1); + HInstruction* c2 = graph_->GetIntConstant(2); + HInstruction* cls = new (GetAllocator()) HLoadClass(graph_->GetCurrentMethod(), + dex::TypeIndex(10), + graph_->GetDexFile(), + ScopedNullHandle<mirror::Class>(), + false, + 0, + false); + HInstruction* new_inst = + new (GetAllocator()) HNewInstance(cls, + 0, + dex::TypeIndex(10), + graph_->GetDexFile(), + false, + QuickEntrypointEnum::kQuickAllocObjectInitialized); + HInstruction* if_inst = new (GetAllocator()) HIf(bool_value); + entry->AddInstruction(bool_value); + entry->AddInstruction(cls); + entry->AddInstruction(new_inst); + entry->AddInstruction(if_inst); + ArenaVector<HInstruction*> current_locals({}, GetAllocator()->Adapter(kArenaAllocInstruction)); + ManuallyBuildEnvFor(cls, ¤t_locals); + new_inst->CopyEnvironmentFrom(cls->GetEnvironment()); + + HInstruction* write_left = new (GetAllocator()) HInstanceFieldSet(new_inst, + c1, + nullptr, + DataType::Type::kInt32, + MemberOffset(10), + false, + 0, + 0, + graph_->GetDexFile(), + 0); + HInstruction* call_left = new (GetAllocator()) + HInvokeStaticOrDirect(GetAllocator(), + 1, + DataType::Type::kVoid, + 0, + { nullptr, 0 }, + nullptr, + {}, + InvokeType::kStatic, + { nullptr, 0 }, + HInvokeStaticOrDirect::ClinitCheckRequirement::kNone); + HInstruction* goto_left = new (GetAllocator()) HGoto(); + call_left->AsInvoke()->SetRawInputAt(0, new_inst); + left->AddInstruction(write_left); + left->AddInstruction(call_left); + left->AddInstruction(goto_left); + call_left->CopyEnvironmentFrom(cls->GetEnvironment()); + + HInstruction* write_right = new (GetAllocator()) HInstanceFieldSet(new_inst, + c2, + nullptr, + DataType::Type::kInt32, + MemberOffset(10), + false, + 0, + 0, + graph_->GetDexFile(), + 0); + HInstruction* goto_right = new (GetAllocator()) HGoto(); + right->AddInstruction(write_right); + right->AddInstruction(goto_right); + + HInstruction* read_bottom = new (GetAllocator()) HInstanceFieldGet(new_inst, + nullptr, + DataType::Type::kInt32, + MemberOffset(10), + false, + 0, + 0, + graph_->GetDexFile(), + 0); + HInstruction* return_exit = new (GetAllocator()) HReturn(read_bottom); + exit->AddInstruction(read_bottom); + exit->AddInstruction(return_exit); + // PerformLSE expects this to be empty. + graph_->ClearDominanceInformation(); + PerformLSE(); + + ASSERT_FALSE(IsRemoved(read_bottom)); + ASSERT_FALSE(IsRemoved(write_right)); +} + +// // ENTRY +// obj = new Obj(); +// if (parameter_value) { +// // LEFT +// obj.field = 1; +// call_func(obj); +// // We don't know what obj.field is now we aren't able to eliminate the read below! +// } else { +// // DO NOT ELIMINATE +// if (param2) { +// obj.field = 2; +// } else { +// obj.field = 3; +// } +// // RIGHT +// } +// EXIT +// return obj.field +// TODO We eventually want to be able to eliminate the right write along with the final read but +// will need either new blocks or new instructions. 
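All of these tests model "the object escapes on this path" with the same boilerplate: an opaque static call that receives the freshly allocated instance as its only argument. Purely as an illustration of that pattern (this helper does not exist in the patch; the constructor arguments simply mirror the calls already written in the tests), it condenses to:

// Hypothetical helper: build and attach the opaque call that forces 'obj'
// to escape in block 'blk'.
HInstruction* MakeEscapeCall(HBasicBlock* blk, HInstruction* obj, HEnvironment* env) {
  ArenaAllocator* alloc = blk->GetGraph()->GetAllocator();
  HInvokeStaticOrDirect* call =
      new (alloc) HInvokeStaticOrDirect(alloc,
                                        /* number_of_arguments= */ 1,
                                        DataType::Type::kVoid,
                                        /* dex_pc= */ 0,
                                        { nullptr, 0 },
                                        nullptr,
                                        {},
                                        InvokeType::kStatic,
                                        { nullptr, 0 },
                                        HInvokeStaticOrDirect::ClinitCheckRequirement::kNone);
  call->SetRawInputAt(0, obj);      // 'obj' is handed to an unknown callee
  blk->AddInstruction(call);
  call->CopyEnvironmentFrom(env);   // must follow AddInstruction, as in the tests
  return call;
}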
+TEST_F(LoadStoreEliminationTest, PartialLoadPreserved2) { + InitGraph(); + AdjacencyListGraph blks(SetupFromAdjacencyList("entry", + "exit_REAL", + { { "entry", "left" }, + { "entry", "right_start" }, + { "left", "exit" }, + { "right_start", "right_first" }, + { "right_start", "right_second" }, + { "right_first", "right_end" }, + { "right_second", "right_end" }, + { "right_end", "exit" }, + { "exit", "exit_REAL" } })); + HBasicBlock* entry = blks.Get("entry"); + HBasicBlock* left = blks.Get("left"); + HBasicBlock* right_start = blks.Get("right_start"); + HBasicBlock* right_first = blks.Get("right_first"); + HBasicBlock* right_second = blks.Get("right_second"); + HBasicBlock* right_end = blks.Get("right_end"); + HBasicBlock* exit = blks.Get("exit"); + HInstruction* bool_value = new (GetAllocator()) + HParameterValue(graph_->GetDexFile(), dex::TypeIndex(1), 1, DataType::Type::kBool); + HInstruction* bool_value_2 = new (GetAllocator()) + HParameterValue(graph_->GetDexFile(), dex::TypeIndex(1), 2, DataType::Type::kBool); + HInstruction* c1 = graph_->GetIntConstant(1); + HInstruction* c2 = graph_->GetIntConstant(2); + HInstruction* c3 = graph_->GetIntConstant(3); + HInstruction* cls = new (GetAllocator()) HLoadClass(graph_->GetCurrentMethod(), + dex::TypeIndex(10), + graph_->GetDexFile(), + ScopedNullHandle<mirror::Class>(), + false, + 0, + false); + HInstruction* new_inst = + new (GetAllocator()) HNewInstance(cls, + 0, + dex::TypeIndex(10), + graph_->GetDexFile(), + false, + QuickEntrypointEnum::kQuickAllocObjectInitialized); + HInstruction* if_inst = new (GetAllocator()) HIf(bool_value); + entry->AddInstruction(bool_value); + entry->AddInstruction(bool_value_2); + entry->AddInstruction(cls); + entry->AddInstruction(new_inst); + entry->AddInstruction(if_inst); + ArenaVector<HInstruction*> current_locals({}, GetAllocator()->Adapter(kArenaAllocInstruction)); + ManuallyBuildEnvFor(cls, ¤t_locals); + new_inst->CopyEnvironmentFrom(cls->GetEnvironment()); + + HInstruction* write_left = new (GetAllocator()) HInstanceFieldSet(new_inst, + c1, + nullptr, + DataType::Type::kInt32, + MemberOffset(10), + false, + 0, + 0, + graph_->GetDexFile(), + 0); + HInstruction* call_left = new (GetAllocator()) + HInvokeStaticOrDirect(GetAllocator(), + 1, + DataType::Type::kVoid, + 0, + { nullptr, 0 }, + nullptr, + {}, + InvokeType::kStatic, + { nullptr, 0 }, + HInvokeStaticOrDirect::ClinitCheckRequirement::kNone); + HInstruction* goto_left = new (GetAllocator()) HGoto(); + call_left->AsInvoke()->SetRawInputAt(0, new_inst); + left->AddInstruction(write_left); + left->AddInstruction(call_left); + left->AddInstruction(goto_left); + call_left->CopyEnvironmentFrom(cls->GetEnvironment()); + + HInstruction* right_if = new (GetAllocator()) HIf(bool_value_2); + right_start->AddInstruction(right_if); + + HInstruction* write_right_first = new (GetAllocator()) HInstanceFieldSet(new_inst, + c2, + nullptr, + DataType::Type::kInt32, + MemberOffset(10), + false, + 0, + 0, + graph_->GetDexFile(), + 0); + HInstruction* goto_right_first = new (GetAllocator()) HGoto(); + right_first->AddInstruction(write_right_first); + right_first->AddInstruction(goto_right_first); + + HInstruction* write_right_second = new (GetAllocator()) HInstanceFieldSet(new_inst, + c3, + nullptr, + DataType::Type::kInt32, + MemberOffset(10), + false, + 0, + 0, + graph_->GetDexFile(), + 0); + HInstruction* goto_right_second = new (GetAllocator()) HGoto(); + right_second->AddInstruction(write_right_second); + right_second->AddInstruction(goto_right_second); + + 
HInstruction* goto_right_end = new (GetAllocator()) HGoto(); + right_end->AddInstruction(goto_right_end); + + HInstruction* read_bottom = new (GetAllocator()) HInstanceFieldGet(new_inst, + nullptr, + DataType::Type::kInt32, + MemberOffset(10), + false, + 0, + 0, + graph_->GetDexFile(), + 0); + HInstruction* return_exit = new (GetAllocator()) HReturn(read_bottom); + exit->AddInstruction(read_bottom); + exit->AddInstruction(return_exit); + // PerformLSE expects this to be empty. + graph_->ClearDominanceInformation(); + PerformLSE(); + + ASSERT_FALSE(IsRemoved(read_bottom)); + EXPECT_FALSE(IsRemoved(write_right_first)); + EXPECT_FALSE(IsRemoved(write_right_second)); +} + +// // ENTRY +// obj = new Obj(); +// if (parameter_value) { +// // LEFT +// // DO NOT ELIMINATE +// escape(obj); +// obj.field = 1; +// } else { +// // RIGHT +// // ELIMINATE +// obj.field = 2; +// } +// EXIT +// ELIMINATE +// return obj.field +TEST_F(LoadStoreEliminationTest, PartialLoadElimination2) { + InitGraph(); + AdjacencyListGraph blks(SetupFromAdjacencyList("entry", + "exit", + { { "entry", "left" }, + { "entry", "right" }, + { "left", "breturn"}, + { "right", "breturn" }, + { "breturn", "exit" } })); +#define GET_BLOCK(name) HBasicBlock* name = blks.Get(#name) + GET_BLOCK(entry); + GET_BLOCK(exit); + GET_BLOCK(breturn); + GET_BLOCK(left); + GET_BLOCK(right); +#undef GET_BLOCK + HInstruction* bool_value = new (GetAllocator()) + HParameterValue(graph_->GetDexFile(), dex::TypeIndex(1), 1, DataType::Type::kBool); + HInstruction* c1 = graph_->GetIntConstant(1); + HInstruction* c2 = graph_->GetIntConstant(2); + HInstruction* cls = new (GetAllocator()) HLoadClass(graph_->GetCurrentMethod(), + dex::TypeIndex(10), + graph_->GetDexFile(), + ScopedNullHandle<mirror::Class>(), + false, + 0, + false); + HInstruction* new_inst = + new (GetAllocator()) HNewInstance(cls, + 0, + dex::TypeIndex(10), + graph_->GetDexFile(), + false, + QuickEntrypointEnum::kQuickAllocObjectInitialized); + HInstruction* if_inst = new (GetAllocator()) HIf(bool_value); + entry->AddInstruction(bool_value); + entry->AddInstruction(cls); + entry->AddInstruction(new_inst); + entry->AddInstruction(if_inst); + ArenaVector<HInstruction*> current_locals({}, GetAllocator()->Adapter(kArenaAllocInstruction)); + ManuallyBuildEnvFor(cls, ¤t_locals); + new_inst->CopyEnvironmentFrom(cls->GetEnvironment()); + + HInstruction* call_left = new (GetAllocator()) + HInvokeStaticOrDirect(GetAllocator(), + 1, + DataType::Type::kVoid, + 0, + { nullptr, 0 }, + nullptr, + {}, + InvokeType::kStatic, + { nullptr, 0 }, + HInvokeStaticOrDirect::ClinitCheckRequirement::kNone); + HInstruction* write_left = new (GetAllocator()) HInstanceFieldSet(new_inst, + c1, + nullptr, + DataType::Type::kInt32, + MemberOffset(10), + false, + 0, + 0, + graph_->GetDexFile(), + 0); + HInstruction* goto_left = new (GetAllocator()) HGoto(); + call_left->AsInvoke()->SetRawInputAt(0, new_inst); + left->AddInstruction(call_left); + left->AddInstruction(write_left); + left->AddInstruction(goto_left); + call_left->CopyEnvironmentFrom(cls->GetEnvironment()); + + HInstruction* write_right = new (GetAllocator()) HInstanceFieldSet(new_inst, + c2, + nullptr, + DataType::Type::kInt32, + MemberOffset(10), + false, + 0, + 0, + graph_->GetDexFile(), + 0); + HInstruction* goto_right = new (GetAllocator()) HGoto(); + right->AddInstruction(write_right); + right->AddInstruction(goto_right); + + HInstruction* read_bottom = new (GetAllocator()) HInstanceFieldGet(new_inst, + nullptr, + DataType::Type::kInt32, + 
MemberOffset(10), + false, + 0, + 0, + graph_->GetDexFile(), + 0); + HInstruction* return_exit = new (GetAllocator()) HReturn(read_bottom); + breturn->AddInstruction(read_bottom); + breturn->AddInstruction(return_exit); + + HInstruction* exit_instruction = new (GetAllocator()) HExit(); + exit->AddInstruction(exit_instruction); + // PerformLSE expects this to be empty. + graph_->ClearDominanceInformation(); + PerformLSE(); + + ASSERT_TRUE(IsRemoved(read_bottom)); + ASSERT_TRUE(IsRemoved(write_right)); + ASSERT_FALSE(IsRemoved(write_left)); + ASSERT_FALSE(IsRemoved(call_left)); +} + +// // ENTRY +// obj = new Obj(); +// if (parameter_value) { +// // LEFT +// // DO NOT ELIMINATE +// obj.field = 1; +// escape(obj); +// return obj.field; +// } else { +// // RIGHT +// // ELIMINATE +// obj.field = 2; +// return obj.field; +// } +// EXIT +TEST_F(LoadStoreEliminationTest, PartialLoadElimination3) { + InitGraph(); + AdjacencyListGraph blks(SetupFromAdjacencyList( + "entry", + "exit", + { { "entry", "left" }, { "entry", "right" }, { "left", "exit" }, { "right", "exit" } })); +#define GET_BLOCK(name) HBasicBlock* name = blks.Get(#name) + GET_BLOCK(entry); + GET_BLOCK(exit); + GET_BLOCK(left); + GET_BLOCK(right); +#undef GET_BLOCK + HInstruction* bool_value = new (GetAllocator()) + HParameterValue(graph_->GetDexFile(), dex::TypeIndex(1), 1, DataType::Type::kBool); + HInstruction* c1 = graph_->GetIntConstant(1); + HInstruction* c2 = graph_->GetIntConstant(2); + HInstruction* cls = new (GetAllocator()) HLoadClass(graph_->GetCurrentMethod(), + dex::TypeIndex(10), + graph_->GetDexFile(), + ScopedNullHandle<mirror::Class>(), + false, + 0, + false); + HInstruction* new_inst = + new (GetAllocator()) HNewInstance(cls, + 0, + dex::TypeIndex(10), + graph_->GetDexFile(), + false, + QuickEntrypointEnum::kQuickAllocObjectInitialized); + HInstruction* if_inst = new (GetAllocator()) HIf(bool_value); + entry->AddInstruction(bool_value); + entry->AddInstruction(cls); + entry->AddInstruction(new_inst); + entry->AddInstruction(if_inst); + ArenaVector<HInstruction*> current_locals({}, GetAllocator()->Adapter(kArenaAllocInstruction)); + ManuallyBuildEnvFor(cls, ¤t_locals); + new_inst->CopyEnvironmentFrom(cls->GetEnvironment()); + + HInstruction* write_left = new (GetAllocator()) HInstanceFieldSet(new_inst, + c1, + nullptr, + DataType::Type::kInt32, + MemberOffset(10), + false, + 0, + 0, + graph_->GetDexFile(), + 0); + HInstruction* call_left = new (GetAllocator()) + HInvokeStaticOrDirect(GetAllocator(), + 1, + DataType::Type::kVoid, + 0, + { nullptr, 0 }, + nullptr, + {}, + InvokeType::kStatic, + { nullptr, 0 }, + HInvokeStaticOrDirect::ClinitCheckRequirement::kNone); + HInstruction* read_left = new (GetAllocator()) HInstanceFieldGet(new_inst, + nullptr, + DataType::Type::kInt32, + MemberOffset(10), + false, + 0, + 0, + graph_->GetDexFile(), + 0); + HInstruction* return_left = new (GetAllocator()) HReturn(read_left); + call_left->AsInvoke()->SetRawInputAt(0, new_inst); + left->AddInstruction(write_left); + left->AddInstruction(call_left); + left->AddInstruction(read_left); + left->AddInstruction(return_left); + call_left->CopyEnvironmentFrom(cls->GetEnvironment()); + + HInstruction* write_right = new (GetAllocator()) HInstanceFieldSet(new_inst, + c2, + nullptr, + DataType::Type::kInt32, + MemberOffset(10), + false, + 0, + 0, + graph_->GetDexFile(), + 0); + HInstruction* read_right = new (GetAllocator()) HInstanceFieldGet(new_inst, + nullptr, + DataType::Type::kInt32, + MemberOffset(10), + false, + 0, + 0, + 
graph_->GetDexFile(), + 0); + HInstruction* return_right = new (GetAllocator()) HReturn(read_right); + right->AddInstruction(write_right); + right->AddInstruction(read_right); + right->AddInstruction(return_right); + + HInstruction* exit_instruction = new (GetAllocator()) HExit(); + exit->AddInstruction(exit_instruction); + // PerformLSE expects this to be empty. + graph_->ClearDominanceInformation(); + PerformLSE(); + + EXPECT_TRUE(IsRemoved(read_right)); + EXPECT_TRUE(IsRemoved(write_right)); + EXPECT_FALSE(IsRemoved(write_left)); + EXPECT_FALSE(IsRemoved(call_left)); + EXPECT_FALSE(IsRemoved(read_left)); +} + +// // ENTRY +// obj = new Obj(); +// if (parameter_value) { +// // LEFT +// // DO NOT ELIMINATE +// obj.field = 1; +// while (true) { +// bool esc = escape(obj); +// // DO NOT ELIMINATE +// obj.field = 3; +// if (esc) break; +// } +// // ELIMINATE. +// return obj.field; +// } else { +// // RIGHT +// // ELIMINATE +// obj.field = 2; +// return obj.field; +// } +// EXIT +TEST_F(LoadStoreEliminationTest, PartialLoadElimination4) { + InitGraph(); + AdjacencyListGraph blks(SetupFromAdjacencyList("entry", + "exit", + { { "entry", "entry_post" }, + { "entry_post", "right" }, + { "right", "exit" }, + { "entry_post", "left_pre" }, + { "left_pre", "left_loop" }, + { "left_loop", "left_loop" }, + { "left_loop", "left_finish" }, + { "left_finish", "exit" } })); +#define GET_BLOCK(name) HBasicBlock* name = blks.Get(#name) + GET_BLOCK(entry); + GET_BLOCK(entry_post); + GET_BLOCK(exit); + GET_BLOCK(left_pre); + GET_BLOCK(left_loop); + GET_BLOCK(left_finish); + GET_BLOCK(right); +#undef GET_BLOCK + // Left-loops first successor is the break. + if (left_loop->GetSuccessors()[0] != left_finish) { + left_loop->SwapSuccessors(); + } + HInstruction* bool_value = new (GetAllocator()) + HParameterValue(graph_->GetDexFile(), dex::TypeIndex(1), 1, DataType::Type::kBool); + HInstruction* c1 = graph_->GetIntConstant(1); + HInstruction* c2 = graph_->GetIntConstant(2); + HInstruction* c3 = graph_->GetIntConstant(3); + HInstruction* cls = new (GetAllocator()) HLoadClass(graph_->GetCurrentMethod(), + dex::TypeIndex(10), + graph_->GetDexFile(), + ScopedNullHandle<mirror::Class>(), + false, + 0, + false); + HInstruction* new_inst = + new (GetAllocator()) HNewInstance(cls, + 0, + dex::TypeIndex(10), + graph_->GetDexFile(), + false, + QuickEntrypointEnum::kQuickAllocObjectInitialized); + HInstruction* goto_entry = new (GetAllocator()) HGoto(); + entry->AddInstruction(bool_value); + entry->AddInstruction(cls); + entry->AddInstruction(new_inst); + entry->AddInstruction(goto_entry); + ArenaVector<HInstruction*> current_locals({}, GetAllocator()->Adapter(kArenaAllocInstruction)); + ManuallyBuildEnvFor(cls, ¤t_locals); + new_inst->CopyEnvironmentFrom(cls->GetEnvironment()); + + HInstruction* if_inst = new (GetAllocator()) HIf(bool_value); + entry_post->AddInstruction(if_inst); + + HInstruction* write_left_pre = new (GetAllocator()) HInstanceFieldSet(new_inst, + c1, + nullptr, + DataType::Type::kInt32, + MemberOffset(10), + false, + 0, + 0, + graph_->GetDexFile(), + 0); + HInstruction* goto_left_pre = new (GetAllocator()) HGoto(); + left_pre->AddInstruction(write_left_pre); + left_pre->AddInstruction(goto_left_pre); + + HInstruction* suspend_left_loop = new (GetAllocator()) HSuspendCheck(); + HInstruction* call_left_loop = new (GetAllocator()) + HInvokeStaticOrDirect(GetAllocator(), + 1, + DataType::Type::kBool, + 0, + { nullptr, 0 }, + nullptr, + {}, + InvokeType::kStatic, + { nullptr, 0 }, + 
HInvokeStaticOrDirect::ClinitCheckRequirement::kNone); + HInstruction* write_left_loop = new (GetAllocator()) HInstanceFieldSet(new_inst, + c3, + nullptr, + DataType::Type::kInt32, + MemberOffset(10), + false, + 0, + 0, + graph_->GetDexFile(), + 0); + HInstruction* if_left_loop = new (GetAllocator()) HIf(call_left_loop); + call_left_loop->AsInvoke()->SetRawInputAt(0, new_inst); + left_loop->AddInstruction(suspend_left_loop); + left_loop->AddInstruction(call_left_loop); + left_loop->AddInstruction(write_left_loop); + left_loop->AddInstruction(if_left_loop); + suspend_left_loop->CopyEnvironmentFrom(cls->GetEnvironment()); + call_left_loop->CopyEnvironmentFrom(cls->GetEnvironment()); + + HInstruction* read_left_end = new (GetAllocator()) HInstanceFieldGet(new_inst, + nullptr, + DataType::Type::kInt32, + MemberOffset(10), + false, + 0, + 0, + graph_->GetDexFile(), + 0); + HInstruction* return_left_end = new (GetAllocator()) HReturn(read_left_end); + left_finish->AddInstruction(read_left_end); + left_finish->AddInstruction(return_left_end); + + HInstruction* write_right = new (GetAllocator()) HInstanceFieldSet(new_inst, + c2, + nullptr, + DataType::Type::kInt32, + MemberOffset(10), + false, + 0, + 0, + graph_->GetDexFile(), + 0); + HInstruction* read_right = new (GetAllocator()) HInstanceFieldGet(new_inst, + nullptr, + DataType::Type::kInt32, + MemberOffset(10), + false, + 0, + 0, + graph_->GetDexFile(), + 0); + HInstruction* return_right = new (GetAllocator()) HReturn(read_right); + right->AddInstruction(write_right); + right->AddInstruction(read_right); + right->AddInstruction(return_right); + + HInstruction* exit_instruction = new (GetAllocator()) HExit(); + exit->AddInstruction(exit_instruction); + // PerformLSE expects this to be empty. + graph_->ClearDominanceInformation(); + PerformLSE(); + + EXPECT_FALSE(IsRemoved(write_left_pre)); + EXPECT_TRUE(IsRemoved(read_right)); + EXPECT_TRUE(IsRemoved(write_right)); + EXPECT_FALSE(IsRemoved(write_left_loop)); + EXPECT_FALSE(IsRemoved(call_left_loop)); + EXPECT_TRUE(IsRemoved(read_left_end)); +} + +// // ENTRY +// obj = new Obj(); +// if (parameter_value) { +// // LEFT +// // DO NOT ELIMINATE +// obj.field = 1; +// while (true) { +// bool esc = escape(obj); +// if (esc) break; +// // DO NOT ELIMINATE +// obj.field = 3; +// } +// } else { +// // RIGHT +// // DO NOT ELIMINATE +// obj.field = 2; +// } +// // DO NOT ELIMINATE +// return obj.field; +// EXIT +TEST_F(LoadStoreEliminationTest, PartialLoadPreserved3) { + InitGraph(); + AdjacencyListGraph blks(SetupFromAdjacencyList("entry", + "exit", + { { "entry", "entry_post" }, + { "entry_post", "right" }, + { "right", "return_block" }, + { "entry_post", "left_pre" }, + { "left_pre", "left_loop" }, + { "left_loop", "left_loop_post" }, + { "left_loop_post", "left_loop" }, + { "left_loop", "return_block" }, + { "return_block", "exit" } })); +#define GET_BLOCK(name) HBasicBlock* name = blks.Get(#name) + GET_BLOCK(entry); + GET_BLOCK(entry_post); + GET_BLOCK(exit); + GET_BLOCK(return_block); + GET_BLOCK(left_pre); + GET_BLOCK(left_loop); + GET_BLOCK(left_loop_post); + GET_BLOCK(right); +#undef GET_BLOCK + // Left-loops first successor is the break. 
+ if (left_loop->GetSuccessors()[0] != return_block) { + left_loop->SwapSuccessors(); + } + HInstruction* bool_value = new (GetAllocator()) + HParameterValue(graph_->GetDexFile(), dex::TypeIndex(1), 1, DataType::Type::kBool); + HInstruction* c1 = graph_->GetIntConstant(1); + HInstruction* c2 = graph_->GetIntConstant(2); + HInstruction* c3 = graph_->GetIntConstant(3); + HInstruction* cls = new (GetAllocator()) HLoadClass(graph_->GetCurrentMethod(), + dex::TypeIndex(10), + graph_->GetDexFile(), + ScopedNullHandle<mirror::Class>(), + false, + 0, + false); + HInstruction* new_inst = + new (GetAllocator()) HNewInstance(cls, + 0, + dex::TypeIndex(10), + graph_->GetDexFile(), + false, + QuickEntrypointEnum::kQuickAllocObjectInitialized); + HInstruction* goto_entry = new (GetAllocator()) HGoto(); + entry->AddInstruction(bool_value); + entry->AddInstruction(cls); + entry->AddInstruction(new_inst); + entry->AddInstruction(goto_entry); + ArenaVector<HInstruction*> current_locals({}, GetAllocator()->Adapter(kArenaAllocInstruction)); + ManuallyBuildEnvFor(cls, ¤t_locals); + new_inst->CopyEnvironmentFrom(cls->GetEnvironment()); + + HInstruction* if_inst = new (GetAllocator()) HIf(bool_value); + entry_post->AddInstruction(if_inst); + + HInstruction* write_left_pre = new (GetAllocator()) HInstanceFieldSet(new_inst, + c1, + nullptr, + DataType::Type::kInt32, + MemberOffset(10), + false, + 0, + 0, + graph_->GetDexFile(), + 0); + HInstruction* goto_left_pre = new (GetAllocator()) HGoto(); + left_pre->AddInstruction(write_left_pre); + left_pre->AddInstruction(goto_left_pre); + + HInstruction* suspend_left_loop = new (GetAllocator()) HSuspendCheck(); + HInstruction* call_left_loop = new (GetAllocator()) + HInvokeStaticOrDirect(GetAllocator(), + 1, + DataType::Type::kBool, + 0, + { nullptr, 0 }, + nullptr, + {}, + InvokeType::kStatic, + { nullptr, 0 }, + HInvokeStaticOrDirect::ClinitCheckRequirement::kNone); + HInstruction* if_left_loop = new (GetAllocator()) HIf(call_left_loop); + call_left_loop->AsInvoke()->SetRawInputAt(0, new_inst); + left_loop->AddInstruction(suspend_left_loop); + left_loop->AddInstruction(call_left_loop); + left_loop->AddInstruction(if_left_loop); + suspend_left_loop->CopyEnvironmentFrom(cls->GetEnvironment()); + call_left_loop->CopyEnvironmentFrom(cls->GetEnvironment()); + + HInstruction* write_left_loop = new (GetAllocator()) HInstanceFieldSet(new_inst, + c3, + nullptr, + DataType::Type::kInt32, + MemberOffset(10), + false, + 0, + 0, + graph_->GetDexFile(), + 0); + HInstruction* goto_left_loop = new (GetAllocator()) HGoto(); + left_loop_post->AddInstruction(write_left_loop); + left_loop_post->AddInstruction(goto_left_loop); + + HInstruction* write_right = new (GetAllocator()) HInstanceFieldSet(new_inst, + c2, + nullptr, + DataType::Type::kInt32, + MemberOffset(10), + false, + 0, + 0, + graph_->GetDexFile(), + 0); + HInstruction* goto_right = new (GetAllocator()) HGoto(); + right->AddInstruction(write_right); + right->AddInstruction(goto_right); + + HInstruction* read_return = new (GetAllocator()) HInstanceFieldGet(new_inst, + nullptr, + DataType::Type::kInt32, + MemberOffset(10), + false, + 0, + 0, + graph_->GetDexFile(), + 0); + HInstruction* return_final = new (GetAllocator()) HReturn(read_return); + return_block->AddInstruction(read_return); + return_block->AddInstruction(return_final); + + HInstruction* exit_instruction = new (GetAllocator()) HExit(); + exit->AddInstruction(exit_instruction); + // PerformLSE expects this to be empty. 
+ graph_->ClearDominanceInformation(); + PerformLSE(); + + EXPECT_FALSE(IsRemoved(write_left_pre)); + EXPECT_FALSE(IsRemoved(read_return)); + EXPECT_FALSE(IsRemoved(write_right)); + EXPECT_FALSE(IsRemoved(write_left_loop)); + EXPECT_FALSE(IsRemoved(call_left_loop)); +} + +// // ENTRY +// obj = new Obj(); +// if (parameter_value) { +// // LEFT +// // ELIMINATE (not visible since always overridden by obj.field = 3) +// obj.field = 1; +// while (true) { +// bool stop = should_stop(); +// // DO NOT ELIMINATE (visible by read at end) +// obj.field = 3; +// if (stop) break; +// } +// } else { +// // RIGHT +// // DO NOT ELIMINATE +// obj.field = 2; +// escape(obj); +// } +// // DO NOT ELIMINATE +// return obj.field; +// EXIT +TEST_F(LoadStoreEliminationTest, PartialLoadPreserved4) { + InitGraph(); + AdjacencyListGraph blks(SetupFromAdjacencyList("entry", + "exit", + { { "entry", "entry_post" }, + { "entry_post", "right" }, + { "right", "return_block" }, + { "entry_post", "left_pre" }, + { "left_pre", "left_loop" }, + { "left_loop", "left_loop" }, + { "left_loop", "return_block" }, + { "return_block", "exit" } })); +#define GET_BLOCK(name) HBasicBlock* name = blks.Get(#name) + GET_BLOCK(entry); + GET_BLOCK(entry_post); + GET_BLOCK(exit); + GET_BLOCK(return_block); + GET_BLOCK(left_pre); + GET_BLOCK(left_loop); + GET_BLOCK(right); +#undef GET_BLOCK + // Left-loops first successor is the break. + if (left_loop->GetSuccessors()[0] != return_block) { + left_loop->SwapSuccessors(); + } + HInstruction* bool_value = new (GetAllocator()) + HParameterValue(graph_->GetDexFile(), dex::TypeIndex(1), 1, DataType::Type::kBool); + HInstruction* c1 = graph_->GetIntConstant(1); + HInstruction* c2 = graph_->GetIntConstant(2); + HInstruction* c3 = graph_->GetIntConstant(3); + HInstruction* cls = new (GetAllocator()) HLoadClass(graph_->GetCurrentMethod(), + dex::TypeIndex(10), + graph_->GetDexFile(), + ScopedNullHandle<mirror::Class>(), + false, + 0, + false); + HInstruction* new_inst = + new (GetAllocator()) HNewInstance(cls, + 0, + dex::TypeIndex(10), + graph_->GetDexFile(), + false, + QuickEntrypointEnum::kQuickAllocObjectInitialized); + HInstruction* goto_entry = new (GetAllocator()) HGoto(); + entry->AddInstruction(bool_value); + entry->AddInstruction(cls); + entry->AddInstruction(new_inst); + entry->AddInstruction(goto_entry); + ArenaVector<HInstruction*> current_locals({}, GetAllocator()->Adapter(kArenaAllocInstruction)); + ManuallyBuildEnvFor(cls, ¤t_locals); + new_inst->CopyEnvironmentFrom(cls->GetEnvironment()); + + HInstruction* if_inst = new (GetAllocator()) HIf(bool_value); + entry_post->AddInstruction(if_inst); + + HInstruction* write_left_pre = new (GetAllocator()) HInstanceFieldSet(new_inst, + c1, + nullptr, + DataType::Type::kInt32, + MemberOffset(10), + false, + 0, + 0, + graph_->GetDexFile(), + 0); + HInstruction* goto_left_pre = new (GetAllocator()) HGoto(); + left_pre->AddInstruction(write_left_pre); + left_pre->AddInstruction(goto_left_pre); + + HInstruction* suspend_left_loop = new (GetAllocator()) HSuspendCheck(); + HInstruction* call_left_loop = new (GetAllocator()) + HInvokeStaticOrDirect(GetAllocator(), + 0, + DataType::Type::kBool, + 0, + { nullptr, 0 }, + nullptr, + {}, + InvokeType::kStatic, + { nullptr, 0 }, + HInvokeStaticOrDirect::ClinitCheckRequirement::kNone); + HInstruction* write_left_loop = new (GetAllocator()) HInstanceFieldSet(new_inst, + c3, + nullptr, + DataType::Type::kInt32, + MemberOffset(10), + false, + 0, + 0, + graph_->GetDexFile(), + 0); + HInstruction* if_left_loop = 
new (GetAllocator()) HIf(call_left_loop); + left_loop->AddInstruction(suspend_left_loop); + left_loop->AddInstruction(call_left_loop); + left_loop->AddInstruction(write_left_loop); + left_loop->AddInstruction(if_left_loop); + suspend_left_loop->CopyEnvironmentFrom(cls->GetEnvironment()); + call_left_loop->CopyEnvironmentFrom(cls->GetEnvironment()); + + HInstruction* write_right = new (GetAllocator()) HInstanceFieldSet(new_inst, + c2, + nullptr, + DataType::Type::kInt32, + MemberOffset(10), + false, + 0, + 0, + graph_->GetDexFile(), + 0); + HInstruction* call_right = new (GetAllocator()) + HInvokeStaticOrDirect(GetAllocator(), + 1, + DataType::Type::kBool, + 0, + { nullptr, 0 }, + nullptr, + {}, + InvokeType::kStatic, + { nullptr, 0 }, + HInvokeStaticOrDirect::ClinitCheckRequirement::kNone); + HInstruction* goto_right = new (GetAllocator()) HGoto(); + call_right->AsInvoke()->SetRawInputAt(0, new_inst); + right->AddInstruction(write_right); + right->AddInstruction(call_right); + right->AddInstruction(goto_right); + call_right->CopyEnvironmentFrom(cls->GetEnvironment()); + + HInstruction* read_return = new (GetAllocator()) HInstanceFieldGet(new_inst, + nullptr, + DataType::Type::kInt32, + MemberOffset(10), + false, + 0, + 0, + graph_->GetDexFile(), + 0); + HInstruction* return_final = new (GetAllocator()) HReturn(read_return); + return_block->AddInstruction(read_return); + return_block->AddInstruction(return_final); + + HInstruction* exit_instruction = new (GetAllocator()) HExit(); + exit->AddInstruction(exit_instruction); + // PerformLSE expects this to be empty. + graph_->ClearDominanceInformation(); + PerformLSE(); + + EXPECT_FALSE(IsRemoved(read_return)); + EXPECT_FALSE(IsRemoved(write_right)); + EXPECT_FALSE(IsRemoved(write_left_loop)); + EXPECT_FALSE(IsRemoved(call_left_loop)); + EXPECT_TRUE(IsRemoved(write_left_pre)); + EXPECT_FALSE(IsRemoved(call_right)); +} } // namespace art diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc index e2d164e1a2..1773c0770f 100644 --- a/compiler/optimizing/nodes.cc +++ b/compiler/optimizing/nodes.cc @@ -15,12 +15,19 @@ */ #include "nodes.h" +#include <algorithm> #include <cfloat> #include "art_method-inl.h" +#include "base/arena_allocator.h" +#include "base/arena_bit_vector.h" #include "base/bit_utils.h" #include "base/bit_vector-inl.h" +#include "base/bit_vector.h" +#include "base/iteration_range.h" #include "base/logging.h" +#include "base/scoped_arena_allocator.h" +#include "base/scoped_arena_containers.h" #include "base/stl_util.h" #include "class_linker-inl.h" #include "class_root-inl.h" @@ -263,6 +270,171 @@ static bool UpdateDominatorOfSuccessor(HBasicBlock* block, HBasicBlock* successo } } +// TODO Consider moving this entirely into LoadStoreAnalysis/Elimination. 
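The reachability support added to HGraph below is what LoadStoreElimination::Run now precomputes (see the load_store_elimination.cc hunk earlier). A minimal usage sketch, with a hypothetical function name and block parameters; only the HGraph methods themselves come from this change:

// Returns true when some control-flow path leads from 'store_block' to
// 'load_block'. LSE computes the reachability matrix once per pass; it is
// recomputed here only to keep the sketch self-contained.
bool StoreMayReachLoad(HGraph* graph,
                       const HBasicBlock* store_block,
                       const HBasicBlock* load_block) {
  graph->ClearReachabilityInformation();    // the matrix must start empty
  graph->ComputeReachabilityInformation();  // one memoized DFS per block
  return graph->PathBetween(store_block, load_block);
}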
+bool HGraph::PathBetween(uint32_t source_idx, uint32_t dest_idx) const { + DCHECK_LT(source_idx, blocks_.size()) << "source not present in graph!"; + DCHECK_LT(dest_idx, blocks_.size()) << "dest not present in graph!"; + DCHECK(blocks_[source_idx] != nullptr); + DCHECK(blocks_[dest_idx] != nullptr); + return reachability_graph_.IsBitSet(source_idx, dest_idx); +} + +bool HGraph::PathBetween(const HBasicBlock* source, const HBasicBlock* dest) const { + if (source == nullptr || dest == nullptr) { + return false; + } + size_t source_idx = source->GetBlockId(); + size_t dest_idx = dest->GetBlockId(); + return PathBetween(source_idx, dest_idx); +} + +// This function/struct calculates the reachability of every node from every +// other node by iteratively using DFS to find reachability of each individual +// block. +// +// This is in practice faster then the simpler Floyd-Warshall since while that +// is O(N**3) this is O(N*(E + N)) where N is the number of blocks and E is the +// number of edges. Since in practice each block only has a few outgoing edges +// we can confidently say that E ~ B*N where B is a small number (~3). We also +// memoize the results as we go allowing us to (potentially) avoid walking the +// entire graph for every node. To make best use of this memoization we +// calculate the reachability of blocks in PostOrder. This means that +// (generally) blocks that are dominated by many other blocks and dominate few +// blocks themselves will be examined first. This makes it more likely we can +// use our memoized results. +class ReachabilityAnalysisHelper { + public: + ReachabilityAnalysisHelper(const HGraph* graph, + ArenaBitVectorArray* reachability_graph, + ArenaStack* arena_stack) + : graph_(graph), + reachability_graph_(reachability_graph), + arena_stack_(arena_stack), + temporaries_(arena_stack_), + block_size_(RoundUp(graph_->GetBlocks().size(), BitVector::kWordBits)), + all_visited_nodes_( + &temporaries_, graph_->GetBlocks().size(), false, kArenaAllocReachabilityGraph), + not_post_order_visited_( + &temporaries_, graph_->GetBlocks().size(), false, kArenaAllocReachabilityGraph) { + // We can't adjust the size of reachability graph any more without breaking + // our allocator invariants so it had better be large enough. + CHECK_GE(reachability_graph_->NumRows(), graph_->GetBlocks().size()); + CHECK_GE(reachability_graph_->NumColumns(), graph_->GetBlocks().size()); + not_post_order_visited_.SetInitialBits(graph_->GetBlocks().size()); + } + + void CalculateReachability() { + // Calculate what blocks connect using repeated DFS + // + // Going in PostOrder should generally give memoization a good shot of + // hitting. + for (const HBasicBlock* blk : graph_->GetPostOrder()) { + if (blk == nullptr) { + continue; + } + not_post_order_visited_.ClearBit(blk->GetBlockId()); + CalculateConnectednessOn(blk); + all_visited_nodes_.SetBit(blk->GetBlockId()); + } + // Get all other bits + for (auto idx : not_post_order_visited_.Indexes()) { + const HBasicBlock* blk = graph_->GetBlocks()[idx]; + if (blk == nullptr) { + continue; + } + CalculateConnectednessOn(blk); + all_visited_nodes_.SetBit(blk->GetBlockId()); + } + } + + private: + void AddEdge(uint32_t source, const HBasicBlock* dest) { + reachability_graph_->SetBit(source, dest->GetBlockId()); + } + + // Union the reachability of 'idx' into 'update_block_idx'. This is done to + // implement memoization. In order to improve performance we do this in 4-byte + // blocks. 
Clang should be able to optimize this to larger blocks if possible. + void UnionBlock(size_t update_block_idx, size_t idx) { + reachability_graph_->UnionRows(update_block_idx, idx); + } + + // Single DFS to get connectedness of a single block + void CalculateConnectednessOn(const HBasicBlock* const target_block) { + const uint32_t target_idx = target_block->GetBlockId(); + ScopedArenaAllocator connectedness_temps(arena_stack_); + // What nodes we have already discovered and either have processed or are + // already on the queue. + ArenaBitVector discovered( + &connectedness_temps, graph_->GetBlocks().size(), false, kArenaAllocReachabilityGraph); + // The work stack. What blocks we still need to process. + ScopedArenaVector<const HBasicBlock*> work_stack( + connectedness_temps.Adapter(kArenaAllocReachabilityGraph)); + // Known max size since otherwise we'd have blocks multiple times. Avoids + // re-allocation + work_stack.reserve(graph_->GetBlocks().size()); + discovered.SetBit(target_idx); + work_stack.push_back(target_block); + // Main DFS Loop. + while (!work_stack.empty()) { + const HBasicBlock* cur = work_stack.back(); + work_stack.pop_back(); + // Memoization of previous runs. + if (all_visited_nodes_.IsBitSet(cur->GetBlockId())) { + DCHECK_NE(target_block, cur); + // Already explored from here. Just use that data. + UnionBlock(target_idx, cur->GetBlockId()); + continue; + } + for (const HBasicBlock* succ : cur->GetSuccessors()) { + AddEdge(target_idx, succ); + if (!discovered.IsBitSet(succ->GetBlockId())) { + work_stack.push_back(succ); + discovered.SetBit(succ->GetBlockId()); + } + } + } + } + + const HGraph* graph_; + // The graph's reachability_graph_ on the main allocator. + ArenaBitVectorArray* reachability_graph_; + ArenaStack* arena_stack_; + // An allocator for temporary bit-vectors used by this algorithm. The + // 'SetBit,ClearBit' on reachability_graph_ prior to the construction of this + // object should be the only allocation on the main allocator so it's safe to + // make a sub-allocator here. + ScopedArenaAllocator temporaries_; + // number of columns + const size_t block_size_; + // Where we've already completely calculated connectedness. + ArenaBitVector all_visited_nodes_; + // What we never visited and need to do later + ArenaBitVector not_post_order_visited_; + + DISALLOW_COPY_AND_ASSIGN(ReachabilityAnalysisHelper); +}; + +void HGraph::ComputeReachabilityInformation() { + DCHECK_EQ(reachability_graph_.GetRawData().NumSetBits(), 0u); + DCHECK(reachability_graph_.IsExpandable()); + // Reserve all the bits we'll need. This is the only allocation on the + // standard allocator we do here, enabling us to create a new ScopedArena for + // use with temporaries. + // + // reachability_graph_ acts as |N| x |N| graph for PathBetween. Array is + // padded so each row starts on an BitVector::kWordBits-bit alignment for + // simplicity and performance, allowing us to union blocks together without + // going bit-by-bit. 
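  // (Editorial illustration, not part of the original change: with, say, 50
  // blocks, the matrix becomes 50 rows of RoundUp(50, BitVector::kWordBits) =
  // 64 bits each, 3200 bits in total, and UnionRows can OR two such rows one
  // 32-bit word at a time instead of bit by bit.)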
+ reachability_graph_.Resize(blocks_.size(), blocks_.size(), /*clear=*/false); + ReachabilityAnalysisHelper helper(this, &reachability_graph_, GetArenaStack()); + helper.CalculateReachability(); +} + +void HGraph::ClearReachabilityInformation() { + reachability_graph_.Clear(); +} + void HGraph::ComputeDominanceInformation() { DCHECK(reverse_post_order_.empty()); reverse_post_order_.reserve(blocks_.size()); diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index ad56d31667..9fa21d5006 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -21,6 +21,7 @@ #include <array> #include <type_traits> +#include "base/arena_allocator.h" #include "base/arena_bit_vector.h" #include "base/arena_containers.h" #include "base/arena_object.h" @@ -387,6 +388,7 @@ class HGraph : public ArenaObject<kArenaAllocGraph> { blocks_(allocator->Adapter(kArenaAllocBlockList)), reverse_post_order_(allocator->Adapter(kArenaAllocReversePostOrder)), linear_order_(allocator->Adapter(kArenaAllocLinearOrder)), + reachability_graph_(allocator, 0, 0, true, kArenaAllocReachabilityGraph), entry_block_(nullptr), exit_block_(nullptr), maximum_number_of_out_vregs_(0), @@ -442,6 +444,8 @@ class HGraph : public ArenaObject<kArenaAllocGraph> { void ComputeDominanceInformation(); void ClearDominanceInformation(); + void ComputeReachabilityInformation(); + void ClearReachabilityInformation(); void ClearLoopInformation(); void FindBackEdges(ArenaBitVector* visited); GraphAnalysisResult BuildDominatorTree(); @@ -590,6 +594,10 @@ class HGraph : public ArenaObject<kArenaAllocGraph> { has_bounds_checks_ = value; } + // Returns true if dest is reachable from source, using either blocks or block-ids. + bool PathBetween(const HBasicBlock* source, const HBasicBlock* dest) const; + bool PathBetween(uint32_t source_id, uint32_t dest_id) const; + // Is the code known to be robust against eliminating dead references // and the effects of early finalization? bool IsDeadReferenceSafe() const { return dead_reference_safe_; } @@ -746,6 +754,10 @@ class HGraph : public ArenaObject<kArenaAllocGraph> { // post order, this order is not incrementally kept up-to-date. ArenaVector<HBasicBlock*> linear_order_; + // Reachability graph for checking connectedness between nodes. Acts as a partitioned vector where + // each RoundUp(blocks_.size(), BitVector::kWordBits) is the reachability of each node. 
+ ArenaBitVectorArray reachability_graph_; + HBasicBlock* entry_block_; HBasicBlock* exit_block_; diff --git a/compiler/optimizing/optimizing_compiler_stats.h b/compiler/optimizing/optimizing_compiler_stats.h index 475c53205f..4322eb72a0 100644 --- a/compiler/optimizing/optimizing_compiler_stats.h +++ b/compiler/optimizing/optimizing_compiler_stats.h @@ -108,6 +108,11 @@ enum class MethodCompilationStat { kConstructorFenceRemovedCFRE, kBitstringTypeCheck, kJitOutOfMemoryForCommit, + kFullLSEAllocationRemoved, + kFullLSEPossible, + kNonPartialLoadRemoved, + kPartialLSEPossible, + kPartialStoreRemoved, kLastStat }; std::ostream& operator<<(std::ostream& os, MethodCompilationStat rhs); diff --git a/compiler/optimizing/optimizing_unit_test.h b/compiler/optimizing/optimizing_unit_test.h index 792c9db1af..71acdacdb4 100644 --- a/compiler/optimizing/optimizing_unit_test.h +++ b/compiler/optimizing/optimizing_unit_test.h @@ -317,21 +317,23 @@ class AdjacencyListGraph { HBasicBlock* dest_blk = name_to_block_.GetOrCreate(dest, create_block); src_blk->AddSuccessor(dest_blk); } + graph_->ClearReachabilityInformation(); graph_->ComputeDominanceInformation(); + graph_->ComputeReachabilityInformation(); for (auto [name, blk] : name_to_block_) { block_to_name_.Put(blk, name); } } - bool HasBlock(const HBasicBlock* blk) { + bool HasBlock(const HBasicBlock* blk) const { return block_to_name_.find(blk) != block_to_name_.end(); } - std::string_view GetName(const HBasicBlock* blk) { + std::string_view GetName(const HBasicBlock* blk) const { return block_to_name_.Get(blk); } - HBasicBlock* Get(const std::string_view& sv) { + HBasicBlock* Get(const std::string_view& sv) const { return name_to_block_.Get(sv); } diff --git a/compiler/optimizing/scheduler.cc b/compiler/optimizing/scheduler.cc index ea5a13a0db..c1891de69a 100644 --- a/compiler/optimizing/scheduler.cc +++ b/compiler/optimizing/scheduler.cc @@ -560,7 +560,7 @@ void HScheduler::Schedule(HGraph* graph) { // should run the analysis or not. 
const HeapLocationCollector* heap_location_collector = nullptr; ScopedArenaAllocator allocator(graph->GetArenaStack()); - LoadStoreAnalysis lsa(graph, &allocator); + LoadStoreAnalysis lsa(graph, /*stats=*/nullptr, &allocator, /*for_elimination=*/false); if (!only_optimize_loop_blocks_ || graph->HasLoops()) { lsa.Run(); heap_location_collector = &lsa.GetHeapLocationCollector(); diff --git a/compiler/optimizing/scheduler_test.cc b/compiler/optimizing/scheduler_test.cc index 94f1599e62..c166a46082 100644 --- a/compiler/optimizing/scheduler_test.cc +++ b/compiler/optimizing/scheduler_test.cc @@ -273,7 +273,8 @@ class SchedulerTest : public OptimizingUnitTest { entry->AddInstruction(instr); } - HeapLocationCollector heap_location_collector(graph_, GetScopedAllocator()); + HeapLocationCollector heap_location_collector( + graph_, GetScopedAllocator(), /*for_partial_elimination=*/false); heap_location_collector.VisitBasicBlock(entry); heap_location_collector.BuildAliasingMatrix(); TestSchedulingGraph scheduling_graph(GetScopedAllocator(), &heap_location_collector); diff --git a/libartbase/base/arena_allocator.cc b/libartbase/base/arena_allocator.cc index 0e7f6cceb3..34386513bf 100644 --- a/libartbase/base/arena_allocator.cc +++ b/libartbase/base/arena_allocator.cc @@ -44,6 +44,7 @@ const char* const ArenaAllocatorStatsImpl<kCount>::kAllocNames[] = { "BlockList ", "RevPostOrder ", "LinearOrder ", + "Reachability ", "ConstantsMap ", "Predecessors ", "Successors ", diff --git a/libartbase/base/arena_allocator.h b/libartbase/base/arena_allocator.h index a9ccae1b07..44c3adde3d 100644 --- a/libartbase/base/arena_allocator.h +++ b/libartbase/base/arena_allocator.h @@ -53,6 +53,7 @@ enum ArenaAllocKind { kArenaAllocBlockList, kArenaAllocReversePostOrder, kArenaAllocLinearOrder, + kArenaAllocReachabilityGraph, kArenaAllocConstantsMap, kArenaAllocPredecessors, kArenaAllocSuccessors, diff --git a/libartbase/base/arena_bit_vector.h b/libartbase/base/arena_bit_vector.h index 0fb6bbf775..a367da836b 100644 --- a/libartbase/base/arena_bit_vector.h +++ b/libartbase/base/arena_bit_vector.h @@ -18,6 +18,7 @@ #define ART_LIBARTBASE_BASE_ARENA_BIT_VECTOR_H_ #include "arena_object.h" +#include "base/arena_allocator.h" #include "bit_vector.h" namespace art { @@ -49,8 +50,58 @@ class ArenaBitVector : public BitVector, public ArenaObject<kArenaAllocGrowableB ArenaAllocKind kind = kArenaAllocGrowableBitMap); ~ArenaBitVector() {} + ArenaBitVector(ArenaBitVector&&) = default; + ArenaBitVector(const ArenaBitVector&) = delete; +}; + +// A BitVectorArray implementation that uses Arena allocation. See +// BitVectorArray for more information. 
+// This is a helper for dealing with 2d bit-vector arrays packed into a single +// bit-vector +class ArenaBitVectorArray final : public BaseBitVectorArray, + public ArenaObject<kArenaAllocGrowableBitMap> { + public: + ArenaBitVectorArray(const ArenaBitVectorArray& bv) = delete; + ArenaBitVectorArray& operator=(const ArenaBitVectorArray& other) = delete; + + explicit ArenaBitVectorArray(ArenaBitVector&& bv) : BaseBitVectorArray(), data_(std::move(bv)) {} + ArenaBitVectorArray(ArenaBitVector&& bv, size_t cols) + : BaseBitVectorArray(BaseBitVectorArray::MaxRowsFor(bv, cols), cols), data_(std::move(bv)) {} + + ArenaBitVectorArray(ArenaAllocator* allocator, + size_t start_rows, + size_t start_cols, + bool expandable, + ArenaAllocKind kind = kArenaAllocGrowableBitMap) + : BaseBitVectorArray(start_rows, start_cols), + data_(ArenaBitVector(allocator, + BaseBitVectorArray::RequiredBitVectorSize(start_rows, start_cols), + expandable, + kind)) {} + + ArenaBitVectorArray(ScopedArenaAllocator* allocator, + size_t start_rows, + size_t start_cols, + bool expandable, + ArenaAllocKind kind = kArenaAllocGrowableBitMap) + : BaseBitVectorArray(start_rows, start_cols), + data_(ArenaBitVector(allocator, + BaseBitVectorArray::RequiredBitVectorSize(start_rows, start_cols), + expandable, + kind)) {} + + ~ArenaBitVectorArray() override {} + + const BitVector& GetRawData() const override { + return data_; + } + + BitVector& GetRawData() override { + return data_; + } + private: - DISALLOW_COPY_AND_ASSIGN(ArenaBitVector); + ArenaBitVector data_; }; } // namespace art diff --git a/libartbase/base/array_ref.h b/libartbase/base/array_ref.h index e8b3bceb71..064e26bc5f 100644 --- a/libartbase/base/array_ref.h +++ b/libartbase/base/array_ref.h @@ -203,6 +203,19 @@ bool operator!=(const ArrayRef<T>& lhs, const ArrayRef<T>& rhs) { return !(lhs == rhs); } +template<typename T> +std::ostream& operator<<(std::ostream& os, const ArrayRef<T>& ts) { + bool first = true; + os << "["; + for (const T& t : ts) { + if (!first) { os << ", "; } + first = false; + os << t; + } + os << "]"; + return os; +} + } // namespace art diff --git a/libartbase/base/array_slice.h b/libartbase/base/array_slice.h index 4679146ca1..a58ff4439d 100644 --- a/libartbase/base/array_slice.h +++ b/libartbase/base/array_slice.h @@ -17,6 +17,7 @@ #ifndef ART_LIBARTBASE_BASE_ARRAY_SLICE_H_ #define ART_LIBARTBASE_BASE_ARRAY_SLICE_H_ +#include <ostream> #include "bit_utils.h" #include "casts.h" #include "iteration_range.h" @@ -63,6 +64,10 @@ class ArraySlice { lpa != nullptr && lpa->size() != 0 ? &lpa->At(0, element_size, alignment) : nullptr, lpa != nullptr ? lpa->size() : 0, element_size) {} + ArraySlice(const ArraySlice<T>&) = default; + ArraySlice(ArraySlice<T>&&) = default; + ArraySlice<T>& operator=(const ArraySlice<T>&) = default; + ArraySlice<T>& operator=(ArraySlice<T>&&) = default; // Iterators. 
iterator begin() { return iterator(&AtUnchecked(0), element_size_); } @@ -166,6 +171,19 @@ class ArraySlice { size_t element_size_; }; +template<typename T> +std::ostream& operator<<(std::ostream& os, const ArraySlice<T>& ts) { + bool first = true; + os << "["; + for (const T& t : ts) { + if (!first) { os << ", "; } + first = false; + os << t; + } + os << "]"; + return os; +} + } // namespace art #endif // ART_LIBARTBASE_BASE_ARRAY_SLICE_H_ diff --git a/libartbase/base/bit_vector.cc b/libartbase/base/bit_vector.cc index 8e3d4c9bf7..d3cb652c0c 100644 --- a/libartbase/base/bit_vector.cc +++ b/libartbase/base/bit_vector.cc @@ -376,4 +376,31 @@ Allocator* BitVector::GetAllocator() const { return allocator_; } +void BaseBitVectorArray::Resize(size_t rows, size_t cols, bool clear) { + DCHECK(IsExpandable()); + if (clear) { + Clear(); + } + cols = RoundUp(cols, BitVector::kWordBits); + num_columns_ = cols; + num_rows_ = rows; + // Ensure size + GetRawData().SetBit(num_rows_ * num_columns_ - 1); + GetRawData().ClearBit(num_rows_ * num_columns_ - 1); +} + +// In order to improve performance we do this in 4-byte blocks. Clang should be +// able to optimize this to larger blocks if possible. +void BaseBitVectorArray::UnionRows(size_t dest_row, size_t other) { + DCHECK_LT(dest_row, num_rows_); + DCHECK_LT(other, num_rows_); + size_t block_words = num_columns_ / BitVector::kWordBits; + uint32_t* dest = + GetRawData().GetRawStorage() + ((dest_row * num_columns_) / BitVector::kWordBits); + uint32_t* source = GetRawData().GetRawStorage() + ((other * num_columns_) / BitVector::kWordBits); + for (uint32_t i = 0; i < block_words; ++i, ++dest, ++source) { + *dest = (*dest) | (*source); + } +} + } // namespace art diff --git a/libartbase/base/bit_vector.h b/libartbase/base/bit_vector.h index dc15874cbf..0c735cc58a 100644 --- a/libartbase/base/bit_vector.h +++ b/libartbase/base/bit_vector.h @@ -18,6 +18,7 @@ #define ART_LIBARTBASE_BASE_BIT_VECTOR_H_ #include <stdint.h> + #include <iterator> #include "bit_utils.h" @@ -26,6 +27,7 @@ namespace art { class Allocator; +class ArenaBitVector; /* * Expanding bitmap, used for tracking resources. Bits are numbered starting @@ -33,6 +35,9 @@ class Allocator; */ class BitVector { public: + static constexpr uint32_t kWordBytes = sizeof(uint32_t); + static constexpr uint32_t kWordBits = kWordBytes * 8; + class IndexContainer; /** @@ -226,11 +231,22 @@ class BitVector { return storage_size_ * kWordBytes; } + size_t GetBitSizeOf() const { + return storage_size_ * kWordBits; + } + /** * @return the highest bit set, -1 if none are set */ int GetHighestBitSet() const; + /** + * @return true if there are any bits set, false otherwise. + */ + bool IsAnyBitSet() const { + return GetHighestBitSet() != -1; + } + // Minimum number of bits required to store this vector, 0 if none are set. size_t GetNumberOfBits() const { return GetHighestBitSet() + 1; @@ -281,15 +297,148 @@ class BitVector { return 1 << (idx & 0x1f); } - static constexpr uint32_t kWordBytes = sizeof(uint32_t); - static constexpr uint32_t kWordBits = kWordBytes * 8; - uint32_t* storage_; // The storage for the bit vector. uint32_t storage_size_; // Current size, in 32-bit words. Allocator* const allocator_; // Allocator if expandable. const bool expandable_; // Should the bitmap expand if too small? 
}; +// Helper for dealing with 2d bit-vector arrays packed into a single bit-vec +class BaseBitVectorArray { + public: + BaseBitVectorArray(const BaseBitVectorArray& bv) = default; + BaseBitVectorArray& operator=(const BaseBitVectorArray& other) = default; + + BaseBitVectorArray() : num_columns_(0), num_rows_(0) {} + + BaseBitVectorArray(size_t num_rows, size_t num_columns) + : num_columns_(RoundUp(num_columns, BitVector::kWordBits)), num_rows_(num_rows) {} + + virtual ~BaseBitVectorArray() {} + + bool IsExpandable() const { + return GetRawData().IsExpandable(); + } + + // Let subclasses provide storage for various types. + virtual const BitVector& GetRawData() const = 0; + virtual BitVector& GetRawData() = 0; + + size_t NumRows() const { + return num_rows_; + } + + // NB This might be more than the requested size for alignment purposes. + size_t NumColumns() const { + return num_columns_; + } + + void Clear() { + GetRawData().ClearAllBits(); + } + + // Ensure that we can set all bits in the given range. The actual number of + // columns might be larger than requested for alignment purposes. + void Resize(size_t rows, size_t cols, bool clear = true); + + void SetBit(size_t row, size_t col) { + DCHECK_LT(col, num_columns_); + DCHECK_LT(row, num_rows_); + GetRawData().SetBit(row * num_columns_ + col); + } + + void ClearBit(size_t row, size_t col) { + DCHECK_LT(col, num_columns_); + DCHECK_LT(row, num_rows_); + GetRawData().ClearBit(row * num_columns_ + col); + } + + bool IsBitSet(size_t row, size_t col) const { + DCHECK_LT(col, num_columns_); + DCHECK_LT(row, num_rows_); + return GetRawData().IsBitSet(row * num_columns_ + col); + } + + // Union the vector of 'other' into 'dest_row'. + void UnionRows(size_t dest_row, size_t other); + + static size_t RequiredBitVectorSize(size_t rows, size_t cols) { + return rows * RoundUp(cols, BitVector::kWordBits); + } + + static size_t MaxRowsFor(const BitVector& bv, size_t cols) { + return cols != 0 ? bv.GetBitSizeOf() / RoundUp(cols, BitVector::kWordBits) : 0; + } + + private: + size_t num_columns_; + size_t num_rows_; +}; + +// A BitVectorArray with a standard owned BitVector providing the backing +// storage. This should be used when the BitVectorArray is the owner of the +// whole BitVector and should use standard allocators for cleanup/allocation. +// Contrast this with ArenaBitVectorArray which uses arena allocators. 
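Before the owned-storage BitVectorArray subclass that follows, a brief hedged fragment (gtest-style, with arbitrary 100x200 dimensions) showing how the packing rules of BaseBitVectorArray above play out in practice:

// Sketch only: how 2-D indices map onto the flat backing BitVector.
BitVectorArray bva(/*start_rows=*/100, /*start_cols=*/200,
                   /*expandable=*/true, Allocator::GetMallocAllocator());
// 200 columns round up to RoundUp(200, BitVector::kWordBits) = 224, so the
// backing vector holds 100 * 224 bits and SetBit(4, 12) sets flat bit
// 4 * 224 + 12.
bva.SetBit(4, 12);
bva.SetBit(40, 120);
bva.UnionRows(4, 40);  // row 4 |= row 40, one 32-bit word at a time
// Row 4 now has bits 12 and 120 set; row 40 still has only bit 120.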
+class BitVectorArray final : public BaseBitVectorArray { + public: + BitVectorArray(const BitVectorArray& bv) = delete; + BitVectorArray& operator=(const BitVectorArray& other) = delete; + + explicit BitVectorArray(BitVector&& bv) : BaseBitVectorArray(), data_(std::move(bv)) {} + explicit BitVectorArray(BitVector&& bv, size_t cols) + : BaseBitVectorArray(BaseBitVectorArray::MaxRowsFor(bv, cols), cols), data_(std::move(bv)) {} + explicit BitVectorArray(BitVector&& bv, size_t rows, size_t cols) + : BaseBitVectorArray(rows, cols), data_(std::move(bv)) {} + + BitVectorArray(uint32_t start_rows, uint32_t start_cols, bool expandable, Allocator* allocator) + : BaseBitVectorArray(start_rows, start_cols), + data_(BaseBitVectorArray::RequiredBitVectorSize(start_rows, start_cols), + expandable, + allocator) {} + + BitVectorArray(const BaseBitVectorArray& src, bool expandable, Allocator* allocator) + : BaseBitVectorArray(src.NumRows(), src.NumColumns()), + data_(src.GetRawData(), expandable, allocator) {} + + ~BitVectorArray() override {} + + const BitVector& GetRawData() const override { + return data_; + } + + BitVector& GetRawData() override { + return data_; + } + + private: + BitVector data_; +}; + +// A bit vector array that uses an unowned BitVector reference as it's backing +// data. +class BitVectorArrayWrapper final : public BaseBitVectorArray { + public: + BitVectorArrayWrapper& operator=(BitVectorArrayWrapper& other) = default; + BitVectorArrayWrapper(BitVectorArrayWrapper&) = default; + explicit BitVectorArrayWrapper(BitVector* bv) : BaseBitVectorArray(), data_(bv) {} + explicit BitVectorArrayWrapper(BitVector* bv, size_t cols) + : BaseBitVectorArray(BaseBitVectorArray::MaxRowsFor(*bv, cols), cols), data_(bv) {} + explicit BitVectorArrayWrapper(BitVector* bv, size_t rows, size_t cols) + : BaseBitVectorArray(rows, cols), data_(bv) {} + + ~BitVectorArrayWrapper() override {} + + const BitVector& GetRawData() const override { + return *data_; + } + + BitVector& GetRawData() override { + return *data_; + } + + private: + BitVector* data_; +}; } // namespace art diff --git a/libartbase/base/bit_vector_test.cc b/libartbase/base/bit_vector_test.cc index 2ef81c6726..5f1b167718 100644 --- a/libartbase/base/bit_vector_test.cc +++ b/libartbase/base/bit_vector_test.cc @@ -15,11 +15,13 @@ */ #include <memory> +#include <random> #include "allocator.h" +#include "base/stl_util.h" #include "bit_vector-inl.h" -#include "transform_iterator.h" #include "gtest/gtest.h" +#include "transform_iterator.h" namespace art { @@ -363,4 +365,58 @@ TEST(BitVector, MovementFree) { EXPECT_EQ(alloc.AllocCount(), 1u); } +TEST(BitVector, ArrayCol) { + { + BitVectorArray bva(100, 200, true, Allocator::GetMallocAllocator()); + for (uint32_t i : Range(bva.NumColumns())) { + bva.SetBit(bva.NumRows() / 2, i); + } + EXPECT_EQ(bva.GetRawData().NumSetBits(), bva.NumColumns()); + } + { + BitVectorArray bva(100, 200, true, Allocator::GetMallocAllocator()); + for (uint32_t i : Range(bva.NumRows())) { + bva.SetBit(i, bva.NumColumns() / 2); + } + EXPECT_EQ(bva.GetRawData().NumSetBits(), bva.NumRows()); + } +} + +TEST(BitVector, ArrayUnion) { + { + BitVectorArray bva(100, 200, true, Allocator::GetMallocAllocator()); + bva.SetBit(4, 12); + bva.SetBit(40, 120); + bva.SetBit(40, 121); + bva.SetBit(40, 122); + + bva.UnionRows(4, 40); + + EXPECT_TRUE(bva.IsBitSet(4, 12)); + EXPECT_TRUE(bva.IsBitSet(4, 120)); + EXPECT_TRUE(bva.IsBitSet(4, 121)); + EXPECT_TRUE(bva.IsBitSet(4, 122)); + EXPECT_FALSE(bva.IsBitSet(40, 12)); + 
+TEST(BitVector, ArrayUnion) {
+  {
+    BitVectorArray bva(100, 200, true, Allocator::GetMallocAllocator());
+    bva.SetBit(4, 12);
+    bva.SetBit(40, 120);
+    bva.SetBit(40, 121);
+    bva.SetBit(40, 122);
+
+    bva.UnionRows(4, 40);
+
+    EXPECT_TRUE(bva.IsBitSet(4, 12));
+    EXPECT_TRUE(bva.IsBitSet(4, 120));
+    EXPECT_TRUE(bva.IsBitSet(4, 121));
+    EXPECT_TRUE(bva.IsBitSet(4, 122));
+    EXPECT_FALSE(bva.IsBitSet(40, 12));
+    EXPECT_TRUE(bva.IsBitSet(40, 120));
+    EXPECT_TRUE(bva.IsBitSet(40, 121));
+    EXPECT_TRUE(bva.IsBitSet(40, 122));
+    EXPECT_EQ(bva.GetRawData().NumSetBits(), 7u);
+  }
+  {
+    BitVectorArray bva(100, 100, true, Allocator::GetMallocAllocator());
+    for (uint32_t i : Range(bva.NumRows())) {
+      bva.SetBit(i, i);
+    }
+    for (uint32_t i : Range(1, bva.NumRows())) {
+      bva.UnionRows(0, i);
+    }
+    for (uint32_t col : Range(bva.NumColumns())) {
+      for (uint32_t row : Range(bva.NumRows())) {
+        // We set every bit where row == column, and every bit on row 0 up to the number of rows.
+        EXPECT_EQ(bva.IsBitSet(row, col), row == col || (row == 0 && col < bva.NumRows()));
+      }
+    }
+  }
+}
+
 }  // namespace art
diff --git a/libartbase/base/hash_map.h b/libartbase/base/hash_map.h
index 7d018921f0..32c232a1b2 100644
--- a/libartbase/base/hash_map.h
+++ b/libartbase/base/hash_map.h
@@ -81,6 +81,13 @@ class HashMap : public HashSet<std::pair<Key, Value>,
   HashMap() : Base() { }
   explicit HashMap(const Alloc& alloc)
       : Base(alloc) { }
+
+  // Insert a new mapping for 'k', overwriting any existing mapping for it.
+  typename Base::iterator Overwrite(const Key& k, const Value& v) {
+    auto res = Base::insert({ k, v }).first;
+    *res = { k, v };
+    return res;
+  }
 };

 }  // namespace art
diff --git a/libartbase/base/iteration_range.h b/libartbase/base/iteration_range.h
index eaed8b06a8..c916250751 100644
--- a/libartbase/base/iteration_range.h
+++ b/libartbase/base/iteration_range.h
@@ -18,6 +18,7 @@
 #define ART_LIBARTBASE_BASE_ITERATION_RANGE_H_

 #include <iterator>
+#include <type_traits>

 namespace art {

@@ -49,9 +50,11 @@ inline IterationRange<Iter> MakeIterationRange(const Iter& begin_it, const Iter&
   return IterationRange<Iter>(begin_it, end_it);
 }

-template<typename List>
-inline IterationRange<typename List::iterator> MakeIterationRange(List& list) {
-  return IterationRange<typename List::iterator>(list.begin(), list.end());
+template <typename List>
+inline auto MakeIterationRange(List& list) -> IterationRange<decltype(list.begin())> {
+  static_assert(std::is_same_v<decltype(list.begin()), decltype(list.end())>,
+                "Different iterator types");
+  return MakeIterationRange(list.begin(), list.end());
 }

 template <typename Iter>
diff --git a/libartbase/base/stl_util.h b/libartbase/base/stl_util.h
index cd7b812b72..503dfee1d6 100644
--- a/libartbase/base/stl_util.h
+++ b/libartbase/base/stl_util.h
@@ -229,6 +229,64 @@ static inline IterationRange<ZipLeftIter<IterLeft, IterRight>> ZipLeft(
                         ZipLeftIter(iter_left.end(), iter_right.end()));
 }

+static inline IterationRange<CountIter> Range(size_t start, size_t end) {
+  return IterationRange(CountIter(start), CountIter(end));
+}
+
+static inline IterationRange<CountIter> Range(size_t end) {
+  return Range(0, end);
+}
+
+template <typename RealIter, typename Filter>
+struct FilterIterator
+    : public std::iterator<std::forward_iterator_tag, typename RealIter::value_type> {
+ public:
+  FilterIterator(RealIter rl,
+                 Filter cond,
+                 std::optional<RealIter> end = std::nullopt)
+      : real_iter_(rl), cond_(cond), end_(end) {
+    DCHECK(std::make_optional(rl) == end_ || cond_(*real_iter_));
+  }
+
+  FilterIterator<RealIter, Filter>& operator++() {
+    DCHECK(std::make_optional(real_iter_) != end_);
+    do {
+      if (std::make_optional(++real_iter_) == end_) {
+        break;
+      }
+    } while (!cond_(*real_iter_));
+    return *this;
+  }
+  FilterIterator<RealIter, Filter> operator++(int) {
+    FilterIterator<RealIter, Filter> ret(real_iter_, cond_, end_);
+    ++(*this);
+    return ret;
+  }
+  bool operator==(const FilterIterator<RealIter, Filter>& other) const {
+    return real_iter_
== other.real_iter_; + } + bool operator!=(const FilterIterator<RealIter, Filter>& other) const { + return !(*this == other); + } + typename RealIter::value_type operator*() const { + return *real_iter_; + } + + private: + RealIter real_iter_; + Filter cond_; + std::optional<RealIter> end_; +}; + +template <typename Iter, typename Filter> +static inline IterationRange<FilterIterator<Iter, Filter>> Filter( + IterationRange<Iter> it, Filter cond) { + auto end = it.end(); + auto start = std::find_if(it.begin(), end, cond); + return MakeIterationRange(FilterIterator(start, cond, std::make_optional(end)), + FilterIterator(end, cond, std::make_optional(end))); +} + } // namespace art #endif // ART_LIBARTBASE_BASE_STL_UTIL_H_ diff --git a/test/530-checker-lse/src/Main.java b/test/530-checker-lse/src/Main.java index b2ae3a1bd8..f250aa59fb 100644 --- a/test/530-checker-lse/src/Main.java +++ b/test/530-checker-lse/src/Main.java @@ -84,6 +84,12 @@ interface Filter { public class Main { + static Object ESCAPE = null; + static void $noinline$Escape(TestClass o) { + ESCAPE = o; + o.next.i++; + } + /// CHECK-START: double Main.calcCircleArea(double) load_store_elimination (before) /// CHECK: NewInstance /// CHECK: InstanceFieldSet @@ -3728,6 +3734,65 @@ public class Main { return t; } + private static boolean $noinline$getBoolean(boolean val) { + return val; + } + + /// CHECK-START: int Main.$noinline$testPartialEscape1(TestClass, boolean) load_store_elimination (before) + /// CHECK-DAG: ParameterValue + /// CHECK-DAG: NewInstance + /// CHECK-DAG: InvokeStaticOrDirect + /// CHECK-DAG: InstanceFieldSet + /// CHECK-DAG: InvokeStaticOrDirect + /// CHECK-DAG: InstanceFieldGet + /// CHECK-DAG: InstanceFieldGet + /// CHECK-DAG: InstanceFieldSet + /// CHECK-DAG: InstanceFieldGet + /// CHECK-DAG: InstanceFieldGet + /// CHECK-DAG: Phi + // + /// CHECK-NOT: NewInstance + /// CHECK-NOT: InvokeStaticOrDirect + /// CHECK-NOT: InstanceFieldSet + /// CHECK-NOT: InstanceFieldGet + // + /// CHECK-START: int Main.$noinline$testPartialEscape1(TestClass, boolean) load_store_elimination (after) + /// CHECK-DAG: ParameterValue + /// CHECK-DAG: NewInstance + /// CHECK-DAG: Phi + // + /// CHECK-START: int Main.$noinline$testPartialEscape1(TestClass, boolean) load_store_elimination (after) + /// CHECK: InvokeStaticOrDirect + /// CHECK: InvokeStaticOrDirect + // + /// CHECK-NOT: InvokeStaticOrDirect + + /// CHECK-START: int Main.$noinline$testPartialEscape1(TestClass, boolean) load_store_elimination (after) + /// CHECK: InstanceFieldSet + // + /// CHECK-NOT: InstanceFieldSet + // + /// CHECK-START: int Main.$noinline$testPartialEscape1(TestClass, boolean) load_store_elimination (after) + /// CHECK: InstanceFieldGet + /// CHECK: InstanceFieldGet + /// CHECK: InstanceFieldGet + // + /// CHECK-NOT: InstanceFieldGet + private static int $noinline$testPartialEscape1(TestClass obj, boolean escape) { + TestClass i = new SubTestClass(); + int res; + if ($noinline$getBoolean(escape)) { + i.next = obj; + $noinline$Escape(i); + res = i.next.i; + } else { + i.next = obj; + res = i.next.i; + } + return res; + } + + private static void $noinline$clobberObservables() {} static void assertLongEquals(long result, long expected) { @@ -4129,5 +4194,7 @@ public class Main { assertIntEquals(testNestedLoop8(new TestClass(), 3), 0); assertLongEquals(testOverlapLoop(10), 34l); assertLongEquals(testOverlapLoop(50), 7778742049l); + assertIntEquals($noinline$testPartialEscape1(new TestClass(), true), 1); + assertIntEquals($noinline$testPartialEscape1(new 
TestClass(), false), 0);
   }
 }
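Finally, a usage sketch (untested, for illustration) of how the new stl_util.h iteration helpers might be called from ART code. It assumes the libartbase build environment and that CountIter dereferences to an integer index; the function names below are illustrative, not part of the change. Note that the reworked MakeIterationRange deduces the iterator type from list.begin(), which is what lets it accept a const container as in CountEven().

#include <cstddef>
#include <vector>

#include "base/stl_util.h"

namespace art {

// Sums 'values' using Range() as an index generator (iterates 0 .. size()-1).
inline int SumByIndex(const std::vector<int>& values) {
  int sum = 0;
  for (size_t i : Range(values.size())) {
    sum += values[i];
  }
  return sum;
}

// Counts the even entries by visiting only the elements that pass the
// predicate, via Filter() over an IterationRange of the whole container.
inline size_t CountEven(const std::vector<int>& values) {
  size_t count = 0;
  auto is_even = [](int v) { return v % 2 == 0; };
  for (int v : Filter(MakeIterationRange(values), is_even)) {
    static_cast<void>(v);  // only the count is interesting here
    ++count;
  }
  return count;
}

}  // namespace art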