| /* |
| * Copyright (C) 2014 The Android Open Source Project |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| #include "gvn.h" |
| |
| #include "base/arena_bit_vector.h" |
| #include "base/bit_vector-inl.h" |
| #include "base/scoped_arena_allocator.h" |
| #include "base/scoped_arena_containers.h" |
| #include "base/utils.h" |
| #include "side_effects_analysis.h" |
| |
| namespace art HIDDEN { |
| |
| /** |
| * A ValueSet holds instructions that can replace other instructions. It is updated |
| * through the `Add` method, and the `Kill` method. The `Kill` method removes |
| * instructions that are affected by the given side effect. |
| * |
| * The `Lookup` method returns an equivalent instruction to the given instruction |
| * if there is one in the set. In GVN, we would say those instructions have the |
| * same "number". |
| */ |
| class ValueSet : public ArenaObject<kArenaAllocGvn> { |
| public: |
| // Constructs an empty ValueSet which owns all its buckets. |
| explicit ValueSet(ScopedArenaAllocator* allocator) |
| : allocator_(allocator), |
| num_buckets_(kMinimumNumberOfBuckets), |
| buckets_(allocator->AllocArray<Node*>(num_buckets_, kArenaAllocGvn)), |
| buckets_owned_(allocator, num_buckets_, false, kArenaAllocGvn), |
| num_entries_(0u) { |
| DCHECK(IsPowerOfTwo(num_buckets_)); |
| std::fill_n(buckets_, num_buckets_, nullptr); |
| buckets_owned_.SetInitialBits(num_buckets_); |
| } |
| |
| // Copy constructor. Depending on the load factor, it will either make a deep |
| // copy (all buckets owned) or a shallow one (buckets pointing to the parent). |
| ValueSet(ScopedArenaAllocator* allocator, const ValueSet& other) |
| : allocator_(allocator), |
| num_buckets_(other.IdealBucketCount()), |
| buckets_(allocator->AllocArray<Node*>(num_buckets_, kArenaAllocGvn)), |
| buckets_owned_(allocator, num_buckets_, false, kArenaAllocGvn), |
| num_entries_(0u) { |
| DCHECK(IsPowerOfTwo(num_buckets_)); |
| PopulateFromInternal(other); |
| } |
| |
| // Erases all values in this set and populates it with values from `other`. |
| void PopulateFrom(const ValueSet& other) { |
| if (this == &other) { |
| return; |
| } |
| PopulateFromInternal(other); |
| } |
| |
| // Returns true if `this` has enough buckets so that if `other` is copied into |
| // it, the load factor will not cross the upper threshold. |
| // If `exact_match` is set, true is returned only if `this` has the ideal |
| // number of buckets. Larger number of buckets is allowed otherwise. |
| bool CanHoldCopyOf(const ValueSet& other, bool exact_match) { |
| if (exact_match) { |
| return other.IdealBucketCount() == num_buckets_; |
| } else { |
| return other.IdealBucketCount() <= num_buckets_; |
| } |
| } |
| |
| // Adds an instruction in the set. |
| void Add(HInstruction* instruction) { |
| DCHECK(Lookup(instruction) == nullptr); |
| size_t hash_code = HashCode(instruction); |
| size_t index = BucketIndex(hash_code); |
| |
| if (!buckets_owned_.IsBitSet(index)) { |
| CloneBucket(index); |
| } |
| buckets_[index] = new (allocator_) Node(instruction, hash_code, buckets_[index]); |
| ++num_entries_; |
| } |
| |
| // If in the set, returns an equivalent instruction to the given instruction. |
| // Returns null otherwise. |
| HInstruction* Lookup(HInstruction* instruction) const { |
| size_t hash_code = HashCode(instruction); |
| size_t index = BucketIndex(hash_code); |
| |
| for (Node* node = buckets_[index]; node != nullptr; node = node->GetNext()) { |
| if (node->GetHashCode() == hash_code) { |
| HInstruction* existing = node->GetInstruction(); |
| if (existing->Equals(instruction)) { |
| return existing; |
| } |
| } |
| } |
| return nullptr; |
| } |
| |
| // Returns whether instruction is in the set. |
| bool Contains(HInstruction* instruction) const { |
| size_t hash_code = HashCode(instruction); |
| size_t index = BucketIndex(hash_code); |
| |
| for (Node* node = buckets_[index]; node != nullptr; node = node->GetNext()) { |
| if (node->GetInstruction() == instruction) { |
| return true; |
| } |
| } |
| return false; |
| } |
| |
| // Removes all instructions in the set affected by the given side effects. |
| void Kill(SideEffects side_effects) { |
| DeleteAllImpureWhich([side_effects](Node* node) { |
| return node->GetSideEffects().MayDependOn(side_effects); |
| }); |
| } |
| |
| void Clear() { |
| num_entries_ = 0; |
| for (size_t i = 0; i < num_buckets_; ++i) { |
| buckets_[i] = nullptr; |
| } |
| buckets_owned_.SetInitialBits(num_buckets_); |
| } |
| |
| // Updates this set by intersecting with instructions in a predecessor's set. |
| void IntersectWith(ValueSet* predecessor) { |
| if (IsEmpty()) { |
| return; |
| } else if (predecessor->IsEmpty()) { |
| Clear(); |
| } else { |
| // Pure instructions do not need to be tested because only impure |
| // instructions can be killed. |
| DeleteAllImpureWhich([predecessor](Node* node) { |
| return !predecessor->Contains(node->GetInstruction()); |
| }); |
| } |
| } |
| |
| bool IsEmpty() const { return num_entries_ == 0; } |
| size_t GetNumberOfEntries() const { return num_entries_; } |
| |
| private: |
| // Copies all entries from `other` to `this`. |
| void PopulateFromInternal(const ValueSet& other) { |
| DCHECK_NE(this, &other); |
| DCHECK_GE(num_buckets_, other.IdealBucketCount()); |
| |
| if (num_buckets_ == other.num_buckets_) { |
| // Hash table remains the same size. We copy the bucket pointers and leave |
| // all buckets_owned_ bits false. |
| buckets_owned_.ClearAllBits(); |
| memcpy(buckets_, other.buckets_, num_buckets_ * sizeof(Node*)); |
| } else { |
| // Hash table size changes. We copy and rehash all entries, and set all |
| // buckets_owned_ bits to true. |
| std::fill_n(buckets_, num_buckets_, nullptr); |
| for (size_t i = 0; i < other.num_buckets_; ++i) { |
| for (Node* node = other.buckets_[i]; node != nullptr; node = node->GetNext()) { |
| size_t new_index = BucketIndex(node->GetHashCode()); |
| buckets_[new_index] = node->Dup(allocator_, buckets_[new_index]); |
| } |
| } |
| buckets_owned_.SetInitialBits(num_buckets_); |
| } |
| |
| num_entries_ = other.num_entries_; |
| } |
| |
| class Node : public ArenaObject<kArenaAllocGvn> { |
| public: |
| Node(HInstruction* instruction, size_t hash_code, Node* next) |
| : instruction_(instruction), hash_code_(hash_code), next_(next) {} |
| |
| size_t GetHashCode() const { return hash_code_; } |
| HInstruction* GetInstruction() const { return instruction_; } |
| Node* GetNext() const { return next_; } |
| void SetNext(Node* node) { next_ = node; } |
| |
| Node* Dup(ScopedArenaAllocator* allocator, Node* new_next = nullptr) { |
| return new (allocator) Node(instruction_, hash_code_, new_next); |
| } |
| |
| SideEffects GetSideEffects() const { |
| // Deoptimize is a weird instruction since it's predicated and |
| // never-return. Its side-effects are to prevent the splitting of dex |
| // instructions across it (which could cause inconsistencies once we begin |
| // interpreting again). In the context of GVN the 'perform-deopt' branch is not |
| // relevant and we only need to care about the no-op case, in which case there are |
| // no side-effects. By doing this we are able to eliminate redundant (i.e. |
| // dominated deopts with GVNd conditions) deoptimizations. |
| if (instruction_->IsDeoptimize()) { |
| return SideEffects::None(); |
| } else { |
| return instruction_->GetSideEffects(); |
| } |
| } |
| |
| private: |
| HInstruction* const instruction_; |
| const size_t hash_code_; |
| Node* next_; |
| |
| DISALLOW_COPY_AND_ASSIGN(Node); |
| }; |
| |
| // Creates our own copy of a bucket that is currently pointing to a parent. |
| // This algorithm can be called while iterating over the bucket because it |
| // preserves the order of entries in the bucket and will return the clone of |
| // the given 'iterator'. |
| Node* CloneBucket(size_t index, Node* iterator = nullptr) { |
| DCHECK(!buckets_owned_.IsBitSet(index)); |
| Node* clone_current = nullptr; |
| Node* clone_previous = nullptr; |
| Node* clone_iterator = nullptr; |
| for (Node* node = buckets_[index]; node != nullptr; node = node->GetNext()) { |
| clone_current = node->Dup(allocator_, nullptr); |
| if (node == iterator) { |
| clone_iterator = clone_current; |
| } |
| if (clone_previous == nullptr) { |
| buckets_[index] = clone_current; |
| } else { |
| clone_previous->SetNext(clone_current); |
| } |
| clone_previous = clone_current; |
| } |
| buckets_owned_.SetBit(index); |
| return clone_iterator; |
| } |
| |
| // Iterates over buckets with impure instructions (even indices) and deletes |
| // the ones on which 'cond' returns true. |
| template<typename Functor> |
| void DeleteAllImpureWhich(Functor cond) { |
| for (size_t i = 0; i < num_buckets_; i += 2) { |
| Node* node = buckets_[i]; |
| Node* previous = nullptr; |
| |
| if (node == nullptr) { |
| continue; |
| } |
| |
| if (!buckets_owned_.IsBitSet(i)) { |
| // Bucket is not owned but maybe we won't need to change it at all. |
| // Iterate as long as the entries don't satisfy 'cond'. |
| while (node != nullptr) { |
| if (cond(node)) { |
| // We do need to delete an entry but we do not own the bucket. |
| // Clone the bucket, make sure 'previous' and 'node' point to |
| // the cloned entries and break. |
| previous = CloneBucket(i, previous); |
| node = (previous == nullptr) ? buckets_[i] : previous->GetNext(); |
| break; |
| } |
| previous = node; |
| node = node->GetNext(); |
| } |
| } |
| |
| // By this point we either own the bucket and can start deleting entries, |
| // or we do not own it but no entries matched 'cond'. |
| DCHECK(buckets_owned_.IsBitSet(i) || node == nullptr); |
| |
| // We iterate over the remainder of entries and delete those that match |
| // the given condition. |
| while (node != nullptr) { |
| Node* next = node->GetNext(); |
| if (cond(node)) { |
| if (previous == nullptr) { |
| buckets_[i] = next; |
| } else { |
| previous->SetNext(next); |
| } |
| } else { |
| previous = node; |
| } |
| node = next; |
| } |
| } |
| } |
| |
| // Computes a bucket count such that the load factor is reasonable. |
| // This is estimated as (num_entries_ * 1.5) and rounded up to nearest pow2. |
| size_t IdealBucketCount() const { |
| size_t bucket_count = RoundUpToPowerOfTwo(num_entries_ + (num_entries_ >> 1)); |
| if (bucket_count > kMinimumNumberOfBuckets) { |
| return bucket_count; |
| } else { |
| return kMinimumNumberOfBuckets; |
| } |
| } |
| |
| // Generates a hash code for an instruction. |
| size_t HashCode(HInstruction* instruction) const { |
| size_t hash_code = instruction->ComputeHashCode(); |
| // Pure instructions are put into odd buckets to speed up deletion. Note that in the |
| // case of irreducible loops, we don't put pure instructions in odd buckets, as we |
| // need to delete them when entering the loop. |
| // ClinitCheck is treated as a pure instruction since it's only executed |
| // once. |
| bool pure = !instruction->GetSideEffects().HasDependencies() || |
| instruction->IsClinitCheck(); |
| if (!pure || instruction->GetBlock()->GetGraph()->HasIrreducibleLoops()) { |
| return (hash_code << 1) | 0; |
| } else { |
| return (hash_code << 1) | 1; |
| } |
| } |
| |
| // Converts a hash code to a bucket index. |
| size_t BucketIndex(size_t hash_code) const { |
| return hash_code & (num_buckets_ - 1); |
| } |
| |
| ScopedArenaAllocator* const allocator_; |
| |
| // The internal bucket implementation of the set. |
| size_t const num_buckets_; |
| Node** const buckets_; |
| |
| // Flags specifying which buckets were copied into the set from its parent. |
| // If a flag is not set, the corresponding bucket points to entries in the |
| // parent and must be cloned prior to making changes. |
| ArenaBitVector buckets_owned_; |
| |
| // The number of entries in the set. |
| size_t num_entries_; |
| |
| static constexpr size_t kMinimumNumberOfBuckets = 8; |
| |
| DISALLOW_COPY_AND_ASSIGN(ValueSet); |
| }; |
| |
| /** |
| * Optimization phase that removes redundant instruction. |
| */ |
| class GlobalValueNumberer : public ValueObject { |
| public: |
| GlobalValueNumberer(HGraph* graph, |
| const SideEffectsAnalysis& side_effects) |
| : graph_(graph), |
| allocator_(graph->GetArenaStack()), |
| side_effects_(side_effects), |
| sets_(graph->GetBlocks().size(), nullptr, allocator_.Adapter(kArenaAllocGvn)), |
| visited_blocks_( |
| &allocator_, graph->GetBlocks().size(), /* expandable= */ false, kArenaAllocGvn) { |
| visited_blocks_.ClearAllBits(); |
| } |
| |
| bool Run(); |
| |
| private: |
| // Per-block GVN. Will also update the ValueSet of the dominated and |
| // successor blocks. |
| void VisitBasicBlock(HBasicBlock* block); |
| |
| HGraph* graph_; |
| ScopedArenaAllocator allocator_; |
| const SideEffectsAnalysis& side_effects_; |
| |
| ValueSet* FindSetFor(HBasicBlock* block) const { |
| ValueSet* result = sets_[block->GetBlockId()]; |
| DCHECK(result != nullptr) << "Could not find set for block B" << block->GetBlockId(); |
| return result; |
| } |
| |
| void AbandonSetFor(HBasicBlock* block) { |
| DCHECK(sets_[block->GetBlockId()] != nullptr) |
| << "Block B" << block->GetBlockId() << " expected to have a set"; |
| sets_[block->GetBlockId()] = nullptr; |
| } |
| |
| // Returns false if the GlobalValueNumberer has already visited all blocks |
| // which may reference `block`. |
| bool WillBeReferencedAgain(HBasicBlock* block) const; |
| |
| // Iterates over visited blocks and finds one which has a ValueSet such that: |
| // (a) it will not be referenced in the future, and |
| // (b) it can hold a copy of `reference_set` with a reasonable load factor. |
| HBasicBlock* FindVisitedBlockWithRecyclableSet(HBasicBlock* block, |
| const ValueSet& reference_set) const; |
| |
| // ValueSet for blocks. Initially null, but for an individual block they |
| // are allocated and populated by the dominator, and updated by all blocks |
| // in the path from the dominator to the block. |
| ScopedArenaVector<ValueSet*> sets_; |
| |
| // BitVector which serves as a fast-access map from block id to |
| // visited/unvisited Boolean. |
| ArenaBitVector visited_blocks_; |
| |
| DISALLOW_COPY_AND_ASSIGN(GlobalValueNumberer); |
| }; |
| |
| bool GlobalValueNumberer::Run() { |
| DCHECK(side_effects_.HasRun()); |
| sets_[graph_->GetEntryBlock()->GetBlockId()] = new (&allocator_) ValueSet(&allocator_); |
| |
| // Use the reverse post order to ensure the non back-edge predecessors of a block are |
| // visited before the block itself. |
| for (HBasicBlock* block : graph_->GetReversePostOrder()) { |
| VisitBasicBlock(block); |
| } |
| return true; |
| } |
| |
| void GlobalValueNumberer::VisitBasicBlock(HBasicBlock* block) { |
| ValueSet* set = nullptr; |
| |
| const ArenaVector<HBasicBlock*>& predecessors = block->GetPredecessors(); |
| if (predecessors.size() == 0 || predecessors[0]->IsEntryBlock()) { |
| // The entry block should only accumulate constant instructions, and |
| // the builder puts constants only in the entry block. |
| // Therefore, there is no need to propagate the value set to the next block. |
| set = new (&allocator_) ValueSet(&allocator_); |
| } else { |
| HBasicBlock* dominator = block->GetDominator(); |
| ValueSet* dominator_set = FindSetFor(dominator); |
| |
| if (dominator->GetSuccessors().size() == 1) { |
| // `block` is a direct successor of its dominator. No need to clone the |
| // dominator's set, `block` can take over its ownership including its buckets. |
| DCHECK_EQ(dominator->GetSingleSuccessor(), block); |
| AbandonSetFor(dominator); |
| set = dominator_set; |
| } else { |
| // Try to find a basic block which will never be referenced again and whose |
| // ValueSet can therefore be recycled. We will need to copy `dominator_set` |
| // into the recycled set, so we pass `dominator_set` as a reference for size. |
| HBasicBlock* recyclable = FindVisitedBlockWithRecyclableSet(block, *dominator_set); |
| if (recyclable == nullptr) { |
| // No block with a suitable ValueSet found. Allocate a new one and |
| // copy `dominator_set` into it. |
| set = new (&allocator_) ValueSet(&allocator_, *dominator_set); |
| } else { |
| // Block with a recyclable ValueSet found. Clone `dominator_set` into it. |
| set = FindSetFor(recyclable); |
| AbandonSetFor(recyclable); |
| set->PopulateFrom(*dominator_set); |
| } |
| } |
| |
| if (!set->IsEmpty()) { |
| if (block->IsLoopHeader()) { |
| if (block->GetLoopInformation()->ContainsIrreducibleLoop()) { |
| // To satisfy our linear scan algorithm, no instruction should flow in an irreducible |
| // loop header. We clear the set at entry of irreducible loops and any loop containing |
| // an irreducible loop, as in both cases, GVN can extend the liveness of an instruction |
| // across the irreducible loop. |
| // Note that, if we're not compiling OSR, we could still do GVN and introduce |
| // phis at irreducible loop headers. We decided it was not worth the complexity. |
| set->Clear(); |
| } else { |
| DCHECK(!block->GetLoopInformation()->IsIrreducible()); |
| DCHECK_EQ(block->GetDominator(), block->GetLoopInformation()->GetPreHeader()); |
| set->Kill(side_effects_.GetLoopEffects(block)); |
| } |
| } else if (predecessors.size() > 1) { |
| for (HBasicBlock* predecessor : predecessors) { |
| set->IntersectWith(FindSetFor(predecessor)); |
| if (set->IsEmpty()) { |
| break; |
| } |
| } |
| } |
| } |
| } |
| |
| sets_[block->GetBlockId()] = set; |
| |
| HInstruction* current = block->GetFirstInstruction(); |
| while (current != nullptr) { |
| // Save the next instruction in case `current` is removed from the graph. |
| HInstruction* next = current->GetNext(); |
| // Do not kill the set with the side effects of the instruction just now: if |
| // the instruction is GVN'ed, we don't need to kill. |
| // |
| // BoundType is a special case example of an instruction which shouldn't be moved but can be |
| // GVN'ed. |
| // |
| // Deoptimize is a special case since even though we don't want to move it we can still remove |
| // it for GVN. |
| if (current->CanBeMoved() || current->IsBoundType() || current->IsDeoptimize()) { |
| if (current->IsBinaryOperation() && current->AsBinaryOperation()->IsCommutative()) { |
| // For commutative ops, (x op y) will be treated the same as (y op x) |
| // after fixed ordering. |
| current->AsBinaryOperation()->OrderInputs(); |
| } |
| HInstruction* existing = set->Lookup(current); |
| if (existing != nullptr) { |
| // This replacement doesn't make more OrderInputs() necessary since |
| // current is either used by an instruction that it dominates, |
| // which hasn't been visited yet due to the order we visit instructions. |
| // Or current is used by a phi, and we don't do OrderInputs() on a phi anyway. |
| current->ReplaceWith(existing); |
| current->GetBlock()->RemoveInstruction(current); |
| } else { |
| set->Kill(current->GetSideEffects()); |
| set->Add(current); |
| } |
| } else { |
| set->Kill(current->GetSideEffects()); |
| } |
| current = next; |
| } |
| |
| visited_blocks_.SetBit(block->GetBlockId()); |
| } |
| |
| bool GlobalValueNumberer::WillBeReferencedAgain(HBasicBlock* block) const { |
| DCHECK(visited_blocks_.IsBitSet(block->GetBlockId())); |
| |
| for (const HBasicBlock* dominated_block : block->GetDominatedBlocks()) { |
| if (!visited_blocks_.IsBitSet(dominated_block->GetBlockId())) { |
| return true; |
| } |
| } |
| |
| for (const HBasicBlock* successor : block->GetSuccessors()) { |
| if (!visited_blocks_.IsBitSet(successor->GetBlockId())) { |
| return true; |
| } |
| } |
| |
| return false; |
| } |
| |
| HBasicBlock* GlobalValueNumberer::FindVisitedBlockWithRecyclableSet( |
| HBasicBlock* block, const ValueSet& reference_set) const { |
| HBasicBlock* secondary_match = nullptr; |
| |
| for (size_t block_id : visited_blocks_.Indexes()) { |
| ValueSet* current_set = sets_[block_id]; |
| if (current_set == nullptr) { |
| // Set was already recycled. |
| continue; |
| } |
| |
| HBasicBlock* current_block = block->GetGraph()->GetBlocks()[block_id]; |
| |
| // We test if `current_set` has enough buckets to store a copy of |
| // `reference_set` with a reasonable load factor. If we find a set whose |
| // number of buckets matches perfectly, we return right away. If we find one |
| // that is larger, we return it if no perfectly-matching set is found. |
| // Note that we defer testing WillBeReferencedAgain until all other criteria |
| // have been satisfied because it might be expensive. |
| if (current_set->CanHoldCopyOf(reference_set, /* exact_match= */ true)) { |
| if (!WillBeReferencedAgain(current_block)) { |
| return current_block; |
| } |
| } else if (secondary_match == nullptr && |
| current_set->CanHoldCopyOf(reference_set, /* exact_match= */ false)) { |
| if (!WillBeReferencedAgain(current_block)) { |
| secondary_match = current_block; |
| } |
| } |
| } |
| |
| return secondary_match; |
| } |
| |
| bool GVNOptimization::Run() { |
| GlobalValueNumberer gvn(graph_, side_effects_); |
| return gvn.Run(); |
| } |
| |
| } // namespace art |