| author | 2023-05-10 10:11:48 +0200 |
|---|---|
| committer | 2023-05-10 15:54:41 +0000 |
| commit | 345aedde88070dee32c297f44caf2b5b2fb00942 (patch) |
| tree | 3f6182a5ca73f14a05e6ad2d3cc4bd846c96c4cb |
| parent | 85752e245be2c33a30bc5f5d1f72825f1ec68609 (diff) |
ART: Remove graph coloring register allocator.
Test: m test-art-host-gtest
Bug: 281793697
Change-Id: Ic73a8a06e08437847db943bc3df148ca65b5f9a9
| -rw-r--r-- | compiler/Android.bp | 1 |
| -rw-r--r-- | compiler/driver/compiler_options.cc | 3 |
| -rw-r--r-- | compiler/optimizing/register_allocator.cc | 5 |
| -rw-r--r-- | compiler/optimizing/register_allocator_graph_color.cc | 2086 |
| -rw-r--r-- | compiler/optimizing/register_allocator_graph_color.h | 195 |
| -rw-r--r-- | compiler/optimizing/register_allocator_test.cc | 3 |
6 files changed, 6 insertions, 2287 deletions
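The two non-trivial hunks below (compiler/driver/compiler_options.cc and compiler/optimizing/register_allocator.cc) apply the removal in two layers: the option parser keeps accepting "graph-color" but redirects it to linear scan with an error log, while the allocator factory turns the now-unconstructible strategy value into a fatal error. The standalone sketch below illustrates that fallback pattern; the types and function names are simplified stand-ins for the ART declarations, not the actual code.

```cpp
#include <cstdlib>
#include <iostream>
#include <memory>
#include <string>

// Simplified stand-ins for the ART types touched by this commit (illustrative only).
enum class Strategy { kLinearScan, kGraphColor };

struct RegisterAllocator {
  virtual ~RegisterAllocator() = default;
};
struct LinearScanAllocator : RegisterAllocator {};

// Mirrors the compiler_options.cc hunk: "graph-color" is still accepted as an
// option value, but it now falls back to linear scan and logs an error.
bool ParseStrategy(const std::string& option, Strategy* strategy, std::string* error_msg) {
  if (option == "linear-scan") {
    *strategy = Strategy::kLinearScan;
  } else if (option == "graph-color") {
    std::cerr << "Graph coloring allocator has been removed, using linear scan instead.\n";
    *strategy = Strategy::kLinearScan;
  } else {
    *error_msg = "Unrecognized register allocation strategy.";
    return false;
  }
  return true;
}

// Mirrors the register_allocator.cc hunk: the removed strategy can no longer be
// constructed, so reaching this case is treated as a fatal error.
std::unique_ptr<RegisterAllocator> Create(Strategy strategy) {
  switch (strategy) {
    case Strategy::kLinearScan:
      return std::make_unique<LinearScanAllocator>();
    case Strategy::kGraphColor:
      std::cerr << "Graph coloring register allocator has been removed.\n";
      std::abort();
  }
  return nullptr;  // Unreachable with a valid strategy value.
}

int main() {
  Strategy strategy;
  std::string error_msg;
  if (ParseStrategy("graph-color", &strategy, &error_msg)) {
    // The parser already rewrote the request to kLinearScan, so creation succeeds.
    std::unique_ptr<RegisterAllocator> allocator = Create(strategy);
  }
  return 0;
}
```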
diff --git a/compiler/Android.bp b/compiler/Android.bp
index ea45897a92..acb15d9604 100644
--- a/compiler/Android.bp
+++ b/compiler/Android.bp
@@ -80,7 +80,6 @@ art_cc_defaults {
         "optimizing/reference_type_propagation.cc",
         "optimizing/register_allocation_resolver.cc",
         "optimizing/register_allocator.cc",
-        "optimizing/register_allocator_graph_color.cc",
         "optimizing/register_allocator_linear_scan.cc",
         "optimizing/select_generator.cc",
         "optimizing/scheduler.cc",
diff --git a/compiler/driver/compiler_options.cc b/compiler/driver/compiler_options.cc
index 603596f3bc..3ed5f88875 100644
--- a/compiler/driver/compiler_options.cc
+++ b/compiler/driver/compiler_options.cc
@@ -121,7 +121,8 @@ bool CompilerOptions::ParseRegisterAllocationStrategy(const std::string& option,
   if (option == "linear-scan") {
     register_allocation_strategy_ = RegisterAllocator::Strategy::kRegisterAllocatorLinearScan;
   } else if (option == "graph-color") {
-    register_allocation_strategy_ = RegisterAllocator::Strategy::kRegisterAllocatorGraphColor;
+    LOG(ERROR) << "Graph coloring allocator has been removed, using linear scan instead.";
+    register_allocation_strategy_ = RegisterAllocator::Strategy::kRegisterAllocatorLinearScan;
   } else {
     *error_msg = "Unrecognized register allocation strategy. Try linear-scan, or graph-color.";
     return false;
diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc
index e4c2d74908..f8b057d4a8 100644
--- a/compiler/optimizing/register_allocator.cc
+++ b/compiler/optimizing/register_allocator.cc
@@ -23,7 +23,6 @@
 #include "base/scoped_arena_containers.h"
 #include "base/bit_vector-inl.h"
 #include "code_generator.h"
-#include "register_allocator_graph_color.h"
 #include "register_allocator_linear_scan.h"
 #include "ssa_liveness_analysis.h"
@@ -45,8 +44,8 @@ std::unique_ptr<RegisterAllocator> RegisterAllocator::Create(ScopedArenaAllocato
       return std::unique_ptr<RegisterAllocator>(
           new (allocator) RegisterAllocatorLinearScan(allocator, codegen, analysis));
     case kRegisterAllocatorGraphColor:
-      return std::unique_ptr<RegisterAllocator>(
-          new (allocator) RegisterAllocatorGraphColor(allocator, codegen, analysis));
+      LOG(FATAL) << "Graph coloring register allocator has been removed.";
+      UNREACHABLE();
     default:
       LOG(FATAL) << "Invalid register allocation strategy: " << strategy;
       UNREACHABLE();
diff --git a/compiler/optimizing/register_allocator_graph_color.cc b/compiler/optimizing/register_allocator_graph_color.cc
deleted file mode 100644
index a7c891d4e7..0000000000
--- a/compiler/optimizing/register_allocator_graph_color.cc
+++ /dev/null
@@ -1,2086 +0,0 @@
-/*
- * Copyright (C) 2016 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "register_allocator_graph_color.h"
-
-#include "code_generator.h"
-#include "linear_order.h"
-#include "register_allocation_resolver.h"
-#include "ssa_liveness_analysis.h"
-#include "thread-current-inl.h"
-
-namespace art HIDDEN {
-
-// Highest number of registers that we support for any platform. This can be used for std::bitset,
-// for example, which needs to know its size at compile time.
-static constexpr size_t kMaxNumRegs = 32;
-
-// The maximum number of graph coloring attempts before triggering a DCHECK.
-// This is meant to catch changes to the graph coloring algorithm that undermine its forward
-// progress guarantees. Forward progress for the algorithm means splitting live intervals on
-// every graph coloring attempt so that eventually the interference graph will be sparse enough
-// to color. The main threat to forward progress is trying to split short intervals which cannot be
-// split further; this could cause infinite looping because the interference graph would never
-// change. This is avoided by prioritizing short intervals before long ones, so that long
-// intervals are split when coloring fails.
-static constexpr size_t kMaxGraphColoringAttemptsDebug = 100;
-
-// We always want to avoid spilling inside loops.
-static constexpr size_t kLoopSpillWeightMultiplier = 10;
-
-// If we avoid moves in single jump blocks, we can avoid jumps to jumps.
-static constexpr size_t kSingleJumpBlockWeightMultiplier = 2;
-
-// We avoid moves in blocks that dominate the exit block, since these blocks will
-// be executed on every path through the method.
-static constexpr size_t kDominatesExitBlockWeightMultiplier = 2;
-
-enum class CoalesceKind {
-  kAdjacentSibling,       // Prevents moves at interval split points.
-  kFixedOutputSibling,    // Prevents moves from a fixed output location.
-  kFixedInput,            // Prevents moves into a fixed input location.
-  kNonlinearControlFlow,  // Prevents moves between blocks.
-  kPhi,                   // Prevents phi resolution moves.
-  kFirstInput,            // Prevents a single input move.
-  kAnyInput,              // May lead to better instruction selection / smaller encodings.
-};
-
-std::ostream& operator<<(std::ostream& os, const CoalesceKind& kind) {
-  return os << static_cast<typename std::underlying_type<CoalesceKind>::type>(kind);
-}
-
-static size_t LoopDepthAt(HBasicBlock* block) {
-  HLoopInformation* loop_info = block->GetLoopInformation();
-  size_t depth = 0;
-  while (loop_info != nullptr) {
-    ++depth;
-    loop_info = loop_info->GetPreHeader()->GetLoopInformation();
-  }
-  return depth;
-}
-
-// Return the runtime cost of inserting a move instruction at the specified location.
-static size_t CostForMoveAt(size_t position, const SsaLivenessAnalysis& liveness) {
-  HBasicBlock* block = liveness.GetBlockFromPosition(position / 2);
-  DCHECK(block != nullptr);
-  size_t cost = 1;
-  if (block->IsSingleJump()) {
-    cost *= kSingleJumpBlockWeightMultiplier;
-  }
-  if (block->Dominates(block->GetGraph()->GetExitBlock())) {
-    cost *= kDominatesExitBlockWeightMultiplier;
-  }
-  for (size_t loop_depth = LoopDepthAt(block); loop_depth > 0; --loop_depth) {
-    cost *= kLoopSpillWeightMultiplier;
-  }
-  return cost;
-}
-
-// In general, we estimate coalesce priority by whether it will definitely avoid a move,
-// and by how likely it is to create an interference graph that's harder to color.
-static size_t ComputeCoalescePriority(CoalesceKind kind, - size_t position, - const SsaLivenessAnalysis& liveness) { - if (kind == CoalesceKind::kAnyInput) { - // This type of coalescing can affect instruction selection, but not moves, so we - // give it the lowest priority. - return 0; - } else { - return CostForMoveAt(position, liveness); - } -} - -enum class CoalesceStage { - kWorklist, // Currently in the iterative coalescing worklist. - kActive, // Not in a worklist, but could be considered again during iterative coalescing. - kInactive, // No longer considered until last-chance coalescing. - kDefunct, // Either the two nodes interfere, or have already been coalesced. -}; - -std::ostream& operator<<(std::ostream& os, const CoalesceStage& stage) { - return os << static_cast<typename std::underlying_type<CoalesceStage>::type>(stage); -} - -// Represents a coalesce opportunity between two nodes. -struct CoalesceOpportunity : public ArenaObject<kArenaAllocRegisterAllocator> { - CoalesceOpportunity(InterferenceNode* a, - InterferenceNode* b, - CoalesceKind kind, - size_t position, - const SsaLivenessAnalysis& liveness) - : node_a(a), - node_b(b), - stage(CoalesceStage::kWorklist), - priority(ComputeCoalescePriority(kind, position, liveness)) {} - - // Compare two coalesce opportunities based on their priority. - // Return true if lhs has a lower priority than that of rhs. - static bool CmpPriority(const CoalesceOpportunity* lhs, - const CoalesceOpportunity* rhs) { - return lhs->priority < rhs->priority; - } - - InterferenceNode* const node_a; - InterferenceNode* const node_b; - - // The current stage of this coalesce opportunity, indicating whether it is in a worklist, - // and whether it should still be considered. - CoalesceStage stage; - - // The priority of this coalesce opportunity, based on heuristics. - const size_t priority; -}; - -enum class NodeStage { - kInitial, // Uninitialized. - kPrecolored, // Marks fixed nodes. - kSafepoint, // Marks safepoint nodes. - kPrunable, // Marks uncolored nodes in the interference graph. - kSimplifyWorklist, // Marks non-move-related nodes with degree less than the number of registers. - kFreezeWorklist, // Marks move-related nodes with degree less than the number of registers. - kSpillWorklist, // Marks nodes with degree greater or equal to the number of registers. - kPruned // Marks nodes already pruned from the interference graph. -}; - -std::ostream& operator<<(std::ostream& os, const NodeStage& stage) { - return os << static_cast<typename std::underlying_type<NodeStage>::type>(stage); -} - -// Returns the estimated cost of spilling a particular live interval. -static float ComputeSpillWeight(LiveInterval* interval, const SsaLivenessAnalysis& liveness) { - if (interval->HasRegister()) { - // Intervals with a fixed register cannot be spilled. - return std::numeric_limits<float>::min(); - } - - size_t length = interval->GetLength(); - if (length == 1) { - // Tiny intervals should have maximum priority, since they cannot be split any further. - return std::numeric_limits<float>::max(); - } - - size_t use_weight = 0; - if (interval->GetDefinedBy() != nullptr && interval->DefinitionRequiresRegister()) { - // Cost for spilling at a register definition point. - use_weight += CostForMoveAt(interval->GetStart() + 1, liveness); - } - - // Process uses in the range (interval->GetStart(), interval->GetEnd()], i.e. 
- // [interval->GetStart() + 1, interval->GetEnd() + 1) - auto matching_use_range = FindMatchingUseRange(interval->GetUses().begin(), - interval->GetUses().end(), - interval->GetStart() + 1u, - interval->GetEnd() + 1u); - for (const UsePosition& use : matching_use_range) { - if (use.GetUser() != nullptr && use.RequiresRegister()) { - // Cost for spilling at a register use point. - use_weight += CostForMoveAt(use.GetUser()->GetLifetimePosition() - 1, liveness); - } - } - - // We divide by the length of the interval because we want to prioritize - // short intervals; we do not benefit much if we split them further. - return static_cast<float>(use_weight) / static_cast<float>(length); -} - -// Interference nodes make up the interference graph, which is the primary data structure in -// graph coloring register allocation. Each node represents a single live interval, and contains -// a set of adjacent nodes corresponding to intervals overlapping with its own. To save memory, -// pre-colored nodes never contain outgoing edges (only incoming ones). -// -// As nodes are pruned from the interference graph, incoming edges of the pruned node are removed, -// but outgoing edges remain in order to later color the node based on the colors of its neighbors. -// -// Note that a pair interval is represented by a single node in the interference graph, which -// essentially requires two colors. One consequence of this is that the degree of a node is not -// necessarily equal to the number of adjacent nodes--instead, the degree reflects the maximum -// number of colors with which a node could interfere. We model this by giving edges different -// weights (1 or 2) to control how much it increases the degree of adjacent nodes. -// For example, the edge between two single nodes will have weight 1. On the other hand, -// the edge between a single node and a pair node will have weight 2. This is because the pair -// node could block up to two colors for the single node, and because the single node could -// block an entire two-register aligned slot for the pair node. -// The degree is defined this way because we use it to decide whether a node is guaranteed a color, -// and thus whether it is safe to prune it from the interference graph early on. -class InterferenceNode : public ArenaObject<kArenaAllocRegisterAllocator> { - public: - InterferenceNode(LiveInterval* interval, - const SsaLivenessAnalysis& liveness) - : stage(NodeStage::kInitial), - interval_(interval), - adjacent_nodes_(nullptr), - coalesce_opportunities_(nullptr), - out_degree_(interval->HasRegister() ? 
std::numeric_limits<size_t>::max() : 0), - alias_(this), - spill_weight_(ComputeSpillWeight(interval, liveness)), - requires_color_(interval->RequiresRegister()), - needs_spill_slot_(false) { - DCHECK(!interval->IsHighInterval()) << "Pair nodes should be represented by the low interval"; - } - - void AddInterference(InterferenceNode* other, - bool guaranteed_not_interfering_yet, - ScopedArenaDeque<ScopedArenaVector<InterferenceNode*>>* storage) { - DCHECK(!IsPrecolored()) << "To save memory, fixed nodes should not have outgoing interferences"; - DCHECK_NE(this, other) << "Should not create self loops in the interference graph"; - DCHECK_EQ(this, alias_) << "Should not add interferences to a node that aliases another"; - DCHECK_NE(stage, NodeStage::kPruned); - DCHECK_NE(other->stage, NodeStage::kPruned); - if (adjacent_nodes_ == nullptr) { - ScopedArenaVector<InterferenceNode*>::allocator_type adapter(storage->get_allocator()); - storage->emplace_back(adapter); - adjacent_nodes_ = &storage->back(); - } - if (guaranteed_not_interfering_yet) { - DCHECK(!ContainsElement(GetAdjacentNodes(), other)); - adjacent_nodes_->push_back(other); - out_degree_ += EdgeWeightWith(other); - } else { - if (!ContainsElement(GetAdjacentNodes(), other)) { - adjacent_nodes_->push_back(other); - out_degree_ += EdgeWeightWith(other); - } - } - } - - void RemoveInterference(InterferenceNode* other) { - DCHECK_EQ(this, alias_) << "Should not remove interferences from a coalesced node"; - DCHECK_EQ(other->stage, NodeStage::kPruned) << "Should only remove interferences when pruning"; - if (adjacent_nodes_ != nullptr) { - auto it = std::find(adjacent_nodes_->begin(), adjacent_nodes_->end(), other); - if (it != adjacent_nodes_->end()) { - adjacent_nodes_->erase(it); - out_degree_ -= EdgeWeightWith(other); - } - } - } - - bool ContainsInterference(InterferenceNode* other) const { - DCHECK(!IsPrecolored()) << "Should not query fixed nodes for interferences"; - DCHECK_EQ(this, alias_) << "Should not query a coalesced node for interferences"; - return ContainsElement(GetAdjacentNodes(), other); - } - - LiveInterval* GetInterval() const { - return interval_; - } - - ArrayRef<InterferenceNode*> GetAdjacentNodes() const { - return adjacent_nodes_ != nullptr - ? ArrayRef<InterferenceNode*>(*adjacent_nodes_) - : ArrayRef<InterferenceNode*>(); - } - - size_t GetOutDegree() const { - // Pre-colored nodes have infinite degree. - DCHECK_IMPLIES(IsPrecolored(), out_degree_ == std::numeric_limits<size_t>::max()); - return out_degree_; - } - - void AddCoalesceOpportunity(CoalesceOpportunity* opportunity, - ScopedArenaDeque<ScopedArenaVector<CoalesceOpportunity*>>* storage) { - if (coalesce_opportunities_ == nullptr) { - ScopedArenaVector<CoalesceOpportunity*>::allocator_type adapter(storage->get_allocator()); - storage->emplace_back(adapter); - coalesce_opportunities_ = &storage->back(); - } - coalesce_opportunities_->push_back(opportunity); - } - - void ClearCoalesceOpportunities() { - coalesce_opportunities_ = nullptr; - } - - bool IsMoveRelated() const { - for (CoalesceOpportunity* opportunity : GetCoalesceOpportunities()) { - if (opportunity->stage == CoalesceStage::kWorklist || - opportunity->stage == CoalesceStage::kActive) { - return true; - } - } - return false; - } - - // Return whether this node already has a color. - // Used to find fixed nodes in the interference graph before coloring. 
- bool IsPrecolored() const { - return interval_->HasRegister(); - } - - bool IsPair() const { - return interval_->HasHighInterval(); - } - - void SetAlias(InterferenceNode* rep) { - DCHECK_NE(rep->stage, NodeStage::kPruned); - DCHECK_EQ(this, alias_) << "Should only set a node's alias once"; - alias_ = rep; - } - - InterferenceNode* GetAlias() { - if (alias_ != this) { - // Recurse in order to flatten tree of alias pointers. - alias_ = alias_->GetAlias(); - } - return alias_; - } - - ArrayRef<CoalesceOpportunity*> GetCoalesceOpportunities() const { - return coalesce_opportunities_ != nullptr - ? ArrayRef<CoalesceOpportunity*>(*coalesce_opportunities_) - : ArrayRef<CoalesceOpportunity*>(); - } - - float GetSpillWeight() const { - return spill_weight_; - } - - bool RequiresColor() const { - return requires_color_; - } - - // We give extra weight to edges adjacent to pair nodes. See the general comment on the - // interference graph above. - size_t EdgeWeightWith(const InterferenceNode* other) const { - return (IsPair() || other->IsPair()) ? 2 : 1; - } - - bool NeedsSpillSlot() const { - return needs_spill_slot_; - } - - void SetNeedsSpillSlot() { - needs_spill_slot_ = true; - } - - // The current stage of this node, indicating which worklist it belongs to. - NodeStage stage; - - private: - // The live interval that this node represents. - LiveInterval* const interval_; - - // All nodes interfering with this one. - // We use an unsorted vector as a set, since a tree or hash set is too heavy for the - // set sizes that we encounter. Using a vector leads to much better performance. - ScopedArenaVector<InterferenceNode*>* adjacent_nodes_; // Owned by ColoringIteration. - - // Interference nodes that this node should be coalesced with to reduce moves. - ScopedArenaVector<CoalesceOpportunity*>* coalesce_opportunities_; // Owned by ColoringIteration. - - // The maximum number of colors with which this node could interfere. This could be more than - // the number of adjacent nodes if this is a pair node, or if some adjacent nodes are pair nodes. - // We use "out" degree because incoming edges come from nodes already pruned from the graph, - // and do not affect the coloring of this node. - // Pre-colored nodes are treated as having infinite degree. - size_t out_degree_; - - // The node representing this node in the interference graph. - // Initially set to `this`, and only changed if this node is coalesced into another. - InterferenceNode* alias_; - - // The cost of splitting and spilling this interval to the stack. - // Nodes with a higher spill weight should be prioritized when assigning registers. - // This is essentially based on use density and location; short intervals with many uses inside - // deeply nested loops have a high spill weight. - const float spill_weight_; - - const bool requires_color_; - - bool needs_spill_slot_; - - DISALLOW_COPY_AND_ASSIGN(InterferenceNode); -}; - -// The order in which we color nodes is important. To guarantee forward progress, -// we prioritize intervals that require registers, and after that we prioritize -// short intervals. That way, if we fail to color a node, it either won't require a -// register, or it will be a long interval that can be split in order to make the -// interference graph sparser. -// To improve code quality, we prioritize intervals used frequently in deeply nested loops. -// (This metric is secondary to the forward progress requirements above.) 
-// TODO: May also want to consider: -// - Constants (since they can be rematerialized) -// - Allocated spill slots -static bool HasGreaterNodePriority(const InterferenceNode* lhs, - const InterferenceNode* rhs) { - // (1) Prioritize the node that requires a color. - if (lhs->RequiresColor() != rhs->RequiresColor()) { - return lhs->RequiresColor(); - } - - // (2) Prioritize the interval that has a higher spill weight. - return lhs->GetSpillWeight() > rhs->GetSpillWeight(); -} - -// A ColoringIteration holds the many data structures needed for a single graph coloring attempt, -// and provides methods for each phase of the attempt. -class ColoringIteration { - public: - ColoringIteration(RegisterAllocatorGraphColor* register_allocator, - ScopedArenaAllocator* allocator, - bool processing_core_regs, - size_t num_regs) - : register_allocator_(register_allocator), - allocator_(allocator), - processing_core_regs_(processing_core_regs), - num_regs_(num_regs), - interval_node_map_(allocator->Adapter(kArenaAllocRegisterAllocator)), - prunable_nodes_(allocator->Adapter(kArenaAllocRegisterAllocator)), - pruned_nodes_(allocator->Adapter(kArenaAllocRegisterAllocator)), - simplify_worklist_(allocator->Adapter(kArenaAllocRegisterAllocator)), - freeze_worklist_(allocator->Adapter(kArenaAllocRegisterAllocator)), - spill_worklist_(HasGreaterNodePriority, allocator->Adapter(kArenaAllocRegisterAllocator)), - coalesce_worklist_(CoalesceOpportunity::CmpPriority, - allocator->Adapter(kArenaAllocRegisterAllocator)), - adjacent_nodes_links_(allocator->Adapter(kArenaAllocRegisterAllocator)), - coalesce_opportunities_links_(allocator->Adapter(kArenaAllocRegisterAllocator)) {} - - // Use the intervals collected from instructions to construct an - // interference graph mapping intervals to adjacency lists. - // Also, collect synthesized safepoint nodes, used to keep - // track of live intervals across safepoints. - // TODO: Should build safepoints elsewhere. - void BuildInterferenceGraph(const ScopedArenaVector<LiveInterval*>& intervals, - const ScopedArenaVector<InterferenceNode*>& physical_nodes); - - // Add coalesce opportunities to interference nodes. - void FindCoalesceOpportunities(); - - // Prune nodes from the interference graph to be colored later. Build - // a stack (pruned_nodes) containing these intervals in an order determined - // by various heuristics. - void PruneInterferenceGraph(); - - // Process pruned_intervals_ to color the interference graph, spilling when - // necessary. Returns true if successful. Else, some intervals have been - // split, and the interference graph should be rebuilt for another attempt. - bool ColorInterferenceGraph(); - - // Return prunable nodes. - // The register allocator will need to access prunable nodes after coloring - // in order to tell the code generator which registers have been assigned. - ArrayRef<InterferenceNode* const> GetPrunableNodes() const { - return ArrayRef<InterferenceNode* const>(prunable_nodes_); - } - - private: - // Create a coalesce opportunity between two nodes. - void CreateCoalesceOpportunity(InterferenceNode* a, - InterferenceNode* b, - CoalesceKind kind, - size_t position); - - // Add an edge in the interference graph, if valid. - // Note that `guaranteed_not_interfering_yet` is used to optimize adjacency set insertion - // when possible. 
- void AddPotentialInterference(InterferenceNode* from, - InterferenceNode* to, - bool guaranteed_not_interfering_yet, - bool both_directions = true); - - // Invalidate all coalesce opportunities this node has, so that it (and possibly its neighbors) - // may be pruned from the interference graph. - void FreezeMoves(InterferenceNode* node); - - // Prune a node from the interference graph, updating worklists if necessary. - void PruneNode(InterferenceNode* node); - - // Add coalesce opportunities associated with this node to the coalesce worklist. - void EnableCoalesceOpportunities(InterferenceNode* node); - - // If needed, from `node` from the freeze worklist to the simplify worklist. - void CheckTransitionFromFreezeWorklist(InterferenceNode* node); - - // Return true if `into` is colored, and `from` can be coalesced with `into` conservatively. - bool PrecoloredHeuristic(InterferenceNode* from, InterferenceNode* into); - - // Return true if `from` and `into` are uncolored, and can be coalesced conservatively. - bool UncoloredHeuristic(InterferenceNode* from, InterferenceNode* into); - - void Coalesce(CoalesceOpportunity* opportunity); - - // Merge `from` into `into` in the interference graph. - void Combine(InterferenceNode* from, InterferenceNode* into); - - // A reference to the register allocator instance, - // needed to split intervals and assign spill slots. - RegisterAllocatorGraphColor* register_allocator_; - - // A scoped arena allocator used for a single graph coloring attempt. - ScopedArenaAllocator* allocator_; - - const bool processing_core_regs_; - - const size_t num_regs_; - - // A map from live intervals to interference nodes. - ScopedArenaHashMap<LiveInterval*, InterferenceNode*> interval_node_map_; - - // Uncolored nodes that should be pruned from the interference graph. - ScopedArenaVector<InterferenceNode*> prunable_nodes_; - - // A stack of nodes pruned from the interference graph, waiting to be pruned. - ScopedArenaStdStack<InterferenceNode*> pruned_nodes_; - - // A queue containing low degree, non-move-related nodes that can pruned immediately. - ScopedArenaDeque<InterferenceNode*> simplify_worklist_; - - // A queue containing low degree, move-related nodes. - ScopedArenaDeque<InterferenceNode*> freeze_worklist_; - - // A queue containing high degree nodes. - // If we have to prune from the spill worklist, we cannot guarantee - // the pruned node a color, so we order the worklist by priority. - ScopedArenaPriorityQueue<InterferenceNode*, decltype(&HasGreaterNodePriority)> spill_worklist_; - - // A queue containing coalesce opportunities. - // We order the coalesce worklist by priority, since some coalesce opportunities (e.g., those - // inside of loops) are more important than others. - ScopedArenaPriorityQueue<CoalesceOpportunity*, - decltype(&CoalesceOpportunity::CmpPriority)> coalesce_worklist_; - - // Storage for links to adjacent nodes for interference nodes. - // Using std::deque so that elements do not move when adding new ones. - ScopedArenaDeque<ScopedArenaVector<InterferenceNode*>> adjacent_nodes_links_; - - // Storage for links to coalesce opportunities for interference nodes. - // Using std::deque so that elements do not move when adding new ones. 
- ScopedArenaDeque<ScopedArenaVector<CoalesceOpportunity*>> coalesce_opportunities_links_; - - DISALLOW_COPY_AND_ASSIGN(ColoringIteration); -}; - -static bool IsCoreInterval(LiveInterval* interval) { - return !DataType::IsFloatingPointType(interval->GetType()); -} - -static size_t ComputeReservedArtMethodSlots(const CodeGenerator& codegen) { - return static_cast<size_t>(InstructionSetPointerSize(codegen.GetInstructionSet())) / kVRegSize; -} - -RegisterAllocatorGraphColor::RegisterAllocatorGraphColor(ScopedArenaAllocator* allocator, - CodeGenerator* codegen, - const SsaLivenessAnalysis& liveness, - bool iterative_move_coalescing) - : RegisterAllocator(allocator, codegen, liveness), - iterative_move_coalescing_(iterative_move_coalescing), - core_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)), - fp_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)), - temp_intervals_(allocator->Adapter(kArenaAllocRegisterAllocator)), - safepoints_(allocator->Adapter(kArenaAllocRegisterAllocator)), - physical_core_nodes_(allocator->Adapter(kArenaAllocRegisterAllocator)), - physical_fp_nodes_(allocator->Adapter(kArenaAllocRegisterAllocator)), - num_int_spill_slots_(0), - num_double_spill_slots_(0), - num_float_spill_slots_(0), - num_long_spill_slots_(0), - catch_phi_spill_slot_counter_(0), - reserved_art_method_slots_(ComputeReservedArtMethodSlots(*codegen)), - reserved_out_slots_(codegen->GetGraph()->GetMaximumNumberOfOutVRegs()) { - // Before we ask for blocked registers, set them up in the code generator. - codegen->SetupBlockedRegisters(); - - // Initialize physical core register live intervals and blocked registers. - // This includes globally blocked registers, such as the stack pointer. - physical_core_nodes_.resize(codegen_->GetNumberOfCoreRegisters(), nullptr); - for (size_t i = 0; i < codegen_->GetNumberOfCoreRegisters(); ++i) { - LiveInterval* interval = LiveInterval::MakeFixedInterval(allocator_, i, DataType::Type::kInt32); - physical_core_nodes_[i] = new (allocator_) InterferenceNode(interval, liveness); - physical_core_nodes_[i]->stage = NodeStage::kPrecolored; - core_intervals_.push_back(interval); - if (codegen_->IsBlockedCoreRegister(i)) { - interval->AddRange(0, liveness.GetMaxLifetimePosition()); - } - } - // Initialize physical floating point register live intervals and blocked registers. - physical_fp_nodes_.resize(codegen_->GetNumberOfFloatingPointRegisters(), nullptr); - for (size_t i = 0; i < codegen_->GetNumberOfFloatingPointRegisters(); ++i) { - LiveInterval* interval = - LiveInterval::MakeFixedInterval(allocator_, i, DataType::Type::kFloat32); - physical_fp_nodes_[i] = new (allocator_) InterferenceNode(interval, liveness); - physical_fp_nodes_[i]->stage = NodeStage::kPrecolored; - fp_intervals_.push_back(interval); - if (codegen_->IsBlockedFloatingPointRegister(i)) { - interval->AddRange(0, liveness.GetMaxLifetimePosition()); - } - } -} - -RegisterAllocatorGraphColor::~RegisterAllocatorGraphColor() {} - -void RegisterAllocatorGraphColor::AllocateRegisters() { - // (1) Collect and prepare live intervals. - ProcessInstructions(); - - for (bool processing_core_regs : {true, false}) { - ScopedArenaVector<LiveInterval*>& intervals = processing_core_regs - ? core_intervals_ - : fp_intervals_; - size_t num_registers = processing_core_regs - ? 
codegen_->GetNumberOfCoreRegisters() - : codegen_->GetNumberOfFloatingPointRegisters(); - - size_t attempt = 0; - while (true) { - ++attempt; - DCHECK(attempt <= kMaxGraphColoringAttemptsDebug) - << "Exceeded debug max graph coloring register allocation attempts. " - << "This could indicate that the register allocator is not making forward progress, " - << "which could be caused by prioritizing the wrong live intervals. (Short intervals " - << "should be prioritized over long ones, because they cannot be split further.)"; - - // Many data structures are cleared between graph coloring attempts, so we reduce - // total memory usage by using a new scoped arena allocator for each attempt. - ScopedArenaAllocator coloring_attempt_allocator(allocator_->GetArenaStack()); - ColoringIteration iteration(this, - &coloring_attempt_allocator, - processing_core_regs, - num_registers); - - // (2) Build the interference graph. - ScopedArenaVector<InterferenceNode*>& physical_nodes = processing_core_regs - ? physical_core_nodes_ - : physical_fp_nodes_; - iteration.BuildInterferenceGraph(intervals, physical_nodes); - - // (3) Add coalesce opportunities. - // If we have tried coloring the graph a suspiciously high number of times, give - // up on move coalescing, just in case the coalescing heuristics are not conservative. - // (This situation will be caught if DCHECKs are turned on.) - if (iterative_move_coalescing_ && attempt <= kMaxGraphColoringAttemptsDebug) { - iteration.FindCoalesceOpportunities(); - } - - // (4) Prune all uncolored nodes from interference graph. - iteration.PruneInterferenceGraph(); - - // (5) Color pruned nodes based on interferences. - bool successful = iteration.ColorInterferenceGraph(); - - // We manually clear coalesce opportunities for physical nodes, - // since they persist across coloring attempts. - for (InterferenceNode* node : physical_core_nodes_) { - node->ClearCoalesceOpportunities(); - } - for (InterferenceNode* node : physical_fp_nodes_) { - node->ClearCoalesceOpportunities(); - } - - if (successful) { - // Assign spill slots. - AllocateSpillSlots(iteration.GetPrunableNodes()); - - // Tell the code generator which registers were allocated. - // We only look at prunable_nodes because we already told the code generator about - // fixed intervals while processing instructions. We also ignore the fixed intervals - // placed at the top of catch blocks. - for (InterferenceNode* node : iteration.GetPrunableNodes()) { - LiveInterval* interval = node->GetInterval(); - if (interval->HasRegister()) { - Location low_reg = processing_core_regs - ? Location::RegisterLocation(interval->GetRegister()) - : Location::FpuRegisterLocation(interval->GetRegister()); - codegen_->AddAllocatedRegister(low_reg); - if (interval->HasHighInterval()) { - LiveInterval* high = interval->GetHighInterval(); - DCHECK(high->HasRegister()); - Location high_reg = processing_core_regs - ? Location::RegisterLocation(high->GetRegister()) - : Location::FpuRegisterLocation(high->GetRegister()); - codegen_->AddAllocatedRegister(high_reg); - } - } else { - DCHECK_IMPLIES(interval->HasHighInterval(), - !interval->GetHighInterval()->HasRegister()); - } - } - - break; - } - } // while unsuccessful - } // for processing_core_instructions - - // (6) Resolve locations and deconstruct SSA form. 
- RegisterAllocationResolver(codegen_, liveness_) - .Resolve(ArrayRef<HInstruction* const>(safepoints_), - reserved_art_method_slots_ + reserved_out_slots_, - num_int_spill_slots_, - num_long_spill_slots_, - num_float_spill_slots_, - num_double_spill_slots_, - catch_phi_spill_slot_counter_, - ArrayRef<LiveInterval* const>(temp_intervals_)); - - if (kIsDebugBuild) { - Validate(/*log_fatal_on_failure*/ true); - } -} - -bool RegisterAllocatorGraphColor::Validate(bool log_fatal_on_failure) { - for (bool processing_core_regs : {true, false}) { - ScopedArenaAllocator allocator(allocator_->GetArenaStack()); - ScopedArenaVector<LiveInterval*> intervals( - allocator.Adapter(kArenaAllocRegisterAllocatorValidate)); - for (size_t i = 0; i < liveness_.GetNumberOfSsaValues(); ++i) { - HInstruction* instruction = liveness_.GetInstructionFromSsaIndex(i); - LiveInterval* interval = instruction->GetLiveInterval(); - if (interval != nullptr && IsCoreInterval(interval) == processing_core_regs) { - intervals.push_back(instruction->GetLiveInterval()); - } - } - - ScopedArenaVector<InterferenceNode*>& physical_nodes = processing_core_regs - ? physical_core_nodes_ - : physical_fp_nodes_; - for (InterferenceNode* fixed : physical_nodes) { - LiveInterval* interval = fixed->GetInterval(); - if (interval->GetFirstRange() != nullptr) { - // Ideally we would check fixed ranges as well, but currently there are times when - // two fixed intervals for the same register will overlap. For example, a fixed input - // and a fixed output may sometimes share the same register, in which there will be two - // fixed intervals for the same place. - } - } - - for (LiveInterval* temp : temp_intervals_) { - if (IsCoreInterval(temp) == processing_core_regs) { - intervals.push_back(temp); - } - } - - size_t spill_slots = num_int_spill_slots_ - + num_long_spill_slots_ - + num_float_spill_slots_ - + num_double_spill_slots_ - + catch_phi_spill_slot_counter_; - bool ok = ValidateIntervals(ArrayRef<LiveInterval* const>(intervals), - spill_slots, - reserved_art_method_slots_ + reserved_out_slots_, - *codegen_, - processing_core_regs, - log_fatal_on_failure); - if (!ok) { - return false; - } - } // for processing_core_regs - - return true; -} - -void RegisterAllocatorGraphColor::ProcessInstructions() { - for (HBasicBlock* block : codegen_->GetGraph()->GetLinearPostOrder()) { - // Note that we currently depend on this ordering, since some helper - // code is designed for linear scan register allocation. - for (HBackwardInstructionIterator instr_it(block->GetInstructions()); - !instr_it.Done(); - instr_it.Advance()) { - ProcessInstruction(instr_it.Current()); - } - - for (HInstructionIterator phi_it(block->GetPhis()); !phi_it.Done(); phi_it.Advance()) { - ProcessInstruction(phi_it.Current()); - } - - if (block->IsCatchBlock() - || (block->IsLoopHeader() && block->GetLoopInformation()->IsIrreducible())) { - // By blocking all registers at the top of each catch block or irreducible loop, we force - // intervals belonging to the live-in set of the catch/header block to be spilled. - // TODO(ngeoffray): Phis in this block could be allocated in register. 
- size_t position = block->GetLifetimeStart(); - BlockRegisters(position, position + 1); - } - } -} - -bool RegisterAllocatorGraphColor::TryRemoveSuspendCheckEntry(HInstruction* instruction) { - LocationSummary* locations = instruction->GetLocations(); - if (instruction->IsSuspendCheckEntry() && !codegen_->NeedsSuspendCheckEntry()) { - // TODO: We do this here because we do not want the suspend check to artificially - // create live registers. We should find another place, but this is currently the - // simplest. - DCHECK_EQ(locations->GetTempCount(), 0u); - instruction->GetBlock()->RemoveInstruction(instruction); - return true; - } - return false; -} - -void RegisterAllocatorGraphColor::ProcessInstruction(HInstruction* instruction) { - LocationSummary* locations = instruction->GetLocations(); - if (locations == nullptr) { - return; - } - if (TryRemoveSuspendCheckEntry(instruction)) { - return; - } - - CheckForTempLiveIntervals(instruction); - CheckForSafepoint(instruction); - if (locations->WillCall()) { - // If a call will happen, create fixed intervals for caller-save registers. - // TODO: Note that it may be beneficial to later split intervals at this point, - // so that we allow last-minute moves from a caller-save register - // to a callee-save register. - BlockRegisters(instruction->GetLifetimePosition(), - instruction->GetLifetimePosition() + 1, - /*caller_save_only*/ true); - } - CheckForFixedInputs(instruction); - - LiveInterval* interval = instruction->GetLiveInterval(); - if (interval == nullptr) { - // Instructions lacking a valid output location do not have a live interval. - DCHECK(!locations->Out().IsValid()); - return; - } - - // Low intervals act as representatives for their corresponding high interval. - DCHECK(!interval->IsHighInterval()); - if (codegen_->NeedsTwoRegisters(interval->GetType())) { - interval->AddHighInterval(); - } - AddSafepointsFor(instruction); - CheckForFixedOutput(instruction); - AllocateSpillSlotForCatchPhi(instruction); - - ScopedArenaVector<LiveInterval*>& intervals = IsCoreInterval(interval) - ? core_intervals_ - : fp_intervals_; - if (interval->HasSpillSlot() || instruction->IsConstant()) { - // Note that if an interval already has a spill slot, then its value currently resides - // in the stack (e.g., parameters). Thus we do not have to allocate a register until its first - // register use. This is also true for constants, which can be materialized at any point. - size_t first_register_use = interval->FirstRegisterUse(); - if (first_register_use != kNoLifetime) { - LiveInterval* split = SplitBetween(interval, interval->GetStart(), first_register_use - 1); - intervals.push_back(split); - } else { - // We won't allocate a register for this value. - } - } else { - intervals.push_back(interval); - } -} - -void RegisterAllocatorGraphColor::CheckForFixedInputs(HInstruction* instruction) { - // We simply block physical registers where necessary. - // TODO: Ideally we would coalesce the physical register with the register - // allocated to the input value, but this can be tricky if, e.g., there - // could be multiple physical register uses of the same value at the - // same instruction. Furthermore, there's currently no distinction between - // fixed inputs to a call (which will be clobbered) and other fixed inputs (which - // may not be clobbered). 
- LocationSummary* locations = instruction->GetLocations(); - size_t position = instruction->GetLifetimePosition(); - for (size_t i = 0; i < locations->GetInputCount(); ++i) { - Location input = locations->InAt(i); - if (input.IsRegister() || input.IsFpuRegister()) { - BlockRegister(input, position, position + 1); - codegen_->AddAllocatedRegister(input); - } else if (input.IsPair()) { - BlockRegister(input.ToLow(), position, position + 1); - BlockRegister(input.ToHigh(), position, position + 1); - codegen_->AddAllocatedRegister(input.ToLow()); - codegen_->AddAllocatedRegister(input.ToHigh()); - } - } -} - -void RegisterAllocatorGraphColor::CheckForFixedOutput(HInstruction* instruction) { - // If an instruction has a fixed output location, we give the live interval a register and then - // proactively split it just after the definition point to avoid creating too many interferences - // with a fixed node. - LiveInterval* interval = instruction->GetLiveInterval(); - Location out = interval->GetDefinedBy()->GetLocations()->Out(); - size_t position = instruction->GetLifetimePosition(); - DCHECK_GE(interval->GetEnd() - position, 2u); - - if (out.IsUnallocated() && out.GetPolicy() == Location::kSameAsFirstInput) { - out = instruction->GetLocations()->InAt(0); - } - - if (out.IsRegister() || out.IsFpuRegister()) { - interval->SetRegister(out.reg()); - codegen_->AddAllocatedRegister(out); - Split(interval, position + 1); - } else if (out.IsPair()) { - interval->SetRegister(out.low()); - interval->GetHighInterval()->SetRegister(out.high()); - codegen_->AddAllocatedRegister(out.ToLow()); - codegen_->AddAllocatedRegister(out.ToHigh()); - Split(interval, position + 1); - } else if (out.IsStackSlot() || out.IsDoubleStackSlot()) { - interval->SetSpillSlot(out.GetStackIndex()); - } else { - DCHECK(out.IsUnallocated() || out.IsConstant()); - } -} - -void RegisterAllocatorGraphColor::AddSafepointsFor(HInstruction* instruction) { - LiveInterval* interval = instruction->GetLiveInterval(); - for (size_t safepoint_index = safepoints_.size(); safepoint_index > 0; --safepoint_index) { - HInstruction* safepoint = safepoints_[safepoint_index - 1u]; - size_t safepoint_position = safepoint->GetLifetimePosition(); - - // Test that safepoints_ are ordered in the optimal way. - DCHECK(safepoint_index == safepoints_.size() || - safepoints_[safepoint_index]->GetLifetimePosition() < safepoint_position); - - if (safepoint_position == interval->GetStart()) { - // The safepoint is for this instruction, so the location of the instruction - // does not need to be saved. - DCHECK_EQ(safepoint_index, safepoints_.size()); - DCHECK_EQ(safepoint, instruction); - continue; - } else if (interval->IsDeadAt(safepoint_position)) { - break; - } else if (!interval->Covers(safepoint_position)) { - // Hole in the interval. 
- continue; - } - interval->AddSafepoint(safepoint); - } -} - -void RegisterAllocatorGraphColor::CheckForTempLiveIntervals(HInstruction* instruction) { - LocationSummary* locations = instruction->GetLocations(); - size_t position = instruction->GetLifetimePosition(); - for (size_t i = 0; i < locations->GetTempCount(); ++i) { - Location temp = locations->GetTemp(i); - if (temp.IsRegister() || temp.IsFpuRegister()) { - BlockRegister(temp, position, position + 1); - codegen_->AddAllocatedRegister(temp); - } else { - DCHECK(temp.IsUnallocated()); - switch (temp.GetPolicy()) { - case Location::kRequiresRegister: { - LiveInterval* interval = - LiveInterval::MakeTempInterval(allocator_, DataType::Type::kInt32); - interval->AddTempUse(instruction, i); - core_intervals_.push_back(interval); - temp_intervals_.push_back(interval); - break; - } - - case Location::kRequiresFpuRegister: { - LiveInterval* interval = - LiveInterval::MakeTempInterval(allocator_, DataType::Type::kFloat64); - interval->AddTempUse(instruction, i); - fp_intervals_.push_back(interval); - temp_intervals_.push_back(interval); - if (codegen_->NeedsTwoRegisters(DataType::Type::kFloat64)) { - interval->AddHighInterval(/*is_temp*/ true); - temp_intervals_.push_back(interval->GetHighInterval()); - } - break; - } - - default: - LOG(FATAL) << "Unexpected policy for temporary location " - << temp.GetPolicy(); - } - } - } -} - -void RegisterAllocatorGraphColor::CheckForSafepoint(HInstruction* instruction) { - LocationSummary* locations = instruction->GetLocations(); - - if (locations->NeedsSafepoint()) { - safepoints_.push_back(instruction); - } -} - -LiveInterval* RegisterAllocatorGraphColor::TrySplit(LiveInterval* interval, size_t position) { - if (interval->GetStart() < position && position < interval->GetEnd()) { - return Split(interval, position); - } else { - return interval; - } -} - -void RegisterAllocatorGraphColor::SplitAtRegisterUses(LiveInterval* interval) { - DCHECK(!interval->IsHighInterval()); - - // Split just after a register definition. - if (interval->IsParent() && interval->DefinitionRequiresRegister()) { - interval = TrySplit(interval, interval->GetStart() + 1); - } - - // Process uses in the range [interval->GetStart(), interval->GetEnd()], i.e. - // [interval->GetStart(), interval->GetEnd() + 1) - auto matching_use_range = FindMatchingUseRange(interval->GetUses().begin(), - interval->GetUses().end(), - interval->GetStart(), - interval->GetEnd() + 1u); - // Split around register uses. - for (const UsePosition& use : matching_use_range) { - if (use.RequiresRegister()) { - size_t position = use.GetPosition(); - interval = TrySplit(interval, position - 1); - if (liveness_.GetInstructionFromPosition(position / 2)->IsControlFlow()) { - // If we are at the very end of a basic block, we cannot split right - // at the use. Split just after instead. 
- interval = TrySplit(interval, position + 1); - } else { - interval = TrySplit(interval, position); - } - } - } -} - -void RegisterAllocatorGraphColor::AllocateSpillSlotForCatchPhi(HInstruction* instruction) { - if (instruction->IsPhi() && instruction->AsPhi()->IsCatchPhi()) { - HPhi* phi = instruction->AsPhi(); - LiveInterval* interval = phi->GetLiveInterval(); - - HInstruction* previous_phi = phi->GetPrevious(); - DCHECK(previous_phi == nullptr || - previous_phi->AsPhi()->GetRegNumber() <= phi->GetRegNumber()) - << "Phis expected to be sorted by vreg number, " - << "so that equivalent phis are adjacent."; - - if (phi->IsVRegEquivalentOf(previous_phi)) { - // Assign the same spill slot. - DCHECK(previous_phi->GetLiveInterval()->HasSpillSlot()); - interval->SetSpillSlot(previous_phi->GetLiveInterval()->GetSpillSlot()); - } else { - interval->SetSpillSlot(catch_phi_spill_slot_counter_); - catch_phi_spill_slot_counter_ += interval->NumberOfSpillSlotsNeeded(); - } - } -} - -void RegisterAllocatorGraphColor::BlockRegister(Location location, - size_t start, - size_t end) { - DCHECK(location.IsRegister() || location.IsFpuRegister()); - int reg = location.reg(); - LiveInterval* interval = location.IsRegister() - ? physical_core_nodes_[reg]->GetInterval() - : physical_fp_nodes_[reg]->GetInterval(); - DCHECK(interval->GetRegister() == reg); - bool blocked_by_codegen = location.IsRegister() - ? codegen_->IsBlockedCoreRegister(reg) - : codegen_->IsBlockedFloatingPointRegister(reg); - if (blocked_by_codegen) { - // We've already blocked this register for the entire method. (And adding a - // range inside another range violates the preconditions of AddRange). - } else { - interval->AddRange(start, end); - } -} - -void RegisterAllocatorGraphColor::BlockRegisters(size_t start, size_t end, bool caller_save_only) { - for (size_t i = 0; i < codegen_->GetNumberOfCoreRegisters(); ++i) { - if (!caller_save_only || !codegen_->IsCoreCalleeSaveRegister(i)) { - BlockRegister(Location::RegisterLocation(i), start, end); - } - } - for (size_t i = 0; i < codegen_->GetNumberOfFloatingPointRegisters(); ++i) { - if (!caller_save_only || !codegen_->IsFloatingPointCalleeSaveRegister(i)) { - BlockRegister(Location::FpuRegisterLocation(i), start, end); - } - } -} - -void ColoringIteration::AddPotentialInterference(InterferenceNode* from, - InterferenceNode* to, - bool guaranteed_not_interfering_yet, - bool both_directions) { - if (from->IsPrecolored()) { - // We save space by ignoring outgoing edges from fixed nodes. - } else if (to->IsPrecolored()) { - // It is important that only a single node represents a given fixed register in the - // interference graph. We retrieve that node here. - const ScopedArenaVector<InterferenceNode*>& physical_nodes = - to->GetInterval()->IsFloatingPoint() ? register_allocator_->physical_fp_nodes_ - : register_allocator_->physical_core_nodes_; - InterferenceNode* physical_node = physical_nodes[to->GetInterval()->GetRegister()]; - from->AddInterference( - physical_node, /*guaranteed_not_interfering_yet*/ false, &adjacent_nodes_links_); - DCHECK_EQ(to->GetInterval()->GetRegister(), physical_node->GetInterval()->GetRegister()); - DCHECK_EQ(to->GetAlias(), physical_node) << "Fixed nodes should alias the canonical fixed node"; - - // If a node interferes with a fixed pair node, the weight of the edge may - // be inaccurate after using the alias of the pair node, because the alias of the pair node - // is a singular node. 
- // We could make special pair fixed nodes, but that ends up being too conservative because - // a node could then interfere with both {r1} and {r1,r2}, leading to a degree of - // three rather than two. - // Instead, we explicitly add an interference with the high node of the fixed pair node. - // TODO: This is too conservative at time for pair nodes, but the fact that fixed pair intervals - // can be unaligned on x86 complicates things. - if (to->IsPair()) { - InterferenceNode* high_node = - physical_nodes[to->GetInterval()->GetHighInterval()->GetRegister()]; - DCHECK_EQ(to->GetInterval()->GetHighInterval()->GetRegister(), - high_node->GetInterval()->GetRegister()); - from->AddInterference( - high_node, /*guaranteed_not_interfering_yet*/ false, &adjacent_nodes_links_); - } - } else { - // Standard interference between two uncolored nodes. - from->AddInterference(to, guaranteed_not_interfering_yet, &adjacent_nodes_links_); - } - - if (both_directions) { - AddPotentialInterference(to, from, guaranteed_not_interfering_yet, /*both_directions*/ false); - } -} - -// Returns true if `in_node` represents an input interval of `out_node`, and the output interval -// is allowed to have the same register as the input interval. -// TODO: Ideally we should just produce correct intervals in liveness analysis. -// We would need to refactor the current live interval layout to do so, which is -// no small task. -static bool CheckInputOutputCanOverlap(InterferenceNode* in_node, InterferenceNode* out_node) { - LiveInterval* output_interval = out_node->GetInterval(); - HInstruction* defined_by = output_interval->GetDefinedBy(); - if (defined_by == nullptr) { - // This must not be a definition point. - return false; - } - - LocationSummary* locations = defined_by->GetLocations(); - if (locations->OutputCanOverlapWithInputs()) { - // This instruction does not allow the output to reuse a register from an input. - return false; - } - - LiveInterval* input_interval = in_node->GetInterval(); - LiveInterval* next_sibling = input_interval->GetNextSibling(); - size_t def_position = defined_by->GetLifetimePosition(); - size_t use_position = def_position + 1; - if (next_sibling != nullptr && next_sibling->GetStart() == use_position) { - // The next sibling starts at the use position, so reusing the input register in the output - // would clobber the input before it's moved into the sibling interval location. - return false; - } - - if (!input_interval->IsDeadAt(use_position) && input_interval->CoversSlow(use_position)) { - // The input interval is live after the use position. - return false; - } - - HInputsRef inputs = defined_by->GetInputs(); - for (size_t i = 0; i < inputs.size(); ++i) { - if (inputs[i]->GetLiveInterval()->GetSiblingAt(def_position) == input_interval) { - DCHECK(input_interval->SameRegisterKind(*output_interval)); - return true; - } - } - - // The input interval was not an input for this instruction. - return false; -} - -void ColoringIteration::BuildInterferenceGraph( - const ScopedArenaVector<LiveInterval*>& intervals, - const ScopedArenaVector<InterferenceNode*>& physical_nodes) { - DCHECK(interval_node_map_.empty() && prunable_nodes_.empty()); - // Build the interference graph efficiently by ordering range endpoints - // by position and doing a linear sweep to find interferences. (That is, we - // jump from endpoint to endpoint, maintaining a set of intervals live at each - // point. If two nodes are ever in the live set at the same time, then they - // interfere with each other.) 
- // - // We order by both position and (secondarily) by whether the endpoint - // begins or ends a range; we want to process range endings before range - // beginnings at the same position because they should not conflict. - // - // For simplicity, we create a tuple for each endpoint, and then sort the tuples. - // Tuple contents: (position, is_range_beginning, node). - ScopedArenaVector<std::tuple<size_t, bool, InterferenceNode*>> range_endpoints( - allocator_->Adapter(kArenaAllocRegisterAllocator)); - - // We reserve plenty of space to avoid excessive copying. - range_endpoints.reserve(4 * prunable_nodes_.size()); - - for (LiveInterval* parent : intervals) { - for (LiveInterval* sibling = parent; sibling != nullptr; sibling = sibling->GetNextSibling()) { - LiveRange* range = sibling->GetFirstRange(); - if (range != nullptr) { - InterferenceNode* node = - new (allocator_) InterferenceNode(sibling, register_allocator_->liveness_); - interval_node_map_.insert(std::make_pair(sibling, node)); - - if (sibling->HasRegister()) { - // Fixed nodes should alias the canonical node for the corresponding register. - node->stage = NodeStage::kPrecolored; - InterferenceNode* physical_node = physical_nodes[sibling->GetRegister()]; - node->SetAlias(physical_node); - DCHECK_EQ(node->GetInterval()->GetRegister(), - physical_node->GetInterval()->GetRegister()); - } else { - node->stage = NodeStage::kPrunable; - prunable_nodes_.push_back(node); - } - - while (range != nullptr) { - range_endpoints.push_back(std::make_tuple(range->GetStart(), true, node)); - range_endpoints.push_back(std::make_tuple(range->GetEnd(), false, node)); - range = range->GetNext(); - } - } - } - } - - // Sort the endpoints. - // We explicitly ignore the third entry of each tuple (the node pointer) in order - // to maintain determinism. - std::sort(range_endpoints.begin(), range_endpoints.end(), - [] (const std::tuple<size_t, bool, InterferenceNode*>& lhs, - const std::tuple<size_t, bool, InterferenceNode*>& rhs) { - return std::tie(std::get<0>(lhs), std::get<1>(lhs)) - < std::tie(std::get<0>(rhs), std::get<1>(rhs)); - }); - - // Nodes live at the current position in the linear sweep. - ScopedArenaVector<InterferenceNode*> live(allocator_->Adapter(kArenaAllocRegisterAllocator)); - - // Linear sweep. When we encounter the beginning of a range, we add the corresponding node to the - // live set. When we encounter the end of a range, we remove the corresponding node - // from the live set. Nodes interfere if they are in the live set at the same time. - for (auto it = range_endpoints.begin(); it != range_endpoints.end(); ++it) { - bool is_range_beginning; - InterferenceNode* node; - size_t position; - // Extract information from the tuple, including the node this tuple represents. - std::tie(position, is_range_beginning, node) = *it; - - if (is_range_beginning) { - bool guaranteed_not_interfering_yet = position == node->GetInterval()->GetStart(); - for (InterferenceNode* conflicting : live) { - DCHECK_NE(node, conflicting); - if (CheckInputOutputCanOverlap(conflicting, node)) { - // We do not add an interference, because the instruction represented by `node` allows - // its output to share a register with an input, represented here by `conflicting`. - } else { - AddPotentialInterference(node, conflicting, guaranteed_not_interfering_yet); - } - } - DCHECK(std::find(live.begin(), live.end(), node) == live.end()); - live.push_back(node); - } else { - // End of range. 
- auto live_it = std::find(live.begin(), live.end(), node); - DCHECK(live_it != live.end()); - live.erase(live_it); - } - } - DCHECK(live.empty()); -} - -void ColoringIteration::CreateCoalesceOpportunity(InterferenceNode* a, - InterferenceNode* b, - CoalesceKind kind, - size_t position) { - DCHECK_EQ(a->IsPair(), b->IsPair()) - << "Nodes of different memory widths should never be coalesced"; - CoalesceOpportunity* opportunity = - new (allocator_) CoalesceOpportunity(a, b, kind, position, register_allocator_->liveness_); - a->AddCoalesceOpportunity(opportunity, &coalesce_opportunities_links_); - b->AddCoalesceOpportunity(opportunity, &coalesce_opportunities_links_); - coalesce_worklist_.push(opportunity); -} - -// When looking for coalesce opportunities, we use the interval_node_map_ to find the node -// corresponding to an interval. Note that not all intervals are in this map, notably the parents -// of constants and stack arguments. (However, these interval should not be involved in coalesce -// opportunities anyway, because they're not going to be in registers.) -void ColoringIteration::FindCoalesceOpportunities() { - DCHECK(coalesce_worklist_.empty()); - - for (InterferenceNode* node : prunable_nodes_) { - LiveInterval* interval = node->GetInterval(); - - // Coalesce siblings. - LiveInterval* next_sibling = interval->GetNextSibling(); - if (next_sibling != nullptr && interval->GetEnd() == next_sibling->GetStart()) { - auto it = interval_node_map_.find(next_sibling); - if (it != interval_node_map_.end()) { - InterferenceNode* sibling_node = it->second; - CreateCoalesceOpportunity(node, - sibling_node, - CoalesceKind::kAdjacentSibling, - interval->GetEnd()); - } - } - - // Coalesce fixed outputs with this interval if this interval is an adjacent sibling. - LiveInterval* parent = interval->GetParent(); - if (parent->HasRegister() - && parent->GetNextSibling() == interval - && parent->GetEnd() == interval->GetStart()) { - auto it = interval_node_map_.find(parent); - if (it != interval_node_map_.end()) { - InterferenceNode* parent_node = it->second; - CreateCoalesceOpportunity(node, - parent_node, - CoalesceKind::kFixedOutputSibling, - parent->GetEnd()); - } - } - - // Try to prevent moves across blocks. - // Note that this does not lead to many succeeding coalesce attempts, so could be removed - // if found to add to compile time. - const SsaLivenessAnalysis& liveness = register_allocator_->liveness_; - if (interval->IsSplit() && liveness.IsAtBlockBoundary(interval->GetStart() / 2)) { - // If the start of this interval is at a block boundary, we look at the - // location of the interval in blocks preceding the block this interval - // starts at. This can avoid a move between the two blocks. - HBasicBlock* block = liveness.GetBlockFromPosition(interval->GetStart() / 2); - for (HBasicBlock* predecessor : block->GetPredecessors()) { - size_t position = predecessor->GetLifetimeEnd() - 1; - LiveInterval* existing = interval->GetParent()->GetSiblingAt(position); - if (existing != nullptr) { - auto it = interval_node_map_.find(existing); - if (it != interval_node_map_.end()) { - InterferenceNode* existing_node = it->second; - CreateCoalesceOpportunity(node, - existing_node, - CoalesceKind::kNonlinearControlFlow, - position); - } - } - } - } - - // Coalesce phi inputs with the corresponding output. 
- HInstruction* defined_by = interval->GetDefinedBy(); - if (defined_by != nullptr && defined_by->IsPhi()) { - ArrayRef<HBasicBlock* const> predecessors(defined_by->GetBlock()->GetPredecessors()); - HInputsRef inputs = defined_by->GetInputs(); - - for (size_t i = 0, e = inputs.size(); i < e; ++i) { - // We want the sibling at the end of the appropriate predecessor block. - size_t position = predecessors[i]->GetLifetimeEnd() - 1; - LiveInterval* input_interval = inputs[i]->GetLiveInterval()->GetSiblingAt(position); - - auto it = interval_node_map_.find(input_interval); - if (it != interval_node_map_.end()) { - InterferenceNode* input_node = it->second; - CreateCoalesceOpportunity(node, input_node, CoalesceKind::kPhi, position); - } - } - } - - // Coalesce output with first input when policy is kSameAsFirstInput. - if (defined_by != nullptr) { - Location out = defined_by->GetLocations()->Out(); - if (out.IsUnallocated() && out.GetPolicy() == Location::kSameAsFirstInput) { - LiveInterval* input_interval - = defined_by->InputAt(0)->GetLiveInterval()->GetSiblingAt(interval->GetStart() - 1); - // TODO: Could we consider lifetime holes here? - if (input_interval->GetEnd() == interval->GetStart()) { - auto it = interval_node_map_.find(input_interval); - if (it != interval_node_map_.end()) { - InterferenceNode* input_node = it->second; - CreateCoalesceOpportunity(node, - input_node, - CoalesceKind::kFirstInput, - interval->GetStart()); - } - } - } - } - - // An interval that starts an instruction (that is, it is not split), may - // re-use the registers used by the inputs of that instruction, based on the - // location summary. - if (defined_by != nullptr) { - DCHECK(!interval->IsSplit()); - LocationSummary* locations = defined_by->GetLocations(); - if (!locations->OutputCanOverlapWithInputs()) { - HInputsRef inputs = defined_by->GetInputs(); - for (size_t i = 0; i < inputs.size(); ++i) { - size_t def_point = defined_by->GetLifetimePosition(); - // TODO: Getting the sibling at the def_point might not be quite what we want - // for fixed inputs, since the use will be *at* the def_point rather than after. - LiveInterval* input_interval = inputs[i]->GetLiveInterval()->GetSiblingAt(def_point); - if (input_interval != nullptr && - input_interval->HasHighInterval() == interval->HasHighInterval()) { - auto it = interval_node_map_.find(input_interval); - if (it != interval_node_map_.end()) { - InterferenceNode* input_node = it->second; - CreateCoalesceOpportunity(node, - input_node, - CoalesceKind::kAnyInput, - interval->GetStart()); - } - } - } - } - } - - // Try to prevent moves into fixed input locations. - // Process uses in the range (interval->GetStart(), interval->GetEnd()], i.e. - // [interval->GetStart() + 1, interval->GetEnd() + 1) - auto matching_use_range = FindMatchingUseRange(interval->GetUses().begin(), - interval->GetUses().end(), - interval->GetStart() + 1u, - interval->GetEnd() + 1u); - for (const UsePosition& use : matching_use_range) { - HInstruction* user = use.GetUser(); - if (user == nullptr) { - // User may be null for certain intervals, such as temp intervals. - continue; - } - LocationSummary* locations = user->GetLocations(); - Location input = locations->InAt(use.GetInputIndex()); - if (input.IsRegister() || input.IsFpuRegister()) { - // TODO: Could try to handle pair interval too, but coalescing with fixed pair nodes - // is currently not supported. - InterferenceNode* fixed_node = input.IsRegister() - ? 
register_allocator_->physical_core_nodes_[input.reg()]
- : register_allocator_->physical_fp_nodes_[input.reg()];
- CreateCoalesceOpportunity(node,
- fixed_node,
- CoalesceKind::kFixedInput,
- user->GetLifetimePosition());
- }
- }
- } // for node in prunable_nodes
-}
-
-static bool IsLowDegreeNode(InterferenceNode* node, size_t num_regs) {
- return node->GetOutDegree() < num_regs;
-}
-
-static bool IsHighDegreeNode(InterferenceNode* node, size_t num_regs) {
- return !IsLowDegreeNode(node, num_regs);
-}
-
-void ColoringIteration::PruneInterferenceGraph() {
- DCHECK(pruned_nodes_.empty()
- && simplify_worklist_.empty()
- && freeze_worklist_.empty()
- && spill_worklist_.empty());
- // When pruning the graph, we refer to nodes with degree less than num_regs as low degree nodes,
- // and all others as high degree nodes. The distinction is important: low degree nodes are
- // guaranteed a color, while high degree nodes are not.
-
- // Build worklists. Note that the coalesce worklist has already been
- // filled by FindCoalesceOpportunities().
- for (InterferenceNode* node : prunable_nodes_) {
- DCHECK(!node->IsPrecolored()) << "Fixed nodes should never be pruned";
- if (IsLowDegreeNode(node, num_regs_)) {
- if (node->GetCoalesceOpportunities().empty()) {
- // Simplify Worklist.
- node->stage = NodeStage::kSimplifyWorklist;
- simplify_worklist_.push_back(node);
- } else {
- // Freeze Worklist.
- node->stage = NodeStage::kFreezeWorklist;
- freeze_worklist_.push_back(node);
- }
- } else {
- // Spill worklist.
- node->stage = NodeStage::kSpillWorklist;
- spill_worklist_.push(node);
- }
- }
-
- // Prune graph.
- // Note that we do not remove a node from its current worklist if it moves to another, so it may
- // be in multiple worklists at once; the node's `stage` says which worklist it is really in.
- while (true) {
- if (!simplify_worklist_.empty()) {
- // Prune low-degree nodes.
- // TODO: pop_back() should work as well, but it didn't; we get a
- // failed check while pruning. We should look into this.
- InterferenceNode* node = simplify_worklist_.front();
- simplify_worklist_.pop_front();
- DCHECK_EQ(node->stage, NodeStage::kSimplifyWorklist) << "Cannot move from simplify list";
- DCHECK_LT(node->GetOutDegree(), num_regs_) << "Nodes in simplify list should be low degree";
- DCHECK(!node->IsMoveRelated()) << "Nodes in simplify list should not be move related";
- PruneNode(node);
- } else if (!coalesce_worklist_.empty()) {
- // Coalesce.
- CoalesceOpportunity* opportunity = coalesce_worklist_.top();
- coalesce_worklist_.pop();
- if (opportunity->stage == CoalesceStage::kWorklist) {
- Coalesce(opportunity);
- }
- } else if (!freeze_worklist_.empty()) {
- // Freeze moves and prune a low-degree move-related node.
- InterferenceNode* node = freeze_worklist_.front();
- freeze_worklist_.pop_front();
- if (node->stage == NodeStage::kFreezeWorklist) {
- DCHECK_LT(node->GetOutDegree(), num_regs_) << "Nodes in freeze list should be low degree";
- DCHECK(node->IsMoveRelated()) << "Nodes in freeze list should be move related";
- FreezeMoves(node);
- PruneNode(node);
- }
- } else if (!spill_worklist_.empty()) {
- // We spill the lowest-priority node, because pruning a node earlier
- // gives it a higher chance of being spilled.
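Stripped of the coalesce and freeze worklists, the pruning loop above is Kempe-style simplification: keep removing nodes of degree below k, since they are trivially colorable, and only pick a potential spill candidate when no such node remains. A hypothetical sketch over plain adjacency sets (`GNode` is an illustrative type, and the spill candidate here is chosen arbitrarily rather than by spill priority):

    #include <cstddef>
    #include <set>
    #include <stack>
    #include <vector>

    struct GNode {
      std::set<GNode*> adjacent;  // Neighbors that have not been pruned yet.
      bool pruned = false;
    };

    // Returns the pruning order; nodes pushed last are colored first when the stack is unwound.
    std::stack<GNode*> Prune(const std::vector<GNode*>& nodes, size_t num_regs) {
      std::stack<GNode*> pruned_order;
      for (size_t remaining = nodes.size(); remaining != 0; --remaining) {
        GNode* candidate = nullptr;
        for (GNode* node : nodes) {
          if (node->pruned) {
            continue;
          }
          if (node->adjacent.size() < num_regs) {
            candidate = node;  // Low degree: guaranteed a color no matter how neighbors are colored.
            break;
          }
          if (candidate == nullptr) {
            candidate = node;  // High degree: pruned only if no low degree node is found.
          }
        }
        candidate->pruned = true;
        pruned_order.push(candidate);
        for (GNode* adj : candidate->adjacent) {
          adj->adjacent.erase(candidate);  // Lower each remaining neighbor's degree.
        }
      }
      return pruned_order;
    }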
- InterferenceNode* node = spill_worklist_.top(); - spill_worklist_.pop(); - if (node->stage == NodeStage::kSpillWorklist) { - DCHECK_GE(node->GetOutDegree(), num_regs_) << "Nodes in spill list should be high degree"; - FreezeMoves(node); - PruneNode(node); - } - } else { - // Pruning complete. - break; - } - } - DCHECK_EQ(prunable_nodes_.size(), pruned_nodes_.size()); -} - -void ColoringIteration::EnableCoalesceOpportunities(InterferenceNode* node) { - for (CoalesceOpportunity* opportunity : node->GetCoalesceOpportunities()) { - if (opportunity->stage == CoalesceStage::kActive) { - opportunity->stage = CoalesceStage::kWorklist; - coalesce_worklist_.push(opportunity); - } - } -} - -void ColoringIteration::PruneNode(InterferenceNode* node) { - DCHECK_NE(node->stage, NodeStage::kPruned); - DCHECK(!node->IsPrecolored()); - node->stage = NodeStage::kPruned; - pruned_nodes_.push(node); - - for (InterferenceNode* adj : node->GetAdjacentNodes()) { - DCHECK_NE(adj->stage, NodeStage::kPruned) << "Should be no interferences with pruned nodes"; - - if (adj->IsPrecolored()) { - // No effect on pre-colored nodes; they're never pruned. - } else { - // Remove the interference. - bool was_high_degree = IsHighDegreeNode(adj, num_regs_); - DCHECK(adj->ContainsInterference(node)) - << "Missing reflexive interference from non-fixed node"; - adj->RemoveInterference(node); - - // Handle transitions from high degree to low degree. - if (was_high_degree && IsLowDegreeNode(adj, num_regs_)) { - EnableCoalesceOpportunities(adj); - for (InterferenceNode* adj_adj : adj->GetAdjacentNodes()) { - EnableCoalesceOpportunities(adj_adj); - } - - DCHECK_EQ(adj->stage, NodeStage::kSpillWorklist); - if (adj->IsMoveRelated()) { - adj->stage = NodeStage::kFreezeWorklist; - freeze_worklist_.push_back(adj); - } else { - adj->stage = NodeStage::kSimplifyWorklist; - simplify_worklist_.push_back(adj); - } - } - } - } -} - -void ColoringIteration::CheckTransitionFromFreezeWorklist(InterferenceNode* node) { - if (IsLowDegreeNode(node, num_regs_) && !node->IsMoveRelated()) { - DCHECK_EQ(node->stage, NodeStage::kFreezeWorklist); - node->stage = NodeStage::kSimplifyWorklist; - simplify_worklist_.push_back(node); - } -} - -void ColoringIteration::FreezeMoves(InterferenceNode* node) { - for (CoalesceOpportunity* opportunity : node->GetCoalesceOpportunities()) { - if (opportunity->stage == CoalesceStage::kDefunct) { - // Constrained moves should remain constrained, since they will not be considered - // during last-chance coalescing. - } else { - opportunity->stage = CoalesceStage::kInactive; - } - InterferenceNode* other = opportunity->node_a->GetAlias() == node - ? opportunity->node_b->GetAlias() - : opportunity->node_a->GetAlias(); - if (other != node && other->stage == NodeStage::kFreezeWorklist) { - DCHECK(IsLowDegreeNode(node, num_regs_)); - CheckTransitionFromFreezeWorklist(other); - } - } -} - -bool ColoringIteration::PrecoloredHeuristic(InterferenceNode* from, - InterferenceNode* into) { - if (!into->IsPrecolored()) { - // The uncolored heuristic will cover this case. - return false; - } - if (from->IsPair() || into->IsPair()) { - // TODO: Merging from a pair node is currently not supported, since fixed pair nodes - // are currently represented as two single fixed nodes in the graph, and `into` is - // only one of them. (We may lose the implicit connections to the second one in a merge.) - return false; - } - - // If all adjacent nodes of `from` are "ok", then we can conservatively merge with `into`. 
- // Reasons an adjacent node `adj` can be "ok":
- // (1) If `adj` is low degree, interference with `into` will not affect its existing
- // colorable guarantee. (Notice that coalescing cannot increase its degree.)
- // (2) If `adj` is pre-colored, it already interferes with `into`. See (3).
- // (3) If there's already an interference with `into`, coalescing will not add interferences.
- for (InterferenceNode* adj : from->GetAdjacentNodes()) {
- if (IsLowDegreeNode(adj, num_regs_) || adj->IsPrecolored() || adj->ContainsInterference(into)) {
- // Ok.
- } else {
- return false;
- }
- }
- return true;
-}
-
-bool ColoringIteration::UncoloredHeuristic(InterferenceNode* from,
- InterferenceNode* into) {
- if (into->IsPrecolored()) {
- // The pre-colored heuristic will handle this case.
- return false;
- }
-
- // Arbitrary cap to improve compile time. Tests show that this has negligible effect
- // on generated code.
- if (from->GetOutDegree() + into->GetOutDegree() > 2 * num_regs_) {
- return false;
- }
-
- // It's safe to coalesce two nodes if the resulting node has fewer than `num_regs` neighbors
- // of high degree. (Low degree neighbors can be ignored, because they will eventually be
- // pruned from the interference graph in the simplify stage.)
- size_t high_degree_interferences = 0;
- for (InterferenceNode* adj : from->GetAdjacentNodes()) {
- if (IsHighDegreeNode(adj, num_regs_)) {
- high_degree_interferences += from->EdgeWeightWith(adj);
- }
- }
- for (InterferenceNode* adj : into->GetAdjacentNodes()) {
- if (IsHighDegreeNode(adj, num_regs_)) {
- if (from->ContainsInterference(adj)) {
- // We've already counted this adjacent node.
- // Furthermore, its degree will decrease if coalescing succeeds. Thus, it's possible that
- // we should not have counted it at all. (This extends the textbook Briggs coalescing test,
- // but remains conservative.)
- if (adj->GetOutDegree() - into->EdgeWeightWith(adj) < num_regs_) {
- high_degree_interferences -= from->EdgeWeightWith(adj);
- }
- } else {
- high_degree_interferences += into->EdgeWeightWith(adj);
- }
- }
- }
-
- return high_degree_interferences < num_regs_;
-}
-
-void ColoringIteration::Combine(InterferenceNode* from,
- InterferenceNode* into) {
- from->SetAlias(into);
-
- // Add interferences.
- for (InterferenceNode* adj : from->GetAdjacentNodes()) {
- bool was_low_degree = IsLowDegreeNode(adj, num_regs_);
- AddPotentialInterference(adj, into, /*guaranteed_not_interfering_yet*/ false);
- if (was_low_degree && IsHighDegreeNode(adj, num_regs_)) {
- // This is a (temporary) transition to a high degree node. Its degree will decrease again
- // when we prune `from`, but it's best to be consistent about the current worklist.
- adj->stage = NodeStage::kSpillWorklist;
- spill_worklist_.push(adj);
- }
- }
-
- // Add coalesce opportunities.
- for (CoalesceOpportunity* opportunity : from->GetCoalesceOpportunities()) {
- if (opportunity->stage != CoalesceStage::kDefunct) {
- into->AddCoalesceOpportunity(opportunity, &coalesce_opportunities_links_);
- }
- }
- EnableCoalesceOpportunities(from);
-
- // Prune and update worklists.
- PruneNode(from);
- if (IsLowDegreeNode(into, num_regs_)) {
- // Coalesce(...) takes care of checking for a transition to the simplify worklist.
- DCHECK_EQ(into->stage, NodeStage::kFreezeWorklist);
- } else if (into->stage == NodeStage::kFreezeWorklist) {
- // This is a transition to a high degree node.
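UncoloredHeuristic above is a weighted form of the Briggs conservative-coalescing test: merging is allowed only when the combined node would have fewer than k neighbors of significant (>= k) degree, so the merged node can still be simplified and coalescing can never make a colorable graph uncolorable. An unweighted sketch of that test (hypothetical `GNode` with an adjacency set; edge weights and pair intervals are ignored):

    #include <cstddef>
    #include <set>

    struct GNode {
      std::set<GNode*> adjacent;
    };

    // Briggs test: coalescing `a` and `b` is conservative if the merged node would have
    // fewer than `num_regs` neighbors of degree >= num_regs.
    bool BriggsCanCoalesce(GNode* a, GNode* b, size_t num_regs) {
      std::set<GNode*> neighbors(a->adjacent.begin(), a->adjacent.end());
      neighbors.insert(b->adjacent.begin(), b->adjacent.end());
      neighbors.erase(a);
      neighbors.erase(b);

      size_t significant = 0;
      for (GNode* adj : neighbors) {
        size_t degree = adj->adjacent.size();
        // A neighbor adjacent to both `a` and `b` loses one edge when the two merge.
        if (adj->adjacent.count(a) != 0 && adj->adjacent.count(b) != 0) {
          degree -= 1;
        }
        if (degree >= num_regs) {
          ++significant;
        }
      }
      return significant < num_regs;
    }

PrecoloredHeuristic above is the complementary George-style test: every neighbor of `from` must be low degree, pre-colored, or already interfering with `into`.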
- into->stage = NodeStage::kSpillWorklist; - spill_worklist_.push(into); - } else { - DCHECK(into->stage == NodeStage::kSpillWorklist || into->stage == NodeStage::kPrecolored); - } -} - -void ColoringIteration::Coalesce(CoalesceOpportunity* opportunity) { - InterferenceNode* from = opportunity->node_a->GetAlias(); - InterferenceNode* into = opportunity->node_b->GetAlias(); - DCHECK_NE(from->stage, NodeStage::kPruned); - DCHECK_NE(into->stage, NodeStage::kPruned); - - if (from->IsPrecolored()) { - // If we have one pre-colored node, make sure it's the `into` node. - std::swap(from, into); - } - - if (from == into) { - // These nodes have already been coalesced. - opportunity->stage = CoalesceStage::kDefunct; - CheckTransitionFromFreezeWorklist(from); - } else if (from->IsPrecolored() || from->ContainsInterference(into)) { - // These nodes interfere. - opportunity->stage = CoalesceStage::kDefunct; - CheckTransitionFromFreezeWorklist(from); - CheckTransitionFromFreezeWorklist(into); - } else if (PrecoloredHeuristic(from, into) - || UncoloredHeuristic(from, into)) { - // We can coalesce these nodes. - opportunity->stage = CoalesceStage::kDefunct; - Combine(from, into); - CheckTransitionFromFreezeWorklist(into); - } else { - // We cannot coalesce, but we may be able to later. - opportunity->stage = CoalesceStage::kActive; - } -} - -// Build a mask with a bit set for each register assigned to some -// interval in `intervals`. -template <typename Container> -static std::bitset<kMaxNumRegs> BuildConflictMask(const Container& intervals) { - std::bitset<kMaxNumRegs> conflict_mask; - for (InterferenceNode* adjacent : intervals) { - LiveInterval* conflicting = adjacent->GetInterval(); - if (conflicting->HasRegister()) { - conflict_mask.set(conflicting->GetRegister()); - if (conflicting->HasHighInterval()) { - DCHECK(conflicting->GetHighInterval()->HasRegister()); - conflict_mask.set(conflicting->GetHighInterval()->GetRegister()); - } - } else { - DCHECK(!conflicting->HasHighInterval() - || !conflicting->GetHighInterval()->HasRegister()); - } - } - return conflict_mask; -} - -bool RegisterAllocatorGraphColor::IsCallerSave(size_t reg, bool processing_core_regs) { - return processing_core_regs - ? !codegen_->IsCoreCalleeSaveRegister(reg) - : !codegen_->IsFloatingPointCalleeSaveRegister(reg); -} - -static bool RegisterIsAligned(size_t reg) { - return reg % 2 == 0; -} - -static size_t FindFirstZeroInConflictMask(std::bitset<kMaxNumRegs> conflict_mask) { - // We use CTZ (count trailing zeros) to quickly find the lowest 0 bit. - // Note that CTZ is undefined if all bits are 0, so we special-case it. - return conflict_mask.all() ? conflict_mask.size() : CTZ(~conflict_mask.to_ulong()); -} - -bool ColoringIteration::ColorInterferenceGraph() { - DCHECK_LE(num_regs_, kMaxNumRegs) << "kMaxNumRegs is too small"; - ScopedArenaVector<LiveInterval*> colored_intervals( - allocator_->Adapter(kArenaAllocRegisterAllocator)); - bool successful = true; - - while (!pruned_nodes_.empty()) { - InterferenceNode* node = pruned_nodes_.top(); - pruned_nodes_.pop(); - LiveInterval* interval = node->GetInterval(); - size_t reg = 0; - - InterferenceNode* alias = node->GetAlias(); - if (alias != node) { - // This node was coalesced with another. 
- LiveInterval* alias_interval = alias->GetInterval(); - if (alias_interval->HasRegister()) { - reg = alias_interval->GetRegister(); - DCHECK(!BuildConflictMask(node->GetAdjacentNodes())[reg]) - << "This node conflicts with the register it was coalesced with"; - } else { - DCHECK(false) << node->GetOutDegree() << " " << alias->GetOutDegree() << " " - << "Move coalescing was not conservative, causing a node to be coalesced " - << "with another node that could not be colored"; - if (interval->RequiresRegister()) { - successful = false; - } - } - } else { - // Search for free register(s). - std::bitset<kMaxNumRegs> conflict_mask = BuildConflictMask(node->GetAdjacentNodes()); - if (interval->HasHighInterval()) { - // Note that the graph coloring allocator assumes that pair intervals are aligned here, - // excluding pre-colored pair intervals (which can currently be unaligned on x86). If we - // change the alignment requirements here, we will have to update the algorithm (e.g., - // be more conservative about the weight of edges adjacent to pair nodes.) - while (reg < num_regs_ - 1 && (conflict_mask[reg] || conflict_mask[reg + 1])) { - reg += 2; - } - - // Try to use a caller-save register first. - for (size_t i = 0; i < num_regs_ - 1; i += 2) { - bool low_caller_save = register_allocator_->IsCallerSave(i, processing_core_regs_); - bool high_caller_save = register_allocator_->IsCallerSave(i + 1, processing_core_regs_); - if (!conflict_mask[i] && !conflict_mask[i + 1]) { - if (low_caller_save && high_caller_save) { - reg = i; - break; - } else if (low_caller_save || high_caller_save) { - reg = i; - // Keep looking to try to get both parts in caller-save registers. - } - } - } - } else { - // Not a pair interval. - reg = FindFirstZeroInConflictMask(conflict_mask); - - // Try to use caller-save registers first. - for (size_t i = 0; i < num_regs_; ++i) { - if (!conflict_mask[i] && register_allocator_->IsCallerSave(i, processing_core_regs_)) { - reg = i; - break; - } - } - } - - // Last-chance coalescing. - for (CoalesceOpportunity* opportunity : node->GetCoalesceOpportunities()) { - if (opportunity->stage == CoalesceStage::kDefunct) { - continue; - } - LiveInterval* other_interval = opportunity->node_a->GetAlias() == node - ? opportunity->node_b->GetAlias()->GetInterval() - : opportunity->node_a->GetAlias()->GetInterval(); - if (other_interval->HasRegister()) { - size_t coalesce_register = other_interval->GetRegister(); - if (interval->HasHighInterval()) { - if (!conflict_mask[coalesce_register] && - !conflict_mask[coalesce_register + 1] && - RegisterIsAligned(coalesce_register)) { - reg = coalesce_register; - break; - } - } else if (!conflict_mask[coalesce_register]) { - reg = coalesce_register; - break; - } - } - } - } - - if (reg < (interval->HasHighInterval() ? num_regs_ - 1 : num_regs_)) { - // Assign register. - DCHECK(!interval->HasRegister()); - interval->SetRegister(reg); - colored_intervals.push_back(interval); - if (interval->HasHighInterval()) { - DCHECK(!interval->GetHighInterval()->HasRegister()); - interval->GetHighInterval()->SetRegister(reg + 1); - colored_intervals.push_back(interval->GetHighInterval()); - } - } else if (interval->RequiresRegister()) { - // The interference graph is too dense to color. Make it sparser by - // splitting this live interval. - successful = false; - register_allocator_->SplitAtRegisterUses(interval); - // We continue coloring, because there may be additional intervals that cannot - // be colored, and that we should split. - } else { - // Spill. 
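When a pruned node is popped without a coalesced register, the code above packs the neighbors' assignments into a conflict bitmask and scans for a free register, preferring caller-save registers. A standalone sketch of that selection for single (non-pair) intervals (`kMaxRegs` and the `is_caller_save` callback are illustrative; the removed code additionally uses CTZ on the inverted mask for the quick first-free lookup and handles aligned register pairs):

    #include <bitset>
    #include <cstddef>
    #include <functional>
    #include <optional>

    constexpr size_t kMaxRegs = 32;

    // Returns the chosen register, or std::nullopt if every register conflicts.
    std::optional<size_t> PickRegister(const std::bitset<kMaxRegs>& conflicts,
                                       size_t num_regs,
                                       const std::function<bool(size_t)>& is_caller_save) {
      std::optional<size_t> first_free;
      for (size_t reg = 0; reg < num_regs; ++reg) {
        if (conflicts[reg]) {
          continue;
        }
        if (!first_free.has_value()) {
          first_free = reg;
        }
        // Prefer caller-save registers so that callee-save registers, which must be
        // preserved in the prologue and epilogue, are only used when necessary.
        if (is_caller_save(reg)) {
          return reg;
        }
      }
      return first_free;
    }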
- node->SetNeedsSpillSlot(); - } - } - - // If unsuccessful, reset all register assignments. - if (!successful) { - for (LiveInterval* interval : colored_intervals) { - interval->ClearRegister(); - } - } - - return successful; -} - -void RegisterAllocatorGraphColor::AllocateSpillSlots(ArrayRef<InterferenceNode* const> nodes) { - // The register allocation resolver will organize the stack based on value type, - // so we assign stack slots for each value type separately. - ScopedArenaAllocator allocator(allocator_->GetArenaStack()); - ScopedArenaAllocatorAdapter<void> adapter = allocator.Adapter(kArenaAllocRegisterAllocator); - ScopedArenaVector<LiveInterval*> double_intervals(adapter); - ScopedArenaVector<LiveInterval*> long_intervals(adapter); - ScopedArenaVector<LiveInterval*> float_intervals(adapter); - ScopedArenaVector<LiveInterval*> int_intervals(adapter); - - // The set of parent intervals already handled. - ScopedArenaSet<LiveInterval*> seen(adapter); - - // Find nodes that need spill slots. - for (InterferenceNode* node : nodes) { - if (!node->NeedsSpillSlot()) { - continue; - } - - LiveInterval* parent = node->GetInterval()->GetParent(); - if (seen.find(parent) != seen.end()) { - // We've already handled this interval. - // This can happen if multiple siblings of the same interval request a stack slot. - continue; - } - seen.insert(parent); - - HInstruction* defined_by = parent->GetDefinedBy(); - if (parent->HasSpillSlot()) { - // We already have a spill slot for this value that we can reuse. - } else if (defined_by->IsParameterValue()) { - // Parameters already have a stack slot. - parent->SetSpillSlot(codegen_->GetStackSlotOfParameter(defined_by->AsParameterValue())); - } else if (defined_by->IsCurrentMethod()) { - // The current method is always at stack slot 0. - parent->SetSpillSlot(0); - } else if (defined_by->IsConstant()) { - // Constants don't need a spill slot. - } else { - // We need to find a spill slot for this interval. Place it in the correct - // worklist to be processed later. - switch (node->GetInterval()->GetType()) { - case DataType::Type::kFloat64: - double_intervals.push_back(parent); - break; - case DataType::Type::kInt64: - long_intervals.push_back(parent); - break; - case DataType::Type::kFloat32: - float_intervals.push_back(parent); - break; - case DataType::Type::kReference: - case DataType::Type::kInt32: - case DataType::Type::kUint16: - case DataType::Type::kUint8: - case DataType::Type::kInt8: - case DataType::Type::kBool: - case DataType::Type::kInt16: - int_intervals.push_back(parent); - break; - case DataType::Type::kUint32: - case DataType::Type::kUint64: - case DataType::Type::kVoid: - LOG(FATAL) << "Unexpected type for interval " << node->GetInterval()->GetType(); - UNREACHABLE(); - } - } - } - - // Color spill slots for each value type. - ColorSpillSlots(ArrayRef<LiveInterval* const>(double_intervals), &num_double_spill_slots_); - ColorSpillSlots(ArrayRef<LiveInterval* const>(long_intervals), &num_long_spill_slots_); - ColorSpillSlots(ArrayRef<LiveInterval* const>(float_intervals), &num_float_spill_slots_); - ColorSpillSlots(ArrayRef<LiveInterval* const>(int_intervals), &num_int_spill_slots_); -} - -void RegisterAllocatorGraphColor::ColorSpillSlots(ArrayRef<LiveInterval* const> intervals, - /* out */ size_t* num_stack_slots_used) { - // We cannot use the original interference graph here because spill slots are assigned to - // all of the siblings of an interval, whereas an interference node represents only a single - // sibling. 
So, we assign spill slots linear-scan-style by sorting all the interval endpoints - // by position, and assigning the lowest spill slot available when we encounter an interval - // beginning. We ignore lifetime holes for simplicity. - ScopedArenaAllocator allocator(allocator_->GetArenaStack()); - ScopedArenaVector<std::tuple<size_t, bool, LiveInterval*>> interval_endpoints( - allocator.Adapter(kArenaAllocRegisterAllocator)); - - for (LiveInterval* parent_interval : intervals) { - DCHECK(parent_interval->IsParent()); - DCHECK(!parent_interval->HasSpillSlot()); - size_t start = parent_interval->GetStart(); - size_t end = parent_interval->GetLastSibling()->GetEnd(); - DCHECK_LT(start, end); - interval_endpoints.push_back(std::make_tuple(start, true, parent_interval)); - interval_endpoints.push_back(std::make_tuple(end, false, parent_interval)); - } - - // Sort by position. - // We explicitly ignore the third entry of each tuple (the interval pointer) in order - // to maintain determinism. - std::sort(interval_endpoints.begin(), interval_endpoints.end(), - [] (const std::tuple<size_t, bool, LiveInterval*>& lhs, - const std::tuple<size_t, bool, LiveInterval*>& rhs) { - return std::tie(std::get<0>(lhs), std::get<1>(lhs)) - < std::tie(std::get<0>(rhs), std::get<1>(rhs)); - }); - - ArenaBitVector taken(&allocator, 0, true, kArenaAllocRegisterAllocator); - for (auto it = interval_endpoints.begin(), end = interval_endpoints.end(); it != end; ++it) { - // Extract information from the current tuple. - LiveInterval* parent_interval; - bool is_interval_beginning; - size_t position; - std::tie(position, is_interval_beginning, parent_interval) = *it; - size_t number_of_spill_slots_needed = parent_interval->NumberOfSpillSlotsNeeded(); - - if (is_interval_beginning) { - DCHECK(!parent_interval->HasSpillSlot()); - DCHECK_EQ(position, parent_interval->GetStart()); - - // Find first available free stack slot(s). - size_t slot = 0; - for (; ; ++slot) { - bool found = true; - for (size_t s = slot, u = slot + number_of_spill_slots_needed; s < u; s++) { - if (taken.IsBitSet(s)) { - found = false; - break; // failure - } - } - if (found) { - break; // success - } - } - - parent_interval->SetSpillSlot(slot); - - *num_stack_slots_used = std::max(*num_stack_slots_used, slot + number_of_spill_slots_needed); - if (number_of_spill_slots_needed > 1 && *num_stack_slots_used % 2 != 0) { - // The parallel move resolver requires that there be an even number of spill slots - // allocated for pair value types. - ++(*num_stack_slots_used); - } - - for (size_t s = slot, u = slot + number_of_spill_slots_needed; s < u; s++) { - taken.SetBit(s); - } - } else { - DCHECK_EQ(position, parent_interval->GetLastSibling()->GetEnd()); - DCHECK(parent_interval->HasSpillSlot()); - - // Free up the stack slot(s) used by this interval. - size_t slot = parent_interval->GetSpillSlot(); - for (size_t s = slot, u = slot + number_of_spill_slots_needed; s < u; s++) { - DCHECK(taken.IsBitSet(s)); - taken.ClearBit(s); - } - } - } - DCHECK_EQ(taken.NumSetBits(), 0u); -} - -} // namespace art diff --git a/compiler/optimizing/register_allocator_graph_color.h b/compiler/optimizing/register_allocator_graph_color.h deleted file mode 100644 index 0e10152049..0000000000 --- a/compiler/optimizing/register_allocator_graph_color.h +++ /dev/null @@ -1,195 +0,0 @@ -/* - * Copyright (C) 2016 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef ART_COMPILER_OPTIMIZING_REGISTER_ALLOCATOR_GRAPH_COLOR_H_ -#define ART_COMPILER_OPTIMIZING_REGISTER_ALLOCATOR_GRAPH_COLOR_H_ - -#include "arch/instruction_set.h" -#include "base/arena_object.h" -#include "base/array_ref.h" -#include "base/macros.h" -#include "base/scoped_arena_containers.h" -#include "register_allocator.h" - -namespace art HIDDEN { - -class CodeGenerator; -class HBasicBlock; -class HGraph; -class HInstruction; -class HParallelMove; -class Location; -class SsaLivenessAnalysis; -class InterferenceNode; -struct CoalesceOpportunity; -enum class CoalesceKind; - -/** - * A graph coloring register allocator. - * - * The algorithm proceeds as follows: - * (1) Build an interference graph, where nodes represent live intervals, and edges represent - * interferences between two intervals. Coloring this graph with k colors is isomorphic to - * finding a valid register assignment with k registers. - * (2) To color the graph, first prune all nodes with degree less than k, since these nodes are - * guaranteed a color. (No matter how we color their adjacent nodes, we can give them a - * different color.) As we prune nodes from the graph, more nodes may drop below degree k, - * enabling further pruning. The key is to maintain the pruning order in a stack, so that we - * can color the nodes in the reverse order. - * When there are no more nodes with degree less than k, we start pruning alternate nodes based - * on heuristics. Since these nodes are not guaranteed a color, we are careful to - * prioritize nodes that require a register. We also prioritize short intervals, because - * short intervals cannot be split very much if coloring fails (see below). "Prioritizing" - * a node amounts to pruning it later, since it will have fewer interferences if we prune other - * nodes first. - * (3) We color nodes in the reverse order in which we pruned them. If we cannot assign - * a node a color, we do one of two things: - * - If the node requires a register, we consider the current coloring attempt a failure. - * However, we split the node's live interval in order to make the interference graph - * sparser, so that future coloring attempts may succeed. - * - If the node does not require a register, we simply assign it a location on the stack. - * - * If iterative move coalescing is enabled, the algorithm also attempts to conservatively - * combine nodes in the graph that would prefer to have the same color. (For example, the output - * of a phi instruction would prefer to have the same register as at least one of its inputs.) - * There are several additional steps involved with this: - * - We look for coalesce opportunities by examining each live interval, a step similar to that - * used by linear scan when looking for register hints. - * - When pruning the graph, we maintain a worklist of coalesce opportunities, as well as a worklist - * of low degree nodes that have associated coalesce opportunities. Only when we run out of - * coalesce opportunities do we start pruning coalesce-associated nodes. 
- * - When pruning a node, if any nodes transition from high degree to low degree, we add
- * associated coalesce opportunities to the worklist, since these opportunities may now succeed.
- * - Whether two nodes can be combined is decided by two different heuristics--one used when
- * coalescing uncolored nodes, and one used for coalescing an uncolored node with a colored node.
- * It is vital that we only combine two nodes if the node that remains is guaranteed to receive
- * a color. This is because additional spilling is more costly than failing to coalesce.
- * - Even if nodes are not coalesced while pruning, we keep the coalesce opportunities around
- * to be used as last-chance register hints when coloring. If nothing else, we try to use
- * caller-save registers before callee-save registers.
- *
- * A good reference for graph coloring register allocation is
- * "Modern Compiler Implementation in Java" (Andrew W. Appel, 2nd Edition).
- */
-class RegisterAllocatorGraphColor : public RegisterAllocator {
- public:
- RegisterAllocatorGraphColor(ScopedArenaAllocator* allocator,
- CodeGenerator* codegen,
- const SsaLivenessAnalysis& analysis,
- bool iterative_move_coalescing = true);
- ~RegisterAllocatorGraphColor() override;
-
- void AllocateRegisters() override;
-
- bool Validate(bool log_fatal_on_failure) override;
-
- private:
- // Collect all intervals and prepare for register allocation.
- void ProcessInstructions();
- void ProcessInstruction(HInstruction* instruction);
-
- // If any inputs require specific registers, block those registers
- // at the position of this instruction.
- void CheckForFixedInputs(HInstruction* instruction);
-
- // If the output of an instruction requires a specific register, split
- // the interval and assign the register to the first part.
- void CheckForFixedOutput(HInstruction* instruction);
-
- // Add all applicable safepoints to a live interval.
- // Currently depends on instruction processing order.
- void AddSafepointsFor(HInstruction* instruction);
-
- // Collect all live intervals associated with the temporary locations
- // needed by an instruction.
- void CheckForTempLiveIntervals(HInstruction* instruction);
-
- // If a safe point is needed, add a synthesized interval to later record
- // the number of live registers at this point.
- void CheckForSafepoint(HInstruction* instruction);
-
- // Try to remove the SuspendCheck at function entry. Returns true if it was successful.
- bool TryRemoveSuspendCheckEntry(HInstruction* instruction);
-
- // Split an interval, but only if `position` is inside of `interval`.
- // Return either the new interval, or the original interval if not split.
- static LiveInterval* TrySplit(LiveInterval* interval, size_t position);
-
- // To ensure every graph can be colored, split live intervals
- // at their register defs and uses. This creates short intervals with low
- // degree in the interference graph, which are prioritized during graph
- // coloring.
- void SplitAtRegisterUses(LiveInterval* interval);
-
- // If the given instruction is a catch phi, give it a spill slot.
- void AllocateSpillSlotForCatchPhi(HInstruction* instruction);
-
- // Ensure that the given register cannot be allocated for a given range.
- void BlockRegister(Location location, size_t start, size_t end); - void BlockRegisters(size_t start, size_t end, bool caller_save_only = false); - - bool IsCallerSave(size_t reg, bool processing_core_regs); - - // Assigns stack slots to a list of intervals, ensuring that interfering intervals are not - // assigned the same stack slot. - void ColorSpillSlots(ArrayRef<LiveInterval* const> nodes, /* out */ size_t* num_stack_slots_used); - - // Provide stack slots to nodes that need them. - void AllocateSpillSlots(ArrayRef<InterferenceNode* const> nodes); - - // Whether iterative move coalescing should be performed. Iterative move coalescing - // improves code quality, but increases compile time. - const bool iterative_move_coalescing_; - - // Live intervals, split by kind (core and floating point). - // These should not contain high intervals, as those are represented by - // the corresponding low interval throughout register allocation. - ScopedArenaVector<LiveInterval*> core_intervals_; - ScopedArenaVector<LiveInterval*> fp_intervals_; - - // Intervals for temporaries, saved for special handling in the resolution phase. - ScopedArenaVector<LiveInterval*> temp_intervals_; - - // Safepoints, saved for special handling while processing instructions. - ScopedArenaVector<HInstruction*> safepoints_; - - // Interference nodes representing specific registers. These are "pre-colored" nodes - // in the interference graph. - ScopedArenaVector<InterferenceNode*> physical_core_nodes_; - ScopedArenaVector<InterferenceNode*> physical_fp_nodes_; - - // Allocated stack slot counters. - size_t num_int_spill_slots_; - size_t num_double_spill_slots_; - size_t num_float_spill_slots_; - size_t num_long_spill_slots_; - size_t catch_phi_spill_slot_counter_; - - // Number of stack slots needed for the pointer to the current method. - // This is 1 for 32-bit architectures, and 2 for 64-bit architectures. - const size_t reserved_art_method_slots_; - - // Number of stack slots needed for outgoing arguments. - const size_t reserved_out_slots_; - - friend class ColoringIteration; - - DISALLOW_COPY_AND_ASSIGN(RegisterAllocatorGraphColor); -}; - -} // namespace art - -#endif // ART_COMPILER_OPTIMIZING_REGISTER_ALLOCATOR_GRAPH_COLOR_H_ diff --git a/compiler/optimizing/register_allocator_test.cc b/compiler/optimizing/register_allocator_test.cc index d316aa5dc2..0d2d20682d 100644 --- a/compiler/optimizing/register_allocator_test.cc +++ b/compiler/optimizing/register_allocator_test.cc @@ -84,7 +84,8 @@ class RegisterAllocatorTest : public CommonCompilerTest, public OptimizingUnitTe TEST_F(RegisterAllocatorTest, test_name##_LinearScan) {\ test_name(Strategy::kRegisterAllocatorLinearScan);\ }\ -TEST_F(RegisterAllocatorTest, test_name##_GraphColor) {\ +/* Note: Graph coloring register allocator has been removed, so the test is DISABLED. */ \ +TEST_F(RegisterAllocatorTest, DISABLED_##test_name##_GraphColor) {\ test_name(Strategy::kRegisterAllocatorGraphColor);\ } |
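ColorSpillSlots in the removed register_allocator_graph_color.cc reuses the endpoint-sweep idea for the stack: sort interval endpoints, take the lowest free slot at each interval start, and release it at the interval end. A minimal sketch for single-slot values (the `StackInterval` type is illustrative; pair values and their even-slot alignment are omitted, and lifetime holes are ignored as in the original):

    #include <algorithm>
    #include <cstddef>
    #include <tuple>
    #include <vector>

    struct StackInterval {
      size_t start = 0;
      size_t end = 0;   // Exclusive; covers the whole value, including all of its siblings.
      size_t slot = 0;  // Assigned spill slot, valid once AssignSpillSlots returns.
    };

    // Assigns each interval the lowest stack slot that is free over [start, end).
    // Returns the total number of slots used.
    size_t AssignSpillSlots(const std::vector<StackInterval*>& intervals) {
      // (position, is_interval_beginning, interval). Ends sort before beginnings at the
      // same position, so a freed slot can be reused immediately.
      std::vector<std::tuple<size_t, bool, StackInterval*>> endpoints;
      for (StackInterval* interval : intervals) {
        endpoints.emplace_back(interval->start, true, interval);
        endpoints.emplace_back(interval->end, false, interval);
      }
      std::sort(endpoints.begin(), endpoints.end(),
                [](const std::tuple<size_t, bool, StackInterval*>& lhs,
                   const std::tuple<size_t, bool, StackInterval*>& rhs) {
                  return std::tie(std::get<0>(lhs), std::get<1>(lhs)) <
                         std::tie(std::get<0>(rhs), std::get<1>(rhs));
                });

      std::vector<bool> taken;  // taken[slot] is true while some live interval owns the slot.
      size_t num_slots_used = 0;
      for (const std::tuple<size_t, bool, StackInterval*>& endpoint : endpoints) {
        StackInterval* interval = std::get<2>(endpoint);
        if (std::get<1>(endpoint)) {
          size_t slot = 0;
          while (slot < taken.size() && taken[slot]) {
            ++slot;  // Scan for the first free slot.
          }
          if (slot == taken.size()) {
            taken.push_back(false);
          }
          taken[slot] = true;
          interval->slot = slot;
          num_slots_used = std::max(num_slots_used, slot + 1);
        } else {
          taken[interval->slot] = false;  // The interval ended; its slot is free again.
        }
      }
      return num_slots_used;
    }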