Diffstat (limited to 'compiler/optimizing')
 -rw-r--r--  compiler/optimizing/loop_analysis.cc      |  68
 -rw-r--r--  compiler/optimizing/loop_analysis.h        |  23
 -rw-r--r--  compiler/optimizing/loop_optimization.cc   |  17
 -rw-r--r--  compiler/optimizing/loop_optimization.h    |   4
 -rw-r--r--  compiler/optimizing/nodes.cc               |  17
 -rw-r--r--  compiler/optimizing/nodes.h                |   5
 -rw-r--r--  compiler/optimizing/superblock_cloner.cc   | 154
 -rw-r--r--  compiler/optimizing/superblock_cloner.h    |  31
 8 files changed, 257 insertions(+), 62 deletions(-)
diff --git a/compiler/optimizing/loop_analysis.cc b/compiler/optimizing/loop_analysis.cc
index a0760eff69..a2124455e2 100644
--- a/compiler/optimizing/loop_analysis.cc
+++ b/compiler/optimizing/loop_analysis.cc
@@ -35,6 +35,9 @@ void LoopAnalysis::CalculateLoopBasicProperties(HLoopInformation* loop_info,
for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
HInstruction* instruction = it.Current();
+ if (it.Current()->GetType() == DataType::Type::kInt64) {
+ analysis_results->has_long_type_instructions_ = true;
+ }
if (MakesScalarPeelingUnrollingNonBeneficial(instruction)) {
analysis_results->has_instructions_preventing_scalar_peeling_ = true;
analysis_results->has_instructions_preventing_scalar_unrolling_ = true;
@@ -61,34 +64,29 @@ bool LoopAnalysis::HasLoopAtLeastOneInvariantExit(HLoopInformation* loop_info) {
return false;
}
-class Arm64LoopHelper : public ArchDefaultLoopHelper {
+// Default implementation of loop helper; used for all targets unless a custom implementation
+// is provided. Enables scalar loop peeling and unrolling with the most conservative heuristics.
+class ArchDefaultLoopHelper : public ArchNoOptsLoopHelper {
public:
// Scalar loop unrolling parameters and heuristics.
//
// Maximum possible unrolling factor.
- static constexpr uint32_t kArm64ScalarMaxUnrollFactor = 2;
+ static constexpr uint32_t kScalarMaxUnrollFactor = 2;
// Loop's maximum instruction count. Loops with higher count will not be peeled/unrolled.
- static constexpr uint32_t kArm64ScalarHeuristicMaxBodySizeInstr = 40;
+ static constexpr uint32_t kScalarHeuristicMaxBodySizeInstr = 17;
// Loop's maximum basic block count. Loops with higher count will not be peeled/unrolled.
- static constexpr uint32_t kArm64ScalarHeuristicMaxBodySizeBlocks = 8;
+ static constexpr uint32_t kScalarHeuristicMaxBodySizeBlocks = 6;
- // SIMD loop unrolling parameters and heuristics.
- //
- // Maximum possible unrolling factor.
- static constexpr uint32_t kArm64SimdMaxUnrollFactor = 8;
- // Loop's maximum instruction count. Loops with higher count will not be unrolled.
- static constexpr uint32_t kArm64SimdHeuristicMaxBodySizeInstr = 50;
-
- bool IsLoopTooBigForScalarPeelingUnrolling(LoopAnalysisInfo* loop_analysis_info) const OVERRIDE {
- size_t instr_num = loop_analysis_info->GetNumberOfInstructions();
- size_t bb_num = loop_analysis_info->GetNumberOfBasicBlocks();
- return (instr_num >= kArm64ScalarHeuristicMaxBodySizeInstr ||
- bb_num >= kArm64ScalarHeuristicMaxBodySizeBlocks);
+ bool IsLoopNonBeneficialForScalarOpts(LoopAnalysisInfo* loop_analysis_info) const OVERRIDE {
+ return loop_analysis_info->HasLongTypeInstructions() ||
+ IsLoopTooBig(loop_analysis_info,
+ kScalarHeuristicMaxBodySizeInstr,
+ kScalarHeuristicMaxBodySizeBlocks);
}
uint32_t GetScalarUnrollingFactor(HLoopInformation* loop_info ATTRIBUTE_UNUSED,
uint64_t trip_count) const OVERRIDE {
- uint32_t desired_unrolling_factor = kArm64ScalarMaxUnrollFactor;
+ uint32_t desired_unrolling_factor = kScalarMaxUnrollFactor;
if (trip_count < desired_unrolling_factor || trip_count % desired_unrolling_factor != 0) {
return kNoUnrollingFactor;
}
@@ -98,6 +96,38 @@ class Arm64LoopHelper : public ArchDefaultLoopHelper {
bool IsLoopPeelingEnabled() const OVERRIDE { return true; }
+ protected:
+ bool IsLoopTooBig(LoopAnalysisInfo* loop_analysis_info,
+ size_t instr_threshold,
+ size_t bb_threshold) const {
+ size_t instr_num = loop_analysis_info->GetNumberOfInstructions();
+ size_t bb_num = loop_analysis_info->GetNumberOfBasicBlocks();
+ return (instr_num >= instr_threshold || bb_num >= bb_threshold);
+ }
+};
+
+// Custom implementation of loop helper for arm64 target. Enables heuristics for scalar loop
+// peeling and unrolling and supports SIMD loop unrolling.
+class Arm64LoopHelper : public ArchDefaultLoopHelper {
+ public:
+ // SIMD loop unrolling parameters and heuristics.
+ //
+ // Maximum possible unrolling factor.
+ static constexpr uint32_t kArm64SimdMaxUnrollFactor = 8;
+ // Loop's maximum instruction count. Loops with higher count will not be unrolled.
+ static constexpr uint32_t kArm64SimdHeuristicMaxBodySizeInstr = 50;
+
+ // Loop's maximum instruction count. Loops with higher count will not be peeled/unrolled.
+ static constexpr uint32_t kArm64ScalarHeuristicMaxBodySizeInstr = 40;
+ // Loop's maximum basic block count. Loops with higher count will not be peeled/unrolled.
+ static constexpr uint32_t kArm64ScalarHeuristicMaxBodySizeBlocks = 8;
+
+ bool IsLoopNonBeneficialForScalarOpts(LoopAnalysisInfo* loop_analysis_info) const OVERRIDE {
+ return IsLoopTooBig(loop_analysis_info,
+ kArm64ScalarHeuristicMaxBodySizeInstr,
+ kArm64ScalarHeuristicMaxBodySizeBlocks);
+ }
+
uint32_t GetSIMDUnrollingFactor(HBasicBlock* block,
int64_t trip_count,
uint32_t max_peel,
@@ -126,8 +156,8 @@ class Arm64LoopHelper : public ArchDefaultLoopHelper {
}
};
-ArchDefaultLoopHelper* ArchDefaultLoopHelper::Create(InstructionSet isa,
- ArenaAllocator* allocator) {
+ArchNoOptsLoopHelper* ArchNoOptsLoopHelper::Create(InstructionSet isa,
+ ArenaAllocator* allocator) {
switch (isa) {
case InstructionSet::kArm64: {
return new (allocator) Arm64LoopHelper;
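
The trip-count check in GetScalarUnrollingFactor only unrolls when the chosen factor divides the trip count exactly, so no remainder iterations are ever produced. A minimal standalone sketch of that heuristic, assuming kNoUnrollingFactor is 1 as in loop_analysis.h (an illustration, not ART code):

// Standalone sketch of the scalar unrolling-factor heuristic above.
// Assumption: kNoUnrollingFactor == 1 means "leave the loop as-is".
#include <cstdint>
#include <iostream>

constexpr uint32_t kNoUnrollingFactor = 1;
constexpr uint32_t kScalarMaxUnrollFactor = 2;

uint32_t ChooseScalarUnrollingFactor(uint64_t trip_count) {
  // Unroll only when the factor divides the trip count exactly, so no
  // remainder ("epilogue") iterations need to be generated.
  if (trip_count < kScalarMaxUnrollFactor || trip_count % kScalarMaxUnrollFactor != 0) {
    return kNoUnrollingFactor;
  }
  return kScalarMaxUnrollFactor;
}

int main() {
  std::cout << ChooseScalarUnrollingFactor(128) << "\n";  // 2: even trip count, unroll by 2
  std::cout << ChooseScalarUnrollingFactor(7) << "\n";    // 1: odd trip count, left alone
}
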
diff --git a/compiler/optimizing/loop_analysis.h b/compiler/optimizing/loop_analysis.h
index ece9858136..c09d3ff00f 100644
--- a/compiler/optimizing/loop_analysis.h
+++ b/compiler/optimizing/loop_analysis.h
@@ -35,6 +35,7 @@ class LoopAnalysisInfo : public ValueObject {
exits_num_(0),
has_instructions_preventing_scalar_peeling_(false),
has_instructions_preventing_scalar_unrolling_(false),
+ has_long_type_instructions_(false),
loop_info_(loop_info) {}
size_t GetNumberOfBasicBlocks() const { return bb_num_; }
@@ -49,6 +50,10 @@ class LoopAnalysisInfo : public ValueObject {
return has_instructions_preventing_scalar_unrolling_;
}
+ bool HasLongTypeInstructions() const {
+ return has_long_type_instructions_;
+ }
+
const HLoopInformation* GetLoopInfo() const { return loop_info_; }
private:
@@ -62,6 +67,9 @@ class LoopAnalysisInfo : public ValueObject {
bool has_instructions_preventing_scalar_peeling_;
// Whether the loop has instructions which make scalar loop unrolling non-beneficial.
bool has_instructions_preventing_scalar_unrolling_;
+ // Whether the loop has instructions of primitive long type; unrolling such loops will
+ // likely introduce spills/fills on 32-bit targets.
+ bool has_long_type_instructions_;
// Corresponding HLoopInformation.
const HLoopInformation* loop_info_;
@@ -117,22 +125,21 @@ class LoopAnalysis : public ValueObject {
// To support peeling/unrolling for a new architecture one needs to create new helper class,
// inherit it from this and add implementation for the following methods.
//
-class ArchDefaultLoopHelper : public ArenaObject<kArenaAllocOptimization> {
+class ArchNoOptsLoopHelper : public ArenaObject<kArenaAllocOptimization> {
public:
- virtual ~ArchDefaultLoopHelper() {}
+ virtual ~ArchNoOptsLoopHelper() {}
// Creates an instance of specialised helper for the target or default helper if the target
// doesn't support loop peeling and unrolling.
- static ArchDefaultLoopHelper* Create(InstructionSet isa, ArenaAllocator* allocator);
+ static ArchNoOptsLoopHelper* Create(InstructionSet isa, ArenaAllocator* allocator);
- // Returns whether the loop is too big for loop peeling/unrolling by checking its total number of
- // basic blocks and instructions.
+ // Returns whether the loop is not beneficial for loop peeling/unrolling.
//
- // If the loop body has too many instructions then peeling/unrolling optimization will not bring
- // any noticeable performance improvement however will increase the code size.
+ // For example, if the loop body has too many instructions then peeling/unrolling optimization
+ // will not bring any noticeable performance improvement however will increase the code size.
//
// Returns 'true' by default, should be overridden by particular target loop helper.
- virtual bool IsLoopTooBigForScalarPeelingUnrolling(
+ virtual bool IsLoopNonBeneficialForScalarOpts(
LoopAnalysisInfo* loop_analysis_info ATTRIBUTE_UNUSED) const { return true; }
// Returns optimal scalar unrolling factor for the loop.
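
The comment block above spells out the extension pattern: a base helper that enables nothing, a default helper with conservative heuristics, and per-ISA subclasses returned by a factory. A condensed sketch of that pattern outside of ART (class and method names here are invented for illustration and do not match the real API):

// Illustrative sketch of the helper hierarchy described above; names are
// invented for the example and are not the ART classes.
#include <cstdint>
#include <memory>

enum class Isa { kNone, kArm64 };

// Base helper: no loop optimizations are enabled (the "NoOpts" role).
class LoopHelperBase {
 public:
  virtual ~LoopHelperBase() = default;
  static std::unique_ptr<LoopHelperBase> Create(Isa isa);
  virtual bool IsPeelingEnabled() const { return false; }
  virtual uint32_t UnrollingFactor(uint64_t /*trip_count*/) const { return 1; }
};

// Conservative defaults, shared by targets without a custom helper.
class DefaultLoopHelper : public LoopHelperBase {
 public:
  bool IsPeelingEnabled() const override { return true; }
  uint32_t UnrollingFactor(uint64_t trip_count) const override {
    return (trip_count % 2 == 0) ? 2 : 1;
  }
};

// A target-specific helper only overrides what differs from the defaults.
class Arm64LikeLoopHelper : public DefaultLoopHelper {
 public:
  uint32_t UnrollingFactor(uint64_t trip_count) const override {
    return (trip_count % 4 == 0) ? 4 : DefaultLoopHelper::UnrollingFactor(trip_count);
  }
};

std::unique_ptr<LoopHelperBase> LoopHelperBase::Create(Isa isa) {
  switch (isa) {
    case Isa::kArm64: return std::make_unique<Arm64LikeLoopHelper>();
    default:          return std::make_unique<DefaultLoopHelper>();
  }
}

int main() {
  auto helper = LoopHelperBase::Create(Isa::kArm64);
  return helper->UnrollingFactor(16) == 4 ? 0 : 1;
}
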
diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc
index 1ce3524bd6..eda6bd1e86 100644
--- a/compiler/optimizing/loop_optimization.cc
+++ b/compiler/optimizing/loop_optimization.cc
@@ -33,9 +33,6 @@ namespace art {
// Enables vectorization (SIMDization) in the loop optimizer.
static constexpr bool kEnableVectorization = true;
-// Enables scalar loop unrolling in the loop optimizer.
-static constexpr bool kEnableScalarPeelingUnrolling = false;
-
//
// Static helpers.
//
@@ -457,7 +454,7 @@ HLoopOptimization::HLoopOptimization(HGraph* graph,
vector_header_(nullptr),
vector_body_(nullptr),
vector_index_(nullptr),
- arch_loop_helper_(ArchDefaultLoopHelper::Create(compiler_driver_ != nullptr
+ arch_loop_helper_(ArchNoOptsLoopHelper::Create(compiler_driver_ != nullptr
? compiler_driver_->GetInstructionSet()
: InstructionSet::kNone,
global_allocator_)) {
@@ -761,7 +758,7 @@ bool HLoopOptimization::OptimizeInnerLoop(LoopNode* node) {
bool HLoopOptimization::TryUnrollingForBranchPenaltyReduction(LoopNode* node) {
// Don't run peeling/unrolling if compiler_driver_ is nullptr (i.e., running under tests)
// as InstructionSet is needed.
- if (!kEnableScalarPeelingUnrolling || compiler_driver_ == nullptr) {
+ if (compiler_driver_ == nullptr) {
return false;
}
@@ -781,9 +778,9 @@ bool HLoopOptimization::TryUnrollingForBranchPenaltyReduction(LoopNode* node) {
LoopAnalysis::CalculateLoopBasicProperties(loop_info, &loop_analysis_info);
// Check "IsLoopClonable" last as it can be time-consuming.
- if (arch_loop_helper_->IsLoopTooBigForScalarPeelingUnrolling(&loop_analysis_info) ||
+ if (loop_analysis_info.HasInstructionsPreventingScalarUnrolling() ||
+ arch_loop_helper_->IsLoopNonBeneficialForScalarOpts(&loop_analysis_info) ||
(loop_analysis_info.GetNumberOfExits() > 1) ||
- loop_analysis_info.HasInstructionsPreventingScalarUnrolling() ||
!PeelUnrollHelper::IsLoopClonable(loop_info)) {
return false;
}
@@ -807,7 +804,7 @@ bool HLoopOptimization::TryUnrollingForBranchPenaltyReduction(LoopNode* node) {
bool HLoopOptimization::TryPeelingForLoopInvariantExitsElimination(LoopNode* node) {
// Don't run peeling/unrolling if compiler_driver_ is nullptr (i.e., running under tests)
// as InstructionSet is needed.
- if (!kEnableScalarPeelingUnrolling || compiler_driver_ == nullptr) {
+ if (compiler_driver_ == nullptr) {
return false;
}
@@ -821,8 +818,8 @@ bool HLoopOptimization::TryPeelingForLoopInvariantExitsElimination(LoopNode* nod
LoopAnalysis::CalculateLoopBasicProperties(loop_info, &loop_analysis_info);
// Check "IsLoopClonable" last as it can be time-consuming.
- if (arch_loop_helper_->IsLoopTooBigForScalarPeelingUnrolling(&loop_analysis_info) ||
- loop_analysis_info.HasInstructionsPreventingScalarPeeling() ||
+ if (loop_analysis_info.HasInstructionsPreventingScalarPeeling() ||
+ arch_loop_helper_->IsLoopNonBeneficialForScalarOpts(&loop_analysis_info) ||
!LoopAnalysis::HasLoopAtLeastOneInvariantExit(loop_info) ||
!PeelUnrollHelper::IsLoopClonable(loop_info)) {
return false;
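
Both call sites above keep the same ordering discipline: the cheap analysis-based rejections come first and the potentially expensive IsLoopClonable check stays last, so it only runs for loops that survive the earlier filters. A trivial standalone sketch of that short-circuit ordering (illustrative, not ART code):

#include <iostream>

static int expensive_calls = 0;

bool CheapReject() { return true; }                          // e.g. "has blocking instructions"
bool ExpensiveReject() { ++expensive_calls; return false; }  // stands in for the clonability scan

bool ShouldBailOut() {
  // With short-circuit '||', the expensive predicate is evaluated only when
  // every cheaper predicate before it returned false.
  return CheapReject() || ExpensiveReject();
}

int main() {
  ShouldBailOut();
  std::cout << "expensive checks run: " << expensive_calls << "\n";  // prints 0
}
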
diff --git a/compiler/optimizing/loop_optimization.h b/compiler/optimizing/loop_optimization.h
index 7807da15ed..191a93da26 100644
--- a/compiler/optimizing/loop_optimization.h
+++ b/compiler/optimizing/loop_optimization.h
@@ -28,7 +28,7 @@
namespace art {
class CompilerDriver;
-class ArchDefaultLoopHelper;
+class ArchNoOptsLoopHelper;
/**
* Loop optimizations. Builds a loop hierarchy and applies optimizations to
@@ -308,7 +308,7 @@ class HLoopOptimization : public HOptimization {
HInstruction* vector_index_; // normalized index of the new loop
// Helper for target-specific behaviour for loop optimizations.
- ArchDefaultLoopHelper* arch_loop_helper_;
+ ArchNoOptsLoopHelper* arch_loop_helper_;
friend class LoopOptimizationTest;
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index 99b0b186d4..ef8a757ad0 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -1121,6 +1121,23 @@ void HEnvironment::RemoveAsUserOfInput(size_t index) const {
user->FixUpUserRecordsAfterEnvUseRemoval(before_env_use_node);
}
+void HEnvironment::ReplaceInput(HInstruction* replacement, size_t index) {
+ const HUserRecord<HEnvironment*>& env_use_record = vregs_[index];
+ HInstruction* orig_instr = env_use_record.GetInstruction();
+
+ DCHECK(orig_instr != replacement);
+
+ HUseList<HEnvironment*>::iterator before_use_node = env_use_record.GetBeforeUseNode();
+ // Note: fixup_end remains valid across splice_after().
+ auto fixup_end = replacement->env_uses_.empty() ? replacement->env_uses_.begin()
+ : ++replacement->env_uses_.begin();
+ replacement->env_uses_.splice_after(replacement->env_uses_.before_begin(),
+ env_use_record.GetInstruction()->env_uses_,
+ before_use_node);
+ replacement->FixUpUserRecordsAfterEnvUseInsertion(fixup_end);
+ orig_instr->FixUpUserRecordsAfterEnvUseRemoval(before_use_node);
+}
+
HInstruction* HInstruction::GetNextDisregardingMoves() const {
HInstruction* next = GetNext();
while (next != nullptr && next->IsParallelMove()) {
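
HEnvironment::ReplaceInput moves exactly one use node between the env_uses_ lists with splice_after and captures fixup_end before the splice. The same pattern can be shown with std::forward_list, whose splice_after has the semantics ART's intrusive use list mimics (standalone illustration, not ART code):

#include <forward_list>
#include <iostream>

int main() {
  std::forward_list<int> dest = {10, 20};
  std::forward_list<int> src  = {1, 2, 3};

  // Analogue of 'fixup_end': one past the element about to be inserted at the front.
  auto fixup_end = dest.empty() ? dest.begin() : ++dest.begin();  // points at 20

  // Move the single element after src.begin() (the value 2) to the front of
  // dest, analogous to splicing one use node onto replacement->env_uses_.
  dest.splice_after(dest.before_begin(), src, src.begin());

  // fixup_end is still valid and still bounds the newly inserted range.
  for (auto it = dest.begin(); it != fixup_end; ++it) {
    std::cout << *it << " ";  // prints "2 10"
  }
  std::cout << "\n";
}
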
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 09d9c57a33..3fd5b6b02d 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -1909,6 +1909,11 @@ class HEnvironment : public ArenaObject<kArenaAllocEnvironment> {
void RemoveAsUserOfInput(size_t index) const;
+ // Replaces the input at position 'index' with the replacement; the replacement and old
+ // input instructions' env_uses_ lists are adjusted. The function works similarly to
+ // HInstruction::ReplaceInput.
+ void ReplaceInput(HInstruction* replacement, size_t index);
+
size_t Size() const { return vregs_.size(); }
HEnvironment* GetParent() const { return parent_; }
diff --git a/compiler/optimizing/superblock_cloner.cc b/compiler/optimizing/superblock_cloner.cc
index fad7729956..1b43618538 100644
--- a/compiler/optimizing/superblock_cloner.cc
+++ b/compiler/optimizing/superblock_cloner.cc
@@ -409,7 +409,7 @@ void SuperblockCloner::ResolvePhi(HPhi* phi) {
// Main algorithm methods.
//
-void SuperblockCloner::SearchForSubgraphExits(ArenaVector<HBasicBlock*>* exits) {
+void SuperblockCloner::SearchForSubgraphExits(ArenaVector<HBasicBlock*>* exits) const {
DCHECK(exits->empty());
for (uint32_t block_id : orig_bb_set_.Indexes()) {
HBasicBlock* block = GetBlockById(block_id);
@@ -521,6 +521,113 @@ void SuperblockCloner::ResolveDataFlow() {
}
//
+// Helpers for live-outs processing and Subgraph-closed SSA.
+//
+
+bool SuperblockCloner::CollectLiveOutsAndCheckClonable(HInstructionMap* live_outs) const {
+ DCHECK(live_outs->empty());
+ for (uint32_t idx : orig_bb_set_.Indexes()) {
+ HBasicBlock* block = GetBlockById(idx);
+
+ for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
+ HInstruction* instr = it.Current();
+ DCHECK(instr->IsClonable());
+
+ if (IsUsedOutsideRegion(instr, orig_bb_set_)) {
+ live_outs->FindOrAdd(instr, instr);
+ }
+ }
+
+ for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
+ HInstruction* instr = it.Current();
+ if (!instr->IsClonable()) {
+ return false;
+ }
+
+ if (IsUsedOutsideRegion(instr, orig_bb_set_)) {
+ // TODO: Investigate why HNewInstance and HCheckCast have a requirement for the input.
+ if (instr->IsLoadClass()) {
+ return false;
+ }
+ live_outs->FindOrAdd(instr, instr);
+ }
+ }
+ }
+ return true;
+}
+
+void SuperblockCloner::ConstructSubgraphClosedSSA() {
+ if (live_outs_.empty()) {
+ return;
+ }
+
+ ArenaVector<HBasicBlock*> exits(arena_->Adapter(kArenaAllocSuperblockCloner));
+ SearchForSubgraphExits(&exits);
+ if (exits.empty()) {
+ DCHECK(live_outs_.empty());
+ return;
+ }
+
+ DCHECK_EQ(exits.size(), 1u);
+ HBasicBlock* exit_block = exits[0];
+ // There should be no critical edges.
+ DCHECK_EQ(exit_block->GetPredecessors().size(), 1u);
+ DCHECK(exit_block->GetPhis().IsEmpty());
+
+ // For each live-out value insert a phi into the loop exit and replace all the value's uses
+ // external to the loop with this phi. The phi will have the original value as its only input;
+ // after copying is done FixSubgraphClosedSSAAfterCloning will add a corresponding copy of the
+ // original value as the second input thus merging data flow from the original and copy parts of
+ // the subgraph. Also update the record in the live_outs_ map from (value, value) to
+ // (value, new_phi).
+ for (auto live_out_it = live_outs_.begin(); live_out_it != live_outs_.end(); ++live_out_it) {
+ HInstruction* value = live_out_it->first;
+ HPhi* phi = new (arena_) HPhi(arena_, kNoRegNumber, 0, value->GetType());
+
+ if (value->GetType() == DataType::Type::kReference) {
+ phi->SetReferenceTypeInfo(value->GetReferenceTypeInfo());
+ }
+
+ exit_block->AddPhi(phi);
+ live_out_it->second = phi;
+
+ const HUseList<HInstruction*>& uses = value->GetUses();
+ for (auto it = uses.begin(), end = uses.end(); it != end; /* ++it below */) {
+ HInstruction* user = it->GetUser();
+ size_t index = it->GetIndex();
+ // Increment `it` now because `*it` may disappear thanks to user->ReplaceInput().
+ ++it;
+ if (!IsInOrigBBSet(user->GetBlock())) {
+ user->ReplaceInput(phi, index);
+ }
+ }
+
+ const HUseList<HEnvironment*>& env_uses = value->GetEnvUses();
+ for (auto it = env_uses.begin(), e = env_uses.end(); it != e; /* ++it below */) {
+ HEnvironment* env = it->GetUser();
+ size_t index = it->GetIndex();
+ ++it;
+ if (!IsInOrigBBSet(env->GetHolder()->GetBlock())) {
+ env->ReplaceInput(phi, index);
+ }
+ }
+
+ phi->AddInput(value);
+ }
+}
+
+void SuperblockCloner::FixSubgraphClosedSSAAfterCloning() {
+ for (auto it : live_outs_) {
+ DCHECK(it.first != it.second);
+ HInstruction* orig_value = it.first;
+ HPhi* phi = it.second->AsPhi();
+ HInstruction* copy_value = GetInstrCopy(orig_value);
+ // Copy edges are inserted after the original so we can just add a new input to the phi.
+ phi->AddInput(copy_value);
+ }
+}
+
+//
// Debug and logging methods.
//
@@ -644,7 +751,6 @@ void DumpBBSet(const ArenaBitVector* set) {
}
void SuperblockCloner::DumpInputSets() {
- std::cout << graph_->PrettyMethod() << "\n";
std::cout << "orig_bb_set:\n";
for (uint32_t idx : orig_bb_set_.Indexes()) {
std::cout << idx << "\n";
@@ -680,7 +786,9 @@ SuperblockCloner::SuperblockCloner(HGraph* graph,
bb_map_(bb_map),
hir_map_(hir_map),
outer_loop_(nullptr),
- outer_loop_bb_set_(arena_, orig_bb_set->GetSizeOf(), true, kArenaAllocSuperblockCloner) {
+ outer_loop_bb_set_(arena_, orig_bb_set->GetSizeOf(), true, kArenaAllocSuperblockCloner),
+ live_outs_(std::less<HInstruction*>(),
+ graph->GetAllocator()->Adapter(kArenaAllocSuperblockCloner)) {
orig_bb_set_.Copy(orig_bb_set);
}
@@ -699,26 +807,19 @@ bool SuperblockCloner::IsSubgraphClonable() const {
return false;
}
- // Check that there are no instructions defined in the subgraph and used outside.
- // TODO: Improve this by accepting graph with such uses but only one exit.
- for (uint32_t idx : orig_bb_set_.Indexes()) {
- HBasicBlock* block = GetBlockById(idx);
+ HInstructionMap live_outs(
+ std::less<HInstruction*>(), graph_->GetAllocator()->Adapter(kArenaAllocSuperblockCloner));
- for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
- HInstruction* instr = it.Current();
- if (!instr->IsClonable() ||
- IsUsedOutsideRegion(instr, orig_bb_set_)) {
- return false;
- }
- }
+ if (!CollectLiveOutsAndCheckClonable(&live_outs)) {
+ return false;
+ }
- for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
- HInstruction* instr = it.Current();
- if (!instr->IsClonable() ||
- IsUsedOutsideRegion(instr, orig_bb_set_)) {
- return false;
- }
- }
+ ArenaVector<HBasicBlock*> exits(arena_->Adapter(kArenaAllocSuperblockCloner));
+ SearchForSubgraphExits(&exits);
+
+ // The only loops with live-outs which are currently supported are loops with a single exit.
+ if (!live_outs.empty() && exits.size() != 1) {
+ return false;
}
return true;
@@ -794,8 +895,10 @@ void SuperblockCloner::Run() {
DumpInputSets();
}
+ CollectLiveOutsAndCheckClonable(&live_outs_);
// Find an area in the graph for which control flow information should be adjusted.
FindAndSetLocalAreaForAdjustments();
+ ConstructSubgraphClosedSSA();
// Clone the basic blocks from the orig_bb_set_; data flow is invalid after the call and is to be
// adjusted.
CloneBasicBlocks();
@@ -819,6 +922,7 @@ void SuperblockCloner::Run() {
AdjustControlFlowInfo();
// Fix data flow of the graph.
ResolveDataFlow();
+ FixSubgraphClosedSSAAfterCloning();
}
void SuperblockCloner::CleanUp() {
@@ -985,8 +1089,14 @@ HBasicBlock* PeelUnrollHelper::DoPeelUnrollImpl(bool to_unroll) {
HBasicBlock* loop_header = loop_info_->GetHeader();
// Check that loop info is up-to-date.
DCHECK(loop_info_ == loop_header->GetLoopInformation());
-
HGraph* graph = loop_header->GetGraph();
+
+ if (kSuperblockClonerLogging) {
+ std::cout << "Method: " << graph->PrettyMethod() << std::endl;
+ std::cout << "Scalar loop " << (to_unroll ? "unrolling" : "peeling") <<
+ " was applied to the loop <" << loop_header->GetBlockId() << ">." << std::endl;
+ }
+
ArenaAllocator allocator(graph->GetAllocator()->GetArenaPool());
HEdgeSet remap_orig_internal(graph->GetAllocator()->Adapter(kArenaAllocSuperblockCloner));
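
Taken together, CollectLiveOutsAndCheckClonable, ConstructSubgraphClosedSSA and FixSubgraphClosedSSAAfterCloning perform a three-step bookkeeping dance on the live_outs_ map. A toy model of just that bookkeeping, with plain structs standing in for HInstruction/HPhi (illustrative only, not ART code):

#include <iostream>
#include <map>
#include <string>
#include <vector>

struct Value {
  std::string name;
  std::vector<Value*> inputs;  // only the "phi" value uses this in the toy model
};

int main() {
  Value v{"v", {}};            // a value defined in the subgraph, used after it
  Value v_copy{"v_copy", {}};  // its clone in the peeled/unrolled copy

  // Step 1 (CollectLiveOutsAndCheckClonable): record each live-out as <val, val>.
  std::map<Value*, Value*> live_outs{{&v, &v}};

  // Step 2 (ConstructSubgraphClosedSSA): create an exit phi whose only input is
  // the original value, update the record to <val, phi>, and (in ART) rewrite
  // all uses outside the subgraph to consume the phi instead of the value.
  Value phi{"phi", {&v}};
  live_outs[&v] = &phi;

  // Step 3 (FixSubgraphClosedSSAAfterCloning): once the subgraph copy exists,
  // add the cloned value as the phi's second input, merging both data flows.
  live_outs[&v]->inputs.push_back(&v_copy);

  for (Value* in : phi.inputs) std::cout << in->name << " ";  // prints "v v_copy"
  std::cout << "\n";
}
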
diff --git a/compiler/optimizing/superblock_cloner.h b/compiler/optimizing/superblock_cloner.h
index e0931674cb..f21172131b 100644
--- a/compiler/optimizing/superblock_cloner.h
+++ b/compiler/optimizing/superblock_cloner.h
@@ -218,7 +218,7 @@ class SuperblockCloner : public ValueObject {
private:
// Fills the 'exits' vector with the subgraph exits.
- void SearchForSubgraphExits(ArenaVector<HBasicBlock*>* exits);
+ void SearchForSubgraphExits(ArenaVector<HBasicBlock*>* exits) const;
// Finds and records information about the area in the graph for which control flow (back edges,
// loops, dominators) needs to be adjusted.
@@ -240,6 +240,33 @@ class SuperblockCloner : public ValueObject {
void ResolveDataFlow();
//
+ // Helpers for live-outs processing and Subgraph-closed SSA.
+ //
+ // - live-outs - values which are defined inside the subgraph and have uses outside.
+ // - Subgraph-closed SSA - SSA form for which all the values defined inside the subgraph
+ // have no outside uses except for the phi-nodes in the subgraph exits.
+ //
+ // Note: currently, if the subgraph has live-outs it is only clonable if it has a single exit; this
+ // makes the subgraph-closed SSA form construction much easier.
+ //
+ // TODO: Support subgraphs with live-outs and multiple exits.
+ //
+
+ // For each live-out value 'val' in the region puts a record <val, val> into the map.
+ // Returns whether all of the instructions in the subgraph are clonable.
+ bool CollectLiveOutsAndCheckClonable(HInstructionMap* live_outs_) const;
+
+ // Constructs Subgraph-closed SSA; precondition - a subgraph has a single exit.
+ //
+ // For each live-out 'val' in 'live_outs_' map inserts a HPhi 'phi' into the exit node, updates
+ // the record in the map to <val, phi> and replaces all outside uses with this phi.
+ void ConstructSubgraphClosedSSA();
+
+ // Fixes the data flow for the live-out 'val' by adding a 'copy_val' input to the corresponding
+ // (<val, phi>) phi after the cloning is done.
+ void FixSubgraphClosedSSAAfterCloning();
+
+ //
// Helpers for CloneBasicBlock.
//
@@ -316,6 +343,8 @@ class SuperblockCloner : public ValueObject {
HLoopInformation* outer_loop_;
HBasicBlockSet outer_loop_bb_set_;
+ HInstructionMap live_outs_;
+
ART_FRIEND_TEST(SuperblockClonerTest, AdjustControlFlowInfo);
ART_FRIEND_TEST(SuperblockClonerTest, IsGraphConnected);