Complete unrolling of loops with small body and trip count one.
Rationale:
Avoids the unnecessary loop control overhead, suspend check,
and exposes more opportunities for constant folding in the
resulting loop body. Fully unrolls loop in execute() of
the Dhrystone benchmark (3% to 8% improvements).
Test: test-art-host
Change-Id: If30f38caea9e9f87a929df041dfb7ed1c227aba3
diff --git a/compiler/optimizing/induction_var_range.cc b/compiler/optimizing/induction_var_range.cc
index d5c4c2f..6d8ae75 100644
--- a/compiler/optimizing/induction_var_range.cc
+++ b/compiler/optimizing/induction_var_range.cc
@@ -368,10 +368,14 @@
}
}
-bool InductionVarRange::IsFinite(HLoopInformation* loop) const {
+bool InductionVarRange::IsFinite(HLoopInformation* loop, /*out*/ int64_t* tc) const {
HInductionVarAnalysis::InductionInfo *trip =
induction_analysis_->LookupInfo(loop, GetLoopControl(loop));
- return trip != nullptr && !IsUnsafeTripCount(trip);
+ if (trip != nullptr && !IsUnsafeTripCount(trip)) {
+ IsConstant(trip->op_a, kExact, tc);
+ return true;
+ }
+ return false;
}
//
diff --git a/compiler/optimizing/induction_var_range.h b/compiler/optimizing/induction_var_range.h
index ba14847..6c424b7 100644
--- a/compiler/optimizing/induction_var_range.h
+++ b/compiler/optimizing/induction_var_range.h
@@ -150,9 +150,9 @@
}
/**
- * Checks if header logic of a loop terminates.
+ * Checks if header logic of a loop terminates. Sets trip-count tc if known.
*/
- bool IsFinite(HLoopInformation* loop) const;
+ bool IsFinite(HLoopInformation* loop, /*out*/ int64_t* tc) const;
private:
/*
diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc
index 9d73e29..9583838 100644
--- a/compiler/optimizing/loop_optimization.cc
+++ b/compiler/optimizing/loop_optimization.cc
@@ -161,26 +161,27 @@
void HLoopOptimization::TraverseLoopsInnerToOuter(LoopNode* node) {
for ( ; node != nullptr; node = node->next) {
+ // Visit inner loops first.
int current_induction_simplification_count = induction_simplication_count_;
if (node->inner != nullptr) {
TraverseLoopsInnerToOuter(node->inner);
}
- // Visit loop after its inner loops have been visited. If the induction of any inner
- // loop has been simplified, recompute the induction information of this loop first.
+ // Recompute induction information of this loop if the induction
+ // of any inner loop has been simplified.
if (current_induction_simplification_count != induction_simplication_count_) {
induction_range_.ReVisit(node->loop_info);
}
- // Repeat simplifications until no more changes occur. Note that since
- // each simplification consists of eliminating code (without introducing
- // new code), this process is always finite.
+ // Repeat simplifications in the body of this loop until no more changes occur.
+ // Note that since each simplification consists of eliminating code (without
+ // introducing new code), this process is always finite.
do {
simplified_ = false;
- SimplifyBlocks(node);
SimplifyInduction(node);
+ SimplifyBlocks(node);
} while (simplified_);
- // Remove inner loops when empty.
+ // Simplify inner loop.
if (node->inner == nullptr) {
- RemoveIfEmptyInnerLoop(node);
+ SimplifyInnerLoop(node);
}
}
}
@@ -198,7 +199,7 @@
iset_->clear();
int32_t use_count = 0;
if (IsPhiInduction(phi) &&
- IsOnlyUsedAfterLoop(node->loop_info, phi, &use_count) &&
+ IsOnlyUsedAfterLoop(node->loop_info, phi, /*collect_loop_uses*/ false, &use_count) &&
// No uses, or no early-exit with proper replacement.
(use_count == 0 ||
(!IsEarlyExit(node->loop_info) && TryReplaceWithLastValue(phi, preheader)))) {
@@ -206,7 +207,6 @@
RemoveFromCycle(i);
}
simplified_ = true;
- induction_simplication_count_++;
}
}
}
@@ -216,24 +216,14 @@
for (HBlocksInLoopIterator it(*node->loop_info); !it.Done(); it.Advance()) {
HBasicBlock* block = it.Current();
// Remove dead instructions from the loop-body.
- for (HBackwardInstructionIterator i(block->GetInstructions()); !i.Done(); i.Advance()) {
- HInstruction* instruction = i.Current();
- if (instruction->IsDeadAndRemovable()) {
- simplified_ = true;
- block->RemoveInstruction(instruction);
- }
- }
+ RemoveDeadInstructions(block->GetPhis());
+ RemoveDeadInstructions(block->GetInstructions());
// Remove trivial control flow blocks from the loop-body.
- HBasicBlock* succ = nullptr;
- if (IsGotoBlock(block, &succ) && succ->GetPredecessors().size() == 1) {
- // Trivial goto block can be removed.
- HBasicBlock* pred = block->GetSinglePredecessor();
+ if (block->GetPredecessors().size() == 1 &&
+ block->GetSuccessors().size() == 1 &&
+ block->GetSingleSuccessor()->GetPredecessors().size() == 1) {
simplified_ = true;
- pred->ReplaceSuccessor(block, succ);
- block->RemoveDominatedBlock(succ);
- block->DisconnectAndDelete();
- pred->AddDominatedBlock(succ);
- succ->SetDominator(pred);
+ block->MergeWith(block->GetSingleSuccessor());
} else if (block->GetSuccessors().size() == 2) {
// Trivial if block can be bypassed to either branch.
HBasicBlock* succ0 = block->GetSuccessors()[0];
@@ -258,55 +248,66 @@
}
}
-void HLoopOptimization::RemoveIfEmptyInnerLoop(LoopNode* node) {
+bool HLoopOptimization::SimplifyInnerLoop(LoopNode* node) {
HBasicBlock* header = node->loop_info->GetHeader();
HBasicBlock* preheader = node->loop_info->GetPreHeader();
// Ensure loop header logic is finite.
- if (!induction_range_.IsFinite(node->loop_info)) {
- return;
+ int64_t tc = 0;
+ if (!induction_range_.IsFinite(node->loop_info, &tc)) {
+ return false;
}
// Ensure there is only a single loop-body (besides the header).
HBasicBlock* body = nullptr;
for (HBlocksInLoopIterator it(*node->loop_info); !it.Done(); it.Advance()) {
if (it.Current() != header) {
if (body != nullptr) {
- return;
+ return false;
}
body = it.Current();
}
}
// Ensure there is only a single exit point.
if (header->GetSuccessors().size() != 2) {
- return;
+ return false;
}
HBasicBlock* exit = (header->GetSuccessors()[0] == body)
? header->GetSuccessors()[1]
: header->GetSuccessors()[0];
// Ensure exit can only be reached by exiting loop.
if (exit->GetPredecessors().size() != 1) {
- return;
+ return false;
}
- // Detect an empty loop: no side effects other than plain iteration. Replace
- // subsequent index uses, if any, with the last value and remove the loop.
+ // Detect either an empty loop (no side effects other than plain iteration) or
+ // a trivial loop (just iterating once). Replace subsequent index uses, if any,
+ // with the last value and remove the loop, possibly after unrolling its body.
+ HInstruction* phi = header->GetFirstPhi();
iset_->clear();
int32_t use_count = 0;
- if (IsEmptyHeader(header) &&
- IsEmptyBody(body) &&
- IsOnlyUsedAfterLoop(node->loop_info, header->GetFirstPhi(), &use_count) &&
- // No uses, or proper replacement.
- (use_count == 0 || TryReplaceWithLastValue(header->GetFirstPhi(), preheader))) {
- body->DisconnectAndDelete();
- exit->RemovePredecessor(header);
- header->RemoveSuccessor(exit);
- header->RemoveDominatedBlock(exit);
- header->DisconnectAndDelete();
- preheader->AddSuccessor(exit);
- preheader->AddInstruction(new (graph_->GetArena()) HGoto()); // global allocator
- preheader->AddDominatedBlock(exit);
- exit->SetDominator(preheader);
- // Update hierarchy.
- RemoveLoop(node);
+ if (IsEmptyHeader(header)) {
+ bool is_empty = IsEmptyBody(body);
+ if ((is_empty || tc == 1) &&
+ IsOnlyUsedAfterLoop(node->loop_info, phi, /*collect_loop_uses*/ true, &use_count) &&
+ // No uses, or proper replacement.
+ (use_count == 0 || TryReplaceWithLastValue(phi, preheader))) {
+ if (!is_empty) {
+ // Unroll the loop body, which sees initial value of the index.
+ phi->ReplaceWith(phi->InputAt(0));
+ preheader->MergeInstructionsWith(body);
+ }
+ body->DisconnectAndDelete();
+ exit->RemovePredecessor(header);
+ header->RemoveSuccessor(exit);
+ header->RemoveDominatedBlock(exit);
+ header->DisconnectAndDelete();
+ preheader->AddSuccessor(exit);
+ preheader->AddInstruction(new (graph_->GetArena()) HGoto()); // global allocator
+ preheader->AddDominatedBlock(exit);
+ exit->SetDominator(preheader);
+ RemoveLoop(node); // update hierarchy
+ return true;
+ }
}
+ return false;
}
bool HLoopOptimization::IsPhiInduction(HPhi* phi) {
@@ -374,12 +375,19 @@
bool HLoopOptimization::IsOnlyUsedAfterLoop(HLoopInformation* loop_info,
HInstruction* instruction,
+ bool collect_loop_uses,
/*out*/ int32_t* use_count) {
for (const HUseListNode<HInstruction*>& use : instruction->GetUses()) {
HInstruction* user = use.GetUser();
if (iset_->find(user) == iset_->end()) { // not excluded?
HLoopInformation* other_loop_info = user->GetBlock()->GetLoopInformation();
if (other_loop_info != nullptr && other_loop_info->IsIn(*loop_info)) {
+ // If collect_loop_uses is set, simply keep adding those uses to the set.
+ // Otherwise, reject uses inside the loop that were not already in the set.
+ if (collect_loop_uses) {
+ iset_->insert(user);
+ continue;
+ }
return false;
}
++*use_count;
@@ -388,40 +396,48 @@
return true;
}
-void HLoopOptimization::ReplaceAllUses(HInstruction* instruction, HInstruction* replacement) {
- const HUseList<HInstruction*>& uses = instruction->GetUses();
- for (auto it = uses.begin(), end = uses.end(); it != end;) {
- HInstruction* user = it->GetUser();
- size_t index = it->GetIndex();
- ++it; // increment before replacing
- if (iset_->find(user) == iset_->end()) { // not excluded?
- user->ReplaceInput(replacement, index);
- induction_range_.Replace(user, instruction, replacement); // update induction
- }
- }
- const HUseList<HEnvironment*>& env_uses = instruction->GetEnvUses();
- for (auto it = env_uses.begin(), end = env_uses.end(); it != end;) {
- HEnvironment* user = it->GetUser();
- size_t index = it->GetIndex();
- ++it; // increment before replacing
- if (iset_->find(user->GetHolder()) == iset_->end()) { // not excluded?
- user->RemoveAsUserOfInput(index);
- user->SetRawEnvAt(index, replacement);
- replacement->AddEnvUseAt(user, index);
- }
- }
-}
-
bool HLoopOptimization::TryReplaceWithLastValue(HInstruction* instruction, HBasicBlock* block) {
// Try to replace outside uses with the last value. Environment uses can consume this
// value too, since any first true use is outside the loop (although this may imply
// that de-opting may look "ahead" a bit on the phi value). If there are only environment
// uses, the value is dropped altogether, since the computations have no effect.
if (induction_range_.CanGenerateLastValue(instruction)) {
- ReplaceAllUses(instruction, induction_range_.GenerateLastValue(instruction, graph_, block));
+ HInstruction* replacement = induction_range_.GenerateLastValue(instruction, graph_, block);
+ const HUseList<HInstruction*>& uses = instruction->GetUses();
+ for (auto it = uses.begin(), end = uses.end(); it != end;) {
+ HInstruction* user = it->GetUser();
+ size_t index = it->GetIndex();
+ ++it; // increment before replacing
+ if (iset_->find(user) == iset_->end()) { // not excluded?
+ user->ReplaceInput(replacement, index);
+ induction_range_.Replace(user, instruction, replacement); // update induction
+ }
+ }
+ const HUseList<HEnvironment*>& env_uses = instruction->GetEnvUses();
+ for (auto it = env_uses.begin(), end = env_uses.end(); it != end;) {
+ HEnvironment* user = it->GetUser();
+ size_t index = it->GetIndex();
+ ++it; // increment before replacing
+ if (iset_->find(user->GetHolder()) == iset_->end()) { // not excluded?
+ user->RemoveAsUserOfInput(index);
+ user->SetRawEnvAt(index, replacement);
+ replacement->AddEnvUseAt(user, index);
+ }
+ }
+ induction_simplication_count_++;
return true;
}
return false;
}
+void HLoopOptimization::RemoveDeadInstructions(const HInstructionList& list) {
+ for (HBackwardInstructionIterator i(list); !i.Done(); i.Advance()) {
+ HInstruction* instruction = i.Current();
+ if (instruction->IsDeadAndRemovable()) {
+ simplified_ = true;
+ instruction->GetBlock()->RemoveInstructionOrPhi(instruction);
+ }
+ }
+}
+
} // namespace art
diff --git a/compiler/optimizing/loop_optimization.h b/compiler/optimizing/loop_optimization.h
index 0f05b24..9ddab41 100644
--- a/compiler/optimizing/loop_optimization.h
+++ b/compiler/optimizing/loop_optimization.h
@@ -60,19 +60,21 @@
void TraverseLoopsInnerToOuter(LoopNode* node);
+ // Simplification.
void SimplifyInduction(LoopNode* node);
void SimplifyBlocks(LoopNode* node);
- void RemoveIfEmptyInnerLoop(LoopNode* node);
+ bool SimplifyInnerLoop(LoopNode* node);
+ // Helpers.
bool IsPhiInduction(HPhi* phi);
bool IsEmptyHeader(HBasicBlock* block);
bool IsEmptyBody(HBasicBlock* block);
-
bool IsOnlyUsedAfterLoop(HLoopInformation* loop_info,
HInstruction* instruction,
+ bool collect_loop_uses,
/*out*/ int32_t* use_count);
- void ReplaceAllUses(HInstruction* instruction, HInstruction* replacement);
bool TryReplaceWithLastValue(HInstruction* instruction, HBasicBlock* block);
+ void RemoveDeadInstructions(const HInstructionList& list);
// Range information based on prior induction variable analysis.
InductionVarRange induction_range_;
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index d45fa11..a6084eb 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -1853,6 +1853,14 @@
SetGraph(nullptr);
}
+void HBasicBlock::MergeInstructionsWith(HBasicBlock* other) {
+ DCHECK(EndsWithControlFlowInstruction());
+ RemoveInstruction(GetLastInstruction());
+ instructions_.Add(other->GetInstructions());
+ other->instructions_.SetBlockOfInstructions(this);
+ other->instructions_.Clear();
+}
+
void HBasicBlock::MergeWith(HBasicBlock* other) {
DCHECK_EQ(GetGraph(), other->GetGraph());
DCHECK(ContainsElement(dominated_blocks_, other));
@@ -1861,11 +1869,7 @@
DCHECK(other->GetPhis().IsEmpty());
// Move instructions from `other` to `this`.
- DCHECK(EndsWithControlFlowInstruction());
- RemoveInstruction(GetLastInstruction());
- instructions_.Add(other->GetInstructions());
- other->instructions_.SetBlockOfInstructions(this);
- other->instructions_.Clear();
+ MergeInstructionsWith(other);
// Remove `other` from the loops it is included in.
for (HLoopInformationOutwardIterator it(*other); !it.Done(); it.Advance()) {
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index ea9a94c..bf13702 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -1097,6 +1097,9 @@
// with a control flow instruction).
void ReplaceWith(HBasicBlock* other);
+ // Merges the instructions of `other` at the end of `this`.
+ void MergeInstructionsWith(HBasicBlock* other);
+
// Merge `other` at the end of `this`. This method updates loops, reverse post
// order, links to predecessors, successors, dominators and deletes the block
// from the graph. The two blocks must be successive, i.e. `this` the only
diff --git a/test/627-checker-unroll/expected.txt b/test/627-checker-unroll/expected.txt
new file mode 100644
index 0000000..b0aad4d
--- /dev/null
+++ b/test/627-checker-unroll/expected.txt
@@ -0,0 +1 @@
+passed
diff --git a/test/627-checker-unroll/info.txt b/test/627-checker-unroll/info.txt
new file mode 100644
index 0000000..d7885f4
--- /dev/null
+++ b/test/627-checker-unroll/info.txt
@@ -0,0 +1 @@
+Test on loop unrolling.
diff --git a/test/627-checker-unroll/src/Main.java b/test/627-checker-unroll/src/Main.java
new file mode 100644
index 0000000..9785bdc
--- /dev/null
+++ b/test/627-checker-unroll/src/Main.java
@@ -0,0 +1,119 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+//
+// Test on loop unrolling. Removes loop control overhead (including suspend
+// checks) and exposes more opportunities for constant folding.
+//
+public class Main {
+
+ static int sA = 0;
+
+ /// CHECK-START: void Main.unroll() loop_optimization (before)
+ /// CHECK-DAG: Phi loop:<<Loop:B\d+>>
+ /// CHECK-DAG: StaticFieldSet loop:<<Loop>>
+ //
+ /// CHECK-START: void Main.unroll() loop_optimization (after)
+ /// CHECK-DAG: StaticFieldSet loop:none
+ //
+ /// CHECK-START: void Main.unroll() instruction_simplifier$after_bce (after)
+ /// CHECK-DAG: <<Int:i\d+>> IntConstant 68 loop:none
+ /// CHECK-DAG: StaticFieldSet [{{l\d+}},<<Int>>] loop:none
+ //
+ /// CHECK-START: void Main.unroll() loop_optimization (after)
+ /// CHECK-NOT: Phi
+ public static void unroll() {
+ for (int i = 4; i < 5; i++) {
+ sA = 17 * i;
+ }
+ }
+
+ /// CHECK-START: int Main.unrollLV() loop_optimization (before)
+ /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>>
+ /// CHECK-DAG: StaticFieldSet loop:<<Loop>>
+ /// CHECK-DAG: Return [<<Phi>>] loop:none
+ //
+ /// CHECK-START: int Main.unrollLV() loop_optimization (after)
+ /// CHECK-DAG: StaticFieldSet loop:none
+ //
+ /// CHECK-START: int Main.unrollLV() instruction_simplifier$after_bce (after)
+ /// CHECK-DAG: <<Int1:i\d+>> IntConstant 187 loop:none
+ /// CHECK-DAG: <<Int2:i\d+>> IntConstant 12 loop:none
+ /// CHECK-DAG: StaticFieldSet [{{l\d+}},<<Int1>>] loop:none
+ /// CHECK-DAG: Return [<<Int2>>] loop:none
+ //
+ /// CHECK-START: int Main.unrollLV() loop_optimization (after)
+ /// CHECK-NOT: Phi
+ public static int unrollLV() {
+ int i;
+ for (i = 11; i < 12; i++) {
+ sA = 17 * i;
+ }
+ return i;
+ }
+
+ /// CHECK-START: void Main.unrollNest() loop_optimization (before)
+ /// CHECK-DAG: SuspendCheck loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi loop:<<Loop1:B\d+>> outer_loop:none
+ /// CHECK-DAG: SuspendCheck loop:<<Loop1>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:i\d+>> Phi loop:<<Loop2:B\d+>> outer_loop:<<Loop1>>
+ /// CHECK-DAG: SuspendCheck loop:<<Loop2>> outer_loop:<<Loop1>>
+ /// CHECK-DAG: <<Phi3:i\d+>> Phi loop:<<Loop3:B\d+>> outer_loop:<<Loop2>>
+ /// CHECK-DAG: SuspendCheck loop:<<Loop3>> outer_loop:<<Loop2>>
+ /// CHECK-DAG: StaticFieldSet loop:<<Loop3>> outer_loop:<<Loop2>>
+ //
+ /// CHECK-START: void Main.unrollNest() loop_optimization (after)
+ /// CHECK-DAG: StaticFieldSet loop:none
+ /// CHECK-DAG: SuspendCheck loop:none
+ /// CHECK-NOT: SuspendCheck
+ //
+ /// CHECK-START: void Main.unrollNest() instruction_simplifier$after_bce (after)
+ /// CHECK-DAG: <<Int:i\d+>> IntConstant 6 loop:none
+ /// CHECK-DAG: StaticFieldSet [{{l\d+}},<<Int>>] loop:none
+ //
+ /// CHECK-START: void Main.unrollNest() loop_optimization (after)
+ /// CHECK-NOT: Phi
+ public static void unrollNest() {
+ // Unrolling each loop in turn ultimately removes the complete nest!
+ for (int i = 4; i < 5; i++) {
+ for (int j = 5; j < 6; j++) {
+ for (int k = 6; k < 7; k++) {
+ sA = k;
+ }
+ }
+ }
+ }
+
+ //
+ // Verifier.
+ //
+
+ public static void main(String[] args) {
+ unroll();
+ expectEquals(68, sA);
+ expectEquals(12, unrollLV());
+ expectEquals(187, sA);
+ unrollNest();
+ expectEquals(6, sA);
+ System.out.println("passed");
+ }
+
+ private static void expectEquals(int expected, int result) {
+ if (expected != result) {
+ throw new Error("Expected: " + expected + ", found: " + result);
+ }
+ }
+}