ART: Implement scalar loop unrolling.
Implement scalar loop unrolling for small loops with a known trip
count (on arm64) to reduce the loop-condition check and branch
penalties and to provide more opportunities for instruction
scheduling.
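
For illustration, a schematic example (hypothetical code; real
candidates are further filtered by the heuristics in loop_analysis.cc):
with the current unrolling factor of 2, a counted loop such as

    for (int i = 0; i < 64; i++) { s += a[i]; }

has its body duplicated and the duplicated loop-condition check
replaced with a constant, so it behaves roughly like

    for (int i = 0; i < 64; i += 2) { s += a[i]; s += a[i + 1]; }
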
Note: this functionality is currently turned off by default.
Test: cloner_test.cc
Test: test-art-target, test-art-host
Change-Id: Ic27fd8fb0bc0d7b69251252da37b8b510bc30acc
diff --git a/compiler/Android.bp b/compiler/Android.bp
index c4d538f..062eb88 100644
--- a/compiler/Android.bp
+++ b/compiler/Android.bp
@@ -70,6 +70,7 @@
"optimizing/load_store_analysis.cc",
"optimizing/load_store_elimination.cc",
"optimizing/locations.cc",
+ "optimizing/loop_analysis.cc",
"optimizing/loop_optimization.cc",
"optimizing/nodes.cc",
"optimizing/optimization.cc",
diff --git a/compiler/optimizing/induction_var_range.cc b/compiler/optimizing/induction_var_range.cc
index 0a310ca..55eca23 100644
--- a/compiler/optimizing/induction_var_range.cc
+++ b/compiler/optimizing/induction_var_range.cc
@@ -352,13 +352,15 @@
}
bool InductionVarRange::IsFinite(HLoopInformation* loop, /*out*/ int64_t* trip_count) const {
- HInductionVarAnalysis::InductionInfo *trip =
- induction_analysis_->LookupInfo(loop, GetLoopControl(loop));
- if (trip != nullptr && !IsUnsafeTripCount(trip)) {
- IsConstant(trip->op_a, kExact, trip_count);
- return true;
- }
- return false;
+ bool is_constant_unused = false;
+ return CheckForFiniteAndConstantProps(loop, &is_constant_unused, trip_count);
+}
+
+bool InductionVarRange::HasKnownTripCount(HLoopInformation* loop,
+ /*out*/ int64_t* trip_count) const {
+ bool is_constant = false;
+ CheckForFiniteAndConstantProps(loop, &is_constant, trip_count);
+ return is_constant;
}
bool InductionVarRange::IsUnitStride(HInstruction* context,
@@ -417,6 +419,18 @@
// Private class methods.
//
+bool InductionVarRange::CheckForFiniteAndConstantProps(HLoopInformation* loop,
+ /*out*/ bool* is_constant,
+ /*out*/ int64_t* trip_count) const {
+ HInductionVarAnalysis::InductionInfo *trip =
+ induction_analysis_->LookupInfo(loop, GetLoopControl(loop));
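+  // Without usable induction info the loop is not known to be finite.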
+ if (trip != nullptr && !IsUnsafeTripCount(trip)) {
+ *is_constant = IsConstant(trip->op_a, kExact, trip_count);
+ return true;
+ }
+ return false;
+}
+
bool InductionVarRange::IsConstant(HInductionVarAnalysis::InductionInfo* info,
ConstantRequest request,
/*out*/ int64_t* value) const {
diff --git a/compiler/optimizing/induction_var_range.h b/compiler/optimizing/induction_var_range.h
index 0b980f5..906dc6b 100644
--- a/compiler/optimizing/induction_var_range.h
+++ b/compiler/optimizing/induction_var_range.h
@@ -161,9 +161,15 @@
}
/**
- * Checks if header logic of a loop terminates. Sets trip-count tc if known.
+   * Checks if the header logic of a loop terminates. If the trip count is known, sets
+   * 'trip_count' to its value.
*/
- bool IsFinite(HLoopInformation* loop, /*out*/ int64_t* tc) const;
+ bool IsFinite(HLoopInformation* loop, /*out*/ int64_t* trip_count) const;
+
+ /**
+   * Checks whether the trip count of the loop is known and, if so, sets 'trip_count' to its
+   * value.
+ */
+ bool HasKnownTripCount(HLoopInformation* loop, /*out*/ int64_t* trip_count) const;
/**
* Checks if the given instruction is a unit stride induction inside the closest enveloping
@@ -194,6 +200,14 @@
};
/**
+   * Checks if the header logic of a loop terminates. If the trip count is known (constant),
+   * sets 'is_constant' to true and 'trip_count' to the trip count value.
+ */
+ bool CheckForFiniteAndConstantProps(HLoopInformation* loop,
+ /*out*/ bool* is_constant,
+ /*out*/ int64_t* trip_count) const;
+
+ /**
* Returns true if exact or upper/lower bound on the given induction
* information is known as a 64-bit constant, which is returned in value.
*/
diff --git a/compiler/optimizing/loop_analysis.cc b/compiler/optimizing/loop_analysis.cc
new file mode 100644
index 0000000..cd3bdaf
--- /dev/null
+++ b/compiler/optimizing/loop_analysis.cc
@@ -0,0 +1,120 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "loop_analysis.h"
+
+namespace art {
+
+void LoopAnalysis::CalculateLoopBasicProperties(HLoopInformation* loop_info,
+ LoopAnalysisInfo* analysis_results) {
+ for (HBlocksInLoopIterator block_it(*loop_info);
+ !block_it.Done();
+ block_it.Advance()) {
+ HBasicBlock* block = block_it.Current();
+
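+    // Each edge from a block in the loop to a block outside of it is a loop exit.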
+ for (HBasicBlock* successor : block->GetSuccessors()) {
+ if (!loop_info->Contains(*successor)) {
+ analysis_results->exits_num_++;
+ }
+ }
+
+ for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
+ HInstruction* instruction = it.Current();
+ if (MakesScalarUnrollingNonBeneficial(instruction)) {
+ analysis_results->has_instructions_preventing_scalar_unrolling_ = true;
+ }
+ analysis_results->instr_num_++;
+ }
+ analysis_results->bb_num_++;
+ }
+}
+
+class Arm64LoopHelper : public ArchDefaultLoopHelper {
+ public:
+ // Scalar loop unrolling parameters and heuristics.
+ //
+ // Maximum possible unrolling factor.
+ static constexpr uint32_t kArm64ScalarMaxUnrollFactor = 2;
+  // Loop's maximum instruction count. Loops with a higher count will not be peeled/unrolled.
+ static constexpr uint32_t kArm64ScalarHeuristicMaxBodySizeInstr = 40;
+  // Loop's maximum basic block count. Loops with a higher count will not be peeled/unrolled.
+ static constexpr uint32_t kArm64ScalarHeuristicMaxBodySizeBlocks = 8;
+
+ // SIMD loop unrolling parameters and heuristics.
+ //
+ // Maximum possible unrolling factor.
+ static constexpr uint32_t kArm64SimdMaxUnrollFactor = 8;
+  // Loop's maximum instruction count. Loops with a higher count will not be unrolled.
+ static constexpr uint32_t kArm64SimdHeuristicMaxBodySizeInstr = 50;
+
+ bool IsLoopTooBigForScalarUnrolling(LoopAnalysisInfo* loop_analysis_info) const OVERRIDE {
+ size_t instr_num = loop_analysis_info->GetNumberOfInstructions();
+ size_t bb_num = loop_analysis_info->GetNumberOfBasicBlocks();
+ return (instr_num >= kArm64ScalarHeuristicMaxBodySizeInstr ||
+ bb_num >= kArm64ScalarHeuristicMaxBodySizeBlocks);
+ }
+
+ uint32_t GetScalarUnrollingFactor(HLoopInformation* loop_info ATTRIBUTE_UNUSED,
+ uint64_t trip_count) const OVERRIDE {
+ uint32_t desired_unrolling_factor = kArm64ScalarMaxUnrollFactor;
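+    // No epilogue loop is generated to execute leftover iterations, so bail out unless
+    // the known trip count is an exact multiple of the unrolling factor.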
+ if (trip_count < desired_unrolling_factor || trip_count % desired_unrolling_factor != 0) {
+ return kNoUnrollingFactor;
+ }
+
+ return desired_unrolling_factor;
+ }
+
+ uint32_t GetSIMDUnrollingFactor(HBasicBlock* block,
+ int64_t trip_count,
+ uint32_t max_peel,
+ uint32_t vector_length) const OVERRIDE {
+ // Don't unroll with insufficient iterations.
+ // TODO: Unroll loops with unknown trip count.
+ DCHECK_NE(vector_length, 0u);
+ if (trip_count < (2 * vector_length + max_peel)) {
+ return kNoUnrollingFactor;
+ }
+ // Don't unroll for large loop body size.
+ uint32_t instruction_count = block->GetInstructions().CountSize();
+ if (instruction_count >= kArm64SimdHeuristicMaxBodySizeInstr) {
+ return kNoUnrollingFactor;
+ }
+ // Find a beneficial unroll factor with the following restrictions:
+ // - At least one iteration of the transformed loop should be executed.
+ // - The loop body shouldn't be "too big" (heuristic).
+
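+    // uf1 bounds the factor by the body-size budget, uf2 by the iterations remaining
+    // after peeling; the smaller bound is then truncated to a power of two.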
+ uint32_t uf1 = kArm64SimdHeuristicMaxBodySizeInstr / instruction_count;
+ uint32_t uf2 = (trip_count - max_peel) / vector_length;
+ uint32_t unroll_factor =
+ TruncToPowerOfTwo(std::min({uf1, uf2, kArm64SimdMaxUnrollFactor}));
+ DCHECK_GE(unroll_factor, 1u);
+ return unroll_factor;
+ }
+};
+
+ArchDefaultLoopHelper* ArchDefaultLoopHelper::Create(InstructionSet isa,
+ ArenaAllocator* allocator) {
+ switch (isa) {
+ case InstructionSet::kArm64: {
+ return new (allocator) Arm64LoopHelper;
+ }
+ default: {
+ return new (allocator) ArchDefaultLoopHelper;
+ }
+ }
+}
+
+} // namespace art
diff --git a/compiler/optimizing/loop_analysis.h b/compiler/optimizing/loop_analysis.h
new file mode 100644
index 0000000..bad406f
--- /dev/null
+++ b/compiler/optimizing/loop_analysis.h
@@ -0,0 +1,139 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_LOOP_ANALYSIS_H_
+#define ART_COMPILER_OPTIMIZING_LOOP_ANALYSIS_H_
+
+#include "nodes.h"
+
+namespace art {
+
+class LoopAnalysis;
+
+// No loop unrolling factor (just one copy of the loop-body).
+static constexpr uint32_t kNoUnrollingFactor = 1;
+
+// Class to hold cached information on properties of the loop.
+class LoopAnalysisInfo : public ValueObject {
+ public:
+ explicit LoopAnalysisInfo(HLoopInformation* loop_info)
+ : bb_num_(0),
+ instr_num_(0),
+ exits_num_(0),
+ has_instructions_preventing_scalar_unrolling_(false),
+ loop_info_(loop_info) {}
+
+ size_t GetNumberOfBasicBlocks() const { return bb_num_; }
+ size_t GetNumberOfInstructions() const { return instr_num_; }
+ size_t GetNumberOfExits() const { return exits_num_; }
+
+ bool HasInstructionsPreventingScalarUnrolling() const {
+ return has_instructions_preventing_scalar_unrolling_;
+ }
+
+ const HLoopInformation* GetLoopInfo() const { return loop_info_; }
+
+ private:
+ // Number of basic blocks in the loop body.
+ size_t bb_num_;
+ // Number of instructions in the loop body.
+ size_t instr_num_;
+ // Number of loop's exits.
+ size_t exits_num_;
+ // Whether the loop has instructions which make scalar loop unrolling non-beneficial.
+ bool has_instructions_preventing_scalar_unrolling_;
+
+ // Corresponding HLoopInformation.
+ const HLoopInformation* loop_info_;
+
+ friend class LoopAnalysis;
+};
+
+// Placeholder class for methods and routines used to analyze loops and to calculate
+// loop properties and characteristics.
+class LoopAnalysis : public ValueObject {
+ public:
+  // Calculates the loop's basic properties, such as body size and number of exits,
+  // and fills 'analysis_results' with this information.
+ static void CalculateLoopBasicProperties(HLoopInformation* loop_info,
+ LoopAnalysisInfo* analysis_results);
+
+ private:
+ // Returns whether an instruction makes scalar loop unrolling non-beneficial.
+ //
+  // If the loop body contains a dex/runtime call, its cost will likely dominate the
+  // overall loop performance, so unrolling would bring no noticeable performance
+  // improvement while still increasing the code size.
+ static bool MakesScalarUnrollingNonBeneficial(HInstruction* instruction) {
+ return (instruction->IsNewArray() ||
+ instruction->IsNewInstance() ||
+ instruction->IsUnresolvedInstanceFieldGet() ||
+ instruction->IsUnresolvedInstanceFieldSet() ||
+ instruction->IsUnresolvedStaticFieldGet() ||
+ instruction->IsUnresolvedStaticFieldSet() ||
+ // TODO: Unroll loops with intrinsified invokes.
+ instruction->IsInvoke() ||
+ // TODO: Unroll loops with ClinitChecks.
+ instruction->IsClinitCheck());
+ }
+};
+
+//
+// Helper class which holds target-dependent methods and constants needed for loop optimizations.
+//
+// To support peeling/unrolling for a new architecture, create a new helper class that
+// inherits from this one and implements the following methods.
+//
+class ArchDefaultLoopHelper : public ArenaObject<kArenaAllocOptimization> {
+ public:
+ virtual ~ArchDefaultLoopHelper() {}
+
+  // Creates an instance of a specialized helper for the target, or of the default
+  // helper if the target doesn't support loop peeling and unrolling.
+ static ArchDefaultLoopHelper* Create(InstructionSet isa, ArenaAllocator* allocator);
+
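+  // Typical use (see loop_optimization.cc): a helper is created once per
+  // HLoopOptimization instance via Create() and then queried for each loop.
+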
+ // Returns whether the loop is too big for loop unrolling by checking its total number of
+ // basic blocks and instructions.
+ //
+  // If the loop body has too many instructions, unrolling would bring no noticeable
+  // performance improvement while still increasing the code size.
+ //
+  // Returns 'true' by default; should be overridden by a target-specific loop helper.
+ virtual bool IsLoopTooBigForScalarUnrolling(
+ LoopAnalysisInfo* loop_analysis_info ATTRIBUTE_UNUSED) const { return true; }
+
+  // Returns the optimal scalar unrolling factor for the loop.
+ //
+  // Returns kNoUnrollingFactor by default; should be overridden by a target-specific loop helper.
+ virtual uint32_t GetScalarUnrollingFactor(HLoopInformation* loop_info ATTRIBUTE_UNUSED,
+ uint64_t trip_count ATTRIBUTE_UNUSED) const {
+ return kNoUnrollingFactor;
+ }
+
+  // Returns the optimal SIMD unrolling factor for the loop.
+ //
+  // Returns kNoUnrollingFactor by default; should be overridden by a target-specific loop helper.
+ virtual uint32_t GetSIMDUnrollingFactor(HBasicBlock* block ATTRIBUTE_UNUSED,
+ int64_t trip_count ATTRIBUTE_UNUSED,
+ uint32_t max_peel ATTRIBUTE_UNUSED,
+ uint32_t vector_length ATTRIBUTE_UNUSED) const {
+ return kNoUnrollingFactor;
+ }
+};
+
+} // namespace art
+
+#endif // ART_COMPILER_OPTIMIZING_LOOP_ANALYSIS_H_
diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc
index e1fb7ac..6908034 100644
--- a/compiler/optimizing/loop_optimization.cc
+++ b/compiler/optimizing/loop_optimization.cc
@@ -33,8 +33,8 @@
// Enables vectorization (SIMDization) in the loop optimizer.
static constexpr bool kEnableVectorization = true;
-// No loop unrolling factor (just one copy of the loop-body).
-static constexpr uint32_t kNoUnrollingFactor = 1;
+// Enables scalar loop unrolling in the loop optimizer.
+static constexpr bool kEnableScalarUnrolling = false;
//
// Static helpers.
@@ -480,7 +480,11 @@
vector_preheader_(nullptr),
vector_header_(nullptr),
vector_body_(nullptr),
- vector_index_(nullptr) {
+ vector_index_(nullptr),
+ arch_loop_helper_(ArchDefaultLoopHelper::Create(compiler_driver_ != nullptr
+ ? compiler_driver_->GetInstructionSet()
+ : InstructionSet::kNone,
+ global_allocator_)) {
}
void HLoopOptimization::Run() {
@@ -691,7 +695,7 @@
}
}
-bool HLoopOptimization::OptimizeInnerLoop(LoopNode* node) {
+bool HLoopOptimization::TryOptimizeInnerLoopFinite(LoopNode* node) {
HBasicBlock* header = node->loop_info->GetHeader();
HBasicBlock* preheader = node->loop_info->GetPreHeader();
// Ensure loop header logic is finite.
@@ -761,6 +765,83 @@
return false;
}
+bool HLoopOptimization::OptimizeInnerLoop(LoopNode* node) {
+ return TryOptimizeInnerLoopFinite(node) ||
+ TryUnrollingForBranchPenaltyReduction(node);
+}
+
+void HLoopOptimization::PeelOrUnrollOnce(LoopNode* loop_node,
+ bool do_unrolling,
+ SuperblockCloner::HBasicBlockMap* bb_map,
+ SuperblockCloner::HInstructionMap* hir_map) {
+ // TODO: peel loop nests.
+ DCHECK(loop_node->inner == nullptr);
+
+ // Check that loop info is up-to-date.
+ HLoopInformation* loop_info = loop_node->loop_info;
+ HBasicBlock* header = loop_info->GetHeader();
+ DCHECK(loop_info == header->GetLoopInformation());
+
+ PeelUnrollHelper helper(loop_info, bb_map, hir_map);
+ DCHECK(helper.IsLoopClonable());
+ HBasicBlock* new_header = do_unrolling ? helper.DoUnrolling() : helper.DoPeeling();
+ DCHECK(header == new_header);
+ DCHECK(loop_info == new_header->GetLoopInformation());
+}
+
+//
+// Loop unrolling: generic part methods.
+//
+
+bool HLoopOptimization::TryUnrollingForBranchPenaltyReduction(LoopNode* loop_node) {
+  // Don't run peeling/unrolling if compiler_driver_ is nullptr (i.e., when running
+  // under tests), as the InstructionSet is needed.
+ if (!kEnableScalarUnrolling || compiler_driver_ == nullptr) {
+ return false;
+ }
+
+ HLoopInformation* loop_info = loop_node->loop_info;
+ int64_t trip_count = 0;
+  // Only unroll loops with a known trip count.
+ if (!induction_range_.HasKnownTripCount(loop_info, &trip_count)) {
+ return false;
+ }
+
+ uint32_t unrolling_factor = arch_loop_helper_->GetScalarUnrollingFactor(loop_info, trip_count);
+ if (unrolling_factor == kNoUnrollingFactor) {
+ return false;
+ }
+
+ LoopAnalysisInfo loop_analysis_info(loop_info);
+ LoopAnalysis::CalculateLoopBasicProperties(loop_info, &loop_analysis_info);
+
+ // Check "IsLoopClonable" last as it can be time-consuming.
+ if (arch_loop_helper_->IsLoopTooBigForScalarUnrolling(&loop_analysis_info) ||
+ (loop_analysis_info.GetNumberOfExits() > 1) ||
+ loop_analysis_info.HasInstructionsPreventingScalarUnrolling() ||
+ !PeelUnrollHelper::IsLoopClonable(loop_info)) {
+ return false;
+ }
+
+ // TODO: support other unrolling factors.
+ DCHECK_EQ(unrolling_factor, 2u);
+
+ // Perform unrolling.
+ ArenaAllocator* arena = loop_info->GetHeader()->GetGraph()->GetAllocator();
+ SuperblockCloner::HBasicBlockMap bb_map(
+ std::less<HBasicBlock*>(), arena->Adapter(kArenaAllocSuperblockCloner));
+ SuperblockCloner::HInstructionMap hir_map(
+ std::less<HInstruction*>(), arena->Adapter(kArenaAllocSuperblockCloner));
+ PeelOrUnrollOnce(loop_node, /* unrolling */ true, &bb_map, &hir_map);
+
+ // Remove the redundant loop check after unrolling.
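+  // The copied header's HIf is given a constant condition that always selects the
+  // in-loop successor, so the duplicated trip check never exits the loop.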
+ HIf* copy_hif = bb_map.Get(loop_info->GetHeader())->GetLastInstruction()->AsIf();
+ int32_t constant = loop_info->Contains(*copy_hif->IfTrueSuccessor()) ? 1 : 0;
+ copy_hif->ReplaceInput(graph_->GetIntConstant(constant), 0u);
+
+ return true;
+}
+
//
// Loop vectorization. The implementation is based on the book by Aart J.C. Bik:
// "The Software Vectorization Handbook. Applying Multimedia Extensions for Maximum Performance."
@@ -891,7 +972,8 @@
HBasicBlock* preheader = node->loop_info->GetPreHeader();
// Pick a loop unrolling factor for the vector loop.
- uint32_t unroll = GetUnrollingFactor(block, trip_count);
+ uint32_t unroll = arch_loop_helper_->GetSIMDUnrollingFactor(
+ block, trip_count, MaxNumberPeeled(), vector_length_);
uint32_t chunk = vector_length_ * unroll;
DCHECK(trip_count == 0 || (trip_count >= MaxNumberPeeled() + chunk));
@@ -2174,41 +2256,6 @@
return true;
}
-static constexpr uint32_t ARM64_SIMD_MAXIMUM_UNROLL_FACTOR = 8;
-static constexpr uint32_t ARM64_SIMD_HEURISTIC_MAX_BODY_SIZE = 50;
-
-uint32_t HLoopOptimization::GetUnrollingFactor(HBasicBlock* block, int64_t trip_count) {
- uint32_t max_peel = MaxNumberPeeled();
- switch (compiler_driver_->GetInstructionSet()) {
- case InstructionSet::kArm64: {
- // Don't unroll with insufficient iterations.
- // TODO: Unroll loops with unknown trip count.
- DCHECK_NE(vector_length_, 0u);
- if (trip_count < (2 * vector_length_ + max_peel)) {
- return kNoUnrollingFactor;
- }
- // Don't unroll for large loop body size.
- uint32_t instruction_count = block->GetInstructions().CountSize();
- if (instruction_count >= ARM64_SIMD_HEURISTIC_MAX_BODY_SIZE) {
- return kNoUnrollingFactor;
- }
- // Find a beneficial unroll factor with the following restrictions:
- // - At least one iteration of the transformed loop should be executed.
- // - The loop body shouldn't be "too big" (heuristic).
- uint32_t uf1 = ARM64_SIMD_HEURISTIC_MAX_BODY_SIZE / instruction_count;
- uint32_t uf2 = (trip_count - max_peel) / vector_length_;
- uint32_t unroll_factor =
- TruncToPowerOfTwo(std::min({uf1, uf2, ARM64_SIMD_MAXIMUM_UNROLL_FACTOR}));
- DCHECK_GE(unroll_factor, 1u);
- return unroll_factor;
- }
- case InstructionSet::kX86:
- case InstructionSet::kX86_64:
- default:
- return kNoUnrollingFactor;
- }
-}
-
//
// Helpers.
//
diff --git a/compiler/optimizing/loop_optimization.h b/compiler/optimizing/loop_optimization.h
index 9414e5a..0120cff 100644
--- a/compiler/optimizing/loop_optimization.h
+++ b/compiler/optimizing/loop_optimization.h
@@ -20,12 +20,15 @@
#include "base/scoped_arena_allocator.h"
#include "base/scoped_arena_containers.h"
#include "induction_var_range.h"
+#include "loop_analysis.h"
#include "nodes.h"
#include "optimization.h"
+#include "superblock_cloner.h"
namespace art {
class CompilerDriver;
+class ArchDefaultLoopHelper;
/**
* Loop optimizations. Builds a loop hierarchy and applies optimizations to
@@ -135,10 +138,26 @@
void SimplifyInduction(LoopNode* node);
void SimplifyBlocks(LoopNode* node);
- // Performs optimizations specific to inner loop (empty loop removal,
+  // Performs optimizations specific to an inner loop with finite header logic (empty loop removal,
// unrolling, vectorization). Returns true if anything changed.
+ bool TryOptimizeInnerLoopFinite(LoopNode* node);
+
+  // Performs optimizations specific to an inner loop. Returns true if anything changed.
bool OptimizeInnerLoop(LoopNode* node);
+  // Performs loop peeling or unrolling once (depending on 'do_unrolling'); the transformation
+ // preserves the header and the loop info.
+ //
+  // Note: the function records block and instruction copy mappings in 'bb_map' and 'hir_map'.
+ void PeelOrUnrollOnce(LoopNode* loop_node,
+ bool do_unrolling,
+ SuperblockCloner::HBasicBlockMap* bb_map,
+ SuperblockCloner::HInstructionMap* hir_map);
+
+ // Tries to apply loop unrolling for branch penalty reduction and better instruction scheduling
+  // opportunities. Returns whether the transformation happened.
+ bool TryUnrollingForBranchPenaltyReduction(LoopNode* loop_node);
+
//
// Vectorization analysis and synthesis.
//
@@ -203,7 +222,6 @@
const ArrayReference* peeling_candidate);
uint32_t MaxNumberPeeled();
bool IsVectorizationProfitable(int64_t trip_count);
- uint32_t GetUnrollingFactor(HBasicBlock* block, int64_t trip_count);
//
// Helpers.
@@ -297,6 +315,9 @@
HBasicBlock* vector_body_; // body of the new loop
HInstruction* vector_index_; // normalized index of the new loop
+  // Helper providing target-specific behavior for loop optimizations.
+ ArchDefaultLoopHelper* arch_loop_helper_;
+
friend class LoopOptimizationTest;
DISALLOW_COPY_AND_ASSIGN(HLoopOptimization);
diff --git a/compiler/optimizing/superblock_cloner.cc b/compiler/optimizing/superblock_cloner.cc
index 04942f9..ee74f10 100644
--- a/compiler/optimizing/superblock_cloner.cc
+++ b/compiler/optimizing/superblock_cloner.cc
@@ -853,7 +853,7 @@
}
}
- if (kSuperblockClonerVerify) {
+ if (kIsDebugBuild) {
VerifyGraph();
}
}
diff --git a/compiler/optimizing/superblock_cloner.h b/compiler/optimizing/superblock_cloner.h
index 19c9dd4..afd5a5d 100644
--- a/compiler/optimizing/superblock_cloner.h
+++ b/compiler/optimizing/superblock_cloner.h
@@ -25,7 +25,6 @@
namespace art {
static const bool kSuperblockClonerLogging = false;
-static const bool kSuperblockClonerVerify = false;
// Represents an edge between two HBasicBlocks.
//