Diffstat (limited to 'compiler/optimizing')
 compiler/optimizing/induction_var_range.cc |  28
 compiler/optimizing/induction_var_range.h  |  18
 compiler/optimizing/loop_analysis.cc       | 120
 compiler/optimizing/loop_analysis.h        | 139
 compiler/optimizing/loop_optimization.cc   | 127
 compiler/optimizing/loop_optimization.h    |  25
 compiler/optimizing/superblock_cloner.cc   |   2
 compiler/optimizing/superblock_cloner.h    |   1
 8 files changed, 407 insertions(+), 53 deletions(-)
diff --git a/compiler/optimizing/induction_var_range.cc b/compiler/optimizing/induction_var_range.cc
index 0a310ca940..55eca2316a 100644
--- a/compiler/optimizing/induction_var_range.cc
+++ b/compiler/optimizing/induction_var_range.cc
@@ -352,13 +352,15 @@ void InductionVarRange::Replace(HInstruction* instruction,
}
bool InductionVarRange::IsFinite(HLoopInformation* loop, /*out*/ int64_t* trip_count) const {
- HInductionVarAnalysis::InductionInfo *trip =
- induction_analysis_->LookupInfo(loop, GetLoopControl(loop));
- if (trip != nullptr && !IsUnsafeTripCount(trip)) {
- IsConstant(trip->op_a, kExact, trip_count);
- return true;
- }
- return false;
+ bool is_constant_unused = false;
+ return CheckForFiniteAndConstantProps(loop, &is_constant_unused, trip_count);
+}
+
+bool InductionVarRange::HasKnownTripCount(HLoopInformation* loop,
+ /*out*/ int64_t* trip_count) const {
+ bool is_constant = false;
+ CheckForFiniteAndConstantProps(loop, &is_constant, trip_count);
+ return is_constant;
}
bool InductionVarRange::IsUnitStride(HInstruction* context,
@@ -417,6 +419,18 @@ HInstruction* InductionVarRange::GenerateTripCount(HLoopInformation* loop,
// Private class methods.
//
+bool InductionVarRange::CheckForFiniteAndConstantProps(HLoopInformation* loop,
+ /*out*/ bool* is_constant,
+ /*out*/ int64_t* trip_count) const {
+ HInductionVarAnalysis::InductionInfo *trip =
+ induction_analysis_->LookupInfo(loop, GetLoopControl(loop));
+ if (trip != nullptr && !IsUnsafeTripCount(trip)) {
+ *is_constant = IsConstant(trip->op_a, kExact, trip_count);
+ return true;
+ }
+ return false;
+}
+
bool InductionVarRange::IsConstant(HInductionVarAnalysis::InductionInfo* info,
ConstantRequest request,
/*out*/ int64_t* value) const {
diff --git a/compiler/optimizing/induction_var_range.h b/compiler/optimizing/induction_var_range.h
index 0b980f596a..906dc6bb7b 100644
--- a/compiler/optimizing/induction_var_range.h
+++ b/compiler/optimizing/induction_var_range.h
@@ -161,9 +161,15 @@ class InductionVarRange {
}
/**
- * Checks if header logic of a loop terminates. Sets trip-count tc if known.
+ * Checks if the header logic of a loop terminates. If the trip count is known, sets
+ * 'trip_count' to its value.
*/
- bool IsFinite(HLoopInformation* loop, /*out*/ int64_t* tc) const;
+ bool IsFinite(HLoopInformation* loop, /*out*/ int64_t* trip_count) const;
+
+ /**
+ * Checks if the trip count of the loop is known and, if so, sets 'trip_count' to its value.
+ */
+ bool HasKnownTripCount(HLoopInformation* loop, /*out*/ int64_t* trip_count) const;
/**
* Checks if the given instruction is a unit stride induction inside the closest enveloping
@@ -194,6 +200,14 @@ class InductionVarRange {
};
/**
+ * Checks if the header logic of a loop terminates. If the trip count is known (constant),
+ * sets 'is_constant' to true and 'trip_count' to the trip count value.
+ */
+ bool CheckForFiniteAndConstantProps(HLoopInformation* loop,
+ /*out*/ bool* is_constant,
+ /*out*/ int64_t* trip_count) const;
+
+ /**
* Returns true if exact or upper/lower bound on the given induction
* information is known as a 64-bit constant, which is returned in value.
*/
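
The resulting public API separates the finiteness query from the constant-trip-count query; a
minimal caller-side sketch (assuming an InductionVarRange 'induction_range_' and an
HLoopInformation* 'loop_info' in scope, as in the unrolling code further below):

    int64_t trip_count = 0;
    // True iff the loop header logic is known to terminate; 'trip_count' is
    // only set when the count is also a constant.
    bool is_finite = induction_range_.IsFinite(loop_info, &trip_count);
    // True iff the trip count is a known 64-bit constant, as required before
    // choosing a scalar unrolling factor.
    bool is_known = induction_range_.HasKnownTripCount(loop_info, &trip_count);
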
diff --git a/compiler/optimizing/loop_analysis.cc b/compiler/optimizing/loop_analysis.cc
new file mode 100644
index 0000000000..cd3bdaf016
--- /dev/null
+++ b/compiler/optimizing/loop_analysis.cc
@@ -0,0 +1,120 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "loop_analysis.h"
+
+namespace art {
+
+void LoopAnalysis::CalculateLoopBasicProperties(HLoopInformation* loop_info,
+ LoopAnalysisInfo* analysis_results) {
+ for (HBlocksInLoopIterator block_it(*loop_info);
+ !block_it.Done();
+ block_it.Advance()) {
+ HBasicBlock* block = block_it.Current();
+
+ for (HBasicBlock* successor : block->GetSuccessors()) {
+ if (!loop_info->Contains(*successor)) {
+ analysis_results->exits_num_++;
+ }
+ }
+
+ for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
+ HInstruction* instruction = it.Current();
+ if (MakesScalarUnrollingNonBeneficial(instruction)) {
+ analysis_results->has_instructions_preventing_scalar_unrolling_ = true;
+ }
+ analysis_results->instr_num_++;
+ }
+ analysis_results->bb_num_++;
+ }
+}
+
+class Arm64LoopHelper : public ArchDefaultLoopHelper {
+ public:
+ // Scalar loop unrolling parameters and heuristics.
+ //
+ // Maximum possible unrolling factor.
+ static constexpr uint32_t kArm64ScalarMaxUnrollFactor = 2;
+ // Loop's maximum instruction count. Loops with higher count will not be peeled/unrolled.
+ static constexpr uint32_t kArm64ScalarHeuristicMaxBodySizeInstr = 40;
+ // Loop's maximum basic block count. Loops with higher count will not be peeled/unrolled.
+ static constexpr uint32_t kArm64ScalarHeuristicMaxBodySizeBlocks = 8;
+
+ // SIMD loop unrolling parameters and heuristics.
+ //
+ // Maximum possible unrolling factor.
+ static constexpr uint32_t kArm64SimdMaxUnrollFactor = 8;
+ // Loop's maximum instruction count. Loops with higher count will not be unrolled.
+ static constexpr uint32_t kArm64SimdHeuristicMaxBodySizeInstr = 50;
+
+ bool IsLoopTooBigForScalarUnrolling(LoopAnalysisInfo* loop_analysis_info) const OVERRIDE {
+ size_t instr_num = loop_analysis_info->GetNumberOfInstructions();
+ size_t bb_num = loop_analysis_info->GetNumberOfBasicBlocks();
+ return (instr_num >= kArm64ScalarHeuristicMaxBodySizeInstr ||
+ bb_num >= kArm64ScalarHeuristicMaxBodySizeBlocks);
+ }
+
+ uint32_t GetScalarUnrollingFactor(HLoopInformation* loop_info ATTRIBUTE_UNUSED,
+ uint64_t trip_count) const OVERRIDE {
+ uint32_t desired_unrolling_factor = kArm64ScalarMaxUnrollFactor;
+ if (trip_count < desired_unrolling_factor || trip_count % desired_unrolling_factor != 0) {
+ return kNoUnrollingFactor;
+ }
+
+ return desired_unrolling_factor;
+ }
+
+ uint32_t GetSIMDUnrollingFactor(HBasicBlock* block,
+ int64_t trip_count,
+ uint32_t max_peel,
+ uint32_t vector_length) const OVERRIDE {
+ // Don't unroll with insufficient iterations.
+ // TODO: Unroll loops with unknown trip count.
+ DCHECK_NE(vector_length, 0u);
+ if (trip_count < (2 * vector_length + max_peel)) {
+ return kNoUnrollingFactor;
+ }
+ // Don't unroll for large loop body size.
+ uint32_t instruction_count = block->GetInstructions().CountSize();
+ if (instruction_count >= kArm64SimdHeuristicMaxBodySizeInstr) {
+ return kNoUnrollingFactor;
+ }
+ // Find a beneficial unroll factor with the following restrictions:
+ // - At least one iteration of the transformed loop should be executed.
+ // - The loop body shouldn't be "too big" (heuristic).
+
+ uint32_t uf1 = kArm64SimdHeuristicMaxBodySizeInstr / instruction_count;
+ uint32_t uf2 = (trip_count - max_peel) / vector_length;
+ uint32_t unroll_factor =
+ TruncToPowerOfTwo(std::min({uf1, uf2, kArm64SimdMaxUnrollFactor}));
+ DCHECK_GE(unroll_factor, 1u);
+ return unroll_factor;
+ }
+};
+
+ArchDefaultLoopHelper* ArchDefaultLoopHelper::Create(InstructionSet isa,
+ ArenaAllocator* allocator) {
+ switch (isa) {
+ case InstructionSet::kArm64: {
+ return new (allocator) Arm64LoopHelper;
+ }
+ default: {
+ return new (allocator) ArchDefaultLoopHelper;
+ }
+ }
+}
+
+} // namespace art
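
As a worked example of the Arm64 SIMD heuristic above (the numbers are illustrative, not taken
from the patch): for a vector loop body of 10 instructions with trip_count = 100, max_peel = 4
and vector_length = 4:

    uint32_t uf1 = 50 / 10;        // kArm64SimdHeuristicMaxBodySizeInstr / instruction_count = 5
    uint32_t uf2 = (100 - 4) / 4;  // (trip_count - max_peel) / vector_length = 24
    // TruncToPowerOfTwo(std::min({5u, 24u, 8u})) == 4, so the vector loop is
    // unrolled 4x, processing 4 * vector_length = 16 elements per iteration.
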
diff --git a/compiler/optimizing/loop_analysis.h b/compiler/optimizing/loop_analysis.h
new file mode 100644
index 0000000000..bad406f10b
--- /dev/null
+++ b/compiler/optimizing/loop_analysis.h
@@ -0,0 +1,139 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_LOOP_ANALYSIS_H_
+#define ART_COMPILER_OPTIMIZING_LOOP_ANALYSIS_H_
+
+#include "nodes.h"
+
+namespace art {
+
+class LoopAnalysis;
+
+// No loop unrolling factor (just one copy of the loop-body).
+static constexpr uint32_t kNoUnrollingFactor = 1;
+
+// Class to hold cached information on properties of the loop.
+class LoopAnalysisInfo : public ValueObject {
+ public:
+ explicit LoopAnalysisInfo(HLoopInformation* loop_info)
+ : bb_num_(0),
+ instr_num_(0),
+ exits_num_(0),
+ has_instructions_preventing_scalar_unrolling_(false),
+ loop_info_(loop_info) {}
+
+ size_t GetNumberOfBasicBlocks() const { return bb_num_; }
+ size_t GetNumberOfInstructions() const { return instr_num_; }
+ size_t GetNumberOfExits() const { return exits_num_; }
+
+ bool HasInstructionsPreventingScalarUnrolling() const {
+ return has_instructions_preventing_scalar_unrolling_;
+ }
+
+ const HLoopInformation* GetLoopInfo() const { return loop_info_; }
+
+ private:
+ // Number of basic blocks in the loop body.
+ size_t bb_num_;
+ // Number of instructions in the loop body.
+ size_t instr_num_;
+ // Number of loop's exits.
+ size_t exits_num_;
+ // Whether the loop has instructions which make scalar loop unrolling non-beneficial.
+ bool has_instructions_preventing_scalar_unrolling_;
+
+ // Corresponding HLoopInformation.
+ const HLoopInformation* loop_info_;
+
+ friend class LoopAnalysis;
+};
+
+// Utility class for methods and routines used to analyze loops and calculate loop properties
+// and characteristics.
+class LoopAnalysis : public ValueObject {
+ public:
+ // Calculates the loop's basic properties, like body size, number of exits, etc., and fills
+ // 'analysis_results' with this information.
+ static void CalculateLoopBasicProperties(HLoopInformation* loop_info,
+ LoopAnalysisInfo* analysis_results);
+
+ private:
+ // Returns whether an instruction makes scalar loop unrolling non-beneficial.
+ //
+ // If the loop body contains a dex/runtime call, its cost will likely dominate the overall
+ // loop performance, so unrolling will not bring any noticeable performance improvement
+ // but will increase the code size.
+ static bool MakesScalarUnrollingNonBeneficial(HInstruction* instruction) {
+ return (instruction->IsNewArray() ||
+ instruction->IsNewInstance() ||
+ instruction->IsUnresolvedInstanceFieldGet() ||
+ instruction->IsUnresolvedInstanceFieldSet() ||
+ instruction->IsUnresolvedStaticFieldGet() ||
+ instruction->IsUnresolvedStaticFieldSet() ||
+ // TODO: Unroll loops with intrinsified invokes.
+ instruction->IsInvoke() ||
+ // TODO: Unroll loops with ClinitChecks.
+ instruction->IsClinitCheck());
+ }
+};
+
+//
+// Helper class which holds target-dependent methods and constants needed for loop optimizations.
+//
+// To support peeling/unrolling for a new architecture, create a new helper class that
+// inherits from this one and implements the following methods.
+//
+class ArchDefaultLoopHelper : public ArenaObject<kArenaAllocOptimization> {
+ public:
+ virtual ~ArchDefaultLoopHelper() {}
+
+ // Creates an instance of a specialised helper for the target, or of the default helper if
+ // the target doesn't support loop peeling and unrolling.
+ static ArchDefaultLoopHelper* Create(InstructionSet isa, ArenaAllocator* allocator);
+
+ // Returns whether the loop is too big for loop unrolling by checking its total number of
+ // basic blocks and instructions.
+ //
+ // If the loop body has too many instructions, unrolling will not bring any noticeable
+ // performance improvement but will increase the code size.
+ //
+ // Returns 'true' by default; should be overridden by the target's loop helper.
+ virtual bool IsLoopTooBigForScalarUnrolling(
+ LoopAnalysisInfo* loop_analysis_info ATTRIBUTE_UNUSED) const { return true; }
+
+ // Returns the optimal scalar unrolling factor for the loop.
+ //
+ // Returns kNoUnrollingFactor by default; should be overridden by the target's loop helper.
+ virtual uint32_t GetScalarUnrollingFactor(HLoopInformation* loop_info ATTRIBUTE_UNUSED,
+ uint64_t trip_count ATTRIBUTE_UNUSED) const {
+ return kNoUnrollingFactor;
+ }
+
+ // Returns the optimal SIMD unrolling factor for the loop.
+ //
+ // Returns kNoUnrollingFactor by default; should be overridden by the target's loop helper.
+ virtual uint32_t GetSIMDUnrollingFactor(HBasicBlock* block ATTRIBUTE_UNUSED,
+ int64_t trip_count ATTRIBUTE_UNUSED,
+ uint32_t max_peel ATTRIBUTE_UNUSED,
+ uint32_t vector_length ATTRIBUTE_UNUSED) const {
+ return kNoUnrollingFactor;
+ }
+};
+
+} // namespace art
+
+#endif // ART_COMPILER_OPTIMIZING_LOOP_ANALYSIS_H_
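
A new target would be wired in alongside Arm64LoopHelper; a hypothetical sketch (the class name,
constant and policy are illustrative, not part of this change):

    class X86_64LoopHelper : public ArchDefaultLoopHelper {
     public:
      // Hypothetical tuning constant; a real value would need benchmarking.
      static constexpr uint32_t kX86_64ScalarMaxUnrollFactor = 2;

      uint32_t GetScalarUnrollingFactor(HLoopInformation* loop_info ATTRIBUTE_UNUSED,
                                        uint64_t trip_count) const OVERRIDE {
        // Mirror the Arm64 policy: only exact multiples of the factor, since
        // no epilogue loop is generated for the remaining iterations.
        if (trip_count < kX86_64ScalarMaxUnrollFactor ||
            trip_count % kX86_64ScalarMaxUnrollFactor != 0) {
          return kNoUnrollingFactor;
        }
        return kX86_64ScalarMaxUnrollFactor;
      }
    };

together with a matching 'case InstructionSet::kX86_64' in ArchDefaultLoopHelper::Create()
returning it.
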
diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc
index e1fb7ac17e..69080340e4 100644
--- a/compiler/optimizing/loop_optimization.cc
+++ b/compiler/optimizing/loop_optimization.cc
@@ -33,8 +33,8 @@ namespace art {
// Enables vectorization (SIMDization) in the loop optimizer.
static constexpr bool kEnableVectorization = true;
-// No loop unrolling factor (just one copy of the loop-body).
-static constexpr uint32_t kNoUnrollingFactor = 1;
+// Enables scalar loop unrolling in the loop optimizer.
+static constexpr bool kEnableScalarUnrolling = false;
//
// Static helpers.
@@ -480,7 +480,11 @@ HLoopOptimization::HLoopOptimization(HGraph* graph,
vector_preheader_(nullptr),
vector_header_(nullptr),
vector_body_(nullptr),
- vector_index_(nullptr) {
+ vector_index_(nullptr),
+ arch_loop_helper_(ArchDefaultLoopHelper::Create(compiler_driver_ != nullptr
+ ? compiler_driver_->GetInstructionSet()
+ : InstructionSet::kNone,
+ global_allocator_)) {
}
void HLoopOptimization::Run() {
@@ -691,7 +695,7 @@ void HLoopOptimization::SimplifyBlocks(LoopNode* node) {
}
}
-bool HLoopOptimization::OptimizeInnerLoop(LoopNode* node) {
+bool HLoopOptimization::TryOptimizeInnerLoopFinite(LoopNode* node) {
HBasicBlock* header = node->loop_info->GetHeader();
HBasicBlock* preheader = node->loop_info->GetPreHeader();
// Ensure loop header logic is finite.
@@ -761,6 +765,83 @@ bool HLoopOptimization::OptimizeInnerLoop(LoopNode* node) {
return false;
}
+bool HLoopOptimization::OptimizeInnerLoop(LoopNode* node) {
+ return TryOptimizeInnerLoopFinite(node) ||
+ TryUnrollingForBranchPenaltyReduction(node);
+}
+
+void HLoopOptimization::PeelOrUnrollOnce(LoopNode* loop_node,
+ bool do_unrolling,
+ SuperblockCloner::HBasicBlockMap* bb_map,
+ SuperblockCloner::HInstructionMap* hir_map) {
+ // TODO: peel loop nests.
+ DCHECK(loop_node->inner == nullptr);
+
+ // Check that loop info is up-to-date.
+ HLoopInformation* loop_info = loop_node->loop_info;
+ HBasicBlock* header = loop_info->GetHeader();
+ DCHECK(loop_info == header->GetLoopInformation());
+
+ PeelUnrollHelper helper(loop_info, bb_map, hir_map);
+ DCHECK(helper.IsLoopClonable());
+ HBasicBlock* new_header = do_unrolling ? helper.DoUnrolling() : helper.DoPeeling();
+ DCHECK(header == new_header);
+ DCHECK(loop_info == new_header->GetLoopInformation());
+}
+
+//
+// Loop unrolling: generic part methods.
+//
+
+bool HLoopOptimization::TryUnrollingForBranchPenaltyReduction(LoopNode* loop_node) {
+ // Don't run peeling/unrolling if compiler_driver_ is nullptr (i.e., running under tests)
+ // as InstructionSet is needed.
+ if (!kEnableScalarUnrolling || compiler_driver_ == nullptr) {
+ return false;
+ }
+
+ HLoopInformation* loop_info = loop_node->loop_info;
+ int64_t trip_count = 0;
+ // Only unroll loops with a known trip count.
+ if (!induction_range_.HasKnownTripCount(loop_info, &trip_count)) {
+ return false;
+ }
+
+ uint32_t unrolling_factor = arch_loop_helper_->GetScalarUnrollingFactor(loop_info, trip_count);
+ if (unrolling_factor == kNoUnrollingFactor) {
+ return false;
+ }
+
+ LoopAnalysisInfo loop_analysis_info(loop_info);
+ LoopAnalysis::CalculateLoopBasicProperties(loop_info, &loop_analysis_info);
+
+ // Check "IsLoopClonable" last as it can be time-consuming.
+ if (arch_loop_helper_->IsLoopTooBigForScalarUnrolling(&loop_analysis_info) ||
+ (loop_analysis_info.GetNumberOfExits() > 1) ||
+ loop_analysis_info.HasInstructionsPreventingScalarUnrolling() ||
+ !PeelUnrollHelper::IsLoopClonable(loop_info)) {
+ return false;
+ }
+
+ // TODO: support other unrolling factors.
+ DCHECK_EQ(unrolling_factor, 2u);
+
+ // Perform unrolling.
+ ArenaAllocator* arena = loop_info->GetHeader()->GetGraph()->GetAllocator();
+ SuperblockCloner::HBasicBlockMap bb_map(
+ std::less<HBasicBlock*>(), arena->Adapter(kArenaAllocSuperblockCloner));
+ SuperblockCloner::HInstructionMap hir_map(
+ std::less<HInstruction*>(), arena->Adapter(kArenaAllocSuperblockCloner));
+ PeelOrUnrollOnce(loop_node, /* unrolling */ true, &bb_map, &hir_map);
+
+ // Remove the redundant loop check after unrolling.
+ HIf* copy_hif = bb_map.Get(loop_info->GetHeader())->GetLastInstruction()->AsIf();
+ int32_t constant = loop_info->Contains(*copy_hif->IfTrueSuccessor()) ? 1 : 0;
+ copy_hif->ReplaceInput(graph_->GetIntConstant(constant), 0u);
+
+ return true;
+}
+
//
// Loop vectorization. The implementation is based on the book by Aart J.C. Bik:
// "The Software Vectorization Handbook. Applying Multimedia Extensions for Maximum Performance."
@@ -891,7 +972,8 @@ void HLoopOptimization::Vectorize(LoopNode* node,
HBasicBlock* preheader = node->loop_info->GetPreHeader();
// Pick a loop unrolling factor for the vector loop.
- uint32_t unroll = GetUnrollingFactor(block, trip_count);
+ uint32_t unroll = arch_loop_helper_->GetSIMDUnrollingFactor(
+ block, trip_count, MaxNumberPeeled(), vector_length_);
uint32_t chunk = vector_length_ * unroll;
DCHECK(trip_count == 0 || (trip_count >= MaxNumberPeeled() + chunk));
@@ -2174,41 +2256,6 @@ bool HLoopOptimization::IsVectorizationProfitable(int64_t trip_count) {
return true;
}
-static constexpr uint32_t ARM64_SIMD_MAXIMUM_UNROLL_FACTOR = 8;
-static constexpr uint32_t ARM64_SIMD_HEURISTIC_MAX_BODY_SIZE = 50;
-
-uint32_t HLoopOptimization::GetUnrollingFactor(HBasicBlock* block, int64_t trip_count) {
- uint32_t max_peel = MaxNumberPeeled();
- switch (compiler_driver_->GetInstructionSet()) {
- case InstructionSet::kArm64: {
- // Don't unroll with insufficient iterations.
- // TODO: Unroll loops with unknown trip count.
- DCHECK_NE(vector_length_, 0u);
- if (trip_count < (2 * vector_length_ + max_peel)) {
- return kNoUnrollingFactor;
- }
- // Don't unroll for large loop body size.
- uint32_t instruction_count = block->GetInstructions().CountSize();
- if (instruction_count >= ARM64_SIMD_HEURISTIC_MAX_BODY_SIZE) {
- return kNoUnrollingFactor;
- }
- // Find a beneficial unroll factor with the following restrictions:
- // - At least one iteration of the transformed loop should be executed.
- // - The loop body shouldn't be "too big" (heuristic).
- uint32_t uf1 = ARM64_SIMD_HEURISTIC_MAX_BODY_SIZE / instruction_count;
- uint32_t uf2 = (trip_count - max_peel) / vector_length_;
- uint32_t unroll_factor =
- TruncToPowerOfTwo(std::min({uf1, uf2, ARM64_SIMD_MAXIMUM_UNROLL_FACTOR}));
- DCHECK_GE(unroll_factor, 1u);
- return unroll_factor;
- }
- case InstructionSet::kX86:
- case InstructionSet::kX86_64:
- default:
- return kNoUnrollingFactor;
- }
-}
-
//
// Helpers.
//
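
The overall effect of TryUnrollingForBranchPenaltyReduction on an eligible loop (single exit,
clonable, known trip count divisible by the factor 2) can be sketched in source-level
pseudocode; the shapes below are illustrative, not the emitted HIR:

    // Shape before (trip count n known and even):
    //   for (int i = 0; i < n; ++i) { body(i); }
    //
    // Shape after unrolling by 2, once the copied header's HIf is folded to a
    // constant:
    //   for (int i = 0; i < n; ) { body(i); ++i; body(i); ++i; }
    //
    // The loop-exit branch now executes once per two body copies, which is the
    // branch penalty reduction the pass targets.
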
diff --git a/compiler/optimizing/loop_optimization.h b/compiler/optimizing/loop_optimization.h
index 9414e5a0c6..0120cffa56 100644
--- a/compiler/optimizing/loop_optimization.h
+++ b/compiler/optimizing/loop_optimization.h
@@ -20,12 +20,15 @@
#include "base/scoped_arena_allocator.h"
#include "base/scoped_arena_containers.h"
#include "induction_var_range.h"
+#include "loop_analysis.h"
#include "nodes.h"
#include "optimization.h"
+#include "superblock_cloner.h"
namespace art {
class CompilerDriver;
+class ArchDefaultLoopHelper;
/**
* Loop optimizations. Builds a loop hierarchy and applies optimizations to
@@ -135,10 +138,26 @@ class HLoopOptimization : public HOptimization {
void SimplifyInduction(LoopNode* node);
void SimplifyBlocks(LoopNode* node);
- // Performs optimizations specific to inner loop (empty loop removal,
+ // Performs optimizations specific to an inner loop with finite header logic (empty loop removal,
// unrolling, vectorization). Returns true if anything changed.
+ bool TryOptimizeInnerLoopFinite(LoopNode* node);
+
+ // Performs optimizations specific to an inner loop. Returns true if anything changed.
bool OptimizeInnerLoop(LoopNode* node);
+ // Performs loop peeling or unrolling once (depending on 'do_unrolling'); the transformation
+ // preserves the header and the loop info.
+ //
+ // Note: the function records the mapping from the original blocks and instructions to
+ // their copies in 'bb_map' and 'hir_map'.
+ void PeelOrUnrollOnce(LoopNode* loop_node,
+ bool do_unrolling,
+ SuperblockCloner::HBasicBlockMap* bb_map,
+ SuperblockCloner::HInstructionMap* hir_map);
+
+ // Tries to apply loop unrolling for branch penalty reduction and better instruction
+ // scheduling opportunities. Returns whether the transformation happened.
+ bool TryUnrollingForBranchPenaltyReduction(LoopNode* loop_node);
+
//
// Vectorization analysis and synthesis.
//
@@ -203,7 +222,6 @@ class HLoopOptimization : public HOptimization {
const ArrayReference* peeling_candidate);
uint32_t MaxNumberPeeled();
bool IsVectorizationProfitable(int64_t trip_count);
- uint32_t GetUnrollingFactor(HBasicBlock* block, int64_t trip_count);
//
// Helpers.
@@ -297,6 +315,9 @@ class HLoopOptimization : public HOptimization {
HBasicBlock* vector_body_; // body of the new loop
HInstruction* vector_index_; // normalized index of the new loop
+ // Helper providing target-specific behaviour for loop optimizations.
+ ArchDefaultLoopHelper* arch_loop_helper_;
+
friend class LoopOptimizationTest;
DISALLOW_COPY_AND_ASSIGN(HLoopOptimization);
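
One ownership detail worth noting: 'arch_loop_helper_' is never deleted explicitly because
ArchDefaultLoopHelper derives from ArenaObject<kArenaAllocOptimization>; a sketch of the
allocation pattern (the 'graph' variable is a hypothetical stand-in):

    ArenaAllocator* allocator = graph->GetAllocator();
    // Placement-new into the arena: the helper is released wholesale together
    // with all other arena allocations, so no destructor call is needed.
    ArchDefaultLoopHelper* helper =
        ArchDefaultLoopHelper::Create(InstructionSet::kArm64, allocator);
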
diff --git a/compiler/optimizing/superblock_cloner.cc b/compiler/optimizing/superblock_cloner.cc
index 04942f9a4a..ee74f1001f 100644
--- a/compiler/optimizing/superblock_cloner.cc
+++ b/compiler/optimizing/superblock_cloner.cc
@@ -853,7 +853,7 @@ void SuperblockCloner::CleanUp() {
}
}
- if (kSuperblockClonerVerify) {
+ if (kIsDebugBuild) {
VerifyGraph();
}
}
diff --git a/compiler/optimizing/superblock_cloner.h b/compiler/optimizing/superblock_cloner.h
index 19c9dd471c..afd5a5d6e7 100644
--- a/compiler/optimizing/superblock_cloner.h
+++ b/compiler/optimizing/superblock_cloner.h
@@ -25,7 +25,6 @@
namespace art {
static const bool kSuperblockClonerLogging = false;
-static const bool kSuperblockClonerVerify = false;
// Represents an edge between two HBasicBlocks.
//