ART: Implement scalar loop unrolling.

Implement scalar loop unrolling for small loops (on arm64) with a known trip count, to reduce the loop check and branch penalty and to provide more opportunities for instruction scheduling.

Note: this functionality is currently turned off by default.

Test: cloner_test.cc
Test: test-art-target, test-art-host
Change-Id: Ic27fd8fb0bc0d7b69251252da37b8b510bc30acc

commit: 121f2038e9c8afe12f8f4096b7c84a167e7adea5 [log] [tgz]
author: Artem Serov <artem.serov@linaro.org> Mon Oct 23 19:19:06 2017 +0100
committer: Artem Serov <artem.serov@linaro.org> Mon Mar 26 19:46:23 2018 +0100
tree: 655e2bba77ac34208c54b290286104b124003e59
parent: f9635aab3f2db9b1b13184e8146530a53246b82c [diff] [blame]
diff --git a/compiler/optimizing/loop_optimization.h b/compiler/optimizing/loop_optimization.h
index 9414e5a..0120cff 100644
--- a/compiler/optimizing/loop_optimization.h
+++ b/compiler/optimizing/loop_optimization.h

@@ -20,12 +20,15 @@
 #include "base/scoped_arena_allocator.h"
 #include "base/scoped_arena_containers.h"
 #include "induction_var_range.h"
+#include "loop_analysis.h"
 #include "nodes.h"
 #include "optimization.h"
+#include "superblock_cloner.h"
 
 namespace art {
 
 class CompilerDriver;
+class ArchDefaultLoopHelper;
 
 /**
  * Loop optimizations. Builds a loop hierarchy and applies optimizations to
@@ -135,10 +138,26 @@
   void SimplifyInduction(LoopNode* node);
   void SimplifyBlocks(LoopNode* node);
 
-  // Performs optimizations specific to inner loop (empty loop removal,
+  // Performs optimizations specific to inner loop with finite header logic (empty loop removal,
   // unrolling, vectorization). Returns true if anything changed.
+  bool TryOptimizeInnerLoopFinite(LoopNode* node);
+
+  // Performs optimizations specific to inner loop. Returns true if anything changed.
   bool OptimizeInnerLoop(LoopNode* node);
 
+  // Performs loop peeling or unrolling once (unrolling if 'do_unrolling' is true, peeling
+  // otherwise); the transformation preserves the header and the loop info.
+  //
+  // Note: the function records information about the copied blocks and instructions.
+  void PeelOrUnrollOnce(LoopNode* loop_node,
+                        bool do_unrolling,
+                        SuperblockCloner::HBasicBlockMap* bb_map,
+                        SuperblockCloner::HInstructionMap* hir_map);
+
+  // Tries to apply loop unrolling for branch penalty reduction and better instruction scheduling
+  // opportunities. Returns true if the transformation was applied.
+  bool TryUnrollingForBranchPenaltyReduction(LoopNode* loop_node);
+
   //
   // Vectorization analysis and synthesis.
   //
@@ -203,7 +222,6 @@
                             const ArrayReference* peeling_candidate);
   uint32_t MaxNumberPeeled();
   bool IsVectorizationProfitable(int64_t trip_count);
-  uint32_t GetUnrollingFactor(HBasicBlock* block, int64_t trip_count);
 
   //
   // Helpers.
@@ -297,6 +315,9 @@
   HBasicBlock* vector_body_;  // body of the new loop
   HInstruction* vector_index_;  // normalized index of the new loop
 
+  // Helper for target-specific behaviour for loop optimizations.
+  ArchDefaultLoopHelper* arch_loop_helper_;
+
   friend class LoopOptimizationTest;
 
   DISALLOW_COPY_AND_ASSIGN(HLoopOptimization);
commit	121f2038e9c8afe12f8f4096b7c84a167e7adea5	[log] [tgz]
author	Artem Serov <artem.serov@linaro.org>	Mon Oct 23 19:19:06 2017 +0100
committer	Artem Serov <artem.serov@linaro.org>	Mon Mar 26 19:46:23 2018 +0100
tree	655e2bba77ac34208c54b290286104b124003e59
parent	f9635aab3f2db9b1b13184e8146530a53246b82c [diff] [blame]