ART: Implement loop full unrolling.

Performs full unrolling of small loops with small trip
counts to eliminate the loop-check overhead and to create
more opportunities for inter-iteration optimizations.

caffeinemark/FloatAtom: 1.2x performance on arm64 Cortex-A57.

Test: 530-checker-peel-unroll.
Test: test-art-host, test-art-target.
Change-Id: Idf3fe3cb611376935d176c60db8c49907222e28a
diff --git a/test/527-checker-array-access-split/src/Main.java b/test/527-checker-array-access-split/src/Main.java
index a5caa7b..935b378 100644
--- a/test/527-checker-array-access-split/src/Main.java
+++ b/test/527-checker-array-access-split/src/Main.java
@@ -400,7 +400,7 @@
   /// CHECK:                                    ArraySet [<<Address>>,<<Index>>,<<Div>>]
 
   public static int canMergeAfterBCE1() {
-    int[] array = {0, 7, 14, 21};
+    int[] array = {0, 7, 14, 21, 28, 35, 42};
     for (int i = 0; i < array.length; i++) {
       array[i] = array[i] / 7;
     }
@@ -513,7 +513,7 @@
   /// CHECK-NOT:                                IntermediateAddress
 
   public static int canMergeAfterBCE2() {
-    int[] array = {64, 8, 4, 2 };
+    int[] array = {128, 64, 32, 8, 4, 2 };
     for (int i = 0; i < array.length - 1; i++) {
       array[i + 1] = array[i] << array[i + 1];
     }
@@ -571,8 +571,8 @@
     accrossGC(array, 0);
     assertIntEquals(125, array[0]);
 
-    assertIntEquals(3, canMergeAfterBCE1());
-    assertIntEquals(1048576, canMergeAfterBCE2());
+    assertIntEquals(6, canMergeAfterBCE1());
+    assertIntEquals(2097152, canMergeAfterBCE2());
 
     assertIntEquals(18, checkLongFloatDouble());
   }