LSE: Better elimination of array stores.

This replaces the workaround from
    https://android-review.googlesource.com/1427546
with a proper fix that precisely tracks the needed stores.

Also fix spelling of Floyd-Warshall.

Test: Additional tests in 530-checker-lse
Test: testrunner.py --host --optimizing --interpreter --jvm -t 530
Test: testrunner.py --host --optimizing
Bug: 168446366
Bug: 77906240
Change-Id: I0cd7acb600ed210ac09d0006b28f045758c2c3ec
diff --git a/compiler/optimizing/load_store_elimination.cc b/compiler/optimizing/load_store_elimination.cc
index 9427a70..5f15822 100644
--- a/compiler/optimizing/load_store_elimination.cc
+++ b/compiler/optimizing/load_store_elimination.cc
@@ -123,7 +123,7 @@
  * but if we use TryReplacingLoopPhiPlaceholderWithDefault() for default value
  * replacement search, there are additional dependencies to consider, see below.
  *
- * In the successful case (no unknown inputs found) we use the Floyd-Warshal
+ * In the successful case (no unknown inputs found) we use the Floyd-Warshall
  * algorithm to determine transitive closures for each found Phi placeholder,
  * and then match or materialize Phis from the smallest transitive closure,
  * so that we can determine if such subset has a single other input. This has
@@ -162,7 +162,7 @@
  *    O(phi_placeholder_dependencies * phi_placeholders)
  * term from unknown input cases is actually
  *    O(heap_locations^3 * blocks^3) ,
- * exactly as the estimate for the Floyd-Warshal parts of successful cases.
+ * exactly as the estimate for the Floyd-Warshall parts of successful cases.
  * Adding the other term from the unknown input cases (to account for the case
  * with significantly more instructions than blocks and heap locations), the
  * phase 2 time complexity is
@@ -2052,12 +2052,10 @@
     work_queue.pop_back();
     size_t idx = phi_placeholder->GetHeapLocation();
     HBasicBlock* block = blocks[phi_placeholder->GetBlockId()];
-    HeapLocation* heap_loc = heap_location_collector_.GetHeapLocation(idx);
-    bool unknown_array_stores = false;
     for (HBasicBlock* predecessor : block->GetPredecessors()) {
       ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[predecessor->GetBlockId()];
-      // For loop back-edges we must also preserve all stores to locations that may alias
-      // with the location `idx`.
+      // For loop back-edges we must also preserve all stores to locations that
+      // may alias with the location `idx`.
       // TODO: Review whether we need to keep stores to aliased locations from pre-header.
       // TODO: Add tests cases around this.
       bool is_back_edge =
@@ -2066,7 +2064,31 @@
       size_t end = is_back_edge ? heap_values.size() : idx + 1u;
       for (size_t i = start; i != end; ++i) {
         Value stored_by = heap_values[i].stored_by;
-        if (!stored_by.IsUnknown() && (i == idx || heap_location_collector_.MayAlias(i, idx))) {
+        auto may_alias = [this, block, idx](size_t i) {
+            DCHECK_NE(i, idx);
+            DCHECK(block->IsLoopHeader());
+            if (heap_location_collector_.MayAlias(i, idx)) {
+              return true;
+            }
+            // For array locations with index defined inside the loop, include
+            // all other locations in the array, even those that LSA declares
+            // non-aliasing, such as `a[i]` and `a[i + 1]`, as they may actually
+            // refer to the same locations for different iterations. (LSA's
+            // `ComputeMayAlias()` does not consider different loop iterations.)
+            HeapLocation* heap_loc = heap_location_collector_.GetHeapLocation(idx);
+            HeapLocation* other_loc = heap_location_collector_.GetHeapLocation(i);
+            if (heap_loc->IsArray() &&
+                other_loc->IsArray() &&
+                heap_loc->GetReferenceInfo() == other_loc->GetReferenceInfo() &&
+                block->GetLoopInformation()->Contains(*heap_loc->GetIndex()->GetBlock())) {
+              // If one location has index defined inside and the other index defined outside
+              // of the loop, LSA considers them aliasing and we take an early return above.
+              DCHECK(block->GetLoopInformation()->Contains(*other_loc->GetIndex()->GetBlock()));
+              return true;
+            }
+            return false;
+        };
+        if (!stored_by.IsUnknown() && (i == idx || may_alias(i))) {
           if (stored_by.NeedsPhi()) {
             size_t phi_placeholder_index = PhiPlaceholderIndex(stored_by);
             if (!phi_placeholders_to_search_for_kept_stores_.IsBitSet(phi_placeholder_index)) {
@@ -2077,51 +2099,6 @@
             DCHECK(IsStore(stored_by.GetInstruction()));
             kept_stores_.SetBit(stored_by.GetInstruction()->GetId());
           }
-        } else if (stored_by.IsUnknown() && heap_loc->IsArray()) {
-          // See comment below. We are an array load where we don't know how the
-          // value got there. Need to ensure that writes aren't omitted (in
-          // ways that are observable) since this looks like a read from an
-          // arbitrary point in the array.
-          unknown_array_stores = true;
-        }
-      }
-    }
-    if (unknown_array_stores) {
-      // Since we are an array, actually ended up here, and don't have a known
-      // set this must be from a previous iteration (otherwise we'd either not
-      // reach this because stores were kept from escapes or just known from
-      // earlier). We need to keep all stores that could affect this read.
-      //
-      // This is sufficient since
-      // (1) If we are not an array then all loads and stores are at known,
-      //     exact offsets (fields and such).
-      // (2) If we didn't end up here then we either had no chance to turn
-      //     this load into a phi or we know what the actual value is (above
-      //     branch).
-      //
-      // Since none of these are true we are reading from some location in
-      // the array and we have no idea where/if it was written. This means
-      // we need to be super conservative and preserve all writes that could
-      // possibly occur before this read, in case one of them is the write
-      // that supplies the value. This means keeping the last store for
-      // every heap location touching this array in every block.
-      //
-      // TODO We can do better and do partial LSE around this but we don't
-      // have the data for that at the moment.
-      // TODO Can we be more selective w/o partial LSE data?
-      // TODO Do some analysis to see how much we are leaving on the table
-      // with this.
-      for (uint32_t j = 0; j < heap_location_collector_.GetNumberOfHeapLocations(); ++j) {
-        auto candidate_heap_location = heap_location_collector_.GetHeapLocation(j);
-        // If the heap-location is part of this same array.
-        if (candidate_heap_location->GetReferenceInfo() == heap_loc->GetReferenceInfo()) {
-          for (const auto& hv : heap_values_for_) {
-            if (hv.size() > j && hv[j].stored_by.IsInstruction()) {
-              // This is the last store in this block. We can't tell if it's
-              // to the right location or not so we'll need to keep it.
-              kept_stores_.SetBit(hv[j].stored_by.GetInstruction()->GetId());
-            }
-          }
         }
       }
     }
diff --git a/test/530-checker-lse/src/Main.java b/test/530-checker-lse/src/Main.java
index 0bc2e56..b2ae3a1 100644
--- a/test/530-checker-lse/src/Main.java
+++ b/test/530-checker-lse/src/Main.java
@@ -3338,6 +3338,73 @@
     return obj.j;
   }
 
+  /// CHECK-START: int Main.testLoop34(int) load_store_elimination (before)
+  /// CHECK-DAG:                 NewArray
+  /// CHECK-DAG:                 Phi
+  /// CHECK-DAG:                 Phi
+  /// CHECK-DAG:                 ArrayGet
+  /// CHECK-DAG:                 ArraySet
+
+  /// CHECK-START: int Main.testLoop34(int) load_store_elimination (after)
+  /// CHECK-DAG:                 NewArray
+  /// CHECK-DAG:                 Phi
+  /// CHECK-DAG:                 Phi
+  /// CHECK-DAG:                 ArrayGet
+  /// CHECK-DAG:                 ArraySet
+
+  // Test that ArraySet with non-default value prevents matching ArrayGet for
+  // the same array to default value even when the ArraySet is using an index
+  // offset by one, making LSA declare that the two heap locations do not alias.
+  // Also test that the ArraySet is not eliminated.
+  private static int testLoop34(int n) {
+    int[] a = new int[n + 1];
+    int sum = 0;
+    for (int i = 0; i < n; ) {
+      int value = a[i] + 1;
+      sum += value;
+      ++i;
+      a[i] = value;
+    }
+    return sum;
+  }
+
+  /// CHECK-START: int Main.testLoop35(int) load_store_elimination (before)
+  /// CHECK-DAG:                 NewArray
+  /// CHECK-DAG:                 Phi
+  /// CHECK-DAG:                 Phi
+  /// CHECK-DAG:                 ArrayGet
+  /// CHECK-DAG:                 ArraySet
+  /// CHECK-DAG:                 ArraySet
+
+  /// CHECK-START: int Main.testLoop35(int) load_store_elimination (after)
+  /// CHECK-DAG:                 NewArray
+  /// CHECK-DAG:                 Phi
+  /// CHECK-DAG:                 Phi
+  /// CHECK-DAG:                 ArrayGet
+  /// CHECK-DAG:                 ArraySet
+
+  /// CHECK-START: int Main.testLoop35(int) load_store_elimination (after)
+  /// CHECK:                     ArraySet
+  /// CHECK-NOT:                 ArraySet
+
+  // Test that ArraySet with non-default value prevents matching ArrayGet for
+  // the same array to default value even when the ArraySet is using an index
+  // offset by one, making LSA declare that the two heap locations do not alias.
+  // Also test that the ArraySet is not eliminated and that a store after the
+  // loop is eliminated.
+  private static int testLoop35(int n) {
+    int[] a = new int[n + 1];
+    int sum = 0;
+    for (int i = 0; i < n; ) {
+      int value = a[i] + 1;
+      sum += value;
+      ++i;
+      a[i] = value;
+    }
+    a[0] = 1;
+    return sum;
+  }
+
   /// CHECK-START: int Main.testNestedLoop1(TestClass, int) load_store_elimination (before)
   /// CHECK-DAG:                 InstanceFieldSet
   /// CHECK-DAG:                 InstanceFieldGet
@@ -4019,6 +4086,14 @@
     assertIntEquals(testLoop33(new TestClass(), 1), 0);
     assertIntEquals(testLoop33(new TestClass(), 2), 0);
     assertIntEquals(testLoop33(new TestClass(), 3), 0);
+    assertIntEquals(testLoop34(0), 0);
+    assertIntEquals(testLoop34(1), 1);
+    assertIntEquals(testLoop34(2), 3);
+    assertIntEquals(testLoop34(3), 6);
+    assertIntEquals(testLoop35(0), 0);
+    assertIntEquals(testLoop35(1), 1);
+    assertIntEquals(testLoop35(2), 3);
+    assertIntEquals(testLoop35(3), 6);
 
     assertIntEquals(testNestedLoop1(new TestClass(), 0), 1);
     assertIntEquals(testNestedLoop1(new TestClass(), 1), 1);