Fix LSE-array overlap issue

In cases where an array has overlapping accesses on separate
iterations LSE could get confused and incorrectly believe removing
array stores is safe even when later iterations rely on the store
occurring. For example consider the following code:

```
int do_cal(int len) {
  if (len < 5) {
    return -1;
  }
  int w[] = new w[len];
  int t = 0;
  for (int i = 5; i < w.length; i++) {
    w[i] = please_interleave(w[i - 1], w[i - 5]);
    t = please_select(w[i], i);
  }
  return t;
}
```

We would either need to materialize 5 PHIs to hold the values
`w[i - 1], ..., w[i - 5]` or avoid removing the write to `w[i]`. Our
LSE pass is unable to do the former and would (incorrectly) fail to
recognize that, by not being able to determine the values of `w[i -
1]` and `w[i-5]` that the later store to `w[i]` must be preserved.

Bug: 168446366
Test: ./test.py --host
Change-Id: I89772c8bf49ebf6de70f86bd68484e14bd189406
diff --git a/compiler/optimizing/load_store_elimination.cc b/compiler/optimizing/load_store_elimination.cc
index d69442e..9427a70 100644
--- a/compiler/optimizing/load_store_elimination.cc
+++ b/compiler/optimizing/load_store_elimination.cc
@@ -23,6 +23,7 @@
 #include "base/scoped_arena_containers.h"
 #include "escape.h"
 #include "load_store_analysis.h"
+#include "optimizing/optimizing_compiler_stats.h"
 #include "reference_type_propagation.h"
 
 /**
@@ -2051,13 +2052,13 @@
     work_queue.pop_back();
     size_t idx = phi_placeholder->GetHeapLocation();
     HBasicBlock* block = blocks[phi_placeholder->GetBlockId()];
+    HeapLocation* heap_loc = heap_location_collector_.GetHeapLocation(idx);
+    bool unknown_array_stores = false;
     for (HBasicBlock* predecessor : block->GetPredecessors()) {
       ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[predecessor->GetBlockId()];
       // For loop back-edges we must also preserve all stores to locations that may alias
       // with the location `idx`.
       // TODO: Review whether we need to keep stores to aliased locations from pre-header.
-      // TODO: Review in light of testLoop29(); may need to handle locations that LSA considers
-      //       non-aliasing.
       // TODO: Add tests cases around this.
       bool is_back_edge =
           block->IsLoopHeader() && predecessor != block->GetLoopInformation()->GetPreHeader();
@@ -2076,6 +2077,51 @@
             DCHECK(IsStore(stored_by.GetInstruction()));
             kept_stores_.SetBit(stored_by.GetInstruction()->GetId());
           }
+        } else if (stored_by.IsUnknown() && heap_loc->IsArray()) {
+          // See comment below. We are an array load where we don't know how the
+          // value got there. Need to ensure that writes aren't omitted (in
+          // ways that are observable) since this looks like a read from an
+          // arbitrary point in the array.
+          unknown_array_stores = true;
+        }
+      }
+    }
+    if (unknown_array_stores) {
+      // Since we are an array, actually ended up here, and don't have a known
+      // set this must be from a previous iteration (otherwise we'd either not
+      // reach this because stores were kept from escapes or just known from
+      // earlier). We need to keep all stores that could affect this read.
+      //
+      // This is sufficient since
+      // (1) If we are not an array then all loads and stores are at known,
+      //     exact offsets (fields and such).
+      // (2) If we didn't end up here then we either had no chance to turn
+      //     this load into a phi or we know what the actual value is (above
+      //     branch).
+      //
+      // Since none of these are true we are reading from some location in
+      // the array and we have no idea where/if it was written. This means
+      // we need to be super conservative and preserve all writes that could
+      // possibly occur before this read, in case one of them is the write
+      // that supplies the value. This means keeping the last store for
+      // every heap location touching this array in every block.
+      //
+      // TODO We can do better and do partial LSE around this but we don't
+      // have the data for that at the moment.
+      // TODO Can we be more selective w/o partial LSE data?
+      // TODO Do some analysis to see how much we are leaving on the table
+      // with this.
+      for (uint32_t j = 0; j < heap_location_collector_.GetNumberOfHeapLocations(); ++j) {
+        auto candidate_heap_location = heap_location_collector_.GetHeapLocation(j);
+        // If the heap-location is part of this same array.
+        if (candidate_heap_location->GetReferenceInfo() == heap_loc->GetReferenceInfo()) {
+          for (const auto& hv : heap_values_for_) {
+            if (hv.size() > j && hv[j].stored_by.IsInstruction()) {
+              // This is the last store in this block. We can't tell if it's
+              // to the right location or not so we'll need to keep it.
+              kept_stores_.SetBit(hv[j].stored_by.GetInstruction()->GetId());
+            }
+          }
         }
       }
     }