LSE: Better elimination of array stores.
This replaces the workaround from
https://android-review.googlesource.com/1427546
with a proper fix that precisely tracks the needed stores.
Also fix spelling of Floyd-Warshall.
Test: Additional tests in 530-checker-lse
Test: testrunner.py --host --optimizing --interpreter --jvm -t 530
Test: testrunner.py --host --optimizing
Bug: 168446366
Bug: 77906240
Change-Id: I0cd7acb600ed210ac09d0006b28f045758c2c3ec
diff --git a/compiler/optimizing/load_store_elimination.cc b/compiler/optimizing/load_store_elimination.cc
index 9427a70..5f15822 100644
--- a/compiler/optimizing/load_store_elimination.cc
+++ b/compiler/optimizing/load_store_elimination.cc
@@ -123,7 +123,7 @@
* but if we use TryReplacingLoopPhiPlaceholderWithDefault() for default value
* replacement search, there are additional dependencies to consider, see below.
*
- * In the successful case (no unknown inputs found) we use the Floyd-Warshal
+ * In the successful case (no unknown inputs found) we use the Floyd-Warshall
* algorithm to determine transitive closures for each found Phi placeholder,
* and then match or materialize Phis from the smallest transitive closure,
* so that we can determine if such subset has a single other input. This has
@@ -162,7 +162,7 @@
* O(phi_placeholder_dependencies * phi_placeholders)
* term from unknown input cases is actually
* O(heap_locations^3 * blocks^3) ,
- * exactly as the estimate for the Floyd-Warshal parts of successful cases.
+ * exactly as the estimate for the Floyd-Warshall parts of successful cases.
* Adding the other term from the unknown input cases (to account for the case
* with significantly more instructions than blocks and heap locations), the
* phase 2 time complexity is
@@ -2052,12 +2052,10 @@
work_queue.pop_back();
size_t idx = phi_placeholder->GetHeapLocation();
HBasicBlock* block = blocks[phi_placeholder->GetBlockId()];
- HeapLocation* heap_loc = heap_location_collector_.GetHeapLocation(idx);
- bool unknown_array_stores = false;
for (HBasicBlock* predecessor : block->GetPredecessors()) {
ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[predecessor->GetBlockId()];
- // For loop back-edges we must also preserve all stores to locations that may alias
- // with the location `idx`.
+ // For loop back-edges we must also preserve all stores to locations that
+ // may alias with the location `idx`.
// TODO: Review whether we need to keep stores to aliased locations from pre-header.
// TODO: Add tests cases around this.
bool is_back_edge =
@@ -2066,7 +2064,31 @@
size_t end = is_back_edge ? heap_values.size() : idx + 1u;
for (size_t i = start; i != end; ++i) {
Value stored_by = heap_values[i].stored_by;
- if (!stored_by.IsUnknown() && (i == idx || heap_location_collector_.MayAlias(i, idx))) {
+ auto may_alias = [this, block, idx](size_t i) {
+ DCHECK_NE(i, idx);
+ DCHECK(block->IsLoopHeader());
+ if (heap_location_collector_.MayAlias(i, idx)) {
+ return true;
+ }
+ // For array locations with index defined inside the loop, include
+ // all other locations in the array, even those that LSA declares
+ // non-aliasing, such as `a[i]` and `a[i + 1]`, as they may actually
+ // refer to the same locations for different iterations. (LSA's
+ // `ComputeMayAlias()` does not consider different loop iterations.)
+ HeapLocation* heap_loc = heap_location_collector_.GetHeapLocation(idx);
+ HeapLocation* other_loc = heap_location_collector_.GetHeapLocation(i);
+ if (heap_loc->IsArray() &&
+ other_loc->IsArray() &&
+ heap_loc->GetReferenceInfo() == other_loc->GetReferenceInfo() &&
+ block->GetLoopInformation()->Contains(*heap_loc->GetIndex()->GetBlock())) {
+ // If one location has index defined inside and the other index defined outside
+ // of the loop, LSA considers them aliasing and we take an early return above.
+ DCHECK(block->GetLoopInformation()->Contains(*other_loc->GetIndex()->GetBlock()));
+ return true;
+ }
+ return false;
+ };
+ if (!stored_by.IsUnknown() && (i == idx || may_alias(i))) {
if (stored_by.NeedsPhi()) {
size_t phi_placeholder_index = PhiPlaceholderIndex(stored_by);
if (!phi_placeholders_to_search_for_kept_stores_.IsBitSet(phi_placeholder_index)) {
@@ -2077,51 +2099,6 @@
DCHECK(IsStore(stored_by.GetInstruction()));
kept_stores_.SetBit(stored_by.GetInstruction()->GetId());
}
- } else if (stored_by.IsUnknown() && heap_loc->IsArray()) {
- // See comment below. We are an array load where we don't know how the
- // value got there. Need to ensure that writes aren't omitted (in
- // ways that are observable) since this looks like a read from an
- // arbitrary point in the array.
- unknown_array_stores = true;
- }
- }
- }
- if (unknown_array_stores) {
- // Since we are an array, actually ended up here, and don't have a known
- // set this must be from a previous iteration (otherwise we'd either not
- // reach this because stores were kept from escapes or just known from
- // earlier). We need to keep all stores that could affect this read.
- //
- // This is sufficient since
- // (1) If we are not an array then all loads and stores are at known,
- // exact offsets (fields and such).
- // (2) If we didn't end up here then we either had no chance to turn
- // this load into a phi or we know what the actual value is (above
- // branch).
- //
- // Since none of these are true we are reading from some location in
- // the array and we have no idea where/if it was written. This means
- // we need to be super conservative and preserve all writes that could
- // possibly occur before this read, in case one of them is the write
- // that supplies the value. This means keeping the last store for
- // every heap location touching this array in every block.
- //
- // TODO We can do better and do partial LSE around this but we don't
- // have the data for that at the moment.
- // TODO Can we be more selective w/o partial LSE data?
- // TODO Do some analysis to see how much we are leaving on the table
- // with this.
- for (uint32_t j = 0; j < heap_location_collector_.GetNumberOfHeapLocations(); ++j) {
- auto candidate_heap_location = heap_location_collector_.GetHeapLocation(j);
- // If the heap-location is part of this same array.
- if (candidate_heap_location->GetReferenceInfo() == heap_loc->GetReferenceInfo()) {
- for (const auto& hv : heap_values_for_) {
- if (hv.size() > j && hv[j].stored_by.IsInstruction()) {
- // This is the last store in this block. We can't tell if it's
- // to the right location or not so we'll need to keep it.
- kept_stores_.SetBit(hv[j].stored_by.GetInstruction()->GetId());
- }
- }
}
}
}
diff --git a/test/530-checker-lse/src/Main.java b/test/530-checker-lse/src/Main.java
index 0bc2e56..b2ae3a1 100644
--- a/test/530-checker-lse/src/Main.java
+++ b/test/530-checker-lse/src/Main.java
@@ -3338,6 +3338,73 @@
return obj.j;
}
+ /// CHECK-START: int Main.testLoop34(int) load_store_elimination (before)
+ /// CHECK-DAG: NewArray
+ /// CHECK-DAG: Phi
+ /// CHECK-DAG: Phi
+ /// CHECK-DAG: ArrayGet
+ /// CHECK-DAG: ArraySet
+
+ /// CHECK-START: int Main.testLoop34(int) load_store_elimination (after)
+ /// CHECK-DAG: NewArray
+ /// CHECK-DAG: Phi
+ /// CHECK-DAG: Phi
+ /// CHECK-DAG: ArrayGet
+ /// CHECK-DAG: ArraySet
+
+ // Test that ArraySet with non-default value prevents matching ArrayGet for
+ // the same array to default value even when the ArraySet is using an index
+ // offset by one, making LSA declare that the two heap locations do not alias.
+ // Also test that the ArraySet is not eliminated.
+ private static int testLoop34(int n) {
+ int[] a = new int[n + 1];
+ int sum = 0;
+ for (int i = 0; i < n; ) {
+ int value = a[i] + 1;
+ sum += value;
+ ++i;
+ a[i] = value;
+ }
+ return sum;
+ }
+
+ /// CHECK-START: int Main.testLoop35(int) load_store_elimination (before)
+ /// CHECK-DAG: NewArray
+ /// CHECK-DAG: Phi
+ /// CHECK-DAG: Phi
+ /// CHECK-DAG: ArrayGet
+ /// CHECK-DAG: ArraySet
+ /// CHECK-DAG: ArraySet
+
+ /// CHECK-START: int Main.testLoop35(int) load_store_elimination (after)
+ /// CHECK-DAG: NewArray
+ /// CHECK-DAG: Phi
+ /// CHECK-DAG: Phi
+ /// CHECK-DAG: ArrayGet
+ /// CHECK-DAG: ArraySet
+
+ /// CHECK-START: int Main.testLoop35(int) load_store_elimination (after)
+ /// CHECK: ArraySet
+ /// CHECK-NOT: ArraySet
+
+ // Test that ArraySet with non-default value prevents matching ArrayGet for
+ // the same array to default value even when the ArraySet is using an index
+ // offset by one, making LSA declare that the two heap locations do not alias.
+ // Also test that the ArraySet is not eliminated and that a store after the
+ // loop is eliminated.
+ private static int testLoop35(int n) {
+ int[] a = new int[n + 1];
+ int sum = 0;
+ for (int i = 0; i < n; ) {
+ int value = a[i] + 1;
+ sum += value;
+ ++i;
+ a[i] = value;
+ }
+ a[0] = 1;
+ return sum;
+ }
+
/// CHECK-START: int Main.testNestedLoop1(TestClass, int) load_store_elimination (before)
/// CHECK-DAG: InstanceFieldSet
/// CHECK-DAG: InstanceFieldGet
@@ -4019,6 +4086,14 @@
assertIntEquals(testLoop33(new TestClass(), 1), 0);
assertIntEquals(testLoop33(new TestClass(), 2), 0);
assertIntEquals(testLoop33(new TestClass(), 3), 0);
+ assertIntEquals(testLoop34(0), 0);
+ assertIntEquals(testLoop34(1), 1);
+ assertIntEquals(testLoop34(2), 3);
+ assertIntEquals(testLoop34(3), 6);
+ assertIntEquals(testLoop35(0), 0);
+ assertIntEquals(testLoop35(1), 1);
+ assertIntEquals(testLoop35(2), 3);
+ assertIntEquals(testLoop35(3), 6);
assertIntEquals(testNestedLoop1(new TestClass(), 0), 1);
assertIntEquals(testNestedLoop1(new TestClass(), 1), 1);