Improved LSE: Replacing loads with Phis.

Create "Phi placeholders" for tracking heap values that can
merge from different values and try to match existing Phis
or create new Phis to replace loads. For Phi placeholders
from loop headers we do not know whether they are fed by
unknown values through back-edges when processing the loop
header, so we delay processing loads that depend on them
until we walked the entire graph. We then try to match them
with existing instructions (when the location is unchanged
in the loop) or Phis or create new Phis if needed. If we
find a loop Phi placeholder fed with unknown value from a
back-edge, we mark the Phi placeholder unreplaceable and
reprocess loads and stores to propagate the unknown value.
This can sometimes allow other loads to be replaced. At the
end we re-calculate the heap values to find stores that can
be eliminated because they write over the same value.

Golem results:
  art-opt-cc           arm  arm64    x86 x86-64
  CaffeineFloat      +6.7%  +3.0%  +5.9%  +3.8%
  KotlinMicroWhen   +33.7%  +4.8%  +1.8%  +0.6%
  art-opt (more noisy than art-opt-cc)
  CaffeineFloat      +4.1%  +4.4%  +7.8% +10.5%
  KotlinMicroWhen   +33.6%  +2.0%  +1.8%  +1.8%
The MoveLiteralColumn benchmark seems to gain significantly
(up to 22% on art-opt-cc but under 10% on art-opt) but it is
very noisy and the results are therefore unreliable.

Insignificant code size changes for aosp_blueline-userdebug:
  - before:
    arm boot*.oat: 15303468
    arm64 boot*.oat: 18184736
    services.odex: 25195944
    grep -c pAllocObject boot.arm64.oatdump.txt: 27213
    grep -c pAllocArray boot.arm64.oatdump.txt: 3620
  - after:
    arm boot*.oat: 15299524 (-4KiB, -0.03%)
    arm64 boot*.oat: 18176528 (-8KiB, -0.05%)
    services.odex: 25191832 (-4KiB, -0.02%)
    grep -c pAllocObject boot.arm64.oatdump.txt: 27206 (-7)
    grep -c pAllocArray boot.arm64.oatdump.txt: 3615 (-5)

Test: New tests in 530-checker-lse.
Test: m test-art-host-gtest
Test: testrunner.py --host --optimizing
Test: blueline-userdebug boots.
Bug: 77906240
Change-Id: Ia9fe0cd3530f9d3941650dfefc00a7f7fd821994
diff --git a/compiler/optimizing/load_store_elimination_test.cc b/compiler/optimizing/load_store_elimination_test.cc
index 462e6a9..6c5eec8 100644
--- a/compiler/optimizing/load_store_elimination_test.cc
+++ b/compiler/optimizing/load_store_elimination_test.cc
@@ -20,7 +20,6 @@
 #include "load_store_elimination.h"
 #include "nodes.h"
 #include "optimizing_unit_test.h"
-#include "side_effects_analysis.h"
 
 #include "gtest/gtest.h"
 
@@ -30,9 +29,7 @@
  public:
   void PerformLSE() {
     graph_->BuildDominatorTree();
-    SideEffectsAnalysis side_effects(graph_);
-    side_effects.Run();
-    LoadStoreElimination lse(graph_, side_effects, /*stats=*/ nullptr);
+    LoadStoreElimination lse(graph_, /*stats=*/ nullptr);
     lse.Run();
     EXPECT_TRUE(CheckGraphSkipRefTypeInfoChecks());
   }
@@ -581,9 +578,12 @@
 //     vstore2: b[i,... i + 3] = x
 //     vstore3: a[i,... i + 3] = [1,...1]
 //
+// Return 'a' from the method to make it escape.
+//
 // Expected:
 //   'vstore1' is not removed.
 //   'vload' is removed.
+//   'vstore2' is removed because 'b' does not escape.
 //   'vstore3' is removed.
 TEST_F(LoadStoreEliminationTest, RedundantVStoreVLoadInLoop) {
   CreateTestControlFlowGraph();
@@ -595,6 +595,10 @@
   pre_header_->InsertInstructionBefore(array_a, pre_header_->GetLastInstruction());
   array_a->CopyEnvironmentFrom(suspend_check_->GetEnvironment());
 
+  ASSERT_TRUE(return_block_->GetLastInstruction()->IsReturnVoid());
+  HInstruction* ret = new (GetAllocator()) HReturn(array_a);
+  return_block_->ReplaceAndRemoveInstructionWith(return_block_->GetLastInstruction(), ret);
+
   HInstruction* array_b = new (GetAllocator()) HNewArray(c0, c128, 0, 0);
   pre_header_->InsertInstructionBefore(array_b, pre_header_->GetLastInstruction());
   array_b->CopyEnvironmentFrom(suspend_check_->GetEnvironment());
@@ -606,19 +610,18 @@
   //    a[i,... i + 3] = [1,...1]
   HInstruction* vstore1 = AddVecStore(loop_, array_a, phi_);
   HInstruction* vload = AddVecLoad(loop_, array_a, phi_);
-  AddVecStore(loop_, array_b, phi_, vload->AsVecLoad());
+  HInstruction* vstore2 = AddVecStore(loop_, array_b, phi_, vload->AsVecLoad());
   HInstruction* vstore3 = AddVecStore(loop_, array_a, phi_, vstore1->InputAt(2));
 
   PerformLSE();
 
   ASSERT_FALSE(IsRemoved(vstore1));
   ASSERT_TRUE(IsRemoved(vload));
+  ASSERT_TRUE(IsRemoved(vstore2));
   ASSERT_TRUE(IsRemoved(vstore3));
 }
 
-// Loop write side effects invalidate all stores.
-// This causes stores after such loops not to be removed, even
-// their values are known.
+// Loop writes invalidate only possibly aliased heap locations.
 TEST_F(LoadStoreEliminationTest, StoreAfterLoopWithSideEffects) {
   CreateTestControlFlowGraph();
 
@@ -630,20 +633,55 @@
   // loop:
   //   b[i] = array[i]
   // array[0] = 2
-  AddArraySet(entry_block_, array_, c0, c2);
+  HInstruction* store1 = AddArraySet(entry_block_, array_, c0, c2);
 
   HInstruction* array_b = new (GetAllocator()) HNewArray(c0, c128, 0, 0);
   pre_header_->InsertInstructionBefore(array_b, pre_header_->GetLastInstruction());
   array_b->CopyEnvironmentFrom(suspend_check_->GetEnvironment());
 
   HInstruction* load = AddArrayGet(loop_, array_, phi_);
-  AddArraySet(loop_, array_b, phi_, load);
+  HInstruction* store2 = AddArraySet(loop_, array_b, phi_, load);
 
-  HInstruction* store = AddArraySet(return_block_, array_, c0, c2);
+  HInstruction* store3 = AddArraySet(return_block_, array_, c0, c2);
 
   PerformLSE();
 
-  ASSERT_FALSE(IsRemoved(store));
+  ASSERT_FALSE(IsRemoved(store1));
+  ASSERT_TRUE(IsRemoved(store2));
+  ASSERT_TRUE(IsRemoved(store3));
+}
+
+// Loop writes invalidate only possibly aliased heap locations.
+TEST_F(LoadStoreEliminationTest, StoreAfterLoopWithSideEffects2) {
+  CreateTestControlFlowGraph();
+
+  // Add another array parameter that may alias with `array_`.
+  // Note: We're not adding it to the suspend check environment.
+  AddParameter(new (GetAllocator()) HParameterValue(graph_->GetDexFile(),
+                                                    dex::TypeIndex(0),
+                                                    3,
+                                                    DataType::Type::kInt32));
+  HInstruction* array2 = parameters_.back();
+
+  HInstruction* c0 = graph_->GetIntConstant(0);
+  HInstruction* c2 = graph_->GetIntConstant(2);
+
+  // array[0] = 2;
+  // loop:
+  //   array2[i] = array[i]
+  // array[0] = 2
+  HInstruction* store1 = AddArraySet(entry_block_, array_, c0, c2);
+
+  HInstruction* load = AddArrayGet(loop_, array_, phi_);
+  HInstruction* store2 = AddArraySet(loop_, array2, phi_, load);
+
+  HInstruction* store3 = AddArraySet(return_block_, array_, c0, c2);
+
+  PerformLSE();
+
+  ASSERT_FALSE(IsRemoved(store1));
+  ASSERT_FALSE(IsRemoved(store2));
+  ASSERT_FALSE(IsRemoved(store3));
 }
 
 // As it is not allowed to use defaults for VecLoads, check if there is a new created array