LSE: Fix tracking heap values for small types.

We previously inserted TypeConversion to 8-bit and 16-bit
types only when replacing loads at the end of LSE. This is
insufficient as it allowed incorrect merging of values that
had different type. We now insert the TypeConversion when we
designate a load for replacement and therefore when a value
retrieved by such load is stored in another heap location,
we record the substitute TypeConversion as the heap value.

This replaces the insufficient fix from
    https://android-review.googlesource.com/538635 .

Test: New tests in 530-checker-lse and 530-checker-lse3.
Test: m test-art-host-gtest
Test: testrunner.py --host --optimizing
Bug: 161521389
Change-Id: I7c41931126455411d25f0d675857f104700a15af
diff --git a/compiler/optimizing/load_store_elimination.cc b/compiler/optimizing/load_store_elimination.cc
index 207a11a..a70b117 100644
--- a/compiler/optimizing/load_store_elimination.cc
+++ b/compiler/optimizing/load_store_elimination.cc
@@ -117,17 +117,43 @@
     HGraphVisitor::VisitBasicBlock(block);
   }
 
-  HTypeConversion* AddTypeConversionIfNecessary(HInstruction* instruction,
-                                                HInstruction* value,
-                                                DataType::Type expected_type) {
-    HTypeConversion* type_conversion = nullptr;
+  HTypeConversion* FindOrAddTypeConversionIfNecessary(HInstruction* instruction,
+                                                      HInstruction* value,
+                                                      DataType::Type expected_type) {
     // Should never add type conversion into boolean value.
-    if (expected_type != DataType::Type::kBool &&
-        !DataType::IsTypeConversionImplicit(value->GetType(), expected_type)) {
-      type_conversion = new (GetGraph()->GetAllocator()) HTypeConversion(
-          expected_type, value, instruction->GetDexPc());
-      instruction->GetBlock()->InsertInstructionBefore(type_conversion, instruction);
+    if (expected_type == DataType::Type::kBool ||
+        DataType::IsTypeConversionImplicit(value->GetType(), expected_type) ||
+        // TODO: This prevents type conversion of default values but we can still insert
+        // type conversion of other constants and there is no constant folding pass after LSE.
+        IsZeroBitPattern(value)) {
+      return nullptr;
     }
+
+    // Check if there is already a suitable TypeConversion we can reuse.
+    for (const HUseListNode<HInstruction*>& use : value->GetUses()) {
+      if (use.GetUser()->IsTypeConversion() &&
+          use.GetUser()->GetType() == expected_type &&
+          // TODO: We could move the TypeConversion to a common dominator
+          // if it does not cross irreducible loop header.
+          use.GetUser()->GetBlock()->Dominates(instruction->GetBlock()) &&
+          // Don't share across irreducible loop headers.
+          // TODO: can be more fine-grained than this by testing each dominator.
+          (use.GetUser()->GetBlock() == instruction->GetBlock() ||
+           !GetGraph()->HasIrreducibleLoops())) {
+        if (use.GetUser()->GetBlock() == instruction->GetBlock() &&
+            use.GetUser()->GetBlock()->GetInstructions().FoundBefore(instruction, use.GetUser())) {
+          // Move the TypeConversion before the instruction.
+          use.GetUser()->MoveBefore(instruction);
+        }
+        DCHECK(use.GetUser()->StrictlyDominates(instruction));
+        return use.GetUser()->AsTypeConversion();
+      }
+    }
+
+    // We must create a new TypeConversion instruction.
+    HTypeConversion* type_conversion = new (GetGraph()->GetAllocator()) HTypeConversion(
+          expected_type, value, instruction->GetDexPc());
+    instruction->GetBlock()->InsertInstructionBefore(type_conversion, instruction);
     return type_conversion;
   }
 
@@ -153,41 +179,25 @@
     DCHECK(IsLoad(load));
     DCHECK_EQ(FindSubstitute(heap_value), heap_value) <<
         "Unexpected heap_value that has a substitute " << heap_value->DebugName();
-    removed_loads_.push_back(load);
-    substitute_instructions_for_loads_.push_back(heap_value);
-  }
 
-  // Scan the list of removed loads to see if we can reuse `type_conversion`, if
-  // the other removed load has the same substitute and type and is dominated
-  // by `type_conversion`.
-  void TryToReuseTypeConversion(HInstruction* type_conversion, size_t index) {
-    size_t size = removed_loads_.size();
-    HInstruction* load = removed_loads_[index];
-    HInstruction* substitute = substitute_instructions_for_loads_[index];
-    for (size_t j = index + 1; j < size; j++) {
-      HInstruction* load2 = removed_loads_[j];
-      HInstruction* substitute2 = substitute_instructions_for_loads_[j];
-      if (load2 == nullptr) {
-        DCHECK(substitute2->IsTypeConversion());
-        continue;
-      }
-      DCHECK(IsLoad(load2));
-      DCHECK(substitute2 != nullptr);
-      if (substitute2 == substitute &&
-          load2->GetType() == load->GetType() &&
-          type_conversion->GetBlock()->Dominates(load2->GetBlock()) &&
-          // Don't share across irreducible loop headers.
-          // TODO: can be more fine-grained than this by testing each dominator.
-          (load2->GetBlock() == type_conversion->GetBlock() ||
-           !GetGraph()->HasIrreducibleLoops())) {
-        // The removed_loads_ are added in reverse post order.
-        DCHECK(type_conversion->StrictlyDominates(load2));
-        load2->ReplaceWith(type_conversion);
-        load2->GetBlock()->RemoveInstruction(load2);
-        removed_loads_[j] = nullptr;
-        substitute_instructions_for_loads_[j] = type_conversion;
-      }
-    }
+    // The load expects to load the heap value as type load->GetType().
+    // However the tracked heap value may not be of that type. An explicit
+    // type conversion may be needed.
+    // There are actually three types involved here:
+    // (1) tracked heap value's type (type A)
+    // (2) heap location (field or element)'s type (type B)
+    // (3) load's type (type C)
+    // We guarantee that type A stored as type B and then fetched out as
+    // type C is the same as casting from type A to type C directly, since
+    // type B and type C will have the same size which is guaranteed in
+    // HInstanceFieldGet/HStaticFieldGet/HArrayGet/HVecLoad's SetType().
+    // So we only need one type conversion from type A to type C.
+    HTypeConversion* type_conversion = FindOrAddTypeConversionIfNecessary(
+        load, heap_value, load->GetType());
+
+    removed_loads_.push_back(load);
+    substitute_instructions_for_loads_.push_back(
+        type_conversion != nullptr ? type_conversion : heap_value);
   }
 
   // Remove recorded instructions that should be eliminated.
@@ -196,11 +206,7 @@
     DCHECK_EQ(size, substitute_instructions_for_loads_.size());
     for (size_t i = 0; i < size; i++) {
       HInstruction* load = removed_loads_[i];
-      if (load == nullptr) {
-        // The load has been handled in the scan for type conversion below.
-        DCHECK(substitute_instructions_for_loads_[i]->IsTypeConversion());
-        continue;
-      }
+      DCHECK(load != nullptr);
       DCHECK(IsLoad(load));
       HInstruction* substitute = substitute_instructions_for_loads_[i];
       DCHECK(substitute != nullptr);
@@ -209,27 +215,7 @@
       // location value.
       DCHECK_EQ(FindSubstitute(substitute), substitute);
 
-      // The load expects to load the heap value as type load->GetType().
-      // However the tracked heap value may not be of that type. An explicit
-      // type conversion may be needed.
-      // There are actually three types involved here:
-      // (1) tracked heap value's type (type A)
-      // (2) heap location (field or element)'s type (type B)
-      // (3) load's type (type C)
-      // We guarantee that type A stored as type B and then fetched out as
-      // type C is the same as casting from type A to type C directly, since
-      // type B and type C will have the same size which is guarenteed in
-      // HInstanceFieldGet/HStaticFieldGet/HArrayGet/HVecLoad's SetType().
-      // So we only need one type conversion from type A to type C.
-      HTypeConversion* type_conversion = AddTypeConversionIfNecessary(
-          load, substitute, load->GetType());
-      if (type_conversion != nullptr) {
-        TryToReuseTypeConversion(type_conversion, i);
-        load->ReplaceWith(type_conversion);
-        substitute_instructions_for_loads_[i] = type_conversion;
-      } else {
-        load->ReplaceWith(substitute);
-      }
+      load->ReplaceWith(substitute);
       load->GetBlock()->RemoveInstruction(load);
     }
 
diff --git a/test/530-checker-lse/src/Main.java b/test/530-checker-lse/src/Main.java
index e228912..9672d97 100644
--- a/test/530-checker-lse/src/Main.java
+++ b/test/530-checker-lse/src/Main.java
@@ -42,6 +42,7 @@
   volatile int k;
   TestClass next;
   String str;
+  byte b;
   static int si;
   static TestClass sTestClassObj;
 }
@@ -639,6 +640,63 @@
     return a;
   }
 
+  /// CHECK-START: int Main.$noinline$testConversion1(TestClass, int) load_store_elimination (before)
+  /// CHECK-DAG:                     InstanceFieldSet
+  /// CHECK-DAG:                     InstanceFieldSet
+  /// CHECK-DAG:                     InstanceFieldGet
+  /// CHECK-DAG:                     InstanceFieldSet
+  /// CHECK-DAG:                     InstanceFieldGet
+
+  /// CHECK-START: int Main.$noinline$testConversion1(TestClass, int) load_store_elimination (after)
+  /// CHECK-DAG:                     InstanceFieldSet
+  /// CHECK-DAG:                     InstanceFieldSet
+  /// CHECK-DAG:                     TypeConversion
+  /// CHECK-DAG:                     InstanceFieldSet
+  /// CHECK-DAG:                     InstanceFieldGet
+  static int $noinline$testConversion1(TestClass obj, int x) {
+    obj.i = x;
+    if ((x & 1) != 0) {
+      obj.b = (byte) x;
+      obj.i = obj.b;
+    }
+    return obj.i;
+  }
+
+  /// CHECK-START: int Main.$noinline$testConversion2(TestClass, int) load_store_elimination (before)
+  /// CHECK-DAG:                     InstanceFieldSet
+  /// CHECK-DAG:                     InstanceFieldSet
+  /// CHECK-DAG:                     InstanceFieldGet
+  /// CHECK-DAG:                     InstanceFieldSet
+  /// CHECK-DAG:                     InstanceFieldGet
+
+  /// CHECK-START: int Main.$noinline$testConversion2(TestClass, int) load_store_elimination (after)
+  /// CHECK-DAG:                     InstanceFieldSet
+  /// CHECK-DAG:                     InstanceFieldSet
+  /// CHECK-DAG:                     TypeConversion
+  /// CHECK-DAG:                     InstanceFieldSet
+  /// CHECK-DAG:                     InstanceFieldGet
+
+  /// CHECK-START: int Main.$noinline$testConversion2(TestClass, int) load_store_elimination (after)
+  /// CHECK:                         TypeConversion
+  /// CHECK-NOT:                     TypeConversion
+  static int $noinline$testConversion2(TestClass obj, int x) {
+    int tmp = 0;
+    obj.i = x;
+    if ((x & 1) != 0) {
+      // The instruction simplifier can remove this TypeConversion if there are
+      // no environment uses. Currently, there is an environment use in NullCheck,
+      // so this TypeConversion remains and GVN removes the second TypeConversion
+      // below. Since we really want to test that the TypeConversion from below
+      // can be moved and used for the load of `obj.b`, we have a similar test
+      // written in smali in 530-checker-lse3, StoreLoad.test3(int), except that
+      // it's using static fields (which would not help with the environment use).
+      obj.b = (byte) x;
+      obj.i = obj.b;
+      tmp = (byte) x;
+    }
+    return obj.i + tmp;
+  }
+
   /// CHECK-START: void Main.testFinalizable() load_store_elimination (before)
   /// CHECK: NewInstance
   /// CHECK: InstanceFieldSet
@@ -1157,6 +1215,25 @@
     array[1] = obj;    // store the same value as the defaut value.
   }
 
+  /// CHECK-START: int Main.$noinline$testByteArrayDefaultValue() load_store_elimination (before)
+  /// CHECK-DAG:                 NewArray
+  /// CHECK-DAG: <<Value:b\d+>>  ArrayGet
+  /// CHECK-DAG:                 Return [<<Value>>]
+
+  /// CHECK-START: int Main.$noinline$testByteArrayDefaultValue() load_store_elimination (after)
+  /// CHECK-DAG: <<Const0:i\d+>> IntConstant 0
+  /// CHECK-DAG:                 Return [<<Const0>>]
+
+  /// CHECK-START: int Main.$noinline$testByteArrayDefaultValue() load_store_elimination (after)
+  /// CHECK-NOT:                 NewArray
+  /// CHECK-NOT:                 ArrayGet
+  /// CHECK-NOT:                 TypeConversion
+  private static int $noinline$testByteArrayDefaultValue() {
+    byte[] array = new byte[2];
+    array[1] = 1;  // FIXME: Without any stores, LSA tells LSE not to run.
+    return array[0];
+  }
+
   static Object[] sArray;
 
   /// CHECK-START: int Main.testLocalArrayMerge1(boolean) load_store_elimination (before)
@@ -1367,6 +1444,11 @@
     assertDoubleEquals(Math.PI * Math.PI * Math.PI, getCircleArea(Math.PI, true));
     assertDoubleEquals(0d, getCircleArea(Math.PI, false));
 
+    assertIntEquals($noinline$testConversion1(new TestClass(), 300), 300);
+    assertIntEquals($noinline$testConversion1(new TestClass(), 301), 45);
+    assertIntEquals($noinline$testConversion2(new TestClass(), 300), 300);
+    assertIntEquals($noinline$testConversion2(new TestClass(), 301), 90);
+
     int[] iarray = {0, 0, 0};
     double[] darray = {0d, 0d, 0d};
     try {
@@ -1435,6 +1517,8 @@
 
     assertIntEquals(testStoreStoreWithDeoptimize(new int[4]), 4);
 
+    assertIntEquals($noinline$testByteArrayDefaultValue(), 0);
+
     assertIntEquals(testLocalArrayMerge1(true), 1);
     assertIntEquals(testLocalArrayMerge1(false), 1);
     assertIntEquals(testLocalArrayMerge2(true), 2);
diff --git a/test/530-checker-lse3/smali/StoreLoad.smali b/test/530-checker-lse3/smali/StoreLoad.smali
index 7fb582c..59d6a8c 100644
--- a/test/530-checker-lse3/smali/StoreLoad.smali
+++ b/test/530-checker-lse3/smali/StoreLoad.smali
@@ -58,5 +58,43 @@
     return v0
 .end method
 
+## CHECK-START: int StoreLoad.test3(int) load_store_elimination (before)
+## CHECK-DAG:                     StaticFieldSet
+## CHECK-DAG:                     StaticFieldSet
+## CHECK-DAG:                     StaticFieldGet
+## CHECK-DAG:                     StaticFieldSet
+## CHECK-DAG:                     StaticFieldGet
+
+## CHECK-START: int StoreLoad.test3(int) load_store_elimination (after)
+## CHECK-DAG:                     StaticFieldSet
+## CHECK-DAG:                     StaticFieldSet
+## CHECK-DAG:                     TypeConversion
+## CHECK-DAG:                     StaticFieldSet
+## CHECK-DAG:                     StaticFieldGet
+
+## CHECK-START: int StoreLoad.test3(int) load_store_elimination (after)
+## CHECK:                         TypeConversion
+## CHECK-NOT:                     TypeConversion
+.method public static test3(I)I
+    .registers 3
+    const/4 v0, 0
+    sput p0, LStoreLoad;->intField:I
+    and-int/lit8 v1, p0, 1
+    if-eqz v1, :skip
+
+    sput-byte p0, LStoreLoad;->byteField:B
+    sget-byte v1, LStoreLoad;->byteField:B
+    sput v1, LStoreLoad;->intField:I
+    # Test that this TypeConversion is moved and used for the
+    # sget-byte above instead of creating a new one.
+    int-to-byte v0, p0
+
+    :skip
+    sget v1, LStoreLoad;->intField:I
+    add-int v0, v1, v0
+    return v0
+.end method
+
+.field public static intField:I
 .field public static byteField:B
 .field public static byteField2:B
diff --git a/test/530-checker-lse3/src/Main.java b/test/530-checker-lse3/src/Main.java
index caef0b3..81b70e2 100644
--- a/test/530-checker-lse3/src/Main.java
+++ b/test/530-checker-lse3/src/Main.java
@@ -44,5 +44,17 @@
     if (b != (byte)0x78) {
       throw new Error("Expected 0xef, got " + b);
     }
+
+    m = c.getMethod("test3", int.class);
+    result = (Integer)m.invoke(null, 300);
+    assertIntEquals(result, 300);
+    result = (Integer)m.invoke(null, 301);
+    assertIntEquals(result, 90);
+  }
+
+  private static void assertIntEquals(int result, int expected) {
+    if (result != expected) {
+      throw new Error("Expected " + expected + ", got " + result);
+    }
   }
 }