Revert^2 "Partial LSE analysis & store removal"

A ScopedArenaAllocator in a single test was accidentally allocated with
operator new, which is not supported. This caused a memory leak.
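
A minimal sketch of the supported stack-allocation pattern (the
MallocArenaPool/ArenaStack setup is the usual ART test boilerplate from
the base/ allocator headers, shown only for illustration and not taken
from the fixed test):

  MallocArenaPool pool;
  ArenaStack arena_stack(&pool);
  // Stack-allocated: the allocator and the arenas it holds are released
  // when the scope ends, whereas `new ScopedArenaAllocator(...)` leaks
  // unless it is explicitly deleted.
  ScopedArenaAllocator allocator(&arena_stack);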

This reverts commit fe270426c8a2a69a8f669339e83b86fbf40e25a1.
This unreverts commit bb6cda60e4418c0ab557ea4090e046bed8206763.

Bug: 67037140

Reason for revert: Fixed memory leak in
LoadStoreAnalysisTest.PartialEscape test case

Test: SANITIZE_HOST=address ASAN_OPTIONS=detect_leaks=0 m test-art-host-gtest-dependencies
      Run art_compiler_tests

Change-Id: I34fa2079df946ae54b8c91fa771a44d56438a719

diff --git a/compiler/optimizing/load_store_elimination.cc b/compiler/optimizing/load_store_elimination.cc
index 5f15822..dcccc5b 100644
--- a/compiler/optimizing/load_store_elimination.cc
+++ b/compiler/optimizing/load_store_elimination.cc
@@ -16,15 +16,19 @@
 
 #include "load_store_elimination.h"
 
+#include "base/arena_allocator.h"
 #include "base/arena_bit_vector.h"
 #include "base/array_ref.h"
 #include "base/bit_vector-inl.h"
+#include "base/bit_vector.h"
 #include "base/scoped_arena_allocator.h"
 #include "base/scoped_arena_containers.h"
 #include "escape.h"
 #include "load_store_analysis.h"
-#include "optimizing/optimizing_compiler_stats.h"
+#include "nodes.h"
+#include "optimizing_compiler_stats.h"
 #include "reference_type_propagation.h"
+#include "side_effects_analysis.h"
 
 /**
  * The general algorithm of load-store elimination (LSE).
@@ -258,6 +262,7 @@
     enum class Type {
       kInvalid,
       kUnknown,
+      kMergedUnknown,
       kDefault,
       kInstruction,
       kNeedsNonLoopPhi,
@@ -282,6 +287,13 @@
       return value;
     }
 
+    static Value MergedUnknown(const PhiPlaceholder* phi_placeholder) {
+      Value value;
+      value.type_ = Type::kMergedUnknown;
+      value.phi_placeholder_ = phi_placeholder;
+      return value;
+    }
+
     // Default heap value after an allocation.
     // A heap location can be set to that value right after an allocation.
     static Value Default() {
@@ -325,10 +337,18 @@
       return type_ == Type::kInvalid;
     }
 
-    bool IsUnknown() const {
+    bool IsMergedUnknown() const {
+      return type_ == Type::kMergedUnknown;
+    }
+
+    bool IsPureUnknown() const {
       return type_ == Type::kUnknown;
     }
 
+    bool IsUnknown() const {
+      return type_ == Type::kUnknown || type_ == Type::kMergedUnknown;
+    }
+
     bool IsDefault() const {
       return type_ == Type::kDefault;
     }
@@ -355,10 +375,25 @@
     }
 
     const PhiPlaceholder* GetPhiPlaceholder() const {
-      DCHECK(NeedsPhi());
+      DCHECK(NeedsPhi() || IsMergedUnknown());
       return phi_placeholder_;
     }
 
+    uint32_t GetMergeBlockId() const {
+      DCHECK(IsMergedUnknown()) << *this;
+      return phi_placeholder_->GetBlockId();
+    }
+
+    HBasicBlock* GetMergeBlock(const HGraph* graph) const {
+      DCHECK(IsMergedUnknown()) << *this;
+      return graph->GetBlocks()[GetMergeBlockId()];
+    }
+
+    size_t GetHeapLocation() const {
+      DCHECK(IsMergedUnknown() || NeedsPhi()) << *this;
+      return phi_placeholder_->GetHeapLocation();
+    }
+
     bool Equals(Value other) const {
       // Only valid values can be compared.
       DCHECK(IsValid());
@@ -369,9 +404,9 @@
                (other.IsDefault() && IsInstruction() && IsZeroBitPattern(GetInstruction()));
       } else {
         // Note: Two unknown values are considered different.
-        return IsDefault() ||
-               (IsInstruction() && GetInstruction() == other.GetInstruction()) ||
-               (NeedsPhi() && GetPhiPlaceholder() == other.GetPhiPlaceholder());
+        return IsDefault() || (IsInstruction() && GetInstruction() == other.GetInstruction()) ||
+               (NeedsPhi() && GetPhiPlaceholder() == other.GetPhiPlaceholder()) ||
+               (IsMergedUnknown() && GetPhiPlaceholder() == other.GetPhiPlaceholder());
       }
     }
 
@@ -379,7 +414,11 @@
       return Equals(ForInstruction(instruction));
     }
 
+    std::ostream& Dump(std::ostream& os) const;
+
    private:
+    friend std::ostream& operator<<(std::ostream& os, const Value& v);
+
     Type type_;
     union {
       HInstruction* instruction_;
@@ -397,6 +436,20 @@
     return PhiPlaceholderIndex(phi_placeholder.GetPhiPlaceholder());
   }
 
+  bool IsPartialNoEscape(HBasicBlock* blk, size_t idx) {
+    auto* ri = heap_location_collector_.GetHeapLocation(idx)->GetReferenceInfo();
+    auto* sg = ri->GetNoEscapeSubgraph();
+    return ri->IsPartialSingleton() &&
+           std::none_of(sg->GetExcludedCohorts().cbegin(),
+                        sg->GetExcludedCohorts().cend(),
+                        [&](const ExecutionSubgraph::ExcludedCohort& ex) -> bool {
+                          // Make sure we haven't yet and never will escape.
+                          return ex.PrecedesBlock(blk) ||
+                                 ex.ContainsBlock(blk) ||
+                                 ex.SucceedsBlock(blk);
+                        });
+  }
+
   const PhiPlaceholder* GetPhiPlaceholder(uint32_t block_id, size_t idx) const {
     size_t phi_placeholders_begin = phi_placeholders_begin_for_block_[block_id];
     const PhiPlaceholder* phi_placeholder = &phi_placeholders_[phi_placeholders_begin + idx];
@@ -545,7 +598,12 @@
   // Keep the store referenced by the instruction, or all stores that feed a Phi placeholder.
   // This is necessary if the stored heap value can be observed.
   void KeepStores(Value value) {
-    if (value.IsUnknown()) {
+    if (value.IsPureUnknown()) {
+      return;
+    }
+    if (value.IsMergedUnknown()) {
+      kept_merged_unknowns_.SetBit(PhiPlaceholderIndex(value));
+      phi_placeholders_to_search_for_kept_stores_.SetBit(PhiPlaceholderIndex(value));
       return;
     }
     if (value.NeedsPhi()) {
@@ -568,7 +626,9 @@
         // We use this function when reading a location with unknown value and
         // therefore we cannot know what exact store wrote that unknown value.
         // But we can have a phi placeholder here marking multiple stores to keep.
-        DCHECK(!heap_values[i].stored_by.IsInstruction());
+        DCHECK(
+            !heap_values[i].stored_by.IsInstruction() ||
+            heap_location_collector_.GetHeapLocation(i)->GetReferenceInfo()->IsPartialSingleton());
         KeepStores(heap_values[i].stored_by);
         heap_values[i].stored_by = Value::Unknown();
       } else if (heap_location_collector_.MayAlias(i, loc_index)) {
@@ -779,7 +839,8 @@
     ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[block->GetBlockId()];
     for (size_t i = 0u, size = heap_values.size(); i != size; ++i) {
       ReferenceInfo* ref_info = heap_location_collector_.GetHeapLocation(i)->GetReferenceInfo();
-      if (!ref_info->IsSingletonAndRemovable()) {
+      if (!ref_info->IsSingletonAndRemovable() &&
+          !(ref_info->IsPartialSingleton() && IsPartialNoEscape(block, i))) {
         KeepStores(heap_values[i].stored_by);
         heap_values[i].stored_by = Value::Unknown();
       }
@@ -953,11 +1014,44 @@
   // The unknown heap value is used to mark Phi placeholders that cannot be replaced.
   ScopedArenaVector<Value> phi_placeholder_replacements_;
 
+  // Merged-unknowns that must have their predecessor values kept to ensure
+  // partially escaped values are written.
+  ArenaBitVector kept_merged_unknowns_;
+
   ScopedArenaVector<HInstruction*> singleton_new_instances_;
 
+  friend std::ostream& operator<<(std::ostream& os, const Value& v);
+
   DISALLOW_COPY_AND_ASSIGN(LSEVisitor);
 };
 
+std::ostream& LSEVisitor::Value::Dump(std::ostream& os) const {
+  switch (type_) {
+    case Type::kDefault:
+      return os << "Default";
+    case Type::kInstruction:
+      return os << "Instruction[id: " << instruction_->GetId()
+                << ", block: " << instruction_->GetBlock()->GetBlockId() << "]";
+    case Type::kUnknown:
+      return os << "Unknown";
+    case Type::kInvalid:
+      return os << "Invalid";
+    case Type::kMergedUnknown:
+      return os << "MergedUnknown[block: " << phi_placeholder_->GetBlockId()
+                << ", heap_loc: " << phi_placeholder_->GetHeapLocation() << "]";
+    case Type::kNeedsLoopPhi:
+      return os << "NeedsLoopPhi[block: " << phi_placeholder_->GetBlockId()
+                << ", heap_loc: " << phi_placeholder_->GetHeapLocation() << "]";
+    case Type::kNeedsNonLoopPhi:
+      return os << "NeedsNonLoopPhi[block: " << phi_placeholder_->GetBlockId()
+                << ", heap_loc: " << phi_placeholder_->GetHeapLocation() << "]";
+  }
+}
+
+std::ostream& operator<<(std::ostream& os, const LSEVisitor::Value& v) {
+  return v.Dump(os);
+}
+
 ScopedArenaVector<LSEVisitor::PhiPlaceholder> LSEVisitor::CreatePhiPlaceholders(
     HGraph* graph,
     const HeapLocationCollector& heap_location_collector,
@@ -1032,6 +1126,10 @@
       phi_placeholder_replacements_(phi_placeholders_.size(),
                                     Value::Invalid(),
                                     allocator_.Adapter(kArenaAllocLSE)),
+      kept_merged_unknowns_(&allocator_,
+                            /*start_bits=*/ phi_placeholders_.size(),
+                            /*expandable=*/ false,
+                            kArenaAllocLSE),
       singleton_new_instances_(allocator_.Adapter(kArenaAllocLSE)) {
   // Clear bit vectors.
   phi_placeholders_to_search_for_kept_stores_.ClearAllBits();
@@ -1049,18 +1147,21 @@
   uint32_t pre_header_block_id = loop_info->GetPreHeader()->GetBlockId();
   Value pre_header_value = ReplacementOrValue(heap_values_for_[pre_header_block_id][idx].value);
   if (pre_header_value.IsUnknown()) {
-    return Value::Unknown();
+    return pre_header_value;
   }
   if (kIsDebugBuild) {
     // Check that the reference indeed dominates this loop.
     HeapLocation* location = heap_location_collector_.GetHeapLocation(idx);
     HInstruction* ref = location->GetReferenceInfo()->GetReference();
-    CHECK(ref->GetBlock() != block && ref->GetBlock()->Dominates(block));
+    CHECK(ref->GetBlock() != block && ref->GetBlock()->Dominates(block))
+        << GetGraph()->PrettyMethod();
     // Check that the index, if defined inside the loop, tracks a default value
     // or a Phi placeholder requiring a loop Phi.
     HInstruction* index = location->GetIndex();
     if (index != nullptr && loop_info->Contains(*index->GetBlock())) {
-      CHECK(pre_header_value.NeedsLoopPhi() || pre_header_value.Equals(Value::Default()));
+      CHECK(pre_header_value.NeedsLoopPhi() || pre_header_value.Equals(Value::Default()))
+          << GetGraph()->PrettyMethod() << " blk: " << block->GetBlockId() << " "
+          << pre_header_value;
     }
   }
   const PhiPlaceholder* phi_placeholder = GetPhiPlaceholder(block->GetBlockId(), idx);
@@ -1115,14 +1216,21 @@
   Value merged_value =
       ReplacementOrValue(heap_values_for_[predecessors[0]->GetBlockId()][idx].value);
   for (size_t i = 1u, size = predecessors.size(); i != size; ++i) {
-    if (merged_value.IsUnknown()) {
-      break;
-    }
     Value pred_value =
         ReplacementOrValue(heap_values_for_[predecessors[i]->GetBlockId()][idx].value);
-    if (pred_value.IsUnknown()) {
-      merged_value = Value::Unknown();
-    } else if (!pred_value.Equals(merged_value)) {
+    if (pred_value.Equals(merged_value) ||
+        (pred_value.IsPureUnknown() && merged_value.IsPureUnknown())) {
+      // Value is the same. No need to update our merged value.
+      continue;
+    } else if (pred_value.IsUnknown() || merged_value.IsUnknown()) {
+      // At least one side of the merge is unknown and the values do not match.
+      const PhiPlaceholder* phi_placeholder = GetPhiPlaceholder(block->GetBlockId(), idx);
+      merged_value = Value::MergedUnknown(phi_placeholder);
+      // We know that at least one of the merged values is unknown (and they are
+      // not both pure-unknown, since that case is handled above), so the overall
+      // value needs to be a MergedUnknown; break out of the loop with it.
+      break;
+    } else {
       // There are conflicting known values. We may still be able to replace loads with a Phi.
       const PhiPlaceholder* phi_placeholder = GetPhiPlaceholder(block->GetBlockId(), idx);
       // Propagate the need for a new loop Phi from all predecessors.
@@ -1247,7 +1355,8 @@
     phi_inputs.clear();
     for (HBasicBlock* predecessor : current_block->GetPredecessors()) {
       Value pred_value = ReplacementOrValue(heap_values_for_[predecessor->GetBlockId()][idx].value);
-      DCHECK(!pred_value.IsUnknown());
+      DCHECK(!pred_value.IsUnknown())
+          << "block " << current_block->GetBlockId() << " pred: " << predecessor->GetBlockId();
       if (pred_value.NeedsNonLoopPhi()) {
         // We need to process the Phi placeholder first.
         work_queue.push_back(pred_value.GetPhiPlaceholder());
@@ -1287,8 +1396,10 @@
   } else if (record.value.IsUnknown()) {
     // Load isn't eliminated. Put the load as the value into the HeapLocation.
     // This acts like GVN but with better aliasing analysis.
+    Value old_value = record.value;
     record.value = Value::ForInstruction(instruction);
     KeepStoresIfAliasedToLocation(heap_values, idx);
+    KeepStores(old_value);
   } else if (record.value.NeedsLoopPhi()) {
     // We do not know yet if the value is known for all back edges. Record for future processing.
     loads_requiring_loop_phi_.insert(std::make_pair(instruction, record));
@@ -2047,8 +2158,22 @@
     work_queue.push_back(index);
   }
   const ArenaVector<HBasicBlock*>& blocks = GetGraph()->GetBlocks();
+  std::optional<ArenaBitVector> not_kept_stores;
+  if (stats_) {
+    not_kept_stores.emplace(GetGraph()->GetAllocator(),
+                            kept_stores_.GetBitSizeOf(),
+                            /*expandable=*/ false,
+                            ArenaAllocKind::kArenaAllocLSE);
+  }
   while (!work_queue.empty()) {
-    const PhiPlaceholder* phi_placeholder = &phi_placeholders_[work_queue.back()];
+    uint32_t cur_phi_idx = work_queue.back();
+    const PhiPlaceholder* phi_placeholder = &phi_placeholders_[cur_phi_idx];
+    // Only writes to partial-escapes need to be specifically kept.
+    bool is_partial_kept_merged_unknown =
+        kept_merged_unknowns_.IsBitSet(cur_phi_idx) &&
+        heap_location_collector_.GetHeapLocation(phi_placeholder->GetHeapLocation())
+            ->GetReferenceInfo()
+            ->IsPartialSingleton();
     work_queue.pop_back();
     size_t idx = phi_placeholder->GetHeapLocation();
     HBasicBlock* block = blocks[phi_placeholder->GetBlockId()];
@@ -2091,18 +2216,39 @@
         if (!stored_by.IsUnknown() && (i == idx || may_alias(i))) {
           if (stored_by.NeedsPhi()) {
             size_t phi_placeholder_index = PhiPlaceholderIndex(stored_by);
+            if (is_partial_kept_merged_unknown) {
+              // Propagate merged-unknown keep since otherwise this might look
+              // like a partial escape we can remove.
+              kept_merged_unknowns_.SetBit(phi_placeholder_index);
+            }
             if (!phi_placeholders_to_search_for_kept_stores_.IsBitSet(phi_placeholder_index)) {
               phi_placeholders_to_search_for_kept_stores_.SetBit(phi_placeholder_index);
               work_queue.push_back(phi_placeholder_index);
             }
           } else {
             DCHECK(IsStore(stored_by.GetInstruction()));
-            kept_stores_.SetBit(stored_by.GetInstruction()->GetId());
+            ReferenceInfo* ri = heap_location_collector_.GetHeapLocation(i)->GetReferenceInfo();
+            DCHECK(ri != nullptr) << "No heap value for " << stored_by.GetInstruction()->DebugName()
+                                  << " id: " << stored_by.GetInstruction()->GetId() << " block: "
+                                  << stored_by.GetInstruction()->GetBlock()->GetBlockId();
+            if (!is_partial_kept_merged_unknown && IsPartialNoEscape(predecessor, idx)) {
+              if (not_kept_stores) {
+                not_kept_stores->SetBit(stored_by.GetInstruction()->GetId());
+              }
+            } else {
+              kept_stores_.SetBit(stored_by.GetInstruction()->GetId());
+            }
           }
         }
       }
     }
   }
+  if (not_kept_stores) {
+    // a - b := (a & ~b)
+    not_kept_stores->Subtract(&kept_stores_);
+    auto num_removed = not_kept_stores->NumSetBits();
+    MaybeRecordStat(stats_, MethodCompilationStat::kPartialStoreRemoved, num_removed);
+  }
 }
 
 void LSEVisitor::UpdateValueRecordForStoreElimination(/*inout*/ValueRecord* value_record) {
@@ -2273,6 +2419,7 @@
     if (!new_instance->HasNonEnvironmentUses()) {
       new_instance->RemoveEnvironmentUsers();
       new_instance->GetBlock()->RemoveInstruction(new_instance);
+      MaybeRecordStat(stats_, MethodCompilationStat::kFullLSEAllocationRemoved);
     }
   }
 }
@@ -2284,8 +2431,14 @@
     // Skip this optimization.
     return false;
   }
+  // We need to be able to determine reachability. The reachability information
+  // should initially be empty, but clear it just to be safe.
+  graph_->ClearReachabilityInformation();
+  // Computing reachability is O(blocks^3) in time, but it lets us answer
+  // reachability queries in O(1) afterwards.
+  graph_->ComputeReachabilityInformation();
   ScopedArenaAllocator allocator(graph_->GetArenaStack());
-  LoadStoreAnalysis lsa(graph_, &allocator);
+  LoadStoreAnalysis lsa(graph_, stats_, &allocator, /*for_elimination=*/true);
   lsa.Run();
   const HeapLocationCollector& heap_location_collector = lsa.GetHeapLocationCollector();
   if (heap_location_collector.GetNumberOfHeapLocations() == 0) {