Reduce Partial LSE memory usage.

Instantiate ExecutionSubgraph only for partial singleton
candidates (currently NewInstance, possibly NewArray in the
future). This reduces "LSA" allocations.
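
In sketch form (ExecutionSubgraphSketch and ReferenceInfoSketch are
illustrative stand-in names; the real change, which arena-allocates the
subgraph, is in the ReferenceInfo constructor in the diff below), the
subgraph becomes a lazily populated pointer:

  #include <memory>

  struct ExecutionSubgraphSketch {  // stand-in for ExecutionSubgraph
    // Per-block no-escape bookkeeping would live here.
  };

  struct ReferenceInfoSketch {      // stand-in for ReferenceInfo
    // Pay for a subgraph only when the reference can be a partial singleton
    // (currently NewInstance); all other references keep a null pointer.
    explicit ReferenceInfoSketch(bool can_be_partial) {
      if (can_be_partial) {
        subgraph_ = std::make_unique<ExecutionSubgraphSketch>();
      }
    }

    bool IsPartialSingleton() const { return subgraph_ != nullptr; }

    std::unique_ptr<ExecutionSubgraphSketch> subgraph_;
  };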

Reserve memory for PartialLoadStoreEliminationHelper members
based on the number of partial singletons instead of the
number of reference infos. This reduces "LSE" allocations.
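
A hedged sketch of the sizing change (RefInfoSketch and the free-standing
CountPartialSingletons below are illustrative stand-ins; the real
CountPartialSingletons() is added in the diff below): count only the partial
singletons so containers can reserve exactly that many slots instead of one
per reference info.

  #include <algorithm>
  #include <vector>

  struct RefInfoSketch {  // stand-in for ReferenceInfo
    bool partial_singleton = false;
    bool IsPartialSingleton() const { return partial_singleton; }
  };

  // Number of slots the partial-LSE helper actually needs to reserve.
  size_t CountPartialSingletons(const std::vector<RefInfoSketch*>& infos) {
    return static_cast<size_t>(std::count_if(
        infos.begin(), infos.end(),
        [](const RefInfoSketch* ri) { return ri->IsPartialSingleton(); }));
  }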

The peak scoped arena allocation for one compiled method
is reduced from
  MEM: used: 97424004, allocated: 99006568, lost: 1115968
    LSA            46015104
    LSE            51408900
down to
  MEM: used: 17000744, allocated: 26713880, lost: 3332496
    GVN            17000744
where the combined LSA+LSE memory use is now lower than the GVN use.

Test: m test-art-host-gtest
Test: testrunner.py --host --optimizing
Bug: 33650849
Change-Id: I323b9f144b258f0fab034794770971547ce94b59
diff --git a/compiler/optimizing/load_store_analysis.h b/compiler/optimizing/load_store_analysis.h
index 7e5b071..4975bae 100644
--- a/compiler/optimizing/load_store_analysis.h
+++ b/compiler/optimizing/load_store_analysis.h
@@ -50,15 +50,15 @@
         is_singleton_and_not_returned_(true),
         is_singleton_and_not_deopt_visible_(true),
         allocator_(allocator),
-        subgraph_(reference->GetBlock()->GetGraph(),
-                  elimination_type != LoadStoreAnalysisType::kBasic,
-                  allocator_) {
+        subgraph_(nullptr) {
     // TODO We can do this in one pass.
     // TODO NewArray is possible but will need to get a handle on how to deal with the dynamic loads
     // for now just ignore it.
     bool can_be_partial = elimination_type != LoadStoreAnalysisType::kBasic &&
                           (/* reference_->IsNewArray() || */ reference_->IsNewInstance());
     if (can_be_partial) {
+      subgraph_.reset(
+          new (allocator) ExecutionSubgraph(reference->GetBlock()->GetGraph(), allocator));
       CollectPartialEscapes(reference_->GetBlock()->GetGraph());
     }
     CalculateEscape(reference_,
@@ -73,14 +73,16 @@
         //      to see if the additional branches are worth it.
         PrunePartialEscapeWrites();
       }
-      subgraph_.Finalize();
+      DCHECK(subgraph_ != nullptr);
+      subgraph_->Finalize();
     } else {
-      subgraph_.Invalidate();
+      DCHECK(subgraph_ == nullptr);
     }
   }
 
   const ExecutionSubgraph* GetNoEscapeSubgraph() const {
-    return &subgraph_;
+    DCHECK(IsPartialSingleton());
+    return subgraph_.get();
   }
 
   HInstruction* GetReference() const {
@@ -103,7 +105,9 @@
     auto ref = GetReference();
     // TODO NewArray is possible but will need to get a handle on how to deal with the dynamic loads
     // for now just ignore it.
-    return (/* ref->IsNewArray() || */ ref->IsNewInstance()) && GetNoEscapeSubgraph()->IsValid();
+    return (/* ref->IsNewArray() || */ ref->IsNewInstance()) &&
+           subgraph_ != nullptr &&
+           subgraph_->IsValid();
   }
 
   // Returns true if reference_ is a singleton and not returned to the caller or
@@ -123,7 +127,8 @@
  private:
   void CollectPartialEscapes(HGraph* graph);
   void HandleEscape(HBasicBlock* escape) {
-    subgraph_.RemoveBlock(escape);
+    DCHECK(subgraph_ != nullptr);
+    subgraph_->RemoveBlock(escape);
   }
   void HandleEscape(HInstruction* escape) {
     HandleEscape(escape->GetBlock());
@@ -145,7 +150,7 @@
 
   ScopedArenaAllocator* allocator_;
 
-  ExecutionSubgraph subgraph_;
+  std::unique_ptr<ExecutionSubgraph> subgraph_;
 
   DISALLOW_COPY_AND_ASSIGN(ReferenceInfo);
 };
@@ -264,8 +269,10 @@
     ref_info_array_.clear();
   }
 
-  size_t GetNumberOfReferenceInfos() const {
-    return ref_info_array_.size();
+  size_t CountPartialSingletons() const {
+    return std::count_if(ref_info_array_.begin(),
+                         ref_info_array_.end(),
+                         [](ReferenceInfo* ri) { return ri->IsPartialSingleton(); });
   }
 
   size_t GetNumberOfHeapLocations() const {