Reuse arena memory for each block in scheduler.

This reduces the peak memory used for large methods with
multiple blocks to schedule.

Compiling the aosp_taimen-userdebug boot image, the most
memory hungry method BatteryStats.dumpLocked has the
Scheduler memory allocations in ArenaStack hidden by the
register allocator:
  - before:
    MEM: used: 8300224, allocated: 9175040, lost: 197360
    Scheduler     8300224
  - after:
    MEM: used: 5914296, allocated: 7864320, lost: 78200
    SsaLiveness   5532840
    RegAllocator   144968
    RegAllocVldt   236488
The total arena memory used, including the ArenaAllocator
not listed above, goes from 44333648 to 41950324 (-5.4%).
(Measured with kArenaAllocatorCountAllocations=true,
kArenaAllocatorPreciseTracking=false.)

Also remove one unnecessary -Wframe-larger-than= workaround
and add one workaround for large frame with the above arena
alloc tracking flags.

Test: m test-art-host-gtest
Test: testrunner.py --host
Bug: 34053922
Change-Id: I7fd8d90dcc13b184b1e5bd0bcac072388710a129

commit: ced04835d8e3cd3f68576cfffc1d21283ca151b4 [log] [tgz]
author: Vladimir Marko <vmarko@google.com> Thu Jul 26 14:42:17 2018 +0100
committer: Vladimir Marko <vmarko@google.com> Thu Aug 02 15:47:02 2018 +0100
tree: 125ddd1d222f4fb1710e17c76803ad3e92572a5c
parent: 350b6a312222b9b27bfee0e72ce261a45cb60e1c [diff] [blame]
diff --git a/compiler/optimizing/scheduler.h b/compiler/optimizing/scheduler.h
index c7683e0..fd48d84 100644
--- a/compiler/optimizing/scheduler.h
+++ b/compiler/optimizing/scheduler.h

@@ -251,12 +251,14 @@
  */
 class SchedulingGraph : public ValueObject {
  public:
-  SchedulingGraph(const HScheduler* scheduler, ScopedArenaAllocator* allocator)
+  SchedulingGraph(const HScheduler* scheduler,
+                  ScopedArenaAllocator* allocator,
+                  const HeapLocationCollector* heap_location_collector)
       : scheduler_(scheduler),
         allocator_(allocator),
         contains_scheduling_barrier_(false),
         nodes_map_(allocator_->Adapter(kArenaAllocScheduler)),
-        heap_location_collector_(nullptr) {}
+        heap_location_collector_(heap_location_collector) {}
 
   SchedulingNode* AddNode(HInstruction* instr, bool is_scheduling_barrier = false) {
     std::unique_ptr<SchedulingNode> node(
@@ -268,15 +270,6 @@
     return result;
   }
 
-  void Clear() {
-    nodes_map_.clear();
-    contains_scheduling_barrier_ = false;
-  }
-
-  void SetHeapLocationCollector(const HeapLocationCollector& heap_location_collector) {
-    heap_location_collector_ = &heap_location_collector;
-  }
-
   SchedulingNode* GetNode(const HInstruction* instr) const {
     auto it = nodes_map_.find(instr);
     if (it == nodes_map_.end()) {
@@ -329,7 +322,7 @@
 
   ScopedArenaHashMap<const HInstruction*, std::unique_ptr<SchedulingNode>> nodes_map_;
 
-  const HeapLocationCollector* heap_location_collector_;
+  const HeapLocationCollector* const heap_location_collector_;
 };
 
 /*
@@ -377,6 +370,7 @@
 
 class SchedulingNodeSelector : public ArenaObject<kArenaAllocScheduler> {
  public:
+  virtual void Reset() {}
   virtual SchedulingNode* PopHighestPriorityNode(ScopedArenaVector<SchedulingNode*>* nodes,
                                                  const SchedulingGraph& graph) = 0;
   virtual ~SchedulingNodeSelector() {}
@@ -418,6 +412,7 @@
  public:
   CriticalPathSchedulingNodeSelector() : prev_select_(nullptr) {}
 
+  void Reset() OVERRIDE { prev_select_ = nullptr; }
   SchedulingNode* PopHighestPriorityNode(ScopedArenaVector<SchedulingNode*>* nodes,
                                          const SchedulingGraph& graph) OVERRIDE;
 
@@ -434,16 +429,11 @@
 
 class HScheduler {
  public:
-  HScheduler(ScopedArenaAllocator* allocator,
-             SchedulingLatencyVisitor* latency_visitor,
-             SchedulingNodeSelector* selector)
-      : allocator_(allocator),
-        latency_visitor_(latency_visitor),
+  HScheduler(SchedulingLatencyVisitor* latency_visitor, SchedulingNodeSelector* selector)
+      : latency_visitor_(latency_visitor),
         selector_(selector),
         only_optimize_loop_blocks_(true),
-        scheduling_graph_(this, allocator),
-        cursor_(nullptr),
-        candidates_(allocator_->Adapter(kArenaAllocScheduler)) {}
+        cursor_(nullptr) {}
   virtual ~HScheduler() {}
 
   void Schedule(HGraph* graph);
@@ -454,8 +444,9 @@
   virtual bool IsSchedulingBarrier(const HInstruction* instruction) const;
 
  protected:
-  void Schedule(HBasicBlock* block);
-  void Schedule(SchedulingNode* scheduling_node);
+  void Schedule(HBasicBlock* block, const HeapLocationCollector* heap_location_collector);
+  void Schedule(SchedulingNode* scheduling_node,
+                /*inout*/ ScopedArenaVector<SchedulingNode*>* candidates);
   void Schedule(HInstruction* instruction);
 
   // Any instruction returning `false` via this method will prevent its
@@ -476,19 +467,12 @@
     node->SetInternalLatency(latency_visitor_->GetLastVisitedInternalLatency());
   }
 
-  ScopedArenaAllocator* const allocator_;
   SchedulingLatencyVisitor* const latency_visitor_;
   SchedulingNodeSelector* const selector_;
   bool only_optimize_loop_blocks_;
 
-  // We instantiate the members below as part of this class to avoid
-  // instantiating them locally for every chunk scheduled.
-  SchedulingGraph scheduling_graph_;
   // A pointer indicating where the next instruction to be scheduled will be inserted.
   HInstruction* cursor_;
-  // The list of candidates for scheduling. A node becomes a candidate when all
-  // its predecessors have been scheduled.
-  ScopedArenaVector<SchedulingNode*> candidates_;
 
  private:
   DISALLOW_COPY_AND_ASSIGN(HScheduler);
commit	ced04835d8e3cd3f68576cfffc1d21283ca151b4	[log] [tgz]
author	Vladimir Marko <vmarko@google.com>	Thu Jul 26 14:42:17 2018 +0100
committer	Vladimir Marko <vmarko@google.com>	Thu Aug 02 15:47:02 2018 +0100
tree	125ddd1d222f4fb1710e17c76803ad3e92572a5c
parent	350b6a312222b9b27bfee0e72ce261a45cb60e1c [diff] [blame]