Reuse arena memory for each block in scheduler.
This reduces the peak memory used for large methods with
multiple blocks to schedule.
When compiling the aosp_taimen-userdebug boot image, the most
memory-hungry method, BatteryStats.dumpLocked, has the
Scheduler memory allocations in ArenaStack hidden by the
register allocator:
- before:
MEM: used: 8300224, allocated: 9175040, lost: 197360
Scheduler 8300224
- after:
MEM: used: 5914296, allocated: 7864320, lost: 78200
SsaLiveness 5532840
RegAllocator 144968
RegAllocVldt 236488
The total arena memory used, including the ArenaAllocator
not listed above, goes from 44333648 to 41950324 (-5.4%).
(Measured with kArenaAllocatorCountAllocations=true,
kArenaAllocatorPreciseTracking=false.)
Also remove one unnecessary -Wframe-larger-than= workaround
and add one workaround for a large frame with the above arena
allocation tracking flags.
Test: m test-art-host-gtest
Test: testrunner.py --host
Bug: 34053922
Change-Id: I7fd8d90dcc13b184b1e5bd0bcac072388710a129
diff --git a/compiler/optimizing/scheduler.h b/compiler/optimizing/scheduler.h
index c7683e0..fd48d84 100644
--- a/compiler/optimizing/scheduler.h
+++ b/compiler/optimizing/scheduler.h
@@ -251,12 +251,14 @@
*/
class SchedulingGraph : public ValueObject {
public:
- SchedulingGraph(const HScheduler* scheduler, ScopedArenaAllocator* allocator)
+ SchedulingGraph(const HScheduler* scheduler,
+ ScopedArenaAllocator* allocator,
+ const HeapLocationCollector* heap_location_collector)
: scheduler_(scheduler),
allocator_(allocator),
contains_scheduling_barrier_(false),
nodes_map_(allocator_->Adapter(kArenaAllocScheduler)),
- heap_location_collector_(nullptr) {}
+ heap_location_collector_(heap_location_collector) {}
SchedulingNode* AddNode(HInstruction* instr, bool is_scheduling_barrier = false) {
std::unique_ptr<SchedulingNode> node(
@@ -268,15 +270,6 @@
return result;
}
- void Clear() {
- nodes_map_.clear();
- contains_scheduling_barrier_ = false;
- }
-
- void SetHeapLocationCollector(const HeapLocationCollector& heap_location_collector) {
- heap_location_collector_ = &heap_location_collector;
- }
-
SchedulingNode* GetNode(const HInstruction* instr) const {
auto it = nodes_map_.find(instr);
if (it == nodes_map_.end()) {
@@ -329,7 +322,7 @@
ScopedArenaHashMap<const HInstruction*, std::unique_ptr<SchedulingNode>> nodes_map_;
- const HeapLocationCollector* heap_location_collector_;
+ const HeapLocationCollector* const heap_location_collector_;
};
/*
@@ -377,6 +370,7 @@
class SchedulingNodeSelector : public ArenaObject<kArenaAllocScheduler> {
public:
+ virtual void Reset() {}
virtual SchedulingNode* PopHighestPriorityNode(ScopedArenaVector<SchedulingNode*>* nodes,
const SchedulingGraph& graph) = 0;
virtual ~SchedulingNodeSelector() {}
@@ -418,6 +412,7 @@
public:
CriticalPathSchedulingNodeSelector() : prev_select_(nullptr) {}
+ void Reset() OVERRIDE { prev_select_ = nullptr; }
SchedulingNode* PopHighestPriorityNode(ScopedArenaVector<SchedulingNode*>* nodes,
const SchedulingGraph& graph) OVERRIDE;
@@ -434,16 +429,11 @@
class HScheduler {
public:
- HScheduler(ScopedArenaAllocator* allocator,
- SchedulingLatencyVisitor* latency_visitor,
- SchedulingNodeSelector* selector)
- : allocator_(allocator),
- latency_visitor_(latency_visitor),
+ HScheduler(SchedulingLatencyVisitor* latency_visitor, SchedulingNodeSelector* selector)
+ : latency_visitor_(latency_visitor),
selector_(selector),
only_optimize_loop_blocks_(true),
- scheduling_graph_(this, allocator),
- cursor_(nullptr),
- candidates_(allocator_->Adapter(kArenaAllocScheduler)) {}
+ cursor_(nullptr) {}
virtual ~HScheduler() {}
void Schedule(HGraph* graph);
@@ -454,8 +444,9 @@
virtual bool IsSchedulingBarrier(const HInstruction* instruction) const;
protected:
- void Schedule(HBasicBlock* block);
- void Schedule(SchedulingNode* scheduling_node);
+ void Schedule(HBasicBlock* block, const HeapLocationCollector* heap_location_collector);
+ void Schedule(SchedulingNode* scheduling_node,
+ /*inout*/ ScopedArenaVector<SchedulingNode*>* candidates);
void Schedule(HInstruction* instruction);
// Any instruction returning `false` via this method will prevent its
@@ -476,19 +467,12 @@
node->SetInternalLatency(latency_visitor_->GetLastVisitedInternalLatency());
}
- ScopedArenaAllocator* const allocator_;
SchedulingLatencyVisitor* const latency_visitor_;
SchedulingNodeSelector* const selector_;
bool only_optimize_loop_blocks_;
- // We instantiate the members below as part of this class to avoid
- // instantiating them locally for every chunk scheduled.
- SchedulingGraph scheduling_graph_;
// A pointer indicating where the next instruction to be scheduled will be inserted.
HInstruction* cursor_;
- // The list of candidates for scheduling. A node becomes a candidate when all
- // its predecessors have been scheduled.
- ScopedArenaVector<SchedulingNode*> candidates_;
private:
DISALLOW_COPY_AND_ASSIGN(HScheduler);