author Vladimir Marko <vmarko@google.com> 2018-07-26 14:42:17 +0100
committer Vladimir Marko <vmarko@google.com> 2018-08-02 15:47:02 +0100
commit  ced04835d8e3cd3f68576cfffc1d21283ca151b4
tree    125ddd1d222f4fb1710e17c76803ad3e92572a5c /compiler/optimizing
parent  350b6a312222b9b27bfee0e72ce261a45cb60e1c
Reuse arena memory for each block in scheduler.
This reduces the peak memory used for large methods with multiple
blocks to schedule.

Compiling the aosp_taimen-userdebug boot image, the most memory
hungry method BatteryStats.dumpLocked has the Scheduler memory
allocations in ArenaStack hidden by the register allocator:
  - before:
      MEM: used: 8300224, allocated: 9175040, lost: 197360
      Scheduler     8300224
  - after:
      MEM: used: 5914296, allocated: 7864320, lost: 78200
      SsaLiveness   5532840
      RegAllocator   144968
      RegAllocVldt   236488
The total arena memory used, including the ArenaAllocator not listed
above, goes from 44333648 to 41950324 (-5.4%).
(Measured with kArenaAllocatorCountAllocations=true,
kArenaAllocatorPreciseTracking=false.)

Also remove one unnecessary -Wframe-larger-than= workaround and add
one workaround for a large frame with the above arena alloc tracking
flags.

Test: m test-art-host-gtest
Test: testrunner.py --host
Bug: 34053922
Change-Id: I7fd8d90dcc13b184b1e5bd0bcac072388710a129
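The core of the change is in HScheduler::Schedule below: instead of keeping one
SchedulingGraph and candidate list alive for the whole method, each block now gets
its own ScopedArenaAllocator, and destroying that allocator rewinds the ArenaStack
so the same memory is reused for the next block. The following is a minimal,
self-contained sketch of that pattern using toy stand-ins, not the real ART classes;
it only illustrates why peak usage becomes the largest single block rather than the
sum over all blocks.

#include <cassert>
#include <cstddef>
#include <vector>

// Toy stand-ins for ART's ArenaStack / ScopedArenaAllocator pair (illustrative only).
class ToyArenaStack {
 public:
  explicit ToyArenaStack(size_t capacity) : storage_(capacity) {}
  size_t Mark() const { return top_; }
  void Rewind(size_t mark) { top_ = mark; }  // memory above `mark` becomes reusable
  void* Alloc(size_t bytes) {
    assert(top_ + bytes <= storage_.size());
    void* p = storage_.data() + top_;
    top_ += bytes;
    return p;
  }
 private:
  std::vector<char> storage_;
  size_t top_ = 0;
};

class ToyScopedAllocator {
 public:
  explicit ToyScopedAllocator(ToyArenaStack* stack) : stack_(stack), mark_(stack->Mark()) {}
  ~ToyScopedAllocator() { stack_->Rewind(mark_); }  // end of scope releases this block's memory
  void* Alloc(size_t bytes) { return stack_->Alloc(bytes); }
 private:
  ToyArenaStack* const stack_;
  const size_t mark_;
};

void SchedulePerBlock(ToyArenaStack* arena_stack, size_t num_blocks) {
  for (size_t b = 0; b < num_blocks; ++b) {
    ToyScopedAllocator allocator(arena_stack);       // fresh scope per block
    void* per_block_data = allocator.Alloc(4096);    // stand-in for graph nodes, maps, candidates
    (void)per_block_data;
  }  // the ArenaStack is rewound here and reused for the next block
}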
Diffstat (limited to 'compiler/optimizing')
-rw-r--r--  compiler/optimizing/optimizing_compiler.cc | 13
-rw-r--r--  compiler/optimizing/scheduler.cc           | 49
-rw-r--r--  compiler/optimizing/scheduler.h            | 42
-rw-r--r--  compiler/optimizing/scheduler_arm.h        |  5
-rw-r--r--  compiler/optimizing/scheduler_arm64.h      |  4
-rw-r--r--  compiler/optimizing/scheduler_test.cc      | 15
-rw-r--r--  compiler/optimizing/stack_map_stream.h     |  3
7 files changed, 63 insertions(+), 68 deletions(-)
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index d96746fdd7..b2733ee1f2 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -1101,15 +1101,18 @@ static void CreateJniStackMap(ArenaStack* arena_stack,
const JniCompiledMethod& jni_compiled_method,
/* out */ ArenaVector<uint8_t>* stack_map) {
ScopedArenaAllocator allocator(arena_stack);
- StackMapStream stack_map_stream(&allocator, jni_compiled_method.GetInstructionSet());
- stack_map_stream.BeginMethod(
+ // StackMapStream is quite large, so allocate it using the ScopedArenaAllocator
+ // to stay clear of the frame size limit.
+ std::unique_ptr<StackMapStream> stack_map_stream(
+ new (&allocator) StackMapStream(&allocator, jni_compiled_method.GetInstructionSet()));
+ stack_map_stream->BeginMethod(
jni_compiled_method.GetFrameSize(),
jni_compiled_method.GetCoreSpillMask(),
jni_compiled_method.GetFpSpillMask(),
/* num_dex_registers */ 0);
- stack_map_stream.EndMethod();
- stack_map->resize(stack_map_stream.PrepareForFillIn());
- stack_map_stream.FillInCodeInfo(MemoryRegion(stack_map->data(), stack_map->size()));
+ stack_map_stream->EndMethod();
+ stack_map->resize(stack_map_stream->PrepareForFillIn());
+ stack_map_stream->FillInCodeInfo(MemoryRegion(stack_map->data(), stack_map->size()));
}
CompiledMethod* OptimizingCompiler::JniCompile(uint32_t access_flags,
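The hunk above keeps CreateJniStackMap under the -Wframe-larger-than= limit by moving
the large StackMapStream off the stack: it is placement-new'ed into the
ScopedArenaAllocator and owned by a std::unique_ptr, which the change of StackMapStream
to DeletableArenaObject at the end of this diff makes possible. Below is a simplified,
self-contained sketch of the same pattern; the class names and the no-op operator delete
are assumptions for illustration, not ART's actual DeletableArenaObject implementation.

#include <cassert>
#include <cstddef>
#include <memory>
#include <vector>

// Toy bump allocator standing in for the scoped arena.
class ToyArena {
 public:
  explicit ToyArena(size_t capacity) : storage_(capacity) {}
  void* Alloc(size_t bytes) {
    assert(used_ + bytes <= storage_.size());
    void* p = storage_.data() + used_;
    used_ += bytes;
    return p;
  }
 private:
  std::vector<char> storage_;
  size_t used_ = 0;
};

class BigHelper {
 public:
  // Placement-style operator new puts the object in the arena instead of on the stack.
  void* operator new(size_t size, ToyArena* arena) { return arena->Alloc(size); }
  // No-op deallocation: unique_ptr still runs the destructor, but the storage is
  // reclaimed when the arena itself is discarded.
  void operator delete(void* /*ptr*/) {}
  // Large scratch state that would otherwise blow up the caller's frame size.
  char scratch[100 * 1024] = {};
};

void BuildSomething(ToyArena* arena) {
  std::unique_ptr<BigHelper> helper(new (arena) BigHelper());
  helper->scratch[0] = 1;
  // ~BigHelper() runs on scope exit; the memory stays in the arena.
}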
diff --git a/compiler/optimizing/scheduler.cc b/compiler/optimizing/scheduler.cc
index 588ea03d69..1aa16f45bc 100644
--- a/compiler/optimizing/scheduler.cc
+++ b/compiler/optimizing/scheduler.cc
@@ -545,60 +545,67 @@ SchedulingNode* CriticalPathSchedulingNodeSelector::GetHigherPrioritySchedulingN
void HScheduler::Schedule(HGraph* graph) {
// We run lsa here instead of in a separate pass to better control whether we
// should run the analysis or not.
+ const HeapLocationCollector* heap_location_collector = nullptr;
LoadStoreAnalysis lsa(graph);
if (!only_optimize_loop_blocks_ || graph->HasLoops()) {
lsa.Run();
- scheduling_graph_.SetHeapLocationCollector(lsa.GetHeapLocationCollector());
+ heap_location_collector = &lsa.GetHeapLocationCollector();
}
for (HBasicBlock* block : graph->GetReversePostOrder()) {
if (IsSchedulable(block)) {
- Schedule(block);
+ Schedule(block, heap_location_collector);
}
}
}
-void HScheduler::Schedule(HBasicBlock* block) {
- ScopedArenaVector<SchedulingNode*> scheduling_nodes(allocator_->Adapter(kArenaAllocScheduler));
+void HScheduler::Schedule(HBasicBlock* block,
+ const HeapLocationCollector* heap_location_collector) {
+ ScopedArenaAllocator allocator(block->GetGraph()->GetArenaStack());
+ ScopedArenaVector<SchedulingNode*> scheduling_nodes(allocator.Adapter(kArenaAllocScheduler));
// Build the scheduling graph.
- scheduling_graph_.Clear();
+ SchedulingGraph scheduling_graph(this, &allocator, heap_location_collector);
for (HBackwardInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
HInstruction* instruction = it.Current();
CHECK_EQ(instruction->GetBlock(), block)
<< instruction->DebugName()
<< " is in block " << instruction->GetBlock()->GetBlockId()
<< ", and expected in block " << block->GetBlockId();
- SchedulingNode* node = scheduling_graph_.AddNode(instruction, IsSchedulingBarrier(instruction));
+ SchedulingNode* node = scheduling_graph.AddNode(instruction, IsSchedulingBarrier(instruction));
CalculateLatency(node);
scheduling_nodes.push_back(node);
}
- if (scheduling_graph_.Size() <= 1) {
- scheduling_graph_.Clear();
+ if (scheduling_graph.Size() <= 1) {
return;
}
cursor_ = block->GetLastInstruction();
+ // The list of candidates for scheduling. A node becomes a candidate when all
+ // its predecessors have been scheduled.
+ ScopedArenaVector<SchedulingNode*> candidates(allocator.Adapter(kArenaAllocScheduler));
+
// Find the initial candidates for scheduling.
- candidates_.clear();
for (SchedulingNode* node : scheduling_nodes) {
if (!node->HasUnscheduledSuccessors()) {
node->MaybeUpdateCriticalPath(node->GetLatency());
- candidates_.push_back(node);
+ candidates.push_back(node);
}
}
- ScopedArenaVector<SchedulingNode*> initial_candidates(allocator_->Adapter(kArenaAllocScheduler));
+ ScopedArenaVector<SchedulingNode*> initial_candidates(allocator.Adapter(kArenaAllocScheduler));
if (kDumpDotSchedulingGraphs) {
// Remember the list of initial candidates for debug output purposes.
- initial_candidates.assign(candidates_.begin(), candidates_.end());
+ initial_candidates.assign(candidates.begin(), candidates.end());
}
// Schedule all nodes.
- while (!candidates_.empty()) {
- Schedule(selector_->PopHighestPriorityNode(&candidates_, scheduling_graph_));
+ selector_->Reset();
+ while (!candidates.empty()) {
+ SchedulingNode* node = selector_->PopHighestPriorityNode(&candidates, scheduling_graph);
+ Schedule(node, &candidates);
}
if (kDumpDotSchedulingGraphs) {
@@ -607,11 +614,12 @@ void HScheduler::Schedule(HBasicBlock* block) {
std::stringstream description;
description << graph->GetDexFile().PrettyMethod(graph->GetMethodIdx())
<< " B" << block->GetBlockId();
- scheduling_graph_.DumpAsDotGraph(description.str(), initial_candidates);
+ scheduling_graph.DumpAsDotGraph(description.str(), initial_candidates);
}
}
-void HScheduler::Schedule(SchedulingNode* scheduling_node) {
+void HScheduler::Schedule(SchedulingNode* scheduling_node,
+ /*inout*/ ScopedArenaVector<SchedulingNode*>* candidates) {
// Check whether any of the node's predecessors will be valid candidates after
// this node is scheduled.
uint32_t path_to_node = scheduling_node->GetCriticalPath();
@@ -620,7 +628,7 @@ void HScheduler::Schedule(SchedulingNode* scheduling_node) {
path_to_node + predecessor->GetInternalLatency() + predecessor->GetLatency());
predecessor->DecrementNumberOfUnscheduledSuccessors();
if (!predecessor->HasUnscheduledSuccessors()) {
- candidates_.push_back(predecessor);
+ candidates->push_back(predecessor);
}
}
for (SchedulingNode* predecessor : scheduling_node->GetOtherPredecessors()) {
@@ -630,7 +638,7 @@ void HScheduler::Schedule(SchedulingNode* scheduling_node) {
// correctness. So we do not use them to compute the critical path.
predecessor->DecrementNumberOfUnscheduledSuccessors();
if (!predecessor->HasUnscheduledSuccessors()) {
- candidates_.push_back(predecessor);
+ candidates->push_back(predecessor);
}
}
@@ -779,7 +787,6 @@ bool HInstructionScheduling::Run(bool only_optimize_loop_blocks,
#if defined(ART_ENABLE_CODEGEN_arm64) || defined(ART_ENABLE_CODEGEN_arm)
// Phase-local allocator that allocates scheduler internal data structures like
// scheduling nodes, internal nodes map, dependencies, etc.
- ScopedArenaAllocator allocator(graph_->GetArenaStack());
CriticalPathSchedulingNodeSelector critical_path_selector;
RandomSchedulingNodeSelector random_selector;
SchedulingNodeSelector* selector = schedule_randomly
@@ -795,7 +802,7 @@ bool HInstructionScheduling::Run(bool only_optimize_loop_blocks,
switch (instruction_set_) {
#ifdef ART_ENABLE_CODEGEN_arm64
case InstructionSet::kArm64: {
- arm64::HSchedulerARM64 scheduler(&allocator, selector);
+ arm64::HSchedulerARM64 scheduler(selector);
scheduler.SetOnlyOptimizeLoopBlocks(only_optimize_loop_blocks);
scheduler.Schedule(graph_);
break;
@@ -805,7 +812,7 @@ bool HInstructionScheduling::Run(bool only_optimize_loop_blocks,
case InstructionSet::kThumb2:
case InstructionSet::kArm: {
arm::SchedulingLatencyVisitorARM arm_latency_visitor(codegen_);
- arm::HSchedulerARM scheduler(&allocator, selector, &arm_latency_visitor);
+ arm::HSchedulerARM scheduler(selector, &arm_latency_visitor);
scheduler.SetOnlyOptimizeLoopBlocks(only_optimize_loop_blocks);
scheduler.Schedule(graph_);
break;
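Because the scheduling graph and candidate list are now rebuilt from a fresh allocator
for every block, a stateful selector such as CriticalPathSchedulingNodeSelector must not
carry a pointer into the previous block's graph across blocks; that is why the diff adds
the virtual Reset() hook and calls selector_->Reset() before scheduling each block. The
sketch below shows that interaction with toy classes; the real selector's use of the
cached node for tie-breaking between equal critical paths is omitted.

#include <cstdint>
#include <vector>

struct ToyNode { uint32_t critical_path = 0; };

class ToySelector {
 public:
  virtual ~ToySelector() {}
  virtual void Reset() {}
  virtual ToyNode* Pop(std::vector<ToyNode*>* candidates) = 0;
};

class ToyCriticalPathSelector : public ToySelector {
 public:
  void Reset() override { prev_select_ = nullptr; }  // drop the pointer into the old graph
  ToyNode* Pop(std::vector<ToyNode*>* candidates) override {
    size_t best_index = 0;
    for (size_t i = 1; i < candidates->size(); ++i) {
      if ((*candidates)[i]->critical_path > (*candidates)[best_index]->critical_path) {
        best_index = i;
      }
    }
    ToyNode* result = (*candidates)[best_index];
    candidates->erase(candidates->begin() + best_index);
    prev_select_ = result;  // cached per-block state; stale once the block's graph is gone
    return result;
  }
 private:
  ToyNode* prev_select_ = nullptr;
};

void ScheduleBlock(ToySelector* selector, std::vector<ToyNode*> candidates) {
  selector->Reset();  // mirrors selector_->Reset() before the per-block scheduling loop
  while (!candidates.empty()) {
    ToyNode* node = selector->Pop(&candidates);
    (void)node;  // emit `node` here
  }
}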
diff --git a/compiler/optimizing/scheduler.h b/compiler/optimizing/scheduler.h
index c7683e04a7..fd48d844e6 100644
--- a/compiler/optimizing/scheduler.h
+++ b/compiler/optimizing/scheduler.h
@@ -251,12 +251,14 @@ class SchedulingNode : public DeletableArenaObject<kArenaAllocScheduler> {
*/
class SchedulingGraph : public ValueObject {
public:
- SchedulingGraph(const HScheduler* scheduler, ScopedArenaAllocator* allocator)
+ SchedulingGraph(const HScheduler* scheduler,
+ ScopedArenaAllocator* allocator,
+ const HeapLocationCollector* heap_location_collector)
: scheduler_(scheduler),
allocator_(allocator),
contains_scheduling_barrier_(false),
nodes_map_(allocator_->Adapter(kArenaAllocScheduler)),
- heap_location_collector_(nullptr) {}
+ heap_location_collector_(heap_location_collector) {}
SchedulingNode* AddNode(HInstruction* instr, bool is_scheduling_barrier = false) {
std::unique_ptr<SchedulingNode> node(
@@ -268,15 +270,6 @@ class SchedulingGraph : public ValueObject {
return result;
}
- void Clear() {
- nodes_map_.clear();
- contains_scheduling_barrier_ = false;
- }
-
- void SetHeapLocationCollector(const HeapLocationCollector& heap_location_collector) {
- heap_location_collector_ = &heap_location_collector;
- }
-
SchedulingNode* GetNode(const HInstruction* instr) const {
auto it = nodes_map_.find(instr);
if (it == nodes_map_.end()) {
@@ -329,7 +322,7 @@ class SchedulingGraph : public ValueObject {
ScopedArenaHashMap<const HInstruction*, std::unique_ptr<SchedulingNode>> nodes_map_;
- const HeapLocationCollector* heap_location_collector_;
+ const HeapLocationCollector* const heap_location_collector_;
};
/*
@@ -377,6 +370,7 @@ class SchedulingLatencyVisitor : public HGraphDelegateVisitor {
class SchedulingNodeSelector : public ArenaObject<kArenaAllocScheduler> {
public:
+ virtual void Reset() {}
virtual SchedulingNode* PopHighestPriorityNode(ScopedArenaVector<SchedulingNode*>* nodes,
const SchedulingGraph& graph) = 0;
virtual ~SchedulingNodeSelector() {}
@@ -418,6 +412,7 @@ class CriticalPathSchedulingNodeSelector : public SchedulingNodeSelector {
public:
CriticalPathSchedulingNodeSelector() : prev_select_(nullptr) {}
+ void Reset() OVERRIDE { prev_select_ = nullptr; }
SchedulingNode* PopHighestPriorityNode(ScopedArenaVector<SchedulingNode*>* nodes,
const SchedulingGraph& graph) OVERRIDE;
@@ -434,16 +429,11 @@ class CriticalPathSchedulingNodeSelector : public SchedulingNodeSelector {
class HScheduler {
public:
- HScheduler(ScopedArenaAllocator* allocator,
- SchedulingLatencyVisitor* latency_visitor,
- SchedulingNodeSelector* selector)
- : allocator_(allocator),
- latency_visitor_(latency_visitor),
+ HScheduler(SchedulingLatencyVisitor* latency_visitor, SchedulingNodeSelector* selector)
+ : latency_visitor_(latency_visitor),
selector_(selector),
only_optimize_loop_blocks_(true),
- scheduling_graph_(this, allocator),
- cursor_(nullptr),
- candidates_(allocator_->Adapter(kArenaAllocScheduler)) {}
+ cursor_(nullptr) {}
virtual ~HScheduler() {}
void Schedule(HGraph* graph);
@@ -454,8 +444,9 @@ class HScheduler {
virtual bool IsSchedulingBarrier(const HInstruction* instruction) const;
protected:
- void Schedule(HBasicBlock* block);
- void Schedule(SchedulingNode* scheduling_node);
+ void Schedule(HBasicBlock* block, const HeapLocationCollector* heap_location_collector);
+ void Schedule(SchedulingNode* scheduling_node,
+ /*inout*/ ScopedArenaVector<SchedulingNode*>* candidates);
void Schedule(HInstruction* instruction);
// Any instruction returning `false` via this method will prevent its
@@ -476,19 +467,12 @@ class HScheduler {
node->SetInternalLatency(latency_visitor_->GetLastVisitedInternalLatency());
}
- ScopedArenaAllocator* const allocator_;
SchedulingLatencyVisitor* const latency_visitor_;
SchedulingNodeSelector* const selector_;
bool only_optimize_loop_blocks_;
- // We instantiate the members below as part of this class to avoid
- // instantiating them locally for every chunk scheduled.
- SchedulingGraph scheduling_graph_;
// A pointer indicating where the next instruction to be scheduled will be inserted.
HInstruction* cursor_;
- // The list of candidates for scheduling. A node becomes a candidate when all
- // its predecessors have been scheduled.
- ScopedArenaVector<SchedulingNode*> candidates_;
private:
DISALLOW_COPY_AND_ASSIGN(HScheduler);
diff --git a/compiler/optimizing/scheduler_arm.h b/compiler/optimizing/scheduler_arm.h
index 0cb8684376..2f369486b3 100644
--- a/compiler/optimizing/scheduler_arm.h
+++ b/compiler/optimizing/scheduler_arm.h
@@ -137,10 +137,9 @@ class SchedulingLatencyVisitorARM : public SchedulingLatencyVisitor {
class HSchedulerARM : public HScheduler {
public:
- HSchedulerARM(ScopedArenaAllocator* allocator,
- SchedulingNodeSelector* selector,
+ HSchedulerARM(SchedulingNodeSelector* selector,
SchedulingLatencyVisitorARM* arm_latency_visitor)
- : HScheduler(allocator, arm_latency_visitor, selector) {}
+ : HScheduler(arm_latency_visitor, selector) {}
~HSchedulerARM() OVERRIDE {}
bool IsSchedulable(const HInstruction* instruction) const OVERRIDE {
diff --git a/compiler/optimizing/scheduler_arm64.h b/compiler/optimizing/scheduler_arm64.h
index 4f394d5e16..0d2f8d9fa0 100644
--- a/compiler/optimizing/scheduler_arm64.h
+++ b/compiler/optimizing/scheduler_arm64.h
@@ -134,8 +134,8 @@ class SchedulingLatencyVisitorARM64 : public SchedulingLatencyVisitor {
class HSchedulerARM64 : public HScheduler {
public:
- HSchedulerARM64(ScopedArenaAllocator* allocator, SchedulingNodeSelector* selector)
- : HScheduler(allocator, &arm64_latency_visitor_, selector) {}
+ explicit HSchedulerARM64(SchedulingNodeSelector* selector)
+ : HScheduler(&arm64_latency_visitor_, selector) {}
~HSchedulerARM64() OVERRIDE {}
bool IsSchedulable(const HInstruction* instruction) const OVERRIDE {
diff --git a/compiler/optimizing/scheduler_test.cc b/compiler/optimizing/scheduler_test.cc
index 7079e07ae1..fe23fb4cff 100644
--- a/compiler/optimizing/scheduler_test.cc
+++ b/compiler/optimizing/scheduler_test.cc
@@ -146,7 +146,9 @@ class SchedulerTest : public OptimizingUnitTest {
environment->SetRawEnvAt(1, mul);
mul->AddEnvUseAt(div_check->GetEnvironment(), 1);
- SchedulingGraph scheduling_graph(scheduler, GetScopedAllocator());
+ SchedulingGraph scheduling_graph(scheduler,
+ GetScopedAllocator(),
+ /* heap_location_collector */ nullptr);
// Instructions must be inserted in reverse order into the scheduling graph.
for (HInstruction* instr : ReverseRange(block_instructions)) {
scheduling_graph.AddNode(instr);
@@ -276,11 +278,10 @@ class SchedulerTest : public OptimizingUnitTest {
entry->AddInstruction(instr);
}
- SchedulingGraph scheduling_graph(scheduler, GetScopedAllocator());
HeapLocationCollector heap_location_collector(graph_);
heap_location_collector.VisitBasicBlock(entry);
heap_location_collector.BuildAliasingMatrix();
- scheduling_graph.SetHeapLocationCollector(heap_location_collector);
+ SchedulingGraph scheduling_graph(scheduler, GetScopedAllocator(), &heap_location_collector);
for (HInstruction* instr : ReverseRange(block_instructions)) {
// Build scheduling graph with memory access aliasing information
@@ -354,13 +355,13 @@ class SchedulerTest : public OptimizingUnitTest {
#if defined(ART_ENABLE_CODEGEN_arm64)
TEST_F(SchedulerTest, DependencyGraphAndSchedulerARM64) {
CriticalPathSchedulingNodeSelector critical_path_selector;
- arm64::HSchedulerARM64 scheduler(GetScopedAllocator(), &critical_path_selector);
+ arm64::HSchedulerARM64 scheduler(&critical_path_selector);
TestBuildDependencyGraphAndSchedule(&scheduler);
}
TEST_F(SchedulerTest, ArrayAccessAliasingARM64) {
CriticalPathSchedulingNodeSelector critical_path_selector;
- arm64::HSchedulerARM64 scheduler(GetScopedAllocator(), &critical_path_selector);
+ arm64::HSchedulerARM64 scheduler(&critical_path_selector);
TestDependencyGraphOnAliasingArrayAccesses(&scheduler);
}
#endif
@@ -369,14 +370,14 @@ TEST_F(SchedulerTest, ArrayAccessAliasingARM64) {
TEST_F(SchedulerTest, DependencyGraphAndSchedulerARM) {
CriticalPathSchedulingNodeSelector critical_path_selector;
arm::SchedulingLatencyVisitorARM arm_latency_visitor(/*CodeGenerator*/ nullptr);
- arm::HSchedulerARM scheduler(GetScopedAllocator(), &critical_path_selector, &arm_latency_visitor);
+ arm::HSchedulerARM scheduler(&critical_path_selector, &arm_latency_visitor);
TestBuildDependencyGraphAndSchedule(&scheduler);
}
TEST_F(SchedulerTest, ArrayAccessAliasingARM) {
CriticalPathSchedulingNodeSelector critical_path_selector;
arm::SchedulingLatencyVisitorARM arm_latency_visitor(/*CodeGenerator*/ nullptr);
- arm::HSchedulerARM scheduler(GetScopedAllocator(), &critical_path_selector, &arm_latency_visitor);
+ arm::HSchedulerARM scheduler(&critical_path_selector, &arm_latency_visitor);
TestDependencyGraphOnAliasingArrayAccesses(&scheduler);
}
#endif
diff --git a/compiler/optimizing/stack_map_stream.h b/compiler/optimizing/stack_map_stream.h
index de79f4921e..9d4598d3d9 100644
--- a/compiler/optimizing/stack_map_stream.h
+++ b/compiler/optimizing/stack_map_stream.h
@@ -34,7 +34,7 @@ namespace art {
* Collects and builds stack maps for a method. All the stack maps
* for a method are placed in a CodeInfo object.
*/
-class StackMapStream : public ValueObject {
+class StackMapStream : public DeletableArenaObject<kArenaAllocStackMapStream> {
public:
explicit StackMapStream(ScopedArenaAllocator* allocator, InstructionSet instruction_set)
: instruction_set_(instruction_set),
@@ -53,6 +53,7 @@ class StackMapStream : public ValueObject {
current_dex_registers_(allocator->Adapter(kArenaAllocStackMapStream)),
previous_dex_registers_(allocator->Adapter(kArenaAllocStackMapStream)),
dex_register_timestamp_(allocator->Adapter(kArenaAllocStackMapStream)),
+ expected_num_dex_registers_(0u),
temp_dex_register_mask_(allocator, 32, true, kArenaAllocStackMapStream),
temp_dex_register_map_(allocator->Adapter(kArenaAllocStackMapStream)) {
}