/*
 * Copyright (C) 2016 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <string>

#include "prepare_for_register_allocation.h"
#include "scheduler.h"

#ifdef ART_ENABLE_CODEGEN_arm64
#include "scheduler_arm64.h"
#endif

namespace art {

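// Record that `node` depends on `dependency`: add `dependency` as a data or
// 'other' predecessor of `node`, unless that edge is already present.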
void SchedulingGraph::AddDependency(SchedulingNode* node,
                                    SchedulingNode* dependency,
                                    bool is_data_dependency) {
  if (node == nullptr || dependency == nullptr) {
    // A `nullptr` node indicates an instruction out of scheduling range (e.g. in
    // another block), so we do not need to add a dependency edge to the graph.
    return;
  }

  if (is_data_dependency) {
    if (!HasImmediateDataDependency(node, dependency)) {
      node->AddDataPredecessor(dependency);
    }
  } else if (!HasImmediateOtherDependency(node, dependency)) {
    node->AddOtherPredecessor(dependency);
  }
}

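// Detect the three classic memory hazards between two instructions' side
// effects: read-after-write, write-after-read, and write-after-write.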
static bool MayHaveReorderingDependency(SideEffects node, SideEffects other) {
  // Read after write.
  if (node.MayDependOn(other)) {
    return true;
  }

  // Write after read.
  if (other.MayDependOn(node)) {
    return true;
  }

  // Memory write after write.
  if (node.DoesAnyWrite() && other.DoesAnyWrite()) {
    return true;
  }

  return false;
}

// Check whether `node` depends on `other`, taking into account `SideEffects`
// information and `CanThrow` information.
static bool HasSideEffectDependency(const HInstruction* node, const HInstruction* other) {
  if (MayHaveReorderingDependency(node->GetSideEffects(), other->GetSideEffects())) {
    return true;
  }

  if (other->CanThrow() && node->GetSideEffects().DoesAnyWrite()) {
    return true;
  }

  if (other->GetSideEffects().DoesAnyWrite() && node->CanThrow()) {
    return true;
  }

  if (other->CanThrow() && node->CanThrow()) {
    return true;
  }

  // Check side-effect dependency between ArrayGet and BoundsCheck.
  if (node->IsArrayGet() && other->IsBoundsCheck() && node->InputAt(1) == other) {
    return true;
  }

  return false;
}

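// Add all dependencies for `instruction`: data dependencies on its inputs,
// ordering dependencies across scheduling barriers, side-effect dependencies,
// and environment dependencies.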
void SchedulingGraph::AddDependencies(HInstruction* instruction, bool is_scheduling_barrier) {
  SchedulingNode* instruction_node = GetNode(instruction);

  // Define-use dependencies.
  for (const HUseListNode<HInstruction*>& use : instruction->GetUses()) {
    AddDataDependency(GetNode(use.GetUser()), instruction_node);
  }

  // Scheduling barrier dependencies.
  DCHECK(!is_scheduling_barrier || contains_scheduling_barrier_);
  if (contains_scheduling_barrier_) {
    // A barrier depends on instructions after it, and instructions before the
    // barrier depend on it.
    for (HInstruction* other = instruction->GetNext(); other != nullptr; other = other->GetNext()) {
      SchedulingNode* other_node = GetNode(other);
      bool other_is_barrier = other_node->IsSchedulingBarrier();
      if (is_scheduling_barrier || other_is_barrier) {
        AddOtherDependency(other_node, instruction_node);
      }
      if (other_is_barrier) {
        // This other scheduling barrier guarantees ordering of instructions after
        // it, so avoid creating additional useless dependencies in the graph.
        // For example, if we have
        //   instr_1
        //   barrier_2
        //   instr_3
        //   barrier_4
        //   instr_5
        // we only create the following non-data dependencies
        //   1 -> 2
        //   2 -> 3
        //   2 -> 4
        //   3 -> 4
        //   4 -> 5
        // and do not create
        //   1 -> 4
        //   2 -> 5
        // Note that in this example we could also avoid creating the dependency
        // `2 -> 4`. But if we remove `instr_3` that dependency is required to
        // order the barriers. So we generate it to avoid a special case.
        break;
      }
    }
  }

  // Side effect dependencies.
  if (!instruction->GetSideEffects().DoesNothing() || instruction->CanThrow()) {
    for (HInstruction* other = instruction->GetNext(); other != nullptr; other = other->GetNext()) {
      SchedulingNode* other_node = GetNode(other);
      if (other_node->IsSchedulingBarrier()) {
        // We have reached a scheduling barrier, so we can stop further
        // processing.
        DCHECK(HasImmediateOtherDependency(other_node, instruction_node));
        break;
      }
      if (HasSideEffectDependency(other, instruction)) {
        AddOtherDependency(other_node, instruction_node);
      }
    }
  }

  // Environment dependencies.
  // We do not need to process those if the instruction is a scheduling barrier,
  // since the barrier already has non-data dependencies on all following
  // instructions.
  if (!is_scheduling_barrier) {
    for (const HUseListNode<HEnvironment*>& use : instruction->GetEnvUses()) {
      // Note that here we could stop processing if the environment holder is
      // beyond a scheduling barrier. But checking this would likely require
      // more work than simply iterating through environment uses.
      AddOtherDependency(GetNode(use.GetUser()->GetHolder()), instruction_node);
    }
  }
}

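// Check whether `other` is an immediate (direct, non-transitive) data
// predecessor of `node` in the scheduling graph.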
bool SchedulingGraph::HasImmediateDataDependency(const SchedulingNode* node,
                                                 const SchedulingNode* other) const {
  return ContainsElement(node->GetDataPredecessors(), other);
}

bool SchedulingGraph::HasImmediateDataDependency(const HInstruction* instruction,
                                                 const HInstruction* other_instruction) const {
  const SchedulingNode* node = GetNode(instruction);
  const SchedulingNode* other = GetNode(other_instruction);
  if (node == nullptr || other == nullptr) {
    // Both instructions must be in the current basic block, i.e. the SchedulingGraph
    // must contain their corresponding SchedulingNodes, for it to tell whether there
    // is a dependency. Otherwise, for example when `instruction` and
    // `other_instruction` are in different basic blocks, there is no dependency from
    // the SchedulingGraph's perspective.
    return false;
  }
  return HasImmediateDataDependency(node, other);
}

bool SchedulingGraph::HasImmediateOtherDependency(const SchedulingNode* node,
                                                  const SchedulingNode* other) const {
  return ContainsElement(node->GetOtherPredecessors(), other);
}

bool SchedulingGraph::HasImmediateOtherDependency(const HInstruction* instruction,
                                                  const HInstruction* other_instruction) const {
  const SchedulingNode* node = GetNode(instruction);
  const SchedulingNode* other = GetNode(other_instruction);
  if (node == nullptr || other == nullptr) {
    // Both instructions must be in the current basic block, i.e. the SchedulingGraph
    // must contain their corresponding SchedulingNodes, for it to tell whether there
    // is a dependency. Otherwise, for example when `instruction` and
    // `other_instruction` are in different basic blocks, there is no dependency from
    // the SchedulingGraph's perspective.
    return false;
  }
  return HasImmediateOtherDependency(node, other);
}

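// Build a short typed identifier for an instruction, combining a lower-case
// type descriptor with the instruction id: for example "i20" for an int32
// instruction with id 20, or "l15" for a reference (kPrimNot) instruction
// with id 15.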
static const std::string InstructionTypeId(const HInstruction* instruction) {
  std::string id;
  Primitive::Type type = instruction->GetType();
  if (type == Primitive::kPrimNot) {
    id.append("l");
  } else {
    id.append(Primitive::Descriptor(instruction->GetType()));
  }
  // Use lower-case to be closer to the `HGraphVisualizer` output.
  id[0] = std::tolower(id[0]);
  id.append(std::to_string(instruction->GetId()));
  return id;
}

// Ideally we would reuse the graph visualizer code, but it is not available
// from here and it is not worth moving all that code only for our use.
static void DumpAsDotNode(std::ostream& output, const SchedulingNode* node) {
  const HInstruction* instruction = node->GetInstruction();
  // Use the instruction typed id as the node identifier.
  std::string instruction_id = InstructionTypeId(instruction);
  output << instruction_id << "[shape=record, label=\""
         << instruction_id << ' ' << instruction->DebugName() << " [";
  // List the instruction's inputs in its description. When visualizing the
  // graph this helps differentiate data inputs from other dependencies.
  const char* separator = "";
  for (const HInstruction* input : instruction->GetInputs()) {
    output << separator << InstructionTypeId(input);
    separator = ",";
  }
  output << "]";
  // Other properties of the node.
  output << "\\ninternal_latency: " << node->GetInternalLatency();
  output << "\\ncritical_path: " << node->GetCriticalPath();
  if (node->IsSchedulingBarrier()) {
    output << "\\n(barrier)";
  }
  output << "\"];\n";
  // We want program order to go from top to bottom in the graph output, so we
  // reverse the edges and specify `dir=back`.
  for (const SchedulingNode* predecessor : node->GetDataPredecessors()) {
    const HInstruction* predecessor_instruction = predecessor->GetInstruction();
    output << InstructionTypeId(predecessor_instruction) << ":s -> " << instruction_id << ":n "
           << "[label=\"" << predecessor->GetLatency() << "\",dir=back]\n";
  }
  for (const SchedulingNode* predecessor : node->GetOtherPredecessors()) {
    const HInstruction* predecessor_instruction = predecessor->GetInstruction();
    output << InstructionTypeId(predecessor_instruction) << ":s -> " << instruction_id << ":n "
           << "[dir=back,color=blue]\n";
  }
}

void SchedulingGraph::DumpAsDotGraph(const std::string& description,
                                     const ArenaVector<SchedulingNode*>& initial_candidates) {
  // TODO(xueliang): ideally we should move scheduling information into HInstruction; after that
  // we should move this dotty graph dump feature to the visualizer, and have a compiler option
  // for it.
  std::ofstream output("scheduling_graphs.dot", std::ofstream::out | std::ofstream::app);
  // Description of this graph, as a comment.
  output << "// " << description << "\n";
  // Start the dot graph.
  output << "digraph G {\n";
  for (const auto& entry : nodes_map_) {
    DumpAsDotNode(output, entry.second);
  }
  // Create a fake 'end_of_scheduling' node to help visualization of critical paths.
  for (SchedulingNode* node : initial_candidates) {
    const HInstruction* instruction = node->GetInstruction();
    output << InstructionTypeId(instruction) << ":s -> end_of_scheduling:n "
           << "[label=\"" << node->GetLatency() << "\",dir=back]\n";
  }
  // End of the dot graph.
  output << "}\n";
  output.close();
}

SchedulingNode* CriticalPathSchedulingNodeSelector::SelectMaterializedCondition(
    ArenaVector<SchedulingNode*>* nodes, const SchedulingGraph& graph) const {
  // Schedule condition inputs that can be materialized immediately before their use.
  // In the following example, after we have scheduled the HSelect, we want the HLessThan
  // to be scheduled immediately, because it is a materialized condition and will be
  // emitted right before the HSelect in the code generation phase.
  //
  //   i20 HLessThan [...]                  HLessThan    HAdd      HAdd
  //   i21 HAdd [...]                ===>       |          |         |
  //   i22 HAdd [...]                           +----------+---------+
  //   i23 HSelect [i21, i22, i20]                      HSelect

  if (prev_select_ == nullptr) {
    return nullptr;
  }

  const HInstruction* instruction = prev_select_->GetInstruction();
  const HCondition* condition = nullptr;
  DCHECK(instruction != nullptr);

  if (instruction->IsIf()) {
    condition = instruction->AsIf()->InputAt(0)->AsCondition();
  } else if (instruction->IsSelect()) {
    condition = instruction->AsSelect()->GetCondition()->AsCondition();
  }

  SchedulingNode* condition_node = (condition != nullptr) ? graph.GetNode(condition) : nullptr;

  if ((condition_node != nullptr) &&
      condition->HasOnlyOneNonEnvironmentUse() &&
      ContainsElement(*nodes, condition_node)) {
    DCHECK(!condition_node->HasUnscheduledSuccessors());
    // Remove the condition from the list of candidates and schedule it.
    RemoveElement(*nodes, condition_node);
    return condition_node;
  }

  return nullptr;
}

SchedulingNode* CriticalPathSchedulingNodeSelector::PopHighestPriorityNode(
    ArenaVector<SchedulingNode*>* nodes, const SchedulingGraph& graph) {
  DCHECK(!nodes->empty());
  SchedulingNode* select_node = nullptr;

  // Optimize for the materialized-condition, emit-before-use scenario.
  select_node = SelectMaterializedCondition(nodes, graph);

  if (select_node == nullptr) {
    // Get the highest-priority node based on critical path information.
    select_node = (*nodes)[0];
    size_t select = 0;
    for (size_t i = 1, e = nodes->size(); i < e; i++) {
      SchedulingNode* check = (*nodes)[i];
      SchedulingNode* candidate = (*nodes)[select];
      select_node = GetHigherPrioritySchedulingNode(candidate, check);
      if (select_node == check) {
        select = i;
      }
    }
    DeleteNodeAtIndex(nodes, select);
  }

  prev_select_ = select_node;
  return select_node;
}

SchedulingNode* CriticalPathSchedulingNodeSelector::GetHigherPrioritySchedulingNode(
    SchedulingNode* candidate, SchedulingNode* check) const {
  uint32_t candidate_path = candidate->GetCriticalPath();
  uint32_t check_path = check->GetCriticalPath();
  // First look at the critical_path.
  if (check_path != candidate_path) {
    return check_path < candidate_path ? check : candidate;
  }
  // If both critical paths are equal, schedule instructions with a higher latency
  // first in program order.
  return check->GetLatency() < candidate->GetLatency() ? check : candidate;
}

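// Schedule every schedulable block of the graph independently, visiting blocks
// in reverse post order.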
void HScheduler::Schedule(HGraph* graph) {
  for (HBasicBlock* block : graph->GetReversePostOrder()) {
    if (IsSchedulable(block)) {
      Schedule(block);
    }
  }
}

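// List scheduling over one basic block: build the dependency graph by walking
// the instructions backward, compute latencies and critical paths, then
// repeatedly pick the highest-priority candidate (a node with no unscheduled
// successors) and place it at the cursor, which moves backward from the end of
// the block.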
void HScheduler::Schedule(HBasicBlock* block) {
  ArenaVector<SchedulingNode*> scheduling_nodes(arena_->Adapter(kArenaAllocScheduler));

  // Build the scheduling graph.
  scheduling_graph_.Clear();
  for (HBackwardInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
    HInstruction* instruction = it.Current();
    SchedulingNode* node = scheduling_graph_.AddNode(instruction, IsSchedulingBarrier(instruction));
    CalculateLatency(node);
    scheduling_nodes.push_back(node);
  }

  if (scheduling_graph_.Size() <= 1) {
    scheduling_graph_.Clear();
    return;
  }

  cursor_ = block->GetLastInstruction();

  // Find the initial candidates for scheduling.
  candidates_.clear();
  for (SchedulingNode* node : scheduling_nodes) {
    if (!node->HasUnscheduledSuccessors()) {
      node->MaybeUpdateCriticalPath(node->GetLatency());
      candidates_.push_back(node);
    }
  }

  ArenaVector<SchedulingNode*> initial_candidates(arena_->Adapter(kArenaAllocScheduler));
  if (kDumpDotSchedulingGraphs) {
    // Remember the list of initial candidates for debug output purposes.
    initial_candidates.assign(candidates_.begin(), candidates_.end());
  }

  // Schedule all nodes.
  while (!candidates_.empty()) {
    Schedule(selector_->PopHighestPriorityNode(&candidates_, scheduling_graph_));
  }

  if (kDumpDotSchedulingGraphs) {
    // Dump the graph in `dot` format.
    HGraph* graph = block->GetGraph();
    std::stringstream description;
    description << graph->GetDexFile().PrettyMethod(graph->GetMethodIdx())
                << " B" << block->GetBlockId();
    scheduling_graph_.DumpAsDotGraph(description.str(), initial_candidates);
  }
}

void HScheduler::Schedule(SchedulingNode* scheduling_node) {
  // Check whether any of the node's predecessors will be valid candidates after
  // this node is scheduled.
  uint32_t path_to_node = scheduling_node->GetCriticalPath();
  for (SchedulingNode* predecessor : scheduling_node->GetDataPredecessors()) {
    predecessor->MaybeUpdateCriticalPath(
        path_to_node + predecessor->GetInternalLatency() + predecessor->GetLatency());
    predecessor->DecrementNumberOfUnscheduledSuccessors();
    if (!predecessor->HasUnscheduledSuccessors()) {
      candidates_.push_back(predecessor);
    }
  }
  for (SchedulingNode* predecessor : scheduling_node->GetOtherPredecessors()) {
    // Do not update the critical path here. The 'other' (non-data) dependencies
    // usually do not represent a material dependency of one node on another;
    // they exist for program correctness, so we do not use them to compute the
    // critical path.
    predecessor->DecrementNumberOfUnscheduledSuccessors();
    if (!predecessor->HasUnscheduledSuccessors()) {
      candidates_.push_back(predecessor);
    }
  }

  Schedule(scheduling_node->GetInstruction());
}

// Move an instruction to just after the cursor instruction, inside one basic block.
static void MoveAfterInBlock(HInstruction* instruction, HInstruction* cursor) {
  DCHECK_EQ(instruction->GetBlock(), cursor->GetBlock());
  DCHECK_NE(cursor, cursor->GetBlock()->GetLastInstruction());
  DCHECK(!instruction->IsControlFlow());
  DCHECK(!cursor->IsControlFlow());
  instruction->MoveBefore(cursor->GetNext(), /* do_checks */ false);
}

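// Place `instruction` at the current scheduling position: if it is already at
// the cursor, just move the cursor backward; otherwise move the instruction to
// just after the cursor.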
void HScheduler::Schedule(HInstruction* instruction) {
  if (instruction == cursor_) {
    cursor_ = cursor_->GetPrevious();
  } else {
    MoveAfterInBlock(instruction, cursor_);
  }
}

bool HScheduler::IsSchedulable(const HInstruction* instruction) const {
  // We want to avoid exhaustively listing all instructions, so we first check
  // for instruction categories that we know are safe.
  if (instruction->IsControlFlow() ||
      instruction->IsConstant()) {
    return true;
  }
  // Currently all unary and binary operations are safe to schedule, so avoid
  // checking for each of them individually.
  // Since nothing prevents a new scheduling-unsafe HInstruction from subclassing
  // HUnaryOperation (or HBinaryOperation), check in debug mode that we have
  // the exhaustive lists here.
  if (instruction->IsUnaryOperation()) {
    DCHECK(instruction->IsBooleanNot() ||
           instruction->IsNot() ||
           instruction->IsNeg()) << "unexpected instruction " << instruction->DebugName();
    return true;
  }
  if (instruction->IsBinaryOperation()) {
    DCHECK(instruction->IsAdd() ||
           instruction->IsAnd() ||
           instruction->IsCompare() ||
           instruction->IsCondition() ||
           instruction->IsDiv() ||
           instruction->IsMul() ||
           instruction->IsOr() ||
           instruction->IsRem() ||
           instruction->IsRor() ||
           instruction->IsShl() ||
           instruction->IsShr() ||
           instruction->IsSub() ||
           instruction->IsUShr() ||
           instruction->IsXor()) << "unexpected instruction " << instruction->DebugName();
    return true;
  }
  // The scheduler should not see any of these.
  DCHECK(!instruction->IsParallelMove()) << "unexpected instruction " << instruction->DebugName();
  // List of instructions explicitly excluded:
  //    HClearException
  //    HClinitCheck
  //    HDeoptimize
  //    HLoadClass
  //    HLoadException
  //    HMemoryBarrier
  //    HMonitorOperation
  //    HNativeDebugInfo
  //    HThrow
  //    HTryBoundary
  // TODO: Some of the instructions above may be safe to schedule (maybe as
  // scheduling barriers).
  return instruction->IsArrayGet() ||
         instruction->IsArraySet() ||
         instruction->IsArrayLength() ||
         instruction->IsBoundType() ||
         instruction->IsBoundsCheck() ||
         instruction->IsCheckCast() ||
         instruction->IsClassTableGet() ||
         instruction->IsCurrentMethod() ||
         instruction->IsDivZeroCheck() ||
         instruction->IsInstanceFieldGet() ||
         instruction->IsInstanceFieldSet() ||
         instruction->IsInstanceOf() ||
         instruction->IsInvokeInterface() ||
         instruction->IsInvokeStaticOrDirect() ||
         instruction->IsInvokeUnresolved() ||
         instruction->IsInvokeVirtual() ||
         instruction->IsLoadString() ||
         instruction->IsNewArray() ||
         instruction->IsNewInstance() ||
         instruction->IsNullCheck() ||
         instruction->IsPackedSwitch() ||
         instruction->IsParameterValue() ||
         instruction->IsPhi() ||
         instruction->IsReturn() ||
         instruction->IsReturnVoid() ||
         instruction->IsSelect() ||
         instruction->IsStaticFieldGet() ||
         instruction->IsStaticFieldSet() ||
         instruction->IsSuspendCheck() ||
         instruction->IsTypeConversion() ||
         instruction->IsUnresolvedInstanceFieldGet() ||
         instruction->IsUnresolvedInstanceFieldSet() ||
         instruction->IsUnresolvedStaticFieldGet() ||
         instruction->IsUnresolvedStaticFieldSet();
}

bool HScheduler::IsSchedulable(const HBasicBlock* block) const {
  // We may only be interested in loop blocks.
  if (only_optimize_loop_blocks_ && !block->IsInLoop()) {
    return false;
  }
  if (block->GetTryCatchInformation() != nullptr) {
    // Do not schedule blocks that are part of try-catch, because the scheduler
    // cannot see whether the catch block makes assumptions about the instruction
    // order in the try block. In the following example, if we enabled the
    // scheduler for the try block, the MultiplyAccumulate could be scheduled
    // before the DivZeroCheck, which could result in an incorrect value of `c`
    // in the catch block.
    //     try {
    //       a = a / b;      // DivZeroCheck
    //                       // Div
    //       c = c * d + e;  // MultiplyAccumulate
    //     } catch { System.out.print(c); }
    return false;
  }
  // Check whether all instructions in this block are schedulable.
  for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
    if (!IsSchedulable(it.Current())) {
      return false;
    }
  }
  return true;
}

bool HScheduler::IsSchedulingBarrier(const HInstruction* instr) const {
  return instr->IsControlFlow() ||
         // Don't break the calling convention.
         instr->IsParameterValue() ||
         // Code generation of goto relies on SuspendCheck's position.
         instr->IsSuspendCheck();
}

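// Instruction scheduling is currently only enabled for ARM64; for other
// instruction sets this pass is a no-op.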
void HInstructionScheduling::Run(bool only_optimize_loop_blocks,
                                 bool schedule_randomly) {
  // Avoid compilation errors when compiling for unsupported instruction sets.
  UNUSED(only_optimize_loop_blocks);
  UNUSED(schedule_randomly);
  switch (instruction_set_) {
#ifdef ART_ENABLE_CODEGEN_arm64
    case kArm64: {
      // Phase-local allocator that allocates scheduler internal data structures like
      // scheduling nodes, the internal nodes map, dependencies, etc.
      ArenaAllocator arena_allocator(graph_->GetArena()->GetArenaPool());

      CriticalPathSchedulingNodeSelector critical_path_selector;
      RandomSchedulingNodeSelector random_selector;
      SchedulingNodeSelector* selector = schedule_randomly
          ? static_cast<SchedulingNodeSelector*>(&random_selector)
          : static_cast<SchedulingNodeSelector*>(&critical_path_selector);

      arm64::HSchedulerARM64 scheduler(&arena_allocator, selector);
      scheduler.SetOnlyOptimizeLoopBlocks(only_optimize_loop_blocks);
      scheduler.Schedule(graph_);
      break;
    }
#endif
    default:
      break;
  }
}

}  // namespace art