author Vladimir Marko <vmarko@google.com> 2024-02-12 11:17:28 +0100
committer Vladimír Marko <vmarko@google.com> 2024-02-13 15:28:35 +0000
commit 77e5997b524a133d38585da8bf58420f2411f7ad (patch)
tree 0273c71559a03593b358f04ef060214796159863 /compiler/optimizing/scheduler_arm64.cc
parent 2389869ed372eebd886c2f984f23ec7e342da22b (diff)
Optimizing: Refactor `HScheduler`.
Move `SchedulingLatencyVisitor{ARM,ARM64}` to .cc files.

Test: m test-art-host-gtest
Test: testrunner.py --host --optimizing
Test: run-gtests.sh
Test: testrunner.py --target --optimizing
Change-Id: I15cb1a4cbef00a328fec947189412c502bf80f46
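In outline, the change applies a standard C++ technique: a helper class used by exactly one translation unit moves out of the header and into that .cc file, so the header exposes only the scheduler interface and its includers no longer recompile when the visitor changes. A minimal sketch of the shape, with hypothetical names (`Widget` and `HelperVisitor` are illustrations, not ART types):

    // widget.h: only the public interface stays visible.
    class Widget {
     public:
      int Process(int input) const;
    };

    // widget.cc: the helper becomes an implementation detail.
    #include "widget.h"

    namespace {
    class HelperVisitor {
     public:
      int Visit(int input) const { return input + 1; }
    };
    }  // namespace

    int Widget::Process(int input) const {
      HelperVisitor visitor;  // Stack-local, like the latency visitor in this patch.
      return visitor.Visit(input);
    }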
Diffstat (limited to 'compiler/optimizing/scheduler_arm64.cc')
-rw-r--r--  compiler/optimizing/scheduler_arm64.cc | 134
1 file changed, 134 insertions(+), 0 deletions(-)
diff --git a/compiler/optimizing/scheduler_arm64.cc b/compiler/optimizing/scheduler_arm64.cc
index 5113cf446d..08b8a3fb78 100644
--- a/compiler/optimizing/scheduler_arm64.cc
+++ b/compiler/optimizing/scheduler_arm64.cc
@@ -23,6 +23,115 @@
namespace art HIDDEN {
namespace arm64 {
+static constexpr uint32_t kArm64MemoryLoadLatency = 5;
+static constexpr uint32_t kArm64MemoryStoreLatency = 3;
+
+static constexpr uint32_t kArm64CallInternalLatency = 10;
+static constexpr uint32_t kArm64CallLatency = 5;
+
+// AArch64 instruction latency.
+// We currently assume that all arm64 CPUs share the same instruction latency list.
+static constexpr uint32_t kArm64IntegerOpLatency = 2;
+static constexpr uint32_t kArm64FloatingPointOpLatency = 5;
+
+static constexpr uint32_t kArm64DataProcWithShifterOpLatency = 3;
+static constexpr uint32_t kArm64DivDoubleLatency = 30;
+static constexpr uint32_t kArm64DivFloatLatency = 15;
+static constexpr uint32_t kArm64DivIntegerLatency = 5;
+static constexpr uint32_t kArm64LoadStringInternalLatency = 7;
+static constexpr uint32_t kArm64MulFloatingPointLatency = 6;
+static constexpr uint32_t kArm64MulIntegerLatency = 6;
+static constexpr uint32_t kArm64TypeConversionFloatingPointIntegerLatency = 5;
+static constexpr uint32_t kArm64BranchLatency = kArm64IntegerOpLatency;
+
+static constexpr uint32_t kArm64SIMDFloatingPointOpLatency = 10;
+static constexpr uint32_t kArm64SIMDIntegerOpLatency = 6;
+static constexpr uint32_t kArm64SIMDMemoryLoadLatency = 10;
+static constexpr uint32_t kArm64SIMDMemoryStoreLatency = 6;
+static constexpr uint32_t kArm64SIMDMulFloatingPointLatency = 12;
+static constexpr uint32_t kArm64SIMDMulIntegerLatency = 12;
+static constexpr uint32_t kArm64SIMDReplicateOpLatency = 16;
+static constexpr uint32_t kArm64SIMDDivDoubleLatency = 60;
+static constexpr uint32_t kArm64SIMDDivFloatLatency = 30;
+static constexpr uint32_t kArm64SIMDTypeConversionInt2FPLatency = 10;
+
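// Worked example (illustration only, not part of the patch): the
// dependence-chain latency of `a * b + c[i]` under the table above,
// assuming the integer multiply and the array load issue independently
// and the add has to wait for both results.
static constexpr uint32_t kExampleCriticalPath =
    (kArm64MulIntegerLatency > kArm64MemoryLoadLatency
         ? kArm64MulIntegerLatency
         : kArm64MemoryLoadLatency) +
    kArm64IntegerOpLatency;
static_assert(kExampleCriticalPath == 8u, "max(6, 5) + 2");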
+class SchedulingLatencyVisitorARM64 final : public SchedulingLatencyVisitor {
+ public:
+ // Default visitor for instructions not handled specifically below.
+ void VisitInstruction([[maybe_unused]] HInstruction* instruction) override {
+ last_visited_latency_ = kArm64IntegerOpLatency;
+ }
+
+// We add a second unused parameter to be able to use this macro like the others
+// defined in `nodes.h`.
+#define FOR_EACH_SCHEDULED_COMMON_INSTRUCTION(M) \
+ M(ArrayGet , unused) \
+ M(ArrayLength , unused) \
+ M(ArraySet , unused) \
+ M(BoundsCheck , unused) \
+ M(Div , unused) \
+ M(InstanceFieldGet , unused) \
+ M(InstanceOf , unused) \
+ M(LoadString , unused) \
+ M(Mul , unused) \
+ M(NewArray , unused) \
+ M(NewInstance , unused) \
+ M(Rem , unused) \
+ M(StaticFieldGet , unused) \
+ M(SuspendCheck , unused) \
+ M(TypeConversion , unused) \
+ M(VecReplicateScalar , unused) \
+ M(VecExtractScalar , unused) \
+ M(VecReduce , unused) \
+ M(VecCnv , unused) \
+ M(VecNeg , unused) \
+ M(VecAbs , unused) \
+ M(VecNot , unused) \
+ M(VecAdd , unused) \
+ M(VecHalvingAdd , unused) \
+ M(VecSub , unused) \
+ M(VecMul , unused) \
+ M(VecDiv , unused) \
+ M(VecMin , unused) \
+ M(VecMax , unused) \
+ M(VecAnd , unused) \
+ M(VecAndNot , unused) \
+ M(VecOr , unused) \
+ M(VecXor , unused) \
+ M(VecShl , unused) \
+ M(VecShr , unused) \
+ M(VecUShr , unused) \
+ M(VecSetScalars , unused) \
+ M(VecMultiplyAccumulate, unused) \
+ M(VecLoad , unused) \
+ M(VecStore , unused)
+
+#define FOR_EACH_SCHEDULED_ABSTRACT_INSTRUCTION(M) \
+ M(BinaryOperation , unused) \
+ M(Invoke , unused)
+
+#define FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(M) \
+ M(BitwiseNegatedRight, unused) \
+ M(MultiplyAccumulate, unused) \
+ M(IntermediateAddress, unused) \
+ M(IntermediateAddressIndex, unused) \
+ M(DataProcWithShifterOp, unused)
+
+#define DECLARE_VISIT_INSTRUCTION(type, unused) \
+ void Visit##type(H##type* instruction) override;
+
+ FOR_EACH_SCHEDULED_COMMON_INSTRUCTION(DECLARE_VISIT_INSTRUCTION)
+ FOR_EACH_SCHEDULED_ABSTRACT_INSTRUCTION(DECLARE_VISIT_INSTRUCTION)
+ FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(DECLARE_VISIT_INSTRUCTION)
+ FOR_EACH_CONCRETE_INSTRUCTION_ARM64(DECLARE_VISIT_INSTRUCTION)
+
+#undef DECLARE_VISIT_INSTRUCTION
+
+ private:
+ void HandleSimpleArithmeticSIMD(HVecOperation* instr);
+ void HandleVecAddress(HVecMemoryOperation* instruction, size_t size);
+};
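// For reference, each entry in the lists above passes through
// DECLARE_VISIT_INSTRUCTION mechanically; e.g. `M(ArrayGet, unused)`
// expands to
//
//   void VisitArrayGet(HArrayGet* instruction) override;
//
// so the class declares one override per scheduled instruction kind,
// with the definitions following in this file.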
+
void SchedulingLatencyVisitorARM64::VisitBinaryOperation(HBinaryOperation* instr) {
last_visited_latency_ = DataType::IsFloatingPointType(instr->GetResultType())
? kArm64FloatingPointOpLatency
@@ -348,5 +457,30 @@ void SchedulingLatencyVisitorARM64::VisitVecStore(HVecStore* instr) {
last_visited_latency_ = kArm64SIMDMemoryStoreLatency;
}
+bool HSchedulerARM64::IsSchedulable(const HInstruction* instruction) const {
+ switch (instruction->GetKind()) {
+#define SCHEDULABLE_CASE(type, unused) \
+ case HInstruction::InstructionKind::k##type: \
+ return true;
+ FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(SCHEDULABLE_CASE)
+ FOR_EACH_CONCRETE_INSTRUCTION_ARM64(SCHEDULABLE_CASE)
+ FOR_EACH_SCHEDULED_COMMON_INSTRUCTION(SCHEDULABLE_CASE)
+#undef SCHEDULABLE_CASE
+
+ default:
+ return HScheduler::IsSchedulable(instruction);
+ }
+}
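// Expanded, each SCHEDULABLE_CASE entry is just a `case` label; e.g.
// `M(MultiplyAccumulate, unused)` becomes
//
//   case HInstruction::InstructionKind::kMultiplyAccumulate:
//     return true;
//
// so the switch accepts exactly the instruction kinds the latency
// visitor above handles and defers everything else to the generic
// HScheduler::IsSchedulable().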
+
+std::pair<SchedulingGraph, ScopedArenaVector<SchedulingNode*>>
+HSchedulerARM64::BuildSchedulingGraph(
+ HBasicBlock* block,
+ ScopedArenaAllocator* allocator,
+ const HeapLocationCollector* heap_location_collector) {
+ SchedulingLatencyVisitorARM64 latency_visitor;
+ return HScheduler::BuildSchedulingGraph(
+ block, allocator, heap_location_collector, &latency_visitor);
+}
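// Call shape from a hypothetical scheduling pass (sketch, not code from
// this patch): the latency visitor is created and destroyed entirely
// inside this translation unit, which is what allows its class to move
// out of the header.
//
//   auto [graph, nodes] = scheduler.BuildSchedulingGraph(
//       block, &allocator, heap_location_collector);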
+
} // namespace arm64
} // namespace art