author Vladimir Marko <vmarko@google.com> 2024-02-12 11:17:28 +0100
committer Vladimír Marko <vmarko@google.com> 2024-02-13 15:28:35 +0000
commit 77e5997b524a133d38585da8bf58420f2411f7ad (patch)
tree 0273c71559a03593b358f04ef060214796159863 /compiler/optimizing/scheduler_arm64.cc
parent 2389869ed372eebd886c2f984f23ec7e342da22b (diff)
Optimizing: Refactor `HScheduler`.
Move `SchedulingLatencyVisitor{ARM,ARM64}` to .cc files.

Test: m test-art-host-gtest
Test: testrunner.py --host --optimizing
Test: run-gtests.sh
Test: testrunner.py --target --optimizing
Change-Id: I15cb1a4cbef00a328fec947189412c502bf80f46
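In outline, the change applies a standard C++ technique: a helper class used by exactly one translation unit moves out of the header and into that .cc file, so the header exposes only the scheduler interface and its includers no longer recompile when the visitor changes. A minimal sketch of the shape, with hypothetical names (`Widget` and `HelperVisitor` are illustrations, not ART types):

    // widget.h: only the public interface stays visible.
    class Widget {
     public:
      int Process(int input) const;
    };

    // widget.cc: the helper becomes an implementation detail.
    #include "widget.h"

    namespace {
    class HelperVisitor {
     public:
      int Visit(int input) const { return input + 1; }
    };
    }  // namespace

    int Widget::Process(int input) const {
      HelperVisitor visitor;  // Stack-local, like the latency visitor in this patch.
      return visitor.Visit(input);
    }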
Diffstat (limited to 'compiler/optimizing/scheduler_arm64.cc')
-rw-r--r--  compiler/optimizing/scheduler_arm64.cc | 134
1 file changed, 134 insertions(+), 0 deletions(-)
diff --git a/compiler/optimizing/scheduler_arm64.cc b/compiler/optimizing/scheduler_arm64.cc
index 5113cf446d..08b8a3fb78 100644
--- a/compiler/optimizing/scheduler_arm64.cc
+++ b/compiler/optimizing/scheduler_arm64.cc
@@ -23,6 +23,115 @@
namespace art HIDDEN {
namespace arm64 {
+static constexpr uint32_t kArm64MemoryLoadLatency = 5;
+static constexpr uint32_t kArm64MemoryStoreLatency = 3;
+
+static constexpr uint32_t kArm64CallInternalLatency = 10;
+static constexpr uint32_t kArm64CallLatency = 5;
+
+// AArch64 instruction latency.
+// We currently assume that all arm64 CPUs share the same instruction latency list.
+static constexpr uint32_t kArm64IntegerOpLatency = 2;
+static constexpr uint32_t kArm64FloatingPointOpLatency = 5;
+
+static constexpr uint32_t kArm64DataProcWithShifterOpLatency = 3;
+static constexpr uint32_t kArm64DivDoubleLatency = 30;
+static constexpr uint32_t kArm64DivFloatLatency = 15;
+static constexpr uint32_t kArm64DivIntegerLatency = 5;
+static constexpr uint32_t kArm64LoadStringInternalLatency = 7;
+static constexpr uint32_t kArm64MulFloatingPointLatency = 6;
+static constexpr uint32_t kArm64MulIntegerLatency = 6;
+static constexpr uint32_t kArm64TypeConversionFloatingPointIntegerLatency = 5;
+static constexpr uint32_t kArm64BranchLatency = kArm64IntegerOpLatency;
+
+static constexpr uint32_t kArm64SIMDFloatingPointOpLatency = 10;
+static constexpr uint32_t kArm64SIMDIntegerOpLatency = 6;
+static constexpr uint32_t kArm64SIMDMemoryLoadLatency = 10;
+static constexpr uint32_t kArm64SIMDMemoryStoreLatency = 6;
+static constexpr uint32_t kArm64SIMDMulFloatingPointLatency = 12;
+static constexpr uint32_t kArm64SIMDMulIntegerLatency = 12;
+static constexpr uint32_t kArm64SIMDReplicateOpLatency = 16;
+static constexpr uint32_t kArm64SIMDDivDoubleLatency = 60;
+static constexpr uint32_t kArm64SIMDDivFloatLatency = 30;
+static constexpr uint32_t kArm64SIMDTypeConversionInt2FPLatency = 10;
+
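// Worked example (illustration only, not part of the patch): the
// dependence-chain latency of `a * b + c[i]` under the table above,
// assuming the integer multiply and the array load issue independently
// and the add has to wait for both results.
static constexpr uint32_t kExampleCriticalPath =
    (kArm64MulIntegerLatency > kArm64MemoryLoadLatency
         ? kArm64MulIntegerLatency
         : kArm64MemoryLoadLatency) +
    kArm64IntegerOpLatency;
static_assert(kExampleCriticalPath == 8u, "max(6, 5) + 2");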
+class SchedulingLatencyVisitorARM64 final : public SchedulingLatencyVisitor {
+ public:
+ // Default visitor for instructions not handled specifically below.
+ void VisitInstruction([[maybe_unused]] HInstruction* instruction) override {
+ last_visited_latency_ = kArm64IntegerOpLatency;
+ }
+
+// We add a second unused parameter to be able to use this macro like the others
+// defined in `nodes.h`.
+#define FOR_EACH_SCHEDULED_COMMON_INSTRUCTION(M) \
+ M(ArrayGet , unused) \
+ M(ArrayLength , unused) \
+ M(ArraySet , unused) \
+ M(BoundsCheck , unused) \
+ M(Div , unused) \
+ M(InstanceFieldGet , unused) \
+ M(InstanceOf , unused) \
+ M(LoadString , unused) \
+ M(Mul , unused) \
+ M(NewArray , unused) \
+ M(NewInstance , unused) \
+ M(Rem , unused) \
+ M(StaticFieldGet , unused) \
+ M(SuspendCheck , unused) \
+ M(TypeConversion , unused) \
+ M(VecReplicateScalar , unused) \
+ M(VecExtractScalar , unused) \
+ M(VecReduce , unused) \
+ M(VecCnv , unused) \
+ M(VecNeg , unused) \
+ M(VecAbs , unused) \
+ M(VecNot , unused) \
+ M(VecAdd , unused) \
+ M(VecHalvingAdd , unused) \
+ M(VecSub , unused) \
+ M(VecMul , unused) \
+ M(VecDiv , unused) \
+ M(VecMin , unused) \
+ M(VecMax , unused) \
+ M(VecAnd , unused) \
+ M(VecAndNot , unused) \
+ M(VecOr , unused) \
+ M(VecXor , unused) \
+ M(VecShl , unused) \
+ M(VecShr , unused) \
+ M(VecUShr , unused) \
+ M(VecSetScalars , unused) \
+ M(VecMultiplyAccumulate, unused) \
+ M(VecLoad , unused) \
+ M(VecStore , unused)
+
+#define FOR_EACH_SCHEDULED_ABSTRACT_INSTRUCTION(M) \
+ M(BinaryOperation , unused) \
+ M(Invoke , unused)
+
+#define FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(M) \
+ M(BitwiseNegatedRight, unused) \
+ M(MultiplyAccumulate, unused) \
+ M(IntermediateAddress, unused) \
+ M(IntermediateAddressIndex, unused) \
+ M(DataProcWithShifterOp, unused)
+
+#define DECLARE_VISIT_INSTRUCTION(type, unused) \
+ void Visit##type(H##type* instruction) override;
+
+ FOR_EACH_SCHEDULED_COMMON_INSTRUCTION(DECLARE_VISIT_INSTRUCTION)
+ FOR_EACH_SCHEDULED_ABSTRACT_INSTRUCTION(DECLARE_VISIT_INSTRUCTION)
+ FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(DECLARE_VISIT_INSTRUCTION)
+ FOR_EACH_CONCRETE_INSTRUCTION_ARM64(DECLARE_VISIT_INSTRUCTION)
+
+#undef DECLARE_VISIT_INSTRUCTION
+
+ private:
+ void HandleSimpleArithmeticSIMD(HVecOperation* instr);
+ void HandleVecAddress(HVecMemoryOperation* instruction, size_t size);
+};
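// For reference, each entry in the lists above passes through
// DECLARE_VISIT_INSTRUCTION mechanically; e.g. `M(ArrayGet, unused)`
// expands to
//
//   void VisitArrayGet(HArrayGet* instruction) override;
//
// so the class declares one override per scheduled instruction kind,
// with the definitions following in this file.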
+
void SchedulingLatencyVisitorARM64::VisitBinaryOperation(HBinaryOperation* instr) {
last_visited_latency_ = DataType::IsFloatingPointType(instr->GetResultType())
? kArm64FloatingPointOpLatency
@@ -348,5 +457,30 @@ void SchedulingLatencyVisitorARM64::VisitVecStore(HVecStore* instr) {
last_visited_latency_ = kArm64SIMDMemoryStoreLatency;
}
+bool HSchedulerARM64::IsSchedulable(const HInstruction* instruction) const {
+ switch (instruction->GetKind()) {
+#define SCHEDULABLE_CASE(type, unused) \
+ case HInstruction::InstructionKind::k##type: \
+ return true;
+ FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(SCHEDULABLE_CASE)
+ FOR_EACH_CONCRETE_INSTRUCTION_ARM64(SCHEDULABLE_CASE)
+ FOR_EACH_SCHEDULED_COMMON_INSTRUCTION(SCHEDULABLE_CASE)
+#undef SCHEDULABLE_CASE
+
+ default:
+ return HScheduler::IsSchedulable(instruction);
+ }
+}
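// Expanded, each SCHEDULABLE_CASE entry is just a `case` label; e.g.
// `M(MultiplyAccumulate, unused)` becomes
//
//   case HInstruction::InstructionKind::kMultiplyAccumulate:
//     return true;
//
// so the switch accepts exactly the instruction kinds the latency
// visitor above handles and defers everything else to the generic
// HScheduler::IsSchedulable().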
+
+std::pair<SchedulingGraph, ScopedArenaVector<SchedulingNode*>>
+HSchedulerARM64::BuildSchedulingGraph(
+ HBasicBlock* block,
+ ScopedArenaAllocator* allocator,
+ const HeapLocationCollector* heap_location_collector) {
+ SchedulingLatencyVisitorARM64 latency_visitor;
+ return HScheduler::BuildSchedulingGraph(
+ block, allocator, heap_location_collector, &latency_visitor);
+}
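// Call shape from a hypothetical scheduling pass (sketch, not code from
// this patch): the latency visitor is created and destroyed entirely
// inside this translation unit, which is what allows its class to move
// out of the header.
//
//   auto [graph, nodes] = scheduler.BuildSchedulingGraph(
//       block, &allocator, heap_location_collector);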
+
} // namespace arm64
} // namespace art