summaryrefslogtreecommitdiff
path: root/compiler/optimizing/scheduler_arm.h
diff options
context:
space:
mode:
author Vladimir Marko <vmarko@google.com> 2024-02-12 11:17:28 +0100
committer Vladimír Marko <vmarko@google.com> 2024-02-13 15:28:35 +0000
commit 77e5997b524a133d38585da8bf58420f2411f7ad (patch)
tree0273c71559a03593b358f04ef060214796159863 /compiler/optimizing/scheduler_arm.h
parent 2389869ed372eebd886c2f984f23ec7e342da22b (diff)
Optimizing: Refactor `HScheduler`.
Move `SchedulingLatencyVisitor{ARM,ARM64}` to .cc files. Test: m test-art-host-gtest Test: testrunner.py --host --optimizing Test: run-gtests.sh Test: testrunner.py --target --optimizing Change-Id: I15cb1a4cbef00a328fec947189412c502bf80f46
Diffstat (limited to 'compiler/optimizing/scheduler_arm.h')
-rw-r--r-- compiler/optimizing/scheduler_arm.h 140
1 file changed, 14 insertions, 126 deletions
diff --git a/compiler/optimizing/scheduler_arm.h b/compiler/optimizing/scheduler_arm.h
index cf00fa12a3..25eac1b2c4 100644
--- a/compiler/optimizing/scheduler_arm.h
+++ b/compiler/optimizing/scheduler_arm.h
@@ -18,144 +18,32 @@
#define ART_COMPILER_OPTIMIZING_SCHEDULER_ARM_H_
#include "base/macros.h"
-#include "code_generator_arm_vixl.h"
#include "scheduler.h"
namespace art HIDDEN {
-namespace arm {
-// AArch32 instruction latencies.
-// We currently assume that all ARM CPUs share the same instruction latency list.
-// The following latencies were tuned based on performance experiments and
-// automatic tuning using differential evolution approach on various benchmarks.
-static constexpr uint32_t kArmIntegerOpLatency = 2;
-static constexpr uint32_t kArmFloatingPointOpLatency = 11;
-static constexpr uint32_t kArmDataProcWithShifterOpLatency = 4;
-static constexpr uint32_t kArmMulIntegerLatency = 6;
-static constexpr uint32_t kArmMulFloatingPointLatency = 11;
-static constexpr uint32_t kArmDivIntegerLatency = 10;
-static constexpr uint32_t kArmDivFloatLatency = 20;
-static constexpr uint32_t kArmDivDoubleLatency = 25;
-static constexpr uint32_t kArmTypeConversionFloatingPointIntegerLatency = 11;
-static constexpr uint32_t kArmMemoryLoadLatency = 9;
-static constexpr uint32_t kArmMemoryStoreLatency = 9;
-static constexpr uint32_t kArmMemoryBarrierLatency = 6;
-static constexpr uint32_t kArmBranchLatency = 4;
-static constexpr uint32_t kArmCallLatency = 5;
-static constexpr uint32_t kArmCallInternalLatency = 29;
-static constexpr uint32_t kArmLoadStringInternalLatency = 10;
-static constexpr uint32_t kArmNopLatency = 2;
-static constexpr uint32_t kArmLoadWithBakerReadBarrierLatency = 18;
-static constexpr uint32_t kArmRuntimeTypeCheckLatency = 46;
-
-class SchedulingLatencyVisitorARM final : public SchedulingLatencyVisitor {
- public:
- explicit SchedulingLatencyVisitorARM(CodeGenerator* codegen)
- : codegen_(down_cast<CodeGeneratorARMVIXL*>(codegen)) {}
-
- // Default visitor for instructions not handled specifically below.
- void VisitInstruction([[maybe_unused]] HInstruction*) override {
- last_visited_latency_ = kArmIntegerOpLatency;
- }
-
-// We add a second unused parameter to be able to use this macro like the others
-// defined in `nodes.h`.
-#define FOR_EACH_SCHEDULED_ARM_INSTRUCTION(M) \
- M(ArrayGet, unused) \
- M(ArrayLength, unused) \
- M(ArraySet, unused) \
- M(Add, unused) \
- M(Sub, unused) \
- M(And, unused) \
- M(Or, unused) \
- M(Ror, unused) \
- M(Xor, unused) \
- M(Shl, unused) \
- M(Shr, unused) \
- M(UShr, unused) \
- M(Mul, unused) \
- M(Div, unused) \
- M(Condition, unused) \
- M(Compare, unused) \
- M(BoundsCheck, unused) \
- M(InstanceFieldGet, unused) \
- M(InstanceFieldSet, unused) \
- M(InstanceOf, unused) \
- M(Invoke, unused) \
- M(LoadString, unused) \
- M(NewArray, unused) \
- M(NewInstance, unused) \
- M(Rem, unused) \
- M(StaticFieldGet, unused) \
- M(StaticFieldSet, unused) \
- M(SuspendCheck, unused) \
- M(TypeConversion, unused)
-
-#define FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(M) \
- M(BitwiseNegatedRight, unused) \
- M(MultiplyAccumulate, unused) \
- M(IntermediateAddress, unused) \
- M(IntermediateAddressIndex, unused) \
- M(DataProcWithShifterOp, unused)
-
-#define DECLARE_VISIT_INSTRUCTION(type, unused) \
- void Visit##type(H##type* instruction) override;
- FOR_EACH_SCHEDULED_ARM_INSTRUCTION(DECLARE_VISIT_INSTRUCTION)
- FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(DECLARE_VISIT_INSTRUCTION)
- FOR_EACH_CONCRETE_INSTRUCTION_ARM(DECLARE_VISIT_INSTRUCTION)
+class CodeGenerator;
-#undef DECLARE_VISIT_INSTRUCTION
-
- private:
- bool CanGenerateTest(HCondition* cond);
- void HandleGenerateConditionWithZero(IfCondition cond);
- void HandleGenerateLongTestConstant(HCondition* cond);
- void HandleGenerateLongTest(HCondition* cond);
- void HandleGenerateLongComparesAndJumps();
- void HandleGenerateTest(HCondition* cond);
- void HandleGenerateConditionGeneric(HCondition* cond);
- void HandleGenerateEqualLong(HCondition* cond);
- void HandleGenerateConditionLong(HCondition* cond);
- void HandleGenerateConditionIntegralOrNonPrimitive(HCondition* cond);
- void HandleCondition(HCondition* instr);
- void HandleBinaryOperationLantencies(HBinaryOperation* instr);
- void HandleBitwiseOperationLantencies(HBinaryOperation* instr);
- void HandleShiftLatencies(HBinaryOperation* instr);
- void HandleDivRemConstantIntegralLatencies(int32_t imm);
- void HandleFieldSetLatencies(HInstruction* instruction, const FieldInfo& field_info);
- void HandleFieldGetLatencies(HInstruction* instruction, const FieldInfo& field_info);
- void HandleGenerateDataProcInstruction(bool internal_latency = false);
- void HandleGenerateDataProc(HDataProcWithShifterOp* instruction);
- void HandleGenerateLongDataProc(HDataProcWithShifterOp* instruction);
-
- // The latency setting for each HInstruction depends on how CodeGenerator may generate code,
- // latency visitors may query CodeGenerator for such information for accurate latency settings.
- CodeGeneratorARMVIXL* codegen_;
-};
+namespace arm {
-class HSchedulerARM : public HScheduler {
+class HSchedulerARM final : public HScheduler {
public:
- HSchedulerARM(SchedulingNodeSelector* selector,
- SchedulingLatencyVisitorARM* arm_latency_visitor)
- : HScheduler(arm_latency_visitor, selector) {}
+ HSchedulerARM(SchedulingNodeSelector* selector, CodeGenerator* codegen)
+ : HScheduler(selector), codegen_(codegen) {}
~HSchedulerARM() override {}
- bool IsSchedulable(const HInstruction* instruction) const override {
-#define CASE_INSTRUCTION_KIND(type, unused) case \
- HInstruction::InstructionKind::k##type:
- switch (instruction->GetKind()) {
- FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(CASE_INSTRUCTION_KIND)
- return true;
- FOR_EACH_CONCRETE_INSTRUCTION_ARM(CASE_INSTRUCTION_KIND)
- return true;
- default:
- return HScheduler::IsSchedulable(instruction);
- }
-#undef CASE_INSTRUCTION_KIND
- }
+ bool IsSchedulable(const HInstruction* instruction) const override;
+
+ protected:
+ std::pair<SchedulingGraph, ScopedArenaVector<SchedulingNode*>> BuildSchedulingGraph(
+ HBasicBlock* block,
+ ScopedArenaAllocator* allocator,
+ const HeapLocationCollector* heap_location_collector) override;
private:
DISALLOW_COPY_AND_ASSIGN(HSchedulerARM);
+
+ CodeGenerator* const codegen_;
};
} // namespace arm