summaryrefslogtreecommitdiff
path: root/compiler/optimizing/scheduler_arm.h
diff options
context:
space:
mode:
author Vladimir Marko <vmarko@google.com> 2024-02-12 11:17:28 +0100
committer Vladimír Marko <vmarko@google.com> 2024-02-13 15:28:35 +0000
commit 77e5997b524a133d38585da8bf58420f2411f7ad (patch)
tree0273c71559a03593b358f04ef060214796159863 /compiler/optimizing/scheduler_arm.h
parent 2389869ed372eebd886c2f984f23ec7e342da22b (diff)
Optimizing: Refactor `HScheduler`.
Move `SchedulingLatencyVisitor{ARM,ARM64}` to .cc files. Test: m test-art-host-gtest Test: testrunner.py --host --optimizing Test: run-gtests.sh Test: testrunner.py --target --optimizing Change-Id: I15cb1a4cbef00a328fec947189412c502bf80f46
Diffstat (limited to 'compiler/optimizing/scheduler_arm.h')
-rw-r--r-- compiler/optimizing/scheduler_arm.h 140
1 file changed, 14 insertions, 126 deletions
diff --git a/compiler/optimizing/scheduler_arm.h b/compiler/optimizing/scheduler_arm.h
index cf00fa12a3..25eac1b2c4 100644
--- a/compiler/optimizing/scheduler_arm.h
+++ b/compiler/optimizing/scheduler_arm.h
@@ -18,144 +18,32 @@
#define ART_COMPILER_OPTIMIZING_SCHEDULER_ARM_H_
#include "base/macros.h"
-#include "code_generator_arm_vixl.h"
#include "scheduler.h"
namespace art HIDDEN {
-namespace arm {
-// AArch32 instruction latencies.
-// We currently assume that all ARM CPUs share the same instruction latency list.
-// The following latencies were tuned based on performance experiments and
-// automatic tuning using differential evolution approach on various benchmarks.
-static constexpr uint32_t kArmIntegerOpLatency = 2;
-static constexpr uint32_t kArmFloatingPointOpLatency = 11;
-static constexpr uint32_t kArmDataProcWithShifterOpLatency = 4;
-static constexpr uint32_t kArmMulIntegerLatency = 6;
-static constexpr uint32_t kArmMulFloatingPointLatency = 11;
-static constexpr uint32_t kArmDivIntegerLatency = 10;
-static constexpr uint32_t kArmDivFloatLatency = 20;
-static constexpr uint32_t kArmDivDoubleLatency = 25;
-static constexpr uint32_t kArmTypeConversionFloatingPointIntegerLatency = 11;
-static constexpr uint32_t kArmMemoryLoadLatency = 9;
-static constexpr uint32_t kArmMemoryStoreLatency = 9;
-static constexpr uint32_t kArmMemoryBarrierLatency = 6;
-static constexpr uint32_t kArmBranchLatency = 4;
-static constexpr uint32_t kArmCallLatency = 5;
-static constexpr uint32_t kArmCallInternalLatency = 29;
-static constexpr uint32_t kArmLoadStringInternalLatency = 10;
-static constexpr uint32_t kArmNopLatency = 2;
-static constexpr uint32_t kArmLoadWithBakerReadBarrierLatency = 18;
-static constexpr uint32_t kArmRuntimeTypeCheckLatency = 46;
-
-class SchedulingLatencyVisitorARM final : public SchedulingLatencyVisitor {
- public:
- explicit SchedulingLatencyVisitorARM(CodeGenerator* codegen)
- : codegen_(down_cast<CodeGeneratorARMVIXL*>(codegen)) {}
-
- // Default visitor for instructions not handled specifically below.
- void VisitInstruction([[maybe_unused]] HInstruction*) override {
- last_visited_latency_ = kArmIntegerOpLatency;
- }
-
-// We add a second unused parameter to be able to use this macro like the others
-// defined in `nodes.h`.
-#define FOR_EACH_SCHEDULED_ARM_INSTRUCTION(M) \
- M(ArrayGet, unused) \
- M(ArrayLength, unused) \
- M(ArraySet, unused) \
- M(Add, unused) \
- M(Sub, unused) \
- M(And, unused) \
- M(Or, unused) \
- M(Ror, unused) \
- M(Xor, unused) \
- M(Shl, unused) \
- M(Shr, unused) \
- M(UShr, unused) \
- M(Mul, unused) \
- M(Div, unused) \
- M(Condition, unused) \
- M(Compare, unused) \
- M(BoundsCheck, unused) \
- M(InstanceFieldGet, unused) \
- M(InstanceFieldSet, unused) \
- M(InstanceOf, unused) \
- M(Invoke, unused) \
- M(LoadString, unused) \
- M(NewArray, unused) \
- M(NewInstance, unused) \
- M(Rem, unused) \
- M(StaticFieldGet, unused) \
- M(StaticFieldSet, unused) \
- M(SuspendCheck, unused) \
- M(TypeConversion, unused)
-
-#define FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(M) \
- M(BitwiseNegatedRight, unused) \
- M(MultiplyAccumulate, unused) \
- M(IntermediateAddress, unused) \
- M(IntermediateAddressIndex, unused) \
- M(DataProcWithShifterOp, unused)
-
-#define DECLARE_VISIT_INSTRUCTION(type, unused) \
- void Visit##type(H##type* instruction) override;
- FOR_EACH_SCHEDULED_ARM_INSTRUCTION(DECLARE_VISIT_INSTRUCTION)
- FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(DECLARE_VISIT_INSTRUCTION)
- FOR_EACH_CONCRETE_INSTRUCTION_ARM(DECLARE_VISIT_INSTRUCTION)
+class CodeGenerator;
-#undef DECLARE_VISIT_INSTRUCTION
-
- private:
- bool CanGenerateTest(HCondition* cond);
- void HandleGenerateConditionWithZero(IfCondition cond);
- void HandleGenerateLongTestConstant(HCondition* cond);
- void HandleGenerateLongTest(HCondition* cond);
- void HandleGenerateLongComparesAndJumps();
- void HandleGenerateTest(HCondition* cond);
- void HandleGenerateConditionGeneric(HCondition* cond);
- void HandleGenerateEqualLong(HCondition* cond);
- void HandleGenerateConditionLong(HCondition* cond);
- void HandleGenerateConditionIntegralOrNonPrimitive(HCondition* cond);
- void HandleCondition(HCondition* instr);
- void HandleBinaryOperationLantencies(HBinaryOperation* instr);
- void HandleBitwiseOperationLantencies(HBinaryOperation* instr);
- void HandleShiftLatencies(HBinaryOperation* instr);
- void HandleDivRemConstantIntegralLatencies(int32_t imm);
- void HandleFieldSetLatencies(HInstruction* instruction, const FieldInfo& field_info);
- void HandleFieldGetLatencies(HInstruction* instruction, const FieldInfo& field_info);
- void HandleGenerateDataProcInstruction(bool internal_latency = false);
- void HandleGenerateDataProc(HDataProcWithShifterOp* instruction);
- void HandleGenerateLongDataProc(HDataProcWithShifterOp* instruction);
-
- // The latency setting for each HInstruction depends on how CodeGenerator may generate code,
- // latency visitors may query CodeGenerator for such information for accurate latency settings.
- CodeGeneratorARMVIXL* codegen_;
-};
+namespace arm {
-class HSchedulerARM : public HScheduler {
+class HSchedulerARM final : public HScheduler {
public:
- HSchedulerARM(SchedulingNodeSelector* selector,
- SchedulingLatencyVisitorARM* arm_latency_visitor)
- : HScheduler(arm_latency_visitor, selector) {}
+ HSchedulerARM(SchedulingNodeSelector* selector, CodeGenerator* codegen)
+ : HScheduler(selector), codegen_(codegen) {}
~HSchedulerARM() override {}
- bool IsSchedulable(const HInstruction* instruction) const override {
-#define CASE_INSTRUCTION_KIND(type, unused) case \
- HInstruction::InstructionKind::k##type:
- switch (instruction->GetKind()) {
- FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(CASE_INSTRUCTION_KIND)
- return true;
- FOR_EACH_CONCRETE_INSTRUCTION_ARM(CASE_INSTRUCTION_KIND)
- return true;
- default:
- return HScheduler::IsSchedulable(instruction);
- }
-#undef CASE_INSTRUCTION_KIND
- }
+ bool IsSchedulable(const HInstruction* instruction) const override;
+
+ protected:
+ std::pair<SchedulingGraph, ScopedArenaVector<SchedulingNode*>> BuildSchedulingGraph(
+ HBasicBlock* block,
+ ScopedArenaAllocator* allocator,
+ const HeapLocationCollector* heap_location_collector) override;
private:
DISALLOW_COPY_AND_ASSIGN(HSchedulerARM);
+
+ CodeGenerator* const codegen_;
};
} // namespace arm