author     2024-02-12 11:17:28 +0100
committer  2024-02-13 15:28:35 +0000
commit     77e5997b524a133d38585da8bf58420f2411f7ad (patch)
tree       0273c71559a03593b358f04ef060214796159863 /compiler/optimizing/scheduler_arm.cc
parent     2389869ed372eebd886c2f984f23ec7e342da22b (diff)
Optimizing: Refactor `HScheduler`.
Move `SchedulingLatencyVisitor{ARM,ARM64}` to .cc files.
Test: m test-art-host-gtest
Test: testrunner.py --host --optimizing
Test: run-gtests.sh
Test: testrunner.py --target --optimizing
Change-Id: I15cb1a4cbef00a328fec947189412c502bf80f46
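
The essence of the change is visible in the diff below: the latency visitor moves out of the header and becomes an implementation detail of the .cc file, and the scheduler constructs one on the stack only when it builds the scheduling graph. As a rough, self-contained illustration of that pattern (all names here are hypothetical stand-ins, not ART's actual API):

#include <cstdint>
#include <iostream>

// Hypothetical stand-ins; only the header/.cc split mirrors the change.
struct Instruction {};

// --- header: other translation units see only the scheduler ---
class Scheduler {
 public:
  uint32_t Schedule(const Instruction& instr);  // uses the visitor internally
};

// --- .cc file: the visitor is no longer visible outside ---
namespace {
class LatencyVisitor {
 public:
  uint32_t LatencyFor(const Instruction&) const { return 2; }  // assumed default latency
};
}  // namespace

uint32_t Scheduler::Schedule(const Instruction& instr) {
  LatencyVisitor visitor;  // stack-local, as in BuildSchedulingGraph below
  return visitor.LatencyFor(instr);
}

int main() {
  Scheduler scheduler;
  std::cout << scheduler.Schedule(Instruction{}) << "\n";  // prints 2
}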
Diffstat (limited to 'compiler/optimizing/scheduler_arm.cc')
-rw-r--r--  compiler/optimizing/scheduler_arm.cc  134
1 file changed, 134 insertions(+), 0 deletions(-)
diff --git a/compiler/optimizing/scheduler_arm.cc b/compiler/optimizing/scheduler_arm.cc
index 510a0f5496..3ee6f06b46 100644
--- a/compiler/optimizing/scheduler_arm.cc
+++ b/compiler/optimizing/scheduler_arm.cc
@@ -17,6 +17,7 @@
 #include "scheduler_arm.h"
 
 #include "arch/arm/instruction_set_features_arm.h"
+#include "code_generator_arm_vixl.h"
 #include "code_generator_utils.h"
 #include "common_arm.h"
 #include "heap_poisoning.h"
@@ -29,6 +30,116 @@ namespace arm {
 using helpers::Int32ConstantFrom;
 using helpers::Uint64ConstantFrom;
 
+// AArch32 instruction latencies.
+// We currently assume that all ARM CPUs share the same instruction latency list.
+// The following latencies were tuned based on performance experiments and
+// automatic tuning using differential evolution approach on various benchmarks.
+static constexpr uint32_t kArmIntegerOpLatency = 2;
+static constexpr uint32_t kArmFloatingPointOpLatency = 11;
+static constexpr uint32_t kArmDataProcWithShifterOpLatency = 4;
+static constexpr uint32_t kArmMulIntegerLatency = 6;
+static constexpr uint32_t kArmMulFloatingPointLatency = 11;
+static constexpr uint32_t kArmDivIntegerLatency = 10;
+static constexpr uint32_t kArmDivFloatLatency = 20;
+static constexpr uint32_t kArmDivDoubleLatency = 25;
+static constexpr uint32_t kArmTypeConversionFloatingPointIntegerLatency = 11;
+static constexpr uint32_t kArmMemoryLoadLatency = 9;
+static constexpr uint32_t kArmMemoryStoreLatency = 9;
+static constexpr uint32_t kArmMemoryBarrierLatency = 6;
+static constexpr uint32_t kArmBranchLatency = 4;
+static constexpr uint32_t kArmCallLatency = 5;
+static constexpr uint32_t kArmCallInternalLatency = 29;
+static constexpr uint32_t kArmLoadStringInternalLatency = 10;
+static constexpr uint32_t kArmNopLatency = 2;
+static constexpr uint32_t kArmLoadWithBakerReadBarrierLatency = 18;
+static constexpr uint32_t kArmRuntimeTypeCheckLatency = 46;
+
+class SchedulingLatencyVisitorARM final : public SchedulingLatencyVisitor {
+ public:
+  explicit SchedulingLatencyVisitorARM(CodeGenerator* codegen)
+      : codegen_(down_cast<CodeGeneratorARMVIXL*>(codegen)) {}
+
+  // Default visitor for instructions not handled specifically below.
+  void VisitInstruction([[maybe_unused]] HInstruction*) override {
+    last_visited_latency_ = kArmIntegerOpLatency;
+  }
+
+// We add a second unused parameter to be able to use this macro like the others
+// defined in `nodes.h`.
+#define FOR_EACH_SCHEDULED_ARM_INSTRUCTION(M) \
+  M(ArrayGet, unused)                         \
+  M(ArrayLength, unused)                      \
+  M(ArraySet, unused)                         \
+  M(Add, unused)                              \
+  M(Sub, unused)                              \
+  M(And, unused)                              \
+  M(Or, unused)                               \
+  M(Ror, unused)                              \
+  M(Xor, unused)                              \
+  M(Shl, unused)                              \
+  M(Shr, unused)                              \
+  M(UShr, unused)                             \
+  M(Mul, unused)                              \
+  M(Div, unused)                              \
+  M(Condition, unused)                        \
+  M(Compare, unused)                          \
+  M(BoundsCheck, unused)                      \
+  M(InstanceFieldGet, unused)                 \
+  M(InstanceFieldSet, unused)                 \
+  M(InstanceOf, unused)                       \
+  M(Invoke, unused)                           \
+  M(LoadString, unused)                       \
+  M(NewArray, unused)                         \
+  M(NewInstance, unused)                      \
+  M(Rem, unused)                              \
+  M(StaticFieldGet, unused)                   \
+  M(StaticFieldSet, unused)                   \
+  M(SuspendCheck, unused)                     \
+  M(TypeConversion, unused)
+
+#define FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(M) \
+  M(BitwiseNegatedRight, unused)                 \
+  M(MultiplyAccumulate, unused)                  \
+  M(IntermediateAddress, unused)                 \
+  M(IntermediateAddressIndex, unused)            \
+  M(DataProcWithShifterOp, unused)
+
+#define DECLARE_VISIT_INSTRUCTION(type, unused) \
+  void Visit##type(H##type* instruction) override;
+
+  FOR_EACH_SCHEDULED_ARM_INSTRUCTION(DECLARE_VISIT_INSTRUCTION)
+  FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(DECLARE_VISIT_INSTRUCTION)
+  FOR_EACH_CONCRETE_INSTRUCTION_ARM(DECLARE_VISIT_INSTRUCTION)
+
+#undef DECLARE_VISIT_INSTRUCTION
+
+ private:
+  bool CanGenerateTest(HCondition* cond);
+  void HandleGenerateConditionWithZero(IfCondition cond);
+  void HandleGenerateLongTestConstant(HCondition* cond);
+  void HandleGenerateLongTest(HCondition* cond);
+  void HandleGenerateLongComparesAndJumps();
+  void HandleGenerateTest(HCondition* cond);
+  void HandleGenerateConditionGeneric(HCondition* cond);
+  void HandleGenerateEqualLong(HCondition* cond);
+  void HandleGenerateConditionLong(HCondition* cond);
+  void HandleGenerateConditionIntegralOrNonPrimitive(HCondition* cond);
+  void HandleCondition(HCondition* instr);
+  void HandleBinaryOperationLantencies(HBinaryOperation* instr);
+  void HandleBitwiseOperationLantencies(HBinaryOperation* instr);
+  void HandleShiftLatencies(HBinaryOperation* instr);
+  void HandleDivRemConstantIntegralLatencies(int32_t imm);
+  void HandleFieldSetLatencies(HInstruction* instruction, const FieldInfo& field_info);
+  void HandleFieldGetLatencies(HInstruction* instruction, const FieldInfo& field_info);
+  void HandleGenerateDataProcInstruction(bool internal_latency = false);
+  void HandleGenerateDataProc(HDataProcWithShifterOp* instruction);
+  void HandleGenerateLongDataProc(HDataProcWithShifterOp* instruction);
+
+  // The latency setting for each HInstruction depends on how CodeGenerator may generate code,
+  // latency visitors may query CodeGenerator for such information for accurate latency settings.
+  CodeGeneratorARMVIXL* codegen_;
+};
+
 void SchedulingLatencyVisitorARM::HandleBinaryOperationLantencies(HBinaryOperation* instr) {
   switch (instr->GetResultType()) {
     case DataType::Type::kInt64:
@@ -1153,5 +1264,28 @@ void SchedulingLatencyVisitorARM::VisitTypeConversion(HTypeConversion* instr) {
   }
 }
 
+bool HSchedulerARM::IsSchedulable(const HInstruction* instruction) const {
+  switch (instruction->GetKind()) {
+#define SCHEDULABLE_CASE(type, unused)           \
+    case HInstruction::InstructionKind::k##type: \
+      return true;
+    FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(SCHEDULABLE_CASE)
+    FOR_EACH_CONCRETE_INSTRUCTION_ARM(SCHEDULABLE_CASE)
+#undef SCHEDULABLE_CASE
+
+    default:
+      return HScheduler::IsSchedulable(instruction);
+  }
+}
+
+std::pair<SchedulingGraph, ScopedArenaVector<SchedulingNode*>> HSchedulerARM::BuildSchedulingGraph(
+    HBasicBlock* block,
+    ScopedArenaAllocator* allocator,
+    const HeapLocationCollector* heap_location_collector) {
+  SchedulingLatencyVisitorARM latency_visitor(codegen_);
+  return HScheduler::BuildSchedulingGraph(
+      block, allocator, heap_location_collector, &latency_visitor);
+}
+
 }  // namespace arm
 }  // namespace art
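
A note on the FOR_EACH_* lists above: they are X-macros, so a single instruction list drives both the Visit##type declarations and the IsSchedulable switch cases, and the two can never drift apart. A minimal runnable sketch of the technique, using an invented three-entry list rather than ART's real instruction set:

#include <iostream>

// One list of kinds; the unused second macro parameter mirrors the
// convention above (kept so the list matches other such macros).
#define FOR_EACH_KIND(M) \
  M(Add, unused)         \
  M(Mul, unused)         \
  M(Div, unused)

enum class Kind {
#define ENUM_ENTRY(type, unused) k##type,
  FOR_EACH_KIND(ENUM_ENTRY)
#undef ENUM_ENTRY
  kUnknown,
};

// The same list expands into the switch, as in HSchedulerARM::IsSchedulable.
bool IsSchedulable(Kind kind) {
  switch (kind) {
#define SCHEDULABLE_CASE(type, unused) \
    case Kind::k##type:                \
      return true;
    FOR_EACH_KIND(SCHEDULABLE_CASE)
#undef SCHEDULABLE_CASE
    default:
      return false;
  }
}

int main() {
  std::cout << std::boolalpha << IsSchedulable(Kind::kMul) << "\n";      // true
  std::cout << std::boolalpha << IsSchedulable(Kind::kUnknown) << "\n";  // false
}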
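
The visitor itself follows a convention visible in VisitInstruction above: each Visit method stores its result in last_visited_latency_ rather than returning it, and the caller reads that field after dispatch. A simplified sketch of the convention (the dispatch mechanism, class names, and latency numbers are stand-ins, not ART's):

#include <cstdint>
#include <iostream>

struct HInstr { enum class Kind { kAdd, kDiv } kind; };

class LatencyVisitor {
 public:
  // Default for anything not handled specifically, like VisitInstruction above.
  void VisitInstruction(const HInstr&) { last_visited_latency_ = 2; }
  void VisitDiv(const HInstr&) { last_visited_latency_ = 10; }

  // Dispatch, then report the latency recorded by the chosen Visit method.
  uint32_t Dispatch(const HInstr& instr) {
    switch (instr.kind) {
      case HInstr::Kind::kDiv: VisitDiv(instr); break;
      default: VisitInstruction(instr); break;
    }
    return last_visited_latency_;
  }

 private:
  uint32_t last_visited_latency_ = 0;
};

int main() {
  LatencyVisitor v;
  std::cout << v.Dispatch({HInstr::Kind::kDiv}) << "\n";  // 10
  std::cout << v.Dispatch({HInstr::Kind::kAdd}) << "\n";  // 2
}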