author Vladimir Marko <vmarko@google.com> 2024-02-12 11:17:28 +0100
committer Vladimír Marko <vmarko@google.com> 2024-02-13 15:28:35 +0000
commit 77e5997b524a133d38585da8bf58420f2411f7ad (patch)
tree 0273c71559a03593b358f04ef060214796159863 /compiler/optimizing/scheduler_arm.cc
parent 2389869ed372eebd886c2f984f23ec7e342da22b (diff)
Optimizing: Refactor `HScheduler`.
Move `SchedulingLatencyVisitor{ARM,ARM64}` to .cc files. Test: m test-art-host-gtest Test: testrunner.py --host --optimizing Test: run-gtests.sh Test: testrunner.py --target --optimizing Change-Id: I15cb1a4cbef00a328fec947189412c502bf80f46
Diffstat (limited to 'compiler/optimizing/scheduler_arm.cc')
-rw-r--r--  compiler/optimizing/scheduler_arm.cc  134
1 file changed, 134 insertions, 0 deletions
diff --git a/compiler/optimizing/scheduler_arm.cc b/compiler/optimizing/scheduler_arm.cc
index 510a0f5496..3ee6f06b46 100644
--- a/compiler/optimizing/scheduler_arm.cc
+++ b/compiler/optimizing/scheduler_arm.cc
@@ -17,6 +17,7 @@
#include "scheduler_arm.h"
#include "arch/arm/instruction_set_features_arm.h"
+#include "code_generator_arm_vixl.h"
#include "code_generator_utils.h"
#include "common_arm.h"
#include "heap_poisoning.h"
@@ -29,6 +30,116 @@ namespace arm {
using helpers::Int32ConstantFrom;
using helpers::Uint64ConstantFrom;
+// AArch32 instruction latencies.
+// We currently assume that all ARM CPUs share the same instruction latency list.
+// The following latencies were tuned through performance experiments and
+// automatic tuning using a differential evolution approach on various benchmarks.
+static constexpr uint32_t kArmIntegerOpLatency = 2;
+static constexpr uint32_t kArmFloatingPointOpLatency = 11;
+static constexpr uint32_t kArmDataProcWithShifterOpLatency = 4;
+static constexpr uint32_t kArmMulIntegerLatency = 6;
+static constexpr uint32_t kArmMulFloatingPointLatency = 11;
+static constexpr uint32_t kArmDivIntegerLatency = 10;
+static constexpr uint32_t kArmDivFloatLatency = 20;
+static constexpr uint32_t kArmDivDoubleLatency = 25;
+static constexpr uint32_t kArmTypeConversionFloatingPointIntegerLatency = 11;
+static constexpr uint32_t kArmMemoryLoadLatency = 9;
+static constexpr uint32_t kArmMemoryStoreLatency = 9;
+static constexpr uint32_t kArmMemoryBarrierLatency = 6;
+static constexpr uint32_t kArmBranchLatency = 4;
+static constexpr uint32_t kArmCallLatency = 5;
+static constexpr uint32_t kArmCallInternalLatency = 29;
+static constexpr uint32_t kArmLoadStringInternalLatency = 10;
+static constexpr uint32_t kArmNopLatency = 2;
+static constexpr uint32_t kArmLoadWithBakerReadBarrierLatency = 18;
+static constexpr uint32_t kArmRuntimeTypeCheckLatency = 46;
+
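These constants are consumed by the visitor below, which records one latency per visited instruction. As a toy illustration (not ART code) of how such per-node latencies typically drive list scheduling, a scheduler can rank nodes by the latency-weighted length of the longest path from each node to the end of the block:

    // critical_path.cc -- toy sketch; Node and CriticalPaths are invented names.
    #include <algorithm>
    #include <cstdint>
    #include <vector>

    struct Node {
      uint32_t latency;             // e.g. 2 for an integer op, 9 for a load
      std::vector<int> successors;  // indices of nodes that depend on this one
    };

    // Nodes are assumed topologically ordered (dependencies before users).
    std::vector<uint32_t> CriticalPaths(const std::vector<Node>& nodes) {
      std::vector<uint32_t> path(nodes.size(), 0);
      for (int i = static_cast<int>(nodes.size()) - 1; i >= 0; --i) {
        uint32_t longest_successor_path = 0;
        for (int succ : nodes[i].successors) {
          longest_successor_path = std::max(longest_successor_path, path[succ]);
        }
        // A node's priority is its own latency plus the longest chain after
        // it, so long dependency chains get scheduled early.
        path[i] = nodes[i].latency + longest_successor_path;
      }
      return path;
    }

    int main() {
      // add (latency 2) feeding mul (latency 6): critical path of the add
      // is 2 + 6 = 8.
      std::vector<Node> nodes = {{2, {1}}, {6, {}}};
      return CriticalPaths(nodes)[0] == 8 ? 0 : 1;
    }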
+class SchedulingLatencyVisitorARM final : public SchedulingLatencyVisitor {
+ public:
+ explicit SchedulingLatencyVisitorARM(CodeGenerator* codegen)
+ : codegen_(down_cast<CodeGeneratorARMVIXL*>(codegen)) {}
+
+ // Default visitor for instructions not handled specifically below.
+ void VisitInstruction([[maybe_unused]] HInstruction* instruction) override {
+ last_visited_latency_ = kArmIntegerOpLatency;
+ }
+
+// We add a second unused parameter to be able to use this macro like the others
+// defined in `nodes.h`.
+#define FOR_EACH_SCHEDULED_ARM_INSTRUCTION(M) \
+ M(ArrayGet, unused) \
+ M(ArrayLength, unused) \
+ M(ArraySet, unused) \
+ M(Add, unused) \
+ M(Sub, unused) \
+ M(And, unused) \
+ M(Or, unused) \
+ M(Ror, unused) \
+ M(Xor, unused) \
+ M(Shl, unused) \
+ M(Shr, unused) \
+ M(UShr, unused) \
+ M(Mul, unused) \
+ M(Div, unused) \
+ M(Condition, unused) \
+ M(Compare, unused) \
+ M(BoundsCheck, unused) \
+ M(InstanceFieldGet, unused) \
+ M(InstanceFieldSet, unused) \
+ M(InstanceOf, unused) \
+ M(Invoke, unused) \
+ M(LoadString, unused) \
+ M(NewArray, unused) \
+ M(NewInstance, unused) \
+ M(Rem, unused) \
+ M(StaticFieldGet, unused) \
+ M(StaticFieldSet, unused) \
+ M(SuspendCheck, unused) \
+ M(TypeConversion, unused)
+
+#define FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(M) \
+ M(BitwiseNegatedRight, unused) \
+ M(MultiplyAccumulate, unused) \
+ M(IntermediateAddress, unused) \
+ M(IntermediateAddressIndex, unused) \
+ M(DataProcWithShifterOp, unused)
+
+#define DECLARE_VISIT_INSTRUCTION(type, unused) \
+ void Visit##type(H##type* instruction) override;
+
+ FOR_EACH_SCHEDULED_ARM_INSTRUCTION(DECLARE_VISIT_INSTRUCTION)
+ FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(DECLARE_VISIT_INSTRUCTION)
+ FOR_EACH_CONCRETE_INSTRUCTION_ARM(DECLARE_VISIT_INSTRUCTION)
+
+#undef DECLARE_VISIT_INSTRUCTION
+
+ private:
+ bool CanGenerateTest(HCondition* cond);
+ void HandleGenerateConditionWithZero(IfCondition cond);
+ void HandleGenerateLongTestConstant(HCondition* cond);
+ void HandleGenerateLongTest(HCondition* cond);
+ void HandleGenerateLongComparesAndJumps();
+ void HandleGenerateTest(HCondition* cond);
+ void HandleGenerateConditionGeneric(HCondition* cond);
+ void HandleGenerateEqualLong(HCondition* cond);
+ void HandleGenerateConditionLong(HCondition* cond);
+ void HandleGenerateConditionIntegralOrNonPrimitive(HCondition* cond);
+ void HandleCondition(HCondition* instr);
+ void HandleBinaryOperationLantencies(HBinaryOperation* instr);
+ void HandleBitwiseOperationLantencies(HBinaryOperation* instr);
+ void HandleShiftLatencies(HBinaryOperation* instr);
+ void HandleDivRemConstantIntegralLatencies(int32_t imm);
+ void HandleFieldSetLatencies(HInstruction* instruction, const FieldInfo& field_info);
+ void HandleFieldGetLatencies(HInstruction* instruction, const FieldInfo& field_info);
+ void HandleGenerateDataProcInstruction(bool internal_latency = false);
+ void HandleGenerateDataProc(HDataProcWithShifterOp* instruction);
+ void HandleGenerateLongDataProc(HDataProcWithShifterOp* instruction);
+
+ // The latency setting for each HInstruction depends on how CodeGenerator may generate code;
+ // latency visitors may query CodeGenerator for such information to set latencies accurately.
+ CodeGeneratorARMVIXL* codegen_;
+};
+
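The FOR_EACH_* lists above are X-macros: the same instruction list is expanded with different callbacks to generate the Visit##type declarations here and the switch cases in IsSchedulable further down, so the list is written only once. A self-contained sketch of the technique, with generic names (the two-argument form mirrors the `M(type, unused)` shape used in this file):

    // x_macro.cc -- generic illustration of the X-macro pattern, not ART code.
    #include <cstdio>

    #define FOR_EACH_OP(M) \
      M(Add, unused)       \
      M(Mul, unused)

    // Expansion 1: generate one declaration per listed op.
    #define DECLARE_HANDLER(type, unused) void Handle##type();
    FOR_EACH_OP(DECLARE_HANDLER)  // void HandleAdd(); void HandleMul();
    #undef DECLARE_HANDLER

    void HandleAdd() { std::puts("add"); }
    void HandleMul() { std::puts("mul"); }

    // Expansion 2: reuse the same list to generate calls.
    #define CALL_HANDLER(type, unused) Handle##type();
    int main() {
      FOR_EACH_OP(CALL_HANDLER)  // HandleAdd(); HandleMul();
    }
    #undef CALL_HANDLER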
void SchedulingLatencyVisitorARM::HandleBinaryOperationLantencies(HBinaryOperation* instr) {
switch (instr->GetResultType()) {
case DataType::Type::kInt64:
@@ -1153,5 +1264,28 @@ void SchedulingLatencyVisitorARM::VisitTypeConversion(HTypeConversion* instr) {
}
}
+bool HSchedulerARM::IsSchedulable(const HInstruction* instruction) const {
+ switch (instruction->GetKind()) {
+#define SCHEDULABLE_CASE(type, unused) \
+ case HInstruction::InstructionKind::k##type: \
+ return true;
+ FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(SCHEDULABLE_CASE)
+ FOR_EACH_CONCRETE_INSTRUCTION_ARM(SCHEDULABLE_CASE)
+#undef SCHEDULABLE_CASE
+
+ default:
+ return HScheduler::IsSchedulable(instruction);
+ }
+}
+
+std::pair<SchedulingGraph, ScopedArenaVector<SchedulingNode*>> HSchedulerARM::BuildSchedulingGraph(
+ HBasicBlock* block,
+ ScopedArenaAllocator* allocator,
+ const HeapLocationCollector* heap_location_collector) {
+ SchedulingLatencyVisitorARM latency_visitor(codegen_);
+ return HScheduler::BuildSchedulingGraph(
+ block, allocator, heap_location_collector, &latency_visitor);
+}
+
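The design choice visible in the two functions above: graph construction stays in the HScheduler base class and takes the latency visitor as a parameter, so each backend keeps its visitor as a private detail of its own .cc file and instantiates it on the stack per call. A sketch of that shape under invented names (Scheduler, ArmScheduler, Graph are illustrative, not ART's API):

    // scheduler_shape.cc -- toy sketch of the structure, not ART code.
    struct Graph {};

    class LatencyVisitor {
     public:
      virtual ~LatencyVisitor() = default;
    };

    class Scheduler {
     protected:
      // Shared implementation: builds the graph with whatever visitor it is given.
      Graph BuildGraph(LatencyVisitor* visitor) {
        (void)visitor;  // graph construction elided in this toy
        return Graph{};
      }
    };

    class ArmScheduler : public Scheduler {
     public:
      Graph BuildGraph() {
        // The concrete visitor never escapes this .cc file; a short-lived
        // stack instance suffices, exactly as in the function above.
        class ArmLatencyVisitor final : public LatencyVisitor {};
        ArmLatencyVisitor visitor;
        return Scheduler::BuildGraph(&visitor);
      }
    };

    int main() {
      ArmScheduler scheduler;
      (void)scheduler.BuildGraph();
    }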
} // namespace arm
} // namespace art