/*
 * Copyright (C) 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef ART_COMPILER_OPTIMIZING_SCHEDULER_ARM_H_
#define ART_COMPILER_OPTIMIZING_SCHEDULER_ARM_H_

#include "code_generator_arm_vixl.h"
#include "scheduler.h"

namespace art {
namespace arm {
Roland Levillain | 9983e30 | 2017-07-14 14:34:22 +0100 | [diff] [blame] | 25 | // TODO: Replace CodeGeneratorARMType with CodeGeneratorARMVIXL everywhere? |
xueliang.zhong | f7caf68 | 2017-03-01 16:07:02 +0000 | [diff] [blame] | 26 | typedef CodeGeneratorARMVIXL CodeGeneratorARMType; |
xueliang.zhong | f7caf68 | 2017-03-01 16:07:02 +0000 | [diff] [blame] | 27 | |
// AArch32 instruction latencies.
// We currently assume that all ARM CPUs share the same instruction latency list.
// The following latencies were tuned based on performance experiments and
// automatic tuning using differential evolution approach on various benchmarks.
// NOTE: these are abstract scheduler cost-model weights, not exact hardware
// cycle counts for any particular core.
// Basic ALU / FP operation costs.
static constexpr uint32_t kArmIntegerOpLatency = 2;
static constexpr uint32_t kArmFloatingPointOpLatency = 11;
static constexpr uint32_t kArmDataProcWithShifterOpLatency = 4;
static constexpr uint32_t kArmMulIntegerLatency = 6;
static constexpr uint32_t kArmMulFloatingPointLatency = 11;
static constexpr uint32_t kArmDivIntegerLatency = 10;
static constexpr uint32_t kArmDivFloatLatency = 20;
static constexpr uint32_t kArmDivDoubleLatency = 25;
static constexpr uint32_t kArmTypeConversionFloatingPointIntegerLatency = 11;
// Memory accesses and barriers.
static constexpr uint32_t kArmMemoryLoadLatency = 9;
static constexpr uint32_t kArmMemoryStoreLatency = 9;
static constexpr uint32_t kArmMemoryBarrierLatency = 6;
// Control flow and calls (internal = cost of the expanded call sequence).
static constexpr uint32_t kArmBranchLatency = 4;
static constexpr uint32_t kArmCallLatency = 5;
static constexpr uint32_t kArmCallInternalLatency = 29;
static constexpr uint32_t kArmLoadStringInternalLatency = 10;
static constexpr uint32_t kArmNopLatency = 2;
// Multi-instruction sequences (read barrier loads, runtime type checks).
static constexpr uint32_t kArmLoadWithBakerReadBarrierLatency = 18;
static constexpr uint32_t kArmRuntimeTypeCheckLatency = 46;
| 51 | |
| 52 | class SchedulingLatencyVisitorARM : public SchedulingLatencyVisitor { |
| 53 | public: |
| 54 | explicit SchedulingLatencyVisitorARM(CodeGenerator* codegen) |
| 55 | : codegen_(down_cast<CodeGeneratorARMType*>(codegen)) {} |
| 56 | |
| 57 | // Default visitor for instructions not handled specifically below. |
| 58 | void VisitInstruction(HInstruction* ATTRIBUTE_UNUSED) { |
| 59 | last_visited_latency_ = kArmIntegerOpLatency; |
| 60 | } |
| 61 | |
| 62 | // We add a second unused parameter to be able to use this macro like the others |
| 63 | // defined in `nodes.h`. |
| 64 | #define FOR_EACH_SCHEDULED_ARM_INSTRUCTION(M) \ |
| 65 | M(ArrayGet , unused) \ |
| 66 | M(ArrayLength , unused) \ |
| 67 | M(ArraySet , unused) \ |
| 68 | M(Add , unused) \ |
| 69 | M(Sub , unused) \ |
| 70 | M(And , unused) \ |
| 71 | M(Or , unused) \ |
| 72 | M(Ror , unused) \ |
| 73 | M(Xor , unused) \ |
| 74 | M(Shl , unused) \ |
| 75 | M(Shr , unused) \ |
| 76 | M(UShr , unused) \ |
| 77 | M(Mul , unused) \ |
| 78 | M(Div , unused) \ |
| 79 | M(Condition , unused) \ |
| 80 | M(Compare , unused) \ |
| 81 | M(BoundsCheck , unused) \ |
| 82 | M(InstanceFieldGet , unused) \ |
| 83 | M(InstanceFieldSet , unused) \ |
| 84 | M(InstanceOf , unused) \ |
| 85 | M(Invoke , unused) \ |
| 86 | M(LoadString , unused) \ |
| 87 | M(NewArray , unused) \ |
| 88 | M(NewInstance , unused) \ |
| 89 | M(Rem , unused) \ |
| 90 | M(StaticFieldGet , unused) \ |
| 91 | M(StaticFieldSet , unused) \ |
| 92 | M(SuspendCheck , unused) \ |
| 93 | M(TypeConversion , unused) |
| 94 | |
| 95 | #define FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(M) \ |
| 96 | M(BitwiseNegatedRight, unused) \ |
| 97 | M(MultiplyAccumulate, unused) \ |
| 98 | M(IntermediateAddress, unused) \ |
Artem Serov | f0fc4c6 | 2017-05-03 15:07:15 +0100 | [diff] [blame] | 99 | M(IntermediateAddressIndex, unused) \ |
xueliang.zhong | f7caf68 | 2017-03-01 16:07:02 +0000 | [diff] [blame] | 100 | M(DataProcWithShifterOp, unused) |
| 101 | |
| 102 | #define DECLARE_VISIT_INSTRUCTION(type, unused) \ |
| 103 | void Visit##type(H##type* instruction) OVERRIDE; |
| 104 | |
| 105 | FOR_EACH_SCHEDULED_ARM_INSTRUCTION(DECLARE_VISIT_INSTRUCTION) |
| 106 | FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(DECLARE_VISIT_INSTRUCTION) |
| 107 | FOR_EACH_CONCRETE_INSTRUCTION_ARM(DECLARE_VISIT_INSTRUCTION) |
| 108 | |
| 109 | #undef DECLARE_VISIT_INSTRUCTION |
| 110 | |
| 111 | private: |
xueliang.zhong | bf9e21a | 2017-06-15 11:01:11 +0100 | [diff] [blame] | 112 | bool CanGenerateTest(HCondition* cond); |
| 113 | void HandleGenerateConditionWithZero(IfCondition cond); |
| 114 | void HandleGenerateLongTestConstant(HCondition* cond); |
| 115 | void HandleGenerateLongTest(HCondition* cond); |
| 116 | void HandleGenerateLongComparesAndJumps(); |
| 117 | void HandleGenerateTest(HCondition* cond); |
| 118 | void HandleGenerateConditionGeneric(HCondition* cond); |
| 119 | void HandleGenerateEqualLong(HCondition* cond); |
| 120 | void HandleGenerateConditionLong(HCondition* cond); |
| 121 | void HandleGenerateConditionIntegralOrNonPrimitive(HCondition* cond); |
| 122 | void HandleCondition(HCondition* instr); |
xueliang.zhong | f7caf68 | 2017-03-01 16:07:02 +0000 | [diff] [blame] | 123 | void HandleBinaryOperationLantencies(HBinaryOperation* instr); |
| 124 | void HandleBitwiseOperationLantencies(HBinaryOperation* instr); |
| 125 | void HandleShiftLatencies(HBinaryOperation* instr); |
| 126 | void HandleDivRemConstantIntegralLatencies(int32_t imm); |
| 127 | void HandleFieldSetLatencies(HInstruction* instruction, const FieldInfo& field_info); |
| 128 | void HandleFieldGetLatencies(HInstruction* instruction, const FieldInfo& field_info); |
| 129 | void HandleGenerateDataProcInstruction(bool internal_latency = false); |
| 130 | void HandleGenerateDataProc(HDataProcWithShifterOp* instruction); |
| 131 | void HandleGenerateLongDataProc(HDataProcWithShifterOp* instruction); |
| 132 | |
| 133 | // The latency setting for each HInstruction depends on how CodeGenerator may generate code, |
| 134 | // latency visitors may query CodeGenerator for such information for accurate latency settings. |
| 135 | CodeGeneratorARMType* codegen_; |
| 136 | }; |
| 137 | |
// ARM-specific instruction scheduler. It plugs an ARM latency visitor into
// the generic HScheduler and widens the set of schedulable instructions to
// include the ARM-only and shared (ARM/ARM64) IR nodes it knows latencies for.
class HSchedulerARM : public HScheduler {
 public:
  // `arm_latency_visitor` is owned by the caller; it is forwarded to the base
  // class, which uses it to cost each node during scheduling.
  HSchedulerARM(ScopedArenaAllocator* allocator,
                SchedulingNodeSelector* selector,
                SchedulingLatencyVisitorARM* arm_latency_visitor)
      : HScheduler(allocator, arm_latency_visitor, selector) {}
  ~HSchedulerARM() OVERRIDE {}

  bool IsSchedulable(const HInstruction* instruction) const OVERRIDE {
    // Expands each instruction kind in the lists below into a `case` label,
    // so consecutive kinds fall through to the following `return true`.
#define CASE_INSTRUCTION_KIND(type, unused) case \
  HInstruction::InstructionKind::k##type:
    switch (instruction->GetKind()) {
      FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(CASE_INSTRUCTION_KIND)
        return true;
      FOR_EACH_CONCRETE_INSTRUCTION_ARM(CASE_INSTRUCTION_KIND)
        return true;
      default:
        // Anything else: defer to the conservative base-class check.
        return HScheduler::IsSchedulable(instruction);
    }
#undef CASE_INSTRUCTION_KIND
  }

 private:
  DISALLOW_COPY_AND_ASSIGN(HSchedulerARM);
};

}  // namespace arm
}  // namespace art

#endif  // ART_COMPILER_OPTIMIZING_SCHEDULER_ARM_H_