xueliang.zhong | f7caf68 | 2017-03-01 16:07:02 +0000 | [diff] [blame] | 1 | /* |
| 2 | * Copyright (C) 2017 The Android Open Source Project |
| 3 | * |
| 4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | * you may not use this file except in compliance with the License. |
| 6 | * You may obtain a copy of the License at |
| 7 | * |
| 8 | * http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | * |
| 10 | * Unless required by applicable law or agreed to in writing, software |
| 11 | * distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | * See the License for the specific language governing permissions and |
| 14 | * limitations under the License. |
| 15 | */ |
| 16 | |
| 17 | #ifndef ART_COMPILER_OPTIMIZING_SCHEDULER_ARM_H_ |
| 18 | #define ART_COMPILER_OPTIMIZING_SCHEDULER_ARM_H_ |
| 19 | |
xueliang.zhong | 72a75f7 | 2017-05-25 16:50:39 +0100 | [diff] [blame] | 20 | #ifdef ART_USE_OLD_ARM_BACKEND |
| 21 | #include "code_generator_arm.h" |
| 22 | #else |
xueliang.zhong | f7caf68 | 2017-03-01 16:07:02 +0000 | [diff] [blame] | 23 | #include "code_generator_arm_vixl.h" |
xueliang.zhong | 72a75f7 | 2017-05-25 16:50:39 +0100 | [diff] [blame] | 24 | #endif |
xueliang.zhong | f7caf68 | 2017-03-01 16:07:02 +0000 | [diff] [blame] | 25 | #include "scheduler.h" |
| 26 | |
| 27 | namespace art { |
| 28 | namespace arm { |
// Select the concrete ARM code generator type this scheduler queries.
// The old backend uses CodeGeneratorARM; the default (VIXL-based) backend
// uses CodeGeneratorARMVIXL. The alias lets the latency visitor hold a
// single typed pointer regardless of which backend was compiled in.
#ifdef ART_USE_OLD_ARM_BACKEND
typedef CodeGeneratorARM CodeGeneratorARMType;
#else
typedef CodeGeneratorARMVIXL CodeGeneratorARMType;
#endif
| 34 | |
// AArch32 instruction latencies.
// We currently assume that all ARM CPUs share the same instruction latency list.
// The following latencies were tuned based on performance experiments and
// automatic tuning using differential evolution approach on various benchmarks.
// Note: values are scheduler cost estimates in arbitrary units, not exact cycle
// counts for any particular core.

// Arithmetic / logic operations.
static constexpr uint32_t kArmIntegerOpLatency = 2;
static constexpr uint32_t kArmFloatingPointOpLatency = 11;
static constexpr uint32_t kArmDataProcWithShifterOpLatency = 4;
static constexpr uint32_t kArmMulIntegerLatency = 6;
static constexpr uint32_t kArmMulFloatingPointLatency = 11;
static constexpr uint32_t kArmDivIntegerLatency = 10;
static constexpr uint32_t kArmDivFloatLatency = 20;
static constexpr uint32_t kArmDivDoubleLatency = 25;
static constexpr uint32_t kArmTypeConversionFloatingPointIntegerLatency = 11;
// Memory accesses and barriers.
static constexpr uint32_t kArmMemoryLoadLatency = 9;
static constexpr uint32_t kArmMemoryStoreLatency = 9;
static constexpr uint32_t kArmMemoryBarrierLatency = 6;
// Control flow and calls. "Internal" latencies model work performed inside a
// code-generation sequence (e.g. the runtime call path) rather than a single
// instruction.
static constexpr uint32_t kArmBranchLatency = 4;
static constexpr uint32_t kArmCallLatency = 5;
static constexpr uint32_t kArmCallInternalLatency = 29;
static constexpr uint32_t kArmLoadStringInternalLatency = 10;
static constexpr uint32_t kArmNopLatency = 2;
// Read-barrier and type-check pseudo-sequences.
static constexpr uint32_t kArmLoadWithBakerReadBarrierLatency = 18;
static constexpr uint32_t kArmRuntimeTypeCheckLatency = 46;
| 58 | |
| 59 | class SchedulingLatencyVisitorARM : public SchedulingLatencyVisitor { |
| 60 | public: |
| 61 | explicit SchedulingLatencyVisitorARM(CodeGenerator* codegen) |
| 62 | : codegen_(down_cast<CodeGeneratorARMType*>(codegen)) {} |
| 63 | |
| 64 | // Default visitor for instructions not handled specifically below. |
| 65 | void VisitInstruction(HInstruction* ATTRIBUTE_UNUSED) { |
| 66 | last_visited_latency_ = kArmIntegerOpLatency; |
| 67 | } |
| 68 | |
| 69 | // We add a second unused parameter to be able to use this macro like the others |
| 70 | // defined in `nodes.h`. |
| 71 | #define FOR_EACH_SCHEDULED_ARM_INSTRUCTION(M) \ |
| 72 | M(ArrayGet , unused) \ |
| 73 | M(ArrayLength , unused) \ |
| 74 | M(ArraySet , unused) \ |
| 75 | M(Add , unused) \ |
| 76 | M(Sub , unused) \ |
| 77 | M(And , unused) \ |
| 78 | M(Or , unused) \ |
| 79 | M(Ror , unused) \ |
| 80 | M(Xor , unused) \ |
| 81 | M(Shl , unused) \ |
| 82 | M(Shr , unused) \ |
| 83 | M(UShr , unused) \ |
| 84 | M(Mul , unused) \ |
| 85 | M(Div , unused) \ |
| 86 | M(Condition , unused) \ |
| 87 | M(Compare , unused) \ |
| 88 | M(BoundsCheck , unused) \ |
| 89 | M(InstanceFieldGet , unused) \ |
| 90 | M(InstanceFieldSet , unused) \ |
| 91 | M(InstanceOf , unused) \ |
| 92 | M(Invoke , unused) \ |
| 93 | M(LoadString , unused) \ |
| 94 | M(NewArray , unused) \ |
| 95 | M(NewInstance , unused) \ |
| 96 | M(Rem , unused) \ |
| 97 | M(StaticFieldGet , unused) \ |
| 98 | M(StaticFieldSet , unused) \ |
| 99 | M(SuspendCheck , unused) \ |
| 100 | M(TypeConversion , unused) |
| 101 | |
| 102 | #define FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(M) \ |
| 103 | M(BitwiseNegatedRight, unused) \ |
| 104 | M(MultiplyAccumulate, unused) \ |
| 105 | M(IntermediateAddress, unused) \ |
Artem Serov | f0fc4c6 | 2017-05-03 15:07:15 +0100 | [diff] [blame] | 106 | M(IntermediateAddressIndex, unused) \ |
xueliang.zhong | f7caf68 | 2017-03-01 16:07:02 +0000 | [diff] [blame] | 107 | M(DataProcWithShifterOp, unused) |
| 108 | |
| 109 | #define DECLARE_VISIT_INSTRUCTION(type, unused) \ |
| 110 | void Visit##type(H##type* instruction) OVERRIDE; |
| 111 | |
| 112 | FOR_EACH_SCHEDULED_ARM_INSTRUCTION(DECLARE_VISIT_INSTRUCTION) |
| 113 | FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(DECLARE_VISIT_INSTRUCTION) |
| 114 | FOR_EACH_CONCRETE_INSTRUCTION_ARM(DECLARE_VISIT_INSTRUCTION) |
| 115 | |
| 116 | #undef DECLARE_VISIT_INSTRUCTION |
| 117 | |
| 118 | private: |
| 119 | void HandleBinaryOperationLantencies(HBinaryOperation* instr); |
| 120 | void HandleBitwiseOperationLantencies(HBinaryOperation* instr); |
| 121 | void HandleShiftLatencies(HBinaryOperation* instr); |
| 122 | void HandleDivRemConstantIntegralLatencies(int32_t imm); |
| 123 | void HandleFieldSetLatencies(HInstruction* instruction, const FieldInfo& field_info); |
| 124 | void HandleFieldGetLatencies(HInstruction* instruction, const FieldInfo& field_info); |
| 125 | void HandleGenerateDataProcInstruction(bool internal_latency = false); |
| 126 | void HandleGenerateDataProc(HDataProcWithShifterOp* instruction); |
| 127 | void HandleGenerateLongDataProc(HDataProcWithShifterOp* instruction); |
| 128 | |
| 129 | // The latency setting for each HInstruction depends on how CodeGenerator may generate code, |
| 130 | // latency visitors may query CodeGenerator for such information for accurate latency settings. |
| 131 | CodeGeneratorARMType* codegen_; |
| 132 | }; |
| 133 | |
| 134 | class HSchedulerARM : public HScheduler { |
| 135 | public: |
| 136 | HSchedulerARM(ArenaAllocator* arena, |
| 137 | SchedulingNodeSelector* selector, |
| 138 | SchedulingLatencyVisitorARM* arm_latency_visitor) |
| 139 | : HScheduler(arena, arm_latency_visitor, selector) {} |
| 140 | ~HSchedulerARM() OVERRIDE {} |
| 141 | |
| 142 | bool IsSchedulable(const HInstruction* instruction) const OVERRIDE { |
| 143 | #define CASE_INSTRUCTION_KIND(type, unused) case \ |
| 144 | HInstruction::InstructionKind::k##type: |
| 145 | switch (instruction->GetKind()) { |
| 146 | FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(CASE_INSTRUCTION_KIND) |
| 147 | return true; |
| 148 | FOR_EACH_CONCRETE_INSTRUCTION_ARM(CASE_INSTRUCTION_KIND) |
| 149 | return true; |
| 150 | default: |
| 151 | return HScheduler::IsSchedulable(instruction); |
| 152 | } |
| 153 | #undef CASE_INSTRUCTION_KIND |
| 154 | } |
| 155 | |
| 156 | private: |
| 157 | DISALLOW_COPY_AND_ASSIGN(HSchedulerARM); |
| 158 | }; |
| 159 | |
| 160 | } // namespace arm |
| 161 | } // namespace art |
| 162 | |
| 163 | #endif // ART_COMPILER_OPTIMIZING_SCHEDULER_ARM_H_ |