| /* |
| * Copyright (C) 2017 The Android Open Source Project |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| #include "scheduler_arm.h" |
| |
| #include "arch/arm/instruction_set_features_arm.h" |
| #include "code_generator_utils.h" |
| #include "common_arm.h" |
| #include "heap_poisoning.h" |
| #include "mirror/array-inl.h" |
| #include "mirror/string.h" |
| |
| namespace art HIDDEN { |
| namespace arm { |
| |
| using helpers::Int32ConstantFrom; |
| using helpers::Uint64ConstantFrom; |
| |
| void SchedulingLatencyVisitorARM::HandleBinaryOperationLantencies(HBinaryOperation* instr) { |
| switch (instr->GetResultType()) { |
| case DataType::Type::kInt64: |
| // HAdd and HSub long operations translate to ADDS+ADC or SUBS+SBC pairs, |
| // so a bubble (kArmNopLatency) is added to represent the internal carry flag |
| // dependency inside these pairs. |
| last_visited_internal_latency_ = kArmIntegerOpLatency + kArmNopLatency; |
| last_visited_latency_ = kArmIntegerOpLatency; |
| break; |
| case DataType::Type::kFloat32: |
| case DataType::Type::kFloat64: |
| last_visited_latency_ = kArmFloatingPointOpLatency; |
| break; |
| default: |
| last_visited_latency_ = kArmIntegerOpLatency; |
| break; |
| } |
| } |
| |
| void SchedulingLatencyVisitorARM::VisitAdd(HAdd* instr) { |
| HandleBinaryOperationLantencies(instr); |
| } |
| |
| void SchedulingLatencyVisitorARM::VisitSub(HSub* instr) { |
| HandleBinaryOperationLantencies(instr); |
| } |
| |
| void SchedulingLatencyVisitorARM::VisitMul(HMul* instr) { |
| switch (instr->GetResultType()) { |
| case DataType::Type::kInt64: |
| last_visited_internal_latency_ = 3 * kArmMulIntegerLatency; |
| last_visited_latency_ = kArmIntegerOpLatency; |
| break; |
| case DataType::Type::kFloat32: |
| case DataType::Type::kFloat64: |
| last_visited_latency_ = kArmMulFloatingPointLatency; |
| break; |
| default: |
| last_visited_latency_ = kArmMulIntegerLatency; |
| break; |
| } |
| } |
| |
| void SchedulingLatencyVisitorARM::HandleBitwiseOperationLantencies(HBinaryOperation* instr) { |
| switch (instr->GetResultType()) { |
| case DataType::Type::kInt64: |
| last_visited_internal_latency_ = kArmIntegerOpLatency; |
| last_visited_latency_ = kArmIntegerOpLatency; |
| break; |
| case DataType::Type::kFloat32: |
| case DataType::Type::kFloat64: |
| last_visited_latency_ = kArmFloatingPointOpLatency; |
| break; |
| default: |
| last_visited_latency_ = kArmIntegerOpLatency; |
| break; |
| } |
| } |
| |
| void SchedulingLatencyVisitorARM::VisitAnd(HAnd* instr) { |
| HandleBitwiseOperationLantencies(instr); |
| } |
| |
| void SchedulingLatencyVisitorARM::VisitOr(HOr* instr) { |
| HandleBitwiseOperationLantencies(instr); |
| } |
| |
| void SchedulingLatencyVisitorARM::VisitXor(HXor* instr) { |
| HandleBitwiseOperationLantencies(instr); |
| } |
| |
| void SchedulingLatencyVisitorARM::VisitRor(HRor* instr) { |
| switch (instr->GetResultType()) { |
| case DataType::Type::kInt32: |
| last_visited_latency_ = kArmIntegerOpLatency; |
| break; |
| case DataType::Type::kInt64: { |
| // HandleLongRotate |
| HInstruction* rhs = instr->GetRight(); |
| if (rhs->IsConstant()) { |
| uint64_t rot = Uint64ConstantFrom(rhs->AsConstant()) & kMaxLongShiftDistance; |
| if (rot != 0u) { |
| last_visited_internal_latency_ = 3 * kArmIntegerOpLatency; |
| last_visited_latency_ = kArmIntegerOpLatency; |
| } else { |
| last_visited_internal_latency_ = kArmIntegerOpLatency; |
| last_visited_latency_ = kArmIntegerOpLatency; |
| } |
| } else { |
| last_visited_internal_latency_ = 9 * kArmIntegerOpLatency + kArmBranchLatency; |
| last_visited_latency_ = kArmBranchLatency; |
| } |
| break; |
| } |
| default: |
| LOG(FATAL) << "Unexpected operation type " << instr->GetResultType(); |
| UNREACHABLE(); |
| } |
| } |
| |
| void SchedulingLatencyVisitorARM::HandleShiftLatencies(HBinaryOperation* instr) { |
| DataType::Type type = instr->GetResultType(); |
| HInstruction* rhs = instr->GetRight(); |
| switch (type) { |
| case DataType::Type::kInt32: |
| if (!rhs->IsConstant()) { |
| last_visited_internal_latency_ = kArmIntegerOpLatency; |
| } |
| last_visited_latency_ = kArmIntegerOpLatency; |
| break; |
| case DataType::Type::kInt64: |
| if (!rhs->IsConstant()) { |
| last_visited_internal_latency_ = 8 * kArmIntegerOpLatency; |
| } else { |
| uint32_t shift_value = Int32ConstantFrom(rhs->AsConstant()) & kMaxLongShiftDistance; |
| if (shift_value == 1 || shift_value >= 32) { |
| last_visited_internal_latency_ = kArmIntegerOpLatency; |
| } else { |
| last_visited_internal_latency_ = 2 * kArmIntegerOpLatency; |
| } |
| } |
| last_visited_latency_ = kArmIntegerOpLatency; |
| break; |
| default: |
| LOG(FATAL) << "Unexpected operation type " << type; |
| UNREACHABLE(); |
| } |
| } |
| |
| void SchedulingLatencyVisitorARM::VisitShl(HShl* instr) { |
| HandleShiftLatencies(instr); |
| } |
| |
| void SchedulingLatencyVisitorARM::VisitShr(HShr* instr) { |
| HandleShiftLatencies(instr); |
| } |
| |
| void SchedulingLatencyVisitorARM::VisitUShr(HUShr* instr) { |
| HandleShiftLatencies(instr); |
| } |
| |
| void SchedulingLatencyVisitorARM::HandleGenerateConditionWithZero(IfCondition condition) { |
| switch (condition) { |
| case kCondEQ: |
| case kCondBE: |
| case kCondNE: |
| case kCondA: |
| last_visited_internal_latency_ += kArmIntegerOpLatency; |
| last_visited_latency_ = kArmIntegerOpLatency; |
| break; |
| case kCondGE: |
| // Mvn |
| last_visited_internal_latency_ += kArmIntegerOpLatency; |
| FALLTHROUGH_INTENDED; |
| case kCondLT: |
| // Lsr |
| last_visited_latency_ = kArmIntegerOpLatency; |
| break; |
| case kCondAE: |
| // Trivially true. |
| // Mov |
| last_visited_latency_ = kArmIntegerOpLatency; |
| break; |
| case kCondB: |
| // Trivially false. |
| // Mov |
| last_visited_latency_ = kArmIntegerOpLatency; |
| break; |
| default: |
| LOG(FATAL) << "Unexpected condition " << condition; |
| UNREACHABLE(); |
| } |
| } |
| |
| void SchedulingLatencyVisitorARM::HandleGenerateLongTestConstant(HCondition* condition) { |
| DCHECK_EQ(condition->GetLeft()->GetType(), DataType::Type::kInt64); |
| |
| IfCondition cond = condition->GetCondition(); |
| |
| HInstruction* right = condition->InputAt(1); |
| |
| int64_t value = Uint64ConstantFrom(right); |
| |
| // Comparisons against 0 are common enough, so codegen has special handling for them. |
| if (value == 0) { |
| switch (cond) { |
| case kCondNE: |
| case kCondA: |
| case kCondEQ: |
| case kCondBE: |
| // Orrs |
| last_visited_internal_latency_ += kArmIntegerOpLatency; |
| return; |
| case kCondLT: |
| case kCondGE: |
| // Cmp |
| last_visited_internal_latency_ += kArmIntegerOpLatency; |
| return; |
| case kCondB: |
| case kCondAE: |
| // Cmp |
| last_visited_internal_latency_ += kArmIntegerOpLatency; |
| return; |
| default: |
| break; |
| } |
| } |
| |
| switch (cond) { |
| case kCondEQ: |
| case kCondNE: |
| case kCondB: |
| case kCondBE: |
| case kCondA: |
| case kCondAE: { |
| // Cmp, IT, Cmp |
| last_visited_internal_latency_ += 3 * kArmIntegerOpLatency; |
| break; |
| } |
| case kCondLE: |
| case kCondGT: |
| // Trivially true or false. |
| if (value == std::numeric_limits<int64_t>::max()) { |
| // Cmp |
| last_visited_internal_latency_ += kArmIntegerOpLatency; |
| break; |
| } |
| FALLTHROUGH_INTENDED; |
| case kCondGE: |
| case kCondLT: { |
| // Cmp, Sbcs |
| last_visited_internal_latency_ += 2 * kArmIntegerOpLatency; |
| break; |
| } |
| default: |
| LOG(FATAL) << "Unreachable"; |
| UNREACHABLE(); |
| } |
| } |
| |
| void SchedulingLatencyVisitorARM::HandleGenerateLongTest(HCondition* condition) { |
| DCHECK_EQ(condition->GetLeft()->GetType(), DataType::Type::kInt64); |
| |
| IfCondition cond = condition->GetCondition(); |
| |
| switch (cond) { |
| case kCondEQ: |
| case kCondNE: |
| case kCondB: |
| case kCondBE: |
| case kCondA: |
| case kCondAE: { |
| // Cmp, IT, Cmp |
| last_visited_internal_latency_ += 3 * kArmIntegerOpLatency; |
| break; |
| } |
| case kCondLE: |
| case kCondGT: |
| case kCondGE: |
| case kCondLT: { |
| // Cmp, Sbcs |
| last_visited_internal_latency_ += 2 * kArmIntegerOpLatency; |
| break; |
| } |
| default: |
| LOG(FATAL) << "Unreachable"; |
| UNREACHABLE(); |
| } |
| } |
| |
| // The GenerateTest series of function all counted as internal latency. |
| void SchedulingLatencyVisitorARM::HandleGenerateTest(HCondition* condition) { |
| const DataType::Type type = condition->GetLeft()->GetType(); |
| |
| if (type == DataType::Type::kInt64) { |
| condition->InputAt(1)->IsConstant() |
| ? HandleGenerateLongTestConstant(condition) |
| : HandleGenerateLongTest(condition); |
| } else if (DataType::IsFloatingPointType(type)) { |
| // GenerateVcmp + Vmrs |
| last_visited_internal_latency_ += 2 * kArmFloatingPointOpLatency; |
| } else { |
| // Cmp |
| last_visited_internal_latency_ += kArmIntegerOpLatency; |
| } |
| } |
| |
| bool SchedulingLatencyVisitorARM::CanGenerateTest(HCondition* condition) { |
| if (condition->GetLeft()->GetType() == DataType::Type::kInt64) { |
| HInstruction* right = condition->InputAt(1); |
| |
| if (right->IsConstant()) { |
| IfCondition c = condition->GetCondition(); |
| const uint64_t value = Uint64ConstantFrom(right); |
| |
| if (c < kCondLT || c > kCondGE) { |
| if (value != 0) { |
| return false; |
| } |
| } else if (c == kCondLE || c == kCondGT) { |
| if (value < std::numeric_limits<int64_t>::max() && |
| !codegen_->GetAssembler()->ShifterOperandCanHold( |
| SBC, High32Bits(value + 1), vixl32::FlagsUpdate::SetFlags)) { |
| return false; |
| } |
| } else if (!codegen_->GetAssembler()->ShifterOperandCanHold( |
| SBC, High32Bits(value), vixl32::FlagsUpdate::SetFlags)) { |
| return false; |
| } |
| } |
| } |
| |
| return true; |
| } |
| |
| void SchedulingLatencyVisitorARM::HandleGenerateConditionGeneric(HCondition* cond) { |
| HandleGenerateTest(cond); |
| |
| // Unlike codegen pass, we cannot check 'out' register IsLow() here, |
| // because scheduling is before liveness(location builder) and register allocator, |
| // so we can only choose to follow one path of codegen by assuming otu.IsLow() is true. |
| last_visited_internal_latency_ += 2 * kArmIntegerOpLatency; |
| last_visited_latency_ = kArmIntegerOpLatency; |
| } |
| |
| void SchedulingLatencyVisitorARM::HandleGenerateEqualLong(HCondition* cond) { |
| DCHECK_EQ(cond->GetLeft()->GetType(), DataType::Type::kInt64); |
| |
| IfCondition condition = cond->GetCondition(); |
| |
| last_visited_internal_latency_ += 2 * kArmIntegerOpLatency; |
| |
| if (condition == kCondNE) { |
| // Orrs, IT, Mov |
| last_visited_internal_latency_ += 3 * kArmIntegerOpLatency; |
| } else { |
| last_visited_internal_latency_ += kArmIntegerOpLatency; |
| HandleGenerateConditionWithZero(condition); |
| } |
| } |
| |
| void SchedulingLatencyVisitorARM::HandleGenerateLongComparesAndJumps() { |
| last_visited_internal_latency_ += 4 * kArmIntegerOpLatency; |
| last_visited_internal_latency_ += kArmBranchLatency; |
| } |
| |
| void SchedulingLatencyVisitorARM::HandleGenerateConditionLong(HCondition* cond) { |
| DCHECK_EQ(cond->GetLeft()->GetType(), DataType::Type::kInt64); |
| |
| IfCondition condition = cond->GetCondition(); |
| HInstruction* right = cond->InputAt(1); |
| |
| if (right->IsConstant()) { |
| // Comparisons against 0 are common enough, so codegen has special handling for them. |
| if (Uint64ConstantFrom(right) == 0) { |
| switch (condition) { |
| case kCondNE: |
| case kCondA: |
| case kCondEQ: |
| case kCondBE: |
| // Orr |
| last_visited_internal_latency_ += kArmIntegerOpLatency; |
| HandleGenerateConditionWithZero(condition); |
| return; |
| case kCondLT: |
| case kCondGE: |
| FALLTHROUGH_INTENDED; |
| case kCondAE: |
| case kCondB: |
| HandleGenerateConditionWithZero(condition); |
| return; |
| case kCondLE: |
| case kCondGT: |
| default: |
| break; |
| } |
| } |
| } |
| |
| if ((condition == kCondEQ || condition == kCondNE) && |
| !CanGenerateTest(cond)) { |
| HandleGenerateEqualLong(cond); |
| return; |
| } |
| |
| if (CanGenerateTest(cond)) { |
| HandleGenerateConditionGeneric(cond); |
| return; |
| } |
| |
| HandleGenerateLongComparesAndJumps(); |
| |
| last_visited_internal_latency_ += kArmIntegerOpLatency; |
| last_visited_latency_ = kArmBranchLatency;; |
| } |
| |
| void SchedulingLatencyVisitorARM::HandleGenerateConditionIntegralOrNonPrimitive(HCondition* cond) { |
| const DataType::Type type = cond->GetLeft()->GetType(); |
| |
| DCHECK(DataType::IsIntegralType(type) || type == DataType::Type::kReference) << type; |
| |
| if (type == DataType::Type::kInt64) { |
| HandleGenerateConditionLong(cond); |
| return; |
| } |
| |
| IfCondition condition = cond->GetCondition(); |
| HInstruction* right = cond->InputAt(1); |
| int64_t value; |
| |
| if (right->IsConstant()) { |
| value = Uint64ConstantFrom(right); |
| |
| // Comparisons against 0 are common enough, so codegen has special handling for them. |
| if (value == 0) { |
| switch (condition) { |
| case kCondNE: |
| case kCondA: |
| case kCondEQ: |
| case kCondBE: |
| case kCondLT: |
| case kCondGE: |
| case kCondAE: |
| case kCondB: |
| HandleGenerateConditionWithZero(condition); |
| return; |
| case kCondLE: |
| case kCondGT: |
| default: |
| break; |
| } |
| } |
| } |
| |
| if (condition == kCondEQ || condition == kCondNE) { |
| if (condition == kCondNE) { |
| // CMP, IT, MOV.ne |
| last_visited_internal_latency_ += 2 * kArmIntegerOpLatency; |
| last_visited_latency_ = kArmIntegerOpLatency; |
| } else { |
| last_visited_internal_latency_ += kArmIntegerOpLatency; |
| HandleGenerateConditionWithZero(condition); |
| } |
| return; |
| } |
| |
| HandleGenerateConditionGeneric(cond); |
| } |
| |
| void SchedulingLatencyVisitorARM::HandleCondition(HCondition* cond) { |
| if (cond->IsEmittedAtUseSite()) { |
| last_visited_latency_ = 0; |
| return; |
| } |
| |
| const DataType::Type type = cond->GetLeft()->GetType(); |
| |
| if (DataType::IsFloatingPointType(type)) { |
| HandleGenerateConditionGeneric(cond); |
| return; |
| } |
| |
| DCHECK(DataType::IsIntegralType(type) || type == DataType::Type::kReference) << type; |
| |
| const IfCondition condition = cond->GetCondition(); |
| |
| if (type == DataType::Type::kBool && |
| cond->GetRight()->GetType() == DataType::Type::kBool && |
| (condition == kCondEQ || condition == kCondNE)) { |
| if (condition == kCondEQ) { |
| last_visited_internal_latency_ = kArmIntegerOpLatency; |
| } |
| last_visited_latency_ = kArmIntegerOpLatency; |
| return; |
| } |
| |
| HandleGenerateConditionIntegralOrNonPrimitive(cond); |
| } |
| |
| void SchedulingLatencyVisitorARM::VisitCondition(HCondition* instr) { |
| HandleCondition(instr); |
| } |
| |
| void SchedulingLatencyVisitorARM::VisitCompare(HCompare* instr) { |
| DataType::Type type = instr->InputAt(0)->GetType(); |
| switch (type) { |
| case DataType::Type::kBool: |
| case DataType::Type::kUint8: |
| case DataType::Type::kInt8: |
| case DataType::Type::kUint16: |
| case DataType::Type::kInt16: |
| case DataType::Type::kInt32: |
| last_visited_internal_latency_ = 2 * kArmIntegerOpLatency; |
| break; |
| case DataType::Type::kInt64: |
| last_visited_internal_latency_ = 2 * kArmIntegerOpLatency + 3 * kArmBranchLatency; |
| break; |
| case DataType::Type::kFloat32: |
| case DataType::Type::kFloat64: |
| last_visited_internal_latency_ = kArmIntegerOpLatency + 2 * kArmFloatingPointOpLatency; |
| break; |
| default: |
| last_visited_internal_latency_ = 2 * kArmIntegerOpLatency; |
| break; |
| } |
| last_visited_latency_ = kArmIntegerOpLatency; |
| } |
| |
| void SchedulingLatencyVisitorARM::VisitBitwiseNegatedRight(HBitwiseNegatedRight* instruction) { |
| if (instruction->GetResultType() == DataType::Type::kInt32) { |
| last_visited_latency_ = kArmIntegerOpLatency; |
| } else { |
| last_visited_internal_latency_ = kArmIntegerOpLatency; |
| last_visited_latency_ = kArmIntegerOpLatency; |
| } |
| } |
| |
| void SchedulingLatencyVisitorARM::HandleGenerateDataProcInstruction(bool internal_latency) { |
| if (internal_latency) { |
| last_visited_internal_latency_ += kArmIntegerOpLatency; |
| } else { |
| last_visited_latency_ = kArmDataProcWithShifterOpLatency; |
| } |
| } |
| |
| void SchedulingLatencyVisitorARM::HandleGenerateDataProc(HDataProcWithShifterOp* instruction) { |
| const HInstruction::InstructionKind kind = instruction->GetInstrKind(); |
| if (kind == HInstruction::kAdd) { |
| last_visited_internal_latency_ = kArmIntegerOpLatency; |
| last_visited_latency_ = kArmIntegerOpLatency; |
| } else if (kind == HInstruction::kSub) { |
| last_visited_internal_latency_ = kArmIntegerOpLatency; |
| last_visited_latency_ = kArmIntegerOpLatency; |
| } else { |
| HandleGenerateDataProcInstruction(/* internal_latency= */ true); |
| HandleGenerateDataProcInstruction(); |
| } |
| } |
| |
| void SchedulingLatencyVisitorARM::HandleGenerateLongDataProc(HDataProcWithShifterOp* instruction) { |
| DCHECK_EQ(instruction->GetType(), DataType::Type::kInt64); |
| DCHECK(HDataProcWithShifterOp::IsShiftOp(instruction->GetOpKind())); |
| |
| const uint32_t shift_value = instruction->GetShiftAmount(); |
| const HInstruction::InstructionKind kind = instruction->GetInstrKind(); |
| |
| if (shift_value >= 32) { |
| // Different shift types actually generate similar code here, |
| // no need to differentiate shift types like the codegen pass does, |
| // which also avoids handling shift types from different ARM backends. |
| HandleGenerateDataProc(instruction); |
| } else { |
| DCHECK_GT(shift_value, 1U); |
| DCHECK_LT(shift_value, 32U); |
| |
| if (kind == HInstruction::kOr || kind == HInstruction::kXor) { |
| HandleGenerateDataProcInstruction(/* internal_latency= */ true); |
| HandleGenerateDataProcInstruction(/* internal_latency= */ true); |
| HandleGenerateDataProcInstruction(); |
| } else { |
| last_visited_internal_latency_ += 2 * kArmIntegerOpLatency; |
| HandleGenerateDataProc(instruction); |
| } |
| } |
| } |
| |
| void SchedulingLatencyVisitorARM::VisitDataProcWithShifterOp(HDataProcWithShifterOp* instruction) { |
| const HDataProcWithShifterOp::OpKind op_kind = instruction->GetOpKind(); |
| |
| if (instruction->GetType() == DataType::Type::kInt32) { |
| HandleGenerateDataProcInstruction(); |
| } else { |
| DCHECK_EQ(instruction->GetType(), DataType::Type::kInt64); |
| if (HDataProcWithShifterOp::IsExtensionOp(op_kind)) { |
| HandleGenerateDataProc(instruction); |
| } else { |
| HandleGenerateLongDataProc(instruction); |
| } |
| } |
| } |
| |
| void SchedulingLatencyVisitorARM::VisitIntermediateAddress(HIntermediateAddress* ATTRIBUTE_UNUSED) { |
| // Although the code generated is a simple `add` instruction, we found through empirical results |
| // that spacing it from its use in memory accesses was beneficial. |
| last_visited_internal_latency_ = kArmNopLatency; |
| last_visited_latency_ = kArmIntegerOpLatency; |
| } |
| |
| void SchedulingLatencyVisitorARM::VisitIntermediateAddressIndex( |
| HIntermediateAddressIndex* ATTRIBUTE_UNUSED) { |
| UNIMPLEMENTED(FATAL) << "IntermediateAddressIndex is not implemented for ARM"; |
| } |
| |
| void SchedulingLatencyVisitorARM::VisitMultiplyAccumulate(HMultiplyAccumulate* ATTRIBUTE_UNUSED) { |
| last_visited_latency_ = kArmMulIntegerLatency; |
| } |
| |
| void SchedulingLatencyVisitorARM::VisitArrayGet(HArrayGet* instruction) { |
| DataType::Type type = instruction->GetType(); |
| const bool maybe_compressed_char_at = |
| mirror::kUseStringCompression && instruction->IsStringCharAt(); |
| HInstruction* array_instr = instruction->GetArray(); |
| bool has_intermediate_address = array_instr->IsIntermediateAddress(); |
| HInstruction* index = instruction->InputAt(1); |
| |
| switch (type) { |
| case DataType::Type::kBool: |
| case DataType::Type::kUint8: |
| case DataType::Type::kInt8: |
| case DataType::Type::kUint16: |
| case DataType::Type::kInt16: |
| case DataType::Type::kInt32: { |
| if (maybe_compressed_char_at) { |
| last_visited_internal_latency_ += kArmMemoryLoadLatency; |
| } |
| if (index->IsConstant()) { |
| if (maybe_compressed_char_at) { |
| last_visited_internal_latency_ += |
| kArmIntegerOpLatency + kArmBranchLatency + kArmMemoryLoadLatency; |
| last_visited_latency_ = kArmBranchLatency; |
| } else { |
| last_visited_latency_ += kArmMemoryLoadLatency; |
| } |
| } else { |
| if (has_intermediate_address) { |
| } else { |
| last_visited_internal_latency_ += kArmIntegerOpLatency; |
| } |
| if (maybe_compressed_char_at) { |
| last_visited_internal_latency_ += |
| kArmIntegerOpLatency + kArmBranchLatency + kArmMemoryLoadLatency; |
| last_visited_latency_ = kArmBranchLatency; |
| } else { |
| last_visited_latency_ += kArmMemoryLoadLatency; |
| } |
| } |
| break; |
| } |
| |
| case DataType::Type::kReference: { |
| if (gUseReadBarrier && kUseBakerReadBarrier) { |
| last_visited_latency_ = kArmLoadWithBakerReadBarrierLatency; |
| } else { |
| if (index->IsConstant()) { |
| last_visited_latency_ = kArmMemoryLoadLatency; |
| } else { |
| if (has_intermediate_address) { |
| } else { |
| last_visited_internal_latency_ += kArmIntegerOpLatency; |
| } |
| last_visited_latency_ = kArmMemoryLoadLatency; |
| } |
| } |
| break; |
| } |
| |
| case DataType::Type::kInt64: { |
| if (index->IsConstant()) { |
| last_visited_latency_ = kArmMemoryLoadLatency; |
| } else { |
| last_visited_internal_latency_ += kArmIntegerOpLatency; |
| last_visited_latency_ = kArmMemoryLoadLatency; |
| } |
| break; |
| } |
| |
| case DataType::Type::kFloat32: { |
| if (index->IsConstant()) { |
| last_visited_latency_ = kArmMemoryLoadLatency; |
| } else { |
| last_visited_internal_latency_ += kArmIntegerOpLatency; |
| last_visited_latency_ = kArmMemoryLoadLatency; |
| } |
| break; |
| } |
| |
| case DataType::Type::kFloat64: { |
| if (index->IsConstant()) { |
| last_visited_latency_ = kArmMemoryLoadLatency; |
| } else { |
| last_visited_internal_latency_ += kArmIntegerOpLatency; |
| last_visited_latency_ = kArmMemoryLoadLatency; |
| } |
| break; |
| } |
| |
| default: |
| LOG(FATAL) << "Unreachable type " << type; |
| UNREACHABLE(); |
| } |
| } |
| |
| void SchedulingLatencyVisitorARM::VisitArrayLength(HArrayLength* instruction) { |
| last_visited_latency_ = kArmMemoryLoadLatency; |
| if (mirror::kUseStringCompression && instruction->IsStringLength()) { |
| last_visited_internal_latency_ = kArmMemoryLoadLatency; |
| last_visited_latency_ = kArmIntegerOpLatency; |
| } |
| } |
| |
| void SchedulingLatencyVisitorARM::VisitArraySet(HArraySet* instruction) { |
| HInstruction* index = instruction->InputAt(1); |
| DataType::Type value_type = instruction->GetComponentType(); |
| HInstruction* array_instr = instruction->GetArray(); |
| bool has_intermediate_address = array_instr->IsIntermediateAddress(); |
| |
| switch (value_type) { |
| case DataType::Type::kBool: |
| case DataType::Type::kUint8: |
| case DataType::Type::kInt8: |
| case DataType::Type::kUint16: |
| case DataType::Type::kInt16: |
| case DataType::Type::kInt32: { |
| if (index->IsConstant()) { |
| last_visited_latency_ = kArmMemoryStoreLatency; |
| } else { |
| if (has_intermediate_address) { |
| } else { |
| last_visited_internal_latency_ = kArmIntegerOpLatency; |
| } |
| last_visited_latency_ = kArmMemoryStoreLatency; |
| } |
| break; |
| } |
| |
| case DataType::Type::kReference: { |
| if (instruction->InputAt(2)->IsNullConstant()) { |
| if (index->IsConstant()) { |
| last_visited_latency_ = kArmMemoryStoreLatency; |
| } else { |
| last_visited_internal_latency_ = kArmIntegerOpLatency; |
| last_visited_latency_ = kArmMemoryStoreLatency; |
| } |
| } else { |
| // Following the exact instructions of runtime type checks is too complicated, |
| // just giving it a simple slow latency. |
| last_visited_latency_ = kArmRuntimeTypeCheckLatency; |
| } |
| break; |
| } |
| |
| case DataType::Type::kInt64: { |
| if (index->IsConstant()) { |
| last_visited_latency_ = kArmMemoryLoadLatency; |
| } else { |
| last_visited_internal_latency_ = kArmIntegerOpLatency; |
| last_visited_latency_ = kArmMemoryLoadLatency; |
| } |
| break; |
| } |
| |
| case DataType::Type::kFloat32: { |
| if (index->IsConstant()) { |
| last_visited_latency_ = kArmMemoryLoadLatency; |
| } else { |
| last_visited_internal_latency_ = kArmIntegerOpLatency; |
| last_visited_latency_ = kArmMemoryLoadLatency; |
| } |
| break; |
| } |
| |
| case DataType::Type::kFloat64: { |
| if (index->IsConstant()) { |
| last_visited_latency_ = kArmMemoryLoadLatency; |
| } else { |
| last_visited_internal_latency_ = kArmIntegerOpLatency; |
| last_visited_latency_ = kArmMemoryLoadLatency; |
| } |
| break; |
| } |
| |
| default: |
| LOG(FATAL) << "Unreachable type " << value_type; |
| UNREACHABLE(); |
| } |
| } |
| |
| void SchedulingLatencyVisitorARM::VisitBoundsCheck(HBoundsCheck* ATTRIBUTE_UNUSED) { |
| last_visited_internal_latency_ = kArmIntegerOpLatency; |
| // Users do not use any data results. |
| last_visited_latency_ = 0; |
| } |
| |
| void SchedulingLatencyVisitorARM::HandleDivRemConstantIntegralLatencies(int32_t imm) { |
| if (imm == 0) { |
| last_visited_internal_latency_ = 0; |
| last_visited_latency_ = 0; |
| } else if (imm == 1 || imm == -1) { |
| last_visited_latency_ = kArmIntegerOpLatency; |
| } else if (IsPowerOfTwo(AbsOrMin(imm))) { |
| last_visited_internal_latency_ = 3 * kArmIntegerOpLatency; |
| last_visited_latency_ = kArmIntegerOpLatency; |
| } else { |
| last_visited_internal_latency_ = kArmMulIntegerLatency + 2 * kArmIntegerOpLatency; |
| last_visited_latency_ = kArmIntegerOpLatency; |
| } |
| } |
| |
| void SchedulingLatencyVisitorARM::VisitDiv(HDiv* instruction) { |
| DataType::Type type = instruction->GetResultType(); |
| switch (type) { |
| case DataType::Type::kInt32: { |
| HInstruction* rhs = instruction->GetRight(); |
| if (rhs->IsConstant()) { |
| int32_t imm = Int32ConstantFrom(rhs->AsConstant()); |
| HandleDivRemConstantIntegralLatencies(imm); |
| } else { |
| last_visited_latency_ = kArmDivIntegerLatency; |
| } |
| break; |
| } |
| case DataType::Type::kFloat32: |
| last_visited_latency_ = kArmDivFloatLatency; |
| break; |
| case DataType::Type::kFloat64: |
| last_visited_latency_ = kArmDivDoubleLatency; |
| break; |
| default: |
| last_visited_internal_latency_ = kArmCallInternalLatency; |
| last_visited_latency_ = kArmCallLatency; |
| break; |
| } |
| } |
| |
| void SchedulingLatencyVisitorARM::VisitPredicatedInstanceFieldGet( |
| HPredicatedInstanceFieldGet* instruction) { |
| HandleFieldGetLatencies(instruction, instruction->GetFieldInfo()); |
| } |
| |
| void SchedulingLatencyVisitorARM::VisitInstanceFieldGet(HInstanceFieldGet* instruction) { |
| HandleFieldGetLatencies(instruction, instruction->GetFieldInfo()); |
| } |
| |
| void SchedulingLatencyVisitorARM::VisitInstanceFieldSet(HInstanceFieldSet* instruction) { |
| HandleFieldSetLatencies(instruction, instruction->GetFieldInfo()); |
| } |
| |
| void SchedulingLatencyVisitorARM::VisitInstanceOf(HInstanceOf* ATTRIBUTE_UNUSED) { |
| last_visited_internal_latency_ = kArmCallInternalLatency; |
| last_visited_latency_ = kArmIntegerOpLatency; |
| } |
| |
| void SchedulingLatencyVisitorARM::VisitInvoke(HInvoke* ATTRIBUTE_UNUSED) { |
| last_visited_internal_latency_ = kArmCallInternalLatency; |
| last_visited_latency_ = kArmCallLatency; |
| } |
| |
| void SchedulingLatencyVisitorARM::VisitLoadString(HLoadString* ATTRIBUTE_UNUSED) { |
| last_visited_internal_latency_ = kArmLoadStringInternalLatency; |
| last_visited_latency_ = kArmMemoryLoadLatency; |
| } |
| |
| void SchedulingLatencyVisitorARM::VisitNewArray(HNewArray* ATTRIBUTE_UNUSED) { |
| last_visited_internal_latency_ = kArmIntegerOpLatency + kArmCallInternalLatency; |
| last_visited_latency_ = kArmCallLatency; |
| } |
| |
| void SchedulingLatencyVisitorARM::VisitNewInstance(HNewInstance* instruction) { |
| if (instruction->IsStringAlloc()) { |
| last_visited_internal_latency_ = 2 * kArmMemoryLoadLatency + kArmCallInternalLatency; |
| } else { |
| last_visited_internal_latency_ = kArmCallInternalLatency; |
| } |
| last_visited_latency_ = kArmCallLatency; |
| } |
| |
| void SchedulingLatencyVisitorARM::VisitRem(HRem* instruction) { |
| DataType::Type type = instruction->GetResultType(); |
| switch (type) { |
| case DataType::Type::kInt32: { |
| HInstruction* rhs = instruction->GetRight(); |
| if (rhs->IsConstant()) { |
| int32_t imm = Int32ConstantFrom(rhs->AsConstant()); |
| HandleDivRemConstantIntegralLatencies(imm); |
| } else { |
| last_visited_internal_latency_ = kArmDivIntegerLatency; |
| last_visited_latency_ = kArmMulIntegerLatency; |
| } |
| break; |
| } |
| default: |
| last_visited_internal_latency_ = kArmCallInternalLatency; |
| last_visited_latency_ = kArmCallLatency; |
| break; |
| } |
| } |
| |
| void SchedulingLatencyVisitorARM::HandleFieldGetLatencies(HInstruction* instruction, |
| const FieldInfo& field_info) { |
| DCHECK(instruction->IsInstanceFieldGet() || |
| instruction->IsStaticFieldGet() || |
| instruction->IsPredicatedInstanceFieldGet()); |
| DCHECK(codegen_ != nullptr); |
| bool is_volatile = field_info.IsVolatile(); |
| DataType::Type field_type = field_info.GetFieldType(); |
| bool atomic_ldrd_strd = codegen_->GetInstructionSetFeatures().HasAtomicLdrdAndStrd(); |
| |
| switch (field_type) { |
| case DataType::Type::kBool: |
| case DataType::Type::kUint8: |
| case DataType::Type::kInt8: |
| case DataType::Type::kUint16: |
| case DataType::Type::kInt16: |
| case DataType::Type::kInt32: |
| last_visited_latency_ = kArmMemoryLoadLatency; |
| break; |
| |
| case DataType::Type::kReference: |
| if (gUseReadBarrier && kUseBakerReadBarrier) { |
| last_visited_internal_latency_ = kArmMemoryLoadLatency + kArmIntegerOpLatency; |
| last_visited_latency_ = kArmMemoryLoadLatency; |
| } else { |
| last_visited_latency_ = kArmMemoryLoadLatency; |
| } |
| break; |
| |
| case DataType::Type::kInt64: |
| if (is_volatile && !atomic_ldrd_strd) { |
| last_visited_internal_latency_ = kArmMemoryLoadLatency + kArmIntegerOpLatency; |
| last_visited_latency_ = kArmMemoryLoadLatency; |
| } else { |
| last_visited_latency_ = kArmMemoryLoadLatency; |
| } |
| break; |
| |
| case DataType::Type::kFloat32: |
| last_visited_latency_ = kArmMemoryLoadLatency; |
| break; |
| |
| case DataType::Type::kFloat64: |
| if (is_volatile && !atomic_ldrd_strd) { |
| last_visited_internal_latency_ = |
| kArmMemoryLoadLatency + kArmIntegerOpLatency + kArmMemoryLoadLatency; |
| last_visited_latency_ = kArmIntegerOpLatency; |
| } else { |
| last_visited_latency_ = kArmMemoryLoadLatency; |
| } |
| break; |
| |
| default: |
| last_visited_latency_ = kArmMemoryLoadLatency; |
| break; |
| } |
| |
| if (is_volatile) { |
| last_visited_internal_latency_ += kArmMemoryBarrierLatency; |
| } |
| } |
| |
| void SchedulingLatencyVisitorARM::HandleFieldSetLatencies(HInstruction* instruction, |
| const FieldInfo& field_info) { |
| DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet()); |
| DCHECK(codegen_ != nullptr); |
| bool is_volatile = field_info.IsVolatile(); |
| DataType::Type field_type = field_info.GetFieldType(); |
| bool needs_write_barrier = |
| CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1)); |
| bool atomic_ldrd_strd = codegen_->GetInstructionSetFeatures().HasAtomicLdrdAndStrd(); |
| |
| switch (field_type) { |
| case DataType::Type::kBool: |
| case DataType::Type::kUint8: |
| case DataType::Type::kInt8: |
| case DataType::Type::kUint16: |
| case DataType::Type::kInt16: |
| if (is_volatile) { |
| last_visited_internal_latency_ = kArmMemoryBarrierLatency + kArmMemoryStoreLatency; |
| last_visited_latency_ = kArmMemoryBarrierLatency; |
| } else { |
| last_visited_latency_ = kArmMemoryStoreLatency; |
| } |
| break; |
| |
| case DataType::Type::kInt32: |
| case DataType::Type::kReference: |
| if (kPoisonHeapReferences && needs_write_barrier) { |
| last_visited_internal_latency_ += kArmIntegerOpLatency * 2; |
| } |
| last_visited_latency_ = kArmMemoryStoreLatency; |
| break; |
| |
| case DataType::Type::kInt64: |
| if (is_volatile && !atomic_ldrd_strd) { |
| last_visited_internal_latency_ = |
| kArmIntegerOpLatency + kArmMemoryLoadLatency + kArmMemoryStoreLatency; |
| last_visited_latency_ = kArmIntegerOpLatency; |
| } else { |
| last_visited_latency_ = kArmMemoryStoreLatency; |
| } |
| break; |
| |
| case DataType::Type::kFloat32: |
| last_visited_latency_ = kArmMemoryStoreLatency; |
| break; |
| |
| case DataType::Type::kFloat64: |
| if (is_volatile && !atomic_ldrd_strd) { |
| last_visited_internal_latency_ = kArmIntegerOpLatency + |
| kArmIntegerOpLatency + kArmMemoryLoadLatency + kArmMemoryStoreLatency; |
| last_visited_latency_ = kArmIntegerOpLatency; |
| } else { |
| last_visited_latency_ = kArmMemoryStoreLatency; |
| } |
| break; |
| |
| default: |
| last_visited_latency_ = kArmMemoryStoreLatency; |
| break; |
| } |
| } |
| |
| void SchedulingLatencyVisitorARM::VisitStaticFieldGet(HStaticFieldGet* instruction) { |
| HandleFieldGetLatencies(instruction, instruction->GetFieldInfo()); |
| } |
| |
| void SchedulingLatencyVisitorARM::VisitStaticFieldSet(HStaticFieldSet* instruction) { |
| HandleFieldSetLatencies(instruction, instruction->GetFieldInfo()); |
| } |
| |
| void SchedulingLatencyVisitorARM::VisitSuspendCheck(HSuspendCheck* instruction) { |
| HBasicBlock* block = instruction->GetBlock(); |
| DCHECK_IMPLIES(block->GetLoopInformation() == nullptr, |
| block->IsEntryBlock() && instruction->GetNext()->IsGoto()); |
| // Users do not use any data results. |
| last_visited_latency_ = 0; |
| } |
| |
| void SchedulingLatencyVisitorARM::VisitTypeConversion(HTypeConversion* instr) { |
| DataType::Type result_type = instr->GetResultType(); |
| DataType::Type input_type = instr->GetInputType(); |
| |
| switch (result_type) { |
| case DataType::Type::kUint8: |
| case DataType::Type::kInt8: |
| case DataType::Type::kUint16: |
| case DataType::Type::kInt16: |
| last_visited_latency_ = kArmIntegerOpLatency; // SBFX or UBFX |
| break; |
| |
| case DataType::Type::kInt32: |
| switch (input_type) { |
| case DataType::Type::kInt64: |
| last_visited_latency_ = kArmIntegerOpLatency; // MOV |
| break; |
| case DataType::Type::kFloat32: |
| case DataType::Type::kFloat64: |
| last_visited_internal_latency_ = kArmTypeConversionFloatingPointIntegerLatency; |
| last_visited_latency_ = kArmFloatingPointOpLatency; |
| break; |
| default: |
| last_visited_latency_ = kArmIntegerOpLatency; |
| break; |
| } |
| break; |
| |
| case DataType::Type::kInt64: |
| switch (input_type) { |
| case DataType::Type::kBool: |
| case DataType::Type::kUint8: |
| case DataType::Type::kInt8: |
| case DataType::Type::kUint16: |
| case DataType::Type::kInt16: |
| case DataType::Type::kInt32: |
| // MOV and extension |
| last_visited_internal_latency_ = kArmIntegerOpLatency; |
| last_visited_latency_ = kArmIntegerOpLatency; |
| break; |
| case DataType::Type::kFloat32: |
| case DataType::Type::kFloat64: |
| // invokes runtime |
| last_visited_internal_latency_ = kArmCallInternalLatency; |
| break; |
| default: |
| last_visited_internal_latency_ = kArmIntegerOpLatency; |
| last_visited_latency_ = kArmIntegerOpLatency; |
| break; |
| } |
| break; |
| |
| case DataType::Type::kFloat32: |
| switch (input_type) { |
| case DataType::Type::kBool: |
| case DataType::Type::kUint8: |
| case DataType::Type::kInt8: |
| case DataType::Type::kUint16: |
| case DataType::Type::kInt16: |
| case DataType::Type::kInt32: |
| last_visited_internal_latency_ = kArmTypeConversionFloatingPointIntegerLatency; |
| last_visited_latency_ = kArmFloatingPointOpLatency; |
| break; |
| case DataType::Type::kInt64: |
| // invokes runtime |
| last_visited_internal_latency_ = kArmCallInternalLatency; |
| break; |
| case DataType::Type::kFloat64: |
| last_visited_latency_ = kArmFloatingPointOpLatency; |
| break; |
| default: |
| last_visited_latency_ = kArmFloatingPointOpLatency; |
| break; |
| } |
| break; |
| |
| case DataType::Type::kFloat64: |
| switch (input_type) { |
| case DataType::Type::kBool: |
| case DataType::Type::kUint8: |
| case DataType::Type::kInt8: |
| case DataType::Type::kUint16: |
| case DataType::Type::kInt16: |
| case DataType::Type::kInt32: |
| last_visited_internal_latency_ = kArmTypeConversionFloatingPointIntegerLatency; |
| last_visited_latency_ = kArmFloatingPointOpLatency; |
| break; |
| case DataType::Type::kInt64: |
| last_visited_internal_latency_ = 5 * kArmFloatingPointOpLatency; |
| last_visited_latency_ = kArmFloatingPointOpLatency; |
| break; |
| case DataType::Type::kFloat32: |
| last_visited_latency_ = kArmFloatingPointOpLatency; |
| break; |
| default: |
| last_visited_latency_ = kArmFloatingPointOpLatency; |
| break; |
| } |
| break; |
| |
| default: |
| last_visited_latency_ = kArmTypeConversionFloatingPointIntegerLatency; |
| break; |
| } |
| } |
| |
| } // namespace arm |
| } // namespace art |