/*
* Copyright (C) 2017 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "scheduler_arm.h"
#include "arch/arm/instruction_set_features_arm.h"
#include "code_generator_arm_vixl.h"
#include "code_generator_utils.h"
#include "common_arm.h"
#include "heap_poisoning.h"
#include "mirror/array-inl.h"
#include "mirror/string.h"
namespace art HIDDEN {
namespace arm {
using helpers::Int32ConstantFrom;
using helpers::Uint64ConstantFrom;
// AArch32 instruction latencies.
// We currently assume that all ARM CPUs share the same instruction latency list.
// The following latencies were tuned based on performance experiments and
// automatic tuning using a differential evolution approach on various benchmarks.
static constexpr uint32_t kArmIntegerOpLatency = 2;
static constexpr uint32_t kArmFloatingPointOpLatency = 11;
static constexpr uint32_t kArmDataProcWithShifterOpLatency = 4;
static constexpr uint32_t kArmMulIntegerLatency = 6;
static constexpr uint32_t kArmMulFloatingPointLatency = 11;
static constexpr uint32_t kArmDivIntegerLatency = 10;
static constexpr uint32_t kArmDivFloatLatency = 20;
static constexpr uint32_t kArmDivDoubleLatency = 25;
static constexpr uint32_t kArmTypeConversionFloatingPointIntegerLatency = 11;
static constexpr uint32_t kArmMemoryLoadLatency = 9;
static constexpr uint32_t kArmMemoryStoreLatency = 9;
static constexpr uint32_t kArmMemoryBarrierLatency = 6;
static constexpr uint32_t kArmBranchLatency = 4;
static constexpr uint32_t kArmCallLatency = 5;
static constexpr uint32_t kArmCallInternalLatency = 29;
static constexpr uint32_t kArmLoadStringInternalLatency = 10;
static constexpr uint32_t kArmNopLatency = 2;
static constexpr uint32_t kArmLoadWithBakerReadBarrierLatency = 18;
static constexpr uint32_t kArmRuntimeTypeCheckLatency = 46;
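// Latency visitor for the ARM backend. For each visited HInstruction it fills in the two
// fields inherited from SchedulingLatencyVisitor: last_visited_internal_latency_, the
// accumulated cost of the intermediate instructions the code generator is expected to emit,
// and last_visited_latency_, the latency of the final instruction that produces the result.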
class SchedulingLatencyVisitorARM final : public SchedulingLatencyVisitor {
public:
explicit SchedulingLatencyVisitorARM(CodeGenerator* codegen)
: codegen_(down_cast<CodeGeneratorARMVIXL*>(codegen)) {}
// Default visitor for instructions not handled specifically below.
void VisitInstruction([[maybe_unused]] HInstruction*) override {
last_visited_latency_ = kArmIntegerOpLatency;
}
// We add a second unused parameter to be able to use this macro like the others
// defined in `nodes.h`.
#define FOR_EACH_SCHEDULED_ARM_INSTRUCTION(M) \
M(ArrayGet, unused) \
M(ArrayLength, unused) \
M(ArraySet, unused) \
M(Add, unused) \
M(Sub, unused) \
M(And, unused) \
M(Or, unused) \
M(Ror, unused) \
M(Xor, unused) \
M(Shl, unused) \
M(Shr, unused) \
M(UShr, unused) \
M(Mul, unused) \
M(Div, unused) \
M(Condition, unused) \
M(Compare, unused) \
M(BoundsCheck, unused) \
M(InstanceFieldGet, unused) \
M(InstanceFieldSet, unused) \
M(InstanceOf, unused) \
M(Invoke, unused) \
M(LoadString, unused) \
M(NewArray, unused) \
M(NewInstance, unused) \
M(Rem, unused) \
M(StaticFieldGet, unused) \
M(StaticFieldSet, unused) \
M(SuspendCheck, unused) \
M(TypeConversion, unused)
#define FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(M) \
M(BitwiseNegatedRight, unused) \
M(MultiplyAccumulate, unused) \
M(IntermediateAddress, unused) \
M(IntermediateAddressIndex, unused) \
M(DataProcWithShifterOp, unused)
#define DECLARE_VISIT_INSTRUCTION(type, unused) \
void Visit##type(H##type* instruction) override;
FOR_EACH_SCHEDULED_ARM_INSTRUCTION(DECLARE_VISIT_INSTRUCTION)
FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(DECLARE_VISIT_INSTRUCTION)
FOR_EACH_CONCRETE_INSTRUCTION_ARM(DECLARE_VISIT_INSTRUCTION)
#undef DECLARE_VISIT_INSTRUCTION
private:
bool CanGenerateTest(HCondition* cond);
void HandleGenerateConditionWithZero(IfCondition cond);
void HandleGenerateLongTestConstant(HCondition* cond);
void HandleGenerateLongTest(HCondition* cond);
void HandleGenerateLongComparesAndJumps();
void HandleGenerateTest(HCondition* cond);
void HandleGenerateConditionGeneric(HCondition* cond);
void HandleGenerateEqualLong(HCondition* cond);
void HandleGenerateConditionLong(HCondition* cond);
void HandleGenerateConditionIntegralOrNonPrimitive(HCondition* cond);
void HandleCondition(HCondition* instr);
  void HandleBinaryOperationLatencies(HBinaryOperation* instr);
  void HandleBitwiseOperationLatencies(HBinaryOperation* instr);
void HandleShiftLatencies(HBinaryOperation* instr);
void HandleDivRemConstantIntegralLatencies(int32_t imm);
void HandleFieldSetLatencies(HInstruction* instruction, const FieldInfo& field_info);
void HandleFieldGetLatencies(HInstruction* instruction, const FieldInfo& field_info);
void HandleGenerateDataProcInstruction(bool internal_latency = false);
void HandleGenerateDataProc(HDataProcWithShifterOp* instruction);
void HandleGenerateLongDataProc(HDataProcWithShifterOp* instruction);
  // The latency setting for each HInstruction depends on how the CodeGenerator may generate
  // code; latency visitors may query the CodeGenerator for such information to set latencies
  // accurately.
CodeGeneratorARMVIXL* codegen_;
};
void SchedulingLatencyVisitorARM::HandleBinaryOperationLatencies(HBinaryOperation* instr) {
switch (instr->GetResultType()) {
case DataType::Type::kInt64:
// HAdd and HSub long operations translate to ADDS+ADC or SUBS+SBC pairs,
// so a bubble (kArmNopLatency) is added to represent the internal carry flag
// dependency inside these pairs.
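      // For example, a 64-bit add is expected to be emitted roughly as (registers are
      // illustrative only):
      //   adds out_lo, lhs_lo, rhs_lo
      //   adc  out_hi, lhs_hi, rhs_hi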
last_visited_internal_latency_ = kArmIntegerOpLatency + kArmNopLatency;
last_visited_latency_ = kArmIntegerOpLatency;
break;
case DataType::Type::kFloat32:
case DataType::Type::kFloat64:
last_visited_latency_ = kArmFloatingPointOpLatency;
break;
default:
last_visited_latency_ = kArmIntegerOpLatency;
break;
}
}
void SchedulingLatencyVisitorARM::VisitAdd(HAdd* instr) {
  HandleBinaryOperationLatencies(instr);
}
void SchedulingLatencyVisitorARM::VisitSub(HSub* instr) {
  HandleBinaryOperationLatencies(instr);
}
void SchedulingLatencyVisitorARM::VisitMul(HMul* instr) {
switch (instr->GetResultType()) {
case DataType::Type::kInt64:
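      // A 64-bit multiply is expected to expand into several 32-bit multiply/accumulate
      // instructions before a final combining operation; this is modeled as three integer
      // multiplies of internal latency (a sketch of the lowering, not an exact count).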
last_visited_internal_latency_ = 3 * kArmMulIntegerLatency;
last_visited_latency_ = kArmIntegerOpLatency;
break;
case DataType::Type::kFloat32:
case DataType::Type::kFloat64:
last_visited_latency_ = kArmMulFloatingPointLatency;
break;
default:
last_visited_latency_ = kArmMulIntegerLatency;
break;
}
}
void SchedulingLatencyVisitorARM::HandleBitwiseOperationLatencies(HBinaryOperation* instr) {
switch (instr->GetResultType()) {
case DataType::Type::kInt64:
last_visited_internal_latency_ = kArmIntegerOpLatency;
last_visited_latency_ = kArmIntegerOpLatency;
break;
case DataType::Type::kFloat32:
case DataType::Type::kFloat64:
last_visited_latency_ = kArmFloatingPointOpLatency;
break;
default:
last_visited_latency_ = kArmIntegerOpLatency;
break;
}
}
void SchedulingLatencyVisitorARM::VisitAnd(HAnd* instr) {
  HandleBitwiseOperationLatencies(instr);
}
void SchedulingLatencyVisitorARM::VisitOr(HOr* instr) {
  HandleBitwiseOperationLatencies(instr);
}
void SchedulingLatencyVisitorARM::VisitXor(HXor* instr) {
  HandleBitwiseOperationLatencies(instr);
}
void SchedulingLatencyVisitorARM::VisitRor(HRor* instr) {
switch (instr->GetResultType()) {
case DataType::Type::kInt32:
last_visited_latency_ = kArmIntegerOpLatency;
break;
case DataType::Type::kInt64: {
// HandleLongRotate
HInstruction* rhs = instr->GetRight();
if (rhs->IsConstant()) {
uint64_t rot = Uint64ConstantFrom(rhs->AsConstant()) & kMaxLongShiftDistance;
if (rot != 0u) {
last_visited_internal_latency_ = 3 * kArmIntegerOpLatency;
last_visited_latency_ = kArmIntegerOpLatency;
} else {
last_visited_internal_latency_ = kArmIntegerOpLatency;
last_visited_latency_ = kArmIntegerOpLatency;
}
} else {
last_visited_internal_latency_ = 9 * kArmIntegerOpLatency + kArmBranchLatency;
last_visited_latency_ = kArmBranchLatency;
}
break;
}
default:
LOG(FATAL) << "Unexpected operation type " << instr->GetResultType();
UNREACHABLE();
}
}
void SchedulingLatencyVisitorARM::HandleShiftLatencies(HBinaryOperation* instr) {
DataType::Type type = instr->GetResultType();
HInstruction* rhs = instr->GetRight();
switch (type) {
case DataType::Type::kInt32:
if (!rhs->IsConstant()) {
last_visited_internal_latency_ = kArmIntegerOpLatency;
}
last_visited_latency_ = kArmIntegerOpLatency;
break;
case DataType::Type::kInt64:
if (!rhs->IsConstant()) {
last_visited_internal_latency_ = 8 * kArmIntegerOpLatency;
} else {
uint32_t shift_value = Int32ConstantFrom(rhs->AsConstant()) & kMaxLongShiftDistance;
if (shift_value == 1 || shift_value >= 32) {
last_visited_internal_latency_ = kArmIntegerOpLatency;
} else {
last_visited_internal_latency_ = 2 * kArmIntegerOpLatency;
}
}
last_visited_latency_ = kArmIntegerOpLatency;
break;
default:
LOG(FATAL) << "Unexpected operation type " << type;
UNREACHABLE();
}
}
void SchedulingLatencyVisitorARM::VisitShl(HShl* instr) {
HandleShiftLatencies(instr);
}
void SchedulingLatencyVisitorARM::VisitShr(HShr* instr) {
HandleShiftLatencies(instr);
}
void SchedulingLatencyVisitorARM::VisitUShr(HUShr* instr) {
HandleShiftLatencies(instr);
}
void SchedulingLatencyVisitorARM::HandleGenerateConditionWithZero(IfCondition condition) {
switch (condition) {
case kCondEQ:
case kCondBE:
case kCondNE:
case kCondA:
last_visited_internal_latency_ += kArmIntegerOpLatency;
last_visited_latency_ = kArmIntegerOpLatency;
break;
case kCondGE:
// Mvn
last_visited_internal_latency_ += kArmIntegerOpLatency;
FALLTHROUGH_INTENDED;
case kCondLT:
// Lsr
last_visited_latency_ = kArmIntegerOpLatency;
break;
case kCondAE:
// Trivially true.
// Mov
last_visited_latency_ = kArmIntegerOpLatency;
break;
case kCondB:
// Trivially false.
// Mov
last_visited_latency_ = kArmIntegerOpLatency;
break;
default:
LOG(FATAL) << "Unexpected condition " << condition;
UNREACHABLE();
}
}
void SchedulingLatencyVisitorARM::HandleGenerateLongTestConstant(HCondition* condition) {
DCHECK_EQ(condition->GetLeft()->GetType(), DataType::Type::kInt64);
IfCondition cond = condition->GetCondition();
HInstruction* right = condition->InputAt(1);
int64_t value = Uint64ConstantFrom(right);
// Comparisons against 0 are common enough, so codegen has special handling for them.
if (value == 0) {
switch (cond) {
case kCondNE:
case kCondA:
case kCondEQ:
case kCondBE:
// Orrs
last_visited_internal_latency_ += kArmIntegerOpLatency;
return;
case kCondLT:
case kCondGE:
// Cmp
last_visited_internal_latency_ += kArmIntegerOpLatency;
return;
case kCondB:
case kCondAE:
// Cmp
last_visited_internal_latency_ += kArmIntegerOpLatency;
return;
default:
break;
}
}
switch (cond) {
case kCondEQ:
case kCondNE:
case kCondB:
case kCondBE:
case kCondA:
case kCondAE: {
// Cmp, IT, Cmp
last_visited_internal_latency_ += 3 * kArmIntegerOpLatency;
break;
}
case kCondLE:
case kCondGT:
// Trivially true or false.
if (value == std::numeric_limits<int64_t>::max()) {
// Cmp
last_visited_internal_latency_ += kArmIntegerOpLatency;
break;
}
FALLTHROUGH_INTENDED;
case kCondGE:
case kCondLT: {
// Cmp, Sbcs
last_visited_internal_latency_ += 2 * kArmIntegerOpLatency;
break;
}
default:
LOG(FATAL) << "Unreachable";
UNREACHABLE();
}
}
void SchedulingLatencyVisitorARM::HandleGenerateLongTest(HCondition* condition) {
DCHECK_EQ(condition->GetLeft()->GetType(), DataType::Type::kInt64);
IfCondition cond = condition->GetCondition();
switch (cond) {
case kCondEQ:
case kCondNE:
case kCondB:
case kCondBE:
case kCondA:
case kCondAE: {
// Cmp, IT, Cmp
last_visited_internal_latency_ += 3 * kArmIntegerOpLatency;
break;
}
case kCondLE:
case kCondGT:
case kCondGE:
case kCondLT: {
// Cmp, Sbcs
last_visited_internal_latency_ += 2 * kArmIntegerOpLatency;
break;
}
default:
LOG(FATAL) << "Unreachable";
UNREACHABLE();
}
}
// The GenerateTest series of functions are all counted as internal latency.
void SchedulingLatencyVisitorARM::HandleGenerateTest(HCondition* condition) {
const DataType::Type type = condition->GetLeft()->GetType();
if (type == DataType::Type::kInt64) {
condition->InputAt(1)->IsConstant()
? HandleGenerateLongTestConstant(condition)
: HandleGenerateLongTest(condition);
} else if (DataType::IsFloatingPointType(type)) {
// GenerateVcmp + Vmrs
last_visited_internal_latency_ += 2 * kArmFloatingPointOpLatency;
} else {
// Cmp
last_visited_internal_latency_ += kArmIntegerOpLatency;
}
}
bool SchedulingLatencyVisitorARM::CanGenerateTest(HCondition* condition) {
if (condition->GetLeft()->GetType() == DataType::Type::kInt64) {
HInstruction* right = condition->InputAt(1);
if (right->IsConstant()) {
IfCondition c = condition->GetCondition();
const uint64_t value = Uint64ConstantFrom(right);
if (c < kCondLT || c > kCondGE) {
if (value != 0) {
return false;
}
} else if (c == kCondLE || c == kCondGT) {
if (value < std::numeric_limits<int64_t>::max() &&
!codegen_->GetAssembler()->ShifterOperandCanHold(
SBC, High32Bits(value + 1), vixl32::FlagsUpdate::SetFlags)) {
return false;
}
} else if (!codegen_->GetAssembler()->ShifterOperandCanHold(
SBC, High32Bits(value), vixl32::FlagsUpdate::SetFlags)) {
return false;
}
}
}
return true;
}
void SchedulingLatencyVisitorARM::HandleGenerateConditionGeneric(HCondition* cond) {
HandleGenerateTest(cond);
  // Unlike the codegen pass, we cannot check whether the 'out' register IsLow() here, because
  // scheduling runs before liveness analysis (the location builder) and the register allocator,
  // so we can only follow one codegen path by assuming out.IsLow() is true.
last_visited_internal_latency_ += 2 * kArmIntegerOpLatency;
last_visited_latency_ = kArmIntegerOpLatency;
}
void SchedulingLatencyVisitorARM::HandleGenerateEqualLong(HCondition* cond) {
DCHECK_EQ(cond->GetLeft()->GetType(), DataType::Type::kInt64);
IfCondition condition = cond->GetCondition();
last_visited_internal_latency_ += 2 * kArmIntegerOpLatency;
if (condition == kCondNE) {
// Orrs, IT, Mov
last_visited_internal_latency_ += 3 * kArmIntegerOpLatency;
} else {
last_visited_internal_latency_ += kArmIntegerOpLatency;
HandleGenerateConditionWithZero(condition);
}
}
void SchedulingLatencyVisitorARM::HandleGenerateLongComparesAndJumps() {
last_visited_internal_latency_ += 4 * kArmIntegerOpLatency;
last_visited_internal_latency_ += kArmBranchLatency;
}
void SchedulingLatencyVisitorARM::HandleGenerateConditionLong(HCondition* cond) {
DCHECK_EQ(cond->GetLeft()->GetType(), DataType::Type::kInt64);
IfCondition condition = cond->GetCondition();
HInstruction* right = cond->InputAt(1);
if (right->IsConstant()) {
// Comparisons against 0 are common enough, so codegen has special handling for them.
if (Uint64ConstantFrom(right) == 0) {
switch (condition) {
case kCondNE:
case kCondA:
case kCondEQ:
case kCondBE:
// Orr
last_visited_internal_latency_ += kArmIntegerOpLatency;
HandleGenerateConditionWithZero(condition);
return;
case kCondLT:
case kCondGE:
FALLTHROUGH_INTENDED;
case kCondAE:
case kCondB:
HandleGenerateConditionWithZero(condition);
return;
case kCondLE:
case kCondGT:
default:
break;
}
}
}
if ((condition == kCondEQ || condition == kCondNE) &&
!CanGenerateTest(cond)) {
HandleGenerateEqualLong(cond);
return;
}
if (CanGenerateTest(cond)) {
HandleGenerateConditionGeneric(cond);
return;
}
HandleGenerateLongComparesAndJumps();
last_visited_internal_latency_ += kArmIntegerOpLatency;
  last_visited_latency_ = kArmBranchLatency;
}
void SchedulingLatencyVisitorARM::HandleGenerateConditionIntegralOrNonPrimitive(HCondition* cond) {
const DataType::Type type = cond->GetLeft()->GetType();
DCHECK(DataType::IsIntegralType(type) || type == DataType::Type::kReference) << type;
if (type == DataType::Type::kInt64) {
HandleGenerateConditionLong(cond);
return;
}
IfCondition condition = cond->GetCondition();
HInstruction* right = cond->InputAt(1);
int64_t value;
if (right->IsConstant()) {
value = Uint64ConstantFrom(right);
// Comparisons against 0 are common enough, so codegen has special handling for them.
if (value == 0) {
switch (condition) {
case kCondNE:
case kCondA:
case kCondEQ:
case kCondBE:
case kCondLT:
case kCondGE:
case kCondAE:
case kCondB:
HandleGenerateConditionWithZero(condition);
return;
case kCondLE:
case kCondGT:
default:
break;
}
}
}
if (condition == kCondEQ || condition == kCondNE) {
if (condition == kCondNE) {
// CMP, IT, MOV.ne
last_visited_internal_latency_ += 2 * kArmIntegerOpLatency;
last_visited_latency_ = kArmIntegerOpLatency;
} else {
last_visited_internal_latency_ += kArmIntegerOpLatency;
HandleGenerateConditionWithZero(condition);
}
return;
}
HandleGenerateConditionGeneric(cond);
}
void SchedulingLatencyVisitorARM::HandleCondition(HCondition* cond) {
if (cond->IsEmittedAtUseSite()) {
last_visited_latency_ = 0;
return;
}
const DataType::Type type = cond->GetLeft()->GetType();
if (DataType::IsFloatingPointType(type)) {
HandleGenerateConditionGeneric(cond);
return;
}
DCHECK(DataType::IsIntegralType(type) || type == DataType::Type::kReference) << type;
const IfCondition condition = cond->GetCondition();
if (type == DataType::Type::kBool &&
cond->GetRight()->GetType() == DataType::Type::kBool &&
(condition == kCondEQ || condition == kCondNE)) {
if (condition == kCondEQ) {
last_visited_internal_latency_ = kArmIntegerOpLatency;
}
last_visited_latency_ = kArmIntegerOpLatency;
return;
}
HandleGenerateConditionIntegralOrNonPrimitive(cond);
}
void SchedulingLatencyVisitorARM::VisitCondition(HCondition* instr) {
HandleCondition(instr);
}
void SchedulingLatencyVisitorARM::VisitCompare(HCompare* instr) {
DataType::Type type = instr->InputAt(0)->GetType();
switch (type) {
case DataType::Type::kBool:
case DataType::Type::kUint8:
case DataType::Type::kInt8:
case DataType::Type::kUint16:
case DataType::Type::kInt16:
case DataType::Type::kInt32:
last_visited_internal_latency_ = 2 * kArmIntegerOpLatency;
break;
case DataType::Type::kInt64:
last_visited_internal_latency_ = 2 * kArmIntegerOpLatency + 3 * kArmBranchLatency;
break;
case DataType::Type::kFloat32:
case DataType::Type::kFloat64:
last_visited_internal_latency_ = kArmIntegerOpLatency + 2 * kArmFloatingPointOpLatency;
break;
default:
last_visited_internal_latency_ = 2 * kArmIntegerOpLatency;
break;
}
last_visited_latency_ = kArmIntegerOpLatency;
}
void SchedulingLatencyVisitorARM::VisitBitwiseNegatedRight(HBitwiseNegatedRight* instruction) {
if (instruction->GetResultType() == DataType::Type::kInt32) {
last_visited_latency_ = kArmIntegerOpLatency;
} else {
last_visited_internal_latency_ = kArmIntegerOpLatency;
last_visited_latency_ = kArmIntegerOpLatency;
}
}
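// Models one instruction emitted for an HDataProcWithShifterOp: intermediate instructions are
// counted as plain integer ops, while the final (result-producing) instruction carries the
// data-processing-with-shifter-operand latency.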
void SchedulingLatencyVisitorARM::HandleGenerateDataProcInstruction(bool internal_latency) {
if (internal_latency) {
last_visited_internal_latency_ += kArmIntegerOpLatency;
} else {
last_visited_latency_ = kArmDataProcWithShifterOpLatency;
}
}
void SchedulingLatencyVisitorARM::HandleGenerateDataProc(HDataProcWithShifterOp* instruction) {
const HInstruction::InstructionKind kind = instruction->GetInstrKind();
if (kind == HInstruction::kAdd) {
last_visited_internal_latency_ = kArmIntegerOpLatency;
last_visited_latency_ = kArmIntegerOpLatency;
} else if (kind == HInstruction::kSub) {
last_visited_internal_latency_ = kArmIntegerOpLatency;
last_visited_latency_ = kArmIntegerOpLatency;
} else {
HandleGenerateDataProcInstruction(/* internal_latency= */ true);
HandleGenerateDataProcInstruction();
}
}
void SchedulingLatencyVisitorARM::HandleGenerateLongDataProc(HDataProcWithShifterOp* instruction) {
DCHECK_EQ(instruction->GetType(), DataType::Type::kInt64);
DCHECK(HDataProcWithShifterOp::IsShiftOp(instruction->GetOpKind()));
const uint32_t shift_value = instruction->GetShiftAmount();
const HInstruction::InstructionKind kind = instruction->GetInstrKind();
if (shift_value >= 32) {
    // Different shift types actually generate similar code here, so there is no need to
    // differentiate shift types as the codegen pass does; this also avoids handling shift
    // types from different ARM backends.
HandleGenerateDataProc(instruction);
} else {
DCHECK_GT(shift_value, 1U);
DCHECK_LT(shift_value, 32U);
if (kind == HInstruction::kOr || kind == HInstruction::kXor) {
HandleGenerateDataProcInstruction(/* internal_latency= */ true);
HandleGenerateDataProcInstruction(/* internal_latency= */ true);
HandleGenerateDataProcInstruction();
} else {
last_visited_internal_latency_ += 2 * kArmIntegerOpLatency;
HandleGenerateDataProc(instruction);
}
}
}
void SchedulingLatencyVisitorARM::VisitDataProcWithShifterOp(HDataProcWithShifterOp* instruction) {
const HDataProcWithShifterOp::OpKind op_kind = instruction->GetOpKind();
if (instruction->GetType() == DataType::Type::kInt32) {
HandleGenerateDataProcInstruction();
} else {
DCHECK_EQ(instruction->GetType(), DataType::Type::kInt64);
if (HDataProcWithShifterOp::IsExtensionOp(op_kind)) {
HandleGenerateDataProc(instruction);
} else {
HandleGenerateLongDataProc(instruction);
}
}
}
void SchedulingLatencyVisitorARM::VisitIntermediateAddress([[maybe_unused]] HIntermediateAddress*) {
// Although the code generated is a simple `add` instruction, we found through empirical results
// that spacing it from its use in memory accesses was beneficial.
last_visited_internal_latency_ = kArmNopLatency;
last_visited_latency_ = kArmIntegerOpLatency;
}
void SchedulingLatencyVisitorARM::VisitIntermediateAddressIndex(
[[maybe_unused]] HIntermediateAddressIndex*) {
UNIMPLEMENTED(FATAL) << "IntermediateAddressIndex is not implemented for ARM";
}
void SchedulingLatencyVisitorARM::VisitMultiplyAccumulate([[maybe_unused]] HMultiplyAccumulate*) {
last_visited_latency_ = kArmMulIntegerLatency;
}
void SchedulingLatencyVisitorARM::VisitArrayGet(HArrayGet* instruction) {
DataType::Type type = instruction->GetType();
const bool maybe_compressed_char_at =
mirror::kUseStringCompression && instruction->IsStringCharAt();
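  // String.charAt() on a potentially compressed string is expected to first load the count
  // field and branch on the compression flag before loading a byte or a half-word element,
  // which is why the compressed path below adds extra internal latency.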
HInstruction* array_instr = instruction->GetArray();
bool has_intermediate_address = array_instr->IsIntermediateAddress();
HInstruction* index = instruction->InputAt(1);
switch (type) {
case DataType::Type::kBool:
case DataType::Type::kUint8:
case DataType::Type::kInt8:
case DataType::Type::kUint16:
case DataType::Type::kInt16:
case DataType::Type::kInt32: {
if (maybe_compressed_char_at) {
last_visited_internal_latency_ += kArmMemoryLoadLatency;
}
if (index->IsConstant()) {
if (maybe_compressed_char_at) {
last_visited_internal_latency_ +=
kArmIntegerOpLatency + kArmBranchLatency + kArmMemoryLoadLatency;
last_visited_latency_ = kArmBranchLatency;
} else {
last_visited_latency_ += kArmMemoryLoadLatency;
}
} else {
        if (!has_intermediate_address) {
          last_visited_internal_latency_ += kArmIntegerOpLatency;
        }
if (maybe_compressed_char_at) {
last_visited_internal_latency_ +=
kArmIntegerOpLatency + kArmBranchLatency + kArmMemoryLoadLatency;
last_visited_latency_ = kArmBranchLatency;
} else {
last_visited_latency_ += kArmMemoryLoadLatency;
}
}
break;
}
case DataType::Type::kReference: {
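      // With Baker read barriers, a reference load also includes the read barrier check and
      // possible slow-path setup, so it is modeled with a single larger flat latency.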
if (codegen_->EmitBakerReadBarrier()) {
last_visited_latency_ = kArmLoadWithBakerReadBarrierLatency;
} else {
if (index->IsConstant()) {
last_visited_latency_ = kArmMemoryLoadLatency;
} else {
          if (!has_intermediate_address) {
            last_visited_internal_latency_ += kArmIntegerOpLatency;
          }
last_visited_latency_ = kArmMemoryLoadLatency;
}
}
break;
}
case DataType::Type::kInt64: {
if (index->IsConstant()) {
last_visited_latency_ = kArmMemoryLoadLatency;
} else {
last_visited_internal_latency_ += kArmIntegerOpLatency;
last_visited_latency_ = kArmMemoryLoadLatency;
}
break;
}
case DataType::Type::kFloat32: {
if (index->IsConstant()) {
last_visited_latency_ = kArmMemoryLoadLatency;
} else {
last_visited_internal_latency_ += kArmIntegerOpLatency;
last_visited_latency_ = kArmMemoryLoadLatency;
}
break;
}
case DataType::Type::kFloat64: {
if (index->IsConstant()) {
last_visited_latency_ = kArmMemoryLoadLatency;
} else {
last_visited_internal_latency_ += kArmIntegerOpLatency;
last_visited_latency_ = kArmMemoryLoadLatency;
}
break;
}
default:
LOG(FATAL) << "Unreachable type " << type;
UNREACHABLE();
}
}
void SchedulingLatencyVisitorARM::VisitArrayLength(HArrayLength* instruction) {
last_visited_latency_ = kArmMemoryLoadLatency;
if (mirror::kUseStringCompression && instruction->IsStringLength()) {
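    // With string compression enabled, the count field also holds the compression flag, so
    // String.length() is expected to need an extra shift after loading it; model that shift
    // as the final latency.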
last_visited_internal_latency_ = kArmMemoryLoadLatency;
last_visited_latency_ = kArmIntegerOpLatency;
}
}
void SchedulingLatencyVisitorARM::VisitArraySet(HArraySet* instruction) {
HInstruction* index = instruction->InputAt(1);
DataType::Type value_type = instruction->GetComponentType();
HInstruction* array_instr = instruction->GetArray();
bool has_intermediate_address = array_instr->IsIntermediateAddress();
switch (value_type) {
case DataType::Type::kBool:
case DataType::Type::kUint8:
case DataType::Type::kInt8:
case DataType::Type::kUint16:
case DataType::Type::kInt16:
case DataType::Type::kInt32: {
if (index->IsConstant()) {
last_visited_latency_ = kArmMemoryStoreLatency;
} else {
        if (!has_intermediate_address) {
          last_visited_internal_latency_ = kArmIntegerOpLatency;
        }
last_visited_latency_ = kArmMemoryStoreLatency;
}
break;
}
case DataType::Type::kReference: {
if (instruction->InputAt(2)->IsNullConstant()) {
if (index->IsConstant()) {
last_visited_latency_ = kArmMemoryStoreLatency;
} else {
last_visited_internal_latency_ = kArmIntegerOpLatency;
last_visited_latency_ = kArmMemoryStoreLatency;
}
} else {
        // Following the exact instructions of runtime type checks is too complicated;
        // just give it a single, conservatively slow latency.
last_visited_latency_ = kArmRuntimeTypeCheckLatency;
}
break;
}
case DataType::Type::kInt64: {
if (index->IsConstant()) {
        last_visited_latency_ = kArmMemoryStoreLatency;
} else {
last_visited_internal_latency_ = kArmIntegerOpLatency;
        last_visited_latency_ = kArmMemoryStoreLatency;
}
break;
}
case DataType::Type::kFloat32: {
if (index->IsConstant()) {
        last_visited_latency_ = kArmMemoryStoreLatency;
} else {
last_visited_internal_latency_ = kArmIntegerOpLatency;
        last_visited_latency_ = kArmMemoryStoreLatency;
}
break;
}
case DataType::Type::kFloat64: {
if (index->IsConstant()) {
        last_visited_latency_ = kArmMemoryStoreLatency;
} else {
last_visited_internal_latency_ = kArmIntegerOpLatency;
        last_visited_latency_ = kArmMemoryStoreLatency;
}
break;
}
default:
LOG(FATAL) << "Unreachable type " << value_type;
UNREACHABLE();
}
}
void SchedulingLatencyVisitorARM::VisitBoundsCheck([[maybe_unused]] HBoundsCheck*) {
last_visited_internal_latency_ = kArmIntegerOpLatency;
// Users do not use any data results.
last_visited_latency_ = 0;
}
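// Mirrors the codegen strategies for integral Div/Rem by a constant: dividing by +/-1 needs at
// most a simple move/negate, a power of two is lowered to a short shift/add sequence, and any
// other constant is expected to use a magic-number multiplication (hence the
// kArmMulIntegerLatency term below). This is a summary of the expected lowering, not an exact
// instruction count.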
void SchedulingLatencyVisitorARM::HandleDivRemConstantIntegralLatencies(int32_t imm) {
if (imm == 0) {
last_visited_internal_latency_ = 0;
last_visited_latency_ = 0;
} else if (imm == 1 || imm == -1) {
last_visited_latency_ = kArmIntegerOpLatency;
} else if (IsPowerOfTwo(AbsOrMin(imm))) {
last_visited_internal_latency_ = 3 * kArmIntegerOpLatency;
last_visited_latency_ = kArmIntegerOpLatency;
} else {
last_visited_internal_latency_ = kArmMulIntegerLatency + 2 * kArmIntegerOpLatency;
last_visited_latency_ = kArmIntegerOpLatency;
}
}
void SchedulingLatencyVisitorARM::VisitDiv(HDiv* instruction) {
DataType::Type type = instruction->GetResultType();
switch (type) {
case DataType::Type::kInt32: {
HInstruction* rhs = instruction->GetRight();
if (rhs->IsConstant()) {
int32_t imm = Int32ConstantFrom(rhs->AsConstant());
HandleDivRemConstantIntegralLatencies(imm);
} else {
last_visited_latency_ = kArmDivIntegerLatency;
}
break;
}
case DataType::Type::kFloat32:
last_visited_latency_ = kArmDivFloatLatency;
break;
case DataType::Type::kFloat64:
last_visited_latency_ = kArmDivDoubleLatency;
break;
default:
last_visited_internal_latency_ = kArmCallInternalLatency;
last_visited_latency_ = kArmCallLatency;
break;
}
}
void SchedulingLatencyVisitorARM::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
HandleFieldGetLatencies(instruction, instruction->GetFieldInfo());
}
void SchedulingLatencyVisitorARM::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
HandleFieldSetLatencies(instruction, instruction->GetFieldInfo());
}
void SchedulingLatencyVisitorARM::VisitInstanceOf([[maybe_unused]] HInstanceOf*) {
last_visited_internal_latency_ = kArmCallInternalLatency;
last_visited_latency_ = kArmIntegerOpLatency;
}
void SchedulingLatencyVisitorARM::VisitInvoke([[maybe_unused]] HInvoke*) {
last_visited_internal_latency_ = kArmCallInternalLatency;
last_visited_latency_ = kArmCallLatency;
}
void SchedulingLatencyVisitorARM::VisitLoadString([[maybe_unused]] HLoadString*) {
last_visited_internal_latency_ = kArmLoadStringInternalLatency;
last_visited_latency_ = kArmMemoryLoadLatency;
}
void SchedulingLatencyVisitorARM::VisitNewArray([[maybe_unused]] HNewArray*) {
last_visited_internal_latency_ = kArmIntegerOpLatency + kArmCallInternalLatency;
last_visited_latency_ = kArmCallLatency;
}
void SchedulingLatencyVisitorARM::VisitNewInstance(HNewInstance* instruction) {
if (instruction->IsStringAlloc()) {
last_visited_internal_latency_ = 2 * kArmMemoryLoadLatency + kArmCallInternalLatency;
} else {
last_visited_internal_latency_ = kArmCallInternalLatency;
}
last_visited_latency_ = kArmCallLatency;
}
void SchedulingLatencyVisitorARM::VisitRem(HRem* instruction) {
DataType::Type type = instruction->GetResultType();
switch (type) {
case DataType::Type::kInt32: {
HInstruction* rhs = instruction->GetRight();
if (rhs->IsConstant()) {
int32_t imm = Int32ConstantFrom(rhs->AsConstant());
HandleDivRemConstantIntegralLatencies(imm);
} else {
last_visited_internal_latency_ = kArmDivIntegerLatency;
last_visited_latency_ = kArmMulIntegerLatency;
}
break;
}
default:
last_visited_internal_latency_ = kArmCallInternalLatency;
last_visited_latency_ = kArmCallLatency;
break;
}
}
void SchedulingLatencyVisitorARM::HandleFieldGetLatencies(HInstruction* instruction,
const FieldInfo& field_info) {
DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
DCHECK(codegen_ != nullptr);
bool is_volatile = field_info.IsVolatile();
DataType::Type field_type = field_info.GetFieldType();
bool atomic_ldrd_strd = codegen_->GetInstructionSetFeatures().HasAtomicLdrdAndStrd();
switch (field_type) {
case DataType::Type::kBool:
case DataType::Type::kUint8:
case DataType::Type::kInt8:
case DataType::Type::kUint16:
case DataType::Type::kInt16:
case DataType::Type::kInt32:
last_visited_latency_ = kArmMemoryLoadLatency;
break;
case DataType::Type::kReference:
if (codegen_->EmitBakerReadBarrier()) {
last_visited_internal_latency_ = kArmMemoryLoadLatency + kArmIntegerOpLatency;
last_visited_latency_ = kArmMemoryLoadLatency;
} else {
last_visited_latency_ = kArmMemoryLoadLatency;
}
break;
case DataType::Type::kInt64:
if (is_volatile && !atomic_ldrd_strd) {
last_visited_internal_latency_ = kArmMemoryLoadLatency + kArmIntegerOpLatency;
last_visited_latency_ = kArmMemoryLoadLatency;
} else {
last_visited_latency_ = kArmMemoryLoadLatency;
}
break;
case DataType::Type::kFloat32:
last_visited_latency_ = kArmMemoryLoadLatency;
break;
case DataType::Type::kFloat64:
if (is_volatile && !atomic_ldrd_strd) {
last_visited_internal_latency_ =
kArmMemoryLoadLatency + kArmIntegerOpLatency + kArmMemoryLoadLatency;
last_visited_latency_ = kArmIntegerOpLatency;
} else {
last_visited_latency_ = kArmMemoryLoadLatency;
}
break;
default:
last_visited_latency_ = kArmMemoryLoadLatency;
break;
}
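  // Volatile loads are expected to be followed by a memory barrier, modeled as extra internal
  // latency on top of the load itself.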
if (is_volatile) {
last_visited_internal_latency_ += kArmMemoryBarrierLatency;
}
}
void SchedulingLatencyVisitorARM::HandleFieldSetLatencies(HInstruction* instruction,
const FieldInfo& field_info) {
DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
DCHECK(codegen_ != nullptr);
bool is_volatile = field_info.IsVolatile();
DataType::Type field_type = field_info.GetFieldType();
bool atomic_ldrd_strd = codegen_->GetInstructionSetFeatures().HasAtomicLdrdAndStrd();
switch (field_type) {
case DataType::Type::kBool:
case DataType::Type::kUint8:
case DataType::Type::kInt8:
case DataType::Type::kUint16:
case DataType::Type::kInt16:
if (is_volatile) {
last_visited_internal_latency_ = kArmMemoryBarrierLatency + kArmMemoryStoreLatency;
last_visited_latency_ = kArmMemoryBarrierLatency;
} else {
last_visited_latency_ = kArmMemoryStoreLatency;
}
break;
case DataType::Type::kInt32:
case DataType::Type::kReference:
if (kPoisonHeapReferences && field_type == DataType::Type::kReference) {
last_visited_internal_latency_ += kArmIntegerOpLatency * 2;
}
last_visited_latency_ = kArmMemoryStoreLatency;
break;
case DataType::Type::kInt64:
if (is_volatile && !atomic_ldrd_strd) {
last_visited_internal_latency_ =
kArmIntegerOpLatency + kArmMemoryLoadLatency + kArmMemoryStoreLatency;
last_visited_latency_ = kArmIntegerOpLatency;
} else {
last_visited_latency_ = kArmMemoryStoreLatency;
}
break;
case DataType::Type::kFloat32:
last_visited_latency_ = kArmMemoryStoreLatency;
break;
case DataType::Type::kFloat64:
if (is_volatile && !atomic_ldrd_strd) {
last_visited_internal_latency_ = kArmIntegerOpLatency +
kArmIntegerOpLatency + kArmMemoryLoadLatency + kArmMemoryStoreLatency;
last_visited_latency_ = kArmIntegerOpLatency;
} else {
last_visited_latency_ = kArmMemoryStoreLatency;
}
break;
default:
last_visited_latency_ = kArmMemoryStoreLatency;
break;
}
}
void SchedulingLatencyVisitorARM::VisitStaticFieldGet(HStaticFieldGet* instruction) {
HandleFieldGetLatencies(instruction, instruction->GetFieldInfo());
}
void SchedulingLatencyVisitorARM::VisitStaticFieldSet(HStaticFieldSet* instruction) {
HandleFieldSetLatencies(instruction, instruction->GetFieldInfo());
}
void SchedulingLatencyVisitorARM::VisitSuspendCheck(HSuspendCheck* instruction) {
HBasicBlock* block = instruction->GetBlock();
DCHECK_IMPLIES(block->GetLoopInformation() == nullptr,
block->IsEntryBlock() && instruction->GetNext()->IsGoto());
// Users do not use any data results.
last_visited_latency_ = 0;
}
void SchedulingLatencyVisitorARM::VisitTypeConversion(HTypeConversion* instr) {
DataType::Type result_type = instr->GetResultType();
DataType::Type input_type = instr->GetInputType();
switch (result_type) {
case DataType::Type::kUint8:
case DataType::Type::kInt8:
case DataType::Type::kUint16:
case DataType::Type::kInt16:
last_visited_latency_ = kArmIntegerOpLatency; // SBFX or UBFX
break;
case DataType::Type::kInt32:
switch (input_type) {
case DataType::Type::kInt64:
last_visited_latency_ = kArmIntegerOpLatency; // MOV
break;
case DataType::Type::kFloat32:
case DataType::Type::kFloat64:
last_visited_internal_latency_ = kArmTypeConversionFloatingPointIntegerLatency;
last_visited_latency_ = kArmFloatingPointOpLatency;
break;
default:
last_visited_latency_ = kArmIntegerOpLatency;
break;
}
break;
case DataType::Type::kInt64:
switch (input_type) {
case DataType::Type::kBool:
case DataType::Type::kUint8:
case DataType::Type::kInt8:
case DataType::Type::kUint16:
case DataType::Type::kInt16:
case DataType::Type::kInt32:
// MOV and extension
last_visited_internal_latency_ = kArmIntegerOpLatency;
last_visited_latency_ = kArmIntegerOpLatency;
break;
case DataType::Type::kFloat32:
case DataType::Type::kFloat64:
// invokes runtime
last_visited_internal_latency_ = kArmCallInternalLatency;
break;
default:
last_visited_internal_latency_ = kArmIntegerOpLatency;
last_visited_latency_ = kArmIntegerOpLatency;
break;
}
break;
case DataType::Type::kFloat32:
switch (input_type) {
case DataType::Type::kBool:
case DataType::Type::kUint8:
case DataType::Type::kInt8:
case DataType::Type::kUint16:
case DataType::Type::kInt16:
case DataType::Type::kInt32:
last_visited_internal_latency_ = kArmTypeConversionFloatingPointIntegerLatency;
last_visited_latency_ = kArmFloatingPointOpLatency;
break;
case DataType::Type::kInt64:
// invokes runtime
last_visited_internal_latency_ = kArmCallInternalLatency;
break;
case DataType::Type::kFloat64:
last_visited_latency_ = kArmFloatingPointOpLatency;
break;
default:
last_visited_latency_ = kArmFloatingPointOpLatency;
break;
}
break;
case DataType::Type::kFloat64:
switch (input_type) {
case DataType::Type::kBool:
case DataType::Type::kUint8:
case DataType::Type::kInt8:
case DataType::Type::kUint16:
case DataType::Type::kInt16:
case DataType::Type::kInt32:
last_visited_internal_latency_ = kArmTypeConversionFloatingPointIntegerLatency;
last_visited_latency_ = kArmFloatingPointOpLatency;
break;
case DataType::Type::kInt64:
last_visited_internal_latency_ = 5 * kArmFloatingPointOpLatency;
last_visited_latency_ = kArmFloatingPointOpLatency;
break;
case DataType::Type::kFloat32:
last_visited_latency_ = kArmFloatingPointOpLatency;
break;
default:
last_visited_latency_ = kArmFloatingPointOpLatency;
break;
}
break;
default:
last_visited_latency_ = kArmTypeConversionFloatingPointIntegerLatency;
break;
}
}
bool HSchedulerARM::IsSchedulable(const HInstruction* instruction) const {
switch (instruction->GetKind()) {
#define SCHEDULABLE_CASE(type, unused) \
case HInstruction::InstructionKind::k##type: \
return true;
FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(SCHEDULABLE_CASE)
FOR_EACH_CONCRETE_INSTRUCTION_ARM(SCHEDULABLE_CASE)
#undef SCHEDULABLE_CASE
default:
return HScheduler::IsSchedulable(instruction);
}
}
std::pair<SchedulingGraph, ScopedArenaVector<SchedulingNode*>> HSchedulerARM::BuildSchedulingGraph(
HBasicBlock* block,
ScopedArenaAllocator* allocator,
const HeapLocationCollector* heap_location_collector) {
SchedulingLatencyVisitorARM latency_visitor(codegen_);
return HScheduler::BuildSchedulingGraph(
block, allocator, heap_location_collector, &latency_visitor);
}
} // namespace arm
} // namespace art