Diffstat (limited to 'compiler/optimizing')
-rw-r--r--  compiler/optimizing/builder.h | 5
-rw-r--r--  compiler/optimizing/code_generator.cc | 18
-rw-r--r--  compiler/optimizing/code_generator.h | 2
-rw-r--r--  compiler/optimizing/code_generator_arm.cc | 9
-rw-r--r--  compiler/optimizing/code_generator_arm64.cc | 44
-rw-r--r--  compiler/optimizing/code_generator_arm64.h | 9
-rw-r--r--  compiler/optimizing/code_generator_arm_vixl.cc | 36
-rw-r--r--  compiler/optimizing/code_generator_mips.cc | 197
-rw-r--r--  compiler/optimizing/code_generator_mips.h | 32
-rw-r--r--  compiler/optimizing/code_generator_mips64.cc | 86
-rw-r--r--  compiler/optimizing/code_generator_mips64.h | 33
-rw-r--r--  compiler/optimizing/code_generator_x86.cc | 8
-rw-r--r--  compiler/optimizing/code_generator_x86.h | 4
-rw-r--r--  compiler/optimizing/code_generator_x86_64.cc | 7
-rw-r--r--  compiler/optimizing/code_generator_x86_64.h | 9
-rw-r--r--  compiler/optimizing/codegen_test.cc | 364
-rw-r--r--  compiler/optimizing/codegen_test_utils.h | 355
-rw-r--r--  compiler/optimizing/common_arm.h | 6
-rw-r--r--  compiler/optimizing/common_arm64.h | 4
-rw-r--r--  compiler/optimizing/dex_cache_array_fixups_mips.cc | 7
-rw-r--r--  compiler/optimizing/induction_var_range.cc | 29
-rw-r--r--  compiler/optimizing/inliner.cc | 19
-rw-r--r--  compiler/optimizing/instruction_builder.cc | 222
-rw-r--r--  compiler/optimizing/instruction_builder.h | 18
-rw-r--r--  compiler/optimizing/intrinsics.h | 3
-rw-r--r--  compiler/optimizing/intrinsics_arm_vixl.cc | 255
-rw-r--r--  compiler/optimizing/nodes.cc | 25
-rw-r--r--  compiler/optimizing/nodes.h | 30
-rw-r--r--  compiler/optimizing/optimizing_compiler.cc | 26
-rw-r--r--  compiler/optimizing/optimizing_compiler.h | 9
-rw-r--r--  compiler/optimizing/optimizing_unit_test.h | 3
-rw-r--r--  compiler/optimizing/scheduler.cc | 610
-rw-r--r--  compiler/optimizing/scheduler.h | 487
-rw-r--r--  compiler/optimizing/scheduler_arm64.cc | 196
-rw-r--r--  compiler/optimizing/scheduler_arm64.h | 117
-rw-r--r--  compiler/optimizing/scheduler_test.cc | 238
-rw-r--r--  compiler/optimizing/sharpening.cc | 147
-rw-r--r--  compiler/optimizing/sharpening.h | 12
-rw-r--r--  compiler/optimizing/ssa_builder.cc | 2
-rw-r--r--  compiler/optimizing/stack_map_stream.cc | 133
-rw-r--r--  compiler/optimizing/stack_map_stream.h | 12
-rw-r--r--  compiler/optimizing/stack_map_test.cc | 66
42 files changed, 3169 insertions, 725 deletions
diff --git a/compiler/optimizing/builder.h b/compiler/optimizing/builder.h
index 8cf4089eba..e4ad4222fb 100644
--- a/compiler/optimizing/builder.h
+++ b/compiler/optimizing/builder.h
@@ -32,6 +32,8 @@
namespace art {
+class CodeGenerator;
+
class HGraphBuilder : public ValueObject {
public:
HGraphBuilder(HGraph* graph,
@@ -40,6 +42,7 @@ class HGraphBuilder : public ValueObject {
const DexFile* dex_file,
const DexFile::CodeItem& code_item,
CompilerDriver* driver,
+ CodeGenerator* code_generator,
OptimizingCompilerStats* compiler_stats,
const uint8_t* interpreter_metadata,
Handle<mirror::DexCache> dex_cache,
@@ -61,6 +64,7 @@ class HGraphBuilder : public ValueObject {
dex_compilation_unit,
outer_compilation_unit,
driver,
+ code_generator,
interpreter_metadata,
compiler_stats,
dex_cache,
@@ -89,6 +93,7 @@ class HGraphBuilder : public ValueObject {
/* dex_compilation_unit */ nullptr,
/* outer_compilation_unit */ nullptr,
/* compiler_driver */ nullptr,
+ /* code_generator */ nullptr,
/* interpreter_metadata */ nullptr,
/* compiler_stats */ nullptr,
null_dex_cache_,
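// Note (not part of this patch): the new `code_generator` parameter threads the
// CodeGenerator into HGraphBuilder and, through it, into HInstructionBuilder
// (see instruction_builder.cc/.h and sharpening.cc in the diffstat), so that
// backend-dependent decisions such as load/invoke sharpening can be made while
// the graph is being built rather than in a separate pass. The nullptr-heavy
// overload above simply passes no backend, preserving its graph-only use.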
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index 99427f05da..d68aa51b1b 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -1417,4 +1417,22 @@ void CodeGenerator::EmitJitRoots(uint8_t* code,
EmitJitRootPatches(code, roots_data);
}
+QuickEntrypointEnum CodeGenerator::GetArrayAllocationEntrypoint(Handle<mirror::Class> array_klass) {
+ ScopedObjectAccess soa(Thread::Current());
+ if (array_klass.Get() == nullptr) {
+ // This can only happen for non-primitive arrays, as primitive arrays can always
+ // be resolved.
+ return kQuickAllocArrayResolved32;
+ }
+
+ switch (array_klass->GetComponentSize()) {
+ case 1: return kQuickAllocArrayResolved8;
+ case 2: return kQuickAllocArrayResolved16;
+ case 4: return kQuickAllocArrayResolved32;
+ case 8: return kQuickAllocArrayResolved64;
+ }
+ LOG(FATAL) << "Unreachable";
+ return kQuickAllocArrayResolved;
+}
+
} // namespace art
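// A minimal usage sketch (not part of this patch), assuming only the names
// introduced above: GetArrayAllocationEntrypoint() maps the array class's
// component size to a size-specialized allocation entrypoint, and each backend's
// VisitNewArray (see the per-architecture hunks below) calls it the same way.
// SelectNewArrayEntrypoint is a hypothetical wrapper, for illustration only.
static QuickEntrypointEnum SelectNewArrayEntrypoint(HNewArray* instruction) {
  // e.g. `new long[n]`: component size 8 -> kQuickAllocArrayResolved64.
  // An unresolved (null) class conservatively yields kQuickAllocArrayResolved32.
  return CodeGenerator::GetArrayAllocationEntrypoint(
      instruction->GetLoadClass()->GetClass());
}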
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index 2d129aff22..b912672792 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -573,6 +573,8 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> {
uint32_t GetReferenceSlowFlagOffset() const;
uint32_t GetReferenceDisableFlagOffset() const;
+ static QuickEntrypointEnum GetArrayAllocationEntrypoint(Handle<mirror::Class> array_klass);
+
protected:
// Patch info used for recording locations of required linker patches and their targets,
// i.e. target method, string, type or code identified by their dex file and index.
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index f5b6ebef9c..20cdae3619 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -3993,8 +3993,11 @@ void LocationsBuilderARM::VisitNewArray(HNewArray* instruction) {
void InstructionCodeGeneratorARM::VisitNewArray(HNewArray* instruction) {
// Note: if heap poisoning is enabled, the entry point takes care
// of poisoning the reference.
- codegen_->InvokeRuntime(kQuickAllocArrayResolved, instruction, instruction->GetDexPc());
+ QuickEntrypointEnum entrypoint =
+ CodeGenerator::GetArrayAllocationEntrypoint(instruction->GetLoadClass()->GetClass());
+ codegen_->InvokeRuntime(entrypoint, instruction, instruction->GetDexPc());
CheckEntrypointTypes<kQuickAllocArrayResolved, void*, mirror::Class*, int32_t>();
+ DCHECK(!codegen_->IsLeafMethod());
}
void LocationsBuilderARM::VisitParameterValue(HParameterValue* instruction) {
@@ -5719,6 +5722,9 @@ void ParallelMoveResolverARM::RestoreScratch(int reg) {
HLoadClass::LoadKind CodeGeneratorARM::GetSupportedLoadClassKind(
HLoadClass::LoadKind desired_class_load_kind) {
switch (desired_class_load_kind) {
+ case HLoadClass::LoadKind::kInvalid:
+ LOG(FATAL) << "UNREACHABLE";
+ UNREACHABLE();
case HLoadClass::LoadKind::kReferrersClass:
break;
case HLoadClass::LoadKind::kBootImageLinkTimeAddress:
@@ -5849,6 +5855,7 @@ void InstructionCodeGeneratorARM::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFE
break;
}
case HLoadClass::LoadKind::kDexCacheViaMethod:
+ case HLoadClass::LoadKind::kInvalid:
LOG(FATAL) << "UNREACHABLE";
UNREACHABLE();
}
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 9762ee81b1..598be4715b 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -1452,6 +1452,19 @@ static bool CoherentConstantAndType(Location constant, Primitive::Type type) {
(cst->IsDoubleConstant() && type == Primitive::kPrimDouble);
}
+// Allocate a scratch register from the VIXL pool, querying first into
+// the floating-point register pool, and then the core register
+// pool. This is essentially a reimplementation of
+// vixl::aarch64::UseScratchRegisterScope::AcquireCPURegisterOfSize
+// using a different allocation strategy.
+static CPURegister AcquireFPOrCoreCPURegisterOfSize(vixl::aarch64::MacroAssembler* masm,
+ vixl::aarch64::UseScratchRegisterScope* temps,
+ int size_in_bits) {
+ return masm->GetScratchFPRegisterList()->IsEmpty()
+ ? CPURegister(temps->AcquireRegisterOfSize(size_in_bits))
+ : CPURegister(temps->AcquireVRegisterOfSize(size_in_bits));
+}
+
void CodeGeneratorARM64::MoveLocation(Location destination,
Location source,
Primitive::Type dst_type) {
@@ -1533,7 +1546,9 @@ void CodeGeneratorARM64::MoveLocation(Location destination,
HConstant* src_cst = source.GetConstant();
CPURegister temp;
if (src_cst->IsZeroBitPattern()) {
- temp = (src_cst->IsLongConstant() || src_cst->IsDoubleConstant()) ? xzr : wzr;
+ temp = (src_cst->IsLongConstant() || src_cst->IsDoubleConstant())
+ ? Register(xzr)
+ : Register(wzr);
} else {
if (src_cst->IsIntConstant()) {
temp = temps.AcquireW();
@@ -1561,8 +1576,16 @@ void CodeGeneratorARM64::MoveLocation(Location destination,
// a move is blocked by another move requiring a scratch FP
// register, which would reserve D31). To prevent this issue, we
// ask for a scratch register of any type (core or FP).
- CPURegister temp =
- temps.AcquireCPURegisterOfSize(destination.IsDoubleStackSlot() ? kXRegSize : kWRegSize);
+ //
+ // Also, we ask for an FP scratch register first, as the demand
+ // for scratch core registers is higher. This is why we
+ // use AcquireFPOrCoreCPURegisterOfSize instead of
+ // UseScratchRegisterScope::AcquireCPURegisterOfSize, which
+ // allocates core scratch registers first.
+ CPURegister temp = AcquireFPOrCoreCPURegisterOfSize(
+ GetVIXLAssembler(),
+ &temps,
+ (destination.IsDoubleStackSlot() ? kXRegSize : kWRegSize));
__ Ldr(temp, StackOperandFrom(source));
__ Str(temp, StackOperandFrom(destination));
}
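// A minimal sketch (not part of this patch) of the stack-to-stack move path in
// the hunk above: the scratch register is now taken from the FP pool first and
// only falls back to a core register when no FP scratch is free, since core
// scratch registers are in higher demand during parallel-move resolution.
UseScratchRegisterScope temps(GetVIXLAssembler());
CPURegister temp = AcquireFPOrCoreCPURegisterOfSize(
    GetVIXLAssembler(), &temps, destination.IsDoubleStackSlot() ? kXRegSize : kWRegSize);
__ Ldr(temp, StackOperandFrom(source));       // Ldr/Str accept both core and FP registers.
__ Str(temp, StackOperandFrom(destination));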
@@ -1903,6 +1926,9 @@ void LocationsBuilderARM64::HandleFieldGet(HInstruction* instruction) {
LocationSummary::kNoCall);
if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
+ // We need a temporary register for the read barrier marking slow
+ // path in CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier.
+ locations->AddTemp(Location::RequiresRegister());
}
locations->SetInAt(0, Location::RequiresRegister());
if (Primitive::IsFloatingPointType(instruction->GetType())) {
@@ -1930,11 +1956,9 @@ void InstructionCodeGeneratorARM64::HandleFieldGet(HInstruction* instruction,
if (field_type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
// Object FieldGet with Baker's read barrier case.
- MacroAssembler* masm = GetVIXLAssembler();
- UseScratchRegisterScope temps(masm);
// /* HeapReference<Object> */ out = *(base + offset)
Register base = RegisterFrom(base_loc, Primitive::kPrimNot);
- Register temp = temps.AcquireW();
+ Register temp = WRegisterFrom(locations->GetTemp(0));
// Note that potential implicit null checks are handled in this
// CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier call.
codegen_->GenerateFieldLoadWithBakerReadBarrier(
@@ -4336,6 +4360,9 @@ void InstructionCodeGeneratorARM64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
HLoadClass::LoadKind CodeGeneratorARM64::GetSupportedLoadClassKind(
HLoadClass::LoadKind desired_class_load_kind) {
switch (desired_class_load_kind) {
+ case HLoadClass::LoadKind::kInvalid:
+ LOG(FATAL) << "UNREACHABLE";
+ UNREACHABLE();
case HLoadClass::LoadKind::kReferrersClass:
break;
case HLoadClass::LoadKind::kBootImageLinkTimeAddress:
@@ -4474,6 +4501,7 @@ void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) NO_THREAD_SA
break;
}
case HLoadClass::LoadKind::kDexCacheViaMethod:
+ case HLoadClass::LoadKind::kInvalid:
LOG(FATAL) << "UNREACHABLE";
UNREACHABLE();
}
@@ -4762,7 +4790,9 @@ void LocationsBuilderARM64::VisitNewArray(HNewArray* instruction) {
void InstructionCodeGeneratorARM64::VisitNewArray(HNewArray* instruction) {
// Note: if heap poisoning is enabled, the entry point takes care
// of poisoning the reference.
- codegen_->InvokeRuntime(kQuickAllocArrayResolved, instruction, instruction->GetDexPc());
+ QuickEntrypointEnum entrypoint =
+ CodeGenerator::GetArrayAllocationEntrypoint(instruction->GetLoadClass()->GetClass());
+ codegen_->InvokeRuntime(entrypoint, instruction, instruction->GetDexPc());
CheckEntrypointTypes<kQuickAllocArrayResolved, void*, mirror::Class*, int32_t>();
}
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index 7d3c655b27..f6cb90a63a 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -210,12 +210,11 @@ class FieldAccessCallingConventionARM64 : public FieldAccessCallingConvention {
Location GetReturnLocation(Primitive::Type type ATTRIBUTE_UNUSED) const OVERRIDE {
return helpers::LocationFrom(vixl::aarch64::x0);
}
- Location GetSetValueLocation(Primitive::Type type, bool is_instance) const OVERRIDE {
- return Primitive::Is64BitType(type)
+ Location GetSetValueLocation(Primitive::Type type ATTRIBUTE_UNUSED,
+ bool is_instance) const OVERRIDE {
+ return is_instance
? helpers::LocationFrom(vixl::aarch64::x2)
- : (is_instance
- ? helpers::LocationFrom(vixl::aarch64::x2)
- : helpers::LocationFrom(vixl::aarch64::x1));
+ : helpers::LocationFrom(vixl::aarch64::x1);
}
Location GetFpuLocation(Primitive::Type type ATTRIBUTE_UNUSED) const OVERRIDE {
return helpers::LocationFrom(vixl::aarch64::d0);
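// Illustration (not part of this patch) of the simplified mapping above: with
// 64-bit GP argument registers the value's width no longer matters, only
// whether an object-reference argument precedes it.
//   instance field set -> value in x2,   static field set -> value in x1
// `conv` is a hypothetical FieldAccessCallingConventionARM64 instance.
Location v_static   = conv.GetSetValueLocation(Primitive::kPrimLong, /* is_instance */ false);  // x1
Location v_instance = conv.GetSetValueLocation(Primitive::kPrimLong, /* is_instance */ true);   // x2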
diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc
index ffaf18fb4d..e18960872e 100644
--- a/compiler/optimizing/code_generator_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_arm_vixl.cc
@@ -3998,15 +3998,18 @@ void LocationsBuilderARMVIXL::VisitNewArray(HNewArray* instruction) {
new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
InvokeRuntimeCallingConventionARMVIXL calling_convention;
locations->SetOut(LocationFrom(r0));
- locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(1)));
- locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(2)));
+ locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
+ locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
}
void InstructionCodeGeneratorARMVIXL::VisitNewArray(HNewArray* instruction) {
// Note: if heap poisoning is enabled, the entry point takes care
// of poisoning the reference.
- codegen_->InvokeRuntime(kQuickAllocArrayResolved, instruction, instruction->GetDexPc());
+ QuickEntrypointEnum entrypoint =
+ CodeGenerator::GetArrayAllocationEntrypoint(instruction->GetLoadClass()->GetClass());
+ codegen_->InvokeRuntime(entrypoint, instruction, instruction->GetDexPc());
CheckEntrypointTypes<kQuickAllocArrayResolved, void*, mirror::Class*, int32_t>();
+ DCHECK(!codegen_->IsLeafMethod());
}
void LocationsBuilderARMVIXL::VisitParameterValue(HParameterValue* instruction) {
@@ -5796,6 +5799,9 @@ void ParallelMoveResolverARMVIXL::RestoreScratch(int reg ATTRIBUTE_UNUSED) {
HLoadClass::LoadKind CodeGeneratorARMVIXL::GetSupportedLoadClassKind(
HLoadClass::LoadKind desired_class_load_kind) {
switch (desired_class_load_kind) {
+ case HLoadClass::LoadKind::kInvalid:
+ LOG(FATAL) << "UNREACHABLE";
+ UNREACHABLE();
case HLoadClass::LoadKind::kReferrersClass:
break;
case HLoadClass::LoadKind::kBootImageLinkTimeAddress:
@@ -5916,6 +5922,7 @@ void InstructionCodeGeneratorARMVIXL::VisitLoadClass(HLoadClass* cls) NO_THREAD_
break;
}
case HLoadClass::LoadKind::kDexCacheViaMethod:
+ case HLoadClass::LoadKind::kInvalid:
LOG(FATAL) << "UNREACHABLE";
UNREACHABLE();
}
@@ -7253,8 +7260,7 @@ vixl32::Register CodeGeneratorARMVIXL::GetInvokeStaticOrDirectExtraParameter(
// save one load. However, since this is just an intrinsic slow path we prefer this
// simple and more robust approach rather than trying to determine if that's the case.
SlowPathCode* slow_path = GetCurrentSlowPath();
- DCHECK(slow_path != nullptr); // For intrinsified invokes the call is emitted on the slow path.
- if (slow_path->IsCoreRegisterSaved(RegisterFrom(location).GetCode())) {
+ if (slow_path != nullptr && slow_path->IsCoreRegisterSaved(RegisterFrom(location).GetCode())) {
int stack_offset = slow_path->GetStackOffsetOfCoreRegister(RegisterFrom(location).GetCode());
GetAssembler()->LoadFromOffset(kLoadWord, temp, sp, stack_offset);
return temp;
@@ -7679,15 +7685,21 @@ void InstructionCodeGeneratorARMVIXL::VisitPackedSwitch(HPackedSwitch* switch_in
vixl32::Register jump_offset = temps.Acquire();
// Load jump offset from the table.
- __ Adr(table_base, jump_table->GetTableStartLabel());
- __ Ldr(jump_offset, MemOperand(table_base, key_reg, vixl32::LSL, 2));
+ {
+ const size_t jump_size = switch_instr->GetNumEntries() * sizeof(int32_t);
+ ExactAssemblyScope aas(GetVIXLAssembler(),
+ (vixl32::kMaxInstructionSizeInBytes * 4) + jump_size,
+ CodeBufferCheckScope::kMaximumSize);
+ __ adr(table_base, jump_table->GetTableStartLabel());
+ __ ldr(jump_offset, MemOperand(table_base, key_reg, vixl32::LSL, 2));
- // Jump to target block by branching to table_base(pc related) + offset.
- vixl32::Register target_address = table_base;
- __ Add(target_address, table_base, jump_offset);
- __ Bx(target_address);
+ // Jump to target block by branching to table_base (PC-relative) + offset.
+ vixl32::Register target_address = table_base;
+ __ add(target_address, table_base, jump_offset);
+ __ bx(target_address);
- jump_table->EmitTable(codegen_);
+ jump_table->EmitTable(codegen_);
+ }
}
}
void LocationsBuilderARMVIXL::VisitArmDexCacheArraysBase(HArmDexCacheArraysBase* base) {
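// A minimal sketch (not part of this patch) of the VIXL pattern used in the
// VisitPackedSwitch hunk above: an ExactAssemblyScope reserves the worst-case
// buffer space up front so the assembler cannot interleave literal pools or
// veneers between the address computation and the jump table, and the
// lowercase mnemonics emit exactly one instruction each (no macro expansion).
{
  const size_t table_size = switch_instr->GetNumEntries() * sizeof(int32_t);
  ExactAssemblyScope scope(GetVIXLAssembler(),
                           4 * vixl32::kMaxInstructionSizeInBytes + table_size,
                           CodeBufferCheckScope::kMaximumSize);
  __ adr(table_base, jump_table->GetTableStartLabel());   // PC-relative base of the table.
  __ ldr(jump_offset, MemOperand(table_base, key_reg, vixl32::LSL, 2));
  __ add(table_base, table_base, jump_offset);            // table_base now holds the target.
  __ bx(table_base);
  jump_table->EmitTable(codegen_);                        // int32 offsets follow the bx.
}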
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index 76be74e921..0677dad078 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -258,8 +258,10 @@ class LoadClassSlowPathMIPS : public SlowPathCodeMIPS {
DCHECK_NE(out.AsRegister<Register>(), AT);
CodeGeneratorMIPS::PcRelativePatchInfo* info =
mips_codegen->NewTypeBssEntryPatch(cls_->GetDexFile(), type_index);
- mips_codegen->EmitPcRelativeAddressPlaceholder(info, TMP, base);
- __ StoreToOffset(kStoreWord, out.AsRegister<Register>(), TMP, 0);
+ bool reordering = __ SetReorder(false);
+ mips_codegen->EmitPcRelativeAddressPlaceholderHigh(info, TMP, base);
+ __ StoreToOffset(kStoreWord, out.AsRegister<Register>(), TMP, /* placeholder */ 0x5678);
+ __ SetReorder(reordering);
}
__ B(GetExitLabel());
}
@@ -313,8 +315,10 @@ class LoadStringSlowPathMIPS : public SlowPathCodeMIPS {
DCHECK_NE(out, AT);
CodeGeneratorMIPS::PcRelativePatchInfo* info =
mips_codegen->NewPcRelativeStringPatch(load->GetDexFile(), string_index);
- mips_codegen->EmitPcRelativeAddressPlaceholder(info, TMP, base);
- __ StoreToOffset(kStoreWord, out, TMP, 0);
+ bool reordering = __ SetReorder(false);
+ mips_codegen->EmitPcRelativeAddressPlaceholderHigh(info, TMP, base);
+ __ StoreToOffset(kStoreWord, out, TMP, /* placeholder */ 0x5678);
+ __ SetReorder(reordering);
__ B(GetExitLabel());
}
@@ -480,6 +484,8 @@ CodeGeneratorMIPS::CodeGeneratorMIPS(HGraph* graph,
type_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
boot_image_address_patches_(std::less<uint32_t>(),
graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+ jit_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+ jit_class_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
clobbered_ra_(false) {
// Save RA (containing the return address) to mimic Quick.
AddAllocatedRegister(Location::RegisterLocation(RA));
@@ -700,9 +706,6 @@ bool CodeGeneratorMIPS::HasAllocatedCalleeSaveRegisters() const {
// (this can happen in leaf methods), force CodeGenerator::InitializeCodeGeneration()
// into the path that creates a stack frame so that RA can be explicitly saved and restored.
// RA can't otherwise be saved/restored when it's the only spilled register.
- // TODO: Can this be improved? It causes creation of a stack frame (while RA might be
- // saved in an unused temporary register) and saving of RA and the current method pointer
- // in the frame.
return CodeGenerator::HasAllocatedCalleeSaveRegisters() || clobbered_ra_;
}
@@ -1127,16 +1130,15 @@ Literal* CodeGeneratorMIPS::DeduplicateBootImageAddressLiteral(uint32_t address)
return DeduplicateUint32Literal(dchecked_integral_cast<uint32_t>(address), map);
}
-void CodeGeneratorMIPS::EmitPcRelativeAddressPlaceholder(
- PcRelativePatchInfo* info, Register out, Register base) {
- bool reordering = __ SetReorder(false);
+void CodeGeneratorMIPS::EmitPcRelativeAddressPlaceholderHigh(PcRelativePatchInfo* info,
+ Register out,
+ Register base) {
if (GetInstructionSetFeatures().IsR6()) {
DCHECK_EQ(base, ZERO);
__ Bind(&info->high_label);
__ Bind(&info->pc_rel_label);
- // Add a 32-bit offset to PC.
+ // Add the high half of a 32-bit offset to PC.
__ Auipc(out, /* placeholder */ 0x1234);
- __ Addiu(out, out, /* placeholder */ 0x5678);
} else {
// If base is ZERO, emit NAL to obtain the actual base.
if (base == ZERO) {
@@ -1150,11 +1152,72 @@ void CodeGeneratorMIPS::EmitPcRelativeAddressPlaceholder(
if (base == ZERO) {
__ Bind(&info->pc_rel_label);
}
- __ Ori(out, out, /* placeholder */ 0x5678);
- // Add a 32-bit offset to PC.
+ // Add the high half of a 32-bit offset to PC.
__ Addu(out, out, (base == ZERO) ? RA : base);
}
- __ SetReorder(reordering);
+ // The immediately following instruction will add the sign-extended low half of the 32-bit
+ // offset to `out` (e.g. lw, jialc, addiu).
+}
+
+CodeGeneratorMIPS::JitPatchInfo* CodeGeneratorMIPS::NewJitRootStringPatch(
+ const DexFile& dex_file,
+ dex::StringIndex dex_index,
+ Handle<mirror::String> handle) {
+ jit_string_roots_.Overwrite(StringReference(&dex_file, dex_index),
+ reinterpret_cast64<uint64_t>(handle.GetReference()));
+ jit_string_patches_.emplace_back(dex_file, dex_index.index_);
+ return &jit_string_patches_.back();
+}
+
+CodeGeneratorMIPS::JitPatchInfo* CodeGeneratorMIPS::NewJitRootClassPatch(
+ const DexFile& dex_file,
+ dex::TypeIndex dex_index,
+ Handle<mirror::Class> handle) {
+ jit_class_roots_.Overwrite(TypeReference(&dex_file, dex_index),
+ reinterpret_cast64<uint64_t>(handle.GetReference()));
+ jit_class_patches_.emplace_back(dex_file, dex_index.index_);
+ return &jit_class_patches_.back();
+}
+
+void CodeGeneratorMIPS::PatchJitRootUse(uint8_t* code,
+ const uint8_t* roots_data,
+ const CodeGeneratorMIPS::JitPatchInfo& info,
+ uint64_t index_in_table) const {
+ uint32_t literal_offset = GetAssembler().GetLabelLocation(&info.high_label);
+ uintptr_t address =
+ reinterpret_cast<uintptr_t>(roots_data) + index_in_table * sizeof(GcRoot<mirror::Object>);
+ uint32_t addr32 = dchecked_integral_cast<uint32_t>(address);
+ // lui reg, addr32_high
+ DCHECK_EQ(code[literal_offset + 0], 0x34);
+ DCHECK_EQ(code[literal_offset + 1], 0x12);
+ DCHECK_EQ((code[literal_offset + 2] & 0xE0), 0x00);
+ DCHECK_EQ(code[literal_offset + 3], 0x3C);
+ // lw reg, reg, addr32_low
+ DCHECK_EQ(code[literal_offset + 4], 0x78);
+ DCHECK_EQ(code[literal_offset + 5], 0x56);
+ DCHECK_EQ((code[literal_offset + 7] & 0xFC), 0x8C);
+ addr32 += (addr32 & 0x8000) << 1; // Account for sign extension in "lw reg, reg, addr32_low".
+ // lui reg, addr32_high
+ code[literal_offset + 0] = static_cast<uint8_t>(addr32 >> 16);
+ code[literal_offset + 1] = static_cast<uint8_t>(addr32 >> 24);
+ // lw reg, reg, addr32_low
+ code[literal_offset + 4] = static_cast<uint8_t>(addr32 >> 0);
+ code[literal_offset + 5] = static_cast<uint8_t>(addr32 >> 8);
+}
+
+void CodeGeneratorMIPS::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) {
+ for (const JitPatchInfo& info : jit_string_patches_) {
+ const auto& it = jit_string_roots_.find(StringReference(&info.target_dex_file,
+ dex::StringIndex(info.index)));
+ DCHECK(it != jit_string_roots_.end());
+ PatchJitRootUse(code, roots_data, info, it->second);
+ }
+ for (const JitPatchInfo& info : jit_class_patches_) {
+ const auto& it = jit_class_roots_.find(TypeReference(&info.target_dex_file,
+ dex::TypeIndex(info.index)));
+ DCHECK(it != jit_class_roots_.end());
+ PatchJitRootUse(code, roots_data, info, it->second);
+ }
}
void CodeGeneratorMIPS::MarkGCCard(Register object,
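// A minimal sketch (not part of this patch) of the split sequence now used at
// the MIPS call sites below: EmitPcRelativeAddressPlaceholderHigh() emits only
// the high half of the PC-relative address, and the caller supplies the
// sign-extended low half in the very next instruction (addiu, lw or jialc),
// keeping the pair contiguous for the relative patcher.
//   R6:                         pre-R6 (base == ZERO):
//     auipc out, 0x1234           nal; lui out, 0x1234; addu out, out, RA
//     addiu out, out, 0x5678      addiu out, out, 0x5678   // or: lw out, 0x5678(out)
bool reordering = __ SetReorder(false);   // no delay-slot reordering inside the pair
codegen_->EmitPcRelativeAddressPlaceholderHigh(info, out, base_or_current_method_reg);
__ Addiu(out, out, /* placeholder */ 0x5678);
__ SetReorder(reordering);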
@@ -5159,7 +5222,8 @@ void LocationsBuilderMIPS::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invo
// art::PrepareForRegisterAllocation.
DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
- bool has_extra_input = invoke->HasPcRelativeDexCache();
+ bool is_r6 = codegen_->GetInstructionSetFeatures().IsR6();
+ bool has_extra_input = invoke->HasPcRelativeDexCache() && !is_r6;
IntrinsicLocationsBuilderMIPS intrinsic(codegen_);
if (intrinsic.TryDispatch(invoke)) {
@@ -5200,12 +5264,13 @@ HLoadString::LoadKind CodeGeneratorMIPS::GetSupportedLoadStringKind(
if (kEmitCompilerReadBarrier) {
UNIMPLEMENTED(FATAL) << "for read barrier";
}
- // We disable PC-relative load when there is an irreducible loop, as the optimization
+ // We disable PC-relative load on pre-R6 when there is an irreducible loop, as the optimization
// is incompatible with it.
// TODO: Create as many MipsDexCacheArraysBase instructions as needed for methods
// with irreducible loops.
bool has_irreducible_loops = GetGraph()->HasIrreducibleLoops();
- bool fallback_load = has_irreducible_loops;
+ bool is_r6 = GetInstructionSetFeatures().IsR6();
+ bool fallback_load = has_irreducible_loops && !is_r6;
switch (desired_string_load_kind) {
case HLoadString::LoadKind::kBootImageLinkTimeAddress:
DCHECK(!GetCompilerOptions().GetCompilePic());
@@ -5220,8 +5285,7 @@ HLoadString::LoadKind CodeGeneratorMIPS::GetSupportedLoadStringKind(
break;
case HLoadString::LoadKind::kJitTableAddress:
DCHECK(Runtime::Current()->UseJitCompilation());
- // TODO: implement.
- fallback_load = true;
+ fallback_load = false;
break;
case HLoadString::LoadKind::kDexCacheViaMethod:
fallback_load = false;
@@ -5238,11 +5302,15 @@ HLoadClass::LoadKind CodeGeneratorMIPS::GetSupportedLoadClassKind(
if (kEmitCompilerReadBarrier) {
UNIMPLEMENTED(FATAL) << "for read barrier";
}
- // We disable pc-relative load when there is an irreducible loop, as the optimization
+ // We disable PC-relative load on pre-R6 when there is an irreducible loop, as the optimization
// is incompatible with it.
bool has_irreducible_loops = GetGraph()->HasIrreducibleLoops();
- bool fallback_load = has_irreducible_loops;
+ bool is_r6 = GetInstructionSetFeatures().IsR6();
+ bool fallback_load = has_irreducible_loops && !is_r6;
switch (desired_class_load_kind) {
+ case HLoadClass::LoadKind::kInvalid:
+ LOG(FATAL) << "UNREACHABLE";
+ UNREACHABLE();
case HLoadClass::LoadKind::kReferrersClass:
fallback_load = false;
break;
@@ -5259,7 +5327,7 @@ HLoadClass::LoadKind CodeGeneratorMIPS::GetSupportedLoadClassKind(
break;
case HLoadClass::LoadKind::kJitTableAddress:
DCHECK(Runtime::Current()->UseJitCompilation());
- fallback_load = true;
+ fallback_load = false;
break;
case HLoadClass::LoadKind::kDexCacheViaMethod:
fallback_load = false;
@@ -5273,6 +5341,7 @@ HLoadClass::LoadKind CodeGeneratorMIPS::GetSupportedLoadClassKind(
Register CodeGeneratorMIPS::GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke,
Register temp) {
+ CHECK(!GetInstructionSetFeatures().IsR6());
CHECK_EQ(invoke->InputCount(), invoke->GetNumberOfArguments() + 1u);
Location location = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
if (!invoke->GetLocations()->Intrinsified()) {
@@ -5301,13 +5370,13 @@ HInvokeStaticOrDirect::DispatchInfo CodeGeneratorMIPS::GetSupportedInvokeStaticO
const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
HInvokeStaticOrDirect* invoke ATTRIBUTE_UNUSED) {
HInvokeStaticOrDirect::DispatchInfo dispatch_info = desired_dispatch_info;
- // We disable PC-relative load when there is an irreducible loop, as the optimization
+ // We disable PC-relative load on pre-R6 when there is an irreducible loop, as the optimization
// is incompatible with it.
bool has_irreducible_loops = GetGraph()->HasIrreducibleLoops();
- bool fallback_load = true;
+ bool is_r6 = GetInstructionSetFeatures().IsR6();
+ bool fallback_load = has_irreducible_loops && !is_r6;
switch (dispatch_info.method_load_kind) {
case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative:
- fallback_load = has_irreducible_loops;
break;
default:
fallback_load = false;
@@ -5325,7 +5394,8 @@ void CodeGeneratorMIPS::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke
Location callee_method = temp; // For all kinds except kRecursive, callee will be in temp.
HInvokeStaticOrDirect::MethodLoadKind method_load_kind = invoke->GetMethodLoadKind();
HInvokeStaticOrDirect::CodePtrLocation code_ptr_location = invoke->GetCodePtrLocation();
- Register base_reg = invoke->HasPcRelativeDexCache()
+ bool is_r6 = GetInstructionSetFeatures().IsR6();
+ Register base_reg = (invoke->HasPcRelativeDexCache() && !is_r6)
? GetInvokeStaticOrDirectExtraParameter(invoke, temp.AsRegister<Register>())
: ZERO;
@@ -5346,14 +5416,23 @@ void CodeGeneratorMIPS::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke
case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress:
__ LoadConst32(temp.AsRegister<Register>(), invoke->GetMethodAddress());
break;
- case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative: {
- HMipsDexCacheArraysBase* base =
- invoke->InputAt(invoke->GetSpecialInputIndex())->AsMipsDexCacheArraysBase();
- int32_t offset =
- invoke->GetDexCacheArrayOffset() - base->GetElementOffset() - kDexCacheArrayLwOffset;
- __ LoadFromOffset(kLoadWord, temp.AsRegister<Register>(), base_reg, offset);
+ case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative:
+ if (is_r6) {
+ uint32_t offset = invoke->GetDexCacheArrayOffset();
+ CodeGeneratorMIPS::PcRelativePatchInfo* info =
+ NewPcRelativeDexCacheArrayPatch(invoke->GetDexFileForPcRelativeDexCache(), offset);
+ bool reordering = __ SetReorder(false);
+ EmitPcRelativeAddressPlaceholderHigh(info, TMP, ZERO);
+ __ Lw(temp.AsRegister<Register>(), TMP, /* placeholder */ 0x5678);
+ __ SetReorder(reordering);
+ } else {
+ HMipsDexCacheArraysBase* base =
+ invoke->InputAt(invoke->GetSpecialInputIndex())->AsMipsDexCacheArraysBase();
+ int32_t offset =
+ invoke->GetDexCacheArrayOffset() - base->GetElementOffset() - kDexCacheArrayLwOffset;
+ __ LoadFromOffset(kLoadWord, temp.AsRegister<Register>(), base_reg, offset);
+ }
break;
- }
case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: {
Location current_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
Register reg = temp.AsRegister<Register>();
@@ -5546,7 +5625,10 @@ void InstructionCodeGeneratorMIPS::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAF
DCHECK(codegen_->GetCompilerOptions().IsBootImage());
CodeGeneratorMIPS::PcRelativePatchInfo* info =
codegen_->NewPcRelativeTypePatch(cls->GetDexFile(), cls->GetTypeIndex());
- codegen_->EmitPcRelativeAddressPlaceholder(info, out, base_or_current_method_reg);
+ bool reordering = __ SetReorder(false);
+ codegen_->EmitPcRelativeAddressPlaceholderHigh(info, out, base_or_current_method_reg);
+ __ Addiu(out, out, /* placeholder */ 0x5678);
+ __ SetReorder(reordering);
break;
}
case HLoadClass::LoadKind::kBootImageAddress: {
@@ -5562,16 +5644,26 @@ void InstructionCodeGeneratorMIPS::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAF
case HLoadClass::LoadKind::kBssEntry: {
CodeGeneratorMIPS::PcRelativePatchInfo* info =
codegen_->NewTypeBssEntryPatch(cls->GetDexFile(), cls->GetTypeIndex());
- codegen_->EmitPcRelativeAddressPlaceholder(info, out, base_or_current_method_reg);
- __ LoadFromOffset(kLoadWord, out, out, 0);
+ bool reordering = __ SetReorder(false);
+ codegen_->EmitPcRelativeAddressPlaceholderHigh(info, out, base_or_current_method_reg);
+ __ LoadFromOffset(kLoadWord, out, out, /* placeholder */ 0x5678);
+ __ SetReorder(reordering);
generate_null_check = true;
break;
}
case HLoadClass::LoadKind::kJitTableAddress: {
- LOG(FATAL) << "Unimplemented";
+ CodeGeneratorMIPS::JitPatchInfo* info = codegen_->NewJitRootClassPatch(cls->GetDexFile(),
+ cls->GetTypeIndex(),
+ cls->GetClass());
+ bool reordering = __ SetReorder(false);
+ __ Bind(&info->high_label);
+ __ Lui(out, /* placeholder */ 0x1234);
+ GenerateGcRootFieldLoad(cls, out_loc, out, /* placeholder */ 0x5678);
+ __ SetReorder(reordering);
break;
}
case HLoadClass::LoadKind::kDexCacheViaMethod:
+ case HLoadClass::LoadKind::kInvalid:
LOG(FATAL) << "UNREACHABLE";
UNREACHABLE();
}
@@ -5678,7 +5770,10 @@ void InstructionCodeGeneratorMIPS::VisitLoadString(HLoadString* load) NO_THREAD_
DCHECK(codegen_->GetCompilerOptions().IsBootImage());
CodeGeneratorMIPS::PcRelativePatchInfo* info =
codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex());
- codegen_->EmitPcRelativeAddressPlaceholder(info, out, base_or_current_method_reg);
+ bool reordering = __ SetReorder(false);
+ codegen_->EmitPcRelativeAddressPlaceholderHigh(info, out, base_or_current_method_reg);
+ __ Addiu(out, out, /* placeholder */ 0x5678);
+ __ SetReorder(reordering);
return; // No dex cache slow path.
}
case HLoadString::LoadKind::kBootImageAddress: {
@@ -5694,14 +5789,28 @@ void InstructionCodeGeneratorMIPS::VisitLoadString(HLoadString* load) NO_THREAD_
DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
CodeGeneratorMIPS::PcRelativePatchInfo* info =
codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex());
- codegen_->EmitPcRelativeAddressPlaceholder(info, out, base_or_current_method_reg);
- __ LoadFromOffset(kLoadWord, out, out, 0);
+ bool reordering = __ SetReorder(false);
+ codegen_->EmitPcRelativeAddressPlaceholderHigh(info, out, base_or_current_method_reg);
+ __ LoadFromOffset(kLoadWord, out, out, /* placeholder */ 0x5678);
+ __ SetReorder(reordering);
SlowPathCodeMIPS* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathMIPS(load);
codegen_->AddSlowPath(slow_path);
__ Beqz(out, slow_path->GetEntryLabel());
__ Bind(slow_path->GetExitLabel());
return;
}
+ case HLoadString::LoadKind::kJitTableAddress: {
+ CodeGeneratorMIPS::JitPatchInfo* info =
+ codegen_->NewJitRootStringPatch(load->GetDexFile(),
+ load->GetStringIndex(),
+ load->GetString());
+ bool reordering = __ SetReorder(false);
+ __ Bind(&info->high_label);
+ __ Lui(out, /* placeholder */ 0x1234);
+ GenerateGcRootFieldLoad(load, out_loc, out, /* placeholder */ 0x5678);
+ __ SetReorder(reordering);
+ return;
+ }
default:
break;
}
@@ -6894,8 +7003,12 @@ void InstructionCodeGeneratorMIPS::VisitMipsDexCacheArraysBase(HMipsDexCacheArra
Register reg = base->GetLocations()->Out().AsRegister<Register>();
CodeGeneratorMIPS::PcRelativePatchInfo* info =
codegen_->NewPcRelativeDexCacheArrayPatch(base->GetDexFile(), base->GetElementOffset());
+ CHECK(!codegen_->GetInstructionSetFeatures().IsR6());
+ bool reordering = __ SetReorder(false);
// TODO: Reuse MipsComputeBaseMethodAddress on R2 instead of passing ZERO to force emitting NAL.
- codegen_->EmitPcRelativeAddressPlaceholder(info, reg, ZERO);
+ codegen_->EmitPcRelativeAddressPlaceholderHigh(info, reg, ZERO);
+ __ Addiu(reg, reg, /* placeholder */ 0x5678);
+ __ SetReorder(reordering);
}
void LocationsBuilderMIPS::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
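// Comment-only summary (not part of this patch) of the two kDexCachePcRelative
// method-load paths in GenerateStaticOrDirectCall() above, now keyed on IsR6():
//   R6:     auipc TMP, high ; lw temp, low(TMP)   // no extra input register needed
//   pre-R6: lw temp, offset(base_reg)             // base_reg comes from the extra
//                                                 // HMipsDexCacheArraysBase input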
diff --git a/compiler/optimizing/code_generator_mips.h b/compiler/optimizing/code_generator_mips.h
index c8fd325999..47eba50248 100644
--- a/compiler/optimizing/code_generator_mips.h
+++ b/compiler/optimizing/code_generator_mips.h
@@ -352,6 +352,7 @@ class CodeGeneratorMIPS : public CodeGenerator {
// Emit linker patches.
void EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) OVERRIDE;
+ void EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) OVERRIDE;
void MarkGCCard(Register object, Register value, bool value_can_be_null);
@@ -463,7 +464,32 @@ class CodeGeneratorMIPS : public CodeGenerator {
Literal* DeduplicateBootImageTypeLiteral(const DexFile& dex_file, dex::TypeIndex type_index);
Literal* DeduplicateBootImageAddressLiteral(uint32_t address);
- void EmitPcRelativeAddressPlaceholder(PcRelativePatchInfo* info, Register out, Register base);
+ void EmitPcRelativeAddressPlaceholderHigh(PcRelativePatchInfo* info, Register out, Register base);
+
+ // The JitPatchInfo is used for JIT string and class loads.
+ struct JitPatchInfo {
+ JitPatchInfo(const DexFile& dex_file, uint64_t idx)
+ : target_dex_file(dex_file), index(idx) { }
+ JitPatchInfo(JitPatchInfo&& other) = default;
+
+ const DexFile& target_dex_file;
+ // String/type index.
+ uint64_t index;
+ // Label for the instruction loading the most significant half of the address.
+ // The least significant half is loaded with the instruction that follows immediately.
+ MipsLabel high_label;
+ };
+
+ void PatchJitRootUse(uint8_t* code,
+ const uint8_t* roots_data,
+ const JitPatchInfo& info,
+ uint64_t index_in_table) const;
+ JitPatchInfo* NewJitRootStringPatch(const DexFile& dex_file,
+ dex::StringIndex dex_index,
+ Handle<mirror::String> handle);
+ JitPatchInfo* NewJitRootClassPatch(const DexFile& dex_file,
+ dex::TypeIndex dex_index,
+ Handle<mirror::Class> handle);
private:
Register GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke, Register temp);
@@ -512,6 +538,10 @@ class CodeGeneratorMIPS : public CodeGenerator {
ArenaDeque<PcRelativePatchInfo> type_bss_entry_patches_;
// Deduplication map for patchable boot image addresses.
Uint32ToLiteralMap boot_image_address_patches_;
+ // Patches for string root accesses in JIT compiled code.
+ ArenaDeque<JitPatchInfo> jit_string_patches_;
+ // Patches for class root accesses in JIT compiled code.
+ ArenaDeque<JitPatchInfo> jit_class_patches_;
// PC-relative loads on R2 clobber RA, which may need to be preserved explicitly in leaf methods.
// This is a flag set by pc_relative_fixups_mips and dex_cache_array_fixups_mips optimizations.
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index 192b4a5050..4c8dabfede 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -91,9 +91,6 @@ Location InvokeDexCallingConventionVisitorMIPS64::GetNextLocation(Primitive::Typ
// Space on the stack is reserved for all arguments.
stack_index_ += Primitive::Is64BitType(type) ? 2 : 1;
- // TODO: shouldn't we use a whole machine word per argument on the stack?
- // Implicit 4-byte method pointer (and such) will cause misalignment.
-
return next_location;
}
@@ -434,7 +431,11 @@ CodeGeneratorMIPS64::CodeGeneratorMIPS64(HGraph* graph,
pc_relative_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
type_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
boot_image_address_patches_(std::less<uint32_t>(),
- graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {
+ graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+ jit_string_patches_(StringReferenceValueComparator(),
+ graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+ jit_class_patches_(TypeReferenceValueComparator(),
+ graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {
// Save RA (containing the return address) to mimic Quick.
AddAllocatedRegister(Location::RegisterLocation(RA));
}
@@ -1055,6 +1056,49 @@ void CodeGeneratorMIPS64::EmitPcRelativeAddressPlaceholderHigh(PcRelativePatchIn
// offset to `out` (e.g. ld, jialc, daddiu).
}
+Literal* CodeGeneratorMIPS64::DeduplicateJitStringLiteral(const DexFile& dex_file,
+ dex::StringIndex string_index,
+ Handle<mirror::String> handle) {
+ jit_string_roots_.Overwrite(StringReference(&dex_file, string_index),
+ reinterpret_cast64<uint64_t>(handle.GetReference()));
+ return jit_string_patches_.GetOrCreate(
+ StringReference(&dex_file, string_index),
+ [this]() { return __ NewLiteral<uint32_t>(/* placeholder */ 0u); });
+}
+
+Literal* CodeGeneratorMIPS64::DeduplicateJitClassLiteral(const DexFile& dex_file,
+ dex::TypeIndex type_index,
+ Handle<mirror::Class> handle) {
+ jit_class_roots_.Overwrite(TypeReference(&dex_file, type_index),
+ reinterpret_cast64<uint64_t>(handle.GetReference()));
+ return jit_class_patches_.GetOrCreate(
+ TypeReference(&dex_file, type_index),
+ [this]() { return __ NewLiteral<uint32_t>(/* placeholder */ 0u); });
+}
+
+void CodeGeneratorMIPS64::PatchJitRootUse(uint8_t* code,
+ const uint8_t* roots_data,
+ const Literal* literal,
+ uint64_t index_in_table) const {
+ uint32_t literal_offset = GetAssembler().GetLabelLocation(literal->GetLabel());
+ uintptr_t address =
+ reinterpret_cast<uintptr_t>(roots_data) + index_in_table * sizeof(GcRoot<mirror::Object>);
+ reinterpret_cast<uint32_t*>(code + literal_offset)[0] = dchecked_integral_cast<uint32_t>(address);
+}
+
+void CodeGeneratorMIPS64::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) {
+ for (const auto& entry : jit_string_patches_) {
+ const auto& it = jit_string_roots_.find(entry.first);
+ DCHECK(it != jit_string_roots_.end());
+ PatchJitRootUse(code, roots_data, entry.second, it->second);
+ }
+ for (const auto& entry : jit_class_patches_) {
+ const auto& it = jit_class_roots_.find(entry.first);
+ DCHECK(it != jit_class_roots_.end());
+ PatchJitRootUse(code, roots_data, entry.second, it->second);
+ }
+}
+
void CodeGeneratorMIPS64::SetupBlockedRegisters() const {
// ZERO, K0, K1, GP, SP, RA are always reserved and can't be allocated.
blocked_core_registers_[ZERO] = true;
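// A minimal sketch (not part of this patch) tying together the MIPS64 JIT root
// support added above: kJitTableAddress loads go through a deduplicated 32-bit
// literal whose placeholder value is later overwritten by PatchJitRootUse()
// with the address of the corresponding entry in the JIT GC-roots table.
Literal* literal = codegen_->DeduplicateJitClassLiteral(
    cls->GetDexFile(), cls->GetTypeIndex(), cls->GetClass());
__ LoadLiteral(out, kLoadUnsignedWord, literal);   // reads the placeholder 0 for now
GenerateGcRootFieldLoad(cls, out_loc, out, /* offset */ 0);
// At JIT commit time EmitJitRootPatches() resolves each literal in place:
//   address = roots_data + index_in_table * sizeof(GcRoot<mirror::Object>)
//   PatchJitRootUse(code, roots_data, literal, index_in_table);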
@@ -3117,14 +3161,6 @@ void InstructionCodeGeneratorMIPS64::GenerateGcRootFieldLoad(
Location root,
GpuRegister obj,
uint32_t offset) {
- // When handling PC-relative loads, the caller calls
- // EmitPcRelativeAddressPlaceholderHigh() and then GenerateGcRootFieldLoad().
- // The relative patcher expects the two methods to emit the following patchable
- // sequence of instructions in this case:
- // auipc reg1, 0x1234 // 0x1234 is a placeholder for offset_high.
- // lwu reg2, 0x5678(reg1) // 0x5678 is a placeholder for offset_low.
- // TODO: Adjust GenerateGcRootFieldLoad() and its caller when this method is
- // extended (e.g. for read barriers) so as not to break the relative patcher.
GpuRegister root_reg = root.AsRegister<GpuRegister>();
if (kEmitCompilerReadBarrier) {
UNIMPLEMENTED(FATAL) << "for read barrier";
@@ -3317,8 +3353,6 @@ HLoadString::LoadKind CodeGeneratorMIPS64::GetSupportedLoadStringKind(
break;
case HLoadString::LoadKind::kJitTableAddress:
DCHECK(Runtime::Current()->UseJitCompilation());
- // TODO: implement.
- fallback_load = true;
break;
}
if (fallback_load) {
@@ -3334,6 +3368,9 @@ HLoadClass::LoadKind CodeGeneratorMIPS64::GetSupportedLoadClassKind(
}
bool fallback_load = false;
switch (desired_class_load_kind) {
+ case HLoadClass::LoadKind::kInvalid:
+ LOG(FATAL) << "UNREACHABLE";
+ UNREACHABLE();
case HLoadClass::LoadKind::kReferrersClass:
break;
case HLoadClass::LoadKind::kBootImageLinkTimeAddress:
@@ -3349,8 +3386,6 @@ HLoadClass::LoadKind CodeGeneratorMIPS64::GetSupportedLoadClassKind(
break;
case HLoadClass::LoadKind::kJitTableAddress:
DCHECK(Runtime::Current()->UseJitCompilation());
- // TODO: implement.
- fallback_load = true;
break;
case HLoadClass::LoadKind::kDexCacheViaMethod:
break;
@@ -3588,11 +3623,16 @@ void InstructionCodeGeneratorMIPS64::VisitLoadClass(HLoadClass* cls) NO_THREAD_S
generate_null_check = true;
break;
}
- case HLoadClass::LoadKind::kJitTableAddress: {
- LOG(FATAL) << "Unimplemented";
+ case HLoadClass::LoadKind::kJitTableAddress:
+ __ LoadLiteral(out,
+ kLoadUnsignedWord,
+ codegen_->DeduplicateJitClassLiteral(cls->GetDexFile(),
+ cls->GetTypeIndex(),
+ cls->GetClass()));
+ GenerateGcRootFieldLoad(cls, out_loc, out, 0);
break;
- }
case HLoadClass::LoadKind::kDexCacheViaMethod:
+ case HLoadClass::LoadKind::kInvalid:
LOG(FATAL) << "UNREACHABLE";
UNREACHABLE();
}
@@ -3693,6 +3733,14 @@ void InstructionCodeGeneratorMIPS64::VisitLoadString(HLoadString* load) NO_THREA
__ Bind(slow_path->GetExitLabel());
return;
}
+ case HLoadString::LoadKind::kJitTableAddress:
+ __ LoadLiteral(out,
+ kLoadUnsignedWord,
+ codegen_->DeduplicateJitStringLiteral(load->GetDexFile(),
+ load->GetStringIndex(),
+ load->GetString()));
+ GenerateGcRootFieldLoad(load, out_loc, out, 0);
+ return;
default:
break;
}
diff --git a/compiler/optimizing/code_generator_mips64.h b/compiler/optimizing/code_generator_mips64.h
index 52b780c106..26cc7dc788 100644
--- a/compiler/optimizing/code_generator_mips64.h
+++ b/compiler/optimizing/code_generator_mips64.h
@@ -52,7 +52,7 @@ static constexpr size_t kRuntimeParameterFpuRegistersLength =
static constexpr GpuRegister kCoreCalleeSaves[] =
- { S0, S1, S2, S3, S4, S5, S6, S7, GP, S8, RA }; // TODO: review
+ { S0, S1, S2, S3, S4, S5, S6, S7, GP, S8, RA };
static constexpr FpuRegister kFpuCalleeSaves[] =
{ F24, F25, F26, F27, F28, F29, F30, F31 };
@@ -115,12 +115,11 @@ class FieldAccessCallingConventionMIPS64 : public FieldAccessCallingConvention {
Location GetReturnLocation(Primitive::Type type ATTRIBUTE_UNUSED) const OVERRIDE {
return Location::RegisterLocation(V0);
}
- Location GetSetValueLocation(Primitive::Type type, bool is_instance) const OVERRIDE {
- return Primitive::Is64BitType(type)
+ Location GetSetValueLocation(Primitive::Type type ATTRIBUTE_UNUSED,
+ bool is_instance) const OVERRIDE {
+ return is_instance
? Location::RegisterLocation(A2)
- : (is_instance
- ? Location::RegisterLocation(A2)
- : Location::RegisterLocation(A1));
+ : Location::RegisterLocation(A1);
}
Location GetFpuLocation(Primitive::Type type ATTRIBUTE_UNUSED) const OVERRIDE {
return Location::FpuRegisterLocation(F0);
@@ -313,6 +312,7 @@ class CodeGeneratorMIPS64 : public CodeGenerator {
// Emit linker patches.
void EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) OVERRIDE;
+ void EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) OVERRIDE;
void MarkGCCard(GpuRegister object, GpuRegister value, bool value_can_be_null);
@@ -426,10 +426,27 @@ class CodeGeneratorMIPS64 : public CodeGenerator {
void EmitPcRelativeAddressPlaceholderHigh(PcRelativePatchInfo* info, GpuRegister out);
+ void PatchJitRootUse(uint8_t* code,
+ const uint8_t* roots_data,
+ const Literal* literal,
+ uint64_t index_in_table) const;
+ Literal* DeduplicateJitStringLiteral(const DexFile& dex_file,
+ dex::StringIndex string_index,
+ Handle<mirror::String> handle);
+ Literal* DeduplicateJitClassLiteral(const DexFile& dex_file,
+ dex::TypeIndex type_index,
+ Handle<mirror::Class> handle);
+
private:
using Uint32ToLiteralMap = ArenaSafeMap<uint32_t, Literal*>;
using Uint64ToLiteralMap = ArenaSafeMap<uint64_t, Literal*>;
using MethodToLiteralMap = ArenaSafeMap<MethodReference, Literal*, MethodReferenceComparator>;
+ using StringToLiteralMap = ArenaSafeMap<StringReference,
+ Literal*,
+ StringReferenceValueComparator>;
+ using TypeToLiteralMap = ArenaSafeMap<TypeReference,
+ Literal*,
+ TypeReferenceValueComparator>;
using BootStringToLiteralMap = ArenaSafeMap<StringReference,
Literal*,
StringReferenceValueComparator>;
@@ -477,6 +494,10 @@ class CodeGeneratorMIPS64 : public CodeGenerator {
ArenaDeque<PcRelativePatchInfo> type_bss_entry_patches_;
// Deduplication map for patchable boot image addresses.
Uint32ToLiteralMap boot_image_address_patches_;
+ // Patches for string root accesses in JIT compiled code.
+ StringToLiteralMap jit_string_patches_;
+ // Patches for class root accesses in JIT compiled code.
+ TypeToLiteralMap jit_class_patches_;
DISALLOW_COPY_AND_ASSIGN(CodeGeneratorMIPS64);
};
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 1b7431612d..137b55423b 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -4214,7 +4214,9 @@ void LocationsBuilderX86::VisitNewArray(HNewArray* instruction) {
void InstructionCodeGeneratorX86::VisitNewArray(HNewArray* instruction) {
// Note: if heap poisoning is enabled, the entry point takes care
// of poisoning the reference.
- codegen_->InvokeRuntime(kQuickAllocArrayResolved, instruction, instruction->GetDexPc());
+ QuickEntrypointEnum entrypoint =
+ CodeGenerator::GetArrayAllocationEntrypoint(instruction->GetLoadClass()->GetClass());
+ codegen_->InvokeRuntime(entrypoint, instruction, instruction->GetDexPc());
CheckEntrypointTypes<kQuickAllocArrayResolved, void*, mirror::Class*, int32_t>();
DCHECK(!codegen_->IsLeafMethod());
}
@@ -6022,6 +6024,9 @@ void ParallelMoveResolverX86::RestoreScratch(int reg) {
HLoadClass::LoadKind CodeGeneratorX86::GetSupportedLoadClassKind(
HLoadClass::LoadKind desired_class_load_kind) {
switch (desired_class_load_kind) {
+ case HLoadClass::LoadKind::kInvalid:
+ LOG(FATAL) << "UNREACHABLE";
+ UNREACHABLE();
case HLoadClass::LoadKind::kReferrersClass:
break;
case HLoadClass::LoadKind::kBootImageLinkTimeAddress:
@@ -6157,6 +6162,7 @@ void InstructionCodeGeneratorX86::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFE
break;
}
case HLoadClass::LoadKind::kDexCacheViaMethod:
+ case HLoadClass::LoadKind::kInvalid:
LOG(FATAL) << "UNREACHABLE";
UNREACHABLE();
}
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index 7350fcc48a..5360dc9209 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -110,7 +110,9 @@ class FieldAccessCallingConventionX86 : public FieldAccessCallingConvention {
}
Location GetSetValueLocation(Primitive::Type type, bool is_instance) const OVERRIDE {
return Primitive::Is64BitType(type)
- ? Location::RegisterPairLocation(EDX, EBX)
+ ? (is_instance
+ ? Location::RegisterPairLocation(EDX, EBX)
+ : Location::RegisterPairLocation(ECX, EDX))
: (is_instance
? Location::RegisterLocation(EDX)
: Location::RegisterLocation(ECX));
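// Illustration (not part of this patch) of the x86 fix above: a 64-bit value
// for a static field set now lands in the ECX:EDX pair instead of reusing the
// instance-set pair EDX:EBX, mirroring the 32-bit case where static sets
// already use ECX. `conv` is a hypothetical FieldAccessCallingConventionX86.
Location wide_static   = conv.GetSetValueLocation(Primitive::kPrimLong, /* is_instance */ false);  // ECX:EDX
Location wide_instance = conv.GetSetValueLocation(Primitive::kPrimLong, /* is_instance */ true);   // EDX:EBX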
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index c4caf4bf9d..c5367ce86e 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -4096,7 +4096,9 @@ void LocationsBuilderX86_64::VisitNewArray(HNewArray* instruction) {
void InstructionCodeGeneratorX86_64::VisitNewArray(HNewArray* instruction) {
// Note: if heap poisoning is enabled, the entry point takes care
// of poisoning the reference.
- codegen_->InvokeRuntime(kQuickAllocArrayResolved, instruction, instruction->GetDexPc());
+ QuickEntrypointEnum entrypoint =
+ CodeGenerator::GetArrayAllocationEntrypoint(instruction->GetLoadClass()->GetClass());
+ codegen_->InvokeRuntime(entrypoint, instruction, instruction->GetDexPc());
CheckEntrypointTypes<kQuickAllocArrayResolved, void*, mirror::Class*, int32_t>();
DCHECK(!codegen_->IsLeafMethod());
}
@@ -5425,6 +5427,9 @@ void InstructionCodeGeneratorX86_64::GenerateClassInitializationCheck(
HLoadClass::LoadKind CodeGeneratorX86_64::GetSupportedLoadClassKind(
HLoadClass::LoadKind desired_class_load_kind) {
switch (desired_class_load_kind) {
+ case HLoadClass::LoadKind::kInvalid:
+ LOG(FATAL) << "UNREACHABLE";
+ UNREACHABLE();
case HLoadClass::LoadKind::kReferrersClass:
break;
case HLoadClass::LoadKind::kBootImageLinkTimeAddress:
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index 3438b8159f..3a83731b3f 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -92,12 +92,11 @@ class FieldAccessCallingConventionX86_64 : public FieldAccessCallingConvention {
Location GetReturnLocation(Primitive::Type type ATTRIBUTE_UNUSED) const OVERRIDE {
return Location::RegisterLocation(RAX);
}
- Location GetSetValueLocation(Primitive::Type type, bool is_instance) const OVERRIDE {
- return Primitive::Is64BitType(type)
+ Location GetSetValueLocation(Primitive::Type type ATTRIBUTE_UNUSED, bool is_instance)
+ const OVERRIDE {
+ return is_instance
? Location::RegisterLocation(RDX)
- : (is_instance
- ? Location::RegisterLocation(RDX)
- : Location::RegisterLocation(RSI));
+ : Location::RegisterLocation(RSI);
}
Location GetFpuLocation(Primitive::Type type ATTRIBUTE_UNUSED) const OVERRIDE {
return Location::FpuRegisterLocation(XMM0);
diff --git a/compiler/optimizing/codegen_test.cc b/compiler/optimizing/codegen_test.cc
index e3f3df0ff5..f8bbf68c1c 100644
--- a/compiler/optimizing/codegen_test.cc
+++ b/compiler/optimizing/codegen_test.cc
@@ -17,30 +17,15 @@
#include <functional>
#include <memory>
-#include "arch/instruction_set.h"
-#include "arch/arm/instruction_set_features_arm.h"
-#include "arch/arm/registers_arm.h"
-#include "arch/arm64/instruction_set_features_arm64.h"
-#include "arch/mips/instruction_set_features_mips.h"
-#include "arch/mips/registers_mips.h"
-#include "arch/mips64/instruction_set_features_mips64.h"
-#include "arch/mips64/registers_mips64.h"
-#include "arch/x86/instruction_set_features_x86.h"
-#include "arch/x86/registers_x86.h"
-#include "arch/x86_64/instruction_set_features_x86_64.h"
#include "base/macros.h"
#include "builder.h"
-#include "code_simulator_container.h"
-#include "common_compiler_test.h"
+#include "codegen_test_utils.h"
#include "dex_file.h"
#include "dex_instruction.h"
#include "driver/compiler_options.h"
-#include "graph_checker.h"
#include "nodes.h"
#include "optimizing_unit_test.h"
-#include "prepare_for_register_allocation.h"
#include "register_allocator_linear_scan.h"
-#include "ssa_liveness_analysis.h"
#include "utils.h"
#include "utils/arm/assembler_arm_vixl.h"
#include "utils/arm/managed_register_arm.h"
@@ -48,324 +33,10 @@
#include "utils/mips64/managed_register_mips64.h"
#include "utils/x86/managed_register_x86.h"
-#ifdef ART_ENABLE_CODEGEN_arm
-#include "code_generator_arm.h"
-#include "code_generator_arm_vixl.h"
-#endif
-
-#ifdef ART_ENABLE_CODEGEN_arm64
-#include "code_generator_arm64.h"
-#endif
-
-#ifdef ART_ENABLE_CODEGEN_x86
-#include "code_generator_x86.h"
-#endif
-
-#ifdef ART_ENABLE_CODEGEN_x86_64
-#include "code_generator_x86_64.h"
-#endif
-
-#ifdef ART_ENABLE_CODEGEN_mips
-#include "code_generator_mips.h"
-#endif
-
-#ifdef ART_ENABLE_CODEGEN_mips64
-#include "code_generator_mips64.h"
-#endif
-
#include "gtest/gtest.h"
namespace art {
-typedef CodeGenerator* (*CreateCodegenFn)(HGraph*, const CompilerOptions&);
-
-class CodegenTargetConfig {
- public:
- CodegenTargetConfig(InstructionSet isa, CreateCodegenFn create_codegen)
- : isa_(isa), create_codegen_(create_codegen) {
- }
- InstructionSet GetInstructionSet() const { return isa_; }
- CodeGenerator* CreateCodeGenerator(HGraph* graph, const CompilerOptions& compiler_options) {
- return create_codegen_(graph, compiler_options);
- }
-
- private:
- CodegenTargetConfig() {}
- InstructionSet isa_;
- CreateCodegenFn create_codegen_;
-};
-
-#ifdef ART_ENABLE_CODEGEN_arm
-// Provide our own codegen, that ensures the C calling conventions
-// are preserved. Currently, ART and C do not match as R4 is caller-save
-// in ART, and callee-save in C. Alternatively, we could use or write
-// the stub that saves and restores all registers, but it is easier
-// to just overwrite the code generator.
-class TestCodeGeneratorARM : public arm::CodeGeneratorARM {
- public:
- TestCodeGeneratorARM(HGraph* graph,
- const ArmInstructionSetFeatures& isa_features,
- const CompilerOptions& compiler_options)
- : arm::CodeGeneratorARM(graph, isa_features, compiler_options) {
- AddAllocatedRegister(Location::RegisterLocation(arm::R6));
- AddAllocatedRegister(Location::RegisterLocation(arm::R7));
- }
-
- void SetupBlockedRegisters() const OVERRIDE {
- arm::CodeGeneratorARM::SetupBlockedRegisters();
- blocked_core_registers_[arm::R4] = true;
- blocked_core_registers_[arm::R6] = false;
- blocked_core_registers_[arm::R7] = false;
- }
-};
-
-// A way to test the VIXL32-based code generator on ARM. This will replace
-// TestCodeGeneratorARM when the VIXL32-based backend replaces the existing one.
-class TestCodeGeneratorARMVIXL : public arm::CodeGeneratorARMVIXL {
- public:
- TestCodeGeneratorARMVIXL(HGraph* graph,
- const ArmInstructionSetFeatures& isa_features,
- const CompilerOptions& compiler_options)
- : arm::CodeGeneratorARMVIXL(graph, isa_features, compiler_options) {
- AddAllocatedRegister(Location::RegisterLocation(arm::R6));
- AddAllocatedRegister(Location::RegisterLocation(arm::R7));
- }
-
- void SetupBlockedRegisters() const OVERRIDE {
- arm::CodeGeneratorARMVIXL::SetupBlockedRegisters();
- blocked_core_registers_[arm::R4] = true;
- blocked_core_registers_[arm::R6] = false;
- blocked_core_registers_[arm::R7] = false;
- }
-};
-#endif
-
-#ifdef ART_ENABLE_CODEGEN_x86
-class TestCodeGeneratorX86 : public x86::CodeGeneratorX86 {
- public:
- TestCodeGeneratorX86(HGraph* graph,
- const X86InstructionSetFeatures& isa_features,
- const CompilerOptions& compiler_options)
- : x86::CodeGeneratorX86(graph, isa_features, compiler_options) {
- // Save edi, we need it for getting enough registers for long multiplication.
- AddAllocatedRegister(Location::RegisterLocation(x86::EDI));
- }
-
- void SetupBlockedRegisters() const OVERRIDE {
- x86::CodeGeneratorX86::SetupBlockedRegisters();
- // ebx is a callee-save register in C, but caller-save for ART.
- blocked_core_registers_[x86::EBX] = true;
-
- // Make edi available.
- blocked_core_registers_[x86::EDI] = false;
- }
-};
-#endif
-
-class InternalCodeAllocator : public CodeAllocator {
- public:
- InternalCodeAllocator() : size_(0) { }
-
- virtual uint8_t* Allocate(size_t size) {
- size_ = size;
- memory_.reset(new uint8_t[size]);
- return memory_.get();
- }
-
- size_t GetSize() const { return size_; }
- uint8_t* GetMemory() const { return memory_.get(); }
-
- private:
- size_t size_;
- std::unique_ptr<uint8_t[]> memory_;
-
- DISALLOW_COPY_AND_ASSIGN(InternalCodeAllocator);
-};
-
-static bool CanExecuteOnHardware(InstructionSet target_isa) {
- return (target_isa == kRuntimeISA)
- // Handle the special case of ARM, with two instructions sets (ARM32 and Thumb-2).
- || (kRuntimeISA == kArm && target_isa == kThumb2);
-}
-
-static bool CanExecute(InstructionSet target_isa) {
- CodeSimulatorContainer simulator(target_isa);
- return CanExecuteOnHardware(target_isa) || simulator.CanSimulate();
-}
-
-template <typename Expected>
-static Expected SimulatorExecute(CodeSimulator* simulator, Expected (*f)());
-
-template <>
-bool SimulatorExecute<bool>(CodeSimulator* simulator, bool (*f)()) {
- simulator->RunFrom(reinterpret_cast<intptr_t>(f));
- return simulator->GetCReturnBool();
-}
-
-template <>
-int32_t SimulatorExecute<int32_t>(CodeSimulator* simulator, int32_t (*f)()) {
- simulator->RunFrom(reinterpret_cast<intptr_t>(f));
- return simulator->GetCReturnInt32();
-}
-
-template <>
-int64_t SimulatorExecute<int64_t>(CodeSimulator* simulator, int64_t (*f)()) {
- simulator->RunFrom(reinterpret_cast<intptr_t>(f));
- return simulator->GetCReturnInt64();
-}
-
-template <typename Expected>
-static void VerifyGeneratedCode(InstructionSet target_isa,
- Expected (*f)(),
- bool has_result,
- Expected expected) {
- ASSERT_TRUE(CanExecute(target_isa)) << "Target isa is not executable.";
-
- // Verify on simulator.
- CodeSimulatorContainer simulator(target_isa);
- if (simulator.CanSimulate()) {
- Expected result = SimulatorExecute<Expected>(simulator.Get(), f);
- if (has_result) {
- ASSERT_EQ(expected, result);
- }
- }
-
- // Verify on hardware.
- if (CanExecuteOnHardware(target_isa)) {
- Expected result = f();
- if (has_result) {
- ASSERT_EQ(expected, result);
- }
- }
-}
-
-template <typename Expected>
-static void Run(const InternalCodeAllocator& allocator,
- const CodeGenerator& codegen,
- bool has_result,
- Expected expected) {
- InstructionSet target_isa = codegen.GetInstructionSet();
-
- typedef Expected (*fptr)();
- CommonCompilerTest::MakeExecutable(allocator.GetMemory(), allocator.GetSize());
- fptr f = reinterpret_cast<fptr>(allocator.GetMemory());
- if (target_isa == kThumb2) {
- // For thumb we need the bottom bit set.
- f = reinterpret_cast<fptr>(reinterpret_cast<uintptr_t>(f) + 1);
- }
- VerifyGeneratedCode(target_isa, f, has_result, expected);
-}
-
-static void ValidateGraph(HGraph* graph) {
- GraphChecker graph_checker(graph);
- graph_checker.Run();
- if (!graph_checker.IsValid()) {
- for (const auto& error : graph_checker.GetErrors()) {
- std::cout << error << std::endl;
- }
- }
- ASSERT_TRUE(graph_checker.IsValid());
-}
-
-template <typename Expected>
-static void RunCodeNoCheck(CodeGenerator* codegen,
- HGraph* graph,
- const std::function<void(HGraph*)>& hook_before_codegen,
- bool has_result,
- Expected expected) {
- SsaLivenessAnalysis liveness(graph, codegen);
- PrepareForRegisterAllocation(graph).Run();
- liveness.Analyze();
- RegisterAllocator::Create(graph->GetArena(), codegen, liveness)->AllocateRegisters();
- hook_before_codegen(graph);
- InternalCodeAllocator allocator;
- codegen->Compile(&allocator);
- Run(allocator, *codegen, has_result, expected);
-}
-
-template <typename Expected>
-static void RunCode(CodeGenerator* codegen,
- HGraph* graph,
- std::function<void(HGraph*)> hook_before_codegen,
- bool has_result,
- Expected expected) {
- ValidateGraph(graph);
- RunCodeNoCheck(codegen, graph, hook_before_codegen, has_result, expected);
-}
-
-template <typename Expected>
-static void RunCode(CodegenTargetConfig target_config,
- HGraph* graph,
- std::function<void(HGraph*)> hook_before_codegen,
- bool has_result,
- Expected expected) {
- CompilerOptions compiler_options;
- std::unique_ptr<CodeGenerator> codegen(target_config.CreateCodeGenerator(graph, compiler_options));
- RunCode(codegen.get(), graph, hook_before_codegen, has_result, expected);
-}
-
-#ifdef ART_ENABLE_CODEGEN_arm
-CodeGenerator* create_codegen_arm(HGraph* graph, const CompilerOptions& compiler_options) {
- std::unique_ptr<const ArmInstructionSetFeatures> features_arm(
- ArmInstructionSetFeatures::FromCppDefines());
- return new (graph->GetArena()) TestCodeGeneratorARM(graph,
- *features_arm.get(),
- compiler_options);
-}
-
-CodeGenerator* create_codegen_arm_vixl32(HGraph* graph, const CompilerOptions& compiler_options) {
- std::unique_ptr<const ArmInstructionSetFeatures> features_arm(
- ArmInstructionSetFeatures::FromCppDefines());
- return new (graph->GetArena())
- TestCodeGeneratorARMVIXL(graph, *features_arm.get(), compiler_options);
-}
-#endif
-
-#ifdef ART_ENABLE_CODEGEN_arm64
-CodeGenerator* create_codegen_arm64(HGraph* graph, const CompilerOptions& compiler_options) {
- std::unique_ptr<const Arm64InstructionSetFeatures> features_arm64(
- Arm64InstructionSetFeatures::FromCppDefines());
- return new (graph->GetArena()) arm64::CodeGeneratorARM64(graph,
- *features_arm64.get(),
- compiler_options);
-}
-#endif
-
-#ifdef ART_ENABLE_CODEGEN_x86
-CodeGenerator* create_codegen_x86(HGraph* graph, const CompilerOptions& compiler_options) {
- std::unique_ptr<const X86InstructionSetFeatures> features_x86(
- X86InstructionSetFeatures::FromCppDefines());
- return new (graph->GetArena()) TestCodeGeneratorX86(graph, *features_x86.get(), compiler_options);
-}
-#endif
-
-#ifdef ART_ENABLE_CODEGEN_x86_64
-CodeGenerator* create_codegen_x86_64(HGraph* graph, const CompilerOptions& compiler_options) {
- std::unique_ptr<const X86_64InstructionSetFeatures> features_x86_64(
- X86_64InstructionSetFeatures::FromCppDefines());
- return new (graph->GetArena())
- x86_64::CodeGeneratorX86_64(graph, *features_x86_64.get(), compiler_options);
-}
-#endif
-
-#ifdef ART_ENABLE_CODEGEN_mips
-CodeGenerator* create_codegen_mips(HGraph* graph, const CompilerOptions& compiler_options) {
- std::unique_ptr<const MipsInstructionSetFeatures> features_mips(
- MipsInstructionSetFeatures::FromCppDefines());
- return new (graph->GetArena())
- mips::CodeGeneratorMIPS(graph, *features_mips.get(), compiler_options);
-}
-#endif
-
-#ifdef ART_ENABLE_CODEGEN_mips64
-CodeGenerator* create_codegen_mips64(HGraph* graph, const CompilerOptions& compiler_options) {
- std::unique_ptr<const Mips64InstructionSetFeatures> features_mips64(
- Mips64InstructionSetFeatures::FromCppDefines());
- return new (graph->GetArena())
- mips64::CodeGeneratorMIPS64(graph, *features_mips64.get(), compiler_options);
-}
-#endif
-
// Return all combinations of ISA and code generator that are executable on
// hardware, or on simulator, and that we'd like to test.
static ::std::vector<CodegenTargetConfig> GetTargetConfigs() {
@@ -1067,6 +738,39 @@ TEST_F(CodegenTest, ARMVIXLParallelMoveResolver) {
}
#endif
+#ifdef ART_ENABLE_CODEGEN_arm64
+// Regression test for b/34760542.
+TEST_F(CodegenTest, ARM64ParallelMoveResolverB34760542) {
+ std::unique_ptr<const Arm64InstructionSetFeatures> features(
+ Arm64InstructionSetFeatures::FromCppDefines());
+ ArenaPool pool;
+ ArenaAllocator allocator(&pool);
+ HGraph* graph = CreateGraph(&allocator);
+ arm64::CodeGeneratorARM64 codegen(graph, *features.get(), CompilerOptions());
+
+ codegen.Initialize();
+
+ // The following ParallelMove used to fail this assertion:
+ //
+ // Assertion failed (!available->IsEmpty())
+ //
+ // in vixl::aarch64::UseScratchRegisterScope::AcquireNextAvailable.
+ HParallelMove* move = new (graph->GetArena()) HParallelMove(graph->GetArena());
+ move->AddMove(Location::DoubleStackSlot(0),
+ Location::DoubleStackSlot(257),
+ Primitive::kPrimDouble,
+ nullptr);
+ move->AddMove(Location::DoubleStackSlot(257),
+ Location::DoubleStackSlot(0),
+ Primitive::kPrimDouble,
+ nullptr);
+ codegen.GetMoveResolver()->EmitNativeCode(move);
+
+ InternalCodeAllocator code_allocator;
+ codegen.Finalize(&code_allocator);
+}
+#endif
+
#ifdef ART_ENABLE_CODEGEN_mips
TEST_F(CodegenTest, MipsClobberRA) {
std::unique_ptr<const MipsInstructionSetFeatures> features_mips(
diff --git a/compiler/optimizing/codegen_test_utils.h b/compiler/optimizing/codegen_test_utils.h
new file mode 100644
index 0000000000..cd954043f5
--- /dev/null
+++ b/compiler/optimizing/codegen_test_utils.h
@@ -0,0 +1,355 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_CODEGEN_TEST_UTILS_H_
+#define ART_COMPILER_OPTIMIZING_CODEGEN_TEST_UTILS_H_
+
+#include "arch/arm/instruction_set_features_arm.h"
+#include "arch/arm/registers_arm.h"
+#include "arch/arm64/instruction_set_features_arm64.h"
+#include "arch/instruction_set.h"
+#include "arch/mips/instruction_set_features_mips.h"
+#include "arch/mips/registers_mips.h"
+#include "arch/mips64/instruction_set_features_mips64.h"
+#include "arch/mips64/registers_mips64.h"
+#include "arch/x86/instruction_set_features_x86.h"
+#include "arch/x86/registers_x86.h"
+#include "arch/x86_64/instruction_set_features_x86_64.h"
+#include "code_simulator_container.h"
+#include "common_compiler_test.h"
+#include "graph_checker.h"
+#include "prepare_for_register_allocation.h"
+#include "ssa_liveness_analysis.h"
+
+#ifdef ART_ENABLE_CODEGEN_arm
+#include "code_generator_arm.h"
+#include "code_generator_arm_vixl.h"
+#endif
+
+#ifdef ART_ENABLE_CODEGEN_arm64
+#include "code_generator_arm64.h"
+#endif
+
+#ifdef ART_ENABLE_CODEGEN_x86
+#include "code_generator_x86.h"
+#endif
+
+#ifdef ART_ENABLE_CODEGEN_x86_64
+#include "code_generator_x86_64.h"
+#endif
+
+#ifdef ART_ENABLE_CODEGEN_mips
+#include "code_generator_mips.h"
+#endif
+
+#ifdef ART_ENABLE_CODEGEN_mips64
+#include "code_generator_mips64.h"
+#endif
+
+namespace art {
+
+typedef CodeGenerator* (*CreateCodegenFn)(HGraph*, const CompilerOptions&);
+
+class CodegenTargetConfig {
+ public:
+ CodegenTargetConfig(InstructionSet isa, CreateCodegenFn create_codegen)
+ : isa_(isa), create_codegen_(create_codegen) {
+ }
+ InstructionSet GetInstructionSet() const { return isa_; }
+ CodeGenerator* CreateCodeGenerator(HGraph* graph, const CompilerOptions& compiler_options) {
+ return create_codegen_(graph, compiler_options);
+ }
+
+ private:
+ CodegenTargetConfig() {}
+ InstructionSet isa_;
+ CreateCodegenFn create_codegen_;
+};
+
+#ifdef ART_ENABLE_CODEGEN_arm
+// Provide our own codegen that ensures the C calling conventions
+// are preserved. Currently, ART and C do not match as R4 is caller-save
+// in ART, and callee-save in C. Alternatively, we could use or write
+// the stub that saves and restores all registers, but it is easier
+// to just overwrite the code generator.
+class TestCodeGeneratorARM : public arm::CodeGeneratorARM {
+ public:
+ TestCodeGeneratorARM(HGraph* graph,
+ const ArmInstructionSetFeatures& isa_features,
+ const CompilerOptions& compiler_options)
+ : arm::CodeGeneratorARM(graph, isa_features, compiler_options) {
+ AddAllocatedRegister(Location::RegisterLocation(arm::R6));
+ AddAllocatedRegister(Location::RegisterLocation(arm::R7));
+ }
+
+ void SetupBlockedRegisters() const OVERRIDE {
+ arm::CodeGeneratorARM::SetupBlockedRegisters();
+ blocked_core_registers_[arm::R4] = true;
+ blocked_core_registers_[arm::R6] = false;
+ blocked_core_registers_[arm::R7] = false;
+ }
+};
+
+// A way to test the VIXL32-based code generator on ARM. This will replace
+// TestCodeGeneratorARM when the VIXL32-based backend replaces the existing one.
+class TestCodeGeneratorARMVIXL : public arm::CodeGeneratorARMVIXL {
+ public:
+ TestCodeGeneratorARMVIXL(HGraph* graph,
+ const ArmInstructionSetFeatures& isa_features,
+ const CompilerOptions& compiler_options)
+ : arm::CodeGeneratorARMVIXL(graph, isa_features, compiler_options) {
+ AddAllocatedRegister(Location::RegisterLocation(arm::R6));
+ AddAllocatedRegister(Location::RegisterLocation(arm::R7));
+ }
+
+ void SetupBlockedRegisters() const OVERRIDE {
+ arm::CodeGeneratorARMVIXL::SetupBlockedRegisters();
+ blocked_core_registers_[arm::R4] = true;
+ blocked_core_registers_[arm::R6] = false;
+ blocked_core_registers_[arm::R7] = false;
+ }
+};
+#endif
+
+#ifdef ART_ENABLE_CODEGEN_x86
+class TestCodeGeneratorX86 : public x86::CodeGeneratorX86 {
+ public:
+ TestCodeGeneratorX86(HGraph* graph,
+ const X86InstructionSetFeatures& isa_features,
+ const CompilerOptions& compiler_options)
+ : x86::CodeGeneratorX86(graph, isa_features, compiler_options) {
+ // Save edi, we need it for getting enough registers for long multiplication.
+ AddAllocatedRegister(Location::RegisterLocation(x86::EDI));
+ }
+
+ void SetupBlockedRegisters() const OVERRIDE {
+ x86::CodeGeneratorX86::SetupBlockedRegisters();
+ // ebx is a callee-save register in C, but caller-save for ART.
+ blocked_core_registers_[x86::EBX] = true;
+
+ // Make edi available.
+ blocked_core_registers_[x86::EDI] = false;
+ }
+};
+#endif
+
+class InternalCodeAllocator : public CodeAllocator {
+ public:
+ InternalCodeAllocator() : size_(0) { }
+
+ virtual uint8_t* Allocate(size_t size) {
+ size_ = size;
+ memory_.reset(new uint8_t[size]);
+ return memory_.get();
+ }
+
+ size_t GetSize() const { return size_; }
+ uint8_t* GetMemory() const { return memory_.get(); }
+
+ private:
+ size_t size_;
+ std::unique_ptr<uint8_t[]> memory_;
+
+ DISALLOW_COPY_AND_ASSIGN(InternalCodeAllocator);
+};
+
+static bool CanExecuteOnHardware(InstructionSet target_isa) {
+ return (target_isa == kRuntimeISA)
+      // Handle the special case of ARM, with two instruction sets (ARM32 and Thumb-2).
+ || (kRuntimeISA == kArm && target_isa == kThumb2);
+}
+
+static bool CanExecute(InstructionSet target_isa) {
+ CodeSimulatorContainer simulator(target_isa);
+ return CanExecuteOnHardware(target_isa) || simulator.CanSimulate();
+}
+
+template <typename Expected>
+inline static Expected SimulatorExecute(CodeSimulator* simulator, Expected (*f)());
+
+template <>
+inline bool SimulatorExecute<bool>(CodeSimulator* simulator, bool (*f)()) {
+ simulator->RunFrom(reinterpret_cast<intptr_t>(f));
+ return simulator->GetCReturnBool();
+}
+
+template <>
+inline int32_t SimulatorExecute<int32_t>(CodeSimulator* simulator, int32_t (*f)()) {
+ simulator->RunFrom(reinterpret_cast<intptr_t>(f));
+ return simulator->GetCReturnInt32();
+}
+
+template <>
+inline int64_t SimulatorExecute<int64_t>(CodeSimulator* simulator, int64_t (*f)()) {
+ simulator->RunFrom(reinterpret_cast<intptr_t>(f));
+ return simulator->GetCReturnInt64();
+}
+
+template <typename Expected>
+static void VerifyGeneratedCode(InstructionSet target_isa,
+ Expected (*f)(),
+ bool has_result,
+ Expected expected) {
+ ASSERT_TRUE(CanExecute(target_isa)) << "Target isa is not executable.";
+
+ // Verify on simulator.
+ CodeSimulatorContainer simulator(target_isa);
+ if (simulator.CanSimulate()) {
+ Expected result = SimulatorExecute<Expected>(simulator.Get(), f);
+ if (has_result) {
+ ASSERT_EQ(expected, result);
+ }
+ }
+
+ // Verify on hardware.
+ if (CanExecuteOnHardware(target_isa)) {
+ Expected result = f();
+ if (has_result) {
+ ASSERT_EQ(expected, result);
+ }
+ }
+}
+
+template <typename Expected>
+static void Run(const InternalCodeAllocator& allocator,
+ const CodeGenerator& codegen,
+ bool has_result,
+ Expected expected) {
+ InstructionSet target_isa = codegen.GetInstructionSet();
+
+ typedef Expected (*fptr)();
+ CommonCompilerTest::MakeExecutable(allocator.GetMemory(), allocator.GetSize());
+ fptr f = reinterpret_cast<fptr>(allocator.GetMemory());
+ if (target_isa == kThumb2) {
+ // For thumb we need the bottom bit set.
+ f = reinterpret_cast<fptr>(reinterpret_cast<uintptr_t>(f) + 1);
+ }
+ VerifyGeneratedCode(target_isa, f, has_result, expected);
+}
+
+static void ValidateGraph(HGraph* graph) {
+ GraphChecker graph_checker(graph);
+ graph_checker.Run();
+ if (!graph_checker.IsValid()) {
+ for (const auto& error : graph_checker.GetErrors()) {
+ std::cout << error << std::endl;
+ }
+ }
+ ASSERT_TRUE(graph_checker.IsValid());
+}
+
+template <typename Expected>
+static void RunCodeNoCheck(CodeGenerator* codegen,
+ HGraph* graph,
+ const std::function<void(HGraph*)>& hook_before_codegen,
+ bool has_result,
+ Expected expected) {
+ SsaLivenessAnalysis liveness(graph, codegen);
+ PrepareForRegisterAllocation(graph).Run();
+ liveness.Analyze();
+ RegisterAllocator::Create(graph->GetArena(), codegen, liveness)->AllocateRegisters();
+ hook_before_codegen(graph);
+ InternalCodeAllocator allocator;
+ codegen->Compile(&allocator);
+ Run(allocator, *codegen, has_result, expected);
+}
+
+template <typename Expected>
+static void RunCode(CodeGenerator* codegen,
+ HGraph* graph,
+ std::function<void(HGraph*)> hook_before_codegen,
+ bool has_result,
+ Expected expected) {
+ ValidateGraph(graph);
+ RunCodeNoCheck(codegen, graph, hook_before_codegen, has_result, expected);
+}
+
+template <typename Expected>
+static void RunCode(CodegenTargetConfig target_config,
+ HGraph* graph,
+ std::function<void(HGraph*)> hook_before_codegen,
+ bool has_result,
+ Expected expected) {
+ CompilerOptions compiler_options;
+ std::unique_ptr<CodeGenerator> codegen(target_config.CreateCodeGenerator(graph, compiler_options));
+ RunCode(codegen.get(), graph, hook_before_codegen, has_result, expected);
+}
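
For reference, a minimal sketch of how a test in codegen_test.cc drives these helpers end to end. It assumes the CodegenTest fixture, the CreateCFG() and RemoveSuspendChecks() helpers and the ONE_REGISTER_CODE_ITEM macro from optimizing_unit_test.h, plus GetTargetConfigs() from codegen_test.cc; it is an illustration, not part of this patch:

TEST_F(CodegenTest, ReturnConstantSketch) {
  // CONST_4 v0, #+7; RETURN v0 -- the smallest method with an observable result.
  const uint16_t data[] = ONE_REGISTER_CODE_ITEM(
      Instruction::CONST_4 | 7 << 12 | 0 << 8,
      Instruction::RETURN | 0);
  for (CodegenTargetConfig target_config : GetTargetConfigs()) {
    ArenaPool pool;
    ArenaAllocator allocator(&pool);
    HGraph* graph = CreateCFG(&allocator, data);
    // Suspend checks cannot execute without a proper thread register setup.
    RemoveSuspendChecks(graph);
    RunCode<int32_t>(target_config, graph, [](HGraph*) {}, /* has_result */ true, 7);
  }
}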
+
+#ifdef ART_ENABLE_CODEGEN_arm
+CodeGenerator* create_codegen_arm(HGraph* graph, const CompilerOptions& compiler_options) {
+ std::unique_ptr<const ArmInstructionSetFeatures> features_arm(
+ ArmInstructionSetFeatures::FromCppDefines());
+ return new (graph->GetArena()) TestCodeGeneratorARM(graph,
+ *features_arm.get(),
+ compiler_options);
+}
+
+CodeGenerator* create_codegen_arm_vixl32(HGraph* graph, const CompilerOptions& compiler_options) {
+ std::unique_ptr<const ArmInstructionSetFeatures> features_arm(
+ ArmInstructionSetFeatures::FromCppDefines());
+ return new (graph->GetArena())
+ TestCodeGeneratorARMVIXL(graph, *features_arm.get(), compiler_options);
+}
+#endif
+
+#ifdef ART_ENABLE_CODEGEN_arm64
+CodeGenerator* create_codegen_arm64(HGraph* graph, const CompilerOptions& compiler_options) {
+ std::unique_ptr<const Arm64InstructionSetFeatures> features_arm64(
+ Arm64InstructionSetFeatures::FromCppDefines());
+ return new (graph->GetArena()) arm64::CodeGeneratorARM64(graph,
+ *features_arm64.get(),
+ compiler_options);
+}
+#endif
+
+#ifdef ART_ENABLE_CODEGEN_x86
+CodeGenerator* create_codegen_x86(HGraph* graph, const CompilerOptions& compiler_options) {
+ std::unique_ptr<const X86InstructionSetFeatures> features_x86(
+ X86InstructionSetFeatures::FromCppDefines());
+ return new (graph->GetArena()) TestCodeGeneratorX86(graph, *features_x86.get(), compiler_options);
+}
+#endif
+
+#ifdef ART_ENABLE_CODEGEN_x86_64
+CodeGenerator* create_codegen_x86_64(HGraph* graph, const CompilerOptions& compiler_options) {
+ std::unique_ptr<const X86_64InstructionSetFeatures> features_x86_64(
+ X86_64InstructionSetFeatures::FromCppDefines());
+ return new (graph->GetArena())
+ x86_64::CodeGeneratorX86_64(graph, *features_x86_64.get(), compiler_options);
+}
+#endif
+
+#ifdef ART_ENABLE_CODEGEN_mips
+CodeGenerator* create_codegen_mips(HGraph* graph, const CompilerOptions& compiler_options) {
+ std::unique_ptr<const MipsInstructionSetFeatures> features_mips(
+ MipsInstructionSetFeatures::FromCppDefines());
+ return new (graph->GetArena())
+ mips::CodeGeneratorMIPS(graph, *features_mips.get(), compiler_options);
+}
+#endif
+
+#ifdef ART_ENABLE_CODEGEN_mips64
+CodeGenerator* create_codegen_mips64(HGraph* graph, const CompilerOptions& compiler_options) {
+ std::unique_ptr<const Mips64InstructionSetFeatures> features_mips64(
+ Mips64InstructionSetFeatures::FromCppDefines());
+ return new (graph->GetArena())
+ mips64::CodeGeneratorMIPS64(graph, *features_mips64.get(), compiler_options);
+}
+#endif
+
+} // namespace art
+
+#endif // ART_COMPILER_OPTIMIZING_CODEGEN_TEST_UTILS_H_
diff --git a/compiler/optimizing/common_arm.h b/compiler/optimizing/common_arm.h
index 21c3ae628a..ecb86875d6 100644
--- a/compiler/optimizing/common_arm.h
+++ b/compiler/optimizing/common_arm.h
@@ -146,6 +146,12 @@ inline vixl::aarch32::Register InputRegister(HInstruction* instr) {
return InputRegisterAt(instr, 0);
}
+inline vixl::aarch32::DRegister DRegisterFromS(vixl::aarch32::SRegister s) {
+ vixl::aarch32::DRegister d = vixl::aarch32::DRegister(s.GetCode() / 2);
+ DCHECK(s.Is(d.GetLane(0)) || s.Is(d.GetLane(1)));
+ return d;
+}
+
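
The helper above relies on the AArch32 register aliasing where S(2n) and S(2n+1) are the two lanes of D(n); a standalone sketch of the code arithmetic, with plain integers standing in for VIXL register codes (illustration only):

#include <cassert>

int DCodeFromSCode(int s_code) {
  return s_code / 2;  // e.g. S5 -> D2, because S4/S5 are lanes 0/1 of D2.
}

int main() {
  assert(DCodeFromSCode(0) == 0);    // S0  -> D0,  lane 0.
  assert(DCodeFromSCode(5) == 2);    // S5  -> D2,  lane 1.
  assert(DCodeFromSCode(31) == 15);  // S31 -> D15, lane 1.
  return 0;
}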
inline int32_t Int32ConstantFrom(HInstruction* instr) {
if (instr->IsIntConstant()) {
return instr->AsIntConstant()->GetValue();
diff --git a/compiler/optimizing/common_arm64.h b/compiler/optimizing/common_arm64.h
index 776a483d43..93ea090583 100644
--- a/compiler/optimizing/common_arm64.h
+++ b/compiler/optimizing/common_arm64.h
@@ -130,8 +130,8 @@ inline vixl::aarch64::CPURegister InputCPURegisterOrZeroRegAt(HInstruction* inst
Primitive::Type input_type = input->GetType();
if (input->IsConstant() && input->AsConstant()->IsZeroBitPattern()) {
return (Primitive::ComponentSize(input_type) >= vixl::aarch64::kXRegSizeInBytes)
- ? vixl::aarch64::xzr
- : vixl::aarch64::wzr;
+ ? vixl::aarch64::Register(vixl::aarch64::xzr)
+ : vixl::aarch64::Register(vixl::aarch64::wzr);
}
return InputCPURegisterAt(instr, index);
}
diff --git a/compiler/optimizing/dex_cache_array_fixups_mips.cc b/compiler/optimizing/dex_cache_array_fixups_mips.cc
index 04a4294c48..7734f9197d 100644
--- a/compiler/optimizing/dex_cache_array_fixups_mips.cc
+++ b/compiler/optimizing/dex_cache_array_fixups_mips.cc
@@ -47,7 +47,7 @@ class DexCacheArrayFixupsVisitor : public HGraphVisitor {
// Computing the dex cache base for PC-relative accesses will clobber RA with
// the NAL instruction on R2. Take a note of this before generating the method
// entry.
- if (!dex_cache_array_bases_.empty() && !codegen_->GetInstructionSetFeatures().IsR6()) {
+ if (!dex_cache_array_bases_.empty()) {
codegen_->ClobberRA();
}
}
@@ -92,6 +92,11 @@ class DexCacheArrayFixupsVisitor : public HGraphVisitor {
};
void DexCacheArrayFixups::Run() {
+ CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen_);
+ if (mips_codegen->GetInstructionSetFeatures().IsR6()) {
+ // Do nothing for R6 because it has PC-relative addressing.
+ return;
+ }
if (graph_->HasIrreducibleLoops()) {
// Do not run this optimization, as irreducible loops do not work with an instruction
// that can be live-in at the irreducible loop header.
diff --git a/compiler/optimizing/induction_var_range.cc b/compiler/optimizing/induction_var_range.cc
index 3973985338..5539413aad 100644
--- a/compiler/optimizing/induction_var_range.cc
+++ b/compiler/optimizing/induction_var_range.cc
@@ -57,14 +57,18 @@ static bool IsIntAndGet(HInstruction* instruction, int64_t* value) {
return false;
}
-/** Returns b^e for b,e >= 1. */
-static int64_t IntPow(int64_t b, int64_t e) {
+/** Returns b^e for b,e >= 1. Sets overflow if arithmetic wrap-around occurred. */
+static int64_t IntPow(int64_t b, int64_t e, /*out*/ bool* overflow) {
DCHECK_GE(b, 1);
DCHECK_GE(e, 1);
int64_t pow = 1;
while (e) {
if (e & 1) {
+ int64_t oldpow = pow;
pow *= b;
+ if (pow < oldpow) {
+ *overflow = true;
+ }
}
e >>= 1;
b *= b;
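
The wrap-around test above flags overflow by checking whether the running product moved backwards. A stricter variant can be sketched with the compiler's checked-multiply builtin (__builtin_mul_overflow is a GCC/Clang extension; this is an illustration, not the code added by the patch):

#include <cstdint>

int64_t IntPowChecked(int64_t b, int64_t e, /*out*/ bool* overflow) {
  // Same square-and-multiply loop, but every multiplication is checked.
  int64_t pow = 1;
  while (e != 0) {
    if (e & 1) {
      if (__builtin_mul_overflow(pow, b, &pow)) {
        *overflow = true;
      }
    }
    e >>= 1;
    if (e != 0 && __builtin_mul_overflow(b, b, &b)) {
      *overflow = true;
    }
  }
  return pow;
}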
@@ -1020,20 +1024,27 @@ bool InductionVarRange::GenerateLastValueGeometric(HInductionVarAnalysis::Induct
HInstruction* opb = nullptr;
if (GenerateCode(info->op_a, nullptr, graph, block, &opa, false, false) &&
GenerateCode(info->op_b, nullptr, graph, block, &opb, false, false)) {
- // Compute f ^ m for known maximum index value m.
- int64_t fpow = IntPow(f, m);
if (graph != nullptr) {
- DCHECK(info->operation == HInductionVarAnalysis::kMul ||
- info->operation == HInductionVarAnalysis::kDiv);
Primitive::Type type = info->type;
+ // Compute f ^ m for known maximum index value m.
+ bool overflow = false;
+ int64_t fpow = IntPow(f, m, &overflow);
+ if (info->operation == HInductionVarAnalysis::kDiv) {
+ // For division, any overflow truncates to zero.
+ if (overflow || (type != Primitive::kPrimLong && !CanLongValueFitIntoInt(fpow))) {
+ fpow = 0;
+ }
+ } else if (type != Primitive::kPrimLong) {
+ // For multiplication, okay to truncate to required precision.
+ DCHECK(info->operation == HInductionVarAnalysis::kMul);
+ fpow = static_cast<int32_t>(fpow);
+ }
+ // Generate code.
if (fpow == 0) {
// Special case: repeated mul/div always yields zero.
*result = graph->GetConstant(type, 0);
} else {
// Last value: a * f ^ m + b or a * f ^ -m + b.
- if (type != Primitive::kPrimLong) {
- fpow = static_cast<int32_t>(fpow); // okay to truncate
- }
HInstruction* e = nullptr;
if (info->operation == HInductionVarAnalysis::kMul) {
e = new (graph->GetArena()) HMul(type, opa, graph->GetConstant(type, fpow));
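
Numerically, the special-casing above is sound: once f^m overflows (or, for 32-bit types, no longer fits in an int), a division-based recurrence has already reached zero for every representable start value, while a multiplication-based recurrence wraps around exactly like the truncated constant. A small sketch of the division case (plain C++, illustration only):

#include <cstdint>
#include <cstdio>

int main() {
  int32_t a = INT32_MAX;                  // Largest possible loop input.
  for (int n = 0; n < 20; ++n) a /= 10;   // i = i / 10, twenty iterations.
  printf("%d\n", a);                      // Prints 0: 10^20 overflows int64_t,
                                          // so fpow = 0 matches the exact result.
  return 0;
}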
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index 5d40f75618..f0afccb782 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -304,7 +304,8 @@ ArtMethod* HInliner::TryCHADevirtualization(ArtMethod* resolved_method) {
// We do not support HDeoptimize in OSR methods.
return nullptr;
}
- return resolved_method->GetSingleImplementation();
+ PointerSize pointer_size = caller_compilation_unit_.GetClassLinker()->GetImagePointerSize();
+ return resolved_method->GetSingleImplementation(pointer_size);
}
bool HInliner::TryInline(HInvoke* invoke_instruction) {
@@ -557,9 +558,13 @@ HInstruction* HInliner::AddTypeGuard(HInstruction* receiver,
is_referrer,
invoke_instruction->GetDexPc(),
/* needs_access_check */ false);
+ HLoadClass::LoadKind kind = HSharpening::SharpenClass(
+ load_class, codegen_, compiler_driver_, caller_compilation_unit_);
+ DCHECK(kind != HLoadClass::LoadKind::kInvalid)
+ << "We should always be able to reference a class for inline caches";
+ // Insert before setting the kind, as setting the kind affects the inputs.
bb_cursor->InsertInstructionAfter(load_class, receiver_class);
- // Sharpen after adding the instruction, as the sharpening may remove inputs.
- HSharpening::SharpenClass(load_class, codegen_, compiler_driver_);
+ load_class->SetLoadKind(kind);
// TODO: Extend reference type propagation to understand the guard.
HNotEqual* compare = new (graph_->GetArena()) HNotEqual(load_class, receiver_class);
@@ -1285,6 +1290,7 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction,
resolved_method->GetDexFile(),
*code_item,
compiler_driver_,
+ codegen_,
inline_stats.get(),
resolved_method->GetQuickenedInfo(class_linker->GetImagePointerSize()),
dex_cache,
@@ -1415,10 +1421,13 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction,
return false;
}
- if (!same_dex_file && current->NeedsEnvironment()) {
+ if (current->NeedsEnvironment() &&
+ !CanEncodeInlinedMethodInStackMap(*caller_compilation_unit_.GetDexFile(),
+ resolved_method)) {
VLOG(compiler) << "Method " << callee_dex_file.PrettyMethod(method_index)
<< " could not be inlined because " << current->DebugName()
- << " needs an environment and is in a different dex file";
+ << " needs an environment, is in a different dex file"
+ << ", and cannot be encoded in the stack maps.";
return false;
}
diff --git a/compiler/optimizing/instruction_builder.cc b/compiler/optimizing/instruction_builder.cc
index cac385ce3c..a1c391f455 100644
--- a/compiler/optimizing/instruction_builder.cc
+++ b/compiler/optimizing/instruction_builder.cc
@@ -22,6 +22,7 @@
#include "dex_instruction-inl.h"
#include "driver/compiler_options.h"
#include "imtable-inl.h"
+#include "sharpening.h"
#include "scoped_thread_state_change-inl.h"
namespace art {
@@ -847,7 +848,7 @@ bool HInstructionBuilder::BuildInvoke(const Instruction& instruction,
ScopedObjectAccess soa(Thread::Current());
if (invoke_type == kStatic) {
clinit_check = ProcessClinitCheckForInvoke(
- dex_pc, resolved_method, method_idx, &clinit_check_requirement);
+ dex_pc, resolved_method, &clinit_check_requirement);
} else if (invoke_type == kSuper) {
if (IsSameDexFile(*resolved_method->GetDexFile(), *dex_compilation_unit_->GetDexFile())) {
// Update the method index to the one resolved. Note that this may be a no-op if
@@ -933,15 +934,8 @@ bool HInstructionBuilder::BuildInvokePolymorphic(const Instruction& instruction
bool HInstructionBuilder::BuildNewInstance(dex::TypeIndex type_index, uint32_t dex_pc) {
ScopedObjectAccess soa(Thread::Current());
- Handle<mirror::DexCache> dex_cache = dex_compilation_unit_->GetDexCache();
- Handle<mirror::DexCache> outer_dex_cache = outer_compilation_unit_->GetDexCache();
-
- if (outer_dex_cache.Get() != dex_cache.Get()) {
- // We currently do not support inlining allocations across dex files.
- return false;
- }
- HLoadClass* load_class = BuildLoadClass(type_index, dex_pc, /* check_access */ true);
+ HLoadClass* load_class = BuildLoadClass(type_index, dex_pc);
HInstruction* cls = load_class;
Handle<mirror::Class> klass = load_class->GetClass();
@@ -1005,39 +999,23 @@ bool HInstructionBuilder::IsInitialized(Handle<mirror::Class> cls) const {
HClinitCheck* HInstructionBuilder::ProcessClinitCheckForInvoke(
uint32_t dex_pc,
ArtMethod* resolved_method,
- uint32_t method_idx,
HInvokeStaticOrDirect::ClinitCheckRequirement* clinit_check_requirement) {
- Thread* self = Thread::Current();
- StackHandleScope<2> hs(self);
- Handle<mirror::DexCache> dex_cache = dex_compilation_unit_->GetDexCache();
- Handle<mirror::DexCache> outer_dex_cache = outer_compilation_unit_->GetDexCache();
- Handle<mirror::Class> outer_class(hs.NewHandle(GetOutermostCompilingClass()));
- Handle<mirror::Class> resolved_method_class(hs.NewHandle(resolved_method->GetDeclaringClass()));
-
- // The index at which the method's class is stored in the DexCache's type array.
- dex::TypeIndex storage_index;
- bool is_outer_class = (resolved_method->GetDeclaringClass() == outer_class.Get());
- if (is_outer_class) {
- storage_index = outer_class->GetDexTypeIndex();
- } else if (outer_dex_cache.Get() == dex_cache.Get()) {
- // Get `storage_index` from IsClassOfStaticMethodAvailableToReferrer.
- compiler_driver_->IsClassOfStaticMethodAvailableToReferrer(outer_dex_cache.Get(),
- GetCompilingClass(),
- resolved_method,
- method_idx,
- &storage_index);
- }
+ Handle<mirror::Class> klass = handles_->NewHandle(resolved_method->GetDeclaringClass());
HClinitCheck* clinit_check = nullptr;
-
- if (IsInitialized(resolved_method_class)) {
+ if (IsInitialized(klass)) {
*clinit_check_requirement = HInvokeStaticOrDirect::ClinitCheckRequirement::kNone;
- } else if (storage_index.IsValid()) {
- *clinit_check_requirement = HInvokeStaticOrDirect::ClinitCheckRequirement::kExplicit;
- HLoadClass* cls = BuildLoadClass(
- storage_index, dex_pc, /* check_access */ false, /* outer */ true);
- clinit_check = new (arena_) HClinitCheck(cls, dex_pc);
- AppendInstruction(clinit_check);
+ } else {
+ HLoadClass* cls = BuildLoadClass(klass->GetDexTypeIndex(),
+ klass->GetDexFile(),
+ klass,
+ dex_pc,
+ /* needs_access_check */ false);
+ if (cls != nullptr) {
+ *clinit_check_requirement = HInvokeStaticOrDirect::ClinitCheckRequirement::kExplicit;
+ clinit_check = new (arena_) HClinitCheck(cls, dex_pc);
+ AppendInstruction(clinit_check);
+ }
}
return clinit_check;
}
@@ -1216,9 +1194,7 @@ bool HInstructionBuilder::BuildInstanceFieldAccess(const Instruction& instructio
}
ScopedObjectAccess soa(Thread::Current());
- ArtField* resolved_field =
- compiler_driver_->ComputeInstanceFieldInfo(field_index, dex_compilation_unit_, is_put, soa);
-
+ ArtField* resolved_field = ResolveField(field_index, /* is_static */ false, is_put);
// Generate an explicit null check on the reference, unless the field access
// is unresolved. In that case, we rely on the runtime to perform various
@@ -1336,6 +1312,56 @@ void HInstructionBuilder::BuildUnresolvedStaticFieldAccess(const Instruction& in
}
}
+ArtField* HInstructionBuilder::ResolveField(uint16_t field_idx, bool is_static, bool is_put) {
+ ScopedObjectAccess soa(Thread::Current());
+ StackHandleScope<2> hs(soa.Self());
+
+ ClassLinker* class_linker = dex_compilation_unit_->GetClassLinker();
+ Handle<mirror::ClassLoader> class_loader(hs.NewHandle(
+ soa.Decode<mirror::ClassLoader>(dex_compilation_unit_->GetClassLoader())));
+ Handle<mirror::Class> compiling_class(hs.NewHandle(GetCompilingClass()));
+
+ ArtField* resolved_field = class_linker->ResolveField(*dex_compilation_unit_->GetDexFile(),
+ field_idx,
+ dex_compilation_unit_->GetDexCache(),
+ class_loader,
+ is_static);
+
+ if (UNLIKELY(resolved_field == nullptr)) {
+ // Clean up any exception left by type resolution.
+ soa.Self()->ClearException();
+ return nullptr;
+ }
+
+ // Check static/instance. The class linker has a fast path for looking into the dex cache
+ // and does not check static/instance if it hits it.
+ if (UNLIKELY(resolved_field->IsStatic() != is_static)) {
+ return nullptr;
+ }
+
+ // Check access.
+ if (compiling_class.Get() == nullptr) {
+ if (!resolved_field->IsPublic()) {
+ return nullptr;
+ }
+ } else if (!compiling_class->CanAccessResolvedField(resolved_field->GetDeclaringClass(),
+ resolved_field,
+ dex_compilation_unit_->GetDexCache().Get(),
+ field_idx)) {
+ return nullptr;
+ }
+
+ if (is_put &&
+ resolved_field->IsFinal() &&
+ (compiling_class.Get() != resolved_field->GetDeclaringClass())) {
+ // Final fields can only be updated within their own class.
+ // TODO: Only allow it in constructors. b/34966607.
+ return nullptr;
+ }
+
+ return resolved_field;
+}
+
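
In outline, the resolution above enforces four rules: the field must resolve, its static/instance kind must match the opcode, the referrer must be allowed to access it, and a final field may only be written from its declaring class. A simplified standalone model of those checks, with plain structs standing in for ArtField and mirror::Class (illustration only):

struct FieldModel {
  bool is_static;
  bool is_public;
  bool is_final;
  int declaring_class_id;
};

struct ReferrerModel {
  int class_id;
  bool has_access;  // Result of a CanAccessResolvedField-style check.
};

bool IsFieldAccessAllowed(const FieldModel& field,
                          const ReferrerModel* referrer,  // nullptr: no compiling class.
                          bool is_static,
                          bool is_put) {
  if (field.is_static != is_static) {
    return false;  // Static/instance mismatch.
  }
  if (referrer == nullptr) {
    if (!field.is_public) {
      return false;  // Without a referrer class, only public fields are visible.
    }
  } else if (!referrer->has_access) {
    return false;  // Access check against the declaring class failed.
  }
  if (is_put && field.is_final &&
      (referrer == nullptr || referrer->class_id != field.declaring_class_id)) {
    return false;  // Final fields may only be written from their declaring class.
  }
  return true;
}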
bool HInstructionBuilder::BuildStaticFieldAccess(const Instruction& instruction,
uint32_t dex_pc,
bool is_put) {
@@ -1343,12 +1369,7 @@ bool HInstructionBuilder::BuildStaticFieldAccess(const Instruction& instruction,
uint16_t field_index = instruction.VRegB_21c();
ScopedObjectAccess soa(Thread::Current());
- StackHandleScope<3> hs(soa.Self());
- Handle<mirror::DexCache> dex_cache = dex_compilation_unit_->GetDexCache();
- Handle<mirror::ClassLoader> class_loader(hs.NewHandle(
- soa.Decode<mirror::ClassLoader>(dex_compilation_unit_->GetClassLoader())));
- ArtField* resolved_field = compiler_driver_->ResolveField(
- soa, dex_cache, class_loader, dex_compilation_unit_, field_index, true);
+ ArtField* resolved_field = ResolveField(field_index, /* is_static */ true, is_put);
if (resolved_field == nullptr) {
MaybeRecordStat(MethodCompilationStat::kUnresolvedField);
@@ -1358,38 +1379,23 @@ bool HInstructionBuilder::BuildStaticFieldAccess(const Instruction& instruction,
}
Primitive::Type field_type = resolved_field->GetTypeAsPrimitiveType();
- Handle<mirror::DexCache> outer_dex_cache = outer_compilation_unit_->GetDexCache();
- Handle<mirror::Class> outer_class(hs.NewHandle(GetOutermostCompilingClass()));
- // The index at which the field's class is stored in the DexCache's type array.
- dex::TypeIndex storage_index;
- bool is_outer_class = (outer_class.Get() == resolved_field->GetDeclaringClass());
- if (is_outer_class) {
- storage_index = outer_class->GetDexTypeIndex();
- } else if (outer_dex_cache.Get() != dex_cache.Get()) {
- // The compiler driver cannot currently understand multiple dex caches involved. Just bailout.
- return false;
- } else {
- // TODO: This is rather expensive. Perf it and cache the results if needed.
- std::pair<bool, bool> pair = compiler_driver_->IsFastStaticField(
- outer_dex_cache.Get(),
- GetCompilingClass(),
- resolved_field,
- field_index,
- &storage_index);
- bool can_easily_access = is_put ? pair.second : pair.first;
- if (!can_easily_access) {
- MaybeRecordStat(MethodCompilationStat::kUnresolvedFieldNotAFastAccess);
- BuildUnresolvedStaticFieldAccess(instruction, dex_pc, is_put, field_type);
- return true;
- }
+ Handle<mirror::Class> klass = handles_->NewHandle(resolved_field->GetDeclaringClass());
+ HLoadClass* constant = BuildLoadClass(klass->GetDexTypeIndex(),
+ klass->GetDexFile(),
+ klass,
+ dex_pc,
+ /* needs_access_check */ false);
+
+ if (constant == nullptr) {
+ // The class cannot be referenced from this compiled code. Generate
+ // an unresolved access.
+ MaybeRecordStat(MethodCompilationStat::kUnresolvedFieldNotAFastAccess);
+ BuildUnresolvedStaticFieldAccess(instruction, dex_pc, is_put, field_type);
+ return true;
}
- HLoadClass* constant = BuildLoadClass(
- storage_index, dex_pc, /* check_access */ false, /* outer */ true);
-
HInstruction* cls = constant;
- Handle<mirror::Class> klass(hs.NewHandle(resolved_field->GetDeclaringClass()));
if (!IsInitialized(klass)) {
cls = new (arena_) HClinitCheck(constant, dex_pc);
AppendInstruction(cls);
@@ -1497,7 +1503,7 @@ void HInstructionBuilder::BuildFilledNewArray(uint32_t dex_pc,
uint32_t* args,
uint32_t register_index) {
HInstruction* length = graph_->GetIntConstant(number_of_vreg_arguments, dex_pc);
- HLoadClass* cls = BuildLoadClass(type_index, dex_pc, /* check_access */ true);
+ HLoadClass* cls = BuildLoadClass(type_index, dex_pc);
HInstruction* object = new (arena_) HNewArray(cls, length, dex_pc);
AppendInstruction(object);
@@ -1627,44 +1633,68 @@ static TypeCheckKind ComputeTypeCheckKind(Handle<mirror::Class> cls)
}
}
-HLoadClass* HInstructionBuilder::BuildLoadClass(dex::TypeIndex type_index,
- uint32_t dex_pc,
- bool check_access,
- bool outer) {
+HLoadClass* HInstructionBuilder::BuildLoadClass(dex::TypeIndex type_index, uint32_t dex_pc) {
ScopedObjectAccess soa(Thread::Current());
- const DexCompilationUnit* compilation_unit =
- outer ? outer_compilation_unit_ : dex_compilation_unit_;
- const DexFile& dex_file = *compilation_unit->GetDexFile();
- StackHandleScope<1> hs(soa.Self());
+ StackHandleScope<2> hs(soa.Self());
+ const DexFile& dex_file = *dex_compilation_unit_->GetDexFile();
Handle<mirror::ClassLoader> class_loader(hs.NewHandle(
soa.Decode<mirror::ClassLoader>(dex_compilation_unit_->GetClassLoader())));
Handle<mirror::Class> klass = handles_->NewHandle(compiler_driver_->ResolveClass(
- soa, compilation_unit->GetDexCache(), class_loader, type_index, compilation_unit));
+ soa, dex_compilation_unit_->GetDexCache(), class_loader, type_index, dex_compilation_unit_));
- bool is_accessible = false;
- if (!check_access) {
- is_accessible = true;
- } else if (klass.Get() != nullptr) {
+ bool needs_access_check = true;
+ if (klass.Get() != nullptr) {
if (klass->IsPublic()) {
- is_accessible = true;
+ needs_access_check = false;
} else {
mirror::Class* compiling_class = GetCompilingClass();
if (compiling_class != nullptr && compiling_class->CanAccess(klass.Get())) {
- is_accessible = true;
+ needs_access_check = false;
}
}
}
+ return BuildLoadClass(type_index, dex_file, klass, dex_pc, needs_access_check);
+}
+
+HLoadClass* HInstructionBuilder::BuildLoadClass(dex::TypeIndex type_index,
+ const DexFile& dex_file,
+ Handle<mirror::Class> klass,
+ uint32_t dex_pc,
+ bool needs_access_check) {
+ // Try to find a reference in the compiling dex file.
+ const DexFile* actual_dex_file = &dex_file;
+ if (!IsSameDexFile(dex_file, *dex_compilation_unit_->GetDexFile())) {
+ dex::TypeIndex local_type_index =
+ klass->FindTypeIndexInOtherDexFile(*dex_compilation_unit_->GetDexFile());
+ if (local_type_index.IsValid()) {
+ type_index = local_type_index;
+ actual_dex_file = dex_compilation_unit_->GetDexFile();
+ }
+ }
+
+ // Note: `klass` must be from `handles_`.
HLoadClass* load_class = new (arena_) HLoadClass(
graph_->GetCurrentMethod(),
type_index,
- dex_file,
+ *actual_dex_file,
klass,
klass.Get() != nullptr && (klass.Get() == GetOutermostCompilingClass()),
dex_pc,
- !is_accessible);
+ needs_access_check);
+ HLoadClass::LoadKind load_kind = HSharpening::SharpenClass(load_class,
+ code_generator_,
+ compiler_driver_,
+ *dex_compilation_unit_);
+
+ if (load_kind == HLoadClass::LoadKind::kInvalid) {
+ // We actually cannot reference this class, we're forced to bail.
+ return nullptr;
+ }
+ // Append the instruction first, as setting the load kind affects the inputs.
AppendInstruction(load_class);
+ load_class->SetLoadKind(load_kind);
return load_class;
}
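
The overload above first tries to express the class with a type index in the dex file being compiled and only keeps the defining dex file's index when no local reference exists. A small sketch of that selection step; TypeRef and the lookup callback are illustrative stand-ins, not ART types:

#include <functional>

struct TypeRef {
  const void* dex_file;  // Stand-in for const DexFile*.
  int type_index;        // Stand-in for dex::TypeIndex; -1 means "not found".
};

TypeRef PickTypeReference(TypeRef original,
                          const void* compiling_dex_file,
                          const std::function<int(const void*)>& find_index_in) {
  if (original.dex_file == compiling_dex_file) {
    return original;                                  // Already local.
  }
  int local_index = find_index_in(compiling_dex_file);
  if (local_index >= 0) {
    return TypeRef{compiling_dex_file, local_index};  // Prefer the local reference.
  }
  return original;                                    // Keep the defining dex file.
}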
@@ -1674,7 +1704,7 @@ void HInstructionBuilder::BuildTypeCheck(const Instruction& instruction,
dex::TypeIndex type_index,
uint32_t dex_pc) {
HInstruction* object = LoadLocal(reference, Primitive::kPrimNot);
- HLoadClass* cls = BuildLoadClass(type_index, dex_pc, /* check_access */ true);
+ HLoadClass* cls = BuildLoadClass(type_index, dex_pc);
ScopedObjectAccess soa(Thread::Current());
TypeCheckKind check_kind = ComputeTypeCheckKind(cls->GetClass());
@@ -2498,7 +2528,7 @@ bool HInstructionBuilder::ProcessDexInstruction(const Instruction& instruction,
case Instruction::NEW_ARRAY: {
dex::TypeIndex type_index(instruction.VRegC_22c());
HInstruction* length = LoadLocal(instruction.VRegB_22c(), Primitive::kPrimInt);
- HLoadClass* cls = BuildLoadClass(type_index, dex_pc, /* check_access */ true);
+ HLoadClass* cls = BuildLoadClass(type_index, dex_pc);
AppendInstruction(new (arena_) HNewArray(cls, length, dex_pc));
UpdateLocal(instruction.VRegA_22c(), current_block_->GetLastInstruction());
break;
@@ -2673,7 +2703,7 @@ bool HInstructionBuilder::ProcessDexInstruction(const Instruction& instruction,
case Instruction::CONST_CLASS: {
dex::TypeIndex type_index(instruction.VRegB_21c());
- BuildLoadClass(type_index, dex_pc, /* check_access */ true);
+ BuildLoadClass(type_index, dex_pc);
UpdateLocal(instruction.VRegA_21c(), current_block_->GetLastInstruction());
break;
}
diff --git a/compiler/optimizing/instruction_builder.h b/compiler/optimizing/instruction_builder.h
index 5efe95094c..3bb680ce44 100644
--- a/compiler/optimizing/instruction_builder.h
+++ b/compiler/optimizing/instruction_builder.h
@@ -31,6 +31,7 @@
namespace art {
+class CodeGenerator;
class Instruction;
class HInstructionBuilder : public ValueObject {
@@ -44,6 +45,7 @@ class HInstructionBuilder : public ValueObject {
DexCompilationUnit* dex_compilation_unit,
const DexCompilationUnit* const outer_compilation_unit,
CompilerDriver* driver,
+ CodeGenerator* code_generator,
const uint8_t* interpreter_metadata,
OptimizingCompilerStats* compiler_stats,
Handle<mirror::DexCache> dex_cache,
@@ -61,6 +63,7 @@ class HInstructionBuilder : public ValueObject {
current_locals_(nullptr),
latest_result_(nullptr),
compiler_driver_(driver),
+ code_generator_(code_generator),
dex_compilation_unit_(dex_compilation_unit),
outer_compilation_unit_(outer_compilation_unit),
interpreter_metadata_(interpreter_metadata),
@@ -228,10 +231,14 @@ class HInstructionBuilder : public ValueObject {
// Builds a `HLoadClass` loading the given `type_index`. If `outer` is true,
// this method will use the outer class's dex file to lookup the type at
// `type_index`.
+ HLoadClass* BuildLoadClass(dex::TypeIndex type_index, uint32_t dex_pc);
+
HLoadClass* BuildLoadClass(dex::TypeIndex type_index,
+ const DexFile& dex_file,
+ Handle<mirror::Class> klass,
uint32_t dex_pc,
- bool check_access,
- bool outer = false);
+ bool needs_access_check)
+ REQUIRES_SHARED(Locks::mutator_lock_);
// Returns the outer-most compiling method's class.
mirror::Class* GetOutermostCompilingClass() const;
@@ -275,7 +282,6 @@ class HInstructionBuilder : public ValueObject {
HClinitCheck* ProcessClinitCheckForInvoke(
uint32_t dex_pc,
ArtMethod* method,
- uint32_t method_idx,
HInvokeStaticOrDirect::ClinitCheckRequirement* clinit_check_requirement)
REQUIRES_SHARED(Locks::mutator_lock_);
@@ -290,6 +296,10 @@ class HInstructionBuilder : public ValueObject {
// not be resolved.
ArtMethod* ResolveMethod(uint16_t method_idx, InvokeType invoke_type);
+ // Try to resolve a field using the class linker. Return null if it could not
+ // be found.
+ ArtField* ResolveField(uint16_t field_idx, bool is_static, bool is_put);
+
ArenaAllocator* const arena_;
HGraph* const graph_;
VariableSizedHandleScope* handles_;
@@ -311,6 +321,8 @@ class HInstructionBuilder : public ValueObject {
CompilerDriver* const compiler_driver_;
+ CodeGenerator* const code_generator_;
+
// The compilation unit of the current method being compiled. Note that
// it can be an inlined method.
DexCompilationUnit* const dex_compilation_unit_;
diff --git a/compiler/optimizing/intrinsics.h b/compiler/optimizing/intrinsics.h
index 1e73cf67df..6425e1313f 100644
--- a/compiler/optimizing/intrinsics.h
+++ b/compiler/optimizing/intrinsics.h
@@ -31,6 +31,9 @@ class DexFile;
static constexpr uint32_t kPositiveInfinityFloat = 0x7f800000U;
static constexpr uint64_t kPositiveInfinityDouble = UINT64_C(0x7ff0000000000000);
+static constexpr uint32_t kNanFloat = 0x7fc00000U;
+static constexpr uint64_t kNanDouble = 0x7ff8000000000000;
+
// Recognize intrinsics from HInvoke nodes.
class IntrinsicsRecognizer : public HOptimization {
public:
diff --git a/compiler/optimizing/intrinsics_arm_vixl.cc b/compiler/optimizing/intrinsics_arm_vixl.cc
index 68c2d2e36e..70a3d38c13 100644
--- a/compiler/optimizing/intrinsics_arm_vixl.cc
+++ b/compiler/optimizing/intrinsics_arm_vixl.cc
@@ -40,10 +40,12 @@ using helpers::LocationFrom;
using helpers::LowRegisterFrom;
using helpers::LowSRegisterFrom;
using helpers::OutputDRegister;
+using helpers::OutputSRegister;
using helpers::OutputRegister;
using helpers::OutputVRegister;
using helpers::RegisterFrom;
using helpers::SRegisterFrom;
+using helpers::DRegisterFromS;
using namespace vixl::aarch32; // NOLINT(build/namespaces)
@@ -462,6 +464,214 @@ void IntrinsicCodeGeneratorARMVIXL::VisitMathAbsLong(HInvoke* invoke) {
GenAbsInteger(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
}
+static void GenMinMaxFloat(HInvoke* invoke, bool is_min, ArmVIXLAssembler* assembler) {
+ Location op1_loc = invoke->GetLocations()->InAt(0);
+ Location op2_loc = invoke->GetLocations()->InAt(1);
+ Location out_loc = invoke->GetLocations()->Out();
+
+ // Optimization: don't generate any code if inputs are the same.
+ if (op1_loc.Equals(op2_loc)) {
+ DCHECK(out_loc.Equals(op1_loc)); // out_loc is set as SameAsFirstInput() in location builder.
+ return;
+ }
+
+ vixl32::SRegister op1 = SRegisterFrom(op1_loc);
+ vixl32::SRegister op2 = SRegisterFrom(op2_loc);
+ vixl32::SRegister out = OutputSRegister(invoke);
+ UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
+ const vixl32::Register temp1 = temps.Acquire();
+ vixl32::Register temp2 = RegisterFrom(invoke->GetLocations()->GetTemp(0));
+ vixl32::Label nan, done;
+
+ DCHECK(op1.Is(out));
+
+ __ Vcmp(op1, op2);
+ __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR);
+ __ B(vs, &nan, /* far_target */ false); // if un-ordered, go to NaN handling.
+
+ // op1 <> op2
+ vixl32::ConditionType cond = is_min ? gt : lt;
+ {
+ ExactAssemblyScope it_scope(assembler->GetVIXLAssembler(),
+ 2 * kMaxInstructionSizeInBytes,
+ CodeBufferCheckScope::kMaximumSize);
+ __ it(cond);
+ __ vmov(cond, F32, out, op2);
+ }
+ __ B(ne, &done, /* far_target */ false); // for <>(not equal), we've done min/max calculation.
+
+ // handle op1 == op2, max(+0.0,-0.0), min(+0.0,-0.0).
+ __ Vmov(temp1, op1);
+ __ Vmov(temp2, op2);
+ if (is_min) {
+ __ Orr(temp1, temp1, temp2);
+ } else {
+ __ And(temp1, temp1, temp2);
+ }
+ __ Vmov(out, temp1);
+ __ B(&done);
+
+ // handle NaN input.
+ __ Bind(&nan);
+ __ Movt(temp1, High16Bits(kNanFloat)); // 0x7FC0xxxx is a NaN.
+ __ Vmov(out, temp1);
+
+ __ Bind(&done);
+}
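
The equal-inputs path above exploits the IEEE-754 encodings of the two zeros: OR-ing the raw bits of +0.0 and -0.0 yields -0.0 (the correct minimum) and AND-ing them yields +0.0 (the correct maximum). A standalone sketch in plain C++ rather than VIXL (illustration only):

#include <cstdint>
#include <cstdio>
#include <cstring>

uint32_t Bits(float f) { uint32_t u; std::memcpy(&u, &f, sizeof(u)); return u; }
float FromBits(uint32_t u) { float f; std::memcpy(&f, &u, sizeof(f)); return f; }

int main() {
  float pos_zero = 0.0f, neg_zero = -0.0f;          // 0x00000000 and 0x80000000.
  float min_result = FromBits(Bits(pos_zero) | Bits(neg_zero));
  float max_result = FromBits(Bits(pos_zero) & Bits(neg_zero));
  printf("min(+0.0, -0.0) = %g (sign bit %u)\n",
         min_result, static_cast<unsigned>(Bits(min_result) >> 31));  // -0, sign 1.
  printf("max(+0.0, -0.0) = %g (sign bit %u)\n",
         max_result, static_cast<unsigned>(Bits(max_result) >> 31));  // 0, sign 0.
  return 0;
}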
+
+static void CreateFPFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
+ LocationSummary* locations = new (arena) LocationSummary(invoke,
+ LocationSummary::kNoCall,
+ kIntrinsified);
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetInAt(1, Location::RequiresFpuRegister());
+ locations->SetOut(Location::SameAsFirstInput());
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitMathMinFloatFloat(HInvoke* invoke) {
+ CreateFPFPToFPLocations(arena_, invoke);
+ invoke->GetLocations()->AddTemp(Location::RequiresRegister());
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitMathMinFloatFloat(HInvoke* invoke) {
+ GenMinMaxFloat(invoke, /* is_min */ true, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitMathMaxFloatFloat(HInvoke* invoke) {
+ CreateFPFPToFPLocations(arena_, invoke);
+ invoke->GetLocations()->AddTemp(Location::RequiresRegister());
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitMathMaxFloatFloat(HInvoke* invoke) {
+ GenMinMaxFloat(invoke, /* is_min */ false, GetAssembler());
+}
+
+static void GenMinMaxDouble(HInvoke* invoke, bool is_min, ArmVIXLAssembler* assembler) {
+ Location op1_loc = invoke->GetLocations()->InAt(0);
+ Location op2_loc = invoke->GetLocations()->InAt(1);
+ Location out_loc = invoke->GetLocations()->Out();
+
+ // Optimization: don't generate any code if inputs are the same.
+ if (op1_loc.Equals(op2_loc)) {
+    DCHECK(out_loc.Equals(op1_loc));  // out_loc is set as SameAsFirstInput() in location builder.
+ return;
+ }
+
+ vixl32::DRegister op1 = DRegisterFrom(op1_loc);
+ vixl32::DRegister op2 = DRegisterFrom(op2_loc);
+ vixl32::DRegister out = OutputDRegister(invoke);
+ vixl32::Label handle_nan_eq, done;
+
+ DCHECK(op1.Is(out));
+
+ __ Vcmp(op1, op2);
+ __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR);
+ __ B(vs, &handle_nan_eq, /* far_target */ false); // if un-ordered, go to NaN handling.
+
+ // op1 <> op2
+ vixl32::ConditionType cond = is_min ? gt : lt;
+ {
+ ExactAssemblyScope it_scope(assembler->GetVIXLAssembler(),
+ 2 * kMaxInstructionSizeInBytes,
+ CodeBufferCheckScope::kMaximumSize);
+ __ it(cond);
+ __ vmov(cond, F64, out, op2);
+ }
+ __ B(ne, &done, /* far_target */ false); // for <>(not equal), we've done min/max calculation.
+
+ // handle op1 == op2, max(+0.0,-0.0).
+ if (!is_min) {
+ __ Vand(F64, out, op1, op2);
+ __ B(&done);
+ }
+
+ // handle op1 == op2, min(+0.0,-0.0), NaN input.
+ __ Bind(&handle_nan_eq);
+ __ Vorr(F64, out, op1, op2); // assemble op1/-0.0/NaN.
+
+ __ Bind(&done);
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitMathMinDoubleDouble(HInvoke* invoke) {
+ CreateFPFPToFPLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitMathMinDoubleDouble(HInvoke* invoke) {
+  GenMinMaxDouble(invoke, /* is_min */ true, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitMathMaxDoubleDouble(HInvoke* invoke) {
+ CreateFPFPToFPLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitMathMaxDoubleDouble(HInvoke* invoke) {
+ GenMinMaxDouble(invoke, /* is_min */ false, GetAssembler());
+}
+
+static void GenMinMaxLong(HInvoke* invoke, bool is_min, ArmVIXLAssembler* assembler) {
+ Location op1_loc = invoke->GetLocations()->InAt(0);
+ Location op2_loc = invoke->GetLocations()->InAt(1);
+ Location out_loc = invoke->GetLocations()->Out();
+
+ // Optimization: don't generate any code if inputs are the same.
+ if (op1_loc.Equals(op2_loc)) {
+ DCHECK(out_loc.Equals(op1_loc)); // out_loc is set as SameAsFirstInput() in location builder.
+ return;
+ }
+
+ vixl32::Register op1_lo = LowRegisterFrom(op1_loc);
+ vixl32::Register op1_hi = HighRegisterFrom(op1_loc);
+ vixl32::Register op2_lo = LowRegisterFrom(op2_loc);
+ vixl32::Register op2_hi = HighRegisterFrom(op2_loc);
+ vixl32::Register out_lo = LowRegisterFrom(out_loc);
+ vixl32::Register out_hi = HighRegisterFrom(out_loc);
+ UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
+ const vixl32::Register temp = temps.Acquire();
+
+ DCHECK(op1_lo.Is(out_lo));
+ DCHECK(op1_hi.Is(out_hi));
+
+ // Compare op1 >= op2, or op1 < op2.
+ __ Cmp(out_lo, op2_lo);
+ __ Sbcs(temp, out_hi, op2_hi);
+
+ // Now GE/LT condition code is correct for the long comparison.
+ {
+ vixl32::ConditionType cond = is_min ? ge : lt;
+ ExactAssemblyScope it_scope(assembler->GetVIXLAssembler(),
+ 3 * kMaxInstructionSizeInBytes,
+ CodeBufferCheckScope::kMaximumSize);
+ __ itt(cond);
+ __ mov(cond, out_lo, op2_lo);
+ __ mov(cond, out_hi, op2_hi);
+ }
+}
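
The CMP/SBCS pair above performs a full 64-bit signed comparison using only 32-bit halves: the low-word compare produces a borrow, and subtracting the borrow together with the high words leaves the flags as if the whole 64-bit subtraction had run, so GE/LT are valid for the long comparison. A sketch of the same decomposition in portable C++ (illustration only):

#include <cassert>
#include <cstdint>

bool LessThan64ViaHalves(int64_t a, int64_t b) {
  uint32_t a_lo = static_cast<uint32_t>(a), b_lo = static_cast<uint32_t>(b);
  int32_t a_hi = static_cast<int32_t>(static_cast<uint64_t>(a) >> 32);
  int32_t b_hi = static_cast<int32_t>(static_cast<uint64_t>(b) >> 32);
  // CMP out_lo, op2_lo: borrow out of the low-word subtraction.
  uint32_t borrow = (a_lo < b_lo) ? 1 : 0;
  // SBCS temp, out_hi, op2_hi: high words minus the borrow decide the ordering.
  int64_t hi_diff = static_cast<int64_t>(a_hi) - b_hi - borrow;
  return hi_diff < 0;  // LT condition after the SBCS.
}

int main() {
  assert(LessThan64ViaHalves(-1, 0));
  assert(!LessThan64ViaHalves(0, -1));
  assert(LessThan64ViaHalves(INT64_MIN, INT64_MAX));
  assert(LessThan64ViaHalves(0x100000000LL, 0x1FFFFFFFFLL));
  assert(!LessThan64ViaHalves(5, 5));
  return 0;
}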
+
+static void CreateLongLongToLongLocations(ArenaAllocator* arena, HInvoke* invoke) {
+ LocationSummary* locations = new (arena) LocationSummary(invoke,
+ LocationSummary::kNoCall,
+ kIntrinsified);
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(1, Location::RequiresRegister());
+ locations->SetOut(Location::SameAsFirstInput());
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitMathMinLongLong(HInvoke* invoke) {
+ CreateLongLongToLongLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitMathMinLongLong(HInvoke* invoke) {
+ GenMinMaxLong(invoke, /* is_min */ true, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitMathMaxLongLong(HInvoke* invoke) {
+ CreateLongLongToLongLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitMathMaxLongLong(HInvoke* invoke) {
+ GenMinMaxLong(invoke, /* is_min */ false, GetAssembler());
+}
+
static void GenMinMax(HInvoke* invoke, bool is_min, ArmVIXLAssembler* assembler) {
vixl32::Register op1 = InputRegisterAt(invoke, 0);
vixl32::Register op2 = InputRegisterAt(invoke, 1);
@@ -514,6 +724,18 @@ void IntrinsicCodeGeneratorARMVIXL::VisitMathSqrt(HInvoke* invoke) {
__ Vsqrt(OutputDRegister(invoke), InputDRegisterAt(invoke, 0));
}
+void IntrinsicLocationsBuilderARMVIXL::VisitMathRint(HInvoke* invoke) {
+ if (features_.HasARMv8AInstructions()) {
+ CreateFPToFPLocations(arena_, invoke);
+ }
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitMathRint(HInvoke* invoke) {
+ DCHECK(codegen_->GetInstructionSetFeatures().HasARMv8AInstructions());
+ ArmVIXLAssembler* assembler = GetAssembler();
+ __ Vrintn(F64, F64, OutputDRegister(invoke), InputDRegisterAt(invoke, 0));
+}
+
void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPeekByte(HInvoke* invoke) {
CreateIntToIntLocations(arena_, invoke);
}
@@ -2742,15 +2964,30 @@ void IntrinsicCodeGeneratorARMVIXL::VisitReferenceGetReferent(HInvoke* invoke) {
__ Bind(slow_path->GetExitLabel());
}
-UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathMinDoubleDouble)
-UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathMinFloatFloat)
-UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathMaxDoubleDouble)
-UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathMaxFloatFloat)
-UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathMinLongLong)
-UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathMaxLongLong)
-UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathCeil) // Could be done by changing rounding mode, maybe?
-UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathFloor) // Could be done by changing rounding mode, maybe?
-UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathRint)
+void IntrinsicLocationsBuilderARMVIXL::VisitMathCeil(HInvoke* invoke) {
+ if (features_.HasARMv8AInstructions()) {
+ CreateFPToFPLocations(arena_, invoke);
+ }
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitMathCeil(HInvoke* invoke) {
+ ArmVIXLAssembler* assembler = GetAssembler();
+ DCHECK(codegen_->GetInstructionSetFeatures().HasARMv8AInstructions());
+ __ Vrintp(F64, F64, OutputDRegister(invoke), InputDRegisterAt(invoke, 0));
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitMathFloor(HInvoke* invoke) {
+ if (features_.HasARMv8AInstructions()) {
+ CreateFPToFPLocations(arena_, invoke);
+ }
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitMathFloor(HInvoke* invoke) {
+ ArmVIXLAssembler* assembler = GetAssembler();
+ DCHECK(codegen_->GetInstructionSetFeatures().HasARMv8AInstructions());
+ __ Vrintm(F64, F64, OutputDRegister(invoke), InputDRegisterAt(invoke, 0));
+}
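
The three ARMv8 instructions map directly onto the Java rounding semantics: VRINTN rounds to nearest with ties to even (Math.rint), VRINTP rounds toward +infinity (Math.ceil) and VRINTM toward -infinity (Math.floor). The C library equivalents show the half-way cases (illustration only):

#include <cfenv>
#include <cmath>
#include <cstdio>

int main() {
  std::fesetround(FE_TONEAREST);  // Ties-to-even, the Math.rint behaviour.
  const double values[] = {2.5, 3.5, -2.5};
  for (double v : values) {
    printf("v=%5.1f  rint=%4.1f  ceil=%4.1f  floor=%4.1f\n",
           v, std::nearbyint(v), std::ceil(v), std::floor(v));
  }
  // 2.5 -> rint 2, ceil 3, floor 2;  3.5 -> rint 4;  -2.5 -> rint -2, floor -3.
  return 0;
}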
+
UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathRoundDouble) // Could be done by changing rounding mode, maybe?
UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathRoundFloat) // Could be done by changing rounding mode, maybe?
UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeCASLong) // High register pressure.
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index d15145e673..abbb91a1a9 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -1354,13 +1354,15 @@ std::ostream& operator<<(std::ostream& os, const HInstruction::InstructionKind&
return os;
}
-void HInstruction::MoveBefore(HInstruction* cursor) {
- DCHECK(!IsPhi());
- DCHECK(!IsControlFlow());
- DCHECK(CanBeMoved() ||
- // HShouldDeoptimizeFlag can only be moved by CHAGuardOptimization.
- IsShouldDeoptimizeFlag());
- DCHECK(!cursor->IsPhi());
+void HInstruction::MoveBefore(HInstruction* cursor, bool do_checks) {
+ if (do_checks) {
+ DCHECK(!IsPhi());
+ DCHECK(!IsControlFlow());
+ DCHECK(CanBeMoved() ||
+ // HShouldDeoptimizeFlag can only be moved by CHAGuardOptimization.
+ IsShouldDeoptimizeFlag());
+ DCHECK(!cursor->IsPhi());
+ }
next_->previous_ = previous_;
if (previous_ != nullptr) {
@@ -2462,16 +2464,15 @@ bool HLoadClass::InstructionDataEquals(const HInstruction* other) const {
}
}
-void HLoadClass::SetLoadKindInternal(LoadKind load_kind) {
- // Once sharpened, the load kind should not be changed again.
- // Also, kReferrersClass should never be overwritten.
- DCHECK_EQ(GetLoadKind(), LoadKind::kDexCacheViaMethod);
+void HLoadClass::SetLoadKind(LoadKind load_kind) {
SetPackedField<LoadKindField>(load_kind);
- if (load_kind != LoadKind::kDexCacheViaMethod) {
+ if (load_kind != LoadKind::kDexCacheViaMethod &&
+ load_kind != LoadKind::kReferrersClass) {
RemoveAsUserOfInput(0u);
SetRawInputAt(0u, nullptr);
}
+
if (!NeedsEnvironment()) {
RemoveEnvironment();
SetSideEffects(SideEffects::None());
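
With the old DCHECK_EQ gone, SetLoadKind() no longer insists that the previous kind be
kDexCacheViaMethod, and it detaches the current-method input whenever the new kind does not
need one (anything other than kDexCacheViaMethod or kReferrersClass). A hypothetical caller
(the function name and chosen kind below are illustrative, not the actual sharpening code)
would look like:

    // Sketch only: retarget a load that was built conservatively.
    void RetargetLoadClass(HLoadClass* load_class) {
      // Switching to a PC-relative kind removes the ArtMethod* input and, when no
      // environment is needed anymore, also clears it together with the side effects.
      load_class->SetLoadKind(HLoadClass::LoadKind::kBootImageLinkTimePcRelative);
    }
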
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index f0ea9e20e6..96f9abafbf 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -2065,8 +2065,8 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> {
other->ReplaceInput(this, use_index);
}
- // Move `this` instruction before `cursor`.
- void MoveBefore(HInstruction* cursor);
+  // Move `this` instruction before `cursor`.
+ void MoveBefore(HInstruction* cursor, bool do_checks = true);
// Move `this` before its first user and out of any loops. If there is no
// out-of-loop user that dominates all other users, move the instruction
@@ -4322,6 +4322,11 @@ class HInvokeInterface FINAL : public HInvoke {
return (obj == InputAt(0)) && !GetLocations()->Intrinsified();
}
+ bool NeedsDexCacheOfDeclaringClass() const OVERRIDE {
+ // The assembly stub currently needs it.
+ return true;
+ }
+
uint32_t GetImtIndex() const { return imt_index_; }
uint32_t GetDexMethodIndex() const { return dex_method_index_; }
@@ -5508,6 +5513,9 @@ class HLoadClass FINAL : public HInstruction {
public:
// Determines how to load the Class.
enum class LoadKind {
+ // We cannot load this class. See HSharpening::SharpenLoadClass.
+ kInvalid = -1,
+
// Use the Class* from the method's own ArtMethod*.
kReferrersClass,
@@ -5564,18 +5572,7 @@ class HLoadClass FINAL : public HInstruction {
SetPackedFlag<kFlagGenerateClInitCheck>(false);
}
- void SetLoadKind(LoadKind load_kind) {
- SetLoadKindInternal(load_kind);
- }
-
- void SetLoadKindWithTypeReference(LoadKind load_kind,
- const DexFile& dex_file,
- dex::TypeIndex type_index) {
- DCHECK(HasTypeReference(load_kind));
- DCHECK(IsSameDexFile(dex_file_, dex_file));
- DCHECK_EQ(type_index_, type_index);
- SetLoadKindInternal(load_kind);
- }
+ void SetLoadKind(LoadKind load_kind);
LoadKind GetLoadKind() const {
return GetPackedField<LoadKindField>();
@@ -5694,6 +5691,11 @@ class HLoadClass FINAL : public HInstruction {
// for PC-relative loads, i.e. kBssEntry or kBootImageLinkTimePcRelative.
HUserRecord<HInstruction*> special_input_;
+ // A type index and dex file where the class can be accessed. The dex file can be:
+ // - The compiling method's dex file if the class is defined there too.
+ // - The compiling method's dex file if the class is referenced there.
+ // - The dex file where the class is defined. When the load kind can only be
+ // kBssEntry or kDexCacheViaMethod, we cannot emit code for this `HLoadClass`.
const dex::TypeIndex type_index_;
const DexFile& dex_file_;
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index 297500b12f..727ca7d893 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -90,6 +90,7 @@
#include "reference_type_propagation.h"
#include "register_allocator_linear_scan.h"
#include "select_generator.h"
+#include "scheduler.h"
#include "sharpening.h"
#include "side_effects_analysis.h"
#include "ssa_builder.h"
@@ -658,10 +659,13 @@ void OptimizingCompiler::RunArchOptimizations(InstructionSet instruction_set,
new (arena) arm64::InstructionSimplifierArm64(graph, stats);
SideEffectsAnalysis* side_effects = new (arena) SideEffectsAnalysis(graph);
GVNOptimization* gvn = new (arena) GVNOptimization(graph, *side_effects, "GVN$after_arch");
+ HInstructionScheduling* scheduling =
+ new (arena) HInstructionScheduling(graph, instruction_set);
HOptimization* arm64_optimizations[] = {
simplifier,
side_effects,
- gvn
+ gvn,
+ scheduling,
};
RunOptimizations(arm64_optimizations, arraysize(arm64_optimizations), pass_observer);
break;
@@ -995,6 +999,7 @@ CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* arena,
&dex_file,
*code_item,
compiler_driver,
+ codegen.get(),
compilation_stats_.get(),
interpreter_metadata,
dex_cache,
@@ -1129,6 +1134,25 @@ bool IsCompilingWithCoreImage() {
return false;
}
+bool EncodeArtMethodInInlineInfo(ArtMethod* method ATTRIBUTE_UNUSED) {
+ // Note: the runtime is null only for unit testing.
+ return Runtime::Current() == nullptr || !Runtime::Current()->IsAotCompiler();
+}
+
+bool CanEncodeInlinedMethodInStackMap(const DexFile& caller_dex_file, ArtMethod* callee) {
+ if (!Runtime::Current()->IsAotCompiler()) {
+ // JIT can always encode methods in stack maps.
+ return true;
+ }
+ if (IsSameDexFile(caller_dex_file, *callee->GetDexFile())) {
+ return true;
+ }
+ // TODO(ngeoffray): Support more AOT cases for inlining:
+ // - methods in multidex
+ // - methods in boot image for on-device non-PIC compilation.
+ return false;
+}
+
bool OptimizingCompiler::JitCompile(Thread* self,
jit::JitCodeCache* code_cache,
ArtMethod* method,
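
The two new helpers centralize the policy for what inline info can reference: the JIT can
always encode an ArtMethod directly, while AOT is currently limited to callees defined in the
caller's dex file. A hedged sketch of how a client such as the inliner is expected to consult
CanEncodeInlinedMethodInStackMap() (the wrapper below is assumed, not the actual inliner code):

    // Sketch only: gate inlining on whether the callee can be described in the stack map.
    static bool MayEncodeInlinedCallee(const DexFile& caller_dex_file, ArtMethod* callee)
        REQUIRES_SHARED(Locks::mutator_lock_) {
      if (!CanEncodeInlinedMethodInStackMap(caller_dex_file, callee)) {
        // Under AOT the inline info cannot record this callee, so the caller should
        // not inline it.
        return false;
      }
      return true;
    }
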
diff --git a/compiler/optimizing/optimizing_compiler.h b/compiler/optimizing/optimizing_compiler.h
index 0c89da12e8..d8cea30a6b 100644
--- a/compiler/optimizing/optimizing_compiler.h
+++ b/compiler/optimizing/optimizing_compiler.h
@@ -17,10 +17,15 @@
#ifndef ART_COMPILER_OPTIMIZING_OPTIMIZING_COMPILER_H_
#define ART_COMPILER_OPTIMIZING_OPTIMIZING_COMPILER_H_
+#include "base/mutex.h"
+#include "globals.h"
+
namespace art {
+class ArtMethod;
class Compiler;
class CompilerDriver;
+class DexFile;
Compiler* CreateOptimizingCompiler(CompilerDriver* driver);
@@ -29,6 +34,10 @@ Compiler* CreateOptimizingCompiler(CompilerDriver* driver);
// information for checking invariants.
bool IsCompilingWithCoreImage();
+bool EncodeArtMethodInInlineInfo(ArtMethod* method);
+bool CanEncodeInlinedMethodInStackMap(const DexFile& caller_dex_file, ArtMethod* callee)
+ REQUIRES_SHARED(Locks::mutator_lock_);
+
} // namespace art
#endif // ART_COMPILER_OPTIMIZING_OPTIMIZING_COMPILER_H_
diff --git a/compiler/optimizing/optimizing_unit_test.h b/compiler/optimizing/optimizing_unit_test.h
index 58d90176cd..bf963b8996 100644
--- a/compiler/optimizing/optimizing_unit_test.h
+++ b/compiler/optimizing/optimizing_unit_test.h
@@ -64,6 +64,9 @@ LiveInterval* BuildInterval(const size_t ranges[][2],
void RemoveSuspendChecks(HGraph* graph) {
for (HBasicBlock* block : graph->GetBlocks()) {
if (block != nullptr) {
+ if (block->GetLoopInformation() != nullptr) {
+ block->GetLoopInformation()->SetSuspendCheck(nullptr);
+ }
for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
HInstruction* current = it.Current();
if (current->IsSuspendCheck()) {
diff --git a/compiler/optimizing/scheduler.cc b/compiler/optimizing/scheduler.cc
new file mode 100644
index 0000000000..d65d20cf43
--- /dev/null
+++ b/compiler/optimizing/scheduler.cc
@@ -0,0 +1,610 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <string>
+
+#include "prepare_for_register_allocation.h"
+#include "scheduler.h"
+
+#ifdef ART_ENABLE_CODEGEN_arm64
+#include "scheduler_arm64.h"
+#endif
+
+namespace art {
+
+void SchedulingGraph::AddDependency(SchedulingNode* node,
+ SchedulingNode* dependency,
+ bool is_data_dependency) {
+ if (node == nullptr || dependency == nullptr) {
+    // A `nullptr` node indicates an instruction out of scheduling range (e.g. in
+    // another block), so we do not need to add a dependency edge to the graph.
+ return;
+ }
+
+ if (is_data_dependency) {
+ if (!HasImmediateDataDependency(node, dependency)) {
+ node->AddDataPredecessor(dependency);
+ }
+ } else if (!HasImmediateOtherDependency(node, dependency)) {
+ node->AddOtherPredecessor(dependency);
+ }
+}
+
+static bool MayHaveReorderingDependency(SideEffects node, SideEffects other) {
+ // Read after write.
+ if (node.MayDependOn(other)) {
+ return true;
+ }
+
+ // Write after read.
+ if (other.MayDependOn(node)) {
+ return true;
+ }
+
+ // Memory write after write.
+ if (node.DoesAnyWrite() && other.DoesAnyWrite()) {
+ return true;
+ }
+
+ return false;
+}
+
+
+// Check whether `node` depends on `other`, taking into account `SideEffect`
+// information and `CanThrow` information.
+static bool HasSideEffectDependency(const HInstruction* node, const HInstruction* other) {
+ if (MayHaveReorderingDependency(node->GetSideEffects(), other->GetSideEffects())) {
+ return true;
+ }
+
+ if (other->CanThrow() && node->GetSideEffects().DoesAnyWrite()) {
+ return true;
+ }
+
+ if (other->GetSideEffects().DoesAnyWrite() && node->CanThrow()) {
+ return true;
+ }
+
+ if (other->CanThrow() && node->CanThrow()) {
+ return true;
+ }
+
+ // Check side-effect dependency between ArrayGet and BoundsCheck.
+ if (node->IsArrayGet() && other->IsBoundsCheck() && node->InputAt(1) == other) {
+ return true;
+ }
+
+ return false;
+}
+
+void SchedulingGraph::AddDependencies(HInstruction* instruction, bool is_scheduling_barrier) {
+ SchedulingNode* instruction_node = GetNode(instruction);
+
+ // Define-use dependencies.
+ for (const HUseListNode<HInstruction*>& use : instruction->GetUses()) {
+ AddDataDependency(GetNode(use.GetUser()), instruction_node);
+ }
+
+ // Scheduling barrier dependencies.
+ DCHECK(!is_scheduling_barrier || contains_scheduling_barrier_);
+ if (contains_scheduling_barrier_) {
+    // A barrier depends on the instructions after it, and the instructions
+    // before the barrier depend on it.
+ for (HInstruction* other = instruction->GetNext(); other != nullptr; other = other->GetNext()) {
+ SchedulingNode* other_node = GetNode(other);
+ bool other_is_barrier = other_node->IsSchedulingBarrier();
+ if (is_scheduling_barrier || other_is_barrier) {
+ AddOtherDependency(other_node, instruction_node);
+ }
+ if (other_is_barrier) {
+ // This other scheduling barrier guarantees ordering of instructions after
+ // it, so avoid creating additional useless dependencies in the graph.
+ // For example if we have
+ // instr_1
+ // barrier_2
+ // instr_3
+ // barrier_4
+ // instr_5
+ // we only create the following non-data dependencies
+ // 1 -> 2
+ // 2 -> 3
+ // 2 -> 4
+ // 3 -> 4
+ // 4 -> 5
+ // and do not create
+ // 1 -> 4
+ // 2 -> 5
+ // Note that in this example we could also avoid creating the dependency
+ // `2 -> 4`. But if we remove `instr_3` that dependency is required to
+ // order the barriers. So we generate it to avoid a special case.
+ break;
+ }
+ }
+ }
+
+ // Side effect dependencies.
+ if (!instruction->GetSideEffects().DoesNothing() || instruction->CanThrow()) {
+ for (HInstruction* other = instruction->GetNext(); other != nullptr; other = other->GetNext()) {
+ SchedulingNode* other_node = GetNode(other);
+ if (other_node->IsSchedulingBarrier()) {
+ // We have reached a scheduling barrier so we can stop further
+ // processing.
+ DCHECK(HasImmediateOtherDependency(other_node, instruction_node));
+ break;
+ }
+ if (HasSideEffectDependency(other, instruction)) {
+ AddOtherDependency(other_node, instruction_node);
+ }
+ }
+ }
+
+ // Environment dependencies.
+ // We do not need to process those if the instruction is a scheduling barrier,
+ // since the barrier already has non-data dependencies on all following
+ // instructions.
+ if (!is_scheduling_barrier) {
+ for (const HUseListNode<HEnvironment*>& use : instruction->GetEnvUses()) {
+ // Note that here we could stop processing if the environment holder is
+ // across a scheduling barrier. But checking this would likely require
+ // more work than simply iterating through environment uses.
+ AddOtherDependency(GetNode(use.GetUser()->GetHolder()), instruction_node);
+ }
+ }
+}
+
+bool SchedulingGraph::HasImmediateDataDependency(const SchedulingNode* node,
+ const SchedulingNode* other) const {
+ return ContainsElement(node->GetDataPredecessors(), other);
+}
+
+bool SchedulingGraph::HasImmediateDataDependency(const HInstruction* instruction,
+ const HInstruction* other_instruction) const {
+ const SchedulingNode* node = GetNode(instruction);
+ const SchedulingNode* other = GetNode(other_instruction);
+ if (node == nullptr || other == nullptr) {
+    // Both instructions must be in the current basic block, i.e. the SchedulingGraph can see
+    // their corresponding SchedulingNode in the graph and tell whether there is a dependency.
+    // Otherwise there is no dependency from the SchedulingGraph's perspective, for example when
+    // instruction and other_instruction are in different basic blocks.
+ return false;
+ }
+ return HasImmediateDataDependency(node, other);
+}
+
+bool SchedulingGraph::HasImmediateOtherDependency(const SchedulingNode* node,
+ const SchedulingNode* other) const {
+ return ContainsElement(node->GetOtherPredecessors(), other);
+}
+
+bool SchedulingGraph::HasImmediateOtherDependency(const HInstruction* instruction,
+ const HInstruction* other_instruction) const {
+ const SchedulingNode* node = GetNode(instruction);
+ const SchedulingNode* other = GetNode(other_instruction);
+ if (node == nullptr || other == nullptr) {
+    // Both instructions must be in the current basic block, i.e. the SchedulingGraph can see
+    // their corresponding SchedulingNode in the graph and tell whether there is a dependency.
+    // Otherwise there is no dependency from the SchedulingGraph's perspective, for example when
+    // instruction and other_instruction are in different basic blocks.
+ return false;
+ }
+ return HasImmediateOtherDependency(node, other);
+}
+
+static const std::string InstructionTypeId(const HInstruction* instruction) {
+ std::string id;
+ Primitive::Type type = instruction->GetType();
+ if (type == Primitive::kPrimNot) {
+ id.append("l");
+ } else {
+ id.append(Primitive::Descriptor(instruction->GetType()));
+ }
+ // Use lower-case to be closer to the `HGraphVisualizer` output.
+ id[0] = std::tolower(id[0]);
+ id.append(std::to_string(instruction->GetId()));
+ return id;
+}
+
+// Ideally we would reuse the graph visualizer code, but it is not available
+// from here and it is not worth moving all that code only for our use.
+static void DumpAsDotNode(std::ostream& output, const SchedulingNode* node) {
+ const HInstruction* instruction = node->GetInstruction();
+ // Use the instruction typed id as the node identifier.
+ std::string instruction_id = InstructionTypeId(instruction);
+ output << instruction_id << "[shape=record, label=\""
+ << instruction_id << ' ' << instruction->DebugName() << " [";
+ // List the instruction's inputs in its description. When visualizing the
+ // graph this helps differentiating data inputs from other dependencies.
+  const char* separator = "";
+  for (const HInstruction* input : instruction->GetInputs()) {
+    output << separator << InstructionTypeId(input);
+    separator = ",";
+ }
+ output << "]";
+ // Other properties of the node.
+ output << "\\ninternal_latency: " << node->GetInternalLatency();
+ output << "\\ncritical_path: " << node->GetCriticalPath();
+ if (node->IsSchedulingBarrier()) {
+ output << "\\n(barrier)";
+ }
+ output << "\"];\n";
+ // We want program order to go from top to bottom in the graph output, so we
+ // reverse the edges and specify `dir=back`.
+ for (const SchedulingNode* predecessor : node->GetDataPredecessors()) {
+ const HInstruction* predecessor_instruction = predecessor->GetInstruction();
+ output << InstructionTypeId(predecessor_instruction) << ":s -> " << instruction_id << ":n "
+ << "[label=\"" << predecessor->GetLatency() << "\",dir=back]\n";
+ }
+ for (const SchedulingNode* predecessor : node->GetOtherPredecessors()) {
+ const HInstruction* predecessor_instruction = predecessor->GetInstruction();
+ output << InstructionTypeId(predecessor_instruction) << ":s -> " << instruction_id << ":n "
+ << "[dir=back,color=blue]\n";
+ }
+}
+
+void SchedulingGraph::DumpAsDotGraph(const std::string& description,
+ const ArenaVector<SchedulingNode*>& initial_candidates) {
+  // TODO(xueliang): ideally we should move scheduling information into HInstruction; after that
+  // we can move this dot graph dump feature to the visualizer and add a compiler option for it.
+ std::ofstream output("scheduling_graphs.dot", std::ofstream::out | std::ofstream::app);
+ // Description of this graph, as a comment.
+ output << "// " << description << "\n";
+ // Start the dot graph. Use an increasing index for easier differentiation.
+ output << "digraph G {\n";
+ for (const auto& entry : nodes_map_) {
+ DumpAsDotNode(output, entry.second);
+ }
+ // Create a fake 'end_of_scheduling' node to help visualization of critical_paths.
+ for (auto node : initial_candidates) {
+ const HInstruction* instruction = node->GetInstruction();
+ output << InstructionTypeId(instruction) << ":s -> end_of_scheduling:n "
+ << "[label=\"" << node->GetLatency() << "\",dir=back]\n";
+ }
+ // End of the dot graph.
+ output << "}\n";
+ output.close();
+}
+
+SchedulingNode* CriticalPathSchedulingNodeSelector::SelectMaterializedCondition(
+ ArenaVector<SchedulingNode*>* nodes, const SchedulingGraph& graph) const {
+ // Schedule condition inputs that can be materialized immediately before their use.
+  // In the following example, after we have scheduled the HSelect, we want the HLessThan to be
+  // scheduled immediately, because it is a materialized condition and will be emitted right
+  // before the HSelect in the codegen phase.
+ //
+ // i20 HLessThan [...] HLessThan HAdd HAdd
+ // i21 HAdd [...] ===> | | |
+ // i22 HAdd [...] +----------+---------+
+ // i23 HSelect [i21, i22, i20] HSelect
+
+ if (prev_select_ == nullptr) {
+ return nullptr;
+ }
+
+ const HInstruction* instruction = prev_select_->GetInstruction();
+ const HCondition* condition = nullptr;
+ DCHECK(instruction != nullptr);
+
+ if (instruction->IsIf()) {
+ condition = instruction->AsIf()->InputAt(0)->AsCondition();
+ } else if (instruction->IsSelect()) {
+ condition = instruction->AsSelect()->GetCondition()->AsCondition();
+ }
+
+ SchedulingNode* condition_node = (condition != nullptr) ? graph.GetNode(condition) : nullptr;
+
+ if ((condition_node != nullptr) &&
+ condition->HasOnlyOneNonEnvironmentUse() &&
+ ContainsElement(*nodes, condition_node)) {
+ DCHECK(!condition_node->HasUnscheduledSuccessors());
+ // Remove the condition from the list of candidates and schedule it.
+ RemoveElement(*nodes, condition_node);
+ return condition_node;
+ }
+
+ return nullptr;
+}
+
+SchedulingNode* CriticalPathSchedulingNodeSelector::PopHighestPriorityNode(
+ ArenaVector<SchedulingNode*>* nodes, const SchedulingGraph& graph) {
+ DCHECK(!nodes->empty());
+ SchedulingNode* select_node = nullptr;
+
+  // Optimize for the scenario where a materialized condition is emitted right before its use.
+ select_node = SelectMaterializedCondition(nodes, graph);
+
+ if (select_node == nullptr) {
+ // Get highest priority node based on critical path information.
+ select_node = (*nodes)[0];
+ size_t select = 0;
+ for (size_t i = 1, e = nodes->size(); i < e; i++) {
+ SchedulingNode* check = (*nodes)[i];
+ SchedulingNode* candidate = (*nodes)[select];
+ select_node = GetHigherPrioritySchedulingNode(candidate, check);
+ if (select_node == check) {
+ select = i;
+ }
+ }
+ DeleteNodeAtIndex(nodes, select);
+ }
+
+ prev_select_ = select_node;
+ return select_node;
+}
+
+SchedulingNode* CriticalPathSchedulingNodeSelector::GetHigherPrioritySchedulingNode(
+ SchedulingNode* candidate, SchedulingNode* check) const {
+ uint32_t candidate_path = candidate->GetCriticalPath();
+ uint32_t check_path = check->GetCriticalPath();
+ // First look at the critical_path.
+ if (check_path != candidate_path) {
+ return check_path < candidate_path ? check : candidate;
+ }
+ // If both critical paths are equal, schedule instructions with a higher latency
+ // first in program order.
+ return check->GetLatency() < candidate->GetLatency() ? check : candidate;
+}
+
+void HScheduler::Schedule(HGraph* graph) {
+ for (HBasicBlock* block : graph->GetReversePostOrder()) {
+ if (IsSchedulable(block)) {
+ Schedule(block);
+ }
+ }
+}
+
+void HScheduler::Schedule(HBasicBlock* block) {
+ ArenaVector<SchedulingNode*> scheduling_nodes(arena_->Adapter(kArenaAllocScheduler));
+
+ // Build the scheduling graph.
+ scheduling_graph_.Clear();
+ for (HBackwardInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
+ HInstruction* instruction = it.Current();
+ SchedulingNode* node = scheduling_graph_.AddNode(instruction, IsSchedulingBarrier(instruction));
+ CalculateLatency(node);
+ scheduling_nodes.push_back(node);
+ }
+
+ if (scheduling_graph_.Size() <= 1) {
+ scheduling_graph_.Clear();
+ return;
+ }
+
+ cursor_ = block->GetLastInstruction();
+
+ // Find the initial candidates for scheduling.
+ candidates_.clear();
+ for (SchedulingNode* node : scheduling_nodes) {
+ if (!node->HasUnscheduledSuccessors()) {
+ node->MaybeUpdateCriticalPath(node->GetLatency());
+ candidates_.push_back(node);
+ }
+ }
+
+ ArenaVector<SchedulingNode*> initial_candidates(arena_->Adapter(kArenaAllocScheduler));
+ if (kDumpDotSchedulingGraphs) {
+ // Remember the list of initial candidates for debug output purposes.
+ initial_candidates.assign(candidates_.begin(), candidates_.end());
+ }
+
+ // Schedule all nodes.
+ while (!candidates_.empty()) {
+ Schedule(selector_->PopHighestPriorityNode(&candidates_, scheduling_graph_));
+ }
+
+ if (kDumpDotSchedulingGraphs) {
+ // Dump the graph in `dot` format.
+ HGraph* graph = block->GetGraph();
+ std::stringstream description;
+ description << graph->GetDexFile().PrettyMethod(graph->GetMethodIdx())
+ << " B" << block->GetBlockId();
+ scheduling_graph_.DumpAsDotGraph(description.str(), initial_candidates);
+ }
+}
+
+void HScheduler::Schedule(SchedulingNode* scheduling_node) {
+ // Check whether any of the node's predecessors will be valid candidates after
+ // this node is scheduled.
+ uint32_t path_to_node = scheduling_node->GetCriticalPath();
+ for (SchedulingNode* predecessor : scheduling_node->GetDataPredecessors()) {
+ predecessor->MaybeUpdateCriticalPath(
+ path_to_node + predecessor->GetInternalLatency() + predecessor->GetLatency());
+ predecessor->DecrementNumberOfUnscheduledSuccessors();
+ if (!predecessor->HasUnscheduledSuccessors()) {
+ candidates_.push_back(predecessor);
+ }
+ }
+ for (SchedulingNode* predecessor : scheduling_node->GetOtherPredecessors()) {
+ // Do not update the critical path.
+ // The 'other' (so 'non-data') dependencies (usually) do not represent a
+ // 'material' dependency of nodes on others. They exist for program
+ // correctness. So we do not use them to compute the critical path.
+ predecessor->DecrementNumberOfUnscheduledSuccessors();
+ if (!predecessor->HasUnscheduledSuccessors()) {
+ candidates_.push_back(predecessor);
+ }
+ }
+
+ Schedule(scheduling_node->GetInstruction());
+}
+
+// Move an instruction after cursor instruction inside one basic block.
+static void MoveAfterInBlock(HInstruction* instruction, HInstruction* cursor) {
+ DCHECK_EQ(instruction->GetBlock(), cursor->GetBlock());
+ DCHECK_NE(cursor, cursor->GetBlock()->GetLastInstruction());
+ DCHECK(!instruction->IsControlFlow());
+ DCHECK(!cursor->IsControlFlow());
+ instruction->MoveBefore(cursor->GetNext(), /* do_checks */ false);
+}
+
+void HScheduler::Schedule(HInstruction* instruction) {
+ if (instruction == cursor_) {
+ cursor_ = cursor_->GetPrevious();
+ } else {
+ MoveAfterInBlock(instruction, cursor_);
+ }
+}
+
+bool HScheduler::IsSchedulable(const HInstruction* instruction) const {
+ // We want to avoid exhaustively listing all instructions, so we first check
+ // for instruction categories that we know are safe.
+ if (instruction->IsControlFlow() ||
+ instruction->IsConstant()) {
+ return true;
+ }
+ // Currently all unary and binary operations are safe to schedule, so avoid
+ // checking for each of them individually.
+ // Since nothing prevents a new scheduling-unsafe HInstruction to subclass
+ // HUnaryOperation (or HBinaryOperation), check in debug mode that we have
+ // the exhaustive lists here.
+ if (instruction->IsUnaryOperation()) {
+ DCHECK(instruction->IsBooleanNot() ||
+ instruction->IsNot() ||
+ instruction->IsNeg()) << "unexpected instruction " << instruction->DebugName();
+ return true;
+ }
+ if (instruction->IsBinaryOperation()) {
+ DCHECK(instruction->IsAdd() ||
+ instruction->IsAnd() ||
+ instruction->IsCompare() ||
+ instruction->IsCondition() ||
+ instruction->IsDiv() ||
+ instruction->IsMul() ||
+ instruction->IsOr() ||
+ instruction->IsRem() ||
+ instruction->IsRor() ||
+ instruction->IsShl() ||
+ instruction->IsShr() ||
+ instruction->IsSub() ||
+ instruction->IsUShr() ||
+ instruction->IsXor()) << "unexpected instruction " << instruction->DebugName();
+ return true;
+ }
+ // The scheduler should not see any of these.
+ DCHECK(!instruction->IsParallelMove()) << "unexpected instruction " << instruction->DebugName();
+ // List of instructions explicitly excluded:
+ // HClearException
+ // HClinitCheck
+ // HDeoptimize
+ // HLoadClass
+ // HLoadException
+ // HMemoryBarrier
+ // HMonitorOperation
+ // HNativeDebugInfo
+ // HThrow
+ // HTryBoundary
+ // TODO: Some of the instructions above may be safe to schedule (maybe as
+ // scheduling barriers).
+ return instruction->IsArrayGet() ||
+ instruction->IsArraySet() ||
+ instruction->IsArrayLength() ||
+ instruction->IsBoundType() ||
+ instruction->IsBoundsCheck() ||
+ instruction->IsCheckCast() ||
+ instruction->IsClassTableGet() ||
+ instruction->IsCurrentMethod() ||
+ instruction->IsDivZeroCheck() ||
+ instruction->IsInstanceFieldGet() ||
+ instruction->IsInstanceFieldSet() ||
+ instruction->IsInstanceOf() ||
+ instruction->IsInvokeInterface() ||
+ instruction->IsInvokeStaticOrDirect() ||
+ instruction->IsInvokeUnresolved() ||
+ instruction->IsInvokeVirtual() ||
+ instruction->IsLoadString() ||
+ instruction->IsNewArray() ||
+ instruction->IsNewInstance() ||
+ instruction->IsNullCheck() ||
+ instruction->IsPackedSwitch() ||
+ instruction->IsParameterValue() ||
+ instruction->IsPhi() ||
+ instruction->IsReturn() ||
+ instruction->IsReturnVoid() ||
+ instruction->IsSelect() ||
+ instruction->IsStaticFieldGet() ||
+ instruction->IsStaticFieldSet() ||
+ instruction->IsSuspendCheck() ||
+ instruction->IsTypeConversion() ||
+ instruction->IsUnresolvedInstanceFieldGet() ||
+ instruction->IsUnresolvedInstanceFieldSet() ||
+ instruction->IsUnresolvedStaticFieldGet() ||
+ instruction->IsUnresolvedStaticFieldSet();
+}
+
+bool HScheduler::IsSchedulable(const HBasicBlock* block) const {
+  // We may only be interested in loop blocks.
+ if (only_optimize_loop_blocks_ && !block->IsInLoop()) {
+ return false;
+ }
+ if (block->GetTryCatchInformation() != nullptr) {
+    // Do not schedule blocks that are part of try-catch, because the scheduler cannot see
+    // whether the catch block has assumptions about the instruction order in the try block.
+    // In the following example, if we enabled the scheduler for the try block,
+    // MultiplyAccumulate might be scheduled before DivZeroCheck,
+    // which could result in an incorrect value in the catch block.
+    // try {
+    //   a = a/b;    // DivZeroCheck
+    //               // Div
+    //   c = c*d+e;  // MultiplyAccumulate
+    // } catch (ArithmeticException ex) { System.out.print(c); }
+ return false;
+ }
+ // Check whether all instructions in this block are schedulable.
+ for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
+ if (!IsSchedulable(it.Current())) {
+ return false;
+ }
+ }
+ return true;
+}
+
+bool HScheduler::IsSchedulingBarrier(const HInstruction* instr) const {
+ return instr->IsControlFlow() ||
+ // Don't break calling convention.
+ instr->IsParameterValue() ||
+ // Code generation of goto relies on SuspendCheck's position.
+ instr->IsSuspendCheck();
+}
+
+void HInstructionScheduling::Run(bool only_optimize_loop_blocks,
+ bool schedule_randomly) {
+  // Avoid a compilation error when compiling for an unsupported instruction set.
+ UNUSED(only_optimize_loop_blocks);
+ UNUSED(schedule_randomly);
+ switch (instruction_set_) {
+#ifdef ART_ENABLE_CODEGEN_arm64
+ case kArm64: {
+      // Phase-local allocator that allocates the scheduler's internal data structures,
+      // such as scheduling nodes, the internal nodes map, dependencies, etc.
+ ArenaAllocator arena_allocator(graph_->GetArena()->GetArenaPool());
+
+ CriticalPathSchedulingNodeSelector critical_path_selector;
+ RandomSchedulingNodeSelector random_selector;
+ SchedulingNodeSelector* selector = schedule_randomly
+ ? static_cast<SchedulingNodeSelector*>(&random_selector)
+ : static_cast<SchedulingNodeSelector*>(&critical_path_selector);
+
+ arm64::HSchedulerARM64 scheduler(&arena_allocator, selector);
+ scheduler.SetOnlyOptimizeLoopBlocks(only_optimize_loop_blocks);
+ scheduler.Schedule(graph_);
+ break;
+ }
+#endif
+ default:
+ break;
+ }
+}
+
+} // namespace art
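
To make the dependency construction concrete, here is a hedged, test-style sketch (in the
spirit of scheduler_test.cc, but not the actual test code) of building a SchedulingGraph for
one block and querying it. Nodes have to be added in reverse program order, exactly as
HScheduler::Schedule(HBasicBlock*) does, so that AddDependencies() finds the already-added
users of each instruction:

    // Sketch only; assumes an in-tree context providing the scheduler, arena and block.
    void BuildAndQueryGraph(const HScheduler* scheduler,
                            ArenaAllocator* arena,
                            HBasicBlock* block) {
      SchedulingGraph scheduling_graph(scheduler, arena);
      for (HBackwardInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
        scheduling_graph.AddNode(it.Current());
      }
      // Every define-use edge between non-phi instructions of this block shows up as an
      // immediate data dependency of the user on its input.
      for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
        HInstruction* user = it.Current();
        for (HInstruction* input : user->GetInputs()) {
          if (!input->IsPhi() && input->GetBlock() == block) {
            CHECK(scheduling_graph.HasImmediateDataDependency(user, input));
          }
        }
      }
    }
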
diff --git a/compiler/optimizing/scheduler.h b/compiler/optimizing/scheduler.h
new file mode 100644
index 0000000000..ab0dad4300
--- /dev/null
+++ b/compiler/optimizing/scheduler.h
@@ -0,0 +1,487 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_SCHEDULER_H_
+#define ART_COMPILER_OPTIMIZING_SCHEDULER_H_
+
+#include <fstream>
+
+#include "base/time_utils.h"
+#include "driver/compiler_driver.h"
+#include "nodes.h"
+#include "optimization.h"
+
+namespace art {
+
+// General description of instruction scheduling.
+//
+// This pass tries to improve the quality of the generated code by reordering
+// instructions in the graph to avoid execution delays caused by execution
+// dependencies.
+// Currently, scheduling is performed at the block level, so no `HInstruction`
+// ever leaves its block in this pass.
+//
+// The scheduling process iterates through blocks in the graph. For blocks that
+// we can and want to schedule:
+// 1) Build a dependency graph for instructions.
+// It includes data dependencies (inputs/uses), but also environment
+// dependencies and side-effect dependencies.
+// 2) Schedule the dependency graph.
+// This is a topological sort of the dependency graph, using heuristics to
+//      decide which node to schedule first when there are multiple candidates.
+//
+// A few factors impacting the quality of the scheduling are:
+// - The heuristics used to decide what node to schedule in the topological sort
+// when there are multiple valid candidates. There is a wide range of
+// complexity possible here, going from a simple model only considering
+// latencies, to a super detailed CPU pipeline model.
+// - Fewer dependencies in the dependency graph give more freedom for the
+//   scheduling heuristics. For example, de-aliasing can open up possibilities for
+//   reordering memory accesses.
+// - The level of abstraction of the IR. It is easier to evaluate scheduling for
+// IRs that translate to a single assembly instruction than for IRs
+// that generate multiple assembly instructions or generate different code
+// depending on properties of the IR.
+// - Scheduling is performed before register allocation, so it is not aware of the
+//   impact that moving instructions has on register allocation.
+//
+//
+// The scheduling code uses the terms predecessors, successors, and dependencies.
+// This can be confusing at times, so here are some clarifications.
+// These terms are used from the point of view of the program dependency graph, so
+// the inputs of an instruction are part of its dependencies, and hence part of its
+// predecessors, while the uses of an instruction are (part of) its successors.
+// (Side-effect dependencies can yield predecessors or successors that are not
+// inputs or uses.)
+//
+// Here is a trivial example. For the Java code:
+//
+// int a = 1 + 2;
+//
+// we would have the instructions
+//
+// i1 HIntConstant 1
+// i2 HIntConstant 2
+// i3 HAdd [i1,i2]
+//
+// `i1` and `i2` are predecessors of `i3`.
+// `i3` is a successor of `i1` and a successor of `i2`.
+// In a scheduling graph for this code we would have three nodes `n1`, `n2`,
+// and `n3` (respectively for instructions `i1`, `i2`, and `i3`).
+// Conceptually the program dependency graph for this would contain two edges
+//
+// n1 -> n3
+// n2 -> n3
+//
+// Since we schedule backwards (starting from the last instruction in each basic
+// block), the implementation of nodes keeps a list of pointers to their
+// predecessors. So `n3` would keep pointers to its predecessors `n1` and `n2`.
+//
+// Node dependencies are also referred to from the program dependency graph
+// point of view: we say that node `B` immediately depends on `A` if there is an
+// edge from `A` to `B` in the program dependency graph. `A` is a predecessor of
+// `B`, `B` is a successor of `A`. In the example above `n3` depends on `n1` and
+// `n2`.
+// Since nodes in the scheduling graph keep a list of their predecessors, node
+// `B` will have a pointer to its predecessor `A`.
+// As we schedule backwards, `B` will be selected for scheduling before `A` is.
+//
+// So the scheduling for the example above could happen as follows:
+//
+// |---------------------------+------------------------|
+// | candidates for scheduling | instructions scheduled |
+// |---------------------------+------------------------|
+//
+// The only node without successors is `n3`, so it is the only initial
+// candidate.
+//
+// | n3 | (none) |
+//
+// We schedule `n3` as the last (and only) instruction. All of its predecessors
+// that do not have any unscheduled successors become candidates, that is, `n1`
+// and `n2`.
+//
+// | n1, n2 | n3 |
+//
+// One of the candidates is selected. In practice this is where scheduling
+// heuristics kick in, to decide which of the candidates should be selected.
+// In this example, let it be `n1`. It is scheduled before previously scheduled
+// nodes (in program order). There are no other nodes to add to the list of
+// candidates.
+//
+// | n2 | n1 |
+// | | n3 |
+//
+// The only candidate available for scheduling is `n2`. Schedule it before
+// (in program order) the previously scheduled nodes.
+//
+// | (none) | n2 |
+// | | n1 |
+// | | n3 |
+// |---------------------------+------------------------|
+//
+// So finally the instructions will be executed in the order `i2`, `i1`, and `i3`.
+// In this trivial example, it does not matter which of `i1` and `i2` is
+// scheduled first since they are constants. However the same process would
+// apply if `i1` and `i2` were actual operations (for example `HMul` and `HDiv`).
+
+// Set to true to have instruction scheduling dump scheduling graphs to the file
+// `scheduling_graphs.dot`. See `SchedulingGraph::DumpAsDotGraph()`.
+static constexpr bool kDumpDotSchedulingGraphs = false;
+
+// Typically used as a default instruction latency.
+static constexpr uint32_t kGenericInstructionLatency = 1;
+
+class HScheduler;
+
+/**
+ * A node representing an `HInstruction` in the `SchedulingGraph`.
+ */
+class SchedulingNode : public ArenaObject<kArenaAllocScheduler> {
+ public:
+ SchedulingNode(HInstruction* instr, ArenaAllocator* arena, bool is_scheduling_barrier)
+ : latency_(0),
+ internal_latency_(0),
+ critical_path_(0),
+ instruction_(instr),
+ is_scheduling_barrier_(is_scheduling_barrier),
+ data_predecessors_(arena->Adapter(kArenaAllocScheduler)),
+ other_predecessors_(arena->Adapter(kArenaAllocScheduler)),
+ num_unscheduled_successors_(0) {
+ data_predecessors_.reserve(kPreallocatedPredecessors);
+ }
+
+ void AddDataPredecessor(SchedulingNode* predecessor) {
+ data_predecessors_.push_back(predecessor);
+ predecessor->num_unscheduled_successors_++;
+ }
+
+ void AddOtherPredecessor(SchedulingNode* predecessor) {
+ other_predecessors_.push_back(predecessor);
+ predecessor->num_unscheduled_successors_++;
+ }
+
+ void DecrementNumberOfUnscheduledSuccessors() {
+ num_unscheduled_successors_--;
+ }
+
+ void MaybeUpdateCriticalPath(uint32_t other_critical_path) {
+ critical_path_ = std::max(critical_path_, other_critical_path);
+ }
+
+ bool HasUnscheduledSuccessors() const {
+ return num_unscheduled_successors_ != 0;
+ }
+
+ HInstruction* GetInstruction() const { return instruction_; }
+ uint32_t GetLatency() const { return latency_; }
+ void SetLatency(uint32_t latency) { latency_ = latency; }
+ uint32_t GetInternalLatency() const { return internal_latency_; }
+ void SetInternalLatency(uint32_t internal_latency) { internal_latency_ = internal_latency; }
+ uint32_t GetCriticalPath() const { return critical_path_; }
+ bool IsSchedulingBarrier() const { return is_scheduling_barrier_; }
+ const ArenaVector<SchedulingNode*>& GetDataPredecessors() const { return data_predecessors_; }
+ const ArenaVector<SchedulingNode*>& GetOtherPredecessors() const { return other_predecessors_; }
+
+ private:
+ // The latency of this node. It represents the latency between the moment the
+ // last instruction for this node has executed to the moment the result
+ // produced by this node is available to users.
+ uint32_t latency_;
+ // This represents the time spent *within* the generated code for this node.
+ // It should be zero for nodes that only generate a single instruction.
+ uint32_t internal_latency_;
+
+ // The critical path from this instruction to the end of scheduling. It is
+ // used by the scheduling heuristics to measure the priority of this instruction.
+ // It is defined as
+ // critical_path_ = latency_ + max((use.internal_latency_ + use.critical_path_) for all uses)
+ // (Note that here 'uses' is equivalent to 'data successors'. Also see comments in
+ // `HScheduler::Schedule(SchedulingNode* scheduling_node)`).
+ uint32_t critical_path_;
+
+ // The instruction that this node represents.
+ HInstruction* const instruction_;
+
+  // If a node is a scheduling barrier, other nodes cannot be scheduled before it.
+ const bool is_scheduling_barrier_;
+
+ // The lists of predecessors. They cannot be scheduled before this node. Once
+ // this node is scheduled, we check whether any of its predecessors has become a
+ // valid candidate for scheduling.
+ // Predecessors in `data_predecessors_` are data dependencies. Those in
+ // `other_predecessors_` contain side-effect dependencies, environment
+ // dependencies, and scheduling barrier dependencies.
+ ArenaVector<SchedulingNode*> data_predecessors_;
+ ArenaVector<SchedulingNode*> other_predecessors_;
+
+ // The number of unscheduled successors for this node. This number is
+ // decremented as successors are scheduled. When it reaches zero this node
+ // becomes a valid candidate to schedule.
+ uint32_t num_unscheduled_successors_;
+
+ static constexpr size_t kPreallocatedPredecessors = 4;
+};
+
+/*
+ * Directed acyclic graph for scheduling.
+ */
+class SchedulingGraph : public ValueObject {
+ public:
+ SchedulingGraph(const HScheduler* scheduler, ArenaAllocator* arena)
+ : scheduler_(scheduler),
+ arena_(arena),
+ contains_scheduling_barrier_(false),
+ nodes_map_(arena_->Adapter(kArenaAllocScheduler)) {}
+
+ SchedulingNode* AddNode(HInstruction* instr, bool is_scheduling_barrier = false) {
+ SchedulingNode* node = new (arena_) SchedulingNode(instr, arena_, is_scheduling_barrier);
+ nodes_map_.Insert(std::make_pair(instr, node));
+ contains_scheduling_barrier_ |= is_scheduling_barrier;
+ AddDependencies(instr, is_scheduling_barrier);
+ return node;
+ }
+
+ void Clear() {
+ nodes_map_.Clear();
+ contains_scheduling_barrier_ = false;
+ }
+
+ SchedulingNode* GetNode(const HInstruction* instr) const {
+ auto it = nodes_map_.Find(instr);
+ if (it == nodes_map_.end()) {
+ return nullptr;
+ } else {
+ return it->second;
+ }
+ }
+
+ bool IsSchedulingBarrier(const HInstruction* instruction) const;
+
+ bool HasImmediateDataDependency(const SchedulingNode* node, const SchedulingNode* other) const;
+ bool HasImmediateDataDependency(const HInstruction* node, const HInstruction* other) const;
+ bool HasImmediateOtherDependency(const SchedulingNode* node, const SchedulingNode* other) const;
+ bool HasImmediateOtherDependency(const HInstruction* node, const HInstruction* other) const;
+
+ size_t Size() const {
+ return nodes_map_.Size();
+ }
+
+ // Dump the scheduling graph, in dot file format, appending it to the file
+ // `scheduling_graphs.dot`.
+ void DumpAsDotGraph(const std::string& description,
+ const ArenaVector<SchedulingNode*>& initial_candidates);
+
+ protected:
+ void AddDependency(SchedulingNode* node, SchedulingNode* dependency, bool is_data_dependency);
+ void AddDataDependency(SchedulingNode* node, SchedulingNode* dependency) {
+ AddDependency(node, dependency, /*is_data_dependency*/true);
+ }
+ void AddOtherDependency(SchedulingNode* node, SchedulingNode* dependency) {
+ AddDependency(node, dependency, /*is_data_dependency*/false);
+ }
+
+  // Add the dependencies for the given `HInstruction`: inputs, environment uses, and side effects.
+ void AddDependencies(HInstruction* instruction, bool is_scheduling_barrier = false);
+
+ const HScheduler* const scheduler_;
+
+ ArenaAllocator* const arena_;
+
+ bool contains_scheduling_barrier_;
+
+ ArenaHashMap<const HInstruction*, SchedulingNode*> nodes_map_;
+};
+
+/*
+ * The visitors derived from this base class are used by schedulers to evaluate
+ * the latencies of `HInstruction`s.
+ */
+class SchedulingLatencyVisitor : public HGraphDelegateVisitor {
+ public:
+ // This class and its sub-classes will never be used to drive a visit of an
+ // `HGraph` but only to visit `HInstructions` one at a time, so we do not need
+ // to pass a valid graph to `HGraphDelegateVisitor()`.
+ SchedulingLatencyVisitor() : HGraphDelegateVisitor(nullptr) {}
+
+ void VisitInstruction(HInstruction* instruction) OVERRIDE {
+ LOG(FATAL) << "Error visiting " << instruction->DebugName() << ". "
+ "Architecture-specific scheduling latency visitors must handle all instructions"
+ " (potentially by overriding the generic `VisitInstruction()`.";
+ UNREACHABLE();
+ }
+
+ void Visit(HInstruction* instruction) {
+ instruction->Accept(this);
+ }
+
+ void CalculateLatency(SchedulingNode* node) {
+ // By default nodes have no internal latency.
+ last_visited_internal_latency_ = 0;
+ Visit(node->GetInstruction());
+ }
+
+ uint32_t GetLastVisitedLatency() const { return last_visited_latency_; }
+ uint32_t GetLastVisitedInternalLatency() const { return last_visited_internal_latency_; }
+
+ protected:
+ // The latency of the most recent visited SchedulingNode.
+ // This is for reporting the latency value to the user of this visitor.
+ uint32_t last_visited_latency_;
+ // This represents the time spent *within* the generated code for the most recent visited
+ // SchedulingNode. This is for reporting the internal latency value to the user of this visitor.
+ uint32_t last_visited_internal_latency_;
+};
+
+class SchedulingNodeSelector : public ArenaObject<kArenaAllocScheduler> {
+ public:
+ virtual SchedulingNode* PopHighestPriorityNode(ArenaVector<SchedulingNode*>* nodes,
+ const SchedulingGraph& graph) = 0;
+ virtual ~SchedulingNodeSelector() {}
+ protected:
+ static void DeleteNodeAtIndex(ArenaVector<SchedulingNode*>* nodes, size_t index) {
+ (*nodes)[index] = nodes->back();
+ nodes->pop_back();
+ }
+};
+
+/*
+ * Select a `SchedulingNode` at random within the candidates.
+ */
+class RandomSchedulingNodeSelector : public SchedulingNodeSelector {
+ public:
+ explicit RandomSchedulingNodeSelector() : seed_(0) {
+ seed_ = static_cast<uint32_t>(NanoTime());
+ srand(seed_);
+ }
+
+ SchedulingNode* PopHighestPriorityNode(ArenaVector<SchedulingNode*>* nodes,
+ const SchedulingGraph& graph) OVERRIDE {
+ UNUSED(graph);
+ DCHECK(!nodes->empty());
+ size_t select = rand_r(&seed_) % nodes->size();
+ SchedulingNode* select_node = (*nodes)[select];
+ DeleteNodeAtIndex(nodes, select);
+ return select_node;
+ }
+
+ uint32_t seed_;
+};
+
+/*
+ * Select a `SchedulingNode` according to critical path information,
+ * with heuristics to favor certain instruction patterns like materialized condition.
+ */
+class CriticalPathSchedulingNodeSelector : public SchedulingNodeSelector {
+ public:
+ CriticalPathSchedulingNodeSelector() : prev_select_(nullptr) {}
+
+ SchedulingNode* PopHighestPriorityNode(ArenaVector<SchedulingNode*>* nodes,
+ const SchedulingGraph& graph) OVERRIDE;
+
+ protected:
+ SchedulingNode* GetHigherPrioritySchedulingNode(SchedulingNode* candidate,
+ SchedulingNode* check) const;
+
+ SchedulingNode* SelectMaterializedCondition(ArenaVector<SchedulingNode*>* nodes,
+ const SchedulingGraph& graph) const;
+
+ private:
+ const SchedulingNode* prev_select_;
+};
+
+class HScheduler {
+ public:
+ HScheduler(ArenaAllocator* arena,
+ SchedulingLatencyVisitor* latency_visitor,
+ SchedulingNodeSelector* selector)
+ : arena_(arena),
+ latency_visitor_(latency_visitor),
+ selector_(selector),
+ only_optimize_loop_blocks_(true),
+ scheduling_graph_(this, arena),
+ candidates_(arena_->Adapter(kArenaAllocScheduler)) {}
+ virtual ~HScheduler() {}
+
+ void Schedule(HGraph* graph);
+
+ void SetOnlyOptimizeLoopBlocks(bool loop_only) { only_optimize_loop_blocks_ = loop_only; }
+
+  // Instructions cannot be rescheduled across a scheduling barrier.
+ virtual bool IsSchedulingBarrier(const HInstruction* instruction) const;
+
+ protected:
+ void Schedule(HBasicBlock* block);
+ void Schedule(SchedulingNode* scheduling_node);
+ void Schedule(HInstruction* instruction);
+
+  // Any instruction for which this method returns `false` will prevent its
+ // containing basic block from being scheduled.
+ // This method is used to restrict scheduling to instructions that we know are
+ // safe to handle.
+ virtual bool IsSchedulable(const HInstruction* instruction) const;
+ bool IsSchedulable(const HBasicBlock* block) const;
+
+ void CalculateLatency(SchedulingNode* node) {
+ latency_visitor_->CalculateLatency(node);
+ node->SetLatency(latency_visitor_->GetLastVisitedLatency());
+ node->SetInternalLatency(latency_visitor_->GetLastVisitedInternalLatency());
+ }
+
+ ArenaAllocator* const arena_;
+ SchedulingLatencyVisitor* const latency_visitor_;
+ SchedulingNodeSelector* const selector_;
+ bool only_optimize_loop_blocks_;
+
+ // We instantiate the members below as part of this class to avoid
+ // instantiating them locally for every chunk scheduled.
+ SchedulingGraph scheduling_graph_;
+ // A pointer indicating where the next instruction to be scheduled will be inserted.
+ HInstruction* cursor_;
+ // The list of candidates for scheduling. A node becomes a candidate when all
+ // its predecessors have been scheduled.
+ ArenaVector<SchedulingNode*> candidates_;
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(HScheduler);
+};
+
+inline bool SchedulingGraph::IsSchedulingBarrier(const HInstruction* instruction) const {
+ return scheduler_->IsSchedulingBarrier(instruction);
+}
+
+class HInstructionScheduling : public HOptimization {
+ public:
+ HInstructionScheduling(HGraph* graph, InstructionSet instruction_set)
+ : HOptimization(graph, kInstructionScheduling),
+ instruction_set_(instruction_set) {}
+
+ void Run() {
+ Run(/*only_optimize_loop_blocks*/ true, /*schedule_randomly*/ false);
+ }
+ void Run(bool only_optimize_loop_blocks, bool schedule_randomly);
+
+ static constexpr const char* kInstructionScheduling = "scheduler";
+
+ const InstructionSet instruction_set_;
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(HInstructionScheduling);
+};
+
+} // namespace art
+
+#endif // ART_COMPILER_OPTIMIZING_SCHEDULER_H_
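
The critical path heuristic can be traced on a small example. Suppose a block contains
i2 HAdd (latency 2), i1 HArrayGet (latency 5) and i3 HArraySet consuming both (latency 3),
and ignore the trailing control-flow instruction, which is a scheduling barrier and is
scheduled first; the latency numbers are illustrative. As GetHigherPrioritySchedulingNode()
reads, the selector pops the candidate with the smaller critical path first, so that candidate
fills the latest remaining slot and the node carrying the longer chain ends up earlier in
program order. A standalone sketch of the bookkeeping, using plain integers instead of
SchedulingNode:

    #include <cassert>

    int main() {
      // i3 has no unscheduled successors, so it is the initial candidate:
      // critical_path = its own latency.
      int cp_i3 = 3;
      // Scheduling i3 updates its data predecessors with
      // successor_critical_path + internal_latency + latency.
      int cp_i1 = cp_i3 + 0 + 5;  // 8: the ArrayGet heads the longer chain.
      int cp_i2 = cp_i3 + 0 + 2;  // 5.
      // The smaller critical path (i2) is popped first and placed just above i3;
      // i1 is scheduled last and is therefore hoisted above i2, giving the load
      // more time to complete before i3 uses its result.
      assert(cp_i2 < cp_i1);
      return 0;
    }
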
diff --git a/compiler/optimizing/scheduler_arm64.cc b/compiler/optimizing/scheduler_arm64.cc
new file mode 100644
index 0000000000..e3701fbcb1
--- /dev/null
+++ b/compiler/optimizing/scheduler_arm64.cc
@@ -0,0 +1,196 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "scheduler_arm64.h"
+#include "code_generator_utils.h"
+
+namespace art {
+namespace arm64 {
+
+void SchedulingLatencyVisitorARM64::VisitBinaryOperation(HBinaryOperation* instr) {
+ last_visited_latency_ = Primitive::IsFloatingPointType(instr->GetResultType())
+ ? kArm64FloatingPointOpLatency
+ : kArm64IntegerOpLatency;
+}
+
+void SchedulingLatencyVisitorARM64::VisitBitwiseNegatedRight(
+ HBitwiseNegatedRight* ATTRIBUTE_UNUSED) {
+ last_visited_latency_ = kArm64IntegerOpLatency;
+}
+
+void SchedulingLatencyVisitorARM64::VisitArm64DataProcWithShifterOp(
+ HArm64DataProcWithShifterOp* ATTRIBUTE_UNUSED) {
+ last_visited_latency_ = kArm64DataProcWithShifterOpLatency;
+}
+
+void SchedulingLatencyVisitorARM64::VisitIntermediateAddress(
+ HIntermediateAddress* ATTRIBUTE_UNUSED) {
+  // Although the code generated is a simple `add` instruction, we found empirically that
+  // spacing it from its use in memory accesses was beneficial.
+ last_visited_latency_ = kArm64IntegerOpLatency + 2;
+}
+
+void SchedulingLatencyVisitorARM64::VisitMultiplyAccumulate(HMultiplyAccumulate* ATTRIBUTE_UNUSED) {
+ last_visited_latency_ = kArm64MulIntegerLatency;
+}
+
+void SchedulingLatencyVisitorARM64::VisitArrayGet(HArrayGet* instruction) {
+ if (!instruction->GetArray()->IsIntermediateAddress()) {
+ // Take the intermediate address computation into account.
+ last_visited_internal_latency_ = kArm64IntegerOpLatency;
+ }
+ last_visited_latency_ = kArm64MemoryLoadLatency;
+}
+
+void SchedulingLatencyVisitorARM64::VisitArrayLength(HArrayLength* ATTRIBUTE_UNUSED) {
+ last_visited_latency_ = kArm64MemoryLoadLatency;
+}
+
+void SchedulingLatencyVisitorARM64::VisitArraySet(HArraySet* ATTRIBUTE_UNUSED) {
+ last_visited_latency_ = kArm64MemoryStoreLatency;
+}
+
+void SchedulingLatencyVisitorARM64::VisitBoundsCheck(HBoundsCheck* ATTRIBUTE_UNUSED) {
+ last_visited_internal_latency_ = kArm64IntegerOpLatency;
+ // Users do not use any data results.
+ last_visited_latency_ = 0;
+}
+
+void SchedulingLatencyVisitorARM64::VisitDiv(HDiv* instr) {
+ Primitive::Type type = instr->GetResultType();
+ switch (type) {
+ case Primitive::kPrimFloat:
+ last_visited_latency_ = kArm64DivFloatLatency;
+ break;
+ case Primitive::kPrimDouble:
+ last_visited_latency_ = kArm64DivDoubleLatency;
+ break;
+ default:
+ // Follow the code path used by code generation.
+ if (instr->GetRight()->IsConstant()) {
+ int64_t imm = Int64FromConstant(instr->GetRight()->AsConstant());
+ if (imm == 0) {
+ last_visited_internal_latency_ = 0;
+ last_visited_latency_ = 0;
+ } else if (imm == 1 || imm == -1) {
+ last_visited_internal_latency_ = 0;
+ last_visited_latency_ = kArm64IntegerOpLatency;
+ } else if (IsPowerOfTwo(AbsOrMin(imm))) {
+ last_visited_internal_latency_ = 4 * kArm64IntegerOpLatency;
+ last_visited_latency_ = kArm64IntegerOpLatency;
+ } else {
+ DCHECK(imm <= -2 || imm >= 2);
+ last_visited_internal_latency_ = 4 * kArm64IntegerOpLatency;
+ last_visited_latency_ = kArm64MulIntegerLatency;
+ }
+ } else {
+ last_visited_latency_ = kArm64DivIntegerLatency;
+ }
+ break;
+ }
+}
+
+void SchedulingLatencyVisitorARM64::VisitInstanceFieldGet(HInstanceFieldGet* ATTRIBUTE_UNUSED) {
+ last_visited_latency_ = kArm64MemoryLoadLatency;
+}
+
+void SchedulingLatencyVisitorARM64::VisitInstanceOf(HInstanceOf* ATTRIBUTE_UNUSED) {
+ last_visited_internal_latency_ = kArm64CallInternalLatency;
+ last_visited_latency_ = kArm64IntegerOpLatency;
+}
+
+void SchedulingLatencyVisitorARM64::VisitInvoke(HInvoke* ATTRIBUTE_UNUSED) {
+ last_visited_internal_latency_ = kArm64CallInternalLatency;
+ last_visited_latency_ = kArm64CallLatency;
+}
+
+void SchedulingLatencyVisitorARM64::VisitLoadString(HLoadString* ATTRIBUTE_UNUSED) {
+ last_visited_internal_latency_ = kArm64LoadStringInternalLatency;
+ last_visited_latency_ = kArm64MemoryLoadLatency;
+}
+
+void SchedulingLatencyVisitorARM64::VisitMul(HMul* instr) {
+ last_visited_latency_ = Primitive::IsFloatingPointType(instr->GetResultType())
+ ? kArm64MulFloatingPointLatency
+ : kArm64MulIntegerLatency;
+}
+
+void SchedulingLatencyVisitorARM64::VisitNewArray(HNewArray* ATTRIBUTE_UNUSED) {
+ last_visited_internal_latency_ = kArm64IntegerOpLatency + kArm64CallInternalLatency;
+ last_visited_latency_ = kArm64CallLatency;
+}
+
+void SchedulingLatencyVisitorARM64::VisitNewInstance(HNewInstance* instruction) {
+ if (instruction->IsStringAlloc()) {
+ last_visited_internal_latency_ = 2 + kArm64MemoryLoadLatency + kArm64CallInternalLatency;
+ } else {
+ last_visited_internal_latency_ = kArm64CallInternalLatency;
+ }
+ last_visited_latency_ = kArm64CallLatency;
+}
+
+void SchedulingLatencyVisitorARM64::VisitRem(HRem* instruction) {
+ if (Primitive::IsFloatingPointType(instruction->GetResultType())) {
+ last_visited_internal_latency_ = kArm64CallInternalLatency;
+ last_visited_latency_ = kArm64CallLatency;
+ } else {
+ // Follow the code path used by code generation.
+ if (instruction->GetRight()->IsConstant()) {
+ int64_t imm = Int64FromConstant(instruction->GetRight()->AsConstant());
+ if (imm == 0) {
+ last_visited_internal_latency_ = 0;
+ last_visited_latency_ = 0;
+ } else if (imm == 1 || imm == -1) {
+ last_visited_internal_latency_ = 0;
+ last_visited_latency_ = kArm64IntegerOpLatency;
+ } else if (IsPowerOfTwo(AbsOrMin(imm))) {
+ last_visited_internal_latency_ = 4 * kArm64IntegerOpLatency;
+ last_visited_latency_ = kArm64IntegerOpLatency;
+ } else {
+ DCHECK(imm <= -2 || imm >= 2);
+ last_visited_internal_latency_ = 4 * kArm64IntegerOpLatency;
+ last_visited_latency_ = kArm64MulIntegerLatency;
+ }
+ } else {
+ last_visited_internal_latency_ = kArm64DivIntegerLatency;
+ last_visited_latency_ = kArm64MulIntegerLatency;
+ }
+ }
+}
+
+void SchedulingLatencyVisitorARM64::VisitStaticFieldGet(HStaticFieldGet* ATTRIBUTE_UNUSED) {
+ last_visited_latency_ = kArm64MemoryLoadLatency;
+}
+
+void SchedulingLatencyVisitorARM64::VisitSuspendCheck(HSuspendCheck* instruction) {
+ HBasicBlock* block = instruction->GetBlock();
+ DCHECK((block->GetLoopInformation() != nullptr) ||
+ (block->IsEntryBlock() && instruction->GetNext()->IsGoto()));
+ // Users do not use any data results.
+ last_visited_latency_ = 0;
+}
+
+void SchedulingLatencyVisitorARM64::VisitTypeConversion(HTypeConversion* instr) {
+ if (Primitive::IsFloatingPointType(instr->GetResultType()) ||
+ Primitive::IsFloatingPointType(instr->GetInputType())) {
+ last_visited_latency_ = kArm64TypeConversionFloatingPointIntegerLatency;
+ } else {
+ last_visited_latency_ = kArm64IntegerOpLatency;
+ }
+}
+
+} // namespace arm64
+} // namespace art
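
Each Visit method above only records the two latency fields; it is HScheduler::CalculateLatency()
that copies them onto the scheduling node. A small sketch of that hand-off, mirroring the code in
scheduler.h (the wrapper function itself is assumed, for illustration only):

    // Sketch only: how a node picks up its latencies from the arm64 visitor.
    void ComputeArm64Latency(SchedulingNode* node) {
      arm64::SchedulingLatencyVisitorARM64 visitor;
      // Resets the internal latency, then dispatches on the instruction kind.
      visitor.CalculateLatency(node);
      node->SetLatency(visitor.GetLastVisitedLatency());
      node->SetInternalLatency(visitor.GetLastVisitedInternalLatency());
    }
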
diff --git a/compiler/optimizing/scheduler_arm64.h b/compiler/optimizing/scheduler_arm64.h
new file mode 100644
index 0000000000..702027c535
--- /dev/null
+++ b/compiler/optimizing/scheduler_arm64.h
@@ -0,0 +1,117 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_SCHEDULER_ARM64_H_
+#define ART_COMPILER_OPTIMIZING_SCHEDULER_ARM64_H_
+
+#include "scheduler.h"
+
+namespace art {
+namespace arm64 {
+
+static constexpr uint32_t kArm64MemoryLoadLatency = 5;
+static constexpr uint32_t kArm64MemoryStoreLatency = 3;
+
+static constexpr uint32_t kArm64CallInternalLatency = 10;
+static constexpr uint32_t kArm64CallLatency = 5;
+
+// AArch64 instruction latency.
+// We currently assume that all arm64 CPUs share the same instruction latency list.
+static constexpr uint32_t kArm64IntegerOpLatency = 2;
+static constexpr uint32_t kArm64FloatingPointOpLatency = 5;
+
+
+static constexpr uint32_t kArm64DataProcWithShifterOpLatency = 3;
+static constexpr uint32_t kArm64DivDoubleLatency = 30;
+static constexpr uint32_t kArm64DivFloatLatency = 15;
+static constexpr uint32_t kArm64DivIntegerLatency = 5;
+static constexpr uint32_t kArm64LoadStringInternalLatency = 7;
+static constexpr uint32_t kArm64MulFloatingPointLatency = 6;
+static constexpr uint32_t kArm64MulIntegerLatency = 6;
+static constexpr uint32_t kArm64TypeConversionFloatingPointIntegerLatency = 5;
+
+class SchedulingLatencyVisitorARM64 : public SchedulingLatencyVisitor {
+ public:
+ // Default visitor for instructions not handled specifically below.
+ void VisitInstruction(HInstruction* ATTRIBUTE_UNUSED) {
+ last_visited_latency_ = kArm64IntegerOpLatency;
+ }
+
+// We add a second unused parameter to be able to use this macro like the others
+// defined in `nodes.h`.
+#define FOR_EACH_SCHEDULED_COMMON_INSTRUCTION(M) \
+ M(ArrayGet , unused) \
+ M(ArrayLength , unused) \
+ M(ArraySet , unused) \
+ M(BinaryOperation , unused) \
+ M(BoundsCheck , unused) \
+ M(Div , unused) \
+ M(InstanceFieldGet , unused) \
+ M(InstanceOf , unused) \
+ M(Invoke , unused) \
+ M(LoadString , unused) \
+ M(Mul , unused) \
+ M(NewArray , unused) \
+ M(NewInstance , unused) \
+ M(Rem , unused) \
+ M(StaticFieldGet , unused) \
+ M(SuspendCheck , unused) \
+ M(TypeConversion , unused)
+
+#define FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(M) \
+ M(BitwiseNegatedRight, unused) \
+ M(MultiplyAccumulate, unused) \
+ M(IntermediateAddress, unused)
+
+#define DECLARE_VISIT_INSTRUCTION(type, unused) \
+ void Visit##type(H##type* instruction) OVERRIDE;
+
+ FOR_EACH_SCHEDULED_COMMON_INSTRUCTION(DECLARE_VISIT_INSTRUCTION)
+ FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(DECLARE_VISIT_INSTRUCTION)
+ FOR_EACH_CONCRETE_INSTRUCTION_ARM64(DECLARE_VISIT_INSTRUCTION)
+
+#undef DECLARE_VISIT_INSTRUCTION
+};
+
+class HSchedulerARM64 : public HScheduler {
+ public:
+ HSchedulerARM64(ArenaAllocator* arena, SchedulingNodeSelector* selector)
+ : HScheduler(arena, &arm64_latency_visitor_, selector) {}
+ ~HSchedulerARM64() OVERRIDE {}
+
+ bool IsSchedulable(const HInstruction* instruction) const OVERRIDE {
+#define CASE_INSTRUCTION_KIND(type, unused) case \
+ HInstruction::InstructionKind::k##type:
+ switch (instruction->GetKind()) {
+ FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(CASE_INSTRUCTION_KIND)
+ return true;
+ FOR_EACH_CONCRETE_INSTRUCTION_ARM64(CASE_INSTRUCTION_KIND)
+ return true;
+ default:
+ return HScheduler::IsSchedulable(instruction);
+ }
+#undef CASE_INSTRUCTION_KIND
+ }
+
+ private:
+ SchedulingLatencyVisitorARM64 arm64_latency_visitor_;
+ DISALLOW_COPY_AND_ASSIGN(HSchedulerARM64);
+};
+
+} // namespace arm64
+} // namespace art
+
+#endif // ART_COMPILER_OPTIMIZING_SCHEDULER_ARM64_H_
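
A usage sketch for the class above: the selector name is taken from scheduler_test.cc below, while the Schedule(HGraph*) entry point is assumed to be the driver declared on HScheduler in scheduler.h (part of this change but not shown here); include paths are likewise assumptions.

#include "base/arena_allocator.h"
#include "nodes.h"
#include "scheduler.h"
#include "scheduler_arm64.h"

namespace art {

// Illustrative wiring only: pick a node selector, build the ARM64 scheduler
// on top of it, and let it reorder the graph's instructions.
void ScheduleGraphForArm64(HGraph* graph) {
  CriticalPathSchedulingNodeSelector selector;
  arm64::HSchedulerARM64 scheduler(graph->GetArena(), &selector);
  scheduler.Schedule(graph);  // Assumed entry point on HScheduler (scheduler.h).
}

}  // namespace art
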
diff --git a/compiler/optimizing/scheduler_test.cc b/compiler/optimizing/scheduler_test.cc
new file mode 100644
index 0000000000..31d13e2a26
--- /dev/null
+++ b/compiler/optimizing/scheduler_test.cc
@@ -0,0 +1,238 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "base/arena_allocator.h"
+#include "builder.h"
+#include "codegen_test_utils.h"
+#include "common_compiler_test.h"
+#include "nodes.h"
+#include "optimizing_unit_test.h"
+#include "pc_relative_fixups_x86.h"
+#include "register_allocator.h"
+#include "scheduler.h"
+
+#ifdef ART_ENABLE_CODEGEN_arm64
+#include "scheduler_arm64.h"
+#endif
+
+namespace art {
+
+// Return all combinations of ISA and code generator that are executable on
+// hardware or on a simulator, and that we'd like to test.
+static ::std::vector<CodegenTargetConfig> GetTargetConfigs() {
+ ::std::vector<CodegenTargetConfig> v;
+ ::std::vector<CodegenTargetConfig> test_config_candidates = {
+#ifdef ART_ENABLE_CODEGEN_arm
+ CodegenTargetConfig(kArm, create_codegen_arm),
+ CodegenTargetConfig(kThumb2, create_codegen_arm),
+#endif
+#ifdef ART_ENABLE_CODEGEN_arm64
+ CodegenTargetConfig(kArm64, create_codegen_arm64),
+#endif
+#ifdef ART_ENABLE_CODEGEN_x86
+ CodegenTargetConfig(kX86, create_codegen_x86),
+#endif
+#ifdef ART_ENABLE_CODEGEN_x86_64
+ CodegenTargetConfig(kX86_64, create_codegen_x86_64),
+#endif
+#ifdef ART_ENABLE_CODEGEN_mips
+ CodegenTargetConfig(kMips, create_codegen_mips),
+#endif
+#ifdef ART_ENABLE_CODEGEN_mips64
+ CodegenTargetConfig(kMips64, create_codegen_mips64)
+#endif
+ };
+
+ for (auto test_config : test_config_candidates) {
+ if (CanExecute(test_config.GetInstructionSet())) {
+ v.push_back(test_config);
+ }
+ }
+
+ return v;
+}
+
+class SchedulerTest : public CommonCompilerTest {};
+
+#ifdef ART_ENABLE_CODEGEN_arm64
+TEST_F(SchedulerTest, DependencyGraph) {
+ ArenaPool pool;
+ ArenaAllocator allocator(&pool);
+ HGraph* graph = CreateGraph(&allocator);
+ HBasicBlock* entry = new (&allocator) HBasicBlock(graph);
+ HBasicBlock* block1 = new (&allocator) HBasicBlock(graph);
+ graph->AddBlock(entry);
+ graph->AddBlock(block1);
+ graph->SetEntryBlock(entry);
+
+ // entry:
+ // array ParameterValue
+ // c1 IntConstant
+ // c2 IntConstant
+ // block1:
+ // add1 Add [c1, c2]
+ // add2 Add [add1, c2]
+ // mul Mul [add1, add2]
+ // div_check DivZeroCheck [add2] (env: add2, mul)
+ // div Div [add1, div_check]
+ // array_get1 ArrayGet [array, add1]
+ // array_set1 ArraySet [array, add1, add2]
+ // array_get2 ArrayGet [array, add1]
+ // array_set2 ArraySet [array, add1, add2]
+
+ HInstruction* array = new (&allocator) HParameterValue(graph->GetDexFile(),
+ dex::TypeIndex(0),
+ 0,
+ Primitive::kPrimNot);
+ HInstruction* c1 = graph->GetIntConstant(1);
+ HInstruction* c2 = graph->GetIntConstant(10);
+ HInstruction* add1 = new (&allocator) HAdd(Primitive::kPrimInt, c1, c2);
+ HInstruction* add2 = new (&allocator) HAdd(Primitive::kPrimInt, add1, c2);
+ HInstruction* mul = new (&allocator) HMul(Primitive::kPrimInt, add1, add2);
+ HInstruction* div_check = new (&allocator) HDivZeroCheck(add2, 0);
+ HInstruction* div = new (&allocator) HDiv(Primitive::kPrimInt, add1, div_check, 0);
+ HInstruction* array_get1 = new (&allocator) HArrayGet(array, add1, Primitive::kPrimInt, 0);
+ HInstruction* array_set1 = new (&allocator) HArraySet(array, add1, add2, Primitive::kPrimInt, 0);
+ HInstruction* array_get2 = new (&allocator) HArrayGet(array, add1, Primitive::kPrimInt, 0);
+ HInstruction* array_set2 = new (&allocator) HArraySet(array, add1, add2, Primitive::kPrimInt, 0);
+
+ DCHECK(div_check->CanThrow());
+
+ entry->AddInstruction(array);
+
+ HInstruction* block_instructions[] = {add1,
+ add2,
+ mul,
+ div_check,
+ div,
+ array_get1,
+ array_set1,
+ array_get2,
+ array_set2};
+ for (auto instr : block_instructions) {
+ block1->AddInstruction(instr);
+ }
+
+ HEnvironment* environment = new (&allocator) HEnvironment(&allocator,
+ 2,
+ graph->GetArtMethod(),
+ 0,
+ div_check);
+ div_check->SetRawEnvironment(environment);
+ environment->SetRawEnvAt(0, add2);
+ add2->AddEnvUseAt(div_check->GetEnvironment(), 0);
+ environment->SetRawEnvAt(1, mul);
+ mul->AddEnvUseAt(div_check->GetEnvironment(), 1);
+
+ ArenaAllocator* arena = graph->GetArena();
+ CriticalPathSchedulingNodeSelector critical_path_selector;
+ arm64::HSchedulerARM64 scheduler(arena, &critical_path_selector);
+ SchedulingGraph scheduling_graph(&scheduler, arena);
+ // Instructions must be inserted in reverse order into the scheduling graph.
+ for (auto instr : ReverseRange(block_instructions)) {
+ scheduling_graph.AddNode(instr);
+ }
+
+  // There should be no dependencies across basic blocks.
+ ASSERT_FALSE(scheduling_graph.HasImmediateDataDependency(add1, c1));
+ ASSERT_FALSE(scheduling_graph.HasImmediateDataDependency(add2, c2));
+
+ // Define-use dependency.
+ ASSERT_TRUE(scheduling_graph.HasImmediateDataDependency(add2, add1));
+ ASSERT_FALSE(scheduling_graph.HasImmediateDataDependency(add1, add2));
+ ASSERT_TRUE(scheduling_graph.HasImmediateDataDependency(div_check, add2));
+ ASSERT_FALSE(scheduling_graph.HasImmediateDataDependency(div_check, add1));
+ ASSERT_TRUE(scheduling_graph.HasImmediateDataDependency(div, div_check));
+ ASSERT_TRUE(scheduling_graph.HasImmediateDataDependency(array_set1, add1));
+ ASSERT_TRUE(scheduling_graph.HasImmediateDataDependency(array_set1, add2));
+
+  // Read and write dependencies.
+ ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(array_set1, array_get1));
+ ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(array_set2, array_get2));
+ ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(array_get2, array_set1));
+ ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(array_set2, array_set1));
+
+ // Env dependency.
+ ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(div_check, mul));
+ ASSERT_FALSE(scheduling_graph.HasImmediateOtherDependency(mul, div_check));
+
+ // CanThrow.
+ ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(array_set1, div_check));
+}
+#endif
+
+static void CompileWithRandomSchedulerAndRun(const uint16_t* data,
+ bool has_result,
+ int expected) {
+ for (CodegenTargetConfig target_config : GetTargetConfigs()) {
+ ArenaPool pool;
+ ArenaAllocator arena(&pool);
+ HGraph* graph = CreateCFG(&arena, data);
+
+ // Schedule the graph randomly.
+ HInstructionScheduling scheduling(graph, target_config.GetInstructionSet());
+ scheduling.Run(/*only_optimize_loop_blocks*/ false, /*schedule_randomly*/ true);
+
+ RunCode(target_config,
+ graph,
+ [](HGraph* graph_arg) { RemoveSuspendChecks(graph_arg); },
+ has_result, expected);
+ }
+}
+
+TEST_F(SchedulerTest, RandomScheduling) {
+ //
+  // Java source: crafted code to make sure (random) scheduling yields the correct result.
+ //
+ // int result = 0;
+ // float fr = 10.0f;
+ // for (int i = 1; i < 10; i++) {
+ // fr ++;
+ // int t1 = result >> i;
+ // int t2 = result * i;
+ // result = result + t1 - t2;
+ // fr = fr / i;
+ // result += (int)fr;
+ // }
+ // return result;
+ //
+ const uint16_t data[] = SIX_REGISTERS_CODE_ITEM(
+ Instruction::CONST_4 | 0 << 12 | 2 << 8, // const/4 v2, #int 0
+ Instruction::CONST_HIGH16 | 0 << 8, 0x4120, // const/high16 v0, #float 10.0 // #41200000
+ Instruction::CONST_4 | 1 << 12 | 1 << 8, // const/4 v1, #int 1
+ Instruction::CONST_16 | 5 << 8, 0x000a, // const/16 v5, #int 10
+ Instruction::IF_GE | 5 << 12 | 1 << 8, 0x0014, // if-ge v1, v5, 001a // +0014
+ Instruction::CONST_HIGH16 | 5 << 8, 0x3f80, // const/high16 v5, #float 1.0 // #3f800000
+ Instruction::ADD_FLOAT_2ADDR | 5 << 12 | 0 << 8, // add-float/2addr v0, v5
+    Instruction::SHR_INT | 3 << 8, 1 << 8 | 2,        // shr-int v3, v2, v1
+ Instruction::MUL_INT | 4 << 8, 1 << 8 | 2, // mul-int v4, v2, v1
+ Instruction::ADD_INT | 5 << 8, 3 << 8 | 2, // add-int v5, v2, v3
+ Instruction::SUB_INT | 2 << 8, 4 << 8 | 5, // sub-int v2, v5, v4
+ Instruction::INT_TO_FLOAT | 1 << 12 | 5 << 8, // int-to-float v5, v1
+ Instruction::DIV_FLOAT_2ADDR | 5 << 12 | 0 << 8, // div-float/2addr v0, v5
+ Instruction::FLOAT_TO_INT | 0 << 12 | 5 << 8, // float-to-int v5, v0
+ Instruction::ADD_INT_2ADDR | 5 << 12 | 2 << 8, // add-int/2addr v2, v5
+ Instruction::ADD_INT_LIT8 | 1 << 8, 1 << 8 | 1, // add-int/lit8 v1, v1, #int 1 // #01
+ Instruction::GOTO | 0xeb << 8, // goto 0004 // -0015
+ Instruction::RETURN | 2 << 8); // return v2
+
+ constexpr int kNumberOfRuns = 10;
+ for (int i = 0; i < kNumberOfRuns; ++i) {
+ CompileWithRandomSchedulerAndRun(data, true, 138774);
+ }
+}
+
+} // namespace art
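
The expected value 138774 used above can be checked independently of the compiler. Below is a minimal standalone C++ translation of the Java source from the test comment (not part of the patch); it relies on `>>` of a negative int being an arithmetic shift, which matches Java's semantics and the behavior of common C++ toolchains, and on float-to-int conversion truncating toward zero, as in Java.

#include <cstdio>

int main() {
  int result = 0;
  float fr = 10.0f;
  for (int i = 1; i < 10; i++) {
    fr += 1.0f;
    int t1 = result >> i;            // Arithmetic shift, as in Java.
    int t2 = result * i;
    result = result + t1 - t2;
    fr = fr / i;
    result += static_cast<int>(fr);  // Truncates toward zero, as in Java.
  }
  std::printf("%d\n", result);       // Prints 138774.
  return 0;
}
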
diff --git a/compiler/optimizing/sharpening.cc b/compiler/optimizing/sharpening.cc
index c5294107ae..f07f02a719 100644
--- a/compiler/optimizing/sharpening.cc
+++ b/compiler/optimizing/sharpening.cc
@@ -42,8 +42,6 @@ void HSharpening::Run() {
HInstruction* instruction = it.Current();
if (instruction->IsInvokeStaticOrDirect()) {
ProcessInvokeStaticOrDirect(instruction->AsInvokeStaticOrDirect());
- } else if (instruction->IsLoadClass()) {
- ProcessLoadClass(instruction->AsLoadClass());
} else if (instruction->IsLoadString()) {
ProcessLoadString(instruction->AsLoadString());
}
@@ -97,7 +95,9 @@ void HSharpening::ProcessInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
// class is initialized already or being initialized, and the call will not
// be invoked once the method is deoptimized.
- if (callee == codegen_->GetGraph()->GetArtMethod()) {
+  // We do not optimize when the graph is debuggable, as that would prevent us from obsoleting
+  // the method in some situations.
+ if (callee == codegen_->GetGraph()->GetArtMethod() && !codegen_->GetGraph()->IsDebuggable()) {
// Recursive call.
method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kRecursive;
code_ptr_location = HInvokeStaticOrDirect::CodePtrLocation::kCallSelf;
@@ -131,104 +131,93 @@ void HSharpening::ProcessInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
invoke->SetDispatchInfo(dispatch_info);
}
-void HSharpening::ProcessLoadClass(HLoadClass* load_class) {
- ScopedObjectAccess soa(Thread::Current());
- SharpenClass(load_class, codegen_, compiler_driver_);
-}
-
-void HSharpening::SharpenClass(HLoadClass* load_class,
- CodeGenerator* codegen,
- CompilerDriver* compiler_driver) {
+HLoadClass::LoadKind HSharpening::SharpenClass(HLoadClass* load_class,
+ CodeGenerator* codegen,
+ CompilerDriver* compiler_driver,
+ const DexCompilationUnit& dex_compilation_unit) {
Handle<mirror::Class> klass = load_class->GetClass();
DCHECK(load_class->GetLoadKind() == HLoadClass::LoadKind::kDexCacheViaMethod ||
load_class->GetLoadKind() == HLoadClass::LoadKind::kReferrersClass)
<< load_class->GetLoadKind();
DCHECK(!load_class->IsInBootImage()) << "HLoadClass should not be optimized before sharpening.";
+ HLoadClass::LoadKind load_kind = load_class->GetLoadKind();
+
if (load_class->NeedsAccessCheck()) {
// We need to call the runtime anyway, so we simply get the class as that call's return value.
- return;
- }
-
- if (load_class->GetLoadKind() == HLoadClass::LoadKind::kReferrersClass) {
+ } else if (load_kind == HLoadClass::LoadKind::kReferrersClass) {
// Loading from the ArtMethod* is the most efficient retrieval in code size.
// TODO: This may not actually be true for all architectures and
// locations of target classes. The additional register pressure
// for using the ArtMethod* should be considered.
- return;
- }
-
- const DexFile& dex_file = load_class->GetDexFile();
- dex::TypeIndex type_index = load_class->GetTypeIndex();
+ } else {
+ const DexFile& dex_file = load_class->GetDexFile();
+ dex::TypeIndex type_index = load_class->GetTypeIndex();
- bool is_in_boot_image = false;
- HLoadClass::LoadKind desired_load_kind = static_cast<HLoadClass::LoadKind>(-1);
- Runtime* runtime = Runtime::Current();
- if (codegen->GetCompilerOptions().IsBootImage()) {
- // Compiling boot image. Check if the class is a boot image class.
- DCHECK(!runtime->UseJitCompilation());
- if (!compiler_driver->GetSupportBootImageFixup()) {
- // compiler_driver_test. Do not sharpen.
- desired_load_kind = HLoadClass::LoadKind::kDexCacheViaMethod;
- } else if ((klass.Get() != nullptr) && compiler_driver->IsImageClass(
- dex_file.StringDataByIdx(dex_file.GetTypeId(type_index).descriptor_idx_))) {
- is_in_boot_image = true;
- desired_load_kind = codegen->GetCompilerOptions().GetCompilePic()
- ? HLoadClass::LoadKind::kBootImageLinkTimePcRelative
- : HLoadClass::LoadKind::kBootImageLinkTimeAddress;
+ bool is_in_boot_image = false;
+ HLoadClass::LoadKind desired_load_kind = HLoadClass::LoadKind::kInvalid;
+ Runtime* runtime = Runtime::Current();
+ if (codegen->GetCompilerOptions().IsBootImage()) {
+ // Compiling boot image. Check if the class is a boot image class.
+ DCHECK(!runtime->UseJitCompilation());
+ if (!compiler_driver->GetSupportBootImageFixup()) {
+ // compiler_driver_test. Do not sharpen.
+ desired_load_kind = HLoadClass::LoadKind::kDexCacheViaMethod;
+ } else if ((klass.Get() != nullptr) && compiler_driver->IsImageClass(
+ dex_file.StringDataByIdx(dex_file.GetTypeId(type_index).descriptor_idx_))) {
+ is_in_boot_image = true;
+ desired_load_kind = codegen->GetCompilerOptions().GetCompilePic()
+ ? HLoadClass::LoadKind::kBootImageLinkTimePcRelative
+ : HLoadClass::LoadKind::kBootImageLinkTimeAddress;
+ } else {
+ // Not a boot image class.
+ DCHECK(ContainsElement(compiler_driver->GetDexFilesForOatFile(), &dex_file));
+ desired_load_kind = HLoadClass::LoadKind::kBssEntry;
+ }
} else {
- // Not a boot image class.
- DCHECK(ContainsElement(compiler_driver->GetDexFilesForOatFile(), &dex_file));
- desired_load_kind = HLoadClass::LoadKind::kBssEntry;
- }
- } else {
- is_in_boot_image = (klass.Get() != nullptr) &&
- runtime->GetHeap()->ObjectIsInBootImageSpace(klass.Get());
- if (runtime->UseJitCompilation()) {
- // TODO: Make sure we don't set the "compile PIC" flag for JIT as that's bogus.
- // DCHECK(!codegen_->GetCompilerOptions().GetCompilePic());
- if (is_in_boot_image) {
- // TODO: Use direct pointers for all non-moving spaces, not just boot image. Bug: 29530787
+ is_in_boot_image = (klass.Get() != nullptr) &&
+ runtime->GetHeap()->ObjectIsInBootImageSpace(klass.Get());
+ if (runtime->UseJitCompilation()) {
+ // TODO: Make sure we don't set the "compile PIC" flag for JIT as that's bogus.
+ // DCHECK(!codegen_->GetCompilerOptions().GetCompilePic());
+ if (is_in_boot_image) {
+ // TODO: Use direct pointers for all non-moving spaces, not just boot image. Bug: 29530787
+ desired_load_kind = HLoadClass::LoadKind::kBootImageAddress;
+ } else if (klass.Get() != nullptr) {
+ desired_load_kind = HLoadClass::LoadKind::kJitTableAddress;
+ } else {
+ // Class not loaded yet. This happens when the dex code requesting
+ // this `HLoadClass` hasn't been executed in the interpreter.
+          // Fall back to the dex cache.
+ // TODO(ngeoffray): Generate HDeoptimize instead.
+ desired_load_kind = HLoadClass::LoadKind::kDexCacheViaMethod;
+ }
+ } else if (is_in_boot_image && !codegen->GetCompilerOptions().GetCompilePic()) {
+ // AOT app compilation. Check if the class is in the boot image.
desired_load_kind = HLoadClass::LoadKind::kBootImageAddress;
- } else if (klass.Get() != nullptr) {
- desired_load_kind = HLoadClass::LoadKind::kJitTableAddress;
} else {
- // Class not loaded yet. This happens when the dex code requesting
- // this `HLoadClass` hasn't been executed in the interpreter.
- // Fallback to the dex cache.
- // TODO(ngeoffray): Generate HDeoptimize instead.
- desired_load_kind = HLoadClass::LoadKind::kDexCacheViaMethod;
+        // Not JIT, and either the klass is not in the boot image or we are compiling in PIC mode.
+ desired_load_kind = HLoadClass::LoadKind::kBssEntry;
}
- } else if (is_in_boot_image && !codegen->GetCompilerOptions().GetCompilePic()) {
- // AOT app compilation. Check if the class is in the boot image.
- desired_load_kind = HLoadClass::LoadKind::kBootImageAddress;
- } else {
- // Not JIT and either the klass is not in boot image or we are compiling in PIC mode.
- desired_load_kind = HLoadClass::LoadKind::kBssEntry;
}
- }
- DCHECK_NE(desired_load_kind, static_cast<HLoadClass::LoadKind>(-1));
+ DCHECK_NE(desired_load_kind, HLoadClass::LoadKind::kInvalid);
- if (is_in_boot_image) {
- load_class->MarkInBootImage();
+ if (is_in_boot_image) {
+ load_class->MarkInBootImage();
+ }
+ load_kind = codegen->GetSupportedLoadClassKind(desired_load_kind);
}
- HLoadClass::LoadKind load_kind = codegen->GetSupportedLoadClassKind(desired_load_kind);
- switch (load_kind) {
- case HLoadClass::LoadKind::kBootImageLinkTimeAddress:
- case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
- case HLoadClass::LoadKind::kBssEntry:
- case HLoadClass::LoadKind::kDexCacheViaMethod:
- load_class->SetLoadKindWithTypeReference(load_kind, dex_file, type_index);
- break;
- case HLoadClass::LoadKind::kBootImageAddress:
- case HLoadClass::LoadKind::kJitTableAddress:
- load_class->SetLoadKind(load_kind);
- break;
- default:
- LOG(FATAL) << "Unexpected load kind: " << load_kind;
- UNREACHABLE();
+ if (!IsSameDexFile(load_class->GetDexFile(), *dex_compilation_unit.GetDexFile())) {
+ if ((load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) ||
+ (load_kind == HLoadClass::LoadKind::kBssEntry)) {
+      // We actually cannot reference this class; we are forced to bail.
+      // A BSS entry would not work either, as the entrypoint looks up the class in the
+      // caller's dex file, but that dex file does not reference the class.
+ return HLoadClass::LoadKind::kInvalid;
+ }
}
+ return load_kind;
}
void HSharpening::ProcessLoadString(HLoadString* load_string) {
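
SharpenClass now reports the chosen load kind instead of applying it, and kInvalid signals that the class lives in a dex file the compiled code cannot reference (both the BSS and dex-cache paths resolve through the caller's dex file). Below is a hedged sketch of a call site, as used by the builder or the inliner per sharpening.h further down; the helper name, include paths, and the exact bail-out policy are assumptions, not part of this change.

#include "code_generator.h"
#include "driver/compiler_driver.h"
#include "driver/dex_compilation_unit.h"
#include "nodes.h"
#include "sharpening.h"

namespace art {

// Illustrative call site only: ask the sharpener for a load kind and bail
// out when the class cannot be referenced from this compilation unit.
static bool CanKeepLoadClass(HLoadClass* load_class,
                             CodeGenerator* codegen,
                             CompilerDriver* compiler_driver,
                             const DexCompilationUnit& dex_compilation_unit)
    REQUIRES_SHARED(Locks::mutator_lock_) {
  HLoadClass::LoadKind kind = HSharpening::SharpenClass(
      load_class, codegen, compiler_driver, dex_compilation_unit);
  return kind != HLoadClass::LoadKind::kInvalid;
}

}  // namespace art
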
diff --git a/compiler/optimizing/sharpening.h b/compiler/optimizing/sharpening.h
index ae3d83ef2c..4240b2f339 100644
--- a/compiler/optimizing/sharpening.h
+++ b/compiler/optimizing/sharpening.h
@@ -17,6 +17,7 @@
#ifndef ART_COMPILER_OPTIMIZING_SHARPENING_H_
#define ART_COMPILER_OPTIMIZING_SHARPENING_H_
+#include "nodes.h"
#include "optimization.h"
namespace art {
@@ -24,7 +25,6 @@ namespace art {
class CodeGenerator;
class CompilerDriver;
class DexCompilationUnit;
-class HInvokeStaticOrDirect;
// Optimization that tries to improve the way we dispatch methods and access types,
// fields, etc. Besides actual method sharpening based on receiver type (for example
@@ -47,15 +47,15 @@ class HSharpening : public HOptimization {
static constexpr const char* kSharpeningPassName = "sharpening";
- // Used internally but also by the inliner.
- static void SharpenClass(HLoadClass* load_class,
- CodeGenerator* codegen,
- CompilerDriver* compiler_driver)
+ // Used by the builder and the inliner.
+ static HLoadClass::LoadKind SharpenClass(HLoadClass* load_class,
+ CodeGenerator* codegen,
+ CompilerDriver* compiler_driver,
+ const DexCompilationUnit& dex_compilation_unit)
REQUIRES_SHARED(Locks::mutator_lock_);
private:
void ProcessInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke);
- void ProcessLoadClass(HLoadClass* load_class);
void ProcessLoadString(HLoadString* load_string);
CodeGenerator* codegen_;
diff --git a/compiler/optimizing/ssa_builder.cc b/compiler/optimizing/ssa_builder.cc
index ae1e369999..487e4dd498 100644
--- a/compiler/optimizing/ssa_builder.cc
+++ b/compiler/optimizing/ssa_builder.cc
@@ -17,8 +17,10 @@
#include "ssa_builder.h"
#include "bytecode_utils.h"
+#include "mirror/class-inl.h"
#include "nodes.h"
#include "reference_type_propagation.h"
+#include "scoped_thread_state_change-inl.h"
#include "ssa_phi_elimination.h"
namespace art {
diff --git a/compiler/optimizing/stack_map_stream.cc b/compiler/optimizing/stack_map_stream.cc
index a9a1e6f592..10f5cab907 100644
--- a/compiler/optimizing/stack_map_stream.cc
+++ b/compiler/optimizing/stack_map_stream.cc
@@ -16,7 +16,11 @@
#include "stack_map_stream.h"
-#include "art_method.h"
+#include <unordered_map>
+
+#include "art_method-inl.h"
+#include "base/stl_util.h"
+#include "optimizing/optimizing_compiler.h"
#include "runtime.h"
#include "scoped_thread_state_change-inl.h"
@@ -40,6 +44,7 @@ void StackMapStream::BeginStackMapEntry(uint32_t dex_pc,
current_entry_.inline_infos_start_index = inline_infos_.size();
current_entry_.dex_register_map_hash = 0;
current_entry_.same_dex_register_map_as_ = kNoSameDexMapFound;
+ current_entry_.stack_mask_index = 0;
if (num_dex_registers != 0) {
current_entry_.live_dex_registers_mask =
ArenaBitVector::Create(allocator_, num_dex_registers, true, kArenaAllocStackMapStream);
@@ -103,11 +108,6 @@ void StackMapStream::AddDexRegisterEntry(DexRegisterLocation::Kind kind, int32_t
current_dex_register_++;
}
-static bool EncodeArtMethodInInlineInfo(ArtMethod* method ATTRIBUTE_UNUSED) {
- // Note: the runtime is null only for unit testing.
- return Runtime::Current() == nullptr || !Runtime::Current()->IsAotCompiler();
-}
-
void StackMapStream::BeginInlineInfoEntry(ArtMethod* method,
uint32_t dex_pc,
uint32_t num_dex_registers,
@@ -153,32 +153,43 @@ CodeOffset StackMapStream::ComputeMaxNativePcCodeOffset() const {
}
size_t StackMapStream::PrepareForFillIn() {
- int stack_mask_number_of_bits = stack_mask_max_ + 1; // Need room for max element too.
+ const size_t stack_mask_size_in_bits = stack_mask_max_ + 1; // Need room for max element too.
+ const size_t number_of_stack_masks = PrepareStackMasks(stack_mask_size_in_bits);
+ const size_t register_mask_size_in_bits = MinimumBitsToStore(register_mask_max_);
+ const size_t number_of_register_masks = PrepareRegisterMasks();
dex_register_maps_size_ = ComputeDexRegisterMapsSize();
ComputeInlineInfoEncoding(); // needs dex_register_maps_size_.
inline_info_size_ = inline_infos_.size() * inline_info_encoding_.GetEntrySize();
CodeOffset max_native_pc_offset = ComputeMaxNativePcCodeOffset();
- // The stack map contains compressed native offsets.
- size_t stack_map_size = stack_map_encoding_.SetFromSizes(max_native_pc_offset.CompressedValue(),
- dex_pc_max_,
- dex_register_maps_size_,
- inline_info_size_,
- register_mask_max_,
- stack_mask_number_of_bits);
- stack_maps_size_ = stack_maps_.size() * stack_map_size;
+ // The stack map contains compressed native PC offsets.
+ const size_t stack_map_size = stack_map_encoding_.SetFromSizes(
+ max_native_pc_offset.CompressedValue(),
+ dex_pc_max_,
+ dex_register_maps_size_,
+ inline_info_size_,
+ number_of_register_masks,
+ number_of_stack_masks);
+ stack_maps_size_ = RoundUp(stack_maps_.size() * stack_map_size, kBitsPerByte) / kBitsPerByte;
dex_register_location_catalog_size_ = ComputeDexRegisterLocationCatalogSize();
-
- size_t non_header_size =
+ const size_t stack_masks_bits = number_of_stack_masks * stack_mask_size_in_bits;
+ const size_t register_masks_bits = number_of_register_masks * register_mask_size_in_bits;
+  // Register masks come last; stack masks are right before them.
+ // They are both bit packed / aligned.
+ const size_t non_header_size =
stack_maps_size_ +
dex_register_location_catalog_size_ +
dex_register_maps_size_ +
- inline_info_size_;
+ inline_info_size_ +
+ RoundUp(stack_masks_bits + register_masks_bits, kBitsPerByte) / kBitsPerByte;
// Prepare the CodeInfo variable-sized encoding.
CodeInfoEncoding code_info_encoding;
code_info_encoding.non_header_size = non_header_size;
code_info_encoding.number_of_stack_maps = stack_maps_.size();
- code_info_encoding.stack_map_size_in_bytes = stack_map_size;
+ code_info_encoding.number_of_stack_masks = number_of_stack_masks;
+ code_info_encoding.number_of_register_masks = number_of_register_masks;
+ code_info_encoding.stack_mask_size_in_bits = stack_mask_size_in_bits;
+ code_info_encoding.register_mask_size_in_bits = register_mask_size_in_bits;
code_info_encoding.stack_map_encoding = stack_map_encoding_;
code_info_encoding.inline_info_encoding = inline_info_encoding_;
code_info_encoding.number_of_location_catalog_entries = location_catalog_entries_.size();
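
A worked example of the new size arithmetic (the numbers are assumed, not taken from the patch): stack maps stay byte-aligned as a group, while the deduplicated stack mask and register mask tables are bit-packed together and only their combined size is rounded up to bytes.

#include <cstddef>

constexpr size_t kBitsPerByteExample = 8;
constexpr size_t RoundUpBitsToBytes(size_t bits) {
  return (bits + kBitsPerByteExample - 1) / kBitsPerByteExample;
}

// Assumed inputs: 3 stack maps of 13 bits each, 2 unique stack masks of
// 9 bits (stack_mask_max_ + 1), and 3 unique register masks of 4 bits.
constexpr size_t kStackMapsBytes  = RoundUpBitsToBytes(3 * 13);          // 39 bits -> 5 bytes.
constexpr size_t kMaskTablesBytes = RoundUpBitsToBytes(2 * 9 + 3 * 4);   // 30 bits -> 4 bytes.

static_assert(kStackMapsBytes == 5, "stack maps are rounded up to whole bytes as a group");
static_assert(kMaskTablesBytes == 4, "stack and register mask tables share one byte-rounding");
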
@@ -321,18 +332,8 @@ void StackMapStream::FillIn(MemoryRegion region) {
stack_map.SetDexPc(stack_map_encoding_, entry.dex_pc);
stack_map.SetNativePcCodeOffset(stack_map_encoding_, entry.native_pc_code_offset);
- stack_map.SetRegisterMask(stack_map_encoding_, entry.register_mask);
- size_t number_of_stack_mask_bits = stack_map.GetNumberOfStackMaskBits(stack_map_encoding_);
- if (entry.sp_mask != nullptr) {
- for (size_t bit = 0; bit < number_of_stack_mask_bits; bit++) {
- stack_map.SetStackMaskBit(stack_map_encoding_, bit, entry.sp_mask->IsBitSet(bit));
- }
- } else {
- // The MemoryRegion does not have to be zeroed, so make sure we clear the bits.
- for (size_t bit = 0; bit < number_of_stack_mask_bits; bit++) {
- stack_map.SetStackMaskBit(stack_map_encoding_, bit, false);
- }
- }
+ stack_map.SetRegisterMaskIndex(stack_map_encoding_, entry.register_mask_index);
+ stack_map.SetStackMaskIndex(stack_map_encoding_, entry.stack_mask_index);
if (entry.num_dex_registers == 0 || (entry.live_dex_registers_mask->NumSetBits() == 0)) {
// No dex map available.
@@ -353,7 +354,7 @@ void StackMapStream::FillIn(MemoryRegion region) {
next_dex_register_map_offset += register_region.size();
DexRegisterMap dex_register_map(register_region);
stack_map.SetDexRegisterMapOffset(
- stack_map_encoding_, register_region.start() - dex_register_locations_region.start());
+ stack_map_encoding_, register_region.begin() - dex_register_locations_region.begin());
// Set the dex register location.
FillInDexRegisterMap(dex_register_map,
@@ -373,7 +374,7 @@ void StackMapStream::FillIn(MemoryRegion region) {
// Currently relative to the dex register map.
stack_map.SetInlineDescriptorOffset(
- stack_map_encoding_, inline_region.start() - dex_register_locations_region.start());
+ stack_map_encoding_, inline_region.begin() - dex_register_locations_region.begin());
inline_info.SetDepth(inline_info_encoding_, entry.inlining_depth);
DCHECK_LE(entry.inline_infos_start_index + entry.inlining_depth, inline_infos_.size());
@@ -408,7 +409,7 @@ void StackMapStream::FillIn(MemoryRegion region) {
DexRegisterMap dex_register_map(register_region);
inline_info.SetDexRegisterMapOffsetAtDepth(
inline_info_encoding_,
- depth, register_region.start() - dex_register_locations_region.start());
+ depth, register_region.begin() - dex_register_locations_region.begin());
FillInDexRegisterMap(dex_register_map,
inline_entry.num_dex_registers,
@@ -423,6 +424,25 @@ void StackMapStream::FillIn(MemoryRegion region) {
}
}
+ // Write stack masks table.
+ size_t stack_mask_bits = encoding.stack_mask_size_in_bits;
+ if (stack_mask_bits > 0) {
+ size_t stack_mask_bytes = RoundUp(stack_mask_bits, kBitsPerByte) / kBitsPerByte;
+ for (size_t i = 0; i < encoding.number_of_stack_masks; ++i) {
+ MemoryRegion source(&stack_masks_[i * stack_mask_bytes], stack_mask_bytes);
+ BitMemoryRegion stack_mask = code_info.GetStackMask(encoding, i);
+ for (size_t bit_index = 0; bit_index < encoding.stack_mask_size_in_bits; ++bit_index) {
+ stack_mask.StoreBit(bit_index, source.LoadBit(bit_index));
+ }
+ }
+ }
+
+ // Write register masks table.
+ for (size_t i = 0; i < encoding.number_of_register_masks; ++i) {
+ BitMemoryRegion register_mask = code_info.GetRegisterMask(encoding, i);
+ register_mask.StoreBits(0, register_masks_[i], encoding.register_mask_size_in_bits);
+ }
+
// Verify all written data in debug build.
if (kIsDebugBuild) {
CheckCodeInfo(region);
@@ -536,6 +556,38 @@ void StackMapStream::CheckDexRegisterMap(const CodeInfo& code_info,
}
}
+size_t StackMapStream::PrepareRegisterMasks() {
+ register_masks_.resize(stack_maps_.size(), 0u);
+ std::unordered_map<uint32_t, size_t> dedupe;
+ for (StackMapEntry& stack_map : stack_maps_) {
+ const size_t index = dedupe.size();
+ stack_map.register_mask_index = dedupe.emplace(stack_map.register_mask, index).first->second;
+ register_masks_[index] = stack_map.register_mask;
+ }
+ return dedupe.size();
+}
+
+size_t StackMapStream::PrepareStackMasks(size_t entry_size_in_bits) {
+ // Preallocate memory since we do not want it to move (the dedup map will point into it).
+ const size_t byte_entry_size = RoundUp(entry_size_in_bits, kBitsPerByte) / kBitsPerByte;
+ stack_masks_.resize(byte_entry_size * stack_maps_.size(), 0u);
+  // For deduplication we store the stack masks byte-packed for simplicity. We bit-pack them later,
+  // when copying out from stack_masks_.
+ std::unordered_map<MemoryRegion,
+ size_t,
+ FNVHash<MemoryRegion>,
+ MemoryRegion::ContentEquals> dedup(stack_maps_.size());
+ for (StackMapEntry& stack_map : stack_maps_) {
+ size_t index = dedup.size();
+ MemoryRegion stack_mask(stack_masks_.data() + index * byte_entry_size, byte_entry_size);
+ for (size_t i = 0; i < entry_size_in_bits; i++) {
+ stack_mask.StoreBit(i, stack_map.sp_mask != nullptr && stack_map.sp_mask->IsBitSet(i));
+ }
+ stack_map.stack_mask_index = dedup.emplace(stack_mask, index).first->second;
+ }
+ return dedup.size();
+}
+
// Check that all StackMapStream inputs are correctly encoded by trying to read them back.
void StackMapStream::CheckCodeInfo(MemoryRegion region) const {
CodeInfo code_info(region);
@@ -550,16 +602,19 @@ void StackMapStream::CheckCodeInfo(MemoryRegion region) const {
DCHECK_EQ(stack_map.GetNativePcOffset(stack_map_encoding, instruction_set_),
entry.native_pc_code_offset.Uint32Value(instruction_set_));
DCHECK_EQ(stack_map.GetDexPc(stack_map_encoding), entry.dex_pc);
- DCHECK_EQ(stack_map.GetRegisterMask(stack_map_encoding), entry.register_mask);
- size_t num_stack_mask_bits = stack_map.GetNumberOfStackMaskBits(stack_map_encoding);
+ DCHECK_EQ(stack_map.GetRegisterMaskIndex(stack_map_encoding), entry.register_mask_index);
+ DCHECK_EQ(code_info.GetRegisterMaskOf(encoding, stack_map), entry.register_mask);
+ const size_t num_stack_mask_bits = code_info.GetNumberOfStackMaskBits(encoding);
+ DCHECK_EQ(stack_map.GetStackMaskIndex(stack_map_encoding), entry.stack_mask_index);
+ BitMemoryRegion stack_mask = code_info.GetStackMaskOf(encoding, stack_map);
if (entry.sp_mask != nullptr) {
- DCHECK_GE(num_stack_mask_bits, entry.sp_mask->GetNumberOfBits());
+ DCHECK_GE(stack_mask.size_in_bits(), entry.sp_mask->GetNumberOfBits());
for (size_t b = 0; b < num_stack_mask_bits; b++) {
- DCHECK_EQ(stack_map.GetStackMaskBit(stack_map_encoding, b), entry.sp_mask->IsBitSet(b));
+ DCHECK_EQ(stack_mask.LoadBit(b), entry.sp_mask->IsBitSet(b));
}
} else {
for (size_t b = 0; b < num_stack_mask_bits; b++) {
- DCHECK_EQ(stack_map.GetStackMaskBit(stack_map_encoding, b), 0u);
+ DCHECK_EQ(stack_mask.LoadBit(b), 0u);
}
}
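
PrepareRegisterMasks and PrepareStackMasks above both rely on the same emplace idiom: the candidate index is the current map size, and emplace either inserts the candidate or returns the iterator of an existing equal entry, so every duplicate resolves to the index of its first occurrence. Below is a standalone sketch of the idiom (not part of the patch), using register masks for brevity.

#include <cstdint>
#include <cstdio>
#include <unordered_map>
#include <vector>

int main() {
  const std::vector<uint32_t> register_masks = {0x3, 0xFF, 0x3, 0xAB, 0xFF};
  std::unordered_map<uint32_t, size_t> dedupe;
  std::vector<size_t> indices;
  for (uint32_t mask : register_masks) {
    const size_t candidate_index = dedupe.size();
    // emplace() is a no-op if the mask is already present; in both cases
    // first->second is the index this mask should use.
    indices.push_back(dedupe.emplace(mask, candidate_index).first->second);
  }
  for (size_t index : indices) {
    std::printf("%zu ", index);  // Prints: 0 1 0 2 1
  }
  std::printf("\n");
  return 0;
}
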
diff --git a/compiler/optimizing/stack_map_stream.h b/compiler/optimizing/stack_map_stream.h
index 8fec472437..b1069a17be 100644
--- a/compiler/optimizing/stack_map_stream.h
+++ b/compiler/optimizing/stack_map_stream.h
@@ -68,6 +68,8 @@ class StackMapStream : public ValueObject {
location_catalog_entries_indices_(allocator->Adapter(kArenaAllocStackMapStream)),
dex_register_locations_(allocator->Adapter(kArenaAllocStackMapStream)),
inline_infos_(allocator->Adapter(kArenaAllocStackMapStream)),
+ stack_masks_(allocator->Adapter(kArenaAllocStackMapStream)),
+ register_masks_(allocator->Adapter(kArenaAllocStackMapStream)),
stack_mask_max_(-1),
dex_pc_max_(0),
register_mask_max_(0),
@@ -107,6 +109,8 @@ class StackMapStream : public ValueObject {
BitVector* live_dex_registers_mask;
uint32_t dex_register_map_hash;
size_t same_dex_register_map_as_;
+ uint32_t stack_mask_index;
+ uint32_t register_mask_index;
};
struct InlineInfoEntry {
@@ -160,6 +164,12 @@ class StackMapStream : public ValueObject {
CodeOffset ComputeMaxNativePcCodeOffset() const;
+ // Returns the number of unique stack masks.
+ size_t PrepareStackMasks(size_t entry_size_in_bits);
+
+ // Returns the number of unique register masks.
+ size_t PrepareRegisterMasks();
+
// Returns the index of an entry with the same dex register map as the current_entry,
// or kNoSameDexMapFound if no such entry exists.
size_t FindEntryWithTheSameDexMap();
@@ -193,6 +203,8 @@ class StackMapStream : public ValueObject {
// A set of concatenated maps of Dex register locations indices to `location_catalog_entries_`.
ArenaVector<size_t> dex_register_locations_;
ArenaVector<InlineInfoEntry> inline_infos_;
+ ArenaVector<uint8_t> stack_masks_;
+ ArenaVector<uint32_t> register_masks_;
int stack_mask_max_;
uint32_t dex_pc_max_;
uint32_t register_mask_max_;
diff --git a/compiler/optimizing/stack_map_test.cc b/compiler/optimizing/stack_map_test.cc
index f68695bcbc..ce6d5c2b22 100644
--- a/compiler/optimizing/stack_map_test.cc
+++ b/compiler/optimizing/stack_map_test.cc
@@ -27,15 +27,16 @@ namespace art {
// Check that the stack mask of given stack map is identical
// to the given bit vector. Returns true if they are same.
static bool CheckStackMask(
+ const CodeInfo& code_info,
+ const CodeInfoEncoding& encoding,
const StackMap& stack_map,
- StackMapEncoding& encoding,
const BitVector& bit_vector) {
- int number_of_bits = stack_map.GetNumberOfStackMaskBits(encoding);
- if (bit_vector.GetHighestBitSet() >= number_of_bits) {
+ BitMemoryRegion stack_mask = code_info.GetStackMaskOf(encoding, stack_map);
+ if (bit_vector.GetNumberOfBits() > encoding.stack_mask_size_in_bits) {
return false;
}
- for (int i = 0; i < number_of_bits; ++i) {
- if (stack_map.GetStackMaskBit(encoding, i) != bit_vector.IsBitSet(i)) {
+ for (size_t i = 0; i < encoding.stack_mask_size_in_bits; ++i) {
+ if (stack_mask.LoadBit(i) != bit_vector.IsBitSet(i)) {
return false;
}
}
@@ -79,9 +80,9 @@ TEST(StackMapTest, Test1) {
ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForNativePcOffset(64, encoding)));
ASSERT_EQ(0u, stack_map.GetDexPc(encoding.stack_map_encoding));
ASSERT_EQ(64u, stack_map.GetNativePcOffset(encoding.stack_map_encoding, kRuntimeISA));
- ASSERT_EQ(0x3u, stack_map.GetRegisterMask(encoding.stack_map_encoding));
+ ASSERT_EQ(0x3u, code_info.GetRegisterMaskOf(encoding, stack_map));
- ASSERT_TRUE(CheckStackMask(stack_map, encoding.stack_map_encoding, sp_mask));
+ ASSERT_TRUE(CheckStackMask(code_info, encoding, stack_map, sp_mask));
ASSERT_TRUE(stack_map.HasDexRegisterMap(encoding.stack_map_encoding));
DexRegisterMap dex_register_map =
@@ -194,9 +195,9 @@ TEST(StackMapTest, Test2) {
ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForNativePcOffset(64, encoding)));
ASSERT_EQ(0u, stack_map.GetDexPc(encoding.stack_map_encoding));
ASSERT_EQ(64u, stack_map.GetNativePcOffset(encoding.stack_map_encoding, kRuntimeISA));
- ASSERT_EQ(0x3u, stack_map.GetRegisterMask(encoding.stack_map_encoding));
+ ASSERT_EQ(0x3u, code_info.GetRegisterMaskOf(encoding, stack_map));
- ASSERT_TRUE(CheckStackMask(stack_map, encoding.stack_map_encoding, sp_mask1));
+ ASSERT_TRUE(CheckStackMask(code_info, encoding, stack_map, sp_mask1));
ASSERT_TRUE(stack_map.HasDexRegisterMap(encoding.stack_map_encoding));
DexRegisterMap dex_register_map =
@@ -253,9 +254,9 @@ TEST(StackMapTest, Test2) {
ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForNativePcOffset(128u, encoding)));
ASSERT_EQ(1u, stack_map.GetDexPc(encoding.stack_map_encoding));
ASSERT_EQ(128u, stack_map.GetNativePcOffset(encoding.stack_map_encoding, kRuntimeISA));
- ASSERT_EQ(0xFFu, stack_map.GetRegisterMask(encoding.stack_map_encoding));
+ ASSERT_EQ(0xFFu, code_info.GetRegisterMaskOf(encoding, stack_map));
- ASSERT_TRUE(CheckStackMask(stack_map, encoding.stack_map_encoding, sp_mask2));
+ ASSERT_TRUE(CheckStackMask(code_info, encoding, stack_map, sp_mask2));
ASSERT_TRUE(stack_map.HasDexRegisterMap(encoding.stack_map_encoding));
DexRegisterMap dex_register_map =
@@ -307,9 +308,9 @@ TEST(StackMapTest, Test2) {
ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForNativePcOffset(192u, encoding)));
ASSERT_EQ(2u, stack_map.GetDexPc(encoding.stack_map_encoding));
ASSERT_EQ(192u, stack_map.GetNativePcOffset(encoding.stack_map_encoding, kRuntimeISA));
- ASSERT_EQ(0xABu, stack_map.GetRegisterMask(encoding.stack_map_encoding));
+ ASSERT_EQ(0xABu, code_info.GetRegisterMaskOf(encoding, stack_map));
- ASSERT_TRUE(CheckStackMask(stack_map, encoding.stack_map_encoding, sp_mask3));
+ ASSERT_TRUE(CheckStackMask(code_info, encoding, stack_map, sp_mask3));
ASSERT_TRUE(stack_map.HasDexRegisterMap(encoding.stack_map_encoding));
DexRegisterMap dex_register_map =
@@ -361,9 +362,9 @@ TEST(StackMapTest, Test2) {
ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForNativePcOffset(256u, encoding)));
ASSERT_EQ(3u, stack_map.GetDexPc(encoding.stack_map_encoding));
ASSERT_EQ(256u, stack_map.GetNativePcOffset(encoding.stack_map_encoding, kRuntimeISA));
- ASSERT_EQ(0xCDu, stack_map.GetRegisterMask(encoding.stack_map_encoding));
+ ASSERT_EQ(0xCDu, code_info.GetRegisterMaskOf(encoding, stack_map));
- ASSERT_TRUE(CheckStackMask(stack_map, encoding.stack_map_encoding, sp_mask4));
+ ASSERT_TRUE(CheckStackMask(code_info, encoding, stack_map, sp_mask4));
ASSERT_TRUE(stack_map.HasDexRegisterMap(encoding.stack_map_encoding));
DexRegisterMap dex_register_map =
@@ -443,7 +444,7 @@ TEST(StackMapTest, TestNonLiveDexRegisters) {
ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForNativePcOffset(64, encoding)));
ASSERT_EQ(0u, stack_map.GetDexPc(encoding.stack_map_encoding));
ASSERT_EQ(64u, stack_map.GetNativePcOffset(encoding.stack_map_encoding, kRuntimeISA));
- ASSERT_EQ(0x3u, stack_map.GetRegisterMask(encoding.stack_map_encoding));
+ ASSERT_EQ(0x3u, code_info.GetRegisterMaskOf(encoding, stack_map));
ASSERT_TRUE(stack_map.HasDexRegisterMap(encoding.stack_map_encoding));
DexRegisterMap dex_register_map =
@@ -642,7 +643,7 @@ TEST(StackMapTest, TestNoDexRegisterMap) {
ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForNativePcOffset(64, encoding)));
ASSERT_EQ(0u, stack_map.GetDexPc(encoding.stack_map_encoding));
ASSERT_EQ(64u, stack_map.GetNativePcOffset(encoding.stack_map_encoding, kRuntimeISA));
- ASSERT_EQ(0x3u, stack_map.GetRegisterMask(encoding.stack_map_encoding));
+ ASSERT_EQ(0x3u, code_info.GetRegisterMaskOf(encoding, stack_map));
ASSERT_FALSE(stack_map.HasDexRegisterMap(encoding.stack_map_encoding));
ASSERT_FALSE(stack_map.HasInlineInfo(encoding.stack_map_encoding));
@@ -652,7 +653,7 @@ TEST(StackMapTest, TestNoDexRegisterMap) {
ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForNativePcOffset(68, encoding)));
ASSERT_EQ(1u, stack_map.GetDexPc(encoding.stack_map_encoding));
ASSERT_EQ(68u, stack_map.GetNativePcOffset(encoding.stack_map_encoding, kRuntimeISA));
- ASSERT_EQ(0x4u, stack_map.GetRegisterMask(encoding.stack_map_encoding));
+ ASSERT_EQ(0x4u, code_info.GetRegisterMaskOf(encoding, stack_map));
ASSERT_FALSE(stack_map.HasDexRegisterMap(encoding.stack_map_encoding));
ASSERT_FALSE(stack_map.HasInlineInfo(encoding.stack_map_encoding));
@@ -839,4 +840,33 @@ TEST(StackMapTest, CodeOffsetTest) {
EXPECT_EQ(offset_mips64.Uint32Value(kMips64), kMips64InstructionAlignment);
}
+
+TEST(StackMapTest, TestDeduplicateStackMask) {
+ ArenaPool pool;
+ ArenaAllocator arena(&pool);
+ StackMapStream stream(&arena, kRuntimeISA);
+
+ ArenaBitVector sp_mask(&arena, 0, true);
+ sp_mask.SetBit(1);
+ sp_mask.SetBit(4);
+ stream.BeginStackMapEntry(0, 4, 0x3, &sp_mask, 0, 0);
+ stream.EndStackMapEntry();
+ stream.BeginStackMapEntry(0, 8, 0x3, &sp_mask, 0, 0);
+ stream.EndStackMapEntry();
+
+ size_t size = stream.PrepareForFillIn();
+ void* memory = arena.Alloc(size, kArenaAllocMisc);
+ MemoryRegion region(memory, size);
+ stream.FillIn(region);
+
+ CodeInfo code_info(region);
+ CodeInfoEncoding encoding = code_info.ExtractEncoding();
+ ASSERT_EQ(2u, code_info.GetNumberOfStackMaps(encoding));
+
+ StackMap stack_map1 = code_info.GetStackMapForNativePcOffset(4, encoding);
+ StackMap stack_map2 = code_info.GetStackMapForNativePcOffset(8, encoding);
+ EXPECT_EQ(stack_map1.GetStackMaskIndex(encoding.stack_map_encoding),
+ stack_map2.GetStackMaskIndex(encoding.stack_map_encoding));
+}
+
} // namespace art