Diffstat (limited to 'compiler/optimizing')
23 files changed, 1242 insertions, 487 deletions
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h index 85002045a3..49f4f18390 100644 --- a/compiler/optimizing/code_generator.h +++ b/compiler/optimizing/code_generator.h @@ -531,40 +531,15 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { uint32_t GetReferenceDisableFlagOffset() const; protected: - // Method patch info used for recording locations of required linker patches and - // target methods. The target method can be used for various purposes, whether for - // patching the address of the method or the code pointer or a PC-relative call. + // Patch info used for recording locations of required linker patches and their targets, + // i.e. target method, string, type or code identified by their dex file and index. template <typename LabelType> - struct MethodPatchInfo { - explicit MethodPatchInfo(MethodReference m) : target_method(m), label() { } - - MethodReference target_method; - LabelType label; - }; - - // String patch info used for recording locations of required linker patches and - // target strings. The actual string address can be absolute or PC-relative. - template <typename LabelType> - struct StringPatchInfo { - StringPatchInfo(const DexFile& df, uint32_t index) - : dex_file(df), string_index(index), label() { } - - const DexFile& dex_file; - uint32_t string_index; - LabelType label; - }; - - // Type patch info used for recording locations of required linker patches and - // target types. The actual type address can be absolute or PC-relative. - // TODO: Consider merging with MethodPatchInfo and StringPatchInfo - all these - // classes contain the dex file, some index and the label. - template <typename LabelType> - struct TypePatchInfo { - TypePatchInfo(const DexFile& df, uint32_t index) - : dex_file(df), type_index(index), label() { } + struct PatchInfo { + PatchInfo(const DexFile& target_dex_file, uint32_t target_index) + : dex_file(target_dex_file), index(target_index) { } const DexFile& dex_file; - uint32_t type_index; + uint32_t index; LabelType label; }; diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index 681988d2ac..9870876879 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -422,6 +422,50 @@ class LoadClassSlowPathARM : public SlowPathCodeARM { DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathARM); }; +class LoadStringSlowPathARM : public SlowPathCodeARM { + public: + explicit LoadStringSlowPathARM(HLoadString* instruction) : SlowPathCodeARM(instruction) {} + + void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + LocationSummary* locations = instruction_->GetLocations(); + DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg())); + + CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen); + __ Bind(GetEntryLabel()); + SaveLiveRegisters(codegen, locations); + + InvokeRuntimeCallingConvention calling_convention; + HLoadString* load = instruction_->AsLoadString(); + const uint32_t string_index = load->GetStringIndex(); + __ LoadImmediate(calling_convention.GetRegisterAt(0), string_index); + arm_codegen->InvokeRuntime(kQuickResolveString, instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>(); + arm_codegen->Move32(locations->Out(), Location::RegisterLocation(R0)); + + RestoreLiveRegisters(codegen, locations); + + // Store the resolved String to the BSS entry. 
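(Aside: the code_generator.h hunk above merges the three per-kind patch structs into a single PatchInfo template. A minimal standalone sketch of the merged shape, using simplified stand-ins for DexFile and the backend label type rather than the real ART classes:)

#include <cstdint>
#include <deque>

struct DexFile {};   // stand-in for art::DexFile
struct Label {};     // stand-in for a backend assembler label

template <typename LabelType>
struct PatchInfo {
  PatchInfo(const DexFile& target_dex_file, uint32_t target_index)
      : dex_file(target_dex_file), index(target_index) { }

  const DexFile& dex_file;  // dex file defining the target
  uint32_t index;           // method/string/type index, depending on patch kind
  LabelType label;          // location of the code to patch
};

int main() {
  DexFile dex;
  // One container type now serves relative-call, string and type patches alike:
  std::deque<PatchInfo<Label>> relative_call_patches;
  relative_call_patches.emplace_back(dex, /* dex_method_index */ 42u);
}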
+ // TODO: Change art_quick_resolve_string to kSaveEverything and use a temporary for the + // .bss entry address in the fast path, so that we can avoid another calculation here. + CodeGeneratorARM::PcRelativePatchInfo* labels = + arm_codegen->NewPcRelativeStringPatch(load->GetDexFile(), string_index); + __ BindTrackedLabel(&labels->movw_label); + __ movw(IP, /* placeholder */ 0u); + __ BindTrackedLabel(&labels->movt_label); + __ movt(IP, /* placeholder */ 0u); + __ BindTrackedLabel(&labels->add_pc_label); + __ add(IP, IP, ShifterOperand(PC)); + __ str(locations->Out().AsRegister<Register>(), Address(IP)); + + __ b(GetExitLabel()); + } + + const char* GetDescription() const OVERRIDE { return "LoadStringSlowPathARM"; } + + private: + DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathARM); +}; + class TypeCheckSlowPathARM : public SlowPathCodeARM { public: TypeCheckSlowPathARM(HInstruction* instruction, bool is_fatal) @@ -5641,15 +5685,8 @@ HLoadString::LoadKind CodeGeneratorARM::GetSupportedLoadStringKind( case HLoadString::LoadKind::kDexCacheAddress: DCHECK(Runtime::Current()->UseJitCompilation()); break; - case HLoadString::LoadKind::kDexCachePcRelative: + case HLoadString::LoadKind::kBssEntry: DCHECK(!Runtime::Current()->UseJitCompilation()); - // We disable pc-relative load when there is an irreducible loop, as the optimization - // is incompatible with it. - // TODO: Create as many ArmDexCacheArraysBase instructions as needed for methods - // with irreducible loops. - if (GetGraph()->HasIrreducibleLoops()) { - return HLoadString::LoadKind::kDexCacheViaMethod; - } break; case HLoadString::LoadKind::kDexCacheViaMethod: break; @@ -5659,12 +5696,13 @@ HLoadString::LoadKind CodeGeneratorARM::GetSupportedLoadStringKind( void LocationsBuilderARM::VisitLoadString(HLoadString* load) { LocationSummary::CallKind call_kind = load->NeedsEnvironment() - ? LocationSummary::kCallOnMainOnly + ? ((load->GetLoadKind() == HLoadString::LoadKind::kDexCacheViaMethod) + ? LocationSummary::kCallOnMainOnly + : LocationSummary::kCallOnSlowPath) : LocationSummary::kNoCall; LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind); HLoadString::LoadKind load_kind = load->GetLoadKind(); - DCHECK(load_kind != HLoadString::LoadKind::kDexCachePcRelative) << "Not supported"; if (load_kind == HLoadString::LoadKind::kDexCacheViaMethod) { locations->SetInAt(0, Location::RequiresRegister()); locations->SetOut(Location::RegisterLocation(R0)); @@ -5686,6 +5724,7 @@ void InstructionCodeGeneratorARM::VisitLoadString(HLoadString* load) { return; // No dex cache slow path. } case HLoadString::LoadKind::kBootImageLinkTimePcRelative: { + DCHECK(codegen_->GetCompilerOptions().IsBootImage()); CodeGeneratorARM::PcRelativePatchInfo* labels = codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex()); __ BindTrackedLabel(&labels->movw_label); @@ -5702,6 +5741,23 @@ void InstructionCodeGeneratorARM::VisitLoadString(HLoadString* load) { __ LoadLiteral(out, codegen_->DeduplicateBootImageAddressLiteral(address)); return; // No dex cache slow path. 
} + case HLoadString::LoadKind::kBssEntry: { + DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); + CodeGeneratorARM::PcRelativePatchInfo* labels = + codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex()); + __ BindTrackedLabel(&labels->movw_label); + __ movw(out, /* placeholder */ 0u); + __ BindTrackedLabel(&labels->movt_label); + __ movt(out, /* placeholder */ 0u); + __ BindTrackedLabel(&labels->add_pc_label); + __ add(out, out, ShifterOperand(PC)); + GenerateGcRootFieldLoad(load, out_loc, out, 0); + SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathARM(load); + codegen_->AddSlowPath(slow_path); + __ CompareAndBranchIfZero(out, slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); + return; + } default: break; } @@ -6850,7 +6906,8 @@ void CodeGeneratorARM::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, __ bl(GetFrameEntryLabel()); break; case HInvokeStaticOrDirect::CodePtrLocation::kCallPCRelative: - relative_call_patches_.emplace_back(invoke->GetTargetMethod()); + relative_call_patches_.emplace_back(*invoke->GetTargetMethod().dex_file, + invoke->GetTargetMethod().dex_method_index); __ BindTrackedLabel(&relative_call_patches_.back().label); // Arbitrarily branch to the BL itself, override at link time. __ bl(&relative_call_patches_.back().label); @@ -6952,17 +7009,37 @@ Literal* CodeGeneratorARM::DeduplicateDexCacheAddressLiteral(uint32_t address) { return DeduplicateUint32Literal(address, &uint32_literals_); } +template <LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)> +inline void CodeGeneratorARM::EmitPcRelativeLinkerPatches( + const ArenaDeque<PcRelativePatchInfo>& infos, + ArenaVector<LinkerPatch>* linker_patches) { + for (const PcRelativePatchInfo& info : infos) { + const DexFile& dex_file = info.target_dex_file; + size_t offset_or_index = info.offset_or_index; + DCHECK(info.add_pc_label.IsBound()); + uint32_t add_pc_offset = dchecked_integral_cast<uint32_t>(info.add_pc_label.Position()); + // Add MOVW patch. + DCHECK(info.movw_label.IsBound()); + uint32_t movw_offset = dchecked_integral_cast<uint32_t>(info.movw_label.Position()); + linker_patches->push_back(Factory(movw_offset, &dex_file, add_pc_offset, offset_or_index)); + // Add MOVT patch. 
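(Aside: the new kBssEntry fast path above loads the GC root from a .bss slot and falls back to the runtime only when the slot is still null; the slow path then writes the resolved String back so later executions stay on the fast path. A hedged standalone sketch of these semantics with stand-in types; the real ARM code emits movw/movt/add(PC) plus a load and calls kQuickResolveString:)

#include <cstdint>
#include <string>
#include <unordered_map>

using String = std::string;  // stand-in for mirror::String

// Stand-in for the resolution entrypoint (kQuickResolveString).
std::unordered_map<uint32_t, String> g_strings = {{7u, "example"}};
String* ResolveString(uint32_t string_index) { return &g_strings[string_index]; }

String* LoadStringBssEntry(String** bss_slot, uint32_t string_index) {
  String* s = *bss_slot;              // fast path: one PC-relative load
  if (s == nullptr) {                 // CompareAndBranchIfZero -> slow path
    s = ResolveString(string_index);  // runtime call under the slow path
    *bss_slot = s;                    // store the resolved String to the .bss entry
  }
  return s;
}

int main() {
  String* slot = nullptr;             // .bss slots start out zero-initialized
  LoadStringBssEntry(&slot, 7u);      // first execution resolves and caches
  LoadStringBssEntry(&slot, 7u);      // later executions hit the fast path
}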
+ DCHECK(info.movt_label.IsBound()); + uint32_t movt_offset = dchecked_integral_cast<uint32_t>(info.movt_label.Position()); + linker_patches->push_back(Factory(movt_offset, &dex_file, add_pc_offset, offset_or_index)); + } +} + void CodeGeneratorARM::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) { DCHECK(linker_patches->empty()); size_t size = method_patches_.size() + call_patches_.size() + relative_call_patches_.size() + - /* MOVW+MOVT for each base */ 2u * pc_relative_dex_cache_patches_.size() + + /* MOVW+MOVT for each entry */ 2u * pc_relative_dex_cache_patches_.size() + boot_image_string_patches_.size() + - /* MOVW+MOVT for each base */ 2u * pc_relative_string_patches_.size() + + /* MOVW+MOVT for each entry */ 2u * pc_relative_string_patches_.size() + boot_image_type_patches_.size() + - /* MOVW+MOVT for each base */ 2u * pc_relative_type_patches_.size() + + /* MOVW+MOVT for each entry */ 2u * pc_relative_type_patches_.size() + boot_image_address_patches_.size(); linker_patches->reserve(size); for (const auto& entry : method_patches_) { @@ -6983,32 +7060,13 @@ void CodeGeneratorARM::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patche target_method.dex_file, target_method.dex_method_index)); } - for (const MethodPatchInfo<Label>& info : relative_call_patches_) { + for (const PatchInfo<Label>& info : relative_call_patches_) { uint32_t literal_offset = info.label.Position(); - linker_patches->push_back(LinkerPatch::RelativeCodePatch(literal_offset, - info.target_method.dex_file, - info.target_method.dex_method_index)); - } - for (const PcRelativePatchInfo& info : pc_relative_dex_cache_patches_) { - const DexFile& dex_file = info.target_dex_file; - size_t base_element_offset = info.offset_or_index; - DCHECK(info.add_pc_label.IsBound()); - uint32_t add_pc_offset = dchecked_integral_cast<uint32_t>(info.add_pc_label.Position()); - // Add MOVW patch. - DCHECK(info.movw_label.IsBound()); - uint32_t movw_offset = dchecked_integral_cast<uint32_t>(info.movw_label.Position()); - linker_patches->push_back(LinkerPatch::DexCacheArrayPatch(movw_offset, - &dex_file, - add_pc_offset, - base_element_offset)); - // Add MOVT patch. - DCHECK(info.movt_label.IsBound()); - uint32_t movt_offset = dchecked_integral_cast<uint32_t>(info.movt_label.Position()); - linker_patches->push_back(LinkerPatch::DexCacheArrayPatch(movt_offset, - &dex_file, - add_pc_offset, - base_element_offset)); + linker_patches->push_back( + LinkerPatch::RelativeCodePatch(literal_offset, &info.dex_file, info.index)); } + EmitPcRelativeLinkerPatches<LinkerPatch::DexCacheArrayPatch>(pc_relative_dex_cache_patches_, + linker_patches); for (const auto& entry : boot_image_string_patches_) { const StringReference& target_string = entry.first; Literal* literal = entry.second; @@ -7018,25 +7076,12 @@ void CodeGeneratorARM::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patche target_string.dex_file, target_string.string_index)); } - for (const PcRelativePatchInfo& info : pc_relative_string_patches_) { - const DexFile& dex_file = info.target_dex_file; - uint32_t string_index = info.offset_or_index; - DCHECK(info.add_pc_label.IsBound()); - uint32_t add_pc_offset = dchecked_integral_cast<uint32_t>(info.add_pc_label.Position()); - // Add MOVW patch. - DCHECK(info.movw_label.IsBound()); - uint32_t movw_offset = dchecked_integral_cast<uint32_t>(info.movw_label.Position()); - linker_patches->push_back(LinkerPatch::RelativeStringPatch(movw_offset, - &dex_file, - add_pc_offset, - string_index)); - // Add MOVT patch. 
- DCHECK(info.movt_label.IsBound()); - uint32_t movt_offset = dchecked_integral_cast<uint32_t>(info.movt_label.Position()); - linker_patches->push_back(LinkerPatch::RelativeStringPatch(movt_offset, - &dex_file, - add_pc_offset, - string_index)); + if (!GetCompilerOptions().IsBootImage()) { + EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(pc_relative_string_patches_, + linker_patches); + } else { + EmitPcRelativeLinkerPatches<LinkerPatch::RelativeStringPatch>(pc_relative_string_patches_, + linker_patches); } for (const auto& entry : boot_image_type_patches_) { const TypeReference& target_type = entry.first; @@ -7047,26 +7092,8 @@ void CodeGeneratorARM::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patche target_type.dex_file, target_type.type_index)); } - for (const PcRelativePatchInfo& info : pc_relative_type_patches_) { - const DexFile& dex_file = info.target_dex_file; - uint32_t type_index = info.offset_or_index; - DCHECK(info.add_pc_label.IsBound()); - uint32_t add_pc_offset = dchecked_integral_cast<uint32_t>(info.add_pc_label.Position()); - // Add MOVW patch. - DCHECK(info.movw_label.IsBound()); - uint32_t movw_offset = dchecked_integral_cast<uint32_t>(info.movw_label.Position()); - linker_patches->push_back(LinkerPatch::RelativeTypePatch(movw_offset, - &dex_file, - add_pc_offset, - type_index)); - // Add MOVT patch. - DCHECK(info.movt_label.IsBound()); - uint32_t movt_offset = dchecked_integral_cast<uint32_t>(info.movt_label.Position()); - linker_patches->push_back(LinkerPatch::RelativeTypePatch(movt_offset, - &dex_file, - add_pc_offset, - type_index)); - } + EmitPcRelativeLinkerPatches<LinkerPatch::RelativeTypePatch>(pc_relative_type_patches_, + linker_patches); for (const auto& entry : boot_image_address_patches_) { DCHECK(GetCompilerOptions().GetIncludePatchInformation()); Literal* literal = entry.second; diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h index 6416d40f7f..ef2e23f258 100644 --- a/compiler/optimizing/code_generator_arm.h +++ b/compiler/optimizing/code_generator_arm.h @@ -593,6 +593,10 @@ class CodeGeneratorARM : public CodeGenerator { uint32_t offset_or_index, ArenaDeque<PcRelativePatchInfo>* patches); + template <LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)> + static void EmitPcRelativeLinkerPatches(const ArenaDeque<PcRelativePatchInfo>& infos, + ArenaVector<LinkerPatch>* linker_patches); + // Labels for each block that will be compiled. Label* block_labels_; // Indexed by block id. Label frame_entry_label_; @@ -609,12 +613,12 @@ class CodeGeneratorARM : public CodeGenerator { MethodToLiteralMap call_patches_; // Relative call patch info. // Using ArenaDeque<> which retains element addresses on push/emplace_back(). - ArenaDeque<MethodPatchInfo<Label>> relative_call_patches_; + ArenaDeque<PatchInfo<Label>> relative_call_patches_; // PC-relative patch info for each HArmDexCacheArraysBase. ArenaDeque<PcRelativePatchInfo> pc_relative_dex_cache_patches_; // Deduplication map for boot string literals for kBootImageLinkTimeAddress. BootStringToLiteralMap boot_image_string_patches_; - // PC-relative String patch info. + // PC-relative String patch info; type depends on configuration (app .bss or boot image PIC). ArenaDeque<PcRelativePatchInfo> pc_relative_string_patches_; // Deduplication map for boot type literals for kBootImageLinkTimeAddress. 
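(Aside: EmitPcRelativeLinkerPatches above folds several near-identical emission loops into one template parameterized on the LinkerPatch factory function. A standalone sketch of that pattern with simplified types; the real factories also take a DexFile* and the pc_insn offset:)

#include <cstdint>
#include <vector>

struct LinkerPatch { const char* kind; uint32_t literal_offset; uint32_t target_index; };

LinkerPatch RelativeStringPatch(uint32_t off, uint32_t idx) {
  return {"RelativeString", off, idx};   // boot image PIC
}
LinkerPatch StringBssEntryPatch(uint32_t off, uint32_t idx) {
  return {"StringBssEntry", off, idx};   // app .bss entry
}

struct PatchSite { uint32_t code_offset; uint32_t target_index; };

template <LinkerPatch (*Factory)(uint32_t, uint32_t)>
void EmitPatches(const std::vector<PatchSite>& sites, std::vector<LinkerPatch>* out) {
  for (const PatchSite& site : sites) {
    out->push_back(Factory(site.code_offset, site.target_index));
  }
}

int main() {
  std::vector<PatchSite> string_sites = {{0x40u, 3u}};
  std::vector<LinkerPatch> patches;
  bool is_boot_image = false;
  // The same recorded sites yield different patch kinds per configuration:
  if (is_boot_image) {
    EmitPatches<RelativeStringPatch>(string_sites, &patches);
  } else {
    EmitPatches<StringBssEntryPatch>(string_sites, &patches);
  }
}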
BootTypeToLiteralMap boot_image_type_patches_; diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index 4f7f36bb5a..969d653f97 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -329,6 +329,55 @@ class LoadClassSlowPathARM64 : public SlowPathCodeARM64 { DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathARM64); }; +class LoadStringSlowPathARM64 : public SlowPathCodeARM64 { + public: + explicit LoadStringSlowPathARM64(HLoadString* instruction) : SlowPathCodeARM64(instruction) {} + + void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + LocationSummary* locations = instruction_->GetLocations(); + DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg())); + CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen); + + __ Bind(GetEntryLabel()); + SaveLiveRegisters(codegen, locations); + + InvokeRuntimeCallingConvention calling_convention; + const uint32_t string_index = instruction_->AsLoadString()->GetStringIndex(); + __ Mov(calling_convention.GetRegisterAt(0).W(), string_index); + arm64_codegen->InvokeRuntime(kQuickResolveString, instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>(); + Primitive::Type type = instruction_->GetType(); + arm64_codegen->MoveLocation(locations->Out(), calling_convention.GetReturnLocation(type), type); + + RestoreLiveRegisters(codegen, locations); + + // Store the resolved String to the BSS entry. + UseScratchRegisterScope temps(arm64_codegen->GetVIXLAssembler()); + Register temp = temps.AcquireX(); + const DexFile& dex_file = instruction_->AsLoadString()->GetDexFile(); + // TODO: Change art_quick_resolve_string to kSaveEverything and use a temporary + // for the ADRP in the fast path, so that we can avoid the ADRP here. + vixl::aarch64::Label* adrp_label = + arm64_codegen->NewPcRelativeStringPatch(dex_file, string_index); + arm64_codegen->EmitAdrpPlaceholder(adrp_label, temp); + vixl::aarch64::Label* strp_label = + arm64_codegen->NewPcRelativeStringPatch(dex_file, string_index, adrp_label); + { + SingleEmissionCheckScope guard(arm64_codegen->GetVIXLAssembler()); + __ Bind(strp_label); + __ str(RegisterFrom(locations->Out(), Primitive::kPrimNot), + MemOperand(temp, /* offset placeholder */ 0)); + } + + __ B(GetExitLabel()); + } + + const char* GetDescription() const OVERRIDE { return "LoadStringSlowPathARM64"; } + + private: + DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathARM64); +}; + class NullCheckSlowPathARM64 : public SlowPathCodeARM64 { public: explicit NullCheckSlowPathARM64(HNullCheck* instr) : SlowPathCodeARM64(instr) {} @@ -3631,19 +3680,11 @@ void CodeGeneratorARM64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invok const DexFile& dex_file = invoke->GetDexFile(); uint32_t element_offset = invoke->GetDexCacheArrayOffset(); vixl::aarch64::Label* adrp_label = NewPcRelativeDexCacheArrayPatch(dex_file, element_offset); - { - SingleEmissionCheckScope guard(GetVIXLAssembler()); - __ Bind(adrp_label); - __ adrp(XRegisterFrom(temp), /* offset placeholder */ 0); - } + EmitAdrpPlaceholder(adrp_label, XRegisterFrom(temp)); // Add LDR with its PC-relative DexCache access patch. 
vixl::aarch64::Label* ldr_label = NewPcRelativeDexCacheArrayPatch(dex_file, element_offset, adrp_label); - { - SingleEmissionCheckScope guard(GetVIXLAssembler()); - __ Bind(ldr_label); - __ ldr(XRegisterFrom(temp), MemOperand(XRegisterFrom(temp), /* offset placeholder */ 0)); - } + EmitLdrOffsetPlaceholder(ldr_label, XRegisterFrom(temp), XRegisterFrom(temp)); break; } case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: { @@ -3676,7 +3717,8 @@ void CodeGeneratorARM64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invok __ Bl(&frame_entry_label_); break; case HInvokeStaticOrDirect::CodePtrLocation::kCallPCRelative: { - relative_call_patches_.emplace_back(invoke->GetTargetMethod()); + relative_call_patches_.emplace_back(*invoke->GetTargetMethod().dex_file, + invoke->GetTargetMethod().dex_method_index); vixl::aarch64::Label* label = &relative_call_patches_.back().label; SingleEmissionCheckScope guard(GetVIXLAssembler()); __ Bind(label); @@ -3798,6 +3840,45 @@ vixl::aarch64::Literal<uint64_t>* CodeGeneratorARM64::DeduplicateDexCacheAddress return DeduplicateUint64Literal(address); } +void CodeGeneratorARM64::EmitAdrpPlaceholder(vixl::aarch64::Label* fixup_label, + vixl::aarch64::Register reg) { + DCHECK(reg.IsX()); + SingleEmissionCheckScope guard(GetVIXLAssembler()); + __ Bind(fixup_label); + __ adrp(reg, /* offset placeholder */ 0); +} + +void CodeGeneratorARM64::EmitAddPlaceholder(vixl::aarch64::Label* fixup_label, + vixl::aarch64::Register out, + vixl::aarch64::Register base) { + DCHECK(out.IsX()); + DCHECK(base.IsX()); + SingleEmissionCheckScope guard(GetVIXLAssembler()); + __ Bind(fixup_label); + __ add(out, base, Operand(/* offset placeholder */ 0)); +} + +void CodeGeneratorARM64::EmitLdrOffsetPlaceholder(vixl::aarch64::Label* fixup_label, + vixl::aarch64::Register out, + vixl::aarch64::Register base) { + DCHECK(base.IsX()); + SingleEmissionCheckScope guard(GetVIXLAssembler()); + __ Bind(fixup_label); + __ ldr(out, MemOperand(base, /* offset placeholder */ 0)); +} + +template <LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)> +inline void CodeGeneratorARM64::EmitPcRelativeLinkerPatches( + const ArenaDeque<PcRelativePatchInfo>& infos, + ArenaVector<LinkerPatch>* linker_patches) { + for (const PcRelativePatchInfo& info : infos) { + linker_patches->push_back(Factory(info.label.GetLocation(), + &info.target_dex_file, + info.pc_insn_label->GetLocation(), + info.offset_or_index)); + } +} + void CodeGeneratorARM64::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) { DCHECK(linker_patches->empty()); size_t size = @@ -3825,10 +3906,9 @@ void CodeGeneratorARM64::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patc target_method.dex_file, target_method.dex_method_index)); } - for (const MethodPatchInfo<vixl::aarch64::Label>& info : relative_call_patches_) { - linker_patches->push_back(LinkerPatch::RelativeCodePatch(info.label.GetLocation(), - info.target_method.dex_file, - info.target_method.dex_method_index)); + for (const PatchInfo<vixl::aarch64::Label>& info : relative_call_patches_) { + linker_patches->push_back( + LinkerPatch::RelativeCodePatch(info.label.GetLocation(), &info.dex_file, info.index)); } for (const PcRelativePatchInfo& info : pc_relative_dex_cache_patches_) { linker_patches->push_back(LinkerPatch::DexCacheArrayPatch(info.label.GetLocation(), @@ -3843,11 +3923,12 @@ void CodeGeneratorARM64::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patc target_string.dex_file, target_string.string_index)); } - for (const PcRelativePatchInfo& 
info : pc_relative_string_patches_) { - linker_patches->push_back(LinkerPatch::RelativeStringPatch(info.label.GetLocation(), - &info.target_dex_file, - info.pc_insn_label->GetLocation(), - info.offset_or_index)); + if (!GetCompilerOptions().IsBootImage()) { + EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(pc_relative_string_patches_, + linker_patches); + } else { + EmitPcRelativeLinkerPatches<LinkerPatch::RelativeStringPatch>(pc_relative_string_patches_, + linker_patches); } for (const auto& entry : boot_image_type_patches_) { const TypeReference& target_type = entry.first; @@ -3856,12 +3937,8 @@ void CodeGeneratorARM64::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patc target_type.dex_file, target_type.type_index)); } - for (const PcRelativePatchInfo& info : pc_relative_type_patches_) { - linker_patches->push_back(LinkerPatch::RelativeTypePatch(info.label.GetLocation(), - &info.target_dex_file, - info.pc_insn_label->GetLocation(), - info.offset_or_index)); - } + EmitPcRelativeLinkerPatches<LinkerPatch::RelativeTypePatch>(pc_relative_type_patches_, + linker_patches); for (const auto& entry : boot_image_address_patches_) { DCHECK(GetCompilerOptions().GetIncludePatchInformation()); vixl::aarch64::Literal<uint32_t>* literal = entry.second; @@ -4018,19 +4095,11 @@ void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) { const DexFile& dex_file = cls->GetDexFile(); uint32_t type_index = cls->GetTypeIndex(); vixl::aarch64::Label* adrp_label = codegen_->NewPcRelativeTypePatch(dex_file, type_index); - { - SingleEmissionCheckScope guard(GetVIXLAssembler()); - __ Bind(adrp_label); - __ adrp(out.X(), /* offset placeholder */ 0); - } + codegen_->EmitAdrpPlaceholder(adrp_label, out.X()); // Add ADD with its PC-relative type patch. vixl::aarch64::Label* add_label = codegen_->NewPcRelativeTypePatch(dex_file, type_index, adrp_label); - { - SingleEmissionCheckScope guard(GetVIXLAssembler()); - __ Bind(add_label); - __ add(out.X(), out.X(), Operand(/* offset placeholder */ 0)); - } + codegen_->EmitAddPlaceholder(add_label, out.X(), out.X()); break; } case HLoadClass::LoadKind::kBootImageAddress: { @@ -4067,11 +4136,7 @@ void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) { uint32_t element_offset = cls->GetDexCacheElementOffset(); vixl::aarch64::Label* adrp_label = codegen_->NewPcRelativeDexCacheArrayPatch(dex_file, element_offset); - { - SingleEmissionCheckScope guard(GetVIXLAssembler()); - __ Bind(adrp_label); - __ adrp(out.X(), /* offset placeholder */ 0); - } + codegen_->EmitAdrpPlaceholder(adrp_label, out.X()); // Add LDR with its PC-relative DexCache access patch. vixl::aarch64::Label* ldr_label = codegen_->NewPcRelativeDexCacheArrayPatch(dex_file, element_offset, adrp_label); @@ -4156,7 +4221,7 @@ HLoadString::LoadKind CodeGeneratorARM64::GetSupportedLoadStringKind( case HLoadString::LoadKind::kDexCacheAddress: DCHECK(Runtime::Current()->UseJitCompilation()); break; - case HLoadString::LoadKind::kDexCachePcRelative: + case HLoadString::LoadKind::kBssEntry: DCHECK(!Runtime::Current()->UseJitCompilation()); break; case HLoadString::LoadKind::kDexCacheViaMethod: @@ -4167,7 +4232,9 @@ HLoadString::LoadKind CodeGeneratorARM64::GetSupportedLoadStringKind( void LocationsBuilderARM64::VisitLoadString(HLoadString* load) { LocationSummary::CallKind call_kind = load->NeedsEnvironment() - ? LocationSummary::kCallOnMainOnly + ? ((load->GetLoadKind() == HLoadString::LoadKind::kDexCacheViaMethod) + ? 
LocationSummary::kCallOnMainOnly + : LocationSummary::kCallOnSlowPath) : LocationSummary::kNoCall; LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind); if (load->GetLoadKind() == HLoadString::LoadKind::kDexCacheViaMethod) { @@ -4191,20 +4258,13 @@ void InstructionCodeGeneratorARM64::VisitLoadString(HLoadString* load) { // Add ADRP with its PC-relative String patch. const DexFile& dex_file = load->GetDexFile(); uint32_t string_index = load->GetStringIndex(); + DCHECK(codegen_->GetCompilerOptions().IsBootImage()); vixl::aarch64::Label* adrp_label = codegen_->NewPcRelativeStringPatch(dex_file, string_index); - { - SingleEmissionCheckScope guard(GetVIXLAssembler()); - __ Bind(adrp_label); - __ adrp(out.X(), /* offset placeholder */ 0); - } + codegen_->EmitAdrpPlaceholder(adrp_label, out.X()); // Add ADD with its PC-relative String patch. vixl::aarch64::Label* add_label = codegen_->NewPcRelativeStringPatch(dex_file, string_index, adrp_label); - { - SingleEmissionCheckScope guard(GetVIXLAssembler()); - __ Bind(add_label); - __ add(out.X(), out.X(), Operand(/* offset placeholder */ 0)); - } + codegen_->EmitAddPlaceholder(add_label, out.X(), out.X()); return; // No dex cache slow path. } case HLoadString::LoadKind::kBootImageAddress: { @@ -4212,6 +4272,28 @@ void InstructionCodeGeneratorARM64::VisitLoadString(HLoadString* load) { __ Ldr(out.W(), codegen_->DeduplicateBootImageAddressLiteral(load->GetAddress())); return; // No dex cache slow path. } + case HLoadString::LoadKind::kBssEntry: { + // Add ADRP with its PC-relative String .bss entry patch. + const DexFile& dex_file = load->GetDexFile(); + uint32_t string_index = load->GetStringIndex(); + DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); + vixl::aarch64::Label* adrp_label = codegen_->NewPcRelativeStringPatch(dex_file, string_index); + codegen_->EmitAdrpPlaceholder(adrp_label, out.X()); + // Add LDR with its PC-relative String patch. 
+ vixl::aarch64::Label* ldr_label = + codegen_->NewPcRelativeStringPatch(dex_file, string_index, adrp_label); + // /* GcRoot<mirror::Class> */ out = *(base_address + offset) /* PC-relative */ + GenerateGcRootFieldLoad(load, + load->GetLocations()->Out(), + out.X(), + /* placeholder */ 0u, + ldr_label); + SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathARM64(load); + codegen_->AddSlowPath(slow_path); + __ Cbz(out.X(), slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); + return; + } default: break; } @@ -4981,6 +5063,7 @@ void InstructionCodeGeneratorARM64::GenerateGcRootFieldLoad(HInstruction* instru uint32_t offset, vixl::aarch64::Label* fixup_label, bool requires_read_barrier) { + DCHECK(fixup_label == nullptr || offset == 0u); Register root_reg = RegisterFrom(root, Primitive::kPrimNot); if (requires_read_barrier) { DCHECK(kEmitCompilerReadBarrier); @@ -4997,9 +5080,7 @@ void InstructionCodeGeneratorARM64::GenerateGcRootFieldLoad(HInstruction* instru if (fixup_label == nullptr) { __ Ldr(root_reg, MemOperand(obj, offset)); } else { - SingleEmissionCheckScope guard(GetVIXLAssembler()); - __ Bind(fixup_label); - __ ldr(root_reg, MemOperand(obj, offset)); + codegen_->EmitLdrOffsetPlaceholder(fixup_label, root_reg, obj); } static_assert( sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>), @@ -5028,9 +5109,7 @@ void InstructionCodeGeneratorARM64::GenerateGcRootFieldLoad(HInstruction* instru if (fixup_label == nullptr) { __ Add(root_reg.X(), obj.X(), offset); } else { - SingleEmissionCheckScope guard(GetVIXLAssembler()); - __ Bind(fixup_label); - __ add(root_reg.X(), obj.X(), offset); + codegen_->EmitAddPlaceholder(fixup_label, root_reg.X(), obj.X()); } // /* mirror::Object* */ root = root->Read() codegen_->GenerateReadBarrierForRootSlow(instruction, root, root); @@ -5041,9 +5120,7 @@ void InstructionCodeGeneratorARM64::GenerateGcRootFieldLoad(HInstruction* instru if (fixup_label == nullptr) { __ Ldr(root_reg, MemOperand(obj, offset)); } else { - SingleEmissionCheckScope guard(GetVIXLAssembler()); - __ Bind(fixup_label); - __ ldr(root_reg, MemOperand(obj, offset)); + codegen_->EmitLdrOffsetPlaceholder(fixup_label, root_reg, obj.X()); } // Note that GC roots are not affected by heap poisoning, thus we // do not have to unpoison `root_reg` here. 
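(Aside: the three Emit*Placeholder helpers above centralize the repeated bind-under-SingleEmissionCheckScope pattern for ADRP-based addressing: ADRP materializes the 4 KiB page of the target and the following ADD or LDR supplies the low 12 bits, both patched by the linker. A rough sketch of the address arithmetic being patched, with assumed values:)

#include <cstdint>
#include <cassert>

// ADRP computes page(target) from page(pc); ADD/LDR then applies the low
// 12 bits. Split a 64-bit target the way the linker patches it:
struct AdrpSplit { uint64_t page; uint32_t low12; };

AdrpSplit SplitTarget(uint64_t target) {
  return { target & ~UINT64_C(0xfff),                 // value ADRP produces
           static_cast<uint32_t>(target & 0xfffu) };  // ADD/LDR offset immediate
}

int main() {
  uint64_t bss_entry = 0x0000007123456abcu;  // assumed .bss slot address
  AdrpSplit s = SplitTarget(bss_entry);
  assert(s.page + s.low12 == bss_entry);     // ADRP + ADD reconstructs the address
}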
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h index a15224578d..eb28ecb427 100644 --- a/compiler/optimizing/code_generator_arm64.h +++ b/compiler/optimizing/code_generator_arm64.h @@ -564,6 +564,14 @@ class CodeGeneratorARM64 : public CodeGenerator { vixl::aarch64::Literal<uint32_t>* DeduplicateBootImageAddressLiteral(uint64_t address); vixl::aarch64::Literal<uint64_t>* DeduplicateDexCacheAddressLiteral(uint64_t address); + void EmitAdrpPlaceholder(vixl::aarch64::Label* fixup_label, vixl::aarch64::Register reg); + void EmitAddPlaceholder(vixl::aarch64::Label* fixup_label, + vixl::aarch64::Register out, + vixl::aarch64::Register base); + void EmitLdrOffsetPlaceholder(vixl::aarch64::Label* fixup_label, + vixl::aarch64::Register out, + vixl::aarch64::Register base); + void EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) OVERRIDE; // Fast path implementation of ReadBarrier::Barrier for a heap @@ -691,6 +699,10 @@ class CodeGeneratorARM64 : public CodeGenerator { void EmitJumpTables(); + template <LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)> + static void EmitPcRelativeLinkerPatches(const ArenaDeque<PcRelativePatchInfo>& infos, + ArenaVector<LinkerPatch>* linker_patches); + // Labels for each block that will be compiled. // We use a deque so that the `vixl::aarch64::Label` objects do not move in memory. ArenaDeque<vixl::aarch64::Label> block_labels_; // Indexed by block id. @@ -713,12 +725,12 @@ class CodeGeneratorARM64 : public CodeGenerator { MethodToLiteralMap call_patches_; // Relative call patch info. // Using ArenaDeque<> which retains element addresses on push/emplace_back(). - ArenaDeque<MethodPatchInfo<vixl::aarch64::Label>> relative_call_patches_; + ArenaDeque<PatchInfo<vixl::aarch64::Label>> relative_call_patches_; // PC-relative DexCache access info. ArenaDeque<PcRelativePatchInfo> pc_relative_dex_cache_patches_; // Deduplication map for boot string literals for kBootImageLinkTimeAddress. BootStringToLiteralMap boot_image_string_patches_; - // PC-relative String patch info. + // PC-relative String patch info; type depends on configuration (app .bss or boot image PIC). ArenaDeque<PcRelativePatchInfo> pc_relative_string_patches_; // Deduplication map for boot type literals for kBootImageLinkTimeAddress. BootTypeToLiteralMap boot_image_type_patches_; diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc index 5c0ca85c78..990bbcc85b 100644 --- a/compiler/optimizing/code_generator_mips.cc +++ b/compiler/optimizing/code_generator_mips.cc @@ -279,7 +279,8 @@ class LoadStringSlowPathMIPS : public SlowPathCodeMIPS { SaveLiveRegisters(codegen, locations); InvokeRuntimeCallingConvention calling_convention; - const uint32_t string_index = instruction_->AsLoadString()->GetStringIndex(); + HLoadString* load = instruction_->AsLoadString(); + const uint32_t string_index = load->GetStringIndex(); __ LoadConst32(calling_convention.GetRegisterAt(0), string_index); mips_codegen->InvokeRuntime(kQuickResolveString, instruction_, instruction_->GetDexPc(), this); CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>(); @@ -289,6 +290,19 @@ class LoadStringSlowPathMIPS : public SlowPathCodeMIPS { type); RestoreLiveRegisters(codegen, locations); + + // Store the resolved String to the BSS entry. 
+ // TODO: Change art_quick_resolve_string to kSaveEverything and use a temporary for the + // .bss entry address in the fast path, so that we can avoid another calculation here. + bool isR6 = mips_codegen->GetInstructionSetFeatures().IsR6(); + Register base = isR6 ? ZERO : locations->InAt(0).AsRegister<Register>(); + Register out = locations->Out().AsRegister<Register>(); + DCHECK_NE(out, AT); + CodeGeneratorMIPS::PcRelativePatchInfo* info = + mips_codegen->NewPcRelativeStringPatch(load->GetDexFile(), string_index); + mips_codegen->EmitPcRelativeAddressPlaceholder(info, TMP, base); + __ StoreToOffset(kStoreWord, out, TMP, 0); + __ B(GetExitLabel()); } @@ -957,6 +971,24 @@ void CodeGeneratorMIPS::AddLocationAsTemp(Location location, LocationSummary* lo } } +template <LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)> +inline void CodeGeneratorMIPS::EmitPcRelativeLinkerPatches( + const ArenaDeque<PcRelativePatchInfo>& infos, + ArenaVector<LinkerPatch>* linker_patches) { + for (const PcRelativePatchInfo& info : infos) { + const DexFile& dex_file = info.target_dex_file; + size_t offset_or_index = info.offset_or_index; + DCHECK(info.high_label.IsBound()); + uint32_t high_offset = __ GetLabelLocation(&info.high_label); + // On R2 we use HMipsComputeBaseMethodAddress and patch relative to + // the assembler's base label used for PC-relative addressing. + uint32_t pc_rel_offset = info.pc_rel_label.IsBound() + ? __ GetLabelLocation(&info.pc_rel_label) + : __ GetPcRelBaseLabelLocation(); + linker_patches->push_back(Factory(high_offset, &dex_file, pc_rel_offset, offset_or_index)); + } +} + void CodeGeneratorMIPS::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) { DCHECK(linker_patches->empty()); size_t size = @@ -987,48 +1019,17 @@ void CodeGeneratorMIPS::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patch target_method.dex_file, target_method.dex_method_index)); } - for (const PcRelativePatchInfo& info : pc_relative_dex_cache_patches_) { - const DexFile& dex_file = info.target_dex_file; - size_t base_element_offset = info.offset_or_index; - DCHECK(info.high_label.IsBound()); - uint32_t high_offset = __ GetLabelLocation(&info.high_label); - DCHECK(info.pc_rel_label.IsBound()); - uint32_t pc_rel_offset = __ GetLabelLocation(&info.pc_rel_label); - linker_patches->push_back(LinkerPatch::DexCacheArrayPatch(high_offset, - &dex_file, - pc_rel_offset, - base_element_offset)); - } - for (const PcRelativePatchInfo& info : pc_relative_string_patches_) { - const DexFile& dex_file = info.target_dex_file; - size_t string_index = info.offset_or_index; - DCHECK(info.high_label.IsBound()); - uint32_t high_offset = __ GetLabelLocation(&info.high_label); - // On R2 we use HMipsComputeBaseMethodAddress and patch relative to - // the assembler's base label used for PC-relative literals. - uint32_t pc_rel_offset = info.pc_rel_label.IsBound() - ? __ GetLabelLocation(&info.pc_rel_label) - : __ GetPcRelBaseLabelLocation(); - linker_patches->push_back(LinkerPatch::RelativeStringPatch(high_offset, - &dex_file, - pc_rel_offset, - string_index)); - } - for (const PcRelativePatchInfo& info : pc_relative_type_patches_) { - const DexFile& dex_file = info.target_dex_file; - size_t type_index = info.offset_or_index; - DCHECK(info.high_label.IsBound()); - uint32_t high_offset = __ GetLabelLocation(&info.high_label); - // On R2 we use HMipsComputeBaseMethodAddress and patch relative to - // the assembler's base label used for PC-relative literals. 
- uint32_t pc_rel_offset = info.pc_rel_label.IsBound() - ? __ GetLabelLocation(&info.pc_rel_label) - : __ GetPcRelBaseLabelLocation(); - linker_patches->push_back(LinkerPatch::RelativeTypePatch(high_offset, - &dex_file, - pc_rel_offset, - type_index)); + EmitPcRelativeLinkerPatches<LinkerPatch::DexCacheArrayPatch>(pc_relative_dex_cache_patches_, + linker_patches); + if (!GetCompilerOptions().IsBootImage()) { + EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(pc_relative_string_patches_, + linker_patches); + } else { + EmitPcRelativeLinkerPatches<LinkerPatch::RelativeStringPatch>(pc_relative_string_patches_, + linker_patches); } + EmitPcRelativeLinkerPatches<LinkerPatch::RelativeTypePatch>(pc_relative_type_patches_, + linker_patches); for (const auto& entry : boot_image_string_patches_) { const StringReference& target_string = entry.first; Literal* literal = entry.second; @@ -1118,6 +1119,36 @@ Literal* CodeGeneratorMIPS::DeduplicateBootImageAddressLiteral(uint32_t address) return DeduplicateUint32Literal(dchecked_integral_cast<uint32_t>(address), map); } +void CodeGeneratorMIPS::EmitPcRelativeAddressPlaceholder( + PcRelativePatchInfo* info, Register out, Register base) { + bool reordering = __ SetReorder(false); + if (GetInstructionSetFeatures().IsR6()) { + DCHECK_EQ(base, ZERO); + __ Bind(&info->high_label); + __ Bind(&info->pc_rel_label); + // Add a 32-bit offset to PC. + __ Auipc(out, /* placeholder */ 0x1234); + __ Addiu(out, out, /* placeholder */ 0x5678); + } else { + // If base is ZERO, emit NAL to obtain the actual base. + if (base == ZERO) { + // Generate a dummy PC-relative call to obtain PC. + __ Nal(); + } + __ Bind(&info->high_label); + __ Lui(out, /* placeholder */ 0x1234); + // If we emitted the NAL, bind the pc_rel_label, otherwise base is a register holding + // the HMipsComputeBaseMethodAddress which has its own label stored in MipsAssembler. + if (base == ZERO) { + __ Bind(&info->pc_rel_label); + } + __ Ori(out, out, /* placeholder */ 0x5678); + // Add a 32-bit offset to PC. + __ Addu(out, out, (base == ZERO) ? RA : base); + } + __ SetReorder(reordering); +} + void CodeGeneratorMIPS::MarkGCCard(Register object, Register value) { MipsLabel done; Register card = AT; @@ -4229,6 +4260,8 @@ HLoadString::LoadKind CodeGeneratorMIPS::GetSupportedLoadStringKind( } // We disable PC-relative load when there is an irreducible loop, as the optimization // is incompatible with it. + // TODO: Create as many MipsDexCacheArraysBase instructions as needed for methods + // with irreducible loops. bool has_irreducible_loops = GetGraph()->HasIrreducibleLoops(); bool fallback_load = has_irreducible_loops; switch (desired_string_load_kind) { @@ -4244,10 +4277,8 @@ HLoadString::LoadKind CodeGeneratorMIPS::GetSupportedLoadStringKind( DCHECK(Runtime::Current()->UseJitCompilation()); fallback_load = false; break; - case HLoadString::LoadKind::kDexCachePcRelative: + case HLoadString::LoadKind::kBssEntry: DCHECK(!Runtime::Current()->UseJitCompilation()); - // TODO: Create as many MipsDexCacheArraysBase instructions as needed for methods - // with irreducible loops. 
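(Aside: EmitPcRelativeAddressPlaceholder above becomes the single emission point for PC-relative addresses on MIPS: R6 uses AUIPC/ADDIU, while R2 has no PC-relative add and either reuses the HMipsComputeBaseMethodAddress base register or emits NAL to capture PC in RA, then combines LUI/ORI with that base. A hedged sketch of the R2 arithmetic with assumed offsets, not real encodings:)

#include <cstdint>
#include <cassert>

// R2 path: target = base + ((high16 << 16) | low16), where `base` is either
// the captured PC (via NAL, in RA) or the method-address base register.
uint32_t MaterializeR2(uint32_t base, uint16_t high16, uint16_t low16) {
  uint32_t tmp = static_cast<uint32_t>(high16) << 16;  // LUI out, high16
  tmp |= low16;                                        // ORI out, out, low16
  return base + tmp;                                   // ADDU out, out, base
}

int main() {
  uint32_t pc_base = 0x70004000u;                 // assumed anchor PC
  uint32_t target  = 0x7123567cu;                 // assumed .bss slot
  uint32_t diff    = target - pc_base;
  assert(MaterializeR2(pc_base, diff >> 16, diff & 0xffffu) == target);
}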
break; case HLoadString::LoadKind::kDexCacheViaMethod: fallback_load = false; @@ -4504,8 +4535,13 @@ void InstructionCodeGeneratorMIPS::VisitInvokeStaticOrDirect(HInvokeStaticOrDire } void CodeGeneratorMIPS::GenerateVirtualCall(HInvokeVirtual* invoke, Location temp_location) { - LocationSummary* locations = invoke->GetLocations(); - Location receiver = locations->InAt(0); + // Use the calling convention instead of the location of the receiver, as + // intrinsics may have put the receiver in a different register. In the intrinsics + // slow path, the arguments have been moved to the right place, so here we are + // guaranteed that the receiver is the first register of the calling convention. + InvokeDexCallingConvention calling_convention; + Register receiver = calling_convention.GetRegisterAt(0); + Register temp = temp_location.AsRegister<Register>(); size_t method_offset = mirror::Class::EmbeddedVTableEntryOffset( invoke->GetVTableIndex(), kMipsPointerSize).SizeValue(); @@ -4513,8 +4549,7 @@ void CodeGeneratorMIPS::GenerateVirtualCall(HInvokeVirtual* invoke, Location tem Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kMipsPointerSize); // temp = object->GetClass(); - DCHECK(receiver.IsRegister()); - __ LoadFromOffset(kLoadWord, temp, receiver.AsRegister<Register>(), class_offset); + __ LoadFromOffset(kLoadWord, temp, receiver, class_offset); MaybeRecordImplicitNullCheck(invoke); // temp = temp->GetMethodAt(method_offset); __ LoadFromOffset(kLoadWord, temp, temp, method_offset); @@ -4627,23 +4662,7 @@ void InstructionCodeGeneratorMIPS::VisitLoadClass(HLoadClass* cls) { DCHECK(!kEmitCompilerReadBarrier); CodeGeneratorMIPS::PcRelativePatchInfo* info = codegen_->NewPcRelativeTypePatch(cls->GetDexFile(), cls->GetTypeIndex()); - bool reordering = __ SetReorder(false); - if (isR6) { - __ Bind(&info->high_label); - __ Bind(&info->pc_rel_label); - // Add a 32-bit offset to PC. - __ Auipc(out, /* placeholder */ 0x1234); - __ Addiu(out, out, /* placeholder */ 0x5678); - } else { - __ Bind(&info->high_label); - __ Lui(out, /* placeholder */ 0x1234); - // We do not bind info->pc_rel_label here, we'll use the assembler's label - // for PC-relative literals and the base from HMipsComputeBaseMethodAddress. - __ Ori(out, out, /* placeholder */ 0x5678); - // Add a 32-bit offset to PC. - __ Addu(out, out, base_or_current_method_reg); - } - __ SetReorder(reordering); + codegen_->EmitPcRelativeAddressPlaceholder(info, out, base_or_current_method_reg); break; } case HLoadClass::LoadKind::kBootImageAddress: { @@ -4732,7 +4751,9 @@ void InstructionCodeGeneratorMIPS::VisitClearException(HClearException* clear AT void LocationsBuilderMIPS::VisitLoadString(HLoadString* load) { LocationSummary::CallKind call_kind = (load->NeedsEnvironment() || kEmitCompilerReadBarrier) - ? LocationSummary::kCallOnSlowPath + ? ((load->GetLoadKind() == HLoadString::LoadKind::kDexCacheViaMethod) + ? 
LocationSummary::kCallOnMainOnly + : LocationSummary::kCallOnSlowPath) : LocationSummary::kNoCall; LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind); HLoadString::LoadKind load_kind = load->GetLoadKind(); @@ -4741,12 +4762,12 @@ void LocationsBuilderMIPS::VisitLoadString(HLoadString* load) { case HLoadString::LoadKind::kBootImageLinkTimeAddress: case HLoadString::LoadKind::kBootImageAddress: case HLoadString::LoadKind::kBootImageLinkTimePcRelative: + case HLoadString::LoadKind::kBssEntry: if (codegen_->GetInstructionSetFeatures().IsR6()) { break; } FALLTHROUGH_INTENDED; // We need an extra register for PC-relative dex cache accesses. - case HLoadString::LoadKind::kDexCachePcRelative: case HLoadString::LoadKind::kDexCacheViaMethod: locations->SetInAt(0, Location::RequiresRegister()); break; @@ -4768,6 +4789,7 @@ void InstructionCodeGeneratorMIPS::VisitLoadString(HLoadString* load) { case HLoadString::LoadKind::kBootImageLinkTimeAddress: case HLoadString::LoadKind::kBootImageAddress: case HLoadString::LoadKind::kBootImageLinkTimePcRelative: + case HLoadString::LoadKind::kBssEntry: base_or_current_method_reg = isR6 ? ZERO : locations->InAt(0).AsRegister<Register>(); break; default: @@ -4785,25 +4807,10 @@ void InstructionCodeGeneratorMIPS::VisitLoadString(HLoadString* load) { return; // No dex cache slow path. case HLoadString::LoadKind::kBootImageLinkTimePcRelative: { DCHECK(!kEmitCompilerReadBarrier); + DCHECK(codegen_->GetCompilerOptions().IsBootImage()); CodeGeneratorMIPS::PcRelativePatchInfo* info = codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex()); - bool reordering = __ SetReorder(false); - if (isR6) { - __ Bind(&info->high_label); - __ Bind(&info->pc_rel_label); - // Add a 32-bit offset to PC. - __ Auipc(out, /* placeholder */ 0x1234); - __ Addiu(out, out, /* placeholder */ 0x5678); - } else { - __ Bind(&info->high_label); - __ Lui(out, /* placeholder */ 0x1234); - // We do not bind info->pc_rel_label here, we'll use the assembler's label - // for PC-relative literals and the base from HMipsComputeBaseMethodAddress. - __ Ori(out, out, /* placeholder */ 0x5678); - // Add a 32-bit offset to PC. - __ Addu(out, out, base_or_current_method_reg); - } - __ SetReorder(reordering); + codegen_->EmitPcRelativeAddressPlaceholder(info, out, base_or_current_method_reg); return; // No dex cache slow path. } case HLoadString::LoadKind::kBootImageAddress: { @@ -4815,15 +4822,28 @@ void InstructionCodeGeneratorMIPS::VisitLoadString(HLoadString* load) { codegen_->DeduplicateBootImageAddressLiteral(address)); return; // No dex cache slow path. } + case HLoadString::LoadKind::kBssEntry: { + DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); + CodeGeneratorMIPS::PcRelativePatchInfo* info = + codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex()); + codegen_->EmitPcRelativeAddressPlaceholder(info, out, base_or_current_method_reg); + __ LoadFromOffset(kLoadWord, out, out, 0); + SlowPathCodeMIPS* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathMIPS(load); + codegen_->AddSlowPath(slow_path); + __ Beqz(out, slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); + return; + } default: break; } // TODO: Re-add the compiler code to do string dex cache lookup again. 
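(Aside: the same call-kind selection now recurs in every backend's VisitLoadString locations builder: only the kDexCacheViaMethod fallback calls the runtime on the main path, while kBssEntry keeps the call on the slow path. A compact sketch of that decision with simplified enums:)

#include <cassert>

enum class LoadKind { kBootImageAddress, kBssEntry, kDexCacheViaMethod };
enum class CallKind { kNoCall, kCallOnMainOnly, kCallOnSlowPath };

CallKind SelectCallKind(bool needs_environment, LoadKind kind) {
  if (!needs_environment) {
    return CallKind::kNoCall;
  }
  return (kind == LoadKind::kDexCacheViaMethod)
      ? CallKind::kCallOnMainOnly    // unconditional runtime call
      : CallKind::kCallOnSlowPath;   // runtime call only if the .bss slot is null
}

int main() {
  assert(SelectCallKind(true, LoadKind::kBssEntry) == CallKind::kCallOnSlowPath);
  assert(SelectCallKind(true, LoadKind::kDexCacheViaMethod) == CallKind::kCallOnMainOnly);
  assert(SelectCallKind(false, LoadKind::kBootImageAddress) == CallKind::kNoCall);
}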
- SlowPathCodeMIPS* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathMIPS(load); - codegen_->AddSlowPath(slow_path); - __ B(slow_path->GetEntryLabel()); - __ Bind(slow_path->GetExitLabel()); + DCHECK(load_kind == HLoadString::LoadKind::kDexCacheViaMethod); + InvokeRuntimeCallingConvention calling_convention; + __ LoadConst32(calling_convention.GetRegisterAt(0), load->GetStringIndex()); + codegen_->InvokeRuntime(kQuickResolveString, load, load->GetDexPc()); + CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>(); } void LocationsBuilderMIPS::VisitLongConstant(HLongConstant* constant) { @@ -6011,25 +6031,8 @@ void InstructionCodeGeneratorMIPS::VisitMipsDexCacheArraysBase(HMipsDexCacheArra Register reg = base->GetLocations()->Out().AsRegister<Register>(); CodeGeneratorMIPS::PcRelativePatchInfo* info = codegen_->NewPcRelativeDexCacheArrayPatch(base->GetDexFile(), base->GetElementOffset()); - bool reordering = __ SetReorder(false); - if (codegen_->GetInstructionSetFeatures().IsR6()) { - __ Bind(&info->high_label); - __ Bind(&info->pc_rel_label); - // Add a 32-bit offset to PC. - __ Auipc(reg, /* placeholder */ 0x1234); - __ Addiu(reg, reg, /* placeholder */ 0x5678); - } else { - // Generate a dummy PC-relative call to obtain PC. - __ Nal(); - __ Bind(&info->high_label); - __ Lui(reg, /* placeholder */ 0x1234); - __ Bind(&info->pc_rel_label); - __ Ori(reg, reg, /* placeholder */ 0x5678); - // Add a 32-bit offset to PC. - __ Addu(reg, reg, RA); - // TODO: Can we share this code with that of VisitMipsComputeBaseMethodAddress()? - } - __ SetReorder(reordering); + // TODO: Reuse MipsComputeBaseMethodAddress on R2 instead of passing ZERO to force emitting NAL. + codegen_->EmitPcRelativeAddressPlaceholder(info, reg, ZERO); } void LocationsBuilderMIPS::VisitInvokeUnresolved(HInvokeUnresolved* invoke) { diff --git a/compiler/optimizing/code_generator_mips.h b/compiler/optimizing/code_generator_mips.h index f943978b3b..0e8d8d40cf 100644 --- a/compiler/optimizing/code_generator_mips.h +++ b/compiler/optimizing/code_generator_mips.h @@ -435,6 +435,8 @@ class CodeGeneratorMIPS : public CodeGenerator { Literal* DeduplicateBootImageTypeLiteral(const DexFile& dex_file, uint32_t type_index); Literal* DeduplicateBootImageAddressLiteral(uint32_t address); + void EmitPcRelativeAddressPlaceholder(PcRelativePatchInfo* info, Register out, Register base); + private: Register GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke, Register temp); @@ -455,6 +457,10 @@ class CodeGeneratorMIPS : public CodeGenerator { uint32_t offset_or_index, ArenaDeque<PcRelativePatchInfo>* patches); + template <LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)> + void EmitPcRelativeLinkerPatches(const ArenaDeque<PcRelativePatchInfo>& infos, + ArenaVector<LinkerPatch>* linker_patches); + // Labels for each block that will be compiled. MipsLabel* block_labels_; MipsLabel frame_entry_label_; @@ -473,7 +479,7 @@ class CodeGeneratorMIPS : public CodeGenerator { ArenaDeque<PcRelativePatchInfo> pc_relative_dex_cache_patches_; // Deduplication map for boot string literals for kBootImageLinkTimeAddress. BootStringToLiteralMap boot_image_string_patches_; - // PC-relative String patch info. + // PC-relative String patch info; type depends on configuration (app .bss or boot image PIC). ArenaDeque<PcRelativePatchInfo> pc_relative_string_patches_; // Deduplication map for boot type literals for kBootImageLinkTimeAddress. 
BootTypeToLiteralMap boot_image_type_patches_; diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index a7051aeeb1..0b23599665 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -212,6 +212,42 @@ class SuspendCheckSlowPathX86 : public SlowPathCode { DISALLOW_COPY_AND_ASSIGN(SuspendCheckSlowPathX86); }; +class LoadStringSlowPathX86 : public SlowPathCode { + public: + explicit LoadStringSlowPathX86(HLoadString* instruction): SlowPathCode(instruction) {} + + void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { + LocationSummary* locations = instruction_->GetLocations(); + DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg())); + + CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen); + __ Bind(GetEntryLabel()); + SaveLiveRegisters(codegen, locations); + + InvokeRuntimeCallingConvention calling_convention; + const uint32_t string_index = instruction_->AsLoadString()->GetStringIndex(); + __ movl(calling_convention.GetRegisterAt(0), Immediate(string_index)); + x86_codegen->InvokeRuntime(kQuickResolveString, instruction_, instruction_->GetDexPc(), this); + CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>(); + x86_codegen->Move32(locations->Out(), Location::RegisterLocation(EAX)); + RestoreLiveRegisters(codegen, locations); + + // Store the resolved String to the BSS entry. + Register method_address = locations->InAt(0).AsRegister<Register>(); + __ movl(Address(method_address, CodeGeneratorX86::kDummy32BitOffset), + locations->Out().AsRegister<Register>()); + Label* fixup_label = x86_codegen->NewStringBssEntryPatch(instruction_->AsLoadString()); + __ Bind(fixup_label); + + __ jmp(GetExitLabel()); + } + + const char* GetDescription() const OVERRIDE { return "LoadStringSlowPathX86"; } + + private: + DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathX86); +}; + class LoadClassSlowPathX86 : public SlowPathCode { public: LoadClassSlowPathX86(HLoadClass* cls, @@ -4294,7 +4330,8 @@ Location CodeGeneratorX86::GenerateCalleeMethodStaticOrDirectCall(HInvokeStaticO break; case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddressWithFixup: __ movl(temp.AsRegister<Register>(), Immediate(/* placeholder */ 0)); - method_patches_.emplace_back(invoke->GetTargetMethod()); + method_patches_.emplace_back(*invoke->GetTargetMethod().dex_file, + invoke->GetTargetMethod().dex_method_index); __ Bind(&method_patches_.back().label); // Bind the label at the end of the "movl" insn. break; case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative: { @@ -4339,7 +4376,8 @@ void CodeGeneratorX86::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, __ call(GetFrameEntryLabel()); break; case HInvokeStaticOrDirect::CodePtrLocation::kCallPCRelative: { - relative_call_patches_.emplace_back(invoke->GetTargetMethod()); + relative_call_patches_.emplace_back(*invoke->GetTargetMethod().dex_file, + invoke->GetTargetMethod().dex_method_index); Label* label = &relative_call_patches_.back().label; __ call(label); // Bind to the patch label, override at link time. __ Bind(label); // Bind the label at the end of the "call" insn. 
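(Aside: 32-bit x86 has no PC-relative data addressing, so the slow path above stores through Address(method_address, kDummy32BitOffset): the displacement is a placeholder patched at link time relative to the method-address anchor. A hedged sketch of that fixup arithmetic with assumed values:)

#include <cstdint>
#include <cassert>

// The linker rewrites the dummy displacement so that
//   method_address_anchor + displacement == bss_entry_address.
uint32_t PatchDisplacement(uint32_t anchor, uint32_t bss_entry) {
  return bss_entry - anchor;  // value written over kDummy32BitOffset
}

int main() {
  uint32_t anchor = 0x08048123u;     // assumed HX86ComputeBaseMethodAddress value
  uint32_t bss_entry = 0x080a0040u;  // assumed String .bss slot
  uint32_t disp = PatchDisplacement(anchor, bss_entry);
  assert(anchor + disp == bss_entry);
}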
@@ -4398,7 +4436,8 @@ void CodeGeneratorX86::RecordSimplePatch() { } } -void CodeGeneratorX86::RecordStringPatch(HLoadString* load_string) { +void CodeGeneratorX86::RecordBootStringPatch(HLoadString* load_string) { + DCHECK(GetCompilerOptions().IsBootImage()); string_patches_.emplace_back(load_string->GetDexFile(), load_string->GetStringIndex()); __ Bind(&string_patches_.back().label); } @@ -4408,6 +4447,12 @@ void CodeGeneratorX86::RecordTypePatch(HLoadClass* load_class) { __ Bind(&type_patches_.back().label); } +Label* CodeGeneratorX86::NewStringBssEntryPatch(HLoadString* load_string) { + DCHECK(!GetCompilerOptions().IsBootImage()); + string_patches_.emplace_back(load_string->GetDexFile(), load_string->GetStringIndex()); + return &string_patches_.back().label; +} + Label* CodeGeneratorX86::NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file, uint32_t element_offset) { // Add the patch entry and bind its label at the end of the instruction. @@ -4415,6 +4460,21 @@ Label* CodeGeneratorX86::NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file return &pc_relative_dex_cache_patches_.back().label; } +// The label points to the end of the "movl" or another instruction but the literal offset +// for method patch needs to point to the embedded constant which occupies the last 4 bytes. +constexpr uint32_t kLabelPositionToLiteralOffsetAdjustment = 4u; + +template <LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)> +inline void CodeGeneratorX86::EmitPcRelativeLinkerPatches( + const ArenaDeque<PatchInfo<Label>>& infos, + ArenaVector<LinkerPatch>* linker_patches) { + for (const PatchInfo<Label>& info : infos) { + uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment; + linker_patches->push_back( + Factory(literal_offset, &info.dex_file, GetMethodAddressOffset(), info.index)); + } +} + void CodeGeneratorX86::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) { DCHECK(linker_patches->empty()); size_t size = @@ -4425,59 +4485,38 @@ void CodeGeneratorX86::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patche string_patches_.size() + type_patches_.size(); linker_patches->reserve(size); - // The label points to the end of the "movl" insn but the literal offset for method - // patch needs to point to the embedded constant which occupies the last 4 bytes. 
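(Aside: the hoisted kLabelPositionToLiteralOffsetAdjustment constant encodes an easy-to-miss detail: x86 labels here are bound at the end of the instruction, while the linker needs the offset of the embedded 32-bit constant, which occupies the instruction's last 4 bytes. A worked example, assuming the 5-byte movl reg, imm32 encoding:)

#include <cstdint>
#include <cassert>

constexpr uint32_t kLabelPositionToLiteralOffsetAdjustment = 4u;

int main() {
  // movl %eax, $imm32 : 1 opcode byte + 4 immediate bytes = 5 bytes.
  uint32_t insn_start = 0x100u;                 // assumed code offset
  uint32_t label_position = insn_start + 5u;    // label bound after the insn
  uint32_t literal_offset =
      label_position - kLabelPositionToLiteralOffsetAdjustment;
  assert(literal_offset == insn_start + 1u);    // immediate starts after the opcode
}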
- constexpr uint32_t kLabelPositionToLiteralOffsetAdjustment = 4u; - for (const MethodPatchInfo<Label>& info : method_patches_) { - uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment; - linker_patches->push_back(LinkerPatch::MethodPatch(literal_offset, - info.target_method.dex_file, - info.target_method.dex_method_index)); - } - for (const MethodPatchInfo<Label>& info : relative_call_patches_) { + for (const PatchInfo<Label>& info : method_patches_) { uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment; - linker_patches->push_back(LinkerPatch::RelativeCodePatch(literal_offset, - info.target_method.dex_file, - info.target_method.dex_method_index)); + linker_patches->push_back(LinkerPatch::MethodPatch(literal_offset, &info.dex_file, info.index)); } - for (const PcRelativeDexCacheAccessInfo& info : pc_relative_dex_cache_patches_) { + for (const PatchInfo<Label>& info : relative_call_patches_) { uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment; - linker_patches->push_back(LinkerPatch::DexCacheArrayPatch(literal_offset, - &info.target_dex_file, - GetMethodAddressOffset(), - info.element_offset)); + linker_patches->push_back( + LinkerPatch::RelativeCodePatch(literal_offset, &info.dex_file, info.index)); } + EmitPcRelativeLinkerPatches<LinkerPatch::DexCacheArrayPatch>(pc_relative_dex_cache_patches_, + linker_patches); for (const Label& label : simple_patches_) { uint32_t literal_offset = label.Position() - kLabelPositionToLiteralOffsetAdjustment; linker_patches->push_back(LinkerPatch::RecordPosition(literal_offset)); } - if (GetCompilerOptions().GetCompilePic()) { - for (const StringPatchInfo<Label>& info : string_patches_) { - uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment; - linker_patches->push_back(LinkerPatch::RelativeStringPatch(literal_offset, - &info.dex_file, - GetMethodAddressOffset(), - info.string_index)); - } - for (const TypePatchInfo<Label>& info : type_patches_) { - uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment; - linker_patches->push_back(LinkerPatch::RelativeTypePatch(literal_offset, - &info.dex_file, - GetMethodAddressOffset(), - info.type_index)); - } + if (!GetCompilerOptions().IsBootImage()) { + EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(string_patches_, linker_patches); + } else if (GetCompilerOptions().GetCompilePic()) { + EmitPcRelativeLinkerPatches<LinkerPatch::RelativeStringPatch>(string_patches_, linker_patches); } else { - for (const StringPatchInfo<Label>& info : string_patches_) { + for (const PatchInfo<Label>& info : string_patches_) { uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment; - linker_patches->push_back(LinkerPatch::StringPatch(literal_offset, - &info.dex_file, - info.string_index)); + linker_patches->push_back( + LinkerPatch::StringPatch(literal_offset, &info.dex_file, info.index)); } - for (const TypePatchInfo<Label>& info : type_patches_) { + } + if (GetCompilerOptions().GetCompilePic()) { + EmitPcRelativeLinkerPatches<LinkerPatch::RelativeTypePatch>(type_patches_, linker_patches); + } else { + for (const PatchInfo<Label>& info : type_patches_) { uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment; - linker_patches->push_back(LinkerPatch::TypePatch(literal_offset, - &info.dex_file, - info.type_index)); + 
linker_patches->push_back(LinkerPatch::TypePatch(literal_offset, &info.dex_file, info.index)); } } } @@ -5991,7 +6030,7 @@ HLoadString::LoadKind CodeGeneratorX86::GetSupportedLoadStringKind( case HLoadString::LoadKind::kBootImageLinkTimePcRelative: DCHECK(GetCompilerOptions().GetCompilePic()); FALLTHROUGH_INTENDED; - case HLoadString::LoadKind::kDexCachePcRelative: + case HLoadString::LoadKind::kBssEntry: DCHECK(!Runtime::Current()->UseJitCompilation()); // Note: boot image is also non-JIT. // We disable pc-relative load when there is an irreducible loop, as the optimization // is incompatible with it. @@ -6014,13 +6053,15 @@ HLoadString::LoadKind CodeGeneratorX86::GetSupportedLoadStringKind( void LocationsBuilderX86::VisitLoadString(HLoadString* load) { LocationSummary::CallKind call_kind = (load->NeedsEnvironment() || kEmitCompilerReadBarrier) - ? LocationSummary::kCallOnMainOnly + ? ((load->GetLoadKind() == HLoadString::LoadKind::kDexCacheViaMethod) + ? LocationSummary::kCallOnMainOnly + : LocationSummary::kCallOnSlowPath) : LocationSummary::kNoCall; LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind); HLoadString::LoadKind load_kind = load->GetLoadKind(); if (load_kind == HLoadString::LoadKind::kDexCacheViaMethod || load_kind == HLoadString::LoadKind::kBootImageLinkTimePcRelative || - load_kind == HLoadString::LoadKind::kDexCachePcRelative) { + load_kind == HLoadString::LoadKind::kBssEntry) { locations->SetInAt(0, Location::RequiresRegister()); } if (load_kind == HLoadString::LoadKind::kDexCacheViaMethod) { @@ -6038,13 +6079,13 @@ void InstructionCodeGeneratorX86::VisitLoadString(HLoadString* load) { switch (load->GetLoadKind()) { case HLoadString::LoadKind::kBootImageLinkTimeAddress: { __ movl(out, Immediate(/* placeholder */ 0)); - codegen_->RecordStringPatch(load); + codegen_->RecordBootStringPatch(load); return; // No dex cache slow path. } case HLoadString::LoadKind::kBootImageLinkTimePcRelative: { Register method_address = locations->InAt(0).AsRegister<Register>(); __ leal(out, Address(method_address, CodeGeneratorX86::kDummy32BitOffset)); - codegen_->RecordStringPatch(load); + codegen_->RecordBootStringPatch(load); return; // No dex cache slow path. } case HLoadString::LoadKind::kBootImageAddress: { @@ -6054,6 +6095,19 @@ void InstructionCodeGeneratorX86::VisitLoadString(HLoadString* load) { codegen_->RecordSimplePatch(); return; // No dex cache slow path. 
} + case HLoadString::LoadKind::kBssEntry: { + Register method_address = locations->InAt(0).AsRegister<Register>(); + Address address = Address(method_address, CodeGeneratorX86::kDummy32BitOffset); + Label* fixup_label = codegen_->NewStringBssEntryPatch(load); + // /* GcRoot<mirror::Class> */ out = *address /* PC-relative */ + GenerateGcRootFieldLoad(load, out_loc, address, fixup_label); + SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathX86(load); + codegen_->AddSlowPath(slow_path); + __ testl(out, out); + __ j(kEqual, slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); + return; + } default: break; } diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h index 1bd28da178..25f5c2a58f 100644 --- a/compiler/optimizing/code_generator_x86.h +++ b/compiler/optimizing/code_generator_x86.h @@ -411,8 +411,9 @@ class CodeGeneratorX86 : public CodeGenerator { void GenerateVirtualCall(HInvokeVirtual* invoke, Location temp) OVERRIDE; void RecordSimplePatch(); - void RecordStringPatch(HLoadString* load_string); + void RecordBootStringPatch(HLoadString* load_string); void RecordTypePatch(HLoadClass* load_class); + Label* NewStringBssEntryPatch(HLoadString* load_string); Label* NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file, uint32_t element_offset); void MoveFromReturnRegister(Location trg, Primitive::Type type) OVERRIDE; @@ -580,15 +581,9 @@ class CodeGeneratorX86 : public CodeGenerator { private: Register GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke, Register temp); - struct PcRelativeDexCacheAccessInfo { - PcRelativeDexCacheAccessInfo(const DexFile& dex_file, uint32_t element_off) - : target_dex_file(dex_file), element_offset(element_off), label() { } - - const DexFile& target_dex_file; - uint32_t element_offset; - // NOTE: Label is bound to the end of the instruction that has an embedded 32-bit offset. - Label label; - }; + template <LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)> + void EmitPcRelativeLinkerPatches(const ArenaDeque<PatchInfo<Label>>& infos, + ArenaVector<LinkerPatch>* linker_patches); // Labels for each block that will be compiled. Label* block_labels_; // Indexed by block id. @@ -600,16 +595,16 @@ class CodeGeneratorX86 : public CodeGenerator { const X86InstructionSetFeatures& isa_features_; // Method patch info. Using ArenaDeque<> which retains element addresses on push/emplace_back(). - ArenaDeque<MethodPatchInfo<Label>> method_patches_; - ArenaDeque<MethodPatchInfo<Label>> relative_call_patches_; + ArenaDeque<PatchInfo<Label>> method_patches_; + ArenaDeque<PatchInfo<Label>> relative_call_patches_; // PC-relative DexCache access info. - ArenaDeque<PcRelativeDexCacheAccessInfo> pc_relative_dex_cache_patches_; + ArenaDeque<PatchInfo<Label>> pc_relative_dex_cache_patches_; // Patch locations for patchoat where the linker doesn't do any other work. ArenaDeque<Label> simple_patches_; - // String patch locations. - ArenaDeque<StringPatchInfo<Label>> string_patches_; + // String patch locations; type depends on configuration (app .bss or boot image PIC/non-PIC). + ArenaDeque<PatchInfo<Label>> string_patches_; // Type patch locations. - ArenaDeque<TypePatchInfo<Label>> type_patches_; + ArenaDeque<PatchInfo<Label>> type_patches_; // Offset to the start of the constant area in the assembled code. // Used for fixups to the constant area. 
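
The unified PatchInfo<Label> record and the templated EmitPcRelativeLinkerPatches() above replace three near-identical emission loops with one loop parameterized by a LinkerPatch factory function. Below is a minimal standalone sketch of that factory-function-pointer pattern; PatchInfo, LinkerPatch, Label and DexFile here are simplified stand-ins, not the ART classes:

#include <cstdint>
#include <deque>
#include <iostream>
#include <vector>

struct DexFile { const char* name; };
struct Label {
  uint32_t position = 0;
  uint32_t Position() const { return position; }
};

// Unified patch record: target dex file + index + bound label, as in PatchInfo<Label>.
template <typename LabelType>
struct PatchInfo {
  PatchInfo(const DexFile& f, uint32_t i) : dex_file(f), index(i) {}
  const DexFile& dex_file;
  uint32_t index;
  LabelType label;
};

struct LinkerPatch {
  enum class Kind { kRelativeString, kStringBssEntry } kind;
  size_t literal_offset;
  const DexFile* dex_file;
  uint32_t pc_insn_offset;
  uint32_t index;
  static LinkerPatch RelativeStringPatch(size_t off, const DexFile* f, uint32_t pc, uint32_t idx) {
    return {Kind::kRelativeString, off, f, pc, idx};
  }
  static LinkerPatch StringBssEntryPatch(size_t off, const DexFile* f, uint32_t pc, uint32_t idx) {
    return {Kind::kStringBssEntry, off, f, pc, idx};
  }
};

// The embedded 32-bit constant occupies the last 4 bytes of the instruction.
constexpr uint32_t kLabelPositionToLiteralOffsetAdjustment = 4u;

// One loop, instantiated per LinkerPatch factory.
template <LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)>
void EmitPcRelativeLinkerPatches(const std::deque<PatchInfo<Label>>& infos,
                                 uint32_t pc_insn_offset,
                                 std::vector<LinkerPatch>* linker_patches) {
  for (const PatchInfo<Label>& info : infos) {
    uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
    linker_patches->push_back(Factory(literal_offset, &info.dex_file, pc_insn_offset, info.index));
  }
}

int main() {
  DexFile dex{"core.dex"};
  std::deque<PatchInfo<Label>> string_patches;
  string_patches.emplace_back(dex, 42u);
  string_patches.back().label.position = 16u;  // pretend the assembler bound it here
  std::vector<LinkerPatch> patches;
  bool boot_image = false;  // configuration decides which factory is used
  if (!boot_image) {
    EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(string_patches, 0u, &patches);
  } else {
    EmitPcRelativeLinkerPatches<LinkerPatch::RelativeStringPatch>(string_patches, 0u, &patches);
  }
  std::cout << "literal_offset=" << patches[0].literal_offset << " index=" << patches[0].index << "\n";
}

This instantiation trick is what lets a single string_patches_ container serve .bss-entry, PIC boot image, and non-PIC boot image patches alike.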
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index b243ee0c59..28638d721d 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -287,6 +287,44 @@ class LoadClassSlowPathX86_64 : public SlowPathCode {
   DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathX86_64);
 };
 
+class LoadStringSlowPathX86_64 : public SlowPathCode {
+ public:
+  explicit LoadStringSlowPathX86_64(HLoadString* instruction) : SlowPathCode(instruction) {}
+
+  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    LocationSummary* locations = instruction_->GetLocations();
+    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
+
+    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
+    __ Bind(GetEntryLabel());
+    SaveLiveRegisters(codegen, locations);
+
+    InvokeRuntimeCallingConvention calling_convention;
+    const uint32_t string_index = instruction_->AsLoadString()->GetStringIndex();
+    __ movl(CpuRegister(calling_convention.GetRegisterAt(0)), Immediate(string_index));
+    x86_64_codegen->InvokeRuntime(kQuickResolveString,
+                                  instruction_,
+                                  instruction_->GetDexPc(),
+                                  this);
+    CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
+    x86_64_codegen->Move(locations->Out(), Location::RegisterLocation(RAX));
+    RestoreLiveRegisters(codegen, locations);
+
+    // Store the resolved String to the BSS entry.
+    __ movl(Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip */ false),
+            locations->Out().AsRegister<CpuRegister>());
+    Label* fixup_label = x86_64_codegen->NewStringBssEntryPatch(instruction_->AsLoadString());
+    __ Bind(fixup_label);
+
+    __ jmp(GetExitLabel());
+  }
+
+  const char* GetDescription() const OVERRIDE { return "LoadStringSlowPathX86_64"; }
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathX86_64);
+};
+
 class TypeCheckSlowPathX86_64 : public SlowPathCode {
  public:
   TypeCheckSlowPathX86_64(HInstruction* instruction, bool is_fatal)
@@ -772,7 +810,8 @@ Location CodeGeneratorX86_64::GenerateCalleeMethodStaticOrDirectCall(HInvokeStat
       break;
     case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddressWithFixup:
       __ movl(temp.AsRegister<CpuRegister>(), Immediate(0));  // Placeholder.
-      method_patches_.emplace_back(invoke->GetTargetMethod());
+      method_patches_.emplace_back(*invoke->GetTargetMethod().dex_file,
+                                   invoke->GetTargetMethod().dex_method_index);
       __ Bind(&method_patches_.back().label);  // Bind the label at the end of the "movl" insn.
       break;
     case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative: {
@@ -819,7 +858,8 @@ void CodeGeneratorX86_64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invo
       __ call(&frame_entry_label_);
       break;
     case HInvokeStaticOrDirect::CodePtrLocation::kCallPCRelative: {
-      relative_call_patches_.emplace_back(invoke->GetTargetMethod());
+      relative_call_patches_.emplace_back(*invoke->GetTargetMethod().dex_file,
+                                          invoke->GetTargetMethod().dex_method_index);
       Label* label = &relative_call_patches_.back().label;
       __ call(label);  // Bind to the patch label, override at link time.
       __ Bind(label);  // Bind the label at the end of the "call" insn.
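
LoadStringSlowPathX86_64 above, together with the kBssEntry fast path, implements a resolve-then-cache contract: the fast path does one PC-relative load from a .bss slot, a null result branches to the slow path, and the slow path calls kQuickResolveString and writes the result back into the slot so later executions stay on the fast path. A standalone model of that contract follows; LoadString, ResolveString and bss_string_entries are illustrative names for this sketch, not ART symbols:

#include <cstdint>
#include <iostream>
#include <string>
#include <unordered_map>
#include <vector>

using StringRef = const std::string*;

// Stand-in for the per-oat-file .bss string entries, zero-initialized by the loader.
std::vector<StringRef> bss_string_entries(16, nullptr);

// Stand-in for the kQuickResolveString runtime entrypoint.
StringRef ResolveString(uint32_t string_index) {
  static std::unordered_map<uint32_t, std::string> interned;
  auto it = interned.emplace(string_index, "string#" + std::to_string(string_index)).first;
  std::cout << "slow path: resolved index " << string_index << "\n";
  return &it->second;
}

StringRef LoadString(uint32_t string_index) {
  // Fast path: a single PC-relative load in the generated code.
  StringRef out = bss_string_entries[string_index];
  if (out == nullptr) {  // testl out, out; j.eq slow_path
    out = ResolveString(string_index);
    // Store the resolved String to the BSS entry (done by the slow path).
    bss_string_entries[string_index] = out;
  }
  return out;
}

int main() {
  LoadString(3);  // takes the slow path, populates the entry
  LoadString(3);  // fast path only
}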
@@ -879,7 +919,8 @@ void CodeGeneratorX86_64::RecordSimplePatch() {
   }
 }
 
-void CodeGeneratorX86_64::RecordStringPatch(HLoadString* load_string) {
+void CodeGeneratorX86_64::RecordBootStringPatch(HLoadString* load_string) {
+  DCHECK(GetCompilerOptions().IsBootImage());
   string_patches_.emplace_back(load_string->GetDexFile(), load_string->GetStringIndex());
   __ Bind(&string_patches_.back().label);
 }
@@ -889,6 +930,12 @@ void CodeGeneratorX86_64::RecordTypePatch(HLoadClass* load_class) {
   __ Bind(&type_patches_.back().label);
 }
 
+Label* CodeGeneratorX86_64::NewStringBssEntryPatch(HLoadString* load_string) {
+  DCHECK(!GetCompilerOptions().IsBootImage());
+  string_patches_.emplace_back(load_string->GetDexFile(), load_string->GetStringIndex());
+  return &string_patches_.back().label;
+}
+
 Label* CodeGeneratorX86_64::NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file,
                                                             uint32_t element_offset) {
   // Add a patch entry and return the label.
@@ -896,6 +943,21 @@ Label* CodeGeneratorX86_64::NewPcRelativeDexCacheArrayPatch(const DexFile& dex_f
   return &pc_relative_dex_cache_patches_.back().label;
 }
 
+// The label points to the end of the "movl" or another instruction but the literal offset
+// for the patch needs to point to the embedded constant which occupies the last 4 bytes.
+constexpr uint32_t kLabelPositionToLiteralOffsetAdjustment = 4u;
+
+template <LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)>
+inline void CodeGeneratorX86_64::EmitPcRelativeLinkerPatches(
+    const ArenaDeque<PatchInfo<Label>>& infos,
+    ArenaVector<LinkerPatch>* linker_patches) {
+  for (const PatchInfo<Label>& info : infos) {
+    uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
+    linker_patches->push_back(
+        Factory(literal_offset, &info.dex_file, info.label.Position(), info.index));
+  }
+}
+
 void CodeGeneratorX86_64::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) {
   DCHECK(linker_patches->empty());
   size_t size =
@@ -906,48 +968,29 @@ void CodeGeneratorX86_64::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_pat
       string_patches_.size() +
       type_patches_.size();
   linker_patches->reserve(size);
-  // The label points to the end of the "movl" insn but the literal offset for method
-  // patch needs to point to the embedded constant which occupies the last 4 bytes.
-  constexpr uint32_t kLabelPositionToLiteralOffsetAdjustment = 4u;
-  for (const MethodPatchInfo<Label>& info : method_patches_) {
-    uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
-    linker_patches->push_back(LinkerPatch::MethodPatch(literal_offset,
-                                                       info.target_method.dex_file,
-                                                       info.target_method.dex_method_index));
-  }
-  for (const MethodPatchInfo<Label>& info : relative_call_patches_) {
+  for (const PatchInfo<Label>& info : method_patches_) {
     uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
-    linker_patches->push_back(LinkerPatch::RelativeCodePatch(literal_offset,
-                                                             info.target_method.dex_file,
-                                                             info.target_method.dex_method_index));
+    linker_patches->push_back(LinkerPatch::MethodPatch(literal_offset, &info.dex_file, info.index));
   }
-  for (const PcRelativeDexCacheAccessInfo& info : pc_relative_dex_cache_patches_) {
+  for (const PatchInfo<Label>& info : relative_call_patches_) {
     uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
-    linker_patches->push_back(LinkerPatch::DexCacheArrayPatch(literal_offset,
-                                                              &info.target_dex_file,
-                                                              info.label.Position(),
-                                                              info.element_offset));
+    linker_patches->push_back(
+        LinkerPatch::RelativeCodePatch(literal_offset, &info.dex_file, info.index));
   }
+  EmitPcRelativeLinkerPatches<LinkerPatch::DexCacheArrayPatch>(pc_relative_dex_cache_patches_,
+                                                               linker_patches);
   for (const Label& label : simple_patches_) {
     uint32_t literal_offset = label.Position() - kLabelPositionToLiteralOffsetAdjustment;
     linker_patches->push_back(LinkerPatch::RecordPosition(literal_offset));
   }
-  for (const StringPatchInfo<Label>& info : string_patches_) {
+  if (!GetCompilerOptions().IsBootImage()) {
+    EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(string_patches_, linker_patches);
+  } else {
     // These are always PC-relative, see GetSupportedLoadStringKind().
-    uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
-    linker_patches->push_back(LinkerPatch::RelativeStringPatch(literal_offset,
-                                                               &info.dex_file,
-                                                               info.label.Position(),
-                                                               info.string_index));
-  }
-  for (const TypePatchInfo<Label>& info : type_patches_) {
-    // These are always PC-relative, see GetSupportedLoadClassKind().
-    uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
-    linker_patches->push_back(LinkerPatch::RelativeTypePatch(literal_offset,
-                                                             &info.dex_file,
-                                                             info.label.Position(),
-                                                             info.type_index));
+    EmitPcRelativeLinkerPatches<LinkerPatch::RelativeStringPatch>(string_patches_, linker_patches);
   }
+  // These are always PC-relative, see GetSupportedLoadClassKind().
+  EmitPcRelativeLinkerPatches<LinkerPatch::RelativeTypePatch>(type_patches_, linker_patches);
 }
 
 void CodeGeneratorX86_64::DumpCoreRegister(std::ostream& stream, int reg) const {
@@ -5390,7 +5433,7 @@ HLoadString::LoadKind CodeGeneratorX86_64::GetSupportedLoadStringKind(
     case HLoadString::LoadKind::kDexCacheAddress:
       DCHECK(Runtime::Current()->UseJitCompilation());
       break;
-    case HLoadString::LoadKind::kDexCachePcRelative:
+    case HLoadString::LoadKind::kBssEntry:
       DCHECK(!Runtime::Current()->UseJitCompilation());
       break;
     case HLoadString::LoadKind::kDexCacheViaMethod:
@@ -5401,7 +5444,9 @@ HLoadString::LoadKind CodeGeneratorX86_64::GetSupportedLoadStringKind(
 
 void LocationsBuilderX86_64::VisitLoadString(HLoadString* load) {
   LocationSummary::CallKind call_kind = load->NeedsEnvironment()
-      ? LocationSummary::kCallOnMainOnly
+      ? ((load->GetLoadKind() == HLoadString::LoadKind::kDexCacheViaMethod)
+          ? LocationSummary::kCallOnMainOnly
+          : LocationSummary::kCallOnSlowPath)
       : LocationSummary::kNoCall;
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind);
   if (load->GetLoadKind() == HLoadString::LoadKind::kDexCacheViaMethod) {
@@ -5420,7 +5465,7 @@ void InstructionCodeGeneratorX86_64::VisitLoadString(HLoadString* load) {
   switch (load->GetLoadKind()) {
     case HLoadString::LoadKind::kBootImageLinkTimePcRelative: {
      __ leal(out, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip */ false));
-      codegen_->RecordStringPatch(load);
+      codegen_->RecordBootStringPatch(load);
       return;  // No dex cache slow path.
     }
     case HLoadString::LoadKind::kBootImageAddress: {
@@ -5430,6 +5475,19 @@ void InstructionCodeGeneratorX86_64::VisitLoadString(HLoadString* load) {
       codegen_->RecordSimplePatch();
       return;  // No dex cache slow path.
     }
+    case HLoadString::LoadKind::kBssEntry: {
+      Address address = Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset,
+                                          /* no_rip */ false);
+      Label* fixup_label = codegen_->NewStringBssEntryPatch(load);
+      // /* GcRoot<mirror::String> */ out = *address  /* PC-relative */
+      GenerateGcRootFieldLoad(load, out_loc, address, fixup_label);
+      SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathX86_64(load);
+      codegen_->AddSlowPath(slow_path);
+      __ testl(out, out);
+      __ j(kEqual, slow_path->GetEntryLabel());
+      __ Bind(slow_path->GetExitLabel());
+      return;
+    }
     default:
       break;
   }
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index 8dec44eb03..57ef83f621 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -406,8 +406,9 @@ class CodeGeneratorX86_64 : public CodeGenerator {
   void GenerateVirtualCall(HInvokeVirtual* invoke, Location temp) OVERRIDE;
 
   void RecordSimplePatch();
-  void RecordStringPatch(HLoadString* load_string);
+  void RecordBootStringPatch(HLoadString* load_string);
   void RecordTypePatch(HLoadClass* load_class);
+  Label* NewStringBssEntryPatch(HLoadString* load_string);
   Label* NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file, uint32_t element_offset);
 
   void MoveFromReturnRegister(Location trg, Primitive::Type type) OVERRIDE;
@@ -555,14 +556,9 @@ class CodeGeneratorX86_64 : public CodeGenerator {
   static constexpr int32_t kDummy32BitOffset = 256;
 
  private:
-  struct PcRelativeDexCacheAccessInfo {
-    PcRelativeDexCacheAccessInfo(const DexFile& dex_file, uint32_t element_off)
-        : target_dex_file(dex_file), element_offset(element_off), label() { }
-
-    const DexFile& target_dex_file;
-    uint32_t element_offset;
-    Label label;
-  };
+  template <LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)>
+  static void EmitPcRelativeLinkerPatches(const ArenaDeque<PatchInfo<Label>>& infos,
+                                          ArenaVector<LinkerPatch>* linker_patches);
 
   // Labels for each block that will be compiled.
   Label* block_labels_;  // Indexed by block id.
@@ -578,16 +574,16 @@ class CodeGeneratorX86_64 : public CodeGenerator {
   int constant_area_start_;
 
   // Method patch info. Using ArenaDeque<> which retains element addresses on push/emplace_back().
-  ArenaDeque<MethodPatchInfo<Label>> method_patches_;
-  ArenaDeque<MethodPatchInfo<Label>> relative_call_patches_;
+  ArenaDeque<PatchInfo<Label>> method_patches_;
+  ArenaDeque<PatchInfo<Label>> relative_call_patches_;
   // PC-relative DexCache access info.
-  ArenaDeque<PcRelativeDexCacheAccessInfo> pc_relative_dex_cache_patches_;
+  ArenaDeque<PatchInfo<Label>> pc_relative_dex_cache_patches_;
   // Patch locations for patchoat where the linker doesn't do any other work.
   ArenaDeque<Label> simple_patches_;
-  // String patch locations.
-  ArenaDeque<StringPatchInfo<Label>> string_patches_;
+  // String patch locations; type depends on configuration (app .bss or boot image PIC).
+  ArenaDeque<PatchInfo<Label>> string_patches_;
   // Type patch locations.
-  ArenaDeque<TypePatchInfo<Label>> type_patches_;
+  ArenaDeque<PatchInfo<Label>> type_patches_;
 
   // Fixups for jump tables need to be handled specially.
   ArenaVector<JumpTableRIPFixup*> fixups_to_jump_tables_;
diff --git a/compiler/optimizing/dex_cache_array_fixups_arm.cc b/compiler/optimizing/dex_cache_array_fixups_arm.cc
index 7010171c80..82b81238ab 100644
--- a/compiler/optimizing/dex_cache_array_fixups_arm.cc
+++ b/compiler/optimizing/dex_cache_array_fixups_arm.cc
@@ -62,21 +62,6 @@ class DexCacheArrayFixupsVisitor : public HGraphVisitor {
     }
   }
 
-  void VisitLoadString(HLoadString* load_string) OVERRIDE {
-    // If this is a load with PC-relative access to the dex cache strings array,
-    // we need to add the dex cache arrays base as the special input.
-    if (load_string->GetLoadKind() == HLoadString::LoadKind::kDexCachePcRelative) {
-      // Initialize base for target dex file if needed.
-      const DexFile& dex_file = load_string->GetDexFile();
-      HArmDexCacheArraysBase* base = GetOrCreateDexCacheArrayBase(dex_file);
-      // Update the element offset in base.
-      DexCacheArraysLayout layout(kArmPointerSize, &dex_file);
-      base->UpdateElementOffset(layout.StringOffset(load_string->GetStringIndex()));
-      // Add the special argument base to the load.
-      load_string->AddSpecialInput(base);
-    }
-  }
-
   void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE {
     // If this is an invoke with PC-relative access to the dex cache methods array,
     // we need to add the dex cache arrays base as the special input.
diff --git a/compiler/optimizing/dex_cache_array_fixups_mips.cc b/compiler/optimizing/dex_cache_array_fixups_mips.cc
index 4456b49e87..31fff26dd5 100644
--- a/compiler/optimizing/dex_cache_array_fixups_mips.cc
+++ b/compiler/optimizing/dex_cache_array_fixups_mips.cc
@@ -68,21 +68,6 @@ class DexCacheArrayFixupsVisitor : public HGraphVisitor {
     }
   }
 
-  void VisitLoadString(HLoadString* load_string) OVERRIDE {
-    // If this is a load with PC-relative access to the dex cache strings array,
-    // we need to add the dex cache arrays base as the special input.
-    if (load_string->GetLoadKind() == HLoadString::LoadKind::kDexCachePcRelative) {
-      // Initialize base for target dex file if needed.
-      const DexFile& dex_file = load_string->GetDexFile();
-      HMipsDexCacheArraysBase* base = GetOrCreateDexCacheArrayBase(dex_file);
-      // Update the element offset in base.
-      DexCacheArraysLayout layout(kMipsPointerSize, &dex_file);
-      base->UpdateElementOffset(layout.StringOffset(load_string->GetStringIndex()));
-      // Add the special argument base to the load.
-      load_string->AddSpecialInput(base);
-    }
-  }
-
   void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE {
     // If this is an invoke with PC-relative access to the dex cache methods array,
     // we need to add the dex cache arrays base as the special input.
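
Strings no longer reach these dex-cache-array fixup passes: with kBssEntry the base register is added by the PC-relative fixup passes instead (see pc_relative_fixups_x86.cc and pc_relative_fixups_mips.cc further down), while invokes keep the lazily shared dex-cache-arrays base shown above. A simplified standalone sketch of that shared-base pattern, assuming toy Node/visitor types rather than the real HInstruction classes:

#include <deque>
#include <iostream>
#include <map>
#include <string>
#include <vector>

struct Node {
  std::string name;
  std::vector<Node*> inputs;
  void AddSpecialInput(Node* base) { inputs.push_back(base); }
};

class PcRelativeFixupPass {
 public:
  void VisitInvoke(Node* invoke, const std::string& dex_file) {
    Node* base = GetOrCreateBase(dex_file);
    invoke->AddSpecialInput(base);  // codegen later materializes the base once
  }

 private:
  Node* GetOrCreateBase(const std::string& dex_file) {
    auto it = bases_.find(dex_file);
    if (it == bases_.end()) {
      owned_.push_back(Node{"base(" + dex_file + ")", {}});
      it = bases_.emplace(dex_file, &owned_.back()).first;
    }
    return it->second;
  }
  std::map<std::string, Node*> bases_;
  std::deque<Node> owned_;  // deque keeps element addresses stable
};

int main() {
  PcRelativeFixupPass pass;
  Node invoke1{"invoke1", {}};
  Node invoke2{"invoke2", {}};
  pass.VisitInvoke(&invoke1, "core.dex");
  pass.VisitInvoke(&invoke2, "core.dex");
  std::cout << (invoke1.inputs[0] == invoke2.inputs[0]) << "\n";  // 1: base is shared
}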
diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc
new file mode 100644
index 0000000000..383a0278c6
--- /dev/null
+++ b/compiler/optimizing/loop_optimization.cc
@@ -0,0 +1,320 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "loop_optimization.h"
+
+#include "base/arena_containers.h"
+#include "induction_var_range.h"
+#include "ssa_liveness_analysis.h"
+#include "nodes.h"
+
+namespace art {
+
+// TODO: Generalize to cycles, as found by induction analysis?
+static bool IsPhiAddSub(HPhi* phi, /*out*/ HInstruction** addsub_out) {
+  HInputsRef inputs = phi->GetInputs();
+  if (inputs.size() == 2 && (inputs[1]->IsAdd() || inputs[1]->IsSub())) {
+    HInstruction* addsub = inputs[1];
+    if (addsub->InputAt(0) == phi || addsub->InputAt(1) == phi) {
+      if (addsub->GetUses().HasExactlyOneElement()) {
+        *addsub_out = addsub;
+        return true;
+      }
+    }
+  }
+  return false;
+}
+
+static bool IsOnlyUsedAfterLoop(const HLoopInformation& loop_info,
+                                HPhi* phi, HInstruction* addsub) {
+  for (const HUseListNode<HInstruction*>& use : phi->GetUses()) {
+    if (use.GetUser() != addsub) {
+      HLoopInformation* other_loop_info = use.GetUser()->GetBlock()->GetLoopInformation();
+      if (other_loop_info != nullptr && other_loop_info->IsIn(loop_info)) {
+        return false;
+      }
+    }
+  }
+  return true;
+}
+
+// Find: phi: Phi(init, addsub)
+//       s:   SuspendCheck
+//       c:   Condition(phi, bound)
+//       i:   If(c)
+// TODO: Find a less pattern-matching approach?
+static bool IsEmptyHeader(HBasicBlock* block, /*out*/ HInstruction** addsub) {
+  HInstruction* phi = block->GetFirstPhi();
+  if (phi != nullptr && phi->GetNext() == nullptr && IsPhiAddSub(phi->AsPhi(), addsub)) {
+    HInstruction* s = block->GetFirstInstruction();
+    if (s != nullptr && s->IsSuspendCheck()) {
+      HInstruction* c = s->GetNext();
+      if (c != nullptr && c->IsCondition() && c->GetUses().HasExactlyOneElement()) {
+        HInstruction* i = c->GetNext();
+        if (i != nullptr && i->IsIf() && i->InputAt(0) == c) {
+          // Check that phi is only used inside loop as expected.
+          for (const HUseListNode<HInstruction*>& use : phi->GetUses()) {
+            if (use.GetUser() != *addsub && use.GetUser() != c) {
+              return false;
+            }
+          }
+          return true;
+        }
+      }
+    }
+  }
+  return false;
+}
+
+static bool IsEmptyBody(HBasicBlock* block, HInstruction* addsub) {
+  HInstruction* phi = block->GetFirstPhi();
+  HInstruction* i = block->GetFirstInstruction();
+  return phi == nullptr && i == addsub && i->GetNext() != nullptr && i->GetNext()->IsGoto();
+}
+
+static HBasicBlock* TryRemovePreHeader(HBasicBlock* preheader, HBasicBlock* entry_block) {
+  if (preheader->GetPredecessors().size() == 1) {
+    HBasicBlock* entry = preheader->GetSinglePredecessor();
+    HInstruction* anchor = entry->GetLastInstruction();
+    // If the pre-header has a single predecessor we can remove it too if
+    // either the pre-header just contains a goto, or if the predecessor
+    // is not the entry block so we can push instructions backward
+    // (moving computation into the entry block is too dangerous!).
+    if (preheader->GetFirstInstruction() == nullptr ||
+        preheader->GetFirstInstruction()->IsGoto() ||
+        (entry != entry_block && anchor->IsGoto())) {
+      // Push non-goto statements backward to empty the pre-header.
+      for (HInstructionIterator it(preheader->GetInstructions()); !it.Done(); it.Advance()) {
+        HInstruction* instruction = it.Current();
+        if (!instruction->IsGoto()) {
+          if (!instruction->CanBeMoved()) {
+            return nullptr;  // pushing failed to move all
+          }
+          it.Current()->MoveBefore(anchor);
+        }
+      }
+      return entry;
+    }
+  }
+  return nullptr;
+}
+
+static void RemoveFromCycle(HInstruction* instruction) {
+  // A bit more elaborate than the usual instruction removal,
+  // since there may be a cycle in the use structure.
+  instruction->RemoveAsUserOfAllInputs();
+  instruction->RemoveEnvironmentUsers();
+  instruction->GetBlock()->RemoveInstructionOrPhi(instruction, /*ensure_safety=*/ false);
+}
+
+//
+// Class methods.
+//
+
+HLoopOptimization::HLoopOptimization(HGraph* graph,
+                                     HInductionVarAnalysis* induction_analysis)
+    : HOptimization(graph, kLoopOptimizationPassName),
+      induction_range_(induction_analysis),
+      loop_allocator_(nullptr),
+      top_loop_(nullptr),
+      last_loop_(nullptr) {
+}
+
+void HLoopOptimization::Run() {
+  // Well-behaved loops only.
+  // TODO: make this less of a sledgehammer.
+  if (graph_->HasTryCatch() || graph_->HasIrreducibleLoops()) {
+    return;
+  }
+
+  ArenaAllocator allocator(graph_->GetArena()->GetArenaPool());
+  loop_allocator_ = &allocator;
+
+  // Build the linear order. This step enables building a loop hierarchy that
+  // properly reflects the outer-inner and previous-next relation.
+  graph_->Linearize();
+  // Build the loop hierarchy.
+  for (HLinearOrderIterator it_graph(*graph_); !it_graph.Done(); it_graph.Advance()) {
+    HBasicBlock* block = it_graph.Current();
+    if (block->IsLoopHeader()) {
+      AddLoop(block->GetLoopInformation());
+    }
+  }
+  if (top_loop_ != nullptr) {
+    // Traverse the loop hierarchy inner-to-outer and optimize.
+    TraverseLoopsInnerToOuter(top_loop_);
+  }
+  loop_allocator_ = nullptr;
+}
+
+void HLoopOptimization::AddLoop(HLoopInformation* loop_info) {
+  DCHECK(loop_info != nullptr);
+  LoopNode* node = new (loop_allocator_) LoopNode(loop_info);  // phase-local allocator
+  if (last_loop_ == nullptr) {
+    // First loop.
+    DCHECK(top_loop_ == nullptr);
+    last_loop_ = top_loop_ = node;
+  } else if (loop_info->IsIn(*last_loop_->loop_info)) {
+    // Inner loop.
+    node->outer = last_loop_;
+    DCHECK(last_loop_->inner == nullptr);
+    last_loop_ = last_loop_->inner = node;
+  } else {
+    // Subsequent loop.
+    while (last_loop_->outer != nullptr && !loop_info->IsIn(*last_loop_->outer->loop_info)) {
+      last_loop_ = last_loop_->outer;
+    }
+    node->outer = last_loop_->outer;
+    node->previous = last_loop_;
+    DCHECK(last_loop_->next == nullptr);
+    last_loop_ = last_loop_->next = node;
+  }
+}
+
+void HLoopOptimization::RemoveLoop(LoopNode* node) {
+  DCHECK(node != nullptr);
+  // TODO: implement when needed (for the current set of optimizations, we don't
+  // need to keep the recorded loop hierarchy up to date, but as we get different
+  // traversals, we may want to remove the node from the hierarchy here).
+}
+
+void HLoopOptimization::TraverseLoopsInnerToOuter(LoopNode* node) {
+  for ( ; node != nullptr; node = node->next) {
+    if (node->inner != nullptr) {
+      TraverseLoopsInnerToOuter(node->inner);
+    }
+    // Visit loop after its inner loops have been visited.
+    SimplifyInduction(node);
+    RemoveIfEmptyLoop(node);
+  }
+}
+
+void HLoopOptimization::SimplifyInduction(LoopNode* node) {
+  HBasicBlock* header = node->loop_info->GetHeader();
+  HBasicBlock* preheader = node->loop_info->GetPreHeader();
+  // Scan the phis in the header to find opportunities to optimize induction.
+  for (HInstructionIterator it(header->GetPhis()); !it.Done(); it.Advance()) {
+    HPhi* phi = it.Current()->AsPhi();
+    HInstruction* addsub = nullptr;
+    // Find phi-add/sub cycle.
+    if (IsPhiAddSub(phi, &addsub)) {
+      // Simple case, the induction is only used by itself. Although redundant,
+      // later phases do not easily detect this property. Thus, eliminate here.
+      // Example: for (int i = 0; x != null; i++) { .... no i .... }
+      if (phi->GetUses().HasExactlyOneElement()) {
+        // Remove the cycle, including all uses. Even environment uses can be removed,
+        // since these computations have no effect at all.
+        RemoveFromCycle(phi);  // removes environment uses too
+        RemoveFromCycle(addsub);
+        continue;
+      }
+      // Closed form case. Only the last value of the induction is needed. Remove all
+      // overhead from the loop, and replace subsequent uses with the last value.
+      // Example: for (int i = 0; i < 10; i++, k++) { .... no k .... } return k;
+      if (IsOnlyUsedAfterLoop(*node->loop_info, phi, addsub) &&
+          induction_range_.CanGenerateLastValue(phi)) {
+        HInstruction* last = induction_range_.GenerateLastValue(phi, graph_, preheader);
+        // Remove the cycle, replacing all uses. Even environment uses can consume the final
+        // value, since any first real use is outside the loop (although this may imply
+        // that deopting may look "ahead" a bit on the phi value).
+        ReplaceAllUses(phi, last, addsub);
+        RemoveFromCycle(phi);  // removes environment uses too
+        RemoveFromCycle(addsub);
+      }
+    }
+  }
+}
+
+void HLoopOptimization::RemoveIfEmptyLoop(LoopNode* node) {
+  HBasicBlock* header = node->loop_info->GetHeader();
+  HBasicBlock* preheader = node->loop_info->GetPreHeader();
+  // Ensure there is only a single loop-body (besides the header).
+  HBasicBlock* body = nullptr;
+  for (HBlocksInLoopIterator it(*node->loop_info); !it.Done(); it.Advance()) {
+    if (it.Current() != header) {
+      if (body != nullptr) {
+        return;
+      }
+      body = it.Current();
+    }
+  }
+  // Ensure there is only a single exit point.
+  if (header->GetSuccessors().size() != 2) {
+    return;
+  }
+  HBasicBlock* exit = (header->GetSuccessors()[0] == body)
+      ? header->GetSuccessors()[1]
+      : header->GetSuccessors()[0];
+  // Ensure the exit can only be reached by exiting the loop (this seems typically the
+  // case anyway, and simplifies code generation below; TODO: perhaps relax?).
+  if (exit->GetPredecessors().size() != 1) {
+    return;
+  }
+  // Detect an empty loop: no side effects other than plain iteration.
+  HInstruction* addsub = nullptr;
+  if (IsEmptyHeader(header, &addsub) && IsEmptyBody(body, addsub)) {
+    HBasicBlock* entry = TryRemovePreHeader(preheader, graph_->GetEntryBlock());
+    body->DisconnectAndDelete();
+    exit->RemovePredecessor(header);
+    header->RemoveSuccessor(exit);
+    header->ClearDominanceInformation();
+    header->SetDominator(preheader);  // needed by next disconnect.
+    header->DisconnectAndDelete();
+    // If allowed, remove the preheader too, which may expose the next outer empty loop.
+    // Otherwise, link the preheader directly to the exit to restore the flow graph.
+    if (entry != nullptr) {
+      entry->ReplaceSuccessor(preheader, exit);
+      entry->AddDominatedBlock(exit);
+      exit->SetDominator(entry);
+      preheader->DisconnectAndDelete();
+    } else {
+      preheader->AddSuccessor(exit);
+      preheader->AddInstruction(new (graph_->GetArena()) HGoto());  // global allocator
+      preheader->AddDominatedBlock(exit);
+      exit->SetDominator(preheader);
+    }
+    // Update hierarchy.
+    RemoveLoop(node);
+  }
+}
+
+void HLoopOptimization::ReplaceAllUses(HInstruction* instruction,
+                                       HInstruction* replacement,
+                                       HInstruction* exclusion) {
+  const HUseList<HInstruction*>& uses = instruction->GetUses();
+  for (auto it = uses.begin(), end = uses.end(); it != end;) {
+    HInstruction* user = it->GetUser();
+    size_t index = it->GetIndex();
+    ++it;  // increment before replacing
+    if (user != exclusion) {
+      user->ReplaceInput(replacement, index);
+      induction_range_.Replace(user, instruction, replacement);  // update induction
+    }
+  }
+  const HUseList<HEnvironment*>& env_uses = instruction->GetEnvUses();
+  for (auto it = env_uses.begin(), end = env_uses.end(); it != end;) {
+    HEnvironment* user = it->GetUser();
+    size_t index = it->GetIndex();
+    ++it;  // increment before replacing
+    if (user->GetHolder() != exclusion) {
+      user->RemoveAsUserOfInput(index);
+      user->SetRawEnvAt(index, replacement);
+      replacement->AddEnvUseAt(user, index);
+    }
+  }
+}
+
+}  // namespace art
diff --git a/compiler/optimizing/loop_optimization.h b/compiler/optimizing/loop_optimization.h
new file mode 100644
index 0000000000..d12fe5ee18
--- /dev/null
+++ b/compiler/optimizing/loop_optimization.h
@@ -0,0 +1,88 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_LOOP_OPTIMIZATION_H_
+#define ART_COMPILER_OPTIMIZING_LOOP_OPTIMIZATION_H_
+
+#include <string>
+
+#include "induction_var_range.h"
+#include "nodes.h"
+#include "optimization.h"
+
+namespace art {
+
+/**
+ * Loop optimizations. Builds a loop hierarchy and applies optimizations to
+ * the detected nested loops, such as removal of dead induction and empty loops.
+ */
+class HLoopOptimization : public HOptimization {
+ public:
+  HLoopOptimization(HGraph* graph, HInductionVarAnalysis* induction_analysis);
+
+  void Run() OVERRIDE;
+
+  static constexpr const char* kLoopOptimizationPassName = "loop_optimization";
+
+ private:
+  /**
+   * A single loop inside the loop hierarchy representation.
+   */
+  struct LoopNode : public ArenaObject<kArenaAllocInductionVarAnalysis> {
+    explicit LoopNode(HLoopInformation* lp_info)
+        : loop_info(lp_info),
+          outer(nullptr),
+          inner(nullptr),
+          previous(nullptr),
+          next(nullptr) {}
+    const HLoopInformation* const loop_info;
+    LoopNode* outer;
+    LoopNode* inner;
+    LoopNode* previous;
+    LoopNode* next;
+  };
+
+  void AddLoop(HLoopInformation* loop_info);
+  void RemoveLoop(LoopNode* node);
+
+  void TraverseLoopsInnerToOuter(LoopNode* node);
+
+  void SimplifyInduction(LoopNode* node);
+  void RemoveIfEmptyLoop(LoopNode* node);
+
+  void ReplaceAllUses(HInstruction* instruction,
+                      HInstruction* replacement,
+                      HInstruction* exclusion);
+
+  // Range analysis based on induction variables.
+  InductionVarRange induction_range_;
+
+  // Phase-local heap memory allocator for the loop optimizer. Storage obtained
+  // through this allocator is released when the loop optimizer is done.
+  ArenaAllocator* loop_allocator_;
+
+  // Entries into the loop hierarchy representation.
+  LoopNode* top_loop_;
+  LoopNode* last_loop_;
+
+  friend class LoopOptimizationTest;
+
+  DISALLOW_COPY_AND_ASSIGN(HLoopOptimization);
+};
+
+}  // namespace art
+
+#endif  // ART_COMPILER_OPTIMIZING_LOOP_OPTIMIZATION_H_
diff --git a/compiler/optimizing/loop_optimization_test.cc b/compiler/optimizing/loop_optimization_test.cc
new file mode 100644
index 0000000000..4e007d4e9a
--- /dev/null
+++ b/compiler/optimizing/loop_optimization_test.cc
@@ -0,0 +1,193 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "loop_optimization.h"
+#include "optimizing_unit_test.h"
+
+namespace art {
+
+/**
+ * Fixture class for the loop optimization tests. These unit tests focus on
+ * constructing the loop hierarchy. Actual optimizations are tested through
+ * the checker tests.
+ */
+class LoopOptimizationTest : public CommonCompilerTest {
+ public:
+  LoopOptimizationTest()
+      : pool_(),
+        allocator_(&pool_),
+        graph_(CreateGraph(&allocator_)),
+        iva_(new (&allocator_) HInductionVarAnalysis(graph_)),
+        loop_opt_(new (&allocator_) HLoopOptimization(graph_, iva_)) {
+    BuildGraph();
+  }
+
+  ~LoopOptimizationTest() { }
+
+  /** Constructs a bare minimum graph. */
+  void BuildGraph() {
+    graph_->SetNumberOfVRegs(1);
+    entry_block_ = new (&allocator_) HBasicBlock(graph_);
+    return_block_ = new (&allocator_) HBasicBlock(graph_);
+    exit_block_ = new (&allocator_) HBasicBlock(graph_);
+    graph_->AddBlock(entry_block_);
+    graph_->AddBlock(return_block_);
+    graph_->AddBlock(exit_block_);
+    graph_->SetEntryBlock(entry_block_);
+    graph_->SetExitBlock(exit_block_);
+    parameter_ = new (&allocator_) HParameterValue(graph_->GetDexFile(), 0, 0, Primitive::kPrimInt);
+    entry_block_->AddInstruction(parameter_);
+    return_block_->AddInstruction(new (&allocator_) HReturnVoid());
+    exit_block_->AddInstruction(new (&allocator_) HExit());
+    entry_block_->AddSuccessor(return_block_);
+    return_block_->AddSuccessor(exit_block_);
+  }
+
+  /** Adds a loop nest at the given position before the successor. */
+  HBasicBlock* AddLoop(HBasicBlock* position, HBasicBlock* successor) {
+    HBasicBlock* header = new (&allocator_) HBasicBlock(graph_);
+    HBasicBlock* body = new (&allocator_) HBasicBlock(graph_);
+    graph_->AddBlock(header);
+    graph_->AddBlock(body);
+    // Control flow.
+    position->ReplaceSuccessor(successor, header);
+    header->AddSuccessor(body);
+    header->AddSuccessor(successor);
+    header->AddInstruction(new (&allocator_) HIf(parameter_));
+    body->AddSuccessor(header);
+    body->AddInstruction(new (&allocator_) HGoto());
+    return header;
+  }
+
+  /** Performs analysis. */
+  void PerformAnalysis() {
+    graph_->BuildDominatorTree();
+    iva_->Run();
+    loop_opt_->Run();
+  }
+
+  /** Constructs a string representation of the computed loop hierarchy. */
+  std::string LoopStructure() {
+    return LoopStructureRecurse(loop_opt_->top_loop_);
+  }
+
+  // Helper method.
+  std::string LoopStructureRecurse(HLoopOptimization::LoopNode* node) {
+    std::string s;
+    for ( ; node != nullptr; node = node->next) {
+      s.append("[");
+      s.append(LoopStructureRecurse(node->inner));
+      s.append("]");
+    }
+    return s;
+  }
+
+  // General building fields.
+  ArenaPool pool_;
+  ArenaAllocator allocator_;
+  HGraph* graph_;
+  HInductionVarAnalysis* iva_;
+  HLoopOptimization* loop_opt_;
+
+  HBasicBlock* entry_block_;
+  HBasicBlock* return_block_;
+  HBasicBlock* exit_block_;
+
+  HInstruction* parameter_;
+};
+
+//
+// The actual tests.
+//
+
+TEST_F(LoopOptimizationTest, NoLoops) {
+  PerformAnalysis();
+  EXPECT_EQ("", LoopStructure());
+}
+
+TEST_F(LoopOptimizationTest, SingleLoop) {
+  AddLoop(entry_block_, return_block_);
+  PerformAnalysis();
+  EXPECT_EQ("[]", LoopStructure());
+}
+
+TEST_F(LoopOptimizationTest, LoopNest10) {
+  HBasicBlock* b = entry_block_;
+  HBasicBlock* s = return_block_;
+  for (int i = 0; i < 10; i++) {
+    s = AddLoop(b, s);
+    b = s->GetSuccessors()[0];
+  }
+  PerformAnalysis();
+  EXPECT_EQ("[[[[[[[[[[]]]]]]]]]]", LoopStructure());
+}
+
+TEST_F(LoopOptimizationTest, LoopSequence10) {
+  HBasicBlock* b = entry_block_;
+  HBasicBlock* s = return_block_;
+  for (int i = 0; i < 10; i++) {
+    b = AddLoop(b, s);
+    s = b->GetSuccessors()[1];
+  }
+  PerformAnalysis();
+  EXPECT_EQ("[][][][][][][][][][]", LoopStructure());
+}
+
+TEST_F(LoopOptimizationTest, LoopSequenceOfNests) {
+  HBasicBlock* b = entry_block_;
+  HBasicBlock* s = return_block_;
+  for (int i = 0; i < 10; i++) {
+    b = AddLoop(b, s);
+    s = b->GetSuccessors()[1];
+    HBasicBlock* bi = b->GetSuccessors()[0];
+    HBasicBlock* si = b;
+    for (int j = 0; j < i; j++) {
+      si = AddLoop(bi, si);
+      bi = si->GetSuccessors()[0];
+    }
+  }
+  PerformAnalysis();
+  EXPECT_EQ("[]"
+            "[[]]"
+            "[[[]]]"
+            "[[[[]]]]"
+            "[[[[[]]]]]"
+            "[[[[[[]]]]]]"
+            "[[[[[[[]]]]]]]"
+            "[[[[[[[[]]]]]]]]"
+            "[[[[[[[[[]]]]]]]]]"
+            "[[[[[[[[[[]]]]]]]]]]",
+            LoopStructure());
+}
+
+TEST_F(LoopOptimizationTest, LoopNestWithSequence) {
+  HBasicBlock* b = entry_block_;
+  HBasicBlock* s = return_block_;
+  for (int i = 0; i < 10; i++) {
+    s = AddLoop(b, s);
+    b = s->GetSuccessors()[0];
+  }
+  b = s;
+  s = b->GetSuccessors()[1];
+  for (int i = 0; i < 9; i++) {
+    b = AddLoop(b, s);
+    s = b->GetSuccessors()[1];
+  }
+  PerformAnalysis();
+  EXPECT_EQ("[[[[[[[[[[][][][][][][][][][]]]]]]]]]]", LoopStructure());
+}
+
+}  // namespace art
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index ef9bf23a17..1ff2252348 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -522,7 +522,10 @@ static bool IsLinearOrderWellFormed(const HGraph& graph) {
   return true;
 }
 
+// TODO: return order, and give only liveness analysis ownership of graph's linear_order_?
 void HGraph::Linearize() {
+  linear_order_.clear();
+
   // Create a reverse post ordering with the following properties:
   // - Blocks in a loop are consecutive,
   // - Back-edge is the last block before loop exits.
@@ -2607,12 +2610,8 @@ bool HLoadString::InstructionDataEquals(const HInstruction* other) const {
   LoadKind load_kind = GetLoadKind();
   if (HasAddress(load_kind)) {
     return GetAddress() == other_load_string->GetAddress();
-  } else if (HasStringReference(load_kind)) {
-    return IsSameDexFile(GetDexFile(), other_load_string->GetDexFile());
   } else {
-    DCHECK(HasDexCacheReference(load_kind)) << load_kind;
-    // If the string indexes and dex files are the same, dex cache element offsets
-    // must also be the same, so we don't need to compare them.
+    DCHECK(HasStringReference(load_kind)) << load_kind;
     return IsSameDexFile(GetDexFile(), other_load_string->GetDexFile());
   }
 }
@@ -2642,8 +2641,8 @@ std::ostream& operator<<(std::ostream& os, HLoadString::LoadKind rhs) {
       return os << "BootImageAddress";
     case HLoadString::LoadKind::kDexCacheAddress:
       return os << "DexCacheAddress";
-    case HLoadString::LoadKind::kDexCachePcRelative:
-      return os << "DexCachePcRelative";
+    case HLoadString::LoadKind::kBssEntry:
+      return os << "BssEntry";
     case HLoadString::LoadKind::kDexCacheViaMethod:
       return os << "DexCacheViaMethod";
     default:
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 397abded27..5cfbf4249e 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -366,8 +366,8 @@ class HGraph : public ArenaObject<kArenaAllocGraph> {
   // is a throw-catch loop, i.e. the header is a catch block.
   GraphAnalysisResult AnalyzeLoops() const;
 
-  // Computes the linear order (should be called before using HLinearOrderIterator).
-  // Linearizes the graph such that:
+  // Computes a linear order for the current graph (should be called before
+  // using HLinearOrderIterator). Linearizes the graph such that:
   // (1): a block is always after its dominator,
   // (2): blocks of loops are contiguous.
   // This creates a natural and efficient ordering when visualizing live ranges.
@@ -586,7 +586,8 @@ class HGraph : public ArenaObject<kArenaAllocGraph> {
   // List of blocks to perform a reverse post order tree traversal.
   ArenaVector<HBasicBlock*> reverse_post_order_;
 
-  // List of blocks to perform a linear order tree traversal.
+  // List of blocks to perform a linear order tree traversal. Unlike the reverse
+  // post order, this order is not incrementally kept up-to-date.
   ArenaVector<HBasicBlock*> linear_order_;
 
   HBasicBlock* entry_block_;
@@ -5650,10 +5651,9 @@ class HLoadString FINAL : public HInstruction {
     // Used for strings outside the boot image referenced by JIT-compiled code.
     kDexCacheAddress,
 
-    // Load from resolved strings array in the dex cache using a PC-relative load.
-    // Used for strings outside boot image when we know that we can access
-    // the dex cache arrays using a PC-relative load.
-    kDexCachePcRelative,
+    // Load from an entry in the .bss section using a PC-relative load.
+    // Used for strings outside boot image when .bss is accessible with a PC-relative load.
+    kBssEntry,
 
     // Load from resolved strings array accessed through the class loaded from
     // the compiled method's own ArtMethod*. This is the default access type when
@@ -5672,7 +5672,7 @@ class HLoadString FINAL : public HInstruction {
         string_index_(string_index) {
     SetPackedFlag<kFlagIsInDexCache>(false);
     SetPackedField<LoadKindField>(LoadKind::kDexCacheViaMethod);
-    load_data_.ref.dex_file = &dex_file;
+    load_data_.dex_file_ = &dex_file;
   }
 
   void SetLoadKindWithAddress(LoadKind load_kind, uint64_t address) {
@@ -5685,20 +5685,11 @@ class HLoadString FINAL : public HInstruction {
                                      const DexFile& dex_file,
                                      uint32_t string_index) {
     DCHECK(HasStringReference(load_kind));
-    load_data_.ref.dex_file = &dex_file;
+    load_data_.dex_file_ = &dex_file;
     string_index_ = string_index;
     SetLoadKindInternal(load_kind);
   }
 
-  void SetLoadKindWithDexCacheReference(LoadKind load_kind,
-                                        const DexFile& dex_file,
-                                        uint32_t element_index) {
-    DCHECK(HasDexCacheReference(load_kind));
-    load_data_.ref.dex_file = &dex_file;
-    load_data_.ref.dex_cache_element_index = element_index;
-    SetLoadKindInternal(load_kind);
-  }
-
   LoadKind GetLoadKind() const {
     return GetPackedField<LoadKindField>();
   }
@@ -5710,8 +5701,6 @@ class HLoadString FINAL : public HInstruction {
     return string_index_;
   }
 
-  uint32_t GetDexCacheElementOffset() const;
-
   uint64_t GetAddress() const {
     DCHECK(HasAddress(GetLoadKind()));
     return load_data_.address;
@@ -5781,6 +5770,7 @@ class HLoadString FINAL : public HInstruction {
   static bool HasStringReference(LoadKind load_kind) {
     return load_kind == LoadKind::kBootImageLinkTimeAddress ||
            load_kind == LoadKind::kBootImageLinkTimePcRelative ||
+           load_kind == LoadKind::kBssEntry ||
            load_kind == LoadKind::kDexCacheViaMethod;
   }
 
@@ -5788,10 +5778,6 @@ class HLoadString FINAL : public HInstruction {
     return load_kind == LoadKind::kBootImageAddress || load_kind == LoadKind::kDexCacheAddress;
   }
 
-  static bool HasDexCacheReference(LoadKind load_kind) {
-    return load_kind == LoadKind::kDexCachePcRelative;
-  }
-
   void SetLoadKindInternal(LoadKind load_kind);
 
   // The special input is the HCurrentMethod for kDexCacheViaMethod.
@@ -5804,10 +5790,7 @@ class HLoadString FINAL : public HInstruction {
   uint32_t string_index_;
 
   union {
-    struct {
-      const DexFile* dex_file;            // For string reference and dex cache reference.
-      uint32_t dex_cache_element_index;   // Only for dex cache reference.
-    } ref;
+    const DexFile* dex_file_;  // For string reference.
     uint64_t address;  // Up to 64-bit, needed for kDexCacheAddress on 64-bit targets.
   } load_data_;
 
@@ -5817,15 +5800,8 @@ std::ostream& operator<<(std::ostream& os, HLoadString::LoadKind rhs);
 
 // Note: defined outside class to see operator<<(., HLoadString::LoadKind).
 inline const DexFile& HLoadString::GetDexFile() const {
-  DCHECK(HasStringReference(GetLoadKind()) || HasDexCacheReference(GetLoadKind()))
-      << GetLoadKind();
-  return *load_data_.ref.dex_file;
-}
-
-// Note: defined outside class to see operator<<(., HLoadString::LoadKind).
-inline uint32_t HLoadString::GetDexCacheElementOffset() const {
-  DCHECK(HasDexCacheReference(GetLoadKind())) << GetLoadKind();
-  return load_data_.ref.dex_cache_element_index;
+  DCHECK(HasStringReference(GetLoadKind())) << GetLoadKind();
+  return *load_data_.dex_file_;
 }
 
 // Note: defined outside class to see operator<<(., HLoadString::LoadKind).
@@ -5833,7 +5809,7 @@ inline void HLoadString::AddSpecialInput(HInstruction* special_input) {
   // The special input is used for PC-relative loads on some architectures,
   // including literal pool loads, which are PC-relative too.
   DCHECK(GetLoadKind() == LoadKind::kBootImageLinkTimePcRelative ||
-         GetLoadKind() == LoadKind::kDexCachePcRelative ||
+         GetLoadKind() == LoadKind::kBssEntry ||
          GetLoadKind() == LoadKind::kBootImageLinkTimeAddress ||
         GetLoadKind() == LoadKind::kBootImageAddress) << GetLoadKind();
   // HLoadString::GetInputRecords() returns an empty array at this point,
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index d3a55dd365..52d6e0b3f1 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -76,6 +76,7 @@
 #include "jni/quick/jni_compiler.h"
 #include "licm.h"
 #include "load_store_elimination.h"
+#include "loop_optimization.h"
 #include "nodes.h"
 #include "oat_quick_method_header.h"
 #include "prepare_for_register_allocation.h"
@@ -487,6 +488,8 @@ static HOptimization* BuildOptimization(
     return new (arena) LoadStoreElimination(graph, *most_recent_side_effects);
   } else if (opt_name == SideEffectsAnalysis::kSideEffectsAnalysisPassName) {
     return new (arena) SideEffectsAnalysis(graph);
+  } else if (opt_name == HLoopOptimization::kLoopOptimizationPassName) {
+    return new (arena) HLoopOptimization(graph, most_recent_induction);
 #ifdef ART_ENABLE_CODEGEN_arm
   } else if (opt_name == arm::DexCacheArrayFixups::kDexCacheArrayFixupsArmPassName) {
     return new (arena) arm::DexCacheArrayFixups(graph, codegen, stats);
@@ -737,6 +740,7 @@ void OptimizingCompiler::RunOptimizations(HGraph* graph,
   LoadStoreElimination* lse = new (arena) LoadStoreElimination(graph, *side_effects);
   HInductionVarAnalysis* induction = new (arena) HInductionVarAnalysis(graph);
   BoundsCheckElimination* bce = new (arena) BoundsCheckElimination(graph, *side_effects, induction);
+  HLoopOptimization* loop = new (arena) HLoopOptimization(graph, induction);
   HSharpening* sharpening = new (arena) HSharpening(graph, codegen, dex_compilation_unit, driver);
   InstructionSimplifier* simplify2 = new (arena) InstructionSimplifier(
       graph, stats, "instruction_simplifier$after_bce");
@@ -765,6 +769,7 @@ void OptimizingCompiler::RunOptimizations(HGraph* graph,
     licm,
     induction,
     bce,
+    loop,
     fold3,  // evaluates code generated by dynamic bce
     simplify2,
     lse,
diff --git a/compiler/optimizing/pc_relative_fixups_mips.cc b/compiler/optimizing/pc_relative_fixups_mips.cc
index 6006e6cf5d..82feb95a2f 100644
--- a/compiler/optimizing/pc_relative_fixups_mips.cc
+++ b/compiler/optimizing/pc_relative_fixups_mips.cc
@@ -83,6 +83,7 @@ class PCRelativeHandlerVisitor : public HGraphVisitor {
       case HLoadString::LoadKind::kBootImageLinkTimeAddress:
       case HLoadString::LoadKind::kBootImageAddress:
      case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
+      case HLoadString::LoadKind::kBssEntry:
         // Add a base register for PC-relative literals on R2.
         InitializePCRelativeBasePointer();
         load_string->AddSpecialInput(base_);
diff --git a/compiler/optimizing/pc_relative_fixups_x86.cc b/compiler/optimizing/pc_relative_fixups_x86.cc
index 75587af7a1..b1fdb1792d 100644
--- a/compiler/optimizing/pc_relative_fixups_x86.cc
+++ b/compiler/optimizing/pc_relative_fixups_x86.cc
@@ -92,7 +92,7 @@ class PCRelativeHandlerVisitor : public HGraphVisitor {
   void VisitLoadString(HLoadString* load_string) OVERRIDE {
     HLoadString::LoadKind load_kind = load_string->GetLoadKind();
     if (load_kind == HLoadString::LoadKind::kBootImageLinkTimePcRelative ||
-        load_kind == HLoadString::LoadKind::kDexCachePcRelative) {
+        load_kind == HLoadString::LoadKind::kBssEntry) {
       InitializePCRelativeBasePointer();
       load_string->AddSpecialInput(base_);
     }
diff --git a/compiler/optimizing/reference_type_propagation.cc b/compiler/optimizing/reference_type_propagation.cc
index 15cebfe71b..45a3ce411e 100644
--- a/compiler/optimizing/reference_type_propagation.cc
+++ b/compiler/optimizing/reference_type_propagation.cc
@@ -454,7 +454,7 @@ void ReferenceTypePropagation::RTPVisitor::SetClassAsTypeInfo(HInstruction* inst
     }
     instr->SetReferenceTypeInfo(
         ReferenceTypeInfo::Create(handle_cache_->GetStringClassHandle(), /* is_exact */ true));
-  } else if (IsAdmissible(klass.Decode())) {
+  } else if (IsAdmissible(klass.Ptr())) {
     ReferenceTypeInfo::TypeHandle handle = handle_cache_->NewHandle(klass);
     is_exact = is_exact || handle->CannotBeAssignedFromOtherTypes();
     instr->SetReferenceTypeInfo(ReferenceTypeInfo::Create(handle, is_exact));
diff --git a/compiler/optimizing/sharpening.cc b/compiler/optimizing/sharpening.cc
index a4a3e0695d..c1cfe8d00f 100644
--- a/compiler/optimizing/sharpening.cc
+++ b/compiler/optimizing/sharpening.cc
@@ -163,7 +163,7 @@ void HSharpening::ProcessLoadClass(HLoadClass* load_class) {
       : hs.NewHandle(class_linker->FindDexCache(soa.Self(), dex_file));
   mirror::Class* klass = dex_cache->GetResolvedType(type_index);
 
-  if (compiler_driver_->IsBootImage()) {
+  if (codegen_->GetCompilerOptions().IsBootImage()) {
     // Compiling boot image. Check if the class is a boot image class.
     DCHECK(!runtime->UseJitCompilation());
     if (!compiler_driver_->GetSupportBootImageFixup()) {
@@ -281,7 +281,7 @@ void HSharpening::ProcessLoadString(HLoadString* load_string) {
       ? compilation_unit_.GetDexCache()
      : hs.NewHandle(class_linker->FindDexCache(soa.Self(), dex_file));
 
-  if (compiler_driver_->IsBootImage()) {
+  if (codegen_->GetCompilerOptions().IsBootImage()) {
     // Compiling boot image. Resolve the string and allocate it if needed.
     DCHECK(!runtime->UseJitCompilation());
     mirror::String* string = class_linker->ResolveString(dex_file, string_index, dex_cache);
@@ -311,6 +311,8 @@ void HSharpening::ProcessLoadString(HLoadString* load_string) {
           !codegen_->GetCompilerOptions().GetCompilePic()) {
         desired_load_kind = HLoadString::LoadKind::kBootImageAddress;
         address = reinterpret_cast64<uint64_t>(string);
+      } else {
+        desired_load_kind = HLoadString::LoadKind::kBssEntry;
       }
     }
   }
@@ -319,6 +321,7 @@ void HSharpening::ProcessLoadString(HLoadString* load_string) {
   switch (load_kind) {
     case HLoadString::LoadKind::kBootImageLinkTimeAddress:
     case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
+    case HLoadString::LoadKind::kBssEntry:
    case HLoadString::LoadKind::kDexCacheViaMethod:
       load_string->SetLoadKindWithStringReference(load_kind, dex_file, string_index);
       break;
@@ -327,13 +330,6 @@ void HSharpening::ProcessLoadString(HLoadString* load_string) {
       DCHECK_NE(address, 0u);
       load_string->SetLoadKindWithAddress(load_kind, address);
       break;
-    case HLoadString::LoadKind::kDexCachePcRelative: {
-      PointerSize pointer_size = InstructionSetPointerSize(codegen_->GetInstructionSet());
-      DexCacheArraysLayout layout(pointer_size, &dex_file);
-      size_t element_index = layout.StringOffset(string_index);
-      load_string->SetLoadKindWithDexCacheReference(load_kind, dex_file, element_index);
-      break;
-    }
   }
 }
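
Condensed from the sharpening hunks above, the load-kind choice for HLoadString now looks roughly like the sketch below. This is an approximation for illustration only; the real HSharpening::ProcessLoadString() also resolves the string, consults the dex cache, and defers to what the code generator supports:

#include <iostream>

enum class LoadKind {
  kBootImageLinkTimeAddress,
  kBootImageLinkTimePcRelative,
  kBootImageAddress,
  kDexCacheAddress,
  kBssEntry,
  kDexCacheViaMethod,
};

LoadKind ChooseStringLoadKind(bool compiling_boot_image,
                              bool compile_pic,
                              bool use_jit,
                              bool string_in_boot_image) {
  if (compiling_boot_image) {
    // Boot image strings are patched at link time; PC-relative under PIC.
    return compile_pic ? LoadKind::kBootImageLinkTimePcRelative
                       : LoadKind::kBootImageLinkTimeAddress;
  }
  if (use_jit) {
    // JIT can embed the direct address of an already-resolved string.
    return LoadKind::kDexCacheAddress;
  }
  if (string_in_boot_image && !compile_pic) {
    return LoadKind::kBootImageAddress;
  }
  // New default for AOT app compiles: resolve through a .bss entry.
  return LoadKind::kBssEntry;
}

int main() {
  // AOT app compile, PIC, string not in boot image: picks kBssEntry (prints 4).
  std::cout << static_cast<int>(ChooseStringLoadKind(false, true, false, false)) << "\n";
}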