From 0f7dca4ca0be8d2f8776794d35edf8b51b5bc997 Mon Sep 17 00:00:00 2001
From: Vladimir Marko
Date: Mon, 2 Nov 2015 14:36:43 +0000
Subject: Optimizing/X86: PC-relative dex cache array addressing.

Add PC-relative dex cache array addressing for X86 and use
it for better invoke-static/-direct dispatch.

Also delay the initialization of the PC-relative base until needed.

Change-Id: Ib8634d5edce4920cd70172fd13211809cf6948d1
---
 compiler/optimizing/code_generator.cc           |   2 +
 compiler/optimizing/code_generator.h            |   8 ++
 compiler/optimizing/code_generator_arm64.cc     |  22 ++--
 compiler/optimizing/code_generator_arm64.h      |   2 +-
 compiler/optimizing/code_generator_x86.cc       |  87 +++++++++++----
 compiler/optimizing/code_generator_x86.h        |  14 +++
 compiler/optimizing/code_generator_x86_64.cc    |  29 +++--
 compiler/optimizing/code_generator_x86_64.h     |   2 +-
 compiler/optimizing/constant_area_fixups_x86.cc | 132 ----------------------
 compiler/optimizing/constant_area_fixups_x86.h  |  37 -------
 compiler/optimizing/nodes.cc                    |  10 ++
 compiler/optimizing/nodes.h                     |   5 +-
 compiler/optimizing/optimizing_compiler.cc      |   7 +-
 compiler/optimizing/pc_relative_fixups_x86.cc   | 140 ++++++++++++++++++++++++
 compiler/optimizing/pc_relative_fixups_x86.h    |  37 +++++++
 15 files changed, 308 insertions(+), 226 deletions(-)
 delete mode 100644 compiler/optimizing/constant_area_fixups_x86.cc
 delete mode 100644 compiler/optimizing/constant_area_fixups_x86.h
 create mode 100644 compiler/optimizing/pc_relative_fixups_x86.cc
 create mode 100644 compiler/optimizing/pc_relative_fixups_x86.h

diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index ce92470868..5188e115e0 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -208,6 +208,7 @@ class DisassemblyScope {
 void CodeGenerator::GenerateSlowPaths() {
   size_t code_start = 0;
   for (SlowPathCode* slow_path : slow_paths_) {
+    current_slow_path_ = slow_path;
     if (disasm_info_ != nullptr) {
       code_start = GetAssembler()->CodeSize();
     }
@@ -216,6 +217,7 @@ void CodeGenerator::GenerateSlowPaths() {
       disasm_info_->AddSlowPathInterval(slow_path, code_start, GetAssembler()->CodeSize());
     }
   }
+  current_slow_path_ = nullptr;
 }
 
 void CodeGenerator::CompileInternal(CodeAllocator* allocator, bool is_baseline) {
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index a92014dc79..2108abefcc 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -490,6 +490,7 @@ class CodeGenerator {
         compiler_options_(compiler_options),
         src_map_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
         slow_paths_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+        current_slow_path_(nullptr),
         current_block_index_(0),
         is_leaf_(true),
         requires_current_method_(false) {
@@ -557,6 +558,10 @@ class CodeGenerator {
     return raw_pointer_to_labels_array + block->GetBlockId();
   }
 
+  SlowPathCode* GetCurrentSlowPath() {
+    return current_slow_path_;
+  }
+
   // Frame size required for this method.
   uint32_t frame_size_;
   uint32_t core_spill_mask_;
@@ -605,6 +610,9 @@ class CodeGenerator {
   ArenaVector<SrcMapElem> src_map_;
   ArenaVector<SlowPathCode*> slow_paths_;
 
+  // The current slow path that we're generating code for.
+  SlowPathCode* current_slow_path_;
+
   // The current block index in `block_order_` of the block
   // we are generating code for.
   size_t current_block_index_;
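
Note: the two hunks above make the code generator remember which slow path it
is currently emitting (current_slow_path_), exposed via GetCurrentSlowPath().
The X86 changes further down rely on this to find out whether an intrinsic's
slow path has spilled the register that holds the PC-relative base. A toy,
self-contained model of the pattern (all names are stand-ins, not ART's real
classes):

    #include <cassert>
    #include <vector>

    class SlowPathCode;

    class CodeGen {
     public:
      SlowPathCode* GetCurrentSlowPath() { return current_slow_path_; }
      void GenerateSlowPaths();
      std::vector<SlowPathCode*> slow_paths_;

     private:
      SlowPathCode* current_slow_path_ = nullptr;
    };

    class SlowPathCode {
     public:
      void EmitNativeCode(CodeGen* codegen) {
        // Code emitted here can ask which slow path is active, e.g. to find
        // the stack slot where this slow path spilled a register.
        assert(codegen->GetCurrentSlowPath() == this);
      }
    };

    void CodeGen::GenerateSlowPaths() {
      for (SlowPathCode* slow_path : slow_paths_) {
        current_slow_path_ = slow_path;  // Visible to code emitted below.
        slow_path->EmitNativeCode(this);
      }
      current_slow_path_ = nullptr;  // No slow path is active outside the loop.
    }
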
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index b0be446174..7e248b402a 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -598,7 +598,7 @@ CodeGeneratorARM64::CodeGeneratorARM64(HGraph* graph,
       call_patches_(MethodReferenceComparator(),
                     graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       relative_call_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
-      pc_rel_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {
+      pc_relative_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {
   // Save the link register (containing the return address) to mimic Quick.
   AddAllocatedRegister(LocationFrom(lr));
 }
@@ -2872,21 +2872,21 @@ void CodeGeneratorARM64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invok
       break;
     case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative: {
       // Add ADRP with its PC-relative DexCache access patch.
-      pc_rel_dex_cache_patches_.emplace_back(*invoke->GetTargetMethod().dex_file,
-                                             invoke->GetDexCacheArrayOffset());
-      vixl::Label* pc_insn_label = &pc_rel_dex_cache_patches_.back().label;
+      pc_relative_dex_cache_patches_.emplace_back(*invoke->GetTargetMethod().dex_file,
+                                                  invoke->GetDexCacheArrayOffset());
+      vixl::Label* pc_insn_label = &pc_relative_dex_cache_patches_.back().label;
       {
         vixl::SingleEmissionCheckScope guard(GetVIXLAssembler());
         __ adrp(XRegisterFrom(temp).X(), 0);
       }
       __ Bind(pc_insn_label);  // Bind after ADRP.
-      pc_rel_dex_cache_patches_.back().pc_insn_label = pc_insn_label;
+      pc_relative_dex_cache_patches_.back().pc_insn_label = pc_insn_label;
       // Add LDR with its PC-relative DexCache access patch.
-      pc_rel_dex_cache_patches_.emplace_back(*invoke->GetTargetMethod().dex_file,
-                                             invoke->GetDexCacheArrayOffset());
+      pc_relative_dex_cache_patches_.emplace_back(*invoke->GetTargetMethod().dex_file,
+                                                  invoke->GetDexCacheArrayOffset());
       __ Ldr(XRegisterFrom(temp).X(), MemOperand(XRegisterFrom(temp).X(), 0));
-      __ Bind(&pc_rel_dex_cache_patches_.back().label);  // Bind after LDR.
-      pc_rel_dex_cache_patches_.back().pc_insn_label = pc_insn_label;
+      __ Bind(&pc_relative_dex_cache_patches_.back().label);  // Bind after LDR.
+      pc_relative_dex_cache_patches_.back().pc_insn_label = pc_insn_label;
       break;
     }
     case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: {
@@ -2973,7 +2973,7 @@ void CodeGeneratorARM64::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patc
       method_patches_.size() +
       call_patches_.size() +
       relative_call_patches_.size() +
-      pc_rel_dex_cache_patches_.size();
+      pc_relative_dex_cache_patches_.size();
   linker_patches->reserve(size);
   for (const auto& entry : method_patches_) {
     const MethodReference& target_method = entry.first;
@@ -2994,7 +2994,7 @@ void CodeGeneratorARM64::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patc
                                                      info.target_method.dex_file,
                                                      info.target_method.dex_method_index));
   }
-  for (const PcRelativeDexCacheAccessInfo& info : pc_rel_dex_cache_patches_) {
+  for (const PcRelativeDexCacheAccessInfo& info : pc_relative_dex_cache_patches_) {
     linker_patches->push_back(LinkerPatch::DexCacheArrayPatch(info.label.location() - 4u,
                                                               &info.target_dex_file,
                                                               info.pc_insn_label->location() - 4u,
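
Note: besides the pc_rel_ to pc_relative_ renaming, the ARM64 hunks above
record one patch for the ADRP (which materializes the 4KiB page of the dex
cache array element) and one for the LDR (which adds the offset within that
page), tied together through pc_insn_label. Each vixl::Label is bound after
its 4-byte instruction, hence the `location() - 4u` when emitting the
patches. A rough sketch of the arithmetic applied at the two patch sites (an
illustrative assumption; the real logic lives in the linker's relative
patcher, not in this change):

    #include <cstdint>

    // ADRP encodes the page delta between the instruction and the target;
    // LDR encodes the target's offset within its 4KiB page.
    int64_t AdrpPageDelta(uint64_t adrp_pc, uint64_t target) {
      return static_cast<int64_t>(target >> 12) - static_cast<int64_t>(adrp_pc >> 12);
    }
    uint32_t LdrPageOffset(uint64_t target) {
      return static_cast<uint32_t>(target & 0xfffu);  // Low 12 bits.
    }
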
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index ab684ea538..aa5ad386e1 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -447,7 +447,7 @@ class CodeGeneratorARM64 : public CodeGenerator {
   // Using ArenaDeque<> which retains element addresses on push/emplace_back().
   ArenaDeque<MethodPatchInfo<vixl::Label>> relative_call_patches_;
   // PC-relative DexCache access info.
-  ArenaDeque<PcRelativeDexCacheAccessInfo> pc_rel_dex_cache_patches_;
+  ArenaDeque<PcRelativeDexCacheAccessInfo> pc_relative_dex_cache_patches_;
 
   DISALLOW_COPY_AND_ASSIGN(CodeGeneratorARM64);
 };
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 8308d9ee20..d5d6c210bf 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -19,7 +19,6 @@
 #include "art_method.h"
 #include "code_generator_utils.h"
 #include "compiled_method.h"
-#include "constant_area_fixups_x86.h"
 #include "entrypoints/quick/quick_entrypoints.h"
 #include "entrypoints/quick/quick_entrypoints_enum.h"
 #include "gc/accounting/card_table.h"
@@ -27,6 +26,7 @@
 #include "intrinsics_x86.h"
 #include "mirror/array-inl.h"
 #include "mirror/class-inl.h"
+#include "pc_relative_fixups_x86.h"
 #include "thread.h"
 #include "utils/assembler.h"
 #include "utils/stack_checks.h"
@@ -533,6 +533,7 @@ CodeGeneratorX86::CodeGeneratorX86(HGraph* graph,
       isa_features_(isa_features),
       method_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       relative_call_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      pc_relative_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       fixups_to_jump_tables_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {
   // Use a fake return address register to mimic Quick.
   AddAllocatedRegister(Location::RegisterLocation(kFakeReturnRegister));
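
Note: unlike ARM64, 32-bit x86 has no PC-relative addressing mode, so the
hunks below can only address relative to the PC after a base address has been
materialized in a register. That is the job of the new pc_relative_fixups_x86
pass (its contents are not included in this view), which also makes the base
an extra input of the invoke. The classic idiom for obtaining the base is a
call/pop pair; a conceptual sketch with a hypothetical helper name (the
emitter calls mirror ART's X86Assembler API, but this is not the patch's own
code):

    #include "utils/x86/assembler_x86.h"  // ART's in-tree X86Assembler.

    // A `call` to the next instruction pushes the return address; popping it
    // yields a known in-method address for the linker to patch against.
    void EmitComputeBaseMethodAddress(art::x86::X86Assembler* assembler,
                                      art::x86::Register reg) {
      art::Label next_instruction;
      assembler->call(&next_instruction);  // Pushes the address of the next insn.
      assembler->Bind(&next_instruction);
      assembler->popl(reg);                // `reg` now holds that address.
    }
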
@@ -1696,11 +1697,20 @@ void LocationsBuilderX86::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invok
   IntrinsicLocationsBuilderX86 intrinsic(codegen_);
   if (intrinsic.TryDispatch(invoke)) {
+    if (invoke->GetLocations()->CanCall() && invoke->HasPcRelativeDexCache()) {
+      invoke->GetLocations()->SetInAt(invoke->GetCurrentMethodInputIndex(), Location::Any());
+    }
     return;
   }
 
   HandleInvoke(invoke);
 
+  // For PC-relative dex cache the invoke has an extra input, the PC-relative address base.
+  if (invoke->HasPcRelativeDexCache()) {
+    invoke->GetLocations()->SetInAt(invoke->GetCurrentMethodInputIndex(),
+                                    Location::RequiresRegister());
+  }
+
   if (codegen_->IsBaseline()) {
     // Baseline does not have enough registers if the current method also
     // needs a register. We therefore do not require a register for it, and let
@@ -3779,16 +3789,6 @@ void InstructionCodeGeneratorX86::GenerateMemoryBarrier(MemBarrierKind kind) {
 HInvokeStaticOrDirect::DispatchInfo CodeGeneratorX86::GetSupportedInvokeStaticOrDirectDispatch(
       const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
       MethodReference target_method ATTRIBUTE_UNUSED) {
-  if (desired_dispatch_info.method_load_kind ==
-      HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative) {
-    // TODO: Implement this type. For the moment, we fall back to kDexCacheViaMethod.
-    return HInvokeStaticOrDirect::DispatchInfo {
-      HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod,
-      HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod,
-      0u,
-      0u
-    };
-  }
   switch (desired_dispatch_info.code_ptr_location) {
     case HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup:
     case HInvokeStaticOrDirect::CodePtrLocation::kCallDirect:
@@ -3805,6 +3805,32 @@ HInvokeStaticOrDirect::DispatchInfo CodeGeneratorX86::GetSupportedInvokeStaticOr
   }
 }
 
+Register CodeGeneratorX86::GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke,
+                                                                 Register temp) {
+  DCHECK_EQ(invoke->InputCount(), invoke->GetNumberOfArguments() + 1u);
+  Location location = invoke->GetLocations()->InAt(invoke->GetCurrentMethodInputIndex());
+  if (!invoke->GetLocations()->Intrinsified()) {
+    return location.AsRegister<Register>();
+  }
+  // For intrinsics we allow any location, so it may be on the stack.
+  if (!location.IsRegister()) {
+    __ movl(temp, Address(ESP, location.GetStackIndex()));
+    return temp;
+  }
+  // For register locations, check if the register was saved. If so, get it from the stack.
+  // Note: There is a chance that the register was saved but not overwritten, so we could
+  // save one load. However, since this is just an intrinsic slow path we prefer this
+  // simple and more robust approach rather than trying to determine if that's the case.
+  SlowPathCode* slow_path = GetCurrentSlowPath();
+  DCHECK(slow_path != nullptr);  // For intrinsified invokes the call is emitted on the slow path.
+  if (slow_path->IsCoreRegisterSaved(location.AsRegister<Register>())) {
+    int stack_offset = slow_path->GetStackOffsetOfCoreRegister(location.AsRegister<Register>());
+    __ movl(temp, Address(ESP, stack_offset));
+    return temp;
+  }
+  return location.AsRegister<Register>();
+}
+
 void CodeGeneratorX86::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) {
   Location callee_method = temp;  // For all kinds except kRecursive, callee will be in temp.
   switch (invoke->GetMethodLoadKind()) {
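
Note: the next hunk implements the kDexCachePcRelative load itself: a movl
through the base register with a dummy 32-bit displacement, plus a patch
entry telling the linker where to rewrite that displacement. Under the
simple assumption that the linker re-points the displacement at the dex
cache array element, the patched value is (hypothetical helper, not code
from this change):

    #include <cstdint>

    // base + displacement must address the dex cache array element.
    int32_t PatchedDisplacement(uint32_t base_address, uint32_t element_address) {
      return static_cast<int32_t>(element_address - base_address);
    }
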
@@ -3823,11 +3849,16 @@ void CodeGeneratorX86::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke,
       method_patches_.emplace_back(invoke->GetTargetMethod());
       __ Bind(&method_patches_.back().label);  // Bind the label at the end of the "movl" insn.
       break;
-    case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative:
-      // TODO: Implement this type.
-      // Currently filtered out by GetSupportedInvokeStaticOrDirectDispatch().
-      LOG(FATAL) << "Unsupported";
-      UNREACHABLE();
+    case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative: {
+      Register base_reg = GetInvokeStaticOrDirectExtraParameter(invoke,
+                                                                temp.AsRegister<Register>());
+      uint32_t offset = invoke->GetDexCacheArrayOffset();
+      __ movl(temp.AsRegister<Register>(), Address(base_reg, kDummy32BitOffset));
+      // Add the patch entry and bind its label at the end of the instruction.
+      pc_relative_dex_cache_patches_.emplace_back(*invoke->GetTargetMethod().dex_file, offset);
+      __ Bind(&pc_relative_dex_cache_patches_.back().label);
+      break;
+    }
     case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: {
       Location current_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodInputIndex());
       Register method_reg;
@@ -3898,23 +3929,33 @@ void CodeGeneratorX86::GenerateVirtualCall(HInvokeVirtual* invoke, Location temp
 
 void CodeGeneratorX86::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) {
   DCHECK(linker_patches->empty());
-  linker_patches->reserve(method_patches_.size() + relative_call_patches_.size());
+  size_t size =
+      method_patches_.size() +
+      relative_call_patches_.size() +
+      pc_relative_dex_cache_patches_.size();
+  linker_patches->reserve(size);
+  // The label points to the end of the "movl" insn but the literal offset for method
+  // patch needs to point to the embedded constant which occupies the last 4 bytes.
+  constexpr uint32_t kLabelPositionToLiteralOffsetAdjustment = 4u;
  for (const MethodPatchInfo
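
Note: in the x86 EmitLinkerPatches() hunk above, each patch label is bound at
the end of its movl, while the linker needs the offset of the 4-byte
immediate embedded at the end of that instruction; that is what
kLabelPositionToLiteralOffsetAdjustment compensates for. As a one-line
sketch (hypothetical helper):

    #include <cstdint>

    // The literal occupies the last 4 bytes of the insn the label follows.
    uint32_t LiteralOffset(uint32_t label_position) {
      return label_position - 4u;  // kLabelPositionToLiteralOffsetAdjustment
    }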