author    | 2017-01-22 15:44:39 +0000
committer | 2017-01-23 16:09:26 +0000
commit    | 133719e01111cea9d4919df4e8e90b5c51f7ad5a (patch)
tree      | 6b0edefdbbf4201146d7cf2ff82e365e39dc8017 /compiler/optimizing
parent    | 5e821602426718bf971c3d693c3f8ff15d85017d (diff)
Allow multiple HX86ComputeBaseMethodAddress.
So that even graphs with irreducible loops can use it
and avoid loading methods/classes/strings through kDexCacheViaMethod.
Test: test-art-host
Change-Id: I14109cfdc82347a7af420ca0ee55172ec37ca8ef
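For context: 32-bit x86 has no pc-relative addressing mode, so HX86ComputeBaseMethodAddress materializes the current pc with a call/pop pair (visible in VisitX86ComputeBaseMethodAddress in the diff below). A minimal illustrative sketch of that trick, using GNU inline asm rather than ART's assembler:

#include <cstdint>

// Illustrative only: the call pushes the address of the next instruction,
// and the pop moves that return address into a register. This is the same
// sequence the code generator emits (Bind(&next_instruction); popl(reg)).
static inline uintptr_t CurrentPc() {
  uintptr_t pc;
  asm volatile(
      "call 1f\n\t"  // push the address of label 1 onto the stack
      "1: pop %0"    // pop it: %0 now holds the pc of label 1
      : "=r"(pc));
  return pc;
}

Before this change the compiler assumed at most one such instruction per graph; the commit lets each use site carry a reference to the particular base instruction it was built against.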
Diffstat (limited to 'compiler/optimizing')
-rw-r--r-- | compiler/optimizing/code_generator_x86.cc     | 175
-rw-r--r-- | compiler/optimizing/code_generator_x86.h      |  49
-rw-r--r-- | compiler/optimizing/intrinsics_x86.cc         |  42
-rw-r--r-- | compiler/optimizing/nodes_x86.h               |   4
-rw-r--r-- | compiler/optimizing/pc_relative_fixups_x86.cc |  63
5 files changed, 200 insertions, 133 deletions
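The shape of the codegen change, before the diff itself: the single method_address_offset_ field (valid only while a graph had one base instruction) becomes a map from the id of each HX86ComputeBaseMethodAddress to its code offset, and every constant-area fixup resolves against the base it captured. A sketch of that bookkeeping with stand-in types (std::map here; the real code uses ArenaSafeMap<uint32_t, int32_t>):

#include <cstdint>
#include <map>

// Stand-in for HX86ComputeBaseMethodAddress: all we need is a stable id.
struct BaseMethodAddress {
  uint32_t id;
  uint32_t GetId() const { return id; }
};

class MethodAddressOffsets {
 public:
  // Recorded when the code generator emits the call/pop sequence for a
  // given base instruction (AddMethodAddressOffset in the diff).
  void Add(const BaseMethodAddress* base, int32_t offset) {
    offsets_[base->GetId()] = offset;
  }
  // Queried by each fixup for the specific base it captured
  // (GetMethodAddressOffset in the diff).
  int32_t Get(const BaseMethodAddress* base) const {
    return offsets_.at(base->GetId());
  }

 private:
  std::map<uint32_t, int32_t> offsets_;  // instruction id -> code offset
};

// Mirrors RIPFixup::Process(): the displacement patched into an instruction
// is the constant's position minus the offset of its own base instruction.
int32_t RelativePosition(const MethodAddressOffsets& offsets,
                         const BaseMethodAddress* base,
                         int32_t constant_area_start,
                         int32_t offset_into_constant_area) {
  return constant_area_start + offset_into_constant_area - offsets.Get(base);
}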
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 853c91fac8..89d210c3e6 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -1023,7 +1023,8 @@ CodeGeneratorX86::CodeGeneratorX86(HGraph* graph,
       jit_class_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       constant_area_start_(-1),
       fixups_to_jump_tables_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
-      method_address_offset_(-1) {
+      method_address_offset_(std::less<uint32_t>(),
+                             graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {
   // Use a fake return address register to mimic Quick.
   AddAllocatedRegister(Location::RegisterLocation(kFakeReturnRegister));
 }
@@ -1498,8 +1499,9 @@ void InstructionCodeGeneratorX86::GenerateFPCompare(Location lhs,
       DCHECK(const_area->IsEmittedAtUseSite());
       __ ucomisd(lhs.AsFpuRegister<XmmRegister>(),
                  codegen_->LiteralDoubleAddress(
-                     const_area->GetConstant()->AsDoubleConstant()->GetValue(),
-                     const_area->GetLocations()->InAt(0).AsRegister<Register>()));
+                     const_area->GetConstant()->AsDoubleConstant()->GetValue(),
+                     const_area->GetBaseMethodAddress(),
+                     const_area->GetLocations()->InAt(0).AsRegister<Register>()));
     } else {
       DCHECK(rhs.IsDoubleStackSlot());
       __ ucomisd(lhs.AsFpuRegister<XmmRegister>(), Address(ESP, rhs.GetStackIndex()));
@@ -1511,8 +1513,9 @@ void InstructionCodeGeneratorX86::GenerateFPCompare(Location lhs,
       DCHECK(const_area->IsEmittedAtUseSite());
       __ ucomiss(lhs.AsFpuRegister<XmmRegister>(),
                  codegen_->LiteralFloatAddress(
-                     const_area->GetConstant()->AsFloatConstant()->GetValue(),
-                     const_area->GetLocations()->InAt(0).AsRegister<Register>()));
+                     const_area->GetConstant()->AsFloatConstant()->GetValue(),
+                     const_area->GetBaseMethodAddress(),
+                     const_area->GetLocations()->InAt(0).AsRegister<Register>()));
     } else {
       DCHECK(rhs.IsStackSlot());
       __ ucomiss(lhs.AsFpuRegister<XmmRegister>(), Address(ESP, rhs.GetStackIndex()));
@@ -2360,10 +2363,14 @@ void InstructionCodeGeneratorX86::VisitX86FPNeg(HX86FPNeg* neg) {
   Register constant_area = locations->InAt(1).AsRegister<Register>();
   XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
   if (neg->GetType() == Primitive::kPrimFloat) {
-    __ movss(mask, codegen_->LiteralInt32Address(INT32_C(0x80000000), constant_area));
+    __ movss(mask, codegen_->LiteralInt32Address(INT32_C(0x80000000),
+                                                 neg->GetBaseMethodAddress(),
+                                                 constant_area));
     __ xorps(out.AsFpuRegister<XmmRegister>(), mask);
   } else {
-    __ movsd(mask, codegen_->LiteralInt64Address(INT64_C(0x8000000000000000), constant_area));
+    __ movsd(mask, codegen_->LiteralInt64Address(INT64_C(0x8000000000000000),
+                                                 neg->GetBaseMethodAddress(),
+                                                 constant_area));
     __ xorpd(out.AsFpuRegister<XmmRegister>(), mask);
   }
 }
@@ -3012,8 +3019,9 @@ void InstructionCodeGeneratorX86::VisitAdd(HAdd* add) {
         DCHECK(const_area->IsEmittedAtUseSite());
         __ addss(first.AsFpuRegister<XmmRegister>(),
                  codegen_->LiteralFloatAddress(
-                     const_area->GetConstant()->AsFloatConstant()->GetValue(),
-                     const_area->GetLocations()->InAt(0).AsRegister<Register>()));
+                     const_area->GetConstant()->AsFloatConstant()->GetValue(),
+                     const_area->GetBaseMethodAddress(),
+                     const_area->GetLocations()->InAt(0).AsRegister<Register>()));
       } else {
         DCHECK(second.IsStackSlot());
         __ addss(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
@@ -3029,8 +3037,9 @@ void InstructionCodeGeneratorX86::VisitAdd(HAdd* add) {
         DCHECK(const_area->IsEmittedAtUseSite());
         __ addsd(first.AsFpuRegister<XmmRegister>(),
                  codegen_->LiteralDoubleAddress(
-                     const_area->GetConstant()->AsDoubleConstant()->GetValue(),
-                     const_area->GetLocations()->InAt(0).AsRegister<Register>()));
+                     const_area->GetConstant()->AsDoubleConstant()->GetValue(),
+                     const_area->GetBaseMethodAddress(),
+                     const_area->GetLocations()->InAt(0).AsRegister<Register>()));
       } else {
         DCHECK(second.IsDoubleStackSlot());
         __ addsd(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
@@ -3116,8 +3125,9 @@ void InstructionCodeGeneratorX86::VisitSub(HSub* sub) {
         DCHECK(const_area->IsEmittedAtUseSite());
         __ subss(first.AsFpuRegister<XmmRegister>(),
                  codegen_->LiteralFloatAddress(
-                     const_area->GetConstant()->AsFloatConstant()->GetValue(),
-                     const_area->GetLocations()->InAt(0).AsRegister<Register>()));
+                     const_area->GetConstant()->AsFloatConstant()->GetValue(),
+                     const_area->GetBaseMethodAddress(),
+                     const_area->GetLocations()->InAt(0).AsRegister<Register>()));
       } else {
         DCHECK(second.IsStackSlot());
         __ subss(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
@@ -3134,6 +3144,7 @@ void InstructionCodeGeneratorX86::VisitSub(HSub* sub) {
         __ subsd(first.AsFpuRegister<XmmRegister>(),
                  codegen_->LiteralDoubleAddress(
                      const_area->GetConstant()->AsDoubleConstant()->GetValue(),
+                     const_area->GetBaseMethodAddress(),
                      const_area->GetLocations()->InAt(0).AsRegister<Register>()));
       } else {
         DCHECK(second.IsDoubleStackSlot());
@@ -3304,6 +3315,7 @@ void InstructionCodeGeneratorX86::VisitMul(HMul* mul) {
           __ mulss(first.AsFpuRegister<XmmRegister>(),
                    codegen_->LiteralFloatAddress(
                        const_area->GetConstant()->AsFloatConstant()->GetValue(),
+                       const_area->GetBaseMethodAddress(),
                        const_area->GetLocations()->InAt(0).AsRegister<Register>()));
         } else {
           DCHECK(second.IsStackSlot());
@@ -3322,6 +3334,7 @@ void InstructionCodeGeneratorX86::VisitMul(HMul* mul) {
           __ mulsd(first.AsFpuRegister<XmmRegister>(),
                    codegen_->LiteralDoubleAddress(
                        const_area->GetConstant()->AsDoubleConstant()->GetValue(),
+                       const_area->GetBaseMethodAddress(),
                        const_area->GetLocations()->InAt(0).AsRegister<Register>()));
         } else {
           DCHECK(second.IsDoubleStackSlot());
@@ -3690,6 +3703,7 @@ void InstructionCodeGeneratorX86::VisitDiv(HDiv* div) {
         __ divss(first.AsFpuRegister<XmmRegister>(),
                  codegen_->LiteralFloatAddress(
                      const_area->GetConstant()->AsFloatConstant()->GetValue(),
+                     const_area->GetBaseMethodAddress(),
                      const_area->GetLocations()->InAt(0).AsRegister<Register>()));
       } else {
         DCHECK(second.IsStackSlot());
@@ -3706,8 +3720,9 @@ void InstructionCodeGeneratorX86::VisitDiv(HDiv* div) {
         DCHECK(const_area->IsEmittedAtUseSite());
         __ divsd(first.AsFpuRegister<XmmRegister>(),
                  codegen_->LiteralDoubleAddress(
-                     const_area->GetConstant()->AsDoubleConstant()->GetValue(),
-                     const_area->GetLocations()->InAt(0).AsRegister<Register>()));
+                     const_area->GetConstant()->AsDoubleConstant()->GetValue(),
+                     const_area->GetBaseMethodAddress(),
+                     const_area->GetLocations()->InAt(0).AsRegister<Register>()));
       } else {
         DCHECK(second.IsDoubleStackSlot());
         __ divsd(first.AsFpuRegister<XmmRegister>(), Address(ESP, second.GetStackIndex()));
@@ -4454,18 +4469,7 @@ void CodeGeneratorX86::GenerateMemoryBarrier(MemBarrierKind kind) {
 HInvokeStaticOrDirect::DispatchInfo CodeGeneratorX86::GetSupportedInvokeStaticOrDirectDispatch(
       const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
       HInvokeStaticOrDirect* invoke ATTRIBUTE_UNUSED) {
-  HInvokeStaticOrDirect::DispatchInfo dispatch_info = desired_dispatch_info;
-
-  // We disable pc-relative load when there is an irreducible loop, as the optimization
-  // is incompatible with it.
-  // TODO: Create as many X86ComputeBaseMethodAddress instructions
-  // as needed for methods with irreducible loops.
-  if (GetGraph()->HasIrreducibleLoops() &&
-      (dispatch_info.method_load_kind ==
-          HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative)) {
-    dispatch_info.method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod;
-  }
-  return dispatch_info;
+  return desired_dispatch_info;
 }

 Register CodeGeneratorX86::GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke,
@@ -4518,7 +4522,10 @@ Location CodeGeneratorX86::GenerateCalleeMethodStaticOrDirectCall(HInvokeStaticO
       __ movl(temp.AsRegister<Register>(), Address(base_reg, kDummy32BitOffset));
       // Bind a new fixup label at the end of the "movl" insn.
       uint32_t offset = invoke->GetDexCacheArrayOffset();
-      __ Bind(NewPcRelativeDexCacheArrayPatch(invoke->GetDexFileForPcRelativeDexCache(), offset));
+      __ Bind(NewPcRelativeDexCacheArrayPatch(
+          invoke->InputAt(invoke->GetSpecialInputIndex())->AsX86ComputeBaseMethodAddress(),
+          invoke->GetDexFileForPcRelativeDexCache(),
+          offset));
       break;
     }
     case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: {
@@ -4603,31 +4610,54 @@ void CodeGeneratorX86::RecordSimplePatch() {
 void CodeGeneratorX86::RecordBootStringPatch(HLoadString* load_string) {
   DCHECK(GetCompilerOptions().IsBootImage());
-  string_patches_.emplace_back(load_string->GetDexFile(), load_string->GetStringIndex().index_);
+  HX86ComputeBaseMethodAddress* address = nullptr;
+  if (GetCompilerOptions().GetCompilePic()) {
+    address = load_string->InputAt(0)->AsX86ComputeBaseMethodAddress();
+  } else {
+    DCHECK_EQ(load_string->InputCount(), 0u);
+  }
+  string_patches_.emplace_back(address,
+                               load_string->GetDexFile(),
+                               load_string->GetStringIndex().index_);
   __ Bind(&string_patches_.back().label);
 }

 void CodeGeneratorX86::RecordBootTypePatch(HLoadClass* load_class) {
-  boot_image_type_patches_.emplace_back(load_class->GetDexFile(),
+  HX86ComputeBaseMethodAddress* address = nullptr;
+  if (GetCompilerOptions().GetCompilePic()) {
+    address = load_class->InputAt(0)->AsX86ComputeBaseMethodAddress();
+  } else {
+    DCHECK_EQ(load_class->InputCount(), 0u);
+  }
+  boot_image_type_patches_.emplace_back(address,
+                                        load_class->GetDexFile(),
                                         load_class->GetTypeIndex().index_);
   __ Bind(&boot_image_type_patches_.back().label);
 }

 Label* CodeGeneratorX86::NewTypeBssEntryPatch(HLoadClass* load_class) {
-  type_bss_entry_patches_.emplace_back(load_class->GetDexFile(), load_class->GetTypeIndex().index_);
+  HX86ComputeBaseMethodAddress* address =
+      load_class->InputAt(0)->AsX86ComputeBaseMethodAddress();
+  type_bss_entry_patches_.emplace_back(
+      address, load_class->GetDexFile(), load_class->GetTypeIndex().index_);
   return &type_bss_entry_patches_.back().label;
 }

 Label* CodeGeneratorX86::NewStringBssEntryPatch(HLoadString* load_string) {
   DCHECK(!GetCompilerOptions().IsBootImage());
-  string_patches_.emplace_back(load_string->GetDexFile(), load_string->GetStringIndex().index_);
+  HX86ComputeBaseMethodAddress* address =
+      load_string->InputAt(0)->AsX86ComputeBaseMethodAddress();
+  string_patches_.emplace_back(
+      address, load_string->GetDexFile(), load_string->GetStringIndex().index_);
   return &string_patches_.back().label;
 }

-Label* CodeGeneratorX86::NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file,
-                                                         uint32_t element_offset) {
+Label* CodeGeneratorX86::NewPcRelativeDexCacheArrayPatch(
+    HX86ComputeBaseMethodAddress* method_address,
+    const DexFile& dex_file,
+    uint32_t element_offset) {
   // Add the patch entry and bind its label at the end of the instruction.
-  pc_relative_dex_cache_patches_.emplace_back(dex_file, element_offset);
+  pc_relative_dex_cache_patches_.emplace_back(method_address, dex_file, element_offset);
   return &pc_relative_dex_cache_patches_.back().label;
 }
@@ -4637,12 +4667,12 @@ constexpr uint32_t kLabelPositionToLiteralOffsetAdjustment = 4u;

 template <LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)>
 inline void CodeGeneratorX86::EmitPcRelativeLinkerPatches(
-    const ArenaDeque<PatchInfo<Label>>& infos,
+    const ArenaDeque<X86PcRelativePatchInfo>& infos,
     ArenaVector<LinkerPatch>* linker_patches) {
-  for (const PatchInfo<Label>& info : infos) {
+  for (const X86PcRelativePatchInfo& info : infos) {
     uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
-    linker_patches->push_back(
-        Factory(literal_offset, &info.dex_file, GetMethodAddressOffset(), info.index));
+    linker_patches->push_back(Factory(
+        literal_offset, &info.dex_file, GetMethodAddressOffset(info.method_address), info.index));
   }
 }
@@ -6002,13 +6032,6 @@ HLoadClass::LoadKind CodeGeneratorX86::GetSupportedLoadClassKind(
       FALLTHROUGH_INTENDED;
     case HLoadClass::LoadKind::kBssEntry:
       DCHECK(!Runtime::Current()->UseJitCompilation());  // Note: boot image is also non-JIT.
-      // We disable pc-relative load when there is an irreducible loop, as the optimization
-      // is incompatible with it.
-      // TODO: Create as many X86ComputeBaseMethodAddress instructions as needed for methods
-      // with irreducible loops.
-      if (GetGraph()->HasIrreducibleLoops()) {
-        return HLoadClass::LoadKind::kDexCacheViaMethod;
-      }
       break;
     case HLoadClass::LoadKind::kBootImageAddress:
       break;
@@ -6195,13 +6218,6 @@ HLoadString::LoadKind CodeGeneratorX86::GetSupportedLoadStringKind(
       FALLTHROUGH_INTENDED;
     case HLoadString::LoadKind::kBssEntry:
       DCHECK(!Runtime::Current()->UseJitCompilation());  // Note: boot image is also non-JIT.
-      // We disable pc-relative load when there is an irreducible loop, as the optimization
-      // is incompatible with it.
-      // TODO: Create as many X86ComputeBaseMethodAddress instructions as needed for methods
-      // with irreducible loops.
-      if (GetGraph()->HasIrreducibleLoops()) {
-        return HLoadString::LoadKind::kDexCacheViaMethod;
-      }
       break;
     case HLoadString::LoadKind::kBootImageAddress:
       break;
@@ -7489,7 +7505,7 @@ void InstructionCodeGeneratorX86::VisitX86ComputeBaseMethodAddress(
   __ Bind(&next_instruction);

   // Remember this offset for later use with constant area.
-  codegen_->SetMethodAddressOffset(GetAssembler()->CodeSize());
+  codegen_->AddMethodAddressOffset(insn, GetAssembler()->CodeSize());

   // Grab the return address off the stack.
   __ popl(reg);
@@ -7536,17 +7552,20 @@ void InstructionCodeGeneratorX86::VisitX86LoadFromConstantTable(HX86LoadFromCons
   switch (insn->GetType()) {
     case Primitive::kPrimFloat:
       __ movss(out.AsFpuRegister<XmmRegister>(),
-               codegen_->LiteralFloatAddress(value->AsFloatConstant()->GetValue(), const_area));
+               codegen_->LiteralFloatAddress(
+                   value->AsFloatConstant()->GetValue(), insn->GetBaseMethodAddress(), const_area));
       break;

     case Primitive::kPrimDouble:
       __ movsd(out.AsFpuRegister<XmmRegister>(),
-               codegen_->LiteralDoubleAddress(value->AsDoubleConstant()->GetValue(), const_area));
+               codegen_->LiteralDoubleAddress(
+                   value->AsDoubleConstant()->GetValue(), insn->GetBaseMethodAddress(), const_area));
       break;

     case Primitive::kPrimInt:
       __ movl(out.AsRegister<Register>(),
-              codegen_->LiteralInt32Address(value->AsIntConstant()->GetValue(), const_area));
+              codegen_->LiteralInt32Address(
+                  value->AsIntConstant()->GetValue(), insn->GetBaseMethodAddress(), const_area));
       break;

     default:
@@ -7559,13 +7578,18 @@ void InstructionCodeGeneratorX86::VisitX86LoadFromConstantTable(HX86LoadFromCons
  */
 class RIPFixup : public AssemblerFixup, public ArenaObject<kArenaAllocCodeGenerator> {
  public:
-  RIPFixup(CodeGeneratorX86& codegen, size_t offset)
-      : codegen_(&codegen), offset_into_constant_area_(offset) {}
+  RIPFixup(CodeGeneratorX86& codegen,
+           HX86ComputeBaseMethodAddress* base_method_address,
+           size_t offset)
+      : codegen_(&codegen),
+        base_method_address_(base_method_address),
+        offset_into_constant_area_(offset) {}

  protected:
   void SetOffset(size_t offset) { offset_into_constant_area_ = offset; }

   CodeGeneratorX86* codegen_;
+  HX86ComputeBaseMethodAddress* base_method_address_;

  private:
   void Process(const MemoryRegion& region, int pos) OVERRIDE {
@@ -7574,7 +7598,8 @@ class RIPFixup : public AssemblerFixup, public ArenaObject<kArenaAllocCodeGenera
     // The value to patch is the distance from the offset in the constant area
     // from the address computed by the HX86ComputeBaseMethodAddress instruction.
     int32_t constant_offset = codegen_->ConstantAreaStart() + offset_into_constant_area_;
-    int32_t relative_position = constant_offset - codegen_->GetMethodAddressOffset();
+    int32_t relative_position =
+        constant_offset - codegen_->GetMethodAddressOffset(base_method_address_);

     // Patch in the right value.
     region.StoreUnaligned<int32_t>(pos - 4, relative_position);
@@ -7591,7 +7616,8 @@ class RIPFixup : public AssemblerFixup, public ArenaObject<kArenaAllocCodeGenera
 class JumpTableRIPFixup : public RIPFixup {
  public:
   JumpTableRIPFixup(CodeGeneratorX86& codegen, HX86PackedSwitch* switch_instr)
-      : RIPFixup(codegen, static_cast<size_t>(-1)), switch_instr_(switch_instr) {}
+      : RIPFixup(codegen, switch_instr->GetBaseMethodAddress(), static_cast<size_t>(-1)),
+        switch_instr_(switch_instr) {}

   void CreateJumpTable() {
     X86Assembler* assembler = codegen_->GetAssembler();
@@ -7602,7 +7628,7 @@ class JumpTableRIPFixup : public RIPFixup {

     // The label values in the jump table are computed relative to the
     // instruction addressing the constant area.
-    const int32_t relative_offset = codegen_->GetMethodAddressOffset();
+    const int32_t relative_offset = codegen_->GetMethodAddressOffset(base_method_address_);

     // Populate the jump table with the correct values for the jump table.
     int32_t num_entries = switch_instr_->GetNumEntries();
@@ -7644,23 +7670,32 @@ void CodeGeneratorX86::Finalize(CodeAllocator* allocator) {
   CodeGenerator::Finalize(allocator);
 }

-Address CodeGeneratorX86::LiteralDoubleAddress(double v, Register reg) {
-  AssemblerFixup* fixup = new (GetGraph()->GetArena()) RIPFixup(*this, __ AddDouble(v));
+Address CodeGeneratorX86::LiteralDoubleAddress(double v,
+                                               HX86ComputeBaseMethodAddress* method_base,
+                                               Register reg) {
+  AssemblerFixup* fixup =
+      new (GetGraph()->GetArena()) RIPFixup(*this, method_base, __ AddDouble(v));
   return Address(reg, kDummy32BitOffset, fixup);
 }

-Address CodeGeneratorX86::LiteralFloatAddress(float v, Register reg) {
-  AssemblerFixup* fixup = new (GetGraph()->GetArena()) RIPFixup(*this, __ AddFloat(v));
+Address CodeGeneratorX86::LiteralFloatAddress(float v,
+                                              HX86ComputeBaseMethodAddress* method_base,
+                                              Register reg) {
+  AssemblerFixup* fixup = new (GetGraph()->GetArena()) RIPFixup(*this, method_base, __ AddFloat(v));
   return Address(reg, kDummy32BitOffset, fixup);
 }

-Address CodeGeneratorX86::LiteralInt32Address(int32_t v, Register reg) {
-  AssemblerFixup* fixup = new (GetGraph()->GetArena()) RIPFixup(*this, __ AddInt32(v));
+Address CodeGeneratorX86::LiteralInt32Address(int32_t v,
+                                              HX86ComputeBaseMethodAddress* method_base,
+                                              Register reg) {
+  AssemblerFixup* fixup = new (GetGraph()->GetArena()) RIPFixup(*this, method_base, __ AddInt32(v));
   return Address(reg, kDummy32BitOffset, fixup);
 }

-Address CodeGeneratorX86::LiteralInt64Address(int64_t v, Register reg) {
-  AssemblerFixup* fixup = new (GetGraph()->GetArena()) RIPFixup(*this, __ AddInt64(v));
+Address CodeGeneratorX86::LiteralInt64Address(int64_t v,
+                                              HX86ComputeBaseMethodAddress* method_base,
+                                              Register reg) {
+  AssemblerFixup* fixup = new (GetGraph()->GetArena()) RIPFixup(*this, method_base, __ AddInt64(v));
   return Address(reg, kDummy32BitOffset, fixup);
 }
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index 9eb97658da..7350fcc48a 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -415,7 +415,9 @@ class CodeGeneratorX86 : public CodeGenerator {
   void RecordBootTypePatch(HLoadClass* load_class);
   Label* NewTypeBssEntryPatch(HLoadClass* load_class);
   Label* NewStringBssEntryPatch(HLoadString* load_string);
-  Label* NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file, uint32_t element_offset);
+  Label* NewPcRelativeDexCacheArrayPatch(HX86ComputeBaseMethodAddress* method_address,
+                                         const DexFile& dex_file,
+                                         uint32_t element_offset);
   Label* NewJitRootStringPatch(const DexFile& dex_file,
                                dex::StringIndex dex_index,
                                Handle<mirror::String> handle);
@@ -463,22 +465,22 @@ class CodeGeneratorX86 : public CodeGenerator {
     return isa_features_;
   }

-  void SetMethodAddressOffset(int32_t offset) {
-    method_address_offset_ = offset;
+  void AddMethodAddressOffset(HX86ComputeBaseMethodAddress* method_base, int32_t offset) {
+    method_address_offset_.Put(method_base->GetId(), offset);
   }

-  int32_t GetMethodAddressOffset() const {
-    return method_address_offset_;
+  int32_t GetMethodAddressOffset(HX86ComputeBaseMethodAddress* method_base) const {
+    return method_address_offset_.Get(method_base->GetId());
   }

   int32_t ConstantAreaStart() const {
     return constant_area_start_;
   }

-  Address LiteralDoubleAddress(double v, Register reg);
-  Address LiteralFloatAddress(float v, Register reg);
-  Address LiteralInt32Address(int32_t v, Register reg);
-  Address LiteralInt64Address(int64_t v, Register reg);
+  Address LiteralDoubleAddress(double v, HX86ComputeBaseMethodAddress* method_base, Register reg);
+  Address LiteralFloatAddress(float v, HX86ComputeBaseMethodAddress* method_base, Register reg);
+  Address LiteralInt32Address(int32_t v, HX86ComputeBaseMethodAddress* method_base, Register reg);
+  Address LiteralInt64Address(int64_t v, HX86ComputeBaseMethodAddress* method_base, Register reg);

   // Load a 32-bit value into a register in the most efficient manner.
   void Load32BitValue(Register dest, int32_t value);
@@ -603,12 +605,21 @@ class CodeGeneratorX86 : public CodeGenerator {
   static constexpr int32_t kDummy32BitOffset = 256;

  private:
-  Register GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke, Register temp);
+  struct X86PcRelativePatchInfo : PatchInfo<Label> {
+    X86PcRelativePatchInfo(HX86ComputeBaseMethodAddress* address,
+                           const DexFile& target_dex_file,
+                           uint32_t target_index)
+        : PatchInfo(target_dex_file, target_index),
+          method_address(address) {}
+    HX86ComputeBaseMethodAddress* method_address;
+  };

   template <LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)>
-  void EmitPcRelativeLinkerPatches(const ArenaDeque<PatchInfo<Label>>& infos,
+  void EmitPcRelativeLinkerPatches(const ArenaDeque<X86PcRelativePatchInfo>& infos,
                                    ArenaVector<LinkerPatch>* linker_patches);

+  Register GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke, Register temp);
+
   // Labels for each block that will be compiled.
   Label* block_labels_;  // Indexed by block id.
   Label frame_entry_label_;
@@ -619,15 +630,15 @@ class CodeGeneratorX86 : public CodeGenerator {
   const X86InstructionSetFeatures& isa_features_;

   // PC-relative DexCache access info.
-  ArenaDeque<PatchInfo<Label>> pc_relative_dex_cache_patches_;
+  ArenaDeque<X86PcRelativePatchInfo> pc_relative_dex_cache_patches_;
   // Patch locations for patchoat where the linker doesn't do any other work.
   ArenaDeque<Label> simple_patches_;
   // String patch locations; type depends on configuration (app .bss or boot image PIC/non-PIC).
-  ArenaDeque<PatchInfo<Label>> string_patches_;
+  ArenaDeque<X86PcRelativePatchInfo> string_patches_;
   // Type patch locations for boot image; type depends on configuration (boot image PIC/non-PIC).
-  ArenaDeque<PatchInfo<Label>> boot_image_type_patches_;
+  ArenaDeque<X86PcRelativePatchInfo> boot_image_type_patches_;
   // Type patch locations for kBssEntry.
-  ArenaDeque<PatchInfo<Label>> type_bss_entry_patches_;
+  ArenaDeque<X86PcRelativePatchInfo> type_bss_entry_patches_;

   // Patches for string root accesses in JIT compiled code.
   ArenaDeque<PatchInfo<Label>> jit_string_patches_;
@@ -642,11 +653,9 @@ class CodeGeneratorX86 : public CodeGenerator {
   // Fixups for jump tables that need to be patched after the constant table is generated.
   ArenaVector<JumpTableRIPFixup*> fixups_to_jump_tables_;

-  // If there is a HX86ComputeBaseMethodAddress instruction in the graph
-  // (which shall be the sole instruction of this kind), subtracting this offset
-  // from the value contained in the out register of this HX86ComputeBaseMethodAddress
-  // instruction gives the address of the start of this method.
-  int32_t method_address_offset_;
+  // Maps a HX86ComputeBaseMethodAddress instruction id to its offset in the
+  // compiled code.
+  ArenaSafeMap<uint32_t, int32_t> method_address_offset_;

   DISALLOW_COPY_AND_ASSIGN(CodeGeneratorX86);
 };
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc
index 922c3bcac9..e1b7ea53b4 100644
--- a/compiler/optimizing/intrinsics_x86.cc
+++ b/compiler/optimizing/intrinsics_x86.cc
@@ -356,23 +356,28 @@ static void CreateFloatToFloat(ArenaAllocator* arena, HInvoke* invoke) {
   }
 }

-static void MathAbsFP(LocationSummary* locations,
+static void MathAbsFP(HInvoke* invoke,
                       bool is64bit,
                       X86Assembler* assembler,
                       CodeGeneratorX86* codegen) {
+  LocationSummary* locations = invoke->GetLocations();
   Location output = locations->Out();

   DCHECK(output.IsFpuRegister());
   if (locations->GetInputCount() == 2 && locations->InAt(1).IsValid()) {
+    HX86ComputeBaseMethodAddress* method_address =
+        invoke->InputAt(1)->AsX86ComputeBaseMethodAddress();
     DCHECK(locations->InAt(1).IsRegister());
     // We also have a constant area pointer.
     Register constant_area = locations->InAt(1).AsRegister<Register>();
     XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
     if (is64bit) {
-      __ movsd(temp, codegen->LiteralInt64Address(INT64_C(0x7FFFFFFFFFFFFFFF), constant_area));
+      __ movsd(temp, codegen->LiteralInt64Address(
+          INT64_C(0x7FFFFFFFFFFFFFFF), method_address, constant_area));
       __ andpd(output.AsFpuRegister<XmmRegister>(), temp);
     } else {
-      __ movss(temp, codegen->LiteralInt32Address(INT32_C(0x7FFFFFFF), constant_area));
+      __ movss(temp, codegen->LiteralInt32Address(
+          INT32_C(0x7FFFFFFF), method_address, constant_area));
       __ andps(output.AsFpuRegister<XmmRegister>(), temp);
     }
   } else {
@@ -396,7 +401,7 @@ void IntrinsicLocationsBuilderX86::VisitMathAbsDouble(HInvoke* invoke) {
 }

 void IntrinsicCodeGeneratorX86::VisitMathAbsDouble(HInvoke* invoke) {
-  MathAbsFP(invoke->GetLocations(), /* is64bit */ true, GetAssembler(), codegen_);
+  MathAbsFP(invoke, /* is64bit */ true, GetAssembler(), codegen_);
 }

 void IntrinsicLocationsBuilderX86::VisitMathAbsFloat(HInvoke* invoke) {
@@ -404,7 +409,7 @@ void IntrinsicLocationsBuilderX86::VisitMathAbsFloat(HInvoke* invoke) {
 }

 void IntrinsicCodeGeneratorX86::VisitMathAbsFloat(HInvoke* invoke) {
-  MathAbsFP(invoke->GetLocations(), /* is64bit */ false, GetAssembler(), codegen_);
+  MathAbsFP(invoke, /* is64bit */ false, GetAssembler(), codegen_);
 }

 static void CreateAbsIntLocation(ArenaAllocator* arena, HInvoke* invoke) {
@@ -486,11 +491,12 @@ void IntrinsicCodeGeneratorX86::VisitMathAbsLong(HInvoke* invoke) {
   GenAbsLong(invoke->GetLocations(), GetAssembler());
 }

-static void GenMinMaxFP(LocationSummary* locations,
+static void GenMinMaxFP(HInvoke* invoke,
                         bool is_min,
                         bool is_double,
                         X86Assembler* assembler,
                         CodeGeneratorX86* codegen) {
+  LocationSummary* locations = invoke->GetLocations();
   Location op1_loc = locations->InAt(0);
   Location op2_loc = locations->InAt(1);
   Location out_loc = locations->Out();
@@ -553,12 +559,14 @@ static void GenMinMaxFP(LocationSummary* locations,
   __ Bind(&nan);

   // Do we have a constant area pointer?
   if (locations->GetInputCount() == 3 && locations->InAt(2).IsValid()) {
+    HX86ComputeBaseMethodAddress* method_address =
+        invoke->InputAt(2)->AsX86ComputeBaseMethodAddress();
     DCHECK(locations->InAt(2).IsRegister());
     Register constant_area = locations->InAt(2).AsRegister<Register>();
     if (is_double) {
-      __ movsd(out, codegen->LiteralInt64Address(kDoubleNaN, constant_area));
+      __ movsd(out, codegen->LiteralInt64Address(kDoubleNaN, method_address, constant_area));
     } else {
-      __ movss(out, codegen->LiteralInt32Address(kFloatNaN, constant_area));
+      __ movss(out, codegen->LiteralInt32Address(kFloatNaN, method_address, constant_area));
     }
   } else {
     if (is_double) {
@@ -608,7 +616,7 @@ void IntrinsicLocationsBuilderX86::VisitMathMinDoubleDouble(HInvoke* invoke) {
 }

 void IntrinsicCodeGeneratorX86::VisitMathMinDoubleDouble(HInvoke* invoke) {
-  GenMinMaxFP(invoke->GetLocations(),
+  GenMinMaxFP(invoke,
               /* is_min */ true,
               /* is_double */ true,
               GetAssembler(),
@@ -620,7 +628,7 @@ void IntrinsicLocationsBuilderX86::VisitMathMinFloatFloat(HInvoke* invoke) {
 }

 void IntrinsicCodeGeneratorX86::VisitMathMinFloatFloat(HInvoke* invoke) {
-  GenMinMaxFP(invoke->GetLocations(),
+  GenMinMaxFP(invoke,
               /* is_min */ true,
               /* is_double */ false,
               GetAssembler(),
@@ -632,7 +640,7 @@ void IntrinsicLocationsBuilderX86::VisitMathMaxDoubleDouble(HInvoke* invoke) {
 }

 void IntrinsicCodeGeneratorX86::VisitMathMaxDoubleDouble(HInvoke* invoke) {
-  GenMinMaxFP(invoke->GetLocations(),
+  GenMinMaxFP(invoke,
               /* is_min */ false,
               /* is_double */ true,
               GetAssembler(),
@@ -644,7 +652,7 @@ void IntrinsicLocationsBuilderX86::VisitMathMaxFloatFloat(HInvoke* invoke) {
 }

 void IntrinsicCodeGeneratorX86::VisitMathMaxFloatFloat(HInvoke* invoke) {
-  GenMinMaxFP(invoke->GetLocations(),
+  GenMinMaxFP(invoke,
               /* is_min */ false,
               /* is_double */ false,
               GetAssembler(),
@@ -905,10 +913,16 @@ void IntrinsicCodeGeneratorX86::VisitMathRoundFloat(HInvoke* invoke) {
   __ subss(t2, t1);
   if (locations->GetInputCount() == 2 && locations->InAt(1).IsValid()) {
     // Direct constant area available.
+    HX86ComputeBaseMethodAddress* method_address =
+        invoke->InputAt(1)->AsX86ComputeBaseMethodAddress();
     Register constant_area = locations->InAt(1).AsRegister<Register>();
-    __ comiss(t2, codegen_->LiteralInt32Address(bit_cast<int32_t, float>(0.5f), constant_area));
+    __ comiss(t2, codegen_->LiteralInt32Address(bit_cast<int32_t, float>(0.5f),
+                                                method_address,
+                                                constant_area));
     __ j(kBelow, &skip_incr);
-    __ addss(t1, codegen_->LiteralInt32Address(bit_cast<int32_t, float>(1.0f), constant_area));
+    __ addss(t1, codegen_->LiteralInt32Address(bit_cast<int32_t, float>(1.0f),
+                                               method_address,
+                                               constant_area));
     __ Bind(&skip_incr);
   } else {
     // No constant area: go through stack.
diff --git a/compiler/optimizing/nodes_x86.h b/compiler/optimizing/nodes_x86.h
index fa479760fe..75893c3129 100644
--- a/compiler/optimizing/nodes_x86.h
+++ b/compiler/optimizing/nodes_x86.h
@@ -71,6 +71,10 @@ class HX86FPNeg FINAL : public HExpression<2> {
     SetRawInputAt(1, method_base);
   }

+  HX86ComputeBaseMethodAddress* GetBaseMethodAddress() const {
+    return InputAt(1)->AsX86ComputeBaseMethodAddress();
+  }
+
   DECLARE_INSTRUCTION(X86FPNeg);

  private:
diff --git a/compiler/optimizing/pc_relative_fixups_x86.cc b/compiler/optimizing/pc_relative_fixups_x86.cc
index 2befc8ca4e..a1c916f43a 100644
--- a/compiler/optimizing/pc_relative_fixups_x86.cc
+++ b/compiler/optimizing/pc_relative_fixups_x86.cc
@@ -84,8 +84,8 @@ class PCRelativeHandlerVisitor : public HGraphVisitor {
     HLoadClass::LoadKind load_kind = load_class->GetLoadKind();
     if (load_kind == HLoadClass::LoadKind::kBootImageLinkTimePcRelative ||
         load_kind == HLoadClass::LoadKind::kBssEntry) {
-      InitializePCRelativeBasePointer();
-      load_class->AddSpecialInput(base_);
+      HX86ComputeBaseMethodAddress* method_address = GetPCRelativeBasePointer(load_class);
+      load_class->AddSpecialInput(method_address);
     }
   }
@@ -93,8 +93,8 @@
     HLoadString::LoadKind load_kind = load_string->GetLoadKind();
     if (load_kind == HLoadString::LoadKind::kBootImageLinkTimePcRelative ||
         load_kind == HLoadString::LoadKind::kBssEntry) {
-      InitializePCRelativeBasePointer();
-      load_string->AddSpecialInput(base_);
+      HX86ComputeBaseMethodAddress* method_address = GetPCRelativeBasePointer(load_string);
+      load_string->AddSpecialInput(method_address);
     }
   }
@@ -132,13 +132,13 @@
   void VisitNeg(HNeg* neg) OVERRIDE {
     if (Primitive::IsFloatingPointType(neg->GetType())) {
       // We need to replace the HNeg with a HX86FPNeg in order to address the constant area.
-      InitializePCRelativeBasePointer();
+      HX86ComputeBaseMethodAddress* method_address = GetPCRelativeBasePointer(neg);
       HGraph* graph = GetGraph();
       HBasicBlock* block = neg->GetBlock();
       HX86FPNeg* x86_fp_neg = new (graph->GetArena()) HX86FPNeg(
           neg->GetType(),
           neg->InputAt(0),
-          base_,
+          method_address,
           neg->GetDexPc());
       block->ReplaceAndRemoveInstructionWith(neg, x86_fp_neg);
     }
@@ -151,35 +151,44 @@
     }
     // We need to replace the HPackedSwitch with a HX86PackedSwitch in order to
     // address the constant area.
-    InitializePCRelativeBasePointer();
+    HX86ComputeBaseMethodAddress* method_address = GetPCRelativeBasePointer(switch_insn);
     HGraph* graph = GetGraph();
     HBasicBlock* block = switch_insn->GetBlock();
     HX86PackedSwitch* x86_switch = new (graph->GetArena()) HX86PackedSwitch(
         switch_insn->GetStartValue(),
         switch_insn->GetNumEntries(),
         switch_insn->InputAt(0),
-        base_,
+        method_address,
         switch_insn->GetDexPc());
     block->ReplaceAndRemoveInstructionWith(switch_insn, x86_switch);
   }

-  void InitializePCRelativeBasePointer() {
-    // Ensure we only initialize the pointer once.
-    if (base_ != nullptr) {
-      return;
+  HX86ComputeBaseMethodAddress* GetPCRelativeBasePointer(HInstruction* cursor) {
+    bool has_irreducible_loops = GetGraph()->HasIrreducibleLoops();
+    if (!has_irreducible_loops) {
+      // Ensure we only initialize the pointer once.
+      if (base_ != nullptr) {
+        return base_;
+      }
     }
     // Insert the base at the start of the entry block, move it to a better
     // position later in MoveBaseIfNeeded().
-    base_ = new (GetGraph()->GetArena()) HX86ComputeBaseMethodAddress();
-    HBasicBlock* entry_block = GetGraph()->GetEntryBlock();
-    entry_block->InsertInstructionBefore(base_, entry_block->GetFirstInstruction());
-    DCHECK(base_ != nullptr);
+    HX86ComputeBaseMethodAddress* method_address =
+        new (GetGraph()->GetArena()) HX86ComputeBaseMethodAddress();
+    if (has_irreducible_loops) {
+      cursor->GetBlock()->InsertInstructionBefore(method_address, cursor);
+    } else {
+      HBasicBlock* entry_block = GetGraph()->GetEntryBlock();
+      entry_block->InsertInstructionBefore(method_address, entry_block->GetFirstInstruction());
+      base_ = method_address;
+    }
+    return method_address;
   }

   void ReplaceInput(HInstruction* insn, HConstant* value, int input_index, bool materialize) {
-    InitializePCRelativeBasePointer();
+    HX86ComputeBaseMethodAddress* method_address = GetPCRelativeBasePointer(insn);
     HX86LoadFromConstantTable* load_constant =
-        new (GetGraph()->GetArena()) HX86LoadFromConstantTable(base_, value);
+        new (GetGraph()->GetArena()) HX86LoadFromConstantTable(method_address, value);
     if (!materialize) {
       load_constant->MarkEmittedAtUseSite();
     }
@@ -204,9 +213,9 @@
     if (invoke_static_or_direct != nullptr &&
         invoke_static_or_direct->HasPcRelativeDexCache() &&
         !IsCallFreeIntrinsic<IntrinsicLocationsBuilderX86>(invoke, codegen_)) {
-      InitializePCRelativeBasePointer();
-      // Add the extra parameter base_.
-      invoke_static_or_direct->AddSpecialInput(base_);
+      HX86ComputeBaseMethodAddress* method_address = GetPCRelativeBasePointer(invoke);
+      // Add the extra parameter.
+      invoke_static_or_direct->AddSpecialInput(method_address);
       base_added = true;
     }
@@ -231,8 +240,8 @@
         if (!base_added) {
           DCHECK(invoke_static_or_direct != nullptr);
           DCHECK(!invoke_static_or_direct->HasCurrentMethodInput());
-          InitializePCRelativeBasePointer();
-          invoke_static_or_direct->AddSpecialInput(base_);
+          HX86ComputeBaseMethodAddress* method_address = GetPCRelativeBasePointer(invoke);
+          invoke_static_or_direct->AddSpecialInput(method_address);
         }
         break;
       default:
@@ -243,16 +252,12 @@
   CodeGeneratorX86* codegen_;

   // The generated HX86ComputeBaseMethodAddress in the entry block needed as an
-  // input to the HX86LoadFromConstantTable instructions.
+  // input to the HX86LoadFromConstantTable instructions. Only set for
+  // graphs with reducible loops.
   HX86ComputeBaseMethodAddress* base_;
 };

 void PcRelativeFixups::Run() {
-  if (graph_->HasIrreducibleLoops()) {
-    // Do not run this optimization, as irreducible loops do not work with an instruction
-    // that can be live-in at the irreducible loop header.
-    return;
-  }
   PCRelativeHandlerVisitor visitor(graph_, codegen_);
   visitor.VisitInsertionOrder();
   visitor.MoveBaseIfNeeded();
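A closing note on the fixup pass: GetPCRelativeBasePointer() above keeps the old single-base behavior for reducible graphs and only goes per-use when irreducible loops are present. A simplified model of that placement decision, with plain structs standing in for ART's HGraph/HBasicBlock/HInstruction:

#include <list>

// Stand-ins for ART's graph classes; only the placement decision is modeled.
struct Instr {};

struct Block {
  std::list<Instr*> instrs;
};

struct Graph {
  bool has_irreducible_loops = false;
  Block entry_block;
};

// cached_base plays the role of the visitor's base_ field: it is only set
// when one entry-block base can serve every use in the method.
Instr* GetPcRelativeBase(Graph& graph,
                         Block& cursor_block,
                         std::list<Instr*>::iterator cursor,
                         Instr*& cached_base) {
  if (!graph.has_irreducible_loops && cached_base != nullptr) {
    return cached_base;  // reuse the single shared base
  }
  Instr* base = new Instr();
  if (graph.has_irreducible_loops) {
    // The base cannot be kept live across an irreducible loop header, so
    // recompute it locally, right before the instruction that needs it.
    cursor_block.instrs.insert(cursor, base);
  } else {
    // One base for the whole method, hoisted to the start of the entry block.
    graph.entry_block.instrs.insert(graph.entry_block.instrs.begin(), base);
    cached_base = base;
  }
  return base;
}

The local recomputation is cheap (a call/pop pair per use), which is why the pass no longer needs to bail out of the optimization entirely for irreducible graphs.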