From 133719e01111cea9d4919df4e8e90b5c51f7ad5a Mon Sep 17 00:00:00 2001 From: Nicolas Geoffray Date: Sun, 22 Jan 2017 15:44:39 +0000 Subject: Allow multiple HX86ComputeBaseMethodAddress. So that even graphs with irreducible loops can use it and avoid loading methods/classes/strings through kDexCacheViaMethod. Test: test-art-host Change-Id: I14109cfdc82347a7af420ca0ee55172ec37ca8ef --- compiler/optimizing/code_generator_x86.cc | 175 +++++++++++++++----- compiler/optimizing/code_generator_x86.h | 49 +++++--- compiler/optimizing/intrinsics_x86.cc | 42 ++++--- compiler/optimizing/nodes_x86.h | 4 + compiler/optimizing/pc_relative_fixups_x86.cc | 63 +++++----- 5 files changed, 200 insertions(+), 133 deletions(-) (limited to 'compiler/optimizing') diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index 853c91fac8..89d210c3e6 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -1023,7 +1023,8 @@ CodeGeneratorX86::CodeGeneratorX86(HGraph* graph, jit_class_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), constant_area_start_(-1), fixups_to_jump_tables_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), - method_address_offset_(-1) { + method_address_offset_(std::less<uint32_t>(), + graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) { // Use a fake return address register to mimic Quick. 
AddAllocatedRegister(Location::RegisterLocation(kFakeReturnRegister)); } @@ -1498,8 +1499,9 @@ void InstructionCodeGeneratorX86::GenerateFPCompare(Location lhs, DCHECK(const_area->IsEmittedAtUseSite()); __ ucomisd(lhs.AsFpuRegister(), codegen_->LiteralDoubleAddress( - const_area->GetConstant()->AsDoubleConstant()->GetValue(), - const_area->GetLocations()->InAt(0).AsRegister())); + const_area->GetConstant()->AsDoubleConstant()->GetValue(), + const_area->GetBaseMethodAddress(), + const_area->GetLocations()->InAt(0).AsRegister())); } else { DCHECK(rhs.IsDoubleStackSlot()); __ ucomisd(lhs.AsFpuRegister(), Address(ESP, rhs.GetStackIndex())); @@ -1511,8 +1513,9 @@ void InstructionCodeGeneratorX86::GenerateFPCompare(Location lhs, DCHECK(const_area->IsEmittedAtUseSite()); __ ucomiss(lhs.AsFpuRegister(), codegen_->LiteralFloatAddress( - const_area->GetConstant()->AsFloatConstant()->GetValue(), - const_area->GetLocations()->InAt(0).AsRegister())); + const_area->GetConstant()->AsFloatConstant()->GetValue(), + const_area->GetBaseMethodAddress(), + const_area->GetLocations()->InAt(0).AsRegister())); } else { DCHECK(rhs.IsStackSlot()); __ ucomiss(lhs.AsFpuRegister(), Address(ESP, rhs.GetStackIndex())); @@ -2360,10 +2363,14 @@ void InstructionCodeGeneratorX86::VisitX86FPNeg(HX86FPNeg* neg) { Register constant_area = locations->InAt(1).AsRegister(); XmmRegister mask = locations->GetTemp(0).AsFpuRegister(); if (neg->GetType() == Primitive::kPrimFloat) { - __ movss(mask, codegen_->LiteralInt32Address(INT32_C(0x80000000), constant_area)); + __ movss(mask, codegen_->LiteralInt32Address(INT32_C(0x80000000), + neg->GetBaseMethodAddress(), + constant_area)); __ xorps(out.AsFpuRegister(), mask); } else { - __ movsd(mask, codegen_->LiteralInt64Address(INT64_C(0x8000000000000000), constant_area)); + __ movsd(mask, codegen_->LiteralInt64Address(INT64_C(0x8000000000000000), + neg->GetBaseMethodAddress(), + constant_area)); __ xorpd(out.AsFpuRegister(), mask); } } @@ -3012,8 +3019,9 @@ void 
InstructionCodeGeneratorX86::VisitAdd(HAdd* add) { DCHECK(const_area->IsEmittedAtUseSite()); __ addss(first.AsFpuRegister(), codegen_->LiteralFloatAddress( - const_area->GetConstant()->AsFloatConstant()->GetValue(), - const_area->GetLocations()->InAt(0).AsRegister())); + const_area->GetConstant()->AsFloatConstant()->GetValue(), + const_area->GetBaseMethodAddress(), + const_area->GetLocations()->InAt(0).AsRegister())); } else { DCHECK(second.IsStackSlot()); __ addss(first.AsFpuRegister(), Address(ESP, second.GetStackIndex())); @@ -3029,8 +3037,9 @@ void InstructionCodeGeneratorX86::VisitAdd(HAdd* add) { DCHECK(const_area->IsEmittedAtUseSite()); __ addsd(first.AsFpuRegister(), codegen_->LiteralDoubleAddress( - const_area->GetConstant()->AsDoubleConstant()->GetValue(), - const_area->GetLocations()->InAt(0).AsRegister())); + const_area->GetConstant()->AsDoubleConstant()->GetValue(), + const_area->GetBaseMethodAddress(), + const_area->GetLocations()->InAt(0).AsRegister())); } else { DCHECK(second.IsDoubleStackSlot()); __ addsd(first.AsFpuRegister(), Address(ESP, second.GetStackIndex())); @@ -3116,8 +3125,9 @@ void InstructionCodeGeneratorX86::VisitSub(HSub* sub) { DCHECK(const_area->IsEmittedAtUseSite()); __ subss(first.AsFpuRegister(), codegen_->LiteralFloatAddress( - const_area->GetConstant()->AsFloatConstant()->GetValue(), - const_area->GetLocations()->InAt(0).AsRegister())); + const_area->GetConstant()->AsFloatConstant()->GetValue(), + const_area->GetBaseMethodAddress(), + const_area->GetLocations()->InAt(0).AsRegister())); } else { DCHECK(second.IsStackSlot()); __ subss(first.AsFpuRegister(), Address(ESP, second.GetStackIndex())); @@ -3134,6 +3144,7 @@ void InstructionCodeGeneratorX86::VisitSub(HSub* sub) { __ subsd(first.AsFpuRegister(), codegen_->LiteralDoubleAddress( const_area->GetConstant()->AsDoubleConstant()->GetValue(), + const_area->GetBaseMethodAddress(), const_area->GetLocations()->InAt(0).AsRegister())); } else { DCHECK(second.IsDoubleStackSlot()); @@ 
-3304,6 +3315,7 @@ void InstructionCodeGeneratorX86::VisitMul(HMul* mul) { __ mulss(first.AsFpuRegister(), codegen_->LiteralFloatAddress( const_area->GetConstant()->AsFloatConstant()->GetValue(), + const_area->GetBaseMethodAddress(), const_area->GetLocations()->InAt(0).AsRegister())); } else { DCHECK(second.IsStackSlot()); @@ -3322,6 +3334,7 @@ void InstructionCodeGeneratorX86::VisitMul(HMul* mul) { __ mulsd(first.AsFpuRegister(), codegen_->LiteralDoubleAddress( const_area->GetConstant()->AsDoubleConstant()->GetValue(), + const_area->GetBaseMethodAddress(), const_area->GetLocations()->InAt(0).AsRegister())); } else { DCHECK(second.IsDoubleStackSlot()); @@ -3690,6 +3703,7 @@ void InstructionCodeGeneratorX86::VisitDiv(HDiv* div) { __ divss(first.AsFpuRegister(), codegen_->LiteralFloatAddress( const_area->GetConstant()->AsFloatConstant()->GetValue(), + const_area->GetBaseMethodAddress(), const_area->GetLocations()->InAt(0).AsRegister())); } else { DCHECK(second.IsStackSlot()); @@ -3706,8 +3720,9 @@ void InstructionCodeGeneratorX86::VisitDiv(HDiv* div) { DCHECK(const_area->IsEmittedAtUseSite()); __ divsd(first.AsFpuRegister(), codegen_->LiteralDoubleAddress( - const_area->GetConstant()->AsDoubleConstant()->GetValue(), - const_area->GetLocations()->InAt(0).AsRegister())); + const_area->GetConstant()->AsDoubleConstant()->GetValue(), + const_area->GetBaseMethodAddress(), + const_area->GetLocations()->InAt(0).AsRegister())); } else { DCHECK(second.IsDoubleStackSlot()); __ divsd(first.AsFpuRegister(), Address(ESP, second.GetStackIndex())); @@ -4454,18 +4469,7 @@ void CodeGeneratorX86::GenerateMemoryBarrier(MemBarrierKind kind) { HInvokeStaticOrDirect::DispatchInfo CodeGeneratorX86::GetSupportedInvokeStaticOrDirectDispatch( const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info, HInvokeStaticOrDirect* invoke ATTRIBUTE_UNUSED) { - HInvokeStaticOrDirect::DispatchInfo dispatch_info = desired_dispatch_info; - - // We disable pc-relative load when there is an 
irreducible loop, as the optimization - // is incompatible with it. - // TODO: Create as many X86ComputeBaseMethodAddress instructions - // as needed for methods with irreducible loops. - if (GetGraph()->HasIrreducibleLoops() && - (dispatch_info.method_load_kind == - HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative)) { - dispatch_info.method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod; - } - return dispatch_info; + return desired_dispatch_info; } Register CodeGeneratorX86::GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke, @@ -4518,7 +4522,10 @@ Location CodeGeneratorX86::GenerateCalleeMethodStaticOrDirectCall(HInvokeStaticO __ movl(temp.AsRegister(), Address(base_reg, kDummy32BitOffset)); // Bind a new fixup label at the end of the "movl" insn. uint32_t offset = invoke->GetDexCacheArrayOffset(); - __ Bind(NewPcRelativeDexCacheArrayPatch(invoke->GetDexFileForPcRelativeDexCache(), offset)); + __ Bind(NewPcRelativeDexCacheArrayPatch( + invoke->InputAt(invoke->GetSpecialInputIndex())->AsX86ComputeBaseMethodAddress(), + invoke->GetDexFileForPcRelativeDexCache(), + offset)); break; } case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: { @@ -4603,31 +4610,54 @@ void CodeGeneratorX86::RecordSimplePatch() { void CodeGeneratorX86::RecordBootStringPatch(HLoadString* load_string) { DCHECK(GetCompilerOptions().IsBootImage()); - string_patches_.emplace_back(load_string->GetDexFile(), load_string->GetStringIndex().index_); + HX86ComputeBaseMethodAddress* address = nullptr; + if (GetCompilerOptions().GetCompilePic()) { + address = load_string->InputAt(0)->AsX86ComputeBaseMethodAddress(); + } else { + DCHECK_EQ(load_string->InputCount(), 0u); + } + string_patches_.emplace_back(address, + load_string->GetDexFile(), + load_string->GetStringIndex().index_); __ Bind(&string_patches_.back().label); } void CodeGeneratorX86::RecordBootTypePatch(HLoadClass* load_class) { - 
boot_image_type_patches_.emplace_back(load_class->GetDexFile(), + HX86ComputeBaseMethodAddress* address = nullptr; + if (GetCompilerOptions().GetCompilePic()) { + address = load_class->InputAt(0)->AsX86ComputeBaseMethodAddress(); + } else { + DCHECK_EQ(load_class->InputCount(), 0u); + } + boot_image_type_patches_.emplace_back(address, + load_class->GetDexFile(), load_class->GetTypeIndex().index_); __ Bind(&boot_image_type_patches_.back().label); } Label* CodeGeneratorX86::NewTypeBssEntryPatch(HLoadClass* load_class) { - type_bss_entry_patches_.emplace_back(load_class->GetDexFile(), load_class->GetTypeIndex().index_); + HX86ComputeBaseMethodAddress* address = + load_class->InputAt(0)->AsX86ComputeBaseMethodAddress(); + type_bss_entry_patches_.emplace_back( + address, load_class->GetDexFile(), load_class->GetTypeIndex().index_); return &type_bss_entry_patches_.back().label; } Label* CodeGeneratorX86::NewStringBssEntryPatch(HLoadString* load_string) { DCHECK(!GetCompilerOptions().IsBootImage()); - string_patches_.emplace_back(load_string->GetDexFile(), load_string->GetStringIndex().index_); + HX86ComputeBaseMethodAddress* address = + load_string->InputAt(0)->AsX86ComputeBaseMethodAddress(); + string_patches_.emplace_back( + address, load_string->GetDexFile(), load_string->GetStringIndex().index_); return &string_patches_.back().label; } -Label* CodeGeneratorX86::NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file, - uint32_t element_offset) { +Label* CodeGeneratorX86::NewPcRelativeDexCacheArrayPatch( + HX86ComputeBaseMethodAddress* method_address, + const DexFile& dex_file, + uint32_t element_offset) { // Add the patch entry and bind its label at the end of the instruction. 
- pc_relative_dex_cache_patches_.emplace_back(dex_file, element_offset); + pc_relative_dex_cache_patches_.emplace_back(method_address, dex_file, element_offset); return &pc_relative_dex_cache_patches_.back().label; } @@ -4637,12 +4667,12 @@ constexpr uint32_t kLabelPositionToLiteralOffsetAdjustment = 4u; template inline void CodeGeneratorX86::EmitPcRelativeLinkerPatches( - const ArenaDeque>& infos, + const ArenaDeque& infos, ArenaVector* linker_patches) { - for (const PatchInfo