Diffstat (limited to 'compiler/optimizing')
56 files changed, 1854 insertions, 1721 deletions
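The hunks below repeatedly apply two renames from this change: the load kind previously called kDexCacheViaMethod becomes kRuntimeCall, and a new kBssEntry kind (a PC-relative load from a linker-patched .bss slot) replaces the removed dex-cache-array machinery. For HInvokeStaticOrDirect, the resulting set of method load kinds looks roughly like the sketch below; this is an illustration reconstructed from the hunks, not the verbatim declaration in nodes.h:

    // Sketch of HInvokeStaticOrDirect::MethodLoadKind after this change.
    enum class MethodLoadKind {
      kStringInit,                   // Entrypoint loaded from a Thread offset.
      kRecursive,                    // The callee is the method being compiled.
      kBootImageLinkTimePcRelative,  // PC-relative boot image reference.
      kDirectAddress,                // Raw ArtMethod* address (JIT).
      kBssEntry,                     // Load from a linker-patched .bss slot (new).
      kRuntimeCall,                  // Trampoline call (was kDexCacheViaMethod).
    };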
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index 65f3c72e99..93234f9630 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -41,6 +41,8 @@
 #include "code_generator_mips64.h"
 #endif

+#include "base/bit_utils.h"
+#include "base/bit_utils_iterator.h"
 #include "bytecode_utils.h"
 #include "class_linker.h"
 #include "compiled_method.h"
@@ -58,7 +60,7 @@
 #include "parallel_move_resolver.h"
 #include "ssa_liveness_analysis.h"
 #include "scoped_thread_state_change-inl.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 #include "utils/assembler.h"

 namespace art {
@@ -337,7 +339,7 @@ void CodeGenerator::CreateCommonInvokeLocationSummary(
     case HInvokeStaticOrDirect::MethodLoadKind::kRecursive:
       locations->SetInAt(call->GetSpecialInputIndex(), visitor->GetMethodLocation());
       break;
-    case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod:
+    case HInvokeStaticOrDirect::MethodLoadKind::kRuntimeCall:
       locations->AddTemp(visitor->GetMethodLocation());
       locations->SetInAt(call->GetSpecialInputIndex(), Location::RequiresRegister());
       break;
@@ -350,6 +352,34 @@ void CodeGenerator::CreateCommonInvokeLocationSummary(
   }
 }

+void CodeGenerator::GenerateInvokeStaticOrDirectRuntimeCall(
+    HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path) {
+  MoveConstant(temp, invoke->GetDexMethodIndex());
+
+  // The access check is unnecessary but we do not want to introduce
+  // extra entrypoints for the codegens that do not support some
+  // invoke type and fall back to the runtime call.
+
+  // Initialize to anything to silence compiler warnings.
+  QuickEntrypointEnum entrypoint = kQuickInvokeStaticTrampolineWithAccessCheck;
+  switch (invoke->GetInvokeType()) {
+    case kStatic:
+      entrypoint = kQuickInvokeStaticTrampolineWithAccessCheck;
+      break;
+    case kDirect:
+      entrypoint = kQuickInvokeDirectTrampolineWithAccessCheck;
+      break;
+    case kSuper:
+      entrypoint = kQuickInvokeSuperTrampolineWithAccessCheck;
+      break;
+    case kVirtual:
+    case kInterface:
+      LOG(FATAL) << "Unexpected invoke type: " << invoke->GetInvokeType();
+      UNREACHABLE();
+  }
+
+  InvokeRuntime(entrypoint, invoke, invoke->GetDexPc(), slow_path);
+}
+
 void CodeGenerator::GenerateInvokeUnresolvedRuntimeCall(HInvokeUnresolved* invoke) {
   MoveConstant(invoke->GetLocations()->GetTemp(0), invoke->GetDexMethodIndex());
@@ -508,7 +538,7 @@ void CodeGenerator::GenerateUnresolvedFieldAccess(
 void CodeGenerator::CreateLoadClassRuntimeCallLocationSummary(HLoadClass* cls,
                                                               Location runtime_type_index_location,
                                                               Location runtime_return_location) {
-  DCHECK_EQ(cls->GetLoadKind(), HLoadClass::LoadKind::kDexCacheViaMethod);
+  DCHECK_EQ(cls->GetLoadKind(), HLoadClass::LoadKind::kRuntimeCall);
   DCHECK_EQ(cls->InputCount(), 1u);
   LocationSummary* locations = new (cls->GetBlock()->GetGraph()->GetArena()) LocationSummary(
       cls, LocationSummary::kCallOnMainOnly);
@@ -518,7 +548,7 @@ void CodeGenerator::CreateLoadClassRuntimeCallLocationSummary(HLoadClass* cls,
 }

 void CodeGenerator::GenerateLoadClassRuntimeCall(HLoadClass* cls) {
-  DCHECK_EQ(cls->GetLoadKind(), HLoadClass::LoadKind::kDexCacheViaMethod);
+  DCHECK_EQ(cls->GetLoadKind(), HLoadClass::LoadKind::kRuntimeCall);
   LocationSummary* locations = cls->GetLocations();
   MoveConstant(locations->GetTemp(0), cls->GetTypeIndex().index_);
   if (cls->NeedsAccessCheck()) {
@@ -557,6 +587,9 @@ void CodeGenerator::BlockIfInRegister(Location location, bool is_out) const {
 }

 void CodeGenerator::AllocateLocations(HInstruction* instruction) {
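// Each HEnvironment in the chain walked below corresponds to one level of
// inlining: GetParent() links an inlined callee's environment to its caller's,
// so every enclosing frame's dex-register locations are allocated as well.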
+  for (HEnvironment* env = instruction->GetEnvironment(); env != nullptr; env = env->GetParent()) {
+    env->AllocateLocations();
+  }
   instruction->Accept(GetLocationBuilder());
   DCHECK(CheckTypeConsistency(instruction));
   LocationSummary* locations = instruction->GetLocations();
@@ -1400,20 +1433,6 @@ void CodeGenerator::CreateSystemArrayCopyLocationSummary(HInvoke* invoke) {
   locations->AddTemp(Location::RequiresRegister());
 }

-uint32_t CodeGenerator::GetReferenceSlowFlagOffset() const {
-  ScopedObjectAccess soa(Thread::Current());
-  mirror::Class* klass = mirror::Reference::GetJavaLangRefReference();
-  DCHECK(klass->IsInitialized());
-  return klass->GetSlowPathFlagOffset().Uint32Value();
-}
-
-uint32_t CodeGenerator::GetReferenceDisableFlagOffset() const {
-  ScopedObjectAccess soa(Thread::Current());
-  mirror::Class* klass = mirror::Reference::GetJavaLangRefReference();
-  DCHECK(klass->IsInitialized());
-  return klass->GetDisableIntrinsicFlagOffset().Uint32Value();
-}
-
 void CodeGenerator::EmitJitRoots(uint8_t* code,
                                  Handle<mirror::ObjectArray<mirror::Object>> roots,
                                  const uint8_t* roots_data) {
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index c2b2ebfade..7bf43f7971 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -31,6 +31,7 @@
 #include "nodes.h"
 #include "optimizing_compiler_stats.h"
 #include "read_barrier_option.h"
+#include "stack.h"
 #include "stack_map_stream.h"
 #include "string_reference.h"
 #include "type_reference.h"
@@ -495,6 +496,8 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> {
   static void CreateCommonInvokeLocationSummary(
       HInvoke* invoke, InvokeDexCallingConventionVisitor* visitor);

+  void GenerateInvokeStaticOrDirectRuntimeCall(
+      HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path);
   void GenerateInvokeUnresolvedRuntimeCall(HInvokeUnresolved* invoke);

   void GenerateInvokePolymorphicCall(HInvokePolymorphic* invoke);
@@ -541,7 +544,7 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> {
       case HLoadString::LoadKind::kBssEntry:
         DCHECK(load->NeedsEnvironment());
         return LocationSummary::kCallOnSlowPath;
-      case HLoadString::LoadKind::kDexCacheViaMethod:
+      case HLoadString::LoadKind::kRuntimeCall:
         DCHECK(load->NeedsEnvironment());
         return LocationSummary::kCallOnMainOnly;
       case HLoadString::LoadKind::kJitTableAddress:
@@ -563,18 +566,17 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> {
       HInvokeStaticOrDirect* invoke) = 0;

   // Generate a call to a static or direct method.
-  virtual void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) = 0;
+  virtual void GenerateStaticOrDirectCall(
+      HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path = nullptr) = 0;
   // Generate a call to a virtual method.
-  virtual void GenerateVirtualCall(HInvokeVirtual* invoke, Location temp) = 0;
+  virtual void GenerateVirtualCall(
+      HInvokeVirtual* invoke, Location temp, SlowPathCode* slow_path = nullptr) = 0;

   // Copy the result of a call into the given target.
   virtual void MoveFromReturnRegister(Location trg, Primitive::Type type) = 0;

   virtual void GenerateNop() = 0;

-  uint32_t GetReferenceSlowFlagOffset() const;
-  uint32_t GetReferenceDisableFlagOffset() const;
-
   static QuickEntrypointEnum GetArrayAllocationEntrypoint(Handle<mirror::Class> array_klass);

  protected:
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index c66bd77d6b..0b3ac204ff 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -19,6 +19,8 @@
 #include "arch/arm/asm_support_arm.h"
 #include "arch/arm/instruction_set_features_arm.h"
 #include "art_method.h"
+#include "base/bit_utils.h"
+#include "base/bit_utils_iterator.h"
 #include "code_generator_utils.h"
 #include "common_arm.h"
 #include "compiled_method.h"
@@ -47,7 +49,6 @@ static bool ExpectedPairLayout(Location location) {
   return ((location.low() & 1) == 0) && (location.low() + 1 == location.high());
 }

-static constexpr int kCurrentMethodStackOffset = 0;
 static constexpr Register kMethodRegisterArgument = R0;

 static constexpr Register kCoreAlwaysSpillRegister = R5;
@@ -2396,8 +2397,8 @@ CodeGeneratorARM::CodeGeneratorARM(HGraph* graph,
       isa_features_(isa_features),
       uint32_literals_(std::less<uint32_t>(),
                        graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
-      pc_relative_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       pc_relative_method_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      method_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       pc_relative_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       type_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       pc_relative_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
@@ -2520,12 +2521,6 @@ void CodeGeneratorARM::GenerateFrameEntry() {
     __ cfi().RelOffsetForMany(DWARFReg(S0), 0, fpu_spill_mask_, kArmWordSize);
   }

-  if (GetGraph()->HasShouldDeoptimizeFlag()) {
-    // Initialize should_deoptimize flag to 0.
-    __ mov(IP, ShifterOperand(0));
-    __ StoreToOffset(kStoreWord, IP, SP, -kShouldDeoptimizeFlagSize);
-  }
-
   int adjust = GetFrameSize() - FrameEntrySpillSize();
   __ AddConstant(SP, -adjust);
   __ cfi().AdjustCFAOffset(adjust);
@@ -2536,6 +2531,12 @@ void CodeGeneratorARM::GenerateFrameEntry() {
   if (RequiresCurrentMethod()) {
     __ StoreToOffset(kStoreWord, kMethodRegisterArgument, SP, 0);
   }
+
+  if (GetGraph()->HasShouldDeoptimizeFlag()) {
+    // Initialize should_deoptimize flag to 0.
+    __ mov(IP, ShifterOperand(0));
+    __ StoreToOffset(kStoreWord, IP, SP, GetStackOffsetOfShouldDeoptimizeFlag());
+  }
 }

 void CodeGeneratorARM::GenerateFrameExit() {
@@ -3554,18 +3555,10 @@ void LocationsBuilderARM::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invok
   IntrinsicLocationsBuilderARM intrinsic(codegen_);
   if (intrinsic.TryDispatch(invoke)) {
-    if (invoke->GetLocations()->CanCall() && invoke->HasPcRelativeDexCache()) {
-      invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::Any());
-    }
     return;
   }

   HandleInvoke(invoke);
-
-  // For PC-relative dex cache the invoke has an extra input, the PC-relative address base.
-  if (invoke->HasPcRelativeDexCache()) {
-    invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::RequiresRegister());
-  }
 }

 static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorARM* codegen) {
@@ -3589,7 +3582,6 @@ void InstructionCodeGeneratorARM::VisitInvokeStaticOrDirec
   LocationSummary* locations = invoke->GetLocations();
   codegen_->GenerateStaticOrDirectCall(
       invoke, locations->HasTemps() ? locations->GetTemp(0) : Location::NoLocation());
-  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
 }

 void LocationsBuilderARM::HandleInvoke(HInvoke* invoke) {
@@ -3613,7 +3605,6 @@ void InstructionCodeGeneratorARM::VisitInvokeVirtual(HInvokeVirtual* invoke) {
   codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0));
   DCHECK(!codegen_->IsLeafMethod());
-  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
 }

 void LocationsBuilderARM::VisitInvokeInterface(HInvokeInterface* invoke) {
@@ -7137,7 +7128,7 @@ HLoadClass::LoadKind CodeGeneratorARM::GetSupportedLoadClassKind(
       DCHECK(Runtime::Current()->UseJitCompilation());
       break;
     case HLoadClass::LoadKind::kBootImageAddress:
-    case HLoadClass::LoadKind::kDexCacheViaMethod:
+    case HLoadClass::LoadKind::kRuntimeCall:
       break;
   }
   return desired_class_load_kind;
@@ -7145,7 +7136,7 @@ HLoadClass::LoadKind CodeGeneratorARM::GetSupportedLoadClassKind(
 void LocationsBuilderARM::VisitLoadClass(HLoadClass* cls) {
   HLoadClass::LoadKind load_kind = cls->GetLoadKind();
-  if (load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) {
+  if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
     InvokeRuntimeCallingConvention calling_convention;
     CodeGenerator::CreateLoadClassRuntimeCallLocationSummary(
         cls,
@@ -7198,7 +7189,7 @@ void LocationsBuilderARM::VisitLoadClass(HLoadClass* cls) {
 // move.
 void InstructionCodeGeneratorARM::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFETY_ANALYSIS {
   HLoadClass::LoadKind load_kind = cls->GetLoadKind();
-  if (load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) {
+  if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
     codegen_->GenerateLoadClassRuntimeCall(cls);
     return;
   }
@@ -7270,7 +7261,7 @@ void InstructionCodeGeneratorARM::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFE
       GenerateGcRootFieldLoad(cls, out_loc, out, /* offset */ 0, read_barrier_option);
       break;
     }
-    case HLoadClass::LoadKind::kDexCacheViaMethod:
+    case HLoadClass::LoadKind::kRuntimeCall:
     case HLoadClass::LoadKind::kInvalid:
       LOG(FATAL) << "UNREACHABLE";
       UNREACHABLE();
@@ -7332,7 +7323,7 @@ HLoadString::LoadKind CodeGeneratorARM::GetSupportedLoadStringKind(
       DCHECK(Runtime::Current()->UseJitCompilation());
       break;
     case HLoadString::LoadKind::kBootImageAddress:
-    case HLoadString::LoadKind::kDexCacheViaMethod:
+    case HLoadString::LoadKind::kRuntimeCall:
       break;
   }
   return desired_string_load_kind;
@@ -7342,7 +7333,7 @@ void LocationsBuilderARM::VisitLoadString(HLoadString* load) {
   LocationSummary::CallKind call_kind = CodeGenerator::GetLoadStringCallKind(load);
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind);
   HLoadString::LoadKind load_kind = load->GetLoadKind();
-  if (load_kind == HLoadString::LoadKind::kDexCacheViaMethod) {
+  if (load_kind == HLoadString::LoadKind::kRuntimeCall) {
     locations->SetOut(Location::RegisterLocation(R0));
   } else {
     locations->SetOut(Location::RequiresRegister());
@@ -7429,7 +7420,7 @@ void InstructionCodeGeneratorARM::VisitLoadString(HLoadString* load) NO_THREAD_S
   }

   // TODO: Consider re-adding the compiler code to do string dex cache lookup again.
-  DCHECK(load_kind == HLoadString::LoadKind::kDexCacheViaMethod);
+  DCHECK(load_kind == HLoadString::LoadKind::kRuntimeCall);
   InvokeRuntimeCallingConvention calling_convention;
   DCHECK_EQ(calling_convention.GetRegisterAt(0), out);
   __ LoadImmediate(calling_convention.GetRegisterAt(0), load->GetStringIndex().index_);
@@ -8946,7 +8937,8 @@ Register CodeGeneratorARM::GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOr
   // save one load. However, since this is just an intrinsic slow path we prefer this
   // simple and more robust approach rather that trying to determine if that's the case.
   SlowPathCode* slow_path = GetCurrentSlowPath();
-  if (slow_path != nullptr && slow_path->IsCoreRegisterSaved(location.AsRegister<Register>())) {
+  DCHECK(slow_path != nullptr);  // For intrinsified invokes the call is emitted on the slow path.
+  if (slow_path->IsCoreRegisterSaved(location.AsRegister<Register>())) {
     int stack_offset = slow_path->GetStackOffsetOfCoreRegister(location.AsRegister<Register>());
     __ LoadFromOffset(kLoadWord, temp, SP, stack_offset);
     return temp;
@@ -8954,8 +8946,8 @@ Register CodeGeneratorARM::GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOr
   return location.AsRegister<Register>();
 }

-Location CodeGeneratorARM::GenerateCalleeMethodStaticOrDirectCall(HInvokeStaticOrDirect* invoke,
-                                                                  Location temp) {
+void CodeGeneratorARM::GenerateStaticOrDirectCall(
+    HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path) {
   Location callee_method = temp;  // For all kinds except kRecursive, callee will be in temp.
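// Dispatch summary for the switch below: every kind except kRecursive
// materializes the callee ArtMethod* in `temp`; the new kBssEntry kind loads
// it from a linker-patched .bss slot, while kRuntimeCall materializes no code
// pointer at all - it defers to GenerateInvokeStaticOrDirectRuntimeCall(),
// which invokes the matching kQuickInvoke*TrampolineWithAccessCheck
// entrypoint and returns early.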
  switch (invoke->GetMethodLoadKind()) {
    case HInvokeStaticOrDirect::MethodLoadKind::kStringInit: {
@@ -8983,44 +8975,24 @@ Location CodeGeneratorARM::GenerateCalleeMethodStaticO
    case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress:
      __ LoadImmediate(temp.AsRegister<Register>(), invoke->GetMethodAddress());
      break;
-    case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative: {
-      HArmDexCacheArraysBase* base =
-          invoke->InputAt(invoke->GetSpecialInputIndex())->AsArmDexCacheArraysBase();
-      Register base_reg = GetInvokeStaticOrDirectExtraParameter(invoke,
-                                                                temp.AsRegister<Register>());
-      int32_t offset = invoke->GetDexCacheArrayOffset() - base->GetElementOffset();
-      __ LoadFromOffset(kLoadWord, temp.AsRegister<Register>(), base_reg, offset);
-      break;
-    }
-    case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: {
-      Location current_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
-      Register method_reg;
-      Register reg = temp.AsRegister<Register>();
-      if (current_method.IsRegister()) {
-        method_reg = current_method.AsRegister<Register>();
-      } else {
-        DCHECK(invoke->GetLocations()->Intrinsified());
-        DCHECK(!current_method.IsValid());
-        method_reg = reg;
-        __ LoadFromOffset(kLoadWord, reg, SP, kCurrentMethodStackOffset);
-      }
-      // /* ArtMethod*[] */ temp = temp.ptr_sized_fields_->dex_cache_resolved_methods_;
-      __ LoadFromOffset(kLoadWord,
-                        reg,
-                        method_reg,
-                        ArtMethod::DexCacheResolvedMethodsOffset(kArmPointerSize).Int32Value());
-      // temp = temp[index_in_cache];
-      // Note: Don't use invoke->GetTargetMethod() as it may point to a different dex file.
-      uint32_t index_in_cache = invoke->GetDexMethodIndex();
-      __ LoadFromOffset(kLoadWord, reg, reg, CodeGenerator::GetCachePointerOffset(index_in_cache));
+    case HInvokeStaticOrDirect::MethodLoadKind::kBssEntry: {
+      Register temp_reg = temp.AsRegister<Register>();
+      PcRelativePatchInfo* labels = NewMethodBssEntryPatch(
+          MethodReference(&GetGraph()->GetDexFile(), invoke->GetDexMethodIndex()));
+      __ BindTrackedLabel(&labels->movw_label);
+      __ movw(temp_reg, /* placeholder */ 0u);
+      __ BindTrackedLabel(&labels->movt_label);
+      __ movt(temp_reg, /* placeholder */ 0u);
+      __ BindTrackedLabel(&labels->add_pc_label);
+      __ add(temp_reg, temp_reg, ShifterOperand(PC));
+      __ LoadFromOffset(kLoadWord, temp_reg, temp_reg, /* offset */ 0);
      break;
    }
+    case HInvokeStaticOrDirect::MethodLoadKind::kRuntimeCall: {
+      GenerateInvokeStaticOrDirectRuntimeCall(invoke, temp, slow_path);
+      return;  // No code pointer retrieval; the runtime performs the call directly.
+    }
  }
-  return callee_method;
-}
-
-void CodeGeneratorARM::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) {
-  Location callee_method = GenerateCalleeMethodStaticOrDirectCall(invoke, temp);

  switch (invoke->GetCodePtrLocation()) {
    case HInvokeStaticOrDirect::CodePtrLocation::kCallSelf:
@@ -9035,11 +9007,13 @@ void CodeGeneratorARM::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke,
      __ blx(LR);
      break;
  }
+  RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);

  DCHECK(!IsLeafMethod());
 }

-void CodeGeneratorARM::GenerateVirtualCall(HInvokeVirtual* invoke, Location temp_location) {
+void CodeGeneratorARM::GenerateVirtualCall(
+    HInvokeVirtual* invoke, Location temp_location, SlowPathCode* slow_path) {
   Register temp = temp_location.AsRegister<Register>();
   uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
       invoke->GetVTableIndex(), kArmPointerSize).Uint32Value();
@@ -9070,6 +9044,7 @@ void CodeGeneratorARM::GenerateVirtualCall(HInvokeVirtual* invoke, Location temp
   __ LoadFromOffset(kLoadWord, LR, temp, entry_point);
   // LR();
   __ blx(LR);
+  RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
 }

 CodeGeneratorARM::PcRelativePatchInfo* CodeGeneratorARM::NewPcRelativeMethodPatch(
@@ -9079,6 +9054,13 @@ CodeGeneratorARM::PcRelativePatchInfo* CodeGeneratorARM::NewPcRelativeMethodPatc
                             &pc_relative_method_patches_);
 }

+CodeGeneratorARM::PcRelativePatchInfo* CodeGeneratorARM::NewMethodBssEntryPatch(
+    MethodReference target_method) {
+  return NewPcRelativePatch(*target_method.dex_file,
+                            target_method.dex_method_index,
+                            &method_bss_entry_patches_);
+}
+
 CodeGeneratorARM::PcRelativePatchInfo* CodeGeneratorARM::NewPcRelativeTypePatch(
     const DexFile& dex_file, dex::TypeIndex type_index) {
   return NewPcRelativePatch(dex_file, type_index.index_, &pc_relative_type_patches_);
@@ -9094,11 +9076,6 @@ CodeGeneratorARM::PcRelativePatchInfo* CodeGeneratorARM::NewPcRelativeStringPatc
   return NewPcRelativePatch(dex_file, string_index.index_, &pc_relative_string_patches_);
 }

-CodeGeneratorARM::PcRelativePatchInfo* CodeGeneratorARM::NewPcRelativeDexCacheArrayPatch(
-    const DexFile& dex_file, uint32_t element_offset) {
-  return NewPcRelativePatch(dex_file, element_offset, &pc_relative_dex_cache_patches_);
-}
-
 CodeGeneratorARM::PcRelativePatchInfo* CodeGeneratorARM::NewPcRelativePatch(
     const DexFile& dex_file, uint32_t offset_or_index, ArenaDeque<PcRelativePatchInfo>* patches) {
   patches->emplace_back(dex_file, offset_or_index);
@@ -9157,15 +9134,13 @@ inline void CodeGeneratorARM::EmitPcRelativeLinkerPatches(
 void CodeGeneratorARM::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) {
   DCHECK(linker_patches->empty());
   size_t size =
-      /* MOVW+MOVT for each entry */ 2u * pc_relative_dex_cache_patches_.size() +
       /* MOVW+MOVT for each entry */ 2u * pc_relative_method_patches_.size() +
+      /* MOVW+MOVT for each entry */ 2u * method_bss_entry_patches_.size() +
       /* MOVW+MOVT for each entry */ 2u * pc_relative_type_patches_.size() +
       /* MOVW+MOVT for each entry */ 2u * type_bss_entry_patches_.size() +
       /* MOVW+MOVT for each entry */ 2u * pc_relative_string_patches_.size() +
       baker_read_barrier_patches_.size();
   linker_patches->reserve(size);
-  EmitPcRelativeLinkerPatches<LinkerPatch::DexCacheArrayPatch>(pc_relative_dex_cache_patches_,
-                                                               linker_patches);
   if (GetCompilerOptions().IsBootImage()) {
     EmitPcRelativeLinkerPatches<LinkerPatch::RelativeMethodPatch>(pc_relative_method_patches_,
                                                                   linker_patches);
@@ -9179,6 +9154,8 @@ void CodeGeneratorARM::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patche
     EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(pc_relative_string_patches_,
                                                                   linker_patches);
   }
+  EmitPcRelativeLinkerPatches<LinkerPatch::MethodBssEntryPatch>(method_bss_entry_patches_,
+                                                                linker_patches);
   EmitPcRelativeLinkerPatches<LinkerPatch::TypeBssEntryPatch>(type_bss_entry_patches_,
                                                               linker_patches);
   for (const BakerReadBarrierPatchInfo& info : baker_read_barrier_patches_) {
@@ -9315,23 +9292,6 @@ void InstructionCodeGeneratorARM::VisitPackedSwitch(HPackedSwitch* switch_instr)
   }
 }

-void LocationsBuilderARM::VisitArmDexCacheArraysBase(HArmDexCacheArraysBase* base) {
-  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(base);
-  locations->SetOut(Location::RequiresRegister());
-}
-
-void InstructionCodeGeneratorARM::VisitArmDexCacheArraysBase(HArmDexCacheArraysBase* base) {
-  Register base_reg = base->GetLocations()->Out().AsRegister<Register>();
-  CodeGeneratorARM::PcRelativePatchInfo* labels =
-      codegen_->NewPcRelativeDexCacheArrayPatch(base->GetDexFile(), base->GetElementOffset());
-  __ BindTrackedLabel(&labels->movw_label);
-  __ movw(base_reg, /* placeholder */ 0u);
-  __ BindTrackedLabel(&labels->movt_label);
-  __ movt(base_reg, /* placeholder */ 0u);
-  __ BindTrackedLabel(&labels->add_pc_label);
-  __ add(base_reg, base_reg, ShifterOperand(PC));
-}
-
 void CodeGeneratorARM::MoveFromReturnRegister(Location trg, Primitive::Type type) {
   if (!trg.IsValid()) {
     DCHECK_EQ(type, Primitive::kPrimVoid);
diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h
index 2409a4d38d..9280e6377c 100644
--- a/compiler/optimizing/code_generator_arm.h
+++ b/compiler/optimizing/code_generator_arm.h
@@ -455,9 +455,10 @@ class CodeGeneratorARM : public CodeGenerator {
       const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
       HInvokeStaticOrDirect* invoke) OVERRIDE;

-  Location GenerateCalleeMethodStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp);
-  void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) OVERRIDE;
-  void GenerateVirtualCall(HInvokeVirtual* invoke, Location temp) OVERRIDE;
+  void GenerateStaticOrDirectCall(
+      HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path = nullptr) OVERRIDE;
+  void GenerateVirtualCall(
+      HInvokeVirtual* invoke, Location temp, SlowPathCode* slow_path = nullptr) OVERRIDE;

   void MoveFromReturnRegister(Location trg, Primitive::Type type) OVERRIDE;
@@ -482,12 +483,11 @@ class CodeGeneratorARM : public CodeGenerator {
   };

   PcRelativePatchInfo* NewPcRelativeMethodPatch(MethodReference target_method);
+  PcRelativePatchInfo* NewMethodBssEntryPatch(MethodReference target_method);
   PcRelativePatchInfo* NewPcRelativeTypePatch(const DexFile& dex_file, dex::TypeIndex type_index);
   PcRelativePatchInfo* NewTypeBssEntryPatch(const DexFile& dex_file, dex::TypeIndex type_index);
   PcRelativePatchInfo* NewPcRelativeStringPatch(const DexFile& dex_file,
                                                 dex::StringIndex string_index);
-  PcRelativePatchInfo* NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file,
-                                                       uint32_t element_offset);

   // Add a new baker read barrier patch and return the label to be bound
   // before the BNE instruction.
@@ -668,10 +668,10 @@ class CodeGeneratorARM : public CodeGenerator {
   // Deduplication map for 32-bit literals, used for non-patchable boot image addresses.
   Uint32ToLiteralMap uint32_literals_;
-  // PC-relative patch info for each HArmDexCacheArraysBase.
-  ArenaDeque<PcRelativePatchInfo> pc_relative_dex_cache_patches_;
   // PC-relative method patch info for kBootImageLinkTimePcRelative.
   ArenaDeque<PcRelativePatchInfo> pc_relative_method_patches_;
+  // PC-relative method patch info for kBssEntry.
+  ArenaDeque<PcRelativePatchInfo> method_bss_entry_patches_;
   // PC-relative type patch info for kBootImageLinkTimePcRelative.
   ArenaDeque<PcRelativePatchInfo> pc_relative_type_patches_;
   // PC-relative type patch info for kBssEntry.
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 096eb07074..34397e66bc 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -19,6 +19,8 @@
 #include "arch/arm64/asm_support_arm64.h"
 #include "arch/arm64/instruction_set_features_arm64.h"
 #include "art_method.h"
+#include "base/bit_utils.h"
+#include "base/bit_utils_iterator.h"
 #include "code_generator_utils.h"
 #include "compiled_method.h"
 #include "entrypoints/quick/quick_entrypoints.h"
@@ -29,6 +31,7 @@
 #include "linker/arm64/relative_patcher_arm64.h"
 #include "mirror/array-inl.h"
 #include "mirror/class-inl.h"
+#include "lock_word.h"
 #include "offsets.h"
 #include "thread.h"
 #include "utils/arm64/assembler_arm64.h"
@@ -77,7 +80,6 @@ using helpers::VIXLRegCodeFromART;
 using helpers::WRegisterFrom;
 using helpers::XRegisterFrom;

-static constexpr int kCurrentMethodStackOffset = 0;
 // The compare/jump sequence will generate about (1.5 * num_entries + 3) instructions. While jump
 // table version generates 7 instructions and num_entries literals. Compare/jump sequence will
 // generates less code/data with a small num_entries.
@@ -1448,8 +1450,8 @@ CodeGeneratorARM64::CodeGeneratorARM64(HGraph* graph,
                          graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       uint64_literals_(std::less<uint64_t>(),
                        graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
-      pc_relative_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       pc_relative_method_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      method_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       pc_relative_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       type_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       pc_relative_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
@@ -4496,8 +4498,8 @@ HInvokeStaticOrDirect::DispatchInfo CodeGeneratorARM64::GetSupportedInvokeStatic
   return desired_dispatch_info;
 }

-Location CodeGeneratorARM64::GenerateCalleeMethodStaticOrDirectCall(HInvokeStaticOrDirect* invoke,
-                                                                    Location temp) {
+void CodeGeneratorARM64::GenerateStaticOrDirectCall(
+    HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path) {
   // Make sure that ArtMethod* is passed in kArtMethodRegister as per the calling convention.
   Location callee_method = temp;  // For all kinds except kRecursive, callee will be in temp.
   switch (invoke->GetMethodLoadKind()) {
@@ -4526,52 +4528,33 @@ Location CodeGeneratorARM64::GenerateCalleeMethodStaticO
      // Load method address from literal pool.
      __ Ldr(XRegisterFrom(temp), DeduplicateUint64Literal(invoke->GetMethodAddress()));
      break;
-    case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative: {
+    case HInvokeStaticOrDirect::MethodLoadKind::kBssEntry: {
      // Add ADRP with its PC-relative DexCache access patch.
-      const DexFile& dex_file = invoke->GetDexFileForPcRelativeDexCache();
-      uint32_t element_offset = invoke->GetDexCacheArrayOffset();
-      vixl::aarch64::Label* adrp_label = NewPcRelativeDexCacheArrayPatch(dex_file, element_offset);
+      MethodReference target_method(&GetGraph()->GetDexFile(), invoke->GetDexMethodIndex());
+      vixl::aarch64::Label* adrp_label = NewMethodBssEntryPatch(target_method);
      EmitAdrpPlaceholder(adrp_label, XRegisterFrom(temp));
      // Add LDR with its PC-relative DexCache access patch.
      vixl::aarch64::Label* ldr_label =
-          NewPcRelativeDexCacheArrayPatch(dex_file, element_offset, adrp_label);
+          NewMethodBssEntryPatch(target_method, adrp_label);
      EmitLdrOffsetPlaceholder(ldr_label, XRegisterFrom(temp), XRegisterFrom(temp));
      break;
    }
-    case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: {
-      Location current_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
-      Register reg = XRegisterFrom(temp);
-      Register method_reg;
-      if (current_method.IsRegister()) {
-        method_reg = XRegisterFrom(current_method);
-      } else {
-        DCHECK(invoke->GetLocations()->Intrinsified());
-        DCHECK(!current_method.IsValid());
-        method_reg = reg;
-        __ Ldr(reg.X(), MemOperand(sp, kCurrentMethodStackOffset));
-      }
-
-      // /* ArtMethod*[] */ temp = temp.ptr_sized_fields_->dex_cache_resolved_methods_;
-      __ Ldr(reg.X(),
-             MemOperand(method_reg.X(),
-                        ArtMethod::DexCacheResolvedMethodsOffset(kArm64PointerSize).Int32Value()));
-      // temp = temp[index_in_cache];
-      // Note: Don't use invoke->GetTargetMethod() as it may point to a different dex file.
-      uint32_t index_in_cache = invoke->GetDexMethodIndex();
-      __ Ldr(reg.X(), MemOperand(reg.X(), GetCachePointerOffset(index_in_cache)));
-      break;
+    case HInvokeStaticOrDirect::MethodLoadKind::kRuntimeCall: {
+      GenerateInvokeStaticOrDirectRuntimeCall(invoke, temp, slow_path);
+      return;  // No code pointer retrieval; the runtime performs the call directly.
    }
  }
-  return callee_method;
-}
-
-void CodeGeneratorARM64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) {
-  // All registers are assumed to be correctly set up.
-  Location callee_method = GenerateCalleeMethodStaticOrDirectCall(invoke, temp);

  switch (invoke->GetCodePtrLocation()) {
    case HInvokeStaticOrDirect::CodePtrLocation::kCallSelf:
-      __ Bl(&frame_entry_label_);
+      {
+        // Use a scope to help guarantee that `RecordPcInfo()` records the correct pc.
+        ExactAssemblyScope eas(GetVIXLAssembler(),
+                               kInstructionSize,
+                               CodeBufferCheckScope::kExactSize);
+        __ bl(&frame_entry_label_);
+        RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
+      }
      break;
    case HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod:
      // LR = callee_method->entry_point_from_quick_compiled_code_;
      __ Ldr(lr, MemOperand(
          XRegisterFrom(callee_method),
          ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64PointerSize).Int32Value()));
      {
-        // To ensure that the pc position is recorded immediately after the `blr` instruction
-        // BLR must be the last instruction emitted in this function.
-        // Recording the pc will occur right after returning from this function.
+        // Use a scope to help guarantee that `RecordPcInfo()` records the correct pc.
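// (The ExactAssemblyScope below pins the buffer to exactly kInstructionSize
// bytes, so nothing - veneers, literal pools - can slip in between the `blr`
// and the `RecordPcInfo()` now emitted inside the scope; stack maps are keyed
// on the pc immediately after the call, so that pc must be deterministic.)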
        ExactAssemblyScope eas(GetVIXLAssembler(),
                               kInstructionSize,
                               CodeBufferCheckScope::kExactSize);
        // lr()
        __ blr(lr);
+        RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
      }
      break;
  }
@@ -4594,7 +4576,8 @@ void CodeGeneratorARM64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invok
   DCHECK(!IsLeafMethod());
 }

-void CodeGeneratorARM64::GenerateVirtualCall(HInvokeVirtual* invoke, Location temp_in) {
+void CodeGeneratorARM64::GenerateVirtualCall(
+    HInvokeVirtual* invoke, Location temp_in, SlowPathCode* slow_path) {
   // Use the calling convention instead of the location of the receiver, as
   // intrinsics may have put the receiver in a different register. In the intrinsics
   // slow path, the arguments have been moved to the right place, so here we are
@@ -4628,12 +4611,11 @@ void CodeGeneratorARM64::GenerateVirtualCall(HInvokeVirtual* invoke, Location te
   // lr = temp->GetEntryPoint();
   __ Ldr(lr, MemOperand(temp, entry_point.SizeValue()));
   {
-    // To ensure that the pc position is recorded immediately after the `blr` instruction
-    // BLR should be the last instruction emitted in this function.
-    // Recording the pc will occur right after returning from this function.
+    // Use a scope to help guarantee that `RecordPcInfo()` records the correct pc.
     ExactAssemblyScope eas(GetVIXLAssembler(), kInstructionSize, CodeBufferCheckScope::kExactSize);
     // lr();
     __ blr(lr);
+    RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
   }
 }
@@ -4654,6 +4636,15 @@ vixl::aarch64::Label* CodeGeneratorARM64::NewPcRelativeMethodPatch(
                             &pc_relative_method_patches_);
 }

+vixl::aarch64::Label* CodeGeneratorARM64::NewMethodBssEntryPatch(
+    MethodReference target_method,
+    vixl::aarch64::Label* adrp_label) {
+  return NewPcRelativePatch(*target_method.dex_file,
+                            target_method.dex_method_index,
+                            adrp_label,
+                            &method_bss_entry_patches_);
+}
+
 vixl::aarch64::Label* CodeGeneratorARM64::NewPcRelativeTypePatch(
     const DexFile& dex_file,
     dex::TypeIndex type_index,
@@ -4676,13 +4667,6 @@ vixl::aarch64::Label* CodeGeneratorARM64::NewPcRelativeStringPatch(
       NewPcRelativePatch(dex_file, string_index.index_, adrp_label, &pc_relative_string_patches_);
 }

-vixl::aarch64::Label* CodeGeneratorARM64::NewPcRelativeDexCacheArrayPatch(
-    const DexFile& dex_file,
-    uint32_t element_offset,
-    vixl::aarch64::Label* adrp_label) {
-  return NewPcRelativePatch(dex_file, element_offset, adrp_label, &pc_relative_dex_cache_patches_);
-}
-
 vixl::aarch64::Label* CodeGeneratorARM64::NewBakerReadBarrierPatch(uint32_t custom_data) {
   baker_read_barrier_patches_.emplace_back(custom_data);
   return &baker_read_barrier_patches_.back().label;
@@ -4704,7 +4688,7 @@ vixl::aarch64::Label* CodeGeneratorARM64::NewPcRelativePatch(
 vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateBootImageAddressLiteral(
     uint64_t address) {
-  return DeduplicateUint32Literal(dchecked_integral_cast<uint32_t>(address), &uint32_literals_);
+  return DeduplicateUint32Literal(dchecked_integral_cast<uint32_t>(address));
 }

 vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateJitStringLiteral(
@@ -4767,19 +4751,13 @@ inline void CodeGeneratorARM64::EmitPcRelativeLinkerPatches(
 void CodeGeneratorARM64::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) {
   DCHECK(linker_patches->empty());
   size_t size =
-      pc_relative_dex_cache_patches_.size() +
       pc_relative_method_patches_.size() +
+      method_bss_entry_patches_.size() +
       pc_relative_type_patches_.size() +
       type_bss_entry_patches_.size() +
       pc_relative_string_patches_.size() +
       baker_read_barrier_patches_.size();
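// (`size` has to account for every patch container appended below; the
// DCHECK_EQ(size, linker_patches->size()) at the end of this function checks
// the accounting, so the reserve() that follows never reallocates mid-emit.)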
  linker_patches->reserve(size);
-  for (const PcRelativePatchInfo& info : pc_relative_dex_cache_patches_) {
-    linker_patches->push_back(LinkerPatch::DexCacheArrayPatch(info.label.GetLocation(),
-                                                              &info.target_dex_file,
-                                                              info.pc_insn_label->GetLocation(),
-                                                              info.offset_or_index));
-  }
  if (GetCompilerOptions().IsBootImage()) {
    EmitPcRelativeLinkerPatches<LinkerPatch::RelativeMethodPatch>(pc_relative_method_patches_,
                                                                  linker_patches);
@@ -4793,6 +4771,8 @@ void CodeGeneratorARM64::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patc
    EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(pc_relative_string_patches_,
                                                                  linker_patches);
  }
+  EmitPcRelativeLinkerPatches<LinkerPatch::MethodBssEntryPatch>(method_bss_entry_patches_,
+                                                                linker_patches);
  EmitPcRelativeLinkerPatches<LinkerPatch::TypeBssEntryPatch>(type_bss_entry_patches_,
                                                              linker_patches);
  for (const BakerReadBarrierPatchInfo& info : baker_read_barrier_patches_) {
@@ -4802,9 +4782,8 @@ void CodeGeneratorARM64::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patc
  DCHECK_EQ(size, linker_patches->size());
 }

-vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateUint32Literal(uint32_t value,
-                                                                               Uint32ToLiteralMap* map) {
-  return map->GetOrCreate(
+vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateUint32Literal(uint32_t value) {
+  return uint32_literals_.GetOrCreate(
      value,
      [this, value]() { return __ CreateLiteralDestroyedWithPool<uint32_t>(value); });
 }
@@ -4830,7 +4809,6 @@ void InstructionCodeGeneratorARM64::VisitInvokeStaticOrDir
   LocationSummary* locations = invoke->GetLocations();
   codegen_->GenerateStaticOrDirectCall(
       invoke, locations->HasTemps() ? locations->GetTemp(0) : Location::NoLocation());
-  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
 }

 void InstructionCodeGeneratorARM64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
@@ -4843,7 +4821,6 @@ void InstructionCodeGeneratorARM64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
   EmissionCheckScope guard(GetVIXLAssembler(), kInvokeCodeMarginSizeInBytes);
   codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0));
   DCHECK(!codegen_->IsLeafMethod());
-  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
 }

 HLoadClass::LoadKind CodeGeneratorARM64::GetSupportedLoadClassKind(
@@ -4862,7 +4839,7 @@ HLoadClass::LoadKind CodeGeneratorARM64::GetSupportedLoadClassKind(
       DCHECK(Runtime::Current()->UseJitCompilation());
       break;
     case HLoadClass::LoadKind::kBootImageAddress:
-    case HLoadClass::LoadKind::kDexCacheViaMethod:
+    case HLoadClass::LoadKind::kRuntimeCall:
       break;
   }
   return desired_class_load_kind;
@@ -4870,7 +4847,7 @@ HLoadClass::LoadKind CodeGeneratorARM64::GetSupportedLoadClassKind(
 void LocationsBuilderARM64::VisitLoadClass(HLoadClass* cls) {
   HLoadClass::LoadKind load_kind = cls->GetLoadKind();
-  if (load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) {
+  if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
     InvokeRuntimeCallingConvention calling_convention;
     CodeGenerator::CreateLoadClassRuntimeCallLocationSummary(
         cls,
@@ -4915,7 +4892,7 @@ void LocationsBuilderARM64::VisitLoadClass(HLoadClass* cls) {
 // move.
 void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFETY_ANALYSIS {
   HLoadClass::LoadKind load_kind = cls->GetLoadKind();
-  if (load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) {
+  if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
     codegen_->GenerateLoadClassRuntimeCall(cls);
     return;
   }
@@ -4997,7 +4974,7 @@ void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) NO_THREAD_SA
                               read_barrier_option);
       break;
     }
-    case HLoadClass::LoadKind::kDexCacheViaMethod:
+    case HLoadClass::LoadKind::kRuntimeCall:
     case HLoadClass::LoadKind::kInvalid:
       LOG(FATAL) << "UNREACHABLE";
       UNREACHABLE();
@@ -5053,7 +5030,7 @@ HLoadString::LoadKind CodeGeneratorARM64::GetSupportedLoadStringKind(
       DCHECK(Runtime::Current()->UseJitCompilation());
       break;
     case HLoadString::LoadKind::kBootImageAddress:
-    case HLoadString::LoadKind::kDexCacheViaMethod:
+    case HLoadString::LoadKind::kRuntimeCall:
       break;
   }
   return desired_string_load_kind;
@@ -5062,7 +5039,7 @@ HLoadString::LoadKind CodeGeneratorARM64::GetSupportedLoadStringKind(
 void LocationsBuilderARM64::VisitLoadString(HLoadString* load) {
   LocationSummary::CallKind call_kind = CodeGenerator::GetLoadStringCallKind(load);
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind);
-  if (load->GetLoadKind() == HLoadString::LoadKind::kDexCacheViaMethod) {
+  if (load->GetLoadKind() == HLoadString::LoadKind::kRuntimeCall) {
     InvokeRuntimeCallingConvention calling_convention;
     locations->SetOut(calling_convention.GetReturnLocation(load->GetType()));
   } else {
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index 7a4b3d4805..d9c49d19bb 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -540,9 +540,10 @@ class CodeGeneratorARM64 : public CodeGenerator {
       const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
       HInvokeStaticOrDirect* invoke) OVERRIDE;

-  Location GenerateCalleeMethodStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp);
-  void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) OVERRIDE;
-  void GenerateVirtualCall(HInvokeVirtual* invoke, Location temp) OVERRIDE;
+  void GenerateStaticOrDirectCall(
+      HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path = nullptr) OVERRIDE;
+  void GenerateVirtualCall(
+      HInvokeVirtual* invoke, Location temp, SlowPathCode* slow_path = nullptr) OVERRIDE;

   void MoveFromReturnRegister(Location trg ATTRIBUTE_UNUSED,
                               Primitive::Type type ATTRIBUTE_UNUSED) OVERRIDE {
@@ -556,6 +557,13 @@ class CodeGeneratorARM64 : public CodeGenerator {
   vixl::aarch64::Label* NewPcRelativeMethodPatch(MethodReference target_method,
                                                  vixl::aarch64::Label* adrp_label = nullptr);

+  // Add a new .bss entry method patch for an instruction and return
+  // the label to be bound before the instruction. The instruction will be
+  // either the ADRP (pass `adrp_label = null`) or the LDR (pass `adrp_label`
+  // pointing to the associated ADRP patch label).
+  vixl::aarch64::Label* NewMethodBssEntryPatch(MethodReference target_method,
+                                               vixl::aarch64::Label* adrp_label = nullptr);
+
   // Add a new PC-relative type patch for an instruction and return the label
   // to be bound before the instruction. The instruction will be either the
   // ADRP (pass `adrp_label = null`) or the ADD (pass `adrp_label` pointing
@@ -580,15 +588,6 @@ class CodeGeneratorARM64 : public CodeGenerator {
                                                  dex::StringIndex string_index,
                                                  vixl::aarch64::Label* adrp_label = nullptr);

-  // Add a new PC-relative dex cache array patch for an instruction and return
-  // the label to be bound before the instruction. The instruction will be
-  // either the ADRP (pass `adrp_label = null`) or the LDR (pass `adrp_label`
-  // pointing to the associated ADRP patch label).
-  vixl::aarch64::Label* NewPcRelativeDexCacheArrayPatch(
-      const DexFile& dex_file,
-      uint32_t element_offset,
-      vixl::aarch64::Label* adrp_label = nullptr);
-
   // Add a new baker read barrier patch and return the label to be bound
   // before the CBNZ instruction.
   vixl::aarch64::Label* NewBakerReadBarrierPatch(uint32_t custom_data);
@@ -740,8 +739,7 @@ class CodeGeneratorARM64 : public CodeGenerator {
                              vixl::aarch64::Literal<uint32_t>*,
                              TypeReferenceValueComparator>;

-  vixl::aarch64::Literal<uint32_t>* DeduplicateUint32Literal(uint32_t value,
-                                                             Uint32ToLiteralMap* map);
+  vixl::aarch64::Literal<uint32_t>* DeduplicateUint32Literal(uint32_t value);
   vixl::aarch64::Literal<uint64_t>* DeduplicateUint64Literal(uint64_t value);

   // The PcRelativePatchInfo is used for PC-relative addressing of dex cache arrays
@@ -792,10 +790,10 @@ class CodeGeneratorARM64 : public CodeGenerator {
   Uint32ToLiteralMap uint32_literals_;
   // Deduplication map for 64-bit literals, used for non-patchable method address or method code.
   Uint64ToLiteralMap uint64_literals_;
-  // PC-relative DexCache access info.
-  ArenaDeque<PcRelativePatchInfo> pc_relative_dex_cache_patches_;
   // PC-relative method patch info for kBootImageLinkTimePcRelative.
   ArenaDeque<PcRelativePatchInfo> pc_relative_method_patches_;
+  // PC-relative method patch info for kBssEntry.
+  ArenaDeque<PcRelativePatchInfo> method_bss_entry_patches_;
   // PC-relative type patch info for kBootImageLinkTimePcRelative.
   ArenaDeque<PcRelativePatchInfo> pc_relative_type_patches_;
   // PC-relative type patch info for kBssEntry.
diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc
index 1f8e1efd5e..a8b00c358b 100644
--- a/compiler/optimizing/code_generator_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_arm_vixl.cc
@@ -19,6 +19,8 @@
 #include "arch/arm/asm_support_arm.h"
 #include "arch/arm/instruction_set_features_arm.h"
 #include "art_method.h"
+#include "base/bit_utils.h"
+#include "base/bit_utils_iterator.h"
 #include "code_generator_utils.h"
 #include "common_arm.h"
 #include "compiled_method.h"
@@ -76,7 +78,6 @@ static bool ExpectedPairLayout(Location location) {
 // Use a local definition to prevent copying mistakes.
 static constexpr size_t kArmWordSize = static_cast<size_t>(kArmPointerSize);
 static constexpr size_t kArmBitsPerWord = kArmWordSize * kBitsPerByte;
-static constexpr int kCurrentMethodStackOffset = 0;
 static constexpr uint32_t kPackedSwitchCompareJumpThreshold = 7;

 // Reference load (except object array loads) is using LDR Rt, [Rn, #offset] which can handle
@@ -2500,8 +2501,8 @@ CodeGeneratorARMVIXL::CodeGeneratorARMVIXL(HGraph* graph,
       isa_features_(isa_features),
       uint32_literals_(std::less<uint32_t>(),
                        graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
-      pc_relative_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       pc_relative_method_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      method_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       pc_relative_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       type_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       pc_relative_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
@@ -2658,14 +2659,6 @@ void CodeGeneratorARMVIXL::GenerateFrameEntry() {
     GetAssembler()->cfi().RelOffsetForMany(DWARFReg(s0), 0, fpu_spill_mask_, kArmWordSize);
   }

-  if (GetGraph()->HasShouldDeoptimizeFlag()) {
-    UseScratchRegisterScope temps(GetVIXLAssembler());
-    vixl32::Register temp = temps.Acquire();
-    // Initialize should_deoptimize flag to 0.
-    __ Mov(temp, 0);
-    GetAssembler()->StoreToOffset(kStoreWord, temp, sp, -kShouldDeoptimizeFlagSize);
-  }
-
   int adjust = GetFrameSize() - FrameEntrySpillSize();
   __ Sub(sp, sp, adjust);
   GetAssembler()->cfi().AdjustCFAOffset(adjust);
@@ -2676,6 +2669,14 @@ void CodeGeneratorARMVIXL::GenerateFrameEntry() {
   if (RequiresCurrentMethod()) {
     GetAssembler()->StoreToOffset(kStoreWord, kMethodRegister, sp, 0);
   }
+
+  if (GetGraph()->HasShouldDeoptimizeFlag()) {
+    UseScratchRegisterScope temps(GetVIXLAssembler());
+    vixl32::Register temp = temps.Acquire();
+    // Initialize should_deoptimize flag to 0.
+    __ Mov(temp, 0);
+    GetAssembler()->StoreToOffset(kStoreWord, temp, sp, GetStackOffsetOfShouldDeoptimizeFlag());
+  }
 }

 void CodeGeneratorARMVIXL::GenerateFrameExit() {
@@ -3643,18 +3644,10 @@ void LocationsBuilderARMVIXL::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* i
   IntrinsicLocationsBuilderARMVIXL intrinsic(codegen_);
   if (intrinsic.TryDispatch(invoke)) {
-    if (invoke->GetLocations()->CanCall() && invoke->HasPcRelativeDexCache()) {
-      invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::Any());
-    }
     return;
   }

   HandleInvoke(invoke);
-
-  // For PC-relative dex cache the invoke has an extra input, the PC-relative address base.
-  if (invoke->HasPcRelativeDexCache()) {
-    invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::RequiresRegister());
-  }
 }

 static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorARMVIXL* codegen) {
@@ -3678,7 +3671,6 @@ void InstructionCodeGeneratorARMVIXL::VisitInvokeStaticOrD
   LocationSummary* locations = invoke->GetLocations();
   codegen_->GenerateStaticOrDirectCall(
       invoke, locations->HasTemps() ? locations->GetTemp(0) : Location::NoLocation());
-  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
 }

 void LocationsBuilderARMVIXL::HandleInvoke(HInvoke* invoke) {
@@ -3701,7 +3693,6 @@ void InstructionCodeGeneratorARMVIXL::VisitInvokeVirtual(HInvokeVirtual* invoke)
   }

   codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0));
-  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
   DCHECK(!codegen_->IsLeafMethod());
 }
@@ -7252,7 +7243,7 @@ HLoadClass::LoadKind CodeGeneratorARMVIXL::GetSupportedLoadClassKind(
       DCHECK(Runtime::Current()->UseJitCompilation());
       break;
     case HLoadClass::LoadKind::kBootImageAddress:
-    case HLoadClass::LoadKind::kDexCacheViaMethod:
+    case HLoadClass::LoadKind::kRuntimeCall:
       break;
   }
   return desired_class_load_kind;
@@ -7260,7 +7251,7 @@ HLoadClass::LoadKind CodeGeneratorARMVIXL::GetSupportedLoadClassKind(
 void LocationsBuilderARMVIXL::VisitLoadClass(HLoadClass* cls) {
   HLoadClass::LoadKind load_kind = cls->GetLoadKind();
-  if (load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) {
+  if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
     InvokeRuntimeCallingConventionARMVIXL calling_convention;
     CodeGenerator::CreateLoadClassRuntimeCallLocationSummary(
         cls,
@@ -7313,7 +7304,7 @@ void LocationsBuilderARMVIXL::VisitLoadClass(HLoadClass* cls) {
 // move.
 void InstructionCodeGeneratorARMVIXL::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFETY_ANALYSIS {
   HLoadClass::LoadKind load_kind = cls->GetLoadKind();
-  if (load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) {
+  if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
     codegen_->GenerateLoadClassRuntimeCall(cls);
     return;
   }
@@ -7375,7 +7366,7 @@ void InstructionCodeGeneratorARMVIXL::VisitLoadClass(HLoadClass* cls) NO_THREAD_
       GenerateGcRootFieldLoad(cls, out_loc, out, /* offset */ 0, read_barrier_option);
       break;
     }
-    case HLoadClass::LoadKind::kDexCacheViaMethod:
+    case HLoadClass::LoadKind::kRuntimeCall:
     case HLoadClass::LoadKind::kInvalid:
       LOG(FATAL) << "UNREACHABLE";
       UNREACHABLE();
@@ -7444,7 +7435,7 @@ HLoadString::LoadKind CodeGeneratorARMVIXL::GetSupportedLoadStringKind(
       DCHECK(Runtime::Current()->UseJitCompilation());
       break;
     case HLoadString::LoadKind::kBootImageAddress:
-    case HLoadString::LoadKind::kDexCacheViaMethod:
+    case HLoadString::LoadKind::kRuntimeCall:
       break;
   }
   return desired_string_load_kind;
@@ -7454,7 +7445,7 @@ void LocationsBuilderARMVIXL::VisitLoadString(HLoadString* load) {
   LocationSummary::CallKind call_kind = CodeGenerator::GetLoadStringCallKind(load);
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind);
   HLoadString::LoadKind load_kind = load->GetLoadKind();
-  if (load_kind == HLoadString::LoadKind::kDexCacheViaMethod) {
+  if (load_kind == HLoadString::LoadKind::kRuntimeCall) {
     locations->SetOut(LocationFrom(r0));
   } else {
     locations->SetOut(Location::RequiresRegister());
@@ -7532,7 +7523,7 @@ void InstructionCodeGeneratorARMVIXL::VisitLoadString(HLoadString* load) NO_THRE
   }

   // TODO: Re-add the compiler code to do string dex cache lookup again.
-  DCHECK_EQ(load->GetLoadKind(), HLoadString::LoadKind::kDexCacheViaMethod);
+  DCHECK_EQ(load->GetLoadKind(), HLoadString::LoadKind::kRuntimeCall);
   InvokeRuntimeCallingConventionARMVIXL calling_convention;
   __ Mov(calling_convention.GetRegisterAt(0), load->GetStringIndex().index_);
   codegen_->InvokeRuntime(kQuickResolveString, load, load->GetDexPc());
@@ -9119,8 +9110,8 @@ vixl32::Register CodeGeneratorARMVIXL::GetInvokeStaticOrDirectExtraParameter(
   return RegisterFrom(location);
 }

-Location CodeGeneratorARMVIXL::GenerateCalleeMethodStaticOrDirectCall(
-    HInvokeStaticOrDirect* invoke, Location temp) {
+void CodeGeneratorARMVIXL::GenerateStaticOrDirectCall(
+    HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path) {
   Location callee_method = temp;  // For all kinds except kRecursive, callee will be in temp.
   switch (invoke->GetMethodLoadKind()) {
     case HInvokeStaticOrDirect::MethodLoadKind::kStringInit: {
@@ -9143,50 +9134,30 @@ Location CodeGeneratorARMVIXL::GenerateCalleeMethodStaticOrDirectCall(
     case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress:
       __ Mov(RegisterFrom(temp), Operand::From(invoke->GetMethodAddress()));
       break;
-    case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative: {
-      HArmDexCacheArraysBase* base =
-          invoke->InputAt(invoke->GetSpecialInputIndex())->AsArmDexCacheArraysBase();
-      vixl32::Register base_reg = GetInvokeStaticOrDirectExtraParameter(invoke, RegisterFrom(temp));
-      int32_t offset = invoke->GetDexCacheArrayOffset() - base->GetElementOffset();
-      GetAssembler()->LoadFromOffset(kLoadWord, RegisterFrom(temp), base_reg, offset);
+    case HInvokeStaticOrDirect::MethodLoadKind::kBssEntry: {
+      PcRelativePatchInfo* labels = NewMethodBssEntryPatch(
+          MethodReference(&GetGraph()->GetDexFile(), invoke->GetDexMethodIndex()));
+      vixl32::Register temp_reg = RegisterFrom(temp);
+      EmitMovwMovtPlaceholder(labels, temp_reg);
+      GetAssembler()->LoadFromOffset(kLoadWord, temp_reg, temp_reg, /* offset */ 0);
      break;
    }
-    case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: {
-      Location current_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
-      vixl32::Register method_reg;
-      vixl32::Register reg = RegisterFrom(temp);
-      if (current_method.IsRegister()) {
-        method_reg = RegisterFrom(current_method);
-      } else {
-        DCHECK(invoke->GetLocations()->Intrinsified());
-        DCHECK(!current_method.IsValid());
-        method_reg = reg;
-        GetAssembler()->LoadFromOffset(kLoadWord, reg, sp, kCurrentMethodStackOffset);
-      }
-      // /* ArtMethod*[] */ temp = temp.ptr_sized_fields_->dex_cache_resolved_methods_;
-      GetAssembler()->LoadFromOffset(
-          kLoadWord,
-          reg,
-          method_reg,
-          ArtMethod::DexCacheResolvedMethodsOffset(kArmPointerSize).Int32Value());
-      // temp = temp[index_in_cache];
-      // Note: Don't use invoke->GetTargetMethod() as it may point to a different dex file.
-      uint32_t index_in_cache = invoke->GetDexMethodIndex();
-      GetAssembler()->LoadFromOffset(
-          kLoadWord, reg, reg, CodeGenerator::GetCachePointerOffset(index_in_cache));
-      break;
+    case HInvokeStaticOrDirect::MethodLoadKind::kRuntimeCall: {
+      GenerateInvokeStaticOrDirectRuntimeCall(invoke, temp, slow_path);
+      return;  // No code pointer retrieval; the runtime performs the call directly.
    }
  }
-  return callee_method;
-}
-
-void CodeGeneratorARMVIXL::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke,
-                                                      Location temp) {
-  Location callee_method = GenerateCalleeMethodStaticOrDirectCall(invoke, temp);

  switch (invoke->GetCodePtrLocation()) {
    case HInvokeStaticOrDirect::CodePtrLocation::kCallSelf:
-      __ Bl(GetFrameEntryLabel());
+      {
+        // Use a scope to help guarantee that `RecordPcInfo()` records the correct pc.
+        ExactAssemblyScope aas(GetVIXLAssembler(),
+                               vixl32::k32BitT32InstructionSizeInBytes,
+                               CodeBufferCheckScope::kMaximumSize);
+        __ bl(GetFrameEntryLabel());
+        RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
+      }
      break;
    case HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod:
      // LR = callee_method->entry_point_from_quick_compiled_code_
@@ -9196,12 +9167,14 @@ void CodeGeneratorARMVIXL::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* inv
          RegisterFrom(callee_method),
          ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArmPointerSize).Int32Value());
      {
+        // Use a scope to help guarantee that `RecordPcInfo()` records the correct pc.
        // blx in T32 has only 16bit encoding that's why a stricter check for the scope is used.
        ExactAssemblyScope aas(GetVIXLAssembler(),
                               vixl32::k16BitT32InstructionSizeInBytes,
                               CodeBufferCheckScope::kExactSize);
        // LR()
        __ blx(lr);
+        RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
      }
      break;
  }
@@ -9209,7 +9182,8 @@ void CodeGeneratorARMVIXL::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* inv
   DCHECK(!IsLeafMethod());
 }

-void CodeGeneratorARMVIXL::GenerateVirtualCall(HInvokeVirtual* invoke, Location temp_location) {
+void CodeGeneratorARMVIXL::GenerateVirtualCall(
+    HInvokeVirtual* invoke, Location temp_location, SlowPathCode* slow_path) {
   vixl32::Register temp = RegisterFrom(temp_location);
   uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
       invoke->GetVTableIndex(), kArmPointerSize).Uint32Value();
@@ -9245,15 +9219,16 @@ void CodeGeneratorARMVIXL::GenerateVirtualCall(HInvokeVirtual* invoke, Location
   GetAssembler()->LoadFromOffset(kLoadWord, temp, temp, method_offset);
   // LR = temp->GetEntryPoint();
   GetAssembler()->LoadFromOffset(kLoadWord, lr, temp, entry_point);
-  // LR();
-  // This `blx` *must* be the *last* instruction generated by this stub, so that calls to
-  // `RecordPcInfo()` immediately following record the correct pc. Use a scope to help guarantee
-  // that.
-  // blx in T32 has only 16bit encoding that's why a stricter check for the scope is used.
-  ExactAssemblyScope aas(GetVIXLAssembler(),
-                         vixl32::k16BitT32InstructionSizeInBytes,
-                         CodeBufferCheckScope::kExactSize);
-  __ blx(lr);
+  {
+    // Use a scope to help guarantee that `RecordPcInfo()` records the correct pc.
+    // blx in T32 has only 16bit encoding that's why a stricter check for the scope is used.
+    ExactAssemblyScope aas(GetVIXLAssembler(),
+                           vixl32::k16BitT32InstructionSizeInBytes,
+                           CodeBufferCheckScope::kExactSize);
+    // LR();
+    __ blx(lr);
+    RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
+  }
 }

 CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewPcRelativeMethodPatch(
@@ -9263,6 +9238,13 @@ CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewPcRelativeMe
                             &pc_relative_method_patches_);
 }

+CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewMethodBssEntryPatch(
+    MethodReference target_method) {
+  return NewPcRelativePatch(*target_method.dex_file,
+                            target_method.dex_method_index,
+                            &method_bss_entry_patches_);
+}
+
 CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewPcRelativeTypePatch(
     const DexFile& dex_file, dex::TypeIndex type_index) {
   return NewPcRelativePatch(dex_file, type_index.index_, &pc_relative_type_patches_);
@@ -9278,11 +9260,6 @@ CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewPcRelativeSt
   return NewPcRelativePatch(dex_file, string_index.index_, &pc_relative_string_patches_);
 }

-CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewPcRelativeDexCacheArrayPatch(
-    const DexFile& dex_file, uint32_t element_offset) {
-  return NewPcRelativePatch(dex_file, element_offset, &pc_relative_dex_cache_patches_);
-}
-
 CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewPcRelativePatch(
     const DexFile& dex_file, uint32_t offset_or_index, ArenaDeque<PcRelativePatchInfo>* patches) {
   patches->emplace_back(dex_file, offset_or_index);
@@ -9346,15 +9323,13 @@ inline void CodeGeneratorARMVIXL::EmitPcRelativeLinkerPatches(
 void CodeGeneratorARMVIXL::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) {
   DCHECK(linker_patches->empty());
   size_t size =
-      /* MOVW+MOVT for each entry */ 2u * pc_relative_dex_cache_patches_.size() +
       /* MOVW+MOVT for each entry */ 2u * pc_relative_method_patches_.size() +
+      /* MOVW+MOVT for each entry */ 2u * method_bss_entry_patches_.size() +
       /* MOVW+MOVT for each entry */ 2u * pc_relative_type_patches_.size() +
       /* MOVW+MOVT for each entry */ 2u * type_bss_entry_patches_.size() +
       /* MOVW+MOVT for each entry */ 2u * pc_relative_string_patches_.size() +
       baker_read_barrier_patches_.size();
   linker_patches->reserve(size);
-  EmitPcRelativeLinkerPatches<LinkerPatch::DexCacheArrayPatch>(pc_relative_dex_cache_patches_,
-                                                               linker_patches);
   if (GetCompilerOptions().IsBootImage()) {
     EmitPcRelativeLinkerPatches<LinkerPatch::RelativeMethodPatch>(pc_relative_method_patches_,
                                                                   linker_patches);
@@ -9368,6 +9343,8 @@ void CodeGeneratorARMVIXL::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_pa
     EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(pc_relative_string_patches_,
                                                                   linker_patches);
   }
+  EmitPcRelativeLinkerPatches<LinkerPatch::MethodBssEntryPatch>(method_bss_entry_patches_,
+                                                                linker_patches);
   EmitPcRelativeLinkerPatches<LinkerPatch::TypeBssEntryPatch>(type_bss_entry_patches_,
                                                               linker_patches);
   for (const BakerReadBarrierPatchInfo& info : baker_read_barrier_patches_) {
@@ -9517,17 +9494,6 @@ void InstructionCodeGeneratorARMVIXL::VisitPackedSwitch(HPackedSwitch* switch_in
     }
   }
 }

-void LocationsBuilderARMVIXL::VisitArmDexCacheArraysBase(HArmDexCacheArraysBase* base) {
-  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(base);
-  locations->SetOut(Location::RequiresRegister());
-}
-
-void InstructionCodeGeneratorARMVIXL::VisitArmDexCacheArraysBase(HArmDexCacheArraysBase* base) {
-  vixl32::Register base_reg = OutputRegister(base);
-  CodeGeneratorARMVIXL::PcRelativePatchInfo* labels =
-      codegen_->NewPcRelativeDexCacheArrayPatch(base->GetDexFile(), base->GetElementOffset());
-  codegen_->EmitMovwMovtPlaceholder(labels, base_reg);
-}

 // Copy the result of a call into the given target.
 void CodeGeneratorARMVIXL::MoveFromReturnRegister(Location trg, Primitive::Type type) {
diff --git a/compiler/optimizing/code_generator_arm_vixl.h b/compiler/optimizing/code_generator_arm_vixl.h
index ef809510ad..805a3f4366 100644
--- a/compiler/optimizing/code_generator_arm_vixl.h
+++ b/compiler/optimizing/code_generator_arm_vixl.h
@@ -538,9 +538,10 @@ class CodeGeneratorARMVIXL : public CodeGenerator {
       const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
       HInvokeStaticOrDirect* invoke) OVERRIDE;

-  Location GenerateCalleeMethodStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp);
-  void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) OVERRIDE;
-  void GenerateVirtualCall(HInvokeVirtual* invoke, Location temp) OVERRIDE;
+  void GenerateStaticOrDirectCall(
+      HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path = nullptr) OVERRIDE;
+  void GenerateVirtualCall(
+      HInvokeVirtual* invoke, Location temp, SlowPathCode* slow_path = nullptr) OVERRIDE;

   void MoveFromReturnRegister(Location trg, Primitive::Type type) OVERRIDE;
@@ -565,12 +566,11 @@ class CodeGeneratorARMVIXL : public CodeGenerator {
   };

   PcRelativePatchInfo* NewPcRelativeMethodPatch(MethodReference target_method);
+  PcRelativePatchInfo* NewMethodBssEntryPatch(MethodReference target_method);
   PcRelativePatchInfo* NewPcRelativeTypePatch(const DexFile& dex_file, dex::TypeIndex type_index);
   PcRelativePatchInfo* NewTypeBssEntryPatch(const DexFile& dex_file, dex::TypeIndex type_index);
   PcRelativePatchInfo* NewPcRelativeStringPatch(const DexFile& dex_file,
                                                 dex::StringIndex string_index);
-  PcRelativePatchInfo* NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file,
-                                                       uint32_t element_offset);

   // Add a new baker read barrier patch and return the label to be bound
   // before the BNE instruction.
@@ -765,10 +765,10 @@ class CodeGeneratorARMVIXL : public CodeGenerator {
   // Deduplication map for 32-bit literals, used for non-patchable boot image addresses.
   Uint32ToLiteralMap uint32_literals_;
-  // PC-relative patch info for each HArmDexCacheArraysBase.
-  ArenaDeque<PcRelativePatchInfo> pc_relative_dex_cache_patches_;
   // PC-relative method patch info for kBootImageLinkTimePcRelative.
   ArenaDeque<PcRelativePatchInfo> pc_relative_method_patches_;
+  // PC-relative method patch info for kBssEntry.
+  ArenaDeque<PcRelativePatchInfo> method_bss_entry_patches_;
   // PC-relative type patch info for kBootImageLinkTimePcRelative.
   ArenaDeque<PcRelativePatchInfo> pc_relative_type_patches_;
   // PC-relative type patch info for kBssEntry.
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index d8ac99a9a6..b39d412ac2 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -40,10 +40,6 @@ namespace mips {
 static constexpr int kCurrentMethodStackOffset = 0;
 static constexpr Register kMethodRegisterArgument = A0;

-// We'll maximize the range of a single load instruction for dex cache array accesses
-// by aligning offset -32768 with the offset of the first used element.
-static constexpr uint32_t kDexCacheArrayLwOffset = 0x8000;
-
 Location MipsReturnLocation(Primitive::Type return_type) {
   switch (return_type) {
     case Primitive::kPrimBoolean:
@@ -1060,8 +1056,8 @@ CodeGeneratorMIPS::CodeGeneratorMIPS(HGraph* graph,
       isa_features_(isa_features),
       uint32_literals_(std::less<uint32_t>(),
                        graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
-      pc_relative_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       pc_relative_method_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      method_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       pc_relative_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       type_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       pc_relative_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
@@ -1602,14 +1598,12 @@ inline void CodeGeneratorMIPS::EmitPcRelativeLinkerPatches(
 void CodeGeneratorMIPS::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) {
   DCHECK(linker_patches->empty());
   size_t size =
-      pc_relative_dex_cache_patches_.size() +
       pc_relative_method_patches_.size() +
+      method_bss_entry_patches_.size() +
       pc_relative_type_patches_.size() +
       type_bss_entry_patches_.size() +
       pc_relative_string_patches_.size();
   linker_patches->reserve(size);
-  EmitPcRelativeLinkerPatches<LinkerPatch::DexCacheArrayPatch>(pc_relative_dex_cache_patches_,
-                                                               linker_patches);
   if (GetCompilerOptions().IsBootImage()) {
     EmitPcRelativeLinkerPatches<LinkerPatch::RelativeMethodPatch>(pc_relative_method_patches_,
                                                                   linker_patches);
@@ -1623,6 +1617,8 @@ void CodeGeneratorMIPS::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patch
     EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(pc_relative_string_patches_,
                                                                   linker_patches);
   }
+  EmitPcRelativeLinkerPatches<LinkerPatch::MethodBssEntryPatch>(method_bss_entry_patches_,
+                                                                linker_patches);
   EmitPcRelativeLinkerPatches<LinkerPatch::TypeBssEntryPatch>(type_bss_entry_patches_,
                                                               linker_patches);
   DCHECK_EQ(size, linker_patches->size());
@@ -1635,6 +1631,13 @@ CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewPcRelativeMethodPa
                             &pc_relative_method_patches_);
 }

+CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewMethodBssEntryPatch(
+    MethodReference target_method) {
+  return NewPcRelativePatch(*target_method.dex_file,
+                            target_method.dex_method_index,
+                            &method_bss_entry_patches_);
+}
+
 CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewPcRelativeTypePatch(
     const DexFile& dex_file, dex::TypeIndex type_index) {
   return NewPcRelativePatch(dex_file, type_index.index_, &pc_relative_type_patches_);
@@ -1650,11 +1653,6 @@ CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewPcRelativeStringPa
   return NewPcRelativePatch(dex_file, string_index.index_, &pc_relative_string_patches_);
 }

-CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewPcRelativeDexCacheArrayPatch(
-    const DexFile& dex_file, uint32_t element_offset) {
-  return NewPcRelativePatch(dex_file, element_offset, &pc_relative_dex_cache_patches_);
-}
-
 CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewPcRelativePatch(
     const DexFile& dex_file, uint32_t offset_or_index, ArenaDeque<PcRelativePatchInfo>* patches) {
   patches->emplace_back(dex_file, offset_or_index);
@@ -1674,6 +1672,7 @@ Literal* CodeGeneratorMIPS::DeduplicateBootImageAddressLiteral(uint32_t address)
 void CodeGeneratorMIPS::EmitPcRelativeAddressPlaceholderHigh(PcRelativePatchInfo* info,
                                                              Register out,
                                                              Register base) {
+
DCHECK_NE(out, base); if (GetInstructionSetFeatures().IsR6()) { DCHECK_EQ(base, ZERO); __ Bind(&info->high_label); @@ -6999,7 +6998,7 @@ HLoadString::LoadKind CodeGeneratorMIPS::GetSupportedLoadStringKind( HLoadString::LoadKind desired_string_load_kind) { // We disable PC-relative load on pre-R6 when there is an irreducible loop, as the optimization // is incompatible with it. - // TODO: Create as many MipsDexCacheArraysBase instructions as needed for methods + // TODO: Create as many HMipsComputeBaseMethodAddress instructions as needed for methods // with irreducible loops. bool has_irreducible_loops = GetGraph()->HasIrreducibleLoops(); bool is_r6 = GetInstructionSetFeatures().IsR6(); @@ -7015,12 +7014,12 @@ HLoadString::LoadKind CodeGeneratorMIPS::GetSupportedLoadStringKind( DCHECK(Runtime::Current()->UseJitCompilation()); fallback_load = false; break; - case HLoadString::LoadKind::kDexCacheViaMethod: + case HLoadString::LoadKind::kRuntimeCall: fallback_load = false; break; } if (fallback_load) { - desired_string_load_kind = HLoadString::LoadKind::kDexCacheViaMethod; + desired_string_load_kind = HLoadString::LoadKind::kRuntimeCall; } return desired_string_load_kind; } @@ -7029,6 +7028,8 @@ HLoadClass::LoadKind CodeGeneratorMIPS::GetSupportedLoadClassKind( HLoadClass::LoadKind desired_class_load_kind) { // We disable PC-relative load on pre-R6 when there is an irreducible loop, as the optimization // is incompatible with it. + // TODO: Create as many HMipsComputeBaseMethodAddress instructions as needed for methods + // with irreducible loops. bool has_irreducible_loops = GetGraph()->HasIrreducibleLoops(); bool is_r6 = GetInstructionSetFeatures().IsR6(); bool fallback_load = has_irreducible_loops && !is_r6; @@ -7049,12 +7050,12 @@ HLoadClass::LoadKind CodeGeneratorMIPS::GetSupportedLoadClassKind( DCHECK(Runtime::Current()->UseJitCompilation()); fallback_load = false; break; - case HLoadClass::LoadKind::kDexCacheViaMethod: + case HLoadClass::LoadKind::kRuntimeCall: fallback_load = false; break; } if (fallback_load) { - desired_class_load_kind = HLoadClass::LoadKind::kDexCacheViaMethod; + desired_class_load_kind = HLoadClass::LoadKind::kRuntimeCall; } return desired_class_load_kind; } @@ -7092,25 +7093,28 @@ HInvokeStaticOrDirect::DispatchInfo CodeGeneratorMIPS::GetSupportedInvokeStaticO HInvokeStaticOrDirect::DispatchInfo dispatch_info = desired_dispatch_info; // We disable PC-relative load on pre-R6 when there is an irreducible loop, as the optimization // is incompatible with it. + // TODO: Create as many HMipsComputeBaseMethodAddress instructions as needed for methods + // with irreducible loops. 
bool has_irreducible_loops = GetGraph()->HasIrreducibleLoops(); bool is_r6 = GetInstructionSetFeatures().IsR6(); bool fallback_load = has_irreducible_loops && !is_r6; switch (dispatch_info.method_load_kind) { case HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative: - case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative: + case HInvokeStaticOrDirect::MethodLoadKind::kBssEntry: break; default: fallback_load = false; break; } if (fallback_load) { - dispatch_info.method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod; + dispatch_info.method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kRuntimeCall; dispatch_info.method_load_data = 0; } return dispatch_info; } -void CodeGeneratorMIPS::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) { +void CodeGeneratorMIPS::GenerateStaticOrDirectCall( + HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path) { // All registers are assumed to be correctly set up per the calling convention. Location callee_method = temp; // For all kinds except kRecursive, callee will be in temp. HInvokeStaticOrDirect::MethodLoadKind method_load_kind = invoke->GetMethodLoadKind(); @@ -7139,59 +7143,28 @@ void CodeGeneratorMIPS::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke PcRelativePatchInfo* info = NewPcRelativeMethodPatch(invoke->GetTargetMethod()); bool reordering = __ SetReorder(false); Register temp_reg = temp.AsRegister<Register>(); - EmitPcRelativeAddressPlaceholderHigh(info, temp_reg, base_reg); - __ Addiu(temp_reg, temp_reg, /* placeholder */ 0x5678); + EmitPcRelativeAddressPlaceholderHigh(info, TMP, base_reg); + __ Addiu(temp_reg, TMP, /* placeholder */ 0x5678); __ SetReorder(reordering); break; } case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress: __ LoadConst32(temp.AsRegister<Register>(), invoke->GetMethodAddress()); break; - case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative: - if (is_r6) { - uint32_t offset = invoke->GetDexCacheArrayOffset(); - CodeGeneratorMIPS::PcRelativePatchInfo* info = - NewPcRelativeDexCacheArrayPatch(invoke->GetDexFileForPcRelativeDexCache(), offset); - bool reordering = __ SetReorder(false); - EmitPcRelativeAddressPlaceholderHigh(info, TMP, ZERO); - __ Lw(temp.AsRegister<Register>(), TMP, /* placeholder */ 0x5678); - __ SetReorder(reordering); - } else { - HMipsDexCacheArraysBase* base = - invoke->InputAt(invoke->GetSpecialInputIndex())->AsMipsDexCacheArraysBase(); - int32_t offset = - invoke->GetDexCacheArrayOffset() - base->GetElementOffset() - kDexCacheArrayLwOffset; - __ LoadFromOffset(kLoadWord, temp.AsRegister<Register>(), base_reg, offset); - } - break; - case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: { - Location current_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()); - Register reg = temp.AsRegister<Register>(); - Register method_reg; - if (current_method.IsRegister()) { - method_reg = current_method.AsRegister<Register>(); - } else { - // TODO: use the appropriate DCHECK() here if possible. - // DCHECK(invoke->GetLocations()->Intrinsified()); - DCHECK(!current_method.IsValid()); - method_reg = reg; - __ Lw(reg, SP, kCurrentMethodStackOffset); - } - - // temp = temp->dex_cache_resolved_methods_; - __ LoadFromOffset(kLoadWord, - reg, - method_reg, - ArtMethod::DexCacheResolvedMethodsOffset(kMipsPointerSize).Int32Value()); - // temp = temp[index_in_cache]; - // Note: Don't use invoke->GetTargetMethod() as it may point to a different dex file. 
- uint32_t index_in_cache = invoke->GetDexMethodIndex(); - __ LoadFromOffset(kLoadWord, - reg, - reg, - CodeGenerator::GetCachePointerOffset(index_in_cache)); + case HInvokeStaticOrDirect::MethodLoadKind::kBssEntry: { + PcRelativePatchInfo* info = NewMethodBssEntryPatch( + MethodReference(&GetGraph()->GetDexFile(), invoke->GetDexMethodIndex())); + Register temp_reg = temp.AsRegister<Register>(); + bool reordering = __ SetReorder(false); + EmitPcRelativeAddressPlaceholderHigh(info, TMP, base_reg); + __ Lw(temp_reg, TMP, /* placeholder */ 0x5678); + __ SetReorder(reordering); break; } + case HInvokeStaticOrDirect::MethodLoadKind::kRuntimeCall: { + GenerateInvokeStaticOrDirectRuntimeCall(invoke, temp, slow_path); + return; // No code pointer retrieval; the runtime performs the call directly. + } } switch (code_ptr_location) { @@ -7210,6 +7183,8 @@ void CodeGeneratorMIPS::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke __ NopIfNoReordering(); break; } + RecordPcInfo(invoke, invoke->GetDexPc(), slow_path); + DCHECK(!IsLeafMethod()); } @@ -7227,10 +7202,10 @@ void InstructionCodeGeneratorMIPS::VisitInvokeStaticOrDirect(HInvokeStaticOrDire locations->HasTemps() ? locations->GetTemp(0) : Location::NoLocation()); - codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); } -void CodeGeneratorMIPS::GenerateVirtualCall(HInvokeVirtual* invoke, Location temp_location) { +void CodeGeneratorMIPS::GenerateVirtualCall( + HInvokeVirtual* invoke, Location temp_location, SlowPathCode* slow_path) { // Use the calling convention instead of the location of the receiver, as // intrinsics may have put the receiver in a different register. In the intrinsics // slow path, the arguments have been moved to the right place, so here we are @@ -7262,6 +7237,7 @@ void CodeGeneratorMIPS::GenerateVirtualCall(HInvokeVirtual* invoke, Location tem // T9(); __ Jalr(T9); __ NopIfNoReordering(); + RecordPcInfo(invoke, invoke->GetDexPc(), slow_path); } void InstructionCodeGeneratorMIPS::VisitInvokeVirtual(HInvokeVirtual* invoke) { @@ -7271,12 +7247,11 @@ void InstructionCodeGeneratorMIPS::VisitInvokeVirtual(HInvokeVirtual* invoke) { codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0)); DCHECK(!codegen_->IsLeafMethod()); - codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); } void LocationsBuilderMIPS::VisitLoadClass(HLoadClass* cls) { HLoadClass::LoadKind load_kind = cls->GetLoadKind(); - if (load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) { + if (load_kind == HLoadClass::LoadKind::kRuntimeCall) { InvokeRuntimeCallingConvention calling_convention; Location loc = Location::RegisterLocation(calling_convention.GetRegisterAt(0)); CodeGenerator::CreateLoadClassRuntimeCallLocationSummary(cls, loc, loc); @@ -7330,7 +7305,7 @@ void LocationsBuilderMIPS::VisitLoadClass(HLoadClass* cls) { // move. void InstructionCodeGeneratorMIPS::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFETY_ANALYSIS { HLoadClass::LoadKind load_kind = cls->GetLoadKind(); - if (load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) { + if (load_kind == HLoadClass::LoadKind::kRuntimeCall) { codegen_->GenerateLoadClassRuntimeCall(cls); return; } @@ -7349,7 +7324,7 @@ void InstructionCodeGeneratorMIPS::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAF base_or_current_method_reg = isR6 ? 
ZERO : locations->InAt(0).AsRegister<Register>(); break; case HLoadClass::LoadKind::kReferrersClass: - case HLoadClass::LoadKind::kDexCacheViaMethod: + case HLoadClass::LoadKind::kRuntimeCall: base_or_current_method_reg = locations->InAt(0).AsRegister<Register>(); break; default: @@ -7427,7 +7402,7 @@ void InstructionCodeGeneratorMIPS::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAF __ SetReorder(reordering); break; } - case HLoadClass::LoadKind::kDexCacheViaMethod: + case HLoadClass::LoadKind::kRuntimeCall: case HLoadClass::LoadKind::kInvalid: LOG(FATAL) << "UNREACHABLE"; UNREACHABLE(); @@ -7487,13 +7462,13 @@ void LocationsBuilderMIPS::VisitLoadString(HLoadString* load) { } FALLTHROUGH_INTENDED; // We need an extra register for PC-relative dex cache accesses. - case HLoadString::LoadKind::kDexCacheViaMethod: + case HLoadString::LoadKind::kRuntimeCall: locations->SetInAt(0, Location::RequiresRegister()); break; default: break; } - if (load_kind == HLoadString::LoadKind::kDexCacheViaMethod) { + if (load_kind == HLoadString::LoadKind::kRuntimeCall) { InvokeRuntimeCallingConvention calling_convention; locations->SetOut(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); } else { @@ -7609,7 +7584,7 @@ void InstructionCodeGeneratorMIPS::VisitLoadString(HLoadString* load) NO_THREAD_ } // TODO: Re-add the compiler code to do string dex cache lookup again. - DCHECK(load_kind == HLoadString::LoadKind::kDexCacheViaMethod); + DCHECK(load_kind == HLoadString::LoadKind::kRuntimeCall); InvokeRuntimeCallingConvention calling_convention; DCHECK_EQ(calling_convention.GetRegisterAt(0), out); __ LoadConst32(calling_convention.GetRegisterAt(0), load->GetStringIndex().index_); @@ -8731,29 +8706,11 @@ void InstructionCodeGeneratorMIPS::VisitMipsComputeBaseMethodAddress( __ Nal(); // Grab the return address off RA. __ Move(reg, RA); - // TODO: Can we share this code with that of VisitMipsDexCacheArraysBase()? // Remember this offset (the obtained PC value) for later use with constant area. __ BindPcRelBaseLabel(); } -void LocationsBuilderMIPS::VisitMipsDexCacheArraysBase(HMipsDexCacheArraysBase* base) { - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(base); - locations->SetOut(Location::RequiresRegister()); -} - -void InstructionCodeGeneratorMIPS::VisitMipsDexCacheArraysBase(HMipsDexCacheArraysBase* base) { - Register reg = base->GetLocations()->Out().AsRegister<Register>(); - CodeGeneratorMIPS::PcRelativePatchInfo* info = - codegen_->NewPcRelativeDexCacheArrayPatch(base->GetDexFile(), base->GetElementOffset()); - CHECK(!codegen_->GetInstructionSetFeatures().IsR6()); - bool reordering = __ SetReorder(false); - // TODO: Reuse MipsComputeBaseMethodAddress on R2 instead of passing ZERO to force emitting NAL. 
- codegen_->EmitPcRelativeAddressPlaceholderHigh(info, reg, ZERO); - __ Addiu(reg, reg, /* placeholder */ 0x5678); - __ SetReorder(reordering); -} - void LocationsBuilderMIPS::VisitInvokeUnresolved(HInvokeUnresolved* invoke) { // The trampoline uses the same calling convention as dex calling conventions, // except instead of loading arg0/r0 with the target Method*, arg0/r0 will contain diff --git a/compiler/optimizing/code_generator_mips.h b/compiler/optimizing/code_generator_mips.h index 736b5070d9..e72e838dd9 100644 --- a/compiler/optimizing/code_generator_mips.h +++ b/compiler/optimizing/code_generator_mips.h @@ -552,8 +552,10 @@ class CodeGeneratorMIPS : public CodeGenerator { const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info, HInvokeStaticOrDirect* invoke) OVERRIDE; - void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp); - void GenerateVirtualCall(HInvokeVirtual* invoke, Location temp) OVERRIDE; + void GenerateStaticOrDirectCall( + HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path = nullptr) OVERRIDE; + void GenerateVirtualCall( + HInvokeVirtual* invoke, Location temp, SlowPathCode* slow_path = nullptr) OVERRIDE; void MoveFromReturnRegister(Location trg ATTRIBUTE_UNUSED, Primitive::Type type ATTRIBUTE_UNUSED) OVERRIDE { @@ -583,12 +585,11 @@ class CodeGeneratorMIPS : public CodeGenerator { }; PcRelativePatchInfo* NewPcRelativeMethodPatch(MethodReference target_method); + PcRelativePatchInfo* NewMethodBssEntryPatch(MethodReference target_method); PcRelativePatchInfo* NewPcRelativeTypePatch(const DexFile& dex_file, dex::TypeIndex type_index); PcRelativePatchInfo* NewTypeBssEntryPatch(const DexFile& dex_file, dex::TypeIndex type_index); PcRelativePatchInfo* NewPcRelativeStringPatch(const DexFile& dex_file, dex::StringIndex string_index); - PcRelativePatchInfo* NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file, - uint32_t element_offset); Literal* DeduplicateBootImageAddressLiteral(uint32_t address); void EmitPcRelativeAddressPlaceholderHigh(PcRelativePatchInfo* info, Register out, Register base); @@ -643,10 +644,10 @@ class CodeGeneratorMIPS : public CodeGenerator { // Deduplication map for 32-bit literals, used for non-patchable boot image addresses. Uint32ToLiteralMap uint32_literals_; - // PC-relative patch info for each HMipsDexCacheArraysBase. - ArenaDeque<PcRelativePatchInfo> pc_relative_dex_cache_patches_; // PC-relative method patch info for kBootImageLinkTimePcRelative. ArenaDeque<PcRelativePatchInfo> pc_relative_method_patches_; + // PC-relative method patch info for kBssEntry. + ArenaDeque<PcRelativePatchInfo> method_bss_entry_patches_; // PC-relative type patch info for kBootImageLinkTimePcRelative. ArenaDeque<PcRelativePatchInfo> pc_relative_type_patches_; // PC-relative type patch info for kBssEntry. 
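The ARM and MIPS changes above share one two-phase pattern for the new `.bss` method entries: code generation appends one `PcRelativePatchInfo` per use to a dedicated `ArenaDeque` (`method_bss_entry_patches_`), and `EmitLinkerPatches()` later turns every recorded entry into a `LinkerPatch`. Below is a minimal stand-alone sketch of that record-then-emit shape; the types (`PatchInfo`, `PatchKind`, `DexFileId`) and the offset fields are simplified stand-ins for illustration, not the real ART classes.

    #include <cstdint>
    #include <deque>
    #include <vector>

    using DexFileId = const void*;  // stand-in for const DexFile*

    struct PatchInfo {
      DexFileId dex_file;     // which dex file the index refers to
      uint32_t index;         // dex method index for method bss entry patches
      uint32_t label_offset;  // code offset of the placeholder instructions
    };

    enum class PatchKind { kRelativeMethod, kMethodBssEntry };

    struct LinkerPatch {
      PatchKind kind;
      uint32_t literal_offset;
      DexFileId dex_file;
      uint32_t index;
    };

    // Phase 1: each PC-relative use records an entry; the MOVW/MOVT (or
    // high/low) placeholder left in the code stream is resolved at link time.
    PatchInfo* NewMethodBssEntryPatch(std::deque<PatchInfo>* patches,
                                      DexFileId dex_file,
                                      uint32_t method_index,
                                      uint32_t label_offset) {
      patches->push_back(PatchInfo{dex_file, method_index, label_offset});
      return &patches->back();
    }

    // Phase 2: reserve, then emit one LinkerPatch per recorded entry,
    // mirroring the reserve-then-emit loop in EmitLinkerPatches() above.
    void EmitMethodBssEntryPatches(const std::deque<PatchInfo>& entries,
                                   std::vector<LinkerPatch>* out) {
      out->reserve(out->size() + entries.size());
      for (const PatchInfo& p : entries) {
        out->push_back(LinkerPatch{PatchKind::kMethodBssEntry,
                                   p.label_offset, p.dex_file, p.index});
      }
    }

Note the guard visible in every back-end above: the boot-image patch kinds are emitted only under `GetCompilerOptions().IsBootImage()`, while `MethodBssEntryPatch` (like `TypeBssEntryPatch`) is emitted unconditionally, which is why `method_bss_entry_patches_` is handled outside the `IsBootImage()` branch.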
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc index 096139191e..e4f1cbd600 100644 --- a/compiler/optimizing/code_generator_mips64.cc +++ b/compiler/optimizing/code_generator_mips64.cc @@ -951,14 +951,14 @@ CodeGeneratorMIPS64::CodeGeneratorMIPS64(HGraph* graph, location_builder_(graph, this), instruction_visitor_(graph, this), move_resolver_(graph->GetArena(), this), - assembler_(graph->GetArena()), + assembler_(graph->GetArena(), &isa_features), isa_features_(isa_features), uint32_literals_(std::less<uint32_t>(), graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), uint64_literals_(std::less<uint64_t>(), graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), - pc_relative_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), pc_relative_method_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + method_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), pc_relative_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), type_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), pc_relative_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), @@ -1440,14 +1440,12 @@ inline void CodeGeneratorMIPS64::EmitPcRelativeLinkerPatches( void CodeGeneratorMIPS64::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) { DCHECK(linker_patches->empty()); size_t size = - pc_relative_dex_cache_patches_.size() + pc_relative_method_patches_.size() + + method_bss_entry_patches_.size() + pc_relative_type_patches_.size() + type_bss_entry_patches_.size() + pc_relative_string_patches_.size(); linker_patches->reserve(size); - EmitPcRelativeLinkerPatches<LinkerPatch::DexCacheArrayPatch>(pc_relative_dex_cache_patches_, - linker_patches); if (GetCompilerOptions().IsBootImage()) { EmitPcRelativeLinkerPatches<LinkerPatch::RelativeMethodPatch>(pc_relative_method_patches_, linker_patches); @@ -1461,6 +1459,8 @@ void CodeGeneratorMIPS64::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_pat EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(pc_relative_string_patches_, linker_patches); } + EmitPcRelativeLinkerPatches<LinkerPatch::MethodBssEntryPatch>(method_bss_entry_patches_, + linker_patches); EmitPcRelativeLinkerPatches<LinkerPatch::TypeBssEntryPatch>(type_bss_entry_patches_, linker_patches); DCHECK_EQ(size, linker_patches->size()); @@ -1473,6 +1473,13 @@ CodeGeneratorMIPS64::PcRelativePatchInfo* CodeGeneratorMIPS64::NewPcRelativeMeth &pc_relative_method_patches_); } +CodeGeneratorMIPS64::PcRelativePatchInfo* CodeGeneratorMIPS64::NewMethodBssEntryPatch( + MethodReference target_method) { + return NewPcRelativePatch(*target_method.dex_file, + target_method.dex_method_index, + &method_bss_entry_patches_); +} + CodeGeneratorMIPS64::PcRelativePatchInfo* CodeGeneratorMIPS64::NewPcRelativeTypePatch( const DexFile& dex_file, dex::TypeIndex type_index) { return NewPcRelativePatch(dex_file, type_index.index_, &pc_relative_type_patches_); @@ -1488,11 +1495,6 @@ CodeGeneratorMIPS64::PcRelativePatchInfo* CodeGeneratorMIPS64::NewPcRelativeStri return NewPcRelativePatch(dex_file, string_index.index_, &pc_relative_string_patches_); } -CodeGeneratorMIPS64::PcRelativePatchInfo* CodeGeneratorMIPS64::NewPcRelativeDexCacheArrayPatch( - const DexFile& dex_file, uint32_t element_offset) { - return NewPcRelativePatch(dex_file, element_offset, &pc_relative_dex_cache_patches_); -} - CodeGeneratorMIPS64::PcRelativePatchInfo* 
CodeGeneratorMIPS64::NewPcRelativePatch( const DexFile& dex_file, uint32_t offset_or_index, ArenaDeque<PcRelativePatchInfo>* patches) { patches->emplace_back(dex_file, offset_or_index); @@ -4873,11 +4875,11 @@ HLoadString::LoadKind CodeGeneratorMIPS64::GetSupportedLoadStringKind( DCHECK(Runtime::Current()->UseJitCompilation()); break; case HLoadString::LoadKind::kBootImageAddress: - case HLoadString::LoadKind::kDexCacheViaMethod: + case HLoadString::LoadKind::kRuntimeCall: break; } if (fallback_load) { - desired_string_load_kind = HLoadString::LoadKind::kDexCacheViaMethod; + desired_string_load_kind = HLoadString::LoadKind::kRuntimeCall; } return desired_string_load_kind; } @@ -4899,11 +4901,11 @@ HLoadClass::LoadKind CodeGeneratorMIPS64::GetSupportedLoadClassKind( DCHECK(Runtime::Current()->UseJitCompilation()); break; case HLoadClass::LoadKind::kBootImageAddress: - case HLoadClass::LoadKind::kDexCacheViaMethod: + case HLoadClass::LoadKind::kRuntimeCall: break; } if (fallback_load) { - desired_class_load_kind = HLoadClass::LoadKind::kDexCacheViaMethod; + desired_class_load_kind = HLoadClass::LoadKind::kRuntimeCall; } return desired_class_load_kind; } @@ -4915,7 +4917,8 @@ HInvokeStaticOrDirect::DispatchInfo CodeGeneratorMIPS64::GetSupportedInvokeStati return desired_dispatch_info; } -void CodeGeneratorMIPS64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) { +void CodeGeneratorMIPS64::GenerateStaticOrDirectCall( + HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path) { // All registers are assumed to be correctly set up per the calling convention. Location callee_method = temp; // For all kinds except kRecursive, callee will be in temp. HInvokeStaticOrDirect::MethodLoadKind method_load_kind = invoke->GetMethodLoadKind(); @@ -4948,41 +4951,16 @@ void CodeGeneratorMIPS64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invo kLoadDoubleword, DeduplicateUint64Literal(invoke->GetMethodAddress())); break; - case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative: { - uint32_t offset = invoke->GetDexCacheArrayOffset(); - CodeGeneratorMIPS64::PcRelativePatchInfo* info = - NewPcRelativeDexCacheArrayPatch(invoke->GetDexFileForPcRelativeDexCache(), offset); + case HInvokeStaticOrDirect::MethodLoadKind::kBssEntry: { + PcRelativePatchInfo* info = NewMethodBssEntryPatch( + MethodReference(&GetGraph()->GetDexFile(), invoke->GetDexMethodIndex())); EmitPcRelativeAddressPlaceholderHigh(info, AT); __ Ld(temp.AsRegister<GpuRegister>(), AT, /* placeholder */ 0x5678); break; } - case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: { - Location current_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()); - GpuRegister reg = temp.AsRegister<GpuRegister>(); - GpuRegister method_reg; - if (current_method.IsRegister()) { - method_reg = current_method.AsRegister<GpuRegister>(); - } else { - // TODO: use the appropriate DCHECK() here if possible. - // DCHECK(invoke->GetLocations()->Intrinsified()); - DCHECK(!current_method.IsValid()); - method_reg = reg; - __ Ld(reg, SP, kCurrentMethodStackOffset); - } - - // temp = temp->dex_cache_resolved_methods_; - __ LoadFromOffset(kLoadDoubleword, - reg, - method_reg, - ArtMethod::DexCacheResolvedMethodsOffset(kMips64PointerSize).Int32Value()); - // temp = temp[index_in_cache]; - // Note: Don't use invoke->GetTargetMethod() as it may point to a different dex file. 
- uint32_t index_in_cache = invoke->GetDexMethodIndex(); - __ LoadFromOffset(kLoadDoubleword, - reg, - reg, - CodeGenerator::GetCachePointerOffset(index_in_cache)); - break; + case HInvokeStaticOrDirect::MethodLoadKind::kRuntimeCall: { + GenerateInvokeStaticOrDirectRuntimeCall(invoke, temp, slow_path); + return; // No code pointer retrieval; the runtime performs the call directly. } } @@ -5002,6 +4980,8 @@ void CodeGeneratorMIPS64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invo __ Nop(); break; } + RecordPcInfo(invoke, invoke->GetDexPc(), slow_path); + DCHECK(!IsLeafMethod()); } @@ -5019,10 +4999,10 @@ void InstructionCodeGeneratorMIPS64::VisitInvokeStaticOrDirect(HInvokeStaticOrDi locations->HasTemps() ? locations->GetTemp(0) : Location::NoLocation()); - codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); } -void CodeGeneratorMIPS64::GenerateVirtualCall(HInvokeVirtual* invoke, Location temp_location) { +void CodeGeneratorMIPS64::GenerateVirtualCall( + HInvokeVirtual* invoke, Location temp_location, SlowPathCode* slow_path) { // Use the calling convention instead of the location of the receiver, as // intrinsics may have put the receiver in a different register. In the intrinsics // slow path, the arguments have been moved to the right place, so here we are @@ -5054,6 +5034,7 @@ void CodeGeneratorMIPS64::GenerateVirtualCall(HInvokeVirtual* invoke, Location t // T9(); __ Jalr(T9); __ Nop(); + RecordPcInfo(invoke, invoke->GetDexPc(), slow_path); } void InstructionCodeGeneratorMIPS64::VisitInvokeVirtual(HInvokeVirtual* invoke) { @@ -5063,12 +5044,11 @@ void InstructionCodeGeneratorMIPS64::VisitInvokeVirtual(HInvokeVirtual* invoke) codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0)); DCHECK(!codegen_->IsLeafMethod()); - codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); } void LocationsBuilderMIPS64::VisitLoadClass(HLoadClass* cls) { HLoadClass::LoadKind load_kind = cls->GetLoadKind(); - if (load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) { + if (load_kind == HLoadClass::LoadKind::kRuntimeCall) { InvokeRuntimeCallingConvention calling_convention; Location loc = Location::RegisterLocation(calling_convention.GetRegisterAt(0)); CodeGenerator::CreateLoadClassRuntimeCallLocationSummary(cls, loc, loc); @@ -5105,7 +5085,7 @@ void LocationsBuilderMIPS64::VisitLoadClass(HLoadClass* cls) { // move. 
void InstructionCodeGeneratorMIPS64::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFETY_ANALYSIS { HLoadClass::LoadKind load_kind = cls->GetLoadKind(); - if (load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) { + if (load_kind == HLoadClass::LoadKind::kRuntimeCall) { codegen_->GenerateLoadClassRuntimeCall(cls); return; } @@ -5116,7 +5096,7 @@ void InstructionCodeGeneratorMIPS64::VisitLoadClass(HLoadClass* cls) NO_THREAD_S GpuRegister out = out_loc.AsRegister<GpuRegister>(); GpuRegister current_method_reg = ZERO; if (load_kind == HLoadClass::LoadKind::kReferrersClass || - load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) { + load_kind == HLoadClass::LoadKind::kRuntimeCall) { current_method_reg = locations->InAt(0).AsRegister<GpuRegister>(); } @@ -5170,7 +5150,7 @@ void InstructionCodeGeneratorMIPS64::VisitLoadClass(HLoadClass* cls) NO_THREAD_S cls->GetClass())); GenerateGcRootFieldLoad(cls, out_loc, out, 0, read_barrier_option); break; - case HLoadClass::LoadKind::kDexCacheViaMethod: + case HLoadClass::LoadKind::kRuntimeCall: case HLoadClass::LoadKind::kInvalid: LOG(FATAL) << "UNREACHABLE"; UNREACHABLE(); @@ -5219,7 +5199,7 @@ void LocationsBuilderMIPS64::VisitLoadString(HLoadString* load) { HLoadString::LoadKind load_kind = load->GetLoadKind(); LocationSummary::CallKind call_kind = CodeGenerator::GetLoadStringCallKind(load); LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind); - if (load_kind == HLoadString::LoadKind::kDexCacheViaMethod) { + if (load_kind == HLoadString::LoadKind::kRuntimeCall) { InvokeRuntimeCallingConvention calling_convention; locations->SetOut(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); } else { @@ -5293,7 +5273,7 @@ void InstructionCodeGeneratorMIPS64::VisitLoadString(HLoadString* load) NO_THREA } // TODO: Re-add the compiler code to do string dex cache lookup again. 
- DCHECK(load_kind == HLoadString::LoadKind::kDexCacheViaMethod); + DCHECK(load_kind == HLoadString::LoadKind::kRuntimeCall); InvokeRuntimeCallingConvention calling_convention; DCHECK_EQ(calling_convention.GetRegisterAt(0), out); __ LoadConst32(calling_convention.GetRegisterAt(0), load->GetStringIndex().index_); diff --git a/compiler/optimizing/code_generator_mips64.h b/compiler/optimizing/code_generator_mips64.h index 8405040386..6260c73614 100644 --- a/compiler/optimizing/code_generator_mips64.h +++ b/compiler/optimizing/code_generator_mips64.h @@ -314,6 +314,9 @@ class InstructionCodeGeneratorMIPS64 : public InstructionCodeGenerator { uint32_t num_entries, HBasicBlock* switch_block, HBasicBlock* default_block); + int32_t VecAddress(LocationSummary* locations, + size_t size, + /* out */ GpuRegister* adjusted_base); Mips64Assembler* const assembler_; CodeGeneratorMIPS64* const codegen_; @@ -518,8 +521,10 @@ class CodeGeneratorMIPS64 : public CodeGenerator { const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info, HInvokeStaticOrDirect* invoke) OVERRIDE; - void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) OVERRIDE; - void GenerateVirtualCall(HInvokeVirtual* invoke, Location temp) OVERRIDE; + void GenerateStaticOrDirectCall( + HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path = nullptr) OVERRIDE; + void GenerateVirtualCall( + HInvokeVirtual* invoke, Location temp, SlowPathCode* slow_path = nullptr) OVERRIDE; void MoveFromReturnRegister(Location trg ATTRIBUTE_UNUSED, Primitive::Type type ATTRIBUTE_UNUSED) OVERRIDE { @@ -546,12 +551,11 @@ class CodeGeneratorMIPS64 : public CodeGenerator { }; PcRelativePatchInfo* NewPcRelativeMethodPatch(MethodReference target_method); + PcRelativePatchInfo* NewMethodBssEntryPatch(MethodReference target_method); PcRelativePatchInfo* NewPcRelativeTypePatch(const DexFile& dex_file, dex::TypeIndex type_index); PcRelativePatchInfo* NewTypeBssEntryPatch(const DexFile& dex_file, dex::TypeIndex type_index); PcRelativePatchInfo* NewPcRelativeStringPatch(const DexFile& dex_file, dex::StringIndex string_index); - PcRelativePatchInfo* NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file, - uint32_t element_offset); PcRelativePatchInfo* NewPcRelativeCallPatch(const DexFile& dex_file, uint32_t method_index); Literal* DeduplicateBootImageAddressLiteral(uint64_t address); @@ -604,10 +608,10 @@ class CodeGeneratorMIPS64 : public CodeGenerator { // Deduplication map for 64-bit literals, used for non-patchable method address or method code // address. Uint64ToLiteralMap uint64_literals_; - // PC-relative patch info. - ArenaDeque<PcRelativePatchInfo> pc_relative_dex_cache_patches_; // PC-relative method patch info for kBootImageLinkTimePcRelative. ArenaDeque<PcRelativePatchInfo> pc_relative_method_patches_; + // PC-relative method patch info for kBssEntry. + ArenaDeque<PcRelativePatchInfo> method_bss_entry_patches_; // PC-relative type patch info for kBootImageLinkTimePcRelative. ArenaDeque<PcRelativePatchInfo> pc_relative_type_patches_; // PC-relative type patch info for kBssEntry. 
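In the MIPS64 `kBssEntry` path above, the address of the `.bss` slot is formed as a high/low pair: `EmitPcRelativeAddressPlaceholderHigh()` emits the high half into `AT`, and the following `Ld` carries the dummy low half (`/* placeholder */ 0x5678`) that the linker overwrites. Because the load's 16-bit immediate is sign-extended, the high half must compensate with the usual `+0x8000` rounding so the two halves recombine to the intended offset. A small self-contained check of that arithmetic (plain C++, not ART code; `SplitHiLo` is an illustrative name):

    #include <cassert>
    #include <cstdint>
    #include <initializer_list>

    // Split a 32-bit offset into a high half for the LUI/AUIPC-style
    // instruction and the sign-extended low 16 bits used by the load.
    void SplitHiLo(int32_t offset, int32_t* hi, int16_t* lo) {
      *lo = static_cast<int16_t>(offset & 0xFFFF);  // hardware sign-extends this
      *hi = (offset - *lo) >> 16;  // same as (offset + 0x8000) >> 16 absent overflow
    }

    int main() {
      for (int32_t offset : {0x12345678, 42, -0x1234, 0x7FFE8000}) {
        int32_t hi = 0;
        int16_t lo = 0;
        SplitHiLo(offset, &hi, &lo);
        assert((hi << 16) + lo == offset);  // the pair recombines exactly
      }
      return 0;
    }

The ARM back-end needs no such rounding for its MOVW/MOVT pairs, since MOVW zero-extends its 16-bit immediate.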
diff --git a/compiler/optimizing/code_generator_vector_arm64.cc b/compiler/optimizing/code_generator_vector_arm64.cc index a41adca02c..f422b9fc8b 100644 --- a/compiler/optimizing/code_generator_vector_arm64.cc +++ b/compiler/optimizing/code_generator_vector_arm64.cc @@ -22,6 +22,8 @@ using namespace vixl::aarch64; // NOLINT(build/namespaces) namespace art { namespace arm64 { +using helpers::ARM64EncodableConstantOrRegister; +using helpers::Arm64CanEncodeConstantAsImmediate; using helpers::DRegisterFrom; using helpers::VRegisterFrom; using helpers::HeapOperand; @@ -34,6 +36,7 @@ using helpers::WRegisterFrom; void LocationsBuilderARM64::VisitVecReplicateScalar(HVecReplicateScalar* instruction) { LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction); + HInstruction* input = instruction->InputAt(0); switch (instruction->GetPackedType()) { case Primitive::kPrimBoolean: case Primitive::kPrimByte: @@ -41,13 +44,19 @@ void LocationsBuilderARM64::VisitVecReplicateScalar(HVecReplicateScalar* instruc case Primitive::kPrimShort: case Primitive::kPrimInt: case Primitive::kPrimLong: - locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(0, ARM64EncodableConstantOrRegister(input, instruction)); locations->SetOut(Location::RequiresFpuRegister()); break; case Primitive::kPrimFloat: case Primitive::kPrimDouble: - locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); + if (input->IsConstant() && + Arm64CanEncodeConstantAsImmediate(input->AsConstant(), instruction)) { + locations->SetInAt(0, Location::ConstantLocation(input->AsConstant())); + locations->SetOut(Location::RequiresFpuRegister()); + } else { + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); + } break; default: LOG(FATAL) << "Unsupported SIMD type"; @@ -57,33 +66,58 @@ void LocationsBuilderARM64::VisitVecReplicateScalar(HVecReplicateScalar* instruc void InstructionCodeGeneratorARM64::VisitVecReplicateScalar(HVecReplicateScalar* instruction) { LocationSummary* locations = instruction->GetLocations(); + Location src_loc = locations->InAt(0); VRegister dst = VRegisterFrom(locations->Out()); switch (instruction->GetPackedType()) { case Primitive::kPrimBoolean: case Primitive::kPrimByte: DCHECK_EQ(16u, instruction->GetVectorLength()); - __ Dup(dst.V16B(), InputRegisterAt(instruction, 0)); + if (src_loc.IsConstant()) { + __ Movi(dst.V16B(), Int64ConstantFrom(src_loc)); + } else { + __ Dup(dst.V16B(), InputRegisterAt(instruction, 0)); + } break; case Primitive::kPrimChar: case Primitive::kPrimShort: DCHECK_EQ(8u, instruction->GetVectorLength()); - __ Dup(dst.V8H(), InputRegisterAt(instruction, 0)); + if (src_loc.IsConstant()) { + __ Movi(dst.V8H(), Int64ConstantFrom(src_loc)); + } else { + __ Dup(dst.V8H(), InputRegisterAt(instruction, 0)); + } break; case Primitive::kPrimInt: DCHECK_EQ(4u, instruction->GetVectorLength()); - __ Dup(dst.V4S(), InputRegisterAt(instruction, 0)); + if (src_loc.IsConstant()) { + __ Movi(dst.V4S(), Int64ConstantFrom(src_loc)); + } else { + __ Dup(dst.V4S(), InputRegisterAt(instruction, 0)); + } break; case Primitive::kPrimLong: DCHECK_EQ(2u, instruction->GetVectorLength()); - __ Dup(dst.V2D(), XRegisterFrom(locations->InAt(0))); + if (src_loc.IsConstant()) { + __ Movi(dst.V2D(), Int64ConstantFrom(src_loc)); + } else { + __ Dup(dst.V2D(), XRegisterFrom(src_loc)); + } break; case Primitive::kPrimFloat: 
DCHECK_EQ(4u, instruction->GetVectorLength()); - __ Dup(dst.V4S(), VRegisterFrom(locations->InAt(0)).V4S(), 0); + if (src_loc.IsConstant()) { + __ Fmov(dst.V4S(), src_loc.GetConstant()->AsFloatConstant()->GetValue()); + } else { + __ Dup(dst.V4S(), VRegisterFrom(src_loc).V4S(), 0); + } break; case Primitive::kPrimDouble: DCHECK_EQ(2u, instruction->GetVectorLength()); - __ Dup(dst.V2D(), VRegisterFrom(locations->InAt(0)).V2D(), 0); + if (src_loc.IsConstant()) { + __ Fmov(dst.V2D(), src_loc.GetConstant()->AsDoubleConstant()->GetValue()); + } else { + __ Dup(dst.V2D(), VRegisterFrom(src_loc).V2D(), 0); + } break; default: LOG(FATAL) << "Unsupported SIMD type"; diff --git a/compiler/optimizing/code_generator_vector_mips64.cc b/compiler/optimizing/code_generator_vector_mips64.cc index 50b95c17cb..0395db1df9 100644 --- a/compiler/optimizing/code_generator_vector_mips64.cc +++ b/compiler/optimizing/code_generator_vector_mips64.cc @@ -15,6 +15,7 @@ */ #include "code_generator_mips64.h" +#include "mirror/array-inl.h" namespace art { namespace mips64 { @@ -22,12 +23,72 @@ namespace mips64 { // NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy. #define __ down_cast<Mips64Assembler*>(GetAssembler())-> // NOLINT +VectorRegister VectorRegisterFrom(Location location) { + DCHECK(location.IsFpuRegister()); + return static_cast<VectorRegister>(location.AsFpuRegister<FpuRegister>()); +} + void LocationsBuilderMIPS64::VisitVecReplicateScalar(HVecReplicateScalar* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction); + switch (instruction->GetPackedType()) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimLong: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresFpuRegister()); + break; + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } } void InstructionCodeGeneratorMIPS64::VisitVecReplicateScalar(HVecReplicateScalar* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); + LocationSummary* locations = instruction->GetLocations(); + VectorRegister dst = VectorRegisterFrom(locations->Out()); + switch (instruction->GetPackedType()) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + DCHECK_EQ(16u, instruction->GetVectorLength()); + __ FillB(dst, locations->InAt(0).AsRegister<GpuRegister>()); + break; + case Primitive::kPrimChar: + case Primitive::kPrimShort: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ FillH(dst, locations->InAt(0).AsRegister<GpuRegister>()); + break; + case Primitive::kPrimInt: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ FillW(dst, locations->InAt(0).AsRegister<GpuRegister>()); + break; + case Primitive::kPrimLong: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ FillD(dst, locations->InAt(0).AsRegister<GpuRegister>()); + break; + case Primitive::kPrimFloat: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ ReplicateFPToVectorRegister(dst, + locations->InAt(0).AsFpuRegister<FpuRegister>(), + /* is_double */ false); + break; + case Primitive::kPrimDouble: + DCHECK_EQ(2u, instruction->GetVectorLength()); + 
__ ReplicateFPToVectorRegister(dst, + locations->InAt(0).AsFpuRegister<FpuRegister>(), + /* is_double */ true); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } } void LocationsBuilderMIPS64::VisitVecSetScalars(HVecSetScalars* instruction) { @@ -51,13 +112,23 @@ static void CreateVecUnOpLocations(ArenaAllocator* arena, HVecUnaryOperation* in LocationSummary* locations = new (arena) LocationSummary(instruction); switch (instruction->GetPackedType()) { case Primitive::kPrimBoolean: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister(), + instruction->IsVecNot() ? Location::kOutputOverlap + : Location::kNoOutputOverlap); + break; case Primitive::kPrimByte: case Primitive::kPrimChar: case Primitive::kPrimShort: case Primitive::kPrimInt: + case Primitive::kPrimLong: case Primitive::kPrimFloat: case Primitive::kPrimDouble: - DCHECK(locations); + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister(), + (instruction->IsVecNeg() || instruction->IsVecAbs()) + ? Location::kOutputOverlap + : Location::kNoOutputOverlap); break; default: LOG(FATAL) << "Unsupported SIMD type"; @@ -70,7 +141,18 @@ void LocationsBuilderMIPS64::VisitVecCnv(HVecCnv* instruction) { } void InstructionCodeGeneratorMIPS64::VisitVecCnv(HVecCnv* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); + LocationSummary* locations = instruction->GetLocations(); + VectorRegister src = VectorRegisterFrom(locations->InAt(0)); + VectorRegister dst = VectorRegisterFrom(locations->Out()); + Primitive::Type from = instruction->GetInputType(); + Primitive::Type to = instruction->GetResultType(); + if (from == Primitive::kPrimInt && to == Primitive::kPrimFloat) { + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ Ffint_sW(dst, src); + } else { + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } } void LocationsBuilderMIPS64::VisitVecNeg(HVecNeg* instruction) { @@ -78,7 +160,45 @@ void LocationsBuilderMIPS64::VisitVecNeg(HVecNeg* instruction) { } void InstructionCodeGeneratorMIPS64::VisitVecNeg(HVecNeg* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); + LocationSummary* locations = instruction->GetLocations(); + VectorRegister src = VectorRegisterFrom(locations->InAt(0)); + VectorRegister dst = VectorRegisterFrom(locations->Out()); + switch (instruction->GetPackedType()) { + case Primitive::kPrimByte: + DCHECK_EQ(16u, instruction->GetVectorLength()); + __ FillB(dst, ZERO); + __ SubvB(dst, dst, src); + break; + case Primitive::kPrimChar: + case Primitive::kPrimShort: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ FillH(dst, ZERO); + __ SubvH(dst, dst, src); + break; + case Primitive::kPrimInt: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ FillW(dst, ZERO); + __ SubvW(dst, dst, src); + break; + case Primitive::kPrimLong: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ FillD(dst, ZERO); + __ SubvD(dst, dst, src); + break; + case Primitive::kPrimFloat: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ FillW(dst, ZERO); + __ FsubW(dst, dst, src); + break; + case Primitive::kPrimDouble: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ FillD(dst, ZERO); + __ FsubD(dst, dst, src); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } } void LocationsBuilderMIPS64::VisitVecAbs(HVecAbs* instruction) { @@ -86,7 +206,47 @@ void LocationsBuilderMIPS64::VisitVecAbs(HVecAbs* instruction) { } void 
InstructionCodeGeneratorMIPS64::VisitVecAbs(HVecAbs* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); + LocationSummary* locations = instruction->GetLocations(); + VectorRegister src = VectorRegisterFrom(locations->InAt(0)); + VectorRegister dst = VectorRegisterFrom(locations->Out()); + switch (instruction->GetPackedType()) { + case Primitive::kPrimByte: + DCHECK_EQ(16u, instruction->GetVectorLength()); + __ FillB(dst, ZERO); // all zeroes + __ Add_aB(dst, dst, src); // dst = abs(0) + abs(src) + break; + case Primitive::kPrimChar: + case Primitive::kPrimShort: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ FillH(dst, ZERO); // all zeroes + __ Add_aH(dst, dst, src); // dst = abs(0) + abs(src) + break; + case Primitive::kPrimInt: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ FillW(dst, ZERO); // all zeroes + __ Add_aW(dst, dst, src); // dst = abs(0) + abs(src) + break; + case Primitive::kPrimLong: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ FillD(dst, ZERO); // all zeroes + __ Add_aD(dst, dst, src); // dst = abs(0) + abs(src) + break; + case Primitive::kPrimFloat: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ LdiW(dst, -1); // all ones + __ SrliW(dst, dst, 1); + __ AndV(dst, dst, src); + break; + case Primitive::kPrimDouble: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ LdiD(dst, -1); // all ones + __ SrliD(dst, dst, 1); + __ AndV(dst, dst, src); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } } void LocationsBuilderMIPS64::VisitVecNot(HVecNot* instruction) { @@ -94,7 +254,30 @@ void LocationsBuilderMIPS64::VisitVecNot(HVecNot* instruction) { } void InstructionCodeGeneratorMIPS64::VisitVecNot(HVecNot* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); + LocationSummary* locations = instruction->GetLocations(); + VectorRegister src = VectorRegisterFrom(locations->InAt(0)); + VectorRegister dst = VectorRegisterFrom(locations->Out()); + switch (instruction->GetPackedType()) { + case Primitive::kPrimBoolean: // special case boolean-not + DCHECK_EQ(16u, instruction->GetVectorLength()); + __ LdiB(dst, 1); + __ XorV(dst, dst, src); + break; + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimLong: + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: + DCHECK_LE(2u, instruction->GetVectorLength()); + DCHECK_LE(instruction->GetVectorLength(), 16u); + __ NorV(dst, src, src); // lanes do not matter + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } } // Helper to set up locations for vector binary operations. 
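The MSA lowerings above rely on identities rather than dedicated instructions: integer negation is computed as `0 - src` (a `Fill` of zero followed by `Subv`), integer `abs` as `abs(0) + abs(src)` via `Add_a` against a zeroed register, floating-point `abs` by clearing the sign bit with an all-ones register shifted right by one, boolean `not` as `xor` with 1, and the general `not` as `nor(src, src)`. A scalar C++ sketch of the two bitwise tricks, mirroring a single lane (illustrative only, not ART code):

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    // Float abs: build the 0x7fffffff mask the way the MSA sequence does
    // (LdiW(dst, -1) for all ones, SrliW(dst, dst, 1) for a logical shift
    // right by one), then AND it with the raw bits (AndV).
    float FabsByMask(float x) {
      uint32_t bits;
      std::memcpy(&bits, &x, sizeof(bits));
      uint32_t mask = ~0u >> 1;  // 0x7fffffff: everything but the sign bit
      bits &= mask;
      float result;
      std::memcpy(&result, &bits, sizeof(result));
      return result;
    }

    // Bitwise not via NOR with itself, as in NorV(dst, src, src).
    uint32_t NotByNor(uint32_t x) {
      return ~(x | x);
    }

    int main() {
      assert(FabsByMask(-2.5f) == 2.5f);
      assert(FabsByMask(3.0f) == 3.0f);
      assert(NotByNor(0x0F0Fu) == 0xFFFFF0F0u);
      return 0;
    }

The boolean case can get away with `xor` against 1 only because boolean lanes hold just 0 or 1, which is why `VisitVecNot` special-cases `kPrimBoolean` (`LdiB(dst, 1); XorV`) before the generic `NorV` fallthrough.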
@@ -106,9 +289,12 @@ static void CreateVecBinOpLocations(ArenaAllocator* arena, HVecBinaryOperation* case Primitive::kPrimChar: case Primitive::kPrimShort: case Primitive::kPrimInt: + case Primitive::kPrimLong: case Primitive::kPrimFloat: case Primitive::kPrimDouble: - DCHECK(locations); + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); break; default: LOG(FATAL) << "Unsupported SIMD type"; @@ -121,7 +307,40 @@ void LocationsBuilderMIPS64::VisitVecAdd(HVecAdd* instruction) { } void InstructionCodeGeneratorMIPS64::VisitVecAdd(HVecAdd* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); + LocationSummary* locations = instruction->GetLocations(); + VectorRegister lhs = VectorRegisterFrom(locations->InAt(0)); + VectorRegister rhs = VectorRegisterFrom(locations->InAt(1)); + VectorRegister dst = VectorRegisterFrom(locations->Out()); + switch (instruction->GetPackedType()) { + case Primitive::kPrimByte: + DCHECK_EQ(16u, instruction->GetVectorLength()); + __ AddvB(dst, lhs, rhs); + break; + case Primitive::kPrimChar: + case Primitive::kPrimShort: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ AddvH(dst, lhs, rhs); + break; + case Primitive::kPrimInt: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ AddvW(dst, lhs, rhs); + break; + case Primitive::kPrimLong: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ AddvD(dst, lhs, rhs); + break; + case Primitive::kPrimFloat: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ FaddW(dst, lhs, rhs); + break; + case Primitive::kPrimDouble: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ FaddD(dst, lhs, rhs); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } } void LocationsBuilderMIPS64::VisitVecHalvingAdd(HVecHalvingAdd* instruction) { @@ -129,7 +348,40 @@ void LocationsBuilderMIPS64::VisitVecHalvingAdd(HVecHalvingAdd* instruction) { } void InstructionCodeGeneratorMIPS64::VisitVecHalvingAdd(HVecHalvingAdd* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); + LocationSummary* locations = instruction->GetLocations(); + VectorRegister lhs = VectorRegisterFrom(locations->InAt(0)); + VectorRegister rhs = VectorRegisterFrom(locations->InAt(1)); + VectorRegister dst = VectorRegisterFrom(locations->Out()); + switch (instruction->GetPackedType()) { + case Primitive::kPrimByte: + DCHECK_EQ(16u, instruction->GetVectorLength()); + if (instruction->IsUnsigned()) { + instruction->IsRounded() + ? __ Aver_uB(dst, lhs, rhs) + : __ Ave_uB(dst, lhs, rhs); + } else { + instruction->IsRounded() + ? __ Aver_sB(dst, lhs, rhs) + : __ Ave_sB(dst, lhs, rhs); + } + break; + case Primitive::kPrimChar: + case Primitive::kPrimShort: + DCHECK_EQ(8u, instruction->GetVectorLength()); + if (instruction->IsUnsigned()) { + instruction->IsRounded() + ? __ Aver_uH(dst, lhs, rhs) + : __ Ave_uH(dst, lhs, rhs); + } else { + instruction->IsRounded() + ? 
__ Aver_sH(dst, lhs, rhs) + : __ Ave_sH(dst, lhs, rhs); + } + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } } void LocationsBuilderMIPS64::VisitVecSub(HVecSub* instruction) { @@ -137,7 +389,40 @@ void LocationsBuilderMIPS64::VisitVecSub(HVecSub* instruction) { } void InstructionCodeGeneratorMIPS64::VisitVecSub(HVecSub* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); + LocationSummary* locations = instruction->GetLocations(); + VectorRegister lhs = VectorRegisterFrom(locations->InAt(0)); + VectorRegister rhs = VectorRegisterFrom(locations->InAt(1)); + VectorRegister dst = VectorRegisterFrom(locations->Out()); + switch (instruction->GetPackedType()) { + case Primitive::kPrimByte: + DCHECK_EQ(16u, instruction->GetVectorLength()); + __ SubvB(dst, lhs, rhs); + break; + case Primitive::kPrimChar: + case Primitive::kPrimShort: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ SubvH(dst, lhs, rhs); + break; + case Primitive::kPrimInt: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ SubvW(dst, lhs, rhs); + break; + case Primitive::kPrimLong: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ SubvD(dst, lhs, rhs); + break; + case Primitive::kPrimFloat: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ FsubW(dst, lhs, rhs); + break; + case Primitive::kPrimDouble: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ FsubD(dst, lhs, rhs); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } } void LocationsBuilderMIPS64::VisitVecMul(HVecMul* instruction) { @@ -145,7 +430,40 @@ void LocationsBuilderMIPS64::VisitVecMul(HVecMul* instruction) { } void InstructionCodeGeneratorMIPS64::VisitVecMul(HVecMul* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); + LocationSummary* locations = instruction->GetLocations(); + VectorRegister lhs = VectorRegisterFrom(locations->InAt(0)); + VectorRegister rhs = VectorRegisterFrom(locations->InAt(1)); + VectorRegister dst = VectorRegisterFrom(locations->Out()); + switch (instruction->GetPackedType()) { + case Primitive::kPrimByte: + DCHECK_EQ(16u, instruction->GetVectorLength()); + __ MulvB(dst, lhs, rhs); + break; + case Primitive::kPrimChar: + case Primitive::kPrimShort: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ MulvH(dst, lhs, rhs); + break; + case Primitive::kPrimInt: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ MulvW(dst, lhs, rhs); + break; + case Primitive::kPrimLong: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ MulvD(dst, lhs, rhs); + break; + case Primitive::kPrimFloat: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ FmulW(dst, lhs, rhs); + break; + case Primitive::kPrimDouble: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ FmulD(dst, lhs, rhs); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } } void LocationsBuilderMIPS64::VisitVecDiv(HVecDiv* instruction) { @@ -153,7 +471,23 @@ void LocationsBuilderMIPS64::VisitVecDiv(HVecDiv* instruction) { } void InstructionCodeGeneratorMIPS64::VisitVecDiv(HVecDiv* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); + LocationSummary* locations = instruction->GetLocations(); + VectorRegister lhs = VectorRegisterFrom(locations->InAt(0)); + VectorRegister rhs = VectorRegisterFrom(locations->InAt(1)); + VectorRegister dst = VectorRegisterFrom(locations->Out()); + switch (instruction->GetPackedType()) { + case Primitive::kPrimFloat: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ FdivW(dst, lhs, rhs); + 
break; + case Primitive::kPrimDouble: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ FdivD(dst, lhs, rhs); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } } void LocationsBuilderMIPS64::VisitVecMin(HVecMin* instruction) { @@ -161,7 +495,60 @@ } void InstructionCodeGeneratorMIPS64::VisitVecMin(HVecMin* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); + LocationSummary* locations = instruction->GetLocations(); + VectorRegister lhs = VectorRegisterFrom(locations->InAt(0)); + VectorRegister rhs = VectorRegisterFrom(locations->InAt(1)); + VectorRegister dst = VectorRegisterFrom(locations->Out()); + switch (instruction->GetPackedType()) { + case Primitive::kPrimByte: + DCHECK_EQ(16u, instruction->GetVectorLength()); + if (instruction->IsUnsigned()) { + __ Min_uB(dst, lhs, rhs); + } else { + __ Min_sB(dst, lhs, rhs); + } + break; + case Primitive::kPrimChar: + case Primitive::kPrimShort: + DCHECK_EQ(8u, instruction->GetVectorLength()); + if (instruction->IsUnsigned()) { + __ Min_uH(dst, lhs, rhs); + } else { + __ Min_sH(dst, lhs, rhs); + } + break; + case Primitive::kPrimInt: + DCHECK_EQ(4u, instruction->GetVectorLength()); + if (instruction->IsUnsigned()) { + __ Min_uW(dst, lhs, rhs); + } else { + __ Min_sW(dst, lhs, rhs); + } + break; + case Primitive::kPrimLong: + DCHECK_EQ(2u, instruction->GetVectorLength()); + if (instruction->IsUnsigned()) { + __ Min_uD(dst, lhs, rhs); + } else { + __ Min_sD(dst, lhs, rhs); + } + break; + // When one of the arguments is NaN, fmin.df returns the other argument, but Java expects a NaN value. + // TODO: Fix min(x, NaN) cases for float and double. + case Primitive::kPrimFloat: + DCHECK_EQ(4u, instruction->GetVectorLength()); + DCHECK(!instruction->IsUnsigned()); + __ FminW(dst, lhs, rhs); + break; + case Primitive::kPrimDouble: + DCHECK_EQ(2u, instruction->GetVectorLength()); + DCHECK(!instruction->IsUnsigned()); + __ FminD(dst, lhs, rhs); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } } void LocationsBuilderMIPS64::VisitVecMax(HVecMax* instruction) { @@ -169,7 +556,60 @@ } void InstructionCodeGeneratorMIPS64::VisitVecMax(HVecMax* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); + LocationSummary* locations = instruction->GetLocations(); + VectorRegister lhs = VectorRegisterFrom(locations->InAt(0)); + VectorRegister rhs = VectorRegisterFrom(locations->InAt(1)); + VectorRegister dst = VectorRegisterFrom(locations->Out()); + switch (instruction->GetPackedType()) { + case Primitive::kPrimByte: + DCHECK_EQ(16u, instruction->GetVectorLength()); + if (instruction->IsUnsigned()) { + __ Max_uB(dst, lhs, rhs); + } else { + __ Max_sB(dst, lhs, rhs); + } + break; + case Primitive::kPrimChar: + case Primitive::kPrimShort: + DCHECK_EQ(8u, instruction->GetVectorLength()); + if (instruction->IsUnsigned()) { + __ Max_uH(dst, lhs, rhs); + } else { + __ Max_sH(dst, lhs, rhs); + } + break; + case Primitive::kPrimInt: + DCHECK_EQ(4u, instruction->GetVectorLength()); + if (instruction->IsUnsigned()) { + __ Max_uW(dst, lhs, rhs); + } else { + __ Max_sW(dst, lhs, rhs); + } + break; + case Primitive::kPrimLong: + DCHECK_EQ(2u, instruction->GetVectorLength()); + if (instruction->IsUnsigned()) { + __ Max_uD(dst, lhs, rhs); + } else { + __ Max_sD(dst, lhs, rhs); + } + break; + // When one of the arguments is NaN, fmax.df returns the other argument, but Java 
expects a NaN value. + // TODO: Fix max(x, NaN) cases for float and double. + case Primitive::kPrimFloat: + DCHECK_EQ(4u, instruction->GetVectorLength()); + DCHECK(!instruction->IsUnsigned()); + __ FmaxW(dst, lhs, rhs); + break; + case Primitive::kPrimDouble: + DCHECK_EQ(2u, instruction->GetVectorLength()); + DCHECK(!instruction->IsUnsigned()); + __ FmaxD(dst, lhs, rhs); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } } void LocationsBuilderMIPS64::VisitVecAnd(HVecAnd* instruction) { @@ -177,7 +617,27 @@ void LocationsBuilderMIPS64::VisitVecAnd(HVecAnd* instruction) { } void InstructionCodeGeneratorMIPS64::VisitVecAnd(HVecAnd* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); + LocationSummary* locations = instruction->GetLocations(); + VectorRegister lhs = VectorRegisterFrom(locations->InAt(0)); + VectorRegister rhs = VectorRegisterFrom(locations->InAt(1)); + VectorRegister dst = VectorRegisterFrom(locations->Out()); + switch (instruction->GetPackedType()) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimLong: + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: + DCHECK_LE(2u, instruction->GetVectorLength()); + DCHECK_LE(instruction->GetVectorLength(), 16u); + __ AndV(dst, lhs, rhs); // lanes do not matter + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } } void LocationsBuilderMIPS64::VisitVecAndNot(HVecAndNot* instruction) { @@ -193,7 +653,27 @@ void LocationsBuilderMIPS64::VisitVecOr(HVecOr* instruction) { } void InstructionCodeGeneratorMIPS64::VisitVecOr(HVecOr* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); + LocationSummary* locations = instruction->GetLocations(); + VectorRegister lhs = VectorRegisterFrom(locations->InAt(0)); + VectorRegister rhs = VectorRegisterFrom(locations->InAt(1)); + VectorRegister dst = VectorRegisterFrom(locations->Out()); + switch (instruction->GetPackedType()) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimLong: + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: + DCHECK_LE(2u, instruction->GetVectorLength()); + DCHECK_LE(instruction->GetVectorLength(), 16u); + __ OrV(dst, lhs, rhs); // lanes do not matter + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } } void LocationsBuilderMIPS64::VisitVecXor(HVecXor* instruction) { @@ -201,7 +681,27 @@ void LocationsBuilderMIPS64::VisitVecXor(HVecXor* instruction) { } void InstructionCodeGeneratorMIPS64::VisitVecXor(HVecXor* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); + LocationSummary* locations = instruction->GetLocations(); + VectorRegister lhs = VectorRegisterFrom(locations->InAt(0)); + VectorRegister rhs = VectorRegisterFrom(locations->InAt(1)); + VectorRegister dst = VectorRegisterFrom(locations->Out()); + switch (instruction->GetPackedType()) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimLong: + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: + DCHECK_LE(2u, instruction->GetVectorLength()); + DCHECK_LE(instruction->GetVectorLength(), 16u); + __ XorV(dst, lhs, rhs); // lanes do not matter + break; + default: + LOG(FATAL) << "Unsupported SIMD 
type"; + UNREACHABLE(); + } } // Helper to set up locations for vector shift operations. @@ -213,7 +713,9 @@ static void CreateVecShiftLocations(ArenaAllocator* arena, HVecBinaryOperation* case Primitive::kPrimShort: case Primitive::kPrimInt: case Primitive::kPrimLong: - DCHECK(locations); + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); + locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); break; default: LOG(FATAL) << "Unsupported SIMD type"; @@ -226,7 +728,32 @@ void LocationsBuilderMIPS64::VisitVecShl(HVecShl* instruction) { } void InstructionCodeGeneratorMIPS64::VisitVecShl(HVecShl* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); + LocationSummary* locations = instruction->GetLocations(); + VectorRegister lhs = VectorRegisterFrom(locations->InAt(0)); + VectorRegister dst = VectorRegisterFrom(locations->Out()); + int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue(); + switch (instruction->GetPackedType()) { + case Primitive::kPrimByte: + DCHECK_EQ(16u, instruction->GetVectorLength()); + __ SlliB(dst, lhs, value); + break; + case Primitive::kPrimChar: + case Primitive::kPrimShort: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ SlliH(dst, lhs, value); + break; + case Primitive::kPrimInt: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ SlliW(dst, lhs, value); + break; + case Primitive::kPrimLong: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ SlliD(dst, lhs, value); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } } void LocationsBuilderMIPS64::VisitVecShr(HVecShr* instruction) { @@ -234,7 +761,32 @@ void LocationsBuilderMIPS64::VisitVecShr(HVecShr* instruction) { } void InstructionCodeGeneratorMIPS64::VisitVecShr(HVecShr* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); + LocationSummary* locations = instruction->GetLocations(); + VectorRegister lhs = VectorRegisterFrom(locations->InAt(0)); + VectorRegister dst = VectorRegisterFrom(locations->Out()); + int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue(); + switch (instruction->GetPackedType()) { + case Primitive::kPrimByte: + DCHECK_EQ(16u, instruction->GetVectorLength()); + __ SraiB(dst, lhs, value); + break; + case Primitive::kPrimChar: + case Primitive::kPrimShort: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ SraiH(dst, lhs, value); + break; + case Primitive::kPrimInt: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ SraiW(dst, lhs, value); + break; + case Primitive::kPrimLong: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ SraiD(dst, lhs, value); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } } void LocationsBuilderMIPS64::VisitVecUShr(HVecUShr* instruction) { @@ -242,7 +794,32 @@ void LocationsBuilderMIPS64::VisitVecUShr(HVecUShr* instruction) { } void InstructionCodeGeneratorMIPS64::VisitVecUShr(HVecUShr* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); + LocationSummary* locations = instruction->GetLocations(); + VectorRegister lhs = VectorRegisterFrom(locations->InAt(0)); + VectorRegister dst = VectorRegisterFrom(locations->Out()); + int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue(); + switch (instruction->GetPackedType()) { + case Primitive::kPrimByte: + DCHECK_EQ(16u, instruction->GetVectorLength()); + __ SrliB(dst, lhs, value); + break; + case 
Primitive::kPrimChar: + case Primitive::kPrimShort: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ SrliH(dst, lhs, value); + break; + case Primitive::kPrimInt: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ SrliW(dst, lhs, value); + break; + case Primitive::kPrimLong: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ SrliD(dst, lhs, value); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } } void LocationsBuilderMIPS64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) { @@ -253,20 +830,143 @@ void InstructionCodeGeneratorMIPS64::VisitVecMultiplyAccumulate(HVecMultiplyAccu LOG(FATAL) << "No SIMD for " << instr->GetId(); } +// Helper to set up locations for vector memory operations. +static void CreateVecMemLocations(ArenaAllocator* arena, + HVecMemoryOperation* instruction, + bool is_load) { + LocationSummary* locations = new (arena) LocationSummary(instruction); + switch (instruction->GetPackedType()) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimLong: + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); + if (is_load) { + locations->SetOut(Location::RequiresFpuRegister()); + } else { + locations->SetInAt(2, Location::RequiresFpuRegister()); + } + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +// Helper to prepare register and offset for vector memory operations. Returns the offset and sets +// the output parameter adjusted_base to the original base or to a reserved temporary register (AT). 
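+// For example (a sketch of the cases below): an int[] element at constant index 4 selects scale +// TIMES_4 and folds the index into the offset, offset = DataOffset(4) + (4 << TIMES_4), before +// AdjustBaseOffsetAndElementSizeShift() brings it into addressable range; a variable index instead +// emits Dlsa(AT, index, base, TIMES_4), i.e. AT = base + (index << 2), and returns AT as the base.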
+int32_t InstructionCodeGeneratorMIPS64::VecAddress(LocationSummary* locations, + size_t size, + /* out */ GpuRegister* adjusted_base) { + GpuRegister base = locations->InAt(0).AsRegister<GpuRegister>(); + Location index = locations->InAt(1); + int scale = TIMES_1; + switch (size) { + case 2: scale = TIMES_2; break; + case 4: scale = TIMES_4; break; + case 8: scale = TIMES_8; break; + default: break; + } + int32_t offset = mirror::Array::DataOffset(size).Int32Value(); + + if (index.IsConstant()) { + offset += index.GetConstant()->AsIntConstant()->GetValue() << scale; + __ AdjustBaseOffsetAndElementSizeShift(base, offset, scale); + *adjusted_base = base; + } else { + GpuRegister index_reg = index.AsRegister<GpuRegister>(); + if (scale != TIMES_1) { + __ Dlsa(AT, index_reg, base, scale); + } else { + __ Daddu(AT, base, index_reg); + } + *adjusted_base = AT; + } + return offset; +} + void LocationsBuilderMIPS64::VisitVecLoad(HVecLoad* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); + CreateVecMemLocations(GetGraph()->GetArena(), instruction, /* is_load */ true); } void InstructionCodeGeneratorMIPS64::VisitVecLoad(HVecLoad* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); + LocationSummary* locations = instruction->GetLocations(); + size_t size = Primitive::ComponentSize(instruction->GetPackedType()); + VectorRegister reg = VectorRegisterFrom(locations->Out()); + GpuRegister base; + int32_t offset = VecAddress(locations, size, &base); + switch (instruction->GetPackedType()) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + DCHECK_EQ(16u, instruction->GetVectorLength()); + __ LdB(reg, base, offset); + break; + case Primitive::kPrimChar: + case Primitive::kPrimShort: + // Loading 8 bytes (needed when dealing with compressed strings in StringCharAt) from an unaligned + // memory address may cause a trap to the kernel if the CPU doesn't directly support unaligned + // loads and stores. + // TODO: Implement support for StringCharAt.
+ DCHECK(!instruction->IsStringCharAt()); + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ LdH(reg, base, offset); + break; + case Primitive::kPrimInt: + case Primitive::kPrimFloat: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ LdW(reg, base, offset); + break; + case Primitive::kPrimLong: + case Primitive::kPrimDouble: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ LdD(reg, base, offset); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } } void LocationsBuilderMIPS64::VisitVecStore(HVecStore* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); + CreateVecMemLocations(GetGraph()->GetArena(), instruction, /* is_load */ false); } void InstructionCodeGeneratorMIPS64::VisitVecStore(HVecStore* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); + LocationSummary* locations = instruction->GetLocations(); + size_t size = Primitive::ComponentSize(instruction->GetPackedType()); + VectorRegister reg = VectorRegisterFrom(locations->InAt(2)); + GpuRegister base; + int32_t offset = VecAddress(locations, size, &base); + switch (instruction->GetPackedType()) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + DCHECK_EQ(16u, instruction->GetVectorLength()); + __ StB(reg, base, offset); + break; + case Primitive::kPrimChar: + case Primitive::kPrimShort: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ StH(reg, base, offset); + break; + case Primitive::kPrimInt: + case Primitive::kPrimFloat: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ StW(reg, base, offset); + break; + case Primitive::kPrimLong: + case Primitive::kPrimDouble: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ StD(reg, base, offset); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } } #undef __ diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index f3ec112548..79fccfeaef 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -26,6 +26,7 @@ #include "intrinsics_x86.h" #include "mirror/array-inl.h" #include "mirror/class-inl.h" +#include "lock_word.h" #include "thread.h" #include "utils/assembler.h" #include "utils/stack_checks.h" @@ -1031,8 +1032,8 @@ CodeGeneratorX86::CodeGeneratorX86(HGraph* graph, move_resolver_(graph->GetArena(), this), assembler_(graph->GetArena()), isa_features_(isa_features), - pc_relative_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), boot_image_method_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + method_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), boot_image_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), type_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), @@ -1085,11 +1086,6 @@ void CodeGeneratorX86::GenerateFrameEntry() { } } - if (GetGraph()->HasShouldDeoptimizeFlag()) { - // Initialize should_deoptimize flag to 0. - __ movl(Address(ESP, -kShouldDeoptimizeFlagSize), Immediate(0)); - } - int adjust = GetFrameSize() - FrameEntrySpillSize(); __ subl(ESP, Immediate(adjust)); __ cfi().AdjustCFAOffset(adjust); @@ -1099,6 +1095,11 @@ void CodeGeneratorX86::GenerateFrameEntry() { if (RequiresCurrentMethod()) { __ movl(Address(ESP, kCurrentMethodStackOffset), kMethodRegisterArgument); } + + if (GetGraph()->HasShouldDeoptimizeFlag()) { + // Initialize should_deoptimize flag to 0. 
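+    // The flag now lives at a positive offset inside the established frame (the removed code above +    // wrote below ESP before the stack adjustment); a CHA deoptimization check can later reload it +    // with, roughly: __ movl(reg, Address(ESP, GetStackOffsetOfShouldDeoptimizeFlag())).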
+ __ movl(Address(ESP, GetStackOffsetOfShouldDeoptimizeFlag()), Immediate(0)); + } } void CodeGeneratorX86::GenerateFrameExit() { @@ -2203,7 +2204,6 @@ void InstructionCodeGeneratorX86::VisitInvokeStaticOrDirect(HInvokeStaticOrDirec LocationSummary* locations = invoke->GetLocations(); codegen_->GenerateStaticOrDirectCall( invoke, locations->HasTemps() ? locations->GetTemp(0) : Location::NoLocation()); - codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); } void LocationsBuilderX86::VisitInvokeVirtual(HInvokeVirtual* invoke) { @@ -2227,7 +2227,6 @@ void InstructionCodeGeneratorX86::VisitInvokeVirtual(HInvokeVirtual* invoke) { codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0)); DCHECK(!codegen_->IsLeafMethod()); - codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); } void LocationsBuilderX86::VisitInvokeInterface(HInvokeInterface* invoke) { @@ -4520,18 +4519,17 @@ Register CodeGeneratorX86::GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOr // save one load. However, since this is just an intrinsic slow path we prefer this // simple and more robust approach rather than trying to determine if that's the case. SlowPathCode* slow_path = GetCurrentSlowPath(); - if (slow_path != nullptr) { - if (slow_path->IsCoreRegisterSaved(location.AsRegister<Register>())) { - int stack_offset = slow_path->GetStackOffsetOfCoreRegister(location.AsRegister<Register>()); - __ movl(temp, Address(ESP, stack_offset)); - return temp; - } + DCHECK(slow_path != nullptr); // For intrinsified invokes the call is emitted on the slow path. + if (slow_path->IsCoreRegisterSaved(location.AsRegister<Register>())) { + int stack_offset = slow_path->GetStackOffsetOfCoreRegister(location.AsRegister<Register>()); + __ movl(temp, Address(ESP, stack_offset)); + return temp; } return location.AsRegister<Register>(); } -Location CodeGeneratorX86::GenerateCalleeMethodStaticOrDirectCall(HInvokeStaticOrDirect* invoke, - Location temp) { +void CodeGeneratorX86::GenerateStaticOrDirectCall( + HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path) { Location callee_method = temp; // For all kinds except kRecursive, callee will be in temp. switch (invoke->GetMethodLoadKind()) { case HInvokeStaticOrDirect::MethodLoadKind::kStringInit: { @@ -4555,45 +4553,21 @@ Location CodeGeneratorX86::GenerateCalleeMethodStaticOrDirectCall(HInvokeStaticO case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress: __ movl(temp.AsRegister<Register>(), Immediate(invoke->GetMethodAddress())); break; - case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative: { + case HInvokeStaticOrDirect::MethodLoadKind::kBssEntry: { Register base_reg = GetInvokeStaticOrDirectExtraParameter(invoke, temp.AsRegister<Register>()); __ movl(temp.AsRegister<Register>(), Address(base_reg, kDummy32BitOffset)); // Bind a new fixup label at the end of the "movl" insn.
- uint32_t offset = invoke->GetDexCacheArrayOffset(); - __ Bind(NewPcRelativeDexCacheArrayPatch( + __ Bind(NewMethodBssEntryPatch( invoke->InputAt(invoke->GetSpecialInputIndex())->AsX86ComputeBaseMethodAddress(), - invoke->GetDexFileForPcRelativeDexCache(), - offset)); + MethodReference(&GetGraph()->GetDexFile(), invoke->GetDexMethodIndex()))); break; } - case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: { - Location current_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()); - Register method_reg; - Register reg = temp.AsRegister<Register>(); - if (current_method.IsRegister()) { - method_reg = current_method.AsRegister<Register>(); - } else { - DCHECK(invoke->GetLocations()->Intrinsified()); - DCHECK(!current_method.IsValid()); - method_reg = reg; - __ movl(reg, Address(ESP, kCurrentMethodStackOffset)); - } - // /* ArtMethod*[] */ temp = temp.ptr_sized_fields_->dex_cache_resolved_methods_; - __ movl(reg, Address(method_reg, - ArtMethod::DexCacheResolvedMethodsOffset(kX86PointerSize).Int32Value())); - // temp = temp[index_in_cache]; - // Note: Don't use invoke->GetTargetMethod() as it may point to a different dex file. - uint32_t index_in_cache = invoke->GetDexMethodIndex(); - __ movl(reg, Address(reg, CodeGenerator::GetCachePointerOffset(index_in_cache))); - break; + case HInvokeStaticOrDirect::MethodLoadKind::kRuntimeCall: { + GenerateInvokeStaticOrDirectRuntimeCall(invoke, temp, slow_path); + return; // No code pointer retrieval; the runtime performs the call directly. } } - return callee_method; -} - -void CodeGeneratorX86::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) { - Location callee_method = GenerateCalleeMethodStaticOrDirectCall(invoke, temp); switch (invoke->GetCodePtrLocation()) { case HInvokeStaticOrDirect::CodePtrLocation::kCallSelf: @@ -4606,11 +4580,13 @@ void CodeGeneratorX86::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, kX86PointerSize).Int32Value())); break; } + RecordPcInfo(invoke, invoke->GetDexPc(), slow_path); DCHECK(!IsLeafMethod()); } -void CodeGeneratorX86::GenerateVirtualCall(HInvokeVirtual* invoke, Location temp_in) { +void CodeGeneratorX86::GenerateVirtualCall( + HInvokeVirtual* invoke, Location temp_in, SlowPathCode* slow_path) { Register temp = temp_in.AsRegister<Register>(); uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset( invoke->GetVTableIndex(), kX86PointerSize).Uint32Value(); @@ -4638,6 +4614,7 @@ void CodeGeneratorX86::GenerateVirtualCall(HInvokeVirtual* invoke, Location temp // call temp->GetEntryPoint(); __ call(Address( temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86PointerSize).Int32Value())); + RecordPcInfo(invoke, invoke->GetDexPc(), slow_path); } void CodeGeneratorX86::RecordBootMethodPatch(HInvokeStaticOrDirect* invoke) { @@ -4650,6 +4627,16 @@ void CodeGeneratorX86::RecordBootMethodPatch(HInvokeStaticOrDirect* invoke) { __ Bind(&boot_image_method_patches_.back().label); } +Label* CodeGeneratorX86::NewMethodBssEntryPatch( + HX86ComputeBaseMethodAddress* method_address, + MethodReference target_method) { + // Add the patch entry and bind its label at the end of the instruction. 
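+  // A typical call site (cf. GenerateStaticOrDirectCall above) looks roughly like: +  //   __ movl(temp, Address(base_reg, kDummy32BitOffset)); +  //   __ Bind(NewMethodBssEntryPatch(method_address, target_method)); +  // letting the linker rewrite the dummy 32-bit value with the address of the method's .bss entry.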
+ method_bss_entry_patches_.emplace_back(method_address, + *target_method.dex_file, + target_method.dex_method_index); + return &method_bss_entry_patches_.back().label; +} + void CodeGeneratorX86::RecordBootTypePatch(HLoadClass* load_class) { HX86ComputeBaseMethodAddress* address = load_class->InputAt(0)->AsX86ComputeBaseMethodAddress(); boot_image_type_patches_.emplace_back(address, @@ -4684,15 +4671,6 @@ Label* CodeGeneratorX86::NewStringBssEntryPatch(HLoadString* load_string) { return &string_patches_.back().label; } -Label* CodeGeneratorX86::NewPcRelativeDexCacheArrayPatch( - HX86ComputeBaseMethodAddress* method_address, - const DexFile& dex_file, - uint32_t element_offset) { - // Add the patch entry and bind its label at the end of the instruction. - pc_relative_dex_cache_patches_.emplace_back(method_address, dex_file, element_offset); - return &pc_relative_dex_cache_patches_.back().label; -} - // The label points to the end of the "movl" or another instruction but the literal offset // for method patch needs to point to the embedded constant which occupies the last 4 bytes. constexpr uint32_t kLabelPositionToLiteralOffsetAdjustment = 4u; @@ -4711,14 +4689,12 @@ inline void CodeGeneratorX86::EmitPcRelativeLinkerPatches( void CodeGeneratorX86::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) { DCHECK(linker_patches->empty()); size_t size = - pc_relative_dex_cache_patches_.size() + boot_image_method_patches_.size() + + method_bss_entry_patches_.size() + boot_image_type_patches_.size() + type_bss_entry_patches_.size() + string_patches_.size(); linker_patches->reserve(size); - EmitPcRelativeLinkerPatches<LinkerPatch::DexCacheArrayPatch>(pc_relative_dex_cache_patches_, - linker_patches); if (GetCompilerOptions().IsBootImage()) { EmitPcRelativeLinkerPatches<LinkerPatch::RelativeMethodPatch>(boot_image_method_patches_, linker_patches); @@ -4730,6 +4706,8 @@ void CodeGeneratorX86::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patche DCHECK(boot_image_type_patches_.empty()); EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(string_patches_, linker_patches); } + EmitPcRelativeLinkerPatches<LinkerPatch::MethodBssEntryPatch>(method_bss_entry_patches_, + linker_patches); EmitPcRelativeLinkerPatches<LinkerPatch::TypeBssEntryPatch>(type_bss_entry_patches_, linker_patches); DCHECK_EQ(size, linker_patches->size()); @@ -6065,7 +6043,7 @@ HLoadClass::LoadKind CodeGeneratorX86::GetSupportedLoadClassKind( DCHECK(Runtime::Current()->UseJitCompilation()); break; case HLoadClass::LoadKind::kBootImageAddress: - case HLoadClass::LoadKind::kDexCacheViaMethod: + case HLoadClass::LoadKind::kRuntimeCall: break; } return desired_class_load_kind; @@ -6073,7 +6051,7 @@ HLoadClass::LoadKind CodeGeneratorX86::GetSupportedLoadClassKind( void LocationsBuilderX86::VisitLoadClass(HLoadClass* cls) { HLoadClass::LoadKind load_kind = cls->GetLoadKind(); - if (load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) { + if (load_kind == HLoadClass::LoadKind::kRuntimeCall) { InvokeRuntimeCallingConvention calling_convention; CodeGenerator::CreateLoadClassRuntimeCallLocationSummary( cls, @@ -6127,7 +6105,7 @@ Label* CodeGeneratorX86::NewJitRootClassPatch(const DexFile& dex_file, // move. 
void InstructionCodeGeneratorX86::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFETY_ANALYSIS { HLoadClass::LoadKind load_kind = cls->GetLoadKind(); - if (load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) { + if (load_kind == HLoadClass::LoadKind::kRuntimeCall) { codegen_->GenerateLoadClassRuntimeCall(cls); return; } @@ -6187,7 +6165,7 @@ void InstructionCodeGeneratorX86::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFE GenerateGcRootFieldLoad(cls, out_loc, address, fixup_label, read_barrier_option); break; } - case HLoadClass::LoadKind::kDexCacheViaMethod: + case HLoadClass::LoadKind::kRuntimeCall: case HLoadClass::LoadKind::kInvalid: LOG(FATAL) << "UNREACHABLE"; UNREACHABLE(); @@ -6250,7 +6228,7 @@ HLoadString::LoadKind CodeGeneratorX86::GetSupportedLoadStringKind( DCHECK(Runtime::Current()->UseJitCompilation()); break; case HLoadString::LoadKind::kBootImageAddress: - case HLoadString::LoadKind::kDexCacheViaMethod: + case HLoadString::LoadKind::kRuntimeCall: break; } return desired_string_load_kind; @@ -6264,7 +6242,7 @@ void LocationsBuilderX86::VisitLoadString(HLoadString* load) { load_kind == HLoadString::LoadKind::kBssEntry) { locations->SetInAt(0, Location::RequiresRegister()); } - if (load_kind == HLoadString::LoadKind::kDexCacheViaMethod) { + if (load_kind == HLoadString::LoadKind::kRuntimeCall) { locations->SetOut(Location::RegisterLocation(EAX)); } else { locations->SetOut(Location::RequiresRegister()); diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h index 21c527e8b0..f48753b614 100644 --- a/compiler/optimizing/code_generator_x86.h +++ b/compiler/optimizing/code_generator_x86.h @@ -408,19 +408,19 @@ class CodeGeneratorX86 : public CodeGenerator { HInvokeStaticOrDirect* invoke) OVERRIDE; // Generate a call to a static or direct method. - Location GenerateCalleeMethodStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp); - void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) OVERRIDE; + void GenerateStaticOrDirectCall( + HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path = nullptr) OVERRIDE; // Generate a call to a virtual method. - void GenerateVirtualCall(HInvokeVirtual* invoke, Location temp) OVERRIDE; + void GenerateVirtualCall( + HInvokeVirtual* invoke, Location temp, SlowPathCode* slow_path = nullptr) OVERRIDE; void RecordBootMethodPatch(HInvokeStaticOrDirect* invoke); + Label* NewMethodBssEntryPatch(HX86ComputeBaseMethodAddress* method_address, + MethodReference target_method); void RecordBootTypePatch(HLoadClass* load_class); Label* NewTypeBssEntryPatch(HLoadClass* load_class); void RecordBootStringPatch(HLoadString* load_string); Label* NewStringBssEntryPatch(HLoadString* load_string); - Label* NewPcRelativeDexCacheArrayPatch(HX86ComputeBaseMethodAddress* method_address, - const DexFile& dex_file, - uint32_t element_offset); Label* NewJitRootStringPatch(const DexFile& dex_file, dex::StringIndex dex_index, Handle<mirror::String> handle); @@ -632,10 +632,10 @@ class CodeGeneratorX86 : public CodeGenerator { X86Assembler assembler_; const X86InstructionSetFeatures& isa_features_; - // PC-relative DexCache access info. - ArenaDeque<X86PcRelativePatchInfo> pc_relative_dex_cache_patches_; // PC-relative method patch info for kBootImageLinkTimePcRelative. ArenaDeque<X86PcRelativePatchInfo> boot_image_method_patches_; + // PC-relative method patch info for kBssEntry. 
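+  // (One label per kBssEntry movl: filled in by NewMethodBssEntryPatch() and emitted as +  // LinkerPatch::MethodBssEntryPatch in EmitLinkerPatches().)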
+ ArenaDeque<X86PcRelativePatchInfo> method_bss_entry_patches_; // PC-relative type patch info for kBootImageLinkTimePcRelative. ArenaDeque<X86PcRelativePatchInfo> boot_image_type_patches_; // Type patch locations for kBssEntry. diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index bf1c42ae8e..57319ce735 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -23,6 +23,7 @@ #include "gc/accounting/card_table.h" #include "intrinsics.h" #include "intrinsics_x86_64.h" +#include "lock_word.h" #include "mirror/array-inl.h" #include "mirror/class-inl.h" #include "mirror/object_reference.h" @@ -976,9 +977,10 @@ HInvokeStaticOrDirect::DispatchInfo CodeGeneratorX86_64::GetSupportedInvokeStati return desired_dispatch_info; } -Location CodeGeneratorX86_64::GenerateCalleeMethodStaticOrDirectCall(HInvokeStaticOrDirect* invoke, - Location temp) { +void CodeGeneratorX86_64::GenerateStaticOrDirectCall( + HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path) { // All registers are assumed to be correctly set up. + Location callee_method = temp; // For all kinds except kRecursive, callee will be in temp. switch (invoke->GetMethodLoadKind()) { case HInvokeStaticOrDirect::MethodLoadKind::kStringInit: { @@ -1000,44 +1002,19 @@ Location CodeGeneratorX86_64::GenerateCalleeMethodStaticOrDirectCall(HInvokeStat case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress: Load64BitValue(temp.AsRegister<CpuRegister>(), invoke->GetMethodAddress()); break; - case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative: { + case HInvokeStaticOrDirect::MethodLoadKind::kBssEntry: { __ movq(temp.AsRegister<CpuRegister>(), Address::Absolute(kDummy32BitOffset, /* no_rip */ false)); // Bind a new fixup label at the end of the "movl" insn. - uint32_t offset = invoke->GetDexCacheArrayOffset(); - __ Bind(NewPcRelativeDexCacheArrayPatch(invoke->GetDexFileForPcRelativeDexCache(), offset)); + __ Bind(NewMethodBssEntryPatch( + MethodReference(&GetGraph()->GetDexFile(), invoke->GetDexMethodIndex()))); break; } - case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: { - Location current_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()); - Register method_reg; - CpuRegister reg = temp.AsRegister<CpuRegister>(); - if (current_method.IsRegister()) { - method_reg = current_method.AsRegister<Register>(); - } else { - DCHECK(invoke->GetLocations()->Intrinsified()); - DCHECK(!current_method.IsValid()); - method_reg = reg.AsRegister(); - __ movq(reg, Address(CpuRegister(RSP), kCurrentMethodStackOffset)); - } - // /* ArtMethod*[] */ temp = temp.ptr_sized_fields_->dex_cache_resolved_methods_; - __ movq(reg, - Address(CpuRegister(method_reg), - ArtMethod::DexCacheResolvedMethodsOffset(kX86_64PointerSize).SizeValue())); - // temp = temp[index_in_cache]; - // Note: Don't use invoke->GetTargetMethod() as it may point to a different dex file. - uint32_t index_in_cache = invoke->GetDexMethodIndex(); - __ movq(reg, Address(reg, CodeGenerator::GetCachePointerOffset(index_in_cache))); - break; + case HInvokeStaticOrDirect::MethodLoadKind::kRuntimeCall: { + GenerateInvokeStaticOrDirectRuntimeCall(invoke, temp, slow_path); + return; // No code pointer retrieval; the runtime performs the call directly. } } - return callee_method; -} - -void CodeGeneratorX86_64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, - Location temp) { - // All registers are assumed to be correctly set up. 
- Location callee_method = GenerateCalleeMethodStaticOrDirectCall(invoke, temp); switch (invoke->GetCodePtrLocation()) { case HInvokeStaticOrDirect::CodePtrLocation::kCallSelf: @@ -1050,11 +1027,13 @@ void CodeGeneratorX86_64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invo kX86_64PointerSize).SizeValue())); break; } + RecordPcInfo(invoke, invoke->GetDexPc(), slow_path); DCHECK(!IsLeafMethod()); } -void CodeGeneratorX86_64::GenerateVirtualCall(HInvokeVirtual* invoke, Location temp_in) { +void CodeGeneratorX86_64::GenerateVirtualCall( + HInvokeVirtual* invoke, Location temp_in, SlowPathCode* slow_path) { CpuRegister temp = temp_in.AsRegister<CpuRegister>(); size_t method_offset = mirror::Class::EmbeddedVTableEntryOffset( invoke->GetVTableIndex(), kX86_64PointerSize).SizeValue(); @@ -1083,6 +1062,7 @@ void CodeGeneratorX86_64::GenerateVirtualCall(HInvokeVirtual* invoke, Location t // call temp->GetEntryPoint(); __ call(Address(temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset( kX86_64PointerSize).SizeValue())); + RecordPcInfo(invoke, invoke->GetDexPc(), slow_path); } void CodeGeneratorX86_64::RecordBootMethodPatch(HInvokeStaticOrDirect* invoke) { @@ -1091,6 +1071,12 @@ void CodeGeneratorX86_64::RecordBootMethodPatch(HInvokeStaticOrDirect* invoke) { __ Bind(&boot_image_method_patches_.back().label); } +Label* CodeGeneratorX86_64::NewMethodBssEntryPatch(MethodReference target_method) { + // Add a patch entry and return the label. + method_bss_entry_patches_.emplace_back(*target_method.dex_file, target_method.dex_method_index); + return &method_bss_entry_patches_.back().label; +} + void CodeGeneratorX86_64::RecordBootTypePatch(HLoadClass* load_class) { boot_image_type_patches_.emplace_back(load_class->GetDexFile(), load_class->GetTypeIndex().index_); @@ -1114,13 +1100,6 @@ Label* CodeGeneratorX86_64::NewStringBssEntryPatch(HLoadString* load_string) { return &string_patches_.back().label; } -Label* CodeGeneratorX86_64::NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file, - uint32_t element_offset) { - // Add a patch entry and return the label. - pc_relative_dex_cache_patches_.emplace_back(dex_file, element_offset); - return &pc_relative_dex_cache_patches_.back().label; -} - // The label points to the end of the "movl" or another instruction but the literal offset // for method patch needs to point to the embedded constant which occupies the last 4 bytes. 
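// For example, a `movl reg, [rip + disp32]` binds its label right after the instruction, so the // patch position is, in effect, label.Position() - kLabelPositionToLiteralOffsetAdjustment, i.e. // the start of the trailing 4-byte constant.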
constexpr uint32_t kLabelPositionToLiteralOffsetAdjustment = 4u; @@ -1139,14 +1118,12 @@ inline void CodeGeneratorX86_64::EmitPcRelativeLinkerPatches( void CodeGeneratorX86_64::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) { DCHECK(linker_patches->empty()); size_t size = - pc_relative_dex_cache_patches_.size() + boot_image_method_patches_.size() + + method_bss_entry_patches_.size() + boot_image_type_patches_.size() + type_bss_entry_patches_.size() + string_patches_.size(); linker_patches->reserve(size); - EmitPcRelativeLinkerPatches<LinkerPatch::DexCacheArrayPatch>(pc_relative_dex_cache_patches_, - linker_patches); if (GetCompilerOptions().IsBootImage()) { EmitPcRelativeLinkerPatches<LinkerPatch::RelativeMethodPatch>(boot_image_method_patches_, linker_patches); @@ -1158,6 +1135,8 @@ void CodeGeneratorX86_64::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_pat DCHECK(boot_image_type_patches_.empty()); EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(string_patches_, linker_patches); } + EmitPcRelativeLinkerPatches<LinkerPatch::MethodBssEntryPatch>(method_bss_entry_patches_, + linker_patches); EmitPcRelativeLinkerPatches<LinkerPatch::TypeBssEntryPatch>(type_bss_entry_patches_, linker_patches); DCHECK_EQ(size, linker_patches->size()); @@ -1246,8 +1225,8 @@ CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph, assembler_(graph->GetArena()), isa_features_(isa_features), constant_area_start_(0), - pc_relative_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), boot_image_method_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + method_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), boot_image_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), type_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), @@ -1319,12 +1298,6 @@ void CodeGeneratorX86_64::GenerateFrameEntry() { } } - if (GetGraph()->HasShouldDeoptimizeFlag()) { - // Initialize should_deoptimize flag to 0. - __ movl(Address(CpuRegister(RSP), xmm_spill_location - kShouldDeoptimizeFlagSize), - Immediate(0)); - } - // Save the current method if we need it. Note that we do not // do this in HCurrentMethod, as the instruction might have been removed // in the SSA graph. @@ -1332,6 +1305,11 @@ void CodeGeneratorX86_64::GenerateFrameEntry() { __ movq(Address(CpuRegister(RSP), kCurrentMethodStackOffset), CpuRegister(kMethodRegisterArgument)); } + + if (GetGraph()->HasShouldDeoptimizeFlag()) { + // Initialize should_deoptimize flag to 0. + __ movl(Address(CpuRegister(RSP), GetStackOffsetOfShouldDeoptimizeFlag()), Immediate(0)); + } } void CodeGeneratorX86_64::GenerateFrameExit() { @@ -2392,7 +2370,6 @@ void InstructionCodeGeneratorX86_64::VisitInvokeStaticOrDirect(HInvokeStaticOrDi LocationSummary* locations = invoke->GetLocations(); codegen_->GenerateStaticOrDirectCall( invoke, locations->HasTemps() ? 
locations->GetTemp(0) : Location::NoLocation()); - codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); } void LocationsBuilderX86_64::HandleInvoke(HInvoke* invoke) { @@ -2416,7 +2393,6 @@ void InstructionCodeGeneratorX86_64::VisitInvokeVirtual(HInvokeVirtual* invoke) codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0)); DCHECK(!codegen_->IsLeafMethod()); - codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); } void LocationsBuilderX86_64::VisitInvokeInterface(HInvokeInterface* invoke) { @@ -5482,7 +5458,7 @@ HLoadClass::LoadKind CodeGeneratorX86_64::GetSupportedLoadClassKind( DCHECK(Runtime::Current()->UseJitCompilation()); break; case HLoadClass::LoadKind::kBootImageAddress: - case HLoadClass::LoadKind::kDexCacheViaMethod: + case HLoadClass::LoadKind::kRuntimeCall: break; } return desired_class_load_kind; @@ -5490,7 +5466,7 @@ HLoadClass::LoadKind CodeGeneratorX86_64::GetSupportedLoadClassKind( void LocationsBuilderX86_64::VisitLoadClass(HLoadClass* cls) { HLoadClass::LoadKind load_kind = cls->GetLoadKind(); - if (load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) { + if (load_kind == HLoadClass::LoadKind::kRuntimeCall) { // Custom calling convention: RAX serves as both input and output. CodeGenerator::CreateLoadClassRuntimeCallLocationSummary( cls, @@ -5541,7 +5517,7 @@ Label* CodeGeneratorX86_64::NewJitRootClassPatch(const DexFile& dex_file, // move. void InstructionCodeGeneratorX86_64::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFETY_ANALYSIS { HLoadClass::LoadKind load_kind = cls->GetLoadKind(); - if (load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) { + if (load_kind == HLoadClass::LoadKind::kRuntimeCall) { codegen_->GenerateLoadClassRuntimeCall(cls); return; } @@ -5652,7 +5628,7 @@ HLoadString::LoadKind CodeGeneratorX86_64::GetSupportedLoadStringKind( DCHECK(Runtime::Current()->UseJitCompilation()); break; case HLoadString::LoadKind::kBootImageAddress: - case HLoadString::LoadKind::kDexCacheViaMethod: + case HLoadString::LoadKind::kRuntimeCall: break; } return desired_string_load_kind; @@ -5661,7 +5637,7 @@ HLoadString::LoadKind CodeGeneratorX86_64::GetSupportedLoadStringKind( void LocationsBuilderX86_64::VisitLoadString(HLoadString* load) { LocationSummary::CallKind call_kind = CodeGenerator::GetLoadStringCallKind(load); LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind); - if (load->GetLoadKind() == HLoadString::LoadKind::kDexCacheViaMethod) { + if (load->GetLoadKind() == HLoadString::LoadKind::kRuntimeCall) { locations->SetOut(Location::RegisterLocation(RAX)); } else { locations->SetOut(Location::RequiresRegister()); diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h index 3039e0519c..33c64290d4 100644 --- a/compiler/optimizing/code_generator_x86_64.h +++ b/compiler/optimizing/code_generator_x86_64.h @@ -404,16 +404,17 @@ class CodeGeneratorX86_64 : public CodeGenerator { const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info, HInvokeStaticOrDirect* invoke) OVERRIDE; - Location GenerateCalleeMethodStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp); - void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) OVERRIDE; - void GenerateVirtualCall(HInvokeVirtual* invoke, Location temp) OVERRIDE; + void GenerateStaticOrDirectCall( + HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path = nullptr) OVERRIDE; + void GenerateVirtualCall( + HInvokeVirtual* invoke, Location temp, SlowPathCode* slow_path = 
nullptr) OVERRIDE; void RecordBootMethodPatch(HInvokeStaticOrDirect* invoke); + Label* NewMethodBssEntryPatch(MethodReference target_method); void RecordBootTypePatch(HLoadClass* load_class); Label* NewTypeBssEntryPatch(HLoadClass* load_class); void RecordBootStringPatch(HLoadString* load_string); Label* NewStringBssEntryPatch(HLoadString* load_string); - Label* NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file, uint32_t element_offset); Label* NewJitRootStringPatch(const DexFile& dex_file, dex::StringIndex dex_index, Handle<mirror::String> handle); @@ -602,10 +603,10 @@ class CodeGeneratorX86_64 : public CodeGenerator { // Used for fixups to the constant area. int constant_area_start_; - // PC-relative DexCache access info. - ArenaDeque<PatchInfo<Label>> pc_relative_dex_cache_patches_; // PC-relative method patch info for kBootImageLinkTimePcRelative. ArenaDeque<PatchInfo<Label>> boot_image_method_patches_; + // PC-relative method patch info for kBssEntry. + ArenaDeque<PatchInfo<Label>> method_bss_entry_patches_; // PC-relative type patch info for kBootImageLinkTimePcRelative. ArenaDeque<PatchInfo<Label>> boot_image_type_patches_; // Type patch locations for kBssEntry. diff --git a/compiler/optimizing/common_arm64.h b/compiler/optimizing/common_arm64.h index 721f74eeee..e73fd7ddc8 100644 --- a/compiler/optimizing/common_arm64.h +++ b/compiler/optimizing/common_arm64.h @@ -234,9 +234,20 @@ inline vixl::aarch64::Operand OperandFromMemOperand( } } -inline bool CanEncodeConstantAsImmediate(HConstant* constant, HInstruction* instr) { - DCHECK(constant->IsIntConstant() || constant->IsLongConstant() || constant->IsNullConstant()) - << constant->DebugName(); +inline bool Arm64CanEncodeConstantAsImmediate(HConstant* constant, HInstruction* instr) { + int64_t value = CodeGenerator::GetInt64ValueOf(constant); + + // TODO: Improve this when IsSIMDConstantEncodable method is implemented in VIXL. + if (instr->IsVecReplicateScalar()) { + if (constant->IsLongConstant()) { + return false; + } else if (constant->IsFloatConstant()) { + return vixl::aarch64::Assembler::IsImmFP32(constant->AsFloatConstant()->GetValue()); + } else if (constant->IsDoubleConstant()) { + return vixl::aarch64::Assembler::IsImmFP64(constant->AsDoubleConstant()->GetValue()); + } + return IsUint<8>(value); + } // For single uses we let VIXL handle the constant generation since it will // use registers that are not managed by the register allocator (wip0, wip1). @@ -249,8 +260,6 @@ inline bool CanEncodeConstantAsImmediate(HConstant* constant, HInstruction* inst return true; } - int64_t value = CodeGenerator::GetInt64ValueOf(constant); - if (instr->IsAnd() || instr->IsOr() || instr->IsXor()) { // Uses logical operations. 
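    // A logical immediate is a rotated repeating pattern of contiguous set bits: for example, // 0x00ff00ff00ff00ff (the 16-bit pattern 0x00ff repeated) is encodable, while an arbitrary // value such as 0x12345678 is not.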
return vixl::aarch64::Assembler::IsImmLogical(value, vixl::aarch64::kXRegSize); @@ -276,7 +285,7 @@ inline bool CanEncodeConstantAsImmediate(HConstant* constant, HInstruction* inst inline Location ARM64EncodableConstantOrRegister(HInstruction* constant, HInstruction* instr) { if (constant->IsConstant() - && CanEncodeConstantAsImmediate(constant->AsConstant(), instr)) { + && Arm64CanEncodeConstantAsImmediate(constant->AsConstant(), instr)) { return Location::ConstantLocation(constant->AsConstant()); } diff --git a/compiler/optimizing/dex_cache_array_fixups_arm.cc b/compiler/optimizing/dex_cache_array_fixups_arm.cc deleted file mode 100644 index 0c832a5c35..0000000000 --- a/compiler/optimizing/dex_cache_array_fixups_arm.cc +++ /dev/null @@ -1,116 +0,0 @@ -/* - * Copyright (C) 2015 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "dex_cache_array_fixups_arm.h" - -#include "base/arena_containers.h" -#ifdef ART_USE_OLD_ARM_BACKEND -#include "code_generator_arm.h" -#include "intrinsics_arm.h" -#else -#include "code_generator_arm_vixl.h" -#include "intrinsics_arm_vixl.h" -#endif -#include "utils/dex_cache_arrays_layout-inl.h" - -namespace art { -namespace arm { -#ifdef ART_USE_OLD_ARM_BACKEND -typedef CodeGeneratorARM CodeGeneratorARMType; -typedef IntrinsicLocationsBuilderARM IntrinsicLocationsBuilderARMType; -#else -typedef CodeGeneratorARMVIXL CodeGeneratorARMType; -typedef IntrinsicLocationsBuilderARMVIXL IntrinsicLocationsBuilderARMType; -#endif - -/** - * Finds instructions that need the dex cache arrays base as an input. - */ -class DexCacheArrayFixupsVisitor : public HGraphVisitor { - public: - DexCacheArrayFixupsVisitor(HGraph* graph, CodeGenerator* codegen) - : HGraphVisitor(graph), - codegen_(down_cast<CodeGeneratorARMType*>(codegen)), - dex_cache_array_bases_(std::less<const DexFile*>(), - // Attribute memory use to code generator. - graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {} - - void MoveBasesIfNeeded() { - for (const auto& entry : dex_cache_array_bases_) { - // Bring the base closer to the first use (previously, it was in the - // entry block) and relieve some pressure on the register allocator - // while avoiding recalculation of the base in a loop. - HArmDexCacheArraysBase* base = entry.second; - base->MoveBeforeFirstUserAndOutOfLoops(); - } - } - - private: - void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE { - // If this is an invoke with PC-relative access to the dex cache methods array, - // we need to add the dex cache arrays base as the special input. - if (invoke->HasPcRelativeDexCache() && - !IsCallFreeIntrinsic<IntrinsicLocationsBuilderARMType>(invoke, codegen_)) { - HArmDexCacheArraysBase* base = - GetOrCreateDexCacheArrayBase(invoke, invoke->GetDexFileForPcRelativeDexCache()); - // Update the element offset in base. 
- DexCacheArraysLayout layout(kArmPointerSize, &invoke->GetDexFileForPcRelativeDexCache()); - base->UpdateElementOffset(layout.MethodOffset(invoke->GetDexMethodIndex())); - // Add the special argument base to the method. - DCHECK(!invoke->HasCurrentMethodInput()); - invoke->AddSpecialInput(base); - } - } - - HArmDexCacheArraysBase* GetOrCreateDexCacheArrayBase(HInstruction* cursor, - const DexFile& dex_file) { - if (GetGraph()->HasIrreducibleLoops()) { - HArmDexCacheArraysBase* base = new (GetGraph()->GetArena()) HArmDexCacheArraysBase(dex_file); - cursor->GetBlock()->InsertInstructionBefore(base, cursor); - return base; - } else { - // Ensure we only initialize the pointer once for each dex file. - auto lb = dex_cache_array_bases_.lower_bound(&dex_file); - if (lb != dex_cache_array_bases_.end() && - !dex_cache_array_bases_.key_comp()(&dex_file, lb->first)) { - return lb->second; - } - - // Insert the base at the start of the entry block, move it to a better - // position later in MoveBaseIfNeeded(). - HArmDexCacheArraysBase* base = new (GetGraph()->GetArena()) HArmDexCacheArraysBase(dex_file); - HBasicBlock* entry_block = GetGraph()->GetEntryBlock(); - entry_block->InsertInstructionBefore(base, entry_block->GetFirstInstruction()); - dex_cache_array_bases_.PutBefore(lb, &dex_file, base); - return base; - } - } - - CodeGeneratorARMType* codegen_; - - using DexCacheArraysBaseMap = - ArenaSafeMap<const DexFile*, HArmDexCacheArraysBase*, std::less<const DexFile*>>; - DexCacheArraysBaseMap dex_cache_array_bases_; -}; - -void DexCacheArrayFixups::Run() { - DexCacheArrayFixupsVisitor visitor(graph_, codegen_); - visitor.VisitInsertionOrder(); - visitor.MoveBasesIfNeeded(); -} - -} // namespace arm -} // namespace art diff --git a/compiler/optimizing/dex_cache_array_fixups_arm.h b/compiler/optimizing/dex_cache_array_fixups_arm.h deleted file mode 100644 index 9d67a319b9..0000000000 --- a/compiler/optimizing/dex_cache_array_fixups_arm.h +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Copyright (C) 2015 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef ART_COMPILER_OPTIMIZING_DEX_CACHE_ARRAY_FIXUPS_ARM_H_ -#define ART_COMPILER_OPTIMIZING_DEX_CACHE_ARRAY_FIXUPS_ARM_H_ - -#include "nodes.h" -#include "optimization.h" - -namespace art { - -class CodeGenerator; - -namespace arm { - -class DexCacheArrayFixups : public HOptimization { - public: - DexCacheArrayFixups(HGraph* graph, CodeGenerator* codegen, OptimizingCompilerStats* stats) - : HOptimization(graph, kDexCacheArrayFixupsArmPassName, stats), - codegen_(codegen) {} - - static constexpr const char* kDexCacheArrayFixupsArmPassName = "dex_cache_array_fixups_arm"; - - void Run() OVERRIDE; - - private: - CodeGenerator* codegen_; -}; - -} // namespace arm -} // namespace art - -#endif // ART_COMPILER_OPTIMIZING_DEX_CACHE_ARRAY_FIXUPS_ARM_H_ diff --git a/compiler/optimizing/dex_cache_array_fixups_mips.cc b/compiler/optimizing/dex_cache_array_fixups_mips.cc deleted file mode 100644 index 7734f9197d..0000000000 --- a/compiler/optimizing/dex_cache_array_fixups_mips.cc +++ /dev/null @@ -1,111 +0,0 @@ -/* - * Copyright (C) 2016 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "code_generator_mips.h" -#include "dex_cache_array_fixups_mips.h" - -#include "base/arena_containers.h" -#include "intrinsics_mips.h" -#include "utils/dex_cache_arrays_layout-inl.h" - -namespace art { -namespace mips { - -/** - * Finds instructions that need the dex cache arrays base as an input. - */ -class DexCacheArrayFixupsVisitor : public HGraphVisitor { - public: - explicit DexCacheArrayFixupsVisitor(HGraph* graph, CodeGenerator* codegen) - : HGraphVisitor(graph), - codegen_(down_cast<CodeGeneratorMIPS*>(codegen)), - dex_cache_array_bases_(std::less<const DexFile*>(), - // Attribute memory use to code generator. - graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {} - - void MoveBasesIfNeeded() { - for (const auto& entry : dex_cache_array_bases_) { - // Bring the base closer to the first use (previously, it was in the - // entry block) and relieve some pressure on the register allocator - // while avoiding recalculation of the base in a loop. - HMipsDexCacheArraysBase* base = entry.second; - base->MoveBeforeFirstUserAndOutOfLoops(); - } - // Computing the dex cache base for PC-relative accesses will clobber RA with - // the NAL instruction on R2. Take a note of this before generating the method - // entry. - if (!dex_cache_array_bases_.empty()) { - codegen_->ClobberRA(); - } - } - - private: - void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE { - // If this is an invoke with PC-relative access to the dex cache methods array, - // we need to add the dex cache arrays base as the special input. - if (invoke->HasPcRelativeDexCache() && - !IsCallFreeIntrinsic<IntrinsicLocationsBuilderMIPS>(invoke, codegen_)) { - // Initialize base for target method dex file if needed. - HMipsDexCacheArraysBase* base = - GetOrCreateDexCacheArrayBase(invoke->GetDexFileForPcRelativeDexCache()); - // Update the element offset in base. 
- DexCacheArraysLayout layout(kMipsPointerSize, &invoke->GetDexFileForPcRelativeDexCache()); - base->UpdateElementOffset(layout.MethodOffset(invoke->GetDexMethodIndex())); - // Add the special argument base to the method. - DCHECK(!invoke->HasCurrentMethodInput()); - invoke->AddSpecialInput(base); - } - } - - HMipsDexCacheArraysBase* GetOrCreateDexCacheArrayBase(const DexFile& dex_file) { - return dex_cache_array_bases_.GetOrCreate( - &dex_file, - [this, &dex_file]() { - HMipsDexCacheArraysBase* base = - new (GetGraph()->GetArena()) HMipsDexCacheArraysBase(dex_file); - HBasicBlock* entry_block = GetGraph()->GetEntryBlock(); - // Insert the base at the start of the entry block, move it to a better - // position later in MoveBaseIfNeeded(). - entry_block->InsertInstructionBefore(base, entry_block->GetFirstInstruction()); - return base; - }); - } - - CodeGeneratorMIPS* codegen_; - - using DexCacheArraysBaseMap = - ArenaSafeMap<const DexFile*, HMipsDexCacheArraysBase*, std::less<const DexFile*>>; - DexCacheArraysBaseMap dex_cache_array_bases_; -}; - -void DexCacheArrayFixups::Run() { - CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen_); - if (mips_codegen->GetInstructionSetFeatures().IsR6()) { - // Do nothing for R6 because it has PC-relative addressing. - return; - } - if (graph_->HasIrreducibleLoops()) { - // Do not run this optimization, as irreducible loops do not work with an instruction - // that can be live-in at the irreducible loop header. - return; - } - DexCacheArrayFixupsVisitor visitor(graph_, codegen_); - visitor.VisitInsertionOrder(); - visitor.MoveBasesIfNeeded(); -} - -} // namespace mips -} // namespace art diff --git a/compiler/optimizing/dex_cache_array_fixups_mips.h b/compiler/optimizing/dex_cache_array_fixups_mips.h deleted file mode 100644 index 861a199d6c..0000000000 --- a/compiler/optimizing/dex_cache_array_fixups_mips.h +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Copyright (C) 2016 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef ART_COMPILER_OPTIMIZING_DEX_CACHE_ARRAY_FIXUPS_MIPS_H_ -#define ART_COMPILER_OPTIMIZING_DEX_CACHE_ARRAY_FIXUPS_MIPS_H_ - -#include "nodes.h" -#include "optimization.h" - -namespace art { - -class CodeGenerator; - -namespace mips { - -class DexCacheArrayFixups : public HOptimization { - public: - DexCacheArrayFixups(HGraph* graph, CodeGenerator* codegen, OptimizingCompilerStats* stats) - : HOptimization(graph, kDexCacheArrayFixupsMipsPassName, stats), - codegen_(codegen) {} - - static constexpr const char* kDexCacheArrayFixupsMipsPassName = "dex_cache_array_fixups_mips"; - - void Run() OVERRIDE; - - private: - CodeGenerator* codegen_; -}; - -} // namespace mips -} // namespace art - -#endif // ART_COMPILER_OPTIMIZING_DEX_CACHE_ARRAY_FIXUPS_MIPS_H_ diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc index 02816cf7ce..7dcf2440b2 100644 --- a/compiler/optimizing/graph_visualizer.cc +++ b/compiler/optimizing/graph_visualizer.cc @@ -34,6 +34,7 @@ #include "register_allocator_linear_scan.h" #include "ssa_liveness_analysis.h" #include "utils/assembler.h" +#include "utils/intrusive_forward_list.h" namespace art { @@ -66,6 +67,13 @@ class StringList { current->Dump(NewEntryStream()); } } + // Construct StringList from a list of elements. The value type must provide method `Dump`. + template <typename Container> + explicit StringList(const Container& list, Format format = kArrayBrackets) : StringList(format) { + for (const typename Container::value_type& current : list) { + current.Dump(NewEntryStream()); + } + } std::ostream& NewEntryStream() { if (is_empty_) { @@ -584,8 +592,8 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor { LiveInterval* interval = instruction->GetLiveInterval(); StartAttributeStream("ranges") << StringList(interval->GetFirstRange(), StringList::kSetBrackets); - StartAttributeStream("uses") << StringList(interval->GetFirstUse()); - StartAttributeStream("env_uses") << StringList(interval->GetFirstEnvironmentUse()); + StartAttributeStream("uses") << StringList(interval->GetUses()); + StartAttributeStream("env_uses") << StringList(interval->GetEnvironmentUses()); StartAttributeStream("is_fixed") << interval->IsFixed(); StartAttributeStream("is_split") << interval->IsSplit(); StartAttributeStream("is_low") << interval->IsLowInterval(); diff --git a/compiler/optimizing/induction_var_analysis.cc b/compiler/optimizing/induction_var_analysis.cc index 88473f02e5..84b20f65e3 100644 --- a/compiler/optimizing/induction_var_analysis.cc +++ b/compiler/optimizing/induction_var_analysis.cc @@ -695,8 +695,8 @@ HInductionVarAnalysis::InductionInfo* HInductionVarAnalysis::SolveOp(HLoopInform /*fetch*/ nullptr, type_); default: - CHECK(false) << op; - break; + LOG(FATAL) << op; + UNREACHABLE(); } } } diff --git a/compiler/optimizing/induction_var_range.cc b/compiler/optimizing/induction_var_range.cc index 7c833cf70c..c0ec58f824 100644 --- a/compiler/optimizing/induction_var_range.cc +++ b/compiler/optimizing/induction_var_range.cc @@ -1132,11 +1132,27 @@ bool InductionVarRange::GenerateLastValuePeriodic(HInductionVarAnalysis::Inducti /*out*/bool* needs_taken_test) const { DCHECK(info != nullptr); DCHECK_EQ(info->induction_class, HInductionVarAnalysis::kPeriodic); - // Count period. + // Count period and detect all-invariants. 
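+  // For example, `x = c - x` in the loop body produces the periodic sequence x0, c - x0, x0, ... +  // with period 2, so its last value depends only on the parity of the trip count.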
int64_t period = 1;
-  for (HInductionVarAnalysis::InductionInfo* p = info;
-       p->induction_class == HInductionVarAnalysis::kPeriodic;
-       p = p->op_b, ++period) {}
+  bool all_invariants = true;
+  HInductionVarAnalysis::InductionInfo* p = info;
+  for (; p->induction_class == HInductionVarAnalysis::kPeriodic; p = p->op_b, ++period) {
+    DCHECK_EQ(p->op_a->induction_class, HInductionVarAnalysis::kInvariant);
+    if (p->op_a->operation != HInductionVarAnalysis::kFetch) {
+      all_invariants = false;
+    }
+  }
+  DCHECK_EQ(p->induction_class, HInductionVarAnalysis::kInvariant);
+  if (p->operation != HInductionVarAnalysis::kFetch) {
+    all_invariants = false;
+  }
+  // Don't rely on FP arithmetic to be precise, unless the full period
+  // consists of pre-computed expressions only.
+  if (info->type == Primitive::kPrimFloat || info->type == Primitive::kPrimDouble) {
+    if (!all_invariants) {
+      return false;
+    }
+  }
  // Handle any periodic(x, periodic(.., y)) for known maximum index value m.
  int64_t m = 0;
  if (IsConstant(trip->op_a, kExact, &m) && m >= 1) {
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index f203d7f47e..142c95780e 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -56,7 +56,7 @@ static constexpr size_t kMaximumNumberOfInstructionsForSmallMethod = 3;
 // Limit the number of dex registers that we accumulate while inlining
 // to avoid creating a large amount of nested environments.
-static constexpr size_t kMaximumNumberOfCumulatedDexRegisters = 64;
+static constexpr size_t kMaximumNumberOfCumulatedDexRegisters = 32;
 // Limit recursive call inlining, which does not benefit from too
 // much inlining compared to code locality.
@@ -672,6 +672,12 @@ HInliner::InlineCacheType HInliner::ExtractClassesFromOfflineProfile(
   ObjPtr<mirror::DexCache> dex_cache =
       dex_profile_index_to_dex_cache[class_ref.dex_profile_index];
   DCHECK(dex_cache != nullptr);
+
+  if (!dex_cache->GetDexFile()->IsTypeIndexValid(class_ref.type_index)) {
+    VLOG(compiler) << "Profile data corrupt: type index " << class_ref.type_index
+                   << " is invalid in location " << dex_cache->GetDexFile()->GetLocation();
+    return kInlineCacheNoData;
+  }
   ObjPtr<mirror::Class> clazz = ClassLinker::LookupResolvedType(
       class_ref.type_index,
       dex_cache,
diff --git a/compiler/optimizing/instruction_builder.cc b/compiler/optimizing/instruction_builder.cc
index df9e7164ed..a73b1246d8 100644
--- a/compiler/optimizing/instruction_builder.cc
+++ b/compiler/optimizing/instruction_builder.cc
@@ -888,7 +888,7 @@ bool HInstructionBuilder::BuildInvoke(const Instruction& instruction,
   }
   HInvokeStaticOrDirect::DispatchInfo dispatch_info = {
-      HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod,
+      HInvokeStaticOrDirect::MethodLoadKind::kRuntimeCall,
       HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod,
       0u
   };
diff --git a/compiler/optimizing/intrinsics.cc b/compiler/optimizing/intrinsics.cc
index 6236bd87ab..b664d41013 100644
--- a/compiler/optimizing/intrinsics.cc
+++ b/compiler/optimizing/intrinsics.cc
@@ -25,7 +25,7 @@
 #include "mirror/dex_cache-inl.h"
 #include "nodes.h"
 #include "scoped_thread_state_change-inl.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 #include "utils.h"
 namespace art {
@@ -146,7 +146,7 @@ void IntrinsicsRecognizer::Run() {
       Intrinsics intrinsic = static_cast<Intrinsics>(art_method->GetIntrinsic());
       if (!CheckInvokeType(intrinsic, invoke)) {
         LOG(WARNING) << "Found an intrinsic with unexpected invoke type: "
-            << intrinsic << " for "
+            << static_cast<uint32_t>(intrinsic) << "
for " << art_method->PrettyMethod() << invoke->DebugName(); } else { diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc index 9803c9a0e9..ae5f8d1760 100644 --- a/compiler/optimizing/intrinsics_arm.cc +++ b/compiler/optimizing/intrinsics_arm.cc @@ -28,7 +28,7 @@ #include "mirror/reference.h" #include "mirror/string.h" #include "scoped_thread_state_change-inl.h" -#include "thread-inl.h" +#include "thread-current-inl.h" #include "utils/arm/assembler_arm.h" namespace art { @@ -2624,58 +2624,6 @@ void IntrinsicCodeGeneratorARM::VisitDoubleIsInfinite(HInvoke* invoke) { codegen_->GenerateConditionWithZero(kCondEQ, out, out); } -void IntrinsicLocationsBuilderARM::VisitReferenceGetReferent(HInvoke* invoke) { - if (kEmitCompilerReadBarrier) { - // Do not intrinsify this call with the read barrier configuration. - return; - } - LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kCallOnSlowPath, - kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetOut(Location::SameAsFirstInput()); - locations->AddTemp(Location::RequiresRegister()); -} - -void IntrinsicCodeGeneratorARM::VisitReferenceGetReferent(HInvoke* invoke) { - DCHECK(!kEmitCompilerReadBarrier); - ArmAssembler* const assembler = GetAssembler(); - LocationSummary* locations = invoke->GetLocations(); - - Register obj = locations->InAt(0).AsRegister<Register>(); - Register out = locations->Out().AsRegister<Register>(); - - SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathARM(invoke); - codegen_->AddSlowPath(slow_path); - - // Load ArtMethod first. - HInvokeStaticOrDirect* invoke_direct = invoke->AsInvokeStaticOrDirect(); - DCHECK(invoke_direct != nullptr); - Register temp = codegen_->GenerateCalleeMethodStaticOrDirectCall( - invoke_direct, locations->GetTemp(0)).AsRegister<Register>(); - - // Now get declaring class. - __ ldr(temp, Address(temp, ArtMethod::DeclaringClassOffset().Int32Value())); - - uint32_t slow_path_flag_offset = codegen_->GetReferenceSlowFlagOffset(); - uint32_t disable_flag_offset = codegen_->GetReferenceDisableFlagOffset(); - DCHECK_NE(slow_path_flag_offset, 0u); - DCHECK_NE(disable_flag_offset, 0u); - DCHECK_NE(slow_path_flag_offset, disable_flag_offset); - - // Check static flags that prevent using intrinsic. - __ ldr(IP, Address(temp, disable_flag_offset)); - __ ldr(temp, Address(temp, slow_path_flag_offset)); - __ orr(IP, IP, ShifterOperand(temp)); - __ CompareAndBranchIfNonZero(IP, slow_path->GetEntryLabel()); - - // Fast path. - __ ldr(out, Address(obj, mirror::Reference::ReferentOffset().Int32Value())); - codegen_->MaybeRecordImplicitNullCheck(invoke); - __ MaybeUnpoisonHeapReference(out); - __ Bind(slow_path->GetExitLabel()); -} - void IntrinsicLocationsBuilderARM::VisitIntegerValueOf(HInvoke* invoke) { InvokeRuntimeCallingConvention calling_convention; IntrinsicVisitor::ComputeIntegerValueOfLocations( @@ -2782,6 +2730,7 @@ UNIMPLEMENTED_INTRINSIC(ARM, MathRoundDouble) // Could be done by changing rou UNIMPLEMENTED_INTRINSIC(ARM, MathRoundFloat) // Could be done by changing rounding mode, maybe? UNIMPLEMENTED_INTRINSIC(ARM, UnsafeCASLong) // High register pressure. 
UNIMPLEMENTED_INTRINSIC(ARM, SystemArrayCopyChar) +UNIMPLEMENTED_INTRINSIC(ARM, ReferenceGetReferent) UNIMPLEMENTED_INTRINSIC(ARM, IntegerHighestOneBit) UNIMPLEMENTED_INTRINSIC(ARM, LongHighestOneBit) UNIMPLEMENTED_INTRINSIC(ARM, IntegerLowestOneBit) diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc index b511c5a18d..37d79814be 100644 --- a/compiler/optimizing/intrinsics_arm64.cc +++ b/compiler/optimizing/intrinsics_arm64.cc @@ -28,7 +28,7 @@ #include "mirror/reference.h" #include "mirror/string-inl.h" #include "scoped_thread_state_change-inl.h" -#include "thread-inl.h" +#include "thread-current-inl.h" #include "utils/arm64/assembler_arm64.h" using namespace vixl::aarch64; // NOLINT(build/namespaces) @@ -124,12 +124,12 @@ class IntrinsicSlowPathARM64 : public SlowPathCodeARM64 { // are no pools emitted. vixl::EmissionCheckScope guard(codegen->GetVIXLAssembler(), kInvokeCodeMarginSizeInBytes); if (invoke_->IsInvokeStaticOrDirect()) { - codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), - LocationFrom(kArtMethodRegister)); + codegen->GenerateStaticOrDirectCall( + invoke_->AsInvokeStaticOrDirect(), LocationFrom(kArtMethodRegister), this); } else { - codegen->GenerateVirtualCall(invoke_->AsInvokeVirtual(), LocationFrom(kArtMethodRegister)); + codegen->GenerateVirtualCall( + invoke_->AsInvokeVirtual(), LocationFrom(kArtMethodRegister), this); } - codegen->RecordPcInfo(invoke_, invoke_->GetDexPc(), this); } // Copy the result back to the expected output. @@ -2897,69 +2897,6 @@ void IntrinsicCodeGeneratorARM64::VisitDoubleIsInfinite(HInvoke* invoke) { GenIsInfinite(invoke->GetLocations(), /* is64bit */ true, GetVIXLAssembler()); } -void IntrinsicLocationsBuilderARM64::VisitReferenceGetReferent(HInvoke* invoke) { - if (kEmitCompilerReadBarrier) { - // Do not intrinsify this call with the read barrier configuration. - return; - } - LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kCallOnSlowPath, - kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetOut(Location::SameAsFirstInput()); - locations->AddTemp(Location::RequiresRegister()); -} - -void IntrinsicCodeGeneratorARM64::VisitReferenceGetReferent(HInvoke* invoke) { - DCHECK(!kEmitCompilerReadBarrier); - MacroAssembler* masm = GetVIXLAssembler(); - LocationSummary* locations = invoke->GetLocations(); - - Register obj = InputRegisterAt(invoke, 0); - Register out = OutputRegister(invoke); - - SlowPathCodeARM64* slow_path = new (GetAllocator()) IntrinsicSlowPathARM64(invoke); - codegen_->AddSlowPath(slow_path); - - // Load ArtMethod first. - HInvokeStaticOrDirect* invoke_direct = invoke->AsInvokeStaticOrDirect(); - DCHECK(invoke_direct != nullptr); - Register temp0 = XRegisterFrom(codegen_->GenerateCalleeMethodStaticOrDirectCall( - invoke_direct, locations->GetTemp(0))); - - // Now get declaring class. - __ Ldr(temp0.W(), MemOperand(temp0, ArtMethod::DeclaringClassOffset().Int32Value())); - - uint32_t slow_path_flag_offset = codegen_->GetReferenceSlowFlagOffset(); - uint32_t disable_flag_offset = codegen_->GetReferenceDisableFlagOffset(); - DCHECK_NE(slow_path_flag_offset, 0u); - DCHECK_NE(disable_flag_offset, 0u); - DCHECK_NE(slow_path_flag_offset, disable_flag_offset); - - // Check static flags that prevent using intrinsic. - if (slow_path_flag_offset == disable_flag_offset + 1) { - // Load two adjacent flags in one 64-bit load. 
- __ Ldr(temp0, MemOperand(temp0, disable_flag_offset)); - } else { - UseScratchRegisterScope temps(masm); - Register temp1 = temps.AcquireW(); - __ Ldr(temp1.W(), MemOperand(temp0, disable_flag_offset)); - __ Ldr(temp0.W(), MemOperand(temp0, slow_path_flag_offset)); - __ Orr(temp0, temp1, temp0); - } - __ Cbnz(temp0, slow_path->GetEntryLabel()); - - { - // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted. - vixl::EmissionCheckScope guard(codegen_->GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes); - // Fast path. - __ Ldr(out, HeapOperand(obj, mirror::Reference::ReferentOffset().Int32Value())); - codegen_->MaybeRecordImplicitNullCheck(invoke); - } - codegen_->GetAssembler()->MaybeUnpoisonHeapReference(out); - __ Bind(slow_path->GetExitLabel()); -} - void IntrinsicLocationsBuilderARM64::VisitIntegerValueOf(HInvoke* invoke) { InvokeRuntimeCallingConvention calling_convention; IntrinsicVisitor::ComputeIntegerValueOfLocations( @@ -3055,6 +2992,7 @@ void IntrinsicCodeGeneratorARM64::VisitThreadInterrupted(HInvoke* invoke) { __ Bind(&done); } +UNIMPLEMENTED_INTRINSIC(ARM64, ReferenceGetReferent) UNIMPLEMENTED_INTRINSIC(ARM64, IntegerHighestOneBit) UNIMPLEMENTED_INTRINSIC(ARM64, LongHighestOneBit) UNIMPLEMENTED_INTRINSIC(ARM64, IntegerLowestOneBit) diff --git a/compiler/optimizing/intrinsics_arm64.h b/compiler/optimizing/intrinsics_arm64.h index 3c53517b28..ff59ce9658 100644 --- a/compiler/optimizing/intrinsics_arm64.h +++ b/compiler/optimizing/intrinsics_arm64.h @@ -24,7 +24,8 @@ namespace aarch64 { class MacroAssembler; -}} // namespace vixl::aarch64 +} // namespace aarch64 +} // namespace vixl namespace art { diff --git a/compiler/optimizing/intrinsics_arm_vixl.cc b/compiler/optimizing/intrinsics_arm_vixl.cc index 1a33b0ee01..3c9b613803 100644 --- a/compiler/optimizing/intrinsics_arm_vixl.cc +++ b/compiler/optimizing/intrinsics_arm_vixl.cc @@ -26,7 +26,7 @@ #include "mirror/reference.h" #include "mirror/string.h" #include "scoped_thread_state_change-inl.h" -#include "thread-inl.h" +#include "thread-current-inl.h" #include "aarch32/constants-aarch32.h" @@ -97,11 +97,10 @@ class IntrinsicSlowPathARMVIXL : public SlowPathCodeARMVIXL { Location method_loc = MoveArguments(codegen); if (invoke_->IsInvokeStaticOrDirect()) { - codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), method_loc); + codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), method_loc, this); } else { - codegen->GenerateVirtualCall(invoke_->AsInvokeVirtual(), method_loc); + codegen->GenerateVirtualCall(invoke_->AsInvokeVirtual(), method_loc, this); } - codegen->RecordPcInfo(invoke_, invoke_->GetDexPc(), this); // Copy the result back to the expected output. Location out = invoke_->GetLocations()->Out(); @@ -3000,60 +2999,6 @@ void IntrinsicCodeGeneratorARMVIXL::VisitDoubleIsInfinite(HInvoke* invoke) { codegen_->GenerateConditionWithZero(kCondEQ, out, out); } -void IntrinsicLocationsBuilderARMVIXL::VisitReferenceGetReferent(HInvoke* invoke) { - if (kEmitCompilerReadBarrier) { - // Do not intrinsify this call with the read barrier configuration. 
- return; - } - LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kCallOnSlowPath, - kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetOut(Location::SameAsFirstInput()); - locations->AddTemp(Location::RequiresRegister()); -} - -void IntrinsicCodeGeneratorARMVIXL::VisitReferenceGetReferent(HInvoke* invoke) { - DCHECK(!kEmitCompilerReadBarrier); - ArmVIXLAssembler* assembler = GetAssembler(); - LocationSummary* locations = invoke->GetLocations(); - - vixl32::Register obj = InputRegisterAt(invoke, 0); - vixl32::Register out = OutputRegister(invoke); - - SlowPathCodeARMVIXL* slow_path = new (GetAllocator()) IntrinsicSlowPathARMVIXL(invoke); - codegen_->AddSlowPath(slow_path); - - // Load ArtMethod first. - HInvokeStaticOrDirect* invoke_direct = invoke->AsInvokeStaticOrDirect(); - DCHECK(invoke_direct != nullptr); - vixl32::Register temp0 = RegisterFrom(codegen_->GenerateCalleeMethodStaticOrDirectCall( - invoke_direct, locations->GetTemp(0))); - - // Now get declaring class. - __ Ldr(temp0, MemOperand(temp0, ArtMethod::DeclaringClassOffset().Int32Value())); - - uint32_t slow_path_flag_offset = codegen_->GetReferenceSlowFlagOffset(); - uint32_t disable_flag_offset = codegen_->GetReferenceDisableFlagOffset(); - DCHECK_NE(slow_path_flag_offset, 0u); - DCHECK_NE(disable_flag_offset, 0u); - DCHECK_NE(slow_path_flag_offset, disable_flag_offset); - - // Check static flags that prevent using intrinsic. - UseScratchRegisterScope temps(assembler->GetVIXLAssembler()); - vixl32::Register temp1 = temps.Acquire(); - __ Ldr(temp1, MemOperand(temp0, disable_flag_offset)); - __ Ldr(temp0, MemOperand(temp0, slow_path_flag_offset)); - __ Orr(temp0, temp1, temp0); - __ CompareAndBranchIfNonZero(temp0, slow_path->GetEntryLabel()); - - // Fast path. - __ Ldr(out, MemOperand(obj, mirror::Reference::ReferentOffset().Int32Value())); - codegen_->MaybeRecordImplicitNullCheck(invoke); - assembler->MaybeUnpoisonHeapReference(out); - __ Bind(slow_path->GetExitLabel()); -} - void IntrinsicLocationsBuilderARMVIXL::VisitMathCeil(HInvoke* invoke) { if (features_.HasARMv8AInstructions()) { CreateFPToFPLocations(arena_, invoke); @@ -3178,6 +3123,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitThreadInterrupted(HInvoke* invoke) { UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathRoundDouble) // Could be done by changing rounding mode, maybe? UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeCASLong) // High register pressure. 
UNIMPLEMENTED_INTRINSIC(ARMVIXL, SystemArrayCopyChar) +UNIMPLEMENTED_INTRINSIC(ARMVIXL, ReferenceGetReferent) UNIMPLEMENTED_INTRINSIC(ARMVIXL, IntegerHighestOneBit) UNIMPLEMENTED_INTRINSIC(ARMVIXL, LongHighestOneBit) UNIMPLEMENTED_INTRINSIC(ARMVIXL, IntegerLowestOneBit) diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc index 4731da1ea9..4cea6dfdfb 100644 --- a/compiler/optimizing/intrinsics_mips.cc +++ b/compiler/optimizing/intrinsics_mips.cc @@ -23,6 +23,7 @@ #include "intrinsics.h" #include "mirror/array-inl.h" #include "mirror/string.h" +#include "scoped_thread_state_change-inl.h" #include "thread.h" #include "utils/mips/assembler_mips.h" #include "utils/mips/constants_mips.h" @@ -111,12 +112,12 @@ class IntrinsicSlowPathMIPS : public SlowPathCodeMIPS { MoveArguments(invoke_, codegen); if (invoke_->IsInvokeStaticOrDirect()) { - codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), - Location::RegisterLocation(A0)); + codegen->GenerateStaticOrDirectCall( + invoke_->AsInvokeStaticOrDirect(), Location::RegisterLocation(A0), this); } else { - codegen->GenerateVirtualCall(invoke_->AsInvokeVirtual(), Location::RegisterLocation(A0)); + codegen->GenerateVirtualCall( + invoke_->AsInvokeVirtual(), Location::RegisterLocation(A0), this); } - codegen->RecordPcInfo(invoke_, invoke_->GetDexPc(), this); // Copy the result back to the expected output. Location out = invoke_->GetLocations()->Out(); diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc index 00afbcd8f2..d785567e0f 100644 --- a/compiler/optimizing/intrinsics_mips64.cc +++ b/compiler/optimizing/intrinsics_mips64.cc @@ -23,6 +23,7 @@ #include "intrinsics.h" #include "mirror/array-inl.h" #include "mirror/string.h" +#include "scoped_thread_state_change-inl.h" #include "thread.h" #include "utils/mips64/assembler_mips64.h" #include "utils/mips64/constants_mips64.h" @@ -100,12 +101,12 @@ class IntrinsicSlowPathMIPS64 : public SlowPathCodeMIPS64 { MoveArguments(invoke_, codegen); if (invoke_->IsInvokeStaticOrDirect()) { - codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), - Location::RegisterLocation(A0)); + codegen->GenerateStaticOrDirectCall( + invoke_->AsInvokeStaticOrDirect(), Location::RegisterLocation(A0), this); } else { - codegen->GenerateVirtualCall(invoke_->AsInvokeVirtual(), Location::RegisterLocation(A0)); + codegen->GenerateVirtualCall( + invoke_->AsInvokeVirtual(), Location::RegisterLocation(A0), this); } - codegen->RecordPcInfo(invoke_, invoke_->GetDexPc(), this); // Copy the result back to the expected output. Location out = invoke_->GetLocations()->Out(); diff --git a/compiler/optimizing/intrinsics_utils.h b/compiler/optimizing/intrinsics_utils.h index c1f9ae6425..8c69d9b643 100644 --- a/compiler/optimizing/intrinsics_utils.h +++ b/compiler/optimizing/intrinsics_utils.h @@ -56,11 +56,10 @@ class IntrinsicSlowPath : public SlowPathCode { Location method_loc = MoveArguments(codegen); if (invoke_->IsInvokeStaticOrDirect()) { - codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), method_loc); + codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), method_loc, this); } else { - codegen->GenerateVirtualCall(invoke_->AsInvokeVirtual(), method_loc); + codegen->GenerateVirtualCall(invoke_->AsInvokeVirtual(), method_loc, this); } - codegen->RecordPcInfo(invoke_, invoke_->GetDexPc(), this); // Copy the result back to the expected output. 
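// [Aside: the pattern this change applies to every intrinsic slow path and
// codegen in the CL, condensed from the hunk above; sketch only.]
//
// Before, the call and its stack-map registration were two separate steps:
//   codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), method_loc);
//   codegen->RecordPcInfo(invoke_, invoke_->GetDexPc(), this);
//
// After, the slow path is passed down and the codegen records the PC info
// itself, at the exact call instruction it emits, so the two cannot drift apart:
//   codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), method_loc, this);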
Location out = invoke_->GetLocations()->Out(); diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc index 57adcc3c2f..6b4851d541 100644 --- a/compiler/optimizing/intrinsics_x86.cc +++ b/compiler/optimizing/intrinsics_x86.cc @@ -31,7 +31,7 @@ #include "mirror/reference.h" #include "mirror/string.h" #include "scoped_thread_state_change-inl.h" -#include "thread-inl.h" +#include "thread-current-inl.h" #include "utils/x86/assembler_x86.h" #include "utils/x86/constants_x86.h" @@ -796,7 +796,6 @@ static void InvokeOutOfLineIntrinsic(CodeGeneratorX86* codegen, HInvoke* invoke) DCHECK(invoke->IsInvokeStaticOrDirect()); codegen->GenerateStaticOrDirectCall(invoke->AsInvokeStaticOrDirect(), Location::RegisterLocation(EAX)); - codegen->RecordPcInfo(invoke, invoke->GetDexPc()); // Copy the result back to the expected output. Location out = invoke->GetLocations()->Out(); @@ -2819,65 +2818,6 @@ void IntrinsicCodeGeneratorX86::VisitLongNumberOfTrailingZeros(HInvoke* invoke) GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long */ true); } -void IntrinsicLocationsBuilderX86::VisitReferenceGetReferent(HInvoke* invoke) { - if (kEmitCompilerReadBarrier) { - // Do not intrinsify this call with the read barrier configuration. - return; - } - LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kCallOnSlowPath, - kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetOut(Location::SameAsFirstInput()); - locations->AddTemp(Location::RequiresRegister()); -} - -void IntrinsicCodeGeneratorX86::VisitReferenceGetReferent(HInvoke* invoke) { - DCHECK(!kEmitCompilerReadBarrier); - LocationSummary* locations = invoke->GetLocations(); - X86Assembler* assembler = GetAssembler(); - - Register obj = locations->InAt(0).AsRegister<Register>(); - Register out = locations->Out().AsRegister<Register>(); - - SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke); - codegen_->AddSlowPath(slow_path); - - // Load ArtMethod first. - HInvokeStaticOrDirect* invoke_direct = invoke->AsInvokeStaticOrDirect(); - DCHECK(invoke_direct != nullptr); - Location temp_loc = codegen_->GenerateCalleeMethodStaticOrDirectCall( - invoke_direct, locations->GetTemp(0)); - DCHECK(temp_loc.Equals(locations->GetTemp(0))); - Register temp = temp_loc.AsRegister<Register>(); - - // Now get declaring class. - __ movl(temp, Address(temp, ArtMethod::DeclaringClassOffset().Int32Value())); - - uint32_t slow_path_flag_offset = codegen_->GetReferenceSlowFlagOffset(); - uint32_t disable_flag_offset = codegen_->GetReferenceDisableFlagOffset(); - DCHECK_NE(slow_path_flag_offset, 0u); - DCHECK_NE(disable_flag_offset, 0u); - DCHECK_NE(slow_path_flag_offset, disable_flag_offset); - - // Check static flags preventing us for using intrinsic. - if (slow_path_flag_offset == disable_flag_offset + 1) { - __ cmpw(Address(temp, disable_flag_offset), Immediate(0)); - __ j(kNotEqual, slow_path->GetEntryLabel()); - } else { - __ cmpb(Address(temp, disable_flag_offset), Immediate(0)); - __ j(kNotEqual, slow_path->GetEntryLabel()); - __ cmpb(Address(temp, slow_path_flag_offset), Immediate(0)); - __ j(kNotEqual, slow_path->GetEntryLabel()); - } - - // Fast path. 
- __ movl(out, Address(obj, mirror::Reference::ReferentOffset().Int32Value())); - codegen_->MaybeRecordImplicitNullCheck(invoke); - __ MaybeUnpoisonHeapReference(out); - __ Bind(slow_path->GetExitLabel()); -} - static bool IsSameInput(HInstruction* instruction, size_t input0, size_t input1) { return instruction->InputAt(input0) == instruction->InputAt(input1); } @@ -3429,6 +3369,7 @@ void IntrinsicCodeGeneratorX86::VisitThreadInterrupted(HInvoke* invoke) { UNIMPLEMENTED_INTRINSIC(X86, MathRoundDouble) +UNIMPLEMENTED_INTRINSIC(X86, ReferenceGetReferent) UNIMPLEMENTED_INTRINSIC(X86, FloatIsInfinite) UNIMPLEMENTED_INTRINSIC(X86, DoubleIsInfinite) UNIMPLEMENTED_INTRINSIC(X86, IntegerHighestOneBit) diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc index 773383ef1b..ef98b7be30 100644 --- a/compiler/optimizing/intrinsics_x86_64.cc +++ b/compiler/optimizing/intrinsics_x86_64.cc @@ -31,7 +31,7 @@ #include "mirror/reference.h" #include "mirror/string.h" #include "scoped_thread_state_change-inl.h" -#include "thread-inl.h" +#include "thread-current-inl.h" #include "utils/x86_64/assembler_x86_64.h" #include "utils/x86_64/constants_x86_64.h" @@ -567,7 +567,6 @@ static void InvokeOutOfLineIntrinsic(CodeGeneratorX86_64* codegen, HInvoke* invo DCHECK(invoke->IsInvokeStaticOrDirect()); codegen->GenerateStaticOrDirectCall( invoke->AsInvokeStaticOrDirect(), Location::RegisterLocation(RDI)); - codegen->RecordPcInfo(invoke, invoke->GetDexPc()); // Copy the result back to the expected output. Location out = invoke->GetLocations()->Out(); @@ -2959,65 +2958,6 @@ void IntrinsicCodeGeneratorX86_64::VisitLongNumberOfTrailingZeros(HInvoke* invok GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long */ true); } -void IntrinsicLocationsBuilderX86_64::VisitReferenceGetReferent(HInvoke* invoke) { - if (kEmitCompilerReadBarrier) { - // Do not intrinsify this call with the read barrier configuration. - return; - } - LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kCallOnSlowPath, - kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetOut(Location::SameAsFirstInput()); - locations->AddTemp(Location::RequiresRegister()); -} - -void IntrinsicCodeGeneratorX86_64::VisitReferenceGetReferent(HInvoke* invoke) { - DCHECK(!kEmitCompilerReadBarrier); - LocationSummary* locations = invoke->GetLocations(); - X86_64Assembler* assembler = GetAssembler(); - - CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>(); - CpuRegister out = locations->Out().AsRegister<CpuRegister>(); - - SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke); - codegen_->AddSlowPath(slow_path); - - // Load ArtMethod first. - HInvokeStaticOrDirect* invoke_direct = invoke->AsInvokeStaticOrDirect(); - DCHECK(invoke_direct != nullptr); - Location temp_loc = codegen_->GenerateCalleeMethodStaticOrDirectCall( - invoke_direct, locations->GetTemp(0)); - DCHECK(temp_loc.Equals(locations->GetTemp(0))); - CpuRegister temp = temp_loc.AsRegister<CpuRegister>(); - - // Now get declaring class. - __ movl(temp, Address(temp, ArtMethod::DeclaringClassOffset().Int32Value())); - - uint32_t slow_path_flag_offset = codegen_->GetReferenceSlowFlagOffset(); - uint32_t disable_flag_offset = codegen_->GetReferenceDisableFlagOffset(); - DCHECK_NE(slow_path_flag_offset, 0u); - DCHECK_NE(disable_flag_offset, 0u); - DCHECK_NE(slow_path_flag_offset, disable_flag_offset); - - // Check static flags preventing us for using intrinsic. 
- if (slow_path_flag_offset == disable_flag_offset + 1) { - __ cmpw(Address(temp, disable_flag_offset), Immediate(0)); - __ j(kNotEqual, slow_path->GetEntryLabel()); - } else { - __ cmpb(Address(temp, disable_flag_offset), Immediate(0)); - __ j(kNotEqual, slow_path->GetEntryLabel()); - __ cmpb(Address(temp, slow_path_flag_offset), Immediate(0)); - __ j(kNotEqual, slow_path->GetEntryLabel()); - } - - // Fast path. - __ movl(out, Address(obj, mirror::Reference::ReferentOffset().Int32Value())); - codegen_->MaybeRecordImplicitNullCheck(invoke); - __ MaybeUnpoisonHeapReference(out); - __ Bind(slow_path->GetExitLabel()); -} - void IntrinsicLocationsBuilderX86_64::VisitIntegerValueOf(HInvoke* invoke) { InvokeRuntimeCallingConvention calling_convention; IntrinsicVisitor::ComputeIntegerValueOfLocations( @@ -3106,6 +3046,7 @@ void IntrinsicCodeGeneratorX86_64::VisitThreadInterrupted(HInvoke* invoke) { __ Bind(&done); } +UNIMPLEMENTED_INTRINSIC(X86_64, ReferenceGetReferent) UNIMPLEMENTED_INTRINSIC(X86_64, FloatIsInfinite) UNIMPLEMENTED_INTRINSIC(X86_64, DoubleIsInfinite) diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc index 94787c99b2..d2493137fe 100644 --- a/compiler/optimizing/loop_optimization.cc +++ b/compiler/optimizing/loop_optimization.cc @@ -422,6 +422,23 @@ void HLoopOptimization::TraverseLoopsInnerToOuter(LoopNode* node) { // Optimization. // +bool HLoopOptimization::CanRemoveCycle() { + for (HInstruction* i : *iset_) { + // We can never remove instructions that have environment + // uses when we compile 'debuggable'. + if (i->HasEnvironmentUses() && graph_->IsDebuggable()) { + return false; + } + // A deoptimization should never have an environment input removed. + for (const HUseListNode<HEnvironment*>& use : i->GetEnvUses()) { + if (use.GetUser()->GetHolder()->IsDeoptimize()) { + return false; + } + } + } + return true; +} + void HLoopOptimization::SimplifyInduction(LoopNode* node) { HBasicBlock* header = node->loop_info->GetHeader(); HBasicBlock* preheader = node->loop_info->GetPreHeader(); @@ -435,10 +452,15 @@ void HLoopOptimization::SimplifyInduction(LoopNode* node) { iset_->clear(); // prepare phi induction if (TrySetPhiInduction(phi, /*restrict_uses*/ true) && TryAssignLastValue(node->loop_info, phi, preheader, /*collect_loop_uses*/ false)) { - for (HInstruction* i : *iset_) { - RemoveFromCycle(i); + // Note that it's ok to have replaced uses after the loop with the last value, without + // being able to remove the cycle. Environment uses (which are the reason we may not be + // able to remove the cycle) within the loop will still hold the right value. + if (CanRemoveCycle()) { + for (HInstruction* i : *iset_) { + RemoveFromCycle(i); + } + simplified_ = true; } - simplified_ = true; } } } @@ -499,6 +521,7 @@ void HLoopOptimization::OptimizeInnerLoop(LoopNode* node) { body = it.Current(); } } + CHECK(body != nullptr); // Ensure there is only a single exit point. if (header->GetSuccessors().size() != 2) { return; @@ -811,6 +834,11 @@ bool HLoopOptimization::VectorizeUse(LoopNode* node, } return true; } else if (instruction->IsArrayGet()) { + // Deal with vector restrictions. 
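// [Aside: a minimal sketch of how these restriction bits compose. The
// kNoStringCharAt value matches loop_optimization.h in this CL; the kNoDiv
// bit below is an illustrative stand-in, not the real value.]
#include <cstdint>
constexpr uint64_t kNoDivSketch = 16;            // hypothetical bit
constexpr uint64_t kNoStringCharAtSketch = 512;  // matches kNoStringCharAt
inline bool HasRestrictionSketch(uint64_t restrictions, uint64_t tested) {
  return (restrictions & tested) != 0u;  // simple bitmask test
}
// [E.g. the MIPS64 MSA char/short cases later in this file set
// kNoDiv | kNoStringCharAt, so a String.charAt array-get is rejected there.]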
+ if (instruction->AsArrayGet()->IsStringCharAt() && + HasVectorRestrictions(restrictions, kNoStringCharAt)) { + return false; + } // Accept a right-hand-side array base[index] for // (1) exact matching vector type, // (2) loop-invariant base, @@ -1072,9 +1100,36 @@ bool HLoopOptimization::TrySetVectorType(Primitive::Type type, uint64_t* restric } return false; case kMips: - case kMips64: // TODO: implement MIPS SIMD. return false; + case kMips64: + if (features->AsMips64InstructionSetFeatures()->HasMsa()) { + switch (type) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + *restrictions |= kNoDiv; + return TrySetVectorLength(16); + case Primitive::kPrimChar: + case Primitive::kPrimShort: + *restrictions |= kNoDiv | kNoStringCharAt; + return TrySetVectorLength(8); + case Primitive::kPrimInt: + *restrictions |= kNoDiv; + return TrySetVectorLength(4); + case Primitive::kPrimLong: + *restrictions |= kNoDiv; + return TrySetVectorLength(2); + case Primitive::kPrimFloat: + *restrictions |= kNoMinMax; // min/max(x, NaN) + return TrySetVectorLength(4); + case Primitive::kPrimDouble: + *restrictions |= kNoMinMax; // min/max(x, NaN) + return TrySetVectorLength(2); + default: + break; + } // switch type + } + return false; default: return false; } // switch instruction set @@ -1270,9 +1325,10 @@ void HLoopOptimization::GenerateVecOp(HInstruction* org, // corresponding new scalar instructions in the loop. The instruction will get an // environment while being inserted from the instruction map in original program order. DCHECK(vector_mode_ == kSequential); + size_t num_args = invoke->GetNumberOfArguments(); HInvokeStaticOrDirect* new_invoke = new (global_allocator_) HInvokeStaticOrDirect( global_allocator_, - invoke->GetNumberOfArguments(), + num_args, invoke->GetType(), invoke->GetDexPc(), invoke->GetDexMethodIndex(), @@ -1282,8 +1338,14 @@ void HLoopOptimization::GenerateVecOp(HInstruction* org, invoke->GetTargetMethod(), invoke->GetClinitCheckRequirement()); HInputsRef inputs = invoke->GetInputs(); - for (size_t index = 0; index < inputs.size(); ++index) { - new_invoke->SetArgumentAt(index, vector_map_->Get(inputs[index])); + size_t num_inputs = inputs.size(); + DCHECK_LE(num_args, num_inputs); + DCHECK_EQ(num_inputs, new_invoke->GetInputs().size()); // both invokes agree + for (size_t index = 0; index < num_inputs; ++index) { + HInstruction* new_input = index < num_args + ? vector_map_->Get(inputs[index]) + : inputs[index]; // beyond arguments: just pass through + new_invoke->SetArgumentAt(index, new_input); } new_invoke->SetIntrinsic(invoke->GetIntrinsic(), kNeedsEnvironmentOrCache, @@ -1487,11 +1549,10 @@ bool HLoopOptimization::IsOnlyUsedAfterLoop(HLoopInformation* loop_info, return true; } -bool HLoopOptimization::TryReplaceWithLastValue(HInstruction* instruction, HBasicBlock* block) { - // Try to replace outside uses with the last value. Environment uses can consume this - // value too, since any first true use is outside the loop (although this may imply - // that de-opting may look "ahead" a bit on the phi value). If there are only environment - // uses, the value is dropped altogether, since the computations have no effect. +bool HLoopOptimization::TryReplaceWithLastValue(HLoopInformation* loop_info, + HInstruction* instruction, + HBasicBlock* block) { + // Try to replace outside uses with the last value. 
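// [Aside: source-level intuition for "last value" replacement; simplified
// example, not from the ART test suite. Assuming n >= 0 (an early exit or
// a needed taken-test would otherwise block the rewrite):]
int UseAfterLoop(int n) {
  int i = 0;
  for (; i < n; ++i) { /* loop body */ }
  return i;  // before: the induction i is still live after the loop
}
int UseAfterLoopRewritten(int n) {
  for (int i = 0; i < n; ++i) { /* loop body */ }
  return n;  // after: the use is replaced by the generated last value
}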
if (induction_range_.CanGenerateLastValue(instruction)) { HInstruction* replacement = induction_range_.GenerateLastValue(instruction, graph_, block); const HUseList<HInstruction*>& uses = instruction->GetUses(); @@ -1500,6 +1561,11 @@ bool HLoopOptimization::TryReplaceWithLastValue(HInstruction* instruction, HBasi size_t index = it->GetIndex(); ++it; // increment before replacing if (iset_->find(user) == iset_->end()) { // not excluded? + if (kIsDebugBuild) { + // We have checked earlier in 'IsOnlyUsedAfterLoop' that the use is after the loop. + HLoopInformation* other_loop_info = user->GetBlock()->GetLoopInformation(); + CHECK(other_loop_info == nullptr || !other_loop_info->IsIn(*loop_info)); + } user->ReplaceInput(replacement, index); induction_range_.Replace(user, instruction, replacement); // update induction } @@ -1510,9 +1576,13 @@ bool HLoopOptimization::TryReplaceWithLastValue(HInstruction* instruction, HBasi size_t index = it->GetIndex(); ++it; // increment before replacing if (iset_->find(user->GetHolder()) == iset_->end()) { // not excluded? - user->RemoveAsUserOfInput(index); - user->SetRawEnvAt(index, replacement); - replacement->AddEnvUseAt(user, index); + HLoopInformation* other_loop_info = user->GetHolder()->GetBlock()->GetLoopInformation(); + // Only update environment uses after the loop. + if (other_loop_info == nullptr || !other_loop_info->IsIn(*loop_info)) { + user->RemoveAsUserOfInput(index); + user->SetRawEnvAt(index, replacement); + replacement->AddEnvUseAt(user, index); + } } } induction_simplication_count_++; @@ -1531,7 +1601,7 @@ bool HLoopOptimization::TryAssignLastValue(HLoopInformation* loop_info, int32_t use_count = 0; return IsOnlyUsedAfterLoop(loop_info, instruction, collect_loop_uses, &use_count) && (use_count == 0 || - (!IsEarlyExit(loop_info) && TryReplaceWithLastValue(instruction, block))); + (!IsEarlyExit(loop_info) && TryReplaceWithLastValue(loop_info, instruction, block))); } void HLoopOptimization::RemoveDeadInstructions(const HInstructionList& list) { diff --git a/compiler/optimizing/loop_optimization.h b/compiler/optimizing/loop_optimization.h index 35298d4076..cc6343aeb5 100644 --- a/compiler/optimizing/loop_optimization.h +++ b/compiler/optimizing/loop_optimization.h @@ -72,6 +72,7 @@ class HLoopOptimization : public HOptimization { kNoUnroundedHAdd = 64, // no unrounded halving add kNoAbs = 128, // no absolute value kNoMinMax = 256, // no min/max + kNoStringCharAt = 512, // no StringCharAt }; /* @@ -160,12 +161,15 @@ class HLoopOptimization : public HOptimization { /*out*/ int32_t* use_count); bool IsUsedOutsideLoop(HLoopInformation* loop_info, HInstruction* instruction); - bool TryReplaceWithLastValue(HInstruction* instruction, HBasicBlock* block); + bool TryReplaceWithLastValue(HLoopInformation* loop_info, + HInstruction* instruction, + HBasicBlock* block); bool TryAssignLastValue(HLoopInformation* loop_info, HInstruction* instruction, HBasicBlock* block, bool collect_loop_uses); void RemoveDeadInstructions(const HInstructionList& list); + bool CanRemoveCycle(); // Whether the current 'iset_' is removable. // Compiler driver (to query ISA features). 
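// [Aside: the MIPS64 MSA lane counts chosen in TrySetVectorType above
// follow from 128-bit MSA vector registers; standalone compile-time check:]
#include <cstdint>
static_assert(16 / sizeof(int8_t) == 16, "boolean/byte lanes");
static_assert(16 / sizeof(int16_t) == 8, "char/short lanes");
static_assert(16 / sizeof(int32_t) == 4, "int/float lanes");
static_assert(16 / sizeof(int64_t) == 2, "long/double lanes");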
const CompilerDriver* compiler_driver_; diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc index 689991010e..d0047c54f2 100644 --- a/compiler/optimizing/nodes.cc +++ b/compiler/optimizing/nodes.cc @@ -2623,7 +2623,7 @@ const DexFile& HInvokeStaticOrDirect::GetDexFileForPcRelativeDexCache() const { } bool HInvokeStaticOrDirect::NeedsDexCacheOfDeclaringClass() const { - if (GetMethodLoadKind() != MethodLoadKind::kDexCacheViaMethod) { + if (GetMethodLoadKind() != MethodLoadKind::kRuntimeCall) { return false; } if (!IsIntrinsic()) { @@ -2643,10 +2643,10 @@ std::ostream& operator<<(std::ostream& os, HInvokeStaticOrDirect::MethodLoadKind return os << "BootImageLinkTimePcRelative"; case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress: return os << "DirectAddress"; - case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative: - return os << "DexCachePcRelative"; - case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: - return os << "DexCacheViaMethod"; + case HInvokeStaticOrDirect::MethodLoadKind::kBssEntry: + return os << "BssEntry"; + case HInvokeStaticOrDirect::MethodLoadKind::kRuntimeCall: + return os << "RuntimeCall"; default: LOG(FATAL) << "Unknown MethodLoadKind: " << static_cast<int>(rhs); UNREACHABLE(); @@ -2690,7 +2690,7 @@ bool HLoadClass::InstructionDataEquals(const HInstruction* other) const { void HLoadClass::SetLoadKind(LoadKind load_kind) { SetPackedField<LoadKindField>(load_kind); - if (load_kind != LoadKind::kDexCacheViaMethod && + if (load_kind != LoadKind::kRuntimeCall && load_kind != LoadKind::kReferrersClass) { RemoveAsUserOfInput(0u); SetRawInputAt(0u, nullptr); @@ -2714,8 +2714,8 @@ std::ostream& operator<<(std::ostream& os, HLoadClass::LoadKind rhs) { return os << "BssEntry"; case HLoadClass::LoadKind::kJitTableAddress: return os << "JitTableAddress"; - case HLoadClass::LoadKind::kDexCacheViaMethod: - return os << "DexCacheViaMethod"; + case HLoadClass::LoadKind::kRuntimeCall: + return os << "RuntimeCall"; default: LOG(FATAL) << "Unknown HLoadClass::LoadKind: " << static_cast<int>(rhs); UNREACHABLE(); @@ -2743,10 +2743,10 @@ bool HLoadString::InstructionDataEquals(const HInstruction* other) const { void HLoadString::SetLoadKind(LoadKind load_kind) { // Once sharpened, the load kind should not be changed again. 
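// [Aside: the lifecycle this DCHECK enforces, sketched. Every HLoadString
// is constructed with the kRuntimeCall fall-back kind (see the constructor
// change below) and sharpening may upgrade it at most once:]
//
//   HLoadString* load = ...;                  // created as kRuntimeCall
//   load->SetLoadKind(LoadKind::kBssEntry);   // first and only sharpening
//   // calling SetLoadKind() again would now fail the DCHECK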
- DCHECK_EQ(GetLoadKind(), LoadKind::kDexCacheViaMethod); + DCHECK_EQ(GetLoadKind(), LoadKind::kRuntimeCall); SetPackedField<LoadKindField>(load_kind); - if (load_kind != LoadKind::kDexCacheViaMethod) { + if (load_kind != LoadKind::kRuntimeCall) { RemoveAsUserOfInput(0u); SetRawInputAt(0u, nullptr); } @@ -2766,8 +2766,8 @@ std::ostream& operator<<(std::ostream& os, HLoadString::LoadKind rhs) { return os << "BssEntry"; case HLoadString::LoadKind::kJitTableAddress: return os << "JitTableAddress"; - case HLoadString::LoadKind::kDexCacheViaMethod: - return os << "DexCacheViaMethod"; + case HLoadString::LoadKind::kRuntimeCall: + return os << "RuntimeCall"; default: LOG(FATAL) << "Unknown HLoadString::LoadKind: " << static_cast<int>(rhs); UNREACHABLE(); diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index 4d96fbe24c..ffa16dd787 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -1410,12 +1410,7 @@ class HLoopInformationOutwardIterator : public ValueObject { M(IntermediateAddressIndex, Instruction) #endif -#ifndef ART_ENABLE_CODEGEN_arm #define FOR_EACH_CONCRETE_INSTRUCTION_ARM(M) -#else -#define FOR_EACH_CONCRETE_INSTRUCTION_ARM(M) \ - M(ArmDexCacheArraysBase, Instruction) -#endif #define FOR_EACH_CONCRETE_INSTRUCTION_ARM64(M) @@ -1424,7 +1419,6 @@ class HLoopInformationOutwardIterator : public ValueObject { #else #define FOR_EACH_CONCRETE_INSTRUCTION_MIPS(M) \ M(MipsComputeBaseMethodAddress, Instruction) \ - M(MipsDexCacheArraysBase, Instruction) \ M(MipsPackedSwitch, Instruction) #endif @@ -1485,7 +1479,8 @@ FOR_EACH_INSTRUCTION(FORWARD_DECLARATION) H##type* As##type() { return this; } template <typename T> -class HUseListNode : public ArenaObject<kArenaAllocUseListNode> { +class HUseListNode : public ArenaObject<kArenaAllocUseListNode>, + public IntrusiveForwardListNode<HUseListNode<T>> { public: // Get the instruction which has this use as one of the inputs. T GetUser() const { return user_; } @@ -1494,10 +1489,6 @@ class HUseListNode : public ArenaObject<kArenaAllocUseListNode> { // Set the position of the input record that this use corresponds to. void SetIndex(size_t index) { index_ = index; } - // Hook for the IntrusiveForwardList<>. - // TODO: Hide this better. - IntrusiveForwardListHook hook; - private: HUseListNode(T user, size_t index) : user_(user), index_(index) {} @@ -1790,7 +1781,7 @@ class HEnvironment : public ArenaObject<kArenaAllocEnvironment> { uint32_t dex_pc, HInstruction* holder) : vregs_(number_of_vregs, arena->Adapter(kArenaAllocEnvironmentVRegs)), - locations_(number_of_vregs, arena->Adapter(kArenaAllocEnvironmentLocations)), + locations_(arena->Adapter(kArenaAllocEnvironmentLocations)), parent_(nullptr), method_(method), dex_pc_(dex_pc), @@ -1804,6 +1795,11 @@ class HEnvironment : public ArenaObject<kArenaAllocEnvironment> { to_copy.GetDexPc(), holder) {} + void AllocateLocations() { + DCHECK(locations_.empty()); + locations_.resize(vregs_.size()); + } + void SetAndCopyParentChain(ArenaAllocator* allocator, HEnvironment* parent) { if (parent_ != nullptr) { parent_->SetAndCopyParentChain(allocator, parent); @@ -4161,17 +4157,13 @@ class HInvokeStaticOrDirect FINAL : public HInvoke { // Used for app->boot calls with non-relocatable image and for JIT-compiled calls. kDirectAddress, - // Load from resolved methods array in the dex cache using a PC-relative load. 
- // Used when we need to use the dex cache, for example for invoke-static that
- // may cause class initialization (the entry may point to a resolution method),
- // and we know that we can access the dex cache arrays using a PC-relative load.
- kDexCachePcRelative,
-
- // Use ArtMethod* from the resolved methods of the compiled method's own ArtMethod*.
- // Used for JIT when we need to use the dex cache. This is also the last-resort-kind
- // used when other kinds are unavailable (say, dex cache arrays are not PC-relative)
- // or unimplemented or impractical (i.e. slow) on a particular architecture.
- kDexCacheViaMethod,
+ // Load from an entry in the .bss section using a PC-relative load.
+ // Used for methods outside boot image when .bss is accessible with a PC-relative load.
+ kBssEntry,
+
+ // Make a runtime call to resolve and call the method. This is the last-resort-kind
+ // used when other kinds are unimplemented on a particular architecture.
+ kRuntimeCall,
 };
 // Determines the location of the code pointer.
@@ -4192,7 +4184,6 @@ class HInvokeStaticOrDirect FINAL : public HInvoke {
 // - thread entrypoint offset for kStringInit method if this is a string init invoke.
 //   Note that there are multiple string init methods, each having its own offset.
 // - the method address for kDirectAddress
- // - the dex cache arrays offset for kDexCachePcRel.
 uint64_t method_load_data;
 };
@@ -4293,12 +4284,9 @@ class HInvokeStaticOrDirect FINAL : public HInvoke {
 bool NeedsDexCacheOfDeclaringClass() const OVERRIDE;
 bool IsStringInit() const { return GetMethodLoadKind() == MethodLoadKind::kStringInit; }
 bool HasMethodAddress() const { return GetMethodLoadKind() == MethodLoadKind::kDirectAddress; }
- bool HasPcRelativeDexCache() const {
-   return GetMethodLoadKind() == MethodLoadKind::kDexCachePcRelative;
- }
 bool HasPcRelativeMethodLoadKind() const {
   return GetMethodLoadKind() == MethodLoadKind::kBootImageLinkTimePcRelative ||
-          GetMethodLoadKind() == MethodLoadKind::kDexCachePcRelative;
+          GetMethodLoadKind() == MethodLoadKind::kBssEntry;
 }
 bool HasCurrentMethodInput() const {
 // This function can be called only after the invoke has been fully initialized by the builder.
@@ -4322,11 +4310,6 @@ class HInvokeStaticOrDirect FINAL : public HInvoke {
   return dispatch_info_.method_load_data;
 }
- uint32_t GetDexCacheArrayOffset() const {
-   DCHECK(HasPcRelativeDexCache());
-   return dispatch_info_.method_load_data;
- }
-
 const DexFile& GetDexFileForPcRelativeDexCache() const;
 ClinitCheckRequirement GetClinitCheckRequirement() const {
@@ -4371,7 +4354,7 @@ class HInvokeStaticOrDirect FINAL : public HInvoke {
 // Does this method load kind need the current method as an input?
 static bool NeedsCurrentMethodInput(MethodLoadKind kind) {
-   return kind == MethodLoadKind::kRecursive || kind == MethodLoadKind::kDexCacheViaMethod;
+   return kind == MethodLoadKind::kRecursive || kind == MethodLoadKind::kRuntimeCall;
 }
 DECLARE_INSTRUCTION(InvokeStaticOrDirect);
@@ -5687,12 +5670,11 @@ class HLoadClass FINAL : public HInstruction {
 // Load from the root table associated with the JIT compiled method.
 kJitTableAddress,
- // Load from resolved types array accessed through the class loaded from
- // the compiled method's own ArtMethod*. This is the default access type when
- // all other types are unavailable.
- kDexCacheViaMethod,
+ // Load using a simple runtime call. This is the fall-back load kind when
+ // the codegen is unable to use another appropriate kind.
+ kRuntimeCall, - kLast = kDexCacheViaMethod + kLast = kRuntimeCall }; HLoadClass(HCurrentMethod* current_method, @@ -5713,7 +5695,7 @@ class HLoadClass FINAL : public HInstruction { DCHECK(!is_referrers_class || !needs_access_check); SetPackedField<LoadKindField>( - is_referrers_class ? LoadKind::kReferrersClass : LoadKind::kDexCacheViaMethod); + is_referrers_class ? LoadKind::kReferrersClass : LoadKind::kRuntimeCall); SetPackedFlag<kFlagNeedsAccessCheck>(needs_access_check); SetPackedFlag<kFlagIsInBootImage>(false); SetPackedFlag<kFlagGenerateClInitCheck>(false); @@ -5747,7 +5729,7 @@ class HLoadClass FINAL : public HInstruction { bool CanCallRuntime() const { return NeedsAccessCheck() || MustGenerateClinitCheck() || - GetLoadKind() == LoadKind::kDexCacheViaMethod || + GetLoadKind() == LoadKind::kRuntimeCall || GetLoadKind() == LoadKind::kBssEntry; } @@ -5757,7 +5739,7 @@ class HLoadClass FINAL : public HInstruction { // If the class is in the boot image, the lookup in the runtime call cannot throw. // This keeps CanThrow() consistent between non-PIC (using kBootImageAddress) and // PIC and subsequently avoids a DCE behavior dependency on the PIC option. - ((GetLoadKind() == LoadKind::kDexCacheViaMethod || + ((GetLoadKind() == LoadKind::kRuntimeCall || GetLoadKind() == LoadKind::kBssEntry) && !IsInBootImage()); } @@ -5776,7 +5758,7 @@ class HLoadClass FINAL : public HInstruction { const DexFile& GetDexFile() const { return dex_file_; } bool NeedsDexCacheOfDeclaringClass() const OVERRIDE { - return GetLoadKind() == LoadKind::kDexCacheViaMethod; + return GetLoadKind() == LoadKind::kRuntimeCall; } static SideEffects SideEffectsForArchRuntimeCalls() { @@ -5827,12 +5809,12 @@ class HLoadClass FINAL : public HInstruction { return load_kind == LoadKind::kReferrersClass || load_kind == LoadKind::kBootImageLinkTimePcRelative || load_kind == LoadKind::kBssEntry || - load_kind == LoadKind::kDexCacheViaMethod; + load_kind == LoadKind::kRuntimeCall; } void SetLoadKindInternal(LoadKind load_kind); - // The special input is the HCurrentMethod for kDexCacheViaMethod or kReferrersClass. + // The special input is the HCurrentMethod for kRuntimeCall or kReferrersClass. // For other load kinds it's empty or possibly some architecture-specific instruction // for PC-relative loads, i.e. kBssEntry or kBootImageLinkTimePcRelative. HUserRecord<HInstruction*> special_input_; @@ -5841,7 +5823,7 @@ class HLoadClass FINAL : public HInstruction { // - The compiling method's dex file if the class is defined there too. // - The compiling method's dex file if the class is referenced there. // - The dex file where the class is defined. When the load kind can only be - // kBssEntry or kDexCacheViaMethod, we cannot emit code for this `HLoadClass`. + // kBssEntry or kRuntimeCall, we cannot emit code for this `HLoadClass`. const dex::TypeIndex type_index_; const DexFile& dex_file_; @@ -5884,12 +5866,11 @@ class HLoadString FINAL : public HInstruction { // Load from the root table associated with the JIT compiled method. kJitTableAddress, - // Load from resolved strings array accessed through the class loaded from - // the compiled method's own ArtMethod*. This is the default access type when - // all other types are unavailable. - kDexCacheViaMethod, + // Load using a simple runtime call. This is the fall-back load kind when + // the codegen is unable to use another appropriate kind. 
+ kRuntimeCall, - kLast = kDexCacheViaMethod, + kLast = kRuntimeCall, }; HLoadString(HCurrentMethod* current_method, @@ -5900,7 +5881,7 @@ class HLoadString FINAL : public HInstruction { special_input_(HUserRecord<HInstruction*>(current_method)), string_index_(string_index), dex_file_(dex_file) { - SetPackedField<LoadKindField>(LoadKind::kDexCacheViaMethod); + SetPackedField<LoadKindField>(LoadKind::kRuntimeCall); } void SetLoadKind(LoadKind load_kind); @@ -5944,7 +5925,7 @@ class HLoadString FINAL : public HInstruction { } bool NeedsDexCacheOfDeclaringClass() const OVERRIDE { - return GetLoadKind() == LoadKind::kDexCacheViaMethod; + return GetLoadKind() == LoadKind::kRuntimeCall; } bool CanBeNull() const OVERRIDE { return false; } @@ -5978,7 +5959,7 @@ class HLoadString FINAL : public HInstruction { void SetLoadKindInternal(LoadKind load_kind); - // The special input is the HCurrentMethod for kDexCacheViaMethod. + // The special input is the HCurrentMethod for kRuntimeCall. // For other load kinds it's empty or possibly some architecture-specific instruction // for PC-relative loads, i.e. kBssEntry or kBootImageLinkTimePcRelative. HUserRecord<HInstruction*> special_input_; @@ -6878,9 +6859,6 @@ class HParallelMove FINAL : public HTemplateInstruction<0> { #if defined(ART_ENABLE_CODEGEN_arm) || defined(ART_ENABLE_CODEGEN_arm64) #include "nodes_shared.h" #endif -#ifdef ART_ENABLE_CODEGEN_arm -#include "nodes_arm.h" -#endif #ifdef ART_ENABLE_CODEGEN_mips #include "nodes_mips.h" #endif diff --git a/compiler/optimizing/nodes_arm.h b/compiler/optimizing/nodes_arm.h deleted file mode 100644 index d9f9740e73..0000000000 --- a/compiler/optimizing/nodes_arm.h +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Copyright (C) 2015 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef ART_COMPILER_OPTIMIZING_NODES_ARM_H_ -#define ART_COMPILER_OPTIMIZING_NODES_ARM_H_ - -namespace art { - -class HArmDexCacheArraysBase FINAL : public HExpression<0> { - public: - explicit HArmDexCacheArraysBase(const DexFile& dex_file) - : HExpression(Primitive::kPrimInt, SideEffects::None(), kNoDexPc), - dex_file_(&dex_file), - element_offset_(static_cast<size_t>(-1)) { } - - bool CanBeMoved() const OVERRIDE { return true; } - - void UpdateElementOffset(size_t element_offset) { - // Use the lowest offset from the requested elements so that all offsets from - // this base are non-negative because our assemblers emit negative-offset loads - // as a sequence of two or more instructions. (However, positive offsets beyond - // 4KiB also require two or more instructions, so this simple heuristic could - // be improved for cases where there is a dense cluster of elements far from - // the lowest offset. This is expected to be rare enough though, so we choose - // not to spend compile time on elaborate calculations.) 
- element_offset_ = std::min(element_offset_, element_offset); - } - - const DexFile& GetDexFile() const { - return *dex_file_; - } - - size_t GetElementOffset() const { - return element_offset_; - } - - DECLARE_INSTRUCTION(ArmDexCacheArraysBase); - - private: - const DexFile* dex_file_; - size_t element_offset_; - - DISALLOW_COPY_AND_ASSIGN(HArmDexCacheArraysBase); -}; - -} // namespace art - -#endif // ART_COMPILER_OPTIMIZING_NODES_ARM_H_ diff --git a/compiler/optimizing/nodes_mips.h b/compiler/optimizing/nodes_mips.h index 36431c1fb9..8e439d9621 100644 --- a/compiler/optimizing/nodes_mips.h +++ b/compiler/optimizing/nodes_mips.h @@ -34,38 +34,6 @@ class HMipsComputeBaseMethodAddress : public HExpression<0> { DISALLOW_COPY_AND_ASSIGN(HMipsComputeBaseMethodAddress); }; -class HMipsDexCacheArraysBase : public HExpression<0> { - public: - explicit HMipsDexCacheArraysBase(const DexFile& dex_file) - : HExpression(Primitive::kPrimInt, SideEffects::None(), kNoDexPc), - dex_file_(&dex_file), - element_offset_(static_cast<size_t>(-1)) { } - - bool CanBeMoved() const OVERRIDE { return true; } - - void UpdateElementOffset(size_t element_offset) { - // We'll maximize the range of a single load instruction for dex cache array accesses - // by aligning offset -32768 with the offset of the first used element. - element_offset_ = std::min(element_offset_, element_offset); - } - - const DexFile& GetDexFile() const { - return *dex_file_; - } - - size_t GetElementOffset() const { - return element_offset_; - } - - DECLARE_INSTRUCTION(MipsDexCacheArraysBase); - - private: - const DexFile* dex_file_; - size_t element_offset_; - - DISALLOW_COPY_AND_ASSIGN(HMipsDexCacheArraysBase); -}; - // Mips version of HPackedSwitch that holds a pointer to the base method address. class HMipsPackedSwitch FINAL : public HTemplateInstruction<2> { public: diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index e5ab00bce3..890ba674b5 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -24,16 +24,11 @@ #include "android-base/strings.h" -#ifdef ART_ENABLE_CODEGEN_arm -#include "dex_cache_array_fixups_arm.h" -#endif - #ifdef ART_ENABLE_CODEGEN_arm64 #include "instruction_simplifier_arm64.h" #endif #ifdef ART_ENABLE_CODEGEN_mips -#include "dex_cache_array_fixups_mips.h" #include "pc_relative_fixups_mips.h" #endif @@ -522,8 +517,6 @@ static HOptimization* BuildOptimization( } else if (opt_name == CodeSinking::kCodeSinkingPassName) { return new (arena) CodeSinking(graph, stats); #ifdef ART_ENABLE_CODEGEN_arm - } else if (opt_name == arm::DexCacheArrayFixups::kDexCacheArrayFixupsArmPassName) { - return new (arena) arm::DexCacheArrayFixups(graph, codegen, stats); } else if (opt_name == arm::InstructionSimplifierArm::kInstructionSimplifierArmPassName) { return new (arena) arm::InstructionSimplifierArm(graph, stats); #endif @@ -532,8 +525,6 @@ static HOptimization* BuildOptimization( return new (arena) arm64::InstructionSimplifierArm64(graph, stats); #endif #ifdef ART_ENABLE_CODEGEN_mips - } else if (opt_name == mips::DexCacheArrayFixups::kDexCacheArrayFixupsMipsPassName) { - return new (arena) mips::DexCacheArrayFixups(graph, codegen, stats); } else if (opt_name == mips::PcRelativeFixups::kPcRelativeFixupsMipsPassName) { return new (arena) mips::PcRelativeFixups(graph, codegen, stats); #endif @@ -641,8 +632,6 @@ void OptimizingCompiler::RunArchOptimizations(InstructionSet instruction_set, #if defined(ART_ENABLE_CODEGEN_arm) case 
kThumb2: case kArm: { - arm::DexCacheArrayFixups* fixups = - new (arena) arm::DexCacheArrayFixups(graph, codegen, stats); arm::InstructionSimplifierArm* simplifier = new (arena) arm::InstructionSimplifierArm(graph, stats); SideEffectsAnalysis* side_effects = new (arena) SideEffectsAnalysis(graph); @@ -653,7 +642,6 @@ void OptimizingCompiler::RunArchOptimizations(InstructionSet instruction_set, simplifier, side_effects, gvn, - fixups, scheduling, }; RunOptimizations(arm_optimizations, arraysize(arm_optimizations), pass_observer); @@ -682,11 +670,8 @@ void OptimizingCompiler::RunArchOptimizations(InstructionSet instruction_set, case kMips: { mips::PcRelativeFixups* pc_relative_fixups = new (arena) mips::PcRelativeFixups(graph, codegen, stats); - mips::DexCacheArrayFixups* dex_cache_array_fixups = - new (arena) mips::DexCacheArrayFixups(graph, codegen, stats); HOptimization* mips_optimizations[] = { pc_relative_fixups, - dex_cache_array_fixups }; RunOptimizations(mips_optimizations, arraysize(mips_optimizations), pass_observer); break; diff --git a/compiler/optimizing/pc_relative_fixups_mips.cc b/compiler/optimizing/pc_relative_fixups_mips.cc index bce54bf49a..21b645279e 100644 --- a/compiler/optimizing/pc_relative_fixups_mips.cc +++ b/compiler/optimizing/pc_relative_fixups_mips.cc @@ -59,10 +59,9 @@ class PCRelativeHandlerVisitor : public HGraphVisitor { } void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE { - // If this is an invoke with PC-relative pointer to a method, + // If this is an invoke with PC-relative load kind, // we need to add the base as the special input. - if (invoke->GetMethodLoadKind() == - HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative && + if (invoke->HasPcRelativeMethodLoadKind() && !IsCallFreeIntrinsic<IntrinsicLocationsBuilderMIPS>(invoke, codegen_)) { InitializePCRelativeBasePointer(); // Add the special argument base to the method. diff --git a/compiler/optimizing/register_allocation_resolver.cc b/compiler/optimizing/register_allocation_resolver.cc index c6a0b6a0d2..ce3a4966aa 100644 --- a/compiler/optimizing/register_allocation_resolver.cc +++ b/compiler/optimizing/register_allocation_resolver.cc @@ -308,8 +308,10 @@ void RegisterAllocationResolver::ConnectSiblings(LiveInterval* interval) { } InsertMoveAfter(interval->GetDefinedBy(), interval->ToLocation(), loc); } - UsePosition* use = current->GetFirstUse(); - EnvUsePosition* env_use = current->GetFirstEnvironmentUse(); + UsePositionList::const_iterator use_it = current->GetUses().begin(); + const UsePositionList::const_iterator use_end = current->GetUses().end(); + EnvUsePositionList::const_iterator env_use_it = current->GetEnvironmentUses().begin(); + const EnvUsePositionList::const_iterator env_use_end = current->GetEnvironmentUses().end(); // Walk over all siblings, updating locations of use positions, and // connecting them when they are adjacent. 
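// [Aside: the hunk below rewrites the use walk in terms of
// FindMatchingUseRange over the new intrusive lists. A hypothetical
// stand-in for its contract, assuming elements are bare positions sorted
// in increasing order and the interval is half-open [begin, end):]
#include <algorithm>
#include <cstddef>
#include <utility>
template <typename Iterator>
std::pair<Iterator, Iterator> UsesInRangeSketch(Iterator first, Iterator last,
                                                size_t begin, size_t end) {
  Iterator lo = std::find_if(first, last, [begin](size_t pos) { return pos >= begin; });
  Iterator hi = std::find_if(lo, last, [end](size_t pos) { return pos >= end; });
  return std::make_pair(lo, hi);
}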
@@ -321,43 +323,47 @@ void RegisterAllocationResolver::ConnectSiblings(LiveInterval* interval) { LiveRange* range = current->GetFirstRange(); while (range != nullptr) { - while (use != nullptr && use->GetPosition() < range->GetStart()) { - DCHECK(use->IsSynthesized()); - use = use->GetNext(); - } - while (use != nullptr && use->GetPosition() <= range->GetEnd()) { - DCHECK(current->CoversSlow(use->GetPosition()) || (use->GetPosition() == range->GetEnd())); - if (!use->IsSynthesized()) { - LocationSummary* locations = use->GetUser()->GetLocations(); - Location expected_location = locations->InAt(use->GetInputIndex()); + // Process uses in the closed interval [range->GetStart(), range->GetEnd()]. + // FindMatchingUseRange() expects a half-open interval, so pass `range->GetEnd() + 1u`. + size_t range_begin = range->GetStart(); + size_t range_end = range->GetEnd() + 1u; + auto matching_use_range = + FindMatchingUseRange(use_it, use_end, range_begin, range_end); + DCHECK(std::all_of(use_it, + matching_use_range.begin(), + [](const UsePosition& pos) { return pos.IsSynthesized(); })); + for (const UsePosition& use : matching_use_range) { + DCHECK(current->CoversSlow(use.GetPosition()) || (use.GetPosition() == range->GetEnd())); + if (!use.IsSynthesized()) { + LocationSummary* locations = use.GetUser()->GetLocations(); + Location expected_location = locations->InAt(use.GetInputIndex()); // The expected (actual) location may be invalid in case the input is unused. Currently // this only happens for intrinsics. if (expected_location.IsValid()) { if (expected_location.IsUnallocated()) { - locations->SetInAt(use->GetInputIndex(), source); + locations->SetInAt(use.GetInputIndex(), source); } else if (!expected_location.IsConstant()) { - AddInputMoveFor(interval->GetDefinedBy(), use->GetUser(), source, expected_location); + AddInputMoveFor( + interval->GetDefinedBy(), use.GetUser(), source, expected_location); } } else { - DCHECK(use->GetUser()->IsInvoke()); - DCHECK(use->GetUser()->AsInvoke()->GetIntrinsic() != Intrinsics::kNone); + DCHECK(use.GetUser()->IsInvoke()); + DCHECK(use.GetUser()->AsInvoke()->GetIntrinsic() != Intrinsics::kNone); } } - use = use->GetNext(); } + use_it = matching_use_range.end(); // Walk over the environment uses, and update their locations. - while (env_use != nullptr && env_use->GetPosition() < range->GetStart()) { - env_use = env_use->GetNext(); - } - - while (env_use != nullptr && env_use->GetPosition() <= range->GetEnd()) { - DCHECK(current->CoversSlow(env_use->GetPosition()) - || (env_use->GetPosition() == range->GetEnd())); - HEnvironment* environment = env_use->GetEnvironment(); - environment->SetLocationAt(env_use->GetInputIndex(), source); - env_use = env_use->GetNext(); + auto matching_env_use_range = + FindMatchingUseRange(env_use_it, env_use_end, range_begin, range_end); + for (const EnvUsePosition& env_use : matching_env_use_range) { + DCHECK(current->CoversSlow(env_use.GetPosition()) + || (env_use.GetPosition() == range->GetEnd())); + HEnvironment* environment = env_use.GetEnvironment(); + environment->SetLocationAt(env_use.GetInputIndex(), source); } + env_use_it = matching_env_use_range.end(); range = range->GetNext(); } @@ -395,13 +401,8 @@ void RegisterAllocationResolver::ConnectSiblings(LiveInterval* interval) { current = next_sibling; } while (current != nullptr); - if (kIsDebugBuild) { - // Following uses can only be synthesized uses. 
- while (use != nullptr) { - DCHECK(use->IsSynthesized()); - use = use->GetNext(); - } - } + // Following uses can only be synthesized uses. + DCHECK(std::all_of(use_it, use_end, [](const UsePosition& pos) { return pos.IsSynthesized(); })); } static bool IsMaterializableEntryBlockInstructionOfGraphWithIrreducibleLoop( diff --git a/compiler/optimizing/register_allocator_graph_color.cc b/compiler/optimizing/register_allocator_graph_color.cc index 300f4c6239..5e22772844 100644 --- a/compiler/optimizing/register_allocator_graph_color.cc +++ b/compiler/optimizing/register_allocator_graph_color.cc @@ -20,7 +20,7 @@ #include "linear_order.h" #include "register_allocation_resolver.h" #include "ssa_liveness_analysis.h" -#include "thread-inl.h" +#include "thread-current-inl.h" namespace art { @@ -178,18 +178,17 @@ static float ComputeSpillWeight(LiveInterval* interval, const SsaLivenessAnalysi use_weight += CostForMoveAt(interval->GetStart() + 1, liveness); } - UsePosition* use = interval->GetFirstUse(); - while (use != nullptr && use->GetPosition() <= interval->GetStart()) { - // Skip uses before the start of this live interval. - use = use->GetNext(); - } - - while (use != nullptr && use->GetPosition() <= interval->GetEnd()) { - if (use->GetUser() != nullptr && use->RequiresRegister()) { + // Process uses in the range (interval->GetStart(), interval->GetEnd()], i.e. + // [interval->GetStart() + 1, interval->GetEnd() + 1) + auto matching_use_range = FindMatchingUseRange(interval->GetUses().begin(), + interval->GetUses().end(), + interval->GetStart() + 1u, + interval->GetEnd() + 1u); + for (const UsePosition& use : matching_use_range) { + if (use.GetUser() != nullptr && use.RequiresRegister()) { // Cost for spilling at a register use point. - use_weight += CostForMoveAt(use->GetUser()->GetLifetimePosition() - 1, liveness); + use_weight += CostForMoveAt(use.GetUser()->GetLifetimePosition() - 1, liveness); } - use = use->GetNext(); } // We divide by the length of the interval because we want to prioritize @@ -989,16 +988,16 @@ void RegisterAllocatorGraphColor::SplitAtRegisterUses(LiveInterval* interval) { interval = TrySplit(interval, interval->GetStart() + 1); } - UsePosition* use = interval->GetFirstUse(); - while (use != nullptr && use->GetPosition() < interval->GetStart()) { - use = use->GetNext(); - } - + // Process uses in the range [interval->GetStart(), interval->GetEnd()], i.e. + // [interval->GetStart(), interval->GetEnd() + 1) + auto matching_use_range = FindMatchingUseRange(interval->GetUses().begin(), + interval->GetUses().end(), + interval->GetStart(), + interval->GetEnd() + 1u); // Split around register uses. - size_t end = interval->GetEnd(); - while (use != nullptr && use->GetPosition() <= end) { - if (use->RequiresRegister()) { - size_t position = use->GetPosition(); + for (const UsePosition& use : matching_use_range) { + if (use.RequiresRegister()) { + size_t position = use.GetPosition(); interval = TrySplit(interval, position - 1); if (liveness_.GetInstructionFromPosition(position / 2)->IsControlFlow()) { // If we are at the very end of a basic block, we cannot split right @@ -1008,7 +1007,6 @@ void RegisterAllocatorGraphColor::SplitAtRegisterUses(LiveInterval* interval) { interval = TrySplit(interval, position); } } - use = use->GetNext(); } } @@ -1398,18 +1396,20 @@ void ColoringIteration::FindCoalesceOpportunities() { } // Try to prevent moves into fixed input locations. 
- UsePosition* use = interval->GetFirstUse(); - for (; use != nullptr && use->GetPosition() <= interval->GetStart(); use = use->GetNext()) { - // Skip past uses before the start of this interval. - } - for (; use != nullptr && use->GetPosition() <= interval->GetEnd(); use = use->GetNext()) { - HInstruction* user = use->GetUser(); + // Process uses in the range (interval->GetStart(), interval->GetEnd()], i.e. + // [interval->GetStart() + 1, interval->GetEnd() + 1) + auto matching_use_range = FindMatchingUseRange(interval->GetUses().begin(), + interval->GetUses().end(), + interval->GetStart() + 1u, + interval->GetEnd() + 1u); + for (const UsePosition& use : matching_use_range) { + HInstruction* user = use.GetUser(); if (user == nullptr) { // User may be null for certain intervals, such as temp intervals. continue; } LocationSummary* locations = user->GetLocations(); - Location input = locations->InAt(use->GetInputIndex()); + Location input = locations->InAt(use.GetInputIndex()); if (input.IsRegister() || input.IsFpuRegister()) { // TODO: Could try to handle pair interval too, but coalescing with fixed pair nodes // is currently not supported. diff --git a/compiler/optimizing/register_allocator_test.cc b/compiler/optimizing/register_allocator_test.cc index 667afb1ec3..24a2ab24d8 100644 --- a/compiler/optimizing/register_allocator_test.cc +++ b/compiler/optimizing/register_allocator_test.cc @@ -912,9 +912,9 @@ TEST_F(RegisterAllocatorTest, SpillInactive) { // Create an interval with lifetime holes. static constexpr size_t ranges1[][2] = {{0, 2}, {4, 6}, {8, 10}}; LiveInterval* first = BuildInterval(ranges1, arraysize(ranges1), &allocator, -1, one); - first->first_use_ = new(&allocator) UsePosition(user, false, 8, first->first_use_); - first->first_use_ = new(&allocator) UsePosition(user, false, 7, first->first_use_); - first->first_use_ = new(&allocator) UsePosition(user, false, 6, first->first_use_); + first->uses_.push_front(*new(&allocator) UsePosition(user, false, 8)); + first->uses_.push_front(*new(&allocator) UsePosition(user, false, 7)); + first->uses_.push_front(*new(&allocator) UsePosition(user, false, 6)); locations = new (&allocator) LocationSummary(first->GetDefinedBy(), LocationSummary::kNoCall); locations->SetOut(Location::RequiresRegister()); @@ -934,9 +934,9 @@ TEST_F(RegisterAllocatorTest, SpillInactive) { // before lifetime position 6 yet. 
static constexpr size_t ranges3[][2] = {{2, 4}, {8, 10}}; LiveInterval* third = BuildInterval(ranges3, arraysize(ranges3), &allocator, -1, three); - third->first_use_ = new(&allocator) UsePosition(user, false, 8, third->first_use_); - third->first_use_ = new(&allocator) UsePosition(user, false, 4, third->first_use_); - third->first_use_ = new(&allocator) UsePosition(user, false, 3, third->first_use_); + third->uses_.push_front(*new(&allocator) UsePosition(user, false, 8)); + third->uses_.push_front(*new(&allocator) UsePosition(user, false, 4)); + third->uses_.push_front(*new(&allocator) UsePosition(user, false, 3)); locations = new (&allocator) LocationSummary(third->GetDefinedBy(), LocationSummary::kNoCall); locations->SetOut(Location::RequiresRegister()); third = third->SplitAt(3); diff --git a/compiler/optimizing/scheduler_arm.cc b/compiler/optimizing/scheduler_arm.cc index 1a89567991..e78cd78aa2 100644 --- a/compiler/optimizing/scheduler_arm.cc +++ b/compiler/optimizing/scheduler_arm.cc @@ -288,6 +288,11 @@ void SchedulingLatencyVisitorARM::VisitIntermediateAddress(HIntermediateAddress* last_visited_latency_ = kArmIntegerOpLatency; } +void SchedulingLatencyVisitorARM::VisitIntermediateAddressIndex( + HIntermediateAddressIndex* ATTRIBUTE_UNUSED) { + UNIMPLEMENTED(FATAL) << "IntermediateAddressIndex is not implemented for ARM"; +} + void SchedulingLatencyVisitorARM::VisitMultiplyAccumulate(HMultiplyAccumulate* ATTRIBUTE_UNUSED) { last_visited_latency_ = kArmMulIntegerLatency; } @@ -813,10 +818,5 @@ void SchedulingLatencyVisitorARM::VisitTypeConversion(HTypeConversion* instr) { } } -void SchedulingLatencyVisitorARM::VisitArmDexCacheArraysBase(art::HArmDexCacheArraysBase*) { - last_visited_internal_latency_ = kArmIntegerOpLatency; - last_visited_latency_ = kArmIntegerOpLatency; -} - } // namespace arm } // namespace art diff --git a/compiler/optimizing/scheduler_arm.h b/compiler/optimizing/scheduler_arm.h index 8d5e4f375b..897e97da49 100644 --- a/compiler/optimizing/scheduler_arm.h +++ b/compiler/optimizing/scheduler_arm.h @@ -17,7 +17,11 @@ #ifndef ART_COMPILER_OPTIMIZING_SCHEDULER_ARM_H_ #define ART_COMPILER_OPTIMIZING_SCHEDULER_ARM_H_ +#ifdef ART_USE_OLD_ARM_BACKEND +#include "code_generator_arm.h" +#else #include "code_generator_arm_vixl.h" +#endif #include "scheduler.h" namespace art { @@ -99,6 +103,7 @@ class SchedulingLatencyVisitorARM : public SchedulingLatencyVisitor { M(BitwiseNegatedRight, unused) \ M(MultiplyAccumulate, unused) \ M(IntermediateAddress, unused) \ + M(IntermediateAddressIndex, unused) \ M(DataProcWithShifterOp, unused) #define DECLARE_VISIT_INSTRUCTION(type, unused) \ diff --git a/compiler/optimizing/scheduler_arm64.cc b/compiler/optimizing/scheduler_arm64.cc index 558dcc4cbc..83b487fb5b 100644 --- a/compiler/optimizing/scheduler_arm64.cc +++ b/compiler/optimizing/scheduler_arm64.cc @@ -16,6 +16,7 @@ #include "scheduler_arm64.h" #include "code_generator_utils.h" +#include "mirror/array-inl.h" namespace art { namespace arm64 { @@ -43,6 +44,13 @@ void SchedulingLatencyVisitorARM64::VisitIntermediateAddress( last_visited_latency_ = kArm64IntegerOpLatency + 2; } +void SchedulingLatencyVisitorARM64::VisitIntermediateAddressIndex( + HIntermediateAddressIndex* instr ATTRIBUTE_UNUSED) { + // Although the code generated is a simple `add` instruction, we found through empirical results + // that spacing it from its use in memory accesses was beneficial. 
+ last_visited_latency_ = kArm64DataProcWithShifterOpLatency + 2; +} + void SchedulingLatencyVisitorARM64::VisitMultiplyAccumulate(HMultiplyAccumulate* ATTRIBUTE_UNUSED) { last_visited_latency_ = kArm64MulIntegerLatency; } @@ -192,5 +200,148 @@ void SchedulingLatencyVisitorARM64::VisitTypeConversion(HTypeConversion* instr) } } +void SchedulingLatencyVisitorARM64::HandleSimpleArithmeticSIMD(HVecOperation *instr) { + if (Primitive::IsFloatingPointType(instr->GetPackedType())) { + last_visited_latency_ = kArm64SIMDFloatingPointOpLatency; + } else { + last_visited_latency_ = kArm64SIMDIntegerOpLatency; + } +} + +void SchedulingLatencyVisitorARM64::VisitVecReplicateScalar( + HVecReplicateScalar* instr ATTRIBUTE_UNUSED) { + last_visited_latency_ = kArm64SIMDReplicateOpLatency; +} + +void SchedulingLatencyVisitorARM64::VisitVecSetScalars(HVecSetScalars* instr) { + LOG(FATAL) << "Unsupported SIMD instruction " << instr->GetId(); +} + +void SchedulingLatencyVisitorARM64::VisitVecSumReduce(HVecSumReduce* instr) { + LOG(FATAL) << "Unsupported SIMD instruction " << instr->GetId(); +} + +void SchedulingLatencyVisitorARM64::VisitVecCnv(HVecCnv* instr ATTRIBUTE_UNUSED) { + last_visited_latency_ = kArm64SIMDTypeConversionInt2FPLatency; +} + +void SchedulingLatencyVisitorARM64::VisitVecNeg(HVecNeg* instr) { + HandleSimpleArithmeticSIMD(instr); +} + +void SchedulingLatencyVisitorARM64::VisitVecAbs(HVecAbs* instr) { + HandleSimpleArithmeticSIMD(instr); +} + +void SchedulingLatencyVisitorARM64::VisitVecNot(HVecNot* instr) { + if (instr->GetPackedType() == Primitive::kPrimBoolean) { + last_visited_internal_latency_ = kArm64SIMDIntegerOpLatency; + } + last_visited_latency_ = kArm64SIMDIntegerOpLatency; +} + +void SchedulingLatencyVisitorARM64::VisitVecAdd(HVecAdd* instr) { + HandleSimpleArithmeticSIMD(instr); +} + +void SchedulingLatencyVisitorARM64::VisitVecHalvingAdd(HVecHalvingAdd* instr) { + HandleSimpleArithmeticSIMD(instr); +} + +void SchedulingLatencyVisitorARM64::VisitVecSub(HVecSub* instr) { + HandleSimpleArithmeticSIMD(instr); +} + +void SchedulingLatencyVisitorARM64::VisitVecMul(HVecMul* instr) { + if (Primitive::IsFloatingPointType(instr->GetPackedType())) { + last_visited_latency_ = kArm64SIMDMulFloatingPointLatency; + } else { + last_visited_latency_ = kArm64SIMDMulIntegerLatency; + } +} + +void SchedulingLatencyVisitorARM64::VisitVecDiv(HVecDiv* instr) { + if (instr->GetPackedType() == Primitive::kPrimFloat) { + last_visited_latency_ = kArm64SIMDDivFloatLatency; + } else { + DCHECK(instr->GetPackedType() == Primitive::kPrimDouble); + last_visited_latency_ = kArm64SIMDDivDoubleLatency; + } +} + +void SchedulingLatencyVisitorARM64::VisitVecMin(HVecMin* instr) { + HandleSimpleArithmeticSIMD(instr); +} + +void SchedulingLatencyVisitorARM64::VisitVecMax(HVecMax* instr) { + HandleSimpleArithmeticSIMD(instr); +} + +void SchedulingLatencyVisitorARM64::VisitVecAnd(HVecAnd* instr ATTRIBUTE_UNUSED) { + last_visited_latency_ = kArm64SIMDIntegerOpLatency; +} + +void SchedulingLatencyVisitorARM64::VisitVecAndNot(HVecAndNot* instr) { + LOG(FATAL) << "Unsupported SIMD instruction " << instr->GetId(); +} + +void SchedulingLatencyVisitorARM64::VisitVecOr(HVecOr* instr ATTRIBUTE_UNUSED) { + last_visited_latency_ = kArm64SIMDIntegerOpLatency; +} + +void SchedulingLatencyVisitorARM64::VisitVecXor(HVecXor* instr ATTRIBUTE_UNUSED) { + last_visited_latency_ = kArm64SIMDIntegerOpLatency; +} + +void SchedulingLatencyVisitorARM64::VisitVecShl(HVecShl* instr) { + HandleSimpleArithmeticSIMD(instr); +} + +void 
SchedulingLatencyVisitorARM64::VisitVecShr(HVecShr* instr) { + HandleSimpleArithmeticSIMD(instr); +} + +void SchedulingLatencyVisitorARM64::VisitVecUShr(HVecUShr* instr) { + HandleSimpleArithmeticSIMD(instr); +} + +void SchedulingLatencyVisitorARM64::VisitVecMultiplyAccumulate( + HVecMultiplyAccumulate* instr ATTRIBUTE_UNUSED) { + last_visited_latency_ = kArm64SIMDMulIntegerLatency; +} + +void SchedulingLatencyVisitorARM64::HandleVecAddress( + HVecMemoryOperation* instruction, + size_t size ATTRIBUTE_UNUSED) { + HInstruction* index = instruction->InputAt(1); + if (!index->IsConstant()) { + last_visited_internal_latency_ += kArm64DataProcWithShifterOpLatency; + } +} + +void SchedulingLatencyVisitorARM64::VisitVecLoad(HVecLoad* instr) { + last_visited_internal_latency_ = 0; + size_t size = Primitive::ComponentSize(instr->GetPackedType()); + + if (instr->GetPackedType() == Primitive::kPrimChar + && mirror::kUseStringCompression + && instr->IsStringCharAt()) { + // Set latencies for the uncompressed case. + last_visited_internal_latency_ += kArm64MemoryLoadLatency + kArm64BranchLatency; + HandleVecAddress(instr, size); + last_visited_latency_ = kArm64SIMDMemoryLoadLatency; + } else { + HandleVecAddress(instr, size); + last_visited_latency_ = kArm64SIMDMemoryLoadLatency; + } +} + +void SchedulingLatencyVisitorARM64::VisitVecStore(HVecStore* instr) { + last_visited_internal_latency_ = 0; + size_t size = Primitive::ComponentSize(instr->GetPackedType()); + HandleVecAddress(instr, size); + last_visited_latency_ = kArm64SIMDMemoryStoreLatency; +} + } // namespace arm64 } // namespace art diff --git a/compiler/optimizing/scheduler_arm64.h b/compiler/optimizing/scheduler_arm64.h index 7a33720655..63d5b7d6b6 100644 --- a/compiler/optimizing/scheduler_arm64.h +++ b/compiler/optimizing/scheduler_arm64.h @@ -42,6 +42,18 @@ static constexpr uint32_t kArm64LoadStringInternalLatency = 7; static constexpr uint32_t kArm64MulFloatingPointLatency = 6; static constexpr uint32_t kArm64MulIntegerLatency = 6; static constexpr uint32_t kArm64TypeConversionFloatingPointIntegerLatency = 5; +static constexpr uint32_t kArm64BranchLatency = kArm64IntegerOpLatency; + +static constexpr uint32_t kArm64SIMDFloatingPointOpLatency = 10; +static constexpr uint32_t kArm64SIMDIntegerOpLatency = 6; +static constexpr uint32_t kArm64SIMDMemoryLoadLatency = 10; +static constexpr uint32_t kArm64SIMDMemoryStoreLatency = 6; +static constexpr uint32_t kArm64SIMDMulFloatingPointLatency = 12; +static constexpr uint32_t kArm64SIMDMulIntegerLatency = 12; +static constexpr uint32_t kArm64SIMDReplicateOpLatency = 16; +static constexpr uint32_t kArm64SIMDDivDoubleLatency = 60; +static constexpr uint32_t kArm64SIMDDivFloatLatency = 30; +static constexpr uint32_t kArm64SIMDTypeConversionInt2FPLatency = 10; class SchedulingLatencyVisitorARM64 : public SchedulingLatencyVisitor { public: @@ -52,29 +64,54 @@ class SchedulingLatencyVisitorARM64 : public SchedulingLatencyVisitor { // We add a second unused parameter to be able to use this macro like the others // defined in `nodes.h`. 
-#define FOR_EACH_SCHEDULED_COMMON_INSTRUCTION(M) \ - M(ArrayGet , unused) \ - M(ArrayLength , unused) \ - M(ArraySet , unused) \ - M(BinaryOperation , unused) \ - M(BoundsCheck , unused) \ - M(Div , unused) \ - M(InstanceFieldGet , unused) \ - M(InstanceOf , unused) \ - M(Invoke , unused) \ - M(LoadString , unused) \ - M(Mul , unused) \ - M(NewArray , unused) \ - M(NewInstance , unused) \ - M(Rem , unused) \ - M(StaticFieldGet , unused) \ - M(SuspendCheck , unused) \ - M(TypeConversion , unused) +#define FOR_EACH_SCHEDULED_COMMON_INSTRUCTION(M) \ + M(ArrayGet , unused) \ + M(ArrayLength , unused) \ + M(ArraySet , unused) \ + M(BinaryOperation , unused) \ + M(BoundsCheck , unused) \ + M(Div , unused) \ + M(InstanceFieldGet , unused) \ + M(InstanceOf , unused) \ + M(Invoke , unused) \ + M(LoadString , unused) \ + M(Mul , unused) \ + M(NewArray , unused) \ + M(NewInstance , unused) \ + M(Rem , unused) \ + M(StaticFieldGet , unused) \ + M(SuspendCheck , unused) \ + M(TypeConversion , unused) \ + M(VecReplicateScalar , unused) \ + M(VecSetScalars , unused) \ + M(VecSumReduce , unused) \ + M(VecCnv , unused) \ + M(VecNeg , unused) \ + M(VecAbs , unused) \ + M(VecNot , unused) \ + M(VecAdd , unused) \ + M(VecHalvingAdd , unused) \ + M(VecSub , unused) \ + M(VecMul , unused) \ + M(VecDiv , unused) \ + M(VecMin , unused) \ + M(VecMax , unused) \ + M(VecAnd , unused) \ + M(VecAndNot , unused) \ + M(VecOr , unused) \ + M(VecXor , unused) \ + M(VecShl , unused) \ + M(VecShr , unused) \ + M(VecUShr , unused) \ + M(VecMultiplyAccumulate, unused) \ + M(VecLoad , unused) \ + M(VecStore , unused) #define FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(M) \ M(BitwiseNegatedRight, unused) \ M(MultiplyAccumulate, unused) \ M(IntermediateAddress, unused) \ + M(IntermediateAddressIndex, unused) \ M(DataProcWithShifterOp, unused) #define DECLARE_VISIT_INSTRUCTION(type, unused) \ @@ -85,6 +122,10 @@ class SchedulingLatencyVisitorARM64 : public SchedulingLatencyVisitor { FOR_EACH_CONCRETE_INSTRUCTION_ARM64(DECLARE_VISIT_INSTRUCTION) #undef DECLARE_VISIT_INSTRUCTION + + private: + void HandleSimpleArithmeticSIMD(HVecOperation *instr); + void HandleVecAddress(HVecMemoryOperation* instruction, size_t size); }; class HSchedulerARM64 : public HScheduler { @@ -101,6 +142,8 @@ class HSchedulerARM64 : public HScheduler { return true; FOR_EACH_CONCRETE_INSTRUCTION_ARM64(CASE_INSTRUCTION_KIND) return true; + FOR_EACH_SCHEDULED_COMMON_INSTRUCTION(CASE_INSTRUCTION_KIND) + return true; default: return HScheduler::IsSchedulable(instruction); } diff --git a/compiler/optimizing/sharpening.cc b/compiler/optimizing/sharpening.cc index 7b8104b8ca..8bd568befd 100644 --- a/compiler/optimizing/sharpening.cc +++ b/compiler/optimizing/sharpening.cc @@ -128,15 +128,8 @@ void HSharpening::SharpenInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke, method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative; code_ptr_location = HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod; } else { - // Use PC-relative access to the dex cache arrays. - method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative; - // Note: we use the invoke's graph instead of the codegen graph, which are - // different when inlining (the codegen graph is the most outer graph). The - // invoke's dex method index is relative to the dex file where the invoke's graph - // was built from. 
- DexCacheArraysLayout layout(GetInstructionSetPointerSize(codegen->GetInstructionSet()), - &invoke->GetBlock()->GetGraph()->GetDexFile()); - method_load_data = layout.MethodOffset(invoke->GetDexMethodIndex()); + // Use PC-relative access to the .bss methods arrays. + method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kBssEntry; code_ptr_location = HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod; } @@ -159,7 +152,7 @@ HLoadClass::LoadKind HSharpening::ComputeLoadClassKind(HLoadClass* load_class, CompilerDriver* compiler_driver, const DexCompilationUnit& dex_compilation_unit) { Handle<mirror::Class> klass = load_class->GetClass(); - DCHECK(load_class->GetLoadKind() == HLoadClass::LoadKind::kDexCacheViaMethod || + DCHECK(load_class->GetLoadKind() == HLoadClass::LoadKind::kRuntimeCall || load_class->GetLoadKind() == HLoadClass::LoadKind::kReferrersClass) << load_class->GetLoadKind(); DCHECK(!load_class->IsInBootImage()) << "HLoadClass should not be optimized before sharpening."; @@ -185,7 +178,7 @@ HLoadClass::LoadKind HSharpening::ComputeLoadClassKind(HLoadClass* load_class, DCHECK(!runtime->UseJitCompilation()); if (!compiler_driver->GetSupportBootImageFixup()) { // compiler_driver_test. Do not sharpen. - desired_load_kind = HLoadClass::LoadKind::kDexCacheViaMethod; + desired_load_kind = HLoadClass::LoadKind::kRuntimeCall; } else if ((klass != nullptr) && compiler_driver->IsImageClass(dex_file.StringByTypeIdx(type_index))) { is_in_boot_image = true; @@ -210,7 +203,7 @@ HLoadClass::LoadKind HSharpening::ComputeLoadClassKind(HLoadClass* load_class, // this `HLoadClass` hasn't been executed in the interpreter. // Fallback to the dex cache. // TODO(ngeoffray): Generate HDeoptimize instead. - desired_load_kind = HLoadClass::LoadKind::kDexCacheViaMethod; + desired_load_kind = HLoadClass::LoadKind::kRuntimeCall; } } else if (is_in_boot_image && !codegen->GetCompilerOptions().GetCompilePic()) { // AOT app compilation. Check if the class is in the boot image. @@ -229,7 +222,7 @@ HLoadClass::LoadKind HSharpening::ComputeLoadClassKind(HLoadClass* load_class, } if (!IsSameDexFile(load_class->GetDexFile(), *dex_compilation_unit.GetDexFile())) { - if ((load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) || + if ((load_kind == HLoadClass::LoadKind::kRuntimeCall) || (load_kind == HLoadClass::LoadKind::kBssEntry)) { // We actually cannot reference this class, we're forced to bail. // We cannot reference this class with Bss, as the entrypoint will lookup the class @@ -241,7 +234,7 @@ HLoadClass::LoadKind HSharpening::ComputeLoadClassKind(HLoadClass* load_class, } void HSharpening::ProcessLoadString(HLoadString* load_string) { - DCHECK_EQ(load_string->GetLoadKind(), HLoadString::LoadKind::kDexCacheViaMethod); + DCHECK_EQ(load_string->GetLoadKind(), HLoadString::LoadKind::kRuntimeCall); const DexFile& dex_file = load_string->GetDexFile(); dex::StringIndex string_index = load_string->GetStringIndex(); @@ -268,7 +261,7 @@ void HSharpening::ProcessLoadString(HLoadString* load_string) { desired_load_kind = HLoadString::LoadKind::kBootImageLinkTimePcRelative; } else { // compiler_driver_test. Do not sharpen. 
- desired_load_kind = HLoadString::LoadKind::kDexCacheViaMethod; + desired_load_kind = HLoadString::LoadKind::kRuntimeCall; } } else if (runtime->UseJitCompilation()) { DCHECK(!codegen_->GetCompilerOptions().GetCompilePic()); @@ -280,7 +273,7 @@ void HSharpening::ProcessLoadString(HLoadString* load_string) { desired_load_kind = HLoadString::LoadKind::kJitTableAddress; } } else { - desired_load_kind = HLoadString::LoadKind::kDexCacheViaMethod; + desired_load_kind = HLoadString::LoadKind::kRuntimeCall; } } else { // AOT app compilation. Try to lookup the string without allocating if not found. diff --git a/compiler/optimizing/ssa_liveness_analysis.cc b/compiler/optimizing/ssa_liveness_analysis.cc index b538a89a06..7b7495bf3b 100644 --- a/compiler/optimizing/ssa_liveness_analysis.cc +++ b/compiler/optimizing/ssa_liveness_analysis.cc @@ -356,14 +356,16 @@ int LiveInterval::FindFirstRegisterHint(size_t* free_until, } } - UsePosition* use = first_use_; size_t start = GetStart(); size_t end = GetEnd(); - while (use != nullptr && use->GetPosition() <= end) { - size_t use_position = use->GetPosition(); - if (use_position >= start && !use->IsSynthesized()) { - HInstruction* user = use->GetUser(); - size_t input_index = use->GetInputIndex(); + for (const UsePosition& use : GetUses()) { + size_t use_position = use.GetPosition(); + if (use_position > end) { + break; + } + if (use_position >= start && !use.IsSynthesized()) { + HInstruction* user = use.GetUser(); + size_t input_index = use.GetInputIndex(); if (user->IsPhi()) { // If the phi has a register, try to use the same. Location phi_location = user->GetLiveInterval()->ToLocation(); @@ -395,7 +397,7 @@ int LiveInterval::FindFirstRegisterHint(size_t* free_until, } else { // If the instruction is expected in a register, try to use it. LocationSummary* locations = user->GetLocations(); - Location expected = locations->InAt(use->GetInputIndex()); + Location expected = locations->InAt(use.GetInputIndex()); // We use the user's lifetime position - 1 (and not `use_position`) because the // register is blocked at the beginning of the user. size_t position = user->GetLifetimePosition() - 1; @@ -408,7 +410,6 @@ int LiveInterval::FindFirstRegisterHint(size_t* free_until, } } } - use = use->GetNext(); } return kNoRegister; diff --git a/compiler/optimizing/ssa_liveness_analysis.h b/compiler/optimizing/ssa_liveness_analysis.h index e9dffc1fac..a6681575a2 100644 --- a/compiler/optimizing/ssa_liveness_analysis.h +++ b/compiler/optimizing/ssa_liveness_analysis.h @@ -19,7 +19,9 @@ #include <iostream> +#include "base/iteration_range.h" #include "nodes.h" +#include "utils/intrusive_forward_list.h" namespace art { @@ -102,28 +104,23 @@ class LiveRange FINAL : public ArenaObject<kArenaAllocSsaLiveness> { /** * A use position represents a live interval use at a given position. 
*/ -class UsePosition : public ArenaObject<kArenaAllocSsaLiveness> { +class UsePosition : public ArenaObject<kArenaAllocSsaLiveness>, + public IntrusiveForwardListNode<UsePosition> { public: - UsePosition(HInstruction* user, size_t input_index, size_t position, UsePosition* next) + UsePosition(HInstruction* user, size_t input_index, size_t position) : user_(user), input_index_(input_index), - position_(position), - next_(next) { - DCHECK(next_ == nullptr || next->GetPosition() >= GetPosition()); + position_(position) { } explicit UsePosition(size_t position) : user_(nullptr), input_index_(kNoInput), - position_(dchecked_integral_cast<uint32_t>(position)), - next_(nullptr) { + position_(dchecked_integral_cast<uint32_t>(position)) { } size_t GetPosition() const { return position_; } - UsePosition* GetNext() const { return next_; } - void SetNext(UsePosition* next) { next_ = next; } - HInstruction* GetUser() const { return user_; } bool IsSynthesized() const { return user_ == nullptr; } @@ -138,10 +135,8 @@ class UsePosition : public ArenaObject<kArenaAllocSsaLiveness> { return user_->GetBlock()->GetLoopInformation(); } - UsePosition* Dup(ArenaAllocator* allocator) const { - return new (allocator) UsePosition( - user_, input_index_, position_, - next_ == nullptr ? nullptr : next_->Dup(allocator)); + UsePosition* Clone(ArenaAllocator* allocator) const { + return new (allocator) UsePosition(user_, input_index_, position_); } bool RequiresRegister() const { @@ -156,33 +151,28 @@ class UsePosition : public ArenaObject<kArenaAllocSsaLiveness> { HInstruction* const user_; const size_t input_index_; const size_t position_; - UsePosition* next_; DISALLOW_COPY_AND_ASSIGN(UsePosition); }; +using UsePositionList = IntrusiveForwardList<UsePosition>; /** * An environment use position represents a live interval for environment use at a given position. */ -class EnvUsePosition : public ArenaObject<kArenaAllocSsaLiveness> { +class EnvUsePosition : public ArenaObject<kArenaAllocSsaLiveness>, + public IntrusiveForwardListNode<EnvUsePosition> { public: EnvUsePosition(HEnvironment* environment, size_t input_index, - size_t position, - EnvUsePosition* next) + size_t position) : environment_(environment), input_index_(input_index), - position_(position), - next_(next) { + position_(position) { DCHECK(environment != nullptr); - DCHECK(next_ == nullptr || next->GetPosition() >= GetPosition()); } size_t GetPosition() const { return position_; } - EnvUsePosition* GetNext() const { return next_; } - void SetNext(EnvUsePosition* next) { next_ = next; } - HEnvironment* GetEnvironment() const { return environment_; } size_t GetInputIndex() const { return input_index_; } @@ -190,20 +180,47 @@ class EnvUsePosition : public ArenaObject<kArenaAllocSsaLiveness> { stream << position_; } - EnvUsePosition* Dup(ArenaAllocator* allocator) const { - return new (allocator) EnvUsePosition( - environment_, input_index_, position_, - next_ == nullptr ? 
nullptr : next_->Dup(allocator)); + EnvUsePosition* Clone(ArenaAllocator* allocator) const { + return new (allocator) EnvUsePosition(environment_, input_index_, position_); } private: HEnvironment* const environment_; const size_t input_index_; const size_t position_; - EnvUsePosition* next_; DISALLOW_COPY_AND_ASSIGN(EnvUsePosition); }; +using EnvUsePositionList = IntrusiveForwardList<EnvUsePosition>; + +template <typename Iterator> +inline Iterator FindUseAtOrAfterPosition(Iterator first, Iterator last, size_t position) { + using value_type = const typename Iterator::value_type; + static_assert(std::is_same<value_type, const UsePosition>::value || + std::is_same<value_type, const EnvUsePosition>::value, + "Expecting value type UsePosition or EnvUsePosition."); + Iterator ret = std::find_if( + first, last, [position](const value_type& use) { return use.GetPosition() >= position; }); + // Check that the processed range is sorted. Do not check the rest of the range to avoid + // increasing the complexity of callers from O(n) to O(n^2). + DCHECK(std::is_sorted( + first, + ret, + [](const value_type& lhs, const value_type& rhs) { + return lhs.GetPosition() < rhs.GetPosition(); + })); + return ret; +} + +template <typename Iterator> +inline IterationRange<Iterator> FindMatchingUseRange(Iterator first, + Iterator last, + size_t position_begin, + size_t position_end) { + Iterator begin = FindUseAtOrAfterPosition(first, last, position_begin); + Iterator end = FindUseAtOrAfterPosition(begin, last, position_end); + return MakeIterationRange(begin, end); +} class SafepointPosition : public ArenaObject<kArenaAllocSsaLiveness> { public: @@ -265,11 +282,11 @@ class LiveInterval : public ArenaObject<kArenaAllocSsaLiveness> { void AddTempUse(HInstruction* instruction, size_t temp_index) { DCHECK(IsTemp()); - DCHECK(first_use_ == nullptr) << "A temporary can only have one user"; - DCHECK(first_env_use_ == nullptr) << "A temporary cannot have environment user"; + DCHECK(GetUses().empty()) << "A temporary can only have one user"; + DCHECK(GetEnvironmentUses().empty()) << "A temporary cannot have environment user"; size_t position = instruction->GetLifetimePosition(); - first_use_ = new (allocator_) UsePosition( - instruction, temp_index, position, first_use_); + UsePosition* new_use = new (allocator_) UsePosition(instruction, temp_index, position); + uses_.push_front(*new_use); AddRange(position, position + 1); } @@ -306,32 +323,36 @@ class LiveInterval : public ArenaObject<kArenaAllocSsaLiveness> { AddBackEdgeUses(*instruction->GetBlock()); } - if ((first_use_ != nullptr) - && (first_use_->GetUser() == actual_user) - && (first_use_->GetPosition() < position)) { + if ((!uses_.empty()) && + (uses_.front().GetUser() == actual_user) && + (uses_.front().GetPosition() < position)) { // The user uses the instruction multiple times, and one use dies before the other. // We update the use list so that the latter is first. 
DCHECK(!is_environment); - UsePosition* cursor = first_use_; - while ((cursor->GetNext() != nullptr) && (cursor->GetNext()->GetPosition() < position)) { - cursor = cursor->GetNext(); - } - DCHECK(first_use_->GetPosition() + 1 == position); - UsePosition* new_use = new (allocator_) UsePosition( - instruction, input_index, position, cursor->GetNext()); - cursor->SetNext(new_use); - if (first_range_->GetEnd() == first_use_->GetPosition()) { + DCHECK(uses_.front().GetPosition() + 1 == position); + UsePositionList::iterator next_pos = uses_.begin(); + UsePositionList::iterator insert_pos; + do { + insert_pos = next_pos; + ++next_pos; + } while (next_pos != uses_.end() && next_pos->GetPosition() < position); + UsePosition* new_use = new (allocator_) UsePosition(instruction, input_index, position); + uses_.insert_after(insert_pos, *new_use); + if (first_range_->GetEnd() == uses_.front().GetPosition()) { first_range_->end_ = position; } return; } if (is_environment) { - first_env_use_ = new (allocator_) EnvUsePosition( - environment, input_index, position, first_env_use_); + DCHECK(env_uses_.empty() || position <= env_uses_.front().GetPosition()); + EnvUsePosition* new_env_use = + new (allocator_) EnvUsePosition(environment, input_index, position); + env_uses_.push_front(*new_env_use); } else { - first_use_ = new (allocator_) UsePosition( - instruction, input_index, position, first_use_); + DCHECK(uses_.empty() || position <= uses_.front().GetPosition()); + UsePosition* new_use = new (allocator_) UsePosition(instruction, input_index, position); + uses_.push_front(*new_use); } if (is_environment && !keep_alive) { @@ -369,8 +390,9 @@ class LiveInterval : public ArenaObject<kArenaAllocSsaLiveness> { if (block->IsInLoop()) { AddBackEdgeUses(*block); } - first_use_ = new (allocator_) UsePosition( - instruction, input_index, block->GetLifetimeEnd(), first_use_); + UsePosition* new_use = + new (allocator_) UsePosition(instruction, input_index, block->GetLifetimeEnd()); + uses_.push_front(*new_use); } ALWAYS_INLINE void AddRange(size_t start, size_t end) { @@ -430,7 +452,7 @@ class LiveInterval : public ArenaObject<kArenaAllocSsaLiveness> { first_range_->start_ = from; } else { // Instruction without uses. 
- DCHECK(first_use_ == nullptr); + DCHECK(uses_.empty()); DCHECK(from == defined_by_->GetLifetimePosition()); first_range_ = last_range_ = range_search_start_ = new (allocator_) LiveRange(from, from + 2, nullptr); @@ -528,16 +550,17 @@ class LiveInterval : public ArenaObject<kArenaAllocSsaLiveness> { return position; } - UsePosition* use = first_use_; size_t end = GetEnd(); - while (use != nullptr && use->GetPosition() <= end) { - size_t use_position = use->GetPosition(); + for (const UsePosition& use : GetUses()) { + size_t use_position = use.GetPosition(); + if (use_position > end) { + break; + } if (use_position > position) { - if (use->RequiresRegister()) { + if (use.RequiresRegister()) { return use_position; } } - use = use->GetNext(); } return kNoLifetime; } @@ -564,24 +587,25 @@ class LiveInterval : public ArenaObject<kArenaAllocSsaLiveness> { return position; } - UsePosition* use = first_use_; size_t end = GetEnd(); - while (use != nullptr && use->GetPosition() <= end) { - size_t use_position = use->GetPosition(); + for (const UsePosition& use : GetUses()) { + size_t use_position = use.GetPosition(); + if (use_position > end) { + break; + } if (use_position > position) { return use_position; } - use = use->GetNext(); } return kNoLifetime; } - UsePosition* GetFirstUse() const { - return first_use_; + const UsePositionList& GetUses() const { + return parent_->uses_; } - EnvUsePosition* GetFirstEnvironmentUse() const { - return first_env_use_; + const EnvUsePositionList& GetEnvironmentUses() const { + return parent_->env_uses_; } Primitive::Type GetType() const { @@ -645,8 +669,6 @@ class LiveInterval : public ArenaObject<kArenaAllocSsaLiveness> { next_sibling_ = new_interval; new_interval->parent_ = parent_; - new_interval->first_use_ = first_use_; - new_interval->first_env_use_ = first_env_use_; LiveRange* current = first_range_; LiveRange* previous = nullptr; // Iterate over the ranges, and either find a range that covers this position, or @@ -718,20 +740,14 @@ class LiveInterval : public ArenaObject<kArenaAllocSsaLiveness> { current = current->GetNext(); } stream << "}, uses: { "; - const UsePosition* use = first_use_; - if (use != nullptr) { - do { - use->Dump(stream); - stream << " "; - } while ((use = use->GetNext()) != nullptr); + for (const UsePosition& use : GetUses()) { + use.Dump(stream); + stream << " "; } stream << "}, { "; - const EnvUsePosition* env_use = first_env_use_; - if (env_use != nullptr) { - do { - env_use->Dump(stream); - stream << " "; - } while ((env_use = env_use->GetNext()) != nullptr); + for (const EnvUsePosition& env_use : GetEnvironmentUses()) { + env_use.Dump(stream); + stream << " "; } stream << "}"; stream << " is_fixed: " << is_fixed_ << ", is_split: " << IsSplit(); @@ -833,12 +849,16 @@ class LiveInterval : public ArenaObject<kArenaAllocSsaLiveness> { high_or_low_interval_->last_range_ = high_or_low_interval_->first_range_->GetLastRange(); high_or_low_interval_->range_search_start_ = high_or_low_interval_->first_range_; } - if (first_use_ != nullptr) { - high_or_low_interval_->first_use_ = first_use_->Dup(allocator_); + auto pos = high_or_low_interval_->uses_.before_begin(); + for (const UsePosition& use : uses_) { + UsePosition* new_use = use.Clone(allocator_); + pos = high_or_low_interval_->uses_.insert_after(pos, *new_use); } - if (first_env_use_ != nullptr) { - high_or_low_interval_->first_env_use_ = first_env_use_->Dup(allocator_); + auto env_pos = high_or_low_interval_->env_uses_.before_begin(); + for (const EnvUsePosition& env_use : 
env_uses_) { + EnvUsePosition* new_env_use = env_use.Clone(allocator_); + env_pos = high_or_low_interval_->env_uses_.insert_after(env_pos, *new_env_use); } } @@ -962,8 +982,8 @@ class LiveInterval : public ArenaObject<kArenaAllocSsaLiveness> { range_search_start_(nullptr), first_safepoint_(nullptr), last_safepoint_(nullptr), - first_use_(nullptr), - first_env_use_(nullptr), + uses_(), + env_uses_(), type_(type), next_sibling_(nullptr), parent_(this), @@ -1005,14 +1025,12 @@ class LiveInterval : public ArenaObject<kArenaAllocSsaLiveness> { } bool HasSynthesizeUseAt(size_t position) const { - UsePosition* use = first_use_; - while (use != nullptr) { - size_t use_position = use->GetPosition(); - if ((use_position == position) && use->IsSynthesized()) { + for (const UsePosition& use : GetUses()) { + size_t use_position = use.GetPosition(); + if ((use_position == position) && use.IsSynthesized()) { return true; } if (use_position > position) break; - use = use->GetNext(); } return false; } @@ -1028,11 +1046,11 @@ class LiveInterval : public ArenaObject<kArenaAllocSsaLiveness> { // Add synthesized uses at the back edge of loops to help the register allocator. // Note that this method is called in decreasing liveness order, to facilitate adding - uses at the head of the `first_use_` linked list. Because below + uses at the head of the `uses_` list. Because below // we iterate from inner-most to outer-most, which is in increasing liveness order, - we need to take extra care of how the `first_use_` linked list is being updated. - UsePosition* first_in_new_list = nullptr; - UsePosition* last_in_new_list = nullptr; + we need to add subsequent entries after the last inserted entry. + const UsePositionList::iterator old_begin = uses_.begin(); + UsePositionList::iterator insert_pos = uses_.before_begin(); for (HLoopInformationOutwardIterator it(block_at_use); !it.Done(); it.Advance()) { break; } - // We're only adding a synthesized use at the last back edge. Adding syntehsized uses on + // We're only adding a synthesized use at the last back edge. Adding synthesized uses on // all back edges is not necessary: anything used in the loop will have its use at the // last back edge. If we want branches in a loop to have better register allocation than // another branch, then it is the linear order we should change. size_t back_edge_use_position = current->GetLifetimeEnd(); - if ((first_use_ != nullptr) && (first_use_->GetPosition() <= back_edge_use_position)) { + if ((old_begin != uses_.end()) && (old_begin->GetPosition() <= back_edge_use_position)) { // There was a use already seen in this loop. Therefore the previous call to `AddUse` // already inserted the backedge use. We can stop going outward. DCHECK(HasSynthesizeUseAt(back_edge_use_position)); break; } - DCHECK(last_in_new_list == nullptr || - back_edge_use_position > last_in_new_list->GetPosition()); + DCHECK(insert_pos != uses_.before_begin() ? back_edge_use_position > insert_pos->GetPosition() : current == block_at_use.GetLoopInformation()) << std::distance(uses_.before_begin(), insert_pos); UsePosition* new_use = new (allocator_) UsePosition(back_edge_use_position); - - if (last_in_new_list != nullptr) { - // Going outward. The latest created use needs to point to the new use. - last_in_new_list->SetNext(new_use); - } else { - // This is the inner-most loop. 
- DCHECK_EQ(current, block_at_use.GetLoopInformation()); - first_in_new_list = new_use; - } - last_in_new_list = new_use; - } - // Link the newly created linked list with `first_use_`. - if (last_in_new_list != nullptr) { - last_in_new_list->SetNext(first_use_); - first_use_ = first_in_new_list; + insert_pos = uses_.insert_after(insert_pos, *new_use); } } @@ -1091,9 +1097,9 @@ class LiveInterval : public ArenaObject<kArenaAllocSsaLiveness> { SafepointPosition* first_safepoint_; SafepointPosition* last_safepoint_; - // Uses of this interval. Note that this linked list is shared amongst siblings. - UsePosition* first_use_; - EnvUsePosition* first_env_use_; + // Uses of this interval. Only the parent interval keeps these lists. + UsePositionList uses_; + EnvUsePositionList env_uses_; // The instruction type this interval corresponds to. const Primitive::Type type_; @@ -1202,14 +1208,14 @@ class SsaLivenessAnalysis : public ValueObject { // A temporary shares the same lifetime start as the instruction that requires it. DCHECK(temp->IsTemp()); HInstruction* user = GetInstructionFromPosition(temp->GetStart() / 2); - DCHECK_EQ(user, temp->GetFirstUse()->GetUser()); + DCHECK_EQ(user, temp->GetUses().front().GetUser()); return user; } size_t GetTempIndex(LiveInterval* temp) const { // We use the input index to store the index of the temporary in the user's temporary list. DCHECK(temp->IsTemp()); - return temp->GetFirstUse()->GetInputIndex(); + return temp->GetUses().front().GetInputIndex(); } size_t GetMaxLifetimePosition() const {
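A few closing notes on recurring patterns in this patch. First, UsePosition and EnvUsePosition now derive from IntrusiveForwardListNode, so each use embeds its own list hook instead of a hand-maintained next_ pointer. Below is a deliberately simplified stand-in for that idea (ART's utils/intrusive_forward_list.h provides a full STL-style interface); it also shows why the register_allocator_test.cc hunks push positions 8, 7, 6 front-first:

#include <cassert>
#include <cstddef>

// Simplified stand-in for IntrusiveForwardListNode: the element embeds its
// own `next` hook, so linking it into a list needs no separate allocation.
template <typename T>
struct IntrusiveNode {
  T* next = nullptr;
};

struct Use : IntrusiveNode<Use> {
  explicit Use(size_t pos) : position(pos) {}
  size_t position;
};

struct UseList {
  Use* head = nullptr;
  void push_front(Use& u) {  // O(1), mirrors IntrusiveForwardList::push_front
    u.next = head;
    head = &u;
  }
};

int main() {
  Use a(8), b(7), c(6);
  UseList list;
  list.push_front(a);  // 8
  list.push_front(b);  // 7
  list.push_front(c);  // 6
  // Pushing positions in decreasing order yields an ascending list: 6, 7, 8,
  // which is the invariant the new DCHECKs in AddUse() rely on.
  assert(list.head->position == 6);
  assert(list.head->next->position == 7);
  assert(list.head->next->next->position == 8);
  return 0;
}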
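Second, several hunks (the high/low interval cloning loop and the AddBackEdgeUses() rewrite) build or extend a sorted list by holding an iterator to the last inserted element and calling insert_after() on it. A small sketch with std::forward_list, which shares the before_begin()/insert_after() surface of IntrusiveForwardList, assuming new positions arrive in increasing order as loops are visited inner-most to outer-most:

#include <cassert>
#include <forward_list>
#include <initializer_list>

int main() {
  std::forward_list<int> uses = {10, 20};  // existing uses, sorted ascending
  std::forward_list<int>::iterator insert_pos = uses.before_begin();
  // Each new position is larger than the previous one, so inserting after
  // the previous insertion keeps the whole list sorted at O(1) per element.
  for (int position : {2, 4, 6}) {
    insert_pos = uses.insert_after(insert_pos, position);
  }
  assert((uses == std::forward_list<int>{2, 4, 6, 10, 20}));
  return 0;
}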
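Third, on the ComputeSpillWeight() hunk in register_allocator_graph_color.cc: each register use inside the interval adds a move cost, and the total is divided by the interval length so that short, use-dense intervals look expensive to spill. A toy version of that arithmetic, with invented names and a constant move cost standing in for CostForMoveAt():

#include <cstddef>
#include <iostream>
#include <vector>

// Illustrative stand-ins; ART's cost model depends on block frequency.
constexpr float kMoveCost = 1.0f;

struct Interval {
  size_t start;
  size_t end;
  std::vector<size_t> register_use_positions;  // sorted, within (start, end]
};

float ComputeSpillWeight(const Interval& interval) {
  float use_weight = 0.0f;
  for (size_t pos : interval.register_use_positions) {
    if (interval.start < pos && pos <= interval.end) {
      use_weight += kMoveCost;  // a spill forces a reload near this use
    }
  }
  // Normalizing by length prioritizes short, busy intervals for registers.
  return use_weight / static_cast<float>(interval.end - interval.start);
}

int main() {
  Interval busy{10, 14, {11, 12, 13}};  // 3 register uses over length 4
  Interval sparse{10, 50, {20, 40}};    // 2 register uses over length 40
  std::cout << ComputeSpillWeight(busy) << " vs "
            << ComputeSpillWeight(sparse) << "\n";  // 0.75 vs 0.05
  return 0;
}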
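Fourth, the scheduler_arm64.cc additions model each instruction's cost as an internal latency (work issued before the result exists) plus a result latency seen by dependents; VisitVecLoad, for instance, charges address computation as internal latency when the index is not a constant, and the SIMD load itself as the result latency. A condensed sketch of that accumulation, with invented constants and method names rather than ART's exact visitor interface:

#include <cstdint>
#include <iostream>

// Invented latency constants, loosely modeled on the kArm64* values above.
constexpr uint32_t kAddressComputeLatency = 3;
constexpr uint32_t kSimdMemoryLoadLatency = 10;

class LatencyVisitor {
 public:
  // A vector load with a non-constant index first computes the address
  // (internal latency), then performs the load itself (result latency).
  void VisitVecLoad(bool index_is_constant) {
    last_visited_internal_latency_ =
        index_is_constant ? 0 : kAddressComputeLatency;
    last_visited_latency_ = kSimdMemoryLoadLatency;
  }
  // Dependent nodes are scheduled against the sum of both components.
  uint32_t TotalLatency() const {
    return last_visited_internal_latency_ + last_visited_latency_;
  }

 private:
  uint32_t last_visited_internal_latency_ = 0;
  uint32_t last_visited_latency_ = 0;
};

int main() {
  LatencyVisitor visitor;
  visitor.VisitVecLoad(/*index_is_constant=*/ false);
  std::cout << "vec load latency: " << visitor.TotalLatency() << "\n";  // 13
  return 0;
}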
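Finally, the FOR_EACH_SCHEDULED_* lists in scheduler_arm.h and scheduler_arm64.h are X-macros: the same list expands once into Visit declarations and again into CASE_INSTRUCTION_KIND cases in IsSchedulable(). A self-contained sketch of the idiom with a made-up two-entry list; the second, unused macro parameter mirrors the convention noted in those headers:

#include <iostream>

// One list, expanded twice with different definitions of M.
#define FOR_EACH_SCHEDULED(M) \
  M(VecAdd, unused)           \
  M(VecMul, unused)

struct LatencyVisitor {
#define DECLARE_VISIT(type, unused) void Visit##type();
  FOR_EACH_SCHEDULED(DECLARE_VISIT)  // declares VisitVecAdd, VisitVecMul
#undef DECLARE_VISIT
};

#define DEFINE_VISIT(type, unused) \
  void LatencyVisitor::Visit##type() { std::cout << "visit " #type "\n"; }
FOR_EACH_SCHEDULED(DEFINE_VISIT)  // defines both methods in one expansion
#undef DEFINE_VISIT

int main() {
  LatencyVisitor visitor;
  visitor.VisitVecAdd();
  visitor.VisitVecMul();
  return 0;
}

Keeping the instruction list in a single macro means adding an entry (as this patch does for the Vec* nodes and IntermediateAddressIndex) automatically updates every expansion site.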