Diffstat (limited to 'compiler/optimizing')
39 files changed, 1210 insertions, 759 deletions
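Two themes run through this change. First, the kDexCacheViaMethod method-load and class/string-load kinds are renamed to kRuntimeCall, and the per-backend dex-cache load sequences are replaced by a shared CodeGenerator::GenerateInvokeStaticOrDirectRuntimeCall() helper that calls a quick trampoline. Second, RecordPcInfo() moves out of the Visit* call sites and into GenerateStaticOrDirectCall()/GenerateVirtualCall(), which now take a SlowPathCode* parameter and record the pc immediately after the call instruction (inside an ExactAssemblyScope on the VIXL backends). A minimal sketch of the resulting call shape, condensed from the hunks below (CodeGeneratorX is a hypothetical stand-in for any backend, not a class in this change):

  void CodeGeneratorX::GenerateStaticOrDirectCall(
      HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path) {
    if (invoke->GetMethodLoadKind() == HInvokeStaticOrDirect::MethodLoadKind::kRuntimeCall) {
      // The runtime trampoline resolves the method and performs the call itself.
      GenerateInvokeStaticOrDirectRuntimeCall(invoke, temp, slow_path);
      return;
    }
    // ... load callee_method into temp and emit the branch ...
    RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);  // Right after the call instruction.
  }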
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc index 65f3c72e99..136401898c 100644 --- a/compiler/optimizing/code_generator.cc +++ b/compiler/optimizing/code_generator.cc @@ -58,7 +58,7 @@ #include "parallel_move_resolver.h" #include "ssa_liveness_analysis.h" #include "scoped_thread_state_change-inl.h" -#include "thread-inl.h" +#include "thread-current-inl.h" #include "utils/assembler.h" namespace art { @@ -337,7 +337,7 @@ void CodeGenerator::CreateCommonInvokeLocationSummary( case HInvokeStaticOrDirect::MethodLoadKind::kRecursive: locations->SetInAt(call->GetSpecialInputIndex(), visitor->GetMethodLocation()); break; - case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: + case HInvokeStaticOrDirect::MethodLoadKind::kRuntimeCall: locations->AddTemp(visitor->GetMethodLocation()); locations->SetInAt(call->GetSpecialInputIndex(), Location::RequiresRegister()); break; @@ -350,6 +350,34 @@ void CodeGenerator::CreateCommonInvokeLocationSummary( } } +void CodeGenerator::GenerateInvokeStaticOrDirectRuntimeCall( + HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path) { + MoveConstant(temp, invoke->GetDexMethodIndex()); + + // The access check is unnecessary but we do not want to introduce + // extra entrypoints for the codegens that do not support some + // invoke type and fall back to the runtime call. + + // Initialize to anything to silence compiler warnings. + QuickEntrypointEnum entrypoint = kQuickInvokeStaticTrampolineWithAccessCheck; + switch (invoke->GetInvokeType()) { + case kStatic: + entrypoint = kQuickInvokeStaticTrampolineWithAccessCheck; + break; + case kDirect: + entrypoint = kQuickInvokeDirectTrampolineWithAccessCheck; + break; + case kSuper: + entrypoint = kQuickInvokeSuperTrampolineWithAccessCheck; + break; + case kVirtual: + case kInterface: + LOG(FATAL) << "Unexpected invoke type: " << invoke->GetInvokeType(); + UNREACHABLE(); + } + + InvokeRuntime(entrypoint, invoke, invoke->GetDexPc(), slow_path); +} void CodeGenerator::GenerateInvokeUnresolvedRuntimeCall(HInvokeUnresolved* invoke) { MoveConstant(invoke->GetLocations()->GetTemp(0), invoke->GetDexMethodIndex()); @@ -508,7 +536,7 @@ void CodeGenerator::GenerateUnresolvedFieldAccess( void CodeGenerator::CreateLoadClassRuntimeCallLocationSummary(HLoadClass* cls, Location runtime_type_index_location, Location runtime_return_location) { - DCHECK_EQ(cls->GetLoadKind(), HLoadClass::LoadKind::kDexCacheViaMethod); + DCHECK_EQ(cls->GetLoadKind(), HLoadClass::LoadKind::kRuntimeCall); DCHECK_EQ(cls->InputCount(), 1u); LocationSummary* locations = new (cls->GetBlock()->GetGraph()->GetArena()) LocationSummary( cls, LocationSummary::kCallOnMainOnly); @@ -518,7 +546,7 @@ void CodeGenerator::CreateLoadClassRuntimeCallLocationSummary(HLoadClass* cls, } void CodeGenerator::GenerateLoadClassRuntimeCall(HLoadClass* cls) { - DCHECK_EQ(cls->GetLoadKind(), HLoadClass::LoadKind::kDexCacheViaMethod); + DCHECK_EQ(cls->GetLoadKind(), HLoadClass::LoadKind::kRuntimeCall); LocationSummary* locations = cls->GetLocations(); MoveConstant(locations->GetTemp(0), cls->GetTypeIndex().index_); if (cls->NeedsAccessCheck()) { @@ -557,6 +585,9 @@ void CodeGenerator::BlockIfInRegister(Location location, bool is_out) const { } void CodeGenerator::AllocateLocations(HInstruction* instruction) { + for (HEnvironment* env = instruction->GetEnvironment(); env != nullptr; env = env->GetParent()) { + env->AllocateLocations(); + } instruction->Accept(GetLocationBuilder());
DCHECK(CheckTypeConsistency(instruction)); LocationSummary* locations = instruction->GetLocations(); @@ -1400,20 +1431,6 @@ void CodeGenerator::CreateSystemArrayCopyLocationSummary(HInvoke* invoke) { locations->AddTemp(Location::RequiresRegister()); } -uint32_t CodeGenerator::GetReferenceSlowFlagOffset() const { - ScopedObjectAccess soa(Thread::Current()); - mirror::Class* klass = mirror::Reference::GetJavaLangRefReference(); - DCHECK(klass->IsInitialized()); - return klass->GetSlowPathFlagOffset().Uint32Value(); -} - -uint32_t CodeGenerator::GetReferenceDisableFlagOffset() const { - ScopedObjectAccess soa(Thread::Current()); - mirror::Class* klass = mirror::Reference::GetJavaLangRefReference(); - DCHECK(klass->IsInitialized()); - return klass->GetDisableIntrinsicFlagOffset().Uint32Value(); -} - void CodeGenerator::EmitJitRoots(uint8_t* code, Handle<mirror::ObjectArray<mirror::Object>> roots, const uint8_t* roots_data) { diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h index c2b2ebfade..7bf43f7971 100644 --- a/compiler/optimizing/code_generator.h +++ b/compiler/optimizing/code_generator.h @@ -31,6 +31,7 @@ #include "nodes.h" #include "optimizing_compiler_stats.h" #include "read_barrier_option.h" +#include "stack.h" #include "stack_map_stream.h" #include "string_reference.h" #include "type_reference.h" @@ -495,6 +496,8 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { static void CreateCommonInvokeLocationSummary( HInvoke* invoke, InvokeDexCallingConventionVisitor* visitor); + void GenerateInvokeStaticOrDirectRuntimeCall( + HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path); void GenerateInvokeUnresolvedRuntimeCall(HInvokeUnresolved* invoke); void GenerateInvokePolymorphicCall(HInvokePolymorphic* invoke); @@ -541,7 +544,7 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { case HLoadString::LoadKind::kBssEntry: DCHECK(load->NeedsEnvironment()); return LocationSummary::kCallOnSlowPath; - case HLoadString::LoadKind::kDexCacheViaMethod: + case HLoadString::LoadKind::kRuntimeCall: DCHECK(load->NeedsEnvironment()); return LocationSummary::kCallOnMainOnly; case HLoadString::LoadKind::kJitTableAddress: @@ -563,18 +566,17 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { HInvokeStaticOrDirect* invoke) = 0; // Generate a call to a static or direct method. - virtual void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) = 0; + virtual void GenerateStaticOrDirectCall( + HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path = nullptr) = 0; // Generate a call to a virtual method. - virtual void GenerateVirtualCall(HInvokeVirtual* invoke, Location temp) = 0; + virtual void GenerateVirtualCall( + HInvokeVirtual* invoke, Location temp, SlowPathCode* slow_path = nullptr) = 0; // Copy the result of a call into the given target. 
virtual void MoveFromReturnRegister(Location trg, Primitive::Type type) = 0; virtual void GenerateNop() = 0; - uint32_t GetReferenceSlowFlagOffset() const; - uint32_t GetReferenceDisableFlagOffset() const; - static QuickEntrypointEnum GetArrayAllocationEntrypoint(Handle<mirror::Class> array_klass); protected: diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index c66bd77d6b..8300f8186e 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -47,7 +47,6 @@ static bool ExpectedPairLayout(Location location) { return ((location.low() & 1) == 0) && (location.low() + 1 == location.high()); } -static constexpr int kCurrentMethodStackOffset = 0; static constexpr Register kMethodRegisterArgument = R0; static constexpr Register kCoreAlwaysSpillRegister = R5; @@ -3589,7 +3588,6 @@ void InstructionCodeGeneratorARM::VisitInvokeStaticOrDirect(HInvokeStaticOrDirec LocationSummary* locations = invoke->GetLocations(); codegen_->GenerateStaticOrDirectCall( invoke, locations->HasTemps() ? locations->GetTemp(0) : Location::NoLocation()); - codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); } void LocationsBuilderARM::HandleInvoke(HInvoke* invoke) { @@ -3613,7 +3611,6 @@ void InstructionCodeGeneratorARM::VisitInvokeVirtual(HInvokeVirtual* invoke) { codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0)); DCHECK(!codegen_->IsLeafMethod()); - codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); } void LocationsBuilderARM::VisitInvokeInterface(HInvokeInterface* invoke) { @@ -7137,7 +7134,7 @@ HLoadClass::LoadKind CodeGeneratorARM::GetSupportedLoadClassKind( DCHECK(Runtime::Current()->UseJitCompilation()); break; case HLoadClass::LoadKind::kBootImageAddress: - case HLoadClass::LoadKind::kDexCacheViaMethod: + case HLoadClass::LoadKind::kRuntimeCall: break; } return desired_class_load_kind; @@ -7145,7 +7142,7 @@ HLoadClass::LoadKind CodeGeneratorARM::GetSupportedLoadClassKind( void LocationsBuilderARM::VisitLoadClass(HLoadClass* cls) { HLoadClass::LoadKind load_kind = cls->GetLoadKind(); - if (load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) { + if (load_kind == HLoadClass::LoadKind::kRuntimeCall) { InvokeRuntimeCallingConvention calling_convention; CodeGenerator::CreateLoadClassRuntimeCallLocationSummary( cls, @@ -7198,7 +7195,7 @@ void LocationsBuilderARM::VisitLoadClass(HLoadClass* cls) { // move. 
void InstructionCodeGeneratorARM::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFETY_ANALYSIS { HLoadClass::LoadKind load_kind = cls->GetLoadKind(); - if (load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) { + if (load_kind == HLoadClass::LoadKind::kRuntimeCall) { codegen_->GenerateLoadClassRuntimeCall(cls); return; } @@ -7270,7 +7267,7 @@ void InstructionCodeGeneratorARM::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFE GenerateGcRootFieldLoad(cls, out_loc, out, /* offset */ 0, read_barrier_option); break; } - case HLoadClass::LoadKind::kDexCacheViaMethod: + case HLoadClass::LoadKind::kRuntimeCall: case HLoadClass::LoadKind::kInvalid: LOG(FATAL) << "UNREACHABLE"; UNREACHABLE(); @@ -7332,7 +7329,7 @@ HLoadString::LoadKind CodeGeneratorARM::GetSupportedLoadStringKind( DCHECK(Runtime::Current()->UseJitCompilation()); break; case HLoadString::LoadKind::kBootImageAddress: - case HLoadString::LoadKind::kDexCacheViaMethod: + case HLoadString::LoadKind::kRuntimeCall: break; } return desired_string_load_kind; @@ -7342,7 +7339,7 @@ void LocationsBuilderARM::VisitLoadString(HLoadString* load) { LocationSummary::CallKind call_kind = CodeGenerator::GetLoadStringCallKind(load); LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind); HLoadString::LoadKind load_kind = load->GetLoadKind(); - if (load_kind == HLoadString::LoadKind::kDexCacheViaMethod) { + if (load_kind == HLoadString::LoadKind::kRuntimeCall) { locations->SetOut(Location::RegisterLocation(R0)); } else { locations->SetOut(Location::RequiresRegister()); @@ -7429,7 +7426,7 @@ void InstructionCodeGeneratorARM::VisitLoadString(HLoadString* load) NO_THREAD_S } // TODO: Consider re-adding the compiler code to do string dex cache lookup again. - DCHECK(load_kind == HLoadString::LoadKind::kDexCacheViaMethod); + DCHECK(load_kind == HLoadString::LoadKind::kRuntimeCall); InvokeRuntimeCallingConvention calling_convention; DCHECK_EQ(calling_convention.GetRegisterAt(0), out); __ LoadImmediate(calling_convention.GetRegisterAt(0), load->GetStringIndex().index_); @@ -8946,7 +8943,8 @@ Register CodeGeneratorARM::GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOr // save one load. However, since this is just an intrinsic slow path we prefer this // simple and more robust approach rather than trying to determine if that's the case. SlowPathCode* slow_path = GetCurrentSlowPath(); - if (slow_path != nullptr && slow_path->IsCoreRegisterSaved(location.AsRegister<Register>())) { + DCHECK(slow_path != nullptr); // For intrinsified invokes the call is emitted on the slow path. + if (slow_path->IsCoreRegisterSaved(location.AsRegister<Register>())) { int stack_offset = slow_path->GetStackOffsetOfCoreRegister(location.AsRegister<Register>()); __ LoadFromOffset(kLoadWord, temp, SP, stack_offset); return temp; @@ -8954,8 +8952,8 @@ Register CodeGeneratorARM::GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOr return location.AsRegister<Register>(); } -Location CodeGeneratorARM::GenerateCalleeMethodStaticOrDirectCall(HInvokeStaticOrDirect* invoke, - Location temp) { +void CodeGeneratorARM::GenerateStaticOrDirectCall( + HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path) { Location callee_method = temp; // For all kinds except kRecursive, callee will be in temp.
switch (invoke->GetMethodLoadKind()) { case HInvokeStaticOrDirect::MethodLoadKind::kStringInit: { @@ -8992,35 +8990,11 @@ Location CodeGeneratorARM::GenerateCalleeMethodStaticOrDirectCall(HInvokeStaticO __ LoadFromOffset(kLoadWord, temp.AsRegister<Register>(), base_reg, offset); break; } - case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: { - Location current_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()); - Register method_reg; - Register reg = temp.AsRegister<Register>(); - if (current_method.IsRegister()) { - method_reg = current_method.AsRegister<Register>(); - } else { - DCHECK(invoke->GetLocations()->Intrinsified()); - DCHECK(!current_method.IsValid()); - method_reg = reg; - __ LoadFromOffset(kLoadWord, reg, SP, kCurrentMethodStackOffset); - } - // /* ArtMethod*[] */ temp = temp.ptr_sized_fields_->dex_cache_resolved_methods_; - __ LoadFromOffset(kLoadWord, - reg, - method_reg, - ArtMethod::DexCacheResolvedMethodsOffset(kArmPointerSize).Int32Value()); - // temp = temp[index_in_cache]; - // Note: Don't use invoke->GetTargetMethod() as it may point to a different dex file. - uint32_t index_in_cache = invoke->GetDexMethodIndex(); - __ LoadFromOffset(kLoadWord, reg, reg, CodeGenerator::GetCachePointerOffset(index_in_cache)); - break; + case HInvokeStaticOrDirect::MethodLoadKind::kRuntimeCall: { + GenerateInvokeStaticOrDirectRuntimeCall(invoke, temp, slow_path); + return; // No code pointer retrieval; the runtime performs the call directly. } } - return callee_method; -} - -void CodeGeneratorARM::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) { - Location callee_method = GenerateCalleeMethodStaticOrDirectCall(invoke, temp); switch (invoke->GetCodePtrLocation()) { case HInvokeStaticOrDirect::CodePtrLocation::kCallSelf: @@ -9035,11 +9009,13 @@ void CodeGeneratorARM::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, __ blx(LR); break; } + RecordPcInfo(invoke, invoke->GetDexPc(), slow_path); DCHECK(!IsLeafMethod()); } -void CodeGeneratorARM::GenerateVirtualCall(HInvokeVirtual* invoke, Location temp_location) { +void CodeGeneratorARM::GenerateVirtualCall( + HInvokeVirtual* invoke, Location temp_location, SlowPathCode* slow_path) { Register temp = temp_location.AsRegister<Register>(); uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset( invoke->GetVTableIndex(), kArmPointerSize).Uint32Value(); @@ -9070,6 +9046,7 @@ void CodeGeneratorARM::GenerateVirtualCall(HInvokeVirtual* invoke, Location temp __ LoadFromOffset(kLoadWord, LR, temp, entry_point); // LR(); __ blx(LR); + RecordPcInfo(invoke, invoke->GetDexPc(), slow_path); } CodeGeneratorARM::PcRelativePatchInfo* CodeGeneratorARM::NewPcRelativeMethodPatch( diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h index 2409a4d38d..398b6ed7d8 100644 --- a/compiler/optimizing/code_generator_arm.h +++ b/compiler/optimizing/code_generator_arm.h @@ -455,9 +455,10 @@ class CodeGeneratorARM : public CodeGenerator { const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info, HInvokeStaticOrDirect* invoke) OVERRIDE; - Location GenerateCalleeMethodStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp); - void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) OVERRIDE; - void GenerateVirtualCall(HInvokeVirtual* invoke, Location temp) OVERRIDE; + void GenerateStaticOrDirectCall( + HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path = nullptr) OVERRIDE; + void GenerateVirtualCall( + 
HInvokeVirtual* invoke, Location temp, SlowPathCode* slow_path = nullptr) OVERRIDE; void MoveFromReturnRegister(Location trg, Primitive::Type type) OVERRIDE; diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index 7d9c61b76c..a84f8f3308 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -78,7 +78,6 @@ using helpers::VIXLRegCodeFromART; using helpers::WRegisterFrom; using helpers::XRegisterFrom; -static constexpr int kCurrentMethodStackOffset = 0; // The compare/jump sequence will generate about (1.5 * num_entries + 3) instructions, while the jump // table version generates 7 instructions and num_entries literals. The compare/jump sequence // generates less code/data with a small num_entries. @@ -4497,8 +4496,8 @@ HInvokeStaticOrDirect::DispatchInfo CodeGeneratorARM64::GetSupportedInvokeStatic return desired_dispatch_info; } -Location CodeGeneratorARM64::GenerateCalleeMethodStaticOrDirectCall(HInvokeStaticOrDirect* invoke, - Location temp) { +void CodeGeneratorARM64::GenerateStaticOrDirectCall( + HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path) { // Make sure that ArtMethod* is passed in kArtMethodRegister as per the calling convention. Location callee_method = temp; // For all kinds except kRecursive, callee will be in temp. switch (invoke->GetMethodLoadKind()) { case HInvokeStaticOrDirect::MethodLoadKind::kStringInit: { @@ -4539,40 +4538,22 @@ Location CodeGeneratorARM64::GenerateCalleeMethodStaticOrDirectCall(HInvokeStati EmitLdrOffsetPlaceholder(ldr_label, XRegisterFrom(temp), XRegisterFrom(temp)); break; } - case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: { - Location current_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()); - Register reg = XRegisterFrom(temp); - Register method_reg; - if (current_method.IsRegister()) { - method_reg = XRegisterFrom(current_method); - } else { - DCHECK(invoke->GetLocations()->Intrinsified()); - DCHECK(!current_method.IsValid()); - method_reg = reg; - __ Ldr(reg.X(), MemOperand(sp, kCurrentMethodStackOffset)); - } - - // /* ArtMethod*[] */ temp = temp.ptr_sized_fields_->dex_cache_resolved_methods_; - __ Ldr(reg.X(), - MemOperand(method_reg.X(), - ArtMethod::DexCacheResolvedMethodsOffset(kArm64PointerSize).Int32Value())); - // temp = temp[index_in_cache]; - // Note: Don't use invoke->GetTargetMethod() as it may point to a different dex file. - uint32_t index_in_cache = invoke->GetDexMethodIndex(); - __ Ldr(reg.X(), MemOperand(reg.X(), GetCachePointerOffset(index_in_cache))); - break; + case HInvokeStaticOrDirect::MethodLoadKind::kRuntimeCall: { + GenerateInvokeStaticOrDirectRuntimeCall(invoke, temp, slow_path); + return; // No code pointer retrieval; the runtime performs the call directly. } } - return callee_method; -} - -void CodeGeneratorARM64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) { - // All registers are assumed to be correctly set up. - Location callee_method = GenerateCalleeMethodStaticOrDirectCall(invoke, temp); switch (invoke->GetCodePtrLocation()) { case HInvokeStaticOrDirect::CodePtrLocation::kCallSelf: - __ Bl(&frame_entry_label_); + { + // Use a scope to help guarantee that `RecordPcInfo()` records the correct pc.
+ ExactAssemblyScope eas(GetVIXLAssembler(), + kInstructionSize, + CodeBufferCheckScope::kExactSize); + __ bl(&frame_entry_label_); + RecordPcInfo(invoke, invoke->GetDexPc(), slow_path); + } break; case HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod: // LR = callee_method->entry_point_from_quick_compiled_code_; @@ -4580,14 +4561,13 @@ void CodeGeneratorARM64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invok XRegisterFrom(callee_method), ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64PointerSize).Int32Value())); { - // To ensure that the pc position is recorded immediately after the `blr` instruction - // BLR must be the last instruction emitted in this function. - // Recording the pc will occur right after returning from this function. + // Use a scope to help guarantee that `RecordPcInfo()` records the correct pc. ExactAssemblyScope eas(GetVIXLAssembler(), kInstructionSize, CodeBufferCheckScope::kExactSize); // lr() __ blr(lr); + RecordPcInfo(invoke, invoke->GetDexPc(), slow_path); } break; } @@ -4595,7 +4575,8 @@ void CodeGeneratorARM64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invok DCHECK(!IsLeafMethod()); } -void CodeGeneratorARM64::GenerateVirtualCall(HInvokeVirtual* invoke, Location temp_in) { +void CodeGeneratorARM64::GenerateVirtualCall( + HInvokeVirtual* invoke, Location temp_in, SlowPathCode* slow_path) { // Use the calling convention instead of the location of the receiver, as // intrinsics may have put the receiver in a different register. In the intrinsics // slow path, the arguments have been moved to the right place, so here we are @@ -4629,12 +4610,11 @@ void CodeGeneratorARM64::GenerateVirtualCall(HInvokeVirtual* invoke, Location te // lr = temp->GetEntryPoint(); __ Ldr(lr, MemOperand(temp, entry_point.SizeValue())); { - // To ensure that the pc position is recorded immediately after the `blr` instruction - // BLR should be the last instruction emitted in this function. - // Recording the pc will occur right after returning from this function. + // Use a scope to help guarantee that `RecordPcInfo()` records the correct pc. ExactAssemblyScope eas(GetVIXLAssembler(), kInstructionSize, CodeBufferCheckScope::kExactSize); // lr(); __ blr(lr); + RecordPcInfo(invoke, invoke->GetDexPc(), slow_path); } } @@ -4831,7 +4811,6 @@ void InstructionCodeGeneratorARM64::VisitInvokeStaticOrDirect(HInvokeStaticOrDir LocationSummary* locations = invoke->GetLocations(); codegen_->GenerateStaticOrDirectCall( invoke, locations->HasTemps() ? 
locations->GetTemp(0) : Location::NoLocation()); - codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); } void InstructionCodeGeneratorARM64::VisitInvokeVirtual(HInvokeVirtual* invoke) { @@ -4844,7 +4823,6 @@ void InstructionCodeGeneratorARM64::VisitInvokeVirtual(HInvokeVirtual* invoke) { EmissionCheckScope guard(GetVIXLAssembler(), kInvokeCodeMarginSizeInBytes); codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0)); DCHECK(!codegen_->IsLeafMethod()); - codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); } HLoadClass::LoadKind CodeGeneratorARM64::GetSupportedLoadClassKind( @@ -4863,7 +4841,7 @@ HLoadClass::LoadKind CodeGeneratorARM64::GetSupportedLoadClassKind( DCHECK(Runtime::Current()->UseJitCompilation()); break; case HLoadClass::LoadKind::kBootImageAddress: - case HLoadClass::LoadKind::kDexCacheViaMethod: + case HLoadClass::LoadKind::kRuntimeCall: break; } return desired_class_load_kind; @@ -4871,7 +4849,7 @@ HLoadClass::LoadKind CodeGeneratorARM64::GetSupportedLoadClassKind( void LocationsBuilderARM64::VisitLoadClass(HLoadClass* cls) { HLoadClass::LoadKind load_kind = cls->GetLoadKind(); - if (load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) { + if (load_kind == HLoadClass::LoadKind::kRuntimeCall) { InvokeRuntimeCallingConvention calling_convention; CodeGenerator::CreateLoadClassRuntimeCallLocationSummary( cls, @@ -4916,7 +4894,7 @@ void LocationsBuilderARM64::VisitLoadClass(HLoadClass* cls) { // move. void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFETY_ANALYSIS { HLoadClass::LoadKind load_kind = cls->GetLoadKind(); - if (load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) { + if (load_kind == HLoadClass::LoadKind::kRuntimeCall) { codegen_->GenerateLoadClassRuntimeCall(cls); return; } @@ -4998,7 +4976,7 @@ void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) NO_THREAD_SA read_barrier_option); break; } - case HLoadClass::LoadKind::kDexCacheViaMethod: + case HLoadClass::LoadKind::kRuntimeCall: case HLoadClass::LoadKind::kInvalid: LOG(FATAL) << "UNREACHABLE"; UNREACHABLE(); @@ -5054,7 +5032,7 @@ HLoadString::LoadKind CodeGeneratorARM64::GetSupportedLoadStringKind( DCHECK(Runtime::Current()->UseJitCompilation()); break; case HLoadString::LoadKind::kBootImageAddress: - case HLoadString::LoadKind::kDexCacheViaMethod: + case HLoadString::LoadKind::kRuntimeCall: break; } return desired_string_load_kind; @@ -5063,7 +5041,7 @@ HLoadString::LoadKind CodeGeneratorARM64::GetSupportedLoadStringKind( void LocationsBuilderARM64::VisitLoadString(HLoadString* load) { LocationSummary::CallKind call_kind = CodeGenerator::GetLoadStringCallKind(load); LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind); - if (load->GetLoadKind() == HLoadString::LoadKind::kDexCacheViaMethod) { + if (load->GetLoadKind() == HLoadString::LoadKind::kRuntimeCall) { InvokeRuntimeCallingConvention calling_convention; locations->SetOut(calling_convention.GetReturnLocation(load->GetType())); } else { diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h index 7a4b3d4805..5bb2ab57df 100644 --- a/compiler/optimizing/code_generator_arm64.h +++ b/compiler/optimizing/code_generator_arm64.h @@ -540,9 +540,10 @@ class CodeGeneratorARM64 : public CodeGenerator { const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info, HInvokeStaticOrDirect* invoke) OVERRIDE; - Location GenerateCalleeMethodStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp); - void 
GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) OVERRIDE; - void GenerateVirtualCall(HInvokeVirtual* invoke, Location temp) OVERRIDE; + void GenerateStaticOrDirectCall( + HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path = nullptr) OVERRIDE; + void GenerateVirtualCall( + HInvokeVirtual* invoke, Location temp, SlowPathCode* slow_path = nullptr) OVERRIDE; void MoveFromReturnRegister(Location trg ATTRIBUTE_UNUSED, Primitive::Type type ATTRIBUTE_UNUSED) OVERRIDE { diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc index 1f8e1efd5e..d5e3723e68 100644 --- a/compiler/optimizing/code_generator_arm_vixl.cc +++ b/compiler/optimizing/code_generator_arm_vixl.cc @@ -76,7 +76,6 @@ static bool ExpectedPairLayout(Location location) { // Use a local definition to prevent copying mistakes. static constexpr size_t kArmWordSize = static_cast<size_t>(kArmPointerSize); static constexpr size_t kArmBitsPerWord = kArmWordSize * kBitsPerByte; -static constexpr int kCurrentMethodStackOffset = 0; static constexpr uint32_t kPackedSwitchCompareJumpThreshold = 7; // Reference load (except object array loads) is using LDR Rt, [Rn, #offset] which can handle @@ -3678,7 +3677,6 @@ void InstructionCodeGeneratorARMVIXL::VisitInvokeStaticOrDirect(HInvokeStaticOrD LocationSummary* locations = invoke->GetLocations(); codegen_->GenerateStaticOrDirectCall( invoke, locations->HasTemps() ? locations->GetTemp(0) : Location::NoLocation()); - codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); } void LocationsBuilderARMVIXL::HandleInvoke(HInvoke* invoke) { @@ -3701,7 +3699,6 @@ void InstructionCodeGeneratorARMVIXL::VisitInvokeVirtual(HInvokeVirtual* invoke) } codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0)); - codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); DCHECK(!codegen_->IsLeafMethod()); } @@ -7252,7 +7249,7 @@ HLoadClass::LoadKind CodeGeneratorARMVIXL::GetSupportedLoadClassKind( DCHECK(Runtime::Current()->UseJitCompilation()); break; case HLoadClass::LoadKind::kBootImageAddress: - case HLoadClass::LoadKind::kDexCacheViaMethod: + case HLoadClass::LoadKind::kRuntimeCall: break; } return desired_class_load_kind; @@ -7260,7 +7257,7 @@ HLoadClass::LoadKind CodeGeneratorARMVIXL::GetSupportedLoadClassKind( void LocationsBuilderARMVIXL::VisitLoadClass(HLoadClass* cls) { HLoadClass::LoadKind load_kind = cls->GetLoadKind(); - if (load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) { + if (load_kind == HLoadClass::LoadKind::kRuntimeCall) { InvokeRuntimeCallingConventionARMVIXL calling_convention; CodeGenerator::CreateLoadClassRuntimeCallLocationSummary( cls, @@ -7313,7 +7310,7 @@ void LocationsBuilderARMVIXL::VisitLoadClass(HLoadClass* cls) { // move. 
void InstructionCodeGeneratorARMVIXL::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFETY_ANALYSIS { HLoadClass::LoadKind load_kind = cls->GetLoadKind(); - if (load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) { + if (load_kind == HLoadClass::LoadKind::kRuntimeCall) { codegen_->GenerateLoadClassRuntimeCall(cls); return; } @@ -7375,7 +7372,7 @@ void InstructionCodeGeneratorARMVIXL::VisitLoadClass(HLoadClass* cls) NO_THREAD_ GenerateGcRootFieldLoad(cls, out_loc, out, /* offset */ 0, read_barrier_option); break; } - case HLoadClass::LoadKind::kDexCacheViaMethod: + case HLoadClass::LoadKind::kRuntimeCall: case HLoadClass::LoadKind::kInvalid: LOG(FATAL) << "UNREACHABLE"; UNREACHABLE(); @@ -7444,7 +7441,7 @@ HLoadString::LoadKind CodeGeneratorARMVIXL::GetSupportedLoadStringKind( DCHECK(Runtime::Current()->UseJitCompilation()); break; case HLoadString::LoadKind::kBootImageAddress: - case HLoadString::LoadKind::kDexCacheViaMethod: + case HLoadString::LoadKind::kRuntimeCall: break; } return desired_string_load_kind; @@ -7454,7 +7451,7 @@ void LocationsBuilderARMVIXL::VisitLoadString(HLoadString* load) { LocationSummary::CallKind call_kind = CodeGenerator::GetLoadStringCallKind(load); LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind); HLoadString::LoadKind load_kind = load->GetLoadKind(); - if (load_kind == HLoadString::LoadKind::kDexCacheViaMethod) { + if (load_kind == HLoadString::LoadKind::kRuntimeCall) { locations->SetOut(LocationFrom(r0)); } else { locations->SetOut(Location::RequiresRegister()); @@ -7532,7 +7529,7 @@ void InstructionCodeGeneratorARMVIXL::VisitLoadString(HLoadString* load) NO_THRE } // TODO: Re-add the compiler code to do string dex cache lookup again. - DCHECK_EQ(load->GetLoadKind(), HLoadString::LoadKind::kDexCacheViaMethod); + DCHECK_EQ(load->GetLoadKind(), HLoadString::LoadKind::kRuntimeCall); InvokeRuntimeCallingConventionARMVIXL calling_convention; __ Mov(calling_convention.GetRegisterAt(0), load->GetStringIndex().index_); codegen_->InvokeRuntime(kQuickResolveString, load, load->GetDexPc()); @@ -9119,8 +9116,8 @@ vixl32::Register CodeGeneratorARMVIXL::GetInvokeStaticOrDirectExtraParameter( return RegisterFrom(location); } -Location CodeGeneratorARMVIXL::GenerateCalleeMethodStaticOrDirectCall( - HInvokeStaticOrDirect* invoke, Location temp) { +void CodeGeneratorARMVIXL::GenerateStaticOrDirectCall( + HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path) { Location callee_method = temp; // For all kinds except kRecursive, callee will be in temp. 
switch (invoke->GetMethodLoadKind()) { case HInvokeStaticOrDirect::MethodLoadKind::kStringInit: { @@ -9151,42 +9148,22 @@ Location CodeGeneratorARMVIXL::GenerateCalleeMethodStaticOrDirectCall( GetAssembler()->LoadFromOffset(kLoadWord, RegisterFrom(temp), base_reg, offset); break; } - case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: { - Location current_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()); - vixl32::Register method_reg; - vixl32::Register reg = RegisterFrom(temp); - if (current_method.IsRegister()) { - method_reg = RegisterFrom(current_method); - } else { - DCHECK(invoke->GetLocations()->Intrinsified()); - DCHECK(!current_method.IsValid()); - method_reg = reg; - GetAssembler()->LoadFromOffset(kLoadWord, reg, sp, kCurrentMethodStackOffset); - } - // /* ArtMethod*[] */ temp = temp.ptr_sized_fields_->dex_cache_resolved_methods_; - GetAssembler()->LoadFromOffset( - kLoadWord, - reg, - method_reg, - ArtMethod::DexCacheResolvedMethodsOffset(kArmPointerSize).Int32Value()); - // temp = temp[index_in_cache]; - // Note: Don't use invoke->GetTargetMethod() as it may point to a different dex file. - uint32_t index_in_cache = invoke->GetDexMethodIndex(); - GetAssembler()->LoadFromOffset( - kLoadWord, reg, reg, CodeGenerator::GetCachePointerOffset(index_in_cache)); - break; + case HInvokeStaticOrDirect::MethodLoadKind::kRuntimeCall: { + GenerateInvokeStaticOrDirectRuntimeCall(invoke, temp, slow_path); + return; // No code pointer retrieval; the runtime performs the call directly. } } - return callee_method; -} - -void CodeGeneratorARMVIXL::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, - Location temp) { - Location callee_method = GenerateCalleeMethodStaticOrDirectCall(invoke, temp); switch (invoke->GetCodePtrLocation()) { case HInvokeStaticOrDirect::CodePtrLocation::kCallSelf: - __ Bl(GetFrameEntryLabel()); + { + // Use a scope to help guarantee that `RecordPcInfo()` records the correct pc. + ExactAssemblyScope aas(GetVIXLAssembler(), + vixl32::k32BitT32InstructionSizeInBytes, + CodeBufferCheckScope::kMaximumSize); + __ bl(GetFrameEntryLabel()); + RecordPcInfo(invoke, invoke->GetDexPc(), slow_path); + } break; case HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod: // LR = callee_method->entry_point_from_quick_compiled_code_ @@ -9196,12 +9173,14 @@ void CodeGeneratorARMVIXL::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* inv RegisterFrom(callee_method), ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArmPointerSize).Int32Value()); { + // Use a scope to help guarantee that `RecordPcInfo()` records the correct pc. // blx in T32 has only a 16-bit encoding, which is why a stricter check for the scope is used.
ExactAssemblyScope aas(GetVIXLAssembler(), vixl32::k16BitT32InstructionSizeInBytes, CodeBufferCheckScope::kExactSize); // LR() __ blx(lr); + RecordPcInfo(invoke, invoke->GetDexPc(), slow_path); } break; } @@ -9209,7 +9188,8 @@ void CodeGeneratorARMVIXL::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* inv DCHECK(!IsLeafMethod()); } -void CodeGeneratorARMVIXL::GenerateVirtualCall(HInvokeVirtual* invoke, Location temp_location) { +void CodeGeneratorARMVIXL::GenerateVirtualCall( + HInvokeVirtual* invoke, Location temp_location, SlowPathCode* slow_path) { vixl32::Register temp = RegisterFrom(temp_location); uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset( invoke->GetVTableIndex(), kArmPointerSize).Uint32Value(); @@ -9245,15 +9225,16 @@ void CodeGeneratorARMVIXL::GenerateVirtualCall(HInvokeVirtual* invoke, Location GetAssembler()->LoadFromOffset(kLoadWord, temp, temp, method_offset); // LR = temp->GetEntryPoint(); GetAssembler()->LoadFromOffset(kLoadWord, lr, temp, entry_point); - // LR(); - // This `blx` *must* be the *last* instruction generated by this stub, so that calls to - // `RecordPcInfo()` immediately following record the correct pc. Use a scope to help guarantee - // that. - // blx in T32 has only 16bit encoding that's why a stricter check for the scope is used. - ExactAssemblyScope aas(GetVIXLAssembler(), - vixl32::k16BitT32InstructionSizeInBytes, - CodeBufferCheckScope::kExactSize); - __ blx(lr); + { + // Use a scope to help guarantee that `RecordPcInfo()` records the correct pc. + // blx in T32 has only a 16-bit encoding, which is why a stricter check for the scope is used. + ExactAssemblyScope aas(GetVIXLAssembler(), + vixl32::k16BitT32InstructionSizeInBytes, + CodeBufferCheckScope::kExactSize); + // LR(); + __ blx(lr); + RecordPcInfo(invoke, invoke->GetDexPc(), slow_path); + } } CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewPcRelativeMethodPatch( diff --git a/compiler/optimizing/code_generator_arm_vixl.h b/compiler/optimizing/code_generator_arm_vixl.h index ef809510ad..5320f71290 100644 --- a/compiler/optimizing/code_generator_arm_vixl.h +++ b/compiler/optimizing/code_generator_arm_vixl.h @@ -538,9 +538,10 @@ class CodeGeneratorARMVIXL : public CodeGenerator { const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info, HInvokeStaticOrDirect* invoke) OVERRIDE; - Location GenerateCalleeMethodStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp); - void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) OVERRIDE; - void GenerateVirtualCall(HInvokeVirtual* invoke, Location temp) OVERRIDE; + void GenerateStaticOrDirectCall( + HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path = nullptr) OVERRIDE; + void GenerateVirtualCall( + HInvokeVirtual* invoke, Location temp, SlowPathCode* slow_path = nullptr) OVERRIDE; void MoveFromReturnRegister(Location trg, Primitive::Type type) OVERRIDE; diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc index 1978534112..8560e3e5f6 100644 --- a/compiler/optimizing/code_generator_mips.cc +++ b/compiler/optimizing/code_generator_mips.cc @@ -7016,12 +7016,12 @@ HLoadString::LoadKind CodeGeneratorMIPS::GetSupportedLoadStringKind( DCHECK(Runtime::Current()->UseJitCompilation()); fallback_load = false; break; - case HLoadString::LoadKind::kDexCacheViaMethod: + case HLoadString::LoadKind::kRuntimeCall: fallback_load = false; break; } if (fallback_load) { - desired_string_load_kind =
HLoadString::LoadKind::kDexCacheViaMethod; + desired_string_load_kind = HLoadString::LoadKind::kRuntimeCall; } return desired_string_load_kind; } @@ -7050,12 +7050,12 @@ HLoadClass::LoadKind CodeGeneratorMIPS::GetSupportedLoadClassKind( DCHECK(Runtime::Current()->UseJitCompilation()); fallback_load = false; break; - case HLoadClass::LoadKind::kDexCacheViaMethod: + case HLoadClass::LoadKind::kRuntimeCall: fallback_load = false; break; } if (fallback_load) { - desired_class_load_kind = HLoadClass::LoadKind::kDexCacheViaMethod; + desired_class_load_kind = HLoadClass::LoadKind::kRuntimeCall; } return desired_class_load_kind; } @@ -7105,13 +7105,14 @@ HInvokeStaticOrDirect::DispatchInfo CodeGeneratorMIPS::GetSupportedInvokeStaticO break; } if (fallback_load) { - dispatch_info.method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod; + dispatch_info.method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kRuntimeCall; dispatch_info.method_load_data = 0; } return dispatch_info; } -void CodeGeneratorMIPS::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) { +void CodeGeneratorMIPS::GenerateStaticOrDirectCall( + HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path) { // All registers are assumed to be correctly set up per the calling convention. Location callee_method = temp; // For all kinds except kRecursive, callee will be in temp. HInvokeStaticOrDirect::MethodLoadKind method_load_kind = invoke->GetMethodLoadKind(); @@ -7165,33 +7166,9 @@ void CodeGeneratorMIPS::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke __ LoadFromOffset(kLoadWord, temp.AsRegister<Register>(), base_reg, offset); } break; - case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: { - Location current_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()); - Register reg = temp.AsRegister<Register>(); - Register method_reg; - if (current_method.IsRegister()) { - method_reg = current_method.AsRegister<Register>(); - } else { - // TODO: use the appropriate DCHECK() here if possible. - // DCHECK(invoke->GetLocations()->Intrinsified()); - DCHECK(!current_method.IsValid()); - method_reg = reg; - __ Lw(reg, SP, kCurrentMethodStackOffset); - } - - // temp = temp->dex_cache_resolved_methods_; - __ LoadFromOffset(kLoadWord, - reg, - method_reg, - ArtMethod::DexCacheResolvedMethodsOffset(kMipsPointerSize).Int32Value()); - // temp = temp[index_in_cache]; - // Note: Don't use invoke->GetTargetMethod() as it may point to a different dex file. - uint32_t index_in_cache = invoke->GetDexMethodIndex(); - __ LoadFromOffset(kLoadWord, - reg, - reg, - CodeGenerator::GetCachePointerOffset(index_in_cache)); - break; + case HInvokeStaticOrDirect::MethodLoadKind::kRuntimeCall: { + GenerateInvokeStaticOrDirectRuntimeCall(invoke, temp, slow_path); + return; // No code pointer retrieval; the runtime performs the call directly. } } @@ -7211,6 +7188,8 @@ void CodeGeneratorMIPS::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke __ NopIfNoReordering(); break; } + RecordPcInfo(invoke, invoke->GetDexPc(), slow_path); + DCHECK(!IsLeafMethod()); } @@ -7228,10 +7207,10 @@ void InstructionCodeGeneratorMIPS::VisitInvokeStaticOrDirect(HInvokeStaticOrDire locations->HasTemps() ? 
locations->GetTemp(0) : Location::NoLocation()); - codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); } -void CodeGeneratorMIPS::GenerateVirtualCall(HInvokeVirtual* invoke, Location temp_location) { +void CodeGeneratorMIPS::GenerateVirtualCall( + HInvokeVirtual* invoke, Location temp_location, SlowPathCode* slow_path) { // Use the calling convention instead of the location of the receiver, as // intrinsics may have put the receiver in a different register. In the intrinsics // slow path, the arguments have been moved to the right place, so here we are @@ -7263,6 +7242,7 @@ void CodeGeneratorMIPS::GenerateVirtualCall(HInvokeVirtual* invoke, Location tem // T9(); __ Jalr(T9); __ NopIfNoReordering(); + RecordPcInfo(invoke, invoke->GetDexPc(), slow_path); } void InstructionCodeGeneratorMIPS::VisitInvokeVirtual(HInvokeVirtual* invoke) { @@ -7272,12 +7252,11 @@ void InstructionCodeGeneratorMIPS::VisitInvokeVirtual(HInvokeVirtual* invoke) { codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0)); DCHECK(!codegen_->IsLeafMethod()); - codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); } void LocationsBuilderMIPS::VisitLoadClass(HLoadClass* cls) { HLoadClass::LoadKind load_kind = cls->GetLoadKind(); - if (load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) { + if (load_kind == HLoadClass::LoadKind::kRuntimeCall) { InvokeRuntimeCallingConvention calling_convention; Location loc = Location::RegisterLocation(calling_convention.GetRegisterAt(0)); CodeGenerator::CreateLoadClassRuntimeCallLocationSummary(cls, loc, loc); @@ -7331,7 +7310,7 @@ void LocationsBuilderMIPS::VisitLoadClass(HLoadClass* cls) { // move. void InstructionCodeGeneratorMIPS::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFETY_ANALYSIS { HLoadClass::LoadKind load_kind = cls->GetLoadKind(); - if (load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) { + if (load_kind == HLoadClass::LoadKind::kRuntimeCall) { codegen_->GenerateLoadClassRuntimeCall(cls); return; } @@ -7350,7 +7329,7 @@ void InstructionCodeGeneratorMIPS::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAF base_or_current_method_reg = isR6 ? ZERO : locations->InAt(0).AsRegister<Register>(); break; case HLoadClass::LoadKind::kReferrersClass: - case HLoadClass::LoadKind::kDexCacheViaMethod: + case HLoadClass::LoadKind::kRuntimeCall: base_or_current_method_reg = locations->InAt(0).AsRegister<Register>(); break; default: @@ -7428,7 +7407,7 @@ void InstructionCodeGeneratorMIPS::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAF __ SetReorder(reordering); break; } - case HLoadClass::LoadKind::kDexCacheViaMethod: + case HLoadClass::LoadKind::kRuntimeCall: case HLoadClass::LoadKind::kInvalid: LOG(FATAL) << "UNREACHABLE"; UNREACHABLE(); @@ -7488,13 +7467,13 @@ void LocationsBuilderMIPS::VisitLoadString(HLoadString* load) { } FALLTHROUGH_INTENDED; // We need an extra register for PC-relative dex cache accesses. - case HLoadString::LoadKind::kDexCacheViaMethod: + case HLoadString::LoadKind::kRuntimeCall: locations->SetInAt(0, Location::RequiresRegister()); break; default: break; } - if (load_kind == HLoadString::LoadKind::kDexCacheViaMethod) { + if (load_kind == HLoadString::LoadKind::kRuntimeCall) { InvokeRuntimeCallingConvention calling_convention; locations->SetOut(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); } else { @@ -7610,7 +7589,7 @@ void InstructionCodeGeneratorMIPS::VisitLoadString(HLoadString* load) NO_THREAD_ } // TODO: Re-add the compiler code to do string dex cache lookup again. 
- DCHECK(load_kind == HLoadString::LoadKind::kDexCacheViaMethod); + DCHECK(load_kind == HLoadString::LoadKind::kRuntimeCall); InvokeRuntimeCallingConvention calling_convention; DCHECK_EQ(calling_convention.GetRegisterAt(0), out); __ LoadConst32(calling_convention.GetRegisterAt(0), load->GetStringIndex().index_); diff --git a/compiler/optimizing/code_generator_mips.h b/compiler/optimizing/code_generator_mips.h index 736b5070d9..d774219ba9 100644 --- a/compiler/optimizing/code_generator_mips.h +++ b/compiler/optimizing/code_generator_mips.h @@ -552,8 +552,10 @@ class CodeGeneratorMIPS : public CodeGenerator { const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info, HInvokeStaticOrDirect* invoke) OVERRIDE; - void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp); - void GenerateVirtualCall(HInvokeVirtual* invoke, Location temp) OVERRIDE; + void GenerateStaticOrDirectCall( + HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path = nullptr) OVERRIDE; + void GenerateVirtualCall( + HInvokeVirtual* invoke, Location temp, SlowPathCode* slow_path = nullptr) OVERRIDE; void MoveFromReturnRegister(Location trg ATTRIBUTE_UNUSED, Primitive::Type type ATTRIBUTE_UNUSED) OVERRIDE { diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc index 096139191e..da43c4e757 100644 --- a/compiler/optimizing/code_generator_mips64.cc +++ b/compiler/optimizing/code_generator_mips64.cc @@ -951,7 +951,7 @@ CodeGeneratorMIPS64::CodeGeneratorMIPS64(HGraph* graph, location_builder_(graph, this), instruction_visitor_(graph, this), move_resolver_(graph->GetArena(), this), - assembler_(graph->GetArena()), + assembler_(graph->GetArena(), &isa_features), isa_features_(isa_features), uint32_literals_(std::less<uint32_t>(), graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), @@ -4873,11 +4873,11 @@ HLoadString::LoadKind CodeGeneratorMIPS64::GetSupportedLoadStringKind( DCHECK(Runtime::Current()->UseJitCompilation()); break; case HLoadString::LoadKind::kBootImageAddress: - case HLoadString::LoadKind::kDexCacheViaMethod: + case HLoadString::LoadKind::kRuntimeCall: break; } if (fallback_load) { - desired_string_load_kind = HLoadString::LoadKind::kDexCacheViaMethod; + desired_string_load_kind = HLoadString::LoadKind::kRuntimeCall; } return desired_string_load_kind; } @@ -4899,11 +4899,11 @@ HLoadClass::LoadKind CodeGeneratorMIPS64::GetSupportedLoadClassKind( DCHECK(Runtime::Current()->UseJitCompilation()); break; case HLoadClass::LoadKind::kBootImageAddress: - case HLoadClass::LoadKind::kDexCacheViaMethod: + case HLoadClass::LoadKind::kRuntimeCall: break; } if (fallback_load) { - desired_class_load_kind = HLoadClass::LoadKind::kDexCacheViaMethod; + desired_class_load_kind = HLoadClass::LoadKind::kRuntimeCall; } return desired_class_load_kind; } @@ -4915,7 +4915,8 @@ HInvokeStaticOrDirect::DispatchInfo CodeGeneratorMIPS64::GetSupportedInvokeStati return desired_dispatch_info; } -void CodeGeneratorMIPS64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) { +void CodeGeneratorMIPS64::GenerateStaticOrDirectCall( + HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path) { // All registers are assumed to be correctly set up per the calling convention. Location callee_method = temp; // For all kinds except kRecursive, callee will be in temp. 
HInvokeStaticOrDirect::MethodLoadKind method_load_kind = invoke->GetMethodLoadKind(); @@ -4956,33 +4957,9 @@ void CodeGeneratorMIPS64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invo __ Ld(temp.AsRegister<GpuRegister>(), AT, /* placeholder */ 0x5678); break; } - case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: { - Location current_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()); - GpuRegister reg = temp.AsRegister<GpuRegister>(); - GpuRegister method_reg; - if (current_method.IsRegister()) { - method_reg = current_method.AsRegister<GpuRegister>(); - } else { - // TODO: use the appropriate DCHECK() here if possible. - // DCHECK(invoke->GetLocations()->Intrinsified()); - DCHECK(!current_method.IsValid()); - method_reg = reg; - __ Ld(reg, SP, kCurrentMethodStackOffset); - } - - // temp = temp->dex_cache_resolved_methods_; - __ LoadFromOffset(kLoadDoubleword, - reg, - method_reg, - ArtMethod::DexCacheResolvedMethodsOffset(kMips64PointerSize).Int32Value()); - // temp = temp[index_in_cache]; - // Note: Don't use invoke->GetTargetMethod() as it may point to a different dex file. - uint32_t index_in_cache = invoke->GetDexMethodIndex(); - __ LoadFromOffset(kLoadDoubleword, - reg, - reg, - CodeGenerator::GetCachePointerOffset(index_in_cache)); - break; + case HInvokeStaticOrDirect::MethodLoadKind::kRuntimeCall: { + GenerateInvokeStaticOrDirectRuntimeCall(invoke, temp, slow_path); + return; // No code pointer retrieval; the runtime performs the call directly. } } @@ -5002,6 +4979,8 @@ void CodeGeneratorMIPS64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invo __ Nop(); break; } + RecordPcInfo(invoke, invoke->GetDexPc(), slow_path); + DCHECK(!IsLeafMethod()); } @@ -5019,10 +4998,10 @@ void InstructionCodeGeneratorMIPS64::VisitInvokeStaticOrDirect(HInvokeStaticOrDi locations->HasTemps() ? locations->GetTemp(0) : Location::NoLocation()); - codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); } -void CodeGeneratorMIPS64::GenerateVirtualCall(HInvokeVirtual* invoke, Location temp_location) { +void CodeGeneratorMIPS64::GenerateVirtualCall( + HInvokeVirtual* invoke, Location temp_location, SlowPathCode* slow_path) { // Use the calling convention instead of the location of the receiver, as // intrinsics may have put the receiver in a different register. In the intrinsics // slow path, the arguments have been moved to the right place, so here we are @@ -5054,6 +5033,7 @@ void CodeGeneratorMIPS64::GenerateVirtualCall(HInvokeVirtual* invoke, Location t // T9(); __ Jalr(T9); __ Nop(); + RecordPcInfo(invoke, invoke->GetDexPc(), slow_path); } void InstructionCodeGeneratorMIPS64::VisitInvokeVirtual(HInvokeVirtual* invoke) { @@ -5063,12 +5043,11 @@ void InstructionCodeGeneratorMIPS64::VisitInvokeVirtual(HInvokeVirtual* invoke) codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0)); DCHECK(!codegen_->IsLeafMethod()); - codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); } void LocationsBuilderMIPS64::VisitLoadClass(HLoadClass* cls) { HLoadClass::LoadKind load_kind = cls->GetLoadKind(); - if (load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) { + if (load_kind == HLoadClass::LoadKind::kRuntimeCall) { InvokeRuntimeCallingConvention calling_convention; Location loc = Location::RegisterLocation(calling_convention.GetRegisterAt(0)); CodeGenerator::CreateLoadClassRuntimeCallLocationSummary(cls, loc, loc); @@ -5105,7 +5084,7 @@ void LocationsBuilderMIPS64::VisitLoadClass(HLoadClass* cls) { // move. 
void InstructionCodeGeneratorMIPS64::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFETY_ANALYSIS { HLoadClass::LoadKind load_kind = cls->GetLoadKind(); - if (load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) { + if (load_kind == HLoadClass::LoadKind::kRuntimeCall) { codegen_->GenerateLoadClassRuntimeCall(cls); return; } @@ -5116,7 +5095,7 @@ void InstructionCodeGeneratorMIPS64::VisitLoadClass(HLoadClass* cls) NO_THREAD_S GpuRegister out = out_loc.AsRegister<GpuRegister>(); GpuRegister current_method_reg = ZERO; if (load_kind == HLoadClass::LoadKind::kReferrersClass || - load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) { + load_kind == HLoadClass::LoadKind::kRuntimeCall) { current_method_reg = locations->InAt(0).AsRegister<GpuRegister>(); } @@ -5170,7 +5149,7 @@ void InstructionCodeGeneratorMIPS64::VisitLoadClass(HLoadClass* cls) NO_THREAD_S cls->GetClass())); GenerateGcRootFieldLoad(cls, out_loc, out, 0, read_barrier_option); break; - case HLoadClass::LoadKind::kDexCacheViaMethod: + case HLoadClass::LoadKind::kRuntimeCall: case HLoadClass::LoadKind::kInvalid: LOG(FATAL) << "UNREACHABLE"; UNREACHABLE(); @@ -5219,7 +5198,7 @@ void LocationsBuilderMIPS64::VisitLoadString(HLoadString* load) { HLoadString::LoadKind load_kind = load->GetLoadKind(); LocationSummary::CallKind call_kind = CodeGenerator::GetLoadStringCallKind(load); LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind); - if (load_kind == HLoadString::LoadKind::kDexCacheViaMethod) { + if (load_kind == HLoadString::LoadKind::kRuntimeCall) { InvokeRuntimeCallingConvention calling_convention; locations->SetOut(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); } else { @@ -5293,7 +5272,7 @@ void InstructionCodeGeneratorMIPS64::VisitLoadString(HLoadString* load) NO_THREA } // TODO: Re-add the compiler code to do string dex cache lookup again. 
- DCHECK(load_kind == HLoadString::LoadKind::kDexCacheViaMethod); + DCHECK(load_kind == HLoadString::LoadKind::kRuntimeCall); InvokeRuntimeCallingConvention calling_convention; DCHECK_EQ(calling_convention.GetRegisterAt(0), out); __ LoadConst32(calling_convention.GetRegisterAt(0), load->GetStringIndex().index_); diff --git a/compiler/optimizing/code_generator_mips64.h b/compiler/optimizing/code_generator_mips64.h index 8405040386..2e8af2185a 100644 --- a/compiler/optimizing/code_generator_mips64.h +++ b/compiler/optimizing/code_generator_mips64.h @@ -314,6 +314,9 @@ class InstructionCodeGeneratorMIPS64 : public InstructionCodeGenerator { uint32_t num_entries, HBasicBlock* switch_block, HBasicBlock* default_block); + int32_t VecAddress(LocationSummary* locations, + size_t size, + /* out */ GpuRegister* adjusted_base); Mips64Assembler* const assembler_; CodeGeneratorMIPS64* const codegen_; @@ -518,8 +521,10 @@ class CodeGeneratorMIPS64 : public CodeGenerator { const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info, HInvokeStaticOrDirect* invoke) OVERRIDE; - void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) OVERRIDE; - void GenerateVirtualCall(HInvokeVirtual* invoke, Location temp) OVERRIDE; + void GenerateStaticOrDirectCall( + HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path = nullptr) OVERRIDE; + void GenerateVirtualCall( + HInvokeVirtual* invoke, Location temp, SlowPathCode* slow_path = nullptr) OVERRIDE; void MoveFromReturnRegister(Location trg ATTRIBUTE_UNUSED, Primitive::Type type ATTRIBUTE_UNUSED) OVERRIDE { diff --git a/compiler/optimizing/code_generator_vector_mips64.cc b/compiler/optimizing/code_generator_vector_mips64.cc index 50b95c17cb..af9e89e791 100644 --- a/compiler/optimizing/code_generator_vector_mips64.cc +++ b/compiler/optimizing/code_generator_vector_mips64.cc @@ -15,6 +15,7 @@ */ #include "code_generator_mips64.h" +#include "mirror/array-inl.h" namespace art { namespace mips64 { @@ -22,12 +23,72 @@ namespace mips64 { // NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy. 
#define __ down_cast<Mips64Assembler*>(GetAssembler())-> // NOLINT +VectorRegister VectorRegisterFrom(Location location) { + DCHECK(location.IsFpuRegister()); + return static_cast<VectorRegister>(location.AsFpuRegister<FpuRegister>()); +} + void LocationsBuilderMIPS64::VisitVecReplicateScalar(HVecReplicateScalar* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); + LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction); + switch (instruction->GetPackedType()) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimLong: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresFpuRegister()); + break; + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } } void InstructionCodeGeneratorMIPS64::VisitVecReplicateScalar(HVecReplicateScalar* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); + LocationSummary* locations = instruction->GetLocations(); + VectorRegister dst = VectorRegisterFrom(locations->Out()); + switch (instruction->GetPackedType()) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + DCHECK_EQ(16u, instruction->GetVectorLength()); + __ FillB(dst, locations->InAt(0).AsRegister<GpuRegister>()); + break; + case Primitive::kPrimChar: + case Primitive::kPrimShort: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ FillH(dst, locations->InAt(0).AsRegister<GpuRegister>()); + break; + case Primitive::kPrimInt: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ FillW(dst, locations->InAt(0).AsRegister<GpuRegister>()); + break; + case Primitive::kPrimLong: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ FillD(dst, locations->InAt(0).AsRegister<GpuRegister>()); + break; + case Primitive::kPrimFloat: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ ReplicateFPToVectorRegister(dst, + locations->InAt(0).AsFpuRegister<FpuRegister>(), + /* is_double */ false); + break; + case Primitive::kPrimDouble: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ ReplicateFPToVectorRegister(dst, + locations->InAt(0).AsFpuRegister<FpuRegister>(), + /* is_double */ true); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } } void LocationsBuilderMIPS64::VisitVecSetScalars(HVecSetScalars* instruction) { @@ -51,13 +112,23 @@ static void CreateVecUnOpLocations(ArenaAllocator* arena, HVecUnaryOperation* in LocationSummary* locations = new (arena) LocationSummary(instruction); switch (instruction->GetPackedType()) { case Primitive::kPrimBoolean: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister(), + instruction->IsVecNot() ? Location::kOutputOverlap + : Location::kNoOutputOverlap); + break; case Primitive::kPrimByte: case Primitive::kPrimChar: case Primitive::kPrimShort: case Primitive::kPrimInt: + case Primitive::kPrimLong: case Primitive::kPrimFloat: case Primitive::kPrimDouble: - DCHECK(locations); + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister(), + (instruction->IsVecNeg() || instruction->IsVecAbs()) + ? 
Location::kOutputOverlap + : Location::kNoOutputOverlap); break; default: LOG(FATAL) << "Unsupported SIMD type"; @@ -70,7 +141,18 @@ void LocationsBuilderMIPS64::VisitVecCnv(HVecCnv* instruction) { } void InstructionCodeGeneratorMIPS64::VisitVecCnv(HVecCnv* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); + LocationSummary* locations = instruction->GetLocations(); + VectorRegister src = VectorRegisterFrom(locations->InAt(0)); + VectorRegister dst = VectorRegisterFrom(locations->Out()); + Primitive::Type from = instruction->GetInputType(); + Primitive::Type to = instruction->GetResultType(); + if (from == Primitive::kPrimInt && to == Primitive::kPrimFloat) { + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ Ffint_sW(dst, src); + } else { + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } } void LocationsBuilderMIPS64::VisitVecNeg(HVecNeg* instruction) { @@ -78,7 +160,45 @@ void LocationsBuilderMIPS64::VisitVecNeg(HVecNeg* instruction) { } void InstructionCodeGeneratorMIPS64::VisitVecNeg(HVecNeg* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); + LocationSummary* locations = instruction->GetLocations(); + VectorRegister src = VectorRegisterFrom(locations->InAt(0)); + VectorRegister dst = VectorRegisterFrom(locations->Out()); + switch (instruction->GetPackedType()) { + case Primitive::kPrimByte: + DCHECK_EQ(16u, instruction->GetVectorLength()); + __ FillB(dst, ZERO); + __ SubvB(dst, dst, src); + break; + case Primitive::kPrimChar: + case Primitive::kPrimShort: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ FillH(dst, ZERO); + __ SubvH(dst, dst, src); + break; + case Primitive::kPrimInt: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ FillW(dst, ZERO); + __ SubvW(dst, dst, src); + break; + case Primitive::kPrimLong: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ FillD(dst, ZERO); + __ SubvD(dst, dst, src); + break; + case Primitive::kPrimFloat: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ FillW(dst, ZERO); + __ FsubW(dst, dst, src); + break; + case Primitive::kPrimDouble: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ FillD(dst, ZERO); + __ FsubD(dst, dst, src); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } } void LocationsBuilderMIPS64::VisitVecAbs(HVecAbs* instruction) { @@ -86,7 +206,47 @@ void LocationsBuilderMIPS64::VisitVecAbs(HVecAbs* instruction) { } void InstructionCodeGeneratorMIPS64::VisitVecAbs(HVecAbs* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); + LocationSummary* locations = instruction->GetLocations(); + VectorRegister src = VectorRegisterFrom(locations->InAt(0)); + VectorRegister dst = VectorRegisterFrom(locations->Out()); + switch (instruction->GetPackedType()) { + case Primitive::kPrimByte: + DCHECK_EQ(16u, instruction->GetVectorLength()); + __ FillB(dst, ZERO); // all zeroes + __ Add_aB(dst, dst, src); // dst = abs(0) + abs(src) + break; + case Primitive::kPrimChar: + case Primitive::kPrimShort: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ FillH(dst, ZERO); // all zeroes + __ Add_aH(dst, dst, src); // dst = abs(0) + abs(src) + break; + case Primitive::kPrimInt: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ FillW(dst, ZERO); // all zeroes + __ Add_aW(dst, dst, src); // dst = abs(0) + abs(src) + break; + case Primitive::kPrimLong: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ FillD(dst, ZERO); // all zeroes + __ Add_aD(dst, dst, src); // dst = abs(0) + abs(src) + break; + case 
Primitive::kPrimFloat: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ LdiW(dst, -1); // all ones + __ SrliW(dst, dst, 1); + __ AndV(dst, dst, src); + break; + case Primitive::kPrimDouble: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ LdiD(dst, -1); // all ones + __ SrliD(dst, dst, 1); + __ AndV(dst, dst, src); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } } void LocationsBuilderMIPS64::VisitVecNot(HVecNot* instruction) { @@ -94,7 +254,30 @@ void LocationsBuilderMIPS64::VisitVecNot(HVecNot* instruction) { } void InstructionCodeGeneratorMIPS64::VisitVecNot(HVecNot* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); + LocationSummary* locations = instruction->GetLocations(); + VectorRegister src = VectorRegisterFrom(locations->InAt(0)); + VectorRegister dst = VectorRegisterFrom(locations->Out()); + switch (instruction->GetPackedType()) { + case Primitive::kPrimBoolean: // special case boolean-not + DCHECK_EQ(16u, instruction->GetVectorLength()); + __ LdiB(dst, 1); + __ XorV(dst, dst, src); + break; + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimLong: + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: + DCHECK_LE(2u, instruction->GetVectorLength()); + DCHECK_LE(instruction->GetVectorLength(), 16u); + __ NorV(dst, src, src); // lanes do not matter + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } } // Helper to set up locations for vector binary operations. @@ -106,9 +289,12 @@ static void CreateVecBinOpLocations(ArenaAllocator* arena, HVecBinaryOperation* case Primitive::kPrimChar: case Primitive::kPrimShort: case Primitive::kPrimInt: + case Primitive::kPrimLong: case Primitive::kPrimFloat: case Primitive::kPrimDouble: - DCHECK(locations); + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); break; default: LOG(FATAL) << "Unsupported SIMD type"; @@ -121,7 +307,40 @@ void LocationsBuilderMIPS64::VisitVecAdd(HVecAdd* instruction) { } void InstructionCodeGeneratorMIPS64::VisitVecAdd(HVecAdd* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); + LocationSummary* locations = instruction->GetLocations(); + VectorRegister lhs = VectorRegisterFrom(locations->InAt(0)); + VectorRegister rhs = VectorRegisterFrom(locations->InAt(1)); + VectorRegister dst = VectorRegisterFrom(locations->Out()); + switch (instruction->GetPackedType()) { + case Primitive::kPrimByte: + DCHECK_EQ(16u, instruction->GetVectorLength()); + __ AddvB(dst, lhs, rhs); + break; + case Primitive::kPrimChar: + case Primitive::kPrimShort: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ AddvH(dst, lhs, rhs); + break; + case Primitive::kPrimInt: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ AddvW(dst, lhs, rhs); + break; + case Primitive::kPrimLong: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ AddvD(dst, lhs, rhs); + break; + case Primitive::kPrimFloat: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ FaddW(dst, lhs, rhs); + break; + case Primitive::kPrimDouble: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ FaddD(dst, lhs, rhs); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } } void LocationsBuilderMIPS64::VisitVecHalvingAdd(HVecHalvingAdd* instruction) { @@ -129,7 +348,40 @@ void 
LocationsBuilderMIPS64::VisitVecHalvingAdd(HVecHalvingAdd* instruction) { } void InstructionCodeGeneratorMIPS64::VisitVecHalvingAdd(HVecHalvingAdd* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); + LocationSummary* locations = instruction->GetLocations(); + VectorRegister lhs = VectorRegisterFrom(locations->InAt(0)); + VectorRegister rhs = VectorRegisterFrom(locations->InAt(1)); + VectorRegister dst = VectorRegisterFrom(locations->Out()); + switch (instruction->GetPackedType()) { + case Primitive::kPrimByte: + DCHECK_EQ(16u, instruction->GetVectorLength()); + if (instruction->IsUnsigned()) { + instruction->IsRounded() + ? __ Aver_uB(dst, lhs, rhs) + : __ Ave_uB(dst, lhs, rhs); + } else { + instruction->IsRounded() + ? __ Aver_sB(dst, lhs, rhs) + : __ Ave_sB(dst, lhs, rhs); + } + break; + case Primitive::kPrimChar: + case Primitive::kPrimShort: + DCHECK_EQ(8u, instruction->GetVectorLength()); + if (instruction->IsUnsigned()) { + instruction->IsRounded() + ? __ Aver_uH(dst, lhs, rhs) + : __ Ave_uH(dst, lhs, rhs); + } else { + instruction->IsRounded() + ? __ Aver_sH(dst, lhs, rhs) + : __ Ave_sH(dst, lhs, rhs); + } + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } } void LocationsBuilderMIPS64::VisitVecSub(HVecSub* instruction) { @@ -137,7 +389,40 @@ void LocationsBuilderMIPS64::VisitVecSub(HVecSub* instruction) { } void InstructionCodeGeneratorMIPS64::VisitVecSub(HVecSub* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); + LocationSummary* locations = instruction->GetLocations(); + VectorRegister lhs = VectorRegisterFrom(locations->InAt(0)); + VectorRegister rhs = VectorRegisterFrom(locations->InAt(1)); + VectorRegister dst = VectorRegisterFrom(locations->Out()); + switch (instruction->GetPackedType()) { + case Primitive::kPrimByte: + DCHECK_EQ(16u, instruction->GetVectorLength()); + __ SubvB(dst, lhs, rhs); + break; + case Primitive::kPrimChar: + case Primitive::kPrimShort: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ SubvH(dst, lhs, rhs); + break; + case Primitive::kPrimInt: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ SubvW(dst, lhs, rhs); + break; + case Primitive::kPrimLong: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ SubvD(dst, lhs, rhs); + break; + case Primitive::kPrimFloat: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ FsubW(dst, lhs, rhs); + break; + case Primitive::kPrimDouble: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ FsubD(dst, lhs, rhs); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } } void LocationsBuilderMIPS64::VisitVecMul(HVecMul* instruction) { @@ -145,7 +430,40 @@ void LocationsBuilderMIPS64::VisitVecMul(HVecMul* instruction) { } void InstructionCodeGeneratorMIPS64::VisitVecMul(HVecMul* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); + LocationSummary* locations = instruction->GetLocations(); + VectorRegister lhs = VectorRegisterFrom(locations->InAt(0)); + VectorRegister rhs = VectorRegisterFrom(locations->InAt(1)); + VectorRegister dst = VectorRegisterFrom(locations->Out()); + switch (instruction->GetPackedType()) { + case Primitive::kPrimByte: + DCHECK_EQ(16u, instruction->GetVectorLength()); + __ MulvB(dst, lhs, rhs); + break; + case Primitive::kPrimChar: + case Primitive::kPrimShort: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ MulvH(dst, lhs, rhs); + break; + case Primitive::kPrimInt: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ MulvW(dst, lhs, rhs); + break; + case 
Primitive::kPrimLong: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ MulvD(dst, lhs, rhs); + break; + case Primitive::kPrimFloat: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ FmulW(dst, lhs, rhs); + break; + case Primitive::kPrimDouble: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ FmulD(dst, lhs, rhs); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } } void LocationsBuilderMIPS64::VisitVecDiv(HVecDiv* instruction) { @@ -153,7 +471,23 @@ void LocationsBuilderMIPS64::VisitVecDiv(HVecDiv* instruction) { } void InstructionCodeGeneratorMIPS64::VisitVecDiv(HVecDiv* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); + LocationSummary* locations = instruction->GetLocations(); + VectorRegister lhs = VectorRegisterFrom(locations->InAt(0)); + VectorRegister rhs = VectorRegisterFrom(locations->InAt(1)); + VectorRegister dst = VectorRegisterFrom(locations->Out()); + switch (instruction->GetPackedType()) { + case Primitive::kPrimFloat: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ FdivW(dst, lhs, rhs); + break; + case Primitive::kPrimDouble: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ FdivD(dst, lhs, rhs); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } } void LocationsBuilderMIPS64::VisitVecMin(HVecMin* instruction) { @@ -177,7 +511,27 @@ void LocationsBuilderMIPS64::VisitVecAnd(HVecAnd* instruction) { } void InstructionCodeGeneratorMIPS64::VisitVecAnd(HVecAnd* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); + LocationSummary* locations = instruction->GetLocations(); + VectorRegister lhs = VectorRegisterFrom(locations->InAt(0)); + VectorRegister rhs = VectorRegisterFrom(locations->InAt(1)); + VectorRegister dst = VectorRegisterFrom(locations->Out()); + switch (instruction->GetPackedType()) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimLong: + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: + DCHECK_LE(2u, instruction->GetVectorLength()); + DCHECK_LE(instruction->GetVectorLength(), 16u); + __ AndV(dst, lhs, rhs); // lanes do not matter + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } } void LocationsBuilderMIPS64::VisitVecAndNot(HVecAndNot* instruction) { @@ -193,7 +547,27 @@ void LocationsBuilderMIPS64::VisitVecOr(HVecOr* instruction) { } void InstructionCodeGeneratorMIPS64::VisitVecOr(HVecOr* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); + LocationSummary* locations = instruction->GetLocations(); + VectorRegister lhs = VectorRegisterFrom(locations->InAt(0)); + VectorRegister rhs = VectorRegisterFrom(locations->InAt(1)); + VectorRegister dst = VectorRegisterFrom(locations->Out()); + switch (instruction->GetPackedType()) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimLong: + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: + DCHECK_LE(2u, instruction->GetVectorLength()); + DCHECK_LE(instruction->GetVectorLength(), 16u); + __ OrV(dst, lhs, rhs); // lanes do not matter + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } } void LocationsBuilderMIPS64::VisitVecXor(HVecXor* instruction) { @@ -201,7 +575,27 @@ void LocationsBuilderMIPS64::VisitVecXor(HVecXor* instruction) { } void 
InstructionCodeGeneratorMIPS64::VisitVecXor(HVecXor* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); + LocationSummary* locations = instruction->GetLocations(); + VectorRegister lhs = VectorRegisterFrom(locations->InAt(0)); + VectorRegister rhs = VectorRegisterFrom(locations->InAt(1)); + VectorRegister dst = VectorRegisterFrom(locations->Out()); + switch (instruction->GetPackedType()) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimLong: + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: + DCHECK_LE(2u, instruction->GetVectorLength()); + DCHECK_LE(instruction->GetVectorLength(), 16u); + __ XorV(dst, lhs, rhs); // lanes do not matter + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } } // Helper to set up locations for vector shift operations. @@ -213,7 +607,9 @@ static void CreateVecShiftLocations(ArenaAllocator* arena, HVecBinaryOperation* case Primitive::kPrimShort: case Primitive::kPrimInt: case Primitive::kPrimLong: - DCHECK(locations); + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); + locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); break; default: LOG(FATAL) << "Unsupported SIMD type"; @@ -226,7 +622,32 @@ void LocationsBuilderMIPS64::VisitVecShl(HVecShl* instruction) { } void InstructionCodeGeneratorMIPS64::VisitVecShl(HVecShl* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); + LocationSummary* locations = instruction->GetLocations(); + VectorRegister lhs = VectorRegisterFrom(locations->InAt(0)); + VectorRegister dst = VectorRegisterFrom(locations->Out()); + int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue(); + switch (instruction->GetPackedType()) { + case Primitive::kPrimByte: + DCHECK_EQ(16u, instruction->GetVectorLength()); + __ SlliB(dst, lhs, value); + break; + case Primitive::kPrimChar: + case Primitive::kPrimShort: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ SlliH(dst, lhs, value); + break; + case Primitive::kPrimInt: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ SlliW(dst, lhs, value); + break; + case Primitive::kPrimLong: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ SlliD(dst, lhs, value); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } } void LocationsBuilderMIPS64::VisitVecShr(HVecShr* instruction) { @@ -234,7 +655,32 @@ void LocationsBuilderMIPS64::VisitVecShr(HVecShr* instruction) { } void InstructionCodeGeneratorMIPS64::VisitVecShr(HVecShr* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); + LocationSummary* locations = instruction->GetLocations(); + VectorRegister lhs = VectorRegisterFrom(locations->InAt(0)); + VectorRegister dst = VectorRegisterFrom(locations->Out()); + int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue(); + switch (instruction->GetPackedType()) { + case Primitive::kPrimByte: + DCHECK_EQ(16u, instruction->GetVectorLength()); + __ SraiB(dst, lhs, value); + break; + case Primitive::kPrimChar: + case Primitive::kPrimShort: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ SraiH(dst, lhs, value); + break; + case Primitive::kPrimInt: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ SraiW(dst, lhs, value); + break; + case Primitive::kPrimLong: + DCHECK_EQ(2u, 
instruction->GetVectorLength()); + __ SraiD(dst, lhs, value); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } } void LocationsBuilderMIPS64::VisitVecUShr(HVecUShr* instruction) { @@ -242,7 +688,32 @@ void LocationsBuilderMIPS64::VisitVecUShr(HVecUShr* instruction) { } void InstructionCodeGeneratorMIPS64::VisitVecUShr(HVecUShr* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); + LocationSummary* locations = instruction->GetLocations(); + VectorRegister lhs = VectorRegisterFrom(locations->InAt(0)); + VectorRegister dst = VectorRegisterFrom(locations->Out()); + int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue(); + switch (instruction->GetPackedType()) { + case Primitive::kPrimByte: + DCHECK_EQ(16u, instruction->GetVectorLength()); + __ SrliB(dst, lhs, value); + break; + case Primitive::kPrimChar: + case Primitive::kPrimShort: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ SrliH(dst, lhs, value); + break; + case Primitive::kPrimInt: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ SrliW(dst, lhs, value); + break; + case Primitive::kPrimLong: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ SrliD(dst, lhs, value); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } } void LocationsBuilderMIPS64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) { @@ -253,20 +724,143 @@ void InstructionCodeGeneratorMIPS64::VisitVecMultiplyAccumulate(HVecMultiplyAccu LOG(FATAL) << "No SIMD for " << instr->GetId(); } +// Helper to set up locations for vector memory operations. +static void CreateVecMemLocations(ArenaAllocator* arena, + HVecMemoryOperation* instruction, + bool is_load) { + LocationSummary* locations = new (arena) LocationSummary(instruction); + switch (instruction->GetPackedType()) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + case Primitive::kPrimChar: + case Primitive::kPrimShort: + case Primitive::kPrimInt: + case Primitive::kPrimLong: + case Primitive::kPrimFloat: + case Primitive::kPrimDouble: + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); + if (is_load) { + locations->SetOut(Location::RequiresFpuRegister()); + } else { + locations->SetInAt(2, Location::RequiresFpuRegister()); + } + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +// Helper to prepare register and offset for vector memory operations. Returns the offset and sets +// the output parameter adjusted_base to the original base or to a reserved temporary register (AT). 
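The comment above introduces the VecAddress helper defined immediately below. As a minimal scalar sketch of the arithmetic it performs (VecAddressModel and kDataOffset are illustrative stand-ins, not ART APIs): an element's address is the array base, plus the fixed data-section offset, plus the index shifted left by the element-size scale, which MSA loads and stores then consume as a (register, immediate offset) pair.

```cpp
#include <cstdint>

// Illustrative stand-in for mirror::Array::DataOffset(size).Int32Value().
constexpr uint32_t kDataOffset = 16;

// Scalar model of the address the helper prepares. With a constant index the
// scaled term folds into the returned immediate offset; with a register
// index the helper instead emits Dlsa/Daddu so that base + (index << shift)
// lands in the temporary register AT, and only the data offset is returned.
uint64_t VecAddressModel(uint64_t base, uint64_t index, unsigned shift) {
  return base + kDataOffset + (index << shift);
}
```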
+int32_t InstructionCodeGeneratorMIPS64::VecAddress(LocationSummary* locations, + size_t size, + /* out */ GpuRegister* adjusted_base) { + GpuRegister base = locations->InAt(0).AsRegister<GpuRegister>(); + Location index = locations->InAt(1); + int scale = TIMES_1; + switch (size) { + case 2: scale = TIMES_2; break; + case 4: scale = TIMES_4; break; + case 8: scale = TIMES_8; break; + default: break; + } + int32_t offset = mirror::Array::DataOffset(size).Int32Value(); + + if (index.IsConstant()) { + offset += index.GetConstant()->AsIntConstant()->GetValue() << scale; + __ AdjustBaseOffsetAndElementSizeShift(base, offset, scale); + *adjusted_base = base; + } else { + GpuRegister index_reg = index.AsRegister<GpuRegister>(); + if (scale != TIMES_1) { + __ Dlsa(AT, index_reg, base, scale); + } else { + __ Daddu(AT, base, index_reg); + } + *adjusted_base = AT; + } + return offset; +} + void LocationsBuilderMIPS64::VisitVecLoad(HVecLoad* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); + CreateVecMemLocations(GetGraph()->GetArena(), instruction, /* is_load */ true); } void InstructionCodeGeneratorMIPS64::VisitVecLoad(HVecLoad* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); + LocationSummary* locations = instruction->GetLocations(); + size_t size = Primitive::ComponentSize(instruction->GetPackedType()); + VectorRegister reg = VectorRegisterFrom(locations->Out()); + GpuRegister base; + int32_t offset = VecAddress(locations, size, &base); + switch (instruction->GetPackedType()) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + DCHECK_EQ(16u, instruction->GetVectorLength()); + __ LdB(reg, base, offset); + break; + case Primitive::kPrimChar: + case Primitive::kPrimShort: + // Loading 8 bytes (needed if dealing with compressed strings in StringCharAt) from an unaligned + // memory address may cause a trap to the kernel if the CPU doesn't directly support unaligned + // loads and stores. + // TODO: Implement support for StringCharAt.
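The lane-count DCHECKs threaded through these MSA hunks all encode one invariant: MSA vector registers are 128 bits wide, so the vector length is 16 bytes divided by the packed element size. A compile-time restatement of that relationship (names are illustrative):

```cpp
#include <cstddef>

constexpr size_t kMsaRegisterBytes = 16;  // MSA registers are 128-bit.

constexpr size_t VectorLength(size_t element_size) {
  return kMsaRegisterBytes / element_size;
}

static_assert(VectorLength(1) == 16, "booleans and bytes");
static_assert(VectorLength(2) == 8, "chars and shorts");
static_assert(VectorLength(4) == 4, "ints and floats");
static_assert(VectorLength(8) == 2, "longs and doubles");
```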
+ DCHECK(!instruction->IsStringCharAt()); + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ LdH(reg, base, offset); + break; + case Primitive::kPrimInt: + case Primitive::kPrimFloat: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ LdW(reg, base, offset); + break; + case Primitive::kPrimLong: + case Primitive::kPrimDouble: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ LdD(reg, base, offset); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } } void LocationsBuilderMIPS64::VisitVecStore(HVecStore* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); + CreateVecMemLocations(GetGraph()->GetArena(), instruction, /* is_load */ false); } void InstructionCodeGeneratorMIPS64::VisitVecStore(HVecStore* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); + LocationSummary* locations = instruction->GetLocations(); + size_t size = Primitive::ComponentSize(instruction->GetPackedType()); + VectorRegister reg = VectorRegisterFrom(locations->InAt(2)); + GpuRegister base; + int32_t offset = VecAddress(locations, size, &base); + switch (instruction->GetPackedType()) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + DCHECK_EQ(16u, instruction->GetVectorLength()); + __ StB(reg, base, offset); + break; + case Primitive::kPrimChar: + case Primitive::kPrimShort: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ StH(reg, base, offset); + break; + case Primitive::kPrimInt: + case Primitive::kPrimFloat: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ StW(reg, base, offset); + break; + case Primitive::kPrimLong: + case Primitive::kPrimDouble: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ StD(reg, base, offset); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } } #undef __ diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index 317ca71136..ca921b843e 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -2204,7 +2204,6 @@ void InstructionCodeGeneratorX86::VisitInvokeStaticOrDirect(HInvokeStaticOrDirec LocationSummary* locations = invoke->GetLocations(); codegen_->GenerateStaticOrDirectCall( invoke, locations->HasTemps() ? locations->GetTemp(0) : Location::NoLocation()); - codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); } void LocationsBuilderX86::VisitInvokeVirtual(HInvokeVirtual* invoke) { @@ -2228,7 +2227,6 @@ void InstructionCodeGeneratorX86::VisitInvokeVirtual(HInvokeVirtual* invoke) { codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0)); DCHECK(!codegen_->IsLeafMethod()); - codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); } void LocationsBuilderX86::VisitInvokeInterface(HInvokeInterface* invoke) { @@ -4521,18 +4519,17 @@ Register CodeGeneratorX86::GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOr // save one load. However, since this is just an intrinsic slow path we prefer this // simple and more robust approach rather that trying to determine if that's the case. SlowPathCode* slow_path = GetCurrentSlowPath(); - if (slow_path != nullptr) { - if (slow_path->IsCoreRegisterSaved(location.AsRegister<Register>())) { - int stack_offset = slow_path->GetStackOffsetOfCoreRegister(location.AsRegister<Register>()); - __ movl(temp, Address(ESP, stack_offset)); - return temp; - } + DCHECK(slow_path != nullptr); // For intrinsified invokes the call is emitted on the slow path. 
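The x86 hunk continuing below tightens GetInvokeStaticOrDirectExtraParameter: a slow path is now guaranteed to exist, and if it saved the core register holding the extra parameter, the saved stack copy is preferred over the possibly repurposed register. A toy model of that decision, with a hypothetical struct standing in for SlowPathCode:

```cpp
#include <optional>
#include <unordered_map>

// Hypothetical stand-in for the SlowPathCode bookkeeping used here: which
// core registers were saved, and the stack offset where each one lives.
struct SlowPathModel {
  std::unordered_map<int, int> saved;  // core register -> stack offset

  bool IsCoreRegisterSaved(int reg) const { return saved.count(reg) != 0; }
  int GetStackOffsetOfCoreRegister(int reg) const { return saved.at(reg); }
};

// Returns the stack offset to reload the value from, or nothing when the
// live register can be used as-is.
std::optional<int> ReloadSource(const SlowPathModel& slow_path, int reg) {
  if (slow_path.IsCoreRegisterSaved(reg)) {
    return slow_path.GetStackOffsetOfCoreRegister(reg);
  }
  return std::nullopt;
}
```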
+ if (slow_path->IsCoreRegisterSaved(location.AsRegister<Register>())) { + int stack_offset = slow_path->GetStackOffsetOfCoreRegister(location.AsRegister<Register>()); + __ movl(temp, Address(ESP, stack_offset)); + return temp; } return location.AsRegister<Register>(); } -Location CodeGeneratorX86::GenerateCalleeMethodStaticOrDirectCall(HInvokeStaticOrDirect* invoke, - Location temp) { +void CodeGeneratorX86::GenerateStaticOrDirectCall( + HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path) { Location callee_method = temp; // For all kinds except kRecursive, callee will be in temp. switch (invoke->GetMethodLoadKind()) { case HInvokeStaticOrDirect::MethodLoadKind::kStringInit: { @@ -4568,33 +4565,11 @@ Location CodeGeneratorX86::GenerateCalleeMethodStaticOrDirectCall(HInvokeStaticO offset)); break; } - case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: { - Location current_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()); - Register method_reg; - Register reg = temp.AsRegister<Register>(); - if (current_method.IsRegister()) { - method_reg = current_method.AsRegister<Register>(); - } else { - DCHECK(invoke->GetLocations()->Intrinsified()); - DCHECK(!current_method.IsValid()); - method_reg = reg; - __ movl(reg, Address(ESP, kCurrentMethodStackOffset)); - } - // /* ArtMethod*[] */ temp = temp.ptr_sized_fields_->dex_cache_resolved_methods_; - __ movl(reg, Address(method_reg, - ArtMethod::DexCacheResolvedMethodsOffset(kX86PointerSize).Int32Value())); - // temp = temp[index_in_cache]; - // Note: Don't use invoke->GetTargetMethod() as it may point to a different dex file. - uint32_t index_in_cache = invoke->GetDexMethodIndex(); - __ movl(reg, Address(reg, CodeGenerator::GetCachePointerOffset(index_in_cache))); - break; + case HInvokeStaticOrDirect::MethodLoadKind::kRuntimeCall: { + GenerateInvokeStaticOrDirectRuntimeCall(invoke, temp, slow_path); + return; // No code pointer retrieval; the runtime performs the call directly. 
} } - return callee_method; -} - -void CodeGeneratorX86::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) { - Location callee_method = GenerateCalleeMethodStaticOrDirectCall(invoke, temp); switch (invoke->GetCodePtrLocation()) { case HInvokeStaticOrDirect::CodePtrLocation::kCallSelf: @@ -4607,11 +4582,13 @@ void CodeGeneratorX86::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, kX86PointerSize).Int32Value())); break; } + RecordPcInfo(invoke, invoke->GetDexPc(), slow_path); DCHECK(!IsLeafMethod()); } -void CodeGeneratorX86::GenerateVirtualCall(HInvokeVirtual* invoke, Location temp_in) { +void CodeGeneratorX86::GenerateVirtualCall( + HInvokeVirtual* invoke, Location temp_in, SlowPathCode* slow_path) { Register temp = temp_in.AsRegister<Register>(); uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset( invoke->GetVTableIndex(), kX86PointerSize).Uint32Value(); @@ -4639,6 +4616,7 @@ void CodeGeneratorX86::GenerateVirtualCall(HInvokeVirtual* invoke, Location temp // call temp->GetEntryPoint(); __ call(Address( temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86PointerSize).Int32Value())); + RecordPcInfo(invoke, invoke->GetDexPc(), slow_path); } void CodeGeneratorX86::RecordBootMethodPatch(HInvokeStaticOrDirect* invoke) { @@ -6066,7 +6044,7 @@ HLoadClass::LoadKind CodeGeneratorX86::GetSupportedLoadClassKind( DCHECK(Runtime::Current()->UseJitCompilation()); break; case HLoadClass::LoadKind::kBootImageAddress: - case HLoadClass::LoadKind::kDexCacheViaMethod: + case HLoadClass::LoadKind::kRuntimeCall: break; } return desired_class_load_kind; @@ -6074,7 +6052,7 @@ HLoadClass::LoadKind CodeGeneratorX86::GetSupportedLoadClassKind( void LocationsBuilderX86::VisitLoadClass(HLoadClass* cls) { HLoadClass::LoadKind load_kind = cls->GetLoadKind(); - if (load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) { + if (load_kind == HLoadClass::LoadKind::kRuntimeCall) { InvokeRuntimeCallingConvention calling_convention; CodeGenerator::CreateLoadClassRuntimeCallLocationSummary( cls, @@ -6128,7 +6106,7 @@ Label* CodeGeneratorX86::NewJitRootClassPatch(const DexFile& dex_file, // move. 
void InstructionCodeGeneratorX86::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFETY_ANALYSIS { HLoadClass::LoadKind load_kind = cls->GetLoadKind(); - if (load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) { + if (load_kind == HLoadClass::LoadKind::kRuntimeCall) { codegen_->GenerateLoadClassRuntimeCall(cls); return; } @@ -6188,7 +6166,7 @@ void InstructionCodeGeneratorX86::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFE GenerateGcRootFieldLoad(cls, out_loc, address, fixup_label, read_barrier_option); break; } - case HLoadClass::LoadKind::kDexCacheViaMethod: + case HLoadClass::LoadKind::kRuntimeCall: case HLoadClass::LoadKind::kInvalid: LOG(FATAL) << "UNREACHABLE"; UNREACHABLE(); @@ -6251,7 +6229,7 @@ HLoadString::LoadKind CodeGeneratorX86::GetSupportedLoadStringKind( DCHECK(Runtime::Current()->UseJitCompilation()); break; case HLoadString::LoadKind::kBootImageAddress: - case HLoadString::LoadKind::kDexCacheViaMethod: + case HLoadString::LoadKind::kRuntimeCall: break; } return desired_string_load_kind; @@ -6265,7 +6243,7 @@ void LocationsBuilderX86::VisitLoadString(HLoadString* load) { load_kind == HLoadString::LoadKind::kBssEntry) { locations->SetInAt(0, Location::RequiresRegister()); } - if (load_kind == HLoadString::LoadKind::kDexCacheViaMethod) { + if (load_kind == HLoadString::LoadKind::kRuntimeCall) { locations->SetOut(Location::RegisterLocation(EAX)); } else { locations->SetOut(Location::RequiresRegister()); diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h index 21c527e8b0..689f93e31a 100644 --- a/compiler/optimizing/code_generator_x86.h +++ b/compiler/optimizing/code_generator_x86.h @@ -408,10 +408,11 @@ class CodeGeneratorX86 : public CodeGenerator { HInvokeStaticOrDirect* invoke) OVERRIDE; // Generate a call to a static or direct method. - Location GenerateCalleeMethodStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp); - void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) OVERRIDE; + void GenerateStaticOrDirectCall( + HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path = nullptr) OVERRIDE; // Generate a call to a virtual method. - void GenerateVirtualCall(HInvokeVirtual* invoke, Location temp) OVERRIDE; + void GenerateVirtualCall( + HInvokeVirtual* invoke, Location temp, SlowPathCode* slow_path = nullptr) OVERRIDE; void RecordBootMethodPatch(HInvokeStaticOrDirect* invoke); void RecordBootTypePatch(HLoadClass* load_class); diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index 6b5e4d602d..148f55139e 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -977,9 +977,10 @@ HInvokeStaticOrDirect::DispatchInfo CodeGeneratorX86_64::GetSupportedInvokeStati return desired_dispatch_info; } -Location CodeGeneratorX86_64::GenerateCalleeMethodStaticOrDirectCall(HInvokeStaticOrDirect* invoke, - Location temp) { +void CodeGeneratorX86_64::GenerateStaticOrDirectCall( + HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path) { // All registers are assumed to be correctly set up. + Location callee_method = temp; // For all kinds except kRecursive, callee will be in temp. 
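A note on the `SlowPathCode* slow_path = nullptr` defaults added to these OVERRIDE declarations: C++ binds default arguments by the static type of the call expression, not the dynamic one, so the defaults on the overrides must be kept consistent with the base declaration in CodeGenerator. A self-contained demonstration with illustrative types:

```cpp
#include <iostream>

// Default arguments on virtual functions are resolved against the static
// type of the callee, so mismatched defaults between a base class and its
// overrides silently change behavior at call sites.
struct Base {
  virtual ~Base() = default;
  virtual void Generate(int* slow_path = nullptr) {
    std::cout << "Base, slow_path=" << slow_path << '\n';
  }
};

struct Derived : Base {
  void Generate(int* slow_path = nullptr) override {
    std::cout << "Derived, slow_path=" << slow_path << '\n';
  }
};

int main() {
  Derived d;
  Base* b = &d;
  b->Generate();  // Derived's body runs, but Base supplies the default.
  d.Generate();   // Derived supplies the default.
}
```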
switch (invoke->GetMethodLoadKind()) { case HInvokeStaticOrDirect::MethodLoadKind::kStringInit: { @@ -1009,36 +1010,11 @@ Location CodeGeneratorX86_64::GenerateCalleeMethodStaticOrDirectCall(HInvokeStat __ Bind(NewPcRelativeDexCacheArrayPatch(invoke->GetDexFileForPcRelativeDexCache(), offset)); break; } - case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: { - Location current_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()); - Register method_reg; - CpuRegister reg = temp.AsRegister<CpuRegister>(); - if (current_method.IsRegister()) { - method_reg = current_method.AsRegister<Register>(); - } else { - DCHECK(invoke->GetLocations()->Intrinsified()); - DCHECK(!current_method.IsValid()); - method_reg = reg.AsRegister(); - __ movq(reg, Address(CpuRegister(RSP), kCurrentMethodStackOffset)); - } - // /* ArtMethod*[] */ temp = temp.ptr_sized_fields_->dex_cache_resolved_methods_; - __ movq(reg, - Address(CpuRegister(method_reg), - ArtMethod::DexCacheResolvedMethodsOffset(kX86_64PointerSize).SizeValue())); - // temp = temp[index_in_cache]; - // Note: Don't use invoke->GetTargetMethod() as it may point to a different dex file. - uint32_t index_in_cache = invoke->GetDexMethodIndex(); - __ movq(reg, Address(reg, CodeGenerator::GetCachePointerOffset(index_in_cache))); - break; + case HInvokeStaticOrDirect::MethodLoadKind::kRuntimeCall: { + GenerateInvokeStaticOrDirectRuntimeCall(invoke, temp, slow_path); + return; // No code pointer retrieval; the runtime performs the call directly. } } - return callee_method; -} - -void CodeGeneratorX86_64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, - Location temp) { - // All registers are assumed to be correctly set up. - Location callee_method = GenerateCalleeMethodStaticOrDirectCall(invoke, temp); switch (invoke->GetCodePtrLocation()) { case HInvokeStaticOrDirect::CodePtrLocation::kCallSelf: @@ -1051,11 +1027,13 @@ void CodeGeneratorX86_64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invo kX86_64PointerSize).SizeValue())); break; } + RecordPcInfo(invoke, invoke->GetDexPc(), slow_path); DCHECK(!IsLeafMethod()); } -void CodeGeneratorX86_64::GenerateVirtualCall(HInvokeVirtual* invoke, Location temp_in) { +void CodeGeneratorX86_64::GenerateVirtualCall( + HInvokeVirtual* invoke, Location temp_in, SlowPathCode* slow_path) { CpuRegister temp = temp_in.AsRegister<CpuRegister>(); size_t method_offset = mirror::Class::EmbeddedVTableEntryOffset( invoke->GetVTableIndex(), kX86_64PointerSize).SizeValue(); @@ -1084,6 +1062,7 @@ void CodeGeneratorX86_64::GenerateVirtualCall(HInvokeVirtual* invoke, Location t // call temp->GetEntryPoint(); __ call(Address(temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset( kX86_64PointerSize).SizeValue())); + RecordPcInfo(invoke, invoke->GetDexPc(), slow_path); } void CodeGeneratorX86_64::RecordBootMethodPatch(HInvokeStaticOrDirect* invoke) { @@ -2393,7 +2372,6 @@ void InstructionCodeGeneratorX86_64::VisitInvokeStaticOrDirect(HInvokeStaticOrDi LocationSummary* locations = invoke->GetLocations(); codegen_->GenerateStaticOrDirectCall( invoke, locations->HasTemps() ? 
locations->GetTemp(0) : Location::NoLocation()); - codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); } void LocationsBuilderX86_64::HandleInvoke(HInvoke* invoke) { @@ -2417,7 +2395,6 @@ void InstructionCodeGeneratorX86_64::VisitInvokeVirtual(HInvokeVirtual* invoke) codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0)); DCHECK(!codegen_->IsLeafMethod()); - codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); } void LocationsBuilderX86_64::VisitInvokeInterface(HInvokeInterface* invoke) { @@ -5483,7 +5460,7 @@ HLoadClass::LoadKind CodeGeneratorX86_64::GetSupportedLoadClassKind( DCHECK(Runtime::Current()->UseJitCompilation()); break; case HLoadClass::LoadKind::kBootImageAddress: - case HLoadClass::LoadKind::kDexCacheViaMethod: + case HLoadClass::LoadKind::kRuntimeCall: break; } return desired_class_load_kind; @@ -5491,7 +5468,7 @@ HLoadClass::LoadKind CodeGeneratorX86_64::GetSupportedLoadClassKind( void LocationsBuilderX86_64::VisitLoadClass(HLoadClass* cls) { HLoadClass::LoadKind load_kind = cls->GetLoadKind(); - if (load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) { + if (load_kind == HLoadClass::LoadKind::kRuntimeCall) { // Custom calling convention: RAX serves as both input and output. CodeGenerator::CreateLoadClassRuntimeCallLocationSummary( cls, @@ -5542,7 +5519,7 @@ Label* CodeGeneratorX86_64::NewJitRootClassPatch(const DexFile& dex_file, // move. void InstructionCodeGeneratorX86_64::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFETY_ANALYSIS { HLoadClass::LoadKind load_kind = cls->GetLoadKind(); - if (load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) { + if (load_kind == HLoadClass::LoadKind::kRuntimeCall) { codegen_->GenerateLoadClassRuntimeCall(cls); return; } @@ -5653,7 +5630,7 @@ HLoadString::LoadKind CodeGeneratorX86_64::GetSupportedLoadStringKind( DCHECK(Runtime::Current()->UseJitCompilation()); break; case HLoadString::LoadKind::kBootImageAddress: - case HLoadString::LoadKind::kDexCacheViaMethod: + case HLoadString::LoadKind::kRuntimeCall: break; } return desired_string_load_kind; @@ -5662,7 +5639,7 @@ HLoadString::LoadKind CodeGeneratorX86_64::GetSupportedLoadStringKind( void LocationsBuilderX86_64::VisitLoadString(HLoadString* load) { LocationSummary::CallKind call_kind = CodeGenerator::GetLoadStringCallKind(load); LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind); - if (load->GetLoadKind() == HLoadString::LoadKind::kDexCacheViaMethod) { + if (load->GetLoadKind() == HLoadString::LoadKind::kRuntimeCall) { locations->SetOut(Location::RegisterLocation(RAX)); } else { locations->SetOut(Location::RequiresRegister()); diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h index 3039e0519c..31debde0ce 100644 --- a/compiler/optimizing/code_generator_x86_64.h +++ b/compiler/optimizing/code_generator_x86_64.h @@ -404,9 +404,10 @@ class CodeGeneratorX86_64 : public CodeGenerator { const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info, HInvokeStaticOrDirect* invoke) OVERRIDE; - Location GenerateCalleeMethodStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp); - void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) OVERRIDE; - void GenerateVirtualCall(HInvokeVirtual* invoke, Location temp) OVERRIDE; + void GenerateStaticOrDirectCall( + HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path = nullptr) OVERRIDE; + void GenerateVirtualCall( + HInvokeVirtual* invoke, Location temp, SlowPathCode* slow_path = nullptr) 
OVERRIDE; void RecordBootMethodPatch(HInvokeStaticOrDirect* invoke); void RecordBootTypePatch(HLoadClass* load_class); diff --git a/compiler/optimizing/induction_var_range.cc b/compiler/optimizing/induction_var_range.cc index 7c833cf70c..c0ec58f824 100644 --- a/compiler/optimizing/induction_var_range.cc +++ b/compiler/optimizing/induction_var_range.cc @@ -1132,11 +1132,27 @@ bool InductionVarRange::GenerateLastValuePeriodic(HInductionVarAnalysis::Inducti /*out*/bool* needs_taken_test) const { DCHECK(info != nullptr); DCHECK_EQ(info->induction_class, HInductionVarAnalysis::kPeriodic); - // Count period. + // Count period and detect all-invariants. int64_t period = 1; - for (HInductionVarAnalysis::InductionInfo* p = info; - p->induction_class == HInductionVarAnalysis::kPeriodic; - p = p->op_b, ++period) {} + bool all_invariants = true; + HInductionVarAnalysis::InductionInfo* p = info; + for (; p->induction_class == HInductionVarAnalysis::kPeriodic; p = p->op_b, ++period) { + DCHECK_EQ(p->op_a->induction_class, HInductionVarAnalysis::kInvariant); + if (p->op_a->operation != HInductionVarAnalysis::kFetch) { + all_invariants = false; + } + } + DCHECK_EQ(p->induction_class, HInductionVarAnalysis::kInvariant); + if (p->operation != HInductionVarAnalysis::kFetch) { + all_invariants = false; + } + // Don't rely on FP arithmetic to be precise, unless the full period + // consists of pre-computed expressions only. + if (info->type == Primitive::kPrimFloat || info->type == Primitive::kPrimDouble) { + if (!all_invariants) { + return false; + } + } // Handle any periodic(x, periodic(.., y)) for known maximum index value m. int64_t m = 0; if (IsConstant(trip->op_a, kExact, &m) && m >= 1) { diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc index 9be6a512f5..142c95780e 100644 --- a/compiler/optimizing/inliner.cc +++ b/compiler/optimizing/inliner.cc @@ -56,7 +56,7 @@ static constexpr size_t kMaximumNumberOfInstructionsForSmallMethod = 3; // Limit the number of dex registers that we accumulate while inlining // to avoid creating a large amount of nested environments. -static constexpr size_t kMaximumNumberOfCumulatedDexRegisters = 64; +static constexpr size_t kMaximumNumberOfCumulatedDexRegisters = 32; // Limit recursive call inlining, which does not benefit from too // much inlining compared to code locality.
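To make the induction_var_range.cc hunk above concrete: a periodic induction cycles through a fixed sequence of values, so the value after the loop depends only on the trip count modulo the period. A sketch of the simplest two-element case (not the ART API); per the new restriction, this folding is only applied to float/double inductions when every period element is a plain pre-computed fetch, since re-deriving FP values with extra arithmetic could round differently than the loop itself did:

```cpp
#include <cstdint>

// periodic(a, b) alternates a, b, a, b, ...; after trip_count iterations the
// last value is decided by the trip count's parity alone.
double LastValuePeriodic2(double a, double b, int64_t trip_count) {
  return (trip_count % 2 == 0) ? a : b;
}
```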
diff --git a/compiler/optimizing/instruction_builder.cc b/compiler/optimizing/instruction_builder.cc index df9e7164ed..a73b1246d8 100644 --- a/compiler/optimizing/instruction_builder.cc +++ b/compiler/optimizing/instruction_builder.cc @@ -888,7 +888,7 @@ bool HInstructionBuilder::BuildInvoke(const Instruction& instruction, } HInvokeStaticOrDirect::DispatchInfo dispatch_info = { - HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod, + HInvokeStaticOrDirect::MethodLoadKind::kRuntimeCall, HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod, 0u }; diff --git a/compiler/optimizing/intrinsics.cc b/compiler/optimizing/intrinsics.cc index b1d2727e39..b664d41013 100644 --- a/compiler/optimizing/intrinsics.cc +++ b/compiler/optimizing/intrinsics.cc @@ -25,7 +25,7 @@ #include "mirror/dex_cache-inl.h" #include "nodes.h" #include "scoped_thread_state_change-inl.h" -#include "thread-inl.h" +#include "thread-current-inl.h" #include "utils.h" namespace art { diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc index 9803c9a0e9..ae5f8d1760 100644 --- a/compiler/optimizing/intrinsics_arm.cc +++ b/compiler/optimizing/intrinsics_arm.cc @@ -28,7 +28,7 @@ #include "mirror/reference.h" #include "mirror/string.h" #include "scoped_thread_state_change-inl.h" -#include "thread-inl.h" +#include "thread-current-inl.h" #include "utils/arm/assembler_arm.h" namespace art { @@ -2624,58 +2624,6 @@ void IntrinsicCodeGeneratorARM::VisitDoubleIsInfinite(HInvoke* invoke) { codegen_->GenerateConditionWithZero(kCondEQ, out, out); } -void IntrinsicLocationsBuilderARM::VisitReferenceGetReferent(HInvoke* invoke) { - if (kEmitCompilerReadBarrier) { - // Do not intrinsify this call with the read barrier configuration. - return; - } - LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kCallOnSlowPath, - kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetOut(Location::SameAsFirstInput()); - locations->AddTemp(Location::RequiresRegister()); -} - -void IntrinsicCodeGeneratorARM::VisitReferenceGetReferent(HInvoke* invoke) { - DCHECK(!kEmitCompilerReadBarrier); - ArmAssembler* const assembler = GetAssembler(); - LocationSummary* locations = invoke->GetLocations(); - - Register obj = locations->InAt(0).AsRegister<Register>(); - Register out = locations->Out().AsRegister<Register>(); - - SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathARM(invoke); - codegen_->AddSlowPath(slow_path); - - // Load ArtMethod first. - HInvokeStaticOrDirect* invoke_direct = invoke->AsInvokeStaticOrDirect(); - DCHECK(invoke_direct != nullptr); - Register temp = codegen_->GenerateCalleeMethodStaticOrDirectCall( - invoke_direct, locations->GetTemp(0)).AsRegister<Register>(); - - // Now get declaring class. - __ ldr(temp, Address(temp, ArtMethod::DeclaringClassOffset().Int32Value())); - - uint32_t slow_path_flag_offset = codegen_->GetReferenceSlowFlagOffset(); - uint32_t disable_flag_offset = codegen_->GetReferenceDisableFlagOffset(); - DCHECK_NE(slow_path_flag_offset, 0u); - DCHECK_NE(disable_flag_offset, 0u); - DCHECK_NE(slow_path_flag_offset, disable_flag_offset); - - // Check static flags that prevent using intrinsic. - __ ldr(IP, Address(temp, disable_flag_offset)); - __ ldr(temp, Address(temp, slow_path_flag_offset)); - __ orr(IP, IP, ShifterOperand(temp)); - __ CompareAndBranchIfNonZero(IP, slow_path->GetEntryLabel()); - - // Fast path. 
- __ ldr(out, Address(obj, mirror::Reference::ReferentOffset().Int32Value())); - codegen_->MaybeRecordImplicitNullCheck(invoke); - __ MaybeUnpoisonHeapReference(out); - __ Bind(slow_path->GetExitLabel()); -} - void IntrinsicLocationsBuilderARM::VisitIntegerValueOf(HInvoke* invoke) { InvokeRuntimeCallingConvention calling_convention; IntrinsicVisitor::ComputeIntegerValueOfLocations( @@ -2782,6 +2730,7 @@ UNIMPLEMENTED_INTRINSIC(ARM, MathRoundDouble) // Could be done by changing rou UNIMPLEMENTED_INTRINSIC(ARM, MathRoundFloat) // Could be done by changing rounding mode, maybe? UNIMPLEMENTED_INTRINSIC(ARM, UnsafeCASLong) // High register pressure. UNIMPLEMENTED_INTRINSIC(ARM, SystemArrayCopyChar) +UNIMPLEMENTED_INTRINSIC(ARM, ReferenceGetReferent) UNIMPLEMENTED_INTRINSIC(ARM, IntegerHighestOneBit) UNIMPLEMENTED_INTRINSIC(ARM, LongHighestOneBit) UNIMPLEMENTED_INTRINSIC(ARM, IntegerLowestOneBit) diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc index b511c5a18d..37d79814be 100644 --- a/compiler/optimizing/intrinsics_arm64.cc +++ b/compiler/optimizing/intrinsics_arm64.cc @@ -28,7 +28,7 @@ #include "mirror/reference.h" #include "mirror/string-inl.h" #include "scoped_thread_state_change-inl.h" -#include "thread-inl.h" +#include "thread-current-inl.h" #include "utils/arm64/assembler_arm64.h" using namespace vixl::aarch64; // NOLINT(build/namespaces) @@ -124,12 +124,12 @@ class IntrinsicSlowPathARM64 : public SlowPathCodeARM64 { // are no pools emitted. vixl::EmissionCheckScope guard(codegen->GetVIXLAssembler(), kInvokeCodeMarginSizeInBytes); if (invoke_->IsInvokeStaticOrDirect()) { - codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), - LocationFrom(kArtMethodRegister)); + codegen->GenerateStaticOrDirectCall( + invoke_->AsInvokeStaticOrDirect(), LocationFrom(kArtMethodRegister), this); } else { - codegen->GenerateVirtualCall(invoke_->AsInvokeVirtual(), LocationFrom(kArtMethodRegister)); + codegen->GenerateVirtualCall( + invoke_->AsInvokeVirtual(), LocationFrom(kArtMethodRegister), this); } - codegen->RecordPcInfo(invoke_, invoke_->GetDexPc(), this); } // Copy the result back to the expected output. @@ -2897,69 +2897,6 @@ void IntrinsicCodeGeneratorARM64::VisitDoubleIsInfinite(HInvoke* invoke) { GenIsInfinite(invoke->GetLocations(), /* is64bit */ true, GetVIXLAssembler()); } -void IntrinsicLocationsBuilderARM64::VisitReferenceGetReferent(HInvoke* invoke) { - if (kEmitCompilerReadBarrier) { - // Do not intrinsify this call with the read barrier configuration. - return; - } - LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kCallOnSlowPath, - kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetOut(Location::SameAsFirstInput()); - locations->AddTemp(Location::RequiresRegister()); -} - -void IntrinsicCodeGeneratorARM64::VisitReferenceGetReferent(HInvoke* invoke) { - DCHECK(!kEmitCompilerReadBarrier); - MacroAssembler* masm = GetVIXLAssembler(); - LocationSummary* locations = invoke->GetLocations(); - - Register obj = InputRegisterAt(invoke, 0); - Register out = OutputRegister(invoke); - - SlowPathCodeARM64* slow_path = new (GetAllocator()) IntrinsicSlowPathARM64(invoke); - codegen_->AddSlowPath(slow_path); - - // Load ArtMethod first. 
- HInvokeStaticOrDirect* invoke_direct = invoke->AsInvokeStaticOrDirect(); - DCHECK(invoke_direct != nullptr); - Register temp0 = XRegisterFrom(codegen_->GenerateCalleeMethodStaticOrDirectCall( - invoke_direct, locations->GetTemp(0))); - - // Now get declaring class. - __ Ldr(temp0.W(), MemOperand(temp0, ArtMethod::DeclaringClassOffset().Int32Value())); - - uint32_t slow_path_flag_offset = codegen_->GetReferenceSlowFlagOffset(); - uint32_t disable_flag_offset = codegen_->GetReferenceDisableFlagOffset(); - DCHECK_NE(slow_path_flag_offset, 0u); - DCHECK_NE(disable_flag_offset, 0u); - DCHECK_NE(slow_path_flag_offset, disable_flag_offset); - - // Check static flags that prevent using intrinsic. - if (slow_path_flag_offset == disable_flag_offset + 1) { - // Load two adjacent flags in one 64-bit load. - __ Ldr(temp0, MemOperand(temp0, disable_flag_offset)); - } else { - UseScratchRegisterScope temps(masm); - Register temp1 = temps.AcquireW(); - __ Ldr(temp1.W(), MemOperand(temp0, disable_flag_offset)); - __ Ldr(temp0.W(), MemOperand(temp0, slow_path_flag_offset)); - __ Orr(temp0, temp1, temp0); - } - __ Cbnz(temp0, slow_path->GetEntryLabel()); - - { - // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted. - vixl::EmissionCheckScope guard(codegen_->GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes); - // Fast path. - __ Ldr(out, HeapOperand(obj, mirror::Reference::ReferentOffset().Int32Value())); - codegen_->MaybeRecordImplicitNullCheck(invoke); - } - codegen_->GetAssembler()->MaybeUnpoisonHeapReference(out); - __ Bind(slow_path->GetExitLabel()); -} - void IntrinsicLocationsBuilderARM64::VisitIntegerValueOf(HInvoke* invoke) { InvokeRuntimeCallingConvention calling_convention; IntrinsicVisitor::ComputeIntegerValueOfLocations( @@ -3055,6 +2992,7 @@ void IntrinsicCodeGeneratorARM64::VisitThreadInterrupted(HInvoke* invoke) { __ Bind(&done); } +UNIMPLEMENTED_INTRINSIC(ARM64, ReferenceGetReferent) UNIMPLEMENTED_INTRINSIC(ARM64, IntegerHighestOneBit) UNIMPLEMENTED_INTRINSIC(ARM64, LongHighestOneBit) UNIMPLEMENTED_INTRINSIC(ARM64, IntegerLowestOneBit) diff --git a/compiler/optimizing/intrinsics_arm_vixl.cc b/compiler/optimizing/intrinsics_arm_vixl.cc index 1a33b0ee01..3c9b613803 100644 --- a/compiler/optimizing/intrinsics_arm_vixl.cc +++ b/compiler/optimizing/intrinsics_arm_vixl.cc @@ -26,7 +26,7 @@ #include "mirror/reference.h" #include "mirror/string.h" #include "scoped_thread_state_change-inl.h" -#include "thread-inl.h" +#include "thread-current-inl.h" #include "aarch32/constants-aarch32.h" @@ -97,11 +97,10 @@ class IntrinsicSlowPathARMVIXL : public SlowPathCodeARMVIXL { Location method_loc = MoveArguments(codegen); if (invoke_->IsInvokeStaticOrDirect()) { - codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), method_loc); + codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), method_loc, this); } else { - codegen->GenerateVirtualCall(invoke_->AsInvokeVirtual(), method_loc); + codegen->GenerateVirtualCall(invoke_->AsInvokeVirtual(), method_loc, this); } - codegen->RecordPcInfo(invoke_, invoke_->GetDexPc(), this); // Copy the result back to the expected output. 
Location out = invoke_->GetLocations()->Out(); @@ -3000,60 +2999,6 @@ void IntrinsicCodeGeneratorARMVIXL::VisitDoubleIsInfinite(HInvoke* invoke) { codegen_->GenerateConditionWithZero(kCondEQ, out, out); } -void IntrinsicLocationsBuilderARMVIXL::VisitReferenceGetReferent(HInvoke* invoke) { - if (kEmitCompilerReadBarrier) { - // Do not intrinsify this call with the read barrier configuration. - return; - } - LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kCallOnSlowPath, - kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetOut(Location::SameAsFirstInput()); - locations->AddTemp(Location::RequiresRegister()); -} - -void IntrinsicCodeGeneratorARMVIXL::VisitReferenceGetReferent(HInvoke* invoke) { - DCHECK(!kEmitCompilerReadBarrier); - ArmVIXLAssembler* assembler = GetAssembler(); - LocationSummary* locations = invoke->GetLocations(); - - vixl32::Register obj = InputRegisterAt(invoke, 0); - vixl32::Register out = OutputRegister(invoke); - - SlowPathCodeARMVIXL* slow_path = new (GetAllocator()) IntrinsicSlowPathARMVIXL(invoke); - codegen_->AddSlowPath(slow_path); - - // Load ArtMethod first. - HInvokeStaticOrDirect* invoke_direct = invoke->AsInvokeStaticOrDirect(); - DCHECK(invoke_direct != nullptr); - vixl32::Register temp0 = RegisterFrom(codegen_->GenerateCalleeMethodStaticOrDirectCall( - invoke_direct, locations->GetTemp(0))); - - // Now get declaring class. - __ Ldr(temp0, MemOperand(temp0, ArtMethod::DeclaringClassOffset().Int32Value())); - - uint32_t slow_path_flag_offset = codegen_->GetReferenceSlowFlagOffset(); - uint32_t disable_flag_offset = codegen_->GetReferenceDisableFlagOffset(); - DCHECK_NE(slow_path_flag_offset, 0u); - DCHECK_NE(disable_flag_offset, 0u); - DCHECK_NE(slow_path_flag_offset, disable_flag_offset); - - // Check static flags that prevent using intrinsic. - UseScratchRegisterScope temps(assembler->GetVIXLAssembler()); - vixl32::Register temp1 = temps.Acquire(); - __ Ldr(temp1, MemOperand(temp0, disable_flag_offset)); - __ Ldr(temp0, MemOperand(temp0, slow_path_flag_offset)); - __ Orr(temp0, temp1, temp0); - __ CompareAndBranchIfNonZero(temp0, slow_path->GetEntryLabel()); - - // Fast path. - __ Ldr(out, MemOperand(obj, mirror::Reference::ReferentOffset().Int32Value())); - codegen_->MaybeRecordImplicitNullCheck(invoke); - assembler->MaybeUnpoisonHeapReference(out); - __ Bind(slow_path->GetExitLabel()); -} - void IntrinsicLocationsBuilderARMVIXL::VisitMathCeil(HInvoke* invoke) { if (features_.HasARMv8AInstructions()) { CreateFPToFPLocations(arena_, invoke); @@ -3178,6 +3123,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitThreadInterrupted(HInvoke* invoke) { UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathRoundDouble) // Could be done by changing rounding mode, maybe? UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeCASLong) // High register pressure. 
UNIMPLEMENTED_INTRINSIC(ARMVIXL, SystemArrayCopyChar) +UNIMPLEMENTED_INTRINSIC(ARMVIXL, ReferenceGetReferent) UNIMPLEMENTED_INTRINSIC(ARMVIXL, IntegerHighestOneBit) UNIMPLEMENTED_INTRINSIC(ARMVIXL, LongHighestOneBit) UNIMPLEMENTED_INTRINSIC(ARMVIXL, IntegerLowestOneBit) diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc index 4731da1ea9..4cea6dfdfb 100644 --- a/compiler/optimizing/intrinsics_mips.cc +++ b/compiler/optimizing/intrinsics_mips.cc @@ -23,6 +23,7 @@ #include "intrinsics.h" #include "mirror/array-inl.h" #include "mirror/string.h" +#include "scoped_thread_state_change-inl.h" #include "thread.h" #include "utils/mips/assembler_mips.h" #include "utils/mips/constants_mips.h" @@ -111,12 +112,12 @@ class IntrinsicSlowPathMIPS : public SlowPathCodeMIPS { MoveArguments(invoke_, codegen); if (invoke_->IsInvokeStaticOrDirect()) { - codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), - Location::RegisterLocation(A0)); + codegen->GenerateStaticOrDirectCall( + invoke_->AsInvokeStaticOrDirect(), Location::RegisterLocation(A0), this); } else { - codegen->GenerateVirtualCall(invoke_->AsInvokeVirtual(), Location::RegisterLocation(A0)); + codegen->GenerateVirtualCall( + invoke_->AsInvokeVirtual(), Location::RegisterLocation(A0), this); } - codegen->RecordPcInfo(invoke_, invoke_->GetDexPc(), this); // Copy the result back to the expected output. Location out = invoke_->GetLocations()->Out(); diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc index 00afbcd8f2..d785567e0f 100644 --- a/compiler/optimizing/intrinsics_mips64.cc +++ b/compiler/optimizing/intrinsics_mips64.cc @@ -23,6 +23,7 @@ #include "intrinsics.h" #include "mirror/array-inl.h" #include "mirror/string.h" +#include "scoped_thread_state_change-inl.h" #include "thread.h" #include "utils/mips64/assembler_mips64.h" #include "utils/mips64/constants_mips64.h" @@ -100,12 +101,12 @@ class IntrinsicSlowPathMIPS64 : public SlowPathCodeMIPS64 { MoveArguments(invoke_, codegen); if (invoke_->IsInvokeStaticOrDirect()) { - codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), - Location::RegisterLocation(A0)); + codegen->GenerateStaticOrDirectCall( + invoke_->AsInvokeStaticOrDirect(), Location::RegisterLocation(A0), this); } else { - codegen->GenerateVirtualCall(invoke_->AsInvokeVirtual(), Location::RegisterLocation(A0)); + codegen->GenerateVirtualCall( + invoke_->AsInvokeVirtual(), Location::RegisterLocation(A0), this); } - codegen->RecordPcInfo(invoke_, invoke_->GetDexPc(), this); // Copy the result back to the expected output. Location out = invoke_->GetLocations()->Out(); diff --git a/compiler/optimizing/intrinsics_utils.h b/compiler/optimizing/intrinsics_utils.h index c1f9ae6425..8c69d9b643 100644 --- a/compiler/optimizing/intrinsics_utils.h +++ b/compiler/optimizing/intrinsics_utils.h @@ -56,11 +56,10 @@ class IntrinsicSlowPath : public SlowPathCode { Location method_loc = MoveArguments(codegen); if (invoke_->IsInvokeStaticOrDirect()) { - codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), method_loc); + codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), method_loc, this); } else { - codegen->GenerateVirtualCall(invoke_->AsInvokeVirtual(), method_loc); + codegen->GenerateVirtualCall(invoke_->AsInvokeVirtual(), method_loc, this); } - codegen->RecordPcInfo(invoke_, invoke_->GetDexPc(), this); // Copy the result back to the expected output. 
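Each of these intrinsic slow-path hunks makes the same mechanical change: rather than every slow path calling RecordPcInfo after the call generator returns, the generator now receives the slow path and records the PC info at the exact point where it emits the call. A toy model of the resulting flow, with hypothetical types rather than ART's:

```cpp
#include <cstdint>
#include <iostream>
#include <vector>

struct SlowPath {};

struct PcRecord {
  uint32_t dex_pc;
  const SlowPath* slow_path;  // nullptr for main-path calls
};

struct CodegenModel {
  std::vector<PcRecord> records;

  void RecordPcInfo(uint32_t dex_pc, const SlowPath* sp) {
    records.push_back({dex_pc, sp});
  }

  // The generator records PC info right where it emits the call, with the
  // slow path (if any) passed down by the caller, so the two cannot drift.
  void GenerateCall(uint32_t dex_pc, const SlowPath* sp = nullptr) {
    // ... emit the call instruction here ...
    RecordPcInfo(dex_pc, sp);
  }
};

int main() {
  CodegenModel cg;
  SlowPath sp;
  cg.GenerateCall(42, &sp);  // call emitted from an intrinsic slow path
  cg.GenerateCall(7);        // call emitted on the main path
  std::cout << cg.records.size() << " PC records\n";  // prints "2 PC records"
}
```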
Location out = invoke_->GetLocations()->Out(); diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc index 57adcc3c2f..6b4851d541 100644 --- a/compiler/optimizing/intrinsics_x86.cc +++ b/compiler/optimizing/intrinsics_x86.cc @@ -31,7 +31,7 @@ #include "mirror/reference.h" #include "mirror/string.h" #include "scoped_thread_state_change-inl.h" -#include "thread-inl.h" +#include "thread-current-inl.h" #include "utils/x86/assembler_x86.h" #include "utils/x86/constants_x86.h" @@ -796,7 +796,6 @@ static void InvokeOutOfLineIntrinsic(CodeGeneratorX86* codegen, HInvoke* invoke) DCHECK(invoke->IsInvokeStaticOrDirect()); codegen->GenerateStaticOrDirectCall(invoke->AsInvokeStaticOrDirect(), Location::RegisterLocation(EAX)); - codegen->RecordPcInfo(invoke, invoke->GetDexPc()); // Copy the result back to the expected output. Location out = invoke->GetLocations()->Out(); @@ -2819,65 +2818,6 @@ void IntrinsicCodeGeneratorX86::VisitLongNumberOfTrailingZeros(HInvoke* invoke) GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long */ true); } -void IntrinsicLocationsBuilderX86::VisitReferenceGetReferent(HInvoke* invoke) { - if (kEmitCompilerReadBarrier) { - // Do not intrinsify this call with the read barrier configuration. - return; - } - LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kCallOnSlowPath, - kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetOut(Location::SameAsFirstInput()); - locations->AddTemp(Location::RequiresRegister()); -} - -void IntrinsicCodeGeneratorX86::VisitReferenceGetReferent(HInvoke* invoke) { - DCHECK(!kEmitCompilerReadBarrier); - LocationSummary* locations = invoke->GetLocations(); - X86Assembler* assembler = GetAssembler(); - - Register obj = locations->InAt(0).AsRegister<Register>(); - Register out = locations->Out().AsRegister<Register>(); - - SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke); - codegen_->AddSlowPath(slow_path); - - // Load ArtMethod first. - HInvokeStaticOrDirect* invoke_direct = invoke->AsInvokeStaticOrDirect(); - DCHECK(invoke_direct != nullptr); - Location temp_loc = codegen_->GenerateCalleeMethodStaticOrDirectCall( - invoke_direct, locations->GetTemp(0)); - DCHECK(temp_loc.Equals(locations->GetTemp(0))); - Register temp = temp_loc.AsRegister<Register>(); - - // Now get declaring class. - __ movl(temp, Address(temp, ArtMethod::DeclaringClassOffset().Int32Value())); - - uint32_t slow_path_flag_offset = codegen_->GetReferenceSlowFlagOffset(); - uint32_t disable_flag_offset = codegen_->GetReferenceDisableFlagOffset(); - DCHECK_NE(slow_path_flag_offset, 0u); - DCHECK_NE(disable_flag_offset, 0u); - DCHECK_NE(slow_path_flag_offset, disable_flag_offset); - - // Check static flags preventing us for using intrinsic. - if (slow_path_flag_offset == disable_flag_offset + 1) { - __ cmpw(Address(temp, disable_flag_offset), Immediate(0)); - __ j(kNotEqual, slow_path->GetEntryLabel()); - } else { - __ cmpb(Address(temp, disable_flag_offset), Immediate(0)); - __ j(kNotEqual, slow_path->GetEntryLabel()); - __ cmpb(Address(temp, slow_path_flag_offset), Immediate(0)); - __ j(kNotEqual, slow_path->GetEntryLabel()); - } - - // Fast path. 
- __ movl(out, Address(obj, mirror::Reference::ReferentOffset().Int32Value())); - codegen_->MaybeRecordImplicitNullCheck(invoke); - __ MaybeUnpoisonHeapReference(out); - __ Bind(slow_path->GetExitLabel()); -} - static bool IsSameInput(HInstruction* instruction, size_t input0, size_t input1) { return instruction->InputAt(input0) == instruction->InputAt(input1); } @@ -3429,6 +3369,7 @@ void IntrinsicCodeGeneratorX86::VisitThreadInterrupted(HInvoke* invoke) { UNIMPLEMENTED_INTRINSIC(X86, MathRoundDouble) +UNIMPLEMENTED_INTRINSIC(X86, ReferenceGetReferent) UNIMPLEMENTED_INTRINSIC(X86, FloatIsInfinite) UNIMPLEMENTED_INTRINSIC(X86, DoubleIsInfinite) UNIMPLEMENTED_INTRINSIC(X86, IntegerHighestOneBit) diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc index 773383ef1b..ef98b7be30 100644 --- a/compiler/optimizing/intrinsics_x86_64.cc +++ b/compiler/optimizing/intrinsics_x86_64.cc @@ -31,7 +31,7 @@ #include "mirror/reference.h" #include "mirror/string.h" #include "scoped_thread_state_change-inl.h" -#include "thread-inl.h" +#include "thread-current-inl.h" #include "utils/x86_64/assembler_x86_64.h" #include "utils/x86_64/constants_x86_64.h" @@ -567,7 +567,6 @@ static void InvokeOutOfLineIntrinsic(CodeGeneratorX86_64* codegen, HInvoke* invo DCHECK(invoke->IsInvokeStaticOrDirect()); codegen->GenerateStaticOrDirectCall( invoke->AsInvokeStaticOrDirect(), Location::RegisterLocation(RDI)); - codegen->RecordPcInfo(invoke, invoke->GetDexPc()); // Copy the result back to the expected output. Location out = invoke->GetLocations()->Out(); @@ -2959,65 +2958,6 @@ void IntrinsicCodeGeneratorX86_64::VisitLongNumberOfTrailingZeros(HInvoke* invok GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long */ true); } -void IntrinsicLocationsBuilderX86_64::VisitReferenceGetReferent(HInvoke* invoke) { - if (kEmitCompilerReadBarrier) { - // Do not intrinsify this call with the read barrier configuration. - return; - } - LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kCallOnSlowPath, - kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetOut(Location::SameAsFirstInput()); - locations->AddTemp(Location::RequiresRegister()); -} - -void IntrinsicCodeGeneratorX86_64::VisitReferenceGetReferent(HInvoke* invoke) { - DCHECK(!kEmitCompilerReadBarrier); - LocationSummary* locations = invoke->GetLocations(); - X86_64Assembler* assembler = GetAssembler(); - - CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>(); - CpuRegister out = locations->Out().AsRegister<CpuRegister>(); - - SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke); - codegen_->AddSlowPath(slow_path); - - // Load ArtMethod first. - HInvokeStaticOrDirect* invoke_direct = invoke->AsInvokeStaticOrDirect(); - DCHECK(invoke_direct != nullptr); - Location temp_loc = codegen_->GenerateCalleeMethodStaticOrDirectCall( - invoke_direct, locations->GetTemp(0)); - DCHECK(temp_loc.Equals(locations->GetTemp(0))); - CpuRegister temp = temp_loc.AsRegister<CpuRegister>(); - - // Now get declaring class. - __ movl(temp, Address(temp, ArtMethod::DeclaringClassOffset().Int32Value())); - - uint32_t slow_path_flag_offset = codegen_->GetReferenceSlowFlagOffset(); - uint32_t disable_flag_offset = codegen_->GetReferenceDisableFlagOffset(); - DCHECK_NE(slow_path_flag_offset, 0u); - DCHECK_NE(disable_flag_offset, 0u); - DCHECK_NE(slow_path_flag_offset, disable_flag_offset); - - // Check static flags preventing us for using intrinsic. 
- if (slow_path_flag_offset == disable_flag_offset + 1) { - __ cmpw(Address(temp, disable_flag_offset), Immediate(0)); - __ j(kNotEqual, slow_path->GetEntryLabel()); - } else { - __ cmpb(Address(temp, disable_flag_offset), Immediate(0)); - __ j(kNotEqual, slow_path->GetEntryLabel()); - __ cmpb(Address(temp, slow_path_flag_offset), Immediate(0)); - __ j(kNotEqual, slow_path->GetEntryLabel()); - } - - // Fast path. - __ movl(out, Address(obj, mirror::Reference::ReferentOffset().Int32Value())); - codegen_->MaybeRecordImplicitNullCheck(invoke); - __ MaybeUnpoisonHeapReference(out); - __ Bind(slow_path->GetExitLabel()); -} - void IntrinsicLocationsBuilderX86_64::VisitIntegerValueOf(HInvoke* invoke) { InvokeRuntimeCallingConvention calling_convention; IntrinsicVisitor::ComputeIntegerValueOfLocations( @@ -3106,6 +3046,7 @@ void IntrinsicCodeGeneratorX86_64::VisitThreadInterrupted(HInvoke* invoke) { __ Bind(&done); } +UNIMPLEMENTED_INTRINSIC(X86_64, ReferenceGetReferent) UNIMPLEMENTED_INTRINSIC(X86_64, FloatIsInfinite) UNIMPLEMENTED_INTRINSIC(X86_64, DoubleIsInfinite) diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc index 94787c99b2..c3aa976d49 100644 --- a/compiler/optimizing/loop_optimization.cc +++ b/compiler/optimizing/loop_optimization.cc @@ -811,6 +811,11 @@ bool HLoopOptimization::VectorizeUse(LoopNode* node, } return true; } else if (instruction->IsArrayGet()) { + // Deal with vector restrictions. + if (instruction->AsArrayGet()->IsStringCharAt() && + HasVectorRestrictions(restrictions, kNoStringCharAt)) { + return false; + } // Accept a right-hand-side array base[index] for // (1) exact matching vector type, // (2) loop-invariant base, @@ -1072,9 +1077,36 @@ bool HLoopOptimization::TrySetVectorType(Primitive::Type type, uint64_t* restric } return false; case kMips: - case kMips64: // TODO: implement MIPS SIMD. return false; + case kMips64: + if (features->AsMips64InstructionSetFeatures()->HasMsa()) { + switch (type) { + case Primitive::kPrimBoolean: + case Primitive::kPrimByte: + *restrictions |= kNoDiv | kNoMinMax; + return TrySetVectorLength(16); + case Primitive::kPrimChar: + case Primitive::kPrimShort: + *restrictions |= kNoDiv | kNoMinMax | kNoStringCharAt; + return TrySetVectorLength(8); + case Primitive::kPrimInt: + *restrictions |= kNoDiv | kNoMinMax; + return TrySetVectorLength(4); + case Primitive::kPrimLong: + *restrictions |= kNoDiv | kNoMinMax; + return TrySetVectorLength(2); + case Primitive::kPrimFloat: + *restrictions |= kNoMinMax; + return TrySetVectorLength(4); + case Primitive::kPrimDouble: + *restrictions |= kNoMinMax; + return TrySetVectorLength(2); + default: + break; + } // switch type + } + return false; default: return false; } // switch instruction set @@ -1270,9 +1302,10 @@ void HLoopOptimization::GenerateVecOp(HInstruction* org, // corresponding new scalar instructions in the loop. The instruction will get an // environment while being inserted from the instruction map in original program order. 
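// The TrySetVectorLength arguments in the new MIPS64 MSA case above follow
// directly from MSA's 128-bit vector registers: lanes = 16 bytes / element
// size. A minimal compilable sketch of that relation; kMsaVectorBytes and
// MsaLanes are hypothetical illustration names, not ART code.
#include <cstddef>

constexpr std::size_t kMsaVectorBytes = 16;  // 128-bit MSA vector registers

constexpr std::size_t MsaLanes(std::size_t component_size) {
  // 16 lanes for boolean/byte, 8 for char/short, 4 for int/float, 2 for long/double.
  return kMsaVectorBytes / component_size;
}

static_assert(MsaLanes(1) == 16 && MsaLanes(2) == 8 &&
              MsaLanes(4) == 4 && MsaLanes(8) == 2,
              "lane counts match the TrySetVectorLength arguments above");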
DCHECK(vector_mode_ == kSequential); + size_t num_args = invoke->GetNumberOfArguments(); HInvokeStaticOrDirect* new_invoke = new (global_allocator_) HInvokeStaticOrDirect( global_allocator_, - invoke->GetNumberOfArguments(), + num_args, invoke->GetType(), invoke->GetDexPc(), invoke->GetDexMethodIndex(), @@ -1282,8 +1315,14 @@ void HLoopOptimization::GenerateVecOp(HInstruction* org, invoke->GetTargetMethod(), invoke->GetClinitCheckRequirement()); HInputsRef inputs = invoke->GetInputs(); - for (size_t index = 0; index < inputs.size(); ++index) { - new_invoke->SetArgumentAt(index, vector_map_->Get(inputs[index])); + size_t num_inputs = inputs.size(); + DCHECK_LE(num_args, num_inputs); + DCHECK_EQ(num_inputs, new_invoke->GetInputs().size()); // both invokes agree + for (size_t index = 0; index < num_inputs; ++index) { + HInstruction* new_input = index < num_args + ? vector_map_->Get(inputs[index]) + : inputs[index]; // beyond arguments: just pass through + new_invoke->SetArgumentAt(index, new_input); } new_invoke->SetIntrinsic(invoke->GetIntrinsic(), kNeedsEnvironmentOrCache, diff --git a/compiler/optimizing/loop_optimization.h b/compiler/optimizing/loop_optimization.h index 35298d4076..75a42f3297 100644 --- a/compiler/optimizing/loop_optimization.h +++ b/compiler/optimizing/loop_optimization.h @@ -72,6 +72,7 @@ class HLoopOptimization : public HOptimization { kNoUnroundedHAdd = 64, // no unrounded halving add kNoAbs = 128, // no absolute value kNoMinMax = 256, // no min/max + kNoStringCharAt = 512, // no StringCharAt }; /* diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc index 689991010e..e53209f941 100644 --- a/compiler/optimizing/nodes.cc +++ b/compiler/optimizing/nodes.cc @@ -2623,7 +2623,7 @@ const DexFile& HInvokeStaticOrDirect::GetDexFileForPcRelativeDexCache() const { } bool HInvokeStaticOrDirect::NeedsDexCacheOfDeclaringClass() const { - if (GetMethodLoadKind() != MethodLoadKind::kDexCacheViaMethod) { + if (GetMethodLoadKind() != MethodLoadKind::kRuntimeCall) { return false; } if (!IsIntrinsic()) { @@ -2645,8 +2645,8 @@ std::ostream& operator<<(std::ostream& os, HInvokeStaticOrDirect::MethodLoadKind return os << "DirectAddress"; case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative: return os << "DexCachePcRelative"; - case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: - return os << "DexCacheViaMethod"; + case HInvokeStaticOrDirect::MethodLoadKind::kRuntimeCall: + return os << "RuntimeCall"; default: LOG(FATAL) << "Unknown MethodLoadKind: " << static_cast<int>(rhs); UNREACHABLE(); @@ -2690,7 +2690,7 @@ bool HLoadClass::InstructionDataEquals(const HInstruction* other) const { void HLoadClass::SetLoadKind(LoadKind load_kind) { SetPackedField<LoadKindField>(load_kind); - if (load_kind != LoadKind::kDexCacheViaMethod && + if (load_kind != LoadKind::kRuntimeCall && load_kind != LoadKind::kReferrersClass) { RemoveAsUserOfInput(0u); SetRawInputAt(0u, nullptr); @@ -2714,8 +2714,8 @@ std::ostream& operator<<(std::ostream& os, HLoadClass::LoadKind rhs) { return os << "BssEntry"; case HLoadClass::LoadKind::kJitTableAddress: return os << "JitTableAddress"; - case HLoadClass::LoadKind::kDexCacheViaMethod: - return os << "DexCacheViaMethod"; + case HLoadClass::LoadKind::kRuntimeCall: + return os << "RuntimeCall"; default: LOG(FATAL) << "Unknown HLoadClass::LoadKind: " << static_cast<int>(rhs); UNREACHABLE(); @@ -2743,10 +2743,10 @@ bool HLoadString::InstructionDataEquals(const HInstruction* other) const { void HLoadString::SetLoadKind(LoadKind 
load_kind) { // Once sharpened, the load kind should not be changed again. - DCHECK_EQ(GetLoadKind(), LoadKind::kDexCacheViaMethod); + DCHECK_EQ(GetLoadKind(), LoadKind::kRuntimeCall); SetPackedField<LoadKindField>(load_kind); - if (load_kind != LoadKind::kDexCacheViaMethod) { + if (load_kind != LoadKind::kRuntimeCall) { RemoveAsUserOfInput(0u); SetRawInputAt(0u, nullptr); } @@ -2766,8 +2766,8 @@ std::ostream& operator<<(std::ostream& os, HLoadString::LoadKind rhs) { return os << "BssEntry"; case HLoadString::LoadKind::kJitTableAddress: return os << "JitTableAddress"; - case HLoadString::LoadKind::kDexCacheViaMethod: - return os << "DexCacheViaMethod"; + case HLoadString::LoadKind::kRuntimeCall: + return os << "RuntimeCall"; default: LOG(FATAL) << "Unknown HLoadString::LoadKind: " << static_cast<int>(rhs); UNREACHABLE(); diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index 4d96fbe24c..74bb2ab3c4 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -1790,7 +1790,7 @@ class HEnvironment : public ArenaObject<kArenaAllocEnvironment> { uint32_t dex_pc, HInstruction* holder) : vregs_(number_of_vregs, arena->Adapter(kArenaAllocEnvironmentVRegs)), - locations_(number_of_vregs, arena->Adapter(kArenaAllocEnvironmentLocations)), + locations_(arena->Adapter(kArenaAllocEnvironmentLocations)), parent_(nullptr), method_(method), dex_pc_(dex_pc), @@ -1804,6 +1804,11 @@ class HEnvironment : public ArenaObject<kArenaAllocEnvironment> { to_copy.GetDexPc(), holder) {} + void AllocateLocations() { + DCHECK(locations_.empty()); + locations_.resize(vregs_.size()); + } + void SetAndCopyParentChain(ArenaAllocator* allocator, HEnvironment* parent) { if (parent_ != nullptr) { parent_->SetAndCopyParentChain(allocator, parent); @@ -4167,11 +4172,9 @@ class HInvokeStaticOrDirect FINAL : public HInvoke { // and we know that we can access the dex cache arrays using a PC-relative load. kDexCachePcRelative, - // Use ArtMethod* from the resolved methods of the compiled method's own ArtMethod*. - // Used for JIT when we need to use the dex cache. This is also the last-resort-kind - // used when other kinds are unavailable (say, dex cache arrays are not PC-relative) - // or unimplemented or impractical (i.e. slow) on a particular architecture. - kDexCacheViaMethod, + // Make a runtime call to resolve and call the method. This is the last-resort-kind + // used when other kinds are unimplemented on a particular architecture. + kRuntimeCall, }; // Determines the location of the code pointer. @@ -4371,7 +4374,7 @@ class HInvokeStaticOrDirect FINAL : public HInvoke { // Does this method load kind need the current method as an input? static bool NeedsCurrentMethodInput(MethodLoadKind kind) { - return kind == MethodLoadKind::kRecursive || kind == MethodLoadKind::kDexCacheViaMethod; + return kind == MethodLoadKind::kRecursive || kind == MethodLoadKind::kRuntimeCall; } DECLARE_INSTRUCTION(InvokeStaticOrDirect); @@ -5687,12 +5690,11 @@ class HLoadClass FINAL : public HInstruction { // Load from the root table associated with the JIT compiled method. kJitTableAddress, - // Load from resolved types array accessed through the class loaded from - // the compiled method's own ArtMethod*. This is the default access type when - // all other types are unavailable. - kDexCacheViaMethod, + // Load using a simple runtime call. This is the fall-back load kind when + // the codegen is unable to use another appropriate kind. 
+ kRuntimeCall, - kLast = kDexCacheViaMethod + kLast = kRuntimeCall }; HLoadClass(HCurrentMethod* current_method, @@ -5713,7 +5715,7 @@ class HLoadClass FINAL : public HInstruction { DCHECK(!is_referrers_class || !needs_access_check); SetPackedField<LoadKindField>( - is_referrers_class ? LoadKind::kReferrersClass : LoadKind::kDexCacheViaMethod); + is_referrers_class ? LoadKind::kReferrersClass : LoadKind::kRuntimeCall); SetPackedFlag<kFlagNeedsAccessCheck>(needs_access_check); SetPackedFlag<kFlagIsInBootImage>(false); SetPackedFlag<kFlagGenerateClInitCheck>(false); @@ -5747,7 +5749,7 @@ class HLoadClass FINAL : public HInstruction { bool CanCallRuntime() const { return NeedsAccessCheck() || MustGenerateClinitCheck() || - GetLoadKind() == LoadKind::kDexCacheViaMethod || + GetLoadKind() == LoadKind::kRuntimeCall || GetLoadKind() == LoadKind::kBssEntry; } @@ -5757,7 +5759,7 @@ class HLoadClass FINAL : public HInstruction { // If the class is in the boot image, the lookup in the runtime call cannot throw. // This keeps CanThrow() consistent between non-PIC (using kBootImageAddress) and // PIC and subsequently avoids a DCE behavior dependency on the PIC option. - ((GetLoadKind() == LoadKind::kDexCacheViaMethod || + ((GetLoadKind() == LoadKind::kRuntimeCall || GetLoadKind() == LoadKind::kBssEntry) && !IsInBootImage()); } @@ -5776,7 +5778,7 @@ class HLoadClass FINAL : public HInstruction { const DexFile& GetDexFile() const { return dex_file_; } bool NeedsDexCacheOfDeclaringClass() const OVERRIDE { - return GetLoadKind() == LoadKind::kDexCacheViaMethod; + return GetLoadKind() == LoadKind::kRuntimeCall; } static SideEffects SideEffectsForArchRuntimeCalls() { @@ -5827,12 +5829,12 @@ class HLoadClass FINAL : public HInstruction { return load_kind == LoadKind::kReferrersClass || load_kind == LoadKind::kBootImageLinkTimePcRelative || load_kind == LoadKind::kBssEntry || - load_kind == LoadKind::kDexCacheViaMethod; + load_kind == LoadKind::kRuntimeCall; } void SetLoadKindInternal(LoadKind load_kind); - // The special input is the HCurrentMethod for kDexCacheViaMethod or kReferrersClass. + // The special input is the HCurrentMethod for kRuntimeCall or kReferrersClass. // For other load kinds it's empty or possibly some architecture-specific instruction // for PC-relative loads, i.e. kBssEntry or kBootImageLinkTimePcRelative. HUserRecord<HInstruction*> special_input_; @@ -5841,7 +5843,7 @@ class HLoadClass FINAL : public HInstruction { // - The compiling method's dex file if the class is defined there too. // - The compiling method's dex file if the class is referenced there. // - The dex file where the class is defined. When the load kind can only be - // kBssEntry or kDexCacheViaMethod, we cannot emit code for this `HLoadClass`. + // kBssEntry or kRuntimeCall, we cannot emit code for this `HLoadClass`. const dex::TypeIndex type_index_; const DexFile& dex_file_; @@ -5884,12 +5886,11 @@ class HLoadString FINAL : public HInstruction { // Load from the root table associated with the JIT compiled method. kJitTableAddress, - // Load from resolved strings array accessed through the class loaded from - // the compiled method's own ArtMethod*. This is the default access type when - // all other types are unavailable. - kDexCacheViaMethod, + // Load using a simple runtime call. This is the fall-back load kind when + // the codegen is unable to use another appropriate kind. 
+ kRuntimeCall, - kLast = kDexCacheViaMethod, + kLast = kRuntimeCall, }; HLoadString(HCurrentMethod* current_method, @@ -5900,7 +5901,7 @@ class HLoadString FINAL : public HInstruction { special_input_(HUserRecord<HInstruction*>(current_method)), string_index_(string_index), dex_file_(dex_file) { - SetPackedField<LoadKindField>(LoadKind::kDexCacheViaMethod); + SetPackedField<LoadKindField>(LoadKind::kRuntimeCall); } void SetLoadKind(LoadKind load_kind); @@ -5944,7 +5945,7 @@ class HLoadString FINAL : public HInstruction { } bool NeedsDexCacheOfDeclaringClass() const OVERRIDE { - return GetLoadKind() == LoadKind::kDexCacheViaMethod; + return GetLoadKind() == LoadKind::kRuntimeCall; } bool CanBeNull() const OVERRIDE { return false; } @@ -5978,7 +5979,7 @@ class HLoadString FINAL : public HInstruction { void SetLoadKindInternal(LoadKind load_kind); - // The special input is the HCurrentMethod for kDexCacheViaMethod. + // The special input is the HCurrentMethod for kRuntimeCall. // For other load kinds it's empty or possibly some architecture-specific instruction // for PC-relative loads, i.e. kBssEntry or kBootImageLinkTimePcRelative. HUserRecord<HInstruction*> special_input_; diff --git a/compiler/optimizing/register_allocator_graph_color.cc b/compiler/optimizing/register_allocator_graph_color.cc index 300f4c6239..2fd7b03151 100644 --- a/compiler/optimizing/register_allocator_graph_color.cc +++ b/compiler/optimizing/register_allocator_graph_color.cc @@ -20,7 +20,7 @@ #include "linear_order.h" #include "register_allocation_resolver.h" #include "ssa_liveness_analysis.h" -#include "thread-inl.h" +#include "thread-current-inl.h" namespace art { diff --git a/compiler/optimizing/scheduler_arm.cc b/compiler/optimizing/scheduler_arm.cc index 1a89567991..832a7e1571 100644 --- a/compiler/optimizing/scheduler_arm.cc +++ b/compiler/optimizing/scheduler_arm.cc @@ -288,6 +288,11 @@ void SchedulingLatencyVisitorARM::VisitIntermediateAddress(HIntermediateAddress* last_visited_latency_ = kArmIntegerOpLatency; } +void SchedulingLatencyVisitorARM::VisitIntermediateAddressIndex( + HIntermediateAddressIndex* ATTRIBUTE_UNUSED) { + UNIMPLEMENTED(FATAL) << "IntermediateAddressIndex is not implemented for ARM"; +} + void SchedulingLatencyVisitorARM::VisitMultiplyAccumulate(HMultiplyAccumulate* ATTRIBUTE_UNUSED) { last_visited_latency_ = kArmMulIntegerLatency; } diff --git a/compiler/optimizing/scheduler_arm.h b/compiler/optimizing/scheduler_arm.h index 8d5e4f375b..897e97da49 100644 --- a/compiler/optimizing/scheduler_arm.h +++ b/compiler/optimizing/scheduler_arm.h @@ -17,7 +17,11 @@ #ifndef ART_COMPILER_OPTIMIZING_SCHEDULER_ARM_H_ #define ART_COMPILER_OPTIMIZING_SCHEDULER_ARM_H_ +#ifdef ART_USE_OLD_ARM_BACKEND +#include "code_generator_arm.h" +#else #include "code_generator_arm_vixl.h" +#endif #include "scheduler.h" namespace art { @@ -99,6 +103,7 @@ class SchedulingLatencyVisitorARM : public SchedulingLatencyVisitor { M(BitwiseNegatedRight, unused) \ M(MultiplyAccumulate, unused) \ M(IntermediateAddress, unused) \ + M(IntermediateAddressIndex, unused) \ M(DataProcWithShifterOp, unused) #define DECLARE_VISIT_INSTRUCTION(type, unused) \ diff --git a/compiler/optimizing/scheduler_arm64.cc b/compiler/optimizing/scheduler_arm64.cc index 558dcc4cbc..83b487fb5b 100644 --- a/compiler/optimizing/scheduler_arm64.cc +++ b/compiler/optimizing/scheduler_arm64.cc @@ -16,6 +16,7 @@ #include "scheduler_arm64.h" #include "code_generator_utils.h" +#include "mirror/array-inl.h" namespace art { namespace arm64 { @@ -43,6 +44,13 @@ 
void SchedulingLatencyVisitorARM64::VisitIntermediateAddress( last_visited_latency_ = kArm64IntegerOpLatency + 2; } +void SchedulingLatencyVisitorARM64::VisitIntermediateAddressIndex( + HIntermediateAddressIndex* instr ATTRIBUTE_UNUSED) { + // Although the code generated is a simple `add` instruction, we found through empirical results + // that spacing it from its use in memory accesses was beneficial. + last_visited_latency_ = kArm64DataProcWithShifterOpLatency + 2; +} + void SchedulingLatencyVisitorARM64::VisitMultiplyAccumulate(HMultiplyAccumulate* ATTRIBUTE_UNUSED) { last_visited_latency_ = kArm64MulIntegerLatency; } @@ -192,5 +200,148 @@ void SchedulingLatencyVisitorARM64::VisitTypeConversion(HTypeConversion* instr) } } +void SchedulingLatencyVisitorARM64::HandleSimpleArithmeticSIMD(HVecOperation *instr) { + if (Primitive::IsFloatingPointType(instr->GetPackedType())) { + last_visited_latency_ = kArm64SIMDFloatingPointOpLatency; + } else { + last_visited_latency_ = kArm64SIMDIntegerOpLatency; + } +} + +void SchedulingLatencyVisitorARM64::VisitVecReplicateScalar( + HVecReplicateScalar* instr ATTRIBUTE_UNUSED) { + last_visited_latency_ = kArm64SIMDReplicateOpLatency; +} + +void SchedulingLatencyVisitorARM64::VisitVecSetScalars(HVecSetScalars* instr) { + LOG(FATAL) << "Unsupported SIMD instruction " << instr->GetId(); +} + +void SchedulingLatencyVisitorARM64::VisitVecSumReduce(HVecSumReduce* instr) { + LOG(FATAL) << "Unsupported SIMD instruction " << instr->GetId(); +} + +void SchedulingLatencyVisitorARM64::VisitVecCnv(HVecCnv* instr ATTRIBUTE_UNUSED) { + last_visited_latency_ = kArm64SIMDTypeConversionInt2FPLatency; +} + +void SchedulingLatencyVisitorARM64::VisitVecNeg(HVecNeg* instr) { + HandleSimpleArithmeticSIMD(instr); +} + +void SchedulingLatencyVisitorARM64::VisitVecAbs(HVecAbs* instr) { + HandleSimpleArithmeticSIMD(instr); +} + +void SchedulingLatencyVisitorARM64::VisitVecNot(HVecNot* instr) { + if (instr->GetPackedType() == Primitive::kPrimBoolean) { + last_visited_internal_latency_ = kArm64SIMDIntegerOpLatency; + } + last_visited_latency_ = kArm64SIMDIntegerOpLatency; +} + +void SchedulingLatencyVisitorARM64::VisitVecAdd(HVecAdd* instr) { + HandleSimpleArithmeticSIMD(instr); +} + +void SchedulingLatencyVisitorARM64::VisitVecHalvingAdd(HVecHalvingAdd* instr) { + HandleSimpleArithmeticSIMD(instr); +} + +void SchedulingLatencyVisitorARM64::VisitVecSub(HVecSub* instr) { + HandleSimpleArithmeticSIMD(instr); +} + +void SchedulingLatencyVisitorARM64::VisitVecMul(HVecMul* instr) { + if (Primitive::IsFloatingPointType(instr->GetPackedType())) { + last_visited_latency_ = kArm64SIMDMulFloatingPointLatency; + } else { + last_visited_latency_ = kArm64SIMDMulIntegerLatency; + } +} + +void SchedulingLatencyVisitorARM64::VisitVecDiv(HVecDiv* instr) { + if (instr->GetPackedType() == Primitive::kPrimFloat) { + last_visited_latency_ = kArm64SIMDDivFloatLatency; + } else { + DCHECK(instr->GetPackedType() == Primitive::kPrimDouble); + last_visited_latency_ = kArm64SIMDDivDoubleLatency; + } +} + +void SchedulingLatencyVisitorARM64::VisitVecMin(HVecMin* instr) { + HandleSimpleArithmeticSIMD(instr); +} + +void SchedulingLatencyVisitorARM64::VisitVecMax(HVecMax* instr) { + HandleSimpleArithmeticSIMD(instr); +} + +void SchedulingLatencyVisitorARM64::VisitVecAnd(HVecAnd* instr ATTRIBUTE_UNUSED) { + last_visited_latency_ = kArm64SIMDIntegerOpLatency; +} + +void SchedulingLatencyVisitorARM64::VisitVecAndNot(HVecAndNot* instr) { + LOG(FATAL) << "Unsupported SIMD instruction " << instr->GetId(); +} + +void 
SchedulingLatencyVisitorARM64::VisitVecOr(HVecOr* instr ATTRIBUTE_UNUSED) { + last_visited_latency_ = kArm64SIMDIntegerOpLatency; +} + +void SchedulingLatencyVisitorARM64::VisitVecXor(HVecXor* instr ATTRIBUTE_UNUSED) { + last_visited_latency_ = kArm64SIMDIntegerOpLatency; +} + +void SchedulingLatencyVisitorARM64::VisitVecShl(HVecShl* instr) { + HandleSimpleArithmeticSIMD(instr); +} + +void SchedulingLatencyVisitorARM64::VisitVecShr(HVecShr* instr) { + HandleSimpleArithmeticSIMD(instr); +} + +void SchedulingLatencyVisitorARM64::VisitVecUShr(HVecUShr* instr) { + HandleSimpleArithmeticSIMD(instr); +} + +void SchedulingLatencyVisitorARM64::VisitVecMultiplyAccumulate( + HVecMultiplyAccumulate* instr ATTRIBUTE_UNUSED) { + last_visited_latency_ = kArm64SIMDMulIntegerLatency; +} + +void SchedulingLatencyVisitorARM64::HandleVecAddress( + HVecMemoryOperation* instruction, + size_t size ATTRIBUTE_UNUSED) { + HInstruction* index = instruction->InputAt(1); + if (!index->IsConstant()) { + last_visited_internal_latency_ += kArm64DataProcWithShifterOpLatency; + } +} + +void SchedulingLatencyVisitorARM64::VisitVecLoad(HVecLoad* instr) { + last_visited_internal_latency_ = 0; + size_t size = Primitive::ComponentSize(instr->GetPackedType()); + + if (instr->GetPackedType() == Primitive::kPrimChar + && mirror::kUseStringCompression + && instr->IsStringCharAt()) { + // Set latencies for the uncompressed case. + last_visited_internal_latency_ += kArm64MemoryLoadLatency + kArm64BranchLatency; + HandleVecAddress(instr, size); + last_visited_latency_ = kArm64SIMDMemoryLoadLatency; + } else { + HandleVecAddress(instr, size); + last_visited_latency_ = kArm64SIMDMemoryLoadLatency; + } +} + +void SchedulingLatencyVisitorARM64::VisitVecStore(HVecStore* instr) { + last_visited_internal_latency_ = 0; + size_t size = Primitive::ComponentSize(instr->GetPackedType()); + HandleVecAddress(instr, size); + last_visited_latency_ = kArm64SIMDMemoryStoreLatency; +} + } // namespace arm64 } // namespace art diff --git a/compiler/optimizing/scheduler_arm64.h b/compiler/optimizing/scheduler_arm64.h index 7a33720655..63d5b7d6b6 100644 --- a/compiler/optimizing/scheduler_arm64.h +++ b/compiler/optimizing/scheduler_arm64.h @@ -42,6 +42,18 @@ static constexpr uint32_t kArm64LoadStringInternalLatency = 7; static constexpr uint32_t kArm64MulFloatingPointLatency = 6; static constexpr uint32_t kArm64MulIntegerLatency = 6; static constexpr uint32_t kArm64TypeConversionFloatingPointIntegerLatency = 5; +static constexpr uint32_t kArm64BranchLatency = kArm64IntegerOpLatency; + +static constexpr uint32_t kArm64SIMDFloatingPointOpLatency = 10; +static constexpr uint32_t kArm64SIMDIntegerOpLatency = 6; +static constexpr uint32_t kArm64SIMDMemoryLoadLatency = 10; +static constexpr uint32_t kArm64SIMDMemoryStoreLatency = 6; +static constexpr uint32_t kArm64SIMDMulFloatingPointLatency = 12; +static constexpr uint32_t kArm64SIMDMulIntegerLatency = 12; +static constexpr uint32_t kArm64SIMDReplicateOpLatency = 16; +static constexpr uint32_t kArm64SIMDDivDoubleLatency = 60; +static constexpr uint32_t kArm64SIMDDivFloatLatency = 30; +static constexpr uint32_t kArm64SIMDTypeConversionInt2FPLatency = 10; class SchedulingLatencyVisitorARM64 : public SchedulingLatencyVisitor { public: @@ -52,29 +64,54 @@ class SchedulingLatencyVisitorARM64 : public SchedulingLatencyVisitor { // We add a second unused parameter to be able to use this macro like the others // defined in `nodes.h`. 
-#define FOR_EACH_SCHEDULED_COMMON_INSTRUCTION(M) \ - M(ArrayGet , unused) \ - M(ArrayLength , unused) \ - M(ArraySet , unused) \ - M(BinaryOperation , unused) \ - M(BoundsCheck , unused) \ - M(Div , unused) \ - M(InstanceFieldGet , unused) \ - M(InstanceOf , unused) \ - M(Invoke , unused) \ - M(LoadString , unused) \ - M(Mul , unused) \ - M(NewArray , unused) \ - M(NewInstance , unused) \ - M(Rem , unused) \ - M(StaticFieldGet , unused) \ - M(SuspendCheck , unused) \ - M(TypeConversion , unused) +#define FOR_EACH_SCHEDULED_COMMON_INSTRUCTION(M) \ + M(ArrayGet , unused) \ + M(ArrayLength , unused) \ + M(ArraySet , unused) \ + M(BinaryOperation , unused) \ + M(BoundsCheck , unused) \ + M(Div , unused) \ + M(InstanceFieldGet , unused) \ + M(InstanceOf , unused) \ + M(Invoke , unused) \ + M(LoadString , unused) \ + M(Mul , unused) \ + M(NewArray , unused) \ + M(NewInstance , unused) \ + M(Rem , unused) \ + M(StaticFieldGet , unused) \ + M(SuspendCheck , unused) \ + M(TypeConversion , unused) \ + M(VecReplicateScalar , unused) \ + M(VecSetScalars , unused) \ + M(VecSumReduce , unused) \ + M(VecCnv , unused) \ + M(VecNeg , unused) \ + M(VecAbs , unused) \ + M(VecNot , unused) \ + M(VecAdd , unused) \ + M(VecHalvingAdd , unused) \ + M(VecSub , unused) \ + M(VecMul , unused) \ + M(VecDiv , unused) \ + M(VecMin , unused) \ + M(VecMax , unused) \ + M(VecAnd , unused) \ + M(VecAndNot , unused) \ + M(VecOr , unused) \ + M(VecXor , unused) \ + M(VecShl , unused) \ + M(VecShr , unused) \ + M(VecUShr , unused) \ + M(VecMultiplyAccumulate, unused) \ + M(VecLoad , unused) \ + M(VecStore , unused) #define FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(M) \ M(BitwiseNegatedRight, unused) \ M(MultiplyAccumulate, unused) \ M(IntermediateAddress, unused) \ + M(IntermediateAddressIndex, unused) \ M(DataProcWithShifterOp, unused) #define DECLARE_VISIT_INSTRUCTION(type, unused) \ @@ -85,6 +122,10 @@ class SchedulingLatencyVisitorARM64 : public SchedulingLatencyVisitor { FOR_EACH_CONCRETE_INSTRUCTION_ARM64(DECLARE_VISIT_INSTRUCTION) #undef DECLARE_VISIT_INSTRUCTION + + private: + void HandleSimpleArithmeticSIMD(HVecOperation *instr); + void HandleVecAddress(HVecMemoryOperation* instruction, size_t size); }; class HSchedulerARM64 : public HScheduler { @@ -101,6 +142,8 @@ class HSchedulerARM64 : public HScheduler { return true; FOR_EACH_CONCRETE_INSTRUCTION_ARM64(CASE_INSTRUCTION_KIND) return true; + FOR_EACH_SCHEDULED_COMMON_INSTRUCTION(CASE_INSTRUCTION_KIND) + return true; default: return HScheduler::IsSchedulable(instruction); } diff --git a/compiler/optimizing/sharpening.cc b/compiler/optimizing/sharpening.cc index 7b8104b8ca..106b709eda 100644 --- a/compiler/optimizing/sharpening.cc +++ b/compiler/optimizing/sharpening.cc @@ -159,7 +159,7 @@ HLoadClass::LoadKind HSharpening::ComputeLoadClassKind(HLoadClass* load_class, CompilerDriver* compiler_driver, const DexCompilationUnit& dex_compilation_unit) { Handle<mirror::Class> klass = load_class->GetClass(); - DCHECK(load_class->GetLoadKind() == HLoadClass::LoadKind::kDexCacheViaMethod || + DCHECK(load_class->GetLoadKind() == HLoadClass::LoadKind::kRuntimeCall || load_class->GetLoadKind() == HLoadClass::LoadKind::kReferrersClass) << load_class->GetLoadKind(); DCHECK(!load_class->IsInBootImage()) << "HLoadClass should not be optimized before sharpening."; @@ -185,7 +185,7 @@ HLoadClass::LoadKind HSharpening::ComputeLoadClassKind(HLoadClass* load_class, DCHECK(!runtime->UseJitCompilation()); if (!compiler_driver->GetSupportBootImageFixup()) { // compiler_driver_test. 
Do not sharpen. - desired_load_kind = HLoadClass::LoadKind::kDexCacheViaMethod; + desired_load_kind = HLoadClass::LoadKind::kRuntimeCall; } else if ((klass != nullptr) && compiler_driver->IsImageClass(dex_file.StringByTypeIdx(type_index))) { is_in_boot_image = true; @@ -210,7 +210,7 @@ HLoadClass::LoadKind HSharpening::ComputeLoadClassKind(HLoadClass* load_class, // this `HLoadClass` hasn't been executed in the interpreter. // Fallback to the dex cache. // TODO(ngeoffray): Generate HDeoptimize instead. - desired_load_kind = HLoadClass::LoadKind::kDexCacheViaMethod; + desired_load_kind = HLoadClass::LoadKind::kRuntimeCall; } } else if (is_in_boot_image && !codegen->GetCompilerOptions().GetCompilePic()) { // AOT app compilation. Check if the class is in the boot image. @@ -229,7 +229,7 @@ } if (!IsSameDexFile(load_class->GetDexFile(), *dex_compilation_unit.GetDexFile())) { - if ((load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) || + if ((load_kind == HLoadClass::LoadKind::kRuntimeCall) || (load_kind == HLoadClass::LoadKind::kBssEntry)) { // We actually cannot reference this class, we're forced to bail. // We cannot reference this class with Bss, as the entrypoint will lookup the class @@ -241,7 +241,7 @@ } void HSharpening::ProcessLoadString(HLoadString* load_string) { - DCHECK_EQ(load_string->GetLoadKind(), HLoadString::LoadKind::kDexCacheViaMethod); + DCHECK_EQ(load_string->GetLoadKind(), HLoadString::LoadKind::kRuntimeCall); const DexFile& dex_file = load_string->GetDexFile(); dex::StringIndex string_index = load_string->GetStringIndex(); @@ -268,7 +268,7 @@ void HSharpening::ProcessLoadString(HLoadString* load_string) { desired_load_kind = HLoadString::LoadKind::kBootImageLinkTimePcRelative; } else { // compiler_driver_test. Do not sharpen. - desired_load_kind = HLoadString::LoadKind::kDexCacheViaMethod; + desired_load_kind = HLoadString::LoadKind::kRuntimeCall; } } else if (runtime->UseJitCompilation()) { DCHECK(!codegen_->GetCompilerOptions().GetCompilePic()); @@ -280,7 +280,7 @@ void HSharpening::ProcessLoadString(HLoadString* load_string) { desired_load_kind = HLoadString::LoadKind::kJitTableAddress; } } else { - desired_load_kind = HLoadString::LoadKind::kDexCacheViaMethod; + desired_load_kind = HLoadString::LoadKind::kRuntimeCall; } } else { // AOT app compilation. Try to lookup the string without allocating if not found.
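Taken together, the sharpening hunks above reduce to a simple priority rule: when no sharper load kind applies, the load degenerates to kRuntimeCall. A minimal sketch of that order for HLoadString, assuming the LoadKind declaration from nodes.h; the boolean parameters are hypothetical stand-ins for the real environment checks (boot image, JIT, PIC), not ART's actual interface.

#include "nodes.h"  // assumed to provide HLoadString::LoadKind

// Hypothetical helper illustrating the fall-back order; not ART's actual code.
static HLoadString::LoadKind ChooseStringLoadKind(bool boot_image_pc_relative,
                                                  bool jit_table_entry,
                                                  bool bss_entry) {
  if (boot_image_pc_relative) {
    return HLoadString::LoadKind::kBootImageLinkTimePcRelative;
  }
  if (jit_table_entry) {
    return HLoadString::LoadKind::kJitTableAddress;
  }
  if (bss_entry) {
    return HLoadString::LoadKind::kBssEntry;
  }
  // Last resort, renamed in this change: a plain runtime call, formerly
  // kDexCacheViaMethod.
  return HLoadString::LoadKind::kRuntimeCall;
}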