Diffstat (limited to 'compiler/optimizing')
-rw-r--r--  compiler/optimizing/code_generator_arm64.cc        |  25
-rw-r--r--  compiler/optimizing/code_generator_mips.cc         | 102
-rw-r--r--  compiler/optimizing/code_generator_mips.h          |   2
-rw-r--r--  compiler/optimizing/code_generator_mips64.cc       |   8
-rw-r--r--  compiler/optimizing/codegen_test.cc                |  33
-rw-r--r--  compiler/optimizing/dex_cache_array_fixups_mips.cc |   7
-rw-r--r--  compiler/optimizing/induction_var_range.cc         |  29
-rw-r--r--  compiler/optimizing/intrinsics_arm_vixl.cc         |  13
8 files changed, 162 insertions(+), 57 deletions(-)
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index cf824a14e3..26c8254c76 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -1452,6 +1452,19 @@ static bool CoherentConstantAndType(Location constant, Primitive::Type type) {
          (cst->IsDoubleConstant() && type == Primitive::kPrimDouble);
 }
 
+// Allocate a scratch register from the VIXL pool, querying first into
+// the floating-point register pool, and then into the core register
+// pool. This is essentially a reimplementation of
+// vixl::aarch64::UseScratchRegisterScope::AcquireCPURegisterOfSize
+// using a different allocation strategy.
+static CPURegister AcquireFPOrCoreCPURegisterOfSize(vixl::aarch64::MacroAssembler* masm,
+                                                    vixl::aarch64::UseScratchRegisterScope* temps,
+                                                    int size_in_bits) {
+  return masm->GetScratchFPRegisterList()->IsEmpty()
+      ? CPURegister(temps->AcquireRegisterOfSize(size_in_bits))
+      : CPURegister(temps->AcquireVRegisterOfSize(size_in_bits));
+}
+
 void CodeGeneratorARM64::MoveLocation(Location destination,
                                       Location source,
                                       Primitive::Type dst_type) {
@@ -1563,8 +1576,16 @@ void CodeGeneratorARM64::MoveLocation(Location destination,
       // a move is blocked by another move requiring a scratch FP
       // register, which would reserve D31). To prevent this issue, we
       // ask for a scratch register of any type (core or FP).
-      CPURegister temp =
-          temps.AcquireCPURegisterOfSize(destination.IsDoubleStackSlot() ? kXRegSize : kWRegSize);
+      //
+      // Also, we start by asking for an FP scratch register first, as the
+      // demand of scratch core registers is higher. This is why we
+      // use AcquireFPOrCoreCPURegisterOfSize instead of
+      // UseScratchRegisterScope::AcquireCPURegisterOfSize, which
+      // allocates core scratch registers first.
+      CPURegister temp = AcquireFPOrCoreCPURegisterOfSize(
+          GetVIXLAssembler(),
+          &temps,
+          (destination.IsDoubleStackSlot() ? kXRegSize : kWRegSize));
       __ Ldr(temp, StackOperandFrom(source));
       __ Str(temp, StackOperandFrom(destination));
     }
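The fix reverses VIXL's default acquisition order: as the comment above notes, UseScratchRegisterScope::AcquireCPURegisterOfSize hands out core scratch registers before FP ones, while stack-to-stack moves also consume core registers for address arithmetic, so the core pool can run dry first. A self-contained sketch of the FP-first strategy (the two-pool model below is illustrative, not ART/VIXL API; only the ordering mirrors AcquireFPOrCoreCPURegisterOfSize above):

    #include <cassert>
    #include <string>
    #include <vector>

    // Stand-ins for VIXL's scratch pools: by default {d31} for FP and
    // {ip0, ip1} for core registers on aarch64.
    struct ScratchPools {
      std::vector<std::string> fp_pool;
      std::vector<std::string> core_pool;

      // Prefer an FP register; fall back to a core register only when
      // the FP pool is empty (the reverse of VIXL's core-first order).
      std::string AcquireFPOrCore() {
        std::vector<std::string>& pool = fp_pool.empty() ? core_pool : fp_pool;
        assert(!pool.empty() && "no scratch register left");
        std::string reg = pool.back();
        pool.pop_back();
        return reg;
      }
    };

    int main() {
      ScratchPools pools{{"d31"}, {"ip0", "ip1"}};
      assert(pools.AcquireFPOrCore() == "d31");  // FP preferred first.
      assert(pools.AcquireFPOrCore() == "ip1");  // FP pool empty: core.
      return 0;
    }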
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index 76be74e921..a095970a1e 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -258,8 +258,10 @@ class LoadClassSlowPathMIPS : public SlowPathCodeMIPS {
       DCHECK_NE(out.AsRegister<Register>(), AT);
       CodeGeneratorMIPS::PcRelativePatchInfo* info =
           mips_codegen->NewTypeBssEntryPatch(cls_->GetDexFile(), type_index);
-      mips_codegen->EmitPcRelativeAddressPlaceholder(info, TMP, base);
-      __ StoreToOffset(kStoreWord, out.AsRegister<Register>(), TMP, 0);
+      bool reordering = __ SetReorder(false);
+      mips_codegen->EmitPcRelativeAddressPlaceholderHigh(info, TMP, base);
+      __ StoreToOffset(kStoreWord, out.AsRegister<Register>(), TMP, /* placeholder */ 0x5678);
+      __ SetReorder(reordering);
     }
     __ B(GetExitLabel());
   }
@@ -313,8 +315,10 @@ class LoadStringSlowPathMIPS : public SlowPathCodeMIPS {
     DCHECK_NE(out, AT);
     CodeGeneratorMIPS::PcRelativePatchInfo* info =
         mips_codegen->NewPcRelativeStringPatch(load->GetDexFile(), string_index);
-    mips_codegen->EmitPcRelativeAddressPlaceholder(info, TMP, base);
-    __ StoreToOffset(kStoreWord, out, TMP, 0);
+    bool reordering = __ SetReorder(false);
+    mips_codegen->EmitPcRelativeAddressPlaceholderHigh(info, TMP, base);
+    __ StoreToOffset(kStoreWord, out, TMP, /* placeholder */ 0x5678);
+    __ SetReorder(reordering);
 
     __ B(GetExitLabel());
   }
@@ -1127,16 +1131,15 @@ Literal* CodeGeneratorMIPS::DeduplicateBootImageAddressLiteral(uint32_t address)
   return DeduplicateUint32Literal(dchecked_integral_cast<uint32_t>(address), map);
 }
 
-void CodeGeneratorMIPS::EmitPcRelativeAddressPlaceholder(
-    PcRelativePatchInfo* info, Register out, Register base) {
-  bool reordering = __ SetReorder(false);
+void CodeGeneratorMIPS::EmitPcRelativeAddressPlaceholderHigh(PcRelativePatchInfo* info,
+                                                             Register out,
+                                                             Register base) {
   if (GetInstructionSetFeatures().IsR6()) {
     DCHECK_EQ(base, ZERO);
     __ Bind(&info->high_label);
     __ Bind(&info->pc_rel_label);
-    // Add a 32-bit offset to PC.
+    // Add the high half of a 32-bit offset to PC.
     __ Auipc(out, /* placeholder */ 0x1234);
-    __ Addiu(out, out, /* placeholder */ 0x5678);
   } else {
     // If base is ZERO, emit NAL to obtain the actual base.
     if (base == ZERO) {
@@ -1150,11 +1153,11 @@ void CodeGeneratorMIPS::EmitPcRelativeAddressPlaceholder(
     if (base == ZERO) {
       __ Bind(&info->pc_rel_label);
     }
-    __ Ori(out, out, /* placeholder */ 0x5678);
-    // Add a 32-bit offset to PC.
+    // Add the high half of a 32-bit offset to PC.
     __ Addu(out, out, (base == ZERO) ? RA : base);
   }
-  __ SetReorder(reordering);
+  // The immediately following instruction will add the sign-extended low half of the 32-bit
+  // offset to `out` (e.g. lw, jialc, addiu).
 }
 
 void CodeGeneratorMIPS::MarkGCCard(Register object,
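Every caller of the renamed helper in this change follows the same short pattern, distilled below (a sketch assembled from the hunks in this file, not a new API; the low-half instruction varies: Addiu to form an address, Lw or StoreToOffset to load or store through it):

    // Reordering is disabled so the relative patcher finds the high-half
    // and low-half instructions adjacent to each other.
    bool reordering = __ SetReorder(false);
    codegen->EmitPcRelativeAddressPlaceholderHigh(info, out, base);
    __ Addiu(out, out, /* placeholder */ 0x5678);  // Low half of the offset.
    __ SetReorder(reordering);  // Restore the previous reordering state.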
@@ -5159,7 +5162,8 @@ void LocationsBuilderMIPS::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invo
   // art::PrepareForRegisterAllocation.
   DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
 
-  bool has_extra_input = invoke->HasPcRelativeDexCache();
+  bool is_r6 = codegen_->GetInstructionSetFeatures().IsR6();
+  bool has_extra_input = invoke->HasPcRelativeDexCache() && !is_r6;
 
   IntrinsicLocationsBuilderMIPS intrinsic(codegen_);
   if (intrinsic.TryDispatch(invoke)) {
@@ -5200,12 +5204,13 @@ HLoadString::LoadKind CodeGeneratorMIPS::GetSupportedLoadStringKind(
   if (kEmitCompilerReadBarrier) {
     UNIMPLEMENTED(FATAL) << "for read barrier";
   }
-  // We disable PC-relative load when there is an irreducible loop, as the optimization
+  // We disable PC-relative load on pre-R6 when there is an irreducible loop, as the optimization
   // is incompatible with it.
   // TODO: Create as many MipsDexCacheArraysBase instructions as needed for methods
   // with irreducible loops.
   bool has_irreducible_loops = GetGraph()->HasIrreducibleLoops();
-  bool fallback_load = has_irreducible_loops;
+  bool is_r6 = GetInstructionSetFeatures().IsR6();
+  bool fallback_load = has_irreducible_loops && !is_r6;
   switch (desired_string_load_kind) {
     case HLoadString::LoadKind::kBootImageLinkTimeAddress:
       DCHECK(!GetCompilerOptions().GetCompilePic());
@@ -5238,10 +5243,11 @@ HLoadClass::LoadKind CodeGeneratorMIPS::GetSupportedLoadClassKind(
   if (kEmitCompilerReadBarrier) {
     UNIMPLEMENTED(FATAL) << "for read barrier";
   }
-  // We disable pc-relative load when there is an irreducible loop, as the optimization
+  // We disable PC-relative load on pre-R6 when there is an irreducible loop, as the optimization
   // is incompatible with it.
   bool has_irreducible_loops = GetGraph()->HasIrreducibleLoops();
-  bool fallback_load = has_irreducible_loops;
+  bool is_r6 = GetInstructionSetFeatures().IsR6();
+  bool fallback_load = has_irreducible_loops && !is_r6;
   switch (desired_class_load_kind) {
     case HLoadClass::LoadKind::kReferrersClass:
       fallback_load = false;
@@ -5259,6 +5265,7 @@ HLoadClass::LoadKind CodeGeneratorMIPS::GetSupportedLoadClassKind(
       break;
     case HLoadClass::LoadKind::kJitTableAddress:
       DCHECK(Runtime::Current()->UseJitCompilation());
+      // TODO: implement.
       fallback_load = true;
       break;
     case HLoadClass::LoadKind::kDexCacheViaMethod:
@@ -5273,6 +5280,7 @@ HLoadClass::LoadKind CodeGeneratorMIPS::GetSupportedLoadClassKind(
 
 Register CodeGeneratorMIPS::GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke,
                                                                   Register temp) {
+  CHECK(!GetInstructionSetFeatures().IsR6());
   CHECK_EQ(invoke->InputCount(), invoke->GetNumberOfArguments() + 1u);
   Location location = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
   if (!invoke->GetLocations()->Intrinsified()) {
@@ -5301,13 +5309,13 @@ HInvokeStaticOrDirect::DispatchInfo CodeGeneratorMIPS::GetSupportedInvokeStaticO
     const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
     HInvokeStaticOrDirect* invoke ATTRIBUTE_UNUSED) {
   HInvokeStaticOrDirect::DispatchInfo dispatch_info = desired_dispatch_info;
-  // We disable PC-relative load when there is an irreducible loop, as the optimization
+  // We disable PC-relative load on pre-R6 when there is an irreducible loop, as the optimization
   // is incompatible with it.
   bool has_irreducible_loops = GetGraph()->HasIrreducibleLoops();
-  bool fallback_load = true;
+  bool is_r6 = GetInstructionSetFeatures().IsR6();
+  bool fallback_load = has_irreducible_loops && !is_r6;
   switch (dispatch_info.method_load_kind) {
     case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative:
-      fallback_load = has_irreducible_loops;
       break;
     default:
       fallback_load = false;
@@ -5325,7 +5333,8 @@ void CodeGeneratorMIPS::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke
   Location callee_method = temp;  // For all kinds except kRecursive, callee will be in temp.
   HInvokeStaticOrDirect::MethodLoadKind method_load_kind = invoke->GetMethodLoadKind();
   HInvokeStaticOrDirect::CodePtrLocation code_ptr_location = invoke->GetCodePtrLocation();
-  Register base_reg = invoke->HasPcRelativeDexCache()
+  bool is_r6 = GetInstructionSetFeatures().IsR6();
+  Register base_reg = (invoke->HasPcRelativeDexCache() && !is_r6)
       ? GetInvokeStaticOrDirectExtraParameter(invoke, temp.AsRegister<Register>())
       : ZERO;
@@ -5346,14 +5355,23 @@ void CodeGeneratorMIPS::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke
     case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress:
       __ LoadConst32(temp.AsRegister<Register>(), invoke->GetMethodAddress());
       break;
-    case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative: {
-      HMipsDexCacheArraysBase* base =
-          invoke->InputAt(invoke->GetSpecialInputIndex())->AsMipsDexCacheArraysBase();
-      int32_t offset =
-          invoke->GetDexCacheArrayOffset() - base->GetElementOffset() - kDexCacheArrayLwOffset;
-      __ LoadFromOffset(kLoadWord, temp.AsRegister<Register>(), base_reg, offset);
+    case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative:
+      if (is_r6) {
+        uint32_t offset = invoke->GetDexCacheArrayOffset();
+        CodeGeneratorMIPS::PcRelativePatchInfo* info =
+            NewPcRelativeDexCacheArrayPatch(invoke->GetDexFileForPcRelativeDexCache(), offset);
+        bool reordering = __ SetReorder(false);
+        EmitPcRelativeAddressPlaceholderHigh(info, TMP, ZERO);
+        __ Lw(temp.AsRegister<Register>(), TMP, /* placeholder */ 0x5678);
+        __ SetReorder(reordering);
+      } else {
+        HMipsDexCacheArraysBase* base =
+            invoke->InputAt(invoke->GetSpecialInputIndex())->AsMipsDexCacheArraysBase();
+        int32_t offset =
+            invoke->GetDexCacheArrayOffset() - base->GetElementOffset() - kDexCacheArrayLwOffset;
+        __ LoadFromOffset(kLoadWord, temp.AsRegister<Register>(), base_reg, offset);
+      }
       break;
-    }
     case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: {
       Location current_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
       Register reg = temp.AsRegister<Register>();
@@ -5546,7 +5564,10 @@ void InstructionCodeGeneratorMIPS::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAF
       DCHECK(codegen_->GetCompilerOptions().IsBootImage());
       CodeGeneratorMIPS::PcRelativePatchInfo* info =
          codegen_->NewPcRelativeTypePatch(cls->GetDexFile(), cls->GetTypeIndex());
-      codegen_->EmitPcRelativeAddressPlaceholder(info, out, base_or_current_method_reg);
+      bool reordering = __ SetReorder(false);
+      codegen_->EmitPcRelativeAddressPlaceholderHigh(info, out, base_or_current_method_reg);
+      __ Addiu(out, out, /* placeholder */ 0x5678);
+      __ SetReorder(reordering);
       break;
     }
     case HLoadClass::LoadKind::kBootImageAddress: {
@@ -5562,8 +5583,10 @@ void InstructionCodeGeneratorMIPS::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAF
     case HLoadClass::LoadKind::kBssEntry: {
       CodeGeneratorMIPS::PcRelativePatchInfo* info =
           codegen_->NewTypeBssEntryPatch(cls->GetDexFile(), cls->GetTypeIndex());
-      codegen_->EmitPcRelativeAddressPlaceholder(info, out, base_or_current_method_reg);
-      __ LoadFromOffset(kLoadWord, out, out, 0);
+      bool reordering = __ SetReorder(false);
+      codegen_->EmitPcRelativeAddressPlaceholderHigh(info, out, base_or_current_method_reg);
+      __ LoadFromOffset(kLoadWord, out, out, /* placeholder */ 0x5678);
+      __ SetReorder(reordering);
       generate_null_check = true;
       break;
     }
@@ -5678,7 +5701,10 @@ void InstructionCodeGeneratorMIPS::VisitLoadString(HLoadString* load) NO_THREAD_
       DCHECK(codegen_->GetCompilerOptions().IsBootImage());
       CodeGeneratorMIPS::PcRelativePatchInfo* info =
          codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex());
-      codegen_->EmitPcRelativeAddressPlaceholder(info, out, base_or_current_method_reg);
+      bool reordering = __ SetReorder(false);
+      codegen_->EmitPcRelativeAddressPlaceholderHigh(info, out, base_or_current_method_reg);
+      __ Addiu(out, out, /* placeholder */ 0x5678);
+      __ SetReorder(reordering);
       return;  // No dex cache slow path.
     }
     case HLoadString::LoadKind::kBootImageAddress: {
@@ -5694,8 +5720,10 @@ void InstructionCodeGeneratorMIPS::VisitLoadString(HLoadString* load) NO_THREAD_
       DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
      CodeGeneratorMIPS::PcRelativePatchInfo* info =
          codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex());
-      codegen_->EmitPcRelativeAddressPlaceholder(info, out, base_or_current_method_reg);
-      __ LoadFromOffset(kLoadWord, out, out, 0);
+      bool reordering = __ SetReorder(false);
+      codegen_->EmitPcRelativeAddressPlaceholderHigh(info, out, base_or_current_method_reg);
+      __ LoadFromOffset(kLoadWord, out, out, /* placeholder */ 0x5678);
+      __ SetReorder(reordering);
       SlowPathCodeMIPS* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathMIPS(load);
       codegen_->AddSlowPath(slow_path);
       __ Beqz(out, slow_path->GetEntryLabel());
@@ -6894,8 +6922,12 @@ void InstructionCodeGeneratorMIPS::VisitMipsDexCacheArraysBase(HMipsDexCacheArra
   Register reg = base->GetLocations()->Out().AsRegister<Register>();
   CodeGeneratorMIPS::PcRelativePatchInfo* info =
       codegen_->NewPcRelativeDexCacheArrayPatch(base->GetDexFile(), base->GetElementOffset());
+  CHECK(!codegen_->GetInstructionSetFeatures().IsR6());
+  bool reordering = __ SetReorder(false);
   // TODO: Reuse MipsComputeBaseMethodAddress on R2 instead of passing ZERO to force emitting NAL.
-  codegen_->EmitPcRelativeAddressPlaceholder(info, reg, ZERO);
+  codegen_->EmitPcRelativeAddressPlaceholderHigh(info, reg, ZERO);
+  __ Addiu(reg, reg, /* placeholder */ 0x5678);
+  __ SetReorder(reordering);
 }
 
 void LocationsBuilderMIPS::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {

diff --git a/compiler/optimizing/code_generator_mips.h b/compiler/optimizing/code_generator_mips.h
index c8fd325999..e92eeef88f 100644
--- a/compiler/optimizing/code_generator_mips.h
+++ b/compiler/optimizing/code_generator_mips.h
@@ -463,7 +463,7 @@ class CodeGeneratorMIPS : public CodeGenerator {
   Literal* DeduplicateBootImageTypeLiteral(const DexFile& dex_file, dex::TypeIndex type_index);
   Literal* DeduplicateBootImageAddressLiteral(uint32_t address);
 
-  void EmitPcRelativeAddressPlaceholder(PcRelativePatchInfo* info, Register out, Register base);
+  void EmitPcRelativeAddressPlaceholderHigh(PcRelativePatchInfo* info, Register out, Register base);
 
  private:
   Register GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke, Register temp);

diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index 192b4a5050..e96e3d75e1 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -3117,14 +3117,6 @@ void InstructionCodeGeneratorMIPS64::GenerateGcRootFieldLoad(
     Location root,
     GpuRegister obj,
     uint32_t offset) {
-  // When handling PC-relative loads, the caller calls
-  // EmitPcRelativeAddressPlaceholderHigh() and then GenerateGcRootFieldLoad().
-  // The relative patcher expects the two methods to emit the following patchable
-  // sequence of instructions in this case:
-  //    auipc reg1, 0x1234  // 0x1234 is a placeholder for offset_high.
-  //    lwu   reg2, 0x5678(reg1)  // 0x5678 is a placeholder for offset_low.
-  // TODO: Adjust GenerateGcRootFieldLoad() and its caller when this method is
-  // extended (e.g. for read barriers) so as not to break the relative patcher.
   GpuRegister root_reg = root.AsRegister<GpuRegister>();
   if (kEmitCompilerReadBarrier) {
     UNIMPLEMENTED(FATAL) << "for read barrier";

diff --git a/compiler/optimizing/codegen_test.cc b/compiler/optimizing/codegen_test.cc
index e3f3df0ff5..763d6da6f5 100644
--- a/compiler/optimizing/codegen_test.cc
+++ b/compiler/optimizing/codegen_test.cc
@@ -1067,6 +1067,39 @@ TEST_F(CodegenTest, ARMVIXLParallelMoveResolver) {
 }
 #endif
 
+#ifdef ART_ENABLE_CODEGEN_arm64
+// Regression test for b/34760542.
+TEST_F(CodegenTest, ARM64ParallelMoveResolverB34760542) {
+  std::unique_ptr<const Arm64InstructionSetFeatures> features(
+      Arm64InstructionSetFeatures::FromCppDefines());
+  ArenaPool pool;
+  ArenaAllocator allocator(&pool);
+  HGraph* graph = CreateGraph(&allocator);
+  arm64::CodeGeneratorARM64 codegen(graph, *features.get(), CompilerOptions());
+
+  codegen.Initialize();
+
+  // The following ParallelMove used to fail this assertion:
+  //
+  //   Assertion failed (!available->IsEmpty())
+  //
+  // in vixl::aarch64::UseScratchRegisterScope::AcquireNextAvailable.
+  HParallelMove* move = new (graph->GetArena()) HParallelMove(graph->GetArena());
+  move->AddMove(Location::DoubleStackSlot(0),
+                Location::DoubleStackSlot(257),
+                Primitive::kPrimDouble,
+                nullptr);
+  move->AddMove(Location::DoubleStackSlot(257),
+                Location::DoubleStackSlot(0),
+                Primitive::kPrimDouble,
+                nullptr);
+  codegen.GetMoveResolver()->EmitNativeCode(move);
+
+  InternalCodeAllocator code_allocator;
+  codegen.Finalize(&code_allocator);
+}
+#endif
+
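A guess at why the test above swaps slots at offsets 0 and 257: a 64-bit LDR/STR immediate must be a multiple of 8 in [0, 32760], and the unscaled LDUR/STUR immediate must fit in [-256, 255]. 257 satisfies neither, so VIXL must take a core scratch register just to materialize the address, which is what empties the core pool when the move's own temporary is also core. A standalone check of those encoding ranges (the helper names are mine, not VIXL API):

    #include <cassert>
    #include <cstdint>

    // A64 encoding limits for 64-bit loads/stores.
    static bool EncodableAsScaledLdrStr(int64_t offset) {
      return offset >= 0 && offset <= 32760 && (offset % 8) == 0;
    }
    static bool EncodableAsUnscaledLdurStur(int64_t offset) {
      return offset >= -256 && offset <= 255;
    }

    int main() {
      assert(EncodableAsScaledLdrStr(0));         // Slot 0 encodes directly.
      assert(!EncodableAsScaledLdrStr(257));      // Not a multiple of 8.
      assert(!EncodableAsUnscaledLdurStur(257));  // Out of the 9-bit range.
      return 0;
    }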
 #ifdef ART_ENABLE_CODEGEN_mips
 TEST_F(CodegenTest, MipsClobberRA) {
   std::unique_ptr<const MipsInstructionSetFeatures> features_mips(

diff --git a/compiler/optimizing/dex_cache_array_fixups_mips.cc b/compiler/optimizing/dex_cache_array_fixups_mips.cc
index 04a4294c48..7734f9197d 100644
--- a/compiler/optimizing/dex_cache_array_fixups_mips.cc
+++ b/compiler/optimizing/dex_cache_array_fixups_mips.cc
@@ -47,7 +47,7 @@ class DexCacheArrayFixupsVisitor : public HGraphVisitor {
     // Computing the dex cache base for PC-relative accesses will clobber RA with
     // the NAL instruction on R2. Take a note of this before generating the method
     // entry.
-    if (!dex_cache_array_bases_.empty() && !codegen_->GetInstructionSetFeatures().IsR6()) {
+    if (!dex_cache_array_bases_.empty()) {
       codegen_->ClobberRA();
     }
   }
@@ -92,6 +92,11 @@ class DexCacheArrayFixupsVisitor : public HGraphVisitor {
 };
 
 void DexCacheArrayFixups::Run() {
+  CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen_);
+  if (mips_codegen->GetInstructionSetFeatures().IsR6()) {
+    // Do nothing for R6 because it has PC-relative addressing.
+    return;
+  }
   if (graph_->HasIrreducibleLoops()) {
     // Do not run this optimization, as irreducible loops do not work with an instruction
     // that can be live-in at the irreducible loop header.

diff --git a/compiler/optimizing/induction_var_range.cc b/compiler/optimizing/induction_var_range.cc
index 3973985338..5539413aad 100644
--- a/compiler/optimizing/induction_var_range.cc
+++ b/compiler/optimizing/induction_var_range.cc
@@ -57,14 +57,18 @@ static bool IsIntAndGet(HInstruction* instruction, int64_t* value) {
   return false;
 }
 
-/** Returns b^e for b,e >= 1. */
-static int64_t IntPow(int64_t b, int64_t e) {
+/** Returns b^e for b,e >= 1. Sets overflow if arithmetic wrap-around occurred. */
+static int64_t IntPow(int64_t b, int64_t e, /*out*/ bool* overflow) {
   DCHECK_GE(b, 1);
   DCHECK_GE(e, 1);
   int64_t pow = 1;
   while (e) {
     if (e & 1) {
+      int64_t oldpow = pow;
       pow *= b;
+      if (pow < oldpow) {
+        *overflow = true;
+      }
     }
     e >>= 1;
     b *= b;
   }
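For a concrete view of what the new flag catches, here is a self-contained copy of the patched IntPow with a small driver (DCHECKs replaced by asserts). The check relies on wrap-around making the running product smaller than its predecessor; strictly speaking, signed overflow is undefined behaviour in standard C++, so treat this as an illustration of the intent:

    #include <cassert>
    #include <cstdint>
    #include <iostream>

    // Exponentiation by squaring for b, e >= 1, as in the hunk above.
    static int64_t IntPow(int64_t b, int64_t e, /*out*/ bool* overflow) {
      assert(b >= 1);
      assert(e >= 1);
      int64_t pow = 1;
      while (e) {
        if (e & 1) {
          int64_t oldpow = pow;
          pow *= b;
          if (pow < oldpow) {
            *overflow = true;
          }
        }
        e >>= 1;
        b *= b;
      }
      return pow;
    }

    int main() {
      bool overflow = false;
      std::cout << IntPow(2, 62, &overflow) << " " << overflow << "\n";
      // 2^62 = 4611686018427387904 fits in int64_t: prints "... 0".
      overflow = false;
      std::cout << IntPow(2, 63, &overflow) << " " << overflow << "\n";
      // 2^63 wraps negative, below the previous product: prints "... 1".
      return 0;
    }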
@@ -1020,20 +1024,27 @@ bool InductionVarRange::GenerateLastValueGeometric(HInductionVarAnalysis::Induct
   HInstruction* opb = nullptr;
   if (GenerateCode(info->op_a, nullptr, graph, block, &opa, false, false) &&
       GenerateCode(info->op_b, nullptr, graph, block, &opb, false, false)) {
-    // Compute f ^ m for known maximum index value m.
-    int64_t fpow = IntPow(f, m);
     if (graph != nullptr) {
-      DCHECK(info->operation == HInductionVarAnalysis::kMul ||
-             info->operation == HInductionVarAnalysis::kDiv);
       Primitive::Type type = info->type;
+      // Compute f ^ m for known maximum index value m.
+      bool overflow = false;
+      int64_t fpow = IntPow(f, m, &overflow);
+      if (info->operation == HInductionVarAnalysis::kDiv) {
+        // For division, any overflow truncates to zero.
+        if (overflow || (type != Primitive::kPrimLong && !CanLongValueFitIntoInt(fpow))) {
+          fpow = 0;
+        }
+      } else if (type != Primitive::kPrimLong) {
+        // For multiplication, okay to truncate to required precision.
+        DCHECK(info->operation == HInductionVarAnalysis::kMul);
+        fpow = static_cast<int32_t>(fpow);
+      }
+      // Generate code.
       if (fpow == 0) {
         // Special case: repeated mul/div always yields zero.
         *result = graph->GetConstant(type, 0);
       } else {
         // Last value: a * f ^ m + b or a * f ^ -m + b.
-        if (type != Primitive::kPrimLong) {
-          fpow = static_cast<int32_t>(fpow);  // okay to truncate
-        }
         HInstruction* e = nullptr;
         if (info->operation == HInductionVarAnalysis::kMul) {
           e = new (graph->GetArena()) HMul(type, opa, graph->GetConstant(type, fpow));

diff --git a/compiler/optimizing/intrinsics_arm_vixl.cc b/compiler/optimizing/intrinsics_arm_vixl.cc
index 91d9c56d10..1a10173ed7 100644
--- a/compiler/optimizing/intrinsics_arm_vixl.cc
+++ b/compiler/optimizing/intrinsics_arm_vixl.cc
@@ -514,6 +514,18 @@ void IntrinsicCodeGeneratorARMVIXL::VisitMathSqrt(HInvoke* invoke) {
   __ Vsqrt(OutputDRegister(invoke), InputDRegisterAt(invoke, 0));
 }
 
+void IntrinsicLocationsBuilderARMVIXL::VisitMathRint(HInvoke* invoke) {
+  if (features_.HasARMv8AInstructions()) {
+    CreateFPToFPLocations(arena_, invoke);
+  }
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitMathRint(HInvoke* invoke) {
+  DCHECK(codegen_->GetInstructionSetFeatures().HasARMv8AInstructions());
+  ArmVIXLAssembler* assembler = GetAssembler();
+  __ Vrintn(F64, F64, OutputDRegister(invoke), InputDRegisterAt(invoke, 0));
+}
+
 void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPeekByte(HInvoke* invoke) {
   CreateIntToIntLocations(arena_, invoke);
 }
@@ -2772,7 +2784,6 @@ UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathMaxDoubleDouble)
 UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathMaxFloatFloat)
 UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathMinLongLong)
 UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathMaxLongLong)
-UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathRint)
 UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathRoundDouble)  // Could be done by changing rounding mode, maybe?
 UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathRoundFloat)   // Could be done by changing rounding mode, maybe?
 UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeCASLong)    // High register pressure.
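VRINTN rounds to nearest with ties going to the even neighbour, which matches the contract of Java's Math.rint; that is why the intrinsic needs only one instruction and is enabled only on cores with ARMv8-A instructions. A standalone C++ illustration of the tie-breaking (std::nearbyint under FE_TONEAREST, the ties-to-even mode, rounds the same way):

    #include <cfenv>
    #include <cmath>
    #include <iostream>

    int main() {
      std::fesetround(FE_TONEAREST);             // Ties to even.
      std::cout << std::nearbyint(2.5) << "\n";  // 2: tie, even neighbour.
      std::cout << std::nearbyint(3.5) << "\n";  // 4: tie, even neighbour.
      std::cout << std::nearbyint(2.4) << "\n";  // 2: not a tie.
      return 0;
    }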