diff options
Diffstat (limited to 'compiler/optimizing')
-rw-r--r-- | compiler/optimizing/intrinsics_arm64.cc | 12 | ||||
-rw-r--r-- | compiler/optimizing/intrinsics_arm_vixl.cc | 143 |
2 files changed, 141 insertions, 14 deletions
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc index 69aaee225a..1ed1b7537e 100644 --- a/compiler/optimizing/intrinsics_arm64.cc +++ b/compiler/optimizing/intrinsics_arm64.cc @@ -566,14 +566,6 @@ void IntrinsicCodeGeneratorARM64::VisitMathAbsFloat(HInvoke* invoke) { MathAbsFP(invoke->GetLocations(), /* is64bit */ false, GetVIXLAssembler()); } -static void CreateIntToInt(ArenaAllocator* arena, HInvoke* invoke) { - LocationSummary* locations = new (arena) LocationSummary(invoke, - LocationSummary::kNoCall, - kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); -} - static void GenAbsInteger(LocationSummary* locations, bool is64bit, MacroAssembler* masm) { @@ -588,7 +580,7 @@ static void GenAbsInteger(LocationSummary* locations, } void IntrinsicLocationsBuilderARM64::VisitMathAbsInt(HInvoke* invoke) { - CreateIntToInt(arena_, invoke); + CreateIntToIntLocations(arena_, invoke); } void IntrinsicCodeGeneratorARM64::VisitMathAbsInt(HInvoke* invoke) { @@ -596,7 +588,7 @@ void IntrinsicCodeGeneratorARM64::VisitMathAbsInt(HInvoke* invoke) { } void IntrinsicLocationsBuilderARM64::VisitMathAbsLong(HInvoke* invoke) { - CreateIntToInt(arena_, invoke); + CreateIntToIntLocations(arena_, invoke); } void IntrinsicCodeGeneratorARM64::VisitMathAbsLong(HInvoke* invoke) { diff --git a/compiler/optimizing/intrinsics_arm_vixl.cc b/compiler/optimizing/intrinsics_arm_vixl.cc index 7c43f2eff2..76c1410340 100644 --- a/compiler/optimizing/intrinsics_arm_vixl.cc +++ b/compiler/optimizing/intrinsics_arm_vixl.cc @@ -331,6 +331,14 @@ static void CreateIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) { locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); } +static void CreateIntToIntLocationsWithOverlap(ArenaAllocator* arena, HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); +} + static void CreateFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) { LocationSummary* locations = new (arena) LocationSummary(invoke, LocationSummary::kNoCall, @@ -2915,6 +2923,137 @@ void IntrinsicCodeGeneratorARMVIXL::VisitLongBitCount(HInvoke* invoke) { GenBitCount(invoke, Primitive::kPrimLong, GetAssembler()); } +static void GenHighestOneBit(HInvoke* invoke, + Primitive::Type type, + CodeGeneratorARMVIXL* codegen) { + DCHECK(Primitive::IsIntOrLongType(type)); + + ArmVIXLAssembler* assembler = codegen->GetAssembler(); + UseScratchRegisterScope temps(assembler->GetVIXLAssembler()); + const vixl32::Register temp = temps.Acquire(); + + if (type == Primitive::kPrimLong) { + LocationSummary* locations = invoke->GetLocations(); + Location in = locations->InAt(0); + Location out = locations->Out(); + + vixl32::Register in_reg_lo = LowRegisterFrom(in); + vixl32::Register in_reg_hi = HighRegisterFrom(in); + vixl32::Register out_reg_lo = LowRegisterFrom(out); + vixl32::Register out_reg_hi = HighRegisterFrom(out); + + __ Mov(temp, 0x80000000); // Modified immediate. + __ Clz(out_reg_lo, in_reg_lo); + __ Clz(out_reg_hi, in_reg_hi); + __ Lsr(out_reg_lo, temp, out_reg_lo); + __ Lsrs(out_reg_hi, temp, out_reg_hi); + + // Discard result for lowest 32 bits if highest 32 bits are not zero. + // Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8, + // we check that the output is in a low register, so that a 16-bit MOV + // encoding can be used. If output is in a high register, then we generate + // 4 more bytes of code to avoid a branch. + Operand mov_src(0); + if (!out_reg_lo.IsLow()) { + __ Mov(LeaveFlags, temp, 0); + mov_src = Operand(temp); + } + ExactAssemblyScope it_scope(codegen->GetVIXLAssembler(), + 2 * vixl32::k16BitT32InstructionSizeInBytes, + CodeBufferCheckScope::kExactSize); + __ it(ne); + __ mov(ne, out_reg_lo, mov_src); + } else { + vixl32::Register out = OutputRegister(invoke); + vixl32::Register in = InputRegisterAt(invoke, 0); + + __ Mov(temp, 0x80000000); // Modified immediate. + __ Clz(out, in); + __ Lsr(out, temp, out); + } +} + +void IntrinsicLocationsBuilderARMVIXL::VisitIntegerHighestOneBit(HInvoke* invoke) { + CreateIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARMVIXL::VisitIntegerHighestOneBit(HInvoke* invoke) { + GenHighestOneBit(invoke, Primitive::kPrimInt, codegen_); +} + +void IntrinsicLocationsBuilderARMVIXL::VisitLongHighestOneBit(HInvoke* invoke) { + CreateIntToIntLocationsWithOverlap(arena_, invoke); +} + +void IntrinsicCodeGeneratorARMVIXL::VisitLongHighestOneBit(HInvoke* invoke) { + GenHighestOneBit(invoke, Primitive::kPrimLong, codegen_); +} + +static void GenLowestOneBit(HInvoke* invoke, + Primitive::Type type, + CodeGeneratorARMVIXL* codegen) { + DCHECK(Primitive::IsIntOrLongType(type)); + + ArmVIXLAssembler* assembler = codegen->GetAssembler(); + UseScratchRegisterScope temps(assembler->GetVIXLAssembler()); + const vixl32::Register temp = temps.Acquire(); + + if (type == Primitive::kPrimLong) { + LocationSummary* locations = invoke->GetLocations(); + Location in = locations->InAt(0); + Location out = locations->Out(); + + vixl32::Register in_reg_lo = LowRegisterFrom(in); + vixl32::Register in_reg_hi = HighRegisterFrom(in); + vixl32::Register out_reg_lo = LowRegisterFrom(out); + vixl32::Register out_reg_hi = HighRegisterFrom(out); + + __ Rsb(out_reg_hi, in_reg_hi, 0); + __ Rsb(out_reg_lo, in_reg_lo, 0); + __ And(out_reg_hi, out_reg_hi, in_reg_hi); + // The result of this operation is 0 iff in_reg_lo is 0 + __ Ands(out_reg_lo, out_reg_lo, in_reg_lo); + + // Discard result for highest 32 bits if lowest 32 bits are not zero. + // Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8, + // we check that the output is in a low register, so that a 16-bit MOV + // encoding can be used. If output is in a high register, then we generate + // 4 more bytes of code to avoid a branch. + Operand mov_src(0); + if (!out_reg_lo.IsLow()) { + __ Mov(LeaveFlags, temp, 0); + mov_src = Operand(temp); + } + ExactAssemblyScope it_scope(codegen->GetVIXLAssembler(), + 2 * vixl32::k16BitT32InstructionSizeInBytes, + CodeBufferCheckScope::kExactSize); + __ it(ne); + __ mov(ne, out_reg_hi, mov_src); + } else { + vixl32::Register out = OutputRegister(invoke); + vixl32::Register in = InputRegisterAt(invoke, 0); + + __ Rsb(temp, in, 0); + __ And(out, temp, in); + } +} + +void IntrinsicLocationsBuilderARMVIXL::VisitIntegerLowestOneBit(HInvoke* invoke) { + CreateIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorARMVIXL::VisitIntegerLowestOneBit(HInvoke* invoke) { + GenLowestOneBit(invoke, Primitive::kPrimInt, codegen_); +} + +void IntrinsicLocationsBuilderARMVIXL::VisitLongLowestOneBit(HInvoke* invoke) { + CreateIntToIntLocationsWithOverlap(arena_, invoke); +} + +void IntrinsicCodeGeneratorARMVIXL::VisitLongLowestOneBit(HInvoke* invoke) { + GenLowestOneBit(invoke, Primitive::kPrimLong, codegen_); +} + void IntrinsicLocationsBuilderARMVIXL::VisitStringGetCharsNoCheck(HInvoke* invoke) { LocationSummary* locations = new (arena_) LocationSummary(invoke, LocationSummary::kNoCall, @@ -3212,10 +3351,6 @@ UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathRoundDouble) // Could be done by changing UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeCASLong) // High register pressure. UNIMPLEMENTED_INTRINSIC(ARMVIXL, SystemArrayCopyChar) UNIMPLEMENTED_INTRINSIC(ARMVIXL, ReferenceGetReferent) -UNIMPLEMENTED_INTRINSIC(ARMVIXL, IntegerHighestOneBit) -UNIMPLEMENTED_INTRINSIC(ARMVIXL, LongHighestOneBit) -UNIMPLEMENTED_INTRINSIC(ARMVIXL, IntegerLowestOneBit) -UNIMPLEMENTED_INTRINSIC(ARMVIXL, LongLowestOneBit) UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringStringIndexOf); UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringStringIndexOfAfter); |