Diffstat (limited to 'compiler')
-rw-r--r--  compiler/optimizing/intrinsics.h        |   4
-rw-r--r--  compiler/optimizing/intrinsics_arm.cc   |  52
-rw-r--r--  compiler/optimizing/intrinsics_arm64.cc | 157
-rw-r--r--  compiler/optimizing/intrinsics_mips.cc  |   4
4 files changed, 197 insertions, 20 deletions
diff --git a/compiler/optimizing/intrinsics.h b/compiler/optimizing/intrinsics.h
index 863dd1c6f6..39a1313ba0 100644
--- a/compiler/optimizing/intrinsics.h
+++ b/compiler/optimizing/intrinsics.h
@@ -30,6 +30,10 @@ class DexFile;
 // Temporary measure until we have caught up with the Java 7 definition of Math.round. b/26327751
 static constexpr bool kRoundIsPlusPointFive = false;
 
+// Positive floating-point infinities.
+static constexpr uint32_t kPositiveInfinityFloat = 0x7f800000U;
+static constexpr uint64_t kPositiveInfinityDouble = UINT64_C(0x7ff0000000000000);
+
 // Recognize intrinsics from HInvoke nodes.
 class IntrinsicsRecognizer : public HOptimization {
  public:
diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc
index 86b7bc138c..146fea1fe0 100644
--- a/compiler/optimizing/intrinsics_arm.cc
+++ b/compiler/optimizing/intrinsics_arm.cc
@@ -1985,6 +1985,56 @@ void IntrinsicCodeGeneratorARM::VisitStringGetCharsNoCheck(HInvoke* invoke) {
   __ Bind(&done);
 }
 
+void IntrinsicLocationsBuilderARM::VisitFloatIsInfinite(HInvoke* invoke) {
+  CreateFPToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM::VisitFloatIsInfinite(HInvoke* invoke) {
+  ArmAssembler* const assembler = GetAssembler();
+  LocationSummary* const locations = invoke->GetLocations();
+  const Register out = locations->Out().AsRegister<Register>();
+  // Shifting left by 1 bit makes the value encodable as an immediate operand;
+  // we don't care about the sign bit anyway.
+  constexpr uint32_t infinity = kPositiveInfinityFloat << 1U;
+
+  __ vmovrs(out, locations->InAt(0).AsFpuRegister<SRegister>());
+  // We don't care about the sign bit, so shift left.
+  __ Lsl(out, out, 1);
+  __ eor(out, out, ShifterOperand(infinity));
+  // If the result is 0, then it has 32 leading zeros, and less than that otherwise.
+  __ clz(out, out);
+  // Any number less than 32 logically shifted right by 5 bits results in 0;
+  // the same operation on 32 yields 1.
+  __ Lsr(out, out, 5);
+}
+
+void IntrinsicLocationsBuilderARM::VisitDoubleIsInfinite(HInvoke* invoke) {
+  CreateFPToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM::VisitDoubleIsInfinite(HInvoke* invoke) {
+  ArmAssembler* const assembler = GetAssembler();
+  LocationSummary* const locations = invoke->GetLocations();
+  const Register out = locations->Out().AsRegister<Register>();
+  // The highest 32 bits of double precision positive infinity separated into
+  // two constants encodable as immediate operands.
+  constexpr uint32_t infinity_high = 0x7f000000U;
+  constexpr uint32_t infinity_high2 = 0x00f00000U;
+
+  static_assert((infinity_high | infinity_high2) == static_cast<uint32_t>(kPositiveInfinityDouble >> 32U),
+                "The constants do not add up to the high 32 bits of double precision positive infinity.");
+  __ vmovrrd(IP, out, FromLowSToD(locations->InAt(0).AsFpuRegisterPairLow<SRegister>()));
+  __ eor(out, out, ShifterOperand(infinity_high));
+  __ eor(out, out, ShifterOperand(infinity_high2));
+  // We don't care about the sign bit, so shift left.
+  __ orr(out, IP, ShifterOperand(out, LSL, 1));
+  // If the result is 0, then it has 32 leading zeros, and less than that otherwise.
+  __ clz(out, out);
+  // Any number less than 32 logically shifted right by 5 bits results in 0;
+  // the same operation on 32 yields 1.
+  __ Lsr(out, out, 5);
+}
+
 UNIMPLEMENTED_INTRINSIC(ARM, IntegerBitCount)
 UNIMPLEMENTED_INTRINSIC(ARM, LongBitCount)
 UNIMPLEMENTED_INTRINSIC(ARM, MathMinDoubleDouble)
@@ -2001,8 +2051,6 @@ UNIMPLEMENTED_INTRINSIC(ARM, MathRoundFloat)   // Could be done by changing rou
 UNIMPLEMENTED_INTRINSIC(ARM, UnsafeCASLong)    // High register pressure.
 UNIMPLEMENTED_INTRINSIC(ARM, SystemArrayCopyChar)
 UNIMPLEMENTED_INTRINSIC(ARM, ReferenceGetReferent)
-UNIMPLEMENTED_INTRINSIC(ARM, FloatIsInfinite)
-UNIMPLEMENTED_INTRINSIC(ARM, DoubleIsInfinite)
 UNIMPLEMENTED_INTRINSIC(ARM, IntegerHighestOneBit)
 UNIMPLEMENTED_INTRINSIC(ARM, LongHighestOneBit)
 UNIMPLEMENTED_INTRINSIC(ARM, IntegerLowestOneBit)
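For reference, a minimal standalone C++ sketch (not part of the patch) of the branchless test the ARM sequence above encodes. The helper names BitsOf and IsInfiniteFloat are hypothetical, and GCC/Clang's __builtin_clz stands in for the CLZ instruction:

#include <cassert>
#include <cstdint>
#include <cstring>
#include <limits>

// Reinterpret the float's bits without undefined behavior (the vmovrs step).
static uint32_t BitsOf(float f) {
  uint32_t bits;
  std::memcpy(&bits, &f, sizeof(bits));
  return bits;
}

static int IsInfiniteFloat(float f) {
  uint32_t x = BitsOf(f) << 1;                       // Lsl: drop the sign bit.
  x ^= 0x7f800000U << 1;                             // eor with infinity << 1.
  const int clz = (x == 0) ? 32 : __builtin_clz(x);  // clz (builtin is UB on 0).
  return clz >> 5;                                   // Lsr: 1 iff clz == 32.
}

int main() {
  const float inf = std::numeric_limits<float>::infinity();
  assert(IsInfiniteFloat(inf) == 1);
  assert(IsInfiniteFloat(-inf) == 1);   // Sign bit is discarded by the shift.
  assert(IsInfiniteFloat(std::numeric_limits<float>::quiet_NaN()) == 0);
  assert(IsInfiniteFloat(0.0f) == 0);
}

The shift-then-XOR ordering is what lets both infinities share one comparison: after the left shift, +inf and -inf have identical bit patterns.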
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index 04ae3a6732..1d8229674c 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -47,6 +47,7 @@ using helpers::SRegisterFrom;
 using helpers::WRegisterFrom;
 using helpers::XRegisterFrom;
 using helpers::InputRegisterAt;
+using helpers::OutputRegister;
 
 namespace {
 
@@ -1173,31 +1174,118 @@ void IntrinsicCodeGeneratorARM64::VisitStringCharAt(HInvoke* invoke) {
 void IntrinsicLocationsBuilderARM64::VisitStringCompareTo(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCall,
+                                                            invoke->InputAt(1)->CanBeNull()
+                                                                ? LocationSummary::kCallOnSlowPath
+                                                                : LocationSummary::kNoCall,
                                                             kIntrinsified);
-  InvokeRuntimeCallingConvention calling_convention;
-  locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
-  locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
-  locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimInt));
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RequiresRegister());
+  locations->AddTemp(Location::RequiresRegister());
+  locations->AddTemp(Location::RequiresRegister());
+  locations->AddTemp(Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
 }
 
 void IntrinsicCodeGeneratorARM64::VisitStringCompareTo(HInvoke* invoke) {
   vixl::MacroAssembler* masm = GetVIXLAssembler();
   LocationSummary* locations = invoke->GetLocations();
 
+  Register str = XRegisterFrom(locations->InAt(0));
+  Register arg = XRegisterFrom(locations->InAt(1));
+  Register out = OutputRegister(invoke);
+
+  Register temp0 = WRegisterFrom(locations->GetTemp(0));
+  Register temp1 = WRegisterFrom(locations->GetTemp(1));
+  Register temp2 = WRegisterFrom(locations->GetTemp(2));
+
+  vixl::Label loop;
+  vixl::Label find_char_diff;
+  vixl::Label end;
+
+  // Get offsets of count and value fields within a string object.
+  const int32_t count_offset = mirror::String::CountOffset().Int32Value();
+  const int32_t value_offset = mirror::String::ValueOffset().Int32Value();
+
   // Note that the null check must have been done earlier.
   DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
 
-  Register argument = WRegisterFrom(locations->InAt(1));
-  __ Cmp(argument, 0);
-  SlowPathCodeARM64* slow_path = new (GetAllocator()) IntrinsicSlowPathARM64(invoke);
-  codegen_->AddSlowPath(slow_path);
-  __ B(eq, slow_path->GetEntryLabel());
+  // Take slow path and throw if input can be and is null.
+  SlowPathCodeARM64* slow_path = nullptr;
+  const bool can_slow_path = invoke->InputAt(1)->CanBeNull();
+  if (can_slow_path) {
+    slow_path = new (GetAllocator()) IntrinsicSlowPathARM64(invoke);
+    codegen_->AddSlowPath(slow_path);
+    __ Cbz(arg, slow_path->GetEntryLabel());
+  }
 
-  __ Ldr(
-      lr, MemOperand(tr, QUICK_ENTRYPOINT_OFFSET(kArm64WordSize, pStringCompareTo).Int32Value()));
-  __ Blr(lr);
-  __ Bind(slow_path->GetExitLabel());
+  // Reference equality check, return 0 if same reference.
+  __ Subs(out, str, arg);
+  __ B(&end, eq);
+  // Load lengths of this and argument strings.
+  __ Ldr(temp0, MemOperand(str.X(), count_offset));
+  __ Ldr(temp1, MemOperand(arg.X(), count_offset));
+  // Return zero if both strings are empty.
+  __ Orr(out, temp0, temp1);
+  __ Cbz(out, &end);
+  // out = length diff.
+  __ Subs(out, temp0, temp1);
+  // temp2 = min(len(str), len(arg)).
+  __ Csel(temp2, temp1, temp0, ge);
+  // Shorter string is empty?
+  __ Cbz(temp2, &end);
+
+  // Store offset of string value in preparation for comparison loop.
+  __ Mov(temp1, value_offset);
+
+  UseScratchRegisterScope scratch_scope(masm);
+  Register temp4 = scratch_scope.AcquireX();
+
+  // Assertions that must hold in order to compare strings 4 characters at a time.
+  DCHECK_ALIGNED(value_offset, 8);
+  static_assert(IsAligned<8>(kObjectAlignment), "String of odd length is not zero padded");
+
+  const size_t char_size = Primitive::ComponentSize(Primitive::kPrimChar);
+  DCHECK_EQ(char_size, 2u);
+
+  // Promote temp0 to an X reg, ready for LDR.
+  temp0 = temp0.X();
+
+  // Loop to compare 4x16-bit characters at a time (ok because of string data alignment).
+  __ Bind(&loop);
+  __ Ldr(temp4, MemOperand(str.X(), temp1));
+  __ Ldr(temp0, MemOperand(arg.X(), temp1));
+  __ Cmp(temp4, temp0);
+  __ B(ne, &find_char_diff);
+  __ Add(temp1, temp1, char_size * 4);
+  __ Subs(temp2, temp2, 4);
+  __ B(gt, &loop);
+  __ B(&end);
+
+  // Promote temp1 to an X reg, ready for EOR.
+  temp1 = temp1.X();
+
+  // Find the single 16-bit character difference.
+  __ Bind(&find_char_diff);
+  // Get the bit position of the first character that differs.
+  __ Eor(temp1, temp0, temp4);
+  __ Rbit(temp1, temp1);
+  __ Clz(temp1, temp1);
+  __ Bic(temp1, temp1, 0xf);
+  // If the number of 16-bit chars remaining <= the index where the difference occurs (0-3), then
+  // the difference occurs outside the remaining string data, so just return length diff (out).
+  __ Cmp(temp2, Operand(temp1, LSR, 4));
+  __ B(le, &end);
+  // Extract the characters and calculate the difference.
+  __ Lsr(temp0, temp0, temp1);
+  __ Lsr(temp4, temp4, temp1);
+  __ And(temp4, temp4, 0xffff);
+  __ Sub(out, temp4, Operand(temp0, UXTH));
+
+  __ Bind(&end);
+
+  if (can_slow_path) {
+    __ Bind(slow_path->GetExitLabel());
+  }
 }
 
 void IntrinsicLocationsBuilderARM64::VisitStringEquals(HInvoke* invoke) {
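The comparison loop above reads four 16-bit characters per 64-bit load and, on a mismatch, uses Rbit + Clz (together equivalent to counting trailing zeros) to locate the first differing character. A rough standalone C++ model of that strategy, with hypothetical names, __builtin_ctzll in place of Rbit/Clz, and a little-endian host assumed (as on the AArch64 target); the real code also handles null arguments and reference equality before the loop, as the hunk shows:

#include <cassert>
#include <cstdint>
#include <cstring>

static int32_t CompareUtf16(const uint16_t* a, int32_t len_a,
                            const uint16_t* b, int32_t len_b) {
  int32_t result = len_a - len_b;                      // "out = length diff."
  int32_t remaining = len_a < len_b ? len_a : len_b;   // min(len(a), len(b))
  size_t offset = 0;
  while (remaining > 0) {
    uint64_t wa = 0;
    uint64_t wb = 0;
    // The assembly can always load a full word because string data is
    // 8-byte aligned and zero padded; here we copy only the valid chars.
    const size_t n = remaining < 4 ? static_cast<size_t>(remaining) : 4u;
    std::memcpy(&wa, a + offset, n * sizeof(uint16_t));
    std::memcpy(&wb, b + offset, n * sizeof(uint16_t));
    if (wa != wb) {
      // Lowest differing bit, rounded down to a 16-bit lane boundary; the
      // Rbit/Clz/Bic sequence computes the same lane index in hardware.
      const int bit = __builtin_ctzll(wa ^ wb) & ~0xf;
      const uint16_t ca = static_cast<uint16_t>(wa >> bit);
      const uint16_t cb = static_cast<uint16_t>(wb >> bit);
      return static_cast<int32_t>(ca) - static_cast<int32_t>(cb);
    }
    offset += 4;
    remaining -= 4;
  }
  return result;  // One string is a prefix of the other (or they are equal).
}

int main() {
  const uint16_t abcde[] = {'a', 'b', 'c', 'd', 'e'};
  const uint16_t abx[] = {'a', 'b', 'x'};
  assert(CompareUtf16(abcde, 5, abx, 3) < 0);      // 'c' < 'x' decides.
  assert(CompareUtf16(abcde, 3, abcde, 5) == -2);  // Proper prefix: length diff.
  assert(CompareUtf16(abx, 3, abx, 3) == 0);
}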
@@ -2201,9 +2289,46 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) {
   __ Bind(slow_path->GetExitLabel());
 }
 
+static void GenIsInfinite(LocationSummary* locations,
+                          bool is64bit,
+                          vixl::MacroAssembler* masm) {
+  Operand infinity;
+  Register out;
+
+  if (is64bit) {
+    infinity = kPositiveInfinityDouble;
+    out = XRegisterFrom(locations->Out());
+  } else {
+    infinity = kPositiveInfinityFloat;
+    out = WRegisterFrom(locations->Out());
+  }
+
+  const Register zero = vixl::Assembler::AppropriateZeroRegFor(out);
+
+  MoveFPToInt(locations, is64bit, masm);
+  __ Eor(out, out, infinity);
+  // We don't care about the sign bit, so shift left.
+  __ Cmp(zero, Operand(out, LSL, 1));
+  __ Cset(out, eq);
+}
+
+void IntrinsicLocationsBuilderARM64::VisitFloatIsInfinite(HInvoke* invoke) {
+  CreateFPToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM64::VisitFloatIsInfinite(HInvoke* invoke) {
+  GenIsInfinite(invoke->GetLocations(), /* is64bit */ false, GetVIXLAssembler());
+}
+
+void IntrinsicLocationsBuilderARM64::VisitDoubleIsInfinite(HInvoke* invoke) {
+  CreateFPToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARM64::VisitDoubleIsInfinite(HInvoke* invoke) {
+  GenIsInfinite(invoke->GetLocations(), /* is64bit */ true, GetVIXLAssembler());
+}
+
 UNIMPLEMENTED_INTRINSIC(ARM64, ReferenceGetReferent)
-UNIMPLEMENTED_INTRINSIC(ARM64, FloatIsInfinite)
-UNIMPLEMENTED_INTRINSIC(ARM64, DoubleIsInfinite)
 UNIMPLEMENTED_INTRINSIC(ARM64, IntegerHighestOneBit)
 UNIMPLEMENTED_INTRINSIC(ARM64, LongHighestOneBit)
 UNIMPLEMENTED_INTRINSIC(ARM64, IntegerLowestOneBit)
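The ARM64 GenIsInfinite above needs only three instructions after the FP-to-integer move: XOR with the positive-infinity bit pattern, a compare of the result shifted left by one (discarding the sign bit) against zero, and Cset. A standalone C++ sketch of the same logic, with a hypothetical IsInfiniteDouble helper:

#include <cassert>
#include <cstdint>
#include <cstring>
#include <limits>

// Mirrors Eor / Cmp(zero, out LSL 1) / Cset(eq): the result is 1 iff, after
// XOR-ing with +inf's bit pattern, at most the sign bit remains set.
static int IsInfiniteDouble(double d) {
  uint64_t bits;
  std::memcpy(&bits, &d, sizeof(bits));
  return ((bits ^ UINT64_C(0x7ff0000000000000)) << 1) == 0 ? 1 : 0;
}

int main() {
  const double inf = std::numeric_limits<double>::infinity();
  assert(IsInfiniteDouble(inf) == 1);
  assert(IsInfiniteDouble(-inf) == 1);  // Sign bit is shifted out.
  assert(IsInfiniteDouble(std::numeric_limits<double>::quiet_NaN()) == 0);
  assert(IsInfiniteDouble(42.0) == 0);
}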
diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc
index 19c6a225ac..46195c104a 100644
--- a/compiler/optimizing/intrinsics_mips.cc
+++ b/compiler/optimizing/intrinsics_mips.cc
@@ -2283,10 +2283,10 @@ static void GenIsInfinite(LocationSummary* locations,
   // If one, or more, of the exponent bits is zero, then the number can't be infinite.
   if (type == Primitive::kPrimDouble) {
     __ MoveFromFpuHigh(TMP, in);
-    __ LoadConst32(AT, 0x7FF00000);
+    __ LoadConst32(AT, High32Bits(kPositiveInfinityDouble));
   } else {
     __ Mfc1(TMP, in);
-    __ LoadConst32(AT, 0x7F800000);
+    __ LoadConst32(AT, kPositiveInfinityFloat);
   }
 
   __ Xor(TMP, TMP, AT);
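Finally, a quick standalone check (not part of the patch) that the constants introduced in intrinsics.h really are the IEEE-754 bit patterns of positive infinity, and that the MIPS change above substitutes the value it previously hard-coded:

#include <cassert>
#include <cstdint>
#include <cstring>
#include <limits>

int main() {
  uint32_t f_bits;
  uint64_t d_bits;
  const float f_inf = std::numeric_limits<float>::infinity();
  const double d_inf = std::numeric_limits<double>::infinity();
  std::memcpy(&f_bits, &f_inf, sizeof(f_bits));
  std::memcpy(&d_bits, &d_inf, sizeof(d_bits));
  assert(f_bits == 0x7f800000U);                   // kPositiveInfinityFloat
  assert(d_bits == UINT64_C(0x7ff0000000000000));  // kPositiveInfinityDouble
  // High32Bits(kPositiveInfinityDouble) matches the old literal 0x7FF00000.
  assert(static_cast<uint32_t>(UINT64_C(0x7ff0000000000000) >> 32) == 0x7ff00000U);
}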