diff options
Diffstat (limited to 'compiler/optimizing')
| -rw-r--r-- | compiler/optimizing/intrinsics_arm64.cc | 145 | ||||
| -rw-r--r-- | compiler/optimizing/intrinsics_arm_vixl.cc | 1 | ||||
| -rw-r--r-- | compiler/optimizing/intrinsics_mips.cc | 1 | ||||
| -rw-r--r-- | compiler/optimizing/intrinsics_mips64.cc | 1 | ||||
| -rw-r--r-- | compiler/optimizing/intrinsics_x86.cc | 1 | ||||
| -rw-r--r-- | compiler/optimizing/intrinsics_x86_64.cc | 1 |
6 files changed, 150 insertions, 0 deletions
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc index 6d04b0e9d9..1688ea7811 100644 --- a/compiler/optimizing/intrinsics_arm64.cc +++ b/compiler/optimizing/intrinsics_arm64.cc @@ -2950,6 +2950,151 @@ void IntrinsicCodeGeneratorARM64::VisitCRC32Update(HInvoke* invoke) { __ Mvn(out, out); } +// The threshold for sizes of arrays to use the library provided implementation +// of CRC32.updateBytes instead of the intrinsic. +static constexpr int32_t kCRC32UpdateBytesThreshold = 64 * 1024; + +void IntrinsicLocationsBuilderARM64::VisitCRC32UpdateBytes(HInvoke* invoke) { + if (!codegen_->GetInstructionSetFeatures().HasCRC()) { + return; + } + + LocationSummary* locations + = new (allocator_) LocationSummary(invoke, + LocationSummary::kCallOnSlowPath, + kIntrinsified); + + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetInAt(2, Location::RegisterOrConstant(invoke->InputAt(2))); + locations->SetInAt(3, Location::RequiresRegister()); + locations->AddTemp(Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister()); +} + +// Lower the invoke of CRC32.updateBytes(int crc, byte[] b, int off, int len) +// +// Note: The intrinsic is not used if len exceeds a threshold. +void IntrinsicCodeGeneratorARM64::VisitCRC32UpdateBytes(HInvoke* invoke) { + DCHECK(codegen_->GetInstructionSetFeatures().HasCRC()); + + auto masm = GetVIXLAssembler(); + auto locations = invoke->GetLocations(); + + auto slow_path = + new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARM64(invoke); + codegen_->AddSlowPath(slow_path); + + Register length = WRegisterFrom(locations->InAt(3)); + __ Cmp(length, kCRC32UpdateBytesThreshold); + __ B(slow_path->GetEntryLabel(), hi); + + const uint32_t array_data_offset = + mirror::Array::DataOffset(Primitive::kPrimByte).Uint32Value(); + Register ptr = XRegisterFrom(locations->GetTemp(0)); + Register array = XRegisterFrom(locations->InAt(1)); + auto offset = locations->InAt(2); + if (offset.IsConstant()) { + int32_t offset_value = offset.GetConstant()->AsIntConstant()->GetValue(); + __ Add(ptr, array, array_data_offset + offset_value); + } else { + __ Add(ptr, array, array_data_offset); + __ Add(ptr, ptr, XRegisterFrom(offset)); + } + + // The algorithm of CRC32 of bytes is: + // crc = ~crc + // process a few first bytes to make the array 8-byte aligned + // while array has 8 bytes do: + // crc = crc32_of_8bytes(crc, 8_bytes(array)) + // if array has 4 bytes: + // crc = crc32_of_4bytes(crc, 4_bytes(array)) + // if array has 2 bytes: + // crc = crc32_of_2bytes(crc, 2_bytes(array)) + // if array has a byte: + // crc = crc32_of_byte(crc, 1_byte(array)) + // crc = ~crc + + vixl::aarch64::Label loop, done; + vixl::aarch64::Label process_4bytes, process_2bytes, process_1byte; + vixl::aarch64::Label aligned2, aligned4, aligned8; + + // Use VIXL scratch registers as the VIXL macro assembler won't use them in + // instructions below. + UseScratchRegisterScope temps(masm); + Register len = temps.AcquireW(); + Register array_elem = temps.AcquireW(); + + Register out = WRegisterFrom(locations->Out()); + __ Mvn(out, WRegisterFrom(locations->InAt(0))); + __ Mov(len, length); + + __ Tbz(ptr, 0, &aligned2); + __ Subs(len, len, 1); + __ B(&done, lo); + __ Ldrb(array_elem, MemOperand(ptr, 1, PostIndex)); + __ Crc32b(out, out, array_elem); + + __ Bind(&aligned2); + __ Tbz(ptr, 1, &aligned4); + __ Subs(len, len, 2); + __ B(&process_1byte, lo); + __ Ldrh(array_elem, MemOperand(ptr, 2, PostIndex)); + __ Crc32h(out, out, array_elem); + + __ Bind(&aligned4); + __ Tbz(ptr, 2, &aligned8); + __ Subs(len, len, 4); + __ B(&process_2bytes, lo); + __ Ldr(array_elem, MemOperand(ptr, 4, PostIndex)); + __ Crc32w(out, out, array_elem); + + __ Bind(&aligned8); + __ Subs(len, len, 8); + // If len < 8 go to process data by 4 bytes, 2 bytes and a byte. + __ B(&process_4bytes, lo); + + // The main loop processing data by 8 bytes. + __ Bind(&loop); + __ Ldr(array_elem.X(), MemOperand(ptr, 8, PostIndex)); + __ Subs(len, len, 8); + __ Crc32x(out, out, array_elem.X()); + // if len >= 8, process the next 8 bytes. + __ B(&loop, hs); + + // Process the data which is less than 8 bytes. + // The code generated below works with values of len + // which come in the range [-8, 0]. + // The first three bits are used to detect whether 4 bytes or 2 bytes or + // a byte can be processed. + // The checking order is from bit 2 to bit 0: + // bit 2 is set: at least 4 bytes available + // bit 1 is set: at least 2 bytes available + // bit 0 is set: at least a byte available + __ Bind(&process_4bytes); + // Goto process_2bytes if less than four bytes available + __ Tbz(len, 2, &process_2bytes); + __ Ldr(array_elem, MemOperand(ptr, 4, PostIndex)); + __ Crc32w(out, out, array_elem); + + __ Bind(&process_2bytes); + // Goto process_1bytes if less than two bytes available + __ Tbz(len, 1, &process_1byte); + __ Ldrh(array_elem, MemOperand(ptr, 2, PostIndex)); + __ Crc32h(out, out, array_elem); + + __ Bind(&process_1byte); + // Goto done if no bytes available + __ Tbz(len, 0, &done); + __ Ldrb(array_elem, MemOperand(ptr)); + __ Crc32b(out, out, array_elem); + + __ Bind(&done); + __ Mvn(out, out); + + __ Bind(slow_path->GetExitLabel()); +} + UNIMPLEMENTED_INTRINSIC(ARM64, ReferenceGetReferent) UNIMPLEMENTED_INTRINSIC(ARM64, StringStringIndexOf); diff --git a/compiler/optimizing/intrinsics_arm_vixl.cc b/compiler/optimizing/intrinsics_arm_vixl.cc index 4d45a9991c..88f1457c20 100644 --- a/compiler/optimizing/intrinsics_arm_vixl.cc +++ b/compiler/optimizing/intrinsics_arm_vixl.cc @@ -3060,6 +3060,7 @@ UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeCASLong) // High register pressure. UNIMPLEMENTED_INTRINSIC(ARMVIXL, SystemArrayCopyChar) UNIMPLEMENTED_INTRINSIC(ARMVIXL, ReferenceGetReferent) UNIMPLEMENTED_INTRINSIC(ARMVIXL, CRC32Update) +UNIMPLEMENTED_INTRINSIC(ARMVIXL, CRC32UpdateBytes) UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringStringIndexOf); UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringStringIndexOfAfter); diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc index 21fb7d7f1c..08ba0a0adf 100644 --- a/compiler/optimizing/intrinsics_mips.cc +++ b/compiler/optimizing/intrinsics_mips.cc @@ -2697,6 +2697,7 @@ UNIMPLEMENTED_INTRINSIC(MIPS, ReferenceGetReferent) UNIMPLEMENTED_INTRINSIC(MIPS, SystemArrayCopy) UNIMPLEMENTED_INTRINSIC(MIPS, CRC32Update) +UNIMPLEMENTED_INTRINSIC(MIPS, CRC32UpdateBytes) UNIMPLEMENTED_INTRINSIC(MIPS, StringStringIndexOf); UNIMPLEMENTED_INTRINSIC(MIPS, StringStringIndexOfAfter); diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc index 4b86f5d423..59d3ba2488 100644 --- a/compiler/optimizing/intrinsics_mips64.cc +++ b/compiler/optimizing/intrinsics_mips64.cc @@ -2347,6 +2347,7 @@ void IntrinsicCodeGeneratorMIPS64::VisitReachabilityFence(HInvoke* invoke ATTRIB UNIMPLEMENTED_INTRINSIC(MIPS64, ReferenceGetReferent) UNIMPLEMENTED_INTRINSIC(MIPS64, SystemArrayCopy) UNIMPLEMENTED_INTRINSIC(MIPS64, CRC32Update) +UNIMPLEMENTED_INTRINSIC(MIPS64, CRC32UpdateBytes) UNIMPLEMENTED_INTRINSIC(MIPS64, StringStringIndexOf); UNIMPLEMENTED_INTRINSIC(MIPS64, StringStringIndexOfAfter); diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc index a73f4e8b94..1d94950e4d 100644 --- a/compiler/optimizing/intrinsics_x86.cc +++ b/compiler/optimizing/intrinsics_x86.cc @@ -3071,6 +3071,7 @@ UNIMPLEMENTED_INTRINSIC(X86, DoubleIsInfinite) UNIMPLEMENTED_INTRINSIC(X86, IntegerHighestOneBit) UNIMPLEMENTED_INTRINSIC(X86, LongHighestOneBit) UNIMPLEMENTED_INTRINSIC(X86, CRC32Update) +UNIMPLEMENTED_INTRINSIC(X86, CRC32UpdateBytes) UNIMPLEMENTED_INTRINSIC(X86, StringStringIndexOf); UNIMPLEMENTED_INTRINSIC(X86, StringStringIndexOfAfter); diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc index 88c766fabc..4f0b61d88e 100644 --- a/compiler/optimizing/intrinsics_x86_64.cc +++ b/compiler/optimizing/intrinsics_x86_64.cc @@ -2738,6 +2738,7 @@ UNIMPLEMENTED_INTRINSIC(X86_64, ReferenceGetReferent) UNIMPLEMENTED_INTRINSIC(X86_64, FloatIsInfinite) UNIMPLEMENTED_INTRINSIC(X86_64, DoubleIsInfinite) UNIMPLEMENTED_INTRINSIC(X86_64, CRC32Update) +UNIMPLEMENTED_INTRINSIC(X86_64, CRC32UpdateBytes) UNIMPLEMENTED_INTRINSIC(X86_64, StringStringIndexOf); UNIMPLEMENTED_INTRINSIC(X86_64, StringStringIndexOfAfter); |