diff options
author | 2018-12-21 16:12:17 +0000 | |
---|---|---|
committer | 2018-12-21 16:12:17 +0000 | |
commit | ca7027d30aa8b1abf4ef76dda156dd893234ddca (patch) | |
tree | def9167e4686b07da887e0a51afb7558195d26dd /compiler/optimizing | |
parent | 7201ef48fb42cc36824126a26f758f02c9200bad (diff) | |
parent | 776a7c294f6356b314505369679bffc73f4a345e (diff) |
Merge "ART: Add CRC32.updateByteBuffer intrinsic for ARM64"
Diffstat (limited to 'compiler/optimizing')
-rw-r--r-- | compiler/optimizing/intrinsics_arm64.cc | 168 | ||||
-rw-r--r-- | compiler/optimizing/intrinsics_arm_vixl.cc | 1 | ||||
-rw-r--r-- | compiler/optimizing/intrinsics_mips.cc | 1 | ||||
-rw-r--r-- | compiler/optimizing/intrinsics_mips64.cc | 1 | ||||
-rw-r--r-- | compiler/optimizing/intrinsics_x86.cc | 1 | ||||
-rw-r--r-- | compiler/optimizing/intrinsics_x86_64.cc | 1 |
6 files changed, 120 insertions, 53 deletions
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc index 0b17c9d27e..7fb69b7463 100644 --- a/compiler/optimizing/intrinsics_arm64.cc +++ b/compiler/optimizing/intrinsics_arm64.cc @@ -2954,58 +2954,20 @@ void IntrinsicCodeGeneratorARM64::VisitCRC32Update(HInvoke* invoke) { __ Mvn(out, tmp); } -// The threshold for sizes of arrays to use the library provided implementation -// of CRC32.updateBytes instead of the intrinsic. -static constexpr int32_t kCRC32UpdateBytesThreshold = 64 * 1024; - -void IntrinsicLocationsBuilderARM64::VisitCRC32UpdateBytes(HInvoke* invoke) { - if (!codegen_->GetInstructionSetFeatures().HasCRC()) { - return; - } - - LocationSummary* locations - = new (allocator_) LocationSummary(invoke, - LocationSummary::kCallOnSlowPath, - kIntrinsified); - - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); - locations->SetInAt(2, Location::RegisterOrConstant(invoke->InputAt(2))); - locations->SetInAt(3, Location::RequiresRegister()); - locations->AddTemp(Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister()); -} - -// Lower the invoke of CRC32.updateBytes(int crc, byte[] b, int off, int len) +// Generate code using CRC32 instructions which calculates +// a CRC32 value of a byte. // -// Note: The intrinsic is not used if len exceeds a threshold. -void IntrinsicCodeGeneratorARM64::VisitCRC32UpdateBytes(HInvoke* invoke) { - DCHECK(codegen_->GetInstructionSetFeatures().HasCRC()); - - auto masm = GetVIXLAssembler(); - auto locations = invoke->GetLocations(); - - auto slow_path = - new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARM64(invoke); - codegen_->AddSlowPath(slow_path); - - Register length = WRegisterFrom(locations->InAt(3)); - __ Cmp(length, kCRC32UpdateBytesThreshold); - __ B(slow_path->GetEntryLabel(), hi); - - const uint32_t array_data_offset = - mirror::Array::DataOffset(Primitive::kPrimByte).Uint32Value(); - Register ptr = XRegisterFrom(locations->GetTemp(0)); - Register array = XRegisterFrom(locations->InAt(1)); - auto offset = locations->InAt(2); - if (offset.IsConstant()) { - int32_t offset_value = offset.GetConstant()->AsIntConstant()->GetValue(); - __ Add(ptr, array, array_data_offset + offset_value); - } else { - __ Add(ptr, array, array_data_offset); - __ Add(ptr, ptr, XRegisterFrom(offset)); - } - +// Parameters: +// masm - VIXL macro assembler +// crc - a register holding an initial CRC value +// ptr - a register holding a memory address of bytes +// length - a register holding a number of bytes to process +// out - a register to put a result of calculation +static void GenerateCodeForCalculationCRC32ValueOfBytes(MacroAssembler* masm, + const Register& crc, + const Register& ptr, + const Register& length, + const Register& out) { // The algorithm of CRC32 of bytes is: // crc = ~crc // process a few first bytes to make the array 8-byte aligned @@ -3029,8 +2991,7 @@ void IntrinsicCodeGeneratorARM64::VisitCRC32UpdateBytes(HInvoke* invoke) { Register len = temps.AcquireW(); Register array_elem = temps.AcquireW(); - Register out = WRegisterFrom(locations->Out()); - __ Mvn(out, WRegisterFrom(locations->InAt(0))); + __ Mvn(out, crc); __ Mov(len, length); __ Tbz(ptr, 0, &aligned2); @@ -3095,10 +3056,111 @@ void IntrinsicCodeGeneratorARM64::VisitCRC32UpdateBytes(HInvoke* invoke) { __ Bind(&done); __ Mvn(out, out); +} + +// The threshold for sizes of arrays to use the library provided implementation +// of CRC32.updateBytes instead of the intrinsic. +static constexpr int32_t kCRC32UpdateBytesThreshold = 64 * 1024; + +void IntrinsicLocationsBuilderARM64::VisitCRC32UpdateBytes(HInvoke* invoke) { + if (!codegen_->GetInstructionSetFeatures().HasCRC()) { + return; + } + + LocationSummary* locations = + new (allocator_) LocationSummary(invoke, + LocationSummary::kCallOnSlowPath, + kIntrinsified); + + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetInAt(2, Location::RegisterOrConstant(invoke->InputAt(2))); + locations->SetInAt(3, Location::RequiresRegister()); + locations->AddTemp(Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister()); +} + +// Lower the invoke of CRC32.updateBytes(int crc, byte[] b, int off, int len) +// +// Note: The intrinsic is not used if len exceeds a threshold. +void IntrinsicCodeGeneratorARM64::VisitCRC32UpdateBytes(HInvoke* invoke) { + DCHECK(codegen_->GetInstructionSetFeatures().HasCRC()); + + auto masm = GetVIXLAssembler(); + auto locations = invoke->GetLocations(); + + auto slow_path = + new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARM64(invoke); + codegen_->AddSlowPath(slow_path); + + Register length = WRegisterFrom(locations->InAt(3)); + __ Cmp(length, kCRC32UpdateBytesThreshold); + __ B(slow_path->GetEntryLabel(), hi); + + const uint32_t array_data_offset = + mirror::Array::DataOffset(Primitive::kPrimByte).Uint32Value(); + Register ptr = XRegisterFrom(locations->GetTemp(0)); + Register array = XRegisterFrom(locations->InAt(1)); + auto offset = locations->InAt(2); + if (offset.IsConstant()) { + int32_t offset_value = offset.GetConstant()->AsIntConstant()->GetValue(); + __ Add(ptr, array, array_data_offset + offset_value); + } else { + __ Add(ptr, array, array_data_offset); + __ Add(ptr, ptr, XRegisterFrom(offset)); + } + + Register crc = WRegisterFrom(locations->InAt(0)); + Register out = WRegisterFrom(locations->Out()); + + GenerateCodeForCalculationCRC32ValueOfBytes(masm, crc, ptr, length, out); __ Bind(slow_path->GetExitLabel()); } +void IntrinsicLocationsBuilderARM64::VisitCRC32UpdateByteBuffer(HInvoke* invoke) { + if (!codegen_->GetInstructionSetFeatures().HasCRC()) { + return; + } + + LocationSummary* locations = + new (allocator_) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetInAt(2, Location::RequiresRegister()); + locations->SetInAt(3, Location::RequiresRegister()); + locations->AddTemp(Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister()); +} + +// Lower the invoke of CRC32.updateByteBuffer(int crc, long addr, int off, int len) +// +// There is no need to generate code checking if addr is 0. +// The method updateByteBuffer is a private method of java.util.zip.CRC32. +// This guarantees no calls outside of the CRC32 class. +// An address of DirectBuffer is always passed to the call of updateByteBuffer. +// It might be an implementation of an empty DirectBuffer which can use a zero +// address but it must have the length to be zero. The current generated code +// correctly works with the zero length. +void IntrinsicCodeGeneratorARM64::VisitCRC32UpdateByteBuffer(HInvoke* invoke) { + DCHECK(codegen_->GetInstructionSetFeatures().HasCRC()); + + auto masm = GetVIXLAssembler(); + auto locations = invoke->GetLocations(); + + Register addr = XRegisterFrom(locations->InAt(1)); + Register ptr = XRegisterFrom(locations->GetTemp(0)); + __ Add(ptr, addr, XRegisterFrom(locations->InAt(2))); + + Register crc = WRegisterFrom(locations->InAt(0)); + Register length = WRegisterFrom(locations->InAt(3)); + Register out = WRegisterFrom(locations->Out()); + GenerateCodeForCalculationCRC32ValueOfBytes(masm, crc, ptr, length, out); +} + UNIMPLEMENTED_INTRINSIC(ARM64, ReferenceGetReferent) UNIMPLEMENTED_INTRINSIC(ARM64, StringStringIndexOf); diff --git a/compiler/optimizing/intrinsics_arm_vixl.cc b/compiler/optimizing/intrinsics_arm_vixl.cc index 88f1457c20..95752fcd01 100644 --- a/compiler/optimizing/intrinsics_arm_vixl.cc +++ b/compiler/optimizing/intrinsics_arm_vixl.cc @@ -3061,6 +3061,7 @@ UNIMPLEMENTED_INTRINSIC(ARMVIXL, SystemArrayCopyChar) UNIMPLEMENTED_INTRINSIC(ARMVIXL, ReferenceGetReferent) UNIMPLEMENTED_INTRINSIC(ARMVIXL, CRC32Update) UNIMPLEMENTED_INTRINSIC(ARMVIXL, CRC32UpdateBytes) +UNIMPLEMENTED_INTRINSIC(ARMVIXL, CRC32UpdateByteBuffer) UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringStringIndexOf); UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringStringIndexOfAfter); diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc index 08ba0a0adf..8092a1c030 100644 --- a/compiler/optimizing/intrinsics_mips.cc +++ b/compiler/optimizing/intrinsics_mips.cc @@ -2698,6 +2698,7 @@ UNIMPLEMENTED_INTRINSIC(MIPS, SystemArrayCopy) UNIMPLEMENTED_INTRINSIC(MIPS, CRC32Update) UNIMPLEMENTED_INTRINSIC(MIPS, CRC32UpdateBytes) +UNIMPLEMENTED_INTRINSIC(MIPS, CRC32UpdateByteBuffer) UNIMPLEMENTED_INTRINSIC(MIPS, StringStringIndexOf); UNIMPLEMENTED_INTRINSIC(MIPS, StringStringIndexOfAfter); diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc index 59d3ba2488..f5577c3efc 100644 --- a/compiler/optimizing/intrinsics_mips64.cc +++ b/compiler/optimizing/intrinsics_mips64.cc @@ -2348,6 +2348,7 @@ UNIMPLEMENTED_INTRINSIC(MIPS64, ReferenceGetReferent) UNIMPLEMENTED_INTRINSIC(MIPS64, SystemArrayCopy) UNIMPLEMENTED_INTRINSIC(MIPS64, CRC32Update) UNIMPLEMENTED_INTRINSIC(MIPS64, CRC32UpdateBytes) +UNIMPLEMENTED_INTRINSIC(MIPS64, CRC32UpdateByteBuffer) UNIMPLEMENTED_INTRINSIC(MIPS64, StringStringIndexOf); UNIMPLEMENTED_INTRINSIC(MIPS64, StringStringIndexOfAfter); diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc index 1d94950e4d..5ad94697e8 100644 --- a/compiler/optimizing/intrinsics_x86.cc +++ b/compiler/optimizing/intrinsics_x86.cc @@ -3072,6 +3072,7 @@ UNIMPLEMENTED_INTRINSIC(X86, IntegerHighestOneBit) UNIMPLEMENTED_INTRINSIC(X86, LongHighestOneBit) UNIMPLEMENTED_INTRINSIC(X86, CRC32Update) UNIMPLEMENTED_INTRINSIC(X86, CRC32UpdateBytes) +UNIMPLEMENTED_INTRINSIC(X86, CRC32UpdateByteBuffer) UNIMPLEMENTED_INTRINSIC(X86, StringStringIndexOf); UNIMPLEMENTED_INTRINSIC(X86, StringStringIndexOfAfter); diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc index 4f0b61d88e..62ccd49adf 100644 --- a/compiler/optimizing/intrinsics_x86_64.cc +++ b/compiler/optimizing/intrinsics_x86_64.cc @@ -2739,6 +2739,7 @@ UNIMPLEMENTED_INTRINSIC(X86_64, FloatIsInfinite) UNIMPLEMENTED_INTRINSIC(X86_64, DoubleIsInfinite) UNIMPLEMENTED_INTRINSIC(X86_64, CRC32Update) UNIMPLEMENTED_INTRINSIC(X86_64, CRC32UpdateBytes) +UNIMPLEMENTED_INTRINSIC(X86_64, CRC32UpdateByteBuffer) UNIMPLEMENTED_INTRINSIC(X86_64, StringStringIndexOf); UNIMPLEMENTED_INTRINSIC(X86_64, StringStringIndexOfAfter); |