-rw-r--r--  compiler/optimizing/code_generator_riscv64.h |   1 -
-rw-r--r--  compiler/optimizing/intrinsics_riscv64.cc    | 204 +
2 files changed, 204 insertions(+), 1 deletion(-)
diff --git a/compiler/optimizing/code_generator_riscv64.h b/compiler/optimizing/code_generator_riscv64.h
index 4a80b17c28..b86b30e889 100644
--- a/compiler/optimizing/code_generator_riscv64.h
+++ b/compiler/optimizing/code_generator_riscv64.h
@@ -71,7 +71,6 @@ static constexpr int32_t kFClassNaNMinValue = 0x100;
   V(FP16LessEquals) \
   V(FP16Min) \
   V(FP16Max) \
-  V(StringCompareTo) \
   V(StringGetCharsNoCheck) \
   V(StringStringIndexOf) \
   V(StringStringIndexOfAfter) \
diff --git a/compiler/optimizing/intrinsics_riscv64.cc b/compiler/optimizing/intrinsics_riscv64.cc
index 12eec101b7..3ce6d73fae 100644
--- a/compiler/optimizing/intrinsics_riscv64.cc
+++ b/compiler/optimizing/intrinsics_riscv64.cc
@@ -2797,6 +2797,210 @@ void IntrinsicCodeGeneratorRISCV64::VisitJdkUnsafeGetAndSetReference(HInvoke* in
   GenUnsafeGetAndUpdate(invoke, DataType::Type::kReference, codegen_, GetAndUpdateOp::kSet);
 }
 
+void IntrinsicLocationsBuilderRISCV64::VisitStringCompareTo(HInvoke* invoke) {
+  LocationSummary* locations =
+      new (allocator_) LocationSummary(invoke,
+                                       invoke->InputAt(1)->CanBeNull()
+                                           ? LocationSummary::kCallOnSlowPath
+                                           : LocationSummary::kNoCall,
+                                       kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RequiresRegister());
+  locations->AddTemp(Location::RequiresRegister());
+  locations->AddTemp(Location::RequiresRegister());
+  locations->AddTemp(Location::RequiresRegister());
+  // Need temporary registers for String compression's feature.
+  if (mirror::kUseStringCompression) {
+    locations->AddTemp(Location::RequiresRegister());
+  }
+  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitStringCompareTo(HInvoke* invoke) {
+  Riscv64Assembler* assembler = GetAssembler();
+  DCHECK(assembler->IsExtensionEnabled(Riscv64Extension::kZbb));
+  LocationSummary* locations = invoke->GetLocations();
+
+  XRegister str = locations->InAt(0).AsRegister<XRegister>();
+  XRegister arg = locations->InAt(1).AsRegister<XRegister>();
+  XRegister out = locations->Out().AsRegister<XRegister>();
+
+  XRegister temp0 = locations->GetTemp(0).AsRegister<XRegister>();
+  XRegister temp1 = locations->GetTemp(1).AsRegister<XRegister>();
+  XRegister temp2 = locations->GetTemp(2).AsRegister<XRegister>();
+  XRegister temp3 = kNoXRegister;
+  if (mirror::kUseStringCompression) {
+    temp3 = locations->GetTemp(3).AsRegister<XRegister>();
+  }
+
+  Riscv64Label loop;
+  Riscv64Label find_char_diff;
+  Riscv64Label end;
+  Riscv64Label different_compression;
+
+  // Get offsets of count and value fields within a string object.
+  const int32_t count_offset = mirror::String::CountOffset().Int32Value();
+  const int32_t value_offset = mirror::String::ValueOffset().Int32Value();
+
+  // Note that the null check must have been done earlier.
+  DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
+
+  // Take slow path and throw if input can be and is null.
+  SlowPathCodeRISCV64* slow_path = nullptr;
+  const bool can_slow_path = invoke->InputAt(1)->CanBeNull();
+  if (can_slow_path) {
+    slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathRISCV64(invoke);
+    codegen_->AddSlowPath(slow_path);
+    __ Beqz(arg, slow_path->GetEntryLabel());
+  }
+
+  // Reference equality check, return 0 if same reference.
+  __ Sub(out, str, arg);
+  __ Beqz(out, &end);
+
+  if (mirror::kUseStringCompression) {
+    // Load `count` fields of this and argument strings.
+    __ Loadwu(temp3, str, count_offset);
+    __ Loadwu(temp2, arg, count_offset);
+    // Clean out compression flag from lengths.
+    __ Srliw(temp0, temp3, 1u);
+    __ Srliw(temp1, temp2, 1u);
+  } else {
+    // Load lengths of this and argument strings.
+    __ Loadwu(temp0, str, count_offset);
+    __ Loadwu(temp1, arg, count_offset);
+  }
+  // out = length diff.
+  __ Subw(out, temp0, temp1);
+
+  // Find the length of the shorter string
+  __ Minu(temp0, temp0, temp1);
+  // Shorter string is empty?
+  __ Beqz(temp0, &end);
+
+  if (mirror::kUseStringCompression) {
+    // Extract both compression flags
+    __ Andi(temp3, temp3, 1);
+    __ Andi(temp2, temp2, 1);
+    __ Bne(temp2, temp3, &different_compression);
+  }
+  // Store offset of string value in preparation for comparison loop.
+  __ Li(temp1, value_offset);
+  if (mirror::kUseStringCompression) {
+    // For string compression, calculate the number of bytes to compare (not chars).
+    __ Sll(temp0, temp0, temp3);
+  }
+
+  // Assertions that must hold in order to compare strings 8 bytes at a time.
+  DCHECK_ALIGNED(value_offset, 8);
+  static_assert(IsAligned<8>(kObjectAlignment), "String of odd length is not zero padded");
+
+  constexpr size_t char_size = DataType::Size(DataType::Type::kUint16);
+  static_assert(char_size == 2u, "Char expected to be 2 bytes wide");
+
+  ScratchRegisterScope scratch_scope(assembler);
+  XRegister temp4 = scratch_scope.AllocateXRegister();
+
+  // Loop to compare 4x16-bit characters at a time (ok because of string data alignment).
+  __ Bind(&loop);
+  __ Add(temp4, str, temp1);
+  __ Ld(temp4, temp4, 0);
+  __ Add(temp2, arg, temp1);
+  __ Ld(temp2, temp2, 0);
+  __ Bne(temp4, temp2, &find_char_diff);
+  __ Addi(temp1, temp1, char_size * 4);
+  // With string compression, we have compared 8 bytes, otherwise 4 chars.
+  __ Addi(temp0, temp0, (mirror::kUseStringCompression) ? -8 : -4);
+  __ Bgtz(temp0, &loop);
+  __ J(&end);
+
+  // Find the single character difference.
+  __ Bind(&find_char_diff);
+  // Get the bit position of the first character that differs.
+  __ Xor(temp1, temp2, temp4);
+  __ Ctz(temp1, temp1);
+
+  // If the number of chars remaining <= the index where the difference occurs (0-3), then
+  // the difference occurs outside the remaining string data, so just return length diff (out).
+  __ Srliw(temp1, temp1, (mirror::kUseStringCompression) ? 3 : 4);
+  __ Ble(temp0, temp1, &end);
+
+  // Extract the characters and calculate the difference.
+  __ Slliw(temp1, temp1, (mirror::kUseStringCompression) ? 3 : 4);
+  if (mirror::kUseStringCompression) {
+    __ Slliw(temp3, temp3, 3u);
+    __ Andn(temp1, temp1, temp3);
+  }
+  __ Srl(temp2, temp2, temp1);
+  __ Srl(temp4, temp4, temp1);
+  if (mirror::kUseStringCompression) {
+    __ Li(temp0, -256);            // ~0xff
+    __ Sllw(temp0, temp0, temp3);  // temp3 = 0 or 8, temp0 := ~0xff or ~0xffff
+    __ Andn(temp4, temp4, temp0);  // Extract 8 or 16 bits.
+    __ Andn(temp2, temp2, temp0);  // Extract 8 or 16 bits.
+  } else {
+    __ ZextH(temp4, temp4);
+    __ ZextH(temp2, temp2);
+  }
+
+  __ Subw(out, temp4, temp2);
+
+  if (mirror::kUseStringCompression) {
+    __ J(&end);
+    __ Bind(&different_compression);
+
+    // Comparison for different compression style.
+    constexpr size_t c_char_size = DataType::Size(DataType::Type::kInt8);
+    static_assert(c_char_size == 1u, "Compressed char expected to be 1 byte wide");
+
+    // `temp1` will hold the compressed data pointer, `temp2` the uncompressed data pointer.
+    __ Xor(temp4, str, arg);
+    __ Addi(temp3, temp3, -1);    // -1 if str is compressed, 0 otherwise
+    __ And(temp2, temp4, temp3);  // str^arg if str is compressed, 0 otherwise
+    __ Xor(temp1, temp2, arg);    // str if str is compressed, arg otherwise
+    __ Xor(temp2, temp2, str);    // arg if str is compressed, str otherwise
+
+    // We want to free up the temp3, currently holding `str` compression flag, for comparison.
+    // So, we move it to the bottom bit of the iteration count `temp0` which we then need to treat
+    // as unsigned. This will allow `addi temp0, temp0, -2; bgtz different_compression_loop`
+    // to serve as the loop condition.
+    __ Sh1Add(temp0, temp0, temp3);
+
+    // Adjust temp1 and temp2 from string pointers to data pointers.
+    __ Addi(temp1, temp1, value_offset);
+    __ Addi(temp2, temp2, value_offset);
+
+    Riscv64Label different_compression_loop;
+    Riscv64Label different_compression_diff;
+
+    __ Bind(&different_compression_loop);
+    __ Lbu(temp4, temp1, 0);
+    __ Addiw(temp1, temp1, c_char_size);
+    __ Lhu(temp3, temp2, 0);
+    __ Addi(temp2, temp2, char_size);
+    __ Sub(temp4, temp4, temp3);
+    __ Bnez(temp4, &different_compression_diff);
+    __ Addi(temp0, temp0, -2);
+    __ Bgtz(temp0, &different_compression_loop);
+    __ J(&end);
+
+    // Calculate the difference.
+    __ Bind(&different_compression_diff);
+    static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
+                  "Expecting 0=compressed, 1=uncompressed");
+    __ Andi(temp0, temp0, 1);
+    __ Addi(temp0, temp0, -1);
+    __ Xor(out, temp4, temp0);
+    __ Sub(out, out, temp0);
+  }
+
+  __ Bind(&end);
+
+  if (can_slow_path) {
+    __ Bind(slow_path->GetExitLabel());
+  }
+}
+
 class VarHandleSlowPathRISCV64 : public IntrinsicSlowPathRISCV64 {
  public:
   VarHandleSlowPathRISCV64(HInvoke* invoke, std::memory_order order)
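
For reference, the intrinsic above hand-codes the semantics of java.lang.String.compareTo() for ART's two string layouts: compressed strings store 8-bit characters, uncompressed strings store 16-bit UTF-16 units, and the low bit of the `count` field records which layout is in use. The following is a minimal, hypothetical C++ sketch of those semantics only; StringModel, its fields, and CompareToReference are illustrative names, not ART types.

// Hypothetical model of the comparison the intrinsic performs; not ART code.
#include <algorithm>
#include <cstdint>
#include <vector>

struct StringModel {
  bool compressed;             // models the low bit of the `count` field
  std::vector<uint8_t> value;  // models the character data at `value_offset`

  int32_t Length() const {
    return static_cast<int32_t>(compressed ? value.size() : value.size() / 2);
  }
  int32_t CharAt(int32_t i) const {
    // 8-bit load for compressed data, 16-bit little-endian load otherwise,
    // like the Lbu/Lhu pair in the different_compression loop.
    if (compressed) {
      return value[i];
    }
    return value[2 * i] | (value[2 * i + 1] << 8);
  }
};

int32_t CompareToReference(const StringModel& str, const StringModel& arg) {
  if (&str == &arg) {
    return 0;  // reference equality, like the initial Sub/Beqz on `out`
  }
  const int32_t length_diff = str.Length() - arg.Length();       // Subw(out, temp0, temp1)
  const int32_t min_len = std::min(str.Length(), arg.Length());  // Minu(temp0, temp0, temp1)
  for (int32_t i = 0; i < min_len; ++i) {
    const int32_t diff = str.CharAt(i) - arg.CharAt(i);
    if (diff != 0) {
      return diff;  // first differing character decides the result
    }
  }
  return length_diff;  // one string is a prefix of the other
}

In the common same-compression case the assembly reaches the same result without a per-character loop: it compares 8 bytes of data per iteration and, on a mismatch, uses Ctz to locate the first differing character before extracting it.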
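
The different_compression path picks which register walks the 8-bit data and which walks the 16-bit data without branching, using the Xor/Addi/And/Xor sequence commented above. A small, hypothetical C++ sketch of that select follows; the function and parameter names are illustrative, not the register names used in the intrinsic.

#include <cstdint>

// `flag` is the low bit of str's `count` field: 0 = compressed, 1 = uncompressed.
void SelectDataPointers(uintptr_t str, uintptr_t arg, uint32_t flag,
                        uintptr_t* compressed, uintptr_t* uncompressed) {
  uintptr_t x = str ^ arg;                             // Xor(temp4, str, arg)
  uintptr_t mask = static_cast<uintptr_t>(flag) - 1;   // Addi(temp3, temp3, -1): all ones iff str compressed
  uintptr_t t = x & mask;                              // And(temp2, temp4, temp3): str^arg or 0
  *compressed = t ^ arg;                               // Xor(temp1, temp2, arg): str if compressed, else arg
  *uncompressed = t ^ str;                             // Xor(temp2, temp2, str): arg if compressed, else str
}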