-rw-r--r-- | compiler/optimizing/code_generator_riscv64.h      |   2
-rw-r--r-- | compiler/optimizing/intrinsics_riscv64.cc         | 122
-rw-r--r-- | compiler/utils/riscv64/assembler_riscv64.h        |   4
-rw-r--r-- | runtime/arch/riscv64/entrypoints_init_riscv64.cc  |  11
-rw-r--r-- | runtime/arch/riscv64/quick_entrypoints_riscv64.S  |  70
5 files changed, 200 insertions, 9 deletions
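
Taken together, the five files wire up ART's String.indexOf(char) intrinsics on riscv64: the compiler-side intrinsic (intrinsics_riscv64.cc) places its arguments in the runtime calling convention registers and calls the kQuickIndexOf entrypoint, which entrypoints_init_riscv64.cc now binds to the new art_quick_indexof assembly stub. The stub only handles search values that fit in a 16-bit char; anything larger is routed to the generic slow path, as GenerateVisitStringIndexOf below shows. A minimal sketch of that dispatch decision follows; it is illustrative only and not part of the commit, and ChooseIndexOfPath and its parameters are invented names, not ART API.

#include <cstdint>

// Which code path the intrinsic emits for the search character (see
// GenerateVisitStringIndexOf in the intrinsics_riscv64.cc hunk below).
enum class IndexOfPath { kDirectStubCall, kRuntimeCheckThenStub, kAlwaysSlowPath };

IndexOfPath ChooseIndexOfPath(bool ch_is_constant, uint32_t constant_value, bool ch_is_char_typed) {
  if (ch_is_constant) {
    // A constant search value above 0xFFFF is outside what the 16-bit stub handles,
    // so the intrinsic branches unconditionally to the generic slow path.
    return constant_value > 0xFFFFu ? IndexOfPath::kAlwaysSlowPath : IndexOfPath::kDirectStubCall;
  }
  // A char-typed argument already fits in 16 bits; any other non-constant argument gets a
  // runtime range check (the srliw/bnez pair) before falling through to the stub call.
  return ch_is_char_typed ? IndexOfPath::kDirectStubCall : IndexOfPath::kRuntimeCheckThenStub;
}
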
diff --git a/compiler/optimizing/code_generator_riscv64.h b/compiler/optimizing/code_generator_riscv64.h
index 313aa4c2df..a2bba17229 100644
--- a/compiler/optimizing/code_generator_riscv64.h
+++ b/compiler/optimizing/code_generator_riscv64.h
@@ -100,8 +100,6 @@ static constexpr size_t kRuntimeParameterFpuRegistersLength =
   V(StringCompareTo) \
   V(StringEquals) \
   V(StringGetCharsNoCheck) \
-  V(StringIndexOf) \
-  V(StringIndexOfAfter) \
   V(StringStringIndexOf) \
   V(StringStringIndexOfAfter) \
   V(StringNewStringFromBytes) \
diff --git a/compiler/optimizing/intrinsics_riscv64.cc b/compiler/optimizing/intrinsics_riscv64.cc
index 668b3862ad..ba541b36f3 100644
--- a/compiler/optimizing/intrinsics_riscv64.cc
+++ b/compiler/optimizing/intrinsics_riscv64.cc
@@ -17,10 +17,15 @@
 #include "intrinsics_riscv64.h"
 
 #include "code_generator_riscv64.h"
+#include "intrinsics_utils.h"
 
 namespace art HIDDEN {
 namespace riscv64 {
 
+using IntrinsicSlowPathRISCV64 = IntrinsicSlowPath<InvokeDexCallingConventionVisitorRISCV64,
+                                                   SlowPathCodeRISCV64,
+                                                   Riscv64Assembler>;
+
 bool IntrinsicLocationsBuilderRISCV64::TryDispatch(HInvoke* invoke) {
   Dispatch(invoke);
   LocationSummary* res = invoke->GetLocations();
@@ -34,7 +39,7 @@ Riscv64Assembler* IntrinsicCodeGeneratorRISCV64::GetAssembler() {
   return codegen_->GetAssembler();
 }
 
-#define __ GetAssembler()->
+#define __ assembler->
 
 static void CreateFPToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
   LocationSummary* locations =
@@ -56,6 +61,7 @@ void IntrinsicLocationsBuilderRISCV64::VisitDoubleDoubleToRawLongBits(HInvoke* i
 
 void IntrinsicCodeGeneratorRISCV64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
   LocationSummary* locations = invoke->GetLocations();
+  Riscv64Assembler* assembler = GetAssembler();
   __ FMvXD(locations->Out().AsRegister<XRegister>(),
            locations->InAt(0).AsFpuRegister<FRegister>());
 }
@@ -65,6 +71,7 @@ void IntrinsicLocationsBuilderRISCV64::VisitDoubleLongBitsToDouble(HInvoke* invo
 
 void IntrinsicCodeGeneratorRISCV64::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
   LocationSummary* locations = invoke->GetLocations();
+  Riscv64Assembler* assembler = GetAssembler();
   __ FMvDX(locations->Out().AsFpuRegister<FRegister>(),
            locations->InAt(0).AsRegister<XRegister>());
 }
@@ -74,6 +81,7 @@ void IntrinsicLocationsBuilderRISCV64::VisitFloatFloatToRawIntBits(HInvoke* invo
 
 void IntrinsicCodeGeneratorRISCV64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
   LocationSummary* locations = invoke->GetLocations();
+  Riscv64Assembler* assembler = GetAssembler();
   __ FMvXW(locations->Out().AsRegister<XRegister>(),
            locations->InAt(0).AsFpuRegister<FRegister>());
 }
@@ -83,6 +91,7 @@ void IntrinsicLocationsBuilderRISCV64::VisitFloatIntBitsToFloat(HInvoke* invoke)
 
 void IntrinsicCodeGeneratorRISCV64::VisitFloatIntBitsToFloat(HInvoke* invoke) {
   LocationSummary* locations = invoke->GetLocations();
+  Riscv64Assembler* assembler = GetAssembler();
   __ FMvWX(locations->Out().AsFpuRegister<FRegister>(),
            locations->InAt(0).AsRegister<XRegister>());
 }
@@ -92,6 +101,7 @@ void IntrinsicLocationsBuilderRISCV64::VisitDoubleIsInfinite(HInvoke* invoke) {
 
 void IntrinsicCodeGeneratorRISCV64::VisitDoubleIsInfinite(HInvoke* invoke) {
   LocationSummary* locations = invoke->GetLocations();
+  Riscv64Assembler* assembler = GetAssembler();
   XRegister out = locations->Out().AsRegister<XRegister>();
   __ FClassD(out, locations->InAt(0).AsFpuRegister<FRegister>());
   __ Andi(out, out, kPositiveInfinity | kNegativeInfinity);
@@ -104,6 +114,7 @@ void IntrinsicLocationsBuilderRISCV64::VisitFloatIsInfinite(HInvoke* invoke) {
 
 void IntrinsicCodeGeneratorRISCV64::VisitFloatIsInfinite(HInvoke* invoke) {
   LocationSummary* locations = invoke->GetLocations();
+  Riscv64Assembler* assembler = GetAssembler();
   XRegister out = locations->Out().AsRegister<XRegister>();
   __ FClassS(out, locations->InAt(0).AsFpuRegister<FRegister>());
   __ Andi(out, out, kPositiveInfinity | kNegativeInfinity);
@@ -128,6 +139,7 @@ void IntrinsicLocationsBuilderRISCV64::VisitMemoryPeekByte(HInvoke* invoke) {
 }
 
 void IntrinsicCodeGeneratorRISCV64::VisitMemoryPeekByte(HInvoke* invoke) {
+  Riscv64Assembler* assembler = GetAssembler();
   EmitMemoryPeek(invoke, [&](XRegister rd, XRegister rs1) { __ Lb(rd, rs1, 0); });
 }
 
@@ -136,6 +148,7 @@ void IntrinsicLocationsBuilderRISCV64::VisitMemoryPeekIntNative(HInvoke* invoke)
 }
 
 void IntrinsicCodeGeneratorRISCV64::VisitMemoryPeekIntNative(HInvoke* invoke) {
+  Riscv64Assembler* assembler = GetAssembler();
   EmitMemoryPeek(invoke, [&](XRegister rd, XRegister rs1) { __ Lw(rd, rs1, 0); });
 }
 
@@ -144,6 +157,7 @@ void IntrinsicLocationsBuilderRISCV64::VisitMemoryPeekLongNative(HInvoke* invoke
 }
 
 void IntrinsicCodeGeneratorRISCV64::VisitMemoryPeekLongNative(HInvoke* invoke) {
+  Riscv64Assembler* assembler = GetAssembler();
   EmitMemoryPeek(invoke, [&](XRegister rd, XRegister rs1) { __ Ld(rd, rs1, 0); });
 }
 
@@ -152,6 +166,7 @@ void IntrinsicLocationsBuilderRISCV64::VisitMemoryPeekShortNative(HInvoke* invok
 }
 
 void IntrinsicCodeGeneratorRISCV64::VisitMemoryPeekShortNative(HInvoke* invoke) {
+  Riscv64Assembler* assembler = GetAssembler();
   EmitMemoryPeek(invoke, [&](XRegister rd, XRegister rs1) { __ Lh(rd, rs1, 0); });
 }
 
@@ -173,6 +188,7 @@ void IntrinsicLocationsBuilderRISCV64::VisitMemoryPokeByte(HInvoke* invoke) {
 }
 
 void IntrinsicCodeGeneratorRISCV64::VisitMemoryPokeByte(HInvoke* invoke) {
+  Riscv64Assembler* assembler = GetAssembler();
   EmitMemoryPoke(invoke, [&](XRegister rs2, XRegister rs1) { __ Sb(rs2, rs1, 0); });
 }
 
@@ -181,6 +197,7 @@ void IntrinsicLocationsBuilderRISCV64::VisitMemoryPokeIntNative(HInvoke* invoke)
 }
 
 void IntrinsicCodeGeneratorRISCV64::VisitMemoryPokeIntNative(HInvoke* invoke) {
+  Riscv64Assembler* assembler = GetAssembler();
   EmitMemoryPoke(invoke, [&](XRegister rs2, XRegister rs1) { __ Sw(rs2, rs1, 0); });
 }
 
@@ -189,6 +206,7 @@ void IntrinsicLocationsBuilderRISCV64::VisitMemoryPokeLongNative(HInvoke* invoke
 }
 
 void IntrinsicCodeGeneratorRISCV64::VisitMemoryPokeLongNative(HInvoke* invoke) {
+  Riscv64Assembler* assembler = GetAssembler();
   EmitMemoryPoke(invoke, [&](XRegister rs2, XRegister rs1) { __ Sd(rs2, rs1, 0); });
 }
 
@@ -197,6 +215,7 @@ void IntrinsicLocationsBuilderRISCV64::VisitMemoryPokeShortNative(HInvoke* invok
 }
 
 void IntrinsicCodeGeneratorRISCV64::VisitMemoryPokeShortNative(HInvoke* invoke) {
+  Riscv64Assembler* assembler = GetAssembler();
   EmitMemoryPoke(invoke, [&](XRegister rs2, XRegister rs1) { __ Sh(rs2, rs1, 0); });
 }
 
@@ -211,6 +230,7 @@ void IntrinsicLocationsBuilderRISCV64::VisitIntegerReverseBytes(HInvoke* invoke)
 }
 
 void IntrinsicCodeGeneratorRISCV64::VisitIntegerReverseBytes(HInvoke* invoke) {
+  Riscv64Assembler* assembler = GetAssembler();
   EmitIntegralUnOp(invoke, [&](XRegister rd, XRegister rs1) {
     // There is no 32-bit reverse bytes instruction.
     __ Rev8(rd, rs1);
@@ -223,6 +243,7 @@ void IntrinsicLocationsBuilderRISCV64::VisitLongReverseBytes(HInvoke* invoke) {
 }
 
 void IntrinsicCodeGeneratorRISCV64::VisitLongReverseBytes(HInvoke* invoke) {
+  Riscv64Assembler* assembler = GetAssembler();
   EmitIntegralUnOp(invoke, [&](XRegister rd, XRegister rs1) { __ Rev8(rd, rs1); });
 }
 
@@ -231,6 +252,7 @@ void IntrinsicLocationsBuilderRISCV64::VisitShortReverseBytes(HInvoke* invoke) {
 }
 
 void IntrinsicCodeGeneratorRISCV64::VisitShortReverseBytes(HInvoke* invoke) {
+  Riscv64Assembler* assembler = GetAssembler();
   EmitIntegralUnOp(invoke, [&](XRegister rd, XRegister rs1) {
     // There is no 16-bit reverse bytes instruction.
     __ Rev8(rd, rs1);
@@ -243,6 +265,7 @@ void IntrinsicLocationsBuilderRISCV64::VisitIntegerBitCount(HInvoke* invoke) {
 }
 
 void IntrinsicCodeGeneratorRISCV64::VisitIntegerBitCount(HInvoke* invoke) {
+  Riscv64Assembler* assembler = GetAssembler();
   EmitIntegralUnOp(invoke, [&](XRegister rd, XRegister rs1) { __ Cpopw(rd, rs1); });
 }
 
@@ -251,6 +274,7 @@ void IntrinsicLocationsBuilderRISCV64::VisitLongBitCount(HInvoke* invoke) {
 }
 
 void IntrinsicCodeGeneratorRISCV64::VisitLongBitCount(HInvoke* invoke) {
+  Riscv64Assembler* assembler = GetAssembler();
   EmitIntegralUnOp(invoke, [&](XRegister rd, XRegister rs1) { __ Cpop(rd, rs1); });
 }
 
@@ -259,8 +283,9 @@ void IntrinsicLocationsBuilderRISCV64::VisitIntegerHighestOneBit(HInvoke* invoke
 }
 
 void IntrinsicCodeGeneratorRISCV64::VisitIntegerHighestOneBit(HInvoke* invoke) {
+  Riscv64Assembler* assembler = GetAssembler();
   EmitIntegralUnOp(invoke, [&](XRegister rd, XRegister rs1) {
-    ScratchRegisterScope srs(GetAssembler());
+    ScratchRegisterScope srs(assembler);
     XRegister tmp = srs.AllocateXRegister();
     XRegister tmp2 = srs.AllocateXRegister();
     __ Clzw(tmp, rs1);
@@ -275,8 +300,9 @@ void IntrinsicLocationsBuilderRISCV64::VisitLongHighestOneBit(HInvoke* invoke) {
 }
 
 void IntrinsicCodeGeneratorRISCV64::VisitLongHighestOneBit(HInvoke* invoke) {
+  Riscv64Assembler* assembler = GetAssembler();
   EmitIntegralUnOp(invoke, [&](XRegister rd, XRegister rs1) {
-    ScratchRegisterScope srs(GetAssembler());
+    ScratchRegisterScope srs(assembler);
     XRegister tmp = srs.AllocateXRegister();
     XRegister tmp2 = srs.AllocateXRegister();
     __ Clz(tmp, rs1);
@@ -291,8 +317,9 @@ void IntrinsicLocationsBuilderRISCV64::VisitIntegerLowestOneBit(HInvoke* invoke)
 }
 
 void IntrinsicCodeGeneratorRISCV64::VisitIntegerLowestOneBit(HInvoke* invoke) {
+  Riscv64Assembler* assembler = GetAssembler();
   EmitIntegralUnOp(invoke, [&](XRegister rd, XRegister rs1) {
-    ScratchRegisterScope srs(GetAssembler());
+    ScratchRegisterScope srs(assembler);
     XRegister tmp = srs.AllocateXRegister();
     __ NegW(tmp, rs1);
     __ And(rd, rs1, tmp);
@@ -304,8 +331,9 @@ void IntrinsicLocationsBuilderRISCV64::VisitLongLowestOneBit(HInvoke* invoke) {
 }
 
 void IntrinsicCodeGeneratorRISCV64::VisitLongLowestOneBit(HInvoke* invoke) {
+  Riscv64Assembler* assembler = GetAssembler();
   EmitIntegralUnOp(invoke, [&](XRegister rd, XRegister rs1) {
-    ScratchRegisterScope srs(GetAssembler());
+    ScratchRegisterScope srs(assembler);
     XRegister tmp = srs.AllocateXRegister();
     __ Neg(tmp, rs1);
     __ And(rd, rs1, tmp);
@@ -317,6 +345,7 @@ void IntrinsicLocationsBuilderRISCV64::VisitIntegerNumberOfLeadingZeros(HInvoke*
 }
 
 void IntrinsicCodeGeneratorRISCV64::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
+  Riscv64Assembler* assembler = GetAssembler();
   EmitIntegralUnOp(invoke, [&](XRegister rd, XRegister rs1) { __ Clzw(rd, rs1); });
 }
 
@@ -325,6 +354,7 @@ void IntrinsicLocationsBuilderRISCV64::VisitLongNumberOfLeadingZeros(HInvoke* in
 }
 
 void IntrinsicCodeGeneratorRISCV64::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
+  Riscv64Assembler* assembler = GetAssembler();
   EmitIntegralUnOp(invoke, [&](XRegister rd, XRegister rs1) { __ Clz(rd, rs1); });
 }
 
@@ -333,6 +363,7 @@ void IntrinsicLocationsBuilderRISCV64::VisitIntegerNumberOfTrailingZeros(HInvoke
 }
 
 void IntrinsicCodeGeneratorRISCV64::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
+  Riscv64Assembler* assembler = GetAssembler();
   EmitIntegralUnOp(invoke, [&](XRegister rd, XRegister rs1) { __ Ctzw(rd, rs1); });
 }
 
@@ -341,9 +372,90 @@ void IntrinsicLocationsBuilderRISCV64::VisitLongNumberOfTrailingZeros(HInvoke* i
 }
 
 void IntrinsicCodeGeneratorRISCV64::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
+  Riscv64Assembler* assembler = GetAssembler();
   EmitIntegralUnOp(invoke, [&](XRegister rd, XRegister rs1) { __ Ctz(rd, rs1); });
 }
 
+static void GenerateVisitStringIndexOf(HInvoke* invoke,
+                                       Riscv64Assembler* assembler,
+                                       CodeGeneratorRISCV64* codegen,
+                                       bool start_at_zero) {
+  LocationSummary* locations = invoke->GetLocations();
+
+  // Note that the null check must have been done earlier.
+  DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
+
+  // Check for code points > 0xFFFF. Either a slow-path check when we don't know statically,
+  // or directly dispatch for a large constant, or omit slow-path for a small constant or a char.
+  SlowPathCodeRISCV64* slow_path = nullptr;
+  HInstruction* code_point = invoke->InputAt(1);
+  if (code_point->IsIntConstant()) {
+    if (static_cast<uint32_t>(code_point->AsIntConstant()->GetValue()) > 0xFFFFU) {
+      // Always needs the slow-path. We could directly dispatch to it, but this case should be
+      // rare, so for simplicity just put the full slow-path down and branch unconditionally.
+      slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathRISCV64(invoke);
+      codegen->AddSlowPath(slow_path);
+      __ J(slow_path->GetEntryLabel());
+      __ Bind(slow_path->GetExitLabel());
+      return;
+    }
+  } else if (code_point->GetType() != DataType::Type::kUint16) {
+    slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathRISCV64(invoke);
+    codegen->AddSlowPath(slow_path);
+    ScratchRegisterScope srs(assembler);
+    XRegister tmp = srs.AllocateXRegister();
+    __ Srliw(tmp, locations->InAt(1).AsRegister<XRegister>(), 16);
+    __ Bnez(tmp, slow_path->GetEntryLabel());
+  }
+
+  if (start_at_zero) {
+    // Start-index = 0.
+    XRegister tmp_reg = locations->GetTemp(0).AsRegister<XRegister>();
+    __ Li(tmp_reg, 0);
+  }
+
+  codegen->InvokeRuntime(kQuickIndexOf, invoke, invoke->GetDexPc(), slow_path);
+  CheckEntrypointTypes<kQuickIndexOf, int32_t, void*, uint32_t, uint32_t>();
+
+  if (slow_path != nullptr) {
+    __ Bind(slow_path->GetExitLabel());
+  }
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitStringIndexOf(HInvoke* invoke) {
+  LocationSummary* locations = new (allocator_) LocationSummary(
+      invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
+  // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's
+  // best to align the inputs accordingly.
+  InvokeRuntimeCallingConvention calling_convention;
+  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+  locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
+  locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kInt32));
+
+  // Need to send start_index=0.
+  locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitStringIndexOf(HInvoke* invoke) {
+  GenerateVisitStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero= */ true);
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitStringIndexOfAfter(HInvoke* invoke) {
+  LocationSummary* locations = new (allocator_) LocationSummary(
+      invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
+  // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's
+  // best to align the inputs accordingly.
+  InvokeRuntimeCallingConvention calling_convention;
+  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+  locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
+  locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
+  locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kInt32));
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitStringIndexOfAfter(HInvoke* invoke) {
+  GenerateVisitStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero= */ false);
+}
+
 #define MARK_UNIMPLEMENTED(Name) UNIMPLEMENTED_INTRINSIC(RISCV64, Name)
 UNIMPLEMENTED_INTRINSIC_LIST_RISCV64(MARK_UNIMPLEMENTED);
 #undef MARK_UNIMPLEMENTED
diff --git a/compiler/utils/riscv64/assembler_riscv64.h b/compiler/utils/riscv64/assembler_riscv64.h
index 15f2518c87..1411b03789 100644
--- a/compiler/utils/riscv64/assembler_riscv64.h
+++ b/compiler/utils/riscv64/assembler_riscv64.h
@@ -633,6 +633,10 @@ class Riscv64Assembler final : public Assembler {
     UNIMPLEMENTED(FATAL) << "Do not use Jump for RISCV64";
   }
 
+  void Jump(Riscv64Label* label) {
+    J(label);
+  }
+
   void Bind(Riscv64Label* label);
 
   // Load label address using PC-relative loads.
diff --git a/runtime/arch/riscv64/entrypoints_init_riscv64.cc b/runtime/arch/riscv64/entrypoints_init_riscv64.cc
index 98ae040ad6..45926c4574 100644
--- a/runtime/arch/riscv64/entrypoints_init_riscv64.cc
+++ b/runtime/arch/riscv64/entrypoints_init_riscv64.cc
@@ -122,7 +122,16 @@ void InitEntryPoints(JniEntryPoints* jpoints,
   qpoints->SetShrLong(nullptr);
   qpoints->SetUshrLong(nullptr);
 
-  // TODO(riscv64): add other entrypoints
+  // TODO(riscv64): More math
+
+  // Intrinsics
+  qpoints->SetIndexOf(art_quick_indexof);
+  // TODO(riscv64): More intrinsics.
+
+  // Read barrier.
+  UpdateReadBarrierEntrypoints(qpoints, /*is_active=*/ false);
+  qpoints->SetReadBarrierSlow(artReadBarrierSlow);
+  qpoints->SetReadBarrierForRootSlow(artReadBarrierForRootSlow);
 }
 
 }  // namespace art
diff --git a/runtime/arch/riscv64/quick_entrypoints_riscv64.S b/runtime/arch/riscv64/quick_entrypoints_riscv64.S
index ccccb2ee85..7ee3061ac1 100644
--- a/runtime/arch/riscv64/quick_entrypoints_riscv64.S
+++ b/runtime/arch/riscv64/quick_entrypoints_riscv64.S
@@ -1898,4 +1898,72 @@ GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved64_tlab, \
 
 GENERATE_FIELD_ENTRYPOINTS
 
-UNDEFINED art_quick_indexof
+// String's indexOf.
+//
+// TODO: Not very optimized. We should use the vector extension.
+// On entry:
+//    a0: string object (known non-null)
+//    a1: char to match (known <= 0xFFFF)
+//    a2: Starting offset in string data
+ENTRY art_quick_indexof
+#if (STRING_COMPRESSION_FEATURE)
+    lwu   a4, MIRROR_STRING_COUNT_OFFSET(a0)
+#else
+    lwu   a3, MIRROR_STRING_COUNT_OFFSET(a0)
+#endif
+    addi  a0, a0, MIRROR_STRING_VALUE_OFFSET
+#if (STRING_COMPRESSION_FEATURE)
+    /* Split the count into length (a3) and compression flag (a4) */
+    srliw a3, a4, 1
+    andi  a4, a4, 1
+#endif
+    /* Clamp start to [0..count) */
+    sraiw a5, a2, 31
+    andn  a2, a2, a5
+    bge   a2, a3, .Lstring_indexof_nomatch
+
+#if (STRING_COMPRESSION_FEATURE)
+    beqz  a4, .Lstring_indexof_compressed
+#endif
+    /* Build pointers to start and end of the data to compare */
+    sh1add a2, a2, a0
+    sh1add a3, a3, a0
+
+    /*
+     * At this point we have:
+     *    a0: original start of string data
+     *    a1: char to compare
+     *    a2: start of the data to test
+     *    a3: end of the data to test
+     */
+
+.Lstring_indexof_loop:
+    lhu   a4, 0(a2)
+    beq   a4, a1, .Lstring_indexof_match
+    addi  a2, a2, 2
+    bne   a2, a3, .Lstring_indexof_loop
+.Lstring_indexof_nomatch:
+    li    a0, -1
+    ret
+.Lstring_indexof_match:
+    sub   a0, a2, a0
+    srli  a0, a0, 1
+    ret
+
+#if (STRING_COMPRESSION_FEATURE)
+    // Comparing compressed string one character at a time with the input character.
+.Lstring_indexof_compressed:
+    add   a2, a2, a0
+    add   a3, a3, a0
+.Lstring_indexof_compressed_loop:
+    lbu   a4, (a2)
+    beq   a4, a1, .Lstring_indexof_compressed_match
+    addi  a2, a2, 1
+    bne   a2, a3, .Lstring_indexof_compressed_loop
+    li    a0, -1
+    ret
+.Lstring_indexof_compressed_match:
+    sub   a0, a2, a0
+    ret
+#endif
+END art_quick_indexof
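
For reference, the stub above (with STRING_COMPRESSION_FEATURE enabled) is roughly equivalent to the following C++. This is an illustrative sketch only, not code from the tree: IndexOfSketch and its parameters are invented names, and the string layout is paraphrased from the MIRROR_STRING_COUNT_OFFSET / MIRROR_STRING_VALUE_OFFSET accesses and the compression handling shown in the assembly.

#include <cstdint>

// Approximate C++ rendering of art_quick_indexof (illustration only).
// With string compression, the count field is (length << 1) | is_uncompressed, and
// compressed strings store one byte per character instead of a uint16_t.
int32_t IndexOfSketch(const void* value_data, uint32_t count_field, uint32_t ch, int32_t start) {
  uint32_t length = count_field >> 1;             // srliw a3, a4, 1
  bool uncompressed = (count_field & 1u) != 0u;   // andi  a4, a4, 1
  if (start < 0) {
    start = 0;                                    // sraiw/andn clamp a negative start to 0
  }
  if (static_cast<uint32_t>(start) >= length) {
    return -1;                                    // bge a2, a3, .Lstring_indexof_nomatch
  }
  if (uncompressed) {
    const uint16_t* chars = static_cast<const uint16_t*>(value_data);
    for (uint32_t i = static_cast<uint32_t>(start); i < length; ++i) {  // .Lstring_indexof_loop
      if (chars[i] == ch) {
        return static_cast<int32_t>(i);           // (pointer - data start) / 2 in the stub
      }
    }
  } else {
    const uint8_t* chars = static_cast<const uint8_t*>(value_data);
    for (uint32_t i = static_cast<uint32_t>(start); i < length; ++i) {  // compressed loop
      if (chars[i] == ch) {
        return static_cast<int32_t>(i);
      }
    }
  }
  return -1;
}

As the TODO in the stub notes, this is a straightforward one-character-per-iteration scan; a vectorized version using the RISC-V V extension could compare multiple characters per iteration.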