diff options
author | 2023-12-11 17:02:22 +0000 | |
---|---|---|
committer | 2023-12-13 16:42:23 +0000 | |
commit | f9562dc6e14621961dd5276c6669bf0adb84e149 (patch) | |
tree | 969f4ab8e7435871e14490f80e38ecd68bab5a6d /compiler/optimizing/code_generator.cc | |
parent | 516f36d70993d9d43c9989a77bfe279051acefe5 (diff) |
riscv64: Clean up the `SystemArrayCopy` intrinsic.
Define a new optimization flag for source and destination
position match. Use it to avoid the forward-copy check
(where the assembler optimized away a BLT instruction,
so we had just a useless BNE to the next instruction) and
one position sign check.
Avoid checking that the position is inside the array. The
subsequent subtraction cannot underflow an `int32_t` and
the following BLT shall go to the slow path for negative
values anyway.
Rewrite the array type check to avoid unnecessary checks
and read barriers.
Use an allocated temporary instead of scratch register
for the marking in the read barrier slow path. Simplify
the gray bit check and the fake dependency.
Use constant position and length locations for small
constant values. (It was probably an oversight that we
used it only for large constant values.)
Emit threshold check when the length equals source or
destination length. The old code allowed the intrinsic
to process an array copy of arbitrary length.
Use `ShNAdd()` for faster array address calculations.
Use helper functions and lambdas to simplify the code.
Pass registers and locations by value. Prefer load/store
macro instructions over raw load/store instructions. Use
a bare conditional branch to assert the `TMP` shall not
be clobbered.
Test: testrunner.py --target --64 --ndebug --optimizing
Bug: 283082089
Change-Id: I3f697b4a74497d6d712a92450a6a45e772430662
Diffstat (limited to 'compiler/optimizing/code_generator.cc')
-rw-r--r-- | compiler/optimizing/code_generator.cc | 23 |
1 files changed, 13 insertions, 10 deletions
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc index 77cebfc56c..34400c9d22 100644 --- a/compiler/optimizing/code_generator.cc +++ b/compiler/optimizing/code_generator.cc @@ -1732,7 +1732,8 @@ void SlowPathCode::RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* } } -void CodeGenerator::CreateSystemArrayCopyLocationSummary(HInvoke* invoke) { +LocationSummary* CodeGenerator::CreateSystemArrayCopyLocationSummary( + HInvoke* invoke, int32_t length_threshold, size_t num_temps) { // Check to see if we have known failures that will cause us to have to bail out // to the runtime, and just generate the runtime call directly. HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstantOrNull(); @@ -1742,16 +1743,17 @@ void CodeGenerator::CreateSystemArrayCopyLocationSummary(HInvoke* invoke) { if ((src_pos != nullptr && src_pos->GetValue() < 0) || (dest_pos != nullptr && dest_pos->GetValue() < 0)) { // We will have to fail anyways. - return; + return nullptr; } - // The length must be >= 0. + // The length must be >= 0. If a positive `length_threshold` is provided, lengths + // greater or equal to the threshold are also handled by the normal implementation. HIntConstant* length = invoke->InputAt(4)->AsIntConstantOrNull(); if (length != nullptr) { int32_t len = length->GetValue(); - if (len < 0) { + if (len < 0 || (length_threshold > 0 && len >= length_threshold)) { // Just call as normal. - return; + return nullptr; } } @@ -1760,13 +1762,13 @@ void CodeGenerator::CreateSystemArrayCopyLocationSummary(HInvoke* invoke) { if (optimizations.GetDestinationIsSource()) { if (src_pos != nullptr && dest_pos != nullptr && src_pos->GetValue() < dest_pos->GetValue()) { // We only support backward copying if source and destination are the same. - return; + return nullptr; } } if (optimizations.GetDestinationIsPrimitiveArray() || optimizations.GetSourceIsPrimitiveArray()) { // We currently don't intrinsify primitive copying. 
- return; + return nullptr; } ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator(); @@ -1780,9 +1782,10 @@ void CodeGenerator::CreateSystemArrayCopyLocationSummary(HInvoke* invoke) { locations->SetInAt(3, Location::RegisterOrConstant(invoke->InputAt(3))); locations->SetInAt(4, Location::RegisterOrConstant(invoke->InputAt(4))); - locations->AddTemp(Location::RequiresRegister()); - locations->AddTemp(Location::RequiresRegister()); - locations->AddTemp(Location::RequiresRegister()); + if (num_temps != 0u) { + locations->AddRegisterTemps(num_temps); + } + return locations; } void CodeGenerator::EmitJitRoots(uint8_t* code, |