author Vladimir Marko <vmarko@google.com> 2023-12-11 17:02:22 +0000
committer Vladimír Marko <vmarko@google.com> 2023-12-13 16:42:23 +0000
commit f9562dc6e14621961dd5276c6669bf0adb84e149 (patch)
tree 969f4ab8e7435871e14490f80e38ecd68bab5a6d
parent 516f36d70993d9d43c9989a77bfe279051acefe5 (diff)
riscv64: Clean up the `SystemArrayCopy` intrinsic.
Define a new optimization flag for source and destination position
match. Use it to avoid the forward-copy check (where the assembler
optimized away a BLT instruction, so we had just a useless BNE to the
next instruction) and one position sign check.

Avoid checking that the position is inside the array. The subsequent
subtraction cannot underflow an `int32_t` and the following BLT shall
go to the slow path for negative values anyway.

Rewrite the array type check to avoid unnecessary checks and read
barriers.

Use an allocated temporary instead of a scratch register for the
marking in the read barrier slow path. Simplify the gray bit check
and the fake dependency.

Use constant position and length locations for small constant values.
(It was probably an oversight that we used it only for large constant
values.) Emit the threshold check even when the length equals the
source or destination length; the old code allowed the intrinsic to
process an array copy of arbitrary length.

Use `ShNAdd()` for faster array address calculations.

Use helper functions and lambdas to simplify the code. Pass registers
and locations by value. Prefer load/store macro instructions over raw
load/store instructions. Use a bare conditional branch to assert that
`TMP` shall not be clobbered.

Test: testrunner.py --target --64 --ndebug --optimizing
Bug: 283082089
Change-Id: I3f697b4a74497d6d712a92450a6a45e772430662
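
To make the bounds-check reasoning concrete, here is a minimal
standalone sketch (not part of the patch; all names are illustrative)
of why the explicit "position is inside the array" check can be
dropped:

#include <cstdint>

// Both `array_length` and `pos` are known non-negative int32_t
// values, so `array_length - pos` is at least 0 - INT32_MAX and
// cannot underflow int32_t. One signed comparison then covers both
// failure modes: if pos > array_length, the difference is negative
// and therefore below any non-negative copy length.
bool FitsInArray(int32_t array_length, int32_t pos, int32_t copy_length) {
  int32_t remaining = array_length - pos;  // no underflow possible
  return remaining >= copy_length;         // false => take the slow path
}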
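
Similarly, a rough sketch of what the `ShNAdd()` change buys: with the
Zba extension, a single `sh2add` computes `(pos << 2) + base` for
4-byte heap references, replacing the earlier `Slli` + `Add` + `Addi`
sequence. The data offset below is an assumed placeholder for the
value of `mirror::Array::DataOffset(element_size)`:

#include <cstdint>

// Element address for 4-byte heap references (shift N = 2). On
// riscv64 with Zba, the first statement maps to a single sh2add
// instruction.
uint64_t RefElementAddress(uint64_t array, uint64_t pos) {
  constexpr uint64_t kDataOffset = 12;  // assumed for this sketch
  uint64_t addr = (pos << 2) + array;   // sh2add addr, pos, array
  return addr + kDataOffset;            // addi  addr, addr, offset
}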
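
Finally, a sketch of the simplified gray bit check: one left shift
moves the read-barrier state bit into the sign bit, so a single `Bltz`
can test it, and it clears the low 32 bits, so `AddUw(src, tmp, src)`
adds zero to `src` while still creating the fake load-load dependency
on the lock word. The shift constant below is assumed for
illustration; the real value comes from `art::LockWord`:

#include <cstdint>

constexpr int kReadBarrierStateShift = 28;  // assumed; see art::LockWord

// Returns the shifted lock word: negative as int64_t iff the object
// is gray. Because kReadBarrierStateShift < 31, the shift amount
// exceeds 32, so the low 32 bits are zero and AddUw leaves `src`
// unchanged while still depending on the lock-word load.
int64_t ShiftRbStateToSignBit(uint32_t lock_word) {
  return static_cast<int64_t>(static_cast<uint64_t>(lock_word)
                              << (63 - kReadBarrierStateShift));
}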
-rw-r--r--  compiler/optimizing/code_generator.cc          23
-rw-r--r--  compiler/optimizing/code_generator.h            3
-rw-r--r--  compiler/optimizing/instruction_simplifier.cc   6
-rw-r--r--  compiler/optimizing/intrinsics.h                1
-rw-r--r--  compiler/optimizing/intrinsics_riscv64.cc     630
-rw-r--r--  compiler/optimizing/nodes.h                     2
6 files changed, 247 insertions, 418 deletions
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index 77cebfc56c..34400c9d22 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -1732,7 +1732,8 @@ void SlowPathCode::RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary*
}
}
-void CodeGenerator::CreateSystemArrayCopyLocationSummary(HInvoke* invoke) {
+LocationSummary* CodeGenerator::CreateSystemArrayCopyLocationSummary(
+ HInvoke* invoke, int32_t length_threshold, size_t num_temps) {
// Check to see if we have known failures that will cause us to have to bail out
// to the runtime, and just generate the runtime call directly.
HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstantOrNull();
@@ -1742,16 +1743,17 @@ void CodeGenerator::CreateSystemArrayCopyLocationSummary(HInvoke* invoke) {
if ((src_pos != nullptr && src_pos->GetValue() < 0) ||
(dest_pos != nullptr && dest_pos->GetValue() < 0)) {
// We will have to fail anyways.
- return;
+ return nullptr;
}
- // The length must be >= 0.
+ // The length must be >= 0. If a positive `length_threshold` is provided, lengths
+ // greater or equal to the threshold are also handled by the normal implementation.
HIntConstant* length = invoke->InputAt(4)->AsIntConstantOrNull();
if (length != nullptr) {
int32_t len = length->GetValue();
- if (len < 0) {
+ if (len < 0 || (length_threshold > 0 && len >= length_threshold)) {
// Just call as normal.
- return;
+ return nullptr;
}
}
@@ -1760,13 +1762,13 @@ void CodeGenerator::CreateSystemArrayCopyLocationSummary(HInvoke* invoke) {
if (optimizations.GetDestinationIsSource()) {
if (src_pos != nullptr && dest_pos != nullptr && src_pos->GetValue() < dest_pos->GetValue()) {
// We only support backward copying if source and destination are the same.
- return;
+ return nullptr;
}
}
if (optimizations.GetDestinationIsPrimitiveArray() || optimizations.GetSourceIsPrimitiveArray()) {
// We currently don't intrinsify primitive copying.
- return;
+ return nullptr;
}
ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator();
@@ -1780,9 +1782,10 @@ void CodeGenerator::CreateSystemArrayCopyLocationSummary(HInvoke* invoke) {
locations->SetInAt(3, Location::RegisterOrConstant(invoke->InputAt(3)));
locations->SetInAt(4, Location::RegisterOrConstant(invoke->InputAt(4)));
- locations->AddTemp(Location::RequiresRegister());
- locations->AddTemp(Location::RequiresRegister());
- locations->AddTemp(Location::RequiresRegister());
+ if (num_temps != 0u) {
+ locations->AddRegisterTemps(num_temps);
+ }
+ return locations;
}
void CodeGenerator::EmitJitRoots(uint8_t* code,
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index ee2653ec55..de6fc85da4 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -649,7 +649,8 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> {
static uint32_t GetBootImageOffset(ClassRoot class_root);
static uint32_t GetBootImageOffsetOfIntrinsicDeclaringClass(HInvoke* invoke);
- static void CreateSystemArrayCopyLocationSummary(HInvoke* invoke);
+ static LocationSummary* CreateSystemArrayCopyLocationSummary(
+ HInvoke* invoke, int32_t length_threshold = -1, size_t num_temps = 3);
void SetDisassemblyInformation(DisassemblyInformation* info) { disasm_info_ = info; }
DisassemblyInformation* GetDisassemblyInformation() const { return disasm_info_; }
diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc
index 94b201e876..5d552411db 100644
--- a/compiler/optimizing/instruction_simplifier.cc
+++ b/compiler/optimizing/instruction_simplifier.cc
@@ -2406,7 +2406,9 @@ static bool IsArrayLengthOf(HInstruction* potential_length, HInstruction* potent
void InstructionSimplifierVisitor::SimplifySystemArrayCopy(HInvoke* instruction) {
HInstruction* source = instruction->InputAt(0);
+ HInstruction* source_pos = instruction->InputAt(1);
HInstruction* destination = instruction->InputAt(2);
+ HInstruction* destination_pos = instruction->InputAt(3);
HInstruction* count = instruction->InputAt(4);
SystemArrayCopyOptimizations optimizations(instruction);
if (CanEnsureNotNullAt(source, instruction)) {
@@ -2419,6 +2421,10 @@ void InstructionSimplifierVisitor::SimplifySystemArrayCopy(HInvoke* instruction)
optimizations.SetDestinationIsSource();
}
+ if (source_pos == destination_pos) {
+ optimizations.SetSourcePositionIsDestinationPosition();
+ }
+
if (IsArrayLengthOf(count, source)) {
optimizations.SetCountIsSourceLength();
}
diff --git a/compiler/optimizing/intrinsics.h b/compiler/optimizing/intrinsics.h
index 6645e5da1a..d74d5d2a40 100644
--- a/compiler/optimizing/intrinsics.h
+++ b/compiler/optimizing/intrinsics.h
@@ -222,6 +222,7 @@ class SystemArrayCopyOptimizations : public IntrinsicOptimizations {
INTRINSIC_OPTIMIZATION(DestinationIsPrimitiveArray, 8);
INTRINSIC_OPTIMIZATION(SourceIsNonPrimitiveArray, 9);
INTRINSIC_OPTIMIZATION(SourceIsPrimitiveArray, 10);
+ INTRINSIC_OPTIMIZATION(SourcePositionIsDestinationPosition, 11);
private:
DISALLOW_COPY_AND_ASSIGN(SystemArrayCopyOptimizations);
diff --git a/compiler/optimizing/intrinsics_riscv64.cc b/compiler/optimizing/intrinsics_riscv64.cc
index 2714cf467e..7f99f91374 100644
--- a/compiler/optimizing/intrinsics_riscv64.cc
+++ b/compiler/optimizing/intrinsics_riscv64.cc
@@ -58,7 +58,7 @@ class ReadBarrierSystemArrayCopySlowPathRISCV64 : public SlowPathCodeRISCV64 {
__ Bind(GetEntryLabel());
Riscv64Label slow_copy_loop;
__ Bind(&slow_copy_loop);
- __ Lwu(tmp_reg, src_curr_addr, 0);
+ __ Loadwu(tmp_reg, src_curr_addr, 0);
codegen->MaybeUnpoisonHeapReference(tmp_reg);
// TODO: Inline the mark bit check before calling the runtime?
// tmp_reg = ReadBarrier::Mark(tmp_reg);
@@ -71,7 +71,7 @@ class ReadBarrierSystemArrayCopySlowPathRISCV64 : public SlowPathCodeRISCV64 {
// This runtime call does not require a stack map.
codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
codegen->MaybePoisonHeapReference(tmp_reg);
- __ Sw(tmp_reg, dst_curr_addr, 0);
+ __ Storew(tmp_reg, dst_curr_addr, 0);
__ Addi(src_curr_addr, src_curr_addr, element_size);
__ Addi(dst_curr_addr, dst_curr_addr, element_size);
__ Bne(src_curr_addr, src_stop_addr, &slow_copy_loop);
@@ -1262,61 +1262,83 @@ class ReadBarrierCasSlowPathRISCV64 : public SlowPathCodeRISCV64 {
Riscv64Label success_exit_label_;
};
+static void EmitBlt32(Riscv64Assembler* assembler,
+ XRegister rs1,
+ Location rs2,
+ Riscv64Label* label,
+ XRegister temp) {
+ if (rs2.IsConstant()) {
+ __ Li(temp, rs2.GetConstant()->AsIntConstant()->GetValue());
+ __ Blt(rs1, temp, label);
+ } else {
+ __ Blt(rs1, rs2.AsRegister<XRegister>(), label);
+ }
+}
+
static void CheckSystemArrayCopyPosition(Riscv64Assembler* assembler,
- const Location& pos,
- const XRegister& input,
- const Location& length,
+ XRegister array,
+ Location pos,
+ Location length,
SlowPathCodeRISCV64* slow_path,
- const XRegister& temp1,
- const XRegister& temp2,
- bool length_is_input_length = false) {
+ XRegister temp1,
+ XRegister temp2,
+ bool length_is_array_length,
+ bool position_sign_checked) {
const int32_t length_offset = mirror::Array::LengthOffset().Int32Value();
if (pos.IsConstant()) {
int32_t pos_const = pos.GetConstant()->AsIntConstant()->GetValue();
+ DCHECK_GE(pos_const, 0); // Checked in location builder.
if (pos_const == 0) {
- if (!length_is_input_length) {
- // Check that length(input) >= length.
- __ Lwu(temp1, input, length_offset);
- if (length.IsConstant()) {
- __ Li(temp2, length.GetConstant()->AsIntConstant()->GetValue());
- __ Blt(temp1, temp2, slow_path->GetEntryLabel());
- } else {
- __ Blt(temp1, length.AsRegister<XRegister>(), slow_path->GetEntryLabel());
- }
+ if (!length_is_array_length) {
+ // Check that length(array) >= length.
+ __ Loadw(temp1, array, length_offset);
+ EmitBlt32(assembler, temp1, length, slow_path->GetEntryLabel(), temp2);
}
} else {
- // Check that length(input) >= pos.
- __ Lwu(temp1, input, length_offset);
+ // Calculate length(array) - pos.
+ // Both operands are known to be non-negative `int32_t`, so the difference cannot underflow
+ // as `int32_t`. If the result is negative, the BLT below shall go to the slow path.
+ __ Loadw(temp1, array, length_offset);
__ AddConst32(temp1, temp1, -pos_const);
- __ Bltz(temp1, slow_path->GetEntryLabel());
- // Check that (length(input) - pos) >= length.
- if (length.IsConstant()) {
- __ Li(temp2, length.GetConstant()->AsIntConstant()->GetValue());
- __ Blt(temp1, temp2, slow_path->GetEntryLabel());
- } else {
- __ Blt(temp1, length.AsRegister<XRegister>(), slow_path->GetEntryLabel());
- }
+ // Check that (length(array) - pos) >= length.
+ EmitBlt32(assembler, temp1, length, slow_path->GetEntryLabel(), temp2);
}
- } else if (length_is_input_length) {
+ } else if (length_is_array_length) {
// The only way the copy can succeed is if pos is zero.
__ Bnez(pos.AsRegister<XRegister>(), slow_path->GetEntryLabel());
} else {
// Check that pos >= 0.
XRegister pos_reg = pos.AsRegister<XRegister>();
- __ Bltz(pos_reg, slow_path->GetEntryLabel());
-
- // Check that pos <= length(input)
- __ Lwu(temp1, input, length_offset);
- __ Bgt(pos_reg, temp1, slow_path->GetEntryLabel());
+ if (!position_sign_checked) {
+ __ Bltz(pos_reg, slow_path->GetEntryLabel());
+ }
- // Check that (length(input) - pos) >= length.
+ // Calculate length(array) - pos.
+ // Both operands are known to be non-negative `int32_t`, so the difference cannot underflow
+ // as `int32_t`. If the result is negative, the BLT below shall go to the slow path.
+ __ Loadw(temp1, array, length_offset);
__ Sub(temp1, temp1, pos_reg);
- if (length.IsConstant()) {
- __ Li(temp2, length.GetConstant()->AsIntConstant()->GetValue());
- __ Blt(temp1, temp2, slow_path->GetEntryLabel());
- } else {
- __ Blt(temp1, length.AsRegister<XRegister>(), slow_path->GetEntryLabel());
+
+ // Check that (length(array) - pos) >= length.
+ EmitBlt32(assembler, temp1, length, slow_path->GetEntryLabel(), temp2);
+ }
+}
+
+static void GenArrayAddress(CodeGeneratorRISCV64* codegen,
+ XRegister dest,
+ XRegister base,
+ Location pos,
+ DataType::Type type,
+ int32_t data_offset) {
+ Riscv64Assembler* assembler = codegen->GetAssembler();
+ if (pos.IsConstant()) {
+ int32_t constant = pos.GetConstant()->AsIntConstant()->GetValue();
+ __ AddConst64(dest, base, DataType::Size(type) * constant + data_offset);
+ } else {
+ codegen->GetInstructionVisitor()->ShNAdd(dest, pos.AsRegister<XRegister>(), base, type);
+ if (data_offset != 0) {
+ __ AddConst64(dest, dest, data_offset);
}
}
}
@@ -1324,67 +1346,39 @@ static void CheckSystemArrayCopyPosition(Riscv64Assembler* assembler,
// Compute base source address, base destination address, and end
// source address for System.arraycopy* intrinsics in `src_base`,
// `dst_base` and `src_end` respectively.
-static void GenSystemArrayCopyAddresses(Riscv64Assembler* assembler,
+static void GenSystemArrayCopyAddresses(CodeGeneratorRISCV64* codegen,
DataType::Type type,
- const XRegister& src,
- const Location& src_pos,
- const XRegister& dst,
- const Location& dst_pos,
- const Location& copy_length,
- const XRegister& src_base,
- const XRegister& dst_base,
- const XRegister& src_end) {
+ XRegister src,
+ Location src_pos,
+ XRegister dst,
+ Location dst_pos,
+ Location copy_length,
+ XRegister src_base,
+ XRegister dst_base,
+ XRegister src_end) {
// This routine is used by the SystemArrayCopy and the SystemArrayCopyChar intrinsics.
DCHECK(type == DataType::Type::kReference || type == DataType::Type::kUint16)
<< "Unexpected element type: " << type;
const int32_t element_size = DataType::Size(type);
- const int32_t element_size_shift = DataType::SizeShift(type);
const uint32_t data_offset = mirror::Array::DataOffset(element_size).Uint32Value();
- if (src_pos.IsConstant()) {
- int32_t constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
- __ AddConst32(src_base, src, element_size * constant + data_offset);
- } else {
- __ Slli(src_base, src_pos.AsRegister<XRegister>(), element_size_shift);
- __ Add(src_base, src_base, src);
- __ Addi(src_base, src_base, data_offset);
- }
-
- if (dst_pos.IsConstant()) {
- int32_t constant = dst_pos.GetConstant()->AsIntConstant()->GetValue();
- __ AddConst32(dst_base, dst, element_size * constant + data_offset);
- } else {
- __ Slli(dst_base, dst_pos.AsRegister<XRegister>(), element_size_shift);
- __ Add(dst_base, dst_base, dst);
- __ Addi(dst_base, dst_base, data_offset);
- }
-
- if (copy_length.IsConstant()) {
- int32_t constant = copy_length.GetConstant()->AsIntConstant()->GetValue();
- __ AddConst32(src_end, src_base, element_size * constant);
- } else {
- __ Slli(src_end, copy_length.AsRegister<XRegister>(), element_size_shift);
- __ Add(src_end, src_end, src_base);
- }
+ GenArrayAddress(codegen, src_base, src, src_pos, type, data_offset);
+ GenArrayAddress(codegen, dst_base, dst, dst_pos, type, data_offset);
+ GenArrayAddress(codegen, src_end, src_base, copy_length, type, /*data_offset=*/ 0);
}
-static void SetSystemArrayCopyLocationRequires(LocationSummary* locations,
- uint32_t at,
- HInstruction* input) {
+static Location LocationForSystemArrayCopyInput(HInstruction* input) {
HIntConstant* const_input = input->AsIntConstantOrNull();
if (const_input != nullptr && IsInt<12>(const_input->GetValue())) {
- locations->SetInAt(at, Location::RequiresRegister());
+ return Location::ConstantLocation(const_input);
} else {
- locations->SetInAt(at, Location::RegisterOrConstant(input));
+ return Location::RequiresRegister();
}
}
// We can choose to use the native implementation there for longer copy lengths.
static constexpr int32_t kSystemArrayCopyThreshold = 128;
-// CodeGenerator::CreateSystemArrayCopyLocationSummary use three temporary registers.
-// We want to use two temporary registers in order to reduce the register pressure in riscv64.
-// So we don't use the CodeGenerator::CreateSystemArrayCopyLocationSummary.
void IntrinsicLocationsBuilderRISCV64::VisitSystemArrayCopy(HInvoke* invoke) {
// The only read barrier implementation supporting the
// SystemArrayCopy intrinsic is the Baker-style read barriers.
@@ -1392,55 +1386,15 @@ void IntrinsicLocationsBuilderRISCV64::VisitSystemArrayCopy(HInvoke* invoke) {
return;
}
- // Check to see if we have known failures that will cause us to have to bail out
- // to the runtime, and just generate the runtime call directly.
- HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstantOrNull();
- HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstantOrNull();
-
- // The positions must be non-negative.
- if ((src_pos != nullptr && src_pos->GetValue() < 0) ||
- (dest_pos != nullptr && dest_pos->GetValue() < 0)) {
- // We will have to fail anyways.
- return;
- }
-
- // The length must be >= 0.
- HIntConstant* length = invoke->InputAt(4)->AsIntConstantOrNull();
- if (length != nullptr) {
- int32_t len = length->GetValue();
- if (len < 0 || len >= kSystemArrayCopyThreshold) {
- // Just call as normal.
- return;
- }
- }
-
- SystemArrayCopyOptimizations optimizations(invoke);
-
- if (optimizations.GetDestinationIsSource()) {
- if (src_pos != nullptr && dest_pos != nullptr && src_pos->GetValue() < dest_pos->GetValue()) {
- // We only support backward copying if source and destination are the same.
- return;
- }
- }
-
- if (optimizations.GetDestinationIsPrimitiveArray() || optimizations.GetSourceIsPrimitiveArray()) {
- // We currently don't intrinsify primitive copying.
- return;
+ size_t num_temps = codegen_->EmitBakerReadBarrier() ? 4u : 2u;
+ LocationSummary* locations = CodeGenerator::CreateSystemArrayCopyLocationSummary(
+ invoke, kSystemArrayCopyThreshold, num_temps);
+ if (locations != nullptr) {
+ // We request position and length as constants only for small integral values.
+ locations->SetInAt(1, LocationForSystemArrayCopyInput(invoke->InputAt(1)));
+ locations->SetInAt(3, LocationForSystemArrayCopyInput(invoke->InputAt(3)));
+ locations->SetInAt(4, LocationForSystemArrayCopyInput(invoke->InputAt(4)));
}
-
- ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator();
- LocationSummary* locations =
- new (allocator) LocationSummary(invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
- // arraycopy(Object src, int src_pos, Object dest, int dest_pos, int length).
- locations->SetInAt(0, Location::RequiresRegister());
- SetSystemArrayCopyLocationRequires(locations, 1, invoke->InputAt(1));
- locations->SetInAt(2, Location::RequiresRegister());
- SetSystemArrayCopyLocationRequires(locations, 3, invoke->InputAt(3));
- SetSystemArrayCopyLocationRequires(locations, 4, invoke->InputAt(4));
-
- locations->AddTemp(Location::RequiresRegister());
- locations->AddTemp(Location::RequiresRegister());
- locations->AddTemp(Location::RequiresRegister());
}
void IntrinsicCodeGeneratorRISCV64::VisitSystemArrayCopy(HInvoke* invoke) {
@@ -1463,9 +1417,7 @@ void IntrinsicCodeGeneratorRISCV64::VisitSystemArrayCopy(HInvoke* invoke) {
Location dest_pos = locations->InAt(3);
Location length = locations->InAt(4);
XRegister temp1 = locations->GetTemp(0).AsRegister<XRegister>();
- Location temp1_loc = Location::RegisterLocation(temp1);
XRegister temp2 = locations->GetTemp(1).AsRegister<XRegister>();
- Location temp2_loc = Location::RegisterLocation(temp2);
SlowPathCodeRISCV64* intrinsic_slow_path =
new (codegen_->GetScopedAllocator()) IntrinsicSlowPathRISCV64(invoke);
@@ -1474,36 +1426,32 @@ void IntrinsicCodeGeneratorRISCV64::VisitSystemArrayCopy(HInvoke* invoke) {
Riscv64Label conditions_on_positions_validated;
SystemArrayCopyOptimizations optimizations(invoke);
- // If source and destination are the same, we go to slow path if we need to do
- // forward copying.
- if (src_pos.IsConstant()) {
- int32_t src_pos_constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
- if (dest_pos.IsConstant()) {
- int32_t dest_pos_constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
- if (optimizations.GetDestinationIsSource()) {
- // Checked when building locations.
- DCHECK_GE(src_pos_constant, dest_pos_constant);
- } else if (src_pos_constant < dest_pos_constant) {
- __ Beq(src, dest, intrinsic_slow_path->GetEntryLabel());
+ // If source and destination are the same, we go to slow path if we need to do forward copying.
+ // We do not need to do this check if the source and destination positions are the same.
+ if (!optimizations.GetSourcePositionIsDestinationPosition()) {
+ if (src_pos.IsConstant()) {
+ int32_t src_pos_constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
+ if (dest_pos.IsConstant()) {
+ int32_t dest_pos_constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
+ if (optimizations.GetDestinationIsSource()) {
+ // Checked when building locations.
+ DCHECK_GE(src_pos_constant, dest_pos_constant);
+ } else if (src_pos_constant < dest_pos_constant) {
+ __ Beq(src, dest, intrinsic_slow_path->GetEntryLabel());
+ }
+ } else {
+ if (!optimizations.GetDestinationIsSource()) {
+ __ Bne(src, dest, &conditions_on_positions_validated);
+ }
+ __ Li(temp1, src_pos_constant);
+ __ Bgt(dest_pos.AsRegister<XRegister>(), temp1, intrinsic_slow_path->GetEntryLabel());
}
} else {
if (!optimizations.GetDestinationIsSource()) {
__ Bne(src, dest, &conditions_on_positions_validated);
}
- __ Li(temp1, src_pos_constant);
- __ Bgt(dest_pos.AsRegister<XRegister>(), temp1, intrinsic_slow_path->GetEntryLabel());
- }
- } else {
- if (!optimizations.GetDestinationIsSource()) {
- __ Bne(src, dest, &conditions_on_positions_validated);
- }
- if (dest_pos.IsConstant()) {
- __ Li(temp2, dest_pos.GetConstant()->AsIntConstant()->GetValue());
- __ Blt(src_pos.AsRegister<XRegister>(), temp2, intrinsic_slow_path->GetEntryLabel());
- } else {
- __ Blt(src_pos.AsRegister<XRegister>(),
- dest_pos.AsRegister<XRegister>(),
- intrinsic_slow_path->GetEntryLabel());
+ XRegister src_pos_reg = src_pos.AsRegister<XRegister>();
+ EmitBlt32(assembler, src_pos_reg, dest_pos, intrinsic_slow_path->GetEntryLabel(), temp2);
}
}
@@ -1520,9 +1468,7 @@ void IntrinsicCodeGeneratorRISCV64::VisitSystemArrayCopy(HInvoke* invoke) {
}
// We have already checked in the LocationsBuilder for the constant case.
- if (!length.IsConstant() &&
- !optimizations.GetCountIsSourceLength() &&
- !optimizations.GetCountIsDestinationLength()) {
+ if (!length.IsConstant()) {
// Merge the following two comparisons into one:
// If the length is negative, bail out (delegate to libcore's native implementation).
// If the length >= 128 then (currently) prefer native implementation.
@@ -1531,35 +1477,46 @@ void IntrinsicCodeGeneratorRISCV64::VisitSystemArrayCopy(HInvoke* invoke) {
}
// Validity checks: source.
CheckSystemArrayCopyPosition(assembler,
- src_pos,
src,
+ src_pos,
length,
intrinsic_slow_path,
temp1,
temp2,
- optimizations.GetCountIsSourceLength());
+ optimizations.GetCountIsSourceLength(),
+ /*position_sign_checked=*/ false);
// Validity checks: dest.
+ bool dest_position_sign_checked = optimizations.GetSourcePositionIsDestinationPosition();
CheckSystemArrayCopyPosition(assembler,
- dest_pos,
dest,
+ dest_pos,
length,
intrinsic_slow_path,
temp1,
temp2,
- optimizations.GetCountIsDestinationLength());
+ optimizations.GetCountIsDestinationLength(),
+ dest_position_sign_checked);
{
// We use a block to end the scratch scope before the write barrier, thus
// freeing the temporary registers so they can be used in `MarkGCCard`.
ScratchRegisterScope srs(assembler);
- Location temp3_loc; // Used only for Baker read barrier.
- XRegister temp3;
- if (codegen_->EmitBakerReadBarrier()) {
- temp3_loc = locations->GetTemp(2);
- temp3 = temp3_loc.AsRegister<XRegister>();
- } else {
- temp3 = srs.AllocateXRegister();
- }
+ bool emit_rb = codegen_->EmitBakerReadBarrier();
+ XRegister temp3 =
+ emit_rb ? locations->GetTemp(2).AsRegister<XRegister>() : srs.AllocateXRegister();
+
+ auto check_non_primitive_array_class = [&](XRegister klass, XRegister temp) {
+ // No read barrier is needed for reading a chain of constant references for comparing
+ // with null, or for reading a constant primitive value, see `ReadBarrierOption`.
+ // /* HeapReference<Class> */ temp = klass->component_type_
+ __ Loadwu(temp, klass, component_offset);
+ codegen_->MaybeUnpoisonHeapReference(temp);
+ __ Beqz(temp, intrinsic_slow_path->GetEntryLabel());
+ // /* uint16_t */ temp = static_cast<uint16>(klass->primitive_type_);
+ __ Loadhu(temp, temp, primitive_offset);
+ static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
+ __ Bnez(temp, intrinsic_slow_path->GetEntryLabel());
+ };
if (!optimizations.GetDoesNotNeedTypeCheck()) {
// Check whether all elements of the source array are assignable to the component
@@ -1567,188 +1524,68 @@ void IntrinsicCodeGeneratorRISCV64::VisitSystemArrayCopy(HInvoke* invoke) {
// or the destination is Object[]. If none of these checks succeed, we go to the
// slow path.
- if (codegen_->EmitBakerReadBarrier()) {
- if (!optimizations.GetSourceIsNonPrimitiveArray()) {
- // /* HeapReference<Class> */ temp1 = src->klass_
- codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
- temp1_loc,
- src,
- class_offset,
- temp3_loc,
- /* needs_null_check= */ false);
- // Bail out if the source is not a non primitive array.
- // /* HeapReference<Class> */ temp1 = temp1->component_type_
- codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
- temp1_loc,
- temp1,
- component_offset,
- temp3_loc,
- /* needs_null_check= */ false);
- __ Beqz(temp1, intrinsic_slow_path->GetEntryLabel());
- // If heap poisoning is enabled, `temp1` has been unpoisoned
- // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
- // /* uint16_t */ temp1 = static_cast<uint16>(temp1->primitive_type_);
- __ Lhu(temp1, temp1, primitive_offset);
- static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
- __ Bnez(temp1, intrinsic_slow_path->GetEntryLabel());
- }
-
+ if (emit_rb) {
// /* HeapReference<Class> */ temp1 = dest->klass_
codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
- temp1_loc,
+ Location::RegisterLocation(temp1),
dest,
class_offset,
- temp3_loc,
+ Location::RegisterLocation(temp3),
/* needs_null_check= */ false);
-
- if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
- // Bail out if the destination is not a non primitive array.
- //
- // Register `temp1` is not trashed by the read barrier emitted
- // by GenerateFieldLoadWithBakerReadBarrier below, as that
- // method produces a call to a ReadBarrierMarkRegX entry point,
- // which saves all potentially live registers, including
- // temporaries such a `temp1`.
- // /* HeapReference<Class> */ temp2 = temp1->component_type_
- codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
- temp2_loc,
- temp1,
- component_offset,
- temp3_loc,
- /* needs_null_check= */ false);
- __ Beqz(temp2, intrinsic_slow_path->GetEntryLabel());
- // If heap poisoning is enabled, `temp2` has been unpoisoned
- // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
- // /* uint16_t */ temp2 = static_cast<uint16>(temp2->primitive_type_);
- __ Lhu(temp2, temp2, primitive_offset);
- static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
- __ Bnez(temp2, intrinsic_slow_path->GetEntryLabel());
- }
-
- // For the same reason given earlier, `temp1` is not trashed by the
- // read barrier emitted by GenerateFieldLoadWithBakerReadBarrier below.
// /* HeapReference<Class> */ temp2 = src->klass_
codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
- temp2_loc,
+ Location::RegisterLocation(temp2),
src,
class_offset,
- temp3_loc,
+ Location::RegisterLocation(temp3),
/* needs_null_check= */ false);
-
- if (optimizations.GetDestinationIsTypedObjectArray()) {
- Riscv64Label do_copy;
- __ Beq(temp1, temp2, &do_copy);
- // /* HeapReference<Class> */ temp1 = temp1->component_type_
- codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
- temp1_loc,
- temp1,
- component_offset,
- temp3_loc,
- /* needs_null_check= */ false);
- // /* HeapReference<Class> */ temp1 = temp1->super_class_
- // We do not need to emit a read barrier for the following
- // heap reference load, as `temp1` is only used in a
- // comparison with null below, and this reference is not
- // kept afterwards.
- __ Lwu(temp1, temp1, super_offset);
- __ Bnez(temp1, intrinsic_slow_path->GetEntryLabel());
- __ Bind(&do_copy);
- } else {
- __ Bne(temp1, temp2, intrinsic_slow_path->GetEntryLabel());
- }
} else {
- // Non read barrier code.
-
// /* HeapReference<Class> */ temp1 = dest->klass_
- __ Lwu(temp1, dest, class_offset);
+ __ Loadwu(temp1, dest, class_offset);
+ codegen_->MaybeUnpoisonHeapReference(temp1);
// /* HeapReference<Class> */ temp2 = src->klass_
- __ Lwu(temp2, src, class_offset);
- bool did_unpoison = false;
- if (!optimizations.GetDestinationIsNonPrimitiveArray() ||
- !optimizations.GetSourceIsNonPrimitiveArray()) {
- // One or two of the references need to be unpoisoned. Unpoison them
- // both to make the identity check valid.
- codegen_->MaybeUnpoisonHeapReference(temp1);
- codegen_->MaybeUnpoisonHeapReference(temp2);
- did_unpoison = true;
- }
-
- if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
- // Bail out if the destination is not a non primitive array.
- // /* HeapReference<Class> */ temp3 = temp1->component_type_
- __ Lwu(temp3, temp1, component_offset);
- __ Beqz(temp3, intrinsic_slow_path->GetEntryLabel());
- codegen_->MaybeUnpoisonHeapReference(temp3);
- // /* uint16_t */ temp3 = static_cast<uint16>(temp3->primitive_type_);
- __ Lhu(temp3, temp3, primitive_offset);
- static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
- __ Bnez(temp3, intrinsic_slow_path->GetEntryLabel());
- }
+ __ Loadwu(temp2, src, class_offset);
+ codegen_->MaybeUnpoisonHeapReference(temp2);
+ }
+ if (optimizations.GetDestinationIsTypedObjectArray()) {
+ DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
+ Riscv64Label do_copy;
+ // For class match, we can skip the source type check regardless of the optimization flag.
+ __ Beq(temp1, temp2, &do_copy);
+ // /* HeapReference<Class> */ temp1 = temp1->component_type_
+ // No read barrier is needed for reading a chain of constant references
+ // for comparing with null, see `ReadBarrierOption`.
+ __ Loadwu(temp1, temp1, component_offset);
+ codegen_->MaybeUnpoisonHeapReference(temp1);
+ // /* HeapReference<Class> */ temp1 = temp1->super_class_
+ __ Loadwu(temp1, temp1, super_offset);
+ // No need to unpoison the result, we're comparing against null.
+ __ Bnez(temp1, intrinsic_slow_path->GetEntryLabel());
+ // Bail out if the source is not a non primitive array.
if (!optimizations.GetSourceIsNonPrimitiveArray()) {
- // Bail out if the source is not a non primitive array.
- // /* HeapReference<Class> */ temp3 = temp2->component_type_
- __ Lwu(temp3, temp2, component_offset);
- __ Beqz(temp3, intrinsic_slow_path->GetEntryLabel());
- codegen_->MaybeUnpoisonHeapReference(temp3);
- // /* uint16_t */ temp3 = static_cast<uint16>(temp3->primitive_type_);
- __ Lhu(temp3, temp3, primitive_offset);
- static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
- __ Bnez(temp3, intrinsic_slow_path->GetEntryLabel());
+ check_non_primitive_array_class(temp2, temp3);
}
-
- if (optimizations.GetDestinationIsTypedObjectArray()) {
- Riscv64Label do_copy;
- __ Beq(temp1, temp2, &do_copy);
- if (!did_unpoison) {
- codegen_->MaybeUnpoisonHeapReference(temp1);
- }
- // /* HeapReference<Class> */ temp1 = temp1->component_type_
- __ Lwu(temp1, temp1, component_offset);
- codegen_->MaybeUnpoisonHeapReference(temp1);
- // /* HeapReference<Class> */ temp1 = temp1->super_class_
- __ Lwu(temp1, temp1, super_offset);
- // No need to unpoison the result, we're comparing against null.
- __ Bnez(temp1, intrinsic_slow_path->GetEntryLabel());
- __ Bind(&do_copy);
- } else {
- __ Bne(temp1, temp2, intrinsic_slow_path->GetEntryLabel());
+ __ Bind(&do_copy);
+ } else {
+ DCHECK(!optimizations.GetDestinationIsTypedObjectArray());
+ // For class match, we can skip the array type check completely if at least one of source
+ // and destination is known to be a non primitive array, otherwise one check is enough.
+ __ Bne(temp1, temp2, intrinsic_slow_path->GetEntryLabel());
+ if (!optimizations.GetDestinationIsNonPrimitiveArray() &&
+ !optimizations.GetSourceIsNonPrimitiveArray()) {
+ check_non_primitive_array_class(temp2, temp3);
}
}
} else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
// Bail out if the source is not a non primitive array.
- if (codegen_->EmitBakerReadBarrier()) {
- // /* HeapReference<Class> */ temp1 = src->klass_
- codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
- temp1_loc,
- src,
- class_offset,
- temp3_loc,
- /* needs_null_check= */ false);
- // /* HeapReference<Class> */ temp2 = temp1->component_type_
- codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
- temp2_loc,
- temp1,
- component_offset,
- temp3_loc,
- /* needs_null_check= */ false);
- __ Beqz(temp2, intrinsic_slow_path->GetEntryLabel());
- // If heap poisoning is enabled, `temp2` has been unpoisoned
- // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
- } else {
- // /* HeapReference<Class> */ temp1 = src->klass_
- __ Lwu(temp1, src, class_offset);
- codegen_->MaybeUnpoisonHeapReference(temp1);
- // /* HeapReference<Class> */ temp2 = temp1->component_type_
- __ Lwu(temp2, temp1, component_offset);
- __ Beqz(temp2, intrinsic_slow_path->GetEntryLabel());
- codegen_->MaybeUnpoisonHeapReference(temp2);
- }
- // /* uint16_t */ temp2 = static_cast<uint16>(temp2->primitive_type_);
- __ Lhu(temp2, temp2, primitive_offset);
- static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
- __ Bnez(temp2, intrinsic_slow_path->GetEntryLabel());
+ // No read barrier is needed for reading a chain of constant references for comparing
+ // with null, or for reading a constant primitive value, see `ReadBarrierOption`.
+ // /* HeapReference<Class> */ temp2 = src->klass_
+ __ Loadwu(temp2, src, class_offset);
+ codegen_->MaybeUnpoisonHeapReference(temp2);
+ check_non_primitive_array_class(temp2, temp3);
}
if (length.IsConstant() && length.GetConstant()->AsIntConstant()->GetValue() == 0) {
@@ -1766,7 +1603,9 @@ void IntrinsicCodeGeneratorRISCV64::VisitSystemArrayCopy(HInvoke* invoke) {
__ Beqz(length.AsRegister<XRegister>(), &done);
}
- if (codegen_->EmitBakerReadBarrier()) {
+ XRegister tmp = kNoXRegister;
+ SlowPathCodeRISCV64* read_barrier_slow_path = nullptr;
+ if (emit_rb) {
// TODO: Also convert this intrinsic to the IsGcMarking strategy?
// SystemArrayCopy implementation for Baker read barriers (see
@@ -1787,95 +1626,74 @@ void IntrinsicCodeGeneratorRISCV64::VisitSystemArrayCopy(HInvoke* invoke) {
// } while (src_ptr != end_ptr)
// }
- XRegister tmp = srs.AllocateXRegister();
-
// /* uint32_t */ monitor = src->monitor_
- __ Lwu(tmp, src, monitor_offset);
+ tmp = locations->GetTemp(3).AsRegister<XRegister>();
+ __ Loadwu(tmp, src, monitor_offset);
// /* LockWord */ lock_word = LockWord(monitor)
static_assert(sizeof(LockWord) == sizeof(int32_t),
"art::LockWord and int32_t have different sizes.");
- // Introduce a dependency on the lock_word including rb_state,
- // to prevent load-load reordering, and without using
- // a memory barrier (which would be more expensive).
+ // Shift the RB state bit to the sign bit while also clearing the low 32 bits
+ // for the fake dependency below.
+ static_assert(LockWord::kReadBarrierStateShift < 31);
+ __ Slli(tmp, tmp, 63 - LockWord::kReadBarrierStateShift);
+
+ // Introduce a dependency on the lock_word including rb_state, to prevent load-load
+ // reordering, and without using a memory barrier (which would be more expensive).
// `src` is unchanged by this operation (since Adduw adds low 32 bits
- // which are zero after left shift), but its value now depends
- // on `tmp`.
- __ Slli(tmp, tmp, 32);
+ // which are zero after left shift), but its value now depends on `tmp`.
__ AddUw(src, tmp, src);
- __ Srli(tmp, tmp, 32);
-
- // Compute base source address, base destination address, and end
- // source address for System.arraycopy* intrinsics in `src_base`,
- // `dst_base` and `src_end` respectively.
- // Note that `src_curr_addr` is computed from from `src` (and
- // `src_pos`) here, and thus honors the artificial dependency
- // of `src` on `tmp`.
- GenSystemArrayCopyAddresses(assembler,
- type,
- src,
- src_pos,
- dest,
- dest_pos,
- length,
- src_curr_addr,
- dst_curr_addr,
- src_stop_addr);
// Slow path used to copy array when `src` is gray.
- SlowPathCodeRISCV64* read_barrier_slow_path = new (codegen_->GetScopedAllocator())
+ read_barrier_slow_path = new (codegen_->GetScopedAllocator())
ReadBarrierSystemArrayCopySlowPathRISCV64(invoke, Location::RegisterLocation(tmp));
codegen_->AddSlowPath(read_barrier_slow_path);
+ }
- // Given the numeric representation, it's enough to check the low bit of the rb_state.
+ // Compute base source address, base destination address, and end source address for
+ // System.arraycopy* intrinsics in `src_base`, `dst_base` and `src_end` respectively.
+ // Note that `src_curr_addr` is computed from `src` (and `src_pos`) here, and
+ // thus honors the artificial dependency of `src` on `tmp` for read barriers.
+ GenSystemArrayCopyAddresses(codegen_,
+ type,
+ src,
+ src_pos,
+ dest,
+ dest_pos,
+ length,
+ src_curr_addr,
+ dst_curr_addr,
+ src_stop_addr);
+
+ if (emit_rb) {
+ // Given the numeric representation, it's enough to check the low bit of the RB state.
static_assert(ReadBarrier::NonGrayState() == 0, "Expecting non-gray to have value 0");
static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
- // TODO(riscv64): use `bexti` instead
- __ Srli(tmp, tmp, LockWord::kReadBarrierStateShift);
- __ Andi(tmp, tmp, 1);
- __ Bnez(tmp, read_barrier_slow_path->GetEntryLabel());
-
- // Fast-path copy.
- // Iterate over the arrays and do a raw copy of the objects. We don't need to
- // poison/unpoison.
- Riscv64Label loop;
- __ Bind(&loop);
- __ Lwu(tmp, src_curr_addr, 0);
- __ Sw(tmp, dst_curr_addr, 0);
- __ Addi(dst_curr_addr, dst_curr_addr, element_size);
- __ Addi(src_curr_addr, src_curr_addr, element_size);
- __ Bne(src_curr_addr, src_stop_addr, &loop);
-
- __ Bind(read_barrier_slow_path->GetExitLabel());
+ DCHECK_NE(tmp, kNoXRegister);
+ __ Bltz(tmp, read_barrier_slow_path->GetEntryLabel());
} else {
- // Non read barrier code.
- // Compute base source address, base destination address, and end
- // source address for System.arraycopy* intrinsics in `src_base`,
- // `dst_base` and `src_end` respectively.
- GenSystemArrayCopyAddresses(assembler,
- type,
- src,
- src_pos,
- dest,
- dest_pos,
- length,
- src_curr_addr,
- dst_curr_addr,
- src_stop_addr);
- // Iterate over the arrays and do a raw copy of the objects. We don't need to
- // poison/unpoison.
- Riscv64Label loop;
- __ Bind(&loop);
- {
- XRegister tmp = srs.AllocateXRegister();
- __ Lwu(tmp, src_curr_addr, 0);
- __ Sw(tmp, dst_curr_addr, 0);
- __ Addi(src_curr_addr, src_curr_addr, element_size);
- __ Addi(dst_curr_addr, dst_curr_addr, element_size);
- }
- __ Bne(src_curr_addr, src_stop_addr, &loop);
+ // After allocating the last scratch register, we cannot use macro load/store instructions
+ // such as `Loadwu()` and need to use raw instructions. However, all offsets below are 0.
+ DCHECK_EQ(tmp, kNoXRegister);
+ tmp = srs.AllocateXRegister();
}
+
+ // Iterate over the arrays and do a raw copy of the objects. We don't need to
+ // poison/unpoison.
+ Riscv64Label loop;
+ __ Bind(&loop);
+ __ Lwu(tmp, src_curr_addr, 0);
+ __ Sw(tmp, dst_curr_addr, 0);
+ __ Addi(src_curr_addr, src_curr_addr, element_size);
+ __ Addi(dst_curr_addr, dst_curr_addr, element_size);
+ // Bare: `TMP` shall not be clobbered.
+ __ Bne(src_curr_addr, src_stop_addr, &loop, /*is_bare=*/ true);
__ Bind(&done);
+
+ if (emit_rb) {
+ DCHECK(read_barrier_slow_path != nullptr);
+ __ Bind(read_barrier_slow_path->GetExitLabel());
+ }
}
}
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 7d9e50e3e7..d84ff7be73 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -253,7 +253,7 @@ class ReferenceTypeInfo : ValueObject {
bool IsNonPrimitiveArrayClass() const REQUIRES_SHARED(Locks::mutator_lock_) {
DCHECK(IsValid());
- return GetTypeHandle()->IsArrayClass() && !GetTypeHandle()->IsPrimitiveArray();
+ return IsArrayClass() && !GetTypeHandle()->IsPrimitiveArray();
}
bool CanArrayHold(ReferenceTypeInfo rti) const REQUIRES_SHARED(Locks::mutator_lock_) {