author Vladimir Marko <vmarko@google.com> 2023-11-20 10:06:45 +0000
committer Vladimír Marko <vmarko@google.com> 2023-11-24 11:52:40 +0000
commit aeefe81cc03267d7ba9a6bfaf8daef91fbd33aa0 (patch)
tree 9ea7db94ab2a5b454d42c04200a8efa44580372c /compiler
parent 0ac7d570c99ddb1a22429a2f51e05d48a26daa36 (diff)
riscv64: Implement VarHandle.GetAndUpdate intrinsics.
Also fix `GenerateCompareAndSet()` to avoid ANDN inside a LR/SC
sequence as the instruction is not part of the base "I" instruction set.

Test: testrunner.py --target --64 --ndebug --optimizing
Bug: 283082089
Change-Id: I09caa0486d9bedf93a40f0f15cab1e6bef19969c
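The replacement of ANDN inside the LR/SC loop can be sanity-checked in isolation: when `masked` already holds `old_value & mask`, XOR-ing it back out of `old_value` clears exactly the masked bits, which is what the removed ANDN computed. A minimal standalone C++ check (not part of the patch; the values and names are illustrative):

#include <cassert>
#include <cstdint>

int main() {
  const uint32_t old_value = 0x12345678u;
  const uint32_t mask      = 0x00FFFF00u;       // bits being replaced
  const uint32_t new_value = 0x00ABCD00u;       // already shifted into the mask position
  const uint32_t masked    = old_value & mask;  // computed earlier in the LR/SC loop

  // Removed sequence: needs ANDN, which is not in the base "I" set allowed between LR and SC.
  const uint32_t with_andn = (old_value & ~mask) | new_value;
  // New sequence: XOR clears the masked bits because masked == old_value & mask.
  const uint32_t with_xor  = (old_value ^ masked) | new_value;

  assert(with_andn == with_xor);
  return 0;
}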
Diffstat (limited to 'compiler')
-rw-r--r--  compiler/optimizing/code_generator_riscv64.cc    2
-rw-r--r--  compiler/optimizing/code_generator_riscv64.h    20
-rw-r--r--  compiler/optimizing/intrinsics_riscv64.cc      678
3 files changed, 629 insertions, 71 deletions
diff --git a/compiler/optimizing/code_generator_riscv64.cc b/compiler/optimizing/code_generator_riscv64.cc
index 5006371377..fb44abce55 100644
--- a/compiler/optimizing/code_generator_riscv64.cc
+++ b/compiler/optimizing/code_generator_riscv64.cc
@@ -786,7 +786,7 @@ inline void InstructionCodeGeneratorRISCV64::FpBinOp(
}
}
-inline void InstructionCodeGeneratorRISCV64::FAdd(
+void InstructionCodeGeneratorRISCV64::FAdd(
FRegister rd, FRegister rs1, FRegister rs2, DataType::Type type) {
FpBinOp<FRegister, &Riscv64Assembler::FAddS, &Riscv64Assembler::FAddD>(rd, rs1, rs2, type);
}
diff --git a/compiler/optimizing/code_generator_riscv64.h b/compiler/optimizing/code_generator_riscv64.h
index b1c45dea82..20c48db12c 100644
--- a/compiler/optimizing/code_generator_riscv64.h
+++ b/compiler/optimizing/code_generator_riscv64.h
@@ -160,21 +160,6 @@ static constexpr int32_t kFClassNaNMinValue = 0x100;
V(CRC32UpdateByteBuffer) \
V(MethodHandleInvokeExact) \
V(MethodHandleInvoke) \
- V(VarHandleGetAndAdd) \
- V(VarHandleGetAndAddAcquire) \
- V(VarHandleGetAndAddRelease) \
- V(VarHandleGetAndBitwiseAnd) \
- V(VarHandleGetAndBitwiseAndAcquire) \
- V(VarHandleGetAndBitwiseAndRelease) \
- V(VarHandleGetAndBitwiseOr) \
- V(VarHandleGetAndBitwiseOrAcquire) \
- V(VarHandleGetAndBitwiseOrRelease) \
- V(VarHandleGetAndBitwiseXor) \
- V(VarHandleGetAndBitwiseXorAcquire) \
- V(VarHandleGetAndBitwiseXorRelease) \
- V(VarHandleGetAndSet) \
- V(VarHandleGetAndSetAcquire) \
- V(VarHandleGetAndSetRelease) \
V(ByteValueOf) \
V(ShortValueOf) \
V(CharacterValueOf) \
@@ -183,6 +168,9 @@ static constexpr int32_t kFClassNaNMinValue = 0x100;
// Method register on invoke.
static const XRegister kArtMethodRegister = A0;
+// Helper used by codegen as well as intrinsics.
+XRegister InputXRegisterOrZero(Location location);
+
class CodeGeneratorRISCV64;
class InvokeRuntimeCallingConvention : public CallingConvention<XRegister, FRegister> {
@@ -367,6 +355,7 @@ class InstructionCodeGeneratorRISCV64 : public InstructionCodeGenerator {
void GenerateMemoryBarrier(MemBarrierKind kind);
+ void FAdd(FRegister rd, FRegister rs1, FRegister rs2, DataType::Type type);
void FClass(XRegister rd, FRegister rs1, DataType::Type type);
void Load(Location out, XRegister rs1, int32_t offset, DataType::Type type);
@@ -473,7 +462,6 @@ class InstructionCodeGeneratorRISCV64 : public InstructionCodeGenerator {
void (Riscv64Assembler::*opS)(Reg, FRegister, FRegister),
void (Riscv64Assembler::*opD)(Reg, FRegister, FRegister)>
void FpBinOp(Reg rd, FRegister rs1, FRegister rs2, DataType::Type type);
- void FAdd(FRegister rd, FRegister rs1, FRegister rs2, DataType::Type type);
void FSub(FRegister rd, FRegister rs1, FRegister rs2, DataType::Type type);
void FDiv(FRegister rd, FRegister rs1, FRegister rs2, DataType::Type type);
void FMul(FRegister rd, FRegister rs1, FRegister rs2, DataType::Type type);
diff --git a/compiler/optimizing/intrinsics_riscv64.cc b/compiler/optimizing/intrinsics_riscv64.cc
index 225809d58a..fb3f11357a 100644
--- a/compiler/optimizing/intrinsics_riscv64.cc
+++ b/compiler/optimizing/intrinsics_riscv64.cc
@@ -553,6 +553,35 @@ void IntrinsicCodeGeneratorRISCV64::VisitStringIndexOfAfter(HInvoke* invoke) {
GenerateVisitStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero= */ false);
}
+std::pair<AqRl, AqRl> GetLrScAqRl(std::memory_order order) {
+ AqRl load_aqrl = AqRl::kNone;
+ AqRl store_aqrl = AqRl::kNone;
+ if (order == std::memory_order_acquire) {
+ load_aqrl = AqRl::kAcquire;
+ } else if (order == std::memory_order_release) {
+ store_aqrl = AqRl::kRelease;
+ } else if (order == std::memory_order_seq_cst) {
+ load_aqrl = AqRl::kAqRl;
+ store_aqrl = AqRl::kRelease;
+ } else {
+ DCHECK(order == std::memory_order_relaxed);
+ }
+ return {load_aqrl, store_aqrl};
+}
+
+AqRl GetAmoAqRl(std::memory_order order) {
+ AqRl amo_aqrl = AqRl::kNone;
+ if (order == std::memory_order_acquire) {
+ amo_aqrl = AqRl::kAcquire;
+ } else if (order == std::memory_order_release) {
+ amo_aqrl = AqRl::kRelease;
+ } else {
+ DCHECK(order == std::memory_order_seq_cst);
+ amo_aqrl = AqRl::kAqRl;
+ }
+ return amo_aqrl;
+}
+
static void EmitLoadReserved(Riscv64Assembler* assembler,
DataType::Type type,
XRegister ptr,
@@ -620,18 +649,7 @@ static void GenerateCompareAndSet(Riscv64Assembler* assembler,
// from the main path attempt to emit CAS when the marked old value matched `expected`.
DCHECK_IMPLIES(expected2 != kNoXRegister, type == DataType::Type::kReference);
- AqRl load_aqrl = AqRl::kNone;
- AqRl store_aqrl = AqRl::kNone;
- if (order == std::memory_order_acquire) {
- load_aqrl = AqRl::kAcquire;
- } else if (order == std::memory_order_release) {
- store_aqrl = AqRl::kRelease;
- } else if (order == std::memory_order_seq_cst) {
- load_aqrl = AqRl::kAqRl;
- store_aqrl = AqRl::kRelease;
- } else {
- DCHECK(order == std::memory_order_relaxed);
- }
+ auto [load_aqrl, store_aqrl] = GetLrScAqRl(order);
// repeat: {
// old_value = [ptr]; // Load exclusive.
@@ -665,9 +683,9 @@ static void GenerateCompareAndSet(Riscv64Assembler* assembler,
// The `old_value` does not need to be preserved as the caller shall use `masked`
// to return the old value if needed.
to_store = old_value;
- // TODO(riscv64): We could XOR the old and new value before the loop and use XOR here
- // instead of the ANDN+OR. (The `new_value` is either Zero or a temporary we can clobber.)
- __ Andn(to_store, old_value, mask);
+ // TODO(riscv64): We could XOR the old and new value before the loop and use a single XOR here
+ // instead of the XOR+OR. (The `new_value` is either Zero or a temporary we can clobber.)
+ __ Xor(to_store, old_value, masked);
__ Or(to_store, to_store, new_value);
} else if (expected2 != kNoXRegister) {
Riscv64Label match2;
@@ -846,12 +864,109 @@ class ReadBarrierCasSlowPathRISCV64 : public SlowPathCodeRISCV64 {
enum class GetAndUpdateOp {
kSet,
kAdd,
- kAddWithByteSwap,
kAnd,
kOr,
kXor
};
+// Generate a GetAndUpdate operation.
+//
+// Only 32-bit and 64-bit atomics are currently supported, therefore smaller types need
+// special handling. The caller emits code to prepare aligned `ptr` and adjusted `arg`
+// and extract the needed bits from `old_value`. For bitwise operations, no extra
+// handling is needed here. For `GetAndUpdateOp::kSet` and `GetAndUpdateOp::kAdd` we
+// also use a special LR/SC sequence that uses a `mask` to update only the desired bits.
+// Note: The `mask` must contain the bits to keep for `GetAndUpdateOp::kSet` and
+// the bits to replace for `GetAndUpdateOp::kAdd`.
+static void GenerateGetAndUpdate(CodeGeneratorRISCV64* codegen,
+ GetAndUpdateOp get_and_update_op,
+ DataType::Type type,
+ std::memory_order order,
+ XRegister ptr,
+ XRegister arg,
+ XRegister old_value,
+ XRegister mask,
+ XRegister temp) {
+ DCHECK_EQ(mask != kNoXRegister, temp != kNoXRegister);
+ DCHECK_IMPLIES(mask != kNoXRegister, type == DataType::Type::kInt32);
+ DCHECK_IMPLIES(
+ mask != kNoXRegister,
+ (get_and_update_op == GetAndUpdateOp::kSet) || (get_and_update_op == GetAndUpdateOp::kAdd));
+ Riscv64Assembler* assembler = codegen->GetAssembler();
+ AqRl amo_aqrl = GetAmoAqRl(order);
+ switch (get_and_update_op) {
+ case GetAndUpdateOp::kSet:
+ if (type == DataType::Type::kInt64) {
+ __ AmoSwapD(old_value, arg, ptr, amo_aqrl);
+ } else if (mask == kNoXRegister) {
+ DCHECK_EQ(type, DataType::Type::kInt32);
+ __ AmoSwapW(old_value, arg, ptr, amo_aqrl);
+ } else {
+ DCHECK_EQ(type, DataType::Type::kInt32);
+ DCHECK_NE(temp, kNoXRegister);
+ auto [load_aqrl, store_aqrl] = GetLrScAqRl(order);
+ Riscv64Label retry;
+ __ Bind(&retry);
+ __ LrW(old_value, ptr, load_aqrl);
+ __ And(temp, old_value, mask);
+ __ Or(temp, temp, arg);
+ __ ScW(temp, temp, ptr, store_aqrl);
+ __ Bnez(temp, &retry, /*is_bare=*/ true); // Bare: `TMP` shall not be clobbered.
+ }
+ break;
+ case GetAndUpdateOp::kAdd:
+ if (type == DataType::Type::kInt64) {
+ __ AmoAddD(old_value, arg, ptr, amo_aqrl);
+ } else if (mask == kNoXRegister) {
+ DCHECK_EQ(type, DataType::Type::kInt32);
+ __ AmoAddW(old_value, arg, ptr, amo_aqrl);
+ } else {
+ DCHECK_EQ(type, DataType::Type::kInt32);
+ DCHECK_NE(temp, kNoXRegister);
+ auto [load_aqrl, store_aqrl] = GetLrScAqRl(order);
+ Riscv64Label retry;
+ __ Bind(&retry);
+ __ LrW(old_value, ptr, load_aqrl);
+ __ Add(temp, old_value, arg);
+ // We use `(A ^ B) ^ A == B` and with the masking `((A ^ B) & mask) ^ A`, the result
+ // contains bits from `B` for bits specified in `mask` and bits from `A` elsewhere.
+ // Note: These instructions directly depend on each other, so it's not necessarily the
+ // fastest approach but for `(A & ~mask) | (B & mask)` we would need an extra register for
+ // `~mask` because ANDN is not in the "I" instruction set as required for a LR/SC sequence.
+ __ Xor(temp, temp, old_value);
+ __ And(temp, temp, mask);
+ __ Xor(temp, temp, old_value);
+ __ ScW(temp, temp, ptr, store_aqrl);
+ __ Bnez(temp, &retry, /*is_bare=*/ true); // Bare: `TMP` shall not be clobbered.
+ }
+ break;
+ case GetAndUpdateOp::kAnd:
+ if (type == DataType::Type::kInt64) {
+ __ AmoAndD(old_value, arg, ptr, amo_aqrl);
+ } else {
+ DCHECK_EQ(type, DataType::Type::kInt32);
+ __ AmoAndW(old_value, arg, ptr, amo_aqrl);
+ }
+ break;
+ case GetAndUpdateOp::kOr:
+ if (type == DataType::Type::kInt64) {
+ __ AmoOrD(old_value, arg, ptr, amo_aqrl);
+ } else {
+ DCHECK_EQ(type, DataType::Type::kInt32);
+ __ AmoOrW(old_value, arg, ptr, amo_aqrl);
+ }
+ break;
+ case GetAndUpdateOp::kXor:
+ if (type == DataType::Type::kInt64) {
+ __ AmoXorD(old_value, arg, ptr, amo_aqrl);
+ } else {
+ DCHECK_EQ(type, DataType::Type::kInt32);
+ __ AmoXorW(old_value, arg, ptr, amo_aqrl);
+ }
+ break;
+ }
+}
+
class VarHandleSlowPathRISCV64 : public IntrinsicSlowPathRISCV64 {
public:
VarHandleSlowPathRISCV64(HInvoke* invoke, std::memory_order order)
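A small standalone C++ sketch (illustrative names and values, not ART code) of the merge identity `((A ^ B) & mask) ^ A` used by the masked `GetAndUpdateOp::kAdd` path above; it selects the bits of `B` covered by `mask` and keeps the bits of `A` elsewhere, without needing ANDN or a register for `~mask`:

#include <cassert>
#include <cstdint>

// Take bits of `inside` where `mask` is set and bits of `outside` elsewhere.
static uint32_t MergeWithMask(uint32_t outside, uint32_t inside, uint32_t mask) {
  return ((outside ^ inside) & mask) ^ outside;
}

int main() {
  const uint32_t old_value = 0xAABBCCDDu;              // A: current 32-bit word
  const uint32_t mask      = 0x0000FF00u;              // bits of the narrow field being updated
  const uint32_t sum       = old_value + 0x00000100u;  // B: old_value + shifted argument
  const uint32_t merged    = MergeWithMask(old_value, sum, mask);

  assert((merged & mask) == (sum & mask));          // updated bits come from the sum
  assert((merged & ~mask) == (old_value & ~mask));  // all other bits are preserved
  return 0;
}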
@@ -1610,21 +1725,21 @@ static void CreateVarHandleCompareAndSetOrExchangeLocations(HInvoke* invoke,
Location expected = locations->InAt(expected_index);
Location new_value = locations->InAt(new_value_index);
size_t data_size = DataType::Size(value_type);
- bool small = (data_size < 4u);
- bool byte_swap =
+ bool is_small = (data_size < 4u);
+ bool can_byte_swap =
(expected_index == 3u) && (value_type != DataType::Type::kReference && data_size != 1u);
- bool fp = DataType::IsFloatingPointType(value_type);
+ bool is_fp = DataType::IsFloatingPointType(value_type);
size_t temps_needed =
// The offset temp is used for the `tmp_ptr`.
1u +
// For small values, we need a temp for the `mask`, `masked` and maybe also for the `shift`.
- (small ? (return_success ? 2u : 3u) : 0u) +
+ (is_small ? (return_success ? 2u : 3u) : 0u) +
// Some cases need modified copies of `new_value` and `expected`.
- (ScratchXRegisterNeeded(expected, value_type, byte_swap) ? 1u : 0u) +
- (ScratchXRegisterNeeded(new_value, value_type, byte_swap) ? 1u : 0u) +
+ (ScratchXRegisterNeeded(expected, value_type, can_byte_swap) ? 1u : 0u) +
+ (ScratchXRegisterNeeded(new_value, value_type, can_byte_swap) ? 1u : 0u) +
// We need a scratch register either for the old value or for the result of SC.
// If we need to return a floating point old value, we need a temp for each.
- ((!return_success && fp) ? 2u : 1u);
+ ((!return_success && is_fp) ? 2u : 1u);
size_t scratch_registers_available = 2u;
DCHECK_EQ(scratch_registers_available,
ScratchRegisterScope(codegen->GetAssembler()).AvailableXRegisters());
@@ -1643,7 +1758,7 @@ static XRegister PrepareXRegister(CodeGeneratorRISCV64* codegen,
XRegister mask,
bool byte_swap,
ScratchRegisterScope* srs) {
- DCHECK_EQ(shift == kNoXRegister, mask == kNoXRegister);
+ DCHECK_IMPLIES(mask != kNoXRegister, shift != kNoXRegister);
DCHECK_EQ(shift == kNoXRegister, DataType::Size(type) >= 4u);
if (loc.IsConstant()) {
// The `shift`/`mask` and `byte_swap` are irrelevant for zero input.
@@ -1671,13 +1786,35 @@ static XRegister PrepareXRegister(CodeGeneratorRISCV64* codegen,
Riscv64Assembler* assembler = codegen->GetAssembler();
__ Sllw(result.AsRegister<XRegister>(), loc.AsRegister<XRegister>(), shift);
DCHECK_NE(type, DataType::Type::kUint8);
- if (type != DataType::Type::kUint16 && type != DataType::Type::kBool) {
+ if (mask != kNoXRegister && type != DataType::Type::kUint16 && type != DataType::Type::kBool) {
__ And(result.AsRegister<XRegister>(), result.AsRegister<XRegister>(), mask);
}
}
return result.AsRegister<XRegister>();
}
+static void GenerateByteSwapAndExtract(Riscv64Assembler* assembler,
+ Location rd,
+ XRegister rs1,
+ XRegister shift,
+ DataType::Type type) {
+ // Do not apply shift in `GenerateReverseBytes()` for small types.
+ DCHECK_EQ(shift != kNoXRegister, DataType::Size(type) < 4u);
+ DataType::Type swap_type = (shift != kNoXRegister) ? DataType::Type::kInt32 : type;
+ // Also handles moving to FP registers.
+ GenerateReverseBytes(assembler, rd, rs1, swap_type);
+ if (shift != kNoXRegister) {
+ DCHECK_EQ(rs1, rd.AsRegister<XRegister>());
+ __ Sllw(rs1, rs1, shift);
+ if (type == DataType::Type::kUint16) {
+ __ Srliw(rs1, rs1, 16);
+ } else {
+ DCHECK_EQ(type, DataType::Type::kInt16);
+ __ Sraiw(rs1, rs1, 16);
+ }
+ }
+}
+
static void GenerateVarHandleCompareAndSetOrExchange(HInvoke* invoke,
CodeGeneratorRISCV64* codegen,
std::memory_order order,
@@ -1727,7 +1864,8 @@ static void GenerateVarHandleCompareAndSetOrExchange(HInvoke* invoke,
DCHECK_EQ(target.offset, locations->GetTemp(0u).AsRegister<XRegister>());
size_t next_temp = 1u;
XRegister tmp_ptr = target.offset;
- if (value_type == DataType::Type::kReference && codegen->EmitReadBarrier()) {
+ bool is_reference = (value_type == DataType::Type::kReference);
+ if (is_reference && codegen->EmitReadBarrier()) {
DCHECK_EQ(available_scratch_registers, 2u);
available_scratch_registers -= 1u;
DCHECK_EQ(expected_index, 1u + GetExpectedVarHandleCoordinatesCount(invoke));
@@ -1751,15 +1889,16 @@ static void GenerateVarHandleCompareAndSetOrExchange(HInvoke* invoke,
XRegister mask = kNoXRegister;
XRegister masked = kNoXRegister;
size_t data_size = DataType::Size(value_type);
- if (data_size < 4u) {
+ bool is_small = (data_size < 4u);
+ if (is_small) {
// When returning "success" and not the old value, we shall not need the `shift` after
// the raw CAS operation, so use the output register as a temporary here.
shift = return_success ? locations->Out().AsRegister<XRegister>() : get_temp();
mask = get_temp();
masked = get_temp();
- __ Andi(shift, tmp_ptr, 3);
+ // Upper bits of the shift are not used, so we do not need to clear them.
+ __ Slli(shift, tmp_ptr, WhichPowerOf2(kBitsPerByte));
__ Andi(tmp_ptr, tmp_ptr, -4);
- __ Slli(shift, shift, WhichPowerOf2(kBitsPerByte));
__ Li(mask, (1 << (data_size * kBitsPerByte)) - 1);
__ Sllw(mask, mask, shift);
}
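The shift setup above now uses a single `Slli` without first masking `tmp_ptr` to its low two bits; this relies on `Sllw`/`Srlw` reading only the low five bits of the shift-amount register. A quick standalone C++ check of that equivalence (illustrative, not ART code):

#include <cassert>
#include <cstdint>

int main() {
  // On RV64, SLLW/SRLW use only bits [4:0] of the shift-amount register, so
  // (tmp_ptr << 3) and ((tmp_ptr & 3) << 3) produce the same effective shift.
  for (uint64_t tmp_ptr = 0; tmp_ptr < 1024; ++tmp_ptr) {
    const uint64_t single_slli    = (tmp_ptr << 3) & 0x1Fu;  // new code: Slli only
    const uint64_t andi_then_slli = (tmp_ptr & 3u) << 3;     // old code: Andi + Slli
    assert(single_slli == andi_then_slli);
  }
  return 0;
}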
@@ -1770,9 +1909,10 @@ static void GenerateVarHandleCompareAndSetOrExchange(HInvoke* invoke,
PrepareXRegister(codegen, expected, value_type, shift, mask, byte_swap, &srs);
XRegister new_value_reg =
PrepareXRegister(codegen, new_value, value_type, shift, mask, byte_swap, &srs);
- DataType::Type cas_type = DataType::IsFloatingPointType(value_type)
+ bool is_fp = DataType::IsFloatingPointType(value_type);
+ DataType::Type cas_type = is_fp
? IntTypeForFloatingPointType(value_type)
- : (data_size >= 4u ? value_type : DataType::Type::kInt32);
+ : (is_small ? DataType::Type::kInt32 : value_type);
// Prepare registers for old value and the result of the store conditional.
XRegister old_value;
@@ -1781,7 +1921,7 @@ static void GenerateVarHandleCompareAndSetOrExchange(HInvoke* invoke,
// Use a temp for the old value and the output register for the store conditional result.
old_value = get_temp();
store_result = out.AsRegister<XRegister>();
- } else if (DataType::IsFloatingPointType(value_type)) {
+ } else if (is_fp) {
// We need two temporary registers.
old_value = get_temp();
store_result = get_temp();
@@ -1795,7 +1935,7 @@ static void GenerateVarHandleCompareAndSetOrExchange(HInvoke* invoke,
Riscv64Label* exit_loop = &exit_loop_label;
Riscv64Label* cmp_failure = &exit_loop_label;
- if (value_type == DataType::Type::kReference && codegen->EmitReadBarrier()) {
+ if (is_reference && codegen->EmitReadBarrier()) {
// The `old_value_temp` is used first for marking the `old_value` and then for the unmarked
// reloaded old value for subsequent CAS in the slow path. It cannot be a scratch register.
XRegister old_value_temp = locations->GetTemp(next_temp).AsRegister<XRegister>();
@@ -1849,24 +1989,11 @@ static void GenerateVarHandleCompareAndSetOrExchange(HInvoke* invoke,
if (return_success) {
// Nothing to do, the result register already contains 1 on success and 0 on failure.
} else if (byte_swap) {
- // Do not apply shift in `GenerateReverseBytes()` for small types.
- DataType::Type swap_type = data_size < 4u ? DataType::Type::kInt32 : value_type;
- // Also handles moving to FP registers.
- GenerateReverseBytes(assembler, out, old_value, swap_type);
- if (data_size < 4u) {
- DCHECK(Location::RegisterLocation(old_value).Equals(out));
- __ Sllw(old_value, old_value, shift);
- if (value_type == DataType::Type::kUint16) {
- __ Srliw(old_value, old_value, 16);
- } else {
- DCHECK_EQ(value_type, DataType::Type::kInt16);
- __ Sraiw(old_value, old_value, 16);
- }
- }
- } else if (DataType::IsFloatingPointType(value_type)) {
+ GenerateByteSwapAndExtract(assembler, out, old_value, shift, value_type);
+ } else if (is_fp) {
codegen->MoveLocation(out, Location::RegisterLocation(old_value), value_type);
- } else if (data_size < 4u) {
- __ Srl(old_value, masked, shift);
+ } else if (is_small) {
+ __ Srlw(old_value, masked, shift);
if (value_type == DataType::Type::kInt8) {
__ SextB(old_value, old_value);
} else if (value_type == DataType::Type::kInt16) {
@@ -1881,8 +2008,7 @@ static void GenerateVarHandleCompareAndSetOrExchange(HInvoke* invoke,
// Check that we have allocated the right number of temps. We may need more registers
// for byte swapped CAS in the slow path, so skip this check for the main path in that case.
- bool has_byte_swap =
- (expected_index == 3u) && (value_type != DataType::Type::kReference && data_size != 1u);
+ bool has_byte_swap = (expected_index == 3u) && (!is_reference && data_size != 1u);
if ((!has_byte_swap || byte_swap) && next_temp != locations->GetTempCount()) {
// We allocate a temporary register for the class object for a static field `VarHandle` but
// we do not update the `next_temp` if it's otherwise unused after the address calculation.
@@ -1964,13 +2090,457 @@ void IntrinsicCodeGeneratorRISCV64::VisitVarHandleWeakCompareAndSetRelease(HInvo
invoke, codegen_, std::memory_order_release, /*return_success=*/ true, /*strong=*/ false);
}
+static void CreateVarHandleGetAndUpdateLocations(HInvoke* invoke,
+ CodeGeneratorRISCV64* codegen,
+ GetAndUpdateOp get_and_update_op) {
+ VarHandleOptimizations optimizations(invoke);
+ if (optimizations.GetDoNotIntrinsify()) {
+ return;
+ }
+
+ if (invoke->GetType() == DataType::Type::kReference && codegen->EmitNonBakerReadBarrier()) {
+ // Unsupported for non-Baker read barrier because the artReadBarrierSlow() ignores
+ // the passed reference and reloads it from the field, thus seeing the new value
+ // that we have just stored. (And it also gets the memory visibility wrong.) b/173104084
+ return;
+ }
+
+ LocationSummary* locations = CreateVarHandleCommonLocations(invoke, codegen);
+ uint32_t arg_index = invoke->GetNumberOfArguments() - 1;
+ DCHECK_EQ(arg_index, 1u + GetExpectedVarHandleCoordinatesCount(invoke));
+ DataType::Type value_type = invoke->GetType();
+ DCHECK_EQ(value_type, GetDataTypeFromShorty(invoke, arg_index));
+ Location arg = locations->InAt(arg_index);
+
+ bool is_fp = DataType::IsFloatingPointType(value_type);
+ if (is_fp) {
+ if (get_and_update_op == GetAndUpdateOp::kAdd) {
+ // For ADD, do not use ZR for zero bit pattern (+0.0f or +0.0).
+ locations->SetInAt(invoke->GetNumberOfArguments() - 1u, Location::RequiresFpuRegister());
+ } else {
+ DCHECK(get_and_update_op == GetAndUpdateOp::kSet);
+ }
+ }
+
+ size_t data_size = DataType::Size(value_type);
+ bool can_byte_swap =
+ (arg_index == 3u) && (value_type != DataType::Type::kReference && data_size != 1u);
+ bool can_use_cas = (get_and_update_op == GetAndUpdateOp::kAdd) && (can_byte_swap || is_fp);
+ bool is_small = (data_size < 4u);
+ bool is_small_and = is_small && (get_and_update_op == GetAndUpdateOp::kAnd);
+ bool is_bitwise =
+ (get_and_update_op != GetAndUpdateOp::kSet && get_and_update_op != GetAndUpdateOp::kAdd);
+
+ size_t temps_needed =
+ // The offset temp is used for the `tmp_ptr`.
+ 1u +
+ // For small values, we need temps for `shift` and maybe also `mask` and `temp`.
+ (is_small ? (is_bitwise ? 1u : 3u) : 0u) +
+ // Some cases need modified copies of `arg`.
+ (is_small_and || ScratchXRegisterNeeded(arg, value_type, can_byte_swap) ? 1u : 0u) +
+ // For FP types, we need a temp for `old_value` which cannot be loaded directly to `out`.
+ (is_fp ? 1u : 0u);
+ if (can_use_cas) {
+ size_t cas_temps_needed =
+ // The offset temp is used for the `tmp_ptr`.
+ 1u +
+ // For small values, we need a temp for `shift`.
+ (is_small ? 1u : 0u) +
+ // And we always need temps for `old_value`, `new_value` and `reloaded_old_value`.
+ 3u;
+ DCHECK_GE(cas_temps_needed, temps_needed);
+ temps_needed = cas_temps_needed;
+ }
+
+ size_t scratch_registers_available = 2u;
+ DCHECK_EQ(scratch_registers_available,
+ ScratchRegisterScope(codegen->GetAssembler()).AvailableXRegisters());
+ size_t old_temp_count = locations->GetTempCount();
+ DCHECK_EQ(old_temp_count, (arg_index == 1u) ? 2u : 1u);
+ if (temps_needed > old_temp_count + scratch_registers_available) {
+ locations->AddRegisterTemps(temps_needed - (old_temp_count + scratch_registers_available));
+ }
+}
+
static void GenerateVarHandleGetAndUpdate(HInvoke* invoke,
CodeGeneratorRISCV64* codegen,
GetAndUpdateOp get_and_update_op,
std::memory_order order,
bool byte_swap = false) {
- UNUSED(invoke, codegen, get_and_update_op, order, byte_swap);
- LOG(FATAL) << "Unimplemented!";
+ uint32_t arg_index = invoke->GetNumberOfArguments() - 1;
+ DCHECK_EQ(arg_index, 1u + GetExpectedVarHandleCoordinatesCount(invoke));
+ DataType::Type value_type = invoke->GetType();
+ DCHECK_EQ(value_type, GetDataTypeFromShorty(invoke, arg_index));
+
+ Riscv64Assembler* assembler = codegen->GetAssembler();
+ LocationSummary* locations = invoke->GetLocations();
+ Location arg = locations->InAt(arg_index);
+ DCHECK_IMPLIES(arg.IsConstant(), arg.GetConstant()->IsZeroBitPattern());
+ Location out = locations->Out();
+
+ VarHandleTarget target = GetVarHandleTarget(invoke);
+ VarHandleSlowPathRISCV64* slow_path = nullptr;
+ if (!byte_swap) {
+ slow_path = GenerateVarHandleChecks(invoke, codegen, order, value_type);
+ GenerateVarHandleTarget(invoke, target, codegen);
+ if (slow_path != nullptr) {
+ slow_path->SetGetAndUpdateOp(get_and_update_op);
+ __ Bind(slow_path->GetNativeByteOrderLabel());
+ }
+ }
+
+ // This needs to be before the temp registers, as MarkGCCard also uses scratch registers.
+ if (CodeGenerator::StoreNeedsWriteBarrier(value_type, invoke->InputAt(arg_index))) {
+ DCHECK(get_and_update_op == GetAndUpdateOp::kSet);
+ // Mark card for object, the new value shall be stored.
+ bool new_value_can_be_null = true; // TODO: Worth finding out this information?
+ codegen->MarkGCCard(target.object, arg.AsRegister<XRegister>(), new_value_can_be_null);
+ }
+
+ size_t data_size = DataType::Size(value_type);
+ bool is_fp = DataType::IsFloatingPointType(value_type);
+ bool use_cas = (get_and_update_op == GetAndUpdateOp::kAdd) && (byte_swap || is_fp);
+ bool is_small = (data_size < 4u);
+ bool is_small_and = is_small && (get_and_update_op == GetAndUpdateOp::kAnd);
+ bool is_reference = (value_type == DataType::Type::kReference);
+ DataType::Type op_type = is_fp
+ ? IntTypeForFloatingPointType(value_type)
+ : (is_small || is_reference ? DataType::Type::kInt32 : value_type);
+
+ ScratchRegisterScope srs(assembler);
+ DCHECK_EQ(srs.AvailableXRegisters(), 2u);
+ size_t available_scratch_registers = use_cas
+ // We use scratch registers differently for the CAS path.
+ ? 0u
+ // Reserve one scratch register for `PrepareXRegister()` or similar `arg_reg` allocation.
+ : (is_small_and || ScratchXRegisterNeeded(arg, value_type, byte_swap) ? 1u : 2u);
+
+ // Reuse the `target.offset` temporary for the pointer to the target location,
+ // except for references that need the offset for the non-Baker read barrier.
+ DCHECK_EQ(target.offset, locations->GetTemp(0u).AsRegister<XRegister>());
+ size_t next_temp = 1u;
+ XRegister tmp_ptr = target.offset;
+ if (is_reference && codegen->EmitNonBakerReadBarrier()) {
+ DCHECK_EQ(available_scratch_registers, 2u);
+ available_scratch_registers -= 1u;
+ tmp_ptr = srs.AllocateXRegister();
+ }
+ __ Add(tmp_ptr, target.object, target.offset);
+
+ auto get_temp = [&]() {
+ if (available_scratch_registers != 0u) {
+ available_scratch_registers -= 1u;
+ return srs.AllocateXRegister();
+ } else {
+ XRegister temp = locations->GetTemp(next_temp).AsRegister<XRegister>();
+ next_temp += 1u;
+ return temp;
+ }
+ };
+
+ XRegister shift = kNoXRegister;
+ XRegister mask = kNoXRegister;
+ XRegister prepare_mask = kNoXRegister;
+ XRegister temp = kNoXRegister;
+ XRegister arg_reg = kNoXRegister;
+ if (is_small) {
+ shift = get_temp();
+ // Upper bits of the shift are not used, so we do not need to clear them.
+ __ Slli(shift, tmp_ptr, WhichPowerOf2(kBitsPerByte));
+ __ Andi(tmp_ptr, tmp_ptr, -4);
+ switch (get_and_update_op) {
+ case GetAndUpdateOp::kAdd:
+ if (byte_swap) {
+ // The mask is not needed in the CAS path.
+ DCHECK(use_cas);
+ break;
+ }
+ FALLTHROUGH_INTENDED;
+ case GetAndUpdateOp::kSet:
+ mask = get_temp();
+ temp = get_temp();
+ __ Li(mask, (1 << (data_size * kBitsPerByte)) - 1);
+ __ Sllw(mask, mask, shift);
+ // The argument does not need to be masked for `GetAndUpdateOp::kAdd`,
+ // the mask shall be applied after the ADD instruction.
+ prepare_mask = (get_and_update_op == GetAndUpdateOp::kSet) ? mask : kNoXRegister;
+ break;
+ case GetAndUpdateOp::kAnd:
+ // We need to set all other bits, so we always need a temp.
+ arg_reg = srs.AllocateXRegister();
+ if (data_size == 1u) {
+ __ Ori(arg_reg, InputXRegisterOrZero(arg), ~0xff);
+ DCHECK(!byte_swap);
+ } else {
+ DCHECK_EQ(data_size, 2u);
+ __ Li(arg_reg, ~0xffff);
+ __ Or(arg_reg, InputXRegisterOrZero(arg), arg_reg);
+ if (byte_swap) {
+ __ Rev8(arg_reg, arg_reg);
+ __ Rori(arg_reg, arg_reg, 48);
+ }
+ }
+ __ Rolw(arg_reg, arg_reg, shift);
+ break;
+ case GetAndUpdateOp::kOr:
+ case GetAndUpdateOp::kXor:
+ // Signed values need to be truncated but we're keeping `prepare_mask == kNoXRegister`.
+ if (value_type == DataType::Type::kInt8 && !arg.IsConstant()) {
+ DCHECK(!byte_swap);
+ arg_reg = srs.AllocateXRegister();
+ __ ZextB(arg_reg, arg.AsRegister<XRegister>());
+ __ Sllw(arg_reg, arg_reg, shift);
+ } else if (value_type == DataType::Type::kInt16 && !arg.IsConstant() && !byte_swap) {
+ arg_reg = srs.AllocateXRegister();
+ __ ZextH(arg_reg, arg.AsRegister<XRegister>());
+ __ Sllw(arg_reg, arg_reg, shift);
+ } // else handled by `PrepareXRegister()` below.
+ break;
+ }
+ }
+ if (arg_reg == kNoXRegister && !use_cas) {
+ arg_reg = PrepareXRegister(codegen, arg, value_type, shift, prepare_mask, byte_swap, &srs);
+ }
+ if (mask != kNoXRegister && get_and_update_op == GetAndUpdateOp::kSet) {
+ __ Not(mask, mask); // We need to flip the mask for `kSet`, see `GenerateGetAndUpdate()`.
+ }
+
+ if (use_cas) {
+ // Allocate scratch registers for temps that can theoretically be clobbered on retry.
+ // (Even though the `retry` label shall never be far enough for `TMP` to be clobbered.)
+ DCHECK_EQ(available_scratch_registers, 0u); // Reserved for the two uses below.
+ XRegister old_value = srs.AllocateXRegister();
+ XRegister new_value = srs.AllocateXRegister();
+ // Allocate other needed temporaries.
+ XRegister reloaded_old_value = get_temp();
+ XRegister store_result = reloaded_old_value; // Clobber reloaded old value by store result.
+ FRegister ftmp = is_fp ? srs.AllocateFRegister() : kNoFRegister;
+
+ Riscv64Label retry;
+ __ Bind(&retry);
+ codegen->GetInstructionVisitor()->Load(
+ Location::RegisterLocation(old_value), tmp_ptr, /*offset=*/ 0, op_type);
+ if (byte_swap) {
+ GenerateByteSwapAndExtract(assembler, out, old_value, shift, value_type);
+ } else {
+ DCHECK(is_fp);
+ codegen->MoveLocation(out, Location::RegisterLocation(old_value), value_type);
+ }
+ if (is_fp) {
+ codegen->GetInstructionVisitor()->FAdd(
+ ftmp, out.AsFpuRegister<FRegister>(), arg.AsFpuRegister<FRegister>(), value_type);
+ codegen->MoveLocation(
+ Location::RegisterLocation(new_value), Location::FpuRegisterLocation(ftmp), op_type);
+ } else if (value_type == DataType::Type::kInt64) {
+ __ Add(new_value, out.AsRegister<XRegister>(), arg.AsRegister<XRegister>());
+ } else {
+ DCHECK_EQ(op_type, DataType::Type::kInt32);
+ __ Addw(new_value, out.AsRegister<XRegister>(), arg.AsRegister<XRegister>());
+ }
+ if (byte_swap) {
+ DataType::Type swap_type = op_type;
+ if (is_small) {
+ DCHECK_EQ(data_size, 2u);
+ // We want to update only 16 bits of the 32-bit location. The 16 bits we want to replace
+ // are present in both `old_value` and `out` but in different bits and byte order.
+ // To update the 16 bits, we can XOR the new value with the `out`, byte swap as Uint16
+ // (extracting only the bits we want to update), shift and XOR with the old value.
+ swap_type = DataType::Type::kUint16;
+ __ Xor(new_value, new_value, out.AsRegister<XRegister>());
+ }
+ GenerateReverseBytes(assembler, Location::RegisterLocation(new_value), new_value, swap_type);
+ if (is_small) {
+ __ Sllw(new_value, new_value, shift);
+ __ Xor(new_value, new_value, old_value);
+ }
+ }
+ GenerateCompareAndSet(assembler,
+ op_type,
+ order,
+ /*strong=*/ true,
+ /*cmp_failure=*/ &retry,
+ tmp_ptr,
+ new_value,
+ /*old_value=*/ reloaded_old_value,
+ /*mask=*/ kNoXRegister,
+ /*masked=*/ kNoXRegister,
+ store_result,
+ /*expected=*/ old_value);
+ } else {
+ XRegister old_value = is_fp ? get_temp() : out.AsRegister<XRegister>();
+ GenerateGetAndUpdate(
+ codegen, get_and_update_op, op_type, order, tmp_ptr, arg_reg, old_value, mask, temp);
+ if (byte_swap) {
+ GenerateByteSwapAndExtract(assembler, out, old_value, shift, value_type);
+ } else if (is_fp) {
+ codegen->MoveLocation(out, Location::RegisterLocation(old_value), value_type);
+ } else if (is_small) {
+ __ Srlw(old_value, old_value, shift);
+ DCHECK_NE(value_type, DataType::Type::kUint8);
+ if (value_type == DataType::Type::kInt8) {
+ __ SextB(old_value, old_value);
+ } else if (value_type == DataType::Type::kBool) {
+ __ ZextB(old_value, old_value);
+ } else if (value_type == DataType::Type::kInt16) {
+ __ SextH(old_value, old_value);
+ } else {
+ DCHECK_EQ(value_type, DataType::Type::kUint16);
+ __ ZextH(old_value, old_value);
+ }
+ } else if (is_reference) {
+ __ ZextW(old_value, old_value);
+ if (codegen->EmitBakerReadBarrier()) {
+ // Use RA as temp. It is clobbered in the slow path anyway.
+ static constexpr Location kBakerReadBarrierTemp = Location::RegisterLocation(RA);
+ SlowPathCodeRISCV64* rb_slow_path =
+ codegen->AddGcRootBakerBarrierBarrierSlowPath(invoke, out, kBakerReadBarrierTemp);
+ codegen->EmitBakerReadBarierMarkingCheck(rb_slow_path, out, kBakerReadBarrierTemp);
+ } else if (codegen->EmitNonBakerReadBarrier()) {
+ Location base_loc = Location::RegisterLocation(target.object);
+ Location index = Location::RegisterLocation(target.offset);
+ SlowPathCodeRISCV64* rb_slow_path = codegen->AddReadBarrierSlowPath(
+ invoke, out, out, base_loc, /*offset=*/ 0u, index);
+ __ J(rb_slow_path->GetEntryLabel());
+ __ Bind(rb_slow_path->GetExitLabel());
+ }
+ }
+ }
+
+ if (slow_path != nullptr) {
+ DCHECK(!byte_swap);
+ __ Bind(slow_path->GetExitLabel());
+ }
+
+ // Check that we have allocated the right number of temps. We may need more registers
+ // for byte swapped CAS in the slow path, so skip this check for the main path in that case.
+ bool has_byte_swap = (arg_index == 3u) && (!is_reference && data_size != 1u);
+ if ((!has_byte_swap || byte_swap) && next_temp != locations->GetTempCount()) {
+ // We allocate a temporary register for the class object for a static field `VarHandle` but
+ // we do not update the `next_temp` if it's otherwise unused after the address calculation.
+ CHECK_EQ(arg_index, 1u);
+ CHECK_EQ(next_temp, 1u);
+ CHECK_EQ(locations->GetTempCount(), 2u);
+ }
+}
+
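+
The byte-swapped 16-bit `GetAndAdd` path above merges the updated field back into the raw 32-bit word with two XORs and a Uint16 byte reversal, as described in the comment in the CAS loop. A standalone C++ sketch of that merge (illustrative helper, names and values, not ART code), assuming the field is stored byte-swapped at bit offset `shift`:

#include <cassert>
#include <cstdint>

// Swap the two low bytes; higher bits are discarded (emulates a Uint16 byte reversal).
static uint32_t ByteSwap16(uint32_t v) {
  return ((v & 0xFFu) << 8) | ((v >> 8) & 0xFFu);
}

int main() {
  const unsigned shift = 16;                  // bit offset of the 16-bit field in the word
  const uint32_t field_old = 0x1234u;         // old field value in natural byte order (`out`)
  const uint32_t field_new = (field_old + 0x0101u) & 0xFFFFu;  // after the Addw
  const uint32_t other_bits = 0x0000BEEFu;    // unrelated bytes sharing the 32-bit word

  // Raw word as loaded by LR: the field sits byte-swapped at `shift`.
  const uint32_t old_value = (ByteSwap16(field_old) << shift) | other_bits;

  // Emitted sequence: XOR new with old (natural order), reverse bytes as Uint16,
  // shift into place, then XOR into the raw old value.
  uint32_t new_value = field_new ^ field_old;
  new_value = ByteSwap16(new_value);
  new_value = (new_value << shift) ^ old_value;

  // Only the 16 field bits changed, and they now hold the byte-swapped sum.
  assert(new_value == ((ByteSwap16(field_new) << shift) | other_bits));
  return 0;
}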
+void IntrinsicLocationsBuilderRISCV64::VisitVarHandleGetAndSet(HInvoke* invoke) {
+ CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kSet);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitVarHandleGetAndSet(HInvoke* invoke) {
+ GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kSet, std::memory_order_seq_cst);
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitVarHandleGetAndSetAcquire(HInvoke* invoke) {
+ CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kSet);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitVarHandleGetAndSetAcquire(HInvoke* invoke) {
+ GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kSet, std::memory_order_acquire);
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitVarHandleGetAndSetRelease(HInvoke* invoke) {
+ CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kSet);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitVarHandleGetAndSetRelease(HInvoke* invoke) {
+ GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kSet, std::memory_order_release);
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitVarHandleGetAndAdd(HInvoke* invoke) {
+ CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kAdd);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitVarHandleGetAndAdd(HInvoke* invoke) {
+ GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kAdd, std::memory_order_seq_cst);
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitVarHandleGetAndAddAcquire(HInvoke* invoke) {
+ CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kAdd);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitVarHandleGetAndAddAcquire(HInvoke* invoke) {
+ GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kAdd, std::memory_order_acquire);
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitVarHandleGetAndAddRelease(HInvoke* invoke) {
+ CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kAdd);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitVarHandleGetAndAddRelease(HInvoke* invoke) {
+ GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kAdd, std::memory_order_release);
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitVarHandleGetAndBitwiseAnd(HInvoke* invoke) {
+ CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kAnd);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitVarHandleGetAndBitwiseAnd(HInvoke* invoke) {
+ GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kAnd, std::memory_order_seq_cst);
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitVarHandleGetAndBitwiseAndAcquire(HInvoke* invoke) {
+ CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kAnd);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitVarHandleGetAndBitwiseAndAcquire(HInvoke* invoke) {
+ GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kAnd, std::memory_order_acquire);
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitVarHandleGetAndBitwiseAndRelease(HInvoke* invoke) {
+ CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kAnd);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitVarHandleGetAndBitwiseAndRelease(HInvoke* invoke) {
+ GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kAnd, std::memory_order_release);
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitVarHandleGetAndBitwiseOr(HInvoke* invoke) {
+ CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kOr);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitVarHandleGetAndBitwiseOr(HInvoke* invoke) {
+ GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kOr, std::memory_order_seq_cst);
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitVarHandleGetAndBitwiseOrAcquire(HInvoke* invoke) {
+ CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kOr);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitVarHandleGetAndBitwiseOrAcquire(HInvoke* invoke) {
+ GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kOr, std::memory_order_acquire);
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitVarHandleGetAndBitwiseOrRelease(HInvoke* invoke) {
+ CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kOr);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitVarHandleGetAndBitwiseOrRelease(HInvoke* invoke) {
+ GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kOr, std::memory_order_release);
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitVarHandleGetAndBitwiseXor(HInvoke* invoke) {
+ CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kXor);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitVarHandleGetAndBitwiseXor(HInvoke* invoke) {
+ GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kXor, std::memory_order_seq_cst);
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitVarHandleGetAndBitwiseXorAcquire(HInvoke* invoke) {
+ CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kXor);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitVarHandleGetAndBitwiseXorAcquire(HInvoke* invoke) {
+ GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kXor, std::memory_order_acquire);
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitVarHandleGetAndBitwiseXorRelease(HInvoke* invoke) {
+ CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kXor);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitVarHandleGetAndBitwiseXorRelease(HInvoke* invoke) {
+ GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kXor, std::memory_order_release);
}
void VarHandleSlowPathRISCV64::EmitByteArrayViewCode(CodeGenerator* codegen_in) {