author Vladimir Marko <vmarko@google.com> 2023-11-20 10:06:45 +0000
committer Vladimír Marko <vmarko@google.com> 2023-11-24 11:52:40 +0000
commit aeefe81cc03267d7ba9a6bfaf8daef91fbd33aa0 (patch)
tree 9ea7db94ab2a5b454d42c04200a8efa44580372c /compiler
parent 0ac7d570c99ddb1a22429a2f51e05d48a26daa36 (diff)
riscv64: Implement VarHandle.GetAndUpdate intrinsics.
Also fix `GenerateCompareAndSet()` to avoid ANDN inside a LR/SC
sequence as the instruction is not part of the base "I" instruction set.

Test: testrunner.py --target --64 --ndebug --optimizing
Bug: 283082089
Change-Id: I09caa0486d9bedf93a40f0f15cab1e6bef19969c
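The replacement of ANDN inside the LR/SC loop can be sanity-checked in isolation: when `masked` already holds `old_value & mask`, XOR-ing it back out of `old_value` clears exactly the masked bits, which is what the removed ANDN computed. A minimal standalone C++ check (not part of the patch; the values and names are illustrative):

#include <cassert>
#include <cstdint>

int main() {
  const uint32_t old_value = 0x12345678u;
  const uint32_t mask      = 0x00FFFF00u;       // bits being replaced
  const uint32_t new_value = 0x00ABCD00u;       // already shifted into the mask position
  const uint32_t masked    = old_value & mask;  // computed earlier in the LR/SC loop

  // Removed sequence: needs ANDN, which is not in the base "I" set allowed between LR and SC.
  const uint32_t with_andn = (old_value & ~mask) | new_value;
  // New sequence: XOR clears the masked bits because masked == old_value & mask.
  const uint32_t with_xor  = (old_value ^ masked) | new_value;

  assert(with_andn == with_xor);
  return 0;
}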
Diffstat (limited to 'compiler')
-rw-r--r--  compiler/optimizing/code_generator_riscv64.cc    2
-rw-r--r--  compiler/optimizing/code_generator_riscv64.h    20
-rw-r--r--  compiler/optimizing/intrinsics_riscv64.cc      678
3 files changed, 629 insertions, 71 deletions
diff --git a/compiler/optimizing/code_generator_riscv64.cc b/compiler/optimizing/code_generator_riscv64.cc
index 5006371377..fb44abce55 100644
--- a/compiler/optimizing/code_generator_riscv64.cc
+++ b/compiler/optimizing/code_generator_riscv64.cc
@@ -786,7 +786,7 @@ inline void InstructionCodeGeneratorRISCV64::FpBinOp(
}
}
-inline void InstructionCodeGeneratorRISCV64::FAdd(
+void InstructionCodeGeneratorRISCV64::FAdd(
FRegister rd, FRegister rs1, FRegister rs2, DataType::Type type) {
FpBinOp<FRegister, &Riscv64Assembler::FAddS, &Riscv64Assembler::FAddD>(rd, rs1, rs2, type);
}
diff --git a/compiler/optimizing/code_generator_riscv64.h b/compiler/optimizing/code_generator_riscv64.h
index b1c45dea82..20c48db12c 100644
--- a/compiler/optimizing/code_generator_riscv64.h
+++ b/compiler/optimizing/code_generator_riscv64.h
@@ -160,21 +160,6 @@ static constexpr int32_t kFClassNaNMinValue = 0x100;
V(CRC32UpdateByteBuffer) \
V(MethodHandleInvokeExact) \
V(MethodHandleInvoke) \
- V(VarHandleGetAndAdd) \
- V(VarHandleGetAndAddAcquire) \
- V(VarHandleGetAndAddRelease) \
- V(VarHandleGetAndBitwiseAnd) \
- V(VarHandleGetAndBitwiseAndAcquire) \
- V(VarHandleGetAndBitwiseAndRelease) \
- V(VarHandleGetAndBitwiseOr) \
- V(VarHandleGetAndBitwiseOrAcquire) \
- V(VarHandleGetAndBitwiseOrRelease) \
- V(VarHandleGetAndBitwiseXor) \
- V(VarHandleGetAndBitwiseXorAcquire) \
- V(VarHandleGetAndBitwiseXorRelease) \
- V(VarHandleGetAndSet) \
- V(VarHandleGetAndSetAcquire) \
- V(VarHandleGetAndSetRelease) \
V(ByteValueOf) \
V(ShortValueOf) \
V(CharacterValueOf) \
@@ -183,6 +168,9 @@ static constexpr int32_t kFClassNaNMinValue = 0x100;
// Method register on invoke.
static const XRegister kArtMethodRegister = A0;
+// Helper used by codegen as well as intrinsics.
+XRegister InputXRegisterOrZero(Location location);
+
class CodeGeneratorRISCV64;
class InvokeRuntimeCallingConvention : public CallingConvention<XRegister, FRegister> {
@@ -367,6 +355,7 @@ class InstructionCodeGeneratorRISCV64 : public InstructionCodeGenerator {
void GenerateMemoryBarrier(MemBarrierKind kind);
+ void FAdd(FRegister rd, FRegister rs1, FRegister rs2, DataType::Type type);
void FClass(XRegister rd, FRegister rs1, DataType::Type type);
void Load(Location out, XRegister rs1, int32_t offset, DataType::Type type);
@@ -473,7 +462,6 @@ class InstructionCodeGeneratorRISCV64 : public InstructionCodeGenerator {
void (Riscv64Assembler::*opS)(Reg, FRegister, FRegister),
void (Riscv64Assembler::*opD)(Reg, FRegister, FRegister)>
void FpBinOp(Reg rd, FRegister rs1, FRegister rs2, DataType::Type type);
- void FAdd(FRegister rd, FRegister rs1, FRegister rs2, DataType::Type type);
void FSub(FRegister rd, FRegister rs1, FRegister rs2, DataType::Type type);
void FDiv(FRegister rd, FRegister rs1, FRegister rs2, DataType::Type type);
void FMul(FRegister rd, FRegister rs1, FRegister rs2, DataType::Type type);
diff --git a/compiler/optimizing/intrinsics_riscv64.cc b/compiler/optimizing/intrinsics_riscv64.cc
index 225809d58a..fb3f11357a 100644
--- a/compiler/optimizing/intrinsics_riscv64.cc
+++ b/compiler/optimizing/intrinsics_riscv64.cc
@@ -553,6 +553,35 @@ void IntrinsicCodeGeneratorRISCV64::VisitStringIndexOfAfter(HInvoke* invoke) {
GenerateVisitStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero= */ false);
}
+std::pair<AqRl, AqRl> GetLrScAqRl(std::memory_order order) {
+ AqRl load_aqrl = AqRl::kNone;
+ AqRl store_aqrl = AqRl::kNone;
+ if (order == std::memory_order_acquire) {
+ load_aqrl = AqRl::kAcquire;
+ } else if (order == std::memory_order_release) {
+ store_aqrl = AqRl::kRelease;
+ } else if (order == std::memory_order_seq_cst) {
+ load_aqrl = AqRl::kAqRl;
+ store_aqrl = AqRl::kRelease;
+ } else {
+ DCHECK(order == std::memory_order_relaxed);
+ }
+ return {load_aqrl, store_aqrl};
+}
+
+AqRl GetAmoAqRl(std::memory_order order) {
+ AqRl amo_aqrl = AqRl::kNone;
+ if (order == std::memory_order_acquire) {
+ amo_aqrl = AqRl::kAcquire;
+ } else if (order == std::memory_order_release) {
+ amo_aqrl = AqRl::kRelease;
+ } else {
+ DCHECK(order == std::memory_order_seq_cst);
+ amo_aqrl = AqRl::kAqRl;
+ }
+ return amo_aqrl;
+}
+
static void EmitLoadReserved(Riscv64Assembler* assembler,
DataType::Type type,
XRegister ptr,
@@ -620,18 +649,7 @@ static void GenerateCompareAndSet(Riscv64Assembler* assembler,
// from the main path attempt to emit CAS when the marked old value matched `expected`.
DCHECK_IMPLIES(expected2 != kNoXRegister, type == DataType::Type::kReference);
- AqRl load_aqrl = AqRl::kNone;
- AqRl store_aqrl = AqRl::kNone;
- if (order == std::memory_order_acquire) {
- load_aqrl = AqRl::kAcquire;
- } else if (order == std::memory_order_release) {
- store_aqrl = AqRl::kRelease;
- } else if (order == std::memory_order_seq_cst) {
- load_aqrl = AqRl::kAqRl;
- store_aqrl = AqRl::kRelease;
- } else {
- DCHECK(order == std::memory_order_relaxed);
- }
+ auto [load_aqrl, store_aqrl] = GetLrScAqRl(order);
// repeat: {
// old_value = [ptr]; // Load exclusive.
@@ -665,9 +683,9 @@ static void GenerateCompareAndSet(Riscv64Assembler* assembler,
// The `old_value` does not need to be preserved as the caller shall use `masked`
// to return the old value if needed.
to_store = old_value;
- // TODO(riscv64): We could XOR the old and new value before the loop and use XOR here
- // instead of the ANDN+OR. (The `new_value` is either Zero or a temporary we can clobber.)
- __ Andn(to_store, old_value, mask);
+ // TODO(riscv64): We could XOR the old and new value before the loop and use a single XOR here
+ // instead of the XOR+OR. (The `new_value` is either Zero or a temporary we can clobber.)
+ __ Xor(to_store, old_value, masked);
__ Or(to_store, to_store, new_value);
} else if (expected2 != kNoXRegister) {
Riscv64Label match2;
@@ -846,12 +864,109 @@ class ReadBarrierCasSlowPathRISCV64 : public SlowPathCodeRISCV64 {
enum class GetAndUpdateOp {
kSet,
kAdd,
- kAddWithByteSwap,
kAnd,
kOr,
kXor
};
+// Generate a GetAndUpdate operation.
+//
+// Only 32-bit and 64-bit atomics are currently supported, therefore smaller types need
+// special handling. The caller emits code to prepare aligned `ptr` and adjusted `arg`
+// and extract the needed bits from `old_value`. For bitwise operations, no extra
+// handling is needed here. For `GetAndUpdateOp::kSet` and `GetAndUpdateOp::kAdd` we
+// also use a special LR/SC sequence that uses a `mask` to update only the desired bits.
+// Note: The `mask` must contain the bits to keep for `GetAndUpdateOp::kSet` and
+// the bits to replace for `GetAndUpdateOp::kAdd`.
+static void GenerateGetAndUpdate(CodeGeneratorRISCV64* codegen,
+ GetAndUpdateOp get_and_update_op,
+ DataType::Type type,
+ std::memory_order order,
+ XRegister ptr,
+ XRegister arg,
+ XRegister old_value,
+ XRegister mask,
+ XRegister temp) {
+ DCHECK_EQ(mask != kNoXRegister, temp != kNoXRegister);
+ DCHECK_IMPLIES(mask != kNoXRegister, type == DataType::Type::kInt32);
+ DCHECK_IMPLIES(
+ mask != kNoXRegister,
+ (get_and_update_op == GetAndUpdateOp::kSet) || (get_and_update_op == GetAndUpdateOp::kAdd));
+ Riscv64Assembler* assembler = codegen->GetAssembler();
+ AqRl amo_aqrl = GetAmoAqRl(order);
+ switch (get_and_update_op) {
+ case GetAndUpdateOp::kSet:
+ if (type == DataType::Type::kInt64) {
+ __ AmoSwapD(old_value, arg, ptr, amo_aqrl);
+ } else if (mask == kNoXRegister) {
+ DCHECK_EQ(type, DataType::Type::kInt32);
+ __ AmoSwapW(old_value, arg, ptr, amo_aqrl);
+ } else {
+ DCHECK_EQ(type, DataType::Type::kInt32);
+ DCHECK_NE(temp, kNoXRegister);
+ auto [load_aqrl, store_aqrl] = GetLrScAqRl(order);
+ Riscv64Label retry;
+ __ Bind(&retry);
+ __ LrW(old_value, ptr, load_aqrl);
+ __ And(temp, old_value, mask);
+ __ Or(temp, temp, arg);
+ __ ScW(temp, temp, ptr, store_aqrl);
+ __ Bnez(temp, &retry, /*is_bare=*/ true); // Bare: `TMP` shall not be clobbered.
+ }
+ break;
+ case GetAndUpdateOp::kAdd:
+ if (type == DataType::Type::kInt64) {
+ __ AmoAddD(old_value, arg, ptr, amo_aqrl);
+ } else if (mask == kNoXRegister) {
+ DCHECK_EQ(type, DataType::Type::kInt32);
+ __ AmoAddW(old_value, arg, ptr, amo_aqrl);
+ } else {
+ DCHECK_EQ(type, DataType::Type::kInt32);
+ DCHECK_NE(temp, kNoXRegister);
+ auto [load_aqrl, store_aqrl] = GetLrScAqRl(order);
+ Riscv64Label retry;
+ __ Bind(&retry);
+ __ LrW(old_value, ptr, load_aqrl);
+ __ Add(temp, old_value, arg);
+ // We use `(A ^ B) ^ A == B` and with the masking `((A ^ B) & mask) ^ A`, the result
+ // contains bits from `B` for bits specified in `mask` and bits from `A` elsewhere.
+ // Note: These instructions directly depend on each other, so it's not necessarily the
+ // fastest approach but for `(A & ~mask) | (B & mask)` we would need an extra register for
+ // `~mask` because ANDN is not in the "I" instruction set as required for a LR/SC sequence.
+ __ Xor(temp, temp, old_value);
+ __ And(temp, temp, mask);
+ __ Xor(temp, temp, old_value);
+ __ ScW(temp, temp, ptr, store_aqrl);
+ __ Bnez(temp, &retry, /*is_bare=*/ true); // Bare: `TMP` shall not be clobbered.
+ }
+ break;
+ case GetAndUpdateOp::kAnd:
+ if (type == DataType::Type::kInt64) {
+ __ AmoAndD(old_value, arg, ptr, amo_aqrl);
+ } else {
+ DCHECK_EQ(type, DataType::Type::kInt32);
+ __ AmoAndW(old_value, arg, ptr, amo_aqrl);
+ }
+ break;
+ case GetAndUpdateOp::kOr:
+ if (type == DataType::Type::kInt64) {
+ __ AmoOrD(old_value, arg, ptr, amo_aqrl);
+ } else {
+ DCHECK_EQ(type, DataType::Type::kInt32);
+ __ AmoOrW(old_value, arg, ptr, amo_aqrl);
+ }
+ break;
+ case GetAndUpdateOp::kXor:
+ if (type == DataType::Type::kInt64) {
+ __ AmoXorD(old_value, arg, ptr, amo_aqrl);
+ } else {
+ DCHECK_EQ(type, DataType::Type::kInt32);
+ __ AmoXorW(old_value, arg, ptr, amo_aqrl);
+ }
+ break;
+ }
+}
+
class VarHandleSlowPathRISCV64 : public IntrinsicSlowPathRISCV64 {
public:
VarHandleSlowPathRISCV64(HInvoke* invoke, std::memory_order order)
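A small standalone C++ sketch (illustrative names and values, not ART code) of the merge identity `((A ^ B) & mask) ^ A` used by the masked `GetAndUpdateOp::kAdd` path above; it selects the bits of `B` covered by `mask` and keeps the bits of `A` elsewhere, without needing ANDN or a register for `~mask`:

#include <cassert>
#include <cstdint>

// Take bits of `inside` where `mask` is set and bits of `outside` elsewhere.
static uint32_t MergeWithMask(uint32_t outside, uint32_t inside, uint32_t mask) {
  return ((outside ^ inside) & mask) ^ outside;
}

int main() {
  const uint32_t old_value = 0xAABBCCDDu;              // A: current 32-bit word
  const uint32_t mask      = 0x0000FF00u;              // bits of the narrow field being updated
  const uint32_t sum       = old_value + 0x00000100u;  // B: old_value + shifted argument
  const uint32_t merged    = MergeWithMask(old_value, sum, mask);

  assert((merged & mask) == (sum & mask));          // updated bits come from the sum
  assert((merged & ~mask) == (old_value & ~mask));  // all other bits are preserved
  return 0;
}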
@@ -1610,21 +1725,21 @@ static void CreateVarHandleCompareAndSetOrExchangeLocations(HInvoke* invoke,
Location expected = locations->InAt(expected_index);
Location new_value = locations->InAt(new_value_index);
size_t data_size = DataType::Size(value_type);
- bool small = (data_size < 4u);
- bool byte_swap =
+ bool is_small = (data_size < 4u);
+ bool can_byte_swap =
(expected_index == 3u) && (value_type != DataType::Type::kReference && data_size != 1u);
- bool fp = DataType::IsFloatingPointType(value_type);
+ bool is_fp = DataType::IsFloatingPointType(value_type);
size_t temps_needed =
// The offset temp is used for the `tmp_ptr`.
1u +
// For small values, we need a temp for the `mask`, `masked` and maybe also for the `shift`.
- (small ? (return_success ? 2u : 3u) : 0u) +
+ (is_small ? (return_success ? 2u : 3u) : 0u) +
// Some cases need modified copies of `new_value` and `expected`.
- (ScratchXRegisterNeeded(expected, value_type, byte_swap) ? 1u : 0u) +
- (ScratchXRegisterNeeded(new_value, value_type, byte_swap) ? 1u : 0u) +
+ (ScratchXRegisterNeeded(expected, value_type, can_byte_swap) ? 1u : 0u) +
+ (ScratchXRegisterNeeded(new_value, value_type, can_byte_swap) ? 1u : 0u) +
// We need a scratch register either for the old value or for the result of SC.
// If we need to return a floating point old value, we need a temp for each.
- ((!return_success && fp) ? 2u : 1u);
+ ((!return_success && is_fp) ? 2u : 1u);
size_t scratch_registers_available = 2u;
DCHECK_EQ(scratch_registers_available,
ScratchRegisterScope(codegen->GetAssembler()).AvailableXRegisters());
@@ -1643,7 +1758,7 @@ static XRegister PrepareXRegister(CodeGeneratorRISCV64* codegen,
XRegister mask,
bool byte_swap,
ScratchRegisterScope* srs) {
- DCHECK_EQ(shift == kNoXRegister, mask == kNoXRegister);
+ DCHECK_IMPLIES(mask != kNoXRegister, shift != kNoXRegister);
DCHECK_EQ(shift == kNoXRegister, DataType::Size(type) >= 4u);
if (loc.IsConstant()) {
// The `shift`/`mask` and `byte_swap` are irrelevant for zero input.
@@ -1671,13 +1786,35 @@ static XRegister PrepareXRegister(CodeGeneratorRISCV64* codegen,
Riscv64Assembler* assembler = codegen->GetAssembler();
__ Sllw(result.AsRegister<XRegister>(), loc.AsRegister<XRegister>(), shift);
DCHECK_NE(type, DataType::Type::kUint8);
- if (type != DataType::Type::kUint16 && type != DataType::Type::kBool) {
+ if (mask != kNoXRegister && type != DataType::Type::kUint16 && type != DataType::Type::kBool) {
__ And(result.AsRegister<XRegister>(), result.AsRegister<XRegister>(), mask);
}
}
return result.AsRegister<XRegister>();
}
+static void GenerateByteSwapAndExtract(Riscv64Assembler* assembler,
+ Location rd,
+ XRegister rs1,
+ XRegister shift,
+ DataType::Type type) {
+ // Do not apply shift in `GenerateReverseBytes()` for small types.
+ DCHECK_EQ(shift != kNoXRegister, DataType::Size(type) < 4u);
+ DataType::Type swap_type = (shift != kNoXRegister) ? DataType::Type::kInt32 : type;
+ // Also handles moving to FP registers.
+ GenerateReverseBytes(assembler, rd, rs1, swap_type);
+ if (shift != kNoXRegister) {
+ DCHECK_EQ(rs1, rd.AsRegister<XRegister>());
+ __ Sllw(rs1, rs1, shift);
+ if (type == DataType::Type::kUint16) {
+ __ Srliw(rs1, rs1, 16);
+ } else {
+ DCHECK_EQ(type, DataType::Type::kInt16);
+ __ Sraiw(rs1, rs1, 16);
+ }
+ }
+}
+
static void GenerateVarHandleCompareAndSetOrExchange(HInvoke* invoke,
CodeGeneratorRISCV64* codegen,
std::memory_order order,
@@ -1727,7 +1864,8 @@ static void GenerateVarHandleCompareAndSetOrExchange(HInvoke* invoke,
DCHECK_EQ(target.offset, locations->GetTemp(0u).AsRegister<XRegister>());
size_t next_temp = 1u;
XRegister tmp_ptr = target.offset;
- if (value_type == DataType::Type::kReference && codegen->EmitReadBarrier()) {
+ bool is_reference = (value_type == DataType::Type::kReference);
+ if (is_reference && codegen->EmitReadBarrier()) {
DCHECK_EQ(available_scratch_registers, 2u);
available_scratch_registers -= 1u;
DCHECK_EQ(expected_index, 1u + GetExpectedVarHandleCoordinatesCount(invoke));
@@ -1751,15 +1889,16 @@ static void GenerateVarHandleCompareAndSetOrExchange(HInvoke* invoke,
XRegister mask = kNoXRegister;
XRegister masked = kNoXRegister;
size_t data_size = DataType::Size(value_type);
- if (data_size < 4u) {
+ bool is_small = (data_size < 4u);
+ if (is_small) {
// When returning "success" and not the old value, we shall not need the `shift` after
// the raw CAS operation, so use the output register as a temporary here.
shift = return_success ? locations->Out().AsRegister<XRegister>() : get_temp();
mask = get_temp();
masked = get_temp();
- __ Andi(shift, tmp_ptr, 3);
+ // Upper bits of the shift are not used, so we do not need to clear them.
+ __ Slli(shift, tmp_ptr, WhichPowerOf2(kBitsPerByte));
__ Andi(tmp_ptr, tmp_ptr, -4);
- __ Slli(shift, shift, WhichPowerOf2(kBitsPerByte));
__ Li(mask, (1 << (data_size * kBitsPerByte)) - 1);
__ Sllw(mask, mask, shift);
}
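The shift setup above now uses a single `Slli` without first masking `tmp_ptr` to its low two bits; this relies on `Sllw`/`Srlw` reading only the low five bits of the shift-amount register. A quick standalone C++ check of that equivalence (illustrative, not ART code):

#include <cassert>
#include <cstdint>

int main() {
  // On RV64, SLLW/SRLW use only bits [4:0] of the shift-amount register, so
  // (tmp_ptr << 3) and ((tmp_ptr & 3) << 3) produce the same effective shift.
  for (uint64_t tmp_ptr = 0; tmp_ptr < 1024; ++tmp_ptr) {
    const uint64_t single_slli    = (tmp_ptr << 3) & 0x1Fu;  // new code: Slli only
    const uint64_t andi_then_slli = (tmp_ptr & 3u) << 3;     // old code: Andi + Slli
    assert(single_slli == andi_then_slli);
  }
  return 0;
}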
@@ -1770,9 +1909,10 @@ static void GenerateVarHandleCompareAndSetOrExchange(HInvoke* invoke,
PrepareXRegister(codegen, expected, value_type, shift, mask, byte_swap, &srs);
XRegister new_value_reg =
PrepareXRegister(codegen, new_value, value_type, shift, mask, byte_swap, &srs);
- DataType::Type cas_type = DataType::IsFloatingPointType(value_type)
+ bool is_fp = DataType::IsFloatingPointType(value_type);
+ DataType::Type cas_type = is_fp
? IntTypeForFloatingPointType(value_type)
- : (data_size >= 4u ? value_type : DataType::Type::kInt32);
+ : (is_small ? DataType::Type::kInt32 : value_type);
// Prepare registers for old value and the result of the store conditional.
XRegister old_value;
@@ -1781,7 +1921,7 @@ static void GenerateVarHandleCompareAndSetOrExchange(HInvoke* invoke,
// Use a temp for the old value and the output register for the store conditional result.
old_value = get_temp();
store_result = out.AsRegister<XRegister>();
- } else if (DataType::IsFloatingPointType(value_type)) {
+ } else if (is_fp) {
// We need two temporary registers.
old_value = get_temp();
store_result = get_temp();
@@ -1795,7 +1935,7 @@ static void GenerateVarHandleCompareAndSetOrExchange(HInvoke* invoke,
Riscv64Label* exit_loop = &exit_loop_label;
Riscv64Label* cmp_failure = &exit_loop_label;
- if (value_type == DataType::Type::kReference && codegen->EmitReadBarrier()) {
+ if (is_reference && codegen->EmitReadBarrier()) {
// The `old_value_temp` is used first for marking the `old_value` and then for the unmarked
// reloaded old value for subsequent CAS in the slow path. It cannot be a scratch register.
XRegister old_value_temp = locations->GetTemp(next_temp).AsRegister<XRegister>();
@@ -1849,24 +1989,11 @@ static void GenerateVarHandleCompareAndSetOrExchange(HInvoke* invoke,
if (return_success) {
// Nothing to do, the result register already contains 1 on success and 0 on failure.
} else if (byte_swap) {
- // Do not apply shift in `GenerateReverseBytes()` for small types.
- DataType::Type swap_type = data_size < 4u ? DataType::Type::kInt32 : value_type;
- // Also handles moving to FP registers.
- GenerateReverseBytes(assembler, out, old_value, swap_type);
- if (data_size < 4u) {
- DCHECK(Location::RegisterLocation(old_value).Equals(out));
- __ Sllw(old_value, old_value, shift);
- if (value_type == DataType::Type::kUint16) {
- __ Srliw(old_value, old_value, 16);
- } else {
- DCHECK_EQ(value_type, DataType::Type::kInt16);
- __ Sraiw(old_value, old_value, 16);
- }
- }
- } else if (DataType::IsFloatingPointType(value_type)) {
+ GenerateByteSwapAndExtract(assembler, out, old_value, shift, value_type);
+ } else if (is_fp) {
codegen->MoveLocation(out, Location::RegisterLocation(old_value), value_type);
- } else if (data_size < 4u) {
- __ Srl(old_value, masked, shift);
+ } else if (is_small) {
+ __ Srlw(old_value, masked, shift);
if (value_type == DataType::Type::kInt8) {
__ SextB(old_value, old_value);
} else if (value_type == DataType::Type::kInt16) {
@@ -1881,8 +2008,7 @@ static void GenerateVarHandleCompareAndSetOrExchange(HInvoke* invoke,
// Check that we have allocated the right number of temps. We may need more registers
// for byte swapped CAS in the slow path, so skip this check for the main path in that case.
- bool has_byte_swap =
- (expected_index == 3u) && (value_type != DataType::Type::kReference && data_size != 1u);
+ bool has_byte_swap = (expected_index == 3u) && (!is_reference && data_size != 1u);
if ((!has_byte_swap || byte_swap) && next_temp != locations->GetTempCount()) {
// We allocate a temporary register for the class object for a static field `VarHandle` but
// we do not update the `next_temp` if it's otherwise unused after the address calculation.
@@ -1964,13 +2090,457 @@ void IntrinsicCodeGeneratorRISCV64::VisitVarHandleWeakCompareAndSetRelease(HInvo
invoke, codegen_, std::memory_order_release, /*return_success=*/ true, /*strong=*/ false);
}
+static void CreateVarHandleGetAndUpdateLocations(HInvoke* invoke,
+ CodeGeneratorRISCV64* codegen,
+ GetAndUpdateOp get_and_update_op) {
+ VarHandleOptimizations optimizations(invoke);
+ if (optimizations.GetDoNotIntrinsify()) {
+ return;
+ }
+
+ if (invoke->GetType() == DataType::Type::kReference && codegen->EmitNonBakerReadBarrier()) {
+ // Unsupported for non-Baker read barrier because the artReadBarrierSlow() ignores
+ // the passed reference and reloads it from the field, thus seeing the new value
+ // that we have just stored. (And it also gets the memory visibility wrong.) b/173104084
+ return;
+ }
+
+ LocationSummary* locations = CreateVarHandleCommonLocations(invoke, codegen);
+ uint32_t arg_index = invoke->GetNumberOfArguments() - 1;
+ DCHECK_EQ(arg_index, 1u + GetExpectedVarHandleCoordinatesCount(invoke));
+ DataType::Type value_type = invoke->GetType();
+ DCHECK_EQ(value_type, GetDataTypeFromShorty(invoke, arg_index));
+ Location arg = locations->InAt(arg_index);
+
+ bool is_fp = DataType::IsFloatingPointType(value_type);
+ if (is_fp) {
+ if (get_and_update_op == GetAndUpdateOp::kAdd) {
+ // For ADD, do not use ZR for zero bit pattern (+0.0f or +0.0).
+ locations->SetInAt(invoke->GetNumberOfArguments() - 1u, Location::RequiresFpuRegister());
+ } else {
+ DCHECK(get_and_update_op == GetAndUpdateOp::kSet);
+ }
+ }
+
+ size_t data_size = DataType::Size(value_type);
+ bool can_byte_swap =
+ (arg_index == 3u) && (value_type != DataType::Type::kReference && data_size != 1u);
+ bool can_use_cas = (get_and_update_op == GetAndUpdateOp::kAdd) && (can_byte_swap || is_fp);
+ bool is_small = (data_size < 4u);
+ bool is_small_and = is_small && (get_and_update_op == GetAndUpdateOp::kAnd);
+ bool is_bitwise =
+ (get_and_update_op != GetAndUpdateOp::kSet && get_and_update_op != GetAndUpdateOp::kAdd);
+
+ size_t temps_needed =
+ // The offset temp is used for the `tmp_ptr`.
+ 1u +
+ // For small values, we need temps for `shift` and maybe also `mask` and `temp`.
+ (is_small ? (is_bitwise ? 1u : 3u) : 0u) +
+ // Some cases need modified copies of `arg`.
+ (is_small_and || ScratchXRegisterNeeded(arg, value_type, can_byte_swap) ? 1u : 0u) +
+ // For FP types, we need a temp for `old_value` which cannot be loaded directly to `out`.
+ (is_fp ? 1u : 0u);
+ if (can_use_cas) {
+ size_t cas_temps_needed =
+ // The offset temp is used for the `tmp_ptr`.
+ 1u +
+ // For small values, we need a temp for `shift`.
+ (is_small ? 1u : 0u) +
+ // And we always need temps for `old_value`, `new_value` and `reloaded_old_value`.
+ 3u;
+ DCHECK_GE(cas_temps_needed, temps_needed);
+ temps_needed = cas_temps_needed;
+ }
+
+ size_t scratch_registers_available = 2u;
+ DCHECK_EQ(scratch_registers_available,
+ ScratchRegisterScope(codegen->GetAssembler()).AvailableXRegisters());
+ size_t old_temp_count = locations->GetTempCount();
+ DCHECK_EQ(old_temp_count, (arg_index == 1u) ? 2u : 1u);
+ if (temps_needed > old_temp_count + scratch_registers_available) {
+ locations->AddRegisterTemps(temps_needed - (old_temp_count + scratch_registers_available));
+ }
+}
+
static void GenerateVarHandleGetAndUpdate(HInvoke* invoke,
CodeGeneratorRISCV64* codegen,
GetAndUpdateOp get_and_update_op,
std::memory_order order,
bool byte_swap = false) {
- UNUSED(invoke, codegen, get_and_update_op, order, byte_swap);
- LOG(FATAL) << "Unimplemented!";
+ uint32_t arg_index = invoke->GetNumberOfArguments() - 1;
+ DCHECK_EQ(arg_index, 1u + GetExpectedVarHandleCoordinatesCount(invoke));
+ DataType::Type value_type = invoke->GetType();
+ DCHECK_EQ(value_type, GetDataTypeFromShorty(invoke, arg_index));
+
+ Riscv64Assembler* assembler = codegen->GetAssembler();
+ LocationSummary* locations = invoke->GetLocations();
+ Location arg = locations->InAt(arg_index);
+ DCHECK_IMPLIES(arg.IsConstant(), arg.GetConstant()->IsZeroBitPattern());
+ Location out = locations->Out();
+
+ VarHandleTarget target = GetVarHandleTarget(invoke);
+ VarHandleSlowPathRISCV64* slow_path = nullptr;
+ if (!byte_swap) {
+ slow_path = GenerateVarHandleChecks(invoke, codegen, order, value_type);
+ GenerateVarHandleTarget(invoke, target, codegen);
+ if (slow_path != nullptr) {
+ slow_path->SetGetAndUpdateOp(get_and_update_op);
+ __ Bind(slow_path->GetNativeByteOrderLabel());
+ }
+ }
+
+ // This needs to be before the temp registers, as MarkGCCard also uses scratch registers.
+ if (CodeGenerator::StoreNeedsWriteBarrier(value_type, invoke->InputAt(arg_index))) {
+ DCHECK(get_and_update_op == GetAndUpdateOp::kSet);
+ // Mark card for object, the new value shall be stored.
+ bool new_value_can_be_null = true; // TODO: Worth finding out this information?
+ codegen->MarkGCCard(target.object, arg.AsRegister<XRegister>(), new_value_can_be_null);
+ }
+
+ size_t data_size = DataType::Size(value_type);
+ bool is_fp = DataType::IsFloatingPointType(value_type);
+ bool use_cas = (get_and_update_op == GetAndUpdateOp::kAdd) && (byte_swap || is_fp);
+ bool is_small = (data_size < 4u);
+ bool is_small_and = is_small && (get_and_update_op == GetAndUpdateOp::kAnd);
+ bool is_reference = (value_type == DataType::Type::kReference);
+ DataType::Type op_type = is_fp
+ ? IntTypeForFloatingPointType(value_type)
+ : (is_small || is_reference ? DataType::Type::kInt32 : value_type);
+
+ ScratchRegisterScope srs(assembler);
+ DCHECK_EQ(srs.AvailableXRegisters(), 2u);
+ size_t available_scratch_registers = use_cas
+ // We use scratch registers differently for the CAS path.
+ ? 0u
+ // Reserve one scratch register for `PrepareXRegister()` or similar `arg_reg` allocation.
+ : (is_small_and || ScratchXRegisterNeeded(arg, value_type, byte_swap) ? 1u : 2u);
+
+ // Reuse the `target.offset` temporary for the pointer to the target location,
+ // except for references that need the offset for the non-Baker read barrier.
+ DCHECK_EQ(target.offset, locations->GetTemp(0u).AsRegister<XRegister>());
+ size_t next_temp = 1u;
+ XRegister tmp_ptr = target.offset;
+ if (is_reference && codegen->EmitNonBakerReadBarrier()) {
+ DCHECK_EQ(available_scratch_registers, 2u);
+ available_scratch_registers -= 1u;
+ tmp_ptr = srs.AllocateXRegister();
+ }
+ __ Add(tmp_ptr, target.object, target.offset);
+
+ auto get_temp = [&]() {
+ if (available_scratch_registers != 0u) {
+ available_scratch_registers -= 1u;
+ return srs.AllocateXRegister();
+ } else {
+ XRegister temp = locations->GetTemp(next_temp).AsRegister<XRegister>();
+ next_temp += 1u;
+ return temp;
+ }
+ };
+
+ XRegister shift = kNoXRegister;
+ XRegister mask = kNoXRegister;
+ XRegister prepare_mask = kNoXRegister;
+ XRegister temp = kNoXRegister;
+ XRegister arg_reg = kNoXRegister;
+ if (is_small) {
+ shift = get_temp();
+ // Upper bits of the shift are not used, so we do not need to clear them.
+ __ Slli(shift, tmp_ptr, WhichPowerOf2(kBitsPerByte));
+ __ Andi(tmp_ptr, tmp_ptr, -4);
+ switch (get_and_update_op) {
+ case GetAndUpdateOp::kAdd:
+ if (byte_swap) {
+ // The mask is not needed in the CAS path.
+ DCHECK(use_cas);
+ break;
+ }
+ FALLTHROUGH_INTENDED;
+ case GetAndUpdateOp::kSet:
+ mask = get_temp();
+ temp = get_temp();
+ __ Li(mask, (1 << (data_size * kBitsPerByte)) - 1);
+ __ Sllw(mask, mask, shift);
+ // The argument does not need to be masked for `GetAndUpdateOp::kAdd`,
+ // the mask shall be applied after the ADD instruction.
+ prepare_mask = (get_and_update_op == GetAndUpdateOp::kSet) ? mask : kNoXRegister;
+ break;
+ case GetAndUpdateOp::kAnd:
+ // We need to set all other bits, so we always need a temp.
+ arg_reg = srs.AllocateXRegister();
+ if (data_size == 1u) {
+ __ Ori(arg_reg, InputXRegisterOrZero(arg), ~0xff);
+ DCHECK(!byte_swap);
+ } else {
+ DCHECK_EQ(data_size, 2u);
+ __ Li(arg_reg, ~0xffff);
+ __ Or(arg_reg, InputXRegisterOrZero(arg), arg_reg);
+ if (byte_swap) {
+ __ Rev8(arg_reg, arg_reg);
+ __ Rori(arg_reg, arg_reg, 48);
+ }
+ }
+ __ Rolw(arg_reg, arg_reg, shift);
+ break;
+ case GetAndUpdateOp::kOr:
+ case GetAndUpdateOp::kXor:
+ // Signed values need to be truncated but we're keeping `prepare_mask == kNoXRegister`.
+ if (value_type == DataType::Type::kInt8 && !arg.IsConstant()) {
+ DCHECK(!byte_swap);
+ arg_reg = srs.AllocateXRegister();
+ __ ZextB(arg_reg, arg.AsRegister<XRegister>());
+ __ Sllw(arg_reg, arg_reg, shift);
+ } else if (value_type == DataType::Type::kInt16 && !arg.IsConstant() && !byte_swap) {
+ arg_reg = srs.AllocateXRegister();
+ __ ZextH(arg_reg, arg.AsRegister<XRegister>());
+ __ Sllw(arg_reg, arg_reg, shift);
+ } // else handled by `PrepareXRegister()` below.
+ break;
+ }
+ }
+ if (arg_reg == kNoXRegister && !use_cas) {
+ arg_reg = PrepareXRegister(codegen, arg, value_type, shift, prepare_mask, byte_swap, &srs);
+ }
+ if (mask != kNoXRegister && get_and_update_op == GetAndUpdateOp::kSet) {
+ __ Not(mask, mask); // We need to flip the mask for `kSet`, see `GenerateGetAndUpdate()`.
+ }
+
+ if (use_cas) {
+ // Allocate scratch registers for temps that can theoretically be clobbered on retry.
+ // (Even though the `retry` label shall never be far enough for `TMP` to be clobbered.)
+ DCHECK_EQ(available_scratch_registers, 0u); // Reserved for the two uses below.
+ XRegister old_value = srs.AllocateXRegister();
+ XRegister new_value = srs.AllocateXRegister();
+ // Allocate other needed temporaries.
+ XRegister reloaded_old_value = get_temp();
+ XRegister store_result = reloaded_old_value; // Clobber reloaded old value by store result.
+ FRegister ftmp = is_fp ? srs.AllocateFRegister() : kNoFRegister;
+
+ Riscv64Label retry;
+ __ Bind(&retry);
+ codegen->GetInstructionVisitor()->Load(
+ Location::RegisterLocation(old_value), tmp_ptr, /*offset=*/ 0, op_type);
+ if (byte_swap) {
+ GenerateByteSwapAndExtract(assembler, out, old_value, shift, value_type);
+ } else {
+ DCHECK(is_fp);
+ codegen->MoveLocation(out, Location::RegisterLocation(old_value), value_type);
+ }
+ if (is_fp) {
+ codegen->GetInstructionVisitor()->FAdd(
+ ftmp, out.AsFpuRegister<FRegister>(), arg.AsFpuRegister<FRegister>(), value_type);
+ codegen->MoveLocation(
+ Location::RegisterLocation(new_value), Location::FpuRegisterLocation(ftmp), op_type);
+ } else if (value_type == DataType::Type::kInt64) {
+ __ Add(new_value, out.AsRegister<XRegister>(), arg.AsRegister<XRegister>());
+ } else {
+ DCHECK_EQ(op_type, DataType::Type::kInt32);
+ __ Addw(new_value, out.AsRegister<XRegister>(), arg.AsRegister<XRegister>());
+ }
+ if (byte_swap) {
+ DataType::Type swap_type = op_type;
+ if (is_small) {
+ DCHECK_EQ(data_size, 2u);
+ // We want to update only 16 bits of the 32-bit location. The 16 bits we want to replace
+ // are present in both `old_value` and `out` but in different bits and byte order.
+ // To update the 16 bits, we can XOR the new value with the `out`, byte swap as Uint16
+ // (extracting only the bits we want to update), shift and XOR with the old value.
+ swap_type = DataType::Type::kUint16;
+ __ Xor(new_value, new_value, out.AsRegister<XRegister>());
+ }
+ GenerateReverseBytes(assembler, Location::RegisterLocation(new_value), new_value, swap_type);
+ if (is_small) {
+ __ Sllw(new_value, new_value, shift);
+ __ Xor(new_value, new_value, old_value);
+ }
+ }
+ GenerateCompareAndSet(assembler,
+ op_type,
+ order,
+ /*strong=*/ true,
+ /*cmp_failure=*/ &retry,
+ tmp_ptr,
+ new_value,
+ /*old_value=*/ reloaded_old_value,
+ /*mask=*/ kNoXRegister,
+ /*masked=*/ kNoXRegister,
+ store_result,
+ /*expected=*/ old_value);
+ } else {
+ XRegister old_value = is_fp ? get_temp() : out.AsRegister<XRegister>();
+ GenerateGetAndUpdate(
+ codegen, get_and_update_op, op_type, order, tmp_ptr, arg_reg, old_value, mask, temp);
+ if (byte_swap) {
+ GenerateByteSwapAndExtract(assembler, out, old_value, shift, value_type);
+ } else if (is_fp) {
+ codegen->MoveLocation(out, Location::RegisterLocation(old_value), value_type);
+ } else if (is_small) {
+ __ Srlw(old_value, old_value, shift);
+ DCHECK_NE(value_type, DataType::Type::kUint8);
+ if (value_type == DataType::Type::kInt8) {
+ __ SextB(old_value, old_value);
+ } else if (value_type == DataType::Type::kBool) {
+ __ ZextB(old_value, old_value);
+ } else if (value_type == DataType::Type::kInt16) {
+ __ SextH(old_value, old_value);
+ } else {
+ DCHECK_EQ(value_type, DataType::Type::kUint16);
+ __ ZextH(old_value, old_value);
+ }
+ } else if (is_reference) {
+ __ ZextW(old_value, old_value);
+ if (codegen->EmitBakerReadBarrier()) {
+ // Use RA as temp. It is clobbered in the slow path anyway.
+ static constexpr Location kBakerReadBarrierTemp = Location::RegisterLocation(RA);
+ SlowPathCodeRISCV64* rb_slow_path =
+ codegen->AddGcRootBakerBarrierBarrierSlowPath(invoke, out, kBakerReadBarrierTemp);
+ codegen->EmitBakerReadBarierMarkingCheck(rb_slow_path, out, kBakerReadBarrierTemp);
+ } else if (codegen->EmitNonBakerReadBarrier()) {
+ Location base_loc = Location::RegisterLocation(target.object);
+ Location index = Location::RegisterLocation(target.offset);
+ SlowPathCodeRISCV64* rb_slow_path = codegen->AddReadBarrierSlowPath(
+ invoke, out, out, base_loc, /*offset=*/ 0u, index);
+ __ J(rb_slow_path->GetEntryLabel());
+ __ Bind(rb_slow_path->GetExitLabel());
+ }
+ }
+ }
+
+ if (slow_path != nullptr) {
+ DCHECK(!byte_swap);
+ __ Bind(slow_path->GetExitLabel());
+ }
+
+ // Check that we have allocated the right number of temps. We may need more registers
+ // for byte swapped CAS in the slow path, so skip this check for the main path in that case.
+ bool has_byte_swap = (arg_index == 3u) && (!is_reference && data_size != 1u);
+ if ((!has_byte_swap || byte_swap) && next_temp != locations->GetTempCount()) {
+ // We allocate a temporary register for the class object for a static field `VarHandle` but
+ // we do not update the `next_temp` if it's otherwise unused after the address calculation.
+ CHECK_EQ(arg_index, 1u);
+ CHECK_EQ(next_temp, 1u);
+ CHECK_EQ(locations->GetTempCount(), 2u);
+ }
+}
+
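+
The byte-swapped 16-bit `GetAndAdd` path above merges the updated field back into the raw 32-bit word with two XORs and a Uint16 byte reversal, as described in the comment in the CAS loop. A standalone C++ sketch of that merge (illustrative helper, names and values, not ART code), assuming the field is stored byte-swapped at bit offset `shift`:

#include <cassert>
#include <cstdint>

// Swap the two low bytes; higher bits are discarded (emulates a Uint16 byte reversal).
static uint32_t ByteSwap16(uint32_t v) {
  return ((v & 0xFFu) << 8) | ((v >> 8) & 0xFFu);
}

int main() {
  const unsigned shift = 16;                  // bit offset of the 16-bit field in the word
  const uint32_t field_old = 0x1234u;         // old field value in natural byte order (`out`)
  const uint32_t field_new = (field_old + 0x0101u) & 0xFFFFu;  // after the Addw
  const uint32_t other_bits = 0x0000BEEFu;    // unrelated bytes sharing the 32-bit word

  // Raw word as loaded by LR: the field sits byte-swapped at `shift`.
  const uint32_t old_value = (ByteSwap16(field_old) << shift) | other_bits;

  // Emitted sequence: XOR new with old (natural order), reverse bytes as Uint16,
  // shift into place, then XOR into the raw old value.
  uint32_t new_value = field_new ^ field_old;
  new_value = ByteSwap16(new_value);
  new_value = (new_value << shift) ^ old_value;

  // Only the 16 field bits changed, and they now hold the byte-swapped sum.
  assert(new_value == ((ByteSwap16(field_new) << shift) | other_bits));
  return 0;
}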
+void IntrinsicLocationsBuilderRISCV64::VisitVarHandleGetAndSet(HInvoke* invoke) {
+ CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kSet);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitVarHandleGetAndSet(HInvoke* invoke) {
+ GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kSet, std::memory_order_seq_cst);
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitVarHandleGetAndSetAcquire(HInvoke* invoke) {
+ CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kSet);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitVarHandleGetAndSetAcquire(HInvoke* invoke) {
+ GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kSet, std::memory_order_acquire);
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitVarHandleGetAndSetRelease(HInvoke* invoke) {
+ CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kSet);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitVarHandleGetAndSetRelease(HInvoke* invoke) {
+ GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kSet, std::memory_order_release);
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitVarHandleGetAndAdd(HInvoke* invoke) {
+ CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kAdd);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitVarHandleGetAndAdd(HInvoke* invoke) {
+ GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kAdd, std::memory_order_seq_cst);
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitVarHandleGetAndAddAcquire(HInvoke* invoke) {
+ CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kAdd);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitVarHandleGetAndAddAcquire(HInvoke* invoke) {
+ GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kAdd, std::memory_order_acquire);
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitVarHandleGetAndAddRelease(HInvoke* invoke) {
+ CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kAdd);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitVarHandleGetAndAddRelease(HInvoke* invoke) {
+ GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kAdd, std::memory_order_release);
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitVarHandleGetAndBitwiseAnd(HInvoke* invoke) {
+ CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kAnd);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitVarHandleGetAndBitwiseAnd(HInvoke* invoke) {
+ GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kAnd, std::memory_order_seq_cst);
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitVarHandleGetAndBitwiseAndAcquire(HInvoke* invoke) {
+ CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kAnd);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitVarHandleGetAndBitwiseAndAcquire(HInvoke* invoke) {
+ GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kAnd, std::memory_order_acquire);
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitVarHandleGetAndBitwiseAndRelease(HInvoke* invoke) {
+ CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kAnd);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitVarHandleGetAndBitwiseAndRelease(HInvoke* invoke) {
+ GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kAnd, std::memory_order_release);
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitVarHandleGetAndBitwiseOr(HInvoke* invoke) {
+ CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kOr);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitVarHandleGetAndBitwiseOr(HInvoke* invoke) {
+ GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kOr, std::memory_order_seq_cst);
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitVarHandleGetAndBitwiseOrAcquire(HInvoke* invoke) {
+ CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kOr);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitVarHandleGetAndBitwiseOrAcquire(HInvoke* invoke) {
+ GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kOr, std::memory_order_acquire);
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitVarHandleGetAndBitwiseOrRelease(HInvoke* invoke) {
+ CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kOr);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitVarHandleGetAndBitwiseOrRelease(HInvoke* invoke) {
+ GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kOr, std::memory_order_release);
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitVarHandleGetAndBitwiseXor(HInvoke* invoke) {
+ CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kXor);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitVarHandleGetAndBitwiseXor(HInvoke* invoke) {
+ GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kXor, std::memory_order_seq_cst);
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitVarHandleGetAndBitwiseXorAcquire(HInvoke* invoke) {
+ CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kXor);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitVarHandleGetAndBitwiseXorAcquire(HInvoke* invoke) {
+ GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kXor, std::memory_order_acquire);
+}
+
+void IntrinsicLocationsBuilderRISCV64::VisitVarHandleGetAndBitwiseXorRelease(HInvoke* invoke) {
+ CreateVarHandleGetAndUpdateLocations(invoke, codegen_, GetAndUpdateOp::kXor);
+}
+
+void IntrinsicCodeGeneratorRISCV64::VisitVarHandleGetAndBitwiseXorRelease(HInvoke* invoke) {
+ GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kXor, std::memory_order_release);
}
void VarHandleSlowPathRISCV64::EmitByteArrayViewCode(CodeGenerator* codegen_in) {