-rw-r--r--  compiler/optimizing/code_generator_x86_64.cc |   4
-rw-r--r--  compiler/optimizing/instruction_builder.cc   |   5
-rw-r--r--  compiler/optimizing/intrinsics_x86_64.cc     | 372
3 files changed, 247 insertions, 134 deletions
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index fa61c6713f..0747a3a5cd 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -5124,12 +5124,12 @@ void InstructionCodeGeneratorX86_64::Bswap(Location value, CpuRegister* temp) {
   switch (type) {
     case DataType::Type::kInt16:
-      // TODO: Can be done with an xchg of 8b registers. This is straight from Quick.
+      // This should sign-extend, even if reimplemented with an XCHG of 8-bit registers.
       __ bswapl(value.AsRegister<CpuRegister>());
       __ sarl(value.AsRegister<CpuRegister>(), Immediate(16));
       break;
     case DataType::Type::kUint16:
-      // TODO: Can be done with an xchg of 8b registers. This is straight from Quick.
+      // TODO: Can be done with an XCHG of 8-bit registers. This is straight from Quick.
      __ bswapl(value.AsRegister<CpuRegister>());
      __ shrl(value.AsRegister<CpuRegister>(), Immediate(16));
      break;
diff --git a/compiler/optimizing/instruction_builder.cc b/compiler/optimizing/instruction_builder.cc
index ed760f190d..f8bf126eeb 100644
--- a/compiler/optimizing/instruction_builder.cc
+++ b/compiler/optimizing/instruction_builder.cc
@@ -1302,6 +1302,11 @@ static void DecideVarHandleIntrinsic(HInvoke* invoke) {
   if (IsVarHandleGetAndAdd(invoke) &&
       (value_type == DataType::Type::kReference || value_type == DataType::Type::kBool)) {
     // We should only add numerical types.
+    //
+    // For byte array views, floating-point types are not allowed, see javadoc comments for
+    // java.lang.invoke.MethodHandles.byteArrayViewVarHandle(). But ART treats them as numeric
+    // types in ByteArrayViewVarHandle::Access(). Consequently, we do generate intrinsic code,
+    // but it always fails the access mode check at runtime.
     optimizations.SetDoNotIntrinsify();
     return;
   } else if (IsVarHandleGetAndBitwiseOp(invoke) && !DataType::IsIntegralType(value_type)) {
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index c716201313..66d0743022 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -3224,6 +3224,14 @@ void IntrinsicCodeGeneratorX86_64::VisitMathMultiplyHigh(HInvoke* invoke) {
   __ imulq(y);
 }
 
+enum class GetAndUpdateOp {
+  kSet,
+  kAdd,
+  kBitwiseAnd,
+  kBitwiseOr,
+  kBitwiseXor
+};
+
 class VarHandleSlowPathX86_64 : public IntrinsicSlowPathX86_64 {
  public:
   explicit VarHandleSlowPathX86_64(HInvoke* invoke)
@@ -3238,6 +3246,18 @@ class VarHandleSlowPathX86_64 : public IntrinsicSlowPathX86_64 {
     is_atomic_ = is_atomic;
   }
 
+  void SetNeedAnyStoreBarrier(bool need_any_store_barrier) {
+    need_any_store_barrier_ = need_any_store_barrier;
+  }
+
+  void SetNeedAnyAnyBarrier(bool need_any_any_barrier) {
+    need_any_any_barrier_ = need_any_any_barrier;
+  }
+
+  void SetGetAndUpdateOp(GetAndUpdateOp get_and_update_op) {
+    get_and_update_op_ = get_and_update_op;
+  }
+
   Label* GetByteArrayViewCheckLabel() {
     return &byte_array_view_check_label_;
   }
@@ -3270,6 +3290,9 @@ class VarHandleSlowPathX86_64 : public IntrinsicSlowPathX86_64 {
   // Arguments forwarded to specific methods.
   bool is_volatile_;
   bool is_atomic_;
+  bool need_any_store_barrier_;
+  bool need_any_any_barrier_;
+  GetAndUpdateOp get_and_update_op_;
 };
 
 static void GenerateMathFma(HInvoke* invoke, CodeGeneratorX86_64* codegen) {
@@ -3641,19 +3664,6 @@ static bool HasVarHandleIntrinsicImplementation(HInvoke* invoke) {
   size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
   DCHECK_LE(expected_coordinates_count, 2u);  // Filtered by the `DoNotIntrinsify` flag above.
 
-  if (expected_coordinates_count > 1u) {
-    switch (mirror::VarHandle::GetAccessModeTemplateByIntrinsic(invoke->GetIntrinsic())) {
-      case mirror::VarHandle::AccessModeTemplate::kGet:
-      case mirror::VarHandle::AccessModeTemplate::kSet:
-      case mirror::VarHandle::AccessModeTemplate::kCompareAndSet:
-      case mirror::VarHandle::AccessModeTemplate::kCompareAndExchange:
-        break;
-      default:
-        // TODO: Add support for all intrinsics.
-        return false;
-    }
-  }
-
   return true;
 }
@@ -4037,14 +4047,6 @@ void IntrinsicCodeGeneratorX86_64::VisitVarHandleCompareAndExchangeRelease(HInvo
   GenerateVarHandleCompareAndSetOrExchange(invoke, codegen_, /*is_cmpxchg=*/ true);
 }
 
-enum class GetAndUpdateOp {
-  kSet,
-  kAdd,
-  kBitwiseAnd,
-  kBitwiseOr,
-  kBitwiseXor
-};
-
 static void CreateVarHandleGetAndSetLocations(HInvoke* invoke) {
   if (!HasVarHandleIntrinsicImplementation(invoke)) {
     return;
@@ -4079,16 +4081,15 @@ static void CreateVarHandleGetAndSetLocations(HInvoke* invoke) {
   }
 }
 
-static void GenerateVarHandleGetAndSet(CodeGeneratorX86_64* codegen,
-                                       HInvoke* invoke,
-                                       LocationSummary* locations,
-                                       uint32_t value_index,
+static void GenerateVarHandleGetAndSet(HInvoke* invoke,
+                                       CodeGeneratorX86_64* codegen,
+                                       Location value,
                                        DataType::Type type,
                                        Address field_addr,
-                                       CpuRegister ref) {
+                                       CpuRegister ref,
+                                       bool byte_swap) {
   X86_64Assembler* assembler = codegen->GetAssembler();
-
-  Location value = locations->InAt(value_index);
+  LocationSummary* locations = invoke->GetLocations();
   Location out = locations->Out();
   uint32_t temp_count = locations->GetTempCount();
 
@@ -4098,15 +4099,24 @@ static void GenerateVarHandleGetAndSet(CodeGeneratorX86_64* codegen,
     Location temp = locations->GetTemp(temp_count - 1);
     codegen->Move(temp, value);
     bool is64bit = (type == DataType::Type::kFloat64);
+    DataType::Type bswap_type = is64bit ? DataType::Type::kUint64 : DataType::Type::kUint32;
+    if (byte_swap) {
+      codegen->GetInstructionCodegen()->Bswap(temp, bswap_type);
+    }
     if (is64bit) {
       __ xchgq(temp.AsRegister<CpuRegister>(), field_addr);
     } else {
       __ xchgl(temp.AsRegister<CpuRegister>(), field_addr);
     }
+    if (byte_swap) {
+      codegen->GetInstructionCodegen()->Bswap(temp, bswap_type);
+    }
     __ movd(out.AsFpuRegister<XmmRegister>(), temp.AsRegister<CpuRegister>(), is64bit);
   } else if (type == DataType::Type::kReference) {
     // `getAndSet` for references: load reference and atomically exchange it with the field.
     // Output register is the same as the one holding new value, so no need to move the result.
+    DCHECK(!byte_swap);
+
     CpuRegister temp1 = locations->GetTemp(temp_count - 1).AsRegister<CpuRegister>();
     CpuRegister temp2 = locations->GetTemp(temp_count - 2).AsRegister<CpuRegister>();
     CpuRegister valreg = value.AsRegister<CpuRegister>();
@@ -4139,6 +4149,9 @@ static void GenerateVarHandleGetAndSet(CodeGeneratorX86_64* codegen,
   } else {
     // `getAndSet` for integral types: atomically exchange the new value with the field. Output
     // register is the same as the one holding new value. Do sign extend / zero extend as needed.
+    if (byte_swap) {
+      codegen->GetInstructionCodegen()->Bswap(value, type);
+    }
     CpuRegister valreg = value.AsRegister<CpuRegister>();
     DCHECK_EQ(valreg, out.AsRegister<CpuRegister>());
     switch (type) {
@@ -4171,6 +4184,9 @@
         DCHECK(false) << "unexpected type in getAndSet intrinsic";
         UNREACHABLE();
     }
+    if (byte_swap) {
+      codegen->GetInstructionCodegen()->Bswap(value, type);
+    }
   }
 }
 
@@ -4191,64 +4207,96 @@ static void CreateVarHandleGetAndBitwiseOpLocations(HInvoke* invoke) {
   // A temporary to compute the bitwise operation on the old and the new values.
   locations->AddTemp(Location::RequiresRegister());
 
+  // We need the value to be either in a register, or a 32-bit constant (as there are no
+  // arithmetic instructions that accept a 64-bit immediate on x86_64).
+  locations->SetInAt(new_value_index, DataType::Is64BitType(type)
+      ? Location::RequiresRegister()
+      : Location::RegisterOrConstant(invoke->InputAt(new_value_index)));
+
   // Output is in RAX to accommodate CMPXCHG. It is also used as a temporary.
   locations->SetOut(Location::RegisterLocation(RAX));
 }
 
-static void GenerateVarHandleGetAndBitwiseOp(CodeGeneratorX86_64* codegen,
-                                             LocationSummary* locations,
-                                             uint32_t value_index,
-                                             DataType::Type type,
-                                             Address field_addr,
-                                             GetAndUpdateOp get_and_update_op) {
+static void GenerateVarHandleGetAndOp(HInvoke* invoke,
+                                      CodeGeneratorX86_64* codegen,
+                                      Location value,
+                                      DataType::Type type,
+                                      Address field_addr,
+                                      GetAndUpdateOp get_and_update_op,
+                                      bool byte_swap) {
   X86_64Assembler* assembler = codegen->GetAssembler();
-
-  Location value = locations->InAt(value_index);
+  LocationSummary* locations = invoke->GetLocations();
   Location temp_loc = locations->GetTemp(locations->GetTempCount() - 1);
+  Location rax_loc = locations->Out();
   CpuRegister temp = temp_loc.AsRegister<CpuRegister>();
-  CpuRegister rax = locations->Out().AsRegister<CpuRegister>();
+  CpuRegister rax = rax_loc.AsRegister<CpuRegister>();
   DCHECK_EQ(rax.AsRegister(), RAX);
+  bool is64Bit = DataType::Is64BitType(type);
 
   NearLabel retry;
   __ Bind(&retry);
 
-  // Load field value into register.
+  // Load field value into RAX and copy it into a temporary register for the operation.
   codegen->LoadFromMemoryNoReference(type, Location::RegisterLocation(RAX), field_addr);
+  codegen->Move(temp_loc, rax_loc);
+  if (byte_swap) {
+    // Byte swap the temporary, since we need to perform the operation in native endianness.
+    codegen->GetInstructionCodegen()->Bswap(temp_loc, type);
+  }
 
-  // Move the new value into a temporary register.
-  codegen->Move(temp_loc, value);
+  DCHECK(!value.IsConstant() || !is64Bit);
+  int32_t const_value = value.IsConstant()
+      ? CodeGenerator::GetInt32ValueOf(value.GetConstant())
+      : 0;
 
-  // Use temporary register as first operand and destination of the bitwise operations, as the
-  // operation is symmetric and we need the old value in RAX for CMPXCHG. Use 32-bit registers for
-  // 8/16/32-bit types to save on the REX prefix that might be needed for 64 bits.
-  bool is64Bit = DataType::Is64BitType(type);
+  // Use 32-bit registers for 8/16/32-bit types to save on the REX prefix.
   switch (get_and_update_op) {
+    case GetAndUpdateOp::kAdd:
+      DCHECK(byte_swap);  // The non-byte-swapping path should use a faster XADD instruction.
+      if (is64Bit) {
+        __ addq(temp, value.AsRegister<CpuRegister>());
+      } else if (value.IsConstant()) {
+        __ addl(temp, Immediate(const_value));
+      } else {
+        __ addl(temp, value.AsRegister<CpuRegister>());
+      }
+      break;
     case GetAndUpdateOp::kBitwiseAnd:
       if (is64Bit) {
-        __ andq(temp, rax);
+        __ andq(temp, value.AsRegister<CpuRegister>());
+      } else if (value.IsConstant()) {
+        __ andl(temp, Immediate(const_value));
       } else {
-        __ andl(temp, rax);
+        __ andl(temp, value.AsRegister<CpuRegister>());
       }
       break;
     case GetAndUpdateOp::kBitwiseOr:
       if (is64Bit) {
-        __ orq(temp, rax);
+        __ orq(temp, value.AsRegister<CpuRegister>());
+      } else if (value.IsConstant()) {
+        __ orl(temp, Immediate(const_value));
       } else {
-        __ orl(temp, rax);
+        __ orl(temp, value.AsRegister<CpuRegister>());
       }
       break;
     case GetAndUpdateOp::kBitwiseXor:
       if (is64Bit) {
-        __ xorq(temp, rax);
+        __ xorq(temp, value.AsRegister<CpuRegister>());
+      } else if (value.IsConstant()) {
+        __ xorl(temp, Immediate(const_value));
       } else {
-        __ xorl(temp, rax);
+        __ xorl(temp, value.AsRegister<CpuRegister>());
       }
       break;
     default:
-      DCHECK(false) << "expected bitwise operation";
+      DCHECK(false) << "unexpected operation";
       UNREACHABLE();
   }
 
+  if (byte_swap) {
+    // RAX still contains the original value, but we need to byte swap the temporary back.
+    codegen->GetInstructionCodegen()->Bswap(temp_loc, type);
+  }
+
   switch (type) {
     case DataType::Type::kBool:
     case DataType::Type::kUint8:
@@ -4274,23 +4322,10 @@ static void GenerateVarHandleGetAndBitwiseOp(CodeGeneratorX86_64* codegen,
   __ j(kNotZero, &retry);
 
-  // Sign-extend or zero-extend the result if necessary. Output is in RAX after CMPXCHG.
-  switch (type) {
-    case DataType::Type::kBool:
-    case DataType::Type::kUint8:
-      __ movzxb(rax, rax);
-      break;
-    case DataType::Type::kInt8:
-      __ movsxb(rax, rax);
-      break;
-    case DataType::Type::kUint16:
-      __ movzxw(rax, rax);
-      break;
-    case DataType::Type::kInt16:
-      __ movsxw(rax, rax);
-      break;
-    default:
-      break;
+  // The result is in RAX after CMPXCHG. Byte swap if necessary, but do not sign/zero extend, as
+  // that has already been done by `LoadFromMemoryNoReference` above (and not altered by CMPXCHG).
+  if (byte_swap) {
+    codegen->GetInstructionCodegen()->Bswap(rax_loc, type);
   }
 }
 
@@ -4312,92 +4347,150 @@ static void CreateVarHandleGetAndAddLocations(HInvoke* invoke) {
     locations->SetInAt(new_value_index, Location::RequiresFpuRegister());
     // CMPXCHG clobbers RAX.
     locations->AddTemp(Location::RegisterLocation(RAX));
-    // A temporary to hold the new value for CMPXCHG.
-    locations->AddTemp(Location::RequiresRegister());
     // An FP temporary to load the old value from the field and perform FP addition.
     locations->AddTemp(Location::RequiresFpuRegister());
+    // A temporary to hold the new value for CMPXCHG.
+    locations->AddTemp(Location::RequiresRegister());
   } else {
     DCHECK_NE(type, DataType::Type::kReference);
     // Use the same register for both the new value and output to take advantage of XADD.
-    // It doesn't have to be RAX, but we need to choose some to make sure it's the same.
+    // It should be RAX, because the byte-swapping path of GenerateVarHandleGetAndAdd falls
+    // back to GenerateVarHandleGetAndOp, which expects the output in RAX.
     locations->SetOut(Location::RegisterLocation(RAX));
     locations->SetInAt(new_value_index, Location::RegisterLocation(RAX));
+    if (GetExpectedVarHandleCoordinatesCount(invoke) == 2) {
+      // For byte array views with non-native endianness we need extra BSWAP operations, so we
+      // cannot use XADD and have to fall back to a generic implementation based on CMPXCHG. In
+      // that case we need two temporary registers: one to hold the value instead of RAX (which
+      // may get clobbered by repeated CMPXCHG) and one for performing the operation. At compile
+      // time we cannot distinguish this case from arrays or native-endian byte array views.
+      locations->AddTemp(Location::RequiresRegister());
+      locations->AddTemp(Location::RequiresRegister());
+    }
   }
 }
 
-static void GenerateVarHandleGetAndAdd(CodeGeneratorX86_64* codegen,
-                                       LocationSummary* locations,
-                                       uint32_t value_index,
+static void GenerateVarHandleGetAndAdd(HInvoke* invoke,
+                                       CodeGeneratorX86_64* codegen,
+                                       Location value,
                                        DataType::Type type,
-                                       Address field_addr) {
+                                       Address field_addr,
+                                       bool byte_swap) {
   X86_64Assembler* assembler = codegen->GetAssembler();
-
-  Location value = locations->InAt(value_index);
+  LocationSummary* locations = invoke->GetLocations();
   Location out = locations->Out();
   uint32_t temp_count = locations->GetTempCount();
 
   if (DataType::IsFloatingPointType(type)) {
+    if (byte_swap) {
+      // This code should never be executed: it is the case of a byte array view (since it
+      // requires a byte swap), and varhandles for byte array views support the numeric atomic
+      // update access modes only for int and long, but not for floating-point types (see javadoc
+      // comments for java.lang.invoke.MethodHandles.byteArrayViewVarHandle()). But the ART
+      // varhandle implementation for byte array views treats floating-point types as numeric
+      // types in ByteArrayViewVarHandle::Access(). Therefore we do generate intrinsic code, but
+      // it always fails the access mode check at runtime prior to reaching this point. The
+      // illegal instruction UD2 ensures that if control flow gets here by mistake, we will
+      // notice.
+      __ ud2();
+    }
+
     // `getAndAdd` for floating-point types: load the old FP value into a temporary FP register and
     // in RAX for CMPXCHG, add the new FP value to the old one, move it to a non-FP temporary for
     // CMPXCHG and loop until CMPXCHG succeeds. Move the result from RAX to the output FP register.
     bool is64bit = (type == DataType::Type::kFloat64);
-    XmmRegister fptemp = locations->GetTemp(temp_count - 1).AsFpuRegister<XmmRegister>();
-    CpuRegister temp = locations->GetTemp(temp_count - 2).AsRegister<CpuRegister>();
+    DataType::Type bswap_type = is64bit ? DataType::Type::kUint64 : DataType::Type::kUint32;
+    XmmRegister fptemp = locations->GetTemp(temp_count - 2).AsFpuRegister<XmmRegister>();
+    Location rax_loc = Location::RegisterLocation(RAX);
+    Location temp_loc = locations->GetTemp(temp_count - 1);
+    CpuRegister temp = temp_loc.AsRegister<CpuRegister>();
 
     NearLabel retry;
     __ Bind(&retry);
+
+    // Read the value from memory into an FP register and copy it into RAX.
     if (is64bit) {
       __ movsd(fptemp, field_addr);
-      __ movd(CpuRegister(RAX), fptemp, is64bit);
-      __ addsd(fptemp, value.AsFpuRegister<XmmRegister>());
-      __ movd(temp, fptemp, is64bit);
-      __ LockCmpxchgq(field_addr, temp);
     } else {
       __ movss(fptemp, field_addr);
-      __ movd(CpuRegister(RAX), fptemp, is64bit);
+    }
+    __ movd(CpuRegister(RAX), fptemp, is64bit);
+    // If necessary, byte swap RAX and update the value in the FP register to also be byte-swapped.
+    if (byte_swap) {
+      codegen->GetInstructionCodegen()->Bswap(rax_loc, bswap_type);
+      __ movd(fptemp, CpuRegister(RAX), is64bit);
+    }
+    // Perform the FP addition and move it to a temporary register to prepare for CMPXCHG.
+    if (is64bit) {
+      __ addsd(fptemp, value.AsFpuRegister<XmmRegister>());
+    } else {
       __ addss(fptemp, value.AsFpuRegister<XmmRegister>());
-      __ movd(temp, fptemp, is64bit);
+    }
+    __ movd(temp, fptemp, is64bit);
+    // If necessary, byte swap the temporary (new value) and RAX (expected value) before CMPXCHG.
+    if (byte_swap) {
+      codegen->GetInstructionCodegen()->Bswap(temp_loc, bswap_type);
+      codegen->GetInstructionCodegen()->Bswap(rax_loc, bswap_type);
+    }
+    if (is64bit) {
+      __ LockCmpxchgq(field_addr, temp);
+    } else {
       __ LockCmpxchgl(field_addr, temp);
     }
+
     __ j(kNotZero, &retry);
 
-    // The old value is in RAX.
+    // The old value is in RAX; byte swap it if necessary.
+    if (byte_swap) {
+      codegen->GetInstructionCodegen()->Bswap(rax_loc, bswap_type);
+    }
     __ movd(out.AsFpuRegister<XmmRegister>(), CpuRegister(RAX), is64bit);
   } else {
-    // `getAndAdd` for integral types: atomically exchange the new value with the field and add the
-    // old value to the field. Output register is the same as the one holding new value. Do sign
-    // extend / zero extend as needed.
-    CpuRegister valreg = value.AsRegister<CpuRegister>();
-    DCHECK_EQ(valreg, out.AsRegister<CpuRegister>());
-    switch (type) {
-      case DataType::Type::kBool:
-      case DataType::Type::kUint8:
-        __ LockXaddb(field_addr, valreg);
-        __ movzxb(valreg, valreg);
-        break;
-      case DataType::Type::kInt8:
-        __ LockXaddb(field_addr, valreg);
-        __ movsxb(valreg, valreg);
-        break;
-      case DataType::Type::kUint16:
-        __ LockXaddw(field_addr, valreg);
-        __ movzxw(valreg, valreg);
-        break;
-      case DataType::Type::kInt16:
-        __ LockXaddw(field_addr, valreg);
-        __ movsxw(valreg, valreg);
-        break;
-      case DataType::Type::kInt32:
-      case DataType::Type::kUint32:
-        __ LockXaddl(field_addr, valreg);
-        break;
-      case DataType::Type::kInt64:
-      case DataType::Type::kUint64:
-        __ LockXaddq(field_addr, valreg);
-        break;
-      default:
-        DCHECK(false) << "unexpected type in getAndAdd intrinsic";
-        UNREACHABLE();
+    if (byte_swap) {
+      // We cannot use XADD since we need to byte-swap the old value when reading it from memory,
+      // and then byte-swap the sum before writing it to memory. So fall back to the slower
+      // generic implementation that is also used for bitwise operations.
+      // Move the value from RAX to a temporary register, as RAX may get clobbered by repeated
+      // CMPXCHG.
+      DCHECK_EQ(GetExpectedVarHandleCoordinatesCount(invoke), 2u);
+      Location temp = locations->GetTemp(temp_count - 2);
+      codegen->Move(temp, value);
+      GenerateVarHandleGetAndOp(
+          invoke, codegen, temp, type, field_addr, GetAndUpdateOp::kAdd, byte_swap);
+    } else {
+      // `getAndAdd` for integral types: atomically exchange the new value with the field and add
+      // the old value to the field. Output register is the same as the one holding new value. Do
+      // sign extend / zero extend as needed.
+      CpuRegister valreg = value.AsRegister<CpuRegister>();
+      DCHECK_EQ(valreg, out.AsRegister<CpuRegister>());
+      switch (type) {
+        case DataType::Type::kBool:
+        case DataType::Type::kUint8:
+          __ LockXaddb(field_addr, valreg);
+          __ movzxb(valreg, valreg);
+          break;
+        case DataType::Type::kInt8:
+          __ LockXaddb(field_addr, valreg);
+          __ movsxb(valreg, valreg);
+          break;
+        case DataType::Type::kUint16:
+          __ LockXaddw(field_addr, valreg);
+          __ movzxw(valreg, valreg);
+          break;
+        case DataType::Type::kInt16:
+          __ LockXaddw(field_addr, valreg);
+          __ movsxw(valreg, valreg);
+          break;
+        case DataType::Type::kInt32:
+        case DataType::Type::kUint32:
+          __ LockXaddl(field_addr, valreg);
+          break;
+        case DataType::Type::kInt64:
+        case DataType::Type::kUint64:
+          __ LockXaddq(field_addr, valreg);
+          break;
+        default:
+          DCHECK(false) << "unexpected type in getAndAdd intrinsic";
+          UNREACHABLE();
+      }
+    }
   }
 }
 
@@ -4406,19 +4499,27 @@ static void GenerateVarHandleGetAndUpdate(HInvoke* invoke,
                                           CodeGeneratorX86_64* codegen,
                                           GetAndUpdateOp get_and_update_op,
                                           bool need_any_store_barrier,
-                                          bool need_any_any_barrier) {
+                                          bool need_any_any_barrier,
+                                          bool byte_swap = false) {
   DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
 
   X86_64Assembler* assembler = codegen->GetAssembler();
   LocationSummary* locations = invoke->GetLocations();
   uint32_t number_of_arguments = invoke->GetNumberOfArguments();
-  uint32_t value_index = number_of_arguments - 1;
+  Location value = locations->InAt(number_of_arguments - 1);
   DataType::Type type = invoke->GetType();
 
-  VarHandleSlowPathX86_64* slow_path = GenerateVarHandleChecks(invoke, codegen, type);
+  VarHandleSlowPathX86_64* slow_path = nullptr;
   VarHandleTarget target = GetVarHandleTarget(invoke);
-  GenerateVarHandleTarget(invoke, target, codegen);
+  if (!byte_swap) {
+    slow_path = GenerateVarHandleChecks(invoke, codegen, type);
+    slow_path->SetGetAndUpdateOp(get_and_update_op);
+    slow_path->SetNeedAnyStoreBarrier(need_any_store_barrier);
+    slow_path->SetNeedAnyAnyBarrier(need_any_any_barrier);
+    GenerateVarHandleTarget(invoke, target, codegen);
+    __ Bind(slow_path->GetNativeByteOrderLabel());
+  }
 
   CpuRegister ref(target.object);
   Address field_addr(ref, CpuRegister(target.offset), TIMES_1, 0);
@@ -4429,16 +4530,16 @@ static void GenerateVarHandleGetAndUpdate(HInvoke* invoke,
 
   switch (get_and_update_op) {
     case GetAndUpdateOp::kSet:
-      GenerateVarHandleGetAndSet(codegen, invoke, locations, value_index, type, field_addr, ref);
+      GenerateVarHandleGetAndSet(invoke, codegen, value, type, field_addr, ref, byte_swap);
       break;
     case GetAndUpdateOp::kAdd:
-      GenerateVarHandleGetAndAdd(codegen, locations, value_index, type, field_addr);
+      GenerateVarHandleGetAndAdd(invoke, codegen, value, type, field_addr, byte_swap);
       break;
     case GetAndUpdateOp::kBitwiseAnd:
     case GetAndUpdateOp::kBitwiseOr:
     case GetAndUpdateOp::kBitwiseXor:
-      GenerateVarHandleGetAndBitwiseOp(
-          codegen, locations, value_index, type, field_addr, get_and_update_op);
+      GenerateVarHandleGetAndOp(
+          invoke, codegen, value, type, field_addr, get_and_update_op, byte_swap);
       break;
   }
 
@@ -4446,7 +4547,9 @@ static void GenerateVarHandleGetAndUpdate(HInvoke* invoke,
     codegen->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
   }
 
-  __ Bind(slow_path->GetExitLabel());
+  if (!byte_swap) {
+    __ Bind(slow_path->GetExitLabel());
+  }
 }
 
 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndSet(HInvoke* invoke) {
@@ -4722,9 +4825,14 @@ void VarHandleSlowPathX86_64::EmitByteArrayViewCode(CodeGeneratorX86_64* codegen
       GenerateVarHandleCompareAndSetOrExchange(
          invoke,
          codegen,
          /*is_cmpxchg=*/ true,
          /*byte_swap=*/ true);
       break;
-    default:
-      DCHECK(false);
-      UNREACHABLE();
+    case mirror::VarHandle::AccessModeTemplate::kGetAndUpdate:
+      GenerateVarHandleGetAndUpdate(invoke,
+                                    codegen,
+                                    get_and_update_op_,
+                                    need_any_store_barrier_,
+                                    need_any_any_barrier_,
+                                    /*byte_swap=*/ true);
+      break;
   }
 
   __ jmp(GetExitLabel());
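
Notes on the generated sequences (standalone C++ sketches, not part of the patch; all function names below are illustrative). First, the kInt16/kUint16 cases of Bswap in code_generator_x86_64.cc: a 16-bit value byte-swapped with the 32-bit BSWAPL lands in the upper half of the register, so kInt16 follows with an arithmetic shift (SARL, sign-extending) and kUint16 with a logical shift (SHRL, zero-extending). A rough equivalent, assuming GCC/Clang builtins and arithmetic right shift of negative values:

    #include <cstdint>
    #include <cstdio>

    // The kInt16 case: BSWAPL moves the swapped halfword into bits 31..16,
    // then an arithmetic shift (like SARL) sign-extends it back down.
    int32_t BswapInt16(int32_t v) {
      v = static_cast<int32_t>(__builtin_bswap32(static_cast<uint32_t>(v)));
      return v >> 16;  // arithmetic shift on GCC/Clang
    }

    // The kUint16 case: same BSWAPL, but a logical shift (like SHRL) zero-extends.
    int32_t BswapUint16(uint32_t v) {
      return static_cast<int32_t>(__builtin_bswap32(v) >> 16);
    }

    int main() {
      std::printf("%d\n", BswapInt16(0x0180));   // bytes 80 01 -> 0x8001 as int16  -> -32767
      std::printf("%d\n", BswapUint16(0x0180));  // bytes 80 01 -> 0x8001 as uint16 ->  32769
      return 0;
    }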
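The byte-swapping getAndSet path wraps the XCHG between two swaps: the new value is converted to the view's byte order before the exchange, and the old value is converted back to native order afterwards. A std::atomic approximation of the same dance (hypothetical helper, assuming a big-endian view over little-endian memory):

    #include <atomic>
    #include <cstdint>

    uint32_t GetAndSetBigEndian(std::atomic<uint32_t>& field, uint32_t new_value) {
      uint32_t stored = __builtin_bswap32(new_value);  // first Bswap: to the view's byte order
      uint32_t old_stored = field.exchange(stored);    // plays the role of XCHGL
      return __builtin_bswap32(old_stored);            // second Bswap: back to native order
    }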
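GenerateVarHandleGetAndOp builds the classic LOCK CMPXCHG retry loop: load the old value into RAX, apply the operation to a byte-swapped copy in a temporary, swap the temporary back to the stored order, and CMPXCHG it in; if another thread won the race, retry. A sketch of the same loop with std::atomic::compare_exchange_weak standing in for LOCK CMPXCHG (bitwise-OR case, same big-endian-view assumption as above):

    #include <atomic>
    #include <cstdint>

    uint32_t GetAndBitwiseOrBigEndian(std::atomic<uint32_t>& field, uint32_t arg) {
      uint32_t old_stored = field.load();  // like the load into RAX
      uint32_t new_stored;
      do {
        // Operate in native byte order, then swap back to the stored order.
        new_stored = __builtin_bswap32(__builtin_bswap32(old_stored) | arg);
        // On failure, compare_exchange_weak reloads old_stored, mirroring the
        // generated code jumping back to the retry label with RAX refreshed.
      } while (!field.compare_exchange_weak(old_stored, new_stored));
      return __builtin_bswap32(old_stored);  // old value, like byte-swapping RAX
    }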
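The getAndAdd split mirrors the comment in CreateVarHandleGetAndAddLocations: in native byte order a single LOCK XADD both returns the old value and stores the sum, but for a byte-swapped view the sum must be formed on swapped bytes, which XADD cannot do, hence the CMPXCHG fallback. Side by side (sketch, same assumptions as above):

    #include <atomic>
    #include <cstdint>

    // Fast path: native byte order, one LOCK XADD.
    uint32_t GetAndAddNative(std::atomic<uint32_t>& field, uint32_t value) {
      return field.fetch_add(value);
    }

    // Byte-swapped view: the sum must be computed in native order, so XADD does
    // not apply and the code falls back to the CMPXCHG loop used for bitwise ops.
    uint32_t GetAndAddBigEndian(std::atomic<uint32_t>& field, uint32_t value) {
      uint32_t old_stored = field.load();
      uint32_t new_stored;
      do {
        new_stored = __builtin_bswap32(__builtin_bswap32(old_stored) + value);
      } while (!field.compare_exchange_weak(old_stored, new_stored));
      return __builtin_bswap32(old_stored);
    }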
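The floating-point getAndAdd path cannot use XADD either: there is no atomic FP add, so the generated code shuttles bits between XMM and general-purpose registers with MOVD and retries CMPXCHG on the integer bit pattern. A C++20 model using std::bit_cast for the MOVD round-trip (sketch; float case, without the byte-swap legs, which the patch anyway guards with UD2):

    #include <atomic>
    #include <bit>
    #include <cstdint>

    float GetAndAddFloat(std::atomic<uint32_t>& field, float value) {
      uint32_t old_bits = field.load();  // like MOVSS from the field + MOVD into RAX
      uint32_t new_bits;
      do {
        float sum = std::bit_cast<float>(old_bits) + value;  // ADDSS in the XMM temporary
        new_bits = std::bit_cast<uint32_t>(sum);             // MOVD back for CMPXCHG
      } while (!field.compare_exchange_weak(old_bits, new_bits));
      return std::bit_cast<float>(old_bits);  // the old value, from RAX to the output XMM
    }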