arm64: Implement VarHandle GetAndBitwiseOp intrinsics.

Using benchmarks provided by
    https://android-review.googlesource.com/1420959
on blueline little cores with fixed frequency 1420800:
                                      before after
GetAndBitwiseOrStaticFieldInt         23.917 0.026
GetAndBitwiseOrFieldInt               26.828 0.026
GetAndBitwiseOrAcquireStaticFieldInt  23.908 0.025
GetAndBitwiseOrAcquireFieldInt        26.822 0.026
GetAndBitwiseOrReleaseStaticFieldInt  23.906 0.026
GetAndBitwiseOrReleaseFieldInt        26.831 0.026
GetAndBitwiseXorStaticFieldInt        23.829 0.026
GetAndBitwiseXorFieldInt              26.784 0.026
GetAndBitwiseXorAcquireStaticFieldInt 23.841 0.025
GetAndBitwiseXorAcquireFieldInt       26.788 0.026
GetAndBitwiseXorReleaseStaticFieldInt 23.835 0.026
GetAndBitwiseXorReleaseFieldInt       26.788 0.026
GetAndBitwiseAndStaticFieldInt        23.874 0.026
GetAndBitwiseAndFieldInt              26.797 0.026
GetAndBitwiseAndAcquireStaticFieldInt 23.871 0.025
GetAndBitwiseAndAcquireFieldInt       26.792 0.026
GetAndBitwiseAndReleaseStaticFieldInt 23.865 0.026
GetAndBitwiseAndReleaseFieldInt       26.786 0.026

Test: Covered by existing test 712-varhandle-invocations.
Test: testrunner.py --target --64 --optimizing
Bug: 71781600
Change-Id: I8263f6b1b125c46190da06a9a05e60335bf69992
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index aa641e8..f19e286 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -1367,7 +1367,10 @@
 
 enum class GetAndUpdateOp {
   kSet,
-  kAdd
+  kAdd,
+  kAnd,
+  kOr,
+  kXor
 };
 
 static void GenerateGetAndUpdate(CodeGeneratorARM64* codegen,
@@ -1393,10 +1396,14 @@
       if (arg.IsVRegister()) {
         old_value_reg = arg.IsD() ? temps.AcquireX() : temps.AcquireW();
         new_value = old_value_reg;  // Use the same temporary.
-      } else {
-        old_value_reg = old_value.IsX() ? old_value.X() : old_value.W();
-        new_value = old_value.IsX() ? temps.AcquireX() : temps.AcquireW();
+        break;
       }
+      FALLTHROUGH_INTENDED;
+    case GetAndUpdateOp::kAnd:
+    case GetAndUpdateOp::kOr:
+    case GetAndUpdateOp::kXor:
+      old_value_reg = old_value.IsX() ? old_value.X() : old_value.W();
+      new_value = old_value.IsX() ? temps.AcquireX() : temps.AcquireW();
       break;
   }
 
@@ -1423,6 +1430,15 @@
         __ Add(new_value, old_value_reg, arg.IsX() ? arg.X() : arg.W());
       }
       break;
+    case GetAndUpdateOp::kAnd:
+      __ And(new_value, old_value_reg, arg.IsX() ? arg.X() : arg.W());
+      break;
+    case GetAndUpdateOp::kOr:
+      __ Orr(new_value, old_value_reg, arg.IsX() ? arg.X() : arg.W());
+      break;
+    case GetAndUpdateOp::kXor:
+      __ Eor(new_value, old_value_reg, arg.IsX() ? arg.X() : arg.W());
+      break;
   }
   EmitStoreExclusive(codegen, load_store_type, ptr, store_result, new_value, use_store_release);
   __ Cbnz(store_result, &loop_label);
@@ -4008,8 +4024,14 @@
           (value_type == DataType::Type::kReference || value_type == DataType::Type::kBool)) {
         // We should only add numerical types.
         return false;
+      } else if (IsVarHandleGetAndBitwiseOp(invoke) && !DataType::IsIntegralType(value_type)) {
+        // We can only apply bitwise operators to integral types.
+        // Note that bitwise VarHandle operations accept a non-integral boolean type and
+        // perform the appropriate logical operation. However, the result is the same as
+        // using the bitwise operation on our boolean representation and this fits well
+        // with DataType::IsIntegralType() treating the compiler type kBool as integral.
+        return false;
       }
-      DCHECK(!IsVarHandleGetAndBitwiseOp(invoke));  // Unimplemented.
       if (value_type != return_type) {
         return false;
       }
@@ -4703,6 +4725,77 @@
   GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kAdd, std::memory_order_release);
 }
 
+void IntrinsicLocationsBuilderARM64::VisitVarHandleGetAndBitwiseAnd(HInvoke* invoke) {
+  CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kAnd);
+}
+
+void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndBitwiseAnd(HInvoke* invoke) {
+  GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kAnd, std::memory_order_seq_cst);
+}
+
+void IntrinsicLocationsBuilderARM64::VisitVarHandleGetAndBitwiseAndAcquire(HInvoke* invoke) {
+  CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kAnd);
+}
+
+void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndBitwiseAndAcquire(HInvoke* invoke) {
+  GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kAnd, std::memory_order_acquire);
+}
+
+void IntrinsicLocationsBuilderARM64::VisitVarHandleGetAndBitwiseAndRelease(HInvoke* invoke) {
+  CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kAnd);
+}
+
+void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndBitwiseAndRelease(HInvoke* invoke) {
+  GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kAnd, std::memory_order_release);
+}
+
+void IntrinsicLocationsBuilderARM64::VisitVarHandleGetAndBitwiseOr(HInvoke* invoke) {
+  CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kOr);
+}
+
+void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndBitwiseOr(HInvoke* invoke) {
+  GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kOr, std::memory_order_seq_cst);
+}
+
+void IntrinsicLocationsBuilderARM64::VisitVarHandleGetAndBitwiseOrAcquire(HInvoke* invoke) {
+  CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kOr);
+}
+
+void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndBitwiseOrAcquire(HInvoke* invoke) {
+  GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kOr, std::memory_order_acquire);
+}
+
+void IntrinsicLocationsBuilderARM64::VisitVarHandleGetAndBitwiseOrRelease(HInvoke* invoke) {
+  CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kOr);
+}
+
+void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndBitwiseOrRelease(HInvoke* invoke) {
+  GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kOr, std::memory_order_release);
+}
+
+void IntrinsicLocationsBuilderARM64::VisitVarHandleGetAndBitwiseXor(HInvoke* invoke) {
+  CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kXor);
+}
+
+void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndBitwiseXor(HInvoke* invoke) {
+  GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kXor, std::memory_order_seq_cst);
+}
+
+void IntrinsicLocationsBuilderARM64::VisitVarHandleGetAndBitwiseXorAcquire(HInvoke* invoke) {
+  CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kXor);
+}
+
+void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndBitwiseXorAcquire(HInvoke* invoke) {
+  GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kXor, std::memory_order_acquire);
+}
+
+void IntrinsicLocationsBuilderARM64::VisitVarHandleGetAndBitwiseXorRelease(HInvoke* invoke) {
+  CreateVarHandleGetAndUpdateLocations(invoke, GetAndUpdateOp::kXor);
+}
+
+void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndBitwiseXorRelease(HInvoke* invoke) {
+  GenerateVarHandleGetAndUpdate(invoke, codegen_, GetAndUpdateOp::kXor, std::memory_order_release);
+}
 
 UNIMPLEMENTED_INTRINSIC(ARM64, StringStringIndexOf);
 UNIMPLEMENTED_INTRINSIC(ARM64, StringStringIndexOfAfter);
@@ -4731,15 +4824,6 @@
 
 UNIMPLEMENTED_INTRINSIC(ARM64, MethodHandleInvokeExact)
 UNIMPLEMENTED_INTRINSIC(ARM64, MethodHandleInvoke)
-UNIMPLEMENTED_INTRINSIC(ARM64, VarHandleGetAndBitwiseAnd)
-UNIMPLEMENTED_INTRINSIC(ARM64, VarHandleGetAndBitwiseAndAcquire)
-UNIMPLEMENTED_INTRINSIC(ARM64, VarHandleGetAndBitwiseAndRelease)
-UNIMPLEMENTED_INTRINSIC(ARM64, VarHandleGetAndBitwiseOr)
-UNIMPLEMENTED_INTRINSIC(ARM64, VarHandleGetAndBitwiseOrAcquire)
-UNIMPLEMENTED_INTRINSIC(ARM64, VarHandleGetAndBitwiseOrRelease)
-UNIMPLEMENTED_INTRINSIC(ARM64, VarHandleGetAndBitwiseXor)
-UNIMPLEMENTED_INTRINSIC(ARM64, VarHandleGetAndBitwiseXorAcquire)
-UNIMPLEMENTED_INTRINSIC(ARM64, VarHandleGetAndBitwiseXorRelease)
 
 UNREACHABLE_INTRINSICS(ARM64)