arm64: Implement Unsafe GetAndSet/-Add intrinsics.

Test: testrunner.py --target --64 --optimizing
Bug: 26264765
Bug: 202868177
Change-Id: I9836a2eeee160e6c520e98ccb8af6f66b76bf00a
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index f5a6f8d..a184dad 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -6738,7 +6738,7 @@
   MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
 }
 
-void CodeGeneratorARM64::GenerateIntrinsicCasMoveWithBakerReadBarrier(
+void CodeGeneratorARM64::GenerateIntrinsicMoveWithBakerReadBarrier(
     vixl::aarch64::Register marked_old_value,
     vixl::aarch64::Register old_value) {
   DCHECK(EmitBakerReadBarrier());
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index 5232181..7ff08f5 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -144,19 +144,8 @@
   V(SystemArrayCopyByte)                      \
   V(SystemArrayCopyInt)                       \
   /* 1.8 */                                   \
-  V(UnsafeGetAndAddInt)                       \
-  V(UnsafeGetAndAddLong)                      \
-  V(UnsafeGetAndSetInt)                       \
-  V(UnsafeGetAndSetLong)                      \
-  V(UnsafeGetAndSetObject)                    \
   V(MethodHandleInvokeExact)                  \
-  V(MethodHandleInvoke)                       \
-  /* OpenJDK 11 */                            \
-  V(JdkUnsafeGetAndAddInt)                    \
-  V(JdkUnsafeGetAndAddLong)                   \
-  V(JdkUnsafeGetAndSetInt)                    \
-  V(JdkUnsafeGetAndSetLong)                   \
-  V(JdkUnsafeGetAndSetReference)
+  V(MethodHandleInvoke)
 
 class SlowPathCodeARM64 : public SlowPathCode {
  public:
@@ -904,9 +893,9 @@
                                uint32_t offset,
                                vixl::aarch64::Label* fixup_label,
                                ReadBarrierOption read_barrier_option);
-  // Generate MOV for the `old_value` in intrinsic CAS and mark it with Baker read barrier.
-  void GenerateIntrinsicCasMoveWithBakerReadBarrier(vixl::aarch64::Register marked_old_value,
-                                                    vixl::aarch64::Register old_value);
+  // Generate MOV for the `old_value` in intrinsic and mark it with Baker read barrier.
+  void GenerateIntrinsicMoveWithBakerReadBarrier(vixl::aarch64::Register marked_old_value,
+                                                 vixl::aarch64::Register old_value);
   // Fast path implementation of ReadBarrier::Barrier for a heap
   // reference field load when Baker's read barriers are used.
   // Overload suitable for Unsafe.getObject/-Volatile() intrinsic.
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index 9add804..be3fb9a 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -1360,7 +1360,7 @@
     // Mark the `old_value_` from the main path and compare with `expected_`.
     if (kUseBakerReadBarrier) {
       DCHECK(mark_old_value_slow_path_ == nullptr);
-      arm64_codegen->GenerateIntrinsicCasMoveWithBakerReadBarrier(old_value_temp_, old_value_);
+      arm64_codegen->GenerateIntrinsicMoveWithBakerReadBarrier(old_value_temp_, old_value_);
     } else {
       DCHECK(mark_old_value_slow_path_ != nullptr);
       __ B(mark_old_value_slow_path_->GetEntryLabel());
@@ -1414,7 +1414,7 @@
       __ Bind(&mark_old_value);
       if (kUseBakerReadBarrier) {
         DCHECK(update_old_value_slow_path_ == nullptr);
-        arm64_codegen->GenerateIntrinsicCasMoveWithBakerReadBarrier(old_value_, old_value_temp_);
+        arm64_codegen->GenerateIntrinsicMoveWithBakerReadBarrier(old_value_, old_value_temp_);
       } else {
         // Note: We could redirect the `failure` above directly to the entry label and bind
         // the exit label in the main path, but the main path would need to access the
@@ -1692,6 +1692,138 @@
   __ Cbnz(store_result, &loop_label);
 }
 
+static void CreateUnsafeGetAndUpdateLocations(ArenaAllocator* allocator,
+                                              HInvoke* invoke,
+                                              CodeGeneratorARM64* codegen) {
+  const bool can_call = codegen->EmitReadBarrier() && IsUnsafeGetAndSetReference(invoke);
+  LocationSummary* locations =
+      new (allocator) LocationSummary(invoke,
+                                      can_call
+                                          ? LocationSummary::kCallOnSlowPath
+                                          : LocationSummary::kNoCall,
+                                      kIntrinsified);
+  if (can_call && kUseBakerReadBarrier) {
+    locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
+  }
+  locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
+  locations->SetInAt(1, Location::RequiresRegister());
+  locations->SetInAt(2, Location::RequiresRegister());
+  locations->SetInAt(3, Location::RequiresRegister());
+  locations->AddTemp(Location::RequiresRegister());
+
+  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+}
+
+static void GenUnsafeGetAndUpdate(HInvoke* invoke,
+                                  DataType::Type type,
+                                  CodeGeneratorARM64* codegen,
+                                  GetAndUpdateOp get_and_update_op) {
+  MacroAssembler* masm = codegen->GetVIXLAssembler();
+  LocationSummary* locations = invoke->GetLocations();
+
+  Register out = RegisterFrom(locations->Out(), type);            // Result.
+  Register base = WRegisterFrom(locations->InAt(1));              // Object pointer.
+  Register offset = XRegisterFrom(locations->InAt(2));            // Long offset.
+  Register arg = RegisterFrom(locations->InAt(3), type);          // New value or addend.
+  Register tmp_ptr = XRegisterFrom(locations->GetTemp(0));        // Pointer to actual memory.
+
+  // This needs to be before the temp registers, as MarkGCCard also uses VIXL temps.
+  if (type == DataType::Type::kReference) {
+    DCHECK(get_and_update_op == GetAndUpdateOp::kSet);
+    // Mark card for object as a new value shall be stored.
+    bool new_value_can_be_null = true;  // TODO: Worth finding out this information?
+    codegen->MarkGCCard(base, /*value=*/ arg, new_value_can_be_null);
+  }
+
+  __ Add(tmp_ptr, base.X(), Operand(offset));
+  GenerateGetAndUpdate(codegen,
+                       get_and_update_op,
+                       type,
+                       std::memory_order_seq_cst,
+                       tmp_ptr,
+                       arg,
+                       /*old_value=*/ out);
+
+  if (type == DataType::Type::kReference && codegen->EmitReadBarrier()) {
+    DCHECK(get_and_update_op == GetAndUpdateOp::kSet);
+    if (kUseBakerReadBarrier) {
+      codegen->GenerateIntrinsicMoveWithBakerReadBarrier(out.W(), out.W());
+    } else {
+      codegen->GenerateReadBarrierSlow(
+          invoke,
+          Location::RegisterLocation(out.GetCode()),
+          Location::RegisterLocation(out.GetCode()),
+          Location::RegisterLocation(base.GetCode()),
+          /*offset=*/ 0u,
+          /*index=*/ Location::RegisterLocation(offset.GetCode()));
+    }
+  }
+}
+
+void IntrinsicLocationsBuilderARM64::VisitUnsafeGetAndAddInt(HInvoke* invoke) {
+  VisitJdkUnsafeGetAndAddInt(invoke);
+}
+void IntrinsicLocationsBuilderARM64::VisitUnsafeGetAndAddLong(HInvoke* invoke) {
+  VisitJdkUnsafeGetAndAddLong(invoke);
+}
+void IntrinsicLocationsBuilderARM64::VisitUnsafeGetAndSetInt(HInvoke* invoke) {
+  VisitJdkUnsafeGetAndSetInt(invoke);
+}
+void IntrinsicLocationsBuilderARM64::VisitUnsafeGetAndSetLong(HInvoke* invoke) {
+  VisitJdkUnsafeGetAndSetLong(invoke);
+}
+void IntrinsicLocationsBuilderARM64::VisitUnsafeGetAndSetObject(HInvoke* invoke) {
+  VisitJdkUnsafeGetAndSetReference(invoke);
+}
+
+void IntrinsicLocationsBuilderARM64::VisitJdkUnsafeGetAndAddInt(HInvoke* invoke) {
+  CreateUnsafeGetAndUpdateLocations(allocator_, invoke, codegen_);
+}
+void IntrinsicLocationsBuilderARM64::VisitJdkUnsafeGetAndAddLong(HInvoke* invoke) {
+  CreateUnsafeGetAndUpdateLocations(allocator_, invoke, codegen_);
+}
+void IntrinsicLocationsBuilderARM64::VisitJdkUnsafeGetAndSetInt(HInvoke* invoke) {
+  CreateUnsafeGetAndUpdateLocations(allocator_, invoke, codegen_);
+}
+void IntrinsicLocationsBuilderARM64::VisitJdkUnsafeGetAndSetLong(HInvoke* invoke) {
+  CreateUnsafeGetAndUpdateLocations(allocator_, invoke, codegen_);
+}
+void IntrinsicLocationsBuilderARM64::VisitJdkUnsafeGetAndSetReference(HInvoke* invoke) {
+  CreateUnsafeGetAndUpdateLocations(allocator_, invoke, codegen_);
+}
+
+void IntrinsicCodeGeneratorARM64::VisitUnsafeGetAndAddInt(HInvoke* invoke) {
+  VisitJdkUnsafeGetAndAddInt(invoke);
+}
+void IntrinsicCodeGeneratorARM64::VisitUnsafeGetAndAddLong(HInvoke* invoke) {
+  VisitJdkUnsafeGetAndAddLong(invoke);
+}
+void IntrinsicCodeGeneratorARM64::VisitUnsafeGetAndSetInt(HInvoke* invoke) {
+  VisitJdkUnsafeGetAndSetInt(invoke);
+}
+void IntrinsicCodeGeneratorARM64::VisitUnsafeGetAndSetLong(HInvoke* invoke) {
+  VisitJdkUnsafeGetAndSetLong(invoke);
+}
+void IntrinsicCodeGeneratorARM64::VisitUnsafeGetAndSetObject(HInvoke* invoke) {
+  VisitJdkUnsafeGetAndSetReference(invoke);
+}
+
+void IntrinsicCodeGeneratorARM64::VisitJdkUnsafeGetAndAddInt(HInvoke* invoke) {
+  GenUnsafeGetAndUpdate(invoke, DataType::Type::kInt32, codegen_, GetAndUpdateOp::kAdd);
+}
+void IntrinsicCodeGeneratorARM64::VisitJdkUnsafeGetAndAddLong(HInvoke* invoke) {
+  GenUnsafeGetAndUpdate(invoke, DataType::Type::kInt64, codegen_, GetAndUpdateOp::kAdd);
+}
+void IntrinsicCodeGeneratorARM64::VisitJdkUnsafeGetAndSetInt(HInvoke* invoke) {
+  GenUnsafeGetAndUpdate(invoke, DataType::Type::kInt32, codegen_, GetAndUpdateOp::kSet);
+}
+void IntrinsicCodeGeneratorARM64::VisitJdkUnsafeGetAndSetLong(HInvoke* invoke) {
+  GenUnsafeGetAndUpdate(invoke, DataType::Type::kInt64, codegen_, GetAndUpdateOp::kSet);
+}
+void IntrinsicCodeGeneratorARM64::VisitJdkUnsafeGetAndSetReference(HInvoke* invoke) {
+  GenUnsafeGetAndUpdate(invoke, DataType::Type::kReference, codegen_, GetAndUpdateOp::kSet);
+}
+
 void IntrinsicLocationsBuilderARM64::VisitStringCompareTo(HInvoke* invoke) {
   LocationSummary* locations =
       new (allocator_) LocationSummary(invoke,
@@ -5571,7 +5703,7 @@
     __ Sxth(out.W(), old_value.W());
   } else if (value_type == DataType::Type::kReference && codegen->EmitReadBarrier()) {
     if (kUseBakerReadBarrier) {
-      codegen->GenerateIntrinsicCasMoveWithBakerReadBarrier(out.W(), old_value.W());
+      codegen->GenerateIntrinsicMoveWithBakerReadBarrier(out.W(), old_value.W());
     } else {
       codegen->GenerateReadBarrierSlow(
           invoke,
diff --git a/compiler/optimizing/intrinsics_utils.h b/compiler/optimizing/intrinsics_utils.h
index 380011e..590bc34 100644
--- a/compiler/optimizing/intrinsics_utils.h
+++ b/compiler/optimizing/intrinsics_utils.h
@@ -177,6 +177,16 @@
   }
 }
 
+static inline bool IsUnsafeGetAndSetReference(HInvoke* invoke) {
+  switch (invoke->GetIntrinsic()) {
+    case Intrinsics::kUnsafeGetAndSetObject:
+    case Intrinsics::kJdkUnsafeGetAndSetReference:
+      return true;
+    default:
+      return false;
+  }
+}
+
 static inline bool IsVarHandleCASFamily(HInvoke* invoke) {
   mirror::VarHandle::AccessModeTemplate access_mode =
       mirror::VarHandle::GetAccessModeTemplateByIntrinsic(invoke->GetIntrinsic());
diff --git a/test/2260-checker-inline-unimplemented-intrinsics/src/Main.java b/test/2260-checker-inline-unimplemented-intrinsics/src/Main.java
index 3f125ca..3953010 100644
--- a/test/2260-checker-inline-unimplemented-intrinsics/src/Main.java
+++ b/test/2260-checker-inline-unimplemented-intrinsics/src/Main.java
@@ -53,20 +53,21 @@
     }
 
     // UnsafeGetAndAddInt/Long are part of core-oj and we will not inline on host.
+    // And they are actually implemented on arm64.
 
-    /// CHECK-START-{ARM,ARM64}: int Main.$noinline$add(java.lang.Object, long, int) inliner (before)
+    /// CHECK-START: int Main.$noinline$add(java.lang.Object, long, int) inliner (before)
     /// CHECK:     InvokeVirtual intrinsic:UnsafeGetAndAddInt
 
-    /// CHECK-START-{ARM,ARM64}: int Main.$noinline$add(java.lang.Object, long, int) inliner (after)
+    /// CHECK-START-ARM: int Main.$noinline$add(java.lang.Object, long, int) inliner (after)
     /// CHECK-NOT: InvokeVirtual intrinsic:UnsafeGetAndAddInt
     private static int $noinline$add(Object o, long offset, int delta) {
         return unsafe.getAndAddInt(o, offset, delta);
     }
 
-    /// CHECK-START-{ARM,ARM64}: long Main.$noinline$add(java.lang.Object, long, long) inliner (before)
+    /// CHECK-START: long Main.$noinline$add(java.lang.Object, long, long) inliner (before)
     /// CHECK:     InvokeVirtual intrinsic:UnsafeGetAndAddLong
 
-    /// CHECK-START-{ARM,ARM64}: long Main.$noinline$add(java.lang.Object, long, long) inliner (after)
+    /// CHECK-START-ARM: long Main.$noinline$add(java.lang.Object, long, long) inliner (after)
     /// CHECK-NOT: InvokeVirtual intrinsic:UnsafeGetAndAddLong
     private static long $noinline$add(Object o, long offset, long delta) {
         return unsafe.getAndAddLong(o, offset, delta);