x86_64: Implement VarHandle.set{Opaque,Release,Volatile} for fields.

Benchmark improvements (using benchmarks provided by
https://android-review.googlesource.com/1420959):

  benchmark                    before  after
  --------------------------------------------
  SetVolatileStaticFieldInt    2.782   0.00678
  SetVolatileStaticFieldString 3.040   0.00678
  SetVolatileFieldInt          3.082   0.00678
  SetVolatileFieldString       3.317   0.00678
  SetOpaqueStaticFieldInt      2.804   0.00177
  SetOpaqueStaticFieldString   3.040   0.00230
  SetOpaqueFieldInt            3.080   0.00174
  SetOpaqueFieldString         3.317   0.00224
  SetReleaseStaticFieldInt     2.795   0.00177
  SetReleaseStaticFieldString  3.042   0.00281
  SetReleaseFieldInt           3.076   0.00174
  SetReleaseFieldString        3.326   0.00225

Bug: 65872996

Test: lunch aosp_cf_x86_64_phone-userdebug \
  && art/test.py --host -r -t 712-varhandle-invocations --64
Test: Repeat with ART_USE_READ_BARRIER=false.
Test: Repeat with ART_HEAP_POISONING=true.
Change-Id: Idc5922ba4542608cf2b8d9ba686206b9956baaf6
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index d484f04..da9a1ab 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -5024,6 +5024,7 @@
                                                     Address field_addr,
                                                     CpuRegister base,
                                                     bool is_volatile,
+                                                    bool is_atomic,
                                                     bool value_can_be_null) {
   LocationSummary* locations = instruction->GetLocations();
   Location value = locations->InAt(value_index);
@@ -5081,10 +5082,17 @@
     case DataType::Type::kInt64: {
       if (value.IsConstant()) {
         int64_t v = value.GetConstant()->AsLongConstant()->GetValue();
-        codegen_->MoveInt64ToAddress(field_addr,
-                                     Address::displace(field_addr, sizeof(int32_t)),
-                                     v,
-                                     instruction);
+        if (is_atomic) {
+          // Move constant into a register, then atomically store the register to memory.
+          CpuRegister temp = locations->GetTemp(extra_temp_index).AsRegister<CpuRegister>();
+          __ movq(temp, Immediate(v));
+          __ movq(field_addr, temp);
+        } else {
+          codegen_->MoveInt64ToAddress(field_addr,
+                                       Address::displace(field_addr, sizeof(int32_t)),
+                                       v,
+                                       instruction);
+        }
         maybe_record_implicit_null_check_done = true;
       } else {
         __ movq(field_addr, value.AsRegister<CpuRegister>());
@@ -5105,10 +5113,17 @@
     case DataType::Type::kFloat64: {
       if (value.IsConstant()) {
         int64_t v = bit_cast<int64_t, double>(value.GetConstant()->AsDoubleConstant()->GetValue());
-        codegen_->MoveInt64ToAddress(field_addr,
-                                     Address::displace(field_addr, sizeof(int32_t)),
-                                     v,
-                                     instruction);
+        if (is_atomic) {
+          // Move constant into a register, then atomically store the register to memory.
+          CpuRegister temp = locations->GetTemp(extra_temp_index).AsRegister<CpuRegister>();
+          __ movq(temp, Immediate(v));
+          __ movq(field_addr, temp);
+        } else {
+          codegen_->MoveInt64ToAddress(field_addr,
+                                       Address::displace(field_addr, sizeof(int32_t)),
+                                       v,
+                                       instruction);
+        }
         maybe_record_implicit_null_check_done = true;
       } else {
         __ movsd(field_addr, value.AsFpuRegister<XmmRegister>());
@@ -5164,6 +5179,7 @@
                  Address(base, offset),
                  base,
                  is_volatile,
+                 /*is_atomic=*/ false,
                  value_can_be_null);
 
   if (is_predicated) {
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index 8a08e85..db0b9d7 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -248,6 +248,7 @@
                       Address field_addr,
                       CpuRegister base,
                       bool is_volatile,
+                      bool is_atomic,
                       bool value_can_be_null);
 
  private:
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index 471f021..a8718ee 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -3266,7 +3266,10 @@
   CreateVarHandleCommonLocations(invoke);
 }
 
-static void GenerateVarHandleSet(HInvoke* invoke, CodeGeneratorX86_64* codegen) {
+static void GenerateVarHandleSet(HInvoke* invoke,
+                                 CodeGeneratorX86_64* codegen,
+                                 bool is_volatile,
+                                 bool is_atomic) {
   X86_64Assembler* assembler = codegen->GetAssembler();
 
   uint32_t value_index = invoke->GetNumberOfArguments() - 1;
@@ -3277,11 +3280,15 @@
   GenerateVarHandleTarget(invoke, target, codegen);
 
   switch (invoke->GetIntrinsic()) {
-    case Intrinsics::kVarHandleSet:
+    case Intrinsics::kVarHandleSetRelease:
+      codegen->GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
+      break;
+    case Intrinsics::kVarHandleSetVolatile:
+      // setVolatile needs kAnyStore barrier, but HandleFieldSet takes care of that.
       break;
     default:
-      // TODO: implement setOpaque, setRelease, setVolatile.
-      LOG(FATAL) << "unsupported intrinsic " << invoke->GetIntrinsic();
+      // Other intrinsics don't need a barrier.
+      break;
   }
 
   const uint32_t last_temp_index = invoke->GetLocations()->GetTempCount() - 1;
@@ -3296,9 +3303,12 @@
                                 value_type,
                                 dst,
                                 CpuRegister(target.object),
-                                /*is_volatile=*/ false,
+                                is_volatile,
+                                is_atomic,
                                 /*value_can_be_null=*/ true);
 
+  // setVolatile needs kAnyAny barrier, but HandleFieldSet takes care of that.
+
   __ Bind(slow_path->GetExitLabel());
 }
 
@@ -3307,7 +3317,31 @@
 }
 
 void IntrinsicCodeGeneratorX86_64::VisitVarHandleSet(HInvoke* invoke) {
-  GenerateVarHandleSet(invoke, codegen_);
+  GenerateVarHandleSet(invoke, codegen_, /*is_volatile=*/ false, /*is_atomic=*/ false);
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitVarHandleSetOpaque(HInvoke* invoke) {
+  CreateVarHandleSetLocations(invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitVarHandleSetOpaque(HInvoke* invoke) {
+  GenerateVarHandleSet(invoke, codegen_, /*is_volatile=*/ false, /*is_atomic=*/ true);
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitVarHandleSetRelease(HInvoke* invoke) {
+  CreateVarHandleSetLocations(invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitVarHandleSetRelease(HInvoke* invoke) {
+  GenerateVarHandleSet(invoke, codegen_, /*is_volatile=*/ false, /*is_atomic=*/ true);
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitVarHandleSetVolatile(HInvoke* invoke) {
+  CreateVarHandleSetLocations(invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitVarHandleSetVolatile(HInvoke* invoke) {
+  GenerateVarHandleSet(invoke, codegen_, /*is_volatile=*/ true, /*is_atomic=*/ true);
 }
 
 UNIMPLEMENTED_INTRINSIC(X86_64, FloatIsInfinite)
@@ -3371,9 +3405,6 @@
 UNIMPLEMENTED_INTRINSIC(X86_64, VarHandleGetAndSet)
 UNIMPLEMENTED_INTRINSIC(X86_64, VarHandleGetAndSetAcquire)
 UNIMPLEMENTED_INTRINSIC(X86_64, VarHandleGetAndSetRelease)
-UNIMPLEMENTED_INTRINSIC(X86_64, VarHandleSetOpaque)
-UNIMPLEMENTED_INTRINSIC(X86_64, VarHandleSetRelease)
-UNIMPLEMENTED_INTRINSIC(X86_64, VarHandleSetVolatile)
 UNIMPLEMENTED_INTRINSIC(X86_64, VarHandleWeakCompareAndSet)
 UNIMPLEMENTED_INTRINSIC(X86_64, VarHandleWeakCompareAndSetAcquire)
 UNIMPLEMENTED_INTRINSIC(X86_64, VarHandleWeakCompareAndSetPlain)