X86: Add the other get VarHandles (getVolatile, getAcquire, getOpaque)

This commit implements the VarHandle.getVolatile, getAcquire, and
getOpaque intrinsics.
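
For reference, a minimal Java sketch of the accessors these intrinsics
cover; the class and field names below are illustrative, not taken from
the 712-varhandle-invocation test:

    import java.lang.invoke.MethodHandles;
    import java.lang.invoke.VarHandle;

    class Example {
        int counter;

        static final VarHandle COUNTER;
        static {
            try {
                COUNTER = MethodHandles.lookup()
                        .findVarHandle(Example.class, "counter", int.class);
            } catch (ReflectiveOperationException e) {
                throw new ExceptionInInitializerError(e);
            }
        }

        // Sequentially consistent, acquire, and opaque (atomic but
        // unordered) loads of the field.
        int readVolatile() { return (int) COUNTER.getVolatile(this); }
        int readAcquire()  { return (int) COUNTER.getAcquire(this); }
        int readOpaque()   { return (int) COUNTER.getOpaque(this); }
    }

On the code-generation side, the volatile, acquire, and opaque variants
of a 64-bit field load go through an XMM register (LoadFromMemoryNoBarrier
with is_atomic_load=true) so the read is single-copy atomic on 32-bit x86,
and the acquire/volatile fence emitted after the load is a no-op under the
x86 memory model.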

Test: ART_HEAP_POISONING=true art/test.py --host -r -t 712-varhandle-invocation --32
Test: ART_HEAP_POISONING=false art/test.py --host -r -t 712-varhandle-invocation --32
Test: ART_USE_READ_BARRIER=true art/test.py --host -r -t 712-varhandle-invocation --32
Test: ART_USE_READ_BARRIER=false art/test.py --host -r -t 712-varhandle-invocation --32
Bug: 65872996
Change-Id: I38501c226c9d5af0a9e5a1230abcb3114aad4737
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc
index ddda17b..a6f8384 100644
--- a/compiler/optimizing/intrinsics_x86.cc
+++ b/compiler/optimizing/intrinsics_x86.cc
@@ -3384,7 +3384,7 @@
   return locations->InAt(1).AsRegister<Register>();
 }
 
-void IntrinsicLocationsBuilderX86::VisitVarHandleGet(HInvoke* invoke) {
+static void CreateVarHandleGetLocations(HInvoke* invoke) {
   // The only read barrier implementation supporting the
   // VarHandleGet intrinsic is the Baker-style read barriers.
   if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
@@ -3410,6 +3410,10 @@
   switch (DataType::Kind(type)) {
     case DataType::Type::kInt64:
       locations->AddTemp(Location::RequiresRegister());
+      if (invoke->GetIntrinsic() != Intrinsics::kVarHandleGet) {
+        // We need an XmmRegister for Int64 to ensure an atomic load.
+        locations->AddTemp(Location::RequiresFpuRegister());
+      }
       FALLTHROUGH_INTENDED;
     case DataType::Type::kInt32:
     case DataType::Type::kReference:
@@ -3422,19 +3426,19 @@
   }
 }
 
-void IntrinsicCodeGeneratorX86::VisitVarHandleGet(HInvoke* invoke) {
+static void GenerateVarHandleGet(HInvoke* invoke, CodeGeneratorX86* codegen) {
   // The only read barrier implementation supporting the
   // VarHandleGet intrinsic is the Baker-style read barriers.
   DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
 
-  X86Assembler* assembler = codegen_->GetAssembler();
+  X86Assembler* assembler = codegen->GetAssembler();
   LocationSummary* locations = invoke->GetLocations();
   Register varhandle_object = locations->InAt(0).AsRegister<Register>();
   DataType::Type type = invoke->GetType();
   DCHECK_NE(type, DataType::Type::kVoid);
   Register temp = locations->GetTemp(0).AsRegister<Register>();
-  SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
-  codegen_->AddSlowPath(slow_path);
+  SlowPathCode* slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
+  codegen->AddSlowPath(slow_path);
 
   GenerateVarHandleCommonChecks(invoke, temp, slow_path, assembler);
 
@@ -3450,28 +3454,63 @@
   // Get the field referred by the VarHandle. The returned register contains the object reference
   // or the declaring class. The field offset will be placed in 'offset'. For static fields, the
   // declaring class will be placed in 'temp' register.
-  Register ref = GenerateVarHandleFieldReference(invoke, codegen_, temp, offset);
+  Register ref = GenerateVarHandleFieldReference(invoke, codegen, temp, offset);
+  Address field_addr(ref, offset, TIMES_1, 0);
 
   // Load the value from the field
-  CodeGeneratorX86* codegen_x86 = down_cast<CodeGeneratorX86*>(codegen_);
-  if (type == DataType::Type::kReference) {
-    if (kCompilerReadBarrierOption == kWithReadBarrier) {
-      codegen_x86->GenerateReferenceLoadWithBakerReadBarrier(invoke,
-                                                             out,
-                                                             ref,
-                                                             Address(ref, offset, TIMES_1, 0),
-                                                             /* needs_null_check= */ false);
-    } else {
-      __ movl(out.AsRegister<Register>(), Address(ref, offset, TIMES_1, 0));
-      __ MaybeUnpoisonHeapReference(out.AsRegister<Register>());
-    }
+  if (type == DataType::Type::kReference && kCompilerReadBarrierOption == kWithReadBarrier) {
+    codegen->GenerateReferenceLoadWithBakerReadBarrier(
+        invoke, out, ref, field_addr, /* needs_null_check= */ false);
+  } else if (type == DataType::Type::kInt64 &&
+             invoke->GetIntrinsic() != Intrinsics::kVarHandleGet) {
+    XmmRegister xmm_temp = locations->GetTemp(2).AsFpuRegister<XmmRegister>();
+    codegen->LoadFromMemoryNoBarrier(type, out, field_addr, xmm_temp, /* is_atomic_load= */ true);
   } else {
-    codegen_x86->MoveFromMemory(type, out, ref, offset);
+    codegen->LoadFromMemoryNoBarrier(type, out, field_addr);
+  }
+
+  if (invoke->GetIntrinsic() == Intrinsics::kVarHandleGetVolatile ||
+      invoke->GetIntrinsic() == Intrinsics::kVarHandleGetAcquire) {
+    // Load-acquire fence to prevent reordering with subsequent loads and stores.
+    // Note that this is a no-op, thanks to the x86 memory model.
+    codegen->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
   }
 
   __ Bind(slow_path->GetExitLabel());
 }
 
+void IntrinsicLocationsBuilderX86::VisitVarHandleGet(HInvoke* invoke) {
+  CreateVarHandleGetLocations(invoke);
+}
+
+void IntrinsicCodeGeneratorX86::VisitVarHandleGet(HInvoke* invoke) {
+  GenerateVarHandleGet(invoke, codegen_);
+}
+
+void IntrinsicLocationsBuilderX86::VisitVarHandleGetVolatile(HInvoke* invoke) {
+  CreateVarHandleGetLocations(invoke);
+}
+
+void IntrinsicCodeGeneratorX86::VisitVarHandleGetVolatile(HInvoke* invoke) {
+  GenerateVarHandleGet(invoke, codegen_);
+}
+
+void IntrinsicLocationsBuilderX86::VisitVarHandleGetAcquire(HInvoke* invoke) {
+  CreateVarHandleGetLocations(invoke);
+}
+
+void IntrinsicCodeGeneratorX86::VisitVarHandleGetAcquire(HInvoke* invoke) {
+  GenerateVarHandleGet(invoke, codegen_);
+}
+
+void IntrinsicLocationsBuilderX86::VisitVarHandleGetOpaque(HInvoke* invoke) {
+  CreateVarHandleGetLocations(invoke);
+}
+
+void IntrinsicCodeGeneratorX86::VisitVarHandleGetOpaque(HInvoke* invoke) {
+  GenerateVarHandleGet(invoke, codegen_);
+}
+
 static void CreateVarHandleSetLocations(HInvoke* invoke) {
   // The only read barrier implementation supporting the
   // VarHandleGet intrinsic is the Baker-style read barriers.
@@ -3927,10 +3966,10 @@
       Location eax = Location::RegisterLocation(EAX);
       NearLabel try_again;
       __ Bind(&try_again);
-      codegen->MoveFromMemory(type, temp_float, reference, offset);
+      __ movss(temp_float.AsFpuRegister<XmmRegister>(), field_addr);
       __ movd(EAX, temp_float.AsFpuRegister<XmmRegister>());
       __ addss(temp_float.AsFpuRegister<XmmRegister>(),
-              value_loc.AsFpuRegister<XmmRegister>());
+               value_loc.AsFpuRegister<XmmRegister>());
       GenPrimitiveLockedCmpxchg(type,
                                 codegen,
                                 /* expected_value= */ eax,
@@ -4062,7 +4101,7 @@
   NearLabel try_again;
   __ Bind(&try_again);
   // Place the expected value in EAX for cmpxchg
-  codegen->MoveFromMemory(type, locations->Out(), reference, offset);
+  codegen->LoadFromMemoryNoBarrier(type, locations->Out(), field_addr);
   codegen->Move32(locations->GetTemp(0), locations->InAt(value_index));
   GenerateBitwiseOp(invoke, codegen, temp, out);
   GenPrimitiveLockedCmpxchg(type,
@@ -4211,14 +4250,11 @@
 UNIMPLEMENTED_INTRINSIC(X86, VarHandleCompareAndExchange)
 UNIMPLEMENTED_INTRINSIC(X86, VarHandleCompareAndExchangeAcquire)
 UNIMPLEMENTED_INTRINSIC(X86, VarHandleCompareAndExchangeRelease)
-UNIMPLEMENTED_INTRINSIC(X86, VarHandleGetAcquire)
 UNIMPLEMENTED_INTRINSIC(X86, VarHandleGetAndAddAcquire)
 UNIMPLEMENTED_INTRINSIC(X86, VarHandleGetAndAddRelease)
 UNIMPLEMENTED_INTRINSIC(X86, VarHandleGetAndSet)
 UNIMPLEMENTED_INTRINSIC(X86, VarHandleGetAndSetAcquire)
 UNIMPLEMENTED_INTRINSIC(X86, VarHandleGetAndSetRelease)
-UNIMPLEMENTED_INTRINSIC(X86, VarHandleGetOpaque)
-UNIMPLEMENTED_INTRINSIC(X86, VarHandleGetVolatile)
 
 UNREACHABLE_INTRINSICS(X86)