arm64: Fix VarHandle intrinsics for non-Baker read barrier.

It turns out that artReadBarrierSlow() ignores the passed
reference and reloads the field from the object. This makes
some of the VarHandle intrinsics broken for the TABLELOOKUP
configuration. This change disables the broken variants of
these intrinsics (but leaves the support code in place) and
cleans up locations for those variants that remain active.

Also refactor reference argument checks and do a few other
small cleanups (renames, comment updates, etc.).

Test: testrunner.py --target --64 --optimizing
Test: Repeat with ART_USE_READ_BARRIER=false ART_HEAP_POISONING=true.
Test: Repeat with ART_READ_BARRIER_TYPE=TABLELOOKUP.
      (Ignore two pre-existing checker test failures.)
Bug: 71781600
Change-Id: I8d28a4883771a8db2b283737bb643b36c7038c26
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 7becf23..dba8a2d 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -723,6 +723,7 @@
         Intrinsics intrinsic = instruction_->AsInvoke()->GetIntrinsic();
         DCHECK(intrinsic == Intrinsics::kUnsafeGetObject ||
                intrinsic == Intrinsics::kUnsafeGetObjectVolatile ||
+               intrinsic == Intrinsics::kUnsafeCASObject ||
                mirror::VarHandle::GetAccessModeTemplateByIntrinsic(intrinsic) ==
                    mirror::VarHandle::AccessModeTemplate::kGet ||
                mirror::VarHandle::GetAccessModeTemplateByIntrinsic(intrinsic) ==
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index 864a9e3..34d3f1a 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -921,9 +921,8 @@
                codegen_);
 }
 
-static void CreateIntIntIntIntIntToInt(ArenaAllocator* allocator, HInvoke* invoke) {
+static void CreateUnsafeCASLocations(ArenaAllocator* allocator, HInvoke* invoke) {
   bool can_call = kEmitCompilerReadBarrier &&
-      kUseBakerReadBarrier &&
       (invoke->GetIntrinsic() == Intrinsics::kUnsafeCASObject);
   LocationSummary* locations =
       new (allocator) LocationSummary(invoke,
@@ -1041,17 +1040,17 @@
   }
 }
 
-static void GenCas(CodeGeneratorARM64* codegen,
-                   DataType::Type type,
-                   std::memory_order order,
-                   bool strong,
-                   vixl::aarch64::Label* cmp_failure,
-                   Register ptr,
-                   Register new_value,
-                   Register old_value,
-                   Register store_result,
-                   Register expected,
-                   Register expected2 = Register()) {
+static void GenerateCompareAndSet(CodeGeneratorARM64* codegen,
+                                  DataType::Type type,
+                                  std::memory_order order,
+                                  bool strong,
+                                  vixl::aarch64::Label* cmp_failure,
+                                  Register ptr,
+                                  Register new_value,
+                                  Register old_value,
+                                  Register store_result,
+                                  Register expected,
+                                  Register expected2 = Register()) {
   // The `expected2` is valid only for reference slow path and represents the unmarked old value
   // from the main path attempt to emit CAS when the marked old value matched `expected`.
   DCHECK(type == DataType::Type::kReference || !expected2.IsValid());
@@ -1196,24 +1195,24 @@
     __ Add(tmp_ptr, base_.X(), Operand(offset_));
 
     vixl::aarch64::Label mark_old_value;
-    GenCas(arm64_codegen,
-           DataType::Type::kReference,
-           order_,
-           strong_,
-           /*cmp_failure=*/ update_old_value_ ? &mark_old_value : GetExitLabel(),
-           tmp_ptr,
-           new_value_,
-           /*old_value=*/ old_value_temp_,
-           store_result,
-           expected_,
-           /*expected2=*/ old_value_);
+    GenerateCompareAndSet(arm64_codegen,
+                          DataType::Type::kReference,
+                          order_,
+                          strong_,
+                          /*cmp_failure=*/ update_old_value_ ? &mark_old_value : GetExitLabel(),
+                          tmp_ptr,
+                          new_value_,
+                          /*old_value=*/ old_value_temp_,
+                          store_result,
+                          expected_,
+                          /*expected2=*/ old_value_);
     if (update_old_value_) {
       // To reach this point, the `old_value_temp_` must be either a from-space or a to-space
       // reference of the `expected_` object. Update the `old_value_` to the to-space reference.
       __ Mov(old_value_, expected_);
     }
 
-    // Z=true from the CMP+CCMP in GenCas() above indicates comparison success.
+    // Z=true from the CMP+CCMP in GenerateCompareAndSet() above indicates comparison success.
     // For strong CAS, that's the overall success. For weak CAS, the code also needs
     // to check the `store_result` after returning from the slow path.
     __ B(GetExitLabel());
@@ -1306,25 +1305,25 @@
 
   __ Add(tmp_ptr, base.X(), Operand(offset));
 
-  GenCas(codegen,
-         type,
-         std::memory_order_seq_cst,
-         /*strong=*/ true,
-         cmp_failure,
-         tmp_ptr,
-         new_value,
-         old_value,
-         /*store_result=*/ old_value.W(),  // Reuse `old_value` for ST*XR* result.
-         expected);
+  GenerateCompareAndSet(codegen,
+                        type,
+                        std::memory_order_seq_cst,
+                        /*strong=*/ true,
+                        cmp_failure,
+                        tmp_ptr,
+                        new_value,
+                        old_value,
+                        /*store_result=*/ old_value.W(),  // Reuse `old_value` for ST*XR* result.
+                        expected);
   __ Bind(exit_loop);
   __ Cset(out, eq);
 }
 
 void IntrinsicLocationsBuilderARM64::VisitUnsafeCASInt(HInvoke* invoke) {
-  CreateIntIntIntIntIntToInt(allocator_, invoke);
+  CreateUnsafeCASLocations(allocator_, invoke);
 }
 void IntrinsicLocationsBuilderARM64::VisitUnsafeCASLong(HInvoke* invoke) {
-  CreateIntIntIntIntIntToInt(allocator_, invoke);
+  CreateUnsafeCASLocations(allocator_, invoke);
 }
 void IntrinsicLocationsBuilderARM64::VisitUnsafeCASObject(HInvoke* invoke) {
   // The only read barrier implementation supporting the
@@ -1333,7 +1332,7 @@
     return;
   }
 
-  CreateIntIntIntIntIntToInt(allocator_, invoke);
+  CreateUnsafeCASLocations(allocator_, invoke);
   if (kEmitCompilerReadBarrier) {
     // We need two non-scratch temporary registers for read barrier.
     LocationSummary* locations = invoke->GetLocations();
@@ -3693,42 +3692,6 @@
   GenerateDivideUnsigned(invoke, codegen_);
 }
 
-// Check access mode and the primitive type from VarHandle.varType.
-// The `var_type_no_rb`, if valid, shall be filled with VarHandle.varType read without read barrier.
-static void GenerateVarHandleAccessModeAndVarTypeChecks(HInvoke* invoke,
-                                                        CodeGeneratorARM64* codegen,
-                                                        SlowPathCodeARM64* slow_path,
-                                                        DataType::Type type,
-                                                        Register var_type_no_rb = Register()) {
-  mirror::VarHandle::AccessMode access_mode =
-      mirror::VarHandle::GetAccessModeByIntrinsic(invoke->GetIntrinsic());
-  Primitive::Type primitive_type = DataTypeToPrimitive(type);
-
-  MacroAssembler* masm = codegen->GetVIXLAssembler();
-  Register varhandle = InputRegisterAt(invoke, 0);
-
-  const MemberOffset var_type_offset = mirror::VarHandle::VarTypeOffset();
-  const MemberOffset access_mode_bit_mask_offset = mirror::VarHandle::AccessModesBitMaskOffset();
-  const MemberOffset primitive_type_offset = mirror::Class::PrimitiveTypeOffset();
-
-  UseScratchRegisterScope temps(masm);
-  if (!var_type_no_rb.IsValid()) {
-    var_type_no_rb = temps.AcquireW();
-  }
-  Register temp2 = temps.AcquireW();
-
-  // Check that the operation is permitted and the primitive type of varhandle.varType.
-  // We do not need a read barrier when loading a reference only for loading constant
-  // primitive field through the reference. Use LDP to load the fields together.
-  DCHECK_EQ(var_type_offset.Int32Value() + 4, access_mode_bit_mask_offset.Int32Value());
-  __ Ldp(var_type_no_rb, temp2, HeapOperand(varhandle, var_type_offset.Int32Value()));
-  codegen->GetAssembler()->MaybeUnpoisonHeapReference(var_type_no_rb);
-  __ Tbz(temp2, static_cast<uint32_t>(access_mode), slow_path->GetEntryLabel());
-  __ Ldrh(temp2, HeapOperand(var_type_no_rb, primitive_type_offset.Int32Value()));
-  __ Cmp(temp2, static_cast<uint16_t>(primitive_type));
-  __ B(slow_path->GetEntryLabel(), ne);
-}
-
 // Generate subtype check without read barriers.
 static void GenerateSubTypeObjectCheckNoReadBarrier(CodeGeneratorARM64* codegen,
                                                     SlowPathCodeARM64* slow_path,
@@ -3761,6 +3724,59 @@
   __ Bind(&success);
 }
 
+// Check access mode and the primitive type from VarHandle.varType.
+// Check reference arguments against the VarHandle.varType; this is a subclass check
+// without read barrier, so it can have false negatives which we handle in the slow path.
+static void GenerateVarHandleAccessModeAndVarTypeChecks(HInvoke* invoke,
+                                                        CodeGeneratorARM64* codegen,
+                                                        SlowPathCodeARM64* slow_path,
+                                                        DataType::Type type) {
+  mirror::VarHandle::AccessMode access_mode =
+      mirror::VarHandle::GetAccessModeByIntrinsic(invoke->GetIntrinsic());
+  Primitive::Type primitive_type = DataTypeToPrimitive(type);
+
+  MacroAssembler* masm = codegen->GetVIXLAssembler();
+  Register varhandle = InputRegisterAt(invoke, 0);
+
+  const MemberOffset var_type_offset = mirror::VarHandle::VarTypeOffset();
+  const MemberOffset access_mode_bit_mask_offset = mirror::VarHandle::AccessModesBitMaskOffset();
+  const MemberOffset primitive_type_offset = mirror::Class::PrimitiveTypeOffset();
+
+  UseScratchRegisterScope temps(masm);
+  Register var_type_no_rb = temps.AcquireW();
+  Register temp2 = temps.AcquireW();
+
+  // Check that the operation is permitted and the primitive type of varhandle.varType.
+  // We do not need a read barrier when loading a reference only for loading constant
+  // primitive field through the reference. Use LDP to load the fields together.
+  DCHECK_EQ(var_type_offset.Int32Value() + 4, access_mode_bit_mask_offset.Int32Value());
+  __ Ldp(var_type_no_rb, temp2, HeapOperand(varhandle, var_type_offset.Int32Value()));
+  codegen->GetAssembler()->MaybeUnpoisonHeapReference(var_type_no_rb);
+  __ Tbz(temp2, static_cast<uint32_t>(access_mode), slow_path->GetEntryLabel());
+  __ Ldrh(temp2, HeapOperand(var_type_no_rb, primitive_type_offset.Int32Value()));
+  __ Cmp(temp2, static_cast<uint16_t>(primitive_type));
+  __ B(slow_path->GetEntryLabel(), ne);
+
+  temps.Release(temp2);
+
+  if (type == DataType::Type::kReference) {
+    // Check reference arguments against the varType.
+    // False negatives due to varType being an interface or array type
+    // or due to the missing read barrier are handled by the slow path.
+    size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
+    uint32_t arguments_start = /* VarHandle object */ 1u + expected_coordinates_count;
+    uint32_t number_of_arguments = invoke->GetNumberOfArguments();
+    for (size_t arg_index = arguments_start; arg_index != number_of_arguments; ++arg_index) {
+      HInstruction* arg = invoke->InputAt(arg_index);
+      DCHECK_EQ(arg->GetType(), DataType::Type::kReference);
+      if (!arg->IsNullConstant()) {
+        Register arg_reg = WRegisterFrom(invoke->GetLocations()->InAt(arg_index));
+        GenerateSubTypeObjectCheckNoReadBarrier(codegen, slow_path, arg_reg, var_type_no_rb);
+      }
+    }
+  }
+}
+
 static void GenerateVarHandleStaticFieldCheck(HInvoke* invoke,
                                               CodeGeneratorARM64* codegen,
                                               SlowPathCodeARM64* slow_path) {
@@ -3955,12 +3971,24 @@
     return;
   }
 
+  if ((kEmitCompilerReadBarrier && !kUseBakerReadBarrier) &&
+      invoke->GetType() == DataType::Type::kReference &&
+      invoke->GetIntrinsic() != Intrinsics::kVarHandleGet &&
+      invoke->GetIntrinsic() != Intrinsics::kVarHandleGetOpaque) {
+    // Unsupported for non-Baker read barrier because the artReadBarrierSlow() ignores
+    // the passed reference and reloads it from the field. This gets the memory visibility
+    // wrong for Acquire/Volatile operations.
+    return;
+  }
+
   LocationSummary* locations = CreateVarHandleFieldLocations(invoke);
 
   // Add a temporary for offset if we cannot use the output register.
   if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
-    // To preserve the offset value across the non-Baker read barrier
-    // slow path, use a fixed callee-save register.
+    // To preserve the offset value across the non-Baker read barrier slow path
+    // for loading the declaring class, use a fixed callee-save register.
+    // For simplicity, use this also for instance fields, even though we could use
+    // the output register for primitives and an arbitrary temporary for references.
     locations->AddTemp(Location::RegisterLocation(CTZ(kArm64CalleeSaveRefSpills)));
   } else if (DataType::IsFloatingPointType(invoke->GetType())) {
     locations->AddTemp(Location::RequiresRegister());
@@ -4070,9 +4098,10 @@
   LocationSummary* locations = CreateVarHandleFieldLocations(invoke);
 
   // Add a temporary for offset.
-  if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
-    // To preserve the offset value across the non-Baker read barrier
-    // slow path, use a fixed callee-save register.
+  if ((kEmitCompilerReadBarrier && !kUseBakerReadBarrier) &&
+      GetExpectedVarHandleCoordinatesCount(invoke) == 0u) {
+    // To preserve the offset value across the non-Baker read barrier slow path
+    // for loading the declaring class, use a fixed callee-save register.
     locations->AddTemp(Location::RegisterLocation(CTZ(kArm64CalleeSaveRefSpills)));
   } else {
     locations->AddTemp(Location::RequiresRegister());
@@ -4096,18 +4125,7 @@
   codegen->AddSlowPath(slow_path);
 
   GenerateVarHandleFieldCheck(invoke, codegen, slow_path);
-  {
-    UseScratchRegisterScope temps(masm);
-    Register var_type_no_rb = temps.AcquireW();
-    GenerateVarHandleAccessModeAndVarTypeChecks(
-        invoke, codegen, slow_path, value_type, var_type_no_rb);
-    if (value_type == DataType::Type::kReference && !value.IsZero()) {
-      // If the value type is a reference, check it against the varType.
-      // False negatives due to varType being an interface or array type
-      // or due to the missing read barrier are handled by the slow path.
-      GenerateSubTypeObjectCheckNoReadBarrier(codegen, slow_path, value.W(), var_type_no_rb);
-    }
-  }
+  GenerateVarHandleAccessModeAndVarTypeChecks(invoke, codegen, slow_path, value_type);
 
   // The temporary allocated for loading `offset`.
   Register offset =
@@ -4183,18 +4201,35 @@
     return;
   }
 
+  uint32_t number_of_arguments = invoke->GetNumberOfArguments();
+  DataType::Type value_type = invoke->InputAt(number_of_arguments - 1u)->GetType();
+  if ((kEmitCompilerReadBarrier && !kUseBakerReadBarrier) &&
+      value_type == DataType::Type::kReference) {
+    // Unsupported for non-Baker read barrier because the artReadBarrierSlow() ignores
+    // the passed reference and reloads it from the field. This breaks the read barriers
+    // in slow path in different ways. The marked old value may not actually be a to-space
+    // reference to the same object as `old_value`, breaking slow path assumptions. And
+    // for CompareAndExchange, marking the old value after comparison failure may actually
+    // return the reference to `expected`, erroneously indicating success even though we
+    // did not set the new value. (And it also gets the memory visibility wrong.)
+    return;
+  }
+
   LocationSummary* locations = CreateVarHandleFieldLocations(invoke);
 
   // Add a temporary for offset.
-  if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
-    // To preserve the offset value across the non-Baker read barrier
-    // slow path, use a fixed callee-save register.
+  if ((kEmitCompilerReadBarrier && !kUseBakerReadBarrier) &&
+      GetExpectedVarHandleCoordinatesCount(invoke) == 0u) {
+    // To preserve the offset value across the non-Baker read barrier slow path
+    // for loading the declaring class, use a fixed callee-save register.
     locations->AddTemp(Location::RegisterLocation(CTZ(kArm64CalleeSaveRefSpills)));
+    // Not implemented for references, see above.
+    // Note that we would also need a callee-save register instead of the temporary
+    // reserved in CreateVarHandleFieldLocations() for the class object.
+    DCHECK_NE(value_type, DataType::Type::kReference);
   } else {
     locations->AddTemp(Location::RequiresRegister());
   }
-  uint32_t number_of_arguments = invoke->GetNumberOfArguments();
-  DataType::Type value_type = invoke->InputAt(number_of_arguments - 1u)->GetType();
   if (!return_success && DataType::IsFloatingPointType(value_type)) {
     // Add a temporary for old value and exclusive store result if floating point
     // `expected` and/or `new_value` take scratch registers.
@@ -4255,21 +4290,7 @@
   codegen->AddSlowPath(slow_path);
 
   GenerateVarHandleFieldCheck(invoke, codegen, slow_path);
-  {
-    UseScratchRegisterScope temps(masm);
-    Register var_type_no_rb = temps.AcquireW();
-    GenerateVarHandleAccessModeAndVarTypeChecks(
-        invoke, codegen, slow_path, value_type, var_type_no_rb);
-    // If the value type is a reference, check `expected` and `new_value` against the varType.
-    // False negatives due to varType being an interface or array type
-    // or due to the missing read barrier are handled by the slow path.
-    if (value_type == DataType::Type::kReference && !expected.IsZero()) {
-      GenerateSubTypeObjectCheckNoReadBarrier(codegen, slow_path, expected.W(), var_type_no_rb);
-    }
-    if (value_type == DataType::Type::kReference && !new_value.IsZero()) {
-      GenerateSubTypeObjectCheckNoReadBarrier(codegen, slow_path, new_value.W(), var_type_no_rb);
-    }
-  }
+  GenerateVarHandleAccessModeAndVarTypeChecks(invoke, codegen, slow_path, value_type);
 
   // The temporary allocated for loading `offset`.
   Register offset =
@@ -4363,23 +4384,23 @@
     cmp_failure = rb_slow_path->GetEntryLabel();
   }
 
-  GenCas(codegen,
-         cas_type,
-         order,
-         strong,
-         cmp_failure,
-         tmp_ptr,
-         new_value_reg,
-         old_value,
-         store_result,
-         expected_reg);
+  GenerateCompareAndSet(codegen,
+                        cas_type,
+                        order,
+                        strong,
+                        cmp_failure,
+                        tmp_ptr,
+                        new_value_reg,
+                        old_value,
+                        store_result,
+                        expected_reg);
   __ Bind(exit_loop);
 
   if (return_success) {
     if (strong) {
       __ Cset(out.W(), eq);
     } else {
-      // On success, the Z flag is set and the store result is 1, see GenCas().
+      // On success, the Z flag is set and the store result is 1, see GenerateCompareAndSet().
       // On failure, either the Z flag is clear or the store result is 0.
       // Determine the final success value with a CSEL.
       __ Csel(out.W(), store_result, wzr, eq);
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc
index 4dbf3d9..ea2ed57 100644
--- a/compiler/optimizing/intrinsics_x86.cc
+++ b/compiler/optimizing/intrinsics_x86.cc
@@ -3272,23 +3272,23 @@
   }
 
   uint32_t number_of_arguments = invoke->GetNumberOfArguments();
-  DataType::Type type = invoke->GetType();
+  DataType::Type return_type = invoke->GetType();
   mirror::VarHandle::AccessModeTemplate access_mode_template =
       mirror::VarHandle::GetAccessModeTemplateByIntrinsic(invoke->GetIntrinsic());
   switch (access_mode_template) {
     case mirror::VarHandle::AccessModeTemplate::kGet:
       // The return type should be the same as varType, so it shouldn't be void.
-      if (type == DataType::Type::kVoid) {
+      if (return_type == DataType::Type::kVoid) {
         return false;
       }
       break;
     case mirror::VarHandle::AccessModeTemplate::kSet:
-      if (type != DataType::Type::kVoid) {
+      if (return_type != DataType::Type::kVoid) {
         return false;
       }
       break;
     case mirror::VarHandle::AccessModeTemplate::kCompareAndSet: {
-      if (type != DataType::Type::kBool) {
+      if (return_type != DataType::Type::kBool) {
         return false;
       }
       uint32_t expected_value_index = number_of_arguments - 2;
@@ -3305,13 +3305,17 @@
       DataType::Type value_type = GetDataTypeFromShorty(invoke, number_of_arguments - 1);
       if (IsVarHandleGetAndAdd(invoke) &&
           (value_type == DataType::Type::kReference || value_type == DataType::Type::kBool)) {
-        // We should only add numerical types
+        // We should only add numerical types.
         return false;
       } else if (IsVarHandleGetAndBitwiseOp(invoke) && !DataType::IsIntegralType(value_type)) {
         // We can only apply operators to bitwise integral types.
+        // Note that bitwise VarHandle operations accept a non-integral boolean type and
+        // perform the appropriate logical operation. However, the result is the same as
+        // using the bitwise operation on our boolean representation and this fits well
+        // with DataType::IsIntegralType() treating the compiler type kBool as integral.
         return false;
       }
-      if (value_type != type) {
+      if (value_type != return_type) {
         return false;
       }
       break;
@@ -3322,7 +3326,7 @@
       DataType::Type expected_value_type = GetDataTypeFromShorty(invoke, expected_value_index);
       DataType::Type new_value_type = GetDataTypeFromShorty(invoke, new_value_index);
 
-      if (expected_value_type != new_value_type || type != expected_value_type) {
+      if (expected_value_type != new_value_type || return_type != expected_value_type) {
         return false;
       }
       break;