arm64: Implement VarHandle intrinsics for arrays.

Using benchmarks provided by
    https://android-review.googlesource.com/1420959
on blueline little cores with fixed frequency 1420800:
                      before  after
GetArrayElementInt    26.925  0.020
GetArrayElementString 27.585  0.024
SetArrayElementInt    28.168  0.020
SetArrayElementString 31.162  0.024

Test: Covered by existing tests.
Test: testrunner.py --target --64 --optimizing
Bug: 71781600
Change-Id: I1be78e6edc2d51210c95597c875489554833ecfc
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index e7315ec..fd4992a 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -3799,8 +3799,8 @@
 }
 
 // Check access mode and the primitive type from VarHandle.varType.
-// Check reference arguments against the VarHandle.varType; this is a subclass check
-// without read barrier, so it can have false negatives which we handle in the slow path.
+// Check reference arguments against the VarHandle.varType; for references this is a subclass
+// check without read barrier, so it can have false negatives which we handle in the slow path.
 static void GenerateVarHandleAccessModeAndVarTypeChecks(HInvoke* invoke,
                                                         CodeGeneratorARM64* codegen,
                                                         SlowPathCodeARM64* slow_path,
@@ -3873,9 +3873,9 @@
   __ Cbnz(temp, slow_path->GetEntryLabel());
 }
 
-static void GenerateVarHandleInstanceFieldCheck(HInvoke* invoke,
-                                                CodeGeneratorARM64* codegen,
-                                                SlowPathCodeARM64* slow_path) {
+static void GenerateVarHandleInstanceFieldChecks(HInvoke* invoke,
+                                                 CodeGeneratorARM64* codegen,
+                                                 SlowPathCodeARM64* slow_path) {
   MacroAssembler* masm = codegen->GetVIXLAssembler();
   Register varhandle = InputRegisterAt(invoke, 0);
   Register object = InputRegisterAt(invoke, 1);
@@ -3883,13 +3883,13 @@
   const MemberOffset coordinate_type0_offset = mirror::VarHandle::CoordinateType0Offset();
   const MemberOffset coordinate_type1_offset = mirror::VarHandle::CoordinateType1Offset();
 
+  // Null-check the object.
+  __ Cbz(object, slow_path->GetEntryLabel());
+
   UseScratchRegisterScope temps(masm);
   Register temp = temps.AcquireW();
   Register temp2 = temps.AcquireW();
 
-  // Null-check the object.
-  __ Cbz(object, slow_path->GetEntryLabel());
-
   // Check that the VarHandle references an instance field by checking that
   // coordinateType1 == null. coordinateType0 should not be null, but this is handled by the
   // type compatibility check with the source object's type, which will fail for null.
@@ -3906,15 +3906,101 @@
       codegen, slow_path, object, temp, /*object_can_be_null=*/ false);
 }
 
-static void GenerateVarHandleFieldCheck(HInvoke* invoke,
-                                        CodeGeneratorARM64* codegen,
-                                        SlowPathCodeARM64* slow_path) {
+static DataType::Type GetVarHandleExpectedValueType(HInvoke* invoke,
+                                                    size_t expected_coordinates_count) {
+  DCHECK_EQ(expected_coordinates_count, GetExpectedVarHandleCoordinatesCount(invoke));
+  uint32_t number_of_arguments = invoke->GetNumberOfArguments();
+  DCHECK_GE(number_of_arguments, /* VarHandle object */ 1u + expected_coordinates_count);
+  if (number_of_arguments == /* VarHandle object */ 1u + expected_coordinates_count) {
+    return invoke->GetType();
+  } else {
+    return invoke->InputAt(number_of_arguments - 1u)->GetType();
+  }
+}
+
+static void GenerateVarHandleArrayChecks(HInvoke* invoke,
+                                         CodeGeneratorARM64* codegen,
+                                         SlowPathCodeARM64* slow_path) {
+  MacroAssembler* masm = codegen->GetVIXLAssembler();
+  Register varhandle = InputRegisterAt(invoke, 0);
+  Register object = InputRegisterAt(invoke, 1);
+  Register index = InputRegisterAt(invoke, 2);
+  DataType::Type value_type =
+      GetVarHandleExpectedValueType(invoke, /*expected_coordinates_count=*/ 2u);
+  Primitive::Type primitive_type = DataTypeToPrimitive(value_type);
+
+  const MemberOffset coordinate_type0_offset = mirror::VarHandle::CoordinateType0Offset();
+  const MemberOffset coordinate_type1_offset = mirror::VarHandle::CoordinateType1Offset();
+  const MemberOffset component_type_offset = mirror::Class::ComponentTypeOffset();
+  const MemberOffset primitive_type_offset = mirror::Class::PrimitiveTypeOffset();
+  const MemberOffset class_offset = mirror::Object::ClassOffset();
+  const MemberOffset array_length_offset = mirror::Array::LengthOffset();
+
+  // Null-check the object.
+  __ Cbz(object, slow_path->GetEntryLabel());
+
+  UseScratchRegisterScope temps(masm);
+  Register temp = temps.AcquireW();
+  Register temp2 = temps.AcquireW();
+
+  // Check that the VarHandle references an array, byte array view or ByteBuffer by checking
+  // that coordinateType1 != null. If that's true, coordinateType1 shall be int.class and
+  // coordinateType0 shall not be null but we do not explicitly verify that.
+  DCHECK_EQ(coordinate_type0_offset.Int32Value() + 4, coordinate_type1_offset.Int32Value());
+  __ Ldp(temp, temp2, HeapOperand(varhandle, coordinate_type0_offset.Int32Value()));
+  codegen->GetAssembler()->MaybeUnpoisonHeapReference(temp);
+  // No need for read barrier or unpoisoning of coordinateType1 for comparison with null.
+  __ Cbz(temp2, slow_path->GetEntryLabel());
+
+  // Check that the coordinateType0 is an array type. We do not need a read barrier
+  // for loading constant reference fields (or chains of them) for comparison with null,
+  // or for finally loading a constant primitive field (primitive type) below.
+  __ Ldr(temp2, HeapOperand(temp, component_type_offset.Int32Value()));
+  codegen->GetAssembler()->MaybeUnpoisonHeapReference(temp2);
+  __ Cbz(temp2, slow_path->GetEntryLabel());
+
+  // Check that the array component type matches the primitive type.
+  __ Ldrh(temp2, HeapOperand(temp2, primitive_type_offset.Int32Value()));
+  if (primitive_type == Primitive::kPrimNot) {
+    static_assert(Primitive::kPrimNot == 0);
+    __ Cbnz(temp2, slow_path->GetEntryLabel());
+  } else {
+    __ Cmp(temp2, static_cast<uint16_t>(primitive_type));
+    __ B(slow_path->GetEntryLabel(), ne);
+  }
+
+  // Check object class against componentType0.
+  //
+  // This is an exact check and we defer other cases to the runtime. This includes
+  // conversion to array of superclass references, which is valid but subsequently
+  // requires all update operations to check that the value can indeed be stored.
+  // We do not want to perform such extra checks in the intrinsified code.
+  //
+  // We do this check without read barrier, so there can be false negatives which we
+  // defer to the slow path. There shall be no false negatives for array classes in the
+  // boot image (including Object[] and primitive arrays) because they are non-movable.
+  __ Ldr(temp2, HeapOperand(object, class_offset.Int32Value()));
+  codegen->GetAssembler()->MaybeUnpoisonHeapReference(temp2);
+  __ Cmp(temp, temp2);
+  __ B(slow_path->GetEntryLabel(), ne);
+
+  // Check for array index out of bounds.
+  __ Ldr(temp, HeapOperand(object, array_length_offset.Int32Value()));
+  __ Cmp(index, temp);
+  __ B(slow_path->GetEntryLabel(), hs);
+}
+
+static void GenerateVarHandleCoordinateChecks(HInvoke* invoke,
+                                              CodeGeneratorARM64* codegen,
+                                              SlowPathCodeARM64* slow_path) {
   size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
-  DCHECK_LE(expected_coordinates_count, 1u);
   if (expected_coordinates_count == 0u) {
     GenerateVarHandleStaticFieldCheck(invoke, codegen, slow_path);
+  } else if (expected_coordinates_count == 1u) {
+    GenerateVarHandleInstanceFieldChecks(invoke, codegen, slow_path);
   } else {
-    GenerateVarHandleInstanceFieldCheck(invoke, codegen, slow_path);
+    DCHECK_EQ(expected_coordinates_count, 2u);
+    GenerateVarHandleArrayChecks(invoke, codegen, slow_path);
   }
 }
 
@@ -3927,52 +4013,77 @@
   MacroAssembler* masm = codegen->GetVIXLAssembler();
   Register varhandle = InputRegisterAt(invoke, 0);
   size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
-  DCHECK_LE(expected_coordinates_count, 1u);
   LocationSummary* locations = invoke->GetLocations();
 
   VarHandleTarget target;
   // The temporary allocated for loading the offset.
   target.offset = WRegisterFrom(locations->GetTemp((expected_coordinates_count == 0u) ? 1u : 0u));
-  // The reference to the object that holds the field to operate on.
+  // The reference to the object that holds the value to operate on.
   target.object = (expected_coordinates_count == 0u)
       ? WRegisterFrom(locations->GetTemp(0u))
       : InputRegisterAt(invoke, 1);
 
-  // For static fields, we need to fill the `target.object` with the declaring class,
-  // so we can use `target.object` as temporary for the `ArtMethod*`. For instance fields,
-  // we do not need the declaring class, so we can forget the `ArtMethod*` when
-  // we load the `target.offset`, so use the `target.offset` to hold the `ArtMethod*`.
-  Register method = (expected_coordinates_count == 0) ? target.object : target.offset;
+  if (expected_coordinates_count <= 1u) {
+    // For static fields, we need to fill the `target.object` with the declaring class,
+    // so we can use `target.object` as temporary for the `ArtMethod*`. For instance fields,
+    // we do not need the declaring class, so we can forget the `ArtMethod*` when
+    // we load the `target.offset`, so use the `target.offset` to hold the `ArtMethod*`.
+    Register method = (expected_coordinates_count == 0) ? target.object : target.offset;
 
-  const MemberOffset art_field_offset = mirror::FieldVarHandle::ArtFieldOffset();
-  const MemberOffset offset_offset = ArtField::OffsetOffset();
+    const MemberOffset art_field_offset = mirror::FieldVarHandle::ArtFieldOffset();
+    const MemberOffset offset_offset = ArtField::OffsetOffset();
 
-  // Load the ArtField, the offset and, if needed, declaring class.
-  __ Ldr(method.X(), HeapOperand(varhandle, art_field_offset.Int32Value()));
-  __ Ldr(target.offset, MemOperand(method.X(), offset_offset.Int32Value()));
-  if (expected_coordinates_count == 0u) {
-    codegen->GenerateGcRootFieldLoad(invoke,
-                                     LocationFrom(target.object),
-                                     method.X(),
-                                     ArtField::DeclaringClassOffset().Int32Value(),
-                                     /*fixup_label=*/ nullptr,
-                                     kCompilerReadBarrierOption);
+    // Load the ArtField, the offset and, if needed, declaring class.
+    __ Ldr(method.X(), HeapOperand(varhandle, art_field_offset.Int32Value()));
+    __ Ldr(target.offset, MemOperand(method.X(), offset_offset.Int32Value()));
+    if (expected_coordinates_count == 0u) {
+      codegen->GenerateGcRootFieldLoad(invoke,
+                                       LocationFrom(target.object),
+                                       method.X(),
+                                       ArtField::DeclaringClassOffset().Int32Value(),
+                                       /*fixup_label=*/ nullptr,
+                                       kCompilerReadBarrierOption);
+    }
+  } else {
+    DCHECK_EQ(expected_coordinates_count, 2u);
+    DataType::Type value_type =
+        GetVarHandleExpectedValueType(invoke, /*expected_coordinates_count=*/ 2u);
+    size_t size_shift = DataType::SizeShift(value_type);
+    MemberOffset data_offset = mirror::Array::DataOffset(DataType::Size(value_type));
+
+    Register index = InputRegisterAt(invoke, 2);
+    Register shifted_index = index;
+    if (size_shift != 0u) {
+      shifted_index = target.offset;
+      __ Lsl(shifted_index, index, size_shift);
+    }
+    __ Add(target.offset, shifted_index, data_offset.Int32Value());
   }
 
   return target;
 }
 
-static bool IsValidFieldVarHandleExpected(HInvoke* invoke) {
+static bool HasVarHandleIntrinsicImplementation(HInvoke* invoke) {
   size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
-  if (expected_coordinates_count > 1u) {
-    // Only field VarHandle is currently supported.
+  if (expected_coordinates_count > 2u) {
+    // Invalid coordinate count. This invoke shall throw at runtime.
     return false;
   }
-  if (expected_coordinates_count == 1u &&
+  if (expected_coordinates_count != 0u &&
       invoke->InputAt(1)->GetType() != DataType::Type::kReference) {
-    // For an instance field, the object must be a reference.
+    // Except for static fields (no coordinates), the first coordinate must be a reference.
     return false;
   }
+  if (expected_coordinates_count == 2u) {
+    // For arrays and views, the second coordinate must be convertible to `int`.
+    // In this context, `boolean` is not convertible but we have to look at the shorty
+    // as compiler transformations can give the invoke a valid boolean input.
+    DataType::Type index_type = GetDataTypeFromShorty(invoke, 2);
+    if (index_type == DataType::Type::kBool ||
+        DataType::Kind(index_type) != DataType::Type::kInt32) {
+      return false;
+    }
+  }
 
   uint32_t number_of_arguments = invoke->GetNumberOfArguments();
   DataType::Type return_type = invoke->GetType();
@@ -4037,7 +4148,7 @@
   return true;
 }
 
-static LocationSummary* CreateVarHandleFieldLocations(HInvoke* invoke) {
+static LocationSummary* CreateVarHandleCommonLocations(HInvoke* invoke) {
   size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
   DataType::Type return_type = invoke->GetType();
 
@@ -4045,10 +4156,12 @@
   LocationSummary* locations =
       new (allocator) LocationSummary(invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
   locations->SetInAt(0, Location::RequiresRegister());
-  if (expected_coordinates_count == 1u) {
-    // For instance fields, this is the source object.
-    locations->SetInAt(1, Location::RequiresRegister());
-  } else {
+  // Require coordinates in registers. These are the object holding the value
+  // to operate on (except for static fields) and index (for arrays and views).
+  for (size_t i = 0; i != expected_coordinates_count; ++i) {
+    locations->SetInAt(/* VarHandle object */ 1u + i, Location::RequiresRegister());
+  }
+  if (expected_coordinates_count == 0u) {
     // Add a temporary to hold the declaring class.
     locations->AddTemp(Location::RequiresRegister());
   }
@@ -4087,7 +4200,7 @@
 }
 
 static void CreateVarHandleGetLocations(HInvoke* invoke) {
-  if (!IsValidFieldVarHandleExpected(invoke)) {
+  if (!HasVarHandleIntrinsicImplementation(invoke)) {
     return;
   }
 
@@ -4101,15 +4214,12 @@
     return;
   }
 
-  CreateVarHandleFieldLocations(invoke);
+  CreateVarHandleCommonLocations(invoke);
 }
 
 static void GenerateVarHandleGet(HInvoke* invoke,
                                  CodeGeneratorARM64* codegen,
                                  bool use_load_acquire) {
-  // Implemented only for fields.
-  size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
-  DCHECK_LE(expected_coordinates_count, 1u);
   DataType::Type type = invoke->GetType();
   DCHECK_NE(type, DataType::Type::kVoid);
 
@@ -4121,12 +4231,12 @@
       new (codegen->GetScopedAllocator()) IntrinsicSlowPathARM64(invoke);
   codegen->AddSlowPath(slow_path);
 
-  GenerateVarHandleFieldCheck(invoke, codegen, slow_path);
+  GenerateVarHandleCoordinateChecks(invoke, codegen, slow_path);
   GenerateVarHandleAccessModeAndVarTypeChecks(invoke, codegen, slow_path, type);
 
   VarHandleTarget target = GenerateVarHandleTarget(invoke, codegen);
 
-  // Load the value from the field.
+  // Load the value from the target location.
   if (type == DataType::Type::kReference && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
     // Piggy-back on the field load path using introspection for the Baker read barrier.
     // The `target.offset` is a temporary, use it for field address.
@@ -4191,19 +4301,16 @@
 }
 
 static void CreateVarHandleSetLocations(HInvoke* invoke) {
-  if (!IsValidFieldVarHandleExpected(invoke)) {
+  if (!HasVarHandleIntrinsicImplementation(invoke)) {
     return;
   }
 
-  CreateVarHandleFieldLocations(invoke);
+  CreateVarHandleCommonLocations(invoke);
 }
 
 static void GenerateVarHandleSet(HInvoke* invoke,
                                  CodeGeneratorARM64* codegen,
                                  bool use_store_release) {
-  // Implemented only for fields.
-  size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
-  DCHECK_LE(expected_coordinates_count, 1u);
   uint32_t value_index = invoke->GetNumberOfArguments() - 1;
   DataType::Type value_type = GetDataTypeFromShorty(invoke, value_index);
 
@@ -4214,12 +4321,12 @@
       new (codegen->GetScopedAllocator()) IntrinsicSlowPathARM64(invoke);
   codegen->AddSlowPath(slow_path);
 
-  GenerateVarHandleFieldCheck(invoke, codegen, slow_path);
+  GenerateVarHandleCoordinateChecks(invoke, codegen, slow_path);
   GenerateVarHandleAccessModeAndVarTypeChecks(invoke, codegen, slow_path, value_type);
 
   VarHandleTarget target = GenerateVarHandleTarget(invoke, codegen);
 
-  // Store the value to the field.
+  // Store the value to the target location.
   {
     CPURegister source = value;
     UseScratchRegisterScope temps(masm);
@@ -4279,7 +4386,7 @@
 }
 
 static void CreateVarHandleCompareAndSetOrExchangeLocations(HInvoke* invoke, bool return_success) {
-  if (!IsValidFieldVarHandleExpected(invoke)) {
+  if (!HasVarHandleIntrinsicImplementation(invoke)) {
     return;
   }
 
@@ -4297,11 +4404,11 @@
     return;
   }
 
-  LocationSummary* locations = CreateVarHandleFieldLocations(invoke);
+  LocationSummary* locations = CreateVarHandleCommonLocations(invoke);
 
   if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
     // We need callee-save registers for both the class object and offset instead of
-    // the temporaries reserved in CreateVarHandleFieldLocations().
+    // the temporaries reserved in CreateVarHandleCommonLocations().
     static_assert(POPCOUNT(kArm64CalleeSaveRefSpills) >= 2u);
     uint32_t first_callee_save = CTZ(kArm64CalleeSaveRefSpills);
     uint32_t second_callee_save = CTZ(kArm64CalleeSaveRefSpills ^ (1u << first_callee_save));
@@ -4357,9 +4464,7 @@
                                                      bool strong) {
   DCHECK(return_success || strong);
 
-  // Implemented only for fields.
   size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
-  DCHECK_LE(expected_coordinates_count, 1u);
   uint32_t expected_index = invoke->GetNumberOfArguments() - 2;
   uint32_t new_value_index = invoke->GetNumberOfArguments() - 1;
   DataType::Type value_type = GetDataTypeFromShorty(invoke, new_value_index);
@@ -4375,7 +4480,7 @@
       new (codegen->GetScopedAllocator()) IntrinsicSlowPathARM64(invoke);
   codegen->AddSlowPath(slow_path);
 
-  GenerateVarHandleFieldCheck(invoke, codegen, slow_path);
+  GenerateVarHandleCoordinateChecks(invoke, codegen, slow_path);
   GenerateVarHandleAccessModeAndVarTypeChecks(invoke, codegen, slow_path, value_type);
 
   VarHandleTarget target = GenerateVarHandleTarget(invoke, codegen);
@@ -4387,8 +4492,8 @@
     codegen->MarkGCCard(target.object, new_value.W(), new_value_can_be_null);
   }
 
-  // Reuse the `offset` temporary for the pointer to the field, except
-  // for references that need the offset for the read barrier.
+  // Reuse the `offset` temporary for the pointer to the target location,
+  // except for references that need the offset for the read barrier.
   UseScratchRegisterScope temps(masm);
   Register tmp_ptr = target.offset.X();
   if (kEmitCompilerReadBarrier && value_type == DataType::Type::kReference) {
@@ -4563,7 +4668,7 @@
 
 static void CreateVarHandleGetAndUpdateLocations(HInvoke* invoke,
                                                  GetAndUpdateOp get_and_update_op) {
-  if (!IsValidFieldVarHandleExpected(invoke)) {
+  if (!HasVarHandleIntrinsicImplementation(invoke)) {
     return;
   }
 
@@ -4575,7 +4680,7 @@
     return;
   }
 
-  LocationSummary* locations = CreateVarHandleFieldLocations(invoke);
+  LocationSummary* locations = CreateVarHandleCommonLocations(invoke);
 
   if (DataType::IsFloatingPointType(invoke->GetType())) {
     if (get_and_update_op == GetAndUpdateOp::kAdd) {
@@ -4595,9 +4700,7 @@
                                           CodeGeneratorARM64* codegen,
                                           GetAndUpdateOp get_and_update_op,
                                           std::memory_order order) {
-  // Implemented only for fields.
   size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
-  DCHECK_LE(expected_coordinates_count, 1u);
   uint32_t arg_index = invoke->GetNumberOfArguments() - 1;
   DataType::Type value_type = GetDataTypeFromShorty(invoke, arg_index);
 
@@ -4610,7 +4713,7 @@
       new (codegen->GetScopedAllocator()) IntrinsicSlowPathARM64(invoke);
   codegen->AddSlowPath(slow_path);
 
-  GenerateVarHandleFieldCheck(invoke, codegen, slow_path);
+  GenerateVarHandleCoordinateChecks(invoke, codegen, slow_path);
   GenerateVarHandleAccessModeAndVarTypeChecks(invoke, codegen, slow_path, value_type);
 
   VarHandleTarget target = GenerateVarHandleTarget(invoke, codegen);
@@ -4623,8 +4726,8 @@
     codegen->MarkGCCard(target.object, arg.W(), new_value_can_be_null);
   }
 
-  // Reuse the `target.offset` temporary for the pointer to the field, except
-  // for references that need the offset for the non-Baker read barrier.
+  // Reuse the `target.offset` temporary for the pointer to the target location,
+  // except for references that need the offset for the non-Baker read barrier.
   UseScratchRegisterScope temps(masm);
   Register tmp_ptr = target.offset.X();
   if ((kEmitCompilerReadBarrier && !kUseBakerReadBarrier) &&