Clean up VarHandles.set*() for arrays and byte array views.

This CL is a cleanup for https://r.android.com/1875684. Changes:

- Use setters instead of constructor arguments.

- Rerun benchmarks and add improvements for arrays to commit message
  (as aosp/1875684 forgot to mention changes for arrays).

- Do BSWAP at compile time if the argument is a constant value.
  `HandleFieldSet` now can optionally perform byte swap. The function
  is rewritten so that constant and non-constant values are handled in
  separate branches, which makes both cases simpler.

- Add helper function `CodeGeneratorX86_64::GetInstructionCodegen` to
  reduce boilerplate.

Benchmark improvements (using benchmarks provided by
https://android-review.googlesource.com/1420959):

  benchmark                             before aosp/1875684   now
  ----------------------------------------------------------------
  VarHandleSetArrayElementInt                   2.79         0.002
  VarHandleSetArrayElementString                3.09         0.003
  VarHandleSetByteArrayViewInt                  2.89         0.004
  VarHandleSetByteArrayViewBigEndianInt         2.89         0.004

Bug: 71781600
Test: lunch aosp_cf_x86_64_phone-userdebug \
  && art/test.py --host -r -t 712-varhandle-invocations --64
Test: Repeat with ART_USE_READ_BARRIER=false.
Test: Repeat with ART_HEAP_POISONING=true.
Change-Id: I8cc37321228ffb2833a1158a75ced65f18af968e
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 6deb035..e601b40 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -5117,6 +5117,48 @@
   }
 }
 
+void InstructionCodeGeneratorX86_64::Bswap(Location value,
+                                           DataType::Type type,
+                                           CpuRegister* temp) {
+  switch (type) {
+    case DataType::Type::kInt16:
+      // TODO: Can be done with an xchg of 8b registers. This is straight from Quick.
+      __ bswapl(value.AsRegister<CpuRegister>());
+      __ sarl(value.AsRegister<CpuRegister>(), Immediate(16));
+      break;
+    case DataType::Type::kUint16:
+      // TODO: Can be done with an xchg of 8b registers. This is straight from Quick.
+      __ bswapl(value.AsRegister<CpuRegister>());
+      __ shrl(value.AsRegister<CpuRegister>(), Immediate(16));
+      break;
+    case DataType::Type::kInt32:
+    case DataType::Type::kUint32:
+      __ bswapl(value.AsRegister<CpuRegister>());
+      break;
+    case DataType::Type::kInt64:
+    case DataType::Type::kUint64:
+      __ bswapq(value.AsRegister<CpuRegister>());
+      break;
+    case DataType::Type::kFloat32: {
+      DCHECK_NE(temp, nullptr);
+      __ movd(*temp, value.AsFpuRegister<XmmRegister>(), /*is64bit=*/ false);
+      __ bswapl(*temp);
+      __ movd(value.AsFpuRegister<XmmRegister>(), *temp, /*is64bit=*/ false);
+      break;
+    }
+    case DataType::Type::kFloat64: {
+      DCHECK_NE(temp, nullptr);
+      __ movd(*temp, value.AsFpuRegister<XmmRegister>(), /*is64bit=*/ true);
+      __ bswapq(*temp);
+      __ movd(value.AsFpuRegister<XmmRegister>(), *temp, /*is64bit=*/ true);
+      break;
+    }
+    default:
+      LOG(FATAL) << "Unexpected type for reverse-bytes: " << type;
+      UNREACHABLE();
+  }
+}
+
 void InstructionCodeGeneratorX86_64::HandleFieldSet(HInstruction* instruction,
                                                     uint32_t value_index,
                                                     uint32_t extra_temp_index,
@@ -5125,7 +5167,8 @@
                                                     CpuRegister base,
                                                     bool is_volatile,
                                                     bool is_atomic,
-                                                    bool value_can_be_null) {
+                                                    bool value_can_be_null,
+                                                    bool byte_swap) {
   LocationSummary* locations = instruction->GetLocations();
   Location value = locations->InAt(value_index);
 
@@ -5135,38 +5178,80 @@
 
   bool maybe_record_implicit_null_check_done = false;
 
-  switch (field_type) {
-    case DataType::Type::kBool:
-    case DataType::Type::kUint8:
-    case DataType::Type::kInt8: {
-      if (value.IsConstant()) {
+  if (value.IsConstant()) {
+    switch (field_type) {
+      case DataType::Type::kBool:
+      case DataType::Type::kUint8:
+      case DataType::Type::kInt8:
         __ movb(field_addr, Immediate(CodeGenerator::GetInt8ValueOf(value.GetConstant())));
-      } else {
-        __ movb(field_addr, value.AsRegister<CpuRegister>());
+        break;
+      case DataType::Type::kUint16:
+      case DataType::Type::kInt16: {
+        int16_t v = CodeGenerator::GetInt16ValueOf(value.GetConstant());
+        if (byte_swap) {
+          v = BSWAP(v);
+        }
+        __ movw(field_addr, Immediate(v));
+        break;
       }
-      break;
-    }
-
-    case DataType::Type::kUint16:
-    case DataType::Type::kInt16: {
-      if (value.IsConstant()) {
-        __ movw(field_addr, Immediate(CodeGenerator::GetInt16ValueOf(value.GetConstant())));
-      } else {
-        __ movw(field_addr, value.AsRegister<CpuRegister>());
-      }
-      break;
-    }
-
-    case DataType::Type::kInt32:
-    case DataType::Type::kReference: {
-      if (value.IsConstant()) {
+      case DataType::Type::kUint32:
+      case DataType::Type::kInt32:
+      case DataType::Type::kFloat32:
+      case DataType::Type::kReference: {
         int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
+        if (byte_swap) {
+          v = BSWAP(v);
+        }
         // `field_type == DataType::Type::kReference` implies `v == 0`.
         DCHECK((field_type != DataType::Type::kReference) || (v == 0));
         // Note: if heap poisoning is enabled, no need to poison
         // (negate) `v` if it is a reference, as it would be null.
         __ movl(field_addr, Immediate(v));
-      } else {
+        break;
+      }
+      case DataType::Type::kUint64:
+      case DataType::Type::kInt64:
+      case DataType::Type::kFloat64: {
+        int64_t v = CodeGenerator::GetInt64ValueOf(value.GetConstant());
+        if (byte_swap) {
+          v = BSWAP(v);
+        }
+        if (is_atomic) {
+          // Move constant into a register, then atomically store the register to memory.
+          CpuRegister temp = locations->GetTemp(extra_temp_index).AsRegister<CpuRegister>();
+          __ movq(temp, Immediate(v));
+          __ movq(field_addr, temp);
+        } else {
+          Address field_addr2 = Address::displace(field_addr, sizeof(int32_t));
+          codegen_->MoveInt64ToAddress(field_addr, field_addr2, v, instruction);
+        }
+        maybe_record_implicit_null_check_done = true;
+        break;
+      }
+      case DataType::Type::kVoid:
+        LOG(FATAL) << "Unreachable type " << field_type;
+        UNREACHABLE();
+    }
+  } else {
+    if (byte_swap) {
+      // Swap byte order in-place in the input register (we will restore it later).
+      CpuRegister temp = locations->GetTemp(extra_temp_index).AsRegister<CpuRegister>();
+      Bswap(value, field_type, &temp);
+    }
+
+    switch (field_type) {
+      case DataType::Type::kBool:
+      case DataType::Type::kUint8:
+      case DataType::Type::kInt8:
+        __ movb(field_addr, value.AsRegister<CpuRegister>());
+        break;
+      case DataType::Type::kUint16:
+      case DataType::Type::kInt16:
+        __ movw(field_addr, value.AsRegister<CpuRegister>());
+        break;
+      case DataType::Type::kUint32:
+      case DataType::Type::kInt32:
+      case DataType::Type::kReference:
         if (kPoisonHeapReferences && field_type == DataType::Type::kReference) {
           CpuRegister temp = locations->GetTemp(extra_temp_index).AsRegister<CpuRegister>();
           __ movl(temp, value.AsRegister<CpuRegister>());
@@ -5175,67 +5260,27 @@
         } else {
           __ movl(field_addr, value.AsRegister<CpuRegister>());
         }
-      }
-      break;
-    }
-
-    case DataType::Type::kInt64: {
-      if (value.IsConstant()) {
-        int64_t v = value.GetConstant()->AsLongConstant()->GetValue();
-        if (is_atomic) {
-          // Move constant into a register, then atomically store the register to memory.
-          CpuRegister temp = locations->GetTemp(extra_temp_index).AsRegister<CpuRegister>();
-          __ movq(temp, Immediate(v));
-          __ movq(field_addr, temp);
-        } else {
-          codegen_->MoveInt64ToAddress(field_addr,
-                                       Address::displace(field_addr, sizeof(int32_t)),
-                                       v,
-                                       instruction);
-        }
-        maybe_record_implicit_null_check_done = true;
-      } else {
+        break;
+      case DataType::Type::kUint64:
+      case DataType::Type::kInt64:
         __ movq(field_addr, value.AsRegister<CpuRegister>());
-      }
-      break;
-    }
-
-    case DataType::Type::kFloat32: {
-      if (value.IsConstant()) {
-        int32_t v = bit_cast<int32_t, float>(value.GetConstant()->AsFloatConstant()->GetValue());
-        __ movl(field_addr, Immediate(v));
-      } else {
+        break;
+      case DataType::Type::kFloat32:
         __ movss(field_addr, value.AsFpuRegister<XmmRegister>());
-      }
-      break;
-    }
-
-    case DataType::Type::kFloat64: {
-      if (value.IsConstant()) {
-        int64_t v = bit_cast<int64_t, double>(value.GetConstant()->AsDoubleConstant()->GetValue());
-        if (is_atomic) {
-          // Move constant into a register, then atomically store the register to memory.
-          CpuRegister temp = locations->GetTemp(extra_temp_index).AsRegister<CpuRegister>();
-          __ movq(temp, Immediate(v));
-          __ movq(field_addr, temp);
-        } else {
-          codegen_->MoveInt64ToAddress(field_addr,
-                                       Address::displace(field_addr, sizeof(int32_t)),
-                                       v,
-                                       instruction);
-        }
-        maybe_record_implicit_null_check_done = true;
-      } else {
+        break;
+      case DataType::Type::kFloat64:
         __ movsd(field_addr, value.AsFpuRegister<XmmRegister>());
-      }
-      break;
+        break;
+      case DataType::Type::kVoid:
+        LOG(FATAL) << "Unreachable type " << field_type;
+        UNREACHABLE();
     }
 
-    case DataType::Type::kUint32:
-    case DataType::Type::kUint64:
-    case DataType::Type::kVoid:
-      LOG(FATAL) << "Unreachable type " << field_type;
-      UNREACHABLE();
+    if (byte_swap) {
+      // Restore byte order.
+      CpuRegister temp = locations->GetTemp(extra_temp_index).AsRegister<CpuRegister>();
+      Bswap(value, field_type, &temp);
+    }
   }
 
   if (!maybe_record_implicit_null_check_done) {
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index 1115c83..a130994 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -249,7 +249,10 @@
                       CpuRegister base,
                       bool is_volatile,
                       bool is_atomic,
-                      bool value_can_be_null);
+                      bool value_can_be_null,
+                      bool byte_swap = false);
+
+  void Bswap(Location value, DataType::Type type, CpuRegister* temp = nullptr);
 
  private:
   // Generate code for the given suspend check. If not null, `successor`
@@ -421,6 +424,10 @@
     return InstructionSet::kX86_64;
   }
 
+  InstructionCodeGeneratorX86_64* GetInstructionCodegen() {
+    return down_cast<InstructionCodeGeneratorX86_64*>(GetInstructionVisitor());
+  }
+
   const X86_64InstructionSetFeatures& GetInstructionSetFeatures() const;
 
   // Emit a write barrier.
@@ -650,7 +657,6 @@
   void GenerateExplicitNullCheck(HNullCheck* instruction) override;
   void MaybeGenerateInlineCacheCheck(HInstruction* instruction, CpuRegister cls);
 
-
   void MaybeIncrementHotness(bool is_frame_entry);
 
   static void BlockNonVolatileXmmRegisters(LocationSummary* locations);
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index c536492..be3af2d 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -184,53 +184,12 @@
   locations->SetOut(Location::SameAsFirstInput());
 }
 
-static void GenReverseBytes(Location out,
-                            DataType::Type type,
-                            X86_64Assembler* assembler,
-                            CpuRegister temp = CpuRegister(kNoRegister)) {
-  switch (type) {
-    case DataType::Type::kInt16:
-      // TODO: Can be done with an xchg of 8b registers. This is straight from Quick.
-      __ bswapl(out.AsRegister<CpuRegister>());
-      __ sarl(out.AsRegister<CpuRegister>(), Immediate(16));
-      break;
-    case DataType::Type::kUint16:
-      // TODO: Can be done with an xchg of 8b registers. This is straight from Quick.
-      __ bswapl(out.AsRegister<CpuRegister>());
-      __ shrl(out.AsRegister<CpuRegister>(), Immediate(16));
-      break;
-    case DataType::Type::kInt32:
-    case DataType::Type::kUint32:
-      __ bswapl(out.AsRegister<CpuRegister>());
-      break;
-    case DataType::Type::kInt64:
-    case DataType::Type::kUint64:
-      __ bswapq(out.AsRegister<CpuRegister>());
-      break;
-    case DataType::Type::kFloat32:
-      DCHECK_NE(temp.AsRegister(), kNoRegister);
-      __ movd(temp, out.AsFpuRegister<XmmRegister>(), /*is64bit=*/ false);
-      __ bswapl(temp);
-      __ movd(out.AsFpuRegister<XmmRegister>(), temp, /*is64bit=*/ false);
-      break;
-    case DataType::Type::kFloat64:
-      DCHECK_NE(temp.AsRegister(), kNoRegister);
-      __ movd(temp, out.AsFpuRegister<XmmRegister>(), /*is64bit=*/ true);
-      __ bswapq(temp);
-      __ movd(out.AsFpuRegister<XmmRegister>(), temp, /*is64bit=*/ true);
-      break;
-    default:
-      LOG(FATAL) << "Unexpected type for reverse-bytes: " << type;
-      UNREACHABLE();
-  }
-}
-
 void IntrinsicLocationsBuilderX86_64::VisitIntegerReverseBytes(HInvoke* invoke) {
   CreateIntToIntLocations(allocator_, invoke);
 }
 
 void IntrinsicCodeGeneratorX86_64::VisitIntegerReverseBytes(HInvoke* invoke) {
-  GenReverseBytes(invoke->GetLocations()->Out(), DataType::Type::kInt32, GetAssembler());
+  codegen_->GetInstructionCodegen()->Bswap(invoke->GetLocations()->Out(), DataType::Type::kInt32);
 }
 
 void IntrinsicLocationsBuilderX86_64::VisitLongReverseBytes(HInvoke* invoke) {
@@ -238,7 +197,7 @@
 }
 
 void IntrinsicCodeGeneratorX86_64::VisitLongReverseBytes(HInvoke* invoke) {
-  GenReverseBytes(invoke->GetLocations()->Out(), DataType::Type::kInt64, GetAssembler());
+  codegen_->GetInstructionCodegen()->Bswap(invoke->GetLocations()->Out(), DataType::Type::kInt64);
 }
 
 void IntrinsicLocationsBuilderX86_64::VisitShortReverseBytes(HInvoke* invoke) {
@@ -246,7 +205,7 @@
 }
 
 void IntrinsicCodeGeneratorX86_64::VisitShortReverseBytes(HInvoke* invoke) {
-  GenReverseBytes(invoke->GetLocations()->Out(), DataType::Type::kInt16, GetAssembler());
+  codegen_->GetInstructionCodegen()->Bswap(invoke->GetLocations()->Out(), DataType::Type::kInt16);
 }
 
 static void CreateFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
@@ -3233,10 +3192,16 @@
 
 class VarHandleSlowPathX86_64 : public IntrinsicSlowPathX86_64 {
  public:
-  VarHandleSlowPathX86_64(HInvoke* invoke, bool is_atomic, bool is_volatile)
-      : IntrinsicSlowPathX86_64(invoke),
-        is_volatile_(is_volatile),
-        is_atomic_(is_atomic) {
+  explicit VarHandleSlowPathX86_64(HInvoke* invoke)
+      : IntrinsicSlowPathX86_64(invoke) {
+  }
+
+  void SetVolatile(bool is_volatile) {
+    is_volatile_ = is_volatile;
+  }
+
+  void SetAtomic(bool is_atomic) {
+    is_atomic_ = is_atomic;
   }
 
   Label* GetByteArrayViewCheckLabel() {
@@ -3555,11 +3520,9 @@
 
 static VarHandleSlowPathX86_64* GenerateVarHandleChecks(HInvoke* invoke,
                                                         CodeGeneratorX86_64* codegen,
-                                                        DataType::Type type,
-                                                        bool is_volatile = false,
-                                                        bool is_atomic = false) {
+                                                        DataType::Type type) {
   VarHandleSlowPathX86_64* slow_path =
-      new (codegen->GetScopedAllocator()) VarHandleSlowPathX86_64(invoke, is_volatile, is_atomic);
+      new (codegen->GetScopedAllocator()) VarHandleSlowPathX86_64(invoke);
   codegen->AddSlowPath(slow_path);
 
   GenerateVarHandleAccessModeAndVarTypeChecks(invoke, codegen, slow_path, type);
@@ -3610,8 +3573,7 @@
     __ movq(method, Address(varhandle, art_field_offset));
     __ movl(CpuRegister(target.offset), Address(method, offset_offset));
     if (expected_coordinates_count == 0u) {
-      InstructionCodeGeneratorX86_64* instr_codegen =
-          down_cast<InstructionCodeGeneratorX86_64*>(codegen->GetInstructionVisitor());
+      InstructionCodeGeneratorX86_64* instr_codegen = codegen->GetInstructionCodegen();
       instr_codegen->GenerateGcRootFieldLoad(invoke,
                                              Location::RegisterLocation(target.object),
                                              Address(method, ArtField::DeclaringClassOffset()),
@@ -3742,7 +3704,7 @@
     codegen->LoadFromMemoryNoReference(type, out, src);
     if (byte_swap) {
       CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
-      GenReverseBytes(out, type, assembler, temp);
+      codegen->GetInstructionCodegen()->Bswap(out, type, &temp);
     }
   }
 
@@ -3792,16 +3754,7 @@
   }
 
   LocationSummary* locations = CreateVarHandleCommonLocations(invoke);
-  if (GetExpectedVarHandleCoordinatesCount(invoke) > 1) {
-    // Ensure that the value is in register: for byte array views we may need to swap byte order
-    // inplace (and then swap it back).
-    uint32_t value_index = invoke->GetNumberOfArguments() - 1;
-    if (DataType::IsFloatingPointType(GetDataTypeFromShorty(invoke, value_index))) {
-      locations->SetInAt(value_index, Location::RequiresFpuRegister());
-    } else {
-      locations->SetInAt(value_index, Location::RequiresRegister());
-    }
-  }
+
   // Extra temporary is used for card in MarkGCCard and to move 64-bit constants to memory.
   locations->AddTemp(Location::RequiresRegister());
 }
@@ -3815,7 +3768,6 @@
 
   LocationSummary* locations = invoke->GetLocations();
   const uint32_t last_temp_index = locations->GetTempCount() - 1;
-  CpuRegister temp = locations->GetTemp(last_temp_index).AsRegister<CpuRegister>();
 
   uint32_t value_index = invoke->GetNumberOfArguments() - 1;
   DataType::Type value_type = GetDataTypeFromShorty(invoke, value_index);
@@ -3823,12 +3775,11 @@
   VarHandleTarget target = GetVarHandleTarget(invoke);
   VarHandleSlowPathX86_64* slow_path = nullptr;
   if (!byte_swap) {
-    slow_path = GenerateVarHandleChecks(invoke, codegen, value_type, is_volatile, is_atomic);
+    slow_path = GenerateVarHandleChecks(invoke, codegen, value_type);
+    slow_path->SetVolatile(is_volatile);
+    slow_path->SetAtomic(is_atomic);
     GenerateVarHandleTarget(invoke, target, codegen);
     __ Bind(slow_path->GetNativeByteOrderLabel());
-  } else {
-    // Swap bytes inplace in the input register (later we will restore it).
-    GenReverseBytes(locations->InAt(value_index), value_type, assembler, temp);
   }
 
   switch (invoke->GetIntrinsic()) {
@@ -3846,25 +3797,21 @@
   Address dst(CpuRegister(target.object), CpuRegister(target.offset), TIMES_1, 0);
 
   // Store the value to the field.
-  InstructionCodeGeneratorX86_64* instr_codegen =
-        down_cast<InstructionCodeGeneratorX86_64*>(codegen->GetInstructionVisitor());
-  instr_codegen->HandleFieldSet(invoke,
-                                value_index,
-                                last_temp_index,
-                                value_type,
-                                dst,
-                                CpuRegister(target.object),
-                                is_volatile,
-                                is_atomic,
-                                /*value_can_be_null=*/ true);
+  codegen->GetInstructionCodegen()->HandleFieldSet(invoke,
+                                                   value_index,
+                                                   last_temp_index,
+                                                   value_type,
+                                                   dst,
+                                                   CpuRegister(target.object),
+                                                   is_volatile,
+                                                   is_atomic,
+                                                   /*value_can_be_null=*/ true,
+                                                   byte_swap);
 
   // setVolatile needs kAnyAny barrier, but HandleFieldSet takes care of that.
 
   if (!byte_swap) {
     __ Bind(slow_path->GetExitLabel());
-  } else {
-    // Restore byte order in the input register.
-    GenReverseBytes(locations->InAt(value_index), value_type, assembler, temp);
   }
 }