x86_64: Implement VarHandle.set() for static and instance fields.

Benchmark improvements (using benchmarks provided by
https://android-review.googlesource.com/1420959):

  benchmark                    before  after
  --------------------------------------------
  SetStaticFieldInt            2.421   0.00176
  SetStaticFieldString         2.628   0.00230
  SetFieldInt                  2.665   0.00175
  SetFieldString               2.914   0.00225

Bug: 65872996

Test: lunch aosp_cf_x86_64_phone-userdebug \
  && art/test.py --host -r -t 712-varhandle-invocations --64
Test: Repeat with ART_USE_READ_BARRIER=false.
Test: Repeat with ART_HEAP_POISONING=true.
Change-Id: I13acda88ab881c1274fcd8e154de61150409d974
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 8076221..d484f04 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -5009,8 +5009,8 @@
   }
   if (needs_write_barrier) {
     // Temporary registers for the write barrier.
-    locations->AddTemp(Location::RequiresRegister());  // Possibly used for reference poisoning too.
     locations->AddTemp(Location::RequiresRegister());
+    locations->AddTemp(Location::RequiresRegister());  // Possibly used for reference poisoning too.
   } else if (kPoisonHeapReferences && field_type == DataType::Type::kReference) {
     // Temporary register for the reference poisoning.
     locations->AddTemp(Location::RequiresRegister());
@@ -5018,18 +5018,15 @@
 }
 
 void InstructionCodeGeneratorX86_64::HandleFieldSet(HInstruction* instruction,
-                                                    const FieldInfo& field_info,
+                                                    uint32_t value_index,
+                                                    uint32_t extra_temp_index,
+                                                    DataType::Type field_type,
+                                                    Address field_addr,
+                                                    CpuRegister base,
+                                                    bool is_volatile,
                                                     bool value_can_be_null) {
-  DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
-
   LocationSummary* locations = instruction->GetLocations();
-  CpuRegister base = locations->InAt(0).AsRegister<CpuRegister>();
-  Location value = locations->InAt(1);
-  bool is_volatile = field_info.IsVolatile();
-  DataType::Type field_type = field_info.GetFieldType();
-  uint32_t offset = field_info.GetFieldOffset().Uint32Value();
-  bool is_predicated =
-      instruction->IsInstanceFieldSet() && instruction->AsInstanceFieldSet()->GetIsPredicatedSet();
+  Location value = locations->InAt(value_index);
 
   if (is_volatile) {
     codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
@@ -5037,21 +5034,14 @@
 
   bool maybe_record_implicit_null_check_done = false;
 
-  NearLabel pred_is_null;
-  if (is_predicated) {
-    __ testl(base, base);
-    __ j(kZero, &pred_is_null);
-  }
-
   switch (field_type) {
     case DataType::Type::kBool:
     case DataType::Type::kUint8:
     case DataType::Type::kInt8: {
       if (value.IsConstant()) {
-        __ movb(Address(base, offset),
-                Immediate(CodeGenerator::GetInt8ValueOf(value.GetConstant())));
+        __ movb(field_addr, Immediate(CodeGenerator::GetInt8ValueOf(value.GetConstant())));
       } else {
-        __ movb(Address(base, offset), value.AsRegister<CpuRegister>());
+        __ movb(field_addr, value.AsRegister<CpuRegister>());
       }
       break;
     }
@@ -5059,10 +5049,9 @@
     case DataType::Type::kUint16:
     case DataType::Type::kInt16: {
       if (value.IsConstant()) {
-        __ movw(Address(base, offset),
-                Immediate(CodeGenerator::GetInt16ValueOf(value.GetConstant())));
+        __ movw(field_addr, Immediate(CodeGenerator::GetInt16ValueOf(value.GetConstant())));
       } else {
-        __ movw(Address(base, offset), value.AsRegister<CpuRegister>());
+        __ movw(field_addr, value.AsRegister<CpuRegister>());
       }
       break;
     }
@@ -5075,15 +5064,15 @@
         DCHECK((field_type != DataType::Type::kReference) || (v == 0));
         // Note: if heap poisoning is enabled, no need to poison
         // (negate) `v` if it is a reference, as it would be null.
-        __ movl(Address(base, offset), Immediate(v));
+        __ movl(field_addr, Immediate(v));
       } else {
         if (kPoisonHeapReferences && field_type == DataType::Type::kReference) {
-          CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
+          CpuRegister temp = locations->GetTemp(extra_temp_index).AsRegister<CpuRegister>();
           __ movl(temp, value.AsRegister<CpuRegister>());
           __ PoisonHeapReference(temp);
-          __ movl(Address(base, offset), temp);
+          __ movl(field_addr, temp);
         } else {
-          __ movl(Address(base, offset), value.AsRegister<CpuRegister>());
+          __ movl(field_addr, value.AsRegister<CpuRegister>());
         }
       }
       break;
@@ -5092,39 +5081,37 @@
     case DataType::Type::kInt64: {
       if (value.IsConstant()) {
         int64_t v = value.GetConstant()->AsLongConstant()->GetValue();
-        codegen_->MoveInt64ToAddress(Address(base, offset),
-                                     Address(base, offset + sizeof(int32_t)),
+        codegen_->MoveInt64ToAddress(field_addr,
+                                     Address::displace(field_addr, sizeof(int32_t)),
                                      v,
                                      instruction);
         maybe_record_implicit_null_check_done = true;
       } else {
-        __ movq(Address(base, offset), value.AsRegister<CpuRegister>());
+        __ movq(field_addr, value.AsRegister<CpuRegister>());
       }
       break;
     }
 
     case DataType::Type::kFloat32: {
       if (value.IsConstant()) {
-        int32_t v =
-            bit_cast<int32_t, float>(value.GetConstant()->AsFloatConstant()->GetValue());
-        __ movl(Address(base, offset), Immediate(v));
+        int32_t v = bit_cast<int32_t, float>(value.GetConstant()->AsFloatConstant()->GetValue());
+        __ movl(field_addr, Immediate(v));
       } else {
-        __ movss(Address(base, offset), value.AsFpuRegister<XmmRegister>());
+        __ movss(field_addr, value.AsFpuRegister<XmmRegister>());
       }
       break;
     }
 
     case DataType::Type::kFloat64: {
       if (value.IsConstant()) {
-        int64_t v =
-            bit_cast<int64_t, double>(value.GetConstant()->AsDoubleConstant()->GetValue());
-        codegen_->MoveInt64ToAddress(Address(base, offset),
-                                     Address(base, offset + sizeof(int32_t)),
+        int64_t v = bit_cast<int64_t, double>(value.GetConstant()->AsDoubleConstant()->GetValue());
+        codegen_->MoveInt64ToAddress(field_addr,
+                                     Address::displace(field_addr, sizeof(int32_t)),
                                      v,
                                      instruction);
         maybe_record_implicit_null_check_done = true;
       } else {
-        __ movsd(Address(base, offset), value.AsFpuRegister<XmmRegister>());
+        __ movsd(field_addr, value.AsFpuRegister<XmmRegister>());
       }
       break;
     }
@@ -5140,15 +5127,44 @@
     codegen_->MaybeRecordImplicitNullCheck(instruction);
   }
 
-  if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) {
+  if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(value_index))) {
     CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
-    CpuRegister card = locations->GetTemp(1).AsRegister<CpuRegister>();
+    CpuRegister card = locations->GetTemp(extra_temp_index).AsRegister<CpuRegister>();
     codegen_->MarkGCCard(temp, card, base, value.AsRegister<CpuRegister>(), value_can_be_null);
   }
 
   if (is_volatile) {
     codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
   }
+}
+
+void InstructionCodeGeneratorX86_64::HandleFieldSet(HInstruction* instruction,
+                                                    const FieldInfo& field_info,
+                                                    bool value_can_be_null) {
+  DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
+
+  LocationSummary* locations = instruction->GetLocations();
+  CpuRegister base = locations->InAt(0).AsRegister<CpuRegister>();
+  bool is_volatile = field_info.IsVolatile();
+  DataType::Type field_type = field_info.GetFieldType();
+  uint32_t offset = field_info.GetFieldOffset().Uint32Value();
+  bool is_predicated =
+      instruction->IsInstanceFieldSet() && instruction->AsInstanceFieldSet()->GetIsPredicatedSet();
+
+  NearLabel pred_is_null;
+  if (is_predicated) {
+    __ testl(base, base);
+    __ j(kZero, &pred_is_null);
+  }
+
+  HandleFieldSet(instruction,
+                 /*value_index=*/ 1,
+                 /*extra_temp_index=*/ 1,
+                 field_type,
+                 Address(base, offset),
+                 base,
+                 is_volatile,
+                 value_can_be_null);
 
   if (is_predicated) {
     __ Bind(&pred_is_null);
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index f5d8da5..8a08e85 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -241,6 +241,14 @@
                                const Address& address,
                                Label* fixup_label,
                                ReadBarrierOption read_barrier_option);
+  void HandleFieldSet(HInstruction* instruction,
+                      uint32_t value_index,  // Input index of the value to store.
+                      uint32_t extra_temp_index,  // Temp for poisoning and the GC card.
+                      DataType::Type field_type,
+                      Address field_addr,  // Destination of the store.
+                      CpuRegister base,  // Passed to MarkGCCard with the value.
+                      bool is_volatile,  // Emit kAnyStore/kAnyAny barriers around the store.
+                      bool value_can_be_null);
 
  private:
   // Generate code for the given suspend check. If not null, `successor`
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index 8ca7473..471f021 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -3123,6 +3123,10 @@
       }
       break;
     case mirror::VarHandle::AccessModeTemplate::kSet:
+      if (return_type != DataType::Type::kVoid) {
+        return false;
+      }
+      break;
     case mirror::VarHandle::AccessModeTemplate::kCompareAndSet:
     case mirror::VarHandle::AccessModeTemplate::kCompareAndExchange:
     case mirror::VarHandle::AccessModeTemplate::kGetAndUpdate:
@@ -3133,11 +3137,7 @@
   return true;
 }
 
-static void CreateVarHandleGetLocations(HInvoke* invoke) {
-  if (!HasVarHandleIntrinsicImplementation(invoke)) {
-    return;
-  }
-
+static void CreateVarHandleCommonLocations(HInvoke* invoke) {
   size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
   DataType::Type return_type = invoke->GetType();
 
@@ -3150,19 +3150,46 @@
   for (size_t i = 0; i != expected_coordinates_count; ++i) {
     locations->SetInAt(/* VarHandle object */ 1u + i, Location::RequiresRegister());
   }
-  DCHECK(return_type != DataType::Type::kVoid);
-  if (DataType::IsFloatingPointType(return_type)) {
-    locations->SetOut(Location::RequiresFpuRegister());
-  } else {
-    locations->SetOut(Location::RequiresRegister());
+  if (return_type != DataType::Type::kVoid) {
+    if (DataType::IsFloatingPointType(return_type)) {
+      locations->SetOut(Location::RequiresFpuRegister());
+    } else {
+      locations->SetOut(Location::RequiresRegister());
+    }
+  }
+  uint32_t arguments_start = /* VarHandle object */ 1u + expected_coordinates_count;
+  uint32_t number_of_arguments = invoke->GetNumberOfArguments();
+  for (size_t arg_index = arguments_start; arg_index != number_of_arguments; ++arg_index) {
+    HInstruction* arg = invoke->InputAt(arg_index);
+    if (DataType::IsFloatingPointType(arg->GetType())) {
+      locations->SetInAt(arg_index, Location::FpuRegisterOrConstant(arg));
+    } else {
+      locations->SetInAt(arg_index, Location::RegisterOrConstant(arg));
+    }
   }
 
   // Add a temporary for offset.
   locations->AddTemp(Location::RequiresRegister());
+
   if (expected_coordinates_count == 0u) {
     // Add a temporary to hold the declaring class.
     locations->AddTemp(Location::RequiresRegister());
   }
+
+  mirror::VarHandle::AccessModeTemplate access_mode_template =
+      mirror::VarHandle::GetAccessModeTemplateByIntrinsic(invoke->GetIntrinsic());
+  if (access_mode_template == mirror::VarHandle::AccessModeTemplate::kSet) {
+    // Add an extra temporary register for card in MarkGCCard.
+    locations->AddTemp(Location::RequiresRegister());
+  }
+}
+
+static void CreateVarHandleGetLocations(HInvoke* invoke) {
+  if (!HasVarHandleIntrinsicImplementation(invoke)) {
+    return;  // No intrinsic implementation for this invoke: build no locations.
+  }
+
+  CreateVarHandleCommonLocations(invoke);  // Same setup as CreateVarHandleSetLocations().
+}
 
 static void GenerateVarHandleGet(HInvoke* invoke, CodeGeneratorX86_64* codegen) {
@@ -3231,6 +3258,58 @@
   GenerateVarHandleGet(invoke, codegen_);
 }
 
+static void CreateVarHandleSetLocations(HInvoke* invoke) {
+  if (!HasVarHandleIntrinsicImplementation(invoke)) {
+    return;  // No intrinsic implementation for this invoke: build no locations.
+  }
+
+  CreateVarHandleCommonLocations(invoke);  // Same setup as CreateVarHandleGetLocations().
+}
+
+static void GenerateVarHandleSet(HInvoke* invoke, CodeGeneratorX86_64* codegen) {
+  X86_64Assembler* assembler = codegen->GetAssembler();
+  // The value to store is the last argument; its type is read from the shorty.
+  uint32_t value_index = invoke->GetNumberOfArguments() - 1;
+  DataType::Type value_type = GetDataTypeFromShorty(invoke, value_index);
+  // GenerateVarHandleChecks() hands back the slow path whose exit label is bound below.
+  SlowPathCode* slow_path = GenerateVarHandleChecks(invoke, codegen, value_type);
+  VarHandleTarget target = GetVarHandleTarget(invoke);
+  GenerateVarHandleTarget(invoke, target, codegen);
+  // Only Intrinsics::kVarHandleSet is accepted; any other intrinsic logs a fatal error.
+  switch (invoke->GetIntrinsic()) {
+    case Intrinsics::kVarHandleSet:
+      break;
+    default:
+      // TODO: implement setOpaque, setRelease, setVolatile.
+      LOG(FATAL) << "unsupported intrinsic " << invoke->GetIntrinsic();
+  }
+  // The last temp is passed to HandleFieldSet() as its `extra_temp_index`.
+  const uint32_t last_temp_index = invoke->GetLocations()->GetTempCount() - 1;
+  Address dst(CpuRegister(target.object), CpuRegister(target.offset), TIMES_1, 0);
+
+  // Store the value to the field.
+  InstructionCodeGeneratorX86_64* instr_codegen =
+        down_cast<InstructionCodeGeneratorX86_64*>(codegen->GetInstructionVisitor());
+  instr_codegen->HandleFieldSet(invoke,
+                                value_index,
+                                last_temp_index,
+                                value_type,
+                                dst,
+                                CpuRegister(target.object),
+                                /*is_volatile=*/ false,
+                                /*value_can_be_null=*/ true);
+
+  __ Bind(slow_path->GetExitLabel());
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitVarHandleSet(HInvoke* invoke) {
+  CreateVarHandleSetLocations(invoke);  // Builds nothing if the invoke is unsupported.
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitVarHandleSet(HInvoke* invoke) {
+  GenerateVarHandleSet(invoke, codegen_);  // Emits the field store via HandleFieldSet().
+}
+
 UNIMPLEMENTED_INTRINSIC(X86_64, FloatIsInfinite)
 UNIMPLEMENTED_INTRINSIC(X86_64, DoubleIsInfinite)
 UNIMPLEMENTED_INTRINSIC(X86_64, CRC32Update)
@@ -3292,7 +3371,6 @@
 UNIMPLEMENTED_INTRINSIC(X86_64, VarHandleGetAndSet)
 UNIMPLEMENTED_INTRINSIC(X86_64, VarHandleGetAndSetAcquire)
 UNIMPLEMENTED_INTRINSIC(X86_64, VarHandleGetAndSetRelease)
-UNIMPLEMENTED_INTRINSIC(X86_64, VarHandleSet)
 UNIMPLEMENTED_INTRINSIC(X86_64, VarHandleSetOpaque)
 UNIMPLEMENTED_INTRINSIC(X86_64, VarHandleSetRelease)
 UNIMPLEMENTED_INTRINSIC(X86_64, VarHandleSetVolatile)