x86_64: Implement VarHandle.set() for static and instance fields.
Benchmark improvements (using the benchmarks provided by
https://android-review.googlesource.com/1420959):
benchmark before after
--------------------------------------------
SetStaticFieldInt 2.421 0.00176
SetStaticFieldString 2.628 0.00230
SetFieldInt 2.665 0.00175
SetFieldString 2.914 0.00225
Bug: 65872996
Test: lunch aosp_cf_x86_64_phone-userdebug \
&& art/test.py --host -r -t 712-varhandle-invocations --64
Test: Repeat with ART_USE_READ_BARRIER=false.
Test: Repeat with ART_HEAP_POISONING=true.
Change-Id: I13acda88ab881c1274fcd8e154de61150409d974
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 8076221..d484f04 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -5009,8 +5009,8 @@
}
if (needs_write_barrier) {
// Temporary registers for the write barrier.
- locations->AddTemp(Location::RequiresRegister()); // Possibly used for reference poisoning too.
locations->AddTemp(Location::RequiresRegister());
+ locations->AddTemp(Location::RequiresRegister()); // Possibly used for reference poisoning too.
} else if (kPoisonHeapReferences && field_type == DataType::Type::kReference) {
// Temporary register for the reference poisoning.
locations->AddTemp(Location::RequiresRegister());
@@ -5018,18 +5018,15 @@
}
void InstructionCodeGeneratorX86_64::HandleFieldSet(HInstruction* instruction,
- const FieldInfo& field_info,
+ uint32_t value_index,
+ uint32_t extra_temp_index,
+ DataType::Type field_type,
+ Address field_addr,
+ CpuRegister base,
+ bool is_volatile,
bool value_can_be_null) {
- DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
-
LocationSummary* locations = instruction->GetLocations();
- CpuRegister base = locations->InAt(0).AsRegister<CpuRegister>();
- Location value = locations->InAt(1);
- bool is_volatile = field_info.IsVolatile();
- DataType::Type field_type = field_info.GetFieldType();
- uint32_t offset = field_info.GetFieldOffset().Uint32Value();
- bool is_predicated =
- instruction->IsInstanceFieldSet() && instruction->AsInstanceFieldSet()->GetIsPredicatedSet();
+ Location value = locations->InAt(value_index);
if (is_volatile) {
codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
@@ -5037,21 +5034,14 @@
bool maybe_record_implicit_null_check_done = false;
- NearLabel pred_is_null;
- if (is_predicated) {
- __ testl(base, base);
- __ j(kZero, &pred_is_null);
- }
-
switch (field_type) {
case DataType::Type::kBool:
case DataType::Type::kUint8:
case DataType::Type::kInt8: {
if (value.IsConstant()) {
- __ movb(Address(base, offset),
- Immediate(CodeGenerator::GetInt8ValueOf(value.GetConstant())));
+ __ movb(field_addr, Immediate(CodeGenerator::GetInt8ValueOf(value.GetConstant())));
} else {
- __ movb(Address(base, offset), value.AsRegister<CpuRegister>());
+ __ movb(field_addr, value.AsRegister<CpuRegister>());
}
break;
}
@@ -5059,10 +5049,9 @@
case DataType::Type::kUint16:
case DataType::Type::kInt16: {
if (value.IsConstant()) {
- __ movw(Address(base, offset),
- Immediate(CodeGenerator::GetInt16ValueOf(value.GetConstant())));
+ __ movw(field_addr, Immediate(CodeGenerator::GetInt16ValueOf(value.GetConstant())));
} else {
- __ movw(Address(base, offset), value.AsRegister<CpuRegister>());
+ __ movw(field_addr, value.AsRegister<CpuRegister>());
}
break;
}
@@ -5075,15 +5064,15 @@
DCHECK((field_type != DataType::Type::kReference) || (v == 0));
// Note: if heap poisoning is enabled, no need to poison
// (negate) `v` if it is a reference, as it would be null.
- __ movl(Address(base, offset), Immediate(v));
+ __ movl(field_addr, Immediate(v));
} else {
if (kPoisonHeapReferences && field_type == DataType::Type::kReference) {
- CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
+ CpuRegister temp = locations->GetTemp(extra_temp_index).AsRegister<CpuRegister>();
__ movl(temp, value.AsRegister<CpuRegister>());
__ PoisonHeapReference(temp);
- __ movl(Address(base, offset), temp);
+ __ movl(field_addr, temp);
} else {
- __ movl(Address(base, offset), value.AsRegister<CpuRegister>());
+ __ movl(field_addr, value.AsRegister<CpuRegister>());
}
}
break;
@@ -5092,39 +5081,37 @@
case DataType::Type::kInt64: {
if (value.IsConstant()) {
int64_t v = value.GetConstant()->AsLongConstant()->GetValue();
- codegen_->MoveInt64ToAddress(Address(base, offset),
- Address(base, offset + sizeof(int32_t)),
+ codegen_->MoveInt64ToAddress(field_addr,
+ Address::displace(field_addr, sizeof(int32_t)),
v,
instruction);
maybe_record_implicit_null_check_done = true;
} else {
- __ movq(Address(base, offset), value.AsRegister<CpuRegister>());
+ __ movq(field_addr, value.AsRegister<CpuRegister>());
}
break;
}
case DataType::Type::kFloat32: {
if (value.IsConstant()) {
- int32_t v =
- bit_cast<int32_t, float>(value.GetConstant()->AsFloatConstant()->GetValue());
- __ movl(Address(base, offset), Immediate(v));
+ int32_t v = bit_cast<int32_t, float>(value.GetConstant()->AsFloatConstant()->GetValue());
+ __ movl(field_addr, Immediate(v));
} else {
- __ movss(Address(base, offset), value.AsFpuRegister<XmmRegister>());
+ __ movss(field_addr, value.AsFpuRegister<XmmRegister>());
}
break;
}
case DataType::Type::kFloat64: {
if (value.IsConstant()) {
- int64_t v =
- bit_cast<int64_t, double>(value.GetConstant()->AsDoubleConstant()->GetValue());
- codegen_->MoveInt64ToAddress(Address(base, offset),
- Address(base, offset + sizeof(int32_t)),
+ int64_t v = bit_cast<int64_t, double>(value.GetConstant()->AsDoubleConstant()->GetValue());
+ codegen_->MoveInt64ToAddress(field_addr,
+ Address::displace(field_addr, sizeof(int32_t)),
v,
instruction);
maybe_record_implicit_null_check_done = true;
} else {
- __ movsd(Address(base, offset), value.AsFpuRegister<XmmRegister>());
+ __ movsd(field_addr, value.AsFpuRegister<XmmRegister>());
}
break;
}
@@ -5140,15 +5127,44 @@
codegen_->MaybeRecordImplicitNullCheck(instruction);
}
- if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) {
+ if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(value_index))) {
CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
- CpuRegister card = locations->GetTemp(1).AsRegister<CpuRegister>();
+ CpuRegister card = locations->GetTemp(extra_temp_index).AsRegister<CpuRegister>();
codegen_->MarkGCCard(temp, card, base, value.AsRegister<CpuRegister>(), value_can_be_null);
}
if (is_volatile) {
codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
}
+}
+
+void InstructionCodeGeneratorX86_64::HandleFieldSet(HInstruction* instruction,
+ const FieldInfo& field_info,
+ bool value_can_be_null) {
+ DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
+
+ LocationSummary* locations = instruction->GetLocations();
+ CpuRegister base = locations->InAt(0).AsRegister<CpuRegister>();
+ bool is_volatile = field_info.IsVolatile();
+ DataType::Type field_type = field_info.GetFieldType();
+ uint32_t offset = field_info.GetFieldOffset().Uint32Value();
+ bool is_predicated =
+ instruction->IsInstanceFieldSet() && instruction->AsInstanceFieldSet()->GetIsPredicatedSet();
+
+ NearLabel pred_is_null;
+ if (is_predicated) {
+ __ testl(base, base);
+ __ j(kZero, &pred_is_null);
+ }
+
+ HandleFieldSet(instruction,
+ /*value_index=*/ 1,
+ /*extra_temp_index=*/ 1,
+ field_type,
+ Address(base, offset),
+ base,
+ is_volatile,
+ value_can_be_null);
if (is_predicated) {
__ Bind(&pred_is_null);
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index f5d8da5..8a08e85 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -241,6 +241,14 @@
const Address& address,
Label* fixup_label,
ReadBarrierOption read_barrier_option);
+ void HandleFieldSet(HInstruction* instruction,
+ uint32_t value_index,
+ uint32_t extra_temp_index,
+ DataType::Type field_type,
+ Address field_addr,
+ CpuRegister base,
+ bool is_volatile,
+ bool value_can_be_null);
private:
// Generate code for the given suspend check. If not null, `successor`
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index 8ca7473..471f021 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -3123,6 +3123,10 @@
}
break;
case mirror::VarHandle::AccessModeTemplate::kSet:
+ if (return_type != DataType::Type::kVoid) {
+ return false;
+ }
+ break;
case mirror::VarHandle::AccessModeTemplate::kCompareAndSet:
case mirror::VarHandle::AccessModeTemplate::kCompareAndExchange:
case mirror::VarHandle::AccessModeTemplate::kGetAndUpdate:
@@ -3133,11 +3137,7 @@
return true;
}
-static void CreateVarHandleGetLocations(HInvoke* invoke) {
- if (!HasVarHandleIntrinsicImplementation(invoke)) {
- return;
- }
-
+static void CreateVarHandleCommonLocations(HInvoke* invoke) {
size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
DataType::Type return_type = invoke->GetType();
@@ -3150,19 +3150,46 @@
for (size_t i = 0; i != expected_coordinates_count; ++i) {
locations->SetInAt(/* VarHandle object */ 1u + i, Location::RequiresRegister());
}
- DCHECK(return_type != DataType::Type::kVoid);
- if (DataType::IsFloatingPointType(return_type)) {
- locations->SetOut(Location::RequiresFpuRegister());
- } else {
- locations->SetOut(Location::RequiresRegister());
+ if (return_type != DataType::Type::kVoid) {
+ if (DataType::IsFloatingPointType(return_type)) {
+ locations->SetOut(Location::RequiresFpuRegister());
+ } else {
+ locations->SetOut(Location::RequiresRegister());
+ }
+ }
+ uint32_t arguments_start = /* VarHandle object */ 1u + expected_coordinates_count;
+ uint32_t number_of_arguments = invoke->GetNumberOfArguments();
+ for (size_t arg_index = arguments_start; arg_index != number_of_arguments; ++arg_index) {
+ HInstruction* arg = invoke->InputAt(arg_index);
+ if (DataType::IsFloatingPointType(arg->GetType())) {
+ locations->SetInAt(arg_index, Location::FpuRegisterOrConstant(arg));
+ } else {
+ locations->SetInAt(arg_index, Location::RegisterOrConstant(arg));
+ }
}
// Add a temporary for offset.
locations->AddTemp(Location::RequiresRegister());
+
if (expected_coordinates_count == 0u) {
// Add a temporary to hold the declaring class.
locations->AddTemp(Location::RequiresRegister());
}
+
+ mirror::VarHandle::AccessModeTemplate access_mode_template =
+ mirror::VarHandle::GetAccessModeTemplateByIntrinsic(invoke->GetIntrinsic());
+ if (access_mode_template == mirror::VarHandle::AccessModeTemplate::kSet) {
+ // Add an extra temporary register for card in MarkGCCard.
+ locations->AddTemp(Location::RequiresRegister());
+ }
+}
+
+static void CreateVarHandleGetLocations(HInvoke* invoke) {
+ if (!HasVarHandleIntrinsicImplementation(invoke)) {
+ return;
+ }
+
+ CreateVarHandleCommonLocations(invoke);
}
static void GenerateVarHandleGet(HInvoke* invoke, CodeGeneratorX86_64* codegen) {
@@ -3231,6 +3258,58 @@
GenerateVarHandleGet(invoke, codegen_);
}
+static void CreateVarHandleSetLocations(HInvoke* invoke) {
+ if (!HasVarHandleIntrinsicImplementation(invoke)) {
+ return;
+ }
+
+ CreateVarHandleCommonLocations(invoke);
+}
+
+static void GenerateVarHandleSet(HInvoke* invoke, CodeGeneratorX86_64* codegen) {
+ X86_64Assembler* assembler = codegen->GetAssembler();
+
+ uint32_t value_index = invoke->GetNumberOfArguments() - 1;
+ DataType::Type value_type = GetDataTypeFromShorty(invoke, value_index);
+
+ SlowPathCode* slow_path = GenerateVarHandleChecks(invoke, codegen, value_type);
+ VarHandleTarget target = GetVarHandleTarget(invoke);
+ GenerateVarHandleTarget(invoke, target, codegen);
+
+ switch (invoke->GetIntrinsic()) {
+ case Intrinsics::kVarHandleSet:
+ break;
+ default:
+ // TODO: implement setOpaque, setRelease, setVolatile.
+ LOG(FATAL) << "unsupported intrinsic " << invoke->GetIntrinsic();
+ }
+
+ const uint32_t last_temp_index = invoke->GetLocations()->GetTempCount() - 1;
+ Address dst(CpuRegister(target.object), CpuRegister(target.offset), TIMES_1, 0);
+
+ // Store the value to the field.
+ InstructionCodeGeneratorX86_64* instr_codegen =
+ down_cast<InstructionCodeGeneratorX86_64*>(codegen->GetInstructionVisitor());
+ instr_codegen->HandleFieldSet(invoke,
+ value_index,
+ last_temp_index,
+ value_type,
+ dst,
+ CpuRegister(target.object),
+ /*is_volatile=*/ false,
+ /*value_can_be_null=*/ true);
+
+ __ Bind(slow_path->GetExitLabel());
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitVarHandleSet(HInvoke* invoke) {
+ CreateVarHandleSetLocations(invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitVarHandleSet(HInvoke* invoke) {
+ GenerateVarHandleSet(invoke, codegen_);
+}
+
UNIMPLEMENTED_INTRINSIC(X86_64, FloatIsInfinite)
UNIMPLEMENTED_INTRINSIC(X86_64, DoubleIsInfinite)
UNIMPLEMENTED_INTRINSIC(X86_64, CRC32Update)
@@ -3292,7 +3371,6 @@
UNIMPLEMENTED_INTRINSIC(X86_64, VarHandleGetAndSet)
UNIMPLEMENTED_INTRINSIC(X86_64, VarHandleGetAndSetAcquire)
UNIMPLEMENTED_INTRINSIC(X86_64, VarHandleGetAndSetRelease)
-UNIMPLEMENTED_INTRINSIC(X86_64, VarHandleSet)
UNIMPLEMENTED_INTRINSIC(X86_64, VarHandleSetOpaque)
UNIMPLEMENTED_INTRINSIC(X86_64, VarHandleSetRelease)
UNIMPLEMENTED_INTRINSIC(X86_64, VarHandleSetVolatile)