X86: Implement VarHandle.getAndAdd intrinsic
This commit implements the VarHandle getAndAdd intrinsic. This also
required adding the xadd instruction and tests for it.
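
For reference, a minimal sketch of the Java-level operation being
intrinsified (class, field, and variable names below are illustrative,
not part of this change):

    import java.lang.invoke.MethodHandles;
    import java.lang.invoke.VarHandle;

    class Counter {
        int count;
        static final VarHandle COUNT;
        static {
            try {
                COUNT = MethodHandles.lookup()
                        .findVarHandle(Counter.class, "count", int.class);
            } catch (ReflectiveOperationException e) {
                throw new ExceptionInInitializerError(e);
            }
        }
    }

    // Given a Counter counter: atomically adds 42 and returns the old
    // value. For integral types this now compiles down to a LOCK XADD.
    int old = (int) COUNT.getAndAdd(counter, 42);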
Test: art/test.py --host -r -t 712-varhandle-invocation --32
Test: m test-art-host-gtest
Bug: 65872996
Change-Id: I84dd95ba6464c8a73ace03a13817147c7099677a
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc
index 823b011..4e24aab 100644
--- a/compiler/optimizing/intrinsics_x86.cc
+++ b/compiler/optimizing/intrinsics_x86.cc
@@ -1962,18 +1962,16 @@
CreateIntIntIntIntIntToInt(allocator_, DataType::Type::kReference, invoke);
}
-static void GenPrimitiveCAS(DataType::Type type,
- CodeGeneratorX86* codegen,
- Location expected_value,
- Location new_value,
- Register base,
- Register offset,
- Location out,
- // Only necessary for floating point
- Register temp = Register::kNoRegister) {
+static void GenPrimitiveLockedCmpxchg(DataType::Type type,
+ CodeGeneratorX86* codegen,
+ Location expected_value,
+ Location new_value,
+ Register base,
+ Register offset,
+ // Only necessary for floating point
+ Register temp = Register::kNoRegister) {
X86Assembler* assembler = down_cast<X86Assembler*>(codegen->GetAssembler());
- DCHECK_EQ(out.AsRegister<Register>(), EAX);
if (DataType::Kind(type) == DataType::Type::kInt32) {
DCHECK_EQ(expected_value.AsRegister<Register>(), EAX);
}
@@ -2016,6 +2014,21 @@
}
// LOCK CMPXCHG/LOCK CMPXCHG8B have full barrier semantics, and we
// don't need scheduling barriers at this time.
+}
+
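+// Generates a compare-and-set: emits the locked cmpxchg above and then
+// materializes the resulting ZF as a boolean in `out`, which must be EAX so
+// that setb can address its low byte.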
+static void GenPrimitiveCAS(DataType::Type type,
+ CodeGeneratorX86* codegen,
+ Location expected_value,
+ Location new_value,
+ Register base,
+ Register offset,
+ Location out,
+ // Only necessary for floating point
+ Register temp = Register::kNoRegister) {
+ X86Assembler* assembler = down_cast<X86Assembler*>(codegen->GetAssembler());
+ DCHECK_EQ(out.AsRegister<Register>(), EAX);
+
+ GenPrimitiveLockedCmpxchg(type, codegen, expected_value, new_value, base, offset, temp);
// Convert ZF into the Boolean result.
__ setb(kZero, out.AsRegister<Register>());
@@ -3133,6 +3146,7 @@
return false;
}
+ uint32_t number_of_arguments = invoke->GetNumberOfArguments();
DataType::Type type = invoke->GetType();
mirror::VarHandle::AccessModeTemplate access_mode_template =
mirror::VarHandle::GetAccessModeTemplateByIntrinsic(invoke->GetIntrinsic());
@@ -3146,7 +3160,6 @@
if (type != DataType::Type::kBool) {
return false;
}
- uint32_t number_of_arguments = invoke->GetNumberOfArguments();
uint32_t expected_value_index = number_of_arguments - 2;
uint32_t new_value_index = number_of_arguments - 1;
DataType::Type expected_value_type = GetDataTypeFromShorty(invoke, expected_value_index);
@@ -3157,13 +3170,25 @@
}
break;
}
+ case mirror::VarHandle::AccessModeTemplate::kGetAndUpdate: {
+ DataType::Type value_type = GetDataTypeFromShorty(invoke, number_of_arguments - 1);
+ if (invoke->GetIntrinsic() == Intrinsics::kVarHandleGetAndAdd) {
+ if (value_type == DataType::Type::kReference || value_type == DataType::Type::kVoid) {
+          // Addition is not defined for references or void values.
+ return false;
+ }
+ }
+ if (value_type != type) {
+ return false;
+ }
+ break;
+ }
case mirror::VarHandle::AccessModeTemplate::kGet:
// The return type should be the same as varType, so it shouldn't be void
if (type == DataType::Type::kVoid) {
return false;
}
break;
- case mirror::VarHandle::AccessModeTemplate::kGetAndUpdate:
case mirror::VarHandle::AccessModeTemplate::kCompareAndExchange:
// Unimplemented intrinsics
UNREACHABLE();
@@ -3776,6 +3801,137 @@
GenerateVarHandleCompareAndSet(invoke, codegen_);
}
+void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndAdd(HInvoke* invoke) {
+ // The only read barrier implementation supporting the
+  // VarHandleGetAndAdd intrinsic is the Baker-style read barriers.
+ if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
+ return;
+ }
+
+ if (!IsValidFieldVarHandleExpected(invoke)) {
+ return;
+ }
+
+ // The last argument should be the value we intend to set.
+ uint32_t value_index = invoke->GetNumberOfArguments() - 1;
+ DataType::Type value_type = GetDataTypeFromShorty(invoke, value_index);
+ if (DataType::Is64BitType(value_type)) {
+ // We avoid the case of an Int64/Float64 value because we would need to place it in a register
+ // pair. If the slow path is taken, the ParallelMove might fail to move the pair according to
+  // the X86DexCallingConvention in case of an overlap (e.g., moving the 64-bit value from
+ // <EAX, EBX> to <EBX, ECX>). (Bug: b/168687887)
+ return;
+ }
+
+ ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator();
+ LocationSummary* locations = new (allocator) LocationSummary(
+ invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
+ locations->AddTemp(Location::RequiresRegister());
+ locations->AddTemp(Location::RequiresRegister());
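+  // Temp 0 is used for the VarHandle checks; temp 1 receives the field offset
+  // (see the code generator below).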
+ locations->SetInAt(0, Location::RequiresRegister());
+ size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
+ if (expected_coordinates_count == 1u) {
+ // For instance fields, this is the source object
+ locations->SetInAt(1, Location::RequiresRegister());
+ } else {
+ // For static fields, we need another temp because one will be busy with the declaring class.
+ locations->AddTemp(Location::RequiresRegister());
+ }
+
+ if (DataType::IsFloatingPointType(value_type)) {
+ locations->AddTemp(Location::RequiresFpuRegister());
+ locations->AddTemp(Location::RegisterLocation(EAX));
+ locations->SetInAt(value_index, Location::RequiresFpuRegister());
+ locations->SetOut(Location::RequiresFpuRegister());
+ } else {
+    // xadd updates the register argument with the old value. A ByteRegister is
+    // required for xaddb.
+ locations->SetInAt(value_index, Location::RegisterLocation(EAX));
+ locations->SetOut(Location::RegisterLocation(EAX));
+ }
+}
+
+void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndAdd(HInvoke* invoke) {
+ // The only read barrier implementation supporting the
+  // VarHandleGetAndAdd intrinsic is the Baker-style read barriers.
+ DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
+
+ CodeGeneratorX86* codegen = down_cast<CodeGeneratorX86*>(codegen_);
+ X86Assembler* assembler = codegen->GetAssembler();
+ LocationSummary* locations = invoke->GetLocations();
+ uint32_t number_of_arguments = invoke->GetNumberOfArguments();
+ uint32_t value_index = number_of_arguments - 1;
+ DataType::Type type = GetDataTypeFromShorty(invoke, value_index);
+ DCHECK_EQ(type, invoke->GetType());
+ Location value_loc = locations->InAt(value_index);
+ Register vh_object = locations->InAt(0).AsRegister<Register>();
+ Register temp = locations->GetTemp(0).AsRegister<Register>();
+ SlowPathCode* slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
+ codegen->AddSlowPath(slow_path);
+
+ GenerateVarHandleCommonChecks(invoke, temp, slow_path, assembler);
+
+ GenerateVarTypePrimitiveTypeCheck(vh_object, temp, type, slow_path, assembler);
+
+ Register offset = locations->GetTemp(1).AsRegister<Register>();
+  // Get the field referred to by the VarHandle. The returned register contains the object reference
+ // or the declaring class. The field offset will be placed in 'offset'. For static fields, the
+ // declaring class will be placed in 'temp' register.
+ Register reference = GenerateVarHandleFieldReference(invoke, codegen, temp, offset);
+
+ size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
+ temp = (expected_coordinates_count == 1u) ? temp : locations->GetTemp(2).AsRegister<Register>();
+ DCHECK_NE(temp, reference);
+ Address field_addr(reference, offset, TIMES_1, 0);
+
+ switch (type) {
+ case DataType::Type::kInt8:
+ __ LockXaddb(field_addr, value_loc.AsRegister<ByteRegister>());
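+      // EAX's low byte now holds the old value; sign-extend it to 32 bits.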
+ __ movsxb(locations->Out().AsRegister<Register>(),
+ locations->Out().AsRegister<ByteRegister>());
+ break;
+ case DataType::Type::kInt16:
+ __ LockXaddw(field_addr, value_loc.AsRegister<Register>());
+ __ movsxw(locations->Out().AsRegister<Register>(), locations->Out().AsRegister<Register>());
+ break;
+ case DataType::Type::kUint16:
+ __ LockXaddw(field_addr, value_loc.AsRegister<Register>());
+ __ movzxw(locations->Out().AsRegister<Register>(), locations->Out().AsRegister<Register>());
+ break;
+ case DataType::Type::kInt32:
+ __ LockXaddl(field_addr, value_loc.AsRegister<Register>());
+ break;
+ case DataType::Type::kFloat32: {
+ Location temp_float =
+ (expected_coordinates_count == 1u) ? locations->GetTemp(2) : locations->GetTemp(3);
+ DCHECK(temp_float.IsFpuRegister());
+ Location eax = Location::RegisterLocation(EAX);
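+      // There is no floating-point xadd, so getAndAdd is emulated with a
+      // load / addss / LOCK CMPXCHG retry loop, keeping the old bits in EAX.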
+ NearLabel try_again;
+ __ Bind(&try_again);
+ codegen->MoveFromMemory(type, temp_float, reference, offset);
+ __ movd(EAX, temp_float.AsFpuRegister<XmmRegister>());
+ __ addss(temp_float.AsFpuRegister<XmmRegister>(),
+ value_loc.AsFpuRegister<XmmRegister>());
+      GenPrimitiveLockedCmpxchg(type,
+                                codegen,
+                                /* expected_value= */ eax,
+                                /* new_value= */ temp_float,
+                                reference,
+                                offset,
+                                temp);
+ __ j(kNotZero, &try_again);
+
+ // The old value is present in EAX.
+ codegen->Move32(locations->Out(), eax);
+ break;
+ }
+ default:
+ UNREACHABLE();
+ }
+
+ __ Bind(slow_path->GetExitLabel());
+}
+
UNIMPLEMENTED_INTRINSIC(X86, MathRoundDouble)
UNIMPLEMENTED_INTRINSIC(X86, ReferenceGetReferent)
UNIMPLEMENTED_INTRINSIC(X86, FloatIsInfinite)
@@ -3831,7 +3987,6 @@
UNIMPLEMENTED_INTRINSIC(X86, VarHandleCompareAndExchangeAcquire)
UNIMPLEMENTED_INTRINSIC(X86, VarHandleCompareAndExchangeRelease)
UNIMPLEMENTED_INTRINSIC(X86, VarHandleGetAcquire)
-UNIMPLEMENTED_INTRINSIC(X86, VarHandleGetAndAdd)
UNIMPLEMENTED_INTRINSIC(X86, VarHandleGetAndAddAcquire)
UNIMPLEMENTED_INTRINSIC(X86, VarHandleGetAndAddRelease)
UNIMPLEMENTED_INTRINSIC(X86, VarHandleGetAndBitwiseAnd)
diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc
index c8ea229..da53138 100644
--- a/compiler/utils/x86/assembler_x86.cc
+++ b/compiler/utils/x86/assembler_x86.cc
@@ -3677,6 +3677,29 @@
}
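+// XADD exchange-and-add: the source register receives the original memory
+// operand and memory receives the sum; a LOCK prefix makes the
+// read-modify-write atomic.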
+void X86Assembler::xaddb(const Address& address, ByteRegister reg) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x0F);
+ EmitUint8(0xC0);
+ EmitOperand(reg, address);
+}
+
+void X86Assembler::xaddw(const Address& address, Register reg) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitOperandSizeOverride();
+ EmitUint8(0x0F);
+ EmitUint8(0xC1);
+ EmitOperand(reg, address);
+}
+
+void X86Assembler::xaddl(const Address& address, Register reg) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0x0F);
+ EmitUint8(0xC1);
+ EmitOperand(reg, address);
+}
+
void X86Assembler::mfence() {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
EmitUint8(0x0F);
diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h
index c546927..1c4f826 100644
--- a/compiler/utils/x86/assembler_x86.h
+++ b/compiler/utils/x86/assembler_x86.h
@@ -817,6 +817,10 @@
void cmpxchgl(const Address& address, Register reg);
void cmpxchg8b(const Address& address);
+ void xaddb(const Address& address, ByteRegister reg);
+ void xaddw(const Address& address, Register reg);
+ void xaddl(const Address& address, Register reg);
+
void mfence();
X86Assembler* fs();
@@ -859,6 +863,30 @@
lock()->cmpxchg8b(address);
}
+ void LockXaddb(const Address& address, Register reg) {
+    // For testing purposes: allows assembler tests to pass a general-purpose Register.
+ lock()->xaddb(address, static_cast<ByteRegister>(reg));
+ }
+
+ void LockXaddb(const Address& address, ByteRegister reg) {
+ lock()->xaddb(address, reg);
+ }
+
+ void LockXaddw(const Address& address, Register reg) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+    // Make sure that the operand size override prefix is emitted before the lock prefix:
+    // we test against clang, which enforces this byte order.
+ EmitOperandSizeOverride();
+ EmitUint8(0xF0);
+ EmitUint8(0x0F);
+ EmitUint8(0xC1);
+ EmitOperand(reg, address);
+ }
+
+ void LockXaddl(const Address& address, Register reg) {
+ lock()->xaddl(address, reg);
+ }
+
//
// Misc. functionality
//
diff --git a/compiler/utils/x86/assembler_x86_test.cc b/compiler/utils/x86/assembler_x86_test.cc
index d1c2cbe..ee0f8a1 100644
--- a/compiler/utils/x86/assembler_x86_test.cc
+++ b/compiler/utils/x86/assembler_x86_test.cc
@@ -351,6 +351,21 @@
"lock cmpxchg8b {mem}"), "lock_cmpxchg8b");
}
+TEST_F(AssemblerX86Test, LockXaddb) {
+ DriverStr(RepeatAw(&x86::X86Assembler::LockXaddb,
+ "lock xaddb %{reg}, {mem}"), "lock_xaddb");
+}
+
+TEST_F(AssemblerX86Test, LockXaddw) {
+ DriverStr(RepeatAr(&x86::X86Assembler::LockXaddw,
+ "lock xaddw %{reg}, {mem}"), "lock_xaddw");
+}
+
+TEST_F(AssemblerX86Test, LockXaddl) {
+ DriverStr(RepeatAR(&x86::X86Assembler::LockXaddl,
+ "lock xaddl %{reg}, {mem}"), "lock_xaddl");
+}
+
TEST_F(AssemblerX86Test, FPUIntegerLoadS) {
DriverStr(RepeatA(&x86::X86Assembler::filds, "fildl {mem}"), "fildd");
}
diff --git a/test/712-varhandle-invocations/util-src/generate_java.py b/test/712-varhandle-invocations/util-src/generate_java.py
index f9927db..5d4bced 100644
--- a/test/712-varhandle-invocations/util-src/generate_java.py
+++ b/test/712-varhandle-invocations/util-src/generate_java.py
@@ -77,7 +77,7 @@
BOOLEAN_TYPE = ValueType("boolean", "Boolean", [ "true", "false" ], ordinal = 0, width = 1, supports_numeric=False)
BYTE_TYPE=ValueType("byte", "Byte", [ "(byte) -128", "(byte) -61", "(byte) 7", "(byte) 127", "(byte) 33" ], ordinal=1, width=1)
SHORT_TYPE=ValueType("short", "Short", [ "(short) -32768", "(short) -384", "(short) 32767", "(short) 0xaa55" ], ordinal=2, width=2)
-CHAR_TYPE=ValueType("char", "Character", [ r"'A'", r"'#'", r"'$'", r"'Z'", r"'t'", r"'c'" ], ordinal=3, width=2)
+CHAR_TYPE=ValueType("char", "Character", [ r"'A'", r"'#'", r"'$'", r"'Z'", r"'t'", r"'c'", r"Character.MAX_VALUE", r"Character.MIN_LOW_SURROGATE"], ordinal=3, width=2)
INT_TYPE=ValueType("int", "Integer", [ "-0x01234567", "0x7f6e5d4c", "0x12345678", "0x10215220", "42" ], ordinal=4, width=4)
LONG_TYPE=ValueType("long", "Long", [ "-0x0123456789abcdefl", "0x789abcdef0123456l", "0xfedcba9876543210l" ], ordinal=5, width=8)
FLOAT_TYPE=ValueType("float", "Float", [ "-7.77e23f", "1.234e-17f", "3.40e36f", "-8.888e3f", "4.442e11f" ], ordinal=6, width=4, supports_bitwise=False)