diff options
author | 2020-09-14 13:22:40 +0000 | |
---|---|---|
committer | 2020-09-18 08:21:00 +0000 | |
commit | eb2c9dd52679d590428a33556f45fb49eaef5e0e (patch) | |
tree | 9bb849944b66c9b3ec20536e7c5a37af41a54195 | |
parent | f61e0617b89e4ad60373f36da56812c20a3e0b20 (diff) |
X86: Implement VarHandle.getAndAdd intrinsic
This commit implements the VarHandle getAndAdd intrinsic. This also entailed
adding the xadd instruction and tests for it.
Test: art/test.py --host -r -t 712-varhandle-invocation --32
Test: m test-art-host-gtest
Bug: 65872996
Change-Id: I84dd95ba6464c8a73ace03a13817147c7099677a
-rw-r--r-- | compiler/optimizing/intrinsics_x86.cc | 181 | ||||
-rw-r--r-- | compiler/utils/x86/assembler_x86.cc | 23 | ||||
-rw-r--r-- | compiler/utils/x86/assembler_x86.h | 28 | ||||
-rw-r--r-- | compiler/utils/x86/assembler_x86_test.cc | 15 | ||||
-rw-r--r-- | test/712-varhandle-invocations/util-src/generate_java.py | 2 |
5 files changed, 235 insertions, 14 deletions
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc index 823b011a4f..4e24aab1ad 100644 --- a/compiler/optimizing/intrinsics_x86.cc +++ b/compiler/optimizing/intrinsics_x86.cc @@ -1962,18 +1962,16 @@ void IntrinsicLocationsBuilderX86::VisitUnsafeCASObject(HInvoke* invoke) { CreateIntIntIntIntIntToInt(allocator_, DataType::Type::kReference, invoke); } -static void GenPrimitiveCAS(DataType::Type type, - CodeGeneratorX86* codegen, - Location expected_value, - Location new_value, - Register base, - Register offset, - Location out, - // Only necessary for floating point - Register temp = Register::kNoRegister) { +static void GenPrimitiveLockedCmpxchg(DataType::Type type, + CodeGeneratorX86* codegen, + Location expected_value, + Location new_value, + Register base, + Register offset, + // Only necessary for floating point + Register temp = Register::kNoRegister) { X86Assembler* assembler = down_cast<X86Assembler*>(codegen->GetAssembler()); - DCHECK_EQ(out.AsRegister<Register>(), EAX); if (DataType::Kind(type) == DataType::Type::kInt32) { DCHECK_EQ(expected_value.AsRegister<Register>(), EAX); } @@ -2016,6 +2014,21 @@ static void GenPrimitiveCAS(DataType::Type type, } // LOCK CMPXCHG/LOCK CMPXCHG8B have full barrier semantics, and we // don't need scheduling barriers at this time. +} + +static void GenPrimitiveCAS(DataType::Type type, + CodeGeneratorX86* codegen, + Location expected_value, + Location new_value, + Register base, + Register offset, + Location out, + // Only necessary for floating point + Register temp = Register::kNoRegister) { + X86Assembler* assembler = down_cast<X86Assembler*>(codegen->GetAssembler()); + DCHECK_EQ(out.AsRegister<Register>(), EAX); + + GenPrimitiveLockedCmpxchg(type, codegen, expected_value, new_value, base, offset, temp); // Convert ZF into the Boolean result. 
__ setb(kZero, out.AsRegister<Register>()); @@ -3133,6 +3146,7 @@ static bool IsValidFieldVarHandleExpected(HInvoke* invoke) { return false; } + uint32_t number_of_arguments = invoke->GetNumberOfArguments(); DataType::Type type = invoke->GetType(); mirror::VarHandle::AccessModeTemplate access_mode_template = mirror::VarHandle::GetAccessModeTemplateByIntrinsic(invoke->GetIntrinsic()); @@ -3146,7 +3160,6 @@ static bool IsValidFieldVarHandleExpected(HInvoke* invoke) { if (type != DataType::Type::kBool) { return false; } - uint32_t number_of_arguments = invoke->GetNumberOfArguments(); uint32_t expected_value_index = number_of_arguments - 2; uint32_t new_value_index = number_of_arguments - 1; DataType::Type expected_value_type = GetDataTypeFromShorty(invoke, expected_value_index); @@ -3157,13 +3170,25 @@ static bool IsValidFieldVarHandleExpected(HInvoke* invoke) { } break; } + case mirror::VarHandle::AccessModeTemplate::kGetAndUpdate: { + DataType::Type value_type = GetDataTypeFromShorty(invoke, number_of_arguments - 1); + if (invoke->GetIntrinsic() == Intrinsics::kVarHandleGetAndAdd) { + if (value_type == DataType::Type::kReference || value_type == DataType::Type::kVoid) { + // We should not add references + return false; + } + } + if (value_type != type) { + return false; + } + break; + } case mirror::VarHandle::AccessModeTemplate::kGet: // The return type should be the same as varType, so it shouldn't be void if (type == DataType::Type::kVoid) { return false; } break; - case mirror::VarHandle::AccessModeTemplate::kGetAndUpdate: case mirror::VarHandle::AccessModeTemplate::kCompareAndExchange: // Unimplemented intrinsics UNREACHABLE(); @@ -3776,6 +3801,137 @@ void IntrinsicCodeGeneratorX86::VisitVarHandleWeakCompareAndSetRelease(HInvoke* GenerateVarHandleCompareAndSet(invoke, codegen_); } +void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndAdd(HInvoke* invoke) { + // The only read barrier implementation supporting the + // VarHandleGet intrinsic is the Baker-style 
read barriers. + if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) { + return; + } + + if (!IsValidFieldVarHandleExpected(invoke)) { + return; + } + + // The last argument should be the value we intend to set. + uint32_t value_index = invoke->GetNumberOfArguments() - 1; + DataType::Type value_type = GetDataTypeFromShorty(invoke, value_index); + if (DataType::Is64BitType(value_type)) { + // We avoid the case of an Int64/Float64 value because we would need to place it in a register + // pair. If the slow path is taken, the ParallelMove might fail to move the pair according to + // the X86DexCallingConvention in case of an overlap (e.g., move the 64 bit value from + // <EAX, EBX> to <EBX, ECX>). (Bug: b/168687887) + return; + } + + ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator(); + LocationSummary* locations = new (allocator) LocationSummary( + invoke, LocationSummary::kCallOnSlowPath, kIntrinsified); + locations->AddTemp(Location::RequiresRegister()); + locations->AddTemp(Location::RequiresRegister()); + locations->SetInAt(0, Location::RequiresRegister()); + size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke); + if (expected_coordinates_count == 1u) { + // For instance fields, this is the source object + locations->SetInAt(1, Location::RequiresRegister()); + } else { + // For static fields, we need another temp because one will be busy with the declaring class. + locations->AddTemp(Location::RequiresRegister()); + } + + if (DataType::IsFloatingPointType(value_type)) { + locations->AddTemp(Location::RequiresFpuRegister()); + locations->AddTemp(Location::RegisterLocation(EAX)); + locations->SetInAt(value_index, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister()); + } else { + // xadd updates the register argument with the old value. ByteRegister required for xaddb. 
+ locations->SetInAt(value_index, Location::RegisterLocation(EAX)); + locations->SetOut(Location::RegisterLocation(EAX)); + } +} + +void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndAdd(HInvoke* invoke) { + // The only read barrier implementation supporting the + // VarHandleGet intrinsic is the Baker-style read barriers. + DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier); + + CodeGeneratorX86* codegen = down_cast<CodeGeneratorX86*>(codegen_); + X86Assembler* assembler = codegen->GetAssembler(); + LocationSummary* locations = invoke->GetLocations(); + uint32_t number_of_arguments = invoke->GetNumberOfArguments(); + uint32_t value_index = number_of_arguments - 1; + DataType::Type type = GetDataTypeFromShorty(invoke, value_index); + DCHECK_EQ(type, invoke->GetType()); + Location value_loc = locations->InAt(value_index); + Register vh_object = locations->InAt(0).AsRegister<Register>(); + Register temp = locations->GetTemp(0).AsRegister<Register>(); + SlowPathCode* slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86(invoke); + codegen->AddSlowPath(slow_path); + + GenerateVarHandleCommonChecks(invoke, temp, slow_path, assembler); + + GenerateVarTypePrimitiveTypeCheck(vh_object, temp, type, slow_path, assembler); + + Register offset = locations->GetTemp(1).AsRegister<Register>(); + // Get the field referred by the VarHandle. The returned register contains the object reference + // or the declaring class. The field offset will be placed in 'offset'. For static fields, the + // declaring class will be placed in 'temp' register. + Register reference = GenerateVarHandleFieldReference(invoke, codegen, temp, offset); + + size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke); + temp = (expected_coordinates_count == 1u) ? 
temp : locations->GetTemp(2).AsRegister<Register>(); + DCHECK_NE(temp, reference); + Address field_addr(reference, offset, TIMES_1, 0); + + switch (type) { + case DataType::Type::kInt8: + __ LockXaddb(field_addr, value_loc.AsRegister<ByteRegister>()); + __ movsxb(locations->Out().AsRegister<Register>(), + locations->Out().AsRegister<ByteRegister>()); + break; + case DataType::Type::kInt16: + __ LockXaddw(field_addr, value_loc.AsRegister<Register>()); + __ movsxw(locations->Out().AsRegister<Register>(), locations->Out().AsRegister<Register>()); + break; + case DataType::Type::kUint16: + __ LockXaddw(field_addr, value_loc.AsRegister<Register>()); + __ movzxw(locations->Out().AsRegister<Register>(), locations->Out().AsRegister<Register>()); + break; + case DataType::Type::kInt32: + __ LockXaddl(field_addr, value_loc.AsRegister<Register>()); + break; + case DataType::Type::kFloat32: { + Location temp_float = + (expected_coordinates_count == 1u) ? locations->GetTemp(2) : locations->GetTemp(3); + DCHECK(temp_float.IsFpuRegister()); + Location eax = Location::RegisterLocation(EAX); + NearLabel try_again; + __ Bind(&try_again); + codegen->MoveFromMemory(type, temp_float, reference, offset); + __ movd(EAX, temp_float.AsFpuRegister<XmmRegister>()); + __ addss(temp_float.AsFpuRegister<XmmRegister>(), + value_loc.AsFpuRegister<XmmRegister>()); + GenPrimitiveLockedCmpxchg(type, + codegen, + /* expected= */ eax, + /* new_value= */ temp_float, + reference, + offset, + temp); + __ j(kNotZero, &try_again); + + // The old value is present in EAX. 
+ codegen->Move32(locations->Out(), eax); + break; + } + default: + UNREACHABLE(); + } + + __ Bind(slow_path->GetExitLabel()); +} + + UNIMPLEMENTED_INTRINSIC(X86, MathRoundDouble) UNIMPLEMENTED_INTRINSIC(X86, ReferenceGetReferent) UNIMPLEMENTED_INTRINSIC(X86, FloatIsInfinite) @@ -3831,7 +3987,6 @@ UNIMPLEMENTED_INTRINSIC(X86, VarHandleCompareAndExchange) UNIMPLEMENTED_INTRINSIC(X86, VarHandleCompareAndExchangeAcquire) UNIMPLEMENTED_INTRINSIC(X86, VarHandleCompareAndExchangeRelease) UNIMPLEMENTED_INTRINSIC(X86, VarHandleGetAcquire) -UNIMPLEMENTED_INTRINSIC(X86, VarHandleGetAndAdd) UNIMPLEMENTED_INTRINSIC(X86, VarHandleGetAndAddAcquire) UNIMPLEMENTED_INTRINSIC(X86, VarHandleGetAndAddRelease) UNIMPLEMENTED_INTRINSIC(X86, VarHandleGetAndBitwiseAnd) diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc index c8ea229992..da53138239 100644 --- a/compiler/utils/x86/assembler_x86.cc +++ b/compiler/utils/x86/assembler_x86.cc @@ -3677,6 +3677,29 @@ void X86Assembler::cmpxchg8b(const Address& address) { } +void X86Assembler::xaddb(const Address& address, ByteRegister reg) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x0F); + EmitUint8(0xC0); + EmitOperand(reg, address); +} + +void X86Assembler::xaddw(const Address& address, Register reg) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitOperandSizeOverride(); + EmitUint8(0x0F); + EmitUint8(0xC1); + EmitOperand(reg, address); +} + +void X86Assembler::xaddl(const Address& address, Register reg) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0x0F); + EmitUint8(0xC1); + EmitOperand(reg, address); +} + + void X86Assembler::mfence() { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitUint8(0x0F); diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h index c5469270e4..1c4f82663e 100644 --- a/compiler/utils/x86/assembler_x86.h +++ b/compiler/utils/x86/assembler_x86.h @@ -817,6 +817,10 @@ class X86Assembler final : 
public Assembler { void cmpxchgl(const Address& address, Register reg); void cmpxchg8b(const Address& address); + void xaddb(const Address& address, ByteRegister reg); + void xaddw(const Address& address, Register reg); + void xaddl(const Address& address, Register reg); + void mfence(); X86Assembler* fs(); @@ -859,6 +863,30 @@ class X86Assembler final : public Assembler { lock()->cmpxchg8b(address); } + void LockXaddb(const Address& address, Register reg) { + // For testing purpose + lock()->xaddb(address, static_cast<ByteRegister>(reg)); + } + + void LockXaddb(const Address& address, ByteRegister reg) { + lock()->xaddb(address, reg); + } + + void LockXaddw(const Address& address, Register reg) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + // We make sure that the operand size override bytecode is emited before the lock bytecode. + // We test against clang which enforces this bytecode order. + EmitOperandSizeOverride(); + EmitUint8(0xF0); + EmitUint8(0x0F); + EmitUint8(0xC1); + EmitOperand(reg, address); + } + + void LockXaddl(const Address& address, Register reg) { + lock()->xaddl(address, reg); + } + // // Misc. 
functionality // diff --git a/compiler/utils/x86/assembler_x86_test.cc b/compiler/utils/x86/assembler_x86_test.cc index d1c2cbe3cb..ee0f8a1952 100644 --- a/compiler/utils/x86/assembler_x86_test.cc +++ b/compiler/utils/x86/assembler_x86_test.cc @@ -351,6 +351,21 @@ TEST_F(AssemblerX86Test, LockCmpxchg8b) { "lock cmpxchg8b {mem}"), "lock_cmpxchg8b"); } +TEST_F(AssemblerX86Test, LockXaddb) { + DriverStr(RepeatAw(&x86::X86Assembler::LockXaddb, + "lock xaddb %{reg}, {mem}"), "lock_xaddb"); +} + +TEST_F(AssemblerX86Test, LockXaddw) { + DriverStr(RepeatAr(&x86::X86Assembler::LockXaddw, + "lock xaddw %{reg}, {mem}"), "lock_xaddw"); +} + +TEST_F(AssemblerX86Test, LockXaddl) { + DriverStr(RepeatAR(&x86::X86Assembler::LockXaddl, + "lock xaddl %{reg}, {mem}"), "lock_xaddl"); +} + TEST_F(AssemblerX86Test, FPUIntegerLoadS) { DriverStr(RepeatA(&x86::X86Assembler::filds, "fildl {mem}"), "fildd"); } diff --git a/test/712-varhandle-invocations/util-src/generate_java.py b/test/712-varhandle-invocations/util-src/generate_java.py index f9927db1b1..5d4bced7ab 100644 --- a/test/712-varhandle-invocations/util-src/generate_java.py +++ b/test/712-varhandle-invocations/util-src/generate_java.py @@ -77,7 +77,7 @@ class ValueType(JavaType): BOOLEAN_TYPE = ValueType("boolean", "Boolean", [ "true", "false" ], ordinal = 0, width = 1, supports_numeric=False) BYTE_TYPE=ValueType("byte", "Byte", [ "(byte) -128", "(byte) -61", "(byte) 7", "(byte) 127", "(byte) 33" ], ordinal=1, width=1) SHORT_TYPE=ValueType("short", "Short", [ "(short) -32768", "(short) -384", "(short) 32767", "(short) 0xaa55" ], ordinal=2, width=2) -CHAR_TYPE=ValueType("char", "Character", [ r"'A'", r"'#'", r"'$'", r"'Z'", r"'t'", r"'c'" ], ordinal=3, width=2) +CHAR_TYPE=ValueType("char", "Character", [ r"'A'", r"'#'", r"'$'", r"'Z'", r"'t'", r"'c'", r"Character.MAX_VALUE", r"Character.MIN_LOW_SURROGATE"], ordinal=3, width=2) INT_TYPE=ValueType("int", "Integer", [ "-0x01234567", "0x7f6e5d4c", "0x12345678", "0x10215220", "42" ], 
ordinal=4, width=4) LONG_TYPE=ValueType("long", "Long", [ "-0x0123456789abcdefl", "0x789abcdef0123456l", "0xfedcba9876543210l" ], ordinal=5, width=8) FLOAT_TYPE=ValueType("float", "Float", [ "-7.77e23f", "1.234e-17f", "3.40e36f", "-8.888e3f", "4.442e11f" ], ordinal=6, width=4, supports_bitwise=False) |