arm64: Implement VarHandle GetAndSet intrinsics.
Add an extra test to 160-read-barrier-stress. The main path
is sufficiently exercised by 712-varhandle-invocations.
Using benchmarks provided by
https://android-review.googlesource.com/1420959
on blueline little cores with fixed frequency 1420800 kHz:
                                   before  after
GetAndSetStaticFieldInt            23.809  0.027
GetAndSetStaticFieldString         27.112  0.035
GetAndSetFieldInt                  26.988  0.028
GetAndSetFieldString               29.626  0.034
GetAndSetAcquireStaticFieldInt     23.900  0.025
GetAndSetAcquireStaticFieldString  27.084  0.034
GetAndSetAcquireFieldInt           26.972  0.026
GetAndSetAcquireFieldString        29.617  0.032
GetAndSetReleaseStaticFieldInt     23.876  0.027
GetAndSetReleaseStaticFieldString  27.093  0.035
GetAndSetReleaseFieldInt           26.969  0.028
GetAndSetReleaseFieldString        29.609  0.034
Test: testrunner.py --target --64 --optimizing
Test: Repeat with ART_USE_READ_BARRIER=false ART_HEAP_POISONING=true.
Test: Repeat with ART_READ_BARRIER_TYPE=TABLELOOKUP.
(Ignore two pre-existing checker test failures.)
Bug: 71781600
Change-Id: I6a03858b1446354919cd4d08348ef93b725aafc6
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index dba8a2d..2de2cfb 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -729,7 +729,9 @@
mirror::VarHandle::GetAccessModeTemplateByIntrinsic(intrinsic) ==
mirror::VarHandle::AccessModeTemplate::kCompareAndSet ||
mirror::VarHandle::GetAccessModeTemplateByIntrinsic(intrinsic) ==
- mirror::VarHandle::AccessModeTemplate::kCompareAndExchange)
+ mirror::VarHandle::AccessModeTemplate::kCompareAndExchange ||
+ mirror::VarHandle::GetAccessModeTemplateByIntrinsic(intrinsic) ==
+ mirror::VarHandle::AccessModeTemplate::kGetAndUpdate)
<< instruction_->AsInvoke()->GetIntrinsic();
DCHECK_EQ(offset_, 0u);
DCHECK(index_.IsRegister());
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index 34d3f1a..fb72fcf 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -1365,6 +1365,29 @@
GenUnsafeCas(invoke, DataType::Type::kReference, codegen_);
}
+static void GenerateGetAndSet(CodeGeneratorARM64* codegen,
+ DataType::Type type,
+ std::memory_order order,
+ Register ptr,
+ Register new_value,
+ Register old_value) {
+ MacroAssembler* masm = codegen->GetVIXLAssembler();
+ UseScratchRegisterScope temps(masm);
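+ // `store_result` receives the store-exclusive status: 0 on success, non-zero on failure.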
+ Register store_result = temps.AcquireW();
+
+ bool use_load_acquire =
+ (order == std::memory_order_acquire) || (order == std::memory_order_seq_cst);
+ bool use_store_release =
+ (order == std::memory_order_release) || (order == std::memory_order_seq_cst);
+ DCHECK(use_load_acquire || use_store_release);
+
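+ // Retry loop: load-exclusive the old value, store-exclusive the new value, and loop back
+ // while the store-exclusive reports failure.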
+ vixl::aarch64::Label loop_label;
+ __ Bind(&loop_label);
+ EmitLoadExclusive(codegen, type, ptr, old_value, use_load_acquire);
+ EmitStoreExclusive(codegen, type, ptr, store_result, new_value, use_store_release);
+ __ Cbnz(store_result, &loop_label);
+}
+
void IntrinsicLocationsBuilderARM64::VisitStringCompareTo(HInvoke* invoke) {
LocationSummary* locations =
new (allocator_) LocationSummary(invoke,
@@ -3921,9 +3944,15 @@
}
break;
}
- case mirror::VarHandle::AccessModeTemplate::kGetAndUpdate:
- LOG(FATAL) << "Unimplemented!";
- UNREACHABLE();
+ case mirror::VarHandle::AccessModeTemplate::kGetAndUpdate: {
+ DataType::Type value_type = GetDataTypeFromShorty(invoke, number_of_arguments - 1);
+ DCHECK(!IsVarHandleGetAndAdd(invoke)); // Unimplemented.
+ DCHECK(!IsVarHandleGetAndBitwiseOp(invoke)); // Unimplemented.
+ if (value_type != return_type) {
+ return false;
+ }
+ break;
+ }
}
return true;
@@ -4483,6 +4512,154 @@
invoke, codegen_, std::memory_order_release, /*return_success=*/ true, /*strong=*/ false);
}
+static void CreateVarHandleGetAndSetLocations(HInvoke* invoke) {
+ if (!IsValidFieldVarHandleExpected(invoke)) {
+ return;
+ }
+
+ if ((kEmitCompilerReadBarrier && !kUseBakerReadBarrier) &&
+ invoke->GetType() == DataType::Type::kReference) {
+ // Unsupported for non-Baker read barrier because the artReadBarrierSlow() ignores
+ // the passed reference and reloads it from the field, thus seeing the new value
+ // that we have just stored. (And it also gets the memory visibility wrong.)
+ return;
+ }
+
+ LocationSummary* locations = CreateVarHandleFieldLocations(invoke);
+
+ // Add a temporary for offset.
+ if ((kEmitCompilerReadBarrier && !kUseBakerReadBarrier) &&
+ GetExpectedVarHandleCoordinatesCount(invoke) == 0u) {
+ // To preserve the offset value across the non-Baker read barrier slow path
+ // for loading the declaring class, use a fixed callee-save register.
+ locations->AddTemp(Location::RegisterLocation(CTZ(kArm64CalleeSaveRefSpills)));
+ } else {
+ locations->AddTemp(Location::RequiresRegister());
+ }
+ if (DataType::IsFloatingPointType(invoke->GetType()) &&
+ !IsConstantZeroBitPattern(invoke->InputAt(invoke->GetNumberOfArguments() - 1u))) {
+ // Add a temporary for `old_value` if floating point `new_value` takes a scratch register.
+ locations->AddTemp(Location::RequiresRegister());
+ }
+}
+
+static void GenerateVarHandleGetAndSet(HInvoke* invoke,
+ CodeGeneratorARM64* codegen,
+ std::memory_order order) {
+ // Implemented only for fields.
+ size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
+ DCHECK_LE(expected_coordinates_count, 1u);
+ uint32_t new_value_index = invoke->GetNumberOfArguments() - 1;
+ DataType::Type value_type = GetDataTypeFromShorty(invoke, new_value_index);
+
+ MacroAssembler* masm = codegen->GetVIXLAssembler();
+ LocationSummary* locations = invoke->GetLocations();
+ CPURegister new_value = InputCPURegisterOrZeroRegAt(invoke, new_value_index);
+ CPURegister out = helpers::OutputCPURegister(invoke);
+
+ SlowPathCodeARM64* slow_path =
+ new (codegen->GetScopedAllocator()) IntrinsicSlowPathARM64(invoke);
+ codegen->AddSlowPath(slow_path);
+
+ GenerateVarHandleFieldCheck(invoke, codegen, slow_path);
+ GenerateVarHandleAccessModeAndVarTypeChecks(invoke, codegen, slow_path, value_type);
+
+ // The temporary allocated for loading `offset`.
+ Register offset =
+ WRegisterFrom(locations->GetTemp((expected_coordinates_count == 0u) ? 1u : 0u));
+ // The object reference in which to exchange the field value.
+ Register object = (expected_coordinates_count == 0u)
+ ? WRegisterFrom(locations->GetTemp(0u))
+ : InputRegisterAt(invoke, 1);
+
+ GenerateVarHandleFieldReference(invoke, codegen, object, offset);
+
+ // This must be done before acquiring the scratch registers below, as MarkGCCard also uses VIXL temps.
+ if (CodeGenerator::StoreNeedsWriteBarrier(value_type, invoke->InputAt(new_value_index))) {
+ // Mark card for object, as a new reference value will be stored into it.
+ bool new_value_can_be_null = true; // TODO: Worth finding out this information?
+ codegen->MarkGCCard(object, new_value.W(), new_value_can_be_null);
+ }
+
+ // Reuse the `offset` temporary for the pointer to the field, except
+ // for references that need the offset for the non-Baker read barrier.
+ UseScratchRegisterScope temps(masm);
+ Register tmp_ptr = offset.X();
+ if ((kEmitCompilerReadBarrier && !kUseBakerReadBarrier) &&
+ value_type == DataType::Type::kReference) {
+ tmp_ptr = temps.AcquireX();
+ }
+ __ Add(tmp_ptr, object.X(), offset.X());
+
+ // Move floating point value to temporary.
+ Register new_value_reg = MoveToTempIfFpRegister(new_value, value_type, masm, &temps);
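+ // The exclusive load/store instructions operate on core registers, so use the integer type
+ // of matching size for floating point values.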
+ DataType::Type load_store_type = DataType::IsFloatingPointType(value_type)
+ ? ((value_type == DataType::Type::kFloat32) ? DataType::Type::kInt32 : DataType::Type::kInt64)
+ : value_type;
+
+ // Prepare register for old value.
+ Register old_value;
+ if (DataType::IsFloatingPointType(value_type) && !new_value.IsZero()) {
+ // We need a temporary register, but we have already used a scratch register for
+ // the new value (unless it is a zero bit pattern, +0.0f or +0.0) and we need another one
+ // in GenerateGetAndSet(). We have allocated a normal temporary to handle that.
+ Location temp = locations->GetTemp((expected_coordinates_count == 0u) ? 2u : 1u);
+ old_value =
+ (load_store_type == DataType::Type::kInt64) ? XRegisterFrom(temp) : WRegisterFrom(temp);
+ } else if ((kEmitCompilerReadBarrier && kUseBakerReadBarrier) &&
+ value_type == DataType::Type::kReference) {
+ // Load the old value initially to a scratch register.
+ // We shall move it to `out` later with a read barrier.
+ old_value = temps.AcquireW();
+ } else {
+ // Use the output register for the old value.
+ old_value = (load_store_type == DataType::Type::kInt64) ? out.X() : out.W();
+ }
+
+ GenerateGetAndSet(codegen, load_store_type, order, tmp_ptr, new_value_reg, old_value);
+
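+ // If needed, move the old value to the output register: FMOV for floating point values,
+ // or with a read barrier applied for heap references when read barriers are enabled.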
+ if (DataType::IsFloatingPointType(value_type)) {
+ __ Fmov((value_type == DataType::Type::kFloat64) ? out.D() : out.S(), old_value);
+ } else if (kEmitCompilerReadBarrier && value_type == DataType::Type::kReference) {
+ if (kUseBakerReadBarrier) {
+ codegen->GenerateUnsafeCasOldValueMovWithBakerReadBarrier(out.W(), old_value);
+ } else {
+ codegen->GenerateReadBarrierSlow(
+ invoke,
+ Location::RegisterLocation(out.W().GetCode()),
+ Location::RegisterLocation(old_value.GetCode()),
+ Location::RegisterLocation(object.GetCode()),
+ /*offset=*/ 0u,
+ /*index=*/ Location::RegisterLocation(offset.GetCode()));
+ }
+ }
+ __ Bind(slow_path->GetExitLabel());
+}
+
+void IntrinsicLocationsBuilderARM64::VisitVarHandleGetAndSet(HInvoke* invoke) {
+ CreateVarHandleGetAndSetLocations(invoke);
+}
+
+void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndSet(HInvoke* invoke) {
+ GenerateVarHandleGetAndSet(invoke, codegen_, std::memory_order_seq_cst);
+}
+
+void IntrinsicLocationsBuilderARM64::VisitVarHandleGetAndSetAcquire(HInvoke* invoke) {
+ CreateVarHandleGetAndSetLocations(invoke);
+}
+
+void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndSetAcquire(HInvoke* invoke) {
+ GenerateVarHandleGetAndSet(invoke, codegen_, std::memory_order_acquire);
+}
+
+void IntrinsicLocationsBuilderARM64::VisitVarHandleGetAndSetRelease(HInvoke* invoke) {
+ CreateVarHandleGetAndSetLocations(invoke);
+}
+
+void IntrinsicCodeGeneratorARM64::VisitVarHandleGetAndSetRelease(HInvoke* invoke) {
+ GenerateVarHandleGetAndSet(invoke, codegen_, std::memory_order_release);
+}
+
UNIMPLEMENTED_INTRINSIC(ARM64, StringStringIndexOf);
UNIMPLEMENTED_INTRINSIC(ARM64, StringStringIndexOfAfter);
UNIMPLEMENTED_INTRINSIC(ARM64, StringBufferAppend);
@@ -4522,9 +4699,6 @@
UNIMPLEMENTED_INTRINSIC(ARM64, VarHandleGetAndBitwiseXor)
UNIMPLEMENTED_INTRINSIC(ARM64, VarHandleGetAndBitwiseXorAcquire)
UNIMPLEMENTED_INTRINSIC(ARM64, VarHandleGetAndBitwiseXorRelease)
-UNIMPLEMENTED_INTRINSIC(ARM64, VarHandleGetAndSet)
-UNIMPLEMENTED_INTRINSIC(ARM64, VarHandleGetAndSetAcquire)
-UNIMPLEMENTED_INTRINSIC(ARM64, VarHandleGetAndSetRelease)
UNREACHABLE_INTRINSICS(ARM64)
diff --git a/compiler/optimizing/intrinsics_utils.h b/compiler/optimizing/intrinsics_utils.h
index b4ef5dd..3349c52 100644
--- a/compiler/optimizing/intrinsics_utils.h
+++ b/compiler/optimizing/intrinsics_utils.h
@@ -118,6 +118,34 @@
return DataType::FromShorty(shorty[index]);
}
+static inline bool IsVarHandleGetAndBitwiseOp(HInvoke* invoke) {
+ switch (invoke->GetIntrinsic()) {
+ case Intrinsics::kVarHandleGetAndBitwiseOr:
+ case Intrinsics::kVarHandleGetAndBitwiseOrAcquire:
+ case Intrinsics::kVarHandleGetAndBitwiseOrRelease:
+ case Intrinsics::kVarHandleGetAndBitwiseXor:
+ case Intrinsics::kVarHandleGetAndBitwiseXorAcquire:
+ case Intrinsics::kVarHandleGetAndBitwiseXorRelease:
+ case Intrinsics::kVarHandleGetAndBitwiseAnd:
+ case Intrinsics::kVarHandleGetAndBitwiseAndAcquire:
+ case Intrinsics::kVarHandleGetAndBitwiseAndRelease:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static inline bool IsVarHandleGetAndAdd(HInvoke* invoke) {
+ switch (invoke->GetIntrinsic()) {
+ case Intrinsics::kVarHandleGetAndAdd:
+ case Intrinsics::kVarHandleGetAndAddAcquire:
+ case Intrinsics::kVarHandleGetAndAddRelease:
+ return true;
+ default:
+ return false;
+ }
+}
+
} // namespace art
#endif // ART_COMPILER_OPTIMIZING_INTRINSICS_UTILS_H_
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc
index ea2ed57..83a2e39 100644
--- a/compiler/optimizing/intrinsics_x86.cc
+++ b/compiler/optimizing/intrinsics_x86.cc
@@ -3230,34 +3230,6 @@
__ Bind(slow_path->GetExitLabel());
}
-static bool IsVarHandleGetAndBitwiseOp(HInvoke* invoke) {
- switch (invoke->GetIntrinsic()) {
- case Intrinsics::kVarHandleGetAndBitwiseOr:
- case Intrinsics::kVarHandleGetAndBitwiseOrAcquire:
- case Intrinsics::kVarHandleGetAndBitwiseOrRelease:
- case Intrinsics::kVarHandleGetAndBitwiseXor:
- case Intrinsics::kVarHandleGetAndBitwiseXorAcquire:
- case Intrinsics::kVarHandleGetAndBitwiseXorRelease:
- case Intrinsics::kVarHandleGetAndBitwiseAnd:
- case Intrinsics::kVarHandleGetAndBitwiseAndAcquire:
- case Intrinsics::kVarHandleGetAndBitwiseAndRelease:
- return true;
- default:
- return false;
- }
-}
-
-static bool IsVarHandleGetAndAdd(HInvoke* invoke) {
- switch (invoke->GetIntrinsic()) {
- case Intrinsics::kVarHandleGetAndAdd:
- case Intrinsics::kVarHandleGetAndAddAcquire:
- case Intrinsics::kVarHandleGetAndAddRelease:
- return true;
- default:
- return false;
- }
-}
-
static bool IsValidFieldVarHandleExpected(HInvoke* invoke) {
size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
if (expected_coordinates_count > 1u) {
diff --git a/test/160-read-barrier-stress/src/Main.java b/test/160-read-barrier-stress/src/Main.java
index 1902bde..27b7af7 100644
--- a/test/160-read-barrier-stress/src/Main.java
+++ b/test/160-read-barrier-stress/src/Main.java
@@ -30,6 +30,7 @@
testUnsafeCasRegression();
testVarHandleCompareAndSet();
testVarHandleCompareAndExchange();
+ testVarHandleGetAndSet();
}
public static void testFieldReads() {
@@ -361,6 +362,46 @@
}
}
+ public static void testVarHandleGetAndSet() throws Exception {
+ // Initialize local variables for comparison.
+ Object f0000 = manyFields.testField0000;
+ Object f0001 = manyFields.testField0001;
+ Object f1024 = manyFields.testField1024;
+ Object f4444 = manyFields.testField4444;
+ Object f4998 = manyFields.testField4998;
+ Object f4999 = manyFields.testField4999;
+ // Initialize VarHandle objects.
+ VarHandle f0000vh =
+ MethodHandles.lookup().findVarHandle(ManyFields.class, "testField0000", Object.class);
+ VarHandle f0001vh =
+ MethodHandles.lookup().findVarHandle(ManyFields.class, "testField0001", Object.class);
+ VarHandle f1024vh =
+ MethodHandles.lookup().findVarHandle(ManyFields.class, "testField1024", Object.class);
+ VarHandle f4444vh =
+ MethodHandles.lookup().findVarHandle(ManyFields.class, "testField4444", Object.class);
+ VarHandle f4998vh =
+ MethodHandles.lookup().findVarHandle(ManyFields.class, "testField4998", Object.class);
+ VarHandle f4999vh =
+ MethodHandles.lookup().findVarHandle(ManyFields.class, "testField4999", Object.class);
+
+ // Continually check VarHandle.getAndSet() while allocating
+ // over 64MiB memory (with heap size limited to 16MiB), ensuring we run GC and
+ // stress the read barrier implementation if concurrent collector is enabled.
+ for (int i = 0; i != 64 * 1024; ++i) {
+ allocateAtLeast1KiB();
+ ManyFields mf = manyFields; // Load the volatile `manyFields` once on each iteration.
+ // Test VarHandle.getAndSet(). Use reference comparison, not equals().
+ assertSameObject(f0000, f0000vh.getAndSet(mf, f0000)); // Unchanged.
+ assertSameObject(f0001, f0001vh.getAndSet(mf, f0001)); // Unchanged.
+ assertSameObject(f1024, f1024vh.getAndSet(mf, f4444)); // Replaced.
+ assertSameObject(f4444, f1024vh.getAndSet(mf, f1024)); // Replaced.
+ assertSameObject(f1024, f1024vh.getAndSet(mf, f1024)); // Unchanged.
+ assertSameObject(f4444, f4444vh.getAndSet(mf, f4444)); // Unchanged.
+ assertSameObject(f4998, f4998vh.getAndSet(mf, f4998)); // Unchanged.
+ assertSameObject(f4999, f4999vh.getAndSet(mf, f4999)); // Unchanged.
+ }
+ }
+
public static int $noinline$foo() { return 42; }
public static void assertDifferentObject(Object lhs, Object rhs) {