Fix memory fences in the ARM64 UnsafeCas intrinsics.
Also add explanatory comments to the ARM UnsafeCas intrinsics.
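
For reference, below is a minimal standalone sketch (not the ART code itself;
the function name CasFullBarrier and its overall shape are illustrative only)
of the full-barrier CAS sequence the intrinsic emits when acquire/release
instructions are not used. It shows why a full DMB ISH is needed before the
LDXR/STXR loop: a store-only DMB ISHST would let a preceding load sink past
the STXR.

  #include <cstdint>

  // Illustrative full-barrier CAS on AArch64 with explicit DMB ISH fences
  // around an LDXR/STXR loop. Returns true if the swap succeeded.
  bool CasFullBarrier(int32_t* addr, int32_t expected, int32_t desired) {
  #if defined(__aarch64__)
    int32_t observed;
    int32_t status;
    asm volatile(
        "dmb ish\n"              // Full barrier: orders preceding loads AND
                                 // stores before the exclusive sequence.
                                 // DMB ISHST would only order stores.
        "1:\n"
        "ldxr %w0, [%2]\n"       // Load-exclusive the current value.
        "cmp %w0, %w3\n"
        "b.ne 2f\n"              // Give up if it does not match `expected`.
        "stxr %w1, %w4, [%2]\n"  // Try to store `desired`; %w1 == 0 on success.
        "cbnz %w1, 1b\n"         // Retry if the exclusive store failed.
        "2:\n"
        "dmb ish\n"              // Full barrier after the CAS.
        : "=&r"(observed), "=&r"(status)
        : "r"(addr), "r"(expected), "r"(desired)
        : "cc", "memory");
    return observed == expected;
  #else
    // Portable fallback so the sketch compiles on other architectures.
    return __atomic_compare_exchange_n(addr, &expected, desired, false,
                                       __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
  #endif
  }
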
Change-Id: Ic6e4f2c37e468db4582ac8709496a80f3c1f9a6b
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index f723940..81cab86 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -1035,7 +1035,11 @@
__ Stlxr(tmp_32, value, MemOperand(tmp_ptr));
__ Cbnz(tmp_32, &loop_head);
} else {
- __ Dmb(InnerShareable, BarrierWrites);
+ // Emit a `Dmb(InnerShareable, BarrierAll)` (DMB ISH) instruction
+ // instead of a `Dmb(InnerShareable, BarrierWrites)` (DMB ISHST)
+ // one, as the latter allows a preceding load to be delayed past
+ // the STXR instruction below.
+ __ Dmb(InnerShareable, BarrierAll);
__ Bind(&loop_head);
// TODO: When `type == Primitive::kPrimNot`, add a read barrier for
// the reference stored in the object before attempting the CAS,