Relaxed memory barriers for x86

x86 provides stronger memory ordering guarantees than weakly ordered
architectures, so most memory barriers can be relaxed. This patch treats all
memory barriers on x86 as scheduling barriers only. In the one case where a
hardware fence is actually required (the StoreLoad case), an mfence is emitted.
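
The change to GenMemBarrier itself is not shown in the hunks below. As a
minimal sketch of the intended logic (helper names such as
ProvidesFullMemoryBarrier, kX86Mfence, and the LIR bookkeeping are
assumptions about the x86 quick backend and may differ from the actual
patch):

    void X86Mir2Lir::GenMemBarrier(MemBarrierKind barrier_kind) {
      // Start off with the last LIR as a candidate barrier; if it is not
      // strong enough, an explicit mfence is generated below.
      LIR* mem_barrier = last_lir_insn_;

      // Per the JSR-133 cookbook, only StoreLoad needs a fence on x86.
      // LoadLoad, LoadStore, and StoreStore are no-ops under the x86
      // memory model and only require a scheduling barrier.
      if (barrier_kind == kStoreLoad) {
        if (mem_barrier == nullptr ||
            !ProvidesFullMemoryBarrier(static_cast<X86OpCode>(mem_barrier->opcode))) {
          mem_barrier = NewLIR0(kX86Mfence);
        }
      }

      // Ensure a scheduling barrier is in place: mark the barrier LIR as
      // defining everything so no memory operation is moved across it.
      if (mem_barrier == nullptr) {
        GenBarrier();
      } else {
        mem_barrier->u.m.def_mask = ENCODE_ALL;
      }
    }

Under that scheme, the CAS sites below lean on the locked cmpxchg itself:
a locked instruction already provides full barrier semantics, so the added
GenMemBarrier(kStoreLoad) calls reduce to pure scheduling barriers and no
extra mfence is emitted.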

Change-Id: I13d02bf3f152083ba9f358052aedb583b0d48640
Signed-off-by: Razvan A Lupusoru <razvan.a.lupusoru@intel.com>
diff --git a/compiler/dex/quick/x86/int_x86.cc b/compiler/dex/quick/x86/int_x86.cc
index c929265..14278a4 100644
--- a/compiler/dex/quick/x86/int_x86.cc
+++ b/compiler/dex/quick/x86/int_x86.cc
@@ -772,6 +772,11 @@
                    : (SRegOffset(rl_src_offset.s_reg_low) + push_offset));
     LoadWordDisp(TargetReg(kSp), srcOffsetSp, rSI);
     NewLIR4(kX86LockCmpxchg8bA, rDI, rSI, 0, 0);
+
+    // After a store we need a barrier in case of a subsequent load. Since the locked
+    // cmpxchg already has full barrier semantics, only a scheduling barrier will be generated.
+    GenMemBarrier(kStoreLoad);
+
     FreeTemp(rSI);
     UnmarkTemp(rSI);
     NewLIR1(kX86Pop32R, rSI);
@@ -784,9 +789,6 @@
     FlushReg(r0);
     LockTemp(r0);
 
-    // Release store semantics, get the barrier out of the way.  TODO: revisit
-    GenMemBarrier(kStoreLoad);
-
     RegLocation rl_object = LoadValue(rl_src_obj, kCoreReg);
     RegLocation rl_new_value = LoadValue(rl_src_new_value, kCoreReg);
 
@@ -801,6 +803,10 @@
     LoadValueDirect(rl_src_expected, r0);
     NewLIR5(kX86LockCmpxchgAR, rl_object.reg.GetReg(), rl_offset.reg.GetReg(), 0, 0, rl_new_value.reg.GetReg());
 
+    // After a store we need a barrier in case of a subsequent load. Since the locked
+    // cmpxchg already has full barrier semantics, only a scheduling barrier will be generated.
+    GenMemBarrier(kStoreLoad);
+
     FreeTemp(r0);
   }