Revert "Revert "X86: Use locked add rather than mfence""

This reverts commit 0da3b9117706760e8722029f407da6d0297cc943.

Fix a compilation failure that slipped in somehow.

Change-Id: Ide8681cdc921febb296ea47aa282cc195f154049
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index dda9ea2..e5a487c 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -17,6 +17,7 @@
 #ifndef ART_COMPILER_OPTIMIZING_CODE_GENERATOR_X86_64_H_
 #define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_X86_64_H_
 
+#include "arch/x86_64/instruction_set_features_x86_64.h"
 #include "code_generator.h"
 #include "dex/compiler_enums.h"
 #include "driver/compiler_options.h"
@@ -479,6 +480,18 @@
                           int64_t v,
                           HInstruction* instruction);
 
+  // Emit a fence so that earlier stores complete to memory before later loads.
+  // Unless force_mfence is set, a locked add of 0 to [RSP] is used when the ISA features
+  // prefer it: it avoids serializing device memory and touches (but does not change) the
+  // top of the stack. Force mfence for non-temporal stores; locked add must not order them.
+  void MemoryFence(bool force_mfence = false) {
+    if (force_mfence || !isa_features_.PrefersLockedAddSynchronization()) {
+      assembler_.mfence();
+    } else {
+      assembler_.lock()->addl(Address(CpuRegister(RSP), 0), Immediate(0));
+    }
+  }
+
  private:
   // Factored implementation of GenerateFieldLoadWithBakerReadBarrier
   // and GenerateArrayLoadWithBakerReadBarrier.