Revert "Revert "X86: Use locked add rather than mfence""
This reverts commit 0da3b9117706760e8722029f407da6d0297cc943.
Fix a compilation failure that slipped in somehow.
Change-Id: Ide8681cdc921febb296ea47aa282cc195f154049
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index bc3256e..e7f7d57 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -4157,7 +4157,7 @@
*/
switch (kind) {
case MemBarrierKind::kAnyAny: {
- __ mfence();
+ MemoryFence();
break;
}
case MemBarrierKind::kAnyStore:
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index 7c292fa..ebbe486 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -17,6 +17,7 @@
#ifndef ART_COMPILER_OPTIMIZING_CODE_GENERATOR_X86_H_
#define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_X86_H_
+#include "arch/x86/instruction_set_features_x86.h"
#include "code_generator.h"
#include "dex/compiler_enums.h"
#include "driver/compiler_options.h"
@@ -496,6 +497,19 @@
// artReadBarrierForRootSlow.
void GenerateReadBarrierForRootSlow(HInstruction* instruction, Location out, Location root);
+ // Ensure that prior stores complete to memory before subsequent loads.
+ // If the ISA features prefer it, emits a locked add of 0 to [ESP] instead of mfence;
+ // the locked add avoids serializing device memory, but touches (without changing) the
+ // top of the stack. Pass non_temporal = true to order non-temporal stores: emits mfence.
+ void MemoryFence(bool non_temporal = false) {
+ if (!non_temporal && isa_features_.PrefersLockedAddSynchronization()) {
+ assembler_.lock()->addl(Address(ESP, 0), Immediate(0));  // Adding 0 leaves [ESP] unchanged.
+ } else {
+ assembler_.mfence();
+ }
+ }
+
+
private:
// Factored implementation of GenerateFieldLoadWithBakerReadBarrier
// and GenerateArrayLoadWithBakerReadBarrier.
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 92cef5f..e0ad062 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -4029,7 +4029,7 @@
*/
switch (kind) {
case MemBarrierKind::kAnyAny: {
- __ mfence();
+ MemoryFence();
break;
}
case MemBarrierKind::kAnyStore:
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index dda9ea2..e5a487c 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -17,6 +17,7 @@
#ifndef ART_COMPILER_OPTIMIZING_CODE_GENERATOR_X86_64_H_
#define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_X86_64_H_
+#include "arch/x86_64/instruction_set_features_x86_64.h"
#include "code_generator.h"
#include "dex/compiler_enums.h"
#include "driver/compiler_options.h"
@@ -479,6 +480,18 @@
int64_t v,
HInstruction* instruction);
+ // Ensure that prior stores complete to memory before subsequent loads.
+ // If the ISA features prefer it, emits a locked add of 0 to [RSP] instead of mfence;
+ // the locked add avoids serializing device memory, but touches (without changing) the
+ // top of the stack. It should not order non-temporal stores: pass force_mfence = true.
+ void MemoryFence(bool force_mfence = false) {
+ if (!force_mfence && isa_features_.PrefersLockedAddSynchronization()) {
+ assembler_.lock()->addl(Address(CpuRegister(RSP), 0), Immediate(0));  // Adding 0 leaves [RSP] unchanged.
+ } else {
+ assembler_.mfence();
+ }
+ }
+
private:
// Factored implementation of GenerateFieldLoadWithBakerReadBarrier
// and GenerateArrayLoadWithBakerReadBarrier.
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc
index fd454d8..74ade7c 100644
--- a/compiler/optimizing/intrinsics_x86.cc
+++ b/compiler/optimizing/intrinsics_x86.cc
@@ -2005,7 +2005,7 @@
}
if (is_volatile) {
- __ mfence();
+ codegen->MemoryFence();
}
if (type == Primitive::kPrimNot) {
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index ce737e3..6e54dde 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -2080,7 +2080,7 @@
}
if (is_volatile) {
- __ mfence();
+ codegen->MemoryFence();
}
if (type == Primitive::kPrimNot) {