author Aart Bik <ajcbik@google.com> 2015-12-16 19:11:38 +0000
committer Gerrit Code Review <noreply-gerritcodereview@google.com> 2015-12-16 19:11:38 +0000
commit 1c70f18dce7705ff70147ddebf65a97f66df8d5c (patch)
tree 7fa76545c1b91499b86f840cdb8c53050e9c761c /compiler
parent 1f312652e138e05328b9c4c738d3ecbab2d09ae9 (diff)
parent 0da3b9117706760e8722029f407da6d0297cc943 (diff)
Merge "Revert "X86: Use locked add rather than mfence""
Diffstat (limited to 'compiler')
-rw-r--r--  compiler/dex/quick/x86/assemble_x86.cc        1
-rwxr-xr-x  compiler/dex/quick/x86/target_x86.cc         32
-rw-r--r--  compiler/dex/quick/x86/x86_lir.h              1
-rw-r--r--  compiler/optimizing/code_generator_x86.cc     2
-rw-r--r--  compiler/optimizing/code_generator_x86.h     14
-rw-r--r--  compiler/optimizing/code_generator_x86_64.cc  2
-rw-r--r--  compiler/optimizing/code_generator_x86_64.h  13
-rw-r--r--  compiler/optimizing/intrinsics_x86.cc         2
-rw-r--r--  compiler/optimizing/intrinsics_x86_64.cc      2
9 files changed, 11 insertions(+), 58 deletions(-)
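
For context, the change being reverted had swapped ART's x86 kAnyAny (StoreLoad) barrier from mfence to a LOCK-prefixed add of zero to the top of the stack; this merge restores plain mfence throughout. A minimal standalone sketch of the two idioms follows (illustration only, not ART code; GNU inline asm, x86-64 assumed):

    // Illustration only: the two x86 full-barrier (StoreLoad) idioms this
    // commit toggles between.
    inline void fence_mfence() {
      // Serializes all prior loads and stores, including non-temporal stores.
      __asm__ __volatile__("mfence" ::: "memory");
    }

    inline void fence_locked_add() {
      // Any LOCK-prefixed read-modify-write is a full barrier for ordinary
      // write-back memory. Adding 0 to the top of the stack touches (but does
      // not change) a hot, exclusively owned cache line, which is why the
      // reverted change preferred it on some microarchitectures.
      __asm__ __volatile__("lock addl $0, (%%rsp)" ::: "memory", "cc");
    }
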
diff --git a/compiler/dex/quick/x86/assemble_x86.cc b/compiler/dex/quick/x86/assemble_x86.cc
index 1c2a619020..e5d3841b14 100644
--- a/compiler/dex/quick/x86/assemble_x86.cc
+++ b/compiler/dex/quick/x86/assemble_x86.cc
@@ -508,7 +508,6 @@ ENCODING_MAP(Cmp, IS_LOAD, 0, 0,
{ kX86Lfence, kReg, NO_OPERAND, { 0, 0, 0x0F, 0xAE, 0, 5, 0, 0, false }, "Lfence", "" },
{ kX86Mfence, kReg, NO_OPERAND, { 0, 0, 0x0F, 0xAE, 0, 6, 0, 0, false }, "Mfence", "" },
{ kX86Sfence, kReg, NO_OPERAND, { 0, 0, 0x0F, 0xAE, 0, 7, 0, 0, false }, "Sfence", "" },
- { kX86LockAdd32MI8, kMemImm, IS_LOAD | IS_STORE | IS_TERTIARY_OP | REG_USE0 | SETS_CCODES, { 0xF0, 0, 0x83, 0x0, 0x0, 0, 0, 1, false }, "LockAdd32MI8", "[!0r+!1d],!2d" },
EXT_0F_ENCODING_MAP(Imul16, 0x66, 0xAF, REG_USE0 | REG_DEF0 | SETS_CCODES),
EXT_0F_ENCODING_MAP(Imul32, 0x00, 0xAF, REG_USE0 | REG_DEF0 | SETS_CCODES),
diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc
index 4ff79935d7..75f3fef599 100755
--- a/compiler/dex/quick/x86/target_x86.cc
+++ b/compiler/dex/quick/x86/target_x86.cc
@@ -20,7 +20,7 @@
#include <inttypes.h>
#include <string>
-#include "arch/x86/instruction_set_features_x86.h"
+#include "arch/instruction_set_features.h"
#include "art_method.h"
#include "backend_x86.h"
#include "base/logging.h"
@@ -585,8 +585,6 @@ bool X86Mir2Lir::ProvidesFullMemoryBarrier(X86OpCode opcode) {
case kX86LockCmpxchgAR:
case kX86LockCmpxchg64M:
case kX86LockCmpxchg64A:
- case kX86LockCmpxchg64AR:
- case kX86LockAdd32MI8:
case kX86XchgMR:
case kX86Mfence:
// Atomic memory instructions provide full barrier.
@@ -600,9 +598,7 @@ bool X86Mir2Lir::ProvidesFullMemoryBarrier(X86OpCode opcode) {
}
bool X86Mir2Lir::GenMemBarrier(MemBarrierKind barrier_kind) {
- const X86InstructionSetFeatures* features =
- cu_->compiler_driver->GetInstructionSetFeatures()->AsX86InstructionSetFeatures();
- if (!features->IsSmp()) {
+ if (!cu_->compiler_driver->GetInstructionSetFeatures()->IsSmp()) {
return false;
}
// Start off with using the last LIR as the barrier. If it is not enough, then we will update it.
@@ -614,34 +610,20 @@ bool X86Mir2Lir::GenMemBarrier(MemBarrierKind barrier_kind) {
* All other barriers (LoadAny, AnyStore, StoreStore) are nops due to the x86 memory model.
* For those cases, all we need to ensure is that there is a scheduling barrier in place.
*/
- const RegStorage rs_rSP = cu_->target64 ? rs_rX86_SP_64 : rs_rX86_SP_32;
- bool use_locked_add = features->PrefersLockedAddSynchronization();
if (barrier_kind == kAnyAny) {
- // If no LIR exists already that can be used as a barrier, then generate a barrier.
+ // If no LIR exists already that can be used as a barrier, then generate an mfence.
if (mem_barrier == nullptr) {
- if (use_locked_add) {
- mem_barrier = NewLIR3(kX86LockAdd32MI8, rs_rSP.GetReg(), 0, 0);
- } else {
- mem_barrier = NewLIR0(kX86Mfence);
- }
+ mem_barrier = NewLIR0(kX86Mfence);
ret = true;
}
- // If last instruction does not provide full barrier, then insert a barrier.
+ // If last instruction does not provide full barrier, then insert an mfence.
if (ProvidesFullMemoryBarrier(static_cast<X86OpCode>(mem_barrier->opcode)) == false) {
- if (use_locked_add) {
- mem_barrier = NewLIR3(kX86LockAdd32MI8, rs_rSP.GetReg(), 0, 0);
- } else {
- mem_barrier = NewLIR0(kX86Mfence);
- }
+ mem_barrier = NewLIR0(kX86Mfence);
ret = true;
}
} else if (barrier_kind == kNTStoreStore) {
- if (use_locked_add) {
- mem_barrier = NewLIR3(kX86LockAdd32MI8, rs_rSP.GetReg(), 0, 0);
- } else {
- mem_barrier = NewLIR0(kX86Sfence);
- }
+ mem_barrier = NewLIR0(kX86Sfence);
ret = true;
}
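
The comment in this hunk leans on x86's total-store-order model: the only reordering the hardware performs on ordinary memory is a store passing a later load, so kAnyAny is the one barrier kind that needs a real instruction (plus kNTStoreStore, where sfence orders non-temporal stores). A hedged, standalone C++ illustration of the pattern a StoreLoad barrier prevents, Dekker-style; without the fences both threads could read 0:

    #include <atomic>

    std::atomic<int> x{0}, y{0};

    int thread1() {
      x.store(1, std::memory_order_relaxed);
      std::atomic_thread_fence(std::memory_order_seq_cst);  // StoreLoad barrier
      return y.load(std::memory_order_relaxed);
    }

    int thread2() {  // runs concurrently with thread1
      y.store(1, std::memory_order_relaxed);
      std::atomic_thread_fence(std::memory_order_seq_cst);
      return x.load(std::memory_order_relaxed);
    }
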
diff --git a/compiler/dex/quick/x86/x86_lir.h b/compiler/dex/quick/x86/x86_lir.h
index 8cd6574443..d6a6a60d3d 100644
--- a/compiler/dex/quick/x86/x86_lir.h
+++ b/compiler/dex/quick/x86/x86_lir.h
@@ -606,7 +606,6 @@ enum X86OpCode {
// load-from-memory and store-to-memory instructions
kX86Sfence, // memory barrier to serialize all previous
// store-to-memory instructions
- kX86LockAdd32MI8, // locked add used to serialize memory instructions
Binary0fOpCode(kX86Imul16), // 16bit multiply
Binary0fOpCode(kX86Imul32), // 32bit multiply
Binary0fOpCode(kX86Imul64), // 64bit multiply
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index e7f7d57b98..bc3256ec8c 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -4157,7 +4157,7 @@ void CodeGeneratorX86::GenerateMemoryBarrier(MemBarrierKind kind) {
*/
switch (kind) {
case MemBarrierKind::kAnyAny: {
- MemoryFence();
+ __ mfence();
break;
}
case MemBarrierKind::kAnyStore:
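
In the optimizing code generators, "__" is a file-local shorthand for the assembler, so "__ mfence()" emits the instruction directly. A hedged sketch of the convention (each ART file defines it against its own assembler type; the exact definition shown here is illustrative):

    #define __ GetAssembler()->
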
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index ebbe486cb5..7c292fa103 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -17,7 +17,6 @@
#ifndef ART_COMPILER_OPTIMIZING_CODE_GENERATOR_X86_H_
#define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_X86_H_
-#include "arch/x86/instruction_set_features_x86.h"
#include "code_generator.h"
#include "dex/compiler_enums.h"
#include "driver/compiler_options.h"
@@ -497,19 +496,6 @@ class CodeGeneratorX86 : public CodeGenerator {
// artReadBarrierForRootSlow.
void GenerateReadBarrierForRootSlow(HInstruction* instruction, Location out, Location root);
- // Ensure that prior stores complete to memory before subsequent loads.
- // The locked add implementation will avoid serializing device memory, but will
- // touch (but not change) the top of the stack.
- // The 'non_temporal' parameter should be used to ensure ordering of non-temporal stores.
- void MemoryFence(bool non_temporal = false) {
- if (!non_temporal && isa_features_.PrefersLockedAddSynchronization()) {
- assembler_.lock()->addl(Address(ESP, 0), Immediate(0));
- } else {
- assembler_.mfence();
- }
- }
-
-
private:
// Factored implementation of GenerateFieldLoadWithBakerReadBarrier
// and GenerateArrayLoadWithBakerReadBarrier.
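
The MemoryFence() helper removed above documents the locked-add idiom's one caveat: a LOCK-prefixed add does not order non-temporal stores, hence the non_temporal flag falling back to mfence. A standalone sketch of the case that still needs a real fence (SSE2 intrinsics; function and parameter names are illustrative, not ART code):

    #include <emmintrin.h>

    void publish_nt(int* dst, int value, volatile int* ready) {
      _mm_stream_si32(dst, value);  // non-temporal store: bypasses the cache
                                    // and is not ordered by a locked add
      _mm_sfence();                 // drains write-combining buffers first
      *ready = 1;                   // ordinary store, now ordered after *dst
    }
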
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 02cc88d191..92cef5f226 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -4029,7 +4029,7 @@ void CodeGeneratorX86_64::GenerateMemoryBarrier(MemBarrierKind kind) {
*/
switch (kind) {
case MemBarrierKind::kAnyAny: {
- codegen_->MemoryFence();
+ __ mfence();
break;
}
case MemBarrierKind::kAnyStore:
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index e5a487c761..dda9ea22d9 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -17,7 +17,6 @@
#ifndef ART_COMPILER_OPTIMIZING_CODE_GENERATOR_X86_64_H_
#define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_X86_64_H_
-#include "arch/x86_64/instruction_set_features_x86_64.h"
#include "code_generator.h"
#include "dex/compiler_enums.h"
#include "driver/compiler_options.h"
@@ -480,18 +479,6 @@ class CodeGeneratorX86_64 : public CodeGenerator {
int64_t v,
HInstruction* instruction);
- // Ensure that prior stores complete to memory before subsequent loads.
- // The locked add implementation will avoid serializing device memory, but will
- // touch (but not change) the top of the stack. The locked add should not be used for
- // ordering non-temporal stores.
- void MemoryFence(bool force_mfence = false) {
- if (!force_mfence && isa_features_.PrefersLockedAddSynchronization()) {
- assembler_.lock()->addl(Address(CpuRegister(RSP), 0), Immediate(0));
- } else {
- assembler_.mfence();
- }
- }
-
private:
// Factored implementation of GenerateFieldLoadWithBakerReadBarrier
// and GenerateArrayLoadWithBakerReadBarrier.
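
The x86-64 variant removed here is the same helper with a force_mfence flag in place of non_temporal. As an aside, not part of this commit: the same mfence-versus-locked-add choice shows up when C++ compilers lower a sequentially consistent fence on x86-64, and both lowerings have been seen in practice:

    #include <atomic>

    void full_barrier() {
      // Compilers have emitted either "mfence" or a locked no-op
      // read-modify-write such as "lock orl $0, (%rsp)" for this line.
      std::atomic_thread_fence(std::memory_order_seq_cst);
    }
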
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc
index 74ade7c420..fd454d8322 100644
--- a/compiler/optimizing/intrinsics_x86.cc
+++ b/compiler/optimizing/intrinsics_x86.cc
@@ -2005,7 +2005,7 @@ static void GenUnsafePut(LocationSummary* locations,
}
if (is_volatile) {
- codegen->MemoryFence();
+ __ mfence();
}
if (type == Primitive::kPrimNot) {
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index 6e54dde0f0..ce737e3f7e 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -2080,7 +2080,7 @@ static void GenUnsafePut(LocationSummary* locations, Primitive::Type type, bool
}
if (is_volatile) {
- codegen->MemoryFence();
+ __ mfence();
}
if (type == Primitive::kPrimNot) {