X86: Use locked add rather than mfence
Java semantics for memory ordering can be satisfied using
lock addl $0,0(SP)
rather than mfence. The locked add synchronizes the memory caches, but
doesn't affect device memory.
Timing on a microbenchmark with an mfence or lock addl $0,0(SP) in a loop
with 600000000 iterations:
time ./mfence
real 0m5.411s
user 0m5.408s
sys 0m0.000s
time ./locked_add
real 0m3.552s
user 0m3.550s
sys 0m0.000s
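The benchmark source is not part of this change; a minimal sketch of what
it may have looked like, assuming a GCC-style inline-asm build on x86-64
where one variant is selected per binary (USE_MFENCE is a hypothetical
compile-time switch, not from this patch):

    // benchmark.cc: build twice, with and without -DUSE_MFENCE.
    #include <cstdint>

    int main() {
      for (int64_t i = 0; i < 600000000; ++i) {
    #ifdef USE_MFENCE
        __asm__ __volatile__("mfence" ::: "memory");
    #else
        // Locked read-modify-write on the top of the stack; it adds 0,
        // so it changes nothing and serves only as a full barrier.
        __asm__ __volatile__("lock addl $0, (%%rsp)" ::: "memory", "cc");
    #endif
      }
      return 0;
    }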
Implement this as an instruction set feature, lock_add. It is off by
default (mfence is used) and enabled for the atom and silvermont variants.
Generation of mfence can be forced by a parameter to MemoryFence.
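For example (illustrative call sites only; names are from this patch, using
the 32-bit signature — the 64-bit variant names the parameter force_mfence):

    // kAnyAny barrier: emits lock addl $0, 0(ESP) when the lock_add
    // feature is set, mfence otherwise.
    codegen->MemoryFence();
    // Ordering non-temporal stores: always emits mfence.
    codegen->MemoryFence(/* non_temporal= */ true);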
Change-Id: I5cb4fded61f4cbbd7b7db42a1b6902e43e458911
Signed-off-by: Mark Mendell <mark.p.mendell@intel.com>
diff --git a/compiler/dex/quick/x86/assemble_x86.cc b/compiler/dex/quick/x86/assemble_x86.cc
index e5d3841..1c2a619 100644
--- a/compiler/dex/quick/x86/assemble_x86.cc
+++ b/compiler/dex/quick/x86/assemble_x86.cc
@@ -508,6 +508,7 @@
{ kX86Lfence, kReg, NO_OPERAND, { 0, 0, 0x0F, 0xAE, 0, 5, 0, 0, false }, "Lfence", "" },
{ kX86Mfence, kReg, NO_OPERAND, { 0, 0, 0x0F, 0xAE, 0, 6, 0, 0, false }, "Mfence", "" },
{ kX86Sfence, kReg, NO_OPERAND, { 0, 0, 0x0F, 0xAE, 0, 7, 0, 0, false }, "Sfence", "" },
+ { kX86LockAdd32MI8, kMemImm, IS_LOAD | IS_STORE | IS_TERTIARY_OP | REG_USE0 | SETS_CCODES, { 0xF0, 0, 0x83, 0x0, 0x0, 0, 0, 1, false }, "LockAdd32MI8", "[!0r+!1d],!2d" },
EXT_0F_ENCODING_MAP(Imul16, 0x66, 0xAF, REG_USE0 | REG_DEF0 | SETS_CCODES),
EXT_0F_ENCODING_MAP(Imul32, 0x00, 0xAF, REG_USE0 | REG_DEF0 | SETS_CCODES),
diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc
index 75f3fef..4ff7993 100755
--- a/compiler/dex/quick/x86/target_x86.cc
+++ b/compiler/dex/quick/x86/target_x86.cc
@@ -20,7 +20,7 @@
#include <inttypes.h>
#include <string>
-#include "arch/instruction_set_features.h"
+#include "arch/x86/instruction_set_features_x86.h"
#include "art_method.h"
#include "backend_x86.h"
#include "base/logging.h"
@@ -585,6 +585,8 @@
case kX86LockCmpxchgAR:
case kX86LockCmpxchg64M:
case kX86LockCmpxchg64A:
+ case kX86LockCmpxchg64AR:
+ case kX86LockAdd32MI8:
case kX86XchgMR:
case kX86Mfence:
// Atomic memory instructions provide full barrier.
@@ -598,7 +600,9 @@
}
bool X86Mir2Lir::GenMemBarrier(MemBarrierKind barrier_kind) {
- if (!cu_->compiler_driver->GetInstructionSetFeatures()->IsSmp()) {
+ const X86InstructionSetFeatures* features =
+ cu_->compiler_driver->GetInstructionSetFeatures()->AsX86InstructionSetFeatures();
+ if (!features->IsSmp()) {
return false;
}
// Start off with using the last LIR as the barrier. If it is not enough, then we will update it.
@@ -610,20 +614,34 @@
* All other barriers (LoadAny, AnyStore, StoreStore) are nops due to the x86 memory model.
* For those cases, all we need to ensure is that there is a scheduling barrier in place.
*/
+ const RegStorage rs_rSP = cu_->target64 ? rs_rX86_SP_64 : rs_rX86_SP_32;
+ bool use_locked_add = features->PrefersLockedAddSynchronization();
if (barrier_kind == kAnyAny) {
- // If no LIR exists already that can be used a barrier, then generate an mfence.
+ // If no LIR exists already that can be used as a barrier, then generate a barrier.
if (mem_barrier == nullptr) {
- mem_barrier = NewLIR0(kX86Mfence);
+ if (use_locked_add) {
+ mem_barrier = NewLIR3(kX86LockAdd32MI8, rs_rSP.GetReg(), 0, 0);
+ } else {
+ mem_barrier = NewLIR0(kX86Mfence);
+ }
ret = true;
}
- // If last instruction does not provide full barrier, then insert an mfence.
+ // If the last instruction does not provide a full barrier, then insert a barrier.
if (ProvidesFullMemoryBarrier(static_cast<X86OpCode>(mem_barrier->opcode)) == false) {
- mem_barrier = NewLIR0(kX86Mfence);
+ if (use_locked_add) {
+ mem_barrier = NewLIR3(kX86LockAdd32MI8, rs_rSP.GetReg(), 0, 0);
+ } else {
+ mem_barrier = NewLIR0(kX86Mfence);
+ }
ret = true;
}
} else if (barrier_kind == kNTStoreStore) {
- mem_barrier = NewLIR0(kX86Sfence);
+ if (use_locked_add) {
+ mem_barrier = NewLIR3(kX86LockAdd32MI8, rs_rSP.GetReg(), 0, 0);
+ } else {
+ mem_barrier = NewLIR0(kX86Sfence);
+ }
ret = true;
}
diff --git a/compiler/dex/quick/x86/x86_lir.h b/compiler/dex/quick/x86/x86_lir.h
index d6a6a60..8cd6574 100644
--- a/compiler/dex/quick/x86/x86_lir.h
+++ b/compiler/dex/quick/x86/x86_lir.h
@@ -606,6 +606,7 @@
// load-from-memory and store-to-memory instructions
kX86Sfence, // memory barrier to serialize all previous
// store-to-memory instructions
+ kX86LockAdd32MI8, // locked add used to serialize memory instructions
Binary0fOpCode(kX86Imul16), // 16bit multiply
Binary0fOpCode(kX86Imul32), // 32bit multiply
Binary0fOpCode(kX86Imul64), // 64bit multiply
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 469dd49..7a5b8db 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -4156,7 +4156,7 @@
*/
switch (kind) {
case MemBarrierKind::kAnyAny: {
- __ mfence();
+ MemoryFence();
break;
}
case MemBarrierKind::kAnyStore:
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index 7121799..f0ead03 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -17,6 +17,7 @@
#ifndef ART_COMPILER_OPTIMIZING_CODE_GENERATOR_X86_H_
#define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_X86_H_
+#include "arch/x86/instruction_set_features_x86.h"
#include "code_generator.h"
#include "dex/compiler_enums.h"
#include "driver/compiler_options.h"
@@ -506,6 +507,19 @@
// artReadBarrierForRootSlow.
void GenerateReadBarrierForRootSlow(HInstruction* instruction, Location out, Location root);
+ // Ensure that prior stores complete to memory before subsequent loads.
+ // The locked add implementation will avoid serializing device memory, but will
+ // touch (but not change) the top of the stack.
+ // The 'non_temporal' parameter should be used to ensure ordering of non-temporal stores.
+ void MemoryFence(bool non_temporal = false) {
+ if (!non_temporal && isa_features_.PrefersLockedAddSynchronization()) {
+ assembler_.lock()->addl(Address(ESP, 0), Immediate(0));
+ } else {
+ assembler_.mfence();
+ }
+ }
+
private:
// Factored implementation of GenerateFieldLoadWithBakerReadBarrier
// and GenerateArrayLoadWithBakerReadBarrier.
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 1fee192..7cea0a7 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -3968,7 +3968,7 @@
*/
switch (kind) {
case MemBarrierKind::kAnyAny: {
- __ mfence();
+ codegen_->MemoryFence();
break;
}
case MemBarrierKind::kAnyStore:
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index 7351fed..9397d8f 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -17,6 +17,7 @@
#ifndef ART_COMPILER_OPTIMIZING_CODE_GENERATOR_X86_64_H_
#define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_X86_64_H_
+#include "arch/x86_64/instruction_set_features_x86_64.h"
#include "code_generator.h"
#include "dex/compiler_enums.h"
#include "driver/compiler_options.h"
@@ -423,6 +424,18 @@
int64_t v,
HInstruction* instruction);
+ // Ensure that prior stores complete to memory before subsequent loads.
+ // The locked add implementation will avoid serializing device memory, but will
+ // touch (but not change) the top of the stack. The locked add should not be used for
+ // ordering non-temporal stores.
+ void MemoryFence(bool force_mfence = false) {
+ if (!force_mfence && isa_features_.PrefersLockedAddSynchronization()) {
+ assembler_.lock()->addl(Address(CpuRegister(RSP), 0), Immediate(0));
+ } else {
+ assembler_.mfence();
+ }
+ }
+
private:
struct PcRelativeDexCacheAccessInfo {
PcRelativeDexCacheAccessInfo(const DexFile& dex_file, uint32_t element_off)
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc
index fd454d8..74ade7c 100644
--- a/compiler/optimizing/intrinsics_x86.cc
+++ b/compiler/optimizing/intrinsics_x86.cc
@@ -2005,7 +2005,7 @@
}
if (is_volatile) {
- __ mfence();
+ codegen->MemoryFence();
}
if (type == Primitive::kPrimNot) {
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index ac9b245..d519034 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -2059,7 +2059,7 @@
}
if (is_volatile) {
- __ mfence();
+ codegen->MemoryFence();
}
if (type == Primitive::kPrimNot) {
diff --git a/runtime/arch/x86/instruction_set_features_x86.cc b/runtime/arch/x86/instruction_set_features_x86.cc
index ef39999..42f5df4 100644
--- a/runtime/arch/x86/instruction_set_features_x86.cc
+++ b/runtime/arch/x86/instruction_set_features_x86.cc
@@ -45,6 +45,11 @@
"silvermont",
};
+static constexpr const char* x86_variants_prefer_locked_add_sync[] = {
+ "atom",
+ "silvermont",
+};
+
const X86InstructionSetFeatures* X86InstructionSetFeatures::FromVariant(
const std::string& variant, std::string* error_msg ATTRIBUTE_UNUSED,
bool x86_64) {
@@ -60,6 +65,10 @@
bool has_AVX = false;
bool has_AVX2 = false;
+ bool prefers_locked_add = FindVariantInArray(x86_variants_prefer_locked_add_sync,
+ arraysize(x86_variants_prefer_locked_add_sync),
+ variant);
+
bool known_variant = FindVariantInArray(x86_known_variants, arraysize(x86_known_variants),
variant);
if (!known_variant && variant != "default") {
@@ -68,10 +77,10 @@
if (x86_64) {
return new X86_64InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX,
- has_AVX2);
+ has_AVX2, prefers_locked_add);
} else {
return new X86InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX,
- has_AVX2);
+ has_AVX2, prefers_locked_add);
}
}
@@ -83,11 +92,13 @@
bool has_SSE4_2 = (bitmap & kSse4_2Bitfield) != 0;
bool has_AVX = (bitmap & kAvxBitfield) != 0;
bool has_AVX2 = (bitmap & kAvxBitfield) != 0;
+ bool prefers_locked_add = (bitmap & kPrefersLockedAdd) != 0;
if (x86_64) {
- return new X86_64InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX, has_AVX2);
+ return new X86_64InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2,
+ has_AVX, has_AVX2, prefers_locked_add);
} else {
- return new X86InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX,
- has_AVX2);
+ return new X86InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2,
+ has_AVX, has_AVX2, prefers_locked_add);
}
}
@@ -124,11 +135,15 @@
const bool has_AVX2 = true;
#endif
+ // No #define for memory synchronization preference.
+ const bool prefers_locked_add = false;
+
if (x86_64) {
- return new X86_64InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX, has_AVX2);
+ return new X86_64InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX,
+ has_AVX2, prefers_locked_add);
} else {
return new X86InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX,
- has_AVX2);
+ has_AVX2, prefers_locked_add);
}
}
@@ -141,6 +156,8 @@
bool has_SSE4_2 = false;
bool has_AVX = false;
bool has_AVX2 = false;
+ // No cpuinfo for memory synchronization preference.
+ const bool prefers_locked_add = false;
std::ifstream in("/proc/cpuinfo");
if (!in.fail()) {
@@ -177,10 +194,11 @@
LOG(ERROR) << "Failed to open /proc/cpuinfo";
}
if (x86_64) {
- return new X86_64InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX, has_AVX2);
+ return new X86_64InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX,
+ has_AVX2, prefers_locked_add);
} else {
return new X86InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX,
- has_AVX2);
+ has_AVX2, prefers_locked_add);
}
}
@@ -204,7 +222,8 @@
(has_SSE4_1_ == other_as_x86->has_SSE4_1_) &&
(has_SSE4_2_ == other_as_x86->has_SSE4_2_) &&
(has_AVX_ == other_as_x86->has_AVX_) &&
- (has_AVX2_ == other_as_x86->has_AVX2_);
+ (has_AVX2_ == other_as_x86->has_AVX2_) &&
+ (prefers_locked_add_ == other_as_x86->prefers_locked_add_);
}
uint32_t X86InstructionSetFeatures::AsBitmap() const {
@@ -213,7 +232,8 @@
(has_SSE4_1_ ? kSse4_1Bitfield : 0) |
(has_SSE4_2_ ? kSse4_2Bitfield : 0) |
(has_AVX_ ? kAvxBitfield : 0) |
- (has_AVX2_ ? kAvx2Bitfield : 0);
+ (has_AVX2_ ? kAvx2Bitfield : 0) |
+ (prefers_locked_add_ ? kPrefersLockedAdd : 0);
}
std::string X86InstructionSetFeatures::GetFeatureString() const {
@@ -248,6 +268,11 @@
} else {
result += ",-avx2";
}
+ if (prefers_locked_add_) {
+ result += ",lock_add";
+ } else {
+ result += ",-lock_add";
+ }
return result;
}
@@ -259,6 +284,7 @@
bool has_SSE4_2 = has_SSE4_2_;
bool has_AVX = has_AVX_;
bool has_AVX2 = has_AVX2_;
+ bool prefers_locked_add = prefers_locked_add_;
for (auto i = features.begin(); i != features.end(); i++) {
std::string feature = Trim(*i);
if (feature == "ssse3") {
@@ -281,6 +307,10 @@
has_AVX2 = true;
} else if (feature == "-avx2") {
has_AVX2 = false;
+ } else if (feature == "lock_add") {
+ prefers_locked_add = true;
+ } else if (feature == "-lock_add") {
+ prefers_locked_add = false;
} else {
*error_msg = StringPrintf("Unknown instruction set feature: '%s'", feature.c_str());
return nullptr;
@@ -288,10 +318,10 @@
}
if (x86_64) {
return new X86_64InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX,
- has_AVX2);
+ has_AVX2, prefers_locked_add);
} else {
return new X86InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX,
- has_AVX2);
+ has_AVX2, prefers_locked_add);
}
}
diff --git a/runtime/arch/x86/instruction_set_features_x86.h b/runtime/arch/x86/instruction_set_features_x86.h
index 7b61245..2b845f8 100644
--- a/runtime/arch/x86/instruction_set_features_x86.h
+++ b/runtime/arch/x86/instruction_set_features_x86.h
@@ -60,6 +60,8 @@
bool HasSSE4_1() const { return has_SSE4_1_; }
+ bool PrefersLockedAddSynchronization() const { return prefers_locked_add_; }
+
protected:
// Parse a string of the form "ssse3" adding these to a new InstructionSetFeatures.
virtual const InstructionSetFeatures*
@@ -73,9 +75,10 @@
bool x86_64, std::string* error_msg) const;
X86InstructionSetFeatures(bool smp, bool has_SSSE3, bool has_SSE4_1, bool has_SSE4_2,
- bool has_AVX, bool has_AVX2)
+ bool has_AVX, bool has_AVX2, bool prefers_locked_add)
: InstructionSetFeatures(smp), has_SSSE3_(has_SSSE3), has_SSE4_1_(has_SSE4_1),
- has_SSE4_2_(has_SSE4_2), has_AVX_(has_AVX), has_AVX2_(has_AVX2) {
+ has_SSE4_2_(has_SSE4_2), has_AVX_(has_AVX), has_AVX2_(has_AVX2),
+ prefers_locked_add_(prefers_locked_add) {
}
private:
@@ -87,6 +90,7 @@
kSse4_2Bitfield = 8,
kAvxBitfield = 16,
kAvx2Bitfield = 32,
+ kPrefersLockedAdd = 64,
};
const bool has_SSSE3_; // x86 128bit SIMD - Supplemental SSE.
@@ -94,6 +98,7 @@
const bool has_SSE4_2_; // x86 128bit SIMD SSE4.2.
const bool has_AVX_; // x86 256bit SIMD AVX.
const bool has_AVX2_; // x86 256bit SIMD AVX 2.0.
+ const bool prefers_locked_add_; // x86 use locked add for memory synchronization.
DISALLOW_COPY_AND_ASSIGN(X86InstructionSetFeatures);
};
diff --git a/runtime/arch/x86/instruction_set_features_x86_test.cc b/runtime/arch/x86/instruction_set_features_x86_test.cc
index 25a406b..e8d01e6 100644
--- a/runtime/arch/x86/instruction_set_features_x86_test.cc
+++ b/runtime/arch/x86/instruction_set_features_x86_test.cc
@@ -27,7 +27,8 @@
ASSERT_TRUE(x86_features.get() != nullptr) << error_msg;
EXPECT_EQ(x86_features->GetInstructionSet(), kX86);
EXPECT_TRUE(x86_features->Equals(x86_features.get()));
- EXPECT_STREQ("smp,-ssse3,-sse4.1,-sse4.2,-avx,-avx2", x86_features->GetFeatureString().c_str());
+ EXPECT_STREQ("smp,-ssse3,-sse4.1,-sse4.2,-avx,-avx2,-lock_add",
+ x86_features->GetFeatureString().c_str());
EXPECT_EQ(x86_features->AsBitmap(), 1U);
}
@@ -39,8 +40,9 @@
ASSERT_TRUE(x86_features.get() != nullptr) << error_msg;
EXPECT_EQ(x86_features->GetInstructionSet(), kX86);
EXPECT_TRUE(x86_features->Equals(x86_features.get()));
- EXPECT_STREQ("smp,ssse3,-sse4.1,-sse4.2,-avx,-avx2", x86_features->GetFeatureString().c_str());
- EXPECT_EQ(x86_features->AsBitmap(), 3U);
+ EXPECT_STREQ("smp,ssse3,-sse4.1,-sse4.2,-avx,-avx2,lock_add",
+ x86_features->GetFeatureString().c_str());
+ EXPECT_EQ(x86_features->AsBitmap(), 67U);
// Build features for a 32-bit x86 default processor.
std::unique_ptr<const InstructionSetFeatures> x86_default_features(
@@ -48,7 +50,7 @@
ASSERT_TRUE(x86_default_features.get() != nullptr) << error_msg;
EXPECT_EQ(x86_default_features->GetInstructionSet(), kX86);
EXPECT_TRUE(x86_default_features->Equals(x86_default_features.get()));
- EXPECT_STREQ("smp,-ssse3,-sse4.1,-sse4.2,-avx,-avx2",
+ EXPECT_STREQ("smp,-ssse3,-sse4.1,-sse4.2,-avx,-avx2,-lock_add",
x86_default_features->GetFeatureString().c_str());
EXPECT_EQ(x86_default_features->AsBitmap(), 1U);
@@ -58,9 +60,9 @@
ASSERT_TRUE(x86_64_features.get() != nullptr) << error_msg;
EXPECT_EQ(x86_64_features->GetInstructionSet(), kX86_64);
EXPECT_TRUE(x86_64_features->Equals(x86_64_features.get()));
- EXPECT_STREQ("smp,ssse3,-sse4.1,-sse4.2,-avx,-avx2",
+ EXPECT_STREQ("smp,ssse3,-sse4.1,-sse4.2,-avx,-avx2,lock_add",
x86_64_features->GetFeatureString().c_str());
- EXPECT_EQ(x86_64_features->AsBitmap(), 3U);
+ EXPECT_EQ(x86_64_features->AsBitmap(), 67U);
EXPECT_FALSE(x86_64_features->Equals(x86_features.get()));
EXPECT_FALSE(x86_64_features->Equals(x86_default_features.get()));
@@ -75,8 +77,9 @@
ASSERT_TRUE(x86_features.get() != nullptr) << error_msg;
EXPECT_EQ(x86_features->GetInstructionSet(), kX86);
EXPECT_TRUE(x86_features->Equals(x86_features.get()));
- EXPECT_STREQ("smp,ssse3,sse4.1,sse4.2,-avx,-avx2", x86_features->GetFeatureString().c_str());
- EXPECT_EQ(x86_features->AsBitmap(), 15U);
+ EXPECT_STREQ("smp,ssse3,sse4.1,sse4.2,-avx,-avx2,lock_add",
+ x86_features->GetFeatureString().c_str());
+ EXPECT_EQ(x86_features->AsBitmap(), 79U);
// Build features for a 32-bit x86 default processor.
std::unique_ptr<const InstructionSetFeatures> x86_default_features(
@@ -84,7 +87,7 @@
ASSERT_TRUE(x86_default_features.get() != nullptr) << error_msg;
EXPECT_EQ(x86_default_features->GetInstructionSet(), kX86);
EXPECT_TRUE(x86_default_features->Equals(x86_default_features.get()));
- EXPECT_STREQ("smp,-ssse3,-sse4.1,-sse4.2,-avx,-avx2",
+ EXPECT_STREQ("smp,-ssse3,-sse4.1,-sse4.2,-avx,-avx2,-lock_add",
x86_default_features->GetFeatureString().c_str());
EXPECT_EQ(x86_default_features->AsBitmap(), 1U);
@@ -94,9 +97,9 @@
ASSERT_TRUE(x86_64_features.get() != nullptr) << error_msg;
EXPECT_EQ(x86_64_features->GetInstructionSet(), kX86_64);
EXPECT_TRUE(x86_64_features->Equals(x86_64_features.get()));
- EXPECT_STREQ("smp,ssse3,sse4.1,sse4.2,-avx,-avx2",
+ EXPECT_STREQ("smp,ssse3,sse4.1,sse4.2,-avx,-avx2,lock_add",
x86_64_features->GetFeatureString().c_str());
- EXPECT_EQ(x86_64_features->AsBitmap(), 15U);
+ EXPECT_EQ(x86_64_features->AsBitmap(), 79U);
EXPECT_FALSE(x86_64_features->Equals(x86_features.get()));
EXPECT_FALSE(x86_64_features->Equals(x86_default_features.get()));
diff --git a/runtime/arch/x86_64/instruction_set_features_x86_64.h b/runtime/arch/x86_64/instruction_set_features_x86_64.h
index 3280177..b8000d0 100644
--- a/runtime/arch/x86_64/instruction_set_features_x86_64.h
+++ b/runtime/arch/x86_64/instruction_set_features_x86_64.h
@@ -74,8 +74,9 @@
private:
X86_64InstructionSetFeatures(bool smp, bool has_SSSE3, bool has_SSE4_1, bool has_SSE4_2,
- bool has_AVX, bool has_AVX2)
- : X86InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX, has_AVX2) {
+ bool has_AVX, bool has_AVX2, bool prefers_locked_add)
+ : X86InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX,
+ has_AVX2, prefers_locked_add) {
}
friend class X86InstructionSetFeatures;
diff --git a/runtime/arch/x86_64/instruction_set_features_x86_64_test.cc b/runtime/arch/x86_64/instruction_set_features_x86_64_test.cc
index 5171080..4562c64 100644
--- a/runtime/arch/x86_64/instruction_set_features_x86_64_test.cc
+++ b/runtime/arch/x86_64/instruction_set_features_x86_64_test.cc
@@ -27,7 +27,7 @@
ASSERT_TRUE(x86_64_features.get() != nullptr) << error_msg;
EXPECT_EQ(x86_64_features->GetInstructionSet(), kX86_64);
EXPECT_TRUE(x86_64_features->Equals(x86_64_features.get()));
- EXPECT_STREQ("smp,-ssse3,-sse4.1,-sse4.2,-avx,-avx2",
+ EXPECT_STREQ("smp,-ssse3,-sse4.1,-sse4.2,-avx,-avx2,-lock_add",
x86_64_features->GetFeatureString().c_str());
EXPECT_EQ(x86_64_features->AsBitmap(), 1U);
}
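For reference, the updated bitmap expectations in the tests follow directly
from the new enum value (kPrefersLockedAdd = 64). An illustrative standalone
check, with the remaining bit values inferred from the existing tests (the
default smp-only bitmap is 1U, smp+ssse3 was 3U):

    #include <cassert>
    #include <cstdint>

    int main() {
      const uint32_t kSmpBitfield = 1;        // inferred: default bitmap is 1U
      const uint32_t kSsse3Bitfield = 2;      // inferred: smp,ssse3 was 3U
      const uint32_t kSse4_1Bitfield = 4;
      const uint32_t kSse4_2Bitfield = 8;
      const uint32_t kPrefersLockedAdd = 64;  // added by this patch

      // silvermont: smp,ssse3,lock_add -> 3U becomes 67U.
      assert((kSmpBitfield | kSsse3Bitfield | kPrefersLockedAdd) == 67u);
      // cpuinfo variant with sse4.1/sse4.2: 15U becomes 79U.
      assert((kSmpBitfield | kSsse3Bitfield | kSse4_1Bitfield |
              kSse4_2Bitfield | kPrefersLockedAdd) == 79u);
      return 0;
    }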