-rw-r--r--  compiler/dex/quick/x86/assemble_x86.cc                       |  1
-rwxr-xr-x  compiler/dex/quick/x86/target_x86.cc                         | 32
-rw-r--r--  compiler/dex/quick/x86/x86_lir.h                             |  1
-rw-r--r--  compiler/optimizing/code_generator_x86.cc                    |  2
-rw-r--r--  compiler/optimizing/code_generator_x86.h                     | 14
-rw-r--r--  compiler/optimizing/code_generator_x86_64.cc                 |  2
-rw-r--r--  compiler/optimizing/code_generator_x86_64.h                  | 13
-rw-r--r--  compiler/optimizing/intrinsics_x86.cc                        |  2
-rw-r--r--  compiler/optimizing/intrinsics_x86_64.cc                     |  2
-rw-r--r--  runtime/arch/x86/instruction_set_features_x86.cc             | 56
-rw-r--r--  runtime/arch/x86/instruction_set_features_x86.h              |  9
-rw-r--r--  runtime/arch/x86/instruction_set_features_x86_test.cc        | 25
-rw-r--r--  runtime/arch/x86_64/instruction_set_features_x86_64.h        |  5
-rw-r--r--  runtime/arch/x86_64/instruction_set_features_x86_64_test.cc  |  2
14 files changed, 126 insertions, 40 deletions
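
In brief: for x86 variants that request it ("atom", "silvermont"), this change makes both the Quick and Optimizing backends emit a lock-prefixed add of zero to the top of the stack instead of mfence for full (kAnyAny) barriers, gated by a new lock_add instruction-set feature. Any lock-prefixed read-modify-write is a full StoreLoad barrier on x86, and one that targets the stack top stays on a thread-local, cache-hot line. A minimal standalone sketch of the two sequences the patch chooses between, assuming GCC/Clang extended inline assembly (illustrative only, not ART code):

  // Full memory barrier via MFENCE: serializes all prior loads and stores.
  inline void fence_mfence() {
    asm volatile("mfence" ::: "memory");
  }

  // Full memory barrier via a locked add of 0 to the top of the stack:
  // reads and writes (without changing) a line the thread already owns,
  // which is typically cheaper than MFENCE on Atom/Silvermont cores.
  inline void fence_locked_add() {
  #if defined(__x86_64__)
    asm volatile("lock addl $0, (%%rsp)" ::: "memory", "cc");
  #else
    asm volatile("lock addl $0, (%%esp)" ::: "memory", "cc");
  #endif
  }
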
diff --git a/compiler/dex/quick/x86/assemble_x86.cc b/compiler/dex/quick/x86/assemble_x86.cc
index e5d3841b14..1c2a619020 100644
--- a/compiler/dex/quick/x86/assemble_x86.cc
+++ b/compiler/dex/quick/x86/assemble_x86.cc
@@ -508,6 +508,7 @@ ENCODING_MAP(Cmp, IS_LOAD, 0, 0,
   { kX86Lfence, kReg, NO_OPERAND, { 0, 0, 0x0F, 0xAE, 0, 5, 0, 0, false }, "Lfence", "" },
   { kX86Mfence, kReg, NO_OPERAND, { 0, 0, 0x0F, 0xAE, 0, 6, 0, 0, false }, "Mfence", "" },
   { kX86Sfence, kReg, NO_OPERAND, { 0, 0, 0x0F, 0xAE, 0, 7, 0, 0, false }, "Sfence", "" },
+  { kX86LockAdd32MI8, kMemImm, IS_LOAD | IS_STORE | IS_TERTIARY_OP | REG_USE0 | SETS_CCODES, { 0xF0, 0, 0x83, 0x0, 0x0, 0, 0, 1, false }, "LockAdd32MI8", "[!0r+!1d],!2d" },
 
   EXT_0F_ENCODING_MAP(Imul16, 0x66, 0xAF, REG_USE0 | REG_DEF0 | SETS_CCODES),
   EXT_0F_ENCODING_MAP(Imul32, 0x00, 0xAF, REG_USE0 | REG_DEF0 | SETS_CCODES),
diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc
index 75f3fef599..4ff79935d7 100755
--- a/compiler/dex/quick/x86/target_x86.cc
+++ b/compiler/dex/quick/x86/target_x86.cc
@@ -20,7 +20,7 @@
 #include <inttypes.h>
 #include <string>
 
-#include "arch/instruction_set_features.h"
+#include "arch/x86/instruction_set_features_x86.h"
 #include "art_method.h"
 #include "backend_x86.h"
 #include "base/logging.h"
@@ -585,6 +585,8 @@ bool X86Mir2Lir::ProvidesFullMemoryBarrier(X86OpCode opcode) {
     case kX86LockCmpxchgAR:
     case kX86LockCmpxchg64M:
     case kX86LockCmpxchg64A:
+    case kX86LockCmpxchg64AR:
+    case kX86LockAdd32MI8:
     case kX86XchgMR:
     case kX86Mfence:
       // Atomic memory instructions provide full barrier.
@@ -598,7 +600,9 @@ bool X86Mir2Lir::ProvidesFullMemoryBarrier(X86OpCode opcode) {
 }
 
 bool X86Mir2Lir::GenMemBarrier(MemBarrierKind barrier_kind) {
-  if (!cu_->compiler_driver->GetInstructionSetFeatures()->IsSmp()) {
+  const X86InstructionSetFeatures* features =
+      cu_->compiler_driver->GetInstructionSetFeatures()->AsX86InstructionSetFeatures();
+  if (!features->IsSmp()) {
     return false;
   }
   // Start off with using the last LIR as the barrier. If it is not enough, then we will update it.
@@ -610,20 +614,34 @@ bool X86Mir2Lir::GenMemBarrier(MemBarrierKind barrier_kind) {
    * All other barriers (LoadAny, AnyStore, StoreStore) are nops due to the x86 memory model.
    * For those cases, all we need to ensure is that there is a scheduling barrier in place.
    */
+  const RegStorage rs_rSP = cu_->target64 ? rs_rX86_SP_64 : rs_rX86_SP_32;
+  bool use_locked_add = features->PrefersLockedAddSynchronization();
   if (barrier_kind == kAnyAny) {
-    // If no LIR exists already that can be used a barrier, then generate an mfence.
+    // If no LIR exists already that can be used a barrier, then generate a barrier.
     if (mem_barrier == nullptr) {
-      mem_barrier = NewLIR0(kX86Mfence);
+      if (use_locked_add) {
+        mem_barrier = NewLIR3(kX86LockAdd32MI8, rs_rSP.GetReg(), 0, 0);
+      } else {
+        mem_barrier = NewLIR0(kX86Mfence);
+      }
       ret = true;
     }
-    // If last instruction does not provide full barrier, then insert an mfence.
+    // If last instruction does not provide full barrier, then insert a barrier.
     if (ProvidesFullMemoryBarrier(static_cast<X86OpCode>(mem_barrier->opcode)) == false) {
-      mem_barrier = NewLIR0(kX86Mfence);
+      if (use_locked_add) {
+        mem_barrier = NewLIR3(kX86LockAdd32MI8, rs_rSP.GetReg(), 0, 0);
+      } else {
+        mem_barrier = NewLIR0(kX86Mfence);
+      }
       ret = true;
     }
   } else if (barrier_kind == kNTStoreStore) {
-    mem_barrier = NewLIR0(kX86Sfence);
+    if (use_locked_add) {
+      mem_barrier = NewLIR3(kX86LockAdd32MI8, rs_rSP.GetReg(), 0, 0);
+    } else {
+      mem_barrier = NewLIR0(kX86Sfence);
+    }
     ret = true;
   }
diff --git a/compiler/dex/quick/x86/x86_lir.h b/compiler/dex/quick/x86/x86_lir.h
index d6a6a60d3d..8cd6574443 100644
--- a/compiler/dex/quick/x86/x86_lir.h
+++ b/compiler/dex/quick/x86/x86_lir.h
@@ -606,6 +606,7 @@ enum X86OpCode {
                 // load-from-memory and store-to-memory instructions
   kX86Sfence,   // memory barrier to serialize all previous
                 // store-to-memory instructions
+  kX86LockAdd32MI8,  // locked add used to serialize memory instructions
   Binary0fOpCode(kX86Imul16),  // 16bit multiply
   Binary0fOpCode(kX86Imul32),  // 32bit multiply
   Binary0fOpCode(kX86Imul64),  // 64bit multiply
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index bc3256ec8c..e7f7d57b98 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -4157,7 +4157,7 @@ void CodeGeneratorX86::GenerateMemoryBarrier(MemBarrierKind kind) {
    */
   switch (kind) {
     case MemBarrierKind::kAnyAny: {
-      __ mfence();
+      MemoryFence();
       break;
     }
     case MemBarrierKind::kAnyStore:
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index 7c292fa103..ebbe486cb5 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -17,6 +17,7 @@
 #ifndef ART_COMPILER_OPTIMIZING_CODE_GENERATOR_X86_H_
 #define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_X86_H_
 
+#include "arch/x86/instruction_set_features_x86.h"
 #include "code_generator.h"
 #include "dex/compiler_enums.h"
 #include "driver/compiler_options.h"
@@ -496,6 +497,19 @@ class CodeGeneratorX86 : public CodeGenerator {
   // artReadBarrierForRootSlow.
   void GenerateReadBarrierForRootSlow(HInstruction* instruction, Location out, Location root);
 
+  // Ensure that prior stores complete to memory before subsequent loads.
+  // The locked add implementation will avoid serializing device memory, but will
+  // touch (but not change) the top of the stack.
+  // The 'non_temporal' parameter should be used to ensure ordering of non-temporal stores.
+  void MemoryFence(bool non_temporal = false) {
+    if (!non_temporal && isa_features_.PrefersLockedAddSynchronization()) {
+      assembler_.lock()->addl(Address(ESP, 0), Immediate(0));
+    } else {
+      assembler_.mfence();
+    }
+  }
+
+
  private:
   // Factored implementation of GenerateFieldLoadWithBakerReadBarrier
   // and GenerateArrayLoadWithBakerReadBarrier.
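
For reference, the LockAdd32MI8 encoding entry added to assemble_x86.cc above is the standard ADD r/m32, imm8 form (opcode 0x83, ModRM reg field 0) behind a LOCK prefix (0xF0). A self-contained sketch of the resulting bytes; the byte values come from the Intel SDM encoding rules rather than from this patch:

  #include <cstdio>
  #include <cstdint>

  int main() {
    // 0xF0 = LOCK prefix; 0x83 /0 ib = ADD r/m32, imm8 (ModRM reg field 0
    // selects ADD); ModRM 0x04 + SIB 0x24 address (%esp); imm8 = 0x00.
    const uint8_t lock_add[] = {0xF0, 0x83, 0x04, 0x24, 0x00};  // lock addl $0, (%esp)
    const uint8_t mfence[]   = {0x0F, 0xAE, 0xF0};              // mfence
    printf("lock add = %zu bytes, mfence = %zu bytes\n", sizeof(lock_add), sizeof(mfence));
    return 0;
  }

The assembler_.lock()->addl(Address(ESP, 0), Immediate(0)) call in the new MemoryFence() helper emits this same instruction through the Optimizing backend's assembler.
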
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 92cef5f226..02cc88d191 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -4029,7 +4029,7 @@ void CodeGeneratorX86_64::GenerateMemoryBarrier(MemBarrierKind kind) {
    */
   switch (kind) {
     case MemBarrierKind::kAnyAny: {
-      __ mfence();
+      codegen_->MemoryFence();
       break;
     }
     case MemBarrierKind::kAnyStore:
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index dda9ea22d9..e5a487c761 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -17,6 +17,7 @@
 #ifndef ART_COMPILER_OPTIMIZING_CODE_GENERATOR_X86_64_H_
 #define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_X86_64_H_
 
+#include "arch/x86_64/instruction_set_features_x86_64.h"
 #include "code_generator.h"
 #include "dex/compiler_enums.h"
 #include "driver/compiler_options.h"
@@ -479,6 +480,18 @@ class CodeGeneratorX86_64 : public CodeGenerator {
                          int64_t v,
                          HInstruction* instruction);
 
+  // Ensure that prior stores complete to memory before subsequent loads.
+  // The locked add implementation will avoid serializing device memory, but will
+  // touch (but not change) the top of the stack. The locked add should not be used for
+  // ordering non-temporal stores.
+  void MemoryFence(bool force_mfence = false) {
+    if (!force_mfence && isa_features_.PrefersLockedAddSynchronization()) {
+      assembler_.lock()->addl(Address(CpuRegister(RSP), 0), Immediate(0));
+    } else {
+      assembler_.mfence();
+    }
+  }
+
  private:
   // Factored implementation of GenerateFieldLoadWithBakerReadBarrier
   // and GenerateArrayLoadWithBakerReadBarrier.
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc
index fd454d8322..74ade7c420 100644
--- a/compiler/optimizing/intrinsics_x86.cc
+++ b/compiler/optimizing/intrinsics_x86.cc
@@ -2005,7 +2005,7 @@ static void GenUnsafePut(LocationSummary* locations,
   }
 
   if (is_volatile) {
-    __ mfence();
+    codegen->MemoryFence();
   }
 
   if (type == Primitive::kPrimNot) {
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index ce737e3f7e..6e54dde0f0 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -2080,7 +2080,7 @@ static void GenUnsafePut(LocationSummary* locations, Primitive::Type type, bool
   }
 
   if (is_volatile) {
-    __ mfence();
+    codegen->MemoryFence();
   }
 
   if (type == Primitive::kPrimNot) {
diff --git a/runtime/arch/x86/instruction_set_features_x86.cc b/runtime/arch/x86/instruction_set_features_x86.cc
index ef399992b6..42f5df467d 100644
--- a/runtime/arch/x86/instruction_set_features_x86.cc
+++ b/runtime/arch/x86/instruction_set_features_x86.cc
@@ -45,6 +45,11 @@ static constexpr const char* x86_variants_with_sse4_2[] = {
     "silvermont",
 };
 
+static constexpr const char* x86_variants_prefer_locked_add_sync[] = {
+    "atom",
+    "silvermont",
+};
+
 const X86InstructionSetFeatures* X86InstructionSetFeatures::FromVariant(
     const std::string& variant, std::string* error_msg ATTRIBUTE_UNUSED,
     bool x86_64) {
@@ -60,6 +65,10 @@ const X86InstructionSetFeatures* X86InstructionSetFeatures::FromVariant(
   bool has_AVX = false;
   bool has_AVX2 = false;
 
+  bool prefers_locked_add = FindVariantInArray(x86_variants_prefer_locked_add_sync,
+                                               arraysize(x86_variants_prefer_locked_add_sync),
+                                               variant);
+
   bool known_variant = FindVariantInArray(x86_known_variants, arraysize(x86_known_variants),
                                           variant);
   if (!known_variant && variant != "default") {
@@ -68,10 +77,10 @@ const X86InstructionSetFeatures* X86InstructionSetFeatures::FromVariant(
 
   if (x86_64) {
     return new X86_64InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX,
-                                            has_AVX2);
+                                            has_AVX2, prefers_locked_add);
   } else {
     return new X86InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX,
-                                         has_AVX2);
+                                         has_AVX2, prefers_locked_add);
   }
 }
@@ -83,11 +92,13 @@ const X86InstructionSetFeatures* X86InstructionSetFeatures::FromBitmap(uint32_t
   bool has_SSE4_2 = (bitmap & kSse4_2Bitfield) != 0;
   bool has_AVX = (bitmap & kAvxBitfield) != 0;
   bool has_AVX2 = (bitmap & kAvxBitfield) != 0;
+  bool prefers_locked_add = (bitmap & kPrefersLockedAdd) != 0;
   if (x86_64) {
-    return new X86_64InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX, has_AVX2);
+    return new X86_64InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2,
+                                            has_AVX, has_AVX2, prefers_locked_add);
   } else {
-    return new X86InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX,
-                                         has_AVX2);
+    return new X86InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2,
+                                         has_AVX, has_AVX2, prefers_locked_add);
   }
 }
@@ -124,11 +135,15 @@ const X86InstructionSetFeatures* X86InstructionSetFeatures::FromCppDefines(bool
   const bool has_AVX2 = true;
 #endif
 
+  // No #define for memory synchronization preference.
+  const bool prefers_locked_add = false;
+
   if (x86_64) {
-    return new X86_64InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX, has_AVX2);
+    return new X86_64InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX,
+                                            has_AVX2, prefers_locked_add);
   } else {
     return new X86InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX,
-                                         has_AVX2);
+                                         has_AVX2, prefers_locked_add);
   }
 }
@@ -141,6 +156,8 @@ const X86InstructionSetFeatures* X86InstructionSetFeatures::FromCpuInfo(bool x86
   bool has_SSE4_2 = false;
   bool has_AVX = false;
   bool has_AVX2 = false;
+  // No cpuinfo for memory synchronization preference.
+  const bool prefers_locked_add = false;
 
   std::ifstream in("/proc/cpuinfo");
   if (!in.fail()) {
@@ -177,10 +194,11 @@ const X86InstructionSetFeatures* X86InstructionSetFeatures::FromCpuInfo(bool x86
     LOG(ERROR) << "Failed to open /proc/cpuinfo";
   }
   if (x86_64) {
-    return new X86_64InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX, has_AVX2);
+    return new X86_64InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX,
+                                            has_AVX2, prefers_locked_add);
   } else {
     return new X86InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX,
-                                         has_AVX2);
+                                         has_AVX2, prefers_locked_add);
   }
 }
@@ -204,7 +222,8 @@ bool X86InstructionSetFeatures::Equals(const InstructionSetFeatures* other) cons
       (has_SSE4_1_ == other_as_x86->has_SSE4_1_) &&
       (has_SSE4_2_ == other_as_x86->has_SSE4_2_) &&
       (has_AVX_ == other_as_x86->has_AVX_) &&
-      (has_AVX2_ == other_as_x86->has_AVX2_);
+      (has_AVX2_ == other_as_x86->has_AVX2_) &&
+      (prefers_locked_add_ == other_as_x86->prefers_locked_add_);
 }
 
 uint32_t X86InstructionSetFeatures::AsBitmap() const {
@@ -213,7 +232,8 @@ uint32_t X86InstructionSetFeatures::AsBitmap() const {
       (has_SSE4_1_ ? kSse4_1Bitfield : 0) |
       (has_SSE4_2_ ? kSse4_2Bitfield : 0) |
       (has_AVX_ ? kAvxBitfield : 0) |
-      (has_AVX2_ ? kAvx2Bitfield : 0);
+      (has_AVX2_ ? kAvx2Bitfield : 0) |
+      (prefers_locked_add_ ? kPrefersLockedAdd : 0);
 }
 
 std::string X86InstructionSetFeatures::GetFeatureString() const {
@@ -248,6 +268,11 @@ std::string X86InstructionSetFeatures::GetFeatureString() const {
   } else {
     result += ",-avx2";
   }
+  if (prefers_locked_add_) {
+    result += ",lock_add";
+  } else {
+    result += ",-lock_add";
+  }
   return result;
 }
@@ -259,6 +284,7 @@ const InstructionSetFeatures* X86InstructionSetFeatures::AddFeaturesFromSplitStr
   bool has_SSE4_2 = has_SSE4_2_;
   bool has_AVX = has_AVX_;
   bool has_AVX2 = has_AVX2_;
+  bool prefers_locked_add = prefers_locked_add_;
   for (auto i = features.begin(); i != features.end(); i++) {
     std::string feature = Trim(*i);
     if (feature == "ssse3") {
@@ -281,6 +307,10 @@ const InstructionSetFeatures* X86InstructionSetFeatures::AddFeaturesFromSplitStr
       has_AVX2 = true;
     } else if (feature == "-avx2") {
       has_AVX2 = false;
+    } else if (feature == "lock_add") {
+      prefers_locked_add = true;
+    } else if (feature == "-lock_add") {
+      prefers_locked_add = false;
     } else {
       *error_msg = StringPrintf("Unknown instruction set feature: '%s'", feature.c_str());
       return nullptr;
@@ -288,10 +318,10 @@ const InstructionSetFeatures* X86InstructionSetFeatures::AddFeaturesFromSplitStr
   }
   if (x86_64) {
     return new X86_64InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX,
-                                            has_AVX2);
+                                            has_AVX2, prefers_locked_add);
   } else {
     return new X86InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX,
-                                         has_AVX2);
+                                         has_AVX2, prefers_locked_add);
   }
 }
diff --git a/runtime/arch/x86/instruction_set_features_x86.h b/runtime/arch/x86/instruction_set_features_x86.h
index 7b612453f9..2b845f8dcc 100644
--- a/runtime/arch/x86/instruction_set_features_x86.h
+++ b/runtime/arch/x86/instruction_set_features_x86.h
@@ -60,6 +60,8 @@ class X86InstructionSetFeatures : public InstructionSetFeatures {
 
   bool HasSSE4_1() const { return has_SSE4_1_; }
 
+  bool PrefersLockedAddSynchronization() const { return prefers_locked_add_; }
+
  protected:
   // Parse a string of the form "ssse3" adding these to a new InstructionSetFeatures.
   virtual const InstructionSetFeatures*
@@ -73,9 +75,10 @@ class X86InstructionSetFeatures : public InstructionSetFeatures {
                      bool x86_64, std::string* error_msg) const;
 
   X86InstructionSetFeatures(bool smp, bool has_SSSE3, bool has_SSE4_1, bool has_SSE4_2,
-                            bool has_AVX, bool has_AVX2)
+                            bool has_AVX, bool has_AVX2, bool prefers_locked_add)
       : InstructionSetFeatures(smp), has_SSSE3_(has_SSSE3), has_SSE4_1_(has_SSE4_1),
-        has_SSE4_2_(has_SSE4_2), has_AVX_(has_AVX), has_AVX2_(has_AVX2) {
+        has_SSE4_2_(has_SSE4_2), has_AVX_(has_AVX), has_AVX2_(has_AVX2),
+        prefers_locked_add_(prefers_locked_add) {
   }
 
  private:
@@ -87,6 +90,7 @@ class X86InstructionSetFeatures : public InstructionSetFeatures {
     kSse4_2Bitfield = 8,
     kAvxBitfield = 16,
     kAvx2Bitfield = 32,
+    kPrefersLockedAdd = 64,
   };
 
   const bool has_SSSE3_;  // x86 128bit SIMD - Supplemental SSE.
@@ -94,6 +98,7 @@ class X86InstructionSetFeatures : public InstructionSetFeatures {
   const bool has_SSE4_2_;  // x86 128bit SIMD SSE4.2.
   const bool has_AVX_;     // x86 256bit SIMD AVX.
   const bool has_AVX2_;    // x86 256bit SIMD AVX 2.0.
+  const bool prefers_locked_add_;  // x86 use locked add for memory synchronization.
 
   DISALLOW_COPY_AND_ASSIGN(X86InstructionSetFeatures);
 };
diff --git a/runtime/arch/x86/instruction_set_features_x86_test.cc b/runtime/arch/x86/instruction_set_features_x86_test.cc
index 25a406b25a..e8d01e6c14 100644
--- a/runtime/arch/x86/instruction_set_features_x86_test.cc
+++ b/runtime/arch/x86/instruction_set_features_x86_test.cc
@@ -27,7 +27,8 @@ TEST(X86InstructionSetFeaturesTest, X86FeaturesFromDefaultVariant) {
   ASSERT_TRUE(x86_features.get() != nullptr) << error_msg;
   EXPECT_EQ(x86_features->GetInstructionSet(), kX86);
   EXPECT_TRUE(x86_features->Equals(x86_features.get()));
-  EXPECT_STREQ("smp,-ssse3,-sse4.1,-sse4.2,-avx,-avx2", x86_features->GetFeatureString().c_str());
+  EXPECT_STREQ("smp,-ssse3,-sse4.1,-sse4.2,-avx,-avx2,-lock_add",
+               x86_features->GetFeatureString().c_str());
   EXPECT_EQ(x86_features->AsBitmap(), 1U);
 }
 
@@ -39,8 +40,9 @@ TEST(X86InstructionSetFeaturesTest, X86FeaturesFromAtomVariant) {
   ASSERT_TRUE(x86_features.get() != nullptr) << error_msg;
   EXPECT_EQ(x86_features->GetInstructionSet(), kX86);
   EXPECT_TRUE(x86_features->Equals(x86_features.get()));
-  EXPECT_STREQ("smp,ssse3,-sse4.1,-sse4.2,-avx,-avx2", x86_features->GetFeatureString().c_str());
-  EXPECT_EQ(x86_features->AsBitmap(), 3U);
+  EXPECT_STREQ("smp,ssse3,-sse4.1,-sse4.2,-avx,-avx2,lock_add",
+               x86_features->GetFeatureString().c_str());
+  EXPECT_EQ(x86_features->AsBitmap(), 67U);
 
   // Build features for a 32-bit x86 default processor.
   std::unique_ptr<const InstructionSetFeatures> x86_default_features(
@@ -48,7 +50,7 @@ TEST(X86InstructionSetFeaturesTest, X86FeaturesFromAtomVariant) {
   ASSERT_TRUE(x86_default_features.get() != nullptr) << error_msg;
   EXPECT_EQ(x86_default_features->GetInstructionSet(), kX86);
   EXPECT_TRUE(x86_default_features->Equals(x86_default_features.get()));
-  EXPECT_STREQ("smp,-ssse3,-sse4.1,-sse4.2,-avx,-avx2",
+  EXPECT_STREQ("smp,-ssse3,-sse4.1,-sse4.2,-avx,-avx2,-lock_add",
                x86_default_features->GetFeatureString().c_str());
   EXPECT_EQ(x86_default_features->AsBitmap(), 1U);
 
@@ -58,9 +60,9 @@ TEST(X86InstructionSetFeaturesTest, X86FeaturesFromAtomVariant) {
   ASSERT_TRUE(x86_64_features.get() != nullptr) << error_msg;
   EXPECT_EQ(x86_64_features->GetInstructionSet(), kX86_64);
   EXPECT_TRUE(x86_64_features->Equals(x86_64_features.get()));
-  EXPECT_STREQ("smp,ssse3,-sse4.1,-sse4.2,-avx,-avx2",
+  EXPECT_STREQ("smp,ssse3,-sse4.1,-sse4.2,-avx,-avx2,lock_add",
                x86_64_features->GetFeatureString().c_str());
-  EXPECT_EQ(x86_64_features->AsBitmap(), 3U);
+  EXPECT_EQ(x86_64_features->AsBitmap(), 67U);
 
   EXPECT_FALSE(x86_64_features->Equals(x86_features.get()));
   EXPECT_FALSE(x86_64_features->Equals(x86_default_features.get()));
@@ -75,8 +77,9 @@ TEST(X86InstructionSetFeaturesTest, X86FeaturesFromSilvermontVariant) {
   ASSERT_TRUE(x86_features.get() != nullptr) << error_msg;
   EXPECT_EQ(x86_features->GetInstructionSet(), kX86);
   EXPECT_TRUE(x86_features->Equals(x86_features.get()));
-  EXPECT_STREQ("smp,ssse3,sse4.1,sse4.2,-avx,-avx2", x86_features->GetFeatureString().c_str());
-  EXPECT_EQ(x86_features->AsBitmap(), 15U);
+  EXPECT_STREQ("smp,ssse3,sse4.1,sse4.2,-avx,-avx2,lock_add",
+               x86_features->GetFeatureString().c_str());
+  EXPECT_EQ(x86_features->AsBitmap(), 79U);
 
   // Build features for a 32-bit x86 default processor.
   std::unique_ptr<const InstructionSetFeatures> x86_default_features(
@@ -84,7 +87,7 @@ TEST(X86InstructionSetFeaturesTest, X86FeaturesFromSilvermontVariant) {
   ASSERT_TRUE(x86_default_features.get() != nullptr) << error_msg;
   EXPECT_EQ(x86_default_features->GetInstructionSet(), kX86);
   EXPECT_TRUE(x86_default_features->Equals(x86_default_features.get()));
-  EXPECT_STREQ("smp,-ssse3,-sse4.1,-sse4.2,-avx,-avx2",
+  EXPECT_STREQ("smp,-ssse3,-sse4.1,-sse4.2,-avx,-avx2,-lock_add",
                x86_default_features->GetFeatureString().c_str());
   EXPECT_EQ(x86_default_features->AsBitmap(), 1U);
 
@@ -94,9 +97,9 @@ TEST(X86InstructionSetFeaturesTest, X86FeaturesFromSilvermontVariant) {
   ASSERT_TRUE(x86_64_features.get() != nullptr) << error_msg;
   EXPECT_EQ(x86_64_features->GetInstructionSet(), kX86_64);
   EXPECT_TRUE(x86_64_features->Equals(x86_64_features.get()));
-  EXPECT_STREQ("smp,ssse3,sse4.1,sse4.2,-avx,-avx2",
+  EXPECT_STREQ("smp,ssse3,sse4.1,sse4.2,-avx,-avx2,lock_add",
                x86_64_features->GetFeatureString().c_str());
-  EXPECT_EQ(x86_64_features->AsBitmap(), 15U);
+  EXPECT_EQ(x86_64_features->AsBitmap(), 79U);
 
   EXPECT_FALSE(x86_64_features->Equals(x86_features.get()));
   EXPECT_FALSE(x86_64_features->Equals(x86_default_features.get()));
diff --git a/runtime/arch/x86_64/instruction_set_features_x86_64.h b/runtime/arch/x86_64/instruction_set_features_x86_64.h
index 328017716e..b8000d0001 100644
--- a/runtime/arch/x86_64/instruction_set_features_x86_64.h
+++ b/runtime/arch/x86_64/instruction_set_features_x86_64.h
@@ -74,8 +74,9 @@ class X86_64InstructionSetFeatures FINAL : public X86InstructionSetFeatures {
 
  private:
   X86_64InstructionSetFeatures(bool smp, bool has_SSSE3, bool has_SSE4_1, bool has_SSE4_2,
-                               bool has_AVX, bool has_AVX2)
-      : X86InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX, has_AVX2) {
+                               bool has_AVX, bool has_AVX2, bool prefers_locked_add)
+      : X86InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX,
+                                  has_AVX2, prefers_locked_add) {
   }
 
   friend class X86InstructionSetFeatures;
diff --git a/runtime/arch/x86_64/instruction_set_features_x86_64_test.cc b/runtime/arch/x86_64/instruction_set_features_x86_64_test.cc
index 5171080912..4562c64bc9 100644
--- a/runtime/arch/x86_64/instruction_set_features_x86_64_test.cc
+++ b/runtime/arch/x86_64/instruction_set_features_x86_64_test.cc
@@ -27,7 +27,7 @@ TEST(X86_64InstructionSetFeaturesTest, X86Features) {
   ASSERT_TRUE(x86_64_features.get() != nullptr) << error_msg;
   EXPECT_EQ(x86_64_features->GetInstructionSet(), kX86_64);
   EXPECT_TRUE(x86_64_features->Equals(x86_64_features.get()));
-  EXPECT_STREQ("smp,-ssse3,-sse4.1,-sse4.2,-avx,-avx2",
+  EXPECT_STREQ("smp,-ssse3,-sse4.1,-sse4.2,-avx,-avx2,-lock_add",
                x86_64_features->GetFeatureString().c_str());
   EXPECT_EQ(x86_64_features->AsBitmap(), 1U);
 }
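
The updated bitmap expectations follow directly from the feature bits: with kPrefersLockedAdd = 64 joining the existing bitfields, "atom" (smp, ssse3, lock_add) yields 1 + 2 + 64 = 67 and "silvermont" adds sse4.1 and sse4.2 for 79. A self-contained check; the first three bit values are inferred from the unchanged test bitmaps rather than shown in this diff:

  #include <cstdint>
  #include <cstdio>

  enum X86FeatureBits : uint32_t {
    kSmpBitfield      = 1,   // inferred: the default variant's bitmap is 1U
    kSsse3Bitfield    = 2,   // inferred: the pre-patch atom bitmap was 3U
    kSse4_1Bitfield   = 4,   // inferred: the pre-patch silvermont bitmap was 15U
    kSse4_2Bitfield   = 8,
    kAvxBitfield      = 16,  // from instruction_set_features_x86.h above
    kAvx2Bitfield     = 32,
    kPrefersLockedAdd = 64,  // added by this patch
  };

  int main() {
    uint32_t atom = kSmpBitfield | kSsse3Bitfield | kPrefersLockedAdd;
    uint32_t silvermont = atom | kSse4_1Bitfield | kSse4_2Bitfield;
    printf("atom=%u silvermont=%u\n", atom, silvermont);  // atom=67 silvermont=79
    return 0;
  }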