Implemented BitCount as an intrinsic. With unit test.
Rationale:
Recognizing this important operation as an intrinsic has
various advantages:
(1) having the no-side-effects/no-throw allows for
much more GVN/LICM/BCE.
(2) Some architectures, like x86_64, provide direct
support for this operation.
Performance improvements on X86_64:
CheckersEvalBench (32-bit bitboard): 27,210KNS -> 36,798KNS = + 35%
ReversiEvalBench (64-bit bitboard): 52,562KNS -> 89,086KNS = + 69%
Change-Id: I65d549b0469b7909b12c6611cdc34a8640a5751f
diff --git a/compiler/dex/quick/dex_file_method_inliner.cc b/compiler/dex/quick/dex_file_method_inliner.cc
index 4617668..3766093 100644
--- a/compiler/dex/quick/dex_file_method_inliner.cc
+++ b/compiler/dex/quick/dex_file_method_inliner.cc
@@ -39,6 +39,7 @@
true, // kIntrinsicFloatCvt
true, // kIntrinsicReverseBits
true, // kIntrinsicReverseBytes
+ true, // kIntrinsicBitCount
true, // kIntrinsicNumberOfLeadingZeros
true, // kIntrinsicNumberOfTrailingZeros
true, // kIntrinsicRotateRight
@@ -99,6 +100,7 @@
static_assert(kIntrinsicIsStatic[kIntrinsicFloatCvt], "FloatCvt must be static");
static_assert(kIntrinsicIsStatic[kIntrinsicReverseBits], "ReverseBits must be static");
static_assert(kIntrinsicIsStatic[kIntrinsicReverseBytes], "ReverseBytes must be static");
+static_assert(kIntrinsicIsStatic[kIntrinsicBitCount], "BitCount must be static");
static_assert(kIntrinsicIsStatic[kIntrinsicNumberOfLeadingZeros],
"NumberOfLeadingZeros must be static");
static_assert(kIntrinsicIsStatic[kIntrinsicNumberOfTrailingZeros],
@@ -293,6 +295,7 @@
"putObjectVolatile", // kNameCachePutObjectVolatile
"putOrderedObject", // kNameCachePutOrderedObject
"arraycopy", // kNameCacheArrayCopy
+ "bitCount", // kNameCacheBitCount
"numberOfLeadingZeros", // kNameCacheNumberOfLeadingZeros
"numberOfTrailingZeros", // kNameCacheNumberOfTrailingZeros
"rotateRight", // kNameCacheRotateRight
@@ -447,6 +450,8 @@
INTRINSIC(JavaLangInteger, Reverse, I_I, kIntrinsicReverseBits, k32),
INTRINSIC(JavaLangLong, Reverse, J_J, kIntrinsicReverseBits, k64),
+ INTRINSIC(JavaLangInteger, BitCount, I_I, kIntrinsicBitCount, k32),
+ INTRINSIC(JavaLangLong, BitCount, J_I, kIntrinsicBitCount, k64),
INTRINSIC(JavaLangInteger, NumberOfLeadingZeros, I_I, kIntrinsicNumberOfLeadingZeros, k32),
INTRINSIC(JavaLangLong, NumberOfLeadingZeros, J_I, kIntrinsicNumberOfLeadingZeros, k64),
INTRINSIC(JavaLangInteger, NumberOfTrailingZeros, I_I, kIntrinsicNumberOfTrailingZeros, k32),
@@ -745,6 +750,7 @@
intrinsic.d.data & kIntrinsicFlagIsOrdered);
case kIntrinsicSystemArrayCopyCharArray:
return backend->GenInlinedArrayCopyCharArray(info);
+ case kIntrinsicBitCount:
case kIntrinsicNumberOfLeadingZeros:
case kIntrinsicNumberOfTrailingZeros:
case kIntrinsicRotateRight:
diff --git a/compiler/dex/quick/dex_file_method_inliner.h b/compiler/dex/quick/dex_file_method_inliner.h
index ac70577..2803623 100644
--- a/compiler/dex/quick/dex_file_method_inliner.h
+++ b/compiler/dex/quick/dex_file_method_inliner.h
@@ -224,6 +224,7 @@
kNameCachePutObjectVolatile,
kNameCachePutOrderedObject,
kNameCacheArrayCopy,
+ kNameCacheBitCount,
kNameCacheNumberOfLeadingZeros,
kNameCacheNumberOfTrailingZeros,
kNameCacheRotateRight,
diff --git a/compiler/optimizing/intrinsics.cc b/compiler/optimizing/intrinsics.cc
index c6da9a3..5caf077 100644
--- a/compiler/optimizing/intrinsics.cc
+++ b/compiler/optimizing/intrinsics.cc
@@ -176,6 +176,16 @@
}
// Misc data processing.
+ case kIntrinsicBitCount:
+ switch (GetType(method.d.data, true)) {
+ case Primitive::kPrimInt:
+ return Intrinsics::kIntegerBitCount;
+ case Primitive::kPrimLong:
+ return Intrinsics::kLongBitCount;
+ default:
+ LOG(FATAL) << "Unknown/unsupported op size " << method.d.data;
+ UNREACHABLE();
+ }
case kIntrinsicNumberOfLeadingZeros:
switch (GetType(method.d.data, true)) {
case Primitive::kPrimInt:
diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc
index b1fbf28..e72f927 100644
--- a/compiler/optimizing/intrinsics_arm.cc
+++ b/compiler/optimizing/intrinsics_arm.cc
@@ -1577,10 +1577,12 @@
void IntrinsicCodeGeneratorARM::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \
}
+UNIMPLEMENTED_INTRINSIC(IntegerBitCount)
UNIMPLEMENTED_INTRINSIC(IntegerReverse)
UNIMPLEMENTED_INTRINSIC(IntegerReverseBytes)
UNIMPLEMENTED_INTRINSIC(IntegerRotateLeft)
UNIMPLEMENTED_INTRINSIC(IntegerRotateRight)
+UNIMPLEMENTED_INTRINSIC(LongBitCount)
UNIMPLEMENTED_INTRINSIC(LongReverse)
UNIMPLEMENTED_INTRINSIC(LongReverseBytes)
UNIMPLEMENTED_INTRINSIC(LongRotateLeft)
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index 81cab86..c5688a3 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -1447,8 +1447,10 @@
void IntrinsicCodeGeneratorARM64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \
}
+UNIMPLEMENTED_INTRINSIC(IntegerBitCount)
UNIMPLEMENTED_INTRINSIC(IntegerRotateLeft)
UNIMPLEMENTED_INTRINSIC(IntegerRotateRight)
+UNIMPLEMENTED_INTRINSIC(LongBitCount)
UNIMPLEMENTED_INTRINSIC(LongRotateLeft)
UNIMPLEMENTED_INTRINSIC(LongRotateRight)
UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar)
diff --git a/compiler/optimizing/intrinsics_list.h b/compiler/optimizing/intrinsics_list.h
index 2e87546..ea38034 100644
--- a/compiler/optimizing/intrinsics_list.h
+++ b/compiler/optimizing/intrinsics_list.h
@@ -28,12 +28,14 @@
V(FloatIntBitsToFloat, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
V(IntegerReverse, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
V(IntegerReverseBytes, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+ V(IntegerBitCount, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
V(IntegerNumberOfLeadingZeros, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
V(IntegerNumberOfTrailingZeros, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
V(IntegerRotateRight, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
V(IntegerRotateLeft, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
V(LongReverse, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
V(LongReverseBytes, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
+ V(LongBitCount, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
V(LongNumberOfLeadingZeros, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
V(LongNumberOfTrailingZeros, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
V(LongRotateRight, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \
diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc
index bc126a2..81112b1 100644
--- a/compiler/optimizing/intrinsics_mips.cc
+++ b/compiler/optimizing/intrinsics_mips.cc
@@ -935,6 +935,9 @@
void IntrinsicCodeGeneratorMIPS::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \
}
+UNIMPLEMENTED_INTRINSIC(IntegerBitCount)
+UNIMPLEMENTED_INTRINSIC(LongBitCount)
+
UNIMPLEMENTED_INTRINSIC(MathAbsDouble)
UNIMPLEMENTED_INTRINSIC(MathAbsFloat)
UNIMPLEMENTED_INTRINSIC(MathAbsInt)
diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc
index 8b45ea7..ac969e3 100644
--- a/compiler/optimizing/intrinsics_mips64.cc
+++ b/compiler/optimizing/intrinsics_mips64.cc
@@ -1724,6 +1724,9 @@
void IntrinsicCodeGeneratorMIPS64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \
}
+UNIMPLEMENTED_INTRINSIC(IntegerBitCount)
+UNIMPLEMENTED_INTRINSIC(LongBitCount)
+
UNIMPLEMENTED_INTRINSIC(MathRoundDouble)
UNIMPLEMENTED_INTRINSIC(MathRoundFloat)
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc
index 677f2e9..4715bdc 100644
--- a/compiler/optimizing/intrinsics_x86.cc
+++ b/compiler/optimizing/intrinsics_x86.cc
@@ -2518,8 +2518,10 @@
UNIMPLEMENTED_INTRINSIC(MathRoundDouble)
UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent)
+UNIMPLEMENTED_INTRINSIC(IntegerBitCount)
UNIMPLEMENTED_INTRINSIC(IntegerRotateLeft)
UNIMPLEMENTED_INTRINSIC(IntegerRotateRight)
+UNIMPLEMENTED_INTRINSIC(LongBitCount)
UNIMPLEMENTED_INTRINSIC(LongRotateRight)
UNIMPLEMENTED_INTRINSIC(LongRotateLeft)
UNIMPLEMENTED_INTRINSIC(SystemArrayCopy)
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index 690cf3d..23a628f 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -2368,6 +2368,70 @@
SwapBits64(reg, temp1, temp2, 4, INT64_C(0x0f0f0f0f0f0f0f0f), assembler);
}
+static void CreateBitCountLocations(
+ ArenaAllocator* arena, CodeGeneratorX86_64* codegen, HInvoke* invoke) {
+ if (!codegen->GetInstructionSetFeatures().HasPopCnt()) {
+ // Do nothing if there is no popcnt support. This results in generating
+ // a call for the intrinsic rather than direct code.
+ return;
+ }
+ LocationSummary* locations = new (arena) LocationSummary(invoke,
+ LocationSummary::kNoCall,
+ kIntrinsified);
+ locations->SetInAt(0, Location::Any());
+ locations->SetOut(Location::RequiresRegister());
+}
+
+static void GenBitCount(X86_64Assembler* assembler, HInvoke* invoke, bool is_long) {
+ LocationSummary* locations = invoke->GetLocations();
+ Location src = locations->InAt(0);
+ CpuRegister out = locations->Out().AsRegister<CpuRegister>();
+
+ if (invoke->InputAt(0)->IsConstant()) {
+ // Evaluate this at compile time.
+ int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
+ value = is_long
+ ? POPCOUNT(static_cast<uint64_t>(value))
+ : POPCOUNT(static_cast<uint32_t>(value));
+ if (value == 0) {
+ __ xorl(out, out);
+ } else {
+ __ movl(out, Immediate(value));
+ }
+ return;
+ }
+
+ if (src.IsRegister()) {
+ if (is_long) {
+ __ popcntq(out, src.AsRegister<CpuRegister>());
+ } else {
+ __ popcntl(out, src.AsRegister<CpuRegister>());
+ }
+ } else if (is_long) {
+ DCHECK(src.IsDoubleStackSlot());
+ __ popcntq(out, Address(CpuRegister(RSP), src.GetStackIndex()));
+ } else {
+ DCHECK(src.IsStackSlot());
+ __ popcntl(out, Address(CpuRegister(RSP), src.GetStackIndex()));
+ }
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitIntegerBitCount(HInvoke* invoke) {
+ CreateBitCountLocations(arena_, codegen_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitIntegerBitCount(HInvoke* invoke) {
+ GenBitCount(GetAssembler(), invoke, /* is_long */ false);
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitLongBitCount(HInvoke* invoke) {
+ CreateBitCountLocations(arena_, codegen_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitLongBitCount(HInvoke* invoke) {
+ GenBitCount(GetAssembler(), invoke, /* is_long */ true);
+}
+
static void CreateLeadingZeroLocations(ArenaAllocator* arena, HInvoke* invoke) {
LocationSummary* locations = new (arena) LocationSummary(invoke,
LocationSummary::kNoCall,
diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc
index db07267..10f5a00 100644
--- a/compiler/utils/x86_64/assembler_x86_64.cc
+++ b/compiler/utils/x86_64/assembler_x86_64.cc
@@ -2247,6 +2247,42 @@
EmitOperand(dst.LowBits(), src);
}
+void X86_64Assembler::popcntl(CpuRegister dst, CpuRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0xF3);
+ EmitOptionalRex32(dst, src);
+ EmitUint8(0x0F);
+ EmitUint8(0xB8);
+ EmitRegisterOperand(dst.LowBits(), src.LowBits());
+}
+
+void X86_64Assembler::popcntl(CpuRegister dst, const Address& src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0xF3);
+ EmitOptionalRex32(dst, src);
+ EmitUint8(0x0F);
+ EmitUint8(0xB8);
+ EmitOperand(dst.LowBits(), src);
+}
+
+void X86_64Assembler::popcntq(CpuRegister dst, CpuRegister src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0xF3);
+ EmitRex64(dst, src);
+ EmitUint8(0x0F);
+ EmitUint8(0xB8);
+ EmitRegisterOperand(dst.LowBits(), src.LowBits());
+}
+
+void X86_64Assembler::popcntq(CpuRegister dst, const Address& src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0xF3);
+ EmitRex64(dst, src);
+ EmitUint8(0x0F);
+ EmitUint8(0xB8);
+ EmitOperand(dst.LowBits(), src);
+}
+
void X86_64Assembler::repne_scasw() {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
EmitUint8(0x66);
diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h
index 01d28e3..6f0847e 100644
--- a/compiler/utils/x86_64/assembler_x86_64.h
+++ b/compiler/utils/x86_64/assembler_x86_64.h
@@ -647,6 +647,11 @@
void bsrq(CpuRegister dst, CpuRegister src);
void bsrq(CpuRegister dst, const Address& src);
+ void popcntl(CpuRegister dst, CpuRegister src);
+ void popcntl(CpuRegister dst, const Address& src);
+ void popcntq(CpuRegister dst, CpuRegister src);
+ void popcntq(CpuRegister dst, const Address& src);
+
void rorl(CpuRegister reg, const Immediate& imm);
void rorl(CpuRegister operand, CpuRegister shifter);
void roll(CpuRegister reg, const Immediate& imm);
diff --git a/compiler/utils/x86_64/assembler_x86_64_test.cc b/compiler/utils/x86_64/assembler_x86_64_test.cc
index 00bb5ca..8a87fca 100644
--- a/compiler/utils/x86_64/assembler_x86_64_test.cc
+++ b/compiler/utils/x86_64/assembler_x86_64_test.cc
@@ -1333,6 +1333,44 @@
DriverStr(expected, "bsrq_address");
}
+TEST_F(AssemblerX86_64Test, Popcntl) {
+ DriverStr(Repeatrr(&x86_64::X86_64Assembler::popcntl, "popcntl %{reg2}, %{reg1}"), "popcntl");
+}
+
+TEST_F(AssemblerX86_64Test, PopcntlAddress) {
+ GetAssembler()->popcntl(x86_64::CpuRegister(x86_64::R10), x86_64::Address(
+ x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::RBX), x86_64::TIMES_4, 12));
+ GetAssembler()->popcntl(x86_64::CpuRegister(x86_64::RDI), x86_64::Address(
+ x86_64::CpuRegister(x86_64::R10), x86_64::CpuRegister(x86_64::RBX), x86_64::TIMES_4, 12));
+ GetAssembler()->popcntl(x86_64::CpuRegister(x86_64::RDI), x86_64::Address(
+ x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::R9), x86_64::TIMES_4, 12));
+ const char* expected =
+ "popcntl 0xc(%RDI,%RBX,4), %R10d\n"
+ "popcntl 0xc(%R10,%RBX,4), %edi\n"
+ "popcntl 0xc(%RDI,%R9,4), %edi\n";
+
+ DriverStr(expected, "popcntl_address");
+}
+
+TEST_F(AssemblerX86_64Test, Popcntq) {
+ DriverStr(RepeatRR(&x86_64::X86_64Assembler::popcntq, "popcntq %{reg2}, %{reg1}"), "popcntq");
+}
+
+TEST_F(AssemblerX86_64Test, PopcntqAddress) {
+ GetAssembler()->popcntq(x86_64::CpuRegister(x86_64::R10), x86_64::Address(
+ x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::RBX), x86_64::TIMES_4, 12));
+ GetAssembler()->popcntq(x86_64::CpuRegister(x86_64::RDI), x86_64::Address(
+ x86_64::CpuRegister(x86_64::R10), x86_64::CpuRegister(x86_64::RBX), x86_64::TIMES_4, 12));
+ GetAssembler()->popcntq(x86_64::CpuRegister(x86_64::RDI), x86_64::Address(
+ x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::R9), x86_64::TIMES_4, 12));
+ const char* expected =
+ "popcntq 0xc(%RDI,%RBX,4), %R10\n"
+ "popcntq 0xc(%R10,%RBX,4), %RDI\n"
+ "popcntq 0xc(%RDI,%R9,4), %RDI\n";
+
+ DriverStr(expected, "popcntq_address");
+}
+
/////////////////
// Near labels //
/////////////////
diff --git a/disassembler/disassembler_x86.cc b/disassembler/disassembler_x86.cc
index d4bef0f..1f74c93 100644
--- a/disassembler/disassembler_x86.cc
+++ b/disassembler/disassembler_x86.cc
@@ -938,6 +938,11 @@
has_modrm = true;
load = true;
break;
+ case 0xB8:
+ opcode1 = "popcnt";
+ has_modrm = true;
+ load = true;
+ break;
case 0xBE:
opcode1 = "movsxb";
has_modrm = true;
diff --git a/runtime/arch/x86/instruction_set_features_x86.cc b/runtime/arch/x86/instruction_set_features_x86.cc
index 42f5df4..da01ee4 100644
--- a/runtime/arch/x86/instruction_set_features_x86.cc
+++ b/runtime/arch/x86/instruction_set_features_x86.cc
@@ -50,6 +50,10 @@
"silvermont",
};
+static constexpr const char* x86_variants_with_popcnt[] = {
+ "silvermont",
+};
+
const X86InstructionSetFeatures* X86InstructionSetFeatures::FromVariant(
const std::string& variant, std::string* error_msg ATTRIBUTE_UNUSED,
bool x86_64) {
@@ -69,6 +73,11 @@
arraysize(x86_variants_prefer_locked_add_sync),
variant);
+ bool has_POPCNT = FindVariantInArray(x86_variants_with_popcnt,
+ arraysize(x86_variants_with_popcnt),
+ variant);
+
+ // Verify that variant is known.
bool known_variant = FindVariantInArray(x86_known_variants, arraysize(x86_known_variants),
variant);
if (!known_variant && variant != "default") {
@@ -77,10 +86,10 @@
if (x86_64) {
return new X86_64InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX,
- has_AVX2, prefers_locked_add);
+ has_AVX2, prefers_locked_add, has_POPCNT);
} else {
return new X86InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX,
- has_AVX2, prefers_locked_add);
+ has_AVX2, prefers_locked_add, has_POPCNT);
}
}
@@ -93,12 +102,15 @@
bool has_AVX = (bitmap & kAvxBitfield) != 0;
bool has_AVX2 = (bitmap & kAvxBitfield) != 0;
bool prefers_locked_add = (bitmap & kPrefersLockedAdd) != 0;
+ bool has_POPCNT = (bitmap & kPopCntBitfield) != 0;
if (x86_64) {
return new X86_64InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2,
- has_AVX, has_AVX2, prefers_locked_add);
+ has_AVX, has_AVX2, prefers_locked_add,
+ has_POPCNT);
} else {
return new X86InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2,
- has_AVX, has_AVX2, prefers_locked_add);
+ has_AVX, has_AVX2, prefers_locked_add,
+ has_POPCNT);
}
}
@@ -138,12 +150,15 @@
// No #define for memory synchronization preference.
const bool prefers_locked_add = false;
+ // No #define for popcnt.
+ const bool has_POPCNT = false;
+
if (x86_64) {
return new X86_64InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX,
- has_AVX2, prefers_locked_add);
+ has_AVX2, prefers_locked_add, has_POPCNT);
} else {
return new X86InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX,
- has_AVX2, prefers_locked_add);
+ has_AVX2, prefers_locked_add, has_POPCNT);
}
}
@@ -158,6 +173,7 @@
bool has_AVX2 = false;
// No cpuinfo for memory synchronization preference.
const bool prefers_locked_add = false;
+ bool has_POPCNT = false;
std::ifstream in("/proc/cpuinfo");
if (!in.fail()) {
@@ -183,6 +199,9 @@
if (line.find("avx2") != std::string::npos) {
has_AVX2 = true;
}
+ if (line.find("popcnt") != std::string::npos) {
+ has_POPCNT = true;
+ }
} else if (line.find("processor") != std::string::npos &&
line.find(": 1") != std::string::npos) {
smp = true;
@@ -195,10 +214,10 @@
}
if (x86_64) {
return new X86_64InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX,
- has_AVX2, prefers_locked_add);
+ has_AVX2, prefers_locked_add, has_POPCNT);
} else {
return new X86InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX,
- has_AVX2, prefers_locked_add);
+ has_AVX2, prefers_locked_add, has_POPCNT);
}
}
@@ -223,7 +242,8 @@
(has_SSE4_2_ == other_as_x86->has_SSE4_2_) &&
(has_AVX_ == other_as_x86->has_AVX_) &&
(has_AVX2_ == other_as_x86->has_AVX2_) &&
- (prefers_locked_add_ == other_as_x86->prefers_locked_add_);
+ (prefers_locked_add_ == other_as_x86->prefers_locked_add_) &&
+ (has_POPCNT_ == other_as_x86->has_POPCNT_);
}
uint32_t X86InstructionSetFeatures::AsBitmap() const {
@@ -233,7 +253,8 @@
(has_SSE4_2_ ? kSse4_2Bitfield : 0) |
(has_AVX_ ? kAvxBitfield : 0) |
(has_AVX2_ ? kAvx2Bitfield : 0) |
- (prefers_locked_add_ ? kPrefersLockedAdd : 0);
+ (prefers_locked_add_ ? kPrefersLockedAdd : 0) |
+ (has_POPCNT_ ? kPopCntBitfield : 0);
}
std::string X86InstructionSetFeatures::GetFeatureString() const {
@@ -273,6 +294,11 @@
} else {
result += ",-lock_add";
}
+ if (has_POPCNT_) {
+ result += ",popcnt";
+ } else {
+ result += ",-popcnt";
+ }
return result;
}
@@ -285,6 +311,7 @@
bool has_AVX = has_AVX_;
bool has_AVX2 = has_AVX2_;
bool prefers_locked_add = prefers_locked_add_;
+ bool has_POPCNT = has_POPCNT_;
for (auto i = features.begin(); i != features.end(); i++) {
std::string feature = Trim(*i);
if (feature == "ssse3") {
@@ -311,6 +338,10 @@
prefers_locked_add = true;
} else if (feature == "-lock_add") {
prefers_locked_add = false;
+ } else if (feature == "popcnt") {
+ has_POPCNT = true;
+ } else if (feature == "-popcnt") {
+ has_POPCNT = false;
} else {
*error_msg = StringPrintf("Unknown instruction set feature: '%s'", feature.c_str());
return nullptr;
@@ -318,10 +349,10 @@
}
if (x86_64) {
return new X86_64InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX,
- has_AVX2, prefers_locked_add);
+ has_AVX2, prefers_locked_add, has_POPCNT);
} else {
return new X86InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX,
- has_AVX2, prefers_locked_add);
+ has_AVX2, prefers_locked_add, has_POPCNT);
}
}
diff --git a/runtime/arch/x86/instruction_set_features_x86.h b/runtime/arch/x86/instruction_set_features_x86.h
index 2b845f8..1819654 100644
--- a/runtime/arch/x86/instruction_set_features_x86.h
+++ b/runtime/arch/x86/instruction_set_features_x86.h
@@ -62,6 +62,8 @@
bool PrefersLockedAddSynchronization() const { return prefers_locked_add_; }
+ bool HasPopCnt() const { return has_POPCNT_; }
+
protected:
// Parse a string of the form "ssse3" adding these to a new InstructionSetFeatures.
virtual const InstructionSetFeatures*
@@ -75,10 +77,17 @@
bool x86_64, std::string* error_msg) const;
X86InstructionSetFeatures(bool smp, bool has_SSSE3, bool has_SSE4_1, bool has_SSE4_2,
- bool has_AVX, bool has_AVX2, bool prefers_locked_add)
- : InstructionSetFeatures(smp), has_SSSE3_(has_SSSE3), has_SSE4_1_(has_SSE4_1),
- has_SSE4_2_(has_SSE4_2), has_AVX_(has_AVX), has_AVX2_(has_AVX2),
- prefers_locked_add_(prefers_locked_add) {
+ bool has_AVX, bool has_AVX2,
+ bool prefers_locked_add,
+ bool has_POPCNT)
+ : InstructionSetFeatures(smp),
+ has_SSSE3_(has_SSSE3),
+ has_SSE4_1_(has_SSE4_1),
+ has_SSE4_2_(has_SSE4_2),
+ has_AVX_(has_AVX),
+ has_AVX2_(has_AVX2),
+ prefers_locked_add_(prefers_locked_add),
+ has_POPCNT_(has_POPCNT) {
}
private:
@@ -91,6 +100,7 @@
kAvxBitfield = 16,
kAvx2Bitfield = 32,
kPrefersLockedAdd = 64,
+ kPopCntBitfield = 128,
};
const bool has_SSSE3_; // x86 128bit SIMD - Supplemental SSE.
@@ -99,6 +109,7 @@
const bool has_AVX_; // x86 256bit SIMD AVX.
const bool has_AVX2_; // x86 256bit SIMD AVX 2.0.
const bool prefers_locked_add_; // x86 use locked add for memory synchronization.
+ const bool has_POPCNT_; // x86 population count
DISALLOW_COPY_AND_ASSIGN(X86InstructionSetFeatures);
};
diff --git a/runtime/arch/x86/instruction_set_features_x86_test.cc b/runtime/arch/x86/instruction_set_features_x86_test.cc
index e8d01e6..a062c12 100644
--- a/runtime/arch/x86/instruction_set_features_x86_test.cc
+++ b/runtime/arch/x86/instruction_set_features_x86_test.cc
@@ -27,7 +27,7 @@
ASSERT_TRUE(x86_features.get() != nullptr) << error_msg;
EXPECT_EQ(x86_features->GetInstructionSet(), kX86);
EXPECT_TRUE(x86_features->Equals(x86_features.get()));
- EXPECT_STREQ("smp,-ssse3,-sse4.1,-sse4.2,-avx,-avx2,-lock_add",
+ EXPECT_STREQ("smp,-ssse3,-sse4.1,-sse4.2,-avx,-avx2,-lock_add,-popcnt",
x86_features->GetFeatureString().c_str());
EXPECT_EQ(x86_features->AsBitmap(), 1U);
}
@@ -40,7 +40,7 @@
ASSERT_TRUE(x86_features.get() != nullptr) << error_msg;
EXPECT_EQ(x86_features->GetInstructionSet(), kX86);
EXPECT_TRUE(x86_features->Equals(x86_features.get()));
- EXPECT_STREQ("smp,ssse3,-sse4.1,-sse4.2,-avx,-avx2,lock_add",
+ EXPECT_STREQ("smp,ssse3,-sse4.1,-sse4.2,-avx,-avx2,lock_add,-popcnt",
x86_features->GetFeatureString().c_str());
EXPECT_EQ(x86_features->AsBitmap(), 67U);
@@ -50,7 +50,7 @@
ASSERT_TRUE(x86_default_features.get() != nullptr) << error_msg;
EXPECT_EQ(x86_default_features->GetInstructionSet(), kX86);
EXPECT_TRUE(x86_default_features->Equals(x86_default_features.get()));
- EXPECT_STREQ("smp,-ssse3,-sse4.1,-sse4.2,-avx,-avx2,-lock_add",
+ EXPECT_STREQ("smp,-ssse3,-sse4.1,-sse4.2,-avx,-avx2,-lock_add,-popcnt",
x86_default_features->GetFeatureString().c_str());
EXPECT_EQ(x86_default_features->AsBitmap(), 1U);
@@ -60,7 +60,7 @@
ASSERT_TRUE(x86_64_features.get() != nullptr) << error_msg;
EXPECT_EQ(x86_64_features->GetInstructionSet(), kX86_64);
EXPECT_TRUE(x86_64_features->Equals(x86_64_features.get()));
- EXPECT_STREQ("smp,ssse3,-sse4.1,-sse4.2,-avx,-avx2,lock_add",
+ EXPECT_STREQ("smp,ssse3,-sse4.1,-sse4.2,-avx,-avx2,lock_add,-popcnt",
x86_64_features->GetFeatureString().c_str());
EXPECT_EQ(x86_64_features->AsBitmap(), 67U);
@@ -77,9 +77,9 @@
ASSERT_TRUE(x86_features.get() != nullptr) << error_msg;
EXPECT_EQ(x86_features->GetInstructionSet(), kX86);
EXPECT_TRUE(x86_features->Equals(x86_features.get()));
- EXPECT_STREQ("smp,ssse3,sse4.1,sse4.2,-avx,-avx2,lock_add",
+ EXPECT_STREQ("smp,ssse3,sse4.1,sse4.2,-avx,-avx2,lock_add,popcnt",
x86_features->GetFeatureString().c_str());
- EXPECT_EQ(x86_features->AsBitmap(), 79U);
+ EXPECT_EQ(x86_features->AsBitmap(), 207U);
// Build features for a 32-bit x86 default processor.
std::unique_ptr<const InstructionSetFeatures> x86_default_features(
@@ -87,7 +87,7 @@
ASSERT_TRUE(x86_default_features.get() != nullptr) << error_msg;
EXPECT_EQ(x86_default_features->GetInstructionSet(), kX86);
EXPECT_TRUE(x86_default_features->Equals(x86_default_features.get()));
- EXPECT_STREQ("smp,-ssse3,-sse4.1,-sse4.2,-avx,-avx2,-lock_add",
+ EXPECT_STREQ("smp,-ssse3,-sse4.1,-sse4.2,-avx,-avx2,-lock_add,-popcnt",
x86_default_features->GetFeatureString().c_str());
EXPECT_EQ(x86_default_features->AsBitmap(), 1U);
@@ -97,9 +97,9 @@
ASSERT_TRUE(x86_64_features.get() != nullptr) << error_msg;
EXPECT_EQ(x86_64_features->GetInstructionSet(), kX86_64);
EXPECT_TRUE(x86_64_features->Equals(x86_64_features.get()));
- EXPECT_STREQ("smp,ssse3,sse4.1,sse4.2,-avx,-avx2,lock_add",
+ EXPECT_STREQ("smp,ssse3,sse4.1,sse4.2,-avx,-avx2,lock_add,popcnt",
x86_64_features->GetFeatureString().c_str());
- EXPECT_EQ(x86_64_features->AsBitmap(), 79U);
+ EXPECT_EQ(x86_64_features->AsBitmap(), 207U);
EXPECT_FALSE(x86_64_features->Equals(x86_features.get()));
EXPECT_FALSE(x86_64_features->Equals(x86_default_features.get()));
diff --git a/runtime/arch/x86_64/instruction_set_features_x86_64.h b/runtime/arch/x86_64/instruction_set_features_x86_64.h
index b8000d0..aba7234 100644
--- a/runtime/arch/x86_64/instruction_set_features_x86_64.h
+++ b/runtime/arch/x86_64/instruction_set_features_x86_64.h
@@ -74,9 +74,10 @@
private:
X86_64InstructionSetFeatures(bool smp, bool has_SSSE3, bool has_SSE4_1, bool has_SSE4_2,
- bool has_AVX, bool has_AVX2, bool prefers_locked_add)
+ bool has_AVX, bool has_AVX2, bool prefers_locked_add,
+ bool has_POPCNT)
: X86InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX,
- has_AVX2, prefers_locked_add) {
+ has_AVX2, prefers_locked_add, has_POPCNT) {
}
friend class X86InstructionSetFeatures;
diff --git a/runtime/arch/x86_64/instruction_set_features_x86_64_test.cc b/runtime/arch/x86_64/instruction_set_features_x86_64_test.cc
index 4562c64..78aeacf 100644
--- a/runtime/arch/x86_64/instruction_set_features_x86_64_test.cc
+++ b/runtime/arch/x86_64/instruction_set_features_x86_64_test.cc
@@ -27,7 +27,7 @@
ASSERT_TRUE(x86_64_features.get() != nullptr) << error_msg;
EXPECT_EQ(x86_64_features->GetInstructionSet(), kX86_64);
EXPECT_TRUE(x86_64_features->Equals(x86_64_features.get()));
- EXPECT_STREQ("smp,-ssse3,-sse4.1,-sse4.2,-avx,-avx2,-lock_add",
+ EXPECT_STREQ("smp,-ssse3,-sse4.1,-sse4.2,-avx,-avx2,-lock_add,-popcnt",
x86_64_features->GetFeatureString().c_str());
EXPECT_EQ(x86_64_features->AsBitmap(), 1U);
}
diff --git a/runtime/quick/inline_method_analyser.h b/runtime/quick/inline_method_analyser.h
index 6cea902..ca456c2 100644
--- a/runtime/quick/inline_method_analyser.h
+++ b/runtime/quick/inline_method_analyser.h
@@ -39,6 +39,7 @@
kIntrinsicFloatCvt,
kIntrinsicReverseBits,
kIntrinsicReverseBytes,
+ kIntrinsicBitCount,
kIntrinsicNumberOfLeadingZeros,
kIntrinsicNumberOfTrailingZeros,
kIntrinsicRotateRight,
diff --git a/test/564-checker-bitcount/expected.txt b/test/564-checker-bitcount/expected.txt
new file mode 100644
index 0000000..b0aad4d
--- /dev/null
+++ b/test/564-checker-bitcount/expected.txt
@@ -0,0 +1 @@
+passed
diff --git a/test/564-checker-bitcount/info.txt b/test/564-checker-bitcount/info.txt
new file mode 100644
index 0000000..57db66b
--- /dev/null
+++ b/test/564-checker-bitcount/info.txt
@@ -0,0 +1 @@
+Unit test for 32-bit and 64-bit bit count operation.
diff --git a/test/564-checker-bitcount/src/Main.java b/test/564-checker-bitcount/src/Main.java
new file mode 100644
index 0000000..b250145
--- /dev/null
+++ b/test/564-checker-bitcount/src/Main.java
@@ -0,0 +1,90 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+ // TODO: make this work when b/26700769 is done.
+ //
+ // CHECK-START-X86_64: int Main.bits32(int) disassembly (after)
+ // CHECK-DAG: popcnt
+ //
+ // CHECK-START-X86_64: int Main.bits32(int) disassembly (after)
+ // CHECK-NOT: call
+ private static int bits32(int x) {
+ return Integer.bitCount(x);
+ }
+
+ // TODO: make this work when b/26700769 is done.
+ //
+ // CHECK-START-X86_64: int Main.bits64(long) disassembly (after)
+ // CHECK-DAG: popcnt
+ //
+ // CHECK-START-X86_64: int Main.bits64(long) disassembly (after)
+ // CHECK-NOT: call
+ private static int bits64(long x) {
+ return Long.bitCount(x);
+ }
+
+ public static void main(String args[]) {
+ expectEquals32(bits32(0x00000000), 0);
+ expectEquals32(bits32(0x00000001), 1);
+ expectEquals32(bits32(0x10000000), 1);
+ expectEquals32(bits32(0x10000001), 2);
+ expectEquals32(bits32(0x00000003), 2);
+ expectEquals32(bits32(0x70000000), 3);
+ expectEquals32(bits32(0x000F0000), 4);
+ expectEquals32(bits32(0x00001111), 4);
+ expectEquals32(bits32(0x11110000), 4);
+ expectEquals32(bits32(0x11111111), 8);
+ expectEquals32(bits32(0x12345678), 13);
+ expectEquals32(bits32(0x9ABCDEF0), 19);
+ expectEquals32(bits32(0xFFFFFFFF), 32);
+
+ for (int i = 0; i < 32; i++) {
+ expectEquals32(bits32(1 << i), 1);
+ }
+
+ expectEquals64(bits64(0x0000000000000000L), 0);
+ expectEquals64(bits64(0x0000000000000001L), 1);
+ expectEquals64(bits64(0x1000000000000000L), 1);
+ expectEquals64(bits64(0x1000000000000001L), 2);
+ expectEquals64(bits64(0x0000000000000003L), 2);
+ expectEquals64(bits64(0x7000000000000000L), 3);
+ expectEquals64(bits64(0x000F000000000000L), 4);
+ expectEquals64(bits64(0x0000000011111111L), 8);
+ expectEquals64(bits64(0x1111111100000000L), 8);
+ expectEquals64(bits64(0x1111111111111111L), 16);
+ expectEquals64(bits64(0x123456789ABCDEF1L), 33);
+ expectEquals64(bits64(0xFFFFFFFFFFFFFFFFL), 64);
+
+ for (int i = 0; i < 64; i++) {
+ expectEquals64(bits64(1L << i), 1);
+ }
+
+ System.out.println("passed");
+ }
+
+ private static void expectEquals32(int expected, int result) {
+ if (expected != result) {
+ throw new Error("Expected: " + expected + ", found: " + result);
+ }
+ }
+ private static void expectEquals64(long expected, long result) {
+ if (expected != result) {
+ throw new Error("Expected: " + expected + ", found: " + result);
+ }
+ }
+}