Support for x86 popcnt.
Change-Id: I0fc4e745764f1749a6437a199a594f3d8ea53eef
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc
index 4715bdc..e48bed5 100644
--- a/compiler/optimizing/intrinsics_x86.cc
+++ b/compiler/optimizing/intrinsics_x86.cc
@@ -2303,6 +2303,81 @@
SwapBits(reg_high, temp, 4, 0x0f0f0f0f, assembler);
}
+static void CreateBitCountLocations(
+ ArenaAllocator* arena, CodeGeneratorX86* codegen, HInvoke* invoke, bool is_long) {
+ if (!codegen->GetInstructionSetFeatures().HasPopCnt()) {
+ // Do nothing if there is no popcnt support. This results in generating
+ // a call for the intrinsic rather than direct code.
+ return;
+ }
+ LocationSummary* locations = new (arena) LocationSummary(invoke,
+ LocationSummary::kNoCall,
+ kIntrinsified);
+ if (is_long) {
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->AddTemp(Location::RequiresRegister());
+ } else {
+ locations->SetInAt(0, Location::Any());
+ }
+ locations->SetOut(Location::RequiresRegister());
+}
+
+static void GenBitCount(X86Assembler* assembler, HInvoke* invoke, bool is_long) {
+ LocationSummary* locations = invoke->GetLocations();
+ Location src = locations->InAt(0);
+ Register out = locations->Out().AsRegister<Register>();
+
+ if (invoke->InputAt(0)->IsConstant()) {
+ // Evaluate this at compile time.
+ int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
+ value = is_long
+ ? POPCOUNT(static_cast<uint64_t>(value))
+ : POPCOUNT(static_cast<uint32_t>(value));
+ if (value == 0) {
+ __ xorl(out, out);
+ } else {
+ __ movl(out, Immediate(value));
+ }
+ return;
+ }
+
+ // Handle the non-constant cases.
+ if (!is_long) {
+ if (src.IsRegister()) {
+ __ popcntl(out, src.AsRegister<Register>());
+ } else {
+ DCHECK(src.IsStackSlot());
+ __ popcntl(out, Address(ESP, src.GetStackIndex()));
+ }
+ return;
+ }
+
+ // The 64-bit case needs to worry about both parts of the register.
+ DCHECK(src.IsRegisterPair());
+ Register src_lo = src.AsRegisterPairLow<Register>();
+ Register src_hi = src.AsRegisterPairHigh<Register>();
+ Register temp = locations->GetTemp(0).AsRegister<Register>();
+ __ popcntl(temp, src_lo);
+ __ popcntl(out, src_hi);
+ __ addl(out, temp);
+}
+
+void IntrinsicLocationsBuilderX86::VisitIntegerBitCount(HInvoke* invoke) {
+ CreateBitCountLocations(arena_, codegen_, invoke, /* is_long */ false);
+}
+
+void IntrinsicCodeGeneratorX86::VisitIntegerBitCount(HInvoke* invoke) {
+ GenBitCount(GetAssembler(), invoke, /* is_long */ false);
+}
+
+void IntrinsicLocationsBuilderX86::VisitLongBitCount(HInvoke* invoke) {
+ CreateBitCountLocations(arena_, codegen_, invoke, /* is_long */ true);
+}
+
+void IntrinsicCodeGeneratorX86::VisitLongBitCount(HInvoke* invoke) {
+ GenBitCount(GetAssembler(), invoke, /* is_long */ true);
+}
+
static void CreateLeadingZeroLocations(ArenaAllocator* arena, HInvoke* invoke, bool is_long) {
LocationSummary* locations = new (arena) LocationSummary(invoke,
LocationSummary::kNoCall,
@@ -2518,10 +2593,8 @@
UNIMPLEMENTED_INTRINSIC(MathRoundDouble)
UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent)
-UNIMPLEMENTED_INTRINSIC(IntegerBitCount)
UNIMPLEMENTED_INTRINSIC(IntegerRotateLeft)
UNIMPLEMENTED_INTRINSIC(IntegerRotateRight)
-UNIMPLEMENTED_INTRINSIC(LongBitCount)
UNIMPLEMENTED_INTRINSIC(LongRotateRight)
UNIMPLEMENTED_INTRINSIC(LongRotateLeft)
UNIMPLEMENTED_INTRINSIC(SystemArrayCopy)
diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc
index d6caa3c..7138a46 100644
--- a/compiler/utils/x86/assembler_x86.cc
+++ b/compiler/utils/x86/assembler_x86.cc
@@ -186,6 +186,22 @@
EmitOperand(dst, src);
}
+void X86Assembler::popcntl(Register dst, Register src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0xF3);
+ EmitUint8(0x0F);
+ EmitUint8(0xB8);
+ EmitRegisterOperand(dst, src);
+}
+
+void X86Assembler::popcntl(Register dst, const Address& src) {
+ AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+ EmitUint8(0xF3);
+ EmitUint8(0x0F);
+ EmitUint8(0xB8);
+ EmitOperand(dst, src);
+}
+
void X86Assembler::movzxb(Register dst, ByteRegister src) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
EmitUint8(0x0F);
diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h
index 655af9c..759a41e 100644
--- a/compiler/utils/x86/assembler_x86.h
+++ b/compiler/utils/x86/assembler_x86.h
@@ -330,11 +330,15 @@
void movntl(const Address& dst, Register src);
void bswapl(Register dst);
+
void bsfl(Register dst, Register src);
void bsfl(Register dst, const Address& src);
void bsrl(Register dst, Register src);
void bsrl(Register dst, const Address& src);
+ void popcntl(Register dst, Register src);
+ void popcntl(Register dst, const Address& src);
+
void rorl(Register reg, const Immediate& imm);
void rorl(Register operand, Register shifter);
void roll(Register reg, const Immediate& imm);
diff --git a/compiler/utils/x86/assembler_x86_test.cc b/compiler/utils/x86/assembler_x86_test.cc
index a9b991c..0fd0982 100644
--- a/compiler/utils/x86/assembler_x86_test.cc
+++ b/compiler/utils/x86/assembler_x86_test.cc
@@ -260,6 +260,19 @@
DriverStr(expected, "bsrl_address");
}
+TEST_F(AssemblerX86Test, Popcntl) {
+ DriverStr(RepeatRR(&x86::X86Assembler::popcntl, "popcntl %{reg2}, %{reg1}"), "popcntl");
+}
+
+TEST_F(AssemblerX86Test, PopcntlAddress) {
+ GetAssembler()->popcntl(x86::Register(x86::EDI), x86::Address(
+ x86::Register(x86::EDI), x86::Register(x86::EBX), x86::TIMES_4, 12));
+ const char* expected =
+ "popcntl 0xc(%EDI,%EBX,4), %EDI\n";
+
+ DriverStr(expected, "popcntl_address");
+}
+
// Rorl only allows CL as the shift count.
std::string rorl_fn(AssemblerX86Test::Base* assembler_test, x86::X86Assembler* assembler) {
std::ostringstream str;