diff options
Diffstat (limited to 'compiler')
| -rw-r--r-- | compiler/optimizing/intrinsics_x86.cc | 77 | ||||
| -rw-r--r-- | compiler/utils/x86/assembler_x86.cc | 16 | ||||
| -rw-r--r-- | compiler/utils/x86/assembler_x86.h | 4 | ||||
| -rw-r--r-- | compiler/utils/x86/assembler_x86_test.cc | 13 |
4 files changed, 108 insertions, 2 deletions
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc index 4715bdca2e..e48bed59d7 100644 --- a/compiler/optimizing/intrinsics_x86.cc +++ b/compiler/optimizing/intrinsics_x86.cc @@ -2303,6 +2303,81 @@ void IntrinsicCodeGeneratorX86::VisitLongReverse(HInvoke* invoke) { SwapBits(reg_high, temp, 4, 0x0f0f0f0f, assembler); } +static void CreateBitCountLocations( + ArenaAllocator* arena, CodeGeneratorX86* codegen, HInvoke* invoke, bool is_long) { + if (!codegen->GetInstructionSetFeatures().HasPopCnt()) { + // Do nothing if there is no popcnt support. This results in generating + // a call for the intrinsic rather than direct code. + return; + } + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + if (is_long) { + locations->SetInAt(0, Location::RequiresRegister()); + locations->AddTemp(Location::RequiresRegister()); + } else { + locations->SetInAt(0, Location::Any()); + } + locations->SetOut(Location::RequiresRegister()); +} + +static void GenBitCount(X86Assembler* assembler, HInvoke* invoke, bool is_long) { + LocationSummary* locations = invoke->GetLocations(); + Location src = locations->InAt(0); + Register out = locations->Out().AsRegister<Register>(); + + if (invoke->InputAt(0)->IsConstant()) { + // Evaluate this at compile time. + int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant()); + value = is_long + ? POPCOUNT(static_cast<uint64_t>(value)) + : POPCOUNT(static_cast<uint32_t>(value)); + if (value == 0) { + __ xorl(out, out); + } else { + __ movl(out, Immediate(value)); + } + return; + } + + // Handle the non-constant cases. + if (!is_long) { + if (src.IsRegister()) { + __ popcntl(out, src.AsRegister<Register>()); + } else { + DCHECK(src.IsStackSlot()); + __ popcntl(out, Address(ESP, src.GetStackIndex())); + } + return; + } + + // The 64-bit case needs to worry about both parts of the register. + DCHECK(src.IsRegisterPair()); + Register src_lo = src.AsRegisterPairLow<Register>(); + Register src_hi = src.AsRegisterPairHigh<Register>(); + Register temp = locations->GetTemp(0).AsRegister<Register>(); + __ popcntl(temp, src_lo); + __ popcntl(out, src_hi); + __ addl(out, temp); +} + +void IntrinsicLocationsBuilderX86::VisitIntegerBitCount(HInvoke* invoke) { + CreateBitCountLocations(arena_, codegen_, invoke, /* is_long */ false); +} + +void IntrinsicCodeGeneratorX86::VisitIntegerBitCount(HInvoke* invoke) { + GenBitCount(GetAssembler(), invoke, /* is_long */ false); +} + +void IntrinsicLocationsBuilderX86::VisitLongBitCount(HInvoke* invoke) { + CreateBitCountLocations(arena_, codegen_, invoke, /* is_long */ true); +} + +void IntrinsicCodeGeneratorX86::VisitLongBitCount(HInvoke* invoke) { + GenBitCount(GetAssembler(), invoke, /* is_long */ true); +} + static void CreateLeadingZeroLocations(ArenaAllocator* arena, HInvoke* invoke, bool is_long) { LocationSummary* locations = new (arena) LocationSummary(invoke, LocationSummary::kNoCall, @@ -2518,10 +2593,8 @@ void IntrinsicCodeGeneratorX86::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) UNIMPLEMENTED_INTRINSIC(MathRoundDouble) UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent) -UNIMPLEMENTED_INTRINSIC(IntegerBitCount) UNIMPLEMENTED_INTRINSIC(IntegerRotateLeft) UNIMPLEMENTED_INTRINSIC(IntegerRotateRight) -UNIMPLEMENTED_INTRINSIC(LongBitCount) UNIMPLEMENTED_INTRINSIC(LongRotateRight) UNIMPLEMENTED_INTRINSIC(LongRotateLeft) UNIMPLEMENTED_INTRINSIC(SystemArrayCopy) diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc index d6caa3c338..7138a46890 100644 --- a/compiler/utils/x86/assembler_x86.cc +++ b/compiler/utils/x86/assembler_x86.cc @@ -186,6 +186,22 @@ void X86Assembler::bsrl(Register dst, const Address& src) { EmitOperand(dst, src); } +void X86Assembler::popcntl(Register dst, Register src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0xF3); + EmitUint8(0x0F); + EmitUint8(0xB8); + EmitRegisterOperand(dst, src); +} + +void X86Assembler::popcntl(Register dst, const Address& src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0xF3); + EmitUint8(0x0F); + EmitUint8(0xB8); + EmitOperand(dst, src); +} + void X86Assembler::movzxb(Register dst, ByteRegister src) { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitUint8(0x0F); diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h index 655af9c184..759a41e80e 100644 --- a/compiler/utils/x86/assembler_x86.h +++ b/compiler/utils/x86/assembler_x86.h @@ -330,11 +330,15 @@ class X86Assembler FINAL : public Assembler { void movntl(const Address& dst, Register src); void bswapl(Register dst); + void bsfl(Register dst, Register src); void bsfl(Register dst, const Address& src); void bsrl(Register dst, Register src); void bsrl(Register dst, const Address& src); + void popcntl(Register dst, Register src); + void popcntl(Register dst, const Address& src); + void rorl(Register reg, const Immediate& imm); void rorl(Register operand, Register shifter); void roll(Register reg, const Immediate& imm); diff --git a/compiler/utils/x86/assembler_x86_test.cc b/compiler/utils/x86/assembler_x86_test.cc index a9b991c7a0..0fd098227a 100644 --- a/compiler/utils/x86/assembler_x86_test.cc +++ b/compiler/utils/x86/assembler_x86_test.cc @@ -260,6 +260,19 @@ TEST_F(AssemblerX86Test, BsrlAddress) { DriverStr(expected, "bsrl_address"); } +TEST_F(AssemblerX86Test, Popcntl) { + DriverStr(RepeatRR(&x86::X86Assembler::popcntl, "popcntl %{reg2}, %{reg1}"), "popcntl"); +} + +TEST_F(AssemblerX86Test, PopcntlAddress) { + GetAssembler()->popcntl(x86::Register(x86::EDI), x86::Address( + x86::Register(x86::EDI), x86::Register(x86::EBX), x86::TIMES_4, 12)); + const char* expected = + "popcntl 0xc(%EDI,%EBX,4), %EDI\n"; + + DriverStr(expected, "popcntl_address"); +} + // Rorl only allows CL as the shift count. std::string rorl_fn(AssemblerX86Test::Base* assembler_test, x86::X86Assembler* assembler) { std::ostringstream str; |