diff options
| author | 2016-01-21 16:11:17 +0000 | |
|---|---|---|
| committer | 2016-01-21 16:11:17 +0000 | |
| commit | 857235b3bb21c8868dc74c854c8025ec53782b0c (patch) | |
| tree | 3f5d1816a1e81908120dbcd1cdfb668cd4a8e611 /compiler | |
| parent | f3e4c8a7fc306aacf772beef5269ee9a466bea4c (diff) | |
| parent | 3f67e692860d281858485d48a4f1f81b907f1444 (diff) | |
Merge "Implemented BitCount as an intrinsic. With unit test."
Diffstat (limited to 'compiler')
| -rw-r--r-- | compiler/dex/quick/dex_file_method_inliner.cc | 6 | ||||
| -rw-r--r-- | compiler/dex/quick/dex_file_method_inliner.h | 1 | ||||
| -rw-r--r-- | compiler/optimizing/intrinsics.cc | 10 | ||||
| -rw-r--r-- | compiler/optimizing/intrinsics_arm.cc | 2 | ||||
| -rw-r--r-- | compiler/optimizing/intrinsics_arm64.cc | 2 | ||||
| -rw-r--r-- | compiler/optimizing/intrinsics_list.h | 2 | ||||
| -rw-r--r-- | compiler/optimizing/intrinsics_mips.cc | 3 | ||||
| -rw-r--r-- | compiler/optimizing/intrinsics_mips64.cc | 3 | ||||
| -rw-r--r-- | compiler/optimizing/intrinsics_x86.cc | 2 | ||||
| -rw-r--r-- | compiler/optimizing/intrinsics_x86_64.cc | 64 | ||||
| -rw-r--r-- | compiler/utils/x86_64/assembler_x86_64.cc | 36 | ||||
| -rw-r--r-- | compiler/utils/x86_64/assembler_x86_64.h | 5 | ||||
| -rw-r--r-- | compiler/utils/x86_64/assembler_x86_64_test.cc | 38 |
13 files changed, 174 insertions, 0 deletions
diff --git a/compiler/dex/quick/dex_file_method_inliner.cc b/compiler/dex/quick/dex_file_method_inliner.cc index 4617668ee8..3766093fa8 100644 --- a/compiler/dex/quick/dex_file_method_inliner.cc +++ b/compiler/dex/quick/dex_file_method_inliner.cc @@ -39,6 +39,7 @@ static constexpr bool kIntrinsicIsStatic[] = { true, // kIntrinsicFloatCvt true, // kIntrinsicReverseBits true, // kIntrinsicReverseBytes + true, // kIntrinsicBitCount true, // kIntrinsicNumberOfLeadingZeros true, // kIntrinsicNumberOfTrailingZeros true, // kIntrinsicRotateRight @@ -99,6 +100,7 @@ static_assert(kIntrinsicIsStatic[kIntrinsicDoubleCvt], "DoubleCvt must be static static_assert(kIntrinsicIsStatic[kIntrinsicFloatCvt], "FloatCvt must be static"); static_assert(kIntrinsicIsStatic[kIntrinsicReverseBits], "ReverseBits must be static"); static_assert(kIntrinsicIsStatic[kIntrinsicReverseBytes], "ReverseBytes must be static"); +static_assert(kIntrinsicIsStatic[kIntrinsicBitCount], "BitCount must be static"); static_assert(kIntrinsicIsStatic[kIntrinsicNumberOfLeadingZeros], "NumberOfLeadingZeros must be static"); static_assert(kIntrinsicIsStatic[kIntrinsicNumberOfTrailingZeros], @@ -293,6 +295,7 @@ const char* const DexFileMethodInliner::kNameCacheNames[] = { "putObjectVolatile", // kNameCachePutObjectVolatile "putOrderedObject", // kNameCachePutOrderedObject "arraycopy", // kNameCacheArrayCopy + "bitCount", // kNameCacheBitCount "numberOfLeadingZeros", // kNameCacheNumberOfLeadingZeros "numberOfTrailingZeros", // kNameCacheNumberOfTrailingZeros "rotateRight", // kNameCacheRotateRight @@ -447,6 +450,8 @@ const DexFileMethodInliner::IntrinsicDef DexFileMethodInliner::kIntrinsicMethods INTRINSIC(JavaLangInteger, Reverse, I_I, kIntrinsicReverseBits, k32), INTRINSIC(JavaLangLong, Reverse, J_J, kIntrinsicReverseBits, k64), + INTRINSIC(JavaLangInteger, BitCount, I_I, kIntrinsicBitCount, k32), + INTRINSIC(JavaLangLong, BitCount, J_I, kIntrinsicBitCount, k64), INTRINSIC(JavaLangInteger, NumberOfLeadingZeros, I_I, kIntrinsicNumberOfLeadingZeros, k32), INTRINSIC(JavaLangLong, NumberOfLeadingZeros, J_I, kIntrinsicNumberOfLeadingZeros, k64), INTRINSIC(JavaLangInteger, NumberOfTrailingZeros, I_I, kIntrinsicNumberOfTrailingZeros, k32), @@ -745,6 +750,7 @@ bool DexFileMethodInliner::GenIntrinsic(Mir2Lir* backend, CallInfo* info) { intrinsic.d.data & kIntrinsicFlagIsOrdered); case kIntrinsicSystemArrayCopyCharArray: return backend->GenInlinedArrayCopyCharArray(info); + case kIntrinsicBitCount: case kIntrinsicNumberOfLeadingZeros: case kIntrinsicNumberOfTrailingZeros: case kIntrinsicRotateRight: diff --git a/compiler/dex/quick/dex_file_method_inliner.h b/compiler/dex/quick/dex_file_method_inliner.h index ac70577b48..28036237d7 100644 --- a/compiler/dex/quick/dex_file_method_inliner.h +++ b/compiler/dex/quick/dex_file_method_inliner.h @@ -224,6 +224,7 @@ class DexFileMethodInliner { kNameCachePutObjectVolatile, kNameCachePutOrderedObject, kNameCacheArrayCopy, + kNameCacheBitCount, kNameCacheNumberOfLeadingZeros, kNameCacheNumberOfTrailingZeros, kNameCacheRotateRight, diff --git a/compiler/optimizing/intrinsics.cc b/compiler/optimizing/intrinsics.cc index c6da9a3f5e..5caf077858 100644 --- a/compiler/optimizing/intrinsics.cc +++ b/compiler/optimizing/intrinsics.cc @@ -176,6 +176,16 @@ static Intrinsics GetIntrinsic(InlineMethod method) { } // Misc data processing. + case kIntrinsicBitCount: + switch (GetType(method.d.data, true)) { + case Primitive::kPrimInt: + return Intrinsics::kIntegerBitCount; + case Primitive::kPrimLong: + return Intrinsics::kLongBitCount; + default: + LOG(FATAL) << "Unknown/unsupported op size " << method.d.data; + UNREACHABLE(); + } case kIntrinsicNumberOfLeadingZeros: switch (GetType(method.d.data, true)) { case Primitive::kPrimInt: diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc index b1fbf28204..e72f927e44 100644 --- a/compiler/optimizing/intrinsics_arm.cc +++ b/compiler/optimizing/intrinsics_arm.cc @@ -1577,10 +1577,12 @@ void IntrinsicLocationsBuilderARM::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSE void IntrinsicCodeGeneratorARM::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \ } +UNIMPLEMENTED_INTRINSIC(IntegerBitCount) UNIMPLEMENTED_INTRINSIC(IntegerReverse) UNIMPLEMENTED_INTRINSIC(IntegerReverseBytes) UNIMPLEMENTED_INTRINSIC(IntegerRotateLeft) UNIMPLEMENTED_INTRINSIC(IntegerRotateRight) +UNIMPLEMENTED_INTRINSIC(LongBitCount) UNIMPLEMENTED_INTRINSIC(LongReverse) UNIMPLEMENTED_INTRINSIC(LongReverseBytes) UNIMPLEMENTED_INTRINSIC(LongRotateLeft) diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc index e857f6f2ec..8cf2d4f393 100644 --- a/compiler/optimizing/intrinsics_arm64.cc +++ b/compiler/optimizing/intrinsics_arm64.cc @@ -1464,8 +1464,10 @@ void IntrinsicLocationsBuilderARM64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNU void IntrinsicCodeGeneratorARM64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \ } +UNIMPLEMENTED_INTRINSIC(IntegerBitCount) UNIMPLEMENTED_INTRINSIC(IntegerRotateLeft) UNIMPLEMENTED_INTRINSIC(IntegerRotateRight) +UNIMPLEMENTED_INTRINSIC(LongBitCount) UNIMPLEMENTED_INTRINSIC(LongRotateLeft) UNIMPLEMENTED_INTRINSIC(LongRotateRight) UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar) diff --git a/compiler/optimizing/intrinsics_list.h b/compiler/optimizing/intrinsics_list.h index 2e87546282..ea380347da 100644 --- a/compiler/optimizing/intrinsics_list.h +++ b/compiler/optimizing/intrinsics_list.h @@ -28,12 +28,14 @@ V(FloatIntBitsToFloat, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ V(IntegerReverse, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ V(IntegerReverseBytes, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ + V(IntegerBitCount, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ V(IntegerNumberOfLeadingZeros, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ V(IntegerNumberOfTrailingZeros, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ V(IntegerRotateRight, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ V(IntegerRotateLeft, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ V(LongReverse, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ V(LongReverseBytes, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ + V(LongBitCount, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ V(LongNumberOfLeadingZeros, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ V(LongNumberOfTrailingZeros, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ V(LongRotateRight, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow) \ diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc index bc126a2716..81112b1a34 100644 --- a/compiler/optimizing/intrinsics_mips.cc +++ b/compiler/optimizing/intrinsics_mips.cc @@ -935,6 +935,9 @@ void IntrinsicLocationsBuilderMIPS::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUS void IntrinsicCodeGeneratorMIPS::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \ } +UNIMPLEMENTED_INTRINSIC(IntegerBitCount) +UNIMPLEMENTED_INTRINSIC(LongBitCount) + UNIMPLEMENTED_INTRINSIC(MathAbsDouble) UNIMPLEMENTED_INTRINSIC(MathAbsFloat) UNIMPLEMENTED_INTRINSIC(MathAbsInt) diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc index 8b45ea7c4f..ac969e39fa 100644 --- a/compiler/optimizing/intrinsics_mips64.cc +++ b/compiler/optimizing/intrinsics_mips64.cc @@ -1724,6 +1724,9 @@ void IntrinsicLocationsBuilderMIPS64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UN void IntrinsicCodeGeneratorMIPS64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \ } +UNIMPLEMENTED_INTRINSIC(IntegerBitCount) +UNIMPLEMENTED_INTRINSIC(LongBitCount) + UNIMPLEMENTED_INTRINSIC(MathRoundDouble) UNIMPLEMENTED_INTRINSIC(MathRoundFloat) diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc index 677f2e9c81..4715bdca2e 100644 --- a/compiler/optimizing/intrinsics_x86.cc +++ b/compiler/optimizing/intrinsics_x86.cc @@ -2518,8 +2518,10 @@ void IntrinsicCodeGeneratorX86::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) UNIMPLEMENTED_INTRINSIC(MathRoundDouble) UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent) +UNIMPLEMENTED_INTRINSIC(IntegerBitCount) UNIMPLEMENTED_INTRINSIC(IntegerRotateLeft) UNIMPLEMENTED_INTRINSIC(IntegerRotateRight) +UNIMPLEMENTED_INTRINSIC(LongBitCount) UNIMPLEMENTED_INTRINSIC(LongRotateRight) UNIMPLEMENTED_INTRINSIC(LongRotateLeft) UNIMPLEMENTED_INTRINSIC(SystemArrayCopy) diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc index 690cf3d413..23a628f243 100644 --- a/compiler/optimizing/intrinsics_x86_64.cc +++ b/compiler/optimizing/intrinsics_x86_64.cc @@ -2368,6 +2368,70 @@ void IntrinsicCodeGeneratorX86_64::VisitLongReverse(HInvoke* invoke) { SwapBits64(reg, temp1, temp2, 4, INT64_C(0x0f0f0f0f0f0f0f0f), assembler); } +static void CreateBitCountLocations( + ArenaAllocator* arena, CodeGeneratorX86_64* codegen, HInvoke* invoke) { + if (!codegen->GetInstructionSetFeatures().HasPopCnt()) { + // Do nothing if there is no popcnt support. This results in generating + // a call for the intrinsic rather than direct code. + return; + } + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::Any()); + locations->SetOut(Location::RequiresRegister()); +} + +static void GenBitCount(X86_64Assembler* assembler, HInvoke* invoke, bool is_long) { + LocationSummary* locations = invoke->GetLocations(); + Location src = locations->InAt(0); + CpuRegister out = locations->Out().AsRegister<CpuRegister>(); + + if (invoke->InputAt(0)->IsConstant()) { + // Evaluate this at compile time. + int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant()); + value = is_long + ? POPCOUNT(static_cast<uint64_t>(value)) + : POPCOUNT(static_cast<uint32_t>(value)); + if (value == 0) { + __ xorl(out, out); + } else { + __ movl(out, Immediate(value)); + } + return; + } + + if (src.IsRegister()) { + if (is_long) { + __ popcntq(out, src.AsRegister<CpuRegister>()); + } else { + __ popcntl(out, src.AsRegister<CpuRegister>()); + } + } else if (is_long) { + DCHECK(src.IsDoubleStackSlot()); + __ popcntq(out, Address(CpuRegister(RSP), src.GetStackIndex())); + } else { + DCHECK(src.IsStackSlot()); + __ popcntl(out, Address(CpuRegister(RSP), src.GetStackIndex())); + } +} + +void IntrinsicLocationsBuilderX86_64::VisitIntegerBitCount(HInvoke* invoke) { + CreateBitCountLocations(arena_, codegen_, invoke); +} + +void IntrinsicCodeGeneratorX86_64::VisitIntegerBitCount(HInvoke* invoke) { + GenBitCount(GetAssembler(), invoke, /* is_long */ false); +} + +void IntrinsicLocationsBuilderX86_64::VisitLongBitCount(HInvoke* invoke) { + CreateBitCountLocations(arena_, codegen_, invoke); +} + +void IntrinsicCodeGeneratorX86_64::VisitLongBitCount(HInvoke* invoke) { + GenBitCount(GetAssembler(), invoke, /* is_long */ true); +} + static void CreateLeadingZeroLocations(ArenaAllocator* arena, HInvoke* invoke) { LocationSummary* locations = new (arena) LocationSummary(invoke, LocationSummary::kNoCall, diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc index db072678ef..10f5a005e1 100644 --- a/compiler/utils/x86_64/assembler_x86_64.cc +++ b/compiler/utils/x86_64/assembler_x86_64.cc @@ -2247,6 +2247,42 @@ void X86_64Assembler::bsrq(CpuRegister dst, const Address& src) { EmitOperand(dst.LowBits(), src); } +void X86_64Assembler::popcntl(CpuRegister dst, CpuRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0xF3); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0xB8); + EmitRegisterOperand(dst.LowBits(), src.LowBits()); +} + +void X86_64Assembler::popcntl(CpuRegister dst, const Address& src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0xF3); + EmitOptionalRex32(dst, src); + EmitUint8(0x0F); + EmitUint8(0xB8); + EmitOperand(dst.LowBits(), src); +} + +void X86_64Assembler::popcntq(CpuRegister dst, CpuRegister src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0xF3); + EmitRex64(dst, src); + EmitUint8(0x0F); + EmitUint8(0xB8); + EmitRegisterOperand(dst.LowBits(), src.LowBits()); +} + +void X86_64Assembler::popcntq(CpuRegister dst, const Address& src) { + AssemblerBuffer::EnsureCapacity ensured(&buffer_); + EmitUint8(0xF3); + EmitRex64(dst, src); + EmitUint8(0x0F); + EmitUint8(0xB8); + EmitOperand(dst.LowBits(), src); +} + void X86_64Assembler::repne_scasw() { AssemblerBuffer::EnsureCapacity ensured(&buffer_); EmitUint8(0x66); diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h index 01d28e305d..6f0847eb61 100644 --- a/compiler/utils/x86_64/assembler_x86_64.h +++ b/compiler/utils/x86_64/assembler_x86_64.h @@ -647,6 +647,11 @@ class X86_64Assembler FINAL : public Assembler { void bsrq(CpuRegister dst, CpuRegister src); void bsrq(CpuRegister dst, const Address& src); + void popcntl(CpuRegister dst, CpuRegister src); + void popcntl(CpuRegister dst, const Address& src); + void popcntq(CpuRegister dst, CpuRegister src); + void popcntq(CpuRegister dst, const Address& src); + void rorl(CpuRegister reg, const Immediate& imm); void rorl(CpuRegister operand, CpuRegister shifter); void roll(CpuRegister reg, const Immediate& imm); diff --git a/compiler/utils/x86_64/assembler_x86_64_test.cc b/compiler/utils/x86_64/assembler_x86_64_test.cc index 00bb5ca36b..8a87fca96a 100644 --- a/compiler/utils/x86_64/assembler_x86_64_test.cc +++ b/compiler/utils/x86_64/assembler_x86_64_test.cc @@ -1333,6 +1333,44 @@ TEST_F(AssemblerX86_64Test, BsrqAddress) { DriverStr(expected, "bsrq_address"); } +TEST_F(AssemblerX86_64Test, Popcntl) { + DriverStr(Repeatrr(&x86_64::X86_64Assembler::popcntl, "popcntl %{reg2}, %{reg1}"), "popcntl"); +} + +TEST_F(AssemblerX86_64Test, PopcntlAddress) { + GetAssembler()->popcntl(x86_64::CpuRegister(x86_64::R10), x86_64::Address( + x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::RBX), x86_64::TIMES_4, 12)); + GetAssembler()->popcntl(x86_64::CpuRegister(x86_64::RDI), x86_64::Address( + x86_64::CpuRegister(x86_64::R10), x86_64::CpuRegister(x86_64::RBX), x86_64::TIMES_4, 12)); + GetAssembler()->popcntl(x86_64::CpuRegister(x86_64::RDI), x86_64::Address( + x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::R9), x86_64::TIMES_4, 12)); + const char* expected = + "popcntl 0xc(%RDI,%RBX,4), %R10d\n" + "popcntl 0xc(%R10,%RBX,4), %edi\n" + "popcntl 0xc(%RDI,%R9,4), %edi\n"; + + DriverStr(expected, "popcntl_address"); +} + +TEST_F(AssemblerX86_64Test, Popcntq) { + DriverStr(RepeatRR(&x86_64::X86_64Assembler::popcntq, "popcntq %{reg2}, %{reg1}"), "popcntq"); +} + +TEST_F(AssemblerX86_64Test, PopcntqAddress) { + GetAssembler()->popcntq(x86_64::CpuRegister(x86_64::R10), x86_64::Address( + x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::RBX), x86_64::TIMES_4, 12)); + GetAssembler()->popcntq(x86_64::CpuRegister(x86_64::RDI), x86_64::Address( + x86_64::CpuRegister(x86_64::R10), x86_64::CpuRegister(x86_64::RBX), x86_64::TIMES_4, 12)); + GetAssembler()->popcntq(x86_64::CpuRegister(x86_64::RDI), x86_64::Address( + x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::R9), x86_64::TIMES_4, 12)); + const char* expected = + "popcntq 0xc(%RDI,%RBX,4), %R10\n" + "popcntq 0xc(%R10,%RBX,4), %RDI\n" + "popcntq 0xc(%RDI,%R9,4), %RDI\n"; + + DriverStr(expected, "popcntq_address"); +} + ///////////////// // Near labels // ///////////////// |