Diffstat (limited to 'compiler/optimizing')
-rw-r--r--  compiler/optimizing/code_generator.cc          1
-rw-r--r--  compiler/optimizing/code_generator_x86_64.cc  29
-rw-r--r--  compiler/optimizing/intrinsics_x86.cc        107
-rw-r--r--  compiler/optimizing/intrinsics_x86_64.cc      99
4 files changed, 179 insertions, 57 deletions
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index d8e442c642..c2e83cd2c2 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -1126,6 +1126,7 @@ void CodeGenerator::RecordPcInfo(HInstruction* instruction,
   if (osr) {
     DCHECK_EQ(info->GetSuspendCheck(), instruction);
     DCHECK(info->IsIrreducible());
+    DCHECK(environment != nullptr);
     if (kIsDebugBuild) {
       for (size_t i = 0, environment_size = environment->Size(); i < environment_size; ++i) {
         HInstruction* in_environment = environment->GetInstructionAt(i);
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 14cff05f58..e7212cd479 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -3608,9 +3608,17 @@ void InstructionCodeGeneratorX86_64::DivByPowerOfTwo(HDiv* instruction) {
   CpuRegister tmp = locations->GetTemp(0).AsRegister<CpuRegister>();

   if (instruction->GetResultType() == DataType::Type::kInt32) {
-    __ leal(tmp, Address(numerator, abs_imm - 1));
-    __ testl(numerator, numerator);
-    __ cmov(kGreaterEqual, tmp, numerator);
+    // When denominator is equal to 2, we can add signed bit and numerator to tmp.
+    // Below we are using addl instruction instead of cmov which give us 1 cycle benefit.
+    if (abs_imm == 2) {
+      __ leal(tmp, Address(numerator, 0));
+      __ shrl(tmp, Immediate(31));
+      __ addl(tmp, numerator);
+    } else {
+      __ leal(tmp, Address(numerator, abs_imm - 1));
+      __ testl(numerator, numerator);
+      __ cmov(kGreaterEqual, tmp, numerator);
+    }
     int shift = CTZ(imm);
     __ sarl(tmp, Immediate(shift));

@@ -3622,11 +3630,16 @@ void InstructionCodeGeneratorX86_64::DivByPowerOfTwo(HDiv* instruction) {
   } else {
     DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
     CpuRegister rdx = locations->GetTemp(0).AsRegister<CpuRegister>();
-
-    codegen_->Load64BitValue(rdx, abs_imm - 1);
-    __ addq(rdx, numerator);
-    __ testq(numerator, numerator);
-    __ cmov(kGreaterEqual, rdx, numerator);
+    if (abs_imm == 2) {
+      __ movq(rdx, numerator);
+      __ shrq(rdx, Immediate(63));
+      __ addq(rdx, numerator);
+    } else {
+      codegen_->Load64BitValue(rdx, abs_imm - 1);
+      __ addq(rdx, numerator);
+      __ testq(numerator, numerator);
+      __ cmov(kGreaterEqual, rdx, numerator);
+    }

     int shift = CTZ(imm);
     __ sarq(rdx, Immediate(shift));
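Note on the DivByPowerOfTwo change above: signed division by a power of two 2^k, rounding toward zero, requires adding a bias of 2^k - 1 to negative numerators before the arithmetic shift. When the divisor magnitude is exactly 2, that bias is just the sign bit, so a logical shift plus an add replaces the testl/cmov pair, which is the one-cycle win the new comment refers to. A minimal standalone sketch of the arithmetic (illustrative C++, not the patch itself; DivBy2 is a hypothetical helper name, and the sketch assumes arithmetic right shift for signed values, which holds on the targets involved):

    #include <cassert>
    #include <cstdint>

    // Mirrors the abs_imm == 2 path: add the sign bit (logical shift right
    // by 31) to the numerator, then arithmetic-shift right by CTZ(2) == 1.
    int32_t DivBy2(int32_t n) {
      uint32_t sign = static_cast<uint32_t>(n) >> 31;  // 1 iff n < 0.
      return (n + static_cast<int32_t>(sign)) >> 1;    // Rounds toward zero.
    }

    int main() {
      assert(DivBy2(7) == 3);
      assert(DivBy2(-7) == -3);  // Same result as C++ integer division.
      assert(DivBy2(-1) == 0);
      return 0;
    }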
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc
index 6dd4681847..a73f4e8b94 100644
--- a/compiler/optimizing/intrinsics_x86.cc
+++ b/compiler/optimizing/intrinsics_x86.cc
@@ -545,6 +545,96 @@ static void GenFPToFPCall(HInvoke* invoke, CodeGeneratorX86* codegen, QuickEntry
   __ cfi().AdjustCFAOffset(-16);
 }

+static void CreateLowestOneBitLocations(ArenaAllocator* allocator, bool is_long, HInvoke* invoke) {
+  LocationSummary* locations =
+      new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
+  if (is_long) {
+    locations->SetInAt(0, Location::RequiresRegister());
+  } else {
+    locations->SetInAt(0, Location::Any());
+  }
+  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+}
+
+static void GenLowestOneBit(X86Assembler* assembler,
+                            CodeGeneratorX86* codegen,
+                            bool is_long,
+                            HInvoke* invoke) {
+  LocationSummary* locations = invoke->GetLocations();
+  Location src = locations->InAt(0);
+  Location out_loc = locations->Out();
+
+  if (invoke->InputAt(0)->IsConstant()) {
+    // Evaluate this at compile time.
+    int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
+    if (value == 0) {
+      if (is_long) {
+        __ xorl(out_loc.AsRegisterPairLow<Register>(), out_loc.AsRegisterPairLow<Register>());
+        __ xorl(out_loc.AsRegisterPairHigh<Register>(), out_loc.AsRegisterPairHigh<Register>());
+      } else {
+        __ xorl(out_loc.AsRegister<Register>(), out_loc.AsRegister<Register>());
+      }
+      return;
+    }
+    // Nonzero value.
+    value = is_long ? CTZ(static_cast<uint64_t>(value))
+                    : CTZ(static_cast<uint32_t>(value));
+    if (is_long) {
+      if (value >= 32) {
+        int shift = value-32;
+        codegen->Load32BitValue(out_loc.AsRegisterPairLow<Register>(), 0);
+        codegen->Load32BitValue(out_loc.AsRegisterPairHigh<Register>(), 1 << shift);
+      } else {
+        codegen->Load32BitValue(out_loc.AsRegisterPairLow<Register>(), 1 << value);
+        codegen->Load32BitValue(out_loc.AsRegisterPairHigh<Register>(), 0);
+      }
+    } else {
+      codegen->Load32BitValue(out_loc.AsRegister<Register>(), 1 << value);
+    }
+    return;
+  }
+  // Handle non constant case
+  if (is_long) {
+    DCHECK(src.IsRegisterPair());
+    Register src_lo = src.AsRegisterPairLow<Register>();
+    Register src_hi = src.AsRegisterPairHigh<Register>();
+
+    Register out_lo = out_loc.AsRegisterPairLow<Register>();
+    Register out_hi = out_loc.AsRegisterPairHigh<Register>();
+
+    __ movl(out_lo, src_lo);
+    __ movl(out_hi, src_hi);
+
+    __ negl(out_lo);
+    __ adcl(out_hi, Immediate(0));
+    __ negl(out_hi);
+
+    __ andl(out_lo, src_lo);
+    __ andl(out_hi, src_hi);
+  } else {
+    if (codegen->GetInstructionSetFeatures().HasAVX2() && src.IsRegister()) {
+      Register out = out_loc.AsRegister<Register>();
+      __ blsi(out, src.AsRegister<Register>());
+    } else {
+      Register out = out_loc.AsRegister<Register>();
+      // Do tmp & -tmp
+      if (src.IsRegister()) {
+        __ movl(out, src.AsRegister<Register>());
+      } else {
+        DCHECK(src.IsStackSlot());
+        __ movl(out, Address(ESP, src.GetStackIndex()));
+      }
+      __ negl(out);
+
+      if (src.IsRegister()) {
+        __ andl(out, src.AsRegister<Register>());
+      } else {
+        __ andl(out, Address(ESP, src.GetStackIndex()));
+      }
+    }
+  }
+}
+
 void IntrinsicLocationsBuilderX86::VisitMathCos(HInvoke* invoke) {
   CreateFPToFPCallLocations(allocator_, invoke);
 }
@@ -657,6 +747,21 @@ void IntrinsicCodeGeneratorX86::VisitMathTanh(HInvoke* invoke) {
   GenFPToFPCall(invoke, codegen_, kQuickTanh);
 }

+void IntrinsicLocationsBuilderX86::VisitIntegerLowestOneBit(HInvoke* invoke) {
+  CreateLowestOneBitLocations(allocator_, /*is_long=*/ false, invoke);
+}
+void IntrinsicCodeGeneratorX86::VisitIntegerLowestOneBit(HInvoke* invoke) {
+  GenLowestOneBit(GetAssembler(), codegen_, /*is_long=*/ false, invoke);
+}
+
+void IntrinsicLocationsBuilderX86::VisitLongLowestOneBit(HInvoke* invoke) {
+  CreateLowestOneBitLocations(allocator_, /*is_long=*/ true, invoke);
+}
+
+void IntrinsicCodeGeneratorX86::VisitLongLowestOneBit(HInvoke* invoke) {
+  GenLowestOneBit(GetAssembler(), codegen_, /*is_long=*/ true, invoke);
+}
+
 static void CreateFPFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
   LocationSummary* locations =
       new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
@@ -2965,8 +3070,6 @@ UNIMPLEMENTED_INTRINSIC(X86, FloatIsInfinite)
 UNIMPLEMENTED_INTRINSIC(X86, DoubleIsInfinite)
 UNIMPLEMENTED_INTRINSIC(X86, IntegerHighestOneBit)
 UNIMPLEMENTED_INTRINSIC(X86, LongHighestOneBit)
-UNIMPLEMENTED_INTRINSIC(X86, IntegerLowestOneBit)
-UNIMPLEMENTED_INTRINSIC(X86, LongLowestOneBit)
 UNIMPLEMENTED_INTRINSIC(X86, CRC32Update)

 UNIMPLEMENTED_INTRINSIC(X86, StringStringIndexOf);
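Note on GenLowestOneBit above: the non-constant paths compute Integer/Long.lowestOneBit with the identity x & -x, which isolates the lowest set bit and yields 0 for 0, matching the Java contract. On 32-bit x86 a long occupies a register pair, so the 64-bit negation is built from negl on the low word, a borrow-propagating adcl, and negl on the high word. A hedged sketch of that pair arithmetic (illustrative C++; the helper names are not from the patch):

    #include <cassert>
    #include <cstdint>

    // x & -x keeps only the lowest set bit; 0 maps to 0.
    uint64_t LowestOneBit(uint64_t x) { return x & (0 - x); }

    // 64-bit negation on a 32-bit register pair, as emitted above:
    // negl out_lo sets the carry iff lo != 0; adcl folds it into the
    // high word; negl out_hi finishes the two's-complement negation.
    void NegatePair(uint32_t& lo, uint32_t& hi) {
      uint32_t borrow = (lo != 0u) ? 1u : 0u;  // CF after 'negl out_lo'.
      lo = 0u - lo;                            // negl out_lo
      hi = 0u - (hi + borrow);                 // adcl out_hi, 0 ; negl out_hi
    }

    int main() {
      assert(LowestOneBit(0xB8u) == 0x8u);
      assert(LowestOneBit(0u) == 0u);
      uint32_t lo = 0u, hi = 1u;               // The value 1 << 32.
      NegatePair(lo, hi);
      assert(lo == 0u && hi == 0xFFFFFFFFu);   // -(1 << 32), low/high words.
      return 0;
    }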
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index 7db26dc9be..88c766fabc 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -2413,59 +2413,64 @@ static void GenOneBit(X86_64Assembler* assembler,
   }

   // Handle the non-constant cases.
-  CpuRegister tmp = locations->GetTemp(0).AsRegister<CpuRegister>();
-  if (is_high) {
-    // Use architectural support: basically 1 << bsr.
-    if (src.IsRegister()) {
+  if (!is_high && codegen->GetInstructionSetFeatures().HasAVX2() &&
+      src.IsRegister()) {
+    __ blsi(out, src.AsRegister<CpuRegister>());
+  } else {
+    CpuRegister tmp = locations->GetTemp(0).AsRegister<CpuRegister>();
+    if (is_high) {
+      // Use architectural support: basically 1 << bsr.
+      if (src.IsRegister()) {
+        if (is_long) {
+          __ bsrq(tmp, src.AsRegister<CpuRegister>());
+        } else {
+          __ bsrl(tmp, src.AsRegister<CpuRegister>());
+        }
+      } else if (is_long) {
+        DCHECK(src.IsDoubleStackSlot());
+        __ bsrq(tmp, Address(CpuRegister(RSP), src.GetStackIndex()));
+      } else {
+        DCHECK(src.IsStackSlot());
+        __ bsrl(tmp, Address(CpuRegister(RSP), src.GetStackIndex()));
+      }
+      // BSR sets ZF if the input was zero.
+      NearLabel is_zero, done;
+      __ j(kEqual, &is_zero);
+      __ movl(out, Immediate(1));  // Clears upper bits too.
       if (is_long) {
-        __ bsrq(tmp, src.AsRegister<CpuRegister>());
+        __ shlq(out, tmp);
       } else {
-        __ bsrl(tmp, src.AsRegister<CpuRegister>());
+        __ shll(out, tmp);
       }
-    } else if (is_long) {
-      DCHECK(src.IsDoubleStackSlot());
-      __ bsrq(tmp, Address(CpuRegister(RSP), src.GetStackIndex()));
-    } else {
-      DCHECK(src.IsStackSlot());
-      __ bsrl(tmp, Address(CpuRegister(RSP), src.GetStackIndex()));
-    }
-    // BSR sets ZF if the input was zero.
-    NearLabel is_zero, done;
-    __ j(kEqual, &is_zero);
-    __ movl(out, Immediate(1));  // Clears upper bits too.
-    if (is_long) {
-      __ shlq(out, tmp);
-    } else {
-      __ shll(out, tmp);
-    }
-    __ jmp(&done);
-    __ Bind(&is_zero);
-    __ xorl(out, out);  // Clears upper bits too.
-    __ Bind(&done);
-  } else {
-    // Copy input into temporary.
-    if (src.IsRegister()) {
+      __ jmp(&done);
+      __ Bind(&is_zero);
+      __ xorl(out, out);  // Clears upper bits too.
+      __ Bind(&done);
+    } else {
+      // Copy input into temporary.
+      if (src.IsRegister()) {
+        if (is_long) {
+          __ movq(tmp, src.AsRegister<CpuRegister>());
+        } else {
+          __ movl(tmp, src.AsRegister<CpuRegister>());
+        }
+      } else if (is_long) {
+        DCHECK(src.IsDoubleStackSlot());
+        __ movq(tmp, Address(CpuRegister(RSP), src.GetStackIndex()));
+      } else {
+        DCHECK(src.IsStackSlot());
+        __ movl(tmp, Address(CpuRegister(RSP), src.GetStackIndex()));
+      }
+      // Do the bit twiddling: basically tmp & -tmp;
       if (is_long) {
-        __ movq(tmp, src.AsRegister<CpuRegister>());
+        __ movq(out, tmp);
+        __ negq(tmp);
+        __ andq(out, tmp);
       } else {
-        __ movl(tmp, src.AsRegister<CpuRegister>());
+        __ movl(out, tmp);
+        __ negl(tmp);
+        __ andl(out, tmp);
       }
-    } else if (is_long) {
-      DCHECK(src.IsDoubleStackSlot());
-      __ movq(tmp, Address(CpuRegister(RSP), src.GetStackIndex()));
-    } else {
-      DCHECK(src.IsStackSlot());
-      __ movl(tmp, Address(CpuRegister(RSP), src.GetStackIndex()));
-    }
-    // Do the bit twiddling: basically tmp & -tmp;
-    if (is_long) {
-      __ movq(out, tmp);
-      __ negq(tmp);
-      __ andq(out, tmp);
-    } else {
-      __ movl(out, tmp);
-      __ negl(tmp);
-      __ andl(out, tmp);
     }
   }
 }
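Note on the blsi paths in both intrinsics files: BLSI computes dst = src & -src in one instruction, replacing the mov/neg/and sequence and, on x86-64, the need for a temporary register. BLSI is defined by the BMI1 extension; the patch gates it behind HasAVX2(), presumably because BMI1 and AVX2 ship together on the CPUs of interest. For comparison, a hedged sketch using the BMI1 compiler intrinsic (assumes a toolchain building with -mbmi; the portable expression shows the identical result):

    #include <cassert>
    #include <cstdint>
    #if defined(__BMI__)
    #include <immintrin.h>  // _blsi_u32
    #endif

    uint32_t LowestBitPortable(uint32_t x) { return x & (0 - x); }

    int main() {
      uint32_t x = 0x00F0u;
    #if defined(__BMI__)
      // Compiles down to a single blsi instruction.
      assert(_blsi_u32(x) == LowestBitPortable(x));
    #endif
      assert(LowestBitPortable(x) == 0x10u);
      return 0;
    }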