Diffstat (limited to 'compiler/optimizing')
-rw-r--r--  compiler/optimizing/code_generator.cc          1
-rw-r--r--  compiler/optimizing/code_generator_x86_64.cc  29
-rw-r--r--  compiler/optimizing/intrinsics_x86.cc        107
-rw-r--r--  compiler/optimizing/intrinsics_x86_64.cc      99
4 files changed, 179 insertions(+), 57 deletions(-)
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index d8e442c642..c2e83cd2c2 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -1126,6 +1126,7 @@ void CodeGenerator::RecordPcInfo(HInstruction* instruction,
if (osr) {
DCHECK_EQ(info->GetSuspendCheck(), instruction);
DCHECK(info->IsIrreducible());
+ DCHECK(environment != nullptr);
if (kIsDebugBuild) {
for (size_t i = 0, environment_size = environment->Size(); i < environment_size; ++i) {
HInstruction* in_environment = environment->GetInstructionAt(i);
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 14cff05f58..e7212cd479 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -3608,9 +3608,17 @@ void InstructionCodeGeneratorX86_64::DivByPowerOfTwo(HDiv* instruction) {
CpuRegister tmp = locations->GetTemp(0).AsRegister<CpuRegister>();
if (instruction->GetResultType() == DataType::Type::kInt32) {
- __ leal(tmp, Address(numerator, abs_imm - 1));
- __ testl(numerator, numerator);
- __ cmov(kGreaterEqual, tmp, numerator);
+ // When the denominator is 2, we can simply add the numerator's sign bit to
+ // the numerator itself. Using addl below instead of cmov saves one cycle.
+ if (abs_imm == 2) {
+ __ leal(tmp, Address(numerator, 0));
+ __ shrl(tmp, Immediate(31));
+ __ addl(tmp, numerator);
+ } else {
+ __ leal(tmp, Address(numerator, abs_imm - 1));
+ __ testl(numerator, numerator);
+ __ cmov(kGreaterEqual, tmp, numerator);
+ }
int shift = CTZ(imm);
__ sarl(tmp, Immediate(shift));
@@ -3622,11 +3630,16 @@ void InstructionCodeGeneratorX86_64::DivByPowerOfTwo(HDiv* instruction) {
} else {
DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
CpuRegister rdx = locations->GetTemp(0).AsRegister<CpuRegister>();
-
- codegen_->Load64BitValue(rdx, abs_imm - 1);
- __ addq(rdx, numerator);
- __ testq(numerator, numerator);
- __ cmov(kGreaterEqual, rdx, numerator);
+ if (abs_imm == 2) {
+ __ movq(rdx, numerator);
+ __ shrq(rdx, Immediate(63));
+ __ addq(rdx, numerator);
+ } else {
+ codegen_->Load64BitValue(rdx, abs_imm - 1);
+ __ addq(rdx, numerator);
+ __ testq(numerator, numerator);
+ __ cmov(kGreaterEqual, rdx, numerator);
+ }
int shift = CTZ(imm);
__ sarq(rdx, Immediate(shift));
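
Both new fast paths above rely on the standard rounding fix-up for signed
division by a power of two: a negative numerator must be biased by abs_imm - 1
before the arithmetic shift, and for abs_imm == 2 that bias is exactly the
sign bit (bit 31 or bit 63). A minimal standalone C++ sketch of the 32-bit
identity (illustrative, not ART code):

    #include <cassert>
    #include <cstdint>

    // Truncating division by 2 without a branch or cmov: add the sign bit
    // (logical shift by 31) as the bias, then shift arithmetically.
    int32_t DivBy2(int32_t x) {
      int32_t bias = static_cast<uint32_t>(x) >> 31;  // 1 iff x < 0
      return (x + bias) >> 1;  // arithmetic shift, as sarl does
    }

    int main() {
      for (int32_t x : {7, -7, 6, -6, 1, -1, 0}) {
        assert(DivBy2(x) == x / 2);  // C++ division truncates toward zero
      }
      return 0;
    }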
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc
index 6dd4681847..a73f4e8b94 100644
--- a/compiler/optimizing/intrinsics_x86.cc
+++ b/compiler/optimizing/intrinsics_x86.cc
@@ -545,6 +545,96 @@ static void GenFPToFPCall(HInvoke* invoke, CodeGeneratorX86* codegen, QuickEntry
__ cfi().AdjustCFAOffset(-16);
}
+static void CreateLowestOneBitLocations(ArenaAllocator* allocator, bool is_long, HInvoke* invoke) {
+ LocationSummary* locations =
+ new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
+ if (is_long) {
+ locations->SetInAt(0, Location::RequiresRegister());
+ } else {
+ locations->SetInAt(0, Location::Any());
+ }
+ locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+}
+
+static void GenLowestOneBit(X86Assembler* assembler,
+ CodeGeneratorX86* codegen,
+ bool is_long,
+ HInvoke* invoke) {
+ LocationSummary* locations = invoke->GetLocations();
+ Location src = locations->InAt(0);
+ Location out_loc = locations->Out();
+
+ if (invoke->InputAt(0)->IsConstant()) {
+ // Evaluate this at compile time.
+ int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
+ if (value == 0) {
+ if (is_long) {
+ __ xorl(out_loc.AsRegisterPairLow<Register>(), out_loc.AsRegisterPairLow<Register>());
+ __ xorl(out_loc.AsRegisterPairHigh<Register>(), out_loc.AsRegisterPairHigh<Register>());
+ } else {
+ __ xorl(out_loc.AsRegister<Register>(), out_loc.AsRegister<Register>());
+ }
+ return;
+ }
+ // Nonzero value.
+ value = is_long ? CTZ(static_cast<uint64_t>(value))
+ : CTZ(static_cast<uint32_t>(value));
+ if (is_long) {
+ if (value >= 32) {
+ int shift = value - 32;
+ codegen->Load32BitValue(out_loc.AsRegisterPairLow<Register>(), 0);
+ codegen->Load32BitValue(out_loc.AsRegisterPairHigh<Register>(), 1 << shift);
+ } else {
+ codegen->Load32BitValue(out_loc.AsRegisterPairLow<Register>(), 1 << value);
+ codegen->Load32BitValue(out_loc.AsRegisterPairHigh<Register>(), 0);
+ }
+ } else {
+ codegen->Load32BitValue(out_loc.AsRegister<Register>(), 1 << value);
+ }
+ return;
+ }
+ // Handle the non-constant case.
+ if (is_long) {
+ DCHECK(src.IsRegisterPair());
+ Register src_lo = src.AsRegisterPairLow<Register>();
+ Register src_hi = src.AsRegisterPairHigh<Register>();
+
+ Register out_lo = out_loc.AsRegisterPairLow<Register>();
+ Register out_hi = out_loc.AsRegisterPairHigh<Register>();
+
+ __ movl(out_lo, src_lo);
+ __ movl(out_hi, src_hi);
+
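+ // Together, negl/adcl/negl negate the 64-bit pair: negl sets the carry
+ // when src_lo is nonzero, so out_hi:out_lo becomes -(src_hi:src_lo).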
+ __ negl(out_lo);
+ __ adcl(out_hi, Immediate(0));
+ __ negl(out_hi);
+
+ __ andl(out_lo, src_lo);
+ __ andl(out_hi, src_hi);
+ } else {
+ if (codegen->GetInstructionSetFeatures().HasAVX2() && src.IsRegister()) {
+ Register out = out_loc.AsRegister<Register>();
+ __ blsi(out, src.AsRegister<Register>());
+ } else {
+ Register out = out_loc.AsRegister<Register>();
+ // Compute out = src & -src.
+ if (src.IsRegister()) {
+ __ movl(out, src.AsRegister<Register>());
+ } else {
+ DCHECK(src.IsStackSlot());
+ __ movl(out, Address(ESP, src.GetStackIndex()));
+ }
+ __ negl(out);
+
+ if (src.IsRegister()) {
+ __ andl(out, src.AsRegister<Register>());
+ } else {
+ __ andl(out, Address(ESP, src.GetStackIndex()));
+ }
+ }
+ }
+}
+
void IntrinsicLocationsBuilderX86::VisitMathCos(HInvoke* invoke) {
CreateFPToFPCallLocations(allocator_, invoke);
}
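
The non-constant path is the classic isolate-lowest-set-bit idiom, out =
src & -src, which the blsi instruction performs in one step on CPUs that
support it. A standalone C++ sketch of what the intrinsic computes
(illustrative names, not ART's):

    #include <cassert>
    #include <cstdint>

    // Integer.lowestOneBit semantics: keep only the lowest set bit; 0 maps to 0.
    uint32_t LowestOneBit(uint32_t x) {
      return x & (0u - x);  // two's-complement negation isolates the lowest bit
    }

    int main() {
      assert(LowestOneBit(0b101100u) == 0b100u);
      assert(LowestOneBit(1u) == 1u);
      assert(LowestOneBit(0u) == 0u);
      return 0;
    }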
@@ -657,6 +747,21 @@ void IntrinsicCodeGeneratorX86::VisitMathTanh(HInvoke* invoke) {
GenFPToFPCall(invoke, codegen_, kQuickTanh);
}
+void IntrinsicLocationsBuilderX86::VisitIntegerLowestOneBit(HInvoke* invoke) {
+ CreateLowestOneBitLocations(allocator_, /*is_long=*/ false, invoke);
+}
+void IntrinsicCodeGeneratorX86::VisitIntegerLowestOneBit(HInvoke* invoke) {
+ GenLowestOneBit(GetAssembler(), codegen_, /*is_long=*/ false, invoke);
+}
+
+void IntrinsicLocationsBuilderX86::VisitLongLowestOneBit(HInvoke* invoke) {
+ CreateLowestOneBitLocations(allocator_, /*is_long=*/ true, invoke);
+}
+
+void IntrinsicCodeGeneratorX86::VisitLongLowestOneBit(HInvoke* invoke) {
+ GenLowestOneBit(GetAssembler(), codegen_, /*is_long=*/ true, invoke);
+}
+
static void CreateFPFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
LocationSummary* locations =
new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
@@ -2965,8 +3070,6 @@ UNIMPLEMENTED_INTRINSIC(X86, FloatIsInfinite)
UNIMPLEMENTED_INTRINSIC(X86, DoubleIsInfinite)
UNIMPLEMENTED_INTRINSIC(X86, IntegerHighestOneBit)
UNIMPLEMENTED_INTRINSIC(X86, LongHighestOneBit)
-UNIMPLEMENTED_INTRINSIC(X86, IntegerLowestOneBit)
-UNIMPLEMENTED_INTRINSIC(X86, LongLowestOneBit)
UNIMPLEMENTED_INTRINSIC(X86, CRC32Update)
UNIMPLEMENTED_INTRINSIC(X86, StringStringIndexOf);
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index 7db26dc9be..88c766fabc 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -2413,59 +2413,64 @@ static void GenOneBit(X86_64Assembler* assembler,
}
// Handle the non-constant cases.
- CpuRegister tmp = locations->GetTemp(0).AsRegister<CpuRegister>();
- if (is_high) {
- // Use architectural support: basically 1 << bsr.
- if (src.IsRegister()) {
+ if (!is_high && codegen->GetInstructionSetFeatures().HasAVX2() &&
+ src.IsRegister()) {
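+ // blsi computes out = src & -src in a single instruction.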
+ __ blsi(out, src.AsRegister<CpuRegister>());
+ } else {
+ CpuRegister tmp = locations->GetTemp(0).AsRegister<CpuRegister>();
+ if (is_high) {
+ // Use architectural support: basically 1 << bsr.
+ if (src.IsRegister()) {
+ if (is_long) {
+ __ bsrq(tmp, src.AsRegister<CpuRegister>());
+ } else {
+ __ bsrl(tmp, src.AsRegister<CpuRegister>());
+ }
+ } else if (is_long) {
+ DCHECK(src.IsDoubleStackSlot());
+ __ bsrq(tmp, Address(CpuRegister(RSP), src.GetStackIndex()));
+ } else {
+ DCHECK(src.IsStackSlot());
+ __ bsrl(tmp, Address(CpuRegister(RSP), src.GetStackIndex()));
+ }
+ // BSR sets ZF if the input was zero.
+ NearLabel is_zero, done;
+ __ j(kEqual, &is_zero);
+ __ movl(out, Immediate(1)); // Clears upper bits too.
if (is_long) {
- __ bsrq(tmp, src.AsRegister<CpuRegister>());
+ __ shlq(out, tmp);
} else {
- __ bsrl(tmp, src.AsRegister<CpuRegister>());
+ __ shll(out, tmp);
}
- } else if (is_long) {
- DCHECK(src.IsDoubleStackSlot());
- __ bsrq(tmp, Address(CpuRegister(RSP), src.GetStackIndex()));
- } else {
- DCHECK(src.IsStackSlot());
- __ bsrl(tmp, Address(CpuRegister(RSP), src.GetStackIndex()));
- }
- // BSR sets ZF if the input was zero.
- NearLabel is_zero, done;
- __ j(kEqual, &is_zero);
- __ movl(out, Immediate(1)); // Clears upper bits too.
- if (is_long) {
- __ shlq(out, tmp);
- } else {
- __ shll(out, tmp);
- }
- __ jmp(&done);
- __ Bind(&is_zero);
- __ xorl(out, out); // Clears upper bits too.
- __ Bind(&done);
- } else {
- // Copy input into temporary.
- if (src.IsRegister()) {
+ __ jmp(&done);
+ __ Bind(&is_zero);
+ __ xorl(out, out); // Clears upper bits too.
+ __ Bind(&done);
+ } else {
+ // Copy input into temporary.
+ if (src.IsRegister()) {
+ if (is_long) {
+ __ movq(tmp, src.AsRegister<CpuRegister>());
+ } else {
+ __ movl(tmp, src.AsRegister<CpuRegister>());
+ }
+ } else if (is_long) {
+ DCHECK(src.IsDoubleStackSlot());
+ __ movq(tmp, Address(CpuRegister(RSP), src.GetStackIndex()));
+ } else {
+ DCHECK(src.IsStackSlot());
+ __ movl(tmp, Address(CpuRegister(RSP), src.GetStackIndex()));
+ }
+ // Do the bit twiddling: basically tmp & -tmp;
if (is_long) {
- __ movq(tmp, src.AsRegister<CpuRegister>());
+ __ movq(out, tmp);
+ __ negq(tmp);
+ __ andq(out, tmp);
} else {
- __ movl(tmp, src.AsRegister<CpuRegister>());
+ __ movl(out, tmp);
+ __ negl(tmp);
+ __ andl(out, tmp);
}
- } else if (is_long) {
- DCHECK(src.IsDoubleStackSlot());
- __ movq(tmp, Address(CpuRegister(RSP), src.GetStackIndex()));
- } else {
- DCHECK(src.IsStackSlot());
- __ movl(tmp, Address(CpuRegister(RSP), src.GetStackIndex()));
- }
- // Do the bit twiddling: basically tmp & -tmp;
- if (is_long) {
- __ movq(out, tmp);
- __ negq(tmp);
- __ andq(out, tmp);
- } else {
- __ movl(out, tmp);
- __ negl(tmp);
- __ andl(out, tmp);
}
}
}
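
For the is_high path kept above, the generated code computes 1 << bsr(src),
i.e. it keeps only the most significant set bit, with an explicit zero check
because bsr leaves its destination undefined (and sets ZF) when the input is
zero. A hedged C++ equivalent, assuming the GCC/Clang __builtin_clz builtin
rather than ART's helpers:

    #include <cassert>
    #include <cstdint>

    // Integer.highestOneBit semantics: keep only the highest set bit; 0 maps to 0.
    uint32_t HighestOneBit(uint32_t x) {
      if (x == 0) return 0;  // mirrors the is_zero branch after bsr sets ZF
      uint32_t index = 31u - static_cast<uint32_t>(__builtin_clz(x));  // == bsr
      return 1u << index;
    }

    int main() {
      assert(HighestOneBit(0b101100u) == 0b100000u);
      assert(HighestOneBit(1u) == 1u);
      assert(HighestOneBit(0u) == 0u);
      return 0;
    }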