Implementation of integer intrinsics on x86_64
Rationale:
Efficient x86_64 implementations of Integer/Long compare(), highestOneBit(),
lowestOneBit(), and signum(), plus a Load32BitValue() helper that simplifies
the existing bitCount() and numberOf{Leading,Trailing}Zeros() intrinsics.
Already tested in:
564-checker-bitcount
565-checker-rotate
566-checker-signum
567-checker-compare
568-checker-onebit (extended to deal with run-time zero)
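
For reference, the operations implemented by GenCompare, GenOneBit, and
GenSign below have the scalar Java equivalents sketched here. This is an
illustrative sketch only: the class and check() helper are invented for the
example and are simply verified against the java.lang.Integer methods the
intrinsics replace.

  // Illustrative sketch only -- not part of the change.
  public class IntrinsicSketch {
    static void check(boolean ok) {
      if (!ok) throw new AssertionError();
    }

    // Signum: arithmetic shift of x merged with logical shift of -x,
    // matching the sar/neg/shr/or sequence in GenSign.
    static int signum(int x) {
      return (x >> 31) | (-x >>> 31);
    }

    // Lowest one bit: isolate the least significant set bit (tmp & -tmp).
    static int lowestOneBit(int x) {
      return x & -x;
    }

    // Highest one bit: 1 << bsr(x), with an explicit zero guard
    // (BSR sets ZF for a zero input).
    static int highestOneBit(int x) {
      return (x == 0) ? 0 : 1 << (31 - Integer.numberOfLeadingZeros(x));
    }

    // Compare: -1, 0 or 1, matching the cmp/j sequence in GenCompare.
    static int compare(int a, int b) {
      return (a < b) ? -1 : ((a == b) ? 0 : 1);
    }

    public static void main(String[] args) {
      int[] samples = { 0, 1, -1, 42, -42, 0x00010000, 0x7FFFFFFF, 0x80000000 };
      for (int x : samples) {
        check(signum(x) == Integer.signum(x));
        check(lowestOneBit(x) == Integer.lowestOneBit(x));
        check(highestOneBit(x) == Integer.highestOneBit(x));
        for (int y : samples) {
          check(compare(x, y) == Integer.compare(x, y));
        }
      }
      System.out.println("all identities hold");
    }
  }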
Change-Id: Ib48c76eee751e7925056d7f26797e9a9b5ae60dd
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 86ffb0f..6795488 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -6447,8 +6447,17 @@
__ jmp(temp_reg);
}
+void CodeGeneratorX86_64::Load32BitValue(CpuRegister dest, int32_t value) {
+ if (value == 0) {
+ __ xorl(dest, dest);
+ } else {
+ __ movl(dest, Immediate(value));
+ }
+}
+
void CodeGeneratorX86_64::Load64BitValue(CpuRegister dest, int64_t value) {
if (value == 0) {
+ // Clears upper bits too.
__ xorl(dest, dest);
} else if (value > 0 && IsInt<32>(value)) {
// We can use a 32 bit move, as it will zero-extend and is one byte shorter.
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index 9626590..318087e 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -478,8 +478,10 @@
Address LiteralInt32Address(int32_t v);
Address LiteralInt64Address(int64_t v);
- // Load a 64 bit value into a register in the most efficient manner.
+ // Load a 32/64 bit value into a register in the most efficient manner.
+ void Load32BitValue(CpuRegister dest, int32_t value);
void Load64BitValue(CpuRegister dest, int64_t value);
+
Address LiteralCaseTable(HPackedSwitch* switch_instr);
// Store a 64 bit value into a DoubleStackSlot in the most efficient manner.
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index 6ccc5d1..51fa514 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -2302,7 +2302,7 @@
}
void IntrinsicCodeGeneratorX86_64::VisitIntegerReverse(HInvoke* invoke) {
- X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen_->GetAssembler());
+ X86_64Assembler* assembler = GetAssembler();
LocationSummary* locations = invoke->GetLocations();
CpuRegister reg = locations->InAt(0).AsRegister<CpuRegister>();
@@ -2346,7 +2346,7 @@
}
void IntrinsicCodeGeneratorX86_64::VisitLongReverse(HInvoke* invoke) {
- X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen_->GetAssembler());
+ X86_64Assembler* assembler = GetAssembler();
LocationSummary* locations = invoke->GetLocations();
CpuRegister reg = locations->InAt(0).AsRegister<CpuRegister>();
@@ -2382,7 +2382,10 @@
locations->SetOut(Location::RequiresRegister());
}
-static void GenBitCount(X86_64Assembler* assembler, HInvoke* invoke, bool is_long) {
+static void GenBitCount(X86_64Assembler* assembler,
+ CodeGeneratorX86_64* codegen,
+ HInvoke* invoke,
+ bool is_long) {
LocationSummary* locations = invoke->GetLocations();
Location src = locations->InAt(0);
CpuRegister out = locations->Out().AsRegister<CpuRegister>();
@@ -2393,11 +2396,7 @@
value = is_long
? POPCOUNT(static_cast<uint64_t>(value))
: POPCOUNT(static_cast<uint32_t>(value));
- if (value == 0) {
- __ xorl(out, out);
- } else {
- __ movl(out, Immediate(value));
- }
+ codegen->Load32BitValue(out, value);
return;
}
@@ -2421,7 +2420,7 @@
}
void IntrinsicCodeGeneratorX86_64::VisitIntegerBitCount(HInvoke* invoke) {
- GenBitCount(GetAssembler(), invoke, /* is_long */ false);
+ GenBitCount(GetAssembler(), codegen_, invoke, /* is_long */ false);
}
void IntrinsicLocationsBuilderX86_64::VisitLongBitCount(HInvoke* invoke) {
@@ -2429,7 +2428,190 @@
}
void IntrinsicCodeGeneratorX86_64::VisitLongBitCount(HInvoke* invoke) {
- GenBitCount(GetAssembler(), invoke, /* is_long */ true);
+ GenBitCount(GetAssembler(), codegen_, invoke, /* is_long */ true);
+}
+
+static void CreateCompareLocations(ArenaAllocator* arena, HInvoke* invoke) {
+ LocationSummary* locations = new (arena) LocationSummary(invoke,
+ LocationSummary::kNoCall,
+ kIntrinsified);
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(1, Location::RequiresRegister());
+ locations->SetOut(Location::RequiresRegister());
+}
+
+static void GenCompare(X86_64Assembler* assembler, HInvoke* invoke, bool is_long) {
+ LocationSummary* locations = invoke->GetLocations();
+ CpuRegister src1 = locations->InAt(0).AsRegister<CpuRegister>();
+ CpuRegister src2 = locations->InAt(1).AsRegister<CpuRegister>();
+ CpuRegister out = locations->Out().AsRegister<CpuRegister>();
+
+ NearLabel is_lt, done;
+
+ __ xorl(out, out);
+
+ if (is_long) {
+ __ cmpq(src1, src2);
+ } else {
+ __ cmpl(src1, src2);
+ }
+ __ j(kEqual, &done);
+ __ j(kLess, &is_lt);
+
+ __ movl(out, Immediate(1));
+ __ jmp(&done);
+
+ __ Bind(&is_lt);
+ __ movl(out, Immediate(-1));
+
+ __ Bind(&done);
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitIntegerCompare(HInvoke* invoke) {
+ CreateCompareLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitIntegerCompare(HInvoke* invoke) {
+ GenCompare(GetAssembler(), invoke, /* is_long */ false);
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitLongCompare(HInvoke* invoke) {
+ CreateCompareLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitLongCompare(HInvoke* invoke) {
+ GenCompare(GetAssembler(), invoke, /* is_long */ true);
+}
+
+static void CreateOneBitLocations(ArenaAllocator* arena, HInvoke* invoke, bool is_high) {
+ LocationSummary* locations = new (arena) LocationSummary(invoke,
+ LocationSummary::kNoCall,
+ kIntrinsified);
+ locations->SetInAt(0, Location::Any());
+ locations->SetOut(Location::RequiresRegister());
+ locations->AddTemp(is_high ? Location::RegisterLocation(RCX) // needs CL
+ : Location::RequiresRegister()); // any will do
+}
+
+static void GenOneBit(X86_64Assembler* assembler,
+ CodeGeneratorX86_64* codegen,
+ HInvoke* invoke,
+ bool is_high, bool is_long) {
+ LocationSummary* locations = invoke->GetLocations();
+ Location src = locations->InAt(0);
+ CpuRegister out = locations->Out().AsRegister<CpuRegister>();
+
+ if (invoke->InputAt(0)->IsConstant()) {
+ // Evaluate this at compile time.
+ int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
+ if (value == 0) {
+ __ xorl(out, out); // Clears upper bits too.
+ return;
+ }
+ // Nonzero value.
+ if (is_high) {
+ value = is_long ? 63 - CLZ(static_cast<uint64_t>(value))
+ : 31 - CLZ(static_cast<uint32_t>(value));
+ } else {
+ value = is_long ? CTZ(static_cast<uint64_t>(value))
+ : CTZ(static_cast<uint32_t>(value));
+ }
+ if (is_long) {
+ codegen->Load64BitValue(out, 1L << value);
+ } else {
+ codegen->Load32BitValue(out, 1 << value);
+ }
+ return;
+ }
+
+ // Handle the non-constant cases.
+ CpuRegister tmp = locations->GetTemp(0).AsRegister<CpuRegister>();
+ if (is_high) {
+ // Use architectural support: basically 1 << bsr.
+ if (src.IsRegister()) {
+ if (is_long) {
+ __ bsrq(tmp, src.AsRegister<CpuRegister>());
+ } else {
+ __ bsrl(tmp, src.AsRegister<CpuRegister>());
+ }
+ } else if (is_long) {
+ DCHECK(src.IsDoubleStackSlot());
+ __ bsrq(tmp, Address(CpuRegister(RSP), src.GetStackIndex()));
+ } else {
+ DCHECK(src.IsStackSlot());
+ __ bsrl(tmp, Address(CpuRegister(RSP), src.GetStackIndex()));
+ }
+ // BSR sets ZF if the input was zero.
+ NearLabel is_zero, done;
+ __ j(kEqual, &is_zero);
+ __ movl(out, Immediate(1)); // Clears upper bits too.
+ if (is_long) {
+ __ shlq(out, tmp);
+ } else {
+ __ shll(out, tmp);
+ }
+ __ jmp(&done);
+ __ Bind(&is_zero);
+ __ xorl(out, out); // Clears upper bits too.
+ __ Bind(&done);
+ } else {
+ // Copy input into temporary.
+ if (src.IsRegister()) {
+ if (is_long) {
+ __ movq(tmp, src.AsRegister<CpuRegister>());
+ } else {
+ __ movl(tmp, src.AsRegister<CpuRegister>());
+ }
+ } else if (is_long) {
+ DCHECK(src.IsDoubleStackSlot());
+ __ movq(tmp, Address(CpuRegister(RSP), src.GetStackIndex()));
+ } else {
+ DCHECK(src.IsStackSlot());
+ __ movl(tmp, Address(CpuRegister(RSP), src.GetStackIndex()));
+ }
+ // Do the bit twiddling: basically tmp & -tmp;
+ if (is_long) {
+ __ movq(out, tmp);
+ __ negq(tmp);
+ __ andq(out, tmp);
+ } else {
+ __ movl(out, tmp);
+ __ negl(tmp);
+ __ andl(out, tmp);
+ }
+ }
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitIntegerHighestOneBit(HInvoke* invoke) {
+ CreateOneBitLocations(arena_, invoke, /* is_high */ true);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitIntegerHighestOneBit(HInvoke* invoke) {
+ GenOneBit(GetAssembler(), codegen_, invoke, /* is_high */ true, /* is_long */ false);
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitLongHighestOneBit(HInvoke* invoke) {
+ CreateOneBitLocations(arena_, invoke, /* is_high */ true);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitLongHighestOneBit(HInvoke* invoke) {
+ GenOneBit(GetAssembler(), codegen_, invoke, /* is_high */ true, /* is_long */ true);
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitIntegerLowestOneBit(HInvoke* invoke) {
+ CreateOneBitLocations(arena_, invoke, /* is_high */ false);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitIntegerLowestOneBit(HInvoke* invoke) {
+ GenOneBit(GetAssembler(), codegen_, invoke, /* is_high */ false, /* is_long */ false);
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitLongLowestOneBit(HInvoke* invoke) {
+ CreateOneBitLocations(arena_, invoke, /* is_high */ false);
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitLongLowestOneBit(HInvoke* invoke) {
+ GenOneBit(GetAssembler(), codegen_, invoke, /* is_high */ false, /* is_long */ true);
}
static void CreateLeadingZeroLocations(ArenaAllocator* arena, HInvoke* invoke) {
@@ -2440,7 +2622,9 @@
locations->SetOut(Location::RequiresRegister());
}
-static void GenLeadingZeros(X86_64Assembler* assembler, HInvoke* invoke, bool is_long) {
+static void GenLeadingZeros(X86_64Assembler* assembler,
+ CodeGeneratorX86_64* codegen,
+ HInvoke* invoke, bool is_long) {
LocationSummary* locations = invoke->GetLocations();
Location src = locations->InAt(0);
CpuRegister out = locations->Out().AsRegister<CpuRegister>();
@@ -2454,11 +2638,7 @@
} else {
value = is_long ? CLZ(static_cast<uint64_t>(value)) : CLZ(static_cast<uint32_t>(value));
}
- if (value == 0) {
- __ xorl(out, out);
- } else {
- __ movl(out, Immediate(value));
- }
+ codegen->Load32BitValue(out, value);
return;
}
@@ -2497,8 +2677,7 @@
}
void IntrinsicCodeGeneratorX86_64::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
- X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen_->GetAssembler());
- GenLeadingZeros(assembler, invoke, /* is_long */ false);
+ GenLeadingZeros(GetAssembler(), codegen_, invoke, /* is_long */ false);
}
void IntrinsicLocationsBuilderX86_64::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
@@ -2506,8 +2685,7 @@
}
void IntrinsicCodeGeneratorX86_64::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
- X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen_->GetAssembler());
- GenLeadingZeros(assembler, invoke, /* is_long */ true);
+ GenLeadingZeros(GetAssembler(), codegen_, invoke, /* is_long */ true);
}
static void CreateTrailingZeroLocations(ArenaAllocator* arena, HInvoke* invoke) {
@@ -2518,7 +2696,9 @@
locations->SetOut(Location::RequiresRegister());
}
-static void GenTrailingZeros(X86_64Assembler* assembler, HInvoke* invoke, bool is_long) {
+static void GenTrailingZeros(X86_64Assembler* assembler,
+ CodeGeneratorX86_64* codegen,
+ HInvoke* invoke, bool is_long) {
LocationSummary* locations = invoke->GetLocations();
Location src = locations->InAt(0);
CpuRegister out = locations->Out().AsRegister<CpuRegister>();
@@ -2532,11 +2712,7 @@
} else {
value = is_long ? CTZ(static_cast<uint64_t>(value)) : CTZ(static_cast<uint32_t>(value));
}
- if (value == 0) {
- __ xorl(out, out);
- } else {
- __ movl(out, Immediate(value));
- }
+ codegen->Load32BitValue(out, value);
return;
}
@@ -2570,8 +2746,7 @@
}
void IntrinsicCodeGeneratorX86_64::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
- X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen_->GetAssembler());
- GenTrailingZeros(assembler, invoke, /* is_long */ false);
+ GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long */ false);
}
void IntrinsicLocationsBuilderX86_64::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
@@ -2579,8 +2754,75 @@
}
void IntrinsicCodeGeneratorX86_64::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
- X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen_->GetAssembler());
- GenTrailingZeros(assembler, invoke, /* is_long */ true);
+ GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long */ true);
+}
+
+static void CreateSignLocations(ArenaAllocator* arena, HInvoke* invoke) {
+ LocationSummary* locations = new (arena) LocationSummary(invoke,
+ LocationSummary::kNoCall,
+ kIntrinsified);
+ locations->SetInAt(0, Location::Any());
+ locations->SetOut(Location::RequiresRegister());
+ locations->AddTemp(Location::RequiresRegister()); // Need a writeable register.
+}
+
+static void GenSign(X86_64Assembler* assembler,
+ CodeGeneratorX86_64* codegen,
+ HInvoke* invoke, bool is_long) {
+ LocationSummary* locations = invoke->GetLocations();
+ Location src = locations->InAt(0);
+ CpuRegister out = locations->Out().AsRegister<CpuRegister>();
+
+ if (invoke->InputAt(0)->IsConstant()) {
+ // Evaluate this at compile time.
+ int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
+ codegen->Load32BitValue(out, value == 0 ? 0 : (value > 0 ? 1 : -1));
+ return;
+ }
+
+ // Copy input into temporary.
+ CpuRegister tmp = locations->GetTemp(0).AsRegister<CpuRegister>();
+ if (src.IsRegister()) {
+ if (is_long) {
+ __ movq(tmp, src.AsRegister<CpuRegister>());
+ } else {
+ __ movl(tmp, src.AsRegister<CpuRegister>());
+ }
+ } else if (is_long) {
+ DCHECK(src.IsDoubleStackSlot());
+ __ movq(tmp, Address(CpuRegister(RSP), src.GetStackIndex()));
+ } else {
+ DCHECK(src.IsStackSlot());
+ __ movl(tmp, Address(CpuRegister(RSP), src.GetStackIndex()));
+ }
+
+ // Do the bit twiddling: basically tmp >> 63/31 | -tmp >>> 63/31 for long/int.
+ if (is_long) {
+ __ movq(out, tmp);
+ __ sarq(out, Immediate(63));
+ __ negq(tmp);
+ __ shrq(tmp, Immediate(63));
+ __ orq(out, tmp);
+ } else {
+ __ movl(out, tmp);
+ __ sarl(out, Immediate(31));
+ __ negl(tmp);
+ __ shrl(tmp, Immediate(31));
+ __ orl(out, tmp);
+ }
+}
+
+void IntrinsicLocationsBuilderX86_64::VisitIntegerSignum(HInvoke* invoke) {
+ CreateSignLocations(arena_, invoke);
+}
+void IntrinsicCodeGeneratorX86_64::VisitIntegerSignum(HInvoke* invoke) {
+ GenSign(GetAssembler(), codegen_, invoke, /* is_long */ false);
+}
+void IntrinsicLocationsBuilderX86_64::VisitLongSignum(HInvoke* invoke) {
+ CreateSignLocations(arena_, invoke);
+}
+void IntrinsicCodeGeneratorX86_64::VisitLongSignum(HInvoke* invoke) {
+ GenSign(GetAssembler(), codegen_, invoke, /* is_long */ true);
}
// Unimplemented intrinsics.
@@ -2598,15 +2840,6 @@
UNIMPLEMENTED_INTRINSIC(FloatIsNaN)
UNIMPLEMENTED_INTRINSIC(DoubleIsNaN)
-UNIMPLEMENTED_INTRINSIC(IntegerCompare)
-UNIMPLEMENTED_INTRINSIC(LongCompare)
-UNIMPLEMENTED_INTRINSIC(IntegerHighestOneBit)
-UNIMPLEMENTED_INTRINSIC(LongHighestOneBit)
-UNIMPLEMENTED_INTRINSIC(IntegerLowestOneBit)
-UNIMPLEMENTED_INTRINSIC(LongLowestOneBit)
-UNIMPLEMENTED_INTRINSIC(IntegerSignum)
-UNIMPLEMENTED_INTRINSIC(LongSignum)
-
// Rotate operations are handled as HRor instructions.
UNIMPLEMENTED_INTRINSIC(IntegerRotateLeft)
UNIMPLEMENTED_INTRINSIC(IntegerRotateRight)
diff --git a/test/568-checker-onebit/src/Main.java b/test/568-checker-onebit/src/Main.java
index 7007c6a..6ce4ffb 100644
--- a/test/568-checker-onebit/src/Main.java
+++ b/test/568-checker-onebit/src/Main.java
@@ -45,6 +45,10 @@
}
public static void main(String args[]) {
+ // Hidden zeros.
+ int[] xi = new int[32];
+ long[] xj = new long[64];
+
expectEquals32(0x00000000, hi32(0x00000000));
expectEquals32(0x00000000, lo32(0x00000000));
expectEquals32(0x00010000, hi32(0x00010000));
@@ -55,6 +59,8 @@
expectEquals32(0x00000001, lo32(0xFFFFFFFF));
for (int i = 0; i < 32; i++) {
+ expectEquals32(0, hi32(xi[i]));
+ expectEquals32(0, lo32(xi[i]));
expectEquals32(1 << i, hi32(1 << i));
expectEquals32(1 << i, lo32(1 << i));
int expected = i < 29 ? 0x8 << i : 0x80000000;
@@ -72,6 +78,8 @@
expectEquals64(0x0000000000000001L, lo64(0xFFFFFFFFFFFFFFFFL));
for (int i = 0; i < 64; i++) {
+ expectEquals64(0L, hi64(xj[i]));
+ expectEquals64(0L, lo64(xj[i]));
expectEquals64(1L << i, hi64(1L << i));
expectEquals64(1L << i, lo64(1L << i));
long expected = i < 61 ? 0x8L << i : 0x8000000000000000L;
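
As a minimal sketch of what the extended 568-checker-onebit test pins down
(the class name below is invented, and it assumes the array loads are not
constant-folded): reading a zero out of a freshly allocated array forces the
run-time path in GenOneBit, where BSR sees a zero input and the is_zero
branch produces 0.

  public class HiddenZeroSketch {
    public static void main(String[] args) {
      int[] xi = new int[1];   // elements default to 0, but are not compile-time constants
      long[] xj = new long[1];
      // Both calls should take the run-time zero path and print 0.
      System.out.println(Integer.highestOneBit(xi[0]));  // 0
      System.out.println(Long.lowestOneBit(xj[0]));      // 0
    }
  }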