Optimize mod power 2 for x86
Test: 411-checker-hdiv-hrem-pow2, test.py --host
Change-Id: I9334a3eb2cb50df439b56c0161379fef46e58603
Signed-off-by: Shalini Salomi Bodapati <shalini.salomi.bodapati@intel.com>
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 9f34a51..7745f64 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -3497,6 +3497,27 @@
}
}
+void InstructionCodeGeneratorX86::RemByPowerOfTwo(HRem* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ Location second = locations->InAt(1);
+
+ Register out = locations->Out().AsRegister<Register>();
+ Register numerator = locations->InAt(0).AsRegister<Register>();
+
+ int32_t imm = Int64FromConstant(second.GetConstant());
+ DCHECK(IsPowerOfTwo(AbsOrMin(imm)));
+ uint32_t abs_imm = static_cast<uint32_t>(AbsOrMin(imm));
+
+ Register tmp = locations->GetTemp(0).AsRegister<Register>();
+ NearLabel done;
+ __ movl(out, numerator);
+ __ andl(out, Immediate(abs_imm-1));
+ __ j(Condition::kZero, &done);
+ __ leal(tmp, Address(out, static_cast<int32_t>(~(abs_imm-1))));
+ __ testl(numerator, numerator);
+ __ cmovl(Condition::kLess, out, tmp);
+ __ Bind(&done);
+}
void InstructionCodeGeneratorX86::DivByPowerOfTwo(HDiv* instruction) {
LocationSummary* locations = instruction->GetLocations();
@@ -3610,8 +3631,12 @@
// Do not generate anything for 0. DivZeroCheck would forbid any generated code.
} else if (imm == 1 || imm == -1) {
DivRemOneOrMinusOne(instruction);
- } else if (is_div && IsPowerOfTwo(AbsOrMin(imm))) {
- DivByPowerOfTwo(instruction->AsDiv());
+ } else if (IsPowerOfTwo(AbsOrMin(imm))) {
+ if (is_div) {
+ DivByPowerOfTwo(instruction->AsDiv());
+ } else {
+ RemByPowerOfTwo(instruction->AsRem());
+ }
} else {
DCHECK(imm <= -2 || imm >= 2);
GenerateDivRemWithAnyConstant(instruction);
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index 93b0461..4e78be8 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -216,6 +216,7 @@
void GenerateDivRemIntegral(HBinaryOperation* instruction);
void DivRemOneOrMinusOne(HBinaryOperation* instruction);
void DivByPowerOfTwo(HDiv* instruction);
+ void RemByPowerOfTwo(HRem* instruction);
void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction);
void GenerateRemFP(HRem* rem);
void HandleCondition(HCondition* condition);
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index dac2dba..b9ae2cd 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -3560,7 +3560,40 @@
LOG(FATAL) << "Unexpected type for div by (-)1 " << instruction->GetResultType();
}
}
+void InstructionCodeGeneratorX86_64::RemByPowerOfTwo(HRem* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ Location second = locations->InAt(1);
+ CpuRegister out = locations->Out().AsRegister<CpuRegister>();
+ CpuRegister numerator = locations->InAt(0).AsRegister<CpuRegister>();
+ int64_t imm = Int64FromConstant(second.GetConstant());
+ DCHECK(IsPowerOfTwo(AbsOrMin(imm)));
+ uint64_t abs_imm = AbsOrMin(imm);
+ CpuRegister tmp = locations->GetTemp(0).AsRegister<CpuRegister>();
+ if (instruction->GetResultType() == DataType::Type::kInt32) {
+ NearLabel done;
+ __ movl(out, numerator);
+ __ andl(out, Immediate(abs_imm-1));
+ __ j(Condition::kZero, &done);
+ __ leal(tmp, Address(out, static_cast<int32_t>(~(abs_imm-1))));
+ __ testl(numerator, numerator);
+ __ cmov(Condition::kLess, out, tmp, false);
+ __ Bind(&done);
+ } else {
+ DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
+ codegen_->Load64BitValue(tmp, abs_imm - 1);
+ NearLabel done;
+
+ __ movq(out, numerator);
+ __ andq(out, tmp);
+ __ j(Condition::kZero, &done);
+ __ movq(tmp, numerator);
+ __ sarq(tmp, Immediate(63));
+ __ shlq(tmp, Immediate(WhichPowerOf2(abs_imm)));
+ __ orq(out, tmp);
+ __ Bind(&done);
+ }
+}
void InstructionCodeGeneratorX86_64::DivByPowerOfTwo(HDiv* instruction) {
LocationSummary* locations = instruction->GetLocations();
Location second = locations->InAt(1);
@@ -3737,8 +3770,12 @@
// Do not generate anything. DivZeroCheck would prevent any code to be executed.
} else if (imm == 1 || imm == -1) {
DivRemOneOrMinusOne(instruction);
- } else if (instruction->IsDiv() && IsPowerOfTwo(AbsOrMin(imm))) {
- DivByPowerOfTwo(instruction->AsDiv());
+ } else if (IsPowerOfTwo(AbsOrMin(imm))) {
+ if (is_div) {
+ DivByPowerOfTwo(instruction->AsDiv());
+ } else {
+ RemByPowerOfTwo(instruction->AsRem());
+ }
} else {
DCHECK(imm <= -2 || imm >= 2);
GenerateDivRemWithAnyConstant(instruction);
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index 1e71397..25e5aa4 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -213,6 +213,7 @@
void GenerateRemFP(HRem* rem);
void DivRemOneOrMinusOne(HBinaryOperation* instruction);
void DivByPowerOfTwo(HDiv* instruction);
+ void RemByPowerOfTwo(HRem* instruction);
void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction);
void GenerateDivRemIntegral(HBinaryOperation* instruction);
void HandleCondition(HCondition* condition);
diff --git a/test/411-checker-hdiv-hrem-pow2/src/RemTest.java b/test/411-checker-hdiv-hrem-pow2/src/RemTest.java
index 72725c1..54d7847 100644
--- a/test/411-checker-hdiv-hrem-pow2/src/RemTest.java
+++ b/test/411-checker-hdiv-hrem-pow2/src/RemTest.java
@@ -92,6 +92,17 @@
/// CHECK: cmp w{{\d+}}, #0x0
/// CHECK: and w{{\d+}}, w{{\d+}}, #0x1
/// CHECK: cneg w{{\d+}}, w{{\d+}}, lt
+ /// CHECK-START-X86_64: java.lang.Integer RemTest.$noinline$IntMod2(int) disassembly (after)
+ /// CHECK: Rem [{{i\d+}},{{i\d+}}]
+ /// CHECK-NOT: imul
+ /// CHECK-NOT: shr
+ /// CHECK-NOT: imul
+ /// CHECK: mov
+ /// CHECK: and
+ /// CHECK: jz/eq
+ /// CHECK: lea
+ /// CHECK: test
+ /// CHECK: cmovl/nge
private static Integer $noinline$IntMod2(int v) {
int r = v % 2;
return r;
@@ -101,6 +112,17 @@
/// CHECK: cmp w{{\d+}}, #0x0
/// CHECK: and w{{\d+}}, w{{\d+}}, #0x1
/// CHECK: cneg w{{\d+}}, w{{\d+}}, lt
+ /// CHECK-START-X86_64: java.lang.Integer RemTest.$noinline$IntModMinus2(int) disassembly (after)
+ /// CHECK: Rem [{{i\d+}},{{i\d+}}]
+ /// CHECK-NOT: imul
+ /// CHECK-NOT: shr
+ /// CHECK-NOT: imul
+ /// CHECK: mov
+ /// CHECK: and
+ /// CHECK: jz/eq
+ /// CHECK: lea
+ /// CHECK: test
+ /// CHECK: cmovl/nge
private static Integer $noinline$IntModMinus2(int v) {
int r = v % -2;
return r;
@@ -111,6 +133,17 @@
/// CHECK: and w{{\d+}}, w{{\d+}}, #0xf
/// CHECK: and w{{\d+}}, w{{\d+}}, #0xf
/// CHECK: csneg w{{\d+}}, w{{\d+}}, mi
+ /// CHECK-START-X86_64: java.lang.Integer RemTest.$noinline$IntMod16(int) disassembly (after)
+ /// CHECK: Rem [{{i\d+}},{{i\d+}}]
+ /// CHECK-NOT: imul
+ /// CHECK-NOT: shr
+ /// CHECK-NOT: imul
+ /// CHECK: mov
+ /// CHECK: and
+ /// CHECK: jz/eq
+ /// CHECK: lea
+ /// CHECK: test
+ /// CHECK: cmovl/nge
private static Integer $noinline$IntMod16(int v) {
int r = v % 16;
return r;
@@ -121,6 +154,17 @@
/// CHECK: and w{{\d+}}, w{{\d+}}, #0xf
/// CHECK: and w{{\d+}}, w{{\d+}}, #0xf
/// CHECK: csneg w{{\d+}}, w{{\d+}}, mi
+ /// CHECK-START-X86_64: java.lang.Integer RemTest.$noinline$IntModMinus16(int) disassembly (after)
+ /// CHECK: Rem [{{i\d+}},{{i\d+}}]
+ /// CHECK-NOT: imul
+ /// CHECK-NOT: shr
+ /// CHECK-NOT: imul
+ /// CHECK: mov
+ /// CHECK: and
+ /// CHECK: jz/eq
+ /// CHECK: lea
+ /// CHECK: test
+ /// CHECK: cmovl/nge
private static Integer $noinline$IntModMinus16(int v) {
int r = v % -16;
return r;
@@ -131,6 +175,17 @@
/// CHECK: and w{{\d+}}, w{{\d+}}, #0x7fffffff
/// CHECK: and w{{\d+}}, w{{\d+}}, #0x7fffffff
/// CHECK: csneg w{{\d+}}, w{{\d+}}, mi
+ /// CHECK-START-X86_64: java.lang.Integer RemTest.$noinline$IntModIntMin(int) disassembly (after)
+ /// CHECK: Rem [{{i\d+}},{{i\d+}}]
+ /// CHECK-NOT: imul
+ /// CHECK-NOT: shr
+ /// CHECK-NOT: imul
+ /// CHECK: mov
+ /// CHECK: and
+ /// CHECK: jz/eq
+ /// CHECK: lea
+ /// CHECK: test
+ /// CHECK: cmovl/nge
private static Integer $noinline$IntModIntMin(int v) {
int r = v % Integer.MIN_VALUE;
return r;
@@ -211,6 +266,18 @@
/// CHECK: cmp x{{\d+}}, #0x0
/// CHECK: and x{{\d+}}, x{{\d+}}, #0x1
/// CHECK: cneg x{{\d+}}, x{{\d+}}, lt
+ /// CHECK-START-X86_64: java.lang.Long RemTest.$noinline$LongMod2(long) disassembly (after)
+ /// CHECK: Rem [{{j\d+}},{{j\d+}}]
+ /// CHECK-NOT: imul
+ /// CHECK-NOT: shrq
+ /// CHECK-NOT: imulq
+ /// CHECK: movq
+ /// CHECK: andq
+ /// CHECK: jz/eq
+ /// CHECK: movq
+ /// CHECK: sarq
+ /// CHECK: shlq
+ /// CHECK: orq
private static Long $noinline$LongMod2(long v) {
long r = v % 2;
return r;
@@ -220,6 +287,18 @@
/// CHECK: cmp x{{\d+}}, #0x0
/// CHECK: and x{{\d+}}, x{{\d+}}, #0x1
/// CHECK: cneg x{{\d+}}, x{{\d+}}, lt
+ /// CHECK-START-X86_64: java.lang.Long RemTest.$noinline$LongModMinus2(long) disassembly (after)
+ /// CHECK: Rem [{{j\d+}},{{j\d+}}]
+ /// CHECK-NOT: imul
+ /// CHECK-NOT: shrq
+ /// CHECK-NOT: imulq
+ /// CHECK: movq
+ /// CHECK: andq
+ /// CHECK: jz/eq
+ /// CHECK: movq
+ /// CHECK: sarq
+ /// CHECK: shlq
+ /// CHECK: orq
private static Long $noinline$LongModMinus2(long v) {
long r = v % -2;
return r;
@@ -230,6 +309,19 @@
/// CHECK: and x{{\d+}}, x{{\d+}}, #0xf
/// CHECK: and x{{\d+}}, x{{\d+}}, #0xf
/// CHECK: csneg x{{\d+}}, x{{\d+}}, mi
+
+ /// CHECK-START-X86_64: java.lang.Long RemTest.$noinline$LongMod16(long) disassembly (after)
+ /// CHECK: Rem [{{j\d+}},{{j\d+}}]
+ /// CHECK-NOT: imul
+ /// CHECK-NOT: shrq
+ /// CHECK-NOT: imulq
+ /// CHECK: movq
+ /// CHECK: andq
+ /// CHECK: jz/eq
+ /// CHECK: movq
+ /// CHECK: sarq
+ /// CHECK: shlq
+ /// CHECK: orq
private static Long $noinline$LongMod16(long v) {
long r = v % 16;
return r;
@@ -240,6 +332,18 @@
/// CHECK: and x{{\d+}}, x{{\d+}}, #0xf
/// CHECK: and x{{\d+}}, x{{\d+}}, #0xf
/// CHECK: csneg x{{\d+}}, x{{\d+}}, mi
+ /// CHECK-START-X86_64: java.lang.Long RemTest.$noinline$LongModMinus16(long) disassembly (after)
+ /// CHECK: Rem [{{j\d+}},{{j\d+}}]
+ /// CHECK-NOT: imul
+ /// CHECK-NOT: shrq
+ /// CHECK-NOT: imulq
+ /// CHECK: movq
+ /// CHECK: andq
+ /// CHECK: jz/eq
+ /// CHECK: movq
+ /// CHECK: sarq
+ /// CHECK: shlq
+ /// CHECK: orq
private static Long $noinline$LongModMinus16(long v) {
long r = v % -16;
return r;
@@ -250,6 +354,18 @@
/// CHECK: and x{{\d+}}, x{{\d+}}, #0x7fffffffffffffff
/// CHECK: and x{{\d+}}, x{{\d+}}, #0x7fffffffffffffff
/// CHECK: csneg x{{\d+}}, x{{\d+}}, mi
+ /// CHECK-START-X86_64: java.lang.Long RemTest.$noinline$LongModLongMin(long) disassembly (after)
+ /// CHECK: Rem [{{j\d+}},{{j\d+}}]
+ /// CHECK-NOT: imul
+ /// CHECK-NOT: shrq
+ /// CHECK-NOT: imulq
+ /// CHECK: movq
+ /// CHECK: andq
+ /// CHECK: jz/eq
+ /// CHECK: movq
+ /// CHECK: sarq
+ /// CHECK: shlq
+ /// CHECK: orq
private static Long $noinline$LongModLongMin(long v) {
long r = v % Long.MIN_VALUE;
return r;