summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Shalini Salomi Bodapati <shalini.salomi.bodapati@intel.com> 2018-11-06 13:05:44 +0530
committer Shalini Salomi Bodapati <shalini.salomi.bodapati@intel.com> 2018-11-12 21:09:32 +0530
commit a66784b09f10d847b49bc878f10c45690e212f0b (patch)
tree a18ee4ec39a44256223b9f756159bfd535ddfde3
parent fe59955fc41e277bf1c60378202ba785abb1e4a8 (diff)
Optimize mod power 2 for x86
Test: 411-checker-hdiv-hrem-pow2, test.py --host
Change-Id: I9334a3eb2cb50df439b56c0161379fef46e58603
Signed-off-by: Shalini Salomi Bodapati <shalini.salomi.bodapati@intel.com>
-rw-r--r-- compiler/optimizing/code_generator_x86.cc | 29
-rw-r--r-- compiler/optimizing/code_generator_x86.h | 1
-rw-r--r-- compiler/optimizing/code_generator_x86_64.cc | 41
-rw-r--r-- compiler/optimizing/code_generator_x86_64.h | 1
-rw-r--r-- test/411-checker-hdiv-hrem-pow2/src/RemTest.java | 116
5 files changed, 184 insertions, 4 deletions
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 9f34a51d84..7745f64b24 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -3497,6 +3497,27 @@ void InstructionCodeGeneratorX86::DivRemOneOrMinusOne(HBinaryOperation* instruct
}
}
+void InstructionCodeGeneratorX86::RemByPowerOfTwo(HRem* instruction) {  // Emits code for `numerator % imm` where imm is a constant whose magnitude is a power of two.
+ LocationSummary* locations = instruction->GetLocations();
+ Location second = locations->InAt(1);  // Divisor; read below as a constant.
+ // The result and the dividend are both accessed as registers.
+ Register out = locations->Out().AsRegister<Register>();
+ Register numerator = locations->InAt(0).AsRegister<Register>();
+ // Only the magnitude of the divisor is used from here on; the sign of `imm` is never consulted.
+ int32_t imm = Int64FromConstant(second.GetConstant());
+ DCHECK(IsPowerOfTwo(AbsOrMin(imm)));  // NOTE(review): AbsOrMin presumably returns |imm|, leaving the minimum value unchanged - confirm.
+ uint32_t abs_imm = static_cast<uint32_t>(AbsOrMin(imm));
+ // Emitted sequence: mask the low bits, then subtract abs_imm when the dividend is negative and the masked value is non-zero.
+ Register tmp = locations->GetTemp(0).AsRegister<Register>();  // Scratch register for the adjusted (negative) remainder.
+ NearLabel done;
+ __ movl(out, numerator);
+ __ andl(out, Immediate(abs_imm-1));  // out = numerator & (abs_imm - 1), i.e. the low bits of the dividend.
+ __ j(Condition::kZero, &done);  // Masked value is zero: the remainder is 0 for either sign, skip the fix-up.
+ __ leal(tmp, Address(out, static_cast<int32_t>(~(abs_imm-1))));  // ~(abs_imm - 1) == -abs_imm in 32-bit arithmetic, so tmp = out - abs_imm.
+ __ testl(numerator, numerator);  // Set the flags from the dividend so its sign can be tested.
+ __ cmovl(Condition::kLess, out, tmp);  // Negative dividend: replace out with out - abs_imm.
+ __ Bind(&done);
+}
void InstructionCodeGeneratorX86::DivByPowerOfTwo(HDiv* instruction) {
LocationSummary* locations = instruction->GetLocations();
@@ -3610,8 +3631,12 @@ void InstructionCodeGeneratorX86::GenerateDivRemIntegral(HBinaryOperation* instr
// Do not generate anything for 0. DivZeroCheck would forbid any generated code.
} else if (imm == 1 || imm == -1) {
DivRemOneOrMinusOne(instruction);
- } else if (is_div && IsPowerOfTwo(AbsOrMin(imm))) {
- DivByPowerOfTwo(instruction->AsDiv());
+ } else if (IsPowerOfTwo(AbsOrMin(imm))) {
+ if (is_div) {
+ DivByPowerOfTwo(instruction->AsDiv());
+ } else {
+ RemByPowerOfTwo(instruction->AsRem());
+ }
} else {
DCHECK(imm <= -2 || imm >= 2);
GenerateDivRemWithAnyConstant(instruction);
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index 93b0461975..4e78be8453 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -216,6 +216,7 @@ class InstructionCodeGeneratorX86 : public InstructionCodeGenerator {
void GenerateDivRemIntegral(HBinaryOperation* instruction);
void DivRemOneOrMinusOne(HBinaryOperation* instruction);
void DivByPowerOfTwo(HDiv* instruction);
+ void RemByPowerOfTwo(HRem* instruction);
void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction);
void GenerateRemFP(HRem* rem);
void HandleCondition(HCondition* condition);
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index dac2dba605..b9ae2cd46a 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -3560,7 +3560,40 @@ void InstructionCodeGeneratorX86_64::DivRemOneOrMinusOne(HBinaryOperation* instr
LOG(FATAL) << "Unexpected type for div by (-)1 " << instruction->GetResultType();
}
}
+void InstructionCodeGeneratorX86_64::RemByPowerOfTwo(HRem* instruction) {  // Emits code for `numerator % imm` (Int32 or Int64 result) where imm is a constant whose magnitude is a power of two.
+ LocationSummary* locations = instruction->GetLocations();
+ Location second = locations->InAt(1);  // Divisor; read below as a constant.
+ CpuRegister out = locations->Out().AsRegister<CpuRegister>();
+ CpuRegister numerator = locations->InAt(0).AsRegister<CpuRegister>();
+ int64_t imm = Int64FromConstant(second.GetConstant());
+ DCHECK(IsPowerOfTwo(AbsOrMin(imm)));  // NOTE(review): AbsOrMin presumably returns |imm|, leaving the minimum value unchanged - confirm.
+ uint64_t abs_imm = AbsOrMin(imm);  // Only the magnitude is used from here on; the sign of `imm` is never consulted.
+ CpuRegister tmp = locations->GetTemp(0).AsRegister<CpuRegister>();  // Scratch register used by both paths below.
+ if (instruction->GetResultType() == DataType::Type::kInt32) {
+ NearLabel done;
+ __ movl(out, numerator);
+ __ andl(out, Immediate(abs_imm-1));  // out = numerator & (abs_imm - 1), i.e. the low bits of the dividend.
+ __ j(Condition::kZero, &done);  // Masked value is zero: the remainder is 0 for either sign, skip the fix-up.
+ __ leal(tmp, Address(out, static_cast<int32_t>(~(abs_imm-1))));  // The displacement truncates to -abs_imm as a 32-bit value, so tmp = out - abs_imm.
+ __ testl(numerator, numerator);  // Set the flags from the dividend so its sign can be tested.
+ __ cmov(Condition::kLess, out, tmp, false);  // Negative dividend: out = tmp. NOTE(review): the trailing `false` presumably selects the 32-bit form - confirm.
+ __ Bind(&done);
+ // 64-bit path: the mask is placed in tmp first and the fix-up is built with shifts and an OR instead of lea/cmov.
+ } else {
+ DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
+ codegen_->Load64BitValue(tmp, abs_imm - 1);  // tmp = mask of the low bits; it is applied to out by the andq below.
+ NearLabel done;
+ __ movq(out, numerator);
+ __ andq(out, tmp);  // out = numerator & (abs_imm - 1).
+ __ j(Condition::kZero, &done);  // Masked value is zero: the remainder is 0 for either sign, skip the fix-up.
+ __ movq(tmp, numerator);
+ __ sarq(tmp, Immediate(63));  // Arithmetic shift: tmp = all ones if the dividend is negative, 0 otherwise.
+ __ shlq(tmp, Immediate(WhichPowerOf2(abs_imm)));  // Clear the low bits of tmp. NOTE(review): WhichPowerOf2 presumably returns log2(abs_imm) - confirm.
+ __ orq(out, tmp);  // Negative dividend: sets every bit above the mask, i.e. out - abs_imm; otherwise out is unchanged.
+ __ Bind(&done);
+ }
+}
void InstructionCodeGeneratorX86_64::DivByPowerOfTwo(HDiv* instruction) {
LocationSummary* locations = instruction->GetLocations();
Location second = locations->InAt(1);
@@ -3737,8 +3770,12 @@ void InstructionCodeGeneratorX86_64::GenerateDivRemIntegral(HBinaryOperation* in
// Do not generate anything. DivZeroCheck would prevent any code to be executed.
} else if (imm == 1 || imm == -1) {
DivRemOneOrMinusOne(instruction);
- } else if (instruction->IsDiv() && IsPowerOfTwo(AbsOrMin(imm))) {
- DivByPowerOfTwo(instruction->AsDiv());
+ } else if (IsPowerOfTwo(AbsOrMin(imm))) {
+ if (is_div) {
+ DivByPowerOfTwo(instruction->AsDiv());
+ } else {
+ RemByPowerOfTwo(instruction->AsRem());
+ }
} else {
DCHECK(imm <= -2 || imm >= 2);
GenerateDivRemWithAnyConstant(instruction);
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index 1e7139718b..25e5aa4a3f 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -213,6 +213,7 @@ class InstructionCodeGeneratorX86_64 : public InstructionCodeGenerator {
void GenerateRemFP(HRem* rem);
void DivRemOneOrMinusOne(HBinaryOperation* instruction);
void DivByPowerOfTwo(HDiv* instruction);
+ void RemByPowerOfTwo(HRem* instruction);
void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction);
void GenerateDivRemIntegral(HBinaryOperation* instruction);
void HandleCondition(HCondition* condition);
diff --git a/test/411-checker-hdiv-hrem-pow2/src/RemTest.java b/test/411-checker-hdiv-hrem-pow2/src/RemTest.java
index 72725c1cd4..54d7847fdf 100644
--- a/test/411-checker-hdiv-hrem-pow2/src/RemTest.java
+++ b/test/411-checker-hdiv-hrem-pow2/src/RemTest.java
@@ -92,6 +92,17 @@ public class RemTest {
/// CHECK: cmp w{{\d+}}, #0x0
/// CHECK: and w{{\d+}}, w{{\d+}}, #0x1
/// CHECK: cneg w{{\d+}}, w{{\d+}}, lt
+ /// CHECK-START-X86_64: java.lang.Integer RemTest.$noinline$IntMod2(int) disassembly (after)
+ /// CHECK: Rem [{{i\d+}},{{i\d+}}]
+ /// CHECK-NOT: imul
+ /// CHECK-NOT: shr
+ /// CHECK-NOT: imul
+ /// CHECK: mov
+ /// CHECK: and
+ /// CHECK: jz/eq
+ /// CHECK: lea
+ /// CHECK: test
+ /// CHECK: cmovl/nge
private static Integer $noinline$IntMod2(int v) {
int r = v % 2;
return r;
@@ -101,6 +112,17 @@ public class RemTest {
/// CHECK: cmp w{{\d+}}, #0x0
/// CHECK: and w{{\d+}}, w{{\d+}}, #0x1
/// CHECK: cneg w{{\d+}}, w{{\d+}}, lt
+ /// CHECK-START-X86_64: java.lang.Integer RemTest.$noinline$IntModMinus2(int) disassembly (after)
+ /// CHECK: Rem [{{i\d+}},{{i\d+}}]
+ /// CHECK-NOT: imul
+ /// CHECK-NOT: shr
+ /// CHECK-NOT: imul
+ /// CHECK: mov
+ /// CHECK: and
+ /// CHECK: jz/eq
+ /// CHECK: lea
+ /// CHECK: test
+ /// CHECK: cmovl/nge
private static Integer $noinline$IntModMinus2(int v) {
int r = v % -2;
return r;
@@ -111,6 +133,17 @@ public class RemTest {
/// CHECK: and w{{\d+}}, w{{\d+}}, #0xf
/// CHECK: and w{{\d+}}, w{{\d+}}, #0xf
/// CHECK: csneg w{{\d+}}, w{{\d+}}, mi
+ /// CHECK-START-X86_64: java.lang.Integer RemTest.$noinline$IntMod16(int) disassembly (after)
+ /// CHECK: Rem [{{i\d+}},{{i\d+}}]
+ /// CHECK-NOT: imul
+ /// CHECK-NOT: shr
+ /// CHECK-NOT: imul
+ /// CHECK: mov
+ /// CHECK: and
+ /// CHECK: jz/eq
+ /// CHECK: lea
+ /// CHECK: test
+ /// CHECK: cmovl/nge
private static Integer $noinline$IntMod16(int v) {
int r = v % 16;
return r;
@@ -121,6 +154,17 @@ public class RemTest {
/// CHECK: and w{{\d+}}, w{{\d+}}, #0xf
/// CHECK: and w{{\d+}}, w{{\d+}}, #0xf
/// CHECK: csneg w{{\d+}}, w{{\d+}}, mi
+ /// CHECK-START-X86_64: java.lang.Integer RemTest.$noinline$IntModMinus16(int) disassembly (after)
+ /// CHECK: Rem [{{i\d+}},{{i\d+}}]
+ /// CHECK-NOT: imul
+ /// CHECK-NOT: shr
+ /// CHECK-NOT: imul
+ /// CHECK: mov
+ /// CHECK: and
+ /// CHECK: jz/eq
+ /// CHECK: lea
+ /// CHECK: test
+ /// CHECK: cmovl/nge
private static Integer $noinline$IntModMinus16(int v) {
int r = v % -16;
return r;
@@ -131,6 +175,17 @@ public class RemTest {
/// CHECK: and w{{\d+}}, w{{\d+}}, #0x7fffffff
/// CHECK: and w{{\d+}}, w{{\d+}}, #0x7fffffff
/// CHECK: csneg w{{\d+}}, w{{\d+}}, mi
+ /// CHECK-START-X86_64: java.lang.Integer RemTest.$noinline$IntModIntMin(int) disassembly (after)
+ /// CHECK: Rem [{{i\d+}},{{i\d+}}]
+ /// CHECK-NOT: imul
+ /// CHECK-NOT: shr
+ /// CHECK-NOT: imul
+ /// CHECK: mov
+ /// CHECK: and
+ /// CHECK: jz/eq
+ /// CHECK: lea
+ /// CHECK: test
+ /// CHECK: cmovl/nge
private static Integer $noinline$IntModIntMin(int v) {
int r = v % Integer.MIN_VALUE;
return r;
@@ -211,6 +266,18 @@ public class RemTest {
/// CHECK: cmp x{{\d+}}, #0x0
/// CHECK: and x{{\d+}}, x{{\d+}}, #0x1
/// CHECK: cneg x{{\d+}}, x{{\d+}}, lt
+ /// CHECK-START-X86_64: java.lang.Long RemTest.$noinline$LongMod2(long) disassembly (after)
+ /// CHECK: Rem [{{j\d+}},{{j\d+}}]
+ /// CHECK-NOT: imul
+ /// CHECK-NOT: shrq
+ /// CHECK-NOT: imulq
+ /// CHECK: movq
+ /// CHECK: andq
+ /// CHECK: jz/eq
+ /// CHECK: movq
+ /// CHECK: sarq
+ /// CHECK: shlq
+ /// CHECK: orq
private static Long $noinline$LongMod2(long v) {
long r = v % 2;
return r;
@@ -220,6 +287,18 @@ public class RemTest {
/// CHECK: cmp x{{\d+}}, #0x0
/// CHECK: and x{{\d+}}, x{{\d+}}, #0x1
/// CHECK: cneg x{{\d+}}, x{{\d+}}, lt
+ /// CHECK-START-X86_64: java.lang.Long RemTest.$noinline$LongModMinus2(long) disassembly (after)
+ /// CHECK: Rem [{{j\d+}},{{j\d+}}]
+ /// CHECK-NOT: imul
+ /// CHECK-NOT: shrq
+ /// CHECK-NOT: imulq
+ /// CHECK: movq
+ /// CHECK: andq
+ /// CHECK: jz/eq
+ /// CHECK: movq
+ /// CHECK: sarq
+ /// CHECK: shlq
+ /// CHECK: orq
private static Long $noinline$LongModMinus2(long v) {
long r = v % -2;
return r;
@@ -230,6 +309,19 @@ public class RemTest {
/// CHECK: and x{{\d+}}, x{{\d+}}, #0xf
/// CHECK: and x{{\d+}}, x{{\d+}}, #0xf
/// CHECK: csneg x{{\d+}}, x{{\d+}}, mi
+
+ /// CHECK-START-X86_64: java.lang.Long RemTest.$noinline$LongMod16(long) disassembly (after)
+ /// CHECK: Rem [{{j\d+}},{{j\d+}}]
+ /// CHECK-NOT: imul
+ /// CHECK-NOT: shrq
+ /// CHECK-NOT: imulq
+ /// CHECK: movq
+ /// CHECK: andq
+ /// CHECK: jz/eq
+ /// CHECK: movq
+ /// CHECK: sarq
+ /// CHECK: shlq
+ /// CHECK: orq
private static Long $noinline$LongMod16(long v) {
long r = v % 16;
return r;
@@ -240,6 +332,18 @@ public class RemTest {
/// CHECK: and x{{\d+}}, x{{\d+}}, #0xf
/// CHECK: and x{{\d+}}, x{{\d+}}, #0xf
/// CHECK: csneg x{{\d+}}, x{{\d+}}, mi
+ /// CHECK-START-X86_64: java.lang.Long RemTest.$noinline$LongModMinus16(long) disassembly (after)
+ /// CHECK: Rem [{{j\d+}},{{j\d+}}]
+ /// CHECK-NOT: imul
+ /// CHECK-NOT: shrq
+ /// CHECK-NOT: imulq
+ /// CHECK: movq
+ /// CHECK: andq
+ /// CHECK: jz/eq
+ /// CHECK: movq
+ /// CHECK: sarq
+ /// CHECK: shlq
+ /// CHECK: orq
private static Long $noinline$LongModMinus16(long v) {
long r = v % -16;
return r;
@@ -250,6 +354,18 @@ public class RemTest {
/// CHECK: and x{{\d+}}, x{{\d+}}, #0x7fffffffffffffff
/// CHECK: and x{{\d+}}, x{{\d+}}, #0x7fffffffffffffff
/// CHECK: csneg x{{\d+}}, x{{\d+}}, mi
+ /// CHECK-START-X86_64: java.lang.Long RemTest.$noinline$LongModLongMin(long) disassembly (after)
+ /// CHECK: Rem [{{j\d+}},{{j\d+}}]
+ /// CHECK-NOT: imul
+ /// CHECK-NOT: shrq
+ /// CHECK-NOT: imulq
+ /// CHECK: movq
+ /// CHECK: andq
+ /// CHECK: jz/eq
+ /// CHECK: movq
+ /// CHECK: sarq
+ /// CHECK: shlq
+ /// CHECK: orq
private static Long $noinline$LongModLongMin(long v) {
long r = v % Long.MIN_VALUE;
return r;