diff options
| author | 2018-11-06 13:05:44 +0530 | |
|---|---|---|
| committer | 2018-11-12 21:09:32 +0530 | |
| commit | a66784b09f10d847b49bc878f10c45690e212f0b (patch) | |
| tree | a18ee4ec39a44256223b9f756159bfd535ddfde3 | |
| parent | fe59955fc41e277bf1c60378202ba785abb1e4a8 (diff) | |
Optimize mod power 2 for x86
Test: 411-checker-hdiv-hrem-pow2, test.py --host
Change-Id: I9334a3eb2cb50df439b56c0161379fef46e58603
Signed-off-by: Shalini Salomi Bodapati <shalini.salomi.bodapati@intel.com>
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | compiler/optimizing/code_generator_x86.cc | 29 |
| -rw-r--r-- | compiler/optimizing/code_generator_x86.h | 1 |
| -rw-r--r-- | compiler/optimizing/code_generator_x86_64.cc | 41 |
| -rw-r--r-- | compiler/optimizing/code_generator_x86_64.h | 1 |
| -rw-r--r-- | test/411-checker-hdiv-hrem-pow2/src/RemTest.java | 116 |
5 files changed, 184 insertions, 4 deletions
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index 9f34a51d84..7745f64b24 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -3497,6 +3497,27 @@ void InstructionCodeGeneratorX86::DivRemOneOrMinusOne(HBinaryOperation* instruct } } +void InstructionCodeGeneratorX86::RemByPowerOfTwo(HRem* instruction) { + LocationSummary* locations = instruction->GetLocations(); + Location second = locations->InAt(1); + + Register out = locations->Out().AsRegister<Register>(); + Register numerator = locations->InAt(0).AsRegister<Register>(); + + int32_t imm = Int64FromConstant(second.GetConstant()); + DCHECK(IsPowerOfTwo(AbsOrMin(imm))); + uint32_t abs_imm = static_cast<uint32_t>(AbsOrMin(imm)); + + Register tmp = locations->GetTemp(0).AsRegister<Register>(); + NearLabel done; + __ movl(out, numerator); + __ andl(out, Immediate(abs_imm-1)); + __ j(Condition::kZero, &done); + __ leal(tmp, Address(out, static_cast<int32_t>(~(abs_imm-1)))); + __ testl(numerator, numerator); + __ cmovl(Condition::kLess, out, tmp); + __ Bind(&done); +} void InstructionCodeGeneratorX86::DivByPowerOfTwo(HDiv* instruction) { LocationSummary* locations = instruction->GetLocations(); @@ -3610,8 +3631,12 @@ void InstructionCodeGeneratorX86::GenerateDivRemIntegral(HBinaryOperation* instr // Do not generate anything for 0. DivZeroCheck would forbid any generated code. 
} else if (imm == 1 || imm == -1) { DivRemOneOrMinusOne(instruction); - } else if (is_div && IsPowerOfTwo(AbsOrMin(imm))) { - DivByPowerOfTwo(instruction->AsDiv()); + } else if (IsPowerOfTwo(AbsOrMin(imm))) { + if (is_div) { + DivByPowerOfTwo(instruction->AsDiv()); + } else { + RemByPowerOfTwo(instruction->AsRem()); + } } else { DCHECK(imm <= -2 || imm >= 2); GenerateDivRemWithAnyConstant(instruction); diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h index 93b0461975..4e78be8453 100644 --- a/compiler/optimizing/code_generator_x86.h +++ b/compiler/optimizing/code_generator_x86.h @@ -216,6 +216,7 @@ class InstructionCodeGeneratorX86 : public InstructionCodeGenerator { void GenerateDivRemIntegral(HBinaryOperation* instruction); void DivRemOneOrMinusOne(HBinaryOperation* instruction); void DivByPowerOfTwo(HDiv* instruction); + void RemByPowerOfTwo(HRem* instruction); void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction); void GenerateRemFP(HRem* rem); void HandleCondition(HCondition* condition); diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index dac2dba605..b9ae2cd46a 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -3560,7 +3560,40 @@ void InstructionCodeGeneratorX86_64::DivRemOneOrMinusOne(HBinaryOperation* instr LOG(FATAL) << "Unexpected type for div by (-)1 " << instruction->GetResultType(); } } +void InstructionCodeGeneratorX86_64::RemByPowerOfTwo(HRem* instruction) { + LocationSummary* locations = instruction->GetLocations(); + Location second = locations->InAt(1); + CpuRegister out = locations->Out().AsRegister<CpuRegister>(); + CpuRegister numerator = locations->InAt(0).AsRegister<CpuRegister>(); + int64_t imm = Int64FromConstant(second.GetConstant()); + DCHECK(IsPowerOfTwo(AbsOrMin(imm))); + uint64_t abs_imm = AbsOrMin(imm); + CpuRegister tmp = 
locations->GetTemp(0).AsRegister<CpuRegister>(); + if (instruction->GetResultType() == DataType::Type::kInt32) { + NearLabel done; + __ movl(out, numerator); + __ andl(out, Immediate(abs_imm-1)); + __ j(Condition::kZero, &done); + __ leal(tmp, Address(out, static_cast<int32_t>(~(abs_imm-1)))); + __ testl(numerator, numerator); + __ cmov(Condition::kLess, out, tmp, false); + __ Bind(&done); + + } else { + DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64); + codegen_->Load64BitValue(tmp, abs_imm - 1); + NearLabel done; + __ movq(out, numerator); + __ andq(out, tmp); + __ j(Condition::kZero, &done); + __ movq(tmp, numerator); + __ sarq(tmp, Immediate(63)); + __ shlq(tmp, Immediate(WhichPowerOf2(abs_imm))); + __ orq(out, tmp); + __ Bind(&done); + } +} void InstructionCodeGeneratorX86_64::DivByPowerOfTwo(HDiv* instruction) { LocationSummary* locations = instruction->GetLocations(); Location second = locations->InAt(1); @@ -3737,8 +3770,12 @@ void InstructionCodeGeneratorX86_64::GenerateDivRemIntegral(HBinaryOperation* in // Do not generate anything. DivZeroCheck would prevent any code to be executed. 
} else if (imm == 1 || imm == -1) { DivRemOneOrMinusOne(instruction); - } else if (instruction->IsDiv() && IsPowerOfTwo(AbsOrMin(imm))) { - DivByPowerOfTwo(instruction->AsDiv()); + } else if (IsPowerOfTwo(AbsOrMin(imm))) { + if (is_div) { + DivByPowerOfTwo(instruction->AsDiv()); + } else { + RemByPowerOfTwo(instruction->AsRem()); + } } else { DCHECK(imm <= -2 || imm >= 2); GenerateDivRemWithAnyConstant(instruction); diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h index 1e7139718b..25e5aa4a3f 100644 --- a/compiler/optimizing/code_generator_x86_64.h +++ b/compiler/optimizing/code_generator_x86_64.h @@ -213,6 +213,7 @@ class InstructionCodeGeneratorX86_64 : public InstructionCodeGenerator { void GenerateRemFP(HRem* rem); void DivRemOneOrMinusOne(HBinaryOperation* instruction); void DivByPowerOfTwo(HDiv* instruction); + void RemByPowerOfTwo(HRem* instruction); void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction); void GenerateDivRemIntegral(HBinaryOperation* instruction); void HandleCondition(HCondition* condition); diff --git a/test/411-checker-hdiv-hrem-pow2/src/RemTest.java b/test/411-checker-hdiv-hrem-pow2/src/RemTest.java index 72725c1cd4..54d7847fdf 100644 --- a/test/411-checker-hdiv-hrem-pow2/src/RemTest.java +++ b/test/411-checker-hdiv-hrem-pow2/src/RemTest.java @@ -92,6 +92,17 @@ public class RemTest { /// CHECK: cmp w{{\d+}}, #0x0 /// CHECK: and w{{\d+}}, w{{\d+}}, #0x1 /// CHECK: cneg w{{\d+}}, w{{\d+}}, lt + /// CHECK-START-X86_64: java.lang.Integer RemTest.$noinline$IntMod2(int) disassembly (after) + /// CHECK: Rem [{{i\d+}},{{i\d+}}] + /// CHECK-NOT: imul + /// CHECK-NOT: shr + /// CHECK-NOT: imul + /// CHECK: mov + /// CHECK: and + /// CHECK: jz/eq + /// CHECK: lea + /// CHECK: test + /// CHECK: cmovl/nge private static Integer $noinline$IntMod2(int v) { int r = v % 2; return r; @@ -101,6 +112,17 @@ public class RemTest { /// CHECK: cmp w{{\d+}}, #0x0 /// CHECK: and w{{\d+}}, w{{\d+}}, 
#0x1 /// CHECK: cneg w{{\d+}}, w{{\d+}}, lt + /// CHECK-START-X86_64: java.lang.Integer RemTest.$noinline$IntModMinus2(int) disassembly (after) + /// CHECK: Rem [{{i\d+}},{{i\d+}}] + /// CHECK-NOT: imul + /// CHECK-NOT: shr + /// CHECK-NOT: imul + /// CHECK: mov + /// CHECK: and + /// CHECK: jz/eq + /// CHECK: lea + /// CHECK: test + /// CHECK: cmovl/nge private static Integer $noinline$IntModMinus2(int v) { int r = v % -2; return r; @@ -111,6 +133,17 @@ public class RemTest { /// CHECK: and w{{\d+}}, w{{\d+}}, #0xf /// CHECK: and w{{\d+}}, w{{\d+}}, #0xf /// CHECK: csneg w{{\d+}}, w{{\d+}}, mi + /// CHECK-START-X86_64: java.lang.Integer RemTest.$noinline$IntMod16(int) disassembly (after) + /// CHECK: Rem [{{i\d+}},{{i\d+}}] + /// CHECK-NOT: imul + /// CHECK-NOT: shr + /// CHECK-NOT: imul + /// CHECK: mov + /// CHECK: and + /// CHECK: jz/eq + /// CHECK: lea + /// CHECK: test + /// CHECK: cmovl/nge private static Integer $noinline$IntMod16(int v) { int r = v % 16; return r; @@ -121,6 +154,17 @@ public class RemTest { /// CHECK: and w{{\d+}}, w{{\d+}}, #0xf /// CHECK: and w{{\d+}}, w{{\d+}}, #0xf /// CHECK: csneg w{{\d+}}, w{{\d+}}, mi + /// CHECK-START-X86_64: java.lang.Integer RemTest.$noinline$IntModMinus16(int) disassembly (after) + /// CHECK: Rem [{{i\d+}},{{i\d+}}] + /// CHECK-NOT: imul + /// CHECK-NOT: shr + /// CHECK-NOT: imul + /// CHECK: mov + /// CHECK: and + /// CHECK: jz/eq + /// CHECK: lea + /// CHECK: test + /// CHECK: cmovl/nge private static Integer $noinline$IntModMinus16(int v) { int r = v % -16; return r; @@ -131,6 +175,17 @@ public class RemTest { /// CHECK: and w{{\d+}}, w{{\d+}}, #0x7fffffff /// CHECK: and w{{\d+}}, w{{\d+}}, #0x7fffffff /// CHECK: csneg w{{\d+}}, w{{\d+}}, mi + /// CHECK-START-X86_64: java.lang.Integer RemTest.$noinline$IntModIntMin(int) disassembly (after) + /// CHECK: Rem [{{i\d+}},{{i\d+}}] + /// CHECK-NOT: imul + /// CHECK-NOT: shr + /// CHECK-NOT: imul + /// CHECK: mov + /// CHECK: and + /// CHECK: jz/eq + /// CHECK: lea 
+ /// CHECK: test + /// CHECK: cmovl/nge private static Integer $noinline$IntModIntMin(int v) { int r = v % Integer.MIN_VALUE; return r; @@ -211,6 +266,18 @@ public class RemTest { /// CHECK: cmp x{{\d+}}, #0x0 /// CHECK: and x{{\d+}}, x{{\d+}}, #0x1 /// CHECK: cneg x{{\d+}}, x{{\d+}}, lt + /// CHECK-START-X86_64: java.lang.Long RemTest.$noinline$LongMod2(long) disassembly (after) + /// CHECK: Rem [{{j\d+}},{{j\d+}}] + /// CHECK-NOT: imul + /// CHECK-NOT: shrq + /// CHECK-NOT: imulq + /// CHECK: movq + /// CHECK: andq + /// CHECK: jz/eq + /// CHECK: movq + /// CHECK: sarq + /// CHECK: shlq + /// CHECK: orq private static Long $noinline$LongMod2(long v) { long r = v % 2; return r; @@ -220,6 +287,18 @@ public class RemTest { /// CHECK: cmp x{{\d+}}, #0x0 /// CHECK: and x{{\d+}}, x{{\d+}}, #0x1 /// CHECK: cneg x{{\d+}}, x{{\d+}}, lt + /// CHECK-START-X86_64: java.lang.Long RemTest.$noinline$LongModMinus2(long) disassembly (after) + /// CHECK: Rem [{{j\d+}},{{j\d+}}] + /// CHECK-NOT: imul + /// CHECK-NOT: shrq + /// CHECK-NOT: imulq + /// CHECK: movq + /// CHECK: andq + /// CHECK: jz/eq + /// CHECK: movq + /// CHECK: sarq + /// CHECK: shlq + /// CHECK: orq private static Long $noinline$LongModMinus2(long v) { long r = v % -2; return r; @@ -230,6 +309,19 @@ public class RemTest { /// CHECK: and x{{\d+}}, x{{\d+}}, #0xf /// CHECK: and x{{\d+}}, x{{\d+}}, #0xf /// CHECK: csneg x{{\d+}}, x{{\d+}}, mi + + /// CHECK-START-X86_64: java.lang.Long RemTest.$noinline$LongMod16(long) disassembly (after) + /// CHECK: Rem [{{j\d+}},{{j\d+}}] + /// CHECK-NOT: imul + /// CHECK-NOT: shrq + /// CHECK-NOT: imulq + /// CHECK: movq + /// CHECK: andq + /// CHECK: jz/eq + /// CHECK: movq + /// CHECK: sarq + /// CHECK: shlq + /// CHECK: orq private static Long $noinline$LongMod16(long v) { long r = v % 16; return r; @@ -240,6 +332,18 @@ public class RemTest { /// CHECK: and x{{\d+}}, x{{\d+}}, #0xf /// CHECK: and x{{\d+}}, x{{\d+}}, #0xf /// CHECK: csneg x{{\d+}}, x{{\d+}}, mi + /// 
CHECK-START-X86_64: java.lang.Long RemTest.$noinline$LongModMinus16(long) disassembly (after) + /// CHECK: Rem [{{j\d+}},{{j\d+}}] + /// CHECK-NOT: imul + /// CHECK-NOT: shrq + /// CHECK-NOT: imulq + /// CHECK: movq + /// CHECK: andq + /// CHECK: jz/eq + /// CHECK: movq + /// CHECK: sarq + /// CHECK: shlq + /// CHECK: orq private static Long $noinline$LongModMinus16(long v) { long r = v % -16; return r; @@ -250,6 +354,18 @@ public class RemTest { /// CHECK: and x{{\d+}}, x{{\d+}}, #0x7fffffffffffffff /// CHECK: and x{{\d+}}, x{{\d+}}, #0x7fffffffffffffff /// CHECK: csneg x{{\d+}}, x{{\d+}}, mi + /// CHECK-START-X86_64: java.lang.Long RemTest.$noinline$LongModLongMin(long) disassembly (after) + /// CHECK: Rem [{{j\d+}},{{j\d+}}] + /// CHECK-NOT: imul + /// CHECK-NOT: shrq + /// CHECK-NOT: imulq + /// CHECK: movq + /// CHECK: andq + /// CHECK: jz/eq + /// CHECK: movq + /// CHECK: sarq + /// CHECK: shlq + /// CHECK: orq private static Long $noinline$LongModLongMin(long v) { long r = v % Long.MIN_VALUE; return r; |