Optimize remainder by power-of-two constants for x86 and x86_64

Test: 411-checker-hdiv-hrem-pow2, test.py --host

Change-Id: I9334a3eb2cb50df439b56c0161379fef46e58603
Signed-off-by: Shalini Salomi Bodapati <shalini.salomi.bodapati@intel.com>
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 9f34a51..7745f64 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -3497,6 +3497,27 @@
   }
 }
 
+void InstructionCodeGeneratorX86::RemByPowerOfTwo(HRem* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  Location second = locations->InAt(1);
+  // Emits a 32-bit remainder by a constant divisor whose magnitude is a power of two.
+  Register out = locations->Out().AsRegister<Register>();
+  Register numerator = locations->InAt(0).AsRegister<Register>();
+  // Only the magnitude of imm is used below; the result takes its sign from the numerator.
+  int32_t imm = Int64FromConstant(second.GetConstant());
+  DCHECK(IsPowerOfTwo(AbsOrMin(imm)));
+  uint32_t abs_imm = static_cast<uint32_t>(AbsOrMin(imm));
+  // out = numerator & (abs_imm - 1); if non-zero and numerator < 0, out becomes out - abs_imm.
+  Register tmp = locations->GetTemp(0).AsRegister<Register>();
+  NearLabel done;
+  __ movl(out, numerator);
+  __ andl(out, Immediate(abs_imm-1));
+  __ j(Condition::kZero, &done);  // Zero remainder needs no sign fix-up.
+  __ leal(tmp, Address(out, static_cast<int32_t>(~(abs_imm-1))));  // tmp = out - abs_imm.
+  __ testl(numerator, numerator);
+  __ cmovl(Condition::kLess, out, tmp);  // Negative numerator: take the adjusted value.
+  __ Bind(&done);
+}
 
 void InstructionCodeGeneratorX86::DivByPowerOfTwo(HDiv* instruction) {
   LocationSummary* locations = instruction->GetLocations();
@@ -3610,8 +3631,12 @@
           // Do not generate anything for 0. DivZeroCheck would forbid any generated code.
         } else if (imm == 1 || imm == -1) {
           DivRemOneOrMinusOne(instruction);
-        } else if (is_div && IsPowerOfTwo(AbsOrMin(imm))) {
-          DivByPowerOfTwo(instruction->AsDiv());
+        } else if (IsPowerOfTwo(AbsOrMin(imm))) {
+          if (is_div) {
+            DivByPowerOfTwo(instruction->AsDiv());
+          } else {
+            RemByPowerOfTwo(instruction->AsRem());
+          }
         } else {
           DCHECK(imm <= -2 || imm >= 2);
           GenerateDivRemWithAnyConstant(instruction);
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index 93b0461..4e78be8 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -216,6 +216,7 @@
   void GenerateDivRemIntegral(HBinaryOperation* instruction);
   void DivRemOneOrMinusOne(HBinaryOperation* instruction);
   void DivByPowerOfTwo(HDiv* instruction);
+  void RemByPowerOfTwo(HRem* instruction);
   void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction);
   void GenerateRemFP(HRem* rem);
   void HandleCondition(HCondition* condition);
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index dac2dba..b9ae2cd 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -3560,7 +3560,40 @@
       LOG(FATAL) << "Unexpected type for div by (-)1 " << instruction->GetResultType();
   }
 }
+void InstructionCodeGeneratorX86_64::RemByPowerOfTwo(HRem* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  Location second = locations->InAt(1);  // Constant divisor; its magnitude is a power of two.
+  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
+  CpuRegister numerator = locations->InAt(0).AsRegister<CpuRegister>();
+  int64_t imm = Int64FromConstant(second.GetConstant());
+  DCHECK(IsPowerOfTwo(AbsOrMin(imm)));
+  uint64_t abs_imm = AbsOrMin(imm);  // The sign of imm is not used below.
+  CpuRegister tmp = locations->GetTemp(0).AsRegister<CpuRegister>();
+  if (instruction->GetResultType() == DataType::Type::kInt32) {
+    NearLabel done;
+    __ movl(out, numerator);
+    __ andl(out, Immediate(abs_imm-1));  // out = numerator & (abs_imm - 1).
+    __ j(Condition::kZero, &done);  // Zero remainder needs no sign fix-up.
+    __ leal(tmp, Address(out, static_cast<int32_t>(~(abs_imm-1))));  // tmp = out - abs_imm.
+    __ testl(numerator, numerator);
+    __ cmov(Condition::kLess, out, tmp, false);  // Negative numerator: use adjusted value.
+    __ Bind(&done);
 
+  } else {
+    DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
+    codegen_->Load64BitValue(tmp, abs_imm - 1);  // tmp is used as the low-bit mask below.
+    NearLabel done;
+    // out = numerator & mask; if that is non-zero and numerator < 0, OR in ~mask.
+    __ movq(out, numerator);
+    __ andq(out, tmp);
+    __ j(Condition::kZero, &done);
+    __ movq(tmp, numerator);
+    __ sarq(tmp, Immediate(63));  // tmp = (numerator < 0) ? -1 : 0.
+    __ shlq(tmp, Immediate(WhichPowerOf2(abs_imm)));  // ~mask or 0, if WhichPowerOf2 is log2.
+    __ orq(out, tmp);
+    __ Bind(&done);
+  }
+}
 void InstructionCodeGeneratorX86_64::DivByPowerOfTwo(HDiv* instruction) {
   LocationSummary* locations = instruction->GetLocations();
   Location second = locations->InAt(1);
@@ -3737,8 +3770,12 @@
       // Do not generate anything. DivZeroCheck would prevent any code to be executed.
     } else if (imm == 1 || imm == -1) {
       DivRemOneOrMinusOne(instruction);
-    } else if (instruction->IsDiv() && IsPowerOfTwo(AbsOrMin(imm))) {
-      DivByPowerOfTwo(instruction->AsDiv());
+    } else if (IsPowerOfTwo(AbsOrMin(imm))) {
+      if (is_div) {
+        DivByPowerOfTwo(instruction->AsDiv());
+      } else {
+        RemByPowerOfTwo(instruction->AsRem());
+      }
     } else {
       DCHECK(imm <= -2 || imm >= 2);
       GenerateDivRemWithAnyConstant(instruction);
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index 1e71397..25e5aa4 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -213,6 +213,7 @@
   void GenerateRemFP(HRem* rem);
   void DivRemOneOrMinusOne(HBinaryOperation* instruction);
   void DivByPowerOfTwo(HDiv* instruction);
+  void RemByPowerOfTwo(HRem* instruction);
   void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction);
   void GenerateDivRemIntegral(HBinaryOperation* instruction);
   void HandleCondition(HCondition* condition);
diff --git a/test/411-checker-hdiv-hrem-pow2/src/RemTest.java b/test/411-checker-hdiv-hrem-pow2/src/RemTest.java
index 72725c1..54d7847 100644
--- a/test/411-checker-hdiv-hrem-pow2/src/RemTest.java
+++ b/test/411-checker-hdiv-hrem-pow2/src/RemTest.java
@@ -92,6 +92,17 @@
   /// CHECK:                 cmp w{{\d+}}, #0x0
   /// CHECK:                 and w{{\d+}}, w{{\d+}}, #0x1
   /// CHECK:                 cneg w{{\d+}}, w{{\d+}}, lt
+  /// CHECK-START-X86_64: java.lang.Integer RemTest.$noinline$IntMod2(int) disassembly (after)
+  /// CHECK:          Rem [{{i\d+}},{{i\d+}}]
+  /// CHECK-NOT:      imul
+  /// CHECK-NOT:      shr
+  /// CHECK-NOT:      imul
+  /// CHECK:          mov
+  /// CHECK:          and
+  /// CHECK:          jz/eq
+  /// CHECK:          lea
+  /// CHECK:          test
+  /// CHECK:          cmovl/nge
   private static Integer $noinline$IntMod2(int v) {
     int r = v % 2;
     return r;
@@ -101,6 +112,17 @@
   /// CHECK:                 cmp w{{\d+}}, #0x0
   /// CHECK:                 and w{{\d+}}, w{{\d+}}, #0x1
   /// CHECK:                 cneg w{{\d+}}, w{{\d+}}, lt
+  /// CHECK-START-X86_64: java.lang.Integer RemTest.$noinline$IntModMinus2(int) disassembly (after)
+  /// CHECK:          Rem [{{i\d+}},{{i\d+}}]
+  /// CHECK-NOT:      imul
+  /// CHECK-NOT:      shr
+  /// CHECK-NOT:      imul
+  /// CHECK:          mov
+  /// CHECK:          and
+  /// CHECK:          jz/eq
+  /// CHECK:          lea
+  /// CHECK:          test
+  /// CHECK:          cmovl/nge
   private static Integer $noinline$IntModMinus2(int v) {
     int r = v % -2;
     return r;
@@ -111,6 +133,17 @@
   /// CHECK:                 and w{{\d+}}, w{{\d+}}, #0xf
   /// CHECK:                 and w{{\d+}}, w{{\d+}}, #0xf
   /// CHECK:                 csneg w{{\d+}}, w{{\d+}}, mi
+  /// CHECK-START-X86_64: java.lang.Integer RemTest.$noinline$IntMod16(int) disassembly (after)
+  /// CHECK:          Rem [{{i\d+}},{{i\d+}}]
+  /// CHECK-NOT:      imul
+  /// CHECK-NOT:      shr
+  /// CHECK-NOT:      imul
+  /// CHECK:          mov
+  /// CHECK:          and
+  /// CHECK:          jz/eq
+  /// CHECK:          lea
+  /// CHECK:          test
+  /// CHECK:          cmovl/nge
   private static Integer $noinline$IntMod16(int v) {
     int r = v % 16;
     return r;
@@ -121,6 +154,17 @@
   /// CHECK:                 and w{{\d+}}, w{{\d+}}, #0xf
   /// CHECK:                 and w{{\d+}}, w{{\d+}}, #0xf
   /// CHECK:                 csneg w{{\d+}}, w{{\d+}}, mi
+  /// CHECK-START-X86_64: java.lang.Integer RemTest.$noinline$IntModMinus16(int) disassembly (after)
+  /// CHECK:          Rem [{{i\d+}},{{i\d+}}]
+  /// CHECK-NOT:      imul
+  /// CHECK-NOT:      shr
+  /// CHECK-NOT:      imul
+  /// CHECK:          mov
+  /// CHECK:          and
+  /// CHECK:          jz/eq
+  /// CHECK:          lea
+  /// CHECK:          test
+  /// CHECK:          cmovl/nge
   private static Integer $noinline$IntModMinus16(int v) {
     int r = v % -16;
     return r;
@@ -131,6 +175,17 @@
   /// CHECK:                 and w{{\d+}}, w{{\d+}}, #0x7fffffff
   /// CHECK:                 and w{{\d+}}, w{{\d+}}, #0x7fffffff
   /// CHECK:                 csneg w{{\d+}}, w{{\d+}}, mi
+  /// CHECK-START-X86_64: java.lang.Integer RemTest.$noinline$IntModIntMin(int) disassembly (after)
+  /// CHECK:          Rem [{{i\d+}},{{i\d+}}]
+  /// CHECK-NOT:      imul
+  /// CHECK-NOT:      shr
+  /// CHECK-NOT:      imul
+  /// CHECK:          mov
+  /// CHECK:          and
+  /// CHECK:          jz/eq
+  /// CHECK:          lea
+  /// CHECK:          test
+  /// CHECK:          cmovl/nge
   private static Integer $noinline$IntModIntMin(int v) {
     int r = v % Integer.MIN_VALUE;
     return r;
@@ -211,6 +266,18 @@
   /// CHECK:                 cmp x{{\d+}}, #0x0
   /// CHECK:                 and x{{\d+}}, x{{\d+}}, #0x1
   /// CHECK:                 cneg x{{\d+}}, x{{\d+}}, lt
+  /// CHECK-START-X86_64: java.lang.Long RemTest.$noinline$LongMod2(long) disassembly (after)
+  /// CHECK:          Rem [{{j\d+}},{{j\d+}}]
+  /// CHECK-NOT:      imul
+  /// CHECK-NOT:      shrq
+  /// CHECK-NOT:      imulq
+  /// CHECK:          movq
+  /// CHECK:          andq
+  /// CHECK:          jz/eq
+  /// CHECK:          movq
+  /// CHECK:          sarq
+  /// CHECK:          shlq
+  /// CHECK:          orq
   private static Long $noinline$LongMod2(long v) {
     long r = v % 2;
     return r;
@@ -220,6 +287,18 @@
   /// CHECK:                 cmp x{{\d+}}, #0x0
   /// CHECK:                 and x{{\d+}}, x{{\d+}}, #0x1
   /// CHECK:                 cneg x{{\d+}}, x{{\d+}}, lt
+  /// CHECK-START-X86_64: java.lang.Long RemTest.$noinline$LongModMinus2(long) disassembly (after)
+  /// CHECK:          Rem [{{j\d+}},{{j\d+}}]
+  /// CHECK-NOT:      imul
+  /// CHECK-NOT:      shrq
+  /// CHECK-NOT:      imulq
+  /// CHECK:          movq
+  /// CHECK:          andq
+  /// CHECK:          jz/eq
+  /// CHECK:          movq
+  /// CHECK:          sarq
+  /// CHECK:          shlq
+  /// CHECK:          orq
   private static Long $noinline$LongModMinus2(long v) {
     long r = v % -2;
     return r;
@@ -230,6 +309,19 @@
   /// CHECK:                 and x{{\d+}}, x{{\d+}}, #0xf
   /// CHECK:                 and x{{\d+}}, x{{\d+}}, #0xf
   /// CHECK:                 csneg x{{\d+}}, x{{\d+}}, mi
+
+  /// CHECK-START-X86_64: java.lang.Long RemTest.$noinline$LongMod16(long) disassembly (after)
+  /// CHECK:          Rem [{{j\d+}},{{j\d+}}]
+  /// CHECK-NOT:      imul
+  /// CHECK-NOT:      shrq
+  /// CHECK-NOT:      imulq
+  /// CHECK:          movq
+  /// CHECK:          andq
+  /// CHECK:          jz/eq
+  /// CHECK:          movq
+  /// CHECK:          sarq
+  /// CHECK:          shlq
+  /// CHECK:          orq
   private static Long $noinline$LongMod16(long v) {
     long r = v % 16;
     return r;
@@ -240,6 +332,18 @@
   /// CHECK:                 and x{{\d+}}, x{{\d+}}, #0xf
   /// CHECK:                 and x{{\d+}}, x{{\d+}}, #0xf
   /// CHECK:                 csneg x{{\d+}}, x{{\d+}}, mi
+  /// CHECK-START-X86_64: java.lang.Long RemTest.$noinline$LongModMinus16(long) disassembly (after)
+  /// CHECK:          Rem [{{j\d+}},{{j\d+}}]
+  /// CHECK-NOT:      imul
+  /// CHECK-NOT:      shrq
+  /// CHECK-NOT:      imulq
+  /// CHECK:          movq
+  /// CHECK:          andq
+  /// CHECK:          jz/eq
+  /// CHECK:          movq
+  /// CHECK:          sarq
+  /// CHECK:          shlq
+  /// CHECK:          orq
   private static Long $noinline$LongModMinus16(long v) {
     long r = v % -16;
     return r;
@@ -250,6 +354,18 @@
   /// CHECK:                 and x{{\d+}}, x{{\d+}}, #0x7fffffffffffffff
   /// CHECK:                 and x{{\d+}}, x{{\d+}}, #0x7fffffffffffffff
   /// CHECK:                 csneg x{{\d+}}, x{{\d+}}, mi
+  /// CHECK-START-X86_64: java.lang.Long RemTest.$noinline$LongModLongMin(long) disassembly (after)
+  /// CHECK:          Rem [{{j\d+}},{{j\d+}}]
+  /// CHECK-NOT:      imul
+  /// CHECK-NOT:      shrq
+  /// CHECK-NOT:      imulq
+  /// CHECK:          movq
+  /// CHECK:          andq
+  /// CHECK:          jz/eq
+  /// CHECK:          movq
+  /// CHECK:          sarq
+  /// CHECK:          shlq
+  /// CHECK:          orq
   private static Long $noinline$LongModLongMin(long v) {
     long r = v % Long.MIN_VALUE;
     return r;