From 1cc73291d4039283fa622ec3e1fb681a9faa3499 Mon Sep 17 00:00:00 2001
From: Atul Bajaj <atul.bajaj@intel.com>
Date: Thu, 15 Nov 2018 11:36:53 +0530
Subject: On x86 modified the generated instructions for case "divide by 2". As
 we can use fewer instructions to represent HDiv when divide by2.

Gordon Peak device we see the improvement with below benchmark.
SPECjvm2008-4T	compiler.compiler	1.50%
jArtBand_all_1T	fmath	                4.50%

Test: 411-checker-hdiv-hrem-pow2, test.py --host

Change-Id: I2e6458abd082553f3a04f3d30e76c973bc902432
Signed-off-by: Atul Bajaj <atul.bajaj@intel.com>
---
 compiler/optimizing/code_generator_x86_64.cc | 29 ++++++++++++++++++++--------
 1 file changed, 21 insertions(+), 8 deletions(-)

(limited to 'compiler/optimizing')
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 14cff05f58..e7212cd479 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -3608,9 +3608,17 @@ void InstructionCodeGeneratorX86_64::DivByPowerOfTwo(HDiv* instruction) {
   CpuRegister tmp = locations->GetTemp(0).AsRegister<CpuRegister>();
 
   if (instruction->GetResultType() == DataType::Type::kInt32) {
-    __ leal(tmp, Address(numerator, abs_imm - 1));
-    __ testl(numerator, numerator);
-    __ cmov(kGreaterEqual, tmp, numerator);
+    // When denominator is equal to 2, we can add signed bit and numerator to tmp.
+    // Below we are using addl instruction instead of cmov which give us 1 cycle benefit.
+    if (abs_imm == 2) {
+      __ leal(tmp, Address(numerator, 0));
+      __ shrl(tmp, Immediate(31));
+      __ addl(tmp, numerator);
+    } else {
+      __ leal(tmp, Address(numerator, abs_imm - 1));
+      __ testl(numerator, numerator);
+      __ cmov(kGreaterEqual, tmp, numerator);
+    }
     int shift = CTZ(imm);
     __ sarl(tmp, Immediate(shift));
 
@@ -3622,11 +3630,16 @@ void InstructionCodeGeneratorX86_64::DivByPowerOfTwo(HDiv* instruction) {
   } else {
     DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
     CpuRegister rdx = locations->GetTemp(0).AsRegister<CpuRegister>();
-
-    codegen_->Load64BitValue(rdx, abs_imm - 1);
-    __ addq(rdx, numerator);
-    __ testq(numerator, numerator);
-    __ cmov(kGreaterEqual, rdx, numerator);
+    if (abs_imm == 2) {
+      __ movq(rdx, numerator);
+      __ shrq(rdx, Immediate(63));
+      __ addq(rdx, numerator);
+    } else {
+      codegen_->Load64BitValue(rdx, abs_imm - 1);
+      __ addq(rdx, numerator);
+      __ testq(numerator, numerator);
+      __ cmov(kGreaterEqual, rdx, numerator);
+    }
     int shift = CTZ(imm);
     __ sarq(rdx, Immediate(shift));
 
-- 
cgit v1.2.3-59-g8ed1b