ARM64: Combine LSR+ASR into ASR for Int32 HDiv/HRem. HDiv/HRem with a constant divisor are optimized by multiplying the dividend by a sort of reciprocal of the divisor. The multiplication is done by multiplying 32-bit numbers into a 64-bit result, of which the high 32 bits are used. In the case of Int32, LSR is used to get those bits. After that, there might be correction operations and an ASR. When there are no correction operations between the LSR and the ASR, they can be combined into one ASR. This CL implements this optimization. Improvements (Pixel 3; little core / big core): jit_aot/LoadCheck.RandomSumInvokeStaticMethod 7.1% / 8.3%; jit_aot/LoadCheck.RandomSumInvokeUserClass 4.6% / 12.0%; benchmarksgame/fasta 3.3% / 1.0%; benchmarksgame/fasta_4 2.4% / 2.6%; benchmarksgame/fastaredux 2.2% / 2.2%; SPECjvm2k8 MPEGAudio 1.7% / 1.0%. Test: test.py --host --optimizing --jit Test: test.py --target --optimizing --jit Change-Id: I5267b38d3a58319e24152917fabe836d5b346bce

commit: 968db3c09e5059e30044d69f1a5fd9bcd937392e [log] [tgz]
author: Evgeny Astigeevich <evgeny.astigeevich@linaro.org> Thu May 07 12:44:10 2020 +0100
committer: Evgeny Astigeevich <evgeny.astigeevich@linaro.org> Tue May 12 10:35:49 2020 +0100
tree: 5496a327556b30ac2cd1877b515fa852688036bd
parent: 2750a9884d7579f301c7ff65a6daaf8520af7902 [diff] [blame]
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index a2896fc..f4e18cf 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc

@@ -3045,6 +3045,13 @@
   return divisor < 0 && magic_number > 0;
 }
 
+// Return true if the result of multiplication of the dividend by a sort of reciprocal
+// of the divisor (magic_number) needs to be corrected. This means additional operations will
+// be generated.
+static inline bool NeedToCorrectMulResult(int64_t magic_number, int64_t divisor) {
+  return NeedToAddDividend(magic_number, divisor) || NeedToSubDividend(magic_number, divisor);
+}
+
 void InstructionCodeGeneratorARM64::GenerateResultDivRemWithAnyConstant(
     bool is_rem,
     int final_right_shift,
@@ -3131,7 +3138,17 @@
   // temp = get_high(dividend * magic)
   __ Mov(temp, magic);
   __ Smull(temp.X(), dividend, temp);
-  __ Lsr(temp.X(), temp.X(), 32);
+
+  if (NeedToCorrectMulResult(magic, imm)) {
+    __ Lsr(temp.X(), temp.X(), 32);
+  } else {
+    // As between 'lsr temp.X(), temp.X(), #32' and 'asr temp, temp, #shift' there are
+    // no other instructions modifying 'temp', they can be combined into one
+    // 'asr temp.X(), temp.X(), #32 + shift'.
+    DCHECK_LT(shift, 32);
+    __ Asr(temp.X(), temp.X(), 32 + shift);
+    shift = 0;
+  }
 
   GenerateResultDivRemWithAnyConstant(/* is_rem= */ instruction->IsRem(),
                                       /* final_right_shift= */ shift,
commit	968db3c09e5059e30044d69f1a5fd9bcd937392e	[log] [tgz]
author	Evgeny Astigeevich <evgeny.astigeevich@linaro.org>	Thu May 07 12:44:10 2020 +0100
committer	Evgeny Astigeevich <evgeny.astigeevich@linaro.org>	Tue May 12 10:35:49 2020 +0100
tree	5496a327556b30ac2cd1877b515fa852688036bd
parent	2750a9884d7579f301c7ff65a6daaf8520af7902 [diff] [blame]