Merge "ARM: Strength reduction for floating-point division"

commit: 40845d266e1152a846858d94cee70c33b136de68 [log] [tgz]
author: Ian Rogers <irogers@google.com> Wed Nov 05 19:39:10 2014 +0000
committer: Gerrit Code Review <noreply-gerritcodereview@google.com> Wed Nov 05 19:39:11 2014 +0000
tree: 9a5f41cb1dd1930ae003feeccb4211d59b465e19
parent: 211d45e059935a7874a1ec89846f03d35ffba29f [diff]
parent: 675e09b2753c2fcd521bd8f0230a0abf06e9b0e9 [diff]
diff --git a/compiler/dex/quick/arm/codegen_arm.h b/compiler/dex/quick/arm/codegen_arm.h
index 179ba02..d235199 100644
--- a/compiler/dex/quick/arm/codegen_arm.h
+++ b/compiler/dex/quick/arm/codegen_arm.h

@@ -90,6 +90,10 @@
     bool SmallLiteralDivRem(Instruction::Code dalvik_opcode, bool is_div, RegLocation rl_src,
                             RegLocation rl_dest, int lit);
     bool EasyMultiply(RegLocation rl_src, RegLocation rl_dest, int lit) OVERRIDE;
+    void GenMultiplyByConstantFloat(RegLocation rl_dest, RegLocation rl_src1,
+                                    int32_t constant) OVERRIDE;
+    void GenMultiplyByConstantDouble(RegLocation rl_dest, RegLocation rl_src1,
+                                     int64_t constant) OVERRIDE;
     LIR* CheckSuspendUsingLoad() OVERRIDE;
     RegStorage LoadHelper(QuickEntrypointEnum trampoline) OVERRIDE;
     LIR* LoadBaseDisp(RegStorage r_base, int displacement, RegStorage r_dest,

diff --git a/compiler/dex/quick/arm/fp_arm.cc b/compiler/dex/quick/arm/fp_arm.cc
index 3eb7c83..2b2592d 100644
--- a/compiler/dex/quick/arm/fp_arm.cc
+++ b/compiler/dex/quick/arm/fp_arm.cc

@@ -113,6 +113,32 @@
   StoreValueWide(rl_dest, rl_result);
 }
 
+void ArmMir2Lir::GenMultiplyByConstantFloat(RegLocation rl_dest, RegLocation rl_src1,
+                                            int32_t constant) {  // dest = src1 * constant.
+  RegLocation rl_result;
+  RegStorage r_tmp = AllocTempSingle();  // Temp for the constant operand.
+  LoadConstantNoClobber(r_tmp, constant);  // The visible caller passes the float's raw bits.
+  rl_src1 = LoadValue(rl_src1, kFPReg);
+  rl_result = EvalLoc(rl_dest, kFPReg, true);
+  NewLIR3(kThumb2Vmuls, rl_result.reg.GetReg(), rl_src1.reg.GetReg(), r_tmp.GetReg());
+  StoreValue(rl_dest, rl_result);  // Hand the computed result to rl_dest.
+}
+
+void ArmMir2Lir::GenMultiplyByConstantDouble(RegLocation rl_dest, RegLocation rl_src1,
+                                             int64_t constant) {  // dest = src1 * constant.
+  RegLocation rl_result;
+  RegStorage r_tmp = AllocTempDouble();  // Temp for the constant operand.
+  DCHECK(r_tmp.IsDouble());  // Sanity check on the allocated temp.
+  LoadConstantWide(r_tmp, constant);  // The visible caller passes the double's raw bits.
+  rl_src1 = LoadValueWide(rl_src1, kFPReg);
+  DCHECK(rl_src1.wide);
+  rl_result = EvalLocWide(rl_dest, kFPReg, true);
+  DCHECK(rl_dest.wide);  // Source, destination and result are all expected to be wide.
+  DCHECK(rl_result.wide);
+  NewLIR3(kThumb2Vmuld, rl_result.reg.GetReg(), rl_src1.reg.GetReg(), r_tmp.GetReg());
+  StoreValueWide(rl_dest, rl_result);  // Hand the computed result to rl_dest.
+}
+
 void ArmMir2Lir::GenConversion(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src) {
   int op = kThumbBkpt;
   int src_reg;

diff --git a/compiler/dex/quick/arm64/codegen_arm64.h b/compiler/dex/quick/arm64/codegen_arm64.h
index bd363c4..5182a89 100644
--- a/compiler/dex/quick/arm64/codegen_arm64.h
+++ b/compiler/dex/quick/arm64/codegen_arm64.h

@@ -71,6 +71,10 @@
   bool HandleEasyDivRem64(Instruction::Code dalvik_opcode, bool is_div,
                           RegLocation rl_src, RegLocation rl_dest, int64_t lit);
   bool EasyMultiply(RegLocation rl_src, RegLocation rl_dest, int lit) OVERRIDE;
+  void GenMultiplyByConstantFloat(RegLocation rl_dest, RegLocation rl_src1,
+                                  int32_t constant) OVERRIDE;
+  void GenMultiplyByConstantDouble(RegLocation rl_dest, RegLocation rl_src1,
+                                   int64_t constant) OVERRIDE;
   LIR* CheckSuspendUsingLoad() OVERRIDE;
   RegStorage LoadHelper(QuickEntrypointEnum trampoline) OVERRIDE;
   LIR* LoadBaseDisp(RegStorage r_base, int displacement, RegStorage r_dest,

diff --git a/compiler/dex/quick/arm64/fp_arm64.cc b/compiler/dex/quick/arm64/fp_arm64.cc
index db24d12..ff692b7 100644
--- a/compiler/dex/quick/arm64/fp_arm64.cc
+++ b/compiler/dex/quick/arm64/fp_arm64.cc

@@ -116,6 +116,32 @@
   StoreValueWide(rl_dest, rl_result);
 }
 
+void Arm64Mir2Lir::GenMultiplyByConstantFloat(RegLocation rl_dest, RegLocation rl_src1,
+                                              int32_t constant) {  // dest = src1 * constant.
+  RegLocation rl_result;
+  RegStorage r_tmp = AllocTempSingle();  // Temp for the constant operand.
+  LoadConstantNoClobber(r_tmp, constant);  // The visible caller passes the float's raw bits.
+  rl_src1 = LoadValue(rl_src1, kFPReg);
+  rl_result = EvalLoc(rl_dest, kFPReg, true);
+  NewLIR3(kA64Fmul3fff, rl_result.reg.GetReg(), rl_src1.reg.GetReg(), r_tmp.GetReg());
+  StoreValue(rl_dest, rl_result);  // Hand the computed result to rl_dest.
+}
+
+void Arm64Mir2Lir::GenMultiplyByConstantDouble(RegLocation rl_dest, RegLocation rl_src1,
+                                               int64_t constant) {  // dest = src1 * constant.
+  RegLocation rl_result;
+  RegStorage r_tmp = AllocTempDouble();  // Temp for the constant operand.
+  DCHECK(r_tmp.IsDouble());  // Sanity check on the allocated temp.
+  LoadConstantWide(r_tmp, constant);  // The visible caller passes the double's raw bits.
+  rl_src1 = LoadValueWide(rl_src1, kFPReg);
+  DCHECK(rl_src1.wide);
+  rl_result = EvalLocWide(rl_dest, kFPReg, true);
+  DCHECK(rl_dest.wide);  // Source, destination and result are all expected to be wide.
+  DCHECK(rl_result.wide);
+  NewLIR3(WIDE(kA64Fmul3fff), rl_result.reg.GetReg(), rl_src1.reg.GetReg(), r_tmp.GetReg());
+  StoreValueWide(rl_dest, rl_result);  // Hand the computed result to rl_dest.
+}
+
 void Arm64Mir2Lir::GenConversion(Instruction::Code opcode,
                                  RegLocation rl_dest, RegLocation rl_src) {
   int op = kA64Brk1d;

diff --git a/compiler/dex/quick/gen_common.cc b/compiler/dex/quick/gen_common.cc
index c5aa27c..061ee07 100644
--- a/compiler/dex/quick/gen_common.cc
+++ b/compiler/dex/quick/gen_common.cc

@@ -1785,6 +1785,34 @@
   return true;
 }
 
+// Replaces a floating-point division by a constant whose significand bits are all zero with a
+// multiply by the reciprocal (Thumb2/Arm64 only). Returns true if it generates instructions.
+bool Mir2Lir::HandleEasyFloatingPointDiv(RegLocation rl_dest, RegLocation rl_src1,
+                                         RegLocation rl_src2) {
+  if (!rl_src2.is_const ||
+      ((cu_->instruction_set != kThumb2) && (cu_->instruction_set != kArm64))) {
+    return false;
+  }
+  if (!rl_src2.wide) {
+    int32_t divisor = mir_graph_->ConstantValue(rl_src2);
+    if (CanDivideByReciprocalMultiplyFloat(divisor)) {
+      // bit_cast<From, To>: view the raw bits as a float, then hand back the reciprocal's bits.
+      float recip = 1.0f/bit_cast<int32_t, float>(divisor);
+      GenMultiplyByConstantFloat(rl_dest, rl_src1, bit_cast<float, int32_t>(recip));
+      return true;
+    }
+  } else {
+    int64_t divisor = mir_graph_->ConstantValueWide(rl_src2);
+    if (CanDivideByReciprocalMultiplyDouble(divisor)) {
+      // The divisor holds raw bits: reinterpret int64_t -> double; do not convert numerically.
+      double recip = 1.0/bit_cast<int64_t, double>(divisor);
+      GenMultiplyByConstantDouble(rl_dest, rl_src1, bit_cast<double, int64_t>(recip));
+      return true;
+    }
+  }
+  return false;
+}
+
 void Mir2Lir::GenArithOpIntLit(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src,
                                int lit) {
   RegLocation rl_result;

diff --git a/compiler/dex/quick/mips/codegen_mips.h b/compiler/dex/quick/mips/codegen_mips.h
index dc6930c..7e9d80d 100644
--- a/compiler/dex/quick/mips/codegen_mips.h
+++ b/compiler/dex/quick/mips/codegen_mips.h

@@ -31,6 +31,10 @@
     bool SmallLiteralDivRem(Instruction::Code dalvik_opcode, bool is_div, RegLocation rl_src,
                             RegLocation rl_dest, int lit);
     bool EasyMultiply(RegLocation rl_src, RegLocation rl_dest, int lit) OVERRIDE;
+    void GenMultiplyByConstantFloat(RegLocation rl_dest, RegLocation rl_src1,
+                                    int32_t constant) OVERRIDE;
+    void GenMultiplyByConstantDouble(RegLocation rl_dest, RegLocation rl_src1,
+                                     int64_t constant) OVERRIDE;
     LIR* CheckSuspendUsingLoad() OVERRIDE;
     RegStorage LoadHelper(QuickEntrypointEnum trampoline) OVERRIDE;
     LIR* LoadBaseDisp(RegStorage r_base, int displacement, RegStorage r_dest,

diff --git a/compiler/dex/quick/mips/fp_mips.cc b/compiler/dex/quick/mips/fp_mips.cc
index 4315915..0a7aa99 100644
--- a/compiler/dex/quick/mips/fp_mips.cc
+++ b/compiler/dex/quick/mips/fp_mips.cc

@@ -113,6 +113,20 @@
   StoreValueWide(rl_dest, rl_result);
 }
 
+void MipsMir2Lir::GenMultiplyByConstantFloat(RegLocation rl_dest, RegLocation rl_src1,
+                                             int32_t constant) {
+  // TODO: need mips implementation; HandleEasyFloatingPointDiv only proceeds on Thumb2/Arm64.
+  UNUSED(rl_dest, rl_src1, constant);  // This placeholder ignores all of its arguments.
+  LOG(FATAL) << "Unimplemented GenMultiplyByConstantFloat in mips";
+}
+
+void MipsMir2Lir::GenMultiplyByConstantDouble(RegLocation rl_dest, RegLocation rl_src1,
+                                              int64_t constant) {
+  // TODO: need mips implementation; HandleEasyFloatingPointDiv only proceeds on Thumb2/Arm64.
+  UNUSED(rl_dest, rl_src1, constant);  // This placeholder ignores all of its arguments.
+  LOG(FATAL) << "Unimplemented GenMultiplyByConstantDouble in mips";
+}
+
 void MipsMir2Lir::GenConversion(Instruction::Code opcode, RegLocation rl_dest,
                                 RegLocation rl_src) {
   int op = kMipsNop;

diff --git a/compiler/dex/quick/mir_to_lir.cc b/compiler/dex/quick/mir_to_lir.cc
index 533a677..ccaa167 100644
--- a/compiler/dex/quick/mir_to_lir.cc
+++ b/compiler/dex/quick/mir_to_lir.cc

@@ -1052,28 +1052,36 @@
       }
       break;
 
+    case Instruction::DIV_FLOAT:
+    case Instruction::DIV_FLOAT_2ADDR:
+      if (HandleEasyFloatingPointDiv(rl_dest, rl_src[0], rl_src[1])) {
+        break;
+      }
+      FALLTHROUGH_INTENDED;
     case Instruction::ADD_FLOAT:
     case Instruction::SUB_FLOAT:
     case Instruction::MUL_FLOAT:
-    case Instruction::DIV_FLOAT:
     case Instruction::REM_FLOAT:
     case Instruction::ADD_FLOAT_2ADDR:
     case Instruction::SUB_FLOAT_2ADDR:
     case Instruction::MUL_FLOAT_2ADDR:
-    case Instruction::DIV_FLOAT_2ADDR:
     case Instruction::REM_FLOAT_2ADDR:
       GenArithOpFloat(opcode, rl_dest, rl_src[0], rl_src[1]);
       break;
 
+    case Instruction::DIV_DOUBLE:
+    case Instruction::DIV_DOUBLE_2ADDR:
+      if (HandleEasyFloatingPointDiv(rl_dest, rl_src[0], rl_src[1])) {
+        break;
+      }
+      FALLTHROUGH_INTENDED;
     case Instruction::ADD_DOUBLE:
     case Instruction::SUB_DOUBLE:
     case Instruction::MUL_DOUBLE:
-    case Instruction::DIV_DOUBLE:
     case Instruction::REM_DOUBLE:
     case Instruction::ADD_DOUBLE_2ADDR:
     case Instruction::SUB_DOUBLE_2ADDR:
     case Instruction::MUL_DOUBLE_2ADDR:
-    case Instruction::DIV_DOUBLE_2ADDR:
     case Instruction::REM_DOUBLE_2ADDR:
       GenArithOpDouble(opcode, rl_dest, rl_src[0], rl_src[1]);
       break;

diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h
index 4623f79..bacc6d2 100644
--- a/compiler/dex/quick/mir_to_lir.h
+++ b/compiler/dex/quick/mir_to_lir.h

@@ -789,6 +789,7 @@
     virtual bool HandleEasyDivRem(Instruction::Code dalvik_opcode, bool is_div,
                                   RegLocation rl_src, RegLocation rl_dest, int lit);
     bool HandleEasyMultiply(RegLocation rl_src, RegLocation rl_dest, int lit);
+    bool HandleEasyFloatingPointDiv(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2);
     virtual void HandleSlowPaths();
     void GenBarrier();
     void GenDivZeroException();
@@ -1120,6 +1121,10 @@
     virtual bool SmallLiteralDivRem(Instruction::Code dalvik_opcode, bool is_div,
                                     RegLocation rl_src, RegLocation rl_dest, int lit) = 0;
     virtual bool EasyMultiply(RegLocation rl_src, RegLocation rl_dest, int lit) = 0;
+    virtual void GenMultiplyByConstantFloat(RegLocation rl_dest, RegLocation rl_src1,
+                                            int32_t constant) = 0;
+    virtual void GenMultiplyByConstantDouble(RegLocation rl_dest, RegLocation rl_src1,
+                                             int64_t constant) = 0;
     virtual LIR* CheckSuspendUsingLoad() = 0;
 
     virtual RegStorage LoadHelper(QuickEntrypointEnum trampoline) = 0;
@@ -1439,6 +1444,26 @@
       return InexpensiveConstantInt(value);
     }
 
+    /**
+     * @brief Whether division by the given divisor can be converted to multiply by its reciprocal.
+     * @param divisor The raw bit pattern of a constant float divisor (not its numeric value).
+     * @return True iff the low 23 bits are zero; intended to imply x/divisor == x*(1.0f/divisor).
+     */
+    bool CanDivideByReciprocalMultiplyFloat(int32_t divisor) {
+      // The mask covers the 23 significand bits; +/-0.0 and +/-infinity also pass this test.
+      return ((divisor & 0x7fffff) == 0);
+    }
+
+    /**
+     * @brief Whether division by the given divisor can be converted to multiply by its reciprocal.
+     * @param divisor The raw bit pattern of a constant double divisor (not its numeric value).
+     * @return True iff the low 52 bits are zero; intended to imply x/divisor == x*(1.0/divisor).
+     */
+    bool CanDivideByReciprocalMultiplyDouble(int64_t divisor) {
+      // The mask covers the 52 significand bits; +/-0.0 and +/-infinity also pass this test.
+      return ((divisor & ((UINT64_C(1) << 52) - 1)) == 0);
+    }
+
     // May be optimized by targets.
     virtual void GenMonitorEnter(int opt_flags, RegLocation rl_src);
     virtual void GenMonitorExit(int opt_flags, RegLocation rl_src);

diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h
index dec99ae..4412a1e 100644
--- a/compiler/dex/quick/x86/codegen_x86.h
+++ b/compiler/dex/quick/x86/codegen_x86.h

@@ -78,6 +78,10 @@
   bool SmallLiteralDivRem(Instruction::Code dalvik_opcode, bool is_div, RegLocation rl_src,
                           RegLocation rl_dest, int lit) OVERRIDE;
   bool EasyMultiply(RegLocation rl_src, RegLocation rl_dest, int lit) OVERRIDE;
+  void GenMultiplyByConstantFloat(RegLocation rl_dest, RegLocation rl_src1,
+                                  int32_t constant) OVERRIDE;
+  void GenMultiplyByConstantDouble(RegLocation rl_dest, RegLocation rl_src1,
+                                   int64_t constant) OVERRIDE;
   LIR* CheckSuspendUsingLoad() OVERRIDE;
   RegStorage LoadHelper(QuickEntrypointEnum trampoline) OVERRIDE;
   LIR* LoadBaseDisp(RegStorage r_base, int displacement, RegStorage r_dest,

diff --git a/compiler/dex/quick/x86/fp_x86.cc b/compiler/dex/quick/x86/fp_x86.cc
index 254d90f..33bb0ee 100755
--- a/compiler/dex/quick/x86/fp_x86.cc
+++ b/compiler/dex/quick/x86/fp_x86.cc

@@ -122,6 +122,20 @@
   StoreValueWide(rl_dest, rl_result);
 }
 
+void X86Mir2Lir::GenMultiplyByConstantFloat(RegLocation rl_dest, RegLocation rl_src1,
+                                            int32_t constant) {
+  // TODO: need x86 implementation; HandleEasyFloatingPointDiv only proceeds on Thumb2/Arm64.
+  UNUSED(rl_dest, rl_src1, constant);  // This placeholder ignores all of its arguments.
+  LOG(FATAL) << "Unimplemented GenMultiplyByConstantFloat in x86";
+}
+
+void X86Mir2Lir::GenMultiplyByConstantDouble(RegLocation rl_dest, RegLocation rl_src1,
+                                             int64_t constant) {
+  // TODO: need x86 implementation; HandleEasyFloatingPointDiv only proceeds on Thumb2/Arm64.
+  UNUSED(rl_dest, rl_src1, constant);  // This placeholder ignores all of its arguments.
+  LOG(FATAL) << "Unimplemented GenMultiplyByConstantDouble in x86";
+}
+
 void X86Mir2Lir::GenLongToFP(RegLocation rl_dest, RegLocation rl_src, bool is_double) {
   // Compute offsets to the source and destination VRs on stack
   int src_v_reg_offset = SRegOffset(rl_src.s_reg_low);

diff --git a/test/703-floating-point-div/expected.txt b/test/703-floating-point-div/expected.txt
new file mode 100644
index 0000000..76f5a5a
--- /dev/null
+++ b/test/703-floating-point-div/expected.txt

@@ -0,0 +1 @@
+Done!

diff --git a/test/703-floating-point-div/info.txt b/test/703-floating-point-div/info.txt
new file mode 100644
index 0000000..418b831
--- /dev/null
+++ b/test/703-floating-point-div/info.txt

@@ -0,0 +1 @@
+Simple tests to check floating point division.

diff --git a/test/703-floating-point-div/src/Main.java b/test/703-floating-point-div/src/Main.java
new file mode 100644
index 0000000..9990a54
--- /dev/null
+++ b/test/703-floating-point-div/src/Main.java

@@ -0,0 +1,90 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+    // Dividends; each test divides these by a set of constant edge-case divisors.
+    static double dPi = Math.PI;
+    static float  fPi = (float)Math.PI;
+
+    public static void expectEquals(long expected, long result) {  // Throws Error on mismatch.
+        if (expected != result) {
+            throw new Error("Expected: " + expected + ", found: " + result);
+        }
+    }
+
+    public static void expectEquals(int expected, int result) {  // Throws Error on mismatch.
+        if (expected != result) {
+            throw new Error("Expected: " + expected + ", found: " + result);
+        }
+    }
+
+    public static void divDoubleTest() {  // Checks the raw bits of dPi / <constant divisor>.
+        double d1 = 0x1.0p1023;
+        double d2 = -2.0;
+        double d3 = 0.0;
+        double d4 = Double.MIN_NORMAL;
+        double d5 = Double.POSITIVE_INFINITY;
+        double d6 = Double.NEGATIVE_INFINITY;
+        double d7 = -0.0;
+        double d8 = Double.MAX_VALUE;
+        double d9 = Double.MIN_VALUE;
+        double d0 = Double.NaN;
+        // Expected bits first, computed bits second, so a failure message reads correctly.
+        expectEquals(0x1921fb54442d18L, Double.doubleToRawLongBits(dPi/d1));
+        expectEquals(0xbff921fb54442d18L, Double.doubleToRawLongBits(dPi/d2));
+        expectEquals(0x7ff0000000000000L, Double.doubleToRawLongBits(dPi/d3));
+        expectEquals(0x7fe921fb54442d18L, Double.doubleToRawLongBits(dPi/d4));
+        expectEquals(0x0L, Double.doubleToRawLongBits(dPi/d5));
+        expectEquals(0x8000000000000000L, Double.doubleToRawLongBits(dPi/d6));
+        expectEquals(0xfff0000000000000L, Double.doubleToRawLongBits(dPi/d7));
+
+        expectEquals(0xc90fdaa22168cL, Double.doubleToRawLongBits(dPi/d8));
+        expectEquals(0x7ff0000000000000L, Double.doubleToRawLongBits(dPi/d9));
+        expectEquals(0x7ff8000000000000L, Double.doubleToRawLongBits(dPi/d0));
+    }
+
+    public static void divFloatTest() {  // Checks the raw bits of fPi / <constant divisor>.
+        float f1 = 0x1.0p127f;
+        float f2 = -2.0f;
+        float f3 = 0.0f;
+        float f4 = Float.MIN_NORMAL;
+        float f5 = Float.POSITIVE_INFINITY;
+        float f6 = Float.NEGATIVE_INFINITY;
+        float f7 = -0.0f;
+        float f8 = Float.MAX_VALUE;
+        float f9 = Float.MIN_VALUE;
+        float f0 = Float.NaN;
+        // Expected bits first, computed bits second, so a failure message reads correctly.
+        expectEquals(0xc90fdb, Float.floatToRawIntBits(fPi/f1));
+        expectEquals(0xbfc90fdb, Float.floatToRawIntBits(fPi/f2));
+        expectEquals(0x7f800000, Float.floatToRawIntBits(fPi/f3));
+        expectEquals(0x7f490fdb, Float.floatToRawIntBits(fPi/f4));
+        expectEquals(0x0, Float.floatToRawIntBits(fPi/f5));
+        expectEquals(0x80000000, Float.floatToRawIntBits(fPi/f6));
+        expectEquals(0xff800000, Float.floatToRawIntBits(fPi/f7));
+
+        expectEquals(0x6487ee, Float.floatToRawIntBits(fPi/f8));
+        expectEquals(0x7f800000, Float.floatToRawIntBits(fPi/f9));
+        expectEquals(0x7fc00000, Float.floatToRawIntBits(fPi/f0));
+    }
+
+    public static void main(String[] args) {  // Runs both tests, then prints "Done!".
+        divDoubleTest();
+        divFloatTest();
+        System.out.println("Done!");
+    }
+
+}
commit	40845d266e1152a846858d94cee70c33b136de68	[log] [tgz]
author	Ian Rogers <irogers@google.com>	Wed Nov 05 19:39:10 2014 +0000
committer	Gerrit Code Review <noreply-gerritcodereview@google.com>	Wed Nov 05 19:39:11 2014 +0000
tree	9a5f41cb1dd1930ae003feeccb4211d59b465e19
parent	211d45e059935a7874a1ec89846f03d35ffba29f [diff]
parent	675e09b2753c2fcd521bd8f0230a0abf06e9b0e9 [diff]