Sqrt intrinsic

Also apply the existing math intrinsics to the equivalent StrictMath methods
where appropriate, following Dalvik change https://android-review.googlesource.com/42932

Check the package name of the target method first, so that most calls skip
the per-intrinsic string comparisons.

Change-Id: Iff97c501d4386f76d3f2297406417fa3b76d0a8a
diff --git a/src/compiler/codegen/GenInvoke.cc b/src/compiler/codegen/GenInvoke.cc
index 1bfcf0e..ba580f8 100644
--- a/src/compiler/codegen/GenInvoke.cc
+++ b/src/compiler/codegen/GenInvoke.cc
@@ -978,6 +978,33 @@
 #endif
 }
 
+bool genInlinedSqrt(CompilationUnit* cUnit, CallInfo* info) {  // Inline sqrt(double); ARM only
+#if defined(TARGET_ARM)
+  LIR *branch;  // conditional branch that skips the helper call; target is patched below
+  RegLocation rlSrc = info->args[0];
+  RegLocation rlDest = inlineTargetWide(cUnit, info);  // double place for result
+  rlSrc = loadValueWide(cUnit, rlSrc, kFPReg);
+  RegLocation rlResult = oatEvalLoc(cUnit, rlDest, kFPReg, true);
+  newLIR2(cUnit, kThumb2Vsqrtd, S2D(rlResult.lowReg, rlResult.highReg),  // rlResult <- Vsqrtd(rlSrc)
+          S2D(rlSrc.lowReg, rlSrc.highReg));
+  newLIR2(cUnit, kThumb2Vcmpd, S2D(rlResult.lowReg, rlResult.highReg),  // compare result with itself
+          S2D(rlResult.lowReg, rlResult.highReg));  // NOTE(review): presumably a NaN check - confirm
+  newLIR0(cUnit, kThumb2Fmstat);  // presumably exposes the FP compare flags to the branch below
+  branch = newLIR2(cUnit, kThumbBCond, 0, kArmCondEq);  // on "equal", skip the pSqrt helper call
+  oatClobberCalleeSave(cUnit);
+  oatLockCallTemps(cUnit);  // Using fixed registers
+  int rTgt = loadHelper(cUnit, ENTRYPOINT_OFFSET(pSqrt));  // fallback path: call the pSqrt entrypoint
+  newLIR3(cUnit, kThumb2Fmrrd, r0, r1, S2D(rlSrc.lowReg, rlSrc.highReg));  // source double -> r0/r1
+  newLIR1(cUnit, kThumbBlxR, rTgt);
+  newLIR3(cUnit, kThumb2Fmdrr, S2D(rlResult.lowReg, rlResult.highReg), r0, r1);  // r0/r1 -> rlResult
+  branch->target = newLIR0(cUnit, kPseudoTargetLabel);  // join point for the skip branch
+  storeValueWide(cUnit, rlDest, rlResult);
+  return true;
+#else
+  return false;  // no inline code is generated for other targets
+#endif
+}
+
 bool genIntrinsic(CompilationUnit* cUnit, CallInfo* info)
 {
   if (info->optFlags & MIR_INLINED) {
@@ -994,53 +1021,64 @@
    * take advantage of/generate new useful dataflow info.
    */
   std::string tgtMethod(PrettyMethod(info->index, *cUnit->dex_file));
-  if (tgtMethod == "char java.lang.String.charAt(int)") {
-    return genInlinedCharAt(cUnit, info);
-  }
-  if (tgtMethod == "int java.lang.Math.min(int, int)") {
-    return genInlinedMinMaxInt(cUnit, info, true /* isMin */);
-  }
-  if (tgtMethod == "int java.lang.Math.max(int, int)") {
-    return genInlinedMinMaxInt(cUnit, info, false /* isMin */);
-  }
-  if (tgtMethod == "int java.lang.String.length()") {
-    return genInlinedStringIsEmptyOrLength(cUnit, info, false /* isEmpty */);
-  }
-  if (tgtMethod == "boolean java.lang.String.isEmpty()") {
-    return genInlinedStringIsEmptyOrLength(cUnit, info, true /* isEmpty */);
-  }
-  if (tgtMethod == "int java.lang.Math.abs(int)") {
-    return genInlinedAbsInt(cUnit, info);
-  }
-  if (tgtMethod == "long java.lang.Math.abs(long)") {
-    return genInlinedAbsLong(cUnit, info);
-  }
-  if (tgtMethod == "int java.lang.Float.floatToRawIntBits(float)") {
-    return genInlinedFloatCvt(cUnit, info);
-  }
-  if (tgtMethod == "float java.lang.Float.intBitsToFloat(int)") {
-    return genInlinedFloatCvt(cUnit, info);
-  }
-  if (tgtMethod == "long java.lang.Double.doubleToRawLongBits(double)") {
-    return genInlinedDoubleCvt(cUnit, info);
-  }
-  if (tgtMethod == "double java.lang.Double.longBitsToDouble(long)") {
-    return genInlinedDoubleCvt(cUnit, info);
-  }
-  if (tgtMethod == "int java.lang.String.indexOf(int, int)") {
-    return genInlinedIndexOf(cUnit, info, false /* base 0 */);
-  }
-  if (tgtMethod == "int java.lang.String.indexOf(int)") {
-    return genInlinedIndexOf(cUnit, info, true /* base 0 */);
-  }
-  if (tgtMethod == "int java.lang.String.compareTo(java.lang.String)") {
-    return genInlinedStringCompareTo(cUnit, info);
-  }
-  if (tgtMethod == "boolean sun.misc.Unsafe.compareAndSwapInt(java.lang.Object, long, int, int)") {
-    return genInlinedCas32(cUnit, info, false);
-  }
-  if (tgtMethod == "boolean sun.misc.Unsafe.compareAndSwapObject(java.lang.Object, long, java.lang.Object, java.lang.Object)") {
-    return genInlinedCas32(cUnit, info, true);
+  if (tgtMethod.find(" java.lang") != std::string::npos) {
+    if (tgtMethod == "long java.lang.Double.doubleToRawLongBits(double)") {
+      return genInlinedDoubleCvt(cUnit, info);
+    }
+    if (tgtMethod == "double java.lang.Double.longBitsToDouble(long)") {
+      return genInlinedDoubleCvt(cUnit, info);
+    }
+    if (tgtMethod == "int java.lang.Float.floatToRawIntBits(float)") {
+      return genInlinedFloatCvt(cUnit, info);
+    }
+    if (tgtMethod == "float java.lang.Float.intBitsToFloat(int)") {
+      return genInlinedFloatCvt(cUnit, info);
+    }
+    if (tgtMethod == "int java.lang.Math.abs(int)" ||
+        tgtMethod == "int java.lang.StrictMath.abs(int)") {
+      return genInlinedAbsInt(cUnit, info);
+    }
+    if (tgtMethod == "long java.lang.Math.abs(long)" ||
+        tgtMethod == "long java.lang.StrictMath.abs(long)") {
+      return genInlinedAbsLong(cUnit, info);
+    }
+    if (tgtMethod == "int java.lang.Math.max(int, int)" ||
+        tgtMethod == "int java.lang.StrictMath.max(int, int)") {
+      return genInlinedMinMaxInt(cUnit, info, false /* isMin */);
+    }
+    if (tgtMethod == "int java.lang.Math.min(int, int)" ||
+        tgtMethod == "int java.lang.StrictMath.min(int, int)") {
+      return genInlinedMinMaxInt(cUnit, info, true /* isMin */);
+    }
+    if (tgtMethod == "double java.lang.Math.sqrt(double)" ||
+        tgtMethod == "double java.lang.StrictMath.sqrt(double)") {
+      return genInlinedSqrt(cUnit, info);
+    }
+    if (tgtMethod == "char java.lang.String.charAt(int)") {
+      return genInlinedCharAt(cUnit, info);
+    }
+    if (tgtMethod == "int java.lang.String.compareTo(java.lang.String)") {
+      return genInlinedStringCompareTo(cUnit, info);
+    }
+    if (tgtMethod == "boolean java.lang.String.isEmpty()") {
+      return genInlinedStringIsEmptyOrLength(cUnit, info, true /* isEmpty */);
+    }
+    if (tgtMethod == "int java.lang.String.indexOf(int, int)") {
+      return genInlinedIndexOf(cUnit, info, false /* base 0 */);
+    }
+    if (tgtMethod == "int java.lang.String.indexOf(int)") {
+      return genInlinedIndexOf(cUnit, info, true /* base 0 */);
+    }
+    if (tgtMethod == "int java.lang.String.length()") {
+      return genInlinedStringIsEmptyOrLength(cUnit, info, false /* isEmpty */);
+    }
+  } else if (tgtMethod.find("boolean sun.misc.Unsafe.compareAndSwap") != std::string::npos) {
+    if (tgtMethod == "boolean sun.misc.Unsafe.compareAndSwapInt(java.lang.Object, long, int, int)") {
+      return genInlinedCas32(cUnit, info, false);
+    }
+    if (tgtMethod == "boolean sun.misc.Unsafe.compareAndSwapObject(java.lang.Object, long, java.lang.Object, java.lang.Object)") {
+      return genInlinedCas32(cUnit, info, true);
+    }
   }
   return false;
 }
diff --git a/src/disassembler_arm.cc b/src/disassembler_arm.cc
index f987367..509755c 100644
--- a/src/disassembler_arm.cc
+++ b/src/disassembler_arm.cc
@@ -459,8 +459,7 @@
               args << Rn << ", " << d << " .. " << (d + imm8);
             }
           }
-        }
-        if ((op3 & 0x30) == 0x20 && op4 == 0) {  // 10 xxxx ... 0
+        } else if ((op3 & 0x30) == 0x20 && op4 == 0) {  // 10 xxxx ... 0
           if ((coproc & 0xE) == 0xA) {
             // VFP data-processing instructions
             // |111|1|1100|0000|0000|1111|110|0|00  |0|0|0000|
@@ -473,21 +472,28 @@
             //  111 0 1110|1111 0100 1110 101 0 01   1 0 1001 - eef4ea69
             uint32_t opc1 = (instr >> 20) & 0xF;
             uint32_t opc2 = (instr >> 16) & 0xF;
-            //uint32_t opc3 = (instr >> 6) & 0x3;
+            uint32_t opc3 = (instr >> 6) & 0x3;
             if ((opc1 & 0xB) == 0xB) {  // 1x11
               // Other VFP data-processing instructions.
+              uint32_t D  = (instr >> 22) & 0x1;
+              uint32_t Vd = (instr >> 12) & 0xF;
+              uint32_t sz = (instr >> 8) & 1;
+              uint32_t M  = (instr >> 5) & 1;
+              uint32_t Vm = instr & 0xF;
+              bool dp_operation = sz == 1;
               switch (opc2) {
+                case 0x1: // Vneg/Vsqrt
+                  //  1110 11101 D 11 0001 dddd 101s o1M0 mmmm
+                  opcode << (opc3 == 1 ? "vneg" : "vsqrt") << (dp_operation ? ".f64" : ".f32");
+                  if (dp_operation) {
+                    args << "f" << ((D << 4) | Vd) << ", " << "f" << ((M << 4) | Vm);
+                  } else {
+                    args << "f" << ((Vd << 1) | D) << ", " << "f" << ((Vm << 1) | M);
+                  }
+                  break;
                 case 0x4: case 0x5:  { // Vector compare
                   // 1110 11101 D 11 0100 dddd 101 sE1M0 mmmm
-                  uint32_t D  = (instr >> 22) & 0x1;
-                  uint32_t Vd = (instr >> 12) & 0xF;
-                  uint32_t sz = (instr >> 8) & 1;
-                  uint32_t E  = (instr >> 7) & 1;
-                  uint32_t M  = (instr >> 5) & 1;
-                  uint32_t Vm = instr & 0xF;
-                  bool dp_operation = sz == 1;
-                  opcode << (E == 0 ? "vcmp" : "vcmpe");
-                  opcode << (dp_operation ? ".f64" : ".f32");
+                  opcode << (opc3 == 1 ? "vcmp" : "vcmpe") << (dp_operation ? ".f64" : ".f32");
                   if (dp_operation) {
                     args << "f" << ((D << 4) | Vd) << ", " << "f" << ((M << 4) | Vm);
                   } else {
@@ -498,6 +504,24 @@
               }
             }
           }
+        } else if ((op3 & 0x30) == 0x30) {  // 11 xxxx
+          // Advanced SIMD
+          if ((instr & 0xFFBF0ED0) == 0xeeb10ac0) {  // Vsqrt
+            //  1110 11101 D 11 0001 dddd 101S 11M0 mmmm
+            //  1110 11101 0 11 0001 1101 1011 1100 1000 - eeb1dbc8
+            uint32_t D = (instr >> 22) & 1;
+            uint32_t Vd = (instr >> 12) & 0xF;
+            uint32_t sz = (instr >> 8) & 1;
+            uint32_t M = (instr >> 5) & 1;
+            uint32_t Vm = instr & 0xF;
+            bool dp_operation = sz == 1;
+            opcode << "vsqrt" << (dp_operation ? ".f64" : ".f32");
+            if (dp_operation) {
+              args << "f" << ((D << 4) | Vd) << ", " << "f" << ((M << 4) | Vm);
+            } else {
+              args << "f" << ((Vd << 1) | D) << ", " << "f" << ((Vm << 1) | M);
+            }
+          }
         }
       }
       break;
diff --git a/src/oat/runtime/arm/oat_support_entrypoints_arm.cc b/src/oat/runtime/arm/oat_support_entrypoints_arm.cc
index 5aedff7..df26e21 100644
--- a/src/oat/runtime/arm/oat_support_entrypoints_arm.cc
+++ b/src/oat/runtime/arm/oat_support_entrypoints_arm.cc
@@ -213,6 +213,7 @@
   points->pDsub = __aeabi_dsub;
   points->pF2d = __aeabi_f2d;
   points->pFmod = fmod;
+  points->pSqrt = sqrt;
   points->pI2d = __aeabi_i2d;
   points->pL2d = __aeabi_l2d;
   points->pD2f = __aeabi_d2f;
diff --git a/src/oat/runtime/oat_support_entrypoints.h b/src/oat/runtime/oat_support_entrypoints.h
index a504778..a069522 100644
--- a/src/oat/runtime/oat_support_entrypoints.h
+++ b/src/oat/runtime/oat_support_entrypoints.h
@@ -96,6 +96,7 @@
   double (*pDsub)(double, double);
   double (*pF2d)(float);
   double (*pFmod)(double, double);
+  double (*pSqrt)(double);
   double (*pI2d)(int);
   double (*pL2d)(int64_t);
   float (*pD2f)(double);
diff --git a/src/thread.cc b/src/thread.cc
index f215783..e45ed48 100644
--- a/src/thread.cc
+++ b/src/thread.cc
@@ -1437,6 +1437,7 @@
   ENTRY_POINT_INFO(pDsub),
   ENTRY_POINT_INFO(pF2d),
   ENTRY_POINT_INFO(pFmod),
+  ENTRY_POINT_INFO(pSqrt),
   ENTRY_POINT_INFO(pI2d),
   ENTRY_POINT_INFO(pL2d),
   ENTRY_POINT_INFO(pD2f),