Add sqrt intrinsic
Apply the math intrinsics to StrictMath as well, where appropriate, following
the Dalvik change: https://android-review.googlesource.com/42932
Filter intrinsic methods by package name first to reduce the number of
string comparisons.
Change-Id: Iff97c501d4386f76d3f2297406417fa3b76d0a8a
diff --git a/src/compiler/codegen/GenInvoke.cc b/src/compiler/codegen/GenInvoke.cc
index 1bfcf0e..ba580f8 100644
--- a/src/compiler/codegen/GenInvoke.cc
+++ b/src/compiler/codegen/GenInvoke.cc
@@ -978,6 +978,33 @@
#endif
}
+bool genInlinedSqrt(CompilationUnit* cUnit, CallInfo* info) {  // Emits inline code for sqrt(double); returns true if code was generated, false otherwise.
+#if defined(TARGET_ARM)  // An inline sequence exists only for ARM targets.
+ LIR *branch;  // Conditional branch over the helper call; its target is filled in below.
+ RegLocation rlSrc = info->args[0];  // The call's first argument (the double operand).
+ RegLocation rlDest = inlineTargetWide(cUnit, info); // double place for result
+ rlSrc = loadValueWide(cUnit, rlSrc, kFPReg);  // Load the wide source value, requesting the kFPReg register class.
+ RegLocation rlResult = oatEvalLoc(cUnit, rlDest, kFPReg, true);  // Result location, also kFPReg class.
+ newLIR2(cUnit, kThumb2Vsqrtd, S2D(rlResult.lowReg, rlResult.highReg),  // Emit the double-precision sqrt: operands are result, then source.
+ S2D(rlSrc.lowReg, rlSrc.highReg));
+ newLIR2(cUnit, kThumb2Vcmpd, S2D(rlResult.lowReg, rlResult.highReg),  // Compare the result with itself; a value is unequal to itself only when it is NaN.
+ S2D(rlResult.lowReg, rlResult.highReg));
+ newLIR0(cUnit, kThumb2Fmstat);  // Presumably copies the FP compare flags so the branch below can test them -- confirm.
+ branch = newLIR2(cUnit, kThumbBCond, 0, kArmCondEq);  // Result equals itself (not NaN): skip the helper call.
+ oatClobberCalleeSave(cUnit);  // NOTE(review): presumably prepares register state for the helper call below -- confirm.
+ oatLockCallTemps(cUnit); // Using fixed registers
+ int rTgt = loadHelper(cUnit, ENTRYPOINT_OFFSET(pSqrt));  // Slow path: pSqrt is set to sqrt in the ARM entrypoint table.
+ newLIR3(cUnit, kThumb2Fmrrd, r0, r1, S2D(rlSrc.lowReg, rlSrc.highReg));  // Hand the source double to the helper in r0/r1.
+ newLIR1(cUnit, kThumbBlxR, rTgt);  // Call the helper.
+ newLIR3(cUnit, kThumb2Fmdrr, S2D(rlResult.lowReg, rlResult.highReg), r0, r1);  // Take the helper's result from r0/r1 into the result register.
+ branch->target = newLIR0(cUnit, kPseudoTargetLabel);  // Join point: the not-NaN branch above lands here.
+ storeValueWide(cUnit, rlDest, rlResult);  // Store the wide result to its destination.
+ return true;
+#else  // Other targets: nothing is emitted.
+ return false;
+#endif
+}
+
bool genIntrinsic(CompilationUnit* cUnit, CallInfo* info)
{
if (info->optFlags & MIR_INLINED) {
@@ -994,53 +1021,64 @@
* take advantage of/generate new useful dataflow info.
*/
std::string tgtMethod(PrettyMethod(info->index, *cUnit->dex_file));
- if (tgtMethod == "char java.lang.String.charAt(int)") {
- return genInlinedCharAt(cUnit, info);
- }
- if (tgtMethod == "int java.lang.Math.min(int, int)") {
- return genInlinedMinMaxInt(cUnit, info, true /* isMin */);
- }
- if (tgtMethod == "int java.lang.Math.max(int, int)") {
- return genInlinedMinMaxInt(cUnit, info, false /* isMin */);
- }
- if (tgtMethod == "int java.lang.String.length()") {
- return genInlinedStringIsEmptyOrLength(cUnit, info, false /* isEmpty */);
- }
- if (tgtMethod == "boolean java.lang.String.isEmpty()") {
- return genInlinedStringIsEmptyOrLength(cUnit, info, true /* isEmpty */);
- }
- if (tgtMethod == "int java.lang.Math.abs(int)") {
- return genInlinedAbsInt(cUnit, info);
- }
- if (tgtMethod == "long java.lang.Math.abs(long)") {
- return genInlinedAbsLong(cUnit, info);
- }
- if (tgtMethod == "int java.lang.Float.floatToRawIntBits(float)") {
- return genInlinedFloatCvt(cUnit, info);
- }
- if (tgtMethod == "float java.lang.Float.intBitsToFloat(int)") {
- return genInlinedFloatCvt(cUnit, info);
- }
- if (tgtMethod == "long java.lang.Double.doubleToRawLongBits(double)") {
- return genInlinedDoubleCvt(cUnit, info);
- }
- if (tgtMethod == "double java.lang.Double.longBitsToDouble(long)") {
- return genInlinedDoubleCvt(cUnit, info);
- }
- if (tgtMethod == "int java.lang.String.indexOf(int, int)") {
- return genInlinedIndexOf(cUnit, info, false /* base 0 */);
- }
- if (tgtMethod == "int java.lang.String.indexOf(int)") {
- return genInlinedIndexOf(cUnit, info, true /* base 0 */);
- }
- if (tgtMethod == "int java.lang.String.compareTo(java.lang.String)") {
- return genInlinedStringCompareTo(cUnit, info);
- }
- if (tgtMethod == "boolean sun.misc.Unsafe.compareAndSwapInt(java.lang.Object, long, int, int)") {
- return genInlinedCas32(cUnit, info, false);
- }
- if (tgtMethod == "boolean sun.misc.Unsafe.compareAndSwapObject(java.lang.Object, long, java.lang.Object, java.lang.Object)") {
- return genInlinedCas32(cUnit, info, true);
+  // The Unsafe test must come first: compareAndSwapObject's signature contains
+  // ", java.lang.Object", so the " java.lang" filter below also matches it and,
+  // in an if/else-if chain, would keep it from ever reaching the CAS checks.
+  if (tgtMethod.find("boolean sun.misc.Unsafe.compareAndSwap") != std::string::npos) {
+    if (tgtMethod == "boolean sun.misc.Unsafe.compareAndSwapInt(java.lang.Object, long, int, int)") {
+      return genInlinedCas32(cUnit, info, false);
+    }
+    if (tgtMethod == "boolean sun.misc.Unsafe.compareAndSwapObject(java.lang.Object, long, java.lang.Object, java.lang.Object)") {
+      return genInlinedCas32(cUnit, info, true);
+    }
+  } else if (tgtMethod.find(" java.lang") != std::string::npos) {
+    // Cheap pre-filter only; the exact signature comparisons below decide.
+    if (tgtMethod == "long java.lang.Double.doubleToRawLongBits(double)" ||
+        tgtMethod == "double java.lang.Double.longBitsToDouble(long)") {
+      return genInlinedDoubleCvt(cUnit, info);
+    }
+    if (tgtMethod == "int java.lang.Float.floatToRawIntBits(float)" ||
+        tgtMethod == "float java.lang.Float.intBitsToFloat(int)") {
+      return genInlinedFloatCvt(cUnit, info);
+    }
+    if (tgtMethod == "int java.lang.Math.abs(int)" ||
+        tgtMethod == "int java.lang.StrictMath.abs(int)") {
+      return genInlinedAbsInt(cUnit, info);
+    }
+    if (tgtMethod == "long java.lang.Math.abs(long)" ||
+        tgtMethod == "long java.lang.StrictMath.abs(long)") {
+      return genInlinedAbsLong(cUnit, info);
+    }
+    if (tgtMethod == "int java.lang.Math.max(int, int)" ||
+        tgtMethod == "int java.lang.StrictMath.max(int, int)") {
+      return genInlinedMinMaxInt(cUnit, info, false /* isMin */);
+    }
+    if (tgtMethod == "int java.lang.Math.min(int, int)" ||
+        tgtMethod == "int java.lang.StrictMath.min(int, int)") {
+      return genInlinedMinMaxInt(cUnit, info, true /* isMin */);
+    }
+    if (tgtMethod == "double java.lang.Math.sqrt(double)" ||
+        tgtMethod == "double java.lang.StrictMath.sqrt(double)") {
+      return genInlinedSqrt(cUnit, info);
+    }
+    if (tgtMethod == "char java.lang.String.charAt(int)") {
+      return genInlinedCharAt(cUnit, info);
+    }
+    if (tgtMethod == "int java.lang.String.compareTo(java.lang.String)") {
+      return genInlinedStringCompareTo(cUnit, info);
+    }
+    if (tgtMethod == "boolean java.lang.String.isEmpty()") {
+      return genInlinedStringIsEmptyOrLength(cUnit, info, true /* isEmpty */);
+    }
+    if (tgtMethod == "int java.lang.String.indexOf(int, int)") {
+      return genInlinedIndexOf(cUnit, info, false /* base 0 */);
+    }
+    if (tgtMethod == "int java.lang.String.indexOf(int)") {
+      return genInlinedIndexOf(cUnit, info, true /* base 0 */);
+    }
+    if (tgtMethod == "int java.lang.String.length()") {
+      return genInlinedStringIsEmptyOrLength(cUnit, info, false /* isEmpty */);
+    }
}
return false;
}
diff --git a/src/disassembler_arm.cc b/src/disassembler_arm.cc
index f987367..509755c 100644
--- a/src/disassembler_arm.cc
+++ b/src/disassembler_arm.cc
@@ -459,8 +459,7 @@
args << Rn << ", " << d << " .. " << (d + imm8);
}
}
- }
- if ((op3 & 0x30) == 0x20 && op4 == 0) { // 10 xxxx ... 0
+ } else if ((op3 & 0x30) == 0x20 && op4 == 0) { // 10 xxxx ... 0
if ((coproc & 0xE) == 0xA) {
// VFP data-processing instructions
// |111|1|1100|0000|0000|1111|110|0|00 |0|0|0000|
@@ -473,21 +472,28 @@
// 111 0 1110|1111 0100 1110 101 0 01 1 0 1001 - eef4ea69
uint32_t opc1 = (instr >> 20) & 0xF;
uint32_t opc2 = (instr >> 16) & 0xF;
- //uint32_t opc3 = (instr >> 6) & 0x3;
+ uint32_t opc3 = (instr >> 6) & 0x3;
if ((opc1 & 0xB) == 0xB) { // 1x11
// Other VFP data-processing instructions.
+ uint32_t D = (instr >> 22) & 0x1;
+ uint32_t Vd = (instr >> 12) & 0xF;
+ uint32_t sz = (instr >> 8) & 1;
+ uint32_t M = (instr >> 5) & 1;
+ uint32_t Vm = instr & 0xF;
+ bool dp_operation = sz == 1;
switch (opc2) {
+ case 0x1: // Vneg/Vsqrt
+ // 1110 11101 D 11 0001 dddd 101s o1M0 mmmm
+ opcode << (opc3 == 1 ? "vneg" : "vsqrt") << (dp_operation ? ".f64" : ".f32");
+ if (dp_operation) {
+ args << "f" << ((D << 4) | Vd) << ", " << "f" << ((M << 4) | Vm);
+ } else {
+ args << "f" << ((Vd << 1) | D) << ", " << "f" << ((Vm << 1) | M);
+ }
+ break;
case 0x4: case 0x5: { // Vector compare
// 1110 11101 D 11 0100 dddd 101 sE1M0 mmmm
- uint32_t D = (instr >> 22) & 0x1;
- uint32_t Vd = (instr >> 12) & 0xF;
- uint32_t sz = (instr >> 8) & 1;
- uint32_t E = (instr >> 7) & 1;
- uint32_t M = (instr >> 5) & 1;
- uint32_t Vm = instr & 0xF;
- bool dp_operation = sz == 1;
- opcode << (E == 0 ? "vcmp" : "vcmpe");
- opcode << (dp_operation ? ".f64" : ".f32");
+ opcode << (opc3 == 1 ? "vcmp" : "vcmpe") << (dp_operation ? ".f64" : ".f32");
if (dp_operation) {
args << "f" << ((D << 4) | Vd) << ", " << "f" << ((M << 4) | Vm);
} else {
@@ -498,6 +504,24 @@
}
}
}
+ } else if ((op3 & 0x30) == 0x30) { // 11 xxxx
+ // Advanced SIMD
+ if ((instr & 0xFFBF0ED0) == 0xeeb10ac0) { // Vsqrt. The mask ignores D, Vd, sz, M and Vm. NOTE(review): the pattern fixes instr bits 25:24 to 10; if op3 is instr bits 25:20 it cannot match inside this 11xxxx arm -- confirm.
+ // 1110 11101 D 11 0001 dddd 101S 11M0 mmmm
+ // 1110 11101 0 11 0001 1101 1011 1100 1000 - eeb1dbc8
+ uint32_t D = (instr >> 22) & 1;  // Bit 22: extra bit of the destination register number.
+ uint32_t Vd = (instr >> 12) & 0xF;  // Bits 15:12: destination register field.
+ uint32_t sz = (instr >> 8) & 1;  // Bit 8: operand size selector.
+ uint32_t M = (instr >> 5) & 1;  // Bit 5: extra bit of the source register number.
+ uint32_t Vm = instr & 0xF;  // Bits 3:0: source register field.
+ bool dp_operation = sz == 1;  // sz == 1 is printed as .f64, otherwise .f32.
+ opcode << "vsqrt" << (dp_operation ? ".f64" : ".f32");
+ if (dp_operation) {
+ args << "f" << ((D << 4) | Vd) << ", " << "f" << ((M << 4) | Vm);  // .f64: the extra bit is the high bit of the register number.
+ } else {
+ args << "f" << ((Vd << 1) | D) << ", " << "f" << ((Vm << 1) | M);  // .f32: the extra bit is the low bit of the register number.
+ }
+ }
}
}
break;
diff --git a/src/oat/runtime/arm/oat_support_entrypoints_arm.cc b/src/oat/runtime/arm/oat_support_entrypoints_arm.cc
index 5aedff7..df26e21 100644
--- a/src/oat/runtime/arm/oat_support_entrypoints_arm.cc
+++ b/src/oat/runtime/arm/oat_support_entrypoints_arm.cc
@@ -213,6 +213,7 @@
points->pDsub = __aeabi_dsub;
points->pF2d = __aeabi_f2d;
points->pFmod = fmod;
+ points->pSqrt = sqrt;
points->pI2d = __aeabi_i2d;
points->pL2d = __aeabi_l2d;
points->pD2f = __aeabi_d2f;
diff --git a/src/oat/runtime/oat_support_entrypoints.h b/src/oat/runtime/oat_support_entrypoints.h
index a504778..a069522 100644
--- a/src/oat/runtime/oat_support_entrypoints.h
+++ b/src/oat/runtime/oat_support_entrypoints.h
@@ -96,6 +96,7 @@
double (*pDsub)(double, double);
double (*pF2d)(float);
double (*pFmod)(double, double);
+ double (*pSqrt)(double);
double (*pI2d)(int);
double (*pL2d)(int64_t);
float (*pD2f)(double);
diff --git a/src/thread.cc b/src/thread.cc
index f215783..e45ed48 100644
--- a/src/thread.cc
+++ b/src/thread.cc
@@ -1437,6 +1437,7 @@
ENTRY_POINT_INFO(pDsub),
ENTRY_POINT_INFO(pF2d),
ENTRY_POINT_INFO(pFmod),
+ ENTRY_POINT_INFO(pSqrt),
ENTRY_POINT_INFO(pI2d),
ENTRY_POINT_INFO(pL2d),
ENTRY_POINT_INFO(pD2f),