Art compiler: remove unnecessary sqrt call
For reasons lost in the mists of time, the Dalvik JIT tested
the results of an inlined sqrt for NaN on Arm targets, and then
called an out-of-line routine to recompute if true. The Quick
compiler inherited this behavior. It is not necessary, and the CL
purges it (along with the out-of-line sqrt entrypoint).
Change-Id: I8c8fa6feacf9b7c3b9e190dfc6f728932fd948c6
diff --git a/compiler/dex/quick/arm/fp_arm.cc b/compiler/dex/quick/arm/fp_arm.cc
index bb02f74..18d4391 100644
--- a/compiler/dex/quick/arm/fp_arm.cc
+++ b/compiler/dex/quick/arm/fp_arm.cc
@@ -334,22 +334,11 @@
bool ArmMir2Lir::GenInlinedSqrt(CallInfo* info) {
DCHECK_EQ(cu_->instruction_set, kThumb2);
- LIR *branch;
RegLocation rl_src = info->args[0];
RegLocation rl_dest = InlineTargetWide(info); // double place for result
rl_src = LoadValueWide(rl_src, kFPReg);
RegLocation rl_result = EvalLoc(rl_dest, kFPReg, true);
NewLIR2(kThumb2Vsqrtd, rl_result.reg.GetReg(), rl_src.reg.GetReg());
- NewLIR2(kThumb2Vcmpd, rl_result.reg.GetReg(), rl_result.reg.GetReg());
- NewLIR0(kThumb2Fmstat);
- branch = NewLIR2(kThumbBCond, 0, kArmCondEq);
- ClobberCallerSave();
- LockCallTemps(); // Using fixed registers
- RegStorage r_tgt = LoadHelper(QUICK_ENTRYPOINT_OFFSET(4, pSqrt));
- NewLIR3(kThumb2Fmrrd, rs_r0.GetReg(), rs_r1.GetReg(), rl_src.reg.GetReg());
- NewLIR1(kThumbBlxR, r_tgt.GetReg());
- NewLIR3(kThumb2Fmdrr, rl_result.reg.GetReg(), rs_r0.GetReg(), rs_r1.GetReg());
- branch->target = NewLIR0(kPseudoTargetLabel);
StoreValueWide(rl_dest, rl_result);
return true;
}
diff --git a/compiler/oat_test.cc b/compiler/oat_test.cc
index 6812f3c..49cf71b 100644
--- a/compiler/oat_test.cc
+++ b/compiler/oat_test.cc
@@ -180,7 +180,7 @@
EXPECT_EQ(80U, sizeof(OatHeader));
EXPECT_EQ(8U, sizeof(OatMethodOffsets));
EXPECT_EQ(24U, sizeof(OatQuickMethodHeader));
- EXPECT_EQ(80 * GetInstructionSetPointerSize(kRuntimeISA), sizeof(QuickEntryPoints));
+ EXPECT_EQ(79 * GetInstructionSetPointerSize(kRuntimeISA), sizeof(QuickEntryPoints));
}
TEST_F(OatTest, OatHeaderIsValid) {
diff --git a/runtime/arch/arm/entrypoints_init_arm.cc b/runtime/arch/arm/entrypoints_init_arm.cc
index 23e3433..340a83e 100644
--- a/runtime/arch/arm/entrypoints_init_arm.cc
+++ b/runtime/arch/arm/entrypoints_init_arm.cc
@@ -196,7 +196,6 @@
qpoints->pCmplDouble = CmplDouble;
qpoints->pCmplFloat = CmplFloat;
qpoints->pFmod = fmod;
- qpoints->pSqrt = sqrt;
qpoints->pL2d = __aeabi_l2d;
qpoints->pFmodf = fmodf;
qpoints->pL2f = __aeabi_l2f;
diff --git a/runtime/arch/arm64/entrypoints_init_arm64.cc b/runtime/arch/arm64/entrypoints_init_arm64.cc
index cb9f53b..46e819e 100644
--- a/runtime/arch/arm64/entrypoints_init_arm64.cc
+++ b/runtime/arch/arm64/entrypoints_init_arm64.cc
@@ -182,7 +182,6 @@
qpoints->pCmplDouble = CmplDouble;
qpoints->pCmplFloat = CmplFloat;
qpoints->pFmod = fmod;
- qpoints->pSqrt = sqrt;
qpoints->pL2d = NULL;
qpoints->pFmodf = fmodf;
qpoints->pL2f = NULL;
diff --git a/runtime/arch/x86/entrypoints_init_x86.cc b/runtime/arch/x86/entrypoints_init_x86.cc
index 8ad29dd..c53fa1e 100644
--- a/runtime/arch/x86/entrypoints_init_x86.cc
+++ b/runtime/arch/x86/entrypoints_init_x86.cc
@@ -177,7 +177,6 @@
// points->pCmplDouble = NULL; // Not needed on x86.
// points->pCmplFloat = NULL; // Not needed on x86.
qpoints->pFmod = art_quick_fmod;
- // qpoints->pSqrt = NULL; // Not needed on x86.
// qpoints->pL2d = NULL; // Not needed on x86.
qpoints->pFmodf = art_quick_fmodf;
// qpoints->pL2f = NULL; // Not needed on x86.
diff --git a/runtime/arch/x86_64/entrypoints_init_x86_64.cc b/runtime/arch/x86_64/entrypoints_init_x86_64.cc
index 86dcf36..aeda072 100644
--- a/runtime/arch/x86_64/entrypoints_init_x86_64.cc
+++ b/runtime/arch/x86_64/entrypoints_init_x86_64.cc
@@ -176,7 +176,6 @@
// points->pCmplDouble = NULL; // Not needed on x86.
// points->pCmplFloat = NULL; // Not needed on x86.
qpoints->pFmod = fmod;
- // qpoints->pSqrt = NULL; // Not needed on x86.
// qpoints->pL2d = NULL; // Not needed on x86.
qpoints->pFmodf = fmodf;
// qpoints->pL2f = NULL; // Not needed on x86.
diff --git a/runtime/entrypoints/quick/quick_entrypoints.h b/runtime/entrypoints/quick/quick_entrypoints.h
index ec69e28..7bd1582 100644
--- a/runtime/entrypoints/quick/quick_entrypoints.h
+++ b/runtime/entrypoints/quick/quick_entrypoints.h
@@ -98,7 +98,6 @@
int32_t (*pCmplDouble)(double, double);
int32_t (*pCmplFloat)(float, float);
double (*pFmod)(double, double);
- double (*pSqrt)(double);
double (*pL2d)(int64_t);
float (*pFmodf)(float, float);
float (*pL2f)(int64_t);
diff --git a/runtime/oat.cc b/runtime/oat.cc
index 10d335e..9c14a4f 100644
--- a/runtime/oat.cc
+++ b/runtime/oat.cc
@@ -22,7 +22,7 @@
namespace art {
const uint8_t OatHeader::kOatMagic[] = { 'o', 'a', 't', '\n' };
-const uint8_t OatHeader::kOatVersion[] = { '0', '2', '9', '\0' };
+const uint8_t OatHeader::kOatVersion[] = { '0', '3', '0', '\0' };
OatHeader::OatHeader() {
memset(this, 0, sizeof(*this));
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 41cfc58..55bec1e 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -1816,7 +1816,6 @@
QUICK_ENTRY_POINT_INFO(pCmplDouble)
QUICK_ENTRY_POINT_INFO(pCmplFloat)
QUICK_ENTRY_POINT_INFO(pFmod)
- QUICK_ENTRY_POINT_INFO(pSqrt)
QUICK_ENTRY_POINT_INFO(pL2d)
QUICK_ENTRY_POINT_INFO(pFmodf)
QUICK_ENTRY_POINT_INFO(pL2f)