Enable load store elimination on x86.
Includes a fix to prevent stores from being sunk between a cmp and the
jcc that consumes its condition codes. Also fixes neg-float/neg-double
when the source and destination are the same register. All
optimizations are now enabled by default on x86.
Change-Id: Ie6f1a3a5ba94fd1b5298df87779d70d9868e8baa
diff --git a/src/compiler/Frontend.cc b/src/compiler/Frontend.cc
index b68233b..7585b77 100644
--- a/src/compiler/Frontend.cc
+++ b/src/compiler/Frontend.cc
@@ -797,10 +797,6 @@
cUnit->disableOpt |= ~(1 << kSafeOptimizations);
}
#endif
- if (cUnit->instructionSet == kX86) {
- // Disable some optimizations on X86 for now
- cUnit->disableOpt |= (1 << kLoadStoreElimination);
- }
/* Are we generating code for the debugger? */
if (compiler.IsDebuggingSupported()) {
cUnit->genDebugger = true;
diff --git a/src/compiler/codegen/LocalOptimizations.cc b/src/compiler/codegen/LocalOptimizations.cc
index faab3e0..2fc7ae0 100644
--- a/src/compiler/codegen/LocalOptimizations.cc
+++ b/src/compiler/codegen/LocalOptimizations.cc
@@ -226,6 +226,15 @@
}
if (stopHere == true) {
+#if defined(TARGET_X86)
+ // Prevent stores from being sunk between ops that generate ccodes and
+ // ops that use them.
+ int flags = EncodingMap[checkLIR->opcode].flags;
+ if (sinkDistance > 0 && (flags & IS_BRANCH) && (flags & USES_CCODES)) {
+ checkLIR = PREV_LIR(checkLIR);
+ sinkDistance--;
+ }
+#endif
DEBUG_OPT(dumpDependentInsnPair(thisLIR, checkLIR, "REG CLOBBERED"));
/* Only sink store instructions */
if (sinkDistance && !isThisLIRLoad) {
diff --git a/src/compiler/codegen/x86/FP/X86FP.cc b/src/compiler/codegen/x86/FP/X86FP.cc
index 4e56e1d..6003465 100644
--- a/src/compiler/codegen/x86/FP/X86FP.cc
+++ b/src/compiler/codegen/x86/FP/X86FP.cc
@@ -44,16 +44,23 @@
case Instruction::MUL_FLOAT:
op = kX86MulssRR;
break;
- case Instruction::NEG_FLOAT:
+ case Instruction::NEG_FLOAT: {
// TODO: Make this an XorpsRM where the memory location holds 0x80000000
rlSrc1 = loadValue(cUnit, rlSrc1, kFPReg);
rlResult = oatEvalLoc(cUnit, rlDest, kFPReg, true);
tempReg = oatAllocTemp(cUnit);
loadConstant(cUnit, tempReg, 0x80000000);
- newLIR2(cUnit, kX86MovdxrRR, rlResult.lowReg, tempReg);
- newLIR2(cUnit, kX86XorpsRR, rlResult.lowReg, rlSrc1.lowReg);
+ int rDest = rlResult.lowReg;
+ int rSrc1 = rlSrc1.lowReg;
+ if (rDest == rSrc1) {
+ rSrc1 = oatAllocTempFloat(cUnit);
+ opRegCopy(cUnit, rSrc1, rDest);
+ }
+ newLIR2(cUnit, kX86MovdxrRR, rDest, tempReg);
+ newLIR2(cUnit, kX86XorpsRR, rDest, rSrc1);
storeValue(cUnit, rlDest, rlResult);
return false;
+ }
case Instruction::REM_FLOAT_2ADDR:
case Instruction::REM_FLOAT: {
return genArithOpFloatPortable(cUnit, opcode, rlDest, rlSrc1, rlSrc2);
@@ -67,7 +74,7 @@
int rDest = rlResult.lowReg;
int rSrc1 = rlSrc1.lowReg;
int rSrc2 = rlSrc2.lowReg;
- if (rSrc2 == rDest) {
+ if (rDest == rSrc2) {
rSrc2 = oatAllocTempFloat(cUnit);
opRegCopy(cUnit, rSrc2, rDest);
}
@@ -102,17 +109,24 @@
case Instruction::MUL_DOUBLE:
op = kX86MulsdRR;
break;
- case Instruction::NEG_DOUBLE:
+ case Instruction::NEG_DOUBLE: {
// TODO: Make this an XorpdRM where the memory location holds 0x8000000000000000
rlSrc1 = loadValueWide(cUnit, rlSrc1, kFPReg);
rlResult = oatEvalLoc(cUnit, rlDest, kFPReg, true);
tempReg = oatAllocTemp(cUnit);
loadConstant(cUnit, tempReg, 0x80000000);
- newLIR2(cUnit, kX86MovdxrRR, rlResult.lowReg, tempReg);
- newLIR2(cUnit, kX86PsllqRI, rlResult.lowReg, 32);
- newLIR2(cUnit, kX86XorpsRR, rlResult.lowReg, rlSrc1.lowReg);
+ int rDest = S2D(rlResult.lowReg, rlResult.highReg);
+ int rSrc1 = S2D(rlSrc1.lowReg, rlSrc1.highReg);
+ if (rDest == rSrc1) {
+ rSrc1 = oatAllocTempDouble(cUnit) | FP_DOUBLE;
+ opRegCopy(cUnit, rSrc1, rDest);
+ }
+ newLIR2(cUnit, kX86MovdxrRR, rDest, tempReg);
+ newLIR2(cUnit, kX86PsllqRI, rDest, 32);
+ newLIR2(cUnit, kX86XorpsRR, rDest, rSrc1);
storeValueWide(cUnit, rlDest, rlResult);
return false;
+ }
case Instruction::REM_DOUBLE_2ADDR:
case Instruction::REM_DOUBLE: {
return genArithOpDoublePortable(cUnit, opcode, rlDest, rlSrc1, rlSrc2);