Optimization fixes
Two primary fixes. First, the save/restore mechanism for FP callee saves
was broken if there were any holes in the save mask: the ARM load/store
multiple instructions for floating point use a start + count mechanism,
rather than the bit-mask mechanism used for core registers. The fix is to
save a contiguous block of FP registers that includes any holes.
The second fix corrects a problem introduced by the recent enhancements
to loading floating point literals. The load->copy optimization mechanism
for literal loads used the value of the loaded literal to identify
redundant loads. However, it used only the first 32 bits of the
literal - which worked fine previously because 64-bit literal loads
were treated as a pair of 32-bit loads. The fix is to record the
literal's label in aliasInfo instead of the literal's value, which
works for literals of any size.
Change-Id: Ic4779adf73b2c7d80059a988b0ecdef39921a81f
diff --git a/src/compiler/codegen/RallocUtil.cc b/src/compiler/codegen/RallocUtil.cc
index 9df80bd..9690287 100644
--- a/src/compiler/codegen/RallocUtil.cc
+++ b/src/compiler/codegen/RallocUtil.cc
@@ -227,7 +227,7 @@
coreRegs[i].inUse = true;
cUnit->coreSpillMask |= (1 << res);
cUnit->coreVmapTable.push_back(sReg);
- cUnit->numSpills++;
+ cUnit->numCoreSpills++;
cUnit->regLocation[sReg].location = kLocPhysReg;
cUnit->regLocation[sReg].lowReg = res;
cUnit->regLocation[sReg].home = true;
@@ -238,6 +238,28 @@
}
/*
+ * Mark a callee-save fp single register as promoted.
+ *
+ * vpush/vpop use contiguous register lists (start + count), so every
+ * single from the first callee save up to the highest promoted one is
+ * included in the save set, holes and all.  Holes are associated with
+ * Dalvik register INVALID_REG (-1) in fpVmapTable.
+ *
+ * cUnit: compilation unit whose fpVmapTable, numFPSpills and
+ *        fpSpillMask are updated.
+ * sReg:  register number recorded in fpVmapTable for this slot.
+ * reg:   fp register identifier; reduced below to a zero-based index
+ *        relative to FP_CALLEE_SAVE_BASE.
+ */
+STATIC void markPreservedSingle(CompilationUnit* cUnit, int sReg, int reg)
+{
+    // NOTE(review): this bound is built from FP_REG_MASK rather than
+    // FP_REG_OFFSET; if FP_REG_MASK is FP_REG_OFFSET - 1 it admits one
+    // register below the first callee save - confirm.
+    DCHECK_GE(reg, FP_REG_MASK + FP_CALLEE_SAVE_BASE);
+    reg = (reg & FP_REG_MASK) - FP_CALLEE_SAVE_BASE;
+    // The normalized value indexes fpVmapTable directly, so verify it
+    // is non-negative instead of relying only on the raw bound above.
+    DCHECK_GE(reg, 0);
+    // Ensure fpVmapTable is large enough, padding holes with INVALID_REG
+    int tableSize = cUnit->fpVmapTable.size();
+    for (int i = tableSize; i < (reg + 1); i++) {
+        cUnit->fpVmapTable.push_back(INVALID_REG);
+    }
+    // Add the current mapping
+    cUnit->fpVmapTable[reg] = sReg;
+    // Size of fpVmapTable is high-water mark, use to set mask
+    cUnit->numFPSpills = cUnit->fpVmapTable.size();
+    // Unsigned arithmetic: with the callee saves based at bit 16, a
+    // full set of 16 singles reaches bit 31 of the mask.
+    cUnit->fpSpillMask = ((1U << cUnit->numFPSpills) - 1) << FP_CALLEE_SAVE_BASE;
+}
+
+/*
* Reserve a callee-save fp single register. Try to fullfill request for
* even/odd allocation, but go ahead and allocate anything if not
* available. If nothing's available, return -1.
@@ -251,10 +273,7 @@
((FPRegs[i].reg & 0x1) == 0) == even) {
res = FPRegs[i].reg;
FPRegs[i].inUse = true;
- cUnit->fpSpillMask |= (1 << (res & FP_REG_MASK));
- cUnit->fpVmapTable.push_back(sReg);
- cUnit->numSpills++;
- cUnit->numFPSpills++;
+ markPreservedSingle(cUnit, sReg, res);
cUnit->regLocation[sReg].fpLocation = kLocPhysReg;
cUnit->regLocation[sReg].fpLowReg = res;
cUnit->regLocation[sReg].home = true;
@@ -292,10 +311,7 @@
res = p->reg;
p->inUse = true;
DCHECK_EQ((res & 1), 0);
- cUnit->fpSpillMask |= (1 << (res & FP_REG_MASK));
- cUnit->fpVmapTable.push_back(sReg);
- cUnit->numSpills++;
- cUnit->numFPSpills ++;
+ markPreservedSingle(cUnit, sReg, res);
} else {
RegisterInfo* FPRegs = cUnit->regPool->FPRegs;
for (int i = 0; i < cUnit->regPool->numFPRegs; i++) {
@@ -306,13 +322,10 @@
(FPRegs[i].reg + 1) == FPRegs[i+1].reg) {
res = FPRegs[i].reg;
FPRegs[i].inUse = true;
- cUnit->fpSpillMask |= (1 << (res & FP_REG_MASK));
- cUnit->fpVmapTable.push_back(sReg);
+ markPreservedSingle(cUnit, sReg, res);
FPRegs[i+1].inUse = true;
- cUnit->fpSpillMask |= (1 << ((res+1) & FP_REG_MASK));
- cUnit->fpVmapTable.push_back(sReg);
- cUnit->numSpills += 2;
- cUnit->numFPSpills += 2;
+ DCHECK_EQ(res + 1, FPRegs[i+1].reg);
+ markPreservedSingle(cUnit, sReg+1, res+1);
break;
}
}
diff --git a/src/compiler/codegen/arm/ArchUtility.cc b/src/compiler/codegen/arm/ArchUtility.cc
index 3ceffae..edce114 100644
--- a/src/compiler/codegen/arm/ArchUtility.cc
+++ b/src/compiler/codegen/arm/ArchUtility.cc
@@ -404,7 +404,8 @@
LOG(INFO) << "Regs (excluding ins) : " << cUnit->numRegs;
LOG(INFO) << "Ins : " << cUnit->numIns;
LOG(INFO) << "Outs : " << cUnit->numOuts;
- LOG(INFO) << "Spills : " << cUnit->numSpills;
+ LOG(INFO) << "CoreSpills : " << cUnit->numCoreSpills;
+ LOG(INFO) << "FPSpills : " << cUnit->numFPSpills;
LOG(INFO) << "Padding : " << cUnit->numPadding;
LOG(INFO) << "Frame size : " << cUnit->frameSize;
LOG(INFO) << "Start of ins : " << cUnit->insOffset;
diff --git a/src/compiler/codegen/arm/ArmLIR.h b/src/compiler/codegen/arm/ArmLIR.h
index 3b2e986..99b22e7 100644
--- a/src/compiler/codegen/arm/ArmLIR.h
+++ b/src/compiler/codegen/arm/ArmLIR.h
@@ -99,6 +99,8 @@
#define FP_REG_OFFSET 32
/* Offset to distinguish DP FP regs */
#define FP_DOUBLE 64
+/* First FP callee save */
+#define FP_CALLEE_SAVE_BASE 16
/* Reg types */
#define REGTYPE(x) (x & (FP_REG_OFFSET | FP_DOUBLE))
#define FPREG(x) ((x & FP_REG_OFFSET) == FP_REG_OFFSET)
diff --git a/src/compiler/codegen/arm/ArmRallocUtil.cc b/src/compiler/codegen/arm/ArmRallocUtil.cc
index 84c3792..9a7c642 100644
--- a/src/compiler/codegen/arm/ArmRallocUtil.cc
+++ b/src/compiler/codegen/arm/ArmRallocUtil.cc
@@ -136,7 +136,7 @@
* machinery is in place, always spill lr.
*/
cUnit->coreSpillMask |= (1 << rLR);
- cUnit->numSpills++;
+ cUnit->numCoreSpills++;
/*
* Simple hack for testing register allocation. Just do a static
* count of the uses of Dalvik registers. Note that we examine
diff --git a/src/compiler/codegen/arm/MethodCodegenDriver.cc b/src/compiler/codegen/arm/MethodCodegenDriver.cc
index 80118d8..a4e211b 100644
--- a/src/compiler/codegen/arm/MethodCodegenDriver.cc
+++ b/src/compiler/codegen/arm/MethodCodegenDriver.cc
@@ -1853,6 +1853,7 @@
ArmLIR* headLIR = NULL;
+ int spillCount = cUnit->numCoreSpills + cUnit->numFPSpills;
if (bb->blockType == kEntryBlock) {
/*
* On entry, r0, r1, r2 & r3 are live. Let the register allocation
@@ -1882,17 +1883,22 @@
newLIR1(cUnit, kThumb2Push, cUnit->coreSpillMask);
/* Need to spill any FP regs? */
if (cUnit->numFPSpills) {
+ /*
+ * NOTE: fp spills are a little different from core spills in that
+ * they are pushed as a contiguous block. When promoting from
+ * the fp set, we must allocate all singles from s16..highest-promoted
+ */
newLIR1(cUnit, kThumb2VPushCS, cUnit->numFPSpills);
}
if (!skipOverflowCheck) {
opRegRegImm(cUnit, kOpSub, rLR, rSP,
- cUnit->frameSize - (cUnit->numSpills * 4));
+ cUnit->frameSize - (spillCount * 4));
genRegRegCheck(cUnit, kArmCondCc, rLR, r12, NULL,
kArmThrowStackOverflow);
genRegCopy(cUnit, rSP, rLR); // Establish stack
} else {
opRegImm(cUnit, kOpSub, rSP,
- cUnit->frameSize - (cUnit->numSpills * 4));
+ cUnit->frameSize - (spillCount * 4));
}
storeBaseDisp(cUnit, rSP, 0, r0, kWord);
flushIns(cUnit);
@@ -1902,7 +1908,7 @@
oatFreeTemp(cUnit, r3);
} else if (bb->blockType == kExitBlock) {
newLIR0(cUnit, kArmPseudoMethodExit);
- opRegImm(cUnit, kOpAdd, rSP, cUnit->frameSize - (cUnit->numSpills * 4));
+ opRegImm(cUnit, kOpAdd, rSP, cUnit->frameSize - (spillCount * 4));
/* Need to restore any FP callee saves? */
if (cUnit->numFPSpills) {
newLIR1(cUnit, kThumb2VPopCS, cUnit->numFPSpills);
@@ -2121,7 +2127,8 @@
funcOffset =
OFFSETOF_MEMBER(Thread, pThrowStackOverflowFromCode);
// Restore stack alignment
- opRegImm(cUnit, kOpAdd, rSP, cUnit->numSpills * 4);
+ opRegImm(cUnit, kOpAdd, rSP,
+ (cUnit->numCoreSpills + cUnit->numFPSpills) * 4);
break;
default:
LOG(FATAL) << "Unexpected throw kind: " << lab->operands[0];
diff --git a/src/compiler/codegen/arm/Thumb2/Factory.cc b/src/compiler/codegen/arm/Thumb2/Factory.cc
index 3ee23ea..45c7377 100644
--- a/src/compiler/codegen/arm/Thumb2/Factory.cc
+++ b/src/compiler/codegen/arm/Thumb2/Factory.cc
@@ -75,7 +75,7 @@
loadPcRel->operands[1] = r15pc;
setupResourceMasks(loadPcRel);
setMemRefType(loadPcRel, true, kLiteral);
- loadPcRel->aliasInfo = dataTarget->operands[0];
+ loadPcRel->aliasInfo = (intptr_t)dataTarget;
oatAppendLIR(cUnit, (LIR* ) loadPcRel);
return loadPcRel;
}
@@ -183,7 +183,7 @@
loadPcRel->operands[0] = rDest;
setupResourceMasks(loadPcRel);
setMemRefType(loadPcRel, true, kLiteral);
- loadPcRel->aliasInfo = dataTarget->operands[0];
+ loadPcRel->aliasInfo = (intptr_t)dataTarget;
res = loadPcRel;
oatAppendLIR(cUnit, (LIR* ) loadPcRel);
@@ -661,7 +661,7 @@
loadPcRel->operands[1] = r15pc;
setupResourceMasks(loadPcRel);
setMemRefType(loadPcRel, true, kLiteral);
- loadPcRel->aliasInfo = dataTarget->operands[0];
+ loadPcRel->aliasInfo = (intptr_t)dataTarget;
oatAppendLIR(cUnit, (LIR* ) loadPcRel);
res = loadPcRel;
}