Loop detection, improved reg allocation

Detect loops and loop nesting depth, and use the latter to weight
register uses (the weighted counts then determine which registers
to promote).
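
In rough terms, each explicit use of an sReg is weighted by the loop
nesting depth of its block (a sketch of the new countUses() logic in
Dataflow.cc below):

    // each use in a block at loop nesting depth d counts as 2^min(d, 16)
    uint32_t weight = std::min(16U, (uint32_t)bb->nestingDepth);
    cUnit->useCounts.elemList[sReg] += (1 << weight);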

Also:
o Fixed a typo that prevented squashing of useless fp reg copies.
o Rescheduled array access checks to hide the latency of the limit
  load.
o Added a basic-block optimization pass to remove duplicate range
  checks.
o Fixed a bug that prevented recognition of redundant null checks
  following iput-wide and aput-wide.

Change-Id: Icfbae39e89b1d14b8703ad6bbb0b29c0635fed1e
diff --git a/src/compiler/Dataflow.cc b/src/compiler/Dataflow.cc
index 581c463..7b05a66 100644
--- a/src/compiler/Dataflow.cc
+++ b/src/compiler/Dataflow.cc
@@ -259,7 +259,7 @@
DF_UA | DF_UB | DF_UC | DF_NULL_CHK_1 | DF_RANGE_CHK_2 | DF_IS_SETTER | DF_CORE_B | DF_CORE_C,
// 4C APUT_WIDE vAA, vBB, vCC
- DF_UA_WIDE | DF_UB | DF_UC | DF_NULL_CHK_1 | DF_RANGE_CHK_2 | DF_IS_SETTER | DF_CORE_B | DF_CORE_C,
+ DF_UA_WIDE | DF_UB | DF_UC | DF_NULL_CHK_2 | DF_RANGE_CHK_2 | DF_IS_SETTER | DF_CORE_B | DF_CORE_C,
// 4D APUT_OBJECT vAA, vBB, vCC
DF_UA | DF_UB | DF_UC | DF_NULL_CHK_1 | DF_RANGE_CHK_2 | DF_IS_SETTER | DF_CORE_B | DF_CORE_C,
@@ -301,7 +301,7 @@
DF_UA | DF_UB | DF_NULL_CHK_1 | DF_IS_SETTER | DF_CORE_B,
// 5A IPUT_WIDE vA, vB, field@CCCC
- DF_UA_WIDE | DF_UB | DF_NULL_CHK_1 | DF_IS_SETTER | DF_CORE_B,
+ DF_UA_WIDE | DF_UB | DF_NULL_CHK_2 | DF_IS_SETTER | DF_CORE_B,
// 5B IPUT_OBJECT vA, vB, field@CCCC
DF_UA | DF_UB | DF_NULL_CHK_1 | DF_IS_SETTER | DF_CORE_B,
@@ -730,7 +730,7 @@
DF_DA_WIDE | DF_UB | DF_NULL_CHK_0 | DF_CORE_B,
// E9 IPUT_WIDE_VOLATILE
- DF_UA_WIDE | DF_UB | DF_NULL_CHK_1 | DF_CORE_B,
+ DF_UA_WIDE | DF_UB | DF_NULL_CHK_2 | DF_CORE_B,
// EA SGET_WIDE_VOLATILE
DF_DA_WIDE,
@@ -769,7 +769,7 @@
DF_UA | DF_UB | DF_NULL_CHK_1 | DF_IS_SETTER,
// F6 IPUT_WIDE_QUICK
- DF_UA_WIDE | DF_UB | DF_NULL_CHK_1 |DF_IS_SETTER,
+    DF_UA_WIDE | DF_UB | DF_NULL_CHK_2 | DF_IS_SETTER,
// F7 IPUT_OBJECT_QUICK
DF_UA | DF_UB | DF_NULL_CHK_1 | DF_IS_SETTER,
@@ -1631,7 +1631,8 @@
}
/* Advance to next strictly dominated MIR node in an extended basic block */
-MIR* advanceMIR(CompilationUnit* cUnit, BasicBlock** pBb, MIR* mir, ArenaBitVector* bv) {
+MIR* advanceMIR(CompilationUnit* cUnit, BasicBlock** pBb, MIR* mir, ArenaBitVector* bv,
+ bool clearMark) {
BasicBlock* bb = *pBb;
if (mir != NULL) {
mir = mir->next;
@@ -1648,9 +1649,98 @@
}
}
}
+ if (mir && clearMark) {
+ mir->optimizationFlags &= ~MIR_MARK;
+ }
return mir;
}
+/*
+ * For the rest of the extended run, replace uses of rOld with rNew.
+ */
+void replaceOperands(CompilationUnit* cUnit, BasicBlock** pBp, MIR* mir,
+ int rOld, int rNew)
+{
+ while (true) {
+ mir = advanceMIR(cUnit, pBp, mir, NULL, false);
+ if (mir == NULL) {
+ break;
+ }
+ if (mir->ssaRep == NULL) {
+ continue;
+ }
+ if (mir->optimizationFlags & MIR_MARK) {
+ LOG(INFO) << "recursive replace";
+ mir->dalvikInsn.opcode = (Instruction::Code)kMirOpCopy;
+ mir->ssaRep->uses[0] = rNew;
+ mir->ssaRep->numUses = 1;
+ replaceOperands(cUnit, pBp, mir, mir->ssaRep->defs[0], rNew);
+ mir->optimizationFlags &= ~MIR_MARK;
+ } else {
+ for (int i = 0; i < mir->ssaRep->numUses; i++) {
+ if (mir->ssaRep->uses[i] == rOld) {
+ mir->optimizationFlags |= MIR_IGNORE_NULL_CHECK;
+ mir->ssaRep->uses[i] = rNew;
+ }
+ }
+ }
+ }
+}
+
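+/*
+ * Walk the remainder of the extended basic block and set
+ * MIR_IGNORE_RANGE_CHECK on later aget/aput MIRs that use the same
+ * array and index sRegs as an already-checked access.
+ */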
+void squashDupRangeChecks(CompilationUnit* cUnit, BasicBlock** pBp, MIR* mir,
+ int arraySreg, int indexSreg)
+{
+ while (true) {
+ mir = advanceMIR(cUnit, pBp, mir, NULL, false);
+ if (!mir) {
+ break;
+ }
+ if ((mir->ssaRep == NULL) ||
+ (mir->optimizationFlags & MIR_IGNORE_RANGE_CHECK)) {
+ continue;
+ }
+ int checkArray = INVALID_SREG;
+ int checkIndex = INVALID_SREG;
+ switch(mir->dalvikInsn.opcode) {
+ case Instruction::AGET:
+ case Instruction::AGET_OBJECT:
+ case Instruction::AGET_BOOLEAN:
+ case Instruction::AGET_BYTE:
+ case Instruction::AGET_CHAR:
+ case Instruction::AGET_SHORT:
+ case Instruction::AGET_WIDE:
+ checkArray = mir->ssaRep->uses[0];
+ checkIndex = mir->ssaRep->uses[1];
+            break;
+ case Instruction::APUT:
+ case Instruction::APUT_OBJECT:
+ case Instruction::APUT_SHORT:
+ case Instruction::APUT_CHAR:
+ case Instruction::APUT_BYTE:
+ case Instruction::APUT_BOOLEAN:
+ checkArray = mir->ssaRep->uses[1];
+ checkIndex = mir->ssaRep->uses[2];
+ break;
+ case Instruction::APUT_WIDE:
+ checkArray = mir->ssaRep->uses[2];
+            checkIndex = mir->ssaRep->uses[3];
+            break;
+        default:
+ break;
+ }
+ if (checkArray == INVALID_SREG) {
+ continue;
+ }
+ if ((arraySreg == checkArray) && (indexSreg == checkIndex)) {
+ if (cUnit->printMe) {
+ LOG(INFO) << "Squashing range check @ 0x" << std::hex
+ << mir->offset;
+ }
+ mir->optimizationFlags |= MIR_IGNORE_RANGE_CHECK;
+ }
+ }
+}
+
/* Allocate a compiler temp, return Sreg. Reuse existing if no conflict */
int allocCompilerTempSreg(CompilationUnit* cUnit, ArenaBitVector* bv)
{
@@ -1704,7 +1794,39 @@
for (MIR* mir = bb->firstMIRInsn; mir; mir = mir->next) {
// Look for interesting opcodes, skip otherwise
- switch(mir->dalvikInsn.opcode) {
+ Instruction::Code opcode = mir->dalvikInsn.opcode;
+ switch(opcode) {
+ case Instruction::AGET:
+ case Instruction::AGET_OBJECT:
+ case Instruction::AGET_BOOLEAN:
+ case Instruction::AGET_BYTE:
+ case Instruction::AGET_CHAR:
+ case Instruction::AGET_SHORT:
+ case Instruction::AGET_WIDE:
+ if (!(mir->optimizationFlags & MIR_IGNORE_RANGE_CHECK)) {
+ int arrSreg = mir->ssaRep->uses[0];
+ int idxSreg = mir->ssaRep->uses[1];
+ BasicBlock* tbb = bb;
+ squashDupRangeChecks(cUnit, &tbb, mir, arrSreg, idxSreg);
+ }
+ break;
+ case Instruction::APUT:
+ case Instruction::APUT_OBJECT:
+ case Instruction::APUT_SHORT:
+ case Instruction::APUT_CHAR:
+ case Instruction::APUT_BYTE:
+ case Instruction::APUT_BOOLEAN:
+ case Instruction::APUT_WIDE:
+ if (!(mir->optimizationFlags & MIR_IGNORE_RANGE_CHECK)) {
+ int start = (opcode == Instruction::APUT_WIDE) ? 2 : 1;
+ int arrSreg = mir->ssaRep->uses[start];
+ int idxSreg = mir->ssaRep->uses[start + 1];
+ BasicBlock* tbb = bb;
+ squashDupRangeChecks(cUnit, &tbb, mir, arrSreg, idxSreg);
+ }
+ break;
+#if 0
+ // Probably not worthwhile.
case Instruction::IGET_OBJECT: {
// TODO: look for CSE
if (mir->optimizationFlags & MIR_DUP) {
@@ -1720,7 +1842,7 @@
BasicBlock* tbb = bb;
MIR* tm = mir;
while (true) {
- tm = advanceMIR(cUnit, &tbb, tm, tempBlockV);
+ tm = advanceMIR(cUnit, &tbb, tm, tempBlockV, true);
if ((tm == NULL) || (tm == mir)) {
break;
}
@@ -1730,9 +1852,11 @@
&& (tm->dalvikInsn.vC == fieldIdx)) {
if (cUnit->printMe) {
LOG(INFO) << "Got DUP IGET_OBJECT @ 0x"
- << std::hex << tm->offset << ", from 0x"
- << std::hex <<mir->offset;
+ << std::hex << tm->offset
+ << ", from 0x" << std::hex
+ << mir->offset;
}
+ tm->optimizationFlags |= MIR_MARK;
matches++;
} else if ((opcode == Instruction::IPUT_OBJECT)
&& (tm->ssaRep->uses[0] == objSreg)
@@ -1753,24 +1877,19 @@
newMir->ssaRep->defs[0] = dstSreg;
newMir->ssaRep->uses[0] = tmpSreg;
mir->ssaRep->defs[0] = tmpSreg;
+ mir->optimizationFlags &= ~MIR_MARK;
oatInsertMIRAfter(bb, mir, newMir);
+ BasicBlock* tbb = bb;
+ replaceOperands(cUnit, &tbb, mir, dstSreg, tmpSreg);
}
}
break;
- case Instruction::IF_EQ:
- case Instruction::IF_NE:
- case Instruction::IF_LT:
- case Instruction::IF_GE:
- case Instruction::IF_GT:
- case Instruction::IF_LE:
- // TODO: Check for and fuse preceeding comparison
- break;
- case Instruction::IF_EQZ:
- case Instruction::IF_NEZ:
- case Instruction::IF_LTZ:
- case Instruction::IF_GEZ:
- case Instruction::IF_GTZ:
- case Instruction::IF_LEZ:
+#endif
+ case Instruction::CMPL_FLOAT:
+ case Instruction::CMPL_DOUBLE:
+ case Instruction::CMPG_FLOAT:
+ case Instruction::CMPG_DOUBLE:
+ case Instruction::CMP_LONG:
// TODO: Check for and fuse preceeding comparison
break;
default:
@@ -1901,8 +2020,15 @@
// Already nullchecked?
if (dfAttributes & DF_HAS_NULL_CHKS) {
- int srcSreg = (dfAttributes & DF_NULL_CHK_1) ?
- mir->ssaRep->uses[1] : mir->ssaRep->uses[0];
+ int srcIdx;
+ if (dfAttributes & DF_NULL_CHK_1) {
+ srcIdx = 1;
+ } else if (dfAttributes & DF_NULL_CHK_2) {
+ srcIdx = 2;
+ } else {
+ srcIdx = 0;
+ }
+ int srcSreg = mir->ssaRep->uses[srcIdx];
if (oatIsBitSet(cUnit->tempSSARegisterV, srcSreg)) {
// Eliminate the null check
mir->optimizationFlags |= MIR_IGNORE_NULL_CHECK;
@@ -1938,13 +2064,202 @@
void oatMethodBasicBlockOptimization(CompilationUnit *cUnit)
{
- oatInitGrowableList(cUnit, &cUnit->compilerTemps, 6, kListMisc);
- DCHECK_EQ(cUnit->numCompilerTemps, 0);
if (!(cUnit->disableOpt & (1 << kBBOpt))) {
- oatDataFlowAnalysisDispatcher(cUnit, basicBlockOpt,
- kAllNodes,
- false /* isIterative */);
+ oatInitGrowableList(cUnit, &cUnit->compilerTemps, 6, kListMisc);
+ DCHECK_EQ(cUnit->numCompilerTemps, 0);
+        oatDataFlowAnalysisDispatcher(cUnit, basicBlockOpt,
+                                      kAllNodes, false /* isIterative */);
}
}
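+
+/*
+ * Record backEdge as an incoming back edge of the loop headed by header,
+ * creating a new LoopInfo entry if this is the first back edge seen.
+ */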
+void addLoopHeader(CompilationUnit* cUnit, BasicBlock* header,
+ BasicBlock* backEdge)
+{
+ GrowableListIterator iter;
+ oatGrowableListIteratorInit(&cUnit->loopHeaders, &iter);
+ for (LoopInfo* loop = (LoopInfo*)oatGrowableListIteratorNext(&iter);
+ (loop != NULL); loop = (LoopInfo*)oatGrowableListIteratorNext(&iter)) {
+ if (loop->header == header) {
+ oatInsertGrowableList(cUnit, &loop->incomingBackEdges,
+ (intptr_t)backEdge);
+ return;
+ }
+ }
+ LoopInfo* info = (LoopInfo*)oatNew(cUnit, sizeof(LoopInfo), true,
+ kAllocDFInfo);
+ info->header = header;
+ oatInitGrowableList(cUnit, &info->incomingBackEdges, 2, kListMisc);
+ oatInsertGrowableList(cUnit, &info->incomingBackEdges, (intptr_t)backEdge);
+ oatInsertGrowableList(cUnit, &cUnit->loopHeaders, (intptr_t)info);
+}
+
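+/*
+ * Detect loop back edges: a backward branch whose target dominates the
+ * branching block marks that target as a loop header.
+ */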
+bool findBackEdges(struct CompilationUnit* cUnit, struct BasicBlock* bb)
+{
+ if ((bb->dataFlowInfo == NULL) || (bb->lastMIRInsn == NULL)) {
+ return false;
+ }
+ Instruction::Code opcode = bb->lastMIRInsn->dalvikInsn.opcode;
+ if (Instruction::Flags(opcode) & Instruction::kBranch) {
+ if (bb->taken && (bb->taken->startOffset <= bb->startOffset)) {
+ DCHECK(bb->dominators != NULL);
+ if (oatIsBitSet(bb->dominators, bb->taken->id)) {
+ if (cUnit->printMe) {
+ LOG(INFO) << "Loop backedge from 0x"
+ << std::hex << bb->lastMIRInsn->offset
+ << " to 0x" << std::hex << bb->taken->startOffset;
+ }
+ addLoopHeader(cUnit, bb->taken, bb);
+ }
+ }
+ }
+ return false;
+}
+
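+/*
+ * Recursively add bb and its predecessors to the loop's block set,
+ * stopping at blocks not dominated by the loop header (headId).
+ */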
+void addBlocksToLoop(CompilationUnit* cUnit, ArenaBitVector* blocks,
+ BasicBlock* bb, int headId)
+{
+ if (!oatIsBitSet(bb->dominators, headId) ||
+ oatIsBitSet(blocks, bb->id)) {
+ return;
+ }
+ oatSetBit(cUnit, blocks, bb->id);
+ GrowableListIterator iter;
+ oatGrowableListIteratorInit(bb->predecessors, &iter);
+ BasicBlock* predBB;
+ for (predBB = (BasicBlock*)oatGrowableListIteratorNext(&iter); predBB;
+ predBB = (BasicBlock*)oatGrowableListIteratorNext(&iter)) {
+ addBlocksToLoop(cUnit, blocks, predBB, headId);
+ }
+}
+
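+/* Dump each detected loop: header, incoming back edges and member blocks */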
+void oatDumpLoops(CompilationUnit *cUnit)
+{
+ GrowableListIterator iter;
+ oatGrowableListIteratorInit(&cUnit->loopHeaders, &iter);
+ for (LoopInfo* loop = (LoopInfo*)oatGrowableListIteratorNext(&iter);
+ (loop != NULL); loop = (LoopInfo*)oatGrowableListIteratorNext(&iter)) {
+ LOG(INFO) << "Loop head block id " << loop->header->id
+ << ", offset 0x" << std::hex << loop->header->startOffset
+ << ", Depth: " << loop->header->nestingDepth;
+ GrowableListIterator iter;
+ oatGrowableListIteratorInit(&loop->incomingBackEdges, &iter);
+ BasicBlock* edgeBB;
+ for (edgeBB = (BasicBlock*)oatGrowableListIteratorNext(&iter); edgeBB;
+ edgeBB = (BasicBlock*)oatGrowableListIteratorNext(&iter)) {
+ LOG(INFO) << " Backedge block id " << edgeBB->id
+ << ", offset 0x" << std::hex << edgeBB->startOffset;
+ ArenaBitVectorIterator bIter;
+ oatBitVectorIteratorInit(loop->blocks, &bIter);
+ for (int bbId = oatBitVectorIteratorNext(&bIter); bbId != -1;
+ bbId = oatBitVectorIteratorNext(&bIter)) {
+ BasicBlock *bb;
+ bb = (BasicBlock*)
+ oatGrowableListGetElement(&cUnit->blockList, bbId);
+ LOG(INFO) << " (" << bb->id << ", 0x" << std::hex
+ << bb->startOffset << ")";
+ }
+ }
+ }
+}
+
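+/*
+ * Find loop headers and back edges, collect the blocks belonging to each
+ * loop, and compute a nesting depth for every block.  The depths are later
+ * used to weight register uses for promotion.
+ */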
+void oatMethodLoopDetection(CompilationUnit *cUnit)
+{
+ if (cUnit->disableOpt & (1 << kPromoteRegs)) {
+ return;
+ }
+ oatInitGrowableList(cUnit, &cUnit->loopHeaders, 6, kListMisc);
+ // Find the loop headers
+ oatDataFlowAnalysisDispatcher(cUnit, findBackEdges,
+ kAllNodes, false /* isIterative */);
+ GrowableListIterator iter;
+ oatGrowableListIteratorInit(&cUnit->loopHeaders, &iter);
+ // Add blocks to each header
+ for (LoopInfo* loop = (LoopInfo*)oatGrowableListIteratorNext(&iter);
+ loop; loop = (LoopInfo*)oatGrowableListIteratorNext(&iter)) {
+ loop->blocks = oatAllocBitVector(cUnit, cUnit->numBlocks, true,
+ kBitMapMisc);
+ oatSetBit(cUnit, loop->blocks, loop->header->id);
+ GrowableListIterator iter;
+ oatGrowableListIteratorInit(&loop->incomingBackEdges, &iter);
+ BasicBlock* edgeBB;
+ for (edgeBB = (BasicBlock*)oatGrowableListIteratorNext(&iter); edgeBB;
+ edgeBB = (BasicBlock*)oatGrowableListIteratorNext(&iter)) {
+ addBlocksToLoop(cUnit, loop->blocks, edgeBB, loop->header->id);
+ }
+ }
+ // Compute the nesting depth of each header
+ oatGrowableListIteratorInit(&cUnit->loopHeaders, &iter);
+ for (LoopInfo* loop = (LoopInfo*)oatGrowableListIteratorNext(&iter);
+ loop; loop = (LoopInfo*)oatGrowableListIteratorNext(&iter)) {
+ GrowableListIterator iter2;
+ oatGrowableListIteratorInit(&cUnit->loopHeaders, &iter2);
+ LoopInfo* loop2;
+ for (loop2 = (LoopInfo*)oatGrowableListIteratorNext(&iter2);
+ loop2; loop2 = (LoopInfo*)oatGrowableListIteratorNext(&iter2)) {
+ if (oatIsBitSet(loop2->blocks, loop->header->id)) {
+ loop->header->nestingDepth++;
+ }
+ }
+ }
+ // Assign nesting depth to each block in all loops
+ oatGrowableListIteratorInit(&cUnit->loopHeaders, &iter);
+ for (LoopInfo* loop = (LoopInfo*)oatGrowableListIteratorNext(&iter);
+ (loop != NULL); loop = (LoopInfo*)oatGrowableListIteratorNext(&iter)) {
+ ArenaBitVectorIterator bIter;
+ oatBitVectorIteratorInit(loop->blocks, &bIter);
+ for (int bbId = oatBitVectorIteratorNext(&bIter); bbId != -1;
+ bbId = oatBitVectorIteratorNext(&bIter)) {
+ BasicBlock *bb;
+ bb = (BasicBlock*) oatGrowableListGetElement(&cUnit->blockList,
+ bbId);
+ bb->nestingDepth = std::max(bb->nestingDepth,
+ loop->header->nestingDepth);
+ }
+ }
+ if (cUnit->printMe) {
+ oatDumpLoops(cUnit);
+ }
+}
+
+/*
+ * Count uses, weighting by loop nesting depth. This code only
+ * counts explicitly used sRegs. A later phase will add implicit
+ * counts for things such as Method*, null-checked references, etc.
+ */
+bool countUses(struct CompilationUnit* cUnit, struct BasicBlock* bb)
+{
+ if (bb->blockType != kDalvikByteCode) {
+ return false;
+ }
+ for (MIR* mir = bb->firstMIRInsn; (mir != NULL); mir = mir->next) {
+ if (mir->ssaRep == NULL) {
+ continue;
+ }
+ for (int i = 0; i < mir->ssaRep->numUses; i++) {
+ int sReg = mir->ssaRep->uses[i];
+ uint32_t weight = std::min(16U, (uint32_t)bb->nestingDepth);
+ DCHECK_LT(sReg, (int)cUnit->useCounts.numUsed);
+ cUnit->useCounts.elemList[sReg] += (1 << weight);
+ }
+ }
+ return false;
+}
+
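+/* Count explicit sReg uses for the method, weighted by loop nesting depth */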
+void oatMethodUseCount(CompilationUnit *cUnit)
+{
+ if (cUnit->disableOpt & (1 << kPromoteRegs)) {
+ return;
+ }
+ oatInitGrowableList(cUnit, &cUnit->useCounts, cUnit->numSSARegs + 32,
+ kListMisc);
+ // Initialize list
+ for (int i = 0; i < cUnit->numSSARegs; i++) {
+ oatInsertGrowableList(cUnit, &cUnit->useCounts, 0);
+ }
+ oatDataFlowAnalysisDispatcher(cUnit, countUses,
+ kAllNodes, false /* isIterative */);
+}
+
} // namespace art