Loop detection, improved reg allocation
Detect loops and loop nesting depth, and use the latter to
weight register uses (which are then used to determine which
registers to promote).
Also:
o Fixed typo that prevented squashing of useless fp reg copies
o Rescheduled array access checks to hide latency of limit load.
o Add basic-block optimization pass to remove duplicate range
checks.
o Fixed bug that prevented recognition of redundant null
checks following iput-wide and aput-wide.
Change-Id: Icfbae39e89b1d14b8703ad6bbb0b29c0635fed1e
diff --git a/src/compiler/CompilerIR.h b/src/compiler/CompilerIR.h
index 5750913..54d9951 100644
--- a/src/compiler/CompilerIR.h
+++ b/src/compiler/CompilerIR.h
@@ -179,6 +179,7 @@
kMIRCallee, // Instruction is inlined from callee
kMIRIgnoreSuspendCheck,
kMIRDup,
+ kMIRMark, // Temporary node mark
};
#define MIR_IGNORE_NULL_CHECK (1 << kMIRIgnoreNullCheck)
@@ -190,6 +191,7 @@
#define MIR_CALLEE (1 << kMIRCallee)
#define MIR_IGNORE_SUSPEND_CHECK (1 << kMIRIgnoreSuspendCheck)
#define MIR_DUP (1 << kMIRDup)
+#define MIR_MARK (1 << kMIRMark)
struct CallsiteInfo {
const char* classDescriptor;
@@ -234,7 +236,8 @@
bool hidden;
bool catchEntry;
bool fallThroughTarget; // Reached via fallthrough
- unsigned int startOffset;
+ uint16_t startOffset;
+ uint16_t nestingDepth;
const Method* containingMethod; // For blocks from the callee
BBType blockType;
bool needFallThroughBranch; // For blocks ended due to length limit
@@ -335,6 +338,12 @@
int* phiAliasMap; // length == numSSAReg
MIR* phiList;
+ /* Use counts of ssa names */
+ GrowableList useCounts;
+
+ /* Optimization support */
+ GrowableList loopHeaders;
+
/* Map SSA names to location */
RegLocation* regLocation;
int sequenceNumber;
diff --git a/src/compiler/Dataflow.cc b/src/compiler/Dataflow.cc
index 581c463..7b05a66 100644
--- a/src/compiler/Dataflow.cc
+++ b/src/compiler/Dataflow.cc
@@ -259,7 +259,7 @@
DF_UA | DF_UB | DF_UC | DF_NULL_CHK_1 | DF_RANGE_CHK_2 | DF_IS_SETTER | DF_CORE_B | DF_CORE_C,
// 4C APUT_WIDE vAA, vBB, vCC
- DF_UA_WIDE | DF_UB | DF_UC | DF_NULL_CHK_1 | DF_RANGE_CHK_2 | DF_IS_SETTER | DF_CORE_B | DF_CORE_C,
+ DF_UA_WIDE | DF_UB | DF_UC | DF_NULL_CHK_2 | DF_RANGE_CHK_2 | DF_IS_SETTER | DF_CORE_B | DF_CORE_C,
// 4D APUT_OBJECT vAA, vBB, vCC
DF_UA | DF_UB | DF_UC | DF_NULL_CHK_1 | DF_RANGE_CHK_2 | DF_IS_SETTER | DF_CORE_B | DF_CORE_C,
@@ -301,7 +301,7 @@
DF_UA | DF_UB | DF_NULL_CHK_1 | DF_IS_SETTER | DF_CORE_B,
// 5A IPUT_WIDE vA, vB, field@CCCC
- DF_UA_WIDE | DF_UB | DF_NULL_CHK_1 | DF_IS_SETTER | DF_CORE_B,
+ DF_UA_WIDE | DF_UB | DF_NULL_CHK_2 | DF_IS_SETTER | DF_CORE_B,
// 5B IPUT_OBJECT vA, vB, field@CCCC
DF_UA | DF_UB | DF_NULL_CHK_1 | DF_IS_SETTER | DF_CORE_B,
@@ -730,7 +730,7 @@
DF_DA_WIDE | DF_UB | DF_NULL_CHK_0 | DF_CORE_B,
// E9 IPUT_WIDE_VOLATILE
- DF_UA_WIDE | DF_UB | DF_NULL_CHK_1 | DF_CORE_B,
+ DF_UA_WIDE | DF_UB | DF_NULL_CHK_2 | DF_CORE_B,
// EA SGET_WIDE_VOLATILE
DF_DA_WIDE,
@@ -769,7 +769,7 @@
DF_UA | DF_UB | DF_NULL_CHK_1 | DF_IS_SETTER,
// F6 IPUT_WIDE_QUICK
- DF_UA_WIDE | DF_UB | DF_NULL_CHK_1 |DF_IS_SETTER,
+ DF_UA_WIDE | DF_UB | DF_NULL_CHK_2 |DF_IS_SETTER,
// F7 IPUT_OBJECT_QUICK
DF_UA | DF_UB | DF_NULL_CHK_1 | DF_IS_SETTER,
@@ -1631,7 +1631,8 @@
}
/* Advance to next strictly dominated MIR node in an extended basic block */
-MIR* advanceMIR(CompilationUnit* cUnit, BasicBlock** pBb, MIR* mir, ArenaBitVector* bv) {
+MIR* advanceMIR(CompilationUnit* cUnit, BasicBlock** pBb, MIR* mir, ArenaBitVector* bv,
+ bool clearMark) {
BasicBlock* bb = *pBb;
if (mir != NULL) {
mir = mir->next;
@@ -1648,9 +1649,98 @@
}
}
}
+ if (mir && clearMark) {
+ mir->optimizationFlags &= ~MIR_MARK;
+ }
return mir;
}
+/*
+ * For the rest of the extended run, replace uses of rOld with rNew.
+ */
+void replaceOperands(CompilationUnit* cUnit, BasicBlock** pBp, MIR* mir,
+ int rOld, int rNew)
+{
+ while (true) {
+ mir = advanceMIR(cUnit, pBp, mir, NULL, false);
+ if (mir == NULL) {
+ break;
+ }
+ if (mir->ssaRep == NULL) {
+ continue;
+ }
+ if (mir->optimizationFlags & MIR_MARK) {
+            if (cUnit->printMe) LOG(INFO) << "recursive replace";
+ mir->dalvikInsn.opcode = (Instruction::Code)kMirOpCopy;
+ mir->ssaRep->uses[0] = rNew;
+ mir->ssaRep->numUses = 1;
+ replaceOperands(cUnit, pBp, mir, mir->ssaRep->defs[0], rNew);
+ mir->optimizationFlags &= ~MIR_MARK;
+ } else {
+ for (int i = 0; i < mir->ssaRep->numUses; i++) {
+ if (mir->ssaRep->uses[i] == rOld) {
+ mir->optimizationFlags |= MIR_IGNORE_NULL_CHECK;
+ mir->ssaRep->uses[i] = rNew;
+ }
+ }
+ }
+ }
+}
+
+void squashDupRangeChecks(CompilationUnit* cUnit, BasicBlock** pBp, MIR* mir,
+ int arraySreg, int indexSreg)
+{
+ while (true) {
+ mir = advanceMIR(cUnit, pBp, mir, NULL, false);
+ if (!mir) {
+ break;
+ }
+ if ((mir->ssaRep == NULL) ||
+ (mir->optimizationFlags & MIR_IGNORE_RANGE_CHECK)) {
+ continue;
+ }
+ int checkArray = INVALID_SREG;
+ int checkIndex = INVALID_SREG;
+ switch(mir->dalvikInsn.opcode) {
+ case Instruction::AGET:
+ case Instruction::AGET_OBJECT:
+ case Instruction::AGET_BOOLEAN:
+ case Instruction::AGET_BYTE:
+ case Instruction::AGET_CHAR:
+ case Instruction::AGET_SHORT:
+ case Instruction::AGET_WIDE:
+ checkArray = mir->ssaRep->uses[0];
+ checkIndex = mir->ssaRep->uses[1];
+                break;
+ case Instruction::APUT:
+ case Instruction::APUT_OBJECT:
+ case Instruction::APUT_SHORT:
+ case Instruction::APUT_CHAR:
+ case Instruction::APUT_BYTE:
+ case Instruction::APUT_BOOLEAN:
+ checkArray = mir->ssaRep->uses[1];
+ checkIndex = mir->ssaRep->uses[2];
+ break;
+            case Instruction::APUT_WIDE:
+                checkArray = mir->ssaRep->uses[2];
+                checkIndex = mir->ssaRep->uses[3];
+                break;
+            default:
+ break;
+ }
+ if (checkArray == INVALID_SREG) {
+ continue;
+ }
+ if ((arraySreg == checkArray) && (indexSreg == checkIndex)) {
+ if (cUnit->printMe) {
+ LOG(INFO) << "Squashing range check @ 0x" << std::hex
+ << mir->offset;
+ }
+ mir->optimizationFlags |= MIR_IGNORE_RANGE_CHECK;
+ }
+ }
+}
+
/* Allocate a compiler temp, return Sreg. Reuse existing if no conflict */
int allocCompilerTempSreg(CompilationUnit* cUnit, ArenaBitVector* bv)
{
@@ -1704,7 +1794,39 @@
for (MIR* mir = bb->firstMIRInsn; mir; mir = mir->next) {
// Look for interesting opcodes, skip otherwise
- switch(mir->dalvikInsn.opcode) {
+ Instruction::Code opcode = mir->dalvikInsn.opcode;
+ switch(opcode) {
+ case Instruction::AGET:
+ case Instruction::AGET_OBJECT:
+ case Instruction::AGET_BOOLEAN:
+ case Instruction::AGET_BYTE:
+ case Instruction::AGET_CHAR:
+ case Instruction::AGET_SHORT:
+ case Instruction::AGET_WIDE:
+ if (!(mir->optimizationFlags & MIR_IGNORE_RANGE_CHECK)) {
+ int arrSreg = mir->ssaRep->uses[0];
+ int idxSreg = mir->ssaRep->uses[1];
+ BasicBlock* tbb = bb;
+ squashDupRangeChecks(cUnit, &tbb, mir, arrSreg, idxSreg);
+ }
+ break;
+ case Instruction::APUT:
+ case Instruction::APUT_OBJECT:
+ case Instruction::APUT_SHORT:
+ case Instruction::APUT_CHAR:
+ case Instruction::APUT_BYTE:
+ case Instruction::APUT_BOOLEAN:
+ case Instruction::APUT_WIDE:
+ if (!(mir->optimizationFlags & MIR_IGNORE_RANGE_CHECK)) {
+ int start = (opcode == Instruction::APUT_WIDE) ? 2 : 1;
+ int arrSreg = mir->ssaRep->uses[start];
+ int idxSreg = mir->ssaRep->uses[start + 1];
+ BasicBlock* tbb = bb;
+ squashDupRangeChecks(cUnit, &tbb, mir, arrSreg, idxSreg);
+ }
+ break;
+#if 0
+ // Probably not worthwhile.
case Instruction::IGET_OBJECT: {
// TODO: look for CSE
if (mir->optimizationFlags & MIR_DUP) {
@@ -1720,7 +1842,7 @@
BasicBlock* tbb = bb;
MIR* tm = mir;
while (true) {
- tm = advanceMIR(cUnit, &tbb, tm, tempBlockV);
+ tm = advanceMIR(cUnit, &tbb, tm, tempBlockV, true);
if ((tm == NULL) || (tm == mir)) {
break;
}
@@ -1730,9 +1852,11 @@
&& (tm->dalvikInsn.vC == fieldIdx)) {
if (cUnit->printMe) {
LOG(INFO) << "Got DUP IGET_OBJECT @ 0x"
- << std::hex << tm->offset << ", from 0x"
- << std::hex <<mir->offset;
+ << std::hex << tm->offset
+ << ", from 0x" << std::hex
+ << mir->offset;
}
+ tm->optimizationFlags |= MIR_MARK;
matches++;
} else if ((opcode == Instruction::IPUT_OBJECT)
&& (tm->ssaRep->uses[0] == objSreg)
@@ -1753,24 +1877,19 @@
newMir->ssaRep->defs[0] = dstSreg;
newMir->ssaRep->uses[0] = tmpSreg;
mir->ssaRep->defs[0] = tmpSreg;
+ mir->optimizationFlags &= ~MIR_MARK;
oatInsertMIRAfter(bb, mir, newMir);
+ BasicBlock* tbb = bb;
+ replaceOperands(cUnit, &tbb, mir, dstSreg, tmpSreg);
}
}
break;
- case Instruction::IF_EQ:
- case Instruction::IF_NE:
- case Instruction::IF_LT:
- case Instruction::IF_GE:
- case Instruction::IF_GT:
- case Instruction::IF_LE:
- // TODO: Check for and fuse preceeding comparison
- break;
- case Instruction::IF_EQZ:
- case Instruction::IF_NEZ:
- case Instruction::IF_LTZ:
- case Instruction::IF_GEZ:
- case Instruction::IF_GTZ:
- case Instruction::IF_LEZ:
+#endif
+ case Instruction::CMPL_FLOAT:
+ case Instruction::CMPL_DOUBLE:
+ case Instruction::CMPG_FLOAT:
+ case Instruction::CMPG_DOUBLE:
+ case Instruction::CMP_LONG:
// TODO: Check for and fuse preceeding comparison
break;
default:
@@ -1901,8 +2020,15 @@
// Already nullchecked?
if (dfAttributes & DF_HAS_NULL_CHKS) {
- int srcSreg = (dfAttributes & DF_NULL_CHK_1) ?
- mir->ssaRep->uses[1] : mir->ssaRep->uses[0];
+ int srcIdx;
+ if (dfAttributes & DF_NULL_CHK_1) {
+ srcIdx = 1;
+ } else if (dfAttributes & DF_NULL_CHK_2) {
+ srcIdx = 2;
+ } else {
+ srcIdx = 0;
+ }
+ int srcSreg = mir->ssaRep->uses[srcIdx];
if (oatIsBitSet(cUnit->tempSSARegisterV, srcSreg)) {
// Eliminate the null check
mir->optimizationFlags |= MIR_IGNORE_NULL_CHECK;
@@ -1938,13 +2064,202 @@
void oatMethodBasicBlockOptimization(CompilationUnit *cUnit)
{
- oatInitGrowableList(cUnit, &cUnit->compilerTemps, 6, kListMisc);
- DCHECK_EQ(cUnit->numCompilerTemps, 0);
if (!(cUnit->disableOpt & (1 << kBBOpt))) {
- oatDataFlowAnalysisDispatcher(cUnit, basicBlockOpt,
- kAllNodes,
- false /* isIterative */);
+        oatInitGrowableList(cUnit, &cUnit->compilerTemps, 6, kListMisc);
+        DCHECK_EQ(cUnit->numCompilerTemps, 0);
+        oatDataFlowAnalysisDispatcher(cUnit, basicBlockOpt,
+                                      kAllNodes, false /* isIterative */);
}
}
+void addLoopHeader(CompilationUnit* cUnit, BasicBlock* header,
+ BasicBlock* backEdge)
+{
+ GrowableListIterator iter;
+ oatGrowableListIteratorInit(&cUnit->loopHeaders, &iter);
+ for (LoopInfo* loop = (LoopInfo*)oatGrowableListIteratorNext(&iter);
+ (loop != NULL); loop = (LoopInfo*)oatGrowableListIteratorNext(&iter)) {
+ if (loop->header == header) {
+ oatInsertGrowableList(cUnit, &loop->incomingBackEdges,
+ (intptr_t)backEdge);
+ return;
+ }
+ }
+ LoopInfo* info = (LoopInfo*)oatNew(cUnit, sizeof(LoopInfo), true,
+ kAllocDFInfo);
+ info->header = header;
+ oatInitGrowableList(cUnit, &info->incomingBackEdges, 2, kListMisc);
+ oatInsertGrowableList(cUnit, &info->incomingBackEdges, (intptr_t)backEdge);
+ oatInsertGrowableList(cUnit, &cUnit->loopHeaders, (intptr_t)info);
+}
+
+bool findBackEdges(struct CompilationUnit* cUnit, struct BasicBlock* bb)
+{
+ if ((bb->dataFlowInfo == NULL) || (bb->lastMIRInsn == NULL)) {
+ return false;
+ }
+ Instruction::Code opcode = bb->lastMIRInsn->dalvikInsn.opcode;
+ if (Instruction::Flags(opcode) & Instruction::kBranch) {
+ if (bb->taken && (bb->taken->startOffset <= bb->startOffset)) {
+ DCHECK(bb->dominators != NULL);
+ if (oatIsBitSet(bb->dominators, bb->taken->id)) {
+ if (cUnit->printMe) {
+ LOG(INFO) << "Loop backedge from 0x"
+ << std::hex << bb->lastMIRInsn->offset
+ << " to 0x" << std::hex << bb->taken->startOffset;
+ }
+ addLoopHeader(cUnit, bb->taken, bb);
+ }
+ }
+ }
+ return false;
+}
+
+void addBlocksToLoop(CompilationUnit* cUnit, ArenaBitVector* blocks,
+ BasicBlock* bb, int headId)
+{
+ if (!oatIsBitSet(bb->dominators, headId) ||
+ oatIsBitSet(blocks, bb->id)) {
+ return;
+ }
+ oatSetBit(cUnit, blocks, bb->id);
+ GrowableListIterator iter;
+ oatGrowableListIteratorInit(bb->predecessors, &iter);
+ BasicBlock* predBB;
+ for (predBB = (BasicBlock*)oatGrowableListIteratorNext(&iter); predBB;
+ predBB = (BasicBlock*)oatGrowableListIteratorNext(&iter)) {
+ addBlocksToLoop(cUnit, blocks, predBB, headId);
+ }
+}
+
+void oatDumpLoops(CompilationUnit *cUnit)
+{
+ GrowableListIterator iter;
+ oatGrowableListIteratorInit(&cUnit->loopHeaders, &iter);
+ for (LoopInfo* loop = (LoopInfo*)oatGrowableListIteratorNext(&iter);
+ (loop != NULL); loop = (LoopInfo*)oatGrowableListIteratorNext(&iter)) {
+ LOG(INFO) << "Loop head block id " << loop->header->id
+ << ", offset 0x" << std::hex << loop->header->startOffset
+ << ", Depth: " << loop->header->nestingDepth;
+ GrowableListIterator iter;
+ oatGrowableListIteratorInit(&loop->incomingBackEdges, &iter);
+ BasicBlock* edgeBB;
+ for (edgeBB = (BasicBlock*)oatGrowableListIteratorNext(&iter); edgeBB;
+ edgeBB = (BasicBlock*)oatGrowableListIteratorNext(&iter)) {
+ LOG(INFO) << " Backedge block id " << edgeBB->id
+ << ", offset 0x" << std::hex << edgeBB->startOffset;
+ ArenaBitVectorIterator bIter;
+ oatBitVectorIteratorInit(loop->blocks, &bIter);
+ for (int bbId = oatBitVectorIteratorNext(&bIter); bbId != -1;
+ bbId = oatBitVectorIteratorNext(&bIter)) {
+ BasicBlock *bb;
+ bb = (BasicBlock*)
+ oatGrowableListGetElement(&cUnit->blockList, bbId);
+ LOG(INFO) << " (" << bb->id << ", 0x" << std::hex
+ << bb->startOffset << ")";
+ }
+ }
+ }
+}
+
+void oatMethodLoopDetection(CompilationUnit *cUnit)
+{
+ if (cUnit->disableOpt & (1 << kPromoteRegs)) {
+ return;
+ }
+ oatInitGrowableList(cUnit, &cUnit->loopHeaders, 6, kListMisc);
+ // Find the loop headers
+ oatDataFlowAnalysisDispatcher(cUnit, findBackEdges,
+ kAllNodes, false /* isIterative */);
+ GrowableListIterator iter;
+ oatGrowableListIteratorInit(&cUnit->loopHeaders, &iter);
+ // Add blocks to each header
+ for (LoopInfo* loop = (LoopInfo*)oatGrowableListIteratorNext(&iter);
+ loop; loop = (LoopInfo*)oatGrowableListIteratorNext(&iter)) {
+ loop->blocks = oatAllocBitVector(cUnit, cUnit->numBlocks, true,
+ kBitMapMisc);
+ oatSetBit(cUnit, loop->blocks, loop->header->id);
+ GrowableListIterator iter;
+ oatGrowableListIteratorInit(&loop->incomingBackEdges, &iter);
+ BasicBlock* edgeBB;
+ for (edgeBB = (BasicBlock*)oatGrowableListIteratorNext(&iter); edgeBB;
+ edgeBB = (BasicBlock*)oatGrowableListIteratorNext(&iter)) {
+ addBlocksToLoop(cUnit, loop->blocks, edgeBB, loop->header->id);
+ }
+ }
+ // Compute the nesting depth of each header
+ oatGrowableListIteratorInit(&cUnit->loopHeaders, &iter);
+ for (LoopInfo* loop = (LoopInfo*)oatGrowableListIteratorNext(&iter);
+ loop; loop = (LoopInfo*)oatGrowableListIteratorNext(&iter)) {
+ GrowableListIterator iter2;
+ oatGrowableListIteratorInit(&cUnit->loopHeaders, &iter2);
+ LoopInfo* loop2;
+ for (loop2 = (LoopInfo*)oatGrowableListIteratorNext(&iter2);
+ loop2; loop2 = (LoopInfo*)oatGrowableListIteratorNext(&iter2)) {
+ if (oatIsBitSet(loop2->blocks, loop->header->id)) {
+ loop->header->nestingDepth++;
+ }
+ }
+ }
+ // Assign nesting depth to each block in all loops
+ oatGrowableListIteratorInit(&cUnit->loopHeaders, &iter);
+ for (LoopInfo* loop = (LoopInfo*)oatGrowableListIteratorNext(&iter);
+ (loop != NULL); loop = (LoopInfo*)oatGrowableListIteratorNext(&iter)) {
+ ArenaBitVectorIterator bIter;
+ oatBitVectorIteratorInit(loop->blocks, &bIter);
+ for (int bbId = oatBitVectorIteratorNext(&bIter); bbId != -1;
+ bbId = oatBitVectorIteratorNext(&bIter)) {
+ BasicBlock *bb;
+ bb = (BasicBlock*) oatGrowableListGetElement(&cUnit->blockList,
+ bbId);
+ bb->nestingDepth = std::max(bb->nestingDepth,
+ loop->header->nestingDepth);
+ }
+ }
+ if (cUnit->printMe) {
+ oatDumpLoops(cUnit);
+ }
+}
+
+/*
+ * Count uses, weighting by loop nesting depth. This code only
+ * counts explicitly used sRegs. A later phase will add implicit
+ * counts for things such as Method*, null-checked references, etc.
+ */
+bool countUses(struct CompilationUnit* cUnit, struct BasicBlock* bb)
+{
+ if (bb->blockType != kDalvikByteCode) {
+ return false;
+ }
+ for (MIR* mir = bb->firstMIRInsn; (mir != NULL); mir = mir->next) {
+ if (mir->ssaRep == NULL) {
+ continue;
+ }
+ for (int i = 0; i < mir->ssaRep->numUses; i++) {
+ int sReg = mir->ssaRep->uses[i];
+ uint32_t weight = std::min(16U, (uint32_t)bb->nestingDepth);
+ DCHECK_LT(sReg, (int)cUnit->useCounts.numUsed);
+ cUnit->useCounts.elemList[sReg] += (1 << weight);
+ }
+ }
+ return false;
+}
+
+void oatMethodUseCount(CompilationUnit *cUnit)
+{
+ if (cUnit->disableOpt & (1 << kPromoteRegs)) {
+ return;
+ }
+ oatInitGrowableList(cUnit, &cUnit->useCounts, cUnit->numSSARegs + 32,
+ kListMisc);
+ // Initialize list
+ for (int i = 0; i < cUnit->numSSARegs; i++) {
+ oatInsertGrowableList(cUnit, &cUnit->useCounts, 0);
+ }
+ oatDataFlowAnalysisDispatcher(cUnit, countUses,
+ kAllNodes, false /* isIterative */);
+}
+
} // namespace art
diff --git a/src/compiler/Dataflow.h b/src/compiler/Dataflow.h
index 2df9373..77bf756 100644
--- a/src/compiler/Dataflow.h
+++ b/src/compiler/Dataflow.h
@@ -37,15 +37,16 @@
kFormat35c,
kFormat3rc,
kPhi,
- kNullCheckSrc0, // Null check of src[0]
- kNullCheckSrc1, // Null check of src[1]
+ kNullCheckSrc0, // Null check of uses[0]
+ kNullCheckSrc1, // Null check of uses[1]
+ kNullCheckSrc2, // Null check of uses[2]
kNullCheckOut0, // Null check out outgoing arg0
kDstNonNull, // May assume dst is non-null
kRetNonNull, // May assume retval is non-null
kNullTransferSrc0, // Object copy src[0] -> dst
kNullTransferSrcN, // Phi null check state transfer
- kRangeCheckSrc1, // Range check of src[1]
- kRangeCheckSrc2, // Range check of src[2]
+ kRangeCheckSrc1, // Range check of uses[1]
+ kRangeCheckSrc2, // Range check of uses[2]
kFPA,
kFPB,
kFPC,
@@ -73,6 +74,7 @@
#define DF_PHI (1 << kPhi)
#define DF_NULL_CHK_0 (1 << kNullCheckSrc0)
#define DF_NULL_CHK_1 (1 << kNullCheckSrc1)
+#define DF_NULL_CHK_2 (1 << kNullCheckSrc2)
#define DF_NULL_CHK_OUT0 (1 << kNullCheckOut0)
#define DF_NON_NULL_DST (1 << kDstNonNull)
#define DF_NON_NULL_RET (1 << kRetNonNull)
@@ -96,6 +98,7 @@
#define DF_HAS_NULL_CHKS (DF_NULL_CHK_0 | \
DF_NULL_CHK_1 | \
+ DF_NULL_CHK_2 | \
DF_NULL_CHK_OUT0)
#define DF_HAS_NR_CHKS (DF_HAS_NULL_CHKS | \
@@ -147,6 +150,16 @@
int minC; // For DIV - will affect lower bound checking
};
+struct LoopInfo {
+ BasicBlock* header;
+ GrowableList incomingBackEdges;
+ ArenaBitVector* blocks;
+};
+
+void oatMethodLoopDetection(CompilationUnit*);
+
+void oatMethodUseCount(CompilationUnit*);
+
void oatMethodNullCheckElimination(CompilationUnit*);
void oatMethodBasicBlockOptimization(CompilationUnit*);
diff --git a/src/compiler/Frontend.cc b/src/compiler/Frontend.cc
index fc7fcb4..d97c20f 100644
--- a/src/compiler/Frontend.cc
+++ b/src/compiler/Frontend.cc
@@ -34,7 +34,7 @@
//(1 << kTrackLiveTemps) |
//(1 << kSkipLargeMethodOptimization) |
//(1 << kSafeOptimizations) |
- (1 << kBBOpt) |
+ //(1 << kBBOpt) |
0;
uint32_t compilerDebugFlags = 0 | // Enable debug/testing modes
@@ -967,13 +967,17 @@
/* Perform SSA transformation for the whole method */
oatMethodSSATransformation(cUnit.get());
+ /* Detect loops */
+ oatMethodLoopDetection(cUnit.get());
+
+ /* Count uses */
+ oatMethodUseCount(cUnit.get());
+
/* Perform null check elimination */
oatMethodNullCheckElimination(cUnit.get());
-#if 0
/* Do some basic block optimizations */
oatMethodBasicBlockOptimization(cUnit.get());
-#endif
oatInitializeRegAlloc(cUnit.get()); // Needs to happen after SSA naming
diff --git a/src/compiler/codegen/CodegenUtil.cc b/src/compiler/codegen/CodegenUtil.cc
index 27433ca..cc31b29 100644
--- a/src/compiler/codegen/CodegenUtil.cc
+++ b/src/compiler/codegen/CodegenUtil.cc
@@ -371,6 +371,7 @@
LOG(INFO) << "Outs : " << cUnit->numOuts;
LOG(INFO) << "CoreSpills : " << cUnit->numCoreSpills;
LOG(INFO) << "FPSpills : " << cUnit->numFPSpills;
+ LOG(INFO) << "CompilerTemps : " << cUnit->numCompilerTemps;
LOG(INFO) << "Frame size : " << cUnit->frameSize;
LOG(INFO) << "code size is " << cUnit->totalSize <<
" bytes, Dalvik size is " << insnsSize * 2;
diff --git a/src/compiler/codegen/GenCommon.cc b/src/compiler/codegen/GenCommon.cc
index bfc5639..34d9fb9 100644
--- a/src/compiler/codegen/GenCommon.cc
+++ b/src/compiler/codegen/GenCommon.cc
@@ -1341,22 +1341,23 @@
opRegCopy(cUnit, regPtr, rlArray.lowReg);
}
- if (!(mir->optimizationFlags & MIR_IGNORE_RANGE_CHECK)) {
- int regLen = oatAllocTemp(cUnit);
+ bool needsRangeCheck = (!(mir->optimizationFlags & MIR_IGNORE_RANGE_CHECK));
+ int regLen = INVALID_REG;
+ if (needsRangeCheck) {
+ regLen = oatAllocTemp(cUnit);
//NOTE: max live temps(4) here.
/* Get len */
loadWordDisp(cUnit, rlArray.lowReg, lenOffset, regLen);
- /* regPtr -> array data */
- opRegImm(cUnit, kOpAdd, regPtr, dataOffset);
+ }
+ /* regPtr -> array data */
+ opRegImm(cUnit, kOpAdd, regPtr, dataOffset);
+ /* at this point, regPtr points to array, 2 live temps */
+ rlSrc = loadValue(cUnit, rlSrc, regClass);
+ if (needsRangeCheck) {
genRegRegCheck(cUnit, kCondCs, rlIndex.lowReg, regLen, mir,
kThrowArrayBounds);
oatFreeTemp(cUnit, regLen);
- } else {
- /* regPtr -> array data */
- opRegImm(cUnit, kOpAdd, regPtr, dataOffset);
}
- /* at this point, regPtr points to array, 2 live temps */
- rlSrc = loadValue(cUnit, rlSrc, regClass);
storeBaseIndexed(cUnit, regPtr, rlIndex.lowReg, rlSrc.lowReg,
scale, kWord);
}
@@ -1406,21 +1407,15 @@
}
#else
int regPtr = oatAllocTemp(cUnit);
- if (!(mir->optimizationFlags & MIR_IGNORE_RANGE_CHECK)) {
- int regLen = oatAllocTemp(cUnit);
+ bool needsRangeCheck = (!(mir->optimizationFlags & MIR_IGNORE_RANGE_CHECK));
+ int regLen = INVALID_REG;
+ if (needsRangeCheck) {
+ regLen = oatAllocTemp(cUnit);
/* Get len */
loadWordDisp(cUnit, rlArray.lowReg, lenOffset, regLen);
- /* regPtr -> array data */
- opRegRegImm(cUnit, kOpAdd, regPtr, rlArray.lowReg, dataOffset);
- // TODO: change kCondCS to a more meaningful name, is the sense of
- // carry-set/clear flipped?
- genRegRegCheck(cUnit, kCondCs, rlIndex.lowReg, regLen, mir,
- kThrowArrayBounds);
- oatFreeTemp(cUnit, regLen);
- } else {
- /* regPtr -> array data */
- opRegRegImm(cUnit, kOpAdd, regPtr, rlArray.lowReg, dataOffset);
}
+ /* regPtr -> array data */
+ opRegRegImm(cUnit, kOpAdd, regPtr, rlArray.lowReg, dataOffset);
oatFreeTemp(cUnit, rlArray.lowReg);
if ((size == kLong) || (size == kDouble)) {
if (scale) {
@@ -1434,6 +1429,13 @@
oatFreeTemp(cUnit, rlIndex.lowReg);
rlResult = oatEvalLoc(cUnit, rlDest, regClass, true);
+ if (needsRangeCheck) {
+ // TODO: change kCondCS to a more meaningful name, is the sense of
+ // carry-set/clear flipped?
+ genRegRegCheck(cUnit, kCondCs, rlIndex.lowReg, regLen, mir,
+ kThrowArrayBounds);
+ oatFreeTemp(cUnit, regLen);
+ }
loadPair(cUnit, regPtr, rlResult.lowReg, rlResult.highReg);
oatFreeTemp(cUnit, regPtr);
@@ -1441,6 +1443,13 @@
} else {
rlResult = oatEvalLoc(cUnit, rlDest, regClass, true);
+ if (needsRangeCheck) {
+ // TODO: change kCondCS to a more meaningful name, is the sense of
+ // carry-set/clear flipped?
+ genRegRegCheck(cUnit, kCondCs, rlIndex.lowReg, regLen, mir,
+ kThrowArrayBounds);
+ oatFreeTemp(cUnit, regLen);
+ }
loadBaseIndexed(cUnit, regPtr, rlIndex.lowReg, rlResult.lowReg,
scale, size);
@@ -1483,20 +1492,16 @@
/* null object? */
genNullCheck(cUnit, rlArray.sRegLow, rlArray.lowReg, mir);
- if (!(mir->optimizationFlags & MIR_IGNORE_RANGE_CHECK)) {
- int regLen = oatAllocTemp(cUnit);
+ bool needsRangeCheck = (!(mir->optimizationFlags & MIR_IGNORE_RANGE_CHECK));
+ int regLen = INVALID_REG;
+ if (needsRangeCheck) {
+ regLen = oatAllocTemp(cUnit);
//NOTE: max live temps(4) here.
/* Get len */
loadWordDisp(cUnit, rlArray.lowReg, lenOffset, regLen);
- /* regPtr -> array data */
- opRegImm(cUnit, kOpAdd, regPtr, dataOffset);
- genRegRegCheck(cUnit, kCondCs, rlIndex.lowReg, regLen, mir,
- kThrowArrayBounds);
- oatFreeTemp(cUnit, regLen);
- } else {
- /* regPtr -> array data */
- opRegImm(cUnit, kOpAdd, regPtr, dataOffset);
}
+ /* regPtr -> array data */
+ opRegImm(cUnit, kOpAdd, regPtr, dataOffset);
/* at this point, regPtr points to array, 2 live temps */
if ((size == kLong) || (size == kDouble)) {
//TUNING: specific wide routine that can handle fp regs
@@ -1510,12 +1515,22 @@
}
rlSrc = loadValueWide(cUnit, rlSrc, regClass);
+ if (needsRangeCheck) {
+ genRegRegCheck(cUnit, kCondCs, rlIndex.lowReg, regLen, mir,
+ kThrowArrayBounds);
+ oatFreeTemp(cUnit, regLen);
+ }
+
storePair(cUnit, regPtr, rlSrc.lowReg, rlSrc.highReg);
oatFreeTemp(cUnit, regPtr);
} else {
rlSrc = loadValue(cUnit, rlSrc, regClass);
-
+ if (needsRangeCheck) {
+ genRegRegCheck(cUnit, kCondCs, rlIndex.lowReg, regLen, mir,
+ kThrowArrayBounds);
+ oatFreeTemp(cUnit, regLen);
+ }
storeBaseIndexed(cUnit, regPtr, rlIndex.lowReg, rlSrc.lowReg,
scale, size);
}
diff --git a/src/compiler/codegen/MethodCodegenDriver.cc b/src/compiler/codegen/MethodCodegenDriver.cc
index 64f55c6..45a0c75 100644
--- a/src/compiler/codegen/MethodCodegenDriver.cc
+++ b/src/compiler/codegen/MethodCodegenDriver.cc
@@ -757,9 +757,12 @@
newLIR1(cUnit, kPseudoSSARep, (int) ssaString);
break;
}
- case kMirOpCopy:
- UNIMPLEMENTED(FATAL) << "Need kMirOpCopy";
+ case kMirOpCopy: {
+ RegLocation rlSrc = oatGetSrc(cUnit, mir, 0);
+ RegLocation rlDest = oatGetDest(cUnit, mir, 0);
+ storeValue(cUnit, rlDest, rlSrc);
break;
+ }
default:
break;
}
diff --git a/src/compiler/codegen/RallocUtil.cc b/src/compiler/codegen/RallocUtil.cc
index afbefff..c08b2e8 100644
--- a/src/compiler/codegen/RallocUtil.cc
+++ b/src/compiler/codegen/RallocUtil.cc
@@ -1043,52 +1043,31 @@
void oatCountRefs(CompilationUnit *cUnit, BasicBlock* bb,
RefCounts* coreCounts, RefCounts* fpCounts)
{
- MIR* mir;
- if (bb->blockType != kDalvikByteCode && bb->blockType != kEntryBlock &&
- bb->blockType != kExitBlock)
+ if ((cUnit->disableOpt & (1 << kPromoteRegs)) ||
+ !((bb->blockType == kEntryBlock) || (bb->blockType == kExitBlock) ||
+ (bb->blockType == kDalvikByteCode))) {
return;
-
- for (mir = bb->firstMIRInsn; mir; mir = mir->next) {
- SSARepresentation *ssaRep = mir->ssaRep;
- if (ssaRep) {
- for (int i = 0; i < ssaRep->numDefs;) {
- RegLocation loc = cUnit->regLocation[ssaRep->defs[i]];
- RefCounts* counts = loc.fp ? fpCounts : coreCounts;
- int vReg = SRegToVReg(cUnit, ssaRep->defs[i]);
- if (loc.defined) {
- counts[vReg].count++;
- }
- if (loc.wide) {
- if (loc.defined) {
- if (loc.fp) {
- counts[vReg].doubleStart = true;
- }
- counts[vReg+1].count++;
- }
- i += 2;
- } else {
- i++;
+ }
+ for (int i = 0; i < cUnit->numSSARegs;) {
+ RegLocation loc = cUnit->regLocation[i];
+ RefCounts* counts = loc.fp ? fpCounts : coreCounts;
+ int vReg = SRegToVReg(cUnit, loc.sRegLow);
+ if (vReg < 0) {
+ vReg = cUnit->numDalvikRegisters - (vReg + 1);
+ }
+ if (loc.defined) {
+ counts[vReg].count += cUnit->useCounts.elemList[i];
+ }
+ if (loc.wide) {
+ if (loc.defined) {
+            if (loc.fp) {
+                counts[vReg].doubleStart = true;
+            }
+            counts[vReg+1].count += cUnit->useCounts.elemList[i+1];
}
}
- for (int i = 0; i < ssaRep->numUses;) {
- RegLocation loc = cUnit->regLocation[ssaRep->uses[i]];
- RefCounts* counts = loc.fp ? fpCounts : coreCounts;
- int vReg = SRegToVReg(cUnit, ssaRep->uses[i]);
- if (loc.defined) {
- counts[vReg].count++;
- }
- if (loc.wide) {
- if (loc.defined) {
- if (loc.fp) {
- counts[vReg].doubleStart = true;
- }
- counts[vReg+1].count++;
- }
- i += 2;
- } else {
- i++;
- }
- }
+ i += 2;
+ } else {
+ i++;
}
}
}
@@ -1115,7 +1094,9 @@
*/
extern void oatDoPromotion(CompilationUnit* cUnit)
{
- int numRegs = cUnit->numDalvikRegisters;
+ int regBias = cUnit->numCompilerTemps + 1;
+ int dalvikRegs = cUnit->numDalvikRegisters;
+ int numRegs = dalvikRegs + regBias;
// Allow target code to add any special registers
oatAdjustSpillMask(cUnit);
@@ -1135,9 +1116,14 @@
oatNew(cUnit, sizeof(RefCounts) * numRegs, true, kAllocRegAlloc);
RefCounts *fpRegs = (RefCounts *)
oatNew(cUnit, sizeof(RefCounts) * numRegs, true, kAllocRegAlloc);
- for (int i = 0; i < numRegs; i++) {
+ // Set ssa names for original Dalvik registers
+ for (int i = 0; i < dalvikRegs; i++) {
coreRegs[i].sReg = fpRegs[i].sReg = i;
}
+ // Set ssa names for Method* and compiler temps
+ for (int i = 0; i < regBias; i++) {
+ coreRegs[dalvikRegs + i].sReg = fpRegs[dalvikRegs + i].sReg = (-1 - i);
+ }
GrowableListIterator iterator;
oatGrowableListIteratorInit(&cUnit->blockList, &iterator);
while (true) {
diff --git a/src/compiler/codegen/arm/Thumb2/Factory.cc b/src/compiler/codegen/arm/Thumb2/Factory.cc
index fdf0ca2..c79f7c6 100644
--- a/src/compiler/codegen/arm/Thumb2/Factory.cc
+++ b/src/compiler/codegen/arm/Thumb2/Factory.cc
@@ -1034,7 +1034,7 @@
}
}
LIR* res = rawLIR(cUnit, cUnit->currentDalvikOffset, opcode, rDest, rSrc);
- if (!(cUnit->disableOpt && (1 << kSafeOptimizations)) && rDest == rSrc) {
+ if (!(cUnit->disableOpt & (1 << kSafeOptimizations)) && rDest == rSrc) {
res->flags.isNop = true;
}
return res;
diff --git a/src/compiler/codegen/mips/Mips32/Gen.cc b/src/compiler/codegen/mips/Mips32/Gen.cc
index e86a942..dc98508 100644
--- a/src/compiler/codegen/mips/Mips32/Gen.cc
+++ b/src/compiler/codegen/mips/Mips32/Gen.cc
@@ -450,7 +450,7 @@
#endif
LIR* res = rawLIR(cUnit, cUnit->currentDalvikOffset, kMipsMove,
rDest, rSrc);
- if (!(cUnit->disableOpt && (1 << kSafeOptimizations)) && rDest == rSrc) {
+ if (!(cUnit->disableOpt & (1 << kSafeOptimizations)) && rDest == rSrc) {
res->flags.isNop = true;
}
return res;