diff options
| author | 2011-10-11 18:08:40 -0700 | |
|---|---|---|
| committer | 2011-10-14 10:54:03 -0700 | |
| commit | 67bc236a207852d652f6ddeab0a90efc1bd111bb (patch) | |
| tree | eea13fcb90ad8ce5b2b3819fb8caf0281583cd61 /src/compiler/codegen/arm/ArmRallocUtil.cc | |
| parent | 95caa791e560da97363c0c0d22bfda4a7e7377c3 (diff) | |
Register promotion fix
Restructured the type inference mechanism, added lots of DCHECKS,
bumped the default memory allocation size to reflect AOT
compilation and tweaked the bit vector manipulation routines
to be better at handling large sparse vectors (something the old
trace JIT didn't encounter enough to care).
With this CL, optimization is back on by default. Should also see
a significant boost in compilation speed (~2x better for boot.oat).
Change-Id: Ifd134ef337be173a1be756bb9198b24c5b4936b3
Diffstat (limited to 'src/compiler/codegen/arm/ArmRallocUtil.cc')
| -rw-r--r-- | src/compiler/codegen/arm/ArmRallocUtil.cc | 185 |
1 files changed, 92 insertions, 93 deletions
diff --git a/src/compiler/codegen/arm/ArmRallocUtil.cc b/src/compiler/codegen/arm/ArmRallocUtil.cc index ed8a5b2ab3..4af3d0713a 100644 --- a/src/compiler/codegen/arm/ArmRallocUtil.cc +++ b/src/compiler/codegen/arm/ArmRallocUtil.cc @@ -37,7 +37,7 @@ typedef struct RefCounts { /* USE SSA names to count references of base Dalvik vRegs. */ STATIC void countRefs(CompilationUnit *cUnit, BasicBlock* bb, - RefCounts* counts, bool fp) + RefCounts* coreCounts, RefCounts* fpCounts) { MIR* mir; if (bb->blockType != kDalvikByteCode && bb->blockType != kEntryBlock && @@ -47,59 +47,42 @@ STATIC void countRefs(CompilationUnit *cUnit, BasicBlock* bb, for (mir = bb->firstMIRInsn; mir; mir = mir->next) { SSARepresentation *ssaRep = mir->ssaRep; if (ssaRep) { - int i; - int attrs = oatDataFlowAttributes[mir->dalvikInsn.opcode]; - if (fp) { - // Mark 1st reg of double pairs - int first = 0; - int sReg; - if ((attrs & (DF_DA_WIDE|DF_FP_A)) == (DF_DA_WIDE|DF_FP_A)) { - sReg = DECODE_REG( - oatConvertSSARegToDalvik(cUnit, ssaRep->defs[0])); - counts[sReg].doubleStart = true; + for (int i = 0; i < ssaRep->numDefs;) { + RegLocation loc = cUnit->regLocation[ssaRep->defs[i]]; + RefCounts* counts = loc.fp ? fpCounts : coreCounts; + int vReg = oatS2VReg(cUnit, ssaRep->defs[i]); + if (loc.defined) { + counts[vReg].count++; } - if ((attrs & (DF_UA_WIDE|DF_FP_A)) == (DF_UA_WIDE|DF_FP_A)) { - sReg = DECODE_REG( - oatConvertSSARegToDalvik(cUnit, ssaRep->uses[first])); - counts[sReg].doubleStart = true; - } - if (attrs & DF_UA_WIDE) { - first += 2; - } - if ((attrs & (DF_UB_WIDE|DF_FP_B)) == (DF_UB_WIDE|DF_FP_B)) { - sReg = DECODE_REG( - oatConvertSSARegToDalvik(cUnit, ssaRep->uses[first])); - counts[sReg].doubleStart = true; - } - if (attrs & DF_UB_WIDE) { - first += 2; - } - if ((attrs & (DF_UC_WIDE|DF_FP_C)) == (DF_UC_WIDE|DF_FP_C)) { - sReg = DECODE_REG( - oatConvertSSARegToDalvik(cUnit, ssaRep->uses[first])); - counts[sReg].doubleStart = true; - } - } - for (i=0; i< ssaRep->numUses; i++) { - int origSreg = DECODE_REG( - oatConvertSSARegToDalvik(cUnit, ssaRep->uses[i])); - DCHECK_LT(origSreg, cUnit->method->NumRegisters()); - bool fpUse = ssaRep->fpUse ? ssaRep->fpUse[i] : false; - if (fp == fpUse) { - counts[origSreg].count++; + if (loc.wide) { + if (loc.defined) { + if (loc.fp) { + counts[vReg].doubleStart = true; + } + counts[vReg+1].count++; + } + i += 2; + } else { + i++; } } - for (i=0; i< ssaRep->numDefs; i++) { - if (attrs & DF_SETS_CONST) { - // CONST opcodes are untyped - don't pollute the counts - continue; + for (int i = 0; i < ssaRep->numUses;) { + RegLocation loc = cUnit->regLocation[ssaRep->uses[i]]; + RefCounts* counts = loc.fp ? fpCounts : coreCounts; + int vReg = oatS2VReg(cUnit, ssaRep->uses[i]); + if (loc.defined) { + counts[vReg].count++; } - int origSreg = DECODE_REG( - oatConvertSSARegToDalvik(cUnit, ssaRep->defs[i])); - DCHECK_LT(origSreg, cUnit->method->NumRegisters()); - bool fpDef = ssaRep->fpDef ? ssaRep->fpDef[i] : false; - if (fp == fpDef) { - counts[origSreg].count++; + if (loc.wide) { + if (loc.defined) { + if (loc.fp) { + counts[vReg].doubleStart = true; + } + counts[vReg+1].count++; + } + i += 2; + } else { + i++; } } } @@ -159,8 +142,7 @@ extern void oatDoPromotion(CompilationUnit* cUnit) BasicBlock* bb; bb = (BasicBlock*)oatGrowableListIteratorNext(&iterator); if (bb == NULL) break; - countRefs(cUnit, bb, coreRegs, false); - countRefs(cUnit, bb, fpRegs, true); + countRefs(cUnit, bb, coreRegs, fpRegs); } /* @@ -178,21 +160,27 @@ extern void oatDoPromotion(CompilationUnit* cUnit) qsort(coreRegs, numRegs, sizeof(RefCounts), sortCounts); qsort(fpRegs, numRegs, sizeof(RefCounts), sortCounts); + if (cUnit->printMe) { + dumpCounts(coreRegs, numRegs, "Core regs after sort"); + dumpCounts(fpRegs, numRegs, "Fp regs after sort"); + } + if (!(cUnit->disableOpt & (1 << kPromoteRegs))) { // Promote fpRegs for (int i = 0; (fpRegs[i].count > 0) && (i < numRegs); i++) { - if (cUnit->regLocation[fpRegs[i].sReg].fpLocation != kLocPhysReg) { + if (cUnit->promotionMap[fpRegs[i].sReg].fpLocation != kLocPhysReg) { int reg = oatAllocPreservedFPReg(cUnit, fpRegs[i].sReg, fpRegs[i].doubleStart); if (reg < 0) { - break; // No more left + break; // No more left } } } // Promote core regs for (int i = 0; (coreRegs[i].count > 0) && i < numRegs; i++) { - if (cUnit->regLocation[i].location != kLocPhysReg) { + if (cUnit->promotionMap[coreRegs[i].sReg].coreLocation != + kLocPhysReg) { int reg = oatAllocPreservedCoreReg(cUnit, coreRegs[i].sReg); if (reg < 0) { break; // No more left @@ -203,58 +191,69 @@ extern void oatDoPromotion(CompilationUnit* cUnit) // Now, update SSA names to new home locations for (int i = 0; i < cUnit->numSSARegs; i++) { - int baseSreg = cUnit->regLocation[i].sRegLow; - RegLocation *base = &cUnit->regLocation[baseSreg]; - RegLocation *baseNext = &cUnit->regLocation[baseSreg+1]; RegLocation *curr = &cUnit->regLocation[i]; - if (curr->fp) { - /* Single or double, check fpLocation of base */ - if (base->fpLocation == kLocPhysReg) { - if (curr->wide) { - /* TUNING: consider alignment during allocation */ - if ((base->fpLowReg & 1) || - (baseNext->fpLocation != kLocPhysReg)) { - /* Half-promoted or bad alignment - demote */ - curr->location = kLocDalvikFrame; - curr->lowReg = INVALID_REG; - curr->highReg = INVALID_REG; - continue; - } - curr->highReg = baseNext->fpLowReg; + int baseVReg = oatS2VReg(cUnit, curr->sRegLow); + if (!curr->wide) { + if (curr->fp) { + if (cUnit->promotionMap[baseVReg].fpLocation == kLocPhysReg) { + curr->location = kLocPhysReg; + curr->lowReg = cUnit->promotionMap[baseVReg].fpReg; + curr->home = true; + } + } else { + if (cUnit->promotionMap[baseVReg].coreLocation == kLocPhysReg) { + curr->location = kLocPhysReg; + curr->lowReg = cUnit->promotionMap[baseVReg].coreReg; + curr->home = true; } - curr->location = kLocPhysReg; - curr->lowReg = base->fpLowReg; - curr->home = true; } + curr->highReg = INVALID_REG; } else { - /* Core or wide */ - if (base->location == kLocPhysReg) { - if (curr->wide) { - /* Make sure upper half is also in reg or skip */ - if (baseNext->location != kLocPhysReg) { - /* Only half promoted; demote to frame */ - curr->location = kLocDalvikFrame; - curr->lowReg = INVALID_REG; - curr->highReg = INVALID_REG; - continue; + if (curr->highWord) { + continue; + } + if (curr->fp) { + if ((cUnit->promotionMap[baseVReg].fpLocation == kLocPhysReg) && + (cUnit->promotionMap[baseVReg+1].fpLocation == + kLocPhysReg)) { + int lowReg = cUnit->promotionMap[baseVReg].fpReg; + int highReg = cUnit->promotionMap[baseVReg+1].fpReg; + // Doubles require pair of singles starting at even reg + if (((lowReg & 0x1) == 0) && ((lowReg + 1) == highReg)) { + curr->location = kLocPhysReg; + curr->lowReg = lowReg; + curr->highReg = highReg; + curr->home = true; } - curr->highReg = baseNext->lowReg; } - curr->location = kLocPhysReg; - curr->lowReg = base->lowReg; - curr->home = true; + } else { + if ((cUnit->promotionMap[baseVReg].coreLocation == kLocPhysReg) + && (cUnit->promotionMap[baseVReg+1].coreLocation == + kLocPhysReg)) { + curr->location = kLocPhysReg; + curr->lowReg = cUnit->promotionMap[baseVReg].coreReg; + curr->highReg = cUnit->promotionMap[baseVReg+1].coreReg; + curr->home = true; + } } } } } -/* Returns sp-relative offset in bytes */ -extern int oatVRegOffset(CompilationUnit* cUnit, int reg) +/* Returns sp-relative offset in bytes for a VReg */ +extern int oatVRegOffset(CompilationUnit* cUnit, int vReg) { - return (reg < cUnit->numRegs) ? cUnit->regsOffset + (reg << 2) : - cUnit->insOffset + ((reg - cUnit->numRegs) << 2); + return (vReg < cUnit->numRegs) ? cUnit->regsOffset + (vReg << 2) : + cUnit->insOffset + ((vReg - cUnit->numRegs) << 2); } +/* Returns sp-relative offset in bytes for a SReg */ +extern int oatSRegOffset(CompilationUnit* cUnit, int sReg) +{ + return oatVRegOffset(cUnit, oatS2VReg(cUnit, sReg)); +} + + /* Return sp-relative offset in bytes using Method* */ extern int oatVRegOffsetFromMethod(Method* method, int reg) { |