diff options
| author | 2012-08-20 11:12:18 -0700 | |
|---|---|---|
| committer | 2012-08-23 15:22:25 -0700 | |
| commit | ca7a5e484ac02927247cc77ad40f291bf6613ed5 (patch) | |
| tree | 5dd6f55984fe8eb0dd2cf80bd3b2aff5f514afa2 /src/compiler/codegen | |
| parent | b18e77abdb06a443744fbb6589e0932fa89f6073 (diff) | |
Quick compiler: restore optimizations
This CL re-enables optimizations on the Quick compile path.
Notes:
o Although all optimizations are enabled, several are now useless
because of llvm and bitcode constraints:
- Large method de-optimization (i.e. - skipping expensive dataflow
analysis) can't be done because we have to do the analysis to
produce a CFG that makes the bitcode verifier happy.
- Small method pattern matching isn't applicable w/ bitcode (though
I can probably do something similar in the Quick backend by
looking for bitcode patterns instead of dex patterns).
- Branch fusing doesn't translate to bitcode.
- Bitcode generation has de-optimized code layout. We'll try to
repair the damage in a subsequent CL.
o There is an ugly workaround related to the way we're loading and
unloading the compiler .so containing llvm. [See comment in compiler.cc]
o We're still running single-threaded - need to add the magic to allow
multi-threaded use of llvm.
o With the CL, the phone boots, all target tests pass and all cts VM
tests pass (except those being dealt with via a verifier change).
o Compile time is pretty bad - when flashing it's best to follow
with an adb sync to avoid on-device compilation of system apps.
Change-Id: I1c98f9e64aefbcbd24b957c71544c28450eb2023
Diffstat (limited to 'src/compiler/codegen')
| -rw-r--r-- | src/compiler/codegen/MethodBitcode.cc | 136 | ||||
| -rw-r--r-- | src/compiler/codegen/Ralloc.h | 3 | ||||
| -rw-r--r-- | src/compiler/codegen/RallocUtil.cc | 43 |
3 files changed, 158 insertions, 24 deletions
diff --git a/src/compiler/codegen/MethodBitcode.cc b/src/compiler/codegen/MethodBitcode.cc index e7e4e5a190..6b78765490 100644 --- a/src/compiler/codegen/MethodBitcode.cc +++ b/src/compiler/codegen/MethodBitcode.cc @@ -107,17 +107,63 @@ void createLocFromValue(CompilationUnit* cUnit, llvm::Value* val) loc.wide = ((ty == cUnit->irb->getInt64Ty()) || (ty == cUnit->irb->getDoubleTy())); loc.defined = true; - if ((ty == cUnit->irb->getFloatTy()) || - (ty == cUnit->irb->getDoubleTy())) { + loc.home = false; // May change during promotion + loc.sRegLow = baseSReg; + loc.origSReg = cUnit->locMap.size(); + PromotionMap pMap = cUnit->promotionMap[baseSReg]; + if (ty == cUnit->irb->getFloatTy()) { + loc.fp = true; + if (pMap.fpLocation == kLocPhysReg) { + loc.lowReg = pMap.fpReg; + loc.location = kLocPhysReg; + loc.home = true; + } + } else if (ty == cUnit->irb->getDoubleTy()) { loc.fp = true; + PromotionMap pMapHigh = cUnit->promotionMap[baseSReg + 1]; + if ((pMap.fpLocation == kLocPhysReg) && + (pMapHigh.fpLocation == kLocPhysReg) && + ((pMap.fpReg & 0x1) == 0) && + (pMap.fpReg + 1 == pMapHigh.fpReg)) { + loc.lowReg = pMap.fpReg; + loc.highReg = pMapHigh.fpReg; + loc.location = kLocPhysReg; + loc.home = true; + } } else if (ty == cUnit->irb->GetJObjectTy()) { loc.ref = true; + if (pMap.coreLocation == kLocPhysReg) { + loc.lowReg = pMap.coreReg; + loc.location = kLocPhysReg; + loc.home = true; + } + } else if (ty == cUnit->irb->getInt64Ty()) { + loc.core = true; + PromotionMap pMapHigh = cUnit->promotionMap[baseSReg + 1]; + if ((pMap.coreLocation == kLocPhysReg) && + (pMapHigh.coreLocation == kLocPhysReg)) { + loc.lowReg = pMap.coreReg; + loc.highReg = pMapHigh.coreReg; + loc.location = kLocPhysReg; + loc.home = true; + } } else { loc.core = true; + if (pMap.coreLocation == kLocPhysReg) { + loc.lowReg = pMap.coreReg; + loc.location = kLocPhysReg; + loc.home = true; + } + } + + if (cUnit->printMe && loc.home) { + if (loc.wide) { + LOG(INFO) << "Promoted wide " << s << " to 
regs " << loc.lowReg + << "/" << loc.highReg; + } else { + LOG(INFO) << "Promoted " << s << " to reg " << loc.lowReg; + } } - loc.home = false; // Will change during promotion - loc.sRegLow = baseSReg; - loc.origSReg = cUnit->locMap.size(); cUnit->locMap.Put(val, loc); } @@ -2883,12 +2929,19 @@ bool methodBitcodeBlockCodeGen(CompilationUnit* cUnit, llvm::BasicBlock* bb) oatNew(cUnit, sizeof(RegLocation) * cUnit->numIns, true, kAllocMisc); llvm::Function::arg_iterator it(cUnit->func->arg_begin()); llvm::Function::arg_iterator it_end(cUnit->func->arg_end()); + // Skip past Method* + it++; for (unsigned i = 0; it != it_end; ++it) { llvm::Value* val = it; argLocs[i++] = valToLoc(cUnit, val); llvm::Type* ty = val->getType(); if ((ty == cUnit->irb->getInt64Ty()) || (ty == cUnit->irb->getDoubleTy())) { - argLocs[i++].sRegLow = INVALID_SREG; + argLocs[i] = argLocs[i-1]; + argLocs[i].lowReg = argLocs[i].highReg; + argLocs[i].origSReg++; + argLocs[i].sRegLow = INVALID_SREG; + argLocs[i].highWord = true; + i++; } } genEntrySequence(cUnit, argLocs, cUnit->methodLoc); @@ -3365,15 +3418,78 @@ void oatMethodBitcode2LIR(CompilationUnit* cUnit) cUnit->numFPSpills = 0; cUnit->coreVmapTable.clear(); cUnit->fpVmapTable.clear(); - oatAdjustSpillMask(cUnit); - cUnit->frameSize = oatComputeFrameSize(cUnit); /* * At this point, we've lost all knowledge of register promotion. * Rebuild that info from the MethodInfo intrinsic (if it - * exists - not required for correctness). + * exists - not required for correctness). Normally, this will + * be the first instruction we encounter, so we won't have to iterate + * through everything. */ - // TODO: find and recover MethodInfo. 
+ for (llvm::inst_iterator i = llvm::inst_begin(func), + e = llvm::inst_end(func); i != e; ++i) { + llvm::CallInst* callInst = llvm::dyn_cast<llvm::CallInst>(&*i); + if (callInst != NULL) { + llvm::Function* callee = callInst->getCalledFunction(); + greenland::IntrinsicHelper::IntrinsicId id = + cUnit->intrinsic_helper->GetIntrinsicId(callee); + if (id == greenland::IntrinsicHelper::MethodInfo) { + if (cUnit->printMe) { + LOG(INFO) << "Found MethodInfo"; + } + llvm::MDNode* regInfoNode = callInst->getMetadata("RegInfo"); + if (regInfoNode != NULL) { + llvm::ConstantInt* numInsValue = + static_cast<llvm::ConstantInt*>(regInfoNode->getOperand(0)); + llvm::ConstantInt* numRegsValue = + static_cast<llvm::ConstantInt*>(regInfoNode->getOperand(1)); + llvm::ConstantInt* numOutsValue = + static_cast<llvm::ConstantInt*>(regInfoNode->getOperand(2)); + llvm::ConstantInt* numCompilerTempsValue = + static_cast<llvm::ConstantInt*>(regInfoNode->getOperand(3)); + llvm::ConstantInt* numSSARegsValue = + static_cast<llvm::ConstantInt*>(regInfoNode->getOperand(4)); + if (cUnit->printMe) { + LOG(INFO) << "RegInfo - Ins:" << numInsValue->getZExtValue() + << ", Regs:" << numRegsValue->getZExtValue() + << ", Outs:" << numOutsValue->getZExtValue() + << ", CTemps:" << numCompilerTempsValue->getZExtValue() + << ", SSARegs:" << numSSARegsValue->getZExtValue(); + } + } + llvm::MDNode* pmapInfoNode = callInst->getMetadata("PromotionMap"); + if (pmapInfoNode != NULL) { + int elems = pmapInfoNode->getNumOperands(); + if (cUnit->printMe) { + LOG(INFO) << "PMap size: " << elems; + } + for (int i = 0; i < elems; i++) { + llvm::ConstantInt* rawMapData = + static_cast<llvm::ConstantInt*>(pmapInfoNode->getOperand(i)); + uint32_t mapData = rawMapData->getZExtValue(); + PromotionMap* p = &cUnit->promotionMap[i]; + p->firstInPair = (mapData >> 24) & 0xff; + p->fpReg = (mapData >> 16) & 0xff; + p->coreReg = (mapData >> 8) & 0xff; + p->fpLocation = static_cast<RegLocationType>((mapData >> 4) & 0xf); + if 
(p->fpLocation == kLocPhysReg) { + oatRecordFpPromotion(cUnit, p->fpReg, i); + } + p->coreLocation = static_cast<RegLocationType>(mapData & 0xf); + if (p->coreLocation == kLocPhysReg) { + oatRecordCorePromotion(cUnit, p->coreReg, i); + } + } + if (cUnit->printMe) { + oatDumpPromotionMap(cUnit); + } + } + break; + } + } + } + oatAdjustSpillMask(cUnit); + cUnit->frameSize = oatComputeFrameSize(cUnit); // Create RegLocations for arguments llvm::Function::arg_iterator it(cUnit->func->arg_begin()); diff --git a/src/compiler/codegen/Ralloc.h b/src/compiler/codegen/Ralloc.h index d1518e8e3f..db8fc7dd47 100644 --- a/src/compiler/codegen/Ralloc.h +++ b/src/compiler/codegen/Ralloc.h @@ -198,6 +198,9 @@ extern int oatSRegOffset(CompilationUnit* cUnit, int reg); extern void oatCountRefs(CompilationUnit*, BasicBlock*, RefCounts*, RefCounts*); extern int oatSortCounts(const void *val1, const void *val2); extern void oatDumpCounts(const RefCounts* arr, int size, const char* msg); +extern void oatRecordCorePromotion(CompilationUnit* cUnit, int reg, int sReg); +extern void oatRecordFpPromotion(CompilationUnit* cUnit, int reg, int sReg); + /* * Architecture-dependent register allocation routines implemented in diff --git a/src/compiler/codegen/RallocUtil.cc b/src/compiler/codegen/RallocUtil.cc index 9d1878a02b..8fa110a90d 100644 --- a/src/compiler/codegen/RallocUtil.cc +++ b/src/compiler/codegen/RallocUtil.cc @@ -161,6 +161,20 @@ int SRegToPMap(CompilationUnit* cUnit, int sReg) } } +void oatRecordCorePromotion(CompilationUnit* cUnit, int reg, int sReg) +{ + int pMapIdx = SRegToPMap(cUnit, sReg); + int vReg = SRegToVReg(cUnit, sReg); + oatGetRegInfo(cUnit, reg)->inUse = true; + cUnit->coreSpillMask |= (1 << reg); + // Include reg for later sort + cUnit->coreVmapTable.push_back(reg << VREG_NUM_WIDTH | + (vReg & ((1 << VREG_NUM_WIDTH) - 1))); + cUnit->numCoreSpills++; + cUnit->promotionMap[pMapIdx].coreLocation = kLocPhysReg; + cUnit->promotionMap[pMapIdx].coreReg = reg; +} + /* 
Reserve a callee-save register. Return -1 if none available */ extern int oatAllocPreservedCoreReg(CompilationUnit* cUnit, int sReg) { @@ -168,21 +182,24 @@ extern int oatAllocPreservedCoreReg(CompilationUnit* cUnit, int sReg) RegisterInfo* coreRegs = cUnit->regPool->coreRegs; for (int i = 0; i < cUnit->regPool->numCoreRegs; i++) { if (!coreRegs[i].isTemp && !coreRegs[i].inUse) { - int vReg = SRegToVReg(cUnit, sReg); - int pMapIdx = SRegToPMap(cUnit, sReg); res = coreRegs[i].reg; - coreRegs[i].inUse = true; - cUnit->coreSpillMask |= (1 << res); - cUnit->coreVmapTable.push_back(vReg); - cUnit->numCoreSpills++; - cUnit->promotionMap[pMapIdx].coreLocation = kLocPhysReg; - cUnit->promotionMap[pMapIdx].coreReg = res; + oatRecordCorePromotion(cUnit, res, sReg); break; } } return res; } +void oatRecordFpPromotion(CompilationUnit* cUnit, int reg, int sReg) +{ + int pMapIdx = SRegToPMap(cUnit, sReg); + int vReg = SRegToVReg(cUnit, sReg); + oatGetRegInfo(cUnit, reg)->inUse = true; + oatMarkPreservedSingle(cUnit, vReg, reg); + cUnit->promotionMap[pMapIdx].fpLocation = kLocPhysReg; + cUnit->promotionMap[pMapIdx].fpReg = reg; +} + /* * Reserve a callee-save fp single register. 
Try to fullfill request for * even/odd allocation, but go ahead and allocate anything if not @@ -195,13 +212,8 @@ int allocPreservedSingle(CompilationUnit* cUnit, int sReg, bool even) for (int i = 0; i < cUnit->regPool->numFPRegs; i++) { if (!FPRegs[i].isTemp && !FPRegs[i].inUse && ((FPRegs[i].reg & 0x1) == 0) == even) { - int vReg = SRegToVReg(cUnit, sReg); - int pMapIdx = SRegToPMap(cUnit, sReg); res = FPRegs[i].reg; - FPRegs[i].inUse = true; - oatMarkPreservedSingle(cUnit, vReg, res); - cUnit->promotionMap[pMapIdx].fpLocation = kLocPhysReg; - cUnit->promotionMap[pMapIdx].fpReg = res; + oatRecordFpPromotion(cUnit, res, sReg); break; } } @@ -1237,6 +1249,9 @@ extern void oatDoPromotion(CompilationUnit* cUnit) } } } + if (cUnit->printMe) { + oatDumpPromotionMap(cUnit); + } } /* Returns sp-relative offset in bytes for a VReg */ |