diff options
| author | 2012-03-11 18:39:19 -0700 | |
|---|---|---|
| committer | 2012-03-13 20:59:18 -0700 | |
| commit | e196567b50a084b163937ea9605b51ee1e48adeb (patch) | |
| tree | 709964fc09a36132490d9a3a4805983ec80c57e3 /src | |
| parent | 13b835a45f3dccff1c6d024ad82a2044831c7c41 (diff) | |
SSA rework and support compiler temps in the frame
Add ability for the compiler to allocate new frame temporaries
that play nicely with the register allocation mechanism. To do this
we assign negative virtual register numbers and give them SSA names.
As part of this change, I did a general cleanup of the ssa naming.
An ssa name (or SReg) is an index into an array of (virtual reg, subscript)
pairs. Previously, 16 bits each were allocated for the reg and the subscript.
This CL expands the virtual reg and subscript to 32 bits each.
Method* is now treated as a RegLocation, and will be subject to
temp register tracking and reuse. This CL does not yet include
support for promotion of Method* - that will show up in the next one.
Also included is the beginning of a basic block optimization pass (not
yet in a runnable state, so conditionally compiled out).
(cherry picked from commit f689ffec8827f1dd6b31084f8a6bb240338c7acf)
Change-Id: Ibbdeb97fe05d0e33c1f4a9a6ccbdef1cac7646fc
Diffstat (limited to 'src')
| -rw-r--r-- | src/compiler/Compiler.h | 4 | ||||
| -rw-r--r-- | src/compiler/CompilerIR.h | 35 | ||||
| -rw-r--r-- | src/compiler/CompilerUtility.h | 1 | ||||
| -rw-r--r-- | src/compiler/Dataflow.cc | 272 | ||||
| -rw-r--r-- | src/compiler/Dataflow.h | 9 | ||||
| -rw-r--r-- | src/compiler/Frontend.cc | 6 | ||||
| -rw-r--r-- | src/compiler/IntermediateRep.cc | 2 | ||||
| -rw-r--r-- | src/compiler/Ralloc.cc | 54 | ||||
| -rw-r--r-- | src/compiler/SSATransformation.cc | 25 | ||||
| -rw-r--r-- | src/compiler/Utility.cc | 13 | ||||
| -rw-r--r-- | src/compiler/codegen/CodegenFactory.cc | 40 | ||||
| -rw-r--r-- | src/compiler/codegen/GenCommon.cc | 34 | ||||
| -rw-r--r-- | src/compiler/codegen/MethodCodegenDriver.cc | 18 | ||||
| -rw-r--r-- | src/compiler/codegen/Ralloc.h | 6 | ||||
| -rw-r--r-- | src/compiler/codegen/RallocUtil.cc | 48 | ||||
| -rw-r--r-- | src/compiler/codegen/arm/ArchFactory.cc | 16 | ||||
| -rw-r--r-- | src/compiler/codegen/arm/ArmRallocUtil.cc | 8 | ||||
| -rw-r--r-- | src/compiler/codegen/arm/Thumb2/Ralloc.cc | 4 | ||||
| -rw-r--r-- | src/compiler/codegen/mips/MipsRallocUtil.cc | 8 | ||||
| -rw-r--r-- | src/compiler/codegen/x86/X86RallocUtil.cc | 8 | ||||
| -rw-r--r-- | src/stack.cc | 2 |
21 files changed, 437 insertions, 176 deletions
diff --git a/src/compiler/Compiler.h b/src/compiler/Compiler.h index 9516f25ba5..94d20e5caf 100644 --- a/src/compiler/Compiler.h +++ b/src/compiler/Compiler.h @@ -50,6 +50,7 @@ enum optControlVector { kTrackLiveTemps, kSkipLargeMethodOptimization, kSafeOptimizations, + kBBOpt, }; /* Type of allocation for memory tuning */ @@ -177,7 +178,8 @@ bool oatStartup(void); void oatShutdown(void); void oatScanAllClassPointers(void (*callback)(void* ptr)); void oatInitializeSSAConversion(struct CompilationUnit* cUnit); -int oatConvertSSARegToDalvik(const struct CompilationUnit* cUnit, int ssaReg); +int SRegToVReg(const struct CompilationUnit* cUnit, int ssaReg); +int SRegToSubscript(const struct CompilationUnit* cUnit, int ssaReg); bool oatFindLocalLiveIn(struct CompilationUnit* cUnit, struct BasicBlock* bb); bool oatDoSSAConversion(struct CompilationUnit* cUnit, diff --git a/src/compiler/CompilerIR.h b/src/compiler/CompilerIR.h index 611d1dff3a..bd4c156e4d 100644 --- a/src/compiler/CompilerIR.h +++ b/src/compiler/CompilerIR.h @@ -44,7 +44,7 @@ enum RegisterClass { enum RegLocationType { kLocDalvikFrame = 0, // Normal Dalvik register kLocPhysReg, - kLocSpill, + kLocCompilerTemp, }; struct PromotionMap { @@ -65,7 +65,12 @@ struct RegLocation { unsigned home:1; // Does this represent the home location? u1 lowReg; // First physical register u1 highReg; // 2nd physical register (if wide) - s2 sRegLow; // SSA name for low Dalvik word + int32_t sRegLow; // SSA name for low Dalvik word +}; + +struct CompilerTemp { + int sReg; + ArenaBitVector* bv; }; /* @@ -101,6 +106,11 @@ struct RegisterPool { #define INVALID_REG (0xFF) #define INVALID_OFFSET (-1) +/* SSA encodings for special registers */ +#define SSA_METHOD_BASEREG (-1) +/* First compiler temp basereg, grows smaller */ +#define SSA_CTEMP_BASEREG (-2) + /* * Some code patterns cause the generation of excessively large * methods - in particular initialization sequences. 
There isn't much @@ -153,8 +163,7 @@ enum ExtendedMIROpcode { kMirOpNullNRangeUpCheck, kMirOpNullNRangeDownCheck, kMirOpLowerBound, - kMirOpPunt, - kMirOpCheckInlinePrediction, // Gen checks for predicted inlining + kMirOpCopy, kMirOpLast, }; @@ -169,6 +178,7 @@ enum MIROptimizationFlagPositons { kMIRInlinedPred, // Invoke is inlined via prediction kMIRCallee, // Instruction is inlined from callee kMIRIgnoreSuspendCheck, + kMIRDup, }; #define MIR_IGNORE_NULL_CHECK (1 << kMIRIgnoreNullCheck) @@ -179,6 +189,7 @@ enum MIROptimizationFlagPositons { #define MIR_INLINED_PRED (1 << kMIRInlinedPred) #define MIR_CALLEE (1 << kMIRCallee) #define MIR_IGNORE_SUSPEND_CHECK (1 << kMIRIgnoreSuspendCheck) +#define MIR_DUP (1 << kMIRDup) struct CallsiteInfo { const char* classDescriptor; @@ -222,6 +233,7 @@ struct BasicBlock { bool visited; bool hidden; bool catchEntry; + bool fallThroughTarget; // Reached via fallthrough unsigned int startOffset; const Method* containingMethod; // For blocks from the callee BBType blockType; @@ -310,12 +322,13 @@ struct CompilationUnit { InstructionSet instructionSet; /* Number of total regs used in the whole cUnit after SSA transformation */ int numSSARegs; - /* Map SSA reg i to the Dalvik[15..0]/Sub[31..16] pair. 
*/ - GrowableList* ssaToDalvikMap; + /* Map SSA reg i to the base virtual register/subscript */ + GrowableList* ssaBaseVRegs; + GrowableList* ssaSubscripts; /* The following are new data structures to support SSA representations */ - /* Map original Dalvik reg i to the SSA[15..0]/Sub[31..16] pair */ - int* dalvikToSSAMap; // length == method->registersSize + /* Map original Dalvik virtual reg i to the current SSA name */ + int* vRegToSSAMap; // length == method->registersSize int* SSALastDefs; // length == method->registersSize ArenaBitVector* isConstantV; // length == numSSAReg int* constantValues; // length == numSSAReg @@ -329,6 +342,9 @@ struct CompilationUnit { /* Keep track of Dalvik vReg to physical register mappings */ PromotionMap* promotionMap; + /* SSA name for Method* */ + int methodSReg; + /* * Set to the Dalvik PC of the switch instruction if it has more than * MAX_CHAINED_SWITCH_CASES cases. @@ -336,7 +352,7 @@ struct CompilationUnit { const u2* switchOverflowPad; int numReachableBlocks; - int numDalvikRegisters; // method->registersSize + inlined + int numDalvikRegisters; // method->registersSize BasicBlock* entryBlock; BasicBlock* exitBlock; BasicBlock* curBlock; @@ -346,6 +362,7 @@ struct CompilationUnit { GrowableList domPostOrderTraversal; GrowableList throwLaunchpads; GrowableList suspendLaunchpads; + GrowableList compilerTemps; int* iDomList; ArenaBitVector* tryBlockAddr; ArenaBitVector** defBlockMatrix; // numDalvikRegister x numBlocks diff --git a/src/compiler/CompilerUtility.h b/src/compiler/CompilerUtility.h index 357fe5114f..41f6cf16bd 100644 --- a/src/compiler/CompilerUtility.h +++ b/src/compiler/CompilerUtility.h @@ -118,6 +118,7 @@ bool oatUnifyBitVectors(ArenaBitVector* dest, const ArenaBitVector* src1, const ArenaBitVector* src2); bool oatCompareBitVectors(const ArenaBitVector* src1, const ArenaBitVector* src2); +bool oatTestBitVectors(const ArenaBitVector* src1, const ArenaBitVector* src2); int oatCountSetBits(const ArenaBitVector* 
pBits); void oatDumpLIRInsn(CompilationUnit* cUnit, struct LIR* lir, diff --git a/src/compiler/Dataflow.cc b/src/compiler/Dataflow.cc index ad522bf482..581c463597 100644 --- a/src/compiler/Dataflow.cc +++ b/src/compiler/Dataflow.cc @@ -807,17 +807,19 @@ const int oatDataFlowAttributes[kMirOpLast] = { */ }; -/* Return the Dalvik register/subscript pair of a given SSA register */ -int oatConvertSSARegToDalvik(const CompilationUnit* cUnit, int ssaReg) +/* Return the base virtual register for a SSA name */ +int SRegToVReg(const CompilationUnit* cUnit, int ssaReg) { - return GET_ELEM_N(cUnit->ssaToDalvikMap, int, ssaReg); + DCHECK_LT(ssaReg, (int)cUnit->ssaBaseVRegs->numUsed); + return GET_ELEM_N(cUnit->ssaBaseVRegs, int, ssaReg); +} + +int SRegToSubscript(const CompilationUnit* cUnit, int ssaReg) +{ + DCHECK(ssaReg < (int)cUnit->ssaSubscripts->numUsed); + return GET_ELEM_N(cUnit->ssaSubscripts, int, ssaReg); } -/* - * Utility function to convert encoded SSA register value into Dalvik register - * and subscript pair. Each SSA register can be used to index the - * ssaToDalvikMap list to get the subscript[31..16]/dalvik_reg[15..0] mapping. - */ char* oatGetDalvikDisassembly(CompilationUnit* cUnit, const DecodedInstruction& insn, const char* note) { @@ -904,10 +906,8 @@ char* oatGetDalvikDisassembly(CompilationUnit* cUnit, char* getSSAName(const CompilationUnit* cUnit, int ssaReg, char* name) { - int ssa2DalvikValue = oatConvertSSARegToDalvik(cUnit, ssaReg); - - sprintf(name, "v%d_%d", - DECODE_REG(ssa2DalvikValue), DECODE_SUB(ssa2DalvikValue)); + sprintf(name, "v%d_%d", SRegToVReg(cUnit, ssaReg), + SRegToSubscript(cUnit, ssaReg)); return name; } @@ -1033,11 +1033,6 @@ done: return ret; } -/* - * Utility function to convert encoded SSA register value into Dalvik register - * and subscript pair. Each SSA register can be used to index the - * ssaToDalvikMap list to get the subscript[31..16]/dalvik_reg[15..0] mapping. 
- */ char* oatGetSSAString(CompilationUnit* cUnit, SSARepresentation* ssaRep) { char buffer[256]; @@ -1046,11 +1041,9 @@ char* oatGetSSAString(CompilationUnit* cUnit, SSARepresentation* ssaRep) buffer[0] = 0; for (i = 0; i < ssaRep->numDefs; i++) { - int ssa2DalvikValue = oatConvertSSARegToDalvik(cUnit, ssaRep->defs[i]); - - sprintf(buffer + strlen(buffer), "s%d(v%d_%d) ", - ssaRep->defs[i], DECODE_REG(ssa2DalvikValue), - DECODE_SUB(ssa2DalvikValue)); + int ssaReg = ssaRep->defs[i]; + sprintf(buffer + strlen(buffer), "s%d(v%d_%d) ", ssaReg, + SRegToVReg(cUnit, ssaReg), SRegToSubscript(cUnit, ssaReg)); } if (ssaRep->numDefs) { @@ -1058,12 +1051,12 @@ char* oatGetSSAString(CompilationUnit* cUnit, SSARepresentation* ssaRep) } for (i = 0; i < ssaRep->numUses; i++) { - int ssa2DalvikValue = oatConvertSSARegToDalvik(cUnit, ssaRep->uses[i]); int len = strlen(buffer); + int ssaReg = ssaRep->uses[i]; - if (snprintf(buffer + len, 250 - len, "s%d(v%d_%d) ", - ssaRep->uses[i], DECODE_REG(ssa2DalvikValue), - DECODE_SUB(ssa2DalvikValue)) >= (250 - len)) { + if (snprintf(buffer + len, 250 - len, "s%d(v%d_%d) ", ssaReg, + SRegToVReg(cUnit, ssaReg), + SRegToSubscript(cUnit, ssaReg))) { strcat(buffer, "..."); break; } @@ -1157,29 +1150,32 @@ bool oatFindLocalLiveIn(CompilationUnit* cUnit, BasicBlock* bb) return true; } +int addNewSReg(CompilationUnit* cUnit, int vReg) +{ + // Compiler temps always have a subscript of 0 + int subscript = (vReg < 0) ? 
0 : ++cUnit->SSALastDefs[vReg]; + int ssaReg = cUnit->numSSARegs++; + oatInsertGrowableList(cUnit, cUnit->ssaBaseVRegs, vReg); + oatInsertGrowableList(cUnit, cUnit->ssaSubscripts, subscript); + DCHECK_EQ(cUnit->ssaBaseVRegs->numUsed, cUnit->ssaSubscripts->numUsed); + return ssaReg; +} + /* Find out the latest SSA register for a given Dalvik register */ void handleSSAUse(CompilationUnit* cUnit, int* uses, int dalvikReg, int regIndex) { - int encodedValue = cUnit->dalvikToSSAMap[dalvikReg]; - int ssaReg = DECODE_REG(encodedValue); - uses[regIndex] = ssaReg; + DCHECK((dalvikReg >= 0) && (dalvikReg < cUnit->numDalvikRegisters)); + uses[regIndex] = cUnit->vRegToSSAMap[dalvikReg]; } /* Setup a new SSA register for a given Dalvik register */ void handleSSADef(CompilationUnit* cUnit, int* defs, int dalvikReg, int regIndex) { - int ssaReg = cUnit->numSSARegs++; - /* Bump up the subscript */ - int dalvikSub = ++cUnit->SSALastDefs[dalvikReg]; - int newD2SMapping = ENCODE_REG_SUB(ssaReg, dalvikSub); - - cUnit->dalvikToSSAMap[dalvikReg] = newD2SMapping; - - int newS2DMapping = ENCODE_REG_SUB(dalvikReg, dalvikSub); - oatInsertGrowableList(cUnit, cUnit->ssaToDalvikMap, newS2DMapping); - + DCHECK((dalvikReg >= 0) && (dalvikReg < cUnit->numDalvikRegisters)); + int ssaReg = addNewSReg(cUnit, dalvikReg); + cUnit->vRegToSSAMap[dalvikReg] = ssaReg; defs[regIndex] = ssaReg; } @@ -1351,11 +1347,11 @@ bool oatDoSSAConversion(CompilationUnit* cUnit, BasicBlock* bb) * input to PHI nodes can be derived from the snapshot of all * predecessor blocks. 
*/ - bb->dataFlowInfo->dalvikToSSAMap = + bb->dataFlowInfo->vRegToSSAMap = (int *)oatNew(cUnit, sizeof(int) * cUnit->numDalvikRegisters, false, kAllocDFInfo); - memcpy(bb->dataFlowInfo->dalvikToSSAMap, cUnit->dalvikToSSAMap, + memcpy(bb->dataFlowInfo->vRegToSSAMap, cUnit->vRegToSSAMap, sizeof(int) * cUnit->numDalvikRegisters); } return true; @@ -1447,10 +1443,15 @@ void oatInitializeSSAConversion(CompilationUnit* cUnit) int i; int numDalvikReg = cUnit->numDalvikRegisters; - cUnit->ssaToDalvikMap = (GrowableList *)oatNew(cUnit, sizeof(GrowableList), - false, kAllocDFInfo); - // Create the SSAtoDalvikMap, estimating the max size - oatInitGrowableList(cUnit, cUnit->ssaToDalvikMap, + cUnit->ssaBaseVRegs = (GrowableList *)oatNew(cUnit, sizeof(GrowableList), + false, kAllocDFInfo); + cUnit->ssaSubscripts = (GrowableList *)oatNew(cUnit, sizeof(GrowableList), + false, kAllocDFInfo); + // Create the ssa mappings, estimating the max size + oatInitGrowableList(cUnit, cUnit->ssaBaseVRegs, + numDalvikReg + cUnit->defCount + 128, + kListSSAtoDalvikMap); + oatInitGrowableList(cUnit, cUnit->ssaSubscripts, numDalvikReg + cUnit->defCount + 128, kListSSAtoDalvikMap); /* @@ -1465,26 +1466,28 @@ void oatInitializeSSAConversion(CompilationUnit* cUnit) * into "(0 << 16) | i" */ for (i = 0; i < numDalvikReg; i++) { - oatInsertGrowableList(cUnit, cUnit->ssaToDalvikMap, - ENCODE_REG_SUB(i, 0)); + oatInsertGrowableList(cUnit, cUnit->ssaBaseVRegs, i); + oatInsertGrowableList(cUnit, cUnit->ssaSubscripts, 0); } /* - * Initialize the DalvikToSSAMap map. The low 16 bit is the SSA register id, - * while the high 16 bit is the current subscript. The original Dalvik - * register N is mapped to SSA register N with subscript 0. + * Initialize the DalvikToSSAMap map. There is one entry for each + * Dalvik register, and the SSA names for those are the same. 
*/ - cUnit->dalvikToSSAMap = (int *)oatNew(cUnit, sizeof(int) * numDalvikReg, + cUnit->vRegToSSAMap = (int *)oatNew(cUnit, sizeof(int) * numDalvikReg, false, kAllocDFInfo); /* Keep track of the higest def for each dalvik reg */ cUnit->SSALastDefs = (int *)oatNew(cUnit, sizeof(int) * numDalvikReg, false, kAllocDFInfo); for (i = 0; i < numDalvikReg; i++) { - cUnit->dalvikToSSAMap[i] = i; + cUnit->vRegToSSAMap[i] = i; cUnit->SSALastDefs[i] = 0; } + /* Add ssa reg for Method* */ + cUnit->methodSReg = addNewSReg(cUnit, SSA_METHOD_BASEREG); + /* * Allocate the BasicBlockDataFlow structure for the entry and code blocks */ @@ -1627,6 +1630,160 @@ void oatDataFlowAnalysisDispatcher(CompilationUnit* cUnit, } } +/* Advance to next strictly dominated MIR node in an extended basic block */ +MIR* advanceMIR(CompilationUnit* cUnit, BasicBlock** pBb, MIR* mir, ArenaBitVector* bv) { + BasicBlock* bb = *pBb; + if (mir != NULL) { + mir = mir->next; + if (mir == NULL) { + bb = bb->fallThrough; + if ((bb == NULL) || bb->predecessors->numUsed != 1) { + mir = NULL; + } else { + if (bv) { + oatSetBit(cUnit, bv, bb->id); + } + *pBb = bb; + mir = bb->firstMIRInsn; + } + } + } + return mir; +} + +/* Allocate a compiler temp, return Sreg. 
Reuse existing if no conflict */ +int allocCompilerTempSreg(CompilationUnit* cUnit, ArenaBitVector* bv) +{ + for (int i = 0; i < cUnit->numCompilerTemps; i++) { + CompilerTemp* ct = (CompilerTemp*)cUnit->compilerTemps.elemList[i]; + ArenaBitVector* tBv = ct->bv; + if (!oatTestBitVectors(bv, tBv)) { + // Combine live maps and reuse existing temp + oatUnifyBitVectors(tBv, tBv, bv); + return ct->sReg; + } + } + + // Create a new compiler temp & associated live bitmap + CompilerTemp* ct = (CompilerTemp*)oatNew(cUnit, sizeof(CompilerTemp), + true, kAllocMisc); + ArenaBitVector *nBv = oatAllocBitVector(cUnit, cUnit->numBlocks, true, + kBitMapMisc); + oatCopyBitVector(nBv, bv); + ct->bv = nBv; + ct->sReg = addNewSReg(cUnit, SSA_CTEMP_BASEREG - cUnit->numCompilerTemps); + cUnit->numCompilerTemps++; + oatInsertGrowableList(cUnit, &cUnit->compilerTemps, (intptr_t)ct); + DCHECK_EQ(cUnit->numCompilerTemps, (int)cUnit->compilerTemps.numUsed); + return ct->sReg; +} + +/* Creata a new MIR node for a new pseudo op. 
*/ +MIR* rawMIR(CompilationUnit* cUnit, Instruction::Code opcode, int defs, int uses) +{ + MIR* res = (MIR*)oatNew( cUnit, sizeof(MIR), true, kAllocMIR); + res->ssaRep =(struct SSARepresentation *) + oatNew(cUnit, sizeof(SSARepresentation), true, kAllocDFInfo); + if (uses) { + res->ssaRep->numUses = uses; + res->ssaRep->uses = (int*)oatNew(cUnit, sizeof(int) * uses, false, kAllocDFInfo); + } + if (defs) { + res->ssaRep->numDefs = defs; + res->ssaRep->defs = (int*)oatNew(cUnit, sizeof(int) * defs, false, kAllocDFInfo); + res->ssaRep->fpDef = (bool*)oatNew(cUnit, sizeof(bool) * defs, true, kAllocDFInfo); + } + res->dalvikInsn.opcode = opcode; + return res; +} + +/* Do some MIR-level basic block optimizations */ +bool basicBlockOpt(CompilationUnit* cUnit, BasicBlock* bb) +{ + int numTemps = 0; + + for (MIR* mir = bb->firstMIRInsn; mir; mir = mir->next) { + // Look for interesting opcodes, skip otherwise + switch(mir->dalvikInsn.opcode) { + case Instruction::IGET_OBJECT: { + // TODO: look for CSE + if (mir->optimizationFlags & MIR_DUP) { + break; + } + ArenaBitVector* tempBlockV = cUnit->tempBlockV; + oatClearAllBits(tempBlockV); + oatSetBit(cUnit, tempBlockV, bb->id); + int objSreg = mir->ssaRep->uses[0]; + int dstSreg = mir->ssaRep->defs[0]; + uint32_t fieldIdx = mir->dalvikInsn.vC; + int matches = 0; + BasicBlock* tbb = bb; + MIR* tm = mir; + while (true) { + tm = advanceMIR(cUnit, &tbb, tm, tempBlockV); + if ((tm == NULL) || (tm == mir)) { + break; + } + Instruction::Code opcode = tm->dalvikInsn.opcode; + if ((opcode == Instruction::IGET_OBJECT) + && (tm->ssaRep->uses[0] == objSreg) + && (tm->dalvikInsn.vC == fieldIdx)) { + if (cUnit->printMe) { + LOG(INFO) << "Got DUP IGET_OBJECT @ 0x" + << std::hex << tm->offset << ", from 0x" + << std::hex <<mir->offset; + } + matches++; + } else if ((opcode == Instruction::IPUT_OBJECT) + && (tm->ssaRep->uses[0] == objSreg) + && (tm->dalvikInsn.vC == fieldIdx)) { + if (cUnit->printMe) { + LOG(INFO) << "Clobbered IGET_OBJECT @ 
0x" + << std::hex << tm->offset; + } + break; + } + } + if (matches >= 2) { + // Allocate compiler temp, redirect 1st load to temp, + // insert copy to real target. Convert all dups to + // copies and rename all uses. + int tmpSreg = allocCompilerTempSreg(cUnit, tempBlockV); + MIR* newMir = rawMIR(cUnit, (Instruction::Code)kMirOpCopy, 1, 1); + newMir->ssaRep->defs[0] = dstSreg; + newMir->ssaRep->uses[0] = tmpSreg; + mir->ssaRep->defs[0] = tmpSreg; + oatInsertMIRAfter(bb, mir, newMir); + } + } + break; + case Instruction::IF_EQ: + case Instruction::IF_NE: + case Instruction::IF_LT: + case Instruction::IF_GE: + case Instruction::IF_GT: + case Instruction::IF_LE: + // TODO: Check for and fuse preceeding comparison + break; + case Instruction::IF_EQZ: + case Instruction::IF_NEZ: + case Instruction::IF_LTZ: + case Instruction::IF_GEZ: + case Instruction::IF_GTZ: + case Instruction::IF_LEZ: + // TODO: Check for and fuse preceeding comparison + break; + default: + break; + } + } + + if (numTemps > cUnit->numCompilerTemps) { + cUnit->numCompilerTemps = numTemps; + } + return true; +} + bool nullCheckEliminationInit(struct CompilationUnit* cUnit, struct BasicBlock* bb) { @@ -1779,4 +1936,15 @@ void oatMethodNullCheckElimination(CompilationUnit *cUnit) } } +void oatMethodBasicBlockOptimization(CompilationUnit *cUnit) +{ + oatInitGrowableList(cUnit, &cUnit->compilerTemps, 6, kListMisc); + DCHECK_EQ(cUnit->numCompilerTemps, 0); + if (!(cUnit->disableOpt & (1 << kBBOpt))) { + oatDataFlowAnalysisDispatcher(cUnit, basicBlockOpt, + kAllNodes, + false /* isIterative */); + } +} + } // namespace art diff --git a/src/compiler/Dataflow.h b/src/compiler/Dataflow.h index a9917a3396..2df9373b85 100644 --- a/src/compiler/Dataflow.h +++ b/src/compiler/Dataflow.h @@ -115,7 +115,7 @@ struct BasicBlockDataFlow { ArenaBitVector* defV; ArenaBitVector* liveInV; ArenaBitVector* phiV; - int* dalvikToSSAMap; + int* vRegToSSAMap; ArenaBitVector* endingNullCheckV; }; @@ -147,13 +147,10 @@ struct 
ArrayAccessInfo { int minC; // For DIV - will affect lower bound checking }; -#define ENCODE_REG_SUB(r,s) ((s<<16) | r) -#define DECODE_REG(v) (v & 0xffff) -#define DECODE_SUB(v) (((unsigned int) v) >> 16) - - void oatMethodNullCheckElimination(CompilationUnit*); +void oatMethodBasicBlockOptimization(CompilationUnit*); + } // namespace art #endif // ART_SRC_COMPILER_DATAFLOW_H_ diff --git a/src/compiler/Frontend.cc b/src/compiler/Frontend.cc index e1b2c608bb..8ffcc72590 100644 --- a/src/compiler/Frontend.cc +++ b/src/compiler/Frontend.cc @@ -34,6 +34,7 @@ uint32_t compilerOptimizerDisableFlags = 0 | // Disable specific optimizations //(1 << kTrackLiveTemps) | //(1 << kSkipLargeMethodOptimization) | //(1 << kSafeOptimizations) | + (1 << kBBOpt) | 0; uint32_t compilerDebugFlags = 0 | // Enable debug/testing modes @@ -970,6 +971,11 @@ CompiledMethod* oatCompileMethod(Compiler& compiler, /* Perform null check elimination */ oatMethodNullCheckElimination(cUnit.get()); +#if 0 + /* Do some basic block optimizations */ + oatMethodBasicBlockOptimization(cUnit.get()); +#endif + oatInitializeRegAlloc(cUnit.get()); // Needs to happen after SSA naming /* Allocate Registers using simple local allocation scheme */ diff --git a/src/compiler/IntermediateRep.cc b/src/compiler/IntermediateRep.cc index 662687777d..d1ba45c18c 100644 --- a/src/compiler/IntermediateRep.cc +++ b/src/compiler/IntermediateRep.cc @@ -110,7 +110,7 @@ void oatPrependMIR(BasicBlock* bb, MIR* mir) } } -/* Insert an MIR instruction after the specified MIR */ +/* Insert a MIR instruction after the specified MIR */ void oatInsertMIRAfter(BasicBlock* bb, MIR* currentMIR, MIR* newMIR) { newMIR->prev = currentMIR; diff --git a/src/compiler/Ralloc.cc b/src/compiler/Ralloc.cc index 2d85812d97..dfb25abc8f 100644 --- a/src/compiler/Ralloc.cc +++ b/src/compiler/Ralloc.cc @@ -108,8 +108,8 @@ bool inferTypeAndSize(CompilationUnit* cUnit, BasicBlock* bb) if (attrs & DF_DA_WIDE) { cUnit->regLocation[ssaRep->defs[0]].wide = 
true; cUnit->regLocation[ssaRep->defs[1]].highWord = true; - DCHECK_EQ(oatS2VReg(cUnit, ssaRep->defs[0])+1, - oatS2VReg(cUnit, ssaRep->defs[1])); + DCHECK_EQ(SRegToVReg(cUnit, ssaRep->defs[0])+1, + SRegToVReg(cUnit, ssaRep->defs[1])); } } @@ -122,8 +122,8 @@ bool inferTypeAndSize(CompilationUnit* cUnit, BasicBlock* bb) if (attrs & DF_UA_WIDE) { cUnit->regLocation[ssaRep->uses[next]].wide = true; cUnit->regLocation[ssaRep->uses[next + 1]].highWord = true; - DCHECK_EQ(oatS2VReg(cUnit, ssaRep->uses[next])+1, - oatS2VReg(cUnit, ssaRep->uses[next + 1])); + DCHECK_EQ(SRegToVReg(cUnit, ssaRep->uses[next])+1, + SRegToVReg(cUnit, ssaRep->uses[next + 1])); next += 2; } else { next++; @@ -136,8 +136,8 @@ bool inferTypeAndSize(CompilationUnit* cUnit, BasicBlock* bb) if (attrs & DF_UB_WIDE) { cUnit->regLocation[ssaRep->uses[next]].wide = true; cUnit->regLocation[ssaRep->uses[next + 1]].highWord = true; - DCHECK_EQ(oatS2VReg(cUnit, ssaRep->uses[next])+1, - oatS2VReg(cUnit, ssaRep->uses[next + 1])); + DCHECK_EQ(SRegToVReg(cUnit, ssaRep->uses[next])+1, + SRegToVReg(cUnit, ssaRep->uses[next + 1])); next += 2; } else { next++; @@ -150,8 +150,8 @@ bool inferTypeAndSize(CompilationUnit* cUnit, BasicBlock* bb) if (attrs & DF_UC_WIDE) { cUnit->regLocation[ssaRep->uses[next]].wide = true; cUnit->regLocation[ssaRep->uses[next + 1]].highWord = true; - DCHECK_EQ(oatS2VReg(cUnit, ssaRep->uses[next])+1, - oatS2VReg(cUnit, ssaRep->uses[next + 1])); + DCHECK_EQ(SRegToVReg(cUnit, ssaRep->uses[next])+1, + SRegToVReg(cUnit, ssaRep->uses[next + 1])); } } @@ -200,16 +200,16 @@ bool inferTypeAndSize(CompilationUnit* cUnit, BasicBlock* bb) cUnit->regLocation[ssaRep->uses[i]].wide = true; cUnit->regLocation[ssaRep->uses[i+1]].highWord = true; - DCHECK_EQ(oatS2VReg(cUnit, ssaRep->uses[i])+1, - oatS2VReg(cUnit, ssaRep->uses[i+1])); + DCHECK_EQ(SRegToVReg(cUnit, ssaRep->uses[i])+1, + SRegToVReg(cUnit, ssaRep->uses[i+1])); i++; break; case 'J': cUnit->regLocation[ssaRep->uses[i]].wide = true; 
cUnit->regLocation[ssaRep->uses[i+1]].highWord = true; - DCHECK_EQ(oatS2VReg(cUnit, ssaRep->uses[i])+1, - oatS2VReg(cUnit, ssaRep->uses[i+1])); + DCHECK_EQ(SRegToVReg(cUnit, ssaRep->uses[i])+1, + SRegToVReg(cUnit, ssaRep->uses[i+1])); changed |= setCore(cUnit, ssaRep->uses[i],true); i++; break; @@ -320,13 +320,24 @@ void oatSimpleRegAlloc(CompilationUnit* cUnit) loc[i] = freshLoc; loc[i].sRegLow = i; } + + /* Patch up the locations for Method* and the compiler temps */ + loc[cUnit->methodSReg].location = kLocCompilerTemp; + for (i = 0; i < cUnit->numCompilerTemps; i++) { + CompilerTemp* ct = (CompilerTemp*)cUnit->compilerTemps.elemList[i]; + loc[ct->sReg].location = kLocCompilerTemp; + } + cUnit->regLocation = loc; /* Allocation the promotion map */ int numRegs = cUnit->numDalvikRegisters; - cUnit->promotionMap = - (PromotionMap*)oatNew(cUnit, numRegs * sizeof(cUnit->promotionMap[0]), - true, kAllocRegAlloc); + PromotionMap* tMap = + (PromotionMap*)oatNew(cUnit, (numRegs + cUnit->numCompilerTemps + 1) * + sizeof(cUnit->promotionMap[0]), true, + kAllocRegAlloc); + // Bias the promotion map + cUnit->promotionMap = &tMap[cUnit->numCompilerTemps + 1]; /* Add types of incoming arguments based on signature */ int numIns = cUnit->numIns; @@ -346,8 +357,8 @@ void oatSimpleRegAlloc(CompilationUnit* cUnit) cUnit->regLocation[sReg].wide = true; cUnit->regLocation[sReg+1].highWord = true; cUnit->regLocation[sReg+1].fp = true; - DCHECK_EQ(oatS2VReg(cUnit, sReg)+1, - oatS2VReg(cUnit, sReg+1)); + DCHECK_EQ(SRegToVReg(cUnit, sReg)+1, + SRegToVReg(cUnit, sReg+1)); cUnit->regLocation[sReg].fp = true; cUnit->regLocation[sReg].defined = true; sReg++; @@ -355,8 +366,8 @@ void oatSimpleRegAlloc(CompilationUnit* cUnit) case 'J': cUnit->regLocation[sReg].wide = true; cUnit->regLocation[sReg+1].highWord = true; - DCHECK_EQ(oatS2VReg(cUnit, sReg)+1, - oatS2VReg(cUnit, sReg+1)); + DCHECK_EQ(SRegToVReg(cUnit, sReg)+1, + SRegToVReg(cUnit, sReg+1)); cUnit->regLocation[sReg].core = true; 
cUnit->regLocation[sReg].defined = true; sReg++; @@ -390,8 +401,9 @@ void oatSimpleRegAlloc(CompilationUnit* cUnit) * allocator, remove this remapping. */ for (i=0; i < cUnit->numSSARegs; i++) { - cUnit->regLocation[i].sRegLow = - DECODE_REG(oatConvertSSARegToDalvik(cUnit, loc[i].sRegLow)); + if (cUnit->regLocation[i].location != kLocCompilerTemp) { + cUnit->regLocation[i].sRegLow = SRegToVReg(cUnit, loc[i].sRegLow); + } } cUnit->coreSpillMask = 0; diff --git a/src/compiler/SSATransformation.cc b/src/compiler/SSATransformation.cc index cdb711e44b..2daa533a7b 100644 --- a/src/compiler/SSATransformation.cc +++ b/src/compiler/SSATransformation.cc @@ -29,7 +29,10 @@ void recordDFSOrders(CompilationUnit* cUnit, BasicBlock* block) /* Enqueue the preOrder block id */ oatInsertGrowableList(cUnit, &cUnit->dfsOrder, block->id); - if (block->fallThrough) recordDFSOrders(cUnit, block->fallThrough); + if (block->fallThrough) { + block->fallThrough->fallThroughTarget = true; + recordDFSOrders(cUnit, block->fallThrough); + } if (block->taken) recordDFSOrders(cUnit, block->taken); if (block->successorBlockList.blockListType != kNotUsed) { GrowableListIterator iterator; @@ -669,9 +672,8 @@ bool insertPhiNodeOperands(CompilationUnit* cUnit, BasicBlock* bb) if (mir->dalvikInsn.opcode != (Instruction::Code)kMirOpPhi) return true; int ssaReg = mir->ssaRep->defs[0]; - int encodedDalvikValue = - (int) oatGrowableListGetElement(cUnit->ssaToDalvikMap, ssaReg); - int dalvikReg = DECODE_REG(encodedDalvikValue); + DCHECK_GE(ssaReg, 0); // Shouldn't see compiler temps here + int vReg = SRegToVReg(cUnit, ssaReg); oatClearAllBits(ssaRegV); @@ -681,9 +683,8 @@ bool insertPhiNodeOperands(CompilationUnit* cUnit, BasicBlock* bb) BasicBlock* predBB = (BasicBlock*)oatGrowableListIteratorNext(&iter); if (!predBB) break; - int encodedSSAValue = - predBB->dataFlowInfo->dalvikToSSAMap[dalvikReg]; - int ssaReg = DECODE_REG(encodedSSAValue); + int ssaReg = + predBB->dataFlowInfo->vRegToSSAMap[vReg]; 
oatSetBit(cUnit, ssaRegV, ssaReg); } @@ -724,17 +725,17 @@ void doDFSPreOrderSSARename(CompilationUnit* cUnit, BasicBlock* block) /* Save SSA map snapshot */ int* savedSSAMap = (int*)oatNew(cUnit, mapSize, false, kAllocDalvikToSSAMap); - memcpy(savedSSAMap, cUnit->dalvikToSSAMap, mapSize); + memcpy(savedSSAMap, cUnit->vRegToSSAMap, mapSize); if (block->fallThrough) { doDFSPreOrderSSARename(cUnit, block->fallThrough); /* Restore SSA map snapshot */ - memcpy(cUnit->dalvikToSSAMap, savedSSAMap, mapSize); + memcpy(cUnit->vRegToSSAMap, savedSSAMap, mapSize); } if (block->taken) { doDFSPreOrderSSARename(cUnit, block->taken); /* Restore SSA map snapshot */ - memcpy(cUnit->dalvikToSSAMap, savedSSAMap, mapSize); + memcpy(cUnit->vRegToSSAMap, savedSSAMap, mapSize); } if (block->successorBlockList.blockListType != kNotUsed) { GrowableListIterator iterator; @@ -747,10 +748,10 @@ void doDFSPreOrderSSARename(CompilationUnit* cUnit, BasicBlock* block) BasicBlock* succBB = successorBlockInfo->block; doDFSPreOrderSSARename(cUnit, succBB); /* Restore SSA map snapshot */ - memcpy(cUnit->dalvikToSSAMap, savedSSAMap, mapSize); + memcpy(cUnit->vRegToSSAMap, savedSSAMap, mapSize); } } - cUnit->dalvikToSSAMap = savedSSAMap; + cUnit->vRegToSSAMap = savedSSAMap; return; } diff --git a/src/compiler/Utility.cc b/src/compiler/Utility.cc index 082f7a4c6f..3674aa9299 100644 --- a/src/compiler/Utility.cc +++ b/src/compiler/Utility.cc @@ -574,6 +574,19 @@ bool oatUnifyBitVectors(ArenaBitVector* dest, const ArenaBitVector* src1, } /* + * Return true if any bits collide. Vectors must be same size. + */ +bool oatTestBitVectors(const ArenaBitVector* src1, + const ArenaBitVector* src2) +{ + DCHECK_EQ(src1->storageSize, src2->storageSize); + for (uint32_t idx = 0; idx < src1->storageSize; idx++) { + if (src1->storage[idx] & src2->storage[idx]) return true; + } + return false; +} + +/* * Compare two bit vectors and return true if difference is seen. 
*/ bool oatCompareBitVectors(const ArenaBitVector* src1, diff --git a/src/compiler/codegen/CodegenFactory.cc b/src/compiler/codegen/CodegenFactory.cc index 8a6e1bc967..5444816ddf 100644 --- a/src/compiler/codegen/CodegenFactory.cc +++ b/src/compiler/codegen/CodegenFactory.cc @@ -65,7 +65,8 @@ void loadValueDirect(CompilationUnit* cUnit, RegLocation rlSrc, int reg1) if (rlSrc.location == kLocPhysReg) { opRegCopy(cUnit, reg1, rlSrc.lowReg); } else { - DCHECK(rlSrc.location == kLocDalvikFrame); + DCHECK((rlSrc.location == kLocDalvikFrame) || + (rlSrc.location == kLocCompilerTemp)); loadWordDisp(cUnit, rSP, oatSRegOffset(cUnit, rlSrc.sRegLow), reg1); } } @@ -94,7 +95,8 @@ void loadValueDirectWide(CompilationUnit* cUnit, RegLocation rlSrc, int regLo, if (rlSrc.location == kLocPhysReg) { opRegCopyWide(cUnit, regLo, regHi, rlSrc.lowReg, rlSrc.highReg); } else { - DCHECK(rlSrc.location == kLocDalvikFrame); + DCHECK((rlSrc.location == kLocDalvikFrame) || + (rlSrc.location == kLocCompilerTemp)); loadBaseDispWide(cUnit, NULL, rSP, oatSRegOffset(cUnit, rlSrc.sRegLow), regLo, regHi, INVALID_SREG); @@ -120,7 +122,9 @@ RegLocation loadValue(CompilationUnit* cUnit, RegLocation rlSrc, RegisterClass opKind) { rlSrc = oatEvalLoc(cUnit, rlSrc, opKind, false); - if (rlSrc.location == kLocDalvikFrame) { + if (rlSrc.location != kLocPhysReg) { + DCHECK((rlSrc.location == kLocDalvikFrame) || + (rlSrc.location == kLocCompilerTemp)); loadValueDirect(cUnit, rlSrc, rlSrc.lowReg); rlSrc.location = kLocPhysReg; oatMarkLive(cUnit, rlSrc.lowReg, rlSrc.sRegLow); @@ -176,7 +180,9 @@ RegLocation loadValueWide(CompilationUnit* cUnit, RegLocation rlSrc, { DCHECK(rlSrc.wide); rlSrc = oatEvalLoc(cUnit, rlSrc, opKind, false); - if (rlSrc.location == kLocDalvikFrame) { + if (rlSrc.location != kLocPhysReg) { + DCHECK((rlSrc.location == kLocDalvikFrame) || + (rlSrc.location == kLocCompilerTemp)); loadValueDirectWide(cUnit, rlSrc, rlSrc.lowReg, rlSrc.highReg); rlSrc.location = kLocPhysReg; oatMarkLive(cUnit, 
rlSrc.lowReg, rlSrc.sRegLow); @@ -232,8 +238,8 @@ void storeValueWide(CompilationUnit* cUnit, RegLocation rlDest, (oatLiveOut(cUnit, rlDest.sRegLow) || oatLiveOut(cUnit, oatSRegHi(rlDest.sRegLow)))) { defStart = (LIR*)cUnit->lastLIRInsn; - DCHECK_EQ((oatS2VReg(cUnit, rlDest.sRegLow)+1), - oatS2VReg(cUnit, oatSRegHi(rlDest.sRegLow))); + DCHECK_EQ((SRegToVReg(cUnit, rlDest.sRegLow)+1), + SRegToVReg(cUnit, oatSRegHi(rlDest.sRegLow))); storeBaseDispWide(cUnit, rSP, oatSRegOffset(cUnit, rlDest.sRegLow), rlDest.lowReg, rlDest.highReg); oatMarkClean(cUnit, rlDest); @@ -265,29 +271,15 @@ void markGCCard(CompilationUnit* cUnit, int valReg, int tgtAddrReg) #endif } -/* - * Utility to load the current Method*. Broken out - * to allow easy change between placing the current Method* in a - * dedicated register or its home location in the frame. - */ +/* Utilities to load the current Method* */ void loadCurrMethodDirect(CompilationUnit *cUnit, int rTgt) { -#if defined(METHOD_IN_REG) - opRegCopy(cUnit, rTgt, rMETHOD); -#else - loadWordDisp(cUnit, rSP, 0, rTgt); -#endif + loadValueDirectFixed(cUnit, cUnit->regLocation[cUnit->methodSReg], rTgt); } -int loadCurrMethod(CompilationUnit *cUnit) +RegLocation loadCurrMethod(CompilationUnit *cUnit) { -#if defined(METHOD_IN_REG) - return rMETHOD; -#else - int mReg = oatAllocTemp(cUnit); - loadCurrMethodDirect(cUnit, mReg); - return mReg; -#endif + return loadValue(cUnit, cUnit->regLocation[cUnit->methodSReg], kCoreReg); } diff --git a/src/compiler/codegen/GenCommon.cc b/src/compiler/codegen/GenCommon.cc index e2c306de37..9b1654f764 100644 --- a/src/compiler/codegen/GenCommon.cc +++ b/src/compiler/codegen/GenCommon.cc @@ -275,10 +275,12 @@ void genFilledNewArray(CompilationUnit* cUnit, MIR* mir, bool isRange) rTgt = loadHelper(cUnit, OFFSETOF_MEMBER(Thread, pCheckAndAllocArrayFromCodeWithAccessCheck)); } - loadCurrMethodDirect(cUnit, rARG1); // arg1 <- Method* loadConstant(cUnit, rARG0, typeId); // arg0 <- type_id loadConstant(cUnit, rARG2, 
elems); // arg2 <- count + loadCurrMethodDirect(cUnit, rARG1); // arg1 <- Method* callRuntimeHelper(cUnit, rTgt); + oatFreeTemp(cUnit, rARG2); + oatFreeTemp(cUnit, rARG1); /* * NOTE: the implicit target for Instruction::FILLED_NEW_ARRAY is the * return region. Because AllocFromCode placed the new array @@ -387,12 +389,11 @@ void genSput(CompilationUnit* cUnit, MIR* mir, RegLocation rlSrc, if (fastPath && !SLOW_FIELD_PATH) { DCHECK_GE(fieldOffset, 0); int rBase; - int rMethod; if (isReferrersClass) { // Fast path, static storage base is this method's class - rMethod = loadCurrMethod(cUnit); + RegLocation rlMethod = loadCurrMethod(cUnit); rBase = oatAllocTemp(cUnit); - loadWordDisp(cUnit, rMethod, + loadWordDisp(cUnit, rlMethod.lowReg, Method::DeclaringClassOffset().Int32Value(), rBase); } else { // Medium path, static storage base in a different class which @@ -402,7 +403,7 @@ void genSput(CompilationUnit* cUnit, MIR* mir, RegLocation rlSrc, oatFlushAllRegs(cUnit); // Using fixed register to sync with possible call to runtime // support. 
- rMethod = rARG1; + int rMethod = rARG1; oatLockTemp(cUnit, rMethod); loadCurrMethodDirect(cUnit, rMethod); rBase = rARG0; @@ -427,9 +428,9 @@ void genSput(CompilationUnit* cUnit, MIR* mir, RegLocation rlSrc, #endif LIR* skipTarget = newLIR0(cUnit, kPseudoTargetLabel); branchOver->target = (LIR*)skipTarget; + oatFreeTemp(cUnit, rMethod); } // rBase now holds static storage base - oatFreeTemp(cUnit, rMethod); if (isLongOrDouble) { rlSrc = oatGetSrcWide(cUnit, mir, 0, 1); rlSrc = loadValueWide(cUnit, rlSrc, kAnyReg); @@ -496,12 +497,11 @@ void genSget(CompilationUnit* cUnit, MIR* mir, RegLocation rlDest, if (fastPath && !SLOW_FIELD_PATH) { DCHECK_GE(fieldOffset, 0); int rBase; - int rMethod; if (isReferrersClass) { // Fast path, static storage base is this method's class - rMethod = loadCurrMethod(cUnit); + RegLocation rlMethod = loadCurrMethod(cUnit); rBase = oatAllocTemp(cUnit); - loadWordDisp(cUnit, rMethod, + loadWordDisp(cUnit, rlMethod.lowReg, Method::DeclaringClassOffset().Int32Value(), rBase); } else { // Medium path, static storage base in a different class which @@ -511,7 +511,7 @@ void genSget(CompilationUnit* cUnit, MIR* mir, RegLocation rlDest, oatFlushAllRegs(cUnit); // Using fixed register to sync with possible call to runtime // support - rMethod = rARG1; + int rMethod = rARG1; oatLockTemp(cUnit, rMethod); loadCurrMethodDirect(cUnit, rMethod); rBase = rARG0; @@ -537,9 +537,9 @@ void genSget(CompilationUnit* cUnit, MIR* mir, RegLocation rlDest, #endif LIR* skipTarget = newLIR0(cUnit, kPseudoTargetLabel); branchOver->target = (LIR*)skipTarget; + oatFreeTemp(cUnit, rMethod); } // rBase now holds static storage base - oatFreeTemp(cUnit, rMethod); rlDest = isLongOrDouble ? 
oatGetDestWide(cUnit, mir, 0, 1) : oatGetDest(cUnit, mir, 0); RegLocation rlResult = oatEvalLoc(cUnit, rlDest, kAnyReg, true); @@ -837,7 +837,7 @@ void genConstClass(CompilationUnit* cUnit, MIR* mir, RegLocation rlDest, RegLocation rlSrc) { uint32_t type_idx = mir->dalvikInsn.vB; - int mReg = loadCurrMethod(cUnit); + RegLocation rlMethod = loadCurrMethod(cUnit); int resReg = oatAllocTemp(cUnit); RegLocation rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true); if (!cUnit->compiler->CanAccessTypeWithoutChecks(cUnit->method_idx, @@ -848,7 +848,7 @@ void genConstClass(CompilationUnit* cUnit, MIR* mir, RegLocation rlDest, // Resolved type returned in rRET0. int rTgt = loadHelper(cUnit, OFFSETOF_MEMBER(Thread, pInitializeTypeAndVerifyAccessFromCode)); - opRegCopy(cUnit, rARG1, mReg); + opRegCopy(cUnit, rARG1, rlMethod.lowReg); loadConstant(cUnit, rARG0, type_idx); callRuntimeHelper(cUnit, rTgt); RegLocation rlResult = oatGetReturn(cUnit); @@ -857,7 +857,7 @@ void genConstClass(CompilationUnit* cUnit, MIR* mir, RegLocation rlDest, // We're don't need access checks, load type from dex cache int32_t dex_cache_offset = Method::DexCacheResolvedTypesOffset().Int32Value(); - loadWordDisp(cUnit, mReg, dex_cache_offset, resReg); + loadWordDisp(cUnit, rlMethod.lowReg, dex_cache_offset, resReg); int32_t offset_of_type = Array::DataOffset(sizeof(Class*)).Int32Value() + (sizeof(Class*) * type_idx); @@ -876,7 +876,7 @@ void genConstClass(CompilationUnit* cUnit, MIR* mir, RegLocation rlDest, // Call out to helper, which will return resolved type in rARG0 int rTgt = loadHelper(cUnit, OFFSETOF_MEMBER(Thread, pInitializeTypeFromCode)); - opRegCopy(cUnit, rARG1, mReg); + opRegCopy(cUnit, rARG1, rlMethod.lowReg); loadConstant(cUnit, rARG0, type_idx); callRuntimeHelper(cUnit, rTgt); RegLocation rlResult = oatGetReturn(cUnit); @@ -930,10 +930,10 @@ void genConstString(CompilationUnit* cUnit, MIR* mir, RegLocation rlDest, genBarrier(cUnit); storeValue(cUnit, rlDest, oatGetReturn(cUnit)); } 
else { - int mReg = loadCurrMethod(cUnit); + RegLocation rlMethod = loadCurrMethod(cUnit); int resReg = oatAllocTemp(cUnit); RegLocation rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true); - loadWordDisp(cUnit, mReg, + loadWordDisp(cUnit, rlMethod.lowReg, Method::DexCacheStringsOffset().Int32Value(), resReg); loadWordDisp(cUnit, resReg, offset_of_string, rlResult.lowReg); storeValue(cUnit, rlDest, rlResult); diff --git a/src/compiler/codegen/MethodCodegenDriver.cc b/src/compiler/codegen/MethodCodegenDriver.cc index 6b3283e13f..5baabf2f32 100644 --- a/src/compiler/codegen/MethodCodegenDriver.cc +++ b/src/compiler/codegen/MethodCodegenDriver.cc @@ -716,8 +716,7 @@ const char* extendedMIROpNames[kMirOpLast - kMirOpFirst] = { "kMirOpNullNRangeUpCheck", "kMirOpNullNRangeDownCheck", "kMirOpLowerBound", - "kMirOpPunt", - "kMirOpCheckInlinePrediction", + "kMirOpCopy", }; /* Extended MIR instructions like PHI */ @@ -742,6 +741,9 @@ void handleExtendedMethodMIR(CompilationUnit* cUnit, MIR* mir) newLIR1(cUnit, kPseudoSSARep, (int) ssaString); break; } + case kMirOpCopy: + UNIMPLEMENTED(FATAL) << "Need kMirOpCopy"; + break; default: break; } @@ -761,11 +763,19 @@ bool methodBlockCodeGen(CompilationUnit* cUnit, BasicBlock* bb) labelList[blockId].opcode = kPseudoNormalBlockLabel; oatAppendLIR(cUnit, (LIR*) &labelList[blockId]); - /* Reset local optimization data on block boundaries */ + /* Free temp registers and reset redundant store tracking */ oatResetRegPool(cUnit); - oatClobberAllRegs(cUnit); oatResetDefTracking(cUnit); + /* + * If control reached us from our immediate predecessor via + * fallthrough and we have no other incoming arcs we can + * reuse existing liveness. Otherwise, reset. 
+ */ + if (!bb->fallThroughTarget || bb->predecessors->numUsed != 1) { + oatClobberAllRegs(cUnit); + } + LIR* headLIR = NULL; if (bb->blockType == kEntryBlock) { diff --git a/src/compiler/codegen/Ralloc.h b/src/compiler/codegen/Ralloc.h index 8c8c693edb..d32545c248 100644 --- a/src/compiler/codegen/Ralloc.h +++ b/src/compiler/codegen/Ralloc.h @@ -35,12 +35,6 @@ struct RefCounts { }; -inline int oatS2VReg(CompilationUnit* cUnit, int sReg) -{ - DCHECK_NE(sReg, INVALID_SREG); - return DECODE_REG(oatConvertSSARegToDalvik(cUnit, sReg)); -} - /* * Get the "real" sreg number associated with an sReg slot. In general, * sReg values passed through codegen are the SSA names created by diff --git a/src/compiler/codegen/RallocUtil.cc b/src/compiler/codegen/RallocUtil.cc index b5ebf65da0..8f5d1bbe5a 100644 --- a/src/compiler/codegen/RallocUtil.cc +++ b/src/compiler/codegen/RallocUtil.cc @@ -147,6 +147,19 @@ extern void oatClobberSReg(CompilationUnit* cUnit, int sReg) sReg); } +/* Sanity check */ +bool validSreg(CompilationUnit* cUnit, int sReg) +{ + bool res = ((-(cUnit->numCompilerTemps + 1) <= sReg) && + (sReg < cUnit->numDalvikRegisters)); + if (!res) { + LOG(WARNING) << "Bad sreg: " << sReg; + LOG(WARNING) << " low = " << -(cUnit->numCompilerTemps + 1); + LOG(WARNING) << " high = " << cUnit->numRegs; + } + return res; +} + /* Reserve a callee-save register. 
Return -1 if none available */ extern int oatAllocPreservedCoreReg(CompilationUnit* cUnit, int sReg) { @@ -160,7 +173,8 @@ extern int oatAllocPreservedCoreReg(CompilationUnit* cUnit, int sReg) cUnit->coreVmapTable.push_back(sReg); cUnit->numCoreSpills++; // Should be promoting based on initial sReg set - DCHECK_EQ(sReg, oatS2VReg(cUnit, sReg)); + DCHECK_EQ(sReg, SRegToVReg(cUnit, sReg)); + DCHECK(validSreg(cUnit,sReg)); cUnit->promotionMap[sReg].coreLocation = kLocPhysReg; cUnit->promotionMap[sReg].coreReg = res; break; @@ -184,8 +198,9 @@ int allocPreservedSingle(CompilationUnit* cUnit, int sReg, bool even) res = FPRegs[i].reg; FPRegs[i].inUse = true; // Should be promoting based on initial sReg set - DCHECK_EQ(sReg, oatS2VReg(cUnit, sReg)); + DCHECK_EQ(sReg, SRegToVReg(cUnit, sReg)); oatMarkPreservedSingle(cUnit, sReg, res); + DCHECK(validSreg(cUnit,sReg)); cUnit->promotionMap[sReg].fpLocation = kLocPhysReg; cUnit->promotionMap[sReg].fpReg = res; break; @@ -206,7 +221,8 @@ int allocPreservedDouble(CompilationUnit* cUnit, int sReg) { int res = -1; // Assume failure // Should be promoting based on initial sReg set - DCHECK_EQ(sReg, oatS2VReg(cUnit, sReg)); + DCHECK_EQ(sReg, SRegToVReg(cUnit, sReg)); + DCHECK(validSreg(cUnit,sReg+1)); if (cUnit->promotionMap[sReg+1].fpLocation == kLocPhysReg) { // Upper reg is already allocated. Can we fit? 
int highReg = cUnit->promotionMap[sReg+1].fpReg; @@ -244,8 +260,10 @@ int allocPreservedDouble(CompilationUnit* cUnit, int sReg) } } if (res != -1) { + DCHECK(validSreg(cUnit,sReg)); cUnit->promotionMap[sReg].fpLocation = kLocPhysReg; cUnit->promotionMap[sReg].fpReg = res; + DCHECK(validSreg(cUnit,sReg+1)); cUnit->promotionMap[sReg+1].fpLocation = kLocPhysReg; cUnit->promotionMap[sReg+1].fpReg = res + 1; } @@ -788,7 +806,9 @@ extern RegLocation oatUpdateLoc(CompilationUnit* cUnit, RegLocation loc) { DCHECK(!loc.wide); DCHECK(oatCheckCorePoolSanity(cUnit)); - if (loc.location == kLocDalvikFrame) { + if (loc.location != kLocPhysReg) { + DCHECK((loc.location == kLocDalvikFrame) || + (loc.location == kLocCompilerTemp)); RegisterInfo* infoLo = allocLive(cUnit, loc.sRegLow, kAnyReg); if (infoLo) { if (infoLo->pair) { @@ -837,7 +857,9 @@ extern RegLocation oatUpdateLocWide(CompilationUnit* cUnit, RegLocation loc) { DCHECK(loc.wide); DCHECK(oatCheckCorePoolSanity(cUnit)); - if (loc.location == kLocDalvikFrame) { + if (loc.location != kLocPhysReg) { + DCHECK((loc.location == kLocDalvikFrame) || + (loc.location == kLocCompilerTemp)); // Are the dalvik regs already live in physical registers? RegisterInfo* infoLo = allocLive(cUnit, loc.sRegLow, kAnyReg); RegisterInfo* infoHi = allocLive(cUnit, @@ -1026,7 +1048,7 @@ void oatCountRefs(CompilationUnit *cUnit, BasicBlock* bb, for (int i = 0; i < ssaRep->numDefs;) { RegLocation loc = cUnit->regLocation[ssaRep->defs[i]]; RefCounts* counts = loc.fp ? fpCounts : coreCounts; - int vReg = oatS2VReg(cUnit, ssaRep->defs[i]); + int vReg = SRegToVReg(cUnit, ssaRep->defs[i]); if (loc.defined) { counts[vReg].count++; } @@ -1045,7 +1067,7 @@ void oatCountRefs(CompilationUnit *cUnit, BasicBlock* bb, for (int i = 0; i < ssaRep->numUses;) { RegLocation loc = cUnit->regLocation[ssaRep->uses[i]]; RefCounts* counts = loc.fp ? 
fpCounts : coreCounts; - int vReg = oatS2VReg(cUnit, ssaRep->uses[i]); + int vReg = SRegToVReg(cUnit, ssaRep->uses[i]); if (loc.defined) { counts[vReg].count++; } @@ -1142,6 +1164,7 @@ extern void oatDoPromotion(CompilationUnit* cUnit) if (!(cUnit->disableOpt & (1 << kPromoteRegs))) { // Promote fpRegs for (int i = 0; (fpRegs[i].count > 0) && (i < numRegs); i++) { + DCHECK(validSreg(cUnit,fpRegs[i].sReg)); if (cUnit->promotionMap[fpRegs[i].sReg].fpLocation != kLocPhysReg) { int reg = oatAllocPreservedFPReg(cUnit, fpRegs[i].sReg, fpRegs[i].doubleStart); @@ -1153,6 +1176,7 @@ extern void oatDoPromotion(CompilationUnit* cUnit) // Promote core regs for (int i = 0; (coreRegs[i].count > 0) && i < numRegs; i++) { + DCHECK(validSreg(cUnit,coreRegs[i].sReg)); if (cUnit->promotionMap[coreRegs[i].sReg].coreLocation != kLocPhysReg) { int reg = oatAllocPreservedCoreReg(cUnit, coreRegs[i].sReg); @@ -1166,15 +1190,17 @@ extern void oatDoPromotion(CompilationUnit* cUnit) // Now, update SSA names to new home locations for (int i = 0; i < cUnit->numSSARegs; i++) { RegLocation *curr = &cUnit->regLocation[i]; - int baseVReg = oatS2VReg(cUnit, curr->sRegLow); + int baseVReg = SRegToVReg(cUnit, curr->sRegLow); if (!curr->wide) { if (curr->fp) { + DCHECK(validSreg(cUnit,baseVReg)); if (cUnit->promotionMap[baseVReg].fpLocation == kLocPhysReg) { curr->location = kLocPhysReg; curr->lowReg = cUnit->promotionMap[baseVReg].fpReg; curr->home = true; } } else { + DCHECK(validSreg(cUnit,baseVReg)); if (cUnit->promotionMap[baseVReg].coreLocation == kLocPhysReg) { curr->location = kLocPhysReg; curr->lowReg = cUnit->promotionMap[baseVReg].coreReg; @@ -1187,6 +1213,8 @@ extern void oatDoPromotion(CompilationUnit* cUnit) continue; } if (curr->fp) { + DCHECK(validSreg(cUnit,baseVReg)); + DCHECK(validSreg(cUnit,baseVReg+1)); if ((cUnit->promotionMap[baseVReg].fpLocation == kLocPhysReg) && (cUnit->promotionMap[baseVReg+1].fpLocation == kLocPhysReg)) { @@ -1201,6 +1229,8 @@ extern void 
oatDoPromotion(CompilationUnit* cUnit) } } } else { + DCHECK(validSreg(cUnit,baseVReg)); + DCHECK(validSreg(cUnit,baseVReg+1)); if ((cUnit->promotionMap[baseVReg].coreLocation == kLocPhysReg) && (cUnit->promotionMap[baseVReg+1].coreLocation == kLocPhysReg)) { @@ -1224,7 +1254,7 @@ extern int oatVRegOffset(CompilationUnit* cUnit, int vReg) /* Returns sp-relative offset in bytes for a SReg */ extern int oatSRegOffset(CompilationUnit* cUnit, int sReg) { - return oatVRegOffset(cUnit, oatS2VReg(cUnit, sReg)); + return oatVRegOffset(cUnit, SRegToVReg(cUnit, sReg)); } } // namespace art diff --git a/src/compiler/codegen/arm/ArchFactory.cc b/src/compiler/codegen/arm/ArchFactory.cc index 8a23d5c2e1..da5de521f3 100644 --- a/src/compiler/codegen/arm/ArchFactory.cc +++ b/src/compiler/codegen/arm/ArchFactory.cc @@ -106,7 +106,21 @@ void genEntrySequence(CompilationUnit* cUnit, BasicBlock* bb) opRegImm(cUnit, kOpSub, rSP, cUnit->frameSize - (spillCount * 4)); } - storeBaseDisp(cUnit, rSP, 0, r0, kWord); + + /* + * Dummy up a RegLocation for the incoming Method* + * It will attempt to keep r0 live (or copy it to home location + * if promoted). 
+ */ + RegLocation rlSrc = cUnit->regLocation[cUnit->methodSReg]; + RegLocation rlMethod = cUnit->regLocation[cUnit->methodSReg]; + rlSrc.location = kLocPhysReg; + rlSrc.lowReg = r0; + rlSrc.home = false; + oatMarkLive(cUnit, rlSrc.lowReg, rlSrc.sRegLow); + storeValue(cUnit, rlMethod, rlSrc); + + /* Flush the rest of the ins */ flushIns(cUnit); if (cUnit->genDebugger) { diff --git a/src/compiler/codegen/arm/ArmRallocUtil.cc b/src/compiler/codegen/arm/ArmRallocUtil.cc index 3335f5997e..e7627f2367 100644 --- a/src/compiler/codegen/arm/ArmRallocUtil.cc +++ b/src/compiler/codegen/arm/ArmRallocUtil.cc @@ -76,10 +76,10 @@ void oatFlushRegWide(CompilationUnit* cUnit, int reg1, int reg2) info1->dirty = false; info2->dirty = false; - if (oatS2VReg(cUnit, info2->sReg) < - oatS2VReg(cUnit, info1->sReg)) + if (SRegToVReg(cUnit, info2->sReg) < + SRegToVReg(cUnit, info1->sReg)) info1 = info2; - int vReg = oatS2VReg(cUnit, info1->sReg); + int vReg = SRegToVReg(cUnit, info1->sReg); oatFlushRegWideImpl(cUnit, rSP, oatVRegOffset(cUnit, vReg), info1->reg, info1->partner); @@ -91,7 +91,7 @@ void oatFlushReg(CompilationUnit* cUnit, int reg) RegisterInfo* info = oatGetRegInfo(cUnit, reg); if (info->live && info->dirty) { info->dirty = false; - int vReg = oatS2VReg(cUnit, info->sReg); + int vReg = SRegToVReg(cUnit, info->sReg); oatFlushRegImpl(cUnit, rSP, oatVRegOffset(cUnit, vReg), reg, kWord); diff --git a/src/compiler/codegen/arm/Thumb2/Ralloc.cc b/src/compiler/codegen/arm/Thumb2/Ralloc.cc index c0f2c771ca..7858318001 100644 --- a/src/compiler/codegen/arm/Thumb2/Ralloc.cc +++ b/src/compiler/codegen/arm/Thumb2/Ralloc.cc @@ -88,6 +88,10 @@ void oatInitializeRegAlloc(CompilationUnit* cUnit) for (int i = 0; i < numFPTemps; i++) { oatMarkTemp(cUnit, fpTemps[i]); } + + // Start allocation at r2 in an attempt to avoid clobbering return values + pool->nextCoreReg = r2; + // Construct the alias map. 
cUnit->phiAliasMap = (int*)oatNew(cUnit, cUnit->numSSARegs * sizeof(cUnit->phiAliasMap[0]), false, diff --git a/src/compiler/codegen/mips/MipsRallocUtil.cc b/src/compiler/codegen/mips/MipsRallocUtil.cc index 7fd9b598cd..7ed3f86c3e 100644 --- a/src/compiler/codegen/mips/MipsRallocUtil.cc +++ b/src/compiler/codegen/mips/MipsRallocUtil.cc @@ -65,10 +65,10 @@ void oatFlushRegWide(CompilationUnit* cUnit, int reg1, int reg2) info1->dirty = false; info2->dirty = false; - if (oatS2VReg(cUnit, info2->sReg) < - oatS2VReg(cUnit, info1->sReg)) + if (SRegToVReg(cUnit, info2->sReg) < + SRegToVReg(cUnit, info1->sReg)) info1 = info2; - int vReg = oatS2VReg(cUnit, info1->sReg); + int vReg = SRegToVReg(cUnit, info1->sReg); oatFlushRegWideImpl(cUnit, rSP, oatVRegOffset(cUnit, vReg), info1->reg, info1->partner); @@ -80,7 +80,7 @@ void oatFlushReg(CompilationUnit* cUnit, int reg) RegisterInfo* info = oatGetRegInfo(cUnit, reg); if (info->live && info->dirty) { info->dirty = false; - int vReg = oatS2VReg(cUnit, info->sReg); + int vReg = SRegToVReg(cUnit, info->sReg); oatFlushRegImpl(cUnit, rSP, oatVRegOffset(cUnit, vReg), reg, kWord); diff --git a/src/compiler/codegen/x86/X86RallocUtil.cc b/src/compiler/codegen/x86/X86RallocUtil.cc index 7c99fd6298..1b4eca4158 100644 --- a/src/compiler/codegen/x86/X86RallocUtil.cc +++ b/src/compiler/codegen/x86/X86RallocUtil.cc @@ -60,10 +60,10 @@ void oatFlushRegWide(CompilationUnit* cUnit, int reg1, int reg2) info1->dirty = false; info2->dirty = false; - if (oatS2VReg(cUnit, info2->sReg) < - oatS2VReg(cUnit, info1->sReg)) + if (SRegToVReg(cUnit, info2->sReg) < + SRegToVReg(cUnit, info1->sReg)) info1 = info2; - int vReg = oatS2VReg(cUnit, info1->sReg); + int vReg = SRegToVReg(cUnit, info1->sReg); oatFlushRegWideImpl(cUnit, rSP, oatVRegOffset(cUnit, vReg), info1->reg, info1->partner); @@ -75,7 +75,7 @@ void oatFlushReg(CompilationUnit* cUnit, int reg) RegisterInfo* info = oatGetRegInfo(cUnit, reg); if (info->live && info->dirty) { info->dirty = false; - 
int vReg = oatS2VReg(cUnit, info->sReg); + int vReg = SRegToVReg(cUnit, info->sReg); oatFlushRegImpl(cUnit, rSP, oatVRegOffset(cUnit, vReg), reg, kWord); diff --git a/src/stack.cc b/src/stack.cc index da5c31eab3..e4d1133390 100644 --- a/src/stack.cc +++ b/src/stack.cc @@ -63,7 +63,7 @@ void Frame::SetReturnPC(uintptr_t pc) { * +========================+ {Note: start of callee's frame} * | core callee-save spill | {variable sized} * +------------------------+ - * | fp calle-save spill | + * | fp callee-save spill | * +------------------------+ * | V[locals-1] | * | V[locals-2] | |