diff options
| -rw-r--r-- | src/compiler/Compiler.h | 4 | ||||
| -rw-r--r-- | src/compiler/CompilerIR.h | 35 | ||||
| -rw-r--r-- | src/compiler/CompilerUtility.h | 1 | ||||
| -rw-r--r-- | src/compiler/Dataflow.cc | 272 | ||||
| -rw-r--r-- | src/compiler/Dataflow.h | 9 | ||||
| -rw-r--r-- | src/compiler/Frontend.cc | 6 | ||||
| -rw-r--r-- | src/compiler/IntermediateRep.cc | 2 | ||||
| -rw-r--r-- | src/compiler/Ralloc.cc | 54 | ||||
| -rw-r--r-- | src/compiler/SSATransformation.cc | 25 | ||||
| -rw-r--r-- | src/compiler/Utility.cc | 13 | ||||
| -rw-r--r-- | src/compiler/codegen/CodegenFactory.cc | 40 | ||||
| -rw-r--r-- | src/compiler/codegen/GenCommon.cc | 34 | ||||
| -rw-r--r-- | src/compiler/codegen/MethodCodegenDriver.cc | 18 | ||||
| -rw-r--r-- | src/compiler/codegen/Ralloc.h | 6 | ||||
| -rw-r--r-- | src/compiler/codegen/RallocUtil.cc | 48 | ||||
| -rw-r--r-- | src/compiler/codegen/arm/ArchFactory.cc | 16 | ||||
| -rw-r--r-- | src/compiler/codegen/arm/ArmRallocUtil.cc | 8 | ||||
| -rw-r--r-- | src/compiler/codegen/arm/Thumb2/Ralloc.cc | 4 | ||||
| -rw-r--r-- | src/compiler/codegen/mips/MipsRallocUtil.cc | 8 | ||||
| -rw-r--r-- | src/compiler/codegen/x86/X86RallocUtil.cc | 8 | ||||
| -rw-r--r-- | src/stack.cc | 2 |
21 files changed, 437 insertions, 176 deletions
diff --git a/src/compiler/Compiler.h b/src/compiler/Compiler.h index 9516f25ba5..94d20e5caf 100644 --- a/src/compiler/Compiler.h +++ b/src/compiler/Compiler.h @@ -50,6 +50,7 @@ enum optControlVector { kTrackLiveTemps, kSkipLargeMethodOptimization, kSafeOptimizations, + kBBOpt, }; /* Type of allocation for memory tuning */ @@ -177,7 +178,8 @@ bool oatStartup(void); void oatShutdown(void); void oatScanAllClassPointers(void (*callback)(void* ptr)); void oatInitializeSSAConversion(struct CompilationUnit* cUnit); -int oatConvertSSARegToDalvik(const struct CompilationUnit* cUnit, int ssaReg); +int SRegToVReg(const struct CompilationUnit* cUnit, int ssaReg); +int SRegToSubscript(const struct CompilationUnit* cUnit, int ssaReg); bool oatFindLocalLiveIn(struct CompilationUnit* cUnit, struct BasicBlock* bb); bool oatDoSSAConversion(struct CompilationUnit* cUnit, diff --git a/src/compiler/CompilerIR.h b/src/compiler/CompilerIR.h index 611d1dff3a..bd4c156e4d 100644 --- a/src/compiler/CompilerIR.h +++ b/src/compiler/CompilerIR.h @@ -44,7 +44,7 @@ enum RegisterClass { enum RegLocationType { kLocDalvikFrame = 0, // Normal Dalvik register kLocPhysReg, - kLocSpill, + kLocCompilerTemp, }; struct PromotionMap { @@ -65,7 +65,12 @@ struct RegLocation { unsigned home:1; // Does this represent the home location? u1 lowReg; // First physical register u1 highReg; // 2nd physical register (if wide) - s2 sRegLow; // SSA name for low Dalvik word + int32_t sRegLow; // SSA name for low Dalvik word +}; + +struct CompilerTemp { + int sReg; + ArenaBitVector* bv; }; /* @@ -101,6 +106,11 @@ struct RegisterPool { #define INVALID_REG (0xFF) #define INVALID_OFFSET (-1) +/* SSA encodings for special registers */ +#define SSA_METHOD_BASEREG (-1) +/* First compiler temp basereg, grows smaller */ +#define SSA_CTEMP_BASEREG (-2) + /* * Some code patterns cause the generation of excessively large * methods - in particular initialization sequences. There isn't much @@ -153,8 +163,7 @@ enum ExtendedMIROpcode { kMirOpNullNRangeUpCheck, kMirOpNullNRangeDownCheck, kMirOpLowerBound, - kMirOpPunt, - kMirOpCheckInlinePrediction, // Gen checks for predicted inlining + kMirOpCopy, kMirOpLast, }; @@ -169,6 +178,7 @@ enum MIROptimizationFlagPositons { kMIRInlinedPred, // Invoke is inlined via prediction kMIRCallee, // Instruction is inlined from callee kMIRIgnoreSuspendCheck, + kMIRDup, }; #define MIR_IGNORE_NULL_CHECK (1 << kMIRIgnoreNullCheck) @@ -179,6 +189,7 @@ enum MIROptimizationFlagPositons { #define MIR_INLINED_PRED (1 << kMIRInlinedPred) #define MIR_CALLEE (1 << kMIRCallee) #define MIR_IGNORE_SUSPEND_CHECK (1 << kMIRIgnoreSuspendCheck) +#define MIR_DUP (1 << kMIRDup) struct CallsiteInfo { const char* classDescriptor; @@ -222,6 +233,7 @@ struct BasicBlock { bool visited; bool hidden; bool catchEntry; + bool fallThroughTarget; // Reached via fallthrough unsigned int startOffset; const Method* containingMethod; // For blocks from the callee BBType blockType; @@ -310,12 +322,13 @@ struct CompilationUnit { InstructionSet instructionSet; /* Number of total regs used in the whole cUnit after SSA transformation */ int numSSARegs; - /* Map SSA reg i to the Dalvik[15..0]/Sub[31..16] pair. */ - GrowableList* ssaToDalvikMap; + /* Map SSA reg i to the base virtual register/subscript */ + GrowableList* ssaBaseVRegs; + GrowableList* ssaSubscripts; /* The following are new data structures to support SSA representations */ - /* Map original Dalvik reg i to the SSA[15..0]/Sub[31..16] pair */ - int* dalvikToSSAMap; // length == method->registersSize + /* Map original Dalvik virtual reg i to the current SSA name */ + int* vRegToSSAMap; // length == method->registersSize int* SSALastDefs; // length == method->registersSize ArenaBitVector* isConstantV; // length == numSSAReg int* constantValues; // length == numSSAReg @@ -329,6 +342,9 @@ struct CompilationUnit { /* Keep track of Dalvik vReg to physical register mappings */ PromotionMap* promotionMap; + /* SSA name for Method* */ + int methodSReg; + /* * Set to the Dalvik PC of the switch instruction if it has more than * MAX_CHAINED_SWITCH_CASES cases. @@ -336,7 +352,7 @@ struct CompilationUnit { const u2* switchOverflowPad; int numReachableBlocks; - int numDalvikRegisters; // method->registersSize + inlined + int numDalvikRegisters; // method->registersSize BasicBlock* entryBlock; BasicBlock* exitBlock; BasicBlock* curBlock; @@ -346,6 +362,7 @@ struct CompilationUnit { GrowableList domPostOrderTraversal; GrowableList throwLaunchpads; GrowableList suspendLaunchpads; + GrowableList compilerTemps; int* iDomList; ArenaBitVector* tryBlockAddr; ArenaBitVector** defBlockMatrix; // numDalvikRegister x numBlocks diff --git a/src/compiler/CompilerUtility.h b/src/compiler/CompilerUtility.h index 357fe5114f..41f6cf16bd 100644 --- a/src/compiler/CompilerUtility.h +++ b/src/compiler/CompilerUtility.h @@ -118,6 +118,7 @@ bool oatUnifyBitVectors(ArenaBitVector* dest, const ArenaBitVector* src1, const ArenaBitVector* src2); bool oatCompareBitVectors(const ArenaBitVector* src1, const ArenaBitVector* src2); +bool oatTestBitVectors(const ArenaBitVector* src1, const ArenaBitVector* src2); int oatCountSetBits(const ArenaBitVector* pBits); void oatDumpLIRInsn(CompilationUnit* cUnit, struct LIR* lir, diff --git a/src/compiler/Dataflow.cc b/src/compiler/Dataflow.cc index ad522bf482..581c463597 100644 --- a/src/compiler/Dataflow.cc +++ b/src/compiler/Dataflow.cc @@ -807,17 +807,19 @@ const int oatDataFlowAttributes[kMirOpLast] = { */ }; -/* Return the Dalvik register/subscript pair of a given SSA register */ -int oatConvertSSARegToDalvik(const CompilationUnit* cUnit, int ssaReg) +/* Return the base virtual register for a SSA name */ +int SRegToVReg(const CompilationUnit* cUnit, int ssaReg) { - return GET_ELEM_N(cUnit->ssaToDalvikMap, int, ssaReg); + DCHECK_LT(ssaReg, (int)cUnit->ssaBaseVRegs->numUsed); + return GET_ELEM_N(cUnit->ssaBaseVRegs, int, ssaReg); +} + +int SRegToSubscript(const CompilationUnit* cUnit, int ssaReg) +{ + DCHECK(ssaReg < (int)cUnit->ssaSubscripts->numUsed); + return GET_ELEM_N(cUnit->ssaSubscripts, int, ssaReg); } -/* - * Utility function to convert encoded SSA register value into Dalvik register - * and subscript pair. Each SSA register can be used to index the - * ssaToDalvikMap list to get the subscript[31..16]/dalvik_reg[15..0] mapping. - */ char* oatGetDalvikDisassembly(CompilationUnit* cUnit, const DecodedInstruction& insn, const char* note) { @@ -904,10 +906,8 @@ char* oatGetDalvikDisassembly(CompilationUnit* cUnit, char* getSSAName(const CompilationUnit* cUnit, int ssaReg, char* name) { - int ssa2DalvikValue = oatConvertSSARegToDalvik(cUnit, ssaReg); - - sprintf(name, "v%d_%d", - DECODE_REG(ssa2DalvikValue), DECODE_SUB(ssa2DalvikValue)); + sprintf(name, "v%d_%d", SRegToVReg(cUnit, ssaReg), + SRegToSubscript(cUnit, ssaReg)); return name; } @@ -1033,11 +1033,6 @@ done: return ret; } -/* - * Utility function to convert encoded SSA register value into Dalvik register - * and subscript pair. Each SSA register can be used to index the - * ssaToDalvikMap list to get the subscript[31..16]/dalvik_reg[15..0] mapping. - */ char* oatGetSSAString(CompilationUnit* cUnit, SSARepresentation* ssaRep) { char buffer[256]; @@ -1046,11 +1041,9 @@ char* oatGetSSAString(CompilationUnit* cUnit, SSARepresentation* ssaRep) buffer[0] = 0; for (i = 0; i < ssaRep->numDefs; i++) { - int ssa2DalvikValue = oatConvertSSARegToDalvik(cUnit, ssaRep->defs[i]); - - sprintf(buffer + strlen(buffer), "s%d(v%d_%d) ", - ssaRep->defs[i], DECODE_REG(ssa2DalvikValue), - DECODE_SUB(ssa2DalvikValue)); + int ssaReg = ssaRep->defs[i]; + sprintf(buffer + strlen(buffer), "s%d(v%d_%d) ", ssaReg, + SRegToVReg(cUnit, ssaReg), SRegToSubscript(cUnit, ssaReg)); } if (ssaRep->numDefs) { @@ -1058,12 +1051,12 @@ char* oatGetSSAString(CompilationUnit* cUnit, SSARepresentation* ssaRep) } for (i = 0; i < ssaRep->numUses; i++) { - int ssa2DalvikValue = oatConvertSSARegToDalvik(cUnit, ssaRep->uses[i]); int len = strlen(buffer); + int ssaReg = ssaRep->uses[i]; - if (snprintf(buffer + len, 250 - len, "s%d(v%d_%d) ", - ssaRep->uses[i], DECODE_REG(ssa2DalvikValue), - DECODE_SUB(ssa2DalvikValue)) >= (250 - len)) { + if (snprintf(buffer + len, 250 - len, "s%d(v%d_%d) ", ssaReg, + SRegToVReg(cUnit, ssaReg), + SRegToSubscript(cUnit, ssaReg))) { strcat(buffer, "..."); break; } @@ -1157,29 +1150,32 @@ bool oatFindLocalLiveIn(CompilationUnit* cUnit, BasicBlock* bb) return true; } +int addNewSReg(CompilationUnit* cUnit, int vReg) +{ + // Compiler temps always have a subscript of 0 + int subscript = (vReg < 0) ? 0 : ++cUnit->SSALastDefs[vReg]; + int ssaReg = cUnit->numSSARegs++; + oatInsertGrowableList(cUnit, cUnit->ssaBaseVRegs, vReg); + oatInsertGrowableList(cUnit, cUnit->ssaSubscripts, subscript); + DCHECK_EQ(cUnit->ssaBaseVRegs->numUsed, cUnit->ssaSubscripts->numUsed); + return ssaReg; +} + /* Find out the latest SSA register for a given Dalvik register */ void handleSSAUse(CompilationUnit* cUnit, int* uses, int dalvikReg, int regIndex) { - int encodedValue = cUnit->dalvikToSSAMap[dalvikReg]; - int ssaReg = DECODE_REG(encodedValue); - uses[regIndex] = ssaReg; + DCHECK((dalvikReg >= 0) && (dalvikReg < cUnit->numDalvikRegisters)); + uses[regIndex] = cUnit->vRegToSSAMap[dalvikReg]; } /* Setup a new SSA register for a given Dalvik register */ void handleSSADef(CompilationUnit* cUnit, int* defs, int dalvikReg, int regIndex) { - int ssaReg = cUnit->numSSARegs++; - /* Bump up the subscript */ - int dalvikSub = ++cUnit->SSALastDefs[dalvikReg]; - int newD2SMapping = ENCODE_REG_SUB(ssaReg, dalvikSub); - - cUnit->dalvikToSSAMap[dalvikReg] = newD2SMapping; - - int newS2DMapping = ENCODE_REG_SUB(dalvikReg, dalvikSub); - oatInsertGrowableList(cUnit, cUnit->ssaToDalvikMap, newS2DMapping); - + DCHECK((dalvikReg >= 0) && (dalvikReg < cUnit->numDalvikRegisters)); + int ssaReg = addNewSReg(cUnit, dalvikReg); + cUnit->vRegToSSAMap[dalvikReg] = ssaReg; defs[regIndex] = ssaReg; } @@ -1351,11 +1347,11 @@ bool oatDoSSAConversion(CompilationUnit* cUnit, BasicBlock* bb) * input to PHI nodes can be derived from the snapshot of all * predecessor blocks. */ - bb->dataFlowInfo->dalvikToSSAMap = + bb->dataFlowInfo->vRegToSSAMap = (int *)oatNew(cUnit, sizeof(int) * cUnit->numDalvikRegisters, false, kAllocDFInfo); - memcpy(bb->dataFlowInfo->dalvikToSSAMap, cUnit->dalvikToSSAMap, + memcpy(bb->dataFlowInfo->vRegToSSAMap, cUnit->vRegToSSAMap, sizeof(int) * cUnit->numDalvikRegisters); } return true; @@ -1447,10 +1443,15 @@ void oatInitializeSSAConversion(CompilationUnit* cUnit) int i; int numDalvikReg = cUnit->numDalvikRegisters; - cUnit->ssaToDalvikMap = (GrowableList *)oatNew(cUnit, sizeof(GrowableList), - false, kAllocDFInfo); - // Create the SSAtoDalvikMap, estimating the max size - oatInitGrowableList(cUnit, cUnit->ssaToDalvikMap, + cUnit->ssaBaseVRegs = (GrowableList *)oatNew(cUnit, sizeof(GrowableList), + false, kAllocDFInfo); + cUnit->ssaSubscripts = (GrowableList *)oatNew(cUnit, sizeof(GrowableList), + false, kAllocDFInfo); + // Create the ssa mappings, estimating the max size + oatInitGrowableList(cUnit, cUnit->ssaBaseVRegs, + numDalvikReg + cUnit->defCount + 128, + kListSSAtoDalvikMap); + oatInitGrowableList(cUnit, cUnit->ssaSubscripts, numDalvikReg + cUnit->defCount + 128, kListSSAtoDalvikMap); /* @@ -1465,26 +1466,28 @@ void oatInitializeSSAConversion(CompilationUnit* cUnit) * into "(0 << 16) | i" */ for (i = 0; i < numDalvikReg; i++) { - oatInsertGrowableList(cUnit, cUnit->ssaToDalvikMap, - ENCODE_REG_SUB(i, 0)); + oatInsertGrowableList(cUnit, cUnit->ssaBaseVRegs, i); + oatInsertGrowableList(cUnit, cUnit->ssaSubscripts, 0); } /* - * Initialize the DalvikToSSAMap map. The low 16 bit is the SSA register id, - * while the high 16 bit is the current subscript. The original Dalvik - * register N is mapped to SSA register N with subscript 0. + * Initialize the DalvikToSSAMap map. There is one entry for each + * Dalvik register, and the SSA names for those are the same. */ - cUnit->dalvikToSSAMap = (int *)oatNew(cUnit, sizeof(int) * numDalvikReg, + cUnit->vRegToSSAMap = (int *)oatNew(cUnit, sizeof(int) * numDalvikReg, false, kAllocDFInfo); /* Keep track of the higest def for each dalvik reg */ cUnit->SSALastDefs = (int *)oatNew(cUnit, sizeof(int) * numDalvikReg, false, kAllocDFInfo); for (i = 0; i < numDalvikReg; i++) { - cUnit->dalvikToSSAMap[i] = i; + cUnit->vRegToSSAMap[i] = i; cUnit->SSALastDefs[i] = 0; } + /* Add ssa reg for Method* */ + cUnit->methodSReg = addNewSReg(cUnit, SSA_METHOD_BASEREG); + /* * Allocate the BasicBlockDataFlow structure for the entry and code blocks */ @@ -1627,6 +1630,160 @@ void oatDataFlowAnalysisDispatcher(CompilationUnit* cUnit, } } +/* Advance to next strictly dominated MIR node in an extended basic block */ +MIR* advanceMIR(CompilationUnit* cUnit, BasicBlock** pBb, MIR* mir, ArenaBitVector* bv) { + BasicBlock* bb = *pBb; + if (mir != NULL) { + mir = mir->next; + if (mir == NULL) { + bb = bb->fallThrough; + if ((bb == NULL) || bb->predecessors->numUsed != 1) { + mir = NULL; + } else { + if (bv) { + oatSetBit(cUnit, bv, bb->id); + } + *pBb = bb; + mir = bb->firstMIRInsn; + } + } + } + return mir; +} + +/* Allocate a compiler temp, return Sreg. Reuse existing if no conflict */ +int allocCompilerTempSreg(CompilationUnit* cUnit, ArenaBitVector* bv) +{ + for (int i = 0; i < cUnit->numCompilerTemps; i++) { + CompilerTemp* ct = (CompilerTemp*)cUnit->compilerTemps.elemList[i]; + ArenaBitVector* tBv = ct->bv; + if (!oatTestBitVectors(bv, tBv)) { + // Combine live maps and reuse existing temp + oatUnifyBitVectors(tBv, tBv, bv); + return ct->sReg; + } + } + + // Create a new compiler temp & associated live bitmap + CompilerTemp* ct = (CompilerTemp*)oatNew(cUnit, sizeof(CompilerTemp), + true, kAllocMisc); + ArenaBitVector *nBv = oatAllocBitVector(cUnit, cUnit->numBlocks, true, + kBitMapMisc); + oatCopyBitVector(nBv, bv); + ct->bv = nBv; + ct->sReg = addNewSReg(cUnit, SSA_CTEMP_BASEREG - cUnit->numCompilerTemps); + cUnit->numCompilerTemps++; + oatInsertGrowableList(cUnit, &cUnit->compilerTemps, (intptr_t)ct); + DCHECK_EQ(cUnit->numCompilerTemps, (int)cUnit->compilerTemps.numUsed); + return ct->sReg; +} + +/* Creata a new MIR node for a new pseudo op. */ +MIR* rawMIR(CompilationUnit* cUnit, Instruction::Code opcode, int defs, int uses) +{ + MIR* res = (MIR*)oatNew( cUnit, sizeof(MIR), true, kAllocMIR); + res->ssaRep =(struct SSARepresentation *) + oatNew(cUnit, sizeof(SSARepresentation), true, kAllocDFInfo); + if (uses) { + res->ssaRep->numUses = uses; + res->ssaRep->uses = (int*)oatNew(cUnit, sizeof(int) * uses, false, kAllocDFInfo); + } + if (defs) { + res->ssaRep->numDefs = defs; + res->ssaRep->defs = (int*)oatNew(cUnit, sizeof(int) * defs, false, kAllocDFInfo); + res->ssaRep->fpDef = (bool*)oatNew(cUnit, sizeof(bool) * defs, true, kAllocDFInfo); + } + res->dalvikInsn.opcode = opcode; + return res; +} + +/* Do some MIR-level basic block optimizations */ +bool basicBlockOpt(CompilationUnit* cUnit, BasicBlock* bb) +{ + int numTemps = 0; + + for (MIR* mir = bb->firstMIRInsn; mir; mir = mir->next) { + // Look for interesting opcodes, skip otherwise + switch(mir->dalvikInsn.opcode) { + case Instruction::IGET_OBJECT: { + // TODO: look for CSE + if (mir->optimizationFlags & MIR_DUP) { + break; + } + ArenaBitVector* tempBlockV = cUnit->tempBlockV; + oatClearAllBits(tempBlockV); + oatSetBit(cUnit, tempBlockV, bb->id); + int objSreg = mir->ssaRep->uses[0]; + int dstSreg = mir->ssaRep->defs[0]; + uint32_t fieldIdx = mir->dalvikInsn.vC; + int matches = 0; + BasicBlock* tbb = bb; + MIR* tm = mir; + while (true) { + tm = advanceMIR(cUnit, &tbb, tm, tempBlockV); + if ((tm == NULL) || (tm == mir)) { + break; + } + Instruction::Code opcode = tm->dalvikInsn.opcode; + if ((opcode == Instruction::IGET_OBJECT) + && (tm->ssaRep->uses[0] == objSreg) + && (tm->dalvikInsn.vC == fieldIdx)) { + if (cUnit->printMe) { + LOG(INFO) << "Got DUP IGET_OBJECT @ 0x" + << std::hex << tm->offset << ", from 0x" + << std::hex <<mir->offset; + } + matches++; + } else if ((opcode == Instruction::IPUT_OBJECT) + && (tm->ssaRep->uses[0] == objSreg) + && (tm->dalvikInsn.vC == fieldIdx)) { + if (cUnit->printMe) { + LOG(INFO) << "Clobbered IGET_OBJECT @ 0x" + << std::hex << tm->offset; + } + break; + } + } + if (matches >= 2) { + // Allocate compiler temp, redirect 1st load to temp, + // insert copy to real target. Convert all dups to + // copies and rename all uses. + int tmpSreg = allocCompilerTempSreg(cUnit, tempBlockV); + MIR* newMir = rawMIR(cUnit, (Instruction::Code)kMirOpCopy, 1, 1); + newMir->ssaRep->defs[0] = dstSreg; + newMir->ssaRep->uses[0] = tmpSreg; + mir->ssaRep->defs[0] = tmpSreg; + oatInsertMIRAfter(bb, mir, newMir); + } + } + break; + case Instruction::IF_EQ: + case Instruction::IF_NE: + case Instruction::IF_LT: + case Instruction::IF_GE: + case Instruction::IF_GT: + case Instruction::IF_LE: + // TODO: Check for and fuse preceeding comparison + break; + case Instruction::IF_EQZ: + case Instruction::IF_NEZ: + case Instruction::IF_LTZ: + case Instruction::IF_GEZ: + case Instruction::IF_GTZ: + case Instruction::IF_LEZ: + // TODO: Check for and fuse preceeding comparison + break; + default: + break; + } + } + + if (numTemps > cUnit->numCompilerTemps) { + cUnit->numCompilerTemps = numTemps; + } + return true; +} + bool nullCheckEliminationInit(struct CompilationUnit* cUnit, struct BasicBlock* bb) { @@ -1779,4 +1936,15 @@ void oatMethodNullCheckElimination(CompilationUnit *cUnit) } } +void oatMethodBasicBlockOptimization(CompilationUnit *cUnit) +{ + oatInitGrowableList(cUnit, &cUnit->compilerTemps, 6, kListMisc); + DCHECK_EQ(cUnit->numCompilerTemps, 0); + if (!(cUnit->disableOpt & (1 << kBBOpt))) { + oatDataFlowAnalysisDispatcher(cUnit, basicBlockOpt, + kAllNodes, + false /* isIterative */); + } +} + } // namespace art diff --git a/src/compiler/Dataflow.h b/src/compiler/Dataflow.h index a9917a3396..2df9373b85 100644 --- a/src/compiler/Dataflow.h +++ b/src/compiler/Dataflow.h @@ -115,7 +115,7 @@ struct BasicBlockDataFlow { ArenaBitVector* defV; ArenaBitVector* liveInV; ArenaBitVector* phiV; - int* dalvikToSSAMap; + int* vRegToSSAMap; ArenaBitVector* endingNullCheckV; }; @@ -147,13 +147,10 @@ struct ArrayAccessInfo { int minC; // For DIV - will affect lower bound checking }; -#define ENCODE_REG_SUB(r,s) ((s<<16) | r) -#define DECODE_REG(v) (v & 0xffff) -#define DECODE_SUB(v) (((unsigned int) v) >> 16) - - void oatMethodNullCheckElimination(CompilationUnit*); +void oatMethodBasicBlockOptimization(CompilationUnit*); + } // namespace art #endif // ART_SRC_COMPILER_DATAFLOW_H_ diff --git a/src/compiler/Frontend.cc b/src/compiler/Frontend.cc index e1b2c608bb..8ffcc72590 100644 --- a/src/compiler/Frontend.cc +++ b/src/compiler/Frontend.cc @@ -34,6 +34,7 @@ uint32_t compilerOptimizerDisableFlags = 0 | // Disable specific optimizations //(1 << kTrackLiveTemps) | //(1 << kSkipLargeMethodOptimization) | //(1 << kSafeOptimizations) | + (1 << kBBOpt) | 0; uint32_t compilerDebugFlags = 0 | // Enable debug/testing modes @@ -970,6 +971,11 @@ CompiledMethod* oatCompileMethod(Compiler& compiler, /* Perform null check elimination */ oatMethodNullCheckElimination(cUnit.get()); +#if 0 + /* Do some basic block optimizations */ + oatMethodBasicBlockOptimization(cUnit.get()); +#endif + oatInitializeRegAlloc(cUnit.get()); // Needs to happen after SSA naming /* Allocate Registers using simple local allocation scheme */ diff --git a/src/compiler/IntermediateRep.cc b/src/compiler/IntermediateRep.cc index 662687777d..d1ba45c18c 100644 --- a/src/compiler/IntermediateRep.cc +++ b/src/compiler/IntermediateRep.cc @@ -110,7 +110,7 @@ void oatPrependMIR(BasicBlock* bb, MIR* mir) } } -/* Insert an MIR instruction after the specified MIR */ +/* Insert a MIR instruction after the specified MIR */ void oatInsertMIRAfter(BasicBlock* bb, MIR* currentMIR, MIR* newMIR) { newMIR->prev = currentMIR; diff --git a/src/compiler/Ralloc.cc b/src/compiler/Ralloc.cc index 2d85812d97..dfb25abc8f 100644 --- a/src/compiler/Ralloc.cc +++ b/src/compiler/Ralloc.cc @@ -108,8 +108,8 @@ bool inferTypeAndSize(CompilationUnit* cUnit, BasicBlock* bb) if (attrs & DF_DA_WIDE) { cUnit->regLocation[ssaRep->defs[0]].wide = true; cUnit->regLocation[ssaRep->defs[1]].highWord = true; - DCHECK_EQ(oatS2VReg(cUnit, ssaRep->defs[0])+1, - oatS2VReg(cUnit, ssaRep->defs[1])); + DCHECK_EQ(SRegToVReg(cUnit, ssaRep->defs[0])+1, + SRegToVReg(cUnit, ssaRep->defs[1])); } } @@ -122,8 +122,8 @@ bool inferTypeAndSize(CompilationUnit* cUnit, BasicBlock* bb) if (attrs & DF_UA_WIDE) { cUnit->regLocation[ssaRep->uses[next]].wide = true; cUnit->regLocation[ssaRep->uses[next + 1]].highWord = true; - DCHECK_EQ(oatS2VReg(cUnit, ssaRep->uses[next])+1, - oatS2VReg(cUnit, ssaRep->uses[next + 1])); + DCHECK_EQ(SRegToVReg(cUnit, ssaRep->uses[next])+1, + SRegToVReg(cUnit, ssaRep->uses[next + 1])); next += 2; } else { next++; @@ -136,8 +136,8 @@ bool inferTypeAndSize(CompilationUnit* cUnit, BasicBlock* bb) if (attrs & DF_UB_WIDE) { cUnit->regLocation[ssaRep->uses[next]].wide = true; cUnit->regLocation[ssaRep->uses[next + 1]].highWord = true; - DCHECK_EQ(oatS2VReg(cUnit, ssaRep->uses[next])+1, - oatS2VReg(cUnit, ssaRep->uses[next + 1])); + DCHECK_EQ(SRegToVReg(cUnit, ssaRep->uses[next])+1, + SRegToVReg(cUnit, ssaRep->uses[next + 1])); next += 2; } else { next++; @@ -150,8 +150,8 @@ bool inferTypeAndSize(CompilationUnit* cUnit, BasicBlock* bb) if (attrs & DF_UC_WIDE) { cUnit->regLocation[ssaRep->uses[next]].wide = true; cUnit->regLocation[ssaRep->uses[next + 1]].highWord = true; - DCHECK_EQ(oatS2VReg(cUnit, ssaRep->uses[next])+1, - oatS2VReg(cUnit, ssaRep->uses[next + 1])); + DCHECK_EQ(SRegToVReg(cUnit, ssaRep->uses[next])+1, + SRegToVReg(cUnit, ssaRep->uses[next + 1])); } } @@ -200,16 +200,16 @@ bool inferTypeAndSize(CompilationUnit* cUnit, BasicBlock* bb) cUnit->regLocation[ssaRep->uses[i]].wide = true; cUnit->regLocation[ssaRep->uses[i+1]].highWord = true; - DCHECK_EQ(oatS2VReg(cUnit, ssaRep->uses[i])+1, - oatS2VReg(cUnit, ssaRep->uses[i+1])); + DCHECK_EQ(SRegToVReg(cUnit, ssaRep->uses[i])+1, + SRegToVReg(cUnit, ssaRep->uses[i+1])); i++; break; case 'J': cUnit->regLocation[ssaRep->uses[i]].wide = true; cUnit->regLocation[ssaRep->uses[i+1]].highWord = true; - DCHECK_EQ(oatS2VReg(cUnit, ssaRep->uses[i])+1, - oatS2VReg(cUnit, ssaRep->uses[i+1])); + DCHECK_EQ(SRegToVReg(cUnit, ssaRep->uses[i])+1, + SRegToVReg(cUnit, ssaRep->uses[i+1])); changed |= setCore(cUnit, ssaRep->uses[i],true); i++; break; @@ -320,13 +320,24 @@ void oatSimpleRegAlloc(CompilationUnit* cUnit) loc[i] = freshLoc; loc[i].sRegLow = i; } + + /* Patch up the locations for Method* and the compiler temps */ + loc[cUnit->methodSReg].location = kLocCompilerTemp; + for (i = 0; i < cUnit->numCompilerTemps; i++) { + CompilerTemp* ct = (CompilerTemp*)cUnit->compilerTemps.elemList[i]; + loc[ct->sReg].location = kLocCompilerTemp; + } + cUnit->regLocation = loc; /* Allocation the promotion map */ int numRegs = cUnit->numDalvikRegisters; - cUnit->promotionMap = - (PromotionMap*)oatNew(cUnit, numRegs * sizeof(cUnit->promotionMap[0]), - true, kAllocRegAlloc); + PromotionMap* tMap = + (PromotionMap*)oatNew(cUnit, (numRegs + cUnit->numCompilerTemps + 1) * + sizeof(cUnit->promotionMap[0]), true, + kAllocRegAlloc); + // Bias the promotion map + cUnit->promotionMap = &tMap[cUnit->numCompilerTemps + 1]; /* Add types of incoming arguments based on signature */ int numIns = cUnit->numIns; @@ -346,8 +357,8 @@ void oatSimpleRegAlloc(CompilationUnit* cUnit) cUnit->regLocation[sReg].wide = true; cUnit->regLocation[sReg+1].highWord = true; cUnit->regLocation[sReg+1].fp = true; - DCHECK_EQ(oatS2VReg(cUnit, sReg)+1, - oatS2VReg(cUnit, sReg+1)); + DCHECK_EQ(SRegToVReg(cUnit, sReg)+1, + SRegToVReg(cUnit, sReg+1)); cUnit->regLocation[sReg].fp = true; cUnit->regLocation[sReg].defined = true; sReg++; @@ -355,8 +366,8 @@ void oatSimpleRegAlloc(CompilationUnit* cUnit) case 'J': cUnit->regLocation[sReg].wide = true; cUnit->regLocation[sReg+1].highWord = true; - DCHECK_EQ(oatS2VReg(cUnit, sReg)+1, - oatS2VReg(cUnit, sReg+1)); + DCHECK_EQ(SRegToVReg(cUnit, sReg)+1, + SRegToVReg(cUnit, sReg+1)); cUnit->regLocation[sReg].core = true; cUnit->regLocation[sReg].defined = true; sReg++; @@ -390,8 +401,9 @@ void oatSimpleRegAlloc(CompilationUnit* cUnit) * allocator, remove this remapping. */ for (i=0; i < cUnit->numSSARegs; i++) { - cUnit->regLocation[i].sRegLow = - DECODE_REG(oatConvertSSARegToDalvik(cUnit, loc[i].sRegLow)); + if (cUnit->regLocation[i].location != kLocCompilerTemp) { + cUnit->regLocation[i].sRegLow = SRegToVReg(cUnit, loc[i].sRegLow); + } } cUnit->coreSpillMask = 0; diff --git a/src/compiler/SSATransformation.cc b/src/compiler/SSATransformation.cc index cdb711e44b..2daa533a7b 100644 --- a/src/compiler/SSATransformation.cc +++ b/src/compiler/SSATransformation.cc @@ -29,7 +29,10 @@ void recordDFSOrders(CompilationUnit* cUnit, BasicBlock* block) /* Enqueue the preOrder block id */ oatInsertGrowableList(cUnit, &cUnit->dfsOrder, block->id); - if (block->fallThrough) recordDFSOrders(cUnit, block->fallThrough); + if (block->fallThrough) { + block->fallThrough->fallThroughTarget = true; + recordDFSOrders(cUnit, block->fallThrough); + } if (block->taken) recordDFSOrders(cUnit, block->taken); if (block->successorBlockList.blockListType != kNotUsed) { GrowableListIterator iterator; @@ -669,9 +672,8 @@ bool insertPhiNodeOperands(CompilationUnit* cUnit, BasicBlock* bb) if (mir->dalvikInsn.opcode != (Instruction::Code)kMirOpPhi) return true; int ssaReg = mir->ssaRep->defs[0]; - int encodedDalvikValue = - (int) oatGrowableListGetElement(cUnit->ssaToDalvikMap, ssaReg); - int dalvikReg = DECODE_REG(encodedDalvikValue); + DCHECK_GE(ssaReg, 0); // Shouldn't see compiler temps here + int vReg = SRegToVReg(cUnit, ssaReg); oatClearAllBits(ssaRegV); @@ -681,9 +683,8 @@ bool insertPhiNodeOperands(CompilationUnit* cUnit, BasicBlock* bb) BasicBlock* predBB = (BasicBlock*)oatGrowableListIteratorNext(&iter); if (!predBB) break; - int encodedSSAValue = - predBB->dataFlowInfo->dalvikToSSAMap[dalvikReg]; - int ssaReg = DECODE_REG(encodedSSAValue); + int ssaReg = + predBB->dataFlowInfo->vRegToSSAMap[vReg]; oatSetBit(cUnit, ssaRegV, ssaReg); } @@ -724,17 +725,17 @@ void doDFSPreOrderSSARename(CompilationUnit* cUnit, BasicBlock* block) /* Save SSA map snapshot */ int* savedSSAMap = (int*)oatNew(cUnit, mapSize, false, kAllocDalvikToSSAMap); - memcpy(savedSSAMap, cUnit->dalvikToSSAMap, mapSize); + memcpy(savedSSAMap, cUnit->vRegToSSAMap, mapSize); if (block->fallThrough) { doDFSPreOrderSSARename(cUnit, block->fallThrough); /* Restore SSA map snapshot */ - memcpy(cUnit->dalvikToSSAMap, savedSSAMap, mapSize); + memcpy(cUnit->vRegToSSAMap, savedSSAMap, mapSize); } if (block->taken) { doDFSPreOrderSSARename(cUnit, block->taken); /* Restore SSA map snapshot */ - memcpy(cUnit->dalvikToSSAMap, savedSSAMap, mapSize); + memcpy(cUnit->vRegToSSAMap, savedSSAMap, mapSize); } if (block->successorBlockList.blockListType != kNotUsed) { GrowableListIterator iterator; @@ -747,10 +748,10 @@ void doDFSPreOrderSSARename(CompilationUnit* cUnit, BasicBlock* block) BasicBlock* succBB = successorBlockInfo->block; doDFSPreOrderSSARename(cUnit, succBB); /* Restore SSA map snapshot */ - memcpy(cUnit->dalvikToSSAMap, savedSSAMap, mapSize); + memcpy(cUnit->vRegToSSAMap, savedSSAMap, mapSize); } } - cUnit->dalvikToSSAMap = savedSSAMap; + cUnit->vRegToSSAMap = savedSSAMap; return; } diff --git a/src/compiler/Utility.cc b/src/compiler/Utility.cc index 082f7a4c6f..3674aa9299 100644 --- a/src/compiler/Utility.cc +++ b/src/compiler/Utility.cc @@ -574,6 +574,19 @@ bool oatUnifyBitVectors(ArenaBitVector* dest, const ArenaBitVector* src1, } /* + * Return true if any bits collide. Vectors must be same size. + */ +bool oatTestBitVectors(const ArenaBitVector* src1, + const ArenaBitVector* src2) +{ + DCHECK_EQ(src1->storageSize, src2->storageSize); + for (uint32_t idx = 0; idx < src1->storageSize; idx++) { + if (src1->storage[idx] & src2->storage[idx]) return true; + } + return false; +} + +/* * Compare two bit vectors and return true if difference is seen. */ bool oatCompareBitVectors(const ArenaBitVector* src1, diff --git a/src/compiler/codegen/CodegenFactory.cc b/src/compiler/codegen/CodegenFactory.cc index 8a6e1bc967..5444816ddf 100644 --- a/src/compiler/codegen/CodegenFactory.cc +++ b/src/compiler/codegen/CodegenFactory.cc @@ -65,7 +65,8 @@ void loadValueDirect(CompilationUnit* cUnit, RegLocation rlSrc, int reg1) if (rlSrc.location == kLocPhysReg) { opRegCopy(cUnit, reg1, rlSrc.lowReg); } else { - DCHECK(rlSrc.location == kLocDalvikFrame); + DCHECK((rlSrc.location == kLocDalvikFrame) || + (rlSrc.location == kLocCompilerTemp)); loadWordDisp(cUnit, rSP, oatSRegOffset(cUnit, rlSrc.sRegLow), reg1); } } @@ -94,7 +95,8 @@ void loadValueDirectWide(CompilationUnit* cUnit, RegLocation rlSrc, int regLo, if (rlSrc.location == kLocPhysReg) { opRegCopyWide(cUnit, regLo, regHi, rlSrc.lowReg, rlSrc.highReg); } else { - DCHECK(rlSrc.location == kLocDalvikFrame); + DCHECK((rlSrc.location == kLocDalvikFrame) || + (rlSrc.location == kLocCompilerTemp)); loadBaseDispWide(cUnit, NULL, rSP, oatSRegOffset(cUnit, rlSrc.sRegLow), regLo, regHi, INVALID_SREG); @@ -120,7 +122,9 @@ RegLocation loadValue(CompilationUnit* cUnit, RegLocation rlSrc, RegisterClass opKind) { rlSrc = oatEvalLoc(cUnit, rlSrc, opKind, false); - if (rlSrc.location == kLocDalvikFrame) { + if (rlSrc.location != kLocPhysReg) { + DCHECK((rlSrc.location == kLocDalvikFrame) || + (rlSrc.location == kLocCompilerTemp)); loadValueDirect(cUnit, rlSrc, rlSrc.lowReg); rlSrc.location = kLocPhysReg; oatMarkLive(cUnit, rlSrc.lowReg, rlSrc.sRegLow); @@ -176,7 +180,9 @@ RegLocation loadValueWide(CompilationUnit* cUnit, RegLocation rlSrc, { DCHECK(rlSrc.wide); rlSrc = oatEvalLoc(cUnit, rlSrc, opKind, false); - if (rlSrc.location == kLocDalvikFrame) { + if (rlSrc.location != kLocPhysReg) { + DCHECK((rlSrc.location == kLocDalvikFrame) || + (rlSrc.location == kLocCompilerTemp)); loadValueDirectWide(cUnit, rlSrc, rlSrc.lowReg, rlSrc.highReg); rlSrc.location = kLocPhysReg; oatMarkLive(cUnit, rlSrc.lowReg, rlSrc.sRegLow); @@ -232,8 +238,8 @@ void storeValueWide(CompilationUnit* cUnit, RegLocation rlDest, (oatLiveOut(cUnit, rlDest.sRegLow) || oatLiveOut(cUnit, oatSRegHi(rlDest.sRegLow)))) { defStart = (LIR*)cUnit->lastLIRInsn; - DCHECK_EQ((oatS2VReg(cUnit, rlDest.sRegLow)+1), - oatS2VReg(cUnit, oatSRegHi(rlDest.sRegLow))); + DCHECK_EQ((SRegToVReg(cUnit, rlDest.sRegLow)+1), + SRegToVReg(cUnit, oatSRegHi(rlDest.sRegLow))); storeBaseDispWide(cUnit, rSP, oatSRegOffset(cUnit, rlDest.sRegLow), rlDest.lowReg, rlDest.highReg); oatMarkClean(cUnit, rlDest); @@ -265,29 +271,15 @@ void markGCCard(CompilationUnit* cUnit, int valReg, int tgtAddrReg) #endif } -/* - * Utility to load the current Method*. Broken out - * to allow easy change between placing the current Method* in a - * dedicated register or its home location in the frame. - */ +/* Utilities to load the current Method* */ void loadCurrMethodDirect(CompilationUnit *cUnit, int rTgt) { -#if defined(METHOD_IN_REG) - opRegCopy(cUnit, rTgt, rMETHOD); -#else - loadWordDisp(cUnit, rSP, 0, rTgt); -#endif + loadValueDirectFixed(cUnit, cUnit->regLocation[cUnit->methodSReg], rTgt); } -int loadCurrMethod(CompilationUnit *cUnit) +RegLocation loadCurrMethod(CompilationUnit *cUnit) { -#if defined(METHOD_IN_REG) - return rMETHOD; -#else - int mReg = oatAllocTemp(cUnit); - loadCurrMethodDirect(cUnit, mReg); - return mReg; -#endif + return loadValue(cUnit, cUnit->regLocation[cUnit->methodSReg], kCoreReg); } diff --git a/src/compiler/codegen/GenCommon.cc b/src/compiler/codegen/GenCommon.cc index e2c306de37..9b1654f764 100644 --- a/src/compiler/codegen/GenCommon.cc +++ b/src/compiler/codegen/GenCommon.cc @@ -275,10 +275,12 @@ void genFilledNewArray(CompilationUnit* cUnit, MIR* mir, bool isRange) rTgt = loadHelper(cUnit, OFFSETOF_MEMBER(Thread, pCheckAndAllocArrayFromCodeWithAccessCheck)); } - loadCurrMethodDirect(cUnit, rARG1); // arg1 <- Method* loadConstant(cUnit, rARG0, typeId); // arg0 <- type_id loadConstant(cUnit, rARG2, elems); // arg2 <- count + loadCurrMethodDirect(cUnit, rARG1); // arg1 <- Method* callRuntimeHelper(cUnit, rTgt); + oatFreeTemp(cUnit, rARG2); + oatFreeTemp(cUnit, rARG1); /* * NOTE: the implicit target for Instruction::FILLED_NEW_ARRAY is the * return region. Because AllocFromCode placed the new array @@ -387,12 +389,11 @@ void genSput(CompilationUnit* cUnit, MIR* mir, RegLocation rlSrc, if (fastPath && !SLOW_FIELD_PATH) { DCHECK_GE(fieldOffset, 0); int rBase; - int rMethod; if (isReferrersClass) { // Fast path, static storage base is this method's class - rMethod = loadCurrMethod(cUnit); + RegLocation rlMethod = loadCurrMethod(cUnit); rBase = oatAllocTemp(cUnit); - loadWordDisp(cUnit, rMethod, + loadWordDisp(cUnit, rlMethod.lowReg, Method::DeclaringClassOffset().Int32Value(), rBase); } else { // Medium path, static storage base in a different class which @@ -402,7 +403,7 @@ void genSput(CompilationUnit* cUnit, MIR* mir, RegLocation rlSrc, oatFlushAllRegs(cUnit); // Using fixed register to sync with possible call to runtime // support. - rMethod = rARG1; + int rMethod = rARG1; oatLockTemp(cUnit, rMethod); loadCurrMethodDirect(cUnit, rMethod); rBase = rARG0; @@ -427,9 +428,9 @@ void genSput(CompilationUnit* cUnit, MIR* mir, RegLocation rlSrc, #endif LIR* skipTarget = newLIR0(cUnit, kPseudoTargetLabel); branchOver->target = (LIR*)skipTarget; + oatFreeTemp(cUnit, rMethod); } // rBase now holds static storage base - oatFreeTemp(cUnit, rMethod); if (isLongOrDouble) { rlSrc = oatGetSrcWide(cUnit, mir, 0, 1); rlSrc = loadValueWide(cUnit, rlSrc, kAnyReg); @@ -496,12 +497,11 @@ void genSget(CompilationUnit* cUnit, MIR* mir, RegLocation rlDest, if (fastPath && !SLOW_FIELD_PATH) { DCHECK_GE(fieldOffset, 0); int rBase; - int rMethod; if (isReferrersClass) { // Fast path, static storage base is this method's class - rMethod = loadCurrMethod(cUnit); + RegLocation rlMethod = loadCurrMethod(cUnit); rBase = oatAllocTemp(cUnit); - loadWordDisp(cUnit, rMethod, + loadWordDisp(cUnit, rlMethod.lowReg, Method::DeclaringClassOffset().Int32Value(), rBase); } else { // Medium path, static storage base in a different class which @@ -511,7 +511,7 @@ void genSget(CompilationUnit* cUnit, MIR* mir, RegLocation rlDest, oatFlushAllRegs(cUnit); // Using fixed register to sync with possible call to runtime // support - rMethod = rARG1; + int rMethod = rARG1; oatLockTemp(cUnit, rMethod); loadCurrMethodDirect(cUnit, rMethod); rBase = rARG0; @@ -537,9 +537,9 @@ void genSget(CompilationUnit* cUnit, MIR* mir, RegLocation rlDest, #endif LIR* skipTarget = newLIR0(cUnit, kPseudoTargetLabel); branchOver->target = (LIR*)skipTarget; + oatFreeTemp(cUnit, rMethod); } // rBase now holds static storage base - oatFreeTemp(cUnit, rMethod); rlDest = isLongOrDouble ? oatGetDestWide(cUnit, mir, 0, 1) : oatGetDest(cUnit, mir, 0); RegLocation rlResult = oatEvalLoc(cUnit, rlDest, kAnyReg, true); @@ -837,7 +837,7 @@ void genConstClass(CompilationUnit* cUnit, MIR* mir, RegLocation rlDest, RegLocation rlSrc) { uint32_t type_idx = mir->dalvikInsn.vB; - int mReg = loadCurrMethod(cUnit); + RegLocation rlMethod = loadCurrMethod(cUnit); int resReg = oatAllocTemp(cUnit); RegLocation rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true); if (!cUnit->compiler->CanAccessTypeWithoutChecks(cUnit->method_idx, @@ -848,7 +848,7 @@ void genConstClass(CompilationUnit* cUnit, MIR* mir, RegLocation rlDest, // Resolved type returned in rRET0. int rTgt = loadHelper(cUnit, OFFSETOF_MEMBER(Thread, pInitializeTypeAndVerifyAccessFromCode)); - opRegCopy(cUnit, rARG1, mReg); + opRegCopy(cUnit, rARG1, rlMethod.lowReg); loadConstant(cUnit, rARG0, type_idx); callRuntimeHelper(cUnit, rTgt); RegLocation rlResult = oatGetReturn(cUnit); @@ -857,7 +857,7 @@ void genConstClass(CompilationUnit* cUnit, MIR* mir, RegLocation rlDest, // We're don't need access checks, load type from dex cache int32_t dex_cache_offset = Method::DexCacheResolvedTypesOffset().Int32Value(); - loadWordDisp(cUnit, mReg, dex_cache_offset, resReg); + loadWordDisp(cUnit, rlMethod.lowReg, dex_cache_offset, resReg); int32_t offset_of_type = Array::DataOffset(sizeof(Class*)).Int32Value() + (sizeof(Class*) * type_idx); @@ -876,7 +876,7 @@ void genConstClass(CompilationUnit* cUnit, MIR* mir, RegLocation rlDest, // Call out to helper, which will return resolved type in rARG0 int rTgt = loadHelper(cUnit, OFFSETOF_MEMBER(Thread, pInitializeTypeFromCode)); - opRegCopy(cUnit, rARG1, mReg); + opRegCopy(cUnit, rARG1, rlMethod.lowReg); loadConstant(cUnit, rARG0, type_idx); callRuntimeHelper(cUnit, rTgt); RegLocation rlResult = oatGetReturn(cUnit); @@ -930,10 +930,10 @@ void genConstString(CompilationUnit* cUnit, MIR* mir, RegLocation rlDest, genBarrier(cUnit); storeValue(cUnit, rlDest, oatGetReturn(cUnit)); } else { - int mReg = loadCurrMethod(cUnit); + RegLocation rlMethod = loadCurrMethod(cUnit); int resReg = oatAllocTemp(cUnit); RegLocation rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true); - loadWordDisp(cUnit, mReg, + loadWordDisp(cUnit, rlMethod.lowReg, Method::DexCacheStringsOffset().Int32Value(), resReg); loadWordDisp(cUnit, resReg, offset_of_string, rlResult.lowReg); storeValue(cUnit, rlDest, rlResult); diff --git a/src/compiler/codegen/MethodCodegenDriver.cc b/src/compiler/codegen/MethodCodegenDriver.cc index 6b3283e13f..5baabf2f32 100644 --- a/src/compiler/codegen/MethodCodegenDriver.cc +++ b/src/compiler/codegen/MethodCodegenDriver.cc @@ -716,8 +716,7 @@ const char* extendedMIROpNames[kMirOpLast - kMirOpFirst] = { "kMirOpNullNRangeUpCheck", "kMirOpNullNRangeDownCheck", "kMirOpLowerBound", - "kMirOpPunt", - "kMirOpCheckInlinePrediction", + "kMirOpCopy", }; /* Extended MIR instructions like PHI */ @@ -742,6 +741,9 @@ void handleExtendedMethodMIR(CompilationUnit* cUnit, MIR* mir) newLIR1(cUnit, kPseudoSSARep, (int) ssaString); break; } + case kMirOpCopy: + UNIMPLEMENTED(FATAL) << "Need kMirOpCopy"; + break; default: break; } @@ -761,11 +763,19 @@ bool methodBlockCodeGen(CompilationUnit* cUnit, BasicBlock* bb) labelList[blockId].opcode = kPseudoNormalBlockLabel; oatAppendLIR(cUnit, (LIR*) &labelList[blockId]); - /* Reset local optimization data on block boundaries */ + /* Free temp registers and reset redundant store tracking */ oatResetRegPool(cUnit); - oatClobberAllRegs(cUnit); oatResetDefTracking(cUnit); + /* + * If control reached us from our immediate predecessor via + * fallthrough and we have no other incoming arcs we can + * reuse existing liveness. Otherwise, reset. + */ + if (!bb->fallThroughTarget || bb->predecessors->numUsed != 1) { + oatClobberAllRegs(cUnit); + } + LIR* headLIR = NULL; if (bb->blockType == kEntryBlock) { diff --git a/src/compiler/codegen/Ralloc.h b/src/compiler/codegen/Ralloc.h index 8c8c693edb..d32545c248 100644 --- a/src/compiler/codegen/Ralloc.h +++ b/src/compiler/codegen/Ralloc.h @@ -35,12 +35,6 @@ struct RefCounts { }; -inline int oatS2VReg(CompilationUnit* cUnit, int sReg) -{ - DCHECK_NE(sReg, INVALID_SREG); - return DECODE_REG(oatConvertSSARegToDalvik(cUnit, sReg)); -} - /* * Get the "real" sreg number associated with an sReg slot. In general, * sReg values passed through codegen are the SSA names created by diff --git a/src/compiler/codegen/RallocUtil.cc b/src/compiler/codegen/RallocUtil.cc index b5ebf65da0..8f5d1bbe5a 100644 --- a/src/compiler/codegen/RallocUtil.cc +++ b/src/compiler/codegen/RallocUtil.cc @@ -147,6 +147,19 @@ extern void oatClobberSReg(CompilationUnit* cUnit, int sReg) sReg); } +/* Sanity check */ +bool validSreg(CompilationUnit* cUnit, int sReg) +{ + bool res = ((-(cUnit->numCompilerTemps + 1) <= sReg) && + (sReg < cUnit->numDalvikRegisters)); + if (!res) { + LOG(WARNING) << "Bad sreg: " << sReg; + LOG(WARNING) << " low = " << -(cUnit->numCompilerTemps + 1); + LOG(WARNING) << " high = " << cUnit->numRegs; + } + return res; +} + /* Reserve a callee-save register. Return -1 if none available */ extern int oatAllocPreservedCoreReg(CompilationUnit* cUnit, int sReg) { @@ -160,7 +173,8 @@ extern int oatAllocPreservedCoreReg(CompilationUnit* cUnit, int sReg) cUnit->coreVmapTable.push_back(sReg); cUnit->numCoreSpills++; // Should be promoting based on initial sReg set - DCHECK_EQ(sReg, oatS2VReg(cUnit, sReg)); + DCHECK_EQ(sReg, SRegToVReg(cUnit, sReg)); + DCHECK(validSreg(cUnit,sReg)); cUnit->promotionMap[sReg].coreLocation = kLocPhysReg; cUnit->promotionMap[sReg].coreReg = res; break; @@ -184,8 +198,9 @@ int allocPreservedSingle(CompilationUnit* cUnit, int sReg, bool even) res = FPRegs[i].reg; FPRegs[i].inUse = true; // Should be promoting based on initial sReg set - DCHECK_EQ(sReg, oatS2VReg(cUnit, sReg)); + DCHECK_EQ(sReg, SRegToVReg(cUnit, sReg)); oatMarkPreservedSingle(cUnit, sReg, res); + DCHECK(validSreg(cUnit,sReg)); cUnit->promotionMap[sReg].fpLocation = kLocPhysReg; cUnit->promotionMap[sReg].fpReg = res; break; @@ -206,7 +221,8 @@ int allocPreservedDouble(CompilationUnit* cUnit, int sReg) { int res = -1; // Assume failure // Should be promoting based on initial sReg set - DCHECK_EQ(sReg, oatS2VReg(cUnit, sReg)); + DCHECK_EQ(sReg, SRegToVReg(cUnit, sReg)); + DCHECK(validSreg(cUnit,sReg+1)); if (cUnit->promotionMap[sReg+1].fpLocation == kLocPhysReg) { // Upper reg is already allocated. Can we fit? int highReg = cUnit->promotionMap[sReg+1].fpReg; @@ -244,8 +260,10 @@ int allocPreservedDouble(CompilationUnit* cUnit, int sReg) } } if (res != -1) { + DCHECK(validSreg(cUnit,sReg)); cUnit->promotionMap[sReg].fpLocation = kLocPhysReg; cUnit->promotionMap[sReg].fpReg = res; + DCHECK(validSreg(cUnit,sReg+1)); cUnit->promotionMap[sReg+1].fpLocation = kLocPhysReg; cUnit->promotionMap[sReg+1].fpReg = res + 1; } @@ -788,7 +806,9 @@ extern RegLocation oatUpdateLoc(CompilationUnit* cUnit, RegLocation loc) { DCHECK(!loc.wide); DCHECK(oatCheckCorePoolSanity(cUnit)); - if (loc.location == kLocDalvikFrame) { + if (loc.location != kLocPhysReg) { + DCHECK((loc.location == kLocDalvikFrame) || + (loc.location == kLocCompilerTemp)); RegisterInfo* infoLo = allocLive(cUnit, loc.sRegLow, kAnyReg); if (infoLo) { if (infoLo->pair) { @@ -837,7 +857,9 @@ extern RegLocation oatUpdateLocWide(CompilationUnit* cUnit, RegLocation loc) { DCHECK(loc.wide); DCHECK(oatCheckCorePoolSanity(cUnit)); - if (loc.location == kLocDalvikFrame) { + if (loc.location != kLocPhysReg) { + DCHECK((loc.location == kLocDalvikFrame) || + (loc.location == kLocCompilerTemp)); // Are the dalvik regs already live in physical registers? RegisterInfo* infoLo = allocLive(cUnit, loc.sRegLow, kAnyReg); RegisterInfo* infoHi = allocLive(cUnit, @@ -1026,7 +1048,7 @@ void oatCountRefs(CompilationUnit *cUnit, BasicBlock* bb, for (int i = 0; i < ssaRep->numDefs;) { RegLocation loc = cUnit->regLocation[ssaRep->defs[i]]; RefCounts* counts = loc.fp ? fpCounts : coreCounts; - int vReg = oatS2VReg(cUnit, ssaRep->defs[i]); + int vReg = SRegToVReg(cUnit, ssaRep->defs[i]); if (loc.defined) { counts[vReg].count++; } @@ -1045,7 +1067,7 @@ void oatCountRefs(CompilationUnit *cUnit, BasicBlock* bb, for (int i = 0; i < ssaRep->numUses;) { RegLocation loc = cUnit->regLocation[ssaRep->uses[i]]; RefCounts* counts = loc.fp ? fpCounts : coreCounts; - int vReg = oatS2VReg(cUnit, ssaRep->uses[i]); + int vReg = SRegToVReg(cUnit, ssaRep->uses[i]); if (loc.defined) { counts[vReg].count++; } @@ -1142,6 +1164,7 @@ extern void oatDoPromotion(CompilationUnit* cUnit) if (!(cUnit->disableOpt & (1 << kPromoteRegs))) { // Promote fpRegs for (int i = 0; (fpRegs[i].count > 0) && (i < numRegs); i++) { + DCHECK(validSreg(cUnit,fpRegs[i].sReg)); if (cUnit->promotionMap[fpRegs[i].sReg].fpLocation != kLocPhysReg) { int reg = oatAllocPreservedFPReg(cUnit, fpRegs[i].sReg, fpRegs[i].doubleStart); @@ -1153,6 +1176,7 @@ extern void oatDoPromotion(CompilationUnit* cUnit) // Promote core regs for (int i = 0; (coreRegs[i].count > 0) && i < numRegs; i++) { + DCHECK(validSreg(cUnit,coreRegs[i].sReg)); if (cUnit->promotionMap[coreRegs[i].sReg].coreLocation != kLocPhysReg) { int reg = oatAllocPreservedCoreReg(cUnit, coreRegs[i].sReg); @@ -1166,15 +1190,17 @@ extern void oatDoPromotion(CompilationUnit* cUnit) // Now, update SSA names to new home locations for (int i = 0; i < cUnit->numSSARegs; i++) { RegLocation *curr = &cUnit->regLocation[i]; - int baseVReg = oatS2VReg(cUnit, curr->sRegLow); + int baseVReg = SRegToVReg(cUnit, curr->sRegLow); if (!curr->wide) { if (curr->fp) { + DCHECK(validSreg(cUnit,baseVReg)); if (cUnit->promotionMap[baseVReg].fpLocation == kLocPhysReg) { curr->location = kLocPhysReg; curr->lowReg = cUnit->promotionMap[baseVReg].fpReg; curr->home = true; } } else { + DCHECK(validSreg(cUnit,baseVReg)); if (cUnit->promotionMap[baseVReg].coreLocation == kLocPhysReg) { curr->location = kLocPhysReg; curr->lowReg = cUnit->promotionMap[baseVReg].coreReg; @@ -1187,6 +1213,8 @@ extern void oatDoPromotion(CompilationUnit* cUnit) continue; } if (curr->fp) { + DCHECK(validSreg(cUnit,baseVReg)); + DCHECK(validSreg(cUnit,baseVReg+1)); if ((cUnit->promotionMap[baseVReg].fpLocation == kLocPhysReg) && (cUnit->promotionMap[baseVReg+1].fpLocation == kLocPhysReg)) { @@ -1201,6 +1229,8 @@ extern void oatDoPromotion(CompilationUnit* cUnit) } } } else { + DCHECK(validSreg(cUnit,baseVReg)); + DCHECK(validSreg(cUnit,baseVReg+1)); if ((cUnit->promotionMap[baseVReg].coreLocation == kLocPhysReg) && (cUnit->promotionMap[baseVReg+1].coreLocation == kLocPhysReg)) { @@ -1224,7 +1254,7 @@ extern int oatVRegOffset(CompilationUnit* cUnit, int vReg) /* Returns sp-relative offset in bytes for a SReg */ extern int oatSRegOffset(CompilationUnit* cUnit, int sReg) { - return oatVRegOffset(cUnit, oatS2VReg(cUnit, sReg)); + return oatVRegOffset(cUnit, SRegToVReg(cUnit, sReg)); } } // namespace art diff --git a/src/compiler/codegen/arm/ArchFactory.cc b/src/compiler/codegen/arm/ArchFactory.cc index 8a23d5c2e1..da5de521f3 100644 --- a/src/compiler/codegen/arm/ArchFactory.cc +++ b/src/compiler/codegen/arm/ArchFactory.cc @@ -106,7 +106,21 @@ void genEntrySequence(CompilationUnit* cUnit, BasicBlock* bb) opRegImm(cUnit, kOpSub, rSP, cUnit->frameSize - (spillCount * 4)); } - storeBaseDisp(cUnit, rSP, 0, r0, kWord); + + /* + * Dummy up a RegLocation for the incoming Method* + * It will attempt to keep r0 live (or copy it to home location + * if promoted). + */ + RegLocation rlSrc = cUnit->regLocation[cUnit->methodSReg]; + RegLocation rlMethod = cUnit->regLocation[cUnit->methodSReg]; + rlSrc.location = kLocPhysReg; + rlSrc.lowReg = r0; + rlSrc.home = false; + oatMarkLive(cUnit, rlSrc.lowReg, rlSrc.sRegLow); + storeValue(cUnit, rlMethod, rlSrc); + + /* Flush the rest of the ins */ flushIns(cUnit); if (cUnit->genDebugger) { diff --git a/src/compiler/codegen/arm/ArmRallocUtil.cc b/src/compiler/codegen/arm/ArmRallocUtil.cc index 3335f5997e..e7627f2367 100644 --- a/src/compiler/codegen/arm/ArmRallocUtil.cc +++ b/src/compiler/codegen/arm/ArmRallocUtil.cc @@ -76,10 +76,10 @@ void oatFlushRegWide(CompilationUnit* cUnit, int reg1, int reg2) info1->dirty = false; info2->dirty = false; - if (oatS2VReg(cUnit, info2->sReg) < - oatS2VReg(cUnit, info1->sReg)) + if (SRegToVReg(cUnit, info2->sReg) < + SRegToVReg(cUnit, info1->sReg)) info1 = info2; - int vReg = oatS2VReg(cUnit, info1->sReg); + int vReg = SRegToVReg(cUnit, info1->sReg); oatFlushRegWideImpl(cUnit, rSP, oatVRegOffset(cUnit, vReg), info1->reg, info1->partner); @@ -91,7 +91,7 @@ void oatFlushReg(CompilationUnit* cUnit, int reg) RegisterInfo* info = oatGetRegInfo(cUnit, reg); if (info->live && info->dirty) { info->dirty = false; - int vReg = oatS2VReg(cUnit, info->sReg); + int vReg = SRegToVReg(cUnit, info->sReg); oatFlushRegImpl(cUnit, rSP, oatVRegOffset(cUnit, vReg), reg, kWord); diff --git a/src/compiler/codegen/arm/Thumb2/Ralloc.cc b/src/compiler/codegen/arm/Thumb2/Ralloc.cc index c0f2c771ca..7858318001 100644 --- a/src/compiler/codegen/arm/Thumb2/Ralloc.cc +++ b/src/compiler/codegen/arm/Thumb2/Ralloc.cc @@ -88,6 +88,10 @@ void oatInitializeRegAlloc(CompilationUnit* cUnit) for (int i = 0; i < numFPTemps; i++) { oatMarkTemp(cUnit, fpTemps[i]); } + + // Start allocation at r2 in an attempt to avoid clobbering return values + pool->nextCoreReg = r2; + // Construct the alias map. cUnit->phiAliasMap = (int*)oatNew(cUnit, cUnit->numSSARegs * sizeof(cUnit->phiAliasMap[0]), false, diff --git a/src/compiler/codegen/mips/MipsRallocUtil.cc b/src/compiler/codegen/mips/MipsRallocUtil.cc index 7fd9b598cd..7ed3f86c3e 100644 --- a/src/compiler/codegen/mips/MipsRallocUtil.cc +++ b/src/compiler/codegen/mips/MipsRallocUtil.cc @@ -65,10 +65,10 @@ void oatFlushRegWide(CompilationUnit* cUnit, int reg1, int reg2) info1->dirty = false; info2->dirty = false; - if (oatS2VReg(cUnit, info2->sReg) < - oatS2VReg(cUnit, info1->sReg)) + if (SRegToVReg(cUnit, info2->sReg) < + SRegToVReg(cUnit, info1->sReg)) info1 = info2; - int vReg = oatS2VReg(cUnit, info1->sReg); + int vReg = SRegToVReg(cUnit, info1->sReg); oatFlushRegWideImpl(cUnit, rSP, oatVRegOffset(cUnit, vReg), info1->reg, info1->partner); @@ -80,7 +80,7 @@ void oatFlushReg(CompilationUnit* cUnit, int reg) RegisterInfo* info = oatGetRegInfo(cUnit, reg); if (info->live && info->dirty) { info->dirty = false; - int vReg = oatS2VReg(cUnit, info->sReg); + int vReg = SRegToVReg(cUnit, info->sReg); oatFlushRegImpl(cUnit, rSP, oatVRegOffset(cUnit, vReg), reg, kWord); diff --git a/src/compiler/codegen/x86/X86RallocUtil.cc b/src/compiler/codegen/x86/X86RallocUtil.cc index 7c99fd6298..1b4eca4158 100644 --- a/src/compiler/codegen/x86/X86RallocUtil.cc +++ b/src/compiler/codegen/x86/X86RallocUtil.cc @@ -60,10 +60,10 @@ void oatFlushRegWide(CompilationUnit* cUnit, int reg1, int reg2) info1->dirty = false; info2->dirty = false; - if (oatS2VReg(cUnit, info2->sReg) < - oatS2VReg(cUnit, info1->sReg)) + if (SRegToVReg(cUnit, info2->sReg) < + SRegToVReg(cUnit, info1->sReg)) info1 = info2; - int vReg = oatS2VReg(cUnit, info1->sReg); + int vReg = SRegToVReg(cUnit, info1->sReg); oatFlushRegWideImpl(cUnit, rSP, oatVRegOffset(cUnit, vReg), info1->reg, info1->partner); @@ -75,7 +75,7 @@ void oatFlushReg(CompilationUnit* cUnit, int reg) RegisterInfo* info = oatGetRegInfo(cUnit, reg); if (info->live && info->dirty) { info->dirty = false; - int vReg = oatS2VReg(cUnit, info->sReg); + int vReg = SRegToVReg(cUnit, info->sReg); oatFlushRegImpl(cUnit, rSP, oatVRegOffset(cUnit, vReg), reg, kWord); diff --git a/src/stack.cc b/src/stack.cc index da5c31eab3..e4d1133390 100644 --- a/src/stack.cc +++ b/src/stack.cc @@ -63,7 +63,7 @@ void Frame::SetReturnPC(uintptr_t pc) { * +========================+ {Note: start of callee's frame} * | core callee-save spill | {variable sized} * +------------------------+ - * | fp calle-save spill | + * | fp callee-save spill | * +------------------------+ * | V[locals-1] | * | V[locals-2] | |