diff options
| author | 2011-09-26 17:51:15 -0700 | |
|---|---|---|
| committer | 2011-09-26 17:51:15 -0700 | |
| commit | 1ff230d113d25fbcd2986773799ac1ef504a6af0 (patch) | |
| tree | bff83a4e728f339d18a1bf7f8cb9384348bcf59c /src/compiler/codegen | |
| parent | 20d8d673ed402e9d0292f1770313476ce94a9ef2 (diff) | |
| parent | b29e4d1423028fab47db3be6e41e4b2a067bf100 (diff) | |
Merge "Misc codegen fixes related to register promotion" into dalvik-dev
Diffstat (limited to 'src/compiler/codegen')
| -rw-r--r-- | src/compiler/codegen/CodegenFactory.cc | 7 | ||||
| -rw-r--r-- | src/compiler/codegen/Ralloc.h | 2 | ||||
| -rw-r--r-- | src/compiler/codegen/RallocUtil.cc | 8 | ||||
| -rw-r--r-- | src/compiler/codegen/arm/ArchUtility.cc | 26 | ||||
| -rw-r--r-- | src/compiler/codegen/arm/MethodCodegenDriver.cc | 10 | ||||
| -rw-r--r-- | src/compiler/codegen/arm/Thumb2/Gen.cc | 54 |
6 files changed, 59 insertions, 48 deletions
diff --git a/src/compiler/codegen/CodegenFactory.cc b/src/compiler/codegen/CodegenFactory.cc index 108bc83521..55ed8af080 100644 --- a/src/compiler/codegen/CodegenFactory.cc +++ b/src/compiler/codegen/CodegenFactory.cc @@ -131,8 +131,9 @@ STATIC void storeValue(CompilationUnit* cUnit, RegLocation rlDest, rlDest = oatUpdateLoc(cUnit, rlDest); if (rlSrc.location == kLocPhysReg) { if (oatIsLive(cUnit, rlSrc.lowReg) || + oatIsPromoted(cUnit, rlSrc.lowReg) || (rlDest.location == kLocPhysReg)) { - // Src is live or Dest has assigned reg. + // Src is live/promoted or Dest has assigned reg. rlDest = oatEvalLoc(cUnit, rlDest, kAnyReg, false); genRegCopy(cUnit, rlDest.lowReg, rlSrc.lowReg); } else { @@ -192,8 +193,10 @@ STATIC void storeValueWide(CompilationUnit* cUnit, RegLocation rlDest, if (rlSrc.location == kLocPhysReg) { if (oatIsLive(cUnit, rlSrc.lowReg) || oatIsLive(cUnit, rlSrc.highReg) || + oatIsPromoted(cUnit, rlSrc.lowReg) || + oatIsPromoted(cUnit, rlSrc.highReg) || (rlDest.location == kLocPhysReg)) { - // Src is live or Dest has assigned reg. + // Src is live or promoted or Dest has assigned reg. rlDest = oatEvalLoc(cUnit, rlDest, kAnyReg, false); genRegCopyWide(cUnit, rlDest.lowReg, rlDest.highReg, rlSrc.lowReg, rlSrc.highReg); diff --git a/src/compiler/codegen/Ralloc.h b/src/compiler/codegen/Ralloc.h index 0c3fbcaea2..e2cb1ce377 100644 --- a/src/compiler/codegen/Ralloc.h +++ b/src/compiler/codegen/Ralloc.h @@ -141,6 +141,8 @@ extern void oatClobberCallRegs(CompilationUnit* cUnit); extern RegisterInfo *oatIsTemp(CompilationUnit* cUnit, int reg); +extern RegisterInfo *oatIsPromoted(CompilationUnit* cUnit, int reg); + extern bool oatIsDirty(CompilationUnit* cUnit, int reg); extern void oatMarkInUse(CompilationUnit* cUnit, int reg); diff --git a/src/compiler/codegen/RallocUtil.cc b/src/compiler/codegen/RallocUtil.cc index 6875718154..69b98d4814 100644 --- a/src/compiler/codegen/RallocUtil.cc +++ b/src/compiler/codegen/RallocUtil.cc @@ -547,6 +547,12 @@ extern RegisterInfo* oatIsTemp(CompilationUnit* cUnit, int reg) return (p->isTemp) ? p : NULL; } +extern RegisterInfo* oatIsPromoted(CompilationUnit* cUnit, int reg) +{ + RegisterInfo* p = getRegInfo(cUnit, reg); + return (p->isTemp) ? NULL : p; +} + extern bool oatIsDirty(CompilationUnit* cUnit, int reg) { RegisterInfo* p = getRegInfo(cUnit, reg); @@ -849,7 +855,7 @@ STATIC void copyRegInfo(CompilationUnit* cUnit, int newReg, int oldReg) /* * Return an updated location record with current in-register status. * If the value lives in live temps, reflect that fact. No code - * is generated. The the live value is part of an older pair, + * is generated. If the live value is part of an older pair, * clobber both low and high. * TUNING: clobbering both is a bit heavy-handed, but the alternative * is a bit complex when dealing with FP regs. Examine code to see diff --git a/src/compiler/codegen/arm/ArchUtility.cc b/src/compiler/codegen/arm/ArchUtility.cc index ccafecbda6..be1ab1ea38 100644 --- a/src/compiler/codegen/arm/ArchUtility.cc +++ b/src/compiler/codegen/arm/ArchUtility.cc @@ -456,30 +456,6 @@ void oatCodegenDump(CompilationUnit* cUnit) ToModifiedUtf8(); char buf[256]; -#if 0 - int linebreak = 0; - //TODO: delete when we're sure it's no longer necessary - LOG(INFO) << "*/"; - sprintf(buf,"\n u1 %s%s_%s_code[] = {", descriptor.c_str(), - name.c_str(), signature.c_str()); - for (unsigned int i = 0; i < strlen(buf); i++) - if (buf[i] == ';') buf[i] = '_'; - LOG(INFO) << buf; - strcpy(buf," "); - u1* pLiterals = (u1*)&cUnit->codeBuffer[0]; - for (int i = 0; i < cUnit->totalSize; i++) { - sprintf(buf+strlen(buf),"0x%02x,", pLiterals[i]); - if (++linebreak == 8) { - linebreak = 0; - LOG(INFO) << buf; - strcpy(buf," "); - } - } - if (strlen(buf) > 8) { - LOG(INFO) << buf; - } - LOG(INFO) << " };\n\n"; -#endif // Dump mapping table if (cUnit->mappingTable.size() > 0) { @@ -498,6 +474,4 @@ void oatCodegenDump(CompilationUnit* cUnit) } LOG(INFO) <<" };\n\n"; } - - // Dump vmap table } diff --git a/src/compiler/codegen/arm/MethodCodegenDriver.cc b/src/compiler/codegen/arm/MethodCodegenDriver.cc index 0a5fc7eab5..aeb0134337 100644 --- a/src/compiler/codegen/arm/MethodCodegenDriver.cc +++ b/src/compiler/codegen/arm/MethodCodegenDriver.cc @@ -1817,8 +1817,14 @@ STATIC void flushIns(CompilationUnit* cUnit) } if (loc.location == kLocPhysReg) { if (loc.wide) { - loadBaseDispWide(cUnit, NULL, rSP, loc.spOffset, - loc.lowReg, loc.highReg, INVALID_SREG); + if (loc.fp && (loc.lowReg & 1) != 0) { + // Misaligned - need to load as a pair of singles + loadWordDisp(cUnit, rSP, loc.spOffset, loc.lowReg); + loadWordDisp(cUnit, rSP, loc.spOffset + 4, loc.highReg); + } else { + loadBaseDispWide(cUnit, NULL, rSP, loc.spOffset, + loc.lowReg, loc.highReg, INVALID_SREG); + } i++; } else { loadWordDisp(cUnit, rSP, loc.spOffset, loc.lowReg); diff --git a/src/compiler/codegen/arm/Thumb2/Gen.cc b/src/compiler/codegen/arm/Thumb2/Gen.cc index c247fe7c2b..c9d1bb9a6f 100644 --- a/src/compiler/codegen/arm/Thumb2/Gen.cc +++ b/src/compiler/codegen/arm/Thumb2/Gen.cc @@ -830,7 +830,6 @@ STATIC void genLong3Addr(CompilationUnit* cUnit, MIR* mir, OpKind firstOp, rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true); // The longs may overlap - use intermediate temp if so if (rlResult.lowReg == rlSrc1.highReg) { - //FIXME: review all long arithmetic ops - there may be more of these int tReg = oatAllocTemp(cUnit); genRegCopy(cUnit, tReg, rlSrc1.highReg); opRegRegReg(cUnit, firstOp, rlResult.lowReg, rlSrc1.lowReg, @@ -1044,32 +1043,34 @@ STATIC void genCmpLong(CompilationUnit* cUnit, MIR* mir, RegLocation rlDest, RegLocation rlSrc1, RegLocation rlSrc2) { - RegLocation rlTemp = LOC_C_RETURN; // Just using as template, will change ArmLIR* target1; ArmLIR* target2; rlSrc1 = loadValueWide(cUnit, rlSrc1, kCoreReg); rlSrc2 = loadValueWide(cUnit, rlSrc2, kCoreReg); - rlTemp.lowReg = oatAllocTemp(cUnit); - loadConstant(cUnit, rlTemp.lowReg, -1); + int tReg = oatAllocTemp(cUnit); + loadConstant(cUnit, tReg, -1); opRegReg(cUnit, kOpCmp, rlSrc1.highReg, rlSrc2.highReg); ArmLIR* branch1 = opCondBranch(cUnit, kArmCondLt); ArmLIR* branch2 = opCondBranch(cUnit, kArmCondGt); - opRegRegReg(cUnit, kOpSub, rlTemp.lowReg, rlSrc1.lowReg, rlSrc2.lowReg); + opRegRegReg(cUnit, kOpSub, tReg, rlSrc1.lowReg, rlSrc2.lowReg); ArmLIR* branch3 = opCondBranch(cUnit, kArmCondEq); genIT(cUnit, kArmCondHi, "E"); - newLIR2(cUnit, kThumb2MovImmShift, rlTemp.lowReg, modifiedImmediate(-1)); - loadConstant(cUnit, rlTemp.lowReg, 1); + newLIR2(cUnit, kThumb2MovImmShift, tReg, modifiedImmediate(-1)); + loadConstant(cUnit, tReg, 1); genBarrier(cUnit); target2 = newLIR0(cUnit, kArmPseudoTargetLabel); target2->defMask = -1; - opRegReg(cUnit, kOpNeg, rlTemp.lowReg, rlTemp.lowReg); + opRegReg(cUnit, kOpNeg, tReg, tReg); target1 = newLIR0(cUnit, kArmPseudoTargetLabel); target1->defMask = -1; + RegLocation rlTemp = LOC_C_RETURN; // Just using as template, will change + rlTemp.lowReg = tReg; storeValue(cUnit, rlDest, rlTemp); + oatFreeTemp(cUnit, tReg); branch1->generic.target = (LIR*)target1; branch2->generic.target = (LIR*)target2; @@ -1498,8 +1499,17 @@ STATIC bool genArithOpLong(CompilationUnit* cUnit, MIR* mir, case OP_NOT_LONG: rlSrc2 = loadValueWide(cUnit, rlSrc2, kCoreReg); rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true); - opRegReg(cUnit, kOpMvn, rlResult.lowReg, rlSrc2.lowReg); - opRegReg(cUnit, kOpMvn, rlResult.highReg, rlSrc2.highReg); + // Check for destructive overlap + if (rlResult.lowReg == rlSrc2.highReg) { + int tReg = oatAllocTemp(cUnit); + genRegCopy(cUnit, tReg, rlSrc2.highReg); + opRegReg(cUnit, kOpMvn, rlResult.lowReg, rlSrc2.lowReg); + opRegReg(cUnit, kOpMvn, rlResult.highReg, tReg); + oatFreeTemp(cUnit, tReg); + } else { + opRegReg(cUnit, kOpMvn, rlResult.lowReg, rlSrc2.lowReg); + opRegReg(cUnit, kOpMvn, rlResult.highReg, rlSrc2.highReg); + } storeValueWide(cUnit, rlDest, rlResult); return false; break; @@ -1548,15 +1558,25 @@ STATIC bool genArithOpLong(CompilationUnit* cUnit, MIR* mir, secondOp = kOpXor; break; case OP_NEG_LONG: { - //TUNING: can improve this using Thumb2 code - int tReg = oatAllocTemp(cUnit); rlSrc2 = loadValueWide(cUnit, rlSrc2, kCoreReg); rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true); - loadConstantNoClobber(cUnit, tReg, 0); - opRegRegReg(cUnit, kOpSub, rlResult.lowReg, - tReg, rlSrc2.lowReg); - opRegReg(cUnit, kOpSbc, tReg, rlSrc2.highReg); - genRegCopy(cUnit, rlResult.highReg, tReg); + int zReg = oatAllocTemp(cUnit); + loadConstantNoClobber(cUnit, zReg, 0); + // Check for destructive overlap + if (rlResult.lowReg == rlSrc2.highReg) { + int tReg = oatAllocTemp(cUnit); + opRegRegReg(cUnit, kOpSub, rlResult.lowReg, + zReg, rlSrc2.lowReg); + opRegRegReg(cUnit, kOpSbc, rlResult.highReg, + zReg, tReg); + oatFreeTemp(cUnit, tReg); + } else { + opRegRegReg(cUnit, kOpSub, rlResult.lowReg, + zReg, rlSrc2.lowReg); + opRegRegReg(cUnit, kOpSbc, rlResult.highReg, + zReg, rlSrc2.highReg); + } + oatFreeTemp(cUnit, zReg); storeValueWide(cUnit, rlDest, rlResult); return false; } |