summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/compiler/Compiler.h4
-rw-r--r--src/compiler/CompilerIR.h35
-rw-r--r--src/compiler/CompilerUtility.h1
-rw-r--r--src/compiler/Dataflow.cc272
-rw-r--r--src/compiler/Dataflow.h9
-rw-r--r--src/compiler/Frontend.cc6
-rw-r--r--src/compiler/IntermediateRep.cc2
-rw-r--r--src/compiler/Ralloc.cc54
-rw-r--r--src/compiler/SSATransformation.cc25
-rw-r--r--src/compiler/Utility.cc13
-rw-r--r--src/compiler/codegen/CodegenFactory.cc40
-rw-r--r--src/compiler/codegen/GenCommon.cc34
-rw-r--r--src/compiler/codegen/MethodCodegenDriver.cc18
-rw-r--r--src/compiler/codegen/Ralloc.h6
-rw-r--r--src/compiler/codegen/RallocUtil.cc48
-rw-r--r--src/compiler/codegen/arm/ArchFactory.cc16
-rw-r--r--src/compiler/codegen/arm/ArmRallocUtil.cc8
-rw-r--r--src/compiler/codegen/arm/Thumb2/Ralloc.cc4
-rw-r--r--src/compiler/codegen/mips/MipsRallocUtil.cc8
-rw-r--r--src/compiler/codegen/x86/X86RallocUtil.cc8
-rw-r--r--src/stack.cc2
21 files changed, 437 insertions, 176 deletions
diff --git a/src/compiler/Compiler.h b/src/compiler/Compiler.h
index 9516f25ba5..94d20e5caf 100644
--- a/src/compiler/Compiler.h
+++ b/src/compiler/Compiler.h
@@ -50,6 +50,7 @@ enum optControlVector {
kTrackLiveTemps,
kSkipLargeMethodOptimization,
kSafeOptimizations,
+ kBBOpt,
};
/* Type of allocation for memory tuning */
@@ -177,7 +178,8 @@ bool oatStartup(void);
void oatShutdown(void);
void oatScanAllClassPointers(void (*callback)(void* ptr));
void oatInitializeSSAConversion(struct CompilationUnit* cUnit);
-int oatConvertSSARegToDalvik(const struct CompilationUnit* cUnit, int ssaReg);
+int SRegToVReg(const struct CompilationUnit* cUnit, int ssaReg);
+int SRegToSubscript(const struct CompilationUnit* cUnit, int ssaReg);
bool oatFindLocalLiveIn(struct CompilationUnit* cUnit,
struct BasicBlock* bb);
bool oatDoSSAConversion(struct CompilationUnit* cUnit,
diff --git a/src/compiler/CompilerIR.h b/src/compiler/CompilerIR.h
index 611d1dff3a..bd4c156e4d 100644
--- a/src/compiler/CompilerIR.h
+++ b/src/compiler/CompilerIR.h
@@ -44,7 +44,7 @@ enum RegisterClass {
enum RegLocationType {
kLocDalvikFrame = 0, // Normal Dalvik register
kLocPhysReg,
- kLocSpill,
+ kLocCompilerTemp, // Method* and compiler temps (assigned in oatSimpleRegAlloc)
};
struct PromotionMap {
@@ -65,7 +65,12 @@ struct RegLocation {
unsigned home:1; // Does this represent the home location?
u1 lowReg; // First physical register
u1 highReg; // 2nd physical register (if wide)
- s2 sRegLow; // SSA name for low Dalvik word
+ int32_t sRegLow; // SSA name for low Dalvik word
+};
+
+/*
+ * Bookkeeping for one compiler-introduced temporary.  Entries are created
+ * by allocCompilerTempSreg() and stored in CompilationUnit::compilerTemps.
+ */
+struct CompilerTemp {
+ int sReg; // SSA name handed out by addNewSReg() for this temp
+ ArenaBitVector* bv; // Live map: one bit per basic block id using the temp
};
/*
@@ -101,6 +106,11 @@ struct RegisterPool {
#define INVALID_REG (0xFF)
#define INVALID_OFFSET (-1)
+/* SSA encodings for special registers */
+#define SSA_METHOD_BASEREG (-1)
+/* First compiler temp basereg, grows smaller */
+#define SSA_CTEMP_BASEREG (-2)
+
/*
* Some code patterns cause the generation of excessively large
* methods - in particular initialization sequences. There isn't much
@@ -153,8 +163,7 @@ enum ExtendedMIROpcode {
kMirOpNullNRangeUpCheck,
kMirOpNullNRangeDownCheck,
kMirOpLowerBound,
- kMirOpPunt,
- kMirOpCheckInlinePrediction, // Gen checks for predicted inlining
+ kMirOpCopy,
kMirOpLast,
};
@@ -169,6 +178,7 @@ enum MIROptimizationFlagPositons {
kMIRInlinedPred, // Invoke is inlined via prediction
kMIRCallee, // Instruction is inlined from callee
kMIRIgnoreSuspendCheck,
+ kMIRDup,
};
#define MIR_IGNORE_NULL_CHECK (1 << kMIRIgnoreNullCheck)
@@ -179,6 +189,7 @@ enum MIROptimizationFlagPositons {
#define MIR_INLINED_PRED (1 << kMIRInlinedPred)
#define MIR_CALLEE (1 << kMIRCallee)
#define MIR_IGNORE_SUSPEND_CHECK (1 << kMIRIgnoreSuspendCheck)
+#define MIR_DUP (1 << kMIRDup)
struct CallsiteInfo {
const char* classDescriptor;
@@ -222,6 +233,7 @@ struct BasicBlock {
bool visited;
bool hidden;
bool catchEntry;
+ bool fallThroughTarget; // Reached via fallthrough
unsigned int startOffset;
const Method* containingMethod; // For blocks from the callee
BBType blockType;
@@ -310,12 +322,13 @@ struct CompilationUnit {
InstructionSet instructionSet;
/* Number of total regs used in the whole cUnit after SSA transformation */
int numSSARegs;
- /* Map SSA reg i to the Dalvik[15..0]/Sub[31..16] pair. */
- GrowableList* ssaToDalvikMap;
+ /* Map SSA reg i to the base virtual register/subscript */
+ GrowableList* ssaBaseVRegs;
+ GrowableList* ssaSubscripts;
/* The following are new data structures to support SSA representations */
- /* Map original Dalvik reg i to the SSA[15..0]/Sub[31..16] pair */
- int* dalvikToSSAMap; // length == method->registersSize
+ /* Map original Dalvik virtual reg i to the current SSA name */
+ int* vRegToSSAMap; // length == method->registersSize
int* SSALastDefs; // length == method->registersSize
ArenaBitVector* isConstantV; // length == numSSAReg
int* constantValues; // length == numSSAReg
@@ -329,6 +342,9 @@ struct CompilationUnit {
/* Keep track of Dalvik vReg to physical register mappings */
PromotionMap* promotionMap;
+ /* SSA name for Method* */
+ int methodSReg;
+
/*
* Set to the Dalvik PC of the switch instruction if it has more than
* MAX_CHAINED_SWITCH_CASES cases.
@@ -336,7 +352,7 @@ struct CompilationUnit {
const u2* switchOverflowPad;
int numReachableBlocks;
- int numDalvikRegisters; // method->registersSize + inlined
+ int numDalvikRegisters; // method->registersSize
BasicBlock* entryBlock;
BasicBlock* exitBlock;
BasicBlock* curBlock;
@@ -346,6 +362,7 @@ struct CompilationUnit {
GrowableList domPostOrderTraversal;
GrowableList throwLaunchpads;
GrowableList suspendLaunchpads;
+ GrowableList compilerTemps;
int* iDomList;
ArenaBitVector* tryBlockAddr;
ArenaBitVector** defBlockMatrix; // numDalvikRegister x numBlocks
diff --git a/src/compiler/CompilerUtility.h b/src/compiler/CompilerUtility.h
index 357fe5114f..41f6cf16bd 100644
--- a/src/compiler/CompilerUtility.h
+++ b/src/compiler/CompilerUtility.h
@@ -118,6 +118,7 @@ bool oatUnifyBitVectors(ArenaBitVector* dest, const ArenaBitVector* src1,
const ArenaBitVector* src2);
bool oatCompareBitVectors(const ArenaBitVector* src1,
const ArenaBitVector* src2);
+bool oatTestBitVectors(const ArenaBitVector* src1, const ArenaBitVector* src2);
int oatCountSetBits(const ArenaBitVector* pBits);
void oatDumpLIRInsn(CompilationUnit* cUnit, struct LIR* lir,
diff --git a/src/compiler/Dataflow.cc b/src/compiler/Dataflow.cc
index ad522bf482..581c463597 100644
--- a/src/compiler/Dataflow.cc
+++ b/src/compiler/Dataflow.cc
@@ -807,17 +807,19 @@ const int oatDataFlowAttributes[kMirOpLast] = {
*/
};
-/* Return the Dalvik register/subscript pair of a given SSA register */
-int oatConvertSSARegToDalvik(const CompilationUnit* cUnit, int ssaReg)
+/*
+ * Return the base virtual register for a SSA name.  The value is negative
+ * for names created from SSA_METHOD_BASEREG or the SSA_CTEMP_BASEREG range.
+ */
+int SRegToVReg(const CompilationUnit* cUnit, int ssaReg)
{
- return GET_ELEM_N(cUnit->ssaToDalvikMap, int, ssaReg);
+ /* ssaReg must index an entry already appended to ssaBaseVRegs */
+ DCHECK_LT(ssaReg, (int)cUnit->ssaBaseVRegs->numUsed);
+ return GET_ELEM_N(cUnit->ssaBaseVRegs, int, ssaReg);
+}
+
+/*
+ * Return the SSA subscript recorded for a SSA name.  Names created for
+ * negative base registers (Method*, compiler temps) always have subscript 0.
+ */
+int SRegToSubscript(const CompilationUnit* cUnit, int ssaReg)
+{
+    /* SSA names are list indices: reject negatives as well as overruns */
+    DCHECK_GE(ssaReg, 0);
+    /* Same check form as SRegToVReg so a failure reports both operands */
+    DCHECK_LT(ssaReg, (int)cUnit->ssaSubscripts->numUsed);
+    return GET_ELEM_N(cUnit->ssaSubscripts, int, ssaReg);
}
-/*
- * Utility function to convert encoded SSA register value into Dalvik register
- * and subscript pair. Each SSA register can be used to index the
- * ssaToDalvikMap list to get the subscript[31..16]/dalvik_reg[15..0] mapping.
- */
char* oatGetDalvikDisassembly(CompilationUnit* cUnit,
const DecodedInstruction& insn, const char* note)
{
@@ -904,10 +906,8 @@ char* oatGetDalvikDisassembly(CompilationUnit* cUnit,
char* getSSAName(const CompilationUnit* cUnit, int ssaReg, char* name)
{
- int ssa2DalvikValue = oatConvertSSARegToDalvik(cUnit, ssaReg);
-
- sprintf(name, "v%d_%d",
- DECODE_REG(ssa2DalvikValue), DECODE_SUB(ssa2DalvikValue));
+ /*
+  * Writes "v<base vreg>_<subscript>" into name and returns name.  sprintf
+  * is unbounded, so the caller must supply a large enough buffer.
+  */
+ sprintf(name, "v%d_%d", SRegToVReg(cUnit, ssaReg),
+ SRegToSubscript(cUnit, ssaReg));
return name;
}
@@ -1033,11 +1033,6 @@ done:
return ret;
}
-/*
- * Utility function to convert encoded SSA register value into Dalvik register
- * and subscript pair. Each SSA register can be used to index the
- * ssaToDalvikMap list to get the subscript[31..16]/dalvik_reg[15..0] mapping.
- */
char* oatGetSSAString(CompilationUnit* cUnit, SSARepresentation* ssaRep)
{
char buffer[256];
@@ -1046,11 +1041,9 @@ char* oatGetSSAString(CompilationUnit* cUnit, SSARepresentation* ssaRep)
buffer[0] = 0;
for (i = 0; i < ssaRep->numDefs; i++) {
- int ssa2DalvikValue = oatConvertSSARegToDalvik(cUnit, ssaRep->defs[i]);
-
- sprintf(buffer + strlen(buffer), "s%d(v%d_%d) ",
- ssaRep->defs[i], DECODE_REG(ssa2DalvikValue),
- DECODE_SUB(ssa2DalvikValue));
+ int ssaReg = ssaRep->defs[i];
+ sprintf(buffer + strlen(buffer), "s%d(v%d_%d) ", ssaReg,
+ SRegToVReg(cUnit, ssaReg), SRegToSubscript(cUnit, ssaReg));
}
if (ssaRep->numDefs) {
@@ -1058,12 +1051,12 @@ char* oatGetSSAString(CompilationUnit* cUnit, SSARepresentation* ssaRep)
}
for (i = 0; i < ssaRep->numUses; i++) {
- int ssa2DalvikValue = oatConvertSSARegToDalvik(cUnit, ssaRep->uses[i]);
int len = strlen(buffer);
+ int ssaReg = ssaRep->uses[i];
- if (snprintf(buffer + len, 250 - len, "s%d(v%d_%d) ",
- ssaRep->uses[i], DECODE_REG(ssa2DalvikValue),
- DECODE_SUB(ssa2DalvikValue)) >= (250 - len)) {
+ /*
+  * snprintf returns the length it wanted to write; a result >= the space
+  * offered means this entry was truncated, so mark the string and stop.
+  * (Testing the raw return value would fire on every successful write.)
+  */
+ if (snprintf(buffer + len, 250 - len, "s%d(v%d_%d) ", ssaReg,
+ SRegToVReg(cUnit, ssaReg),
+ SRegToSubscript(cUnit, ssaReg)) >= (250 - len)) {
strcat(buffer, "...");
break;
}
@@ -1157,29 +1150,32 @@ bool oatFindLocalLiveIn(CompilationUnit* cUnit, BasicBlock* bb)
return true;
}
+/*
+ * Create a fresh SSA name for base register vReg and return it.
+ * The name is the previous value of cUnit->numSSARegs; its base register
+ * and subscript are appended to ssaBaseVRegs / ssaSubscripts in parallel.
+ */
+int addNewSReg(CompilationUnit* cUnit, int vReg)
+{
+    // Negative base registers (Method*, compiler temps) keep subscript 0
+    int newSubscript = 0;
+    if (vReg >= 0) {
+        // Real Dalvik registers take the next subscript in sequence
+        cUnit->SSALastDefs[vReg] += 1;
+        newSubscript = cUnit->SSALastDefs[vReg];
+    }
+
+    const int newName = cUnit->numSSARegs;
+    cUnit->numSSARegs = newName + 1;
+
+    oatInsertGrowableList(cUnit, cUnit->ssaBaseVRegs, vReg);
+    oatInsertGrowableList(cUnit, cUnit->ssaSubscripts, newSubscript);
+    // The two lists are indexed by the same SSA name and must stay in step
+    DCHECK_EQ(cUnit->ssaBaseVRegs->numUsed, cUnit->ssaSubscripts->numUsed);
+    return newName;
+}
+
/* Find out the latest SSA register for a given Dalvik register */
void handleSSAUse(CompilationUnit* cUnit, int* uses, int dalvikReg,
int regIndex)
{
- int encodedValue = cUnit->dalvikToSSAMap[dalvikReg];
- int ssaReg = DECODE_REG(encodedValue);
- uses[regIndex] = ssaReg;
+ /* Only real Dalvik registers have a vRegToSSAMap slot */
+ DCHECK((dalvikReg >= 0) && (dalvikReg < cUnit->numDalvikRegisters));
+ /* The map stores the SSA name itself, so no decoding is needed */
+ uses[regIndex] = cUnit->vRegToSSAMap[dalvikReg];
}
/* Setup a new SSA register for a given Dalvik register */
void handleSSADef(CompilationUnit* cUnit, int* defs, int dalvikReg,
int regIndex)
{
- int ssaReg = cUnit->numSSARegs++;
- /* Bump up the subscript */
- int dalvikSub = ++cUnit->SSALastDefs[dalvikReg];
- int newD2SMapping = ENCODE_REG_SUB(ssaReg, dalvikSub);
-
- cUnit->dalvikToSSAMap[dalvikReg] = newD2SMapping;
-
- int newS2DMapping = ENCODE_REG_SUB(dalvikReg, dalvikSub);
- oatInsertGrowableList(cUnit, cUnit->ssaToDalvikMap, newS2DMapping);
-
+ /* Only real Dalvik registers have a vRegToSSAMap slot */
+ DCHECK((dalvikReg >= 0) && (dalvikReg < cUnit->numDalvikRegisters));
+ /* addNewSReg bumps the subscript and records the name in both SSA lists */
+ int ssaReg = addNewSReg(cUnit, dalvikReg);
+ /* Later uses of dalvikReg resolve to this new name (see handleSSAUse) */
+ cUnit->vRegToSSAMap[dalvikReg] = ssaReg;
defs[regIndex] = ssaReg;
}
@@ -1351,11 +1347,11 @@ bool oatDoSSAConversion(CompilationUnit* cUnit, BasicBlock* bb)
* input to PHI nodes can be derived from the snapshot of all
* predecessor blocks.
*/
- bb->dataFlowInfo->dalvikToSSAMap =
+ bb->dataFlowInfo->vRegToSSAMap =
(int *)oatNew(cUnit, sizeof(int) * cUnit->numDalvikRegisters, false,
kAllocDFInfo);
- memcpy(bb->dataFlowInfo->dalvikToSSAMap, cUnit->dalvikToSSAMap,
+ memcpy(bb->dataFlowInfo->vRegToSSAMap, cUnit->vRegToSSAMap,
sizeof(int) * cUnit->numDalvikRegisters);
}
return true;
@@ -1447,10 +1443,15 @@ void oatInitializeSSAConversion(CompilationUnit* cUnit)
int i;
int numDalvikReg = cUnit->numDalvikRegisters;
- cUnit->ssaToDalvikMap = (GrowableList *)oatNew(cUnit, sizeof(GrowableList),
- false, kAllocDFInfo);
- // Create the SSAtoDalvikMap, estimating the max size
- oatInitGrowableList(cUnit, cUnit->ssaToDalvikMap,
+ cUnit->ssaBaseVRegs = (GrowableList *)oatNew(cUnit, sizeof(GrowableList),
+ false, kAllocDFInfo);
+ cUnit->ssaSubscripts = (GrowableList *)oatNew(cUnit, sizeof(GrowableList),
+ false, kAllocDFInfo);
+ // Create the ssa mappings, estimating the max size
+ oatInitGrowableList(cUnit, cUnit->ssaBaseVRegs,
+ numDalvikReg + cUnit->defCount + 128,
+ kListSSAtoDalvikMap);
+ oatInitGrowableList(cUnit, cUnit->ssaSubscripts,
numDalvikReg + cUnit->defCount + 128,
kListSSAtoDalvikMap);
/*
@@ -1465,26 +1466,28 @@ void oatInitializeSSAConversion(CompilationUnit* cUnit)
* into "(0 << 16) | i"
*/
for (i = 0; i < numDalvikReg; i++) {
- oatInsertGrowableList(cUnit, cUnit->ssaToDalvikMap,
- ENCODE_REG_SUB(i, 0));
+ oatInsertGrowableList(cUnit, cUnit->ssaBaseVRegs, i);
+ oatInsertGrowableList(cUnit, cUnit->ssaSubscripts, 0);
}
/*
- * Initialize the DalvikToSSAMap map. The low 16 bit is the SSA register id,
- * while the high 16 bit is the current subscript. The original Dalvik
- * register N is mapped to SSA register N with subscript 0.
+ * Initialize the vRegToSSAMap. There is one entry for each Dalvik
+ * virtual register, and its initial SSA name equals the register number.
*/
- cUnit->dalvikToSSAMap = (int *)oatNew(cUnit, sizeof(int) * numDalvikReg,
+ cUnit->vRegToSSAMap = (int *)oatNew(cUnit, sizeof(int) * numDalvikReg,
false, kAllocDFInfo);
/* Keep track of the higest def for each dalvik reg */
cUnit->SSALastDefs = (int *)oatNew(cUnit, sizeof(int) * numDalvikReg,
false, kAllocDFInfo);
for (i = 0; i < numDalvikReg; i++) {
- cUnit->dalvikToSSAMap[i] = i;
+ cUnit->vRegToSSAMap[i] = i;
cUnit->SSALastDefs[i] = 0;
}
+ /* Add ssa reg for Method* */
+ cUnit->methodSReg = addNewSReg(cUnit, SSA_METHOD_BASEREG);
+
/*
* Allocate the BasicBlockDataFlow structure for the entry and code blocks
*/
@@ -1627,6 +1630,160 @@ void oatDataFlowAnalysisDispatcher(CompilationUnit* cUnit,
}
}
+/*
+ * Advance to the next strictly dominated MIR node in an extended basic block.
+ *
+ * Returns mir->next when there is one.  At the end of a block, steps into
+ * the fall-through successor only if that block has exactly one predecessor;
+ * in that case *pBb is updated and, when bv is non-NULL, the successor's id
+ * is set in bv.  Returns NULL when mir is NULL or the walk cannot continue.
+ */
+MIR* advanceMIR(CompilationUnit* cUnit, BasicBlock** pBb, MIR* mir, ArenaBitVector* bv) {
+    BasicBlock* curBlock = *pBb;
+    if (mir == NULL) {
+        return NULL;
+    }
+
+    MIR* following = mir->next;
+    if (following != NULL) {
+        return following;
+    }
+
+    // End of this block: only continue along a single-entry fall-through
+    BasicBlock* nextBlock = curBlock->fallThrough;
+    if ((nextBlock == NULL) || (nextBlock->predecessors->numUsed != 1)) {
+        return NULL;
+    }
+
+    if (bv != NULL) {
+        oatSetBit(cUnit, bv, nextBlock->id);
+    }
+    *pBb = nextBlock;
+    return nextBlock->firstMIRInsn;
+}
+
+/*
+ * Allocate a compiler temp and return its SSA name.
+ *
+ * bv is the set of blocks (by id) in which the temp will be used.  An
+ * existing temp is reused when its live map shares no bit with bv; its map
+ * is then widened to include bv.  Otherwise a new temp is created with a
+ * private copy of bv and appended to cUnit->compilerTemps.
+ */
+int allocCompilerTempSreg(CompilationUnit* cUnit, ArenaBitVector* bv)
+{
+    for (int idx = 0; idx < cUnit->numCompilerTemps; idx++) {
+        CompilerTemp* existing =
+            (CompilerTemp*)cUnit->compilerTemps.elemList[idx];
+        ArenaBitVector* liveMap = existing->bv;
+        if (oatTestBitVectors(bv, liveMap)) {
+            continue;  // Overlapping live ranges: cannot share this temp
+        }
+        // Combine live maps and reuse existing temp
+        oatUnifyBitVectors(liveMap, liveMap, bv);
+        return existing->sReg;
+    }
+
+    // No reusable temp: build a new one with its own live bitmap
+    CompilerTemp* fresh = (CompilerTemp*)oatNew(cUnit, sizeof(CompilerTemp),
+                                                true, kAllocMisc);
+    ArenaBitVector* freshMap = oatAllocBitVector(cUnit, cUnit->numBlocks,
+                                                 true, kBitMapMisc);
+    oatCopyBitVector(freshMap, bv);
+    fresh->bv = freshMap;
+    // Temp base registers grow downward from SSA_CTEMP_BASEREG
+    fresh->sReg = addNewSReg(cUnit,
+                             SSA_CTEMP_BASEREG - cUnit->numCompilerTemps);
+    cUnit->numCompilerTemps++;
+    oatInsertGrowableList(cUnit, &cUnit->compilerTemps, (intptr_t)fresh);
+    DCHECK_EQ(cUnit->numCompilerTemps, (int)cUnit->compilerTemps.numUsed);
+    return fresh->sReg;
+}
+
+/*
+ * Create a new MIR node for a new pseudo op.
+ *
+ * The node and its SSARepresentation are requested zeroed from oatNew.
+ * uses/defs arrays are allocated only for non-zero counts; the caller
+ * fills in the SSA names.
+ */
+MIR* rawMIR(CompilationUnit* cUnit, Instruction::Code opcode, int defs, int uses)
+{
+    MIR* node = (MIR*)oatNew(cUnit, sizeof(MIR), true, kAllocMIR);
+    SSARepresentation* ssa = (struct SSARepresentation *)
+        oatNew(cUnit, sizeof(SSARepresentation), true, kAllocDFInfo);
+    node->ssaRep = ssa;
+
+    if (uses != 0) {
+        ssa->numUses = uses;
+        ssa->uses = (int*)oatNew(cUnit, sizeof(int) * uses, false,
+                                 kAllocDFInfo);
+    }
+    if (defs != 0) {
+        ssa->numDefs = defs;
+        ssa->defs = (int*)oatNew(cUnit, sizeof(int) * defs, false,
+                                 kAllocDFInfo);
+        ssa->fpDef = (bool*)oatNew(cUnit, sizeof(bool) * defs, true,
+                                   kAllocDFInfo);
+    }
+
+    node->dalvikInsn.opcode = opcode;
+    return node;
+}
+
+/*
+ * Do some MIR-level basic block optimizations.
+ *
+ * For each IGET_OBJECT not flagged MIR_DUP, scans forward through the
+ * extended basic block (see advanceMIR) counting later IGET_OBJECTs of the
+ * same object SSA name and field index, stopping at an IPUT_OBJECT to that
+ * same object/field.  With two or more matches the first load is redirected
+ * into a compiler temp and a kMirOpCopy to the original destination is
+ * inserted after it.
+ *
+ * NOTE(review): the duplicates themselves are not yet rewritten and MIR_DUP
+ * is never set in this function; the pass appears to be work in progress.
+ *
+ * Always returns true.
+ */
+bool basicBlockOpt(CompilationUnit* cUnit, BasicBlock* bb)
+{
+    for (MIR* mir = bb->firstMIRInsn; mir; mir = mir->next) {
+        // Look for interesting opcodes, skip otherwise
+        switch(mir->dalvikInsn.opcode) {
+            case Instruction::IGET_OBJECT: {
+                // TODO: look for CSE
+                if (mir->optimizationFlags & MIR_DUP) {
+                    break;
+                }
+                // tempBlockV collects the ids of every block the scan visits
+                ArenaBitVector* tempBlockV = cUnit->tempBlockV;
+                oatClearAllBits(tempBlockV);
+                oatSetBit(cUnit, tempBlockV, bb->id);
+                int objSreg = mir->ssaRep->uses[0];
+                int dstSreg = mir->ssaRep->defs[0];
+                uint32_t fieldIdx = mir->dalvikInsn.vC;
+                int matches = 0;
+                BasicBlock* tbb = bb;
+                MIR* tm = mir;
+                while (true) {
+                    tm = advanceMIR(cUnit, &tbb, tm, tempBlockV);
+                    if ((tm == NULL) || (tm == mir)) {
+                        break;
+                    }
+                    Instruction::Code opcode = tm->dalvikInsn.opcode;
+                    if ((opcode == Instruction::IGET_OBJECT)
+                        && (tm->ssaRep->uses[0] == objSreg)
+                        && (tm->dalvikInsn.vC == fieldIdx)) {
+                        if (cUnit->printMe) {
+                            LOG(INFO) << "Got DUP IGET_OBJECT @ 0x"
+                                      << std::hex << tm->offset << ", from 0x"
+                                      << std::hex <<mir->offset;
+                        }
+                        matches++;
+                    } else if ((opcode == Instruction::IPUT_OBJECT)
+                        && (tm->ssaRep->uses[0] == objSreg)
+                        && (tm->dalvikInsn.vC == fieldIdx)) {
+                        // A store to the same field ends the reusable range
+                        if (cUnit->printMe) {
+                            LOG(INFO) << "Clobbered IGET_OBJECT @ 0x"
+                                      << std::hex << tm->offset;
+                        }
+                        break;
+                    }
+                }
+                if (matches >= 2) {
+                    // Allocate compiler temp, redirect 1st load to temp,
+                    // insert copy to real target.  Convert all dups to
+                    // copies and rename all uses.
+                    int tmpSreg = allocCompilerTempSreg(cUnit, tempBlockV);
+                    MIR* newMir = rawMIR(cUnit, (Instruction::Code)kMirOpCopy, 1, 1);
+                    newMir->ssaRep->defs[0] = dstSreg;
+                    newMir->ssaRep->uses[0] = tmpSreg;
+                    mir->ssaRep->defs[0] = tmpSreg;
+                    oatInsertMIRAfter(bb, mir, newMir);
+                }
+            }
+            break;
+            case Instruction::IF_EQ:
+            case Instruction::IF_NE:
+            case Instruction::IF_LT:
+            case Instruction::IF_GE:
+            case Instruction::IF_GT:
+            case Instruction::IF_LE:
+                // TODO: Check for and fuse preceding comparison
+                break;
+            case Instruction::IF_EQZ:
+            case Instruction::IF_NEZ:
+            case Instruction::IF_LTZ:
+            case Instruction::IF_GEZ:
+            case Instruction::IF_GTZ:
+            case Instruction::IF_LEZ:
+                // TODO: Check for and fuse preceding comparison
+                break;
+            default:
+                break;
+        }
+    }
+
+    // cUnit->numCompilerTemps is maintained by allocCompilerTempSreg(); the
+    // former local counter here never left zero, so its update was dead code.
+    return true;
+}
+
bool nullCheckEliminationInit(struct CompilationUnit* cUnit,
struct BasicBlock* bb)
{
@@ -1779,4 +1936,15 @@ void oatMethodNullCheckElimination(CompilationUnit *cUnit)
}
}
+/*
+ * Entry point for the MIR-level basic block optimization pass.
+ * Always initializes cUnit->compilerTemps; runs basicBlockOpt over all
+ * nodes unless the kBBOpt bit is set in cUnit->disableOpt.
+ */
+void oatMethodBasicBlockOptimization(CompilationUnit *cUnit)
+{
+    oatInitGrowableList(cUnit, &cUnit->compilerTemps, 6, kListMisc);
+    DCHECK_EQ(cUnit->numCompilerTemps, 0);
+
+    const bool passDisabled = (cUnit->disableOpt & (1 << kBBOpt)) != 0;
+    if (passDisabled) {
+        return;
+    }
+    oatDataFlowAnalysisDispatcher(cUnit, basicBlockOpt,
+                                  kAllNodes,
+                                  false /* isIterative */);
+}
+
} // namespace art
diff --git a/src/compiler/Dataflow.h b/src/compiler/Dataflow.h
index a9917a3396..2df9373b85 100644
--- a/src/compiler/Dataflow.h
+++ b/src/compiler/Dataflow.h
@@ -115,7 +115,7 @@ struct BasicBlockDataFlow {
ArenaBitVector* defV;
ArenaBitVector* liveInV;
ArenaBitVector* phiV;
- int* dalvikToSSAMap;
+ int* vRegToSSAMap;
ArenaBitVector* endingNullCheckV;
};
@@ -147,13 +147,10 @@ struct ArrayAccessInfo {
int minC; // For DIV - will affect lower bound checking
};
-#define ENCODE_REG_SUB(r,s) ((s<<16) | r)
-#define DECODE_REG(v) (v & 0xffff)
-#define DECODE_SUB(v) (((unsigned int) v) >> 16)
-
-
void oatMethodNullCheckElimination(CompilationUnit*);
+void oatMethodBasicBlockOptimization(CompilationUnit*);
+
} // namespace art
#endif // ART_SRC_COMPILER_DATAFLOW_H_
diff --git a/src/compiler/Frontend.cc b/src/compiler/Frontend.cc
index e1b2c608bb..8ffcc72590 100644
--- a/src/compiler/Frontend.cc
+++ b/src/compiler/Frontend.cc
@@ -34,6 +34,7 @@ uint32_t compilerOptimizerDisableFlags = 0 | // Disable specific optimizations
//(1 << kTrackLiveTemps) |
//(1 << kSkipLargeMethodOptimization) |
//(1 << kSafeOptimizations) |
+ (1 << kBBOpt) |
0;
uint32_t compilerDebugFlags = 0 | // Enable debug/testing modes
@@ -970,6 +971,11 @@ CompiledMethod* oatCompileMethod(Compiler& compiler,
/* Perform null check elimination */
oatMethodNullCheckElimination(cUnit.get());
+#if 0
+ /* Do some basic block optimizations */
+ oatMethodBasicBlockOptimization(cUnit.get());
+#endif
+
oatInitializeRegAlloc(cUnit.get()); // Needs to happen after SSA naming
/* Allocate Registers using simple local allocation scheme */
diff --git a/src/compiler/IntermediateRep.cc b/src/compiler/IntermediateRep.cc
index 662687777d..d1ba45c18c 100644
--- a/src/compiler/IntermediateRep.cc
+++ b/src/compiler/IntermediateRep.cc
@@ -110,7 +110,7 @@ void oatPrependMIR(BasicBlock* bb, MIR* mir)
}
}
-/* Insert an MIR instruction after the specified MIR */
+/* Insert a MIR instruction after the specified MIR */
void oatInsertMIRAfter(BasicBlock* bb, MIR* currentMIR, MIR* newMIR)
{
newMIR->prev = currentMIR;
diff --git a/src/compiler/Ralloc.cc b/src/compiler/Ralloc.cc
index 2d85812d97..dfb25abc8f 100644
--- a/src/compiler/Ralloc.cc
+++ b/src/compiler/Ralloc.cc
@@ -108,8 +108,8 @@ bool inferTypeAndSize(CompilationUnit* cUnit, BasicBlock* bb)
if (attrs & DF_DA_WIDE) {
cUnit->regLocation[ssaRep->defs[0]].wide = true;
cUnit->regLocation[ssaRep->defs[1]].highWord = true;
- DCHECK_EQ(oatS2VReg(cUnit, ssaRep->defs[0])+1,
- oatS2VReg(cUnit, ssaRep->defs[1]));
+ DCHECK_EQ(SRegToVReg(cUnit, ssaRep->defs[0])+1,
+ SRegToVReg(cUnit, ssaRep->defs[1]));
}
}
@@ -122,8 +122,8 @@ bool inferTypeAndSize(CompilationUnit* cUnit, BasicBlock* bb)
if (attrs & DF_UA_WIDE) {
cUnit->regLocation[ssaRep->uses[next]].wide = true;
cUnit->regLocation[ssaRep->uses[next + 1]].highWord = true;
- DCHECK_EQ(oatS2VReg(cUnit, ssaRep->uses[next])+1,
- oatS2VReg(cUnit, ssaRep->uses[next + 1]));
+ DCHECK_EQ(SRegToVReg(cUnit, ssaRep->uses[next])+1,
+ SRegToVReg(cUnit, ssaRep->uses[next + 1]));
next += 2;
} else {
next++;
@@ -136,8 +136,8 @@ bool inferTypeAndSize(CompilationUnit* cUnit, BasicBlock* bb)
if (attrs & DF_UB_WIDE) {
cUnit->regLocation[ssaRep->uses[next]].wide = true;
cUnit->regLocation[ssaRep->uses[next + 1]].highWord = true;
- DCHECK_EQ(oatS2VReg(cUnit, ssaRep->uses[next])+1,
- oatS2VReg(cUnit, ssaRep->uses[next + 1]));
+ DCHECK_EQ(SRegToVReg(cUnit, ssaRep->uses[next])+1,
+ SRegToVReg(cUnit, ssaRep->uses[next + 1]));
next += 2;
} else {
next++;
@@ -150,8 +150,8 @@ bool inferTypeAndSize(CompilationUnit* cUnit, BasicBlock* bb)
if (attrs & DF_UC_WIDE) {
cUnit->regLocation[ssaRep->uses[next]].wide = true;
cUnit->regLocation[ssaRep->uses[next + 1]].highWord = true;
- DCHECK_EQ(oatS2VReg(cUnit, ssaRep->uses[next])+1,
- oatS2VReg(cUnit, ssaRep->uses[next + 1]));
+ DCHECK_EQ(SRegToVReg(cUnit, ssaRep->uses[next])+1,
+ SRegToVReg(cUnit, ssaRep->uses[next + 1]));
}
}
@@ -200,16 +200,16 @@ bool inferTypeAndSize(CompilationUnit* cUnit, BasicBlock* bb)
cUnit->regLocation[ssaRep->uses[i]].wide = true;
cUnit->regLocation[ssaRep->uses[i+1]].highWord
= true;
- DCHECK_EQ(oatS2VReg(cUnit, ssaRep->uses[i])+1,
- oatS2VReg(cUnit, ssaRep->uses[i+1]));
+ DCHECK_EQ(SRegToVReg(cUnit, ssaRep->uses[i])+1,
+ SRegToVReg(cUnit, ssaRep->uses[i+1]));
i++;
break;
case 'J':
cUnit->regLocation[ssaRep->uses[i]].wide = true;
cUnit->regLocation[ssaRep->uses[i+1]].highWord
= true;
- DCHECK_EQ(oatS2VReg(cUnit, ssaRep->uses[i])+1,
- oatS2VReg(cUnit, ssaRep->uses[i+1]));
+ DCHECK_EQ(SRegToVReg(cUnit, ssaRep->uses[i])+1,
+ SRegToVReg(cUnit, ssaRep->uses[i+1]));
changed |= setCore(cUnit, ssaRep->uses[i],true);
i++;
break;
@@ -320,13 +320,24 @@ void oatSimpleRegAlloc(CompilationUnit* cUnit)
loc[i] = freshLoc;
loc[i].sRegLow = i;
}
+
+ /* Patch up the locations for Method* and the compiler temps */
+ loc[cUnit->methodSReg].location = kLocCompilerTemp;
+ for (i = 0; i < cUnit->numCompilerTemps; i++) {
+ CompilerTemp* ct = (CompilerTemp*)cUnit->compilerTemps.elemList[i];
+ loc[ct->sReg].location = kLocCompilerTemp;
+ }
+
cUnit->regLocation = loc;
/* Allocation the promotion map */
int numRegs = cUnit->numDalvikRegisters;
- cUnit->promotionMap =
- (PromotionMap*)oatNew(cUnit, numRegs * sizeof(cUnit->promotionMap[0]),
- true, kAllocRegAlloc);
+ PromotionMap* tMap =
+ (PromotionMap*)oatNew(cUnit, (numRegs + cUnit->numCompilerTemps + 1) *
+ sizeof(cUnit->promotionMap[0]), true,
+ kAllocRegAlloc);
+ // Bias the promotion map
+ cUnit->promotionMap = &tMap[cUnit->numCompilerTemps + 1];
/* Add types of incoming arguments based on signature */
int numIns = cUnit->numIns;
@@ -346,8 +357,8 @@ void oatSimpleRegAlloc(CompilationUnit* cUnit)
cUnit->regLocation[sReg].wide = true;
cUnit->regLocation[sReg+1].highWord = true;
cUnit->regLocation[sReg+1].fp = true;
- DCHECK_EQ(oatS2VReg(cUnit, sReg)+1,
- oatS2VReg(cUnit, sReg+1));
+ DCHECK_EQ(SRegToVReg(cUnit, sReg)+1,
+ SRegToVReg(cUnit, sReg+1));
cUnit->regLocation[sReg].fp = true;
cUnit->regLocation[sReg].defined = true;
sReg++;
@@ -355,8 +366,8 @@ void oatSimpleRegAlloc(CompilationUnit* cUnit)
case 'J':
cUnit->regLocation[sReg].wide = true;
cUnit->regLocation[sReg+1].highWord = true;
- DCHECK_EQ(oatS2VReg(cUnit, sReg)+1,
- oatS2VReg(cUnit, sReg+1));
+ DCHECK_EQ(SRegToVReg(cUnit, sReg)+1,
+ SRegToVReg(cUnit, sReg+1));
cUnit->regLocation[sReg].core = true;
cUnit->regLocation[sReg].defined = true;
sReg++;
@@ -390,8 +401,9 @@ void oatSimpleRegAlloc(CompilationUnit* cUnit)
* allocator, remove this remapping.
*/
for (i=0; i < cUnit->numSSARegs; i++) {
- cUnit->regLocation[i].sRegLow =
- DECODE_REG(oatConvertSSARegToDalvik(cUnit, loc[i].sRegLow));
+ if (cUnit->regLocation[i].location != kLocCompilerTemp) {
+ cUnit->regLocation[i].sRegLow = SRegToVReg(cUnit, loc[i].sRegLow);
+ }
}
cUnit->coreSpillMask = 0;
diff --git a/src/compiler/SSATransformation.cc b/src/compiler/SSATransformation.cc
index cdb711e44b..2daa533a7b 100644
--- a/src/compiler/SSATransformation.cc
+++ b/src/compiler/SSATransformation.cc
@@ -29,7 +29,10 @@ void recordDFSOrders(CompilationUnit* cUnit, BasicBlock* block)
/* Enqueue the preOrder block id */
oatInsertGrowableList(cUnit, &cUnit->dfsOrder, block->id);
- if (block->fallThrough) recordDFSOrders(cUnit, block->fallThrough);
+ if (block->fallThrough) {
+ block->fallThrough->fallThroughTarget = true;
+ recordDFSOrders(cUnit, block->fallThrough);
+ }
if (block->taken) recordDFSOrders(cUnit, block->taken);
if (block->successorBlockList.blockListType != kNotUsed) {
GrowableListIterator iterator;
@@ -669,9 +672,8 @@ bool insertPhiNodeOperands(CompilationUnit* cUnit, BasicBlock* bb)
if (mir->dalvikInsn.opcode != (Instruction::Code)kMirOpPhi)
return true;
int ssaReg = mir->ssaRep->defs[0];
- int encodedDalvikValue =
- (int) oatGrowableListGetElement(cUnit->ssaToDalvikMap, ssaReg);
- int dalvikReg = DECODE_REG(encodedDalvikValue);
+ DCHECK_GE(ssaReg, 0); // Shouldn't see compiler temps here
+ int vReg = SRegToVReg(cUnit, ssaReg);
oatClearAllBits(ssaRegV);
@@ -681,9 +683,8 @@ bool insertPhiNodeOperands(CompilationUnit* cUnit, BasicBlock* bb)
BasicBlock* predBB =
(BasicBlock*)oatGrowableListIteratorNext(&iter);
if (!predBB) break;
- int encodedSSAValue =
- predBB->dataFlowInfo->dalvikToSSAMap[dalvikReg];
- int ssaReg = DECODE_REG(encodedSSAValue);
+ int ssaReg =
+ predBB->dataFlowInfo->vRegToSSAMap[vReg];
oatSetBit(cUnit, ssaRegV, ssaReg);
}
@@ -724,17 +725,17 @@ void doDFSPreOrderSSARename(CompilationUnit* cUnit, BasicBlock* block)
/* Save SSA map snapshot */
int* savedSSAMap = (int*)oatNew(cUnit, mapSize, false,
kAllocDalvikToSSAMap);
- memcpy(savedSSAMap, cUnit->dalvikToSSAMap, mapSize);
+ memcpy(savedSSAMap, cUnit->vRegToSSAMap, mapSize);
if (block->fallThrough) {
doDFSPreOrderSSARename(cUnit, block->fallThrough);
/* Restore SSA map snapshot */
- memcpy(cUnit->dalvikToSSAMap, savedSSAMap, mapSize);
+ memcpy(cUnit->vRegToSSAMap, savedSSAMap, mapSize);
}
if (block->taken) {
doDFSPreOrderSSARename(cUnit, block->taken);
/* Restore SSA map snapshot */
- memcpy(cUnit->dalvikToSSAMap, savedSSAMap, mapSize);
+ memcpy(cUnit->vRegToSSAMap, savedSSAMap, mapSize);
}
if (block->successorBlockList.blockListType != kNotUsed) {
GrowableListIterator iterator;
@@ -747,10 +748,10 @@ void doDFSPreOrderSSARename(CompilationUnit* cUnit, BasicBlock* block)
BasicBlock* succBB = successorBlockInfo->block;
doDFSPreOrderSSARename(cUnit, succBB);
/* Restore SSA map snapshot */
- memcpy(cUnit->dalvikToSSAMap, savedSSAMap, mapSize);
+ memcpy(cUnit->vRegToSSAMap, savedSSAMap, mapSize);
}
}
- cUnit->dalvikToSSAMap = savedSSAMap;
+ cUnit->vRegToSSAMap = savedSSAMap;
return;
}
diff --git a/src/compiler/Utility.cc b/src/compiler/Utility.cc
index 082f7a4c6f..3674aa9299 100644
--- a/src/compiler/Utility.cc
+++ b/src/compiler/Utility.cc
@@ -574,6 +574,19 @@ bool oatUnifyBitVectors(ArenaBitVector* dest, const ArenaBitVector* src1,
}
/*
+ * Return true if any bits collide. Vectors must be same size.
+ */
+bool oatTestBitVectors(const ArenaBitVector* src1,
+                       const ArenaBitVector* src2)
+{
+    DCHECK_EQ(src1->storageSize, src2->storageSize);
+
+    // Walk the storage words; any common set bit is a collision
+    const uint32_t numWords = src1->storageSize;
+    uint32_t word = 0;
+    while (word < numWords) {
+        if ((src1->storage[word] & src2->storage[word]) != 0) {
+            return true;
+        }
+        word++;
+    }
+    return false;
+}
+
+/*
* Compare two bit vectors and return true if difference is seen.
*/
bool oatCompareBitVectors(const ArenaBitVector* src1,
diff --git a/src/compiler/codegen/CodegenFactory.cc b/src/compiler/codegen/CodegenFactory.cc
index 8a6e1bc967..5444816ddf 100644
--- a/src/compiler/codegen/CodegenFactory.cc
+++ b/src/compiler/codegen/CodegenFactory.cc
@@ -65,7 +65,8 @@ void loadValueDirect(CompilationUnit* cUnit, RegLocation rlSrc, int reg1)
if (rlSrc.location == kLocPhysReg) {
opRegCopy(cUnit, reg1, rlSrc.lowReg);
} else {
- DCHECK(rlSrc.location == kLocDalvikFrame);
+ DCHECK((rlSrc.location == kLocDalvikFrame) ||
+ (rlSrc.location == kLocCompilerTemp));
loadWordDisp(cUnit, rSP, oatSRegOffset(cUnit, rlSrc.sRegLow), reg1);
}
}
@@ -94,7 +95,8 @@ void loadValueDirectWide(CompilationUnit* cUnit, RegLocation rlSrc, int regLo,
if (rlSrc.location == kLocPhysReg) {
opRegCopyWide(cUnit, regLo, regHi, rlSrc.lowReg, rlSrc.highReg);
} else {
- DCHECK(rlSrc.location == kLocDalvikFrame);
+ DCHECK((rlSrc.location == kLocDalvikFrame) ||
+ (rlSrc.location == kLocCompilerTemp));
loadBaseDispWide(cUnit, NULL, rSP,
oatSRegOffset(cUnit, rlSrc.sRegLow),
regLo, regHi, INVALID_SREG);
@@ -120,7 +122,9 @@ RegLocation loadValue(CompilationUnit* cUnit, RegLocation rlSrc,
RegisterClass opKind)
{
rlSrc = oatEvalLoc(cUnit, rlSrc, opKind, false);
- if (rlSrc.location == kLocDalvikFrame) {
+ if (rlSrc.location != kLocPhysReg) {
+ DCHECK((rlSrc.location == kLocDalvikFrame) ||
+ (rlSrc.location == kLocCompilerTemp));
loadValueDirect(cUnit, rlSrc, rlSrc.lowReg);
rlSrc.location = kLocPhysReg;
oatMarkLive(cUnit, rlSrc.lowReg, rlSrc.sRegLow);
@@ -176,7 +180,9 @@ RegLocation loadValueWide(CompilationUnit* cUnit, RegLocation rlSrc,
{
DCHECK(rlSrc.wide);
rlSrc = oatEvalLoc(cUnit, rlSrc, opKind, false);
- if (rlSrc.location == kLocDalvikFrame) {
+ if (rlSrc.location != kLocPhysReg) {
+ DCHECK((rlSrc.location == kLocDalvikFrame) ||
+ (rlSrc.location == kLocCompilerTemp));
loadValueDirectWide(cUnit, rlSrc, rlSrc.lowReg, rlSrc.highReg);
rlSrc.location = kLocPhysReg;
oatMarkLive(cUnit, rlSrc.lowReg, rlSrc.sRegLow);
@@ -232,8 +238,8 @@ void storeValueWide(CompilationUnit* cUnit, RegLocation rlDest,
(oatLiveOut(cUnit, rlDest.sRegLow) ||
oatLiveOut(cUnit, oatSRegHi(rlDest.sRegLow)))) {
defStart = (LIR*)cUnit->lastLIRInsn;
- DCHECK_EQ((oatS2VReg(cUnit, rlDest.sRegLow)+1),
- oatS2VReg(cUnit, oatSRegHi(rlDest.sRegLow)));
+ DCHECK_EQ((SRegToVReg(cUnit, rlDest.sRegLow)+1),
+ SRegToVReg(cUnit, oatSRegHi(rlDest.sRegLow)));
storeBaseDispWide(cUnit, rSP, oatSRegOffset(cUnit, rlDest.sRegLow),
rlDest.lowReg, rlDest.highReg);
oatMarkClean(cUnit, rlDest);
@@ -265,29 +271,15 @@ void markGCCard(CompilationUnit* cUnit, int valReg, int tgtAddrReg)
#endif
}
-/*
- * Utility to load the current Method*. Broken out
- * to allow easy change between placing the current Method* in a
- * dedicated register or its home location in the frame.
- */
+/* Utilities to load the current Method* */
void loadCurrMethodDirect(CompilationUnit *cUnit, int rTgt)
{
-#if defined(METHOD_IN_REG)
- opRegCopy(cUnit, rTgt, rMETHOD);
-#else
- loadWordDisp(cUnit, rSP, 0, rTgt);
-#endif
+ loadValueDirectFixed(cUnit, cUnit->regLocation[cUnit->methodSReg], rTgt);
}
-int loadCurrMethod(CompilationUnit *cUnit)
+RegLocation loadCurrMethod(CompilationUnit *cUnit)
{
-#if defined(METHOD_IN_REG)
- return rMETHOD;
-#else
- int mReg = oatAllocTemp(cUnit);
- loadCurrMethodDirect(cUnit, mReg);
- return mReg;
-#endif
+ return loadValue(cUnit, cUnit->regLocation[cUnit->methodSReg], kCoreReg);
}
diff --git a/src/compiler/codegen/GenCommon.cc b/src/compiler/codegen/GenCommon.cc
index e2c306de37..9b1654f764 100644
--- a/src/compiler/codegen/GenCommon.cc
+++ b/src/compiler/codegen/GenCommon.cc
@@ -275,10 +275,12 @@ void genFilledNewArray(CompilationUnit* cUnit, MIR* mir, bool isRange)
rTgt = loadHelper(cUnit, OFFSETOF_MEMBER(Thread,
pCheckAndAllocArrayFromCodeWithAccessCheck));
}
- loadCurrMethodDirect(cUnit, rARG1); // arg1 <- Method*
loadConstant(cUnit, rARG0, typeId); // arg0 <- type_id
loadConstant(cUnit, rARG2, elems); // arg2 <- count
+ loadCurrMethodDirect(cUnit, rARG1); // arg1 <- Method*
callRuntimeHelper(cUnit, rTgt);
+ oatFreeTemp(cUnit, rARG2);
+ oatFreeTemp(cUnit, rARG1);
/*
* NOTE: the implicit target for Instruction::FILLED_NEW_ARRAY is the
* return region. Because AllocFromCode placed the new array
@@ -387,12 +389,11 @@ void genSput(CompilationUnit* cUnit, MIR* mir, RegLocation rlSrc,
if (fastPath && !SLOW_FIELD_PATH) {
DCHECK_GE(fieldOffset, 0);
int rBase;
- int rMethod;
if (isReferrersClass) {
// Fast path, static storage base is this method's class
- rMethod = loadCurrMethod(cUnit);
+ RegLocation rlMethod = loadCurrMethod(cUnit);
rBase = oatAllocTemp(cUnit);
- loadWordDisp(cUnit, rMethod,
+ loadWordDisp(cUnit, rlMethod.lowReg,
Method::DeclaringClassOffset().Int32Value(), rBase);
} else {
// Medium path, static storage base in a different class which
@@ -402,7 +403,7 @@ void genSput(CompilationUnit* cUnit, MIR* mir, RegLocation rlSrc,
oatFlushAllRegs(cUnit);
// Using fixed register to sync with possible call to runtime
// support.
- rMethod = rARG1;
+ int rMethod = rARG1;
oatLockTemp(cUnit, rMethod);
loadCurrMethodDirect(cUnit, rMethod);
rBase = rARG0;
@@ -427,9 +428,9 @@ void genSput(CompilationUnit* cUnit, MIR* mir, RegLocation rlSrc,
#endif
LIR* skipTarget = newLIR0(cUnit, kPseudoTargetLabel);
branchOver->target = (LIR*)skipTarget;
+ oatFreeTemp(cUnit, rMethod);
}
// rBase now holds static storage base
- oatFreeTemp(cUnit, rMethod);
if (isLongOrDouble) {
rlSrc = oatGetSrcWide(cUnit, mir, 0, 1);
rlSrc = loadValueWide(cUnit, rlSrc, kAnyReg);
@@ -496,12 +497,11 @@ void genSget(CompilationUnit* cUnit, MIR* mir, RegLocation rlDest,
if (fastPath && !SLOW_FIELD_PATH) {
DCHECK_GE(fieldOffset, 0);
int rBase;
- int rMethod;
if (isReferrersClass) {
// Fast path, static storage base is this method's class
- rMethod = loadCurrMethod(cUnit);
+ RegLocation rlMethod = loadCurrMethod(cUnit);
rBase = oatAllocTemp(cUnit);
- loadWordDisp(cUnit, rMethod,
+ loadWordDisp(cUnit, rlMethod.lowReg,
Method::DeclaringClassOffset().Int32Value(), rBase);
} else {
// Medium path, static storage base in a different class which
@@ -511,7 +511,7 @@ void genSget(CompilationUnit* cUnit, MIR* mir, RegLocation rlDest,
oatFlushAllRegs(cUnit);
// Using fixed register to sync with possible call to runtime
// support
- rMethod = rARG1;
+ int rMethod = rARG1;
oatLockTemp(cUnit, rMethod);
loadCurrMethodDirect(cUnit, rMethod);
rBase = rARG0;
@@ -537,9 +537,9 @@ void genSget(CompilationUnit* cUnit, MIR* mir, RegLocation rlDest,
#endif
LIR* skipTarget = newLIR0(cUnit, kPseudoTargetLabel);
branchOver->target = (LIR*)skipTarget;
+ oatFreeTemp(cUnit, rMethod);
}
// rBase now holds static storage base
- oatFreeTemp(cUnit, rMethod);
rlDest = isLongOrDouble ? oatGetDestWide(cUnit, mir, 0, 1)
: oatGetDest(cUnit, mir, 0);
RegLocation rlResult = oatEvalLoc(cUnit, rlDest, kAnyReg, true);
@@ -837,7 +837,7 @@ void genConstClass(CompilationUnit* cUnit, MIR* mir, RegLocation rlDest,
RegLocation rlSrc)
{
uint32_t type_idx = mir->dalvikInsn.vB;
- int mReg = loadCurrMethod(cUnit);
+ RegLocation rlMethod = loadCurrMethod(cUnit);
int resReg = oatAllocTemp(cUnit);
RegLocation rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true);
if (!cUnit->compiler->CanAccessTypeWithoutChecks(cUnit->method_idx,
@@ -848,7 +848,7 @@ void genConstClass(CompilationUnit* cUnit, MIR* mir, RegLocation rlDest,
// Resolved type returned in rRET0.
int rTgt = loadHelper(cUnit, OFFSETOF_MEMBER(Thread,
pInitializeTypeAndVerifyAccessFromCode));
- opRegCopy(cUnit, rARG1, mReg);
+ opRegCopy(cUnit, rARG1, rlMethod.lowReg);
loadConstant(cUnit, rARG0, type_idx);
callRuntimeHelper(cUnit, rTgt);
RegLocation rlResult = oatGetReturn(cUnit);
@@ -857,7 +857,7 @@ void genConstClass(CompilationUnit* cUnit, MIR* mir, RegLocation rlDest,
// We're don't need access checks, load type from dex cache
int32_t dex_cache_offset =
Method::DexCacheResolvedTypesOffset().Int32Value();
- loadWordDisp(cUnit, mReg, dex_cache_offset, resReg);
+ loadWordDisp(cUnit, rlMethod.lowReg, dex_cache_offset, resReg);
int32_t offset_of_type =
Array::DataOffset(sizeof(Class*)).Int32Value() + (sizeof(Class*)
* type_idx);
@@ -876,7 +876,7 @@ void genConstClass(CompilationUnit* cUnit, MIR* mir, RegLocation rlDest,
// Call out to helper, which will return resolved type in rARG0
int rTgt = loadHelper(cUnit, OFFSETOF_MEMBER(Thread,
pInitializeTypeFromCode));
- opRegCopy(cUnit, rARG1, mReg);
+ opRegCopy(cUnit, rARG1, rlMethod.lowReg);
loadConstant(cUnit, rARG0, type_idx);
callRuntimeHelper(cUnit, rTgt);
RegLocation rlResult = oatGetReturn(cUnit);
@@ -930,10 +930,10 @@ void genConstString(CompilationUnit* cUnit, MIR* mir, RegLocation rlDest,
genBarrier(cUnit);
storeValue(cUnit, rlDest, oatGetReturn(cUnit));
} else {
- int mReg = loadCurrMethod(cUnit);
+ RegLocation rlMethod = loadCurrMethod(cUnit);
int resReg = oatAllocTemp(cUnit);
RegLocation rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true);
- loadWordDisp(cUnit, mReg,
+ loadWordDisp(cUnit, rlMethod.lowReg,
Method::DexCacheStringsOffset().Int32Value(), resReg);
loadWordDisp(cUnit, resReg, offset_of_string, rlResult.lowReg);
storeValue(cUnit, rlDest, rlResult);
diff --git a/src/compiler/codegen/MethodCodegenDriver.cc b/src/compiler/codegen/MethodCodegenDriver.cc
index 6b3283e13f..5baabf2f32 100644
--- a/src/compiler/codegen/MethodCodegenDriver.cc
+++ b/src/compiler/codegen/MethodCodegenDriver.cc
@@ -716,8 +716,7 @@ const char* extendedMIROpNames[kMirOpLast - kMirOpFirst] = {
"kMirOpNullNRangeUpCheck",
"kMirOpNullNRangeDownCheck",
"kMirOpLowerBound",
- "kMirOpPunt",
- "kMirOpCheckInlinePrediction",
+ "kMirOpCopy",
};
/* Extended MIR instructions like PHI */
@@ -742,6 +741,9 @@ void handleExtendedMethodMIR(CompilationUnit* cUnit, MIR* mir)
newLIR1(cUnit, kPseudoSSARep, (int) ssaString);
break;
}
+ case kMirOpCopy:
+ UNIMPLEMENTED(FATAL) << "Need kMirOpCopy";
+ break;
default:
break;
}
@@ -761,11 +763,19 @@ bool methodBlockCodeGen(CompilationUnit* cUnit, BasicBlock* bb)
labelList[blockId].opcode = kPseudoNormalBlockLabel;
oatAppendLIR(cUnit, (LIR*) &labelList[blockId]);
- /* Reset local optimization data on block boundaries */
+ /* Free temp registers and reset redundant store tracking */
oatResetRegPool(cUnit);
- oatClobberAllRegs(cUnit);
oatResetDefTracking(cUnit);
+ /*
+ * If control reached us from our immediate predecessor via
+ * fallthrough and we have no other incoming arcs we can
+ * reuse existing liveness. Otherwise, reset.
+ */
+ if (!bb->fallThroughTarget || bb->predecessors->numUsed != 1) {
+ oatClobberAllRegs(cUnit);
+ }
+
LIR* headLIR = NULL;
if (bb->blockType == kEntryBlock) {
diff --git a/src/compiler/codegen/Ralloc.h b/src/compiler/codegen/Ralloc.h
index 8c8c693edb..d32545c248 100644
--- a/src/compiler/codegen/Ralloc.h
+++ b/src/compiler/codegen/Ralloc.h
@@ -35,12 +35,6 @@ struct RefCounts {
};
-inline int oatS2VReg(CompilationUnit* cUnit, int sReg)
-{
- DCHECK_NE(sReg, INVALID_SREG);
- return DECODE_REG(oatConvertSSARegToDalvik(cUnit, sReg));
-}
-
/*
* Get the "real" sreg number associated with an sReg slot. In general,
* sReg values passed through codegen are the SSA names created by
diff --git a/src/compiler/codegen/RallocUtil.cc b/src/compiler/codegen/RallocUtil.cc
index b5ebf65da0..8f5d1bbe5a 100644
--- a/src/compiler/codegen/RallocUtil.cc
+++ b/src/compiler/codegen/RallocUtil.cc
@@ -147,6 +147,19 @@ extern void oatClobberSReg(CompilationUnit* cUnit, int sReg)
sReg);
}
+/*
+ * Range check for an sReg.
+ *
+ * Returns true iff -(numCompilerTemps + 1) <= sReg < numDalvikRegisters.
+ * On failure, logs the offending sReg and both bounds at WARNING level
+ * and returns false.
+ */
+bool validSreg(CompilationUnit* cUnit, int sReg)
+{
+    const int low = -(cUnit->numCompilerTemps + 1);
+    // Log the same upper bound that is tested so the diagnostic matches the check.
+    const int high = cUnit->numDalvikRegisters;
+    bool res = (low <= sReg) && (sReg < high);
+    if (!res) {
+        LOG(WARNING) << "Bad sreg: " << sReg;
+        LOG(WARNING) << " low = " << low;
+        LOG(WARNING) << " high = " << high;
+    }
+    return res;
+}
+
/* Reserve a callee-save register. Return -1 if none available */
extern int oatAllocPreservedCoreReg(CompilationUnit* cUnit, int sReg)
{
@@ -160,7 +173,8 @@ extern int oatAllocPreservedCoreReg(CompilationUnit* cUnit, int sReg)
cUnit->coreVmapTable.push_back(sReg);
cUnit->numCoreSpills++;
// Should be promoting based on initial sReg set
- DCHECK_EQ(sReg, oatS2VReg(cUnit, sReg));
+ DCHECK_EQ(sReg, SRegToVReg(cUnit, sReg));
+ DCHECK(validSreg(cUnit,sReg));
cUnit->promotionMap[sReg].coreLocation = kLocPhysReg;
cUnit->promotionMap[sReg].coreReg = res;
break;
@@ -184,8 +198,9 @@ int allocPreservedSingle(CompilationUnit* cUnit, int sReg, bool even)
res = FPRegs[i].reg;
FPRegs[i].inUse = true;
// Should be promoting based on initial sReg set
- DCHECK_EQ(sReg, oatS2VReg(cUnit, sReg));
+ DCHECK_EQ(sReg, SRegToVReg(cUnit, sReg));
oatMarkPreservedSingle(cUnit, sReg, res);
+ DCHECK(validSreg(cUnit,sReg));
cUnit->promotionMap[sReg].fpLocation = kLocPhysReg;
cUnit->promotionMap[sReg].fpReg = res;
break;
@@ -206,7 +221,8 @@ int allocPreservedDouble(CompilationUnit* cUnit, int sReg)
{
int res = -1; // Assume failure
// Should be promoting based on initial sReg set
- DCHECK_EQ(sReg, oatS2VReg(cUnit, sReg));
+ DCHECK_EQ(sReg, SRegToVReg(cUnit, sReg));
+ DCHECK(validSreg(cUnit,sReg+1));
if (cUnit->promotionMap[sReg+1].fpLocation == kLocPhysReg) {
// Upper reg is already allocated. Can we fit?
int highReg = cUnit->promotionMap[sReg+1].fpReg;
@@ -244,8 +260,10 @@ int allocPreservedDouble(CompilationUnit* cUnit, int sReg)
}
}
if (res != -1) {
+ DCHECK(validSreg(cUnit,sReg));
cUnit->promotionMap[sReg].fpLocation = kLocPhysReg;
cUnit->promotionMap[sReg].fpReg = res;
+ DCHECK(validSreg(cUnit,sReg+1));
cUnit->promotionMap[sReg+1].fpLocation = kLocPhysReg;
cUnit->promotionMap[sReg+1].fpReg = res + 1;
}
@@ -788,7 +806,9 @@ extern RegLocation oatUpdateLoc(CompilationUnit* cUnit, RegLocation loc)
{
DCHECK(!loc.wide);
DCHECK(oatCheckCorePoolSanity(cUnit));
- if (loc.location == kLocDalvikFrame) {
+ if (loc.location != kLocPhysReg) {
+ DCHECK((loc.location == kLocDalvikFrame) ||
+ (loc.location == kLocCompilerTemp));
RegisterInfo* infoLo = allocLive(cUnit, loc.sRegLow, kAnyReg);
if (infoLo) {
if (infoLo->pair) {
@@ -837,7 +857,9 @@ extern RegLocation oatUpdateLocWide(CompilationUnit* cUnit, RegLocation loc)
{
DCHECK(loc.wide);
DCHECK(oatCheckCorePoolSanity(cUnit));
- if (loc.location == kLocDalvikFrame) {
+ if (loc.location != kLocPhysReg) {
+ DCHECK((loc.location == kLocDalvikFrame) ||
+ (loc.location == kLocCompilerTemp));
// Are the dalvik regs already live in physical registers?
RegisterInfo* infoLo = allocLive(cUnit, loc.sRegLow, kAnyReg);
RegisterInfo* infoHi = allocLive(cUnit,
@@ -1026,7 +1048,7 @@ void oatCountRefs(CompilationUnit *cUnit, BasicBlock* bb,
for (int i = 0; i < ssaRep->numDefs;) {
RegLocation loc = cUnit->regLocation[ssaRep->defs[i]];
RefCounts* counts = loc.fp ? fpCounts : coreCounts;
- int vReg = oatS2VReg(cUnit, ssaRep->defs[i]);
+ int vReg = SRegToVReg(cUnit, ssaRep->defs[i]);
if (loc.defined) {
counts[vReg].count++;
}
@@ -1045,7 +1067,7 @@ void oatCountRefs(CompilationUnit *cUnit, BasicBlock* bb,
for (int i = 0; i < ssaRep->numUses;) {
RegLocation loc = cUnit->regLocation[ssaRep->uses[i]];
RefCounts* counts = loc.fp ? fpCounts : coreCounts;
- int vReg = oatS2VReg(cUnit, ssaRep->uses[i]);
+ int vReg = SRegToVReg(cUnit, ssaRep->uses[i]);
if (loc.defined) {
counts[vReg].count++;
}
@@ -1142,6 +1164,7 @@ extern void oatDoPromotion(CompilationUnit* cUnit)
if (!(cUnit->disableOpt & (1 << kPromoteRegs))) {
// Promote fpRegs
for (int i = 0; (fpRegs[i].count > 0) && (i < numRegs); i++) {
+ DCHECK(validSreg(cUnit,fpRegs[i].sReg));
if (cUnit->promotionMap[fpRegs[i].sReg].fpLocation != kLocPhysReg) {
int reg = oatAllocPreservedFPReg(cUnit, fpRegs[i].sReg,
fpRegs[i].doubleStart);
@@ -1153,6 +1176,7 @@ extern void oatDoPromotion(CompilationUnit* cUnit)
// Promote core regs
for (int i = 0; (coreRegs[i].count > 0) && i < numRegs; i++) {
+ DCHECK(validSreg(cUnit,coreRegs[i].sReg));
if (cUnit->promotionMap[coreRegs[i].sReg].coreLocation !=
kLocPhysReg) {
int reg = oatAllocPreservedCoreReg(cUnit, coreRegs[i].sReg);
@@ -1166,15 +1190,17 @@ extern void oatDoPromotion(CompilationUnit* cUnit)
// Now, update SSA names to new home locations
for (int i = 0; i < cUnit->numSSARegs; i++) {
RegLocation *curr = &cUnit->regLocation[i];
- int baseVReg = oatS2VReg(cUnit, curr->sRegLow);
+ int baseVReg = SRegToVReg(cUnit, curr->sRegLow);
if (!curr->wide) {
if (curr->fp) {
+ DCHECK(validSreg(cUnit,baseVReg));
if (cUnit->promotionMap[baseVReg].fpLocation == kLocPhysReg) {
curr->location = kLocPhysReg;
curr->lowReg = cUnit->promotionMap[baseVReg].fpReg;
curr->home = true;
}
} else {
+ DCHECK(validSreg(cUnit,baseVReg));
if (cUnit->promotionMap[baseVReg].coreLocation == kLocPhysReg) {
curr->location = kLocPhysReg;
curr->lowReg = cUnit->promotionMap[baseVReg].coreReg;
@@ -1187,6 +1213,8 @@ extern void oatDoPromotion(CompilationUnit* cUnit)
continue;
}
if (curr->fp) {
+ DCHECK(validSreg(cUnit,baseVReg));
+ DCHECK(validSreg(cUnit,baseVReg+1));
if ((cUnit->promotionMap[baseVReg].fpLocation == kLocPhysReg) &&
(cUnit->promotionMap[baseVReg+1].fpLocation ==
kLocPhysReg)) {
@@ -1201,6 +1229,8 @@ extern void oatDoPromotion(CompilationUnit* cUnit)
}
}
} else {
+ DCHECK(validSreg(cUnit,baseVReg));
+ DCHECK(validSreg(cUnit,baseVReg+1));
if ((cUnit->promotionMap[baseVReg].coreLocation == kLocPhysReg)
&& (cUnit->promotionMap[baseVReg+1].coreLocation ==
kLocPhysReg)) {
@@ -1224,7 +1254,7 @@ extern int oatVRegOffset(CompilationUnit* cUnit, int vReg)
/* Returns sp-relative offset in bytes for a SReg */
extern int oatSRegOffset(CompilationUnit* cUnit, int sReg)
{
- return oatVRegOffset(cUnit, oatS2VReg(cUnit, sReg));
+ return oatVRegOffset(cUnit, SRegToVReg(cUnit, sReg));
}
} // namespace art
diff --git a/src/compiler/codegen/arm/ArchFactory.cc b/src/compiler/codegen/arm/ArchFactory.cc
index 8a23d5c2e1..da5de521f3 100644
--- a/src/compiler/codegen/arm/ArchFactory.cc
+++ b/src/compiler/codegen/arm/ArchFactory.cc
@@ -106,7 +106,21 @@ void genEntrySequence(CompilationUnit* cUnit, BasicBlock* bb)
opRegImm(cUnit, kOpSub, rSP,
cUnit->frameSize - (spillCount * 4));
}
- storeBaseDisp(cUnit, rSP, 0, r0, kWord);
+
+ /*
+ * Dummy up a RegLocation for the incoming Method*, which arrives in r0.
+ * The storeValue() below will attempt to keep r0 live (or copy it to its
+ * home location if promoted).
+ */
+ RegLocation rlSrc = cUnit->regLocation[cUnit->methodSReg];
+ RegLocation rlMethod = cUnit->regLocation[cUnit->methodSReg];
+ rlSrc.location = kLocPhysReg;
+ rlSrc.lowReg = r0;
+ rlSrc.home = false;
+ oatMarkLive(cUnit, rlSrc.lowReg, rlSrc.sRegLow);
+ storeValue(cUnit, rlMethod, rlSrc);
+
+ /* Flush the rest of the ins */
flushIns(cUnit);
if (cUnit->genDebugger) {
diff --git a/src/compiler/codegen/arm/ArmRallocUtil.cc b/src/compiler/codegen/arm/ArmRallocUtil.cc
index 3335f5997e..e7627f2367 100644
--- a/src/compiler/codegen/arm/ArmRallocUtil.cc
+++ b/src/compiler/codegen/arm/ArmRallocUtil.cc
@@ -76,10 +76,10 @@ void oatFlushRegWide(CompilationUnit* cUnit, int reg1, int reg2)
info1->dirty = false;
info2->dirty = false;
- if (oatS2VReg(cUnit, info2->sReg) <
- oatS2VReg(cUnit, info1->sReg))
+ if (SRegToVReg(cUnit, info2->sReg) <
+ SRegToVReg(cUnit, info1->sReg))
info1 = info2;
- int vReg = oatS2VReg(cUnit, info1->sReg);
+ int vReg = SRegToVReg(cUnit, info1->sReg);
oatFlushRegWideImpl(cUnit, rSP,
oatVRegOffset(cUnit, vReg),
info1->reg, info1->partner);
@@ -91,7 +91,7 @@ void oatFlushReg(CompilationUnit* cUnit, int reg)
RegisterInfo* info = oatGetRegInfo(cUnit, reg);
if (info->live && info->dirty) {
info->dirty = false;
- int vReg = oatS2VReg(cUnit, info->sReg);
+ int vReg = SRegToVReg(cUnit, info->sReg);
oatFlushRegImpl(cUnit, rSP,
oatVRegOffset(cUnit, vReg),
reg, kWord);
diff --git a/src/compiler/codegen/arm/Thumb2/Ralloc.cc b/src/compiler/codegen/arm/Thumb2/Ralloc.cc
index c0f2c771ca..7858318001 100644
--- a/src/compiler/codegen/arm/Thumb2/Ralloc.cc
+++ b/src/compiler/codegen/arm/Thumb2/Ralloc.cc
@@ -88,6 +88,10 @@ void oatInitializeRegAlloc(CompilationUnit* cUnit)
for (int i = 0; i < numFPTemps; i++) {
oatMarkTemp(cUnit, fpTemps[i]);
}
+
+ // Start allocation at r2 in an attempt to avoid clobbering return values
+ pool->nextCoreReg = r2;
+
// Construct the alias map.
cUnit->phiAliasMap = (int*)oatNew(cUnit, cUnit->numSSARegs *
sizeof(cUnit->phiAliasMap[0]), false,
diff --git a/src/compiler/codegen/mips/MipsRallocUtil.cc b/src/compiler/codegen/mips/MipsRallocUtil.cc
index 7fd9b598cd..7ed3f86c3e 100644
--- a/src/compiler/codegen/mips/MipsRallocUtil.cc
+++ b/src/compiler/codegen/mips/MipsRallocUtil.cc
@@ -65,10 +65,10 @@ void oatFlushRegWide(CompilationUnit* cUnit, int reg1, int reg2)
info1->dirty = false;
info2->dirty = false;
- if (oatS2VReg(cUnit, info2->sReg) <
- oatS2VReg(cUnit, info1->sReg))
+ if (SRegToVReg(cUnit, info2->sReg) <
+ SRegToVReg(cUnit, info1->sReg))
info1 = info2;
- int vReg = oatS2VReg(cUnit, info1->sReg);
+ int vReg = SRegToVReg(cUnit, info1->sReg);
oatFlushRegWideImpl(cUnit, rSP,
oatVRegOffset(cUnit, vReg),
info1->reg, info1->partner);
@@ -80,7 +80,7 @@ void oatFlushReg(CompilationUnit* cUnit, int reg)
RegisterInfo* info = oatGetRegInfo(cUnit, reg);
if (info->live && info->dirty) {
info->dirty = false;
- int vReg = oatS2VReg(cUnit, info->sReg);
+ int vReg = SRegToVReg(cUnit, info->sReg);
oatFlushRegImpl(cUnit, rSP,
oatVRegOffset(cUnit, vReg),
reg, kWord);
diff --git a/src/compiler/codegen/x86/X86RallocUtil.cc b/src/compiler/codegen/x86/X86RallocUtil.cc
index 7c99fd6298..1b4eca4158 100644
--- a/src/compiler/codegen/x86/X86RallocUtil.cc
+++ b/src/compiler/codegen/x86/X86RallocUtil.cc
@@ -60,10 +60,10 @@ void oatFlushRegWide(CompilationUnit* cUnit, int reg1, int reg2)
info1->dirty = false;
info2->dirty = false;
- if (oatS2VReg(cUnit, info2->sReg) <
- oatS2VReg(cUnit, info1->sReg))
+ if (SRegToVReg(cUnit, info2->sReg) <
+ SRegToVReg(cUnit, info1->sReg))
info1 = info2;
- int vReg = oatS2VReg(cUnit, info1->sReg);
+ int vReg = SRegToVReg(cUnit, info1->sReg);
oatFlushRegWideImpl(cUnit, rSP,
oatVRegOffset(cUnit, vReg),
info1->reg, info1->partner);
@@ -75,7 +75,7 @@ void oatFlushReg(CompilationUnit* cUnit, int reg)
RegisterInfo* info = oatGetRegInfo(cUnit, reg);
if (info->live && info->dirty) {
info->dirty = false;
- int vReg = oatS2VReg(cUnit, info->sReg);
+ int vReg = SRegToVReg(cUnit, info->sReg);
oatFlushRegImpl(cUnit, rSP,
oatVRegOffset(cUnit, vReg),
reg, kWord);
diff --git a/src/stack.cc b/src/stack.cc
index da5c31eab3..e4d1133390 100644
--- a/src/stack.cc
+++ b/src/stack.cc
@@ -63,7 +63,7 @@ void Frame::SetReturnPC(uintptr_t pc) {
* +========================+ {Note: start of callee's frame}
* | core callee-save spill | {variable sized}
* +------------------------+
- * | fp calle-save spill |
+ * | fp callee-save spill |
* +------------------------+
* | V[locals-1] |
* | V[locals-2] |