summaryrefslogtreecommitdiff
path: root/src/compiler/Ralloc.cc
diff options
context:
space:
mode:
author buzbee <buzbee@google.com> 2011-10-11 18:08:40 -0700
committer buzbee <buzbee@google.com> 2011-10-14 10:54:03 -0700
commit67bc236a207852d652f6ddeab0a90efc1bd111bb (patch)
treeeea13fcb90ad8ce5b2b3819fb8caf0281583cd61 /src/compiler/Ralloc.cc
parent95caa791e560da97363c0c0d22bfda4a7e7377c3 (diff)
Register promotion fix
Restructured the type inference mechanism, added lots of DCHECKS, bumped the default memory allocation size to reflect AOT compilation and tweaked the bit vector manipulation routines to be better at handling large sparse vectors (something the old trace JIT didn't encounter enough to care). With this CL, optimization is back on by default. Should also see a significant boost in compilation speed (~2x better for boot.oat). Change-Id: Ifd134ef337be173a1be756bb9198b24c5b4936b3
Diffstat (limited to 'src/compiler/Ralloc.cc')
-rw-r--r--src/compiler/Ralloc.cc174
1 files changed, 137 insertions, 37 deletions
diff --git a/src/compiler/Ralloc.cc b/src/compiler/Ralloc.cc
index aaf9b97f15..b4cc0b5689 100644
--- a/src/compiler/Ralloc.cc
+++ b/src/compiler/Ralloc.cc
@@ -21,8 +21,25 @@
STATIC bool setFp(CompilationUnit* cUnit, int index, bool isFP) {
bool change = false;
+ if (cUnit->regLocation[index].highWord) {
+ return change;
+ }
if (isFP && !cUnit->regLocation[index].fp) {
cUnit->regLocation[index].fp = true;
+ cUnit->regLocation[index].defined = true;
+ change = true;
+ }
+ return change;
+}
+
+STATIC bool setCore(CompilationUnit* cUnit, int index, bool isCore) {
+ bool change = false;
+ if (cUnit->regLocation[index].highWord) {
+ return change;
+ }
+ if (isCore && !cUnit->regLocation[index].defined) {
+ cUnit->regLocation[index].core = true;
+ cUnit->regLocation[index].defined = true;
change = true;
}
return change;
@@ -66,21 +83,60 @@ STATIC bool inferTypeAndSize(CompilationUnit* cUnit, BasicBlock* bb)
SSARepresentation *ssaRep = mir->ssaRep;
if (ssaRep) {
int attrs = oatDataFlowAttributes[mir->dalvikInsn.opcode];
- int next = 0;
- if (attrs & DF_DA_WIDE) {
- cUnit->regLocation[ssaRep->defs[0]].wide = true;
+
+ // Handle defs
+ if (attrs & (DF_DA | DF_DA_WIDE)) {
+ if (attrs & DF_CORE_A) {
+ changed |= setCore(cUnit, ssaRep->defs[0], true);
+ }
+ if (attrs & DF_DA_WIDE) {
+ cUnit->regLocation[ssaRep->defs[0]].wide = true;
+ cUnit->regLocation[ssaRep->defs[1]].highWord = true;
+ DCHECK_EQ(oatS2VReg(cUnit, ssaRep->defs[0])+1,
+ oatS2VReg(cUnit, ssaRep->defs[1]));
+ }
}
- if (attrs & DF_UA_WIDE) {
- cUnit->regLocation[ssaRep->uses[next]].wide = true;
- next += 2;
+
+ // Handles uses
+ int next = 0;
+ if (attrs & (DF_UA | DF_UA_WIDE)) {
+ if (attrs & DF_CORE_A) {
+ changed |= setCore(cUnit, ssaRep->uses[next], true);
+ }
+ if (attrs & DF_UA_WIDE) {
+ cUnit->regLocation[ssaRep->uses[next]].wide = true;
+ cUnit->regLocation[ssaRep->uses[next + 1]].highWord = true;
+ DCHECK_EQ(oatS2VReg(cUnit, ssaRep->uses[next])+1,
+ oatS2VReg(cUnit, ssaRep->uses[next + 1]));
+ next += 2;
+ } else {
+ next++;
+ }
}
- if (attrs & DF_UB_WIDE) {
- cUnit->regLocation[ssaRep->uses[next]].wide = true;
- next += 2;
+ if (attrs & (DF_UB | DF_UB_WIDE)) {
+ if (attrs & DF_CORE_B) {
+ changed |= setCore(cUnit, ssaRep->uses[next], true);
+ }
+ if (attrs & DF_UB_WIDE) {
+ cUnit->regLocation[ssaRep->uses[next]].wide = true;
+ cUnit->regLocation[ssaRep->uses[next + 1]].highWord = true;
+ DCHECK_EQ(oatS2VReg(cUnit, ssaRep->uses[next])+1,
+ oatS2VReg(cUnit, ssaRep->uses[next + 1]));
+ next += 2;
+ } else {
+ next++;
+ }
}
- if (attrs & DF_UC_WIDE) {
- cUnit->regLocation[ssaRep->uses[next]].wide = true;
- next += 2;
+ if (attrs & (DF_UC | DF_UC_WIDE)) {
+ if (attrs & DF_CORE_C) {
+ changed |= setCore(cUnit, ssaRep->uses[next], true);
+ }
+ if (attrs & DF_UC_WIDE) {
+ cUnit->regLocation[ssaRep->uses[next]].wide = true;
+ cUnit->regLocation[ssaRep->uses[next + 1]].highWord = true;
+ DCHECK_EQ(oatS2VReg(cUnit, ssaRep->uses[next])+1,
+ oatS2VReg(cUnit, ssaRep->uses[next + 1]));
+ }
}
// Special-case handling for format 35c/3rc invokes
@@ -97,6 +153,8 @@ STATIC bool inferTypeAndSize(CompilationUnit* cUnit, BasicBlock* bb)
// If this is a non-static invoke, skip implicit "this"
if (((mir->dalvikInsn.opcode != OP_INVOKE_STATIC) &&
(mir->dalvikInsn.opcode != OP_INVOKE_STATIC_RANGE))) {
+ cUnit->regLocation[ssaRep->uses[next]].defined = true;
+ cUnit->regLocation[ssaRep->uses[next]].core = true;
next++;
}
uint32_t cpos = 1;
@@ -108,16 +166,26 @@ STATIC bool inferTypeAndSize(CompilationUnit* cUnit, BasicBlock* bb)
ssaRep->fpUse[i] = true;
ssaRep->fpUse[i+1] = true;
cUnit->regLocation[ssaRep->uses[i]].wide = true;
+ cUnit->regLocation[ssaRep->uses[i+1]].highWord
+ = true;
+ DCHECK_EQ(oatS2VReg(cUnit, ssaRep->uses[i])+1,
+ oatS2VReg(cUnit, ssaRep->uses[i+1]));
i++;
break;
case 'J':
cUnit->regLocation[ssaRep->uses[i]].wide = true;
+ cUnit->regLocation[ssaRep->uses[i+1]].highWord
+ = true;
+ DCHECK_EQ(oatS2VReg(cUnit, ssaRep->uses[i])+1,
+ oatS2VReg(cUnit, ssaRep->uses[i+1]));
+ changed |= setCore(cUnit, ssaRep->uses[i],true);
i++;
break;
case 'F':
ssaRep->fpUse[i] = true;
break;
default:
+ changed |= setCore(cUnit,ssaRep->uses[i], true);
break;
}
i++;
@@ -135,13 +203,25 @@ STATIC bool inferTypeAndSize(CompilationUnit* cUnit, BasicBlock* bb)
}
// Special-case handling for moves & Phi
if (attrs & (DF_IS_MOVE | DF_NULL_TRANSFER_N)) {
- bool isFP = cUnit->regLocation[ssaRep->defs[0]].fp;
+ // If any of our inputs or outputs is defined, set all
+ bool definedFP = false;
+ bool definedCore = false;
+ definedFP |= (cUnit->regLocation[ssaRep->defs[0]].defined &&
+ cUnit->regLocation[ssaRep->defs[0]].fp);
+ definedCore |= (cUnit->regLocation[ssaRep->defs[0]].defined &&
+ cUnit->regLocation[ssaRep->defs[0]].core);
for (int i = 0; i < ssaRep->numUses; i++) {
- isFP |= cUnit->regLocation[ssaRep->uses[i]].fp;
+ definedFP |= (cUnit->regLocation[ssaRep->uses[i]].defined &&
+ cUnit->regLocation[ssaRep->uses[i]].fp);
+ definedCore |= (cUnit->regLocation[ssaRep->uses[i]].defined
+ && cUnit->regLocation[ssaRep->uses[i]].core);
}
- changed |= setFp(cUnit, ssaRep->defs[0], isFP);
+ DCHECK(!(definedFP && definedCore));
+ changed |= setFp(cUnit, ssaRep->defs[0], definedFP);
+ changed |= setCore(cUnit, ssaRep->defs[0], definedCore);
for (int i = 0; i < ssaRep->numUses; i++) {
- changed |= setFp(cUnit, ssaRep->uses[i], isFP);
+ changed |= setFp(cUnit, ssaRep->uses[i], definedFP);
+ changed |= setCore(cUnit, ssaRep->uses[i], definedCore);
}
}
}
@@ -155,20 +235,19 @@ void oatDumpRegLocTable(RegLocation* table, int count)
{
for (int i = 0; i < count; i++) {
char buf[100];
- snprintf(buf, 100, "Loc[%02d] : %s, %c %c r%d r%d S%d : %s s%d s%d",
+ snprintf(buf, 100, "Loc[%02d] : %s, %c %c %c %c %c %c%d %c%d S%d",
i, storageName[table[i].location], table[i].wide ? 'W' : 'N',
- table[i].fp ? 'F' : 'C', table[i].lowReg, table[i].highReg,
- table[i].sRegLow, storageName[table[i].fpLocation],
- table[i].fpLowReg & FP_REG_MASK, table[i].fpHighReg &
- FP_REG_MASK);
+ table[i].defined ? 'D' : 'U', table[i].fp ? 'F' : 'C',
+ table[i].highWord ? 'H' : 'L', table[i].home ? 'h' : 't',
+ FPREG(table[i].lowReg) ? 's' : 'r', table[i].lowReg & FP_REG_MASK,
+ FPREG(table[i].highReg) ? 's' : 'r', table[i].highReg & FP_REG_MASK,
+ table[i].sRegLow);
LOG(INFO) << buf;
}
}
-static const RegLocation freshLoc = {kLocDalvikFrame, 0, 0, INVALID_REG,
- INVALID_REG, INVALID_SREG, 0,
- kLocDalvikFrame, INVALID_REG, INVALID_REG,
- INVALID_OFFSET};
+static const RegLocation freshLoc = {kLocDalvikFrame, 0, 0, 0, 0, 0, 0,
+ INVALID_REG, INVALID_REG, INVALID_SREG};
/*
* Simple register allocation. Some Dalvik virtual registers may
@@ -189,6 +268,10 @@ void oatSimpleRegAlloc(CompilationUnit* cUnit)
}
cUnit->regLocation = loc;
+ /* Allocation the promotion map */
+ cUnit->promotionMap = (PromotionMap*)oatNew( cUnit->method->NumRegisters()
+ * sizeof(cUnit->promotionMap[0]), true);
+
/* Add types of incoming arguments based on signature */
int numRegs = cUnit->method->NumRegisters();
int numIns = cUnit->method->NumIns();
@@ -196,16 +279,39 @@ void oatSimpleRegAlloc(CompilationUnit* cUnit)
int sReg = numRegs - numIns;
if (!cUnit->method->IsStatic()) {
// Skip past "this"
+ cUnit->regLocation[sReg].defined = true;
+ cUnit->regLocation[sReg].core = true;
sReg++;
}
const String* shorty = cUnit->method->GetShorty();
for (int i = 1; i < shorty->GetLength(); i++) {
- char arg = shorty->CharAt(i);
- // Is it wide?
- if ((arg == 'D') || (arg == 'J')) {
- cUnit->regLocation[sReg].wide = true;
- cUnit->regLocation[sReg+1].fp = cUnit->regLocation[sReg].fp;
- sReg++; // Skip to next
+ switch(shorty->CharAt(i)) {
+ case 'D':
+ cUnit->regLocation[sReg].wide = true;
+ cUnit->regLocation[sReg+1].highWord = true;
+ DCHECK_EQ(oatS2VReg(cUnit, sReg)+1,
+ oatS2VReg(cUnit, sReg+1));
+ cUnit->regLocation[sReg].fp = true;
+ cUnit->regLocation[sReg].defined = true;
+ sReg++;
+ break;
+ case 'J':
+ cUnit->regLocation[sReg].wide = true;
+ cUnit->regLocation[sReg+1].highWord = true;
+ DCHECK_EQ(oatS2VReg(cUnit, sReg)+1,
+ oatS2VReg(cUnit, sReg+1));
+ cUnit->regLocation[sReg].core = true;
+ cUnit->regLocation[sReg].defined = true;
+ sReg++;
+ break;
+ case 'F':
+ cUnit->regLocation[sReg].fp = true;
+ cUnit->regLocation[sReg].defined = true;
+ break;
+ default:
+ cUnit->regLocation[sReg].core = true;
+ cUnit->regLocation[sReg].defined = true;
+ break;
}
sReg++;
}
@@ -254,10 +360,4 @@ void oatSimpleRegAlloc(CompilationUnit* cUnit)
cUnit->numPadding + 2) * 4;
cUnit->insOffset = cUnit->frameSize + 4;
cUnit->regsOffset = (cUnit->numOuts + cUnit->numPadding + 1) * 4;
-
- /* Compute sp-relative home location offsets */
- for (i = 0; i < cUnit->numSSARegs; i++) {
- int vReg = oatS2VReg(cUnit, cUnit->regLocation[i].sRegLow);
- cUnit->regLocation[i].spOffset = oatVRegOffset(cUnit, vReg);
- }
}