Register promotion fix

Restructured the type inference mechanism, added lots of DCHECKs,
bumped the default memory allocation size to reflect AOT
compilation, and tweaked the bit vector manipulation routines
to be better at handling large sparse vectors (something the old
trace JIT didn't encounter often enough to care about).

With this CL, optimization is back on by default.  We should also see
a significant boost in compilation speed (~2x better for boot.oat).

Change-Id: Ifd134ef337be173a1be756bb9198b24c5b4936b3
diff --git a/src/compiler/codegen/arm/ArchUtility.cc b/src/compiler/codegen/arm/ArchUtility.cc
index aef98fa..c4d3b6d 100644
--- a/src/compiler/codegen/arm/ArchUtility.cc
+++ b/src/compiler/codegen/arm/ArchUtility.cc
@@ -16,6 +16,7 @@
 
 #include "../../CompilerInternals.h"
 #include "ArmLIR.h"
+#include "../Ralloc.h"
 
 static const char* coreRegNames[16] = {
     "r0",
@@ -391,6 +392,38 @@
     }
 }
 
+void oatDumpPromotionMap(CompilationUnit *cUnit)
+{   // Logs one line per method register: its core home, then fp home if any
+    const Method *method = cUnit->method;
+    for (int i = 0; i < method->NumRegisters(); i++) {
+        PromotionMap vRegMap = cUnit->promotionMap[i];
+        char buf[100];  // Optional " : sN" suffix, filled only if fp-promoted
+        if (vRegMap.fpLocation == kLocPhysReg) {
+            snprintf(buf, 100, " : s%d", vRegMap.fpReg & FP_REG_MASK);
+        } else {
+            buf[0] = 0;  // Not fp-promoted: empty suffix
+        }
+        char buf2[100];  // "V[i] -> rN" if core-promoted, else "V[i] -> SP+off"
+        snprintf(buf2, 100, "V[%02d] -> %s%d%s", i,
+                 vRegMap.coreLocation == kLocPhysReg ?
+                 "r" : "SP+", vRegMap.coreLocation == kLocPhysReg ?
+                 vRegMap.coreReg : oatSRegOffset(cUnit, i), buf);
+        LOG(INFO) << buf2;
+    }
+}
+
+void oatDumpFullPromotionMap(CompilationUnit *cUnit)
+{   // Logs every PromotionMap field, as an int, for each method register
+    const Method *method = cUnit->method;
+    for (int i = 0; i < method->NumRegisters(); i++) {
+        PromotionMap vRegMap = cUnit->promotionMap[i];
+        LOG(INFO) << i << " -> " << "CL:" << (int)vRegMap.coreLocation <<
+            ", CR:" << (int)vRegMap.coreReg << ", FL:" <<
+            (int)vRegMap.fpLocation << ", FR:" << (int)vRegMap.fpReg <<
+            ", - " << (int)vRegMap.firstInPair;  // Unlabeled last value
+    }
+}
+
 /* Dump instructions and constant pool contents */
 void oatCodegenDump(CompilationUnit* cUnit)
 {
@@ -414,22 +447,7 @@
         " bytes, Dalvik size is " << insnsSize * 2;
     LOG(INFO) << "expansion factor: " <<
          (float)cUnit->totalSize / (float)(insnsSize * 2);
-    for (int i = 0; i < method->NumRegisters(); i++) {
-        RegLocation loc = cUnit->regLocation[i];
-        char buf[100];
-        if (loc.fpLocation == kLocPhysReg) {
-            snprintf(buf, 100, " : s%d", loc.fpLowReg & FP_REG_MASK);
-        } else {
-            buf[0] = 0;
-        }
-        char buf2[100];
-        snprintf(buf2, 100, "V[%02d] -> %s%d%s", i,
-                 loc.location == kLocPhysReg ?
-                 "r" : "SP+", loc.location == kLocPhysReg ?
-                 loc.lowReg : loc.spOffset, buf);
-        LOG(INFO) << buf2;
-
-    }
+    oatDumpPromotionMap(cUnit);
     for (lirInsn = cUnit->firstLIRInsn; lirInsn; lirInsn = lirInsn->next) {
         oatDumpLIRInsn(cUnit, lirInsn, 0);
     }
diff --git a/src/compiler/codegen/arm/ArmLIR.h b/src/compiler/codegen/arm/ArmLIR.h
index 1e4022e..729e708 100644
--- a/src/compiler/codegen/arm/ArmLIR.h
+++ b/src/compiler/codegen/arm/ArmLIR.h
@@ -123,16 +123,13 @@
 #define rNone   (-1)
 
 /* RegisterLocation templates return values (r0, or r0/r1) */
-#define LOC_C_RETURN {kLocPhysReg, 0, 0, r0, INVALID_REG, INVALID_SREG, \
-                      1, kLocPhysReg, r0, INVALID_REG, INVALID_OFFSET}
-#define LOC_C_RETURN_WIDE {kLocPhysReg, 1, 0, r0, r1, INVALID_SREG, \
-                      1, kLocPhysReg, r0, r1, INVALID_OFFSET}
+#define LOC_C_RETURN {kLocPhysReg, 0, 0, 0, 0, 0, 1, r0, INVALID_REG, INVALID_SREG}
+#define LOC_C_RETURN_WIDE {kLocPhysReg, 1, 0, 0, 0, 0, 1, r0, r1, INVALID_SREG}
 /* RegisterLocation templates for interpState->retVal; */
-#define LOC_DALVIK_RETURN_VAL {kLocPhysReg, 0, 0, r0, INVALID_REG, \
-                      INVALID_SREG, 1, kLocPhysReg, r0, INVALID_REG, \
-                      INVALID_OFFSET}
-#define LOC_DALVIK_RETURN_VAL_WIDE {kLocPhysReg, 1, 0, r0, r1, \
-                      INVALID_SREG, 1, kLocPhysReg, r0, r1, INVALID_OFFSET}
+#define LOC_DALVIK_RETURN_VAL {kLocPhysReg, 0, 0, 0, 0, 0, 1, r0, INVALID_REG, \
+                      INVALID_SREG}
+#define LOC_DALVIK_RETURN_VAL_WIDE {kLocPhysReg, 1, 0, 0, 0, 0, 1, r0, r1, \
+                      INVALID_SREG}
 
  /*
  * Data structure tracking the mapping between a Dalvik register (pair) and a
diff --git a/src/compiler/codegen/arm/ArmRallocUtil.cc b/src/compiler/codegen/arm/ArmRallocUtil.cc
index ed8a5b2..4af3d07 100644
--- a/src/compiler/codegen/arm/ArmRallocUtil.cc
+++ b/src/compiler/codegen/arm/ArmRallocUtil.cc
@@ -37,7 +37,7 @@
 
 /* USE SSA names to count references of base Dalvik vRegs. */
 STATIC void countRefs(CompilationUnit *cUnit, BasicBlock* bb,
-                      RefCounts* counts, bool fp)
+                      RefCounts* coreCounts, RefCounts* fpCounts)
 {
     MIR* mir;
     if (bb->blockType != kDalvikByteCode && bb->blockType != kEntryBlock &&
@@ -47,59 +47,42 @@
     for (mir = bb->firstMIRInsn; mir; mir = mir->next) {
         SSARepresentation *ssaRep = mir->ssaRep;
         if (ssaRep) {
-            int i;
-            int attrs = oatDataFlowAttributes[mir->dalvikInsn.opcode];
-            if (fp) {
-                // Mark 1st reg of double pairs
-                int first = 0;
-                int sReg;
-                if ((attrs & (DF_DA_WIDE|DF_FP_A)) == (DF_DA_WIDE|DF_FP_A)) {
-                    sReg = DECODE_REG(
-                        oatConvertSSARegToDalvik(cUnit, ssaRep->defs[0]));
-                    counts[sReg].doubleStart = true;
+            for (int i = 0; i < ssaRep->numDefs;) {
+                RegLocation loc = cUnit->regLocation[ssaRep->defs[i]];
+                RefCounts* counts = loc.fp ? fpCounts : coreCounts;
+                int vReg = oatS2VReg(cUnit, ssaRep->defs[i]);
+                if (loc.defined) {
+                    counts[vReg].count++;
                 }
-                if ((attrs & (DF_UA_WIDE|DF_FP_A)) == (DF_UA_WIDE|DF_FP_A)) {
-                    sReg = DECODE_REG(
-                        oatConvertSSARegToDalvik(cUnit, ssaRep->uses[first]));
-                    counts[sReg].doubleStart = true;
-                }
-                if (attrs & DF_UA_WIDE) {
-                    first += 2;
-                }
-                if ((attrs & (DF_UB_WIDE|DF_FP_B)) == (DF_UB_WIDE|DF_FP_B)) {
-                    sReg = DECODE_REG(
-                        oatConvertSSARegToDalvik(cUnit, ssaRep->uses[first]));
-                    counts[sReg].doubleStart = true;
-                }
-                if (attrs & DF_UB_WIDE) {
-                    first += 2;
-                }
-                if ((attrs & (DF_UC_WIDE|DF_FP_C)) == (DF_UC_WIDE|DF_FP_C)) {
-                    sReg = DECODE_REG(
-                        oatConvertSSARegToDalvik(cUnit, ssaRep->uses[first]));
-                    counts[sReg].doubleStart = true;
+                if (loc.wide) {
+                    if (loc.defined) {
+                        if (loc.fp) {
+                            counts[vReg].doubleStart = true;
+                        }
+                        counts[vReg+1].count++;
+                    }
+                    i += 2;
+                } else {
+                    i++;
                 }
             }
-            for (i=0; i< ssaRep->numUses; i++) {
-                int origSreg = DECODE_REG(
-                    oatConvertSSARegToDalvik(cUnit, ssaRep->uses[i]));
-                DCHECK_LT(origSreg, cUnit->method->NumRegisters());
-                bool fpUse = ssaRep->fpUse ? ssaRep->fpUse[i] : false;
-                if (fp == fpUse) {
-                    counts[origSreg].count++;
+            for (int i = 0; i < ssaRep->numUses;) {
+                RegLocation loc = cUnit->regLocation[ssaRep->uses[i]];
+                RefCounts* counts = loc.fp ? fpCounts : coreCounts;
+                int vReg = oatS2VReg(cUnit, ssaRep->uses[i]);
+                if (loc.defined) {
+                    counts[vReg].count++;
                 }
-            }
-            for (i=0; i< ssaRep->numDefs; i++) {
-                if (attrs & DF_SETS_CONST) {
-                    // CONST opcodes are untyped - don't pollute the counts
-                    continue;
-                }
-                int origSreg = DECODE_REG(
-                    oatConvertSSARegToDalvik(cUnit, ssaRep->defs[i]));
-                DCHECK_LT(origSreg, cUnit->method->NumRegisters());
-                bool fpDef = ssaRep->fpDef ? ssaRep->fpDef[i] : false;
-                if (fp == fpDef) {
-                    counts[origSreg].count++;
+                if (loc.wide) {
+                    if (loc.defined) {
+                        if (loc.fp) {
+                            counts[vReg].doubleStart = true;
+                        }
+                        counts[vReg+1].count++;
+                    }
+                    i += 2;
+                } else {
+                    i++;
                 }
             }
         }
@@ -159,8 +142,7 @@
         BasicBlock* bb;
         bb = (BasicBlock*)oatGrowableListIteratorNext(&iterator);
         if (bb == NULL) break;
-        countRefs(cUnit, bb, coreRegs, false);
-        countRefs(cUnit, bb, fpRegs, true);
+        countRefs(cUnit, bb, coreRegs, fpRegs);
     }
 
     /*
@@ -178,21 +160,27 @@
     qsort(coreRegs, numRegs, sizeof(RefCounts), sortCounts);
     qsort(fpRegs, numRegs, sizeof(RefCounts), sortCounts);
 
+    if (cUnit->printMe) {
+        dumpCounts(coreRegs, numRegs, "Core regs after sort");
+        dumpCounts(fpRegs, numRegs, "Fp regs after sort");
+    }
+
     if (!(cUnit->disableOpt & (1 << kPromoteRegs))) {
         // Promote fpRegs
         for (int i = 0; (fpRegs[i].count > 0) && (i < numRegs); i++) {
-            if (cUnit->regLocation[fpRegs[i].sReg].fpLocation != kLocPhysReg) {
+            if (cUnit->promotionMap[fpRegs[i].sReg].fpLocation != kLocPhysReg) {
                 int reg = oatAllocPreservedFPReg(cUnit, fpRegs[i].sReg,
                     fpRegs[i].doubleStart);
                 if (reg < 0) {
-                   break;  // No more left
+                    break;  // No more left
                 }
             }
         }
 
         // Promote core regs
         for (int i = 0; (coreRegs[i].count > 0) && i < numRegs; i++) {
-            if (cUnit->regLocation[i].location != kLocPhysReg) {
+            if (cUnit->promotionMap[coreRegs[i].sReg].coreLocation !=
+                    kLocPhysReg) {
                 int reg = oatAllocPreservedCoreReg(cUnit, coreRegs[i].sReg);
                 if (reg < 0) {
                    break;  // No more left
@@ -203,58 +191,69 @@
 
     // Now, update SSA names to new home locations
     for (int i = 0; i < cUnit->numSSARegs; i++) {
-        int baseSreg = cUnit->regLocation[i].sRegLow;
-        RegLocation *base = &cUnit->regLocation[baseSreg];
-        RegLocation *baseNext = &cUnit->regLocation[baseSreg+1];
         RegLocation *curr = &cUnit->regLocation[i];
-        if (curr->fp) {
-            /* Single or double, check fpLocation of base */
-            if (base->fpLocation == kLocPhysReg) {
-                if (curr->wide) {
-                    /* TUNING: consider alignment during allocation */
-                    if ((base->fpLowReg & 1) ||
-                        (baseNext->fpLocation != kLocPhysReg)) {
-                        /* Half-promoted or bad alignment - demote */
-                        curr->location = kLocDalvikFrame;
-                        curr->lowReg = INVALID_REG;
-                        curr->highReg = INVALID_REG;
-                        continue;
-                    }
-                    curr->highReg = baseNext->fpLowReg;
+        int baseVReg = oatS2VReg(cUnit, curr->sRegLow);
+        if (!curr->wide) {
+            if (curr->fp) {
+                if (cUnit->promotionMap[baseVReg].fpLocation == kLocPhysReg) {
+                    curr->location = kLocPhysReg;
+                    curr->lowReg = cUnit->promotionMap[baseVReg].fpReg;
+                    curr->home = true;
                 }
-                curr->location = kLocPhysReg;
-                curr->lowReg = base->fpLowReg;
-                curr->home = true;
+            } else {
+                if (cUnit->promotionMap[baseVReg].coreLocation == kLocPhysReg) {
+                    curr->location = kLocPhysReg;
+                    curr->lowReg = cUnit->promotionMap[baseVReg].coreReg;
+                    curr->home = true;
+                }
             }
+            curr->highReg = INVALID_REG;
         } else {
-            /* Core or wide */
-            if (base->location == kLocPhysReg) {
-                if (curr->wide) {
-                    /* Make sure upper half is also in reg or skip */
-                    if (baseNext->location != kLocPhysReg) {
-                        /* Only half promoted; demote to frame */
-                        curr->location = kLocDalvikFrame;
-                        curr->lowReg = INVALID_REG;
-                        curr->highReg = INVALID_REG;
-                        continue;
+            if (curr->highWord) {
+                continue;
+            }
+            if (curr->fp) {
+                if ((cUnit->promotionMap[baseVReg].fpLocation == kLocPhysReg) &&
+                    (cUnit->promotionMap[baseVReg+1].fpLocation ==
+                    kLocPhysReg)) {
+                    int lowReg = cUnit->promotionMap[baseVReg].fpReg;
+                    int highReg = cUnit->promotionMap[baseVReg+1].fpReg;
+                    // Doubles require pair of singles starting at even reg
+                    if (((lowReg & 0x1) == 0) && ((lowReg + 1) == highReg)) {
+                        curr->location = kLocPhysReg;
+                        curr->lowReg = lowReg;
+                        curr->highReg = highReg;
+                        curr->home = true;
                     }
-                    curr->highReg = baseNext->lowReg;
                 }
-                curr->location = kLocPhysReg;
-                curr->lowReg = base->lowReg;
-                curr->home = true;
+            } else {
+                if ((cUnit->promotionMap[baseVReg].coreLocation == kLocPhysReg)
+                     && (cUnit->promotionMap[baseVReg+1].coreLocation ==
+                     kLocPhysReg)) {
+                    curr->location = kLocPhysReg;
+                    curr->lowReg = cUnit->promotionMap[baseVReg].coreReg;
+                    curr->highReg = cUnit->promotionMap[baseVReg+1].coreReg;
+                    curr->home = true;
+                }
             }
         }
     }
 }
 
-/* Returns sp-relative offset in bytes */
-extern int oatVRegOffset(CompilationUnit* cUnit, int reg)
+/* Returns sp-relative offset in bytes for a VReg */
+extern int oatVRegOffset(CompilationUnit* cUnit, int vReg)
 {
-    return (reg < cUnit->numRegs) ? cUnit->regsOffset + (reg << 2) :
-            cUnit->insOffset + ((reg - cUnit->numRegs) << 2);
+    return (vReg < cUnit->numRegs) ? cUnit->regsOffset + (vReg << 2) :
+            cUnit->insOffset + ((vReg - cUnit->numRegs) << 2);
 }
 
+/* Returns sp-relative offset in bytes for an SReg, via its VReg */
+extern int oatSRegOffset(CompilationUnit* cUnit, int sReg)
+{
+    return oatVRegOffset(cUnit, oatS2VReg(cUnit, sReg));
+}
+
+
 /* Return sp-relative offset in bytes using Method* */
 extern int oatVRegOffsetFromMethod(Method* method, int reg)
 {
diff --git a/src/compiler/codegen/arm/MethodCodegenDriver.cc b/src/compiler/codegen/arm/MethodCodegenDriver.cc
index 6331254..4a65771 100644
--- a/src/compiler/codegen/arm/MethodCodegenDriver.cc
+++ b/src/compiler/codegen/arm/MethodCodegenDriver.cc
@@ -17,10 +17,8 @@
 #define DISPLAY_MISSING_TARGETS (cUnit->enableDebug & \
     (1 << kDebugDisplayMissingTargets))
 
-STATIC const RegLocation badLoc = {kLocDalvikFrame, 0, 0, INVALID_REG,
-                                   INVALID_REG, INVALID_SREG, 0,
-                                   kLocDalvikFrame, INVALID_REG, INVALID_REG,
-                                   INVALID_OFFSET};
+STATIC const RegLocation badLoc = {kLocDalvikFrame, 0, 0, 0, 0, 0, 0, INVALID_REG,
+                                   INVALID_REG, INVALID_SREG};
 
 /* Mark register usage state and return long retloc */
 STATIC RegLocation getRetLocWide(CompilationUnit* cUnit)
@@ -99,7 +97,8 @@
             RegLocation loc = oatUpdateLoc(cUnit,
                 oatGetSrc(cUnit, mir, i));
             if (loc.location == kLocPhysReg) {
-                storeBaseDisp(cUnit, rSP, loc.spOffset, loc.lowReg, kWord);
+                storeBaseDisp(cUnit, rSP, oatSRegOffset(cUnit, loc.sRegLow),
+                              loc.lowReg, kWord);
             }
         }
         /*
@@ -113,7 +112,8 @@
         int rVal = rLR;  // Using a lot of temps, rLR is known free here
         // Set up source pointer
         RegLocation rlFirst = oatGetSrc(cUnit, mir, 0);
-        opRegRegImm(cUnit, kOpAdd, rSrc, rSP, rlFirst.spOffset);
+        opRegRegImm(cUnit, kOpAdd, rSrc, rSP,
+                    oatSRegOffset(cUnit, rlFirst.sRegLow));
         // Set up the target pointer
         opRegRegImm(cUnit, kOpAdd, rDst, r0,
                     Array::DataOffset().Int32Value());
@@ -773,7 +773,8 @@
             } else {
                 // r2 & r3 can safely be used here
                 reg = r3;
-                loadWordDisp(cUnit, rSP, rlArg.spOffset + 4, reg);
+                loadWordDisp(cUnit, rSP,
+                             oatSRegOffset(cUnit, rlArg.sRegLow) + 4, reg);
                 callState = nextCallInsn(cUnit, mir, dInsn, callState,
                                          rollback);
             }
@@ -872,20 +873,23 @@
         if (loc.wide) {
             loc = oatUpdateLocWide(cUnit, loc);
             if ((nextArg >= 2) && (loc.location == kLocPhysReg)) {
-                storeBaseDispWide(cUnit, rSP, loc.spOffset, loc.lowReg,
-                                  loc.highReg);
+                storeBaseDispWide(cUnit, rSP,
+                                  oatSRegOffset(cUnit, loc.sRegLow),
+                                  loc.lowReg, loc.highReg);
             }
             nextArg += 2;
         } else {
             loc = oatUpdateLoc(cUnit, loc);
             if ((nextArg >= 3) && (loc.location == kLocPhysReg)) {
-                storeBaseDisp(cUnit, rSP, loc.spOffset, loc.lowReg, kWord);
+                storeBaseDisp(cUnit, rSP, oatSRegOffset(cUnit, loc.sRegLow),
+                              loc.lowReg, kWord);
             }
             nextArg++;
         }
     }
 
-    int startOffset = cUnit->regLocation[mir->ssaRep->uses[3]].spOffset;
+    int startOffset = oatSRegOffset(cUnit,
+        cUnit->regLocation[mir->ssaRep->uses[3]].sRegLow);
     int outsOffset = 4 /* Method* */ + (3 * 4);
     if (numArgs >= 20) {
         // Generate memcpy
@@ -1790,63 +1794,44 @@
     }
 }
 
-/* If there are any ins passed in registers that have not been promoted
- * to a callee-save register, flush them to the frame.
- * Note: at this pointCopy any ins that are passed in register to their
- * home location */
+/*
+ * If there are any ins passed in registers that have not been promoted
+ * to a callee-save register, flush them to the frame.  Perform initial
+ * assignment of promoted arguments.
+ */
 STATIC void flushIns(CompilationUnit* cUnit)
 {
     if (cUnit->method->NumIns() == 0)
         return;
-    int inRegs = (cUnit->method->NumIns() > 2) ? 3
-                                               : cUnit->method->NumIns();
-    int startReg = r1;
-    int startLoc = cUnit->method->NumRegisters() -
+    int firstArgReg = r1;
+    int lastArgReg = r3;
+    int startVReg = cUnit->method->NumRegisters() -
         cUnit->method->NumIns();
-    for (int i = 0; i < inRegs; i++) {
-        RegLocation loc = cUnit->regLocation[startLoc + i];
-        //TUNING: be smarter about flushing ins to frame
-        storeBaseDisp(cUnit, rSP, loc.spOffset, startReg + i, kWord);
-        if (loc.location == kLocPhysReg) {
-            genRegCopy(cUnit, loc.lowReg, startReg + i);
-        }
-    }
-
-    // Handle special case of wide argument half in regs, half in frame
-    if (inRegs == 3) {
-        RegLocation loc = cUnit->regLocation[startLoc + 2];
-        if (loc.wide && loc.location == kLocPhysReg) {
-            // Load the other half of the arg into the promoted pair
-            loadWordDisp(cUnit, rSP, loc.spOffset + 4, loc.highReg);
-            inRegs++;
-        }
-    }
-
-    // Now, do initial assignment of all promoted arguments passed in frame
-    for (int i = inRegs; i < cUnit->method->NumIns();) {
-        RegLocation loc = cUnit->regLocation[startLoc + i];
-        if (loc.fpLocation == kLocPhysReg) {
-            loc.location = kLocPhysReg;
-            loc.fp = true;
-            loc.lowReg = loc.fpLowReg;
-            loc.highReg = loc.fpHighReg;
-        }
-        if (loc.location == kLocPhysReg) {
-            if (loc.wide) {
-                if (loc.fp && (loc.lowReg & 1) != 0) {
-                    // Misaligned - need to load as a pair of singles
-                    loadWordDisp(cUnit, rSP, loc.spOffset, loc.lowReg);
-                    loadWordDisp(cUnit, rSP, loc.spOffset + 4, loc.highReg);
-                } else {
-                    loadBaseDispWide(cUnit, NULL, rSP, loc.spOffset,
-                                     loc.lowReg, loc.highReg, INVALID_SREG);
-                }
-                i++;
-            } else {
-                loadWordDisp(cUnit, rSP, loc.spOffset, loc.lowReg);
+    for (int i = 0; i < cUnit->method->NumIns(); i++) {
+        PromotionMap vMap = cUnit->promotionMap[startVReg + i];
+        // For arguments only, should have at most one promotion kind
+        DCHECK(!((vMap.coreLocation == kLocPhysReg) &&
+                 (vMap.fpLocation == kLocPhysReg)));
+        if (i <= (lastArgReg - firstArgReg)) {
+            // If arriving in register, copy or flush
+            if (vMap.coreLocation == kLocPhysReg) {
+                genRegCopy(cUnit, vMap.coreReg, firstArgReg + i);
+            } else if (vMap.fpLocation == kLocPhysReg) {
+                genRegCopy(cUnit, vMap.fpReg, firstArgReg + i);
+            }
+            // Also put a copy in memory in case we're partially promoted
+            storeBaseDisp(cUnit, rSP, oatSRegOffset(cUnit, startVReg + i),
+                          firstArgReg + i, kWord);
+        } else {
+            // If arriving in frame, initialize promoted target regs
+            if (vMap.coreLocation == kLocPhysReg) {
+                loadWordDisp(cUnit, rSP, oatSRegOffset(cUnit, startVReg + i),
+                             vMap.coreReg);
+            } else if (vMap.fpLocation == kLocPhysReg) {
+                loadWordDisp(cUnit, rSP, oatSRegOffset(cUnit, startVReg + i),
+                             vMap.fpReg);
             }
         }
-        i++;
     }
 }