SSA rework and support compiler temps in the frame

Add ability for the compiler to allocate new frame temporaries
that play nicely with the register allocation mechanism.  To do this
we assign negative virtual register numbers and give them SSA names.
As part of this change, I did a general cleanup of the ssa naming.
An ssa name (or SReg) is an index into an array of (virtual reg, subscript)
pairs.  Previously, 16 bits were allocated for the reg and the subscript.
This CL expands the virtual reg and subscript to 32 bits each.

Method* is now treated as a RegLocation, and will be subject to
temp register tracking and reuse.  This CL does not yet include
support for promotion of Method* - that will show up in the next one.

Also included is the beginning of a basic block optimization pass (not
yet in a runnable state, so conditionally compiled out).

(cherry picked from commit f689ffec8827f1dd6b31084f8a6bb240338c7acf)

Change-Id: Ibbdeb97fe05d0e33c1f4a9a6ccbdef1cac7646fc
diff --git a/src/compiler/codegen/CodegenFactory.cc b/src/compiler/codegen/CodegenFactory.cc
index 8a6e1bc..5444816 100644
--- a/src/compiler/codegen/CodegenFactory.cc
+++ b/src/compiler/codegen/CodegenFactory.cc
@@ -65,7 +65,8 @@
     if (rlSrc.location == kLocPhysReg) {
         opRegCopy(cUnit, reg1, rlSrc.lowReg);
     } else {
-        DCHECK(rlSrc.location == kLocDalvikFrame);
+        DCHECK((rlSrc.location == kLocDalvikFrame) ||
+               (rlSrc.location == kLocCompilerTemp));
         loadWordDisp(cUnit, rSP, oatSRegOffset(cUnit, rlSrc.sRegLow), reg1);
     }
 }
@@ -94,7 +95,8 @@
     if (rlSrc.location == kLocPhysReg) {
         opRegCopyWide(cUnit, regLo, regHi, rlSrc.lowReg, rlSrc.highReg);
     } else {
-        DCHECK(rlSrc.location == kLocDalvikFrame);
+        DCHECK((rlSrc.location == kLocDalvikFrame) ||
+               (rlSrc.location == kLocCompilerTemp));
         loadBaseDispWide(cUnit, NULL, rSP,
                          oatSRegOffset(cUnit, rlSrc.sRegLow),
                          regLo, regHi, INVALID_SREG);
@@ -120,7 +122,9 @@
                       RegisterClass opKind)
 {
     rlSrc = oatEvalLoc(cUnit, rlSrc, opKind, false);
-    if (rlSrc.location == kLocDalvikFrame) {
+    if (rlSrc.location != kLocPhysReg) {
+        DCHECK((rlSrc.location == kLocDalvikFrame) ||
+               (rlSrc.location == kLocCompilerTemp));
         loadValueDirect(cUnit, rlSrc, rlSrc.lowReg);
         rlSrc.location = kLocPhysReg;
         oatMarkLive(cUnit, rlSrc.lowReg, rlSrc.sRegLow);
@@ -176,7 +180,9 @@
 {
     DCHECK(rlSrc.wide);
     rlSrc = oatEvalLoc(cUnit, rlSrc, opKind, false);
-    if (rlSrc.location == kLocDalvikFrame) {
+    if (rlSrc.location != kLocPhysReg) {
+        DCHECK((rlSrc.location == kLocDalvikFrame) ||
+               (rlSrc.location == kLocCompilerTemp));
         loadValueDirectWide(cUnit, rlSrc, rlSrc.lowReg, rlSrc.highReg);
         rlSrc.location = kLocPhysReg;
         oatMarkLive(cUnit, rlSrc.lowReg, rlSrc.sRegLow);
@@ -232,8 +238,8 @@
         (oatLiveOut(cUnit, rlDest.sRegLow) ||
         oatLiveOut(cUnit, oatSRegHi(rlDest.sRegLow)))) {
         defStart = (LIR*)cUnit->lastLIRInsn;
-        DCHECK_EQ((oatS2VReg(cUnit, rlDest.sRegLow)+1),
-                oatS2VReg(cUnit, oatSRegHi(rlDest.sRegLow)));
+        DCHECK_EQ((SRegToVReg(cUnit, rlDest.sRegLow)+1),
+                   SRegToVReg(cUnit, oatSRegHi(rlDest.sRegLow)));
         storeBaseDispWide(cUnit, rSP, oatSRegOffset(cUnit, rlDest.sRegLow),
                           rlDest.lowReg, rlDest.highReg);
         oatMarkClean(cUnit, rlDest);
@@ -265,29 +271,15 @@
 #endif
 }
 
-/*
- * Utility to load the current Method*.  Broken out
- * to allow easy change between placing the current Method* in a
- * dedicated register or its home location in the frame.
- */
+/* Utilities to load the current Method* */
 void loadCurrMethodDirect(CompilationUnit *cUnit, int rTgt)
 {
-#if defined(METHOD_IN_REG)
-    opRegCopy(cUnit, rTgt, rMETHOD);
-#else
-    loadWordDisp(cUnit, rSP, 0, rTgt);
-#endif
+    loadValueDirectFixed(cUnit, cUnit->regLocation[cUnit->methodSReg], rTgt);
 }
 
-int loadCurrMethod(CompilationUnit *cUnit)
+RegLocation loadCurrMethod(CompilationUnit *cUnit)
 {
-#if defined(METHOD_IN_REG)
-    return rMETHOD;
-#else
-    int mReg = oatAllocTemp(cUnit);
-    loadCurrMethodDirect(cUnit, mReg);
-    return mReg;
-#endif
+    return loadValue(cUnit, cUnit->regLocation[cUnit->methodSReg], kCoreReg);
 }
 
 
diff --git a/src/compiler/codegen/GenCommon.cc b/src/compiler/codegen/GenCommon.cc
index e2c306d..9b1654f 100644
--- a/src/compiler/codegen/GenCommon.cc
+++ b/src/compiler/codegen/GenCommon.cc
@@ -275,10 +275,12 @@
         rTgt = loadHelper(cUnit, OFFSETOF_MEMBER(Thread,
                           pCheckAndAllocArrayFromCodeWithAccessCheck));
     }
-    loadCurrMethodDirect(cUnit, rARG1);              // arg1 <- Method*
     loadConstant(cUnit, rARG0, typeId);              // arg0 <- type_id
     loadConstant(cUnit, rARG2, elems);               // arg2 <- count
+    loadCurrMethodDirect(cUnit, rARG1);              // arg1 <- Method*
     callRuntimeHelper(cUnit, rTgt);
+    oatFreeTemp(cUnit, rARG2);
+    oatFreeTemp(cUnit, rARG1);
     /*
      * NOTE: the implicit target for Instruction::FILLED_NEW_ARRAY is the
      * return region.  Because AllocFromCode placed the new array
@@ -387,12 +389,11 @@
     if (fastPath && !SLOW_FIELD_PATH) {
         DCHECK_GE(fieldOffset, 0);
         int rBase;
-        int rMethod;
         if (isReferrersClass) {
             // Fast path, static storage base is this method's class
-            rMethod  = loadCurrMethod(cUnit);
+            RegLocation rlMethod  = loadCurrMethod(cUnit);
             rBase = oatAllocTemp(cUnit);
-            loadWordDisp(cUnit, rMethod,
+            loadWordDisp(cUnit, rlMethod.lowReg,
                          Method::DeclaringClassOffset().Int32Value(), rBase);
         } else {
             // Medium path, static storage base in a different class which
@@ -402,7 +403,7 @@
             oatFlushAllRegs(cUnit);
             // Using fixed register to sync with possible call to runtime
             // support.
-            rMethod = rARG1;
+            int rMethod = rARG1;
             oatLockTemp(cUnit, rMethod);
             loadCurrMethodDirect(cUnit, rMethod);
             rBase = rARG0;
@@ -427,9 +428,9 @@
 #endif
             LIR* skipTarget = newLIR0(cUnit, kPseudoTargetLabel);
             branchOver->target = (LIR*)skipTarget;
+            oatFreeTemp(cUnit, rMethod);
         }
         // rBase now holds static storage base
-        oatFreeTemp(cUnit, rMethod);
         if (isLongOrDouble) {
             rlSrc = oatGetSrcWide(cUnit, mir, 0, 1);
             rlSrc = loadValueWide(cUnit, rlSrc, kAnyReg);
@@ -496,12 +497,11 @@
     if (fastPath && !SLOW_FIELD_PATH) {
         DCHECK_GE(fieldOffset, 0);
         int rBase;
-        int rMethod;
         if (isReferrersClass) {
             // Fast path, static storage base is this method's class
-            rMethod  = loadCurrMethod(cUnit);
+            RegLocation rlMethod  = loadCurrMethod(cUnit);
             rBase = oatAllocTemp(cUnit);
-            loadWordDisp(cUnit, rMethod,
+            loadWordDisp(cUnit, rlMethod.lowReg,
                          Method::DeclaringClassOffset().Int32Value(), rBase);
         } else {
             // Medium path, static storage base in a different class which
@@ -511,7 +511,7 @@
             oatFlushAllRegs(cUnit);
             // Using fixed register to sync with possible call to runtime
             // support
-            rMethod = rARG1;
+            int rMethod = rARG1;
             oatLockTemp(cUnit, rMethod);
             loadCurrMethodDirect(cUnit, rMethod);
             rBase = rARG0;
@@ -537,9 +537,9 @@
 #endif
             LIR* skipTarget = newLIR0(cUnit, kPseudoTargetLabel);
             branchOver->target = (LIR*)skipTarget;
+            oatFreeTemp(cUnit, rMethod);
         }
         // rBase now holds static storage base
-        oatFreeTemp(cUnit, rMethod);
         rlDest = isLongOrDouble ? oatGetDestWide(cUnit, mir, 0, 1)
                                 : oatGetDest(cUnit, mir, 0);
         RegLocation rlResult = oatEvalLoc(cUnit, rlDest, kAnyReg, true);
@@ -837,7 +837,7 @@
                    RegLocation rlSrc)
 {
     uint32_t type_idx = mir->dalvikInsn.vB;
-    int mReg = loadCurrMethod(cUnit);
+    RegLocation rlMethod = loadCurrMethod(cUnit);
     int resReg = oatAllocTemp(cUnit);
     RegLocation rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true);
     if (!cUnit->compiler->CanAccessTypeWithoutChecks(cUnit->method_idx,
@@ -848,7 +848,7 @@
         // Resolved type returned in rRET0.
         int rTgt = loadHelper(cUnit, OFFSETOF_MEMBER(Thread,
                               pInitializeTypeAndVerifyAccessFromCode));
-        opRegCopy(cUnit, rARG1, mReg);
+        opRegCopy(cUnit, rARG1, rlMethod.lowReg);
         loadConstant(cUnit, rARG0, type_idx);
         callRuntimeHelper(cUnit, rTgt);
         RegLocation rlResult = oatGetReturn(cUnit);
@@ -857,7 +857,7 @@
         // We're don't need access checks, load type from dex cache
         int32_t dex_cache_offset =
             Method::DexCacheResolvedTypesOffset().Int32Value();
-        loadWordDisp(cUnit, mReg, dex_cache_offset, resReg);
+        loadWordDisp(cUnit, rlMethod.lowReg, dex_cache_offset, resReg);
         int32_t offset_of_type =
             Array::DataOffset(sizeof(Class*)).Int32Value() + (sizeof(Class*)
                               * type_idx);
@@ -876,7 +876,7 @@
             // Call out to helper, which will return resolved type in rARG0
             int rTgt = loadHelper(cUnit, OFFSETOF_MEMBER(Thread,
                                   pInitializeTypeFromCode));
-            opRegCopy(cUnit, rARG1, mReg);
+            opRegCopy(cUnit, rARG1, rlMethod.lowReg);
             loadConstant(cUnit, rARG0, type_idx);
             callRuntimeHelper(cUnit, rTgt);
             RegLocation rlResult = oatGetReturn(cUnit);
@@ -930,10 +930,10 @@
         genBarrier(cUnit);
         storeValue(cUnit, rlDest, oatGetReturn(cUnit));
     } else {
-        int mReg = loadCurrMethod(cUnit);
+        RegLocation rlMethod = loadCurrMethod(cUnit);
         int resReg = oatAllocTemp(cUnit);
         RegLocation rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true);
-        loadWordDisp(cUnit, mReg,
+        loadWordDisp(cUnit, rlMethod.lowReg,
                      Method::DexCacheStringsOffset().Int32Value(), resReg);
         loadWordDisp(cUnit, resReg, offset_of_string, rlResult.lowReg);
         storeValue(cUnit, rlDest, rlResult);
diff --git a/src/compiler/codegen/MethodCodegenDriver.cc b/src/compiler/codegen/MethodCodegenDriver.cc
index 6b3283e..5baabf2 100644
--- a/src/compiler/codegen/MethodCodegenDriver.cc
+++ b/src/compiler/codegen/MethodCodegenDriver.cc
@@ -716,8 +716,7 @@
     "kMirOpNullNRangeUpCheck",
     "kMirOpNullNRangeDownCheck",
     "kMirOpLowerBound",
-    "kMirOpPunt",
-    "kMirOpCheckInlinePrediction",
+    "kMirOpCopy",
 };
 
 /* Extended MIR instructions like PHI */
@@ -742,6 +741,9 @@
             newLIR1(cUnit, kPseudoSSARep, (int) ssaString);
             break;
         }
+        case kMirOpCopy:
+            UNIMPLEMENTED(FATAL) << "Need kMirOpCopy";
+            break;
         default:
             break;
     }
@@ -761,11 +763,19 @@
     labelList[blockId].opcode = kPseudoNormalBlockLabel;
     oatAppendLIR(cUnit, (LIR*) &labelList[blockId]);
 
-    /* Reset local optimization data on block boundaries */
+    /* Free temp registers and reset redundant store tracking */
     oatResetRegPool(cUnit);
-    oatClobberAllRegs(cUnit);
     oatResetDefTracking(cUnit);
 
+    /*
+     * If control reached us from our immediate predecessor via
+     * fallthrough and we have no other incoming arcs we can
+     * reuse existing liveness.  Otherwise, reset.
+     */
+    if (!bb->fallThroughTarget || bb->predecessors->numUsed != 1) {
+        oatClobberAllRegs(cUnit);
+    }
+
     LIR* headLIR = NULL;
 
     if (bb->blockType == kEntryBlock) {
diff --git a/src/compiler/codegen/Ralloc.h b/src/compiler/codegen/Ralloc.h
index 8c8c693..d32545c 100644
--- a/src/compiler/codegen/Ralloc.h
+++ b/src/compiler/codegen/Ralloc.h
@@ -35,12 +35,6 @@
 };
 
 
-inline int oatS2VReg(CompilationUnit* cUnit, int sReg)
-{
-    DCHECK_NE(sReg, INVALID_SREG);
-    return DECODE_REG(oatConvertSSARegToDalvik(cUnit, sReg));
-}
-
 /*
  * Get the "real" sreg number associated with an sReg slot.  In general,
  * sReg values passed through codegen are the SSA names created by
diff --git a/src/compiler/codegen/RallocUtil.cc b/src/compiler/codegen/RallocUtil.cc
index b5ebf65..8f5d1bb 100644
--- a/src/compiler/codegen/RallocUtil.cc
+++ b/src/compiler/codegen/RallocUtil.cc
@@ -147,6 +147,19 @@
                     sReg);
 }
 
+/* Sanity check: is sReg in [-(numCompilerTemps + 1), numDalvikRegisters)? */
+bool validSreg(CompilationUnit* cUnit, int sReg)
+{
+    bool res = ((-(cUnit->numCompilerTemps + 1) <= sReg) &&
+                (sReg < cUnit->numDalvikRegisters));
+    if (!res) {
+        LOG(WARNING) << "Bad sreg: " << sReg;
+        LOG(WARNING) << "  low = " << -(cUnit->numCompilerTemps + 1);
+        LOG(WARNING) << "  high = " << cUnit->numDalvikRegisters;
+    }
+    return res;
+}
+
 /* Reserve a callee-save register.  Return -1 if none available */
 extern int oatAllocPreservedCoreReg(CompilationUnit* cUnit, int sReg)
 {
@@ -160,7 +173,8 @@
             cUnit->coreVmapTable.push_back(sReg);
             cUnit->numCoreSpills++;
             //  Should be promoting based on initial sReg set
-            DCHECK_EQ(sReg, oatS2VReg(cUnit, sReg));
+            DCHECK_EQ(sReg, SRegToVReg(cUnit, sReg));
+            DCHECK(validSreg(cUnit,sReg));
             cUnit->promotionMap[sReg].coreLocation = kLocPhysReg;
             cUnit->promotionMap[sReg].coreReg = res;
             break;
@@ -184,8 +198,9 @@
             res = FPRegs[i].reg;
             FPRegs[i].inUse = true;
             //  Should be promoting based on initial sReg set
-            DCHECK_EQ(sReg, oatS2VReg(cUnit, sReg));
+            DCHECK_EQ(sReg, SRegToVReg(cUnit, sReg));
             oatMarkPreservedSingle(cUnit, sReg, res);
+            DCHECK(validSreg(cUnit,sReg));
             cUnit->promotionMap[sReg].fpLocation = kLocPhysReg;
             cUnit->promotionMap[sReg].fpReg = res;
             break;
@@ -206,7 +221,8 @@
 {
     int res = -1; // Assume failure
     //  Should be promoting based on initial sReg set
-    DCHECK_EQ(sReg, oatS2VReg(cUnit, sReg));
+    DCHECK_EQ(sReg, SRegToVReg(cUnit, sReg));
+    DCHECK(validSreg(cUnit,sReg+1));
     if (cUnit->promotionMap[sReg+1].fpLocation == kLocPhysReg) {
         // Upper reg is already allocated.  Can we fit?
         int highReg = cUnit->promotionMap[sReg+1].fpReg;
@@ -244,8 +260,10 @@
         }
     }
     if (res != -1) {
+        DCHECK(validSreg(cUnit,sReg));
         cUnit->promotionMap[sReg].fpLocation = kLocPhysReg;
         cUnit->promotionMap[sReg].fpReg = res;
+        DCHECK(validSreg(cUnit,sReg+1));
         cUnit->promotionMap[sReg+1].fpLocation = kLocPhysReg;
         cUnit->promotionMap[sReg+1].fpReg = res + 1;
     }
@@ -788,7 +806,9 @@
 {
     DCHECK(!loc.wide);
     DCHECK(oatCheckCorePoolSanity(cUnit));
-    if (loc.location == kLocDalvikFrame) {
+    if (loc.location != kLocPhysReg) {
+        DCHECK((loc.location == kLocDalvikFrame) ||
+               (loc.location == kLocCompilerTemp));
         RegisterInfo* infoLo = allocLive(cUnit, loc.sRegLow, kAnyReg);
         if (infoLo) {
             if (infoLo->pair) {
@@ -837,7 +857,9 @@
 {
     DCHECK(loc.wide);
     DCHECK(oatCheckCorePoolSanity(cUnit));
-    if (loc.location == kLocDalvikFrame) {
+    if (loc.location != kLocPhysReg) {
+        DCHECK((loc.location == kLocDalvikFrame) ||
+               (loc.location == kLocCompilerTemp));
         // Are the dalvik regs already live in physical registers?
         RegisterInfo* infoLo = allocLive(cUnit, loc.sRegLow, kAnyReg);
         RegisterInfo* infoHi = allocLive(cUnit,
@@ -1026,7 +1048,7 @@
             for (int i = 0; i < ssaRep->numDefs;) {
                 RegLocation loc = cUnit->regLocation[ssaRep->defs[i]];
                 RefCounts* counts = loc.fp ? fpCounts : coreCounts;
-                int vReg = oatS2VReg(cUnit, ssaRep->defs[i]);
+                int vReg = SRegToVReg(cUnit, ssaRep->defs[i]);
                 if (loc.defined) {
                     counts[vReg].count++;
                 }
@@ -1045,7 +1067,7 @@
             for (int i = 0; i < ssaRep->numUses;) {
                 RegLocation loc = cUnit->regLocation[ssaRep->uses[i]];
                 RefCounts* counts = loc.fp ? fpCounts : coreCounts;
-                int vReg = oatS2VReg(cUnit, ssaRep->uses[i]);
+                int vReg = SRegToVReg(cUnit, ssaRep->uses[i]);
                 if (loc.defined) {
                     counts[vReg].count++;
                 }
@@ -1142,6 +1164,7 @@
     if (!(cUnit->disableOpt & (1 << kPromoteRegs))) {
         // Promote fpRegs
         for (int i = 0; (fpRegs[i].count > 0) && (i < numRegs); i++) {
+            DCHECK(validSreg(cUnit,fpRegs[i].sReg));
             if (cUnit->promotionMap[fpRegs[i].sReg].fpLocation != kLocPhysReg) {
                 int reg = oatAllocPreservedFPReg(cUnit, fpRegs[i].sReg,
                     fpRegs[i].doubleStart);
@@ -1153,6 +1176,7 @@
 
         // Promote core regs
         for (int i = 0; (coreRegs[i].count > 0) && i < numRegs; i++) {
+            DCHECK(validSreg(cUnit,coreRegs[i].sReg));
             if (cUnit->promotionMap[coreRegs[i].sReg].coreLocation !=
                     kLocPhysReg) {
                 int reg = oatAllocPreservedCoreReg(cUnit, coreRegs[i].sReg);
@@ -1166,15 +1190,17 @@
     // Now, update SSA names to new home locations
     for (int i = 0; i < cUnit->numSSARegs; i++) {
         RegLocation *curr = &cUnit->regLocation[i];
-        int baseVReg = oatS2VReg(cUnit, curr->sRegLow);
+        int baseVReg = SRegToVReg(cUnit, curr->sRegLow);
         if (!curr->wide) {
             if (curr->fp) {
+                DCHECK(validSreg(cUnit,baseVReg));
                 if (cUnit->promotionMap[baseVReg].fpLocation == kLocPhysReg) {
                     curr->location = kLocPhysReg;
                     curr->lowReg = cUnit->promotionMap[baseVReg].fpReg;
                     curr->home = true;
                 }
             } else {
+                DCHECK(validSreg(cUnit,baseVReg));
                 if (cUnit->promotionMap[baseVReg].coreLocation == kLocPhysReg) {
                     curr->location = kLocPhysReg;
                     curr->lowReg = cUnit->promotionMap[baseVReg].coreReg;
@@ -1187,6 +1213,8 @@
                 continue;
             }
             if (curr->fp) {
+                DCHECK(validSreg(cUnit,baseVReg));
+                DCHECK(validSreg(cUnit,baseVReg+1));
                 if ((cUnit->promotionMap[baseVReg].fpLocation == kLocPhysReg) &&
                     (cUnit->promotionMap[baseVReg+1].fpLocation ==
                     kLocPhysReg)) {
@@ -1201,6 +1229,8 @@
                     }
                 }
             } else {
+                DCHECK(validSreg(cUnit,baseVReg));
+                DCHECK(validSreg(cUnit,baseVReg+1));
                 if ((cUnit->promotionMap[baseVReg].coreLocation == kLocPhysReg)
                      && (cUnit->promotionMap[baseVReg+1].coreLocation ==
                      kLocPhysReg)) {
@@ -1224,7 +1254,7 @@
 /* Returns sp-relative offset in bytes for a SReg */
 extern int oatSRegOffset(CompilationUnit* cUnit, int sReg)
 {
-    return oatVRegOffset(cUnit, oatS2VReg(cUnit, sReg));
+    return oatVRegOffset(cUnit, SRegToVReg(cUnit, sReg));
 }
 
 }  // namespace art
diff --git a/src/compiler/codegen/arm/ArchFactory.cc b/src/compiler/codegen/arm/ArchFactory.cc
index 8a23d5c..da5de52 100644
--- a/src/compiler/codegen/arm/ArchFactory.cc
+++ b/src/compiler/codegen/arm/ArchFactory.cc
@@ -106,7 +106,21 @@
         opRegImm(cUnit, kOpSub, rSP,
                  cUnit->frameSize - (spillCount * 4));
     }
-    storeBaseDisp(cUnit, rSP, 0, r0, kWord);
+
+    /*
+     * Dummy up a RegLocation for the incoming Method*
+     * It will attempt to keep r0 live (or copy it to home location
+     * if promoted).
+     */
+    RegLocation rlSrc = cUnit->regLocation[cUnit->methodSReg];
+    RegLocation rlMethod = cUnit->regLocation[cUnit->methodSReg];
+    rlSrc.location = kLocPhysReg;
+    rlSrc.lowReg = r0;
+    rlSrc.home = false;
+    oatMarkLive(cUnit, rlSrc.lowReg, rlSrc.sRegLow);
+    storeValue(cUnit, rlMethod, rlSrc);
+
+    /* Flush the rest of the ins */
     flushIns(cUnit);
 
     if (cUnit->genDebugger) {
diff --git a/src/compiler/codegen/arm/ArmRallocUtil.cc b/src/compiler/codegen/arm/ArmRallocUtil.cc
index 3335f59..e7627f2 100644
--- a/src/compiler/codegen/arm/ArmRallocUtil.cc
+++ b/src/compiler/codegen/arm/ArmRallocUtil.cc
@@ -76,10 +76,10 @@
 
         info1->dirty = false;
         info2->dirty = false;
-        if (oatS2VReg(cUnit, info2->sReg) <
-            oatS2VReg(cUnit, info1->sReg))
+        if (SRegToVReg(cUnit, info2->sReg) <
+            SRegToVReg(cUnit, info1->sReg))
             info1 = info2;
-        int vReg = oatS2VReg(cUnit, info1->sReg);
+        int vReg = SRegToVReg(cUnit, info1->sReg);
         oatFlushRegWideImpl(cUnit, rSP,
                                     oatVRegOffset(cUnit, vReg),
                                     info1->reg, info1->partner);
@@ -91,7 +91,7 @@
     RegisterInfo* info = oatGetRegInfo(cUnit, reg);
     if (info->live && info->dirty) {
         info->dirty = false;
-        int vReg = oatS2VReg(cUnit, info->sReg);
+        int vReg = SRegToVReg(cUnit, info->sReg);
         oatFlushRegImpl(cUnit, rSP,
                                 oatVRegOffset(cUnit, vReg),
                                 reg, kWord);
diff --git a/src/compiler/codegen/arm/Thumb2/Ralloc.cc b/src/compiler/codegen/arm/Thumb2/Ralloc.cc
index c0f2c77..7858318 100644
--- a/src/compiler/codegen/arm/Thumb2/Ralloc.cc
+++ b/src/compiler/codegen/arm/Thumb2/Ralloc.cc
@@ -88,6 +88,10 @@
     for (int i = 0; i < numFPTemps; i++) {
         oatMarkTemp(cUnit, fpTemps[i]);
     }
+
+    // Start allocation at r2 in an attempt to avoid clobbering return values
+    pool->nextCoreReg = r2;
+
     // Construct the alias map.
     cUnit->phiAliasMap = (int*)oatNew(cUnit, cUnit->numSSARegs *
                                       sizeof(cUnit->phiAliasMap[0]), false,
diff --git a/src/compiler/codegen/mips/MipsRallocUtil.cc b/src/compiler/codegen/mips/MipsRallocUtil.cc
index 7fd9b59..7ed3f86 100644
--- a/src/compiler/codegen/mips/MipsRallocUtil.cc
+++ b/src/compiler/codegen/mips/MipsRallocUtil.cc
@@ -65,10 +65,10 @@
 
         info1->dirty = false;
         info2->dirty = false;
-        if (oatS2VReg(cUnit, info2->sReg) <
-            oatS2VReg(cUnit, info1->sReg))
+        if (SRegToVReg(cUnit, info2->sReg) <
+            SRegToVReg(cUnit, info1->sReg))
             info1 = info2;
-        int vReg = oatS2VReg(cUnit, info1->sReg);
+        int vReg = SRegToVReg(cUnit, info1->sReg);
         oatFlushRegWideImpl(cUnit, rSP,
                                     oatVRegOffset(cUnit, vReg),
                                     info1->reg, info1->partner);
@@ -80,7 +80,7 @@
     RegisterInfo* info = oatGetRegInfo(cUnit, reg);
     if (info->live && info->dirty) {
         info->dirty = false;
-        int vReg = oatS2VReg(cUnit, info->sReg);
+        int vReg = SRegToVReg(cUnit, info->sReg);
         oatFlushRegImpl(cUnit, rSP,
                                 oatVRegOffset(cUnit, vReg),
                                 reg, kWord);
diff --git a/src/compiler/codegen/x86/X86RallocUtil.cc b/src/compiler/codegen/x86/X86RallocUtil.cc
index 7c99fd6..1b4eca4 100644
--- a/src/compiler/codegen/x86/X86RallocUtil.cc
+++ b/src/compiler/codegen/x86/X86RallocUtil.cc
@@ -60,10 +60,10 @@
 
         info1->dirty = false;
         info2->dirty = false;
-        if (oatS2VReg(cUnit, info2->sReg) <
-            oatS2VReg(cUnit, info1->sReg))
+        if (SRegToVReg(cUnit, info2->sReg) <
+            SRegToVReg(cUnit, info1->sReg))
             info1 = info2;
-        int vReg = oatS2VReg(cUnit, info1->sReg);
+        int vReg = SRegToVReg(cUnit, info1->sReg);
         oatFlushRegWideImpl(cUnit, rSP,
                                     oatVRegOffset(cUnit, vReg),
                                     info1->reg, info1->partner);
@@ -75,7 +75,7 @@
     RegisterInfo* info = oatGetRegInfo(cUnit, reg);
     if (info->live && info->dirty) {
         info->dirty = false;
-        int vReg = oatS2VReg(cUnit, info->sReg);
+        int vReg = SRegToVReg(cUnit, info->sReg);
         oatFlushRegImpl(cUnit, rSP,
                                 oatVRegOffset(cUnit, vReg),
                                 reg, kWord);