Diffstat (limited to 'src/compiler/codegen/arm')
| -rw-r--r-- | src/compiler/codegen/arm/ArchFactory.cc | 1209 |
| -rw-r--r-- | src/compiler/codegen/arm/ArchUtility.cc | 5 |
| -rw-r--r-- | src/compiler/codegen/arm/ArmLIR.h | 61 |
| -rw-r--r-- | src/compiler/codegen/arm/ArmRallocUtil.cc | 315 |
| -rw-r--r-- | src/compiler/codegen/arm/Assemble.cc | 217 |
| -rw-r--r-- | src/compiler/codegen/arm/LocalOptimizations.cc | 453 |
| -rw-r--r-- | src/compiler/codegen/arm/MethodCodegenDriver.cc | 1978 |
| -rw-r--r-- | src/compiler/codegen/arm/Thumb2/Gen.cc | 69 |
| -rw-r--r-- | src/compiler/codegen/arm/armv7-a-neon/Codegen.cc | 11 | ||||
| -rw-r--r-- | src/compiler/codegen/arm/armv7-a/Codegen.cc | 11 |
10 files changed, 1329 insertions, 3000 deletions
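The heart of this change is ArchFactory.cc, which absorbs the field-access, allocation, invoke, and frame setup/teardown generators. As one concrete example of what the new code emits, markGCCard() (added below) produces the card-marking write barrier for reference stores. Its effect, expressed as a C-level sketch (the helper and accessor names here are illustrative, not part of the patch; GC_CARD_SHIFT and the card-table base come from the runtime Thread as in the diff):

    // Sketch only: the sequence markGCCard() emits. Skip the card mark
    // when the stored value is null (the kArmCondEq branch-over);
    // otherwise shift the target address by GC_CARD_SHIFT and store the
    // low byte of the card-table base at that card (kUnsignedByte store).
    void MarkCardSketch(Thread* self, void* val, void* tgtAddr) {
        if (val == NULL) {
            return;                                       // branchOver target
        }
        uint8_t* cardBase = self->CardTableBase();        // loadWordDisp(rSELF, ...)
        size_t cardNo = (uintptr_t)tgtAddr >> GC_CARD_SHIFT;  // kOpLsr
        cardBase[cardNo] = (uint8_t)(uintptr_t)cardBase;  // storeBaseIndexed
    }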
diff --git a/src/compiler/codegen/arm/ArchFactory.cc b/src/compiler/codegen/arm/ArchFactory.cc index d700e4843f..62e4b3eb29 100644 --- a/src/compiler/codegen/arm/ArchFactory.cc +++ b/src/compiler/codegen/arm/ArchFactory.cc @@ -22,11 +22,64 @@ * */
+#define SLOW_FIELD_PATH (cUnit->enableDebug & (1 << kDebugSlowFieldPath))
+#define SLOW_INVOKE_PATH (cUnit->enableDebug & (1 << kDebugSlowInvokePath))
+#define SLOW_STRING_PATH (cUnit->enableDebug & (1 << kDebugSlowStringPath))
+#define SLOW_TYPE_PATH (cUnit->enableDebug & (1 << kDebugSlowTypePath))
+#define EXERCISE_SLOWEST_FIELD_PATH (cUnit->enableDebug & \
+ (1 << kDebugSlowestFieldPath))
+#define EXERCISE_SLOWEST_STRING_PATH (cUnit->enableDebug & \
+ (1 << kDebugSlowestStringPath))
+#define EXERCISE_RESOLVE_METHOD (cUnit->enableDebug & \
+ (1 << kDebugExerciseResolveMethod))
+
 namespace art {
-STATIC ArmLIR* genUnconditionalBranch(CompilationUnit*, ArmLIR*);
-STATIC ArmLIR* genConditionalBranch(CompilationUnit*, ArmConditionCode,
- ArmLIR*);
+STATIC void genDebuggerUpdate(CompilationUnit* cUnit, int32_t offset);
+
+/* Generate conditional branch instructions */
+STATIC ArmLIR* genConditionalBranch(CompilationUnit* cUnit,
+ ArmConditionCode cond,
+ ArmLIR* target)
+{
+ ArmLIR* branch = opCondBranch(cUnit, cond);
+ branch->generic.target = (LIR*) target;
+ return branch;
+}
+
+/* Generate unconditional branch instructions */
+STATIC ArmLIR* genUnconditionalBranch(CompilationUnit* cUnit, ArmLIR* target)
+{
+ ArmLIR* branch = opNone(cUnit, kOpUncondBr);
+ branch->generic.target = (LIR*) target;
+ return branch;
+}
+
+STATIC ArmLIR* callRuntimeHelper(CompilationUnit* cUnit, int reg)
+{
+ oatClobberCalleeSave(cUnit);
+ return opReg(cUnit, kOpBlx, reg);
+}
+
+/*
+ * Mark garbage collection card. Skip if the value we're storing is null.
+ */
+STATIC void markGCCard(CompilationUnit* cUnit, int valReg, int tgtAddrReg)
+{
+ int regCardBase = oatAllocTemp(cUnit);
+ int regCardNo = oatAllocTemp(cUnit);
+ ArmLIR* branchOver = genCmpImmBranch(cUnit, kArmCondEq, valReg, 0);
+ loadWordDisp(cUnit, rSELF, Thread::CardTableOffset().Int32Value(),
+ regCardBase);
+ opRegRegImm(cUnit, kOpLsr, regCardNo, tgtAddrReg, GC_CARD_SHIFT);
+ storeBaseIndexed(cUnit, regCardBase, regCardNo, regCardBase, 0,
+ kUnsignedByte);
+ ArmLIR* target = newLIR0(cUnit, kArmPseudoTargetLabel);
+ target->defMask = ENCODE_ALL;
+ branchOver->generic.target = (LIR*)target;
+ oatFreeTemp(cUnit, regCardBase);
+ oatFreeTemp(cUnit, regCardNo);
+}
 /* * Utility to load the current Method*. Broken out
@@ -113,4 +166,1154 @@ STATIC TGT_LIR* genRegRegCheck(CompilationUnit* cUnit, ArmConditionCode cCode, return branch; }
+/*
+ * Let helper function take care of everything. Will call
+ * Array::AllocFromCode(type_idx, method, count);
+ * Note: AllocFromCode will handle checks for errNegativeArraySize.
+ */ +STATIC void genNewArray(CompilationUnit* cUnit, MIR* mir, RegLocation rlDest, + RegLocation rlSrc) +{ + oatFlushAllRegs(cUnit); /* Everything to home location */ + uint32_t type_idx = mir->dalvikInsn.vC; + if (cUnit->compiler->CanAccessTypeWithoutChecks(cUnit->method_idx, + cUnit->dex_cache, + *cUnit->dex_file, + type_idx)) { + loadWordDisp(cUnit, rSELF, + OFFSETOF_MEMBER(Thread, pAllocArrayFromCode), rLR); + } else { + loadWordDisp(cUnit, rSELF, + OFFSETOF_MEMBER(Thread, pAllocArrayFromCodeWithAccessCheck), rLR); + } + loadCurrMethodDirect(cUnit, r1); // arg1 <- Method* + loadConstant(cUnit, r0, type_idx); // arg0 <- type_id + loadValueDirectFixed(cUnit, rlSrc, r2); // arg2 <- count + callRuntimeHelper(cUnit, rLR); + RegLocation rlResult = oatGetReturn(cUnit); + storeValue(cUnit, rlDest, rlResult); +} + +/* + * Similar to genNewArray, but with post-allocation initialization. + * Verifier guarantees we're dealing with an array class. Current + * code throws runtime exception "bad Filled array req" for 'D' and 'J'. + * Current code also throws internal unimp if not 'L', '[' or 'I'. + */ +STATIC void genFilledNewArray(CompilationUnit* cUnit, MIR* mir, bool isRange) +{ + DecodedInstruction* dInsn = &mir->dalvikInsn; + int elems = dInsn->vA; + int typeId = dInsn->vB; + oatFlushAllRegs(cUnit); /* Everything to home location */ + if (cUnit->compiler->CanAccessTypeWithoutChecks(cUnit->method_idx, + cUnit->dex_cache, + *cUnit->dex_file, + typeId)) { + loadWordDisp(cUnit, rSELF, + OFFSETOF_MEMBER(Thread, pCheckAndAllocArrayFromCode), rLR); + } else { + loadWordDisp(cUnit, rSELF, + OFFSETOF_MEMBER(Thread, pCheckAndAllocArrayFromCodeWithAccessCheck), rLR); + } + loadCurrMethodDirect(cUnit, r1); // arg1 <- Method* + loadConstant(cUnit, r0, typeId); // arg0 <- type_id + loadConstant(cUnit, r2, elems); // arg2 <- count + callRuntimeHelper(cUnit, rLR); + /* + * NOTE: the implicit target for OP_FILLED_NEW_ARRAY is the + * return region. Because AllocFromCode placed the new array + * in r0, we'll just lock it into place. When debugger support is + * added, it may be necessary to additionally copy all return + * values to a home location in thread-local storage + */ + oatLockTemp(cUnit, r0); + + // Having a range of 0 is legal + if (isRange && (dInsn->vA > 0)) { + /* + * Bit of ugliness here. We're going generate a mem copy loop + * on the register range, but it is possible that some regs + * in the range have been promoted. This is unlikely, but + * before generating the copy, we'll just force a flush + * of any regs in the source range that have been promoted to + * home location. + */ + for (unsigned int i = 0; i < dInsn->vA; i++) { + RegLocation loc = oatUpdateLoc(cUnit, + oatGetSrc(cUnit, mir, i)); + if (loc.location == kLocPhysReg) { + storeBaseDisp(cUnit, rSP, oatSRegOffset(cUnit, loc.sRegLow), + loc.lowReg, kWord); + } + } + /* + * TUNING note: generated code here could be much improved, but + * this is an uncommon operation and isn't especially performance + * critical. 
+ */ + int rSrc = oatAllocTemp(cUnit); + int rDst = oatAllocTemp(cUnit); + int rIdx = oatAllocTemp(cUnit); + int rVal = rLR; // Using a lot of temps, rLR is known free here + // Set up source pointer + RegLocation rlFirst = oatGetSrc(cUnit, mir, 0); + opRegRegImm(cUnit, kOpAdd, rSrc, rSP, + oatSRegOffset(cUnit, rlFirst.sRegLow)); + // Set up the target pointer + opRegRegImm(cUnit, kOpAdd, rDst, r0, + Array::DataOffset().Int32Value()); + // Set up the loop counter (known to be > 0) + loadConstant(cUnit, rIdx, dInsn->vA - 1); + // Generate the copy loop. Going backwards for convenience + ArmLIR* target = newLIR0(cUnit, kArmPseudoTargetLabel); + target->defMask = ENCODE_ALL; + // Copy next element + loadBaseIndexed(cUnit, rSrc, rIdx, rVal, 2, kWord); + storeBaseIndexed(cUnit, rDst, rIdx, rVal, 2, kWord); + // Use setflags encoding here + newLIR3(cUnit, kThumb2SubsRRI12, rIdx, rIdx, 1); + ArmLIR* branch = opCondBranch(cUnit, kArmCondGe); + branch->generic.target = (LIR*)target; + } else if (!isRange) { + // TUNING: interleave + for (unsigned int i = 0; i < dInsn->vA; i++) { + RegLocation rlArg = loadValue(cUnit, + oatGetSrc(cUnit, mir, i), kCoreReg); + storeBaseDisp(cUnit, r0, + Array::DataOffset().Int32Value() + + i * 4, rlArg.lowReg, kWord); + // If the loadValue caused a temp to be allocated, free it + if (oatIsTemp(cUnit, rlArg.lowReg)) { + oatFreeTemp(cUnit, rlArg.lowReg); + } + } + } +} + +STATIC void genSput(CompilationUnit* cUnit, MIR* mir, RegLocation rlSrc, + bool isLongOrDouble, bool isObject) +{ + int fieldOffset; + int ssbIndex; + bool isVolatile; + bool isReferrersClass; + uint32_t fieldIdx = mir->dalvikInsn.vB; + bool fastPath = + cUnit->compiler->ComputeStaticFieldInfo(fieldIdx, cUnit, + fieldOffset, ssbIndex, + isReferrersClass, isVolatile, true); + if (fastPath && !SLOW_FIELD_PATH) { + DCHECK_GE(fieldOffset, 0); + int rBase; + int rMethod; + if (isReferrersClass) { + // Fast path, static storage base is this method's class + rMethod = loadCurrMethod(cUnit); + rBase = oatAllocTemp(cUnit); + loadWordDisp(cUnit, rMethod, + Method::DeclaringClassOffset().Int32Value(), rBase); + } else { + // Medium path, static storage base in a different class which + // requires checks that the other class is initialized. + DCHECK_GE(ssbIndex, 0); + // May do runtime call so everything to home locations. + oatFlushAllRegs(cUnit); + // Using fixed register to sync with possible call to runtime + // support. + rMethod = r1; + oatLockTemp(cUnit, rMethod); + loadCurrMethodDirect(cUnit, rMethod); + rBase = r0; + oatLockTemp(cUnit, rBase); + loadWordDisp(cUnit, rMethod, + Method::DexCacheInitializedStaticStorageOffset().Int32Value(), + rBase); + loadWordDisp(cUnit, rBase, + Array::DataOffset().Int32Value() + sizeof(int32_t*) * + ssbIndex, rBase); + // rBase now points at appropriate static storage base (Class*) + // or NULL if not initialized. Check for NULL and call helper if NULL. 
+ // TUNING: fast path should fall through + ArmLIR* branchOver = genCmpImmBranch(cUnit, kArmCondNe, rBase, 0); + loadWordDisp(cUnit, rSELF, + OFFSETOF_MEMBER(Thread, pInitializeStaticStorage), rLR); + loadConstant(cUnit, r0, ssbIndex); + callRuntimeHelper(cUnit, rLR); + ArmLIR* skipTarget = newLIR0(cUnit, kArmPseudoTargetLabel); + skipTarget->defMask = ENCODE_ALL; + branchOver->generic.target = (LIR*)skipTarget; + } + // rBase now holds static storage base + oatFreeTemp(cUnit, rMethod); + if (isLongOrDouble) { + rlSrc = oatGetSrcWide(cUnit, mir, 0, 1); + rlSrc = loadValueWide(cUnit, rlSrc, kAnyReg); + } else { + rlSrc = oatGetSrc(cUnit, mir, 0); + rlSrc = loadValue(cUnit, rlSrc, kAnyReg); + } + if (isVolatile) { + oatGenMemBarrier(cUnit, kST); + } + if (isLongOrDouble) { + storeBaseDispWide(cUnit, rBase, fieldOffset, rlSrc.lowReg, + rlSrc.highReg); + } else { + storeWordDisp(cUnit, rBase, fieldOffset, rlSrc.lowReg); + } + if (isVolatile) { + oatGenMemBarrier(cUnit, kSY); + } + if (isObject) { + markGCCard(cUnit, rlSrc.lowReg, rBase); + } + oatFreeTemp(cUnit, rBase); + } else { + oatFlushAllRegs(cUnit); // Everything to home locations + int setterOffset = isLongOrDouble ? OFFSETOF_MEMBER(Thread, pSet64Static) : + (isObject ? OFFSETOF_MEMBER(Thread, pSetObjStatic) + : OFFSETOF_MEMBER(Thread, pSet32Static)); + loadWordDisp(cUnit, rSELF, setterOffset, rLR); + loadConstant(cUnit, r0, fieldIdx); + if (isLongOrDouble) { + loadValueDirectWideFixed(cUnit, rlSrc, r2, r3); + } else { + loadValueDirect(cUnit, rlSrc, r1); + } + callRuntimeHelper(cUnit, rLR); + } +} + +STATIC void genSget(CompilationUnit* cUnit, MIR* mir, RegLocation rlDest, + bool isLongOrDouble, bool isObject) +{ + int fieldOffset; + int ssbIndex; + bool isVolatile; + bool isReferrersClass; + uint32_t fieldIdx = mir->dalvikInsn.vB; + bool fastPath = + cUnit->compiler->ComputeStaticFieldInfo(fieldIdx, cUnit, + fieldOffset, ssbIndex, + isReferrersClass, isVolatile, false); + if (fastPath && !SLOW_FIELD_PATH) { + DCHECK_GE(fieldOffset, 0); + int rBase; + int rMethod; + if (isReferrersClass) { + // Fast path, static storage base is this method's class + rMethod = loadCurrMethod(cUnit); + rBase = oatAllocTemp(cUnit); + loadWordDisp(cUnit, rMethod, + Method::DeclaringClassOffset().Int32Value(), rBase); + } else { + // Medium path, static storage base in a different class which + // requires checks that the other class is initialized + DCHECK_GE(ssbIndex, 0); + // May do runtime call so everything to home locations. + oatFlushAllRegs(cUnit); + // Using fixed register to sync with possible call to runtime + // support + rMethod = r1; + oatLockTemp(cUnit, rMethod); + loadCurrMethodDirect(cUnit, rMethod); + rBase = r0; + oatLockTemp(cUnit, rBase); + loadWordDisp(cUnit, rMethod, + Method::DexCacheInitializedStaticStorageOffset().Int32Value(), + rBase); + loadWordDisp(cUnit, rBase, + Array::DataOffset().Int32Value() + sizeof(int32_t*) * ssbIndex, + rBase); + // rBase now points at appropriate static storage base (Class*) + // or NULL if not initialized. Check for NULL and call helper if NULL. 
+ // TUNING: fast path should fall through
+ ArmLIR* branchOver = genCmpImmBranch(cUnit, kArmCondNe, rBase, 0);
+ loadWordDisp(cUnit, rSELF,
+ OFFSETOF_MEMBER(Thread, pInitializeStaticStorage), rLR);
+ loadConstant(cUnit, r0, ssbIndex);
+ callRuntimeHelper(cUnit, rLR);
+ ArmLIR* skipTarget = newLIR0(cUnit, kArmPseudoTargetLabel);
+ skipTarget->defMask = ENCODE_ALL;
+ branchOver->generic.target = (LIR*)skipTarget;
+ }
+ // rBase now holds static storage base
+ oatFreeTemp(cUnit, rMethod);
+ rlDest = isLongOrDouble ? oatGetDestWide(cUnit, mir, 0, 1)
+ : oatGetDest(cUnit, mir, 0);
+ RegLocation rlResult = oatEvalLoc(cUnit, rlDest, kAnyReg, true);
+ if (isVolatile) {
+ oatGenMemBarrier(cUnit, kSY);
+ }
+ if (isLongOrDouble) {
+ loadBaseDispWide(cUnit, NULL, rBase, fieldOffset, rlResult.lowReg,
+ rlResult.highReg, INVALID_SREG);
+ } else {
+ loadWordDisp(cUnit, rBase, fieldOffset, rlResult.lowReg);
+ }
+ oatFreeTemp(cUnit, rBase);
+ if (isLongOrDouble) {
+ storeValueWide(cUnit, rlDest, rlResult);
+ } else {
+ storeValue(cUnit, rlDest, rlResult);
+ }
+ } else {
+ oatFlushAllRegs(cUnit); // Everything to home locations
+ int getterOffset = isLongOrDouble ? OFFSETOF_MEMBER(Thread, pGet64Static) :
+ (isObject ? OFFSETOF_MEMBER(Thread, pGetObjStatic)
+ : OFFSETOF_MEMBER(Thread, pGet32Static));
+ loadWordDisp(cUnit, rSELF, getterOffset, rLR);
+ loadConstant(cUnit, r0, fieldIdx);
+ callRuntimeHelper(cUnit, rLR);
+ if (isLongOrDouble) {
+ RegLocation rlResult = oatGetReturnWide(cUnit);
+ storeValueWide(cUnit, rlDest, rlResult);
+ } else {
+ RegLocation rlResult = oatGetReturn(cUnit);
+ storeValue(cUnit, rlDest, rlResult);
+ }
+ }
+}
+
+typedef int (*NextCallInsn)(CompilationUnit*, MIR*, int, uint32_t dexIdx,
+ uint32_t methodIdx);
+
+/*
+ * Bit of a hack here - in lieu of a real scheduling pass,
+ * emit the next instruction in static & direct invoke sequences.
+ */
+STATIC int nextSDCallInsn(CompilationUnit* cUnit, MIR* mir,
+ int state, uint32_t dexIdx, uint32_t unused)
+{
+ switch(state) {
+ case 0: // Get the current Method* [sets r0]
+ loadCurrMethodDirect(cUnit, r0);
+ break;
+ case 1: // Get method->code_and_direct_methods_
+ loadWordDisp(cUnit, r0,
+ Method::GetDexCacheCodeAndDirectMethodsOffset().Int32Value(),
+ r0);
+ break;
+ case 2: // Grab target method* and target code_
+ loadWordDisp(cUnit, r0,
+ CodeAndDirectMethods::CodeOffsetInBytes(dexIdx), rLR);
+ loadWordDisp(cUnit, r0,
+ CodeAndDirectMethods::MethodOffsetInBytes(dexIdx), r0);
+ break;
+ default:
+ return -1;
+ }
+ return state + 1;
+}
+
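Each invoke flavor supplies one of these NextCallInsn callbacks; the argument-setup routines later in this file (loadArgRegs, genDalvikArgsNoRange, genDalvikArgsRange) drive the callback one state at a time between argument loads, so the call-launch sequence overlaps with argument marshalling. Stripped of that interleaving, the driving pattern reduces to the following sketch (the helper name is hypothetical, not part of the patch):

    // Sketch only: run a NextCallInsn state machine to completion.
    // Each call emits one step of the invoke launch sequence and returns
    // the next state; -1 signals that the sequence is finished.
    static void runCallSequence(CompilationUnit* cUnit, MIR* mir,
                                NextCallInsn nextCallInsn,
                                uint32_t dexIdx, uint32_t methodIdx) {
        int state = 0;
        while (state != -1) {
            state = nextCallInsn(cUnit, mir, state, dexIdx, methodIdx);
        }
    }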
+/*
+ * Bit of a hack here - in lieu of a real scheduling pass,
+ * emit the next instruction in a virtual invoke sequence.
+ * We can use rLR as a temp prior to target address loading.
+ * Note also that we'll load the first argument ("this") into
+ * r1 here rather than the standard loadArgRegs.
+ */
+STATIC int nextVCallInsn(CompilationUnit* cUnit, MIR* mir,
+ int state, uint32_t dexIdx, uint32_t methodIdx)
+{
+ RegLocation rlArg;
+ /*
+ * This is the fast path in which the target virtual method is
+ * fully resolved at compile time.
+ */
+ switch(state) {
+ case 0: // Get "this" [set r1]
+ rlArg = oatGetSrc(cUnit, mir, 0);
+ loadValueDirectFixed(cUnit, rlArg, r1);
+ break;
+ case 1: // Is "this" null? [use r1]
+ genNullCheck(cUnit, oatSSASrc(mir,0), r1, mir);
+ // get this->klass_ [use r1, set rLR]
+ loadWordDisp(cUnit, r1, Object::ClassOffset().Int32Value(), rLR);
+ break;
+ case 2: // Get this->klass_->vtable [use rLR, set rLR]
+ loadWordDisp(cUnit, rLR, Class::VTableOffset().Int32Value(), rLR);
+ break;
+ case 3: // Get target method [use rLR, set r0]
+ loadWordDisp(cUnit, rLR, (methodIdx * 4) +
+ Array::DataOffset().Int32Value(), r0);
+ break;
+ case 4: // Get the target compiled code address [uses r0, sets rLR]
+ loadWordDisp(cUnit, r0, Method::GetCodeOffset().Int32Value(), rLR);
+ break;
+ default:
+ return -1;
+ }
+ return state + 1;
+}
+
+/*
+ * Interleave launch code for INVOKE_SUPER. See comments
+ * for nextVCallInsn.
+ */
+STATIC int nextSuperCallInsn(CompilationUnit* cUnit, MIR* mir,
+ int state, uint32_t dexIdx, uint32_t methodIdx)
+{
+ /*
+ * This is the fast path in which the target virtual method is
+ * fully resolved at compile time. Note also that this path assumes
+ * that the check to verify that the target method index falls
+ * within the size of the super's vtable has been done at compile-time.
+ */
+ RegLocation rlArg;
+ switch(state) {
+ case 0: // Get current Method* [set r0]
+ loadCurrMethodDirect(cUnit, r0);
+ // Load "this" [set r1]
+ rlArg = oatGetSrc(cUnit, mir, 0);
+ loadValueDirectFixed(cUnit, rlArg, r1);
+ // Get method->declaring_class_ [use r0, set rLR]
+ loadWordDisp(cUnit, r0, Method::DeclaringClassOffset().Int32Value(),
+ rLR);
+ // Is "this" null? [use r1]
+ genNullCheck(cUnit, oatSSASrc(mir,0), r1, mir);
+ break;
+ case 1: // Get method->declaring_class_->super_class [use rLR, set rLR]
+ loadWordDisp(cUnit, rLR, Class::SuperClassOffset().Int32Value(),
+ rLR);
+ break;
+ case 2: // Get ...->super_class_->vtable [u/s rLR]
+ loadWordDisp(cUnit, rLR, Class::VTableOffset().Int32Value(), rLR);
+ break;
+ case 3: // Get target method [use rLR, set r0]
+ loadWordDisp(cUnit, rLR, (methodIdx * 4) +
+ Array::DataOffset().Int32Value(), r0);
+ break;
+ case 4: // Get the target compiled code address [uses r0, sets rLR]
+ loadWordDisp(cUnit, r0, Method::GetCodeOffset().Int32Value(), rLR);
+ break;
+ default:
+ return -1;
+ }
+ return state + 1;
+}
+
+STATIC int nextInvokeInsnSP(CompilationUnit* cUnit, MIR* mir, int trampoline,
+ int state, uint32_t dexIdx, uint32_t methodIdx)
+{
+ /*
+ * This handles the case in which the base method is not fully
+ * resolved at compile time; we bail to a runtime helper.
+ */ + if (state == 0) { + // Load trampoline target + loadWordDisp(cUnit, rSELF, trampoline, rLR); + // Load r0 with method index + loadConstant(cUnit, r0, dexIdx); + return 1; + } + return -1; +} + +STATIC int nextStaticCallInsnSP(CompilationUnit* cUnit, MIR* mir, + int state, uint32_t dexIdx, uint32_t methodIdx) +{ + int trampoline = OFFSETOF_MEMBER(Thread, pInvokeStaticTrampolineWithAccessCheck); + return nextInvokeInsnSP(cUnit, mir, trampoline, state, dexIdx, 0); +} + +STATIC int nextDirectCallInsnSP(CompilationUnit* cUnit, MIR* mir, + int state, uint32_t dexIdx, uint32_t methodIdx) +{ + int trampoline = OFFSETOF_MEMBER(Thread, pInvokeDirectTrampolineWithAccessCheck); + return nextInvokeInsnSP(cUnit, mir, trampoline, state, dexIdx, 0); +} + +STATIC int nextSuperCallInsnSP(CompilationUnit* cUnit, MIR* mir, + int state, uint32_t dexIdx, uint32_t methodIdx) +{ + int trampoline = OFFSETOF_MEMBER(Thread, pInvokeSuperTrampolineWithAccessCheck); + return nextInvokeInsnSP(cUnit, mir, trampoline, state, dexIdx, 0); +} + +STATIC int nextVCallInsnSP(CompilationUnit* cUnit, MIR* mir, + int state, uint32_t dexIdx, uint32_t methodIdx) +{ + int trampoline = OFFSETOF_MEMBER(Thread, pInvokeVirtualTrampolineWithAccessCheck); + return nextInvokeInsnSP(cUnit, mir, trampoline, state, dexIdx, 0); +} + +/* + * All invoke-interface calls bounce off of art_invoke_interface_trampoline, + * which will locate the target and continue on via a tail call. + */ +STATIC int nextInterfaceCallInsn(CompilationUnit* cUnit, MIR* mir, + int state, uint32_t dexIdx, uint32_t unused) +{ + int trampoline = OFFSETOF_MEMBER(Thread, pInvokeInterfaceTrampoline); + return nextInvokeInsnSP(cUnit, mir, trampoline, state, dexIdx, 0); +} + +STATIC int nextInterfaceCallInsnWithAccessCheck(CompilationUnit* cUnit, + MIR* mir, int state, + uint32_t dexIdx, + uint32_t unused) +{ + int trampoline = OFFSETOF_MEMBER(Thread, pInvokeInterfaceTrampolineWithAccessCheck); + return nextInvokeInsnSP(cUnit, mir, trampoline, state, dexIdx, 0); +} + +STATIC int loadArgRegs(CompilationUnit* cUnit, MIR* mir, + DecodedInstruction* dInsn, int callState, + NextCallInsn nextCallInsn, uint32_t dexIdx, + uint32_t methodIdx, bool skipThis) +{ + int nextReg = r1; + int nextArg = 0; + if (skipThis) { + nextReg++; + nextArg++; + } + for (; (nextReg <= r3) && (nextArg < mir->ssaRep->numUses); nextReg++) { + RegLocation rlArg = oatGetRawSrc(cUnit, mir, nextArg++); + rlArg = oatUpdateRawLoc(cUnit, rlArg); + if (rlArg.wide && (nextReg <= r2)) { + loadValueDirectWideFixed(cUnit, rlArg, nextReg, nextReg + 1); + nextReg++; + nextArg++; + } else { + rlArg.wide = false; + loadValueDirectFixed(cUnit, rlArg, nextReg); + } + callState = nextCallInsn(cUnit, mir, callState, dexIdx, methodIdx); + } + return callState; +} + +/* + * Load up to 5 arguments, the first three of which will be in + * r1 .. r3. On entry r0 contains the current method pointer, + * and as part of the load sequence, it must be replaced with + * the target method pointer. Note, this may also be called + * for "range" variants if the number of arguments is 5 or fewer. 
+ */ +STATIC int genDalvikArgsNoRange(CompilationUnit* cUnit, MIR* mir, + DecodedInstruction* dInsn, int callState, + ArmLIR** pcrLabel, NextCallInsn nextCallInsn, + uint32_t dexIdx, uint32_t methodIdx, + bool skipThis) +{ + RegLocation rlArg; + + /* If no arguments, just return */ + if (dInsn->vA == 0) + return callState; + + callState = nextCallInsn(cUnit, mir, callState, dexIdx, methodIdx); + + DCHECK_LE(dInsn->vA, 5U); + if (dInsn->vA > 3) { + uint32_t nextUse = 3; + //Detect special case of wide arg spanning arg3/arg4 + RegLocation rlUse0 = oatGetRawSrc(cUnit, mir, 0); + RegLocation rlUse1 = oatGetRawSrc(cUnit, mir, 1); + RegLocation rlUse2 = oatGetRawSrc(cUnit, mir, 2); + if (((!rlUse0.wide && !rlUse1.wide) || rlUse0.wide) && + rlUse2.wide) { + int reg; + // Wide spans, we need the 2nd half of uses[2]. + rlArg = oatUpdateLocWide(cUnit, rlUse2); + if (rlArg.location == kLocPhysReg) { + reg = rlArg.highReg; + } else { + // r2 & r3 can safely be used here + reg = r3; + loadWordDisp(cUnit, rSP, + oatSRegOffset(cUnit, rlArg.sRegLow) + 4, reg); + callState = nextCallInsn(cUnit, mir, callState, dexIdx, + methodIdx); + } + storeBaseDisp(cUnit, rSP, (nextUse + 1) * 4, reg, kWord); + storeBaseDisp(cUnit, rSP, 16 /* (3+1)*4 */, reg, kWord); + callState = nextCallInsn(cUnit, mir, callState, dexIdx, methodIdx); + nextUse++; + } + // Loop through the rest + while (nextUse < dInsn->vA) { + int lowReg; + int highReg; + rlArg = oatGetRawSrc(cUnit, mir, nextUse); + rlArg = oatUpdateRawLoc(cUnit, rlArg); + if (rlArg.location == kLocPhysReg) { + lowReg = rlArg.lowReg; + highReg = rlArg.highReg; + } else { + lowReg = r2; + highReg = r3; + if (rlArg.wide) { + loadValueDirectWideFixed(cUnit, rlArg, lowReg, highReg); + } else { + loadValueDirectFixed(cUnit, rlArg, lowReg); + } + callState = nextCallInsn(cUnit, mir, callState, dexIdx, + methodIdx); + } + int outsOffset = (nextUse + 1) * 4; + if (rlArg.wide) { + storeBaseDispWide(cUnit, rSP, outsOffset, lowReg, highReg); + nextUse += 2; + } else { + storeWordDisp(cUnit, rSP, outsOffset, lowReg); + nextUse++; + } + callState = nextCallInsn(cUnit, mir, callState, dexIdx, methodIdx); + } + } + + callState = loadArgRegs(cUnit, mir, dInsn, callState, nextCallInsn, + dexIdx, methodIdx, skipThis); + + if (pcrLabel) { + *pcrLabel = genNullCheck(cUnit, oatSSASrc(mir,0), r1, mir); + } + return callState; +} + +/* + * May have 0+ arguments (also used for jumbo). Note that + * source virtual registers may be in physical registers, so may + * need to be flushed to home location before copying. This + * applies to arg3 and above (see below). + * + * Two general strategies: + * If < 20 arguments + * Pass args 3-18 using vldm/vstm block copy + * Pass arg0, arg1 & arg2 in r1-r3 + * If 20+ arguments + * Pass args arg19+ using memcpy block copy + * Pass arg0, arg1 & arg2 in r1-r3 + * + */ +STATIC int genDalvikArgsRange(CompilationUnit* cUnit, MIR* mir, + DecodedInstruction* dInsn, int callState, + ArmLIR** pcrLabel, NextCallInsn nextCallInsn, + uint32_t dexIdx, uint32_t methodIdx, + bool skipThis) +{ + int firstArg = dInsn->vC; + int numArgs = dInsn->vA; + + // If we can treat it as non-range (Jumbo ops will use range form) + if (numArgs <= 5) + return genDalvikArgsNoRange(cUnit, mir, dInsn, callState, pcrLabel, + nextCallInsn, dexIdx, methodIdx, + skipThis); + /* + * Make sure range list doesn't span the break between in normal + * Dalvik vRegs and the ins. 
+ */ + int highestArg = oatGetSrc(cUnit, mir, numArgs-1).sRegLow; + int boundaryReg = cUnit->numDalvikRegisters - cUnit->numIns; + if ((firstArg < boundaryReg) && (highestArg >= boundaryReg)) { + LOG(FATAL) << "Argument list spanned locals & args"; + } + + /* + * First load the non-register arguments. Both forms expect all + * of the source arguments to be in their home frame location, so + * scan the sReg names and flush any that have been promoted to + * frame backing storage. + */ + // Scan the rest of the args - if in physReg flush to memory + for (int nextArg = 0; nextArg < numArgs;) { + RegLocation loc = oatGetRawSrc(cUnit, mir, nextArg); + if (loc.wide) { + loc = oatUpdateLocWide(cUnit, loc); + if ((nextArg >= 2) && (loc.location == kLocPhysReg)) { + storeBaseDispWide(cUnit, rSP, + oatSRegOffset(cUnit, loc.sRegLow), + loc.lowReg, loc.highReg); + } + nextArg += 2; + } else { + loc = oatUpdateLoc(cUnit, loc); + if ((nextArg >= 3) && (loc.location == kLocPhysReg)) { + storeBaseDisp(cUnit, rSP, oatSRegOffset(cUnit, loc.sRegLow), + loc.lowReg, kWord); + } + nextArg++; + } + } + + int startOffset = oatSRegOffset(cUnit, + cUnit->regLocation[mir->ssaRep->uses[3]].sRegLow); + int outsOffset = 4 /* Method* */ + (3 * 4); + if (numArgs >= 20) { + // Generate memcpy + opRegRegImm(cUnit, kOpAdd, r0, rSP, outsOffset); + opRegRegImm(cUnit, kOpAdd, r1, rSP, startOffset); + loadWordDisp(cUnit, rSELF, OFFSETOF_MEMBER(Thread, pMemcpy), rLR); + loadConstant(cUnit, r2, (numArgs - 3) * 4); + callRuntimeHelper(cUnit, rLR); + // Restore Method* + loadCurrMethodDirect(cUnit, r0); + } else { + // Use vldm/vstm pair using r3 as a temp + int regsLeft = std::min(numArgs - 3, 16); + callState = nextCallInsn(cUnit, mir, callState, dexIdx, methodIdx); + opRegRegImm(cUnit, kOpAdd, r3, rSP, startOffset); + ArmLIR* ld = newLIR3(cUnit, kThumb2Vldms, r3, fr0, regsLeft); + //TUNING: loosen barrier + ld->defMask = ENCODE_ALL; + setMemRefType(ld, true /* isLoad */, kDalvikReg); + callState = nextCallInsn(cUnit, mir, callState, dexIdx, methodIdx); + opRegRegImm(cUnit, kOpAdd, r3, rSP, 4 /* Method* */ + (3 * 4)); + callState = nextCallInsn(cUnit, mir, callState, dexIdx, methodIdx); + ArmLIR* st = newLIR3(cUnit, kThumb2Vstms, r3, fr0, regsLeft); + setMemRefType(st, false /* isLoad */, kDalvikReg); + st->defMask = ENCODE_ALL; + callState = nextCallInsn(cUnit, mir, callState, dexIdx, methodIdx); + } + + callState = loadArgRegs(cUnit, mir, dInsn, callState, nextCallInsn, + dexIdx, methodIdx, skipThis); + + callState = nextCallInsn(cUnit, mir, callState, dexIdx, methodIdx); + if (pcrLabel) { + *pcrLabel = genNullCheck(cUnit, oatSSASrc(mir,0), r1, mir); + } + return callState; +} + +// Debugging routine - if null target, branch to DebugMe +STATIC void genShowTarget(CompilationUnit* cUnit) +{ + ArmLIR* branchOver = genCmpImmBranch(cUnit, kArmCondNe, rLR, 0); + loadWordDisp(cUnit, rSELF, + OFFSETOF_MEMBER(Thread, pDebugMe), rLR); + ArmLIR* target = newLIR0(cUnit, kArmPseudoTargetLabel); + target->defMask = -1; + branchOver->generic.target = (LIR*)target; +} + +STATIC void genThrowVerificationError(CompilationUnit* cUnit, MIR* mir) +{ + loadWordDisp(cUnit, rSELF, + OFFSETOF_MEMBER(Thread, pThrowVerificationErrorFromCode), rLR); + loadConstant(cUnit, r0, mir->dalvikInsn.vA); + loadConstant(cUnit, r1, mir->dalvikInsn.vB); + callRuntimeHelper(cUnit, rLR); +} + +STATIC void genCompareAndBranch(CompilationUnit* cUnit, BasicBlock* bb, + MIR* mir, RegLocation rlSrc1, + RegLocation rlSrc2, ArmLIR* labelList) +{ + ArmConditionCode cond; + 
rlSrc1 = loadValue(cUnit, rlSrc1, kCoreReg);
+ rlSrc2 = loadValue(cUnit, rlSrc2, kCoreReg);
+ opRegReg(cUnit, kOpCmp, rlSrc1.lowReg, rlSrc2.lowReg);
+ Opcode opcode = mir->dalvikInsn.opcode;
+ switch(opcode) {
+ case OP_IF_EQ:
+ cond = kArmCondEq;
+ break;
+ case OP_IF_NE:
+ cond = kArmCondNe;
+ break;
+ case OP_IF_LT:
+ cond = kArmCondLt;
+ break;
+ case OP_IF_GE:
+ cond = kArmCondGe;
+ break;
+ case OP_IF_GT:
+ cond = kArmCondGt;
+ break;
+ case OP_IF_LE:
+ cond = kArmCondLe;
+ break;
+ default:
+ cond = (ArmConditionCode)0;
+ LOG(FATAL) << "Unexpected opcode " << (int)opcode;
+ }
+ genConditionalBranch(cUnit, cond, &labelList[bb->taken->id]);
+ genUnconditionalBranch(cUnit, &labelList[bb->fallThrough->id]);
+}
+
+STATIC void genCompareZeroAndBranch(CompilationUnit* cUnit, BasicBlock* bb,
+ MIR* mir, RegLocation rlSrc,
+ ArmLIR* labelList)
+{
+ ArmConditionCode cond;
+ rlSrc = loadValue(cUnit, rlSrc, kCoreReg);
+ opRegImm(cUnit, kOpCmp, rlSrc.lowReg, 0);
+ Opcode opcode = mir->dalvikInsn.opcode;
+ switch(opcode) {
+ case OP_IF_EQZ:
+ cond = kArmCondEq;
+ break;
+ case OP_IF_NEZ:
+ cond = kArmCondNe;
+ break;
+ case OP_IF_LTZ:
+ cond = kArmCondLt;
+ break;
+ case OP_IF_GEZ:
+ cond = kArmCondGe;
+ break;
+ case OP_IF_GTZ:
+ cond = kArmCondGt;
+ break;
+ case OP_IF_LEZ:
+ cond = kArmCondLe;
+ break;
+ default:
+ cond = (ArmConditionCode)0;
+ LOG(FATAL) << "Unexpected opcode " << (int)opcode;
+ }
+ genConditionalBranch(cUnit, cond, &labelList[bb->taken->id]);
+ genUnconditionalBranch(cUnit, &labelList[bb->fallThrough->id]);
+}
+
+STATIC void genIntToLong(CompilationUnit* cUnit, MIR* mir, RegLocation rlDest,
+ RegLocation rlSrc)
+{
+ RegLocation rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true);
+ if (rlSrc.location == kLocPhysReg) {
+ genRegCopy(cUnit, rlResult.lowReg, rlSrc.lowReg);
+ } else {
+ loadValueDirect(cUnit, rlSrc, rlResult.lowReg);
+ }
+ opRegRegImm(cUnit, kOpAsr, rlResult.highReg,
+ rlResult.lowReg, 31);
+ storeValueWide(cUnit, rlDest, rlResult);
+}
+
+STATIC void genIntNarrowing(CompilationUnit* cUnit, MIR* mir,
+ RegLocation rlDest, RegLocation rlSrc)
+{
+ rlSrc = loadValue(cUnit, rlSrc, kCoreReg);
+ RegLocation rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true);
+ OpKind op = kOpInvalid;
+ switch(mir->dalvikInsn.opcode) {
+ case OP_INT_TO_BYTE:
+ op = kOp2Byte;
+ break;
+ case OP_INT_TO_SHORT:
+ op = kOp2Short;
+ break;
+ case OP_INT_TO_CHAR:
+ op = kOp2Char;
+ break;
+ default:
+ LOG(ERROR) << "Bad int conversion type";
+ }
+ opRegReg(cUnit, op, rlResult.lowReg, rlSrc.lowReg);
+ storeValue(cUnit, rlDest, rlResult);
+}
+
+/*
+ * If there are any ins passed in registers that have not been promoted
+ * to a callee-save register, flush them to the frame. Perform initial
+ * assignment of promoted arguments.
+ */
+STATIC void flushIns(CompilationUnit* cUnit)
+{
+ if (cUnit->numIns == 0)
+ return;
+ int firstArgReg = r1;
+ int lastArgReg = r3;
+ int startVReg = cUnit->numDalvikRegisters - cUnit->numIns;
+ /*
+ * Arguments passed in registers should be flushed
+ * to their backing locations in the frame for now.
+ * Also, we need to do initial assignment for promoted
+ * arguments. NOTE: an older version of dx had an issue
+ * in which it would reuse static method argument registers.
+ * This could result in the same Dalvik virtual register
+ * being promoted to both core and fp regs. In those
+ * cases, copy argument to both. This will be uncommon
+ * enough that it isn't worth attempting to optimize.
+ */ + for (int i = 0; i < cUnit->numIns; i++) { + PromotionMap vMap = cUnit->promotionMap[startVReg + i]; + if (i <= (lastArgReg - firstArgReg)) { + // If arriving in register + if (vMap.coreLocation == kLocPhysReg) { + genRegCopy(cUnit, vMap.coreReg, firstArgReg + i); + } + if (vMap.fpLocation == kLocPhysReg) { + genRegCopy(cUnit, vMap.fpReg, firstArgReg + i); + } + // Also put a copy in memory in case we're partially promoted + storeBaseDisp(cUnit, rSP, oatSRegOffset(cUnit, startVReg + i), + firstArgReg + i, kWord); + } else { + // If arriving in frame & promoted + if (vMap.coreLocation == kLocPhysReg) { + loadWordDisp(cUnit, rSP, oatSRegOffset(cUnit, startVReg + i), + vMap.coreReg); + } + if (vMap.fpLocation == kLocPhysReg) { + loadWordDisp(cUnit, rSP, oatSRegOffset(cUnit, startVReg + i), + vMap.fpReg); + } + } + } +} + +STATIC void genEntrySequence(CompilationUnit* cUnit, BasicBlock* bb) +{ + int spillCount = cUnit->numCoreSpills + cUnit->numFPSpills; + /* + * On entry, r0, r1, r2 & r3 are live. Let the register allocation + * mechanism know so it doesn't try to use any of them when + * expanding the frame or flushing. This leaves the utility + * code with a single temp: r12. This should be enough. + */ + oatLockTemp(cUnit, r0); + oatLockTemp(cUnit, r1); + oatLockTemp(cUnit, r2); + oatLockTemp(cUnit, r3); + + /* + * We can safely skip the stack overflow check if we're + * a leaf *and* our frame size < fudge factor. + */ + bool skipOverflowCheck = ((cUnit->attrs & METHOD_IS_LEAF) && + ((size_t)cUnit->frameSize < + Thread::kStackOverflowReservedBytes)); + newLIR0(cUnit, kArmPseudoMethodEntry); + if (!skipOverflowCheck) { + /* Load stack limit */ + loadWordDisp(cUnit, rSELF, + Thread::StackEndOffset().Int32Value(), r12); + } + /* Spill core callee saves */ + newLIR1(cUnit, kThumb2Push, cUnit->coreSpillMask); + /* Need to spill any FP regs? */ + if (cUnit->numFPSpills) { + /* + * NOTE: fp spills are a little different from core spills in that + * they are pushed as a contiguous block. When promoting from + * the fp set, we must allocate all singles from s16..highest-promoted + */ + newLIR1(cUnit, kThumb2VPushCS, cUnit->numFPSpills); + } + if (!skipOverflowCheck) { + opRegRegImm(cUnit, kOpSub, rLR, rSP, + cUnit->frameSize - (spillCount * 4)); + genRegRegCheck(cUnit, kArmCondCc, rLR, r12, NULL, + kArmThrowStackOverflow); + genRegCopy(cUnit, rSP, rLR); // Establish stack + } else { + opRegImm(cUnit, kOpSub, rSP, + cUnit->frameSize - (spillCount * 4)); + } + storeBaseDisp(cUnit, rSP, 0, r0, kWord); + flushIns(cUnit); + + if (cUnit->genDebugger) { + // Refresh update debugger callout + loadWordDisp(cUnit, rSELF, + OFFSETOF_MEMBER(Thread, pUpdateDebuggerFromCode), rSUSPEND); + genDebuggerUpdate(cUnit, DEBUGGER_METHOD_ENTRY); + } + + oatFreeTemp(cUnit, r0); + oatFreeTemp(cUnit, r1); + oatFreeTemp(cUnit, r2); + oatFreeTemp(cUnit, r3); +} + +STATIC void genExitSequence(CompilationUnit* cUnit, BasicBlock* bb) +{ + int spillCount = cUnit->numCoreSpills + cUnit->numFPSpills; + /* + * In the exit path, r0/r1 are live - make sure they aren't + * allocated by the register utilities as temps. + */ + oatLockTemp(cUnit, r0); + oatLockTemp(cUnit, r1); + + newLIR0(cUnit, kArmPseudoMethodExit); + /* If we're compiling for the debugger, generate an update callout */ + if (cUnit->genDebugger) { + genDebuggerUpdate(cUnit, DEBUGGER_METHOD_EXIT); + } + opRegImm(cUnit, kOpAdd, rSP, cUnit->frameSize - (spillCount * 4)); + /* Need to restore any FP callee saves? 
*/
+ if (cUnit->numFPSpills) {
+ newLIR1(cUnit, kThumb2VPopCS, cUnit->numFPSpills);
+ }
+ if (cUnit->coreSpillMask & (1 << rLR)) {
+ /* Unspill rLR to rPC */
+ cUnit->coreSpillMask &= ~(1 << rLR);
+ cUnit->coreSpillMask |= (1 << rPC);
+ }
+ newLIR1(cUnit, kThumb2Pop, cUnit->coreSpillMask);
+ if (!(cUnit->coreSpillMask & (1 << rPC))) {
+ /* We didn't pop to rPC, so must do a bx rLR */
+ newLIR1(cUnit, kThumbBx, rLR);
+ }
+}
+
+/*
+ * Nop any unconditional branches that go to the next instruction.
+ * Note: new redundant branches may be inserted later, and we'll
+ * use a check in final instruction assembly to nop those out.
+ */
+void removeRedundantBranches(CompilationUnit* cUnit)
+{
+ ArmLIR* thisLIR;
+
+ for (thisLIR = (ArmLIR*) cUnit->firstLIRInsn;
+ thisLIR != (ArmLIR*) cUnit->lastLIRInsn;
+ thisLIR = NEXT_LIR(thisLIR)) {
+
+ /* Branch to the next instruction */
+ if ((thisLIR->opcode == kThumbBUncond) ||
+ (thisLIR->opcode == kThumb2BUncond)) {
+ ArmLIR* nextLIR = thisLIR;
+
+ while (true) {
+ nextLIR = NEXT_LIR(nextLIR);
+
+ /*
+ * Is the branch target the next instruction?
+ */
+ if (nextLIR == (ArmLIR*) thisLIR->generic.target) {
+ thisLIR->flags.isNop = true;
+ break;
+ }
+
+ /*
+ * Found real useful stuff between the branch and the target.
+ * Need to explicitly check the lastLIRInsn here because it
+ * might be the last real instruction.
+ */
+ if (!isPseudoOpcode(nextLIR->opcode) ||
+ (nextLIR == (ArmLIR*) cUnit->lastLIRInsn))
+ break;
+ }
+ }
+ }
+}
+
+STATIC void handleSuspendLaunchpads(CompilationUnit *cUnit)
+{
+ ArmLIR** suspendLabel =
+ (ArmLIR **) cUnit->suspendLaunchpads.elemList;
+ int numElems = cUnit->suspendLaunchpads.numUsed;
+
+ for (int i = 0; i < numElems; i++) {
+ /* TUNING: move suspend count load into helper */
+ ArmLIR* lab = suspendLabel[i];
+ ArmLIR* resumeLab = (ArmLIR*)lab->operands[0];
+ cUnit->currentDalvikOffset = lab->operands[1];
+ oatAppendLIR(cUnit, (LIR *)lab);
+ loadWordDisp(cUnit, rSELF,
+ OFFSETOF_MEMBER(Thread, pTestSuspendFromCode), rLR);
+ if (!cUnit->genDebugger) {
+ // use rSUSPEND for suspend count
+ loadWordDisp(cUnit, rSELF,
+ Thread::SuspendCountOffset().Int32Value(), rSUSPEND);
+ }
+ opReg(cUnit, kOpBlx, rLR);
+ if (cUnit->genDebugger) {
+ // use rSUSPEND for update debugger
+ loadWordDisp(cUnit, rSELF,
+ OFFSETOF_MEMBER(Thread, pUpdateDebuggerFromCode), rSUSPEND);
+ }
+ genUnconditionalBranch(cUnit, resumeLab);
+ }
+}
+
+STATIC void handleThrowLaunchpads(CompilationUnit *cUnit)
+{
+ ArmLIR** throwLabel =
+ (ArmLIR **) cUnit->throwLaunchpads.elemList;
+ int numElems = cUnit->throwLaunchpads.numUsed;
+ int i;
+
+ for (i = 0; i < numElems; i++) {
+ ArmLIR* lab = throwLabel[i];
+ cUnit->currentDalvikOffset = lab->operands[1];
+ oatAppendLIR(cUnit, (LIR *)lab);
+ int funcOffset = 0;
+ int v1 = lab->operands[2];
+ int v2 = lab->operands[3];
+ switch(lab->operands[0]) {
+ case kArmThrowNullPointer:
+ funcOffset = OFFSETOF_MEMBER(Thread, pThrowNullPointerFromCode);
+ break;
+ case kArmThrowArrayBounds:
+ if (v2 != r0) {
+ genRegCopy(cUnit, r0, v1);
+ genRegCopy(cUnit, r1, v2);
+ } else {
+ if (v1 == r1) {
+ genRegCopy(cUnit, r12, v1);
+ genRegCopy(cUnit, r1, v2);
+ genRegCopy(cUnit, r0, r12);
+ } else {
+ genRegCopy(cUnit, r1, v2);
+ genRegCopy(cUnit, r0, v1);
+ }
+ }
+ funcOffset = OFFSETOF_MEMBER(Thread, pThrowArrayBoundsFromCode);
+ break;
+ case kArmThrowDivZero:
+ funcOffset = OFFSETOF_MEMBER(Thread, pThrowDivZeroFromCode);
+ break;
+ case kArmThrowVerificationError:
+ loadConstant(cUnit, r0, v1);
+ loadConstant(cUnit, r1, v2);
+
funcOffset = + OFFSETOF_MEMBER(Thread, pThrowVerificationErrorFromCode); + break; + case kArmThrowNegArraySize: + genRegCopy(cUnit, r0, v1); + funcOffset = + OFFSETOF_MEMBER(Thread, pThrowNegArraySizeFromCode); + break; + case kArmThrowNoSuchMethod: + genRegCopy(cUnit, r0, v1); + funcOffset = + OFFSETOF_MEMBER(Thread, pThrowNoSuchMethodFromCode); + break; + case kArmThrowStackOverflow: + funcOffset = + OFFSETOF_MEMBER(Thread, pThrowStackOverflowFromCode); + // Restore stack alignment + opRegImm(cUnit, kOpAdd, rSP, + (cUnit->numCoreSpills + cUnit->numFPSpills) * 4); + break; + default: + LOG(FATAL) << "Unexpected throw kind: " << lab->operands[0]; + } + loadWordDisp(cUnit, rSELF, funcOffset, rLR); + callRuntimeHelper(cUnit, rLR); + } +} + +/* Common initialization routine for an architecture family */ +bool oatArchInit() +{ + int i; + + for (i = 0; i < kArmLast; i++) { + if (EncodingMap[i].opcode != i) { + LOG(FATAL) << "Encoding order for " << EncodingMap[i].name << + " is wrong: expecting " << i << ", seeing " << + (int)EncodingMap[i].opcode; + } + } + + return oatArchVariantInit(); +} + +/* Needed by the Assembler */ +void oatSetupResourceMasks(ArmLIR* lir) +{ + setupResourceMasks(lir); +} + } // namespace art diff --git a/src/compiler/codegen/arm/ArchUtility.cc b/src/compiler/codegen/arm/ArchUtility.cc index c9a30fd3ae..75ae456c03 100644 --- a/src/compiler/codegen/arm/ArchUtility.cc +++ b/src/compiler/codegen/arm/ArchUtility.cc @@ -363,9 +363,8 @@ void oatDumpLIRInsn(CompilationUnit* cUnit, LIR* arg, unsigned char* baseAddr) } else { std::string op_name(buildInsnString(EncodingMap[lir->opcode].name, lir, baseAddr)); std::string op_operands(buildInsnString(EncodingMap[lir->opcode].fmt, lir, baseAddr)); - LOG(INFO) << StringPrintf("%p (%04x): %-9s%s%s%s", baseAddr + offset, offset, - op_name.c_str(), op_operands.c_str(), lir->flags.isNop ? "(nop)" : "", - lir->flags.squashed ? "(squashed)" : ""); + LOG(INFO) << StringPrintf("%p (%04x): %-9s%s%s", baseAddr + offset, offset, + op_name.c_str(), op_operands.c_str(), lir->flags.isNop ? "(nop)" : ""); } break; } diff --git a/src/compiler/codegen/arm/ArmLIR.h b/src/compiler/codegen/arm/ArmLIR.h index 510a5eac26..a9c8286b23 100644 --- a/src/compiler/codegen/arm/ArmLIR.h +++ b/src/compiler/codegen/arm/ArmLIR.h @@ -125,41 +125,9 @@ namespace art { #define rNone (-1) /* RegisterLocation templates return values (r0, or r0/r1) */ -#define LOC_C_RETURN {kLocPhysReg, 0, 0, 0, 0, 0, 1, r0, INVALID_REG, INVALID_SREG} -#define LOC_C_RETURN_WIDE {kLocPhysReg, 1, 0, 0, 0, 0, 1, r0, r1, INVALID_SREG} -/* RegisterLocation templates for interpState->retVal; */ -#define LOC_DALVIK_RETURN_VAL {kLocPhysReg, 0, 0, 0, 0, 0, 1, r0, INVALID_REG, \ - INVALID_SREG} -#define LOC_DALVIK_RETURN_VAL_WIDE {kLocPhysReg, 1, 0, 0, 0, 0, 1, r0, r1, \ +#define LOC_C_RETURN {kLocPhysReg, 0, 0, 0, 0, 0, 1, r0, INVALID_REG,\ INVALID_SREG} - - /* - * Data structure tracking the mapping between a Dalvik register (pair) and a - * native register (pair). The idea is to reuse the previously loaded value - * if possible, otherwise to keep the value in a native register as long as - * possible. - */ -typedef struct RegisterInfo { - int reg; // Reg number - bool inUse; // Has it been allocated? - bool isTemp; // Can allocate as temp? - bool pair; // Part of a register pair? - int partner; // If pair, other reg of pair - bool live; // Is there an associated SSA name? - bool dirty; // If live, is it dirty? 
- int sReg; // Name of live value - struct LIR *defStart; // Starting inst in last def sequence - struct LIR *defEnd; // Ending inst in last def sequence -} RegisterInfo; - -typedef struct RegisterPool { - int numCoreRegs; - RegisterInfo *coreRegs; - int nextCoreReg; - int numFPRegs; - RegisterInfo *FPRegs; - int nextFPReg; -} RegisterPool; +#define LOC_C_RETURN_WIDE {kLocPhysReg, 1, 0, 0, 0, 0, 1, r0, r1, INVALID_SREG} typedef enum ResourceEncodingPos { kGPReg0 = 0, @@ -199,17 +167,6 @@ typedef enum ResourceEncodingPos { #define DECODE_ALIAS_INFO_REG(X) (X & 0xffff) #define DECODE_ALIAS_INFO_WIDE(X) ((X & 0x80000000) ? 1 : 0) -typedef enum OpSize { - kWord, - kLong, - kSingle, - kDouble, - kUnsignedHalf, - kSignedHalf, - kUnsignedByte, - kSignedByte, -} OpSize; - typedef enum OpKind { kOpMov, kOpMvn, @@ -243,6 +200,7 @@ typedef enum OpKind { kOp2Byte, kOpCondBr, kOpUncondBr, + kOpInvalid, } OpKind; /* @@ -844,12 +802,10 @@ typedef struct ArmLIR { int operands[4]; // [0..3] = [dest, src1, src2, extra] struct { bool isNop:1; // LIR is optimized away - bool insertWrapper:1; // insert branch to emulate memory accesses - bool squashed:1; // Eliminated def bool pcRelFixup:1; // May need pc-relative fixup unsigned int age:4; // default is 0, set lazily by the optimizer unsigned int size:3; // bytes (2 for thumb, 2/4 for thumb2) - unsigned int unused:21; + unsigned int unused:23; } flags; int aliasInfo; // For Dalvik register & litpool disambiguation u8 useMask; // Resource mask for use @@ -871,10 +827,6 @@ typedef struct FillArrayData { int vaddr; // Dalvik offset of OP_FILL_ARRAY_DATA opcode } FillArrayData; -/* Init values when a predicted chain is initially assembled */ -/* E7FE is branch to self */ -#define PREDICTED_CHAIN_BX_PAIR_INIT 0xe7fe - /* Utility macros to traverse the LIR/ArmLIR list */ #define NEXT_LIR(lir) ((ArmLIR *) lir->generic.next) #define PREV_LIR(lir) ((ArmLIR *) lir->generic.prev) @@ -882,11 +834,6 @@ typedef struct FillArrayData { #define NEXT_LIR_LVALUE(lir) (lir)->generic.next #define PREV_LIR_LVALUE(lir) (lir)->generic.prev -#define CHAIN_CELL_OFFSET_TAG 0xcdab - -#define CHAIN_CELL_NORMAL_SIZE 12 -#define CHAIN_CELL_PREDICTED_SIZE 16 - } // namespace art #endif // ART_SRC_COMPILER_CODEGEN_ARM_ARMLIR_H_ diff --git a/src/compiler/codegen/arm/ArmRallocUtil.cc b/src/compiler/codegen/arm/ArmRallocUtil.cc index 1986b0f5ce..38f1b8868c 100644 --- a/src/compiler/codegen/arm/ArmRallocUtil.cc +++ b/src/compiler/codegen/arm/ArmRallocUtil.cc @@ -28,262 +28,87 @@ namespace art { /* - * Placeholder routine until we do proper register allocation. + * TUNING: is leaf? Can't just use "hasInvoke" to determine as some + * instructions might call out to C/assembly helper functions. Until + * machinery is in place, always spill lr. */ -typedef struct RefCounts { - int count; - int sReg; - bool doubleStart; // Starting vReg for a double -} RefCounts; - -/* USE SSA names to count references of base Dalvik vRegs. */ -STATIC void countRefs(CompilationUnit *cUnit, BasicBlock* bb, - RefCounts* coreCounts, RefCounts* fpCounts) +void oatAdjustSpillMask(CompilationUnit* cUnit) { - MIR* mir; - if (bb->blockType != kDalvikByteCode && bb->blockType != kEntryBlock && - bb->blockType != kExitBlock) - return; - - for (mir = bb->firstMIRInsn; mir; mir = mir->next) { - SSARepresentation *ssaRep = mir->ssaRep; - if (ssaRep) { - for (int i = 0; i < ssaRep->numDefs;) { - RegLocation loc = cUnit->regLocation[ssaRep->defs[i]]; - RefCounts* counts = loc.fp ? 
fpCounts : coreCounts; - int vReg = oatS2VReg(cUnit, ssaRep->defs[i]); - if (loc.defined) { - counts[vReg].count++; - } - if (loc.wide) { - if (loc.defined) { - if (loc.fp) { - counts[vReg].doubleStart = true; - } - counts[vReg+1].count++; - } - i += 2; - } else { - i++; - } - } - for (int i = 0; i < ssaRep->numUses;) { - RegLocation loc = cUnit->regLocation[ssaRep->uses[i]]; - RefCounts* counts = loc.fp ? fpCounts : coreCounts; - int vReg = oatS2VReg(cUnit, ssaRep->uses[i]); - if (loc.defined) { - counts[vReg].count++; - } - if (loc.wide) { - if (loc.defined) { - if (loc.fp) { - counts[vReg].doubleStart = true; - } - counts[vReg+1].count++; - } - i += 2; - } else { - i++; - } - } - } - } -} - -/* qsort callback function, sort descending */ -STATIC int sortCounts(const void *val1, const void *val2) -{ - const RefCounts* op1 = (const RefCounts*)val1; - const RefCounts* op2 = (const RefCounts*)val2; - return (op1->count == op2->count) ? 0 : (op1->count < op2->count ? 1 : -1); -} - -STATIC void dumpCounts(const RefCounts* arr, int size, const char* msg) -{ - LOG(INFO) << msg; - for (int i = 0; i < size; i++) { - LOG(INFO) << "sReg[" << arr[i].sReg << "]: " << arr[i].count; - } + cUnit->coreSpillMask |= (1 << rLR); + cUnit->numCoreSpills++; } /* - * Note: some portions of this code required even if the kPromoteRegs - * optimization is disabled. + * Mark a callee-save fp register as promoted. Note that + * vpush/vpop uses contiguous register lists so we must + * include any holes in the mask. Associate holes with + * Dalvik register INVALID_VREG (0xFFFFU). */ -extern void oatDoPromotion(CompilationUnit* cUnit) +void oatMarkPreservedSingle(CompilationUnit* cUnit, int sReg, int reg) { - int numRegs = cUnit->numDalvikRegisters; - - /* - * TUNING: is leaf? Can't just use "hasInvoke" to determine as some - * instructions might call out to C/assembly helper functions. Until - * machinery is in place, always spill lr. - */ - cUnit->coreSpillMask |= (1 << rLR); - cUnit->numCoreSpills++; - /* - * Simple hack for testing register allocation. Just do a static - * count of the uses of Dalvik registers. Note that we examine - * the SSA names, but count based on original Dalvik register name. - * Count refs separately based on type in order to give allocation - * preference to fp doubles - which must be allocated sequential - * physical single fp registers started with an even-numbered - * reg. - */ - RefCounts *coreRegs = (RefCounts *) - oatNew(cUnit, sizeof(RefCounts) * numRegs, true, kAllocRegAlloc); - RefCounts *fpRegs = (RefCounts *) - oatNew(cUnit, sizeof(RefCounts) * numRegs, true, kAllocRegAlloc); - for (int i = 0; i < numRegs; i++) { - coreRegs[i].sReg = fpRegs[i].sReg = i; - } - GrowableListIterator iterator; - oatGrowableListIteratorInit(&cUnit->blockList, &iterator); - while (true) { - BasicBlock* bb; - bb = (BasicBlock*)oatGrowableListIteratorNext(&iterator); - if (bb == NULL) break; - countRefs(cUnit, bb, coreRegs, fpRegs); - } - - /* - * Ideally, we'd allocate doubles starting with an even-numbered - * register. Bias the counts to try to allocate any vreg that's - * used as the start of a pair first. 
- */ - for (int i = 0; i < numRegs; i++) { - if (fpRegs[i].doubleStart) { - fpRegs[i].count *= 2; - } - } - - // Sort the count arrays - qsort(coreRegs, numRegs, sizeof(RefCounts), sortCounts); - qsort(fpRegs, numRegs, sizeof(RefCounts), sortCounts); - - if (cUnit->printMe) { - dumpCounts(coreRegs, numRegs, "Core regs after sort"); - dumpCounts(fpRegs, numRegs, "Fp regs after sort"); + DCHECK_GE(reg, FP_REG_MASK + FP_CALLEE_SAVE_BASE); + reg = (reg & FP_REG_MASK) - FP_CALLEE_SAVE_BASE; + // Ensure fpVmapTable is large enough + int tableSize = cUnit->fpVmapTable.size(); + for (int i = tableSize; i < (reg + 1); i++) { + cUnit->fpVmapTable.push_back(INVALID_VREG); } + // Add the current mapping + cUnit->fpVmapTable[reg] = sReg; + // Size of fpVmapTable is high-water mark, use to set mask + cUnit->numFPSpills = cUnit->fpVmapTable.size(); + cUnit->fpSpillMask = ((1 << cUnit->numFPSpills) - 1) << FP_CALLEE_SAVE_BASE; +} - if (!(cUnit->disableOpt & (1 << kPromoteRegs))) { - // Promote fpRegs - for (int i = 0; (fpRegs[i].count > 0) && (i < numRegs); i++) { - if (cUnit->promotionMap[fpRegs[i].sReg].fpLocation != kLocPhysReg) { - if (fpRegs[i].sReg >= cUnit->numRegs) { - // don't promote arg regs - continue; - } - int reg = oatAllocPreservedFPReg(cUnit, fpRegs[i].sReg, - fpRegs[i].doubleStart); - if (reg < 0) { - break; // No more left - } - } +void oatFlushRegWide(CompilationUnit* cUnit, int reg1, int reg2) +{ + RegisterInfo* info1 = oatGetRegInfo(cUnit, reg1); + RegisterInfo* info2 = oatGetRegInfo(cUnit, reg2); + DCHECK(info1 && info2 && info1->pair && info2->pair && + (info1->partner == info2->reg) && + (info2->partner == info1->reg)); + if ((info1->live && info1->dirty) || (info2->live && info2->dirty)) { + if (!(info1->isTemp && info2->isTemp)) { + /* Should not happen. 
If it does, there's a problem in evalLoc */ + LOG(FATAL) << "Long half-temp, half-promoted"; } - // Promote core regs - for (int i = 0; (coreRegs[i].count > 0) && i < numRegs; i++) { - if (cUnit->promotionMap[coreRegs[i].sReg].coreLocation != - kLocPhysReg) { - if (coreRegs[i].sReg >= cUnit->numRegs) { - // don't promote arg regs - continue; - } - int reg = oatAllocPreservedCoreReg(cUnit, coreRegs[i].sReg); - if (reg < 0) { - break; // No more left - } - } - } - } - - // Now, update SSA names to new home locations - for (int i = 0; i < cUnit->numSSARegs; i++) { - RegLocation *curr = &cUnit->regLocation[i]; - int baseVReg = oatS2VReg(cUnit, curr->sRegLow); - if (!curr->wide) { - if (curr->fp) { - if (cUnit->promotionMap[baseVReg].fpLocation == kLocPhysReg) { - curr->location = kLocPhysReg; - curr->lowReg = cUnit->promotionMap[baseVReg].fpReg; - curr->home = true; - } - } else { - if (cUnit->promotionMap[baseVReg].coreLocation == kLocPhysReg) { - curr->location = kLocPhysReg; - curr->lowReg = cUnit->promotionMap[baseVReg].coreReg; - curr->home = true; - } - } - curr->highReg = INVALID_REG; - } else { - if (curr->highWord) { - continue; - } - if (curr->fp) { - if ((cUnit->promotionMap[baseVReg].fpLocation == kLocPhysReg) && - (cUnit->promotionMap[baseVReg+1].fpLocation == - kLocPhysReg)) { - int lowReg = cUnit->promotionMap[baseVReg].fpReg; - int highReg = cUnit->promotionMap[baseVReg+1].fpReg; - // Doubles require pair of singles starting at even reg - if (((lowReg & 0x1) == 0) && ((lowReg + 1) == highReg)) { - curr->location = kLocPhysReg; - curr->lowReg = lowReg; - curr->highReg = highReg; - curr->home = true; - } - } - } else { - if ((cUnit->promotionMap[baseVReg].coreLocation == kLocPhysReg) - && (cUnit->promotionMap[baseVReg+1].coreLocation == - kLocPhysReg)) { - curr->location = kLocPhysReg; - curr->lowReg = cUnit->promotionMap[baseVReg].coreReg; - curr->highReg = cUnit->promotionMap[baseVReg+1].coreReg; - curr->home = true; - } - } - } + info1->dirty = false; + info2->dirty = false; + if (oatS2VReg(cUnit, info2->sReg) < + oatS2VReg(cUnit, info1->sReg)) + info1 = info2; + int vReg = oatS2VReg(cUnit, info1->sReg); + oatFlushRegWideImpl(cUnit, rSP, + oatVRegOffset(cUnit, vReg), + info1->reg, info1->partner); } } -/* Returns sp-relative offset in bytes for a VReg */ -extern int oatVRegOffset(CompilationUnit* cUnit, int vReg) +void oatFlushReg(CompilationUnit* cUnit, int reg) { - return (vReg < cUnit->numRegs) ? 
cUnit->regsOffset + (vReg << 2) : - cUnit->insOffset + ((vReg - cUnit->numRegs) << 2); + RegisterInfo* info = oatGetRegInfo(cUnit, reg); + if (info->live && info->dirty) { + info->dirty = false; + int vReg = oatS2VReg(cUnit, info->sReg); + oatFlushRegImpl(cUnit, rSP, + oatVRegOffset(cUnit, vReg), + reg, kWord); + } } -/* Returns sp-relative offset in bytes for a SReg */ -extern int oatSRegOffset(CompilationUnit* cUnit, int sReg) -{ - return oatVRegOffset(cUnit, oatS2VReg(cUnit, sReg)); +/* Give access to the target-dependent FP register encoding to common code */ +bool oatIsFpReg(int reg) { + return FPREG(reg); } - -/* Return sp-relative offset in bytes using Method* */ -extern int oatVRegOffset(const DexFile::CodeItem* code_item, - uint32_t core_spills, uint32_t fp_spills, - size_t frame_size, int reg) -{ - int numIns = code_item->ins_size_; - int numRegs = code_item->registers_size_ - numIns; - int numOuts = code_item->outs_size_; - int numSpills = __builtin_popcount(core_spills) + - __builtin_popcount(fp_spills); - int numPadding = (STACK_ALIGN_WORDS - - (numSpills + numRegs + numOuts + 2)) & (STACK_ALIGN_WORDS-1); - int regsOffset = (numOuts + numPadding + 1) * 4; - int insOffset = frame_size + 4; - return (reg < numRegs) ? regsOffset + (reg << 2) : - insOffset + ((reg - numRegs) << 2); +uint32_t oatFpRegMask() { + return FP_REG_MASK; } /* Clobber all regs that might be used by an external C call */ -extern void oatClobberCalleeSave(CompilationUnit *cUnit) +void oatClobberCalleeSave(CompilationUnit *cUnit) { oatClobber(cUnit, r0); oatClobber(cUnit, r1); @@ -340,4 +165,28 @@ extern RegisterInfo* oatGetRegInfo(CompilationUnit* cUnit, int reg) : &cUnit->regPool->coreRegs[reg]; } +/* To be used when explicitly managing register use */ +extern void oatLockCallTemps(CompilationUnit* cUnit) +{ + oatLockTemp(cUnit, r0); + oatLockTemp(cUnit, r1); + oatLockTemp(cUnit, r2); + oatLockTemp(cUnit, r3); +} + +/* To be used when explicitly managing register use */ +extern void oatFreeCallTemps(CompilationUnit* cUnit) +{ + oatFreeTemp(cUnit, r0); + oatFreeTemp(cUnit, r1); + oatFreeTemp(cUnit, r2); + oatFreeTemp(cUnit, r3); +} + +/* Convert an instruction to a NOP */ +STATIC void oatNopLIR( LIR* lir) +{ + ((ArmLIR*)lir)->flags.isNop = true; +} + } // namespace art diff --git a/src/compiler/codegen/arm/Assemble.cc b/src/compiler/codegen/arm/Assemble.cc index 29906310dd..0cd76051d2 100644 --- a/src/compiler/codegen/arm/Assemble.cc +++ b/src/compiler/codegen/arm/Assemble.cc @@ -22,8 +22,6 @@ namespace art { -#define MAX_ASSEMBLER_RETRIES 50 - /* * opcode: ArmOpcode enum * skeleton: pre-designated bit-pattern for this opcode @@ -977,92 +975,13 @@ const ArmEncodingMap EncodingMap[kArmLast] = { */ #define PADDING_MOV_R5_R5 0x1C2D -STATIC void pushWord(std::vector<uint16_t>&buf, int data) { - buf.push_back( data & 0xffff); - buf.push_back( (data >> 16) & 0xffff); -} - -void alignBuffer(std::vector<uint16_t>&buf, size_t offset) { - while (buf.size() < (offset/2)) - buf.push_back(0); -} - -/* Write the numbers in the constant to the output stream */ -STATIC void installLiteralPools(CompilationUnit* cUnit) -{ - alignBuffer(cUnit->codeBuffer, cUnit->dataOffset); - ArmLIR* dataLIR = (ArmLIR*) cUnit->literalList; - while (dataLIR) { - pushWord(cUnit->codeBuffer, dataLIR->operands[0]); - dataLIR = NEXT_LIR(dataLIR); - } -} - -/* Write the switch tables to the output stream */ -STATIC void installSwitchTables(CompilationUnit* cUnit) -{ - GrowableListIterator iterator; - 
oatGrowableListIteratorInit(&cUnit->switchTables, &iterator); - while (true) { - SwitchTable* tabRec = (SwitchTable *) oatGrowableListIteratorNext( - &iterator); - if (tabRec == NULL) break; - alignBuffer(cUnit->codeBuffer, tabRec->offset); - int bxOffset = tabRec->bxInst->generic.offset + 4; - if (cUnit->printMe) { - LOG(INFO) << "Switch table for offset 0x" << std::hex << bxOffset; - } - if (tabRec->table[0] == kSparseSwitchSignature) { - int* keys = (int*)&(tabRec->table[2]); - for (int elems = 0; elems < tabRec->table[1]; elems++) { - int disp = tabRec->targets[elems]->generic.offset - bxOffset; - if (cUnit->printMe) { - LOG(INFO) << " Case[" << elems << "] key: 0x" << - std::hex << keys[elems] << ", disp: 0x" << - std::hex << disp; - } - pushWord(cUnit->codeBuffer, keys[elems]); - pushWord(cUnit->codeBuffer, - tabRec->targets[elems]->generic.offset - bxOffset); - } - } else { - DCHECK_EQ(tabRec->table[0], kPackedSwitchSignature); - for (int elems = 0; elems < tabRec->table[1]; elems++) { - int disp = tabRec->targets[elems]->generic.offset - bxOffset; - if (cUnit->printMe) { - LOG(INFO) << " Case[" << elems << "] disp: 0x" << - std::hex << disp; - } - pushWord(cUnit->codeBuffer, - tabRec->targets[elems]->generic.offset - bxOffset); - } - } - } -} - -/* Write the fill array data to the output stream */ -STATIC void installFillArrayData(CompilationUnit* cUnit) -{ - GrowableListIterator iterator; - oatGrowableListIteratorInit(&cUnit->fillArrayData, &iterator); - while (true) { - FillArrayData *tabRec = (FillArrayData *) oatGrowableListIteratorNext( - &iterator); - if (tabRec == NULL) break; - alignBuffer(cUnit->codeBuffer, tabRec->offset); - for (int i = 0; i < ((tabRec->size + 1) / 2) ; i++) { - cUnit->codeBuffer.push_back( tabRec->table[i]); - } - } -} - /* * Assemble the LIR into binary instruction format. Note that we may * discover that pc-relative displacements may not fit the selected * instruction.
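The buffer helpers removed above encode a convention that recurs throughout the assembler: the Thumb code buffer is a vector of 16-bit units, so 32-bit data (literals, switch keys, displacements) is pushed low half first, and alignment is requested in bytes but enforced in half-words. A minimal standalone sketch of that convention; the driver in main() is hypothetical, not code from the tree:

    #include <cassert>
    #include <cstdint>
    #include <vector>

    // Append a 32-bit value as two little-endian 16-bit units.
    static void pushWord(std::vector<uint16_t>& buf, int data) {
        buf.push_back(data & 0xffff);
        buf.push_back((data >> 16) & 0xffff);
    }

    // Pad with zero half-words until the buffer reaches a byte offset.
    static void alignBuffer(std::vector<uint16_t>& buf, size_t byteOffset) {
        while (buf.size() < (byteOffset / 2))
            buf.push_back(0);
    }

    int main() {
        std::vector<uint16_t> buf;
        buf.push_back(0x1c2d);       // one Thumb instruction (2 bytes)
        alignBuffer(buf, 8);         // literal pool begins at byte 8
        pushWord(buf, 0xdeadbeef);
        assert(buf.size() == 6);     // 4 half-words of code/pad + 2 of data
        assert(buf[4] == 0xbeef && buf[5] == 0xdead);  // low half first
        return 0;
    }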
*/ -STATIC AssemblerStatus assembleInstructions(CompilationUnit* cUnit, - intptr_t startAddr) +AssemblerStatus oatAssembleInstructions(CompilationUnit* cUnit, + intptr_t startAddr) { ArmLIR* lir; AssemblerStatus res = kSuccess; // Assume success @@ -1461,80 +1380,12 @@ STATIC AssemblerStatus assembleInstructions(CompilationUnit* cUnit, return res; } -STATIC int assignLiteralOffsetCommon(LIR* lir, int offset) -{ - for (;lir != NULL; lir = lir->next) { - lir->offset = offset; - offset += 4; - } - return offset; -} - -STATIC void createMappingTable(CompilationUnit* cUnit) -{ - ArmLIR* armLIR; - int currentDalvikOffset = -1; - - for (armLIR = (ArmLIR *) cUnit->firstLIRInsn; - armLIR; - armLIR = NEXT_LIR(armLIR)) { - if ((armLIR->opcode >= 0) && !armLIR->flags.isNop && - (currentDalvikOffset != armLIR->generic.dalvikOffset)) { - // Changed - need to emit a record - cUnit->mappingTable.push_back(armLIR->generic.offset); - cUnit->mappingTable.push_back(armLIR->generic.dalvikOffset); - currentDalvikOffset = armLIR->generic.dalvikOffset; - } - } -} - -/* Determine the offset of each literal field */ -STATIC int assignLiteralOffset(CompilationUnit* cUnit, int offset) -{ - offset = assignLiteralOffsetCommon(cUnit->literalList, offset); - return offset; -} - -STATIC int assignSwitchTablesOffset(CompilationUnit* cUnit, int offset) -{ - GrowableListIterator iterator; - oatGrowableListIteratorInit(&cUnit->switchTables, &iterator); - while (true) { - SwitchTable *tabRec = (SwitchTable *) oatGrowableListIteratorNext( - &iterator); - if (tabRec == NULL) break; - tabRec->offset = offset; - if (tabRec->table[0] == kSparseSwitchSignature) { - offset += tabRec->table[1] * (sizeof(int) * 2); - } else { - DCHECK_EQ(tabRec->table[0], kPackedSwitchSignature); - offset += tabRec->table[1] * sizeof(int); - } - } - return offset; -} - -STATIC int assignFillArrayDataOffset(CompilationUnit* cUnit, int offset) -{ - GrowableListIterator iterator; - oatGrowableListIteratorInit(&cUnit->fillArrayData, &iterator); - while (true) { - FillArrayData *tabRec = (FillArrayData *) oatGrowableListIteratorNext( - &iterator); - if (tabRec == NULL) break; - tabRec->offset = offset; - offset += tabRec->size; - // word align - offset = (offset + 3) & ~3; - } - return offset; -} - /* - * Walk the compilation unit and assign offsets to instructions - * and literals and compute the total size of the compiled unit. + * Target-dependent offset assignment. + * TODO: normalize usage of flags.size and make this target + * independent. */ -void assignOffsets(CompilationUnit* cUnit) +int oatAssignInsnOffsets(CompilationUnit* cUnit) { ArmLIR* armLIR; int offset = 0; @@ -1559,61 +1410,7 @@ void assignOffsets(CompilationUnit* cUnit) /* Pseudo opcodes don't consume space */ } - /* Const values have to be word aligned */ - offset = (offset + 3) & ~3; - - /* Set up offsets for literals */ - cUnit->dataOffset = offset; - - offset = assignLiteralOffset(cUnit, offset); - - offset = assignSwitchTablesOffset(cUnit, offset); - - offset = assignFillArrayDataOffset(cUnit, offset); - - cUnit->totalSize = offset; -} -/* - * Go over each instruction in the list and calculate the offset from the top - * before sending them off to the assembler. If out-of-range branch distance is - * seen rearrange the instructions a bit to correct it. - */ -void oatAssembleLIR(CompilationUnit* cUnit) -{ - assignOffsets(cUnit); - /* - * Assemble here. Note that we generate code with optimistic assumptions - * and if found now to work, we'll have to redo the sequence and retry. 
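The loop that follows keeps the same optimistic strategy even after the rename to oatAssembleInstructions: assemble assuming short encodings, and if a pc-relative displacement turns out not to fit, reassign offsets and redo the whole unit, bounded by MAX_ASSEMBLER_RETRIES (50, per the constant deleted at the top of this file). A toy, runnable model of that retry contract; the names and the "widen one branch per retry" behavior are illustrative:

    #include <cstdlib>
    #include <iostream>

    // Toy model: each retry widens one short branch; assembly succeeds once
    // every displacement fits.
    static int shortBranchesLeft = 3;

    static bool tryAssemble() {        // assembleInstructions() stand-in
        if (shortBranchesLeft == 0) return true;
        --shortBranchesLeft;           // promote one branch to a long form
        return false;                  // offsets changed: caller must retry
    }

    int main() {
        const int kMaxRetries = 50;    // MAX_ASSEMBLER_RETRIES above
        int retries = 0;
        while (!tryAssemble()) {       // reassign offsets + reassemble
            if (++retries > kMaxRetries) {
                std::cerr << "Assembler error - too many retries\n";
                return EXIT_FAILURE;
            }
        }
        std::cout << "assembled after " << retries << " retries\n";
        return 0;
    }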
- */ - - while (true) { - AssemblerStatus res = assembleInstructions(cUnit, 0); - if (res == kSuccess) { - break; - } else { - cUnit->assemblerRetries++; - if (cUnit->assemblerRetries > MAX_ASSEMBLER_RETRIES) { - LOG(FATAL) << "Assembler error - too many retries"; - } - // Redo offsets and try again - assignOffsets(cUnit); - cUnit->codeBuffer.clear(); - } - } - - // Install literals - installLiteralPools(cUnit); - - // Install switch tables - installSwitchTables(cUnit); - - // Install fill array data - installFillArrayData(cUnit); - - /* - * Create the mapping table - */ - createMappingTable(cUnit); + return offset; } } // namespace art diff --git a/src/compiler/codegen/arm/LocalOptimizations.cc b/src/compiler/codegen/arm/LocalOptimizations.cc deleted file mode 100644 index 9098627672..0000000000 --- a/src/compiler/codegen/arm/LocalOptimizations.cc +++ /dev/null @@ -1,453 +0,0 @@ -/* - * Copyright (C) 2011 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "../../Dalvik.h" -#include "../../CompilerInternals.h" -#include "ArmLIR.h" -#include "Codegen.h" - -namespace art { - -#define DEBUG_OPT(X) - -/* Check RAW, WAR, and WAR dependency on the register operands */ -#define CHECK_REG_DEP(use, def, check) ((def & check->useMask) || \ - ((use | def) & check->defMask)) - -/* Scheduler heuristics */ -#define MAX_HOIST_DISTANCE 20 -#define LDLD_DISTANCE 4 -#define LD_LATENCY 2 - -STATIC inline bool isDalvikRegisterClobbered(ArmLIR* lir1, ArmLIR* lir2) -{ - int reg1Lo = DECODE_ALIAS_INFO_REG(lir1->aliasInfo); - int reg1Hi = reg1Lo + DECODE_ALIAS_INFO_WIDE(lir1->aliasInfo); - int reg2Lo = DECODE_ALIAS_INFO_REG(lir2->aliasInfo); - int reg2Hi = reg2Lo + DECODE_ALIAS_INFO_WIDE(lir2->aliasInfo); - - return (reg1Lo == reg2Lo) || (reg1Lo == reg2Hi) || (reg1Hi == reg2Lo); -} - -/* Convert a more expensive instruction (ie load) into a move */ -STATIC void convertMemOpIntoMove(CompilationUnit* cUnit, ArmLIR* origLIR, - int dest, int src) -{ - /* Insert a move to replace the load */ - ArmLIR* moveLIR; - moveLIR = oatRegCopyNoInsert( cUnit, dest, src); - /* - * Insert the converted instruction after the original since the - * optimization is scannng in the top-down order and the new instruction - * will need to be re-checked (eg the new dest clobbers the src used in - * thisLIR). - */ - oatInsertLIRAfter((LIR*) origLIR, (LIR*) moveLIR); -} - -/* - * Perform a pass of top-down walk, from the second-last instruction in the - * superblock, to eliminate redundant loads and stores. 
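The pass stops scanning as soon as the register operands of two instructions carry a RAW, WAR, or WAW dependency, which the CHECK_REG_DEP macro above tests with 64-bit use/def resource masks. A standalone rendering of that predicate (the Insn struct and register constants are mine):

    #include <cassert>
    #include <cstdint>

    typedef uint64_t u8;  // the codebase's 64-bit resource mask type

    struct Insn { u8 useMask; u8 defMask; };

    // Equivalent of CHECK_REG_DEP(use, def, check): RAW if the earlier
    // insn's defs feed the check insn's uses; WAR/WAW if the check insn
    // redefines anything the earlier insn reads or writes.
    static bool regDependency(u8 use, u8 def, const Insn& check) {
        return (def & check.useMask) || ((use | def) & check.defMask);
    }

    int main() {
        const u8 R0 = 1u << 0, R1 = 1u << 1, R2 = 1u << 2;
        Insn load = {R1, R0};       // r0 <- [r1]
        Insn add  = {R0 | R2, R2};  // r2 <- r0 + r2 : reads our def -> RAW
        Insn mov  = {R2, R1};       // r1 <- r2      : kills our use  -> WAR
        assert(regDependency(load.useMask, load.defMask, add));
        assert(regDependency(load.useMask, load.defMask, mov));
        return 0;
    }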
- * - * An earlier load can eliminate a later load iff - * 1) They are must-aliases - * 2) The native register is not clobbered in between - * 3) The memory location is not written to in between - * - * An earlier store can eliminate a later load iff - * 1) They are must-aliases - * 2) The native register is not clobbered in between - * 3) The memory location is not written to in between - * - * A later store can be eliminated by an earlier store iff - * 1) They are must-aliases - * 2) The memory location is not written to in between - */ -STATIC void applyLoadStoreElimination(CompilationUnit* cUnit, - ArmLIR* headLIR, - ArmLIR* tailLIR) -{ - ArmLIR* thisLIR; - - if (headLIR == tailLIR) return; - - for (thisLIR = PREV_LIR(tailLIR); - thisLIR != headLIR; - thisLIR = PREV_LIR(thisLIR)) { - int sinkDistance = 0; - - /* Skip non-interesting instructions */ - if ((thisLIR->flags.isNop == true) || - isPseudoOpcode(thisLIR->opcode) || - !(EncodingMap[thisLIR->opcode].flags & (IS_LOAD | IS_STORE))) { - continue; - } - - int nativeRegId = thisLIR->operands[0]; - bool isThisLIRLoad = EncodingMap[thisLIR->opcode].flags & IS_LOAD; - ArmLIR* checkLIR; - /* Use the mem mask to determine the rough memory location */ - u8 thisMemMask = (thisLIR->useMask | thisLIR->defMask) & ENCODE_MEM; - - /* - * Currently only eliminate redundant ld/st for constant and Dalvik - * register accesses. - */ - if (!(thisMemMask & (ENCODE_LITERAL | ENCODE_DALVIK_REG))) continue; - - /* - * Add r15 (pc) to the resource mask to prevent this instruction - * from sinking past branch instructions. Also take out the memory - * region bits since stopMask is used to check data/control - * dependencies. - */ - u8 stopUseRegMask = (ENCODE_REG_PC | thisLIR->useMask) & - ~ENCODE_MEM; - u8 stopDefRegMask = thisLIR->defMask & ~ENCODE_MEM; - - for (checkLIR = NEXT_LIR(thisLIR); - checkLIR != tailLIR; - checkLIR = NEXT_LIR(checkLIR)) { - - /* - * Skip already dead instructions (whose dataflow information is - * outdated and misleading). - */ - if (checkLIR->flags.isNop) continue; - - u8 checkMemMask = (checkLIR->useMask | checkLIR->defMask) & - ENCODE_MEM; - u8 aliasCondition = thisMemMask & checkMemMask; - bool stopHere = false; - - /* - * Potential aliases seen - check the alias relations - */ - if (checkMemMask != ENCODE_MEM && aliasCondition != 0) { - bool isCheckLIRLoad = EncodingMap[checkLIR->opcode].flags & - IS_LOAD; - if (aliasCondition == ENCODE_LITERAL) { - /* - * Should only see literal loads in the instruction - * stream. 
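Once two accesses are established as must-aliases with nothing clobbering the register or the memory location in between, the rules spelled out at the head of applyLoadStoreElimination collapse to a small decision table on the (earlier, later) access kinds. A compact restatement under exactly those assumptions, with invented enum names:

    #include <cassert>

    enum Access { kLoad, kStore };
    enum Action { kElideLater, kElideEarlier, kStopScan };

    // Assumes must-alias and no intervening clobber, per the rules above.
    static Action classify(Access earlier, Access later) {
        if (later == kLoad)    return kElideLater;   // RAR/RAW: reuse the reg
        if (earlier == kStore) return kElideEarlier; // WAW: earlier store dead
        return kStopScan;                            // WAR: register killed
    }

    int main() {
        assert(classify(kLoad,  kLoad)  == kElideLater);
        assert(classify(kStore, kLoad)  == kElideLater);
        assert(classify(kStore, kStore) == kElideEarlier);
        assert(classify(kLoad,  kStore) == kStopScan);
        return 0;
    }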
- */ - DCHECK(!(EncodingMap[checkLIR->opcode].flags & - IS_STORE)); - /* Same value && same register type */ - if (checkLIR->aliasInfo == thisLIR->aliasInfo && - REGTYPE(checkLIR->operands[0]) == REGTYPE(nativeRegId)){ - /* - * Different destination register - insert - * a move - */ - if (checkLIR->operands[0] != nativeRegId) { - convertMemOpIntoMove(cUnit, checkLIR, - checkLIR->operands[0], - nativeRegId); - } - checkLIR->flags.isNop = true; - } - } else if (aliasCondition == ENCODE_DALVIK_REG) { - /* Must alias */ - if (checkLIR->aliasInfo == thisLIR->aliasInfo) { - /* Only optimize compatible registers */ - bool regCompatible = - REGTYPE(checkLIR->operands[0]) == - REGTYPE(nativeRegId); - if ((isThisLIRLoad && isCheckLIRLoad) || - (!isThisLIRLoad && isCheckLIRLoad)) { - /* RAR or RAW */ - if (regCompatible) { - /* - * Different destination register - - * insert a move - */ - if (checkLIR->operands[0] != - nativeRegId) { - convertMemOpIntoMove(cUnit, - checkLIR, - checkLIR->operands[0], - nativeRegId); - } - checkLIR->flags.isNop = true; - } else { - /* - * Destinaions are of different types - - * something complicated going on so - * stop looking now. - */ - stopHere = true; - } - } else if (isThisLIRLoad && !isCheckLIRLoad) { - /* WAR - register value is killed */ - stopHere = true; - } else if (!isThisLIRLoad && !isCheckLIRLoad) { - /* WAW - nuke the earlier store */ - thisLIR->flags.isNop = true; - stopHere = true; - } - /* Partial overlap */ - } else if (isDalvikRegisterClobbered(thisLIR, checkLIR)) { - /* - * It is actually ok to continue if checkLIR - * is a read. But it is hard to make a test - * case for this so we just stop here to be - * conservative. - */ - stopHere = true; - } - } - /* Memory content may be updated. Stop looking now. */ - if (stopHere) { - break; - /* The checkLIR has been transformed - check the next one */ - } else if (checkLIR->flags.isNop) { - continue; - } - } - - - /* - * this and check LIRs have no memory dependency. Now check if - * their register operands have any RAW, WAR, and WAW - * dependencies. If so, stop looking. - */ - if (stopHere == false) { - stopHere = CHECK_REG_DEP(stopUseRegMask, stopDefRegMask, - checkLIR); - } - - if (stopHere == true) { - DEBUG_OPT(dumpDependentInsnPair(thisLIR, checkLIR, - "REG CLOBBERED")); - /* Only sink store instructions */ - if (sinkDistance && !isThisLIRLoad) { - ArmLIR* newStoreLIR = - (ArmLIR* ) oatNew(cUnit, sizeof(ArmLIR), true, - kAllocLIR); - *newStoreLIR = *thisLIR; - /* - * Stop point found - insert *before* the checkLIR - * since the instruction list is scanned in the - * top-down order. - */ - oatInsertLIRBefore((LIR*) checkLIR, - (LIR*) newStoreLIR); - thisLIR->flags.isNop = true; - } - break; - } else if (!checkLIR->flags.isNop) { - sinkDistance++; - } - } - } -} - -/* - * Perform a pass of bottom-up walk, from the second instruction in the - * superblock, to try to hoist loads to earlier slots. - */ -STATIC void applyLoadHoisting(CompilationUnit* cUnit, - ArmLIR* headLIR, - ArmLIR* tailLIR) -{ - ArmLIR* thisLIR, *checkLIR; - /* - * Store the list of independent instructions that can be hoisted past. - * Will decide the best place to insert later. 
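A 64-bit Dalvik value occupies two adjacent vRegs, so two accesses can partially overlap even when their aliasInfo values differ; that partial-overlap case is where the pass conservatively stops. A standalone version of the overlap test in the spirit of isDalvikRegisterClobbered() above, assuming aliasInfo decodes to a low vReg number plus a wide flag:

    #include <cassert>

    struct AliasInfo { int reg; bool wide; };

    // A wide access touches [reg, reg+1]; a narrow one touches [reg, reg].
    static bool dalvikRegsOverlap(AliasInfo a, AliasInfo b) {
        int aHi = a.reg + (a.wide ? 1 : 0);
        int bHi = b.reg + (b.wide ? 1 : 0);
        return (a.reg == b.reg) || (a.reg == bHi) || (aHi == b.reg);
    }

    int main() {
        assert(dalvikRegsOverlap({4, true}, {5, false}));   // high half vs v5
        assert(!dalvikRegsOverlap({4, false}, {5, false})); // disjoint narrows
        assert(dalvikRegsOverlap({3, true}, {4, true}));    // [3,4] vs [4,5]
        return 0;
    }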
- */ - ArmLIR* prevInstList[MAX_HOIST_DISTANCE]; - - /* Empty block */ - if (headLIR == tailLIR) return; - - /* Start from the second instruction */ - for (thisLIR = NEXT_LIR(headLIR); - thisLIR != tailLIR; - thisLIR = NEXT_LIR(thisLIR)) { - - /* Skip non-interesting instructions */ - if ((thisLIR->flags.isNop == true) || - isPseudoOpcode(thisLIR->opcode) || - !(EncodingMap[thisLIR->opcode].flags & IS_LOAD)) { - continue; - } - - u8 stopUseAllMask = thisLIR->useMask; - - /* - * Branches for null/range checks are marked with the true resource - * bits, and loads to Dalvik registers, constant pools, and non-alias - * locations are safe to be hoisted. So only mark the heap references - * conservatively here. - */ - if (stopUseAllMask & ENCODE_HEAP_REF) { - stopUseAllMask |= ENCODE_REG_PC; - } - - /* Similar to above, but just check for pure register dependency */ - u8 stopUseRegMask = stopUseAllMask & ~ENCODE_MEM; - u8 stopDefRegMask = thisLIR->defMask & ~ENCODE_MEM; - - int nextSlot = 0; - bool stopHere = false; - - /* Try to hoist the load to a good spot */ - for (checkLIR = PREV_LIR(thisLIR); - checkLIR != headLIR; - checkLIR = PREV_LIR(checkLIR)) { - - /* - * Skip already dead instructions (whose dataflow information is - * outdated and misleading). - */ - if (checkLIR->flags.isNop) continue; - - u8 checkMemMask = checkLIR->defMask & ENCODE_MEM; - u8 aliasCondition = stopUseAllMask & checkMemMask; - stopHere = false; - - /* Potential WAR alias seen - check the exact relation */ - if (checkMemMask != ENCODE_MEM && aliasCondition != 0) { - /* We can fully disambiguate Dalvik references */ - if (aliasCondition == ENCODE_DALVIK_REG) { - /* Must alias or partially overlap */ - if ((checkLIR->aliasInfo == thisLIR->aliasInfo) || - isDalvikRegisterClobbered(thisLIR, checkLIR)) { - stopHere = true; - } - /* Conservatively treat all heap refs as may-alias */ - } else { - DCHECK_EQ(aliasCondition, ENCODE_HEAP_REF); - stopHere = true; - } - /* Memory content may be updated. Stop looking now. */ - if (stopHere) { - prevInstList[nextSlot++] = checkLIR; - break; - } - } - - if (stopHere == false) { - stopHere = CHECK_REG_DEP(stopUseRegMask, stopDefRegMask, - checkLIR); - } - - /* - * Store the dependent or non-pseudo/independent instruction to the - * list. - */ - if (stopHere || !isPseudoOpcode(checkLIR->opcode)) { - prevInstList[nextSlot++] = checkLIR; - if (nextSlot == MAX_HOIST_DISTANCE) break; - } - - /* Found a new place to put the load - move it here */ - if (stopHere == true) { - DEBUG_OPT(dumpDependentInsnPair(checkLIR, thisLIR, - "HOIST STOP")); - break; - } - } - - /* - * Reached the top - use headLIR as the dependent marker as all labels - * are barriers. - */ - if (stopHere == false && nextSlot < MAX_HOIST_DISTANCE) { - prevInstList[nextSlot++] = headLIR; - } - - /* - * At least one independent instruction is found. Scan in the reversed - * direction to find a beneficial slot. - */ - if (nextSlot >= 2) { - int firstSlot = nextSlot - 2; - int slot; - ArmLIR* depLIR = prevInstList[nextSlot-1]; - /* If there is ld-ld dependency, wait LDLD_DISTANCE cycles */ - if (!isPseudoOpcode(depLIR->opcode) && - (EncodingMap[depLIR->opcode].flags & IS_LOAD)) { - firstSlot -= LDLD_DISTANCE; - } - /* - * Make sure we check slot >= 0 since firstSlot may be negative - * when the loop is first entered.
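The slot selection here encodes two latency heuristics: start LDLD_DISTANCE slots away when the instruction that blocked the scan is itself a load, and (in the loop that follows) refuse spots that leave the loaded value with fewer than LD_LATENCY instructions of slack. A deliberately reduced model of just the starting-slot choice; the constants are copied from the deleted header, everything else is illustrative:

    #include <cstdio>

    // Heuristic constants from the deleted LocalOptimizations.cc header.
    static const int LDLD_DISTANCE = 4;  // penalty for back-to-back loads
    static const int LD_LATENCY = 2;     // cycles before a load's result is ready

    // nSlots independent instructions were collected above the load; depIsLoad
    // says whether the instruction that stopped the scan is itself a load.
    // Returns the first candidate slot (negative: leave the load alone); the
    // real pass then walks down from here checking LD_LATENCY slack.
    static int firstHoistSlot(int nSlots, bool depIsLoad) {
        if (nSlots < 2) return -1;       // nothing independent to hoist past
        int firstSlot = nSlots - 2;      // slot nSlots-1 is the blocker itself
        if (depIsLoad) firstSlot -= LDLD_DISTANCE;
        return firstSlot;
    }

    int main() {
        std::printf("%d\n", firstHoistSlot(8, false));  // 6
        std::printf("%d\n", firstHoistSlot(8, true));   // 2: ld-ld spaced out
        std::printf("%d\n", firstHoistSlot(3, true));   // -3: not worth moving
        return 0;
    }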
- */ - for (slot = firstSlot; slot >= 0; slot--) { - ArmLIR* curLIR = prevInstList[slot]; - ArmLIR* prevLIR = prevInstList[slot+1]; - - /* Check the highest instruction */ - if (prevLIR->defMask == ENCODE_ALL) { - /* - * If the first instruction is a load, don't hoist anything - * above it since it is unlikely to be beneficial. - */ - if (EncodingMap[curLIR->opcode].flags & IS_LOAD) continue; - /* - * If the remaining number of slots is less than LD_LATENCY, - * insert the hoisted load here. - */ - if (slot < LD_LATENCY) break; - } - - /* - * NOTE: now prevLIR is guaranteed to be a non-pseudo - * instruction (ie accessing EncodingMap[prevLIR->opcode] is - * safe). - * - * Try to find two instructions with load/use dependency until - * the remaining instructions are less than LD_LATENCY. - */ - if (((curLIR->useMask & prevLIR->defMask) && - (EncodingMap[prevLIR->opcode].flags & IS_LOAD)) || - (slot < LD_LATENCY)) { - break; - } - } - - /* Found a slot to hoist to */ - if (slot >= 0) { - ArmLIR* curLIR = prevInstList[slot]; - ArmLIR* newLoadLIR = (ArmLIR* ) oatNew(cUnit, sizeof(ArmLIR), - true, kAllocLIR); - *newLoadLIR = *thisLIR; - /* - * Insertion is guaranteed to succeed since checkLIR - * is never the first LIR on the list - */ - oatInsertLIRBefore((LIR*) curLIR, (LIR*) newLoadLIR); - thisLIR->flags.isNop = true; - } - } - } -} - -void oatApplyLocalOptimizations(CompilationUnit* cUnit, LIR* headLIR, - LIR* tailLIR) -{ - if (!(cUnit->disableOpt & (1 << kLoadStoreElimination))) { - applyLoadStoreElimination(cUnit, (ArmLIR* ) headLIR, - (ArmLIR* ) tailLIR); - } - if (!(cUnit->disableOpt & (1 << kLoadHoisting))) { - applyLoadHoisting(cUnit, (ArmLIR* ) headLIR, (ArmLIR* ) tailLIR); - } -} - -} // namespace art diff --git a/src/compiler/codegen/arm/MethodCodegenDriver.cc b/src/compiler/codegen/arm/MethodCodegenDriver.cc deleted file mode 100644 index 4efa27ab3b..0000000000 --- a/src/compiler/codegen/arm/MethodCodegenDriver.cc +++ /dev/null @@ -1,1978 +0,0 @@ -/* - * Copyright (C) 2011 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "object_utils.h" - -namespace art { - -#define DISPLAY_MISSING_TARGETS (cUnit->enableDebug & \ - (1 << kDebugDisplayMissingTargets)) - -STATIC const RegLocation badLoc = {kLocDalvikFrame, 0, 0, 0, 0, 0, 0, INVALID_REG, - INVALID_REG, INVALID_SREG}; - -/* Mark register usage state and return long retloc */ -STATIC RegLocation getRetLocWide(CompilationUnit* cUnit) -{ - RegLocation res = LOC_DALVIK_RETURN_VAL_WIDE; - oatLockTemp(cUnit, res.lowReg); - oatLockTemp(cUnit, res.highReg); - oatMarkPair(cUnit, res.lowReg, res.highReg); - return res; -} - -STATIC RegLocation getRetLoc(CompilationUnit* cUnit) -{ - RegLocation res = LOC_DALVIK_RETURN_VAL; - oatLockTemp(cUnit, res.lowReg); - return res; -} - -/* - * Let helper function take care of everything. Will call - * Array::AllocFromCode(type_idx, method, count); - * Note: AllocFromCode will handle checks for errNegativeArraySize. 
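genNewArray below is one instance of a call protocol that every helper-based opcode in this file follows: flush promoted values to their home slots, pull the entrypoint out of the Thread struct into rLR, marshal arguments into r0-r2, then branch-and-link through rLR. Restated in the file's own vocabulary; type_idx, rlSrc and rlDest stand for the surrounding function's locals, and this is a shape sketch rather than new code:

    oatFlushAllRegs(cUnit);                    // promoted values -> home slots
    loadWordDisp(cUnit, rSELF,                 // rLR <- entrypoint off Thread
                 OFFSETOF_MEMBER(Thread, pAllocArrayFromCode), rLR);
    loadConstant(cUnit, r0, type_idx);         // arg0: immediate type index
    loadCurrMethodDirect(cUnit, r1);           // arg1: current Method*
    loadValueDirectFixed(cUnit, rlSrc, r2);    // arg2: Dalvik value, fixed reg
    callRuntimeHelper(cUnit, rLR);             // blx rLR, clobbers callee-saves
    storeValue(cUnit, rlDest, oatGetReturn(cUnit));  // result arrives in r0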
- */ -STATIC void genNewArray(CompilationUnit* cUnit, MIR* mir, RegLocation rlDest, - RegLocation rlSrc) -{ - oatFlushAllRegs(cUnit); /* Everything to home location */ - uint32_t type_idx = mir->dalvikInsn.vC; - if (cUnit->compiler->CanAccessTypeWithoutChecks(cUnit->method_idx, - cUnit->dex_cache, - *cUnit->dex_file, - type_idx)) { - loadWordDisp(cUnit, rSELF, - OFFSETOF_MEMBER(Thread, pAllocArrayFromCode), rLR); - } else { - loadWordDisp(cUnit, rSELF, - OFFSETOF_MEMBER(Thread, pAllocArrayFromCodeWithAccessCheck), rLR); - } - loadCurrMethodDirect(cUnit, r1); // arg1 <- Method* - loadConstant(cUnit, r0, type_idx); // arg0 <- type_id - loadValueDirectFixed(cUnit, rlSrc, r2); // arg2 <- count - callRuntimeHelper(cUnit, rLR); - RegLocation rlResult = oatGetReturn(cUnit); - storeValue(cUnit, rlDest, rlResult); -} - -/* - * Similar to genNewArray, but with post-allocation initialization. - * Verifier guarantees we're dealing with an array class. Current - * code throws runtime exception "bad Filled array req" for 'D' and 'J'. - * Current code also throws internal unimp if not 'L', '[' or 'I'. - */ -STATIC void genFilledNewArray(CompilationUnit* cUnit, MIR* mir, bool isRange) -{ - DecodedInstruction* dInsn = &mir->dalvikInsn; - int elems = dInsn->vA; - int typeId = dInsn->vB; - oatFlushAllRegs(cUnit); /* Everything to home location */ - if (cUnit->compiler->CanAccessTypeWithoutChecks(cUnit->method_idx, - cUnit->dex_cache, - *cUnit->dex_file, - typeId)) { - loadWordDisp(cUnit, rSELF, - OFFSETOF_MEMBER(Thread, pCheckAndAllocArrayFromCode), rLR); - } else { - loadWordDisp(cUnit, rSELF, - OFFSETOF_MEMBER(Thread, pCheckAndAllocArrayFromCodeWithAccessCheck), rLR); - } - loadCurrMethodDirect(cUnit, r1); // arg1 <- Method* - loadConstant(cUnit, r0, typeId); // arg0 <- type_id - loadConstant(cUnit, r2, elems); // arg2 <- count - callRuntimeHelper(cUnit, rLR); - /* - * NOTE: the implicit target for OP_FILLED_NEW_ARRAY is the - * return region. Because AllocFromCode placed the new array - * in r0, we'll just lock it into place. When debugger support is - * added, it may be necessary to additionally copy all return - * values to a home location in thread-local storage - */ - oatLockTemp(cUnit, r0); - - // Having a range of 0 is legal - if (isRange && (dInsn->vA > 0)) { - /* - * Bit of ugliness here. We're going generate a mem copy loop - * on the register range, but it is possible that some regs - * in the range have been promoted. This is unlikely, but - * before generating the copy, we'll just force a flush - * of any regs in the source range that have been promoted to - * home location. - */ - for (unsigned int i = 0; i < dInsn->vA; i++) { - RegLocation loc = oatUpdateLoc(cUnit, - oatGetSrc(cUnit, mir, i)); - if (loc.location == kLocPhysReg) { - storeBaseDisp(cUnit, rSP, oatSRegOffset(cUnit, loc.sRegLow), - loc.lowReg, kWord); - } - } - /* - * TUNING note: generated code here could be much improved, but - * this is an uncommon operation and isn't especially performance - * critical. 
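The copy loop generated just below counts rIdx down from count-1 and branches on the flags set by the decrementing subs, saving a separate compare. The same structure in plain C++, purely as an illustration of what the emitted ldr/str/subs/bge sequence computes:

    #include <cassert>

    // Down-counting copy: mirrors the ldr/str/subs/bge loop emitted below.
    static void copyWords(const int* src, int* dst, int count) {
        for (int idx = count - 1; idx >= 0; --idx)  // subs + bge in Thumb2
            dst[idx] = src[idx];
    }

    int main() {
        int src[4] = {1, 2, 3, 4}, dst[4] = {0};
        copyWords(src, dst, 4);
        assert(dst[0] == 1 && dst[3] == 4);
        return 0;
    }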
- */ - int rSrc = oatAllocTemp(cUnit); - int rDst = oatAllocTemp(cUnit); - int rIdx = oatAllocTemp(cUnit); - int rVal = rLR; // Using a lot of temps, rLR is known free here - // Set up source pointer - RegLocation rlFirst = oatGetSrc(cUnit, mir, 0); - opRegRegImm(cUnit, kOpAdd, rSrc, rSP, - oatSRegOffset(cUnit, rlFirst.sRegLow)); - // Set up the target pointer - opRegRegImm(cUnit, kOpAdd, rDst, r0, - Array::DataOffset().Int32Value()); - // Set up the loop counter (known to be > 0) - loadConstant(cUnit, rIdx, dInsn->vA - 1); - // Generate the copy loop. Going backwards for convenience - ArmLIR* target = newLIR0(cUnit, kArmPseudoTargetLabel); - target->defMask = ENCODE_ALL; - // Copy next element - loadBaseIndexed(cUnit, rSrc, rIdx, rVal, 2, kWord); - storeBaseIndexed(cUnit, rDst, rIdx, rVal, 2, kWord); - // Use setflags encoding here - newLIR3(cUnit, kThumb2SubsRRI12, rIdx, rIdx, 1); - ArmLIR* branch = opCondBranch(cUnit, kArmCondGe); - branch->generic.target = (LIR*)target; - } else if (!isRange) { - // TUNING: interleave - for (unsigned int i = 0; i < dInsn->vA; i++) { - RegLocation rlArg = loadValue(cUnit, - oatGetSrc(cUnit, mir, i), kCoreReg); - storeBaseDisp(cUnit, r0, - Array::DataOffset().Int32Value() + - i * 4, rlArg.lowReg, kWord); - // If the loadValue caused a temp to be allocated, free it - if (oatIsTemp(cUnit, rlArg.lowReg)) { - oatFreeTemp(cUnit, rlArg.lowReg); - } - } - } -} - -STATIC void genSput(CompilationUnit* cUnit, MIR* mir, RegLocation rlSrc, - bool isLongOrDouble, bool isObject) -{ - int fieldOffset; - int ssbIndex; - bool isVolatile; - bool isReferrersClass; - uint32_t fieldIdx = mir->dalvikInsn.vB; - bool fastPath = - cUnit->compiler->ComputeStaticFieldInfo(fieldIdx, cUnit, - fieldOffset, ssbIndex, - isReferrersClass, isVolatile, true); - if (fastPath && !SLOW_FIELD_PATH) { - DCHECK_GE(fieldOffset, 0); - int rBase; - int rMethod; - if (isReferrersClass) { - // Fast path, static storage base is this method's class - rMethod = loadCurrMethod(cUnit); - rBase = oatAllocTemp(cUnit); - loadWordDisp(cUnit, rMethod, - Method::DeclaringClassOffset().Int32Value(), rBase); - } else { - // Medium path, static storage base in a different class which - // requires checks that the other class is initialized. - DCHECK_GE(ssbIndex, 0); - // May do runtime call so everything to home locations. - oatFlushAllRegs(cUnit); - // Using fixed register to sync with possible call to runtime - // support. - rMethod = r1; - oatLockTemp(cUnit, rMethod); - loadCurrMethodDirect(cUnit, rMethod); - rBase = r0; - oatLockTemp(cUnit, rBase); - loadWordDisp(cUnit, rMethod, - Method::DexCacheInitializedStaticStorageOffset().Int32Value(), - rBase); - loadWordDisp(cUnit, rBase, - Array::DataOffset().Int32Value() + sizeof(int32_t*) * ssbIndex, - rBase); - // rBase now points at appropriate static storage base (Class*) - // or NULL if not initialized. Check for NULL and call helper if NULL. 
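The medium path here hinges on a lazily filled dex-cache slot: the static storage base loaded from the cache is NULL until the class is initialized, so the generated code branches over a pInitializeStaticStorage call when the slot is already populated, and both paths rejoin at the skipTarget label. A toy stand-in for that control flow (the table and names are illustrative, not the runtime's real data structures):

    #include <cassert>

    static int* ssbCache[16];            // dex-cache-like table (illustrative)
    static int storage[16];

    static int* initializeStaticStorage(int ssbIndex) {  // runtime helper model
        ssbCache[ssbIndex] = &storage[ssbIndex];
        return ssbCache[ssbIndex];
    }

    static int* getStorageBase(int ssbIndex) {
        int* base = ssbCache[ssbIndex];  // loadWordDisp from the dex cache
        if (base == nullptr)             // genCmpImmBranch(kArmCondNe, ...)
            base = initializeStaticStorage(ssbIndex);    // slow path call
        return base;                     // paths rejoin at skipTarget
    }

    int main() {
        assert(ssbCache[3] == nullptr);
        int* b1 = getStorageBase(3);     // slow path taken exactly once
        int* b2 = getStorageBase(3);     // fast path afterwards
        assert(b1 == b2 && b1 != nullptr);
        return 0;
    }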
- // TUNING: fast path should fall through - ArmLIR* branchOver = genCmpImmBranch(cUnit, kArmCondNe, rBase, 0); - loadWordDisp(cUnit, rSELF, - OFFSETOF_MEMBER(Thread, pInitializeStaticStorage), rLR); - loadConstant(cUnit, r0, ssbIndex); - callRuntimeHelper(cUnit, rLR); - ArmLIR* skipTarget = newLIR0(cUnit, kArmPseudoTargetLabel); - skipTarget->defMask = ENCODE_ALL; - branchOver->generic.target = (LIR*)skipTarget; - } - // rBase now holds static storage base - oatFreeTemp(cUnit, rMethod); - if (isLongOrDouble) { - rlSrc = oatGetSrcWide(cUnit, mir, 0, 1); - rlSrc = loadValueWide(cUnit, rlSrc, kAnyReg); - } else { - rlSrc = oatGetSrc(cUnit, mir, 0); - rlSrc = loadValue(cUnit, rlSrc, kAnyReg); - } - if (isVolatile) { - oatGenMemBarrier(cUnit, kST); - } - if (isLongOrDouble) { - storeBaseDispWide(cUnit, rBase, fieldOffset, rlSrc.lowReg, - rlSrc.highReg); - } else { - storeWordDisp(cUnit, rBase, fieldOffset, rlSrc.lowReg); - } - if (isVolatile) { - oatGenMemBarrier(cUnit, kSY); - } - if (isObject) { - markGCCard(cUnit, rlSrc.lowReg, rBase); - } - oatFreeTemp(cUnit, rBase); - } else { - oatFlushAllRegs(cUnit); // Everything to home locations - int setterOffset = isLongOrDouble ? OFFSETOF_MEMBER(Thread, pSet64Static) : - (isObject ? OFFSETOF_MEMBER(Thread, pSetObjStatic) - : OFFSETOF_MEMBER(Thread, pSet32Static)); - loadWordDisp(cUnit, rSELF, setterOffset, rLR); - loadConstant(cUnit, r0, fieldIdx); - if (isLongOrDouble) { - loadValueDirectWideFixed(cUnit, rlSrc, r2, r3); - } else { - loadValueDirect(cUnit, rlSrc, r1); - } - callRuntimeHelper(cUnit, rLR); - } -} - -STATIC void genSget(CompilationUnit* cUnit, MIR* mir, RegLocation rlDest, - bool isLongOrDouble, bool isObject) -{ - int fieldOffset; - int ssbIndex; - bool isVolatile; - bool isReferrersClass; - uint32_t fieldIdx = mir->dalvikInsn.vB; - bool fastPath = - cUnit->compiler->ComputeStaticFieldInfo(fieldIdx, cUnit, - fieldOffset, ssbIndex, - isReferrersClass, isVolatile, false); - if (fastPath && !SLOW_FIELD_PATH) { - DCHECK_GE(fieldOffset, 0); - int rBase; - int rMethod; - if (isReferrersClass) { - // Fast path, static storage base is this method's class - rMethod = loadCurrMethod(cUnit); - rBase = oatAllocTemp(cUnit); - loadWordDisp(cUnit, rMethod, - Method::DeclaringClassOffset().Int32Value(), rBase); - } else { - // Medium path, static storage base in a different class which - // requires checks that the other class is initialized - DCHECK_GE(ssbIndex, 0); - // May do runtime call so everything to home locations. - oatFlushAllRegs(cUnit); - // Using fixed register to sync with possible call to runtime - // support - rMethod = r1; - oatLockTemp(cUnit, rMethod); - loadCurrMethodDirect(cUnit, rMethod); - rBase = r0; - oatLockTemp(cUnit, rBase); - loadWordDisp(cUnit, rMethod, - Method::DexCacheInitializedStaticStorageOffset().Int32Value(), - rBase); - loadWordDisp(cUnit, rBase, - Array::DataOffset().Int32Value() + sizeof(int32_t*) * ssbIndex, - rBase); - // rBase now points at appropriate static storage base (Class*) - // or NULL if not initialized. Check for NULL and call helper if NULL. 
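The static field accessors here pick between three code shapes, and the SLOW_FIELD_PATH debug flag defined at the top of this change can force the slowest one. The decision, restated as a tiny standalone function with invented enum names:

    #include <cassert>

    enum FieldPath { kFast, kMedium, kSlow };

    // fastPath: the compiler resolved offset/ssbIndex; referrersClass: the
    // static lives in the calling method's own class; forceSlow: the
    // SLOW_FIELD_PATH debug override.
    static FieldPath chooseStaticFieldPath(bool fastPath, bool referrersClass,
                                           bool forceSlow) {
        if (!fastPath || forceSlow) return kSlow;  // pSet32Static etc.
        return referrersClass ? kFast              // base = declaring class
                              : kMedium;           // dex cache + init check
    }

    int main() {
        assert(chooseStaticFieldPath(true, true, false) == kFast);
        assert(chooseStaticFieldPath(true, false, false) == kMedium);
        assert(chooseStaticFieldPath(false, true, false) == kSlow);
        assert(chooseStaticFieldPath(true, true, true) == kSlow);
        return 0;
    }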
- // TUNING: fast path should fall through - ArmLIR* branchOver = genCmpImmBranch(cUnit, kArmCondNe, rBase, 0); - loadWordDisp(cUnit, rSELF, - OFFSETOF_MEMBER(Thread, pInitializeStaticStorage), rLR); - loadConstant(cUnit, r0, ssbIndex); - callRuntimeHelper(cUnit, rLR); - ArmLIR* skipTarget = newLIR0(cUnit, kArmPseudoTargetLabel); - skipTarget->defMask = ENCODE_ALL; - branchOver->generic.target = (LIR*)skipTarget; - } - // rBase now holds static storage base - oatFreeTemp(cUnit, rMethod); - rlDest = isLongOrDouble ? oatGetDestWide(cUnit, mir, 0, 1) - : oatGetDest(cUnit, mir, 0); - RegLocation rlResult = oatEvalLoc(cUnit, rlDest, kAnyReg, true); - if (isVolatile) { - oatGenMemBarrier(cUnit, kSY); - } - if (isLongOrDouble) { - loadBaseDispWide(cUnit, NULL, rBase, fieldOffset, rlResult.lowReg, - rlResult.highReg, INVALID_SREG); - } else { - loadWordDisp(cUnit, rBase, fieldOffset, rlResult.lowReg); - } - oatFreeTemp(cUnit, rBase); - if (isLongOrDouble) { - storeValueWide(cUnit, rlDest, rlResult); - } else { - storeValue(cUnit, rlDest, rlResult); - } - } else { - oatFlushAllRegs(cUnit); // Everything to home locations - int getterOffset = isLongOrDouble ? OFFSETOF_MEMBER(Thread, pGet64Static) : - (isObject ? OFFSETOF_MEMBER(Thread, pGetObjStatic) - : OFFSETOF_MEMBER(Thread, pGet32Static)); - loadWordDisp(cUnit, rSELF, getterOffset, rLR); - loadConstant(cUnit, r0, fieldIdx); - callRuntimeHelper(cUnit, rLR); - if (isLongOrDouble) { - RegLocation rlResult = oatGetReturnWide(cUnit); - storeValueWide(cUnit, rlDest, rlResult); - } else { - RegLocation rlResult = oatGetReturn(cUnit); - storeValue(cUnit, rlDest, rlResult); - } - } -} - -typedef int (*NextCallInsn)(CompilationUnit*, MIR*, int, uint32_t dexIdx, - uint32_t methodIdx); - -/* - * Bit of a hack here - in leiu of a real scheduling pass, - * emit the next instruction in static & direct invoke sequences. - */ -STATIC int nextSDCallInsn(CompilationUnit* cUnit, MIR* mir, - int state, uint32_t dexIdx, uint32_t unused) -{ - switch(state) { - case 0: // Get the current Method* [sets r0] - loadCurrMethodDirect(cUnit, r0); - break; - case 1: // Get method->code_and_direct_methods_ - loadWordDisp(cUnit, r0, - Method::GetDexCacheCodeAndDirectMethodsOffset().Int32Value(), - r0); - break; - case 2: // Grab target method* and target code_ - loadWordDisp(cUnit, r0, - CodeAndDirectMethods::CodeOffsetInBytes(dexIdx), rLR); - loadWordDisp(cUnit, r0, - CodeAndDirectMethods::MethodOffsetInBytes(dexIdx), r0); - break; - default: - return -1; - } - return state + 1; -} - -/* - * Bit of a hack here - in leiu of a real scheduling pass, - * emit the next instruction in a virtual invoke sequence. - * We can use rLR as a temp prior to target address loading - * Note also that we'll load the first argument ("this") into - * r1 here rather than the standard loadArgRegs. - */ -STATIC int nextVCallInsn(CompilationUnit* cUnit, MIR* mir, - int state, uint32_t dexIdx, uint32_t methodIdx) -{ - RegLocation rlArg; - /* - * This is the fast path in which the target virtual method is - * fully resolved at compile time. - */ - switch(state) { - case 0: // Get "this" [set r1] - rlArg = oatGetSrc(cUnit, mir, 0); - loadValueDirectFixed(cUnit, rlArg, r1); - break; - case 1: // Is "this" null? 
[use r1] - genNullCheck(cUnit, oatSSASrc(mir,0), r1, mir); - // get this->klass_ [use r1, set rLR] - loadWordDisp(cUnit, r1, Object::ClassOffset().Int32Value(), rLR); - break; - case 2: // Get this->klass_->vtable [use rLR, set rLR] - loadWordDisp(cUnit, rLR, Class::VTableOffset().Int32Value(), rLR); - break; - case 3: // Get target method [use rLR, set r0] - loadWordDisp(cUnit, rLR, (methodIdx * 4) + - Array::DataOffset().Int32Value(), r0); - break; - case 4: // Get the target compiled code address [uses r0, sets rLR] - loadWordDisp(cUnit, r0, Method::GetCodeOffset().Int32Value(), rLR); - break; - default: - return -1; - } - return state + 1; -} - -/* - * Interleave launch code for INVOKE_SUPER. See comments - * for nextVCallInsn. - */ -STATIC int nextSuperCallInsn(CompilationUnit* cUnit, MIR* mir, - int state, uint32_t dexIdx, uint32_t methodIdx) -{ - /* - * This is the fast path in which the target virtual method is - * fully resolved at compile time. Note also that this path assumes - * that the check to verify that the target method index falls - * within the size of the super's vtable has been done at compile-time. - */ - RegLocation rlArg; - switch(state) { - case 0: // Get current Method* [set r0] - loadCurrMethodDirect(cUnit, r0); - // Load "this" [set r1] - rlArg = oatGetSrc(cUnit, mir, 0); - loadValueDirectFixed(cUnit, rlArg, r1); - // Get method->declaring_class_ [use r0, set rLR] - loadWordDisp(cUnit, r0, Method::DeclaringClassOffset().Int32Value(), - rLR); - // Is "this" null? [use r1] - genNullCheck(cUnit, oatSSASrc(mir,0), r1, mir); - break; - case 1: // Get method->declaring_class_->super_class [use rLR, set rLR] - loadWordDisp(cUnit, rLR, Class::SuperClassOffset().Int32Value(), - rLR); - break; - case 2: // Get ...->super_class_->vtable [use/set rLR] - loadWordDisp(cUnit, rLR, Class::VTableOffset().Int32Value(), rLR); - break; - case 3: // Get target method [use rLR, set r0] - loadWordDisp(cUnit, rLR, (methodIdx * 4) + - Array::DataOffset().Int32Value(), r0); - break; - case 4: // Get the target compiled code address [uses r0, sets rLR] - loadWordDisp(cUnit, r0, Method::GetCodeOffset().Int32Value(), rLR); - break; - default: - return -1; - } - return state + 1; -} - -STATIC int nextInvokeInsnSP(CompilationUnit* cUnit, MIR* mir, int trampoline, - int state, uint32_t dexIdx, uint32_t methodIdx) -{ - /* - * This handles the case in which the base method is not fully - * resolved at compile time; we bail to a runtime helper.
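Each of these nextXxxCallInsn helpers is one step of a state machine: a call emits the next instruction of the invoke preamble and returns state + 1, or -1 once the sequence is exhausted, which is what lets genInvoke below interleave argument loading with target resolution. A standalone model of that driver contract; the emitter is a toy that merely prints what the Thumb2 steps would do:

    #include <cstdio>

    // Each step emits one preamble instruction; -1 signals completion,
    // matching the NextCallInsn contract used by the invoke generator.
    static int nextToyCallInsn(int state) {
        switch (state) {
            case 0: std::puts("load current Method* -> r0"); break;
            case 1: std::puts("load dex cache methods -> r0"); break;
            case 2: std::puts("load target Method* and code pointer"); break;
            default: return -1;
        }
        return state + 1;
    }

    int main() {
        int state = 0;
        while (state >= 0)          // genInvoke drains the machine this way
            state = nextToyCallInsn(state);
        return 0;
    }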
- */ - if (state == 0) { - // Load trampoline target - loadWordDisp(cUnit, rSELF, trampoline, rLR); - // Load r0 with method index - loadConstant(cUnit, r0, dexIdx); - return 1; - } - return -1; -} - -STATIC int nextStaticCallInsnSP(CompilationUnit* cUnit, MIR* mir, - int state, uint32_t dexIdx, uint32_t methodIdx) -{ - int trampoline = OFFSETOF_MEMBER(Thread, pInvokeStaticTrampolineWithAccessCheck); - return nextInvokeInsnSP(cUnit, mir, trampoline, state, dexIdx, 0); -} - -STATIC int nextDirectCallInsnSP(CompilationUnit* cUnit, MIR* mir, - int state, uint32_t dexIdx, uint32_t methodIdx) -{ - int trampoline = OFFSETOF_MEMBER(Thread, pInvokeDirectTrampolineWithAccessCheck); - return nextInvokeInsnSP(cUnit, mir, trampoline, state, dexIdx, 0); -} - -STATIC int nextSuperCallInsnSP(CompilationUnit* cUnit, MIR* mir, - int state, uint32_t dexIdx, uint32_t methodIdx) -{ - int trampoline = OFFSETOF_MEMBER(Thread, pInvokeSuperTrampolineWithAccessCheck); - return nextInvokeInsnSP(cUnit, mir, trampoline, state, dexIdx, 0); -} - -STATIC int nextVCallInsnSP(CompilationUnit* cUnit, MIR* mir, - int state, uint32_t dexIdx, uint32_t methodIdx) -{ - int trampoline = OFFSETOF_MEMBER(Thread, pInvokeVirtualTrampolineWithAccessCheck); - return nextInvokeInsnSP(cUnit, mir, trampoline, state, dexIdx, 0); -} - -/* - * All invoke-interface calls bounce off of art_invoke_interface_trampoline, - * which will locate the target and continue on via a tail call. - */ -STATIC int nextInterfaceCallInsn(CompilationUnit* cUnit, MIR* mir, - int state, uint32_t dexIdx, uint32_t unused) -{ - int trampoline = OFFSETOF_MEMBER(Thread, pInvokeInterfaceTrampoline); - return nextInvokeInsnSP(cUnit, mir, trampoline, state, dexIdx, 0); -} - -STATIC int nextInterfaceCallInsnWithAccessCheck(CompilationUnit* cUnit, - MIR* mir, int state, - uint32_t dexIdx, - uint32_t unused) -{ - int trampoline = OFFSETOF_MEMBER(Thread, pInvokeInterfaceTrampolineWithAccessCheck); - return nextInvokeInsnSP(cUnit, mir, trampoline, state, dexIdx, 0); -} - -STATIC int loadArgRegs(CompilationUnit* cUnit, MIR* mir, - DecodedInstruction* dInsn, int callState, - NextCallInsn nextCallInsn, uint32_t dexIdx, - uint32_t methodIdx, bool skipThis) -{ - int nextReg = r1; - int nextArg = 0; - if (skipThis) { - nextReg++; - nextArg++; - } - for (; (nextReg <= r3) && (nextArg < mir->ssaRep->numUses); nextReg++) { - RegLocation rlArg = oatGetRawSrc(cUnit, mir, nextArg++); - rlArg = oatUpdateRawLoc(cUnit, rlArg); - if (rlArg.wide && (nextReg <= r2)) { - loadValueDirectWideFixed(cUnit, rlArg, nextReg, nextReg + 1); - nextReg++; - nextArg++; - } else { - rlArg.wide = false; - loadValueDirectFixed(cUnit, rlArg, nextReg); - } - callState = nextCallInsn(cUnit, mir, callState, dexIdx, methodIdx); - } - return callState; -} - -/* - * Load up to 5 arguments, the first three of which will be in - * r1 .. r3. On entry r0 contains the current method pointer, - * and as part of the load sequence, it must be replaced with - * the target method pointer. Note, this may also be called - * for "range" variants if the number of arguments is 5 or fewer. 
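loadArgRegs above packs up to three arguments into r1-r3 and keeps wide values in adjacent register pairs; in the deleted code's scheme, when only r3 remains for a wide value, its low half rides in r3 and the high half has already been flushed to the outs area by the caller. A simplified standalone restatement of the packing rule, returning each argument's first register (0 meaning it lives only in the outs area):

    #include <cassert>
    #include <vector>

    // Assign args to r1..r3; a wide arg prefers an adjacent pair and, if
    // only r3 is left, passes just its low half there. Simplified model.
    static std::vector<int> packArgRegs(const std::vector<bool>& isWide) {
        std::vector<int> firstReg;       // 0 = no register (outs area only)
        int next = 1;
        for (bool wide : isWide) {
            if (next > 3) { firstReg.push_back(0); continue; }
            firstReg.push_back(next);
            next += (wide && next <= 2) ? 2 : 1;
        }
        return firstReg;
    }

    int main() {
        std::vector<int> a = packArgRegs({false, true, false});
        assert(a[0] == 1 && a[1] == 2 && a[2] == 0);  // pair in r2/r3, then spill
        return 0;
    }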
- */ -STATIC int genDalvikArgsNoRange(CompilationUnit* cUnit, MIR* mir, - DecodedInstruction* dInsn, int callState, - ArmLIR** pcrLabel, NextCallInsn nextCallInsn, - uint32_t dexIdx, uint32_t methodIdx, - bool skipThis) -{ - RegLocation rlArg; - - /* If no arguments, just return */ - if (dInsn->vA == 0) - return callState; - - callState = nextCallInsn(cUnit, mir, callState, dexIdx, methodIdx); - - DCHECK_LE(dInsn->vA, 5U); - if (dInsn->vA > 3) { - uint32_t nextUse = 3; - //Detect special case of wide arg spanning arg3/arg4 - RegLocation rlUse0 = oatGetRawSrc(cUnit, mir, 0); - RegLocation rlUse1 = oatGetRawSrc(cUnit, mir, 1); - RegLocation rlUse2 = oatGetRawSrc(cUnit, mir, 2); - if (((!rlUse0.wide && !rlUse1.wide) || rlUse0.wide) && - rlUse2.wide) { - int reg; - // Wide spans, we need the 2nd half of uses[2]. - rlArg = oatUpdateLocWide(cUnit, rlUse2); - if (rlArg.location == kLocPhysReg) { - reg = rlArg.highReg; - } else { - // r2 & r3 can safely be used here - reg = r3; - loadWordDisp(cUnit, rSP, - oatSRegOffset(cUnit, rlArg.sRegLow) + 4, reg); - callState = nextCallInsn(cUnit, mir, callState, dexIdx, - methodIdx); - } - storeBaseDisp(cUnit, rSP, (nextUse + 1) * 4, reg, kWord); - storeBaseDisp(cUnit, rSP, 16 /* (3+1)*4 */, reg, kWord); - callState = nextCallInsn(cUnit, mir, callState, dexIdx, methodIdx); - nextUse++; - } - // Loop through the rest - while (nextUse < dInsn->vA) { - int lowReg; - int highReg; - rlArg = oatGetRawSrc(cUnit, mir, nextUse); - rlArg = oatUpdateRawLoc(cUnit, rlArg); - if (rlArg.location == kLocPhysReg) { - lowReg = rlArg.lowReg; - highReg = rlArg.highReg; - } else { - lowReg = r2; - highReg = r3; - if (rlArg.wide) { - loadValueDirectWideFixed(cUnit, rlArg, lowReg, highReg); - } else { - loadValueDirectFixed(cUnit, rlArg, lowReg); - } - callState = nextCallInsn(cUnit, mir, callState, dexIdx, - methodIdx); - } - int outsOffset = (nextUse + 1) * 4; - if (rlArg.wide) { - storeBaseDispWide(cUnit, rSP, outsOffset, lowReg, highReg); - nextUse += 2; - } else { - storeWordDisp(cUnit, rSP, outsOffset, lowReg); - nextUse++; - } - callState = nextCallInsn(cUnit, mir, callState, dexIdx, methodIdx); - } - } - - callState = loadArgRegs(cUnit, mir, dInsn, callState, nextCallInsn, - dexIdx, methodIdx, skipThis); - - if (pcrLabel) { - *pcrLabel = genNullCheck(cUnit, oatSSASrc(mir,0), r1, mir); - } - return callState; -} - -/* - * May have 0+ arguments (also used for jumbo). Note that - * source virtual registers may be in physical registers, so may - * need to be flushed to home location before copying. This - * applies to arg3 and above (see below). - * - * Two general strategies: - * If < 20 arguments - * Pass args 3-18 using vldm/vstm block copy - * Pass arg0, arg1 & arg2 in r1-r3 - * If 20+ arguments - * Pass args arg19+ using memcpy block copy - * Pass arg0, arg1 & arg2 in r1-r3 - * - */ -STATIC int genDalvikArgsRange(CompilationUnit* cUnit, MIR* mir, - DecodedInstruction* dInsn, int callState, - ArmLIR** pcrLabel, NextCallInsn nextCallInsn, - uint32_t dexIdx, uint32_t methodIdx, - bool skipThis) -{ - int firstArg = dInsn->vC; - int numArgs = dInsn->vA; - - // If we can treat it as non-range (Jumbo ops will use range form) - if (numArgs <= 5) - return genDalvikArgsNoRange(cUnit, mir, dInsn, callState, pcrLabel, - nextCallInsn, dexIdx, methodIdx, - skipThis); - /* - * Make sure range list doesn't span the break between in normal - * Dalvik vRegs and the ins. 
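The function body below chooses among three argument-copy strategies by count: up to 5 arguments bounce to the non-range path, 20 or more go through a pMemcpy call, and everything in between block-copies args 3..18 with a vldm/vstm pair (at most 16 registers, per the std::min in the deleted code). The thresholds, restated standalone:

    #include <algorithm>
    #include <cassert>

    enum CopyStrategy { kRegisterArgs, kVldmVstm, kMemcpy };

    // Mirrors the dispatch in genDalvikArgsRange below.
    static CopyStrategy chooseCopy(int numArgs) {
        if (numArgs <= 5) return kRegisterArgs;   // genDalvikArgsNoRange
        return (numArgs >= 20) ? kMemcpy : kVldmVstm;
    }

    int main() {
        assert(chooseCopy(4) == kRegisterArgs);
        assert(chooseCopy(12) == kVldmVstm);
        assert(chooseCopy(25) == kMemcpy);
        int regsLeft = std::min(12 - 3, 16);      // vldm count for 12 args
        assert(regsLeft == 9);
        return 0;
    }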
- */ - int highestArg = oatGetSrc(cUnit, mir, numArgs-1).sRegLow; - int boundaryReg = cUnit->numDalvikRegisters - cUnit->numIns; - if ((firstArg < boundaryReg) && (highestArg >= boundaryReg)) { - LOG(FATAL) << "Argument list spanned locals & args"; - } - - /* - * First load the non-register arguments. Both forms expect all - * of the source arguments to be in their home frame location, so - * scan the sReg names and flush any that have been promoted to - * frame backing storage. - */ - // Scan the rest of the args - if in physReg flush to memory - for (int nextArg = 0; nextArg < numArgs;) { - RegLocation loc = oatGetRawSrc(cUnit, mir, nextArg); - if (loc.wide) { - loc = oatUpdateLocWide(cUnit, loc); - if ((nextArg >= 2) && (loc.location == kLocPhysReg)) { - storeBaseDispWide(cUnit, rSP, - oatSRegOffset(cUnit, loc.sRegLow), - loc.lowReg, loc.highReg); - } - nextArg += 2; - } else { - loc = oatUpdateLoc(cUnit, loc); - if ((nextArg >= 3) && (loc.location == kLocPhysReg)) { - storeBaseDisp(cUnit, rSP, oatSRegOffset(cUnit, loc.sRegLow), - loc.lowReg, kWord); - } - nextArg++; - } - } - - int startOffset = oatSRegOffset(cUnit, - cUnit->regLocation[mir->ssaRep->uses[3]].sRegLow); - int outsOffset = 4 /* Method* */ + (3 * 4); - if (numArgs >= 20) { - // Generate memcpy - opRegRegImm(cUnit, kOpAdd, r0, rSP, outsOffset); - opRegRegImm(cUnit, kOpAdd, r1, rSP, startOffset); - loadWordDisp(cUnit, rSELF, OFFSETOF_MEMBER(Thread, pMemcpy), rLR); - loadConstant(cUnit, r2, (numArgs - 3) * 4); - callRuntimeHelper(cUnit, rLR); - // Restore Method* - loadCurrMethodDirect(cUnit, r0); - } else { - // Use vldm/vstm pair using r3 as a temp - int regsLeft = std::min(numArgs - 3, 16); - callState = nextCallInsn(cUnit, mir, callState, dexIdx, methodIdx); - opRegRegImm(cUnit, kOpAdd, r3, rSP, startOffset); - ArmLIR* ld = newLIR3(cUnit, kThumb2Vldms, r3, fr0, regsLeft); - //TUNING: loosen barrier - ld->defMask = ENCODE_ALL; - setMemRefType(ld, true /* isLoad */, kDalvikReg); - callState = nextCallInsn(cUnit, mir, callState, dexIdx, methodIdx); - opRegRegImm(cUnit, kOpAdd, r3, rSP, 4 /* Method* */ + (3 * 4)); - callState = nextCallInsn(cUnit, mir, callState, dexIdx, methodIdx); - ArmLIR* st = newLIR3(cUnit, kThumb2Vstms, r3, fr0, regsLeft); - setMemRefType(st, false /* isLoad */, kDalvikReg); - st->defMask = ENCODE_ALL; - callState = nextCallInsn(cUnit, mir, callState, dexIdx, methodIdx); - } - - callState = loadArgRegs(cUnit, mir, dInsn, callState, nextCallInsn, - dexIdx, methodIdx, skipThis); - - callState = nextCallInsn(cUnit, mir, callState, dexIdx, methodIdx); - if (pcrLabel) { - *pcrLabel = genNullCheck(cUnit, oatSSASrc(mir,0), r1, mir); - } - return callState; -} - -// Debugging routine - if null target, branch to DebugMe -STATIC void genShowTarget(CompilationUnit* cUnit) -{ - ArmLIR* branchOver = genCmpImmBranch(cUnit, kArmCondNe, rLR, 0); - loadWordDisp(cUnit, rSELF, - OFFSETOF_MEMBER(Thread, pDebugMe), rLR); - ArmLIR* target = newLIR0(cUnit, kArmPseudoTargetLabel); - target->defMask = -1; - branchOver->generic.target = (LIR*)target; -} - -STATIC void genInvoke(CompilationUnit* cUnit, MIR* mir, - InvokeType type, bool isRange) -{ - DecodedInstruction* dInsn = &mir->dalvikInsn; - int callState = 0; - ArmLIR* nullCk; - ArmLIR** pNullCk = NULL; - NextCallInsn nextCallInsn; - oatFlushAllRegs(cUnit); /* Everything to home location */ - // Explicit register usage - oatLockCallTemps(cUnit); - - uint32_t dexMethodIdx = dInsn->vB; - int vtableIdx; - bool skipThis; - bool fastPath = - 
cUnit->compiler->ComputeInvokeInfo(dexMethodIdx, cUnit, type, - vtableIdx) - && !SLOW_INVOKE_PATH; - if (type == kInterface) { - nextCallInsn = fastPath ? nextInterfaceCallInsn - : nextInterfaceCallInsnWithAccessCheck; - skipThis = false; - } else if (type == kDirect) { - if (fastPath) { - pNullCk = &nullCk; - } - nextCallInsn = fastPath ? nextSDCallInsn : nextDirectCallInsnSP; - skipThis = false; - } else if (type == kStatic) { - nextCallInsn = fastPath ? nextSDCallInsn : nextStaticCallInsnSP; - skipThis = false; - } else if (type == kSuper) { - nextCallInsn = fastPath ? nextSuperCallInsn : nextSuperCallInsnSP; - skipThis = fastPath; - } else { - DCHECK_EQ(type, kVirtual); - nextCallInsn = fastPath ? nextVCallInsn : nextVCallInsnSP; - skipThis = fastPath; - } - if (!isRange) { - callState = genDalvikArgsNoRange(cUnit, mir, dInsn, callState, pNullCk, - nextCallInsn, dexMethodIdx, - vtableIdx, skipThis); - } else { - callState = genDalvikArgsRange(cUnit, mir, dInsn, callState, pNullCk, - nextCallInsn, dexMethodIdx, vtableIdx, - skipThis); - } - // Finish up any of the call sequence not interleaved in arg loading - while (callState >= 0) { - callState = nextCallInsn(cUnit, mir, callState, dexMethodIdx, - vtableIdx); - } - if (DISPLAY_MISSING_TARGETS) { - genShowTarget(cUnit); - } - opReg(cUnit, kOpBlx, rLR); - oatClobberCalleeSave(cUnit); -} - -STATIC bool compileDalvikInstruction(CompilationUnit* cUnit, MIR* mir, - BasicBlock* bb, ArmLIR* labelList) -{ - bool res = false; // Assume success - RegLocation rlSrc[3]; - RegLocation rlDest = badLoc; - RegLocation rlResult = badLoc; - Opcode opcode = mir->dalvikInsn.opcode; - - /* Prep Src and Dest locations */ - int nextSreg = 0; - int nextLoc = 0; - int attrs = oatDataFlowAttributes[opcode]; - rlSrc[0] = rlSrc[1] = rlSrc[2] = badLoc; - if (attrs & DF_UA) { - rlSrc[nextLoc++] = oatGetSrc(cUnit, mir, nextSreg); - nextSreg++; - } else if (attrs & DF_UA_WIDE) { - rlSrc[nextLoc++] = oatGetSrcWide(cUnit, mir, nextSreg, - nextSreg + 1); - nextSreg+= 2; - } - if (attrs & DF_UB) { - rlSrc[nextLoc++] = oatGetSrc(cUnit, mir, nextSreg); - nextSreg++; - } else if (attrs & DF_UB_WIDE) { - rlSrc[nextLoc++] = oatGetSrcWide(cUnit, mir, nextSreg, - nextSreg + 1); - nextSreg+= 2; - } - if (attrs & DF_UC) { - rlSrc[nextLoc++] = oatGetSrc(cUnit, mir, nextSreg); - } else if (attrs & DF_UC_WIDE) { - rlSrc[nextLoc++] = oatGetSrcWide(cUnit, mir, nextSreg, - nextSreg + 1); - } - if (attrs & DF_DA) { - rlDest = oatGetDest(cUnit, mir, 0); - } else if (attrs & DF_DA_WIDE) { - rlDest = oatGetDestWide(cUnit, mir, 0, 1); - } - - switch(opcode) { - case OP_NOP: - break; - - case OP_MOVE_EXCEPTION: - int exOffset; - int resetReg; - exOffset = Thread::ExceptionOffset().Int32Value(); - resetReg = oatAllocTemp(cUnit); - rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true); - loadWordDisp(cUnit, rSELF, exOffset, rlResult.lowReg); - loadConstant(cUnit, resetReg, 0); - storeWordDisp(cUnit, rSELF, exOffset, resetReg); - storeValue(cUnit, rlDest, rlResult); - break; - - case OP_RETURN_VOID: - genSuspendTest(cUnit, mir); - break; - - case OP_RETURN: - case OP_RETURN_OBJECT: - genSuspendTest(cUnit, mir); - storeValue(cUnit, getRetLoc(cUnit), rlSrc[0]); - break; - - case OP_RETURN_WIDE: - genSuspendTest(cUnit, mir); - storeValueWide(cUnit, getRetLocWide(cUnit), rlSrc[0]); - break; - - case OP_MOVE_RESULT_WIDE: - if (mir->optimizationFlags & MIR_INLINED) - break; // Nop - combined w/ previous invoke - storeValueWide(cUnit, rlDest, getRetLocWide(cUnit)); - break; - - case OP_MOVE_RESULT: - 
case OP_MOVE_RESULT_OBJECT: - if (mir->optimizationFlags & MIR_INLINED) - break; // Nop - combined w/ previous invoke - storeValue(cUnit, rlDest, getRetLoc(cUnit)); - break; - - case OP_MOVE: - case OP_MOVE_OBJECT: - case OP_MOVE_16: - case OP_MOVE_OBJECT_16: - case OP_MOVE_FROM16: - case OP_MOVE_OBJECT_FROM16: - storeValue(cUnit, rlDest, rlSrc[0]); - break; - - case OP_MOVE_WIDE: - case OP_MOVE_WIDE_16: - case OP_MOVE_WIDE_FROM16: - storeValueWide(cUnit, rlDest, rlSrc[0]); - break; - - case OP_CONST: - case OP_CONST_4: - case OP_CONST_16: - rlResult = oatEvalLoc(cUnit, rlDest, kAnyReg, true); - loadConstantNoClobber(cUnit, rlResult.lowReg, mir->dalvikInsn.vB); - storeValue(cUnit, rlDest, rlResult); - break; - - case OP_CONST_HIGH16: - rlResult = oatEvalLoc(cUnit, rlDest, kAnyReg, true); - loadConstantNoClobber(cUnit, rlResult.lowReg, - mir->dalvikInsn.vB << 16); - storeValue(cUnit, rlDest, rlResult); - break; - - case OP_CONST_WIDE_16: - case OP_CONST_WIDE_32: - rlResult = oatEvalLoc(cUnit, rlDest, kAnyReg, true); - loadConstantValueWide(cUnit, rlResult.lowReg, rlResult.highReg, - mir->dalvikInsn.vB, - (mir->dalvikInsn.vB & 0x80000000) ? -1 : 0); - storeValueWide(cUnit, rlDest, rlResult); - break; - - case OP_CONST_WIDE: - rlResult = oatEvalLoc(cUnit, rlDest, kAnyReg, true); - loadConstantValueWide(cUnit, rlResult.lowReg, rlResult.highReg, - mir->dalvikInsn.vB_wide & 0xffffffff, - (mir->dalvikInsn.vB_wide >> 32) & 0xffffffff); - storeValueWide(cUnit, rlDest, rlResult); - break; - - case OP_CONST_WIDE_HIGH16: - rlResult = oatEvalLoc(cUnit, rlDest, kAnyReg, true); - loadConstantValueWide(cUnit, rlResult.lowReg, rlResult.highReg, - 0, mir->dalvikInsn.vB << 16); - storeValueWide(cUnit, rlDest, rlResult); - break; - - case OP_MONITOR_ENTER: - genMonitorEnter(cUnit, mir, rlSrc[0]); - break; - - case OP_MONITOR_EXIT: - genMonitorExit(cUnit, mir, rlSrc[0]); - break; - - case OP_CHECK_CAST: - genCheckCast(cUnit, mir, rlSrc[0]); - break; - - case OP_INSTANCE_OF: - genInstanceof(cUnit, mir, rlDest, rlSrc[0]); - break; - - case OP_NEW_INSTANCE: - genNewInstance(cUnit, mir, rlDest); - break; - - case OP_THROW: - genThrow(cUnit, mir, rlSrc[0]); - break; - - case OP_THROW_VERIFICATION_ERROR: - loadWordDisp(cUnit, rSELF, - OFFSETOF_MEMBER(Thread, pThrowVerificationErrorFromCode), rLR); - loadConstant(cUnit, r0, mir->dalvikInsn.vA); - loadConstant(cUnit, r1, mir->dalvikInsn.vB); - callRuntimeHelper(cUnit, rLR); - break; - - case OP_ARRAY_LENGTH: - int lenOffset; - lenOffset = Array::LengthOffset().Int32Value(); - rlSrc[0] = loadValue(cUnit, rlSrc[0], kCoreReg); - genNullCheck(cUnit, rlSrc[0].sRegLow, rlSrc[0].lowReg, mir); - rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true); - loadWordDisp(cUnit, rlSrc[0].lowReg, lenOffset, - rlResult.lowReg); - storeValue(cUnit, rlDest, rlResult); - break; - - case OP_CONST_STRING: - case OP_CONST_STRING_JUMBO: - genConstString(cUnit, mir, rlDest, rlSrc[0]); - break; - - case OP_CONST_CLASS: - genConstClass(cUnit, mir, rlDest, rlSrc[0]); - break; - - case OP_FILL_ARRAY_DATA: - genFillArrayData(cUnit, mir, rlSrc[0]); - break; - - case OP_FILLED_NEW_ARRAY: - genFilledNewArray(cUnit, mir, false /* not range */); - break; - - case OP_FILLED_NEW_ARRAY_RANGE: - genFilledNewArray(cUnit, mir, true /* range */); - break; - - case OP_NEW_ARRAY: - genNewArray(cUnit, mir, rlDest, rlSrc[0]); - break; - - case OP_GOTO: - case OP_GOTO_16: - case OP_GOTO_32: - if (bb->taken->startOffset <= mir->offset) { - genSuspendTest(cUnit, mir); - } - genUnconditionalBranch(cUnit, 
&labelList[bb->taken->id]); - break; - - case OP_PACKED_SWITCH: - genPackedSwitch(cUnit, mir, rlSrc[0]); - break; - - case OP_SPARSE_SWITCH: - genSparseSwitch(cUnit, mir, rlSrc[0]); - break; - - case OP_CMPL_FLOAT: - case OP_CMPG_FLOAT: - case OP_CMPL_DOUBLE: - case OP_CMPG_DOUBLE: - res = genCmpFP(cUnit, mir, rlDest, rlSrc[0], rlSrc[1]); - break; - - case OP_CMP_LONG: - genCmpLong(cUnit, mir, rlDest, rlSrc[0], rlSrc[1]); - break; - - case OP_IF_EQ: - case OP_IF_NE: - case OP_IF_LT: - case OP_IF_GE: - case OP_IF_GT: - case OP_IF_LE: { - bool backwardBranch; - ArmConditionCode cond; - backwardBranch = (bb->taken->startOffset <= mir->offset); - if (backwardBranch) { - genSuspendTest(cUnit, mir); - } - rlSrc[0] = loadValue(cUnit, rlSrc[0], kCoreReg); - rlSrc[1] = loadValue(cUnit, rlSrc[1], kCoreReg); - opRegReg(cUnit, kOpCmp, rlSrc[0].lowReg, rlSrc[1].lowReg); - switch(opcode) { - case OP_IF_EQ: - cond = kArmCondEq; - break; - case OP_IF_NE: - cond = kArmCondNe; - break; - case OP_IF_LT: - cond = kArmCondLt; - break; - case OP_IF_GE: - cond = kArmCondGe; - break; - case OP_IF_GT: - cond = kArmCondGt; - break; - case OP_IF_LE: - cond = kArmCondLe; - break; - default: - cond = (ArmConditionCode)0; - LOG(FATAL) << "Unexpected opcode " << (int)opcode; - } - genConditionalBranch(cUnit, cond, &labelList[bb->taken->id]); - genUnconditionalBranch(cUnit, &labelList[bb->fallThrough->id]); - break; - } - - case OP_IF_EQZ: - case OP_IF_NEZ: - case OP_IF_LTZ: - case OP_IF_GEZ: - case OP_IF_GTZ: - case OP_IF_LEZ: { - bool backwardBranch; - ArmConditionCode cond; - backwardBranch = (bb->taken->startOffset <= mir->offset); - if (backwardBranch) { - genSuspendTest(cUnit, mir); - } - rlSrc[0] = loadValue(cUnit, rlSrc[0], kCoreReg); - opRegImm(cUnit, kOpCmp, rlSrc[0].lowReg, 0); - switch(opcode) { - case OP_IF_EQZ: - cond = kArmCondEq; - break; - case OP_IF_NEZ: - cond = kArmCondNe; - break; - case OP_IF_LTZ: - cond = kArmCondLt; - break; - case OP_IF_GEZ: - cond = kArmCondGe; - break; - case OP_IF_GTZ: - cond = kArmCondGt; - break; - case OP_IF_LEZ: - cond = kArmCondLe; - break; - default: - cond = (ArmConditionCode)0; - LOG(FATAL) << "Unexpected opcode " << (int)opcode; - } - genConditionalBranch(cUnit, cond, &labelList[bb->taken->id]); - genUnconditionalBranch(cUnit, &labelList[bb->fallThrough->id]); - break; - } - - case OP_AGET_WIDE: - genArrayGet(cUnit, mir, kLong, rlSrc[0], rlSrc[1], rlDest, 3); - break; - case OP_AGET: - case OP_AGET_OBJECT: - genArrayGet(cUnit, mir, kWord, rlSrc[0], rlSrc[1], rlDest, 2); - break; - case OP_AGET_BOOLEAN: - genArrayGet(cUnit, mir, kUnsignedByte, rlSrc[0], rlSrc[1], - rlDest, 0); - break; - case OP_AGET_BYTE: - genArrayGet(cUnit, mir, kSignedByte, rlSrc[0], rlSrc[1], rlDest, 0); - break; - case OP_AGET_CHAR: - genArrayGet(cUnit, mir, kUnsignedHalf, rlSrc[0], rlSrc[1], - rlDest, 1); - break; - case OP_AGET_SHORT: - genArrayGet(cUnit, mir, kSignedHalf, rlSrc[0], rlSrc[1], rlDest, 1); - break; - case OP_APUT_WIDE: - genArrayPut(cUnit, mir, kLong, rlSrc[1], rlSrc[2], rlSrc[0], 3); - break; - case OP_APUT: - genArrayPut(cUnit, mir, kWord, rlSrc[1], rlSrc[2], rlSrc[0], 2); - break; - case OP_APUT_OBJECT: - genArrayObjPut(cUnit, mir, rlSrc[1], rlSrc[2], rlSrc[0], 2); - break; - case OP_APUT_SHORT: - case OP_APUT_CHAR: - genArrayPut(cUnit, mir, kUnsignedHalf, rlSrc[1], rlSrc[2], - rlSrc[0], 1); - break; - case OP_APUT_BYTE: - case OP_APUT_BOOLEAN: - genArrayPut(cUnit, mir, kUnsignedByte, rlSrc[1], rlSrc[2], - rlSrc[0], 0); - break; - - case OP_IGET_OBJECT: - case 
OP_IGET_OBJECT_VOLATILE: - genIGet(cUnit, mir, kWord, rlDest, rlSrc[0], false, true); - break; - - case OP_IGET_WIDE: - case OP_IGET_WIDE_VOLATILE: - genIGet(cUnit, mir, kLong, rlDest, rlSrc[0], true, false); - break; - - case OP_IGET: - case OP_IGET_VOLATILE: - genIGet(cUnit, mir, kWord, rlDest, rlSrc[0], false, false); - break; - - case OP_IGET_CHAR: - genIGet(cUnit, mir, kUnsignedHalf, rlDest, rlSrc[0], false, false); - break; - - case OP_IGET_SHORT: - genIGet(cUnit, mir, kSignedHalf, rlDest, rlSrc[0], false, false); - break; - - case OP_IGET_BOOLEAN: - case OP_IGET_BYTE: - genIGet(cUnit, mir, kUnsignedByte, rlDest, rlSrc[0], false, false); - break; - - case OP_IPUT_WIDE: - case OP_IPUT_WIDE_VOLATILE: - genIPut(cUnit, mir, kLong, rlSrc[0], rlSrc[1], true, false); - break; - - case OP_IPUT_OBJECT: - case OP_IPUT_OBJECT_VOLATILE: - genIPut(cUnit, mir, kWord, rlSrc[0], rlSrc[1], false, true); - break; - - case OP_IPUT: - case OP_IPUT_VOLATILE: - genIPut(cUnit, mir, kWord, rlSrc[0], rlSrc[1], false, false); - break; - - case OP_IPUT_BOOLEAN: - case OP_IPUT_BYTE: - genIPut(cUnit, mir, kUnsignedByte, rlSrc[0], rlSrc[1], false, false); - break; - - case OP_IPUT_CHAR: - genIPut(cUnit, mir, kUnsignedHalf, rlSrc[0], rlSrc[1], false, false); - break; - - case OP_IPUT_SHORT: - genIPut(cUnit, mir, kSignedHalf, rlSrc[0], rlSrc[1], false, false); - break; - - case OP_SGET_OBJECT: - genSget(cUnit, mir, rlDest, false, true); - break; - case OP_SGET: - case OP_SGET_BOOLEAN: - case OP_SGET_BYTE: - case OP_SGET_CHAR: - case OP_SGET_SHORT: - genSget(cUnit, mir, rlDest, false, false); - break; - - case OP_SGET_WIDE: - genSget(cUnit, mir, rlDest, true, false); - break; - - case OP_SPUT_OBJECT: - genSput(cUnit, mir, rlSrc[0], false, true); - break; - - case OP_SPUT: - case OP_SPUT_BOOLEAN: - case OP_SPUT_BYTE: - case OP_SPUT_CHAR: - case OP_SPUT_SHORT: - genSput(cUnit, mir, rlSrc[0], false, false); - break; - - case OP_SPUT_WIDE: - genSput(cUnit, mir, rlSrc[0], true, false); - break; - - case OP_INVOKE_STATIC_RANGE: - genInvoke(cUnit, mir, kStatic, true /*range*/); - break; - case OP_INVOKE_STATIC: - genInvoke(cUnit, mir, kStatic, false /*range*/); - break; - - case OP_INVOKE_DIRECT: - genInvoke(cUnit, mir, kDirect, false /*range*/); - break; - case OP_INVOKE_DIRECT_RANGE: - genInvoke(cUnit, mir, kDirect, true /*range*/); - break; - - case OP_INVOKE_VIRTUAL: - genInvoke(cUnit, mir, kVirtual, false /*range*/); - break; - case OP_INVOKE_VIRTUAL_RANGE: - genInvoke(cUnit, mir, kVirtual, true /*range*/); - break; - - case OP_INVOKE_SUPER: - genInvoke(cUnit, mir, kSuper, false /*range*/); - break; - case OP_INVOKE_SUPER_RANGE: - genInvoke(cUnit, mir, kSuper, true /*range*/); - break; - - case OP_INVOKE_INTERFACE: - genInvoke(cUnit, mir, kInterface, false /*range*/); - break; - case OP_INVOKE_INTERFACE_RANGE: - genInvoke(cUnit, mir, kInterface, true /*range*/); - break; - - case OP_NEG_INT: - case OP_NOT_INT: - res = genArithOpInt(cUnit, mir, rlDest, rlSrc[0], rlSrc[0]); - break; - - case OP_NEG_LONG: - case OP_NOT_LONG: - res = genArithOpLong(cUnit, mir, rlDest, rlSrc[0], rlSrc[0]); - break; - - case OP_NEG_FLOAT: - res = genArithOpFloat(cUnit, mir, rlDest, rlSrc[0], rlSrc[0]); - break; - - case OP_NEG_DOUBLE: - res = genArithOpDouble(cUnit, mir, rlDest, rlSrc[0], rlSrc[0]); - break; - - case OP_INT_TO_LONG: - rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true); - if (rlSrc[0].location == kLocPhysReg) { - genRegCopy(cUnit, rlResult.lowReg, rlSrc[0].lowReg); - } else { - loadValueDirect(cUnit, rlSrc[0], 
-        case OP_INT_TO_LONG:
-            rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true);
-            if (rlSrc[0].location == kLocPhysReg) {
-                genRegCopy(cUnit, rlResult.lowReg, rlSrc[0].lowReg);
-            } else {
-                loadValueDirect(cUnit, rlSrc[0], rlResult.lowReg);
-            }
-            opRegRegImm(cUnit, kOpAsr, rlResult.highReg,
-                        rlResult.lowReg, 31);
-            storeValueWide(cUnit, rlDest, rlResult);
-            break;
-
-        case OP_LONG_TO_INT:
-            rlSrc[0] = oatUpdateLocWide(cUnit, rlSrc[0]);
-            rlSrc[0] = oatWideToNarrow(cUnit, rlSrc[0]);
-            storeValue(cUnit, rlDest, rlSrc[0]);
-            break;
-
-        case OP_INT_TO_BYTE:
-            rlSrc[0] = loadValue(cUnit, rlSrc[0], kCoreReg);
-            rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true);
-            opRegReg(cUnit, kOp2Byte, rlResult.lowReg, rlSrc[0].lowReg);
-            storeValue(cUnit, rlDest, rlResult);
-            break;
-
-        case OP_INT_TO_SHORT:
-            rlSrc[0] = loadValue(cUnit, rlSrc[0], kCoreReg);
-            rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true);
-            opRegReg(cUnit, kOp2Short, rlResult.lowReg, rlSrc[0].lowReg);
-            storeValue(cUnit, rlDest, rlResult);
-            break;
-
-        case OP_INT_TO_CHAR:
-            rlSrc[0] = loadValue(cUnit, rlSrc[0], kCoreReg);
-            rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true);
-            opRegReg(cUnit, kOp2Char, rlResult.lowReg, rlSrc[0].lowReg);
-            storeValue(cUnit, rlDest, rlResult);
-            break;
-
-        case OP_INT_TO_FLOAT:
-        case OP_INT_TO_DOUBLE:
-        case OP_LONG_TO_FLOAT:
-        case OP_LONG_TO_DOUBLE:
-        case OP_FLOAT_TO_INT:
-        case OP_FLOAT_TO_LONG:
-        case OP_FLOAT_TO_DOUBLE:
-        case OP_DOUBLE_TO_INT:
-        case OP_DOUBLE_TO_LONG:
-        case OP_DOUBLE_TO_FLOAT:
-            genConversion(cUnit, mir);
-            break;
-
-        case OP_ADD_INT:
-        case OP_SUB_INT:
-        case OP_MUL_INT:
-        case OP_DIV_INT:
-        case OP_REM_INT:
-        case OP_AND_INT:
-        case OP_OR_INT:
-        case OP_XOR_INT:
-        case OP_SHL_INT:
-        case OP_SHR_INT:
-        case OP_USHR_INT:
-        case OP_ADD_INT_2ADDR:
-        case OP_SUB_INT_2ADDR:
-        case OP_MUL_INT_2ADDR:
-        case OP_DIV_INT_2ADDR:
-        case OP_REM_INT_2ADDR:
-        case OP_AND_INT_2ADDR:
-        case OP_OR_INT_2ADDR:
-        case OP_XOR_INT_2ADDR:
-        case OP_SHL_INT_2ADDR:
-        case OP_SHR_INT_2ADDR:
-        case OP_USHR_INT_2ADDR:
-            genArithOpInt(cUnit, mir, rlDest, rlSrc[0], rlSrc[1]);
-            break;
-
-        case OP_ADD_LONG:
-        case OP_SUB_LONG:
-        case OP_MUL_LONG:
-        case OP_DIV_LONG:
-        case OP_REM_LONG:
-        case OP_AND_LONG:
-        case OP_OR_LONG:
-        case OP_XOR_LONG:
-        case OP_ADD_LONG_2ADDR:
-        case OP_SUB_LONG_2ADDR:
-        case OP_MUL_LONG_2ADDR:
-        case OP_DIV_LONG_2ADDR:
-        case OP_REM_LONG_2ADDR:
-        case OP_AND_LONG_2ADDR:
-        case OP_OR_LONG_2ADDR:
-        case OP_XOR_LONG_2ADDR:
-            genArithOpLong(cUnit, mir, rlDest, rlSrc[0], rlSrc[1]);
-            break;
-
-        case OP_SHL_LONG:
-        case OP_SHR_LONG:
-        case OP_USHR_LONG:
-        case OP_SHL_LONG_2ADDR:
-        case OP_SHR_LONG_2ADDR:
-        case OP_USHR_LONG_2ADDR:
-            genShiftOpLong(cUnit, mir, rlDest, rlSrc[0], rlSrc[1]);
-            break;
-
-        case OP_ADD_FLOAT:
-        case OP_SUB_FLOAT:
-        case OP_MUL_FLOAT:
-        case OP_DIV_FLOAT:
-        case OP_REM_FLOAT:
-        case OP_ADD_FLOAT_2ADDR:
-        case OP_SUB_FLOAT_2ADDR:
-        case OP_MUL_FLOAT_2ADDR:
-        case OP_DIV_FLOAT_2ADDR:
-        case OP_REM_FLOAT_2ADDR:
-            genArithOpFloat(cUnit, mir, rlDest, rlSrc[0], rlSrc[1]);
-            break;
-
-        case OP_ADD_DOUBLE:
-        case OP_SUB_DOUBLE:
-        case OP_MUL_DOUBLE:
-        case OP_DIV_DOUBLE:
-        case OP_REM_DOUBLE:
-        case OP_ADD_DOUBLE_2ADDR:
-        case OP_SUB_DOUBLE_2ADDR:
-        case OP_MUL_DOUBLE_2ADDR:
-        case OP_DIV_DOUBLE_2ADDR:
-        case OP_REM_DOUBLE_2ADDR:
-            genArithOpDouble(cUnit, mir, rlDest, rlSrc[0], rlSrc[1]);
-            break;
-
-        case OP_RSUB_INT:
-        case OP_ADD_INT_LIT16:
-        case OP_MUL_INT_LIT16:
-        case OP_DIV_INT_LIT16:
-        case OP_REM_INT_LIT16:
-        case OP_AND_INT_LIT16:
-        case OP_OR_INT_LIT16:
-        case OP_XOR_INT_LIT16:
-        case OP_ADD_INT_LIT8:
-        case OP_RSUB_INT_LIT8:
-        case OP_MUL_INT_LIT8:
-        case OP_DIV_INT_LIT8:
-        case OP_REM_INT_LIT8:
-        case OP_AND_INT_LIT8:
-        case OP_OR_INT_LIT8:
-        case OP_XOR_INT_LIT8:
-        case OP_SHL_INT_LIT8:
-        case OP_SHR_INT_LIT8:
-        case OP_USHR_INT_LIT8:
-            genArithOpIntLit(cUnit, mir, rlDest, rlSrc[0], mir->dalvikInsn.vC);
-            break;
-
-        default:
-            res = true;
-    }
-    return res;
-}
-
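Editor's note: the narrowing conversions deleted above each lower to a single Thumb2 extension op (kOp2Byte and kOp2Short sign-extend, kOp2Char zero-extends). For reference, their semantics in plain C++; this is an illustrative sketch with invented names, not code from the patch:

    // Equivalents of the conversion cases above, assuming the usual
    // two's-complement narrowing.
    static int32_t intToByte(int32_t v)  { return static_cast<int8_t>(v); }    // kOp2Byte  (sxtb)
    static int32_t intToShort(int32_t v) { return static_cast<int16_t>(v); }   // kOp2Short (sxth)
    static int32_t intToChar(int32_t v)  { return static_cast<uint16_t>(v); }  // kOp2Char  (uxth)
    static int64_t intToLong(int32_t v)  { return v; }  // high word = low >> 31 (the kOpAsr, 31 above)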
-STATIC const char* extendedMIROpNames[kMirOpLast - kMirOpFirst] = {
-    "kMirOpPhi",
-    "kMirOpNullNRangeUpCheck",
-    "kMirOpNullNRangeDownCheck",
-    "kMirOpLowerBound",
-    "kMirOpPunt",
-    "kMirOpCheckInlinePrediction",
-};
-
-/* Extended MIR instructions like PHI */
-STATIC void handleExtendedMethodMIR(CompilationUnit* cUnit, MIR* mir)
-{
-    int opOffset = mir->dalvikInsn.opcode - kMirOpFirst;
-    char* msg = NULL;
-    if (cUnit->printMe) {
-        msg = (char*)oatNew(cUnit, strlen(extendedMIROpNames[opOffset]) + 1,
-                            false, kAllocDebugInfo);
-        strcpy(msg, extendedMIROpNames[opOffset]);
-    }
-    ArmLIR* op = newLIR1(cUnit, kArmPseudoExtended, (int) msg);
-
-    switch ((ExtendedMIROpcode)mir->dalvikInsn.opcode) {
-        case kMirOpPhi: {
-            char* ssaString = NULL;
-            if (cUnit->printMe) {
-                ssaString = oatGetSSAString(cUnit, mir->ssaRep);
-            }
-            op->flags.isNop = true;
-            newLIR1(cUnit, kArmPseudoSSARep, (int) ssaString);
-            break;
-        }
-        default:
-            break;
-    }
-}
-
-/*
- * If there are any ins passed in registers that have not been promoted
- * to a callee-save register, flush them to the frame. Perform initial
- * assignment of promoted arguments.
- */
-STATIC void flushIns(CompilationUnit* cUnit)
-{
-    if (cUnit->numIns == 0)
-        return;
-    int firstArgReg = r1;
-    int lastArgReg = r3;
-    int startVReg = cUnit->numDalvikRegisters - cUnit->numIns;
-    /*
-     * Arguments passed in registers should be flushed
-     * to their backing locations in the frame for now.
-     * Also, we need to do initial assignment for promoted
-     * arguments. NOTE: an older version of dx had an issue
-     * in which it would reuse static method argument registers.
-     * This could result in the same Dalvik virtual register
-     * being promoted to both core and fp regs. In those
-     * cases, copy argument to both. This will be uncommon
-     * enough that it isn't worth attempting to optimize.
-     */
-    for (int i = 0; i < cUnit->numIns; i++) {
-        PromotionMap vMap = cUnit->promotionMap[startVReg + i];
-        if (i <= (lastArgReg - firstArgReg)) {
-            // If arriving in register
-            if (vMap.coreLocation == kLocPhysReg) {
-                genRegCopy(cUnit, vMap.coreReg, firstArgReg + i);
-            }
-            if (vMap.fpLocation == kLocPhysReg) {
-                genRegCopy(cUnit, vMap.fpReg, firstArgReg + i);
-            }
-            // Also put a copy in memory in case we're partially promoted
-            storeBaseDisp(cUnit, rSP, oatSRegOffset(cUnit, startVReg + i),
-                          firstArgReg + i, kWord);
-        } else {
-            // If arriving in frame & promoted
-            if (vMap.coreLocation == kLocPhysReg) {
-                loadWordDisp(cUnit, rSP, oatSRegOffset(cUnit, startVReg + i),
-                             vMap.coreReg);
-            }
-            if (vMap.fpLocation == kLocPhysReg) {
-                loadWordDisp(cUnit, rSP, oatSRegOffset(cUnit, startVReg + i),
-                             vMap.fpReg);
-            }
-        }
-    }
-}
-
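Editor's note: compressed, flushIns performs one four-way case analysis per incoming argument. A sketch of that decision, with stub helpers standing in for the LIR-emitting calls above (all names here are invented for illustration):

    // Illustrative only; copyReg/loadHome/storeHome model genRegCopy,
    // loadWordDisp and storeBaseDisp from the deleted function.
    static void copyReg(int dest, int src) { /* emits: mov dest, src */ }
    static void loadHome(int dest, int vReg) { /* emits: ldr dest, [sp, #off(vReg)] */ }
    static void storeHome(int src, int vReg) { /* emits: str src, [sp, #off(vReg)] */ }

    static void flushOneIn(bool inArgReg, int argReg, int vReg,
                           bool corePromoted, int coreReg,
                           bool fpPromoted, int fpReg) {
        if (inArgReg) {                          // arrived in r1..r3
            if (corePromoted) copyReg(coreReg, argReg);
            if (fpPromoted)   copyReg(fpReg, argReg);  // dx quirk: possibly both
            storeHome(argReg, vReg);             // always refresh the frame copy
        } else {                                 // arrived on the stack
            if (corePromoted) loadHome(coreReg, vReg);
            if (fpPromoted)   loadHome(fpReg, vReg);
        }
    }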
-/* Handle the content in each basic block */
-STATIC bool methodBlockCodeGen(CompilationUnit* cUnit, BasicBlock* bb)
-{
-    MIR* mir;
-    ArmLIR* labelList = (ArmLIR*) cUnit->blockLabelList;
-    int blockId = bb->id;
-
-    cUnit->curBlock = bb;
-    labelList[blockId].operands[0] = bb->startOffset;
-
-    /* Insert the block label */
-    labelList[blockId].opcode = kArmPseudoNormalBlockLabel;
-    oatAppendLIR(cUnit, (LIR*) &labelList[blockId]);
-
-    /* Reset local optimization data on block boundaries */
-    oatResetRegPool(cUnit);
-    oatClobberAllRegs(cUnit);
-    oatResetDefTracking(cUnit);
-
-    ArmLIR* headLIR = NULL;
-
-    int spillCount = cUnit->numCoreSpills + cUnit->numFPSpills;
-    if (bb->blockType == kEntryBlock) {
-        /*
-         * On entry, r0, r1, r2 & r3 are live. Let the register allocation
-         * mechanism know so it doesn't try to use any of them when
-         * expanding the frame or flushing. This leaves the utility
-         * code with a single temp: r12. This should be enough.
-         */
-        oatLockTemp(cUnit, r0);
-        oatLockTemp(cUnit, r1);
-        oatLockTemp(cUnit, r2);
-        oatLockTemp(cUnit, r3);
-
-        /*
-         * We can safely skip the stack overflow check if we're
-         * a leaf *and* our frame size < fudge factor.
-         */
-        bool skipOverflowCheck = ((cUnit->attrs & METHOD_IS_LEAF) &&
-                                  ((size_t)cUnit->frameSize <
-                                   Thread::kStackOverflowReservedBytes));
-        newLIR0(cUnit, kArmPseudoMethodEntry);
-        if (!skipOverflowCheck) {
-            /* Load stack limit */
-            loadWordDisp(cUnit, rSELF,
-                         Thread::StackEndOffset().Int32Value(), r12);
-        }
-        /* Spill core callee saves */
-        newLIR1(cUnit, kThumb2Push, cUnit->coreSpillMask);
-        /* Need to spill any FP regs? */
-        if (cUnit->numFPSpills) {
-            /*
-             * NOTE: fp spills are a little different from core spills in that
-             * they are pushed as a contiguous block. When promoting from
-             * the fp set, we must allocate all singles from s16..highest-promoted
-             */
-            newLIR1(cUnit, kThumb2VPushCS, cUnit->numFPSpills);
-        }
-        if (!skipOverflowCheck) {
-            opRegRegImm(cUnit, kOpSub, rLR, rSP,
-                        cUnit->frameSize - (spillCount * 4));
-            genRegRegCheck(cUnit, kArmCondCc, rLR, r12, NULL,
-                           kArmThrowStackOverflow);
-            genRegCopy(cUnit, rSP, rLR);     // Establish stack
-        } else {
-            opRegImm(cUnit, kOpSub, rSP,
-                     cUnit->frameSize - (spillCount * 4));
-        }
-        storeBaseDisp(cUnit, rSP, 0, r0, kWord);
-        flushIns(cUnit);
-
-        if (cUnit->genDebugger) {
-            // Refresh update debugger callout
-            loadWordDisp(cUnit, rSELF,
-                         OFFSETOF_MEMBER(Thread, pUpdateDebuggerFromCode), rSUSPEND);
-            genDebuggerUpdate(cUnit, DEBUGGER_METHOD_ENTRY);
-        }
-
-        oatFreeTemp(cUnit, r0);
-        oatFreeTemp(cUnit, r1);
-        oatFreeTemp(cUnit, r2);
-        oatFreeTemp(cUnit, r3);
-    } else if (bb->blockType == kExitBlock) {
-        /*
-         * In the exit path, r0/r1 are live - make sure they aren't
-         * allocated by the register utilities as temps.
-         */
-        oatLockTemp(cUnit, r0);
-        oatLockTemp(cUnit, r1);
-
-        newLIR0(cUnit, kArmPseudoMethodExit);
-        /* If we're compiling for the debugger, generate an update callout */
-        if (cUnit->genDebugger) {
-            genDebuggerUpdate(cUnit, DEBUGGER_METHOD_EXIT);
-        }
-        opRegImm(cUnit, kOpAdd, rSP, cUnit->frameSize - (spillCount * 4));
-        /* Need to restore any FP callee saves? */
-        if (cUnit->numFPSpills) {
-            newLIR1(cUnit, kThumb2VPopCS, cUnit->numFPSpills);
-        }
-        if (cUnit->coreSpillMask & (1 << rLR)) {
-            /* Unspill rLR to rPC */
-            cUnit->coreSpillMask &= ~(1 << rLR);
-            cUnit->coreSpillMask |= (1 << rPC);
-        }
-        newLIR1(cUnit, kThumb2Pop, cUnit->coreSpillMask);
-        if (!(cUnit->coreSpillMask & (1 << rPC))) {
-            /* We didn't pop to rPC, so must do a bx rLR */
-            newLIR1(cUnit, kThumbBx, rLR);
-        }
-    }
-
-    for (mir = bb->firstMIRInsn; mir; mir = mir->next) {
-
-        oatResetRegPool(cUnit);
-        if (cUnit->disableOpt & (1 << kTrackLiveTemps)) {
-            oatClobberAllRegs(cUnit);
-        }
-
-        if (cUnit->disableOpt & (1 << kSuppressLoads)) {
-            oatResetDefTracking(cUnit);
-        }
-
-        if ((int)mir->dalvikInsn.opcode >= (int)kMirOpFirst) {
-            handleExtendedMethodMIR(cUnit, mir);
-            continue;
-        }
-
-        cUnit->currentDalvikOffset = mir->offset;
-
-        Opcode dalvikOpcode = mir->dalvikInsn.opcode;
-        InstructionFormat dalvikFormat =
-            dexGetFormatFromOpcode(dalvikOpcode);
-
-        ArmLIR* boundaryLIR;
-
-        /* Mark the beginning of a Dalvik instruction for line tracking */
-        char* instStr = cUnit->printMe ?
-            oatGetDalvikDisassembly(cUnit, &mir->dalvikInsn, "") : NULL;
-        boundaryLIR = newLIR1(cUnit, kArmPseudoDalvikByteCodeBoundary,
-                              (intptr_t) instStr);
-        cUnit->boundaryMap.insert(std::make_pair(mir->offset,
-                                  (LIR*)boundaryLIR));
-        /* Remember the first LIR for this block */
-        if (headLIR == NULL) {
-            headLIR = boundaryLIR;
-            /* Set the first boundaryLIR as a scheduling barrier */
-            headLIR->defMask = ENCODE_ALL;
-        }
-
-        /* If we're compiling for the debugger, generate an update callout */
-        if (cUnit->genDebugger) {
-            genDebuggerUpdate(cUnit, mir->offset);
-        }
-
-        /* Don't generate the SSA annotation unless verbose mode is on */
-        if (cUnit->printMe && mir->ssaRep) {
-            char* ssaString = oatGetSSAString(cUnit, mir->ssaRep);
-            newLIR1(cUnit, kArmPseudoSSARep, (int) ssaString);
-        }
-
-        bool notHandled = compileDalvikInstruction(cUnit, mir, bb, labelList);
-
-        if (notHandled) {
-            char buf[100];
-            snprintf(buf, 100, "%#06x: Opcode %#x (%s) / Fmt %d not handled",
-                     mir->offset,
-                     dalvikOpcode, dexGetOpcodeName(dalvikOpcode),
-                     dalvikFormat);
-            LOG(FATAL) << buf;
-        }
-    }
-
-    if (headLIR) {
-        /*
-         * Eliminate redundant loads/stores and delay stores into later
-         * slots
-         */
-        oatApplyLocalOptimizations(cUnit, (LIR*) headLIR,
-                                   cUnit->lastLIRInsn);
-
-        /*
-         * Generate an unconditional branch to the fallthrough block.
-         */
-        if (bb->fallThrough) {
-            genUnconditionalBranch(cUnit,
-                                   &labelList[bb->fallThrough->id]);
-        }
-    }
-    return false;
-}
-
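Editor's note: for orientation, the entry-block path above emits a prologue of roughly this shape when the overflow check is needed. The assembly is illustrative only (register roles taken from the code, not exact encodings):

    // Illustrative prologue for a non-leaf (or large-frame) method:
    //     ldr   r12, [rSELF, #Thread::StackEndOffset()]   ; stack limit
    //     push  {coreSpillMask registers}                 ; core callee saves
    //     vpush {s16 .. s(15 + numFPSpills)}              ; fp saves, one block
    //     sub   rLR, rSP, #(frameSize - 4*spillCount)     ; tentative new SP
    //     cmp   rLR, r12                                  ; below the limit?
    //     blo   <kArmThrowStackOverflow launchpad>
    //     mov   rSP, rLR                                  ; establish the frame
    //     str   r0, [rSP, #0]                             ; spill Method*
    // Leaf methods with small frames skip the ldr/cmp/blo and sub rSP directly.
    //
    // The push/vpush already moved SP down by 4*spillCount bytes, so only the
    // remainder of the frame is subtracted explicitly:
    static int remainingFrameAdjust(int frameSize, int spillCount) {
        return frameSize - 4 * spillCount;
    }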
-/*
- * Nop any unconditional branches that go to the next instruction.
- * Note: new redundant branches may be inserted later, and we'll
- * use a check in final instruction assembly to nop those out.
- */
-void removeRedundantBranches(CompilationUnit* cUnit)
-{
-    ArmLIR* thisLIR;
-
-    for (thisLIR = (ArmLIR*) cUnit->firstLIRInsn;
-         thisLIR != (ArmLIR*) cUnit->lastLIRInsn;
-         thisLIR = NEXT_LIR(thisLIR)) {
-
-        /* Branch to the next instruction */
-        if ((thisLIR->opcode == kThumbBUncond) ||
-            (thisLIR->opcode == kThumb2BUncond)) {
-            ArmLIR* nextLIR = thisLIR;
-
-            while (true) {
-                nextLIR = NEXT_LIR(nextLIR);
-
-                /*
-                 * Is the branch target the next instruction?
-                 */
-                if (nextLIR == (ArmLIR*) thisLIR->generic.target) {
-                    thisLIR->flags.isNop = true;
-                    break;
-                }
-
-                /*
-                 * Found real useful stuff between the branch and the target.
-                 * Need to explicitly check the lastLIRInsn here because it
-                 * might be the last real instruction.
-                 */
-                if (!isPseudoOpcode(nextLIR->opcode) ||
-                    (nextLIR == (ArmLIR*) cUnit->lastLIRInsn))
-                    break;
-            }
-        }
-    }
-}
-
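Editor's note: the scan above asks one question per unconditional branch: does anything other than pseudo LIR sit between the branch and its target? A standalone sketch of that predicate, reusing the codebase's own NEXT_LIR and isPseudoOpcode (the function name is invented):

    // True when only pseudo ops separate the branch from its target, i.e.
    // execution would fall through to the target anyway and the branch can
    // be nop'd. The lastLIR check stops the walk at the instruction stream end.
    static bool branchIsRedundant(ArmLIR* branch, ArmLIR* lastLIR) {
        for (ArmLIR* next = NEXT_LIR(branch); ; next = NEXT_LIR(next)) {
            if (next == (ArmLIR*)branch->generic.target) return true;
            if (!isPseudoOpcode(next->opcode) || next == lastLIR) return false;
        }
    }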
-STATIC void handleSuspendLaunchpads(CompilationUnit *cUnit)
-{
-    ArmLIR** suspendLabel =
-        (ArmLIR **) cUnit->suspendLaunchpads.elemList;
-    int numElems = cUnit->suspendLaunchpads.numUsed;
-
-    for (int i = 0; i < numElems; i++) {
-        /* TUNING: move suspend count load into helper */
-        ArmLIR* lab = suspendLabel[i];
-        ArmLIR* resumeLab = (ArmLIR*)lab->operands[0];
-        cUnit->currentDalvikOffset = lab->operands[1];
-        oatAppendLIR(cUnit, (LIR *)lab);
-        loadWordDisp(cUnit, rSELF,
-                     OFFSETOF_MEMBER(Thread, pTestSuspendFromCode), rLR);
-        if (!cUnit->genDebugger) {
-            // use rSUSPEND for suspend count
-            loadWordDisp(cUnit, rSELF,
-                         Thread::SuspendCountOffset().Int32Value(), rSUSPEND);
-        }
-        opReg(cUnit, kOpBlx, rLR);
-        if (cUnit->genDebugger) {
-            // use rSUSPEND for update debugger
-            loadWordDisp(cUnit, rSELF,
-                         OFFSETOF_MEMBER(Thread, pUpdateDebuggerFromCode), rSUSPEND);
-        }
-        genUnconditionalBranch(cUnit, resumeLab);
-    }
-}
-
-STATIC void handleThrowLaunchpads(CompilationUnit *cUnit)
-{
-    ArmLIR** throwLabel =
-        (ArmLIR **) cUnit->throwLaunchpads.elemList;
-    int numElems = cUnit->throwLaunchpads.numUsed;
-    int i;
-
-    for (i = 0; i < numElems; i++) {
-        ArmLIR* lab = throwLabel[i];
-        cUnit->currentDalvikOffset = lab->operands[1];
-        oatAppendLIR(cUnit, (LIR *)lab);
-        int funcOffset = 0;
-        int v1 = lab->operands[2];
-        int v2 = lab->operands[3];
-        switch(lab->operands[0]) {
-            case kArmThrowNullPointer:
-                funcOffset = OFFSETOF_MEMBER(Thread, pThrowNullPointerFromCode);
-                break;
-            case kArmThrowArrayBounds:
-                if (v2 != r0) {
-                    genRegCopy(cUnit, r0, v1);
-                    genRegCopy(cUnit, r1, v2);
-                } else {
-                    if (v1 == r1) {
-                        genRegCopy(cUnit, r12, v1);
-                        genRegCopy(cUnit, r1, v2);
-                        genRegCopy(cUnit, r0, r12);
-                    } else {
-                        genRegCopy(cUnit, r1, v2);
-                        genRegCopy(cUnit, r0, v1);
-                    }
-                }
-                funcOffset = OFFSETOF_MEMBER(Thread, pThrowArrayBoundsFromCode);
-                break;
-            case kArmThrowDivZero:
-                funcOffset = OFFSETOF_MEMBER(Thread, pThrowDivZeroFromCode);
-                break;
-            case kArmThrowVerificationError:
-                loadConstant(cUnit, r0, v1);
-                loadConstant(cUnit, r1, v2);
-                funcOffset =
-                    OFFSETOF_MEMBER(Thread, pThrowVerificationErrorFromCode);
-                break;
-            case kArmThrowNegArraySize:
-                genRegCopy(cUnit, r0, v1);
-                funcOffset =
-                    OFFSETOF_MEMBER(Thread, pThrowNegArraySizeFromCode);
-                break;
-            case kArmThrowNoSuchMethod:
-                genRegCopy(cUnit, r0, v1);
-                funcOffset =
-                    OFFSETOF_MEMBER(Thread, pThrowNoSuchMethodFromCode);
-                break;
-            case kArmThrowStackOverflow:
-                funcOffset =
-                    OFFSETOF_MEMBER(Thread, pThrowStackOverflowFromCode);
-                // Restore stack alignment
-                opRegImm(cUnit, kOpAdd, rSP,
-                         (cUnit->numCoreSpills + cUnit->numFPSpills) * 4);
-                break;
-            default:
-                LOG(FATAL) << "Unexpected throw kind: " << lab->operands[0];
-        }
-        loadWordDisp(cUnit, rSELF, funcOffset, rLR);
-        callRuntimeHelper(cUnit, rLR);
-    }
-}
-
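Editor's note: the kArmThrowArrayBounds case above is solving a small parallel-move problem: (v1, v2) must land in (r0, r1) without either copy clobbering the other source. Isolated, the decision tree reads as follows (function name invented; genRegCopy and r0/r1/r12 are the codebase's own):

    // Sketch of the two-register shuffle; r12 is the designated scratch reg.
    static void moveIndexAndLengthToArgs(CompilationUnit* cUnit, int v1, int v2) {
        if (v2 != r0) {            // writing r0 first cannot clobber v2
            genRegCopy(cUnit, r0, v1);
            genRegCopy(cUnit, r1, v2);
        } else if (v1 == r1) {     // full swap r0 <-> r1, via r12
            genRegCopy(cUnit, r12, v1);
            genRegCopy(cUnit, r1, v2);
            genRegCopy(cUnit, r0, r12);
        } else {                   // v2 == r0, v1 != r1: write r1 first
            genRegCopy(cUnit, r1, v2);
            genRegCopy(cUnit, r0, v1);
        }
    }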
-void oatMethodMIR2LIR(CompilationUnit* cUnit)
-{
-    /* Used to hold the labels of each block */
-    cUnit->blockLabelList =
-        (void *) oatNew(cUnit, sizeof(ArmLIR) * cUnit->numBlocks, true,
-                        kAllocLIR);
-
-    oatDataFlowAnalysisDispatcher(cUnit, methodBlockCodeGen,
-                                  kPreOrderDFSTraversal, false /* Iterative */);
-    handleSuspendLaunchpads(cUnit);
-
-    handleThrowLaunchpads(cUnit);
-
-    removeRedundantBranches(cUnit);
-}
-
-/* Common initialization routine for an architecture family */
-bool oatArchInit()
-{
-    int i;
-
-    for (i = 0; i < kArmLast; i++) {
-        if (EncodingMap[i].opcode != i) {
-            LOG(FATAL) << "Encoding order for " << EncodingMap[i].name <<
-                " is wrong: expecting " << i << ", seeing " <<
-                (int)EncodingMap[i].opcode;
-        }
-    }
-
-    return oatArchVariantInit();
-}
-
-/* Needed by the Assembler */
-void oatSetupResourceMasks(ArmLIR* lir)
-{
-    setupResourceMasks(lir);
-}
-
-/* Needed by the ld/st optimizations */
-ArmLIR* oatRegCopyNoInsert(CompilationUnit* cUnit, int rDest, int rSrc)
-{
-    return genRegCopyNoInsert(cUnit, rDest, rSrc);
-}
-
-/* Needed by the register allocator */
-ArmLIR* oatRegCopy(CompilationUnit* cUnit, int rDest, int rSrc)
-{
-    return genRegCopy(cUnit, rDest, rSrc);
-}
-
-/* Needed by the register allocator */
-void oatRegCopyWide(CompilationUnit* cUnit, int destLo, int destHi,
-                    int srcLo, int srcHi)
-{
-    genRegCopyWide(cUnit, destLo, destHi, srcLo, srcHi);
-}
-
-void oatFlushRegImpl(CompilationUnit* cUnit, int rBase,
-                     int displacement, int rSrc, OpSize size)
-{
-    storeBaseDisp(cUnit, rBase, displacement, rSrc, size);
-}
-
-void oatFlushRegWideImpl(CompilationUnit* cUnit, int rBase,
-                         int displacement, int rSrcLo, int rSrcHi)
-{
-    storeBaseDispWide(cUnit, rBase, displacement, rSrcLo, rSrcHi);
-}
-
-}  // namespace art
diff --git a/src/compiler/codegen/arm/Thumb2/Gen.cc b/src/compiler/codegen/arm/Thumb2/Gen.cc
index dff05b77c3..c385f358fc 100644
--- a/src/compiler/codegen/arm/Thumb2/Gen.cc
+++ b/src/compiler/codegen/arm/Thumb2/Gen.cc
@@ -22,19 +22,22 @@
  *
  */
 
-#define SLOW_FIELD_PATH (cUnit->enableDebug & (1 << kDebugSlowFieldPath))
-#define SLOW_INVOKE_PATH (cUnit->enableDebug & (1 << kDebugSlowInvokePath))
-#define SLOW_STRING_PATH (cUnit->enableDebug & (1 << kDebugSlowStringPath))
-#define SLOW_TYPE_PATH (cUnit->enableDebug & (1 << kDebugSlowTypePath))
-#define EXERCISE_SLOWEST_FIELD_PATH (cUnit->enableDebug & \
-    (1 << kDebugSlowestFieldPath))
-#define EXERCISE_SLOWEST_STRING_PATH (cUnit->enableDebug & \
-    (1 << kDebugSlowestStringPath))
-#define EXERCISE_RESOLVE_METHOD (cUnit->enableDebug & \
-    (1 << kDebugExerciseResolveMethod))
-
 namespace art {
 
+/*
+ * Return most flexible allowed register class based on size.
+ * Bug: 2813841
+ * Must use a core register for data types narrower than word (due
+ * to possible unaligned load/store).
+ */
+STATIC inline RegisterClass oatRegClassBySize(OpSize size)
+{
+    return (size == kUnsignedHalf ||
+            size == kSignedHalf ||
+            size == kUnsignedByte ||
+            size == kSignedByte) ? kCoreReg : kAnyReg;
+}
+
 STATIC RegLocation getRetLoc(CompilationUnit* cUnit);
 
 void warnIfUnresolved(CompilationUnit* cUnit, int fieldIdx, Field* field) {
@@ -67,20 +70,6 @@ STATIC inline s4 s4FromSwitchData(const void* switchData) {
 }
 #endif
 
-STATIC ArmLIR* callRuntimeHelper(CompilationUnit* cUnit, int reg)
-{
-    oatClobberCalleeSave(cUnit);
-    return opReg(cUnit, kOpBlx, reg);
-}
-
-/* Generate unconditional branch instructions */
-STATIC ArmLIR* genUnconditionalBranch(CompilationUnit* cUnit, ArmLIR* target)
-{
-    ArmLIR* branch = opNone(cUnit, kOpUncondBr);
-    branch->generic.target = (LIR*) target;
-    return branch;
-}
-
 /*
  * Generate a Thumb2 IT instruction, which can nullify up to
  * four subsequent instructions based on a condition and its
@@ -386,26 +375,6 @@ STATIC void genFillArrayData(CompilationUnit* cUnit, MIR* mir,
     callRuntimeHelper(cUnit, rLR);
 }
 
-/*
- * Mark garbage collection card. Skip if the value we're storing is null.
- */
-STATIC void markGCCard(CompilationUnit* cUnit, int valReg, int tgtAddrReg)
-{
-    int regCardBase = oatAllocTemp(cUnit);
-    int regCardNo = oatAllocTemp(cUnit);
-    ArmLIR* branchOver = genCmpImmBranch(cUnit, kArmCondEq, valReg, 0);
-    loadWordDisp(cUnit, rSELF, Thread::CardTableOffset().Int32Value(),
-                 regCardBase);
-    opRegRegImm(cUnit, kOpLsr, regCardNo, tgtAddrReg, GC_CARD_SHIFT);
-    storeBaseIndexed(cUnit, regCardBase, regCardNo, regCardBase, 0,
-                     kUnsignedByte);
-    ArmLIR* target = newLIR0(cUnit, kArmPseudoTargetLabel);
-    target->defMask = ENCODE_ALL;
-    branchOver->generic.target = (LIR*)target;
-    oatFreeTemp(cUnit, regCardBase);
-    oatFreeTemp(cUnit, regCardNo);
-}
-
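Editor's note: the removed markGCCard (now hosted in shared code by this change) emits a short dirty-card sequence. Its effect in plain C++, a sketch assuming GC_CARD_SHIFT and the biased card-table base come from the runtime:

    // The value stored is the low byte of the card-table base register itself,
    // which is exactly what "strb rCardBase, [rCardBase, rCardNo]" achieves
    // without needing a third temp for a dirty-value constant.
    static void markCardSketch(uint8_t* cardBase, uintptr_t tgtAddr, void* storedVal) {
        if (storedVal == nullptr) return;  // null stores never need a card
        cardBase[tgtAddr >> GC_CARD_SHIFT] =
            static_cast<uint8_t>(reinterpret_cast<uintptr_t>(cardBase));
    }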
 STATIC void genIGet(CompilationUnit* cUnit, MIR* mir, OpSize size,
                     RegLocation rlDest, RegLocation rlObj,
                     bool isLongOrDouble, bool isObject)
@@ -1274,16 +1243,6 @@ STATIC bool genConversionPortable(CompilationUnit* cUnit, MIR* mir)
     return false;
 }
 
-/* Generate conditional branch instructions */
-STATIC ArmLIR* genConditionalBranch(CompilationUnit* cUnit,
-                                    ArmConditionCode cond,
-                                    ArmLIR* target)
-{
-    ArmLIR* branch = opCondBranch(cUnit, cond);
-    branch->generic.target = (LIR*) target;
-    return branch;
-}
-
 /*
  * Generate array store
  *
diff --git a/src/compiler/codegen/arm/armv7-a-neon/Codegen.cc b/src/compiler/codegen/arm/armv7-a-neon/Codegen.cc
index 00339ef61a..f042d5ddde 100644
--- a/src/compiler/codegen/arm/armv7-a-neon/Codegen.cc
+++ b/src/compiler/codegen/arm/armv7-a-neon/Codegen.cc
@@ -18,11 +18,8 @@
 #define TGT_LIR ArmLIR
 
 #include "../../../Dalvik.h"
-//#include "interp/InterpDefs.h"
-//#include "libdex/DexOpcodes.h"
 #include "../../../CompilerInternals.h"
 #include "../arm/ArmLIR.h"
-//#include "mterp/common/FindInterface.h"
 #include "../../Ralloc.h"
 #include "../Codegen.h"
 
@@ -45,7 +42,13 @@
 #include "../Thumb2/Ralloc.cc"
 
 /* MIR2LIR dispatcher and architectural independent codegen routines */
-#include "../MethodCodegenDriver.cc"
+#include "../../MethodCodegenDriver.cc"
+
+/* Target-independent local optimizations */
+#include "../../LocalOptimizations.cc"
+
+/* Common codegen utility code */
+#include "../../CodegenUtil.cc"
 
 /* Architecture manifest */
 #include "ArchVariant.cc"
diff --git a/src/compiler/codegen/arm/armv7-a/Codegen.cc b/src/compiler/codegen/arm/armv7-a/Codegen.cc
index 384ce15637..633ab1e411 100644
--- a/src/compiler/codegen/arm/armv7-a/Codegen.cc
+++ b/src/compiler/codegen/arm/armv7-a/Codegen.cc
@@ -18,11 +18,8 @@
 #define TGT_LIR ArmLIR
 
 #include "../../../Dalvik.h"
-//#include "interp/InterpDefs.h"
-//#include "libdex/DexOpcodes.h"
 #include "../../../CompilerInternals.h"
 #include "../ArmLIR.h"
-//#include "mterp/common/FindInterface.h"
 #include "../../Ralloc.h"
 #include "../Codegen.h"
 
@@ -45,7 +42,13 @@
 #include "../Thumb2/Ralloc.cc"
 
 /* MIR2LIR dispatcher and architectural independent codegen routines */
-#include "../MethodCodegenDriver.cc"
+#include "../../MethodCodegenDriver.cc"
+
+/* Target-independent local optimizations */
+#include "../../LocalOptimizations.cc"
+
+/* Common codegen utility code */
+#include "../../CodegenUtil.cc"
 
 /* Architecture manifest */
 #include "ArchVariant.cc"
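Editor's note: both variant Codegen.cc files above use the same single-translation-unit composition: the backend is stitched together textually, so the shared driver is compiled against the target's TGT_LIR and inline helpers with no link-time indirection. Schematically, with paths abbreviated (this is not a literal file in the tree):

    // Composition pattern used by the per-variant Codegen.cc files:
    #define TGT_LIR ArmLIR            // target LIR type seen by the shared driver
    #include "Dalvik.h"               // core headers first
    #include "MethodCodegenDriver.cc" // target-independent MIR->LIR driver
    #include "LocalOptimizations.cc"  // redundant load/store elimination
    #include "CodegenUtil.cc"         // shared LIR utilities
    #include "ArchVariant.cc"         // armv7-a / armv7-a-neon specializations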