Oat compiler integration snapshot.

Cleanly compiles, but not integrated.  Old-world dependencies captured
in hacked-up temporary files "Dalvik.h" and "HackStubs.cc".

Dalvik.h is a placeholder that captures all of the constants, struct
definitions and inline functions the compiler needs.  It largely consists
of declaration fragments of libdex, Object.h, DvmDex.h and Thread.h.

HackStubs.cc contains empty shells for some required libdex routines.

Change-Id: Ia479dda41da4e3162ff6df383252fdc7dbf38d71
diff --git a/src/compiler/codegen/arm/ArchFactory.cc b/src/compiler/codegen/arm/ArchFactory.cc
new file mode 100644
index 0000000..bf87c1e
--- /dev/null
+++ b/src/compiler/codegen/arm/ArchFactory.cc
@@ -0,0 +1,85 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * This file contains arm-specific codegen factory support.
+ * It is included by
+ *
+ *        Codegen-$(TARGET_ARCH_VARIANT).c
+ *
+ */
+
+/*
+ * Perform a "reg cmp imm" operation and jump to the PCR region if condition
+ * satisfies.
+ */
+static TGT_LIR* genRegImmCheck(CompilationUnit* cUnit,
+                               ArmConditionCode cond, int reg,
+                               int checkValue, int dOffset,
+                               TGT_LIR* pcrLabel)
+{
+    TGT_LIR* branch = genCmpImmBranch(cUnit, cond, reg, checkValue);
+    BasicBlock* bb = cUnit->curBlock;
+    if (bb->taken) {
+        ArmLIR  *exceptionLabel = (ArmLIR* ) cUnit->blockLabelList;
+        exceptionLabel += bb->taken->id;
+        branch->generic.target = (LIR* ) exceptionLabel;
+        return exceptionLabel;
+    } else {
+        LOG(FATAL) << "Catch blocks not handled yet";
+        return NULL; // quiet gcc
+    }
+}
+
+/*
+ * Perform null-check on a register. sReg is the ssa register being checked,
+ * and mReg is the machine register holding the actual value. If internal state
+ * indicates that sReg has been checked before the check request is ignored.
+ */
+static TGT_LIR* genNullCheck(CompilationUnit* cUnit, int sReg, int mReg,
+                             int dOffset, TGT_LIR* pcrLabel)
+{
+    /* This particular Dalvik register has been null-checked */
+    if (oatIsBitSet(cUnit->regPool->nullCheckedRegs, sReg)) {
+        return pcrLabel;
+    }
+    oatSetBit(cUnit->regPool->nullCheckedRegs, sReg);
+    return genRegImmCheck(cUnit, kArmCondEq, mReg, 0, dOffset, pcrLabel);
+}
+
+/*
+ * Perform a "reg cmp reg" operation and jump to the PCR region if condition
+ * satisfies.
+ */
+static TGT_LIR* genRegRegCheck(CompilationUnit* cUnit,
+                               ArmConditionCode cond,
+                               int reg1, int reg2, int dOffset,
+                               TGT_LIR* pcrLabel)
+{
+    TGT_LIR* res;
+    res = opRegReg(cUnit, kOpCmp, reg1, reg2);
+    TGT_LIR* branch = opCondBranch(cUnit, cond);
+    genCheckCommon(cUnit, dOffset, branch, pcrLabel);
+    return res;
+}
+
+/* Perform bound check on two registers */
+static TGT_LIR* genBoundsCheck(CompilationUnit* cUnit, int rIndex,
+                               int rBound, int dOffset, TGT_LIR* pcrLabel)
+{
+    return genRegRegCheck(cUnit, kArmCondCs, rIndex, rBound, dOffset,
+                          pcrLabel);
+}
diff --git a/src/compiler/codegen/arm/ArchUtility.cc b/src/compiler/codegen/arm/ArchUtility.cc
new file mode 100644
index 0000000..16a0200
--- /dev/null
+++ b/src/compiler/codegen/arm/ArchUtility.cc
@@ -0,0 +1,495 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "../../CompilerInternals.h"
+#include "ArmLIR.h"
+
+static const char* coreRegNames[16] = {
+    "r0",
+    "r1",
+    "r2",
+    "r3",
+    "r4",
+    "r5",
+    "r6",
+    "r7",
+    "r8",
+    "rSELF",
+    "r10",
+    "r11",
+    "r12",
+    "sp",
+    "lr",
+    "pc",
+};
+
+
+static const char* shiftNames[4] = {
+    "lsl",
+    "lsr",
+    "asr",
+    "ror"};
+
+/* Decode and print a ARM register name */
+static char* decodeRegList(ArmOpcode opcode, int vector, char* buf)
+{
+    int i;
+    bool printed = false;
+    buf[0] = 0;
+    for (i = 0; i < 16; i++, vector >>= 1) {
+        if (vector & 0x1) {
+            int regId = i;
+            if (opcode == kThumbPush && i == 8) {
+                regId = r14lr;
+            } else if (opcode == kThumbPop && i == 8) {
+                regId = r15pc;
+            }
+            if (printed) {
+                sprintf(buf + strlen(buf), ", r%d", regId);
+            } else {
+                printed = true;
+                sprintf(buf, "r%d", regId);
+            }
+        }
+    }
+    return buf;
+}
+
+static char*  decodeFPCSRegList(int count, int base, char* buf)
+{
+    sprintf(buf, "s%d", base);
+    for (int i = 1; i < count; i++) {
+        sprintf(buf + strlen(buf), ", s%d",base + i);
+    }
+    return buf;
+}
+
+static int expandImmediate(int value)
+{
+    int mode = (value & 0xf00) >> 8;
+    u4 bits = value & 0xff;
+    switch(mode) {
+        case 0:
+            return bits;
+       case 1:
+            return (bits << 16) | bits;
+       case 2:
+            return (bits << 24) | (bits << 8);
+       case 3:
+            return (bits << 24) | (bits << 16) | (bits << 8) | bits;
+      default:
+            break;
+    }
+    bits = (bits | 0x80) << 24;
+    return bits >> (((value & 0xf80) >> 7) - 8);
+}
+
+const char* ccNames[] = {"eq","ne","cs","cc","mi","pl","vs","vc",
+                         "hi","ls","ge","lt","gt","le","al","nv"};
+/*
+ * Interpret a format string and build a string no longer than size
+ * See format key in Assemble.c.
+ */
+static void buildInsnString(const char* fmt, ArmLIR* lir, char* buf,
+                            unsigned char* baseAddr, int size)
+{
+    int i;
+    char* bufEnd = &buf[size-1];
+    const char* fmtEnd = &fmt[strlen(fmt)];
+    char tbuf[256];
+    const char* name;
+    char nc;
+    while (fmt < fmtEnd) {
+        int operand;
+        if (*fmt == '!') {
+            fmt++;
+            assert(fmt < fmtEnd);
+            nc = *fmt++;
+            if (nc=='!') {
+                strcpy(tbuf, "!");
+            } else {
+               assert(fmt < fmtEnd);
+               assert((unsigned)(nc-'0') < 4);
+               operand = lir->operands[nc-'0'];
+               switch(*fmt++) {
+                   case 'H':
+                       if (operand != 0) {
+                           sprintf(tbuf, ", %s %d",shiftNames[operand & 0x3],
+                                   operand >> 2);
+                       } else {
+                           strcpy(tbuf,"");
+                       }
+                       break;
+                   case 'B':
+                       switch (operand) {
+                           case kSY:
+                               name = "sy";
+                               break;
+                           case kST:
+                               name = "st";
+                               break;
+                           case kISH:
+                               name = "ish";
+                               break;
+                           case kISHST:
+                               name = "ishst";
+                               break;
+                           case kNSH:
+                               name = "nsh";
+                               break;
+                           case kNSHST:
+                               name = "shst";
+                               break;
+                           default:
+                               name = "DecodeError2";
+                               break;
+                       }
+                       strcpy(tbuf, name);
+                       break;
+                   case 'b':
+                       strcpy(tbuf,"0000");
+                       for (i=3; i>= 0; i--) {
+                           tbuf[i] += operand & 1;
+                           operand >>= 1;
+                       }
+                       break;
+                   case 'n':
+                       operand = ~expandImmediate(operand);
+                       sprintf(tbuf,"%d [%#x]", operand, operand);
+                       break;
+                   case 'm':
+                       operand = expandImmediate(operand);
+                       sprintf(tbuf,"%d [%#x]", operand, operand);
+                       break;
+                   case 's':
+                       sprintf(tbuf,"s%d",operand & FP_REG_MASK);
+                       break;
+                   case 'S':
+                       sprintf(tbuf,"d%d",(operand & FP_REG_MASK) >> 1);
+                       break;
+                   case 'h':
+                       sprintf(tbuf,"%04x", operand);
+                       break;
+                   case 'M':
+                   case 'd':
+                       sprintf(tbuf,"%d", operand);
+                       break;
+                   case 'C':
+                       sprintf(tbuf,"%s",coreRegNames[operand]);
+                       break;
+                   case 'E':
+                       sprintf(tbuf,"%d", operand*4);
+                       break;
+                   case 'F':
+                       sprintf(tbuf,"%d", operand*2);
+                       break;
+                   case 'c':
+                       strcpy(tbuf, ccNames[operand]);
+                       break;
+                   case 't':
+                       sprintf(tbuf,"0x%08x (L%p)",
+                               (int) baseAddr + lir->generic.offset + 4 +
+                               (operand << 1),
+                               lir->generic.target);
+                       break;
+                   case 'u': {
+                       int offset_1 = lir->operands[0];
+                       int offset_2 = NEXT_LIR(lir)->operands[0];
+                       intptr_t target =
+                           ((((intptr_t) baseAddr + lir->generic.offset + 4) &
+                            ~3) + (offset_1 << 21 >> 9) + (offset_2 << 1)) &
+                           0xfffffffc;
+                       sprintf(tbuf, "%p", (void *) target);
+                       break;
+                    }
+
+                   /* Nothing to print for BLX_2 */
+                   case 'v':
+                       strcpy(tbuf, "see above");
+                       break;
+                   case 'R':
+                       decodeRegList(lir->opcode, operand, tbuf);
+                       break;
+                   case 'P':
+                       decodeFPCSRegList(operand, 16, tbuf);
+                       break;
+                   case 'Q':
+                       decodeFPCSRegList(operand, 0, tbuf);
+                       break;
+                   default:
+                       strcpy(tbuf,"DecodeError1");
+                       break;
+               }
+               if (buf+strlen(tbuf) <= bufEnd) {
+                   strcpy(buf, tbuf);
+                   buf += strlen(tbuf);
+               } else {
+                   break;
+               }
+            }
+        } else {
+           *buf++ = *fmt++;
+        }
+        if (buf == bufEnd)
+            break;
+    }
+    *buf = 0;
+}
+
+void oatDumpResourceMask(LIR* lir, u8 mask, const char* prefix)
+{
+    char buf[256];
+    buf[0] = 0;
+    ArmLIR* armLIR = (ArmLIR*) lir;
+
+    if (mask == ENCODE_ALL) {
+        strcpy(buf, "all");
+    } else {
+        char num[8];
+        int i;
+
+        for (i = 0; i < kRegEnd; i++) {
+            if (mask & (1ULL << i)) {
+                sprintf(num, "%d ", i);
+                strcat(buf, num);
+            }
+        }
+
+        if (mask & ENCODE_CCODE) {
+            strcat(buf, "cc ");
+        }
+        if (mask & ENCODE_FP_STATUS) {
+            strcat(buf, "fpcc ");
+        }
+
+        /* Memory bits */
+        if (armLIR && (mask & ENCODE_DALVIK_REG)) {
+            sprintf(buf + strlen(buf), "dr%d%s", armLIR->aliasInfo & 0xffff,
+                    (armLIR->aliasInfo & 0x80000000) ? "(+1)" : "");
+        }
+        if (mask & ENCODE_LITERAL) {
+            strcat(buf, "lit ");
+        }
+
+        if (mask & ENCODE_HEAP_REF) {
+            strcat(buf, "heap ");
+        }
+        if (mask & ENCODE_MUST_NOT_ALIAS) {
+            strcat(buf, "noalias ");
+        }
+    }
+    if (buf[0]) {
+        LOG(INFO) << prefix << ": " << buf;
+    }
+}
+
+/*
+ * Debugging macros
+ */
+#define DUMP_RESOURCE_MASK(X)
+#define DUMP_SSA_REP(X)
+
+/* Pretty-print a LIR instruction */
+void oatDumpLIRInsn(CompilationUnit* cUnit, LIR* arg, unsigned char* baseAddr)
+{
+    ArmLIR* lir = (ArmLIR*) arg;
+    if (lir->flags.isNop)
+        return;
+    char buf[256];
+    char opName[256];
+    int offset = lir->generic.offset;
+    int dest = lir->operands[0];
+    const bool dumpNop = false;
+
+    /* Handle pseudo-ops individually, and all regular insns as a group */
+    switch(lir->opcode) {
+        case kArmPseudoMethodEntry:
+            LOG(INFO) << "-------- method entry " <<
+                 cUnit->method->clazz->descriptor << ":" <<
+                 cUnit->method->name;
+            break;
+        case kArmPseudoMethodExit:
+            LOG(INFO) << "-------- Method_Exit";
+            break;
+        case kArmPseudoBarrier:
+            LOG(INFO) << "-------- BARRIER";
+            break;
+        case kArmPseudoExtended:
+            LOG(INFO) << "-------- " << (char* ) dest;
+            break;
+        case kArmPseudoSSARep:
+            DUMP_SSA_REP(LOG(INFO) << "-------- kMirOpPhi: " <<  (char* ) dest);
+            break;
+        case kArmPseudoEntryBlock:
+            LOG(INFO) << "-------- entry offset: 0x" << std::hex << dest;
+            break;
+        case kArmPseudoDalvikByteCodeBoundary:
+            LOG(INFO) << "-------- dalvik offset: 0x" << std::hex <<
+                 lir->generic.dalvikOffset << " @ " << (char* )lir->operands[0];
+            break;
+        case kArmPseudoExitBlock:
+            LOG(INFO) << "-------- exit offset: 0x" << std::hex << dest;
+            break;
+        case kArmPseudoPseudoAlign4:
+            LOG(INFO) << (intptr_t)baseAddr + offset << " (0x" << std::hex <<
+                offset << "): .align4";
+            break;
+        case kArmPseudoEHBlockLabel:
+            LOG(INFO) << "Exception_Handling:";
+            break;
+        case kArmPseudoTargetLabel:
+        case kArmPseudoNormalBlockLabel:
+            LOG(INFO) << "L" << (intptr_t)lir << ":";
+            break;
+        case kArmPseudoCaseLabel:
+            LOG(INFO) << "LC" << (intptr_t)lir << ": Case target 0x" <<
+                std::hex << lir->operands[0] << "|" << std::dec <<
+                lir->operands[0];
+            break;
+        default:
+            if (lir->flags.isNop && !dumpNop) {
+                break;
+            }
+            buildInsnString(EncodingMap[lir->opcode].name, lir, opName,
+                            baseAddr, 256);
+            buildInsnString(EncodingMap[lir->opcode].fmt, lir, buf, baseAddr,
+                            256);
+            char buf[100];
+            snprintf(buf, 100, "%p (%04x): %-8s%s%s",
+                 baseAddr + offset, offset, opName, buf,
+                 lir->flags.isNop ? "(nop)" : "");
+            LOG(INFO) << buf;
+            break;
+    }
+
+    if (lir->useMask && (!lir->flags.isNop || dumpNop)) {
+        DUMP_RESOURCE_MASK(oatDumpResourceMask((LIR* ) lir,
+                                               lir->useMask, "use"));
+    }
+    if (lir->defMask && (!lir->flags.isNop || dumpNop)) {
+        DUMP_RESOURCE_MASK(oatDumpResourceMask((LIR* ) lir,
+                                               lir->defMask, "def"));
+    }
+}
+
+/* Dump instructions and constant pool contents */
+void oatCodegenDump(CompilationUnit* cUnit)
+{
+    const Method *method = cUnit->method;
+    LOG(INFO) << "/*";
+    LOG(INFO) << "Dumping LIR insns for " << method->clazz->descriptor <<
+        ":" << method->name;
+    LIR* lirInsn;
+    ArmLIR* armLIR;
+    int insnsSize = cUnit->insnsSize;
+
+    LOG(INFO) << "Regs (excluding ins) : " << cUnit->numRegs;
+    LOG(INFO) << "Ins                  : " << cUnit->numIns;
+    LOG(INFO) << "Outs                 : " << cUnit->numOuts;
+    LOG(INFO) << "Spills               : " << cUnit->numSpills;
+    LOG(INFO) << "Padding              : " << cUnit->numPadding;
+    LOG(INFO) << "Frame size           : " << cUnit->frameSize;
+    LOG(INFO) << "Start of ins         : " << cUnit->insOffset;
+    LOG(INFO) << "Start of regs        : " << cUnit->regsOffset;
+    LOG(INFO) << "code size is " << cUnit->totalSize <<
+        " bytes, Dalvik size is " << insnsSize * 2;
+    LOG(INFO) << "expansion factor: " <<
+         (float)cUnit->totalSize / (float)(insnsSize * 2);
+    for (int i = 0; i < method->registersSize; i++) {
+        RegLocation loc = cUnit->regLocation[i];
+        char buf[100];
+        if (loc.fpLocation == kLocPhysReg) {
+            snprintf(buf, 100, " : s%d", loc.fpLowReg & FP_REG_MASK);
+        } else {
+            buf[0] = 0;
+        }
+        char buf2[100];
+        snprintf(buf2, 100, "V[%02d] -> %s%d%s", i,
+                 loc.location == kLocPhysReg ?
+                 "r" : "SP+", loc.location == kLocPhysReg ?
+                 loc.lowReg : loc.spOffset, buf);
+        LOG(INFO) << buf2;
+
+    }
+    for (lirInsn = cUnit->firstLIRInsn; lirInsn; lirInsn = lirInsn->next) {
+        oatDumpLIRInsn(cUnit, lirInsn, (unsigned char*) cUnit->baseAddr);
+    }
+    for (lirInsn = cUnit->classPointerList; lirInsn; lirInsn = lirInsn->next) {
+        armLIR = (ArmLIR*) lirInsn;
+        char buf[100];
+        snprintf(buf, 100, "%p (%04x): .class (%s)",
+             (char*)cUnit->baseAddr + armLIR->generic.offset,
+             armLIR->generic.offset,
+             ((CallsiteInfo *) armLIR->operands[0])->classDescriptor);
+        LOG(INFO) << buf;
+    }
+    for (lirInsn = cUnit->literalList; lirInsn; lirInsn = lirInsn->next) {
+        armLIR = (ArmLIR*) lirInsn;
+        char buf[100];
+        snprintf(buf, 100, "%p (%04x): .word (%#x)",
+             (char*)cUnit->baseAddr + armLIR->generic.offset,
+             armLIR->generic.offset,
+             armLIR->operands[0]);
+        LOG(INFO) << buf;
+
+    }
+
+    int linebreak = 0;
+    char buf[100];
+    LOG(INFO) << "*/";
+    sprintf(buf,"\n    u1 %s%s_%s_code[] = {",
+            cUnit->method->clazz->descriptor, cUnit->method->name,
+            cUnit->method->shorty);
+    for (unsigned int i = 0; i < strlen(buf); i++)
+        if (buf[i] == ';') buf[i] = '_';
+    LOG(INFO) << buf;
+    strcpy(buf,"        ");
+    for (int i = 0; i < cUnit->totalSize; i++) {
+        sprintf(buf+strlen(buf),"0x%02x,",
+               ((u1*)cUnit->baseAddr)[i]);
+        if (++linebreak == 8) {
+            linebreak = 0;
+            LOG(INFO) << buf;
+            strcpy(buf,"        ");
+        }
+    }
+    if (strlen(buf) > 8) {
+        LOG(INFO) << buf;
+    }
+    LOG(INFO) << "    };\n\n";
+
+    // Dump mapping table
+    if (cUnit->mappingTableSize > 0) {
+        MappingTable *table = cUnit->mappingTable;
+        if (!table) {
+            LOG(FATAL) << "Null table";
+        }
+        sprintf(buf,"\n    MappingTable %s%s_%s_mappingTable[%d] = {",
+                cUnit->method->clazz->descriptor, cUnit->method->name,
+                cUnit->method->shorty, cUnit->mappingTableSize);
+        for (unsigned int i = 0; i < strlen(buf); i++)
+            if (buf[i] == ';') buf[i] = '_';
+        LOG(INFO) << buf;
+        strcpy(buf,"       ");
+        for (int i = 0; i < cUnit->mappingTableSize; i++) {
+            sprintf(buf+strlen(buf)," {0x%08x, 0x%04x},",
+                    table[i].targetOffset, table[i].dalvikOffset);
+            LOG(INFO) << buf;
+            strcpy(buf,"       ");
+        }
+        LOG(INFO) <<"    };\n\n";
+    }
+}
diff --git a/src/compiler/codegen/arm/ArmLIR.h b/src/compiler/codegen/arm/ArmLIR.h
new file mode 100644
index 0000000..9d22c4f
--- /dev/null
+++ b/src/compiler/codegen/arm/ArmLIR.h
@@ -0,0 +1,867 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_SRC_COMPILER_CODEGEN_ARM_ARMLIR_H_
+#define ART_SRC_COMPILER_CODEGEN_ARM_ARMLIR_H_
+
+#include "../../Dalvik.h"
+#include "../../CompilerInternals.h"
+
+/*
+ * Runtime register usage conventions.
+ *
+ * r0-r3: Argument registers in both Dalvik and C/C++ conventions.
+ *        However, for Dalvik->Dalvik calls we'll pass the target's Method*
+ *        pointer in r0 as a hidden arg0. Otherwise used as codegen scratch
+ *        registers.
+ * r0-r1: As in C/C++ r0 is 32-bit return register and r0/r1 is 64-bit
+ * r4   : Callee save (promotion target)
+ * r5   : Callee save (promotion target)
+ * r6   : Callee save (promotion target)
+ * r7   : Callee save (promotion target)
+ * r8   : Callee save (promotion target)
+ * r9   : (rSELF) is reserved (pointer to thread-local storage)
+ * r10  : Callee save (promotion target)
+ * r11  : Callee save (promotion target)
+ * r12  : Scratch, may be trashed by linkage stubs
+ * r13  : (sp) is reserved
+ * r14  : (lr) is reserved
+ * r15  : (pc) is reserved
+ *
+ * 5 core temps that codegen can use (r0, r1, r2, r3, r12)
+ * 7 core registers that can be used for promotion
+ *
+ * Floating pointer registers
+ * s0-s31
+ * d0-d15, where d0={s0,s1}, d1={s2,s3}, ... , d15={s30,s31}
+ *
+ * s16-s31 (d8-d15) preserved across C calls
+ * s0-s15 (d0-d7) trashed across C calls
+ *
+ * s0-s15/d0-d7 used as codegen temp/scratch
+ * s16-s31/d8-d31 can be used for promotion.
+ *
+ * Calling convention
+ *     o On a call to a Dalvik method, pass target's Method* in r0
+ *     o r1-r3 will be used for up to the first 3 words of arguments
+ *     o Arguments past the first 3 words will be placed in appropriate
+ *       out slots by the caller.
+ *     o If a 64-bit argument would span the register/memory argument
+ *       boundary, it will instead be fully passed in the frame.
+ *     o Maintain a 16-byte stack alignment
+ *
+ *  Stack frame diagram (stack grows down, higher addresses at top):
+ *
+ * +------------------------+
+ * | IN[ins-1]              |  {Note: resides in caller's frame}
+ * |       .                |
+ * | IN[0]                  |
+ * | caller's Method*       |
+ * +========================+  {Note: start of callee's frame}
+ * | spill region           |  {variable sized - will include lr if non-leaf.}
+ * +------------------------+
+ * | ...filler word...      |  {Note: used as 2nd word of V[locals-1] if long]
+ * +------------------------+
+ * | V[locals-1]            |
+ * | V[locals-2]            |
+ * |      .                 |
+ * |      .                 |
+ * | V[1]                   |
+ * | V[0]                   |
+ * +------------------------+
+ * |  0 to 3 words padding  |
+ * +------------------------+
+ * | OUT[outs-1]            |
+ * | OUT[outs-2]            |
+ * |       .                |
+ * | OUT[0]                 |
+ * | curMethod*             | <<== sp w/ 16-byte alignment
+ * +========================+
+ */
+
+/* Offset to distingish FP regs */
+#define FP_REG_OFFSET 32
+/* Offset to distinguish DP FP regs */
+#define FP_DOUBLE 64
+/* Reg types */
+#define REGTYPE(x) (x & (FP_REG_OFFSET | FP_DOUBLE))
+#define FPREG(x) ((x & FP_REG_OFFSET) == FP_REG_OFFSET)
+#define LOWREG(x) ((x & 0x7) == x)
+#define DOUBLEREG(x) ((x & FP_DOUBLE) == FP_DOUBLE)
+#define SINGLEREG(x) (FPREG(x) && !DOUBLEREG(x))
+/*
+ * Note: the low register of a floating point pair is sufficient to
+ * create the name of a double, but require both names to be passed to
+ * allow for asserts to verify that the pair is consecutive if significant
+ * rework is done in this area.  Also, it is a good reminder in the calling
+ * code that reg locations always describe doubles as a pair of singles.
+ */
+#define S2D(x,y) ((x) | FP_DOUBLE)
+/* Mask to strip off fp flags */
+#define FP_REG_MASK (FP_REG_OFFSET-1)
+/* non-existent Dalvik register */
+#define vNone   (-1)
+/* non-existant physical register */
+#define rNone   (-1)
+
+/* RegisterLocation templates return values (r0, or r0/r1) */
+#define LOC_C_RETURN {kLocPhysReg, 0, 0, r0, INVALID_REG, INVALID_SREG, \
+                      1, kLocPhysReg, r0, INVALID_REG, INVALID_OFFSET}
+#define LOC_C_RETURN_WIDE {kLocPhysReg, 1, 0, r0, r1, INVALID_SREG, \
+                      1, kLocPhysReg, r0, r1, INVALID_OFFSET}
+/* RegisterLocation templates for interpState->retVal; */
+#define LOC_DALVIK_RETURN_VAL {kLocPhysReg, 0, 0, r0, INVALID_REG, \
+                      INVALID_SREG, 1, kLocPhysReg, r0, INVALID_REG, \
+                      INVALID_OFFSET}
+#define LOC_DALVIK_RETURN_VAL_WIDE {kLocPhysReg, 1, 0, r0, r1, \
+                      INVALID_SREG, 1, kLocPhysReg, r0, r1, INVALID_OFFSET}
+
+ /*
+ * Data structure tracking the mapping between a Dalvik register (pair) and a
+ * native register (pair). The idea is to reuse the previously loaded value
+ * if possible, otherwise to keep the value in a native register as long as
+ * possible.
+ */
+typedef struct RegisterInfo {
+    int reg;                    // Reg number
+    bool inUse;                 // Has it been allocated?
+    bool isTemp;                // Can allocate as temp?
+    bool pair;                  // Part of a register pair?
+    int partner;                // If pair, other reg of pair
+    bool live;                  // Is there an associated SSA name?
+    bool dirty;                 // If live, is it dirty?
+    int sReg;                   // Name of live value
+    struct LIR *defStart;       // Starting inst in last def sequence
+    struct LIR *defEnd;         // Ending inst in last def sequence
+} RegisterInfo;
+
+typedef struct RegisterPool {
+    ArenaBitVector *nullCheckedRegs; // Which registers have been null-checked?
+    int numCoreRegs;
+    RegisterInfo *coreRegs;
+    int nextCoreReg;
+    int numFPRegs;
+    RegisterInfo *FPRegs;
+    int nextFPReg;
+} RegisterPool;
+
+typedef enum ResourceEncodingPos {
+    kGPReg0     = 0,
+    kRegSP      = 13,
+    kRegLR      = 14,
+    kRegPC      = 15,
+    kFPReg0     = 16,
+    kFPReg16    = 32,
+    kRegEnd     = 48,
+    kCCode      = kRegEnd,
+    kFPStatus,          // FP status word
+    // The following four bits are for memory disambiguation
+    kDalvikReg,         // 1 Dalvik Frame (can be fully disambiguated)
+    kLiteral,           // 2 Literal pool (can be fully disambiguated)
+    kHeapRef,           // 3 Somewhere on the heap (alias with any other heap)
+    kMustNotAlias,      // 4 Guaranteed to be non-alias (eg *(r6+x))
+} ResourceEncodingPos;
+
+#define ENCODE_REG_LIST(N)      ((u8) N)
+#define ENCODE_REG_SP           (1ULL << kRegSP)
+#define ENCODE_REG_LR           (1ULL << kRegLR)
+#define ENCODE_REG_PC           (1ULL << kRegPC)
+#define ENCODE_CCODE            (1ULL << kCCode)
+#define ENCODE_FP_STATUS        (1ULL << kFPStatus)
+#define ENCODE_REG_FPCS_LIST(N) ((u8)N << kFPReg16)
+
+/* Abstract memory locations */
+#define ENCODE_DALVIK_REG       (1ULL << kDalvikReg)
+#define ENCODE_LITERAL          (1ULL << kLiteral)
+#define ENCODE_HEAP_REF         (1ULL << kHeapRef)
+#define ENCODE_MUST_NOT_ALIAS   (1ULL << kMustNotAlias)
+
+#define ENCODE_ALL              (~0ULL)
+#define ENCODE_MEM              (ENCODE_DALVIK_REG | ENCODE_LITERAL | \
+                                 ENCODE_HEAP_REF | ENCODE_MUST_NOT_ALIAS)
+
+#define DECODE_ALIAS_INFO_REG(X)        (X & 0xffff)
+#define DECODE_ALIAS_INFO_WIDE(X)       ((X & 0x80000000) ? 1 : 0)
+
+typedef enum OpSize {
+    kWord,
+    kLong,
+    kSingle,
+    kDouble,
+    kUnsignedHalf,
+    kSignedHalf,
+    kUnsignedByte,
+    kSignedByte,
+} OpSize;
+
+typedef enum OpKind {
+    kOpMov,
+    kOpMvn,
+    kOpCmp,
+    kOpLsl,
+    kOpLsr,
+    kOpAsr,
+    kOpRor,
+    kOpNot,
+    kOpAnd,
+    kOpOr,
+    kOpXor,
+    kOpNeg,
+    kOpAdd,
+    kOpAdc,
+    kOpSub,
+    kOpSbc,
+    kOpRsub,
+    kOpMul,
+    kOpDiv,
+    kOpRem,
+    kOpBic,
+    kOpCmn,
+    kOpTst,
+    kOpBkpt,
+    kOpBlx,
+    kOpPush,
+    kOpPop,
+    kOp2Char,
+    kOp2Short,
+    kOp2Byte,
+    kOpCondBr,
+    kOpUncondBr,
+} OpKind;
+
+/*
+ * Annotate special-purpose core registers:
+ *   - VM: r4PC, r5FP, and r6SELF
+ *   - ARM architecture: r13sp, r14lr, and r15pc
+ *
+ * rPC, rFP, and rSELF are for architecture-independent code to use.
+ */
+typedef enum NativeRegisterPool {
+    r0     = 0,
+    r1     = 1,
+    r2     = 2,
+    r3     = 3,
+    r4     = 4,
+    r5     = 5,
+    r6     = 6,
+    r7     = 7,
+    r8     = 8,
+    rSELF  = 9,
+    r10    = 10,
+    r11    = 11,
+    r12    = 12,
+    r13sp  = 13,
+    rSP    = 13,
+    r14lr  = 14,
+    rLR    = 14,
+    r15pc  = 15,
+    rPC    = 15,
+    fr0  =  0 + FP_REG_OFFSET,
+    fr1  =  1 + FP_REG_OFFSET,
+    fr2  =  2 + FP_REG_OFFSET,
+    fr3  =  3 + FP_REG_OFFSET,
+    fr4  =  4 + FP_REG_OFFSET,
+    fr5  =  5 + FP_REG_OFFSET,
+    fr6  =  6 + FP_REG_OFFSET,
+    fr7  =  7 + FP_REG_OFFSET,
+    fr8  =  8 + FP_REG_OFFSET,
+    fr9  =  9 + FP_REG_OFFSET,
+    fr10 = 10 + FP_REG_OFFSET,
+    fr11 = 11 + FP_REG_OFFSET,
+    fr12 = 12 + FP_REG_OFFSET,
+    fr13 = 13 + FP_REG_OFFSET,
+    fr14 = 14 + FP_REG_OFFSET,
+    fr15 = 15 + FP_REG_OFFSET,
+    fr16 = 16 + FP_REG_OFFSET,
+    fr17 = 17 + FP_REG_OFFSET,
+    fr18 = 18 + FP_REG_OFFSET,
+    fr19 = 19 + FP_REG_OFFSET,
+    fr20 = 20 + FP_REG_OFFSET,
+    fr21 = 21 + FP_REG_OFFSET,
+    fr22 = 22 + FP_REG_OFFSET,
+    fr23 = 23 + FP_REG_OFFSET,
+    fr24 = 24 + FP_REG_OFFSET,
+    fr25 = 25 + FP_REG_OFFSET,
+    fr26 = 26 + FP_REG_OFFSET,
+    fr27 = 27 + FP_REG_OFFSET,
+    fr28 = 28 + FP_REG_OFFSET,
+    fr29 = 29 + FP_REG_OFFSET,
+    fr30 = 30 + FP_REG_OFFSET,
+    fr31 = 31 + FP_REG_OFFSET,
+    dr0 = fr0 + FP_DOUBLE,
+    dr1 = fr2 + FP_DOUBLE,
+    dr2 = fr4 + FP_DOUBLE,
+    dr3 = fr6 + FP_DOUBLE,
+    dr4 = fr8 + FP_DOUBLE,
+    dr5 = fr10 + FP_DOUBLE,
+    dr6 = fr12 + FP_DOUBLE,
+    dr7 = fr14 + FP_DOUBLE,
+    dr8 = fr16 + FP_DOUBLE,
+    dr9 = fr18 + FP_DOUBLE,
+    dr10 = fr20 + FP_DOUBLE,
+    dr11 = fr22 + FP_DOUBLE,
+    dr12 = fr24 + FP_DOUBLE,
+    dr13 = fr26 + FP_DOUBLE,
+    dr14 = fr28 + FP_DOUBLE,
+    dr15 = fr30 + FP_DOUBLE,
+} NativeRegisterPool;
+
+/* Shift encodings */
+typedef enum ArmShiftEncodings {
+    kArmLsl = 0x0,
+    kArmLsr = 0x1,
+    kArmAsr = 0x2,
+    kArmRor = 0x3
+} ArmShiftEncodings;
+
+/* Thumb condition encodings */
+typedef enum ArmConditionCode {
+    kArmCondEq = 0x0,    /* 0000 */
+    kArmCondNe = 0x1,    /* 0001 */
+    kArmCondCs = 0x2,    /* 0010 */
+    kArmCondCc = 0x3,    /* 0011 */
+    kArmCondMi = 0x4,    /* 0100 */
+    kArmCondPl = 0x5,    /* 0101 */
+    kArmCondVs = 0x6,    /* 0110 */
+    kArmCondVc = 0x7,    /* 0111 */
+    kArmCondHi = 0x8,    /* 1000 */
+    kArmCondLs = 0x9,    /* 1001 */
+    kArmCondGe = 0xa,    /* 1010 */
+    kArmCondLt = 0xb,    /* 1011 */
+    kArmCondGt = 0xc,    /* 1100 */
+    kArmCondLe = 0xd,    /* 1101 */
+    kArmCondAl = 0xe,    /* 1110 */
+    kArmCondNv = 0xf,    /* 1111 */
+} ArmConditionCode;
+
+#define isPseudoOpcode(opcode) ((int)(opcode) < 0)
+
+/*
+ * The following enum defines the list of supported Thumb instructions by the
+ * assembler. Their corresponding snippet positions will be defined in
+ * Assemble.c.
+ */
+typedef enum ArmOpcode {
+    kArmPseudoCaseLabel = -13,
+    kArmPseudoMethodEntry = -12,
+    kArmPseudoMethodExit = -11,
+    kArmPseudoBarrier = -10,
+    kArmPseudoExtended = -9,
+    kArmPseudoSSARep = -8,
+    kArmPseudoEntryBlock = -7,
+    kArmPseudoExitBlock = -6,
+    kArmPseudoTargetLabel = -5,
+    kArmPseudoDalvikByteCodeBoundary = -4,
+    kArmPseudoPseudoAlign4 = -3,
+    kArmPseudoEHBlockLabel = -2,
+    kArmPseudoNormalBlockLabel = -1,
+    /************************************************************************/
+    kArm16BitData,       /* DATA   [0] rd[15..0] */
+    kThumbAdcRR,         /* adc     [0100000101] rm[5..3] rd[2..0] */
+    kThumbAddRRI3,       /* add(1)  [0001110] imm_3[8..6] rn[5..3] rd[2..0]*/
+    kThumbAddRI8,        /* add(2)  [00110] rd[10..8] imm_8[7..0] */
+    kThumbAddRRR,        /* add(3)  [0001100] rm[8..6] rn[5..3] rd[2..0] */
+    kThumbAddRRLH,       /* add(4)  [01000100] H12[01] rm[5..3] rd[2..0] */
+    kThumbAddRRHL,       /* add(4)  [01001000] H12[10] rm[5..3] rd[2..0] */
+    kThumbAddRRHH,       /* add(4)  [01001100] H12[11] rm[5..3] rd[2..0] */
+    kThumbAddPcRel,      /* add(5)  [10100] rd[10..8] imm_8[7..0] */
+    kThumbAddSpRel,      /* add(6)  [10101] rd[10..8] imm_8[7..0] */
+    kThumbAddSpI7,       /* add(7)  [101100000] imm_7[6..0] */
+    kThumbAndRR,         /* and     [0100000000] rm[5..3] rd[2..0] */
+    kThumbAsrRRI5,       /* asr(1)  [00010] imm_5[10..6] rm[5..3] rd[2..0] */
+    kThumbAsrRR,         /* asr(2)  [0100000100] rs[5..3] rd[2..0] */
+    kThumbBCond,         /* b(1)    [1101] cond[11..8] offset_8[7..0] */
+    kThumbBUncond,       /* b(2)    [11100] offset_11[10..0] */
+    kThumbBicRR,         /* bic     [0100001110] rm[5..3] rd[2..0] */
+    kThumbBkpt,          /* bkpt    [10111110] imm_8[7..0] */
+    kThumbBlx1,          /* blx(1)  [111] H[10] offset_11[10..0] */
+    kThumbBlx2,          /* blx(1)  [111] H[01] offset_11[10..0] */
+    kThumbBl1,           /* blx(1)  [111] H[10] offset_11[10..0] */
+    kThumbBl2,           /* blx(1)  [111] H[11] offset_11[10..0] */
+    kThumbBlxR,          /* blx(2)  [010001111] rm[6..3] [000] */
+    kThumbBx,            /* bx      [010001110] H2[6..6] rm[5..3] SBZ[000] */
+    kThumbCmnRR,         /* cmn     [0100001011] rm[5..3] rd[2..0] */
+    kThumbCmpRI8,        /* cmp(1)  [00101] rn[10..8] imm_8[7..0] */
+    kThumbCmpRR,         /* cmp(2)  [0100001010] rm[5..3] rd[2..0] */
+    kThumbCmpLH,         /* cmp(3)  [01000101] H12[01] rm[5..3] rd[2..0] */
+    kThumbCmpHL,         /* cmp(3)  [01000110] H12[10] rm[5..3] rd[2..0] */
+    kThumbCmpHH,         /* cmp(3)  [01000111] H12[11] rm[5..3] rd[2..0] */
+    kThumbEorRR,         /* eor     [0100000001] rm[5..3] rd[2..0] */
+    kThumbLdmia,         /* ldmia   [11001] rn[10..8] reglist [7..0] */
+    kThumbLdrRRI5,       /* ldr(1)  [01101] imm_5[10..6] rn[5..3] rd[2..0] */
+    kThumbLdrRRR,        /* ldr(2)  [0101100] rm[8..6] rn[5..3] rd[2..0] */
+    kThumbLdrPcRel,      /* ldr(3)  [01001] rd[10..8] imm_8[7..0] */
+    kThumbLdrSpRel,      /* ldr(4)  [10011] rd[10..8] imm_8[7..0] */
+    kThumbLdrbRRI5,      /* ldrb(1) [01111] imm_5[10..6] rn[5..3] rd[2..0] */
+    kThumbLdrbRRR,       /* ldrb(2) [0101110] rm[8..6] rn[5..3] rd[2..0] */
+    kThumbLdrhRRI5,      /* ldrh(1) [10001] imm_5[10..6] rn[5..3] rd[2..0] */
+    kThumbLdrhRRR,       /* ldrh(2) [0101101] rm[8..6] rn[5..3] rd[2..0] */
+    kThumbLdrsbRRR,      /* ldrsb   [0101011] rm[8..6] rn[5..3] rd[2..0] */
+    kThumbLdrshRRR,      /* ldrsh   [0101111] rm[8..6] rn[5..3] rd[2..0] */
+    kThumbLslRRI5,       /* lsl(1)  [00000] imm_5[10..6] rm[5..3] rd[2..0] */
+    kThumbLslRR,         /* lsl(2)  [0100000010] rs[5..3] rd[2..0] */
+    kThumbLsrRRI5,       /* lsr(1)  [00001] imm_5[10..6] rm[5..3] rd[2..0] */
+    kThumbLsrRR,         /* lsr(2)  [0100000011] rs[5..3] rd[2..0] */
+    kThumbMovImm,        /* mov(1)  [00100] rd[10..8] imm_8[7..0] */
+    kThumbMovRR,         /* mov(2)  [0001110000] rn[5..3] rd[2..0] */
+    kThumbMovRR_H2H,     /* mov(3)  [01000111] H12[11] rm[5..3] rd[2..0] */
+    kThumbMovRR_H2L,     /* mov(3)  [01000110] H12[01] rm[5..3] rd[2..0] */
+    kThumbMovRR_L2H,     /* mov(3)  [01000101] H12[10] rm[5..3] rd[2..0] */
+    kThumbMul,           /* mul     [0100001101] rm[5..3] rd[2..0] */
+    kThumbMvn,           /* mvn     [0100001111] rm[5..3] rd[2..0] */
+    kThumbNeg,           /* neg     [0100001001] rm[5..3] rd[2..0] */
+    kThumbOrr,           /* orr     [0100001100] rm[5..3] rd[2..0] */
+    kThumbPop,           /* pop     [1011110] r[8..8] rl[7..0] */
+    kThumbPush,          /* push    [1011010] r[8..8] rl[7..0] */
+    kThumbRorRR,         /* ror     [0100000111] rs[5..3] rd[2..0] */
+    kThumbSbc,           /* sbc     [0100000110] rm[5..3] rd[2..0] */
+    kThumbStmia,         /* stmia   [11000] rn[10..8] reglist [7.. 0] */
+    kThumbStrRRI5,       /* str(1)  [01100] imm_5[10..6] rn[5..3] rd[2..0] */
+    kThumbStrRRR,        /* str(2)  [0101000] rm[8..6] rn[5..3] rd[2..0] */
+    kThumbStrSpRel,      /* str(3)  [10010] rd[10..8] imm_8[7..0] */
+    kThumbStrbRRI5,      /* strb(1) [01110] imm_5[10..6] rn[5..3] rd[2..0] */
+    kThumbStrbRRR,       /* strb(2) [0101010] rm[8..6] rn[5..3] rd[2..0] */
+    kThumbStrhRRI5,      /* strh(1) [10000] imm_5[10..6] rn[5..3] rd[2..0] */
+    kThumbStrhRRR,       /* strh(2) [0101001] rm[8..6] rn[5..3] rd[2..0] */
+    kThumbSubRRI3,       /* sub(1)  [0001111] imm_3[8..6] rn[5..3] rd[2..0]*/
+    kThumbSubRI8,        /* sub(2)  [00111] rd[10..8] imm_8[7..0] */
+    kThumbSubRRR,        /* sub(3)  [0001101] rm[8..6] rn[5..3] rd[2..0] */
+    kThumbSubSpI7,       /* sub(4)  [101100001] imm_7[6..0] */
+    kThumbSwi,           /* swi     [11011111] imm_8[7..0] */
+    kThumbTst,           /* tst     [0100001000] rm[5..3] rn[2..0] */
+    kThumb2Vldrs,        /* vldr low  sx [111011011001] rn[19..16] rd[15-12]
+                                    [1010] imm_8[7..0] */
+    kThumb2Vldrd,        /* vldr low  dx [111011011001] rn[19..16] rd[15-12]
+                                    [1011] imm_8[7..0] */
+    kThumb2Vmuls,        /* vmul vd, vn, vm [111011100010] rn[19..16]
+                                    rd[15-12] [10100000] rm[3..0] */
+    kThumb2Vmuld,        /* vmul vd, vn, vm [111011100010] rn[19..16]
+                                    rd[15-12] [10110000] rm[3..0] */
+    kThumb2Vstrs,        /* vstr low  sx [111011011000] rn[19..16] rd[15-12]
+                                    [1010] imm_8[7..0] */
+    kThumb2Vstrd,        /* vstr low  dx [111011011000] rn[19..16] rd[15-12]
+                                    [1011] imm_8[7..0] */
+    kThumb2Vsubs,        /* vsub vd, vn, vm [111011100011] rn[19..16]
+                                    rd[15-12] [10100040] rm[3..0] */
+    kThumb2Vsubd,        /* vsub vd, vn, vm [111011100011] rn[19..16]
+                                    rd[15-12] [10110040] rm[3..0] */
+    kThumb2Vadds,        /* vadd vd, vn, vm [111011100011] rn[19..16]
+                                    rd[15-12] [10100000] rm[3..0] */
+    kThumb2Vaddd,        /* vadd vd, vn, vm [111011100011] rn[19..16]
+                                    rd[15-12] [10110000] rm[3..0] */
+    kThumb2Vdivs,        /* vdiv vd, vn, vm [111011101000] rn[19..16]
+                                    rd[15-12] [10100000] rm[3..0] */
+    kThumb2Vdivd,        /* vdiv vd, vn, vm [111011101000] rn[19..16]
+                                    rd[15-12] [10110000] rm[3..0] */
+    kThumb2VcvtIF,       /* vcvt.F32 vd, vm [1110111010111000] vd[15..12]
+                                    [10101100] vm[3..0] */
+    kThumb2VcvtID,       /* vcvt.F64 vd, vm [1110111010111000] vd[15..12]
+                                       [10111100] vm[3..0] */
+    kThumb2VcvtFI,       /* vcvt.S32.F32 vd, vm [1110111010111101] vd[15..12]
+                                       [10101100] vm[3..0] */
+    kThumb2VcvtDI,       /* vcvt.S32.F32 vd, vm [1110111010111101] vd[15..12]
+                                       [10111100] vm[3..0] */
+    kThumb2VcvtFd,       /* vcvt.F64.F32 vd, vm [1110111010110111] vd[15..12]
+                                       [10101100] vm[3..0] */
+    kThumb2VcvtDF,       /* vcvt.F32.F64 vd, vm [1110111010110111] vd[15..12]
+                                       [10111100] vm[3..0] */
+    kThumb2Vsqrts,       /* vsqrt.f32 vd, vm [1110111010110001] vd[15..12]
+                                       [10101100] vm[3..0] */
+    kThumb2Vsqrtd,       /* vsqrt.f64 vd, vm [1110111010110001] vd[15..12]
+                                       [10111100] vm[3..0] */
+    kThumb2MovImmShift,  /* mov(T2) rd, #<const> [11110] i [00001001111]
+                                       imm3 rd[11..8] imm8 */
+    kThumb2MovImm16,     /* mov(T3) rd, #<const> [11110] i [0010100] imm4 [0]
+                                       imm3 rd[11..8] imm8 */
+    kThumb2StrRRI12,     /* str(Imm,T3) rd,[rn,#imm12] [111110001100]
+                                       rn[19..16] rt[15..12] imm12[11..0] */
+    kThumb2LdrRRI12,     /* str(Imm,T3) rd,[rn,#imm12] [111110001100]
+                                       rn[19..16] rt[15..12] imm12[11..0] */
+    kThumb2StrRRI8Predec, /* str(Imm,T4) rd,[rn,#-imm8] [111110000100]
+                                       rn[19..16] rt[15..12] [1100] imm[7..0]*/
+    kThumb2LdrRRI8Predec, /* ldr(Imm,T4) rd,[rn,#-imm8] [111110000101]
+                                       rn[19..16] rt[15..12] [1100] imm[7..0]*/
+    kThumb2Cbnz,         /* cbnz rd,<label> [101110] i [1] imm5[7..3]
+                                       rn[2..0] */
+    kThumb2Cbz,          /* cbn rd,<label> [101100] i [1] imm5[7..3]
+                                       rn[2..0] */
+    kThumb2AddRRI12,     /* add rd, rn, #imm12 [11110] i [100000] rn[19..16]
+                                       [0] imm3[14..12] rd[11..8] imm8[7..0] */
+    kThumb2MovRR,        /* mov rd, rm [11101010010011110000] rd[11..8]
+                                       [0000] rm[3..0] */
+    kThumb2Vmovs,        /* vmov.f32 vd, vm [111011101] D [110000]
+                                       vd[15..12] 101001] M [0] vm[3..0] */
+    kThumb2Vmovd,        /* vmov.f64 vd, vm [111011101] D [110000]
+                                       vd[15..12] 101101] M [0] vm[3..0] */
+    kThumb2Ldmia,        /* ldmia  [111010001001[ rn[19..16] mask[15..0] */
+    kThumb2Stmia,        /* stmia  [111010001000[ rn[19..16] mask[15..0] */
+    kThumb2AddRRR,       /* add [111010110000] rn[19..16] [0000] rd[11..8]
+                                   [0000] rm[3..0] */
+    kThumb2SubRRR,       /* sub [111010111010] rn[19..16] [0000] rd[11..8]
+                                   [0000] rm[3..0] */
+    kThumb2SbcRRR,       /* sbc [111010110110] rn[19..16] [0000] rd[11..8]
+                                   [0000] rm[3..0] */
+    kThumb2CmpRR,        /* cmp [111010111011] rn[19..16] [0000] [1111]
+                                   [0000] rm[3..0] */
+    kThumb2SubRRI12,     /* sub rd, rn, #imm12 [11110] i [01010] rn[19..16]
+                                       [0] imm3[14..12] rd[11..8] imm8[7..0] */
+    kThumb2MvnImmShift,  /* mov(T2) rd, #<const> [11110] i [00011011110]
+                                       imm3 rd[11..8] imm8 */
+    kThumb2Sel,          /* sel rd, rn, rm [111110101010] rn[19-16] rd[11-8]
+                                       rm[3-0] */
+    kThumb2Ubfx,         /* ubfx rd,rn,#lsb,#width [111100111100] rn[19..16]
+                                       [0] imm3[14-12] rd[11-8] w[4-0] */
+    kThumb2Sbfx,         /* ubfx rd,rn,#lsb,#width [111100110100] rn[19..16]
+                                       [0] imm3[14-12] rd[11-8] w[4-0] */
+    kThumb2LdrRRR,       /* ldr rt,[rn,rm,LSL #imm] [111110000101] rn[19-16]
+                                       rt[15-12] [000000] imm[5-4] rm[3-0] */
+    kThumb2LdrhRRR,      /* ldrh rt,[rn,rm,LSL #imm] [111110000101] rn[19-16]
+                                       rt[15-12] [000000] imm[5-4] rm[3-0] */
+    kThumb2LdrshRRR,     /* ldrsh rt,[rn,rm,LSL #imm] [111110000101] rn[19-16]
+                                       rt[15-12] [000000] imm[5-4] rm[3-0] */
+    kThumb2LdrbRRR,      /* ldrb rt,[rn,rm,LSL #imm] [111110000101] rn[19-16]
+                                       rt[15-12] [000000] imm[5-4] rm[3-0] */
+    kThumb2LdrsbRRR,     /* ldrsb rt,[rn,rm,LSL #imm] [111110000101] rn[19-16]
+                                       rt[15-12] [000000] imm[5-4] rm[3-0] */
+    kThumb2StrRRR,       /* str rt,[rn,rm,LSL #imm] [111110000100] rn[19-16]
+                                       rt[15-12] [000000] imm[5-4] rm[3-0] */
+    kThumb2StrhRRR,      /* str rt,[rn,rm,LSL #imm] [111110000010] rn[19-16]
+                                       rt[15-12] [000000] imm[5-4] rm[3-0] */
+    kThumb2StrbRRR,      /* str rt,[rn,rm,LSL #imm] [111110000000] rn[19-16]
+                                       rt[15-12] [000000] imm[5-4] rm[3-0] */
+    kThumb2LdrhRRI12,    /* ldrh rt,[rn,#imm12] [111110001011]
+                                       rt[15..12] rn[19..16] imm12[11..0] */
+    kThumb2LdrshRRI12,   /* ldrsh rt,[rn,#imm12] [111110011011]
+                                       rt[15..12] rn[19..16] imm12[11..0] */
+    kThumb2LdrbRRI12,    /* ldrb rt,[rn,#imm12] [111110001001]
+                                       rt[15..12] rn[19..16] imm12[11..0] */
+    kThumb2LdrsbRRI12,   /* ldrsb rt,[rn,#imm12] [111110011001]
+                                       rt[15..12] rn[19..16] imm12[11..0] */
+    kThumb2StrhRRI12,    /* strh rt,[rn,#imm12] [111110001010]
+                                       rt[15..12] rn[19..16] imm12[11..0] */
+    kThumb2StrbRRI12,    /* strb rt,[rn,#imm12] [111110001000]
+                                       rt[15..12] rn[19..16] imm12[11..0] */
+    kThumb2Pop,          /* pop     [1110100010111101] list[15-0]*/
+    kThumb2Push,         /* push    [1110100100101101] list[15-0]*/
+    kThumb2CmpRI8,       /* cmp rn, #<const> [11110] i [011011] rn[19-16] [0]
+                                       imm3 [1111] imm8[7..0] */
+    kThumb2AdcRRR,       /* adc [111010110101] rn[19..16] [0000] rd[11..8]
+                                   [0000] rm[3..0] */
+    kThumb2AndRRR,       /* and [111010100000] rn[19..16] [0000] rd[11..8]
+                                   [0000] rm[3..0] */
+    kThumb2BicRRR,       /* bic [111010100010] rn[19..16] [0000] rd[11..8]
+                                   [0000] rm[3..0] */
+    kThumb2CmnRR,        /* cmn [111010110001] rn[19..16] [0000] [1111]
+                                   [0000] rm[3..0] */
+    kThumb2EorRRR,       /* eor [111010101000] rn[19..16] [0000] rd[11..8]
+                                   [0000] rm[3..0] */
+    kThumb2MulRRR,       /* mul [111110110000] rn[19..16] [1111] rd[11..8]
+                                   [0000] rm[3..0] */
+    kThumb2MnvRR,        /* mvn [11101010011011110] rd[11-8] [0000]
+                                   rm[3..0] */
+    kThumb2RsubRRI8,     /* rsub [111100011100] rn[19..16] [0000] rd[11..8]
+                                   imm8[7..0] */
+    kThumb2NegRR,        /* actually rsub rd, rn, #0 */
+    kThumb2OrrRRR,       /* orr [111010100100] rn[19..16] [0000] rd[11..8]
+                                   [0000] rm[3..0] */
+    kThumb2TstRR,        /* tst [111010100001] rn[19..16] [0000] [1111]
+                                   [0000] rm[3..0] */
+    kThumb2LslRRR,       /* lsl [111110100000] rn[19..16] [1111] rd[11..8]
+                                   [0000] rm[3..0] */
+    kThumb2LsrRRR,       /* lsr [111110100010] rn[19..16] [1111] rd[11..8]
+                                   [0000] rm[3..0] */
+    kThumb2AsrRRR,       /* asr [111110100100] rn[19..16] [1111] rd[11..8]
+                                   [0000] rm[3..0] */
+    kThumb2RorRRR,       /* ror [111110100110] rn[19..16] [1111] rd[11..8]
+                                   [0000] rm[3..0] */
+    kThumb2LslRRI5,      /* lsl [11101010010011110] imm[14.12] rd[11..8]
+                                   [00] rm[3..0] */
+    kThumb2LsrRRI5,      /* lsr [11101010010011110] imm[14.12] rd[11..8]
+                                   [01] rm[3..0] */
+    kThumb2AsrRRI5,      /* asr [11101010010011110] imm[14.12] rd[11..8]
+                                   [10] rm[3..0] */
+    kThumb2RorRRI5,      /* ror [11101010010011110] imm[14.12] rd[11..8]
+                                   [11] rm[3..0] */
+    kThumb2BicRRI8,      /* bic [111100000010] rn[19..16] [0] imm3
+                                   rd[11..8] imm8 */
+    kThumb2AndRRI8,      /* bic [111100000000] rn[19..16] [0] imm3
+                                   rd[11..8] imm8 */
+    kThumb2OrrRRI8,      /* orr [111100000100] rn[19..16] [0] imm3
+                                   rd[11..8] imm8 */
+    kThumb2EorRRI8,      /* eor [111100001000] rn[19..16] [0] imm3
+                                   rd[11..8] imm8 */
+    kThumb2AddRRI8,      /* add [111100001000] rn[19..16] [0] imm3
+                                   rd[11..8] imm8 */
+    kThumb2AdcRRI8,      /* adc [111100010101] rn[19..16] [0] imm3
+                                   rd[11..8] imm8 */
+    kThumb2SubRRI8,      /* sub [111100011011] rn[19..16] [0] imm3
+                                   rd[11..8] imm8 */
+    kThumb2SbcRRI8,      /* sbc [111100010111] rn[19..16] [0] imm3
+                                   rd[11..8] imm8 */
+    kThumb2It,           /* it [10111111] firstcond[7-4] mask[3-0] */
+    kThumb2Fmstat,       /* fmstat [11101110111100011111101000010000] */
+    kThumb2Vcmpd,        /* vcmp [111011101] D [11011] rd[15-12] [1011]
+                                   E [1] M [0] rm[3-0] */
+    kThumb2Vcmps,        /* vcmp [111011101] D [11010] rd[15-12] [1011]
+                                   E [1] M [0] rm[3-0] */
+    kThumb2LdrPcRel12,   /* ldr rd,[pc,#imm12] [1111100011011111] rt[15-12]
+                                  imm12[11-0] */
+    kThumb2BCond,        /* b<c> [1110] S cond[25-22] imm6[21-16] [10]
+                                  J1 [0] J2 imm11[10..0] */
+    kThumb2Vmovd_RR,     /* vmov [111011101] D [110000] vd[15-12 [101101]
+                                  M [0] vm[3-0] */
+    kThumb2Vmovs_RR,     /* vmov [111011101] D [110000] vd[15-12 [101001]
+                                  M [0] vm[3-0] */
+    kThumb2Fmrs,         /* vmov [111011100000] vn[19-16] rt[15-12] [1010]
+                                  N [0010000] */
+    kThumb2Fmsr,         /* vmov [111011100001] vn[19-16] rt[15-12] [1010]
+                                  N [0010000] */
+    kThumb2Fmrrd,        /* vmov [111011000100] rt2[19-16] rt[15-12]
+                                  [101100] M [1] vm[3-0] */
+    kThumb2Fmdrr,        /* vmov [111011000101] rt2[19-16] rt[15-12]
+                                  [101100] M [1] vm[3-0] */
+    kThumb2Vabsd,        /* vabs.f64 [111011101] D [110000] rd[15-12]
+                                  [1011110] M [0] vm[3-0] */
+    kThumb2Vabss,        /* vabs.f32 [111011101] D [110000] rd[15-12]
+                                  [1010110] M [0] vm[3-0] */
+    kThumb2Vnegd,        /* vneg.f64 [111011101] D [110000] rd[15-12]
+                                  [1011110] M [0] vm[3-0] */
+    kThumb2Vnegs,        /* vneg.f32 [111011101] D [110000] rd[15-12]
+                                 [1010110] M [0] vm[3-0] */
+    kThumb2Vmovs_IMM8,   /* vmov.f32 [111011101] D [11] imm4h[19-16] vd[15-12]
+                                  [10100000] imm4l[3-0] */
+    kThumb2Vmovd_IMM8,   /* vmov.f64 [111011101] D [11] imm4h[19-16] vd[15-12]
+                                  [10110000] imm4l[3-0] */
+    kThumb2Mla,          /* mla [111110110000] rn[19-16] ra[15-12] rd[7-4]
+                                  [0000] rm[3-0] */
+    kThumb2Umull,        /* umull [111110111010] rn[19-16], rdlo[15-12]
+                                  rdhi[11-8] [0000] rm[3-0] */
+    kThumb2Ldrex,        /* ldrex [111010000101] rn[19-16] rt[11-8] [1111]
+                                  imm8[7-0] */
+    kThumb2Strex,        /* strex [111010000100] rn[19-16] rt[11-8] rd[11-8]
+                                  imm8[7-0] */
+    kThumb2Clrex,        /* clrex [111100111011111110000111100101111] */
+    kThumb2Bfi,          /* bfi [111100110110] rn[19-16] [0] imm3[14-12]
+                                  rd[11-8] imm2[7-6] [0] msb[4-0] */
+    kThumb2Bfc,          /* bfc [11110011011011110] [0] imm3[14-12]
+                                  rd[11-8] imm2[7-6] [0] msb[4-0] */
+    kThumb2Dmb,          /* dmb [1111001110111111100011110101] option[3-0] */
+    kThumb2LdrPcReln12,  /* ldr rd,[pc,-#imm12] [1111100011011111] rt[15-12]
+                                  imm12[11-0] */
+    kThumb2Stm,          /* stm <list> [111010010000] rn[19-16] 000 rl[12-0] */
+    kThumbUndefined,     /* undefined [11011110xxxxxxxx] */
+    kThumb2VPopCS,       /* vpop <list of callee save fp singles (s16+) */
+    kThumb2VPushCS,      /* vpush <list callee save fp singles (s16+) */
+    kThumb2Vldms,        /* vldms rd, <list> */
+    kThumb2Vstms,        /* vstms rd, <list> */
+    kThumb2BUncond,      /* b <label> */
+    kThumb2MovImm16H,    /* similar to kThumb2MovImm16, but target high hw */
+    kThumb2AddPCR,       /* Thumb2 2-operand add with hard-coded PC target */
+    kThumb2AdrST,        /* Special purpose encoding of ADR for switch tables */
+    kThumb2MovImm16LST,  /* Special purpose version for switch table use */
+    kThumb2MovImm16HST,  /* Special purpose version for switch table use */
+    kThumb2LdmiaWB,      /* ldmia  [111010011001[ rn[19..16] mask[15..0] */
+    kThumb2SubsRRI12,    /* setflags encoding */
+    kArmLast,
+} ArmOpcode;
+
+/* DMB option encodings */
+typedef enum ArmOpDmbOptions {
+    kSY = 0xf,
+    kST = 0xe,
+    kISH = 0xb,
+    kISHST = 0xa,
+    kNSH = 0x7,
+    kNSHST = 0x6
+} ArmOpDmbOptions;
+
+/* Bit flags describing the behavior of each native opcode */
+typedef enum ArmOpFeatureFlags {
+    kIsBranch = 0,
+    kRegDef0,
+    kRegDef1,
+    kRegDefSP,
+    kRegDefLR,
+    kRegDefList0,
+    kRegDefList1,
+    kRegDefFPCSList0,
+    kRegDefFPCSList2,
+    kRegDefList2,
+    kRegUse0,
+    kRegUse1,
+    kRegUse2,
+    kRegUse3,
+    kRegUseSP,
+    kRegUsePC,
+    kRegUseList0,
+    kRegUseList1,
+    kRegUseFPCSList0,
+    kRegUseFPCSList2,
+    kNoOperand,
+    kIsUnaryOp,
+    kIsBinaryOp,
+    kIsTertiaryOp,
+    kIsQuadOp,
+    kIsIT,
+    kSetsCCodes,
+    kUsesCCodes,
+    kMemLoad,
+    kMemStore,
+} ArmOpFeatureFlags;
+
+#define IS_LOAD         (1 << kMemLoad)
+#define IS_STORE        (1 << kMemStore)
+#define IS_BRANCH       (1 << kIsBranch)
+#define REG_DEF0        (1 << kRegDef0)
+#define REG_DEF1        (1 << kRegDef1)
+#define REG_DEF_SP      (1 << kRegDefSP)
+#define REG_DEF_LR      (1 << kRegDefLR)
+#define REG_DEF_LIST0   (1 << kRegDefList0)
+#define REG_DEF_LIST1   (1 << kRegDefList1)
+#define REG_DEF_FPCS_LIST0   (1 << kRegDefFPCSList0)
+#define REG_DEF_FPCS_LIST2   (1 << kRegDefFPCSList2)
+#define REG_USE0        (1 << kRegUse0)
+#define REG_USE1        (1 << kRegUse1)
+#define REG_USE2        (1 << kRegUse2)
+#define REG_USE3        (1 << kRegUse3)
+#define REG_USE_SP      (1 << kRegUseSP)
+#define REG_USE_PC      (1 << kRegUsePC)
+#define REG_USE_LIST0   (1 << kRegUseList0)
+#define REG_USE_LIST1   (1 << kRegUseList1)
+#define REG_USE_FPCS_LIST0   (1 << kRegUseFPCSList0)
+#define REG_USE_FPCS_LIST2   (1 << kRegUseFPCSList2)
+#define NO_OPERAND      (1 << kNoOperand)
+#define IS_UNARY_OP     (1 << kIsUnaryOp)
+#define IS_BINARY_OP    (1 << kIsBinaryOp)
+#define IS_TERTIARY_OP  (1 << kIsTertiaryOp)
+#define IS_QUAD_OP      (1 << kIsQuadOp)
+#define IS_IT           (1 << kIsIT)
+#define SETS_CCODES     (1 << kSetsCCodes)
+#define USES_CCODES     (1 << kUsesCCodes)
+
+/* Common combo register usage patterns */
+#define REG_USE01       (REG_USE0 | REG_USE1)
+#define REG_USE012      (REG_USE01 | REG_USE2)
+#define REG_USE12       (REG_USE1 | REG_USE2)
+#define REG_DEF0_USE0   (REG_DEF0 | REG_USE0)
+#define REG_DEF0_USE1   (REG_DEF0 | REG_USE1)
+#define REG_DEF0_USE01  (REG_DEF0 | REG_USE01)
+#define REG_DEF0_USE12  (REG_DEF0 | REG_USE12)
+#define REG_DEF01_USE2  (REG_DEF0 | REG_DEF1 | REG_USE2)
+
+/* Instruction assembly fieldLoc kind */
+typedef enum ArmEncodingKind {
+    kFmtUnused,
+    kFmtBitBlt,        /* Bit string using end/start */
+    kFmtDfp,           /* Double FP reg */
+    kFmtSfp,           /* Single FP reg */
+    kFmtModImm,        /* Shifted 8-bit immed using [26,14..12,7..0] */
+    kFmtImm16,         /* Zero-extended immed using [26,19..16,14..12,7..0] */
+    kFmtImm6,          /* Encoded branch target using [9,7..3]0 */
+    kFmtImm12,         /* Zero-extended immediate using [26,14..12,7..0] */
+    kFmtShift,         /* Shift descriptor, [14..12,7..4] */
+    kFmtLsb,           /* least significant bit using [14..12][7..6] */
+    kFmtBWidth,        /* bit-field width, encoded as width-1 */
+    kFmtShift5,        /* Shift count, [14..12,7..6] */
+    kFmtBrOffset,      /* Signed extended [26,11,13,21-16,10-0]:0 */
+    kFmtFPImm,         /* Encoded floating point immediate */
+    kFmtOff24,         /* 24-bit Thumb2 unconditional branch encoding */
+} ArmEncodingKind;
+
+/* Struct used to define the snippet positions for each Thumb opcode */
+typedef struct ArmEncodingMap {
+    u4 skeleton;
+    struct {
+        ArmEncodingKind kind;
+        int end;   /* end for kFmtBitBlt, 1-bit slice end for FP regs */
+        int start; /* start for kFmtBitBlt, 4-bit slice end for FP regs */
+    } fieldLoc[4];
+    ArmOpcode opcode;
+    int flags;
+    const char* name;
+    const char* fmt;
+    int size;
+} ArmEncodingMap;
+
+/* Keys for target-specific scheduling and other optimization hints */
+typedef enum ArmTargetOptHints {
+    kMaxHoistDistance,
+} ArmTargetOptHints;
+
+extern ArmEncodingMap EncodingMap[kArmLast];
+
+/*
+ * Each instance of this struct holds a pseudo or real LIR instruction:
+ * - pseudo ones (eg labels and marks) and will be discarded by the assembler.
+ * - real ones will be assembled into Thumb instructions.
+ *
+ * Machine resources are encoded into a 64-bit vector, where the encodings are
+ * as following:
+ * - [ 0..15]: general purpose registers including PC, SP, and LR
+ * - [16..47]: floating-point registers where d0 is expanded to s[01] and s0
+ *   starts at bit 16
+ * - [48]: IT block
+ * - [49]: integer condition code
+ * - [50]: floatint-point status word
+ */
+typedef struct ArmLIR {
+    LIR generic;
+    ArmOpcode opcode;
+    int operands[4];            // [0..3] = [dest, src1, src2, extra]
+    struct {
+        bool isNop:1;           // LIR is optimized away
+        bool insertWrapper:1;   // insert branch to emulate memory accesses
+        unsigned int age:4;     // default is 0, set lazily by the optimizer
+        unsigned int size:3;    // bytes (2 for thumb, 2/4 for thumb2)
+        unsigned int unused:23;
+    } flags;
+    int aliasInfo;              // For Dalvik register & litpool disambiguation
+    u8 useMask;                 // Resource mask for use
+    u8 defMask;                 // Resource mask for def
+} ArmLIR;
+
+typedef struct SwitchTable {
+    int offset;
+    const u2* table;            // Original dex table
+    int vaddr;                  // Dalvik offset of switch opcode
+    ArmLIR* bxInst;             // Switch indirect branch instruction
+    ArmLIR** targets;           // Array of case targets
+} SwitchTable;
+
+typedef struct FillArrayData {
+    int offset;
+    const u2* table;           // Original dex table
+    int size;
+    int vaddr;                 // Dalvik offset of OP_FILL_ARRAY_DATA opcode
+} FillArrayData;
+
+/* Init values when a predicted chain is initially assembled */
+/* E7FE is branch to self */
+#define PREDICTED_CHAIN_BX_PAIR_INIT     0xe7fe
+
+/* Utility macros to traverse the LIR/ArmLIR list */
+#define NEXT_LIR(lir) ((ArmLIR *) lir->generic.next)
+#define PREV_LIR(lir) ((ArmLIR *) lir->generic.prev)
+
+#define NEXT_LIR_LVALUE(lir) (lir)->generic.next
+#define PREV_LIR_LVALUE(lir) (lir)->generic.prev
+
+#define CHAIN_CELL_OFFSET_TAG   0xcdab
+
+#define CHAIN_CELL_NORMAL_SIZE 12
+#define CHAIN_CELL_PREDICTED_SIZE 16
+
+#endif  // ART_SRC_COMPILER_CODEGEN_ARM_ARMLIR_H_
diff --git a/src/compiler/codegen/arm/ArmRallocUtil.cc b/src/compiler/codegen/arm/ArmRallocUtil.cc
new file mode 100644
index 0000000..2fc1603
--- /dev/null
+++ b/src/compiler/codegen/arm/ArmRallocUtil.cc
@@ -0,0 +1,344 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * This file contains Arm-specific register alloction support.
+ */
+
+#include "../../CompilerUtility.h"
+#include "../../CompilerIR.h"
+#include "../..//Dataflow.h"
+#include "ArmLIR.h"
+#include "Codegen.h"
+#include "../Ralloc.h"
+
+/*
+ * Placeholder routine until we do proper register allocation.
+ */
+
+typedef struct RefCounts {
+    int count;
+    int sReg;
+    bool doubleStart;   // Starting vReg for a double
+} RefCounts;
+
+/*
+ * USE SSA names to count references of base Dalvik vRegs.  Also,
+ * mark "wide" in the first of wide SSA locationRec pairs.
+ */
+static void countRefs(CompilationUnit *cUnit, BasicBlock* bb,
+                      RefCounts* counts, bool fp)
+{
+    MIR* mir;
+    if (bb->blockType != kDalvikByteCode && bb->blockType != kEntryBlock &&
+        bb->blockType != kExitBlock)
+        return;
+
+    for (mir = bb->firstMIRInsn; mir; mir = mir->next) {
+        SSARepresentation *ssaRep = mir->ssaRep;
+        if (ssaRep) {
+            int i;
+            int attrs = oatDataFlowAttributes[mir->dalvikInsn.opcode];
+            if (fp) {
+                // Mark 1st reg of double pairs
+                int first = 0;
+                int sReg;
+                if ((attrs & (DF_DA_WIDE|DF_FP_A)) == (DF_DA_WIDE|DF_FP_A)) {
+                    sReg = DECODE_REG(
+                        oatConvertSSARegToDalvik(cUnit, ssaRep->defs[0]));
+                    counts[sReg].doubleStart = true;
+                    cUnit->regLocation[ssaRep->defs[0]].wide = true;
+                }
+                if ((attrs & (DF_UA_WIDE|DF_FP_A)) == (DF_UA_WIDE|DF_FP_A)) {
+                    sReg = DECODE_REG(
+                        oatConvertSSARegToDalvik(cUnit, ssaRep->uses[first]));
+                    counts[sReg].doubleStart = true;
+                    cUnit->regLocation[ssaRep->uses[first]].wide = true;
+                    first += 2;
+                }
+                if ((attrs & (DF_UB_WIDE|DF_FP_B)) == (DF_UB_WIDE|DF_FP_B)) {
+                    sReg = DECODE_REG(
+                        oatConvertSSARegToDalvik(cUnit, ssaRep->uses[first]));
+                    counts[sReg].doubleStart = true;
+                    cUnit->regLocation[ssaRep->uses[first]].wide = true;
+                    first += 2;
+                }
+                if ((attrs & (DF_UC_WIDE|DF_FP_C)) == (DF_UC_WIDE|DF_FP_C)) {
+                    sReg = DECODE_REG(
+                        oatConvertSSARegToDalvik(cUnit, ssaRep->uses[first]));
+                    counts[sReg].doubleStart = true;
+                    cUnit->regLocation[ssaRep->uses[first]].wide = true;
+                }
+            }
+            for (i=0; i< ssaRep->numUses; i++) {
+                int origSreg = DECODE_REG(
+                    oatConvertSSARegToDalvik(cUnit, ssaRep->uses[i]));
+                assert(origSreg < cUnit->method->registersSize);
+                bool fpUse = ssaRep->fpUse ? ssaRep->fpUse[i] : false;
+                if (fp == fpUse) {
+                    counts[origSreg].count++;
+                }
+            }
+            for (i=0; i< ssaRep->numDefs; i++) {
+                if (attrs & DF_SETS_CONST) {
+                    // CONST opcodes are untyped - don't pollute the counts
+                    continue;
+                }
+                int origSreg = DECODE_REG(
+                    oatConvertSSARegToDalvik(cUnit, ssaRep->defs[i]));
+                assert(origSreg < cUnit->method->registersSize);
+                bool fpDef = ssaRep->fpDef ? ssaRep->fpDef[i] : false;
+                if (fp == fpDef) {
+                    counts[origSreg].count++;
+                }
+            }
+        }
+    }
+}
+
+/* qsort callback function, sort descending */
+static int sortCounts(const void *val1, const void *val2)
+{
+    const RefCounts* op1 = (const RefCounts*)val1;
+    const RefCounts* op2 = (const RefCounts*)val2;
+    return (op1->count == op2->count) ? 0 : (op1->count < op2->count ? 1 : -1);
+}
+
+static void dumpCounts(const RefCounts* arr, int size, const char* msg)
+{
+    LOG(INFO) << msg;
+    for (int i = 0; i < size; i++) {
+        LOG(INFO) << "sReg[" << arr[i].sReg << "]: " << arr[i].count;
+    }
+}
+
+/*
+ * Note: some portions of this code required even if the kPromoteRegs
+ * optimization is disabled.
+ */
+extern void oatDoPromotion(CompilationUnit* cUnit)
+{
+    int numRegs = cUnit->method->registersSize;
+    int numIns = cUnit->method->insSize;
+
+    /*
+     * Because ins don't have explicit definitions, we need to type
+     * them based on the signature.
+     */
+    if (numIns > 0) {
+        int sReg = numRegs - numIns;
+        const char *shorty = cUnit->method->shorty;
+        shorty++;  // Move past return type;
+        while (*shorty) {
+            char arg = *shorty++;
+            // Is it wide?
+            if ((arg == 'D') || (arg == 'J')) {
+                cUnit->regLocation[sReg].wide = true;
+                cUnit->regLocation[sReg+1].fp = cUnit->regLocation[sReg].fp;
+                sReg++;  // Skip to next
+            }
+            sReg++;
+        }
+    }
+    /*
+     * TUNING: is leaf?  Can't just use "hasInvoke" to determine as some
+     * instructions might call out to C/assembly helper functions.  Until
+     * machinery is in place, always spill lr.
+     */
+    cUnit->coreSpillMask |= (1 << rLR);
+    cUnit->numSpills++;
+    /*
+     * Simple hack for testing register allocation.  Just do a static
+     * count of the uses of Dalvik registers.  Note that we examine
+     * the SSA names, but count based on original Dalvik register name.
+     * Count refs separately based on type in order to give allocation
+     * preference to fp doubles - which must be allocated sequential
+     * physical single fp registers started with an even-numbered
+     * reg.
+     */
+    RefCounts *coreRegs = (RefCounts *)
+          oatNew(sizeof(RefCounts) * numRegs, true);
+    RefCounts *fpRegs = (RefCounts *)
+          oatNew(sizeof(RefCounts) * numRegs, true);
+    for (int i = 0; i < numRegs; i++) {
+        coreRegs[i].sReg = fpRegs[i].sReg = i;
+    }
+    GrowableListIterator iterator;
+    oatGrowableListIteratorInit(&cUnit->blockList, &iterator);
+    while (true) {
+        BasicBlock* bb;
+        bb = (BasicBlock*)oatGrowableListIteratorNext(&iterator);
+        if (bb == NULL) break;
+        countRefs(cUnit, bb, coreRegs, false);
+        countRefs(cUnit, bb, fpRegs, true);
+    }
+
+    /*
+     * Ideally, we'd allocate doubles starting with an even-numbered
+     * register.  Bias the counts to try to allocate any vreg that's
+     * used as the start of a pair first.
+     */
+    for (int i = 0; i < numRegs; i++) {
+        if (fpRegs[i].doubleStart) {
+            fpRegs[i].count *= 2;
+        }
+    }
+
+    // Sort the count arrays
+    qsort(coreRegs, numRegs, sizeof(RefCounts), sortCounts);
+    qsort(fpRegs, numRegs, sizeof(RefCounts), sortCounts);
+
+    // TODO: temp for debugging, too verbose.  Remove when unneeded
+    if (cUnit->printMeVerbose) {
+        dumpCounts(coreRegs, numRegs, "coreRegs");
+        dumpCounts(fpRegs, numRegs, "fpRegs");
+    }
+
+    if (!(cUnit->disableOpt & (1 << kPromoteRegs))) {
+        // Promote fpRegs
+        for (int i = 0; (fpRegs[i].count > 0) && (i < numRegs); i++) {
+            if (cUnit->regLocation[fpRegs[i].sReg].fpLocation != kLocPhysReg) {
+                int reg = oatAllocPreservedFPReg(cUnit, fpRegs[i].sReg,
+                    fpRegs[i].doubleStart);
+                if (reg < 0) {
+                   break;  // No more left
+                }
+            }
+        }
+
+        // Promote core regs
+        for (int i = 0; (coreRegs[i].count > 0) && i < numRegs; i++) {
+            if (cUnit->regLocation[i].location != kLocPhysReg) {
+                int reg = oatAllocPreservedCoreReg(cUnit, coreRegs[i].sReg);
+                if (reg < 0) {
+                   break;  // No more left
+                }
+            }
+        }
+    }
+
+    // Now, update SSA names to new home locations
+    for (int i = 0; i < cUnit->numSSARegs; i++) {
+        int baseSreg = cUnit->regLocation[i].sRegLow;
+        RegLocation *base = &cUnit->regLocation[baseSreg];
+        RegLocation *baseNext = &cUnit->regLocation[baseSreg+1];
+        RegLocation *curr = &cUnit->regLocation[i];
+        if (curr->fp) {
+            /* Single or double, check fpLocation of base */
+            if (base->fpLocation == kLocPhysReg) {
+                if (curr->wide) {
+                    /* TUNING: consider alignment during allocation */
+                    if (base->fpLowReg & 1) {
+                        // Pair must start on even reg - don't promote
+                        continue;
+                    }
+                    /* Make sure upper half is also in reg or skip */
+                    if (baseNext->fpLocation != kLocPhysReg) {
+                        continue;
+                    }
+                    curr->highReg = baseNext->fpLowReg;
+                }
+                curr->location = kLocPhysReg;
+                curr->lowReg = base->fpLowReg;
+                curr->home = true;
+            }
+        } else {
+            /* Core or wide */
+            if (base->location == kLocPhysReg) {
+                if (curr->wide) {
+                    /* Make sure upper half is also in reg or skip */
+                    if (baseNext->location != kLocPhysReg) {
+                        continue;
+                    }
+                    curr->highReg = baseNext->lowReg;
+                }
+                curr->location = kLocPhysReg;
+                curr->lowReg = base->lowReg;
+                curr->home = true;
+            }
+        }
+    }
+}
+
+/* Returns sp-relative offset in bytes */
+extern int oatVRegOffset(CompilationUnit* cUnit, int reg)
+{
+    return (reg < cUnit->numRegs) ? cUnit->regsOffset + (reg << 2) :
+            cUnit->insOffset + ((reg - cUnit->numRegs) << 2);
+}
+
+
+/* Clobber all regs that might be used by an external C call */
+extern void oatClobberCallRegs(CompilationUnit *cUnit)
+{
+    oatClobber(cUnit, r0);
+    oatClobber(cUnit, r1);
+    oatClobber(cUnit, r2);
+    oatClobber(cUnit, r3);
+    oatClobber(cUnit, r12);
+    oatClobber(cUnit, r14lr);
+}
+
+/* Clobber all of the temps that might be used by a handler. */
+extern void oatClobberHandlerRegs(CompilationUnit* cUnit)
+{
+    //TUNING: reduce the set of regs used by handlers.  Only a few need lots.
+    oatClobberCallRegs(cUnit);
+    oatClobber(cUnit, r7);
+    oatClobber(cUnit, r8);
+    oatClobber(cUnit, r10);
+}
+
+extern RegLocation oatGetReturnWide(CompilationUnit* cUnit)
+{
+    RegLocation res = LOC_C_RETURN_WIDE;
+    oatClobber(cUnit, r0);
+    oatClobber(cUnit, r1);
+    oatMarkInUse(cUnit, r0);
+    oatMarkInUse(cUnit, r1);
+    oatMarkPair(cUnit, res.lowReg, res.highReg);
+    return res;
+}
+
+extern RegLocation oatGetReturnWideAlt(CompilationUnit* cUnit)
+{
+    RegLocation res = LOC_C_RETURN_WIDE;
+    res.lowReg = r2;
+    res.highReg = r3;
+    oatClobber(cUnit, r2);
+    oatClobber(cUnit, r3);
+    oatMarkInUse(cUnit, r2);
+    oatMarkInUse(cUnit, r3);
+    oatMarkPair(cUnit, res.lowReg, res.highReg);
+    return res;
+}
+
+extern RegLocation oatGetReturn(CompilationUnit* cUnit)
+{
+    RegLocation res = LOC_C_RETURN;
+    oatClobber(cUnit, r0);
+    oatMarkInUse(cUnit, r0);
+    return res;
+}
+
+extern RegLocation oatGetReturnAlt(CompilationUnit* cUnit)
+{
+    RegLocation res = LOC_C_RETURN;
+    res.lowReg = r1;
+    oatClobber(cUnit, r1);
+    oatMarkInUse(cUnit, r1);
+    return res;
+}
diff --git a/src/compiler/codegen/arm/Assemble.cc b/src/compiler/codegen/arm/Assemble.cc
new file mode 100644
index 0000000..a601286
--- /dev/null
+++ b/src/compiler/codegen/arm/Assemble.cc
@@ -0,0 +1,1581 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "../../Dalvik.h"
+#include "../../CompilerInternals.h"
+#include "ArmLIR.h"
+#include "Codegen.h"
+#include <sys/mman.h>           /* for protection change */
+
+//#define TESTMODE
+#ifdef TESTMODE
+#include <cutils/ashmem.h>      /* for oat testing */
+#include <unistd.h>
+#endif
+
+#define MAX_ASSEMBLER_RETRIES 50
+
+/*
+ * opcode: ArmOpcode enum
+ * skeleton: pre-designated bit-pattern for this opcode
+ * k0: key to applying ds/de
+ * ds: dest start bit position
+ * de: dest end bit position
+ * k1: key to applying s1s/s1e
+ * s1s: src1 start bit position
+ * s1e: src1 end bit position
+ * k2: key to applying s2s/s2e
+ * s2s: src2 start bit position
+ * s2e: src2 end bit position
+ * operands: number of operands (for sanity check purposes)
+ * name: mnemonic name
+ * fmt: for pretty-printing
+ */
+#define ENCODING_MAP(opcode, skeleton, k0, ds, de, k1, s1s, s1e, k2, s2s, s2e, \
+                     k3, k3s, k3e, flags, name, fmt, size) \
+        {skeleton, {{k0, ds, de}, {k1, s1s, s1e}, {k2, s2s, s2e}, \
+                    {k3, k3s, k3e}}, opcode, flags, name, fmt, size}
+
+/* Instruction dump string format keys: !pf, where "!" is the start
+ * of the key, "p" is which numeric operand to use and "f" is the
+ * print format.
+ *
+ * [p]ositions:
+ *     0 -> operands[0] (dest)
+ *     1 -> operands[1] (src1)
+ *     2 -> operands[2] (src2)
+ *     3 -> operands[3] (extra)
+ *
+ * [f]ormats:
+ *     h -> 4-digit hex
+ *     d -> decimal
+ *     E -> decimal*4
+ *     F -> decimal*2
+ *     c -> branch condition (beq, bne, etc.)
+ *     t -> pc-relative target
+ *     u -> 1st half of bl[x] target
+ *     v -> 2nd half ob bl[x] target
+ *     R -> register list
+ *     s -> single precision floating point register
+ *     S -> double precision floating point register
+ *     m -> Thumb2 modified immediate
+ *     n -> complimented Thumb2 modified immediate
+ *     M -> Thumb2 16-bit zero-extended immediate
+ *     b -> 4-digit binary
+ *     B -> dmb option string (sy, st, ish, ishst, nsh, hshst)
+ *     H -> operand shift
+ *     C -> core register name
+ *     P -> fp cs register list (base of s16)
+ *     Q -> fp cs register list (base of s0)
+ *
+ *  [!] escape.  To insert "!", use "!!"
+ */
+/* NOTE: must be kept in sync with enum ArmOpcode from ArmLIR.h */
+ArmEncodingMap EncodingMap[kArmLast] = {
+    ENCODING_MAP(kArm16BitData,    0x0000,
+                 kFmtBitBlt, 15, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_UNARY_OP, "data", "0x!0h(!0d)", 1),
+    ENCODING_MAP(kThumbAdcRR,        0x4140,
+                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1,
+                 IS_BINARY_OP | REG_DEF0_USE01 | SETS_CCODES | USES_CCODES,
+                 "adcs", "!0C, !1C", 1),
+    ENCODING_MAP(kThumbAddRRI3,      0x1c00,
+                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6,
+                 kFmtUnused, -1, -1,
+                 IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES,
+                 "adds", "!0C, !1C, #!2d", 1),
+    ENCODING_MAP(kThumbAddRI8,       0x3000,
+                 kFmtBitBlt, 10, 8, kFmtBitBlt, 7, 0, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1,
+                 IS_BINARY_OP | REG_DEF0_USE0 | SETS_CCODES,
+                 "adds", "!0C, !0C, #!1d", 1),
+    ENCODING_MAP(kThumbAddRRR,       0x1800,
+                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6,
+                 kFmtUnused, -1, -1,
+                 IS_TERTIARY_OP | REG_DEF0_USE12 | SETS_CCODES,
+                 "adds", "!0C, !1C, !2C", 1),
+    ENCODING_MAP(kThumbAddRRLH,     0x4440,
+                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE01,
+                 "add", "!0C, !1C", 1),
+    ENCODING_MAP(kThumbAddRRHL,     0x4480,
+                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE01,
+                 "add", "!0C, !1C", 1),
+    ENCODING_MAP(kThumbAddRRHH,     0x44c0,
+                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE01,
+                 "add", "!0C, !1C", 1),
+    ENCODING_MAP(kThumbAddPcRel,    0xa000,
+                 kFmtBitBlt, 10, 8, kFmtBitBlt, 7, 0, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | IS_BRANCH,
+                 "add", "!0C, pc, #!1E", 1),
+    ENCODING_MAP(kThumbAddSpRel,    0xa800,
+                 kFmtBitBlt, 10, 8, kFmtUnused, -1, -1, kFmtBitBlt, 7, 0,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF_SP | REG_USE_SP,
+                 "add", "!0C, sp, #!2E", 1),
+    ENCODING_MAP(kThumbAddSpI7,      0xb000,
+                 kFmtBitBlt, 6, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_UNARY_OP | REG_DEF_SP | REG_USE_SP,
+                 "add", "sp, #!0d*4", 1),
+    ENCODING_MAP(kThumbAndRR,        0x4000,
+                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1,
+                 IS_BINARY_OP | REG_DEF0_USE01 | SETS_CCODES,
+                 "ands", "!0C, !1C", 1),
+    ENCODING_MAP(kThumbAsrRRI5,      0x1000,
+                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 10, 6,
+                 kFmtUnused, -1, -1,
+                 IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES,
+                 "asrs", "!0C, !1C, #!2d", 1),
+    ENCODING_MAP(kThumbAsrRR,        0x4100,
+                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1,
+                 IS_BINARY_OP | REG_DEF0_USE01 | SETS_CCODES,
+                 "asrs", "!0C, !1C", 1),
+    ENCODING_MAP(kThumbBCond,        0xd000,
+                 kFmtBitBlt, 7, 0, kFmtBitBlt, 11, 8, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | IS_BRANCH | USES_CCODES,
+                 "b!1c", "!0t", 1),
+    ENCODING_MAP(kThumbBUncond,      0xe000,
+                 kFmtBitBlt, 10, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, NO_OPERAND | IS_BRANCH,
+                 "b", "!0t", 1),
+    ENCODING_MAP(kThumbBicRR,        0x4380,
+                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1,
+                 IS_BINARY_OP | REG_DEF0_USE01 | SETS_CCODES,
+                 "bics", "!0C, !1C", 1),
+    ENCODING_MAP(kThumbBkpt,          0xbe00,
+                 kFmtBitBlt, 7, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_UNARY_OP | IS_BRANCH,
+                 "bkpt", "!0d", 1),
+    ENCODING_MAP(kThumbBlx1,         0xf000,
+                 kFmtBitBlt, 10, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | IS_BRANCH | REG_DEF_LR,
+                 "blx_1", "!0u", 1),
+    ENCODING_MAP(kThumbBlx2,         0xe800,
+                 kFmtBitBlt, 10, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | IS_BRANCH | REG_DEF_LR,
+                 "blx_2", "!0v", 1),
+    ENCODING_MAP(kThumbBl1,          0xf000,
+                 kFmtBitBlt, 10, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_UNARY_OP | IS_BRANCH | REG_DEF_LR,
+                 "bl_1", "!0u", 1),
+    ENCODING_MAP(kThumbBl2,          0xf800,
+                 kFmtBitBlt, 10, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_UNARY_OP | IS_BRANCH | REG_DEF_LR,
+                 "bl_2", "!0v", 1),
+    ENCODING_MAP(kThumbBlxR,         0x4780,
+                 kFmtBitBlt, 6, 3, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1,
+                 IS_UNARY_OP | REG_USE0 | IS_BRANCH | REG_DEF_LR,
+                 "blx", "!0C", 1),
+    ENCODING_MAP(kThumbBx,            0x4700,
+                 kFmtBitBlt, 6, 3, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_UNARY_OP | IS_BRANCH,
+                 "bx", "!0C", 1),
+    ENCODING_MAP(kThumbCmnRR,        0x42c0,
+                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE01 | SETS_CCODES,
+                 "cmn", "!0C, !1C", 1),
+    ENCODING_MAP(kThumbCmpRI8,       0x2800,
+                 kFmtBitBlt, 10, 8, kFmtBitBlt, 7, 0, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE0 | SETS_CCODES,
+                 "cmp", "!0C, #!1d", 1),
+    ENCODING_MAP(kThumbCmpRR,        0x4280,
+                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE01 | SETS_CCODES,
+                 "cmp", "!0C, !1C", 1),
+    ENCODING_MAP(kThumbCmpLH,        0x4540,
+                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE01 | SETS_CCODES,
+                 "cmp", "!0C, !1C", 1),
+    ENCODING_MAP(kThumbCmpHL,        0x4580,
+                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE01 | SETS_CCODES,
+                 "cmp", "!0C, !1C", 1),
+    ENCODING_MAP(kThumbCmpHH,        0x45c0,
+                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE01 | SETS_CCODES,
+                 "cmp", "!0C, !1C", 1),
+    ENCODING_MAP(kThumbEorRR,        0x4040,
+                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1,
+                 IS_BINARY_OP | REG_DEF0_USE01 | SETS_CCODES,
+                 "eors", "!0C, !1C", 1),
+    ENCODING_MAP(kThumbLdmia,         0xc800,
+                 kFmtBitBlt, 10, 8, kFmtBitBlt, 7, 0, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1,
+                 IS_BINARY_OP | REG_DEF0_USE0 | REG_DEF_LIST1 | IS_LOAD,
+                 "ldmia", "!0C!!, <!1R>", 1),
+    ENCODING_MAP(kThumbLdrRRI5,      0x6800,
+                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 10, 6,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD,
+                 "ldr", "!0C, [!1C, #!2E]", 1),
+    ENCODING_MAP(kThumbLdrRRR,       0x5800,
+                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12 | IS_LOAD,
+                 "ldr", "!0C, [!1C, !2C]", 1),
+    ENCODING_MAP(kThumbLdrPcRel,    0x4800,
+                 kFmtBitBlt, 10, 8, kFmtBitBlt, 7, 0, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0 | REG_USE_PC
+                 | IS_LOAD, "ldr", "!0C, [pc, #!1E]", 1),
+    ENCODING_MAP(kThumbLdrSpRel,    0x9800,
+                 kFmtBitBlt, 10, 8, kFmtUnused, -1, -1, kFmtBitBlt, 7, 0,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0 | REG_USE_SP
+                 | IS_LOAD, "ldr", "!0C, [sp, #!2E]", 1),
+    ENCODING_MAP(kThumbLdrbRRI5,     0x7800,
+                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 10, 6,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD,
+                 "ldrb", "!0C, [!1C, #2d]", 1),
+    ENCODING_MAP(kThumbLdrbRRR,      0x5c00,
+                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12 | IS_LOAD,
+                 "ldrb", "!0C, [!1C, !2C]", 1),
+    ENCODING_MAP(kThumbLdrhRRI5,     0x8800,
+                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 10, 6,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD,
+                 "ldrh", "!0C, [!1C, #!2F]", 1),
+    ENCODING_MAP(kThumbLdrhRRR,      0x5a00,
+                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12 | IS_LOAD,
+                 "ldrh", "!0C, [!1C, !2C]", 1),
+    ENCODING_MAP(kThumbLdrsbRRR,     0x5600,
+                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12 | IS_LOAD,
+                 "ldrsb", "!0C, [!1C, !2C]", 1),
+    ENCODING_MAP(kThumbLdrshRRR,     0x5e00,
+                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12 | IS_LOAD,
+                 "ldrsh", "!0C, [!1C, !2C]", 1),
+    ENCODING_MAP(kThumbLslRRI5,      0x0000,
+                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 10, 6,
+                 kFmtUnused, -1, -1,
+                 IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES,
+                 "lsls", "!0C, !1C, #!2d", 1),
+    ENCODING_MAP(kThumbLslRR,        0x4080,
+                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1,
+                 IS_BINARY_OP | REG_DEF0_USE01 | SETS_CCODES,
+                 "lsls", "!0C, !1C", 1),
+    ENCODING_MAP(kThumbLsrRRI5,      0x0800,
+                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 10, 6,
+                 kFmtUnused, -1, -1,
+                 IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES,
+                 "lsrs", "!0C, !1C, #!2d", 1),
+    ENCODING_MAP(kThumbLsrRR,        0x40c0,
+                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1,
+                 IS_BINARY_OP | REG_DEF0_USE01 | SETS_CCODES,
+                 "lsrs", "!0C, !1C", 1),
+    ENCODING_MAP(kThumbMovImm,       0x2000,
+                 kFmtBitBlt, 10, 8, kFmtBitBlt, 7, 0, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1,
+                 IS_BINARY_OP | REG_DEF0 | SETS_CCODES,
+                 "movs", "!0C, #!1d", 1),
+    ENCODING_MAP(kThumbMovRR,        0x1c00,
+                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1,
+                 IS_BINARY_OP | REG_DEF0_USE1 | SETS_CCODES,
+                 "movs", "!0C, !1C", 1),
+    ENCODING_MAP(kThumbMovRR_H2H,    0x46c0,
+                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
+                 "mov", "!0C, !1C", 1),
+    ENCODING_MAP(kThumbMovRR_H2L,    0x4640,
+                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
+                 "mov", "!0C, !1C", 1),
+    ENCODING_MAP(kThumbMovRR_L2H,    0x4680,
+                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
+                 "mov", "!0C, !1C", 1),
+    ENCODING_MAP(kThumbMul,           0x4340,
+                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1,
+                 IS_BINARY_OP | REG_DEF0_USE01 | SETS_CCODES,
+                 "muls", "!0C, !1C", 1),
+    ENCODING_MAP(kThumbMvn,           0x43c0,
+                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1,
+                 IS_BINARY_OP | REG_DEF0_USE1 | SETS_CCODES,
+                 "mvns", "!0C, !1C", 1),
+    ENCODING_MAP(kThumbNeg,           0x4240,
+                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1,
+                 IS_BINARY_OP | REG_DEF0_USE1 | SETS_CCODES,
+                 "negs", "!0C, !1C", 1),
+    ENCODING_MAP(kThumbOrr,           0x4300,
+                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1,
+                 IS_BINARY_OP | REG_DEF0_USE01 | SETS_CCODES,
+                 "orrs", "!0C, !1C", 1),
+    ENCODING_MAP(kThumbPop,           0xbc00,
+                 kFmtBitBlt, 8, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1,
+                 IS_UNARY_OP | REG_DEF_SP | REG_USE_SP | REG_DEF_LIST0
+                 | IS_LOAD, "pop", "<!0R>", 1),
+    ENCODING_MAP(kThumbPush,          0xb400,
+                 kFmtBitBlt, 8, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1,
+                 IS_UNARY_OP | REG_DEF_SP | REG_USE_SP | REG_USE_LIST0
+                 | IS_STORE, "push", "<!0R>", 1),
+    ENCODING_MAP(kThumbRorRR,        0x41c0,
+                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1,
+                 IS_BINARY_OP | REG_DEF0_USE01 | SETS_CCODES,
+                 "rors", "!0C, !1C", 1),
+    ENCODING_MAP(kThumbSbc,           0x4180,
+                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1,
+                 IS_BINARY_OP | REG_DEF0_USE01 | USES_CCODES | SETS_CCODES,
+                 "sbcs", "!0C, !1C", 1),
+    ENCODING_MAP(kThumbStmia,         0xc000,
+                 kFmtBitBlt, 10, 8, kFmtBitBlt, 7, 0, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1,
+                 IS_BINARY_OP | REG_DEF0 | REG_USE0 | REG_USE_LIST1 | IS_STORE,
+                 "stmia", "!0C!!, <!1R>", 1),
+    ENCODING_MAP(kThumbStrRRI5,      0x6000,
+                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 10, 6,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE,
+                 "str", "!0C, [!1C, #!2E]", 1),
+    ENCODING_MAP(kThumbStrRRR,       0x5000,
+                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE012 | IS_STORE,
+                 "str", "!0C, [!1C, !2C]", 1),
+    ENCODING_MAP(kThumbStrSpRel,    0x9000,
+                 kFmtBitBlt, 10, 8, kFmtUnused, -1, -1, kFmtBitBlt, 7, 0,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE0 | REG_USE_SP
+                 | IS_STORE, "str", "!0C, [sp, #!2E]", 1),
+    ENCODING_MAP(kThumbStrbRRI5,     0x7000,
+                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 10, 6,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE,
+                 "strb", "!0C, [!1C, #!2d]", 1),
+    ENCODING_MAP(kThumbStrbRRR,      0x5400,
+                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE012 | IS_STORE,
+                 "strb", "!0C, [!1C, !2C]", 1),
+    ENCODING_MAP(kThumbStrhRRI5,     0x8000,
+                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 10, 6,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE,
+                 "strh", "!0C, [!1C, #!2F]", 1),
+    ENCODING_MAP(kThumbStrhRRR,      0x5200,
+                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE012 | IS_STORE,
+                 "strh", "!0C, [!1C, !2C]", 1),
+    ENCODING_MAP(kThumbSubRRI3,      0x1e00,
+                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6,
+                 kFmtUnused, -1, -1,
+                 IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES,
+                 "subs", "!0C, !1C, #!2d", 1),
+    ENCODING_MAP(kThumbSubRI8,       0x3800,
+                 kFmtBitBlt, 10, 8, kFmtBitBlt, 7, 0, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1,
+                 IS_BINARY_OP | REG_DEF0_USE0 | SETS_CCODES,
+                 "subs", "!0C, #!1d", 1),
+    ENCODING_MAP(kThumbSubRRR,       0x1a00,
+                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtBitBlt, 8, 6,
+                 kFmtUnused, -1, -1,
+                 IS_TERTIARY_OP | REG_DEF0_USE12 | SETS_CCODES,
+                 "subs", "!0C, !1C, !2C", 1),
+    ENCODING_MAP(kThumbSubSpI7,      0xb080,
+                 kFmtBitBlt, 6, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1,
+                 IS_UNARY_OP | REG_DEF_SP | REG_USE_SP,
+                 "sub", "sp, #!0d*4", 1),
+    ENCODING_MAP(kThumbSwi,           0xdf00,
+                 kFmtBitBlt, 7, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,                       kFmtUnused, -1, -1, IS_UNARY_OP | IS_BRANCH,
+                 "swi", "!0d", 1),
+    ENCODING_MAP(kThumbTst,           0x4200,
+                 kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_UNARY_OP | REG_USE01 | SETS_CCODES,
+                 "tst", "!0C, !1C", 1),
+    ENCODING_MAP(kThumb2Vldrs,       0xed900a00,
+                 kFmtSfp, 22, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 7, 0,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD,
+                 "vldr", "!0s, [!1C, #!2E]", 2),
+    ENCODING_MAP(kThumb2Vldrd,       0xed900b00,
+                 kFmtDfp, 22, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 7, 0,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD,
+                 "vldr", "!0S, [!1C, #!2E]", 2),
+    ENCODING_MAP(kThumb2Vmuls,        0xee200a00,
+                 kFmtSfp, 22, 12, kFmtSfp, 7, 16, kFmtSfp, 5, 0,
+                 kFmtUnused, -1, -1,
+                 IS_TERTIARY_OP | REG_DEF0_USE12,
+                 "vmuls", "!0s, !1s, !2s", 2),
+    ENCODING_MAP(kThumb2Vmuld,        0xee200b00,
+                 kFmtDfp, 22, 12, kFmtDfp, 7, 16, kFmtDfp, 5, 0,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
+                 "vmuld", "!0S, !1S, !2S", 2),
+    ENCODING_MAP(kThumb2Vstrs,       0xed800a00,
+                 kFmtSfp, 22, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 7, 0,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE,
+                 "vstr", "!0s, [!1C, #!2E]", 2),
+    ENCODING_MAP(kThumb2Vstrd,       0xed800b00,
+                 kFmtDfp, 22, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 7, 0,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE,
+                 "vstr", "!0S, [!1C, #!2E]", 2),
+    ENCODING_MAP(kThumb2Vsubs,        0xee300a40,
+                 kFmtSfp, 22, 12, kFmtSfp, 7, 16, kFmtSfp, 5, 0,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
+                 "vsub", "!0s, !1s, !2s", 2),
+    ENCODING_MAP(kThumb2Vsubd,        0xee300b40,
+                 kFmtDfp, 22, 12, kFmtDfp, 7, 16, kFmtDfp, 5, 0,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
+                 "vsub", "!0S, !1S, !2S", 2),
+    ENCODING_MAP(kThumb2Vadds,        0xee300a00,
+                 kFmtSfp, 22, 12, kFmtSfp, 7, 16, kFmtSfp, 5, 0,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
+                 "vadd", "!0s, !1s, !2s", 2),
+    ENCODING_MAP(kThumb2Vaddd,        0xee300b00,
+                 kFmtDfp, 22, 12, kFmtDfp, 7, 16, kFmtDfp, 5, 0,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
+                 "vadd", "!0S, !1S, !2S", 2),
+    ENCODING_MAP(kThumb2Vdivs,        0xee800a00,
+                 kFmtSfp, 22, 12, kFmtSfp, 7, 16, kFmtSfp, 5, 0,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
+                 "vdivs", "!0s, !1s, !2s", 2),
+    ENCODING_MAP(kThumb2Vdivd,        0xee800b00,
+                 kFmtDfp, 22, 12, kFmtDfp, 7, 16, kFmtDfp, 5, 0,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
+                 "vdivd", "!0S, !1S, !2S", 2),
+    ENCODING_MAP(kThumb2VcvtIF,       0xeeb80ac0,
+                 kFmtSfp, 22, 12, kFmtSfp, 5, 0, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
+                 "vcvt.f32", "!0s, !1s", 2),
+    ENCODING_MAP(kThumb2VcvtID,       0xeeb80bc0,
+                 kFmtDfp, 22, 12, kFmtSfp, 5, 0, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
+                 "vcvt.f64", "!0S, !1s", 2),
+    ENCODING_MAP(kThumb2VcvtFI,       0xeebd0ac0,
+                 kFmtSfp, 22, 12, kFmtSfp, 5, 0, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
+                 "vcvt.s32.f32 ", "!0s, !1s", 2),
+    ENCODING_MAP(kThumb2VcvtDI,       0xeebd0bc0,
+                 kFmtSfp, 22, 12, kFmtDfp, 5, 0, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
+                 "vcvt.s32.f64 ", "!0s, !1S", 2),
+    ENCODING_MAP(kThumb2VcvtFd,       0xeeb70ac0,
+                 kFmtDfp, 22, 12, kFmtSfp, 5, 0, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
+                 "vcvt.f64.f32 ", "!0S, !1s", 2),
+    ENCODING_MAP(kThumb2VcvtDF,       0xeeb70bc0,
+                 kFmtSfp, 22, 12, kFmtDfp, 5, 0, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
+                 "vcvt.f32.f64 ", "!0s, !1S", 2),
+    ENCODING_MAP(kThumb2Vsqrts,       0xeeb10ac0,
+                 kFmtSfp, 22, 12, kFmtSfp, 5, 0, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
+                 "vsqrt.f32 ", "!0s, !1s", 2),
+    ENCODING_MAP(kThumb2Vsqrtd,       0xeeb10bc0,
+                 kFmtDfp, 22, 12, kFmtDfp, 5, 0, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
+                 "vsqrt.f64 ", "!0S, !1S", 2),
+    ENCODING_MAP(kThumb2MovImmShift, 0xf04f0000, /* no setflags encoding */
+                 kFmtBitBlt, 11, 8, kFmtModImm, -1, -1, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0,
+                 "mov", "!0C, #!1m", 2),
+    ENCODING_MAP(kThumb2MovImm16,       0xf2400000,
+                 kFmtBitBlt, 11, 8, kFmtImm16, -1, -1, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0,
+                 "mov", "!0C, #!1M", 2),
+    ENCODING_MAP(kThumb2StrRRI12,       0xf8c00000,
+                 kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 11, 0,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE,
+                 "str", "!0C, [!1C, #!2d]", 2),
+    ENCODING_MAP(kThumb2LdrRRI12,       0xf8d00000,
+                 kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 11, 0,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD,
+                 "ldr", "!0C, [!1C, #!2d]", 2),
+    ENCODING_MAP(kThumb2StrRRI8Predec,       0xf8400c00,
+                 kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 8, 0,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE,
+                 "str", "!0C, [!1C, #-!2d]", 2),
+    ENCODING_MAP(kThumb2LdrRRI8Predec,       0xf8500c00,
+                 kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 8, 0,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD,
+                 "ldr", "!0C, [!1C, #-!2d]", 2),
+    ENCODING_MAP(kThumb2Cbnz,       0xb900, /* Note: does not affect flags */
+                 kFmtBitBlt, 2, 0, kFmtImm6, -1, -1, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE0 | IS_BRANCH,
+                 "cbnz", "!0C,!1t", 1),
+    ENCODING_MAP(kThumb2Cbz,       0xb100, /* Note: does not affect flags */
+                 kFmtBitBlt, 2, 0, kFmtImm6, -1, -1, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE0 | IS_BRANCH,
+                 "cbz", "!0C,!1t", 1),
+    ENCODING_MAP(kThumb2AddRRI12,       0xf2000000,
+                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtImm12, -1, -1,
+                 kFmtUnused, -1, -1,
+                 IS_TERTIARY_OP | REG_DEF0_USE1,/* Note: doesn't affect flags */
+                 "add", "!0C,!1C,#!2d", 2),
+    ENCODING_MAP(kThumb2MovRR,       0xea4f0000, /* no setflags encoding */
+                 kFmtBitBlt, 11, 8, kFmtBitBlt, 3, 0, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
+                 "mov", "!0C, !1C", 2),
+    ENCODING_MAP(kThumb2Vmovs,       0xeeb00a40,
+                 kFmtSfp, 22, 12, kFmtSfp, 5, 0, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
+                 "vmov.f32 ", " !0s, !1s", 2),
+    ENCODING_MAP(kThumb2Vmovd,       0xeeb00b40,
+                 kFmtDfp, 22, 12, kFmtDfp, 5, 0, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
+                 "vmov.f64 ", " !0S, !1S", 2),
+    ENCODING_MAP(kThumb2Ldmia,         0xe8900000,
+                 kFmtBitBlt, 19, 16, kFmtBitBlt, 15, 0, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1,
+                 IS_BINARY_OP | REG_DEF0_USE0 | REG_DEF_LIST1 | IS_LOAD,
+                 "ldmia", "!0C!!, <!1R>", 2),
+    ENCODING_MAP(kThumb2Stmia,         0xe8800000,
+                 kFmtBitBlt, 19, 16, kFmtBitBlt, 15, 0, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1,
+                 IS_BINARY_OP | REG_DEF0_USE0 | REG_USE_LIST1 | IS_STORE,
+                 "stmia", "!0C!!, <!1R>", 2),
+    ENCODING_MAP(kThumb2AddRRR,  0xeb100000, /* setflags encoding */
+                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
+                 kFmtShift, -1, -1,
+                 IS_QUAD_OP | REG_DEF0_USE12 | SETS_CCODES,
+                 "adds", "!0C, !1C, !2C!3H", 2),
+    ENCODING_MAP(kThumb2SubRRR,       0xebb00000, /* setflags enconding */
+                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
+                 kFmtShift, -1, -1,
+                 IS_QUAD_OP | REG_DEF0_USE12 | SETS_CCODES,
+                 "subs", "!0C, !1C, !2C!3H", 2),
+    ENCODING_MAP(kThumb2SbcRRR,       0xeb700000, /* setflags encoding */
+                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
+                 kFmtShift, -1, -1,
+                 IS_QUAD_OP | REG_DEF0_USE12 | USES_CCODES | SETS_CCODES,
+                 "sbcs", "!0C, !1C, !2C!3H", 2),
+    ENCODING_MAP(kThumb2CmpRR,       0xebb00f00,
+                 kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, kFmtShift, -1, -1,
+                 kFmtUnused, -1, -1,
+                 IS_TERTIARY_OP | REG_USE01 | SETS_CCODES,
+                 "cmp", "!0C, !1C", 2),
+    ENCODING_MAP(kThumb2SubRRI12,       0xf2a00000,
+                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtImm12, -1, -1,
+                 kFmtUnused, -1, -1,
+                 IS_TERTIARY_OP | REG_DEF0_USE1,/* Note: doesn't affect flags */
+                 "sub", "!0C,!1C,#!2d", 2),
+    ENCODING_MAP(kThumb2MvnImmShift,  0xf06f0000, /* no setflags encoding */
+                 kFmtBitBlt, 11, 8, kFmtModImm, -1, -1, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0,
+                 "mvn", "!0C, #!1n", 2),
+    ENCODING_MAP(kThumb2Sel,       0xfaa0f080,
+                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
+                 kFmtUnused, -1, -1,
+                 IS_TERTIARY_OP | REG_DEF0_USE12 | USES_CCODES,
+                 "sel", "!0C, !1C, !2C", 2),
+    ENCODING_MAP(kThumb2Ubfx,       0xf3c00000,
+                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtLsb, -1, -1,
+                 kFmtBWidth, 4, 0, IS_QUAD_OP | REG_DEF0_USE1,
+                 "ubfx", "!0C, !1C, #!2d, #!3d", 2),
+    ENCODING_MAP(kThumb2Sbfx,       0xf3400000,
+                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtLsb, -1, -1,
+                 kFmtBWidth, 4, 0, IS_QUAD_OP | REG_DEF0_USE1,
+                 "sbfx", "!0C, !1C, #!2d, #!3d", 2),
+    ENCODING_MAP(kThumb2LdrRRR,    0xf8500000,
+                 kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
+                 kFmtBitBlt, 5, 4, IS_QUAD_OP | REG_DEF0_USE12 | IS_LOAD,
+                 "ldr", "!0C, [!1C, !2C, LSL #!3d]", 2),
+    ENCODING_MAP(kThumb2LdrhRRR,    0xf8300000,
+                 kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
+                 kFmtBitBlt, 5, 4, IS_QUAD_OP | REG_DEF0_USE12 | IS_LOAD,
+                 "ldrh", "!0C, [!1C, !2C, LSL #!3d]", 2),
+    ENCODING_MAP(kThumb2LdrshRRR,    0xf9300000,
+                 kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
+                 kFmtBitBlt, 5, 4, IS_QUAD_OP | REG_DEF0_USE12 | IS_LOAD,
+                 "ldrsh", "!0C, [!1C, !2C, LSL #!3d]", 2),
+    ENCODING_MAP(kThumb2LdrbRRR,    0xf8100000,
+                 kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
+                 kFmtBitBlt, 5, 4, IS_QUAD_OP | REG_DEF0_USE12 | IS_LOAD,
+                 "ldrb", "!0C, [!1C, !2C, LSL #!3d]", 2),
+    ENCODING_MAP(kThumb2LdrsbRRR,    0xf9100000,
+                 kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
+                 kFmtBitBlt, 5, 4, IS_QUAD_OP | REG_DEF0_USE12 | IS_LOAD,
+                 "ldrsb", "!0C, [!1C, !2C, LSL #!3d]", 2),
+    ENCODING_MAP(kThumb2StrRRR,    0xf8400000,
+                 kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
+                 kFmtBitBlt, 5, 4, IS_QUAD_OP | REG_USE012 | IS_STORE,
+                 "str", "!0C, [!1C, !2C, LSL #!3d]", 2),
+    ENCODING_MAP(kThumb2StrhRRR,    0xf8200000,
+                 kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
+                 kFmtBitBlt, 5, 4, IS_QUAD_OP | REG_USE012 | IS_STORE,
+                 "strh", "!0C, [!1C, !2C, LSL #!3d]", 2),
+    ENCODING_MAP(kThumb2StrbRRR,    0xf8000000,
+                 kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
+                 kFmtBitBlt, 5, 4, IS_QUAD_OP | REG_USE012 | IS_STORE,
+                 "strb", "!0C, [!1C, !2C, LSL #!3d]", 2),
+    ENCODING_MAP(kThumb2LdrhRRI12,       0xf8b00000,
+                 kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 11, 0,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD,
+                 "ldrh", "!0C, [!1C, #!2d]", 2),
+    ENCODING_MAP(kThumb2LdrshRRI12,       0xf9b00000,
+                 kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 11, 0,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD,
+                 "ldrsh", "!0C, [!1C, #!2d]", 2),
+    ENCODING_MAP(kThumb2LdrbRRI12,       0xf8900000,
+                 kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 11, 0,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD,
+                 "ldrb", "!0C, [!1C, #!2d]", 2),
+    ENCODING_MAP(kThumb2LdrsbRRI12,       0xf9900000,
+                 kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 11, 0,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD,
+                 "ldrsb", "!0C, [!1C, #!2d]", 2),
+    ENCODING_MAP(kThumb2StrhRRI12,       0xf8a00000,
+                 kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 11, 0,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE,
+                 "strh", "!0C, [!1C, #!2d]", 2),
+    ENCODING_MAP(kThumb2StrbRRI12,       0xf8800000,
+                 kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 11, 0,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_USE01 | IS_STORE,
+                 "strb", "!0C, [!1C, #!2d]", 2),
+    ENCODING_MAP(kThumb2Pop,           0xe8bd0000,
+                 kFmtBitBlt, 15, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1,
+                 IS_UNARY_OP | REG_DEF_SP | REG_USE_SP | REG_DEF_LIST0
+                 | IS_LOAD, "pop", "<!0R>", 2),
+    ENCODING_MAP(kThumb2Push,          0xe92d0000,
+                 kFmtBitBlt, 15, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1,
+                 IS_UNARY_OP | REG_DEF_SP | REG_USE_SP | REG_USE_LIST0
+                 | IS_STORE, "push", "<!0R>", 2),
+    ENCODING_MAP(kThumb2CmpRI8, 0xf1b00f00,
+                 kFmtBitBlt, 19, 16, kFmtModImm, -1, -1, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1,
+                 IS_BINARY_OP | REG_USE0 | SETS_CCODES,
+                 "cmp", "!0C, #!1m", 2),
+    ENCODING_MAP(kThumb2AdcRRR,  0xeb500000, /* setflags encoding */
+                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
+                 kFmtShift, -1, -1,
+                 IS_QUAD_OP | REG_DEF0_USE12 | SETS_CCODES,
+                 "adcs", "!0C, !1C, !2C!3H", 2),
+    ENCODING_MAP(kThumb2AndRRR,  0xea000000,
+                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
+                 kFmtShift, -1, -1, IS_QUAD_OP | REG_DEF0_USE12,
+                 "and", "!0C, !1C, !2C!3H", 2),
+    ENCODING_MAP(kThumb2BicRRR,  0xea200000,
+                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
+                 kFmtShift, -1, -1, IS_QUAD_OP | REG_DEF0_USE12,
+                 "bic", "!0C, !1C, !2C!3H", 2),
+    ENCODING_MAP(kThumb2CmnRR,  0xeb000000,
+                 kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, kFmtShift, -1, -1,
+                 kFmtUnused, -1, -1,
+                 IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES,
+                 "cmn", "!0C, !1C, shift !2d", 2),
+    ENCODING_MAP(kThumb2EorRRR,  0xea800000,
+                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
+                 kFmtShift, -1, -1, IS_QUAD_OP | REG_DEF0_USE12,
+                 "eor", "!0C, !1C, !2C!3H", 2),
+    ENCODING_MAP(kThumb2MulRRR,  0xfb00f000,
+                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
+                 "mul", "!0C, !1C, !2C", 2),
+    ENCODING_MAP(kThumb2MnvRR,  0xea6f0000,
+                 kFmtBitBlt, 11, 8, kFmtBitBlt, 3, 0, kFmtShift, -1, -1,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
+                 "mvn", "!0C, !1C, shift !2d", 2),
+    ENCODING_MAP(kThumb2RsubRRI8,       0xf1d00000,
+                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1,
+                 kFmtUnused, -1, -1,
+                 IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES,
+                 "rsb", "!0C,!1C,#!2m", 2),
+    ENCODING_MAP(kThumb2NegRR,       0xf1d00000, /* instance of rsub */
+                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1,
+                 IS_BINARY_OP | REG_DEF0_USE1 | SETS_CCODES,
+                 "neg", "!0C,!1C", 2),
+    ENCODING_MAP(kThumb2OrrRRR,  0xea400000,
+                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
+                 kFmtShift, -1, -1, IS_QUAD_OP | REG_DEF0_USE12,
+                 "orr", "!0C, !1C, !2C!3H", 2),
+    ENCODING_MAP(kThumb2TstRR,       0xea100f00,
+                 kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0, kFmtShift, -1, -1,
+                 kFmtUnused, -1, -1,
+                 IS_TERTIARY_OP | REG_USE01 | SETS_CCODES,
+                 "tst", "!0C, !1C, shift !2d", 2),
+    ENCODING_MAP(kThumb2LslRRR,  0xfa00f000,
+                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
+                 "lsl", "!0C, !1C, !2C", 2),
+    ENCODING_MAP(kThumb2LsrRRR,  0xfa20f000,
+                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
+                 "lsr", "!0C, !1C, !2C", 2),
+    ENCODING_MAP(kThumb2AsrRRR,  0xfa40f000,
+                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
+                 "asr", "!0C, !1C, !2C", 2),
+    ENCODING_MAP(kThumb2RorRRR,  0xfa60f000,
+                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
+                 "ror", "!0C, !1C, !2C", 2),
+    ENCODING_MAP(kThumb2LslRRI5,  0xea4f0000,
+                 kFmtBitBlt, 11, 8, kFmtBitBlt, 3, 0, kFmtShift5, -1, -1,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
+                 "lsl", "!0C, !1C, #!2d", 2),
+    ENCODING_MAP(kThumb2LsrRRI5,  0xea4f0010,
+                 kFmtBitBlt, 11, 8, kFmtBitBlt, 3, 0, kFmtShift5, -1, -1,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
+                 "lsr", "!0C, !1C, #!2d", 2),
+    ENCODING_MAP(kThumb2AsrRRI5,  0xea4f0020,
+                 kFmtBitBlt, 11, 8, kFmtBitBlt, 3, 0, kFmtShift5, -1, -1,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
+                 "asr", "!0C, !1C, #!2d", 2),
+    ENCODING_MAP(kThumb2RorRRI5,  0xea4f0030,
+                 kFmtBitBlt, 11, 8, kFmtBitBlt, 3, 0, kFmtShift5, -1, -1,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
+                 "ror", "!0C, !1C, #!2d", 2),
+    ENCODING_MAP(kThumb2BicRRI8,  0xf0200000,
+                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
+                 "bic", "!0C, !1C, #!2m", 2),
+    ENCODING_MAP(kThumb2AndRRI8,  0xf0000000,
+                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
+                 "and", "!0C, !1C, #!2m", 2),
+    ENCODING_MAP(kThumb2OrrRRI8,  0xf0400000,
+                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
+                 "orr", "!0C, !1C, #!2m", 2),
+    ENCODING_MAP(kThumb2EorRRI8,  0xf0800000,
+                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
+                 "eor", "!0C, !1C, #!2m", 2),
+    ENCODING_MAP(kThumb2AddRRI8,  0xf1100000,
+                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1,
+                 kFmtUnused, -1, -1,
+                 IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES,
+                 "adds", "!0C, !1C, #!2m", 2),
+    ENCODING_MAP(kThumb2AdcRRI8,  0xf1500000,
+                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1,
+                 kFmtUnused, -1, -1,
+                 IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES | USES_CCODES,
+                 "adcs", "!0C, !1C, #!2m", 2),
+    ENCODING_MAP(kThumb2SubRRI8,  0xf1b00000,
+                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1,
+                 kFmtUnused, -1, -1,
+                 IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES,
+                 "subs", "!0C, !1C, #!2m", 2),
+    ENCODING_MAP(kThumb2SbcRRI8,  0xf1700000,
+                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtModImm, -1, -1,
+                 kFmtUnused, -1, -1,
+                 IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES | USES_CCODES,
+                 "sbcs", "!0C, !1C, #!2m", 2),
+    ENCODING_MAP(kThumb2It,  0xbf00,
+                 kFmtBitBlt, 7, 4, kFmtBitBlt, 3, 0, kFmtModImm, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | IS_IT | USES_CCODES,
+                 "it:!1b", "!0c", 1),
+    ENCODING_MAP(kThumb2Fmstat,  0xeef1fa10,
+                 kFmtUnused, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, NO_OPERAND | SETS_CCODES,
+                 "fmstat", "", 2),
+    ENCODING_MAP(kThumb2Vcmpd,        0xeeb40b40,
+                 kFmtDfp, 22, 12, kFmtDfp, 5, 0, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE01,
+                 "vcmp.f64", "!0S, !1S", 2),
+    ENCODING_MAP(kThumb2Vcmps,        0xeeb40a40,
+                 kFmtSfp, 22, 12, kFmtSfp, 5, 0, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE01,
+                 "vcmp.f32", "!0s, !1s", 2),
+    ENCODING_MAP(kThumb2LdrPcRel12,       0xf8df0000,
+                 kFmtBitBlt, 15, 12, kFmtBitBlt, 11, 0, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1,
+                 IS_TERTIARY_OP | REG_DEF0 | REG_USE_PC | IS_LOAD,
+                 "ldr", "!0C, [r15pc, #!1d]", 2),
+    ENCODING_MAP(kThumb2BCond,        0xf0008000,
+                 kFmtBrOffset, -1, -1, kFmtBitBlt, 25, 22, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1,
+                 IS_BINARY_OP | IS_BRANCH | USES_CCODES,
+                 "b!1c", "!0t", 2),
+    ENCODING_MAP(kThumb2Vmovd_RR,       0xeeb00b40,
+                 kFmtDfp, 22, 12, kFmtDfp, 5, 0, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
+                 "vmov.f64", "!0S, !1S", 2),
+    ENCODING_MAP(kThumb2Vmovs_RR,       0xeeb00a40,
+                 kFmtSfp, 22, 12, kFmtSfp, 5, 0, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
+                 "vmov.f32", "!0s, !1s", 2),
+    ENCODING_MAP(kThumb2Fmrs,       0xee100a10,
+                 kFmtBitBlt, 15, 12, kFmtSfp, 7, 16, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
+                 "fmrs", "!0C, !1s", 2),
+    ENCODING_MAP(kThumb2Fmsr,       0xee000a10,
+                 kFmtSfp, 7, 16, kFmtBitBlt, 15, 12, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
+                 "fmsr", "!0s, !1C", 2),
+    ENCODING_MAP(kThumb2Fmrrd,       0xec500b10,
+                 kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtDfp, 5, 0,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF01_USE2,
+                 "fmrrd", "!0C, !1C, !2S", 2),
+    ENCODING_MAP(kThumb2Fmdrr,       0xec400b10,
+                 kFmtDfp, 5, 0, kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
+                 "fmdrr", "!0S, !1C, !2C", 2),
+    ENCODING_MAP(kThumb2Vabsd,       0xeeb00bc0,
+                 kFmtDfp, 22, 12, kFmtDfp, 5, 0, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
+                 "vabs.f64", "!0S, !1S", 2),
+    ENCODING_MAP(kThumb2Vabss,       0xeeb00ac0,
+                 kFmtSfp, 22, 12, kFmtSfp, 5, 0, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
+                 "vabs.f32", "!0s, !1s", 2),
+    ENCODING_MAP(kThumb2Vnegd,       0xeeb10b40,
+                 kFmtDfp, 22, 12, kFmtDfp, 5, 0, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
+                 "vneg.f64", "!0S, !1S", 2),
+    ENCODING_MAP(kThumb2Vnegs,       0xeeb10a40,
+                 kFmtSfp, 22, 12, kFmtSfp, 5, 0, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0_USE1,
+                 "vneg.f32", "!0s, !1s", 2),
+    ENCODING_MAP(kThumb2Vmovs_IMM8,       0xeeb00a00,
+                 kFmtSfp, 22, 12, kFmtFPImm, 16, 0, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0,
+                 "vmov.f32", "!0s, #0x!1h", 2),
+    ENCODING_MAP(kThumb2Vmovd_IMM8,       0xeeb00b00,
+                 kFmtDfp, 22, 12, kFmtFPImm, 16, 0, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0,
+                 "vmov.f64", "!0S, #0x!1h", 2),
+    ENCODING_MAP(kThumb2Mla,  0xfb000000,
+                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
+                 kFmtBitBlt, 15, 12,
+                 IS_QUAD_OP | REG_DEF0 | REG_USE1 | REG_USE2 | REG_USE3,
+                 "mla", "!0C, !1C, !2C, !3C", 2),
+    ENCODING_MAP(kThumb2Umull,  0xfba00000,
+                 kFmtBitBlt, 15, 12, kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16,
+                 kFmtBitBlt, 3, 0,
+                 IS_QUAD_OP | REG_DEF0 | REG_DEF1 | REG_USE2 | REG_USE3,
+                 "umull", "!0C, !1C, !2C, !3C", 2),
+    ENCODING_MAP(kThumb2Ldrex,       0xe8500f00,
+                 kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 7, 0,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD,
+                 "ldrex", "!0C, [!1C, #!2E]", 2),
+    ENCODING_MAP(kThumb2Strex,       0xe8400000,
+                 kFmtBitBlt, 11, 8, kFmtBitBlt, 15, 12, kFmtBitBlt, 19, 16,
+                 kFmtBitBlt, 7, 0, IS_QUAD_OP | REG_DEF0_USE12 | IS_STORE,
+                 "strex", "!0C,!1C, [!2C, #!2E]", 2),
+    ENCODING_MAP(kThumb2Clrex,       0xf3bf8f2f,
+                 kFmtUnused, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, NO_OPERAND,
+                 "clrex", "", 2),
+    ENCODING_MAP(kThumb2Bfi,         0xf3600000,
+                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtShift5, -1, -1,
+                 kFmtBitBlt, 4, 0, IS_QUAD_OP | REG_DEF0_USE1,
+                 "bfi", "!0C,!1C,#!2d,#!3d", 2),
+    ENCODING_MAP(kThumb2Bfc,         0xf36f0000,
+                 kFmtBitBlt, 11, 8, kFmtShift5, -1, -1, kFmtBitBlt, 4, 0,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0,
+                 "bfc", "!0C,#!1d,#!2d", 2),
+    ENCODING_MAP(kThumb2Dmb,         0xf3bf8f50,
+                 kFmtBitBlt, 3, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_UNARY_OP,
+                 "dmb","#!0B",2),
+    ENCODING_MAP(kThumb2LdrPcReln12,       0xf85f0000,
+                 kFmtBitBlt, 15, 12, kFmtBitBlt, 11, 0, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1,
+                 IS_BINARY_OP | REG_DEF0 | REG_USE_PC | IS_LOAD,
+                 "ldr", "!0C, [r15pc, -#!1d]", 2),
+    ENCODING_MAP(kThumb2Stm,          0xe9000000,
+                 kFmtBitBlt, 19, 16, kFmtBitBlt, 12, 0, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1,
+                 IS_BINARY_OP | REG_USE0 | REG_USE_LIST1 | IS_STORE,
+                 "stm", "!0C, <!1R>", 2),
+    ENCODING_MAP(kThumbUndefined,       0xde00,
+                 kFmtUnused, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, NO_OPERAND,
+                 "undefined", "", 1),
+    // NOTE: vpop, vpush hard-encoded for s16+ reg list
+    ENCODING_MAP(kThumb2VPopCS,       0xecbd8a00,
+                 kFmtBitBlt, 7, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1,
+                 IS_UNARY_OP | REG_DEF_SP | REG_USE_SP | REG_DEF_FPCS_LIST0
+                 | IS_LOAD, "vpop", "<!0P>", 2),
+    ENCODING_MAP(kThumb2VPushCS,      0xed2d8a00,
+                 kFmtBitBlt, 7, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1,
+                 IS_UNARY_OP | REG_DEF_SP | REG_USE_SP | REG_USE_FPCS_LIST0
+                 | IS_STORE, "vpush", "<!0P>", 2),
+    ENCODING_MAP(kThumb2Vldms,        0xec900a00,
+                 kFmtBitBlt, 19, 16, kFmtSfp, 22, 12, kFmtBitBlt, 7, 0,
+                 kFmtUnused, -1, -1,
+                 IS_TERTIARY_OP | REG_USE0 | REG_DEF_FPCS_LIST2
+                 | IS_LOAD, "vldms", "!0C, <!2Q>", 2),
+    ENCODING_MAP(kThumb2Vstms,        0xec800a00,
+                 kFmtBitBlt, 19, 16, kFmtSfp, 22, 12, kFmtBitBlt, 7, 0,
+                 kFmtUnused, -1, -1,
+                 IS_TERTIARY_OP | REG_USE0 | REG_USE_FPCS_LIST2
+                 | IS_STORE, "vstms", "!0C, <!2Q>", 2),
+    ENCODING_MAP(kThumb2BUncond,      0xf0009000,
+                 kFmtOff24, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, NO_OPERAND | IS_BRANCH,
+                 "b", "!0t", 2),
+    ENCODING_MAP(kThumb2MovImm16H,       0xf2c00000,
+                 kFmtBitBlt, 11, 8, kFmtImm16, -1, -1, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0,
+                 "movh", "!0C, #!1M", 2),
+    ENCODING_MAP(kThumb2AddPCR,      0x4487,
+                 kFmtBitBlt, 6, 3, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1,
+                 IS_UNARY_OP | REG_USE0 | IS_BRANCH,
+                 "add", "rPC, !0C", 1),
+    ENCODING_MAP(kThumb2AdrST,       0xf20f0000,
+                 kFmtBitBlt, 11, 8, kFmtImm12, -1, -1, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1,
+                 IS_TERTIARY_OP | REG_DEF0,/* Note: doesn't affect flags */
+                 "adr", "!0C,#!1d", 2),
+    ENCODING_MAP(kThumb2MovImm16LST,     0xf2400000,
+                 kFmtBitBlt, 11, 8, kFmtImm16, -1, -1, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0,
+                 "mov", "!0C, #!1M", 2),
+    ENCODING_MAP(kThumb2MovImm16HST,     0xf2c00000,
+                 kFmtBitBlt, 11, 8, kFmtImm16, -1, -1, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0,
+                 "movh", "!0C, #!1M", 2),
+    ENCODING_MAP(kThumb2LdmiaWB,         0xe8b00000,
+                 kFmtBitBlt, 19, 16, kFmtBitBlt, 15, 0, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1,
+                 IS_BINARY_OP | REG_DEF0_USE0 | REG_DEF_LIST1 | IS_LOAD,
+                 "ldmia", "!0C!!, <!1R>", 2),
+    ENCODING_MAP(kThumb2SubsRRI12,       0xf1b00000,
+                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtImm12, -1, -1,
+                 kFmtUnused, -1, -1,
+                 IS_TERTIARY_OP | REG_DEF0_USE1 | SETS_CCODES,
+                 "subs", "!0C,!1C,#!2d", 2),
+
+};
+
+/*
+ * The fake NOP of moving r0 to r0 actually will incur data stalls if r0 is
+ * not ready. Since r5FP is not updated often, it is less likely to
+ * generate unnecessary stall cycles.
+ * TUNING: No longer true - find new NOP pattern.
+ */
+#define PADDING_MOV_R5_R5               0x1C2D
+
+/* Write the numbers in the constant to the output stream */
+static void installLiteralPools(CompilationUnit* cUnit)
+{
+    int* dataPtr = (int*) ((char*) cUnit->baseAddr + cUnit->dataOffset);
+    ArmLIR* dataLIR = (ArmLIR*) cUnit->literalList;
+    while (dataLIR) {
+        *dataPtr++ = dataLIR->operands[0];
+        dataLIR = NEXT_LIR(dataLIR);
+    }
+}
+
+/* Write the switch tables to the output stream */
+static void installSwitchTables(CompilationUnit* cUnit)
+{
+    GrowableListIterator iterator;
+    oatGrowableListIteratorInit(&cUnit->switchTables, &iterator);
+    while (true) {
+        SwitchTable* tabRec = (SwitchTable *) oatGrowableListIteratorNext(
+             &iterator);
+        if (tabRec == NULL) break;
+        int* dataPtr = (int*)((char*)cUnit->baseAddr + tabRec->offset);
+        int bxOffset = tabRec->bxInst->generic.offset + 4;
+        if (cUnit->printMe) {
+            LOG(INFO) << "Switch table for offset 0x" /*<< hex*/ << bxOffset;
+        }
+        if (tabRec->table[0] == kSparseSwitchSignature) {
+            int* keys = (int*)&(tabRec->table[2]);
+            for (int elems = 0; elems < tabRec->table[1]; elems++) {
+                int disp = tabRec->targets[elems]->generic.offset - bxOffset;
+                if (cUnit->printMe) {
+                    LOG(INFO) << "    Case[" << elems << "] key: 0x" <<
+                        std::hex << keys[elems] << ", disp: 0x" <<
+                        std::hex << disp;
+                }
+                *dataPtr++ = keys[elems];
+                *dataPtr++ = tabRec->targets[elems]->generic.offset - bxOffset;
+            }
+        } else {
+            assert(tabRec->table[0] == kPackedSwitchSignature);
+            for (int elems = 0; elems < tabRec->table[1]; elems++) {
+                int disp = tabRec->targets[elems]->generic.offset - bxOffset;
+                if (cUnit->printMe) {
+                    LOG(INFO) << "    Case[" << elems << "] disp: 0x" <<
+                        std::hex << disp;
+                }
+                *dataPtr++ = tabRec->targets[elems]->generic.offset - bxOffset;
+            }
+        }
+    }
+}
+
+/* Write the fill array dta to the output stream */
+static void installFillArrayData(CompilationUnit* cUnit)
+{
+    GrowableListIterator iterator;
+    oatGrowableListIteratorInit(&cUnit->fillArrayData, &iterator);
+    while (true) {
+        FillArrayData *tabRec = (FillArrayData *) oatGrowableListIteratorNext(
+             &iterator);
+        if (tabRec == NULL) break;
+        char* dataPtr = (char*)cUnit->baseAddr + tabRec->offset;
+        memcpy(dataPtr, (char*)tabRec->table, tabRec->size);
+    }
+}
+
+/*
+ * Assemble the LIR into binary instruction format.  Note that we may
+ * discover that pc-relative displacements may not fit the selected
+ * instruction.
+ */
+static AssemblerStatus assembleInstructions(CompilationUnit* cUnit,
+                                            intptr_t startAddr)
+{
+    short* bufferAddr = (short*) cUnit->codeBuffer;
+    ArmLIR* lir;
+    AssemblerStatus res = kSuccess;  // Assume success
+
+    for (lir = (ArmLIR *) cUnit->firstLIRInsn; lir; lir = NEXT_LIR(lir)) {
+        if (lir->opcode < 0) {
+            if ((lir->opcode == kArmPseudoPseudoAlign4) &&
+                /* 1 means padding is needed */
+                (lir->operands[0] == 1)) {
+                *bufferAddr++ = PADDING_MOV_R5_R5;
+            }
+            continue;
+        }
+
+        if (lir->flags.isNop) {
+            continue;
+        }
+
+        /*
+         * For PC-relative displacements we won't know if the
+         * selected instruction will work until late (i.e. - now).
+         * If something doesn't fit, we must replace the short-form
+         * operation with a longer-form one.  Note, though, that this
+         * can change code we've already processed, so we'll need to
+         * re-calculate offsets and restart.  To limit the number of
+         * restarts, the entire list will be scanned and patched.
+         * Of course, the patching itself may cause new overflows so this
+         * is an iterative process.
+         */
+
+        if (lir->opcode == kThumbLdrPcRel ||
+            lir->opcode == kThumb2LdrPcRel12 ||
+            lir->opcode == kThumbAddPcRel ||
+            ((lir->opcode == kThumb2Vldrs) && (lir->operands[1] == r15pc))) {
+            /*
+             * PC-relative loads are mostly used to load immediates
+             * that are too large to materialize directly in one shot.
+             * However, if the load displacement exceeds the limit,
+             * we revert to a 2-instruction materialization sequence.
+             */
+            ArmLIR *lirTarget = (ArmLIR *) lir->generic.target;
+            intptr_t pc = (lir->generic.offset + 4) & ~3;
+            intptr_t target = lirTarget->generic.offset;
+            int delta = target - pc;
+            if (delta & 0x3) {
+                LOG(FATAL) << "PC-rel offset not multiple of 4: " << delta;
+            }
+            // First, a sanity check for cases we shouldn't see now
+            if (((lir->opcode == kThumbAddPcRel) && (delta > 1020)) ||
+                ((lir->opcode == kThumbLdrPcRel) && (delta > 1020))) {
+                // Shouldn't happen in current codegen.
+                LOG(FATAL) << "Unexpected pc-rel offset " << delta;
+            }
+            // Now, check for the two difficult cases
+            if (1 || ((lir->opcode == kThumb2LdrPcRel12) && (delta > 4091)) ||
+                ((lir->opcode == kThumb2Vldrs) && (delta > 1020))) {
+            /*
+             * OK - the load doesn't work.  We'll just materialize
+             * the immediate directly using mov16l and mov16h.
+             * It's a little ugly for float immediates as we don't have
+             * float ops like the core mov imm16H/L.  In this case
+             * we'll materialize in a core register (rLR) and then copy.
+             * NOTE/WARNING: This is a *very* fragile workaround that will
+             * be addressed in a later release when we have a late spill
+             * capability.  We can get away with it for now because rLR
+             * is currently only used during call setups, and our convention
+             * requires all arguments to be passed in core register & the
+             * frame (and thus, we won't see any vlrds in the sequence).
+             * The normal resource mask mechanism will prevent any damaging
+             * code motion.
+             */
+                int tgtReg = (lir->opcode == kThumb2Vldrs) ? rLR :
+                              lir->operands[0];
+                int immVal = lirTarget->operands[0];
+                // The standard utilities won't work here - build manually
+                ArmLIR *newMov16L =
+                    (ArmLIR *)oatNew(sizeof(ArmLIR), true);
+                newMov16L->generic.dalvikOffset = lir->generic.dalvikOffset;
+                newMov16L->opcode = kThumb2MovImm16;
+                newMov16L->operands[0] = tgtReg;
+                newMov16L->operands[1] = immVal & 0xffff;
+                oatSetupResourceMasks(newMov16L);
+                oatInsertLIRBefore((LIR*)lir, (LIR*)newMov16L);
+                ArmLIR *newMov16H =
+                    (ArmLIR *)oatNew(sizeof(ArmLIR), true);
+                newMov16H->generic.dalvikOffset = lir->generic.dalvikOffset;
+                newMov16H->opcode = kThumb2MovImm16H;
+                newMov16H->operands[0] = tgtReg;
+                newMov16H->operands[1] = (immVal >> 16) & 0xffff;
+                oatSetupResourceMasks(newMov16H);
+                oatInsertLIRBefore((LIR*)lir, (LIR*)newMov16H);
+                if (lir->opcode == kThumb2Vldrs) {
+                    // Convert the vldrs to a kThumb2Fmsr
+                    lir->opcode = kThumb2Fmsr;
+                    lir->operands[1] = rLR;
+                    lir->generic.target = NULL;
+                    lir->operands[2] = 0;
+                    oatSetupResourceMasks(lir);
+                } else {
+                    // Nullify the original load
+                    lir->flags.isNop = true;
+                }
+                res = kRetryAll;
+            } else {
+                if (lir->opcode == kThumb2Vldrs) {
+                    lir->operands[2] = delta >> 2;
+                } else {
+                    lir->operands[1] = (lir->opcode == kThumb2LdrPcRel12) ?
+                                        delta : delta >> 2;
+                }
+            }
+        } else if (lir->opcode == kThumb2Cbnz || lir->opcode == kThumb2Cbz) {
+            ArmLIR *targetLIR = (ArmLIR *) lir->generic.target;
+            intptr_t pc = lir->generic.offset + 4;
+            intptr_t target = targetLIR->generic.offset;
+            int delta = target - pc;
+            if (delta > 126 || delta < 0) {
+                /* Convert to cmp rx,#0 / b[eq/ne] tgt pair */
+                ArmLIR *newInst =
+                    (ArmLIR *)oatNew(sizeof(ArmLIR), true);
+                /* Make new branch instruction and insert after */
+                newInst->generic.dalvikOffset = lir->generic.dalvikOffset;
+                newInst->opcode = kThumbBCond;
+                newInst->operands[0] = 0;
+                newInst->operands[1] = (lir->opcode == kThumb2Cbz) ?
+                                        kArmCondEq : kArmCondNe;
+                newInst->generic.target = lir->generic.target;
+                oatSetupResourceMasks(newInst);
+                oatInsertLIRAfter((LIR *)lir, (LIR *)newInst);
+                /* Convert the cb[n]z to a cmp rx, #0 ] */
+                lir->opcode = kThumbCmpRI8;
+                /* operand[0] is src1 in both cb[n]z & CmpRI8 */
+                lir->operands[1] = 0;
+                lir->generic.target = 0;
+                oatSetupResourceMasks(lir);
+                res = kRetryAll;
+            } else {
+                lir->operands[1] = delta >> 1;
+            }
+        } else if (lir->opcode == kThumbBCond ||
+                   lir->opcode == kThumb2BCond) {
+            ArmLIR *targetLIR = (ArmLIR *) lir->generic.target;
+            intptr_t pc = lir->generic.offset + 4;
+            intptr_t target = targetLIR->generic.offset;
+            int delta = target - pc;
+            if ((lir->opcode == kThumbBCond) && (delta > 254 || delta < -256)) {
+                lir->opcode = kThumb2BCond;
+                oatSetupResourceMasks(lir);
+                res = kRetryAll;
+            }
+            lir->operands[0] = delta >> 1;
+        } else if (lir->opcode == kThumb2BUncond) {
+            ArmLIR *targetLIR = (ArmLIR *) lir->generic.target;
+            intptr_t pc = lir->generic.offset + 4;
+            intptr_t target = targetLIR->generic.offset;
+            int delta = target - pc;
+            lir->operands[0] = delta >> 1;
+            if (lir->operands[0] == 0) {  // Useless branch?
+                lir->flags.isNop = true;
+                res = kRetryAll;
+            }
+        } else if (lir->opcode == kThumbBUncond) {
+            ArmLIR *targetLIR = (ArmLIR *) lir->generic.target;
+            intptr_t pc = lir->generic.offset + 4;
+            intptr_t target = targetLIR->generic.offset;
+            int delta = target - pc;
+            if (delta > 2046 || delta < -2048) {
+                // Convert to Thumb2BCond w/ kArmCondAl
+                lir->opcode = kThumb2BUncond;
+                lir->operands[0] = 0;
+                oatSetupResourceMasks(lir);
+                res = kRetryAll;
+            }
+            lir->operands[0] = delta >> 1;
+            if ((lir->operands[0] == 0) ||
+                (lir->operands[0] == -1)) {  // Useless branch?
+                lir->flags.isNop = true;
+                res = kRetryAll;
+            }
+        } else if (lir->opcode == kThumbBlx1) {
+            assert(NEXT_LIR(lir)->opcode == kThumbBlx2);
+            /* curPC is Thumb */
+            intptr_t curPC = (startAddr + lir->generic.offset + 4) & ~3;
+            intptr_t target = lir->operands[1];
+
+            /* Match bit[1] in target with base */
+            if (curPC & 0x2) {
+                target |= 0x2;
+            }
+            int delta = target - curPC;
+            assert((delta >= -(1<<22)) && (delta <= ((1<<22)-2)));
+
+            lir->operands[0] = (delta >> 12) & 0x7ff;
+            NEXT_LIR(lir)->operands[0] = (delta>> 1) & 0x7ff;
+        } else if (lir->opcode == kThumbBl1) {
+            assert(NEXT_LIR(lir)->opcode == kThumbBl2);
+            /* Both curPC and target are Thumb */
+            intptr_t curPC = startAddr + lir->generic.offset + 4;
+            intptr_t target = lir->operands[1];
+
+            int delta = target - curPC;
+            assert((delta >= -(1<<22)) && (delta <= ((1<<22)-2)));
+
+            lir->operands[0] = (delta >> 12) & 0x7ff;
+            NEXT_LIR(lir)->operands[0] = (delta>> 1) & 0x7ff;
+        } else if (lir->opcode == kThumb2AdrST) {
+            SwitchTable *tabRec = (SwitchTable*)lir->operands[2];
+            int disp = tabRec->offset - ((lir->generic.offset + 4) & ~3);
+            if (disp < 4096) {
+                lir->operands[1] = disp;
+            } else {
+                // convert to ldimm16l, ldimm16h, add tgt, pc, r12
+                ArmLIR *newMov16L =
+                    (ArmLIR *)oatNew(sizeof(ArmLIR), true);
+                newMov16L->generic.dalvikOffset = lir->generic.dalvikOffset;
+                newMov16L->opcode = kThumb2MovImm16LST;
+                newMov16L->operands[0] = lir->operands[0];
+                newMov16L->operands[2] = (intptr_t)lir;
+                newMov16L->operands[3] = (intptr_t)tabRec;
+                oatSetupResourceMasks(newMov16L);
+                oatInsertLIRBefore((LIR*)lir, (LIR*)newMov16L);
+                ArmLIR *newMov16H =
+                    (ArmLIR *)oatNew(sizeof(ArmLIR), true);
+                newMov16H->generic.dalvikOffset = lir->generic.dalvikOffset;
+                newMov16H->opcode = kThumb2MovImm16HST;
+                newMov16H->operands[0] = lir->operands[0];
+                newMov16H->operands[2] = (intptr_t)lir;
+                newMov16H->operands[3] = (intptr_t)tabRec;
+                oatSetupResourceMasks(newMov16H);
+                oatInsertLIRBefore((LIR*)lir, (LIR*)newMov16H);
+                lir->opcode = kThumb2AddRRR;
+                lir->operands[1] = rPC;
+                lir->operands[2] = lir->operands[0];
+                oatSetupResourceMasks(lir);
+                res = kRetryAll;
+            }
+        } else if (lir->opcode == kThumb2MovImm16LST) {
+            // operands[1] should hold disp, [2] has add, [3] has tabRec
+            ArmLIR *addPCInst = (ArmLIR*)lir->operands[2];
+            SwitchTable *tabRec = (SwitchTable*)lir->operands[3];
+            lir->operands[1] = (tabRec->offset -
+                ((addPCInst->generic.offset + 4) & ~3)) & 0xffff;
+        } else if (lir->opcode == kThumb2MovImm16HST) {
+            // operands[1] should hold disp, [2] has add, [3] has tabRec
+            ArmLIR *addPCInst = (ArmLIR*)lir->operands[2];
+            SwitchTable *tabRec = (SwitchTable*)lir->operands[3];
+            lir->operands[1] = ((tabRec->offset -
+                ((addPCInst->generic.offset + 4) & ~3)) >> 16) & 0xffff;
+        }
+        ArmEncodingMap *encoder = &EncodingMap[lir->opcode];
+        u4 bits = encoder->skeleton;
+        int i;
+        for (i = 0; i < 4; i++) {
+            u4 operand;
+            u4 value;
+            operand = lir->operands[i];
+            switch(encoder->fieldLoc[i].kind) {
+                case kFmtUnused:
+                    break;
+                case kFmtFPImm:
+                    value = ((operand & 0xF0) >> 4) << encoder->fieldLoc[i].end;
+                    value |= (operand & 0x0F) << encoder->fieldLoc[i].start;
+                    bits |= value;
+                    break;
+                case kFmtBrOffset:
+                    value = ((operand  & 0x80000) >> 19) << 26;
+                    value |= ((operand & 0x40000) >> 18) << 11;
+                    value |= ((operand & 0x20000) >> 17) << 13;
+                    value |= ((operand & 0x1f800) >> 11) << 16;
+                    value |= (operand  & 0x007ff);
+                    bits |= value;
+                    break;
+                case kFmtShift5:
+                    value = ((operand & 0x1c) >> 2) << 12;
+                    value |= (operand & 0x03) << 6;
+                    bits |= value;
+                    break;
+                case kFmtShift:
+                    value = ((operand & 0x70) >> 4) << 12;
+                    value |= (operand & 0x0f) << 4;
+                    bits |= value;
+                    break;
+                case kFmtBWidth:
+                    value = operand - 1;
+                    bits |= value;
+                    break;
+                case kFmtLsb:
+                    value = ((operand & 0x1c) >> 2) << 12;
+                    value |= (operand & 0x03) << 6;
+                    bits |= value;
+                    break;
+                case kFmtImm6:
+                    value = ((operand & 0x20) >> 5) << 9;
+                    value |= (operand & 0x1f) << 3;
+                    bits |= value;
+                    break;
+                case kFmtBitBlt:
+                    value = (operand << encoder->fieldLoc[i].start) &
+                            ((1 << (encoder->fieldLoc[i].end + 1)) - 1);
+                    bits |= value;
+                    break;
+                case kFmtDfp: {
+                    assert(DOUBLEREG(operand));
+                    assert((operand & 0x1) == 0);
+                    int regName = (operand & FP_REG_MASK) >> 1;
+                    /* Snag the 1-bit slice and position it */
+                    value = ((regName & 0x10) >> 4) <<
+                            encoder->fieldLoc[i].end;
+                    /* Extract and position the 4-bit slice */
+                    value |= (regName & 0x0f) <<
+                            encoder->fieldLoc[i].start;
+                    bits |= value;
+                    break;
+                }
+                case kFmtSfp:
+                    assert(SINGLEREG(operand));
+                    /* Snag the 1-bit slice and position it */
+                    value = (operand & 0x1) <<
+                            encoder->fieldLoc[i].end;
+                    /* Extract and position the 4-bit slice */
+                    value |= ((operand & 0x1e) >> 1) <<
+                            encoder->fieldLoc[i].start;
+                    bits |= value;
+                    break;
+                case kFmtImm12:
+                case kFmtModImm:
+                    value = ((operand & 0x800) >> 11) << 26;
+                    value |= ((operand & 0x700) >> 8) << 12;
+                    value |= operand & 0x0ff;
+                    bits |= value;
+                    break;
+                case kFmtImm16:
+                    value = ((operand & 0x0800) >> 11) << 26;
+                    value |= ((operand & 0xf000) >> 12) << 16;
+                    value |= ((operand & 0x0700) >> 8) << 12;
+                    value |= operand & 0x0ff;
+                    bits |= value;
+                    break;
+                case kFmtOff24: {
+                    u4 signbit = (operand >> 31) & 0x1;
+                    u4 i1 = (operand >> 22) & 0x1;
+                    u4 i2 = (operand >> 21) & 0x1;
+                    u4 imm10 = (operand >> 11) & 0x03ff;
+                    u4 imm11 = operand & 0x07ff;
+                    u4 j1 = (i1 ^ signbit) ? 0 : 1;
+                    u4 j2 = (i2 ^ signbit) ? 0 : 1;
+                    value = (signbit << 26) | (j1 << 13) | (j2 << 11) |
+                            (imm10 << 16) | imm11;
+                    bits |= value;
+                    }
+                    break;
+                default:
+                    assert(0);
+            }
+        }
+        if (encoder->size == 2) {
+            *bufferAddr++ = (bits >> 16) & 0xffff;
+        }
+        *bufferAddr++ = bits & 0xffff;
+    }
+    return res;
+}
+
+static int assignLiteralOffsetCommon(LIR* lir, int offset)
+{
+    for (;lir != NULL; lir = lir->next) {
+        lir->offset = offset;
+        offset += 4;
+    }
+    return offset;
+}
+
+static int createMappingTable(CompilationUnit* cUnit, MappingTable** pTable)
+{
+    ArmLIR* armLIR;
+    int currentDalvikOffset = -1;
+    int count = 0;
+
+    for (armLIR = (ArmLIR *) cUnit->firstLIRInsn;
+         armLIR;
+         armLIR = NEXT_LIR(armLIR)) {
+        if ((armLIR->opcode >= 0) && !armLIR->flags.isNop &&
+            (currentDalvikOffset != armLIR->generic.dalvikOffset)) {
+            // Changed - need to emit a record
+            if (pTable) {
+                MappingTable *table = *pTable;
+                assert(table);
+                table[count].targetOffset = armLIR->generic.offset;
+                table[count].dalvikOffset = armLIR->generic.dalvikOffset;
+            }
+            count++;
+            currentDalvikOffset = armLIR->generic.dalvikOffset;
+        }
+    }
+    return count;
+}
+
+/* Determine the offset of each literal field */
+static int assignLiteralOffset(CompilationUnit* cUnit, int offset)
+{
+    offset = assignLiteralOffsetCommon(cUnit->literalList, offset);
+    return offset;
+}
+
+static int assignSwitchTablesOffset(CompilationUnit* cUnit, int offset)
+{
+    GrowableListIterator iterator;
+    oatGrowableListIteratorInit(&cUnit->switchTables, &iterator);
+    while (true) {
+        SwitchTable *tabRec = (SwitchTable *) oatGrowableListIteratorNext(
+             &iterator);
+        if (tabRec == NULL) break;
+        tabRec->offset = offset;
+        if (tabRec->table[0] == kSparseSwitchSignature) {
+            offset += tabRec->table[1] * (sizeof(int) * 2);
+        } else {
+            assert(tabRec->table[0] == kPackedSwitchSignature);
+            offset += tabRec->table[1] * sizeof(int);
+        }
+    }
+    return offset;
+}
+
+static int assignFillArrayDataOffset(CompilationUnit* cUnit, int offset)
+{
+    GrowableListIterator iterator;
+    oatGrowableListIteratorInit(&cUnit->fillArrayData, &iterator);
+    while (true) {
+        FillArrayData *tabRec = (FillArrayData *) oatGrowableListIteratorNext(
+             &iterator);
+        if (tabRec == NULL) break;
+        tabRec->offset = offset;
+        offset += tabRec->size;
+        // word align
+        offset = (offset + 3) & ~3;
+        }
+    return offset;
+}
+
+/*
+ * Walk the compilation unit and assign offsets to instructions
+ * and literals and compute the total size of the compiled unit.
+ */
+void assignOffsets(CompilationUnit* cUnit)
+{
+    ArmLIR* armLIR;
+    int offset = 0;
+
+    for (armLIR = (ArmLIR *) cUnit->firstLIRInsn;
+         armLIR;
+         armLIR = NEXT_LIR(armLIR)) {
+        armLIR->generic.offset = offset;
+        if (armLIR->opcode >= 0 && !armLIR->flags.isNop) {
+            armLIR->flags.size = EncodingMap[armLIR->opcode].size * 2;
+            offset += armLIR->flags.size;
+        } else if (armLIR->opcode == kArmPseudoPseudoAlign4) {
+            if (offset & 0x2) {
+                offset += 2;
+                armLIR->operands[0] = 1;
+            } else {
+                armLIR->operands[0] = 0;
+            }
+        }
+        /* Pseudo opcodes don't consume space */
+    }
+
+    /* Const values have to be word aligned */
+    offset = (offset + 3) & ~3;
+
+    /* Set up offsets for literals */
+    cUnit->dataOffset = offset;
+
+    offset = assignLiteralOffset(cUnit, offset);
+
+    offset = assignSwitchTablesOffset(cUnit, offset);
+
+    offset = assignFillArrayDataOffset(cUnit, offset);
+
+    cUnit->totalSize = offset;
+}
+/*
+ * Go over each instruction in the list and calculate the offset from the top
+ * before sending them off to the assembler. If out-of-range branch distance is
+ * seen rearrange the instructions a bit to correct it.
+ */
+void oatAssembleLIR(CompilationUnit* cUnit)
+{
+    assignOffsets(cUnit);
+
+#ifdef TESTMODE
+//For testing - caller will allocate buffer
+    int testSize = 1024 * 1024;
+    int fd = ashmem_create_region("dalvik-test-code-cache",testSize);
+    if (fd < 0) {
+        LOG(FATAL) << "Coudln't create ashmem region";
+    }
+    cUnit->baseAddr = cUnit->codeBuffer = (unsigned char *)
+         mmap(NULL, testSize, PROT_READ | PROT_WRITE | PROT_EXEC,
+              MAP_PRIVATE, fd, 0);
+    close(fd);
+    if (cUnit->baseAddr == MAP_FAILED) {
+        LOG(FATAL) << "Failed to mmap the test region: " << strerror(errno);
+    }
+#endif
+
+    /*
+     * Assemble here.  Note that we generate code with optimistic assumptions
+     * and if found now to work, we'll have to redo the sequence and retry.
+     */
+
+    while (true) {
+        AssemblerStatus res = assembleInstructions(cUnit, NULL);
+        if (res == kSuccess) {
+            break;
+        } else {
+            cUnit->assemblerRetries++;
+            if (cUnit->assemblerRetries > MAX_ASSEMBLER_RETRIES) {
+                LOG(FATAL) << "Assembler error - too many retries";
+            }
+            // Redo offsets and try again
+            assignOffsets(cUnit);
+        }
+    }
+
+    // Install literals
+    installLiteralPools(cUnit);
+
+    // Install switch tables
+    installSwitchTables(cUnit);
+
+    // Install fill array data
+    installFillArrayData(cUnit);
+
+    /*
+     * Create the mapping table
+     */
+    cUnit->mappingTableSize = createMappingTable(cUnit, NULL /* just count */);
+    cUnit->mappingTable = (MappingTable*)oatNew(
+        cUnit->mappingTableSize * sizeof(*cUnit->mappingTable), true);
+    createMappingTable(cUnit, &cUnit->mappingTable);
+}
diff --git a/src/compiler/codegen/arm/CalloutHelper.h b/src/compiler/codegen/arm/CalloutHelper.h
new file mode 100644
index 0000000..933f686
--- /dev/null
+++ b/src/compiler/codegen/arm/CalloutHelper.h
@@ -0,0 +1,89 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_SRC_COMPILER_CODEGEN_ARM_CALLOUT_HELPER_H_
+#define ART_SRC_COMPILER_CODEGEN_ARM_CALLOUT_HELPER_H_
+
+#include "../../Dalvik.h"
+
+/* Helper functions used at runtime by compiled code */
+
+/* Conversions */
+extern "C" float __aeabi_i2f(int op1);             // OP_INT_TO_FLOAT
+extern "C" int __aeabi_f2iz(float op1);            // OP_FLOAT_TO_INT
+extern "C" float __aeabi_d2f(double op1);          // OP_DOUBLE_TO_FLOAT
+extern "C" double __aeabi_f2d(float op1);          // OP_FLOAT_TO_DOUBLE
+extern "C" double __aeabi_i2d(int op1);            // OP_INT_TO_DOUBLE
+extern "C" int __aeabi_d2iz(double op1);           // OP_DOUBLE_TO_INT
+extern "C" float __aeabi_l2f(long op1);            // OP_LONG_TO_FLOAT
+extern "C" double __aeabi_l2d(long op1);           // OP_LONG_TO_DOUBLE
+s8 artF2L(float op1);                // OP_FLOAT_TO_LONG
+s8 artD2L(double op1);               // OP_DOUBLE_TO_LONG
+
+/* Single-precision FP arithmetics */
+extern "C" float __aeabi_fadd(float a, float b);   // OP_ADD_FLOAT[_2ADDR]
+extern "C" float __aeabi_fsub(float a, float b);   // OP_SUB_FLOAT[_2ADDR]
+extern "C" float __aeabi_fdiv(float a, float b);   // OP_DIV_FLOAT[_2ADDR]
+extern "C" float __aeabi_fmul(float a, float b);   // OP_MUL_FLOAT[_2ADDR]
+extern "C" float fmodf(float a, float b);          // OP_REM_FLOAT[_2ADDR]
+
+/* Double-precision FP arithmetics */
+extern "C" double __aeabi_dadd(double a, double b); // OP_ADD_DOUBLE[_2ADDR]
+extern "C" double __aeabi_dsub(double a, double b); // OP_SUB_DOUBLE[_2ADDR]
+extern "C" double __aeabi_ddiv(double a, double b); // OP_DIV_DOUBLE[_2ADDR]
+extern "C" double __aeabi_dmul(double a, double b); // OP_MUL_DOUBLE[_2ADDR]
+extern "C" double fmod(double a, double b);         // OP_REM_DOUBLE[_2ADDR]
+
+/* Integer arithmetics */
+extern "C" int __aeabi_idivmod(int op1, int op2);  // OP_REM_INT[_2ADDR|_LIT8|_LIT16]
+extern "C" int __aeabi_idiv(int op1, int op2);     // OP_DIV_INT[_2ADDR|_LIT8|_LIT16]
+
+/* Long long arithmetics - OP_REM_LONG[_2ADDR] & OP_DIV_LONG[_2ADDR] */
+extern "C" long long __aeabi_ldivmod(long long op1, long long op2);
+
+/* Originally declared in Sync.h */
+bool dvmUnlockObject(struct Thread* self, struct Object* obj); //OP_MONITOR_EXIT
+void dvmLockObject(struct Thread* self, struct Object* obj); //OP_MONITOR_ENTER
+
+/* Originally declared in oo/TypeCheck.h */
+bool dvmCanPutArrayElement(const ClassObject* elemClass,   // OP_APUT_OBJECT
+                           const ClassObject* arrayClass);
+int dvmInstanceofNonTrivial(const ClassObject* instance,   // OP_CHECK_CAST &&
+                            const ClassObject* clazz);     // OP_INSTANCE_OF
+
+/* Originally declared in oo/Array.h */
+ArrayObject* dvmAllocArrayByClass(ClassObject* arrayClass, // OP_NEW_ARRAY
+                                  size_t length, int allocFlags);
+/* Originally declared in alloc/Alloc.h */
+Object* dvmAllocObject(ClassObject* clazz, int flags);  // OP_NEW_INSTANCE
+
+/*
+ * The following functions are invoked through the compiler templates (declared
+ * in compiler/template/armv5te/footer.S:
+ *
+ *      __aeabi_cdcmple         // CMPG_DOUBLE
+ *      __aeabi_cfcmple         // CMPG_FLOAT
+ *      dvmLockObject           // MONITOR_ENTER
+ */
+
+/* from mterp/common/FindInterface.h */
+Method* dvmFindInterfaceMethodInCache(ClassObject* thisClass,
+    u4 methodIdx, const Method* method, DvmDex* methodClassDex);
+
+/* from interp/Interp.cpp */
+bool dvmInterpHandleFillArrayData(ArrayObject* arrayObj, const u2* arrayData);
+
+#endif  // ART_SRC_COMPILER_CODEGEN_ARM_CALLOUT_HELPER_H_
diff --git a/src/compiler/codegen/arm/Codegen.h b/src/compiler/codegen/arm/Codegen.h
new file mode 100644
index 0000000..9a1dc54
--- /dev/null
+++ b/src/compiler/codegen/arm/Codegen.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * This file contains register alloction support and is intended to be
+ * included by:
+ *
+ *        Codegen-$(TARGET_ARCH_VARIANT).c
+ *
+ */
+
+#include "../../CompilerIR.h"
+
+#if defined(_CODEGEN_C)
+/*
+ * loadConstant() sometimes needs to add a small imm to a pre-existing constant
+ */
+static ArmLIR *opRegImm(CompilationUnit* cUnit, OpKind op, int rDestSrc1,
+                        int value);
+static ArmLIR *opRegReg(CompilationUnit* cUnit, OpKind op, int rDestSrc1,
+                        int rSrc2);
+
+/* Forward decalraton the portable versions due to circular dependency */
+static bool genArithOpFloatPortable(CompilationUnit* cUnit, MIR* mir,
+                                    RegLocation rlDest, RegLocation rlSrc1,
+                                    RegLocation rlSrc2);
+
+static bool genArithOpDoublePortable(CompilationUnit* cUnit, MIR* mir,
+                                     RegLocation rlDest, RegLocation rlSrc1,
+                                     RegLocation rlSrc2);
+
+static bool genConversionPortable(CompilationUnit* cUnit, MIR* mir);
+
+#endif
+
+extern void oatSetupResourceMasks(ArmLIR* lir);
+
+extern ArmLIR* oatRegCopyNoInsert(CompilationUnit* cUnit, int rDest,
+                                          int rSrc);
diff --git a/src/compiler/codegen/arm/CodegenCommon.cc b/src/compiler/codegen/arm/CodegenCommon.cc
new file mode 100644
index 0000000..1aba82c
--- /dev/null
+++ b/src/compiler/codegen/arm/CodegenCommon.cc
@@ -0,0 +1,418 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * This file contains codegen and support common to all supported
+ * ARM variants.  It is included by:
+ *
+ *        Codegen-$(TARGET_ARCH_VARIANT).c
+ *
+ * which combines this common code with specific support found in the
+ * applicable directory below this one.
+ */
+
+/* Track exercised opcodes */
+static int opcodeCoverage[kNumPackedOpcodes];
+
+static void setMemRefType(ArmLIR* lir, bool isLoad, int memType)
+{
+    u8 *maskPtr;
+    u8 mask = ENCODE_MEM;;
+    assert(EncodingMap[lir->opcode].flags & (IS_LOAD | IS_STORE));
+    if (isLoad) {
+        maskPtr = &lir->useMask;
+    } else {
+        maskPtr = &lir->defMask;
+    }
+    /* Clear out the memref flags */
+    *maskPtr &= ~mask;
+    /* ..and then add back the one we need */
+    switch(memType) {
+        case kLiteral:
+            assert(isLoad);
+            *maskPtr |= ENCODE_LITERAL;
+            break;
+        case kDalvikReg:
+            *maskPtr |= ENCODE_DALVIK_REG;
+            break;
+        case kHeapRef:
+            *maskPtr |= ENCODE_HEAP_REF;
+            break;
+        case kMustNotAlias:
+            /* Currently only loads can be marked as kMustNotAlias */
+            assert(!(EncodingMap[lir->opcode].flags & IS_STORE));
+            *maskPtr |= ENCODE_MUST_NOT_ALIAS;
+            break;
+        default:
+            LOG(FATAL) << "Oat: invalid memref kind - " << memType;
+    }
+}
+
+/*
+ * Mark load/store instructions that access Dalvik registers through r5FP +
+ * offset.
+ */
+static void annotateDalvikRegAccess(ArmLIR* lir, int regId, bool isLoad)
+{
+    setMemRefType(lir, isLoad, kDalvikReg);
+
+    /*
+     * Store the Dalvik register id in aliasInfo. Mark he MSB if it is a 64-bit
+     * access.
+     */
+    lir->aliasInfo = regId;
+    if (DOUBLEREG(lir->operands[0])) {
+        lir->aliasInfo |= 0x80000000;
+    }
+}
+
+/*
+ * Decode the register id.
+ */
+static inline u8 getRegMaskCommon(int reg)
+{
+    u8 seed;
+    int shift;
+    int regId = reg & 0x1f;
+
+    /*
+     * Each double register is equal to a pair of single-precision FP registers
+     */
+    seed = DOUBLEREG(reg) ? 3 : 1;
+    /* FP register starts at bit position 16 */
+    shift = FPREG(reg) ? kFPReg0 : 0;
+    /* Expand the double register id into single offset */
+    shift += regId;
+    return (seed << shift);
+}
+
+/*
+ * Mark the corresponding bit(s).
+ */
+static inline void setupRegMask(u8* mask, int reg)
+{
+    *mask |= getRegMaskCommon(reg);
+}
+
+/*
+ * Set up the proper fields in the resource mask
+ */
+static void setupResourceMasks(ArmLIR* lir)
+{
+    int opcode = lir->opcode;
+    int flags;
+
+    if (opcode <= 0) {
+        lir->useMask = lir->defMask = 0;
+        return;
+    }
+
+    flags = EncodingMap[lir->opcode].flags;
+
+    /* Set up the mask for resources that are updated */
+    if (flags & (IS_LOAD | IS_STORE)) {
+        /* Default to heap - will catch specialized classes later */
+        setMemRefType(lir, flags & IS_LOAD, kHeapRef);
+    }
+
+    /*
+     * Conservatively assume the branch here will call out a function that in
+     * turn will trash everything.
+     */
+    if (flags & IS_BRANCH) {
+        lir->defMask = lir->useMask = ENCODE_ALL;
+        return;
+    }
+
+    if (flags & REG_DEF0) {
+        setupRegMask(&lir->defMask, lir->operands[0]);
+    }
+
+    if (flags & REG_DEF1) {
+        setupRegMask(&lir->defMask, lir->operands[1]);
+    }
+
+    if (flags & REG_DEF_SP) {
+        lir->defMask |= ENCODE_REG_SP;
+    }
+
+    if (flags & REG_DEF_LR) {
+        lir->defMask |= ENCODE_REG_LR;
+    }
+
+    if (flags & REG_DEF_LIST0) {
+        lir->defMask |= ENCODE_REG_LIST(lir->operands[0]);
+    }
+
+    if (flags & REG_DEF_LIST1) {
+        lir->defMask |= ENCODE_REG_LIST(lir->operands[1]);
+    }
+
+    if (flags & REG_DEF_FPCS_LIST0) {
+        lir->defMask |= ENCODE_REG_FPCS_LIST(lir->operands[0]);
+    }
+
+    if (flags & SETS_CCODES) {
+        lir->defMask |= ENCODE_CCODE;
+    }
+
+    /* Conservatively treat the IT block */
+    if (flags & IS_IT) {
+        lir->defMask = ENCODE_ALL;
+    }
+
+    if (flags & (REG_USE0 | REG_USE1 | REG_USE2 | REG_USE3)) {
+        int i;
+
+        for (i = 0; i < 4; i++) {
+            if (flags & (1 << (kRegUse0 + i))) {
+                setupRegMask(&lir->useMask, lir->operands[i]);
+            }
+        }
+    }
+
+    if (flags & REG_USE_PC) {
+        lir->useMask |= ENCODE_REG_PC;
+    }
+
+    if (flags & REG_USE_SP) {
+        lir->useMask |= ENCODE_REG_SP;
+    }
+
+    if (flags & REG_USE_LIST0) {
+        lir->useMask |= ENCODE_REG_LIST(lir->operands[0]);
+    }
+
+    if (flags & REG_USE_LIST1) {
+        lir->useMask |= ENCODE_REG_LIST(lir->operands[1]);
+    }
+
+    if (flags & REG_USE_FPCS_LIST0) {
+        lir->useMask |= ENCODE_REG_FPCS_LIST(lir->operands[0]);
+    }
+
+    if (flags & REG_USE_FPCS_LIST2) {
+        lir->useMask |= ENCODE_REG_FPCS_LIST(lir->operands[2] >> 16);
+    }
+
+    if (flags & USES_CCODES) {
+        lir->useMask |= ENCODE_CCODE;
+    }
+
+    /* Fixup for kThumbPush/lr and kThumbPop/pc */
+    if (opcode == kThumbPush || opcode == kThumbPop) {
+        u8 r8Mask = getRegMaskCommon(r8);
+        if ((opcode == kThumbPush) && (lir->useMask & r8Mask)) {
+            lir->useMask &= ~r8Mask;
+            lir->useMask |= ENCODE_REG_LR;
+        } else if ((opcode == kThumbPop) && (lir->defMask & r8Mask)) {
+            lir->defMask &= ~r8Mask;
+            lir->defMask |= ENCODE_REG_PC;
+        }
+    }
+}
+
+/*
+ * Set up the accurate resource mask for branch instructions
+ */
+static void relaxBranchMasks(ArmLIR* lir)
+{
+    int flags = EncodingMap[lir->opcode].flags;
+
+    /* Make sure only branch instructions are passed here */
+    assert(flags & IS_BRANCH);
+
+    lir->useMask = lir->defMask = ENCODE_REG_PC;
+
+    if (flags & REG_DEF_LR) {
+        lir->defMask |= ENCODE_REG_LR;
+    }
+
+    if (flags & (REG_USE0 | REG_USE1 | REG_USE2 | REG_USE3)) {
+        int i;
+
+        for (i = 0; i < 4; i++) {
+            if (flags & (1 << (kRegUse0 + i))) {
+                setupRegMask(&lir->useMask, lir->operands[i]);
+            }
+        }
+    }
+
+    if (flags & USES_CCODES) {
+        lir->useMask |= ENCODE_CCODE;
+    }
+}
+
+/*
+ * The following are building blocks to construct low-level IRs with 0 - 4
+ * operands.
+ */
+static ArmLIR* newLIR0(CompilationUnit* cUnit, ArmOpcode opcode)
+{
+    ArmLIR* insn = (ArmLIR* ) oatNew(sizeof(ArmLIR), true);
+    assert(isPseudoOpcode(opcode) || (EncodingMap[opcode].flags & NO_OPERAND));
+    insn->opcode = opcode;
+    setupResourceMasks(insn);
+    insn->generic.dalvikOffset = cUnit->currentDalvikOffset;
+    oatAppendLIR(cUnit, (LIR*) insn);
+    return insn;
+}
+
+static ArmLIR* newLIR1(CompilationUnit* cUnit, ArmOpcode opcode,
+                           int dest)
+{
+    ArmLIR* insn = (ArmLIR* ) oatNew(sizeof(ArmLIR), true);
+    assert(isPseudoOpcode(opcode) || (EncodingMap[opcode].flags & IS_UNARY_OP));
+    insn->opcode = opcode;
+    insn->operands[0] = dest;
+    setupResourceMasks(insn);
+    insn->generic.dalvikOffset = cUnit->currentDalvikOffset;
+    oatAppendLIR(cUnit, (LIR*) insn);
+    return insn;
+}
+
+static ArmLIR* newLIR2(CompilationUnit* cUnit, ArmOpcode opcode,
+                           int dest, int src1)
+{
+    ArmLIR* insn = (ArmLIR* ) oatNew(sizeof(ArmLIR), true);
+    assert(isPseudoOpcode(opcode) ||
+           (EncodingMap[opcode].flags & IS_BINARY_OP));
+    insn->opcode = opcode;
+    insn->operands[0] = dest;
+    insn->operands[1] = src1;
+    setupResourceMasks(insn);
+    insn->generic.dalvikOffset = cUnit->currentDalvikOffset;
+    oatAppendLIR(cUnit, (LIR*) insn);
+    return insn;
+}
+
+static ArmLIR* newLIR3(CompilationUnit* cUnit, ArmOpcode opcode,
+                           int dest, int src1, int src2)
+{
+    ArmLIR* insn = (ArmLIR* ) oatNew(sizeof(ArmLIR), true);
+    assert(isPseudoOpcode(opcode) ||
+           (EncodingMap[opcode].flags & IS_TERTIARY_OP));
+    insn->opcode = opcode;
+    insn->operands[0] = dest;
+    insn->operands[1] = src1;
+    insn->operands[2] = src2;
+    setupResourceMasks(insn);
+    insn->generic.dalvikOffset = cUnit->currentDalvikOffset;
+    oatAppendLIR(cUnit, (LIR*) insn);
+    return insn;
+}
+
+#if defined(_ARMV7_A) || defined(_ARMV7_A_NEON)
+static ArmLIR* newLIR4(CompilationUnit* cUnit, ArmOpcode opcode,
+                           int dest, int src1, int src2, int info)
+{
+    ArmLIR* insn = (ArmLIR* ) oatNew(sizeof(ArmLIR), true);
+    assert(isPseudoOpcode(opcode) ||
+           (EncodingMap[opcode].flags & IS_QUAD_OP));
+    insn->opcode = opcode;
+    insn->operands[0] = dest;
+    insn->operands[1] = src1;
+    insn->operands[2] = src2;
+    insn->operands[3] = info;
+    setupResourceMasks(insn);
+    insn->generic.dalvikOffset = cUnit->currentDalvikOffset;
+    oatAppendLIR(cUnit, (LIR*) insn);
+    return insn;
+}
+#endif
+
+/*
+ * Search the existing constants in the literal pool for an exact or close match
+ * within specified delta (greater or equal to 0).
+ */
+static ArmLIR* scanLiteralPool(LIR* dataTarget, int value, unsigned int delta)
+{
+    while (dataTarget) {
+        if (((unsigned) (value - ((ArmLIR* ) dataTarget)->operands[0])) <=
+            delta)
+            return (ArmLIR* ) dataTarget;
+        dataTarget = dataTarget->next;
+    }
+    return NULL;
+}
+
+/*
+ * The following are building blocks to insert constants into the pool or
+ * instruction streams.
+ */
+
+/* Add a 32-bit constant either in the constant pool or mixed with code */
+static ArmLIR* addWordData(CompilationUnit* cUnit, LIR* *constantListP,
+                           int value)
+{
+    /* Add the constant to the literal pool */
+    if (constantListP) {
+        ArmLIR* newValue = (ArmLIR* ) oatNew(sizeof(ArmLIR), true);
+        newValue->operands[0] = value;
+        newValue->generic.next = *constantListP;
+        *constantListP = (LIR*) newValue;
+        return newValue;
+    } else {
+        /* Add the constant in the middle of code stream */
+        newLIR1(cUnit, kArm16BitData, (value & 0xffff));
+        newLIR1(cUnit, kArm16BitData, (value >> 16));
+    }
+    return NULL;
+}
+
+/*
+ * Generate an kArmPseudoBarrier marker to indicate the boundary of special
+ * blocks.
+ */
+static void genBarrier(CompilationUnit* cUnit)
+{
+    ArmLIR* barrier = newLIR0(cUnit, kArmPseudoBarrier);
+    /* Mark all resources as being clobbered */
+    barrier->defMask = -1;
+}
+
+/* Create the PC reconstruction slot if not already done */
+static ArmLIR* genCheckCommon(CompilationUnit* cUnit, int dOffset,
+                              ArmLIR* branch,
+                              ArmLIR* pcrLabel)
+{
+    //FIXME - won't be rolling back, need to throw now.
+    UNIMPLEMENTED(WARNING);
+#if 0
+
+    /* Forget all def info (because we might rollback here.  Bug #2367397 */
+    oatResetDefTracking(cUnit);
+
+    /* Set up the place holder to reconstruct this Dalvik PC */
+    if (pcrLabel == NULL) {
+        int dPC = (int) (cUnit->insns + dOffset);
+        pcrLabel = (ArmLIR* ) oatNew(sizeof(ArmLIR), true);
+        pcrLabel->opcode = kArmPseudoPCReconstructionCell;
+        pcrLabel->operands[0] = dPC;
+        pcrLabel->operands[1] = dOffset;
+        /* Insert the place holder to the growable list */
+        oatInsertGrowableList(&cUnit->pcReconstructionList,
+                              (intptr_t) pcrLabel);
+    }
+#endif
+    /* Branch to the PC reconstruction code */
+    branch->generic.target = (LIR*) pcrLabel;
+
+    /* Clear the conservative flags for branches that punt to the interpreter */
+    relaxBranchMasks(branch);
+
+    return pcrLabel;
+}
diff --git a/src/compiler/codegen/arm/FP/Thumb2VFP.cc b/src/compiler/codegen/arm/FP/Thumb2VFP.cc
new file mode 100644
index 0000000..3cfb157
--- /dev/null
+++ b/src/compiler/codegen/arm/FP/Thumb2VFP.cc
@@ -0,0 +1,242 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+static bool genArithOpFloat(CompilationUnit* cUnit, MIR* mir,
+                            RegLocation rlDest, RegLocation rlSrc1,
+                            RegLocation rlSrc2)
+{
+    int op = kThumbBkpt;
+    RegLocation rlResult;
+
+    /*
+     * Don't attempt to optimize register usage since these opcodes call out to
+     * the handlers.
+     */
+    switch (mir->dalvikInsn.opcode) {
+        case OP_ADD_FLOAT_2ADDR:
+        case OP_ADD_FLOAT:
+            op = kThumb2Vadds;
+            break;
+        case OP_SUB_FLOAT_2ADDR:
+        case OP_SUB_FLOAT:
+            op = kThumb2Vsubs;
+            break;
+        case OP_DIV_FLOAT_2ADDR:
+        case OP_DIV_FLOAT:
+            op = kThumb2Vdivs;
+            break;
+        case OP_MUL_FLOAT_2ADDR:
+        case OP_MUL_FLOAT:
+            op = kThumb2Vmuls;
+            break;
+        case OP_REM_FLOAT_2ADDR:
+        case OP_REM_FLOAT:
+        case OP_NEG_FLOAT: {
+            return genArithOpFloatPortable(cUnit, mir, rlDest, rlSrc1,
+                                              rlSrc2);
+        }
+        default:
+            return true;
+    }
+    rlSrc1 = loadValue(cUnit, rlSrc1, kFPReg);
+    rlSrc2 = loadValue(cUnit, rlSrc2, kFPReg);
+    rlResult = oatEvalLoc(cUnit, rlDest, kFPReg, true);
+    newLIR3(cUnit, (ArmOpcode)op, rlResult.lowReg, rlSrc1.lowReg,
+            rlSrc2.lowReg);
+    storeValue(cUnit, rlDest, rlResult);
+    return false;
+}
+
+static bool genArithOpDouble(CompilationUnit* cUnit, MIR* mir,
+                             RegLocation rlDest, RegLocation rlSrc1,
+                             RegLocation rlSrc2)
+{
+    int op = kThumbBkpt;
+    RegLocation rlResult;
+
+    switch (mir->dalvikInsn.opcode) {
+        case OP_ADD_DOUBLE_2ADDR:
+        case OP_ADD_DOUBLE:
+            op = kThumb2Vaddd;
+            break;
+        case OP_SUB_DOUBLE_2ADDR:
+        case OP_SUB_DOUBLE:
+            op = kThumb2Vsubd;
+            break;
+        case OP_DIV_DOUBLE_2ADDR:
+        case OP_DIV_DOUBLE:
+            op = kThumb2Vdivd;
+            break;
+        case OP_MUL_DOUBLE_2ADDR:
+        case OP_MUL_DOUBLE:
+            op = kThumb2Vmuld;
+            break;
+        case OP_REM_DOUBLE_2ADDR:
+        case OP_REM_DOUBLE:
+        case OP_NEG_DOUBLE: {
+            return genArithOpDoublePortable(cUnit, mir, rlDest, rlSrc1,
+                                               rlSrc2);
+        }
+        default:
+            return true;
+    }
+
+    rlSrc1 = loadValueWide(cUnit, rlSrc1, kFPReg);
+    assert(rlSrc1.wide);
+    rlSrc2 = loadValueWide(cUnit, rlSrc2, kFPReg);
+    assert(rlSrc2.wide);
+    rlResult = oatEvalLoc(cUnit, rlDest, kFPReg, true);
+    assert(rlDest.wide);
+    assert(rlResult.wide);
+    newLIR3(cUnit, (ArmOpcode)op, S2D(rlResult.lowReg, rlResult.highReg),
+            S2D(rlSrc1.lowReg, rlSrc1.highReg),
+            S2D(rlSrc2.lowReg, rlSrc2.highReg));
+    storeValueWide(cUnit, rlDest, rlResult);
+    return false;
+}
+
+static bool genConversion(CompilationUnit* cUnit, MIR* mir)
+{
+    Opcode opcode = mir->dalvikInsn.opcode;
+    int op = kThumbBkpt;
+    bool longSrc = false;
+    bool longDest = false;
+    int srcReg;
+    RegLocation rlSrc;
+    RegLocation rlDest;
+    RegLocation rlResult;
+
+    switch (opcode) {
+        case OP_INT_TO_FLOAT:
+            longSrc = false;
+            longDest = false;
+            op = kThumb2VcvtIF;
+            break;
+        case OP_FLOAT_TO_INT:
+            longSrc = false;
+            longDest = false;
+            op = kThumb2VcvtFI;
+            break;
+        case OP_DOUBLE_TO_FLOAT:
+            longSrc = true;
+            longDest = false;
+            op = kThumb2VcvtDF;
+            break;
+        case OP_FLOAT_TO_DOUBLE:
+            longSrc = false;
+            longDest = true;
+            op = kThumb2VcvtFd;
+            break;
+        case OP_INT_TO_DOUBLE:
+            longSrc = false;
+            longDest = true;
+            op = kThumb2VcvtID;
+            break;
+        case OP_DOUBLE_TO_INT:
+            longSrc = true;
+            longDest = false;
+            op = kThumb2VcvtDI;
+            break;
+        case OP_LONG_TO_DOUBLE:
+        case OP_FLOAT_TO_LONG:
+        case OP_LONG_TO_FLOAT:
+        case OP_DOUBLE_TO_LONG:
+            return genConversionPortable(cUnit, mir);
+        default:
+            return true;
+    }
+    if (longSrc) {
+        rlSrc = oatGetSrcWide(cUnit, mir, 0, 1);
+        rlSrc = loadValueWide(cUnit, rlSrc, kFPReg);
+        srcReg = S2D(rlSrc.lowReg, rlSrc.highReg);
+    } else {
+        rlSrc = oatGetSrc(cUnit, mir, 0);
+        rlSrc = loadValue(cUnit, rlSrc, kFPReg);
+        srcReg = rlSrc.lowReg;
+    }
+    if (longDest) {
+        rlDest = oatGetDestWide(cUnit, mir, 0, 1);
+        rlResult = oatEvalLoc(cUnit, rlDest, kFPReg, true);
+        newLIR2(cUnit, (ArmOpcode)op, S2D(rlResult.lowReg, rlResult.highReg),
+                srcReg);
+        storeValueWide(cUnit, rlDest, rlResult);
+    } else {
+        rlDest = oatGetDest(cUnit, mir, 0);
+        rlResult = oatEvalLoc(cUnit, rlDest, kFPReg, true);
+        newLIR2(cUnit, (ArmOpcode)op, rlResult.lowReg, srcReg);
+        storeValue(cUnit, rlDest, rlResult);
+    }
+    return false;
+}
+
+static bool genCmpFP(CompilationUnit* cUnit, MIR* mir, RegLocation rlDest,
+                     RegLocation rlSrc1, RegLocation rlSrc2)
+{
+    bool isDouble;
+    int defaultResult;
+    RegLocation rlResult;
+
+    switch(mir->dalvikInsn.opcode) {
+        case OP_CMPL_FLOAT:
+            isDouble = false;
+            defaultResult = -1;
+            break;
+        case OP_CMPG_FLOAT:
+            isDouble = false;
+            defaultResult = 1;
+            break;
+        case OP_CMPL_DOUBLE:
+            isDouble = true;
+            defaultResult = -1;
+            break;
+        case OP_CMPG_DOUBLE:
+            isDouble = true;
+            defaultResult = 1;
+            break;
+        default:
+            return true;
+    }
+    if (isDouble) {
+        rlSrc1 = loadValueWide(cUnit, rlSrc1, kFPReg);
+        rlSrc2 = loadValueWide(cUnit, rlSrc2, kFPReg);
+        oatClobberSReg(cUnit, rlDest.sRegLow);
+        rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true);
+        loadConstant(cUnit, rlResult.lowReg, defaultResult);
+        newLIR2(cUnit, kThumb2Vcmpd, S2D(rlSrc1.lowReg, r1Src2.highReg),
+                S2D(rlSrc2.lowReg, rlSrc2.highReg));
+    } else {
+        rlSrc1 = loadValue(cUnit, rlSrc1, kFPReg);
+        rlSrc2 = loadValue(cUnit, rlSrc2, kFPReg);
+        oatClobberSReg(cUnit, rlDest.sRegLow);
+        rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true);
+        loadConstant(cUnit, rlResult.lowReg, defaultResult);
+        newLIR2(cUnit, kThumb2Vcmps, rlSrc1.lowReg, rlSrc2.lowReg);
+    }
+    assert(!FPREG(rlResult.lowReg));
+    newLIR0(cUnit, kThumb2Fmstat);
+
+    genIT(cUnit, (defaultResult == -1) ? kArmCondGt : kArmCondMi, "");
+    newLIR2(cUnit, kThumb2MovImmShift, rlResult.lowReg,
+            modifiedImmediate(-defaultResult)); // Must not alter ccodes
+    genBarrier(cUnit);
+
+    genIT(cUnit, kArmCondEq, "");
+    loadConstant(cUnit, rlResult.lowReg, 0);
+    genBarrier(cUnit);
+
+    storeValue(cUnit, rlDest, rlResult);
+    return false;
+}
diff --git a/src/compiler/codegen/arm/LocalOptimizations.cc b/src/compiler/codegen/arm/LocalOptimizations.cc
new file mode 100644
index 0000000..8d6f3a5
--- /dev/null
+++ b/src/compiler/codegen/arm/LocalOptimizations.cc
@@ -0,0 +1,448 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "../../Dalvik.h"
+#include "../../CompilerInternals.h"
+#include "ArmLIR.h"
+#include "Codegen.h"
+
+#define DEBUG_OPT(X)
+
+/* Check RAW, WAR, and WAR dependency on the register operands */
+#define CHECK_REG_DEP(use, def, check) ((def & check->useMask) || \
+                                        ((use | def) & check->defMask))
+
+/* Scheduler heuristics */
+#define MAX_HOIST_DISTANCE 20
+#define LDLD_DISTANCE 4
+#define LD_LATENCY 2
+
+static inline bool isDalvikRegisterClobbered(ArmLIR* lir1, ArmLIR* lir2)
+{
+    int reg1Lo = DECODE_ALIAS_INFO_REG(lir1->aliasInfo);
+    int reg1Hi = reg1Lo + DECODE_ALIAS_INFO_WIDE(lir1->aliasInfo);
+    int reg2Lo = DECODE_ALIAS_INFO_REG(lir2->aliasInfo);
+    int reg2Hi = reg2Lo + DECODE_ALIAS_INFO_WIDE(lir2->aliasInfo);
+
+    return (reg1Lo == reg2Lo) || (reg1Lo == reg2Hi) || (reg1Hi == reg2Lo);
+}
+
+/* Convert a more expensive instruction (ie load) into a move */
+static void convertMemOpIntoMove(CompilationUnit* cUnit, ArmLIR* origLIR,
+                                 int dest, int src)
+{
+    /* Insert a move to replace the load */
+    ArmLIR* moveLIR;
+    moveLIR = oatRegCopyNoInsert( cUnit, dest, src);
+    /*
+     * Insert the converted instruction after the original since the
+     * optimization is scannng in the top-down order and the new instruction
+     * will need to be re-checked (eg the new dest clobbers the src used in
+     * thisLIR).
+     */
+    oatInsertLIRAfter((LIR*) origLIR, (LIR*) moveLIR);
+}
+
+/*
+ * Perform a pass of top-down walk, from the second-last instruction in the
+ * superblock, to eliminate redundant loads and stores.
+ *
+ * An earlier load can eliminate a later load iff
+ *   1) They are must-aliases
+ *   2) The native register is not clobbered in between
+ *   3) The memory location is not written to in between
+ *
+ * An earlier store can eliminate a later load iff
+ *   1) They are must-aliases
+ *   2) The native register is not clobbered in between
+ *   3) The memory location is not written to in between
+ *
+ * A later store can be eliminated by an earlier store iff
+ *   1) They are must-aliases
+ *   2) The memory location is not written to in between
+ */
+static void applyLoadStoreElimination(CompilationUnit* cUnit,
+                                      ArmLIR* headLIR,
+                                      ArmLIR* tailLIR)
+{
+    ArmLIR* thisLIR;
+
+    if (headLIR == tailLIR) return;
+
+    for (thisLIR = PREV_LIR(tailLIR);
+         thisLIR != headLIR;
+         thisLIR = PREV_LIR(thisLIR)) {
+        int sinkDistance = 0;
+
+        /* Skip non-interesting instructions */
+        if ((thisLIR->flags.isNop == true) ||
+            isPseudoOpcode(thisLIR->opcode) ||
+            !(EncodingMap[thisLIR->opcode].flags & (IS_LOAD | IS_STORE))) {
+            continue;
+        }
+
+        int nativeRegId = thisLIR->operands[0];
+        bool isThisLIRLoad = EncodingMap[thisLIR->opcode].flags & IS_LOAD;
+        ArmLIR* checkLIR;
+        /* Use the mem mask to determine the rough memory location */
+        u8 thisMemMask = (thisLIR->useMask | thisLIR->defMask) & ENCODE_MEM;
+
+        /*
+         * Currently only eliminate redundant ld/st for constant and Dalvik
+         * register accesses.
+         */
+        if (!(thisMemMask & (ENCODE_LITERAL | ENCODE_DALVIK_REG))) continue;
+
+        /*
+         * Add r15 (pc) to the resource mask to prevent this instruction
+         * from sinking past branch instructions. Also take out the memory
+         * region bits since stopMask is used to check data/control
+         * dependencies.
+         */
+        u8 stopUseRegMask = (ENCODE_REG_PC | thisLIR->useMask) &
+                            ~ENCODE_MEM;
+        u8 stopDefRegMask = thisLIR->defMask & ~ENCODE_MEM;
+
+        for (checkLIR = NEXT_LIR(thisLIR);
+             checkLIR != tailLIR;
+             checkLIR = NEXT_LIR(checkLIR)) {
+
+            /*
+             * Skip already dead instructions (whose dataflow information is
+             * outdated and misleading).
+             */
+            if (checkLIR->flags.isNop) continue;
+
+            u8 checkMemMask = (checkLIR->useMask | checkLIR->defMask) &
+                              ENCODE_MEM;
+            u8 aliasCondition = thisMemMask & checkMemMask;
+            bool stopHere = false;
+
+            /*
+             * Potential aliases seen - check the alias relations
+             */
+            if (checkMemMask != ENCODE_MEM && aliasCondition != 0) {
+                bool isCheckLIRLoad = EncodingMap[checkLIR->opcode].flags &
+                                      IS_LOAD;
+                if  (aliasCondition == ENCODE_LITERAL) {
+                    /*
+                     * Should only see literal loads in the instruction
+                     * stream.
+                     */
+                    assert(!(EncodingMap[checkLIR->opcode].flags &
+                             IS_STORE));
+                    /* Same value && same register type */
+                    if (checkLIR->aliasInfo == thisLIR->aliasInfo &&
+                        REGTYPE(checkLIR->operands[0]) == REGTYPE(nativeRegId)){
+                        /*
+                         * Different destination register - insert
+                         * a move
+                         */
+                        if (checkLIR->operands[0] != nativeRegId) {
+                            convertMemOpIntoMove(cUnit, checkLIR,
+                                                 checkLIR->operands[0],
+                                                 nativeRegId);
+                        }
+                        checkLIR->flags.isNop = true;
+                    }
+                } else if (aliasCondition == ENCODE_DALVIK_REG) {
+                    /* Must alias */
+                    if (checkLIR->aliasInfo == thisLIR->aliasInfo) {
+                        /* Only optimize compatible registers */
+                        bool regCompatible =
+                            REGTYPE(checkLIR->operands[0]) ==
+                            REGTYPE(nativeRegId);
+                        if ((isThisLIRLoad && isCheckLIRLoad) ||
+                            (!isThisLIRLoad && isCheckLIRLoad)) {
+                            /* RAR or RAW */
+                            if (regCompatible) {
+                                /*
+                                 * Different destination register -
+                                 * insert a move
+                                 */
+                                if (checkLIR->operands[0] !=
+                                    nativeRegId) {
+                                    convertMemOpIntoMove(cUnit,
+                                                 checkLIR,
+                                                 checkLIR->operands[0],
+                                                 nativeRegId);
+                                }
+                                checkLIR->flags.isNop = true;
+                            } else {
+                                /*
+                                 * Destinaions are of different types -
+                                 * something complicated going on so
+                                 * stop looking now.
+                                 */
+                                stopHere = true;
+                            }
+                        } else if (isThisLIRLoad && !isCheckLIRLoad) {
+                            /* WAR - register value is killed */
+                            stopHere = true;
+                        } else if (!isThisLIRLoad && !isCheckLIRLoad) {
+                            /* WAW - nuke the earlier store */
+                            thisLIR->flags.isNop = true;
+                            stopHere = true;
+                        }
+                    /* Partial overlap */
+                    } else if (isDalvikRegisterClobbered(thisLIR, checkLIR)) {
+                        /*
+                         * It is actually ok to continue if checkLIR
+                         * is a read. But it is hard to make a test
+                         * case for this so we just stop here to be
+                         * conservative.
+                         */
+                        stopHere = true;
+                    }
+                }
+                /* Memory content may be updated. Stop looking now. */
+                if (stopHere) {
+                    break;
+                /* The checkLIR has been transformed - check the next one */
+                } else if (checkLIR->flags.isNop) {
+                    continue;
+                }
+            }
+
+
+            /*
+             * this and check LIRs have no memory dependency. Now check if
+             * their register operands have any RAW, WAR, and WAW
+             * dependencies. If so, stop looking.
+             */
+            if (stopHere == false) {
+                stopHere = CHECK_REG_DEP(stopUseRegMask, stopDefRegMask,
+                                         checkLIR);
+            }
+
+            if (stopHere == true) {
+                DEBUG_OPT(dumpDependentInsnPair(thisLIR, checkLIR,
+                                                "REG CLOBBERED"));
+                /* Only sink store instructions */
+                if (sinkDistance && !isThisLIRLoad) {
+                    ArmLIR* newStoreLIR =
+                        (ArmLIR* ) oatNew(sizeof(ArmLIR), true);
+                    *newStoreLIR = *thisLIR;
+                    /*
+                     * Stop point found - insert *before* the checkLIR
+                     * since the instruction list is scanned in the
+                     * top-down order.
+                     */
+                    oatInsertLIRBefore((LIR*) checkLIR,
+                                               (LIR*) newStoreLIR);
+                    thisLIR->flags.isNop = true;
+                }
+                break;
+            } else if (!checkLIR->flags.isNop) {
+                sinkDistance++;
+            }
+        }
+    }
+}
+
+/*
+ * Perform a pass of bottom-up walk, from the second instruction in the
+ * superblock, to try to hoist loads to earlier slots.
+ */
+static void applyLoadHoisting(CompilationUnit* cUnit,
+                              ArmLIR* headLIR,
+                              ArmLIR* tailLIR)
+{
+    ArmLIR* thisLIR, *checkLIR;
+    /*
+     * Store the list of independent instructions that can be hoisted past.
+     * Will decide the best place to insert later.
+     */
+    ArmLIR* prevInstList[MAX_HOIST_DISTANCE];
+
+    /* Empty block */
+    if (headLIR == tailLIR) return;
+
+    /* Start from the second instruction */
+    for (thisLIR = NEXT_LIR(headLIR);
+         thisLIR != tailLIR;
+         thisLIR = NEXT_LIR(thisLIR)) {
+
+        /* Skip non-interesting instructions */
+        if ((thisLIR->flags.isNop == true) ||
+            isPseudoOpcode(thisLIR->opcode) ||
+            !(EncodingMap[thisLIR->opcode].flags & IS_LOAD)) {
+            continue;
+        }
+
+        u8 stopUseAllMask = thisLIR->useMask;
+
+        /*
+         * Branches for null/range checks are marked with the true resource
+         * bits, and loads to Dalvik registers, constant pools, and non-alias
+         * locations are safe to be hoisted. So only mark the heap references
+         * conservatively here.
+         */
+        if (stopUseAllMask & ENCODE_HEAP_REF) {
+            stopUseAllMask |= ENCODE_REG_PC;
+        }
+
+        /* Similar as above, but just check for pure register dependency */
+        u8 stopUseRegMask = stopUseAllMask & ~ENCODE_MEM;
+        u8 stopDefRegMask = thisLIR->defMask & ~ENCODE_MEM;
+
+        int nextSlot = 0;
+        bool stopHere = false;
+
+        /* Try to hoist the load to a good spot */
+        for (checkLIR = PREV_LIR(thisLIR);
+             checkLIR != headLIR;
+             checkLIR = PREV_LIR(checkLIR)) {
+
+            /*
+             * Skip already dead instructions (whose dataflow information is
+             * outdated and misleading).
+             */
+            if (checkLIR->flags.isNop) continue;
+
+            u8 checkMemMask = checkLIR->defMask & ENCODE_MEM;
+            u8 aliasCondition = stopUseAllMask & checkMemMask;
+            stopHere = false;
+
+            /* Potential WAR alias seen - check the exact relation */
+            if (checkMemMask != ENCODE_MEM && aliasCondition != 0) {
+                /* We can fully disambiguate Dalvik references */
+                if (aliasCondition == ENCODE_DALVIK_REG) {
+                    /* Must alias or partually overlap */
+                    if ((checkLIR->aliasInfo == thisLIR->aliasInfo) ||
+                        isDalvikRegisterClobbered(thisLIR, checkLIR)) {
+                        stopHere = true;
+                    }
+                /* Conservatively treat all heap refs as may-alias */
+                } else {
+                    assert(aliasCondition == ENCODE_HEAP_REF);
+                    stopHere = true;
+                }
+                /* Memory content may be updated. Stop looking now. */
+                if (stopHere) {
+                    prevInstList[nextSlot++] = checkLIR;
+                    break;
+                }
+            }
+
+            if (stopHere == false) {
+                stopHere = CHECK_REG_DEP(stopUseRegMask, stopDefRegMask,
+                                         checkLIR);
+            }
+
+            /*
+             * Store the dependent or non-pseudo/indepedent instruction to the
+             * list.
+             */
+            if (stopHere || !isPseudoOpcode(checkLIR->opcode)) {
+                prevInstList[nextSlot++] = checkLIR;
+                if (nextSlot == MAX_HOIST_DISTANCE) break;
+            }
+
+            /* Found a new place to put the load - move it here */
+            if (stopHere == true) {
+                DEBUG_OPT(dumpDependentInsnPair(checkLIR, thisLIR
+                                                "HOIST STOP"));
+                break;
+            }
+        }
+
+        /*
+         * Reached the top - use headLIR as the dependent marker as all labels
+         * are barriers.
+         */
+        if (stopHere == false && nextSlot < MAX_HOIST_DISTANCE) {
+            prevInstList[nextSlot++] = headLIR;
+        }
+
+        /*
+         * At least one independent instruction is found. Scan in the reversed
+         * direction to find a beneficial slot.
+         */
+        if (nextSlot >= 2) {
+            int firstSlot = nextSlot - 2;
+            int slot;
+            ArmLIR* depLIR = prevInstList[nextSlot-1];
+            /* If there is ld-ld dependency, wait LDLD_DISTANCE cycles */
+            if (!isPseudoOpcode(depLIR->opcode) &&
+                (EncodingMap[depLIR->opcode].flags & IS_LOAD)) {
+                firstSlot -= LDLD_DISTANCE;
+            }
+            /*
+             * Make sure we check slot >= 0 since firstSlot may be negative
+             * when the loop is first entered.
+             */
+            for (slot = firstSlot; slot >= 0; slot--) {
+                ArmLIR* curLIR = prevInstList[slot];
+                ArmLIR* prevLIR = prevInstList[slot+1];
+
+                /* Check the highest instruction */
+                if (prevLIR->defMask == ENCODE_ALL) {
+                    /*
+                     * If the first instruction is a load, don't hoist anything
+                     * above it since it is unlikely to be beneficial.
+                     */
+                    if (EncodingMap[curLIR->opcode].flags & IS_LOAD) continue;
+                    /*
+                     * If the remaining number of slots is less than LD_LATENCY,
+                     * insert the hoisted load here.
+                     */
+                    if (slot < LD_LATENCY) break;
+                }
+
+                /*
+                 * NOTE: now prevLIR is guaranteed to be a non-pseudo
+                 * instruction (ie accessing EncodingMap[prevLIR->opcode] is
+                 * safe).
+                 *
+                 * Try to find two instructions with load/use dependency until
+                 * the remaining instructions are less than LD_LATENCY.
+                 */
+                if (((curLIR->useMask & prevLIR->defMask) &&
+                     (EncodingMap[prevLIR->opcode].flags & IS_LOAD)) ||
+                    (slot < LD_LATENCY)) {
+                    break;
+                }
+            }
+
+            /* Found a slot to hoist to */
+            if (slot >= 0) {
+                ArmLIR* curLIR = prevInstList[slot];
+                ArmLIR* newLoadLIR = (ArmLIR* ) oatNew(sizeof(ArmLIR),
+                                                               true);
+                *newLoadLIR = *thisLIR;
+                /*
+                 * Insertion is guaranteed to succeed since checkLIR
+                 * is never the first LIR on the list
+                 */
+                oatInsertLIRBefore((LIR*) curLIR, (LIR*) newLoadLIR);
+                thisLIR->flags.isNop = true;
+            }
+        }
+    }
+}
+
+void oatApplyLocalOptimizations(CompilationUnit* cUnit, LIR* headLIR,
+                                        LIR* tailLIR)
+{
+    if (!(cUnit->disableOpt & (1 << kLoadStoreElimination))) {
+        applyLoadStoreElimination(cUnit, (ArmLIR* ) headLIR,
+                                  (ArmLIR* ) tailLIR);
+    }
+    if (!(cUnit->disableOpt & (1 << kLoadHoisting))) {
+        applyLoadHoisting(cUnit, (ArmLIR* ) headLIR, (ArmLIR* ) tailLIR);
+    }
+}
diff --git a/src/compiler/codegen/arm/MethodCodegenDriver.cc b/src/compiler/codegen/arm/MethodCodegenDriver.cc
new file mode 100644
index 0000000..417fdca
--- /dev/null
+++ b/src/compiler/codegen/arm/MethodCodegenDriver.cc
@@ -0,0 +1,1765 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+//#define TESTMODE
+
+#ifdef TESTMODE
+#include "CalloutHelper.h"
+#endif
+
+static const RegLocation badLoc = {kLocDalvikFrame, 0, 0, INVALID_REG,
+                                   INVALID_REG, INVALID_SREG, 0,
+                                   kLocDalvikFrame, INVALID_REG, INVALID_REG,
+                                   INVALID_OFFSET};
+static const RegLocation retLoc = LOC_DALVIK_RETURN_VAL;
+static const RegLocation retLocWide = LOC_DALVIK_RETURN_VAL_WIDE;
+
+static void genNewArray(CompilationUnit* cUnit, MIR* mir, RegLocation rlDest,
+                        RegLocation rlSrc)
+{
+    oatFlushAllRegs(cUnit);  /* All temps to home location */
+    void* classPtr = (void*)
+        (cUnit->method->clazz->pDvmDex->pResClasses[mir->dalvikInsn.vC]);
+    if (classPtr == NULL) {
+         LOG(FATAL) << "Unexpected null passPtr";
+    } else {
+         loadValueDirectFixed(cUnit, rlSrc, r1);    /* get Len */
+         loadConstant(cUnit, r0, (int)classPtr);
+    }
+    // FIXME: need this to throw errNegativeArraySize
+    genRegImmCheck(cUnit, kArmCondMi, r1, 0, mir->offset, NULL);
+#ifdef TESTMODE
+// Hack until we get rSELF setup
+    loadConstant(cUnit, rLR, (int)dvmAllocArrayByClass);
+#else
+    loadWordDisp(cUnit, rSELF, OFFSETOF_MEMBER(Thread, pArtAllocArrayByClass),
+                 rLR);
+#endif
+    loadConstant(cUnit, r2, ALLOC_DONT_TRACK);
+    newLIR1(cUnit, kThumbBlxR, rLR); // (arrayClass, length, allocFlags)
+    storeValue(cUnit, rlDest, retLoc);
+}
+
+/*
+ * Similar to genNewArray, but with post-allocation initialization.
+ * Verifier guarantees we're dealing with an array class.  Current
+ * code throws runtime exception "bad Filled array req" for 'D' and 'J'.
+ * Current code also throws internal unimp if not 'L', '[' or 'I'.
+ */
+static void genFilledNewArray(CompilationUnit* cUnit, MIR* mir, bool isRange)
+{
+    DecodedInstruction* dInsn = &mir->dalvikInsn;
+    int elems;
+    int typeIndex;
+    if (isRange) {
+        elems = dInsn->vA;
+        typeIndex = dInsn->vB;
+    } else {
+        elems = dInsn->vB;
+        typeIndex = dInsn->vC;
+    }
+    oatFlushAllRegs(cUnit);  /* All temps to home location */
+    void* classPtr = (void*)
+        (cUnit->method->clazz->pDvmDex->pResClasses[typeIndex]);
+    if (classPtr == NULL) {
+         LOG(FATAL) << "Unexpected null passPtr";
+    } else {
+         loadConstant(cUnit, r0, (int)classPtr);
+         loadConstant(cUnit, r1, elems);
+    }
+    if (elems < 0) {
+        LOG(FATAL) << "Unexpected empty array";
+    }
+    /*
+     * FIXME: Need a new NoThrow allocator that checks for and handles
+     * the above mentioned bad cases of 'D', 'J' or !('L' | '[' | 'I').
+     * That will keep us from wasting space generating an inline check here.
+     */
+#ifdef TESTMODE
+// Hack until we get rSELF setup
+    loadConstant(cUnit, rLR, (int)dvmAllocArrayByClass);
+#else
+    loadWordDisp(cUnit, rSELF, OFFSETOF_MEMBER(Thread, pArtAllocArrayByClass),
+                 rLR);
+#endif
+    loadConstant(cUnit, r2, ALLOC_DONT_TRACK);
+    newLIR1(cUnit, kThumbBlxR, rLR); // (arrayClass, length, allocFlags)
+    // Reserve ret0 (r0) - we'll use it in place.
+    oatLockTemp(cUnit, r0);
+    // Having a range of 0 is legal
+    if (isRange && (dInsn->vA > 0)) {
+        /*
+         * Bit of ugliness here.  We're going generate a mem copy loop
+         * on the register range, but it is possible that some regs
+         * in the range have been promoted.  This is unlikely, but
+         * before generating the copy, we'll just force a flush
+         * of any regs in the source range that have been promoted to
+         * home location.
+         */
+        for (unsigned int i = 0; i < dInsn->vA; i++) {
+            RegLocation loc = oatUpdateLoc(cUnit,
+                oatGetSrc(cUnit, mir, i));
+            if (loc.location == kLocPhysReg) {
+                storeBaseDisp(cUnit, rSP, loc.spOffset, loc.lowReg, kWord);
+            }
+        }
+        /*
+         * TUNING note: generated code here could be much improved, but
+         * this is an uncommon operation and isn't especially performance
+         * critical.
+         */
+        int rSrc = oatAllocTemp(cUnit);
+        int rDst = oatAllocTemp(cUnit);
+        int rIdx = oatAllocTemp(cUnit);
+        int rVal = rLR;  // Using a lot of temps, rLR is known free here
+        // Set up source pointer
+        RegLocation rlFirst = oatGetSrc(cUnit, mir, 0);
+        opRegRegImm(cUnit, kOpAdd, rSrc, rSP, rlFirst.spOffset);
+        // Set up the target pointer
+        opRegRegImm(cUnit, kOpAdd, rDst, r0,
+                    OFFSETOF_MEMBER(ArrayObject, contents));
+        // Set up the loop counter (known to be > 0)
+        loadConstant(cUnit, rIdx, dInsn->vA);
+        // Generate the copy loop.  Going backwards for convenience
+        ArmLIR* target = newLIR0(cUnit, kArmPseudoTargetLabel);
+        target->defMask = ENCODE_ALL;
+        // Copy next element
+        loadBaseIndexed(cUnit, rSrc, rIdx, rVal, 2, kWord);
+        storeBaseIndexed(cUnit, rDst, rIdx, rVal, 2, kWord);
+        // Use setflags encoding here
+        newLIR3(cUnit, kThumb2SubsRRI12, rIdx, rIdx, 1);
+        ArmLIR* branch = opCondBranch(cUnit, kArmCondNe);
+        branch->generic.target = (LIR*)target;
+    } else if (!isRange) {
+        // TUNING: interleave
+        for (unsigned int i = 0; i < dInsn->vA; i++) {
+            RegLocation rlArg = loadValue(cUnit,
+                oatGetSrc(cUnit, mir, i), kCoreReg);
+            storeBaseDisp(cUnit, r0, OFFSETOF_MEMBER(ArrayObject, contents) +
+                          i * 4, rlArg.lowReg, kWord);
+            // If the loadValue caused a temp to be allocated, free it
+            if (oatIsTemp(cUnit, rlArg.lowReg)) {
+                oatFreeTemp(cUnit, rlArg.lowReg);
+            }
+        }
+    }
+}
+
+static void genSput(CompilationUnit* cUnit, MIR* mir, RegLocation rlSrc)
+{
+    int valOffset = OFFSETOF_MEMBER(StaticField, value);
+    int tReg = oatAllocTemp(cUnit);
+    int objHead;
+    bool isVolatile;
+    bool isSputObject;
+    const Method *method = (mir->OptimizationFlags & MIR_CALLEE) ?
+        mir->meta.calleeMethod : cUnit->method;
+    void* fieldPtr = (void*)
+      (method->clazz->pDvmDex->pResFields[mir->dalvikInsn.vB]);
+    Opcode opcode = mir->dalvikInsn.opcode;
+
+    if (fieldPtr == NULL) {
+        // FIXME: need to handle this case for oat();
+        UNIMPLEMENTED(FATAL);
+    }
+
+#if ANDROID_SMP != 0
+    isVolatile = (opcode == OP_SPUT_VOLATILE) ||
+                 (opcode == OP_SPUT_VOLATILE_JUMBO) ||
+                 (opcode == OP_SPUT_OBJECT_VOLATILE) ||
+                 (opcode == OP_SPUT_OBJECT_VOLATILE_JUMBO);
+    assert(isVolatile == dvmIsVolatileField((Field *) fieldPtr));
+#else
+    isVolatile = dvmIsVolatileField((Field *) fieldPtr);
+#endif
+
+    isSputObject = (opcode == OP_SPUT_OBJECT) ||
+                   (opcode == OP_SPUT_OBJECT_VOLATILE);
+
+    rlSrc = oatGetSrc(cUnit, mir, 0);
+    rlSrc = loadValue(cUnit, rlSrc, kAnyReg);
+    loadConstant(cUnit, tReg,  (int) fieldPtr);
+    if (isSputObject) {
+        objHead = oatAllocTemp(cUnit);
+        loadWordDisp(cUnit, tReg, OFFSETOF_MEMBER(Field, clazz), objHead);
+    }
+    storeWordDisp(cUnit, tReg, valOffset ,rlSrc.lowReg);
+    oatFreeTemp(cUnit, tReg);
+    if (isVolatile) {
+        oatGenMemBarrier(cUnit, kSY);
+    }
+    if (isSputObject) {
+        /* NOTE: marking card based sfield->clazz */
+        markGCCard(cUnit, rlSrc.lowReg, objHead);
+        oatFreeTemp(cUnit, objHead);
+    }
+}
+
+static void genSputWide(CompilationUnit* cUnit, MIR* mir, RegLocation rlSrc)
+{
+    int tReg = oatAllocTemp(cUnit);
+    int valOffset = OFFSETOF_MEMBER(StaticField, value);
+    const Method *method = (mir->OptimizationFlags & MIR_CALLEE) ?
+        mir->meta.calleeMethod : cUnit->method;
+    void* fieldPtr = (void*)
+      (method->clazz->pDvmDex->pResFields[mir->dalvikInsn.vB]);
+
+    if (fieldPtr == NULL) {
+        // FIXME: need to handle this case for oat();
+        UNIMPLEMENTED(FATAL);
+    }
+
+    rlSrc = oatGetSrcWide(cUnit, mir, 0, 1);
+    rlSrc = loadValueWide(cUnit, rlSrc, kAnyReg);
+    loadConstant(cUnit, tReg,  (int) fieldPtr + valOffset);
+
+    storePair(cUnit, tReg, rlSrc.lowReg, rlSrc.highReg);
+}
+
+
+
+static void genSgetWide(CompilationUnit* cUnit, MIR* mir,
+                 RegLocation rlResult, RegLocation rlDest)
+{
+    int valOffset = OFFSETOF_MEMBER(StaticField, value);
+    const Method *method = (mir->OptimizationFlags & MIR_CALLEE) ?
+        mir->meta.calleeMethod : cUnit->method;
+    void* fieldPtr = (void*)
+      (method->clazz->pDvmDex->pResFields[mir->dalvikInsn.vB]);
+
+    if (fieldPtr == NULL) {
+        // FIXME: need to handle this case for oat();
+        UNIMPLEMENTED(FATAL);
+    }
+
+    int tReg = oatAllocTemp(cUnit);
+    rlDest = oatGetDestWide(cUnit, mir, 0, 1);
+    rlResult = oatEvalLoc(cUnit, rlDest, kAnyReg, true);
+    loadConstant(cUnit, tReg,  (int) fieldPtr + valOffset);
+
+    loadPair(cUnit, tReg, rlResult.lowReg, rlResult.highReg);
+
+    storeValueWide(cUnit, rlDest, rlResult);
+}
+
+static void genSget(CompilationUnit* cUnit, MIR* mir,
+             RegLocation rlResult, RegLocation rlDest)
+{
+    int valOffset = OFFSETOF_MEMBER(StaticField, value);
+    int tReg = oatAllocTemp(cUnit);
+    bool isVolatile;
+    const Method *method = cUnit->method;
+    void* fieldPtr = (void*)
+      (method->clazz->pDvmDex->pResFields[mir->dalvikInsn.vB]);
+
+    if (fieldPtr == NULL) {
+        // FIXME: need to handle this case for oat();
+        UNIMPLEMENTED(FATAL);
+    }
+
+    /*
+     * On SMP systems, Dalvik opcodes found to be referencing
+     * volatile fields are rewritten to their _VOLATILE variant.
+     * However, this does not happen on non-SMP systems. The compiler
+     * still needs to know about volatility to avoid unsafe
+     * optimizations so we determine volatility based on either
+     * the opcode or the field access flags.
+     */
+#if ANDROID_SMP != 0
+    Opcode opcode = mir->dalvikInsn.opcode;
+    isVolatile = (opcode == OP_SGET_VOLATILE) ||
+                 (opcode == OP_SGET_OBJECT_VOLATILE);
+    assert(isVolatile == dvmIsVolatileField((Field *) fieldPtr));
+#else
+    isVolatile = dvmIsVolatileField((Field *) fieldPtr);
+#endif
+
+    rlDest = oatGetDest(cUnit, mir, 0);
+    rlResult = oatEvalLoc(cUnit, rlDest, kAnyReg, true);
+    loadConstant(cUnit, tReg,  (int) fieldPtr + valOffset);
+
+    if (isVolatile) {
+        oatGenMemBarrier(cUnit, kSY);
+    }
+    loadWordDisp(cUnit, tReg, 0, rlResult.lowReg);
+
+    storeValue(cUnit, rlDest, rlResult);
+}
+
+typedef int (*NextCallInsn)(CompilationUnit*, MIR*, DecodedInstruction*, int);
+
+/*
+ * Bit of a hack here - in leiu of a real scheduling pass,
+ * emit the next instruction in static & direct invoke sequences.
+ */
+static int nextSDCallInsn(CompilationUnit* cUnit, MIR* mir,
+                        DecodedInstruction* dInsn, int state)
+{
+    switch(state) {
+        case 0:  // Get the current Method* [sets r0]
+            loadBaseDisp(cUnit, mir, rSP, 0, r0, kWord, INVALID_SREG);
+            break;
+        case 1:  // Get the pResMethods pointer [uses r0, sets r0]
+            loadBaseDisp(cUnit, mir, r0, OFFSETOF_MEMBER(Method, pResMethods),
+                         r0, kWord, INVALID_SREG);
+            break;
+        case 2: // Get the target Method* [uses r0, sets r0]
+            loadBaseDisp(cUnit, mir, r0, dInsn->vB * 4, r0,
+                         kWord, INVALID_SREG);
+            break;
+        case 3: // Get the target compiled code address [uses r0, sets rLR]
+            loadBaseDisp(cUnit, mir, r0,
+                         OFFSETOF_MEMBER(Method, compiledInsns), rLR,
+                         kWord, INVALID_SREG);
+            break;
+        default:
+            return -1;
+    }
+    return state + 1;
+}
+
+/*
+ * Bit of a hack here - in leiu of a real scheduling pass,
+ * emit the next instruction in a virtual invoke sequence.
+ * We can use rLR as a temp prior to target address loading
+ * Note also that we'll load the first argument ("this") into
+ * r1 here rather than the standard loadArgRegs.
+ */
+static int nextVCallInsn(CompilationUnit* cUnit, MIR* mir,
+                        DecodedInstruction* dInsn, int state)
+{
+    RegLocation rlArg;
+    switch(state) {
+        case 0:  // Get the current Method* [set r0]
+            loadBaseDisp(cUnit, mir, rSP, 0, r0, kWord, INVALID_SREG);
+            // Load "this" [set r1]
+            rlArg = oatGetSrc(cUnit, mir, 0);
+            loadValueDirectFixed(cUnit, rlArg, r1);
+            break;
+        case 1:  // Get the pResMethods pointer [use r0, set r12]
+            loadBaseDisp(cUnit, mir, r0, OFFSETOF_MEMBER(Method, pResMethods),
+                         r12, kWord, INVALID_SREG);
+             // Is "this" null? [use r1]
+             genNullCheck(cUnit, oatSSASrc(mir,0), r1,
+                           mir->offset, NULL);
+             break;
+        case 2: // Get the base Method* [use r12, set r0]
+            loadBaseDisp(cUnit, mir, r12, dInsn->vB * 4, r0,
+                         kWord, INVALID_SREG);
+            // get this->clazz [use r1, set rLR]
+            loadBaseDisp(cUnit, mir, r1, OFFSETOF_MEMBER(Object, clazz), rLR,
+                         kWord, INVALID_SREG);
+            break;
+        case 3: // Get the method index [use r0, set r12]
+            loadBaseDisp(cUnit, mir, r0, OFFSETOF_MEMBER(Method, methodIndex),
+                         r12, kUnsignedHalf, INVALID_SREG);
+            // get this->clazz->vtable [use rLR, set rLR]
+            loadBaseDisp(cUnit, mir, rLR,
+                         OFFSETOF_MEMBER(ClassObject, vtable), rLR, kWord,
+                         INVALID_SREG);
+            break;
+        case 4: // get target Method* [use rLR, use r12, set r0]
+              loadBaseIndexed(cUnit, rLR, r12, r0, 2, kWord);
+              break;
+        case 5: // Get the target compiled code address [use r0, set rLR]
+            loadBaseDisp(cUnit, mir, r0, OFFSETOF_MEMBER(Method, compiledInsns),
+                         rLR, kWord, INVALID_SREG);
+            break;
+        default:
+            return -1;
+    }
+    return state + 1;
+}
+
+/* Load up to 3 arguments in r1..r3 */
+static int loadArgRegs(CompilationUnit* cUnit, MIR* mir,
+                       DecodedInstruction* dInsn, int callState,
+                       int *args, NextCallInsn nextCallInsn)
+{
+    for (int i = 0; i < 3; i++) {
+        if (args[i] != INVALID_REG) {
+            RegLocation rlArg = oatGetSrc(cUnit, mir, i);
+            loadValueDirectFixed(cUnit, rlArg, r1 + i);
+            callState = nextCallInsn(cUnit, mir, dInsn, callState);
+        }
+    }
+    return callState;
+}
+
+/*
+ * Interleave launch code for INVOKE_INTERFACE.  The target is
+ * identified using artFindInterfaceMethodInCache(class, ref, method, dex)
+ * Note that we'll have to reload "this" following the helper call.
+ *
+ * FIXME: do we need to have artFindInterfaceMethodInCache return
+ * a NULL if not found so we can throw exception here?  Otherwise,
+ * may need to pass some additional info to allow the helper function
+ * to throw on its own.
+ */
+static int nextInterfaceCallInsn(CompilationUnit* cUnit, MIR* mir,
+                                 DecodedInstruction* dInsn, int state)
+{
+    RegLocation rlArg;
+    switch(state) {
+        case 0:
+            // Load "this" [set r12]
+            rlArg = oatGetSrc(cUnit, mir, 0);
+            loadValueDirectFixed(cUnit, rlArg, r12);
+            // Get the current Method* [set arg2]
+            loadBaseDisp(cUnit, mir, rSP, 0, r2, kWord, INVALID_SREG);
+            // Is "this" null? [use r12]
+            genNullCheck(cUnit, oatSSASrc(mir,0), r12,
+                           mir->offset, NULL);
+            // Get curMethod->clazz [set arg3]
+            loadBaseDisp(cUnit, mir, r2, OFFSETOF_MEMBER(Method, clazz),
+                         r3, kWord, INVALID_SREG);
+            // Load this->class [usr r12, set arg0]
+            loadBaseDisp(cUnit, mir, r12, OFFSETOF_MEMBER(ClassObject, clazz),
+                         r3, kWord, INVALID_SREG);
+            // Load address of helper function
+            loadBaseDisp(cUnit, mir, rSELF,
+                      OFFSETOF_MEMBER(Thread, pArtFindInterfaceMethodInCache),
+                      rLR, kWord, INVALID_SREG);
+            // Get dvmDex
+            loadBaseDisp(cUnit, mir, r3, OFFSETOF_MEMBER(ClassObject, pDvmDex),
+                         r3, kWord, INVALID_SREG);
+            // Load ref [set arg1]
+            loadConstant(cUnit, r1, dInsn->vB);
+            // Call out to helper, target Method returned in ret0
+            newLIR1(cUnit, kThumbBlxR, rLR);
+            break;
+        case 1: // Get the target compiled code address [use r0, set rLR]
+            loadBaseDisp(cUnit, mir, r0, OFFSETOF_MEMBER(Method, compiledInsns),
+                         rLR, kWord, INVALID_SREG);
+        default:
+            return -1;
+    }
+    return state + 1;
+}
+
+
+/*
+ * Interleave launch code for INVOKE_SUPER.  See comments
+ * for nextVCallIns.
+ */
+static int nextSuperCallInsn(CompilationUnit* cUnit, MIR* mir,
+                             DecodedInstruction* dInsn, int state)
+{
+    RegLocation rlArg;
+    switch(state) {
+        case 0:
+            // Get the current Method* [set r0]
+            loadBaseDisp(cUnit, mir, rSP, 0, r0, kWord, INVALID_SREG);
+            // Load "this" [set r1]
+            rlArg = oatGetSrc(cUnit, mir, 0);
+            loadValueDirectFixed(cUnit, rlArg, r1);
+            // Get method->clazz [use r0, set r12]
+            loadBaseDisp(cUnit, mir, r0, OFFSETOF_MEMBER(Method, clazz),
+                         r12, kWord, INVALID_SREG);
+            // Get pResmethods [use r0, set rLR]
+            loadBaseDisp(cUnit, mir, r0, OFFSETOF_MEMBER(Method, pResMethods),
+                         rLR, kWord, INVALID_SREG);
+            // Get clazz->super [use r12, set r12]
+            loadBaseDisp(cUnit, mir, r12, OFFSETOF_MEMBER(ClassObject, super),
+                         r12, kWord, INVALID_SREG);
+            // Get base method [use rLR, set r0]
+            loadBaseDisp(cUnit, mir, rLR, dInsn->vB * 4, r0,
+                         kWord, INVALID_SREG);
+            // Is "this" null? [use r1]
+            genNullCheck(cUnit, oatSSASrc(mir,0), r1,
+                           mir->offset, NULL);
+            // Get methodIndex [use r0, set rLR]
+            loadBaseDisp(cUnit, mir, r0, OFFSETOF_MEMBER(Method, methodIndex),
+                         rLR, kUnsignedHalf, INVALID_SREG);
+            // Get vtableCount [use r12, set r0]
+            loadBaseDisp(cUnit, mir, r12,
+                         OFFSETOF_MEMBER(ClassObject, vtableCount),
+                         r0, kWord, INVALID_SREG);
+            // Compare method index w/ vtable count [use r12, use rLR]
+            genRegRegCheck(cUnit, kArmCondGe, rLR, r0, mir->offset, NULL);
+            // get target Method* [use rLR, use r12, set r0]
+            loadBaseIndexed(cUnit, r0, r12, rLR, 2, kWord);
+        case 1: // Get the target compiled code address [use r0, set rLR]
+            loadBaseDisp(cUnit, mir, r0, OFFSETOF_MEMBER(Method, compiledInsns),
+                         rLR, kWord, INVALID_SREG);
+        default:
+            return -1;
+    }
+    return state + 1;
+}
+
+/*
+ * Load up to 5 arguments, the first three of which will be in
+ * r1 .. r3.  On entry r0 contains the current method pointer,
+ * and as part of the load sequence, it must be replaced with
+ * the target method pointer.  Note, this may also be called
+ * for "range" variants if the number of arguments is 5 or fewer.
+ */
+static int genDalvikArgsNoRange(CompilationUnit* cUnit, MIR* mir,
+                                DecodedInstruction* dInsn, int callState,
+                                ArmLIR** pcrLabel, bool isRange,
+                                NextCallInsn nextCallInsn)
+{
+    RegLocation rlArg;
+    int registerArgs[3];
+
+    /* If no arguments, just return */
+    if (dInsn->vA == 0)
+        return callState;
+
+    oatLockAllTemps(cUnit);
+    callState = nextCallInsn(cUnit, mir, dInsn, callState);
+
+    /*
+     * Load frame arguments arg4 & arg5 first. Coded a little odd to
+     * pre-schedule the method pointer target.
+     */
+    for (unsigned int i=3; i < dInsn->vA; i++) {
+        int reg;
+        int arg = (isRange) ? dInsn->vC + i : i;
+        rlArg = oatUpdateLoc(cUnit, oatGetSrc(cUnit, mir, arg));
+        if (rlArg.location == kLocPhysReg) {
+            reg = rlArg.lowReg;
+        } else {
+            reg = r1;
+            loadValueDirectFixed(cUnit, rlArg, r1);
+            callState = nextCallInsn(cUnit, mir, dInsn, callState);
+        }
+        storeBaseDisp(cUnit, rSP, (i + 1) * 4, reg, kWord);
+        callState = nextCallInsn(cUnit, mir, dInsn, callState);
+    }
+
+    /* Load register arguments r1..r3 */
+    for (unsigned int i = 0; i < 3; i++) {
+        if (i < dInsn->vA)
+            registerArgs[i] = (isRange) ? dInsn->vC + i : i;
+        else
+            registerArgs[i] = INVALID_REG;
+    }
+    callState = loadArgRegs(cUnit, mir, dInsn, callState, registerArgs,
+                            nextCallInsn);
+
+    // Load direct & need a "this" null check?
+    if (pcrLabel) {
+        *pcrLabel = genNullCheck(cUnit, oatSSASrc(mir,0), r1,
+                                 mir->offset, NULL);
+    }
+    return callState;
+}
+
+/*
+ * May have 0+ arguments (also used for jumbo).  Note that
+ * source virtual registers may be in physical registers, so may
+ * need to be flushed to home location before copying.  This
+ * applies to arg3 and above (see below).
+ *
+ * Two general strategies:
+ *    If < 20 arguments
+ *       Pass args 3-18 using vldm/vstm block copy
+ *       Pass arg0, arg1 & arg2 in r1-r3
+ *    If 20+ arguments
+ *       Pass args arg19+ using memcpy block copy
+ *       Pass arg0, arg1 & arg2 in r1-r3
+ *
+ */
+static int genDalvikArgsRange(CompilationUnit* cUnit, MIR* mir,
+                              DecodedInstruction* dInsn, int callState,
+                              ArmLIR** pcrLabel, NextCallInsn nextCallInsn)
+{
+    int firstArg = dInsn->vC;
+    int numArgs = dInsn->vA;
+
+    // If we can treat it as non-range (Jumbo ops will use range form)
+    if (numArgs <= 5)
+        return genDalvikArgsNoRange(cUnit, mir, dInsn, callState, pcrLabel,
+                                    true, nextCallInsn);
+    /*
+     * Make sure range list doesn't span the break between in normal
+     * Dalvik vRegs and the ins.
+     */
+    int highestVreg = oatGetSrc(cUnit, mir, numArgs-1).sRegLow;
+    if (highestVreg >= cUnit->method->registersSize - cUnit->method->insSize) {
+        LOG(FATAL) << "Wide argument spanned locals & args";
+    }
+
+    /*
+     * First load the non-register arguments.  Both forms expect all
+     * of the source arguments to be in their home frame location, so
+     * scan the sReg names and flush any that have been promoted to
+     * frame backing storage.
+     */
+    // Scan the rest of the args - if in physReg flush to memory
+    for (int i = 4; i < numArgs; i++) {
+        RegLocation loc = oatUpdateLoc(cUnit,
+            oatGetSrc(cUnit, mir, i));
+        if (loc.location == kLocPhysReg) {  // TUNING: if dirty?
+            storeBaseDisp(cUnit, rSP, loc.spOffset, loc.lowReg, kWord);
+            callState = nextCallInsn(cUnit, mir, dInsn, callState);
+        }
+    }
+
+    int startOffset = cUnit->regLocation[mir->ssaRep->uses[3]].spOffset;
+    int outsOffset = 4 /* Method* */ + (3 * 4);
+    if (numArgs >= 20) {
+        // Generate memcpy, but first make sure all of
+        opRegRegImm(cUnit, kOpAdd, r0, rSP, startOffset);
+        opRegRegImm(cUnit, kOpAdd, r1, rSP, outsOffset);
+        loadWordDisp(cUnit, rSELF, OFFSETOF_MEMBER(Thread, pMemcpy), rLR);
+        loadConstant(cUnit, r2, (numArgs - 3) * 4);
+        newLIR1(cUnit, kThumbBlxR, rLR);
+    } else {
+        // Use vldm/vstm pair using r3 as a temp
+        int regsLeft = MIN(numArgs - 3, 16);
+        callState = nextCallInsn(cUnit, mir, dInsn, callState);
+        opRegRegImm(cUnit, kOpAdd, r3, rSP, startOffset);
+        newLIR3(cUnit, kThumb2Vldms, r3, fr0 & FP_REG_MASK, regsLeft);
+        callState = nextCallInsn(cUnit, mir, dInsn, callState);
+        opRegRegImm(cUnit, kOpAdd, r3, rSP, 4 /* Method* */ + (3 * 4));
+        callState = nextCallInsn(cUnit, mir, dInsn, callState);
+        newLIR3(cUnit, kThumb2Vstms, r3, fr0 & FP_REG_MASK, regsLeft);
+        callState = nextCallInsn(cUnit, mir, dInsn, callState);
+    }
+
+    // Handle the 1st 3 in r1, r2 & r3
+    for (unsigned int i = 0; i < dInsn->vA && i < 3; i++) {
+        RegLocation loc = oatGetSrc(cUnit, mir, firstArg + i);
+        loadValueDirectFixed(cUnit, loc, r1 + i);
+        callState = nextCallInsn(cUnit, mir, dInsn, callState);
+    }
+
+    // Finally, deal with the register arguments
+    // We'll be using fixed registers here
+    oatLockAllTemps(cUnit);
+    callState = nextCallInsn(cUnit, mir, dInsn, callState);
+    return callState;
+}
+
+static void genInvokeStatic(CompilationUnit* cUnit, MIR* mir)
+{
+    DecodedInstruction* dInsn = &mir->dalvikInsn;
+    int callState = 0;
+    if (mir->dalvikInsn.opcode == OP_INVOKE_STATIC) {
+        callState = genDalvikArgsNoRange(cUnit, mir, dInsn, callState, NULL,
+                                         false, nextSDCallInsn);
+    } else {
+        callState = genDalvikArgsRange(cUnit, mir, dInsn, callState, NULL,
+                                       nextSDCallInsn);
+    }
+    // Finish up any of the call sequence not interleaved in arg loading
+    while (callState >= 0) {
+        callState = nextSDCallInsn(cUnit, mir, dInsn, callState);
+    }
+    newLIR1(cUnit, kThumbBlxR, rLR);
+}
+
+static void genInvokeDirect(CompilationUnit* cUnit, MIR* mir)
+{
+    DecodedInstruction* dInsn = &mir->dalvikInsn;
+    int callState = 0;
+    ArmLIR* nullCk;
+    if (mir->dalvikInsn.opcode == OP_INVOKE_DIRECT)
+        callState = genDalvikArgsNoRange(cUnit, mir, dInsn, callState, &nullCk,
+                                         false, nextSDCallInsn);
+    else
+        callState = genDalvikArgsRange(cUnit, mir, dInsn, callState, &nullCk,
+                                       nextSDCallInsn);
+    // Finish up any of the call sequence not interleaved in arg loading
+    while (callState >= 0) {
+        callState = nextSDCallInsn(cUnit, mir, dInsn, callState);
+    }
+    newLIR1(cUnit, kThumbBlxR, rLR);
+}
+
+static void genInvokeInterface(CompilationUnit* cUnit, MIR* mir)
+{
+    DecodedInstruction* dInsn = &mir->dalvikInsn;
+    int callState = 0;
+    ArmLIR* nullCk;
+    /* Note: must call nextInterfaceCallInsn() prior to 1st argument load */
+    callState = nextInterfaceCallInsn(cUnit, mir, dInsn, callState);
+    if (mir->dalvikInsn.opcode == OP_INVOKE_INTERFACE)
+        callState = genDalvikArgsNoRange(cUnit, mir, dInsn, callState, &nullCk,
+                                         false, nextInterfaceCallInsn);
+    else
+        callState = genDalvikArgsRange(cUnit, mir, dInsn, callState, &nullCk,
+                                       nextInterfaceCallInsn);
+    // Finish up any of the call sequence not interleaved in arg loading
+    while (callState >= 0) {
+        callState = nextInterfaceCallInsn(cUnit, mir, dInsn, callState);
+    }
+    newLIR1(cUnit, kThumbBlxR, rLR);
+}
+
+static void genInvokeSuper(CompilationUnit* cUnit, MIR* mir)
+{
+    DecodedInstruction* dInsn = &mir->dalvikInsn;
+    int callState = 0;
+    ArmLIR* nullCk;
+// FIXME - redundantly loading arg0/r1 ("this")
+    if (mir->dalvikInsn.opcode == OP_INVOKE_SUPER)
+        callState = genDalvikArgsNoRange(cUnit, mir, dInsn, callState, &nullCk,
+                                         false, nextSuperCallInsn);
+    else
+        callState = genDalvikArgsRange(cUnit, mir, dInsn, callState, &nullCk,
+                                       nextSuperCallInsn);
+    // Finish up any of the call sequence not interleaved in arg loading
+    while (callState >= 0) {
+        callState = nextSuperCallInsn(cUnit, mir, dInsn, callState);
+    }
+    newLIR1(cUnit, kThumbBlxR, rLR);
+}
+
+static void genInvokeVirtual(CompilationUnit* cUnit, MIR* mir)
+{
+    DecodedInstruction* dInsn = &mir->dalvikInsn;
+    int callState = 0;
+    ArmLIR* nullCk;
+// FIXME - redundantly loading arg0/r1 ("this")
+    if (mir->dalvikInsn.opcode == OP_INVOKE_VIRTUAL)
+        callState = genDalvikArgsNoRange(cUnit, mir, dInsn, callState, &nullCk,
+                                         false, nextVCallInsn);
+    else
+        callState = genDalvikArgsRange(cUnit, mir, dInsn, callState, &nullCk,
+                                       nextVCallInsn);
+    // Finish up any of the call sequence not interleaved in arg loading
+    while (callState >= 0) {
+        callState = nextVCallInsn(cUnit, mir, dInsn, callState);
+    }
+    newLIR1(cUnit, kThumbBlxR, rLR);
+}
+
+// TODO: break out the case handlers.  Might make it easier to support x86
+static bool compileDalvikInstruction(CompilationUnit* cUnit, MIR* mir,
+                                     BasicBlock* bb, ArmLIR* labelList)
+{
+    bool res = false;   // Assume success
+    RegLocation rlSrc[3];
+    RegLocation rlDest = badLoc;
+    RegLocation rlResult = badLoc;
+    Opcode opcode = mir->dalvikInsn.opcode;
+
+    /* Prep Src and Dest locations */
+    int nextSreg = 0;
+    int nextLoc = 0;
+    int attrs = oatDataFlowAttributes[opcode];
+    rlSrc[0] = rlSrc[1] = rlSrc[2] = badLoc;
+    if (attrs & DF_UA) {
+        rlSrc[nextLoc++] = oatGetSrc(cUnit, mir, nextSreg);
+        nextSreg++;
+    } else if (attrs & DF_UA_WIDE) {
+        rlSrc[nextLoc++] = oatGetSrcWide(cUnit, mir, nextSreg,
+                                                 nextSreg + 1);
+        nextSreg+= 2;
+    }
+    if (attrs & DF_UB) {
+        rlSrc[nextLoc++] = oatGetSrc(cUnit, mir, nextSreg);
+        nextSreg++;
+    } else if (attrs & DF_UB_WIDE) {
+        rlSrc[nextLoc++] = oatGetSrcWide(cUnit, mir, nextSreg,
+                                                 nextSreg + 1);
+        nextSreg+= 2;
+    }
+    if (attrs & DF_UC) {
+        rlSrc[nextLoc++] = oatGetSrc(cUnit, mir, nextSreg);
+    } else if (attrs & DF_UC_WIDE) {
+        rlSrc[nextLoc++] = oatGetSrcWide(cUnit, mir, nextSreg,
+                                                 nextSreg + 1);
+    }
+    if (attrs & DF_DA) {
+        rlDest = oatGetDest(cUnit, mir, 0);
+    } else if (attrs & DF_DA_WIDE) {
+        rlDest = oatGetDestWide(cUnit, mir, 0, 1);
+    }
+
+    switch(opcode) {
+        case OP_NOP:
+            break;
+
+        case OP_MOVE_EXCEPTION:
+            int exOffset;
+            int resetReg;
+            exOffset = OFFSETOF_MEMBER(Thread, exception);
+            resetReg = oatAllocTemp(cUnit);
+            rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true);
+            loadWordDisp(cUnit, rSELF, exOffset, rlResult.lowReg);
+            loadConstant(cUnit, resetReg, 0);
+            storeWordDisp(cUnit, rSELF, exOffset, resetReg);
+            storeValue(cUnit, rlDest, rlResult);
+            break;
+
+        case OP_RETURN_VOID:
+            break;
+
+        case OP_RETURN:
+        case OP_RETURN_OBJECT:
+            storeValue(cUnit, retLoc, rlSrc[0]);
+            break;
+
+        case OP_RETURN_WIDE:
+            rlDest = retLocWide;
+            rlDest.fp = rlSrc[0].fp;
+            storeValueWide(cUnit, rlDest, rlSrc[0]);
+            break;
+
+        case OP_MOVE_RESULT_WIDE:
+            if (mir->OptimizationFlags & MIR_INLINED)
+                break;  // Nop - combined w/ previous invoke
+            /*
+             * Somewhat hacky here.   Because we're now passing
+             * return values in registers, we have to let the
+             * register allocation utilities know that the return
+             * registers are live and may not be used for address
+             * formation in storeValueWide.
+             */
+            assert(retLocWide.lowReg == r0);
+            assert(retLocWide.lowReg == r1);
+            oatLockTemp(cUnit, retLocWide.lowReg);
+            oatLockTemp(cUnit, retLocWide.highReg);
+            storeValueWide(cUnit, rlDest, retLocWide);
+            oatFreeTemp(cUnit, retLocWide.lowReg);
+            oatFreeTemp(cUnit, retLocWide.highReg);
+            break;
+
+        case OP_MOVE_RESULT:
+        case OP_MOVE_RESULT_OBJECT:
+            if (mir->OptimizationFlags & MIR_INLINED)
+                break;  // Nop - combined w/ previous invoke
+            /* See comment for OP_MOVE_RESULT_WIDE */
+            assert(retLoc.lowReg == r0);
+            oatLockTemp(cUnit, retLoc.lowReg);
+            storeValue(cUnit, rlDest, retLoc);
+            oatFreeTemp(cUnit, retLoc.lowReg);
+            break;
+
+        case OP_MOVE:
+        case OP_MOVE_OBJECT:
+        case OP_MOVE_16:
+        case OP_MOVE_OBJECT_16:
+        case OP_MOVE_FROM16:
+        case OP_MOVE_OBJECT_FROM16:
+            storeValue(cUnit, rlDest, rlSrc[0]);
+            break;
+
+        case OP_MOVE_WIDE:
+        case OP_MOVE_WIDE_16:
+        case OP_MOVE_WIDE_FROM16:
+            storeValueWide(cUnit, rlDest, rlSrc[0]);
+            break;
+
+        case OP_CONST:
+        case OP_CONST_4:
+        case OP_CONST_16:
+            rlResult = oatEvalLoc(cUnit, rlDest, kAnyReg, true);
+            loadConstantNoClobber(cUnit, rlResult.lowReg, mir->dalvikInsn.vB);
+            storeValue(cUnit, rlDest, rlResult);
+            break;
+
+        case OP_CONST_HIGH16:
+            rlResult = oatEvalLoc(cUnit, rlDest, kAnyReg, true);
+            loadConstantNoClobber(cUnit, rlResult.lowReg,
+                                  mir->dalvikInsn.vB << 16);
+            storeValue(cUnit, rlDest, rlResult);
+            break;
+
+        case OP_CONST_WIDE_16:
+        case OP_CONST_WIDE_32:
+            rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true);
+            loadConstantNoClobber(cUnit, rlResult.lowReg, mir->dalvikInsn.vB);
+            //TUNING: do high separately to avoid load dependency
+            opRegRegImm(cUnit, kOpAsr, rlResult.highReg, rlResult.lowReg, 31);
+            storeValueWide(cUnit, rlDest, rlResult);
+            break;
+
+        case OP_CONST_WIDE:
+            rlResult = oatEvalLoc(cUnit, rlDest, kAnyReg, true);
+            loadConstantValueWide(cUnit, rlResult.lowReg, rlResult.highReg,
+                                  0, mir->dalvikInsn.vB);
+            storeValue(cUnit, rlDest, rlResult);
+            break;
+
+        case OP_CONST_WIDE_HIGH16:
+            rlResult = oatEvalLoc(cUnit, rlDest, kAnyReg, true);
+            loadConstantValueWide(cUnit, rlResult.lowReg, rlResult.highReg,
+                                  0, mir->dalvikInsn.vB << 16);
+            storeValue(cUnit, rlDest, rlResult);
+            break;
+
+        case OP_MONITOR_ENTER:
+            genMonitorEnter(cUnit, mir, rlSrc[0]);
+            break;
+
+        case OP_MONITOR_EXIT:
+            genMonitorExit(cUnit, mir, rlSrc[0]);
+            break;
+
+        case OP_CHECK_CAST:
+            genCheckCast(cUnit, mir, rlSrc[0]);
+            break;
+
+        case OP_INSTANCE_OF:
+            genInstanceof(cUnit, mir, rlDest, rlSrc[0]);
+            break;
+
+        case OP_NEW_INSTANCE:
+            genNewInstance(cUnit, mir, rlDest);
+            break;
+
+        case OP_THROW:
+            genThrow(cUnit, mir, rlSrc[0]);
+            break;
+
+        case OP_ARRAY_LENGTH:
+            int lenOffset;
+            lenOffset = OFFSETOF_MEMBER(ArrayObject, length);
+            genNullCheck(cUnit, rlSrc[0].sRegLow, rlSrc[0].lowReg,
+                         mir->offset, NULL);
+            rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true);
+            loadWordDisp(cUnit, rlSrc[0].lowReg, lenOffset,
+                         rlResult.lowReg);
+            storeValue(cUnit, rlDest, rlResult);
+            break;
+
+        case OP_CONST_STRING:
+        case OP_CONST_STRING_JUMBO:
+            genConstString(cUnit, mir, rlDest, rlSrc[0]);
+            break;
+
+        case OP_CONST_CLASS:
+            genConstClass(cUnit, mir, rlDest, rlSrc[0]);
+            break;
+
+        case OP_FILL_ARRAY_DATA:
+            genFillArrayData(cUnit, mir, rlSrc[0]);
+            break;
+
+        case OP_FILLED_NEW_ARRAY:
+            genFilledNewArray(cUnit, mir, false /* not range */);
+            break;
+
+        case OP_FILLED_NEW_ARRAY_RANGE:
+            genFilledNewArray(cUnit, mir, true /* range */);
+            break;
+
+        case OP_NEW_ARRAY:
+            genNewArray(cUnit, mir, rlDest, rlSrc[0]);
+            break;
+
+        case OP_GOTO:
+        case OP_GOTO_16:
+        case OP_GOTO_32:
+            // TUNING: add MIR flag to disable when unnecessary
+            bool backwardBranch;
+            backwardBranch = (bb->taken->startOffset <= mir->offset);
+            if (backwardBranch) {
+                genSuspendPoll(cUnit, mir);
+            }
+            genUnconditionalBranch(cUnit, &labelList[bb->taken->id]);
+            break;
+
+        case OP_PACKED_SWITCH:
+            genPackedSwitch(cUnit, mir, rlSrc[0]);
+            break;
+
+        case OP_SPARSE_SWITCH:
+            genSparseSwitch(cUnit, mir, rlSrc[0]);
+            break;
+
+        case OP_CMPL_FLOAT:
+        case OP_CMPG_FLOAT:
+        case OP_CMPL_DOUBLE:
+        case OP_CMPG_DOUBLE:
+            res = genCmpFP(cUnit, mir, rlDest, rlSrc[0], rlSrc[1]);
+            break;
+
+        case OP_CMP_LONG:
+            genCmpLong(cUnit, mir, rlDest, rlSrc[0], rlSrc[1]);
+            break;
+
+        case OP_IF_EQ:
+        case OP_IF_NE:
+        case OP_IF_LT:
+        case OP_IF_GE:
+        case OP_IF_GT:
+        case OP_IF_LE: {
+            bool backwardBranch;
+            ArmConditionCode cond;
+            backwardBranch = (bb->taken->startOffset <= mir->offset);
+            if (backwardBranch) {
+                genSuspendPoll(cUnit, mir);
+            }
+            rlSrc[0] = loadValue(cUnit, rlSrc[0], kCoreReg);
+            rlSrc[1] = loadValue(cUnit, rlSrc[1], kCoreReg);
+            opRegReg(cUnit, kOpCmp, rlSrc[0].lowReg, rlSrc[1].lowReg);
+            switch(opcode) {
+                case OP_IF_EQ:
+                    cond = kArmCondEq;
+                    break;
+                case OP_IF_NE:
+                    cond = kArmCondNe;
+                    break;
+                case OP_IF_LT:
+                    cond = kArmCondLt;
+                    break;
+                case OP_IF_GE:
+                    cond = kArmCondGe;
+                    break;
+                case OP_IF_GT:
+                    cond = kArmCondGt;
+                    break;
+                case OP_IF_LE:
+                    cond = kArmCondLe;
+                    break;
+                default:
+                    cond = (ArmConditionCode)0;
+                    LOG(FATAL) << "Unexpected opcode " << (int)opcode;
+            }
+            genConditionalBranch(cUnit, cond, &labelList[bb->taken->id]);
+            genUnconditionalBranch(cUnit, &labelList[bb->fallThrough->id]);
+            break;
+            }
+
+        case OP_IF_EQZ:
+        case OP_IF_NEZ:
+        case OP_IF_LTZ:
+        case OP_IF_GEZ:
+        case OP_IF_GTZ:
+        case OP_IF_LEZ: {
+            bool backwardBranch;
+            ArmConditionCode cond;
+            backwardBranch = (bb->taken->startOffset <= mir->offset);
+            if (backwardBranch) {
+                genSuspendPoll(cUnit, mir);
+            }
+            rlSrc[0] = loadValue(cUnit, rlSrc[0], kCoreReg);
+            opRegImm(cUnit, kOpCmp, rlSrc[0].lowReg, 0);
+            switch(opcode) {
+                case OP_IF_EQZ:
+                    cond = kArmCondEq;
+                    break;
+                case OP_IF_NEZ:
+                    cond = kArmCondNe;
+                    break;
+                case OP_IF_LTZ:
+                    cond = kArmCondLt;
+                    break;
+                case OP_IF_GEZ:
+                    cond = kArmCondGe;
+                    break;
+                case OP_IF_GTZ:
+                    cond = kArmCondGt;
+                    break;
+                case OP_IF_LEZ:
+                    cond = kArmCondLe;
+                    break;
+                default:
+                    cond = (ArmConditionCode)0;
+                    LOG(FATAL) << "Unexpected opcode " << (int)opcode;
+            }
+            genConditionalBranch(cUnit, cond, &labelList[bb->taken->id]);
+            genUnconditionalBranch(cUnit, &labelList[bb->fallThrough->id]);
+            break;
+            }
+
+      case OP_AGET_WIDE:
+            genArrayGet(cUnit, mir, kLong, rlSrc[0], rlSrc[1], rlDest, 3);
+            break;
+        case OP_AGET:
+        case OP_AGET_OBJECT:
+            genArrayGet(cUnit, mir, kWord, rlSrc[0], rlSrc[1], rlDest, 2);
+            break;
+        case OP_AGET_BOOLEAN:
+            genArrayGet(cUnit, mir, kUnsignedByte, rlSrc[0], rlSrc[1],
+                        rlDest, 0);
+            break;
+        case OP_AGET_BYTE:
+            genArrayGet(cUnit, mir, kSignedByte, rlSrc[0], rlSrc[1], rlDest, 0);
+            break;
+        case OP_AGET_CHAR:
+            genArrayGet(cUnit, mir, kUnsignedHalf, rlSrc[0], rlSrc[1],
+                        rlDest, 1);
+            break;
+        case OP_AGET_SHORT:
+            genArrayGet(cUnit, mir, kSignedHalf, rlSrc[0], rlSrc[1], rlDest, 1);
+            break;
+        case OP_APUT_WIDE:
+            genArrayPut(cUnit, mir, kLong, rlSrc[1], rlSrc[2], rlSrc[0], 3);
+            break;
+        case OP_APUT:
+            genArrayPut(cUnit, mir, kWord, rlSrc[1], rlSrc[2], rlSrc[0], 2);
+            break;
+        case OP_APUT_OBJECT:
+            genArrayObjectPut(cUnit, mir, rlSrc[1], rlSrc[2], rlSrc[0], 2);
+            break;
+        case OP_APUT_SHORT:
+        case OP_APUT_CHAR:
+            genArrayPut(cUnit, mir, kUnsignedHalf, rlSrc[1], rlSrc[2],
+                        rlSrc[0], 1);
+            break;
+        case OP_APUT_BYTE:
+        case OP_APUT_BOOLEAN:
+            genArrayPut(cUnit, mir, kUnsignedByte, rlSrc[1], rlSrc[2],
+                        rlSrc[0], 0);
+            break;
+
+        case OP_IGET_WIDE:
+        case OP_IGET_WIDE_VOLATILE:
+            genIGetWideX(cUnit, mir, rlDest, rlSrc[0]);
+            break;
+
+        case OP_IGET:
+        case OP_IGET_VOLATILE:
+        case OP_IGET_OBJECT:
+        case OP_IGET_OBJECT_VOLATILE:
+            genIGetX(cUnit, mir, kWord, rlDest, rlSrc[0]);
+            break;
+
+        case OP_IGET_BOOLEAN:
+        case OP_IGET_BYTE:
+            genIGetX(cUnit, mir, kUnsignedByte, rlDest, rlSrc[0]);
+            break;
+
+        case OP_IGET_CHAR:
+            genIGetX(cUnit, mir, kUnsignedHalf, rlDest, rlSrc[0]);
+            break;
+
+        case OP_IGET_SHORT:
+            genIGetX(cUnit, mir, kSignedHalf, rlDest, rlSrc[0]);
+            break;
+
+        case OP_IPUT_WIDE:
+        case OP_IPUT_WIDE_VOLATILE:
+            genIPutWideX(cUnit, mir, rlSrc[0], rlSrc[1]);
+            break;
+
+        case OP_IPUT_OBJECT:
+        case OP_IPUT_OBJECT_VOLATILE:
+            genIPutX(cUnit, mir, kWord, rlSrc[0], rlSrc[1], true);
+            break;
+
+        case OP_IPUT:
+        case OP_IPUT_VOLATILE:
+            genIPutX(cUnit, mir, kWord, rlSrc[0], rlSrc[1], false);
+            break;
+
+        case OP_IPUT_BOOLEAN:
+        case OP_IPUT_BYTE:
+            genIPutX(cUnit, mir, kUnsignedByte, rlSrc[0], rlSrc[1], false);
+            break;
+
+        case OP_IPUT_CHAR:
+            genIPutX(cUnit, mir, kUnsignedHalf, rlSrc[0], rlSrc[1], false);
+            break;
+
+        case OP_IPUT_SHORT:
+            genIPutX(cUnit, mir, kSignedHalf, rlSrc[0], rlSrc[1], false);
+            break;
+
+        case OP_SGET:
+        case OP_SGET_OBJECT:
+        case OP_SGET_BOOLEAN:
+        case OP_SGET_BYTE:
+        case OP_SGET_CHAR:
+        case OP_SGET_SHORT:
+            genSget(cUnit, mir, rlResult, rlDest);
+            break;
+
+        case OP_SGET_WIDE:
+            genSgetWide(cUnit, mir, rlResult, rlDest);
+            break;
+
+        case OP_SPUT:
+        case OP_SPUT_OBJECT:
+        case OP_SPUT_BOOLEAN:
+        case OP_SPUT_BYTE:
+        case OP_SPUT_CHAR:
+        case OP_SPUT_SHORT:
+            genSput(cUnit, mir, rlSrc[0]);
+            break;
+
+        case OP_SPUT_WIDE:
+            genSputWide(cUnit, mir, rlSrc[0]);
+            break;
+
+        case OP_INVOKE_STATIC_RANGE:
+        case OP_INVOKE_STATIC:
+            genInvokeStatic(cUnit, mir);
+            break;
+
+        case OP_INVOKE_DIRECT:
+        case OP_INVOKE_DIRECT_RANGE:
+            genInvokeDirect(cUnit, mir);
+            break;
+
+        case OP_INVOKE_VIRTUAL:
+        case OP_INVOKE_VIRTUAL_RANGE:
+            genInvokeVirtual(cUnit, mir);
+            break;
+
+        case OP_INVOKE_SUPER:
+        case OP_INVOKE_SUPER_RANGE:
+            genInvokeSuper(cUnit, mir);
+            break;
+
+        case OP_INVOKE_INTERFACE:
+        case OP_INVOKE_INTERFACE_RANGE:
+            genInvokeInterface(cUnit, mir);
+            break;
+
+        case OP_NEG_INT:
+        case OP_NOT_INT:
+            res = genArithOpInt(cUnit, mir, rlDest, rlSrc[0], rlSrc[0]);
+            break;
+
+        case OP_NEG_LONG:
+        case OP_NOT_LONG:
+            res = genArithOpLong(cUnit, mir, rlDest, rlSrc[0], rlSrc[0]);
+            break;
+
+        case OP_NEG_FLOAT:
+            res = genArithOpFloat(cUnit, mir, rlDest, rlSrc[0], rlSrc[0]);
+            break;
+
+        case OP_NEG_DOUBLE:
+            res = genArithOpDouble(cUnit, mir, rlDest, rlSrc[0], rlSrc[0]);
+            break;
+
+        case OP_INT_TO_LONG:
+            rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true);
+            if (rlSrc[0].location == kLocPhysReg) {
+                genRegCopy(cUnit, rlResult.lowReg, rlSrc[0].lowReg);
+            } else {
+                loadValueDirect(cUnit, rlSrc[0], rlResult.lowReg);
+            }
+            opRegRegImm(cUnit, kOpAsr, rlResult.highReg,
+                        rlResult.lowReg, 31);
+            storeValueWide(cUnit, rlDest, rlResult);
+            break;
+
+        case OP_LONG_TO_INT:
+            rlSrc[0] = oatUpdateLocWide(cUnit, rlSrc[0]);
+            rlSrc[0] = oatWideToNarrow(cUnit, rlSrc[0]);
+            storeValue(cUnit, rlDest, rlSrc[0]);
+            break;
+
+        case OP_INT_TO_BYTE:
+            rlSrc[0] = loadValue(cUnit, rlSrc[0], kCoreReg);
+            rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true);
+            opRegReg(cUnit, kOp2Byte, rlResult.lowReg, rlSrc[0].lowReg);
+            storeValue(cUnit, rlDest, rlResult);
+            break;
+
+        case OP_INT_TO_SHORT:
+            rlSrc[0] = loadValue(cUnit, rlSrc[0], kCoreReg);
+            rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true);
+            opRegReg(cUnit, kOp2Short, rlResult.lowReg, rlSrc[0].lowReg);
+            storeValue(cUnit, rlDest, rlResult);
+            break;
+
+        case OP_INT_TO_CHAR:
+            rlSrc[0] = loadValue(cUnit, rlSrc[0], kCoreReg);
+            rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true);
+            opRegReg(cUnit, kOp2Char, rlResult.lowReg, rlSrc[0].lowReg);
+            storeValue(cUnit, rlDest, rlResult);
+            break;
+
+        case OP_INT_TO_FLOAT:
+        case OP_INT_TO_DOUBLE:
+        case OP_LONG_TO_FLOAT:
+        case OP_LONG_TO_DOUBLE:
+        case OP_FLOAT_TO_INT:
+        case OP_FLOAT_TO_LONG:
+        case OP_FLOAT_TO_DOUBLE:
+        case OP_DOUBLE_TO_INT:
+        case OP_DOUBLE_TO_LONG:
+        case OP_DOUBLE_TO_FLOAT:
+            genConversion(cUnit, mir);
+            break;
+
+        case OP_ADD_INT:
+        case OP_SUB_INT:
+        case OP_MUL_INT:
+        case OP_DIV_INT:
+        case OP_REM_INT:
+        case OP_AND_INT:
+        case OP_OR_INT:
+        case OP_XOR_INT:
+        case OP_SHL_INT:
+        case OP_SHR_INT:
+        case OP_USHR_INT:
+        case OP_ADD_INT_2ADDR:
+        case OP_SUB_INT_2ADDR:
+        case OP_MUL_INT_2ADDR:
+        case OP_DIV_INT_2ADDR:
+        case OP_REM_INT_2ADDR:
+        case OP_AND_INT_2ADDR:
+        case OP_OR_INT_2ADDR:
+        case OP_XOR_INT_2ADDR:
+        case OP_SHL_INT_2ADDR:
+        case OP_SHR_INT_2ADDR:
+        case OP_USHR_INT_2ADDR:
+            genArithOpInt(cUnit, mir, rlDest, rlSrc[0], rlSrc[1]);
+            break;
+
+        case OP_ADD_LONG:
+        case OP_SUB_LONG:
+        case OP_MUL_LONG:
+        case OP_DIV_LONG:
+        case OP_REM_LONG:
+        case OP_AND_LONG:
+        case OP_OR_LONG:
+        case OP_XOR_LONG:
+        case OP_ADD_LONG_2ADDR:
+        case OP_SUB_LONG_2ADDR:
+        case OP_MUL_LONG_2ADDR:
+        case OP_DIV_LONG_2ADDR:
+        case OP_REM_LONG_2ADDR:
+        case OP_AND_LONG_2ADDR:
+        case OP_OR_LONG_2ADDR:
+        case OP_XOR_LONG_2ADDR:
+            genArithOpLong(cUnit, mir, rlDest, rlSrc[0], rlSrc[1]);
+            break;
+
+        case OP_SHL_LONG_2ADDR:
+        case OP_SHR_LONG_2ADDR:
+        case OP_USHR_LONG_2ADDR:
+            genShiftOpLong(cUnit,mir, rlDest, rlSrc[0], rlSrc[0]);
+            break;
+
+        case OP_SHL_LONG:
+        case OP_SHR_LONG:
+        case OP_USHR_LONG:
+            genShiftOpLong(cUnit,mir, rlDest, rlSrc[0], rlSrc[1]);
+            break;
+
+        case OP_ADD_FLOAT:
+        case OP_SUB_FLOAT:
+        case OP_MUL_FLOAT:
+        case OP_DIV_FLOAT:
+        case OP_REM_FLOAT:
+        case OP_ADD_FLOAT_2ADDR:
+        case OP_SUB_FLOAT_2ADDR:
+        case OP_MUL_FLOAT_2ADDR:
+        case OP_DIV_FLOAT_2ADDR:
+        case OP_REM_FLOAT_2ADDR:
+            genArithOpFloat(cUnit, mir, rlDest, rlSrc[0], rlSrc[1]);
+            break;
+
+        case OP_ADD_DOUBLE:
+        case OP_SUB_DOUBLE:
+        case OP_MUL_DOUBLE:
+        case OP_DIV_DOUBLE:
+        case OP_REM_DOUBLE:
+        case OP_ADD_DOUBLE_2ADDR:
+        case OP_SUB_DOUBLE_2ADDR:
+        case OP_MUL_DOUBLE_2ADDR:
+        case OP_DIV_DOUBLE_2ADDR:
+        case OP_REM_DOUBLE_2ADDR:
+            genArithOpDouble(cUnit, mir, rlDest, rlSrc[0], rlSrc[1]);
+            break;
+
+        case OP_RSUB_INT:
+        case OP_ADD_INT_LIT16:
+        case OP_MUL_INT_LIT16:
+        case OP_DIV_INT_LIT16:
+        case OP_REM_INT_LIT16:
+        case OP_AND_INT_LIT16:
+        case OP_OR_INT_LIT16:
+        case OP_XOR_INT_LIT16:
+        case OP_ADD_INT_LIT8:
+        case OP_RSUB_INT_LIT8:
+        case OP_MUL_INT_LIT8:
+        case OP_DIV_INT_LIT8:
+        case OP_REM_INT_LIT8:
+        case OP_AND_INT_LIT8:
+        case OP_OR_INT_LIT8:
+        case OP_XOR_INT_LIT8:
+        case OP_SHL_INT_LIT8:
+        case OP_SHR_INT_LIT8:
+        case OP_USHR_INT_LIT8:
+            genArithOpIntLit(cUnit, mir, rlDest, rlSrc[0], mir->dalvikInsn.vC);
+            break;
+
+        default:
+            res = true;
+    }
+    return res;
+}
+
+static const char *extendedMIROpNames[kMirOpLast - kMirOpFirst] = {
+    "kMirOpPhi",
+    "kMirOpNullNRangeUpCheck",
+    "kMirOpNullNRangeDownCheck",
+    "kMirOpLowerBound",
+    "kMirOpPunt",
+    "kMirOpCheckInlinePrediction",
+};
+
+/* Extended MIR instructions like PHI */
+static void handleExtendedMethodMIR(CompilationUnit* cUnit, MIR* mir)
+{
+    int opOffset = mir->dalvikInsn.opcode - kMirOpFirst;
+    char* msg = (char*)oatNew(strlen(extendedMIROpNames[opOffset]) + 1, false);
+    strcpy(msg, extendedMIROpNames[opOffset]);
+    ArmLIR* op = newLIR1(cUnit, kArmPseudoExtended, (int) msg);
+
+    switch ((ExtendedMIROpcode)mir->dalvikInsn.opcode) {
+        case kMirOpPhi: {
+            char* ssaString = oatGetSSAString(cUnit, mir->ssaRep);
+            op->flags.isNop = true;
+            newLIR1(cUnit, kArmPseudoSSARep, (int) ssaString);
+            break;
+        }
+        default:
+            break;
+    }
+}
+
+/* If there are any ins passed in registers that have not been promoted
+ * to a callee-save register, flush them to the frame.
+ * Note: at this pointCopy any ins that are passed in register to their home location */
+static void flushIns(CompilationUnit* cUnit)
+{
+    if (cUnit->method->insSize == 0)
+        return;
+    int inRegs = (cUnit->method->insSize > 2) ? 3 : cUnit->method->insSize;
+    int startReg = r1;
+    int startLoc = cUnit->method->registersSize - cUnit->method->insSize;
+    for (int i = 0; i < inRegs; i++) {
+        RegLocation loc = cUnit->regLocation[startLoc + i];
+        if (loc.location == kLocPhysReg) {
+            genRegCopy(cUnit, loc.lowReg, startReg + i);
+        } else {
+            assert(loc.location == kLocDalvikFrame);
+            storeBaseDisp(cUnit, rSP, loc.spOffset, startReg + i, kWord);
+        }
+    }
+
+    // Handle special case of wide argument half in regs, half in frame
+    if (inRegs == 3) {
+        RegLocation loc = cUnit->regLocation[startLoc + 2];
+        if (loc.wide && loc.location == kLocPhysReg) {
+            // Load the other half of the arg into the promoted pair
+            loadBaseDisp(cUnit, NULL, rSP, loc.spOffset+4,
+                         loc.highReg, kWord, INVALID_SREG);
+            inRegs++;
+        }
+    }
+
+    // Now, do initial assignment of all promoted arguments passed in frame
+    for (int i = inRegs; i < cUnit->method->insSize;) {
+        RegLocation loc = cUnit->regLocation[startLoc + i];
+        if (loc.fpLocation == kLocPhysReg) {
+            loc.location = kLocPhysReg;
+            loc.fp = true;
+            loc.lowReg = loc.fpLowReg;
+            loc.highReg = loc.fpHighReg;
+        }
+        if (loc.location == kLocPhysReg) {
+            if (loc.wide) {
+                loadBaseDispWide(cUnit, NULL, rSP, loc.spOffset,
+                                 loc.lowReg, loc.highReg, INVALID_SREG);
+                i++;
+            } else {
+                loadBaseDisp(cUnit, NULL, rSP, loc.spOffset,
+                             loc.lowReg, kWord, INVALID_SREG);
+            }
+        }
+        i++;
+    }
+}
+
+/* Handle the content in each basic block */
+static bool methodBlockCodeGen(CompilationUnit* cUnit, BasicBlock* bb)
+{
+    MIR* mir;
+    ArmLIR* labelList = (ArmLIR*) cUnit->blockLabelList;
+    int blockId = bb->id;
+
+    cUnit->curBlock = bb;
+    labelList[blockId].operands[0] = bb->startOffset;
+
+    /* Insert the block label */
+    labelList[blockId].opcode = kArmPseudoNormalBlockLabel;
+    oatAppendLIR(cUnit, (LIR*) &labelList[blockId]);
+
+    oatClobberAllRegs(cUnit);
+    oatResetNullCheck(cUnit);
+
+    ArmLIR* headLIR = NULL;
+
+    if (bb->blockType == kEntryBlock) {
+        /*
+         * On entry, r0, r1, r2 & r3 are live.  Let the register allocation
+         * mechanism know so it doesn't try to use any of them when
+         * expanding the frame or flushing.  This leaves the utility
+         * code with a single temp: r12.  This should be enough.
+         */
+        oatLockTemp(cUnit, r0);
+        oatLockTemp(cUnit, r1);
+        oatLockTemp(cUnit, r2);
+        oatLockTemp(cUnit, r3);
+        newLIR0(cUnit, kArmPseudoMethodEntry);
+        /* Spill core callee saves */
+        newLIR1(cUnit, kThumb2Push, cUnit->coreSpillMask);
+        /* Need to spill any FP regs? */
+        if (cUnit->numFPSpills) {
+            newLIR1(cUnit, kThumb2VPushCS, cUnit->numFPSpills);
+        }
+        opRegImm(cUnit, kOpSub, rSP, cUnit->frameSize - (cUnit->numSpills * 4));
+        storeBaseDisp(cUnit, rSP, 0, r0, kWord);
+        flushIns(cUnit);
+        oatFreeTemp(cUnit, r0);
+        oatFreeTemp(cUnit, r1);
+        oatFreeTemp(cUnit, r2);
+        oatFreeTemp(cUnit, r3);
+    } else if (bb->blockType == kExitBlock) {
+        newLIR0(cUnit, kArmPseudoMethodExit);
+        opRegImm(cUnit, kOpAdd, rSP, cUnit->frameSize - (cUnit->numSpills * 4));
+        /* Need to restore any FP callee saves? */
+        if (cUnit->numFPSpills) {
+            newLIR1(cUnit, kThumb2VPopCS, cUnit->numFPSpills);
+        }
+        if (cUnit->coreSpillMask & (1 << rLR)) {
+            /* Unspill rLR to rPC */
+            cUnit->coreSpillMask &= ~(1 << rLR);
+            cUnit->coreSpillMask |= (1 << rPC);
+        }
+        newLIR1(cUnit, kThumb2Pop, cUnit->coreSpillMask);
+        if (!(cUnit->coreSpillMask & (1 << rPC))) {
+            /* We didn't pop to rPC, so must do a bv rLR */
+            newLIR1(cUnit, kThumbBx, rLR);
+        }
+    }
+
+    for (mir = bb->firstMIRInsn; mir; mir = mir->next) {
+
+        oatResetRegPool(cUnit);
+        if (cUnit->disableOpt & (1 << kTrackLiveTemps)) {
+            oatClobberAllRegs(cUnit);
+        }
+
+        if (cUnit->disableOpt & (1 << kSuppressLoads)) {
+            oatResetDefTracking(cUnit);
+        }
+
+        if ((int)mir->dalvikInsn.opcode >= (int)kMirOpFirst) {
+            handleExtendedMethodMIR(cUnit, mir);
+            continue;
+        }
+
+        cUnit->currentDalvikOffset = mir->offset;
+
+        Opcode dalvikOpcode = mir->dalvikInsn.opcode;
+        InstructionFormat dalvikFormat =
+            dexGetFormatFromOpcode(dalvikOpcode);
+
+        ArmLIR* boundaryLIR;
+
+        /* Mark the beginning of a Dalvik instruction for line tracking */
+        boundaryLIR = newLIR1(cUnit, kArmPseudoDalvikByteCodeBoundary,
+                             (int) oatGetDalvikDisassembly(
+                             &mir->dalvikInsn, ""));
+        /* Remember the first LIR for this block */
+        if (headLIR == NULL) {
+            headLIR = boundaryLIR;
+            /* Set the first boundaryLIR as a scheduling barrier */
+            headLIR->defMask = ENCODE_ALL;
+        }
+
+        /* Don't generate the SSA annotation unless verbose mode is on */
+        if (cUnit->printMe && mir->ssaRep) {
+            char *ssaString = oatGetSSAString(cUnit, mir->ssaRep);
+            newLIR1(cUnit, kArmPseudoSSARep, (int) ssaString);
+        }
+
+        bool notHandled = compileDalvikInstruction(cUnit, mir, bb, labelList);
+
+        if (notHandled) {
+            char buf[100];
+            snprintf(buf, 100, "%#06x: Opcode %#x (%s) / Fmt %d not handled",
+                 mir->offset,
+                 dalvikOpcode, dexGetOpcodeName(dalvikOpcode),
+                 dalvikFormat);
+            LOG(FATAL) << buf;
+        }
+    }
+
+    if (headLIR) {
+        /*
+         * Eliminate redundant loads/stores and delay stores into later
+         * slots
+         */
+        oatApplyLocalOptimizations(cUnit, (LIR*) headLIR,
+                                           cUnit->lastLIRInsn);
+
+        /*
+         * Generate an unconditional branch to the fallthrough block.
+         */
+        if (bb->fallThrough) {
+            genUnconditionalBranch(cUnit,
+                                   &labelList[bb->fallThrough->id]);
+        }
+    }
+    return false;
+}
+
+/*
+ * Nop any unconditional branches that go to the next instruction.
+ * Note: new redundant branches may be inserted later, and we'll
+ * use a check in final instruction assembly to nop those out.
+ */
+void removeRedundantBranches(CompilationUnit* cUnit)
+{
+    ArmLIR* thisLIR;
+
+    for (thisLIR = (ArmLIR*) cUnit->firstLIRInsn;
+         thisLIR != (ArmLIR*) cUnit->lastLIRInsn;
+         thisLIR = NEXT_LIR(thisLIR)) {
+
+        /* Branch to the next instruction */
+        if ((thisLIR->opcode == kThumbBUncond) ||
+            (thisLIR->opcode == kThumb2BUncond)) {
+            ArmLIR* nextLIR = thisLIR;
+
+            while (true) {
+                nextLIR = NEXT_LIR(nextLIR);
+
+                /*
+                 * Is the branch target the next instruction?
+                 */
+                if (nextLIR == (ArmLIR*) thisLIR->generic.target) {
+                    thisLIR->flags.isNop = true;
+                    break;
+                }
+
+                /*
+                 * Found real useful stuff between the branch and the target.
+                 * Need to explicitly check the lastLIRInsn here because it
+                 * might be the last real instruction.
+                 */
+                if (!isPseudoOpcode(nextLIR->opcode) ||
+                    (nextLIR = (ArmLIR*) cUnit->lastLIRInsn))
+                    break;
+            }
+        }
+    }
+}
+
+void oatMethodMIR2LIR(CompilationUnit* cUnit)
+{
+    /* Used to hold the labels of each block */
+    cUnit->blockLabelList =
+        (void *) oatNew(sizeof(ArmLIR) * cUnit->numBlocks, true);
+
+    oatDataFlowAnalysisDispatcher(cUnit, methodBlockCodeGen,
+                                  kPreOrderDFSTraversal, false /* Iterative */);
+    removeRedundantBranches(cUnit);
+}
+
+/* Common initialization routine for an architecture family */
+bool oatArchInit()
+{
+    int i;
+
+    for (i = 0; i < kArmLast; i++) {
+        if (EncodingMap[i].opcode != i) {
+            LOG(FATAL) << "Encoding order for " << EncodingMap[i].name <<
+               " is wrong: expecting " << i << ", seeing " <<
+               (int)EncodingMap[i].opcode;
+        }
+    }
+
+    return oatArchVariantInit();
+}
+
+/* Needed by the Assembler */
+void oatSetupResourceMasks(ArmLIR* lir)
+{
+    setupResourceMasks(lir);
+}
+
+/* Needed by the ld/st optmizatons */
+ArmLIR* oatRegCopyNoInsert(CompilationUnit* cUnit, int rDest, int rSrc)
+{
+    return genRegCopyNoInsert(cUnit, rDest, rSrc);
+}
+
+/* Needed by the register allocator */
+ArmLIR* oatRegCopy(CompilationUnit* cUnit, int rDest, int rSrc)
+{
+    return genRegCopy(cUnit, rDest, rSrc);
+}
+
+/* Needed by the register allocator */
+void oatRegCopyWide(CompilationUnit* cUnit, int destLo, int destHi,
+                            int srcLo, int srcHi)
+{
+    genRegCopyWide(cUnit, destLo, destHi, srcLo, srcHi);
+}
+
+void oatFlushRegImpl(CompilationUnit* cUnit, int rBase,
+                             int displacement, int rSrc, OpSize size)
+{
+    storeBaseDisp(cUnit, rBase, displacement, rSrc, size);
+}
+
+void oatFlushRegWideImpl(CompilationUnit* cUnit, int rBase,
+                                 int displacement, int rSrcLo, int rSrcHi)
+{
+    storeBaseDispWide(cUnit, rBase, displacement, rSrcLo, rSrcHi);
+}
+
+#ifdef TESTMODE
+// Will be done at runtime by art.  Keep for debugging
+void oatInitHelpers(Thread* thread)
+{
+    thread->pMemcpy = memcpy;
+    thread->pI2f = __aeabi_i2f;
+    thread->pF2iz = __aeabi_f2iz;
+    thread->pD2f = __aeabi_d2f;
+    thread->pF2d = __aeabi_f2d;
+    thread->pI2d = __aeabi_i2d;
+    thread->pD2iz = __aeabi_d2iz;
+    thread->pL2f = __aeabi_l2f;
+    thread->pL2d = __aeabi_l2d;
+    thread->pArtF2l = artF2L;
+    thread->pArtD2l = artD2L;
+    thread->pFadd = __aeabi_fadd;
+    thread->pFsub = __aeabi_fsub;
+    thread->pFdiv = __aeabi_fdiv;
+    thread->pFmul = __aeabi_fmul;
+    thread->pFmodf = fmodf;
+    thread->pDadd = __aeabi_dadd;
+    thread->pDsub = __aeabi_dsub;
+    thread->pDdiv = __aeabi_ddiv;
+    thread->pDmul = __aeabi_dmul;
+    thread->pFmod = fmod;
+    thread->pIdivmod = __aeabi_idivmod;
+    thread->pIdiv = __aeabi_idiv;
+    thread->pLdivmod = __aeabi_ldivmod;
+    thread->pArtUnlockObject = dvmUnlockObject;
+    thread->pArtCanPutArrayElementNoThrow = dvmCanPutArrayElement;
+    thread->pArtInstanceofNonTrivialNoThrow = dvmInstanceofNonTrivial;
+    thread->pArtInstanceofNonTrivial = dvmInstanceofNonTrivial;
+    thread->pArtAllocArrayByClass = dvmAllocArrayByClass;
+    thread->pArtFindInterfaceMethodInCache = dvmFindInterfaceMethodInCache;
+    thread->pArtUnlockObjectNoThrow = dvmUnlockObject;
+    thread->pArtLockObjectNoThrow = dvmLockObject;
+    thread->pArtAllocObjectNoThrow = dvmAllocObject;
+    thread->pArtThrowException = NULL;  //TBD
+    thread->pArtHandleFillArrayDataNoThrow = dvmInterpHandleFillArrayData;
+}
+#endif
diff --git a/src/compiler/codegen/arm/Thumb2/Factory.cc b/src/compiler/codegen/arm/Thumb2/Factory.cc
new file mode 100644
index 0000000..b3f3a41
--- /dev/null
+++ b/src/compiler/codegen/arm/Thumb2/Factory.cc
@@ -0,0 +1,1137 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * This file contains codegen for the Thumb ISA and is intended to be
+ * includes by:
+ *
+ *        Codegen-$(TARGET_ARCH_VARIANT).c
+ *
+ */
+
+static int coreRegs[] = {r0, r1, r2, r3, r4, r5, r6, r7, rSELF, r8, r10, r11,
+                         r12, rSP, rLR, rPC};
+static int reservedRegs[] = {rSELF, rSP, rLR, rPC};
+static int fpRegs[] = {fr0, fr1, fr2, fr3, fr4, fr5, fr6, fr7,
+                       fr8, fr9, fr10, fr11, fr12, fr13, fr14, fr15,
+                       fr16, fr17, fr18, fr19, fr20, fr21, fr22, fr23,
+                       fr24, fr25, fr26, fr27, fr28, fr29, fr30, fr31};
+static int coreTemps[] = {r0, r1, r2, r3, r12};
+static int fpTemps[] = {fr0, fr1, fr2, fr3, fr4, fr5, fr6, fr7,
+                        fr8, fr9, fr10, fr11, fr12, fr13, fr14, fr15};
+
+static int encodeImmSingle(int value)
+{
+    int res;
+    int bitA =    (value & 0x80000000) >> 31;
+    int notBitB = (value & 0x40000000) >> 30;
+    int bitB =    (value & 0x20000000) >> 29;
+    int bSmear =  (value & 0x3e000000) >> 25;
+    int slice =   (value & 0x01f80000) >> 19;
+    int zeroes =  (value & 0x0007ffff);
+    if (zeroes != 0)
+        return -1;
+    if (bitB) {
+        if ((notBitB != 0) || (bSmear != 0x1f))
+            return -1;
+    } else {
+        if ((notBitB != 1) || (bSmear != 0x0))
+            return -1;
+    }
+    res = (bitA << 7) | (bitB << 6) | slice;
+    return res;
+}
+
+static ArmLIR* loadFPConstantValue(CompilationUnit* cUnit, int rDest,
+                                   int value)
+{
+    int encodedImm = encodeImmSingle(value);
+    assert(SINGLEREG(rDest));
+    if (encodedImm >= 0) {
+        return newLIR2(cUnit, kThumb2Vmovs_IMM8, rDest, encodedImm);
+    }
+    ArmLIR* dataTarget = scanLiteralPool(cUnit->literalList, value, 0);
+    if (dataTarget == NULL) {
+        dataTarget = addWordData(cUnit, &cUnit->literalList, value);
+    }
+    ArmLIR* loadPcRel = (ArmLIR* ) oatNew(sizeof(ArmLIR), true);
+    loadPcRel->generic.dalvikOffset = cUnit->currentDalvikOffset;
+    loadPcRel->opcode = kThumb2Vldrs;
+    loadPcRel->generic.target = (LIR* ) dataTarget;
+    loadPcRel->operands[0] = rDest;
+    loadPcRel->operands[1] = r15pc;
+    setupResourceMasks(loadPcRel);
+    setMemRefType(loadPcRel, true, kLiteral);
+    loadPcRel->aliasInfo = dataTarget->operands[0];
+    oatAppendLIR(cUnit, (LIR* ) loadPcRel);
+    return loadPcRel;
+}
+
+static int leadingZeros(u4 val)
+{
+    u4 alt;
+    int n;
+    int count;
+
+    count = 16;
+    n = 32;
+    do {
+        alt = val >> count;
+        if (alt != 0) {
+            n = n - count;
+            val = alt;
+        }
+        count >>= 1;
+    } while (count);
+    return n - val;
+}
+
+/*
+ * Determine whether value can be encoded as a Thumb2 modified
+ * immediate.  If not, return -1.  If so, return i:imm3:a:bcdefgh form.
+ */
+static int modifiedImmediate(u4 value)
+{
+   int zLeading;
+   int zTrailing;
+   u4 b0 = value & 0xff;
+
+   /* Note: case of value==0 must use 0:000:0:0000000 encoding */
+   if (value <= 0xFF)
+       return b0;  // 0:000:a:bcdefgh
+   if (value == ((b0 << 16) | b0))
+       return (0x1 << 8) | b0; /* 0:001:a:bcdefgh */
+   if (value == ((b0 << 24) | (b0 << 16) | (b0 << 8) | b0))
+       return (0x3 << 8) | b0; /* 0:011:a:bcdefgh */
+   b0 = (value >> 8) & 0xff;
+   if (value == ((b0 << 24) | (b0 << 8)))
+       return (0x2 << 8) | b0; /* 0:010:a:bcdefgh */
+   /* Can we do it with rotation? */
+   zLeading = leadingZeros(value);
+   zTrailing = 32 - leadingZeros(~value & (value - 1));
+   /* A run of eight or fewer active bits? */
+   if ((zLeading + zTrailing) < 24)
+       return -1;  /* No - bail */
+   /* left-justify the constant, discarding msb (known to be 1) */
+   value <<= zLeading + 1;
+   /* Create bcdefgh */
+   value >>= 25;
+   /* Put it all together */
+   return value | ((0x8 + zLeading) << 7); /* [01000..11111]:bcdefgh */
+}
+
+/*
+ * Load a immediate using a shortcut if possible; otherwise
+ * grab from the per-translation literal pool.
+ *
+ * No additional register clobbering operation performed. Use this version when
+ * 1) rDest is freshly returned from oatAllocTemp or
+ * 2) The codegen is under fixed register usage
+ */
+static ArmLIR* loadConstantNoClobber(CompilationUnit* cUnit, int rDest,
+                                     int value)
+{
+    ArmLIR* res;
+    int modImm;
+
+    if (FPREG(rDest)) {
+        return loadFPConstantValue(cUnit, rDest, value);
+    }
+
+    /* See if the value can be constructed cheaply */
+    if (LOWREG(rDest) && (value >= 0) && (value <= 255)) {
+        return newLIR2(cUnit, kThumbMovImm, rDest, value);
+    }
+    /* Check Modified immediate special cases */
+    modImm = modifiedImmediate(value);
+    if (modImm >= 0) {
+        res = newLIR2(cUnit, kThumb2MovImmShift, rDest, modImm);
+        return res;
+    }
+    modImm = modifiedImmediate(~value);
+    if (modImm >= 0) {
+        res = newLIR2(cUnit, kThumb2MvnImmShift, rDest, modImm);
+        return res;
+    }
+    /* 16-bit immediate? */
+    if ((value & 0xffff) == value) {
+        res = newLIR2(cUnit, kThumb2MovImm16, rDest, value);
+        return res;
+    }
+    /* No shortcut - go ahead and use literal pool */
+    ArmLIR* dataTarget = scanLiteralPool(cUnit->literalList, value, 0);
+    if (dataTarget == NULL) {
+        dataTarget = addWordData(cUnit, &cUnit->literalList, value);
+    }
+    ArmLIR* loadPcRel = (ArmLIR* ) oatNew(sizeof(ArmLIR), true);
+    loadPcRel->opcode = kThumb2LdrPcRel12;
+    loadPcRel->generic.target = (LIR* ) dataTarget;
+    loadPcRel->generic.dalvikOffset = cUnit->currentDalvikOffset;
+    loadPcRel->operands[0] = rDest;
+    setupResourceMasks(loadPcRel);
+    setMemRefType(loadPcRel, true, kLiteral);
+    loadPcRel->aliasInfo = dataTarget->operands[0];
+    res = loadPcRel;
+    oatAppendLIR(cUnit, (LIR* ) loadPcRel);
+
+    /*
+     * To save space in the constant pool, we use the ADD_RRI8 instruction to
+     * add up to 255 to an existing constant value.
+     */
+    if (dataTarget->operands[0] != value) {
+        opRegImm(cUnit, kOpAdd, rDest, value - dataTarget->operands[0]);
+    }
+    return res;
+}
+
+/*
+ * Load an immediate value into a fixed or temp register.  Target
+ * register is clobbered, and marked inUse.
+ */
+static ArmLIR* loadConstant(CompilationUnit* cUnit, int rDest, int value)
+{
+    if (oatIsTemp(cUnit, rDest)) {
+        oatClobber(cUnit, rDest);
+        oatMarkInUse(cUnit, rDest);
+    }
+    return loadConstantNoClobber(cUnit, rDest, value);
+}
+
+static ArmLIR* opNone(CompilationUnit* cUnit, OpKind op)
+{
+    ArmOpcode opcode = kThumbBkpt;
+    switch (op) {
+        case kOpUncondBr:
+            opcode = kThumbBUncond;
+            break;
+        default:
+            assert(0);
+    }
+    return newLIR0(cUnit, opcode);
+}
+
+static ArmLIR* opCondBranch(CompilationUnit* cUnit, ArmConditionCode cc)
+{
+    return newLIR2(cUnit, kThumb2BCond, 0 /* offset to be patched */, cc);
+}
+
+static ArmLIR* opReg(CompilationUnit* cUnit, OpKind op, int rDestSrc)
+{
+    ArmOpcode opcode = kThumbBkpt;
+    switch (op) {
+        case kOpBlx:
+            opcode = kThumbBlxR;
+            break;
+        default:
+            assert(0);
+    }
+    return newLIR1(cUnit, opcode, rDestSrc);
+}
+
+static ArmLIR* opRegRegShift(CompilationUnit* cUnit, OpKind op, int rDestSrc1,
+                             int rSrc2, int shift)
+{
+    bool thumbForm = ((shift == 0) && LOWREG(rDestSrc1) && LOWREG(rSrc2));
+    ArmOpcode opcode = kThumbBkpt;
+    switch (op) {
+        case kOpAdc:
+            opcode = (thumbForm) ? kThumbAdcRR : kThumb2AdcRRR;
+            break;
+        case kOpAnd:
+            opcode = (thumbForm) ? kThumbAndRR : kThumb2AndRRR;
+            break;
+        case kOpBic:
+            opcode = (thumbForm) ? kThumbBicRR : kThumb2BicRRR;
+            break;
+        case kOpCmn:
+            assert(shift == 0);
+            opcode = (thumbForm) ? kThumbCmnRR : kThumb2CmnRR;
+            break;
+        case kOpCmp:
+            if (thumbForm)
+                opcode = kThumbCmpRR;
+            else if ((shift == 0) && !LOWREG(rDestSrc1) && !LOWREG(rSrc2))
+                opcode = kThumbCmpHH;
+            else if ((shift == 0) && LOWREG(rDestSrc1))
+                opcode = kThumbCmpLH;
+            else if (shift == 0)
+                opcode = kThumbCmpHL;
+            else
+                opcode = kThumb2CmpRR;
+            break;
+        case kOpXor:
+            opcode = (thumbForm) ? kThumbEorRR : kThumb2EorRRR;
+            break;
+        case kOpMov:
+            assert(shift == 0);
+            if (LOWREG(rDestSrc1) && LOWREG(rSrc2))
+                opcode = kThumbMovRR;
+            else if (!LOWREG(rDestSrc1) && !LOWREG(rSrc2))
+                opcode = kThumbMovRR_H2H;
+            else if (LOWREG(rDestSrc1))
+                opcode = kThumbMovRR_H2L;
+            else
+                opcode = kThumbMovRR_L2H;
+            break;
+        case kOpMul:
+            assert(shift == 0);
+            opcode = (thumbForm) ? kThumbMul : kThumb2MulRRR;
+            break;
+        case kOpMvn:
+            opcode = (thumbForm) ? kThumbMvn : kThumb2MnvRR;
+            break;
+        case kOpNeg:
+            assert(shift == 0);
+            opcode = (thumbForm) ? kThumbNeg : kThumb2NegRR;
+            break;
+        case kOpOr:
+            opcode = (thumbForm) ? kThumbOrr : kThumb2OrrRRR;
+            break;
+        case kOpSbc:
+            opcode = (thumbForm) ? kThumbSbc : kThumb2SbcRRR;
+            break;
+        case kOpTst:
+            opcode = (thumbForm) ? kThumbTst : kThumb2TstRR;
+            break;
+        case kOpLsl:
+            assert(shift == 0);
+            opcode = (thumbForm) ? kThumbLslRR : kThumb2LslRRR;
+            break;
+        case kOpLsr:
+            assert(shift == 0);
+            opcode = (thumbForm) ? kThumbLsrRR : kThumb2LsrRRR;
+            break;
+        case kOpAsr:
+            assert(shift == 0);
+            opcode = (thumbForm) ? kThumbAsrRR : kThumb2AsrRRR;
+            break;
+        case kOpRor:
+            assert(shift == 0);
+            opcode = (thumbForm) ? kThumbRorRR : kThumb2RorRRR;
+            break;
+        case kOpAdd:
+            opcode = (thumbForm) ? kThumbAddRRR : kThumb2AddRRR;
+            break;
+        case kOpSub:
+            opcode = (thumbForm) ? kThumbSubRRR : kThumb2SubRRR;
+            break;
+        case kOp2Byte:
+            assert(shift == 0);
+            return newLIR4(cUnit, kThumb2Sbfx, rDestSrc1, rSrc2, 0, 8);
+        case kOp2Short:
+            assert(shift == 0);
+            return newLIR4(cUnit, kThumb2Sbfx, rDestSrc1, rSrc2, 0, 16);
+        case kOp2Char:
+            assert(shift == 0);
+            return newLIR4(cUnit, kThumb2Ubfx, rDestSrc1, rSrc2, 0, 16);
+        default:
+            assert(0);
+            break;
+    }
+    assert(opcode >= 0);
+    if (EncodingMap[opcode].flags & IS_BINARY_OP)
+        return newLIR2(cUnit, opcode, rDestSrc1, rSrc2);
+    else if (EncodingMap[opcode].flags & IS_TERTIARY_OP) {
+        if (EncodingMap[opcode].fieldLoc[2].kind == kFmtShift)
+            return newLIR3(cUnit, opcode, rDestSrc1, rSrc2, shift);
+        else
+            return newLIR3(cUnit, opcode, rDestSrc1, rDestSrc1, rSrc2);
+    } else if (EncodingMap[opcode].flags & IS_QUAD_OP)
+        return newLIR4(cUnit, opcode, rDestSrc1, rDestSrc1, rSrc2, shift);
+    else {
+        assert(0);
+        return NULL;
+    }
+}
+
+static ArmLIR* opRegReg(CompilationUnit* cUnit, OpKind op, int rDestSrc1,
+                        int rSrc2)
+{
+    return opRegRegShift(cUnit, op, rDestSrc1, rSrc2, 0);
+}
+
+static ArmLIR* opRegRegRegShift(CompilationUnit* cUnit, OpKind op,
+                                int rDest, int rSrc1, int rSrc2, int shift)
+{
+    ArmOpcode opcode = kThumbBkpt;
+    bool thumbForm = (shift == 0) && LOWREG(rDest) && LOWREG(rSrc1) &&
+                      LOWREG(rSrc2);
+    switch (op) {
+        case kOpAdd:
+            opcode = (thumbForm) ? kThumbAddRRR : kThumb2AddRRR;
+            break;
+        case kOpSub:
+            opcode = (thumbForm) ? kThumbSubRRR : kThumb2SubRRR;
+            break;
+        case kOpAdc:
+            opcode = kThumb2AdcRRR;
+            break;
+        case kOpAnd:
+            opcode = kThumb2AndRRR;
+            break;
+        case kOpBic:
+            opcode = kThumb2BicRRR;
+            break;
+        case kOpXor:
+            opcode = kThumb2EorRRR;
+            break;
+        case kOpMul:
+            assert(shift == 0);
+            opcode = kThumb2MulRRR;
+            break;
+        case kOpOr:
+            opcode = kThumb2OrrRRR;
+            break;
+        case kOpSbc:
+            opcode = kThumb2SbcRRR;
+            break;
+        case kOpLsl:
+            assert(shift == 0);
+            opcode = kThumb2LslRRR;
+            break;
+        case kOpLsr:
+            assert(shift == 0);
+            opcode = kThumb2LsrRRR;
+            break;
+        case kOpAsr:
+            assert(shift == 0);
+            opcode = kThumb2AsrRRR;
+            break;
+        case kOpRor:
+            assert(shift == 0);
+            opcode = kThumb2RorRRR;
+            break;
+        default:
+            assert(0);
+            break;
+    }
+    assert(opcode >= 0);
+    if (EncodingMap[opcode].flags & IS_QUAD_OP)
+        return newLIR4(cUnit, opcode, rDest, rSrc1, rSrc2, shift);
+    else {
+        assert(EncodingMap[opcode].flags & IS_TERTIARY_OP);
+        return newLIR3(cUnit, opcode, rDest, rSrc1, rSrc2);
+    }
+}
+
+static ArmLIR* opRegRegReg(CompilationUnit* cUnit, OpKind op, int rDest,
+                           int rSrc1, int rSrc2)
+{
+    return opRegRegRegShift(cUnit, op, rDest, rSrc1, rSrc2, 0);
+}
+
+static ArmLIR* opRegRegImm(CompilationUnit* cUnit, OpKind op, int rDest,
+                           int rSrc1, int value)
+{
+    ArmLIR* res;
+    bool neg = (value < 0);
+    int absValue = (neg) ? -value : value;
+    ArmOpcode opcode = kThumbBkpt;
+    ArmOpcode altOpcode = kThumbBkpt;
+    bool allLowRegs = (LOWREG(rDest) && LOWREG(rSrc1));
+    int modImm = modifiedImmediate(value);
+    int modImmNeg = modifiedImmediate(-value);
+
+    switch(op) {
+        case kOpLsl:
+            if (allLowRegs)
+                return newLIR3(cUnit, kThumbLslRRI5, rDest, rSrc1, value);
+            else
+                return newLIR3(cUnit, kThumb2LslRRI5, rDest, rSrc1, value);
+        case kOpLsr:
+            if (allLowRegs)
+                return newLIR3(cUnit, kThumbLsrRRI5, rDest, rSrc1, value);
+            else
+                return newLIR3(cUnit, kThumb2LsrRRI5, rDest, rSrc1, value);
+        case kOpAsr:
+            if (allLowRegs)
+                return newLIR3(cUnit, kThumbAsrRRI5, rDest, rSrc1, value);
+            else
+                return newLIR3(cUnit, kThumb2AsrRRI5, rDest, rSrc1, value);
+        case kOpRor:
+            return newLIR3(cUnit, kThumb2RorRRI5, rDest, rSrc1, value);
+        case kOpAdd:
+            if (LOWREG(rDest) && (rSrc1 == r13sp) &&
+                (value <= 1020) && ((value & 0x3)==0)) {
+                return newLIR3(cUnit, kThumbAddSpRel, rDest, rSrc1,
+                               value >> 2);
+            } else if (LOWREG(rDest) && (rSrc1 == r15pc) &&
+                       (value <= 1020) && ((value & 0x3)==0)) {
+                return newLIR3(cUnit, kThumbAddPcRel, rDest, rSrc1,
+                               value >> 2);
+            }
+            opcode = kThumb2AddRRI8;
+            altOpcode = kThumb2AddRRR;
+            // Note: intentional fallthrough
+        case kOpSub:
+            if (allLowRegs && ((absValue & 0x7) == absValue)) {
+                if (op == kOpAdd)
+                    opcode = (neg) ? kThumbSubRRI3 : kThumbAddRRI3;
+                else
+                    opcode = (neg) ? kThumbAddRRI3 : kThumbSubRRI3;
+                return newLIR3(cUnit, opcode, rDest, rSrc1, absValue);
+            } else if ((absValue & 0xff) == absValue) {
+                if (op == kOpAdd)
+                    opcode = (neg) ? kThumb2SubRRI12 : kThumb2AddRRI12;
+                else
+                    opcode = (neg) ? kThumb2AddRRI12 : kThumb2SubRRI12;
+                return newLIR3(cUnit, opcode, rDest, rSrc1, absValue);
+            }
+            if (modImmNeg >= 0) {
+                op = (op == kOpAdd) ? kOpSub : kOpAdd;
+                modImm = modImmNeg;
+            }
+            if (op == kOpSub) {
+                opcode = kThumb2SubRRI8;
+                altOpcode = kThumb2SubRRR;
+            }
+            break;
+        case kOpAdc:
+            opcode = kThumb2AdcRRI8;
+            altOpcode = kThumb2AdcRRR;
+            break;
+        case kOpSbc:
+            opcode = kThumb2SbcRRI8;
+            altOpcode = kThumb2SbcRRR;
+            break;
+        case kOpOr:
+            opcode = kThumb2OrrRRI8;
+            altOpcode = kThumb2OrrRRR;
+            break;
+        case kOpAnd:
+            opcode = kThumb2AndRRI8;
+            altOpcode = kThumb2AndRRR;
+            break;
+        case kOpXor:
+            opcode = kThumb2EorRRI8;
+            altOpcode = kThumb2EorRRR;
+            break;
+        case kOpMul:
+            //TUNING: power of 2, shift & add
+            modImm = -1;
+            altOpcode = kThumb2MulRRR;
+            break;
+        case kOpCmp: {
+            int modImm = modifiedImmediate(value);
+            ArmLIR* res;
+            if (modImm >= 0) {
+                res = newLIR2(cUnit, kThumb2CmpRI8, rSrc1, modImm);
+            } else {
+                int rTmp = oatAllocTemp(cUnit);
+                res = loadConstant(cUnit, rTmp, value);
+                opRegReg(cUnit, kOpCmp, rSrc1, rTmp);
+                oatFreeTemp(cUnit, rTmp);
+            }
+            return res;
+        }
+        default:
+            assert(0);
+    }
+
+    if (modImm >= 0) {
+        return newLIR3(cUnit, opcode, rDest, rSrc1, modImm);
+    } else {
+        int rScratch = oatAllocTemp(cUnit);
+        loadConstant(cUnit, rScratch, value);
+        if (EncodingMap[altOpcode].flags & IS_QUAD_OP)
+            res = newLIR4(cUnit, altOpcode, rDest, rSrc1, rScratch, 0);
+        else
+            res = newLIR3(cUnit, altOpcode, rDest, rSrc1, rScratch);
+        oatFreeTemp(cUnit, rScratch);
+        return res;
+    }
+}
+
+/* Handle Thumb-only variants here - otherwise punt to opRegRegImm */
+static ArmLIR* opRegImm(CompilationUnit* cUnit, OpKind op, int rDestSrc1,
+                        int value)
+{
+    bool neg = (value < 0);
+    int absValue = (neg) ? -value : value;
+    bool shortForm = (((absValue & 0xff) == absValue) && LOWREG(rDestSrc1));
+    ArmOpcode opcode = kThumbBkpt;
+    switch (op) {
+        case kOpAdd:
+            if ( !neg && (rDestSrc1 == r13sp) && (value <= 508)) { /* sp */
+                assert((value & 0x3) == 0);
+                return newLIR1(cUnit, kThumbAddSpI7, value >> 2);
+            } else if (shortForm) {
+                opcode = (neg) ? kThumbSubRI8 : kThumbAddRI8;
+            }
+            break;
+        case kOpSub:
+            if (!neg && (rDestSrc1 == r13sp) && (value <= 508)) { /* sp */
+                assert((value & 0x3) == 0);
+                return newLIR1(cUnit, kThumbSubSpI7, value >> 2);
+            } else if (shortForm) {
+                opcode = (neg) ? kThumbAddRI8 : kThumbSubRI8;
+            }
+            break;
+        case kOpCmp:
+            if (LOWREG(rDestSrc1) && shortForm)
+                opcode = (shortForm) ?  kThumbCmpRI8 : kThumbCmpRR;
+            else if (LOWREG(rDestSrc1))
+                opcode = kThumbCmpRR;
+            else {
+                shortForm = false;
+                opcode = kThumbCmpHL;
+            }
+            break;
+        default:
+            /* Punt to opRegRegImm - if bad case catch it there */
+            shortForm = false;
+            break;
+    }
+    if (shortForm)
+        return newLIR2(cUnit, opcode, rDestSrc1, absValue);
+    else {
+        return opRegRegImm(cUnit, op, rDestSrc1, rDestSrc1, value);
+    }
+}
+
+/*
+ * Determine whether value can be encoded as a Thumb2 floating point
+ * immediate.  If not, return -1.  If so return encoded 8-bit value.
+ */
+static int encodeImmDoubleHigh(int value)
+{
+    int res;
+    int bitA =    (value & 0x80000000) >> 31;
+    int notBitB = (value & 0x40000000) >> 30;
+    int bitB =    (value & 0x20000000) >> 29;
+    int bSmear =  (value & 0x3fc00000) >> 22;
+    int slice =   (value & 0x003f0000) >> 16;
+    int zeroes =  (value & 0x0000ffff);
+    if (zeroes != 0)
+        return -1;
+    if (bitB) {
+        if ((notBitB != 0) || (bSmear != 0x1f))
+            return -1;
+    } else {
+        if ((notBitB != 1) || (bSmear != 0x0))
+            return -1;
+    }
+    res = (bitA << 7) | (bitB << 6) | slice;
+    return res;
+}
+
+static int encodeImmDouble(int valLo, int valHi)
+{
+    int res = -1;
+    if (valLo == 0)
+        res = encodeImmDoubleHigh(valHi);
+    return res;
+}
+
+static ArmLIR* loadConstantValueWide(CompilationUnit* cUnit, int rDestLo,
+                                     int rDestHi, int valLo, int valHi)
+{
+    int encodedImm = encodeImmDouble(valLo, valHi);
+    ArmLIR* res;
+    if (FPREG(rDestLo) && (encodedImm >= 0)) {
+        res = newLIR2(cUnit, kThumb2Vmovd_IMM8, S2D(rDestLo, rDestHi),
+                      encodedImm);
+    } else {
+        res = loadConstantNoClobber(cUnit, rDestLo, valLo);
+        loadConstantNoClobber(cUnit, rDestHi, valHi);
+    }
+    return res;
+}
+
+static int encodeShift(int code, int amount) {
+    return ((amount & 0x1f) << 2) | code;
+}
+
+static ArmLIR* loadBaseIndexed(CompilationUnit* cUnit, int rBase,
+                               int rIndex, int rDest, int scale, OpSize size)
+{
+    bool allLowRegs = LOWREG(rBase) && LOWREG(rIndex) && LOWREG(rDest);
+    ArmLIR* load;
+    ArmOpcode opcode = kThumbBkpt;
+    bool thumbForm = (allLowRegs && (scale == 0));
+    int regPtr;
+
+    if (FPREG(rDest)) {
+        assert(SINGLEREG(rDest));
+        assert((size == kWord) || (size == kSingle));
+        opcode = kThumb2Vldrs;
+        size = kSingle;
+    } else {
+        if (size == kSingle)
+            size = kWord;
+    }
+
+    switch (size) {
+        case kSingle:
+            regPtr = oatAllocTemp(cUnit);
+            if (scale) {
+                newLIR4(cUnit, kThumb2AddRRR, regPtr, rBase, rIndex,
+                        encodeShift(kArmLsl, scale));
+            } else {
+                opRegRegReg(cUnit, kOpAdd, regPtr, rBase, rIndex);
+            }
+            load = newLIR3(cUnit, opcode, rDest, regPtr, 0);
+            return load;
+        case kWord:
+            opcode = (thumbForm) ? kThumbLdrRRR : kThumb2LdrRRR;
+            break;
+        case kUnsignedHalf:
+            opcode = (thumbForm) ? kThumbLdrhRRR : kThumb2LdrhRRR;
+            break;
+        case kSignedHalf:
+            opcode = (thumbForm) ? kThumbLdrshRRR : kThumb2LdrshRRR;
+            break;
+        case kUnsignedByte:
+            opcode = (thumbForm) ? kThumbLdrbRRR : kThumb2LdrbRRR;
+            break;
+        case kSignedByte:
+            opcode = (thumbForm) ? kThumbLdrsbRRR : kThumb2LdrsbRRR;
+            break;
+        default:
+            assert(0);
+    }
+    if (thumbForm)
+        load = newLIR3(cUnit, opcode, rDest, rBase, rIndex);
+    else
+        load = newLIR4(cUnit, opcode, rDest, rBase, rIndex, scale);
+
+    return load;
+}
+
+static ArmLIR* storeBaseIndexed(CompilationUnit* cUnit, int rBase,
+                                int rIndex, int rSrc, int scale, OpSize size)
+{
+    bool allLowRegs = LOWREG(rBase) && LOWREG(rIndex) && LOWREG(rSrc);
+    ArmLIR* store;
+    ArmOpcode opcode = kThumbBkpt;
+    bool thumbForm = (allLowRegs && (scale == 0));
+    int regPtr;
+
+    if (FPREG(rSrc)) {
+        assert(SINGLEREG(rSrc));
+        assert((size == kWord) || (size == kSingle));
+        opcode = kThumb2Vstrs;
+        size = kSingle;
+    } else {
+        if (size == kSingle)
+            size = kWord;
+    }
+
+    switch (size) {
+        case kSingle:
+            regPtr = oatAllocTemp(cUnit);
+            if (scale) {
+                newLIR4(cUnit, kThumb2AddRRR, regPtr, rBase, rIndex,
+                        encodeShift(kArmLsl, scale));
+            } else {
+                opRegRegReg(cUnit, kOpAdd, regPtr, rBase, rIndex);
+            }
+            store = newLIR3(cUnit, opcode, rSrc, regPtr, 0);
+            return store;
+        case kWord:
+            opcode = (thumbForm) ? kThumbStrRRR : kThumb2StrRRR;
+            break;
+        case kUnsignedHalf:
+        case kSignedHalf:
+            opcode = (thumbForm) ? kThumbStrhRRR : kThumb2StrhRRR;
+            break;
+        case kUnsignedByte:
+        case kSignedByte:
+            opcode = (thumbForm) ? kThumbStrbRRR : kThumb2StrbRRR;
+            break;
+        default:
+            assert(0);
+    }
+    if (thumbForm)
+        store = newLIR3(cUnit, opcode, rSrc, rBase, rIndex);
+    else
+        store = newLIR4(cUnit, opcode, rSrc, rBase, rIndex, scale);
+
+    return store;
+}
+
+/*
+ * Load value from base + displacement.  Optionally perform null check
+ * on base (which must have an associated sReg and MIR).  If not
+ * performing null check, incoming MIR can be null.
+ */
+static ArmLIR* loadBaseDispBody(CompilationUnit* cUnit, MIR* mir, int rBase,
+                                int displacement, int rDest, int rDestHi,
+                                OpSize size, int sReg)
+{
+    ArmLIR* res;
+    ArmLIR* load;
+    ArmOpcode opcode = kThumbBkpt;
+    bool shortForm = false;
+    bool thumb2Form = (displacement < 4092 && displacement >= 0);
+    bool allLowRegs = (LOWREG(rBase) && LOWREG(rDest));
+    int encodedDisp = displacement;
+
+    switch (size) {
+        case kDouble:
+        case kLong:
+            if (FPREG(rDest)) {
+                if (SINGLEREG(rDest)) {
+                    assert(FPREG(rDestHi));
+                    rDest = S2D(rDest, rDestHi);
+                }
+                opcode = kThumb2Vldrd;
+                if (displacement <= 1020) {
+                    shortForm = true;
+                    encodedDisp >>= 2;
+                }
+                break;
+            } else {
+                res = loadBaseDispBody(cUnit, mir, rBase, displacement, rDest,
+                                       -1, kWord, sReg);
+                loadBaseDispBody(cUnit, NULL, rBase, displacement + 4, rDestHi,
+                                 -1, kWord, INVALID_SREG);
+                return res;
+            }
+        case kSingle:
+        case kWord:
+            if (FPREG(rDest)) {
+                opcode = kThumb2Vldrs;
+                if (displacement <= 1020) {
+                    shortForm = true;
+                    encodedDisp >>= 2;
+                }
+                break;
+            }
+            if (LOWREG(rDest) && (rBase == r15pc) &&
+                (displacement <= 1020) && (displacement >= 0)) {
+                shortForm = true;
+                encodedDisp >>= 2;
+                opcode = kThumbLdrPcRel;
+            } else if (LOWREG(rDest) && (rBase == r13sp) &&
+                      (displacement <= 1020) && (displacement >= 0)) {
+                shortForm = true;
+                encodedDisp >>= 2;
+                opcode = kThumbLdrSpRel;
+            } else if (allLowRegs && displacement < 128 && displacement >= 0) {
+                assert((displacement & 0x3) == 0);
+                shortForm = true;
+                encodedDisp >>= 2;
+                opcode = kThumbLdrRRI5;
+            } else if (thumb2Form) {
+                shortForm = true;
+                opcode = kThumb2LdrRRI12;
+            }
+            break;
+        case kUnsignedHalf:
+            if (allLowRegs && displacement < 64 && displacement >= 0) {
+                assert((displacement & 0x1) == 0);
+                shortForm = true;
+                encodedDisp >>= 1;
+                opcode = kThumbLdrhRRI5;
+            } else if (displacement < 4092 && displacement >= 0) {
+                shortForm = true;
+                opcode = kThumb2LdrhRRI12;
+            }
+            break;
+        case kSignedHalf:
+            if (thumb2Form) {
+                shortForm = true;
+                opcode = kThumb2LdrshRRI12;
+            }
+            break;
+        case kUnsignedByte:
+            if (allLowRegs && displacement < 32 && displacement >= 0) {
+                shortForm = true;
+                opcode = kThumbLdrbRRI5;
+            } else if (thumb2Form) {
+                shortForm = true;
+                opcode = kThumb2LdrbRRI12;
+            }
+            break;
+        case kSignedByte:
+            if (thumb2Form) {
+                shortForm = true;
+                opcode = kThumb2LdrsbRRI12;
+            }
+            break;
+        default:
+            assert(0);
+    }
+
+    if (shortForm) {
+        load = res = newLIR3(cUnit, opcode, rDest, rBase, encodedDisp);
+    } else {
+        int regOffset = oatAllocTemp(cUnit);
+        res = loadConstant(cUnit, regOffset, encodedDisp);
+        load = loadBaseIndexed(cUnit, rBase, regOffset, rDest, 0, size);
+        oatFreeTemp(cUnit, regOffset);
+    }
+
+    // TODO: in future may need to differentiate Dalvik accesses w/ spills
+    if (rBase == rSP) {
+        annotateDalvikRegAccess(load, displacement >> 2, true /* isLoad */);
+    }
+    return load;
+}
+
+static ArmLIR* loadBaseDisp(CompilationUnit* cUnit, MIR* mir, int rBase,
+                            int displacement, int rDest, OpSize size,
+                            int sReg)
+{
+    return loadBaseDispBody(cUnit, mir, rBase, displacement, rDest, -1,
+                            size, sReg);
+}
+
+static  ArmLIR* loadBaseDispWide(CompilationUnit* cUnit, MIR* mir, int rBase,
+                                 int displacement, int rDestLo, int rDestHi,
+                                 int sReg)
+{
+    return loadBaseDispBody(cUnit, mir, rBase, displacement, rDestLo, rDestHi,
+                            kLong, sReg);
+}
+
+
+static ArmLIR* storeBaseDispBody(CompilationUnit* cUnit, int rBase,
+                                 int displacement, int rSrc, int rSrcHi,
+                                 OpSize size)
+{
+    ArmLIR* res, *store;
+    ArmOpcode opcode = kThumbBkpt;
+    bool shortForm = false;
+    bool thumb2Form = (displacement < 4092 && displacement >= 0);
+    bool allLowRegs = (LOWREG(rBase) && LOWREG(rSrc));
+    int encodedDisp = displacement;
+
+    switch (size) {
+        case kLong:
+        case kDouble:
+            if (!FPREG(rSrc)) {
+                res = storeBaseDispBody(cUnit, rBase, displacement, rSrc,
+                                        -1, kWord);
+                storeBaseDispBody(cUnit, rBase, displacement + 4, rSrcHi,
+                                  -1, kWord);
+                return res;
+            }
+            if (SINGLEREG(rSrc)) {
+                assert(FPREG(rSrcHi));
+                rSrc = S2D(rSrc, rSrcHi);
+            }
+            opcode = kThumb2Vstrd;
+            if (displacement <= 1020) {
+                shortForm = true;
+                encodedDisp >>= 2;
+            }
+            break;
+        case kSingle:
+        case kWord:
+            if (FPREG(rSrc)) {
+                assert(SINGLEREG(rSrc));
+                opcode = kThumb2Vstrs;
+                if (displacement <= 1020) {
+                    shortForm = true;
+                    encodedDisp >>= 2;
+                }
+            break;
+            }
+            if (allLowRegs && displacement < 128 && displacement >= 0) {
+                assert((displacement & 0x3) == 0);
+                shortForm = true;
+                encodedDisp >>= 2;
+                opcode = kThumbStrRRI5;
+            } else if (thumb2Form) {
+                shortForm = true;
+                opcode = kThumb2StrRRI12;
+            }
+            break;
+        case kUnsignedHalf:
+        case kSignedHalf:
+            if (allLowRegs && displacement < 64 && displacement >= 0) {
+                assert((displacement & 0x1) == 0);
+                shortForm = true;
+                encodedDisp >>= 1;
+                opcode = kThumbStrhRRI5;
+            } else if (thumb2Form) {
+                shortForm = true;
+                opcode = kThumb2StrhRRI12;
+            }
+            break;
+        case kUnsignedByte:
+        case kSignedByte:
+            if (allLowRegs && displacement < 32 && displacement >= 0) {
+                shortForm = true;
+                opcode = kThumbStrbRRI5;
+            } else if (thumb2Form) {
+                shortForm = true;
+                opcode = kThumb2StrbRRI12;
+            }
+            break;
+        default:
+            assert(0);
+    }
+    if (shortForm) {
+        store = res = newLIR3(cUnit, opcode, rSrc, rBase, encodedDisp);
+    } else {
+        int rScratch = oatAllocTemp(cUnit);
+        res = loadConstant(cUnit, rScratch, encodedDisp);
+        store = storeBaseIndexed(cUnit, rBase, rScratch, rSrc, 0, size);
+        oatFreeTemp(cUnit, rScratch);
+    }
+
+    // TODO: In future, may need to differentiate Dalvik & spill accesses
+    if (rBase == rSP) {
+        annotateDalvikRegAccess(store, displacement >> 2, false /* isLoad */);
+    }
+    return res;
+}
+
+static ArmLIR* storeBaseDisp(CompilationUnit* cUnit, int rBase,
+                             int displacement, int rSrc, OpSize size)
+{
+    return storeBaseDispBody(cUnit, rBase, displacement, rSrc, -1, size);
+}
+
+static ArmLIR* storeBaseDispWide(CompilationUnit* cUnit, int rBase,
+                                 int displacement, int rSrcLo, int rSrcHi)
+{
+    return storeBaseDispBody(cUnit, rBase, displacement, rSrcLo, rSrcHi, kLong);
+}
+
+static void storePair(CompilationUnit* cUnit, int base, int lowReg, int highReg)
+{
+    storeBaseDispWide(cUnit, base, 0, lowReg, highReg);
+}
+
+static void loadPair(CompilationUnit* cUnit, int base, int lowReg, int highReg)
+{
+    loadBaseDispWide(cUnit, NULL, base, 0, lowReg, highReg, INVALID_SREG);
+}
+
+/*
+ * Generate a register comparison to an immediate and branch.  Caller
+ * is responsible for setting branch target field.
+ */
+static ArmLIR* genCmpImmBranch(CompilationUnit* cUnit,
+                              ArmConditionCode cond, int reg,
+                              int checkValue)
+{
+    ArmLIR* branch;
+    int modImm;
+    if ((LOWREG(reg)) && (checkValue == 0) &&
+       ((cond == kArmCondEq) || (cond == kArmCondNe))) {
+        branch = newLIR2(cUnit,
+                         (cond == kArmCondEq) ? kThumb2Cbz : kThumb2Cbnz,
+                         reg, 0);
+    } else {
+        modImm = modifiedImmediate(checkValue);
+        if (LOWREG(reg) && ((checkValue & 0xff) == checkValue)) {
+            newLIR2(cUnit, kThumbCmpRI8, reg, checkValue);
+        } else if (modImm >= 0) {
+            newLIR2(cUnit, kThumb2CmpRI8, reg, modImm);
+        } else {
+            int tReg = oatAllocTemp(cUnit);
+            loadConstant(cUnit, tReg, checkValue);
+            opRegReg(cUnit, kOpCmp, reg, tReg);
+        }
+        branch = newLIR2(cUnit, kThumbBCond, 0, cond);
+    }
+    return branch;
+}
+
+static ArmLIR* fpRegCopy(CompilationUnit* cUnit, int rDest, int rSrc)
+{
+    ArmLIR* res = (ArmLIR* ) oatNew(sizeof(ArmLIR), true);
+    res->generic.dalvikOffset = cUnit->currentDalvikOffset;
+    res->operands[0] = rDest;
+    res->operands[1] = rSrc;
+    if (rDest == rSrc) {
+        res->flags.isNop = true;
+    } else {
+        assert(DOUBLEREG(rDest) == DOUBLEREG(rSrc));
+        if (DOUBLEREG(rDest)) {
+            res->opcode = kThumb2Vmovd;
+        } else {
+            if (SINGLEREG(rDest)) {
+                res->opcode = SINGLEREG(rSrc) ? kThumb2Vmovs : kThumb2Fmsr;
+            } else {
+                assert(SINGLEREG(rSrc));
+                res->opcode = kThumb2Fmrs;
+            }
+        }
+        res->operands[0] = rDest;
+        res->operands[1] = rSrc;
+    }
+    setupResourceMasks(res);
+    return res;
+}
+
+static ArmLIR* genRegCopyNoInsert(CompilationUnit* cUnit, int rDest, int rSrc)
+{
+    ArmLIR* res;
+    ArmOpcode opcode;
+    if (FPREG(rDest) || FPREG(rSrc))
+        return fpRegCopy(cUnit, rDest, rSrc);
+    res = (ArmLIR* ) oatNew(sizeof(ArmLIR), true);
+    res->generic.dalvikOffset = cUnit->currentDalvikOffset;
+    if (LOWREG(rDest) && LOWREG(rSrc))
+        opcode = kThumbMovRR;
+    else if (!LOWREG(rDest) && !LOWREG(rSrc))
+         opcode = kThumbMovRR_H2H;
+    else if (LOWREG(rDest))
+         opcode = kThumbMovRR_H2L;
+    else
+         opcode = kThumbMovRR_L2H;
+
+    res->operands[0] = rDest;
+    res->operands[1] = rSrc;
+    res->opcode = opcode;
+    setupResourceMasks(res);
+    if (rDest == rSrc) {
+        res->flags.isNop = true;
+    }
+    return res;
+}
+
+static ArmLIR* genRegCopy(CompilationUnit* cUnit, int rDest, int rSrc)
+{
+    ArmLIR* res = genRegCopyNoInsert(cUnit, rDest, rSrc);
+    oatAppendLIR(cUnit, (LIR*)res);
+    return res;
+}
+
+static void genRegCopyWide(CompilationUnit* cUnit, int destLo, int destHi,
+                           int srcLo, int srcHi)
+{
+    bool destFP = FPREG(destLo) && FPREG(destHi);
+    bool srcFP = FPREG(srcLo) && FPREG(srcHi);
+    assert(FPREG(srcLo) == FPREG(srcHi));
+    assert(FPREG(destLo) == FPREG(destHi));
+    if (destFP) {
+        if (srcFP) {
+            genRegCopy(cUnit, S2D(destLo, destHi), S2D(srcLo, srcHi));
+        } else {
+            newLIR3(cUnit, kThumb2Fmdrr, S2D(destLo, destHi), srcLo, srcHi);
+        }
+    } else {
+        if (srcFP) {
+            newLIR3(cUnit, kThumb2Fmrrd, destLo, destHi, S2D(srcLo, srcHi));
+        } else {
+            // Handle overlap
+            if (srcHi == destLo) {
+                genRegCopy(cUnit, destHi, srcHi);
+                genRegCopy(cUnit, destLo, srcLo);
+            } else {
+                genRegCopy(cUnit, destLo, srcLo);
+                genRegCopy(cUnit, destHi, srcHi);
+            }
+        }
+    }
+}
diff --git a/src/compiler/codegen/arm/Thumb2/Gen.cc b/src/compiler/codegen/arm/Thumb2/Gen.cc
new file mode 100644
index 0000000..c9d72f7
--- /dev/null
+++ b/src/compiler/codegen/arm/Thumb2/Gen.cc
@@ -0,0 +1,1885 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * This file contains codegen for the Thumb2 ISA and is intended to be
+ * includes by:
+ *
+ *        Codegen-$(TARGET_ARCH_VARIANT).c
+ *
+ */
+
+/*
+ * Construct an s4 from two consecutive half-words of switch data.
+ * This needs to check endianness because the DEX optimizer only swaps
+ * half-words in instruction stream.
+ *
+ * "switchData" must be 32-bit aligned.
+ */
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+static inline s4 s4FromSwitchData(const void* switchData) {
+    return *(s4*) switchData;
+}
+#else
+static inline s4 s4FromSwitchData(const void* switchData) {
+    u2* data = switchData;
+    return data[0] | (((s4) data[1]) << 16);
+}
+#endif
+
+/*
+ * Generate a Thumb2 IT instruction, which can nullify up to
+ * four subsequent instructions based on a condition and its
+ * inverse.  The condition applies to the first instruction, which
+ * is executed if the condition is met.  The string "guide" consists
+ * of 0 to 3 chars, and applies to the 2nd through 4th instruction.
+ * A "T" means the instruction is executed if the condition is
+ * met, and an "E" means the instruction is executed if the condition
+ * is not met.
+ */
+static ArmLIR* genIT(CompilationUnit* cUnit, ArmConditionCode code,
+                     const char* guide)
+{
+    int mask;
+    int condBit = code & 1;
+    int altBit = condBit ^ 1;
+    int mask3 = 0;
+    int mask2 = 0;
+    int mask1 = 0;
+
+    //Note: case fallthroughs intentional
+    switch(strlen(guide)) {
+        case 3:
+            mask1 = (guide[2] == 'T') ? condBit : altBit;
+        case 2:
+            mask2 = (guide[1] == 'T') ? condBit : altBit;
+        case 1:
+            mask3 = (guide[0] == 'T') ? condBit : altBit;
+            break;
+        case 0:
+            break;
+        default:
+            LOG(FATAL) << "OAT: bad case in genIT";
+    }
+    mask = (mask3 << 3) | (mask2 << 2) | (mask1 << 1) |
+           (1 << (3 - strlen(guide)));
+    return newLIR2(cUnit, kThumb2It, code, mask);
+}
+
+/*
+ * Insert a kArmPseudoCaseLabel at the beginning of the Dalvik
+ * offset vaddr.  This label will be used to fix up the case
+ * branch table during the assembly phase.  Be sure to set
+ * all resource flags on this to prevent code motion across
+ * target boundaries.  KeyVal is just there for debugging.
+ */
+static ArmLIR* insertCaseLabel(CompilationUnit* cUnit, int vaddr, int keyVal)
+{
+    ArmLIR* lir;
+    for (lir = (ArmLIR*)cUnit->firstLIRInsn; lir; lir = NEXT_LIR(lir)) {
+        if ((lir->opcode == kArmPseudoDalvikByteCodeBoundary) &&
+            (lir->generic.dalvikOffset == vaddr)) {
+            ArmLIR* newLabel = (ArmLIR*)oatNew(sizeof(ArmLIR), true);
+            newLabel->generic.dalvikOffset = vaddr;
+            newLabel->opcode = kArmPseudoCaseLabel;
+            newLabel->operands[0] = keyVal;
+            oatInsertLIRAfter((LIR*)lir, (LIR*)newLabel);
+            return newLabel;
+        }
+    }
+    oatCodegenDump(cUnit);
+    LOG(FATAL) << "Error: didn't find vaddr 0x" << std::hex << vaddr;
+    return NULL; // Quiet gcc
+}
+
+static void markPackedCaseLabels(CompilationUnit* cUnit, SwitchTable *tabRec)
+{
+    const u2* table = tabRec->table;
+    int baseVaddr = tabRec->vaddr;
+    int *targets = (int*)&table[4];
+    int entries = table[1];
+    int lowKey = s4FromSwitchData(&table[2]);
+    for (int i = 0; i < entries; i++) {
+        tabRec->targets[i] = insertCaseLabel(cUnit, baseVaddr + targets[i],
+                                             i + lowKey);
+    }
+}
+
+static void markSparseCaseLabels(CompilationUnit* cUnit, SwitchTable *tabRec)
+{
+    const u2* table = tabRec->table;
+    int baseVaddr = tabRec->vaddr;
+    int entries = table[1];
+    int* keys = (int*)&table[2];
+    int* targets = &keys[entries];
+    for (int i = 0; i < entries; i++) {
+        tabRec->targets[i] = insertCaseLabel(cUnit, baseVaddr + targets[i],
+                                             keys[i]);
+    }
+}
+
+void oatProcessSwitchTables(CompilationUnit* cUnit)
+{
+    GrowableListIterator iterator;
+    oatGrowableListIteratorInit(&cUnit->switchTables, &iterator);
+    while (true) {
+        SwitchTable *tabRec = (SwitchTable *) oatGrowableListIteratorNext(
+             &iterator);
+        if (tabRec == NULL) break;
+        if (tabRec->table[0] == kPackedSwitchSignature)
+            markPackedCaseLabels(cUnit, tabRec);
+        else if (tabRec->table[0] == kSparseSwitchSignature)
+            markSparseCaseLabels(cUnit, tabRec);
+        else {
+            LOG(FATAL) << "Invalid switch table";
+        }
+    }
+}
+
+static void dumpSparseSwitchTable(const u2* table)
+    /*
+     * Sparse switch data format:
+     *  ushort ident = 0x0200   magic value
+     *  ushort size             number of entries in the table; > 0
+     *  int keys[size]          keys, sorted low-to-high; 32-bit aligned
+     *  int targets[size]       branch targets, relative to switch opcode
+     *
+     * Total size is (2+size*4) 16-bit code units.
+     */
+{
+    u2 ident = table[0];
+    int entries = table[1];
+    int* keys = (int*)&table[2];
+    int* targets = &keys[entries];
+    LOG(INFO) <<  "Sparse switch table - ident:0x" << std::hex << ident <<
+       ", entries: " << std::dec << entries;
+    for (int i = 0; i < entries; i++) {
+        LOG(INFO) << "    Key[" << keys[i] << "] -> 0x" << std::hex <<
+        targets[i];
+    }
+}
+
+static void dumpPackedSwitchTable(const u2* table)
+    /*
+     * Packed switch data format:
+     *  ushort ident = 0x0100   magic value
+     *  ushort size             number of entries in the table
+     *  int first_key           first (and lowest) switch case value
+     *  int targets[size]       branch targets, relative to switch opcode
+     *
+     * Total size is (4+size*2) 16-bit code units.
+     */
+{
+    u2 ident = table[0];
+    int* targets = (int*)&table[4];
+    int entries = table[1];
+    int lowKey = s4FromSwitchData(&table[2]);
+    LOG(INFO) << "Packed switch table - ident:0x" << std::hex << ident <<
+        ", entries: " << std::dec << entries << ", lowKey: " << lowKey;
+    for (int i = 0; i < entries; i++) {
+        LOG(INFO) << "    Key[" << (i + lowKey) << "] -> 0x" << std::hex <<
+            targets[i];
+    }
+}
+
+/*
+ * The sparse table in the literal pool is an array of <key,displacement>
+ * pairs.  For each set, we'll load them as a pair using ldmia.
+ * This means that the register number of the temp we use for the key
+ * must be lower than the reg for the displacement.
+ *
+ * The test loop will look something like:
+ *
+ *   adr   rBase, <table>
+ *   ldr   rVal, [rSP, vRegOff]
+ *   mov   rIdx, #tableSize
+ * lp:
+ *   ldmia rBase!, {rKey, rDisp}
+ *   sub   rIdx, #1
+ *   cmp   rVal, rKey
+ *   ifeq
+ *   add   rPC, rDisp   ; This is the branch from which we compute displacement
+ *   cbnz  rIdx, lp
+ */
+static void genSparseSwitch(CompilationUnit* cUnit, MIR* mir,
+                            RegLocation rlSrc)
+{
+    const u2* table = cUnit->insns + mir->offset + mir->dalvikInsn.vB;
+    if (cUnit->printMe) {
+        dumpSparseSwitchTable(table);
+    }
+    // Add the table to the list - we'll process it later
+    SwitchTable *tabRec = (SwitchTable *)oatNew(sizeof(SwitchTable),
+                         true);
+    tabRec->table = table;
+    tabRec->vaddr = mir->offset;
+    int size = table[1];
+    tabRec->targets = (ArmLIR* *)oatNew(size * sizeof(ArmLIR*), true);
+    oatInsertGrowableList(&cUnit->switchTables, (intptr_t)tabRec);
+
+    // Get the switch value
+    rlSrc = loadValue(cUnit, rlSrc, kCoreReg);
+    int rBase = oatAllocTemp(cUnit);
+    /* Allocate key and disp temps */
+    int rKey = oatAllocTemp(cUnit);
+    int rDisp = oatAllocTemp(cUnit);
+    // Make sure rKey's register number is less than rDisp's number for ldmia
+    if (rKey > rDisp) {
+        int tmp = rDisp;
+        rDisp = rKey;
+        rKey = tmp;
+    }
+    // Materialize a pointer to the switch table
+    newLIR3(cUnit, kThumb2AdrST, rBase, 0, (intptr_t)tabRec);
+    // Set up rIdx
+    int rIdx = oatAllocTemp(cUnit);
+    loadConstant(cUnit, rIdx, size);
+    // Establish loop branch target
+    ArmLIR* target = newLIR0(cUnit, kArmPseudoTargetLabel);
+    target->defMask = ENCODE_ALL;
+    // Load next key/disp
+    newLIR2(cUnit, kThumb2LdmiaWB, rBase, (1 << rKey) | (1 << rDisp));
+    opRegReg(cUnit, kOpCmp, rKey, rlSrc.lowReg);
+    // Go if match. NOTE: No instruction set switch here - must stay Thumb2
+    genIT(cUnit, kArmCondEq, "");
+    ArmLIR* switchBranch = newLIR1(cUnit, kThumb2AddPCR, rDisp);
+    tabRec->bxInst = switchBranch;
+    // Needs to use setflags encoding here
+    newLIR3(cUnit, kThumb2SubsRRI12, rIdx, rIdx, 1);
+    ArmLIR* branch = opCondBranch(cUnit, kArmCondNe);
+    branch->generic.target = (LIR*)target;
+}
+
+
+static void genPackedSwitch(CompilationUnit* cUnit, MIR* mir,
+                            RegLocation rlSrc)
+{
+    const u2* table = cUnit->insns + mir->offset + mir->dalvikInsn.vB;
+    if (cUnit->printMe) {
+        dumpPackedSwitchTable(table);
+    }
+    // Add the table to the list - we'll process it later
+    SwitchTable *tabRec = (SwitchTable *)oatNew(sizeof(SwitchTable),
+                         true);
+    tabRec->table = table;
+    tabRec->vaddr = mir->offset;
+    int size = table[1];
+    tabRec->targets = (ArmLIR* *)oatNew(size * sizeof(ArmLIR*), true);
+    oatInsertGrowableList(&cUnit->switchTables, (intptr_t)tabRec);
+
+    // Get the switch value
+    rlSrc = loadValue(cUnit, rlSrc, kCoreReg);
+    int tableBase = oatAllocTemp(cUnit);
+    // Materialize a pointer to the switch table
+    newLIR3(cUnit, kThumb2AdrST, tableBase, 0, (intptr_t)tabRec);
+    int lowKey = s4FromSwitchData(&table[2]);
+    int keyReg;
+    // Remove the bias, if necessary
+    if (lowKey == 0) {
+        keyReg = rlSrc.lowReg;
+    } else {
+        keyReg = oatAllocTemp(cUnit);
+        opRegRegImm(cUnit, kOpSub, keyReg, rlSrc.lowReg, lowKey);
+    }
+    // Bounds check - if < 0 or >= size continue following switch
+    opRegImm(cUnit, kOpCmp, keyReg, size-1);
+    ArmLIR* branchOver = opCondBranch(cUnit, kArmCondHi);
+
+    // Load the displacement from the switch table
+    int dispReg = oatAllocTemp(cUnit);
+    loadBaseIndexed(cUnit, tableBase, keyReg, dispReg, 2, kWord);
+
+    // ..and go! NOTE: No instruction set switch here - must stay Thumb2
+    ArmLIR* switchBranch = newLIR1(cUnit, kThumb2AddPCR, dispReg);
+    tabRec->bxInst = switchBranch;
+
+    /* branchOver target here */
+    ArmLIR* target = newLIR0(cUnit, kArmPseudoTargetLabel);
+    target->defMask = ENCODE_ALL;
+    branchOver->generic.target = (LIR*)target;
+}
+
+/*
+ * Array data table format:
+ *  ushort ident = 0x0300   magic value
+ *  ushort width            width of each element in the table
+ *  uint   size             number of elements in the table
+ *  ubyte  data[size*width] table of data values (may contain a single-byte
+ *                          padding at the end)
+ *
+ * Total size is 4+(width * size + 1)/2 16-bit code units.
+ */
+static void genFillArrayData(CompilationUnit* cUnit, MIR* mir,
+                              RegLocation rlSrc)
+{
+    const u2* table = cUnit->insns + mir->offset + mir->dalvikInsn.vB;
+    // Add the table to the list - we'll process it later
+    FillArrayData *tabRec = (FillArrayData *)
+         oatNew(sizeof(FillArrayData), true);
+    tabRec->table = table;
+    tabRec->vaddr = mir->offset;
+    u2 width = tabRec->table[1];
+    u4 size = tabRec->table[2] | (((u4)tabRec->table[3]) << 16);
+    tabRec->size = (size * width) + 8;
+
+    oatInsertGrowableList(&cUnit->fillArrayData, (intptr_t)tabRec);
+
+    // Making a call - use explicit registers
+    oatFlushAllRegs(cUnit);   /* Everything to home location */
+    loadValueDirectFixed(cUnit, rlSrc, r0);
+    loadWordDisp(cUnit, rSELF,
+                 OFFSETOF_MEMBER(Thread, pArtHandleFillArrayDataNoThrow), rLR);
+    // Materialize a pointer to the switch table
+    newLIR3(cUnit, kThumb2AdrST, r1, 0, (intptr_t)tabRec);
+    opReg(cUnit, kOpBlx, rLR);
+    oatClobberCallRegs(cUnit);
+}
+
+/*
+ * Mark garbage collection card. Skip if the value we're storing is null.
+ */
+static void markGCCard(CompilationUnit* cUnit, int valReg, int tgtAddrReg)
+{
+    int regCardBase = oatAllocTemp(cUnit);
+    int regCardNo = oatAllocTemp(cUnit);
+    ArmLIR* branchOver = genCmpImmBranch(cUnit, kArmCondEq, valReg, 0);
+    loadWordDisp(cUnit, rSELF, offsetof(Thread, cardTable),
+                 regCardBase);
+    opRegRegImm(cUnit, kOpLsr, regCardNo, tgtAddrReg, GC_CARD_SHIFT);
+    storeBaseIndexed(cUnit, regCardBase, regCardNo, regCardBase, 0,
+                     kUnsignedByte);
+    ArmLIR* target = newLIR0(cUnit, kArmPseudoTargetLabel);
+    target->defMask = ENCODE_ALL;
+    branchOver->generic.target = (LIR*)target;
+    oatFreeTemp(cUnit, regCardBase);
+    oatFreeTemp(cUnit, regCardNo);
+}
+
+static void genIGetX(CompilationUnit* cUnit, MIR* mir, OpSize size,
+                     RegLocation rlDest, RegLocation rlObj)
+{
+    Field* fieldPtr =
+        cUnit->method->clazz->pDvmDex->pResFields[mir->dalvikInsn.vC];
+    if (fieldPtr == NULL) {
+        /*
+         * With current scheme, we should never be in a situation
+         * in which the fieldPtr is null here.  If something changes
+         * and we need to handle it, generate code to load the field
+         * pointer at run-time.
+         */
+        LOG(FATAL) << "Unexpected null field pointer";
+    }
+#if ANDROID_SMP != 0
+    bool isVolatile = dvmIsVolatileField(fieldPtr);
+#else
+    bool isVolatile = false;
+#endif
+    int fieldOffset = ((InstField *)fieldPtr)->byteOffset;
+    RegLocation rlResult;
+    RegisterClass regClass = oatRegClassBySize(size);
+    rlObj = loadValue(cUnit, rlObj, kCoreReg);
+    rlResult = oatEvalLoc(cUnit, rlDest, regClass, true);
+    genNullCheck(cUnit, rlObj.sRegLow, rlObj.lowReg, mir->offset,
+                 NULL);/* null object? */
+    loadBaseDisp(cUnit, mir, rlObj.lowReg, fieldOffset, rlResult.lowReg,
+                 size, rlObj.sRegLow);
+    if (isVolatile) {
+        oatGenMemBarrier(cUnit, kSY);
+    }
+
+    storeValue(cUnit, rlDest, rlResult);
+}
+
+static void genIPutX(CompilationUnit* cUnit, MIR* mir, OpSize size,
+                    RegLocation rlSrc, RegLocation rlObj, bool isObject)
+{
+    Field* fieldPtr =
+        cUnit->method->clazz->pDvmDex->pResFields[mir->dalvikInsn.vC];
+    if (fieldPtr == NULL) {
+        /*
+         * With current scheme, we should never be in a situation
+         * in which the fieldPtr is null here.  If something changes
+         * and we need to handle it, generate code to load the field
+         * pointer at run-time.
+         */
+        LOG(FATAL) << "Unexpected null field pointer";
+    }
+#if ANDROID_SMP != 0
+    bool isVolatile = dvmIsVolatileField(fieldPtr);
+#else
+    bool isVolatile = false;
+#endif
+    int fieldOffset = ((InstField *)fieldPtr)->byteOffset;
+    RegisterClass regClass = oatRegClassBySize(size);
+    rlObj = loadValue(cUnit, rlObj, kCoreReg);
+    rlSrc = loadValue(cUnit, rlSrc, regClass);
+    genNullCheck(cUnit, rlObj.sRegLow, rlObj.lowReg, mir->offset,
+                 NULL);/* null object? */
+
+    if (isVolatile) {
+        oatGenMemBarrier(cUnit, kSY);
+    }
+    storeBaseDisp(cUnit, rlObj.lowReg, fieldOffset, rlSrc.lowReg, size);
+    if (isObject) {
+        /* NOTE: marking card based on object head */
+        markGCCard(cUnit, rlSrc.lowReg, rlObj.lowReg);
+    }
+}
+
+static void genIGetWideX(CompilationUnit* cUnit, MIR* mir, RegLocation rlDest,
+                        RegLocation rlObj)
+{
+   Field* fieldPtr =
+       cUnit->method->clazz->pDvmDex->pResFields[mir->dalvikInsn.vC];
+    if (fieldPtr == NULL) {
+        /*
+         * With current scheme, we should never be in a situation
+         * in which the fieldPtr is null here.  If something changes
+         * and we need to handle it, generate code to load the field
+         * pointer at run-time.
+         */
+        LOG(FATAL) << "Unexpected null field pointer";
+    }
+#if ANDROID_SMP != 0
+    bool isVolatile = dvmIsVolatileField(fieldPtr);
+#else
+    bool isVolatile = false;
+#endif
+    int fieldOffset = ((InstField *)fieldPtr)->byteOffset;
+    RegLocation rlResult;
+    rlObj = loadValue(cUnit, rlObj, kCoreReg);
+    int regPtr = oatAllocTemp(cUnit);
+
+    assert(rlDest.wide);
+
+    genNullCheck(cUnit, rlObj.sRegLow, rlObj.lowReg, mir->offset,
+                 NULL);/* null object? */
+    opRegRegImm(cUnit, kOpAdd, regPtr, rlObj.lowReg, fieldOffset);
+    rlResult = oatEvalLoc(cUnit, rlDest, kAnyReg, true);
+
+    loadPair(cUnit, regPtr, rlResult.lowReg, rlResult.highReg);
+
+    if (isVolatile) {
+        oatGenMemBarrier(cUnit, kSY);
+    }
+
+    oatFreeTemp(cUnit, regPtr);
+    storeValueWide(cUnit, rlDest, rlResult);
+}
+
+static void genIPutWideX(CompilationUnit* cUnit, MIR* mir, RegLocation rlSrc,
+                        RegLocation rlObj)
+{
+   Field* fieldPtr =
+        cUnit->method->clazz->pDvmDex->pResFields[mir->dalvikInsn.vC];
+    if (fieldPtr == NULL) {
+        /*
+         * With current scheme, we should never be in a situation
+         * in which the fieldPtr is null here.  If something changes
+         * and we need to handle it, generate code to load the field
+         * pointer at run-time.
+         */
+        LOG(FATAL) << "Unexpected null field pointer";
+    }
+#if ANDROID_SMP != 0
+    bool isVolatile = dvmIsVolatileField(fieldPtr);
+#else
+    bool isVolatile = false;
+#endif
+    int fieldOffset = ((InstField *)fieldPtr)->byteOffset;
+
+    rlObj = loadValue(cUnit, rlObj, kCoreReg);
+    int regPtr;
+    rlSrc = loadValueWide(cUnit, rlSrc, kAnyReg);
+    genNullCheck(cUnit, rlObj.sRegLow, rlObj.lowReg, mir->offset,
+                 NULL);/* null object? */
+    regPtr = oatAllocTemp(cUnit);
+    opRegRegImm(cUnit, kOpAdd, regPtr, rlObj.lowReg, fieldOffset);
+
+    if (isVolatile) {
+        oatGenMemBarrier(cUnit, kSY);
+    }
+    storePair(cUnit, regPtr, rlSrc.lowReg, rlSrc.highReg);
+
+    oatFreeTemp(cUnit, regPtr);
+}
+
+static void genConstClass(CompilationUnit* cUnit, MIR* mir,
+                          RegLocation rlDest, RegLocation rlSrc)
+{
+    void* classPtr = (void*)
+      (cUnit->method->clazz->pDvmDex->pResClasses[mir->dalvikInsn.vB]);
+
+    if (classPtr == NULL) {
+        LOG(FATAL) << "Unexpected null class pointer";
+    }
+
+    RegLocation rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true);
+    loadConstantNoClobber(cUnit, rlResult.lowReg, (int) classPtr );
+    storeValue(cUnit, rlDest, rlResult);
+}
+
+static void genConstString(CompilationUnit* cUnit, MIR* mir,
+                           RegLocation rlDest, RegLocation rlSrc)
+{
+    void* strPtr = (void*)
+      (cUnit->method->clazz->pDvmDex->pResStrings[mir->dalvikInsn.vB]);
+
+    if (strPtr == NULL) {
+        /* Shouldn't happen */
+        LOG(FATAL) << "Unexpected null const string pointer";
+    }
+
+    RegLocation rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true);
+    loadConstantNoClobber(cUnit, rlResult.lowReg, (int) strPtr );
+    storeValue(cUnit, rlDest, rlResult);
+}
+
+static void genNewInstance(CompilationUnit* cUnit, MIR* mir,
+                           RegLocation rlDest)
+{
+    ClassObject* classPtr = (ClassObject *)
+      (cUnit->method->clazz->pDvmDex->pResClasses[mir->dalvikInsn.vB]);
+
+    if (classPtr == NULL) {
+        /* Shouldn't happen */
+        LOG(FATAL) << "Unexpected null class pointer";
+    }
+
+    // Verifier should have already rejected abstract/interface
+    assert((classPtr->accessFlags & (ACC_INTERFACE|ACC_ABSTRACT)) == 0);
+    oatFlushAllRegs(cUnit);   /* Everything to home location */
+    loadWordDisp(cUnit, rSELF,
+                 OFFSETOF_MEMBER(Thread, pArtAllocObjectNoThrow), rLR);
+    loadConstant(cUnit, r0, (int) classPtr);
+    loadConstant(cUnit, r1, ALLOC_DONT_TRACK);
+    opReg(cUnit, kOpBlx, rLR);
+    oatClobberCallRegs(cUnit);
+    RegLocation rlResult = oatGetReturn(cUnit);
+    storeValue(cUnit, rlDest, rlResult);
+}
+
+void genThrow(CompilationUnit* cUnit, MIR* mir, RegLocation rlSrc)
+{
+    loadWordDisp(cUnit, rSELF,
+                 OFFSETOF_MEMBER(Thread, pArtAllocObjectNoThrow), rLR);
+    loadValueDirectFixed(cUnit, rlSrc, r1);  /* Exception object */
+    genRegCopy(cUnit, r0, rSELF);
+    opReg(cUnit, kOpBlx, rLR);
+}
+
+static void genInstanceof(CompilationUnit* cUnit, MIR* mir, RegLocation rlDest,
+                          RegLocation rlSrc)
+{
+   // May generate a call - use explicit registers
+    RegLocation rlResult;
+    ClassObject* classPtr =
+      (cUnit->method->clazz->pDvmDex->pResClasses[mir->dalvikInsn.vC]);
+    if (classPtr == NULL) {
+        /* Shouldn't happen */
+        LOG(FATAL) << "Unexpected null class pointer";
+    }
+    oatFlushAllRegs(cUnit);   /* Everything to home location */
+    loadValueDirectFixed(cUnit, rlSrc, r0);  /* Ref */
+    loadConstant(cUnit, r2, (int) classPtr );
+    /* When taken r0 has NULL which can be used for store directly */
+    ArmLIR* branch1 = genCmpImmBranch(cUnit, kArmCondEq, r0, 0);
+    /* r1 now contains object->clazz */
+    loadWordDisp(cUnit, r0, offsetof(Object, clazz), r1);
+    /* r1 now contains object->clazz */
+    loadWordDisp(cUnit, rSELF,
+                 OFFSETOF_MEMBER(Thread, pArtInstanceofNonTrivial), rLR);
+    loadConstant(cUnit, r0, 1);                /* Assume true */
+    opRegReg(cUnit, kOpCmp, r1, r2);
+    ArmLIR* branch2 = opCondBranch(cUnit, kArmCondEq);
+    genRegCopy(cUnit, r0, r1);
+    genRegCopy(cUnit, r1, r2);
+    opReg(cUnit, kOpBlx, rLR);
+    oatClobberCallRegs(cUnit);
+    /* branch target here */
+    ArmLIR* target = newLIR0(cUnit, kArmPseudoTargetLabel);
+    target->defMask = ENCODE_ALL;
+    rlResult = oatGetReturn(cUnit);
+    storeValue(cUnit, rlDest, rlResult);
+    branch1->generic.target = (LIR*)target;
+    branch2->generic.target = (LIR*)target;
+}
+
+static void genCheckCast(CompilationUnit* cUnit, MIR* mir, RegLocation rlSrc)
+{
+   ClassObject* classPtr =
+      (cUnit->method->clazz->pDvmDex->pResClasses[mir->dalvikInsn.vB]);
+    if (classPtr == NULL) {
+        /* Shouldn't happen with our current model */
+        LOG(FATAL) << "Unexpected null class pointer";
+    }
+    oatFlushAllRegs(cUnit);   /* Everything to home location */
+    loadConstant(cUnit, r1, (int) classPtr );
+    rlSrc = loadValue(cUnit, rlSrc, kCoreReg);
+    /* Null? */
+    ArmLIR* branch1 = genCmpImmBranch(cUnit, kArmCondEq,
+                                      rlSrc.lowReg, 0);
+    /*
+     *  rlSrc.lowReg now contains object->clazz.  Note that
+     *  it could have been allocated r0, but we're okay so long
+     *  as we don't do anything desctructive until r0 is loaded
+     *  with clazz.
+     */
+    /* r0 now contains object->clazz */
+    loadWordDisp(cUnit, rlSrc.lowReg, offsetof(Object, clazz), r0);
+    loadWordDisp(cUnit, rSELF,
+                 OFFSETOF_MEMBER(Thread, pArtInstanceofNonTrivialNoThrow), rLR);
+    opRegReg(cUnit, kOpCmp, r0, r1);
+    ArmLIR* branch2 = opCondBranch(cUnit, kArmCondEq);
+    // Assume success - if not, artInstanceOfNonTrivial will handle throw
+    opReg(cUnit, kOpBlx, rLR);
+    oatClobberCallRegs(cUnit);
+    ArmLIR* target = newLIR0(cUnit, kArmPseudoTargetLabel);
+    target->defMask = ENCODE_ALL;
+    branch1->generic.target = (LIR*)target;
+    branch2->generic.target = (LIR*)target;
+}
+
+static void genNegFloat(CompilationUnit* cUnit, RegLocation rlDest,
+                        RegLocation rlSrc)
+{
+    RegLocation rlResult;
+    rlSrc = loadValue(cUnit, rlSrc, kFPReg);
+    rlResult = oatEvalLoc(cUnit, rlDest, kFPReg, true);
+    newLIR2(cUnit, kThumb2Vnegs, rlResult.lowReg, rlSrc.lowReg);
+    storeValue(cUnit, rlDest, rlResult);
+}
+
+static void genNegDouble(CompilationUnit* cUnit, RegLocation rlDest,
+                         RegLocation rlSrc)
+{
+    RegLocation rlResult;
+    rlSrc = loadValueWide(cUnit, rlSrc, kFPReg);
+    rlResult = oatEvalLoc(cUnit, rlDest, kFPReg, true);
+    newLIR2(cUnit, kThumb2Vnegd, S2D(rlResult.lowReg, rlResult.highReg),
+            S2D(rlSrc.lowReg, rlSrc.highReg));
+    storeValueWide(cUnit, rlDest, rlResult);
+}
+
+/*
+ * To avoid possible conflicts, we use a lot of temps here.  Note that
+ * our usage of Thumb2 instruction forms avoids the problems with register
+ * reuse for multiply instructions prior to arm6.
+ */
+static void genMulLong(CompilationUnit* cUnit, RegLocation rlDest,
+                       RegLocation rlSrc1, RegLocation rlSrc2)
+{
+    RegLocation rlResult;
+    int resLo = oatAllocTemp(cUnit);
+    int resHi = oatAllocTemp(cUnit);
+    int tmp1 = oatAllocTemp(cUnit);
+
+    rlSrc1 = loadValueWide(cUnit, rlSrc1, kCoreReg);
+    rlSrc2 = loadValueWide(cUnit, rlSrc2, kCoreReg);
+
+    newLIR3(cUnit, kThumb2MulRRR, tmp1, rlSrc2.lowReg, rlSrc1.highReg);
+    newLIR4(cUnit, kThumb2Umull, resLo, resHi, rlSrc2.lowReg, rlSrc1.lowReg);
+    newLIR4(cUnit, kThumb2Mla, tmp1, rlSrc1.lowReg, rlSrc2.highReg, tmp1);
+    newLIR4(cUnit, kThumb2AddRRR, resHi, tmp1, resHi, 0);
+    oatFreeTemp(cUnit, tmp1);
+
+    rlResult = oatGetReturnWide(cUnit);
+    rlResult.lowReg = resLo;
+    rlResult.highReg = resHi;
+    storeValueWide(cUnit, rlDest, rlResult);
+}
+
+static void genLong3Addr(CompilationUnit* cUnit, MIR* mir, OpKind firstOp,
+                         OpKind secondOp, RegLocation rlDest,
+                         RegLocation rlSrc1, RegLocation rlSrc2)
+{
+    RegLocation rlResult;
+    rlSrc1 = loadValueWide(cUnit, rlSrc1, kCoreReg);
+    rlSrc2 = loadValueWide(cUnit, rlSrc2, kCoreReg);
+    rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true);
+    opRegRegReg(cUnit, firstOp, rlResult.lowReg, rlSrc1.lowReg, rlSrc2.lowReg);
+    opRegRegReg(cUnit, secondOp, rlResult.highReg, rlSrc1.highReg,
+                rlSrc2.highReg);
+    storeValueWide(cUnit, rlDest, rlResult);
+}
+
+void oatInitializeRegAlloc(CompilationUnit* cUnit)
+{
+    int numRegs = sizeof(coreRegs)/sizeof(*coreRegs);
+    int numReserved = sizeof(reservedRegs)/sizeof(*reservedRegs);
+    int numTemps = sizeof(coreTemps)/sizeof(*coreTemps);
+    int numFPRegs = sizeof(fpRegs)/sizeof(*fpRegs);
+    int numFPTemps = sizeof(fpTemps)/sizeof(*fpTemps);
+    RegisterPool *pool = (RegisterPool *)oatNew(sizeof(*pool), true);
+    cUnit->regPool = pool;
+    pool->numCoreRegs = numRegs;
+    pool->coreRegs = (RegisterInfo *)
+            oatNew(numRegs * sizeof(*cUnit->regPool->coreRegs), true);
+    pool->numFPRegs = numFPRegs;
+    pool->FPRegs = (RegisterInfo *)
+            oatNew(numFPRegs * sizeof(*cUnit->regPool->FPRegs), true);
+    oatInitPool(pool->coreRegs, coreRegs, pool->numCoreRegs);
+    oatInitPool(pool->FPRegs, fpRegs, pool->numFPRegs);
+    // Keep special registers from being allocated
+    for (int i = 0; i < numReserved; i++) {
+        oatMarkInUse(cUnit, reservedRegs[i]);
+    }
+    // Mark temp regs - all others not in use can be used for promotion
+    for (int i = 0; i < numTemps; i++) {
+        oatMarkTemp(cUnit, coreTemps[i]);
+    }
+    for (int i = 0; i < numFPTemps; i++) {
+        oatMarkTemp(cUnit, fpTemps[i]);
+    }
+    pool->nullCheckedRegs =
+        oatAllocBitVector(cUnit->numSSARegs, false);
+}
+
+/*
+ * Handle simple case (thin lock) inline.  If it's complicated, bail
+ * out to the heavyweight lock/unlock routines.  We'll use dedicated
+ * registers here in order to be in the right position in case we
+ * to bail to dvm[Lock/Unlock]Object(self, object)
+ *
+ * r0 -> self pointer [arg0 for dvm[Lock/Unlock]Object
+ * r1 -> object [arg1 for dvm[Lock/Unlock]Object
+ * r2 -> intial contents of object->lock, later result of strex
+ * r3 -> self->threadId
+ * r12 -> allow to be used by utilities as general temp
+ *
+ * The result of the strex is 0 if we acquire the lock.
+ *
+ * See comments in Sync.c for the layout of the lock word.
+ * Of particular interest to this code is the test for the
+ * simple case - which we handle inline.  For monitor enter, the
+ * simple case is thin lock, held by no-one.  For monitor exit,
+ * the simple case is thin lock, held by the unlocking thread with
+ * a recurse count of 0.
+ *
+ * A minor complication is that there is a field in the lock word
+ * unrelated to locking: the hash state.  This field must be ignored, but
+ * preserved.
+ *
+ */
+static void genMonitorEnter(CompilationUnit* cUnit, MIR* mir,
+                            RegLocation rlSrc)
+{
+    ArmLIR* target;
+    ArmLIR* hopTarget;
+    ArmLIR* branch;
+    ArmLIR* hopBranch;
+
+    oatFlushAllRegs(cUnit);
+    assert(LW_SHAPE_THIN == 0);
+    loadValueDirectFixed(cUnit, rlSrc, r1);  // Get obj
+    oatLockAllTemps(cUnit);  // Prepare for explicit register usage
+    genNullCheck(cUnit, rlSrc.sRegLow, r1, mir->offset, NULL);
+    loadWordDisp(cUnit, rSELF, offsetof(Thread, threadId), r3); // Get threadId
+    newLIR3(cUnit, kThumb2Ldrex, r2, r1,
+            offsetof(Object, lock) >> 2); // Get object->lock
+    opRegImm(cUnit, kOpLsl, r3, LW_LOCK_OWNER_SHIFT); // Align owner
+    // Is lock unheld on lock or held by us (==threadId) on unlock?
+    newLIR4(cUnit, kThumb2Bfi, r3, r2, 0, LW_LOCK_OWNER_SHIFT - 1);
+    newLIR3(cUnit, kThumb2Bfc, r2, LW_HASH_STATE_SHIFT,
+            LW_LOCK_OWNER_SHIFT - 1);
+    hopBranch = newLIR2(cUnit, kThumb2Cbnz, r2, 0);
+    newLIR4(cUnit, kThumb2Strex, r2, r3, r1, offsetof(Object, lock) >> 2);
+    oatGenMemBarrier(cUnit, kSY);
+    branch = newLIR2(cUnit, kThumb2Cbz, r2, 0);
+
+    hopTarget = newLIR0(cUnit, kArmPseudoTargetLabel);
+    hopTarget->defMask = ENCODE_ALL;
+    hopBranch->generic.target = (LIR*)hopTarget;
+
+    // Go expensive route - artLockObjectNoThrow(self, obj);
+    loadWordDisp(cUnit, rSELF, OFFSETOF_MEMBER(Thread, pArtLockObjectNoThrow),
+                 rLR);
+    genRegCopy(cUnit, r0, rSELF);
+    newLIR1(cUnit, kThumbBlxR, rLR);
+
+    // Resume here
+    target = newLIR0(cUnit, kArmPseudoTargetLabel);
+    target->defMask = ENCODE_ALL;
+    branch->generic.target = (LIR*)target;
+}
+
+/*
+ * For monitor unlock, we don't have to use ldrex/strex.  Once
+ * we've determined that the lock is thin and that we own it with
+ * a zero recursion count, it's safe to punch it back to the
+ * initial, unlock thin state with a store word.
+ */
+static void genMonitorExit(CompilationUnit* cUnit, MIR* mir,
+                           RegLocation rlSrc)
+{
+    ArmLIR* target;
+    ArmLIR* branch;
+    ArmLIR* hopTarget;
+    ArmLIR* hopBranch;
+
+    assert(LW_SHAPE_THIN == 0);
+    oatFlushAllRegs(cUnit);
+    loadValueDirectFixed(cUnit, rlSrc, r1);  // Get obj
+    oatLockAllTemps(cUnit);  // Prepare for explicit register usage
+    genNullCheck(cUnit, rlSrc.sRegLow, r1, mir->offset, NULL);
+    loadWordDisp(cUnit, r1, offsetof(Object, lock), r2); // Get object->lock
+    loadWordDisp(cUnit, rSELF, offsetof(Thread, threadId), r3); // Get threadId
+    // Is lock unheld on lock or held by us (==threadId) on unlock?
+    opRegRegImm(cUnit, kOpAnd, r12, r2,
+                (LW_HASH_STATE_MASK << LW_HASH_STATE_SHIFT));
+    opRegImm(cUnit, kOpLsl, r3, LW_LOCK_OWNER_SHIFT); // Align owner
+    newLIR3(cUnit, kThumb2Bfc, r2, LW_HASH_STATE_SHIFT,
+            LW_LOCK_OWNER_SHIFT - 1);
+    opRegReg(cUnit, kOpSub, r2, r3);
+    hopBranch = opCondBranch(cUnit, kArmCondNe);
+    oatGenMemBarrier(cUnit, kSY);
+    storeWordDisp(cUnit, r1, offsetof(Object, lock), r12);
+    branch = opNone(cUnit, kOpUncondBr);
+
+    hopTarget = newLIR0(cUnit, kArmPseudoTargetLabel);
+    hopTarget->defMask = ENCODE_ALL;
+    hopBranch->generic.target = (LIR*)hopTarget;
+
+    // Go expensive route - artUnlockObjectNoThrow(self, obj);
+    loadWordDisp(cUnit, rSELF, OFFSETOF_MEMBER(Thread, pArtUnlockObjectNoThrow),
+                 rLR);
+    genRegCopy(cUnit, r0, rSELF);
+    newLIR1(cUnit, kThumbBlxR, rLR);
+
+    // Resume here
+    target = newLIR0(cUnit, kArmPseudoTargetLabel);
+    target->defMask = ENCODE_ALL;
+    branch->generic.target = (LIR*)target;
+}
+
+/*
+ * 64-bit 3way compare function.
+ *     mov   rX, #-1
+ *     cmp   op1hi, op2hi
+ *     blt   done
+ *     bgt   flip
+ *     sub   rX, op1lo, op2lo (treat as unsigned)
+ *     beq   done
+ *     ite   hi
+ *     mov(hi)   rX, #-1
+ *     mov(!hi)  rX, #1
+ * flip:
+ *     neg   rX
+ * done:
+ */
+static void genCmpLong(CompilationUnit* cUnit, MIR* mir,
+                       RegLocation rlDest, RegLocation rlSrc1,
+                       RegLocation rlSrc2)
+{
+    RegLocation rlTemp = LOC_C_RETURN; // Just using as template, will change
+    ArmLIR* target1;
+    ArmLIR* target2;
+    rlSrc1 = loadValueWide(cUnit, rlSrc1, kCoreReg);
+    rlSrc2 = loadValueWide(cUnit, rlSrc2, kCoreReg);
+    rlTemp.lowReg = oatAllocTemp(cUnit);
+    loadConstant(cUnit, rlTemp.lowReg, -1);
+    opRegReg(cUnit, kOpCmp, rlSrc1.highReg, rlSrc2.highReg);
+    ArmLIR* branch1 = opCondBranch(cUnit, kArmCondLt);
+    ArmLIR* branch2 = opCondBranch(cUnit, kArmCondGt);
+    opRegRegReg(cUnit, kOpSub, rlTemp.lowReg, rlSrc1.lowReg, rlSrc2.lowReg);
+    ArmLIR* branch3 = opCondBranch(cUnit, kArmCondEq);
+
+    genIT(cUnit, kArmCondHi, "E");
+    newLIR2(cUnit, kThumb2MovImmShift, rlTemp.lowReg, modifiedImmediate(-1));
+    loadConstant(cUnit, rlTemp.lowReg, 1);
+    genBarrier(cUnit);
+
+    target2 = newLIR0(cUnit, kArmPseudoTargetLabel);
+    target2->defMask = -1;
+    opRegReg(cUnit, kOpNeg, rlTemp.lowReg, rlTemp.lowReg);
+
+    target1 = newLIR0(cUnit, kArmPseudoTargetLabel);
+    target1->defMask = -1;
+
+    storeValue(cUnit, rlDest, rlTemp);
+
+    branch1->generic.target = (LIR*)target1;
+    branch2->generic.target = (LIR*)target2;
+    branch3->generic.target = branch1->generic.target;
+}
+
+static void genMultiplyByTwoBitMultiplier(CompilationUnit* cUnit,
+        RegLocation rlSrc, RegLocation rlResult, int lit,
+        int firstBit, int secondBit)
+{
+    opRegRegRegShift(cUnit, kOpAdd, rlResult.lowReg, rlSrc.lowReg, rlSrc.lowReg,
+                     encodeShift(kArmLsl, secondBit - firstBit));
+    if (firstBit != 0) {
+        opRegRegImm(cUnit, kOpLsl, rlResult.lowReg, rlResult.lowReg, firstBit);
+    }
+}
+
+static bool genConversionCall(CompilationUnit* cUnit, MIR* mir, int funcOffset,
+                                     int srcSize, int tgtSize)
+{
+    /*
+     * Don't optimize the register usage since it calls out to support
+     * functions
+     */
+    RegLocation rlSrc;
+    RegLocation rlDest;
+    oatFlushAllRegs(cUnit);   /* Send everything to home location */
+    loadWordDisp(cUnit, rSELF, funcOffset, rLR);
+    if (srcSize == 1) {
+        rlSrc = oatGetSrc(cUnit, mir, 0);
+        loadValueDirectFixed(cUnit, rlSrc, r0);
+    } else {
+        rlSrc = oatGetSrcWide(cUnit, mir, 0, 1);
+        loadValueDirectWideFixed(cUnit, rlSrc, r0, r1);
+    }
+    opReg(cUnit, kOpBlx, rLR);
+    oatClobberCallRegs(cUnit);
+    if (tgtSize == 1) {
+        RegLocation rlResult;
+        rlDest = oatGetDest(cUnit, mir, 0);
+        rlResult = oatGetReturn(cUnit);
+        storeValue(cUnit, rlDest, rlResult);
+    } else {
+        RegLocation rlResult;
+        rlDest = oatGetDestWide(cUnit, mir, 0, 1);
+        rlResult = oatGetReturnWide(cUnit);
+        storeValueWide(cUnit, rlDest, rlResult);
+    }
+    return false;
+}
+
+static bool genArithOpFloatPortable(CompilationUnit* cUnit, MIR* mir,
+                                    RegLocation rlDest, RegLocation rlSrc1,
+                                    RegLocation rlSrc2)
+{
+    RegLocation rlResult;
+    int funcOffset;
+
+    switch (mir->dalvikInsn.opcode) {
+        case OP_ADD_FLOAT_2ADDR:
+        case OP_ADD_FLOAT:
+            funcOffset = OFFSETOF_MEMBER(Thread, pFadd);
+            break;
+        case OP_SUB_FLOAT_2ADDR:
+        case OP_SUB_FLOAT:
+            funcOffset = OFFSETOF_MEMBER(Thread, pFsub);
+            break;
+        case OP_DIV_FLOAT_2ADDR:
+        case OP_DIV_FLOAT:
+            funcOffset = OFFSETOF_MEMBER(Thread, pFdiv);
+            break;
+        case OP_MUL_FLOAT_2ADDR:
+        case OP_MUL_FLOAT:
+            funcOffset = OFFSETOF_MEMBER(Thread, pFmul);
+            break;
+        case OP_REM_FLOAT_2ADDR:
+        case OP_REM_FLOAT:
+            funcOffset = OFFSETOF_MEMBER(Thread, pFmodf);
+            break;
+        case OP_NEG_FLOAT: {
+            genNegFloat(cUnit, rlDest, rlSrc1);
+            return false;
+        }
+        default:
+            return true;
+    }
+    oatFlushAllRegs(cUnit);   /* Send everything to home location */
+    loadWordDisp(cUnit, rSELF, funcOffset, rLR);
+    loadValueDirectFixed(cUnit, rlSrc1, r0);
+    loadValueDirectFixed(cUnit, rlSrc2, r1);
+    opReg(cUnit, kOpBlx, rLR);
+    oatClobberCallRegs(cUnit);
+    rlResult = oatGetReturn(cUnit);
+    storeValue(cUnit, rlDest, rlResult);
+    return false;
+}
+
+static bool genArithOpDoublePortable(CompilationUnit* cUnit, MIR* mir,
+                                     RegLocation rlDest, RegLocation rlSrc1,
+                                     RegLocation rlSrc2)
+{
+    RegLocation rlResult;
+    int funcOffset;
+
+    switch (mir->dalvikInsn.opcode) {
+        case OP_ADD_DOUBLE_2ADDR:
+        case OP_ADD_DOUBLE:
+            funcOffset = OFFSETOF_MEMBER(Thread, pDadd);
+            break;
+        case OP_SUB_DOUBLE_2ADDR:
+        case OP_SUB_DOUBLE:
+            funcOffset = OFFSETOF_MEMBER(Thread, pDsub);
+            break;
+        case OP_DIV_DOUBLE_2ADDR:
+        case OP_DIV_DOUBLE:
+            funcOffset = OFFSETOF_MEMBER(Thread, pDdiv);
+            break;
+        case OP_MUL_DOUBLE_2ADDR:
+        case OP_MUL_DOUBLE:
+            funcOffset = OFFSETOF_MEMBER(Thread, pDmul);
+            break;
+        case OP_REM_DOUBLE_2ADDR:
+        case OP_REM_DOUBLE:
+            funcOffset = OFFSETOF_MEMBER(Thread, pFmod);
+            break;
+        case OP_NEG_DOUBLE: {
+            genNegDouble(cUnit, rlDest, rlSrc1);
+            return false;
+        }
+        default:
+            return true;
+    }
+    oatFlushAllRegs(cUnit);   /* Send everything to home location */
+    loadWordDisp(cUnit, rSELF, funcOffset, rLR);
+    loadValueDirectWideFixed(cUnit, rlSrc1, r0, r1);
+    loadValueDirectWideFixed(cUnit, rlSrc2, r2, r3);
+    opReg(cUnit, kOpBlx, rLR);
+    oatClobberCallRegs(cUnit);
+    rlResult = oatGetReturnWide(cUnit);
+    storeValueWide(cUnit, rlDest, rlResult);
+    return false;
+}
+
+static bool genConversionPortable(CompilationUnit* cUnit, MIR* mir)
+{
+    Opcode opcode = mir->dalvikInsn.opcode;
+
+    switch (opcode) {
+        case OP_INT_TO_FLOAT:
+            return genConversionCall(cUnit, mir, OFFSETOF_MEMBER(Thread, pI2f),
+                                     1, 1);
+        case OP_FLOAT_TO_INT:
+            return genConversionCall(cUnit, mir, OFFSETOF_MEMBER(Thread, pF2iz),
+                                     1, 1);
+        case OP_DOUBLE_TO_FLOAT:
+            return genConversionCall(cUnit, mir, OFFSETOF_MEMBER(Thread, pD2f),
+                                     2, 1);
+        case OP_FLOAT_TO_DOUBLE:
+            return genConversionCall(cUnit, mir, OFFSETOF_MEMBER(Thread, pF2d),
+                                     1, 2);
+        case OP_INT_TO_DOUBLE:
+            return genConversionCall(cUnit, mir, OFFSETOF_MEMBER(Thread, pI2d),
+                                     1, 2);
+        case OP_DOUBLE_TO_INT:
+            return genConversionCall(cUnit, mir, OFFSETOF_MEMBER(Thread, pD2iz),
+                                     2, 1);
+        case OP_FLOAT_TO_LONG:
+            return genConversionCall(cUnit, mir, OFFSETOF_MEMBER(Thread,
+                                     pArtF2l), 1, 2);
+        case OP_LONG_TO_FLOAT:
+            return genConversionCall(cUnit, mir, OFFSETOF_MEMBER(Thread, pL2f),
+                                     2, 1);
+        case OP_DOUBLE_TO_LONG:
+            return genConversionCall(cUnit, mir, OFFSETOF_MEMBER(Thread,
+                                     pArtD2l), 2, 2);
+        case OP_LONG_TO_DOUBLE:
+            return genConversionCall(cUnit, mir, OFFSETOF_MEMBER(Thread, pL2d),
+                                     2, 2);
+        default:
+            return true;
+    }
+    return false;
+}
+
+/* Generate conditional branch instructions */
+static ArmLIR* genConditionalBranch(CompilationUnit* cUnit,
+                                    ArmConditionCode cond,
+                                    ArmLIR* target)
+{
+    ArmLIR* branch = opCondBranch(cUnit, cond);
+    branch->generic.target = (LIR*) target;
+    return branch;
+}
+
+/* Generate a unconditional branch to go to the interpreter */
+static inline ArmLIR* genTrap(CompilationUnit* cUnit, int dOffset,
+                                  ArmLIR* pcrLabel)
+{
+    ArmLIR* branch = opNone(cUnit, kOpUncondBr);
+    return genCheckCommon(cUnit, dOffset, branch, pcrLabel);
+}
+
+/*
+ * Generate array store
+ *
+ */
+static void genArrayObjectPut(CompilationUnit* cUnit, MIR* mir,
+                              RegLocation rlArray, RegLocation rlIndex,
+                              RegLocation rlSrc, int scale)
+{
+    RegisterClass regClass = oatRegClassBySize(kWord);
+    int lenOffset = OFFSETOF_MEMBER(ArrayObject, length);
+    int dataOffset = OFFSETOF_MEMBER(ArrayObject, contents);
+
+    /* Make sure it's a legal object Put. Use direct regs at first */
+    loadValueDirectFixed(cUnit, rlArray, r1);
+    loadValueDirectFixed(cUnit, rlSrc, r0);
+
+    /* null array object? */
+    ArmLIR*  pcrLabel = NULL;
+
+    if (!(mir->OptimizationFlags & MIR_IGNORE_NULL_CHECK)) {
+        pcrLabel = genNullCheck(cUnit, rlArray.sRegLow, r1,
+                                mir->offset, NULL);
+    }
+    loadWordDisp(cUnit, rSELF,
+                 OFFSETOF_MEMBER(Thread, pArtCanPutArrayElementNoThrow), rLR);
+    /* Get the array's clazz */
+    loadWordDisp(cUnit, r1, offsetof(Object, clazz), r1);
+    /* Get the object's clazz */
+    loadWordDisp(cUnit, r0, offsetof(Object, clazz), r0);
+    opReg(cUnit, kOpBlx, rLR);
+    oatClobberCallRegs(cUnit);
+
+    // Now, redo loadValues in case they didn't survive the call
+
+    int regPtr;
+    rlArray = loadValue(cUnit, rlArray, kCoreReg);
+    rlIndex = loadValue(cUnit, rlIndex, kCoreReg);
+
+    if (oatIsTemp(cUnit, rlArray.lowReg)) {
+        oatClobber(cUnit, rlArray.lowReg);
+        regPtr = rlArray.lowReg;
+    } else {
+        regPtr = oatAllocTemp(cUnit);
+        genRegCopy(cUnit, regPtr, rlArray.lowReg);
+    }
+
+    if (!(mir->OptimizationFlags & MIR_IGNORE_RANGE_CHECK)) {
+        int regLen = oatAllocTemp(cUnit);
+        //NOTE: max live temps(4) here.
+        /* Get len */
+        loadWordDisp(cUnit, rlArray.lowReg, lenOffset, regLen);
+        /* regPtr -> array data */
+        opRegImm(cUnit, kOpAdd, regPtr, dataOffset);
+        genBoundsCheck(cUnit, rlIndex.lowReg, regLen, mir->offset,
+                       pcrLabel);
+        oatFreeTemp(cUnit, regLen);
+    } else {
+        /* regPtr -> array data */
+        opRegImm(cUnit, kOpAdd, regPtr, dataOffset);
+    }
+    /* at this point, regPtr points to array, 2 live temps */
+    rlSrc = loadValue(cUnit, rlSrc, regClass);
+    storeBaseIndexed(cUnit, regPtr, rlIndex.lowReg, rlSrc.lowReg,
+                     scale, kWord);
+}
+
+/*
+ * Generate array load
+ */
+static void genArrayGet(CompilationUnit* cUnit, MIR* mir, OpSize size,
+                        RegLocation rlArray, RegLocation rlIndex,
+                        RegLocation rlDest, int scale)
+{
+    RegisterClass regClass = oatRegClassBySize(size);
+    int lenOffset = OFFSETOF_MEMBER(ArrayObject, length);
+    int dataOffset = OFFSETOF_MEMBER(ArrayObject, contents);
+    RegLocation rlResult;
+    rlArray = loadValue(cUnit, rlArray, kCoreReg);
+    rlIndex = loadValue(cUnit, rlIndex, kCoreReg);
+    int regPtr;
+
+    /* null object? */
+    ArmLIR*  pcrLabel = NULL;
+
+    if (!(mir->OptimizationFlags & MIR_IGNORE_NULL_CHECK)) {
+        pcrLabel = genNullCheck(cUnit, rlArray.sRegLow,
+                                rlArray.lowReg, mir->offset, NULL);
+    }
+
+    regPtr = oatAllocTemp(cUnit);
+
+    if (!(mir->OptimizationFlags & MIR_IGNORE_RANGE_CHECK)) {
+        int regLen = oatAllocTemp(cUnit);
+        /* Get len */
+        loadWordDisp(cUnit, rlArray.lowReg, lenOffset, regLen);
+        /* regPtr -> array data */
+        opRegRegImm(cUnit, kOpAdd, regPtr, rlArray.lowReg, dataOffset);
+        genBoundsCheck(cUnit, rlIndex.lowReg, regLen, mir->offset,
+                       pcrLabel);
+        oatFreeTemp(cUnit, regLen);
+    } else {
+        /* regPtr -> array data */
+        opRegRegImm(cUnit, kOpAdd, regPtr, rlArray.lowReg, dataOffset);
+    }
+    if ((size == kLong) || (size == kDouble)) {
+        if (scale) {
+            int rNewIndex = oatAllocTemp(cUnit);
+            opRegRegImm(cUnit, kOpLsl, rNewIndex, rlIndex.lowReg, scale);
+            opRegReg(cUnit, kOpAdd, regPtr, rNewIndex);
+            oatFreeTemp(cUnit, rNewIndex);
+        } else {
+            opRegReg(cUnit, kOpAdd, regPtr, rlIndex.lowReg);
+        }
+        rlResult = oatEvalLoc(cUnit, rlDest, regClass, true);
+
+        loadPair(cUnit, regPtr, rlResult.lowReg, rlResult.highReg);
+
+        oatFreeTemp(cUnit, regPtr);
+        storeValueWide(cUnit, rlDest, rlResult);
+    } else {
+        rlResult = oatEvalLoc(cUnit, rlDest, regClass, true);
+
+        loadBaseIndexed(cUnit, regPtr, rlIndex.lowReg, rlResult.lowReg,
+                        scale, size);
+
+        oatFreeTemp(cUnit, regPtr);
+        storeValue(cUnit, rlDest, rlResult);
+    }
+}
+
+/*
+ * Generate array store
+ *
+ */
+static void genArrayPut(CompilationUnit* cUnit, MIR* mir, OpSize size,
+                        RegLocation rlArray, RegLocation rlIndex,
+                        RegLocation rlSrc, int scale)
+{
+    RegisterClass regClass = oatRegClassBySize(size);
+    int lenOffset = OFFSETOF_MEMBER(ArrayObject, length);
+    int dataOffset = OFFSETOF_MEMBER(ArrayObject, contents);
+
+    int regPtr;
+    rlArray = loadValue(cUnit, rlArray, kCoreReg);
+    rlIndex = loadValue(cUnit, rlIndex, kCoreReg);
+
+    if (oatIsTemp(cUnit, rlArray.lowReg)) {
+        oatClobber(cUnit, rlArray.lowReg);
+        regPtr = rlArray.lowReg;
+    } else {
+        regPtr = oatAllocTemp(cUnit);
+        genRegCopy(cUnit, regPtr, rlArray.lowReg);
+    }
+
+    /* null object? */
+    ArmLIR*  pcrLabel = NULL;
+
+    if (!(mir->OptimizationFlags & MIR_IGNORE_NULL_CHECK)) {
+        pcrLabel = genNullCheck(cUnit, rlArray.sRegLow, rlArray.lowReg,
+                                mir->offset, NULL);
+    }
+
+    if (!(mir->OptimizationFlags & MIR_IGNORE_RANGE_CHECK)) {
+        int regLen = oatAllocTemp(cUnit);
+        //NOTE: max live temps(4) here.
+        /* Get len */
+        loadWordDisp(cUnit, rlArray.lowReg, lenOffset, regLen);
+        /* regPtr -> array data */
+        opRegImm(cUnit, kOpAdd, regPtr, dataOffset);
+        genBoundsCheck(cUnit, rlIndex.lowReg, regLen, mir->offset,
+                       pcrLabel);
+        oatFreeTemp(cUnit, regLen);
+    } else {
+        /* regPtr -> array data */
+        opRegImm(cUnit, kOpAdd, regPtr, dataOffset);
+    }
+    /* at this point, regPtr points to array, 2 live temps */
+    if ((size == kLong) || (size == kDouble)) {
+        //TODO: need specific wide routine that can handle fp regs
+        if (scale) {
+            int rNewIndex = oatAllocTemp(cUnit);
+            opRegRegImm(cUnit, kOpLsl, rNewIndex, rlIndex.lowReg, scale);
+            opRegReg(cUnit, kOpAdd, regPtr, rNewIndex);
+            oatFreeTemp(cUnit, rNewIndex);
+        } else {
+            opRegReg(cUnit, kOpAdd, regPtr, rlIndex.lowReg);
+        }
+        rlSrc = loadValueWide(cUnit, rlSrc, regClass);
+
+        storePair(cUnit, regPtr, rlSrc.lowReg, rlSrc.highReg);
+
+        oatFreeTemp(cUnit, regPtr);
+    } else {
+        rlSrc = loadValue(cUnit, rlSrc, regClass);
+
+        storeBaseIndexed(cUnit, regPtr, rlIndex.lowReg, rlSrc.lowReg,
+                         scale, size);
+    }
+}
+
+static bool genShiftOpLong(CompilationUnit* cUnit, MIR* mir,
+                           RegLocation rlDest, RegLocation rlSrc1,
+                           RegLocation rlShift)
+{
+    /*
+     * Don't mess with the regsiters here as there is a particular calling
+     * convention to the out-of-line handler.
+     */
+    RegLocation rlResult;
+
+    loadValueDirectWideFixed(cUnit, rlSrc1, r0, r1);
+    loadValueDirect(cUnit, rlShift, r2);
+    switch( mir->dalvikInsn.opcode) {
+        case OP_SHL_LONG:
+        case OP_SHL_LONG_2ADDR:
+            //genDispatchToHandler(cUnit, TEMPLATE_SHL_LONG);
+            assert(0);  // unimp
+            break;
+        case OP_SHR_LONG:
+        case OP_SHR_LONG_2ADDR:
+            //genDispatchToHandler(cUnit, TEMPLATE_SHR_LONG);
+            assert(0); // unimp
+            break;
+        case OP_USHR_LONG:
+        case OP_USHR_LONG_2ADDR:
+            //genDispatchToHandler(cUnit, TEMPLATE_USHR_LONG);
+            assert(0); // unimp
+            break;
+        default:
+            return true;
+    }
+    rlResult = oatGetReturnWide(cUnit);
+    storeValueWide(cUnit, rlDest, rlResult);
+    return false;
+}
+
+static bool genArithOpLong(CompilationUnit* cUnit, MIR* mir,
+                           RegLocation rlDest, RegLocation rlSrc1,
+                           RegLocation rlSrc2)
+{
+    RegLocation rlResult;
+    OpKind firstOp = kOpBkpt;
+    OpKind secondOp = kOpBkpt;
+    bool callOut = false;
+    int funcOffset;
+    int retReg = r0;
+
+    switch (mir->dalvikInsn.opcode) {
+        case OP_NOT_LONG:
+            rlSrc2 = loadValueWide(cUnit, rlSrc2, kCoreReg);
+            rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true);
+            opRegReg(cUnit, kOpMvn, rlResult.lowReg, rlSrc2.lowReg);
+            opRegReg(cUnit, kOpMvn, rlResult.highReg, rlSrc2.highReg);
+            storeValueWide(cUnit, rlDest, rlResult);
+            return false;
+            break;
+        case OP_ADD_LONG:
+        case OP_ADD_LONG_2ADDR:
+            firstOp = kOpAdd;
+            secondOp = kOpAdc;
+            break;
+        case OP_SUB_LONG:
+        case OP_SUB_LONG_2ADDR:
+            firstOp = kOpSub;
+            secondOp = kOpSbc;
+            break;
+        case OP_MUL_LONG:
+        case OP_MUL_LONG_2ADDR:
+            genMulLong(cUnit, rlDest, rlSrc1, rlSrc2);
+            return false;
+        case OP_DIV_LONG:
+        case OP_DIV_LONG_2ADDR:
+            callOut = true;
+            retReg = r0;
+            funcOffset = OFFSETOF_MEMBER(Thread, pLdivmod);
+            break;
+        /* NOTE - result is in r2/r3 instead of r0/r1 */
+        case OP_REM_LONG:
+        case OP_REM_LONG_2ADDR:
+            callOut = true;
+            funcOffset = OFFSETOF_MEMBER(Thread, pLdivmod);
+            retReg = r2;
+            break;
+        case OP_AND_LONG_2ADDR:
+        case OP_AND_LONG:
+            firstOp = kOpAnd;
+            secondOp = kOpAnd;
+            break;
+        case OP_OR_LONG:
+        case OP_OR_LONG_2ADDR:
+            firstOp = kOpOr;
+            secondOp = kOpOr;
+            break;
+        case OP_XOR_LONG:
+        case OP_XOR_LONG_2ADDR:
+            firstOp = kOpXor;
+            secondOp = kOpXor;
+            break;
+        case OP_NEG_LONG: {
+            //TUNING: can improve this using Thumb2 code
+            int tReg = oatAllocTemp(cUnit);
+            rlSrc2 = loadValueWide(cUnit, rlSrc2, kCoreReg);
+            rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true);
+            loadConstantNoClobber(cUnit, tReg, 0);
+            opRegRegReg(cUnit, kOpSub, rlResult.lowReg,
+                        tReg, rlSrc2.lowReg);
+            opRegReg(cUnit, kOpSbc, tReg, rlSrc2.highReg);
+            genRegCopy(cUnit, rlResult.highReg, tReg);
+            storeValueWide(cUnit, rlDest, rlResult);
+            return false;
+        }
+        default:
+            LOG(FATAL) << "Invalid long arith op";
+    }
+    if (!callOut) {
+        genLong3Addr(cUnit, mir, firstOp, secondOp, rlDest, rlSrc1, rlSrc2);
+    } else {
+        // Adjust return regs in to handle case of rem returning r2/r3
+        oatFlushAllRegs(cUnit);   /* Send everything to home location */
+        loadWordDisp(cUnit, rSELF, funcOffset, rLR);
+        loadValueDirectWideFixed(cUnit, rlSrc1, r0, r1);
+        loadValueDirectWideFixed(cUnit, rlSrc2, r2, r3);
+        opReg(cUnit, kOpBlx, rLR);
+        oatClobberCallRegs(cUnit);
+        if (retReg == r0)
+            rlResult = oatGetReturnWide(cUnit);
+        else
+            rlResult = oatGetReturnWideAlt(cUnit);
+        storeValueWide(cUnit, rlDest, rlResult);
+    }
+    return false;
+}
+
+static bool genArithOpInt(CompilationUnit* cUnit, MIR* mir,
+                          RegLocation rlDest, RegLocation rlSrc1,
+                          RegLocation rlSrc2)
+{
+    OpKind op = kOpBkpt;
+    bool callOut = false;
+    bool checkZero = false;
+    bool unary = false;
+    int retReg = r0;
+    int funcOffset;
+    RegLocation rlResult;
+    bool shiftOp = false;
+
+    switch (mir->dalvikInsn.opcode) {
+        case OP_NEG_INT:
+            op = kOpNeg;
+            unary = true;
+            break;
+        case OP_NOT_INT:
+            op = kOpMvn;
+            unary = true;
+            break;
+        case OP_ADD_INT:
+        case OP_ADD_INT_2ADDR:
+            op = kOpAdd;
+            break;
+        case OP_SUB_INT:
+        case OP_SUB_INT_2ADDR:
+            op = kOpSub;
+            break;
+        case OP_MUL_INT:
+        case OP_MUL_INT_2ADDR:
+            op = kOpMul;
+            break;
+        case OP_DIV_INT:
+        case OP_DIV_INT_2ADDR:
+            callOut = true;
+            checkZero = true;
+            funcOffset = OFFSETOF_MEMBER(Thread, pIdiv);
+            retReg = r0;
+            break;
+        /* NOTE: returns in r1 */
+        case OP_REM_INT:
+        case OP_REM_INT_2ADDR:
+            callOut = true;
+            checkZero = true;
+            funcOffset = OFFSETOF_MEMBER(Thread, pIdivmod);
+            retReg = r1;
+            break;
+        case OP_AND_INT:
+        case OP_AND_INT_2ADDR:
+            op = kOpAnd;
+            break;
+        case OP_OR_INT:
+        case OP_OR_INT_2ADDR:
+            op = kOpOr;
+            break;
+        case OP_XOR_INT:
+        case OP_XOR_INT_2ADDR:
+            op = kOpXor;
+            break;
+        case OP_SHL_INT:
+        case OP_SHL_INT_2ADDR:
+            shiftOp = true;
+            op = kOpLsl;
+            break;
+        case OP_SHR_INT:
+        case OP_SHR_INT_2ADDR:
+            shiftOp = true;
+            op = kOpAsr;
+            break;
+        case OP_USHR_INT:
+        case OP_USHR_INT_2ADDR:
+            shiftOp = true;
+            op = kOpLsr;
+            break;
+        default:
+            LOG(FATAL) << "Invalid word arith op: " <<
+                (int)mir->dalvikInsn.opcode;
+    }
+    if (!callOut) {
+        rlSrc1 = loadValue(cUnit, rlSrc1, kCoreReg);
+        if (unary) {
+            rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true);
+            opRegReg(cUnit, op, rlResult.lowReg,
+                     rlSrc1.lowReg);
+        } else {
+            rlSrc2 = loadValue(cUnit, rlSrc2, kCoreReg);
+            if (shiftOp) {
+                int tReg = oatAllocTemp(cUnit);
+                opRegRegImm(cUnit, kOpAnd, tReg, rlSrc2.lowReg, 31);
+                rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true);
+                opRegRegReg(cUnit, op, rlResult.lowReg,
+                            rlSrc1.lowReg, tReg);
+                oatFreeTemp(cUnit, tReg);
+            } else {
+                rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true);
+                opRegRegReg(cUnit, op, rlResult.lowReg,
+                            rlSrc1.lowReg, rlSrc2.lowReg);
+            }
+        }
+        storeValue(cUnit, rlDest, rlResult);
+    } else {
+        RegLocation rlResult;
+        oatFlushAllRegs(cUnit);   /* Send everything to home location */
+        loadValueDirectFixed(cUnit, rlSrc2, r1);
+        loadWordDisp(cUnit, rSELF, funcOffset, rLR);
+        loadValueDirectFixed(cUnit, rlSrc1, r0);
+        if (checkZero) {
+            genNullCheck(cUnit, rlSrc2.sRegLow, r1, mir->offset, NULL);
+        }
+        opReg(cUnit, kOpBlx, rLR);
+        oatClobberCallRegs(cUnit);
+        if (retReg == r0)
+            rlResult = oatGetReturn(cUnit);
+        else
+            rlResult = oatGetReturnAlt(cUnit);
+        storeValue(cUnit, rlDest, rlResult);
+    }
+    return false;
+}
+
+/* Generate unconditional branch instructions */
+static ArmLIR* genUnconditionalBranch(CompilationUnit* cUnit, ArmLIR* target)
+{
+    ArmLIR* branch = opNone(cUnit, kOpUncondBr);
+    branch->generic.target = (LIR*) target;
+    return branch;
+}
+
+/*
+ * Fetch *self->info.breakFlags. If the breakFlags are non-zero,
+ * punt to the interpreter.
+ */
+static void genSuspendPoll(CompilationUnit* cUnit, MIR* mir)
+{
+    UNIMPLEMENTED(WARNING);
+#if 0
+    int rTemp = oatAllocTemp(cUnit);
+    ArmLIR* ld;
+    ld = loadBaseDisp(cUnit, NULL, rSELF,
+                      offsetof(Thread, interpBreak.ctl.breakFlags),
+                      rTemp, kUnsignedByte, INVALID_SREG);
+    setMemRefType(ld, true /* isLoad */, kMustNotAlias);
+    genRegImmCheck(cUnit, kArmCondNe, rTemp, 0, mir->offset, NULL);
+#endif
+}
+
+/*
+ * The following are the first-level codegen routines that analyze the format
+ * of each bytecode then either dispatch special purpose codegen routines
+ * or produce corresponding Thumb instructions directly.
+ */
+
+static bool isPowerOfTwo(int x)
+{
+    return (x & (x - 1)) == 0;
+}
+
+// Returns true if no more than two bits are set in 'x'.
+static bool isPopCountLE2(unsigned int x)
+{
+    x &= x - 1;
+    return (x & (x - 1)) == 0;
+}
+
+// Returns the index of the lowest set bit in 'x'.
+static int lowestSetBit(unsigned int x) {
+    int bit_posn = 0;
+    while ((x & 0xf) == 0) {
+        bit_posn += 4;
+        x >>= 4;
+    }
+    while ((x & 1) == 0) {
+        bit_posn++;
+        x >>= 1;
+    }
+    return bit_posn;
+}
+
+// Returns true if it added instructions to 'cUnit' to divide 'rlSrc' by 'lit'
+// and store the result in 'rlDest'.
+static bool handleEasyDivide(CompilationUnit* cUnit, Opcode dalvikOpcode,
+                             RegLocation rlSrc, RegLocation rlDest, int lit)
+{
+    if (lit < 2 || !isPowerOfTwo(lit)) {
+        return false;
+    }
+    int k = lowestSetBit(lit);
+    if (k >= 30) {
+        // Avoid special cases.
+        return false;
+    }
+    bool div = (dalvikOpcode == OP_DIV_INT_LIT8 ||
+                dalvikOpcode == OP_DIV_INT_LIT16);
+    rlSrc = loadValue(cUnit, rlSrc, kCoreReg);
+    RegLocation rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true);
+    if (div) {
+        int tReg = oatAllocTemp(cUnit);
+        if (lit == 2) {
+            // Division by 2 is by far the most common division by constant.
+            opRegRegImm(cUnit, kOpLsr, tReg, rlSrc.lowReg, 32 - k);
+            opRegRegReg(cUnit, kOpAdd, tReg, tReg, rlSrc.lowReg);
+            opRegRegImm(cUnit, kOpAsr, rlResult.lowReg, tReg, k);
+        } else {
+            opRegRegImm(cUnit, kOpAsr, tReg, rlSrc.lowReg, 31);
+            opRegRegImm(cUnit, kOpLsr, tReg, tReg, 32 - k);
+            opRegRegReg(cUnit, kOpAdd, tReg, tReg, rlSrc.lowReg);
+            opRegRegImm(cUnit, kOpAsr, rlResult.lowReg, tReg, k);
+        }
+    } else {
+        int cReg = oatAllocTemp(cUnit);
+        loadConstant(cUnit, cReg, lit - 1);
+        int tReg1 = oatAllocTemp(cUnit);
+        int tReg2 = oatAllocTemp(cUnit);
+        if (lit == 2) {
+            opRegRegImm(cUnit, kOpLsr, tReg1, rlSrc.lowReg, 32 - k);
+            opRegRegReg(cUnit, kOpAdd, tReg2, tReg1, rlSrc.lowReg);
+            opRegRegReg(cUnit, kOpAnd, tReg2, tReg2, cReg);
+            opRegRegReg(cUnit, kOpSub, rlResult.lowReg, tReg2, tReg1);
+        } else {
+            opRegRegImm(cUnit, kOpAsr, tReg1, rlSrc.lowReg, 31);
+            opRegRegImm(cUnit, kOpLsr, tReg1, tReg1, 32 - k);
+            opRegRegReg(cUnit, kOpAdd, tReg2, tReg1, rlSrc.lowReg);
+            opRegRegReg(cUnit, kOpAnd, tReg2, tReg2, cReg);
+            opRegRegReg(cUnit, kOpSub, rlResult.lowReg, tReg2, tReg1);
+        }
+    }
+    storeValue(cUnit, rlDest, rlResult);
+    return true;
+}
+
+// Returns true if it added instructions to 'cUnit' to multiply 'rlSrc' by 'lit'
+// and store the result in 'rlDest'.
+static bool handleEasyMultiply(CompilationUnit* cUnit,
+                               RegLocation rlSrc, RegLocation rlDest, int lit)
+{
+    // Can we simplify this multiplication?
+    bool powerOfTwo = false;
+    bool popCountLE2 = false;
+    bool powerOfTwoMinusOne = false;
+    if (lit < 2) {
+        // Avoid special cases.
+        return false;
+    } else if (isPowerOfTwo(lit)) {
+        powerOfTwo = true;
+    } else if (isPopCountLE2(lit)) {
+        popCountLE2 = true;
+    } else if (isPowerOfTwo(lit + 1)) {
+        powerOfTwoMinusOne = true;
+    } else {
+        return false;
+    }
+    rlSrc = loadValue(cUnit, rlSrc, kCoreReg);
+    RegLocation rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true);
+    if (powerOfTwo) {
+        // Shift.
+        opRegRegImm(cUnit, kOpLsl, rlResult.lowReg, rlSrc.lowReg,
+                    lowestSetBit(lit));
+    } else if (popCountLE2) {
+        // Shift and add and shift.
+        int firstBit = lowestSetBit(lit);
+        int secondBit = lowestSetBit(lit ^ (1 << firstBit));
+        genMultiplyByTwoBitMultiplier(cUnit, rlSrc, rlResult, lit,
+                                      firstBit, secondBit);
+    } else {
+        // Reverse subtract: (src << (shift + 1)) - src.
+        assert(powerOfTwoMinusOne);
+        // TODO: rsb dst, src, src lsl#lowestSetBit(lit + 1)
+        int tReg = oatAllocTemp(cUnit);
+        opRegRegImm(cUnit, kOpLsl, tReg, rlSrc.lowReg, lowestSetBit(lit + 1));
+        opRegRegReg(cUnit, kOpSub, rlResult.lowReg, tReg, rlSrc.lowReg);
+    }
+    storeValue(cUnit, rlDest, rlResult);
+    return true;
+}
+
+static bool genArithOpIntLit(CompilationUnit* cUnit, MIR* mir,
+                             RegLocation rlDest, RegLocation rlSrc,
+                             int lit)
+{
+    Opcode dalvikOpcode = mir->dalvikInsn.opcode;
+    RegLocation rlResult;
+    OpKind op = (OpKind)0;      /* Make gcc happy */
+    int shiftOp = false;
+    bool isDiv = false;
+    int funcOffset;
+
+    switch (dalvikOpcode) {
+        case OP_RSUB_INT_LIT8:
+        case OP_RSUB_INT: {
+            int tReg;
+            //TUNING: add support for use of Arm rsub op
+            rlSrc = loadValue(cUnit, rlSrc, kCoreReg);
+            tReg = oatAllocTemp(cUnit);
+            loadConstant(cUnit, tReg, lit);
+            rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true);
+            opRegRegReg(cUnit, kOpSub, rlResult.lowReg,
+                        tReg, rlSrc.lowReg);
+            storeValue(cUnit, rlDest, rlResult);
+            return false;
+            break;
+        }
+
+        case OP_ADD_INT_LIT8:
+        case OP_ADD_INT_LIT16:
+            op = kOpAdd;
+            break;
+        case OP_MUL_INT_LIT8:
+        case OP_MUL_INT_LIT16: {
+            if (handleEasyMultiply(cUnit, rlSrc, rlDest, lit)) {
+                return false;
+            }
+            op = kOpMul;
+            break;
+        }
+        case OP_AND_INT_LIT8:
+        case OP_AND_INT_LIT16:
+            op = kOpAnd;
+            break;
+        case OP_OR_INT_LIT8:
+        case OP_OR_INT_LIT16:
+            op = kOpOr;
+            break;
+        case OP_XOR_INT_LIT8:
+        case OP_XOR_INT_LIT16:
+            op = kOpXor;
+            break;
+        case OP_SHL_INT_LIT8:
+            lit &= 31;
+            shiftOp = true;
+            op = kOpLsl;
+            break;
+        case OP_SHR_INT_LIT8:
+            lit &= 31;
+            shiftOp = true;
+            op = kOpAsr;
+            break;
+        case OP_USHR_INT_LIT8:
+            lit &= 31;
+            shiftOp = true;
+            op = kOpLsr;
+            break;
+
+        case OP_DIV_INT_LIT8:
+        case OP_DIV_INT_LIT16:
+        case OP_REM_INT_LIT8:
+        case OP_REM_INT_LIT16:
+            if (lit == 0) {
+                UNIMPLEMENTED(FATAL);
+                // FIXME: generate an explicit throw here
+                return false;
+            }
+            if (handleEasyDivide(cUnit, dalvikOpcode, rlSrc, rlDest, lit)) {
+                return false;
+            }
+            oatFlushAllRegs(cUnit);   /* Everything to home location */
+            loadValueDirectFixed(cUnit, rlSrc, r0);
+            oatClobber(cUnit, r0);
+            if ((dalvikOpcode == OP_DIV_INT_LIT8) ||
+                (dalvikOpcode == OP_DIV_INT_LIT16)) {
+                funcOffset = OFFSETOF_MEMBER(Thread, pIdiv);
+                isDiv = true;
+            } else {
+                funcOffset = OFFSETOF_MEMBER(Thread, pIdivmod);
+                isDiv = false;
+            }
+            loadWordDisp(cUnit, rSELF, funcOffset, rLR);
+            loadConstant(cUnit, r1, lit);
+            opReg(cUnit, kOpBlx, rLR);
+            oatClobberCallRegs(cUnit);
+            if (isDiv)
+                rlResult = oatGetReturn(cUnit);
+            else
+                rlResult = oatGetReturnAlt(cUnit);
+            storeValue(cUnit, rlDest, rlResult);
+            return false;
+            break;
+        default:
+            return true;
+    }
+    rlSrc = loadValue(cUnit, rlSrc, kCoreReg);
+    rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true);
+    // Avoid shifts by literal 0 - no support in Thumb.  Change to copy
+    if (shiftOp && (lit == 0)) {
+        genRegCopy(cUnit, rlResult.lowReg, rlSrc.lowReg);
+    } else {
+        opRegRegImm(cUnit, op, rlResult.lowReg, rlSrc.lowReg, lit);
+    }
+    storeValue(cUnit, rlDest, rlResult);
+    return false;
+}
+
+/* Architectural-specific debugging helpers go here */
+void oatArchDump(void)
+{
+    /* Print compiled opcode in this VM instance */
+    int i, start, streak;
+    char buf[1024];
+
+    streak = i = 0;
+    buf[0] = 0;
+    while (opcodeCoverage[i] == 0 && i < kNumPackedOpcodes) {
+        i++;
+    }
+    if (i == kNumPackedOpcodes) {
+        return;
+    }
+    for (start = i++, streak = 1; i < kNumPackedOpcodes; i++) {
+        if (opcodeCoverage[i]) {
+            streak++;
+        } else {
+            if (streak == 1) {
+                sprintf(buf+strlen(buf), "%x,", start);
+            } else {
+                sprintf(buf+strlen(buf), "%x-%x,", start, start + streak - 1);
+            }
+            streak = 0;
+            while (opcodeCoverage[i] == 0 && i < kNumPackedOpcodes) {
+                i++;
+            }
+            if (i < kNumPackedOpcodes) {
+                streak = 1;
+                start = i;
+            }
+        }
+    }
+    if (streak) {
+        if (streak == 1) {
+            sprintf(buf+strlen(buf), "%x", start);
+        } else {
+            sprintf(buf+strlen(buf), "%x-%x", start, start + streak - 1);
+        }
+    }
+    if (strlen(buf)) {
+        LOG(INFO) << "dalvik.vm.oat.op = " << buf;
+    }
+}
diff --git a/src/compiler/codegen/arm/Thumb2/Ralloc.cc b/src/compiler/codegen/arm/Thumb2/Ralloc.cc
new file mode 100644
index 0000000..63d2430
--- /dev/null
+++ b/src/compiler/codegen/arm/Thumb2/Ralloc.cc
@@ -0,0 +1,51 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * This file contains codegen for the Thumb ISA and is intended to be
+ * includes by:
+ *
+ *        Codegen-$(TARGET_ARCH_VARIANT).c
+ *
+ */
+
+/*
+ * Alloc a pair of core registers, or a double.  Low reg in low byte,
+ * high reg in next byte.
+ */
+int oatAllocTypedTempPair(CompilationUnit* cUnit, bool fpHint, int regClass)
+{
+    int highReg;
+    int lowReg;
+    int res = 0;
+
+    if (((regClass == kAnyReg) && fpHint) || (regClass == kFPReg)) {
+        lowReg = oatAllocTempDouble(cUnit);
+        highReg = lowReg + 1;
+    } else {
+        lowReg = oatAllocTemp(cUnit);
+        highReg = oatAllocTemp(cUnit);
+    }
+    res = (lowReg & 0xff) | ((highReg & 0xff) << 8);
+    return res;
+}
+
+int oatAllocTypedTemp(CompilationUnit* cUnit, bool fpHint, int regClass)
+{
+    if (((regClass == kAnyReg) && fpHint) || (regClass == kFPReg))
+        return oatAllocTempFloat(cUnit);
+    return oatAllocTemp(cUnit);
+}
diff --git a/src/compiler/codegen/arm/armv7-a-neon/ArchVariant.cc b/src/compiler/codegen/arm/armv7-a-neon/ArchVariant.cc
new file mode 100644
index 0000000..ee56369
--- /dev/null
+++ b/src/compiler/codegen/arm/armv7-a-neon/ArchVariant.cc
@@ -0,0 +1,51 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Determine the initial instruction set to be used for this trace.
+ * Later components may decide to change this.
+ */
+OatInstructionSetType oatInstructionSet(void)
+{
+    return DALVIK_OAT_THUMB2;
+}
+
+/* Architecture-specific initializations and checks go here */
+bool oatArchVariantInit(void)
+{
+    return true;
+}
+
+int oatTargetOptHint(int key)
+{
+    int res = 0;
+    switch (key) {
+        case kMaxHoistDistance:
+            res = 7;
+            break;
+        default:
+            LOG(FATAL) << "Unknown target optimization hint key: " << key;
+    }
+    return res;
+}
+
+void oatGenMemBarrier(CompilationUnit* cUnit, int barrierKind)
+{
+#if ANDROID_SMP != 0
+    ArmLIR* dmb = newLIR1(cUnit, kThumb2Dmb, barrierKind);
+    dmb->defMask = ENCODE_ALL;
+#endif
+}
diff --git a/src/compiler/codegen/arm/armv7-a-neon/Codegen.cc b/src/compiler/codegen/arm/armv7-a-neon/Codegen.cc
new file mode 100644
index 0000000..00339ef
--- /dev/null
+++ b/src/compiler/codegen/arm/armv7-a-neon/Codegen.cc
@@ -0,0 +1,51 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#define _CODEGEN_C
+#define _ARMV7_A_NEON
+#define TGT_LIR ArmLIR
+
+#include "../../../Dalvik.h"
+//#include "interp/InterpDefs.h"
+//#include "libdex/DexOpcodes.h"
+#include "../../../CompilerInternals.h"
+#include "../arm/ArmLIR.h"
+//#include "mterp/common/FindInterface.h"
+#include "../../Ralloc.h"
+#include "../Codegen.h"
+
+/* Arm codegen building blocks */
+#include "../CodegenCommon.cc"
+
+/* Thumb2-specific factory utilities */
+#include "../Thumb2/Factory.cc"
+/* Target indepedent factory utilities */
+#include "../../CodegenFactory.cc"
+/* Arm-specific factory utilities */
+#include "../ArchFactory.cc"
+
+/* Thumb2-specific codegen routines */
+#include "../Thumb2/Gen.cc"
+/* Thumb2+VFP codegen routines */
+#include "../FP/Thumb2VFP.cc"
+
+/* Thumb2-specific register allocation */
+#include "../Thumb2/Ralloc.cc"
+
+/* MIR2LIR dispatcher and architectural independent codegen routines */
+#include "../MethodCodegenDriver.cc"
+
+/* Architecture manifest */
+#include "ArchVariant.cc"
diff --git a/src/compiler/codegen/arm/armv7-a/ArchVariant.cc b/src/compiler/codegen/arm/armv7-a/ArchVariant.cc
new file mode 100644
index 0000000..609d73b
--- /dev/null
+++ b/src/compiler/codegen/arm/armv7-a/ArchVariant.cc
@@ -0,0 +1,52 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+/*
+ * Determine the initial instruction set to be used for this trace.
+ * Later components may decide to change this.
+ */
+OatInstructionSetType oatInstructionSet(void)
+{
+    return DALVIK_OAT_THUMB2;
+}
+
+/* Architecture-specific initializations and checks go here */
+bool oatArchVariantInit(void)
+{
+    return true;
+}
+
+int oatTargetOptHint(int key)
+{
+    int res = 0;
+    switch (key) {
+        case kMaxHoistDistance:
+            res = 7;
+            break;
+        default:
+            LOG(FATAL) << "Unknown target optimization hint key: " << key;
+    }
+    return res;
+}
+
+void oatGenMemBarrier(CompilationUnit* cUnit, int barrierKind)
+{
+#if ANDROID_SMP != 0
+    ArmLIR* dmb = newLIR1(cUnit, kThumb2Dmb, barrierKind);
+    dmb->defMask = ENCODE_ALL;
+#endif
+}
diff --git a/src/compiler/codegen/arm/armv7-a/Codegen.cc b/src/compiler/codegen/arm/armv7-a/Codegen.cc
new file mode 100644
index 0000000..384ce15
--- /dev/null
+++ b/src/compiler/codegen/arm/armv7-a/Codegen.cc
@@ -0,0 +1,51 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#define _CODEGEN_C
+#define _ARMV7_A
+#define TGT_LIR ArmLIR
+
+#include "../../../Dalvik.h"
+//#include "interp/InterpDefs.h"
+//#include "libdex/DexOpcodes.h"
+#include "../../../CompilerInternals.h"
+#include "../ArmLIR.h"
+//#include "mterp/common/FindInterface.h"
+#include "../../Ralloc.h"
+#include "../Codegen.h"
+
+/* Arm codegen building blocks */
+#include "../CodegenCommon.cc"
+
+/* Thumb2-specific factory utilities */
+#include "../Thumb2/Factory.cc"
+/* Target independent factory utilities */
+#include "../../CodegenFactory.cc"
+/* Arm-specific factory utilities */
+#include "../ArchFactory.cc"
+
+/* Thumb2-specific codegen routines */
+#include "../Thumb2/Gen.cc"
+/* Thumb2+VFP codegen routines */
+#include "../FP/Thumb2VFP.cc"
+
+/* Thumb2-specific register allocation */
+#include "../Thumb2/Ralloc.cc"
+
+/* MIR2LIR dispatcher and architectural independent codegen routines */
+#include "../MethodCodegenDriver.cc"
+
+/* Architecture manifest */
+#include "ArchVariant.cc"