Compiler tuning

Significant reduction in memory usage by the compiler.
    o Estimated sizes of growable lists to avoid waste
    o Changed basic block predecessor structure from a growable bitmap
      to a growable list.
    o Conditionalized code which produced disassembly strings.
    o Avoided generating some dataflow-related structures when compiling
      in dataflow-disabled mode.
    o Added memory usage statistics
    o Eliminated floating point usage as a barrier to disabling expensive
      dataflow analysis for very large init routines.
    o Because iterating through sparse bit maps is much less of a concern now,
      removed earlier hack that remembered runs of leading and trailing
      zeroes.

Also, some general tuning.
    o Minor tweaks to register utilities
    o Speed up the assembly loop
    o Rewrite of the bit vector iterator

Our previous worst-case method originally consumed 360 megabytes, but through
earlier changes was whittled down to 113 megabytes.  Now it consumes 12
megabytes (which so far appears to be close to the highest compiler heap
usage of anything I've seen).

Post-wipe cold boot time is now less than 7 minutes.

Installation time for our application test cases also shows a large
gain - typically 25% to 40% speedup.

Single-threaded host compilation of core.jar down to <3.0s, boot.oat builds
in 17.2s.  Next up: multi-threaded compilation.

Change-Id: I493d0d584c4145a6deccdd9bff344473023deb46
diff --git a/src/compiler.cc b/src/compiler.cc
index a04a836..a3bb985 100644
--- a/src/compiler.cc
+++ b/src/compiler.cc
@@ -466,7 +466,7 @@
     CHECK(compiled_method != NULL) << PrettyMethod(method_idx, dex_file);
   }
   uint64_t duration_ns = NanoTime() - start_ns;
-  if (duration_ns > MsToNs(10)) {
+  if (duration_ns > MsToNs(100)) {
     LOG(WARNING) << "Compilation of " << PrettyMethod(method_idx, dex_file)
                  << " took " << PrettyDuration(duration_ns);
   }
diff --git a/src/compiler/Compiler.h b/src/compiler/Compiler.h
index 05e05b6..909ac96 100644
--- a/src/compiler/Compiler.h
+++ b/src/compiler/Compiler.h
@@ -41,6 +41,62 @@
     kSkipLargeMethodOptimization,
 };
 
+/* Type of allocation for memory tuning */
+enum oatAllocKind {
+    kAllocMisc,
+    kAllocBB,
+    kAllocLIR,
+    kAllocMIR,
+    kAllocDFInfo,
+    kAllocGrowableList,
+    kAllocGrowableBitMap,
+    kAllocDalvikToSSAMap,
+    kAllocDebugInfo,
+    kAllocSuccessor,
+    kAllocRegAlloc,
+    kAllocData,
+    kAllocPredecessors,
+    kNumAllocKinds
+};
+
+/* Type of growable list for memory tuning */
+enum oatListKind {
+    kListMisc = 0,
+    kListBlockList,
+    kListSSAtoDalvikMap,
+    kListDfsOrder,
+    kListDfsPostOrder,
+    kListDomPostOrderTraversal,
+    kListThrowLaunchPads,
+    kListSuspendLaunchPads,
+    kListSwitchTables,
+    kListFillArrayData,
+    kListSuccessorBlocks,
+    kListPredecessors,
+    kNumListKinds
+};
+
+/* Type of growable bitmap for memory tuning */
+enum oatBitMapKind {
+    kBitMapMisc = 0,
+    kBitMapUse,
+    kBitMapDef,
+    kBitMapLiveIn,
+    kBitMapBMatrix,
+    kBitMapDominators,
+    kBitMapIDominated,
+    kBitMapDomFrontier,
+    kBitMapPhi,
+    kBitMapTmpBlocks,
+    kBitMapInputBlocks,
+    kBitMapRegisterV,
+    kBitMapTempSSARegisterV,
+    kBitMapNullCheck,
+    kBitMapTmpBlockV,
+    kBitMapPredecessors,
+    kNumBitMapKinds
+};
+
 extern uint32_t compilerOptimizerDisableFlags;
 
 /* Force code generation paths for testing */
@@ -56,6 +112,7 @@
     kDebugSlowestStringPath,
     kDebugExerciseResolveMethod,
     kDebugVerifyDataflow,
+    kDebugShowMemoryUsage,
 };
 
 extern uint32_t compilerDebugFlags;
diff --git a/src/compiler/CompilerIR.h b/src/compiler/CompilerIR.h
index 1eb6c64..b17855b 100644
--- a/src/compiler/CompilerIR.h
+++ b/src/compiler/CompilerIR.h
@@ -66,12 +66,10 @@
  * benefit in optimizing these methods, and the cost can be very high.
  * We attempt to identify these cases, and avoid performing most dataflow
  * analysis.  Two thresholds are used - one for known initializers and one
- * for everything else.  Note: we require dataflow analysis for floating point
- * type inference. If any non-move fp operations exist in a method, dataflow
- * is performed regardless of block count.
+ * for everything else.
  */
-#define MANY_BLOCKS_INITIALIZER 200 /* Threshold for switching dataflow off */
-#define MANY_BLOCKS 3000 /* Non-initializer threshold */
+#define MANY_BLOCKS_INITIALIZER 1000 /* Threshold for switching dataflow off */
+#define MANY_BLOCKS 4000 /* Non-initializer threshold */
 
 typedef enum BBType {
     kEntryBlock,
@@ -175,7 +173,7 @@
     struct BasicBlock* taken;
     struct BasicBlock* iDom;            // Immediate dominator
     struct BasicBlockDataFlow* dataFlowInfo;
-    ArenaBitVector* predecessors;
+    GrowableList* predecessors;
     ArenaBitVector* dominators;
     ArenaBitVector* iDominated;         // Set nodes being immediately dominated
     ArenaBitVector* domFrontier;        // Dominance frontier
@@ -328,13 +326,13 @@
      GrowableList fillArrayData;
      const u2* insns;
      u4 insnsSize;
-     bool usesFP;          // Method contains at least 1 non-move FP operation
      bool disableDataflow; // Skip dataflow analysis if possible
      std::map<unsigned int, BasicBlock*> blockMap; // findBlock lookup cache
      std::map<unsigned int, LIR*> boundaryMap; // boundary lookup cache
+     int defCount;         // Used to estimate number of SSA names
 } CompilationUnit;
 
-BasicBlock* oatNewBB(BBType blockType, int blockId);
+BasicBlock* oatNewBB(CompilationUnit* cUnit, BBType blockType, int blockId);
 
 void oatAppendMIR(BasicBlock* bb, MIR* mir);
 
diff --git a/src/compiler/CompilerUtility.h b/src/compiler/CompilerUtility.h
index a68ee5f..ba449a4 100644
--- a/src/compiler/CompilerUtility.h
+++ b/src/compiler/CompilerUtility.h
@@ -27,6 +27,9 @@
 /* Allocate the initial memory block for arena-based allocation */
 bool oatHeapInit(void);
 
+/* Collect memory usage statistics */
+//#define WITH_MEMSTATS
+
 typedef struct ArenaMemBlock {
     size_t blockSize;
     size_t bytesAllocated;
@@ -34,7 +37,7 @@
     char ptr[0];
 } ArenaMemBlock;
 
-void* oatNew(size_t size, bool zero);
+void* oatNew(size_t size, bool zero, oatAllocKind kind = kAllocMisc);
 
 void oatArenaReset(void);
 
@@ -42,6 +45,9 @@
     size_t numAllocated;
     size_t numUsed;
     intptr_t *elemList;
+#ifdef WITH_MEMSTATS
+    oatListKind kind;
+#endif
 } GrowableList;
 
 typedef struct GrowableListIterator {
@@ -58,20 +64,11 @@
  */
 struct ArenaBitVector {
     bool    expandable;     /* expand bitmap if we run out? */
-    bool    firstDirty;     /* when true, don't believe firstBitSet */
-    bool    lastDirty;      /* when true, don't believe lastBitSet */
     u4      storageSize;    /* current size, in 32-bit words */
     u4*     storage;
-                            /*
-                             * Opportunistically remember first and
-                             * last set bits.  This yeilds a performance
-                             * advantage in cases where large
-                             * sparse vectors are repeatedly scanned
-                             * (something that can happen a lot during
-                             * dataflow analysis.
-                             */
-    int     firstBitSet;
-    int     lastBitSet;
+#ifdef WITH_MEMSTATS
+    oatBitMapKind kind;      /* for memory use tuning */
+#endif
 };
 
 /* Handy iterator to walk through the bit positions set to 1 */
@@ -90,14 +87,17 @@
 struct BasicBlock;
 struct CompilationUnit;
 
-void oatInitGrowableList(GrowableList* gList, size_t initLength);
+void oatInitGrowableList(GrowableList* gList, size_t initLength,
+                         oatListKind kind = kListMisc);
 void oatInsertGrowableList(GrowableList* gList, intptr_t elem);
+void oatDeleteGrowableList(GrowableList* gList, intptr_t elem);
 void oatGrowableListIteratorInit(GrowableList* gList,
                                  GrowableListIterator* iterator);
 intptr_t oatGrowableListIteratorNext(GrowableListIterator* iterator);
 intptr_t oatGrowableListGetElement(const GrowableList* gList, size_t idx);
 
-ArenaBitVector* oatAllocBitVector(unsigned int startBits, bool expandable);
+ArenaBitVector* oatAllocBitVector(unsigned int startBits, bool expandable,
+                                  oatBitMapKind = kBitMapMisc);
 void oatBitVectorIteratorInit(ArenaBitVector* pBits,
                               ArenaBitVectorIterator* iterator);
 int oatBitVectorIteratorNext(ArenaBitVectorIterator* iterator);
@@ -125,6 +125,7 @@
 void oatGetBlockName(struct BasicBlock* bb, char* name);
 const char* oatGetShortyFromTargetIdx(CompilationUnit*, int);
 void oatDumpRegLocTable(struct RegLocation*, int);
+void oatDumpMemStats(CompilationUnit* cUnit);
 
 }  // namespace art
 
diff --git a/src/compiler/Dataflow.cc b/src/compiler/Dataflow.cc
index 3f88ea5..cd63290 100644
--- a/src/compiler/Dataflow.cc
+++ b/src/compiler/Dataflow.cc
@@ -900,7 +900,7 @@
         }
     }
     int length = strlen(buffer) + 1;
-    ret = (char*)oatNew(length, false);
+    ret = (char*)oatNew(length, false, kAllocDFInfo);
     memcpy(ret, buffer, length);
     return ret;
 }
@@ -1036,7 +1036,7 @@
 
 done:
     length = strlen(buffer) + 1;
-    ret = (char*) oatNew(length, false);
+    ret = (char*) oatNew(length, false, kAllocDFInfo);
     memcpy(ret, buffer, length);
     return ret;
 }
@@ -1078,7 +1078,7 @@
     }
 
     int length = strlen(buffer) + 1;
-    ret = (char*)oatNew(length, false);
+    ret = (char*)oatNew(length, false, kAllocDFInfo);
     memcpy(ret, buffer, length);
     return ret;
 }
@@ -1111,11 +1111,11 @@
     if (bb->dataFlowInfo == NULL) return false;
 
     useV = bb->dataFlowInfo->useV =
-        oatAllocBitVector(cUnit->numDalvikRegisters, false);
+        oatAllocBitVector(cUnit->numDalvikRegisters, false, kBitMapUse);
     defV = bb->dataFlowInfo->defV =
-        oatAllocBitVector(cUnit->numDalvikRegisters, false);
+        oatAllocBitVector(cUnit->numDalvikRegisters, false, kBitMapDef);
     liveInV = bb->dataFlowInfo->liveInV =
-        oatAllocBitVector(cUnit->numDalvikRegisters, false);
+        oatAllocBitVector(cUnit->numDalvikRegisters, false, kBitMapLiveIn);
 
     for (mir = bb->firstMIRInsn; mir; mir = mir->next) {
         int dfAttributes =
@@ -1186,9 +1186,11 @@
     int i;
 
     mir->ssaRep->numUses = numUses;
-    mir->ssaRep->uses = (int *)oatNew(sizeof(int) * numUses, true);
+    mir->ssaRep->uses = (int *)oatNew(sizeof(int) * numUses, true,
+                                      kAllocDFInfo);
     // NOTE: will be filled in during type & size inference pass
-    mir->ssaRep->fpUse = (bool *)oatNew(sizeof(bool) * numUses, true);
+    mir->ssaRep->fpUse = (bool *)oatNew(sizeof(bool) * numUses, true,
+                                        kAllocDFInfo);
 
     for (i = 0; i < numUses; i++) {
         handleSSAUse(cUnit, mir->ssaRep->uses, dInsn->arg[i], i);
@@ -1203,9 +1205,11 @@
     int i;
 
     mir->ssaRep->numUses = numUses;
-    mir->ssaRep->uses = (int *)oatNew(sizeof(int) * numUses, true);
+    mir->ssaRep->uses = (int *)oatNew(sizeof(int) * numUses, true,
+                                      kAllocDFInfo);
     // NOTE: will be filled in during type & size inference pass
-    mir->ssaRep->fpUse = (bool *)oatNew(sizeof(bool) * numUses, true);
+    mir->ssaRep->fpUse = (bool *)oatNew(sizeof(bool) * numUses, true,
+                                        kAllocDFInfo);
 
     for (i = 0; i < numUses; i++) {
         handleSSAUse(cUnit, mir->ssaRep->uses, dInsn->vC+i, i);
@@ -1221,7 +1225,7 @@
 
     for (mir = bb->firstMIRInsn; mir; mir = mir->next) {
         mir->ssaRep = (struct SSARepresentation *)
-            oatNew(sizeof(SSARepresentation), true);
+            oatNew(sizeof(SSARepresentation), true, kAllocDFInfo);
 
         int dfAttributes =
             oatDataFlowAttributes[mir->dalvikInsn.opcode];
@@ -1272,9 +1276,9 @@
         if (numUses) {
             mir->ssaRep->numUses = numUses;
             mir->ssaRep->uses = (int *)oatNew(sizeof(int) * numUses,
-                                                      false);
+                                              false, kAllocDFInfo);
             mir->ssaRep->fpUse = (bool *)oatNew(sizeof(bool) * numUses,
-                                                false);
+                                                false, kAllocDFInfo);
         }
 
         int numDefs = 0;
@@ -1289,9 +1293,9 @@
         if (numDefs) {
             mir->ssaRep->numDefs = numDefs;
             mir->ssaRep->defs = (int *)oatNew(sizeof(int) * numDefs,
-                                                      false);
+                                              false, kAllocDFInfo);
             mir->ssaRep->fpDef = (bool *)oatNew(sizeof(bool) * numDefs,
-                                                        false);
+                                                false, kAllocDFInfo);
         }
 
         DecodedInstruction *dInsn = &mir->dalvikInsn;
@@ -1336,16 +1340,19 @@
         }
     }
 
-    /*
-     * Take a snapshot of Dalvik->SSA mapping at the end of each block. The
-     * input to PHI nodes can be derived from the snapshot of all predecessor
-     * blocks.
-     */
-    bb->dataFlowInfo->dalvikToSSAMap =
-        (int *)oatNew(sizeof(int) * cUnit->numDalvikRegisters, false);
+    if (!cUnit->disableDataflow) {
+        /*
+         * Take a snapshot of Dalvik->SSA mapping at the end of each block. The
+         * input to PHI nodes can be derived from the snapshot of all
+         * predecessor blocks.
+         */
+        bb->dataFlowInfo->dalvikToSSAMap =
+            (int *)oatNew(sizeof(int) * cUnit->numDalvikRegisters, false,
+                          kAllocDFInfo);
 
-    memcpy(bb->dataFlowInfo->dalvikToSSAMap, cUnit->dalvikToSSAMap,
-           sizeof(int) * cUnit->numDalvikRegisters);
+        memcpy(bb->dataFlowInfo->dalvikToSSAMap, cUnit->dalvikToSSAMap,
+               sizeof(int) * cUnit->numDalvikRegisters);
+    }
     return true;
 }
 
@@ -1436,9 +1443,11 @@
     int numDalvikReg = cUnit->numDalvikRegisters;
 
     cUnit->ssaToDalvikMap = (GrowableList *)oatNew(sizeof(GrowableList),
-                                                           false);
-    oatInitGrowableList(cUnit->ssaToDalvikMap, numDalvikReg);
-
+                                                   false, kAllocDFInfo);
+    // Create the SSAtoDalvikMap, estimating the max size
+    oatInitGrowableList(cUnit->ssaToDalvikMap,
+                        numDalvikReg + cUnit->defCount + 128,
+                        kListSSAtoDalvikMap);
     /*
      * Initial number of SSA registers is equal to the number of Dalvik
      * registers.
@@ -1460,10 +1469,10 @@
      * register N is mapped to SSA register N with subscript 0.
      */
     cUnit->dalvikToSSAMap = (int *)oatNew(sizeof(int) * numDalvikReg,
-                                                  false);
+                                          false, kAllocDFInfo);
     /* Keep track of the higest def for each dalvik reg */
     cUnit->SSALastDefs = (int *)oatNew(sizeof(int) * numDalvikReg,
-                                                  false);
+                                       false, kAllocDFInfo);
 
     for (i = 0; i < numDalvikReg; i++) {
         cUnit->dalvikToSSAMap[i] = i;
@@ -1486,7 +1495,7 @@
             bb->blockType == kExitBlock) {
             bb->dataFlowInfo = (BasicBlockDataFlow *)
                 oatNew(sizeof(BasicBlockDataFlow),
-                               true);
+                       true, kAllocDFInfo);
         }
     }
 }
@@ -1618,7 +1627,7 @@
 {
     if (bb->dataFlowInfo == NULL) return false;
     bb->dataFlowInfo->endingNullCheckV =
-        oatAllocBitVector(cUnit->numSSARegs, false);
+        oatAllocBitVector(cUnit->numSSARegs, false, kBitMapNullCheck);
     oatClearAllBits(bb->dataFlowInfo->endingNullCheckV);
     return true;
 }
@@ -1628,12 +1637,12 @@
                                  struct BasicBlock* bb)
 {
     if (bb->dataFlowInfo == NULL) return false;
+
     /*
      * Set initial state.  Be conservative with catch
      * blocks and start with no assumptions about null check
      * status (except for "this").
      */
-
     if ((bb->blockType == kEntryBlock) | bb->catchEntry) {
         oatClearAllBits(cUnit->tempSSARegisterV);
         if ((cUnit->access_flags & kAccStatic) == 0) {
@@ -1643,20 +1652,15 @@
         }
     } else {
         // Starting state is intesection of all incoming arcs
-        GrowableList* blockList = &cUnit->blockList;
-        ArenaBitVectorIterator bvIterator;
-        oatBitVectorIteratorInit(bb->predecessors, &bvIterator);
-        int predBBIdx = oatBitVectorIteratorNext(&bvIterator);
-        DCHECK_NE(predBBIdx, -1);
-        BasicBlock* predBB = (BasicBlock*)oatGrowableListGetElement(
-            blockList, predBBIdx);
+        GrowableListIterator iter;
+        oatGrowableListIteratorInit(bb->predecessors, &iter);
+        BasicBlock* predBB = (BasicBlock*)oatGrowableListIteratorNext(&iter);
+        DCHECK(predBB != NULL);
         oatCopyBitVector(cUnit->tempSSARegisterV,
                          predBB->dataFlowInfo->endingNullCheckV);
         while (true) {
-            predBBIdx = oatBitVectorIteratorNext(&bvIterator);
-            if (predBBIdx == -1) break;
-            predBB = (BasicBlock*)oatGrowableListGetElement(
-                blockList, predBBIdx);
+            predBB = (BasicBlock*)oatGrowableListIteratorNext(&iter);
+            if (!predBB) break;
             if ((predBB->dataFlowInfo == NULL) ||
                 (predBB->dataFlowInfo->endingNullCheckV == NULL)) {
                 continue;
diff --git a/src/compiler/Frontend.cc b/src/compiler/Frontend.cc
index 2a6714f..b41afc0 100644
--- a/src/compiler/Frontend.cc
+++ b/src/compiler/Frontend.cc
@@ -46,6 +46,7 @@
      //(1 << kDebugSlowestStringPath) |
      //(1 << kDebugExerciseResolveMethod) |
      //(1 << kDebugVerifyDataflow) |
+     //(1 << kDebugShowMemoryUsage) |
      0;
 
 std::string compilerMethodMatch;      // Method name match to apply above flags
@@ -130,8 +131,8 @@
     if (insn == NULL) {
         LOG(FATAL) << "Break split failed";
     }
-    BasicBlock *bottomBlock = oatNewBB(kDalvikByteCode,
-                                               cUnit->numBlocks++);
+    BasicBlock *bottomBlock = oatNewBB(cUnit, kDalvikByteCode,
+                                       cUnit->numBlocks++);
     oatInsertGrowableList(&cUnit->blockList, (intptr_t) bottomBlock);
 
     bottomBlock->startOffset = codeOffset;
@@ -146,8 +147,10 @@
     bottomBlock->taken = origBlock->taken;
     if (bottomBlock->taken) {
         origBlock->taken = NULL;
-        oatClearBit(bottomBlock->taken->predecessors, origBlock->id);
-        oatSetBit(bottomBlock->taken->predecessors, bottomBlock->id);
+        oatDeleteGrowableList(bottomBlock->taken->predecessors,
+                              (intptr_t)origBlock);
+        oatInsertGrowableList(bottomBlock->taken->predecessors,
+                              (intptr_t)bottomBlock);
     }
 
     /* Handle the fallthrough path */
@@ -155,12 +158,12 @@
     bottomBlock->fallThrough = origBlock->fallThrough;
     origBlock->fallThrough = bottomBlock;
     origBlock->needFallThroughBranch = true;
-    oatSetBit(bottomBlock->predecessors, origBlock->id);
+    oatInsertGrowableList(bottomBlock->predecessors, (intptr_t)origBlock);
     if (bottomBlock->fallThrough) {
-        oatClearBit(bottomBlock->fallThrough->predecessors,
-                            origBlock->id);
-        oatSetBit(bottomBlock->fallThrough->predecessors,
-                          bottomBlock->id);
+        oatDeleteGrowableList(bottomBlock->fallThrough->predecessors,
+                              (intptr_t)origBlock);
+        oatInsertGrowableList(bottomBlock->fallThrough->predecessors,
+                              (intptr_t)bottomBlock);
     }
 
     /* Handle the successor list */
@@ -176,8 +179,8 @@
                 (SuccessorBlockInfo *) oatGrowableListIteratorNext(&iterator);
             if (successorBlockInfo == NULL) break;
             BasicBlock *bb = successorBlockInfo->block;
-            oatClearBit(bb->predecessors, origBlock->id);
-            oatSetBit(bb->predecessors, bottomBlock->id);
+            oatDeleteGrowableList(bb->predecessors, (intptr_t)origBlock);
+            oatInsertGrowableList(bb->predecessors, (intptr_t)bottomBlock);
         }
     }
 
@@ -237,7 +240,7 @@
     }
 
     /* Create a new one */
-    bb = oatNewBB(kDalvikByteCode, cUnit->numBlocks++);
+    bb = oatNewBB(cUnit, kDalvikByteCode, cUnit->numBlocks++);
     oatInsertGrowableList(&cUnit->blockList, (intptr_t) bb);
     bb->startOffset = codeOffset;
     cUnit->blockMap.insert(std::make_pair(bb->startOffset, bb));
@@ -254,7 +257,7 @@
     char* fileName = (char*) oatNew(
                         strlen(dirPrefix) +
                         name.length() +
-                        strlen(".dot") + 1, true);
+                        strlen(".dot") + 1, true, kAllocDebugInfo);
     sprintf(fileName, "%s%s%s.dot", dirPrefix, name.c_str(), startOffset);
 
     /*
@@ -405,14 +408,12 @@
 /* Verify if all the successor is connected with all the claimed predecessors */
 STATIC bool verifyPredInfo(CompilationUnit* cUnit, BasicBlock* bb)
 {
-    ArenaBitVectorIterator bvIterator;
+    GrowableListIterator iter;
 
-    oatBitVectorIteratorInit(bb->predecessors, &bvIterator);
+    oatGrowableListIteratorInit(bb->predecessors, &iter);
     while (true) {
-        int blockIdx = oatBitVectorIteratorNext(&bvIterator);
-        if (blockIdx == -1) break;
-        BasicBlock *predBB = (BasicBlock *)
-            oatGrowableListGetElement(&cUnit->blockList, blockIdx);
+        BasicBlock *predBB = (BasicBlock*)oatGrowableListIteratorNext(&iter);
+        if (!predBB) break;
         bool found = false;
         if (predBB->taken == bb) {
             found = true;
@@ -525,7 +526,7 @@
                                        /* immedPredBlockP */
                                        &curBlock);
     curBlock->taken = takenBlock;
-    oatSetBit(takenBlock->predecessors, curBlock->id);
+    oatInsertGrowableList(takenBlock->predecessors, (intptr_t)curBlock);
 
     /* Always terminate the current block for conditional branches */
     if (flags & kInstrCanContinue) {
@@ -549,7 +550,8 @@
                                                  /* immedPredBlockP */
                                                  &curBlock);
         curBlock->fallThrough = fallthroughBlock;
-        oatSetBit(fallthroughBlock->predecessors, curBlock->id);
+        oatInsertGrowableList(fallthroughBlock->predecessors,
+                              (intptr_t)curBlock);
     } else if (codePtr < codeEnd) {
         /* Create a fallthrough block for real instructions (incl. OP_NOP) */
         if (contentIsInsn(codePtr)) {
@@ -616,7 +618,8 @@
     curBlock->successorBlockList.blockListType =
         (insn->dalvikInsn.opcode == OP_PACKED_SWITCH) ?
         kPackedSwitch : kSparseSwitch;
-    oatInitGrowableList(&curBlock->successorBlockList.blocks, size);
+    oatInitGrowableList(&curBlock->successorBlockList.blocks, size,
+                        kListSuccessorBlocks);
 
     for (i = 0; i < size; i++) {
         BasicBlock *caseBlock = findBlock(cUnit, curOffset + targetTable[i],
@@ -628,13 +631,13 @@
                                           &curBlock);
         SuccessorBlockInfo *successorBlockInfo =
             (SuccessorBlockInfo *) oatNew(sizeof(SuccessorBlockInfo),
-                                                  false);
+                                                  false, kAllocSuccessor);
         successorBlockInfo->block = caseBlock;
         successorBlockInfo->key = (insn->dalvikInsn.opcode == OP_PACKED_SWITCH)?
                                   firstKey + i : keyTable[i];
         oatInsertGrowableList(&curBlock->successorBlockList.blocks,
                               (intptr_t) successorBlockInfo);
-        oatSetBit(caseBlock->predecessors, curBlock->id);
+        oatInsertGrowableList(caseBlock->predecessors, (intptr_t)curBlock);
     }
 
     /* Fall-through case */
@@ -647,7 +650,7 @@
                                              /* immedPredBlockP */
                                              NULL);
     curBlock->fallThrough = fallthroughBlock;
-    oatSetBit(fallthroughBlock->predecessors, curBlock->id);
+    oatInsertGrowableList(fallthroughBlock->predecessors, (intptr_t)curBlock);
 }
 
 /* Process instructions with the kInstrCanThrow flag */
@@ -668,7 +671,8 @@
         }
 
         curBlock->successorBlockList.blockListType = kCatch;
-        oatInitGrowableList(&curBlock->successorBlockList.blocks, 2);
+        oatInitGrowableList(&curBlock->successorBlockList.blocks, 2,
+                            kListSuccessorBlocks);
 
         for (;iterator.HasNext(); iterator.Next()) {
             BasicBlock *catchBlock = findBlock(cUnit, iterator.GetHandlerAddress(),
@@ -678,20 +682,20 @@
             catchBlock->catchEntry = true;
             SuccessorBlockInfo *successorBlockInfo =
                   (SuccessorBlockInfo *) oatNew(sizeof(SuccessorBlockInfo),
-                  false);
+                                                false, kAllocSuccessor);
             successorBlockInfo->block = catchBlock;
             successorBlockInfo->key = iterator.GetHandlerTypeIndex();
             oatInsertGrowableList(&curBlock->successorBlockList.blocks,
                                   (intptr_t) successorBlockInfo);
-            oatSetBit(catchBlock->predecessors, curBlock->id);
+            oatInsertGrowableList(catchBlock->predecessors, (intptr_t)curBlock);
         }
     } else {
-        BasicBlock *ehBlock = oatNewBB(kExceptionHandling,
-                                               cUnit->numBlocks++);
+        BasicBlock *ehBlock = oatNewBB(cUnit, kExceptionHandling,
+                                       cUnit->numBlocks++);
         curBlock->taken = ehBlock;
         oatInsertGrowableList(&cUnit->blockList, (intptr_t) ehBlock);
         ehBlock->startOffset = curOffset;
-        oatSetBit(ehBlock->predecessors, curBlock->id);
+        oatInsertGrowableList(ehBlock->predecessors, (intptr_t)curBlock);
     }
 
     /*
@@ -720,7 +724,8 @@
              */
             if (insn->dalvikInsn.opcode != OP_THROW) {
                 curBlock->fallThrough = fallthroughBlock;
-                oatSetBit(fallthroughBlock->predecessors, curBlock->id);
+                oatInsertGrowableList(fallthroughBlock->predecessors,
+                                      (intptr_t)curBlock);
             }
         }
     }
@@ -779,20 +784,22 @@
     /* Assume non-throwing leaf */
     cUnit->attrs = (METHOD_IS_LEAF | METHOD_IS_THROW_FREE);
 
-    /* Initialize the block list */
-    oatInitGrowableList(&cUnit->blockList, 40);
+    /* Initialize the block list, estimate size based on insnsSize */
+    oatInitGrowableList(&cUnit->blockList, cUnit->insnsSize, kListBlockList);
 
     /* Initialize the switchTables list */
-    oatInitGrowableList(&cUnit->switchTables, 4);
+    oatInitGrowableList(&cUnit->switchTables, 4, kListSwitchTables);
 
     /* Intialize the fillArrayData list */
-    oatInitGrowableList(&cUnit->fillArrayData, 4);
+    oatInitGrowableList(&cUnit->fillArrayData, 4, kListFillArrayData);
 
-    /* Intialize the throwLaunchpads list */
-    oatInitGrowableList(&cUnit->throwLaunchpads, 4);
+    /* Initialize the throwLaunchpads list, estimate size based on insnsSize */
+    oatInitGrowableList(&cUnit->throwLaunchpads, cUnit->insnsSize,
+                        kListThrowLaunchPads);
 
     /* Intialize the suspendLaunchpads list */
-    oatInitGrowableList(&cUnit->suspendLaunchpads, 4);
+    oatInitGrowableList(&cUnit->suspendLaunchpads, 2048,
+                        kListSuspendLaunchPads);
 
     /* Allocate the bit-vector to track the beginning of basic blocks */
     ArenaBitVector *tryBlockAddr = oatAllocBitVector(cUnit->insnsSize,
@@ -800,8 +807,8 @@
     cUnit->tryBlockAddr = tryBlockAddr;
 
     /* Create the default entry and exit blocks and enter them to the list */
-    BasicBlock *entryBlock = oatNewBB(kEntryBlock, numBlocks++);
-    BasicBlock *exitBlock = oatNewBB(kExitBlock, numBlocks++);
+    BasicBlock *entryBlock = oatNewBB(cUnit.get(), kEntryBlock, numBlocks++);
+    BasicBlock *exitBlock = oatNewBB(cUnit.get(), kExitBlock, numBlocks++);
 
     cUnit->entryBlock = entryBlock;
     cUnit->exitBlock = exitBlock;
@@ -810,13 +817,13 @@
     oatInsertGrowableList(&cUnit->blockList, (intptr_t) exitBlock);
 
     /* Current block to record parsed instructions */
-    BasicBlock *curBlock = oatNewBB(kDalvikByteCode, numBlocks++);
+    BasicBlock *curBlock = oatNewBB(cUnit.get(), kDalvikByteCode, numBlocks++);
     curBlock->startOffset = 0;
     oatInsertGrowableList(&cUnit->blockList, (intptr_t) curBlock);
     /* Add first block to the fast lookup cache */
     cUnit->blockMap.insert(std::make_pair(curBlock->startOffset, curBlock));
     entryBlock->fallThrough = curBlock;
-    oatSetBit(curBlock->predecessors, entryBlock->id);
+    oatInsertGrowableList(curBlock->predecessors, (intptr_t)entryBlock);
 
     /*
      * Store back the number of blocks since new blocks may be created of
@@ -829,7 +836,7 @@
 
     /* Parse all instructions and put them into containing basic blocks */
     while (codePtr < codeEnd) {
-        MIR *insn = (MIR *) oatNew(sizeof(MIR), true);
+        MIR *insn = (MIR *) oatNew(sizeof(MIR), true, kAllocMIR);
         insn->offset = curOffset;
         int width = parseInsn(codePtr, &insn->dalvikInsn, false);
         insn->width = width;
@@ -843,15 +850,18 @@
         codePtr += width;
         int flags = dexGetFlagsFromOpcode(insn->dalvikInsn.opcode);
 
-        cUnit->usesFP |= (oatDataFlowAttributes[insn->dalvikInsn.opcode] &
-              DF_USES_FP);
+        int dfFlags = oatDataFlowAttributes[insn->dalvikInsn.opcode];
+
+        if (dfFlags & DF_HAS_DEFS) {
+            cUnit->defCount += (dfFlags & DF_DA_WIDE) ? 2 : 1;
+        }
 
         if (flags & kInstrCanBranch) {
             curBlock = processCanBranch(cUnit.get(), curBlock, insn, curOffset,
                                         width, flags, codePtr, codeEnd);
         } else if (flags & kInstrCanReturn) {
             curBlock->fallThrough = exitBlock;
-            oatSetBit(exitBlock->predecessors, curBlock->id);
+            oatInsertGrowableList(exitBlock->predecessors, (intptr_t)curBlock);
             /*
              * Terminate the current block if there are instructions
              * afterwards.
@@ -899,14 +909,14 @@
             if ((curBlock->fallThrough == NULL) &&
                 (flags & kInstrCanContinue)) {
                 curBlock->fallThrough = nextBlock;
-                oatSetBit(nextBlock->predecessors, curBlock->id);
+                oatInsertGrowableList(nextBlock->predecessors,
+                                      (intptr_t)curBlock);
             }
             curBlock = nextBlock;
         }
     }
 
-    if (!cUnit->usesFP &&
-        !(cUnit->disableOpt & (1 << kSkipLargeMethodOptimization))) {
+    if (!(cUnit->disableOpt & (1 << kSkipLargeMethodOptimization))) {
         if ((cUnit->numBlocks > MANY_BLOCKS) ||
               ((cUnit->numBlocks > MANY_BLOCKS_INITIALIZER) &&
                PrettyMethod(method_idx, dex_file).find("init>") !=
@@ -929,7 +939,8 @@
 
     if (cUnit->enableDebug & (1 << kDebugVerifyDataflow)) {
         /* Verify if all blocks are connected as claimed */
-        oatDataFlowAnalysisDispatcher(cUnit.get(), verifyPredInfo, kAllNodes, false /* isIterative */);
+        oatDataFlowAnalysisDispatcher(cUnit.get(), verifyPredInfo, kAllNodes,
+                                      false /* isIterative */);
     }
 
     /* Perform SSA transformation for the whole method */
@@ -987,7 +998,14 @@
                                                 vmapTable);
 
     VLOG(compiler) << "Compiled " << PrettyMethod(method_idx, dex_file)
-                   << " (" << (cUnit->codeBuffer.size() * sizeof(cUnit->codeBuffer[0])) << " bytes)";
+       << " (" << (cUnit->codeBuffer.size() * sizeof(cUnit->codeBuffer[0]))
+       << " bytes)";
+
+#ifdef WITH_MEMSTATS
+    if (cUnit->enableDebug & (1 << kDebugShowMemoryUsage)) {
+        oatDumpMemStats(cUnit.get());
+    }
+#endif
 
     return result;
 }
diff --git a/src/compiler/IntermediateRep.cc b/src/compiler/IntermediateRep.cc
index c39aed3..6cb6580 100644
--- a/src/compiler/IntermediateRep.cc
+++ b/src/compiler/IntermediateRep.cc
@@ -20,13 +20,15 @@
 namespace art {
 
 /* Allocate a new basic block */
-BasicBlock* oatNewBB(BBType blockType, int blockId)
+BasicBlock* oatNewBB(CompilationUnit* cUnit, BBType blockType, int blockId)
 {
-    BasicBlock* bb = (BasicBlock* )oatNew(sizeof(BasicBlock), true);
+    BasicBlock* bb = (BasicBlock* )oatNew(sizeof(BasicBlock), true, kAllocBB);
     bb->blockType = blockType;
     bb->id = blockId;
-    bb->predecessors = oatAllocBitVector(blockId > 32 ? blockId : 32,
-                                                 true /* expandable */);
+    bb->predecessors = (GrowableList*) oatNew(sizeof(GrowableList), false,
+                                              kAllocPredecessors);
+    oatInitGrowableList(bb->predecessors, (blockType == kExitBlock) ? 2048 : 2,
+                        kListPredecessors);
     return bb;
 }
 
diff --git a/src/compiler/Ralloc.cc b/src/compiler/Ralloc.cc
index c6375ee..4529a83 100644
--- a/src/compiler/Ralloc.cc
+++ b/src/compiler/Ralloc.cc
@@ -315,7 +315,8 @@
     RegLocation* loc;
 
     /* Allocate the location map */
-    loc = (RegLocation*)oatNew(cUnit->numSSARegs * sizeof(*loc), true);
+    loc = (RegLocation*)oatNew(cUnit->numSSARegs * sizeof(*loc), true,
+                               kAllocRegAlloc);
     for (i=0; i< cUnit->numSSARegs; i++) {
         loc[i] = freshLoc;
         loc[i].sRegLow = i;
@@ -325,7 +326,8 @@
     /* Allocation the promotion map */
     int numRegs = cUnit->numDalvikRegisters;
     cUnit->promotionMap =
-        (PromotionMap*)oatNew(numRegs * sizeof(cUnit->promotionMap[0]), true);
+        (PromotionMap*)oatNew(numRegs * sizeof(cUnit->promotionMap[0]), true,
+                              kAllocRegAlloc);
 
     /* Add types of incoming arguments based on signature */
     int numIns = cUnit->numIns;
diff --git a/src/compiler/SSATransformation.cc b/src/compiler/SSATransformation.cc
index e9c3d70..3e1728f 100644
--- a/src/compiler/SSATransformation.cc
+++ b/src/compiler/SSATransformation.cc
@@ -55,7 +55,7 @@
 {
     /* Initialize or reset the DFS preOrder list */
     if (cUnit->dfsOrder.elemList == NULL) {
-        oatInitGrowableList(&cUnit->dfsOrder, cUnit->numBlocks);
+        oatInitGrowableList(&cUnit->dfsOrder, cUnit->numBlocks, kListDfsOrder);
     } else {
         /* Just reset the used length on the counter */
         cUnit->dfsOrder.numUsed = 0;
@@ -63,7 +63,8 @@
 
     /* Initialize or reset the DFS postOrder list */
     if (cUnit->dfsPostOrder.elemList == NULL) {
-        oatInitGrowableList(&cUnit->dfsPostOrder, cUnit->numBlocks);
+        oatInitGrowableList(&cUnit->dfsPostOrder, cUnit->numBlocks,
+                            kListDfsPostOrder);
     } else {
         /* Just reset the used length on the counter */
         cUnit->dfsPostOrder.numUsed = 0;
@@ -102,13 +103,14 @@
     int numRegisters = cUnit->numDalvikRegisters;
     /* Allocate numDalvikRegisters bit vector pointers */
     cUnit->defBlockMatrix = (ArenaBitVector **)
-        oatNew(sizeof(ArenaBitVector *) * numRegisters, true);
+        oatNew(sizeof(ArenaBitVector *) * numRegisters, true,
+               kAllocDFInfo);
     int i;
 
     /* Initialize numRegister vectors with numBlocks bits each */
     for (i = 0; i < numRegisters; i++) {
-        cUnit->defBlockMatrix[i] = oatAllocBitVector(cUnit->numBlocks,
-                                                             false);
+        cUnit->defBlockMatrix[i] = oatAllocBitVector(cUnit->numBlocks, false,
+                                                     kBitMapBMatrix);
     }
     oatDataFlowAnalysisDispatcher(cUnit, oatFindLocalLiveIn,
                                           kAllNodes,
@@ -224,11 +226,14 @@
 
     if (bb->dominators == NULL ) {
         bb->dominators = oatAllocBitVector(numTotalBlocks,
-                                                   false /* expandable */);
+                                           false /* expandable */,
+                                           kBitMapDominators);
         bb->iDominated = oatAllocBitVector(numTotalBlocks,
-                                                   false /* expandable */);
+                                           false /* expandable */,
+                                           kBitMapIDominated);
         bb->domFrontier = oatAllocBitVector(numTotalBlocks,
-                                                   false /* expandable */);
+                                            false /* expandable */,
+                                            kBitMapDomFrontier);
     } else {
         oatClearAllBits(bb->dominators);
         oatClearAllBits(bb->iDominated);
@@ -250,7 +255,7 @@
     GrowableList* blockList = &cUnit->blockList;
     int numTotalBlocks = blockList->numUsed;
     ArenaBitVector* tempBlockV = cUnit->tempBlockV;
-    ArenaBitVectorIterator bvIterator;
+    GrowableListIterator iter;
 
     /*
      * The dominator of the entry block has been preset to itself and we need
@@ -261,12 +266,10 @@
     oatSetInitialBits(tempBlockV, numTotalBlocks);
 
     /* Iterate through the predecessors */
-    oatBitVectorIteratorInit(bb->predecessors, &bvIterator);
+    oatGrowableListIteratorInit(bb->predecessors, &iter);
     while (true) {
-        int predIdx = oatBitVectorIteratorNext(&bvIterator);
-        if (predIdx == -1) break;
-        BasicBlock* predBB = (BasicBlock* ) oatGrowableListGetElement(
-                                 blockList, predIdx);
+        BasicBlock* predBB = (BasicBlock*)oatGrowableListIteratorNext(&iter);
+        if (!predBB) break;
         /* tempBlockV = tempBlockV ^ dominators */
         if (predBB->dominators != NULL) {
             oatIntersectBitVectors(tempBlockV, tempBlockV, predBB->dominators);
@@ -350,8 +353,7 @@
 /* Worker function to compute each block's immediate dominator */
 STATIC bool computeBlockIDom(CompilationUnit* cUnit, BasicBlock* bb)
 {
-    GrowableList* blockList = &cUnit->blockList;
-    ArenaBitVectorIterator bvIterator;
+    GrowableListIterator iter;
     int idom = -1;
 
     /* Special-case entry block */
@@ -360,15 +362,12 @@
     }
 
     /* Iterate through the predecessors */
-    oatBitVectorIteratorInit(bb->predecessors, &bvIterator);
+    oatGrowableListIteratorInit(bb->predecessors, &iter);
 
     /* Find the first processed predecessor */
     while (true) {
-        //TUNING: hot call to oatBitVectorIteratorNext
-        int predIdx = oatBitVectorIteratorNext(&bvIterator);
-        DCHECK_NE(predIdx, -1);  /* Should find one */
-        BasicBlock* predBB = (BasicBlock* ) oatGrowableListGetElement(
-                                 blockList, predIdx);
+        BasicBlock* predBB = (BasicBlock*)oatGrowableListIteratorNext(&iter);
+        CHECK(predBB != NULL);
         if (cUnit->iDomList[predBB->dfsId] != NOTVISITED) {
             idom = predBB->dfsId;
             break;
@@ -377,10 +376,8 @@
 
     /* Scan the rest of the predecessors */
     while (true) {
-        int predIdx = oatBitVectorIteratorNext(&bvIterator);
-        if (predIdx == -1) break;
-        BasicBlock* predBB = (BasicBlock* ) oatGrowableListGetElement(
-                                 blockList, predIdx);
+        BasicBlock* predBB = (BasicBlock*)oatGrowableListIteratorNext(&iter);
+        if (!predBB) break;
         if (cUnit->iDomList[predBB->dfsId] == NOTVISITED) {
             continue;
         } else {
@@ -441,7 +438,8 @@
 
     /* Initalize & Clear iDomList */
     if (cUnit->iDomList == NULL) {
-        cUnit->iDomList = (int*)oatNew(sizeof(int) * numReachableBlocks, false);
+        cUnit->iDomList = (int*)oatNew(sizeof(int) * numReachableBlocks, false,
+                                       kAllocDFInfo);
     }
     for (int i = 0; i < numReachableBlocks; i++) {
         cUnit->iDomList[i] = NOTVISITED;
@@ -462,7 +460,8 @@
 
     if (cUnit->tempBlockV == NULL) {
         cUnit->tempBlockV = oatAllocBitVector(numTotalBlocks,
-                                                  false /* expandable */);
+                                              false /* expandable */,
+                                              kBitMapTmpBlockV);
     } else {
         oatClearAllBits(cUnit->tempBlockV);
     }
@@ -493,7 +492,8 @@
      * iDominated sets.
      */
     if (cUnit->domPostOrderTraversal.elemList == NULL) {
-        oatInitGrowableList(&cUnit->domPostOrderTraversal, numReachableBlocks);
+        oatInitGrowableList(&cUnit->domPostOrderTraversal, numReachableBlocks,
+                            kListDomPostOrderTraversal);
     } else {
         cUnit->domPostOrderTraversal.numUsed = 0;
     }
@@ -576,14 +576,15 @@
     int dalvikReg;
     const GrowableList* blockList = &cUnit->blockList;
     ArenaBitVector* phiBlocks =
-        oatAllocBitVector(cUnit->numBlocks, false);
+        oatAllocBitVector(cUnit->numBlocks, false, kBitMapPhi);
     ArenaBitVector* tmpBlocks =
-        oatAllocBitVector(cUnit->numBlocks, false);
+        oatAllocBitVector(cUnit->numBlocks, false, kBitMapTmpBlocks);
     ArenaBitVector* inputBlocks =
-        oatAllocBitVector(cUnit->numBlocks, false);
+        oatAllocBitVector(cUnit->numBlocks, false, kBitMapInputBlocks);
 
     cUnit->tempDalvikRegisterV =
-        oatAllocBitVector(cUnit->numDalvikRegisters, false);
+        oatAllocBitVector(cUnit->numDalvikRegisters, false,
+                          kBitMapRegisterV);
 
     oatDataFlowAnalysisDispatcher(cUnit, computeBlockLiveIns,
                                           kPostOrderDFSTraversal,
@@ -641,7 +642,7 @@
                 (BasicBlock* ) oatGrowableListGetElement(blockList, idx);
             /* Variable will be clobbered before being used - no need for phi */
             if (!oatIsBitSet(phiBB->dataFlowInfo->liveInV, dalvikReg)) continue;
-            MIR *phi = (MIR *) oatNew(sizeof(MIR), true);
+            MIR *phi = (MIR *) oatNew(sizeof(MIR), true, kAllocDFInfo);
             phi->dalvikInsn.opcode = (Opcode)kMirOpPhi;
             phi->dalvikInsn.vA = dalvikReg;
             phi->offset = phiBB->startOffset;
@@ -659,8 +660,7 @@
 STATIC bool insertPhiNodeOperands(CompilationUnit* cUnit, BasicBlock* bb)
 {
     ArenaBitVector* ssaRegV = cUnit->tempSSARegisterV;
-    ArenaBitVectorIterator bvIterator;
-    GrowableList* blockList = &cUnit->blockList;
+    GrowableListIterator iter;
     MIR *mir;
 
     /* Phi nodes are at the beginning of each block */
@@ -675,12 +675,11 @@
         oatClearAllBits(ssaRegV);
 
         /* Iterate through the predecessors */
-        oatBitVectorIteratorInit(bb->predecessors, &bvIterator);
+        oatGrowableListIteratorInit(bb->predecessors, &iter);
         while (true) {
-            int predIdx = oatBitVectorIteratorNext(&bvIterator);
-            if (predIdx == -1) break;
-            BasicBlock* predBB = (BasicBlock* ) oatGrowableListGetElement(
-                                     blockList, predIdx);
+            BasicBlock* predBB =
+               (BasicBlock*)oatGrowableListIteratorNext(&iter);
+            if (!predBB) break;
             int encodedSSAValue =
                 predBB->dataFlowInfo->dalvikToSSAMap[dalvikReg];
             int ssaReg = DECODE_REG(encodedSSAValue);
@@ -691,9 +690,9 @@
         int numUses = oatCountSetBits(ssaRegV);
         mir->ssaRep->numUses = numUses;
         mir->ssaRep->uses =
-            (int *) oatNew(sizeof(int) * numUses, false);
+            (int *) oatNew(sizeof(int) * numUses, false, kAllocDFInfo);
         mir->ssaRep->fpUse =
-            (bool *) oatNew(sizeof(bool) * numUses, true);
+            (bool *) oatNew(sizeof(bool) * numUses, true, kAllocDFInfo);
 
         ArenaBitVectorIterator phiIterator;
 
@@ -722,7 +721,7 @@
     int mapSize = sizeof(int) * cUnit->numDalvikRegisters;
 
     /* Save SSA map snapshot */
-    int* savedSSAMap = (int*)oatNew(mapSize, false);
+    int* savedSSAMap = (int*)oatNew(mapSize, false, kAllocDalvikToSSAMap);
     memcpy(savedSSAMap, cUnit->dalvikToSSAMap, mapSize);
 
     if (block->fallThrough) {
@@ -786,8 +785,8 @@
          * Shared temp bit vector used by each block to count the number of defs
          * from all the predecessor blocks.
          */
-        cUnit->tempSSARegisterV = oatAllocBitVector(cUnit->numSSARegs,
-                                                        false);
+        cUnit->tempSSARegisterV = oatAllocBitVector(cUnit->numSSARegs, false,
+                                                    kBitMapTempSSARegisterV);
 
         /* Insert phi-operands with latest SSA names from predecessor blocks */
         oatDataFlowAnalysisDispatcher(cUnit, insertPhiNodeOperands,
diff --git a/src/compiler/Utility.cc b/src/compiler/Utility.cc
index c76143b..0cdcfd3 100644
--- a/src/compiler/Utility.cc
+++ b/src/compiler/Utility.cc
@@ -22,6 +22,67 @@
 static ArenaMemBlock *arenaHead, *currentArena;
 static int numArenaBlocks;
 
+#ifdef WITH_MEMSTATS
+static u4 allocStats[kNumAllocKinds];
+static int listSizes[kNumListKinds];
+static int listWasted[kNumListKinds];
+static int listGrows[kNumListKinds];
+static int listMaxElems[kNumListKinds];
+static int bitMapSizes[kNumBitMapKinds];
+static int bitMapWasted[kNumBitMapKinds];
+static int bitMapGrows[kNumBitMapKinds];
+
+const char* allocNames[kNumAllocKinds] = {
+    "Misc       ",
+    "BasicBlock ",
+    "LIR        ",
+    "MIR        ",
+    "DataFlow   ",
+    "GrowList   ",
+    "GrowBitMap ",
+    "Dalvik2SSA ",
+    "DebugInfo  ",
+    "Successor  ",
+    "RegAlloc   ",
+    "Data       ",
+    "Preds      ",
+};
+
+const char* listNames[kNumListKinds] = {
+    "Misc                  ",
+    "blockList             ",
+    "SSAtoDalvik           ",
+    "dfsOrder              ",
+    "dfsPostOrder          ",
+    "domPostOrderTraversal ",
+    "throwLaunchPads       ",
+    "suspendLaunchPads     ",
+    "switchTables          ",
+    "fillArrayData         ",
+    "SuccessorBlocks       ",
+    "Predecessors          ",
+};
+
+const char* bitMapNames[kNumBitMapKinds] = {
+    "Misc                  ",
+    "Use                   ",
+    "Def                   ",
+    "LiveIn                ",
+    "BlockMatrix           ",
+    "Dominators            ",
+    "IDominated            ",
+    "DomFrontier           ",
+    "Phi                   ",
+    "TmpBlocks             ",
+    "InputBlocks           ",
+    "RegisterV             ",
+    "TempSSARegisterV      ",
+    "Null Check            ",
+    "TmpBlockV             ",
+    "Predecessors          ",
+};
+#endif
+
 #define kArenaBitVectorGrowth    4   /* increase by 4 u4s when limit hit */
 
 /* Allocate the initial memory block for arena-based allocation */
@@ -38,14 +99,16 @@
     currentArena->bytesAllocated = 0;
     currentArena->next = NULL;
     numArenaBlocks = 1;
-
     return true;
 }
 
 /* Arena-based malloc for compilation tasks */
-void* oatNew(size_t size, bool zero)
+void* oatNew(size_t size, bool zero, oatAllocKind kind)
 {
     size = (size + 3) & ~3;
+#ifdef WITH_MEMSTATS
+    allocStats[kind] += size;
+#endif
 retry:
     /* Normal case - space is available in the current page */
     if (size + currentArena->bytesAllocated <= currentArena->blockSize) {
@@ -91,6 +154,16 @@
 /* Reclaim all the arena blocks allocated so far */
 void oatArenaReset(void)
 {
+#ifdef WITH_MEMSTATS
+    memset(&allocStats[0], 0, sizeof(allocStats));
+    memset(&listSizes[0], 0, sizeof(listSizes));
+    memset(&listWasted[0], 0, sizeof(listWasted));
+    memset(&listGrows[0], 0, sizeof(listGrows));
+    memset(&listMaxElems[0], 0, sizeof(listMaxElems));
+    memset(&bitMapSizes[0], 0, sizeof(bitMapSizes));
+    memset(&bitMapWasted[0], 0, sizeof(bitMapWasted));
+    memset(&bitMapGrows[0], 0, sizeof(bitMapGrows));
+#endif
     currentArena = arenaHead;
     if (currentArena) {
         currentArena->bytesAllocated = 0;
@@ -98,12 +171,20 @@
 }
 
 /* Growable List initialization */
-void oatInitGrowableList(GrowableList* gList, size_t initLength)
+void oatInitGrowableList(GrowableList* gList, size_t initLength,
+                         oatListKind kind)
 {
     gList->numAllocated = initLength;
     gList->numUsed = 0;
     gList->elemList = (intptr_t *) oatNew(sizeof(intptr_t) * initLength,
-                                                  true);
+                                                  true, kAllocGrowableList);
+#ifdef WITH_MEMSTATS
+    listSizes[kind] += sizeof(intptr_t) * initLength;
+    gList->kind = kind;
+    if ((int)initLength > listMaxElems[kind]) {
+        listMaxElems[kind] = initLength;
+    }
+#endif
 }
 
 /* Expand the capacity of a growable list */
@@ -116,8 +197,17 @@
         newLength += 128;
     }
     intptr_t *newArray =
-        (intptr_t *) oatNew(sizeof(intptr_t) * newLength, true);
+        (intptr_t *) oatNew(sizeof(intptr_t) * newLength, true,
+                            kAllocGrowableList);
     memcpy(newArray, gList->elemList, sizeof(intptr_t) * gList->numAllocated);
+#ifdef WITH_MEMSTATS
+    listSizes[gList->kind] += sizeof(intptr_t) * newLength;
+    listWasted[gList->kind] += sizeof(intptr_t) * gList->numAllocated;
+    listGrows[gList->kind]++;
+    if (newLength > listMaxElems[gList->kind]) {
+        listMaxElems[gList->kind] = newLength;
+    }
+#endif
     gList->numAllocated = newLength;
     gList->elemList = newArray;
 }
@@ -132,6 +222,22 @@
     gList->elemList[gList->numUsed++] = elem;
 }
 
+/* Delete the first match of elem from a growable list; it must be present */
+void oatDeleteGrowableList(GrowableList* gList, intptr_t elem)
+{
+    unsigned int idx = 0;
+    while ((idx < gList->numUsed) && (gList->elemList[idx] != elem)) {
+        idx++;
+    }
+    bool found = (idx < gList->numUsed);
+    DCHECK_EQ(found, true);
+    if (!found) return;  /* if the DCHECK is compiled out, leave list intact */
+    for (; idx + 1 < gList->numUsed; idx++) {  /* never read slot numUsed */
+        gList->elemList[idx] = gList->elemList[idx + 1];
+    }
+    gList->numUsed--;
+}
+
 void oatGrowableListIteratorInit(GrowableList* gList,
                                  GrowableListIterator* iterator)
 {
@@ -153,6 +259,44 @@
     return gList->elemList[idx];
 }
 
+#ifdef WITH_MEMSTATS
+/* Dump memory usage stats; logs only when total allocation exceeds 10MB */
+void oatDumpMemStats(CompilationUnit* cUnit)
+{
+    u4 total = 0;
+    for (int i = 0; i < kNumAllocKinds; i++) {
+        total += allocStats[i];
+    }
+    if (total > (10 * 1024 * 1024)) {
+        LOG(INFO) << "MEMUSAGE: " << total << " : "
+            << PrettyMethod(cUnit->method_idx, *cUnit->dex_file);
+        LOG(INFO) << "insnsSize: " << cUnit->insnsSize;
+        if (cUnit->disableDataflow) {
+            LOG(INFO) << " ** Dataflow disabled ** ";
+        }
+        LOG(INFO) << "===== Overall allocations";
+        for (int i = 0; i < kNumAllocKinds; i++) {
+            LOG(INFO) << allocNames[i] << std::setw(10) <<allocStats[i];
+        }
+        LOG(INFO) << "===== GrowableList allocations";
+        for (int i = 0; i < kNumListKinds; i++) {
+            LOG(INFO) << listNames[i]
+                << " S:" << listSizes[i]
+                << ", W:" << listWasted[i]
+                << ", G:" << listGrows[i]
+                << ", E:" << listMaxElems[i];
+        }
+        LOG(INFO) << "===== GrowableBitMap allocations";
+        for (int i = 0; i < kNumBitMapKinds; i++) {
+            LOG(INFO) << bitMapNames[i]
+                << " S:" << bitMapSizes[i]
+                << ", W:" << bitMapWasted[i]
+                << ", G:" << bitMapGrows[i];
+        }
+    }
+}
+#endif
+
 /* Debug Utility - dump a compilation unit */
 void oatDumpCompilationUnit(CompilationUnit* cUnit)
 {
@@ -217,22 +361,26 @@
  *
  * NOTE: memory is allocated from the compiler arena.
  */
-ArenaBitVector* oatAllocBitVector(unsigned int startBits, bool expandable)
+ArenaBitVector* oatAllocBitVector(unsigned int startBits, bool expandable,
+                                  oatBitMapKind kind)
 {
     ArenaBitVector* bv;
     unsigned int count;
 
     DCHECK_EQ(sizeof(bv->storage[0]), 4U);        /* assuming 32-bit units */
 
-    bv = (ArenaBitVector*) oatNew(sizeof(ArenaBitVector), false);
+    bv = (ArenaBitVector*) oatNew(sizeof(ArenaBitVector), false,
+                                  kAllocGrowableBitMap);
 
     count = (startBits + 31) >> 5;
 
     bv->storageSize = count;
     bv->expandable = expandable;
-    bv->storage = (u4*) oatNew(count * sizeof(u4), true);
-    bv->firstDirty = true;
-    bv->lastDirty = true;
+    bv->storage = (u4*) oatNew(count * sizeof(u4), true, kAllocGrowableBitMap);
+#ifdef WITH_MEMSTATS
+    bv->kind = kind;
+    bitMapSizes[kind] += count * sizeof(u4);
+#endif
     return bv;
 }
 
@@ -254,8 +402,6 @@
 {
     unsigned int count = pBits->storageSize;
     memset(pBits->storage, 0, count * sizeof(u4));
-    pBits->firstDirty = true;
-    pBits->lastDirty = true;
 }
 
 /*
@@ -276,21 +422,21 @@
         /* Round up to word boundaries for "num+1" bits */
         unsigned int newSize = (num + 1 + 31) >> 5;
         DCHECK_GT(newSize, pBits->storageSize);
-        u4 *newStorage = (u4*)oatNew(newSize * sizeof(u4), false);
+        u4 *newStorage = (u4*)oatNew(newSize * sizeof(u4), false,
+                                     kAllocGrowableBitMap);
         memcpy(newStorage, pBits->storage, pBits->storageSize * sizeof(u4));
         memset(&newStorage[pBits->storageSize], 0,
                (newSize - pBits->storageSize) * sizeof(u4));
+#ifdef WITH_MEMSTATS
+        bitMapWasted[pBits->kind] += pBits->storageSize * sizeof(u4);
+        bitMapSizes[pBits->kind] += newSize * sizeof(u4);
+        bitMapGrows[pBits->kind]++;
+#endif
         pBits->storage = newStorage;
         pBits->storageSize = newSize;
     }
 
     pBits->storage[num >> 5] |= checkMasks[num & 0x1f];
-    if (!pBits->firstDirty && ((int)num < pBits->firstBitSet)) {
-        pBits->firstBitSet = num;
-    }
-    if (!pBits->lastDirty && ((int)num > pBits->lastBitSet)) {
-        pBits->lastBitSet = num;
-    }
     return true;
 }
 
@@ -309,8 +455,6 @@
     }
 
     pBits->storage[num >> 5] &= ~checkMasks[num & 0x1f];
-    pBits->firstDirty = true;
-    pBits->lastDirty = true;
     return true;
 }
 
@@ -321,8 +465,6 @@
 {
     int value = set ? -1 : 0;
     memset(pBits->storage, value, pBits->storageSize * (int)sizeof(u4));
-    pBits->firstDirty = true;
-    pBits->lastDirty = true;
 }
 
 void oatDebugBitVector(char* msg, const ArenaBitVector* bv, int length)
@@ -388,10 +530,6 @@
     checkSizes(dest, src);
 
     memcpy(dest->storage, src->storage, sizeof(u4) * dest->storageSize);
-    dest->firstDirty = src->firstDirty;
-    dest->firstBitSet = src->firstBitSet;
-    dest->lastDirty = src->lastDirty;
-    dest->lastBitSet = src->lastBitSet;
 }
 
 /*
@@ -413,8 +551,6 @@
     for (idx = 0; idx < dest->storageSize; idx++) {
         dest->storage[idx] = src1->storage[idx] & src2->storage[idx];
     }
-    dest->firstDirty = true;
-    dest->lastDirty = true;
     return true;
 }
 
@@ -436,8 +572,6 @@
     for (idx = 0; idx < dest->storageSize; idx++) {
         dest->storage[idx] = src1->storage[idx] | src2->storage[idx];
     }
-    dest->firstDirty = true;
-    dest->lastDirty = true;
     return true;
 }
 
@@ -490,52 +624,37 @@
 {
     ArenaBitVector* pBits = iterator->pBits;
     u4 bitIndex = iterator->idx;
+    u4 bitSize = iterator->bitSize;
 
-    DCHECK_EQ(iterator->bitSize, pBits->storageSize * sizeof(u4) * 8);
-    if (bitIndex >= iterator->bitSize) return -1;
+    DCHECK_EQ(bitSize, pBits->storageSize * sizeof(u4) * 8);
 
-    /* If we know, skip past leading zeros */
-    if (!pBits->firstDirty && ((int)bitIndex < pBits->firstBitSet)) {
-        iterator->idx = pBits->firstBitSet + 1;
-        return pBits->firstBitSet;
-    }
+    if (bitIndex >= bitSize) return -1;
 
-    /* If we know, skip past trailing zeroes */
-    if (!pBits->lastDirty && ((int)bitIndex > pBits->lastBitSet)) {
-        iterator->idx = iterator->bitSize;
-        return -1;
-    }
+    u4 wordIndex = bitIndex >> 5;
+    u4 endWordIndex = bitSize >> 5;
+    u4* storage = pBits->storage;
+    u4 word = storage[wordIndex++];
 
-    bool firstPass = (bitIndex == 0);
-    u4 startIndex = bitIndex;
-    for (; bitIndex < iterator->bitSize;) {
-        unsigned int wordIndex = bitIndex >> 5;
-        unsigned int bitPos = bitIndex & 0x1f;
-        unsigned int word = pBits->storage[wordIndex];
-        if (word & checkMasks[bitPos]) {
-            iterator->idx = bitIndex+1;
-            if (firstPass && pBits->firstDirty) {
-                pBits->firstBitSet = bitIndex;
-                pBits->firstDirty = false;
-            }
-            return bitIndex;
-        }
+    // Mask out any bits in the first word we've already considered
+    word &= ~((1U << (bitIndex & 0x1f)) - 1);
+    for (; wordIndex <= endWordIndex;) {
+        u4 bitPos = bitIndex & 0x1f;
         if (word == 0) {
-            // Helps if this is a sparse vector
             bitIndex += (32 - bitPos);
-        } else {
+            // storage[endWordIndex] is one past the end; stop before loading it
+            if (wordIndex == endWordIndex) break;
+            word = storage[wordIndex++];
+            continue;
+        }
+        for (; bitPos < 32; bitPos++) {
+            if (word & (1U << bitPos)) {
+                iterator->idx = bitIndex + 1;
+                return bitIndex;
+            }
             bitIndex++;
         }
     }
-    /* No more set bits */
-    if (firstPass) {
-        // Empty
-        pBits->firstBitSet = -1;
-        pBits->firstDirty = false;
-    } else {
-        pBits->lastBitSet = startIndex - 1;
-        pBits->lastDirty = false;
-    }
+    iterator->idx = iterator->bitSize;
     return -1;
 }
 
@@ -555,8 +674,6 @@
     if (remNumBits) {
         pBits->storage[idx] = (1 << remNumBits) - 1;
     }
-    pBits->firstDirty = true;
-    pBits->lastDirty = true;
 }
 
 void oatGetBlockName(BasicBlock* bb, char* name)
diff --git a/src/compiler/codegen/RallocUtil.cc b/src/compiler/codegen/RallocUtil.cc
index 7dcb95b..2a4fe59 100644
--- a/src/compiler/codegen/RallocUtil.cc
+++ b/src/compiler/codegen/RallocUtil.cc
@@ -137,9 +137,8 @@
 }
 
 /* Mark a temp register as dead.  Does not affect allocation state. */
-void oatClobber(CompilationUnit* cUnit, int reg)
+static inline void clobberBody(CompilationUnit *cUnit, RegisterInfo* p)
 {
-    RegisterInfo* p = oatGetRegInfo(cUnit, reg);
     if (p->isTemp) {
         DCHECK(!(p->live && p->dirty))  << "Live & dirty temp in clobber";
         p->live = false;
@@ -153,6 +152,12 @@
     }
 }
 
+/* Mark a temp register as dead.  Does not affect allocation state. */
+void oatClobber(CompilationUnit* cUnit, int reg)
+{
+    clobberBody(cUnit, oatGetRegInfo(cUnit, reg));
+}
+
 STATIC void clobberSRegBody(RegisterInfo* p, int numRegs, int sReg)
 {
     int i;
@@ -577,13 +582,17 @@
     LOG(FATAL) << "Tried to lock a non-existant temp: r" << reg;
 }
 
-extern void oatResetDef(CompilationUnit* cUnit, int reg)
+static inline void resetDefBody(RegisterInfo* p)
 {
-    RegisterInfo* p = oatGetRegInfo(cUnit, reg);
     p->defStart = NULL;
     p->defEnd = NULL;
 }
 
+extern void oatResetDef(CompilationUnit* cUnit, int reg)
+{
+    resetDefBody(oatGetRegInfo(cUnit, reg));
+}
+
 STATIC void nullifyRange(CompilationUnit* cUnit, LIR *start, LIR *finish,
                          int sReg1, int sReg2)
 {
@@ -687,10 +696,10 @@
 {
     int i;
     for (i=0; i< cUnit->regPool->numCoreRegs; i++) {
-        oatResetDef(cUnit, cUnit->regPool->coreRegs[i].reg);
+        resetDefBody(&cUnit->regPool->coreRegs[i]);
     }
     for (i=0; i< cUnit->regPool->numFPRegs; i++) {
-        oatResetDef(cUnit, cUnit->regPool->FPRegs[i].reg);
+        resetDefBody(&cUnit->regPool->FPRegs[i]);
     }
 }
 
@@ -698,10 +707,10 @@
 {
     int i;
     for (i=0; i< cUnit->regPool->numCoreRegs; i++) {
-        oatClobber(cUnit, cUnit->regPool->coreRegs[i].reg);
+        clobberBody(cUnit, &cUnit->regPool->coreRegs[i]);
     }
     for (i=0; i< cUnit->regPool->numFPRegs; i++) {
-        oatClobber(cUnit, cUnit->regPool->FPRegs[i].reg);
+        clobberBody(cUnit, &cUnit->regPool->FPRegs[i]);
     }
 }
 
diff --git a/src/compiler/codegen/arm/ArchFactory.cc b/src/compiler/codegen/arm/ArchFactory.cc
index 14758b8..484a22d 100644
--- a/src/compiler/codegen/arm/ArchFactory.cc
+++ b/src/compiler/codegen/arm/ArchFactory.cc
@@ -56,7 +56,7 @@
 STATIC ArmLIR* genCheck(CompilationUnit* cUnit, ArmConditionCode cCode,
                         MIR* mir, ArmThrowKind kind)
 {
-    ArmLIR* tgt = (ArmLIR*)oatNew(sizeof(ArmLIR), true);
+    ArmLIR* tgt = (ArmLIR*)oatNew(sizeof(ArmLIR), true, kAllocLIR);
     tgt->opcode = kArmPseudoThrowTarget;
     tgt->operands[0] = kind;
     tgt->operands[1] = mir ? mir->offset : 0;
@@ -69,7 +69,7 @@
 STATIC ArmLIR* genImmedCheck(CompilationUnit* cUnit, ArmConditionCode cCode,
                              int reg, int immVal, MIR* mir, ArmThrowKind kind)
 {
-    ArmLIR* tgt = (ArmLIR*)oatNew(sizeof(ArmLIR), true);
+    ArmLIR* tgt = (ArmLIR*)oatNew(sizeof(ArmLIR), true, kAllocLIR);
     tgt->opcode = kArmPseudoThrowTarget;
     tgt->operands[0] = kind;
     tgt->operands[1] = mir->offset;
@@ -100,7 +100,7 @@
 STATIC TGT_LIR* genRegRegCheck(CompilationUnit* cUnit, ArmConditionCode cCode,
                                int reg1, int reg2, MIR* mir, ArmThrowKind kind)
 {
-    ArmLIR* tgt = (ArmLIR*)oatNew(sizeof(ArmLIR), true);
+    ArmLIR* tgt = (ArmLIR*)oatNew(sizeof(ArmLIR), true, kAllocLIR);
     tgt->opcode = kArmPseudoThrowTarget;
     tgt->operands[0] = kind;
     tgt->operands[1] = mir ? mir->offset : 0;
diff --git a/src/compiler/codegen/arm/ArmLIR.h b/src/compiler/codegen/arm/ArmLIR.h
index e77bed3..3c5daad 100644
--- a/src/compiler/codegen/arm/ArmLIR.h
+++ b/src/compiler/codegen/arm/ArmLIR.h
@@ -739,6 +739,7 @@
     kUsesCCodes,
     kMemLoad,
     kMemStore,
+    kPCRelFixup,
 } ArmOpFeatureFlags;
 
 #define IS_LOAD         (1 << kMemLoad)
@@ -770,6 +771,7 @@
 #define IS_IT           (1 << kIsIT)
 #define SETS_CCODES     (1 << kSetsCCodes)
 #define USES_CCODES     (1 << kUsesCCodes)
+#define NEEDS_FIXUP     (1 << kPCRelFixup)
 
 /* Common combo register usage patterns */
 #define REG_USE01       (REG_USE0 | REG_USE1)
@@ -844,9 +846,10 @@
         bool isNop:1;           // LIR is optimized away
         bool insertWrapper:1;   // insert branch to emulate memory accesses
         bool squashed:1;        // Eliminated def
+        bool pcRelFixup:1;      // May need pc-relative fixup
         unsigned int age:4;     // default is 0, set lazily by the optimizer
         unsigned int size:3;    // bytes (2 for thumb, 2/4 for thumb2)
-        unsigned int unused:22;
+        unsigned int unused:21;
     } flags;
     int aliasInfo;              // For Dalvik register & litpool disambiguation
     u8 useMask;                 // Resource mask for use
diff --git a/src/compiler/codegen/arm/ArmRallocUtil.cc b/src/compiler/codegen/arm/ArmRallocUtil.cc
index a193a7c..d540007 100644
--- a/src/compiler/codegen/arm/ArmRallocUtil.cc
+++ b/src/compiler/codegen/arm/ArmRallocUtil.cc
@@ -132,9 +132,9 @@
      * reg.
      */
     RefCounts *coreRegs = (RefCounts *)
-          oatNew(sizeof(RefCounts) * numRegs, true);
+          oatNew(sizeof(RefCounts) * numRegs, true, kAllocRegAlloc);
     RefCounts *fpRegs = (RefCounts *)
-          oatNew(sizeof(RefCounts) * numRegs, true);
+          oatNew(sizeof(RefCounts) * numRegs, true, kAllocRegAlloc);
     for (int i = 0; i < numRegs; i++) {
         coreRegs[i].sReg = fpRegs[i].sReg = i;
     }
diff --git a/src/compiler/codegen/arm/Assemble.cc b/src/compiler/codegen/arm/Assemble.cc
index 83c7a6c..685dd4c 100644
--- a/src/compiler/codegen/arm/Assemble.cc
+++ b/src/compiler/codegen/arm/Assemble.cc
@@ -118,7 +118,7 @@
                  "add", "!0C, !1C", 1),
     ENCODING_MAP(kThumbAddPcRel,    0xa000,
                  kFmtBitBlt, 10, 8, kFmtBitBlt, 7, 0, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_TERTIARY_OP | IS_BRANCH,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | IS_BRANCH | NEEDS_FIXUP,
                  "add", "!0C, pc, #!1E", 1),
     ENCODING_MAP(kThumbAddSpRel,    0xa800,
                  kFmtBitBlt, 10, 8, kFmtUnused, -1, -1, kFmtBitBlt, 7, 0,
@@ -145,11 +145,11 @@
                  "asrs", "!0C, !1C", 1),
     ENCODING_MAP(kThumbBCond,        0xd000,
                  kFmtBitBlt, 7, 0, kFmtBitBlt, 11, 8, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | IS_BRANCH | USES_CCODES,
-                 "b!1c", "!0t", 1),
+                 kFmtUnused, -1, -1, IS_BINARY_OP | IS_BRANCH | USES_CCODES |
+                 NEEDS_FIXUP, "b!1c", "!0t", 1),
     ENCODING_MAP(kThumbBUncond,      0xe000,
                  kFmtBitBlt, 10, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, NO_OPERAND | IS_BRANCH,
+                 kFmtUnused, -1, -1, NO_OPERAND | IS_BRANCH | NEEDS_FIXUP,
                  "b", "!0t", 1),
     ENCODING_MAP(kThumbBicRR,        0x4380,
                  kFmtBitBlt, 2, 0, kFmtBitBlt, 5, 3, kFmtUnused, -1, -1,
@@ -162,12 +162,12 @@
                  "bkpt", "!0d", 1),
     ENCODING_MAP(kThumbBlx1,         0xf000,
                  kFmtBitBlt, 10, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | IS_BRANCH | REG_DEF_LR,
-                 "blx_1", "!0u", 1),
+                 kFmtUnused, -1, -1, IS_BINARY_OP | IS_BRANCH | REG_DEF_LR |
+                 NEEDS_FIXUP, "blx_1", "!0u", 1),
     ENCODING_MAP(kThumbBlx2,         0xe800,
                  kFmtBitBlt, 10, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | IS_BRANCH | REG_DEF_LR,
-                 "blx_2", "!0v", 1),
+                 kFmtUnused, -1, -1, IS_BINARY_OP | IS_BRANCH | REG_DEF_LR |
+                 NEEDS_FIXUP, "blx_2", "!0v", 1),
     ENCODING_MAP(kThumbBl1,          0xf000,
                  kFmtBitBlt, 10, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_UNARY_OP | IS_BRANCH | REG_DEF_LR,
@@ -230,7 +230,7 @@
     ENCODING_MAP(kThumbLdrPcRel,    0x4800,
                  kFmtBitBlt, 10, 8, kFmtBitBlt, 7, 0, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0 | REG_USE_PC
-                 | IS_LOAD, "ldr", "!0C, [pc, #!1E]", 1),
+                 | IS_LOAD | NEEDS_FIXUP, "ldr", "!0C, [pc, #!1E]", 1),
     ENCODING_MAP(kThumbLdrSpRel,    0x9800,
                  kFmtBitBlt, 10, 8, kFmtUnused, -1, -1, kFmtBitBlt, 7, 0,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0 | REG_USE_SP
@@ -405,11 +405,11 @@
     ENCODING_MAP(kThumb2Vldrs,       0xed900a00,
                  kFmtSfp, 22, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 7, 0,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD |
-                 REG_DEF_LR, "vldr", "!0s, [!1C, #!2E]", 2),
+                 REG_DEF_LR | NEEDS_FIXUP, "vldr", "!0s, [!1C, #!2E]", 2),
     ENCODING_MAP(kThumb2Vldrd,       0xed900b00,
                  kFmtDfp, 22, 12, kFmtBitBlt, 19, 16, kFmtBitBlt, 7, 0,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1 | IS_LOAD |
-                 REG_DEF_LR, "vldr", "!0S, [!1C, #!2E]", 2),
+                 REG_DEF_LR | NEEDS_FIXUP, "vldr", "!0S, [!1C, #!2E]", 2),
     ENCODING_MAP(kThumb2Vmuls,        0xee200a00,
                  kFmtSfp, 22, 12, kFmtSfp, 7, 16, kFmtSfp, 5, 0,
                  kFmtUnused, -1, -1,
@@ -509,12 +509,12 @@
                  "ldr", "!0C, [!1C, #-!2d]", 2),
     ENCODING_MAP(kThumb2Cbnz,       0xb900, /* Note: does not affect flags */
                  kFmtBitBlt, 2, 0, kFmtImm6, -1, -1, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE0 | IS_BRANCH,
-                 "cbnz", "!0C,!1t", 1),
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE0 | IS_BRANCH |
+                 NEEDS_FIXUP, "cbnz", "!0C,!1t", 1),
     ENCODING_MAP(kThumb2Cbz,       0xb100, /* Note: does not affect flags */
                  kFmtBitBlt, 2, 0, kFmtImm6, -1, -1, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE0 | IS_BRANCH,
-                 "cbz", "!0C,!1t", 1),
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE0 | IS_BRANCH |
+                 NEEDS_FIXUP, "cbz", "!0C,!1t", 1),
     ENCODING_MAP(kThumb2AddRRI12,       0xf2000000,
                  kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtImm12, -1, -1,
                  kFmtUnused, -1, -1,
@@ -644,12 +644,12 @@
                  kFmtBitBlt, 15, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1,
                  IS_UNARY_OP | REG_DEF_SP | REG_USE_SP | REG_DEF_LIST0
-                 | IS_LOAD, "pop", "<!0R>", 2),
+                 | IS_LOAD | NEEDS_FIXUP, "pop", "<!0R>", 2),
     ENCODING_MAP(kThumb2Push,          0xe92d0000,
                  kFmtBitBlt, 15, 0, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1,
                  IS_UNARY_OP | REG_DEF_SP | REG_USE_SP | REG_USE_LIST0
-                 | IS_STORE, "push", "<!0R>", 2),
+                 | IS_STORE | NEEDS_FIXUP, "push", "<!0R>", 2),
     ENCODING_MAP(kThumb2CmpRI8, 0xf1b00f00,
                  kFmtBitBlt, 19, 16, kFmtModImm, -1, -1, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1,
@@ -791,12 +791,12 @@
     ENCODING_MAP(kThumb2LdrPcRel12,       0xf8df0000,
                  kFmtBitBlt, 15, 12, kFmtBitBlt, 11, 0, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1,
-                 IS_TERTIARY_OP | REG_DEF0 | REG_USE_PC | IS_LOAD,
+                 IS_TERTIARY_OP | REG_DEF0 | REG_USE_PC | IS_LOAD | NEEDS_FIXUP,
                  "ldr", "!0C, [r15pc, #!1d]", 2),
     ENCODING_MAP(kThumb2BCond,        0xf0008000,
                  kFmtBrOffset, -1, -1, kFmtBitBlt, 25, 22, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1,
-                 IS_BINARY_OP | IS_BRANCH | USES_CCODES,
+                 IS_BINARY_OP | IS_BRANCH | USES_CCODES | NEEDS_FIXUP,
                  "b!1c", "!0t", 2),
     ENCODING_MAP(kThumb2Vmovd_RR,       0xeeb00b40,
                  kFmtDfp, 22, 12, kFmtDfp, 5, 0, kFmtUnused, -1, -1,
@@ -931,15 +931,16 @@
     ENCODING_MAP(kThumb2Adr,         0xf20f0000,
                  kFmtBitBlt, 11, 8, kFmtImm12, -1, -1, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1,
-                 IS_TERTIARY_OP | REG_DEF0,/* Note: doesn't affect flags */
+                 /* Note: doesn't affect flags */
+                 IS_TERTIARY_OP | REG_DEF0 | NEEDS_FIXUP,
                  "adr", "!0C,#!1d", 2),
     ENCODING_MAP(kThumb2MovImm16LST,     0xf2400000,
                  kFmtBitBlt, 11, 8, kFmtImm16, -1, -1, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0 | NEEDS_FIXUP,
                  "mov", "!0C, #!1M", 2),
     ENCODING_MAP(kThumb2MovImm16HST,     0xf2c00000,
                  kFmtBitBlt, 11, 8, kFmtImm16, -1, -1, kFmtUnused, -1, -1,
-                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0,
+                 kFmtUnused, -1, -1, IS_BINARY_OP | REG_DEF0 | NEEDS_FIXUP,
                  "movh", "!0C, #!1M", 2),
     ENCODING_MAP(kThumb2LdmiaWB,         0xe8b00000,
                  kFmtBitBlt, 19, 16, kFmtBitBlt, 15, 0, kFmtUnused, -1, -1,
@@ -1067,6 +1068,7 @@
     AssemblerStatus res = kSuccess;  // Assume success
 
     for (lir = (ArmLIR *) cUnit->firstLIRInsn; lir; lir = NEXT_LIR(lir)) {
+
         if (lir->opcode < 0) {
             if ((lir->opcode == kArmPseudoPseudoAlign4) &&
                 /* 1 means padding is needed */
@@ -1091,244 +1093,249 @@
          * Of course, the patching itself may cause new overflows so this
          * is an iterative process.
          */
-
-        if (lir->opcode == kThumbLdrPcRel ||
-            lir->opcode == kThumb2LdrPcRel12 ||
-            lir->opcode == kThumbAddPcRel ||
-            ((lir->opcode == kThumb2Vldrd) && (lir->operands[1] == r15pc)) ||
-            ((lir->opcode == kThumb2Vldrs) && (lir->operands[1] == r15pc))) {
-            /*
-             * PC-relative loads are mostly used to load immediates
-             * that are too large to materialize directly in one shot.
-             * However, if the load displacement exceeds the limit,
-             * we revert to a 2-instruction materialization sequence.
-             */
-            ArmLIR *lirTarget = (ArmLIR *) lir->generic.target;
-            intptr_t pc = (lir->generic.offset + 4) & ~3;
-            intptr_t target = lirTarget->generic.offset;
-            int delta = target - pc;
-            if (delta & 0x3) {
-                LOG(FATAL) << "PC-rel offset not multiple of 4: " << delta;
-            }
-            // First, a sanity check for cases we shouldn't see now
-            if (((lir->opcode == kThumbAddPcRel) && (delta > 1020)) ||
-                ((lir->opcode == kThumbLdrPcRel) && (delta > 1020))) {
-                // Shouldn't happen in current codegen.
-                LOG(FATAL) << "Unexpected pc-rel offset " << delta;
-            }
-            // Now, check for the two difficult cases
-            if (((lir->opcode == kThumb2LdrPcRel12) && (delta > 4091)) ||
-                ((lir->opcode == kThumb2Vldrs) && (delta > 1020)) ||
-                ((lir->opcode == kThumb2Vldrd) && (delta > 1020))) {
+        if (lir->flags.pcRelFixup) {
+            if (lir->opcode == kThumbLdrPcRel ||
+                lir->opcode == kThumb2LdrPcRel12 ||
+                lir->opcode == kThumbAddPcRel ||
+                ((lir->opcode == kThumb2Vldrd) && (lir->operands[1] == r15pc)) ||
+                ((lir->opcode == kThumb2Vldrs) && (lir->operands[1] == r15pc))) {
                 /*
-                 * Note: because rLR may be used to fix up out-of-range
-                 * vldrs/vldrd we include REG_DEF_LR in the resource
-                 * masks for these instructions.
+                 * PC-relative loads are mostly used to load immediates
+                 * that are too large to materialize directly in one shot.
+                 * However, if the load displacement exceeds the limit,
+                 * we revert to a 2-instruction materialization sequence.
                  */
-                int baseReg = (lir->opcode == kThumb2LdrPcRel12) ?
-                    lir->operands[0] : rLR;
-
-                // Add new Adr to generate the address
-                ArmLIR *newAdr =
-                    (ArmLIR *)oatNew(sizeof(ArmLIR), true);
-                newAdr->generic.dalvikOffset = lir->generic.dalvikOffset;
-                newAdr->generic.target = lir->generic.target;
-                newAdr->opcode = kThumb2Adr;
-                newAdr->operands[0] = baseReg;
-                oatSetupResourceMasks(newAdr);
-                oatInsertLIRBefore((LIR*)lir, (LIR*)newAdr);
-
-                // Convert to normal load
-                if (lir->opcode == kThumb2LdrPcRel12) {
-                    lir->opcode = kThumb2LdrRRI12;
+                ArmLIR *lirTarget = (ArmLIR *) lir->generic.target;
+                intptr_t pc = (lir->generic.offset + 4) & ~3;
+                intptr_t target = lirTarget->generic.offset;
+                int delta = target - pc;
+                if (delta & 0x3) {
+                    LOG(FATAL) << "PC-rel offset not multiple of 4: " << delta;
                 }
-                // Change the load to be relative to the new Adr base
-                lir->operands[1] = baseReg;
-                lir->operands[2] = 0;
-                oatSetupResourceMasks(lir);
-                res = kRetryAll;
-            } else {
-                if ((lir->opcode == kThumb2Vldrs) ||
-                    (lir->opcode == kThumb2Vldrd)) {
-                    lir->operands[2] = delta >> 2;
+                // First, a sanity check for cases we shouldn't see now
+                if (((lir->opcode == kThumbAddPcRel) && (delta > 1020)) ||
+                    ((lir->opcode == kThumbLdrPcRel) && (delta > 1020))) {
+                    // Shouldn't happen in current codegen.
+                    LOG(FATAL) << "Unexpected pc-rel offset " << delta;
+                }
+                // Now, check for the two difficult cases
+                if (((lir->opcode == kThumb2LdrPcRel12) && (delta > 4091)) ||
+                    ((lir->opcode == kThumb2Vldrs) && (delta > 1020)) ||
+                    ((lir->opcode == kThumb2Vldrd) && (delta > 1020))) {
+                    /*
+                     * Note: because rLR may be used to fix up out-of-range
+                     * vldrs/vldrd we include REG_DEF_LR in the resource
+                     * masks for these instructions.
+                     */
+                    int baseReg = (lir->opcode == kThumb2LdrPcRel12) ?
+                        lir->operands[0] : rLR;
+
+                    // Add new Adr to generate the address
+                    ArmLIR *newAdr =
+                        (ArmLIR *)oatNew(sizeof(ArmLIR), true, kAllocLIR);
+                    newAdr->generic.dalvikOffset = lir->generic.dalvikOffset;
+                    newAdr->generic.target = lir->generic.target;
+                    newAdr->opcode = kThumb2Adr;
+                    newAdr->operands[0] = baseReg;
+                    oatSetupResourceMasks(newAdr);
+                    oatInsertLIRBefore((LIR*)lir, (LIR*)newAdr);
+
+                    // Convert to normal load
+                    if (lir->opcode == kThumb2LdrPcRel12) {
+                        lir->opcode = kThumb2LdrRRI12;
+                    }
+                    // Change the load to be relative to the new Adr base
+                    lir->operands[1] = baseReg;
+                    lir->operands[2] = 0;
+                    oatSetupResourceMasks(lir);
+                    res = kRetryAll;
                 } else {
-                    lir->operands[1] = (lir->opcode == kThumb2LdrPcRel12) ?
-                                        delta : delta >> 2;
-                }
-            }
-        } else if (lir->opcode == kThumb2Cbnz || lir->opcode == kThumb2Cbz) {
-            ArmLIR *targetLIR = (ArmLIR *) lir->generic.target;
-            intptr_t pc = lir->generic.offset + 4;
-            intptr_t target = targetLIR->generic.offset;
-            int delta = target - pc;
-            if (delta > 126 || delta < 0) {
-                /* Convert to cmp rx,#0 / b[eq/ne] tgt pair */
-                ArmLIR *newInst =
-                    (ArmLIR *)oatNew(sizeof(ArmLIR), true);
-                /* Make new branch instruction and insert after */
-                newInst->generic.dalvikOffset = lir->generic.dalvikOffset;
-                newInst->opcode = kThumbBCond;
-                newInst->operands[0] = 0;
-                newInst->operands[1] = (lir->opcode == kThumb2Cbz) ?
-                                        kArmCondEq : kArmCondNe;
-                newInst->generic.target = lir->generic.target;
-                oatSetupResourceMasks(newInst);
-                oatInsertLIRAfter((LIR *)lir, (LIR *)newInst);
-                /* Convert the cb[n]z to a cmp rx, #0 ] */
-                lir->opcode = kThumbCmpRI8;
-                /* operand[0] is src1 in both cb[n]z & CmpRI8 */
-                lir->operands[1] = 0;
-                lir->generic.target = 0;
-                oatSetupResourceMasks(lir);
-                res = kRetryAll;
-            } else {
-                lir->operands[1] = delta >> 1;
-            }
-        } else if (lir->opcode == kThumb2Push ||
-                   lir->opcode == kThumb2Pop) {
-            if (__builtin_popcount(lir->operands[0]) == 1) {
-                /*
-                 * The standard push/pop multiple instruction
-                 * requires at least two registers in the list.
-                 * If we've got just one, switch to the single-reg
-                 * encoding.
-                 */
-                lir->opcode = (lir->opcode == kThumb2Push)
-                    ? kThumb2Push1 : kThumb2Pop1;
-                int reg = 0;
-                while (lir->operands[0]) {
-                    if (lir->operands[0] & 0x1) {
-                        break;
+                    if ((lir->opcode == kThumb2Vldrs) ||
+                        (lir->opcode == kThumb2Vldrd)) {
+                        lir->operands[2] = delta >> 2;
                     } else {
-                        reg++;
-                        lir->operands[0] >>= 1;
+                        lir->operands[1] = (lir->opcode == kThumb2LdrPcRel12) ?
+                                            delta : delta >> 2;
                     }
                 }
-                lir->operands[0] = reg;
-                oatSetupResourceMasks(lir);
-                res = kRetryAll;
-            }
-        } else if (lir->opcode == kThumbBCond ||
-                   lir->opcode == kThumb2BCond) {
-            ArmLIR *targetLIR = (ArmLIR *) lir->generic.target;
-            int delta = 0;
-            DCHECK(targetLIR);
-            intptr_t pc = lir->generic.offset + 4;
-            intptr_t target = targetLIR->generic.offset;
-            delta = target - pc;
-            if ((lir->opcode == kThumbBCond) && (delta > 254 || delta < -256)) {
-                lir->opcode = kThumb2BCond;
-                oatSetupResourceMasks(lir);
-                res = kRetryAll;
-            }
-            lir->operands[0] = delta >> 1;
-        } else if (lir->opcode == kThumb2BUncond) {
-            ArmLIR *targetLIR = (ArmLIR *) lir->generic.target;
-            intptr_t pc = lir->generic.offset + 4;
-            intptr_t target = targetLIR->generic.offset;
-            int delta = target - pc;
-            lir->operands[0] = delta >> 1;
-            if (lir->operands[0] == 0) {  // Useless branch?
-                lir->flags.isNop = true;
-                res = kRetryAll;
-            }
-        } else if (lir->opcode == kThumbBUncond) {
-            ArmLIR *targetLIR = (ArmLIR *) lir->generic.target;
-            intptr_t pc = lir->generic.offset + 4;
-            intptr_t target = targetLIR->generic.offset;
-            int delta = target - pc;
-            if (delta > 2046 || delta < -2048) {
-                // Convert to Thumb2BCond w/ kArmCondAl
-                lir->opcode = kThumb2BUncond;
-                lir->operands[0] = 0;
-                oatSetupResourceMasks(lir);
-                res = kRetryAll;
-            }
-            lir->operands[0] = delta >> 1;
-            if ((lir->operands[0] == 0) ||
-                (lir->operands[0] == -1)) {  // Useless branch?
-                lir->flags.isNop = true;
-                res = kRetryAll;
-            }
-        } else if (lir->opcode == kThumbBlx1) {
-            DCHECK(NEXT_LIR(lir)->opcode == kThumbBlx2);
-            /* curPC is Thumb */
-            intptr_t curPC = (startAddr + lir->generic.offset + 4) & ~3;
-            intptr_t target = lir->operands[1];
+            } else if (lir->opcode == kThumb2Cbnz || lir->opcode == kThumb2Cbz) {
+                ArmLIR *targetLIR = (ArmLIR *) lir->generic.target;
+                intptr_t pc = lir->generic.offset + 4;
+                intptr_t target = targetLIR->generic.offset;
+                int delta = target - pc;
+                if (delta > 126 || delta < 0) {
+                    /* Convert to cmp rx,#0 / b[eq/ne] tgt pair */
+                    ArmLIR *newInst =
+                        (ArmLIR *)oatNew(sizeof(ArmLIR), true, kAllocLIR);
+                    /* Make new branch instruction and insert after */
+                    newInst->generic.dalvikOffset = lir->generic.dalvikOffset;
+                    newInst->opcode = kThumbBCond;
+                    newInst->operands[0] = 0;
+                    newInst->operands[1] = (lir->opcode == kThumb2Cbz) ?
+                                            kArmCondEq : kArmCondNe;
+                    newInst->generic.target = lir->generic.target;
+                    oatSetupResourceMasks(newInst);
+                    oatInsertLIRAfter((LIR *)lir, (LIR *)newInst);
+                    /* Convert the cb[n]z to a cmp rx, #0 */
+                    lir->opcode = kThumbCmpRI8;
+                    /* operand[0] is src1 in both cb[n]z & CmpRI8 */
+                    lir->operands[1] = 0;
+                    lir->generic.target = 0;
+                    oatSetupResourceMasks(lir);
+                    res = kRetryAll;
+                } else {
+                    lir->operands[1] = delta >> 1;
+                }
+            } else if (lir->opcode == kThumb2Push ||
+                       lir->opcode == kThumb2Pop) {
+                if (__builtin_popcount(lir->operands[0]) == 1) {
+                    /*
+                     * The standard push/pop multiple instruction
+                     * requires at least two registers in the list.
+                     * If we've got just one, switch to the single-reg
+                     * encoding.
+                     */
+                    lir->opcode = (lir->opcode == kThumb2Push)
+                        ? kThumb2Push1 : kThumb2Pop1;
+                    int reg = 0;
+                    while (lir->operands[0]) {
+                        if (lir->operands[0] & 0x1) {
+                            break;
+                        } else {
+                            reg++;
+                            lir->operands[0] >>= 1;
+                        }
+                    }
+                    lir->operands[0] = reg;
+                    oatSetupResourceMasks(lir);
+                    res = kRetryAll;
+                }
+            } else if (lir->opcode == kThumbBCond ||
+                       lir->opcode == kThumb2BCond) {
+                ArmLIR *targetLIR = (ArmLIR *) lir->generic.target;
+                int delta = 0;
+                DCHECK(targetLIR);
+                intptr_t pc = lir->generic.offset + 4;
+                intptr_t target = targetLIR->generic.offset;
+                delta = target - pc;
+                if ((lir->opcode == kThumbBCond) &&
+                    (delta > 254 || delta < -256)) {
+                    lir->opcode = kThumb2BCond;
+                    oatSetupResourceMasks(lir);
+                    res = kRetryAll;
+                }
+                lir->operands[0] = delta >> 1;
+            } else if (lir->opcode == kThumb2BUncond) {
+                ArmLIR *targetLIR = (ArmLIR *) lir->generic.target;
+                intptr_t pc = lir->generic.offset + 4;
+                intptr_t target = targetLIR->generic.offset;
+                int delta = target - pc;
+                lir->operands[0] = delta >> 1;
+                if (lir->operands[0] == 0) {  // Useless branch?
+                    lir->flags.isNop = true;
+                    res = kRetryAll;
+                }
+            } else if (lir->opcode == kThumbBUncond) {
+                ArmLIR *targetLIR = (ArmLIR *) lir->generic.target;
+                intptr_t pc = lir->generic.offset + 4;
+                intptr_t target = targetLIR->generic.offset;
+                int delta = target - pc;
+                if (delta > 2046 || delta < -2048) {
+                    // Out of 16-bit range: widen to kThumb2BUncond
+                    lir->opcode = kThumb2BUncond;
+                    lir->operands[0] = 0;
+                    oatSetupResourceMasks(lir);
+                    res = kRetryAll;
+                }
+                lir->operands[0] = delta >> 1;
+                if ((lir->operands[0] == 0) ||
+                    (lir->operands[0] == -1)) {  // Useless branch?
+                    lir->flags.isNop = true;
+                    res = kRetryAll;
+                }
+            } else if (lir->opcode == kThumbBlx1) {
+                DCHECK(NEXT_LIR(lir)->opcode == kThumbBlx2);
+                /* curPC is Thumb */
+                intptr_t curPC = (startAddr + lir->generic.offset + 4) & ~3;
+                intptr_t target = lir->operands[1];
 
-            /* Match bit[1] in target with base */
-            if (curPC & 0x2) {
-                target |= 0x2;
+                /* Match bit[1] in target with base */
+                if (curPC & 0x2) {
+                    target |= 0x2;
+                }
+                int delta = target - curPC;
+                DCHECK((delta >= -(1<<22)) && (delta <= ((1<<22)-2)));
+
+                lir->operands[0] = (delta >> 12) & 0x7ff;
+                NEXT_LIR(lir)->operands[0] = (delta>> 1) & 0x7ff;
+            } else if (lir->opcode == kThumbBl1) {
+                DCHECK(NEXT_LIR(lir)->opcode == kThumbBl2);
+                /* Both curPC and target are Thumb */
+                intptr_t curPC = startAddr + lir->generic.offset + 4;
+                intptr_t target = lir->operands[1];
+
+                int delta = target - curPC;
+                DCHECK((delta >= -(1<<22)) && (delta <= ((1<<22)-2)));
+
+                lir->operands[0] = (delta >> 12) & 0x7ff;
+                NEXT_LIR(lir)->operands[0] = (delta>> 1) & 0x7ff;
+            } else if (lir->opcode == kThumb2Adr) {
+                SwitchTable *tabRec = (SwitchTable*)lir->operands[2];
+                ArmLIR* target = (ArmLIR*)lir->generic.target;
+                int targetDisp = tabRec ? tabRec->offset
+                                        : target->generic.offset;
+                int disp = targetDisp - ((lir->generic.offset + 4) & ~3);
+                if (disp < 4096) {
+                    lir->operands[1] = disp;
+                } else {
+                    // convert to ldimm16l, ldimm16h, add tgt, pc, operands[0]
+                    ArmLIR *newMov16L =
+                        (ArmLIR *)oatNew(sizeof(ArmLIR), true, kAllocLIR);
+                    newMov16L->generic.dalvikOffset = lir->generic.dalvikOffset;
+                    newMov16L->generic.target = lir->generic.target;
+                    newMov16L->opcode = kThumb2MovImm16LST;
+                    newMov16L->operands[0] = lir->operands[0];
+                    newMov16L->operands[2] = (intptr_t)lir;
+                    newMov16L->operands[3] = (intptr_t)tabRec;
+                    oatSetupResourceMasks(newMov16L);
+                    oatInsertLIRBefore((LIR*)lir, (LIR*)newMov16L);
+                    ArmLIR *newMov16H =
+                        (ArmLIR *)oatNew(sizeof(ArmLIR), true, kAllocLIR);
+                    newMov16H->generic.dalvikOffset = lir->generic.dalvikOffset;
+                    newMov16H->generic.target = lir->generic.target;
+                    newMov16H->opcode = kThumb2MovImm16HST;
+                    newMov16H->operands[0] = lir->operands[0];
+                    newMov16H->operands[2] = (intptr_t)lir;
+                    newMov16H->operands[3] = (intptr_t)tabRec;
+                    oatSetupResourceMasks(newMov16H);
+                    oatInsertLIRBefore((LIR*)lir, (LIR*)newMov16H);
+                    lir->opcode = kThumb2AddRRR;
+                    lir->operands[1] = rPC;
+                    lir->operands[2] = lir->operands[0];
+                    oatSetupResourceMasks(lir);
+                    res = kRetryAll;
+                }
+            } else if (lir->opcode == kThumb2MovImm16LST) {
+                // operands[1] should hold disp, [2] has add, [3] has tabRec
+                ArmLIR *addPCInst = (ArmLIR*)lir->operands[2];
+                SwitchTable *tabRec = (SwitchTable*)lir->operands[3];
+                // If tabRec is null, this is a literal load. Use generic.target
+                ArmLIR* target = (ArmLIR*)lir->generic.target;
+                int targetDisp = tabRec ? tabRec->offset
+                                        : target->generic.offset;
+                lir->operands[1] = (targetDisp -
+                    (addPCInst->generic.offset + 4)) & 0xffff;
+            } else if (lir->opcode == kThumb2MovImm16HST) {
+                // operands[1] should hold disp, [2] has add, [3] has tabRec
+                ArmLIR *addPCInst = (ArmLIR*)lir->operands[2];
+                SwitchTable *tabRec = (SwitchTable*)lir->operands[3];
+                // If tabRec is null, this is a literal load. Use generic.target
+                ArmLIR* target = (ArmLIR*)lir->generic.target;
+                int targetDisp = tabRec ? tabRec->offset
+                                        : target->generic.offset;
+                lir->operands[1] = ((targetDisp -
+                    (addPCInst->generic.offset + 4)) >> 16) & 0xffff;
             }
-            int delta = target - curPC;
-            DCHECK((delta >= -(1<<22)) && (delta <= ((1<<22)-2)));
-
-            lir->operands[0] = (delta >> 12) & 0x7ff;
-            NEXT_LIR(lir)->operands[0] = (delta>> 1) & 0x7ff;
-        } else if (lir->opcode == kThumbBl1) {
-            DCHECK(NEXT_LIR(lir)->opcode == kThumbBl2);
-            /* Both curPC and target are Thumb */
-            intptr_t curPC = startAddr + lir->generic.offset + 4;
-            intptr_t target = lir->operands[1];
-
-            int delta = target - curPC;
-            DCHECK((delta >= -(1<<22)) && (delta <= ((1<<22)-2)));
-
-            lir->operands[0] = (delta >> 12) & 0x7ff;
-            NEXT_LIR(lir)->operands[0] = (delta>> 1) & 0x7ff;
-        } else if (lir->opcode == kThumb2Adr) {
-            SwitchTable *tabRec = (SwitchTable*)lir->operands[2];
-            ArmLIR* target = (ArmLIR*)lir->generic.target;
-            int targetDisp = tabRec ? tabRec->offset : target->generic.offset;
-            int disp = targetDisp - ((lir->generic.offset + 4) & ~3);
-            if (disp < 4096) {
-                lir->operands[1] = disp;
-            } else {
-                // convert to ldimm16l, ldimm16h, add tgt, pc, operands[0]
-                ArmLIR *newMov16L =
-                    (ArmLIR *)oatNew(sizeof(ArmLIR), true);
-                newMov16L->generic.dalvikOffset = lir->generic.dalvikOffset;
-                newMov16L->generic.target = lir->generic.target;
-                newMov16L->opcode = kThumb2MovImm16LST;
-                newMov16L->operands[0] = lir->operands[0];
-                newMov16L->operands[2] = (intptr_t)lir;
-                newMov16L->operands[3] = (intptr_t)tabRec;
-                oatSetupResourceMasks(newMov16L);
-                oatInsertLIRBefore((LIR*)lir, (LIR*)newMov16L);
-                ArmLIR *newMov16H =
-                    (ArmLIR *)oatNew(sizeof(ArmLIR), true);
-                newMov16H->generic.dalvikOffset = lir->generic.dalvikOffset;
-                newMov16H->generic.target = lir->generic.target;
-                newMov16H->opcode = kThumb2MovImm16HST;
-                newMov16H->operands[0] = lir->operands[0];
-                newMov16H->operands[2] = (intptr_t)lir;
-                newMov16H->operands[3] = (intptr_t)tabRec;
-                oatSetupResourceMasks(newMov16H);
-                oatInsertLIRBefore((LIR*)lir, (LIR*)newMov16H);
-                lir->opcode = kThumb2AddRRR;
-                lir->operands[1] = rPC;
-                lir->operands[2] = lir->operands[0];
-                oatSetupResourceMasks(lir);
-                res = kRetryAll;
-            }
-        } else if (lir->opcode == kThumb2MovImm16LST) {
-            // operands[1] should hold disp, [2] has add, [3] has tabRec
-            ArmLIR *addPCInst = (ArmLIR*)lir->operands[2];
-            SwitchTable *tabRec = (SwitchTable*)lir->operands[3];
-            // If tabRec is null, this is a literal load - use generic.target
-            ArmLIR* target = (ArmLIR*)lir->generic.target;
-            int targetDisp = tabRec ? tabRec->offset : target->generic.offset;
-            lir->operands[1] = (targetDisp -
-                (addPCInst->generic.offset + 4)) & 0xffff;
-        } else if (lir->opcode == kThumb2MovImm16HST) {
-            // operands[1] should hold disp, [2] has add, [3] has tabRec
-            ArmLIR *addPCInst = (ArmLIR*)lir->operands[2];
-            SwitchTable *tabRec = (SwitchTable*)lir->operands[3];
-            // If tabRec is null, this is a literal load - use generic.target
-            ArmLIR* target = (ArmLIR*)lir->generic.target;
-            int targetDisp = tabRec ? tabRec->offset : target->generic.offset;
-            lir->operands[1] = ((targetDisp -
-                (addPCInst->generic.offset + 4)) >> 16) & 0xffff;
         }
         ArmEncodingMap *encoder = &EncodingMap[lir->opcode];
         u4 bits = encoder->skeleton;
@@ -1526,9 +1533,11 @@
          armLIR;
          armLIR = NEXT_LIR(armLIR)) {
         armLIR->generic.offset = offset;
-        if (armLIR->opcode >= 0 && !armLIR->flags.isNop) {
-            armLIR->flags.size = EncodingMap[armLIR->opcode].size * 2;
-            offset += armLIR->flags.size;
+        if (armLIR->opcode >= 0) {
+            if (!armLIR->flags.isNop) {
+                armLIR->flags.size = EncodingMap[armLIR->opcode].size * 2;
+                offset += armLIR->flags.size;
+            }
         } else if (armLIR->opcode == kArmPseudoPseudoAlign4) {
             if (offset & 0x2) {
                 offset += 2;
diff --git a/src/compiler/codegen/arm/CodegenCommon.cc b/src/compiler/codegen/arm/CodegenCommon.cc
index 26c17ef..c99573f 100644
--- a/src/compiler/codegen/arm/CodegenCommon.cc
+++ b/src/compiler/codegen/arm/CodegenCommon.cc
@@ -124,6 +124,10 @@
 
     flags = EncodingMap[lir->opcode].flags;
 
+    if (flags & NEEDS_FIXUP) {
+        lir->flags.pcRelFixup = true;
+    }
+
     /* Set up the mask for resources that are updated */
     if (flags & (IS_LOAD | IS_STORE)) {
         /* Default to heap - will catch specialized classes later */
@@ -241,7 +245,7 @@
  */
 STATIC ArmLIR* newLIR0(CompilationUnit* cUnit, ArmOpcode opcode)
 {
-    ArmLIR* insn = (ArmLIR* ) oatNew(sizeof(ArmLIR), true);
+    ArmLIR* insn = (ArmLIR* ) oatNew(sizeof(ArmLIR), true, kAllocLIR);
     DCHECK(isPseudoOpcode(opcode) || (EncodingMap[opcode].flags & NO_OPERAND));
     insn->opcode = opcode;
     setupResourceMasks(insn);
@@ -253,7 +257,7 @@
 STATIC ArmLIR* newLIR1(CompilationUnit* cUnit, ArmOpcode opcode,
                            int dest)
 {
-    ArmLIR* insn = (ArmLIR* ) oatNew(sizeof(ArmLIR), true);
+    ArmLIR* insn = (ArmLIR* ) oatNew(sizeof(ArmLIR), true, kAllocLIR);
     DCHECK(isPseudoOpcode(opcode) || (EncodingMap[opcode].flags & IS_UNARY_OP));
     insn->opcode = opcode;
     insn->operands[0] = dest;
@@ -266,7 +270,7 @@
 STATIC ArmLIR* newLIR2(CompilationUnit* cUnit, ArmOpcode opcode,
                            int dest, int src1)
 {
-    ArmLIR* insn = (ArmLIR* ) oatNew(sizeof(ArmLIR), true);
+    ArmLIR* insn = (ArmLIR* ) oatNew(sizeof(ArmLIR), true, kAllocLIR);
     DCHECK(isPseudoOpcode(opcode) ||
            (EncodingMap[opcode].flags & IS_BINARY_OP));
     insn->opcode = opcode;
@@ -281,7 +285,7 @@
 STATIC ArmLIR* newLIR3(CompilationUnit* cUnit, ArmOpcode opcode,
                            int dest, int src1, int src2)
 {
-    ArmLIR* insn = (ArmLIR* ) oatNew(sizeof(ArmLIR), true);
+    ArmLIR* insn = (ArmLIR* ) oatNew(sizeof(ArmLIR), true, kAllocLIR);
     DCHECK(isPseudoOpcode(opcode) ||
            (EncodingMap[opcode].flags & IS_TERTIARY_OP))
             << (int)opcode << " "
@@ -301,7 +305,7 @@
 STATIC ArmLIR* newLIR4(CompilationUnit* cUnit, ArmOpcode opcode,
                            int dest, int src1, int src2, int info)
 {
-    ArmLIR* insn = (ArmLIR* ) oatNew(sizeof(ArmLIR), true);
+    ArmLIR* insn = (ArmLIR* ) oatNew(sizeof(ArmLIR), true, kAllocLIR);
     DCHECK(isPseudoOpcode(opcode) ||
            (EncodingMap[opcode].flags & IS_QUAD_OP));
     insn->opcode = opcode;
@@ -361,7 +365,7 @@
 {
     /* Add the constant to the literal pool */
     if (constantListP) {
-        ArmLIR* newValue = (ArmLIR* ) oatNew(sizeof(ArmLIR), true);
+        ArmLIR* newValue = (ArmLIR* ) oatNew(sizeof(ArmLIR), true, kAllocData);
         newValue->operands[0] = value;
         newValue->generic.next = *constantListP;
         *constantListP = (LIR*) newValue;
diff --git a/src/compiler/codegen/arm/LocalOptimizations.cc b/src/compiler/codegen/arm/LocalOptimizations.cc
index eba701b..2883209 100644
--- a/src/compiler/codegen/arm/LocalOptimizations.cc
+++ b/src/compiler/codegen/arm/LocalOptimizations.cc
@@ -236,7 +236,7 @@
                 /* Only sink store instructions */
                 if (sinkDistance && !isThisLIRLoad) {
                     ArmLIR* newStoreLIR =
-                        (ArmLIR* ) oatNew(sizeof(ArmLIR), true);
+                        (ArmLIR* ) oatNew(sizeof(ArmLIR), true, kAllocLIR);
                     *newStoreLIR = *thisLIR;
                     /*
                      * Stop point found - insert *before* the checkLIR
@@ -424,7 +424,7 @@
             if (slot >= 0) {
                 ArmLIR* curLIR = prevInstList[slot];
                 ArmLIR* newLoadLIR = (ArmLIR* ) oatNew(sizeof(ArmLIR),
-                                                               true);
+                                                       true, kAllocLIR);
                 *newLoadLIR = *thisLIR;
                 /*
                  * Insertion is guaranteed to succeed since checkLIR
diff --git a/src/compiler/codegen/arm/MethodCodegenDriver.cc b/src/compiler/codegen/arm/MethodCodegenDriver.cc
index bb66451..1efab12 100644
--- a/src/compiler/codegen/arm/MethodCodegenDriver.cc
+++ b/src/compiler/codegen/arm/MethodCodegenDriver.cc
@@ -1859,13 +1859,20 @@
 STATIC void handleExtendedMethodMIR(CompilationUnit* cUnit, MIR* mir)
 {
     int opOffset = mir->dalvikInsn.opcode - kMirOpFirst;
-    char* msg = (char*)oatNew(strlen(extendedMIROpNames[opOffset]) + 1, false);
-    strcpy(msg, extendedMIROpNames[opOffset]);
+    char* msg = NULL;
+    if (cUnit->printMe) {
+        msg = (char*)oatNew(strlen(extendedMIROpNames[opOffset]) + 1, false,
+                            kAllocDebugInfo);
+        strcpy(msg, extendedMIROpNames[opOffset]);
+    }
     ArmLIR* op = newLIR1(cUnit, kArmPseudoExtended, (int) msg);
 
     switch ((ExtendedMIROpcode)mir->dalvikInsn.opcode) {
         case kMirOpPhi: {
-            char* ssaString = oatGetSSAString(cUnit, mir->ssaRep);
+            char* ssaString = NULL;
+            if (cUnit->printMe) {
+                ssaString = oatGetSSAString(cUnit, mir->ssaRep);
+            }
             op->flags.isNop = true;
             newLIR1(cUnit, kArmPseudoSSARep, (int) ssaString);
             break;
@@ -2043,9 +2050,10 @@
         ArmLIR* boundaryLIR;
 
         /* Mark the beginning of a Dalvik instruction for line tracking */
+        char* instStr = cUnit->printMe ?
+           oatGetDalvikDisassembly(&mir->dalvikInsn, "") : NULL;
         boundaryLIR = newLIR1(cUnit, kArmPseudoDalvikByteCodeBoundary,
-                             (int) oatGetDalvikDisassembly(
-                             &mir->dalvikInsn, ""));
+                              (intptr_t) instStr);
         cUnit->boundaryMap.insert(std::make_pair(mir->offset,
                                  (LIR*)boundaryLIR));
         /* Remember the first LIR for this block */
@@ -2227,7 +2235,7 @@
 {
     /* Used to hold the labels of each block */
     cUnit->blockLabelList =
-        (void *) oatNew(sizeof(ArmLIR) * cUnit->numBlocks, true);
+        (void *) oatNew(sizeof(ArmLIR) * cUnit->numBlocks, true, kAllocLIR);
 
     oatDataFlowAnalysisDispatcher(cUnit, methodBlockCodeGen,
                                   kPreOrderDFSTraversal, false /* Iterative */);
diff --git a/src/compiler/codegen/arm/Thumb2/Factory.cc b/src/compiler/codegen/arm/Thumb2/Factory.cc
index ebc30f8..34ffa60 100644
--- a/src/compiler/codegen/arm/Thumb2/Factory.cc
+++ b/src/compiler/codegen/arm/Thumb2/Factory.cc
@@ -69,7 +69,7 @@
     if (dataTarget == NULL) {
         dataTarget = addWordData(cUnit, &cUnit->literalList, value);
     }
-    ArmLIR* loadPcRel = (ArmLIR* ) oatNew(sizeof(ArmLIR), true);
+    ArmLIR* loadPcRel = (ArmLIR* ) oatNew(sizeof(ArmLIR), true, kAllocLIR);
     loadPcRel->generic.dalvikOffset = cUnit->currentDalvikOffset;
     loadPcRel->opcode = kThumb2Vldrs;
     loadPcRel->generic.target = (LIR* ) dataTarget;
@@ -178,7 +178,7 @@
     if (dataTarget == NULL) {
         dataTarget = addWordData(cUnit, &cUnit->literalList, value);
     }
-    ArmLIR* loadPcRel = (ArmLIR* ) oatNew(sizeof(ArmLIR), true);
+    ArmLIR* loadPcRel = (ArmLIR* ) oatNew(sizeof(ArmLIR), true, kAllocLIR);
     loadPcRel->opcode = kThumb2LdrPcRel12;
     loadPcRel->generic.target = (LIR* ) dataTarget;
     loadPcRel->generic.dalvikOffset = cUnit->currentDalvikOffset;
@@ -655,7 +655,8 @@
                 dataTarget = addWideData(cUnit, &cUnit->literalList, valLo,
                                          valHi);
             }
-            ArmLIR* loadPcRel = (ArmLIR* ) oatNew(sizeof(ArmLIR), true);
+            ArmLIR* loadPcRel = (ArmLIR* ) oatNew(sizeof(ArmLIR), true,
+                                                  kAllocLIR);
             loadPcRel->generic.dalvikOffset = cUnit->currentDalvikOffset;
             loadPcRel->opcode = kThumb2Vldrd;
             loadPcRel->generic.target = (LIR* ) dataTarget;
@@ -1071,7 +1072,7 @@
 
 STATIC ArmLIR* fpRegCopy(CompilationUnit* cUnit, int rDest, int rSrc)
 {
-    ArmLIR* res = (ArmLIR* ) oatNew(sizeof(ArmLIR), true);
+    ArmLIR* res = (ArmLIR* ) oatNew(sizeof(ArmLIR), true, kAllocLIR);
     res->generic.dalvikOffset = cUnit->currentDalvikOffset;
     res->operands[0] = rDest;
     res->operands[1] = rSrc;
@@ -1102,7 +1103,7 @@
     ArmOpcode opcode;
     if (FPREG(rDest) || FPREG(rSrc))
         return fpRegCopy(cUnit, rDest, rSrc);
-    res = (ArmLIR* ) oatNew(sizeof(ArmLIR), true);
+    res = (ArmLIR* ) oatNew(sizeof(ArmLIR), true, kAllocLIR);
     res->generic.dalvikOffset = cUnit->currentDalvikOffset;
     if (LOWREG(rDest) && LOWREG(rSrc))
         opcode = kThumbMovRR;
diff --git a/src/compiler/codegen/arm/Thumb2/Gen.cc b/src/compiler/codegen/arm/Thumb2/Gen.cc
index 1c2f850..fe0d3f2 100644
--- a/src/compiler/codegen/arm/Thumb2/Gen.cc
+++ b/src/compiler/codegen/arm/Thumb2/Gen.cc
@@ -134,7 +134,7 @@
     if (it == cUnit->boundaryMap.end()) {
         LOG(FATAL) << "Error: didn't find vaddr 0x" << std::hex << vaddr;
     }
-    ArmLIR* newLabel = (ArmLIR*)oatNew(sizeof(ArmLIR), true);
+    ArmLIR* newLabel = (ArmLIR*)oatNew(sizeof(ArmLIR), true, kAllocLIR);
     newLabel->generic.dalvikOffset = vaddr;
     newLabel->opcode = kArmPseudoCaseLabel;
     newLabel->operands[0] = keyVal;
@@ -260,11 +260,12 @@
     }
     // Add the table to the list - we'll process it later
     SwitchTable *tabRec = (SwitchTable *)oatNew(sizeof(SwitchTable),
-                         true);
+                         true, kAllocData);
     tabRec->table = table;
     tabRec->vaddr = mir->offset;
     int size = table[1];
-    tabRec->targets = (ArmLIR* *)oatNew(size * sizeof(ArmLIR*), true);
+    tabRec->targets = (ArmLIR* *)oatNew(size * sizeof(ArmLIR*), true,
+                                        kAllocLIR);
     oatInsertGrowableList(&cUnit->switchTables, (intptr_t)tabRec);
 
     // Get the switch value
@@ -310,11 +311,12 @@
     }
     // Add the table to the list - we'll process it later
     SwitchTable *tabRec = (SwitchTable *)oatNew(sizeof(SwitchTable),
-                         true);
+                                                true, kAllocData);
     tabRec->table = table;
     tabRec->vaddr = mir->offset;
     int size = table[1];
-    tabRec->targets = (ArmLIR* *)oatNew(size * sizeof(ArmLIR*), true);
+    tabRec->targets = (ArmLIR* *)oatNew(size * sizeof(ArmLIR*), true,
+                                        kAllocLIR);
     oatInsertGrowableList(&cUnit->switchTables, (intptr_t)tabRec);
 
     // Get the switch value
@@ -365,7 +367,7 @@
     const u2* table = cUnit->insns + mir->offset + mir->dalvikInsn.vB;
     // Add the table to the list - we'll process it later
     FillArrayData *tabRec = (FillArrayData *)
-         oatNew(sizeof(FillArrayData), true);
+         oatNew(sizeof(FillArrayData), true, kAllocData);
     tabRec->table = table;
     tabRec->vaddr = mir->offset;
     u2 width = tabRec->table[1];
@@ -932,14 +934,17 @@
     int numTemps = sizeof(coreTemps)/sizeof(*coreTemps);
     int numFPRegs = sizeof(fpRegs)/sizeof(*fpRegs);
     int numFPTemps = sizeof(fpTemps)/sizeof(*fpTemps);
-    RegisterPool *pool = (RegisterPool *)oatNew(sizeof(*pool), true);
+    RegisterPool *pool = (RegisterPool *)oatNew(sizeof(*pool), true,
+                                                kAllocRegAlloc);
     cUnit->regPool = pool;
     pool->numCoreRegs = numRegs;
     pool->coreRegs = (RegisterInfo *)
-            oatNew(numRegs * sizeof(*cUnit->regPool->coreRegs), true);
+            oatNew(numRegs * sizeof(*cUnit->regPool->coreRegs), true,
+                   kAllocRegAlloc);
     pool->numFPRegs = numFPRegs;
     pool->FPRegs = (RegisterInfo *)
-            oatNew(numFPRegs * sizeof(*cUnit->regPool->FPRegs), true);
+            oatNew(numFPRegs * sizeof(*cUnit->regPool->FPRegs), true,
+                                      kAllocRegAlloc);
     oatInitPool(pool->coreRegs, coreRegs, pool->numCoreRegs);
     oatInitPool(pool->FPRegs, fpRegs, pool->numFPRegs);
     // Keep special registers from being allocated
@@ -959,7 +964,8 @@
     }
     // Construct the alias map.
     cUnit->phiAliasMap = (int*)oatNew(cUnit->numSSARegs *
-                                      sizeof(cUnit->phiAliasMap[0]), false);
+                                      sizeof(cUnit->phiAliasMap[0]), false,
+                                      kAllocDFInfo);
     for (int i = 0; i < cUnit->numSSARegs; i++) {
         cUnit->phiAliasMap[i] = i;
     }
@@ -1810,7 +1816,7 @@
     ArmLIR* branch = opCondBranch(cUnit, kArmCondEq);
     ArmLIR* retLab = newLIR0(cUnit, kArmPseudoTargetLabel);
     retLab->defMask = ENCODE_ALL;
-    ArmLIR* target = (ArmLIR*)oatNew(sizeof(ArmLIR), true);
+    ArmLIR* target = (ArmLIR*)oatNew(sizeof(ArmLIR), true, kAllocLIR);
     target->generic.dalvikOffset = cUnit->currentDalvikOffset;
     target->opcode = kArmPseudoSuspendTarget;
     target->operands[0] = (intptr_t)retLab;