Compile-time tuning: assembly phase

Not as much compile-time gain from reworking the assembly phase as I'd
hoped, but still worthwhile.  Should see ~2% improvement thanks to
the assembly rework.  On the other hand, expect some huge gains for some
application thanks to better detection of large machine-generated init
methods.  Thinkfree shows a 25% improvement.

The major assembly change was to establish thread the LIR nodes that
require fixup into a fixup chain.  Only those are processed during the
final assembly pass(es).  This doesn't help for methods which only
require a single pass to assemble, but does speed up the larger methods
which required multiple assembly passes.

Also replaced the block_map_ basic block lookup table (which contained
space for a BasicBlock* for each dex instruction unit) with a block id
map - cutting its space requirements by half in a 32-bit pointer
environment.

Changes:
  o Reduce size of LIR struct by 12.5% (one of the big memory users)
  o Repurpose the use/def portion of the LIR after optimization complete.
  o Encode instruction bits to LIR
  o Thread LIR nodes requiring pc fixup
  o Change follow-on assembly passes to only consider fixup LIRs
  o Switch on pc-rel fixup kind
  o Fast-path for small methods - single pass assembly
  o Avoid using cb[n]z for null checks (almost always exceed displacement)
  o Improve detection of large initialization methods.
  o Rework def/use flag setup.
  o Remove a sequential search from FindBlock using lookup table of 16-bit
    block ids rather than full block pointers.
  o Eliminate pcRelFixup and use fixup kind instead.
  o Add check for 16-bit overflow on dex offset.

Change-Id: I4c6615f83fed46f84629ad6cfe4237205a9562b4
diff --git a/compiler/dex/quick/arm/target_arm.cc b/compiler/dex/quick/arm/target_arm.cc
index 203a8cc..a4ea10b 100644
--- a/compiler/dex/quick/arm/target_arm.cc
+++ b/compiler/dex/quick/arm/target_arm.cc
@@ -118,78 +118,83 @@
   return ENCODE_ARM_REG_PC;
 }
 
-void ArmMir2Lir::SetupTargetResourceMasks(LIR* lir) {
+// Thumb2 specific setup.  TODO: inline?:
+void ArmMir2Lir::SetupTargetResourceMasks(LIR* lir, uint64_t flags) {
   DCHECK_EQ(cu_->instruction_set, kThumb2);
+  DCHECK(!lir->flags.use_def_invalid);
 
-  // Thumb2 specific setup
-  uint64_t flags = ArmMir2Lir::EncodingMap[lir->opcode].flags;
   int opcode = lir->opcode;
 
-  if (flags & REG_DEF_SP) {
-    lir->def_mask |= ENCODE_ARM_REG_SP;
-  }
-
-  if (flags & REG_USE_SP) {
-    lir->use_mask |= ENCODE_ARM_REG_SP;
-  }
-
-  if (flags & REG_DEF_LIST0) {
-    lir->def_mask |= ENCODE_ARM_REG_LIST(lir->operands[0]);
-  }
-
-  if (flags & REG_DEF_LIST1) {
-    lir->def_mask |= ENCODE_ARM_REG_LIST(lir->operands[1]);
-  }
-
-  if (flags & REG_DEF_FPCS_LIST0) {
-    lir->def_mask |= ENCODE_ARM_REG_FPCS_LIST(lir->operands[0]);
-  }
-
-  if (flags & REG_DEF_FPCS_LIST2) {
-    for (int i = 0; i < lir->operands[2]; i++) {
-      SetupRegMask(&lir->def_mask, lir->operands[1] + i);
+  // These flags are somewhat uncommon - bypass if we can.
+  if ((flags & (REG_DEF_SP | REG_USE_SP | REG_DEF_LIST0 | REG_DEF_LIST1 |
+                REG_DEF_FPCS_LIST0 | REG_DEF_FPCS_LIST2 | REG_USE_PC | IS_IT | REG_USE_LIST0 |
+                REG_USE_LIST1 | REG_USE_FPCS_LIST0 | REG_USE_FPCS_LIST2 | REG_DEF_LR)) != 0) {
+    if (flags & REG_DEF_SP) {
+      lir->u.m.def_mask |= ENCODE_ARM_REG_SP;
     }
-  }
 
-  if (flags & REG_USE_PC) {
-    lir->use_mask |= ENCODE_ARM_REG_PC;
-  }
-
-  /* Conservatively treat the IT block */
-  if (flags & IS_IT) {
-    lir->def_mask = ENCODE_ALL;
-  }
-
-  if (flags & REG_USE_LIST0) {
-    lir->use_mask |= ENCODE_ARM_REG_LIST(lir->operands[0]);
-  }
-
-  if (flags & REG_USE_LIST1) {
-    lir->use_mask |= ENCODE_ARM_REG_LIST(lir->operands[1]);
-  }
-
-  if (flags & REG_USE_FPCS_LIST0) {
-    lir->use_mask |= ENCODE_ARM_REG_FPCS_LIST(lir->operands[0]);
-  }
-
-  if (flags & REG_USE_FPCS_LIST2) {
-    for (int i = 0; i < lir->operands[2]; i++) {
-      SetupRegMask(&lir->use_mask, lir->operands[1] + i);
+    if (flags & REG_USE_SP) {
+      lir->u.m.use_mask |= ENCODE_ARM_REG_SP;
     }
-  }
-  /* Fixup for kThumbPush/lr and kThumbPop/pc */
-  if (opcode == kThumbPush || opcode == kThumbPop) {
-    uint64_t r8Mask = GetRegMaskCommon(r8);
-    if ((opcode == kThumbPush) && (lir->use_mask & r8Mask)) {
-      lir->use_mask &= ~r8Mask;
-      lir->use_mask |= ENCODE_ARM_REG_LR;
-    } else if ((opcode == kThumbPop) && (lir->def_mask & r8Mask)) {
-      lir->def_mask &= ~r8Mask;
-      lir->def_mask |= ENCODE_ARM_REG_PC;
+
+    if (flags & REG_DEF_LIST0) {
+      lir->u.m.def_mask |= ENCODE_ARM_REG_LIST(lir->operands[0]);
     }
-  }
-  if (flags & REG_DEF_LR) {
-    lir->def_mask |= ENCODE_ARM_REG_LR;
+
+    if (flags & REG_DEF_LIST1) {
+      lir->u.m.def_mask |= ENCODE_ARM_REG_LIST(lir->operands[1]);
+    }
+
+    if (flags & REG_DEF_FPCS_LIST0) {
+      lir->u.m.def_mask |= ENCODE_ARM_REG_FPCS_LIST(lir->operands[0]);
+    }
+
+    if (flags & REG_DEF_FPCS_LIST2) {
+      for (int i = 0; i < lir->operands[2]; i++) {
+        SetupRegMask(&lir->u.m.def_mask, lir->operands[1] + i);
+      }
+    }
+
+    if (flags & REG_USE_PC) {
+      lir->u.m.use_mask |= ENCODE_ARM_REG_PC;
+    }
+
+    /* Conservatively treat the IT block */
+    if (flags & IS_IT) {
+      lir->u.m.def_mask = ENCODE_ALL;
+    }
+
+    if (flags & REG_USE_LIST0) {
+      lir->u.m.use_mask |= ENCODE_ARM_REG_LIST(lir->operands[0]);
+    }
+
+    if (flags & REG_USE_LIST1) {
+      lir->u.m.use_mask |= ENCODE_ARM_REG_LIST(lir->operands[1]);
+    }
+
+    if (flags & REG_USE_FPCS_LIST0) {
+      lir->u.m.use_mask |= ENCODE_ARM_REG_FPCS_LIST(lir->operands[0]);
+    }
+
+    if (flags & REG_USE_FPCS_LIST2) {
+      for (int i = 0; i < lir->operands[2]; i++) {
+        SetupRegMask(&lir->u.m.use_mask, lir->operands[1] + i);
+      }
+    }
+    /* Fixup for kThumbPush/lr and kThumbPop/pc */
+    if (opcode == kThumbPush || opcode == kThumbPop) {
+      uint64_t r8Mask = GetRegMaskCommon(r8);
+      if ((opcode == kThumbPush) && (lir->u.m.use_mask & r8Mask)) {
+        lir->u.m.use_mask &= ~r8Mask;
+        lir->u.m.use_mask |= ENCODE_ARM_REG_LR;
+      } else if ((opcode == kThumbPop) && (lir->u.m.def_mask & r8Mask)) {
+        lir->u.m.def_mask &= ~r8Mask;
+        lir->u.m.def_mask |= ENCODE_ARM_REG_PC;
+      }
+    }
+    if (flags & REG_DEF_LR) {
+      lir->u.m.def_mask |= ENCODE_ARM_REG_LR;
+    }
   }
 }
 
@@ -466,8 +471,8 @@
 
     /* Memory bits */
     if (arm_lir && (mask & ENCODE_DALVIK_REG)) {
-      sprintf(buf + strlen(buf), "dr%d%s", arm_lir->alias_info & 0xffff,
-              (arm_lir->alias_info & 0x80000000) ? "(+1)" : "");
+      sprintf(buf + strlen(buf), "dr%d%s", DECODE_ALIAS_INFO_REG(arm_lir->flags.alias_info),
+              DECODE_ALIAS_INFO_WIDE(arm_lir->flags.alias_info) ? "(+1)" : "");
     }
     if (mask & ENCODE_LITERAL) {
       strcat(buf, "lit ");