Fix null check elimination

The existing null check elimination mechanism suffered from the same
limitation as the SSA renaming: it took shortcuts that were valid in
a trace compilation world, but not in a method compilation world.

This CL replaces the old mechanism, and additionally takes advantage
of some the fact that "this" is always non-null, as are objects returned
from OP_NEW_* (thanks Ian!).

Two test cases added.  The one for ensuring that unnecessary null checks
are elminated requires manual inspection.  The other - that we don't
eliminate a necessary null check - is disabled until exceptions are working.

Change-Id: I2a9b72741f56617bf609e4d7c20244796c988f28
diff --git a/src/compiler/codegen/CodegenFactory.cc b/src/compiler/codegen/CodegenFactory.cc
index 5a75cee..14c7e57 100644
--- a/src/compiler/codegen/CodegenFactory.cc
+++ b/src/compiler/codegen/CodegenFactory.cc
@@ -127,7 +127,6 @@
     LIR* defEnd;
     assert(!rlDest.wide);
     assert(!rlSrc.wide);
-    oatKillNullCheckedLoc(cUnit, rlDest);
     rlSrc = oatUpdateLoc(cUnit, rlSrc);
     rlDest = oatUpdateLoc(cUnit, rlDest);
     if (rlSrc.location == kLocPhysReg) {
@@ -190,7 +189,6 @@
     assert(FPREG(rlSrc.lowReg)==FPREG(rlSrc.highReg));
     assert(rlDest.wide);
     assert(rlSrc.wide);
-    oatKillNullCheckedLoc(cUnit, rlDest);
     if (rlSrc.location == kLocPhysReg) {
         if (oatIsLive(cUnit, rlSrc.lowReg) ||
             oatIsLive(cUnit, rlSrc.highReg) ||
diff --git a/src/compiler/codegen/Optimizer.h b/src/compiler/codegen/Optimizer.h
index b234587..23f0f17 100644
--- a/src/compiler/codegen/Optimizer.h
+++ b/src/compiler/codegen/Optimizer.h
@@ -27,6 +27,7 @@
     kLoadStoreElimination = 0,
     kLoadHoisting,
     kSuppressLoads,
+    kNullCheckElimination,
     kPromoteRegs,
 };
 
diff --git a/src/compiler/codegen/Ralloc.h b/src/compiler/codegen/Ralloc.h
index 82ce23e..ad19475 100644
--- a/src/compiler/codegen/Ralloc.h
+++ b/src/compiler/codegen/Ralloc.h
@@ -45,12 +45,6 @@
     return DECODE_REG(oatConvertSSARegToDalvik(cUnit, sReg));
 }
 
-/* Reset the tracker to unknown state */
-static inline void oatResetNullCheck(CompilationUnit* cUnit)
-{
-    oatClearAllBits(cUnit->regPool->nullCheckedRegs);
-}
-
 /*
  * Get the "real" sreg number associated with an sReg slot.  In general,
  * sReg values passed through codegen are the SSA names created by
@@ -161,10 +155,6 @@
 
 extern void oatResetDefTracking(CompilationUnit* cUnit);
 
-/* Kill the corresponding bit in the null-checked register list */
-extern void oatKillNullCheckedLoc(CompilationUnit* cUnit,
-                                  RegLocation loc);
-
 extern RegisterInfo *oatIsLive(CompilationUnit* cUnit, int reg);
 
 /* To be used when explicitly managing register use */
diff --git a/src/compiler/codegen/RallocUtil.cc b/src/compiler/codegen/RallocUtil.cc
index 8295b33..8b84500 100644
--- a/src/compiler/codegen/RallocUtil.cc
+++ b/src/compiler/codegen/RallocUtil.cc
@@ -1036,17 +1036,3 @@
     assert(res.wide);
     return res;
 }
-
-/* Kill the corresponding bit in the null-checked register list */
-extern void oatKillNullCheckedLoc(CompilationUnit* cUnit,
-                                  RegLocation loc)
-{
-    if (loc.sRegLow == INVALID_SREG)
-        return;
-    oatClearBit(cUnit->regPool->nullCheckedRegs, loc.sRegLow);
-    if (loc.wide) {
-        assert(oatSRegHi(loc.sRegLow) != INVALID_SREG);
-        oatClearBit(cUnit->regPool->nullCheckedRegs,
-                    oatSRegHi(loc.sRegLow));
-    }
-}
diff --git a/src/compiler/codegen/arm/ArchFactory.cc b/src/compiler/codegen/arm/ArchFactory.cc
index 34a333d..8012d7d 100644
--- a/src/compiler/codegen/arm/ArchFactory.cc
+++ b/src/compiler/codegen/arm/ArchFactory.cc
@@ -70,19 +70,14 @@
     return branch;
 }
 
-/*
- * Perform null-check on a register. sReg is the ssa register being checked,
- * and mReg is the machine register holding the actual value. If internal state
- * indicates that sReg has been checked before the check request is ignored.
- */
+/* Perform null-check on a register.  */
 static ArmLIR* genNullCheck(CompilationUnit* cUnit, int sReg, int mReg,
                              MIR* mir)
 {
-    if (oatIsBitSet(cUnit->regPool->nullCheckedRegs, sReg)) {
-        /* This particular Dalvik register has been null-checked */
+    if (!(cUnit->disableOpt & (1 << kNullCheckElimination)) &&
+        mir->optimizationFlags & MIR_IGNORE_NULL_CHECK) {
         return NULL;
     }
-    oatSetBit(cUnit->regPool->nullCheckedRegs, sReg);
     return genImmedCheck(cUnit, kArmCondEq, mReg, 0, mir, kArmThrowNullPointer);
 }
 
diff --git a/src/compiler/codegen/arm/ArmLIR.h b/src/compiler/codegen/arm/ArmLIR.h
index 212358f..53e8dc8 100644
--- a/src/compiler/codegen/arm/ArmLIR.h
+++ b/src/compiler/codegen/arm/ArmLIR.h
@@ -149,7 +149,6 @@
 } RegisterInfo;
 
 typedef struct RegisterPool {
-    ArenaBitVector *nullCheckedRegs; // Which registers have been null-checked?
     int numCoreRegs;
     RegisterInfo *coreRegs;
     int nextCoreReg;
diff --git a/src/compiler/codegen/arm/MethodCodegenDriver.cc b/src/compiler/codegen/arm/MethodCodegenDriver.cc
index 10f470d..7196efc 100644
--- a/src/compiler/codegen/arm/MethodCodegenDriver.cc
+++ b/src/compiler/codegen/arm/MethodCodegenDriver.cc
@@ -681,7 +681,7 @@
             break;
         case 4: // Get ...->super_class_->vtable [u/s r0]
             loadWordDisp(cUnit, r0, Class::VTableOffset().Int32Value(), r0);
-            if (!(mir->OptimizationFlags & MIR_IGNORE_RANGE_CHECK)) {
+            if (!(mir->optimizationFlags & MIR_IGNORE_RANGE_CHECK)) {
                 // Range check, throw NSM on failure
                 tReg = oatAllocTemp(cUnit);
                 loadWordDisp(cUnit, r0, art::Array::LengthOffset().Int32Value(),
@@ -1110,7 +1110,7 @@
             break;
 
         case OP_MOVE_RESULT_WIDE:
-            if (mir->OptimizationFlags & MIR_INLINED)
+            if (mir->optimizationFlags & MIR_INLINED)
                 break;  // Nop - combined w/ previous invoke
             /*
              * Somewhat hacky here.   Because we're now passing
@@ -1130,7 +1130,7 @@
 
         case OP_MOVE_RESULT:
         case OP_MOVE_RESULT_OBJECT:
-            if (mir->OptimizationFlags & MIR_INLINED)
+            if (mir->optimizationFlags & MIR_INLINED)
                 break;  // Nop - combined w/ previous invoke
             /* See comment for OP_MOVE_RESULT_WIDE */
             assert(retLoc.lowReg == r0);
@@ -1420,55 +1420,55 @@
 
         case OP_IGET_WIDE:
         case OP_IGET_WIDE_VOLATILE:
-            genIGetWideX(cUnit, mir, rlDest, rlSrc[0]);
+            genIGetWide(cUnit, mir, rlDest, rlSrc[0]);
             break;
 
         case OP_IGET:
         case OP_IGET_VOLATILE:
         case OP_IGET_OBJECT:
         case OP_IGET_OBJECT_VOLATILE:
-            genIGetX(cUnit, mir, kWord, rlDest, rlSrc[0]);
+            genIGet(cUnit, mir, kWord, rlDest, rlSrc[0]);
             break;
 
         case OP_IGET_BOOLEAN:
         case OP_IGET_BYTE:
-            genIGetX(cUnit, mir, kUnsignedByte, rlDest, rlSrc[0]);
+            genIGet(cUnit, mir, kUnsignedByte, rlDest, rlSrc[0]);
             break;
 
         case OP_IGET_CHAR:
-            genIGetX(cUnit, mir, kUnsignedHalf, rlDest, rlSrc[0]);
+            genIGet(cUnit, mir, kUnsignedHalf, rlDest, rlSrc[0]);
             break;
 
         case OP_IGET_SHORT:
-            genIGetX(cUnit, mir, kSignedHalf, rlDest, rlSrc[0]);
+            genIGet(cUnit, mir, kSignedHalf, rlDest, rlSrc[0]);
             break;
 
         case OP_IPUT_WIDE:
         case OP_IPUT_WIDE_VOLATILE:
-            genIPutWideX(cUnit, mir, rlSrc[0], rlSrc[1]);
+            genIPutWide(cUnit, mir, rlSrc[0], rlSrc[1]);
             break;
 
         case OP_IPUT_OBJECT:
         case OP_IPUT_OBJECT_VOLATILE:
-            genIPutX(cUnit, mir, kWord, rlSrc[0], rlSrc[1], true);
+            genIPut(cUnit, mir, kWord, rlSrc[0], rlSrc[1], true);
             break;
 
         case OP_IPUT:
         case OP_IPUT_VOLATILE:
-            genIPutX(cUnit, mir, kWord, rlSrc[0], rlSrc[1], false);
+            genIPut(cUnit, mir, kWord, rlSrc[0], rlSrc[1], false);
             break;
 
         case OP_IPUT_BOOLEAN:
         case OP_IPUT_BYTE:
-            genIPutX(cUnit, mir, kUnsignedByte, rlSrc[0], rlSrc[1], false);
+            genIPut(cUnit, mir, kUnsignedByte, rlSrc[0], rlSrc[1], false);
             break;
 
         case OP_IPUT_CHAR:
-            genIPutX(cUnit, mir, kUnsignedHalf, rlSrc[0], rlSrc[1], false);
+            genIPut(cUnit, mir, kUnsignedHalf, rlSrc[0], rlSrc[1], false);
             break;
 
         case OP_IPUT_SHORT:
-            genIPutX(cUnit, mir, kSignedHalf, rlSrc[0], rlSrc[1], false);
+            genIPut(cUnit, mir, kSignedHalf, rlSrc[0], rlSrc[1], false);
             break;
 
         case OP_SGET:
@@ -1805,7 +1805,6 @@
     oatAppendLIR(cUnit, (LIR*) &labelList[blockId]);
 
     oatClobberAllRegs(cUnit);
-    oatResetNullCheck(cUnit);
 
     ArmLIR* headLIR = NULL;
 
diff --git a/src/compiler/codegen/arm/Thumb2/Gen.cc b/src/compiler/codegen/arm/Thumb2/Gen.cc
index cd3a64a..c54a0a8 100644
--- a/src/compiler/codegen/arm/Thumb2/Gen.cc
+++ b/src/compiler/codegen/arm/Thumb2/Gen.cc
@@ -450,7 +450,7 @@
     loadWordDisp(cUnit, r0, art::Field::OffsetOffset().Int32Value(), r0);
 }
 
-static void genIGetX(CompilationUnit* cUnit, MIR* mir, OpSize size,
+static void genIGet(CompilationUnit* cUnit, MIR* mir, OpSize size,
                      RegLocation rlDest, RegLocation rlObj)
 {
     Field* fieldPtr = cUnit->method->GetDeclaringClass()->GetDexCache()->
@@ -485,7 +485,7 @@
     }
 }
 
-static void genIPutX(CompilationUnit* cUnit, MIR* mir, OpSize size,
+static void genIPut(CompilationUnit* cUnit, MIR* mir, OpSize size,
                     RegLocation rlSrc, RegLocation rlObj, bool isObject)
 {
     Field* fieldPtr = cUnit->method->GetDeclaringClass()->GetDexCache()->
@@ -521,7 +521,7 @@
     }
 }
 
-static void genIGetWideX(CompilationUnit* cUnit, MIR* mir, RegLocation rlDest,
+static void genIGetWide(CompilationUnit* cUnit, MIR* mir, RegLocation rlDest,
                         RegLocation rlObj)
 {
     Field* fieldPtr = cUnit->method->GetDeclaringClass()->GetDexCache()->
@@ -564,7 +564,7 @@
     }
 }
 
-static void genIPutWideX(CompilationUnit* cUnit, MIR* mir, RegLocation rlSrc,
+static void genIPutWide(CompilationUnit* cUnit, MIR* mir, RegLocation rlSrc,
                         RegLocation rlObj)
 {
     Field* fieldPtr = cUnit->method->GetDeclaringClass()->GetDexCache()->
@@ -886,8 +886,6 @@
     for (int i = 0; i < numFPTemps; i++) {
         oatMarkTemp(cUnit, fpTemps[i]);
     }
-    pool->nullCheckedRegs =
-        oatAllocBitVector(cUnit->numSSARegs, false);
 }
 
 /*
@@ -1267,11 +1265,7 @@
     loadValueDirectFixed(cUnit, rlSrc, r0);
 
     /* null array object? */
-    ArmLIR*  pcrLabel = NULL;
-
-    if (!(mir->OptimizationFlags & MIR_IGNORE_NULL_CHECK)) {
-        pcrLabel = genNullCheck(cUnit, rlArray.sRegLow, r1, mir);
-    }
+    genNullCheck(cUnit, rlArray.sRegLow, r1, mir);
     loadWordDisp(cUnit, rSELF,
                  OFFSETOF_MEMBER(Thread, pCanPutArrayElementFromCode), rLR);
     /* Get the array's clazz */
@@ -1295,7 +1289,7 @@
         genRegCopy(cUnit, regPtr, rlArray.lowReg);
     }
 
-    if (!(mir->OptimizationFlags & MIR_IGNORE_RANGE_CHECK)) {
+    if (!(mir->optimizationFlags & MIR_IGNORE_RANGE_CHECK)) {
         int regLen = oatAllocTemp(cUnit);
         //NOTE: max live temps(4) here.
         /* Get len */
@@ -1331,15 +1325,11 @@
     int regPtr;
 
     /* null object? */
-    ArmLIR*  pcrLabel = NULL;
-
-    if (!(mir->OptimizationFlags & MIR_IGNORE_NULL_CHECK)) {
-        pcrLabel = genNullCheck(cUnit, rlArray.sRegLow, rlArray.lowReg, mir);
-    }
+    genNullCheck(cUnit, rlArray.sRegLow, rlArray.lowReg, mir);
 
     regPtr = oatAllocTemp(cUnit);
 
-    if (!(mir->OptimizationFlags & MIR_IGNORE_RANGE_CHECK)) {
+    if (!(mir->optimizationFlags & MIR_IGNORE_RANGE_CHECK)) {
         int regLen = oatAllocTemp(cUnit);
         /* Get len */
         loadWordDisp(cUnit, rlArray.lowReg, lenOffset, regLen);
@@ -1405,13 +1395,9 @@
     }
 
     /* null object? */
-    ArmLIR*  pcrLabel = NULL;
+    genNullCheck(cUnit, rlArray.sRegLow, rlArray.lowReg, mir);
 
-    if (!(mir->OptimizationFlags & MIR_IGNORE_NULL_CHECK)) {
-        pcrLabel = genNullCheck(cUnit, rlArray.sRegLow, rlArray.lowReg, mir);
-    }
-
-    if (!(mir->OptimizationFlags & MIR_IGNORE_RANGE_CHECK)) {
+    if (!(mir->optimizationFlags & MIR_IGNORE_RANGE_CHECK)) {
         int regLen = oatAllocTemp(cUnit);
         //NOTE: max live temps(4) here.
         /* Get len */