Compiler intrinsics

Add intrinsic support.  Some of these appear to be of
limited value, so we may end up removing a few.  In general,
the intrinsics provide small, but measurable, gains.

Only Arm is currently supported, but most of these should
work for our other targets as well.

This is an interim solution.  My plan is to move the intrinsic
recognition action up into the basic block building phase once
we start doing inlining.

Change-Id: Ia2913f2cdecaa4e80469caf69dbf8e2f61d4506a
diff --git a/src/compiler/codegen/CodegenUtil.cc b/src/compiler/codegen/CodegenUtil.cc
index f2449e5..2318a04 100644
--- a/src/compiler/codegen/CodegenUtil.cc
+++ b/src/compiler/codegen/CodegenUtil.cc
@@ -296,6 +296,9 @@
         case kPseudoThrowTarget:
             LOG(INFO) << "LT" << (void*)lir << ":";
             break;
+        case kPseudoIntrinsicRetry:
+            LOG(INFO) << "IR" << (void*)lir << ":";
+            break;
         case kPseudoSuspendTarget:
             LOG(INFO) << "LS" << (void*)lir << ":";
             break;
diff --git a/src/compiler/codegen/GenCommon.cc b/src/compiler/codegen/GenCommon.cc
index 3ca0450..0ef1641 100644
--- a/src/compiler/codegen/GenCommon.cc
+++ b/src/compiler/codegen/GenCommon.cc
@@ -21,7 +21,8 @@
  * be applicable to most targets.  Only mid-level support utilities
  * and "op" calls may be used here.
  */
-
+void genInvoke(CompilationUnit* cUnit, BasicBlock* bb,  MIR* mir,
+               InvokeType type, bool isRange);
 #if defined(TARGET_ARM)
 LIR* opIT(CompilationUnit* cUnit, ArmConditionCode cond, const char* guide);
 #endif
@@ -835,6 +836,7 @@
     int numElems = cUnit->suspendLaunchpads.numUsed;
     for (int i = 0; i < numElems; i++) {
         oatResetRegPool(cUnit);
+        oatResetDefTracking(cUnit);
         LIR* lab = suspendLabel[i];
         LIR* resumeLab = (LIR*)lab->operands[0];
         cUnit->currentDalvikOffset = lab->operands[1];
@@ -851,12 +853,34 @@
     }
 }
 
+void handleIntrinsicLaunchpads(CompilationUnit *cUnit)
+{
+    LIR** intrinsicLabel = (LIR **)cUnit->intrinsicLaunchpads.elemList;
+    int numElems = cUnit->intrinsicLaunchpads.numUsed;
+    for (int i = 0; i < numElems; i++) {
+        oatResetRegPool(cUnit);
+        oatResetDefTracking(cUnit);
+        LIR* lab = intrinsicLabel[i];
+        MIR* mir = (MIR*)lab->operands[0];
+        InvokeType type = (InvokeType)lab->operands[1];
+        BasicBlock* bb = (BasicBlock*)lab->operands[3];
+        cUnit->currentDalvikOffset = mir->offset;
+        oatAppendLIR(cUnit, lab);
+        genInvoke(cUnit, bb, mir, type, false /* isRange */);
+        LIR* resumeLab = (LIR*)lab->operands[2];
+        if (resumeLab != NULL) {
+            opUnconditionalBranch(cUnit, resumeLab);
+        }
+    }
+}
+
 void handleThrowLaunchpads(CompilationUnit *cUnit)
 {
     LIR** throwLabel = (LIR **)cUnit->throwLaunchpads.elemList;
     int numElems = cUnit->throwLaunchpads.numUsed;
     for (int i = 0; i < numElems; i++) {
         oatResetRegPool(cUnit);
+        oatResetDefTracking(cUnit);
         LIR* lab = throwLabel[i];
         cUnit->currentDalvikOffset = lab->operands[1];
         oatAppendLIR(cUnit, lab);
diff --git a/src/compiler/codegen/GenInvoke.cc b/src/compiler/codegen/GenInvoke.cc
index ebc8bc2..ba027f0 100644
--- a/src/compiler/codegen/GenInvoke.cc
+++ b/src/compiler/codegen/GenInvoke.cc
@@ -25,6 +25,8 @@
 typedef int (*NextCallInsn)(CompilationUnit*, MIR*, int, uint32_t dexIdx,
                             uint32_t methodIdx, uintptr_t directCode,
                             uintptr_t directMethod, InvokeType type);
+LIR* opCondBranch(CompilationUnit* cUnit, ConditionCode cc, LIR* target);
+
 /*
  * If there are any ins passed in registers that have not been promoted
  * to a callee-save register, flush them to the frame.  Perform intial
@@ -596,4 +598,339 @@
     return callState;
 }
 
+RegLocation inlineTarget(CompilationUnit* cUnit, BasicBlock* bb, MIR* mir)
+{
+    RegLocation res;
+    mir = oatFindMoveResult(cUnit, bb, mir, false);
+    if (mir == NULL) {
+        res = oatGetReturn(cUnit, false);
+    } else {
+        res = oatGetDest(cUnit, mir, 0);
+        mir->dalvikInsn.opcode = Instruction::NOP;
+    }
+    return res;
+}
+
+RegLocation inlineTargetWide(CompilationUnit* cUnit, BasicBlock* bb, MIR* mir)
+{
+    RegLocation res;
+    mir = oatFindMoveResult(cUnit, bb, mir, true);
+    if (mir == NULL) {
+        res = oatGetReturnWide(cUnit, false);
+    } else {
+        res = oatGetDestWide(cUnit, mir, 0, 1);
+        mir->dalvikInsn.opcode = Instruction::NOP;
+    }
+    return res;
+}
+
+bool genInlinedCharAt(CompilationUnit* cUnit, BasicBlock* bb, MIR* mir,
+                      InvokeType type, bool isRange)
+{
+#if defined(TARGET_ARM)
+    // Location of reference to data array
+    int valueOffset = String::ValueOffset().Int32Value();
+    // Location of count
+    int countOffset = String::CountOffset().Int32Value();
+    // Starting offset within data array
+    int offsetOffset = String::OffsetOffset().Int32Value();
+    // Start of char data within array_
+    int dataOffset = Array::DataOffset(sizeof(uint16_t)).Int32Value();
+
+    RegLocation rlObj = oatGetSrc(cUnit, mir, 0);
+    RegLocation rlIdx = oatGetSrc(cUnit, mir, 1);
+    rlObj = loadValue(cUnit, rlObj, kCoreReg);
+    rlIdx = loadValue(cUnit, rlIdx, kCoreReg);
+    int regMax;
+    int regOff = oatAllocTemp(cUnit);
+    int regPtr = oatAllocTemp(cUnit);
+    genNullCheck(cUnit, rlObj.sRegLow, rlObj.lowReg, mir);
+    bool rangeCheck = (!(mir->optimizationFlags & MIR_IGNORE_RANGE_CHECK));
+    if (rangeCheck) {
+        regMax = oatAllocTemp(cUnit);
+        loadWordDisp(cUnit, rlObj.lowReg, countOffset, regMax);
+    }
+    loadWordDisp(cUnit, rlObj.lowReg, offsetOffset, regOff);
+    loadWordDisp(cUnit, rlObj.lowReg, valueOffset, regPtr);
+    LIR* launchPad = NULL;
+    if (rangeCheck) {
+        // Set up a launch pad to allow retry in case of bounds violation
+        launchPad = rawLIR(cUnit, 0, kPseudoIntrinsicRetry, (int)mir, type);
+        oatInsertGrowableList(cUnit, &cUnit->intrinsicLaunchpads,
+                              (intptr_t)launchPad);
+        opRegReg(cUnit, kOpCmp, rlIdx.lowReg, regMax);
+        oatFreeTemp(cUnit, regMax);
+        opCondBranch(cUnit, kCondCs, launchPad);
+    }
+    opRegImm(cUnit, kOpAdd, regPtr, dataOffset);
+    opRegReg(cUnit, kOpAdd, regOff, rlIdx.lowReg);
+    RegLocation rlDest = inlineTarget(cUnit, bb, mir);
+    RegLocation rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true);
+    loadBaseIndexed(cUnit, regPtr, regOff, rlResult.lowReg, 1, kUnsignedHalf);
+    oatFreeTemp(cUnit, regOff);
+    oatFreeTemp(cUnit, regPtr);
+    storeValue(cUnit, rlDest, rlResult);
+    if (rangeCheck) {
+        launchPad->operands[2] = NULL;  // no resumption
+        launchPad->operands[3] = (uintptr_t)bb;
+    }
+    // Record that we've already inlined & null checked
+    mir->optimizationFlags |= (MIR_INLINED | MIR_IGNORE_NULL_CHECK);
+    return true;
+#else
+    return false;
+#endif
+}
+
+bool genInlinedMinMaxInt(CompilationUnit *cUnit, BasicBlock* bb, MIR *mir,
+                         bool isMin)
+{
+#if defined(TARGET_ARM)
+    RegLocation rlSrc1 = oatGetSrc(cUnit, mir, 0);
+    RegLocation rlSrc2 = oatGetSrc(cUnit, mir, 1);
+    rlSrc1 = loadValue(cUnit, rlSrc1, kCoreReg);
+    rlSrc2 = loadValue(cUnit, rlSrc2, kCoreReg);
+    RegLocation rlDest = inlineTarget(cUnit, bb, mir);
+    RegLocation rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true);
+    opRegReg(cUnit, kOpCmp, rlSrc1.lowReg, rlSrc2.lowReg);
+    opIT(cUnit, (isMin) ? kArmCondGt : kArmCondLt, "E");
+    opRegReg(cUnit, kOpMov, rlResult.lowReg, rlSrc2.lowReg);
+    opRegReg(cUnit, kOpMov, rlResult.lowReg, rlSrc1.lowReg);
+    genBarrier(cUnit);
+    storeValue(cUnit, rlDest, rlResult);
+    return true;
+#else
+    return false;
+#endif
+}
+
+// Generates an inlined String.isEmpty or String.length.
+bool genInlinedStringIsEmptyOrLength(CompilationUnit* cUnit,
+                                            BasicBlock* bb, MIR* mir,
+                                            bool isEmpty)
+{
+#if defined(TARGET_ARM)
+    // dst = src.length();
+    RegLocation rlObj = oatGetSrc(cUnit, mir, 0);
+    rlObj = loadValue(cUnit, rlObj, kCoreReg);
+    RegLocation rlDest = inlineTarget(cUnit, bb, mir);
+    RegLocation rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true);
+    genNullCheck(cUnit, rlObj.sRegLow, rlObj.lowReg, mir);
+    loadWordDisp(cUnit, rlObj.lowReg, String::CountOffset().Int32Value(),
+                 rlResult.lowReg);
+    if (isEmpty) {
+        // dst = (dst == 0);
+        int tReg = oatAllocTemp(cUnit);
+        opRegReg(cUnit, kOpNeg, tReg, rlResult.lowReg);
+        opRegRegReg(cUnit, kOpAdc, rlResult.lowReg, rlResult.lowReg, tReg);
+    }
+    storeValue(cUnit, rlDest, rlResult);
+    return true;
+#else
+    return false;
+#endif
+}
+
+bool genInlinedAbsInt(CompilationUnit *cUnit, BasicBlock* bb, MIR *mir)
+{
+#if defined(TARGET_ARM)
+    RegLocation rlSrc = oatGetSrc(cUnit, mir, 0);
+    rlSrc = loadValue(cUnit, rlSrc, kCoreReg);
+    RegLocation rlDest = inlineTarget(cUnit, bb, mir);
+    RegLocation rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true);
+    int signReg = oatAllocTemp(cUnit);
+    // abs(x): y = x >> 31 (arithmetic shift), result = (x + y) ^ y.
+    opRegRegImm(cUnit, kOpAsr, signReg, rlSrc.lowReg, 31);
+    opRegRegReg(cUnit, kOpAdd, rlResult.lowReg, rlSrc.lowReg, signReg);
+    opRegReg(cUnit, kOpXor, rlResult.lowReg, signReg);
+    storeValue(cUnit, rlDest, rlResult);
+    return true;
+#else
+    return false;
+#endif
+}
+
+bool genInlinedAbsLong(CompilationUnit *cUnit, BasicBlock* bb, MIR *mir)
+{
+#if defined(TARGET_ARM)
+    RegLocation rlSrc = oatGetSrcWide(cUnit, mir, 0, 1);
+    rlSrc = loadValueWide(cUnit, rlSrc, kCoreReg);
+    RegLocation rlDest = inlineTargetWide(cUnit, bb, mir);
+    RegLocation rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true);
+    int signReg = oatAllocTemp(cUnit);
+    // abs(x): y = x.high >> 31 (arithmetic shift), result = (x + y) ^ y.
+    opRegRegImm(cUnit, kOpAsr, signReg, rlSrc.highReg, 31);
+    opRegRegReg(cUnit, kOpAdd, rlResult.lowReg, rlSrc.lowReg, signReg);
+    opRegRegReg(cUnit, kOpAdc, rlResult.highReg, rlSrc.highReg, signReg);
+    opRegReg(cUnit, kOpXor, rlResult.lowReg, signReg);
+    opRegReg(cUnit, kOpXor, rlResult.highReg, signReg);
+    storeValueWide(cUnit, rlDest, rlResult);
+    return true;
+#else
+    return false;
+#endif
+}
+
+bool genInlinedFloatCvt(CompilationUnit *cUnit, BasicBlock* bb, MIR *mir)
+{
+#if defined(TARGET_ARM)
+    RegLocation rlSrc = oatGetSrc(cUnit, mir, 0);
+    RegLocation rlDest = inlineTarget(cUnit, bb, mir);
+    storeValue(cUnit, rlDest, rlSrc);
+    return true;
+#else
+    return false;
+#endif
+}
+
+bool genInlinedDoubleCvt(CompilationUnit *cUnit, BasicBlock* bb, MIR *mir)
+{
+#if defined(TARGET_ARM)
+    RegLocation rlSrc = oatGetSrcWide(cUnit, mir, 0, 1);
+    RegLocation rlDest = inlineTargetWide(cUnit, bb, mir);
+    storeValueWide(cUnit, rlDest, rlSrc);
+    return true;
+#else
+    return false;
+#endif
+}
+
+/*
+ * Fast string.indexOf(I) & (II).  Tests for simple case of char <= 0xffff,
+ * otherwise bails to standard library code.
+ */
+bool genInlinedIndexOf(CompilationUnit* cUnit, BasicBlock* bb, MIR* mir,
+                       InvokeType type, bool zeroBased)
+{
+#if defined(TARGET_ARM)
+
+    oatClobberCalleeSave(cUnit);
+    oatLockCallTemps(cUnit);  // Using fixed registers
+    int regPtr = rARG0;
+    int regChar = rARG1;
+    int regStart = rARG2;
+
+    RegLocation rlObj = oatGetSrc(cUnit, mir, 0);
+    RegLocation rlChar = oatGetSrc(cUnit, mir, 1);
+    RegLocation rlStart = oatGetSrc(cUnit, mir, 2);
+    loadValueDirectFixed(cUnit, rlObj, regPtr);
+    loadValueDirectFixed(cUnit, rlChar, regChar);
+    if (zeroBased) {
+        loadConstant(cUnit, regStart, 0);
+    } else {
+        loadValueDirectFixed(cUnit, rlStart, regStart);
+    }
+    int rTgt = loadHelper(cUnit, OFFSETOF_MEMBER(Thread, pIndexOf));
+    genNullCheck(cUnit, rlObj.sRegLow, regPtr, mir);
+    LIR* launchPad = rawLIR(cUnit, 0, kPseudoIntrinsicRetry, (int)mir, type);
+    oatInsertGrowableList(cUnit, &cUnit->intrinsicLaunchpads,
+                          (intptr_t)launchPad);
+    opCmpImmBranch(cUnit, kCondGt, regChar, 0xFFFF, launchPad);
+    opReg(cUnit, kOpBlx, rTgt);
+    LIR* resumeTgt = newLIR0(cUnit, kPseudoTargetLabel);
+    launchPad->operands[2] = (uintptr_t)resumeTgt;
+    launchPad->operands[3] = (uintptr_t)bb;
+    // Record that we've already inlined & null checked
+    mir->optimizationFlags |= (MIR_INLINED | MIR_IGNORE_NULL_CHECK);
+    return true;
+#else
+    return false;
+#endif
+}
+
+/* Fast string.compareTo(Ljava/lang/String;)I. */
+bool genInlinedStringCompareTo(CompilationUnit* cUnit, BasicBlock* bb,
+                               MIR* mir, InvokeType type)
+{
+#if defined(TARGET_ARM)
+    oatClobberCalleeSave(cUnit);
+    oatLockCallTemps(cUnit);  // Using fixed registers
+    int regThis = rARG0;
+    int regCmp = rARG1;
+
+    RegLocation rlThis = oatGetSrc(cUnit, mir, 0);
+    RegLocation rlCmp = oatGetSrc(cUnit, mir, 1);
+    loadValueDirectFixed(cUnit, rlThis, regThis);
+    loadValueDirectFixed(cUnit, rlCmp, regCmp);
+    int rTgt = loadHelper(cUnit, OFFSETOF_MEMBER(Thread, pStringCompareTo));
+    genNullCheck(cUnit, rlThis.sRegLow, regThis, mir);
+    //TUNING: check if rlCmp.sRegLow is already null checked
+    LIR* launchPad = rawLIR(cUnit, 0, kPseudoIntrinsicRetry, (int)mir, type);
+    oatInsertGrowableList(cUnit, &cUnit->intrinsicLaunchpads,
+                          (intptr_t)launchPad);
+    opCmpImmBranch(cUnit, kCondEq, regCmp, 0, launchPad);
+    opReg(cUnit, kOpBlx, rTgt);
+    launchPad->operands[2] = NULL;  // No return possible
+    launchPad->operands[3] = (uintptr_t)bb;
+    // Record that we've already inlined & null checked
+    mir->optimizationFlags |= (MIR_INLINED | MIR_IGNORE_NULL_CHECK);
+    return true;
+#else
+    return false;
+#endif
+}
+
+bool genIntrinsic(CompilationUnit* cUnit, BasicBlock* bb, MIR* mir,
+                  InvokeType type, bool isRange)
+{
+    if ((mir->optimizationFlags & MIR_INLINED) || isRange)  {
+        return false;
+    }
+    /*
+     * TODO: move these to a target-specific structured constant array
+     * and use a generic match function.  The list of intrinsics may be
+     * slightly different depending on target.
+     * TODO: Fold this into a matching function that runs during
+     * basic block building.  This should be part of the action for
+     * small method inlining and recognition of the special object init
+     * method.  By doing this during basic block construction, we can also
+     * take advantage of/generate new useful dataflow info.
+     */
+    std::string tgtMethod = PrettyMethod(mir->dalvikInsn.vB, *cUnit->dex_file);
+    if (tgtMethod.compare("char java.lang.String.charAt(int)") == 0) {
+        return genInlinedCharAt(cUnit, bb, mir, type, isRange);
+    }
+    if (tgtMethod.compare("int java.lang.Math.min(int, int)") == 0) {
+        return genInlinedMinMaxInt(cUnit, bb, mir, true /* isMin */);
+    }
+    if (tgtMethod.compare("int java.lang.Math.max(int, int)") == 0) {
+        return genInlinedMinMaxInt(cUnit, bb, mir, false /* isMin */);
+    }
+    if (tgtMethod.compare("int java.lang.String.length()") == 0) {
+        return genInlinedStringIsEmptyOrLength(cUnit, bb, mir, false /* isEmpty */);
+    }
+    if (tgtMethod.compare("boolean java.lang.String.isEmpty()") == 0) {
+        return genInlinedStringIsEmptyOrLength(cUnit, bb, mir, true /* isEmpty */);
+    }
+    if (tgtMethod.compare("int java.lang.Math.abs(int)") == 0) {
+        return genInlinedAbsInt(cUnit, bb, mir);
+    }
+    if (tgtMethod.compare("long java.lang.Math.abs(long)") == 0) {
+        return genInlinedAbsLong(cUnit, bb, mir);
+    }
+    if (tgtMethod.compare("int java.lang.Float.floatToRawIntBits(float)") == 0) {
+        return genInlinedFloatCvt(cUnit, bb, mir);
+    }
+    if (tgtMethod.compare("float java.lang.Float.intBitsToFloat(int)") == 0) {
+        return genInlinedFloatCvt(cUnit, bb, mir);
+    }
+    if (tgtMethod.compare("long java.lang.Double.doubleToRawLongBits(double)") == 0) {
+        return genInlinedDoubleCvt(cUnit, bb, mir);
+    }
+    if (tgtMethod.compare("double java.lang.Double.longBitsToDouble(long)") == 0) {
+        return genInlinedDoubleCvt(cUnit, bb, mir);
+    }
+    if (tgtMethod.compare("int java.lang.String.indexOf(int, int)") == 0) {
+        return genInlinedIndexOf(cUnit, bb, mir, type, false /* base 0 */);
+    }
+    if (tgtMethod.compare("int java.lang.String.indexOf(int)") == 0) {
+        return genInlinedIndexOf(cUnit, bb, mir, type, true /* base 0 */);
+    }
+    if (tgtMethod.compare("int java.lang.String.compareTo(java.lang.String)") == 0) {
+        return genInlinedStringCompareTo(cUnit, bb, mir, type);
+    }
+    return false;
+}
+
+
 }  // namespace art
diff --git a/src/compiler/codegen/MethodCodegenDriver.cc b/src/compiler/codegen/MethodCodegenDriver.cc
index 205a65a..87a86eb 100644
--- a/src/compiler/codegen/MethodCodegenDriver.cc
+++ b/src/compiler/codegen/MethodCodegenDriver.cc
@@ -52,8 +52,12 @@
     return res;
 }
 
-void genInvoke(CompilationUnit* cUnit, MIR* mir, InvokeType type, bool isRange)
+void genInvoke(CompilationUnit* cUnit, BasicBlock* bb, MIR* mir,
+               InvokeType type, bool isRange)
 {
+    if (genIntrinsic(cUnit, bb, mir, type, isRange)) {
+        return;
+    }
     DecodedInstruction* dInsn = &mir->dalvikInsn;
     InvokeType originalType = type;  // avoiding mutation by ComputeInvokeInfo
     int callState = 0;
@@ -543,38 +547,38 @@
             break;
 
         case Instruction::INVOKE_STATIC_RANGE:
-            genInvoke(cUnit, mir, kStatic, true /*range*/);
+            genInvoke(cUnit, bb, mir, kStatic, true /*range*/);
             break;
         case Instruction::INVOKE_STATIC:
-            genInvoke(cUnit, mir, kStatic, false /*range*/);
+            genInvoke(cUnit, bb, mir, kStatic, false /*range*/);
             break;
 
         case Instruction::INVOKE_DIRECT:
-            genInvoke(cUnit, mir, kDirect, false /*range*/);
+            genInvoke(cUnit, bb,  mir, kDirect, false /*range*/);
             break;
         case Instruction::INVOKE_DIRECT_RANGE:
-            genInvoke(cUnit, mir, kDirect, true /*range*/);
+            genInvoke(cUnit, bb, mir, kDirect, true /*range*/);
             break;
 
         case Instruction::INVOKE_VIRTUAL:
-            genInvoke(cUnit, mir, kVirtual, false /*range*/);
+            genInvoke(cUnit, bb, mir, kVirtual, false /*range*/);
             break;
         case Instruction::INVOKE_VIRTUAL_RANGE:
-            genInvoke(cUnit, mir, kVirtual, true /*range*/);
+            genInvoke(cUnit, bb, mir, kVirtual, true /*range*/);
             break;
 
         case Instruction::INVOKE_SUPER:
-            genInvoke(cUnit, mir, kSuper, false /*range*/);
+            genInvoke(cUnit, bb, mir, kSuper, false /*range*/);
             break;
         case Instruction::INVOKE_SUPER_RANGE:
-            genInvoke(cUnit, mir, kSuper, true /*range*/);
+            genInvoke(cUnit, bb, mir, kSuper, true /*range*/);
             break;
 
         case Instruction::INVOKE_INTERFACE:
-            genInvoke(cUnit, mir, kInterface, false /*range*/);
+            genInvoke(cUnit, bb, mir, kInterface, false /*range*/);
             break;
         case Instruction::INVOKE_INTERFACE_RANGE:
-            genInvoke(cUnit, mir, kInterface, true /*range*/);
+            genInvoke(cUnit, bb, mir, kInterface, true /*range*/);
             break;
 
         case Instruction::NEG_INT:
@@ -945,6 +949,8 @@
 
     handleThrowLaunchpads(cUnit);
 
+    handleIntrinsicLaunchpads(cUnit);
+
     if (!(cUnit->disableOpt & (1 << kSafeOptimizations))) {
         removeRedundantBranches(cUnit);
     }
diff --git a/src/compiler/codegen/arm/ArmLIR.h b/src/compiler/codegen/arm/ArmLIR.h
index 2d3028a..da39713 100644
--- a/src/compiler/codegen/arm/ArmLIR.h
+++ b/src/compiler/codegen/arm/ArmLIR.h
@@ -290,6 +290,7 @@
  * Assemble.cc.
  */
 enum ArmOpcode {
+    kPseudoIntrinsicRetry = -16,
     kPseudoSuspendTarget = -15,
     kPseudoThrowTarget = -14,
     kPseudoCaseLabel = -13,
diff --git a/src/compiler/codegen/arm/Thumb2/Factory.cc b/src/compiler/codegen/arm/Thumb2/Factory.cc
index c79f7c6..11d5bf4 100644
--- a/src/compiler/codegen/arm/Thumb2/Factory.cc
+++ b/src/compiler/codegen/arm/Thumb2/Factory.cc
@@ -681,6 +681,7 @@
                 opRegRegReg(cUnit, kOpAdd, regPtr, rBase, rIndex);
             }
             load = newLIR3(cUnit, opcode, rDest, regPtr, 0);
+            oatFreeTemp(cUnit, regPtr);
             return load;
         case kWord:
             opcode = (thumbForm) ? kThumbLdrRRR : kThumb2LdrRRR;
@@ -745,6 +746,7 @@
                 opRegRegReg(cUnit, kOpAdd, regPtr, rBase, rIndex);
             }
             store = newLIR3(cUnit, opcode, rSrc, regPtr, 0);
+            oatFreeTemp(cUnit, regPtr);
             return store;
         case kWord:
             opcode = (thumbForm) ? kThumbStrRRR : kThumb2StrRRR;
diff --git a/src/compiler/codegen/mips/MipsLIR.h b/src/compiler/codegen/mips/MipsLIR.h
index 5c8fc34..c0ff298 100644
--- a/src/compiler/codegen/mips/MipsLIR.h
+++ b/src/compiler/codegen/mips/MipsLIR.h
@@ -333,6 +333,7 @@
  * Assemble.cc.
  */
 enum MipsOpCode {
+    kPseudoIntrinsicRetry = -16,
     kPseudoSuspendTarget = -15,
     kPseudoThrowTarget = -14,
     kPseudoCaseLabel = -13,
diff --git a/src/compiler/codegen/x86/X86LIR.h b/src/compiler/codegen/x86/X86LIR.h
index 378c24d..1fc44b3 100644
--- a/src/compiler/codegen/x86/X86LIR.h
+++ b/src/compiler/codegen/x86/X86LIR.h
@@ -292,6 +292,7 @@
  * Assemble.cc.
  */
 enum X86OpCode {
+    kPseudoIntrinsicRetry = -16,
     kPseudoSuspendTarget = -15,
     kPseudoThrowTarget = -14,
     kPseudoCaseLabel = -13,