Branch fusing

A belated birthday gift for irogers.  Fuse cmp-long/if-XXz,
cmp[lg]-[float|double]/if-XXz.

Change-Id: I8fa87f620fcf4e6bcf291bbc7a0ea6c8f5535467
diff --git a/src/compiler/codegen/CompilerCodegen.h b/src/compiler/codegen/CompilerCodegen.h
index 8f854da..20b2e45 100644
--- a/src/compiler/codegen/CompilerCodegen.h
+++ b/src/compiler/codegen/CompilerCodegen.h
@@ -26,6 +26,11 @@
 
 int oatGetInsnSize(LIR* lir);
 
+/* Fused compare-and-branch generators; called from handleExtendedMethodMIR() */
+void genFusedLongCmpBranch(CompilationUnit* cUnit, BasicBlock* bb, MIR* mir);
+void genFusedFPCmpBranch(CompilationUnit* cUnit, BasicBlock* bb, MIR* mir,
+                         bool gtBias, bool isDouble);
+
 /* Lower middle-level IR to low-level IR for the whole method */
 void oatMethodMIR2LIR(CompilationUnit* cUnit);
 
diff --git a/src/compiler/codegen/MethodCodegenDriver.cc b/src/compiler/codegen/MethodCodegenDriver.cc
index d753fcc..1dc11da 100644
--- a/src/compiler/codegen/MethodCodegenDriver.cc
+++ b/src/compiler/codegen/MethodCodegenDriver.cc
@@ -741,10 +741,16 @@
     "kMirOpNullNRangeDownCheck",
     "kMirOpLowerBound",
     "kMirOpCopy",
+    "kMirFusedCmplFloat",
+    "kMirFusedCmpgFloat",
+    "kMirFusedCmplDouble",
+    "kMirFusedCmpgDouble",
+    "kMirFusedCmpLong",
+    "kMirNop",
 };
 
 /* Extended MIR instructions like PHI */
-void handleExtendedMethodMIR(CompilationUnit* cUnit, MIR* mir)
+void handleExtendedMethodMIR(CompilationUnit* cUnit, BasicBlock* bb, MIR* mir)
 {
     int opOffset = mir->dalvikInsn.opcode - kMirOpFirst;
     char* msg = NULL;
@@ -771,6 +777,23 @@
             storeValue(cUnit, rlDest, rlSrc);
             break;
         }
+#if defined(TARGET_ARM)
+        case kMirOpFusedCmplFloat:
+            genFusedFPCmpBranch(cUnit, bb, mir, false /*gt bias*/, false /*double*/);
+            break;
+        case kMirOpFusedCmpgFloat:
+            genFusedFPCmpBranch(cUnit, bb, mir, true /*gt bias*/, false /*double*/);
+            break;
+        case kMirOpFusedCmplDouble:
+            genFusedFPCmpBranch(cUnit, bb, mir, false /*gt bias*/, true /*double*/);
+            break;
+        case kMirOpFusedCmpgDouble:
+            genFusedFPCmpBranch(cUnit, bb, mir, true /*gt bias*/, true /*double*/);
+            break;
+        case kMirOpFusedCmpLong:
+            genFusedLongCmpBranch(cUnit, bb, mir);
+            break;
+#endif
         default:
             break;
     }
@@ -827,11 +850,6 @@
         cUnit->liveSReg = INVALID_SREG;
 #endif
 
-        if ((int)mir->dalvikInsn.opcode >= (int)kMirOpFirst) {
-            handleExtendedMethodMIR(cUnit, mir);
-            continue;
-        }
-
         cUnit->currentDalvikOffset = mir->offset;
 
         Instruction::Code dalvikOpcode = mir->dalvikInsn.opcode;
@@ -864,6 +882,11 @@
             newLIR1(cUnit, kPseudoSSARep, (int) ssaString);
         }
 
+        if ((int)mir->dalvikInsn.opcode >= (int)kMirOpFirst) {
+            handleExtendedMethodMIR(cUnit, bb, mir);
+            continue;
+        }
+
         bool notHandled = compileDalvikInstruction(cUnit, mir, bb, labelList);
         if (notHandled) {
           LOG(FATAL) << StringPrintf("%#06x: Opcode %#x (%s) / Fmt %d not handled",
diff --git a/src/compiler/codegen/arm/FP/Thumb2VFP.cc b/src/compiler/codegen/arm/FP/Thumb2VFP.cc
index 72b4fec..380c014 100644
--- a/src/compiler/codegen/arm/FP/Thumb2VFP.cc
+++ b/src/compiler/codegen/arm/FP/Thumb2VFP.cc
@@ -182,6 +182,60 @@
     return false;
 }
 
+/*
+ * Fused cmp[lg]-[float|double] / if-XX: emit a VFP compare, copy the FP
+ * status flags to the core flags, and branch to bb->taken on the condition
+ * held in mir->dalvikInsn.arg[0].  gtBias selects cmpg (vs. cmpl) handling.
+ */
+void genFusedFPCmpBranch(CompilationUnit* cUnit, BasicBlock* bb, MIR* mir,
+                         bool gtBias, bool isDouble)
+{
+    LIR* labelList = (LIR*)cUnit->blockLabelList;
+    LIR* target = &labelList[bb->taken->id];
+    RegLocation rlSrc1;
+    RegLocation rlSrc2;
+    if (isDouble) {
+        rlSrc1 = oatGetSrcWide(cUnit, mir, 0, 1);
+        rlSrc2 = oatGetSrcWide(cUnit, mir, 2, 3);
+        rlSrc1 = loadValueWide(cUnit, rlSrc1, kFPReg);
+        rlSrc2 = loadValueWide(cUnit, rlSrc2, kFPReg);
+        newLIR2(cUnit, kThumb2Vcmpd, S2D(rlSrc1.lowReg, rlSrc1.highReg),
+                S2D(rlSrc2.lowReg, rlSrc2.highReg));
+    } else {
+        rlSrc1 = oatGetSrc(cUnit, mir, 0);
+        rlSrc2 = oatGetSrc(cUnit, mir, 1);
+        rlSrc1 = loadValue(cUnit, rlSrc1, kFPReg);
+        rlSrc2 = loadValue(cUnit, rlSrc2, kFPReg);
+        newLIR2(cUnit, kThumb2Vcmps, rlSrc1.lowReg, rlSrc2.lowReg);
+    }
+    newLIR0(cUnit, kThumb2Fmstat);
+    ConditionCode ccode = static_cast<ConditionCode>(mir->dalvikInsn.arg[0]);
+    /*
+     * cmpg (gtBias) treats NaN as "greater": pick conditions that do too.
+     */
+    switch(ccode) {
+        case kCondEq:
+        case kCondNe:
+            break;
+        case kCondLt:
+            if (gtBias) ccode = kCondMi;
+            break;
+        case kCondLe:
+            if (gtBias) ccode = kCondLs;
+            break;
+        case kCondGt:
+            if (gtBias) ccode = kCondHi;
+            break;
+        case kCondGe:
+            if (gtBias) ccode = kCondCs;
+            break;
+        default:
+            LOG(FATAL) << "Unexpected ccode: " << (int)ccode;
+    }
+    opCondBranch(cUnit, ccode, target);
+}
+
+
 bool genCmpFP(CompilationUnit* cUnit, MIR* mir, RegLocation rlDest,
               RegLocation rlSrc1, RegLocation rlSrc2)
 {
diff --git a/src/compiler/codegen/arm/Thumb2/Gen.cc b/src/compiler/codegen/arm/Thumb2/Gen.cc
index 9477d2c..ea02ca9 100644
--- a/src/compiler/codegen/arm/Thumb2/Gen.cc
+++ b/src/compiler/codegen/arm/Thumb2/Gen.cc
@@ -654,6 +654,48 @@
     branch3->target = branch1->target;
 }
 
+/*
+ * Fused cmp-long/if-XX: branch to bb->taken when src1 <ccode> src2.  High
+ * words are compared signed; on a tie the low words decide, unsigned.
+ */
+void genFusedLongCmpBranch(CompilationUnit* cUnit, BasicBlock* bb, MIR* mir)
+{
+    LIR* labelList = (LIR*)cUnit->blockLabelList;
+    LIR* taken = &labelList[bb->taken->id];
+    RegLocation rlSrc1 = oatGetSrcWide(cUnit, mir, 0, 1);
+    RegLocation rlSrc2 = oatGetSrcWide(cUnit, mir, 2, 3);
+    rlSrc1 = loadValueWide(cUnit, rlSrc1, kCoreReg);
+    rlSrc2 = loadValueWide(cUnit, rlSrc2, kCoreReg);
+    ConditionCode ccode = static_cast<ConditionCode>(mir->dalvikInsn.arg[0]);
+    LIR* notTaken = rawLIR(cUnit, mir->offset, kPseudoTargetLabel);
+    opRegReg(cUnit, kOpCmp, rlSrc1.highReg, rlSrc2.highReg);
+    switch(ccode) {
+        case kCondEq:
+            opCondBranch(cUnit, kCondNe, notTaken);
+            break;
+        case kCondNe:
+            opCondBranch(cUnit, kCondNe, taken);
+            break;
+        case kCondLt:
+        case kCondLe:
+            opCondBranch(cUnit, kCondLt, taken);
+            opCondBranch(cUnit, kCondGt, notTaken);
+            ccode = (ccode == kCondLt) ? kCondCc : kCondLs;  /* unsigned <, <= */
+            break;
+        case kCondGt:
+        case kCondGe:
+            opCondBranch(cUnit, kCondGt, taken);
+            opCondBranch(cUnit, kCondLt, notTaken);
+            ccode = (ccode == kCondGt) ? kCondHi : kCondCs;  /* unsigned >, >= */
+            break;
+        default:
+            LOG(FATAL) << "Unexpected ccode: " << (int)ccode;
+    }
+    opRegReg(cUnit, kOpCmp, rlSrc1.lowReg, rlSrc2.lowReg);
+    opCondBranch(cUnit, ccode, taken);
+    oatAppendLIR(cUnit, notTaken);
+}
+
 /*
  * Generate a register comparison to an immediate and branch.  Caller
  * is responsible for setting branch target field.