Branch fusing
A belated birthday gift for irogers. Fuse cmp-long/if-XXz,
cmp[lg]-[float|double]/if-XXz.
Change-Id: I8fa87f620fcf4e6bcf291bbc7a0ea6c8f5535467
diff --git a/src/compiler/CompilerIR.h b/src/compiler/CompilerIR.h
index 25fc89e..621cccc 100644
--- a/src/compiler/CompilerIR.h
+++ b/src/compiler/CompilerIR.h
@@ -161,10 +161,16 @@
enum ExtendedMIROpcode {
kMirOpFirst = kNumPackedOpcodes,
kMirOpPhi = kMirOpFirst,
+ kMirOpCopy,
+ kMirOpFusedCmplFloat,
+ kMirOpFusedCmpgFloat,
+ kMirOpFusedCmplDouble,
+ kMirOpFusedCmpgDouble,
+ kMirOpFusedCmpLong,
+ kMirOpNop,
kMirOpNullNRangeUpCheck,
kMirOpNullNRangeDownCheck,
kMirOpLowerBound,
- kMirOpCopy,
kMirOpLast,
};
@@ -343,7 +349,8 @@
MIR* phiList;
/* Use counts of ssa names */
- GrowableList useCounts;
+ GrowableList useCounts; // Weighted by nesting depth
+ GrowableList rawUseCounts; // Not weighted
/* Optimization support */
GrowableList loopHeaders;
diff --git a/src/compiler/Dataflow.cc b/src/compiler/Dataflow.cc
index 72e3dc4..5f632c0 100644
--- a/src/compiler/Dataflow.cc
+++ b/src/compiler/Dataflow.cc
@@ -801,10 +801,36 @@
// Beginning of extended MIR opcodes
// 100 MIR_PHI
DF_PHI | DF_DA | DF_NULL_TRANSFER_N,
- /*
- * For extended MIR inserted at the MIR2LIR stage, it is okay to have
- * undefined values here.
- */
+
+ // 101 MIR_COPY
+ DF_DA | DF_UB | DF_IS_MOVE,
+
+ // 102 MIR_FUSED_CMPL_FLOAT
+ DF_UA | DF_UB | DF_FP_A | DF_FP_B,
+
+ // 103 MIR_FUSED_CMPG_FLOAT
+ DF_UA | DF_UB | DF_FP_A | DF_FP_B,
+
+ // 104 MIR_FUSED_CMPL_DOUBLE
+ DF_UA_WIDE | DF_UB_WIDE | DF_FP_A | DF_FP_B,
+
+ // 105 MIR_FUSED_CMPG_DOUBLE
+ DF_UA_WIDE | DF_UB_WIDE | DF_FP_A | DF_FP_B,
+
+ // 106 MIR_FUSED_CMP_LONG
+ DF_UA_WIDE | DF_UB_WIDE | DF_CORE_A | DF_CORE_B,
+
+ // 107 MIR_NOP
+ DF_NOP,
+
+ // 108 MIR_NULL_RANGE_UP_CHECK
+ 0,
+
+ // 109 MIR_NULL_RANGE_DOWN_CHECK
+ 0,
+
+ // 110 MIR_LOWER_BOUND
+ 0,
};
/* Return the base virtual register for a SSA name */
@@ -820,6 +846,13 @@
return GET_ELEM_N(cUnit->ssaSubscripts, int, ssaReg);
}
+int getSSAUseCount(CompilationUnit* cUnit, int sReg)
+{
+ DCHECK(sReg < (int)cUnit->rawUseCounts.numUsed);
+ return cUnit->rawUseCounts.elemList[sReg];
+}
+
+
char* oatGetDalvikDisassembly(CompilationUnit* cUnit,
const DecodedInstruction& insn, const char* note)
{
@@ -1827,7 +1860,69 @@
case Instruction::CMPG_FLOAT:
case Instruction::CMPG_DOUBLE:
case Instruction::CMP_LONG:
- // TODO: Check for and fuse preceeding comparison
+ if (mir->next != NULL) {
+ MIR* mirNext = mir->next;
+ Instruction::Code brOpcode = mirNext->dalvikInsn.opcode;
+ ConditionCode ccode = kCondNv;
+ switch(brOpcode) {
+ case Instruction::IF_EQZ:
+ ccode = kCondEq;
+ break;
+ case Instruction::IF_NEZ:
+ // ccode = kCondNe;
+ break;
+ case Instruction::IF_LTZ:
+ // ccode = kCondLt;
+ break;
+ case Instruction::IF_GEZ:
+ // ccode = kCondGe;
+ break;
+ case Instruction::IF_GTZ:
+ // ccode = kCondGt;
+ break;
+ case Instruction::IF_LEZ:
+ // ccode = kCondLe;
+ break;
+ default:
+ break;
+ }
+ // Make sure result of cmp is used by next insn and nowhere else
+ if ((ccode != kCondNv) &&
+ (mir->ssaRep->defs[0] == mirNext->ssaRep->uses[0]) &&
+ (getSSAUseCount(cUnit, mir->ssaRep->defs[0]) == 1)) {
+ mirNext->dalvikInsn.arg[0] = ccode;
+ switch(opcode) {
+ case Instruction::CMPL_FLOAT:
+ mirNext->dalvikInsn.opcode =
+ static_cast<Instruction::Code>(kMirOpFusedCmplFloat);
+ break;
+ case Instruction::CMPL_DOUBLE:
+ mirNext->dalvikInsn.opcode =
+ static_cast<Instruction::Code>(kMirOpFusedCmplDouble);
+ break;
+ case Instruction::CMPG_FLOAT:
+ mirNext->dalvikInsn.opcode =
+ static_cast<Instruction::Code>(kMirOpFusedCmpgFloat);
+ break;
+ case Instruction::CMPG_DOUBLE:
+ mirNext->dalvikInsn.opcode =
+ static_cast<Instruction::Code>(kMirOpFusedCmpgDouble);
+ break;
+ case Instruction::CMP_LONG:
+ mirNext->dalvikInsn.opcode =
+ static_cast<Instruction::Code>(kMirOpFusedCmpLong);
+ break;
+ default: LOG(ERROR) << "Unexpected opcode: " << (int)opcode;
+ }
+ mir->dalvikInsn.opcode = static_cast<Instruction::Code>(kMirOpNop);
+ mirNext->ssaRep->numUses = mir->ssaRep->numUses;
+ mirNext->ssaRep->uses = mir->ssaRep->uses;
+ mirNext->ssaRep->fpUse = mir->ssaRep->fpUse;
+ mirNext->ssaRep->numDefs = 0;
+ mir->ssaRep->numUses = 0;
+ mir->ssaRep->numDefs = 0;
+ }
+ }
break;
default:
break;
@@ -2230,6 +2325,7 @@
for (int i = 0; i < mir->ssaRep->numUses; i++) {
int sReg = mir->ssaRep->uses[i];
DCHECK_LT(sReg, (int)cUnit->useCounts.numUsed);
+ cUnit->rawUseCounts.elemList[sReg]++;
cUnit->useCounts.elemList[sReg] += (1 << weight);
}
if (!(cUnit->disableOpt & (1 << kPromoteCompilerTemps))) {
@@ -2248,6 +2344,7 @@
usesMethodStar &= invokeUsesMethodStar(cUnit, mir);
}
if (usesMethodStar) {
+ cUnit->rawUseCounts.elemList[cUnit->methodSReg]++;
cUnit->useCounts.elemList[cUnit->methodSReg] += (1 << weight);
}
}
@@ -2258,14 +2355,17 @@
void oatMethodUseCount(CompilationUnit *cUnit)
{
- if (cUnit->disableOpt & (1 << kPromoteRegs)) {
- return;
- }
oatInitGrowableList(cUnit, &cUnit->useCounts, cUnit->numSSARegs + 32,
kListMisc);
+ oatInitGrowableList(cUnit, &cUnit->rawUseCounts, cUnit->numSSARegs + 32,
+ kListMisc);
// Initialize list
for (int i = 0; i < cUnit->numSSARegs; i++) {
oatInsertGrowableList(cUnit, &cUnit->useCounts, 0);
+ oatInsertGrowableList(cUnit, &cUnit->rawUseCounts, 0);
+ }
+ if (cUnit->disableOpt & (1 << kPromoteRegs)) {
+ return;
}
oatDataFlowAnalysisDispatcher(cUnit, countUses,
kAllNodes, false /* isIterative */);
diff --git a/src/compiler/codegen/CompilerCodegen.h b/src/compiler/codegen/CompilerCodegen.h
index 8f854da..20b2e45 100644
--- a/src/compiler/codegen/CompilerCodegen.h
+++ b/src/compiler/codegen/CompilerCodegen.h
@@ -26,6 +26,11 @@
int oatGetInsnSize(LIR* lir);
+void genFusedLongCmpBranch(CompilationUnit* cUnit, BasicBlock* bb, MIR* mir);
+void genFusedFPCmpBranch(CompilationUnit* cUnit, BasicBlock* bb, MIR* mir,
+ bool gtBias, bool isDouble);
+
+
/* Lower middle-level IR to low-level IR for the whole method */
void oatMethodMIR2LIR(CompilationUnit* cUnit);
diff --git a/src/compiler/codegen/MethodCodegenDriver.cc b/src/compiler/codegen/MethodCodegenDriver.cc
index d753fcc..1dc11da 100644
--- a/src/compiler/codegen/MethodCodegenDriver.cc
+++ b/src/compiler/codegen/MethodCodegenDriver.cc
@@ -741,10 +741,16 @@
"kMirOpNullNRangeDownCheck",
"kMirOpLowerBound",
"kMirOpCopy",
+ "kMirFusedCmplFloat",
+ "kMirFusedCmpgFloat",
+ "kMirFusedCmplDouble",
+ "kMirFusedCmpgDouble",
+ "kMirFusedCmpLong",
+ "kMirNop",
};
/* Extended MIR instructions like PHI */
-void handleExtendedMethodMIR(CompilationUnit* cUnit, MIR* mir)
+void handleExtendedMethodMIR(CompilationUnit* cUnit, BasicBlock* bb, MIR* mir)
{
int opOffset = mir->dalvikInsn.opcode - kMirOpFirst;
char* msg = NULL;
@@ -771,6 +777,23 @@
storeValue(cUnit, rlDest, rlSrc);
break;
}
+#if defined(TARGET_ARM)
+ case kMirOpFusedCmplFloat:
+ genFusedFPCmpBranch(cUnit, bb, mir, false /*gt bias*/, false /*double*/);
+ break;
+ case kMirOpFusedCmpgFloat:
+ genFusedFPCmpBranch(cUnit, bb, mir, true /*gt bias*/, false /*double*/);
+ break;
+ case kMirOpFusedCmplDouble:
+ genFusedFPCmpBranch(cUnit, bb, mir, false /*gt bias*/, true /*double*/);
+ break;
+ case kMirOpFusedCmpgDouble:
+ genFusedFPCmpBranch(cUnit, bb, mir, true /*gt bias*/, true /*double*/);
+ break;
+ case kMirOpFusedCmpLong:
+ genFusedLongCmpBranch(cUnit, bb, mir);
+ break;
+#endif
default:
break;
}
@@ -827,11 +850,6 @@
cUnit->liveSReg = INVALID_SREG;
#endif
- if ((int)mir->dalvikInsn.opcode >= (int)kMirOpFirst) {
- handleExtendedMethodMIR(cUnit, mir);
- continue;
- }
-
cUnit->currentDalvikOffset = mir->offset;
Instruction::Code dalvikOpcode = mir->dalvikInsn.opcode;
@@ -864,6 +882,11 @@
newLIR1(cUnit, kPseudoSSARep, (int) ssaString);
}
+ if ((int)mir->dalvikInsn.opcode >= (int)kMirOpFirst) {
+ handleExtendedMethodMIR(cUnit, bb, mir);
+ continue;
+ }
+
bool notHandled = compileDalvikInstruction(cUnit, mir, bb, labelList);
if (notHandled) {
LOG(FATAL) << StringPrintf("%#06x: Opcode %#x (%s) / Fmt %d not handled",
diff --git a/src/compiler/codegen/arm/FP/Thumb2VFP.cc b/src/compiler/codegen/arm/FP/Thumb2VFP.cc
index 72b4fec..380c014 100644
--- a/src/compiler/codegen/arm/FP/Thumb2VFP.cc
+++ b/src/compiler/codegen/arm/FP/Thumb2VFP.cc
@@ -182,6 +182,60 @@
return false;
}
+void genFusedFPCmpBranch(CompilationUnit* cUnit, BasicBlock* bb, MIR* mir,
+ bool gtBias, bool isDouble)
+{
+ LIR* labelList = (LIR*)cUnit->blockLabelList;
+ LIR* target = &labelList[bb->taken->id];
+ RegLocation rlSrc1;
+ RegLocation rlSrc2;
+ if (isDouble) {
+ rlSrc1 = oatGetSrcWide(cUnit, mir, 0, 1);
+ rlSrc2 = oatGetSrcWide(cUnit, mir, 2, 3);
+ rlSrc1 = loadValueWide(cUnit, rlSrc1, kFPReg);
+ rlSrc2 = loadValueWide(cUnit, rlSrc2, kFPReg);
+ newLIR2(cUnit, kThumb2Vcmpd, S2D(rlSrc1.lowReg, r1Src2.highReg),
+ S2D(rlSrc2.lowReg, rlSrc2.highReg));
+ } else {
+ rlSrc1 = oatGetSrc(cUnit, mir, 0);
+ rlSrc2 = oatGetSrc(cUnit, mir, 1);
+ rlSrc1 = loadValue(cUnit, rlSrc1, kFPReg);
+ rlSrc2 = loadValue(cUnit, rlSrc2, kFPReg);
+ newLIR2(cUnit, kThumb2Vcmps, rlSrc1.lowReg, rlSrc2.lowReg);
+ }
+ newLIR0(cUnit, kThumb2Fmstat);
+ ConditionCode ccode = static_cast<ConditionCode>(mir->dalvikInsn.arg[0]);
+ switch(ccode) {
+ case kCondEq:
+ case kCondNe:
+ break;
+ case kCondLt:
+ if (gtBias) {
+ ccode = kCondMi;
+ }
+ break;
+ case kCondLe:
+ if (gtBias) {
+ ccode = kCondLs;
+ }
+ break;
+ case kCondGt:
+ if (gtBias) {
+ ccode = kCondHi;
+ }
+ break;
+ case kCondGe:
+ if (gtBias) {
+ ccode = kCondCs;
+ }
+ break;
+ default:
+ LOG(FATAL) << "Unexpected ccode: " << (int)ccode;
+ }
+ opCondBranch(cUnit, ccode, target);
+}
+
+
bool genCmpFP(CompilationUnit* cUnit, MIR* mir, RegLocation rlDest,
RegLocation rlSrc1, RegLocation rlSrc2)
{
diff --git a/src/compiler/codegen/arm/Thumb2/Gen.cc b/src/compiler/codegen/arm/Thumb2/Gen.cc
index 9477d2c..ea02ca9 100644
--- a/src/compiler/codegen/arm/Thumb2/Gen.cc
+++ b/src/compiler/codegen/arm/Thumb2/Gen.cc
@@ -654,6 +654,48 @@
branch3->target = branch1->target;
}
+void genFusedLongCmpBranch(CompilationUnit* cUnit, BasicBlock* bb, MIR* mir)
+{
+ LIR* labelList = (LIR*)cUnit->blockLabelList;
+ LIR* taken = &labelList[bb->taken->id];
+ RegLocation rlSrc1 = oatGetSrcWide(cUnit, mir, 0, 1);
+ RegLocation rlSrc2 = oatGetSrcWide(cUnit, mir, 2, 3);
+ rlSrc1 = loadValueWide(cUnit, rlSrc1, kCoreReg);
+ rlSrc2 = loadValueWide(cUnit, rlSrc2, kCoreReg);
+ ConditionCode ccode = static_cast<ConditionCode>(mir->dalvikInsn.arg[0]);
+ LIR* notTaken = rawLIR(cUnit, mir->offset, kPseudoTargetLabel);
+ opRegReg(cUnit, kOpCmp, rlSrc1.highReg, rlSrc2.highReg);
+ switch(ccode) {
+ case kCondEq:
+ opCondBranch(cUnit, kCondNe, notTaken);
+ break;
+ case kCondNe:
+ opCondBranch(cUnit, kCondNe, taken);
+ break;
+ case kCondLt:
+ opCondBranch(cUnit, kCondLt, taken);
+ opCondBranch(cUnit, kCondGt, notTaken);
+ break;
+ case kCondLe:
+ opCondBranch(cUnit, kCondLt, taken);
+ opCondBranch(cUnit, kCondGt, notTaken);
+ break;
+ case kCondGt:
+ opCondBranch(cUnit, kCondGt, taken);
+ opCondBranch(cUnit, kCondLt, notTaken);
+ break;
+ case kCondGe:
+ opCondBranch(cUnit, kCondGt, taken);
+ opCondBranch(cUnit, kCondLt, notTaken);
+ break;
+ default:
+ LOG(FATAL) << "Unexpected ccode: " << (int)ccode;
+ }
+ opRegReg(cUnit, kOpCmp, rlSrc1.lowReg, rlSrc2.lowReg);
+ opCondBranch(cUnit, ccode, taken);
+ oatAppendLIR(cUnit, notTaken);
+}
+
/*
* Generate a register comparison to an immediate and branch. Caller
* is responsible for setting branch target field.