Branch fusing
A belated birthday gift for irogers. Fuse cmp-long/if-XXz and
cmp[lg]-[float|double]/if-XXz pairs into single compare-and-branch sequences.
Change-Id: I8fa87f620fcf4e6bcf291bbc7a0ea6c8f5535467
diff --git a/src/compiler/codegen/CompilerCodegen.h b/src/compiler/codegen/CompilerCodegen.h
index 8f854da..20b2e45 100644
--- a/src/compiler/codegen/CompilerCodegen.h
+++ b/src/compiler/codegen/CompilerCodegen.h
@@ -26,6 +26,11 @@
int oatGetInsnSize(LIR* lir);
+/*
+ * Fused compare-and-branch generators, invoked from
+ * handleExtendedMethodMIR for the kMirOpFused* extended opcodes.
+ */
+void genFusedLongCmpBranch(CompilationUnit* cUnit, BasicBlock* bb, MIR* mir);
+void genFusedFPCmpBranch(CompilationUnit* cUnit, BasicBlock* bb, MIR* mir,
+ bool gtBias, bool isDouble);
+
+
/* Lower middle-level IR to low-level IR for the whole method */
void oatMethodMIR2LIR(CompilationUnit* cUnit);
diff --git a/src/compiler/codegen/MethodCodegenDriver.cc b/src/compiler/codegen/MethodCodegenDriver.cc
index d753fcc..1dc11da 100644
--- a/src/compiler/codegen/MethodCodegenDriver.cc
+++ b/src/compiler/codegen/MethodCodegenDriver.cc
@@ -741,10 +741,16 @@
"kMirOpNullNRangeDownCheck",
"kMirOpLowerBound",
"kMirOpCopy",
+ "kMirFusedCmplFloat",
+ "kMirFusedCmpgFloat",
+ "kMirFusedCmplDouble",
+ "kMirFusedCmpgDouble",
+ "kMirFusedCmpLong",
+ "kMirNop",
};
/* Extended MIR instructions like PHI */
-void handleExtendedMethodMIR(CompilationUnit* cUnit, MIR* mir)
+void handleExtendedMethodMIR(CompilationUnit* cUnit, BasicBlock* bb, MIR* mir)
{
int opOffset = mir->dalvikInsn.opcode - kMirOpFirst;
char* msg = NULL;
@@ -771,6 +777,23 @@
storeValue(cUnit, rlDest, rlSrc);
break;
}
+#if defined(TARGET_ARM)
+ case kMirOpFusedCmplFloat:
+ genFusedFPCmpBranch(cUnit, bb, mir, false /*gt bias*/, false /*double*/);
+ break;
+ case kMirOpFusedCmpgFloat:
+ genFusedFPCmpBranch(cUnit, bb, mir, true /*gt bias*/, false /*double*/);
+ break;
+ case kMirOpFusedCmplDouble:
+ genFusedFPCmpBranch(cUnit, bb, mir, false /*gt bias*/, true /*double*/);
+ break;
+ case kMirOpFusedCmpgDouble:
+ genFusedFPCmpBranch(cUnit, bb, mir, true /*gt bias*/, true /*double*/);
+ break;
+ case kMirOpFusedCmpLong:
+ genFusedLongCmpBranch(cUnit, bb, mir);
+ break;
+#endif
default:
break;
}
@@ -827,11 +850,6 @@
cUnit->liveSReg = INVALID_SREG;
#endif
- if ((int)mir->dalvikInsn.opcode >= (int)kMirOpFirst) {
- handleExtendedMethodMIR(cUnit, mir);
- continue;
- }
-
cUnit->currentDalvikOffset = mir->offset;
Instruction::Code dalvikOpcode = mir->dalvikInsn.opcode;
@@ -864,6 +882,11 @@
newLIR1(cUnit, kPseudoSSARep, (int) ssaString);
}
+ if ((int)mir->dalvikInsn.opcode >= (int)kMirOpFirst) {
+ handleExtendedMethodMIR(cUnit, bb, mir);
+ continue;
+ }
+
bool notHandled = compileDalvikInstruction(cUnit, mir, bb, labelList);
if (notHandled) {
LOG(FATAL) << StringPrintf("%#06x: Opcode %#x (%s) / Fmt %d not handled",
diff --git a/src/compiler/codegen/arm/FP/Thumb2VFP.cc b/src/compiler/codegen/arm/FP/Thumb2VFP.cc
index 72b4fec..380c014 100644
--- a/src/compiler/codegen/arm/FP/Thumb2VFP.cc
+++ b/src/compiler/codegen/arm/FP/Thumb2VFP.cc
@@ -182,6 +182,60 @@
return false;
}
+/*
+ * Fused cmp[lg]-{float,double} + if-XX: emit vcmp, copy the FP status to
+ * the core flags, then branch to bb->taken on the condition in arg[0].
+ * With gtBias the ordering tests are remapped (Lt->Mi, Le->Ls, Gt->Hi,
+ * Ge->Cs); otherwise the condition is used unchanged.
+ * NOTE(review): the remap presumably gives cmpg NaN semantics - confirm.
+ */
+void genFusedFPCmpBranch(CompilationUnit* cUnit, BasicBlock* bb, MIR* mir,
+                         bool gtBias, bool isDouble)
+{
+  LIR* labelList = (LIR*)cUnit->blockLabelList;
+  LIR* target = &labelList[bb->taken->id];
+  RegLocation rlSrc1;
+  RegLocation rlSrc2;
+  if (isDouble) {
+    rlSrc1 = oatGetSrcWide(cUnit, mir, 0, 1);
+    rlSrc2 = oatGetSrcWide(cUnit, mir, 2, 3);
+    rlSrc1 = loadValueWide(cUnit, rlSrc1, kFPReg);
+    rlSrc2 = loadValueWide(cUnit, rlSrc2, kFPReg);
+    newLIR2(cUnit, kThumb2Vcmpd, S2D(rlSrc1.lowReg, rlSrc1.highReg),
+            S2D(rlSrc2.lowReg, rlSrc2.highReg));
+  } else {
+    rlSrc1 = oatGetSrc(cUnit, mir, 0);
+    rlSrc2 = oatGetSrc(cUnit, mir, 1);
+    rlSrc1 = loadValue(cUnit, rlSrc1, kFPReg);
+    rlSrc2 = loadValue(cUnit, rlSrc2, kFPReg);
+    newLIR2(cUnit, kThumb2Vcmps, rlSrc1.lowReg, rlSrc2.lowReg);
+  }
+  newLIR0(cUnit, kThumb2Fmstat);
+  ConditionCode ccode = static_cast<ConditionCode>(mir->dalvikInsn.arg[0]);
+  switch(ccode) {
+    case kCondEq:
+    case kCondNe:
+      // Eq/Ne need no adjustment for either bias.
+      break;
+    case kCondLt:
+      ccode = gtBias ? kCondMi : kCondLt;
+      break;
+    case kCondLe:
+      ccode = gtBias ? kCondLs : kCondLe;
+      break;
+    case kCondGt:
+      ccode = gtBias ? kCondHi : kCondGt;
+      break;
+    case kCondGe:
+      ccode = gtBias ? kCondCs : kCondGe;
+      break;
+    default:
+      LOG(FATAL) << "Unexpected ccode: " << (int)ccode;
+  }
+  opCondBranch(cUnit, ccode, target);
+}
+
+
bool genCmpFP(CompilationUnit* cUnit, MIR* mir, RegLocation rlDest,
RegLocation rlSrc1, RegLocation rlSrc2)
{
diff --git a/src/compiler/codegen/arm/Thumb2/Gen.cc b/src/compiler/codegen/arm/Thumb2/Gen.cc
index 9477d2c..ea02ca9 100644
--- a/src/compiler/codegen/arm/Thumb2/Gen.cc
+++ b/src/compiler/codegen/arm/Thumb2/Gen.cc
@@ -654,6 +654,48 @@
branch3->target = branch1->target;
}
+/*
+ * Fused cmp-long + if-XX: compare the high words signed; only when they
+ * are equal does the low-word compare decide, and that must be unsigned.
+ */
+void genFusedLongCmpBranch(CompilationUnit* cUnit, BasicBlock* bb, MIR* mir)
+{
+  LIR* labelList = (LIR*)cUnit->blockLabelList;
+  LIR* taken = &labelList[bb->taken->id];
+  RegLocation rlSrc1 = oatGetSrcWide(cUnit, mir, 0, 1);
+  RegLocation rlSrc2 = oatGetSrcWide(cUnit, mir, 2, 3);
+  rlSrc1 = loadValueWide(cUnit, rlSrc1, kCoreReg);
+  rlSrc2 = loadValueWide(cUnit, rlSrc2, kCoreReg);
+  ConditionCode ccode = static_cast<ConditionCode>(mir->dalvikInsn.arg[0]);
+  LIR* notTaken = rawLIR(cUnit, mir->offset, kPseudoTargetLabel);
+  opRegReg(cUnit, kOpCmp, rlSrc1.highReg, rlSrc2.highReg);
+  switch(ccode) {
+    case kCondEq:
+      opCondBranch(cUnit, kCondNe, notTaken);
+      break;
+    case kCondNe:
+      opCondBranch(cUnit, kCondNe, taken);
+      break;
+    case kCondLt:
+    case kCondLe:
+      opCondBranch(cUnit, kCondLt, taken);
+      opCondBranch(cUnit, kCondGt, notTaken);
+      ccode = (ccode == kCondLt) ? kCondCc : kCondLs;  // unsigned < / <=
+      break;
+    case kCondGt:
+    case kCondGe:
+      opCondBranch(cUnit, kCondGt, taken);
+      opCondBranch(cUnit, kCondLt, notTaken);
+      ccode = (ccode == kCondGt) ? kCondHi : kCondCs;  // unsigned > / >=
+      break;
+    default:
+      LOG(FATAL) << "Unexpected ccode: " << (int)ccode;
+  }
+  opRegReg(cUnit, kOpCmp, rlSrc1.lowReg, rlSrc2.lowReg);
+  opCondBranch(cUnit, ccode, taken);
+  oatAppendLIR(cUnit, notTaken);
+}
+
/*
* Generate a register comparison to an immediate and branch. Caller
* is responsible for setting branch target field.