diff options
| author | 2012-03-29 16:44:16 -0700 | |
|---|---|---|
| committer | 2012-03-29 16:44:16 -0700 | |
| commit | 84fd693103dddd50b6a18522bfb5eaab0e51b6ec (patch) | |
| tree | 47e3e167fe7b42561b1eafeda2c53329fbad4f58 /src | |
| parent | 7cea03edf5f9615cb65af864183f0e8b4d13a79e (diff) | |
Branch fusing
A belated birthday gift for irogers. Fuse cmp-long/if-XXz,
cmp[lg]-[float|double]/if-XXz.
Change-Id: I8fa87f620fcf4e6bcf291bbc7a0ea6c8f5535467
Diffstat (limited to 'src')
| -rw-r--r-- | src/compiler/CompilerIR.h | 11 | ||||
| -rw-r--r-- | src/compiler/Dataflow.cc | 116 | ||||
| -rw-r--r-- | src/compiler/codegen/CompilerCodegen.h | 5 | ||||
| -rw-r--r-- | src/compiler/codegen/MethodCodegenDriver.cc | 35 | ||||
| -rw-r--r-- | src/compiler/codegen/arm/FP/Thumb2VFP.cc | 54 | ||||
| -rw-r--r-- | src/compiler/codegen/arm/Thumb2/Gen.cc | 42 |
6 files changed, 247 insertions, 16 deletions
diff --git a/src/compiler/CompilerIR.h b/src/compiler/CompilerIR.h index 25fc89e593..621cccc91f 100644 --- a/src/compiler/CompilerIR.h +++ b/src/compiler/CompilerIR.h @@ -161,10 +161,16 @@ struct LIR { enum ExtendedMIROpcode { kMirOpFirst = kNumPackedOpcodes, kMirOpPhi = kMirOpFirst, + kMirOpCopy, + kMirOpFusedCmplFloat, + kMirOpFusedCmpgFloat, + kMirOpFusedCmplDouble, + kMirOpFusedCmpgDouble, + kMirOpFusedCmpLong, + kMirOpNop, kMirOpNullNRangeUpCheck, kMirOpNullNRangeDownCheck, kMirOpLowerBound, - kMirOpCopy, kMirOpLast, }; @@ -343,7 +349,8 @@ struct CompilationUnit { MIR* phiList; /* Use counts of ssa names */ - GrowableList useCounts; + GrowableList useCounts; // Weighted by nesting depth + GrowableList rawUseCounts; // Not weighted /* Optimization support */ GrowableList loopHeaders; diff --git a/src/compiler/Dataflow.cc b/src/compiler/Dataflow.cc index 72e3dc4700..5f632c061d 100644 --- a/src/compiler/Dataflow.cc +++ b/src/compiler/Dataflow.cc @@ -801,10 +801,36 @@ const int oatDataFlowAttributes[kMirOpLast] = { // Beginning of extended MIR opcodes // 100 MIR_PHI DF_PHI | DF_DA | DF_NULL_TRANSFER_N, - /* - * For extended MIR inserted at the MIR2LIR stage, it is okay to have - * undefined values here. - */ + + // 101 MIR_COPY + DF_DA | DF_UB | DF_IS_MOVE, + + // 102 MIR_FUSED_CMPL_FLOAT + DF_UA | DF_UB | DF_FP_A | DF_FP_B, + + // 103 MIR_FUSED_CMPG_FLOAT + DF_UA | DF_UB | DF_FP_A | DF_FP_B, + + // 104 MIR_FUSED_CMPL_DOUBLE + DF_UA_WIDE | DF_UB_WIDE | DF_FP_A | DF_FP_B, + + // 105 MIR_FUSED_CMPG_DOUBLE + DF_UA_WIDE | DF_UB_WIDE | DF_FP_A | DF_FP_B, + + // 106 MIR_FUSED_CMP_LONG + DF_UA_WIDE | DF_UB_WIDE | DF_CORE_A | DF_CORE_B, + + // 107 MIR_NOP + DF_NOP, + + // 108 MIR_NULL_RANGE_UP_CHECK + 0, + + // 109 MIR_NULL_RANGE_DOWN_CHECK + 0, + + // 110 MIR_LOWER_BOUND + 0, }; /* Return the base virtual register for a SSA name */ @@ -820,6 +846,13 @@ int SRegToSubscript(const CompilationUnit* cUnit, int ssaReg) return GET_ELEM_N(cUnit->ssaSubscripts, int, ssaReg); } +int getSSAUseCount(CompilationUnit* cUnit, int sReg) +{ + DCHECK(sReg < (int)cUnit->rawUseCounts.numUsed); + return cUnit->rawUseCounts.elemList[sReg]; +} + + char* oatGetDalvikDisassembly(CompilationUnit* cUnit, const DecodedInstruction& insn, const char* note) { @@ -1827,7 +1860,69 @@ bool basicBlockOpt(CompilationUnit* cUnit, BasicBlock* bb) case Instruction::CMPG_FLOAT: case Instruction::CMPG_DOUBLE: case Instruction::CMP_LONG: - // TODO: Check for and fuse preceeding comparison + if (mir->next != NULL) { + MIR* mirNext = mir->next; + Instruction::Code brOpcode = mirNext->dalvikInsn.opcode; + ConditionCode ccode = kCondNv; + switch(brOpcode) { + case Instruction::IF_EQZ: + ccode = kCondEq; + break; + case Instruction::IF_NEZ: + // ccode = kCondNe; + break; + case Instruction::IF_LTZ: + // ccode = kCondLt; + break; + case Instruction::IF_GEZ: + // ccode = kCondGe; + break; + case Instruction::IF_GTZ: + // ccode = kCondGt; + break; + case Instruction::IF_LEZ: + // ccode = kCondLe; + break; + default: + break; + } + // Make sure result of cmp is used by next insn and nowhere else + if ((ccode != kCondNv) && + (mir->ssaRep->defs[0] == mirNext->ssaRep->uses[0]) && + (getSSAUseCount(cUnit, mir->ssaRep->defs[0]) == 1)) { + mirNext->dalvikInsn.arg[0] = ccode; + switch(opcode) { + case Instruction::CMPL_FLOAT: + mirNext->dalvikInsn.opcode = + static_cast<Instruction::Code>(kMirOpFusedCmplFloat); + break; + case Instruction::CMPL_DOUBLE: + mirNext->dalvikInsn.opcode = + static_cast<Instruction::Code>(kMirOpFusedCmplDouble); + break; + case Instruction::CMPG_FLOAT: + mirNext->dalvikInsn.opcode = + static_cast<Instruction::Code>(kMirOpFusedCmpgFloat); + break; + case Instruction::CMPG_DOUBLE: + mirNext->dalvikInsn.opcode = + static_cast<Instruction::Code>(kMirOpFusedCmpgDouble); + break; + case Instruction::CMP_LONG: + mirNext->dalvikInsn.opcode = + static_cast<Instruction::Code>(kMirOpFusedCmpLong); + break; + default: LOG(ERROR) << "Unexpected opcode: " << (int)opcode; + } + mir->dalvikInsn.opcode = static_cast<Instruction::Code>(kMirOpNop); + mirNext->ssaRep->numUses = mir->ssaRep->numUses; + mirNext->ssaRep->uses = mir->ssaRep->uses; + mirNext->ssaRep->fpUse = mir->ssaRep->fpUse; + mirNext->ssaRep->numDefs = 0; + mir->ssaRep->numUses = 0; + mir->ssaRep->numDefs = 0; + } + } break; default: break; @@ -2230,6 +2325,7 @@ bool countUses(struct CompilationUnit* cUnit, struct BasicBlock* bb) for (int i = 0; i < mir->ssaRep->numUses; i++) { int sReg = mir->ssaRep->uses[i]; DCHECK_LT(sReg, (int)cUnit->useCounts.numUsed); + cUnit->rawUseCounts.elemList[sReg]++; cUnit->useCounts.elemList[sReg] += (1 << weight); } if (!(cUnit->disableOpt & (1 << kPromoteCompilerTemps))) { @@ -2248,6 +2344,7 @@ bool countUses(struct CompilationUnit* cUnit, struct BasicBlock* bb) usesMethodStar &= invokeUsesMethodStar(cUnit, mir); } if (usesMethodStar) { + cUnit->rawUseCounts.elemList[cUnit->methodSReg]++; cUnit->useCounts.elemList[cUnit->methodSReg] += (1 << weight); } } @@ -2258,14 +2355,17 @@ bool countUses(struct CompilationUnit* cUnit, struct BasicBlock* bb) void oatMethodUseCount(CompilationUnit *cUnit) { - if (cUnit->disableOpt & (1 << kPromoteRegs)) { - return; - } oatInitGrowableList(cUnit, &cUnit->useCounts, cUnit->numSSARegs + 32, kListMisc); + oatInitGrowableList(cUnit, &cUnit->rawUseCounts, cUnit->numSSARegs + 32, + kListMisc); // Initialize list for (int i = 0; i < cUnit->numSSARegs; i++) { oatInsertGrowableList(cUnit, &cUnit->useCounts, 0); + oatInsertGrowableList(cUnit, &cUnit->rawUseCounts, 0); + } + if (cUnit->disableOpt & (1 << kPromoteRegs)) { + return; } oatDataFlowAnalysisDispatcher(cUnit, countUses, kAllNodes, false /* isIterative */); diff --git a/src/compiler/codegen/CompilerCodegen.h b/src/compiler/codegen/CompilerCodegen.h index 8f854da6e1..20b2e45574 100644 --- a/src/compiler/codegen/CompilerCodegen.h +++ b/src/compiler/codegen/CompilerCodegen.h @@ -26,6 +26,11 @@ LIR* rawLIR(CompilationUnit* cUnit, int dalvikOffset, int opcode, int op0 = 0, int oatGetInsnSize(LIR* lir); +void genFusedLongCmpBranch(CompilationUnit* cUnit, BasicBlock* bb, MIR* mir); +void genFusedFPCmpBranch(CompilationUnit* cUnit, BasicBlock* bb, MIR* mir, + bool gtBias, bool isDouble); + + /* Lower middle-level IR to low-level IR for the whole method */ void oatMethodMIR2LIR(CompilationUnit* cUnit); diff --git a/src/compiler/codegen/MethodCodegenDriver.cc b/src/compiler/codegen/MethodCodegenDriver.cc index d753fcc1c6..1dc11dac70 100644 --- a/src/compiler/codegen/MethodCodegenDriver.cc +++ b/src/compiler/codegen/MethodCodegenDriver.cc @@ -741,10 +741,16 @@ const char* extendedMIROpNames[kMirOpLast - kMirOpFirst] = { "kMirOpNullNRangeDownCheck", "kMirOpLowerBound", "kMirOpCopy", + "kMirFusedCmplFloat", + "kMirFusedCmpgFloat", + "kMirFusedCmplDouble", + "kMirFusedCmpgDouble", + "kMirFusedCmpLong", + "kMirNop", }; /* Extended MIR instructions like PHI */ -void handleExtendedMethodMIR(CompilationUnit* cUnit, MIR* mir) +void handleExtendedMethodMIR(CompilationUnit* cUnit, BasicBlock* bb, MIR* mir) { int opOffset = mir->dalvikInsn.opcode - kMirOpFirst; char* msg = NULL; @@ -771,6 +777,23 @@ void handleExtendedMethodMIR(CompilationUnit* cUnit, MIR* mir) storeValue(cUnit, rlDest, rlSrc); break; } +#if defined(TARGET_ARM) + case kMirOpFusedCmplFloat: + genFusedFPCmpBranch(cUnit, bb, mir, false /*gt bias*/, false /*double*/); + break; + case kMirOpFusedCmpgFloat: + genFusedFPCmpBranch(cUnit, bb, mir, true /*gt bias*/, false /*double*/); + break; + case kMirOpFusedCmplDouble: + genFusedFPCmpBranch(cUnit, bb, mir, false /*gt bias*/, true /*double*/); + break; + case kMirOpFusedCmpgDouble: + genFusedFPCmpBranch(cUnit, bb, mir, true /*gt bias*/, true /*double*/); + break; + case kMirOpFusedCmpLong: + genFusedLongCmpBranch(cUnit, bb, mir); + break; +#endif default: break; } @@ -827,11 +850,6 @@ bool methodBlockCodeGen(CompilationUnit* cUnit, BasicBlock* bb) cUnit->liveSReg = INVALID_SREG; #endif - if ((int)mir->dalvikInsn.opcode >= (int)kMirOpFirst) { - handleExtendedMethodMIR(cUnit, mir); - continue; - } - cUnit->currentDalvikOffset = mir->offset; Instruction::Code dalvikOpcode = mir->dalvikInsn.opcode; @@ -864,6 +882,11 @@ bool methodBlockCodeGen(CompilationUnit* cUnit, BasicBlock* bb) newLIR1(cUnit, kPseudoSSARep, (int) ssaString); } + if ((int)mir->dalvikInsn.opcode >= (int)kMirOpFirst) { + handleExtendedMethodMIR(cUnit, bb, mir); + continue; + } + bool notHandled = compileDalvikInstruction(cUnit, mir, bb, labelList); if (notHandled) { LOG(FATAL) << StringPrintf("%#06x: Opcode %#x (%s) / Fmt %d not handled", diff --git a/src/compiler/codegen/arm/FP/Thumb2VFP.cc b/src/compiler/codegen/arm/FP/Thumb2VFP.cc index 72b4fec39c..380c014b6d 100644 --- a/src/compiler/codegen/arm/FP/Thumb2VFP.cc +++ b/src/compiler/codegen/arm/FP/Thumb2VFP.cc @@ -182,6 +182,60 @@ bool genConversion(CompilationUnit* cUnit, MIR* mir) return false; } +void genFusedFPCmpBranch(CompilationUnit* cUnit, BasicBlock* bb, MIR* mir, + bool gtBias, bool isDouble) +{ + LIR* labelList = (LIR*)cUnit->blockLabelList; + LIR* target = &labelList[bb->taken->id]; + RegLocation rlSrc1; + RegLocation rlSrc2; + if (isDouble) { + rlSrc1 = oatGetSrcWide(cUnit, mir, 0, 1); + rlSrc2 = oatGetSrcWide(cUnit, mir, 2, 3); + rlSrc1 = loadValueWide(cUnit, rlSrc1, kFPReg); + rlSrc2 = loadValueWide(cUnit, rlSrc2, kFPReg); + newLIR2(cUnit, kThumb2Vcmpd, S2D(rlSrc1.lowReg, r1Src2.highReg), + S2D(rlSrc2.lowReg, rlSrc2.highReg)); + } else { + rlSrc1 = oatGetSrc(cUnit, mir, 0); + rlSrc2 = oatGetSrc(cUnit, mir, 1); + rlSrc1 = loadValue(cUnit, rlSrc1, kFPReg); + rlSrc2 = loadValue(cUnit, rlSrc2, kFPReg); + newLIR2(cUnit, kThumb2Vcmps, rlSrc1.lowReg, rlSrc2.lowReg); + } + newLIR0(cUnit, kThumb2Fmstat); + ConditionCode ccode = static_cast<ConditionCode>(mir->dalvikInsn.arg[0]); + switch(ccode) { + case kCondEq: + case kCondNe: + break; + case kCondLt: + if (gtBias) { + ccode = kCondMi; + } + break; + case kCondLe: + if (gtBias) { + ccode = kCondLs; + } + break; + case kCondGt: + if (gtBias) { + ccode = kCondHi; + } + break; + case kCondGe: + if (gtBias) { + ccode = kCondCs; + } + break; + default: + LOG(FATAL) << "Unexpected ccode: " << (int)ccode; + } + opCondBranch(cUnit, ccode, target); +} + + bool genCmpFP(CompilationUnit* cUnit, MIR* mir, RegLocation rlDest, RegLocation rlSrc1, RegLocation rlSrc2) { diff --git a/src/compiler/codegen/arm/Thumb2/Gen.cc b/src/compiler/codegen/arm/Thumb2/Gen.cc index 9477d2c2a7..ea02ca99ac 100644 --- a/src/compiler/codegen/arm/Thumb2/Gen.cc +++ b/src/compiler/codegen/arm/Thumb2/Gen.cc @@ -654,6 +654,48 @@ void genCmpLong(CompilationUnit* cUnit, MIR* mir, RegLocation rlDest, branch3->target = branch1->target; } +void genFusedLongCmpBranch(CompilationUnit* cUnit, BasicBlock* bb, MIR* mir) +{ + LIR* labelList = (LIR*)cUnit->blockLabelList; + LIR* taken = &labelList[bb->taken->id]; + RegLocation rlSrc1 = oatGetSrcWide(cUnit, mir, 0, 1); + RegLocation rlSrc2 = oatGetSrcWide(cUnit, mir, 2, 3); + rlSrc1 = loadValueWide(cUnit, rlSrc1, kCoreReg); + rlSrc2 = loadValueWide(cUnit, rlSrc2, kCoreReg); + ConditionCode ccode = static_cast<ConditionCode>(mir->dalvikInsn.arg[0]); + LIR* notTaken = rawLIR(cUnit, mir->offset, kPseudoTargetLabel); + opRegReg(cUnit, kOpCmp, rlSrc1.highReg, rlSrc2.highReg); + switch(ccode) { + case kCondEq: + opCondBranch(cUnit, kCondNe, notTaken); + break; + case kCondNe: + opCondBranch(cUnit, kCondNe, taken); + break; + case kCondLt: + opCondBranch(cUnit, kCondLt, taken); + opCondBranch(cUnit, kCondGt, notTaken); + break; + case kCondLe: + opCondBranch(cUnit, kCondLt, taken); + opCondBranch(cUnit, kCondGt, notTaken); + break; + case kCondGt: + opCondBranch(cUnit, kCondGt, taken); + opCondBranch(cUnit, kCondLt, notTaken); + break; + case kCondGe: + opCondBranch(cUnit, kCondGt, taken); + opCondBranch(cUnit, kCondLt, notTaken); + break; + default: + LOG(FATAL) << "Unexpected ccode: " << (int)ccode; + } + opRegReg(cUnit, kOpCmp, rlSrc1.lowReg, rlSrc2.lowReg); + opCondBranch(cUnit, ccode, taken); + oatAppendLIR(cUnit, notTaken); +} + /* * Generate a register comparison to an immediate and branch. Caller * is responsible for setting branch target field. |