diff options
| author | 2012-01-17 17:33:47 -0800 | |
|---|---|---|
| committer | 2012-01-24 06:00:32 -0800 | |
| commit | 5b53710b4abcf8f35c91a963a475b72cb34757e6 (patch) | |
| tree | 9f10f49fd09798a8f07ebffb4ee357c135e56f9f /src | |
| parent | cf044318c5638df0c400b731e94fab948ebd5ccb (diff) | |
Dataflow analysis rework
Rework key portions of the dataflow analysis code to avoid the
massive slowdowns we're seeing with methods containing more than
20,000 basic blocks.
Add a somewhat hacky lookup cache in front of findBlock. Later
we'll want to rework the "GrowableList" mechanism to incorporate
this properly.
Add memory to the bit vector utilities to speed up the iterators
for large sparse vectors. (Similarly, in the future we'll want
to include a rewrite of the home-grown bit-vector manipluation
utilites to be better with large ones).
With this CL, compilation speed on the degenerate cases is > 10x
faster. Compilation of typically-sized methods will see a smallish
improvement.
Change-Id: I7f086c1229dd4fe62f0a5fc361234bf204ebc2b1
Diffstat (limited to 'src')
| -rw-r--r-- | src/compiler/Compiler.h | 2 | ||||
| -rw-r--r-- | src/compiler/CompilerIR.h | 6 | ||||
| -rw-r--r-- | src/compiler/CompilerUtility.h | 12 | ||||
| -rw-r--r-- | src/compiler/Dataflow.cc | 162 | ||||
| -rw-r--r-- | src/compiler/Frontend.cc | 64 | ||||
| -rw-r--r-- | src/compiler/SSATransformation.cc | 197 | ||||
| -rw-r--r-- | src/compiler/Utility.cc | 52 |
7 files changed, 381 insertions, 114 deletions
diff --git a/src/compiler/Compiler.h b/src/compiler/Compiler.h index ac5e0c9b46..24bc8af169 100644 --- a/src/compiler/Compiler.h +++ b/src/compiler/Compiler.h @@ -54,6 +54,7 @@ enum debugControlVector { kDebugSlowestFieldPath, kDebugSlowestStringPath, kDebugExerciseResolveMethod, + kDebugVerifyDataflow, }; extern uint32_t compilerDebugFlags; @@ -91,6 +92,7 @@ typedef enum DataFlowAnalysisMode { kPreOrderDFSTraversal, // Depth-First-Search / Pre-Order kPostOrderDFSTraversal, // Depth-First-Search / Post-Order kPostOrderDOMTraversal, // Dominator tree / Post-Order + kReversePostOrderTraversal, // Depth-First-Search / reverse Post-Order } DataFlowAnalysisMode; struct CompilationUnit; diff --git a/src/compiler/CompilerIR.h b/src/compiler/CompilerIR.h index f981ccd167..317924b31e 100644 --- a/src/compiler/CompilerIR.h +++ b/src/compiler/CompilerIR.h @@ -147,6 +147,7 @@ typedef enum BlockListType { typedef struct BasicBlock { int id; + int dfsId; bool visited; bool hidden; bool catchEntry; @@ -191,6 +192,8 @@ typedef enum AssemblerStatus { kRetryHalve } AssemblerStatus; +#define NOTVISITED (-1) + typedef struct CompilationUnit { int numInsts; int numBlocks; @@ -265,9 +268,11 @@ typedef struct CompilationUnit { BasicBlock* curBlock; BasicBlock* nextCodegenBlock; // for extended trace codegen GrowableList dfsOrder; + GrowableList dfsPostOrder; GrowableList domPostOrderTraversal; GrowableList throwLaunchpads; GrowableList suspendLaunchpads; + int* iDomList; ArenaBitVector* tryBlockAddr; ArenaBitVector** defBlockMatrix; // numDalvikRegister x numBlocks ArenaBitVector* tempBlockV; @@ -310,6 +315,7 @@ typedef struct CompilationUnit { GrowableList fillArrayData; const u2* insns; u4 insnsSize; + std::map<unsigned int, BasicBlock*> blockMap; // findBlock lookup cache } CompilationUnit; BasicBlock* oatNewBB(BBType blockType, int blockId); diff --git a/src/compiler/CompilerUtility.h b/src/compiler/CompilerUtility.h index d7612ffc2f..a68ee5fb0a 100644 --- a/src/compiler/CompilerUtility.h +++ b/src/compiler/CompilerUtility.h @@ -58,8 +58,20 @@ typedef struct GrowableListIterator { */ struct ArenaBitVector { bool expandable; /* expand bitmap if we run out? */ + bool firstDirty; /* when true, don't believe firstBitSet */ + bool lastDirty; /* when true, don't believe lastBitSet */ u4 storageSize; /* current size, in 32-bit words */ u4* storage; + /* + * Opportunistically remember first and + * last set bits. This yeilds a performance + * advantage in cases where large + * sparse vectors are repeatedly scanned + * (something that can happen a lot during + * dataflow analysis. + */ + int firstBitSet; + int lastBitSet; }; /* Handy iterator to walk through the bit positions set to 1 */ diff --git a/src/compiler/Dataflow.cc b/src/compiler/Dataflow.cc index 211cffbc69..3f88ea5cbd 100644 --- a/src/compiler/Dataflow.cc +++ b/src/compiler/Dataflow.cc @@ -1509,82 +1509,104 @@ void oatDataFlowAnalysisDispatcher(CompilationUnit* cUnit, while (change) { change = false; + switch (dfaMode) { /* Scan all blocks and perform the operations specified in func */ - if (dfaMode == kAllNodes) { - GrowableListIterator iterator; - oatGrowableListIteratorInit(&cUnit->blockList, &iterator); - while (true) { - BasicBlock* bb = - (BasicBlock *) oatGrowableListIteratorNext(&iterator); - if (bb == NULL) break; - if (bb->hidden == true) continue; - change |= (*func)(cUnit, bb); + case kAllNodes: + { + GrowableListIterator iterator; + oatGrowableListIteratorInit(&cUnit->blockList, &iterator); + while (true) { + BasicBlock* bb = + (BasicBlock *) oatGrowableListIteratorNext(&iterator); + if (bb == NULL) break; + if (bb->hidden == true) continue; + change |= (*func)(cUnit, bb); + } } - } - /* - * Scan all reachable blocks and perform the operations specified in - * func. - */ - else if (dfaMode == kReachableNodes) { - int numReachableBlocks = cUnit->numReachableBlocks; - int idx; - const GrowableList *blockList = &cUnit->blockList; - - for (idx = 0; idx < numReachableBlocks; idx++) { - int blockIdx = cUnit->dfsOrder.elemList[idx]; - BasicBlock* bb = - (BasicBlock *) oatGrowableListGetElement(blockList, - blockIdx); - change |= (*func)(cUnit, bb); + break; + /* Scan reachable blocks and perform the ops specified in func. */ + case kReachableNodes: + { + int numReachableBlocks = cUnit->numReachableBlocks; + int idx; + const GrowableList *blockList = &cUnit->blockList; + + for (idx = 0; idx < numReachableBlocks; idx++) { + int blockIdx = cUnit->dfsOrder.elemList[idx]; + BasicBlock* bb = + (BasicBlock *) oatGrowableListGetElement(blockList, + blockIdx); + change |= (*func)(cUnit, bb); + } } - } - /* - * Scan all reachable blocks by the pre-order in the depth-first-search - * CFG and perform the operations specified in func. - */ - else if (dfaMode == kPreOrderDFSTraversal) { - int numReachableBlocks = cUnit->numReachableBlocks; - int idx; - const GrowableList *blockList = &cUnit->blockList; - - for (idx = 0; idx < numReachableBlocks; idx++) { - int dfsIdx = cUnit->dfsOrder.elemList[idx]; - BasicBlock* bb = - (BasicBlock *) oatGrowableListGetElement(blockList, dfsIdx); - change |= (*func)(cUnit, bb); + break; + + /* Scan reachable blocks by pre-order dfs and invoke func on each. */ + case kPreOrderDFSTraversal: + { + int numReachableBlocks = cUnit->numReachableBlocks; + int idx; + const GrowableList *blockList = &cUnit->blockList; + + for (idx = 0; idx < numReachableBlocks; idx++) { + int dfsIdx = cUnit->dfsOrder.elemList[idx]; + BasicBlock* bb = + (BasicBlock *) oatGrowableListGetElement(blockList, + dfsIdx); + change |= (*func)(cUnit, bb); + } } - } - /* - * Scan all reachable blocks by the post-order in the depth-first-search - * CFG and perform the operations specified in func. - */ - else if (dfaMode == kPostOrderDFSTraversal) { - int numReachableBlocks = cUnit->numReachableBlocks; - int idx; - const GrowableList *blockList = &cUnit->blockList; - - for (idx = numReachableBlocks - 1; idx >= 0; idx--) { - int dfsIdx = cUnit->dfsOrder.elemList[idx]; - BasicBlock* bb = - (BasicBlock *) oatGrowableListGetElement(blockList, dfsIdx); - change |= (*func)(cUnit, bb); + break; + /* Scan reachable blocks post-order dfs and invoke func on each. */ + case kPostOrderDFSTraversal: + { + int numReachableBlocks = cUnit->numReachableBlocks; + int idx; + const GrowableList *blockList = &cUnit->blockList; + + for (idx = numReachableBlocks - 1; idx >= 0; idx--) { + int dfsIdx = cUnit->dfsOrder.elemList[idx]; + BasicBlock* bb = + (BasicBlock *) oatGrowableListGetElement(blockList, + dfsIdx); + change |= (*func)(cUnit, bb); + } } - } - /* - * Scan all reachable blocks by the post-order in the dominator tree - * and perform the operations specified in func. - */ - else if (dfaMode == kPostOrderDOMTraversal) { - int numReachableBlocks = cUnit->numReachableBlocks; - int idx; - const GrowableList *blockList = &cUnit->blockList; - - for (idx = 0; idx < numReachableBlocks; idx++) { - int domIdx = cUnit->domPostOrderTraversal.elemList[idx]; - BasicBlock* bb = - (BasicBlock *) oatGrowableListGetElement(blockList, domIdx); - change |= (*func)(cUnit, bb); + break; + /* Scan reachable post-order dom tree and invoke func on each. */ + case kPostOrderDOMTraversal: + { + int numReachableBlocks = cUnit->numReachableBlocks; + int idx; + const GrowableList *blockList = &cUnit->blockList; + + for (idx = 0; idx < numReachableBlocks; idx++) { + int domIdx = cUnit->domPostOrderTraversal.elemList[idx]; + BasicBlock* bb = + (BasicBlock *) oatGrowableListGetElement(blockList, + domIdx); + change |= (*func)(cUnit, bb); + } + } + break; + /* Scan reachable blocks reverse post-order dfs, invoke func on each */ + case kReversePostOrderTraversal: + { + int numReachableBlocks = cUnit->numReachableBlocks; + int idx; + const GrowableList *blockList = &cUnit->blockList; + + for (idx = numReachableBlocks - 1; idx >= 0; idx--) { + int revIdx = cUnit->dfsPostOrder.elemList[idx]; + BasicBlock* bb = + (BasicBlock *) oatGrowableListGetElement(blockList, + revIdx); + change |= (*func)(cUnit, bb); + } } + break; + default: + LOG(FATAL) << "Unknown traversal mode " << (int)dfaMode; } /* If isIterative is false, exit the loop after the first iteration */ change &= isIterative; diff --git a/src/compiler/Frontend.cc b/src/compiler/Frontend.cc index 8070e6f5b2..1b1215a6bd 100644 --- a/src/compiler/Frontend.cc +++ b/src/compiler/Frontend.cc @@ -44,6 +44,7 @@ uint32_t compilerDebugFlags = 0 | // Enable debug/testing modes //(1 << kDebugSlowestFieldPath) | //(1 << kDebugSlowestStringPath) | //(1 << kDebugExerciseResolveMethod) | + //(1 << kDebugVerifyDataflow) | 0; std::string compilerMethodMatch; // Method name match to apply above flags @@ -136,6 +137,10 @@ STATIC BasicBlock *splitBlock(CompilationUnit* cUnit, bottomBlock->firstMIRInsn = insn; bottomBlock->lastMIRInsn = origBlock->lastMIRInsn; + /* Add it to the quick lookup cache */ + cUnit->blockMap.insert(std::make_pair(bottomBlock->startOffset, + bottomBlock)); + /* Handle the taken path */ bottomBlock->taken = origBlock->taken; if (bottomBlock->taken) { @@ -196,6 +201,7 @@ STATIC BasicBlock *splitBlock(CompilationUnit* cUnit, * is not non-null and is the block being split, update *immedPredBlockP to * point to the bottom block so that outgoing edges can be set up properly * (by the caller) + * Utilizes a map for fast lookup of the typical cases. */ STATIC BasicBlock *findBlock(CompilationUnit* cUnit, unsigned int codeOffset, @@ -205,28 +211,36 @@ STATIC BasicBlock *findBlock(CompilationUnit* cUnit, GrowableList* blockList = &cUnit->blockList; BasicBlock* bb; unsigned int i; - - for (i = 0; i < blockList->numUsed; i++) { - bb = (BasicBlock *) blockList->elemList[i]; - if (bb->blockType != kDalvikByteCode) continue; - if (bb->startOffset == codeOffset) return bb; - /* Check if a branch jumps into the middle of an existing block */ - if ((split == true) && (codeOffset > bb->startOffset) && - (bb->lastMIRInsn != NULL) && - (codeOffset <= bb->lastMIRInsn->offset)) { - BasicBlock *newBB = splitBlock(cUnit, codeOffset, bb, - bb == *immedPredBlockP ? - immedPredBlockP : NULL); - return newBB; + std::map<unsigned int, BasicBlock*>::iterator it; + + it = cUnit->blockMap.find(codeOffset); + if (it != cUnit->blockMap.end()) { + return it->second; + } else if (!create) { + return NULL; + } + + if (split) { + for (i = 0; i < blockList->numUsed; i++) { + bb = (BasicBlock *) blockList->elemList[i]; + if (bb->blockType != kDalvikByteCode) continue; + /* Check if a branch jumps into the middle of an existing block */ + if ((codeOffset > bb->startOffset) && (bb->lastMIRInsn != NULL) && + (codeOffset <= bb->lastMIRInsn->offset)) { + BasicBlock *newBB = splitBlock(cUnit, codeOffset, bb, + bb == *immedPredBlockP ? + immedPredBlockP : NULL); + return newBB; + } } } - if (create) { - bb = oatNewBB(kDalvikByteCode, cUnit->numBlocks++); - oatInsertGrowableList(&cUnit->blockList, (intptr_t) bb); - bb->startOffset = codeOffset; - return bb; - } - return NULL; + + /* Create a new one */ + bb = oatNewBB(kDalvikByteCode, cUnit->numBlocks++); + oatInsertGrowableList(&cUnit->blockList, (intptr_t) bb); + bb->startOffset = codeOffset; + cUnit->blockMap.insert(std::make_pair(bb->startOffset, bb)); + return bb; } /* Dump the CFG into a DOT graph */ @@ -748,6 +762,8 @@ CompiledMethod* oatCompileMethod(const Compiler& compiler, const DexFile::CodeIt cUnit->numOuts = code_item->outs_size_; /* Adjust this value accordingly once inlining is performed */ cUnit->numDalvikRegisters = code_item->registers_size_; + cUnit->blockMap = std::map<unsigned int, BasicBlock*>(); + cUnit->blockMap.clear(); bool useMatch = compilerMethodMatch.length() != 0; bool match = useMatch && (compilerFlipMatch ^ (PrettyMethod(method_idx, dex_file).find(compilerMethodMatch) != std::string::npos)); @@ -794,6 +810,8 @@ CompiledMethod* oatCompileMethod(const Compiler& compiler, const DexFile::CodeIt BasicBlock *curBlock = oatNewBB(kDalvikByteCode, numBlocks++); curBlock->startOffset = 0; oatInsertGrowableList(&cUnit->blockList, (intptr_t) curBlock); + /* Add first block to the fast lookup cache */ + cUnit->blockMap.insert(std::make_pair(curBlock->startOffset, curBlock)); entryBlock->fallThrough = curBlock; oatSetBit(curBlock->predecessors, entryBlock->id); @@ -885,8 +903,10 @@ CompiledMethod* oatCompileMethod(const Compiler& compiler, const DexFile::CodeIt oatDumpCompilationUnit(cUnit.get()); } - /* Verify if all blocks are connected as claimed */ - oatDataFlowAnalysisDispatcher(cUnit.get(), verifyPredInfo, kAllNodes, false /* isIterative */); + if (cUnit->enableDebug & (1 << kDebugVerifyDataflow)) { + /* Verify if all blocks are connected as claimed */ + oatDataFlowAnalysisDispatcher(cUnit.get(), verifyPredInfo, kAllNodes, false /* isIterative */); + } /* Perform SSA transformation for the whole method */ oatMethodSSATransformation(cUnit.get()); diff --git a/src/compiler/SSATransformation.cc b/src/compiler/SSATransformation.cc index 1f6a3b5318..a25d225694 100644 --- a/src/compiler/SSATransformation.cc +++ b/src/compiler/SSATransformation.cc @@ -20,17 +20,17 @@ namespace art { /* Enter the node to the dfsOrder list then visit its successors */ -STATIC void recordDFSPreOrder(CompilationUnit* cUnit, BasicBlock* block) +STATIC void recordDFSOrders(CompilationUnit* cUnit, BasicBlock* block) { if (block->visited || block->hidden) return; block->visited = true; - /* Enqueue the block id */ + /* Enqueue the preOrder block id */ oatInsertGrowableList(&cUnit->dfsOrder, block->id); - if (block->fallThrough) recordDFSPreOrder(cUnit, block->fallThrough); - if (block->taken) recordDFSPreOrder(cUnit, block->taken); + if (block->fallThrough) recordDFSOrders(cUnit, block->fallThrough); + if (block->taken) recordDFSOrders(cUnit, block->taken); if (block->successorBlockList.blockListType != kNotUsed) { GrowableListIterator iterator; oatGrowableListIteratorInit(&block->successorBlockList.blocks, @@ -40,16 +40,20 @@ STATIC void recordDFSPreOrder(CompilationUnit* cUnit, BasicBlock* block) (SuccessorBlockInfo *) oatGrowableListIteratorNext(&iterator); if (successorBlockInfo == NULL) break; BasicBlock* succBB = successorBlockInfo->block; - recordDFSPreOrder(cUnit, succBB); + recordDFSOrders(cUnit, succBB); } } + + /* Record postorder in basic block and enqueue normal id in dfsPostOrder */ + block->dfsId = cUnit->dfsPostOrder.numUsed; + oatInsertGrowableList(&cUnit->dfsPostOrder, block->id); return; } -/* Sort the blocks by the Depth-First-Search pre-order */ -STATIC void computeDFSOrder(CompilationUnit* cUnit) +/* Sort the blocks by the Depth-First-Search */ +STATIC void computeDFSOrders(CompilationUnit* cUnit) { - /* Initialize or reset the DFS order list */ + /* Initialize or reset the DFS preOrder list */ if (cUnit->dfsOrder.elemList == NULL) { oatInitGrowableList(&cUnit->dfsOrder, cUnit->numBlocks); } else { @@ -57,11 +61,19 @@ STATIC void computeDFSOrder(CompilationUnit* cUnit) cUnit->dfsOrder.numUsed = 0; } + /* Initialize or reset the DFS postOrder list */ + if (cUnit->dfsPostOrder.elemList == NULL) { + oatInitGrowableList(&cUnit->dfsPostOrder, cUnit->numBlocks); + } else { + /* Just reset the used length on the counter */ + cUnit->dfsPostOrder.numUsed = 0; + } + oatDataFlowAnalysisDispatcher(cUnit, oatClearVisitedFlag, kAllNodes, false /* isIterative */); - recordDFSPreOrder(cUnit, cUnit->entryBlock); + recordDFSOrders(cUnit, cUnit->entryBlock); cUnit->numReachableBlocks = cUnit->dfsOrder.numUsed; } @@ -125,6 +137,7 @@ STATIC void computeDomPostOrderTraversal(CompilationUnit* cUnit, BasicBlock* bb) /* Iterate through the dominated blocks first */ while (true) { + //TUNING: hot call to oatBitVectorIteratorNext int bbIdx = oatBitVectorIteratorNext(&bvIterator); if (bbIdx == -1) break; BasicBlock* dominatedBB = @@ -184,6 +197,7 @@ STATIC bool computeDominanceFrontier(CompilationUnit* cUnit, BasicBlock* bb) ArenaBitVectorIterator bvIterator; oatBitVectorIteratorInit(bb->iDominated, &bvIterator); while (true) { + //TUNING: hot call to oatBitVectorIteratorNext int dominatedIdx = oatBitVectorIteratorNext(&bvIterator); if (dominatedIdx == -1) break; BasicBlock* dominatedBB = (BasicBlock* ) @@ -191,6 +205,7 @@ STATIC bool computeDominanceFrontier(CompilationUnit* cUnit, BasicBlock* bb) ArenaBitVectorIterator dfIterator; oatBitVectorIteratorInit(dominatedBB->domFrontier, &dfIterator); while (true) { + //TUNING: hot call to oatBitVectorIteratorNext int dfUpIdx = oatBitVectorIteratorNext(&dfIterator); if (dfUpIdx == -1) break; BasicBlock* dfUpBlock = (BasicBlock* ) @@ -225,8 +240,12 @@ STATIC bool initializeDominationInfo(CompilationUnit* cUnit, BasicBlock* bb) return true; } -/* Worker function to compute each block's dominators */ -STATIC bool computeBlockDominators(CompilationUnit* cUnit, BasicBlock* bb) +/* + * Worker function to compute each block's dominators. This implementation + * is only used when kDebugVerifyDataflow is active and should compute + * the same dominator sets as computeBlockDominators. + */ +STATIC bool slowComputeBlockDominators(CompilationUnit* cUnit, BasicBlock* bb) { GrowableList* blockList = &cUnit->blockList; int numTotalBlocks = blockList->numUsed; @@ -261,8 +280,12 @@ STATIC bool computeBlockDominators(CompilationUnit* cUnit, BasicBlock* bb) return false; } -/* Worker function to compute the idom */ -STATIC bool computeImmediateDominator(CompilationUnit* cUnit, BasicBlock* bb) +/* + * Worker function to compute the idom. This implementation is only + * used when kDebugVerifyDataflow is active and should compute the + * same iDom as computeBlockIDom. + */ +STATIC bool slowComputeBlockIDom(CompilationUnit* cUnit, BasicBlock* bb) { GrowableList* blockList = &cUnit->blockList; ArenaBitVector* tempBlockV = cUnit->tempBlockV; @@ -304,6 +327,107 @@ STATIC bool computeImmediateDominator(CompilationUnit* cUnit, BasicBlock* bb) return true; } +/* + * Walk through the ordered iDom list until we reach common parent. + * Given the ordering of iDomList, this common parent represents the + * last element of the intersection of block1 and block2 dominators. + */ +int findCommonParent(CompilationUnit *cUnit, int block1, int block2) +{ + while (block1 != block2) { + while (block1 < block2) { + block1 = cUnit->iDomList[block1]; + DCHECK_NE(block1, NOTVISITED); + } + while (block2 < block1) { + block2 = cUnit->iDomList[block2]; + DCHECK_NE(block2, NOTVISITED); + } + } + return block1; +} + +/* Worker function to compute each block's immediate dominator */ +STATIC bool computeBlockIDom(CompilationUnit* cUnit, BasicBlock* bb) +{ + GrowableList* blockList = &cUnit->blockList; + ArenaBitVectorIterator bvIterator; + int idom = -1; + + /* Special-case entry block */ + if (bb == cUnit->entryBlock) { + return false; + } + + /* Iterate through the predecessors */ + oatBitVectorIteratorInit(bb->predecessors, &bvIterator); + + /* Find the first processed predecessor */ + while (true) { + //TUNING: hot call to oatBitVectorIteratorNext + int predIdx = oatBitVectorIteratorNext(&bvIterator); + DCHECK_NE(predIdx, -1); /* Should find one */ + BasicBlock* predBB = (BasicBlock* ) oatGrowableListGetElement( + blockList, predIdx); + if (cUnit->iDomList[predBB->dfsId] != NOTVISITED) { + idom = predBB->dfsId; + break; + } + } + + /* Scan the rest of the predecessors */ + while (true) { + int predIdx = oatBitVectorIteratorNext(&bvIterator); + if (predIdx == -1) break; + BasicBlock* predBB = (BasicBlock* ) oatGrowableListGetElement( + blockList, predIdx); + if (cUnit->iDomList[predBB->dfsId] == NOTVISITED) { + continue; + } else { + idom = findCommonParent(cUnit, predBB->dfsId, idom); + } + } + + DCHECK_NE(idom, NOTVISITED); + + /* Did something change? */ + if (cUnit->iDomList[bb->dfsId] != idom) { + cUnit->iDomList[bb->dfsId] = idom; + return true; + } + return false; +} + +/* Worker function to compute each block's domintors */ +STATIC bool computeBlockDominators(CompilationUnit* cUnit, BasicBlock* bb) +{ + if (bb == cUnit->entryBlock) { + oatClearAllBits(bb->dominators); + } else { + oatCopyBitVector(bb->dominators, bb->iDom->dominators); + } + oatSetBit(bb->dominators, bb->id); + return false; +} + +STATIC bool setDominators(CompilationUnit* cUnit, BasicBlock* bb) +{ + if (bb != cUnit->entryBlock) { + int iDomDFSIdx = cUnit->iDomList[bb->dfsId]; + DCHECK_NE(iDomDFSIdx, NOTVISITED); + int iDomIdx = cUnit->dfsPostOrder.elemList[iDomDFSIdx]; + BasicBlock* iDom = (BasicBlock*) + oatGrowableListGetElement(&cUnit->blockList, iDomIdx); + if (cUnit->enableDebug & (1 << kDebugVerifyDataflow)) { + DCHECK_EQ(bb->iDom->id, iDom->id); + } + bb->iDom = iDom; + /* Add bb to the iDominated set of the immediate dominator block */ + oatSetBit(iDom->iDominated, bb->id); + } + return false; +} + /* Compute dominators, immediate dominator, and dominance fronter */ STATIC void computeDominators(CompilationUnit* cUnit) { @@ -315,6 +439,23 @@ STATIC void computeDominators(CompilationUnit* cUnit) kReachableNodes, false /* isIterative */); + /* Initalize & Clear iDomList */ + if (cUnit->iDomList == NULL) { + cUnit->iDomList = (int*)oatNew(sizeof(int) * numReachableBlocks, false); + } + for (int i = 0; i < numReachableBlocks; i++) { + cUnit->iDomList[i] = NOTVISITED; + } + + /* For post-order, last block is entry block. Set its iDom to istelf */ + DCHECK_EQ(cUnit->entryBlock->dfsId, numReachableBlocks-1); + cUnit->iDomList[cUnit->entryBlock->dfsId] = cUnit->entryBlock->dfsId; + + /* Compute the immediate dominators */ + oatDataFlowAnalysisDispatcher(cUnit, computeBlockIDom, + kReversePostOrderTraversal, + true /* isIterative */); + /* Set the dominator for the root node */ oatClearAllBits(cUnit->entryBlock->dominators); oatSetBit(cUnit->entryBlock->dominators, cUnit->entryBlock->id); @@ -325,14 +466,27 @@ STATIC void computeDominators(CompilationUnit* cUnit) } else { oatClearAllBits(cUnit->tempBlockV); } - oatDataFlowAnalysisDispatcher(cUnit, computeBlockDominators, - kPreOrderDFSTraversal, - true /* isIterative */); - cUnit->entryBlock->iDom = NULL; - oatDataFlowAnalysisDispatcher(cUnit, computeImmediateDominator, - kReachableNodes, - false /* isIterative */); + + /* For testing, compute sets using alternate mechanism */ + if (cUnit->enableDebug & (1 << kDebugVerifyDataflow)) { + // Use alternate mechanism to compute dominators for comparison + oatDataFlowAnalysisDispatcher(cUnit, slowComputeBlockDominators, + kPreOrderDFSTraversal, + true /* isIterative */); + + oatDataFlowAnalysisDispatcher(cUnit, slowComputeBlockIDom, + kReachableNodes, + false /* isIterative */); + } + + oatDataFlowAnalysisDispatcher(cUnit, setDominators, + kReachableNodes, + false /* isIterative */); + + oatDataFlowAnalysisDispatcher(cUnit, computeBlockDominators, + kReversePostOrderTraversal, + false /* isIterative */); /* * Now go ahead and compute the post order traversal based on the @@ -456,6 +610,7 @@ STATIC void insertPhiNodes(CompilationUnit* cUnit) (BasicBlock* ) oatGrowableListGetElement(blockList, idx); /* Merge the dominance frontier to tmpBlocks */ + //TUNING: hot call to oatUnifyBitVectors if (defBB->domFrontier != NULL) { oatUnifyBitVectors(tmpBlocks, tmpBlocks, defBB->domFrontier); } @@ -602,7 +757,7 @@ STATIC void doDFSPreOrderSSARename(CompilationUnit* cUnit, BasicBlock* block) void oatMethodSSATransformation(CompilationUnit* cUnit) { /* Compute the DFS order */ - computeDFSOrder(cUnit); + computeDFSOrders(cUnit); /* Compute the dominator info */ computeDominators(cUnit); diff --git a/src/compiler/Utility.cc b/src/compiler/Utility.cc index 8d70136e02..c76143b928 100644 --- a/src/compiler/Utility.cc +++ b/src/compiler/Utility.cc @@ -231,6 +231,8 @@ ArenaBitVector* oatAllocBitVector(unsigned int startBits, bool expandable) bv->storageSize = count; bv->expandable = expandable; bv->storage = (u4*) oatNew(count * sizeof(u4), true); + bv->firstDirty = true; + bv->lastDirty = true; return bv; } @@ -252,6 +254,8 @@ void oatClearAllBits(ArenaBitVector* pBits) { unsigned int count = pBits->storageSize; memset(pBits->storage, 0, count * sizeof(u4)); + pBits->firstDirty = true; + pBits->lastDirty = true; } /* @@ -281,6 +285,12 @@ bool oatSetBit(ArenaBitVector* pBits, unsigned int num) } pBits->storage[num >> 5] |= checkMasks[num & 0x1f]; + if (!pBits->firstDirty && ((int)num < pBits->firstBitSet)) { + pBits->firstBitSet = num; + } + if (!pBits->lastDirty && ((int)num > pBits->lastBitSet)) { + pBits->lastBitSet = num; + } return true; } @@ -299,6 +309,8 @@ bool oatClearBit(ArenaBitVector* pBits, unsigned int num) } pBits->storage[num >> 5] &= ~checkMasks[num & 0x1f]; + pBits->firstDirty = true; + pBits->lastDirty = true; return true; } @@ -309,6 +321,8 @@ void oatMarkAllBits(ArenaBitVector* pBits, bool set) { int value = set ? -1 : 0; memset(pBits->storage, value, pBits->storageSize * (int)sizeof(u4)); + pBits->firstDirty = true; + pBits->lastDirty = true; } void oatDebugBitVector(char* msg, const ArenaBitVector* bv, int length) @@ -374,6 +388,10 @@ void oatCopyBitVector(ArenaBitVector* dest, const ArenaBitVector* src) checkSizes(dest, src); memcpy(dest->storage, src->storage, sizeof(u4) * dest->storageSize); + dest->firstDirty = src->firstDirty; + dest->firstBitSet = src->firstBitSet; + dest->lastDirty = src->lastDirty; + dest->lastBitSet = src->lastBitSet; } /* @@ -395,6 +413,8 @@ bool oatIntersectBitVectors(ArenaBitVector* dest, const ArenaBitVector* src1, for (idx = 0; idx < dest->storageSize; idx++) { dest->storage[idx] = src1->storage[idx] & src2->storage[idx]; } + dest->firstDirty = true; + dest->lastDirty = true; return true; } @@ -416,6 +436,8 @@ bool oatUnifyBitVectors(ArenaBitVector* dest, const ArenaBitVector* src1, for (idx = 0; idx < dest->storageSize; idx++) { dest->storage[idx] = src1->storage[idx] | src2->storage[idx]; } + dest->firstDirty = true; + dest->lastDirty = true; return true; } @@ -466,18 +488,36 @@ int oatCountSetBits(const ArenaBitVector* pBits) /* Return the next position set to 1. -1 means end-of-element reached */ int oatBitVectorIteratorNext(ArenaBitVectorIterator* iterator) { - const ArenaBitVector* pBits = iterator->pBits; + ArenaBitVector* pBits = iterator->pBits; u4 bitIndex = iterator->idx; DCHECK_EQ(iterator->bitSize, pBits->storageSize * sizeof(u4) * 8); if (bitIndex >= iterator->bitSize) return -1; + /* If we know, skip past leading zeros */ + if (!pBits->firstDirty && ((int)bitIndex < pBits->firstBitSet)) { + iterator->idx = pBits->firstBitSet + 1; + return pBits->firstBitSet; + } + + /* If we know, skip past trailing zeroes */ + if (!pBits->lastDirty && ((int)bitIndex > pBits->lastBitSet)) { + iterator->idx = iterator->bitSize; + return -1; + } + + bool firstPass = (bitIndex == 0); + u4 startIndex = bitIndex; for (; bitIndex < iterator->bitSize;) { unsigned int wordIndex = bitIndex >> 5; unsigned int bitPos = bitIndex & 0x1f; unsigned int word = pBits->storage[wordIndex]; if (word & checkMasks[bitPos]) { iterator->idx = bitIndex+1; + if (firstPass && pBits->firstDirty) { + pBits->firstBitSet = bitIndex; + pBits->firstDirty = false; + } return bitIndex; } if (word == 0) { @@ -488,6 +528,14 @@ int oatBitVectorIteratorNext(ArenaBitVectorIterator* iterator) } } /* No more set bits */ + if (firstPass) { + // Empty + pBits->firstBitSet = -1; + pBits->firstDirty = false; + } else { + pBits->lastBitSet = startIndex - 1; + pBits->lastDirty = false; + } return -1; } @@ -507,6 +555,8 @@ void oatSetInitialBits(ArenaBitVector* pBits, unsigned int numBits) if (remNumBits) { pBits->storage[idx] = (1 << remNumBits) - 1; } + pBits->firstDirty = true; + pBits->lastDirty = true; } void oatGetBlockName(BasicBlock* bb, char* name) |