From 17189ac098b2f156713db1821b49db7b2f018bbe Mon Sep 17 00:00:00 2001 From: buzbee Date: Fri, 8 Nov 2013 11:07:02 -0800 Subject: Quick compiler compile-time/memory use improvement This CL delivers a surprisingly large reduction in compile time, as well as a significant reduction in memory usage by conditionally removing a CFG construction feature introduced to support LLVM bitcode generation. In short, bitcode requires all potential exception edges to be explicitly present in the CFG. The Quick compiler (based on the old JIT), can ignore, at least for the purposes of dataflow analysis, potential throw points that do not have a corresponding catch block. To support LLVM, we create a target basic block for every potentially throwing instruction to give us a destination for the exception edge. Then, following the check elimination pass, we remove blocks whose edges have gone away. However, if we're not using LLVM, we can skip the creation of those things in the first place. The savings are significant. Single-threaded compilation time on the host looks to be reduced by something in the vicinity of 10%. We create roughly 60% fewer basic blocks (and, importantly, the creation of fewer basic block nodes has a multiplying effect on memory use reduction because it results in fewer dataflow bitmaps). Some basic block redution stats: boot: 2325802 before, 844846 after. Phonesky: 485472 before, 156014 after. PlusOne: 806232 before, 243156 after. Thinkfree: 864498 before, 264858 after. Another nice side effect of this change is giving the basic block optimization pass generally larger scope. For arena memusage in the boot class path (compiled on the host): Average Max Before: 50,863 88,017,820 After: 41,964 4,914,208 The huge reduction in max arena memory usage is due to the collapsing of a large initialization method. Specifically, with complete exception edges org.ccil.cowan.tagsoup.Scheme requires 13,802 basic blocks. With exception edges collapsed, it requires 4. This change also caused 2 latent bugs to surface. 1) The dex parsing code did not expect that the target of a switch statement could reside in the middle of the same basic block ended by that same switch statement. 2) The x86 backend introduced a 5-operand LIR instruction for indexed memops. However, there was no corresponding change to the use/def mask creation code. Thus, the 5th operand was never included in the use/def mask. This allowed the instruction scheduling code to incorrectly move a use above a definition. We didn't see this before because the affected x86 instructions were only used for aget/aput, and prior to this CL those Dalvik opcodes caused a basic block break because of the implied exception edge - which prevented the code motion. And finally, also included is some minor tuning of register use weighting. Change-Id: I3f2cab7136dba2bded71e9e33b452b95e8fffc0e --- compiler/dex/mir_graph.cc | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) (limited to 'compiler/dex/mir_graph.cc') diff --git a/compiler/dex/mir_graph.cc b/compiler/dex/mir_graph.cc index cf758fc5da..deaf2ffe80 100644 --- a/compiler/dex/mir_graph.cc +++ b/compiler/dex/mir_graph.cc @@ -365,8 +365,8 @@ BasicBlock* MIRGraph::ProcessCanBranch(BasicBlock* cur_block, MIR* insn, DexOffs } /* Process instructions with the kSwitch flag */ -void MIRGraph::ProcessCanSwitch(BasicBlock* cur_block, MIR* insn, DexOffset cur_offset, int width, - int flags) { +BasicBlock* MIRGraph::ProcessCanSwitch(BasicBlock* cur_block, MIR* insn, DexOffset cur_offset, + int width, int flags) { const uint16_t* switch_data = reinterpret_cast(GetCurrentInsns() + cur_offset + insn->dalvikInsn.vB); int size; @@ -437,6 +437,7 @@ void MIRGraph::ProcessCanSwitch(BasicBlock* cur_block, MIR* insn, DexOffset cur_ /* create */ true, /* immed_pred_block_p */ NULL); cur_block->fall_through = fallthrough_block->id; fallthrough_block->predecessors->Insert(cur_block->id); + return cur_block; } /* Process instructions with the kThrow flag */ @@ -444,6 +445,9 @@ BasicBlock* MIRGraph::ProcessCanThrow(BasicBlock* cur_block, MIR* insn, DexOffse int width, int flags, ArenaBitVector* try_block_addr, const uint16_t* code_ptr, const uint16_t* code_end) { bool in_try_block = try_block_addr->IsBitSet(cur_offset); + bool is_throw = (insn->dalvikInsn.opcode == Instruction::THROW); + bool build_all_edges = + (cu_->disable_opt & (1 << kSuppressExceptionEdges)) || is_throw || in_try_block; /* In try block */ if (in_try_block) { @@ -473,7 +477,7 @@ BasicBlock* MIRGraph::ProcessCanThrow(BasicBlock* cur_block, MIR* insn, DexOffse cur_block->successor_blocks->Insert(successor_block_info); catch_block->predecessors->Insert(cur_block->id); } - } else { + } else if (build_all_edges) { BasicBlock *eh_block = NewMemBB(kExceptionHandling, num_blocks_++); cur_block->taken = eh_block->id; block_list_.Insert(eh_block); @@ -481,7 +485,7 @@ BasicBlock* MIRGraph::ProcessCanThrow(BasicBlock* cur_block, MIR* insn, DexOffse eh_block->predecessors->Insert(cur_block->id); } - if (insn->dalvikInsn.opcode == Instruction::THROW) { + if (is_throw) { cur_block->explicit_throw = true; if (code_ptr < code_end) { // Force creation of new block following THROW via side-effect @@ -494,6 +498,16 @@ BasicBlock* MIRGraph::ProcessCanThrow(BasicBlock* cur_block, MIR* insn, DexOffse } } + if (!build_all_edges) { + /* + * Even though there is an exception edge here, control cannot return to this + * method. Thus, for the purposes of dataflow analysis and optimization, we can + * ignore the edge. Doing this reduces compile time, and increases the scope + * of the basic-block level optimization pass. + */ + return cur_block; + } + /* * Split the potentially-throwing instruction into two parts. * The first half will be a pseudo-op that captures the exception @@ -695,7 +709,7 @@ void MIRGraph::InlineMethod(const DexFile::CodeItem* code_item, uint32_t access_ cur_block = ProcessCanThrow(cur_block, insn, current_offset_, width, flags, try_block_addr_, code_ptr, code_end); } else if (flags & Instruction::kSwitch) { - ProcessCanSwitch(cur_block, insn, current_offset_, width, flags); + cur_block = ProcessCanSwitch(cur_block, insn, current_offset_, width, flags); } current_offset_ += width; BasicBlock *next_block = FindBlock(current_offset_, /* split */ false, /* create */ @@ -1100,6 +1114,7 @@ const char* MIRGraph::GetShortyFromTargetIdx(int target_idx) { void MIRGraph::DumpMIRGraph() { BasicBlock* bb; const char* block_type_names[] = { + "Null Block", "Entry Block", "Code Block", "Exit Block", -- cgit v1.2.3-59-g8ed1b