From 17189ac098b2f156713db1821b49db7b2f018bbe Mon Sep 17 00:00:00 2001
From: buzbee <buzbee@google.com>
Date: Fri, 8 Nov 2013 11:07:02 -0800
Subject: Quick compiler compile-time/memory use improvement

This CL delivers a surprisingly large reduction in compile time,
as well as a significant reduction in memory usage by conditionally
removing a CFG construction feature introduced to support LLVM
bitcode generation.

In short, bitcode requires all potential exception edges to be
explicitly present in the CFG.  The Quick compiler (based on the
old JIT), can ignore, at least for the purposes of dataflow
analysis, potential throw points that do not have a corresponding
catch block.

To support LLVM, we create a target basic block for every
potentially throwing instruction to give us a destination for
the exception edge.  Then, following the check elimination pass,
we remove blocks whose edges have gone away.

However, if we're not using LLVM, we can skip the creation of
those things in the first place.  The savings are significant.

Single-threaded compilation time on the host looks to be reduced
by something in the vicinity of 10%.  We create roughly 60% fewer
basic blocks (and, importantly, the creation of fewer basic
block nodes has a multiplying effect on memory use reduction
because it results in fewer dataflow bitmaps).

Some basic block redution stats:

boot: 2325802 before, 844846 after.
Phonesky: 485472 before, 156014 after.
PlusOne: 806232 before, 243156 after.
Thinkfree: 864498 before, 264858 after.

Another nice side effect of this change is giving the basic
block optimization pass generally larger scope.

For arena memusage in the boot class path (compiled on the host):

          Average            Max
Before:    50,863         88,017,820
After:     41,964          4,914,208

The huge reduction in max arena memory usage is due to the
collapsing of a large initialization method.  Specifically, with complete
exception edges org.ccil.cowan.tagsoup.Scheme requires 13,802
basic blocks.  With exception edges collapsed, it requires 4.

This change also caused 2 latent bugs to surface.

1) The dex parsing code did not expect that the target of a switch statement
could reside in the middle of the same basic block ended by that same switch
statement.

2) The x86 backend introduced a 5-operand LIR instruction for indexed memops.
However, there was no corresponding change to the use/def mask creation code.
Thus, the 5th operand was never included in the use/def mask.  This allowed
the instruction scheduling code to incorrectly move a use above a definition.
We didn't see this before because the affected x86 instructions were only used
for aget/aput, and prior to this CL those Dalvik opcodes caused a basic
block break because of the implied exception edge - which prevented the code
motion.

And finally, also included is some minor tuning of register use weighting.

Change-Id: I3f2cab7136dba2bded71e9e33b452b95e8fffc0e
---
 compiler/dex/mir_graph.cc | 25 ++++++++++++++++++++-----
 1 file changed, 20 insertions(+), 5 deletions(-)

(limited to 'compiler/dex/mir_graph.cc')
diff --git a/compiler/dex/mir_graph.cc b/compiler/dex/mir_graph.cc
index cf758fc5da..deaf2ffe80 100644
--- a/compiler/dex/mir_graph.cc
+++ b/compiler/dex/mir_graph.cc
@@ -365,8 +365,8 @@ BasicBlock* MIRGraph::ProcessCanBranch(BasicBlock* cur_block, MIR* insn, DexOffs
 }
 
 /* Process instructions with the kSwitch flag */
-void MIRGraph::ProcessCanSwitch(BasicBlock* cur_block, MIR* insn, DexOffset cur_offset, int width,
-                                int flags) {
+BasicBlock* MIRGraph::ProcessCanSwitch(BasicBlock* cur_block, MIR* insn, DexOffset cur_offset,
+                                       int width, int flags) {
   const uint16_t* switch_data =
       reinterpret_cast<const uint16_t*>(GetCurrentInsns() + cur_offset + insn->dalvikInsn.vB);
   int size;
@@ -437,6 +437,7 @@ void MIRGraph::ProcessCanSwitch(BasicBlock* cur_block, MIR* insn, DexOffset cur_
                                             /* create */ true, /* immed_pred_block_p */ NULL);
   cur_block->fall_through = fallthrough_block->id;
   fallthrough_block->predecessors->Insert(cur_block->id);
+  return cur_block;
 }
 
 /* Process instructions with the kThrow flag */
@@ -444,6 +445,9 @@ BasicBlock* MIRGraph::ProcessCanThrow(BasicBlock* cur_block, MIR* insn, DexOffse
                                       int width, int flags, ArenaBitVector* try_block_addr,
                                       const uint16_t* code_ptr, const uint16_t* code_end) {
   bool in_try_block = try_block_addr->IsBitSet(cur_offset);
+  bool is_throw = (insn->dalvikInsn.opcode == Instruction::THROW);
+  bool build_all_edges =
+      (cu_->disable_opt & (1 << kSuppressExceptionEdges)) || is_throw || in_try_block;
 
   /* In try block */
   if (in_try_block) {
@@ -473,7 +477,7 @@ BasicBlock* MIRGraph::ProcessCanThrow(BasicBlock* cur_block, MIR* insn, DexOffse
       cur_block->successor_blocks->Insert(successor_block_info);
       catch_block->predecessors->Insert(cur_block->id);
     }
-  } else {
+  } else if (build_all_edges) {
     BasicBlock *eh_block = NewMemBB(kExceptionHandling, num_blocks_++);
     cur_block->taken = eh_block->id;
     block_list_.Insert(eh_block);
@@ -481,7 +485,7 @@ BasicBlock* MIRGraph::ProcessCanThrow(BasicBlock* cur_block, MIR* insn, DexOffse
     eh_block->predecessors->Insert(cur_block->id);
   }
 
-  if (insn->dalvikInsn.opcode == Instruction::THROW) {
+  if (is_throw) {
     cur_block->explicit_throw = true;
     if (code_ptr < code_end) {
       // Force creation of new block following THROW via side-effect
@@ -494,6 +498,16 @@ BasicBlock* MIRGraph::ProcessCanThrow(BasicBlock* cur_block, MIR* insn, DexOffse
     }
   }
 
+  if (!build_all_edges) {
+    /*
+     * Even though there is an exception edge here, control cannot return to this
+     * method.  Thus, for the purposes of dataflow analysis and optimization, we can
+     * ignore the edge.  Doing this reduces compile time, and increases the scope
+     * of the basic-block level optimization pass.
+     */
+    return cur_block;
+  }
+
   /*
    * Split the potentially-throwing instruction into two parts.
    * The first half will be a pseudo-op that captures the exception
@@ -695,7 +709,7 @@ void MIRGraph::InlineMethod(const DexFile::CodeItem* code_item, uint32_t access_
       cur_block = ProcessCanThrow(cur_block, insn, current_offset_, width, flags, try_block_addr_,
                                   code_ptr, code_end);
     } else if (flags & Instruction::kSwitch) {
-      ProcessCanSwitch(cur_block, insn, current_offset_, width, flags);
+      cur_block = ProcessCanSwitch(cur_block, insn, current_offset_, width, flags);
     }
     current_offset_ += width;
     BasicBlock *next_block = FindBlock(current_offset_, /* split */ false, /* create */
@@ -1100,6 +1114,7 @@ const char* MIRGraph::GetShortyFromTargetIdx(int target_idx) {
 void MIRGraph::DumpMIRGraph() {
   BasicBlock* bb;
   const char* block_type_names[] = {
+    "Null Block",
     "Entry Block",
     "Code Block",
     "Exit Block",
-- 
cgit v1.2.3-59-g8ed1b