diff options
author | 2013-11-15 13:37:01 -0800 | |
---|---|---|
committer | 2013-11-26 12:10:55 -0800 | |
commit | 1da1e2fceb0030b4b76b43510b1710a9613e0c2e (patch) | |
tree | 01ff0b0545450439200b81627bfc43ae60f414c2 /compiler/dex/mir_dataflow.cc | |
parent | 21be5b21017823b3785f94349e2e2b57d82431e6 (diff) |
More compile-time tuning
Another round of compile-time tuning, this time yielding in the
vicinity of 3% total reduction in compile time (which means about
double that for the Quick Compile portion).
Primary improvements are skipping the basic block combine optimization
pass when using Quick (because we already have big blocks), combining
the null check elimination and type inference passes, and limiting
expensive local value number analysis to only those blocks which
might benefit from it.
Following this CL, the actual compile phase consumes roughly 60%
of the total dex2oat time on the host, and 55% on the target (Note,
I'm subtracting out the Deduping time here, which the timing logger
normally counts against the compiler).
A sample breakdown of the compilation time follows (this was taken on
PlusOne.apk w/ a Nexus 4):
39.00% -> MIR2LIR: 1374.90 (Note: includes local optimization & scheduling)
10.25% -> MIROpt:SSATransform: 361.31
8.45% -> BuildMIRGraph: 297.80
7.55% -> Assemble: 266.16
6.87% -> MIROpt:NCE_TypeInference: 242.22
5.56% -> Dedupe: 196.15
3.45% -> MIROpt:BBOpt: 121.53
3.20% -> RegisterAllocation: 112.69
3.00% -> PcMappingTable: 105.65
2.90% -> GcMap: 102.22
2.68% -> Launchpads: 94.50
1.16% -> MIROpt:InitRegLoc: 40.94
1.16% -> Cleanup: 40.93
1.10% -> MIROpt:CodeLayout: 38.80
0.97% -> MIROpt:ConstantProp: 34.35
0.96% -> MIROpt:UseCount: 33.75
0.86% -> MIROpt:CheckFilters: 30.28
0.44% -> SpecialMIR2LIR: 15.53
0.44% -> MIROpt:BBCombine: 15.41
(cherry pick of 9e8e234af4430abe8d144414e272cd72d215b5f3)
Change-Id: I86c665fa7e88b75eb75629a99fd292ff8c449969
Diffstat (limited to 'compiler/dex/mir_dataflow.cc')
-rw-r--r-- | compiler/dex/mir_dataflow.cc | 112 |
1 files changed, 54 insertions, 58 deletions
diff --git a/compiler/dex/mir_dataflow.cc b/compiler/dex/mir_dataflow.cc index d359ee2dfe..728d48ad70 100644 --- a/compiler/dex/mir_dataflow.cc +++ b/compiler/dex/mir_dataflow.cc @@ -29,7 +29,7 @@ namespace art { * TODO - many optimization flags are incomplete - they will only limit the * scope of optimizations but will not cause mis-optimizations. */ -const int MIRGraph::oat_data_flow_attributes_[kMirOpLast] = { +const uint64_t MIRGraph::oat_data_flow_attributes_[kMirOpLast] = { // 00 NOP DF_NOP, @@ -235,88 +235,88 @@ const int MIRGraph::oat_data_flow_attributes_[kMirOpLast] = { DF_NOP, // 44 AGET vAA, vBB, vCC - DF_DA | DF_UB | DF_UC | DF_NULL_CHK_0 | DF_RANGE_CHK_1 | DF_REF_B | DF_CORE_C, + DF_DA | DF_UB | DF_UC | DF_NULL_CHK_0 | DF_RANGE_CHK_1 | DF_REF_B | DF_CORE_C | DF_LVN, // 45 AGET_WIDE vAA, vBB, vCC - DF_DA | DF_A_WIDE | DF_UB | DF_UC | DF_NULL_CHK_0 | DF_RANGE_CHK_1 | DF_REF_B | DF_CORE_C, + DF_DA | DF_A_WIDE | DF_UB | DF_UC | DF_NULL_CHK_0 | DF_RANGE_CHK_1 | DF_REF_B | DF_CORE_C | DF_LVN, // 46 AGET_OBJECT vAA, vBB, vCC - DF_DA | DF_UB | DF_UC | DF_NULL_CHK_0 | DF_RANGE_CHK_1 | DF_REF_A | DF_REF_B | DF_CORE_C, + DF_DA | DF_UB | DF_UC | DF_NULL_CHK_0 | DF_RANGE_CHK_1 | DF_REF_A | DF_REF_B | DF_CORE_C | DF_LVN, // 47 AGET_BOOLEAN vAA, vBB, vCC - DF_DA | DF_UB | DF_UC | DF_NULL_CHK_0 | DF_RANGE_CHK_1 | DF_REF_B | DF_CORE_C, + DF_DA | DF_UB | DF_UC | DF_NULL_CHK_0 | DF_RANGE_CHK_1 | DF_REF_B | DF_CORE_C | DF_LVN, // 48 AGET_BYTE vAA, vBB, vCC - DF_DA | DF_UB | DF_UC | DF_NULL_CHK_0 | DF_RANGE_CHK_1 | DF_REF_B | DF_CORE_C, + DF_DA | DF_UB | DF_UC | DF_NULL_CHK_0 | DF_RANGE_CHK_1 | DF_REF_B | DF_CORE_C | DF_LVN, // 49 AGET_CHAR vAA, vBB, vCC - DF_DA | DF_UB | DF_UC | DF_NULL_CHK_0 | DF_RANGE_CHK_1 | DF_REF_B | DF_CORE_C, + DF_DA | DF_UB | DF_UC | DF_NULL_CHK_0 | DF_RANGE_CHK_1 | DF_REF_B | DF_CORE_C | DF_LVN, // 4A AGET_SHORT vAA, vBB, vCC - DF_DA | DF_UB | DF_UC | DF_NULL_CHK_0 | DF_RANGE_CHK_1 | DF_REF_B | DF_CORE_C, + DF_DA | DF_UB | DF_UC | DF_NULL_CHK_0 
| DF_RANGE_CHK_1 | DF_REF_B | DF_CORE_C | DF_LVN, // 4B APUT vAA, vBB, vCC - DF_UA | DF_UB | DF_UC | DF_NULL_CHK_1 | DF_RANGE_CHK_2 | DF_REF_B | DF_CORE_C, + DF_UA | DF_UB | DF_UC | DF_NULL_CHK_1 | DF_RANGE_CHK_2 | DF_REF_B | DF_CORE_C | DF_LVN, // 4C APUT_WIDE vAA, vBB, vCC - DF_UA | DF_A_WIDE | DF_UB | DF_UC | DF_NULL_CHK_2 | DF_RANGE_CHK_3 | DF_REF_B | DF_CORE_C, + DF_UA | DF_A_WIDE | DF_UB | DF_UC | DF_NULL_CHK_2 | DF_RANGE_CHK_3 | DF_REF_B | DF_CORE_C | DF_LVN, // 4D APUT_OBJECT vAA, vBB, vCC - DF_UA | DF_UB | DF_UC | DF_NULL_CHK_1 | DF_RANGE_CHK_2 | DF_REF_A | DF_REF_B | DF_CORE_C, + DF_UA | DF_UB | DF_UC | DF_NULL_CHK_1 | DF_RANGE_CHK_2 | DF_REF_A | DF_REF_B | DF_CORE_C | DF_LVN, // 4E APUT_BOOLEAN vAA, vBB, vCC - DF_UA | DF_UB | DF_UC | DF_NULL_CHK_1 | DF_RANGE_CHK_2 | DF_REF_B | DF_CORE_C, + DF_UA | DF_UB | DF_UC | DF_NULL_CHK_1 | DF_RANGE_CHK_2 | DF_REF_B | DF_CORE_C | DF_LVN, // 4F APUT_BYTE vAA, vBB, vCC - DF_UA | DF_UB | DF_UC | DF_NULL_CHK_1 | DF_RANGE_CHK_2 | DF_REF_B | DF_CORE_C, + DF_UA | DF_UB | DF_UC | DF_NULL_CHK_1 | DF_RANGE_CHK_2 | DF_REF_B | DF_CORE_C | DF_LVN, // 50 APUT_CHAR vAA, vBB, vCC - DF_UA | DF_UB | DF_UC | DF_NULL_CHK_1 | DF_RANGE_CHK_2 | DF_REF_B | DF_CORE_C, + DF_UA | DF_UB | DF_UC | DF_NULL_CHK_1 | DF_RANGE_CHK_2 | DF_REF_B | DF_CORE_C | DF_LVN, // 51 APUT_SHORT vAA, vBB, vCC - DF_UA | DF_UB | DF_UC | DF_NULL_CHK_1 | DF_RANGE_CHK_2 | DF_REF_B | DF_CORE_C, + DF_UA | DF_UB | DF_UC | DF_NULL_CHK_1 | DF_RANGE_CHK_2 | DF_REF_B | DF_CORE_C | DF_LVN, // 52 IGET vA, vB, field@CCCC - DF_DA | DF_UB | DF_NULL_CHK_0 | DF_REF_B, + DF_DA | DF_UB | DF_NULL_CHK_0 | DF_REF_B | DF_LVN, // 53 IGET_WIDE vA, vB, field@CCCC - DF_DA | DF_A_WIDE | DF_UB | DF_NULL_CHK_0 | DF_REF_B, + DF_DA | DF_A_WIDE | DF_UB | DF_NULL_CHK_0 | DF_REF_B | DF_LVN, // 54 IGET_OBJECT vA, vB, field@CCCC - DF_DA | DF_UB | DF_NULL_CHK_0 | DF_REF_A | DF_REF_B, + DF_DA | DF_UB | DF_NULL_CHK_0 | DF_REF_A | DF_REF_B | DF_LVN, // 55 IGET_BOOLEAN vA, vB, field@CCCC - DF_DA | DF_UB | 
DF_NULL_CHK_0 | DF_REF_B, + DF_DA | DF_UB | DF_NULL_CHK_0 | DF_REF_B | DF_LVN, // 56 IGET_BYTE vA, vB, field@CCCC - DF_DA | DF_UB | DF_NULL_CHK_0 | DF_REF_B, + DF_DA | DF_UB | DF_NULL_CHK_0 | DF_REF_B | DF_LVN, // 57 IGET_CHAR vA, vB, field@CCCC - DF_DA | DF_UB | DF_NULL_CHK_0 | DF_REF_B, + DF_DA | DF_UB | DF_NULL_CHK_0 | DF_REF_B | DF_LVN, // 58 IGET_SHORT vA, vB, field@CCCC - DF_DA | DF_UB | DF_NULL_CHK_0 | DF_REF_B, + DF_DA | DF_UB | DF_NULL_CHK_0 | DF_REF_B | DF_LVN, // 59 IPUT vA, vB, field@CCCC - DF_UA | DF_UB | DF_NULL_CHK_1 | DF_REF_B, + DF_UA | DF_UB | DF_NULL_CHK_1 | DF_REF_B | DF_LVN, // 5A IPUT_WIDE vA, vB, field@CCCC - DF_UA | DF_A_WIDE | DF_UB | DF_NULL_CHK_2 | DF_REF_B, + DF_UA | DF_A_WIDE | DF_UB | DF_NULL_CHK_2 | DF_REF_B | DF_LVN, // 5B IPUT_OBJECT vA, vB, field@CCCC - DF_UA | DF_UB | DF_NULL_CHK_1 | DF_REF_A | DF_REF_B, + DF_UA | DF_UB | DF_NULL_CHK_1 | DF_REF_A | DF_REF_B | DF_LVN, // 5C IPUT_BOOLEAN vA, vB, field@CCCC - DF_UA | DF_UB | DF_NULL_CHK_1 | DF_REF_B, + DF_UA | DF_UB | DF_NULL_CHK_1 | DF_REF_B | DF_LVN, // 5D IPUT_BYTE vA, vB, field@CCCC - DF_UA | DF_UB | DF_NULL_CHK_1 | DF_REF_B, + DF_UA | DF_UB | DF_NULL_CHK_1 | DF_REF_B | DF_LVN, // 5E IPUT_CHAR vA, vB, field@CCCC - DF_UA | DF_UB | DF_NULL_CHK_1 | DF_REF_B, + DF_UA | DF_UB | DF_NULL_CHK_1 | DF_REF_B | DF_LVN, // 5F IPUT_SHORT vA, vB, field@CCCC - DF_UA | DF_UB | DF_NULL_CHK_1 | DF_REF_B, + DF_UA | DF_UB | DF_NULL_CHK_1 | DF_REF_B | DF_LVN, // 60 SGET vAA, field@BBBB DF_DA | DF_UMS, @@ -712,10 +712,10 @@ const int MIRGraph::oat_data_flow_attributes_[kMirOpLast] = { DF_DA | DF_UB | DF_CORE_A | DF_CORE_B, // E3 IGET_VOLATILE - DF_DA | DF_UB | DF_NULL_CHK_0 | DF_REF_B, + DF_DA | DF_UB | DF_NULL_CHK_0 | DF_REF_B | DF_LVN, // E4 IPUT_VOLATILE - DF_UA | DF_UB | DF_NULL_CHK_1 | DF_REF_B, + DF_UA | DF_UB | DF_NULL_CHK_1 | DF_REF_B | DF_LVN, // E5 SGET_VOLATILE DF_DA | DF_UMS, @@ -724,13 +724,13 @@ const int MIRGraph::oat_data_flow_attributes_[kMirOpLast] = { DF_UA | DF_UMS, // E7 
IGET_OBJECT_VOLATILE - DF_DA | DF_UB | DF_NULL_CHK_0 | DF_REF_A | DF_REF_B, + DF_DA | DF_UB | DF_NULL_CHK_0 | DF_REF_A | DF_REF_B | DF_LVN, // E8 IGET_WIDE_VOLATILE - DF_DA | DF_A_WIDE | DF_UB | DF_NULL_CHK_0 | DF_REF_B, + DF_DA | DF_A_WIDE | DF_UB | DF_NULL_CHK_0 | DF_REF_B | DF_LVN, // E9 IPUT_WIDE_VOLATILE - DF_UA | DF_A_WIDE | DF_UB | DF_NULL_CHK_2 | DF_REF_B, + DF_UA | DF_A_WIDE | DF_UB | DF_NULL_CHK_2 | DF_REF_B | DF_LVN, // EA SGET_WIDE_VOLATILE DF_DA | DF_A_WIDE | DF_UMS, @@ -757,22 +757,22 @@ const int MIRGraph::oat_data_flow_attributes_[kMirOpLast] = { DF_NOP, // F2 IGET_QUICK - DF_DA | DF_UB | DF_NULL_CHK_0, + DF_DA | DF_UB | DF_NULL_CHK_0 | DF_LVN, // F3 IGET_WIDE_QUICK - DF_DA | DF_A_WIDE | DF_UB | DF_NULL_CHK_0, + DF_DA | DF_A_WIDE | DF_UB | DF_NULL_CHK_0 | DF_LVN, // F4 IGET_OBJECT_QUICK - DF_DA | DF_UB | DF_NULL_CHK_0, + DF_DA | DF_UB | DF_NULL_CHK_0 | DF_LVN, // F5 IPUT_QUICK - DF_UA | DF_UB | DF_NULL_CHK_1, + DF_UA | DF_UB | DF_NULL_CHK_1 | DF_LVN, // F6 IPUT_WIDE_QUICK - DF_UA | DF_A_WIDE | DF_UB | DF_NULL_CHK_2, + DF_UA | DF_A_WIDE | DF_UB | DF_NULL_CHK_2 | DF_LVN, // F7 IPUT_OBJECT_QUICK - DF_UA | DF_UB | DF_NULL_CHK_1, + DF_UA | DF_UB | DF_NULL_CHK_1 | DF_LVN, // F8 INVOKE_VIRTUAL_QUICK DF_FORMAT_35C | DF_NULL_CHK_OUT0 | DF_UMS, @@ -787,7 +787,7 @@ const int MIRGraph::oat_data_flow_attributes_[kMirOpLast] = { DF_FORMAT_3RC | DF_NULL_CHK_OUT0 | DF_UMS, // FC IPUT_OBJECT_VOLATILE - DF_UA | DF_UB | DF_NULL_CHK_1 | DF_REF_A | DF_REF_B, + DF_UA | DF_UB | DF_NULL_CHK_1 | DF_REF_A | DF_REF_B | DF_LVN, // FD SGET_OBJECT_VOLATILE DF_DA | DF_REF_A | DF_UMS, @@ -879,7 +879,7 @@ bool MIRGraph::FindLocalLiveIn(BasicBlock* bb) { new (arena_) ArenaBitVector(arena_, cu_->num_dalvik_registers, false, kBitMapLiveIn); for (mir = bb->first_mir_insn; mir != NULL; mir = mir->next) { - int df_attributes = oat_data_flow_attributes_[mir->dalvikInsn.opcode]; + uint64_t df_attributes = oat_data_flow_attributes_[mir->dalvikInsn.opcode]; DecodedInstruction *d_insn = 
&mir->dalvikInsn; if (df_attributes & DF_HAS_USES) { @@ -994,7 +994,7 @@ bool MIRGraph::DoSSAConversion(BasicBlock* bb) { static_cast<struct SSARepresentation *>(arena_->Alloc(sizeof(SSARepresentation), ArenaAllocator::kAllocDFInfo)); - int df_attributes = oat_data_flow_attributes_[mir->dalvikInsn.opcode]; + uint64_t df_attributes = oat_data_flow_attributes_[mir->dalvikInsn.opcode]; // If not a pseudo-op, note non-leaf or can throw if (static_cast<int>(mir->dalvikInsn.opcode) < @@ -1239,37 +1239,33 @@ bool MIRGraph::CountUses(struct BasicBlock* bb) { if (bb->block_type != kDalvikByteCode) { return false; } + // Each level of nesting adds *100 to count, up to 3 levels deep. + uint32_t depth = std::min(3U, static_cast<uint32_t>(bb->nesting_depth)); + uint32_t weight = std::max(1U, depth * 100); for (MIR* mir = bb->first_mir_insn; (mir != NULL); mir = mir->next) { if (mir->ssa_rep == NULL) { continue; } - // Each level of nesting adds *100 to count, up to 3 levels deep. - uint32_t depth = std::min(3U, static_cast<uint32_t>(bb->nesting_depth)); - uint32_t weight = std::max(1U, depth * 100); for (int i = 0; i < mir->ssa_rep->num_uses; i++) { int s_reg = mir->ssa_rep->uses[i]; raw_use_counts_.Increment(s_reg); use_counts_.Put(s_reg, use_counts_.Get(s_reg) + weight); } if (!(cu_->disable_opt & (1 << kPromoteCompilerTemps))) { - int df_attributes = oat_data_flow_attributes_[mir->dalvikInsn.opcode]; + uint64_t df_attributes = oat_data_flow_attributes_[mir->dalvikInsn.opcode]; // Implicit use of Method* ? */ if (df_attributes & DF_UMS) { /* * Some invokes will not use Method* - need to perform test similar * to that found in GenInvoke() to decide whether to count refs - * for Method* on invoke-class opcodes. - * TODO: refactor for common test here, save results for GenInvoke + * for Method* on invoke-class opcodes. This is a relatively expensive + * operation, so should only be done once. 
+ * TODO: refactor InvokeUsesMethodStar() to perform check at parse time, + * and save results for both here and GenInvoke. For now, go ahead + * and assume all invokes use method*. */ - int uses_method_star = true; - if ((df_attributes & (DF_FORMAT_35C | DF_FORMAT_3RC)) && - !(df_attributes & DF_NON_NULL_RET)) { - uses_method_star &= InvokeUsesMethodStar(mir); - } - if (uses_method_star) { - raw_use_counts_.Increment(method_sreg_); - use_counts_.Put(method_sreg_, use_counts_.Get(method_sreg_) + weight); - } + raw_use_counts_.Increment(method_sreg_); + use_counts_.Put(method_sreg_, use_counts_.Get(method_sreg_) + weight); } } } |