Compile-time tuning: assembly phase
Not as much compile-time gain from reworking the assembly phase as I'd
hoped, but still worthwhile: expect roughly a 2% improvement from the
assembly rework. On the other hand, some applications should see huge
gains thanks to better detection of large machine-generated init
methods; Thinkfree shows a 25% improvement.
The major assembly change was to thread the LIR nodes that require
fixup into a fixup chain; only those nodes are processed during the
final assembly pass(es). This doesn't help methods that need only a
single pass to assemble, but it does speed up larger methods that
require multiple assembly passes.
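A minimal sketch of the scheme, using illustrative stand-in names and
types rather than the actual Mir2Lir definitions:

  enum AssemblerStatus { kSuccess, kRetryAll };

  struct SketchLIR {
    int offset;             // code offset assigned by the offset pass
    int target_offset;      // pc-relative target, if this insn has one
    SketchLIR* next;        // full instruction list, walked on the first pass
    SketchLIR* next_fixup;  // fixup chain, the only list later passes walk
  };

  // Follow-on passes traverse just the fixup chain, so a large method
  // with few pc-relative instructions is cheap to re-check.
  AssemblerStatus RetryPass(SketchLIR* first_fixup) {
    AssemblerStatus res = kSuccess;
    for (SketchLIR* lir = first_fixup; lir != nullptr; lir = lir->next_fixup) {
      int delta = lir->target_offset - (lir->offset + 4);  // Thumb pc bias
      if (delta < -256 || delta > 254) {  // e.g. short conditional branch
        res = kRetryAll;  // widen the encoding; offsets must be reassigned
      }
    }
    return res;
  }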
Also replaced the block_map_ basic block lookup table (which reserved
space for a BasicBlock* for each dex instruction unit) with a map of
16-bit block ids, cutting its space requirements in half in a 32-bit
pointer environment.
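The shape of that change, sketched with simplified types (BlockIdMap
and the 0xffff sentinel are illustrative, not the actual names):

  #include <cstdint>
  #include <vector>

  struct BasicBlock { uint16_t id; /* ... */ };

  class BlockIdMap {
   public:
    static constexpr uint16_t kInvalidId = 0xffff;

    BlockIdMap(size_t num_dex_units, std::vector<BasicBlock*>* blocks)
        : map_(num_dex_units, kInvalidId), blocks_(blocks) {}

    void Set(uint32_t dex_offset, uint16_t block_id) {
      map_[dex_offset] = block_id;
    }

    // Constant-time lookup replacing the sequential FindBlock search,
    // at 2 bytes per entry instead of 4 (or 8) for a pointer.
    BasicBlock* Find(uint32_t dex_offset) const {
      uint16_t id = map_[dex_offset];
      return (id == kInvalidId) ? nullptr : (*blocks_)[id];
    }

   private:
    std::vector<uint16_t> map_;
    std::vector<BasicBlock*>* blocks_;
  };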
Changes:
o Reduce size of LIR struct by 12.5% (one of the big memory users)
o Repurpose the use/def portion of the LIR once optimization is complete
  (see the layout sketch following the diff).
o Encode instruction bits to LIR
o Thread LIR nodes requiring pc fixup
o Change follow-on assembly passes to only consider fixup LIRs
o Switch on pc-rel fixup kind (see the sketch after this list)
o Fast-path for small methods: single-pass assembly
o Avoid using cb[n]z for null checks (the branch almost always exceeds
  its displacement range)
o Improve detection of large initialization methods.
o Rework def/use flag setup.
o Remove a sequential search from FindBlock using lookup table of 16-bit
block ids rather than full block pointers.
o Eliminate pcRelFixup and use fixup kind instead.
o Add check for 16-bit overflow on dex offset.
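As referenced in the list above, a sketch of dispatching on a fixup
kind rather than a generic pc-rel flag. The ranges follow the Thumb2
encodings, but the kind names and cases here are approximations, not
the actual enum. The cb[n]z case also shows why those instructions are
a poor fit for null checks: their forward-only 0-126 byte range is
almost always exceeded by a branch to a slow path at the end of the
method.

  enum FixupKind { kFixupNone, kFixupCBxZ, kFixupCondBranch };

  struct FixupInsn {
    FixupKind kind;
    int offset;  // current code offset of this instruction
    int target;  // code offset of the branch target
  };

  // Returns true if the current encoding still reaches its target.
  bool EncodingFits(const FixupInsn& insn) {
    int delta = insn.target - (insn.offset + 4);  // Thumb pc reads ahead
    switch (insn.kind) {
      case kFixupNone:
        return true;  // no pc-relative operand to patch
      case kFixupCBxZ:
        return delta >= 0 && delta <= 126;  // cb[n]z: tiny, forward-only
      case kFixupCondBranch:
        return delta >= -256 && delta <= 254;  // 16-bit conditional branch
    }
    return false;
  }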
Change-Id: I4c6615f83fed46f84629ad6cfe4237205a9562b4
diff --git a/compiler/dex/quick/codegen_util.cc b/compiler/dex/quick/codegen_util.cc
index 6e49f0b..617f357 100644
--- a/compiler/dex/quick/codegen_util.cc
+++ b/compiler/dex/quick/codegen_util.cc
@@ -45,9 +45,10 @@
}
void Mir2Lir::MarkSafepointPC(LIR* inst) {
- inst->def_mask = ENCODE_ALL;
+ DCHECK(!inst->flags.use_def_invalid);
+ inst->u.m.def_mask = ENCODE_ALL;
LIR* safepoint_pc = NewLIR0(kPseudoSafepointPC);
- DCHECK_EQ(safepoint_pc->def_mask, ENCODE_ALL);
+ DCHECK_EQ(safepoint_pc->u.m.def_mask, ENCODE_ALL);
}
bool Mir2Lir::FastInstance(uint32_t field_idx, bool is_put, int* field_offset, bool* is_volatile) {
@@ -87,10 +88,11 @@
uint64_t *mask_ptr;
uint64_t mask = ENCODE_MEM;
DCHECK(GetTargetInstFlags(lir->opcode) & (IS_LOAD | IS_STORE));
+ DCHECK(!lir->flags.use_def_invalid);
if (is_load) {
- mask_ptr = &lir->use_mask;
+ mask_ptr = &lir->u.m.use_mask;
} else {
- mask_ptr = &lir->def_mask;
+ mask_ptr = &lir->u.m.def_mask;
}
/* Clear out the memref flags */
*mask_ptr &= ~mask;
@@ -127,7 +129,7 @@
* Store the Dalvik register id in alias_info. Mark the MSB if it is a 64-bit
* access.
*/
- lir->alias_info = ENCODE_ALIAS_INFO(reg_id, is64bit);
+ lir->flags.alias_info = ENCODE_ALIAS_INFO(reg_id, is64bit);
}
/*
@@ -213,11 +215,11 @@
break;
}
- if (lir->use_mask && (!lir->flags.is_nop || dump_nop)) {
- DUMP_RESOURCE_MASK(DumpResourceMask(lir, lir->use_mask, "use"));
+ if (lir->u.m.use_mask && (!lir->flags.is_nop || dump_nop)) {
+ DUMP_RESOURCE_MASK(DumpResourceMask(lir, lir->u.m.use_mask, "use"));
}
- if (lir->def_mask && (!lir->flags.is_nop || dump_nop)) {
- DUMP_RESOURCE_MASK(DumpResourceMask(lir, lir->def_mask, "def"));
+ if (lir->u.m.def_mask && (!lir->flags.is_nop || dump_nop)) {
+ DUMP_RESOURCE_MASK(DumpResourceMask(lir, lir->u.m.def_mask, "def"));
}
}
@@ -348,6 +350,7 @@
new_value->operands[0] = value;
new_value->next = *constant_list_p;
*constant_list_p = new_value;
+ estimated_native_code_size_ += sizeof(value);
return new_value;
}
return NULL;
@@ -431,6 +434,7 @@
int bx_offset = INVALID_OFFSET;
switch (cu_->instruction_set) {
case kThumb2:
+ DCHECK(tab_rec->anchor->flags.fixup != kFixupNone);
bx_offset = tab_rec->anchor->offset + 4;
break;
case kX86:
@@ -714,111 +718,29 @@
return offset;
}
-// LIR offset assignment.
-int Mir2Lir::AssignInsnOffsets() {
- LIR* lir;
- int offset = 0;
-
- for (lir = first_lir_insn_; lir != NULL; lir = NEXT_LIR(lir)) {
- lir->offset = offset;
- if (LIKELY(lir->opcode >= 0)) {
- if (!lir->flags.is_nop) {
- offset += lir->flags.size;
- }
- } else if (UNLIKELY(lir->opcode == kPseudoPseudoAlign4)) {
- if (offset & 0x2) {
- offset += 2;
- lir->operands[0] = 1;
- } else {
- lir->operands[0] = 0;
- }
- }
- /* Pseudo opcodes don't consume space */
- }
- return offset;
-}
-
-/*
- * Walk the compilation unit and assign offsets to instructions
- * and literals and compute the total size of the compiled unit.
- */
-void Mir2Lir::AssignOffsets() {
- int offset = AssignInsnOffsets();
-
- /* Const values have to be word aligned */
- offset = (offset + 3) & ~3;
-
- /* Set up offsets for literals */
- data_offset_ = offset;
-
- offset = AssignLiteralOffset(offset);
-
- offset = AssignSwitchTablesOffset(offset);
-
- offset = AssignFillArrayDataOffset(offset);
-
- total_size_ = offset;
-}
-
-/*
- * Go over each instruction in the list and calculate the offset from the top
- * before sending them off to the assembler. If out-of-range branch distance is
- * seen rearrange the instructions a bit to correct it.
- */
-void Mir2Lir::AssembleLIR() {
- AssignOffsets();
- int assembler_retries = 0;
- /*
- * Assemble here. Note that we generate code with optimistic assumptions
- * and if found now to work, we'll have to redo the sequence and retry.
- */
-
- while (true) {
- AssemblerStatus res = AssembleInstructions(0);
- if (res == kSuccess) {
- break;
- } else {
- assembler_retries++;
- if (assembler_retries > MAX_ASSEMBLER_RETRIES) {
- CodegenDump();
- LOG(FATAL) << "Assembler error - too many retries";
- }
- // Redo offsets and try again
- AssignOffsets();
- code_buffer_.clear();
- }
- }
-
- // Install literals
- InstallLiteralPools();
-
- // Install switch tables
- InstallSwitchTables();
-
- // Install fill array data
- InstallFillArrayData();
-
- // Create the mapping table and native offset to reference map.
- CreateMappingTables();
-
- CreateNativeGcMap();
-}
-
/*
* Insert a kPseudoCaseLabel at the beginning of the Dalvik
- * offset vaddr. This label will be used to fix up the case
+ * offset vaddr if pretty-printing, otherwise use the standard block
+ * label. The selected label will be used to fix up the case
* branch table during the assembly phase. All resource flags
* are set to prevent code motion. KeyVal is just there for debugging.
*/
LIR* Mir2Lir::InsertCaseLabel(int vaddr, int keyVal) {
LIR* boundary_lir = &block_label_list_[mir_graph_->FindBlock(vaddr)->id];
- LIR* new_label = static_cast<LIR*>(arena_->Alloc(sizeof(LIR), ArenaAllocator::kAllocLIR));
- new_label->dalvik_offset = vaddr;
- new_label->opcode = kPseudoCaseLabel;
- new_label->operands[0] = keyVal;
- new_label->def_mask = ENCODE_ALL;
- InsertLIRAfter(boundary_lir, new_label);
- return new_label;
+ LIR* res = boundary_lir;
+ if (cu_->verbose) {
+ // Only pay the expense if we're pretty-printing.
+ LIR* new_label = static_cast<LIR*>(arena_->Alloc(sizeof(LIR), ArenaAllocator::kAllocLIR));
+ new_label->dalvik_offset = vaddr;
+ new_label->opcode = kPseudoCaseLabel;
+ new_label->operands[0] = keyVal;
+ new_label->flags.fixup = kFixupLabel;
+ DCHECK(!new_label->flags.use_def_invalid);
+ new_label->u.m.def_mask = ENCODE_ALL;
+ InsertLIRAfter(boundary_lir, new_label);
+ res = new_label;
+ }
+ return res;
}
void Mir2Lir::MarkPackedCaseLabels(Mir2Lir::SwitchTable *tab_rec) {
@@ -951,6 +873,7 @@
literal_list_(NULL),
method_literal_list_(NULL),
code_literal_list_(NULL),
+ first_fixup_(NULL),
cu_(cu),
mir_graph_(mir_graph),
switch_tables_(arena, 4, kGrowableArraySwitchTables),
@@ -964,6 +887,7 @@
total_size_(0),
block_label_list_(NULL),
current_dalvik_offset_(0),
+ estimated_native_code_size_(0),
reg_pool_(NULL),
live_sreg_(0),
num_core_spills_(0),