Reduce quicken info size
Move the quicken info from using a map of <dex pc, index> to an array
of indices. Removed leb encoding since it is harmful for 16 bit
indices. The map is indexed by the dequickenable instruction index
from the start of the code item.
Numbers for a certain large app compiled with quicken filter:
dex2oat native alloc: 85345936B -> 81527072B
oat file size: 9068968B -> 8659368B
vdex file size: 90479120B -> 86321184B
Bug: 63467744
Bug: 36457259
Test: test-art-host
(cherry picked from commit 959f348acabc48efbb18c547dad6300c0f610c1c)
Change-Id: I85546d8cd409cbf96140cbdddabd7e228797b9e3
diff --git a/compiler/optimizing/block_builder.cc b/compiler/optimizing/block_builder.cc
index 1e75f10..fe7ecd1 100644
--- a/compiler/optimizing/block_builder.cc
+++ b/compiler/optimizing/block_builder.cc
@@ -17,6 +17,7 @@
#include "block_builder.h"
#include "bytecode_utils.h"
+#include "quicken_info.h"
namespace art {
@@ -121,13 +122,18 @@
HBasicBlock* block = graph_->GetEntryBlock();
graph_->AddBlock(block);
+ size_t quicken_index = 0;
bool is_throwing_block = false;
+ // Calculate the qucikening index here instead of CreateBranchTargets since it's easier to
+ // calculate in dex_pc order.
for (CodeItemIterator it(code_item_); !it.Done(); it.Advance()) {
uint32_t dex_pc = it.CurrentDexPc();
// Check if this dex_pc address starts a new basic block.
HBasicBlock* next_block = GetBlockAt(dex_pc);
if (next_block != nullptr) {
+ // We only need quicken index entries for basic block boundaries.
+ quicken_index_for_dex_pc_.Put(dex_pc, quicken_index);
if (block != nullptr) {
// Last instruction did not end its basic block but a new one starts here.
// It must have been a block falling through into the next one.
@@ -137,6 +143,10 @@
is_throwing_block = false;
graph_->AddBlock(block);
}
+ // Make sure to increment this before the continues.
+ if (QuickenInfoTable::NeedsIndexForInstruction(&it.CurrentInstruction())) {
+ ++quicken_index;
+ }
if (block == nullptr) {
// Ignore dead code.
@@ -371,4 +381,8 @@
return true;
}
+size_t HBasicBlockBuilder::GetQuickenIndex(uint32_t dex_pc) const {
+ return quicken_index_for_dex_pc_.Get(dex_pc);
+}
+
} // namespace art
diff --git a/compiler/optimizing/block_builder.h b/compiler/optimizing/block_builder.h
index 1be0b4c..6adce81 100644
--- a/compiler/optimizing/block_builder.h
+++ b/compiler/optimizing/block_builder.h
@@ -37,7 +37,8 @@
nullptr,
arena_->Adapter(kArenaAllocGraphBuilder)),
throwing_blocks_(kDefaultNumberOfThrowingBlocks, arena_->Adapter(kArenaAllocGraphBuilder)),
- number_of_branches_(0u) {}
+ number_of_branches_(0u),
+ quicken_index_for_dex_pc_(std::less<uint32_t>(), arena_->Adapter()) {}
// Creates basic blocks in `graph_` at branch target dex_pc positions of the
// `code_item_`. Blocks are connected but left unpopulated with instructions.
@@ -48,6 +49,8 @@
size_t GetNumberOfBranches() const { return number_of_branches_; }
HBasicBlock* GetBlockAt(uint32_t dex_pc) const { return branch_targets_[dex_pc]; }
+ size_t GetQuickenIndex(uint32_t dex_pc) const;
+
private:
// Creates a basic block starting at given `dex_pc`.
HBasicBlock* MaybeCreateBlockAt(uint32_t dex_pc);
@@ -78,6 +81,9 @@
ArenaVector<HBasicBlock*> throwing_blocks_;
size_t number_of_branches_;
+ // A table to quickly find the quicken index for the first instruction of a basic block.
+ ArenaSafeMap<uint32_t, uint32_t> quicken_index_for_dex_pc_;
+
static constexpr size_t kDefaultNumberOfThrowingBlocks = 2u;
DISALLOW_COPY_AND_ASSIGN(HBasicBlockBuilder);
diff --git a/compiler/optimizing/instruction_builder.cc b/compiler/optimizing/instruction_builder.cc
index a73b124..839f328 100644
--- a/compiler/optimizing/instruction_builder.cc
+++ b/compiler/optimizing/instruction_builder.cc
@@ -22,6 +22,7 @@
#include "dex_instruction-inl.h"
#include "driver/compiler_options.h"
#include "imtable-inl.h"
+#include "quicken_info.h"
#include "sharpening.h"
#include "scoped_thread_state_change-inl.h"
@@ -312,6 +313,11 @@
DCHECK(!IsBlockPopulated(current_block_));
+ uint32_t quicken_index = 0;
+ if (CanDecodeQuickenedInfo()) {
+ quicken_index = block_builder_->GetQuickenIndex(block_dex_pc);
+ }
+
for (CodeItemIterator it(code_item_, block_dex_pc); !it.Done(); it.Advance()) {
if (current_block_ == nullptr) {
// The previous instruction ended this block.
@@ -332,9 +338,13 @@
AppendInstruction(new (arena_) HNativeDebugInfo(dex_pc));
}
- if (!ProcessDexInstruction(it.CurrentInstruction(), dex_pc)) {
+ if (!ProcessDexInstruction(it.CurrentInstruction(), dex_pc, quicken_index)) {
return false;
}
+
+ if (QuickenInfoTable::NeedsIndexForInstruction(&it.CurrentInstruction())) {
+ ++quicken_index;
+ }
}
if (current_block_ != nullptr) {
@@ -1261,7 +1271,8 @@
bool HInstructionBuilder::BuildInstanceFieldAccess(const Instruction& instruction,
uint32_t dex_pc,
- bool is_put) {
+ bool is_put,
+ size_t quicken_index) {
uint32_t source_or_dest_reg = instruction.VRegA_22c();
uint32_t obj_reg = instruction.VRegB_22c();
uint16_t field_index;
@@ -1269,7 +1280,7 @@
if (!CanDecodeQuickenedInfo()) {
return false;
}
- field_index = LookupQuickenedInfo(dex_pc);
+ field_index = LookupQuickenedInfo(quicken_index);
} else {
field_index = instruction.VRegC_22c();
}
@@ -1805,40 +1816,17 @@
}
bool HInstructionBuilder::CanDecodeQuickenedInfo() const {
- return interpreter_metadata_ != nullptr;
+ return !quicken_info_.IsNull();
}
-uint16_t HInstructionBuilder::LookupQuickenedInfo(uint32_t dex_pc) {
- DCHECK(interpreter_metadata_ != nullptr);
-
- // First check if the info has already been decoded from `interpreter_metadata_`.
- auto it = skipped_interpreter_metadata_.find(dex_pc);
- if (it != skipped_interpreter_metadata_.end()) {
- // Remove the entry from the map and return the parsed info.
- uint16_t value_in_map = it->second;
- skipped_interpreter_metadata_.erase(it);
- return value_in_map;
- }
-
- // Otherwise start parsing `interpreter_metadata_` until the slot for `dex_pc`
- // is found. Store skipped values in the `skipped_interpreter_metadata_` map.
- while (true) {
- uint32_t dex_pc_in_map = DecodeUnsignedLeb128(&interpreter_metadata_);
- uint16_t value_in_map = DecodeUnsignedLeb128(&interpreter_metadata_);
- DCHECK_LE(dex_pc_in_map, dex_pc);
-
- if (dex_pc_in_map == dex_pc) {
- return value_in_map;
- } else {
- // Overwrite and not Put, as quickened CHECK-CAST has two entries with
- // the same dex_pc. This is OK, because the compiler does not care about those
- // entries.
- skipped_interpreter_metadata_.Overwrite(dex_pc_in_map, value_in_map);
- }
- }
+uint16_t HInstructionBuilder::LookupQuickenedInfo(uint32_t quicken_index) {
+ DCHECK(CanDecodeQuickenedInfo());
+ return quicken_info_.GetData(quicken_index);
}
-bool HInstructionBuilder::ProcessDexInstruction(const Instruction& instruction, uint32_t dex_pc) {
+bool HInstructionBuilder::ProcessDexInstruction(const Instruction& instruction,
+ uint32_t dex_pc,
+ size_t quicken_index) {
switch (instruction.Opcode()) {
case Instruction::CONST_4: {
int32_t register_index = instruction.VRegA();
@@ -1995,7 +1983,7 @@
if (!CanDecodeQuickenedInfo()) {
return false;
}
- method_idx = LookupQuickenedInfo(dex_pc);
+ method_idx = LookupQuickenedInfo(quicken_index);
} else {
method_idx = instruction.VRegB_35c();
}
@@ -2020,7 +2008,7 @@
if (!CanDecodeQuickenedInfo()) {
return false;
}
- method_idx = LookupQuickenedInfo(dex_pc);
+ method_idx = LookupQuickenedInfo(quicken_index);
} else {
method_idx = instruction.VRegB_3rc();
}
@@ -2693,7 +2681,7 @@
case Instruction::IGET_CHAR_QUICK:
case Instruction::IGET_SHORT:
case Instruction::IGET_SHORT_QUICK: {
- if (!BuildInstanceFieldAccess(instruction, dex_pc, false)) {
+ if (!BuildInstanceFieldAccess(instruction, dex_pc, false, quicken_index)) {
return false;
}
break;
@@ -2713,7 +2701,7 @@
case Instruction::IPUT_CHAR_QUICK:
case Instruction::IPUT_SHORT:
case Instruction::IPUT_SHORT_QUICK: {
- if (!BuildInstanceFieldAccess(instruction, dex_pc, true)) {
+ if (!BuildInstanceFieldAccess(instruction, dex_pc, true, quicken_index)) {
return false;
}
break;
diff --git a/compiler/optimizing/instruction_builder.h b/compiler/optimizing/instruction_builder.h
index e968760..5a83df3 100644
--- a/compiler/optimizing/instruction_builder.h
+++ b/compiler/optimizing/instruction_builder.h
@@ -27,6 +27,7 @@
#include "mirror/dex_cache.h"
#include "nodes.h"
#include "optimizing_compiler_stats.h"
+#include "quicken_info.h"
#include "ssa_builder.h"
namespace art {
@@ -67,9 +68,7 @@
code_generator_(code_generator),
dex_compilation_unit_(dex_compilation_unit),
outer_compilation_unit_(outer_compilation_unit),
- interpreter_metadata_(interpreter_metadata),
- skipped_interpreter_metadata_(std::less<uint32_t>(),
- arena_->Adapter(kArenaAllocGraphBuilder)),
+ quicken_info_(interpreter_metadata),
compilation_stats_(compiler_stats),
dex_cache_(dex_cache),
loop_headers_(graph->GetArena()->Adapter(kArenaAllocGraphBuilder)) {
@@ -85,11 +84,11 @@
void PropagateLocalsToCatchBlocks();
void SetLoopHeaderPhiInputs();
- bool ProcessDexInstruction(const Instruction& instruction, uint32_t dex_pc);
+ bool ProcessDexInstruction(const Instruction& instruction, uint32_t dex_pc, size_t quicken_index);
void FindNativeDebugInfoLocations(ArenaBitVector* locations);
bool CanDecodeQuickenedInfo() const;
- uint16_t LookupQuickenedInfo(uint32_t dex_pc);
+ uint16_t LookupQuickenedInfo(uint32_t quicken_index);
HBasicBlock* FindBlockStartingAt(uint32_t dex_pc) const;
@@ -159,7 +158,10 @@
void BuildReturn(const Instruction& instruction, Primitive::Type type, uint32_t dex_pc);
// Builds an instance field access node and returns whether the instruction is supported.
- bool BuildInstanceFieldAccess(const Instruction& instruction, uint32_t dex_pc, bool is_put);
+ bool BuildInstanceFieldAccess(const Instruction& instruction,
+ uint32_t dex_pc,
+ bool is_put,
+ size_t quicken_index);
void BuildUnresolvedStaticFieldAccess(const Instruction& instruction,
uint32_t dex_pc,
@@ -349,14 +351,8 @@
// methods.
const DexCompilationUnit* const outer_compilation_unit_;
- // Original values kept after instruction quickening. This is a data buffer
- // of Leb128-encoded (dex_pc, value) pairs sorted by dex_pc.
- const uint8_t* interpreter_metadata_;
-
- // InstructionBuilder does not parse instructions in dex_pc order. Quickening
- // info for out-of-order dex_pcs is stored in a map until the positions
- // are eventually visited.
- ArenaSafeMap<uint32_t, uint16_t> skipped_interpreter_metadata_;
+ // Original values kept after instruction quickening.
+ QuickenInfoTable quicken_info_;
OptimizingCompilerStats* compilation_stats_;
Handle<mirror::DexCache> dex_cache_;