Compiler: replace DOM traversal computation
The old trace JIT used a few recursive graph-walking algorithms, which
was perfectly reasonable when graph size was capped at a few dozen
nodes. These were replaced with iterative walk-order computations - or
so I thought. One was missed, and it caused a stack overflow on a
pathologically large method compilation.
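
The fix replaces the remaining recursion with an explicit work stack
(see ComputeDomPostOrderTraversal in the diff below). A minimal sketch
of the pattern, using an illustrative BasicBlock stand-in rather than
the real MIRGraph types:

  #include <utility>
  #include <vector>

  struct BasicBlock {
    std::vector<BasicBlock*> successors;
    bool visited;
  };

  // Post-order walk with the recursion state held on the heap, so a
  // pathologically deep CFG can no longer overflow the native stack.
  void PostOrderWalk(BasicBlock* entry, std::vector<BasicBlock*>* order) {
    // Each entry pairs a block with the index of its next untried successor.
    std::vector<std::pair<BasicBlock*, size_t> > work_stack;
    entry->visited = true;
    work_stack.push_back(std::make_pair(entry, size_t(0)));
    while (!work_stack.empty()) {
      BasicBlock* bb = work_stack.back().first;
      size_t next = work_stack.back().second;
      if (next < bb->successors.size()) {
        work_stack.back().second = next + 1;
        BasicBlock* succ = bb->successors[next];
        if (!succ->visited) {
          succ->visited = true;
          work_stack.push_back(std::make_pair(succ, size_t(0)));
        }
      } else {
        // All successors handled: this is bb's post-order position.
        order->push_back(bb);
        work_stack.pop_back();
      }
    }
  }
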
Renamed some arena_allocator items for consistency and clarity. Added
more detailed memory usage logging. Reworked the allocator to waste
less space when an allocation doesn't fit and a new block must be
allocated.
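
The new policy when a request doesn't fit, reduced to a sketch (the
helper and its parameters are illustrative; high_water and block_size
mirror ARENA_HIGH_WATER and block_size_ in the diff below):

  // Decide how large the replacement block should be when a request
  // doesn't fit in the current block.
  size_t NewBlockSize(size_t request, size_t remaining, size_t block_size,
                      size_t high_water) {
    // A large request, or a current block that still has plenty of usable
    // space left, gets a custom-sized block; such a block is spliced to the
    // head of the list so the current block's free tail stays available.
    if ((remaining >= high_water) || (request > block_size)) {
      return request;
    }
    // Otherwise grab a fresh standard block and count the old tail as lost.
    return block_size;
  }
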
Change-Id: I4d84dded3c47819eefa0de90ebb821dd12eb8be8
diff --git a/src/compiler/dex/arena_allocator.cc b/src/compiler/dex/arena_allocator.cc
index 7f1bfb3..3a3e385 100644
--- a/src/compiler/dex/arena_allocator.cc
+++ b/src/compiler/dex/arena_allocator.cc
@@ -41,12 +41,14 @@
: default_size_(default_size),
block_size_(default_size - sizeof(ArenaMemBlock)),
arena_head_(NULL),
- current_arena_(NULL),
+ current_block_(NULL),
num_arena_blocks_(0),
- malloc_bytes_(0) {
+ malloc_bytes_(0),
+ lost_bytes_(0),
+ num_allocations_(0) {
memset(&alloc_stats_[0], 0, sizeof(alloc_stats_));
// Start with an empty arena.
- arena_head_ = current_arena_ = EmptyArena();
+ arena_head_ = current_block_ = EmptyArenaBlock();
num_arena_blocks_++;
}
@@ -58,12 +60,12 @@
head = head->next;
free(p);
}
- arena_head_ = current_arena_ = NULL;
+ arena_head_ = NULL;
num_arena_blocks_ = 0;
}
// Return an arena with no storage for use as a sentinel.
-ArenaAllocator::ArenaMemBlock* ArenaAllocator::EmptyArena() {
+ArenaAllocator::ArenaMemBlock* ArenaAllocator::EmptyArenaBlock() {
ArenaMemBlock* res = static_cast<ArenaMemBlock*>(malloc(sizeof(ArenaMemBlock)));
malloc_bytes_ += sizeof(ArenaMemBlock);
res->block_size = 0;
@@ -74,32 +76,56 @@
// Arena-based malloc for compilation tasks.
void* ArenaAllocator::NewMem(size_t size, bool zero, ArenaAllocKind kind) {
- DCHECK(current_arena_ != NULL);
+ DCHECK(current_block_ != NULL);
DCHECK(arena_head_ != NULL);
size = (size + 3) & ~3;
alloc_stats_[kind] += size;
- if (size + current_arena_->bytes_allocated > current_arena_->block_size) {
- size_t block_size = (size <= block_size_) ? block_size_ : size;
- /* Time to allocate a new block */
- size_t allocation_size = sizeof(ArenaMemBlock) + block_size;
- ArenaMemBlock *new_arena =
- static_cast<ArenaMemBlock*>(malloc(allocation_size));
- if (new_arena == NULL) {
+ ArenaMemBlock* allocation_block = current_block_; // Assume we'll fit.
+ size_t remaining_space = current_block_->block_size - current_block_->bytes_allocated;
+ if (remaining_space < size) {
+ /*
+ * Time to allocate a new block. If this is a large allocation or we have
+ * significant space remaining in the current block then fulfill the allocation
+ * request with a custom-sized malloc() - otherwise grab a new standard block.
+ */
+ size_t allocation_size = sizeof(ArenaMemBlock);
+ if ((remaining_space >= ARENA_HIGH_WATER) || (size > block_size_)) {
+ allocation_size += size;
+ } else {
+ allocation_size += block_size_;
+ }
+ ArenaMemBlock *new_block = static_cast<ArenaMemBlock*>(malloc(allocation_size));
+ if (new_block == NULL) {
LOG(FATAL) << "Arena allocation failure";
}
malloc_bytes_ += allocation_size;
- new_arena->block_size = block_size;
- new_arena->bytes_allocated = 0;
- new_arena->next = NULL;
- current_arena_->next = new_arena;
- current_arena_ = new_arena;
+ new_block->block_size = allocation_size - sizeof(ArenaMemBlock);
+ new_block->bytes_allocated = 0;
+ new_block->next = NULL;
num_arena_blocks_++;
+ /*
+ * If the new block is completely full, insert it at the head of the list so we don't
+ * bother trying to fit anything more into it and don't hide the potentially allocatable
+ * space at the end of the last (current_block_) block. TUNING: if we move to a mark
+ * scheme, revisit this code to keep allocation order intact.
+ */
+ if (new_block->block_size == size) {
+ new_block->next = arena_head_;
+ arena_head_ = new_block;
+ } else {
+ int lost = (current_block_->block_size - current_block_->bytes_allocated);
+ lost_bytes_ += lost;
+ current_block_->next = new_block;
+ current_block_ = new_block;
+ }
+ allocation_block = new_block;
}
- void* ptr = &current_arena_->ptr[current_arena_->bytes_allocated];
- current_arena_->bytes_allocated += size;
+ void* ptr = &allocation_block->ptr[allocation_block->bytes_allocated];
+ allocation_block->bytes_allocated += size;
if (zero) {
memset(ptr, 0, size);
}
+ num_allocations_++;
return ptr;
}
@@ -109,9 +135,10 @@
for (int i = 0; i < kNumAllocKinds; i++) {
total += alloc_stats_[i];
}
- os << " MEM: used: " << total << ", allocated: " << malloc_bytes_ << ", wasted: "
- << malloc_bytes_ - (total + (num_arena_blocks_ * sizeof(ArenaMemBlock))) << "\n";
- os << "Number of arenas allocated: " << num_arena_blocks_ << "\n";
+ os << " MEM: used: " << total << ", allocated: " << malloc_bytes_
+ << ", lost: " << lost_bytes_ << "\n";
+ os << "Number of blocks allocated: " << num_arena_blocks_ << ", Number of allocations: "
+ << num_allocations_ << ", avg: " << total / num_allocations_ << "\n";
os << "===== Allocation by kind\n";
for (int i = 0; i < kNumAllocKinds; i++) {
os << alloc_names[i] << std::setw(10) << alloc_stats_[i] << "\n";
diff --git a/src/compiler/dex/arena_allocator.h b/src/compiler/dex/arena_allocator.h
index 26294b6..78d4614 100644
--- a/src/compiler/dex/arena_allocator.h
+++ b/src/compiler/dex/arena_allocator.h
@@ -24,6 +24,7 @@
namespace art {
#define ARENA_DEFAULT_BLOCK_SIZE (256 * 1024)
+#define ARENA_HIGH_WATER (16 * 1024)
class ArenaAllocator {
public:
@@ -65,15 +66,17 @@
char ptr[0];
};
- ArenaMemBlock* EmptyArena();
+ ArenaMemBlock* EmptyArenaBlock();
size_t default_size_; // Smallest size of new allocation block.
size_t block_size_; // Amount of allocatable bytes on a default block.
ArenaMemBlock* arena_head_; // Head of linked list of allocation blocks.
- ArenaMemBlock* current_arena_; // NOTE: code assumes there's always at least 1 block.
+ ArenaMemBlock* current_block_; // NOTE: code assumes there's always at least 1 block.
int num_arena_blocks_;
- uint32_t malloc_bytes_; // Number of actual bytes malloc'd
- uint32_t alloc_stats_[kNumAllocKinds]; // Bytes used by various allocation kinds.
+ uint32_t malloc_bytes_; // Number of actual bytes malloc'd
+ uint32_t alloc_stats_[kNumAllocKinds]; // Bytes used by various allocation kinds.
+ uint32_t lost_bytes_; // Lost memory at end of too-small region
+ uint32_t num_allocations_;
}; // ArenaAllocator
diff --git a/src/compiler/dex/arena_bit_vector.cc b/src/compiler/dex/arena_bit_vector.cc
index 6b0fe8f..6f664e5 100644
--- a/src/compiler/dex/arena_bit_vector.cc
+++ b/src/compiler/dex/arena_bit_vector.cc
@@ -38,7 +38,6 @@
storage_(static_cast<uint32_t*>(arena_->NewMem(storage_size_ * sizeof(uint32_t), true,
ArenaAllocator::kAllocGrowableBitMap))) {
DCHECK_EQ(sizeof(storage_[0]), 4U); // Assuming 32-bit units.
- // TODO: collect detailed memory usage stats by bit vector kind.
}
/*
diff --git a/src/compiler/dex/arena_bit_vector.h b/src/compiler/dex/arena_bit_vector.h
index ffc2160..f5c471c 100644
--- a/src/compiler/dex/arena_bit_vector.h
+++ b/src/compiler/dex/arena_bit_vector.h
@@ -24,27 +24,6 @@
namespace art {
-// Type of growable bitmap for memory tuning.
-enum OatBitMapKind {
- kBitMapMisc = 0,
- kBitMapUse,
- kBitMapDef,
- kBitMapLiveIn,
- kBitMapBMatrix,
- kBitMapDominators,
- kBitMapIDominated,
- kBitMapDomFrontier,
- kBitMapPhi,
- kBitMapTmpBlocks,
- kBitMapInputBlocks,
- kBitMapRegisterV,
- kBitMapTempSSARegisterV,
- kBitMapNullCheck,
- kBitMapTmpBlockV,
- kBitMapPredecessors,
- kNumBitMapKinds
-};
-
/*
* Expanding bitmap, used for tracking resources. Bits are numbered starting
* from zero. All operations on a BitVector are unsynchronized.
diff --git a/src/compiler/dex/compiler_enums.h b/src/compiler/dex/compiler_enums.h
index 71d7d65..bc456b2 100644
--- a/src/compiler/dex/compiler_enums.h
+++ b/src/compiler/dex/compiler_enums.h
@@ -387,7 +387,30 @@
kSelectGoto
};
-std::ostream& operator<<(std::ostream& os, const OpFeatureFlags& flag);
+std::ostream& operator<<(std::ostream& os, const SelectInstructionKind& kind);
+
+// Type of growable bitmap for memory tuning.
+enum OatBitMapKind {
+ kBitMapMisc = 0,
+ kBitMapUse,
+ kBitMapDef,
+ kBitMapLiveIn,
+ kBitMapBMatrix,
+ kBitMapDominators,
+ kBitMapIDominated,
+ kBitMapDomFrontier,
+ kBitMapPhi,
+ kBitMapTmpBlocks,
+ kBitMapInputBlocks,
+ kBitMapRegisterV,
+ kBitMapTempSSARegisterV,
+ kBitMapNullCheck,
+ kBitMapTmpBlockV,
+ kBitMapPredecessors,
+ kNumBitMapKinds
+};
+
+std::ostream& operator<<(std::ostream& os, const OatBitMapKind& kind);
} // namespace art
diff --git a/src/compiler/dex/frontend.cc b/src/compiler/dex/frontend.cc
index b212e5b..ca751ab 100644
--- a/src/compiler/dex/frontend.cc
+++ b/src/compiler/dex/frontend.cc
@@ -101,6 +101,7 @@
//(1 << kDebugDumpCheckStats) |
//(1 << kDebugDumpBitcodeFile) |
//(1 << kDebugVerifyBitcode) |
+ //(1 << kDebugShowSummaryMemoryUsage) |
0;
static CompiledMethod* CompileMethod(CompilerDriver& compiler,
@@ -249,6 +250,11 @@
}
}
+ if (cu->enable_debug & (1 << kDebugShowSummaryMemoryUsage)) {
+ LOG(INFO) << "MEMINFO " << cu->arena.BytesAllocated() << " " << cu->mir_graph->GetNumBlocks()
+ << " " << PrettyMethod(method_idx, dex_file);
+ }
+
return result;
}
diff --git a/src/compiler/dex/frontend.h b/src/compiler/dex/frontend.h
index 49e0852..dc57a23 100644
--- a/src/compiler/dex/frontend.h
+++ b/src/compiler/dex/frontend.h
@@ -71,6 +71,7 @@
kDebugDumpCheckStats,
kDebugDumpBitcodeFile,
kDebugVerifyBitcode,
+ kDebugShowSummaryMemoryUsage,
};
class LLVMInfo {
diff --git a/src/compiler/dex/growable_array.h b/src/compiler/dex/growable_array.h
index eb865d5..c4684a7 100644
--- a/src/compiler/dex/growable_array.h
+++ b/src/compiler/dex/growable_array.h
@@ -79,26 +79,26 @@
GrowableArray(ArenaAllocator* arena, size_t init_length, OatListKind kind = kGrowableArrayMisc)
: arena_(arena),
- num_allocated_(0),
+ num_allocated_(init_length),
num_used_(0),
kind_(kind) {
elem_list_ = static_cast<T*>(arena_->NewMem(sizeof(T) * init_length, true,
- ArenaAllocator::kAllocGrowableArray));
- // TODO: Collect detailed memory usage stats by list kind.
+ ArenaAllocator::kAllocGrowableArray));
};
// Expand the list size to at least new length.
void Resize(size_t new_length) {
if (new_length <= num_allocated_) return;
- size_t target_length = (num_allocated_ < 128) ? num_allocated_ << 1 : num_allocated_ + 128;
+ // If it's a small list double the size, else grow 1.5x.
+ size_t target_length =
+ (num_allocated_ < 128) ? num_allocated_ << 1 : num_allocated_ + (num_allocated_ >> 1);
if (new_length > target_length) {
target_length = new_length;
}
T* new_array = static_cast<T*>(arena_->NewMem(sizeof(T) * target_length, true,
ArenaAllocator::kAllocGrowableArray));
memcpy(new_array, elem_list_, sizeof(T) * num_allocated_);
- // TODO: Collect stats on wasted resize memory.
num_allocated_ = target_length;
elem_list_ = new_array;
};
diff --git a/src/compiler/dex/mir_dataflow.cc b/src/compiler/dex/mir_dataflow.cc
index 444874d..9f61d73 100644
--- a/src/compiler/dex/mir_dataflow.cc
+++ b/src/compiler/dex/mir_dataflow.cc
@@ -1202,13 +1202,6 @@
}
}
-/* Clear the visited flag for each BB */
-bool MIRGraph::ClearVisitedFlag(struct BasicBlock* bb)
-{
- bb->visited = false;
- return true;
-}
-
/*
* This function will make a best guess at whether the invoke will
* end up using Method*. It isn't critical to get it exactly right,
diff --git a/src/compiler/dex/mir_graph.h b/src/compiler/dex/mir_graph.h
index fd81967..882a508 100644
--- a/src/compiler/dex/mir_graph.h
+++ b/src/compiler/dex/mir_graph.h
@@ -592,7 +592,7 @@
void DataFlowSSAFormat35C(MIR* mir);
void DataFlowSSAFormat3RC(MIR* mir);
bool FindLocalLiveIn(BasicBlock* bb);
- bool ClearVisitedFlag(struct BasicBlock* bb);
+ void ClearAllVisitedFlags();
bool CountUses(struct BasicBlock* bb);
bool InferTypeAndSize(BasicBlock* bb);
bool VerifyPredInfo(BasicBlock* bb);
@@ -621,7 +621,7 @@
bool ComputeBlockLiveIns(BasicBlock* bb);
bool InsertPhiNodeOperands(BasicBlock* bb);
bool ComputeDominanceFrontier(BasicBlock* bb);
- bool DoConstantPropogation(BasicBlock* bb);
+ void DoConstantPropogation(BasicBlock* bb);
void CountChecks(BasicBlock* bb);
bool CombineBlocks(BasicBlock* bb);
diff --git a/src/compiler/dex/mir_optimization.cc b/src/compiler/dex/mir_optimization.cc
index 54a9a83..5345501 100644
--- a/src/compiler/dex/mir_optimization.cc
+++ b/src/compiler/dex/mir_optimization.cc
@@ -39,7 +39,7 @@
constant_values_[ssa_reg + 1] = High32Bits(value);
}
-bool MIRGraph::DoConstantPropogation(BasicBlock* bb)
+void MIRGraph::DoConstantPropogation(BasicBlock* bb)
{
MIR* mir;
@@ -94,7 +94,6 @@
}
}
/* TODO: implement code to handle arithmetic operations */
- return true;
}
void MIRGraph::PropagateConstants()
@@ -848,11 +847,7 @@
{
if (!(cu_->disable_opt & (1 << kBBOpt))) {
DCHECK_EQ(cu_->num_compiler_temps, 0);
- // Mark all blocks as not visited
- AllNodesIterator iter(this, false /* not iterative */);
- for (BasicBlock* bb = iter.Next(); bb != NULL; bb = iter.Next()) {
- ClearVisitedFlag(bb);
- }
+ ClearAllVisitedFlags();
PreOrderDfsIterator iter2(this, false /* not iterative */);
for (BasicBlock* bb = iter2.Next(); bb != NULL; bb = iter2.Next()) {
BuildExtendedBBList(bb);
diff --git a/src/compiler/dex/quick/gen_invoke.cc b/src/compiler/dex/quick/gen_invoke.cc
index efacff0..9fd4a86 100644
--- a/src/compiler/dex/quick/gen_invoke.cc
+++ b/src/compiler/dex/quick/gen_invoke.cc
@@ -1158,11 +1158,13 @@
}
RegLocation rl_object = LoadValue(rl_src_obj, kCoreReg);
RegLocation rl_offset = LoadValue(rl_src_offset, kCoreReg);
- RegLocation rl_value = LoadValue(rl_src_value, kCoreReg);
+ RegLocation rl_value;
if (is_long) {
+ rl_value = LoadValueWide(rl_src_value, kCoreReg);
OpRegReg(kOpAdd, rl_object.low_reg, rl_offset.low_reg);
StoreBaseDispWide(rl_object.low_reg, 0, rl_value.low_reg, rl_value.high_reg);
} else {
+ rl_value = LoadValue(rl_src_value, kCoreReg);
StoreBaseIndexed(rl_object.low_reg, rl_offset.low_reg, rl_value.low_reg, 0, kWord);
}
if (is_volatile) {
diff --git a/src/compiler/dex/quick/ralloc_util.cc b/src/compiler/dex/quick/ralloc_util.cc
index dd38914..30ed1b7 100644
--- a/src/compiler/dex/quick/ralloc_util.cc
+++ b/src/compiler/dex/quick/ralloc_util.cc
@@ -37,6 +37,10 @@
if (reg_pool_->FPRegs[i].is_temp)
reg_pool_->FPRegs[i].in_use = false;
}
+ // Reset temp tracking sanity check.
+ if (kIsDebugBuild) {
+ live_sreg_ = INVALID_SREG;
+ }
}
/*
diff --git a/src/compiler/dex/ssa_transformation.cc b/src/compiler/dex/ssa_transformation.cc
index 02982e5..a90d705 100644
--- a/src/compiler/dex/ssa_transformation.cc
+++ b/src/compiler/dex/ssa_transformation.cc
@@ -21,6 +21,13 @@
namespace art {
+void MIRGraph::ClearAllVisitedFlags() {
+ AllNodesIterator iter(this, false /* not iterative */);
+ for (BasicBlock* bb = iter.Next(); bb != NULL; bb = iter.Next()) {
+ bb->visited = false;
+ }
+}
+
BasicBlock* MIRGraph::NeedsVisit(BasicBlock* bb) {
if (bb != NULL) {
if (bb->visited || bb->hidden) {
@@ -96,10 +103,8 @@
}
// Reset visited flags from all nodes
- AllNodesIterator iter(this, false /* not iterative */);
- for (BasicBlock* bb = iter.Next(); bb != NULL; bb = iter.Next()) {
- ClearVisitedFlag(bb);
- }
+ ClearAllVisitedFlags();
+
// Record dfs orders
RecordDFSOrders(GetEntryBlock());
@@ -158,26 +163,42 @@
}
}
-/* Compute the post-order traversal of the CFG */
-void MIRGraph::ComputeDomPostOrderTraversal(BasicBlock* bb)
-{
- ArenaBitVector::Iterator bv_iterator(bb->i_dominated);
-
- /* Iterate through the dominated blocks first */
- while (true) {
- //TUNING: hot call to BitVectorIteratorNext
- int bb_idx = bv_iterator.Next();
- if (bb_idx == -1) break;
- BasicBlock* dominated_bb = GetBasicBlock(bb_idx);
- ComputeDomPostOrderTraversal(dominated_bb);
+void MIRGraph::ComputeDomPostOrderTraversal(BasicBlock* bb) {
+ if (dom_post_order_traversal_ == NULL) {
+ // First time - create the array.
+ dom_post_order_traversal_ =
+ new (arena_) GrowableArray<int>(arena_, num_reachable_blocks_,
+ kGrowableArrayDomPostOrderTraversal);
+ } else {
+ dom_post_order_traversal_->Reset();
}
+ ClearAllVisitedFlags();
+ std::vector<std::pair<BasicBlock*, ArenaBitVector::Iterator*> > work_stack;
+ bb->visited = true;
+ work_stack.push_back(std::make_pair(bb, new (arena_) ArenaBitVector::Iterator(bb->i_dominated)));
+ while (!work_stack.empty()) {
+ std::pair<BasicBlock*, ArenaBitVector::Iterator*> curr = work_stack.back();
+ BasicBlock* curr_bb = curr.first;
+ ArenaBitVector::Iterator* curr_idom_iter = curr.second;
+ int bb_idx = curr_idom_iter->Next();
+ while ((bb_idx != -1) && (NeedsVisit(GetBasicBlock(bb_idx)) == NULL)) {
+ bb_idx = curr_idom_iter->Next();
+ }
+ if (bb_idx != -1) {
+ BasicBlock* new_bb = GetBasicBlock(bb_idx);
+ new_bb->visited = true;
+ work_stack.push_back(
+ std::make_pair(new_bb, new (arena_) ArenaBitVector::Iterator(new_bb->i_dominated)));
+ } else {
+ // No unvisited dominated blocks remain: record curr_bb in post-order.
+ dom_post_order_traversal_->Insert(curr_bb->id);
+ work_stack.pop_back();
- /* Enter the current block id */
- dom_post_order_traversal_->Insert(bb->id);
-
- /* hacky loop detection */
- if (bb->taken && bb->dominators->IsBitSet(bb->taken->id)) {
- attributes_ |= METHOD_HAS_LOOP;
+ /* hacky loop detection */
+ if (curr_bb->taken && curr_bb->dominators->IsBitSet(curr_bb->taken->id)) {
+ attributes_ |= METHOD_HAS_LOOP;
+ }
+ }
}
}
@@ -401,21 +422,8 @@
ComputeBlockDominators(bb);
}
- /*
- * Now go ahead and compute the post order traversal based on the
- * i_dominated sets.
- */
- if (dom_post_order_traversal_ == NULL) {
- dom_post_order_traversal_ =
- new (arena_) GrowableArray<int>(arena_, num_reachable_blocks, kGrowableArrayDomPostOrderTraversal);
- } else {
- dom_post_order_traversal_->Reset();
- }
-
+ // Compute the post-order traversal, then the dominance frontier for each block.
ComputeDomPostOrderTraversal(GetEntryBlock());
- DCHECK_EQ(dom_post_order_traversal_->Size(), num_reachable_blocks_);
-
- /* Now compute the dominance frontier for each block */
PostOrderDOMIterator iter5(this, false /* not iterative */);
for (BasicBlock* bb = iter5.Next(); bb != NULL; bb = iter5.Next()) {
ComputeDominanceFrontier(bb);
@@ -673,10 +681,7 @@
InsertPhiNodes();
/* Rename register names by local defs and phi nodes */
- AllNodesIterator iter1(this, false /* not iterative */);
- for (BasicBlock* bb = iter1.Next(); bb != NULL; bb = iter1.Next()) {
- ClearVisitedFlag(bb);
- }
+ ClearAllVisitedFlags();
DoDFSPreOrderSSARename(GetEntryBlock());
/*