Change the BitTableBuilder API to be POD based.
The compiler currently keeps two copies of all stack map
intermediate data in memory at the same time.
Change the BitTableBuilder so that it can store the
intermediate data directly (e.g. StackMapEntry), which
saves space and avoids the complexity of the copying code.
It will also make it possible to deduplicate data as we go,
further reducing memory use and code complexity.
Test: test-art-host-gtest-stack_map_test
Change-Id: I660fddf0629422ae0d2588333854d8fdf1e1bd0f
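
For illustration, the API change as exercised by the updated tests
below (a minimal sketch; RowData, allocator, buffer, and bit_offset
are stand-ins taken from the test code):

    // Before: column count as a template argument, variadic AddRow().
    BitTableBuilder<4> builder;
    builder.AddRow(42u, kNoValue, 0u, static_cast<uint32_t>(-2));

    // After: a POD row type with one uint32_t field per column.
    // Rows are stored directly in an arena-backed container and
    // encoded in a separate pass.
    struct RowData { uint32_t a; uint32_t b; uint32_t c; uint32_t d; };
    BitTableBuilder<RowData> builder(&allocator);
    builder.Add(RowData{42u, kNoValue, 0u, static_cast<uint32_t>(-2)});
    builder.Encode(&buffer, &bit_offset);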
diff --git a/compiler/optimizing/stack_map_stream.cc b/compiler/optimizing/stack_map_stream.cc
index aa28c8b..c6e375a 100644
--- a/compiler/optimizing/stack_map_stream.cc
+++ b/compiler/optimizing/stack_map_stream.cc
@@ -199,9 +199,6 @@
return region;
}
-template<uint32_t NumColumns>
-using ScopedBitTableBuilder = BitTableBuilder<NumColumns, ScopedArenaAllocatorAdapter<uint32_t>>;
-
size_t StackMapStream::PrepareForFillIn() {
size_t bit_offset = 0;
out_.clear();
@@ -258,20 +255,21 @@
DCHECK_EQ(location_catalog_offset, dex_register_location_catalog_region.size());
// Write stack maps.
- ScopedArenaAllocatorAdapter<void> adapter = allocator_->Adapter(kArenaAllocStackMapStream);
- ScopedBitTableBuilder<StackMap::Field::kCount> stack_map_builder((adapter));
- ScopedBitTableBuilder<InvokeInfo::Field::kCount> invoke_info_builder((adapter));
- ScopedBitTableBuilder<InlineInfo::Field::kCount> inline_info_builder((adapter));
+ BitTableBuilder<std::array<uint32_t, StackMap::kCount>> stack_map_builder(allocator_);
+ BitTableBuilder<std::array<uint32_t, InvokeInfo::kCount>> invoke_info_builder(allocator_);
+ BitTableBuilder<std::array<uint32_t, InlineInfo::kCount>> inline_info_builder(allocator_);
for (const StackMapEntry& entry : stack_maps_) {
if (entry.dex_method_index != dex::kDexNoIndex) {
- invoke_info_builder.AddRow(
+ std::array<uint32_t, InvokeInfo::kCount> invoke_info_entry {
entry.native_pc_code_offset.CompressedValue(),
entry.invoke_type,
- entry.dex_method_index_idx);
+ entry.dex_method_index_idx
+ };
+ invoke_info_builder.Add(invoke_info_entry);
}
// Set the inlining info.
- uint32_t inline_info_index = StackMap::kNoValue;
+ uint32_t inline_info_index = inline_info_builder.size();
DCHECK_LE(entry.inline_infos_start_index + entry.inlining_depth, inline_infos_.size());
for (size_t depth = 0; depth < entry.inlining_depth; ++depth) {
InlineInfoEntry inline_entry = inline_infos_[depth + entry.inline_infos_start_index];
@@ -281,32 +279,33 @@
method_index_idx = High32Bits(reinterpret_cast<uintptr_t>(inline_entry.method));
extra_data = Low32Bits(reinterpret_cast<uintptr_t>(inline_entry.method));
}
- uint32_t index = inline_info_builder.AddRow(
+ std::array<uint32_t, InlineInfo::kCount> inline_info_entry {
(depth == entry.inlining_depth - 1) ? InlineInfo::kLast : InlineInfo::kMore,
method_index_idx,
inline_entry.dex_pc,
extra_data,
- dex_register_entries_[inline_entry.dex_register_map_index].offset);
- if (depth == 0) {
- inline_info_index = index;
- }
+ dex_register_entries_[inline_entry.dex_register_map_index].offset,
+ };
+ inline_info_builder.Add(inline_info_entry);
}
- stack_map_builder.AddRow(
+ std::array<uint32_t, StackMap::kCount> stack_map_entry {
entry.native_pc_code_offset.CompressedValue(),
entry.dex_pc,
dex_register_entries_[entry.dex_register_map_index].offset,
- inline_info_index,
+ entry.inlining_depth != 0 ? inline_info_index : InlineInfo::kNoValue,
entry.register_mask_index,
- entry.stack_mask_index);
+ entry.stack_mask_index,
+ };
+ stack_map_builder.Add(stack_map_entry);
}
stack_map_builder.Encode(&out_, &bit_offset);
invoke_info_builder.Encode(&out_, &bit_offset);
inline_info_builder.Encode(&out_, &bit_offset);
// Write register masks table.
- ScopedBitTableBuilder<1> register_mask_builder((adapter));
+ BitTableBuilder<uint32_t> register_mask_builder(allocator_);
for (size_t i = 0; i < num_register_masks; ++i) {
- register_mask_builder.AddRow(register_masks_[i]);
+ register_mask_builder.Add(register_masks_[i]);
}
register_mask_builder.Encode(&out_, &bit_offset);
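
Note that the inline_info_index handling above is restructured, not
just mechanically converted: the index is now captured as
inline_info_builder.size() before the inline rows are appended (i.e.
the index the first row will land at), and InlineInfo::kNoValue is
substituted at the point of use when inlining_depth == 0. A sketch of
the equivalence:

    // Old: remember the index returned for the depth-0 row.
    uint32_t index = inline_info_builder.AddRow(/* columns */);
    if (depth == 0) inline_info_index = index;

    // New: the next Add() lands at the current size().
    uint32_t inline_info_index = inline_info_builder.size();
    inline_info_builder.Add(inline_info_entry);  // row at inline_info_index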
diff --git a/libartbase/base/arena_allocator.cc b/libartbase/base/arena_allocator.cc
index 183e5c9..df3deba 100644
--- a/libartbase/base/arena_allocator.cc
+++ b/libartbase/base/arena_allocator.cc
@@ -82,6 +82,7 @@
"RegAllocator ",
"RegAllocVldt ",
"StackMapStm ",
+ "BitTableBld ",
"VectorNode ",
"CodeGen ",
"Assembler ",
diff --git a/libartbase/base/arena_allocator.h b/libartbase/base/arena_allocator.h
index 211ff4f..4dccd03 100644
--- a/libartbase/base/arena_allocator.h
+++ b/libartbase/base/arena_allocator.h
@@ -92,6 +92,7 @@
kArenaAllocRegisterAllocator,
kArenaAllocRegisterAllocatorValidate,
kArenaAllocStackMapStream,
+ kArenaAllocBitTableBuilder,
kArenaAllocVectorNode,
kArenaAllocCodeGenerator,
kArenaAllocAssembler,
diff --git a/libartbase/base/bit_table.h b/libartbase/base/bit_table.h
index 24bdd13..54a2415 100644
--- a/libartbase/base/bit_table.h
+++ b/libartbase/base/bit_table.h
@@ -17,11 +17,17 @@
#ifndef ART_LIBARTBASE_BASE_BIT_TABLE_H_
#define ART_LIBARTBASE_BASE_BIT_TABLE_H_
-#include <vector>
+#include <array>
+#include <numeric>
+#include <string.h>
+#include <type_traits>
+#include <unordered_map>
#include "base/bit_memory_region.h"
-#include "base/bit_utils.h"
+#include "base/casts.h"
#include "base/memory_region.h"
+#include "base/scoped_arena_containers.h"
+#include "base/stl_util.h"
namespace art {
@@ -104,8 +110,7 @@
column_offset_[0] = 0;
for (uint32_t i = 0; i < kNumColumns; i++) {
size_t column_end = column_offset_[i] + DecodeVarintBits(region, bit_offset);
- column_offset_[i + 1] = column_end;
- DCHECK_EQ(column_offset_[i + 1], column_end) << "Overflow";
+ column_offset_[i + 1] = dchecked_integral_cast<uint16_t>(column_end);
}
}
@@ -146,75 +151,97 @@
template<uint32_t kNumColumns>
constexpr uint32_t BitTable<kNumColumns>::kValueBias;
-template<uint32_t kNumColumns, typename Alloc = std::allocator<uint32_t>>
+// Helper class for encoding BitTable. It can optionally de-duplicate the inputs.
+// Type 'T' must be a POD type consisting of uint32_t fields (one for each column).
+template<typename T>
class BitTableBuilder {
public:
- explicit BitTableBuilder(Alloc alloc = Alloc()) : buffer_(alloc) {}
+ static_assert(std::is_pod<T>::value, "Type 'T' must be POD");
+ static constexpr size_t kNumColumns = sizeof(T) / sizeof(uint32_t);
- template<typename ... T>
- uint32_t AddRow(T ... values) {
- constexpr size_t count = sizeof...(values);
- static_assert(count == kNumColumns, "Incorrect argument count");
- uint32_t data[count] = { values... };
- buffer_.insert(buffer_.end(), data, data + count);
- return num_rows_++;
+ explicit BitTableBuilder(ScopedArenaAllocator* allocator)
+ : rows_(allocator->Adapter(kArenaAllocBitTableBuilder)) {
+ }
+
+ T& operator[](size_t row) { return rows_[row]; }
+ const T& operator[](size_t row) const { return rows_[row]; }
+ size_t size() const { return rows_.size(); }
+
+ void Add(T value) {
+ rows_.push_back(value);
}
ALWAYS_INLINE uint32_t Get(uint32_t row, uint32_t column) const {
- return buffer_[row * kNumColumns + column];
+ DCHECK_LT(row, size());
+ DCHECK_LT(column, kNumColumns);
+ const uint32_t* data = reinterpret_cast<const uint32_t*>(&rows_[row]);
+ return data[column];
}
+ // Calculate the column bit widths based on the current data.
+ void Measure(/*out*/ std::array<uint32_t, kNumColumns>* column_bits) const {
+ uint32_t max_column_value[kNumColumns];
+ std::fill_n(max_column_value, kNumColumns, 0);
+ for (uint32_t r = 0; r < size(); r++) {
+ for (uint32_t c = 0; c < kNumColumns; c++) {
+ max_column_value[c] |= Get(r, c) - BitTable<kNumColumns>::kValueBias;
+ }
+ }
+ for (uint32_t c = 0; c < kNumColumns; c++) {
+ (*column_bits)[c] = MinimumBitsToStore(max_column_value[c]);
+ }
+ }
+
+ // Encode the stored data into a BitTable.
template<typename Vector>
- void Encode(Vector* out, size_t* bit_offset) {
+ void Encode(Vector* out, size_t* bit_offset) const {
constexpr uint32_t bias = BitTable<kNumColumns>::kValueBias;
size_t initial_bit_offset = *bit_offset;
- // Measure data size.
- uint32_t max_column_value[kNumColumns] = {};
- for (uint32_t r = 0; r < num_rows_; r++) {
+
+ std::array<uint32_t, kNumColumns> column_bits;
+ Measure(&column_bits);
+ EncodeVarintBits(out, bit_offset, size());
+ if (size() != 0) {
+ // Write table header.
for (uint32_t c = 0; c < kNumColumns; c++) {
- max_column_value[c] |= Get(r, c) - bias;
- }
- }
- // Write table header.
- uint32_t table_data_bits = 0;
- uint32_t column_bits[kNumColumns] = {};
- EncodeVarintBits(out, bit_offset, num_rows_);
- if (num_rows_ != 0) {
- for (uint32_t c = 0; c < kNumColumns; c++) {
- column_bits[c] = MinimumBitsToStore(max_column_value[c]);
EncodeVarintBits(out, bit_offset, column_bits[c]);
- table_data_bits += num_rows_ * column_bits[c];
+ }
+
+ // Write table data.
+ uint32_t row_bits = std::accumulate(column_bits.begin(), column_bits.end(), 0u);
+ out->resize(BitsToBytesRoundUp(*bit_offset + row_bits * size()));
+ BitMemoryRegion region(MemoryRegion(out->data(), out->size()));
+ for (uint32_t r = 0; r < size(); r++) {
+ for (uint32_t c = 0; c < kNumColumns; c++) {
+ region.StoreBitsAndAdvance(bit_offset, Get(r, c) - bias, column_bits[c]);
+ }
}
}
- // Write table data.
- out->resize(BitsToBytesRoundUp(*bit_offset + table_data_bits));
- BitMemoryRegion region(MemoryRegion(out->data(), out->size()));
- for (uint32_t r = 0; r < num_rows_; r++) {
- for (uint32_t c = 0; c < kNumColumns; c++) {
- region.StoreBitsAndAdvance(bit_offset, Get(r, c) - bias, column_bits[c]);
- }
- }
+
// Verify the written data.
if (kIsDebugBuild) {
BitTable<kNumColumns> table;
+ BitMemoryRegion region(MemoryRegion(out->data(), out->size()));
table.Decode(region, &initial_bit_offset);
- DCHECK_EQ(this->num_rows_, table.NumRows());
+ DCHECK_EQ(size(), table.NumRows());
for (uint32_t c = 0; c < kNumColumns; c++) {
DCHECK_EQ(column_bits[c], table.NumColumnBits(c));
}
- for (uint32_t r = 0; r < num_rows_; r++) {
+ for (uint32_t r = 0; r < size(); r++) {
for (uint32_t c = 0; c < kNumColumns; c++) {
- DCHECK_EQ(this->Get(r, c), table.Get(r, c)) << " (" << r << ", " << c << ")";
+ DCHECK_EQ(Get(r, c), table.Get(r, c)) << " (" << r << ", " << c << ")";
}
}
}
}
protected:
- std::vector<uint32_t, Alloc> buffer_;
- uint32_t num_rows_ = 0;
+ ScopedArenaDeque<T> rows_;
};
+template<typename T>
+constexpr size_t BitTableBuilder<T>::kNumColumns;
+
} // namespace art
#endif // ART_LIBARTBASE_BASE_BIT_TABLE_H_
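
For reference, the bit stream that Encode() above produces (a sketch
of the existing layout; all stored values are biased by kValueBias):

    // varint: row count
    // if row count != 0:
    //   varint, one per column: bit width (as computed by Measure())
    //   packed rows: for each row, each column stores
    //                (value - kValueBias) in that column's bit width
    //
    // Reading it back, as the debug verification does
    // (region, bit_offset, row, column as in the code above):
    BitTable<kNumColumns> table;
    table.Decode(region, &bit_offset);
    uint32_t value = table.Get(row, column);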
diff --git a/libartbase/base/bit_table_test.cc b/libartbase/base/bit_table_test.cc
index 25bfcf0..e6f0d53 100644
--- a/libartbase/base/bit_table_test.cc
+++ b/libartbase/base/bit_table_test.cc
@@ -16,8 +16,14 @@
#include "bit_table.h"
+#include <map>
+
#include "gtest/gtest.h"
+#include "base/arena_allocator.h"
+#include "base/bit_utils.h"
+#include "base/malloc_arena_pool.h"
+
namespace art {
TEST(BitTableTest, TestVarint) {
@@ -38,9 +44,13 @@
}
TEST(BitTableTest, TestEmptyTable) {
+ MallocArenaPool pool;
+ ArenaStack arena_stack(&pool);
+ ScopedArenaAllocator allocator(&arena_stack);
+
std::vector<uint8_t> buffer;
size_t encode_bit_offset = 0;
- BitTableBuilder<1> builder;
+ BitTableBuilder<uint32_t> builder(&allocator);
builder.Encode(&buffer, &encode_bit_offset);
size_t decode_bit_offset = 0;
@@ -50,14 +60,18 @@
}
TEST(BitTableTest, TestSingleColumnTable) {
+ MallocArenaPool pool;
+ ArenaStack arena_stack(&pool);
+ ScopedArenaAllocator allocator(&arena_stack);
+
constexpr uint32_t kNoValue = -1;
std::vector<uint8_t> buffer;
size_t encode_bit_offset = 0;
- BitTableBuilder<1> builder;
- builder.AddRow(42u);
- builder.AddRow(kNoValue);
- builder.AddRow(1000u);
- builder.AddRow(kNoValue);
+ BitTableBuilder<uint32_t> builder(&allocator);
+ builder.Add(42u);
+ builder.Add(kNoValue);
+ builder.Add(1000u);
+ builder.Add(kNoValue);
builder.Encode(&buffer, &encode_bit_offset);
size_t decode_bit_offset = 0;
@@ -72,11 +86,15 @@
}
TEST(BitTableTest, TestUnalignedTable) {
+ MallocArenaPool pool;
+ ArenaStack arena_stack(&pool);
+ ScopedArenaAllocator allocator(&arena_stack);
+
for (size_t start_bit_offset = 0; start_bit_offset <= 32; start_bit_offset++) {
std::vector<uint8_t> buffer;
size_t encode_bit_offset = start_bit_offset;
- BitTableBuilder<1> builder;
- builder.AddRow(42u);
+ BitTableBuilder<uint32_t> builder(&allocator);
+ builder.Add(42u);
builder.Encode(&buffer, &encode_bit_offset);
size_t decode_bit_offset = start_bit_offset;
@@ -88,12 +106,22 @@
}
TEST(BitTableTest, TestBigTable) {
+ MallocArenaPool pool;
+ ArenaStack arena_stack(&pool);
+ ScopedArenaAllocator allocator(&arena_stack);
+
constexpr uint32_t kNoValue = -1;
std::vector<uint8_t> buffer;
size_t encode_bit_offset = 0;
- BitTableBuilder<4> builder;
- builder.AddRow(42u, kNoValue, 0u, static_cast<uint32_t>(-2));
- builder.AddRow(62u, kNoValue, 63u, static_cast<uint32_t>(-3));
+ struct RowData {
+ uint32_t a;
+ uint32_t b;
+ uint32_t c;
+ uint32_t d;
+ };
+ BitTableBuilder<RowData> builder(&allocator);
+ builder.Add(RowData{42u, kNoValue, 0u, static_cast<uint32_t>(-2)});
+ builder.Add(RowData{62u, kNoValue, 63u, static_cast<uint32_t>(-3)});
builder.Encode(&buffer, &encode_bit_offset);
size_t decode_bit_offset = 0;