Deduplicate stack masks
The stack masks repeat often enough so that it is worth deduplicating
them.
Oat size for a large app:
98143600 -> 96722288 (-1.44%)
Bug: 34621054
Test: test-art-host
Change-Id: If73d51e46066357049d5be2e406ae9a32b7ff1f4
diff --git a/compiler/optimizing/stack_map_stream.cc b/compiler/optimizing/stack_map_stream.cc
index 1b9bd7e..46497e3 100644
--- a/compiler/optimizing/stack_map_stream.cc
+++ b/compiler/optimizing/stack_map_stream.cc
@@ -16,6 +16,9 @@
#include "stack_map_stream.h"
+#include <unordered_map>
+
+#include "base/stl_util.h"
#include "art_method.h"
#include "runtime.h"
#include "scoped_thread_state_change-inl.h"
@@ -40,6 +43,7 @@
current_entry_.inline_infos_start_index = inline_infos_.size();
current_entry_.dex_register_map_hash = 0;
current_entry_.same_dex_register_map_as_ = kNoSameDexMapFound;
+ current_entry_.stack_mask_index = 0;
if (num_dex_registers != 0) {
current_entry_.live_dex_registers_mask =
ArenaBitVector::Create(allocator_, num_dex_registers, true, kArenaAllocStackMapStream);
@@ -153,32 +157,37 @@
}
size_t StackMapStream::PrepareForFillIn() {
- int stack_mask_number_of_bits = stack_mask_max_ + 1; // Need room for max element too.
+ size_t stack_mask_size_in_bits = stack_mask_max_ + 1; // Need room for max element too.
+ size_t number_of_stack_masks = PrepareStackMasks(stack_mask_size_in_bits);
dex_register_maps_size_ = ComputeDexRegisterMapsSize();
ComputeInlineInfoEncoding(); // needs dex_register_maps_size_.
inline_info_size_ = inline_infos_.size() * inline_info_encoding_.GetEntrySize();
CodeOffset max_native_pc_offset = ComputeMaxNativePcCodeOffset();
- // The stack map contains compressed native offsets.
+ // The stack map contains compressed native PC offsets.
size_t stack_map_size = stack_map_encoding_.SetFromSizes(max_native_pc_offset.CompressedValue(),
dex_pc_max_,
dex_register_maps_size_,
inline_info_size_,
register_mask_max_,
- stack_mask_number_of_bits);
+ number_of_stack_masks);
stack_maps_size_ = RoundUp(stack_maps_.size() * stack_map_size, kBitsPerByte) / kBitsPerByte;
dex_register_location_catalog_size_ = ComputeDexRegisterLocationCatalogSize();
+ size_t stack_masks_bytes =
+ RoundUp(number_of_stack_masks * stack_mask_size_in_bits, kBitsPerByte) / kBitsPerByte;
size_t non_header_size =
stack_maps_size_ +
dex_register_location_catalog_size_ +
dex_register_maps_size_ +
- inline_info_size_;
+ inline_info_size_ +
+ stack_masks_bytes;
// Prepare the CodeInfo variable-sized encoding.
CodeInfoEncoding code_info_encoding;
code_info_encoding.non_header_size = non_header_size;
code_info_encoding.number_of_stack_maps = stack_maps_.size();
- code_info_encoding.stack_map_size_in_bits = stack_map_size;
+ code_info_encoding.number_of_stack_masks = number_of_stack_masks;
+ code_info_encoding.stack_mask_size_in_bits = stack_mask_size_in_bits;
code_info_encoding.stack_map_encoding = stack_map_encoding_;
code_info_encoding.inline_info_encoding = inline_info_encoding_;
code_info_encoding.number_of_location_catalog_entries = location_catalog_entries_.size();
@@ -322,17 +331,7 @@
stack_map.SetDexPc(stack_map_encoding_, entry.dex_pc);
stack_map.SetNativePcCodeOffset(stack_map_encoding_, entry.native_pc_code_offset);
stack_map.SetRegisterMask(stack_map_encoding_, entry.register_mask);
- size_t number_of_stack_mask_bits = code_info.GetNumberOfStackMaskBits(encoding);
- if (entry.sp_mask != nullptr) {
- for (size_t bit = 0; bit < number_of_stack_mask_bits; bit++) {
- stack_map.SetStackMaskBit(stack_map_encoding_, bit, entry.sp_mask->IsBitSet(bit));
- }
- } else {
- // The MemoryRegion does not have to be zeroed, so make sure we clear the bits.
- for (size_t bit = 0; bit < number_of_stack_mask_bits; bit++) {
- stack_map.SetStackMaskBit(stack_map_encoding_, bit, false);
- }
- }
+ stack_map.SetStackMaskIndex(stack_map_encoding_, entry.stack_mask_index);
if (entry.num_dex_registers == 0 || (entry.live_dex_registers_mask->NumSetBits() == 0)) {
// No dex map available.
@@ -353,7 +352,7 @@
next_dex_register_map_offset += register_region.size();
DexRegisterMap dex_register_map(register_region);
stack_map.SetDexRegisterMapOffset(
- stack_map_encoding_, register_region.start() - dex_register_locations_region.start());
+ stack_map_encoding_, register_region.begin() - dex_register_locations_region.begin());
// Set the dex register location.
FillInDexRegisterMap(dex_register_map,
@@ -373,7 +372,7 @@
// Currently relative to the dex register map.
stack_map.SetInlineDescriptorOffset(
- stack_map_encoding_, inline_region.start() - dex_register_locations_region.start());
+ stack_map_encoding_, inline_region.begin() - dex_register_locations_region.begin());
inline_info.SetDepth(inline_info_encoding_, entry.inlining_depth);
DCHECK_LE(entry.inline_infos_start_index + entry.inlining_depth, inline_infos_.size());
@@ -408,7 +407,7 @@
DexRegisterMap dex_register_map(register_region);
inline_info.SetDexRegisterMapOffsetAtDepth(
inline_info_encoding_,
- depth, register_region.start() - dex_register_locations_region.start());
+ depth, register_region.begin() - dex_register_locations_region.begin());
FillInDexRegisterMap(dex_register_map,
inline_entry.num_dex_registers,
@@ -423,6 +422,19 @@
}
}
+ // Write stack masks at the end.
+ size_t stack_mask_bits = encoding.stack_mask_size_in_bits;
+ if (stack_mask_bits > 0) {
+ size_t stack_mask_bytes = RoundUp(stack_mask_bits, kBitsPerByte) / kBitsPerByte;
+ for (size_t i = 0; i < encoding.number_of_stack_masks; ++i) {
+ MemoryRegion source(&stack_masks_[i * stack_mask_bytes], stack_mask_bytes);
+ BitMemoryRegion stack_mask = code_info.GetStackMask(encoding, i);
+ for (size_t bit_index = 0; bit_index < encoding.stack_mask_size_in_bits; ++bit_index) {
+ stack_mask.StoreBit(bit_index, source.LoadBit(bit_index));
+ }
+ }
+ }
+
// Verify all written data in debug build.
if (kIsDebugBuild) {
CheckCodeInfo(region);
@@ -536,6 +548,27 @@
}
}
+size_t StackMapStream::PrepareStackMasks(size_t entry_size_in_bits) {
+ // Preallocate memory since we do not want it to move (the dedup map will point into it).
+ const size_t byte_entry_size = RoundUp(entry_size_in_bits, kBitsPerByte) / kBitsPerByte;
+ stack_masks_.resize(byte_entry_size * stack_maps_.size(), 0u);
+ // For deduplicating we store the stack masks as byte packed for simplicity. We can bit pack later
+ // when copying out from stack_masks_.
+ std::unordered_map<MemoryRegion,
+ size_t,
+ FNVHash<MemoryRegion>,
+ MemoryRegion::ContentEquals> dedup(stack_maps_.size());
+ for (StackMapEntry& stack_map : stack_maps_) {
+ size_t index = dedup.size();
+ MemoryRegion stack_mask(stack_masks_.data() + index * byte_entry_size, byte_entry_size);
+ for (size_t i = 0; i < entry_size_in_bits; i++) {
+ stack_mask.StoreBit(i, stack_map.sp_mask != nullptr && stack_map.sp_mask->IsBitSet(i));
+ }
+ stack_map.stack_mask_index = dedup.emplace(stack_mask, index).first->second;
+ }
+ return dedup.size();
+}
+
// Check that all StackMapStream inputs are correctly encoded by trying to read them back.
void StackMapStream::CheckCodeInfo(MemoryRegion region) const {
CodeInfo code_info(region);
@@ -551,15 +584,17 @@
entry.native_pc_code_offset.Uint32Value(instruction_set_));
DCHECK_EQ(stack_map.GetDexPc(stack_map_encoding), entry.dex_pc);
DCHECK_EQ(stack_map.GetRegisterMask(stack_map_encoding), entry.register_mask);
- size_t num_stack_mask_bits = code_info.GetNumberOfStackMaskBits(encoding);
+ const size_t num_stack_mask_bits = code_info.GetNumberOfStackMaskBits(encoding);
+ DCHECK_EQ(stack_map.GetStackMaskIndex(stack_map_encoding), entry.stack_mask_index);
+ BitMemoryRegion stack_mask = code_info.GetStackMaskOf(encoding, stack_map);
if (entry.sp_mask != nullptr) {
- DCHECK_GE(num_stack_mask_bits, entry.sp_mask->GetNumberOfBits());
+ DCHECK_GE(stack_mask.size_in_bits(), entry.sp_mask->GetNumberOfBits());
for (size_t b = 0; b < num_stack_mask_bits; b++) {
- DCHECK_EQ(stack_map.GetStackMaskBit(stack_map_encoding, b), entry.sp_mask->IsBitSet(b));
+ DCHECK_EQ(stack_mask.LoadBit(b), entry.sp_mask->IsBitSet(b));
}
} else {
for (size_t b = 0; b < num_stack_mask_bits; b++) {
- DCHECK_EQ(stack_map.GetStackMaskBit(stack_map_encoding, b), 0u);
+ DCHECK_EQ(stack_mask.LoadBit(b), 0u);
}
}
diff --git a/compiler/optimizing/stack_map_stream.h b/compiler/optimizing/stack_map_stream.h
index 8fec472..e2e16e8 100644
--- a/compiler/optimizing/stack_map_stream.h
+++ b/compiler/optimizing/stack_map_stream.h
@@ -68,6 +68,7 @@
location_catalog_entries_indices_(allocator->Adapter(kArenaAllocStackMapStream)),
dex_register_locations_(allocator->Adapter(kArenaAllocStackMapStream)),
inline_infos_(allocator->Adapter(kArenaAllocStackMapStream)),
+ stack_masks_(allocator->Adapter(kArenaAllocStackMapStream)),
stack_mask_max_(-1),
dex_pc_max_(0),
register_mask_max_(0),
@@ -107,6 +108,7 @@
BitVector* live_dex_registers_mask;
uint32_t dex_register_map_hash;
size_t same_dex_register_map_as_;
+ uint32_t stack_mask_index;
};
struct InlineInfoEntry {
@@ -160,6 +162,9 @@
CodeOffset ComputeMaxNativePcCodeOffset() const;
+ // Returns the number of unique stack masks.
+ size_t PrepareStackMasks(size_t entry_size_in_bits);
+
// Returns the index of an entry with the same dex register map as the current_entry,
// or kNoSameDexMapFound if no such entry exists.
size_t FindEntryWithTheSameDexMap();
@@ -193,6 +198,7 @@
// A set of concatenated maps of Dex register locations indices to `location_catalog_entries_`.
ArenaVector<size_t> dex_register_locations_;
ArenaVector<InlineInfoEntry> inline_infos_;
+ ArenaVector<uint8_t> stack_masks_;
int stack_mask_max_;
uint32_t dex_pc_max_;
uint32_t register_mask_max_;
diff --git a/compiler/optimizing/stack_map_test.cc b/compiler/optimizing/stack_map_test.cc
index da4597e..da68b60 100644
--- a/compiler/optimizing/stack_map_test.cc
+++ b/compiler/optimizing/stack_map_test.cc
@@ -27,15 +27,16 @@
// Check that the stack mask of given stack map is identical
// to the given bit vector. Returns true if they are same.
static bool CheckStackMask(
- int number_of_bits,
+ const CodeInfo& code_info,
+ const CodeInfoEncoding& encoding,
const StackMap& stack_map,
- StackMapEncoding& encoding,
const BitVector& bit_vector) {
- if (bit_vector.GetHighestBitSet() >= number_of_bits) {
+ BitMemoryRegion stack_mask = code_info.GetStackMaskOf(encoding, stack_map);
+ if (bit_vector.GetNumberOfBits() > encoding.stack_mask_size_in_bits) {
return false;
}
- for (int i = 0; i < number_of_bits; ++i) {
- if (stack_map.GetStackMaskBit(encoding, i) != bit_vector.IsBitSet(i)) {
+ for (size_t i = 0; i < encoding.stack_mask_size_in_bits; ++i) {
+ if (stack_mask.LoadBit(i) != bit_vector.IsBitSet(i)) {
return false;
}
}
@@ -81,10 +82,7 @@
ASSERT_EQ(64u, stack_map.GetNativePcOffset(encoding.stack_map_encoding, kRuntimeISA));
ASSERT_EQ(0x3u, stack_map.GetRegisterMask(encoding.stack_map_encoding));
- ASSERT_TRUE(CheckStackMask(code_info.GetNumberOfStackMaskBits(encoding),
- stack_map,
- encoding.stack_map_encoding,
- sp_mask));
+ ASSERT_TRUE(CheckStackMask(code_info, encoding, stack_map, sp_mask));
ASSERT_TRUE(stack_map.HasDexRegisterMap(encoding.stack_map_encoding));
DexRegisterMap dex_register_map =
@@ -199,10 +197,7 @@
ASSERT_EQ(64u, stack_map.GetNativePcOffset(encoding.stack_map_encoding, kRuntimeISA));
ASSERT_EQ(0x3u, stack_map.GetRegisterMask(encoding.stack_map_encoding));
- ASSERT_TRUE(CheckStackMask(code_info.GetNumberOfStackMaskBits(encoding),
- stack_map,
- encoding.stack_map_encoding,
- sp_mask1));
+ ASSERT_TRUE(CheckStackMask(code_info, encoding, stack_map, sp_mask1));
ASSERT_TRUE(stack_map.HasDexRegisterMap(encoding.stack_map_encoding));
DexRegisterMap dex_register_map =
@@ -261,10 +256,7 @@
ASSERT_EQ(128u, stack_map.GetNativePcOffset(encoding.stack_map_encoding, kRuntimeISA));
ASSERT_EQ(0xFFu, stack_map.GetRegisterMask(encoding.stack_map_encoding));
- ASSERT_TRUE(CheckStackMask(code_info.GetNumberOfStackMaskBits(encoding),
- stack_map,
- encoding.stack_map_encoding,
- sp_mask2));
+ ASSERT_TRUE(CheckStackMask(code_info, encoding, stack_map, sp_mask2));
ASSERT_TRUE(stack_map.HasDexRegisterMap(encoding.stack_map_encoding));
DexRegisterMap dex_register_map =
@@ -318,10 +310,7 @@
ASSERT_EQ(192u, stack_map.GetNativePcOffset(encoding.stack_map_encoding, kRuntimeISA));
ASSERT_EQ(0xABu, stack_map.GetRegisterMask(encoding.stack_map_encoding));
- ASSERT_TRUE(CheckStackMask(code_info.GetNumberOfStackMaskBits(encoding),
- stack_map,
- encoding.stack_map_encoding,
- sp_mask3));
+ ASSERT_TRUE(CheckStackMask(code_info, encoding, stack_map, sp_mask3));
ASSERT_TRUE(stack_map.HasDexRegisterMap(encoding.stack_map_encoding));
DexRegisterMap dex_register_map =
@@ -375,10 +364,7 @@
ASSERT_EQ(256u, stack_map.GetNativePcOffset(encoding.stack_map_encoding, kRuntimeISA));
ASSERT_EQ(0xCDu, stack_map.GetRegisterMask(encoding.stack_map_encoding));
- ASSERT_TRUE(CheckStackMask(code_info.GetNumberOfStackMaskBits(encoding),
- stack_map,
- encoding.stack_map_encoding,
- sp_mask4));
+ ASSERT_TRUE(CheckStackMask(code_info, encoding, stack_map, sp_mask4));
ASSERT_TRUE(stack_map.HasDexRegisterMap(encoding.stack_map_encoding));
DexRegisterMap dex_register_map =
@@ -854,4 +840,33 @@
EXPECT_EQ(offset_mips64.Uint32Value(kMips64), kMips64InstructionAlignment);
}
+
+TEST(StackMapTest, TestDeduplicateStackMask) {
+ ArenaPool pool;
+ ArenaAllocator arena(&pool);
+ StackMapStream stream(&arena, kRuntimeISA);
+
+ ArenaBitVector sp_mask(&arena, 0, true);
+ sp_mask.SetBit(1);
+ sp_mask.SetBit(4);
+ stream.BeginStackMapEntry(0, 4, 0x3, &sp_mask, 0, 0);
+ stream.EndStackMapEntry();
+ stream.BeginStackMapEntry(0, 8, 0x3, &sp_mask, 0, 0);
+ stream.EndStackMapEntry();
+
+ size_t size = stream.PrepareForFillIn();
+ void* memory = arena.Alloc(size, kArenaAllocMisc);
+ MemoryRegion region(memory, size);
+ stream.FillIn(region);
+
+ CodeInfo code_info(region);
+ CodeInfoEncoding encoding = code_info.ExtractEncoding();
+ ASSERT_EQ(2u, code_info.GetNumberOfStackMaps(encoding));
+
+ StackMap stack_map1 = code_info.GetStackMapForNativePcOffset(4, encoding);
+ StackMap stack_map2 = code_info.GetStackMapForNativePcOffset(8, encoding);
+ EXPECT_EQ(stack_map1.GetStackMaskIndex(encoding.stack_map_encoding),
+ stack_map2.GetStackMaskIndex(encoding.stack_map_encoding));
+}
+
} // namespace art