Deduplicate stack masks

The stack masks repeat often enough so that it is worth deduplicating
them.

Oat size for a large app:
98143600 -> 96722288 (-1.44%)

Bug: 34621054

Test: test-art-host
Change-Id: If73d51e46066357049d5be2e406ae9a32b7ff1f4
diff --git a/compiler/optimizing/stack_map_stream.cc b/compiler/optimizing/stack_map_stream.cc
index 1b9bd7e..46497e3 100644
--- a/compiler/optimizing/stack_map_stream.cc
+++ b/compiler/optimizing/stack_map_stream.cc
@@ -16,6 +16,9 @@
 
 #include "stack_map_stream.h"
 
+#include <unordered_map>
+
+#include "base/stl_util.h"
 #include "art_method.h"
 #include "runtime.h"
 #include "scoped_thread_state_change-inl.h"
@@ -40,6 +43,7 @@
   current_entry_.inline_infos_start_index = inline_infos_.size();
   current_entry_.dex_register_map_hash = 0;
   current_entry_.same_dex_register_map_as_ = kNoSameDexMapFound;
+  current_entry_.stack_mask_index = 0;
   if (num_dex_registers != 0) {
     current_entry_.live_dex_registers_mask =
         ArenaBitVector::Create(allocator_, num_dex_registers, true, kArenaAllocStackMapStream);
@@ -153,32 +157,37 @@
 }
 
 size_t StackMapStream::PrepareForFillIn() {
-  int stack_mask_number_of_bits = stack_mask_max_ + 1;  // Need room for max element too.
+  size_t stack_mask_size_in_bits = stack_mask_max_ + 1;  // Need room for max element too.
+  size_t number_of_stack_masks = PrepareStackMasks(stack_mask_size_in_bits);
   dex_register_maps_size_ = ComputeDexRegisterMapsSize();
   ComputeInlineInfoEncoding();  // needs dex_register_maps_size_.
   inline_info_size_ = inline_infos_.size() * inline_info_encoding_.GetEntrySize();
   CodeOffset max_native_pc_offset = ComputeMaxNativePcCodeOffset();
-  // The stack map contains compressed native offsets.
+  // The stack map contains compressed native PC offsets.
   size_t stack_map_size = stack_map_encoding_.SetFromSizes(max_native_pc_offset.CompressedValue(),
                                                            dex_pc_max_,
                                                            dex_register_maps_size_,
                                                            inline_info_size_,
                                                            register_mask_max_,
-                                                           stack_mask_number_of_bits);
+                                                           number_of_stack_masks);
   stack_maps_size_ = RoundUp(stack_maps_.size() * stack_map_size, kBitsPerByte) / kBitsPerByte;
   dex_register_location_catalog_size_ = ComputeDexRegisterLocationCatalogSize();
+  size_t stack_masks_bytes =
+      RoundUp(number_of_stack_masks * stack_mask_size_in_bits, kBitsPerByte) / kBitsPerByte;
 
   size_t non_header_size =
       stack_maps_size_ +
       dex_register_location_catalog_size_ +
       dex_register_maps_size_ +
-      inline_info_size_;
+      inline_info_size_ +
+      stack_masks_bytes;
 
   // Prepare the CodeInfo variable-sized encoding.
   CodeInfoEncoding code_info_encoding;
   code_info_encoding.non_header_size = non_header_size;
   code_info_encoding.number_of_stack_maps = stack_maps_.size();
-  code_info_encoding.stack_map_size_in_bits = stack_map_size;
+  code_info_encoding.number_of_stack_masks = number_of_stack_masks;
+  code_info_encoding.stack_mask_size_in_bits = stack_mask_size_in_bits;
   code_info_encoding.stack_map_encoding = stack_map_encoding_;
   code_info_encoding.inline_info_encoding = inline_info_encoding_;
   code_info_encoding.number_of_location_catalog_entries = location_catalog_entries_.size();
@@ -322,17 +331,7 @@
     stack_map.SetDexPc(stack_map_encoding_, entry.dex_pc);
     stack_map.SetNativePcCodeOffset(stack_map_encoding_, entry.native_pc_code_offset);
     stack_map.SetRegisterMask(stack_map_encoding_, entry.register_mask);
-    size_t number_of_stack_mask_bits = code_info.GetNumberOfStackMaskBits(encoding);
-    if (entry.sp_mask != nullptr) {
-      for (size_t bit = 0; bit < number_of_stack_mask_bits; bit++) {
-        stack_map.SetStackMaskBit(stack_map_encoding_, bit, entry.sp_mask->IsBitSet(bit));
-      }
-    } else {
-      // The MemoryRegion does not have to be zeroed, so make sure we clear the bits.
-      for (size_t bit = 0; bit < number_of_stack_mask_bits; bit++) {
-        stack_map.SetStackMaskBit(stack_map_encoding_, bit, false);
-      }
-    }
+    stack_map.SetStackMaskIndex(stack_map_encoding_, entry.stack_mask_index);
 
     if (entry.num_dex_registers == 0 || (entry.live_dex_registers_mask->NumSetBits() == 0)) {
       // No dex map available.
@@ -353,7 +352,7 @@
         next_dex_register_map_offset += register_region.size();
         DexRegisterMap dex_register_map(register_region);
         stack_map.SetDexRegisterMapOffset(
-            stack_map_encoding_, register_region.start() - dex_register_locations_region.start());
+            stack_map_encoding_, register_region.begin() - dex_register_locations_region.begin());
 
         // Set the dex register location.
         FillInDexRegisterMap(dex_register_map,
@@ -373,7 +372,7 @@
 
       // Currently relative to the dex register map.
       stack_map.SetInlineDescriptorOffset(
-          stack_map_encoding_, inline_region.start() - dex_register_locations_region.start());
+          stack_map_encoding_, inline_region.begin() - dex_register_locations_region.begin());
 
       inline_info.SetDepth(inline_info_encoding_, entry.inlining_depth);
       DCHECK_LE(entry.inline_infos_start_index + entry.inlining_depth, inline_infos_.size());
@@ -408,7 +407,7 @@
           DexRegisterMap dex_register_map(register_region);
           inline_info.SetDexRegisterMapOffsetAtDepth(
               inline_info_encoding_,
-              depth, register_region.start() - dex_register_locations_region.start());
+              depth, register_region.begin() - dex_register_locations_region.begin());
 
           FillInDexRegisterMap(dex_register_map,
                                inline_entry.num_dex_registers,
@@ -423,6 +422,19 @@
     }
   }
 
+  // Write stack masks at the end.
+  size_t stack_mask_bits = encoding.stack_mask_size_in_bits;
+  if (stack_mask_bits > 0) {
+    size_t stack_mask_bytes = RoundUp(stack_mask_bits, kBitsPerByte) / kBitsPerByte;
+    for (size_t i = 0; i < encoding.number_of_stack_masks; ++i) {
+      MemoryRegion source(&stack_masks_[i * stack_mask_bytes], stack_mask_bytes);
+      BitMemoryRegion stack_mask = code_info.GetStackMask(encoding, i);
+      for (size_t bit_index = 0; bit_index < encoding.stack_mask_size_in_bits; ++bit_index) {
+        stack_mask.StoreBit(bit_index, source.LoadBit(bit_index));
+      }
+    }
+  }
+
   // Verify all written data in debug build.
   if (kIsDebugBuild) {
     CheckCodeInfo(region);
@@ -536,6 +548,27 @@
   }
 }
 
+size_t StackMapStream::PrepareStackMasks(size_t entry_size_in_bits) {
+  // Preallocate memory since we do not want it to move (the dedup map will point into it).
+  const size_t byte_entry_size = RoundUp(entry_size_in_bits, kBitsPerByte) / kBitsPerByte;
+  stack_masks_.resize(byte_entry_size * stack_maps_.size(), 0u);
+  // For deduplicating we store the stack masks as byte packed for simplicity. We can bit pack later
+  // when copying out from stack_masks_.
+  std::unordered_map<MemoryRegion,
+                     size_t,
+                     FNVHash<MemoryRegion>,
+                     MemoryRegion::ContentEquals> dedup(stack_maps_.size());
+  for (StackMapEntry& stack_map : stack_maps_) {
+    size_t index = dedup.size();
+    MemoryRegion stack_mask(stack_masks_.data() + index * byte_entry_size, byte_entry_size);
+    for (size_t i = 0; i < entry_size_in_bits; i++) {
+      stack_mask.StoreBit(i, stack_map.sp_mask != nullptr && stack_map.sp_mask->IsBitSet(i));
+    }
+    stack_map.stack_mask_index = dedup.emplace(stack_mask, index).first->second;
+  }
+  return dedup.size();
+}
+
 // Check that all StackMapStream inputs are correctly encoded by trying to read them back.
 void StackMapStream::CheckCodeInfo(MemoryRegion region) const {
   CodeInfo code_info(region);
@@ -551,15 +584,17 @@
               entry.native_pc_code_offset.Uint32Value(instruction_set_));
     DCHECK_EQ(stack_map.GetDexPc(stack_map_encoding), entry.dex_pc);
     DCHECK_EQ(stack_map.GetRegisterMask(stack_map_encoding), entry.register_mask);
-    size_t num_stack_mask_bits = code_info.GetNumberOfStackMaskBits(encoding);
+    const size_t num_stack_mask_bits = code_info.GetNumberOfStackMaskBits(encoding);
+    DCHECK_EQ(stack_map.GetStackMaskIndex(stack_map_encoding), entry.stack_mask_index);
+    BitMemoryRegion stack_mask = code_info.GetStackMaskOf(encoding, stack_map);
     if (entry.sp_mask != nullptr) {
-      DCHECK_GE(num_stack_mask_bits, entry.sp_mask->GetNumberOfBits());
+      DCHECK_GE(stack_mask.size_in_bits(), entry.sp_mask->GetNumberOfBits());
       for (size_t b = 0; b < num_stack_mask_bits; b++) {
-        DCHECK_EQ(stack_map.GetStackMaskBit(stack_map_encoding, b), entry.sp_mask->IsBitSet(b));
+        DCHECK_EQ(stack_mask.LoadBit(b), entry.sp_mask->IsBitSet(b));
       }
     } else {
       for (size_t b = 0; b < num_stack_mask_bits; b++) {
-        DCHECK_EQ(stack_map.GetStackMaskBit(stack_map_encoding, b), 0u);
+        DCHECK_EQ(stack_mask.LoadBit(b), 0u);
       }
     }
 
diff --git a/compiler/optimizing/stack_map_stream.h b/compiler/optimizing/stack_map_stream.h
index 8fec472..e2e16e8 100644
--- a/compiler/optimizing/stack_map_stream.h
+++ b/compiler/optimizing/stack_map_stream.h
@@ -68,6 +68,7 @@
         location_catalog_entries_indices_(allocator->Adapter(kArenaAllocStackMapStream)),
         dex_register_locations_(allocator->Adapter(kArenaAllocStackMapStream)),
         inline_infos_(allocator->Adapter(kArenaAllocStackMapStream)),
+        stack_masks_(allocator->Adapter(kArenaAllocStackMapStream)),
         stack_mask_max_(-1),
         dex_pc_max_(0),
         register_mask_max_(0),
@@ -107,6 +108,7 @@
     BitVector* live_dex_registers_mask;
     uint32_t dex_register_map_hash;
     size_t same_dex_register_map_as_;
+    uint32_t stack_mask_index;
   };
 
   struct InlineInfoEntry {
@@ -160,6 +162,9 @@
 
   CodeOffset ComputeMaxNativePcCodeOffset() const;
 
+  // Returns the number of unique stack masks.
+  size_t PrepareStackMasks(size_t entry_size_in_bits);
+
   // Returns the index of an entry with the same dex register map as the current_entry,
   // or kNoSameDexMapFound if no such entry exists.
   size_t FindEntryWithTheSameDexMap();
@@ -193,6 +198,7 @@
   // A set of concatenated maps of Dex register locations indices to `location_catalog_entries_`.
   ArenaVector<size_t> dex_register_locations_;
   ArenaVector<InlineInfoEntry> inline_infos_;
+  ArenaVector<uint8_t> stack_masks_;
   int stack_mask_max_;
   uint32_t dex_pc_max_;
   uint32_t register_mask_max_;
diff --git a/compiler/optimizing/stack_map_test.cc b/compiler/optimizing/stack_map_test.cc
index da4597e..da68b60 100644
--- a/compiler/optimizing/stack_map_test.cc
+++ b/compiler/optimizing/stack_map_test.cc
@@ -27,15 +27,16 @@
 // Check that the stack mask of given stack map is identical
 // to the given bit vector. Returns true if they are same.
 static bool CheckStackMask(
-    int number_of_bits,
+    const CodeInfo& code_info,
+    const CodeInfoEncoding& encoding,
     const StackMap& stack_map,
-    StackMapEncoding& encoding,
     const BitVector& bit_vector) {
-  if (bit_vector.GetHighestBitSet() >= number_of_bits) {
+  BitMemoryRegion stack_mask = code_info.GetStackMaskOf(encoding, stack_map);
+  if (bit_vector.GetNumberOfBits() > encoding.stack_mask_size_in_bits) {
     return false;
   }
-  for (int i = 0; i < number_of_bits; ++i) {
-    if (stack_map.GetStackMaskBit(encoding, i) != bit_vector.IsBitSet(i)) {
+  for (size_t i = 0; i < encoding.stack_mask_size_in_bits; ++i) {
+    if (stack_mask.LoadBit(i) != bit_vector.IsBitSet(i)) {
       return false;
     }
   }
@@ -81,10 +82,7 @@
   ASSERT_EQ(64u, stack_map.GetNativePcOffset(encoding.stack_map_encoding, kRuntimeISA));
   ASSERT_EQ(0x3u, stack_map.GetRegisterMask(encoding.stack_map_encoding));
 
-  ASSERT_TRUE(CheckStackMask(code_info.GetNumberOfStackMaskBits(encoding),
-                             stack_map,
-                             encoding.stack_map_encoding,
-                             sp_mask));
+  ASSERT_TRUE(CheckStackMask(code_info, encoding, stack_map, sp_mask));
 
   ASSERT_TRUE(stack_map.HasDexRegisterMap(encoding.stack_map_encoding));
   DexRegisterMap dex_register_map =
@@ -199,10 +197,7 @@
     ASSERT_EQ(64u, stack_map.GetNativePcOffset(encoding.stack_map_encoding, kRuntimeISA));
     ASSERT_EQ(0x3u, stack_map.GetRegisterMask(encoding.stack_map_encoding));
 
-    ASSERT_TRUE(CheckStackMask(code_info.GetNumberOfStackMaskBits(encoding),
-                               stack_map,
-                               encoding.stack_map_encoding,
-                               sp_mask1));
+    ASSERT_TRUE(CheckStackMask(code_info, encoding, stack_map, sp_mask1));
 
     ASSERT_TRUE(stack_map.HasDexRegisterMap(encoding.stack_map_encoding));
     DexRegisterMap dex_register_map =
@@ -261,10 +256,7 @@
     ASSERT_EQ(128u, stack_map.GetNativePcOffset(encoding.stack_map_encoding, kRuntimeISA));
     ASSERT_EQ(0xFFu, stack_map.GetRegisterMask(encoding.stack_map_encoding));
 
-    ASSERT_TRUE(CheckStackMask(code_info.GetNumberOfStackMaskBits(encoding),
-                               stack_map,
-                               encoding.stack_map_encoding,
-                               sp_mask2));
+    ASSERT_TRUE(CheckStackMask(code_info, encoding, stack_map, sp_mask2));
 
     ASSERT_TRUE(stack_map.HasDexRegisterMap(encoding.stack_map_encoding));
     DexRegisterMap dex_register_map =
@@ -318,10 +310,7 @@
     ASSERT_EQ(192u, stack_map.GetNativePcOffset(encoding.stack_map_encoding, kRuntimeISA));
     ASSERT_EQ(0xABu, stack_map.GetRegisterMask(encoding.stack_map_encoding));
 
-    ASSERT_TRUE(CheckStackMask(code_info.GetNumberOfStackMaskBits(encoding),
-                               stack_map,
-                               encoding.stack_map_encoding,
-                               sp_mask3));
+    ASSERT_TRUE(CheckStackMask(code_info, encoding, stack_map, sp_mask3));
 
     ASSERT_TRUE(stack_map.HasDexRegisterMap(encoding.stack_map_encoding));
     DexRegisterMap dex_register_map =
@@ -375,10 +364,7 @@
     ASSERT_EQ(256u, stack_map.GetNativePcOffset(encoding.stack_map_encoding, kRuntimeISA));
     ASSERT_EQ(0xCDu, stack_map.GetRegisterMask(encoding.stack_map_encoding));
 
-    ASSERT_TRUE(CheckStackMask(code_info.GetNumberOfStackMaskBits(encoding),
-                               stack_map,
-                               encoding.stack_map_encoding,
-                               sp_mask4));
+    ASSERT_TRUE(CheckStackMask(code_info, encoding, stack_map, sp_mask4));
 
     ASSERT_TRUE(stack_map.HasDexRegisterMap(encoding.stack_map_encoding));
     DexRegisterMap dex_register_map =
@@ -854,4 +840,33 @@
   EXPECT_EQ(offset_mips64.Uint32Value(kMips64), kMips64InstructionAlignment);
 }
 
+
+TEST(StackMapTest, TestDeduplicateStackMask) {
+  ArenaPool pool;
+  ArenaAllocator arena(&pool);
+  StackMapStream stream(&arena, kRuntimeISA);
+
+  ArenaBitVector sp_mask(&arena, 0, true);
+  sp_mask.SetBit(1);
+  sp_mask.SetBit(4);
+  stream.BeginStackMapEntry(0, 4, 0x3, &sp_mask, 0, 0);
+  stream.EndStackMapEntry();
+  stream.BeginStackMapEntry(0, 8, 0x3, &sp_mask, 0, 0);
+  stream.EndStackMapEntry();
+
+  size_t size = stream.PrepareForFillIn();
+  void* memory = arena.Alloc(size, kArenaAllocMisc);
+  MemoryRegion region(memory, size);
+  stream.FillIn(region);
+
+  CodeInfo code_info(region);
+  CodeInfoEncoding encoding = code_info.ExtractEncoding();
+  ASSERT_EQ(2u, code_info.GetNumberOfStackMaps(encoding));
+
+  StackMap stack_map1 = code_info.GetStackMapForNativePcOffset(4, encoding);
+  StackMap stack_map2 = code_info.GetStackMapForNativePcOffset(8, encoding);
+  EXPECT_EQ(stack_map1.GetStackMaskIndex(encoding.stack_map_encoding),
+            stack_map2.GetStackMaskIndex(encoding.stack_map_encoding));
+}
+
 }  // namespace art