Deduplicate stack masks

The stack masks repeat often enough so that it is worth deduplicating
them.

Oat size for a large app:
98143600 -> 96722288 (-1.44%)

Bug: 34621054

Test: test-art-host
Change-Id: If73d51e46066357049d5be2e406ae9a32b7ff1f4
diff --git a/compiler/optimizing/stack_map_stream.cc b/compiler/optimizing/stack_map_stream.cc
index 1b9bd7e..46497e3 100644
--- a/compiler/optimizing/stack_map_stream.cc
+++ b/compiler/optimizing/stack_map_stream.cc
@@ -16,6 +16,9 @@
 
 #include "stack_map_stream.h"
 
+#include <unordered_map>
+
+#include "base/stl_util.h"
 #include "art_method.h"
 #include "runtime.h"
 #include "scoped_thread_state_change-inl.h"
@@ -40,6 +43,7 @@
   current_entry_.inline_infos_start_index = inline_infos_.size();
   current_entry_.dex_register_map_hash = 0;
   current_entry_.same_dex_register_map_as_ = kNoSameDexMapFound;
+  current_entry_.stack_mask_index = 0;
   if (num_dex_registers != 0) {
     current_entry_.live_dex_registers_mask =
         ArenaBitVector::Create(allocator_, num_dex_registers, true, kArenaAllocStackMapStream);
@@ -153,32 +157,37 @@
 }
 
 size_t StackMapStream::PrepareForFillIn() {
-  int stack_mask_number_of_bits = stack_mask_max_ + 1;  // Need room for max element too.
+  size_t stack_mask_size_in_bits = stack_mask_max_ + 1;  // Need room for max element too.
+  size_t number_of_stack_masks = PrepareStackMasks(stack_mask_size_in_bits);
   dex_register_maps_size_ = ComputeDexRegisterMapsSize();
   ComputeInlineInfoEncoding();  // needs dex_register_maps_size_.
   inline_info_size_ = inline_infos_.size() * inline_info_encoding_.GetEntrySize();
   CodeOffset max_native_pc_offset = ComputeMaxNativePcCodeOffset();
-  // The stack map contains compressed native offsets.
+  // The stack map contains compressed native PC offsets.
   size_t stack_map_size = stack_map_encoding_.SetFromSizes(max_native_pc_offset.CompressedValue(),
                                                            dex_pc_max_,
                                                            dex_register_maps_size_,
                                                            inline_info_size_,
                                                            register_mask_max_,
-                                                           stack_mask_number_of_bits);
+                                                           number_of_stack_masks);
   stack_maps_size_ = RoundUp(stack_maps_.size() * stack_map_size, kBitsPerByte) / kBitsPerByte;
   dex_register_location_catalog_size_ = ComputeDexRegisterLocationCatalogSize();
+  size_t stack_masks_bytes =
+      RoundUp(number_of_stack_masks * stack_mask_size_in_bits, kBitsPerByte) / kBitsPerByte;
 
   size_t non_header_size =
       stack_maps_size_ +
       dex_register_location_catalog_size_ +
       dex_register_maps_size_ +
-      inline_info_size_;
+      inline_info_size_ +
+      stack_masks_bytes;
 
   // Prepare the CodeInfo variable-sized encoding.
   CodeInfoEncoding code_info_encoding;
   code_info_encoding.non_header_size = non_header_size;
   code_info_encoding.number_of_stack_maps = stack_maps_.size();
-  code_info_encoding.stack_map_size_in_bits = stack_map_size;
+  code_info_encoding.number_of_stack_masks = number_of_stack_masks;
+  code_info_encoding.stack_mask_size_in_bits = stack_mask_size_in_bits;
   code_info_encoding.stack_map_encoding = stack_map_encoding_;
   code_info_encoding.inline_info_encoding = inline_info_encoding_;
   code_info_encoding.number_of_location_catalog_entries = location_catalog_entries_.size();
@@ -322,17 +331,7 @@
     stack_map.SetDexPc(stack_map_encoding_, entry.dex_pc);
     stack_map.SetNativePcCodeOffset(stack_map_encoding_, entry.native_pc_code_offset);
     stack_map.SetRegisterMask(stack_map_encoding_, entry.register_mask);
-    size_t number_of_stack_mask_bits = code_info.GetNumberOfStackMaskBits(encoding);
-    if (entry.sp_mask != nullptr) {
-      for (size_t bit = 0; bit < number_of_stack_mask_bits; bit++) {
-        stack_map.SetStackMaskBit(stack_map_encoding_, bit, entry.sp_mask->IsBitSet(bit));
-      }
-    } else {
-      // The MemoryRegion does not have to be zeroed, so make sure we clear the bits.
-      for (size_t bit = 0; bit < number_of_stack_mask_bits; bit++) {
-        stack_map.SetStackMaskBit(stack_map_encoding_, bit, false);
-      }
-    }
+    stack_map.SetStackMaskIndex(stack_map_encoding_, entry.stack_mask_index);
 
     if (entry.num_dex_registers == 0 || (entry.live_dex_registers_mask->NumSetBits() == 0)) {
       // No dex map available.
@@ -353,7 +352,7 @@
         next_dex_register_map_offset += register_region.size();
         DexRegisterMap dex_register_map(register_region);
         stack_map.SetDexRegisterMapOffset(
-            stack_map_encoding_, register_region.start() - dex_register_locations_region.start());
+            stack_map_encoding_, register_region.begin() - dex_register_locations_region.begin());
 
         // Set the dex register location.
         FillInDexRegisterMap(dex_register_map,
@@ -373,7 +372,7 @@
 
       // Currently relative to the dex register map.
       stack_map.SetInlineDescriptorOffset(
-          stack_map_encoding_, inline_region.start() - dex_register_locations_region.start());
+          stack_map_encoding_, inline_region.begin() - dex_register_locations_region.begin());
 
       inline_info.SetDepth(inline_info_encoding_, entry.inlining_depth);
       DCHECK_LE(entry.inline_infos_start_index + entry.inlining_depth, inline_infos_.size());
@@ -408,7 +407,7 @@
           DexRegisterMap dex_register_map(register_region);
           inline_info.SetDexRegisterMapOffsetAtDepth(
               inline_info_encoding_,
-              depth, register_region.start() - dex_register_locations_region.start());
+              depth, register_region.begin() - dex_register_locations_region.begin());
 
           FillInDexRegisterMap(dex_register_map,
                                inline_entry.num_dex_registers,
@@ -423,6 +422,19 @@
     }
   }
 
+  // Write stack masks at the end.
+  size_t stack_mask_bits = encoding.stack_mask_size_in_bits;
+  if (stack_mask_bits > 0) {
+    size_t stack_mask_bytes = RoundUp(stack_mask_bits, kBitsPerByte) / kBitsPerByte;
+    for (size_t i = 0; i < encoding.number_of_stack_masks; ++i) {
+      MemoryRegion source(&stack_masks_[i * stack_mask_bytes], stack_mask_bytes);
+      BitMemoryRegion stack_mask = code_info.GetStackMask(encoding, i);
+      for (size_t bit_index = 0; bit_index < encoding.stack_mask_size_in_bits; ++bit_index) {
+        stack_mask.StoreBit(bit_index, source.LoadBit(bit_index));
+      }
+    }
+  }
+
   // Verify all written data in debug build.
   if (kIsDebugBuild) {
     CheckCodeInfo(region);
@@ -536,6 +548,27 @@
   }
 }
 
+size_t StackMapStream::PrepareStackMasks(size_t entry_size_in_bits) {
+  // Preallocate memory since we do not want it to move (the dedup map will point into it).
+  const size_t byte_entry_size = RoundUp(entry_size_in_bits, kBitsPerByte) / kBitsPerByte;
+  stack_masks_.resize(byte_entry_size * stack_maps_.size(), 0u);
+  // For deduplicating we store the stack masks as byte packed for simplicity. We can bit pack later
+  // when copying out from stack_masks_.
+  std::unordered_map<MemoryRegion,
+                     size_t,
+                     FNVHash<MemoryRegion>,
+                     MemoryRegion::ContentEquals> dedup(stack_maps_.size());
+  for (StackMapEntry& stack_map : stack_maps_) {
+    size_t index = dedup.size();
+    MemoryRegion stack_mask(stack_masks_.data() + index * byte_entry_size, byte_entry_size);
+    for (size_t i = 0; i < entry_size_in_bits; i++) {
+      stack_mask.StoreBit(i, stack_map.sp_mask != nullptr && stack_map.sp_mask->IsBitSet(i));
+    }
+    stack_map.stack_mask_index = dedup.emplace(stack_mask, index).first->second;
+  }
+  return dedup.size();
+}
+
 // Check that all StackMapStream inputs are correctly encoded by trying to read them back.
 void StackMapStream::CheckCodeInfo(MemoryRegion region) const {
   CodeInfo code_info(region);
@@ -551,15 +584,17 @@
               entry.native_pc_code_offset.Uint32Value(instruction_set_));
     DCHECK_EQ(stack_map.GetDexPc(stack_map_encoding), entry.dex_pc);
     DCHECK_EQ(stack_map.GetRegisterMask(stack_map_encoding), entry.register_mask);
-    size_t num_stack_mask_bits = code_info.GetNumberOfStackMaskBits(encoding);
+    const size_t num_stack_mask_bits = code_info.GetNumberOfStackMaskBits(encoding);
+    DCHECK_EQ(stack_map.GetStackMaskIndex(stack_map_encoding), entry.stack_mask_index);
+    BitMemoryRegion stack_mask = code_info.GetStackMaskOf(encoding, stack_map);
     if (entry.sp_mask != nullptr) {
-      DCHECK_GE(num_stack_mask_bits, entry.sp_mask->GetNumberOfBits());
+      DCHECK_GE(stack_mask.size_in_bits(), entry.sp_mask->GetNumberOfBits());
       for (size_t b = 0; b < num_stack_mask_bits; b++) {
-        DCHECK_EQ(stack_map.GetStackMaskBit(stack_map_encoding, b), entry.sp_mask->IsBitSet(b));
+        DCHECK_EQ(stack_mask.LoadBit(b), entry.sp_mask->IsBitSet(b));
       }
     } else {
       for (size_t b = 0; b < num_stack_mask_bits; b++) {
-        DCHECK_EQ(stack_map.GetStackMaskBit(stack_map_encoding, b), 0u);
+        DCHECK_EQ(stack_mask.LoadBit(b), 0u);
       }
     }
 
diff --git a/compiler/optimizing/stack_map_stream.h b/compiler/optimizing/stack_map_stream.h
index 8fec472..e2e16e8 100644
--- a/compiler/optimizing/stack_map_stream.h
+++ b/compiler/optimizing/stack_map_stream.h
@@ -68,6 +68,7 @@
         location_catalog_entries_indices_(allocator->Adapter(kArenaAllocStackMapStream)),
         dex_register_locations_(allocator->Adapter(kArenaAllocStackMapStream)),
         inline_infos_(allocator->Adapter(kArenaAllocStackMapStream)),
+        stack_masks_(allocator->Adapter(kArenaAllocStackMapStream)),
         stack_mask_max_(-1),
         dex_pc_max_(0),
         register_mask_max_(0),
@@ -107,6 +108,7 @@
     BitVector* live_dex_registers_mask;
     uint32_t dex_register_map_hash;
     size_t same_dex_register_map_as_;
+    uint32_t stack_mask_index;
   };
 
   struct InlineInfoEntry {
@@ -160,6 +162,9 @@
 
   CodeOffset ComputeMaxNativePcCodeOffset() const;
 
+  // Returns the number of unique stack masks.
+  size_t PrepareStackMasks(size_t entry_size_in_bits);
+
   // Returns the index of an entry with the same dex register map as the current_entry,
   // or kNoSameDexMapFound if no such entry exists.
   size_t FindEntryWithTheSameDexMap();
@@ -193,6 +198,7 @@
   // A set of concatenated maps of Dex register locations indices to `location_catalog_entries_`.
   ArenaVector<size_t> dex_register_locations_;
   ArenaVector<InlineInfoEntry> inline_infos_;
+  ArenaVector<uint8_t> stack_masks_;
   int stack_mask_max_;
   uint32_t dex_pc_max_;
   uint32_t register_mask_max_;
diff --git a/compiler/optimizing/stack_map_test.cc b/compiler/optimizing/stack_map_test.cc
index da4597e..da68b60 100644
--- a/compiler/optimizing/stack_map_test.cc
+++ b/compiler/optimizing/stack_map_test.cc
@@ -27,15 +27,16 @@
 // Check that the stack mask of given stack map is identical
 // to the given bit vector. Returns true if they are same.
 static bool CheckStackMask(
-    int number_of_bits,
+    const CodeInfo& code_info,
+    const CodeInfoEncoding& encoding,
     const StackMap& stack_map,
-    StackMapEncoding& encoding,
     const BitVector& bit_vector) {
-  if (bit_vector.GetHighestBitSet() >= number_of_bits) {
+  BitMemoryRegion stack_mask = code_info.GetStackMaskOf(encoding, stack_map);
+  if (bit_vector.GetNumberOfBits() > encoding.stack_mask_size_in_bits) {
     return false;
   }
-  for (int i = 0; i < number_of_bits; ++i) {
-    if (stack_map.GetStackMaskBit(encoding, i) != bit_vector.IsBitSet(i)) {
+  for (size_t i = 0; i < encoding.stack_mask_size_in_bits; ++i) {
+    if (stack_mask.LoadBit(i) != bit_vector.IsBitSet(i)) {
       return false;
     }
   }
@@ -81,10 +82,7 @@
   ASSERT_EQ(64u, stack_map.GetNativePcOffset(encoding.stack_map_encoding, kRuntimeISA));
   ASSERT_EQ(0x3u, stack_map.GetRegisterMask(encoding.stack_map_encoding));
 
-  ASSERT_TRUE(CheckStackMask(code_info.GetNumberOfStackMaskBits(encoding),
-                             stack_map,
-                             encoding.stack_map_encoding,
-                             sp_mask));
+  ASSERT_TRUE(CheckStackMask(code_info, encoding, stack_map, sp_mask));
 
   ASSERT_TRUE(stack_map.HasDexRegisterMap(encoding.stack_map_encoding));
   DexRegisterMap dex_register_map =
@@ -199,10 +197,7 @@
     ASSERT_EQ(64u, stack_map.GetNativePcOffset(encoding.stack_map_encoding, kRuntimeISA));
     ASSERT_EQ(0x3u, stack_map.GetRegisterMask(encoding.stack_map_encoding));
 
-    ASSERT_TRUE(CheckStackMask(code_info.GetNumberOfStackMaskBits(encoding),
-                               stack_map,
-                               encoding.stack_map_encoding,
-                               sp_mask1));
+    ASSERT_TRUE(CheckStackMask(code_info, encoding, stack_map, sp_mask1));
 
     ASSERT_TRUE(stack_map.HasDexRegisterMap(encoding.stack_map_encoding));
     DexRegisterMap dex_register_map =
@@ -261,10 +256,7 @@
     ASSERT_EQ(128u, stack_map.GetNativePcOffset(encoding.stack_map_encoding, kRuntimeISA));
     ASSERT_EQ(0xFFu, stack_map.GetRegisterMask(encoding.stack_map_encoding));
 
-    ASSERT_TRUE(CheckStackMask(code_info.GetNumberOfStackMaskBits(encoding),
-                               stack_map,
-                               encoding.stack_map_encoding,
-                               sp_mask2));
+    ASSERT_TRUE(CheckStackMask(code_info, encoding, stack_map, sp_mask2));
 
     ASSERT_TRUE(stack_map.HasDexRegisterMap(encoding.stack_map_encoding));
     DexRegisterMap dex_register_map =
@@ -318,10 +310,7 @@
     ASSERT_EQ(192u, stack_map.GetNativePcOffset(encoding.stack_map_encoding, kRuntimeISA));
     ASSERT_EQ(0xABu, stack_map.GetRegisterMask(encoding.stack_map_encoding));
 
-    ASSERT_TRUE(CheckStackMask(code_info.GetNumberOfStackMaskBits(encoding),
-                               stack_map,
-                               encoding.stack_map_encoding,
-                               sp_mask3));
+    ASSERT_TRUE(CheckStackMask(code_info, encoding, stack_map, sp_mask3));
 
     ASSERT_TRUE(stack_map.HasDexRegisterMap(encoding.stack_map_encoding));
     DexRegisterMap dex_register_map =
@@ -375,10 +364,7 @@
     ASSERT_EQ(256u, stack_map.GetNativePcOffset(encoding.stack_map_encoding, kRuntimeISA));
     ASSERT_EQ(0xCDu, stack_map.GetRegisterMask(encoding.stack_map_encoding));
 
-    ASSERT_TRUE(CheckStackMask(code_info.GetNumberOfStackMaskBits(encoding),
-                               stack_map,
-                               encoding.stack_map_encoding,
-                               sp_mask4));
+    ASSERT_TRUE(CheckStackMask(code_info, encoding, stack_map, sp_mask4));
 
     ASSERT_TRUE(stack_map.HasDexRegisterMap(encoding.stack_map_encoding));
     DexRegisterMap dex_register_map =
@@ -854,4 +840,33 @@
   EXPECT_EQ(offset_mips64.Uint32Value(kMips64), kMips64InstructionAlignment);
 }
 
+
+TEST(StackMapTest, TestDeduplicateStackMask) {
+  ArenaPool pool;
+  ArenaAllocator arena(&pool);
+  StackMapStream stream(&arena, kRuntimeISA);
+
+  ArenaBitVector sp_mask(&arena, 0, true);
+  sp_mask.SetBit(1);
+  sp_mask.SetBit(4);
+  stream.BeginStackMapEntry(0, 4, 0x3, &sp_mask, 0, 0);
+  stream.EndStackMapEntry();
+  stream.BeginStackMapEntry(0, 8, 0x3, &sp_mask, 0, 0);
+  stream.EndStackMapEntry();
+
+  size_t size = stream.PrepareForFillIn();
+  void* memory = arena.Alloc(size, kArenaAllocMisc);
+  MemoryRegion region(memory, size);
+  stream.FillIn(region);
+
+  CodeInfo code_info(region);
+  CodeInfoEncoding encoding = code_info.ExtractEncoding();
+  ASSERT_EQ(2u, code_info.GetNumberOfStackMaps(encoding));
+
+  StackMap stack_map1 = code_info.GetStackMapForNativePcOffset(4, encoding);
+  StackMap stack_map2 = code_info.GetStackMapForNativePcOffset(8, encoding);
+  EXPECT_EQ(stack_map1.GetStackMaskIndex(encoding.stack_map_encoding),
+            stack_map2.GetStackMaskIndex(encoding.stack_map_encoding));
+}
+
 }  // namespace art
diff --git a/oatdump/oatdump.cc b/oatdump/oatdump.cc
index 9b4d3e1..c95cc74 100644
--- a/oatdump/oatdump.cc
+++ b/oatdump/oatdump.cc
@@ -589,16 +589,16 @@
       kByteKindCodeInfoInlineInfo,
       kByteKindCodeInfoEncoding,
       kByteKindCodeInfoOther,
+      kByteKindCodeInfoStackMasks,
       kByteKindStackMapNativePc,
       kByteKindStackMapDexPc,
       kByteKindStackMapDexRegisterMap,
       kByteKindStackMapInlineInfo,
       kByteKindStackMapRegisterMask,
-      kByteKindStackMapMask,
-      kByteKindStackMapOther,
+      kByteKindStackMapStackMaskIndex,
       kByteKindCount,
       kByteKindStackMapFirst = kByteKindCodeInfoOther,
-      kByteKindStackMapLast = kByteKindStackMapOther,
+      kByteKindStackMapLast = kByteKindStackMapStackMaskIndex,
     };
     int64_t bits[kByteKindCount] = {};
     // Since code has deduplication, seen tracks already seen pointers to avoid double counting
@@ -632,6 +632,7 @@
         Dump(os, "CodeInfoLocationCatalog        ", bits[kByteKindCodeInfoLocationCatalog], sum);
         Dump(os, "CodeInfoDexRegisterMap         ", bits[kByteKindCodeInfoDexRegisterMap], sum);
         Dump(os, "CodeInfoInlineInfo             ", bits[kByteKindCodeInfoInlineInfo], sum);
+        Dump(os, "CodeInfoStackMasks             ", bits[kByteKindCodeInfoStackMasks], sum);
         Dump(os, "CodeInfoStackMap               ", stack_map_bits, sum);
         {
           ScopedIndentation indent1(&os);
@@ -661,13 +662,8 @@
                stack_map_bits,
                "stack map");
           Dump(os,
-               "StackMapMask                 ",
-               bits[kByteKindStackMapMask],
-               stack_map_bits,
-               "stack map");
-          Dump(os,
-               "StackMapOther                ",
-               bits[kByteKindStackMapOther],
+               "StackMapStackMaskIndex       ",
+               bits[kByteKindStackMapStackMaskIndex],
                stack_map_bits,
                "stack map");
         }
@@ -1575,16 +1571,13 @@
           stats_.AddBits(
               Stats::kByteKindStackMapRegisterMask,
               stack_map_encoding.GetRegisterMaskEncoding().BitSize() * num_stack_maps);
-          const size_t stack_mask_bits = encoding.stack_map_size_in_bits -
-              stack_map_encoding.GetStackMaskBitOffset();
           stats_.AddBits(
-              Stats::kByteKindStackMapMask,
-              stack_mask_bits * num_stack_maps);
-          const size_t stack_map_bits =
-              stack_map_encoding.GetStackMaskBitOffset() + stack_mask_bits;
+              Stats::kByteKindStackMapStackMaskIndex,
+              stack_map_encoding.GetStackMaskIndexEncoding().BitSize() * num_stack_maps);
           stats_.AddBits(
-              Stats::kByteKindStackMapOther,
-              (encoding.stack_map_size_in_bits - stack_map_bits) * num_stack_maps);
+              Stats::kByteKindCodeInfoStackMasks,
+              helper.GetCodeInfo().GetNumberOfStackMaskBits(encoding) *
+                  encoding.number_of_stack_masks);
           const size_t stack_map_bytes = helper.GetCodeInfo().GetStackMapsSize(encoding);
           const size_t location_catalog_bytes =
               helper.GetCodeInfo().GetDexRegisterLocationCatalogSize(encoding);
diff --git a/runtime/check_reference_map_visitor.h b/runtime/check_reference_map_visitor.h
index 93fdaa6..2252fe7 100644
--- a/runtime/check_reference_map_visitor.h
+++ b/runtime/check_reference_map_visitor.h
@@ -68,6 +68,7 @@
     DexRegisterMap dex_register_map =
         code_info.GetDexRegisterMapOf(stack_map, encoding, number_of_dex_registers);
     uint32_t register_mask = stack_map.GetRegisterMask(encoding.stack_map_encoding);
+    BitMemoryRegion stack_mask = code_info.GetStackMaskOf(encoding, stack_map);
     for (int i = 0; i < number_of_references; ++i) {
       int reg = registers[i];
       CHECK(reg < m->GetCodeItem()->registers_size_);
@@ -80,8 +81,7 @@
           break;
         case DexRegisterLocation::Kind::kInStack:
           DCHECK_EQ(location.GetValue() % kFrameSlotSize, 0);
-          CHECK(stack_map.GetStackMaskBit(encoding.stack_map_encoding,
-                                          location.GetValue() / kFrameSlotSize));
+          CHECK(stack_mask.LoadBit(location.GetValue() / kFrameSlotSize));
           break;
         case DexRegisterLocation::Kind::kInRegister:
         case DexRegisterLocation::Kind::kInRegisterHigh:
diff --git a/runtime/memory_region.cc b/runtime/memory_region.cc
index b0ecab4..13cc5c9 100644
--- a/runtime/memory_region.cc
+++ b/runtime/memory_region.cc
@@ -29,8 +29,7 @@
   CHECK_GT(from.size(), 0U);
   CHECK_GE(this->size(), from.size());
   CHECK_LE(offset, this->size() - from.size());
-  memmove(reinterpret_cast<void*>(start() + offset),
-          from.pointer(), from.size());
+  memmove(reinterpret_cast<void*>(begin() + offset), from.pointer(), from.size());
 }
 
 void MemoryRegion::StoreBits(uintptr_t bit_offset, uint32_t value, size_t length) {
diff --git a/runtime/memory_region.h b/runtime/memory_region.h
index f55dff7..7cf5d49 100644
--- a/runtime/memory_region.h
+++ b/runtime/memory_region.h
@@ -35,6 +35,12 @@
 // of the region.
 class MemoryRegion FINAL : public ValueObject {
  public:
+  struct ContentEquals {
+    constexpr bool operator()(const MemoryRegion& lhs, const MemoryRegion& rhs) const {
+      return lhs.size() == rhs.size() && memcmp(lhs.begin(), rhs.begin(), lhs.size()) == 0;
+    }
+  };
+
   MemoryRegion() : pointer_(nullptr), size_(0) {}
   MemoryRegion(void* pointer_in, uintptr_t size_in) : pointer_(pointer_in), size_(size_in) {}
 
@@ -46,8 +52,8 @@
     return OFFSETOF_MEMBER(MemoryRegion, pointer_);
   }
 
-  uint8_t* start() const { return reinterpret_cast<uint8_t*>(pointer_); }
-  uint8_t* end() const { return start() + size_; }
+  uint8_t* begin() const { return reinterpret_cast<uint8_t*>(pointer_); }
+  uint8_t* end() const { return begin() + size_; }
 
   // Load value of type `T` at `offset`.  The memory address corresponding
   // to `offset` should be word-aligned (on ARM, this is a requirement).
@@ -131,7 +137,7 @@
       // Do not touch any memory if the range is empty.
       return 0;
     }
-    const uint8_t* address = start() + bit_offset / kBitsPerByte;
+    const uint8_t* address = begin() + bit_offset / kBitsPerByte;
     const uint32_t shift = bit_offset & (kBitsPerByte - 1);
     // Load the value (reading only the strictly needed bytes).
     const uint32_t load_bit_count = shift + length;
@@ -165,11 +171,18 @@
 
   void CopyFrom(size_t offset, const MemoryRegion& from) const;
 
+  template<class Vector>
+  void CopyFromVector(size_t offset, Vector& vector) const {
+    if (!vector.empty()) {
+      CopyFrom(offset, MemoryRegion(vector.data(), vector.size()));
+    }
+  }
+
   // Compute a sub memory region based on an existing one.
   ALWAYS_INLINE MemoryRegion Subregion(uintptr_t offset, uintptr_t size_in) const {
     CHECK_GE(this->size(), size_in);
     CHECK_LE(offset,  this->size() - size_in);
-    return MemoryRegion(reinterpret_cast<void*>(start() + offset), size_in);
+    return MemoryRegion(reinterpret_cast<void*>(begin() + offset), size_in);
   }
 
   // Compute an extended memory region based on an existing one.
@@ -183,7 +196,7 @@
   ALWAYS_INLINE T* ComputeInternalPointer(size_t offset) const {
     CHECK_GE(size(), sizeof(T));
     CHECK_LE(offset, size() - sizeof(T));
-    return reinterpret_cast<T*>(start() + offset);
+    return reinterpret_cast<T*>(begin() + offset);
   }
 
   // Locate the bit with the given offset. Returns a pointer to the byte
diff --git a/runtime/oat.h b/runtime/oat.h
index 62f010b..4033716 100644
--- a/runtime/oat.h
+++ b/runtime/oat.h
@@ -32,7 +32,7 @@
 class PACKED(4) OatHeader {
  public:
   static constexpr uint8_t kOatMagic[] = { 'o', 'a', 't', '\n' };
-  static constexpr uint8_t kOatVersion[] = { '1', '0', '6', '\0' };  // hash-based DexCache types
+  static constexpr uint8_t kOatVersion[] = { '1', '0', '7', '\0' };  // Stack map stack mask change.
 
   static constexpr const char* kImageLocationKey = "image-location";
   static constexpr const char* kDex2OatCmdLineKey = "dex2oat-cmdline";
diff --git a/runtime/quick_exception_handler.cc b/runtime/quick_exception_handler.cc
index 4e76951..3ba3011 100644
--- a/runtime/quick_exception_handler.cc
+++ b/runtime/quick_exception_handler.cc
@@ -408,6 +408,7 @@
     StackMap stack_map = code_info.GetStackMapForNativePcOffset(native_pc_offset, encoding);
     const size_t number_of_vregs = m->GetCodeItem()->registers_size_;
     uint32_t register_mask = stack_map.GetRegisterMask(encoding.stack_map_encoding);
+    BitMemoryRegion stack_mask = code_info.GetStackMaskOf(encoding, stack_map);
     DexRegisterMap vreg_map = IsInInlinedFrame()
         ? code_info.GetDexRegisterMapAtDepth(GetCurrentInliningDepth() - 1,
                                              code_info.GetInlineInfoOf(stack_map, encoding),
@@ -440,8 +441,7 @@
           const uint8_t* addr = reinterpret_cast<const uint8_t*>(GetCurrentQuickFrame()) + offset;
           value = *reinterpret_cast<const uint32_t*>(addr);
           uint32_t bit = (offset >> 2);
-          if (code_info.GetNumberOfStackMaskBits(encoding) > bit &&
-              stack_map.GetStackMaskBit(encoding.stack_map_encoding, bit)) {
+          if (bit < encoding.stack_mask_size_in_bits && stack_mask.LoadBit(bit)) {
             is_reference = true;
           }
           break;
diff --git a/runtime/stack_map.cc b/runtime/stack_map.cc
index e093293..f470ae9 100644
--- a/runtime/stack_map.cc
+++ b/runtime/stack_map.cc
@@ -98,7 +98,8 @@
       << ", dex_register_map_bit_offset=" << static_cast<uint32_t>(dex_register_map_bit_offset_)
       << ", inline_info_bit_offset=" << static_cast<uint32_t>(inline_info_bit_offset_)
       << ", register_mask_bit_offset=" << static_cast<uint32_t>(register_mask_bit_offset_)
-      << ", stack_mask_bit_offset=" << static_cast<uint32_t>(stack_mask_bit_offset_)
+      << ", stack_mask_index_bit_offset=" << static_cast<uint32_t>(stack_mask_index_bit_offset_)
+      << ", total_bit_size=" << static_cast<uint32_t>(total_bit_size_)
       << ")\n";
 }
 
@@ -198,7 +199,7 @@
       << "StackMap" << header_suffix
       << std::hex
       << " [native_pc=0x" << code_offset + pc_offset << "]"
-      << " [entry_size=0x" << encoding.stack_map_size_in_bits << " bits]"
+      << " [entry_size=0x" << encoding.stack_map_encoding.BitSize() << " bits]"
       << " (dex_pc=0x" << GetDexPc(stack_map_encoding)
       << ", native_pc_offset=0x" << pc_offset
       << ", dex_register_map_offset=0x" << GetDexRegisterMapOffset(stack_map_encoding)
@@ -206,8 +207,9 @@
       << ", register_mask=0x" << GetRegisterMask(stack_map_encoding)
       << std::dec
       << ", stack_mask=0b";
-  for (size_t i = 0, e = code_info.GetNumberOfStackMaskBits(encoding); i < e; ++i) {
-    vios->Stream() << GetStackMaskBit(stack_map_encoding, e - i - 1);
+  BitMemoryRegion stack_mask = code_info.GetStackMaskOf(encoding, *this);
+  for (size_t i = 0, e = encoding.stack_mask_size_in_bits; i < e; ++i) {
+    vios->Stream() << stack_mask.LoadBit(e - i - 1);
   }
   vios->Stream() << ")\n";
   if (HasDexRegisterMap(stack_map_encoding)) {
diff --git a/runtime/stack_map.h b/runtime/stack_map.h
index 679218d..83ba457 100644
--- a/runtime/stack_map.h
+++ b/runtime/stack_map.h
@@ -695,34 +695,34 @@
                       size_t dex_register_map_size,
                       size_t inline_info_size,
                       size_t register_mask_max,
-                      size_t stack_mask_bit_size) {
-    size_t bit_offset = 0;
-    DCHECK_EQ(kNativePcBitOffset, bit_offset);
-    bit_offset += MinimumBitsToStore(native_pc_max);
+                      size_t number_of_stack_masks) {
+    total_bit_size_ = 0;
+    DCHECK_EQ(kNativePcBitOffset, total_bit_size_);
+    total_bit_size_ += MinimumBitsToStore(native_pc_max);
 
-    dex_pc_bit_offset_ = dchecked_integral_cast<uint8_t>(bit_offset);
-    bit_offset += MinimumBitsToStore(1 /* kNoDexPc */ + dex_pc_max);
+    dex_pc_bit_offset_ = total_bit_size_;
+    total_bit_size_ += MinimumBitsToStore(1 /* kNoDexPc */ + dex_pc_max);
 
     // We also need +1 for kNoDexRegisterMap, but since the size is strictly
     // greater than any offset we might try to encode, we already implicitly have it.
-    dex_register_map_bit_offset_ = dchecked_integral_cast<uint8_t>(bit_offset);
-    bit_offset += MinimumBitsToStore(dex_register_map_size);
+    dex_register_map_bit_offset_ = total_bit_size_;
+    total_bit_size_ += MinimumBitsToStore(dex_register_map_size);
 
     // We also need +1 for kNoInlineInfo, but since the inline_info_size is strictly
     // greater than the offset we might try to encode, we already implicitly have it.
     // If inline_info_size is zero, we can encode only kNoInlineInfo (in zero bits).
-    inline_info_bit_offset_ = dchecked_integral_cast<uint8_t>(bit_offset);
+    inline_info_bit_offset_ = total_bit_size_;
     if (inline_info_size != 0) {
-      bit_offset += MinimumBitsToStore(dex_register_map_size + inline_info_size);
+      total_bit_size_ += MinimumBitsToStore(dex_register_map_size + inline_info_size);
     }
 
-    register_mask_bit_offset_ = dchecked_integral_cast<uint8_t>(bit_offset);
-    bit_offset += MinimumBitsToStore(register_mask_max);
+    register_mask_bit_offset_ = total_bit_size_;
+    total_bit_size_ += MinimumBitsToStore(register_mask_max);
 
-    stack_mask_bit_offset_ = dchecked_integral_cast<uint8_t>(bit_offset);
-    bit_offset += stack_mask_bit_size;
+    stack_mask_index_bit_offset_ = total_bit_size_;
+    total_bit_size_ += MinimumBitsToStore(number_of_stack_masks);
 
-    return bit_offset;
+    return total_bit_size_;
   }
 
   ALWAYS_INLINE FieldEncoding GetNativePcEncoding() const {
@@ -738,15 +738,13 @@
     return FieldEncoding(inline_info_bit_offset_, register_mask_bit_offset_, -1 /* min_value */);
   }
   ALWAYS_INLINE FieldEncoding GetRegisterMaskEncoding() const {
-    return FieldEncoding(register_mask_bit_offset_, stack_mask_bit_offset_);
+    return FieldEncoding(register_mask_bit_offset_, stack_mask_index_bit_offset_);
   }
-  ALWAYS_INLINE size_t GetStackMaskBitOffset() const {
-    // The end offset is not encoded. It is implicitly the end of stack map entry.
-    return stack_mask_bit_offset_;
+  ALWAYS_INLINE FieldEncoding GetStackMaskIndexEncoding() const {
+    return FieldEncoding(stack_mask_index_bit_offset_, total_bit_size_);
   }
-  ALWAYS_INLINE size_t GetNumberOfStackMaskBits(size_t stack_map_bits) const {
-    // Note that the stack mask bits are last.
-    return stack_map_bits - GetStackMaskBitOffset();
+  ALWAYS_INLINE size_t BitSize() const {
+    return total_bit_size_;
   }
 
   void Dump(VariableIndentationOutputStream* vios) const;
@@ -757,7 +755,8 @@
   uint8_t dex_register_map_bit_offset_;
   uint8_t inline_info_bit_offset_;
   uint8_t register_mask_bit_offset_;
-  uint8_t stack_mask_bit_offset_;
+  uint8_t stack_mask_index_bit_offset_;
+  uint8_t total_bit_size_;
 };
 
 /**
@@ -771,7 +770,7 @@
  * The information is of the form:
  *
  *   [native_pc_offset, dex_pc, dex_register_map_offset, inlining_info_offset, register_mask,
- *   stack_mask].
+ *   stack_mask_index].
  */
 class StackMap {
  public:
@@ -824,12 +823,12 @@
     encoding.GetRegisterMaskEncoding().Store(region_, mask);
   }
 
-  ALWAYS_INLINE bool GetStackMaskBit(const StackMapEncoding& encoding, size_t index) const {
-    return region_.LoadBit(encoding.GetStackMaskBitOffset() + index);
+  ALWAYS_INLINE uint32_t GetStackMaskIndex(const StackMapEncoding& encoding) const {
+    return encoding.GetStackMaskIndexEncoding().Load(region_);
   }
 
-  ALWAYS_INLINE void SetStackMaskBit(const StackMapEncoding& encoding, size_t index, bool value) {
-    region_.StoreBit(encoding.GetStackMaskBitOffset() + index, value);
+  ALWAYS_INLINE void SetStackMaskIndex(const StackMapEncoding& encoding, uint32_t mask) {
+    encoding.GetStackMaskIndexEncoding().Store(region_, mask);
   }
 
   ALWAYS_INLINE bool HasDexRegisterMap(const StackMapEncoding& encoding) const {
@@ -1031,7 +1030,8 @@
 struct CodeInfoEncoding {
   uint32_t non_header_size;
   uint32_t number_of_stack_maps;
-  uint32_t stack_map_size_in_bits;
+  uint32_t number_of_stack_masks;
+  uint32_t stack_mask_size_in_bits;
   uint32_t number_of_location_catalog_entries;
   StackMapEncoding stack_map_encoding;
   InlineInfoEncoding inline_info_encoding;
@@ -1043,7 +1043,8 @@
     const uint8_t* ptr = reinterpret_cast<const uint8_t*>(data);
     non_header_size = DecodeUnsignedLeb128(&ptr);
     number_of_stack_maps = DecodeUnsignedLeb128(&ptr);
-    stack_map_size_in_bits = DecodeUnsignedLeb128(&ptr);
+    number_of_stack_masks = DecodeUnsignedLeb128(&ptr);
+    stack_mask_size_in_bits = DecodeUnsignedLeb128(&ptr);
     number_of_location_catalog_entries = DecodeUnsignedLeb128(&ptr);
     static_assert(alignof(StackMapEncoding) == 1,
                   "StackMapEncoding should not require alignment");
@@ -1064,7 +1065,8 @@
   void Compress(Vector* dest) const {
     EncodeUnsignedLeb128(dest, non_header_size);
     EncodeUnsignedLeb128(dest, number_of_stack_maps);
-    EncodeUnsignedLeb128(dest, stack_map_size_in_bits);
+    EncodeUnsignedLeb128(dest, number_of_stack_masks);
+    EncodeUnsignedLeb128(dest, stack_mask_size_in_bits);
     EncodeUnsignedLeb128(dest, number_of_location_catalog_entries);
     const uint8_t* stack_map_ptr = reinterpret_cast<const uint8_t*>(&stack_map_encoding);
     dest->insert(dest->end(), stack_map_ptr, stack_map_ptr + sizeof(StackMapEncoding));
@@ -1098,7 +1100,7 @@
   }
 
   CodeInfoEncoding ExtractEncoding() const {
-    CodeInfoEncoding encoding(region_.start());
+    CodeInfoEncoding encoding(region_.begin());
     AssertValidStackMap(encoding);
     return encoding;
   }
@@ -1114,14 +1116,27 @@
   }
 
   ALWAYS_INLINE size_t GetNumberOfStackMaskBits(const CodeInfoEncoding& encoding) const {
-    return encoding.stack_map_encoding.GetNumberOfStackMaskBits(encoding.stack_map_size_in_bits);
+    return encoding.stack_mask_size_in_bits;
   }
 
   ALWAYS_INLINE StackMap GetStackMapAt(size_t i, const CodeInfoEncoding& encoding) const {
-    const size_t map_size = encoding.stack_map_size_in_bits;
+    const size_t map_size = encoding.stack_map_encoding.BitSize();
     return StackMap(BitMemoryRegion(GetStackMaps(encoding), i * map_size, map_size));
   }
 
+  BitMemoryRegion GetStackMask(const CodeInfoEncoding& encoding, size_t stack_mask_index) const {
+    // All stack mask data is stored at the very end.
+    const size_t entry_size = GetNumberOfStackMaskBits(encoding);
+    return BitMemoryRegion(region_,
+                           region_.size_in_bits() - entry_size * (stack_mask_index + 1),
+                           entry_size);
+  }
+
+  BitMemoryRegion GetStackMaskOf(const CodeInfoEncoding& encoding,
+                                 const StackMap& stack_map) const {
+    return GetStackMask(encoding, stack_map.GetStackMaskIndex(encoding.stack_map_encoding));
+  }
+
   uint32_t GetNumberOfLocationCatalogEntries(const CodeInfoEncoding& encoding) const {
     return encoding.number_of_location_catalog_entries;
   }
@@ -1135,10 +1150,14 @@
     return encoding.number_of_stack_maps;
   }
 
+  // Get the size of all the stack maps of this CodeInfo object, in bits. Not byte aligned.
+  ALWAYS_INLINE size_t GetStackMapsSizeInBits(const CodeInfoEncoding& encoding) const {
+    return encoding.stack_map_encoding.BitSize() * GetNumberOfStackMaps(encoding);
+  }
+
   // Get the size of all the stack maps of this CodeInfo object, in bytes.
   size_t GetStackMapsSize(const CodeInfoEncoding& encoding) const {
-    return RoundUp(encoding.stack_map_size_in_bits * GetNumberOfStackMaps(encoding), kBitsPerByte) /
-        kBitsPerByte;
+    return RoundUp(GetStackMapsSizeInBits(encoding), kBitsPerByte) / kBitsPerByte;
   }
 
   uint32_t GetDexRegisterLocationCatalogOffset(const CodeInfoEncoding& encoding) const {
@@ -1288,7 +1307,7 @@
                  << encoding.non_header_size << "\n"
                  << encoding.number_of_location_catalog_entries << "\n"
                  << encoding.number_of_stack_maps << "\n"
-                 << encoding.stack_map_size_in_bits;
+                 << encoding.stack_map_encoding.BitSize();
     }
   }
 
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 3c7a71a..8002f32 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -3038,9 +3038,10 @@
       T vreg_info(m, code_info, encoding, map, visitor_);
 
       // Visit stack entries that hold pointers.
-      size_t number_of_bits = code_info.GetNumberOfStackMaskBits(encoding);
+      const size_t number_of_bits = code_info.GetNumberOfStackMaskBits(encoding);
+      BitMemoryRegion stack_mask = code_info.GetStackMaskOf(encoding, map);
       for (size_t i = 0; i < number_of_bits; ++i) {
-        if (map.GetStackMaskBit(encoding.stack_map_encoding, i)) {
+        if (stack_mask.LoadBit(i)) {
           auto* ref_addr = vreg_base + i;
           mirror::Object* ref = ref_addr->AsMirrorPtr();
           if (ref != nullptr) {
@@ -3048,7 +3049,7 @@
             vreg_info.VisitStack(&new_ref, i, this);
             if (ref != new_ref) {
               ref_addr->Assign(new_ref);
-            }
+           }
           }
         }
       }