Deduplicate stack masks

The stack masks repeat often enough that it is worth deduplicating
them. Instead of embedding the mask bits in every stack map, each stack
map now stores an index into a table of unique stack masks, which is
emitted at the end of the CodeInfo.
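
A minimal emission-side sketch of the idea (the dex2oat/StackMapStream
changes are not part of this diff, so ByteRegion, ContentHash and
DedupeStackMask below are hypothetical stand-ins): equal masks collapse
to a single table entry via a byte-wise equality functor in the spirit
of the MemoryRegion::ContentEquals helper added below.

// Hypothetical sketch only; not the actual compiler-side code.
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <unordered_map>

struct ByteRegion {     // stand-in for art::MemoryRegion
  const uint8_t* data;
  size_t size;
};

struct ContentEquals {  // same idea as MemoryRegion::ContentEquals below
  bool operator()(const ByteRegion& lhs, const ByteRegion& rhs) const {
    return lhs.size == rhs.size && memcmp(lhs.data, rhs.data, lhs.size) == 0;
  }
};

struct ContentHash {    // any byte-wise hash works; FNV-1a style shown here
  size_t operator()(const ByteRegion& region) const {
    size_t hash = 0x811c9dc5u;
    for (size_t i = 0; i < region.size; ++i) {
      hash = (hash ^ region.data[i]) * 0x01000193u;
    }
    return hash;
  }
};

// Returns the index to store in the stack map. The first occurrence of a
// mask gets a fresh index; later identical masks reuse it. The caller keeps
// the mask bytes alive and later emits the unique masks, in index order, at
// the end of the CodeInfo.
uint32_t DedupeStackMask(
    std::unordered_map<ByteRegion, uint32_t, ContentHash, ContentEquals>* table,
    const ByteRegion& mask) {
  return table->emplace(mask, static_cast<uint32_t>(table->size())).first->second;
}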

Oat size for a large app:
98143600 -> 96722288 (-1.44%)

Bug: 34621054

Test: test-art-host
Change-Id: If73d51e46066357049d5be2e406ae9a32b7ff1f4
diff --git a/runtime/check_reference_map_visitor.h b/runtime/check_reference_map_visitor.h
index 93fdaa6..2252fe7 100644
--- a/runtime/check_reference_map_visitor.h
+++ b/runtime/check_reference_map_visitor.h
@@ -68,6 +68,7 @@
     DexRegisterMap dex_register_map =
         code_info.GetDexRegisterMapOf(stack_map, encoding, number_of_dex_registers);
     uint32_t register_mask = stack_map.GetRegisterMask(encoding.stack_map_encoding);
+    BitMemoryRegion stack_mask = code_info.GetStackMaskOf(encoding, stack_map);
     for (int i = 0; i < number_of_references; ++i) {
       int reg = registers[i];
       CHECK(reg < m->GetCodeItem()->registers_size_);
@@ -80,8 +81,7 @@
           break;
         case DexRegisterLocation::Kind::kInStack:
           DCHECK_EQ(location.GetValue() % kFrameSlotSize, 0);
-          CHECK(stack_map.GetStackMaskBit(encoding.stack_map_encoding,
-                                          location.GetValue() / kFrameSlotSize));
+          CHECK(stack_mask.LoadBit(location.GetValue() / kFrameSlotSize));
           break;
         case DexRegisterLocation::Kind::kInRegister:
         case DexRegisterLocation::Kind::kInRegisterHigh:
diff --git a/runtime/memory_region.cc b/runtime/memory_region.cc
index b0ecab4..13cc5c9 100644
--- a/runtime/memory_region.cc
+++ b/runtime/memory_region.cc
@@ -29,8 +29,7 @@
   CHECK_GT(from.size(), 0U);
   CHECK_GE(this->size(), from.size());
   CHECK_LE(offset, this->size() - from.size());
-  memmove(reinterpret_cast<void*>(start() + offset),
-          from.pointer(), from.size());
+  memmove(reinterpret_cast<void*>(begin() + offset), from.pointer(), from.size());
 }
 
 void MemoryRegion::StoreBits(uintptr_t bit_offset, uint32_t value, size_t length) {
diff --git a/runtime/memory_region.h b/runtime/memory_region.h
index f55dff7..7cf5d49 100644
--- a/runtime/memory_region.h
+++ b/runtime/memory_region.h
@@ -35,6 +35,12 @@
 // of the region.
 class MemoryRegion FINAL : public ValueObject {
  public:
+  struct ContentEquals {
+    constexpr bool operator()(const MemoryRegion& lhs, const MemoryRegion& rhs) const {
+      return lhs.size() == rhs.size() && memcmp(lhs.begin(), rhs.begin(), lhs.size()) == 0;
+    }
+  };
+
   MemoryRegion() : pointer_(nullptr), size_(0) {}
   MemoryRegion(void* pointer_in, uintptr_t size_in) : pointer_(pointer_in), size_(size_in) {}
 
@@ -46,8 +52,8 @@
     return OFFSETOF_MEMBER(MemoryRegion, pointer_);
   }
 
-  uint8_t* start() const { return reinterpret_cast<uint8_t*>(pointer_); }
-  uint8_t* end() const { return start() + size_; }
+  uint8_t* begin() const { return reinterpret_cast<uint8_t*>(pointer_); }
+  uint8_t* end() const { return begin() + size_; }
 
   // Load value of type `T` at `offset`.  The memory address corresponding
   // to `offset` should be word-aligned (on ARM, this is a requirement).
@@ -131,7 +137,7 @@
       // Do not touch any memory if the range is empty.
       return 0;
     }
-    const uint8_t* address = start() + bit_offset / kBitsPerByte;
+    const uint8_t* address = begin() + bit_offset / kBitsPerByte;
     const uint32_t shift = bit_offset & (kBitsPerByte - 1);
     // Load the value (reading only the strictly needed bytes).
     const uint32_t load_bit_count = shift + length;
@@ -165,11 +171,18 @@
 
   void CopyFrom(size_t offset, const MemoryRegion& from) const;
 
+  template<class Vector>
+  void CopyFromVector(size_t offset, Vector& vector) const {
+    if (!vector.empty()) {
+      CopyFrom(offset, MemoryRegion(vector.data(), vector.size()));
+    }
+  }
+
   // Compute a sub memory region based on an existing one.
   ALWAYS_INLINE MemoryRegion Subregion(uintptr_t offset, uintptr_t size_in) const {
     CHECK_GE(this->size(), size_in);
     CHECK_LE(offset,  this->size() - size_in);
-    return MemoryRegion(reinterpret_cast<void*>(start() + offset), size_in);
+    return MemoryRegion(reinterpret_cast<void*>(begin() + offset), size_in);
   }
 
   // Compute an extended memory region based on an existing one.
@@ -183,7 +196,7 @@
   ALWAYS_INLINE T* ComputeInternalPointer(size_t offset) const {
     CHECK_GE(size(), sizeof(T));
     CHECK_LE(offset, size() - sizeof(T));
-    return reinterpret_cast<T*>(start() + offset);
+    return reinterpret_cast<T*>(begin() + offset);
   }
 
   // Locate the bit with the given offset. Returns a pointer to the byte
diff --git a/runtime/oat.h b/runtime/oat.h
index 62f010b..4033716 100644
--- a/runtime/oat.h
+++ b/runtime/oat.h
@@ -32,7 +32,7 @@
 class PACKED(4) OatHeader {
  public:
   static constexpr uint8_t kOatMagic[] = { 'o', 'a', 't', '\n' };
-  static constexpr uint8_t kOatVersion[] = { '1', '0', '6', '\0' };  // hash-based DexCache types
+  static constexpr uint8_t kOatVersion[] = { '1', '0', '7', '\0' };  // Stack map stack mask change.
 
   static constexpr const char* kImageLocationKey = "image-location";
   static constexpr const char* kDex2OatCmdLineKey = "dex2oat-cmdline";
diff --git a/runtime/quick_exception_handler.cc b/runtime/quick_exception_handler.cc
index 4e76951..3ba3011 100644
--- a/runtime/quick_exception_handler.cc
+++ b/runtime/quick_exception_handler.cc
@@ -408,6 +408,7 @@
     StackMap stack_map = code_info.GetStackMapForNativePcOffset(native_pc_offset, encoding);
     const size_t number_of_vregs = m->GetCodeItem()->registers_size_;
     uint32_t register_mask = stack_map.GetRegisterMask(encoding.stack_map_encoding);
+    BitMemoryRegion stack_mask = code_info.GetStackMaskOf(encoding, stack_map);
     DexRegisterMap vreg_map = IsInInlinedFrame()
         ? code_info.GetDexRegisterMapAtDepth(GetCurrentInliningDepth() - 1,
                                              code_info.GetInlineInfoOf(stack_map, encoding),
@@ -440,8 +441,7 @@
           const uint8_t* addr = reinterpret_cast<const uint8_t*>(GetCurrentQuickFrame()) + offset;
           value = *reinterpret_cast<const uint32_t*>(addr);
           uint32_t bit = (offset >> 2);
-          if (code_info.GetNumberOfStackMaskBits(encoding) > bit &&
-              stack_map.GetStackMaskBit(encoding.stack_map_encoding, bit)) {
+          if (bit < encoding.stack_mask_size_in_bits && stack_mask.LoadBit(bit)) {
             is_reference = true;
           }
           break;
diff --git a/runtime/stack_map.cc b/runtime/stack_map.cc
index e093293..f470ae9 100644
--- a/runtime/stack_map.cc
+++ b/runtime/stack_map.cc
@@ -98,7 +98,8 @@
       << ", dex_register_map_bit_offset=" << static_cast<uint32_t>(dex_register_map_bit_offset_)
       << ", inline_info_bit_offset=" << static_cast<uint32_t>(inline_info_bit_offset_)
       << ", register_mask_bit_offset=" << static_cast<uint32_t>(register_mask_bit_offset_)
-      << ", stack_mask_bit_offset=" << static_cast<uint32_t>(stack_mask_bit_offset_)
+      << ", stack_mask_index_bit_offset=" << static_cast<uint32_t>(stack_mask_index_bit_offset_)
+      << ", total_bit_size=" << static_cast<uint32_t>(total_bit_size_)
       << ")\n";
 }
 
@@ -198,7 +199,7 @@
       << "StackMap" << header_suffix
       << std::hex
       << " [native_pc=0x" << code_offset + pc_offset << "]"
-      << " [entry_size=0x" << encoding.stack_map_size_in_bits << " bits]"
+      << " [entry_size=0x" << encoding.stack_map_encoding.BitSize() << " bits]"
       << " (dex_pc=0x" << GetDexPc(stack_map_encoding)
       << ", native_pc_offset=0x" << pc_offset
       << ", dex_register_map_offset=0x" << GetDexRegisterMapOffset(stack_map_encoding)
@@ -206,8 +207,9 @@
       << ", register_mask=0x" << GetRegisterMask(stack_map_encoding)
       << std::dec
       << ", stack_mask=0b";
-  for (size_t i = 0, e = code_info.GetNumberOfStackMaskBits(encoding); i < e; ++i) {
-    vios->Stream() << GetStackMaskBit(stack_map_encoding, e - i - 1);
+  BitMemoryRegion stack_mask = code_info.GetStackMaskOf(encoding, *this);
+  for (size_t i = 0, e = encoding.stack_mask_size_in_bits; i < e; ++i) {
+    vios->Stream() << stack_mask.LoadBit(e - i - 1);
   }
   vios->Stream() << ")\n";
   if (HasDexRegisterMap(stack_map_encoding)) {
diff --git a/runtime/stack_map.h b/runtime/stack_map.h
index 679218d..83ba457 100644
--- a/runtime/stack_map.h
+++ b/runtime/stack_map.h
@@ -695,34 +695,34 @@
                       size_t dex_register_map_size,
                       size_t inline_info_size,
                       size_t register_mask_max,
-                      size_t stack_mask_bit_size) {
-    size_t bit_offset = 0;
-    DCHECK_EQ(kNativePcBitOffset, bit_offset);
-    bit_offset += MinimumBitsToStore(native_pc_max);
+                      size_t number_of_stack_masks) {
+    total_bit_size_ = 0;
+    DCHECK_EQ(kNativePcBitOffset, total_bit_size_);
+    total_bit_size_ += MinimumBitsToStore(native_pc_max);
 
-    dex_pc_bit_offset_ = dchecked_integral_cast<uint8_t>(bit_offset);
-    bit_offset += MinimumBitsToStore(1 /* kNoDexPc */ + dex_pc_max);
+    dex_pc_bit_offset_ = total_bit_size_;
+    total_bit_size_ += MinimumBitsToStore(1 /* kNoDexPc */ + dex_pc_max);
 
     // We also need +1 for kNoDexRegisterMap, but since the size is strictly
     // greater than any offset we might try to encode, we already implicitly have it.
-    dex_register_map_bit_offset_ = dchecked_integral_cast<uint8_t>(bit_offset);
-    bit_offset += MinimumBitsToStore(dex_register_map_size);
+    dex_register_map_bit_offset_ = total_bit_size_;
+    total_bit_size_ += MinimumBitsToStore(dex_register_map_size);
 
     // We also need +1 for kNoInlineInfo, but since the inline_info_size is strictly
     // greater than the offset we might try to encode, we already implicitly have it.
     // If inline_info_size is zero, we can encode only kNoInlineInfo (in zero bits).
-    inline_info_bit_offset_ = dchecked_integral_cast<uint8_t>(bit_offset);
+    inline_info_bit_offset_ = total_bit_size_;
     if (inline_info_size != 0) {
-      bit_offset += MinimumBitsToStore(dex_register_map_size + inline_info_size);
+      total_bit_size_ += MinimumBitsToStore(dex_register_map_size + inline_info_size);
     }
 
-    register_mask_bit_offset_ = dchecked_integral_cast<uint8_t>(bit_offset);
-    bit_offset += MinimumBitsToStore(register_mask_max);
+    register_mask_bit_offset_ = total_bit_size_;
+    total_bit_size_ += MinimumBitsToStore(register_mask_max);
 
-    stack_mask_bit_offset_ = dchecked_integral_cast<uint8_t>(bit_offset);
-    bit_offset += stack_mask_bit_size;
+    stack_mask_index_bit_offset_ = total_bit_size_;
+    total_bit_size_ += MinimumBitsToStore(number_of_stack_masks);
 
-    return bit_offset;
+    return total_bit_size_;
   }
 
   ALWAYS_INLINE FieldEncoding GetNativePcEncoding() const {
@@ -738,15 +738,13 @@
     return FieldEncoding(inline_info_bit_offset_, register_mask_bit_offset_, -1 /* min_value */);
   }
   ALWAYS_INLINE FieldEncoding GetRegisterMaskEncoding() const {
-    return FieldEncoding(register_mask_bit_offset_, stack_mask_bit_offset_);
+    return FieldEncoding(register_mask_bit_offset_, stack_mask_index_bit_offset_);
   }
-  ALWAYS_INLINE size_t GetStackMaskBitOffset() const {
-    // The end offset is not encoded. It is implicitly the end of stack map entry.
-    return stack_mask_bit_offset_;
+  ALWAYS_INLINE FieldEncoding GetStackMaskIndexEncoding() const {
+    return FieldEncoding(stack_mask_index_bit_offset_, total_bit_size_);
   }
-  ALWAYS_INLINE size_t GetNumberOfStackMaskBits(size_t stack_map_bits) const {
-    // Note that the stack mask bits are last.
-    return stack_map_bits - GetStackMaskBitOffset();
+  ALWAYS_INLINE size_t BitSize() const {
+    return total_bit_size_;
   }
 
   void Dump(VariableIndentationOutputStream* vios) const;
@@ -757,7 +755,8 @@
   uint8_t dex_register_map_bit_offset_;
   uint8_t inline_info_bit_offset_;
   uint8_t register_mask_bit_offset_;
-  uint8_t stack_mask_bit_offset_;
+  uint8_t stack_mask_index_bit_offset_;
+  uint8_t total_bit_size_;
 };
 
 /**
@@ -771,7 +770,7 @@
  * The information is of the form:
  *
  *   [native_pc_offset, dex_pc, dex_register_map_offset, inlining_info_offset, register_mask,
- *   stack_mask].
+ *   stack_mask_index].
  */
 class StackMap {
  public:
@@ -824,12 +823,12 @@
     encoding.GetRegisterMaskEncoding().Store(region_, mask);
   }
 
-  ALWAYS_INLINE bool GetStackMaskBit(const StackMapEncoding& encoding, size_t index) const {
-    return region_.LoadBit(encoding.GetStackMaskBitOffset() + index);
+  ALWAYS_INLINE uint32_t GetStackMaskIndex(const StackMapEncoding& encoding) const {
+    return encoding.GetStackMaskIndexEncoding().Load(region_);
   }
 
-  ALWAYS_INLINE void SetStackMaskBit(const StackMapEncoding& encoding, size_t index, bool value) {
-    region_.StoreBit(encoding.GetStackMaskBitOffset() + index, value);
+  ALWAYS_INLINE void SetStackMaskIndex(const StackMapEncoding& encoding, uint32_t mask) {
+    encoding.GetStackMaskIndexEncoding().Store(region_, mask);
   }
 
   ALWAYS_INLINE bool HasDexRegisterMap(const StackMapEncoding& encoding) const {
@@ -1031,7 +1030,8 @@
 struct CodeInfoEncoding {
   uint32_t non_header_size;
   uint32_t number_of_stack_maps;
-  uint32_t stack_map_size_in_bits;
+  uint32_t number_of_stack_masks;
+  uint32_t stack_mask_size_in_bits;
   uint32_t number_of_location_catalog_entries;
   StackMapEncoding stack_map_encoding;
   InlineInfoEncoding inline_info_encoding;
@@ -1043,7 +1043,8 @@
     const uint8_t* ptr = reinterpret_cast<const uint8_t*>(data);
     non_header_size = DecodeUnsignedLeb128(&ptr);
     number_of_stack_maps = DecodeUnsignedLeb128(&ptr);
-    stack_map_size_in_bits = DecodeUnsignedLeb128(&ptr);
+    number_of_stack_masks = DecodeUnsignedLeb128(&ptr);
+    stack_mask_size_in_bits = DecodeUnsignedLeb128(&ptr);
     number_of_location_catalog_entries = DecodeUnsignedLeb128(&ptr);
     static_assert(alignof(StackMapEncoding) == 1,
                   "StackMapEncoding should not require alignment");
@@ -1064,7 +1065,8 @@
   void Compress(Vector* dest) const {
     EncodeUnsignedLeb128(dest, non_header_size);
     EncodeUnsignedLeb128(dest, number_of_stack_maps);
-    EncodeUnsignedLeb128(dest, stack_map_size_in_bits);
+    EncodeUnsignedLeb128(dest, number_of_stack_masks);
+    EncodeUnsignedLeb128(dest, stack_mask_size_in_bits);
     EncodeUnsignedLeb128(dest, number_of_location_catalog_entries);
     const uint8_t* stack_map_ptr = reinterpret_cast<const uint8_t*>(&stack_map_encoding);
     dest->insert(dest->end(), stack_map_ptr, stack_map_ptr + sizeof(StackMapEncoding));
@@ -1098,7 +1100,7 @@
   }
 
   CodeInfoEncoding ExtractEncoding() const {
-    CodeInfoEncoding encoding(region_.start());
+    CodeInfoEncoding encoding(region_.begin());
     AssertValidStackMap(encoding);
     return encoding;
   }
@@ -1114,14 +1116,27 @@
   }
 
   ALWAYS_INLINE size_t GetNumberOfStackMaskBits(const CodeInfoEncoding& encoding) const {
-    return encoding.stack_map_encoding.GetNumberOfStackMaskBits(encoding.stack_map_size_in_bits);
+    return encoding.stack_mask_size_in_bits;
   }
 
   ALWAYS_INLINE StackMap GetStackMapAt(size_t i, const CodeInfoEncoding& encoding) const {
-    const size_t map_size = encoding.stack_map_size_in_bits;
+    const size_t map_size = encoding.stack_map_encoding.BitSize();
     return StackMap(BitMemoryRegion(GetStackMaps(encoding), i * map_size, map_size));
   }
 
+  BitMemoryRegion GetStackMask(const CodeInfoEncoding& encoding, size_t stack_mask_index) const {
+    // All stack mask data is stored at the very end.
+    const size_t entry_size = GetNumberOfStackMaskBits(encoding);
+    return BitMemoryRegion(region_,
+                           region_.size_in_bits() - entry_size * (stack_mask_index + 1),
+                           entry_size);
+  }
+
+  BitMemoryRegion GetStackMaskOf(const CodeInfoEncoding& encoding,
+                                 const StackMap& stack_map) const {
+    return GetStackMask(encoding, stack_map.GetStackMaskIndex(encoding.stack_map_encoding));
+  }
+
   uint32_t GetNumberOfLocationCatalogEntries(const CodeInfoEncoding& encoding) const {
     return encoding.number_of_location_catalog_entries;
   }
@@ -1135,10 +1150,14 @@
     return encoding.number_of_stack_maps;
   }
 
+  // Get the size of all the stack maps of this CodeInfo object, in bits. Not byte aligned.
+  ALWAYS_INLINE size_t GetStackMapsSizeInBits(const CodeInfoEncoding& encoding) const {
+    return encoding.stack_map_encoding.BitSize() * GetNumberOfStackMaps(encoding);
+  }
+
   // Get the size of all the stack maps of this CodeInfo object, in bytes.
   size_t GetStackMapsSize(const CodeInfoEncoding& encoding) const {
-    return RoundUp(encoding.stack_map_size_in_bits * GetNumberOfStackMaps(encoding), kBitsPerByte) /
-        kBitsPerByte;
+    return RoundUp(GetStackMapsSizeInBits(encoding), kBitsPerByte) / kBitsPerByte;
   }
 
   uint32_t GetDexRegisterLocationCatalogOffset(const CodeInfoEncoding& encoding) const {
@@ -1288,7 +1307,7 @@
                  << encoding.non_header_size << "\n"
                  << encoding.number_of_location_catalog_entries << "\n"
                  << encoding.number_of_stack_maps << "\n"
-                 << encoding.stack_map_size_in_bits;
+                 << encoding.stack_map_encoding.BitSize();
     }
   }
 
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 3c7a71a..8002f32 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -3038,9 +3038,10 @@
       T vreg_info(m, code_info, encoding, map, visitor_);
 
       // Visit stack entries that hold pointers.
-      size_t number_of_bits = code_info.GetNumberOfStackMaskBits(encoding);
+      const size_t number_of_bits = code_info.GetNumberOfStackMaskBits(encoding);
+      BitMemoryRegion stack_mask = code_info.GetStackMaskOf(encoding, map);
       for (size_t i = 0; i < number_of_bits; ++i) {
-        if (map.GetStackMaskBit(encoding.stack_map_encoding, i)) {
+        if (stack_mask.LoadBit(i)) {
           auto* ref_addr = vreg_base + i;
           mirror::Object* ref = ref_addr->AsMirrorPtr();
           if (ref != nullptr) {
@@ -3048,7 +3049,7 @@
             vreg_info.VisitStack(&new_ref, i, this);
             if (ref != new_ref) {
               ref_addr->Assign(new_ref);
             }
           }
         }
       }