Stack maps: Handle special cases using flags.

Keep the BitTable decoder simple: it always reads 1+NumColumns varints.
Move the special-case handling (empty and deduped tables) up to CodeInfo.

This speeds up CodeInfo decoding by 5%, and Maps startup by 0.05%.
Change in size is negligible (the bits mostly just move).
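
For reference, a sketch of the resulting layout (names taken from the diff
below; the comment block is illustrative, not part of the change):

  // CodeInfo header: six varints, followed by the encoded bit-tables.
  //   flags_                     bit 0: kHasInlineInfo
  //   packed_frame_size_         frame size in kStackAlignment units
  //   core_spill_mask_
  //   fp_spill_mask_
  //   number_of_dex_registers_
  //   bit_table_flags_           bits [0..7]:  bit-table i is stored
  //                              bits [8..15]: bit-table i is deduped; its data
  //                                            is replaced by a varint holding
  //                                            the backward bit-offset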

Test: test.py -b --host --64 --optimizing
Change-Id: Ib6abe52f04384de9ffd7cfba04a3124b62f713ff
diff --git a/compiler/optimizing/stack_map_stream.cc b/compiler/optimizing/stack_map_stream.cc
index e21e21c..87702cc 100644
--- a/compiler/optimizing/stack_map_stream.cc
+++ b/compiler/optimizing/stack_map_stream.cc
@@ -184,7 +184,6 @@
   in_inline_info_ = true;
   DCHECK_EQ(expected_num_dex_registers_, current_dex_registers_.size());
 
-  flags_ |= CodeInfo::kHasInlineInfo;
   expected_num_dex_registers_ += num_dex_registers;
 
   BitTableBuilder<InlineInfo>::Entry entry;
@@ -294,31 +293,31 @@
   }
 }
 
-template<typename Writer, typename Builder>
-ALWAYS_INLINE static void EncodeTable(Writer& out, const Builder& bit_table) {
-  out.WriteBit(false);  // Is not deduped.
-  bit_table.Encode(out);
-}
-
 ScopedArenaVector<uint8_t> StackMapStream::Encode() {
   DCHECK(in_stack_map_ == false) << "Mismatched Begin/End calls";
   DCHECK(in_inline_info_ == false) << "Mismatched Begin/End calls";
 
+  uint32_t flags = (inline_infos_.size() > 0) ? CodeInfo::kHasInlineInfo : 0;
+  uint32_t bit_table_flags = 0;
+  ForEachBitTable([&bit_table_flags](size_t i, auto bit_table) {
+    if (bit_table->size() != 0) {  // Record which bit-tables are stored.
+      bit_table_flags |= 1 << i;
+    }
+  });
+
   ScopedArenaVector<uint8_t> buffer(allocator_->Adapter(kArenaAllocStackMapStream));
   BitMemoryWriter<ScopedArenaVector<uint8_t>> out(&buffer);
-  out.WriteVarint(flags_);
+  out.WriteVarint(flags);
   out.WriteVarint(packed_frame_size_);
   out.WriteVarint(core_spill_mask_);
   out.WriteVarint(fp_spill_mask_);
   out.WriteVarint(num_dex_registers_);
-  EncodeTable(out, stack_maps_);
-  EncodeTable(out, register_masks_);
-  EncodeTable(out, stack_masks_);
-  EncodeTable(out, inline_infos_);
-  EncodeTable(out, method_infos_);
-  EncodeTable(out, dex_register_masks_);
-  EncodeTable(out, dex_register_maps_);
-  EncodeTable(out, dex_register_catalog_);
+  out.WriteVarint(bit_table_flags);
+  ForEachBitTable([&out](size_t, auto bit_table) {
+    if (bit_table->size() != 0) {  // Skip empty bit-tables.
+      bit_table->Encode(out);
+    }
+  });
 
   // Verify that we can load the CodeInfo and check some essentials.
   CodeInfo code_info(buffer.data());
diff --git a/compiler/optimizing/stack_map_stream.h b/compiler/optimizing/stack_map_stream.h
index 20dd32e..33c624a 100644
--- a/compiler/optimizing/stack_map_stream.h
+++ b/compiler/optimizing/stack_map_stream.h
@@ -40,10 +40,10 @@
       : allocator_(allocator),
         instruction_set_(instruction_set),
         stack_maps_(allocator),
-        inline_infos_(allocator),
-        method_infos_(allocator),
         register_masks_(allocator),
         stack_masks_(allocator),
+        inline_infos_(allocator),
+        method_infos_(allocator),
         dex_register_masks_(allocator),
         dex_register_maps_(allocator),
         dex_register_catalog_(allocator),
@@ -97,18 +97,32 @@
 
   void CreateDexRegisterMap();
 
+  // Invokes the callback with the index and a pointer to each BitTableBuilder field.
+  template<typename Callback>
+  void ForEachBitTable(Callback callback) {
+    size_t index = 0;
+    callback(index++, &stack_maps_);
+    callback(index++, &register_masks_);
+    callback(index++, &stack_masks_);
+    callback(index++, &inline_infos_);
+    callback(index++, &method_infos_);
+    callback(index++, &dex_register_masks_);
+    callback(index++, &dex_register_maps_);
+    callback(index++, &dex_register_catalog_);
+    CHECK_EQ(index, CodeInfo::kNumBitTables);
+  }
+
   ScopedArenaAllocator* allocator_;
   const InstructionSet instruction_set_;
-  uint32_t flags_ = 0;
   uint32_t packed_frame_size_ = 0;
   uint32_t core_spill_mask_ = 0;
   uint32_t fp_spill_mask_ = 0;
   uint32_t num_dex_registers_ = 0;
   BitTableBuilder<StackMap> stack_maps_;
-  BitTableBuilder<InlineInfo> inline_infos_;
-  BitTableBuilder<MethodInfo> method_infos_;
   BitTableBuilder<RegisterMask> register_masks_;
   BitmapTableBuilder stack_masks_;
+  BitTableBuilder<InlineInfo> inline_infos_;
+  BitTableBuilder<MethodInfo> method_infos_;
   BitmapTableBuilder dex_register_masks_;
   BitTableBuilder<DexRegisterMapInfo> dex_register_maps_;
   BitTableBuilder<DexRegisterInfo> dex_register_catalog_;
diff --git a/libartbase/base/bit_memory_region.h b/libartbase/base/bit_memory_region.h
index 637332e..50d132e 100644
--- a/libartbase/base/bit_memory_region.h
+++ b/libartbase/base/bit_memory_region.h
@@ -254,23 +254,26 @@
 
   // Optimized version to read several consecutive varints.
   // It reads all the headers at once in a single bit read.
-  template<int N>  // Inference works only with ref-arrays.
+  template<size_t N>  // Inference works only with ref-arrays.
   ALWAYS_INLINE void ReadVarints(uint32_t (&varints)[N]) {
-    static_assert(N * kVarintHeaderBits <= sizeof(uint32_t) * kBitsPerByte, "N too big");
-    uint32_t headers = ReadBits(N * kVarintHeaderBits);
+    constexpr size_t kBatch = std::min(N, sizeof(uint32_t) * kBitsPerByte / kVarintHeaderBits);
+    uint32_t headers = ReadBits(kBatch * kVarintHeaderBits);
     uint32_t* out = varints;
-    for (int i = 0; i < N; out++) {
+    for (size_t i = 0; i < kBatch; out++) {
       uint32_t header = BitFieldExtract(headers, (i++) * kVarintHeaderBits, kVarintHeaderBits);
       if (LIKELY(header <= kVarintSmallValue)) {
         // Fast-path: consume one of the headers and continue to the next varint.
         *out = header;
       } else {
        // Slow-path: roll back the reader, read the large value, and read the remaining headers.
-        finished_region_.Resize(finished_region_.size_in_bits() - (N-i) * kVarintHeaderBits);
+        finished_region_.Resize(finished_region_.size_in_bits() - (kBatch-i) * kVarintHeaderBits);
         *out = ReadBits((header - kVarintSmallValue) * kBitsPerByte);
-        headers = ReadBits((N-i) * kVarintHeaderBits) << (i * kVarintHeaderBits);
+        headers = ReadBits((kBatch-i) * kVarintHeaderBits) << (i * kVarintHeaderBits);
       }
     }
+    for (size_t i = kBatch; i < N; i++, out++) {
+      *out = ReadVarint();
+    }
   }
 
  private:
diff --git a/libartbase/base/bit_table.h b/libartbase/base/bit_table.h
index 6c91ce5..1984265 100644
--- a/libartbase/base/bit_table.h
+++ b/libartbase/base/bit_table.h
@@ -49,15 +49,13 @@
 
   ALWAYS_INLINE void Decode(BitMemoryReader& reader) {
     // Decode row count and column sizes from the table header.
-    num_rows_ = reader.ReadVarint();
-    if (num_rows_ != 0) {
-      uint32_t column_bits[kNumColumns];
-      reader.ReadVarints(column_bits);
-      column_offset_[0] = 0;
-      for (uint32_t i = 0; i < kNumColumns; i++) {
-        size_t column_end = column_offset_[i] + column_bits[i];
-        column_offset_[i + 1] = dchecked_integral_cast<uint16_t>(column_end);
-      }
+    uint32_t header[1 + kNumColumns];
+    reader.ReadVarints(header);
+    num_rows_ = header[0];
+    column_offset_[0] = 0;
+    for (uint32_t i = 0; i < kNumColumns; i++) {
+      size_t column_end = column_offset_[i] + header[i + 1];
+      column_offset_[i + 1] = dchecked_integral_cast<uint16_t>(column_end);
     }
 
     // Record the region which contains the table data and skip past it.
@@ -357,18 +355,17 @@
 
     std::array<uint32_t, kNumColumns> column_bits;
     Measure(&column_bits);
-    out.WriteVarint(size());
-    if (size() != 0) {
-      // Write table header.
-      for (uint32_t c = 0; c < kNumColumns; c++) {
-        out.WriteVarint(column_bits[c]);
-      }
 
-      // Write table data.
-      for (uint32_t r = 0; r < size(); r++) {
-        for (uint32_t c = 0; c < kNumColumns; c++) {
-          out.WriteBits(rows_[r][c] - kValueBias, column_bits[c]);
-        }
+    // Write table header.
+    out.WriteVarint(size());
+    for (uint32_t c = 0; c < kNumColumns; c++) {
+      out.WriteVarint(column_bits[c]);
+    }
+
+    // Write table data.
+    for (uint32_t r = 0; r < size(); r++) {
+      for (uint32_t c = 0; c < kNumColumns; c++) {
+        out.WriteBits(rows_[r][c] - kValueBias, column_bits[c]);
       }
     }
 
@@ -446,16 +443,15 @@
   void Encode(BitMemoryWriter<Vector>& out) const {
     size_t initial_bit_offset = out.NumberOfWrittenBits();
 
+    // Write table header.
     out.WriteVarint(size());
-    if (size() != 0) {
-      out.WriteVarint(max_num_bits_);
+    out.WriteVarint(max_num_bits_);
 
-      // Write table data.
-      for (MemoryRegion row : rows_) {
-        BitMemoryRegion src(row);
-        BitMemoryRegion dst = out.Allocate(max_num_bits_);
-        dst.StoreBits(/* bit_offset */ 0, src, std::min(max_num_bits_, src.size_in_bits()));
-      }
+    // Write table data.
+    for (MemoryRegion row : rows_) {
+      BitMemoryRegion src(row);
+      BitMemoryRegion dst = out.Allocate(max_num_bits_);
+      dst.StoreBits(/* bit_offset */ 0, src, std::min(max_num_bits_, src.size_in_bits()));
     }
 
     // Verify the written data.
diff --git a/oatdump/oatdump_test.h b/oatdump/oatdump_test.h
index fa416e7..359b060 100644
--- a/oatdump/oatdump_test.h
+++ b/oatdump/oatdump_test.h
@@ -179,7 +179,7 @@
         // Code and dex code do not show up if list only.
         expected_prefixes.push_back("DEX CODE:");
         expected_prefixes.push_back("CODE:");
-        expected_prefixes.push_back("InlineInfo");
+        expected_prefixes.push_back("StackMap");
       }
       if (mode == kModeArt) {
         exec_argv.push_back("--runtime-arg");
diff --git a/runtime/oat.h b/runtime/oat.h
index f4b5a6e..c02ac0b 100644
--- a/runtime/oat.h
+++ b/runtime/oat.h
@@ -32,8 +32,8 @@
 class PACKED(4) OatHeader {
  public:
   static constexpr std::array<uint8_t, 4> kOatMagic { { 'o', 'a', 't', '\n' } };
-  // Last oat version changed reason: Optimize stack maps: add fast path for no inline info.
-  static constexpr std::array<uint8_t, 4> kOatVersion { { '1', '7', '1', '\0' } };
+  // Last oat version changed reason: Stack maps: Handle special cases using flags.
+  static constexpr std::array<uint8_t, 4> kOatVersion { { '1', '7', '2', '\0' } };
 
   static constexpr const char* kDex2OatCmdLineKey = "dex2oat-cmdline";
   static constexpr const char* kDebuggableKey = "debuggable";
diff --git a/runtime/stack_map.cc b/runtime/stack_map.cc
index eef7378..2300d1f 100644
--- a/runtime/stack_map.cc
+++ b/runtime/stack_map.cc
@@ -31,31 +31,24 @@
   : CodeInfo(header->GetOptimizedCodeInfoPtr(), flags) {
 }
 
-// Returns true if the decoded table was deduped.
-template<typename Accessor>
-ALWAYS_INLINE static bool DecodeTable(BitTable<Accessor>& table, BitMemoryReader& reader) {
-  bool is_deduped = reader.ReadBit();
-  if (UNLIKELY(is_deduped)) {
-    ssize_t bit_offset = reader.NumberOfReadBits() - reader.ReadVarint();
-    BitMemoryReader reader2(reader.data(), bit_offset);  // The offset is negative.
-    table.Decode(reader2);
-  } else {
-    table.Decode(reader);
-  }
-  return is_deduped;
-}
-
 void CodeInfo::Decode(const uint8_t* data, DecodeFlags flags) {
   BitMemoryReader reader(data);
-  uint32_t header[5];
+  uint32_t header[kNumHeaders];
   reader.ReadVarints(header);
-  flags_ = header[0];
-  packed_frame_size_ = header[1];
-  core_spill_mask_ = header[2];
-  fp_spill_mask_ = header[3];
-  number_of_dex_registers_ = header[4];
-  ForEachBitTableField([this, &reader](auto member_pointer) {
-    DecodeTable(this->*member_pointer, reader);
+  ForEachHeaderField([this, &header](size_t i, auto member_pointer) {
+    this->*member_pointer = header[i];
+  });
+  ForEachBitTableField([this, &reader](size_t i, auto member_pointer) {
+    auto& table = this->*member_pointer;
+    if (HasBitTable(i)) {
+      if (UNLIKELY(IsBitTableDeduped(i))) {
+        ssize_t bit_offset = reader.NumberOfReadBits() - reader.ReadVarint();
+        BitMemoryReader reader2(reader.data(), bit_offset);  // The offset is negative.
+        table.Decode(reader2);
+      } else {
+        table.Decode(reader);
+      }
+    }
   }, flags);
   size_in_bits_ = reader.NumberOfReadBits();
   if (flags == AllTables) {
@@ -66,37 +59,54 @@
 size_t CodeInfo::Deduper::Dedupe(const uint8_t* code_info_data) {
   writer_.ByteAlign();
   size_t deduped_offset = writer_.NumberOfWrittenBits() / kBitsPerByte;
+
+  // Read the existing code info and find (and keep) the dedup-map iterator for each table.
+  // The iterator stores the BitMemoryRegion and bit_offset of the previous identical BitTable.
   BitMemoryReader reader(code_info_data);
   CodeInfo code_info;  // Temporary storage for decoded data.
-  ForEachHeaderField([this, &reader, &code_info](auto member_pointer) {
+  ForEachHeaderField([&reader, &code_info](size_t, auto member_pointer) {
     code_info.*member_pointer = reader.ReadVarint();
+  });
+  std::map<BitMemoryRegion, uint32_t, BitMemoryRegion::Less>::iterator it[kNumBitTables];
+  ForEachBitTableField([this, &reader, &code_info, &it](size_t i, auto member_pointer) {
+    DCHECK(!code_info.IsBitTableDeduped(i));
+    if (code_info.HasBitTable(i)) {
+      size_t bit_table_start = reader.NumberOfReadBits();
+      (code_info.*member_pointer).Decode(reader);
+      BitMemoryRegion region = reader.GetReadRegion().Subregion(bit_table_start);
+      it[i] = dedupe_map_.emplace(region, /* default bit_offset */ 0).first;
+      if (it[i]->second != 0 && region.size_in_bits() > 32) {  // Seen before and large?
+        code_info.SetBitTableDeduped(i);  // Mark as deduped before we write header.
+      }
+    }
+  });
+
+  // Write the code info back, but replace deduped tables with relative offsets.
+  ForEachHeaderField([this, &code_info](size_t, auto member_pointer) {
     writer_.WriteVarint(code_info.*member_pointer);
   });
-  ForEachBitTableField([this, &reader, &code_info](auto member_pointer) {
-    bool is_deduped = reader.ReadBit();
-    DCHECK(!is_deduped);
-    size_t bit_table_start = reader.NumberOfReadBits();
-    (code_info.*member_pointer).Decode(reader);
-    BitMemoryRegion region = reader.GetReadRegion().Subregion(bit_table_start);
-    auto it = dedupe_map_.insert(std::make_pair(region, /* placeholder */ 0));
-    if (it.second /* new bit table */ || region.size_in_bits() < 32) {
-      writer_.WriteBit(false);  // Is not deduped.
-      it.first->second = writer_.NumberOfWrittenBits();
-      writer_.WriteRegion(region);
-    } else {
-      writer_.WriteBit(true);  // Is deduped.
-      size_t bit_offset = writer_.NumberOfWrittenBits();
-      writer_.WriteVarint(bit_offset - it.first->second);
+  ForEachBitTableField([this, &code_info, &it](size_t i, auto) {
+    if (code_info.HasBitTable(i)) {
+      uint32_t& bit_offset = it[i]->second;
+      if (code_info.IsBitTableDeduped(i)) {
+        DCHECK_NE(bit_offset, 0u);
+        writer_.WriteVarint(writer_.NumberOfWrittenBits() - bit_offset);
+      } else {
+        bit_offset = writer_.NumberOfWrittenBits();  // Store offset in dedup map.
+        writer_.WriteRegion(it[i]->first);
+      }
     }
   });
 
   if (kIsDebugBuild) {
     CodeInfo old_code_info(code_info_data);
     CodeInfo new_code_info(writer_.data() + deduped_offset);
-    ForEachHeaderField([&old_code_info, &new_code_info](auto member_pointer) {
-      DCHECK_EQ(old_code_info.*member_pointer, new_code_info.*member_pointer);
+    ForEachHeaderField([&old_code_info, &new_code_info](size_t, auto member_pointer) {
+      if (member_pointer != &CodeInfo::bit_table_flags_) {  // Expected to differ.
+        DCHECK_EQ(old_code_info.*member_pointer, new_code_info.*member_pointer);
+      }
     });
-    ForEachBitTableField([&old_code_info, &new_code_info](auto member_pointer) {
+    ForEachBitTableField([&old_code_info, &new_code_info](size_t, auto member_pointer) {
       DCHECK((old_code_info.*member_pointer).Equals(new_code_info.*member_pointer));
     });
   }
@@ -192,23 +202,28 @@
 void CodeInfo::CollectSizeStats(const uint8_t* code_info_data, /*out*/ Stats* parent) {
   Stats* codeinfo_stats = parent->Child("CodeInfo");
   BitMemoryReader reader(code_info_data);
-  ForEachHeaderField([&reader](auto) { reader.ReadVarint(); });
-  codeinfo_stats->Child("Header")->AddBits(reader.NumberOfReadBits());
   CodeInfo code_info;  // Temporary storage for decoded tables.
-  ForEachBitTableField([codeinfo_stats, &reader, &code_info](auto member_pointer) {
+  ForEachHeaderField([&reader, &code_info](size_t, auto member_pointer) {
+    code_info.*member_pointer = reader.ReadVarint();
+  });
+  codeinfo_stats->Child("Header")->AddBits(reader.NumberOfReadBits());
+  ForEachBitTableField([codeinfo_stats, &reader, &code_info](size_t i, auto member_pointer) {
     auto& table = code_info.*member_pointer;
     size_t bit_offset = reader.NumberOfReadBits();
-    bool deduped = DecodeTable(table, reader);
-    if (deduped) {
-      codeinfo_stats->Child("DedupeOffset")->AddBits(reader.NumberOfReadBits() - bit_offset);
-    } else {
-      Stats* table_stats = codeinfo_stats->Child(table.GetName());
-      table_stats->AddBits(reader.NumberOfReadBits() - bit_offset);
-      const char* const* column_names = table.GetColumnNames();
-      for (size_t c = 0; c < table.NumColumns(); c++) {
-        if (table.NumColumnBits(c) > 0) {
-          Stats* column_stats = table_stats->Child(column_names[c]);
-          column_stats->AddBits(table.NumRows() * table.NumColumnBits(c), table.NumRows());
+    if (code_info.HasBitTable(i)) {
+      if (code_info.IsBitTableDeduped(i)) {
+        reader.ReadVarint();
+        codeinfo_stats->Child("DedupeOffset")->AddBits(reader.NumberOfReadBits() - bit_offset);
+      } else {
+        table.Decode(reader);
+        Stats* table_stats = codeinfo_stats->Child(table.GetName());
+        table_stats->AddBits(reader.NumberOfReadBits() - bit_offset);
+        const char* const* column_names = table.GetColumnNames();
+        for (size_t c = 0; c < table.NumColumns(); c++) {
+          if (table.NumColumnBits(c) > 0) {
+            Stats* column_stats = table_stats->Child(column_names[c]);
+            column_stats->AddBits(table.NumRows() * table.NumColumnBits(c), table.NumRows());
+          }
         }
       }
     }
@@ -234,14 +249,13 @@
                     bool verbose,
                     InstructionSet instruction_set) const {
   vios->Stream() << "CodeInfo BitSize=" << size_in_bits_
-    << " Flags:" << flags_
     << " FrameSize:" << packed_frame_size_ * kStackAlignment
     << " CoreSpillMask:" << std::hex << core_spill_mask_
     << " FpSpillMask:" << std::hex << fp_spill_mask_
     << " NumberOfDexRegisters:" << std::dec << number_of_dex_registers_
     << "\n";
   ScopedIndentation indent1(vios);
-  ForEachBitTableField([this, &vios, verbose](auto member_pointer) {
+  ForEachBitTableField([this, &vios, verbose](size_t, auto member_pointer) {
     const auto& table = this->*member_pointer;
     if (table.NumRows() != 0) {
       vios->Stream() << table.GetName() << " BitSize=" << table.DataBitSize();
diff --git a/runtime/stack_map.h b/runtime/stack_map.h
index a971467..c088eb6 100644
--- a/runtime/stack_map.h
+++ b/runtime/stack_map.h
@@ -181,7 +181,6 @@
   BIT_TABLE_COLUMN(3, ArtMethodHi)  // High bits of ArtMethod*.
   BIT_TABLE_COLUMN(4, ArtMethodLo)  // Low bits of ArtMethod*.
   BIT_TABLE_COLUMN(5, NumberOfDexRegisters)  // Includes outer levels and the main method.
-  BIT_TABLE_COLUMN(6, DexRegisterMapIndex)
 
   static constexpr uint32_t kLast = -1;
   static constexpr uint32_t kMore = 0;
@@ -452,7 +451,7 @@
  private:
   // Returns the lower bound (first stack map whose pc is greater than or equal to the desired one).
   // It ignores catch stack maps at the end (as if they had the maximum pc value).
-  BitTable<StackMap>::const_iterator BinarySearchNativePc(uint32_t packed_pc) const;
+  ALWAYS_INLINE BitTable<StackMap>::const_iterator BinarySearchNativePc(uint32_t packed_pc) const;
 
   // Scan backward to determine dex register locations at given stack map.
   void DecodeDexRegisterMap(uint32_t stack_map_index,
@@ -461,44 +460,60 @@
 
   void Decode(const uint8_t* data, DecodeFlags flags);
 
-  // Invokes the callback with member pointer of each header field.
+  // Invokes the callback with the index and member pointer of each header field.
   template<typename Callback>
   ALWAYS_INLINE static void ForEachHeaderField(Callback callback) {
-    callback(&CodeInfo::flags_);
-    callback(&CodeInfo::packed_frame_size_);
-    callback(&CodeInfo::core_spill_mask_);
-    callback(&CodeInfo::fp_spill_mask_);
-    callback(&CodeInfo::number_of_dex_registers_);
+    size_t index = 0;
+    callback(index++, &CodeInfo::flags_);
+    callback(index++, &CodeInfo::packed_frame_size_);
+    callback(index++, &CodeInfo::core_spill_mask_);
+    callback(index++, &CodeInfo::fp_spill_mask_);
+    callback(index++, &CodeInfo::number_of_dex_registers_);
+    callback(index++, &CodeInfo::bit_table_flags_);
+    DCHECK_EQ(index, kNumHeaders);
   }
 
-  // Invokes the callback with member pointer of each BitTable field.
+  // Invokes the callback with the index and member pointer of each BitTable field.
   template<typename Callback>
   ALWAYS_INLINE static void ForEachBitTableField(Callback callback, DecodeFlags flags = AllTables) {
-    callback(&CodeInfo::stack_maps_);
-    callback(&CodeInfo::register_masks_);
-    callback(&CodeInfo::stack_masks_);
+    size_t index = 0;
+    callback(index++, &CodeInfo::stack_maps_);
+    callback(index++, &CodeInfo::register_masks_);
+    callback(index++, &CodeInfo::stack_masks_);
     if (flags & DecodeFlags::GcMasksOnly) {
       return;
     }
-    callback(&CodeInfo::inline_infos_);
-    callback(&CodeInfo::method_infos_);
+    callback(index++, &CodeInfo::inline_infos_);
+    callback(index++, &CodeInfo::method_infos_);
     if (flags & DecodeFlags::InlineInfoOnly) {
       return;
     }
-    callback(&CodeInfo::dex_register_masks_);
-    callback(&CodeInfo::dex_register_maps_);
-    callback(&CodeInfo::dex_register_catalog_);
+    callback(index++, &CodeInfo::dex_register_masks_);
+    callback(index++, &CodeInfo::dex_register_maps_);
+    callback(index++, &CodeInfo::dex_register_catalog_);
+    DCHECK_EQ(index, kNumBitTables);
   }
 
+  bool HasBitTable(size_t i) { return ((bit_table_flags_ >> i) & 1) != 0; }
+  bool IsBitTableDeduped(size_t i) { return ((bit_table_flags_ >> (kNumBitTables + i)) & 1) != 0; }
+  void SetBitTableDeduped(size_t i) { bit_table_flags_ |= 1 << (kNumBitTables + i); }
+
   enum Flags {
     kHasInlineInfo = 1 << 0,
   };
 
+  // The CodeInfo starts with a sequence of variable-length bit-encoded integers.
+  static constexpr size_t kNumHeaders = 6;
   uint32_t flags_ = 0;
   uint32_t packed_frame_size_ = 0;  // Frame size in kStackAlignment units.
   uint32_t core_spill_mask_ = 0;
   uint32_t fp_spill_mask_ = 0;
   uint32_t number_of_dex_registers_ = 0;
+  uint32_t bit_table_flags_ = 0;
+
+  // The encoded bit-tables follow the header.  Based on the flags field above,
+  // a bit-table might be omitted, or replaced by a relative bit-offset if deduped.
+  static constexpr size_t kNumBitTables = 8;
   BitTable<StackMap> stack_maps_;
   BitTable<RegisterMask> register_masks_;
   BitTable<StackMask> stack_masks_;
@@ -507,6 +522,7 @@
   BitTable<DexRegisterMask> dex_register_masks_;
   BitTable<DexRegisterMapInfo> dex_register_maps_;
   BitTable<DexRegisterInfo> dex_register_catalog_;
+
   uint32_t size_in_bits_ = 0;
 
   friend class StackMapStream;