Preallocate `CodeInfoTableDeduper::dedupe_set_`.

Preallocate large buffer except for multi-image compilation
which requires multiple dedupe sets, so we let them grow as
needed without wasting too much memory. We do not expect to
use multi-image compilation on user devices.

Also remove the hash from the dedupe set entry. With the
large pre-allocated buffer, we have very few hash conflicts
and comparing the hash just slows down finding identical
entries. If we exceed the pre-allocated buffer, this shall
trade some performance for lower memory use.

Test: m test-art-host-gtest
Test: testrunner.py --host --optimizing
Bug: 181943478
Change-Id: Ibbb98b6d3ebbc35ffa75165baad54f4df7c62ad9
diff --git a/compiler/driver/compiled_method_storage.cc b/compiler/driver/compiled_method_storage.cc
index 03c906b..4857ec0 100644
--- a/compiler/driver/compiled_method_storage.cc
+++ b/compiler/driver/compiled_method_storage.cc
@@ -183,6 +183,11 @@
   ReleaseArrayIfNotDeduplicated(code);
 }
 
+size_t CompiledMethodStorage::UniqueCodeEntries() const {
+  DCHECK(DedupeEnabled());
+  return dedupe_code_.Size(Thread::Current());
+}
+
 const LengthPrefixedArray<uint8_t>* CompiledMethodStorage::DeduplicateVMapTable(
     const ArrayRef<const uint8_t>& table) {
   return AllocateOrDeduplicateArray(table, &dedupe_vmap_table_);
@@ -192,6 +197,11 @@
   ReleaseArrayIfNotDeduplicated(table);
 }
 
+size_t CompiledMethodStorage::UniqueVMapTableEntries() const {
+  DCHECK(DedupeEnabled());
+  return dedupe_vmap_table_.Size(Thread::Current());
+}
+
 const LengthPrefixedArray<uint8_t>* CompiledMethodStorage::DeduplicateCFIInfo(
     const ArrayRef<const uint8_t>& cfi_info) {
   return AllocateOrDeduplicateArray(cfi_info, &dedupe_cfi_info_);
@@ -201,6 +211,11 @@
   ReleaseArrayIfNotDeduplicated(cfi_info);
 }
 
+size_t CompiledMethodStorage::UniqueCFIInfoEntries() const {
+  DCHECK(DedupeEnabled());
+  return dedupe_cfi_info_.Size(Thread::Current());
+}
+
 const LengthPrefixedArray<linker::LinkerPatch>* CompiledMethodStorage::DeduplicateLinkerPatches(
     const ArrayRef<const linker::LinkerPatch>& linker_patches) {
   return AllocateOrDeduplicateArray(linker_patches, &dedupe_linker_patches_);
@@ -211,6 +226,11 @@
   ReleaseArrayIfNotDeduplicated(linker_patches);
 }
 
+size_t CompiledMethodStorage::UniqueLinkerPatchesEntries() const {
+  DCHECK(DedupeEnabled());
+  return dedupe_linker_patches_.Size(Thread::Current());
+}
+
 CompiledMethodStorage::ThunkMapKey CompiledMethodStorage::GetThunkMapKey(
     const linker::LinkerPatch& linker_patch) {
   uint32_t custom_value1 = 0u;
diff --git a/compiler/driver/compiled_method_storage.h b/compiler/driver/compiled_method_storage.h
index a5a7691..f9f3401 100644
--- a/compiler/driver/compiled_method_storage.h
+++ b/compiler/driver/compiled_method_storage.h
@@ -53,16 +53,20 @@
 
   const LengthPrefixedArray<uint8_t>* DeduplicateCode(const ArrayRef<const uint8_t>& code);
   void ReleaseCode(const LengthPrefixedArray<uint8_t>* code);
+  size_t UniqueCodeEntries() const;
 
   const LengthPrefixedArray<uint8_t>* DeduplicateVMapTable(const ArrayRef<const uint8_t>& table);
   void ReleaseVMapTable(const LengthPrefixedArray<uint8_t>* table);
+  size_t UniqueVMapTableEntries() const;
 
   const LengthPrefixedArray<uint8_t>* DeduplicateCFIInfo(const ArrayRef<const uint8_t>& cfi_info);
   void ReleaseCFIInfo(const LengthPrefixedArray<uint8_t>* cfi_info);
+  size_t UniqueCFIInfoEntries() const;
 
   const LengthPrefixedArray<linker::LinkerPatch>* DeduplicateLinkerPatches(
       const ArrayRef<const linker::LinkerPatch>& linker_patches);
   void ReleaseLinkerPatches(const LengthPrefixedArray<linker::LinkerPatch>* linker_patches);
+  size_t UniqueLinkerPatchesEntries() const;
 
   // Returns the code associated with the given patch.
   // If the code has not been set, returns empty data.
diff --git a/compiler/driver/compiler_options.cc b/compiler/driver/compiler_options.cc
index ad5a009..51cd999 100644
--- a/compiler/driver/compiler_options.cc
+++ b/compiler/driver/compiler_options.cc
@@ -51,6 +51,7 @@
       verification_results_(nullptr),
       compiler_type_(CompilerType::kAotCompiler),
       image_type_(ImageType::kNone),
+      multi_image_(false),
       compile_art_test_(false),
       baseline_(false),
       debuggable_(false),
diff --git a/compiler/driver/compiler_options.h b/compiler/driver/compiler_options.h
index 7830924..0034306 100644
--- a/compiler/driver/compiler_options.h
+++ b/compiler/driver/compiler_options.h
@@ -233,6 +233,10 @@
     return image_type_ == ImageType::kAppImage;
   }
 
+  bool IsMultiImage() const {
+    return multi_image_;
+  }
+
   // Returns whether we are running ART tests.
   // The compiler will use that information for checking invariants.
   bool CompileArtTest() const {
@@ -408,6 +412,7 @@
 
   CompilerType compiler_type_;
   ImageType image_type_;
+  bool multi_image_;
   bool compile_art_test_;
   bool baseline_;
   bool debuggable_;
diff --git a/compiler/utils/dedupe_set-inl.h b/compiler/utils/dedupe_set-inl.h
index 4e892f2..d4a9cc8 100644
--- a/compiler/utils/dedupe_set-inl.h
+++ b/compiler/utils/dedupe_set-inl.h
@@ -81,6 +81,11 @@
     return store_key;
   }
 
+  size_t Size(Thread* self) {
+    MutexLock lock(self, lock_);
+    return keys_.size();
+  }
+
   void UpdateStats(Thread* self, Stats* global_stats) REQUIRES(!lock_) {
     // HashSet<> doesn't keep entries ordered by hash, so we actually allocate memory
     // for bookkeeping while collecting the stats.
@@ -234,6 +239,20 @@
           typename HashType,
           typename HashFunc,
           HashType kShard>
+size_t DedupeSet<InKey, StoreKey, Alloc, HashType, HashFunc, kShard>::Size(Thread* self) const {
+  size_t result = 0u;
+  for (const auto& shard : shards_) {
+    result += shard->Size(self);
+  }
+  return result;
+}
+
+template <typename InKey,
+          typename StoreKey,
+          typename Alloc,
+          typename HashType,
+          typename HashFunc,
+          HashType kShard>
 std::string DedupeSet<InKey, StoreKey, Alloc, HashType, HashFunc, kShard>::DumpStats(
     Thread* self) const {
   Stats stats;
diff --git a/compiler/utils/dedupe_set.h b/compiler/utils/dedupe_set.h
index 3baa061..a1ba208 100644
--- a/compiler/utils/dedupe_set.h
+++ b/compiler/utils/dedupe_set.h
@@ -45,6 +45,8 @@
 
   ~DedupeSet();
 
+  size_t Size(Thread* self) const;
+
   std::string DumpStats(Thread* self) const;
 
  private:
diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc
index 6e93d86..9e6103b 100644
--- a/dex2oat/dex2oat.cc
+++ b/dex2oat/dex2oat.cc
@@ -525,7 +525,6 @@
         zip_fd_(-1),
         image_fd_(-1),
         have_multi_image_arg_(false),
-        multi_image_(false),
         image_base_(0U),
         image_storage_mode_(ImageHeader::kStorageModeUncompressed),
         passes_to_run_filename_(nullptr),
@@ -771,19 +770,20 @@
       }
     } else {
       // Use the default, i.e. multi-image for boot image and boot image extension.
-      multi_image_ = IsBootImage() || IsBootImageExtension();  // Shall pass checks below.
+      // This shall pass the checks below.
+      compiler_options_->multi_image_ = IsBootImage() || IsBootImageExtension();
     }
     // On target we support generating a single image for the primary boot image.
     if (!kIsTargetBuild) {
-      if (IsBootImage() && !multi_image_) {
+      if (IsBootImage() && !compiler_options_->multi_image_) {
         Usage("--single-image specified for primary boot image on host");
       }
     }
-    if (IsAppImage() && multi_image_) {
+    if (IsAppImage() && compiler_options_->multi_image_) {
       Usage("--multi-image specified for app image");
     }
 
-    if (image_fd_ != -1 && multi_image_) {
+    if (image_fd_ != -1 && compiler_options_->multi_image_) {
       Usage("--single-image not specified for --image-fd");
     }
 
@@ -909,7 +909,7 @@
 
   void ExpandOatAndImageFilenames() {
     ArrayRef<const std::string> locations(dex_locations_);
-    if (!multi_image_) {
+    if (!compiler_options_->multi_image_) {
       locations = locations.SubArray(/*pos=*/ 0u, /*length=*/ 1u);
     }
     if (image_fd_ == -1) {
@@ -925,7 +925,7 @@
       oat_filenames_ = ImageSpace::ExpandMultiImageLocations(
           locations, oat_filenames_[0], IsBootImageExtension());
     } else {
-      DCHECK(!multi_image_);
+      DCHECK(!compiler_options_->multi_image_);
       std::vector<std::string> oat_locations = ImageSpace::ExpandMultiImageLocations(
           locations, oat_location_, IsBootImageExtension());
       DCHECK_EQ(1u, oat_locations.size());
@@ -1119,7 +1119,7 @@
     AssignIfExists(args, M::CopyDexFiles, &copy_dex_files_);
 
     AssignTrueIfExists(args, M::MultiImage, &have_multi_image_arg_);
-    AssignIfExists(args, M::MultiImage, &multi_image_);
+    AssignIfExists(args, M::MultiImage, &compiler_options_->multi_image_);
 
     if (args.Exists(M::ForceDeterminism)) {
       force_determinism_ = true;
@@ -2936,7 +2936,6 @@
   std::vector<std::string> image_filenames_;
   int image_fd_;
   bool have_multi_image_arg_;
-  bool multi_image_;
   uintptr_t image_base_;
   ImageHeader::StorageMode image_storage_mode_;
   const char* passes_to_run_filename_;
diff --git a/dex2oat/driver/compiler_driver.h b/dex2oat/driver/compiler_driver.h
index d7781a9..ed8fc2f 100644
--- a/dex2oat/driver/compiler_driver.h
+++ b/dex2oat/driver/compiler_driver.h
@@ -217,6 +217,10 @@
     return &compiled_method_storage_;
   }
 
+  const CompiledMethodStorage* GetCompiledMethodStorage() const {
+    return &compiled_method_storage_;
+  }
+
  private:
   void LoadImageClasses(TimingLogger* timings, /*inout*/ HashSet<std::string>* image_classes)
       REQUIRES(!Locks::mutator_lock_);
diff --git a/dex2oat/linker/code_info_table_deduper.cc b/dex2oat/linker/code_info_table_deduper.cc
index 5ffe004..eff0292 100644
--- a/dex2oat/linker/code_info_table_deduper.cc
+++ b/dex2oat/linker/code_info_table_deduper.cc
@@ -21,6 +21,14 @@
 namespace art {
 namespace linker {
 
+void CodeInfoTableDeduper::ReserveDedupeBuffer(size_t num_code_infos) {
+  DCHECK(dedupe_set_.empty());
+  const size_t max_size = num_code_infos * CodeInfo::kNumBitTables;
+  // Reserve space for 1/2 of the maximum dedupe set size to avoid rehashing.
+  // Usually only 30%-40% of bit tables are unique.
+  dedupe_set_.reserve(max_size / 2u);
+}
+
 size_t CodeInfoTableDeduper::Dedupe(const uint8_t* code_info_data) {
   static constexpr size_t kNumHeaders = CodeInfo::kNumHeaders;
   static constexpr size_t kNumBitTables = CodeInfo::kNumBitTables;
@@ -78,9 +86,8 @@
         uint32_t table_bit_start = start_bit_offset + bit_table_bit_starts[i];
         BitMemoryRegion region(
             const_cast<uint8_t*>(writer_.data()), table_bit_start, table_bit_size);
-        uint32_t hash = DataHash()(region);
-        DedupeSetEntry entry{table_bit_start, table_bit_size, hash};
-        auto [it, inserted] = dedupe_set_.InsertWithHash(entry, hash);
+        DedupeSetEntry entry{table_bit_start, table_bit_size};
+        auto [it, inserted] = dedupe_set_.insert(entry);
         dedupe_entries[i] = &*it;
         if (!inserted) {
           code_info.SetBitTableDeduped(i);  // Mark as deduped before we write header.
diff --git a/dex2oat/linker/code_info_table_deduper.h b/dex2oat/linker/code_info_table_deduper.h
index 682aba3..c07641d 100644
--- a/dex2oat/linker/code_info_table_deduper.h
+++ b/dex2oat/linker/code_info_table_deduper.h
@@ -29,10 +29,15 @@
  public:
   explicit CodeInfoTableDeduper(std::vector<uint8_t>* output)
       : writer_(output),
-        dedupe_set_(DedupeSetEntryHash(), DedupeSetEntryEquals(output)) {
+        dedupe_set_(kMinLoadFactor,
+                    kMaxLoadFactor,
+                    DedupeSetEntryHash(output),
+                    DedupeSetEntryEquals(output)) {
     DCHECK_EQ(output->size(), 0u);
   }
 
+  void ReserveDedupeBuffer(size_t num_code_infos);
+
   // Copy CodeInfo into output while de-duplicating the internal bit tables.
   // It returns the byte offset of the copied CodeInfo within the output.
   size_t Dedupe(const uint8_t* code_info);
@@ -41,13 +46,12 @@
   struct DedupeSetEntry {
     uint32_t bit_start;
     uint32_t bit_size;
-    uint32_t hash;
   };
 
   class DedupeSetEntryEmpty {
    public:
     void MakeEmpty(DedupeSetEntry& item) const {
-      item = {0u, 0u, 0u};
+      item = {0u, 0u};
     }
     bool IsEmpty(const DedupeSetEntry& item) const {
       return item.bit_size == 0u;
@@ -56,9 +60,14 @@
 
   class DedupeSetEntryHash {
    public:
+    explicit DedupeSetEntryHash(std::vector<uint8_t>* output) : output_(output) {}
+
     uint32_t operator()(const DedupeSetEntry& item) const {
-      return item.hash;
+      return DataHash()(BitMemoryRegion(output_->data(), item.bit_start, item.bit_size));
     }
+
+   private:
+    std::vector<uint8_t>* const output_;
   };
 
   class DedupeSetEntryEquals {
@@ -68,8 +77,7 @@
     bool operator()(const DedupeSetEntry& lhs, const DedupeSetEntry& rhs) const {
       DCHECK_NE(lhs.bit_size, 0u);
       DCHECK_NE(rhs.bit_size, 0u);
-      return lhs.hash == rhs.hash &&
-             lhs.bit_size == rhs.bit_size &&
+      return lhs.bit_size == rhs.bit_size &&
              BitMemoryRegion::Equals(
                  BitMemoryRegion(output_->data(), lhs.bit_start, lhs.bit_size),
                  BitMemoryRegion(output_->data(), rhs.bit_start, rhs.bit_size));
@@ -82,6 +90,9 @@
   using DedupeSet =
       HashSet<DedupeSetEntry, DedupeSetEntryEmpty, DedupeSetEntryHash, DedupeSetEntryEquals>;
 
+  static constexpr double kMinLoadFactor = 0.5;
+  static constexpr double kMaxLoadFactor = 0.75;
+
   BitMemoryWriter<std::vector<uint8_t>> writer_;
 
   // Deduplicate at BitTable level. Entries describe ranges in `output`, see constructor.
diff --git a/dex2oat/linker/oat_writer.cc b/dex2oat/linker/oat_writer.cc
index 4915525..df95f2a 100644
--- a/dex2oat/linker/oat_writer.cc
+++ b/dex2oat/linker/oat_writer.cc
@@ -1507,11 +1507,23 @@
   const bool generate_debug_info_;
 };
 
+template <bool kDeduplicate>
 class OatWriter::InitMapMethodVisitor : public OatDexMethodVisitor {
  public:
   InitMapMethodVisitor(OatWriter* writer, size_t offset)
       : OatDexMethodVisitor(writer, offset),
         dedupe_bit_table_(&writer_->code_info_data_) {
+    if (kDeduplicate) {
+      // Reserve a large buffer for `CodeInfo` bit table deduplication except for
+      // multi-image compilation as we do not want to reserve multiple large buffers.
+      // User devices should not do any multi-image compilation.
+      const CompilerOptions& compiler_options = writer->GetCompilerOptions();
+      DCHECK(compiler_options.IsAnyCompilationEnabled());
+      if (compiler_options.DeduplicateCode() && !compiler_options.IsMultiImage()) {
+        dedupe_bit_table_.ReserveDedupeBuffer(
+            writer->compiler_driver_->GetCompiledMethodStorage()->UniqueVMapTableEntries());
+      }
+    }
   }
 
   bool VisitMethod(size_t class_def_method_index,
@@ -1526,10 +1538,16 @@
 
       ArrayRef<const uint8_t> map = compiled_method->GetVmapTable();
       if (map.size() != 0u) {
-        size_t offset = dedupe_code_info_.GetOrCreate(map.data(), [=]() {
-          // Deduplicate the inner BitTable<>s within the CodeInfo.
-          return offset_ + dedupe_bit_table_.Dedupe(map.data());
-        });
+        size_t offset;
+        if (kDeduplicate) {
+          offset = dedupe_code_info_.GetOrCreate(map.data(), [=]() {
+            // Deduplicate the inner BitTable<>s within the CodeInfo.
+            return offset_ + dedupe_bit_table_.Dedupe(map.data());
+          });
+        } else {
+          offset = offset_ + writer_->code_info_data_.size();
+          writer_->code_info_data_.insert(writer_->code_info_data_.end(), map.begin(), map.end());
+        }
         // Code offset is not initialized yet, so set file offset for now.
         DCHECK_EQ(oat_class->method_offsets_[method_offsets_index_].code_offset_, 0u);
         oat_class->method_headers_[method_offsets_index_].SetCodeInfoOffset(offset);
@@ -2140,13 +2158,17 @@
   if (!MayHaveCompiledMethods()) {
     return offset;
   }
-  {
-    InitMapMethodVisitor visitor(this, offset);
+  if (GetCompilerOptions().DeduplicateCode()) {
+    InitMapMethodVisitor</*kDeduplicate=*/ true> visitor(this, offset);
     bool success = VisitDexMethods(&visitor);
     DCHECK(success);
-    code_info_data_.shrink_to_fit();
-    offset += code_info_data_.size();
+  } else {
+    InitMapMethodVisitor</*kDeduplicate=*/ false> visitor(this, offset);
+    bool success = VisitDexMethods(&visitor);
+    DCHECK(success);
   }
+  code_info_data_.shrink_to_fit();
+  offset += code_info_data_.size();
   return offset;
 }
 
diff --git a/dex2oat/linker/oat_writer.h b/dex2oat/linker/oat_writer.h
index 21ae77a..ebff7a1 100644
--- a/dex2oat/linker/oat_writer.h
+++ b/dex2oat/linker/oat_writer.h
@@ -265,7 +265,7 @@
   struct OrderedMethodData;
   class OrderedMethodVisitor;
   class InitCodeMethodVisitor;
-  class InitMapMethodVisitor;
+  template <bool kDeduplicate> class InitMapMethodVisitor;
   class InitImageMethodVisitor;
   class WriteCodeMethodVisitor;
   class WriteMapMethodVisitor;