diff options
author | 2022-03-17 12:55:27 +0000 | |
---|---|---|
committer | 2022-04-04 16:40:55 +0000 | |
commit | f67e8c35ee11b0bd81a2d6f6e267b9081db7d758 (patch) | |
tree | 4b5331d57e63554d7a7a84020bb971dcf50c6507 | |
parent | 65258db896c8270873f362d95204336d7d1e333d (diff) |
Preallocate `CodeInfoTableDeduper::dedupe_set_`.
Preallocate large buffer except for multi-image compilation
which requires multiple dedupe sets, so we let them grow as
needed without wasting too much memory. We do not expect to
use multi-image compilation on user devices.
Also remove the hash from the dedupe set entry. With the
large pre-allocated buffer, we have very few hash conflicts
and comparing the hash just slows down finding identical
entries. If we exceed the pre-allocated buffer, this shall
trade some performance for lower memory use.
Test: m test-art-host-gtest
Test: testrunner.py --host --optimizing
Bug: 181943478
Change-Id: Ibbb98b6d3ebbc35ffa75165baad54f4df7c62ad9
-rw-r--r-- | compiler/driver/compiled_method_storage.cc | 20 | ||||
-rw-r--r-- | compiler/driver/compiled_method_storage.h | 4 | ||||
-rw-r--r-- | compiler/driver/compiler_options.cc | 1 | ||||
-rw-r--r-- | compiler/driver/compiler_options.h | 5 | ||||
-rw-r--r-- | compiler/utils/dedupe_set-inl.h | 19 | ||||
-rw-r--r-- | compiler/utils/dedupe_set.h | 2 | ||||
-rw-r--r-- | dex2oat/dex2oat.cc | 17 | ||||
-rw-r--r-- | dex2oat/driver/compiler_driver.h | 4 | ||||
-rw-r--r-- | dex2oat/linker/code_info_table_deduper.cc | 13 | ||||
-rw-r--r-- | dex2oat/linker/code_info_table_deduper.h | 23 | ||||
-rw-r--r-- | dex2oat/linker/oat_writer.cc | 38 | ||||
-rw-r--r-- | dex2oat/linker/oat_writer.h | 2 |
12 files changed, 121 insertions, 27 deletions
diff --git a/compiler/driver/compiled_method_storage.cc b/compiler/driver/compiled_method_storage.cc index 03c906b991..4857ec0931 100644 --- a/compiler/driver/compiled_method_storage.cc +++ b/compiler/driver/compiled_method_storage.cc @@ -183,6 +183,11 @@ void CompiledMethodStorage::ReleaseCode(const LengthPrefixedArray<uint8_t>* code ReleaseArrayIfNotDeduplicated(code); } +size_t CompiledMethodStorage::UniqueCodeEntries() const { + DCHECK(DedupeEnabled()); + return dedupe_code_.Size(Thread::Current()); +} + const LengthPrefixedArray<uint8_t>* CompiledMethodStorage::DeduplicateVMapTable( const ArrayRef<const uint8_t>& table) { return AllocateOrDeduplicateArray(table, &dedupe_vmap_table_); @@ -192,6 +197,11 @@ void CompiledMethodStorage::ReleaseVMapTable(const LengthPrefixedArray<uint8_t>* ReleaseArrayIfNotDeduplicated(table); } +size_t CompiledMethodStorage::UniqueVMapTableEntries() const { + DCHECK(DedupeEnabled()); + return dedupe_vmap_table_.Size(Thread::Current()); +} + const LengthPrefixedArray<uint8_t>* CompiledMethodStorage::DeduplicateCFIInfo( const ArrayRef<const uint8_t>& cfi_info) { return AllocateOrDeduplicateArray(cfi_info, &dedupe_cfi_info_); @@ -201,6 +211,11 @@ void CompiledMethodStorage::ReleaseCFIInfo(const LengthPrefixedArray<uint8_t>* c ReleaseArrayIfNotDeduplicated(cfi_info); } +size_t CompiledMethodStorage::UniqueCFIInfoEntries() const { + DCHECK(DedupeEnabled()); + return dedupe_cfi_info_.Size(Thread::Current()); +} + const LengthPrefixedArray<linker::LinkerPatch>* CompiledMethodStorage::DeduplicateLinkerPatches( const ArrayRef<const linker::LinkerPatch>& linker_patches) { return AllocateOrDeduplicateArray(linker_patches, &dedupe_linker_patches_); @@ -211,6 +226,11 @@ void CompiledMethodStorage::ReleaseLinkerPatches( ReleaseArrayIfNotDeduplicated(linker_patches); } +size_t CompiledMethodStorage::UniqueLinkerPatchesEntries() const { + DCHECK(DedupeEnabled()); + return dedupe_linker_patches_.Size(Thread::Current()); +} + CompiledMethodStorage::ThunkMapKey CompiledMethodStorage::GetThunkMapKey( const linker::LinkerPatch& linker_patch) { uint32_t custom_value1 = 0u; diff --git a/compiler/driver/compiled_method_storage.h b/compiler/driver/compiled_method_storage.h index a5a7691e12..f9f34017eb 100644 --- a/compiler/driver/compiled_method_storage.h +++ b/compiler/driver/compiled_method_storage.h @@ -53,16 +53,20 @@ class CompiledMethodStorage { const LengthPrefixedArray<uint8_t>* DeduplicateCode(const ArrayRef<const uint8_t>& code); void ReleaseCode(const LengthPrefixedArray<uint8_t>* code); + size_t UniqueCodeEntries() const; const LengthPrefixedArray<uint8_t>* DeduplicateVMapTable(const ArrayRef<const uint8_t>& table); void ReleaseVMapTable(const LengthPrefixedArray<uint8_t>* table); + size_t UniqueVMapTableEntries() const; const LengthPrefixedArray<uint8_t>* DeduplicateCFIInfo(const ArrayRef<const uint8_t>& cfi_info); void ReleaseCFIInfo(const LengthPrefixedArray<uint8_t>* cfi_info); + size_t UniqueCFIInfoEntries() const; const LengthPrefixedArray<linker::LinkerPatch>* DeduplicateLinkerPatches( const ArrayRef<const linker::LinkerPatch>& linker_patches); void ReleaseLinkerPatches(const LengthPrefixedArray<linker::LinkerPatch>* linker_patches); + size_t UniqueLinkerPatchesEntries() const; // Returns the code associated with the given patch. // If the code has not been set, returns empty data. diff --git a/compiler/driver/compiler_options.cc b/compiler/driver/compiler_options.cc index ad5a009808..51cd999b6d 100644 --- a/compiler/driver/compiler_options.cc +++ b/compiler/driver/compiler_options.cc @@ -51,6 +51,7 @@ CompilerOptions::CompilerOptions() verification_results_(nullptr), compiler_type_(CompilerType::kAotCompiler), image_type_(ImageType::kNone), + multi_image_(false), compile_art_test_(false), baseline_(false), debuggable_(false), diff --git a/compiler/driver/compiler_options.h b/compiler/driver/compiler_options.h index 783092431a..0034306fdf 100644 --- a/compiler/driver/compiler_options.h +++ b/compiler/driver/compiler_options.h @@ -233,6 +233,10 @@ class CompilerOptions final { return image_type_ == ImageType::kAppImage; } + bool IsMultiImage() const { + return multi_image_; + } + // Returns whether we are running ART tests. // The compiler will use that information for checking invariants. bool CompileArtTest() const { @@ -408,6 +412,7 @@ class CompilerOptions final { CompilerType compiler_type_; ImageType image_type_; + bool multi_image_; bool compile_art_test_; bool baseline_; bool debuggable_; diff --git a/compiler/utils/dedupe_set-inl.h b/compiler/utils/dedupe_set-inl.h index 4e892f2616..d4a9cc829b 100644 --- a/compiler/utils/dedupe_set-inl.h +++ b/compiler/utils/dedupe_set-inl.h @@ -81,6 +81,11 @@ class DedupeSet<InKey, StoreKey, Alloc, HashType, HashFunc, kShard>::Shard { return store_key; } + size_t Size(Thread* self) { + MutexLock lock(self, lock_); + return keys_.size(); + } + void UpdateStats(Thread* self, Stats* global_stats) REQUIRES(!lock_) { // HashSet<> doesn't keep entries ordered by hash, so we actually allocate memory // for bookkeeping while collecting the stats. @@ -234,6 +239,20 @@ template <typename InKey, typename HashType, typename HashFunc, HashType kShard> +size_t DedupeSet<InKey, StoreKey, Alloc, HashType, HashFunc, kShard>::Size(Thread* self) const { + size_t result = 0u; + for (const auto& shard : shards_) { + result += shard->Size(self); + } + return result; +} + +template <typename InKey, + typename StoreKey, + typename Alloc, + typename HashType, + typename HashFunc, + HashType kShard> std::string DedupeSet<InKey, StoreKey, Alloc, HashType, HashFunc, kShard>::DumpStats( Thread* self) const { Stats stats; diff --git a/compiler/utils/dedupe_set.h b/compiler/utils/dedupe_set.h index 3baa0612f6..a1ba208d2c 100644 --- a/compiler/utils/dedupe_set.h +++ b/compiler/utils/dedupe_set.h @@ -45,6 +45,8 @@ class DedupeSet { ~DedupeSet(); + size_t Size(Thread* self) const; + std::string DumpStats(Thread* self) const; private: diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc index 6e93d862ad..9e6103b424 100644 --- a/dex2oat/dex2oat.cc +++ b/dex2oat/dex2oat.cc @@ -525,7 +525,6 @@ class Dex2Oat final { zip_fd_(-1), image_fd_(-1), have_multi_image_arg_(false), - multi_image_(false), image_base_(0U), image_storage_mode_(ImageHeader::kStorageModeUncompressed), passes_to_run_filename_(nullptr), @@ -771,19 +770,20 @@ class Dex2Oat final { } } else { // Use the default, i.e. multi-image for boot image and boot image extension. - multi_image_ = IsBootImage() || IsBootImageExtension(); // Shall pass checks below. + // This shall pass the checks below. + compiler_options_->multi_image_ = IsBootImage() || IsBootImageExtension(); } // On target we support generating a single image for the primary boot image. if (!kIsTargetBuild) { - if (IsBootImage() && !multi_image_) { + if (IsBootImage() && !compiler_options_->multi_image_) { Usage("--single-image specified for primary boot image on host"); } } - if (IsAppImage() && multi_image_) { + if (IsAppImage() && compiler_options_->multi_image_) { Usage("--multi-image specified for app image"); } - if (image_fd_ != -1 && multi_image_) { + if (image_fd_ != -1 && compiler_options_->multi_image_) { Usage("--single-image not specified for --image-fd"); } @@ -909,7 +909,7 @@ class Dex2Oat final { void ExpandOatAndImageFilenames() { ArrayRef<const std::string> locations(dex_locations_); - if (!multi_image_) { + if (!compiler_options_->multi_image_) { locations = locations.SubArray(/*pos=*/ 0u, /*length=*/ 1u); } if (image_fd_ == -1) { @@ -925,7 +925,7 @@ class Dex2Oat final { oat_filenames_ = ImageSpace::ExpandMultiImageLocations( locations, oat_filenames_[0], IsBootImageExtension()); } else { - DCHECK(!multi_image_); + DCHECK(!compiler_options_->multi_image_); std::vector<std::string> oat_locations = ImageSpace::ExpandMultiImageLocations( locations, oat_location_, IsBootImageExtension()); DCHECK_EQ(1u, oat_locations.size()); @@ -1119,7 +1119,7 @@ class Dex2Oat final { AssignIfExists(args, M::CopyDexFiles, ©_dex_files_); AssignTrueIfExists(args, M::MultiImage, &have_multi_image_arg_); - AssignIfExists(args, M::MultiImage, &multi_image_); + AssignIfExists(args, M::MultiImage, &compiler_options_->multi_image_); if (args.Exists(M::ForceDeterminism)) { force_determinism_ = true; @@ -2936,7 +2936,6 @@ class Dex2Oat final { std::vector<std::string> image_filenames_; int image_fd_; bool have_multi_image_arg_; - bool multi_image_; uintptr_t image_base_; ImageHeader::StorageMode image_storage_mode_; const char* passes_to_run_filename_; diff --git a/dex2oat/driver/compiler_driver.h b/dex2oat/driver/compiler_driver.h index d7781a92c5..ed8fc2f222 100644 --- a/dex2oat/driver/compiler_driver.h +++ b/dex2oat/driver/compiler_driver.h @@ -217,6 +217,10 @@ class CompilerDriver { return &compiled_method_storage_; } + const CompiledMethodStorage* GetCompiledMethodStorage() const { + return &compiled_method_storage_; + } + private: void LoadImageClasses(TimingLogger* timings, /*inout*/ HashSet<std::string>* image_classes) REQUIRES(!Locks::mutator_lock_); diff --git a/dex2oat/linker/code_info_table_deduper.cc b/dex2oat/linker/code_info_table_deduper.cc index 5ffe00494f..eff0292e87 100644 --- a/dex2oat/linker/code_info_table_deduper.cc +++ b/dex2oat/linker/code_info_table_deduper.cc @@ -21,6 +21,14 @@ namespace art { namespace linker { +void CodeInfoTableDeduper::ReserveDedupeBuffer(size_t num_code_infos) { + DCHECK(dedupe_set_.empty()); + const size_t max_size = num_code_infos * CodeInfo::kNumBitTables; + // Reserve space for 1/2 of the maximum dedupe set size to avoid rehashing. + // Usually only 30%-40% of bit tables are unique. + dedupe_set_.reserve(max_size / 2u); +} + size_t CodeInfoTableDeduper::Dedupe(const uint8_t* code_info_data) { static constexpr size_t kNumHeaders = CodeInfo::kNumHeaders; static constexpr size_t kNumBitTables = CodeInfo::kNumBitTables; @@ -78,9 +86,8 @@ size_t CodeInfoTableDeduper::Dedupe(const uint8_t* code_info_data) { uint32_t table_bit_start = start_bit_offset + bit_table_bit_starts[i]; BitMemoryRegion region( const_cast<uint8_t*>(writer_.data()), table_bit_start, table_bit_size); - uint32_t hash = DataHash()(region); - DedupeSetEntry entry{table_bit_start, table_bit_size, hash}; - auto [it, inserted] = dedupe_set_.InsertWithHash(entry, hash); + DedupeSetEntry entry{table_bit_start, table_bit_size}; + auto [it, inserted] = dedupe_set_.insert(entry); dedupe_entries[i] = &*it; if (!inserted) { code_info.SetBitTableDeduped(i); // Mark as deduped before we write header. diff --git a/dex2oat/linker/code_info_table_deduper.h b/dex2oat/linker/code_info_table_deduper.h index 682aba3a79..c07641d3fd 100644 --- a/dex2oat/linker/code_info_table_deduper.h +++ b/dex2oat/linker/code_info_table_deduper.h @@ -29,10 +29,15 @@ class CodeInfoTableDeduper { public: explicit CodeInfoTableDeduper(std::vector<uint8_t>* output) : writer_(output), - dedupe_set_(DedupeSetEntryHash(), DedupeSetEntryEquals(output)) { + dedupe_set_(kMinLoadFactor, + kMaxLoadFactor, + DedupeSetEntryHash(output), + DedupeSetEntryEquals(output)) { DCHECK_EQ(output->size(), 0u); } + void ReserveDedupeBuffer(size_t num_code_infos); + // Copy CodeInfo into output while de-duplicating the internal bit tables. // It returns the byte offset of the copied CodeInfo within the output. size_t Dedupe(const uint8_t* code_info); @@ -41,13 +46,12 @@ class CodeInfoTableDeduper { struct DedupeSetEntry { uint32_t bit_start; uint32_t bit_size; - uint32_t hash; }; class DedupeSetEntryEmpty { public: void MakeEmpty(DedupeSetEntry& item) const { - item = {0u, 0u, 0u}; + item = {0u, 0u}; } bool IsEmpty(const DedupeSetEntry& item) const { return item.bit_size == 0u; @@ -56,9 +60,14 @@ class CodeInfoTableDeduper { class DedupeSetEntryHash { public: + explicit DedupeSetEntryHash(std::vector<uint8_t>* output) : output_(output) {} + uint32_t operator()(const DedupeSetEntry& item) const { - return item.hash; + return DataHash()(BitMemoryRegion(output_->data(), item.bit_start, item.bit_size)); } + + private: + std::vector<uint8_t>* const output_; }; class DedupeSetEntryEquals { @@ -68,8 +77,7 @@ class CodeInfoTableDeduper { bool operator()(const DedupeSetEntry& lhs, const DedupeSetEntry& rhs) const { DCHECK_NE(lhs.bit_size, 0u); DCHECK_NE(rhs.bit_size, 0u); - return lhs.hash == rhs.hash && - lhs.bit_size == rhs.bit_size && + return lhs.bit_size == rhs.bit_size && BitMemoryRegion::Equals( BitMemoryRegion(output_->data(), lhs.bit_start, lhs.bit_size), BitMemoryRegion(output_->data(), rhs.bit_start, rhs.bit_size)); @@ -82,6 +90,9 @@ class CodeInfoTableDeduper { using DedupeSet = HashSet<DedupeSetEntry, DedupeSetEntryEmpty, DedupeSetEntryHash, DedupeSetEntryEquals>; + static constexpr double kMinLoadFactor = 0.5; + static constexpr double kMaxLoadFactor = 0.75; + BitMemoryWriter<std::vector<uint8_t>> writer_; // Deduplicate at BitTable level. Entries describe ranges in `output`, see constructor. diff --git a/dex2oat/linker/oat_writer.cc b/dex2oat/linker/oat_writer.cc index 4915525d49..df95f2a856 100644 --- a/dex2oat/linker/oat_writer.cc +++ b/dex2oat/linker/oat_writer.cc @@ -1507,11 +1507,23 @@ class OatWriter::LayoutReserveOffsetCodeMethodVisitor : public OrderedMethodVisi const bool generate_debug_info_; }; +template <bool kDeduplicate> class OatWriter::InitMapMethodVisitor : public OatDexMethodVisitor { public: InitMapMethodVisitor(OatWriter* writer, size_t offset) : OatDexMethodVisitor(writer, offset), dedupe_bit_table_(&writer_->code_info_data_) { + if (kDeduplicate) { + // Reserve a large buffer for `CodeInfo` bit table deduplication except for + // multi-image compilation as we do not want to reserve multiple large buffers. + // User devices should not do any multi-image compilation. + const CompilerOptions& compiler_options = writer->GetCompilerOptions(); + DCHECK(compiler_options.IsAnyCompilationEnabled()); + if (compiler_options.DeduplicateCode() && !compiler_options.IsMultiImage()) { + dedupe_bit_table_.ReserveDedupeBuffer( + writer->compiler_driver_->GetCompiledMethodStorage()->UniqueVMapTableEntries()); + } + } } bool VisitMethod(size_t class_def_method_index, @@ -1526,10 +1538,16 @@ class OatWriter::InitMapMethodVisitor : public OatDexMethodVisitor { ArrayRef<const uint8_t> map = compiled_method->GetVmapTable(); if (map.size() != 0u) { - size_t offset = dedupe_code_info_.GetOrCreate(map.data(), [=]() { - // Deduplicate the inner BitTable<>s within the CodeInfo. - return offset_ + dedupe_bit_table_.Dedupe(map.data()); - }); + size_t offset; + if (kDeduplicate) { + offset = dedupe_code_info_.GetOrCreate(map.data(), [=]() { + // Deduplicate the inner BitTable<>s within the CodeInfo. + return offset_ + dedupe_bit_table_.Dedupe(map.data()); + }); + } else { + offset = offset_ + writer_->code_info_data_.size(); + writer_->code_info_data_.insert(writer_->code_info_data_.end(), map.begin(), map.end()); + } // Code offset is not initialized yet, so set file offset for now. DCHECK_EQ(oat_class->method_offsets_[method_offsets_index_].code_offset_, 0u); oat_class->method_headers_[method_offsets_index_].SetCodeInfoOffset(offset); @@ -2140,13 +2158,17 @@ size_t OatWriter::InitOatMaps(size_t offset) { if (!MayHaveCompiledMethods()) { return offset; } - { - InitMapMethodVisitor visitor(this, offset); + if (GetCompilerOptions().DeduplicateCode()) { + InitMapMethodVisitor</*kDeduplicate=*/ true> visitor(this, offset); + bool success = VisitDexMethods(&visitor); + DCHECK(success); + } else { + InitMapMethodVisitor</*kDeduplicate=*/ false> visitor(this, offset); bool success = VisitDexMethods(&visitor); DCHECK(success); - code_info_data_.shrink_to_fit(); - offset += code_info_data_.size(); } + code_info_data_.shrink_to_fit(); + offset += code_info_data_.size(); return offset; } diff --git a/dex2oat/linker/oat_writer.h b/dex2oat/linker/oat_writer.h index 21ae77a6c5..ebff7a1b5e 100644 --- a/dex2oat/linker/oat_writer.h +++ b/dex2oat/linker/oat_writer.h @@ -265,7 +265,7 @@ class OatWriter { struct OrderedMethodData; class OrderedMethodVisitor; class InitCodeMethodVisitor; - class InitMapMethodVisitor; + template <bool kDeduplicate> class InitMapMethodVisitor; class InitImageMethodVisitor; class WriteCodeMethodVisitor; class WriteMapMethodVisitor; |