Preallocate `CodeInfoTableDeduper::dedupe_set_`.
Preallocate large buffer except for multi-image compilation
which requires multiple dedupe sets, so we let them grow as
needed without wasting too much memory. We do not expect to
use multi-image compilation on user devices.
Also remove the hash from the dedupe set entry. With the
large pre-allocated buffer, we have very few hash conflicts
and comparing the hash just slows down finding identical
entries. If we exceed the pre-allocated buffer, this shall
trade some performance for lower memory use.
Test: m test-art-host-gtest
Test: testrunner.py --host --optimizing
Bug: 181943478
Change-Id: Ibbb98b6d3ebbc35ffa75165baad54f4df7c62ad9
diff --git a/compiler/driver/compiled_method_storage.cc b/compiler/driver/compiled_method_storage.cc
index 03c906b..4857ec0 100644
--- a/compiler/driver/compiled_method_storage.cc
+++ b/compiler/driver/compiled_method_storage.cc
@@ -183,6 +183,11 @@
ReleaseArrayIfNotDeduplicated(code);
}
+size_t CompiledMethodStorage::UniqueCodeEntries() const {
+ DCHECK(DedupeEnabled());
+ return dedupe_code_.Size(Thread::Current());
+}
+
const LengthPrefixedArray<uint8_t>* CompiledMethodStorage::DeduplicateVMapTable(
const ArrayRef<const uint8_t>& table) {
return AllocateOrDeduplicateArray(table, &dedupe_vmap_table_);
@@ -192,6 +197,11 @@
ReleaseArrayIfNotDeduplicated(table);
}
+size_t CompiledMethodStorage::UniqueVMapTableEntries() const {
+ DCHECK(DedupeEnabled());
+ return dedupe_vmap_table_.Size(Thread::Current());
+}
+
const LengthPrefixedArray<uint8_t>* CompiledMethodStorage::DeduplicateCFIInfo(
const ArrayRef<const uint8_t>& cfi_info) {
return AllocateOrDeduplicateArray(cfi_info, &dedupe_cfi_info_);
@@ -201,6 +211,11 @@
ReleaseArrayIfNotDeduplicated(cfi_info);
}
+size_t CompiledMethodStorage::UniqueCFIInfoEntries() const {
+ DCHECK(DedupeEnabled());
+ return dedupe_cfi_info_.Size(Thread::Current());
+}
+
const LengthPrefixedArray<linker::LinkerPatch>* CompiledMethodStorage::DeduplicateLinkerPatches(
const ArrayRef<const linker::LinkerPatch>& linker_patches) {
return AllocateOrDeduplicateArray(linker_patches, &dedupe_linker_patches_);
@@ -211,6 +226,11 @@
ReleaseArrayIfNotDeduplicated(linker_patches);
}
+size_t CompiledMethodStorage::UniqueLinkerPatchesEntries() const {
+ DCHECK(DedupeEnabled());
+ return dedupe_linker_patches_.Size(Thread::Current());
+}
+
CompiledMethodStorage::ThunkMapKey CompiledMethodStorage::GetThunkMapKey(
const linker::LinkerPatch& linker_patch) {
uint32_t custom_value1 = 0u;
diff --git a/compiler/driver/compiled_method_storage.h b/compiler/driver/compiled_method_storage.h
index a5a7691..f9f3401 100644
--- a/compiler/driver/compiled_method_storage.h
+++ b/compiler/driver/compiled_method_storage.h
@@ -53,16 +53,20 @@
const LengthPrefixedArray<uint8_t>* DeduplicateCode(const ArrayRef<const uint8_t>& code);
void ReleaseCode(const LengthPrefixedArray<uint8_t>* code);
+ size_t UniqueCodeEntries() const;
const LengthPrefixedArray<uint8_t>* DeduplicateVMapTable(const ArrayRef<const uint8_t>& table);
void ReleaseVMapTable(const LengthPrefixedArray<uint8_t>* table);
+ size_t UniqueVMapTableEntries() const;
const LengthPrefixedArray<uint8_t>* DeduplicateCFIInfo(const ArrayRef<const uint8_t>& cfi_info);
void ReleaseCFIInfo(const LengthPrefixedArray<uint8_t>* cfi_info);
+ size_t UniqueCFIInfoEntries() const;
const LengthPrefixedArray<linker::LinkerPatch>* DeduplicateLinkerPatches(
const ArrayRef<const linker::LinkerPatch>& linker_patches);
void ReleaseLinkerPatches(const LengthPrefixedArray<linker::LinkerPatch>* linker_patches);
+ size_t UniqueLinkerPatchesEntries() const;
// Returns the code associated with the given patch.
// If the code has not been set, returns empty data.
diff --git a/compiler/driver/compiler_options.cc b/compiler/driver/compiler_options.cc
index ad5a009..51cd999 100644
--- a/compiler/driver/compiler_options.cc
+++ b/compiler/driver/compiler_options.cc
@@ -51,6 +51,7 @@
verification_results_(nullptr),
compiler_type_(CompilerType::kAotCompiler),
image_type_(ImageType::kNone),
+ multi_image_(false),
compile_art_test_(false),
baseline_(false),
debuggable_(false),
diff --git a/compiler/driver/compiler_options.h b/compiler/driver/compiler_options.h
index 7830924..0034306 100644
--- a/compiler/driver/compiler_options.h
+++ b/compiler/driver/compiler_options.h
@@ -233,6 +233,10 @@
return image_type_ == ImageType::kAppImage;
}
+ bool IsMultiImage() const {
+ return multi_image_;
+ }
+
// Returns whether we are running ART tests.
// The compiler will use that information for checking invariants.
bool CompileArtTest() const {
@@ -408,6 +412,7 @@
CompilerType compiler_type_;
ImageType image_type_;
+ bool multi_image_;
bool compile_art_test_;
bool baseline_;
bool debuggable_;
diff --git a/compiler/utils/dedupe_set-inl.h b/compiler/utils/dedupe_set-inl.h
index 4e892f2..d4a9cc8 100644
--- a/compiler/utils/dedupe_set-inl.h
+++ b/compiler/utils/dedupe_set-inl.h
@@ -81,6 +81,11 @@
return store_key;
}
+ size_t Size(Thread* self) {
+ MutexLock lock(self, lock_);
+ return keys_.size();
+ }
+
void UpdateStats(Thread* self, Stats* global_stats) REQUIRES(!lock_) {
// HashSet<> doesn't keep entries ordered by hash, so we actually allocate memory
// for bookkeeping while collecting the stats.
@@ -234,6 +239,20 @@
typename HashType,
typename HashFunc,
HashType kShard>
+size_t DedupeSet<InKey, StoreKey, Alloc, HashType, HashFunc, kShard>::Size(Thread* self) const {
+ size_t result = 0u;
+ for (const auto& shard : shards_) {
+ result += shard->Size(self);
+ }
+ return result;
+}
+
+template <typename InKey,
+ typename StoreKey,
+ typename Alloc,
+ typename HashType,
+ typename HashFunc,
+ HashType kShard>
std::string DedupeSet<InKey, StoreKey, Alloc, HashType, HashFunc, kShard>::DumpStats(
Thread* self) const {
Stats stats;
diff --git a/compiler/utils/dedupe_set.h b/compiler/utils/dedupe_set.h
index 3baa061..a1ba208 100644
--- a/compiler/utils/dedupe_set.h
+++ b/compiler/utils/dedupe_set.h
@@ -45,6 +45,8 @@
~DedupeSet();
+ size_t Size(Thread* self) const;
+
std::string DumpStats(Thread* self) const;
private:
diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc
index 6e93d86..9e6103b 100644
--- a/dex2oat/dex2oat.cc
+++ b/dex2oat/dex2oat.cc
@@ -525,7 +525,6 @@
zip_fd_(-1),
image_fd_(-1),
have_multi_image_arg_(false),
- multi_image_(false),
image_base_(0U),
image_storage_mode_(ImageHeader::kStorageModeUncompressed),
passes_to_run_filename_(nullptr),
@@ -771,19 +770,20 @@
}
} else {
// Use the default, i.e. multi-image for boot image and boot image extension.
- multi_image_ = IsBootImage() || IsBootImageExtension(); // Shall pass checks below.
+ // This shall pass the checks below.
+ compiler_options_->multi_image_ = IsBootImage() || IsBootImageExtension();
}
// On target we support generating a single image for the primary boot image.
if (!kIsTargetBuild) {
- if (IsBootImage() && !multi_image_) {
+ if (IsBootImage() && !compiler_options_->multi_image_) {
Usage("--single-image specified for primary boot image on host");
}
}
- if (IsAppImage() && multi_image_) {
+ if (IsAppImage() && compiler_options_->multi_image_) {
Usage("--multi-image specified for app image");
}
- if (image_fd_ != -1 && multi_image_) {
+ if (image_fd_ != -1 && compiler_options_->multi_image_) {
Usage("--single-image not specified for --image-fd");
}
@@ -909,7 +909,7 @@
void ExpandOatAndImageFilenames() {
ArrayRef<const std::string> locations(dex_locations_);
- if (!multi_image_) {
+ if (!compiler_options_->multi_image_) {
locations = locations.SubArray(/*pos=*/ 0u, /*length=*/ 1u);
}
if (image_fd_ == -1) {
@@ -925,7 +925,7 @@
oat_filenames_ = ImageSpace::ExpandMultiImageLocations(
locations, oat_filenames_[0], IsBootImageExtension());
} else {
- DCHECK(!multi_image_);
+ DCHECK(!compiler_options_->multi_image_);
std::vector<std::string> oat_locations = ImageSpace::ExpandMultiImageLocations(
locations, oat_location_, IsBootImageExtension());
DCHECK_EQ(1u, oat_locations.size());
@@ -1119,7 +1119,7 @@
AssignIfExists(args, M::CopyDexFiles, ©_dex_files_);
AssignTrueIfExists(args, M::MultiImage, &have_multi_image_arg_);
- AssignIfExists(args, M::MultiImage, &multi_image_);
+ AssignIfExists(args, M::MultiImage, &compiler_options_->multi_image_);
if (args.Exists(M::ForceDeterminism)) {
force_determinism_ = true;
@@ -2936,7 +2936,6 @@
std::vector<std::string> image_filenames_;
int image_fd_;
bool have_multi_image_arg_;
- bool multi_image_;
uintptr_t image_base_;
ImageHeader::StorageMode image_storage_mode_;
const char* passes_to_run_filename_;
diff --git a/dex2oat/driver/compiler_driver.h b/dex2oat/driver/compiler_driver.h
index d7781a9..ed8fc2f 100644
--- a/dex2oat/driver/compiler_driver.h
+++ b/dex2oat/driver/compiler_driver.h
@@ -217,6 +217,10 @@
return &compiled_method_storage_;
}
+ const CompiledMethodStorage* GetCompiledMethodStorage() const {
+ return &compiled_method_storage_;
+ }
+
private:
void LoadImageClasses(TimingLogger* timings, /*inout*/ HashSet<std::string>* image_classes)
REQUIRES(!Locks::mutator_lock_);
diff --git a/dex2oat/linker/code_info_table_deduper.cc b/dex2oat/linker/code_info_table_deduper.cc
index 5ffe004..eff0292 100644
--- a/dex2oat/linker/code_info_table_deduper.cc
+++ b/dex2oat/linker/code_info_table_deduper.cc
@@ -21,6 +21,14 @@
namespace art {
namespace linker {
+void CodeInfoTableDeduper::ReserveDedupeBuffer(size_t num_code_infos) {
+ DCHECK(dedupe_set_.empty());
+ const size_t max_size = num_code_infos * CodeInfo::kNumBitTables;
+ // Reserve space for 1/2 of the maximum dedupe set size to avoid rehashing.
+ // Usually only 30%-40% of bit tables are unique.
+ dedupe_set_.reserve(max_size / 2u);
+}
+
size_t CodeInfoTableDeduper::Dedupe(const uint8_t* code_info_data) {
static constexpr size_t kNumHeaders = CodeInfo::kNumHeaders;
static constexpr size_t kNumBitTables = CodeInfo::kNumBitTables;
@@ -78,9 +86,8 @@
uint32_t table_bit_start = start_bit_offset + bit_table_bit_starts[i];
BitMemoryRegion region(
const_cast<uint8_t*>(writer_.data()), table_bit_start, table_bit_size);
- uint32_t hash = DataHash()(region);
- DedupeSetEntry entry{table_bit_start, table_bit_size, hash};
- auto [it, inserted] = dedupe_set_.InsertWithHash(entry, hash);
+ DedupeSetEntry entry{table_bit_start, table_bit_size};
+ auto [it, inserted] = dedupe_set_.insert(entry);
dedupe_entries[i] = &*it;
if (!inserted) {
code_info.SetBitTableDeduped(i); // Mark as deduped before we write header.
diff --git a/dex2oat/linker/code_info_table_deduper.h b/dex2oat/linker/code_info_table_deduper.h
index 682aba3..c07641d 100644
--- a/dex2oat/linker/code_info_table_deduper.h
+++ b/dex2oat/linker/code_info_table_deduper.h
@@ -29,10 +29,15 @@
public:
explicit CodeInfoTableDeduper(std::vector<uint8_t>* output)
: writer_(output),
- dedupe_set_(DedupeSetEntryHash(), DedupeSetEntryEquals(output)) {
+ dedupe_set_(kMinLoadFactor,
+ kMaxLoadFactor,
+ DedupeSetEntryHash(output),
+ DedupeSetEntryEquals(output)) {
DCHECK_EQ(output->size(), 0u);
}
+ void ReserveDedupeBuffer(size_t num_code_infos);
+
// Copy CodeInfo into output while de-duplicating the internal bit tables.
// It returns the byte offset of the copied CodeInfo within the output.
size_t Dedupe(const uint8_t* code_info);
@@ -41,13 +46,12 @@
struct DedupeSetEntry {
uint32_t bit_start;
uint32_t bit_size;
- uint32_t hash;
};
class DedupeSetEntryEmpty {
public:
void MakeEmpty(DedupeSetEntry& item) const {
- item = {0u, 0u, 0u};
+ item = {0u, 0u};
}
bool IsEmpty(const DedupeSetEntry& item) const {
return item.bit_size == 0u;
@@ -56,9 +60,14 @@
class DedupeSetEntryHash {
public:
+ explicit DedupeSetEntryHash(std::vector<uint8_t>* output) : output_(output) {}
+
uint32_t operator()(const DedupeSetEntry& item) const {
- return item.hash;
+ return DataHash()(BitMemoryRegion(output_->data(), item.bit_start, item.bit_size));
}
+
+ private:
+ std::vector<uint8_t>* const output_;
};
class DedupeSetEntryEquals {
@@ -68,8 +77,7 @@
bool operator()(const DedupeSetEntry& lhs, const DedupeSetEntry& rhs) const {
DCHECK_NE(lhs.bit_size, 0u);
DCHECK_NE(rhs.bit_size, 0u);
- return lhs.hash == rhs.hash &&
- lhs.bit_size == rhs.bit_size &&
+ return lhs.bit_size == rhs.bit_size &&
BitMemoryRegion::Equals(
BitMemoryRegion(output_->data(), lhs.bit_start, lhs.bit_size),
BitMemoryRegion(output_->data(), rhs.bit_start, rhs.bit_size));
@@ -82,6 +90,9 @@
using DedupeSet =
HashSet<DedupeSetEntry, DedupeSetEntryEmpty, DedupeSetEntryHash, DedupeSetEntryEquals>;
+ static constexpr double kMinLoadFactor = 0.5;
+ static constexpr double kMaxLoadFactor = 0.75;
+
BitMemoryWriter<std::vector<uint8_t>> writer_;
// Deduplicate at BitTable level. Entries describe ranges in `output`, see constructor.
diff --git a/dex2oat/linker/oat_writer.cc b/dex2oat/linker/oat_writer.cc
index 4915525..df95f2a 100644
--- a/dex2oat/linker/oat_writer.cc
+++ b/dex2oat/linker/oat_writer.cc
@@ -1507,11 +1507,23 @@
const bool generate_debug_info_;
};
+template <bool kDeduplicate>
class OatWriter::InitMapMethodVisitor : public OatDexMethodVisitor {
public:
InitMapMethodVisitor(OatWriter* writer, size_t offset)
: OatDexMethodVisitor(writer, offset),
dedupe_bit_table_(&writer_->code_info_data_) {
+ if (kDeduplicate) {
+ // Reserve a large buffer for `CodeInfo` bit table deduplication except for
+ // multi-image compilation as we do not want to reserve multiple large buffers.
+ // User devices should not do any multi-image compilation.
+ const CompilerOptions& compiler_options = writer->GetCompilerOptions();
+ DCHECK(compiler_options.IsAnyCompilationEnabled());
+ if (compiler_options.DeduplicateCode() && !compiler_options.IsMultiImage()) {
+ dedupe_bit_table_.ReserveDedupeBuffer(
+ writer->compiler_driver_->GetCompiledMethodStorage()->UniqueVMapTableEntries());
+ }
+ }
}
bool VisitMethod(size_t class_def_method_index,
@@ -1526,10 +1538,16 @@
ArrayRef<const uint8_t> map = compiled_method->GetVmapTable();
if (map.size() != 0u) {
- size_t offset = dedupe_code_info_.GetOrCreate(map.data(), [=]() {
- // Deduplicate the inner BitTable<>s within the CodeInfo.
- return offset_ + dedupe_bit_table_.Dedupe(map.data());
- });
+ size_t offset;
+ if (kDeduplicate) {
+ offset = dedupe_code_info_.GetOrCreate(map.data(), [=]() {
+ // Deduplicate the inner BitTable<>s within the CodeInfo.
+ return offset_ + dedupe_bit_table_.Dedupe(map.data());
+ });
+ } else {
+ offset = offset_ + writer_->code_info_data_.size();
+ writer_->code_info_data_.insert(writer_->code_info_data_.end(), map.begin(), map.end());
+ }
// Code offset is not initialized yet, so set file offset for now.
DCHECK_EQ(oat_class->method_offsets_[method_offsets_index_].code_offset_, 0u);
oat_class->method_headers_[method_offsets_index_].SetCodeInfoOffset(offset);
@@ -2140,13 +2158,17 @@
if (!MayHaveCompiledMethods()) {
return offset;
}
- {
- InitMapMethodVisitor visitor(this, offset);
+ if (GetCompilerOptions().DeduplicateCode()) {
+ InitMapMethodVisitor</*kDeduplicate=*/ true> visitor(this, offset);
bool success = VisitDexMethods(&visitor);
DCHECK(success);
- code_info_data_.shrink_to_fit();
- offset += code_info_data_.size();
+ } else {
+ InitMapMethodVisitor</*kDeduplicate=*/ false> visitor(this, offset);
+ bool success = VisitDexMethods(&visitor);
+ DCHECK(success);
}
+ code_info_data_.shrink_to_fit();
+ offset += code_info_data_.size();
return offset;
}
diff --git a/dex2oat/linker/oat_writer.h b/dex2oat/linker/oat_writer.h
index 21ae77a..ebff7a1 100644
--- a/dex2oat/linker/oat_writer.h
+++ b/dex2oat/linker/oat_writer.h
@@ -265,7 +265,7 @@
struct OrderedMethodData;
class OrderedMethodVisitor;
class InitCodeMethodVisitor;
- class InitMapMethodVisitor;
+ template <bool kDeduplicate> class InitMapMethodVisitor;
class InitImageMethodVisitor;
class WriteCodeMethodVisitor;
class WriteMapMethodVisitor;