Add logic for deduplicating in dexlayout
Added logic for dedeplicating blobs, this is only used for code items
in compact dex for now. This currently provides 0.74% code size
reduction on golem
Disabled for now since quickening is fragile and does not play well
with deduped code items currently.
Future work is to fix quickening and dedupe after quickening to get
the most code size savings.
Test: test-art-host-gtest
Bug: 63756964
Change-Id: Iec770d9c1f5171288aca8329a6ca6992375101bc
diff --git a/compiler/driver/compiled_method_storage.cc b/compiler/driver/compiled_method_storage.cc
index c8c2b69..48477ab 100644
--- a/compiler/driver/compiled_method_storage.cc
+++ b/compiler/driver/compiled_method_storage.cc
@@ -137,16 +137,7 @@
return hash;
} else {
- size_t hash = 0x811c9dc5;
- for (uint32_t i = 0; i < len; ++i) {
- hash = (hash * 16777619) ^ data[i];
- }
- hash += hash << 13;
- hash ^= hash >> 7;
- hash += hash << 3;
- hash ^= hash >> 17;
- hash += hash << 5;
- return hash;
+ return HashBytes(data, len);
}
}
};
diff --git a/dexlayout/compact_dex_writer.cc b/dexlayout/compact_dex_writer.cc
index 73eb39b..dd1eee7 100644
--- a/dexlayout/compact_dex_writer.cc
+++ b/dexlayout/compact_dex_writer.cc
@@ -24,6 +24,18 @@
namespace art {
+CompactDexWriter::CompactDexWriter(dex_ir::Header* header,
+ MemMap* mem_map,
+ DexLayout* dex_layout,
+ CompactDexLevel compact_dex_level)
+ : DexWriter(header, mem_map, dex_layout, /*compute_offsets*/ true),
+ compact_dex_level_(compact_dex_level),
+ data_dedupe_(/*bucket_count*/ 32,
+ HashedMemoryRange::HashEqual(mem_map->Begin()),
+ HashedMemoryRange::HashEqual(mem_map->Begin())) {
+ CHECK(compact_dex_level_ != CompactDexLevel::kCompactDexLevelNone);
+}
+
uint32_t CompactDexWriter::WriteDebugInfoOffsetTable(uint32_t offset) {
const uint32_t start_offset = offset;
const dex_ir::Collections& collections = header_->GetCollections();
@@ -94,9 +106,11 @@
uint32_t offset,
bool reserve_only) {
DCHECK(code_item != nullptr);
+ DCHECK(!reserve_only) << "Not supported because of deduping.";
const uint32_t start_offset = offset;
- // Use 4 byte alignment for now, we need to peek at the bytecode to see if we can 2 byte align
- // otherwise.
+
+ // Align to minimum requirements, additional alignment requirements are handled below after we
+ // know the preheader size.
offset = RoundUp(offset, CompactDexFile::CodeItem::kAlignment);
CompactDexFile::CodeItem disk_code_item;
@@ -130,6 +144,8 @@
}
}
+ const uint32_t data_start = offset;
+
// Write preheader first.
offset += Write(reinterpret_cast<const uint8_t*>(preheader), preheader_bytes, offset);
// Registered offset is after the preheader.
@@ -143,9 +159,32 @@
offset += Write(code_item->Insns(), code_item->InsnsSize() * sizeof(uint16_t), offset);
// Write the post instruction data.
offset += WriteCodeItemPostInstructionData(code_item, offset, reserve_only);
+
+ if (dex_layout_->GetOptions().dedupe_code_items_ && compute_offsets_) {
+ // After having written, try to dedupe the whole code item (excluding padding).
+ uint32_t deduped_offset = DedupeData(data_start, offset, code_item->GetOffset());
+ if (deduped_offset != kDidNotDedupe) {
+ code_item->SetOffset(deduped_offset);
+ // Undo the offset for all that we wrote since we deduped.
+ offset = start_offset;
+ }
+ }
+
return offset - start_offset;
}
+uint32_t CompactDexWriter::DedupeData(uint32_t data_start,
+ uint32_t data_end,
+ uint32_t item_offset) {
+ HashedMemoryRange range {data_start, data_end - data_start};
+ auto existing = data_dedupe_.emplace(range, item_offset);
+ if (!existing.second) {
+ // Failed to insert, item already existed in the map.
+ return existing.first->second;
+ }
+ return kDidNotDedupe;
+}
+
void CompactDexWriter::SortDebugInfosByMethodIndex() {
dex_ir::Collections& collections = header_->GetCollections();
static constexpr InvokeType invoke_types[] = {
diff --git a/dexlayout/compact_dex_writer.h b/dexlayout/compact_dex_writer.h
index 37f6ff1..cb53cae 100644
--- a/dexlayout/compact_dex_writer.h
+++ b/dexlayout/compact_dex_writer.h
@@ -19,18 +19,45 @@
#ifndef ART_DEXLAYOUT_COMPACT_DEX_WRITER_H_
#define ART_DEXLAYOUT_COMPACT_DEX_WRITER_H_
+#include <unordered_map>
+
#include "dex_writer.h"
+#include "utils.h"
namespace art {
+class HashedMemoryRange {
+ public:
+ uint32_t offset_;
+ uint32_t length_;
+
+ class HashEqual {
+ public:
+ explicit HashEqual(const uint8_t* data) : data_(data) {}
+
+ // Equal function.
+ bool operator()(const HashedMemoryRange& a, const HashedMemoryRange& b) const {
+ return a.length_ == b.length_ && std::equal(data_ + a.offset_,
+ data_ + a.offset_ + a.length_,
+ data_ + b.offset_);
+ }
+
+ // Hash function.
+ size_t operator()(const HashedMemoryRange& range) const {
+ return HashBytes(data_ + range.offset_, range.length_);
+ }
+
+ private:
+ const uint8_t* data_;
+ };
+};
+
class CompactDexWriter : public DexWriter {
public:
CompactDexWriter(dex_ir::Header* header,
MemMap* mem_map,
DexLayout* dex_layout,
- CompactDexLevel compact_dex_level)
- : DexWriter(header, mem_map, dex_layout, /*compute_offsets*/ true),
- compact_dex_level_(compact_dex_level) {}
+ CompactDexLevel compact_dex_level);
protected:
void WriteMemMap() OVERRIDE;
@@ -41,13 +68,20 @@
uint32_t WriteDebugInfoOffsetTable(uint32_t offset);
- const CompactDexLevel compact_dex_level_;
-
uint32_t WriteCodeItem(dex_ir::CodeItem* code_item, uint32_t offset, bool reserve_only) OVERRIDE;
void SortDebugInfosByMethodIndex();
+ // Deduplicate a blob of data that has been written to mem_map. The backing storage is the actual
+ // mem_map contents to reduce RAM usage.
+ // Returns the offset of the deduplicated data or 0 if kDidNotDedupe did not occur.
+ uint32_t DedupeData(uint32_t data_start, uint32_t data_end, uint32_t item_offset);
+
private:
+ const CompactDexLevel compact_dex_level_;
+
+ static const uint32_t kDidNotDedupe = 0;
+
// Position in the compact dex file for the debug info table data starts.
uint32_t debug_info_offsets_pos_ = 0u;
@@ -57,6 +91,12 @@
// Base offset of where debug info starts in the dex file.
uint32_t debug_info_base_ = 0u;
+ // Dedupe map.
+ std::unordered_map<HashedMemoryRange,
+ uint32_t,
+ HashedMemoryRange::HashEqual,
+ HashedMemoryRange::HashEqual> data_dedupe_;
+
DISALLOW_COPY_AND_ASSIGN(CompactDexWriter);
};
diff --git a/dexlayout/dex_ir.cc b/dexlayout/dex_ir.cc
index 0a59cc9..fb7dff6 100644
--- a/dexlayout/dex_ir.cc
+++ b/dexlayout/dex_ir.cc
@@ -565,18 +565,23 @@
return new ParameterAnnotation(method_id, set_ref_list);
}
-CodeItem* Collections::CreateCodeItem(const DexFile& dex_file,
- const DexFile::CodeItem& disk_code_item,
- uint32_t offset,
- uint32_t dex_method_index) {
- CodeItemDebugInfoAccessor accessor(dex_file, &disk_code_item, dex_method_index);
- const uint16_t registers_size = accessor.RegistersSize();
- const uint16_t ins_size = accessor.InsSize();
- const uint16_t outs_size = accessor.OutsSize();
- const uint32_t tries_size = accessor.TriesSize();
-
- // TODO: Calculate the size of the debug info.
+CodeItem* Collections::DedupeOrCreateCodeItem(const DexFile& dex_file,
+ const DexFile::CodeItem* disk_code_item,
+ uint32_t offset,
+ uint32_t dex_method_index) {
+ if (disk_code_item == nullptr) {
+ return nullptr;
+ }
+ CodeItemDebugInfoAccessor accessor(dex_file, disk_code_item, dex_method_index);
const uint32_t debug_info_offset = accessor.DebugInfoOffset();
+
+ // Create the offsets pair and dedupe based on it.
+ std::pair<uint32_t, uint32_t> offsets_pair(offset, debug_info_offset);
+ auto existing = code_items_map_.find(offsets_pair);
+ if (existing != code_items_map_.end()) {
+ return existing->second;
+ }
+
const uint8_t* debug_info_stream = dex_file.GetDebugInfoStream(debug_info_offset);
DebugInfoItem* debug_info = nullptr;
if (debug_info_stream != nullptr) {
@@ -596,7 +601,7 @@
TryItemVector* tries = nullptr;
CatchHandlerVector* handler_list = nullptr;
- if (tries_size > 0) {
+ if (accessor.TriesSize() > 0) {
tries = new TryItemVector();
handler_list = new CatchHandlerVector();
for (const DexFile::TryItem& disk_try_item : accessor.TryItems()) {
@@ -671,11 +676,25 @@
}
}
- uint32_t size = dex_file.GetCodeItemSize(disk_code_item);
- CodeItem* code_item = new CodeItem(
- registers_size, ins_size, outs_size, debug_info, insns_size, insns, tries, handler_list);
+ uint32_t size = dex_file.GetCodeItemSize(*disk_code_item);
+ CodeItem* code_item = new CodeItem(accessor.RegistersSize(),
+ accessor.InsSize(),
+ accessor.OutsSize(),
+ debug_info,
+ insns_size,
+ insns,
+ tries,
+ handler_list);
code_item->SetSize(size);
- AddItem(code_items_map_, code_items_, code_item, offset);
+
+ // Add the code item to the map.
+ DCHECK(!code_item->OffsetAssigned());
+ if (eagerly_assign_offsets_) {
+ code_item->SetOffset(offset);
+ }
+ code_items_map_.emplace(offsets_pair, code_item);
+ code_items_.AddItem(code_item);
+
// Add "fixup" references to types, strings, methods, and fields.
// This is temporary, as we will probably want more detailed parsing of the
// instructions here.
@@ -703,17 +722,12 @@
MethodId* method_id = GetMethodId(cdii.GetMemberIndex());
uint32_t access_flags = cdii.GetRawMemberAccessFlags();
const DexFile::CodeItem* disk_code_item = cdii.GetMethodCodeItem();
- CodeItem* code_item = code_items_map_.GetExistingObject(cdii.GetMethodCodeItemOffset());
- DebugInfoItem* debug_info = nullptr;
- if (disk_code_item != nullptr) {
- if (code_item == nullptr) {
- code_item = CreateCodeItem(dex_file,
- *disk_code_item,
- cdii.GetMethodCodeItemOffset(),
- cdii.GetMemberIndex());
- }
- debug_info = code_item->DebugInfo();
- }
+ // Temporary hack to prevent incorrectly deduping code items if they have the same offset since
+ // they may have different debug info streams.
+ CodeItem* code_item = DedupeOrCreateCodeItem(dex_file,
+ disk_code_item,
+ cdii.GetMethodCodeItemOffset(),
+ cdii.GetMemberIndex());
return new MethodItem(access_flags, method_id, code_item);
}
@@ -819,16 +833,16 @@
}
void Collections::SortVectorsByMapOrder() {
- string_datas_map_.SortVectorByMapOrder(string_datas_);
- type_lists_map_.SortVectorByMapOrder(type_lists_);
- encoded_array_items_map_.SortVectorByMapOrder(encoded_array_items_);
- annotation_items_map_.SortVectorByMapOrder(annotation_items_);
- annotation_set_items_map_.SortVectorByMapOrder(annotation_set_items_);
- annotation_set_ref_lists_map_.SortVectorByMapOrder(annotation_set_ref_lists_);
- annotations_directory_items_map_.SortVectorByMapOrder(annotations_directory_items_);
- debug_info_items_map_.SortVectorByMapOrder(debug_info_items_);
- code_items_map_.SortVectorByMapOrder(code_items_);
- class_datas_map_.SortVectorByMapOrder(class_datas_);
+ string_datas_.SortByMapOrder(string_datas_map_.Collection());
+ type_lists_.SortByMapOrder(type_lists_map_.Collection());
+ encoded_array_items_.SortByMapOrder(encoded_array_items_map_.Collection());
+ annotation_items_.SortByMapOrder(annotation_items_map_.Collection());
+ annotation_set_items_.SortByMapOrder(annotation_set_items_map_.Collection());
+ annotation_set_ref_lists_.SortByMapOrder(annotation_set_ref_lists_map_.Collection());
+ annotations_directory_items_.SortByMapOrder(annotations_directory_items_map_.Collection());
+ debug_info_items_.SortByMapOrder(debug_info_items_map_.Collection());
+ code_items_.SortByMapOrder(code_items_map_);
+ class_datas_.SortByMapOrder(class_datas_map_.Collection());
}
static uint32_t HeaderOffset(const dex_ir::Collections& collections ATTRIBUTE_UNUSED) {
diff --git a/dexlayout/dex_ir.h b/dexlayout/dex_ir.h
index ca47b34..3627717 100644
--- a/dexlayout/dex_ir.h
+++ b/dexlayout/dex_ir.h
@@ -135,6 +135,20 @@
Vector& Collection() { return collection_; }
const Vector& Collection() const { return collection_; }
+ // Sort the vector by copying pointers over.
+ template <typename MapType>
+ void SortByMapOrder(const MapType& map) {
+ auto it = map.begin();
+ CHECK_EQ(map.size(), Size());
+ for (size_t i = 0; i < Size(); ++i) {
+ // There are times when the array will temporarily contain the same pointer twice, doing the
+ // release here sure there is no double free errors.
+ Collection()[i].release();
+ Collection()[i].reset(it->second);
+ ++it;
+ }
+ }
+
protected:
Vector collection_;
@@ -172,22 +186,10 @@
return it != collection_.end() ? it->second : nullptr;
}
- uint32_t Size() const { return collection_.size(); }
+ // Lower case for template interop with std::map.
+ uint32_t size() const { return collection_.size(); }
std::map<uint32_t, T*>& Collection() { return collection_; }
- // Sort the vector by copying pointers over.
- void SortVectorByMapOrder(CollectionVector<T>& vector) {
- auto it = collection_.begin();
- CHECK_EQ(vector.Size(), Size());
- for (size_t i = 0; i < Size(); ++i) {
- // There are times when the array will temporarily contain the same pointer twice, doing the
- // release here sure there is no double free errors.
- vector.Collection()[i].release();
- vector.Collection()[i].reset(it->second);
- ++it;
- }
- }
-
private:
std::map<uint32_t, T*> collection_;
@@ -254,10 +256,10 @@
const DexFile::AnnotationSetItem* disk_annotations_item, uint32_t offset);
AnnotationsDirectoryItem* CreateAnnotationsDirectoryItem(const DexFile& dex_file,
const DexFile::AnnotationsDirectoryItem* disk_annotations_item, uint32_t offset);
- CodeItem* CreateCodeItem(const DexFile& dex_file,
- const DexFile::CodeItem& disk_code_item,
- uint32_t offset,
- uint32_t dex_method_index);
+ CodeItem* DedupeOrCreateCodeItem(const DexFile& dex_file,
+ const DexFile::CodeItem* disk_code_item,
+ uint32_t offset,
+ uint32_t dex_method_index);
ClassData* CreateClassData(const DexFile& dex_file, const uint8_t* encoded_data, uint32_t offset);
void AddAnnotationsFromMapListSection(const DexFile& dex_file,
uint32_t start_offset,
@@ -460,7 +462,10 @@
CollectionMap<AnnotationSetRefList> annotation_set_ref_lists_map_;
CollectionMap<AnnotationsDirectoryItem> annotations_directory_items_map_;
CollectionMap<DebugInfoItem> debug_info_items_map_;
- CollectionMap<CodeItem> code_items_map_;
+ // Code item maps need to check both the debug info offset and debug info offset, do not use
+ // CollectionMap.
+ // First offset is the code item offset, second is the debug info offset.
+ std::map<std::pair<uint32_t, uint32_t>, CodeItem*> code_items_map_;
CollectionMap<ClassData> class_datas_map_;
uint32_t map_list_offset_ = 0;
diff --git a/dexlayout/dexlayout.h b/dexlayout/dexlayout.h
index 25afb77..cb0eabc 100644
--- a/dexlayout/dexlayout.h
+++ b/dexlayout/dexlayout.h
@@ -65,6 +65,8 @@
bool visualize_pattern_ = false;
bool update_checksum_ = false;
CompactDexLevel compact_dex_level_ = CompactDexLevel::kCompactDexLevelNone;
+ // Disabled until dex2oat properly handles quickening of deduped code items.
+ bool dedupe_code_items_ = false;
OutputFormat output_format_ = kOutputPlain;
const char* output_dex_directory_ = nullptr;
const char* output_file_name_ = nullptr;
diff --git a/runtime/utils.h b/runtime/utils.h
index 789498c..7402c12 100644
--- a/runtime/utils.h
+++ b/runtime/utils.h
@@ -289,6 +289,20 @@
}
}
+// Hash bytes using a relatively fast hash.
+static inline size_t HashBytes(const uint8_t* data, size_t len) {
+ size_t hash = 0x811c9dc5;
+ for (uint32_t i = 0; i < len; ++i) {
+ hash = (hash * 16777619) ^ data[i];
+ }
+ hash += hash << 13;
+ hash ^= hash >> 7;
+ hash += hash << 3;
+ hash ^= hash >> 17;
+ hash += hash << 5;
+ return hash;
+}
+
} // namespace art
#endif // ART_RUNTIME_UTILS_H_