Add shared separate data section for compact dex

Added a shared dex data buffer for compact dex files; this buffer
is referenced by all compact dex files in a vdex file. Repurposed
the existing data_off / data_size fields in the header.

After the shared buffer is filled, the oat writer places it after the
dex files and fixes up the dex file headers so that they hold the
correct offsets / sizes for the shared buffer.

Motivation:
Make it easy to deduplicate data across dexes.

Bug: 63756964
Test: test-art-host
Change-Id: I17855a0c78b20be3d323d12dedb9c695962be3ed
diff --git a/dexlayout/compact_dex_writer.cc b/dexlayout/compact_dex_writer.cc
index 2f601b6..ef31c3f 100644
--- a/dexlayout/compact_dex_writer.cc
+++ b/dexlayout/compact_dex_writer.cc
@@ -167,6 +167,7 @@
                                                               code_item->GetOffset());
     if (deduped_offset != Deduper::kDidNotDedupe) {
       code_item->SetOffset(deduped_offset);
+      stream->Clear(start_offset, stream->Tell() - start_offset);
       // Undo the offset for all that we wrote since we deduped.
       stream->Seek(start_offset);
     }
@@ -287,9 +288,16 @@
   CHECK(output->IsCompactDexContainer());
   Container* const container = down_cast<Container*>(output);
   // For now, use the same stream for both data and metadata.
-  Stream stream(output->GetMainSection());
-  Stream* main_stream = &stream;
-  Stream* data_stream = &stream;
+  Stream temp_main_stream(output->GetMainSection());
+  Stream temp_data_stream(output->GetDataSection());
+  Stream* main_stream = &temp_main_stream;
+  Stream* data_stream = &temp_data_stream;
+
+  // We want offset 0 to be reserved for null, seek to the data section alignment or the end of the
+  // section.
+  data_stream->Seek(std::max(
+      static_cast<uint32_t>(output->GetDataSection()->Size()),
+      kDataSectionAlignment));
   code_item_dedupe_ = &container->code_item_dedupe_;
 
   // Starting offset is right after the header.
@@ -312,11 +320,9 @@
   WriteCallSiteIds(main_stream, /*reserve_only*/ true);
   WriteMethodHandles(main_stream);
 
-  uint32_t data_offset_ = 0u;
   if (compute_offsets_) {
     // Data section.
     data_stream->AlignTo(kDataSectionAlignment);
-    data_offset_ = data_stream->Tell();
   }
 
   // Write code item first to minimize the space required for encoded methods.
@@ -362,19 +368,9 @@
   } else {
     data_stream->Seek(collection.MapListOffset());
   }
-  GenerateAndWriteMapItems(data_stream);
-  data_stream->AlignTo(kDataSectionAlignment);
 
   // Map items are included in the data section.
-  if (compute_offsets_) {
-    header_->SetDataSize(data_stream->Tell() - data_offset_);
-    if (header_->DataSize() != 0) {
-      // Offset must be zero when the size is zero.
-      header_->SetDataOffset(data_offset_);
-    } else {
-      header_->SetDataOffset(0u);
-    }
-  }
+  GenerateAndWriteMapItems(data_stream);
 
   // Write link data if it exists.
   const std::vector<uint8_t>& link_data = collection.LinkData();
@@ -391,19 +387,39 @@
   // Write debug info offset table last to make dex file verifier happy.
   WriteDebugInfoOffsetTable(data_stream);
 
+  data_stream->AlignTo(kDataSectionAlignment);
+  if (compute_offsets_) {
+    header_->SetDataSize(data_stream->Tell());
+    if (header_->DataSize() != 0) {
+      // Offset must be zero when the size is zero.
+      main_stream->AlignTo(kDataSectionAlignment);
+      // For now, default to saying the data is right after the main stream.
+      header_->SetDataOffset(main_stream->Tell());
+      header_->SetDataOffset(0u);
+    } else {
+      header_->SetDataOffset(0u);
+    }
+  }
+
   // Write header last.
   if (compute_offsets_) {
     header_->SetFileSize(main_stream->Tell());
   }
   WriteHeader(main_stream);
 
+  // Trim sections to make sure they are sized properly.
+  output->GetMainSection()->Resize(header_->FileSize());
+  output->GetDataSection()->Resize(data_stream->Tell());
+
   if (dex_layout_->GetOptions().update_checksum_) {
-    header_->SetChecksum(DexFile::CalculateChecksum(main_stream->Begin(), header_->FileSize()));
+    // Compute the cdex section (also covers the used part of the data section).
+    header_->SetChecksum(CompactDexFile::CalculateChecksum(output->GetMainSection()->Begin(),
+                                                           output->GetMainSection()->Size(),
+                                                           output->GetDataSection()->Begin(),
+                                                           output->GetDataSection()->Size()));
     // Rewrite the header with the calculated checksum.
     WriteHeader(main_stream);
   }
-  // Trim the map to make it sized as large as the dex file.
-  output->GetMainSection()->Resize(header_->FileSize());
 }
 
 std::unique_ptr<DexContainer> CompactDexWriter::CreateDexContainer() const {
diff --git a/dexlayout/dex_container.h b/dexlayout/dex_container.h
index 7c426cb..2b9a5f9 100644
--- a/dexlayout/dex_container.h
+++ b/dexlayout/dex_container.h
@@ -43,6 +43,9 @@
     // Resize the backing storage.
     virtual void Resize(size_t size) = 0;
 
+    // Clear the container.
+    virtual void Clear() = 0;
+
     // Returns the end of the memory region.
     uint8_t* End() {
       return Begin() + Size();
@@ -66,6 +69,10 @@
       data_.resize(size, 0u);
     }
 
+    void Clear() OVERRIDE {
+      data_.clear();
+    }
+
    private:
     std::vector<uint8_t> data_;
   };
diff --git a/dexlayout/dex_ir.cc b/dexlayout/dex_ir.cc
index fb7dff6..1525d53 100644
--- a/dexlayout/dex_ir.cc
+++ b/dexlayout/dex_ir.cc
@@ -280,7 +280,7 @@
     }
     case DexFile::kDexAnnotationArray: {
       EncodedValueVector* values = new EncodedValueVector();
-      const uint32_t offset = *data - dex_file.Begin();
+      const uint32_t offset = *data - dex_file.DataBegin();
       const uint32_t size = DecodeUnsignedLeb128(data);
       // Decode all elements.
       for (uint32_t i = 0; i < size; i++) {
@@ -440,7 +440,7 @@
 AnnotationItem* Collections::CreateAnnotationItem(const DexFile& dex_file,
                                                   const DexFile::AnnotationItem* annotation) {
   const uint8_t* const start_data = reinterpret_cast<const uint8_t*>(annotation);
-  const uint32_t offset = start_data - dex_file.Begin();
+  const uint32_t offset = start_data - dex_file.DataBegin();
   AnnotationItem* annotation_item = annotation_items_map_.GetExistingObject(offset);
   if (annotation_item == nullptr) {
     uint8_t visibility = annotation->visibility_;
@@ -772,8 +772,7 @@
 
 void Collections::CreateCallSitesAndMethodHandles(const DexFile& dex_file) {
   // Iterate through the map list and set the offset of the CallSiteIds and MethodHandleItems.
-  const DexFile::MapList* map =
-      reinterpret_cast<const DexFile::MapList*>(dex_file.Begin() + MapListOffset());
+  const DexFile::MapList* map = dex_file.GetMapList();
   for (uint32_t i = 0; i < map->size_; ++i) {
     const DexFile::MapItem* item = map->list_ + i;
     switch (item->type_) {
@@ -799,7 +798,7 @@
 
 void Collections::CreateCallSiteId(const DexFile& dex_file, uint32_t i) {
   const DexFile::CallSiteIdItem& disk_call_site_id = dex_file.GetCallSiteId(i);
-  const uint8_t* disk_call_item_ptr = dex_file.Begin() + disk_call_site_id.data_off_;
+  const uint8_t* disk_call_item_ptr = dex_file.DataBegin() + disk_call_site_id.data_off_;
   EncodedArrayItem* call_site_item =
       CreateEncodedArrayItem(dex_file, disk_call_item_ptr, disk_call_site_id.data_off_);
 
diff --git a/dexlayout/dex_ir_builder.cc b/dexlayout/dex_ir_builder.cc
index 231826b..3ec163ce 100644
--- a/dexlayout/dex_ir_builder.cc
+++ b/dexlayout/dex_ir_builder.cc
@@ -83,8 +83,8 @@
 
   // Load the link data if it exists.
   collections.SetLinkData(std::vector<uint8_t>(
-      dex_file.Begin() + dex_file.GetHeader().link_off_,
-      dex_file.Begin() + dex_file.GetHeader().link_off_ + dex_file.GetHeader().link_size_));
+      dex_file.DataBegin() + dex_file.GetHeader().link_off_,
+      dex_file.DataBegin() + dex_file.GetHeader().link_off_ + dex_file.GetHeader().link_size_));
 
   return header;
 }
@@ -92,8 +92,7 @@
 static void CheckAndSetRemainingOffsets(const DexFile& dex_file, Collections* collections) {
   const DexFile::Header& disk_header = dex_file.GetHeader();
   // Read MapItems and validate/set remaining offsets.
-  const DexFile::MapList* map =
-      reinterpret_cast<const DexFile::MapList*>(dex_file.Begin() + disk_header.map_off_);
+  const DexFile::MapList* map = dex_file.GetMapList();
   const uint32_t count = map->size_;
   for (uint32_t i = 0; i < count; ++i) {
     const DexFile::MapItem* item = map->list_ + i;
diff --git a/dexlayout/dex_writer.cc b/dexlayout/dex_writer.cc
index eb038a0..67d0f9a 100644
--- a/dexlayout/dex_writer.cc
+++ b/dexlayout/dex_writer.cc
@@ -30,6 +30,8 @@
 
 namespace art {
 
+constexpr uint32_t DexWriter::kDataSectionAlignment;
+
 static size_t EncodeIntValue(int32_t value, uint8_t* buffer) {
   size_t length = 0;
   if (value >= 0) {
diff --git a/dexlayout/dexlayout.cc b/dexlayout/dexlayout.cc
index d33a0bd..1b32f7b 100644
--- a/dexlayout/dexlayout.cc
+++ b/dexlayout/dexlayout.cc
@@ -1838,13 +1838,17 @@
     }
   }
   DexWriter::Output(this, dex_container, compute_offsets);
-  DexContainer* const container = dex_container->get();
-  DexContainer::Section* const main_section = container->GetMainSection();
-  DexContainer::Section* const data_section = container->GetDataSection();
-  CHECK_EQ(data_section->Size(), 0u) << "Unsupported";
   if (new_file != nullptr) {
+    DexContainer* const container = dex_container->get();
+    DexContainer::Section* const main_section = container->GetMainSection();
     if (!new_file->WriteFully(main_section->Begin(), main_section->Size())) {
-      LOG(ERROR) << "Failed tow write dex file to " << dex_file_location;
+      LOG(ERROR) << "Failed to write main section for dex file " << dex_file_location;
+      new_file->Erase();
+      return;
+    }
+    DexContainer::Section* const data_section = container->GetDataSection();
+    if (!new_file->WriteFully(data_section->Begin(), data_section->Size())) {
+      LOG(ERROR) << "Failed to write data section for dex file " << dex_file_location;
       new_file->Erase();
       return;
     }
@@ -1919,17 +1923,22 @@
       // Dex file verifier cannot handle compact dex.
       bool verify = options_.compact_dex_level_ == CompactDexLevel::kCompactDexLevelNone;
       const ArtDexFileLoader dex_file_loader;
-      DexContainer::Section* section = (*dex_container)->GetMainSection();
-      DCHECK_EQ(file_size, section->Size());
+      DexContainer::Section* const main_section = (*dex_container)->GetMainSection();
+      DexContainer::Section* const data_section = (*dex_container)->GetDataSection();
+      DCHECK_EQ(file_size, main_section->Size())
+          << main_section->Size() << " " << data_section->Size();
       std::unique_ptr<const DexFile> output_dex_file(
-          dex_file_loader.Open(section->Begin(),
-                               file_size,
-                               location,
-                               /* checksum */ 0,
-                               /*oat_dex_file*/ nullptr,
-                               verify,
-                               /*verify_checksum*/ false,
-                               &error_msg));
+          dex_file_loader.OpenWithDataSection(
+              main_section->Begin(),
+              main_section->Size(),
+              data_section->Begin(),
+              data_section->Size(),
+              location,
+              /* checksum */ 0,
+              /*oat_dex_file*/ nullptr,
+              verify,
+              /*verify_checksum*/ false,
+              &error_msg));
       CHECK(output_dex_file != nullptr) << "Failed to re-open output file:" << error_msg;
 
       // Do IR-level comparison between input and output. This check ignores potential differences