Add command line tool for creating mini-debug-info for native code.
Mini-debug-info for native code is currently created with bash script,
which works, but misses some optimizations that the ART compiler does.
This CL adds tool based on ART code-base, with the following features:
* CIE entries are deduplicated to save space (usually one is needed).
* FDE entries are sorted, which significantly improves compression.
* Non-function and zero-sized function symbols are excluded.
* Symbols are sorted by address to allow binary search.
* Compressed data is split to blocks to allow random-access reads.
The space optimizations and the better random access balances,
so the overall generated file size remains approximately same.
Bug: 110133331
Test: manually check the generated data using readelf
Change-Id: I4ed8deaee647d5ee4dfb0846f316e888f060b98e
diff --git a/libelffile/elf/elf_builder.h b/libelffile/elf/elf_builder.h
index a76bf92..10541ec 100644
--- a/libelffile/elf/elf_builder.h
+++ b/libelffile/elf/elf_builder.h
@@ -310,7 +310,7 @@
last_offset_ = 0;
}
- Elf_Word Write(const std::string& name) {
+ Elf_Word Write(std::string_view name) {
if (current_offset_ == 0) {
DCHECK(name.empty());
} else if (name == last_name_) {
@@ -318,7 +318,9 @@
}
last_name_ = name;
last_offset_ = current_offset_;
- this->WriteFully(name.c_str(), name.length() + 1);
+ this->WriteFully(name.data(), name.length());
+ char null_terminator = '\0';
+ this->WriteFully(&null_terminator, sizeof(null_terminator));
current_offset_ += name.length() + 1;
return last_offset_;
}
@@ -798,6 +800,21 @@
return stream_.Seek(RoundUp(stream_.Seek(0, kSeekCurrent), alignment), kSeekSet);
}
+ static InstructionSet GetIsaFromHeader(const Elf_Ehdr& header) {
+ switch (header.e_machine) {
+ case EM_ARM:
+ return InstructionSet::kThumb2;
+ case EM_AARCH64:
+ return InstructionSet::kArm64;
+ case EM_386:
+ return InstructionSet::kX86;
+ case EM_X86_64:
+ return InstructionSet::kX86_64;
+ }
+ LOG(FATAL) << "Unknown architecture: " << header.e_machine;
+ UNREACHABLE();
+ }
+
private:
static Elf_Ehdr MakeElfHeader(InstructionSet isa) {
Elf_Ehdr elf_header = Elf_Ehdr();
@@ -832,6 +849,7 @@
LOG(FATAL) << "Unknown instruction set " << isa;
}
}
+ DCHECK_EQ(GetIsaFromHeader(elf_header), isa);
elf_header.e_ident[EI_MAG0] = ELFMAG0;
elf_header.e_ident[EI_MAG1] = ELFMAG1;
diff --git a/libelffile/elf/elf_debug_reader.h b/libelffile/elf/elf_debug_reader.h
index 2b03037..266c638 100644
--- a/libelffile/elf/elf_debug_reader.h
+++ b/libelffile/elf/elf_debug_reader.h
@@ -36,6 +36,7 @@
public:
// Note that the input buffer might be misaligned.
typedef typename ElfTypes::Ehdr ALIGNED(1) Elf_Ehdr;
+ typedef typename ElfTypes::Phdr ALIGNED(1) Elf_Phdr;
typedef typename ElfTypes::Shdr ALIGNED(1) Elf_Shdr;
typedef typename ElfTypes::Sym ALIGNED(1) Elf_Sym;
typedef typename ElfTypes::Addr ALIGNED(1) Elf_Addr;
@@ -65,10 +66,11 @@
CHECK_EQ(header_->e_ident[1], ELFMAG1);
CHECK_EQ(header_->e_ident[2], ELFMAG2);
CHECK_EQ(header_->e_ident[3], ELFMAG3);
+ CHECK_EQ(header_->e_ident[4], sizeof(Elf_Addr) / sizeof(uint32_t));
CHECK_EQ(header_->e_ehsize, sizeof(Elf_Ehdr));
- CHECK_EQ(header_->e_shentsize, sizeof(Elf_Shdr));
// Find all ELF sections.
+ CHECK_EQ(header_->e_shentsize, sizeof(Elf_Shdr));
sections_ = Read<Elf_Shdr>(header_->e_shoff, header_->e_shnum);
for (const Elf_Shdr& section : sections_) {
const char* name = Read<char>(sections_[header_->e_shstrndx].sh_offset + section.sh_name);
@@ -84,16 +86,38 @@
}
}
- explicit ElfDebugReader(std::vector<uint8_t>& file)
+ explicit ElfDebugReader(const std::vector<uint8_t>& file)
: ElfDebugReader(ArrayRef<const uint8_t>(file)) {
}
+ // Check that ELF signature is present at the start of the files,
+ // and that the ELF bitness matches the ElfTypes template arguments.
+ static bool IsValidElfHeader(const std::vector<uint8_t>& data) {
+ static constexpr bool kIs64Bit = sizeof(Elf_Addr) == sizeof(uint64_t);
+ static constexpr char kMagic[] = { 0x7f, 'E', 'L', 'F', kIs64Bit ? 2 : 1 };
+ return data.size() >= sizeof(kMagic) && memcmp(data.data(), kMagic, sizeof(kMagic)) == 0;
+ }
+
const Elf_Ehdr* GetHeader() { return header_; }
ArrayRef<Elf_Shdr> GetSections() { return sections_; }
const Elf_Shdr* GetSection(const char* name) { return section_map_[name]; }
+ // Find the base address where the ELF file wants to be loaded.
+ // This is generally zero (therefore always requiring relocation).
+ Elf_Addr GetLoadAddress() {
+ std::optional<Elf_Addr> addr;
+ CHECK_EQ(header_->e_phentsize, sizeof(Elf_Phdr));
+ for (const Elf_Phdr& phdr : Read<Elf_Phdr>(header_->e_phoff, header_->e_phnum)) {
+ if (phdr.p_type == PT_LOAD) {
+ addr = addr.has_value() ? std::min(addr.value(), phdr.p_vaddr) : phdr.p_vaddr;
+ }
+ }
+ CHECK(addr.has_value());
+ return addr.value();
+ }
+
template <typename VisitSym>
void VisitFunctionSymbols(VisitSym visit_sym) {
const Elf_Shdr* symtab = GetSection(".symtab");
diff --git a/libelffile/elf/xz_utils.cc b/libelffile/elf/xz_utils.cc
index 87c9a7b..f064cb0 100644
--- a/libelffile/elf/xz_utils.cc
+++ b/libelffile/elf/xz_utils.cc
@@ -32,8 +32,6 @@
namespace art {
-constexpr size_t kChunkSize = 16 * KB;
-
static void XzInitCrc() {
static std::once_flag crc_initialized;
std::call_once(crc_initialized, []() {
@@ -42,14 +40,17 @@
});
}
-void XzCompress(ArrayRef<const uint8_t> src, std::vector<uint8_t>* dst, int level) {
+void XzCompress(ArrayRef<const uint8_t> src,
+ std::vector<uint8_t>* dst,
+ int level,
+ size_t block_size) {
// Configure the compression library.
XzInitCrc();
CLzma2EncProps lzma2Props;
Lzma2EncProps_Init(&lzma2Props);
lzma2Props.lzmaProps.level = level;
lzma2Props.lzmaProps.reduceSize = src.size(); // Size of data that will be compressed.
- lzma2Props.blockSize = kChunkSize;
+ lzma2Props.blockSize = block_size;
Lzma2EncProps_Normalize(&lzma2Props);
CXzProps props;
XzProps_Init(&props);
diff --git a/libelffile/elf/xz_utils.h b/libelffile/elf/xz_utils.h
index df5cb56..b1903ff 100644
--- a/libelffile/elf/xz_utils.h
+++ b/libelffile/elf/xz_utils.h
@@ -20,10 +20,17 @@
#include <vector>
#include "base/array_ref.h"
+#include "base/bit_utils.h"
namespace art {
-void XzCompress(ArrayRef<const uint8_t> src, std::vector<uint8_t>* dst, int level = 1 /* speed */);
+constexpr size_t kXzDefaultBlockSize = 16 * KB;
+
+void XzCompress(ArrayRef<const uint8_t> src,
+ std::vector<uint8_t>* dst,
+ int level = 1 /* speed */,
+ size_t block_size = kXzDefaultBlockSize);
+
void XzDecompress(ArrayRef<const uint8_t> src, std::vector<uint8_t>* dst);
} // namespace art
diff --git a/tools/create_minidebuginfo/Android.bp b/tools/create_minidebuginfo/Android.bp
new file mode 100644
index 0000000..a9d6b8d
--- /dev/null
+++ b/tools/create_minidebuginfo/Android.bp
@@ -0,0 +1,35 @@
+//
+// Copyright (C) 2021 The Android Open Source Project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+art_cc_binary {
+ name: "create_minidebuginfo",
+ defaults: [
+ "art_debug_defaults",
+ "art_defaults",
+ ],
+ host_supported: true,
+ device_supported: false,
+ srcs: [
+ "create_minidebuginfo.cc",
+ ],
+ static_libs: [
+ "libartbase",
+ "libbase",
+ "libelffile",
+ "liblzma",
+ "liblog",
+ ],
+}
diff --git a/tools/create_minidebuginfo/create_minidebuginfo.cc b/tools/create_minidebuginfo/create_minidebuginfo.cc
new file mode 100644
index 0000000..aad36a8
--- /dev/null
+++ b/tools/create_minidebuginfo/create_minidebuginfo.cc
@@ -0,0 +1,175 @@
+/*
+ * Copyright (C) 2021 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "android-base/logging.h"
+
+#include "base/os.h"
+#include "base/unix_file/fd_file.h"
+#include "elf/elf_builder.h"
+#include "elf/elf_debug_reader.h"
+#include "elf/xz_utils.h"
+#include "stream/file_output_stream.h"
+#include "stream/vector_output_stream.h"
+
+#include <algorithm>
+#include <deque>
+#include <map>
+#include <memory>
+#include <string>
+#include <string_view>
+#include <vector>
+
+namespace art {
+
+static constexpr size_t kBlockSize = 32 * KB;
+
+template<typename ElfTypes>
+static void WriteMinidebugInfo(const std::vector<uint8_t>& input, std::vector<uint8_t>* output) {
+ using Elf_Addr = typename ElfTypes::Addr;
+ using Elf_Ehdr = typename ElfTypes::Ehdr;
+ using Elf_Shdr = typename ElfTypes::Shdr;
+ using Elf_Sym = typename ElfTypes::Sym;
+ using Elf_Word = typename ElfTypes::Word;
+ using CIE = typename ElfDebugReader<ElfTypes>::CIE;
+ using FDE = typename ElfDebugReader<ElfTypes>::FDE;
+
+ ElfDebugReader<ElfTypes> reader(input);
+
+ std::vector<uint8_t> output_elf_data;
+ VectorOutputStream output_stream("Output ELF", &output_elf_data);
+ InstructionSet isa = ElfBuilder<ElfTypes>::GetIsaFromHeader(*reader.GetHeader());
+ std::unique_ptr<ElfBuilder<ElfTypes>> builder(new ElfBuilder<ElfTypes>(isa, &output_stream));
+ builder->Start(/*write_program_headers=*/ false);
+
+ auto* rodata = builder->GetRoData();
+ auto* text = builder->GetText();
+ const Elf_Shdr* original_text = reader.GetSection(".text");
+ CHECK(original_text != nullptr);
+ CHECK_EQ(reader.GetLoadAddress(), 0u);
+ rodata->AllocateVirtualMemory(original_text->sh_addr - sizeof(Elf_Ehdr));
+ text->AllocateVirtualMemory(original_text->sh_addr, original_text->sh_size);
+
+ auto* strtab = builder->GetStrTab();
+ auto* symtab = builder->GetSymTab();
+ strtab->Start();
+ {
+ strtab->Write(""); // strtab should start with empty string.
+ std::multimap<std::string_view, Elf_Sym> syms;
+ reader.VisitFunctionSymbols([&](Elf_Sym sym, const char* name) {
+ // Exclude non-function or empty symbols.
+ if (ELF32_ST_TYPE(sym.st_info) == STT_FUNC && sym.st_size != 0) {
+ syms.emplace(name, sym);
+ }
+ });
+ reader.VisitDynamicSymbols([&](Elf_Sym sym, const char* name) {
+ // Exclude symbols which will be preserved in the dynamic table anyway.
+ auto it = syms.find(name);
+ if (it != syms.end() && it->second.st_value == sym.st_value) {
+ syms.erase(it);
+ }
+ });
+ for (auto& entry : syms) {
+ std::string_view name = entry.first;
+ const Elf_Sym& sym = entry.second;
+ Elf_Word name_idx = strtab->Write(name);
+ symtab->Add(name_idx, text, sym.st_value, sym.st_size, STB_GLOBAL, STT_FUNC);
+ }
+ }
+ strtab->End();
+ symtab->WriteCachedSection();
+
+ auto* debug_frame = builder->GetDebugFrame();
+ debug_frame->Start();
+ {
+ std::map<std::basic_string_view<uint8_t>, Elf_Addr> cie_dedup;
+ std::unordered_map<const CIE*, Elf_Addr> new_cie_offset;
+ std::deque<std::pair<const FDE*, const CIE*>> entries;
+ // Read, de-duplicate and write CIE entries. Read FDE entries.
+ reader.VisitDebugFrame(
+ [&](const CIE* cie) {
+ std::basic_string_view<uint8_t> key(cie->data(), cie->size());
+ auto it = cie_dedup.emplace(key, debug_frame->GetPosition());
+ if (/* inserted */ it.second) {
+ debug_frame->WriteFully(cie->data(), cie->size());
+ }
+ new_cie_offset[cie] = it.first->second;
+ },
+ [&](const FDE* fde, const CIE* cie) {
+ entries.emplace_back(std::make_pair(fde, cie));
+ });
+ // Sort FDE entries by opcodes to improve locality for compression (saves ~25%).
+ std::stable_sort(entries.begin(), entries.end(), [](const auto& lhs, const auto& rhs) {
+ constexpr size_t opcode_offset = sizeof(FDE);
+ return std::lexicographical_compare(
+ lhs.first->data() + opcode_offset, lhs.first->data() + lhs.first->size(),
+ rhs.first->data() + opcode_offset, rhs.first->data() + rhs.first->size());
+ });
+ // Write all FDE entries while adjusting the CIE offsets to the new locations.
+ for (const auto& entry : entries) {
+ const FDE* fde = entry.first;
+ const CIE* cie = entry.second;
+ FDE new_header = *fde;
+ new_header.cie_pointer = new_cie_offset[cie];
+ debug_frame->WriteFully(&new_header, sizeof(FDE));
+ debug_frame->WriteFully(fde->data() + sizeof(FDE), fde->size() - sizeof(FDE));
+ }
+ }
+ debug_frame->End();
+
+ builder->End();
+ CHECK(builder->Good());
+
+ XzCompress(ArrayRef<const uint8_t>(output_elf_data), output, 9 /*size*/, kBlockSize);
+}
+
+static int Main(int argc, char** argv) {
+ // Check command like arguments.
+ if (argc != 3) {
+ printf("Usage: create_minidebuginfo ELF_FILE OUT_FILE\n");
+ printf(" ELF_FILE: The path to an ELF file with full symbols (before being stripped).\n");
+ printf(" OUT_FILE: The path for the generated mini-debug-info data (not an elf file).\n");
+ return 1;
+ }
+ const char* input_filename = argv[1];
+ const char* output_filename = argv[2];
+
+ // Read input file.
+ std::unique_ptr<File> input_file(OS::OpenFileForReading(input_filename));
+ CHECK(input_file.get() != nullptr) << "Failed to open input file";
+ std::vector<uint8_t> elf(input_file->GetLength());
+ CHECK(input_file->ReadFully(elf.data(), elf.size())) << "Failed to read input file";
+
+ // Write output file.
+ std::vector<uint8_t> output;
+ if (ElfDebugReader<ElfTypes32>::IsValidElfHeader(elf)) {
+ WriteMinidebugInfo<ElfTypes32>(elf, &output);
+ } else if (ElfDebugReader<ElfTypes64>::IsValidElfHeader(elf)) {
+ WriteMinidebugInfo<ElfTypes64>(elf, &output);
+ } else {
+ LOG(FATAL) << "Invalid ELF file header " << input_filename;
+ }
+ std::unique_ptr<File> output_file(OS::CreateEmptyFile(output_filename));
+ if (!output_file->WriteFully(output.data(), output.size()) || output_file->FlushClose() != 0) {
+ LOG(FATAL) << "Failed to write " << output_filename;
+ }
+ return 0;
+}
+
+} // namespace art
+
+int main(int argc, char** argv) {
+ return art::Main(argc, argv);
+}