diff options
| author | 2019-03-25 13:41:25 +0000 | |
|---|---|---|
| committer | 2021-03-16 21:42:31 +0000 | |
| commit | 0e9d9fce596125df602c1210c948259e9e7ca4ec (patch) | |
| tree | 129a05b02a51730398e036d2815644f0e6b68156 | |
| parent | 553a4b68652fe5b32e150bb7642bbcb8a558d25b (diff) | |
Add command line tool for creating mini-debug-info for native code.
Mini-debug-info for native code is currently created with a bash script,
which works, but misses some optimizations that the ART compiler does.
This CL adds a tool based on the ART code base, with the following features:
* CIE entries are deduplicated to save space (usually one is needed).
* FDE entries are sorted, which significantly improves compression.
* Non-function and zero-sized function symbols are excluded.
* Symbols are sorted by address to allow binary search.
* Compressed data is split into blocks to allow random-access reads.
The space optimizations and the better random access balance each other out,
so the overall generated file size remains approximately the same.
Bug: 110133331
Test: manually check the generated data using readelf
Change-Id: I4ed8deaee647d5ee4dfb0846f316e888f060b98e
| -rw-r--r-- | libelffile/elf/elf_builder.h | 22 | ||||
| -rw-r--r-- | libelffile/elf/elf_debug_reader.h | 28 | ||||
| -rw-r--r-- | libelffile/elf/xz_utils.cc | 9 | ||||
| -rw-r--r-- | libelffile/elf/xz_utils.h | 9 | ||||
| -rw-r--r-- | tools/create_minidebuginfo/Android.bp | 35 | ||||
| -rw-r--r-- | tools/create_minidebuginfo/create_minidebuginfo.cc | 175 |
6 files changed, 269 insertions, 9 deletions
diff --git a/libelffile/elf/elf_builder.h b/libelffile/elf/elf_builder.h index a76bf92ada..10541eca3c 100644 --- a/libelffile/elf/elf_builder.h +++ b/libelffile/elf/elf_builder.h @@ -310,7 +310,7 @@ class ElfBuilder final { last_offset_ = 0; } - Elf_Word Write(const std::string& name) { + Elf_Word Write(std::string_view name) { if (current_offset_ == 0) { DCHECK(name.empty()); } else if (name == last_name_) { @@ -318,7 +318,9 @@ class ElfBuilder final { } last_name_ = name; last_offset_ = current_offset_; - this->WriteFully(name.c_str(), name.length() + 1); + this->WriteFully(name.data(), name.length()); + char null_terminator = '\0'; + this->WriteFully(&null_terminator, sizeof(null_terminator)); current_offset_ += name.length() + 1; return last_offset_; } @@ -798,6 +800,21 @@ class ElfBuilder final { return stream_.Seek(RoundUp(stream_.Seek(0, kSeekCurrent), alignment), kSeekSet); } + static InstructionSet GetIsaFromHeader(const Elf_Ehdr& header) { + switch (header.e_machine) { + case EM_ARM: + return InstructionSet::kThumb2; + case EM_AARCH64: + return InstructionSet::kArm64; + case EM_386: + return InstructionSet::kX86; + case EM_X86_64: + return InstructionSet::kX86_64; + } + LOG(FATAL) << "Unknown architecture: " << header.e_machine; + UNREACHABLE(); + } + private: static Elf_Ehdr MakeElfHeader(InstructionSet isa) { Elf_Ehdr elf_header = Elf_Ehdr(); @@ -832,6 +849,7 @@ class ElfBuilder final { LOG(FATAL) << "Unknown instruction set " << isa; } } + DCHECK_EQ(GetIsaFromHeader(elf_header), isa); elf_header.e_ident[EI_MAG0] = ELFMAG0; elf_header.e_ident[EI_MAG1] = ELFMAG1; diff --git a/libelffile/elf/elf_debug_reader.h b/libelffile/elf/elf_debug_reader.h index 2b0303741b..266c638473 100644 --- a/libelffile/elf/elf_debug_reader.h +++ b/libelffile/elf/elf_debug_reader.h @@ -36,6 +36,7 @@ class ElfDebugReader { public: // Note that the input buffer might be misaligned. 
typedef typename ElfTypes::Ehdr ALIGNED(1) Elf_Ehdr; + typedef typename ElfTypes::Phdr ALIGNED(1) Elf_Phdr; typedef typename ElfTypes::Shdr ALIGNED(1) Elf_Shdr; typedef typename ElfTypes::Sym ALIGNED(1) Elf_Sym; typedef typename ElfTypes::Addr ALIGNED(1) Elf_Addr; @@ -65,10 +66,11 @@ class ElfDebugReader { CHECK_EQ(header_->e_ident[1], ELFMAG1); CHECK_EQ(header_->e_ident[2], ELFMAG2); CHECK_EQ(header_->e_ident[3], ELFMAG3); + CHECK_EQ(header_->e_ident[4], sizeof(Elf_Addr) / sizeof(uint32_t)); CHECK_EQ(header_->e_ehsize, sizeof(Elf_Ehdr)); - CHECK_EQ(header_->e_shentsize, sizeof(Elf_Shdr)); // Find all ELF sections. + CHECK_EQ(header_->e_shentsize, sizeof(Elf_Shdr)); sections_ = Read<Elf_Shdr>(header_->e_shoff, header_->e_shnum); for (const Elf_Shdr& section : sections_) { const char* name = Read<char>(sections_[header_->e_shstrndx].sh_offset + section.sh_name); @@ -84,16 +86,38 @@ class ElfDebugReader { } } - explicit ElfDebugReader(std::vector<uint8_t>& file) + explicit ElfDebugReader(const std::vector<uint8_t>& file) : ElfDebugReader(ArrayRef<const uint8_t>(file)) { } + // Check that ELF signature is present at the start of the files, + // and that the ELF bitness matches the ElfTypes template arguments. + static bool IsValidElfHeader(const std::vector<uint8_t>& data) { + static constexpr bool kIs64Bit = sizeof(Elf_Addr) == sizeof(uint64_t); + static constexpr char kMagic[] = { 0x7f, 'E', 'L', 'F', kIs64Bit ? 2 : 1 }; + return data.size() >= sizeof(kMagic) && memcmp(data.data(), kMagic, sizeof(kMagic)) == 0; + } + const Elf_Ehdr* GetHeader() { return header_; } ArrayRef<Elf_Shdr> GetSections() { return sections_; } const Elf_Shdr* GetSection(const char* name) { return section_map_[name]; } + // Find the base address where the ELF file wants to be loaded. + // This is generally zero (therefore always requiring relocation). 
+ Elf_Addr GetLoadAddress() { + std::optional<Elf_Addr> addr; + CHECK_EQ(header_->e_phentsize, sizeof(Elf_Phdr)); + for (const Elf_Phdr& phdr : Read<Elf_Phdr>(header_->e_phoff, header_->e_phnum)) { + if (phdr.p_type == PT_LOAD) { + addr = addr.has_value() ? std::min(addr.value(), phdr.p_vaddr) : phdr.p_vaddr; + } + } + CHECK(addr.has_value()); + return addr.value(); + } + template <typename VisitSym> void VisitFunctionSymbols(VisitSym visit_sym) { const Elf_Shdr* symtab = GetSection(".symtab"); diff --git a/libelffile/elf/xz_utils.cc b/libelffile/elf/xz_utils.cc index 87c9a7b758..f064cb0e10 100644 --- a/libelffile/elf/xz_utils.cc +++ b/libelffile/elf/xz_utils.cc @@ -32,8 +32,6 @@ namespace art { -constexpr size_t kChunkSize = 16 * KB; - static void XzInitCrc() { static std::once_flag crc_initialized; std::call_once(crc_initialized, []() { @@ -42,14 +40,17 @@ static void XzInitCrc() { }); } -void XzCompress(ArrayRef<const uint8_t> src, std::vector<uint8_t>* dst, int level) { +void XzCompress(ArrayRef<const uint8_t> src, + std::vector<uint8_t>* dst, + int level, + size_t block_size) { // Configure the compression library. XzInitCrc(); CLzma2EncProps lzma2Props; Lzma2EncProps_Init(&lzma2Props); lzma2Props.lzmaProps.level = level; lzma2Props.lzmaProps.reduceSize = src.size(); // Size of data that will be compressed. 
- lzma2Props.blockSize = kChunkSize; + lzma2Props.blockSize = block_size; Lzma2EncProps_Normalize(&lzma2Props); CXzProps props; XzProps_Init(&props); diff --git a/libelffile/elf/xz_utils.h b/libelffile/elf/xz_utils.h index df5cb56f2d..b1903ff56c 100644 --- a/libelffile/elf/xz_utils.h +++ b/libelffile/elf/xz_utils.h @@ -20,10 +20,17 @@ #include <vector> #include "base/array_ref.h" +#include "base/bit_utils.h" namespace art { -void XzCompress(ArrayRef<const uint8_t> src, std::vector<uint8_t>* dst, int level = 1 /* speed */); +constexpr size_t kXzDefaultBlockSize = 16 * KB; + +void XzCompress(ArrayRef<const uint8_t> src, + std::vector<uint8_t>* dst, + int level = 1 /* speed */, + size_t block_size = kXzDefaultBlockSize); + void XzDecompress(ArrayRef<const uint8_t> src, std::vector<uint8_t>* dst); } // namespace art diff --git a/tools/create_minidebuginfo/Android.bp b/tools/create_minidebuginfo/Android.bp new file mode 100644 index 0000000000..a9d6b8d0d0 --- /dev/null +++ b/tools/create_minidebuginfo/Android.bp @@ -0,0 +1,35 @@ +// +// Copyright (C) 2021 The Android Open Source Project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// + +art_cc_binary { + name: "create_minidebuginfo", + defaults: [ + "art_debug_defaults", + "art_defaults", + ], + host_supported: true, + device_supported: false, + srcs: [ + "create_minidebuginfo.cc", + ], + static_libs: [ + "libartbase", + "libbase", + "libelffile", + "liblzma", + "liblog", + ], +} diff --git a/tools/create_minidebuginfo/create_minidebuginfo.cc b/tools/create_minidebuginfo/create_minidebuginfo.cc new file mode 100644 index 0000000000..aad36a8279 --- /dev/null +++ b/tools/create_minidebuginfo/create_minidebuginfo.cc @@ -0,0 +1,175 @@ +/* + * Copyright (C) 2021 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "android-base/logging.h" + +#include "base/os.h" +#include "base/unix_file/fd_file.h" +#include "elf/elf_builder.h" +#include "elf/elf_debug_reader.h" +#include "elf/xz_utils.h" +#include "stream/file_output_stream.h" +#include "stream/vector_output_stream.h" + +#include <algorithm> +#include <deque> +#include <map> +#include <memory> +#include <string> +#include <string_view> +#include <vector> + +namespace art { + +static constexpr size_t kBlockSize = 32 * KB; + +template<typename ElfTypes> +static void WriteMinidebugInfo(const std::vector<uint8_t>& input, std::vector<uint8_t>* output) { + using Elf_Addr = typename ElfTypes::Addr; + using Elf_Ehdr = typename ElfTypes::Ehdr; + using Elf_Shdr = typename ElfTypes::Shdr; + using Elf_Sym = typename ElfTypes::Sym; + using Elf_Word = typename ElfTypes::Word; + using CIE = typename ElfDebugReader<ElfTypes>::CIE; + using FDE = typename ElfDebugReader<ElfTypes>::FDE; + + ElfDebugReader<ElfTypes> reader(input); + + std::vector<uint8_t> output_elf_data; + VectorOutputStream output_stream("Output ELF", &output_elf_data); + InstructionSet isa = ElfBuilder<ElfTypes>::GetIsaFromHeader(*reader.GetHeader()); + std::unique_ptr<ElfBuilder<ElfTypes>> builder(new ElfBuilder<ElfTypes>(isa, &output_stream)); + builder->Start(/*write_program_headers=*/ false); + + auto* rodata = builder->GetRoData(); + auto* text = builder->GetText(); + const Elf_Shdr* original_text = reader.GetSection(".text"); + CHECK(original_text != nullptr); + CHECK_EQ(reader.GetLoadAddress(), 0u); + rodata->AllocateVirtualMemory(original_text->sh_addr - sizeof(Elf_Ehdr)); + text->AllocateVirtualMemory(original_text->sh_addr, original_text->sh_size); + + auto* strtab = builder->GetStrTab(); + auto* symtab = builder->GetSymTab(); + strtab->Start(); + { + strtab->Write(""); // strtab should start with empty string. 
+ std::multimap<std::string_view, Elf_Sym> syms; + reader.VisitFunctionSymbols([&](Elf_Sym sym, const char* name) { + // Exclude non-function or empty symbols. + if (ELF32_ST_TYPE(sym.st_info) == STT_FUNC && sym.st_size != 0) { + syms.emplace(name, sym); + } + }); + reader.VisitDynamicSymbols([&](Elf_Sym sym, const char* name) { + // Exclude symbols which will be preserved in the dynamic table anyway. + auto it = syms.find(name); + if (it != syms.end() && it->second.st_value == sym.st_value) { + syms.erase(it); + } + }); + for (auto& entry : syms) { + std::string_view name = entry.first; + const Elf_Sym& sym = entry.second; + Elf_Word name_idx = strtab->Write(name); + symtab->Add(name_idx, text, sym.st_value, sym.st_size, STB_GLOBAL, STT_FUNC); + } + } + strtab->End(); + symtab->WriteCachedSection(); + + auto* debug_frame = builder->GetDebugFrame(); + debug_frame->Start(); + { + std::map<std::basic_string_view<uint8_t>, Elf_Addr> cie_dedup; + std::unordered_map<const CIE*, Elf_Addr> new_cie_offset; + std::deque<std::pair<const FDE*, const CIE*>> entries; + // Read, de-duplicate and write CIE entries. Read FDE entries. + reader.VisitDebugFrame( + [&](const CIE* cie) { + std::basic_string_view<uint8_t> key(cie->data(), cie->size()); + auto it = cie_dedup.emplace(key, debug_frame->GetPosition()); + if (/* inserted */ it.second) { + debug_frame->WriteFully(cie->data(), cie->size()); + } + new_cie_offset[cie] = it.first->second; + }, + [&](const FDE* fde, const CIE* cie) { + entries.emplace_back(std::make_pair(fde, cie)); + }); + // Sort FDE entries by opcodes to improve locality for compression (saves ~25%). 
+ std::stable_sort(entries.begin(), entries.end(), [](const auto& lhs, const auto& rhs) { + constexpr size_t opcode_offset = sizeof(FDE); + return std::lexicographical_compare( + lhs.first->data() + opcode_offset, lhs.first->data() + lhs.first->size(), + rhs.first->data() + opcode_offset, rhs.first->data() + rhs.first->size()); + }); + // Write all FDE entries while adjusting the CIE offsets to the new locations. + for (const auto& entry : entries) { + const FDE* fde = entry.first; + const CIE* cie = entry.second; + FDE new_header = *fde; + new_header.cie_pointer = new_cie_offset[cie]; + debug_frame->WriteFully(&new_header, sizeof(FDE)); + debug_frame->WriteFully(fde->data() + sizeof(FDE), fde->size() - sizeof(FDE)); + } + } + debug_frame->End(); + + builder->End(); + CHECK(builder->Good()); + + XzCompress(ArrayRef<const uint8_t>(output_elf_data), output, 9 /*size*/, kBlockSize); +} + +static int Main(int argc, char** argv) { + // Check command like arguments. + if (argc != 3) { + printf("Usage: create_minidebuginfo ELF_FILE OUT_FILE\n"); + printf(" ELF_FILE: The path to an ELF file with full symbols (before being stripped).\n"); + printf(" OUT_FILE: The path for the generated mini-debug-info data (not an elf file).\n"); + return 1; + } + const char* input_filename = argv[1]; + const char* output_filename = argv[2]; + + // Read input file. + std::unique_ptr<File> input_file(OS::OpenFileForReading(input_filename)); + CHECK(input_file.get() != nullptr) << "Failed to open input file"; + std::vector<uint8_t> elf(input_file->GetLength()); + CHECK(input_file->ReadFully(elf.data(), elf.size())) << "Failed to read input file"; + + // Write output file. 
+ std::vector<uint8_t> output; + if (ElfDebugReader<ElfTypes32>::IsValidElfHeader(elf)) { + WriteMinidebugInfo<ElfTypes32>(elf, &output); + } else if (ElfDebugReader<ElfTypes64>::IsValidElfHeader(elf)) { + WriteMinidebugInfo<ElfTypes64>(elf, &output); + } else { + LOG(FATAL) << "Invalid ELF file header " << input_filename; + } + std::unique_ptr<File> output_file(OS::CreateEmptyFile(output_filename)); + if (!output_file->WriteFully(output.data(), output.size()) || output_file->FlushClose() != 0) { + LOG(FATAL) << "Failed to write " << output_filename; + } + return 0; +} + +} // namespace art + +int main(int argc, char** argv) { + return art::Main(argc, argv); +} |