Pack JIT mini-debug-infos together.

We currently produce one in-memory ELF file per JITed method,
which has significant overhead due to the ELF file headers.

Pack several of those files together regularly to save space.
Similarly, prune information about methods which were GCed.

This reduces the size of JIT mini-debug-info by a factor of 10.
The final overhead is 5% to 10% relative to the JIT code size.

Test: ./art/test.py -b -r -t 137
Change-Id: Idfaff8ed9a209e871e815e527f24f36c63a57bbf
diff --git a/compiler/debug/dwarf/headers.h b/compiler/debug/dwarf/headers.h
index 28f1084..4a27178 100644
--- a/compiler/debug/dwarf/headers.h
+++ b/compiler/debug/dwarf/headers.h
@@ -107,7 +107,9 @@
   } else {
     DCHECK(format == DW_DEBUG_FRAME_FORMAT);
     // Relocate code_address if it has absolute value.
-    patch_locations->push_back(buffer_address + buffer->size() - section_address);
+    if (patch_locations != nullptr) {
+      patch_locations->push_back(buffer_address + buffer->size() - section_address);
+    }
   }
   if (is64bit) {
     writer.PushUint64(code_address);
@@ -122,6 +124,30 @@
   writer.UpdateUint32(fde_header_start, writer.data()->size() - fde_header_start - 4);
 }
 
+// Read a single FDE entry from 'data' and advance 'data' past it; returns false if not an FDE.
+template<typename Addr>
+bool ReadFDE(const uint8_t** data, Addr* addr, Addr* size, ArrayRef<const uint8_t>* opcodes) {
+  struct Header {
+    uint32_t length;  // Size of the entry, not counting this field itself.
+    int32_t cie_pointer;  // -1 marks an entry that is skipped; FDEs are expected to hold 0.
+    Addr addr;  // Copied to '*addr'.
+    Addr size;  // Copied to '*size'.
+    uint8_t augmentaion;  // NOTE(review): presumably the augmentation data size byte - confirm.
+    uint8_t opcodes[];  // The remaining bytes of the entry.
+  } PACKED(1);
+  const Header* header = reinterpret_cast<const Header*>(*data);
+  const size_t length = 4 + header->length;  // Total entry size including the length field.
+  *data += length;  // Advance to the next entry even when this one is skipped below.
+  if (header->cie_pointer == -1) {
+    return false;  // Not an FDE entry.
+  }
+  DCHECK_EQ(header->cie_pointer, 0);  // Expects single CIE. Assumes DW_DEBUG_FRAME_FORMAT.
+  *addr = header->addr;
+  *size = header->size;
+  *opcodes = ArrayRef<const uint8_t>(header->opcodes, length - offsetof(Header, opcodes));
+  return true;
+}
+
 // Write compilation unit (CU) to .debug_info section.
 template<typename Vector>
 void WriteDebugInfoCU(uint32_t debug_abbrev_offset,
diff --git a/compiler/debug/elf_debug_frame_writer.h b/compiler/debug/elf_debug_frame_writer.h
index 27b70c8..e0116c6 100644
--- a/compiler/debug/elf_debug_frame_writer.h
+++ b/compiler/debug/elf_debug_frame_writer.h
@@ -182,7 +182,7 @@
   std::vector<const MethodDebugInfo*> sorted_method_infos;
   sorted_method_infos.reserve(method_infos.size());
   for (size_t i = 0; i < method_infos.size(); i++) {
-    if (!method_infos[i].cfi.empty() && !method_infos[i].deduped) {
+    if (!method_infos[i].deduped) {
       sorted_method_infos.push_back(&method_infos[i]);
     }
   }
@@ -222,7 +222,6 @@
     buffer.clear();
     for (const MethodDebugInfo* mi : sorted_method_infos) {
       DCHECK(!mi->deduped);
-      DCHECK(!mi->cfi.empty());
       const Elf_Addr code_address = mi->code_address +
           (mi->is_code_address_text_relative ? builder->GetText()->GetAddress() : 0);
       if (format == dwarf::DW_EH_FRAME_FORMAT) {
diff --git a/compiler/debug/elf_debug_reader.h b/compiler/debug/elf_debug_reader.h
new file mode 100644
index 0000000..91b1b3e
--- /dev/null
+++ b/compiler/debug/elf_debug_reader.h
@@ -0,0 +1,104 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_DEBUG_ELF_DEBUG_READER_H_
+#define ART_COMPILER_DEBUG_ELF_DEBUG_READER_H_
+
+#include "base/array_ref.h"
+#include "debug/dwarf/headers.h"
+#include "elf.h"
+#include "xz_utils.h"
+
+namespace art {
+namespace debug {
+
+// Trivial ELF file reader: the bare minimum needed to read mini-debug-info for unwinding.
+// We use it to merge JIT mini-debug-infos together or to prune them after GC.
+// The consumed ELF file comes from ART JIT; no buffer size is passed, so nothing is bounds-checked.
+// Calls visit_sym(symbol, name) for every non-local .symtab symbol and visit_fde(addr, size,
+// opcodes) for every .debug_frame FDE, then recurses into the decompressed .gnu_debugdata.
+template <typename ElfTypes, typename VisitSym, typename VisitFde>
+static void ReadElfSymbols(const uint8_t* elf, VisitSym visit_sym, VisitFde visit_fde) {
+  // Note that the input buffer might be misaligned.
+  typedef typename ElfTypes::Ehdr ALIGNED(1) Elf_Ehdr;
+  typedef typename ElfTypes::Shdr ALIGNED(1) Elf_Shdr;
+  typedef typename ElfTypes::Sym ALIGNED(1) Elf_Sym;
+  typedef typename ElfTypes::Addr ALIGNED(1) Elf_Addr;
+
+  // Read and check the elf header.
+  const Elf_Ehdr* header = reinterpret_cast<const Elf_Ehdr*>(elf);
+  CHECK(header->checkMagic());
+
+  // Find sections that we are interested in (matched by name via the section name table).
+  const Elf_Shdr* sections = reinterpret_cast<const Elf_Shdr*>(elf + header->e_shoff);
+  const Elf_Shdr* strtab = nullptr;
+  const Elf_Shdr* symtab = nullptr;
+  const Elf_Shdr* debug_frame = nullptr;
+  const Elf_Shdr* gnu_debugdata = nullptr;
+  for (size_t i = 1 /* skip null section */; i < header->e_shnum; i++) {
+    const Elf_Shdr* section = sections + i;
+    const char* name = reinterpret_cast<const char*>(
+        elf + sections[header->e_shstrndx].sh_offset + section->sh_name);
+    if (strcmp(name, ".strtab") == 0) {
+      strtab = section;
+    } else if (strcmp(name, ".symtab") == 0) {
+      symtab = section;
+    } else if (strcmp(name, ".debug_frame") == 0) {
+      debug_frame = section;
+    } else if (strcmp(name, ".gnu_debugdata") == 0) {
+      gnu_debugdata = section;
+    }
+  }
+
+  // Visit symbols. Both tables are needed since symbol names live in .strtab.
+  if (symtab != nullptr && strtab != nullptr) {
+    const Elf_Sym* symbols = reinterpret_cast<const Elf_Sym*>(elf + symtab->sh_offset);
+    DCHECK_EQ(symtab->sh_entsize, sizeof(Elf_Sym));
+    size_t count = symtab->sh_size / sizeof(Elf_Sym);
+    for (size_t i = 1 /* skip null symbol */; i < count; i++) {
+      Elf_Sym symbol = symbols[i];  // Copy; the visitor receives the symbol by value.
+      if (symbol.getBinding() != STB_LOCAL) {  // Ignore local symbols (e.g. "$t").
+        const uint8_t* name = elf + strtab->sh_offset + symbol.st_name;
+        visit_sym(symbol, reinterpret_cast<const char*>(name));
+      }
+    }
+  }
+
+  // Visit CFI (unwind) data. Entries for which ReadFDE returns false are skipped.
+  if (debug_frame != nullptr) {
+    const uint8_t* data = elf + debug_frame->sh_offset;
+    const uint8_t* end = data + debug_frame->sh_size;
+    while (data < end) {
+      Elf_Addr addr, size;
+      ArrayRef<const uint8_t> opcodes;
+      if (dwarf::ReadFDE<Elf_Addr>(&data, &addr, &size, &opcodes)) {
+        visit_fde(addr, size, opcodes);
+      }
+    }
+  }
+
+  // Process embedded compressed ELF file. Visitors get pointers into a temporary buffer here.
+  if (gnu_debugdata != nullptr) {
+    ArrayRef<const uint8_t> compressed(elf + gnu_debugdata->sh_offset, gnu_debugdata->sh_size);
+    std::vector<uint8_t> decompressed;  // Freed on return; names/opcodes must not be retained.
+    XzDecompress(compressed, &decompressed);
+    ReadElfSymbols<ElfTypes>(decompressed.data(), visit_sym, visit_fde);
+  }
+}
+
+}  // namespace debug
+}  // namespace art
+#endif  // ART_COMPILER_DEBUG_ELF_DEBUG_READER_H_
diff --git a/compiler/debug/elf_debug_writer.cc b/compiler/debug/elf_debug_writer.cc
index 1ecb1d8e..56d773f 100644
--- a/compiler/debug/elf_debug_writer.cc
+++ b/compiler/debug/elf_debug_writer.cc
@@ -21,12 +21,14 @@
 #include <vector>
 
 #include "base/array_ref.h"
+#include "base/stl_util.h"
 #include "debug/dwarf/dwarf_constants.h"
 #include "debug/elf_compilation_unit.h"
 #include "debug/elf_debug_frame_writer.h"
 #include "debug/elf_debug_info_writer.h"
 #include "debug/elf_debug_line_writer.h"
 #include "debug/elf_debug_loc_writer.h"
+#include "debug/elf_debug_reader.h"
 #include "debug/elf_symtab_writer.h"
 #include "debug/method_debug_info.h"
 #include "debug/xz_utils.h"
@@ -203,9 +205,147 @@
   }
   builder->End();
   CHECK(builder->Good());
+  // Verify the ELF file by reading it back using the trivial reader.
+  if (kIsDebugBuild) {
+    using Elf_Sym = typename ElfTypes::Sym;
+    using Elf_Addr = typename ElfTypes::Addr;
+    size_t num_syms = 0;
+    size_t num_cfis = 0;
+    ReadElfSymbols<ElfTypes>(
+        buffer.data(),
+        [&](Elf_Sym sym, const char*) {
+          DCHECK_EQ(sym.st_value, method_info.code_address + CompiledMethod::CodeDelta(isa));
+          DCHECK_EQ(sym.st_size, method_info.code_size);
+          num_syms++;
+        },
+        [&](Elf_Addr addr, Elf_Addr size, ArrayRef<const uint8_t> opcodes) {
+          DCHECK_EQ(addr, method_info.code_address);
+          DCHECK_EQ(size, method_info.code_size);
+          DCHECK_GE(opcodes.size(), method_info.cfi.size());
+          DCHECK_EQ(memcmp(opcodes.data(), method_info.cfi.data(), method_info.cfi.size()), 0);
+          num_cfis++;
+        });
+    DCHECK_EQ(num_syms, 1u);
+    DCHECK_EQ(num_cfis, 1u);
+  }
   return buffer;
 }
 
+// Combine several mini-debug-info ELF files into one, dropping entries listed in removed_symbols.
+std::vector<uint8_t> PackElfFileForJIT(
+    InstructionSet isa,
+    const InstructionSetFeatures* features,
+    std::vector<const uint8_t*>& added_elf_files,
+    std::vector<const void*>& removed_symbols,
+    /*out*/ size_t* num_symbols) {
+  using ElfTypes = ElfRuntimeTypes;
+  using Elf_Addr = typename ElfTypes::Addr;
+  using Elf_Sym = typename ElfTypes::Sym;
+  CHECK_EQ(sizeof(Elf_Addr), static_cast<size_t>(GetInstructionSetPointerSize(isa)));
+  const bool is64bit = Is64BitInstructionSet(isa);
+  auto is_removed_symbol = [&removed_symbols](Elf_Addr addr) {  // Needs sorted removed_symbols.
+    const void* code_ptr = reinterpret_cast<const void*>(addr);
+    return std::binary_search(removed_symbols.begin(), removed_symbols.end(), code_ptr);
+  };
+  uint64_t min_address = std::numeric_limits<uint64_t>::max();
+  uint64_t max_address = 0;
+
+  // Produce the inner ELF file.
+  // It will contain the symbols (.symtab) and unwind information (.debug_frame).
+  std::vector<uint8_t> inner_elf_file;
+  {
+    inner_elf_file.reserve(1 * KB);  // Approximate size of ELF file with a single symbol.
+    linker::VectorOutputStream out("Mini-debug-info ELF file for JIT", &inner_elf_file);
+    std::unique_ptr<linker::ElfBuilder<ElfTypes>> builder(
+        new linker::ElfBuilder<ElfTypes>(isa, features, &out));
+    builder->Start(/*write_program_headers=*/ false);
+    auto* text = builder->GetText();
+    auto* strtab = builder->GetStrTab();
+    auto* symtab = builder->GetSymTab();
+    auto* debug_frame = builder->GetDebugFrame();
+    std::deque<Elf_Sym> symbols;
+    std::vector<uint8_t> debug_frame_buffer;
+    WriteCIE(isa, dwarf::DW_DEBUG_FRAME_FORMAT, &debug_frame_buffer);
+
+    // Write symbol names straight to .strtab. Symbols and FDEs are buffered and written below.
+    strtab->Start();
+    strtab->Write("");  // strtab should start with empty string.
+    for (const uint8_t* added_elf_file : added_elf_files) {
+      ReadElfSymbols<ElfTypes>(
+          added_elf_file,
+          [&](Elf_Sym sym, const char* name) {
+              if (is_removed_symbol(sym.st_value)) {
+                return;
+              }
+              sym.st_name = strtab->Write(name);  // Re-point the name into the new .strtab.
+              symbols.push_back(sym);
+              min_address = std::min<uint64_t>(min_address, sym.st_value);  // Track code range.
+              max_address = std::max<uint64_t>(max_address, sym.st_value + sym.st_size);
+          },
+          [&](Elf_Addr addr, Elf_Addr size, ArrayRef<const uint8_t> opcodes) {
+              if (is_removed_symbol(addr)) {
+                return;
+              }
+              WriteFDE(is64bit,
+                       /*section_address=*/ 0,
+                       /*cie_address=*/ 0,
+                       addr,
+                       size,
+                       opcodes,
+                       dwarf::DW_DEBUG_FRAME_FORMAT,
+                       debug_frame_buffer.size(),
+                       &debug_frame_buffer,
+                       /*patch_locations=*/ nullptr);
+          });
+    }
+    strtab->End();
+
+    // Create .text covering the code range. Needed for gdb to find the symbols.
+    if (max_address > min_address) {  // False e.g. when no symbol was kept.
+      text->AllocateVirtualMemory(min_address, max_address - min_address);
+    }
+
+    // Add the symbols and report how many were kept via 'num_symbols'.
+    *num_symbols = symbols.size();
+    for (; !symbols.empty(); symbols.pop_front()) {
+      symtab->Add(symbols.front(), text);
+    }
+    symtab->WriteCachedSection();
+
+    // Add the CFI/unwind section (the CIE followed by the FDEs that were kept).
+    debug_frame->Start();
+    debug_frame->WriteFully(debug_frame_buffer.data(), debug_frame_buffer.size());
+    debug_frame->End();
+
+    builder->End();
+    CHECK(builder->Good());
+  }
+
+  // Produce the outer ELF file.
+  // It contains only the inner ELF file compressed as .gnu_debugdata section.
+  // This extra wrapping is not necessary but the compression saves space.
+  std::vector<uint8_t> outer_elf_file;
+  {
+    std::vector<uint8_t> gnu_debugdata;
+    gnu_debugdata.reserve(inner_elf_file.size() / 4);
+    XzCompress(ArrayRef<const uint8_t>(inner_elf_file), &gnu_debugdata);
+
+    outer_elf_file.reserve(KB + gnu_debugdata.size());
+    linker::VectorOutputStream out("Mini-debug-info ELF file for JIT", &outer_elf_file);
+    std::unique_ptr<linker::ElfBuilder<ElfTypes>> builder(
+        new linker::ElfBuilder<ElfTypes>(isa, features, &out));
+    builder->Start(/*write_program_headers=*/ false);
+    if (max_address > min_address) {  // Same code range as the inner file's .text.
+      builder->GetText()->AllocateVirtualMemory(min_address, max_address - min_address);
+    }
+    builder->WriteSection(".gnu_debugdata", &gnu_debugdata);
+    builder->End();
+    CHECK(builder->Good());
+  }
+
+  return outer_elf_file;
+}
+
 std::vector<uint8_t> WriteDebugElfFileForClasses(
     InstructionSet isa,
     const InstructionSetFeatures* features,
diff --git a/compiler/debug/elf_debug_writer.h b/compiler/debug/elf_debug_writer.h
index 8ad0c42..85ab356 100644
--- a/compiler/debug/elf_debug_writer.h
+++ b/compiler/debug/elf_debug_writer.h
@@ -56,6 +56,13 @@
     bool mini_debug_info,
     const MethodDebugInfo& method_info);
 
+std::vector<uint8_t> PackElfFileForJIT(
+    InstructionSet isa,
+    const InstructionSetFeatures* features,
+    std::vector<const uint8_t*>& added_elf_files,
+    std::vector<const void*>& removed_symbols,
+    /*out*/ size_t* num_symbols);
+
 std::vector<uint8_t> WriteDebugElfFileForClasses(
     InstructionSet isa,
     const InstructionSetFeatures* features,
diff --git a/compiler/jit/jit_compiler.cc b/compiler/jit/jit_compiler.cc
index 9b8bb3e..27749a6 100644
--- a/compiler/jit/jit_compiler.cc
+++ b/compiler/jit/jit_compiler.cc
@@ -146,7 +146,10 @@
     // (which would have been otherwise used as identifier to remove it later).
     AddNativeDebugInfoForJit(Thread::Current(),
                              /*code_ptr=*/ nullptr,
-                             elf_file);
+                             elf_file,
+                             debug::PackElfFileForJIT,
+                             compiler_options.GetInstructionSet(),
+                             compiler_options.GetInstructionSetFeatures());
   }
 }
 
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index c9b4d36..4936a6d 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -1456,8 +1456,8 @@
   return true;
 }
 
-void OptimizingCompiler::GenerateJitDebugInfo(
-    ArtMethod* method, const debug::MethodDebugInfo& info) {
+void OptimizingCompiler::GenerateJitDebugInfo(ArtMethod* method ATTRIBUTE_UNUSED,
+                                              const debug::MethodDebugInfo& info) {
   const CompilerOptions& compiler_options = GetCompilerDriver()->GetCompilerOptions();
   DCHECK(compiler_options.GenerateAnyDebugInfo());
 
@@ -1472,12 +1472,10 @@
       info);
   AddNativeDebugInfoForJit(Thread::Current(),
                            reinterpret_cast<const void*>(info.code_address),
-                           elf_file);
-
-  VLOG(jit)
-      << "JIT mini-debug-info added for " << ArtMethod::PrettyMethod(method)
-      << " size=" << PrettySize(elf_file.size())
-      << " total_size=" << PrettySize(GetJitMiniDebugInfoMemUsage());
+                           elf_file,
+                           debug::PackElfFileForJIT,
+                           compiler_options.GetInstructionSet(),
+                           compiler_options.GetInstructionSetFeatures());
 }
 
 }  // namespace art