Encode function signatures properly in DWARF.
The signatures were previously stored as plain strings.
The proper way in DWARF is to store them as a structured tree of tags
(for example, DW_TAG_subprogram containing DW_TAG_formal_parameter).
Note that this makes the debug sections smaller since DWARF
signatures are actually more efficient than just plain strings.
Change-Id: I6afbce28340570666d8674d07c0e324aad561dd5
diff --git a/compiler/dwarf/debug_info_entry_writer.h b/compiler/dwarf/debug_info_entry_writer.h
index d9b367b..aa31036 100644
--- a/compiler/dwarf/debug_info_entry_writer.h
+++ b/compiler/dwarf/debug_info_entry_writer.h
@@ -20,6 +20,7 @@
#include <cstdint>
#include <unordered_map>
+#include "base/casts.h"
#include "dwarf/dwarf_constants.h"
#include "dwarf/writer.h"
#include "leb128.h"
@@ -47,9 +48,9 @@
* It also handles generation of abbreviations.
*
* Usage:
- * StartTag(DW_TAG_compile_unit, DW_CHILDREN_yes);
+ * StartTag(DW_TAG_compile_unit);
* WriteStrp(DW_AT_producer, "Compiler name", debug_str);
- * StartTag(DW_TAG_subprogram, DW_CHILDREN_no);
+ * StartTag(DW_TAG_subprogram);
* WriteStrp(DW_AT_name, "Foo", debug_str);
* EndTag();
* EndTag();
@@ -59,36 +60,40 @@
static_assert(std::is_same<typename Vector::value_type, uint8_t>::value, "Invalid value type");
public:
+ static constexpr size_t kCompilationUnitHeaderSize = 11;
+
// Start debugging information entry.
- void StartTag(Tag tag, Children children) {
- DCHECK(has_children) << "This tag can not have nested tags";
+ // Returns the offset of the entry within the compilation unit (header included).
+ size_t StartTag(Tag tag) {
if (inside_entry_) {
// Write abbrev code for the previous entry.
- this->UpdateUleb128(abbrev_code_offset_, EndAbbrev());
+ // Parent entry is finalized before any children are written.
+ this->UpdateUleb128(abbrev_code_offset_, EndAbbrev(DW_CHILDREN_yes));
inside_entry_ = false;
}
- StartAbbrev(tag, children);
+ StartAbbrev(tag);
// Abbrev code placeholder of sufficient size.
abbrev_code_offset_ = this->data()->size();
this->PushUleb128(NextAbbrevCode());
depth_++;
inside_entry_ = true;
- has_children = (children == DW_CHILDREN_yes);
+ return abbrev_code_offset_ + kCompilationUnitHeaderSize;
}
// End debugging information entry.
void EndTag() {
DCHECK_GT(depth_, 0);
if (inside_entry_) {
- // Write abbrev code for this tag.
- this->UpdateUleb128(abbrev_code_offset_, EndAbbrev());
+ // Write abbrev code for this entry.
+ this->UpdateUleb128(abbrev_code_offset_, EndAbbrev(DW_CHILDREN_no));
inside_entry_ = false;
- }
- if (has_children) {
- this->PushUint8(0); // End of children.
+ // This entry has no children and so there is no terminator.
+ } else {
+ // The entry has already been finalized, so it must be a parent entry
+ // and we need to write the terminator required by DW_CHILDREN_yes.
+ this->PushUint8(0);
}
depth_--;
- has_children = true; // Parent tag obviously has children.
}
void WriteAddr(Attribute attrib, uint64_t value) {
@@ -101,10 +106,10 @@
}
}
- void WriteBlock(Attribute attrib, const void* ptr, int size) {
+ void WriteBlock(Attribute attrib, const void* ptr, size_t num_bytes) {
AddAbbrevAttribute(attrib, DW_FORM_block);
- this->PushUleb128(size);
- this->PushData(ptr, size);
+ this->PushUleb128(num_bytes);
+ this->PushData(ptr, num_bytes);
}
void WriteData1(Attribute attrib, uint8_t value) {
@@ -147,12 +152,12 @@
this->PushUint8(value ? 1 : 0);
}
- void WriteRef4(Attribute attrib, int cu_offset) {
+ void WriteRef4(Attribute attrib, uint32_t cu_offset) {
AddAbbrevAttribute(attrib, DW_FORM_ref4);
this->PushUint32(cu_offset);
}
- void WriteRef(Attribute attrib, int cu_offset) {
+ void WriteRef(Attribute attrib, uint32_t cu_offset) {
AddAbbrevAttribute(attrib, DW_FORM_ref_udata);
this->PushUleb128(cu_offset);
}
@@ -162,16 +167,21 @@
this->PushString(value);
}
- void WriteStrp(Attribute attrib, int address) {
+ void WriteStrp(Attribute attrib, size_t debug_str_offset) {
AddAbbrevAttribute(attrib, DW_FORM_strp);
- this->PushUint32(address);
+ this->PushUint32(dchecked_integral_cast<uint32_t>(debug_str_offset));
}
- void WriteStrp(Attribute attrib, const char* value, std::vector<uint8_t>* debug_str) {
+ void WriteStrp(Attribute attrib, const char* str, size_t len,
+ std::vector<uint8_t>* debug_str) {
AddAbbrevAttribute(attrib, DW_FORM_strp);
- int address = debug_str->size();
- debug_str->insert(debug_str->end(), value, value + strlen(value) + 1);
- this->PushUint32(address);
+ this->PushUint32(debug_str->size());
+ debug_str->insert(debug_str->end(), str, str + len);
+ debug_str->push_back(0);
+ }
+
+ void WriteStrp(Attribute attrib, const char* str, std::vector<uint8_t>* debug_str) {
+ WriteStrp(attrib, str, strlen(str), debug_str);
}
bool Is64bit() const { return is64bit_; }
@@ -180,7 +190,11 @@
return patch_locations_;
}
+ int Depth() const { return depth_; }
+
using Writer<Vector>::data;
+ using Writer<Vector>::size;
+ using Writer<Vector>::UpdateUint32;
DebugInfoEntryWriter(bool is64bitArch,
Vector* debug_abbrev,
@@ -196,16 +210,17 @@
}
~DebugInfoEntryWriter() {
+ DCHECK(!inside_entry_);
DCHECK_EQ(depth_, 0);
}
private:
// Start abbreviation declaration.
- void StartAbbrev(Tag tag, Children children) {
- DCHECK(!inside_entry_);
+ void StartAbbrev(Tag tag) {
current_abbrev_.clear();
EncodeUnsignedLeb128(¤t_abbrev_, tag);
- current_abbrev_.push_back(children);
+ has_children_offset_ = current_abbrev_.size();
+ current_abbrev_.push_back(0); // Place-holder for DW_CHILDREN.
}
// Add attribute specification.
@@ -220,8 +235,9 @@
}
// End abbreviation declaration and return its code.
- int EndAbbrev() {
- DCHECK(inside_entry_);
+ int EndAbbrev(Children has_children) {
+ DCHECK(!current_abbrev_.empty());
+ current_abbrev_[has_children_offset_] = has_children;
auto it = abbrev_codes_.insert(std::make_pair(std::move(current_abbrev_),
NextAbbrevCode()));
int abbrev_code = it.first->second;
@@ -241,6 +257,7 @@
// Fields for writing and deduplication of abbrevs.
Writer<Vector> debug_abbrev_;
Vector current_abbrev_;
+ size_t has_children_offset_ = 0;
std::unordered_map<Vector, int,
FNVHash<Vector> > abbrev_codes_;
@@ -250,7 +267,6 @@
int depth_ = 0;
size_t abbrev_code_offset_ = 0; // Location to patch once we know the code.
bool inside_entry_ = false; // Entry ends at first child (if any).
- bool has_children = true;
std::vector<uintptr_t> patch_locations_;
};
diff --git a/compiler/dwarf/dedup_vector.h b/compiler/dwarf/dedup_vector.h
new file mode 100644
index 0000000..7fb21b7
--- /dev/null
+++ b/compiler/dwarf/dedup_vector.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_DWARF_DEDUP_VECTOR_H_
+#define ART_COMPILER_DWARF_DEDUP_VECTOR_H_
+
+#include <vector>
+#include <unordered_map>
+
+namespace art {
+namespace dwarf {
+ class DedupVector {
+ public:
+ // Returns the offset of a previously inserted identical block of data,
+ // or appends the data at the end of the vector and returns its offset.
+ size_t Insert(const uint8_t* ptr, size_t num_bytes) {
+ // See http://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function
+ uint32_t hash = 2166136261u;
+ for (size_t i = 0; i < num_bytes; i++) {
+ hash = (hash ^ ptr[i]) * 16777619u;
+ }
+ // Try to find existing copy of the data.
+ const auto& range = hash_to_offset_.equal_range(hash);
+ for (auto it = range.first; it != range.second; ++it) {
+ const size_t offset = it->second;
+ if (offset + num_bytes <= vector_.size() &&
+ memcmp(vector_.data() + offset, ptr, num_bytes) == 0) {
+ return offset;
+ }
+ }
+ // Append the data at the end of the vector.
+ const size_t new_offset = vector_.size();
+ hash_to_offset_.emplace(hash, new_offset);
+ vector_.insert(vector_.end(), ptr, ptr + num_bytes);
+ return new_offset;
+ }
+
+ const std::vector<uint8_t>& Data() const { return vector_; }
+
+ private:
+ struct IdentityHash {
+ size_t operator()(uint32_t v) const { return v; }
+ };
+
+ // We store the full hash as the key to simplify growing of the table.
+ // It avoids storing or referencing the actual data in the hash-table.
+ std::unordered_multimap<uint32_t, size_t, IdentityHash> hash_to_offset_;
+
+ std::vector<uint8_t> vector_;
+ };
+} // namespace dwarf
+} // namespace art
+
+#endif // ART_COMPILER_DWARF_DEDUP_VECTOR_H_
diff --git a/compiler/dwarf/dwarf_test.cc b/compiler/dwarf/dwarf_test.cc
index 6bb22ed..e9cd421 100644
--- a/compiler/dwarf/dwarf_test.cc
+++ b/compiler/dwarf/dwarf_test.cc
@@ -285,7 +285,7 @@
constexpr bool is64bit = false;
DebugInfoEntryWriter<> info(is64bit, &debug_abbrev_data_);
DW_CHECK("Contents of the .debug_info section:");
- info.StartTag(dwarf::DW_TAG_compile_unit, dwarf::DW_CHILDREN_yes);
+ info.StartTag(dwarf::DW_TAG_compile_unit);
DW_CHECK("Abbrev Number: 1 (DW_TAG_compile_unit)");
info.WriteStrp(dwarf::DW_AT_producer, "Compiler name", &debug_str_data_);
DW_CHECK_NEXT("DW_AT_producer : (indirect string, offset: 0x0): Compiler name");
@@ -293,7 +293,7 @@
DW_CHECK_NEXT("DW_AT_low_pc : 0x1000000");
info.WriteAddr(dwarf::DW_AT_high_pc, 0x02000000);
DW_CHECK_NEXT("DW_AT_high_pc : 0x2000000");
- info.StartTag(dwarf::DW_TAG_subprogram, dwarf::DW_CHILDREN_no);
+ info.StartTag(dwarf::DW_TAG_subprogram);
DW_CHECK("Abbrev Number: 2 (DW_TAG_subprogram)");
info.WriteStrp(dwarf::DW_AT_name, "Foo", &debug_str_data_);
DW_CHECK_NEXT("DW_AT_name : (indirect string, offset: 0xe): Foo");
@@ -302,7 +302,7 @@
info.WriteAddr(dwarf::DW_AT_high_pc, 0x01020000);
DW_CHECK_NEXT("DW_AT_high_pc : 0x1020000");
info.EndTag(); // DW_TAG_subprogram
- info.StartTag(dwarf::DW_TAG_subprogram, dwarf::DW_CHILDREN_no);
+ info.StartTag(dwarf::DW_TAG_subprogram);
DW_CHECK("Abbrev Number: 2 (DW_TAG_subprogram)");
info.WriteStrp(dwarf::DW_AT_name, "Bar", &debug_str_data_);
DW_CHECK_NEXT("DW_AT_name : (indirect string, offset: 0x12): Bar");
@@ -313,7 +313,7 @@
info.EndTag(); // DW_TAG_subprogram
info.EndTag(); // DW_TAG_compile_unit
// Test that previous list was properly terminated and empty children.
- info.StartTag(dwarf::DW_TAG_compile_unit, dwarf::DW_CHILDREN_yes);
+ info.StartTag(dwarf::DW_TAG_compile_unit);
info.EndTag(); // DW_TAG_compile_unit
// The abbrev table is just side product, but check it as well.
@@ -327,7 +327,7 @@
DW_CHECK_NEXT("DW_AT_name DW_FORM_strp");
DW_CHECK_NEXT("DW_AT_low_pc DW_FORM_addr");
DW_CHECK_NEXT("DW_AT_high_pc DW_FORM_addr");
- DW_CHECK("3 DW_TAG_compile_unit [has children]");
+ DW_CHECK("3 DW_TAG_compile_unit [no children]");
std::vector<uintptr_t> debug_info_patches;
std::vector<uintptr_t> expected_patches { 16, 20, 29, 33, 42, 46 }; // NOLINT
diff --git a/compiler/dwarf/headers.h b/compiler/dwarf/headers.h
index 633e2f7..c75aeac 100644
--- a/compiler/dwarf/headers.h
+++ b/compiler/dwarf/headers.h
@@ -138,6 +138,7 @@
writer.PushUint32(debug_abbrev_offset);
writer.PushUint8(entries.Is64bit() ? 8 : 4);
size_t entries_offset = writer.data()->size();
+ DCHECK_EQ(entries_offset, DebugInfoEntryWriter<Vector>::kCompilationUnitHeaderSize);
writer.PushData(*entries.data());
writer.UpdateUint32(start, writer.data()->size() - start - 4);
// Copy patch locations and make them relative to .debug_info section.
diff --git a/compiler/dwarf/writer.h b/compiler/dwarf/writer.h
index 00b9dfa..d2add7f 100644
--- a/compiler/dwarf/writer.h
+++ b/compiler/dwarf/writer.h
@@ -114,9 +114,9 @@
data_->insert(data_->end(), value, value + strlen(value) + 1);
}
- void PushData(const void* ptr, size_t size) {
+ void PushData(const void* ptr, size_t num_bytes) {
const char* p = reinterpret_cast<const char*>(ptr);
- data_->insert(data_->end(), p, p + size);
+ data_->insert(data_->end(), p, p + num_bytes);
}
template<typename Vector2>
@@ -164,6 +164,10 @@
return data_;
}
+ size_t size() const {
+ return data_->size();
+ }
+
explicit Writer(Vector* buffer) : data_(buffer) { }
private: