diff options
| -rw-r--r-- | tools/dexanalyze/dexanalyze.cc | 9 |
| -rw-r--r-- | tools/dexanalyze/dexanalyze_experiments.cc | 122 |
| -rw-r--r-- | tools/dexanalyze/dexanalyze_experiments.h | 26 |
3 files changed, 157 insertions, 0 deletions
diff --git a/tools/dexanalyze/dexanalyze.cc b/tools/dexanalyze/dexanalyze.cc index 38725d428b..7d7e5f28b3 100644 --- a/tools/dexanalyze/dexanalyze.cc +++ b/tools/dexanalyze/dexanalyze.cc @@ -49,6 +49,8 @@ class DexAnalyze { << "Usage " << argv[0] << " [options] <dex files>\n" << " [options] is a combination of the following\n" << " -count_indices (Count dex indices accessed from code items)\n" + << " -analyze-strings (Analyze string data)\n" + << " -analyze-debug-info (Analyze debug info)\n" << " -i (Ignore Dex checksum and verification failures)\n" << " -a (Run all experiments)\n" << " -d (Dump on per DEX basis)\n"; @@ -69,6 +71,8 @@ class DexAnalyze { exp_count_indices_ = true; } else if (arg == "-analyze-strings") { exp_analyze_strings_ = true; + } else if (arg == "-analyze-debug-info") { + exp_debug_info_ = true; } else if (arg == "-d") { dump_per_input_dex_ = true; } else if (!arg.empty() && arg[0] == '-') { @@ -90,6 +94,7 @@ class DexAnalyze { bool exp_count_indices_ = false; bool exp_code_metrics_ = false; bool exp_analyze_strings_ = false; + bool exp_debug_info_ = false; bool run_all_experiments_ = false; std::vector<std::string> filenames_; }; @@ -106,6 +111,9 @@ class DexAnalyze { if (options->run_all_experiments_ || options->exp_code_metrics_) { experiments_.emplace_back(new CodeMetrics); } + if (options->run_all_experiments_ || options->exp_debug_info_) { + experiments_.emplace_back(new AnalyzeDebugInfo); + } } bool ProcessDexFile(const DexFile& dex_file) { @@ -120,6 +128,7 @@ class DexAnalyze { void Dump(std::ostream& os) { for (std::unique_ptr<Experiment>& experiment : experiments_) { experiment->Dump(os, total_size_); + os << "\n"; } } diff --git a/tools/dexanalyze/dexanalyze_experiments.cc b/tools/dexanalyze/dexanalyze_experiments.cc index 7006370c0b..1a3b89cbc7 100644 --- a/tools/dexanalyze/dexanalyze_experiments.cc +++ b/tools/dexanalyze/dexanalyze_experiments.cc @@ -75,6 +75,128 @@ static size_t PrefixLen(const std::string& a, const std::string& 
b) { return len; } +void AnalyzeDebugInfo::ProcessDexFile(const DexFile& dex_file) { + std::set<const uint8_t*> seen; + std::vector<size_t> counts(256, 0u); + std::vector<size_t> opcode_counts(256, 0u); + std::set<std::vector<uint8_t>> unique_non_header; + for (ClassAccessor accessor : dex_file.GetClasses()) { + for (const ClassAccessor::Method& method : accessor.GetMethods()) { + CodeItemDebugInfoAccessor code_item(dex_file, method.GetCodeItem(), method.GetIndex()); + const uint8_t* debug_info = dex_file.GetDebugInfoStream(code_item.DebugInfoOffset()); + if (debug_info != nullptr && seen.insert(debug_info).second) { + const uint8_t* stream = debug_info; + DecodeUnsignedLeb128(&stream); // line_start + uint32_t parameters_size = DecodeUnsignedLeb128(&stream); + for (uint32_t i = 0; i < parameters_size; ++i) { + DecodeUnsignedLeb128P1(&stream); // Parameter name. + } + bool done = false; + const uint8_t* after_header_start = stream; + while (!done) { + const uint8_t* const op_start = stream; + uint8_t opcode = *stream++; + ++opcode_counts[opcode]; + ++total_opcode_bytes_; + switch (opcode) { + case DexFile::DBG_END_SEQUENCE: + ++total_end_seq_bytes_; + done = true; + break; + case DexFile::DBG_ADVANCE_PC: + DecodeUnsignedLeb128(&stream); // addr_diff + total_advance_pc_bytes_ += stream - op_start; + break; + case DexFile::DBG_ADVANCE_LINE: + DecodeSignedLeb128(&stream); // line_diff + total_advance_line_bytes_ += stream - op_start; + break; + case DexFile::DBG_START_LOCAL: + DecodeUnsignedLeb128(&stream); // register_num + DecodeUnsignedLeb128P1(&stream); // name_idx + DecodeUnsignedLeb128P1(&stream); // type_idx + total_start_local_bytes_ += stream - op_start; + break; + case DexFile::DBG_START_LOCAL_EXTENDED: + DecodeUnsignedLeb128(&stream); // register_num + DecodeUnsignedLeb128P1(&stream); // name_idx + DecodeUnsignedLeb128P1(&stream); // type_idx + DecodeUnsignedLeb128P1(&stream); // sig_idx + total_start_local_extended_bytes_ += stream - op_start; + break; + 
case DexFile::DBG_END_LOCAL: + DecodeUnsignedLeb128(&stream); // register_num + total_end_local_bytes_ += stream - op_start; + break; + case DexFile::DBG_RESTART_LOCAL: + DecodeUnsignedLeb128(&stream); // register_num + total_restart_local_bytes_ += stream - op_start; + break; + case DexFile::DBG_SET_PROLOGUE_END: + case DexFile::DBG_SET_EPILOGUE_BEGIN: + total_epilogue_bytes_ += stream - op_start; + break; + case DexFile::DBG_SET_FILE: { + DecodeUnsignedLeb128P1(&stream); // name_idx + total_set_file_bytes_ += stream - op_start; + break; + } + default: { + total_other_bytes_ += stream - op_start; + break; + } + } + } + const size_t bytes = stream - debug_info; + total_bytes_ += bytes; + total_non_header_bytes_ += stream - after_header_start; + if (unique_non_header.insert(std::vector<uint8_t>(after_header_start, stream)).second) { + total_unique_non_header_bytes_ += stream - after_header_start; + } + for (size_t i = 0; i < bytes; ++i) { + ++counts[debug_info[i]]; + } + } + } + } + auto calc_entropy = [](std::vector<size_t> data) { + size_t total = std::accumulate(data.begin(), data.end(), 0u); + double avg_entropy = 0.0; + for (size_t c : data) { + if (c > 0) { + double ratio = static_cast<double>(c) / static_cast<double>(total); + avg_entropy -= ratio * log(ratio) / log(256.0); + } + } + return avg_entropy * total; + }; + total_entropy_ += calc_entropy(counts); + total_opcode_entropy_ += calc_entropy(opcode_counts); +} + +void AnalyzeDebugInfo::Dump(std::ostream& os, uint64_t total_size) const { + os << "Debug info bytes " << Percent(total_bytes_, total_size) << "\n"; + + os << " DBG_END_SEQUENCE: " << Percent(total_end_seq_bytes_, total_size) << "\n"; + os << " DBG_ADVANCE_PC: " << Percent(total_advance_pc_bytes_, total_size) << "\n"; + os << " DBG_ADVANCE_LINE: " << Percent(total_advance_line_bytes_, total_size) << "\n"; + os << " DBG_START_LOCAL: " << Percent(total_start_local_bytes_, total_size) << "\n"; + os << " DBG_START_LOCAL_EXTENDED: " + << 
Percent(total_start_local_extended_bytes_, total_size) << "\n"; + os << " DBG_END_LOCAL: " << Percent(total_end_local_bytes_, total_size) << "\n"; + os << " DBG_RESTART_LOCAL: " << Percent(total_restart_local_bytes_, total_size) << "\n"; + os << " DBG_SET_PROLOGUE bytes " << Percent(total_epilogue_bytes_, total_size) << "\n"; + os << " DBG_SET_FILE bytes " << Percent(total_set_file_bytes_, total_size) << "\n"; + os << " special: " + << Percent(total_other_bytes_, total_size) << "\n"; + os << "Debug info entropy " << Percent(total_entropy_, total_size) << "\n"; + os << "Debug info opcode bytes " << Percent(total_opcode_bytes_, total_size) << "\n"; + os << "Debug info opcode entropy " << Percent(total_opcode_entropy_, total_size) << "\n"; + os << "Debug info non header bytes " << Percent(total_non_header_bytes_, total_size) << "\n"; + os << "Debug info deduped non header bytes " + << Percent(total_unique_non_header_bytes_, total_size) << "\n"; +} + void AnalyzeStrings::ProcessDexFile(const DexFile& dex_file) { std::vector<std::string> strings; for (size_t i = 0; i < dex_file.NumStringIds(); ++i) { diff --git a/tools/dexanalyze/dexanalyze_experiments.h b/tools/dexanalyze/dexanalyze_experiments.h index 7ba2a49372..a2621c85ca 100644 --- a/tools/dexanalyze/dexanalyze_experiments.h +++ b/tools/dexanalyze/dexanalyze_experiments.h @@ -51,6 +51,32 @@ class AnalyzeStrings : public Experiment { int64_t total_num_prefixes_ = 0u; }; +// Analyze debug info sizes. +class AnalyzeDebugInfo : public Experiment { + public: + void ProcessDexFile(const DexFile& dex_file); + void Dump(std::ostream& os, uint64_t total_size) const; + + private: + int64_t total_bytes_ = 0u; + int64_t total_entropy_ = 0u; + int64_t total_opcode_bytes_ = 0u; + int64_t total_opcode_entropy_ = 0u; + int64_t total_non_header_bytes_ = 0u; + int64_t total_unique_non_header_bytes_ = 0u; + // Opcode and related data. 
+ int64_t total_end_seq_bytes_ = 0u; + int64_t total_advance_pc_bytes_ = 0u; + int64_t total_advance_line_bytes_ = 0u; + int64_t total_start_local_bytes_ = 0u; + int64_t total_start_local_extended_bytes_ = 0u; + int64_t total_end_local_bytes_ = 0u; + int64_t total_restart_local_bytes_ = 0u; + int64_t total_epilogue_bytes_ = 0u; + int64_t total_set_file_bytes_ = 0u; + int64_t total_other_bytes_ = 0u; +}; + // Count numbers of dex indices. class CountDexIndices : public Experiment { public: |