Add some experiments to analyze string usage
Analyze shared prefixes for strings across dex files and estimate
possible savings from supporting a per string prefix. The estimation
is not yet optimal.
Bug: 77709234
Test: dexanalyze
Change-Id: I2e9f8a09595b54ea4a3e331efde32f9c1689fc82
diff --git a/tools/dexanalyze/dexanalyze.cc b/tools/dexanalyze/dexanalyze.cc
index c4aebc6..0b08bee 100644
--- a/tools/dexanalyze/dexanalyze.cc
+++ b/tools/dexanalyze/dexanalyze.cc
@@ -63,6 +63,8 @@
run_all_experiments_ = true;
} else if (arg == "-count-indices") {
exp_count_indices_ = true;
+ } else if (arg == "-analyze-strings") {
+ exp_analyze_strings_ = true;
} else if (arg == "-d") {
dump_per_input_dex_ = true;
} else if (!arg.empty() && arg[0] == '-') {
@@ -82,6 +84,7 @@
bool run_dex_file_verifier_ = true;
bool dump_per_input_dex_ = false;
bool exp_count_indices_ = false;
+ bool exp_analyze_strings_ = false;
bool run_all_experiments_ = false;
std::vector<std::string> filenames_;
};
@@ -92,25 +95,30 @@
if (options->run_all_experiments_ || options->exp_count_indices_) {
experiments_.emplace_back(new CountDexIndices);
}
+ if (options->run_all_experiments_ || options->exp_analyze_strings_) {
+ experiments_.emplace_back(new AnalyzeStrings);
+ }
}
bool ProcessDexFile(const DexFile& dex_file) {
for (std::unique_ptr<Experiment>& experiment : experiments_) {
experiment->ProcessDexFile(dex_file);
}
+ total_size_ += dex_file.Size();
++dex_count_;
return true;
}
void Dump(std::ostream& os) {
for (std::unique_ptr<Experiment>& experiment : experiments_) {
- experiment->Dump(os);
+ experiment->Dump(os, total_size_);
}
}
const Options* const options_;
std::vector<std::unique_ptr<Experiment>> experiments_;
size_t dex_count_ = 0;
+ uint64_t total_size_ = 0u;
};
public:
diff --git a/tools/dexanalyze/dexanalyze_experiments.cc b/tools/dexanalyze/dexanalyze_experiments.cc
index e1f119d..bfeb4b9 100644
--- a/tools/dexanalyze/dexanalyze_experiments.cc
+++ b/tools/dexanalyze/dexanalyze_experiments.cc
@@ -15,12 +15,91 @@
*/
#include "dexanalyze_experiments.h"
+
+#include <stdint.h>
+#include <inttypes.h>
+#include <iostream>
+#include <map>
+#include <vector>
+
+#include "android-base/stringprintf.h"
#include "dex/code_item_accessors-inl.h"
#include "dex/dex_instruction-inl.h"
#include "dex/standard_dex_file.h"
namespace art {
+std::string Percent(uint64_t value, uint64_t max) {
+ if (max == 0) {
+ ++max;
+ }
+ return android::base::StringPrintf("%" PRId64 "(%.2f%%)",
+ value,
+ static_cast<double>(value * 100) / static_cast<double>(max));
+}
+
+static size_t PrefixLen(const std::string& a, const std::string& b) {
+ size_t len = 0;
+ for (; len < a.length() && len < b.length() && a[len] == b[len]; ++len) {}
+ return len;
+}
+
+void AnalyzeStrings::ProcessDexFile(const DexFile& dex_file) {
+ std::vector<std::string> strings;
+ for (size_t i = 0; i < dex_file.NumStringIds(); ++i) {
+ uint32_t length = 0;
+ const char* data =
+ dex_file.GetStringDataAndUtf16Length(dex_file.GetStringId(dex::StringIndex(i)), &length);
+ strings.push_back(data);
+ }
+ // Note that the strings are probably already sorted.
+ std::sort(strings.begin(), strings.end());
+
+ // Tunable parameters.
+ static const size_t kMinPrefixLen = 3;
+ static const size_t kPrefixConstantCost = 5;
+ static const size_t kPrefixIndexCost = 2;
+
+ // Calculate total shared prefix.
+ std::vector<size_t> shared_len;
+ std::set<std::string> prefixes;
+ for (size_t i = 0; i < strings.size(); ++i) {
+ size_t best_len = 0;
+ if (i > 0) {
+ best_len = std::max(best_len, PrefixLen(strings[i], strings[i - 1]));
+ }
+ if (i < strings.size() - 1) {
+ best_len = std::max(best_len, PrefixLen(strings[i], strings[i + 1]));
+ }
+ std::string prefix;
+ if (best_len >= kMinPrefixLen) {
+ prefix = strings[i].substr(0, best_len);
+ prefixes.insert(prefix);
+ total_prefix_savings_ += prefix.length();
+ }
+ total_prefix_index_cost_ += kPrefixIndexCost;
+ }
+ total_num_prefixes_ += prefixes.size();
+ for (const std::string& s : prefixes) {
+ // 4 bytes for an offset, one for length.
+ total_prefix_dict_ += s.length();
+ total_prefix_table_ += kPrefixConstantCost;
+ }
+}
+
+void AnalyzeStrings::Dump(std::ostream& os, uint64_t total_size) const {
+ os << "Total shared prefix bytes " << Percent(total_prefix_savings_, total_size) << "\n";
+ os << "Prefix dictionary cost " << Percent(total_prefix_dict_, total_size) << "\n";
+ os << "Prefix table cost " << Percent(total_prefix_table_, total_size) << "\n";
+ os << "Prefix index cost " << Percent(total_prefix_index_cost_, total_size) << "\n";
+ int64_t net_savings = total_prefix_savings_;
+ net_savings -= total_prefix_dict_;
+ net_savings -= total_prefix_table_;
+ net_savings -= total_prefix_index_cost_;
+ os << "Prefix net savings " << Percent(net_savings, total_size) << "\n";
+ os << "Prefix dictionary elements " << total_num_prefixes_ << "\n";
+}
+
void CountDexIndices::ProcessDexFile(const DexFile& dex_file) {
num_string_ids_ += dex_file.NumStringIds();
num_method_ids_ += dex_file.NumMethodIds();
@@ -107,7 +186,7 @@
}
}
-void CountDexIndices::Dump(std::ostream& os) const {
+void CountDexIndices::Dump(std::ostream& os, uint64_t total_size) const {
os << "Num string ids: " << num_string_ids_ << "\n";
os << "Num method ids: " << num_method_ids_ << "\n";
os << "Num field ids: " << num_field_ids_ << "\n";
@@ -127,6 +206,7 @@
os << "Same class invoke: " << same_class_total << "\n";
os << "Other class invoke: " << other_class_total << "\n";
os << "Invokes from code: " << (same_class_total + other_class_total) << "\n";
+ os << "Total dex size: " << total_size << "\n";
}
} // namespace art
diff --git a/tools/dexanalyze/dexanalyze_experiments.h b/tools/dexanalyze/dexanalyze_experiments.h
index 5d0f51b..6f70f5d 100644
--- a/tools/dexanalyze/dexanalyze_experiments.h
+++ b/tools/dexanalyze/dexanalyze_experiments.h
@@ -24,12 +24,28 @@
class DexFile;
+std::string Percent(uint64_t value, uint64_t max);
+
// An experiment a stateful visitor that runs on dex files. Results are cumulative.
class Experiment {
public:
virtual ~Experiment() {}
virtual void ProcessDexFile(const DexFile& dex_file) = 0;
- virtual void Dump(std::ostream& os) const = 0;
+ virtual void Dump(std::ostream& os, uint64_t total_size) const = 0;
+};
+
+// Analyze string data and strings accessed from code.
+class AnalyzeStrings : public Experiment {
+ public:
+ void ProcessDexFile(const DexFile& dex_file);
+ void Dump(std::ostream& os, uint64_t total_size) const;
+
+ private:
+ int64_t total_prefix_savings_ = 0u;
+ int64_t total_prefix_dict_ = 0u;
+ int64_t total_prefix_table_ = 0u;
+ int64_t total_prefix_index_cost_ = 0u;
+ int64_t total_num_prefixes_ = 0u;
};
// Count numbers of dex indices.
@@ -37,7 +53,7 @@
public:
void ProcessDexFile(const DexFile& dex_file);
- void Dump(std::ostream& os) const;
+ void Dump(std::ostream& os, uint64_t total_size) const;
private:
// Total string ids loaded from dex code.
@@ -65,4 +81,3 @@
} // namespace art
#endif // ART_TOOLS_DEXANALYZE_DEXANALYZE_EXPERIMENTS_H_
-