Add trie-based greedy prefix construction
Dex savings for top 99 APKs:
Trie-based: 4.82%
Greedy: 4.68%
The algorithm repeatedly picks the prefix that saves the most
bytes until no further savings are possible.
Test: test-art-host
Bug: 77709234
Bug: 112311352
Change-Id: I686cfc35a1f17494294816493fb2a1107f3cd114
diff --git a/tools/dexanalyze/dexanalyze_bytecode.cc b/tools/dexanalyze/dexanalyze_bytecode.cc
index e6e58c0..659a940 100644
--- a/tools/dexanalyze/dexanalyze_bytecode.cc
+++ b/tools/dexanalyze/dexanalyze_bytecode.cc
@@ -264,18 +264,18 @@
uint32_t receiver = inst->VRegB_22c();
uint32_t first_arg_reg = code_item.RegistersSize() - code_item.InsSize();
uint32_t out_reg = inst->VRegA_22c();
- if (Enabled(kExperimentInstanceFieldSelf) &&
- first_arg_reg == receiver &&
- holder_type == current_class_type) {
- if (count_types) {
- ++current_type.fields_.FindOrAdd(dex_field_idx)->second;
- } else {
- uint32_t field_idx = types[holder_type.index_].fields_.Get(dex_field_idx);
- ExtendPrefix(&out_reg, &field_idx);
- CHECK(InstNibbles(new_opcode, {out_reg, field_idx}));
- continue;
- }
- } else if (Enabled(kExperimentInstanceField)) {
+ if (Enabled(kExperimentInstanceFieldSelf) &&
+ first_arg_reg == receiver &&
+ holder_type == current_class_type) {
+ if (count_types) {
+ ++current_type.fields_.FindOrAdd(dex_field_idx)->second;
+ } else {
+ uint32_t field_idx = types[holder_type.index_].fields_.Get(dex_field_idx);
+ ExtendPrefix(&out_reg, &field_idx);
+ CHECK(InstNibbles(new_opcode, {out_reg, field_idx}));
+ continue;
+ }
+ } else if (Enabled(kExperimentInstanceField)) {
if (count_types) {
++current_type.types_.FindOrAdd(holder_type.index_)->second;
++types[holder_type.index_].fields_.FindOrAdd(dex_field_idx)->second;
diff --git a/tools/dexanalyze/dexanalyze_strings.cc b/tools/dexanalyze/dexanalyze_strings.cc
index 9f67ff4..863e4ee 100644
--- a/tools/dexanalyze/dexanalyze_strings.cc
+++ b/tools/dexanalyze/dexanalyze_strings.cc
@@ -19,6 +19,7 @@
#include <algorithm>
#include <iomanip>
#include <iostream>
+#include <queue>
#include "dex/class_accessor-inl.h"
#include "dex/code_item_accessors-inl.h"
@@ -27,8 +28,171 @@
namespace art {
namespace dexanalyze {
+// Tunable parameters.
+static const size_t kMinPrefixLen = 1;
+static const size_t kMaxPrefixLen = 255;
+static const size_t kPrefixConstantCost = 4;
+static const size_t kPrefixIndexCost = 2;
+
+// Node value = (distance from root) * (occurrences - 1).
+class MatchTrie {
+ public:
+ void Add(const std::string& str) {
+ MatchTrie* node = this;
+ size_t depth = 0u;
+ for (uint8_t c : str) {
+ ++depth;
+ if (node->nodes_[c] == nullptr) {
+ MatchTrie* new_node = new MatchTrie();
+ node->nodes_[c].reset(new_node);
+ new_node->parent_ = node;
+ new_node->depth_ = depth;
+ new_node->incoming_ = c;
+ node = new_node;
+ } else {
+ node = node->nodes_[c].get();
+ }
+ ++node->count_;
+ }
+ node->is_end_ = true;
+ }
+
+ // Returns the length of the longest prefix and if it's a leaf node.
+ std::pair<size_t, bool> LongestPrefix(const std::string& str) const {
+ const MatchTrie* node = this;
+ const MatchTrie* best_node = this;
+ size_t depth = 0u;
+ size_t best_depth = 0u;
+ for (uint8_t c : str) {
+ if (node->nodes_[c] == nullptr) {
+ break;
+ }
+ node = node->nodes_[c].get();
+ ++depth;
+ if (node->is_end_) {
+ best_depth = depth;
+ best_node = node;
+ }
+ }
+ bool is_leaf = true;
+ for (const std::unique_ptr<MatchTrie>& cur_node : best_node->nodes_) {
+ if (cur_node != nullptr) {
+ is_leaf = false;
+ }
+ }
+ return {best_depth, is_leaf};
+ }
+
+ int32_t Savings() const {
+ int32_t cost = kPrefixConstantCost;
+ int32_t first_used = 0u;
+ if (chosen_suffix_count_ == 0u) {
+ cost += depth_;
+ }
+ uint32_t extra_savings = 0u;
+ for (MatchTrie* cur = parent_; cur != nullptr; cur = cur->parent_) {
+ if (cur->chosen_) {
+ first_used = cur->depth_;
+ if (cur->chosen_suffix_count_ == 0u) {
+ // First suffix for the chosen parent, remove the cost of the dictionary entry.
+ extra_savings += first_used;
+ }
+ break;
+ }
+ }
+ return count_ * (depth_ - first_used) - cost + extra_savings;
+ }
+
+ template <typename T, typename... Args, template <typename...> class Queue>
+ T PopRealTop(Queue<T, Args...>& queue) {
+ auto pair = queue.top();
+ queue.pop();
+ // Keep updating values until one sticks.
+ while (pair.second->Savings() != pair.first) {
+ pair.first = pair.second->Savings();
+ queue.push(pair);
+ pair = queue.top();
+ queue.pop();
+ }
+ return pair;
+ }
+
+ std::vector<std::string> ExtractPrefixes(size_t max) {
+ std::vector<std::string> ret;
+ // Make priority queue and adaptively update it. Each node value is the savings from picking
+ // it. Insert all of the interesting nodes in the queue (children != 1).
+ std::priority_queue<std::pair<int32_t, MatchTrie*>> queue;
+ // Add all of the nodes to the queue.
+ std::vector<MatchTrie*> work(1, this);
+ while (!work.empty()) {
+ MatchTrie* elem = work.back();
+ work.pop_back();
+ size_t num_childs = 0u;
+ for (const std::unique_ptr<MatchTrie>& child : elem->nodes_) {
+ if (child != nullptr) {
+ work.push_back(child.get());
+ ++num_childs;
+ }
+ }
+ if (num_childs > 1u || elem->is_end_) {
+ queue.emplace(elem->Savings(), elem);
+ }
+ }
+ std::priority_queue<std::pair<int32_t, MatchTrie*>> prefixes;
+ // The savings can only ever go down for a given node, never up.
+ while (max != 0u && !queue.empty()) {
+ std::pair<int32_t, MatchTrie*> pair = PopRealTop(queue);
+ if (pair.second != this && pair.first > 0) {
+ // Pick this node.
+ uint32_t count = pair.second->count_;
+ pair.second->chosen_ = true;
+ for (MatchTrie* cur = pair.second->parent_; cur != this; cur = cur->parent_) {
+ if (cur->chosen_) {
+ break;
+ }
+ cur->count_ -= count;
+ }
+ for (MatchTrie* cur = pair.second->parent_; cur != this; cur = cur->parent_) {
+ ++cur->chosen_suffix_count_;
+ }
+ prefixes.emplace(pair.first, pair.second);
+ --max;
+ } else {
+ // Negative or no EV, just delete the node.
+ }
+ }
+ while (!prefixes.empty()) {
+ std::pair<int32_t, MatchTrie*> pair = PopRealTop(prefixes);
+ if (pair.first <= 0) {
+ continue;
+ }
+ std::vector<uint8_t> chars;
+ for (MatchTrie* cur = pair.second; cur != this; cur = cur->parent_) {
+ chars.push_back(cur->incoming_);
+ }
+ ret.push_back(std::string(chars.rbegin(), chars.rend()));
+ // LOG(INFO) << pair.second->Savings() << " : " << ret.back();
+ }
+ return ret;
+ }
+
+ std::unique_ptr<MatchTrie> nodes_[256];
+ MatchTrie* parent_ = nullptr;
+ uint32_t count_ = 0u;
+ int32_t depth_ = 0u;
+ int32_t savings_ = 0u;
+ uint8_t incoming_ = 0u;
+ // If the current node is the end of a possible prefix.
+ bool is_end_ = false;
+ // If the current node is chosen to be a used prefix.
+ bool chosen_ = false;
+ // If the current node is a prefix of a longer chosen prefix.
+ uint32_t chosen_suffix_count_ = 0u;
+};
+
void AnalyzeStrings::ProcessDexFiles(const std::vector<std::unique_ptr<const DexFile>>& dex_files) {
std::set<std::string> unique_strings;
+ // Accumulate the strings.
for (const std::unique_ptr<const DexFile>& dex_file : dex_files) {
for (size_t i = 0; i < dex_file->NumStringIds(); ++i) {
uint32_t length = 0;
@@ -49,18 +213,17 @@
}
}
// Unique strings only since we want to exclude savings from multidex duplication.
- std::vector<std::string> strings(unique_strings.begin(), unique_strings.end());
- unique_strings.clear();
+ ProcessStrings(std::vector<std::string>(unique_strings.begin(), unique_strings.end()), 1);
+}
- // Tunable parameters.
- static const size_t kMinPrefixLen = 1;
- static const size_t kMaxPrefixLen = 255;
- static const size_t kPrefixConstantCost = 4;
- static const size_t kPrefixIndexCost = 2;
-
+void AnalyzeStrings::ProcessStrings(const std::vector<std::string>& strings, size_t iterations) {
+ if (iterations == 0u) {
+ return;
+ }
// Calculate total shared prefix.
std::vector<size_t> shared_len;
prefixes_.clear();
+ std::unique_ptr<MatchTrie> prefix_construct(new MatchTrie());
for (size_t i = 0; i < strings.size(); ++i) {
size_t best_len = 0;
if (i > 0) {
@@ -73,62 +236,104 @@
std::string prefix;
if (best_len >= kMinPrefixLen) {
prefix = strings[i].substr(0, best_len);
+ prefix_construct->Add(prefix);
++prefixes_[prefix];
+ total_shared_prefix_bytes_ += best_len;
}
total_prefix_index_cost_ += kPrefixIndexCost;
}
- // Optimize the result by moving long prefixes to shorter ones if it causes savings.
- while (true) {
- bool have_savings = false;
- auto it = prefixes_.begin();
- std::vector<std::string> longest;
- for (const auto& pair : prefixes_) {
- longest.push_back(pair.first);
+
+ static constexpr size_t kPrefixBits = 15;
+ static constexpr size_t kShortLen = (1u << (15 - kPrefixBits)) - 1;
+ std::unique_ptr<MatchTrie> prefix_trie(new MatchTrie());
+ static constexpr bool kUseGreedyTrie = true;
+ if (kUseGreedyTrie) {
+ std::vector<std::string> prefixes(prefix_construct->ExtractPrefixes(1 << kPrefixBits));
+ for (auto&& str : prefixes) {
+ prefix_trie->Add(str);
}
- std::sort(longest.begin(), longest.end(), [](const std::string& a, const std::string& b) {
- return a.length() > b.length();
- });
- // Do longest first since this provides the best results.
- for (const std::string& s : longest) {
- it = prefixes_.find(s);
- CHECK(it != prefixes_.end());
- const std::string& prefix = it->first;
- int64_t best_savings = 0u;
- int64_t best_len = -1;
- for (int64_t len = prefix.length() - 1; len >= 0; --len) {
- auto found = prefixes_.find(prefix.substr(0, len));
- if (len != 0 && found == prefixes_.end()) {
- continue;
+ } else {
+ // Optimize the result by moving long prefixes to shorter ones if it causes additional savings.
+ while (true) {
+ bool have_savings = false;
+ auto it = prefixes_.begin();
+ std::vector<std::string> longest;
+ for (const auto& pair : prefixes_) {
+ longest.push_back(pair.first);
+ }
+ std::sort(longest.begin(), longest.end(), [](const std::string& a, const std::string& b) {
+ return a.length() > b.length();
+ });
+ // Do longest first since this provides the best results.
+ for (const std::string& s : longest) {
+ it = prefixes_.find(s);
+ CHECK(it != prefixes_.end());
+ const std::string& prefix = it->first;
+ int64_t best_savings = 0u;
+ int64_t best_len = -1;
+ for (int64_t len = prefix.length() - 1; len >= 0; --len) {
+ auto found = prefixes_.find(prefix.substr(0, len));
+ if (len != 0 && found == prefixes_.end()) {
+ continue;
+ }
+ // Calculate savings from downgrading the prefix.
+ int64_t savings = kPrefixConstantCost + prefix.length() -
+ (prefix.length() - len) * it->second;
+ if (savings > best_savings) {
+ best_savings = savings;
+ best_len = len;
+ break;
+ }
}
- // Calculate savings from downgrading the prefix.
- int64_t savings = kPrefixConstantCost + prefix.length() -
- (prefix.length() - len) * it->second;
- if (savings > best_savings) {
- best_savings = savings;
- best_len = len;
- break;
+ if (best_len != -1) {
+ prefixes_[prefix.substr(0, best_len)] += it->second;
+ it = prefixes_.erase(it);
+ optimization_savings_ += best_savings;
+ have_savings = true;
+ } else {
+ ++it;
}
}
- if (best_len != -1) {
- prefixes_[prefix.substr(0, best_len)] += it->second;
- it = prefixes_.erase(it);
- optimization_savings_ += best_savings;
- have_savings = true;
- } else {
- ++it;
+ if (!have_savings) {
+ break;
}
}
- if (!have_savings) {
- break;
+ for (auto&& pair : prefixes_) {
+ prefix_trie->Add(pair.first);
}
}
- total_num_prefixes_ += prefixes_.size();
- for (const auto& pair : prefixes_) {
+
+ // Count longest prefixes.
+ std::set<std::string> used_prefixes;
+ std::vector<std::string> suffix;
+ for (const std::string& str : strings) {
+ auto pair = prefix_trie->LongestPrefix(str);
+ const size_t len = pair.first;
+ if (len >= kMinPrefixLen) {
+ ++strings_used_prefixed_;
+ total_prefix_savings_ += len;
+ used_prefixes.insert(str.substr(0, len));
+ }
+ suffix.push_back(str.substr(len));
+ if (suffix.back().size() < kShortLen) {
+ ++short_strings_;
+ } else {
+ ++long_strings_;
+ }
+ }
+ std::sort(suffix.begin(), suffix.end());
+ for (const std::string& prefix : used_prefixes) {
// 4 bytes for an offset, one for length.
- total_prefix_dict_ += pair.first.length();
+ auto pair = prefix_trie->LongestPrefix(prefix);
+ CHECK_EQ(pair.first, prefix.length());
+ if (pair.second) {
+ // Only need to add to dictionary if it's a leaf, otherwise we can reuse string data of the
+ // other prefix.
+ total_prefix_dict_ += prefix.size();
+ }
total_prefix_table_ += kPrefixConstantCost;
- total_prefix_savings_ += pair.first.length() * pair.second;
}
+ ProcessStrings(suffix, iterations - 1);
}
void AnalyzeStrings::Dump(std::ostream& os, uint64_t total_size) const {
@@ -137,17 +342,20 @@
os << "ASCII string data bytes " << Percent(ascii_string_bytes_, total_size) << "\n";
// Prefix based strings.
- os << "Total shared prefix bytes " << Percent(total_prefix_savings_, total_size) << "\n";
+ os << "Total shared prefix bytes " << Percent(total_shared_prefix_bytes_, total_size) << "\n";
os << "Prefix dictionary cost " << Percent(total_prefix_dict_, total_size) << "\n";
os << "Prefix table cost " << Percent(total_prefix_table_, total_size) << "\n";
os << "Prefix index cost " << Percent(total_prefix_index_cost_, total_size) << "\n";
- int64_t net_savings = total_prefix_savings_;
+ int64_t net_savings = total_prefix_savings_ + short_strings_;
net_savings -= total_prefix_dict_;
net_savings -= total_prefix_table_;
net_savings -= total_prefix_index_cost_;
os << "Prefix dictionary elements " << total_num_prefixes_ << "\n";
os << "Optimization savings " << Percent(optimization_savings_, total_size) << "\n";
os << "Prefix net savings " << Percent(net_savings, total_size) << "\n";
+ os << "Strings using prefix "
+ << Percent(strings_used_prefixed_, total_prefix_index_cost_ / kPrefixIndexCost) << "\n";
+ os << "Short strings " << Percent(short_strings_, short_strings_ + long_strings_) << "\n";
if (verbose_level_ >= VerboseLevel::kEverything) {
std::vector<std::pair<std::string, size_t>> pairs(prefixes_.begin(), prefixes_.end());
// Sort lexicographically.
diff --git a/tools/dexanalyze/dexanalyze_strings.h b/tools/dexanalyze/dexanalyze_strings.h
index 3559afa..32702a6 100644
--- a/tools/dexanalyze/dexanalyze_strings.h
+++ b/tools/dexanalyze/dexanalyze_strings.h
@@ -36,15 +36,21 @@
void Dump(std::ostream& os, uint64_t total_size) const override;
private:
+ void ProcessStrings(const std::vector<std::string>& strings, size_t iterations);
+
int64_t wide_string_bytes_ = 0u;
int64_t ascii_string_bytes_ = 0u;
int64_t string_data_bytes_ = 0u;
+ int64_t total_shared_prefix_bytes_ = 0u;
int64_t total_prefix_savings_ = 0u;
int64_t total_prefix_dict_ = 0u;
int64_t total_prefix_table_ = 0u;
int64_t total_prefix_index_cost_ = 0u;
int64_t total_num_prefixes_ = 0u;
int64_t optimization_savings_ = 0u;
+ int64_t strings_used_prefixed_ = 0u;
+ int64_t short_strings_ = 0u;
+ int64_t long_strings_ = 0u;
std::unordered_map<std::string, size_t> prefixes_;
};