Reduce memory used by CompiledMethods.

Use LengthPrefixedArray<>s instead of SwapVector<>s to store
CompiledMethod data and get rid of the unnecessary members
of CompiledMethod to reduce dex2oat memory usage. Refactor
the deduplication from CompilerDriver to a new class.

Use HashSet<> instead of std::set<> for the DedupeSet<> to
further decrease the memory usage and improve performance.

This reduces the dex2oat memory usage when compiling the boot
image on Nexus 5 (with Optimizing, -j1) by ~6.75MiB (5%).
This also reduces the compile time by ~2.2% (~1.6% dex2oat
time; with Optimizing, without -j).

Change-Id: I974f1f5e58350de2bf487a2bca3907fa05fb80ea
diff --git a/compiler/utils/dedupe_set-inl.h b/compiler/utils/dedupe_set-inl.h
new file mode 100644
index 0000000..ac54813
--- /dev/null
+++ b/compiler/utils/dedupe_set-inl.h
@@ -0,0 +1,253 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_UTILS_DEDUPE_SET_INL_H_
+#define ART_COMPILER_UTILS_DEDUPE_SET_INL_H_
+
+#include "dedupe_set.h"
+
+#include <algorithm>
+#include <inttypes.h>
+#include <unordered_map>
+
+#include "base/mutex.h"
+#include "base/hash_set.h"
+#include "base/stl_util.h"
+#include "base/stringprintf.h"
+#include "base/time_utils.h"
+
+namespace art {
+
+template <typename InKey,
+          typename StoreKey,
+          typename Alloc,
+          typename HashType,
+          typename HashFunc,
+          HashType kShard>
+struct DedupeSet<InKey, StoreKey, Alloc, HashType, HashFunc, kShard>::Stats {  // DumpStats totals.
+  size_t collision_sum = 0u;         // For every hash shared by N > 1 keys, N - 1 is added.
+  size_t collision_max = 0u;         // Most keys sharing one hash; stays 0 if none is shared.
+  size_t total_probe_distance = 0u;  // Sum of keys_.TotalProbeDistance() over the shards.
+  size_t total_size = 0u;            // Sum of keys_.Size() over the shards.
+};
+
+template <typename InKey,
+          typename StoreKey,
+          typename Alloc,
+          typename HashType,
+          typename HashFunc,
+          HashType kShard>
+class DedupeSet<InKey, StoreKey, Alloc, HashType, HashFunc, kShard>::Shard {  // Lock + key set.
+ public:
+  Shard(const Alloc& alloc, const std::string& lock_name)
+      : alloc_(alloc),
+        lock_name_(lock_name),      // Private copy of the name.
+        lock_(lock_name_.c_str()),  // Built from the copy above, not from the argument.
+        keys_() {
+  }
+  // Calls alloc_.Destroy() on every key still stored in this shard.
+  ~Shard() {
+    for (const HashedKey<StoreKey>& key : keys_) {
+      DCHECK(key.Key() != nullptr);
+      alloc_.Destroy(key.Key());
+    }
+  }
+  // Returns the stored key equal to in_key; if there is none, stores a copy made by alloc_.
+  const StoreKey* Add(Thread* self, size_t hash, const InKey& in_key) REQUIRES(!lock_) {
+    MutexLock lock(self, lock_);  // Held for both the lookup and the insertion.
+    HashedKey<InKey> hashed_in_key(hash, &in_key);  // Non-owning wrapper used for the lookup.
+    auto it = keys_.Find(hashed_in_key);
+    if (it != keys_.end()) {
+      DCHECK(it->Key() != nullptr);
+      return it->Key();  // Already present: hand back the existing stored key.
+    }
+    const StoreKey* store_key = alloc_.Copy(in_key);  // First occurrence: make a stored copy.
+    keys_.Insert(HashedKey<StoreKey> { hash, store_key });
+    return store_key;
+  }
+  // Adds this shard's size, probe distance and hash-collision counts to *global_stats.
+  void UpdateStats(Thread* self, Stats* global_stats) REQUIRES(!lock_) {
+    // HashSet<> doesn't keep entries ordered by hash, so we actually allocate memory
+    // for bookkeeping while collecting the stats.
+    std::unordered_map<HashType, size_t> stats;  // Hash value -> number of keys with that hash.
+    {
+      MutexLock lock(self, lock_);
+      // Note: The total_probe_distance will be updated with the current state.
+      // It may have been higher before a re-hash.
+      global_stats->total_probe_distance += keys_.TotalProbeDistance();
+      global_stats->total_size += keys_.Size();
+      for (const HashedKey<StoreKey>& key : keys_) {
+        auto it = stats.find(key.Hash());
+        if (it == stats.end()) {
+          stats.insert({key.Hash(), 1u});
+        } else {
+          ++it->second;
+        }
+      }
+    }
+    for (const auto& entry : stats) {  // Runs after the lock scope above has ended.
+      size_t number_of_entries = entry.second;
+      if (number_of_entries > 1u) {  // Only hashes shared by several keys count as collisions.
+        global_stats->collision_sum += number_of_entries - 1u;
+        global_stats->collision_max = std::max(global_stats->collision_max, number_of_entries);
+      }
+    }
+  }
+
+ private:
+  template <typename T>
+  class HashedKey {  // Pairs a hash with a key pointer; a null pointer marks an empty slot.
+   public:
+    HashedKey() : hash_(0u), key_(nullptr) { }  // Default-constructed keys are empty.
+    HashedKey(size_t hash, const T* key) : hash_(hash), key_(key) { }
+
+    size_t Hash() const {
+      return hash_;
+    }
+
+    const T* Key() const {
+      return key_;
+    }
+
+    bool IsEmpty() const {
+      return Key() == nullptr;
+    }
+
+    void MakeEmpty() {
+      key_ = nullptr;  // The hash is left as is; only the pointer encodes emptiness.
+    }
+
+   private:
+    size_t hash_;
+    const T* key_;  // Not owned by HashedKey.
+  };
+
+  class ShardEmptyFn {  // Empty-slot policy handed to HashSet<> (see keys_).
+   public:
+    bool IsEmpty(const HashedKey<StoreKey>& key) const {
+      return key.IsEmpty();
+    }
+
+    void MakeEmpty(HashedKey<StoreKey>& key) {
+      key.MakeEmpty();
+    }
+  };
+
+  struct ShardHashFn {  // Hash functor: returns the hash cached in the HashedKey.
+    template <typename T>
+    size_t operator()(const HashedKey<T>& key) const {
+      return key.Hash();
+    }
+  };
+
+  struct ShardPred {  // Equality functor; the overloads below require StoreKey != InKey.
+    typename std::enable_if<!std::is_same<StoreKey, InKey>::value, bool>::type
+    operator()(const HashedKey<StoreKey>& lhs, const HashedKey<StoreKey>& rhs) const {
+      DCHECK(lhs.Key() != nullptr);
+      DCHECK(rhs.Key() != nullptr);
+      // Rehashing: stored keys are already deduplicated, so we can simply compare key pointers.
+      return lhs.Key() == rhs.Key();
+    }
+    // Any other pairing (e.g. stored key vs. InKey): same hash, same size, same elements.
+    template <typename LeftT, typename RightT>
+    bool operator()(const HashedKey<LeftT>& lhs, const HashedKey<RightT>& rhs) const {
+      DCHECK(lhs.Key() != nullptr);
+      DCHECK(rhs.Key() != nullptr);
+      return lhs.Hash() == rhs.Hash() &&
+          lhs.Key()->size() == rhs.Key()->size() &&
+          std::equal(lhs.Key()->begin(), lhs.Key()->end(), rhs.Key()->begin());
+    }
+  };
+
+  Alloc alloc_;  // Copies keys in Add() and destroys them in ~Shard().
+  const std::string lock_name_;  // Declared before lock_, which is constructed from it.
+  Mutex lock_;
+  HashSet<HashedKey<StoreKey>, ShardEmptyFn, ShardHashFn, ShardPred> keys_ GUARDED_BY(lock_);
+};
+
+template <typename InKey,
+          typename StoreKey,
+          typename Alloc,
+          typename HashType,
+          typename HashFunc,
+          HashType kShard>
+const StoreKey* DedupeSet<InKey, StoreKey, Alloc, HashType, HashFunc, kShard>::Add(
+    Thread* self, const InKey& key) {
+  uint64_t hash_start = 0u;  // Set only in debug builds; never left indeterminate.
+  if (kIsDebugBuild) {
+    hash_start = NanoTime();
+  }
+  const HashType raw_hash = HashFunc()(key);  // Hashing is the only work that gets timed.
+  if (kIsDebugBuild) {
+    const uint64_t hash_end = NanoTime();
+    hash_time_ += hash_end - hash_start;  // NOTE(review): updated outside any lock; confirm.
+  }
+  const HashType shard_hash = raw_hash / kShard;  // Hash value stored inside the shard.
+  const HashType shard_bin = raw_hash % kShard;   // Index of the shard that handles this key.
+  return shards_[shard_bin]->Add(self, shard_hash, key);
+}
+
+template <typename InKey,
+          typename StoreKey,
+          typename Alloc,
+          typename HashType,
+          typename HashFunc,
+          HashType kShard>
+DedupeSet<InKey, StoreKey, Alloc, HashType, HashFunc, kShard>::DedupeSet(const char* set_name,
+                                                                         const Alloc& alloc)
+    : hash_time_(0) {
+  for (HashType i = 0; i < kShard; ++i) {  // Creates one Shard per bin.
+    std::ostringstream oss;  // NOTE(review): <sstream> is not included directly by this file.
+    oss << set_name << " lock " << i;  // Lock name: "<set_name> lock <i>".
+    shards_[i].reset(new Shard(alloc, oss.str()));  // Shard is given alloc and its lock name.
+  }
+}
+
+template <typename InKey,
+          typename StoreKey,
+          typename Alloc,
+          typename HashType,
+          typename HashFunc,
+          HashType kShard>
+DedupeSet<InKey, StoreKey, Alloc, HashType, HashFunc, kShard>::~DedupeSet() {
+  // Nothing to do explicitly: destroying the shards_ members deletes each Shard.
+}
+
+template <typename InKey,
+          typename StoreKey,
+          typename Alloc,
+          typename HashType,
+          typename HashFunc,
+          HashType kShard>
+std::string DedupeSet<InKey, StoreKey, Alloc, HashType, HashFunc, kShard>::DumpStats(
+    Thread* self) const {
+  Stats stats;  // Zero-initialized totals, filled in shard by shard.
+  for (HashType shard = 0; shard < kShard; ++shard) {
+    shards_[shard]->UpdateStats(self, &stats);  // Shards are visited one after another.
+  }
+  return StringPrintf("%zu collisions, %zu max hash collisions, "
+                      "%zu/%zu probe distance, %" PRIu64 " ns hash time",
+                      stats.collision_sum,
+                      stats.collision_max,
+                      stats.total_probe_distance,  // Printed as "<probe distance>/<size>".
+                      stats.total_size,
+                      hash_time_);  // Read here without taking any lock.
+}
+
+
+}  // namespace art
+
+#endif  // ART_COMPILER_UTILS_DEDUPE_SET_INL_H_
diff --git a/compiler/utils/dedupe_set.h b/compiler/utils/dedupe_set.h
index 2c4a689..b62f216 100644
--- a/compiler/utils/dedupe_set.h
+++ b/compiler/utils/dedupe_set.h
@@ -17,151 +17,41 @@
 #ifndef ART_COMPILER_UTILS_DEDUPE_SET_H_
 #define ART_COMPILER_UTILS_DEDUPE_SET_H_
 
-#include <algorithm>
-#include <inttypes.h>
 #include <memory>
-#include <set>
+#include <stdint.h>
 #include <string>
 
-#include "base/mutex.h"
-#include "base/stl_util.h"
-#include "base/stringprintf.h"
-#include "base/time_utils.h"
-#include "utils/swap_space.h"
+#include "base/macros.h"
 
 namespace art {
 
+class Thread;
+
 // A set of Keys that support a HashFunc returning HashType. Used to find duplicates of Key in the
 // Add method. The data-structure is thread-safe through the use of internal locks, it also
 // supports the lock being sharded.
-template <typename InKey, typename StoreKey, typename HashType, typename HashFunc,
+template <typename InKey,
+          typename StoreKey,
+          typename Alloc,
+          typename HashType,
+          typename HashFunc,
           HashType kShard = 1>
 class DedupeSet {
-  typedef std::pair<HashType, const InKey*> HashedInKey;
-  struct HashedKey {
-    StoreKey* store_ptr;
-    union {
-      HashType store_hash;        // Valid if store_ptr != null.
-      const HashedInKey* in_key;  // Valid if store_ptr == null.
-    };
-  };
-
-  class Comparator {
-   public:
-    bool operator()(const HashedKey& a, const HashedKey& b) const {
-      HashType a_hash = (a.store_ptr != nullptr) ? a.store_hash : a.in_key->first;
-      HashType b_hash = (b.store_ptr != nullptr) ? b.store_hash : b.in_key->first;
-      if (a_hash != b_hash) {
-        return a_hash < b_hash;
-      }
-      if (a.store_ptr != nullptr && b.store_ptr != nullptr) {
-        return std::lexicographical_compare(a.store_ptr->begin(), a.store_ptr->end(),
-                                            b.store_ptr->begin(), b.store_ptr->end());
-      } else if (a.store_ptr != nullptr && b.store_ptr == nullptr) {
-        return std::lexicographical_compare(a.store_ptr->begin(), a.store_ptr->end(),
-                                            b.in_key->second->begin(), b.in_key->second->end());
-      } else if (a.store_ptr == nullptr && b.store_ptr != nullptr) {
-        return std::lexicographical_compare(a.in_key->second->begin(), a.in_key->second->end(),
-                                            b.store_ptr->begin(), b.store_ptr->end());
-      } else {
-        return std::lexicographical_compare(a.in_key->second->begin(), a.in_key->second->end(),
-                                            b.in_key->second->begin(), b.in_key->second->end());
-      }
-    }
-  };
-
  public:
-  StoreKey* Add(Thread* self, const InKey& key) {
-    uint64_t hash_start;
-    if (kIsDebugBuild) {
-      hash_start = NanoTime();
-    }
-    HashType raw_hash = HashFunc()(key);
-    if (kIsDebugBuild) {
-      uint64_t hash_end = NanoTime();
-      hash_time_ += hash_end - hash_start;
-    }
-    HashType shard_hash = raw_hash / kShard;
-    HashType shard_bin = raw_hash % kShard;
-    HashedInKey hashed_in_key(shard_hash, &key);
-    HashedKey hashed_key;
-    hashed_key.store_ptr = nullptr;
-    hashed_key.in_key = &hashed_in_key;
-    MutexLock lock(self, *lock_[shard_bin]);
-    auto it = keys_[shard_bin].find(hashed_key);
-    if (it != keys_[shard_bin].end()) {
-      DCHECK(it->store_ptr != nullptr);
-      return it->store_ptr;
-    }
-    hashed_key.store_ptr = CreateStoreKey(key);
-    hashed_key.store_hash = shard_hash;
-    keys_[shard_bin].insert(hashed_key);
-    return hashed_key.store_ptr;
-  }
+  // Add a new key to the dedupe set if not present. Return the equivalent deduplicated stored key.
+  const StoreKey* Add(Thread* self, const InKey& key);
 
-  DedupeSet(const char* set_name, SwapAllocator<void>& alloc)
-      : allocator_(alloc), hash_time_(0) {
-    for (HashType i = 0; i < kShard; ++i) {
-      std::ostringstream oss;
-      oss << set_name << " lock " << i;
-      lock_name_[i] = oss.str();
-      lock_[i].reset(new Mutex(lock_name_[i].c_str()));
-    }
-  }
+  DedupeSet(const char* set_name, const Alloc& alloc);
 
-  ~DedupeSet() {
-    // Have to manually free all pointers.
-    for (auto& shard : keys_) {
-      for (const auto& hashed_key : shard) {
-        DCHECK(hashed_key.store_ptr != nullptr);
-        DeleteStoreKey(hashed_key.store_ptr);
-      }
-    }
-  }
+  ~DedupeSet();
 
-  std::string DumpStats() const {
-    size_t collision_sum = 0;
-    size_t collision_max = 0;
-    for (HashType shard = 0; shard < kShard; ++shard) {
-      HashType last_hash = 0;
-      size_t collision_cur_max = 0;
-      for (const HashedKey& key : keys_[shard]) {
-        DCHECK(key.store_ptr != nullptr);
-        if (key.store_hash == last_hash) {
-          collision_cur_max++;
-          if (collision_cur_max > 1) {
-            collision_sum++;
-            if (collision_cur_max > collision_max) {
-              collision_max = collision_cur_max;
-            }
-          }
-        } else {
-          collision_cur_max = 1;
-          last_hash = key.store_hash;
-        }
-      }
-    }
-    return StringPrintf("%zu collisions, %zu max bucket size, %" PRIu64 " ns hash time",
-                        collision_sum, collision_max, hash_time_);
-  }
+  std::string DumpStats(Thread* self) const;
 
  private:
-  StoreKey* CreateStoreKey(const InKey& key) {
-    StoreKey* ret = allocator_.allocate(1);
-    allocator_.construct(ret, key.begin(), key.end(), allocator_);
-    return ret;
-  }
+  struct Stats;
+  class Shard;
 
-  void DeleteStoreKey(StoreKey* key) {
-    SwapAllocator<StoreKey> alloc(allocator_);
-    alloc.destroy(key);
-    alloc.deallocate(key, 1);
-  }
-
-  std::string lock_name_[kShard];
-  std::unique_ptr<Mutex> lock_[kShard];
-  std::set<HashedKey, Comparator> keys_[kShard];
-  SwapAllocator<StoreKey> allocator_;
+  std::unique_ptr<Shard> shards_[kShard];
   uint64_t hash_time_;
 
   DISALLOW_COPY_AND_ASSIGN(DedupeSet);
diff --git a/compiler/utils/dedupe_set_test.cc b/compiler/utils/dedupe_set_test.cc
index 637964e..60a891d 100644
--- a/compiler/utils/dedupe_set_test.cc
+++ b/compiler/utils/dedupe_set_test.cc
@@ -18,15 +18,18 @@
 
 #include <algorithm>
 #include <cstdio>
+#include <vector>
 
+#include "dedupe_set-inl.h"
 #include "gtest/gtest.h"
 #include "thread-inl.h"
+#include "utils/array_ref.h"
 
 namespace art {
 
-class DedupeHashFunc {
+class DedupeSetTestHashFunc {
  public:
-  size_t operator()(const std::vector<uint8_t>& array) const {
+  size_t operator()(const ArrayRef<const uint8_t>& array) const {
     size_t hash = 0;
     for (uint8_t c : array) {
       hash += c;
@@ -36,46 +39,52 @@
     return hash;
   }
 };
+
+class DedupeSetTestAlloc {  // Allocator used by the test: stored keys are std::vector<uint8_t>.
+ public:
+  const std::vector<uint8_t>* Copy(const ArrayRef<const uint8_t>& src) {  // Heap copy of src.
+    return new std::vector<uint8_t>(src.begin(), src.end());
+  }
+
+  void Destroy(const std::vector<uint8_t>* key) {  // Deletes the given vector.
+    delete key;
+  }
+};
+
 TEST(DedupeSetTest, Test) {
   Thread* self = Thread::Current();
-  typedef std::vector<uint8_t> ByteArray;
-  SwapAllocator<void> swap(nullptr);
-  DedupeSet<ByteArray, SwapVector<uint8_t>, size_t, DedupeHashFunc> deduplicator("test", swap);
-  SwapVector<uint8_t>* array1;
+  DedupeSetTestAlloc alloc;
+  DedupeSet<ArrayRef<const uint8_t>,
+            std::vector<uint8_t>,
+            DedupeSetTestAlloc,
+            size_t,
+            DedupeSetTestHashFunc> deduplicator("test", alloc);
+  const std::vector<uint8_t>* array1;
   {
-    ByteArray test1;
-    test1.push_back(10);
-    test1.push_back(20);
-    test1.push_back(30);
-    test1.push_back(45);
-
+    uint8_t raw_test1[] = { 10u, 20u, 30u, 45u };
+    ArrayRef<const uint8_t> test1(raw_test1);
     array1 = deduplicator.Add(self, test1);
     ASSERT_NE(array1, nullptr);
     ASSERT_TRUE(std::equal(test1.begin(), test1.end(), array1->begin()));
   }
 
-  SwapVector<uint8_t>* array2;
+  const std::vector<uint8_t>* array2;
   {
-    ByteArray test1;
-    test1.push_back(10);
-    test1.push_back(20);
-    test1.push_back(30);
-    test1.push_back(45);
-    array2 = deduplicator.Add(self, test1);
+    uint8_t raw_test2[] = { 10u, 20u, 30u, 45u };
+    ArrayRef<const uint8_t> test2(raw_test2);
+    array2 = deduplicator.Add(self, test2);
     ASSERT_EQ(array2, array1);
-    ASSERT_TRUE(std::equal(test1.begin(), test1.end(), array2->begin()));
+    ASSERT_TRUE(std::equal(test2.begin(), test2.end(), array2->begin()));
   }
 
-  SwapVector<uint8_t>* array3;
+  const std::vector<uint8_t>* array3;
   {
-    ByteArray test1;
-    test1.push_back(10);
-    test1.push_back(22);
-    test1.push_back(30);
-    test1.push_back(47);
-    array3 = deduplicator.Add(self, test1);
+    uint8_t raw_test3[] = { 10u, 22u, 30u, 47u };
+    ArrayRef<const uint8_t> test3(raw_test3);
+    array3 = deduplicator.Add(self, test3);
     ASSERT_NE(array3, nullptr);
-    ASSERT_TRUE(std::equal(test1.begin(), test1.end(), array3->begin()));
+    ASSERT_NE(array3, array1);
+    ASSERT_TRUE(std::equal(test3.begin(), test3.end(), array3->begin()));
   }
 }