Reduce memory used by CompiledMethods.
Use LengthPrefixedArray<>s instead of SwapVector<>s to store
CompiledMethod data and get rid of the unnecessary members
of CompiledMethod to reduce dex2oat memory usage. Refactor
the deduplication from CompilerDriver to a new class.
Use HashSet<> instead of std::set<> for the DedupeSet<> to
further decrease the memory usage and improve performance.
This reduces the dex2oat memory usage when compiling boot
image on Nexus 5 (with Optimizing, -j1) by ~6.75MiB (5%).
This also reduces the compile time by ~2.2% (~1.6% dex2oat
time; with Optimizing, without -j).
Change-Id: I974f1f5e58350de2bf487a2bca3907fa05fb80ea
diff --git a/compiler/utils/dedupe_set-inl.h b/compiler/utils/dedupe_set-inl.h
new file mode 100644
index 0000000..ac54813
--- /dev/null
+++ b/compiler/utils/dedupe_set-inl.h
@@ -0,0 +1,253 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_UTILS_DEDUPE_SET_INL_H_
+#define ART_COMPILER_UTILS_DEDUPE_SET_INL_H_
+
+#include "dedupe_set.h"
+
+#include <algorithm>
+#include <inttypes.h>
+#include <unordered_map>
+
+#include "base/mutex.h"
+#include "base/hash_set.h"
+#include "base/stl_util.h"
+#include "base/stringprintf.h"
+#include "base/time_utils.h"
+
+namespace art {
+// Statistics accumulated over all shards by Shard::UpdateStats() and formatted by DumpStats().
+template <typename InKey,
+ typename StoreKey,
+ typename Alloc,
+ typename HashType,
+ typename HashFunc,
+ HashType kShard>
+struct DedupeSet<InKey, StoreKey, Alloc, HashType, HashFunc, kShard>::Stats {
+ size_t collision_sum = 0u;  // Sum, over hashes shared within a shard, of (entries - 1).
+ size_t collision_max = 0u;  // Largest number of entries of one shard sharing a hash (0 if none).
+ size_t total_probe_distance = 0u;  // Sum of keys_.TotalProbeDistance() over all shards.
+ size_t total_size = 0u;  // Sum of keys_.Size() over all shards.
+};
+// One lock-protected partition of the dedupe set; owns the stored keys that were added to it.
+template <typename InKey,
+ typename StoreKey,
+ typename Alloc,
+ typename HashType,
+ typename HashFunc,
+ HashType kShard>
+class DedupeSet<InKey, StoreKey, Alloc, HashType, HashFunc, kShard>::Shard {
+ public:
+ Shard(const Alloc& alloc, const std::string& lock_name)
+ : alloc_(alloc),
+ lock_name_(lock_name),
+ lock_(lock_name_.c_str()),
+ keys_() {
+ }
+ // Releases every stored key through alloc_.Destroy(); the keys were created in Add().
+ ~Shard() {
+ for (const HashedKey<StoreKey>& key : keys_) {
+ DCHECK(key.Key() != nullptr);
+ alloc_.Destroy(key.Key());
+ }
+ }
+ // Returns the stored key equal to `in_key`; if there is none, stores alloc_.Copy(in_key) first.
+ const StoreKey* Add(Thread* self, size_t hash, const InKey& in_key) REQUIRES(!lock_) {
+ MutexLock lock(self, lock_);
+ HashedKey<InKey> hashed_in_key(hash, &in_key);  // Lookup wrapper; `in_key` is not copied yet.
+ auto it = keys_.Find(hashed_in_key);
+ if (it != keys_.end()) {
+ DCHECK(it->Key() != nullptr);
+ return it->Key();
+ }
+ const StoreKey* store_key = alloc_.Copy(in_key);
+ keys_.Insert(HashedKey<StoreKey> { hash, store_key });
+ return store_key;
+ }
+ // Adds this shard's probe distance, size and per-hash collision counts to `global_stats`.
+ void UpdateStats(Thread* self, Stats* global_stats) REQUIRES(!lock_) {
+ // HashSet<> doesn't keep entries ordered by hash, so we actually allocate memory
+ // for bookkeeping while collecting the stats.
+ std::unordered_map<HashType, size_t> stats;  // Hash value -> number of entries with that hash.
+ {
+ MutexLock lock(self, lock_);
+ // Note: The total_probe_distance will be updated with the current state.
+ // It may have been higher before a re-hash.
+ global_stats->total_probe_distance += keys_.TotalProbeDistance();
+ global_stats->total_size += keys_.Size();
+ for (const HashedKey<StoreKey>& key : keys_) {
+ auto it = stats.find(key.Hash());
+ if (it == stats.end()) {
+ stats.insert({key.Hash(), 1u});
+ } else {
+ ++it->second;
+ }
+ }
+ }  // lock_ is released here; the loop below reads only the local `stats` map.
+ for (const auto& entry : stats) {
+ size_t number_of_entries = entry.second;
+ if (number_of_entries > 1u) {  // Several entries with the same hash count as a collision.
+ global_stats->collision_sum += number_of_entries - 1u;
+ global_stats->collision_max = std::max(global_stats->collision_max, number_of_entries);
+ }
+ }
+ }
+
+ private:
+ template <typename T>
+ class HashedKey {  // A hash paired with a pointer to a key of type T; never deletes the key.
+ public:
+ HashedKey() : hash_(0u), key_(nullptr) { }
+ HashedKey(size_t hash, const T* key) : hash_(hash), key_(key) { }
+
+ size_t Hash() const {
+ return hash_;
+ }
+
+ const T* Key() const {
+ return key_;
+ }
+
+ bool IsEmpty() const {
+ return Key() == nullptr;
+ }
+
+ void MakeEmpty() {
+ key_ = nullptr;
+ }
+
+ private:
+ size_t hash_;
+ const T* key_;
+ };
+ // Empty-slot policy given to HashSet<>: a slot counts as empty when its key pointer is null.
+ class ShardEmptyFn {
+ public:
+ bool IsEmpty(const HashedKey<StoreKey>& key) const {
+ return key.IsEmpty();
+ }
+
+ void MakeEmpty(HashedKey<StoreKey>& key) {
+ key.MakeEmpty();
+ }
+ };
+ // Hash functor given to HashSet<>: returns the hash already carried by the HashedKey.
+ struct ShardHashFn {
+ template <typename T>
+ size_t operator()(const HashedKey<T>& key) const {
+ return key.Hash();
+ }
+ };
+ // Equality predicate given to HashSet<>: pointer identity for two stored keys, contents otherwise.
+ struct ShardPred {  // NOTE(review): enable_if on a non-template member is not SFINAE - confirm intent.
+ typename std::enable_if<!std::is_same<StoreKey, InKey>::value, bool>::type
+ operator()(const HashedKey<StoreKey>& lhs, const HashedKey<StoreKey>& rhs) const {
+ DCHECK(lhs.Key() != nullptr);
+ DCHECK(rhs.Key() != nullptr);
+ // Rehashing: stored keys are already deduplicated, so we can simply compare key pointers.
+ return lhs.Key() == rhs.Key();
+ }
+ // Generic comparison (e.g. input key vs. stored key): equal iff hash, size and contents match.
+ template <typename LeftT, typename RightT>
+ bool operator()(const HashedKey<LeftT>& lhs, const HashedKey<RightT>& rhs) const {
+ DCHECK(lhs.Key() != nullptr);
+ DCHECK(rhs.Key() != nullptr);
+ return lhs.Hash() == rhs.Hash() &&
+ lhs.Key()->size() == rhs.Key()->size() &&
+ std::equal(lhs.Key()->begin(), lhs.Key()->end(), rhs.Key()->begin());
+ }
+ };
+
+ Alloc alloc_;  // Copies keys into storage in Add() and destroys them in ~Shard().
+ const std::string lock_name_;  // Declared before lock_, which is constructed from its c_str().
+ Mutex lock_;
+ HashSet<HashedKey<StoreKey>, ShardEmptyFn, ShardHashFn, ShardPred> keys_ GUARDED_BY(lock_);
+};
+// Hashes `key`, then deduplicates it in shard `raw_hash % kShard` under hash `raw_hash / kShard`.
+template <typename InKey,
+ typename StoreKey,
+ typename Alloc,
+ typename HashType,
+ typename HashFunc,
+ HashType kShard>
+const StoreKey* DedupeSet<InKey, StoreKey, Alloc, HashType, HashFunc, kShard>::Add(
+ Thread* self, const InKey& key) {
+ uint64_t hash_start;  // Only assigned and read when kIsDebugBuild is true.
+ if (kIsDebugBuild) {
+ hash_start = NanoTime();
+ }
+ HashType raw_hash = HashFunc()(key);
+ if (kIsDebugBuild) {
+ uint64_t hash_end = NanoTime();
+ hash_time_ += hash_end - hash_start;  // NOTE(review): updated without any lock - confirm OK.
+ }
+ HashType shard_hash = raw_hash / kShard;  // Hash value passed to and stored by the shard.
+ HashType shard_bin = raw_hash % kShard;  // Index into shards_ of the shard handling this key.
+ return shards_[shard_bin]->Add(self, shard_hash, key);
+}
+// Creates kShard shards, each given `alloc` and a lock name of the form "<set_name> lock <index>".
+template <typename InKey,
+ typename StoreKey,
+ typename Alloc,
+ typename HashType,
+ typename HashFunc,
+ HashType kShard>
+DedupeSet<InKey, StoreKey, Alloc, HashType, HashFunc, kShard>::DedupeSet(const char* set_name,
+ const Alloc& alloc)
+ : hash_time_(0) {
+ for (HashType i = 0; i < kShard; ++i) {
+ std::ostringstream oss;  // NOTE(review): <sstream> is not included directly here - confirm.
+ oss << set_name << " lock " << i;
+ shards_[i].reset(new Shard(alloc, oss.str()));
+ }
+}
+// Destroying shards_ runs ~Shard() for every shard, which releases that shard's stored keys.
+template <typename InKey,
+ typename StoreKey,
+ typename Alloc,
+ typename HashType,
+ typename HashFunc,
+ HashType kShard>
+DedupeSet<InKey, StoreKey, Alloc, HashType, HashFunc, kShard>::~DedupeSet() {
+ // Everything done by member destructors.
+}
+// Gathers statistics from every shard and returns them, with the hash time, as one string.
+template <typename InKey,
+ typename StoreKey,
+ typename Alloc,
+ typename HashType,
+ typename HashFunc,
+ HashType kShard>
+std::string DedupeSet<InKey, StoreKey, Alloc, HashType, HashFunc, kShard>::DumpStats(
+ Thread* self) const {
+ Stats stats;  // All counters start at zero through their default member initializers.
+ for (HashType shard = 0; shard < kShard; ++shard) {
+ shards_[shard]->UpdateStats(self, &stats);  // Takes the shard's lock internally.
+ }
+ return StringPrintf("%zu collisions, %zu max hash collisions, "
+ "%zu/%zu probe distance, %" PRIu64 " ns hash time",
+ stats.collision_sum,
+ stats.collision_max,
+ stats.total_probe_distance,
+ stats.total_size,
+ hash_time_);  // Stays 0 unless kIsDebugBuild; it is only accumulated in Add().
+}
+
+
+} // namespace art
+
+#endif // ART_COMPILER_UTILS_DEDUPE_SET_INL_H_
diff --git a/compiler/utils/dedupe_set.h b/compiler/utils/dedupe_set.h
index 2c4a689..b62f216 100644
--- a/compiler/utils/dedupe_set.h
+++ b/compiler/utils/dedupe_set.h
@@ -17,151 +17,41 @@
#ifndef ART_COMPILER_UTILS_DEDUPE_SET_H_
#define ART_COMPILER_UTILS_DEDUPE_SET_H_
-#include <algorithm>
-#include <inttypes.h>
#include <memory>
-#include <set>
+#include <stdint.h>
#include <string>
-#include "base/mutex.h"
-#include "base/stl_util.h"
-#include "base/stringprintf.h"
-#include "base/time_utils.h"
-#include "utils/swap_space.h"
+#include "base/macros.h"
namespace art {
+class Thread;
+
// A set of Keys that support a HashFunc returning HashType. Used to find duplicates of Key in the
// Add method. The data-structure is thread-safe through the use of internal locks, it also
// supports the lock being sharded.
-template <typename InKey, typename StoreKey, typename HashType, typename HashFunc,
+template <typename InKey,
+ typename StoreKey,
+ typename Alloc,
+ typename HashType,
+ typename HashFunc,
HashType kShard = 1>
class DedupeSet {
- typedef std::pair<HashType, const InKey*> HashedInKey;
- struct HashedKey {
- StoreKey* store_ptr;
- union {
- HashType store_hash; // Valid if store_ptr != null.
- const HashedInKey* in_key; // Valid if store_ptr == null.
- };
- };
-
- class Comparator {
- public:
- bool operator()(const HashedKey& a, const HashedKey& b) const {
- HashType a_hash = (a.store_ptr != nullptr) ? a.store_hash : a.in_key->first;
- HashType b_hash = (b.store_ptr != nullptr) ? b.store_hash : b.in_key->first;
- if (a_hash != b_hash) {
- return a_hash < b_hash;
- }
- if (a.store_ptr != nullptr && b.store_ptr != nullptr) {
- return std::lexicographical_compare(a.store_ptr->begin(), a.store_ptr->end(),
- b.store_ptr->begin(), b.store_ptr->end());
- } else if (a.store_ptr != nullptr && b.store_ptr == nullptr) {
- return std::lexicographical_compare(a.store_ptr->begin(), a.store_ptr->end(),
- b.in_key->second->begin(), b.in_key->second->end());
- } else if (a.store_ptr == nullptr && b.store_ptr != nullptr) {
- return std::lexicographical_compare(a.in_key->second->begin(), a.in_key->second->end(),
- b.store_ptr->begin(), b.store_ptr->end());
- } else {
- return std::lexicographical_compare(a.in_key->second->begin(), a.in_key->second->end(),
- b.in_key->second->begin(), b.in_key->second->end());
- }
- }
- };
-
public:
- StoreKey* Add(Thread* self, const InKey& key) {
- uint64_t hash_start;
- if (kIsDebugBuild) {
- hash_start = NanoTime();
- }
- HashType raw_hash = HashFunc()(key);
- if (kIsDebugBuild) {
- uint64_t hash_end = NanoTime();
- hash_time_ += hash_end - hash_start;
- }
- HashType shard_hash = raw_hash / kShard;
- HashType shard_bin = raw_hash % kShard;
- HashedInKey hashed_in_key(shard_hash, &key);
- HashedKey hashed_key;
- hashed_key.store_ptr = nullptr;
- hashed_key.in_key = &hashed_in_key;
- MutexLock lock(self, *lock_[shard_bin]);
- auto it = keys_[shard_bin].find(hashed_key);
- if (it != keys_[shard_bin].end()) {
- DCHECK(it->store_ptr != nullptr);
- return it->store_ptr;
- }
- hashed_key.store_ptr = CreateStoreKey(key);
- hashed_key.store_hash = shard_hash;
- keys_[shard_bin].insert(hashed_key);
- return hashed_key.store_ptr;
- }
+ // Add a new key to the dedupe set if not present. Return the equivalent deduplicated stored key.
+ const StoreKey* Add(Thread* self, const InKey& key);
- DedupeSet(const char* set_name, SwapAllocator<void>& alloc)
- : allocator_(alloc), hash_time_(0) {
- for (HashType i = 0; i < kShard; ++i) {
- std::ostringstream oss;
- oss << set_name << " lock " << i;
- lock_name_[i] = oss.str();
- lock_[i].reset(new Mutex(lock_name_[i].c_str()));
- }
- }
+ DedupeSet(const char* set_name, const Alloc& alloc);
- ~DedupeSet() {
- // Have to manually free all pointers.
- for (auto& shard : keys_) {
- for (const auto& hashed_key : shard) {
- DCHECK(hashed_key.store_ptr != nullptr);
- DeleteStoreKey(hashed_key.store_ptr);
- }
- }
- }
+ ~DedupeSet();
- std::string DumpStats() const {
- size_t collision_sum = 0;
- size_t collision_max = 0;
- for (HashType shard = 0; shard < kShard; ++shard) {
- HashType last_hash = 0;
- size_t collision_cur_max = 0;
- for (const HashedKey& key : keys_[shard]) {
- DCHECK(key.store_ptr != nullptr);
- if (key.store_hash == last_hash) {
- collision_cur_max++;
- if (collision_cur_max > 1) {
- collision_sum++;
- if (collision_cur_max > collision_max) {
- collision_max = collision_cur_max;
- }
- }
- } else {
- collision_cur_max = 1;
- last_hash = key.store_hash;
- }
- }
- }
- return StringPrintf("%zu collisions, %zu max bucket size, %" PRIu64 " ns hash time",
- collision_sum, collision_max, hash_time_);
- }
+ std::string DumpStats(Thread* self) const;
private:
- StoreKey* CreateStoreKey(const InKey& key) {
- StoreKey* ret = allocator_.allocate(1);
- allocator_.construct(ret, key.begin(), key.end(), allocator_);
- return ret;
- }
+ struct Stats;
+ class Shard;
- void DeleteStoreKey(StoreKey* key) {
- SwapAllocator<StoreKey> alloc(allocator_);
- alloc.destroy(key);
- alloc.deallocate(key, 1);
- }
-
- std::string lock_name_[kShard];
- std::unique_ptr<Mutex> lock_[kShard];
- std::set<HashedKey, Comparator> keys_[kShard];
- SwapAllocator<StoreKey> allocator_;
+ std::unique_ptr<Shard> shards_[kShard];
uint64_t hash_time_;
DISALLOW_COPY_AND_ASSIGN(DedupeSet);
diff --git a/compiler/utils/dedupe_set_test.cc b/compiler/utils/dedupe_set_test.cc
index 637964e..60a891d 100644
--- a/compiler/utils/dedupe_set_test.cc
+++ b/compiler/utils/dedupe_set_test.cc
@@ -18,15 +18,18 @@
#include <algorithm>
#include <cstdio>
+#include <vector>
+#include "dedupe_set-inl.h"
#include "gtest/gtest.h"
#include "thread-inl.h"
+#include "utils/array_ref.h"
namespace art {
-class DedupeHashFunc {
+class DedupeSetTestHashFunc {
public:
- size_t operator()(const std::vector<uint8_t>& array) const {
+ size_t operator()(const ArrayRef<const uint8_t>& array) const {
size_t hash = 0;
for (uint8_t c : array) {
hash += c;
@@ -36,46 +39,52 @@
return hash;
}
};
+// Test allocator for DedupeSet<>: Copy() heap-allocates a std::vector, Destroy() deletes it.
+class DedupeSetTestAlloc {
+ public:
+ const std::vector<uint8_t>* Copy(const ArrayRef<const uint8_t>& src) {
+ return new std::vector<uint8_t>(src.begin(), src.end());  // Caller releases via Destroy().
+ }
+
+ void Destroy(const std::vector<uint8_t>* key) {
+ delete key;
+ }
+};
+
TEST(DedupeSetTest, Test) {
Thread* self = Thread::Current();
- typedef std::vector<uint8_t> ByteArray;
- SwapAllocator<void> swap(nullptr);
- DedupeSet<ByteArray, SwapVector<uint8_t>, size_t, DedupeHashFunc> deduplicator("test", swap);
- SwapVector<uint8_t>* array1;
+ DedupeSetTestAlloc alloc;
+ DedupeSet<ArrayRef<const uint8_t>,  // InKey
+ std::vector<uint8_t>,  // StoreKey
+ DedupeSetTestAlloc,  // Alloc
+ size_t,  // HashType
+ DedupeSetTestHashFunc> deduplicator("test", alloc);  // HashFunc; kShard keeps its default.
+ const std::vector<uint8_t>* array1;
{
- ByteArray test1;
- test1.push_back(10);
- test1.push_back(20);
- test1.push_back(30);
- test1.push_back(45);
-
+ uint8_t raw_test1[] = { 10u, 20u, 30u, 45u };
+ ArrayRef<const uint8_t> test1(raw_test1);
array1 = deduplicator.Add(self, test1);
ASSERT_NE(array1, nullptr);
ASSERT_TRUE(std::equal(test1.begin(), test1.end(), array1->begin()));
}
- SwapVector<uint8_t>* array2;
+ const std::vector<uint8_t>* array2;
{
- ByteArray test1;
- test1.push_back(10);
- test1.push_back(20);
- test1.push_back(30);
- test1.push_back(45);
- array2 = deduplicator.Add(self, test1);
+ uint8_t raw_test2[] = { 10u, 20u, 30u, 45u };  // Same bytes as raw_test1.
+ ArrayRef<const uint8_t> test2(raw_test2);
+ array2 = deduplicator.Add(self, test2);
ASSERT_EQ(array2, array1);
- ASSERT_TRUE(std::equal(test1.begin(), test1.end(), array2->begin()));
+ ASSERT_TRUE(std::equal(test2.begin(), test2.end(), array2->begin()));
}
- SwapVector<uint8_t>* array3;
+ const std::vector<uint8_t>* array3;
{
- ByteArray test1;
- test1.push_back(10);
- test1.push_back(22);
- test1.push_back(30);
- test1.push_back(47);
- array3 = deduplicator.Add(self, test1);
+ uint8_t raw_test3[] = { 10u, 22u, 30u, 47u };  // Differs from raw_test1 in bytes 2 and 4.
+ ArrayRef<const uint8_t> test3(raw_test3);
+ array3 = deduplicator.Add(self, test3);
ASSERT_NE(array3, nullptr);
- ASSERT_TRUE(std::equal(test1.begin(), test1.end(), array3->begin()));
+ ASSERT_NE(array3, array1);  // Different contents must get a separate stored key.
+ ASSERT_TRUE(std::equal(test3.begin(), test3.end(), array3->begin()));
}
}