Reduce memory used by CompiledMethods.
Use LengthPrefixedArray<>s instead of SwapVector<>s to store
CompiledMethod data and get rid of the unnecessary members
of CompiledMethod to reduce dex2oat memory usage. Refactor
the deduplication from CompilerDriver to a new class.
Use HashSet<> instead of std::set<> for the DedupeSet<> to
further decrease the memory usage and improve performance.
This reduces the dex2oat memory usage when compiling boot
image on Nexus 5 (with Optimizing, -j1) by ~6.75MiB (5%).
This also reduces the compile time by ~2.2% (~1.6% dex2oat
time; with Optimizing, without -j).
Change-Id: I974f1f5e58350de2bf487a2bca3907fa05fb80ea
diff --git a/compiler/utils/dedupe_set-inl.h b/compiler/utils/dedupe_set-inl.h
new file mode 100644
index 0000000..ac54813
--- /dev/null
+++ b/compiler/utils/dedupe_set-inl.h
@@ -0,0 +1,253 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_UTILS_DEDUPE_SET_INL_H_
+#define ART_COMPILER_UTILS_DEDUPE_SET_INL_H_
+
+#include "dedupe_set.h"
+
+#include <algorithm>
+#include <inttypes.h>
+#include <unordered_map>
+
+#include "base/mutex.h"
+#include "base/hash_set.h"
+#include "base/stl_util.h"
+#include "base/stringprintf.h"
+#include "base/time_utils.h"
+
+namespace art {
+
+template <typename InKey,
+ typename StoreKey,
+ typename Alloc,
+ typename HashType,
+ typename HashFunc,
+ HashType kShard>
+struct DedupeSet<InKey, StoreKey, Alloc, HashType, HashFunc, kShard>::Stats {
+ size_t collision_sum = 0u;
+ size_t collision_max = 0u;
+ size_t total_probe_distance = 0u;
+ size_t total_size = 0u;
+};
+
+template <typename InKey,
+ typename StoreKey,
+ typename Alloc,
+ typename HashType,
+ typename HashFunc,
+ HashType kShard>
+class DedupeSet<InKey, StoreKey, Alloc, HashType, HashFunc, kShard>::Shard {
+ public:
+ Shard(const Alloc& alloc, const std::string& lock_name)
+ : alloc_(alloc),
+ lock_name_(lock_name),
+ lock_(lock_name_.c_str()),
+ keys_() {
+ }
+
+ ~Shard() {
+ for (const HashedKey<StoreKey>& key : keys_) {
+ DCHECK(key.Key() != nullptr);
+ alloc_.Destroy(key.Key());
+ }
+ }
+
+ const StoreKey* Add(Thread* self, size_t hash, const InKey& in_key) REQUIRES(!lock_) {
+ MutexLock lock(self, lock_);
+ HashedKey<InKey> hashed_in_key(hash, &in_key);
+ auto it = keys_.Find(hashed_in_key);
+ if (it != keys_.end()) {
+ DCHECK(it->Key() != nullptr);
+ return it->Key();
+ }
+ const StoreKey* store_key = alloc_.Copy(in_key);
+ keys_.Insert(HashedKey<StoreKey> { hash, store_key });
+ return store_key;
+ }
+
+ void UpdateStats(Thread* self, Stats* global_stats) REQUIRES(!lock_) {
+ // HashSet<> doesn't keep entries ordered by hash, so we actually allocate memory
+ // for bookkeeping while collecting the stats.
+ std::unordered_map<HashType, size_t> stats;
+ {
+ MutexLock lock(self, lock_);
+ // Note: The total_probe_distance will be updated with the current state.
+ // It may have been higher before a re-hash.
+ global_stats->total_probe_distance += keys_.TotalProbeDistance();
+ global_stats->total_size += keys_.Size();
+ for (const HashedKey<StoreKey>& key : keys_) {
+ auto it = stats.find(key.Hash());
+ if (it == stats.end()) {
+ stats.insert({key.Hash(), 1u});
+ } else {
+ ++it->second;
+ }
+ }
+ }
+ for (const auto& entry : stats) {
+ size_t number_of_entries = entry.second;
+ if (number_of_entries > 1u) {
+ global_stats->collision_sum += number_of_entries - 1u;
+ global_stats->collision_max = std::max(global_stats->collision_max, number_of_entries);
+ }
+ }
+ }
+
+ private:
+ template <typename T>
+ class HashedKey {
+ public:
+ HashedKey() : hash_(0u), key_(nullptr) { }
+ HashedKey(size_t hash, const T* key) : hash_(hash), key_(key) { }
+
+ size_t Hash() const {
+ return hash_;
+ }
+
+ const T* Key() const {
+ return key_;
+ }
+
+ bool IsEmpty() const {
+ return Key() == nullptr;
+ }
+
+ void MakeEmpty() {
+ key_ = nullptr;
+ }
+
+ private:
+ size_t hash_;
+ const T* key_;
+ };
+
+ class ShardEmptyFn {
+ public:
+ bool IsEmpty(const HashedKey<StoreKey>& key) const {
+ return key.IsEmpty();
+ }
+
+ void MakeEmpty(HashedKey<StoreKey>& key) {
+ key.MakeEmpty();
+ }
+ };
+
+ struct ShardHashFn {
+ template <typename T>
+ size_t operator()(const HashedKey<T>& key) const {
+ return key.Hash();
+ }
+ };
+
+ struct ShardPred {
+ typename std::enable_if<!std::is_same<StoreKey, InKey>::value, bool>::type
+ operator()(const HashedKey<StoreKey>& lhs, const HashedKey<StoreKey>& rhs) const {
+ DCHECK(lhs.Key() != nullptr);
+ DCHECK(rhs.Key() != nullptr);
+ // Rehashing: stored keys are already deduplicated, so we can simply compare key pointers.
+ return lhs.Key() == rhs.Key();
+ }
+
+ template <typename LeftT, typename RightT>
+ bool operator()(const HashedKey<LeftT>& lhs, const HashedKey<RightT>& rhs) const {
+ DCHECK(lhs.Key() != nullptr);
+ DCHECK(rhs.Key() != nullptr);
+ return lhs.Hash() == rhs.Hash() &&
+ lhs.Key()->size() == rhs.Key()->size() &&
+ std::equal(lhs.Key()->begin(), lhs.Key()->end(), rhs.Key()->begin());
+ }
+ };
+
+ Alloc alloc_;
+ const std::string lock_name_;
+ Mutex lock_;
+ HashSet<HashedKey<StoreKey>, ShardEmptyFn, ShardHashFn, ShardPred> keys_ GUARDED_BY(lock_);
+};
+
+template <typename InKey,
+ typename StoreKey,
+ typename Alloc,
+ typename HashType,
+ typename HashFunc,
+ HashType kShard>
+const StoreKey* DedupeSet<InKey, StoreKey, Alloc, HashType, HashFunc, kShard>::Add(
+ Thread* self, const InKey& key) {
+ uint64_t hash_start;
+ if (kIsDebugBuild) {
+ hash_start = NanoTime();
+ }
+ HashType raw_hash = HashFunc()(key);
+ if (kIsDebugBuild) {
+ uint64_t hash_end = NanoTime();
+ hash_time_ += hash_end - hash_start;
+ }
+ HashType shard_hash = raw_hash / kShard;
+ HashType shard_bin = raw_hash % kShard;
+ return shards_[shard_bin]->Add(self, shard_hash, key);
+}
+
+template <typename InKey,
+ typename StoreKey,
+ typename Alloc,
+ typename HashType,
+ typename HashFunc,
+ HashType kShard>
+DedupeSet<InKey, StoreKey, Alloc, HashType, HashFunc, kShard>::DedupeSet(const char* set_name,
+ const Alloc& alloc)
+ : hash_time_(0) {
+ for (HashType i = 0; i < kShard; ++i) {
+ std::ostringstream oss;
+ oss << set_name << " lock " << i;
+ shards_[i].reset(new Shard(alloc, oss.str()));
+ }
+}
+
+template <typename InKey,
+ typename StoreKey,
+ typename Alloc,
+ typename HashType,
+ typename HashFunc,
+ HashType kShard>
+DedupeSet<InKey, StoreKey, Alloc, HashType, HashFunc, kShard>::~DedupeSet() {
+ // Everything done by member destructors.
+}
+
+template <typename InKey,
+ typename StoreKey,
+ typename Alloc,
+ typename HashType,
+ typename HashFunc,
+ HashType kShard>
+std::string DedupeSet<InKey, StoreKey, Alloc, HashType, HashFunc, kShard>::DumpStats(
+ Thread* self) const {
+ Stats stats;
+ for (HashType shard = 0; shard < kShard; ++shard) {
+ shards_[shard]->UpdateStats(self, &stats);
+ }
+ return StringPrintf("%zu collisions, %zu max hash collisions, "
+ "%zu/%zu probe distance, %" PRIu64 " ns hash time",
+ stats.collision_sum,
+ stats.collision_max,
+ stats.total_probe_distance,
+ stats.total_size,
+ hash_time_);
+}
+
+
+} // namespace art
+
+#endif // ART_COMPILER_UTILS_DEDUPE_SET_INL_H_