Vladimir Marko | 35831e8 | 2015-09-11 11:59:18 +0100 | [diff] [blame] | 1 | /* |
| 2 | * Copyright (C) 2015 The Android Open Source Project |
| 3 | * |
| 4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | * you may not use this file except in compliance with the License. |
| 6 | * You may obtain a copy of the License at |
| 7 | * |
| 8 | * http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | * |
| 10 | * Unless required by applicable law or agreed to in writing, software |
| 11 | * distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | * See the License for the specific language governing permissions and |
| 14 | * limitations under the License. |
| 15 | */ |
| 16 | |
| 17 | #ifndef ART_COMPILER_UTILS_DEDUPE_SET_INL_H_ |
| 18 | #define ART_COMPILER_UTILS_DEDUPE_SET_INL_H_ |
| 19 | |
| 20 | #include "dedupe_set.h" |
| 21 | |
Vladimir Marko | 35831e8 | 2015-09-11 11:59:18 +0100 | [diff] [blame] | 22 | #include <inttypes.h> |
Andreas Gampe | 8cf9cb3 | 2017-07-19 09:28:38 -0700 | [diff] [blame] | 23 | |
| 24 | #include <algorithm> |
Vladimir Marko | 35831e8 | 2015-09-11 11:59:18 +0100 | [diff] [blame] | 25 | #include <unordered_map> |
| 26 | |
Andreas Gampe | 46ee31b | 2016-12-14 10:11:49 -0800 | [diff] [blame] | 27 | #include "android-base/stringprintf.h" |
| 28 | |
Vladimir Marko | 35831e8 | 2015-09-11 11:59:18 +0100 | [diff] [blame] | 29 | #include "base/hash_set.h" |
Vladimir Marko | 176362a | 2022-11-08 11:47:50 +0000 | [diff] [blame] | 30 | #include "base/macros.h" |
Andreas Gampe | 8cf9cb3 | 2017-07-19 09:28:38 -0700 | [diff] [blame] | 31 | #include "base/mutex.h" |
Vladimir Marko | 35831e8 | 2015-09-11 11:59:18 +0100 | [diff] [blame] | 32 | #include "base/stl_util.h" |
Vladimir Marko | 35831e8 | 2015-09-11 11:59:18 +0100 | [diff] [blame] | 33 | #include "base/time_utils.h" |
| 34 | |
Vladimir Marko | 176362a | 2022-11-08 11:47:50 +0000 | [diff] [blame] | 35 | namespace art HIDDEN { |
Vladimir Marko | 35831e8 | 2015-09-11 11:59:18 +0100 | [diff] [blame] | 36 | |
| 37 | template <typename InKey, |
| 38 | typename StoreKey, |
| 39 | typename Alloc, |
| 40 | typename HashType, |
| 41 | typename HashFunc, |
| 42 | HashType kShard> |
| 43 | struct DedupeSet<InKey, StoreKey, Alloc, HashType, HashFunc, kShard>::Stats { |
| 44 | size_t collision_sum = 0u; |
| 45 | size_t collision_max = 0u; |
| 46 | size_t total_probe_distance = 0u; |
| 47 | size_t total_size = 0u; |
| 48 | }; |
| 49 | |
| 50 | template <typename InKey, |
| 51 | typename StoreKey, |
| 52 | typename Alloc, |
| 53 | typename HashType, |
| 54 | typename HashFunc, |
| 55 | HashType kShard> |
| 56 | class DedupeSet<InKey, StoreKey, Alloc, HashType, HashFunc, kShard>::Shard { |
| 57 | public: |
| 58 | Shard(const Alloc& alloc, const std::string& lock_name) |
| 59 | : alloc_(alloc), |
| 60 | lock_name_(lock_name), |
| 61 | lock_(lock_name_.c_str()), |
| 62 | keys_() { |
| 63 | } |
| 64 | |
| 65 | ~Shard() { |
| 66 | for (const HashedKey<StoreKey>& key : keys_) { |
| 67 | DCHECK(key.Key() != nullptr); |
| 68 | alloc_.Destroy(key.Key()); |
| 69 | } |
| 70 | } |
| 71 | |
| 72 | const StoreKey* Add(Thread* self, size_t hash, const InKey& in_key) REQUIRES(!lock_) { |
| 73 | MutexLock lock(self, lock_); |
| 74 | HashedKey<InKey> hashed_in_key(hash, &in_key); |
Vladimir Marko | 54159c6 | 2018-06-20 14:30:08 +0100 | [diff] [blame] | 75 | auto it = keys_.find(hashed_in_key); |
Vladimir Marko | 35831e8 | 2015-09-11 11:59:18 +0100 | [diff] [blame] | 76 | if (it != keys_.end()) { |
| 77 | DCHECK(it->Key() != nullptr); |
| 78 | return it->Key(); |
| 79 | } |
| 80 | const StoreKey* store_key = alloc_.Copy(in_key); |
Vladimir Marko | 54159c6 | 2018-06-20 14:30:08 +0100 | [diff] [blame] | 81 | keys_.insert(HashedKey<StoreKey> { hash, store_key }); |
Vladimir Marko | 35831e8 | 2015-09-11 11:59:18 +0100 | [diff] [blame] | 82 | return store_key; |
| 83 | } |
| 84 | |
Vladimir Marko | f67e8c3 | 2022-03-17 12:55:27 +0000 | [diff] [blame] | 85 | size_t Size(Thread* self) { |
| 86 | MutexLock lock(self, lock_); |
| 87 | return keys_.size(); |
| 88 | } |
| 89 | |
Vladimir Marko | 35831e8 | 2015-09-11 11:59:18 +0100 | [diff] [blame] | 90 | void UpdateStats(Thread* self, Stats* global_stats) REQUIRES(!lock_) { |
| 91 | // HashSet<> doesn't keep entries ordered by hash, so we actually allocate memory |
| 92 | // for bookkeeping while collecting the stats. |
| 93 | std::unordered_map<HashType, size_t> stats; |
| 94 | { |
| 95 | MutexLock lock(self, lock_); |
| 96 | // Note: The total_probe_distance will be updated with the current state. |
| 97 | // It may have been higher before a re-hash. |
| 98 | global_stats->total_probe_distance += keys_.TotalProbeDistance(); |
Vladimir Marko | 54159c6 | 2018-06-20 14:30:08 +0100 | [diff] [blame] | 99 | global_stats->total_size += keys_.size(); |
Vladimir Marko | 35831e8 | 2015-09-11 11:59:18 +0100 | [diff] [blame] | 100 | for (const HashedKey<StoreKey>& key : keys_) { |
| 101 | auto it = stats.find(key.Hash()); |
| 102 | if (it == stats.end()) { |
| 103 | stats.insert({key.Hash(), 1u}); |
| 104 | } else { |
| 105 | ++it->second; |
| 106 | } |
| 107 | } |
| 108 | } |
| 109 | for (const auto& entry : stats) { |
| 110 | size_t number_of_entries = entry.second; |
| 111 | if (number_of_entries > 1u) { |
| 112 | global_stats->collision_sum += number_of_entries - 1u; |
| 113 | global_stats->collision_max = std::max(global_stats->collision_max, number_of_entries); |
| 114 | } |
| 115 | } |
| 116 | } |
| 117 | |
| 118 | private: |
| 119 | template <typename T> |
| 120 | class HashedKey { |
| 121 | public: |
| 122 | HashedKey() : hash_(0u), key_(nullptr) { } |
| 123 | HashedKey(size_t hash, const T* key) : hash_(hash), key_(key) { } |
| 124 | |
| 125 | size_t Hash() const { |
| 126 | return hash_; |
| 127 | } |
| 128 | |
| 129 | const T* Key() const { |
| 130 | return key_; |
| 131 | } |
| 132 | |
| 133 | bool IsEmpty() const { |
| 134 | return Key() == nullptr; |
| 135 | } |
| 136 | |
| 137 | void MakeEmpty() { |
| 138 | key_ = nullptr; |
| 139 | } |
| 140 | |
| 141 | private: |
| 142 | size_t hash_; |
| 143 | const T* key_; |
| 144 | }; |
| 145 | |
| 146 | class ShardEmptyFn { |
| 147 | public: |
| 148 | bool IsEmpty(const HashedKey<StoreKey>& key) const { |
| 149 | return key.IsEmpty(); |
| 150 | } |
| 151 | |
| 152 | void MakeEmpty(HashedKey<StoreKey>& key) { |
| 153 | key.MakeEmpty(); |
| 154 | } |
| 155 | }; |
| 156 | |
| 157 | struct ShardHashFn { |
| 158 | template <typename T> |
| 159 | size_t operator()(const HashedKey<T>& key) const { |
| 160 | return key.Hash(); |
| 161 | } |
| 162 | }; |
| 163 | |
| 164 | struct ShardPred { |
| 165 | typename std::enable_if<!std::is_same<StoreKey, InKey>::value, bool>::type |
| 166 | operator()(const HashedKey<StoreKey>& lhs, const HashedKey<StoreKey>& rhs) const { |
| 167 | DCHECK(lhs.Key() != nullptr); |
| 168 | DCHECK(rhs.Key() != nullptr); |
| 169 | // Rehashing: stored keys are already deduplicated, so we can simply compare key pointers. |
| 170 | return lhs.Key() == rhs.Key(); |
| 171 | } |
| 172 | |
| 173 | template <typename LeftT, typename RightT> |
| 174 | bool operator()(const HashedKey<LeftT>& lhs, const HashedKey<RightT>& rhs) const { |
| 175 | DCHECK(lhs.Key() != nullptr); |
| 176 | DCHECK(rhs.Key() != nullptr); |
| 177 | return lhs.Hash() == rhs.Hash() && |
| 178 | lhs.Key()->size() == rhs.Key()->size() && |
| 179 | std::equal(lhs.Key()->begin(), lhs.Key()->end(), rhs.Key()->begin()); |
| 180 | } |
| 181 | }; |
| 182 | |
| 183 | Alloc alloc_; |
| 184 | const std::string lock_name_; |
| 185 | Mutex lock_; |
| 186 | HashSet<HashedKey<StoreKey>, ShardEmptyFn, ShardHashFn, ShardPred> keys_ GUARDED_BY(lock_); |
| 187 | }; |
| 188 | |
| 189 | template <typename InKey, |
| 190 | typename StoreKey, |
| 191 | typename Alloc, |
| 192 | typename HashType, |
| 193 | typename HashFunc, |
| 194 | HashType kShard> |
| 195 | const StoreKey* DedupeSet<InKey, StoreKey, Alloc, HashType, HashFunc, kShard>::Add( |
| 196 | Thread* self, const InKey& key) { |
| 197 | uint64_t hash_start; |
| 198 | if (kIsDebugBuild) { |
| 199 | hash_start = NanoTime(); |
| 200 | } |
| 201 | HashType raw_hash = HashFunc()(key); |
| 202 | if (kIsDebugBuild) { |
| 203 | uint64_t hash_end = NanoTime(); |
| 204 | hash_time_ += hash_end - hash_start; |
| 205 | } |
| 206 | HashType shard_hash = raw_hash / kShard; |
| 207 | HashType shard_bin = raw_hash % kShard; |
| 208 | return shards_[shard_bin]->Add(self, shard_hash, key); |
| 209 | } |
| 210 | |
| 211 | template <typename InKey, |
| 212 | typename StoreKey, |
| 213 | typename Alloc, |
| 214 | typename HashType, |
| 215 | typename HashFunc, |
| 216 | HashType kShard> |
| 217 | DedupeSet<InKey, StoreKey, Alloc, HashType, HashFunc, kShard>::DedupeSet(const char* set_name, |
| 218 | const Alloc& alloc) |
| 219 | : hash_time_(0) { |
| 220 | for (HashType i = 0; i < kShard; ++i) { |
| 221 | std::ostringstream oss; |
| 222 | oss << set_name << " lock " << i; |
| 223 | shards_[i].reset(new Shard(alloc, oss.str())); |
| 224 | } |
| 225 | } |
| 226 | |
| 227 | template <typename InKey, |
| 228 | typename StoreKey, |
| 229 | typename Alloc, |
| 230 | typename HashType, |
| 231 | typename HashFunc, |
| 232 | HashType kShard> |
| 233 | DedupeSet<InKey, StoreKey, Alloc, HashType, HashFunc, kShard>::~DedupeSet() { |
| 234 | // Everything done by member destructors. |
| 235 | } |
| 236 | |
| 237 | template <typename InKey, |
| 238 | typename StoreKey, |
| 239 | typename Alloc, |
| 240 | typename HashType, |
| 241 | typename HashFunc, |
| 242 | HashType kShard> |
Vladimir Marko | f67e8c3 | 2022-03-17 12:55:27 +0000 | [diff] [blame] | 243 | size_t DedupeSet<InKey, StoreKey, Alloc, HashType, HashFunc, kShard>::Size(Thread* self) const { |
| 244 | size_t result = 0u; |
| 245 | for (const auto& shard : shards_) { |
| 246 | result += shard->Size(self); |
| 247 | } |
| 248 | return result; |
| 249 | } |
| 250 | |
| 251 | template <typename InKey, |
| 252 | typename StoreKey, |
| 253 | typename Alloc, |
| 254 | typename HashType, |
| 255 | typename HashFunc, |
| 256 | HashType kShard> |
Vladimir Marko | 35831e8 | 2015-09-11 11:59:18 +0100 | [diff] [blame] | 257 | std::string DedupeSet<InKey, StoreKey, Alloc, HashType, HashFunc, kShard>::DumpStats( |
| 258 | Thread* self) const { |
| 259 | Stats stats; |
| 260 | for (HashType shard = 0; shard < kShard; ++shard) { |
| 261 | shards_[shard]->UpdateStats(self, &stats); |
| 262 | } |
Andreas Gampe | 46ee31b | 2016-12-14 10:11:49 -0800 | [diff] [blame] | 263 | return android::base::StringPrintf("%zu collisions, %zu max hash collisions, " |
| 264 | "%zu/%zu probe distance, %" PRIu64 " ns hash time", |
| 265 | stats.collision_sum, |
| 266 | stats.collision_max, |
| 267 | stats.total_probe_distance, |
| 268 | stats.total_size, |
| 269 | hash_time_); |
Vladimir Marko | 35831e8 | 2015-09-11 11:59:18 +0100 | [diff] [blame] | 270 | } |
| 271 | |
| 272 | |
| 273 | } // namespace art |
| 274 | |
| 275 | #endif // ART_COMPILER_UTILS_DEDUPE_SET_INL_H_ |