Mathieu Chartier | 193bad9 | 2013-08-29 18:46:00 -0700 | [diff] [blame] | 1 | /* |
| 2 | * Copyright (C) 2013 The Android Open Source Project |
| 3 | * |
| 4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | * you may not use this file except in compliance with the License. |
| 6 | * You may obtain a copy of the License at |
| 7 | * |
| 8 | * http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | * |
| 10 | * Unless required by applicable law or agreed to in writing, software |
| 11 | * distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | * See the License for the specific language governing permissions and |
| 14 | * limitations under the License. |
| 15 | */ |
| 16 | |
| 17 | #ifndef ART_COMPILER_UTILS_DEDUPE_SET_H_ |
| 18 | #define ART_COMPILER_UTILS_DEDUPE_SET_H_ |
| 19 | |
Andreas Gampe | e21dc3d | 2014-12-08 16:59:43 -0800 | [diff] [blame] | 20 | #include <algorithm> |
| 21 | #include <inttypes.h> |
| 22 | #include <memory> |
Mathieu Chartier | 193bad9 | 2013-08-29 18:46:00 -0700 | [diff] [blame] | 23 | #include <set> |
Ian Rogers | d133b97 | 2013-09-05 11:01:30 -0700 | [diff] [blame] | 24 | #include <string> |
Mathieu Chartier | 193bad9 | 2013-08-29 18:46:00 -0700 | [diff] [blame] | 25 | |
| 26 | #include "base/mutex.h" |
| 27 | #include "base/stl_util.h" |
Brian Carlstrom | ba150c3 | 2013-08-27 17:31:03 -0700 | [diff] [blame] | 28 | #include "base/stringprintf.h" |
Vladimir Marko | 80afd02 | 2015-05-19 18:08:00 +0100 | [diff] [blame] | 29 | #include "base/time_utils.h" |
Andreas Gampe | e21dc3d | 2014-12-08 16:59:43 -0800 | [diff] [blame] | 30 | #include "utils/swap_space.h" |
Mathieu Chartier | 193bad9 | 2013-08-29 18:46:00 -0700 | [diff] [blame] | 31 | |
| 32 | namespace art { |
| 33 | |
Ian Rogers | d133b97 | 2013-09-05 11:01:30 -0700 | [diff] [blame] | 34 | // A set of Keys that support a HashFunc returning HashType. Used to find duplicates of Key in the |
| 35 | // Add method. The data-structure is thread-safe through the use of internal locks, it also |
| 36 | // supports the lock being sharded. |
Andreas Gampe | e21dc3d | 2014-12-08 16:59:43 -0800 | [diff] [blame] | 37 | template <typename InKey, typename StoreKey, typename HashType, typename HashFunc, |
| 38 | HashType kShard = 1> |
Mathieu Chartier | 193bad9 | 2013-08-29 18:46:00 -0700 | [diff] [blame] | 39 | class DedupeSet { |
Andreas Gampe | e21dc3d | 2014-12-08 16:59:43 -0800 | [diff] [blame] | 40 | typedef std::pair<HashType, const InKey*> HashedInKey; |
| 41 | struct HashedKey { |
| 42 | StoreKey* store_ptr; |
| 43 | union { |
Mathieu Chartier | 2cebb24 | 2015-04-21 16:50:40 -0700 | [diff] [blame] | 44 | HashType store_hash; // Valid if store_ptr != null. |
| 45 | const HashedInKey* in_key; // Valid if store_ptr == null. |
Andreas Gampe | e21dc3d | 2014-12-08 16:59:43 -0800 | [diff] [blame] | 46 | }; |
| 47 | }; |
Mathieu Chartier | 193bad9 | 2013-08-29 18:46:00 -0700 | [diff] [blame] | 48 | |
| 49 | class Comparator { |
| 50 | public: |
| 51 | bool operator()(const HashedKey& a, const HashedKey& b) const { |
Andreas Gampe | e21dc3d | 2014-12-08 16:59:43 -0800 | [diff] [blame] | 52 | HashType a_hash = (a.store_ptr != nullptr) ? a.store_hash : a.in_key->first; |
| 53 | HashType b_hash = (b.store_ptr != nullptr) ? b.store_hash : b.in_key->first; |
| 54 | if (a_hash != b_hash) { |
| 55 | return a_hash < b_hash; |
| 56 | } |
| 57 | if (a.store_ptr != nullptr && b.store_ptr != nullptr) { |
| 58 | return std::lexicographical_compare(a.store_ptr->begin(), a.store_ptr->end(), |
| 59 | b.store_ptr->begin(), b.store_ptr->end()); |
| 60 | } else if (a.store_ptr != nullptr && b.store_ptr == nullptr) { |
| 61 | return std::lexicographical_compare(a.store_ptr->begin(), a.store_ptr->end(), |
| 62 | b.in_key->second->begin(), b.in_key->second->end()); |
| 63 | } else if (a.store_ptr == nullptr && b.store_ptr != nullptr) { |
| 64 | return std::lexicographical_compare(a.in_key->second->begin(), a.in_key->second->end(), |
| 65 | b.store_ptr->begin(), b.store_ptr->end()); |
Ian Rogers | d133b97 | 2013-09-05 11:01:30 -0700 | [diff] [blame] | 66 | } else { |
Andreas Gampe | e21dc3d | 2014-12-08 16:59:43 -0800 | [diff] [blame] | 67 | return std::lexicographical_compare(a.in_key->second->begin(), a.in_key->second->end(), |
| 68 | b.in_key->second->begin(), b.in_key->second->end()); |
Ian Rogers | d133b97 | 2013-09-05 11:01:30 -0700 | [diff] [blame] | 69 | } |
Mathieu Chartier | 193bad9 | 2013-08-29 18:46:00 -0700 | [diff] [blame] | 70 | } |
| 71 | }; |
| 72 | |
Mathieu Chartier | 193bad9 | 2013-08-29 18:46:00 -0700 | [diff] [blame] | 73 | public: |
Andreas Gampe | e21dc3d | 2014-12-08 16:59:43 -0800 | [diff] [blame] | 74 | StoreKey* Add(Thread* self, const InKey& key) { |
| 75 | uint64_t hash_start; |
| 76 | if (kIsDebugBuild) { |
| 77 | hash_start = NanoTime(); |
| 78 | } |
Ian Rogers | d133b97 | 2013-09-05 11:01:30 -0700 | [diff] [blame] | 79 | HashType raw_hash = HashFunc()(key); |
Andreas Gampe | e21dc3d | 2014-12-08 16:59:43 -0800 | [diff] [blame] | 80 | if (kIsDebugBuild) { |
| 81 | uint64_t hash_end = NanoTime(); |
| 82 | hash_time_ += hash_end - hash_start; |
| 83 | } |
Ian Rogers | d133b97 | 2013-09-05 11:01:30 -0700 | [diff] [blame] | 84 | HashType shard_hash = raw_hash / kShard; |
| 85 | HashType shard_bin = raw_hash % kShard; |
Andreas Gampe | e21dc3d | 2014-12-08 16:59:43 -0800 | [diff] [blame] | 86 | HashedInKey hashed_in_key(shard_hash, &key); |
| 87 | HashedKey hashed_key; |
| 88 | hashed_key.store_ptr = nullptr; |
| 89 | hashed_key.in_key = &hashed_in_key; |
Ian Rogers | d133b97 | 2013-09-05 11:01:30 -0700 | [diff] [blame] | 90 | MutexLock lock(self, *lock_[shard_bin]); |
| 91 | auto it = keys_[shard_bin].find(hashed_key); |
| 92 | if (it != keys_[shard_bin].end()) { |
Andreas Gampe | e21dc3d | 2014-12-08 16:59:43 -0800 | [diff] [blame] | 93 | DCHECK(it->store_ptr != nullptr); |
| 94 | return it->store_ptr; |
Mathieu Chartier | 193bad9 | 2013-08-29 18:46:00 -0700 | [diff] [blame] | 95 | } |
Andreas Gampe | e21dc3d | 2014-12-08 16:59:43 -0800 | [diff] [blame] | 96 | hashed_key.store_ptr = CreateStoreKey(key); |
| 97 | hashed_key.store_hash = shard_hash; |
Ian Rogers | d133b97 | 2013-09-05 11:01:30 -0700 | [diff] [blame] | 98 | keys_[shard_bin].insert(hashed_key); |
Andreas Gampe | e21dc3d | 2014-12-08 16:59:43 -0800 | [diff] [blame] | 99 | return hashed_key.store_ptr; |
Mathieu Chartier | 193bad9 | 2013-08-29 18:46:00 -0700 | [diff] [blame] | 100 | } |
| 101 | |
Roland Levillain | 3887c46 | 2015-08-12 18:15:42 +0100 | [diff] [blame] | 102 | DedupeSet(const char* set_name, SwapAllocator<void>& alloc) |
Andreas Gampe | e21dc3d | 2014-12-08 16:59:43 -0800 | [diff] [blame] | 103 | : allocator_(alloc), hash_time_(0) { |
Ian Rogers | d133b97 | 2013-09-05 11:01:30 -0700 | [diff] [blame] | 104 | for (HashType i = 0; i < kShard; ++i) { |
Ian Rogers | ef7d42f | 2014-01-06 12:55:46 -0800 | [diff] [blame] | 105 | std::ostringstream oss; |
| 106 | oss << set_name << " lock " << i; |
| 107 | lock_name_[i] = oss.str(); |
Ian Rogers | d133b97 | 2013-09-05 11:01:30 -0700 | [diff] [blame] | 108 | lock_[i].reset(new Mutex(lock_name_[i].c_str())); |
| 109 | } |
Mathieu Chartier | 193bad9 | 2013-08-29 18:46:00 -0700 | [diff] [blame] | 110 | } |
| 111 | |
| 112 | ~DedupeSet() { |
Andreas Gampe | e21dc3d | 2014-12-08 16:59:43 -0800 | [diff] [blame] | 113 | // Have to manually free all pointers. |
| 114 | for (auto& shard : keys_) { |
| 115 | for (const auto& hashed_key : shard) { |
| 116 | DCHECK(hashed_key.store_ptr != nullptr); |
| 117 | DeleteStoreKey(hashed_key.store_ptr); |
| 118 | } |
Ian Rogers | d133b97 | 2013-09-05 11:01:30 -0700 | [diff] [blame] | 119 | } |
Mathieu Chartier | 193bad9 | 2013-08-29 18:46:00 -0700 | [diff] [blame] | 120 | } |
| 121 | |
Andreas Gampe | e21dc3d | 2014-12-08 16:59:43 -0800 | [diff] [blame] | 122 | std::string DumpStats() const { |
| 123 | size_t collision_sum = 0; |
| 124 | size_t collision_max = 0; |
| 125 | for (HashType shard = 0; shard < kShard; ++shard) { |
| 126 | HashType last_hash = 0; |
| 127 | size_t collision_cur_max = 0; |
| 128 | for (const HashedKey& key : keys_[shard]) { |
| 129 | DCHECK(key.store_ptr != nullptr); |
| 130 | if (key.store_hash == last_hash) { |
| 131 | collision_cur_max++; |
| 132 | if (collision_cur_max > 1) { |
| 133 | collision_sum++; |
| 134 | if (collision_cur_max > collision_max) { |
| 135 | collision_max = collision_cur_max; |
| 136 | } |
| 137 | } |
| 138 | } else { |
| 139 | collision_cur_max = 1; |
| 140 | last_hash = key.store_hash; |
| 141 | } |
| 142 | } |
| 143 | } |
| 144 | return StringPrintf("%zu collisions, %zu max bucket size, %" PRIu64 " ns hash time", |
| 145 | collision_sum, collision_max, hash_time_); |
| 146 | } |
| 147 | |
Mathieu Chartier | 193bad9 | 2013-08-29 18:46:00 -0700 | [diff] [blame] | 148 | private: |
Andreas Gampe | e21dc3d | 2014-12-08 16:59:43 -0800 | [diff] [blame] | 149 | StoreKey* CreateStoreKey(const InKey& key) { |
| 150 | StoreKey* ret = allocator_.allocate(1); |
| 151 | allocator_.construct(ret, key.begin(), key.end(), allocator_); |
| 152 | return ret; |
| 153 | } |
| 154 | |
| 155 | void DeleteStoreKey(StoreKey* key) { |
| 156 | SwapAllocator<StoreKey> alloc(allocator_); |
| 157 | alloc.destroy(key); |
| 158 | alloc.deallocate(key, 1); |
| 159 | } |
| 160 | |
Ian Rogers | d133b97 | 2013-09-05 11:01:30 -0700 | [diff] [blame] | 161 | std::string lock_name_[kShard]; |
Ian Rogers | 700a402 | 2014-05-19 16:49:03 -0700 | [diff] [blame] | 162 | std::unique_ptr<Mutex> lock_[kShard]; |
Ian Rogers | d133b97 | 2013-09-05 11:01:30 -0700 | [diff] [blame] | 163 | std::set<HashedKey, Comparator> keys_[kShard]; |
Andreas Gampe | e21dc3d | 2014-12-08 16:59:43 -0800 | [diff] [blame] | 164 | SwapAllocator<StoreKey> allocator_; |
| 165 | uint64_t hash_time_; |
Ian Rogers | d133b97 | 2013-09-05 11:01:30 -0700 | [diff] [blame] | 166 | |
Mathieu Chartier | 193bad9 | 2013-08-29 18:46:00 -0700 | [diff] [blame] | 167 | DISALLOW_COPY_AND_ASSIGN(DedupeSet); |
| 168 | }; |
| 169 | |
| 170 | } // namespace art |
| 171 | |
| 172 | #endif // ART_COMPILER_UTILS_DEDUPE_SET_H_ |