| /* |
| * Copyright (C) 2014 The Android Open Source Project |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| #include "hash_set.h" |
| |
| #include <forward_list> |
| #include <map> |
| #include <sstream> |
| #include <string> |
| #include <string_view> |
| #include <unordered_set> |
| #include <vector> |
| |
| #include <gtest/gtest.h> |
| |
| #include "hash_map.h" |
| |
| namespace art { |
| |
// Emptiness policy for std::string elements; the tests in this file pass it to HashSet as the
// second template argument. The empty string is the "empty" value.
struct IsEmptyFnString {
  // Returns true when `item` is the empty string.
  bool IsEmpty(const std::string& item) const {
    return item.size() == 0u;
  }

  // Turns `item` into the empty string.
  void MakeEmpty(std::string& item) const {
    item.erase();
  }
};
| |
| class HashSetTest : public testing::Test { |
| public: |
| HashSetTest() : seed_(97421), unique_number_(0) { |
| } |
| std::string RandomString(size_t len) { |
| std::ostringstream oss; |
| for (size_t i = 0; i < len; ++i) { |
| oss << static_cast<char>('A' + PRand() % 64); |
| } |
| static_assert(' ' < 'A', "space must be less than a"); |
| oss << " " << unique_number_++; // Relies on ' ' < 'A' |
| return oss.str(); |
| } |
| void SetSeed(size_t seed) { |
| seed_ = seed; |
| } |
| size_t PRand() { // Pseudo random. |
| seed_ = seed_ * 1103515245 + 12345; |
| return seed_; |
| } |
| |
| private: |
| size_t seed_; |
| size_t unique_number_; |
| }; |
| |
| TEST_F(HashSetTest, TestSmoke) { |
| HashSet<std::string, IsEmptyFnString> hash_set; |
| const std::string test_string = "hello world 1234"; |
| ASSERT_TRUE(hash_set.empty()); |
| ASSERT_EQ(hash_set.size(), 0U); |
| hash_set.insert(test_string); |
| auto it = hash_set.find(test_string); |
| ASSERT_EQ(*it, test_string); |
| auto after_it = hash_set.erase(it); |
| ASSERT_TRUE(after_it == hash_set.end()); |
| ASSERT_TRUE(hash_set.empty()); |
| ASSERT_EQ(hash_set.size(), 0U); |
| it = hash_set.find(test_string); |
| ASSERT_TRUE(it == hash_set.end()); |
| } |
| |
| TEST_F(HashSetTest, TestInsertAndErase) { |
| HashSet<std::string, IsEmptyFnString> hash_set; |
| static constexpr size_t count = 1000; |
| std::vector<std::string> strings; |
| for (size_t i = 0; i < count; ++i) { |
| // Insert a bunch of elements and make sure we can find them. |
| strings.push_back(RandomString(10)); |
| hash_set.insert(strings[i]); |
| auto it = hash_set.find(strings[i]); |
| ASSERT_TRUE(it != hash_set.end()); |
| ASSERT_EQ(*it, strings[i]); |
| } |
| ASSERT_EQ(strings.size(), hash_set.size()); |
| // Try to erase the odd strings. |
| for (size_t i = 1; i < count; i += 2) { |
| auto it = hash_set.find(strings[i]); |
| ASSERT_TRUE(it != hash_set.end()); |
| ASSERT_EQ(*it, strings[i]); |
| hash_set.erase(it); |
| } |
| // Test removed. |
| for (size_t i = 1; i < count; i += 2) { |
| auto it = hash_set.find(strings[i]); |
| ASSERT_TRUE(it == hash_set.end()); |
| } |
| for (size_t i = 0; i < count; i += 2) { |
| auto it = hash_set.find(strings[i]); |
| ASSERT_TRUE(it != hash_set.end()); |
| ASSERT_EQ(*it, strings[i]); |
| } |
| } |
| |
| TEST_F(HashSetTest, TestIterator) { |
| HashSet<std::string, IsEmptyFnString> hash_set; |
| ASSERT_TRUE(hash_set.begin() == hash_set.end()); |
| static constexpr size_t count = 1000; |
| std::vector<std::string> strings; |
| for (size_t i = 0; i < count; ++i) { |
| // Insert a bunch of elements and make sure we can find them. |
| strings.push_back(RandomString(10)); |
| hash_set.insert(strings[i]); |
| } |
| // Make sure we visit each string exactly once. |
| std::map<std::string, size_t> found_count; |
| for (const std::string& s : hash_set) { |
| ++found_count[s]; |
| } |
| for (size_t i = 0; i < count; ++i) { |
| ASSERT_EQ(found_count[strings[i]], 1U); |
| } |
| found_count.clear(); |
| // Remove all the elements with iterator erase. |
| for (auto it = hash_set.begin(); it != hash_set.end();) { |
| ++found_count[*it]; |
| it = hash_set.erase(it); |
| ASSERT_EQ(hash_set.Verify(), 0U); |
| } |
| for (size_t i = 0; i < count; ++i) { |
| ASSERT_EQ(found_count[strings[i]], 1U); |
| } |
| } |
| |
| TEST_F(HashSetTest, TestSwap) { |
| HashSet<std::string, IsEmptyFnString> hash_seta, hash_setb; |
| std::vector<std::string> strings; |
| static constexpr size_t count = 1000; |
| for (size_t i = 0; i < count; ++i) { |
| strings.push_back(RandomString(10)); |
| hash_seta.insert(strings[i]); |
| } |
| std::swap(hash_seta, hash_setb); |
| hash_seta.insert("TEST"); |
| hash_setb.insert("TEST2"); |
| for (size_t i = 0; i < count; ++i) { |
| strings.push_back(RandomString(10)); |
| hash_seta.insert(strings[i]); |
| } |
| } |
| |
| TEST_F(HashSetTest, TestShrink) { |
| HashSet<std::string, IsEmptyFnString> hash_set; |
| std::vector<std::string> strings = {"a", "b", "c", "d", "e", "f", "g"}; |
| for (size_t i = 0; i < strings.size(); ++i) { |
| // Insert some strings into the beginning of our hash set to establish an initial size |
| hash_set.insert(strings[i]); |
| } |
| |
| hash_set.ShrinkToMaximumLoad(); |
| const double initial_load = hash_set.CalculateLoadFactor(); |
| |
| // Insert a bunch of random strings to guarantee that we grow the capacity. |
| std::vector<std::string> random_strings; |
| static constexpr size_t count = 1000; |
| for (size_t i = 0; i < count; ++i) { |
| random_strings.push_back(RandomString(10)); |
| hash_set.insert(random_strings[i]); |
| } |
| |
| // Erase all the extra strings which guarantees that our load factor will be really bad. |
| for (size_t i = 0; i < count; ++i) { |
| hash_set.erase(hash_set.find(random_strings[i])); |
| } |
| |
| const double bad_load = hash_set.CalculateLoadFactor(); |
| EXPECT_GT(initial_load, bad_load); |
| |
| // Shrink again, the load factor should be good again. |
| hash_set.ShrinkToMaximumLoad(); |
| EXPECT_DOUBLE_EQ(initial_load, hash_set.CalculateLoadFactor()); |
| |
| // Make sure all the initial elements we had are still there |
| for (const std::string& initial_string : strings) { |
| EXPECT_NE(hash_set.end(), hash_set.find(initial_string)) |
| << "expected to find " << initial_string; |
| } |
| } |
| |
| TEST_F(HashSetTest, TestLoadFactor) { |
| HashSet<std::string, IsEmptyFnString> hash_set; |
| static constexpr size_t kStringCount = 1000; |
| static constexpr double kEpsilon = 0.01; |
| for (size_t i = 0; i < kStringCount; ++i) { |
| hash_set.insert(RandomString(i % 10 + 1)); |
| } |
| // Check that changing the load factor resizes the table to be within the target range. |
| EXPECT_GE(hash_set.CalculateLoadFactor() + kEpsilon, hash_set.GetMinLoadFactor()); |
| EXPECT_LE(hash_set.CalculateLoadFactor() - kEpsilon, hash_set.GetMaxLoadFactor()); |
| hash_set.SetLoadFactor(0.1, 0.3); |
| EXPECT_DOUBLE_EQ(0.1, hash_set.GetMinLoadFactor()); |
| EXPECT_DOUBLE_EQ(0.3, hash_set.GetMaxLoadFactor()); |
| EXPECT_LE(hash_set.CalculateLoadFactor() - kEpsilon, hash_set.GetMaxLoadFactor()); |
| hash_set.SetLoadFactor(0.6, 0.8); |
| EXPECT_LE(hash_set.CalculateLoadFactor() - kEpsilon, hash_set.GetMaxLoadFactor()); |
| } |
| |
| TEST_F(HashSetTest, TestStress) { |
| HashSet<std::string, IsEmptyFnString> hash_set; |
| std::unordered_set<std::string> std_set; |
| std::vector<std::string> strings; |
| static constexpr size_t string_count = 2000; |
| static constexpr size_t operations = 100000; |
| static constexpr size_t target_size = 5000; |
| for (size_t i = 0; i < string_count; ++i) { |
| strings.push_back(RandomString(i % 10 + 1)); |
| } |
| const size_t seed = time(nullptr); |
| SetSeed(seed); |
| LOG(INFO) << "Starting stress test with seed " << seed; |
| for (size_t i = 0; i < operations; ++i) { |
| ASSERT_EQ(hash_set.size(), std_set.size()); |
| size_t delta = std::abs(static_cast<ssize_t>(target_size) - |
| static_cast<ssize_t>(hash_set.size())); |
| size_t n = PRand(); |
| if (n % target_size == 0) { |
| hash_set.clear(); |
| std_set.clear(); |
| ASSERT_TRUE(hash_set.empty()); |
| ASSERT_TRUE(std_set.empty()); |
| } else if (n % target_size < delta) { |
| // Skew towards adding elements until we are at the desired size. |
| const std::string& s = strings[PRand() % string_count]; |
| hash_set.insert(s); |
| std_set.insert(s); |
| ASSERT_EQ(*hash_set.find(s), *std_set.find(s)); |
| } else { |
| const std::string& s = strings[PRand() % string_count]; |
| auto it1 = hash_set.find(s); |
| auto it2 = std_set.find(s); |
| ASSERT_EQ(it1 == hash_set.end(), it2 == std_set.end()); |
| if (it1 != hash_set.end()) { |
| ASSERT_EQ(*it1, *it2); |
| hash_set.erase(it1); |
| std_set.erase(it2); |
| } |
| } |
| } |
| } |
| |
// Emptiness policy for (string, int) pairs; the HashMap test in this file passes it as the third
// template argument. A pair counts as empty when its string is empty; the int is ignored.
struct IsEmptyStringPair {
  // Returns true when the string half of `pair` is empty.
  bool IsEmpty(const std::pair<std::string, int>& pair) const {
    const std::string& key = pair.first;
    return key.empty();
  }

  // Clears the string half of `pair` and leaves the int half as it is.
  void MakeEmpty(std::pair<std::string, int>& pair) const {
    std::string& key = pair.first;
    key.clear();
  }
};
| |
| TEST_F(HashSetTest, TestHashMap) { |
| HashMap<std::string, int, IsEmptyStringPair> hash_map; |
| hash_map.insert(std::make_pair(std::string("abcd"), 123)); |
| hash_map.insert(std::make_pair(std::string("abcd"), 124)); |
| hash_map.insert(std::make_pair(std::string("bags"), 444)); |
| auto it = hash_map.find(std::string("abcd")); |
| ASSERT_EQ(it->second, 123); |
| hash_map.erase(it); |
| it = hash_map.find(std::string("abcd")); |
| ASSERT_EQ(it, hash_map.end()); |
| } |
| |
// Emptiness policy for std::vector<int> elements; the alternate-key lookup test in this file
// passes it to HashSet as the second template argument. A vector without elements is "empty".
struct IsEmptyFnVectorInt {
  // Returns true when `item` holds no elements.
  bool IsEmpty(const std::vector<int>& item) const {
    return item.size() == 0u;
  }

  // Removes every element from `item`.
  void MakeEmpty(std::vector<int>& item) const {
    item.erase(item.begin(), item.end());
  }
};
| |
// Hashes the integers in [begin, end): starting from zero, the running value is doubled and the
// next element is added, once per element. An empty range hashes to 0.
template <typename T>
size_t HashIntSequence(T begin, T end) {
  size_t result = 0;
  // `begin` is a by-value copy, so it can be advanced directly.
  while (begin != end) {
    result = result * 2 + *begin;
    ++begin;
  }
  return result;
}
| |
| struct VectorIntHashEquals { |
| std::size_t operator()(const std::vector<int>& item) const { |
| return HashIntSequence(item.begin(), item.end()); |
| } |
| |
| std::size_t operator()(const std::forward_list<int>& item) const { |
| return HashIntSequence(item.begin(), item.end()); |
| } |
| |
| bool operator()(const std::vector<int>& a, const std::vector<int>& b) const { |
| return a == b; |
| } |
| |
| bool operator()(const std::vector<int>& a, const std::forward_list<int>& b) const { |
| auto aiter = a.begin(); |
| auto biter = b.begin(); |
| while (aiter != a.end() && biter != b.end()) { |
| if (*aiter != *biter) { |
| return false; |
| } |
| aiter++; |
| biter++; |
| } |
| return (aiter == a.end() && biter == b.end()); |
| } |
| }; |
| |
| TEST_F(HashSetTest, TestLookupByAlternateKeyType) { |
| HashSet<std::vector<int>, IsEmptyFnVectorInt, VectorIntHashEquals, VectorIntHashEquals> hash_set; |
| hash_set.insert(std::vector<int>({1, 2, 3, 4})); |
| hash_set.insert(std::vector<int>({4, 2})); |
| ASSERT_EQ(hash_set.end(), hash_set.find(std::vector<int>({1, 1, 1, 1}))); |
| ASSERT_NE(hash_set.end(), hash_set.find(std::vector<int>({1, 2, 3, 4}))); |
| ASSERT_EQ(hash_set.end(), hash_set.find(std::forward_list<int>({1, 1, 1, 1}))); |
| ASSERT_NE(hash_set.end(), hash_set.find(std::forward_list<int>({1, 2, 3, 4}))); |
| } |
| |
| TEST_F(HashSetTest, TestReserve) { |
| HashSet<std::string, IsEmptyFnString> hash_set; |
| std::vector<size_t> sizes = {1, 10, 25, 55, 128, 1024, 4096}; |
| for (size_t size : sizes) { |
| hash_set.reserve(size); |
| const size_t buckets_before = hash_set.NumBuckets(); |
| // Check that we expanded enough. |
| CHECK_GE(hash_set.ElementsUntilExpand(), size); |
| // Try inserting elements until we are at our reserve size and ensure the hash set did not |
| // expand. |
| while (hash_set.size() < size) { |
| hash_set.insert(std::to_string(hash_set.size())); |
| } |
| CHECK_EQ(hash_set.NumBuckets(), buckets_before); |
| } |
| // Check the behaviour for shrinking, it does not necessarily resize down. |
| constexpr size_t size = 100; |
| hash_set.reserve(size); |
| CHECK_GE(hash_set.ElementsUntilExpand(), size); |
| } |
| |
| TEST_F(HashSetTest, IteratorConversion) { |
| const char* test_string = "test string"; |
| HashSet<std::string> hash_set; |
| HashSet<std::string>::iterator it = hash_set.insert(test_string).first; |
| HashSet<std::string>::const_iterator cit = it; |
| ASSERT_TRUE(it == cit); |
| ASSERT_EQ(*it, *cit); |
| } |
| |
| TEST_F(HashSetTest, StringSearchStringView) { |
| const char* test_string = "test string"; |
| HashSet<std::string> hash_set; |
| HashSet<std::string>::iterator insert_pos = hash_set.insert(test_string).first; |
| HashSet<std::string>::iterator it = hash_set.find(std::string_view(test_string)); |
| ASSERT_TRUE(it == insert_pos); |
| } |
| |
| TEST_F(HashSetTest, DoubleInsert) { |
| const char* test_string = "test string"; |
| HashSet<std::string> hash_set; |
| hash_set.insert(test_string); |
| hash_set.insert(test_string); |
| ASSERT_EQ(1u, hash_set.size()); |
| } |
| |
| TEST_F(HashSetTest, Preallocated) { |
| static const size_t kBufferSize = 64; |
| uint32_t buffer[kBufferSize]; |
| HashSet<uint32_t> hash_set(buffer, kBufferSize); |
| size_t max_without_resize = kBufferSize * hash_set.GetMaxLoadFactor(); |
| for (size_t i = 0; i != max_without_resize; ++i) { |
| hash_set.insert(i); |
| } |
| ASSERT_FALSE(hash_set.owns_data_); |
| hash_set.insert(max_without_resize); |
| ASSERT_TRUE(hash_set.owns_data_); |
| } |
| |
// Emptiness policy for uint16_t elements: the largest uint16_t value is the "empty" marker.
class SmallIndexEmptyFn {
 public:
  // Returns true when `item` equals the empty marker.
  bool IsEmpty(const uint16_t& item) const {
    constexpr uint16_t kEmptyMarker = std::numeric_limits<uint16_t>::max();
    return item == kEmptyMarker;
  }

  // Overwrites `item` with the empty marker.
  void MakeEmpty(uint16_t& item) const {
    constexpr uint16_t kEmptyMarker = std::numeric_limits<uint16_t>::max();
    item = kEmptyMarker;
  }
};
| |
| class StatefulHashFn { |
| public: |
| explicit StatefulHashFn(const std::vector<std::string>* strings) |
| : strings_(strings) {} |
| |
| size_t operator() (const uint16_t& index) const { |
| CHECK_LT(index, strings_->size()); |
| return (*this)((*strings_)[index]); |
| } |
| |
| size_t operator() (std::string_view s) const { |
| return DataHash()(s); |
| } |
| |
| private: |
| const std::vector<std::string>* strings_; |
| }; |
| |
| class StatefulPred { |
| public: |
| explicit StatefulPred(const std::vector<std::string>* strings) |
| : strings_(strings) {} |
| |
| bool operator() (const uint16_t& lhs, const uint16_t& rhs) const { |
| CHECK_LT(rhs, strings_->size()); |
| return (*this)(lhs, (*strings_)[rhs]); |
| } |
| |
| bool operator() (const uint16_t& lhs, std::string_view rhs) const { |
| CHECK_LT(lhs, strings_->size()); |
| return (*strings_)[lhs] == rhs; |
| } |
| |
| private: |
| const std::vector<std::string>* strings_; |
| }; |
| |
| TEST_F(HashSetTest, StatefulHashSet) { |
| std::vector<std::string> strings{ |
| "duplicate", |
| "a", |
| "b", |
| "xyz", |
| "___", |
| "123", |
| "placeholder", |
| "duplicate" |
| }; |
| const size_t duplicateFirstIndex = 0; |
| const size_t duplicateSecondIndex = strings.size() - 1u; |
| const size_t otherIndex = 1u; |
| |
| StatefulHashFn hashfn(&strings); |
| StatefulPred pred(&strings); |
| HashSet<uint16_t, SmallIndexEmptyFn, StatefulHashFn, StatefulPred> hash_set(hashfn, pred); |
| for (size_t index = 0, size = strings.size(); index != size; ++index) { |
| bool inserted = hash_set.insert(index).second; |
| ASSERT_EQ(index != duplicateSecondIndex, inserted) << index; |
| } |
| |
| // Check search by string. |
| for (size_t index = 0, size = strings.size(); index != size; ++index) { |
| auto it = hash_set.find(strings[index]); |
| ASSERT_FALSE(it == hash_set.end()); |
| ASSERT_EQ(index == duplicateSecondIndex ? duplicateFirstIndex : index, *it) << index; |
| } |
| ASSERT_TRUE(hash_set.find("missing") == hash_set.end()); |
| |
| // Check search by index. |
| for (size_t index = 0, size = strings.size(); index != size; ++index) { |
| auto it = hash_set.find(index); |
| ASSERT_FALSE(it == hash_set.end()); |
| ASSERT_EQ(index == duplicateSecondIndex ? duplicateFirstIndex : index, *it) << index; |
| } |
| // Note: Searching for index >= strings.size() is not supported by Stateful{HashFn,Pred}. |
| |
| // Test removal and search by missing index. |
| auto remove_it = hash_set.find(otherIndex); |
| ASSERT_FALSE(remove_it == hash_set.end()); |
| hash_set.erase(remove_it); |
| auto search_it = hash_set.find(otherIndex); |
| ASSERT_TRUE(search_it == hash_set.end()); |
| } |
| |
| } // namespace art |