author:    2015-06-22 15:57:38 -0700
committer: 2015-06-22 16:26:58 -0700
commit:    3552d96086c75523a76f399a13dd85d65eaa2d19
tree:      6c3223813af09bdf81de7b7291b0a47153928052
parent:    ec3a4e7cdc4f268b40d923227c125429f4ee4884
base: Fix an infinite loop in HashSet::Insert
Also adds a test for HashSet::ShrinkToMaximumLoad.
(This bug was only reachable through ShrinkToMaximumLoad, which is not
called from anywhere other than the new test.)
Change-Id: I5276b4b3f4ecf6090bb545ddd1752758b11609dd
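To make the failure mode concrete, here is a minimal C++ sketch of how a linear probe spins once every slot of an open-addressing table is occupied. It is illustrative only: ToyTable, kEmpty, and the modulo hash are invented for this sketch and are not ART's HashSet.

#include <cstddef>
#include <vector>

// Toy open-addressing table, illustrative only.
class ToyTable {
 public:
  explicit ToyTable(size_t num_buckets) : slots_(num_buckets, kEmpty) {}

  // Analogue of FirstAvailableSlot: linear-probe for an empty slot.
  // If every slot is occupied, the probe wraps around forever -- the
  // infinite loop this patch bounds with DCHECKs and fixes by keeping
  // the expand threshold in sync with the bucket count.
  size_t FirstAvailableSlot(size_t index) const {
    while (slots_[index] != kEmpty) {
      index = (index + 1) % slots_.size();  // Wrap-around probe (NextIndex).
    }
    return index;
  }

  void Insert(int value) {
    // A real table expands once an element-count threshold is hit; if that
    // threshold is stale (larger than the bucket count), nothing stops the
    // table from filling up completely, and the next probe never returns.
    size_t index = static_cast<size_t>(value) % slots_.size();
    slots_[FirstAvailableSlot(index)] = value;
  }

 private:
  static constexpr int kEmpty = -1;
  std::vector<int> slots_;
};

In the patch below, the same probe is bounded by DCHECK_LE(non_empty_count, NumBuckets()), and the fix proper moves the elements_until_expand_ update into Resize() so that every caller, including the shrink path, restores the threshold.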
-rw-r--r--  runtime/base/hash_set.h       | 11
-rw-r--r--  runtime/base/hash_set_test.cc | 32
2 files changed, 40 insertions(+), 3 deletions(-)
diff --git a/runtime/base/hash_set.h b/runtime/base/hash_set.h
index 8daf6d4c9e..f2c8355f53 100644
--- a/runtime/base/hash_set.h
+++ b/runtime/base/hash_set.h
@@ -469,8 +469,6 @@ class HashSet {
     }
     // Resize based on the minimum load factor.
     Resize(min_index);
-    // When we hit elements_until_expand_, we are at the max load factor and must expand again.
-    elements_until_expand_ = NumBuckets() * max_load_factor_;
   }
 
   // Expand / shrink the table to the new specified size.
@@ -493,11 +491,18 @@ class HashSet {
     if (owned_data) {
       allocfn_.deallocate(old_data, old_num_buckets);
     }
+
+    // When we hit elements_until_expand_, we are at the max load factor and must expand again.
+    elements_until_expand_ = NumBuckets() * max_load_factor_;
   }
 
   ALWAYS_INLINE size_t FirstAvailableSlot(size_t index) const {
+    DCHECK_LT(index, NumBuckets());  // Don't try to get a slot out of range.
+    size_t non_empty_count = 0;
     while (!emptyfn_.IsEmpty(data_[index])) {
       index = NextIndex(index);
+      non_empty_count++;
+      DCHECK_LE(non_empty_count, NumBuckets());  // Don't loop forever.
     }
     return index;
   }
@@ -526,7 +531,7 @@ class HashSet {
   Pred pred_;  // Equals function.
   size_t num_elements_;  // Number of inserted elements.
   size_t num_buckets_;  // Number of hash table buckets.
-  size_t elements_until_expand_;  // Maxmimum number of elements until we expand the table.
+  size_t elements_until_expand_;  // Maximum number of elements until we expand the table.
   bool owns_data_;  // If we own data_ and are responsible for freeing it.
   T* data_;  // Backing storage.
   double min_load_factor_;
diff --git a/runtime/base/hash_set_test.cc b/runtime/base/hash_set_test.cc
index e88637ffa5..fd9eb45e3f 100644
--- a/runtime/base/hash_set_test.cc
+++ b/runtime/base/hash_set_test.cc
@@ -156,6 +156,38 @@ TEST_F(HashSetTest, TestSwap) {
   }
 }
 
+TEST_F(HashSetTest, TestShrink) {
+  HashSet<std::string, IsEmptyFnString> hash_set;
+  std::vector<std::string> strings = {"a", "b", "c", "d", "e", "f", "g"};
+  for (size_t i = 0; i < strings.size(); ++i) {
+    // Insert some strings into the beginning of our hash set to establish an initial size
+    hash_set.Insert(strings[i]);
+  }
+
+  hash_set.ShrinkToMaximumLoad();
+  const double initial_load = hash_set.CalculateLoadFactor();
+
+  // Insert a bunch of random strings to guarantee that we grow the capacity.
+  std::vector<std::string> random_strings;
+  static constexpr size_t count = 1000;
+  for (size_t i = 0; i < count; ++i) {
+    random_strings.push_back(RandomString(10));
+    hash_set.Insert(random_strings[i]);
+  }
+
+  // Erase all the extra strings which guarantees that our load factor will be really bad.
+  for (size_t i = 0; i < count; ++i) {
+    hash_set.Erase(hash_set.Find(random_strings[i]));
+  }
+
+  const double bad_load = hash_set.CalculateLoadFactor();
+  EXPECT_GT(initial_load, bad_load);
+
+  // Shrink again, the load factor should be good again.
+  hash_set.ShrinkToMaximumLoad();
+  EXPECT_DOUBLE_EQ(initial_load, hash_set.CalculateLoadFactor());
+}
+
 TEST_F(HashSetTest, TestStress) {
   HashSet<std::string, IsEmptyFnString> hash_set;
   std::unordered_multiset<std::string> std_set;
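Worked arithmetic for the stale threshold, under an assumed max load factor of 0.7 (illustrative; the real defaults are defined in hash_set.h). Before this patch, only the expand path refreshed elements_until_expand_ after Resize(); a resize reached through the shrink path left the threshold computed from the old, larger bucket count:

#include <cassert>
#include <cstddef>

int main() {
  const double max_load_factor = 0.7;  // Assumed value for illustration.

  // Before the fix: threshold computed at the old size, then a shrink
  // (e.g. via ShrinkToMaximumLoad) resizes without refreshing it.
  size_t num_buckets = 100;
  size_t elements_until_expand =
      static_cast<size_t>(num_buckets * max_load_factor);  // 70
  num_buckets = 10;  // Resize() shrinks; the threshold is now stale.
  // The table can fill all 10 slots long before 70 elements are reached,
  // so Insert's probe for an empty slot never terminates.
  assert(elements_until_expand > num_buckets);

  // After the fix: Resize() itself recomputes the threshold, so a shrink
  // leaves elements_until_expand below the bucket count and an Insert
  // expands the table before it can become completely full.
  elements_until_expand =
      static_cast<size_t>(num_buckets * max_load_factor);  // 7
  assert(elements_until_expand < num_buckets);
  return 0;
}

Moving the update into Resize() makes the invariant a property of every resize rather than of each caller, which is why the two lines removed in the first hunk reappear at the end of Resize() in the second.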