Shard dedupe set locks.

We're seeing contention during compilation on the dedupe locks, sharding 4 ways
on an occam brings down contention by > 5x.
Improve dedupe hash function to have a FNV hash function at its heart.
Improve naming of dedupe locks.
Tidy portable JNI compiler paramters to be pointers, given that's their primary
use.

Change-Id: I95d905f2ca5fee4e83a0034926a5f6501b4aeb79
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index 31aec63..495458e 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -355,7 +355,11 @@
       jni_compiler_(NULL),
       compiler_enable_auto_elf_loading_(NULL),
       compiler_get_method_code_addr_(NULL),
-      support_boot_image_fixup_(true) {
+      support_boot_image_fixup_(true),
+      dedupe_code_("dedupe code"),
+      dedupe_mapping_table_("dedupe mapping table"),
+      dedupe_vmap_table_("dedupe vmap table"),
+      dedupe_gc_map_("dedupe gc map") {
 
   CHECK_PTHREAD_CALL(pthread_key_create, (&tls_key_, NULL), "compiler tls key");
 
diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h
index cd6b5fa..c5c53e3 100644
--- a/compiler/driver/compiler_driver.h
+++ b/compiler/driver/compiler_driver.h
@@ -449,27 +449,39 @@
   class DedupeHashFunc {
    public:
     size_t operator()(const std::vector<uint8_t>& array) const {
-      // Take a random sample of bytes.
+      // For small arrays compute a hash using every byte.
       static const size_t kSmallArrayThreshold = 16;
-      static const size_t kRandomHashCount = 16;
-      size_t hash = 0;
-      if (array.size() < kSmallArrayThreshold) {
-        for (auto c : array) {
-          hash = hash * 54 + c;
+      size_t hash = 0x811c9dc5;
+      if (array.size() <= kSmallArrayThreshold) {
+        for (uint8_t b : array) {
+          hash = (hash * 16777619) ^ b;
         }
       } else {
-        for (size_t i = 0; i < kRandomHashCount; ++i) {
+        // For larger arrays use the first 4 bytes and then select a number of other values at
+        // random.
+        static const size_t kRandomHashCount = 16;
+        for (size_t i = 0; i < 4; ++i) {
+          uint8_t b = array[i];
+          hash = (hash * 16777619) ^ b;
+        }
+        for (size_t i = 4; i < kRandomHashCount; ++i) {
           size_t r = i * 1103515245 + 12345;
-          hash = hash * 54 + array[r % array.size()];
+          uint8_t b = array[r % array.size()];
+          hash = (hash * 16777619) ^ b;
         }
       }
+      hash += hash << 13;
+      hash ^= hash >> 7;
+      hash += hash << 3;
+      hash ^= hash >> 17;
+      hash += hash << 5;
       return hash;
     }
   };
-  DedupeSet<std::vector<uint8_t>, size_t, DedupeHashFunc> dedupe_code_;
-  DedupeSet<std::vector<uint8_t>, size_t, DedupeHashFunc> dedupe_mapping_table_;
-  DedupeSet<std::vector<uint8_t>, size_t, DedupeHashFunc> dedupe_vmap_table_;
-  DedupeSet<std::vector<uint8_t>, size_t, DedupeHashFunc> dedupe_gc_map_;
+  DedupeSet<std::vector<uint8_t>, size_t, DedupeHashFunc, 4> dedupe_code_;
+  DedupeSet<std::vector<uint8_t>, size_t, DedupeHashFunc, 4> dedupe_mapping_table_;
+  DedupeSet<std::vector<uint8_t>, size_t, DedupeHashFunc, 4> dedupe_vmap_table_;
+  DedupeSet<std::vector<uint8_t>, size_t, DedupeHashFunc, 4> dedupe_gc_map_;
 
   DISALLOW_COPY_AND_ASSIGN(CompilerDriver);
 };
diff --git a/compiler/jni/portable/jni_compiler.cc b/compiler/jni/portable/jni_compiler.cc
index 43408a7..0c14346 100644
--- a/compiler/jni/portable/jni_compiler.cc
+++ b/compiler/jni/portable/jni_compiler.cc
@@ -50,9 +50,9 @@
 using ::art::llvm::runtime_support::RuntimeId;
 
 JniCompiler::JniCompiler(LlvmCompilationUnit* cunit,
-                         CompilerDriver& driver,
+                         CompilerDriver* driver,
                          const DexCompilationUnit* dex_compilation_unit)
-    : cunit_(cunit), driver_(&driver), module_(cunit_->GetModule()),
+    : cunit_(cunit), driver_(driver), module_(cunit_->GetModule()),
       context_(cunit_->GetLLVMContext()), irb_(*cunit_->GetIRBuilder()),
       dex_compilation_unit_(dex_compilation_unit),
       func_(NULL), elf_func_idx_(0) {
diff --git a/compiler/jni/portable/jni_compiler.h b/compiler/jni/portable/jni_compiler.h
index d20c63b..ffabfe6 100644
--- a/compiler/jni/portable/jni_compiler.h
+++ b/compiler/jni/portable/jni_compiler.h
@@ -54,7 +54,7 @@
 class JniCompiler {
  public:
   JniCompiler(LlvmCompilationUnit* cunit,
-              CompilerDriver& driver,
+              CompilerDriver* driver,
               const DexCompilationUnit* dex_compilation_unit);
 
   CompiledMethod* Compile();
@@ -67,7 +67,7 @@
 
  private:
   LlvmCompilationUnit* cunit_;
-  CompilerDriver* driver_;
+  CompilerDriver* const driver_;
 
   ::llvm::Module* module_;
   ::llvm::LLVMContext* context_;
diff --git a/compiler/llvm/compiler_llvm.cc b/compiler/llvm/compiler_llvm.cc
index fd440d5..83b0c75 100644
--- a/compiler/llvm/compiler_llvm.cc
+++ b/compiler/llvm/compiler_llvm.cc
@@ -164,7 +164,7 @@
   UniquePtr<LlvmCompilationUnit> cunit(AllocateCompilationUnit());
 
   UniquePtr<JniCompiler> jni_compiler(
-      new JniCompiler(cunit.get(), *compiler_driver_, dex_compilation_unit));
+      new JniCompiler(cunit.get(), compiler_driver_, dex_compilation_unit));
 
   return jni_compiler->Compile();
 }
diff --git a/compiler/utils/dedupe_set.h b/compiler/utils/dedupe_set.h
index f3d35d7..53c1afa 100644
--- a/compiler/utils/dedupe_set.h
+++ b/compiler/utils/dedupe_set.h
@@ -18,62 +18,65 @@
 #define ART_COMPILER_UTILS_DEDUPE_SET_H_
 
 #include <set>
+#include <string>
 
 #include "base/mutex.h"
 #include "base/stl_util.h"
 
 namespace art {
 
-// A simple data structure to handle hashed deduplication. Add is thread safe.
-template <typename Key, typename HashType, typename HashFunc>
+// A set of Keys that support a HashFunc returning HashType. Used to find duplicates of Key in the
+// Add method. The data-structure is thread-safe through the use of internal locks, it also
+// supports the lock being sharded.
+template <typename Key, typename HashType, typename HashFunc, HashType kShard = 1>
 class DedupeSet {
   typedef std::pair<HashType, Key*> HashedKey;
 
   class Comparator {
    public:
     bool operator()(const HashedKey& a, const HashedKey& b) const {
-      if (a.first < b.first) return true;
-      if (a.first > b.first) return true;
-      return *a.second < *b.second;
+      if (a.first != b.first) {
+        return a.first < b.first;
+      } else {
+        return *a.second < *b.second;
+      }
     }
   };
 
-  typedef std::set<HashedKey, Comparator> Keys;
-
  public:
-  typedef typename Keys::iterator iterator;
-  typedef typename Keys::const_iterator const_iterator;
-  typedef typename Keys::size_type size_type;
-  typedef typename Keys::value_type value_type;
-
-  iterator begin() { return keys_.begin(); }
-  const_iterator begin() const { return keys_.begin(); }
-  iterator end() { return keys_.end(); }
-  const_iterator end() const { return keys_.end(); }
-
   Key* Add(Thread* self, const Key& key) {
-    HashType hash = HashFunc()(key);
-    HashedKey hashed_key(hash, const_cast<Key*>(&key));
-    MutexLock lock(self, lock_);
-    auto it = keys_.find(hashed_key);
-    if (it != keys_.end()) {
+    HashType raw_hash = HashFunc()(key);
+    HashType shard_hash = raw_hash / kShard;
+    HashType shard_bin = raw_hash % kShard;
+    HashedKey hashed_key(shard_hash, const_cast<Key*>(&key));
+    MutexLock lock(self, *lock_[shard_bin]);
+    auto it = keys_[shard_bin].find(hashed_key);
+    if (it != keys_[shard_bin].end()) {
       return it->second;
     }
     hashed_key.second = new Key(key);
-    keys_.insert(hashed_key);
+    keys_[shard_bin].insert(hashed_key);
     return hashed_key.second;
   }
 
-  DedupeSet() : lock_("dedupe lock") {
+  explicit DedupeSet(const char* set_name) {
+    for (HashType i = 0; i < kShard; ++i) {
+      lock_name_[i] = StringPrintf("%s lock %d", set_name, i);
+      lock_[i].reset(new Mutex(lock_name_[i].c_str()));
+    }
   }
 
   ~DedupeSet() {
-    STLDeleteValues(&keys_);
+    for (HashType i = 0; i < kShard; ++i) {
+      STLDeleteValues(&keys_[i]);
+    }
   }
 
  private:
-  Mutex lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
-  Keys keys_;
+  std::string lock_name_[kShard];
+  UniquePtr<Mutex> lock_[kShard];
+  std::set<HashedKey, Comparator> keys_[kShard];
+
   DISALLOW_COPY_AND_ASSIGN(DedupeSet);
 };
 
diff --git a/compiler/utils/dedupe_set_test.cc b/compiler/utils/dedupe_set_test.cc
index 9f5e292..03d8b96 100644
--- a/compiler/utils/dedupe_set_test.cc
+++ b/compiler/utils/dedupe_set_test.cc
@@ -38,7 +38,7 @@
 TEST_F(DedupeSetTest, Test) {
   Thread* self = Thread::Current();
   typedef std::vector<uint8_t> ByteArray;
-  DedupeSet<ByteArray, size_t, DedupeHashFunc> deduplicator;
+  DedupeSet<ByteArray, size_t, DedupeHashFunc> deduplicator("test");
   ByteArray* array1;
   {
     ByteArray test1;