Add image strings to intern table

When we start the runtime, we now add the image strings to the
intern table if we are the zygote. This initially caused some memory
bloat, so I added an extra unordered set to the intern table.
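
In sketch form (condensed from the new AddImageStringsToTable in
intern_table.cc below), the preload walks every dex cache in the boot
image and strong-interns each string that is already resolved, so later
Intern() calls hit the table instead of DexFile::FindStringId:

    // Sketch only; see AddImageStringsToTable below for the real code.
    for (int32_t i = 0; i < dex_caches->GetLength(); ++i) {
      mirror::DexCache* dex_cache = dex_caches->Get(i);
      const size_t num_strings = dex_cache->GetDexFile()->NumStringIds();
      for (size_t j = 0; j < num_strings; ++j) {
        mirror::String* image_string = dex_cache->GetResolvedString(j);
        if (image_string != nullptr && LookupStrong(image_string) == nullptr) {
          InsertStrong(image_string);
        }
      }
    }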

There are now two unordered sets (hash tables): one for pre-zygote
interns and one for post-zygote interns. This helps since the
pre-zygote hash table no longer gets dirtied after the zygote forks.
Even with the added image strings, we get a total memory savings of
around 5-7 MB of native PSS after device boot.
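
The split itself is small (condensed from the new InternTable::Table in
intern_table.cc below): lookups check the pre-zygote set first, inserts
always go into the post-zygote set, and creating the zygote swaps the
two sets while the pre-zygote set is still empty:

    // Sketch only; see InternTable::Table below for the real code.
    mirror::String* Table::Find(mirror::String* s) {
      auto it = pre_zygote_table_.find(GcRoot<mirror::String>(s));
      if (it != pre_zygote_table_.end()) {  // Shared pages, never dirtied after fork.
        return const_cast<GcRoot<mirror::String>&>(*it).Read();
      }
      it = post_zygote_table_.find(GcRoot<mirror::String>(s));
      return it != post_zygote_table_.end()
          ? const_cast<GcRoot<mirror::String>&>(*it).Read()
          : nullptr;
    }

    void Table::SwapPostZygoteWithPreZygote() {
      CHECK(pre_zygote_table_.empty());  // Only happens once, when the zygote is created.
      std::swap(pre_zygote_table_, post_zygote_table_);
    }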

FB launch Before:
2.20% art::DexFile::FindStringId(char const*) const
TotalTime: 2069
TotalTime: 1985
TotalTime: 2088
TotalTime: 2003
TotalTime: 2034
TotalTime: 2049
After boot native PSS: 175585 kB: Native

After:
0.27% art::DexFile::FindStringId(char const*) const
TotalTime: 1682
TotalTime: 1756
TotalTime: 1825
TotalTime: 1751
TotalTime: 1666
TotalTime: 1813
After boot native PSS: 167089 kB: Native

Bug: 18054905
Bug: 16828525
Bug: 17808975

(cherry picked from commit b6e292bf7eac9d73c6b79b1e9b7b87beb02436c9)

Change-Id: Ie367f3222f8c4db409ec49c3845276908b51e9c9
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index 6730dfe..8e080d1 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -54,6 +54,7 @@
 #include "entrypoints/quick/quick_alloc_entrypoints.h"
 #include "heap-inl.h"
 #include "image.h"
+#include "intern_table.h"
 #include "mirror/art_field-inl.h"
 #include "mirror/class-inl.h"
 #include "mirror/object.h"
@@ -1897,6 +1898,7 @@
     LOG(WARNING) << __FUNCTION__ << " called when we already have a zygote space.";
     return;
   }
+  Runtime::Current()->GetInternTable()->SwapPostZygoteWithPreZygote();
   VLOG(heap) << "Starting PreZygoteFork";
   // Trim the pages at the end of the non moving space.
   non_moving_space_->Trim();
diff --git a/runtime/gc_root.h b/runtime/gc_root.h
index b10a55c..a347622 100644
--- a/runtime/gc_root.h
+++ b/runtime/gc_root.h
@@ -30,7 +30,9 @@
   ALWAYS_INLINE MirrorType* Read() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   void VisitRoot(RootCallback* callback, void* arg, uint32_t thread_id, RootType root_type) {
+    DCHECK(!IsNull());
     callback(reinterpret_cast<mirror::Object**>(&root_), arg, thread_id, root_type);
+    DCHECK(!IsNull());
   }
 
   // This is only used by IrtIterator.
diff --git a/runtime/intern_table.cc b/runtime/intern_table.cc
index f6e6661..23324a6 100644
--- a/runtime/intern_table.cc
+++ b/runtime/intern_table.cc
@@ -29,39 +29,34 @@
 namespace art {
 
 InternTable::InternTable()
-    : log_new_roots_(false), allow_new_interns_(true),
+    : image_added_to_intern_table_(false), log_new_roots_(false),
+      allow_new_interns_(true),
       new_intern_condition_("New intern condition", *Locks::intern_table_lock_) {
 }
 
 size_t InternTable::Size() const {
   MutexLock mu(Thread::Current(), *Locks::intern_table_lock_);
-  return strong_interns_.size() + weak_interns_.size();
+  return strong_interns_.Size() + weak_interns_.Size();
 }
 
 size_t InternTable::StrongSize() const {
   MutexLock mu(Thread::Current(), *Locks::intern_table_lock_);
-  return strong_interns_.size();
+  return strong_interns_.Size();
 }
 
 size_t InternTable::WeakSize() const {
   MutexLock mu(Thread::Current(), *Locks::intern_table_lock_);
-  return weak_interns_.size();
+  return weak_interns_.Size();
 }
 
 void InternTable::DumpForSigQuit(std::ostream& os) const {
-  MutexLock mu(Thread::Current(), *Locks::intern_table_lock_);
-  os << "Intern table: " << strong_interns_.size() << " strong; "
-     << weak_interns_.size() << " weak\n";
+  os << "Intern table: " << StrongSize() << " strong; " << WeakSize() << " weak\n";
 }
 
 void InternTable::VisitRoots(RootCallback* callback, void* arg, VisitRootFlags flags) {
   MutexLock mu(Thread::Current(), *Locks::intern_table_lock_);
   if ((flags & kVisitRootFlagAllRoots) != 0) {
-    for (auto& strong_intern : strong_interns_) {
-      const_cast<GcRoot<mirror::String>&>(strong_intern).
-          VisitRoot(callback, arg, 0, kRootInternedString);
-      DCHECK(!strong_intern.IsNull());
-    }
+    strong_interns_.VisitRoots(callback, arg, flags);
   } else if ((flags & kVisitRootFlagNewRoots) != 0) {
     for (auto& root : new_strong_intern_roots_) {
       mirror::String* old_ref = root.Read<kWithoutReadBarrier>();
@@ -71,10 +66,8 @@
         // The GC moved a root in the log. Need to search the strong interns and update the
         // corresponding object. This is slow, but luckily for us, this may only happen with a
         // concurrent moving GC.
-        auto it = strong_interns_.find(GcRoot<mirror::String>(old_ref));
-        DCHECK(it != strong_interns_.end());
-        strong_interns_.erase(it);
-        strong_interns_.insert(GcRoot<mirror::String>(new_ref));
+        strong_interns_.Remove(old_ref);
+        strong_interns_.Insert(new_ref);
       }
     }
   }
@@ -91,21 +84,17 @@
 }
 
 mirror::String* InternTable::LookupStrong(mirror::String* s) {
-  return Lookup(&strong_interns_, s);
+  return strong_interns_.Find(s);
 }
 
 mirror::String* InternTable::LookupWeak(mirror::String* s) {
-  // Weak interns need a read barrier because they are weak roots.
-  return Lookup(&weak_interns_, s);
+  return weak_interns_.Find(s);
 }
 
-mirror::String* InternTable::Lookup(Table* table, mirror::String* s) {
-  Locks::intern_table_lock_->AssertHeld(Thread::Current());
-  auto it = table->find(GcRoot<mirror::String>(s));
-  if (LIKELY(it != table->end())) {
-    return const_cast<GcRoot<mirror::String>&>(*it).Read();
-  }
-  return nullptr;
+void InternTable::SwapPostZygoteWithPreZygote() {
+  MutexLock mu(Thread::Current(), *Locks::intern_table_lock_);
+  weak_interns_.SwapPostZygoteWithPreZygote();
+  strong_interns_.SwapPostZygoteWithPreZygote();
 }
 
 mirror::String* InternTable::InsertStrong(mirror::String* s) {
@@ -116,7 +105,7 @@
   if (log_new_roots_) {
     new_strong_intern_roots_.push_back(GcRoot<mirror::String>(s));
   }
-  strong_interns_.insert(GcRoot<mirror::String>(s));
+  strong_interns_.Insert(s);
   return s;
 }
 
@@ -125,12 +114,12 @@
   if (runtime->IsActiveTransaction()) {
     runtime->RecordWeakStringInsertion(s);
   }
-  weak_interns_.insert(GcRoot<mirror::String>(s));
+  weak_interns_.Insert(s);
   return s;
 }
 
 void InternTable::RemoveStrong(mirror::String* s) {
-  Remove(&strong_interns_, s);
+  strong_interns_.Remove(s);
 }
 
 void InternTable::RemoveWeak(mirror::String* s) {
@@ -138,13 +127,7 @@
   if (runtime->IsActiveTransaction()) {
     runtime->RecordWeakStringRemoval(s);
   }
-  Remove(&weak_interns_, s);
-}
-
-void InternTable::Remove(Table* table, mirror::String* s) {
-  auto it = table->find(GcRoot<mirror::String>(s));
-  DCHECK(it != table->end());
-  table->erase(it);
+  weak_interns_.Remove(s);
 }
 
 // Insert/remove methods used to undo changes made during an aborted transaction.
@@ -165,11 +148,39 @@
   RemoveWeak(s);
 }
 
-static mirror::String* LookupStringFromImage(mirror::String* s)
+void InternTable::AddImageStringsToTable(gc::space::ImageSpace* image_space) {
+  MutexLock mu(Thread::Current(), *Locks::intern_table_lock_);
+  if (!image_added_to_intern_table_) {
+    mirror::Object* root = image_space->GetImageHeader().GetImageRoot(ImageHeader::kDexCaches);
+    mirror::ObjectArray<mirror::DexCache>* dex_caches = root->AsObjectArray<mirror::DexCache>();
+    for (int32_t i = 0; i < dex_caches->GetLength(); ++i) {
+      mirror::DexCache* dex_cache = dex_caches->Get(i);
+      const DexFile* dex_file = dex_cache->GetDexFile();
+      const size_t num_strings = dex_file->NumStringIds();
+      for (size_t j = 0; j < num_strings; ++j) {
+        mirror::String* image_string = dex_cache->GetResolvedString(j);
+        if (image_string != nullptr) {
+          mirror::String* found = LookupStrong(image_string);
+          if (found == nullptr) {
+            InsertStrong(image_string);
+          } else {
+            DCHECK_EQ(found, image_string);
+          }
+        }
+      }
+    }
+    image_added_to_intern_table_ = true;
+  }
+}
+
+mirror::String* InternTable::LookupStringFromImage(mirror::String* s)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  if (image_added_to_intern_table_) {
+    return nullptr;
+  }
   gc::space::ImageSpace* image = Runtime::Current()->GetHeap()->GetImageSpace();
-  if (image == NULL) {
-    return NULL;  // No image present.
+  if (image == nullptr) {
+    return nullptr;  // No image present.
   }
   mirror::Object* root = image->GetImageHeader().GetImageRoot(ImageHeader::kDexCaches);
   mirror::ObjectArray<mirror::DexCache>* dex_caches = root->AsObjectArray<mirror::DexCache>();
@@ -285,24 +296,12 @@
 
 bool InternTable::ContainsWeak(mirror::String* s) {
   MutexLock mu(Thread::Current(), *Locks::intern_table_lock_);
-  const mirror::String* found = LookupWeak(s);
-  return found == s;
+  return LookupWeak(s) == s;
 }
 
 void InternTable::SweepInternTableWeaks(IsMarkedCallback* callback, void* arg) {
   MutexLock mu(Thread::Current(), *Locks::intern_table_lock_);
-  for (auto it = weak_interns_.begin(), end = weak_interns_.end(); it != end;) {
-    // This does not need a read barrier because this is called by GC.
-    GcRoot<mirror::String>& root = const_cast<GcRoot<mirror::String>&>(*it);
-    mirror::Object* object = root.Read<kWithoutReadBarrier>();
-    mirror::Object* new_object = callback(object, arg);
-    if (new_object == nullptr) {
-      it = weak_interns_.erase(it);
-    } else {
-      root = GcRoot<mirror::String>(down_cast<mirror::String*>(new_object));
-      ++it;
-    }
-  }
+  weak_interns_.SweepWeaks(callback, arg);
 }
 
 std::size_t InternTable::StringHashEquals::operator()(const GcRoot<mirror::String>& root) {
@@ -321,4 +320,68 @@
       const_cast<GcRoot<mirror::String>&>(b).Read());
 }
 
+void InternTable::Table::Remove(mirror::String* s) {
+  auto it = post_zygote_table_.find(GcRoot<mirror::String>(s));
+  if (it != post_zygote_table_.end()) {
+    post_zygote_table_.erase(it);
+  } else {
+    it = pre_zygote_table_.find(GcRoot<mirror::String>(s));
+    DCHECK(it != pre_zygote_table_.end());
+    pre_zygote_table_.erase(it);
+  }
+}
+
+mirror::String* InternTable::Table::Find(mirror::String* s) {
+  Locks::intern_table_lock_->AssertHeld(Thread::Current());
+  auto it = pre_zygote_table_.find(GcRoot<mirror::String>(s));
+  if (LIKELY(it != pre_zygote_table_.end())) {
+    return const_cast<GcRoot<mirror::String>&>(*it).Read();
+  }
+  it = post_zygote_table_.find(GcRoot<mirror::String>(s));
+  if (LIKELY(it != post_zygote_table_.end())) {
+    return const_cast<GcRoot<mirror::String>&>(*it).Read();
+  }
+  return nullptr;
+}
+
+void InternTable::Table::SwapPostZygoteWithPreZygote() {
+  CHECK(pre_zygote_table_.empty());
+  std::swap(pre_zygote_table_, post_zygote_table_);
+}
+
+void InternTable::Table::Insert(mirror::String* s) {
+  // Always insert into the post zygote table; it gets swapped with the (empty) pre zygote
+  // table when we create the zygote.
+  post_zygote_table_.insert(GcRoot<mirror::String>(s));
+}
+
+void InternTable::Table::VisitRoots(RootCallback* callback, void* arg, VisitRootFlags flags) {
+  for (auto& intern : pre_zygote_table_) {
+    const_cast<GcRoot<mirror::String>&>(intern).VisitRoot(callback, arg, 0, kRootInternedString);
+  }
+  for (auto& intern : post_zygote_table_) {
+    const_cast<GcRoot<mirror::String>&>(intern).VisitRoot(callback, arg, 0, kRootInternedString);
+  }
+}
+
+void InternTable::Table::SweepWeaks(IsMarkedCallback* callback, void* arg) {
+  SweepWeaks(&pre_zygote_table_, callback, arg);
+  SweepWeaks(&post_zygote_table_, callback, arg);
+}
+
+void InternTable::Table::SweepWeaks(UnorderedSet* set, IsMarkedCallback* callback, void* arg) {
+  for (auto it = set->begin(), end = set->end(); it != end;) {
+    // This does not need a read barrier because this is called by GC.
+    GcRoot<mirror::String>& root = const_cast<GcRoot<mirror::String>&>(*it);
+    mirror::Object* object = root.Read<kWithoutReadBarrier>();
+    mirror::Object* new_object = callback(object, arg);
+    if (new_object == nullptr) {
+      it = set->erase(it);
+    } else {
+      root = GcRoot<mirror::String>(new_object->AsString());
+      ++it;
+    }
+  }
+}
+
 }  // namespace art
diff --git a/runtime/intern_table.h b/runtime/intern_table.h
index e3223c8..0bff7b9 100644
--- a/runtime/intern_table.h
+++ b/runtime/intern_table.h
@@ -26,6 +26,12 @@
 
 namespace art {
 
+namespace gc {
+namespace space {
+class ImageSpace;
+}  // namespace space
+}  // namespace gc
+
 enum VisitRootFlags : uint8_t;
 
 namespace mirror {
@@ -66,9 +72,12 @@
 
   bool ContainsWeak(mirror::String* s) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  size_t Size() const;
-  size_t StrongSize() const;
-  size_t WeakSize() const;
+  // Total number of interned strings.
+  size_t Size() const LOCKS_EXCLUDED(Locks::intern_table_lock_);
+  // Total number of strongly live interned strings.
+  size_t StrongSize() const LOCKS_EXCLUDED(Locks::intern_table_lock_);
+  // Total number of weakly live interned strings.
+  size_t WeakSize() const LOCKS_EXCLUDED(Locks::intern_table_lock_);
 
   void VisitRoots(RootCallback* callback, void* arg, VisitRootFlags flags)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
@@ -78,6 +87,14 @@
   void DisallowNewInterns() EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_);
   void AllowNewInterns() SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
+  // Adds all of the resolved image strings from the image space into the intern table. The
+  // advantage of doing this is preventing expensive DexFile::FindStringId calls.
+  void AddImageStringsToTable(gc::space::ImageSpace* image_space)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) LOCKS_EXCLUDED(Locks::intern_table_lock_);
+  // Swap the post zygote table into the pre zygote table so its pages stop getting dirtied.
+  void SwapPostZygoteWithPreZygote()
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) LOCKS_EXCLUDED(Locks::intern_table_lock_);
+
  private:
   class StringHashEquals {
    public:
@@ -85,22 +102,60 @@
     bool operator()(const GcRoot<mirror::String>& a, const GcRoot<mirror::String>& b)
         NO_THREAD_SAFETY_ANALYSIS;
   };
-  typedef std::unordered_set<GcRoot<mirror::String>, StringHashEquals, StringHashEquals,
-      TrackingAllocator<GcRoot<mirror::String>, kAllocatorTagInternTable>> Table;
+
+  // Table which holds pre zygote and post zygote interned strings. There is one instance each
+  // for weak interns and strong interns.
+  class Table {
+   public:
+    mirror::String* Find(mirror::String* s) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+        EXCLUSIVE_LOCKS_REQUIRED(Locks::intern_table_lock_);
+    void Insert(mirror::String* s) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+        EXCLUSIVE_LOCKS_REQUIRED(Locks::intern_table_lock_);
+    void Remove(mirror::String* s)
+        SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+        EXCLUSIVE_LOCKS_REQUIRED(Locks::intern_table_lock_);
+    void VisitRoots(RootCallback* callback, void* arg, VisitRootFlags flags)
+        SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+        EXCLUSIVE_LOCKS_REQUIRED(Locks::intern_table_lock_);
+    void SweepWeaks(IsMarkedCallback* callback, void* arg)
+        SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+        EXCLUSIVE_LOCKS_REQUIRED(Locks::intern_table_lock_);
+    void SwapPostZygoteWithPreZygote() EXCLUSIVE_LOCKS_REQUIRED(Locks::intern_table_lock_);
+    size_t Size() const EXCLUSIVE_LOCKS_REQUIRED(Locks::intern_table_lock_) {
+      return pre_zygote_table_.size() + post_zygote_table_.size();
+    }
+
+   private:
+    typedef std::unordered_set<GcRoot<mirror::String>, StringHashEquals, StringHashEquals,
+        TrackingAllocator<GcRoot<mirror::String>, kAllocatorTagInternTable>> UnorderedSet;
+
+    void SweepWeaks(UnorderedSet* set, IsMarkedCallback* callback, void* arg)
+        SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+        EXCLUSIVE_LOCKS_REQUIRED(Locks::intern_table_lock_);
+
+    // We call SwapPostZygoteWithPreZygote when we create the zygote to reduce private dirty pages
+    // caused by modifying the zygote intern table's hash table. The pre zygote table holds the
+    // strings which were interned before we created the zygote space. Post zygote is
+    // self-explanatory.
+    UnorderedSet pre_zygote_table_;
+    UnorderedSet post_zygote_table_;
+  };
 
   mirror::String* Insert(mirror::String* s, bool is_strong)
       LOCKS_EXCLUDED(Locks::intern_table_lock_)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
   mirror::String* LookupStrong(mirror::String* s)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::intern_table_lock_);
   mirror::String* LookupWeak(mirror::String* s)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
-  mirror::String* Lookup(Table* table, mirror::String* s)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::intern_table_lock_);
   mirror::String* InsertStrong(mirror::String* s)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::intern_table_lock_);
   mirror::String* InsertWeak(mirror::String* s)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::intern_table_lock_);
   void RemoveStrong(mirror::String* s)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
@@ -108,14 +163,16 @@
   void RemoveWeak(mirror::String* s)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::intern_table_lock_);
-  void Remove(Table* table, mirror::String* s)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
-      EXCLUSIVE_LOCKS_REQUIRED(Locks::intern_table_lock_);
 
   // Transaction rollback access.
+  mirror::String* LookupStringFromImage(mirror::String* s)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
+      EXCLUSIVE_LOCKS_REQUIRED(Locks::intern_table_lock_);
   mirror::String* InsertStrongFromTransaction(mirror::String* s)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::intern_table_lock_);
   mirror::String* InsertWeakFromTransaction(mirror::String* s)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::intern_table_lock_);
   void RemoveStrongFromTransaction(mirror::String* s)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_)
@@ -125,6 +182,7 @@
       EXCLUSIVE_LOCKS_REQUIRED(Locks::intern_table_lock_);
   friend class Transaction;
 
+  bool image_added_to_intern_table_ GUARDED_BY(Locks::intern_table_lock_);
   bool log_new_roots_ GUARDED_BY(Locks::intern_table_lock_);
   bool allow_new_interns_ GUARDED_BY(Locks::intern_table_lock_);
   ConditionVariable new_intern_condition_ GUARDED_BY(Locks::intern_table_lock_);
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index db7936c..de3e976 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -412,8 +412,13 @@
 
   started_ = true;
 
+  if (IsZygote()) {
+    ScopedObjectAccess soa(self);
+    Runtime::Current()->GetInternTable()->AddImageStringsToTable(heap_->GetImageSpace());
+  }
+
   if (!IsImageDex2OatEnabled() || !Runtime::Current()->GetHeap()->HasImageSpace()) {
-    ScopedObjectAccess soa(Thread::Current());
+    ScopedObjectAccess soa(self);
     StackHandleScope<1> hs(soa.Self());
     auto klass(hs.NewHandle<mirror::Class>(mirror::Class::GetJavaLangClass()));
     class_linker_->EnsureInitialized(soa.Self(), klass, true, true);