Reland "Allocate dex cache arrays at startup."

This reverts commit cc97f11fe689c1344bb04ab85e8bdc7baaeb3fb1.

Reason for revert: be more selective when using full arrays.

Test: test.py

Change-Id: If941e81849d9e3b2c4ddcc03e46fb8442a608c82
diff --git a/runtime/jit/profile_saver.cc b/runtime/jit/profile_saver.cc
index cea654f..d746ade 100644
--- a/runtime/jit/profile_saver.cc
+++ b/runtime/jit/profile_saver.cc
@@ -136,9 +136,10 @@
   {
     MutexLock mu(self, wait_lock_);
 
-    const uint64_t end_time = NanoTime() + MsToNs(force_early_first_save
+    const uint64_t sleep_time = MsToNs(force_early_first_save
       ? options_.GetMinFirstSaveMs()
       : options_.GetSaveResolvedClassesDelayMs());
+    const uint64_t end_time = NanoTime() + sleep_time;
     while (!Runtime::Current()->GetStartupCompleted()) {
       const uint64_t current_time = NanoTime();
       if (current_time >= end_time) {
@@ -146,7 +147,7 @@
       }
       period_condition_.TimedWait(self, NsToMs(end_time - current_time), 0);
     }
-    total_ms_of_sleep_ += options_.GetSaveResolvedClassesDelayMs();
+    total_ms_of_sleep_ += NsToMs(sleep_time);
   }
   // Tell the runtime that startup is completed if it has not already been notified.
   // TODO: We should use another thread to do this in case the profile saver is not running.
diff --git a/runtime/linear_alloc.h b/runtime/linear_alloc.h
index fe92d19..7353721 100644
--- a/runtime/linear_alloc.h
+++ b/runtime/linear_alloc.h
@@ -61,7 +61,6 @@
 
 std::ostream& operator<<(std::ostream& os, LinearAllocKind value);
 
-// TODO: Support freeing if we add class unloading.
 class LinearAlloc {
  public:
   static constexpr size_t kAlignment = 8u;
diff --git a/runtime/mirror/dex_cache-inl.h b/runtime/mirror/dex_cache-inl.h
index 0b6bb14..5a44fff 100644
--- a/runtime/mirror/dex_cache-inl.h
+++ b/runtime/mirror/dex_cache-inl.h
@@ -54,7 +54,7 @@
 }
 
 template<typename T>
-T* DexCache::AllocArray(MemberOffset obj_offset, size_t num, LinearAllocKind kind) {
+T* DexCache::AllocArray(MemberOffset obj_offset, size_t num, LinearAllocKind kind, bool startup) {
   Thread* self = Thread::Current();
   mirror::DexCache* dex_cache = this;
   if (gUseReadBarrier && self->GetIsGcMarking()) {
@@ -63,8 +63,14 @@
     dex_cache = reinterpret_cast<DexCache*>(ReadBarrier::Mark(this));
   }
   // DON'T USE 'this' from now on.
-  ClassLinker* linker = Runtime::Current()->GetClassLinker();
-  LinearAlloc* alloc = linker->GetOrCreateAllocatorForClassLoader(GetClassLoader());
+  Runtime* runtime = Runtime::Current();
+  // Note: in the 1002-notify-startup test, the startup linear alloc can concurrently
+  // become null, even while the runtime is still marked as being in its startup
+  // phase. Therefore we fetch it only once here.
+  LinearAlloc* startup_linear_alloc = runtime->GetStartupLinearAlloc();
+  LinearAlloc* alloc = (startup && startup_linear_alloc != nullptr)
+      ? startup_linear_alloc
+      : runtime->GetClassLinker()->GetOrCreateAllocatorForClassLoader(GetClassLoader());
   MutexLock mu(self, *Locks::dex_cache_lock_);  // Avoid allocation by multiple threads.
   T* array = dex_cache->GetFieldPtr64<T*>(obj_offset);
   if (array != nullptr) {
diff --git a/runtime/mirror/dex_cache.cc b/runtime/mirror/dex_cache.cc
index 7d0c97f..5e6138c 100644
--- a/runtime/mirror/dex_cache.cc
+++ b/runtime/mirror/dex_cache.cc
@@ -20,6 +20,7 @@
 #include "class_linker.h"
 #include "gc/accounting/card_table-inl.h"
 #include "gc/heap.h"
+#include "jit/profile_saver.h"
 #include "linear_alloc.h"
 #include "oat_file.h"
 #include "object-inl.h"
@@ -165,5 +166,54 @@
   return GetFieldObject<ClassLoader>(OFFSET_OF_OBJECT_MEMBER(DexCache, class_loader_));
 }
 
+bool DexCache::ShouldAllocateFullArrayAtStartup() {
+  Runtime* runtime = Runtime::Current();
+  if (runtime->IsAotCompiler()) {
+    // To save on memory in dex2oat, we don't allocate full arrays by default.
+    return false;
+  }
+
+  if (runtime->GetStartupCompleted()) {
+    // We only allocate full arrays during app startup.
+    return false;
+  }
+
+  if (GetClassLoader() == nullptr) {
+    // Only allocate full array for app dex files (also note that for
+    // multi-image, the `GetCompilerFilter` call below does not work for
+    // non-primary oat files).
+    return false;
+  }
+
+  const OatDexFile* oat_dex_file = GetDexFile()->GetOatDexFile();
+  if (oat_dex_file != nullptr &&
+      CompilerFilter::IsAotCompilationEnabled(oat_dex_file->GetOatFile()->GetCompilerFilter())) {
+    // We only allocate full arrays for dex files that have not been
+    // AOT-compiled.
+    return false;
+  }
+
+  if (!ProfileSaver::IsStarted()) {
+    // Only allocate full arrays if the profile saver is running: if the app
+    // does not call `reportFullyDrawn`, then only the profile saver will notify
+    // that the app has eventually started.
+    return false;
+  }
+
+  return true;
+}
+
+void DexCache::UnlinkStartupCaches() {
+  if (GetDexFile() == nullptr) {
+    // Unused dex cache.
+    return;
+  }
+  UnlinkStringsArrayIfStartup();
+  UnlinkResolvedFieldsArrayIfStartup();
+  UnlinkResolvedMethodsArrayIfStartup();
+  UnlinkResolvedTypesArrayIfStartup();
+  UnlinkResolvedMethodTypesArrayIfStartup();
+}
+
 }  // namespace mirror
 }  // namespace art
diff --git a/runtime/mirror/dex_cache.h b/runtime/mirror/dex_cache.h
index 4c0c35d..3187903 100644
--- a/runtime/mirror/dex_cache.h
+++ b/runtime/mirror/dex_cache.h
@@ -365,6 +365,10 @@
   void VisitNativeRoots(const Visitor& visitor)
       REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(Locks::heap_bitmap_lock_);
 
+  // Sets to null the dex cache array fields that were allocated with the
+  // startup linear alloc.
+  void UnlinkStartupCaches() REQUIRES_SHARED(Locks::mutator_lock_);
+
 // NOLINTBEGIN(bugprone-macro-parentheses)
 #define DEFINE_ARRAY(name, array_kind, getter_setter, type, ids, alloc_kind) \
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags> \
@@ -380,10 +384,10 @@
   static constexpr MemberOffset getter_setter ##Offset() { \
     return OFFSET_OF_OBJECT_MEMBER(DexCache, name); \
   } \
-  array_kind* Allocate ##getter_setter() \
+  array_kind* Allocate ##getter_setter(bool startup = false) \
       REQUIRES_SHARED(Locks::mutator_lock_) { \
     return reinterpret_cast<array_kind*>(AllocArray<type>( \
-        getter_setter ##Offset(), GetDexFile()->ids(), alloc_kind)); \
+        getter_setter ##Offset(), GetDexFile()->ids(), alloc_kind, startup)); \
   } \
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags> \
   size_t Num ##getter_setter() REQUIRES_SHARED(Locks::mutator_lock_) { \
@@ -443,8 +447,9 @@
     } else { \
       auto* pairs = Get ##getter_setter(); \
       if (pairs == nullptr) { \
-        if (GetDexFile()->ids() <= pair_size) { \
-          array = Allocate ##getter_setter ##Array(); \
+        bool should_allocate_full_array = ShouldAllocateFullArray(GetDexFile()->ids(), pair_size); \
+        if (ShouldAllocateFullArrayAtStartup() || should_allocate_full_array) { \
+          array = Allocate ##getter_setter ##Array(!should_allocate_full_array); \
           array->Set(index, resolved); \
         } else { \
           pairs = Allocate ##getter_setter(); \
@@ -454,6 +459,12 @@
         pairs->Set(index, resolved); \
       } \
     } \
+  } \
+  void Unlink ##getter_setter ##ArrayIfStartup() \
+      REQUIRES_SHARED(Locks::mutator_lock_) { \
+    if (!ShouldAllocateFullArray(GetDexFile()->ids(), pair_size)) { \
+        Set ##getter_setter ##Array(nullptr); \
+    } \
   }
 
   DEFINE_ARRAY(resolved_call_sites_,
@@ -523,7 +534,7 @@
  private:
   // Allocate new array in linear alloc and save it in the given fields.
   template<typename T>
-  T* AllocArray(MemberOffset obj_offset, size_t num, LinearAllocKind kind)
+  T* AllocArray(MemberOffset obj_offset, size_t num, LinearAllocKind kind, bool startup = false)
      REQUIRES_SHARED(Locks::mutator_lock_);
 
   // Visit instance fields of the dex cache as well as its associated arrays.
@@ -534,6 +545,16 @@
   void VisitReferences(ObjPtr<Class> klass, const Visitor& visitor)
       REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(Locks::heap_bitmap_lock_);
 
+  // Returns whether we should allocate a full array given the current state of
+  // the runtime and oat files.
+  bool ShouldAllocateFullArrayAtStartup() REQUIRES_SHARED(Locks::mutator_lock_);
+
+  // Returns whether a full array (rather than a smaller pair-based cache of
+  // `dex_cache_size` entries) should be allocated for `number_of_elements`.
+  static bool ShouldAllocateFullArray(size_t number_of_elements, size_t dex_cache_size) {
+    return number_of_elements <= dex_cache_size;
+  }
+
   HeapReference<ClassLoader> class_loader_;
   HeapReference<String> location_;
 
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index 8752125..b38b2cb 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -525,6 +525,7 @@
   // Destroy allocators before shutting down the MemMap because they may use it.
   java_vm_.reset();
   linear_alloc_.reset();
+  startup_linear_alloc_.reset();
   linear_alloc_arena_pool_.reset();
   arena_pool_.reset();
   jit_arena_pool_.reset();
@@ -1748,6 +1749,7 @@
     linear_alloc_arena_pool_.reset(new MemMapArenaPool(low_4gb));
   }
   linear_alloc_.reset(CreateLinearAlloc());
+  startup_linear_alloc_.reset(CreateLinearAlloc());
 
   small_irt_allocator_ = new SmallIrtAllocator();
 
@@ -2749,7 +2751,8 @@
     return;
   }
   if (!OS::FileExists(profile_output_filename.c_str(), /*check_file_type=*/ false)) {
-    LOG(WARNING) << "JIT profile information will not be recorded: profile file does not exist.";
+    LOG(WARNING) << "JIT profile information will not be recorded: profile file does not exist: "
+                 << profile_output_filename;
     return;
   }
   if (code_paths.empty()) {
@@ -3306,6 +3309,14 @@
   startup_completed_.store(false, std::memory_order_seq_cst);
 }
 
+class UnlinkStartupDexCacheVisitor : public DexCacheVisitor {
+ public:
+  void Visit(ObjPtr<mirror::DexCache> dex_cache)
+      REQUIRES_SHARED(Locks::dex_lock_, Locks::mutator_lock_) override {
+    dex_cache->UnlinkStartupCaches();
+  }
+};
+
 class Runtime::NotifyStartupCompletedTask : public gc::HeapTask {
  public:
   NotifyStartupCompletedTask() : gc::HeapTask(/*target_run_time=*/ NanoTime()) {}
@@ -3313,11 +3324,25 @@
   void Run(Thread* self) override {
     VLOG(startup) << "NotifyStartupCompletedTask running";
     Runtime* const runtime = Runtime::Current();
+    // Fetch the startup linear alloc before the checkpoint to play nice with
+    // 1002-notify-startup test which resets the startup state.
+    std::unique_ptr<LinearAlloc> startup_linear_alloc(runtime->ReleaseStartupLinearAlloc());
     {
-      ScopedTrace trace("Releasing app image spaces metadata");
+      ScopedTrace trace("Releasing dex caches and app image spaces metadata");
       ScopedObjectAccess soa(Thread::Current());
-      // Request empty checkpoints to make sure no threads are accessing the image space metadata
-      // section when we madvise it. Use GC exclusion to prevent deadlocks that may happen if
+
+      {
+        // Unlink dex caches that were allocated with the startup linear alloc.
+        UnlinkStartupDexCacheVisitor visitor;
+        ReaderMutexLock mu(self, *Locks::dex_lock_);
+        runtime->GetClassLinker()->VisitDexCaches(&visitor);
+      }
+
+      // Request empty checkpoints to make sure no threads are:
+      // - accessing the image space metadata section when we madvise it
+      // - accessing dex caches when we free them
+      //
+      // Use GC exclusion to prevent deadlocks that may happen if
       // multiple threads are attempting to run empty checkpoints at the same time.
       {
         // Avoid using ScopedGCCriticalSection since that does not allow thread suspension. This is
@@ -3328,6 +3353,7 @@
                                                        gc::kCollectorTypeCriticalSection);
         runtime->GetThreadList()->RunEmptyCheckpoint();
       }
+
       for (gc::space::ContinuousSpace* space : runtime->GetHeap()->GetContinuousSpaces()) {
         if (space->IsImageSpace()) {
           gc::space::ImageSpace* image_space = space->AsImageSpace();
@@ -3343,6 +3369,13 @@
       ScopedTrace trace2("Delete thread pool");
       runtime->DeleteThreadPool();
     }
+
+    {
+      // We know that after the checkpoint, there is no thread that can hold
+      // the startup linear alloc, so it's safe to delete it now.
+      ScopedTrace trace2("Delete startup linear alloc");
+      startup_linear_alloc.reset();
+    }
   }
 };
 
diff --git a/runtime/runtime.h b/runtime/runtime.h
index 10ee4ae..0cebdab 100644
--- a/runtime/runtime.h
+++ b/runtime/runtime.h
@@ -820,6 +820,10 @@
     return linear_alloc_.get();
   }
 
+  LinearAlloc* GetStartupLinearAlloc() {
+    return startup_linear_alloc_.get();
+  }
+
   jit::JitOptions* GetJITOptions() {
     return jit_options_.get();
   }
@@ -1062,6 +1066,10 @@
     ThreadPool* const thread_pool_;
   };
 
+  LinearAlloc* ReleaseStartupLinearAlloc() {
+    return startup_linear_alloc_.release();
+  }
+
   bool LoadAppImageStartupCache() const {
     return load_app_image_startup_cache_;
   }
@@ -1278,6 +1286,10 @@
   // Shared linear alloc for now.
   std::unique_ptr<LinearAlloc> linear_alloc_;
 
+  // Linear alloc used for allocations during startup. Will be deleted after
+  // startup.
+  std::unique_ptr<LinearAlloc> startup_linear_alloc_;
+
   // The number of spins that are done before thread suspension is used to forcibly inflate.
   size_t max_spins_before_thin_lock_inflation_;
   MonitorList* monitor_list_;