Use GC exclusion for NotifyStartupCompleted

Prevent rare deadlocks that can occur when empty checkpoints
block on a thread that is decoding weak globals.

This is done by moving the startup completed event to a separate
thread and using GC exclusion.

Test: test-art-host
Bug: 138852758

Change-Id: I314c61aff5be0d5829f7ad5fac0659e99dec1d90
diff --git a/runtime/gc/gc_cause.cc b/runtime/gc/gc_cause.cc
index 8b4bac2..b197a99 100644
--- a/runtime/gc/gc_cause.cc
+++ b/runtime/gc/gc_cause.cc
@@ -46,6 +46,7 @@
     case kGcCauseHprof: return "Hprof";
     case kGcCauseGetObjectsAllocated: return "ObjectsAllocated";
     case kGcCauseProfileSaver: return "ProfileSaver";
+    case kGcCauseRunEmptyCheckpoint: return "RunEmptyCheckpoint";
   }
   LOG(FATAL) << "Unreachable";
   UNREACHABLE();
diff --git a/runtime/gc/gc_cause.h b/runtime/gc/gc_cause.h
index 81781ce..4dae585 100644
--- a/runtime/gc/gc_cause.h
+++ b/runtime/gc/gc_cause.h
@@ -62,6 +62,8 @@
   kGcCauseGetObjectsAllocated,
   // GC cause for the profile saver.
   kGcCauseProfileSaver,
+  // GC cause for running an empty checkpoint.
+  kGcCauseRunEmptyCheckpoint,
 };
 
 const char* PrettyCause(GcCause cause);
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index b3a46ca..8eebd12 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -4220,5 +4220,14 @@
   });
 }
 
+bool Heap::AddHeapTask(gc::HeapTask* task) {
+  Thread* const self = Thread::Current();
+  if (!CanAddHeapTask(self)) {
+    return false;
+  }
+  GetTaskProcessor()->AddTask(self, task);
+  return true;
+}
+
 }  // namespace gc
 }  // namespace art
diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h
index de94d37..6c3290f 100644
--- a/runtime/gc/heap.h
+++ b/runtime/gc/heap.h
@@ -69,6 +69,7 @@
 class AllocationListener;
 class AllocRecordObjectMap;
 class GcPauseListener;
+class HeapTask;
 class ReferenceProcessor;
 class TaskProcessor;
 class Verification;
@@ -908,6 +909,8 @@
 
   void TraceHeapSize(size_t heap_size);
 
+  bool AddHeapTask(gc::HeapTask* task);
+
  private:
   class ConcurrentGCTask;
   class CollectorTransitionTask;
@@ -1585,6 +1588,7 @@
   friend class GCCriticalSection;
   friend class ReferenceQueue;
   friend class ScopedGCCriticalSection;
+  friend class ScopedInterruptibleGCCriticalSection;
   friend class VerifyReferenceCardVisitor;
   friend class VerifyReferenceVisitor;
   friend class VerifyObjectVisitor;
diff --git a/runtime/gc/scoped_gc_critical_section.cc b/runtime/gc/scoped_gc_critical_section.cc
index 7a0a6e8..eaede43 100644
--- a/runtime/gc/scoped_gc_critical_section.cc
+++ b/runtime/gc/scoped_gc_critical_section.cc
@@ -58,5 +58,17 @@
   critical_section_.Exit(old_no_suspend_reason_);
 }
 
+ScopedInterruptibleGCCriticalSection::ScopedInterruptibleGCCriticalSection(
+    Thread* self,
+    GcCause cause,
+    CollectorType type) : self_(self) {
+  DCHECK(self != nullptr);
+  Runtime::Current()->GetHeap()->StartGC(self_, cause, type);
+}
+
+ScopedInterruptibleGCCriticalSection::~ScopedInterruptibleGCCriticalSection() {
+  Runtime::Current()->GetHeap()->FinishGC(self_, collector::kGcTypeNone);
+}
+
 }  // namespace gc
 }  // namespace art
diff --git a/runtime/gc/scoped_gc_critical_section.h b/runtime/gc/scoped_gc_critical_section.h
index 8ad0158..b3a897c 100644
--- a/runtime/gc/scoped_gc_critical_section.h
+++ b/runtime/gc/scoped_gc_critical_section.h
@@ -59,6 +59,19 @@
   const char* old_no_suspend_reason_;
 };
 
+// The use of ScopedGCCriticalSection should be preferred whenever possible.
+// This class allows thread suspension but should never be used with allocations because of the
+// deadlock risk. TODO: Add a new thread role for "no allocations" that still allows suspension.
+class ScopedInterruptibleGCCriticalSection {
+ public:
+  ScopedInterruptibleGCCriticalSection(Thread* self, GcCause cause, CollectorType type);
+  ~ScopedInterruptibleGCCriticalSection();
+
+ private:
+  Thread* const self_;
+};
+
+
 }  // namespace gc
 }  // namespace art
 
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index ecadf68..597cd6b 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -84,6 +84,7 @@
 #include "gc/space/image_space.h"
 #include "gc/space/space-inl.h"
 #include "gc/system_weak.h"
+#include "gc/task_processor.h"
 #include "handle_scope-inl.h"
 #include "hidden_api.h"
 #include "hidden_api_jni.h"
@@ -2878,6 +2879,54 @@
   startup_completed_.store(false, std::memory_order_seq_cst);
 }
 
+class Runtime::NotifyStartupCompletedTask : public gc::HeapTask {
+ public:
+  NotifyStartupCompletedTask() : gc::HeapTask(/*target_run_time=*/ NanoTime()) {}
+
+  void Run(Thread* self) override {
+    VLOG(startup) << "NotifyStartupCompletedTask running";
+    Runtime* const runtime = Runtime::Current();
+    {
+      ScopedTrace trace("Releasing app image spaces metadata");
+      ScopedObjectAccess soa(Thread::Current());
+      for (gc::space::ContinuousSpace* space : runtime->GetHeap()->GetContinuousSpaces()) {
+        if (space->IsImageSpace()) {
+          gc::space::ImageSpace* image_space = space->AsImageSpace();
+          if (image_space->GetImageHeader().IsAppImage()) {
+            image_space->DisablePreResolvedStrings();
+          }
+        }
+      }
+      // Request empty checkpoints to make sure no threads are accessing the image space metadata
+      // section when we madvise it. Use GC exclusion to prevent deadlocks that may happen if
+      // multiple threads are attempting to run empty checkpoints at the same time.
+      {
+        // Avoid using ScopedGCCriticalSection since that does not allow thread suspension. This is
+        // not allowed to prevent allocations, but it's still safe to suspend temporarily for the
+        // checkpoint.
+        gc::ScopedInterruptibleGCCriticalSection sigcs(self,
+                                                       gc::kGcCauseRunEmptyCheckpoint,
+                                                       gc::kCollectorTypeCriticalSection);
+        runtime->GetThreadList()->RunEmptyCheckpoint();
+      }
+      for (gc::space::ContinuousSpace* space : runtime->GetHeap()->GetContinuousSpaces()) {
+        if (space->IsImageSpace()) {
+          gc::space::ImageSpace* image_space = space->AsImageSpace();
+          if (image_space->GetImageHeader().IsAppImage()) {
+            image_space->ReleaseMetadata();
+          }
+        }
+      }
+    }
+
+    {
+      // Delete the thread pool used for app image loading since startup is assumed to be completed.
+      ScopedTrace trace2("Delete thread pool");
+      runtime->DeleteThreadPool();
+    }
+  }
+};
+
 void Runtime::NotifyStartupCompleted() {
   bool expected = false;
   if (!startup_completed_.compare_exchange_strong(expected, true, std::memory_order_seq_cst)) {
@@ -2885,62 +2934,16 @@
     // once externally. For this reason there are no asserts.
     return;
   }
-  VLOG(startup) << "Startup completed notified";
 
-  {
-    ScopedTrace trace("Releasing app image spaces metadata");
-    ScopedObjectAccess soa(Thread::Current());
-    for (gc::space::ContinuousSpace* space : GetHeap()->GetContinuousSpaces()) {
-      if (space->IsImageSpace()) {
-        gc::space::ImageSpace* image_space = space->AsImageSpace();
-        if (image_space->GetImageHeader().IsAppImage()) {
-          image_space->DisablePreResolvedStrings();
-        }
-      }
-    }
-    // Request empty checkpoint to make sure no threads are accessing the section when we madvise
-    // it. Avoid using RunEmptyCheckpoint since only one concurrent caller is supported. We could
-    // add a GC critical section here but that may cause significant jank if the GC is running.
-    {
-      class EmptyClosure : public Closure {
-       public:
-        explicit EmptyClosure(Barrier* barrier) : barrier_(barrier) {}
-        void Run(Thread* thread ATTRIBUTE_UNUSED) override {
-          barrier_->Pass(Thread::Current());
-        }
-
-       private:
-        Barrier* const barrier_;
-      };
-      Barrier barrier(0);
-      EmptyClosure closure(&barrier);
-      size_t threads_running_checkpoint = GetThreadList()->RunCheckpoint(&closure);
-      // Now that we have run our checkpoint, move to a suspended state and wait
-      // for other threads to run the checkpoint.
-      Thread* self = Thread::Current();
-      ScopedThreadSuspension sts(self, kSuspended);
-      if (threads_running_checkpoint != 0) {
-        barrier.Increment(self, threads_running_checkpoint);
-      }
-    }
-    for (gc::space::ContinuousSpace* space : GetHeap()->GetContinuousSpaces()) {
-      if (space->IsImageSpace()) {
-        gc::space::ImageSpace* image_space = space->AsImageSpace();
-        if (image_space->GetImageHeader().IsAppImage()) {
-          image_space->ReleaseMetadata();
-        }
-      }
-    }
+  VLOG(startup) << "Adding NotifyStartupCompleted task";
+  // Use the heap task processor since we want to be exclusive with the GC and we don't want to
+  // block the caller if the GC is running.
+  if (!GetHeap()->AddHeapTask(new NotifyStartupCompletedTask)) {
+    VLOG(startup) << "Failed to add NotifyStartupCompletedTask";
   }
 
   // Notify the profiler saver that startup is now completed.
   ProfileSaver::NotifyStartupCompleted();
-
-  {
-    // Delete the thread pool used for app image loading startup is completed.
-    ScopedTrace trace2("Delete thread pool");
-    DeleteThreadPool();
-  }
 }
 
 bool Runtime::GetStartupCompleted() const {
diff --git a/runtime/runtime.h b/runtime/runtime.h
index 0b336c7..4975e65 100644
--- a/runtime/runtime.h
+++ b/runtime/runtime.h
@@ -1295,6 +1295,7 @@
   friend std::string GetFaultMessageForAbortLogging();
   friend class ScopedThreadPoolUsage;
   friend class OatFileAssistantTest;
+  class NotifyStartupCompletedTask;
 
   DISALLOW_COPY_AND_ASSIGN(Runtime);
 };