Share boot image methods memory in JIT zygote.

Once the zygote is done compiling, copy the ArtMethods to
shared memory that will be remapped in place. This is
a memory optimization that enables memory sharing between
the zygote and early forked processes.

This currently relies on undefined behavior of the kernel; a follow-up
CL will address this by using file sealing.

Saves around 2MB of memory for processes forked before the zygote is
done compiling - there are around a dozen such processes.

Bug: 119800099
Test: boots, PostLaunchMemoryUsage
Change-Id: Ia1bdbd1abd27f28b087d9f33aca4cd901d55082f
diff --git a/imgdiag/imgdiag.cc b/imgdiag/imgdiag.cc
index 4424209..bc63ab1 100644
--- a/imgdiag/imgdiag.cc
+++ b/imgdiag/imgdiag.cc
@@ -1467,6 +1467,10 @@
     backtrace_map_t boot_map = maybe_boot_map.value_or(backtrace_map_t{});
     // Sanity check boot_map_.
     CHECK(boot_map.end >= boot_map.start);
+
+    // Adjust the `end` of the mapping. Some other mappings may have been
+    // inserted within the image.
+    boot_map.end = RoundUp(boot_map.start + image_header.GetImageSize(), kPageSize);
     // The size of the boot image mapping.
     size_t boot_map_size = boot_map.end - boot_map.start;
 
@@ -1478,7 +1482,10 @@
         return false;
       }
       backtrace_map_t zygote_boot_map = maybe_zygote_boot_map.value_or(backtrace_map_t{});
-      if (zygote_boot_map.start != boot_map.start || zygote_boot_map.end != boot_map.end) {
+      // Adjust the `end` of the mapping. Some other mappings may have been
+      // inserted within the image.
+      zygote_boot_map.end = RoundUp(zygote_boot_map.start + image_header.GetImageSize(), kPageSize);
+      if (zygote_boot_map.start != boot_map.start) {
         os << "Zygote boot map does not match image boot map: "
            << "zygote begin " << reinterpret_cast<const void*>(zygote_boot_map.start)
            << ", zygote end " << reinterpret_cast<const void*>(zygote_boot_map.end)
diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
index 58f263e..4a127eb 100644
--- a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
@@ -1461,7 +1461,7 @@
     Handle<mirror::Class> called_class(hs.NewHandle(called->GetDeclaringClass()));
     linker->EnsureInitialized(soa.Self(), called_class, true, true);
     bool force_interpreter = self->IsForceInterpreter() && !called->IsNative();
-    if (LIKELY(called_class->IsInitialized())) {
+    if (called_class->IsInitialized() || called_class->IsInitializing()) {
       if (UNLIKELY(force_interpreter ||
                    Dbg::IsForcedInterpreterNeededForResolution(self, called))) {
         // If we are single-stepping or the called method is deoptimized (by a
@@ -1480,21 +1480,16 @@
         code = GetQuickInstrumentationEntryPoint();
       } else {
         code = called->GetEntryPointFromQuickCompiledCode();
-      }
-    } else if (called_class->IsInitializing()) {
-      if (UNLIKELY(force_interpreter ||
-                   Dbg::IsForcedInterpreterNeededForResolution(self, called))) {
-        // If we are single-stepping or the called method is deoptimized (by a
-        // breakpoint, for example), then we have to execute the called method
-        // with the interpreter.
-        code = GetQuickToInterpreterBridge();
-      } else if (invoke_type == kStatic) {
-        // Class is still initializing, go to JIT or oat and grab code (trampoline must be
-        // left in place until class is initialized to stop races between threads).
-        code = linker->GetQuickOatCodeFor(called);
-      } else {
-        // No trampoline for non-static methods.
-        code = called->GetEntryPointFromQuickCompiledCode();
+        if (linker->IsQuickResolutionStub(code)) {
+          DCHECK_EQ(invoke_type, kStatic);
+          // Go to JIT or oat and grab code.
+          code = linker->GetQuickOatCodeFor(called);
+          if (called_class->IsInitialized()) {
+            // Only update the entrypoint once the class is initialized. Other
+            // threads still need to go through the resolution stub.
+            Runtime::Current()->GetInstrumentation()->UpdateMethodsCode(called, code);
+          }
+        }
       }
     } else {
       DCHECK(called_class->IsErroneous());
diff --git a/runtime/jit/jit.cc b/runtime/jit/jit.cc
index 788c10f..bde0ed8 100644
--- a/runtime/jit/jit.cc
+++ b/runtime/jit/jit.cc
@@ -22,6 +22,7 @@
 #include "base/enums.h"
 #include "base/file_utils.h"
 #include "base/logging.h"  // For VLOG.
+#include "base/memfd.h"
 #include "base/memory_tool.h"
 #include "base/runtime_debug.h"
 #include "base/scoped_flock.h"
@@ -29,7 +30,9 @@
 #include "class_root.h"
 #include "debugger.h"
 #include "dex/type_lookup_table.h"
+#include "gc/space/image_space.h"
 #include "entrypoints/runtime_asm_entrypoints.h"
+#include "image-inl.h"
 #include "interpreter/interpreter.h"
 #include "jit-inl.h"
 #include "jit_code_cache.h"
@@ -49,6 +52,8 @@
 #include "thread-inl.h"
 #include "thread_list.h"
 
+using android::base::unique_fd;
+
 namespace art {
 namespace jit {
 
@@ -664,14 +669,15 @@
 }
 
 /**
- * A JIT task to madvise DONTNEED dex files after we're done compiling methods.
+ * A JIT task to run after all profile compilation is done.
  */
-class JitMadviseDontNeedTask final : public SelfDeletingTask {
+class JitDoneCompilingProfileTask final : public SelfDeletingTask {
  public:
-  explicit JitMadviseDontNeedTask(const std::vector<const DexFile*>& dex_files)
+  explicit JitDoneCompilingProfileTask(const std::vector<const DexFile*>& dex_files)
       : dex_files_(dex_files) {}
 
   void Run(Thread* self ATTRIBUTE_UNUSED) override {
+    // Madvise DONTNEED dex files now that we're done compiling methods.
     for (const DexFile* dex_file : dex_files_) {
       if (IsAddressKnownBackedByFileOrShared(dex_file->Begin())) {
         int result = madvise(const_cast<uint8_t*>(AlignDown(dex_file->Begin(), kPageSize)),
@@ -682,12 +688,47 @@
         }
       }
     }
+
+    if (Runtime::Current()->IsZygote()) {
+      // Copy the boot image methods data to the mappings we created to share
+      // with the children.
+      Jit* jit = Runtime::Current()->GetJit();
+      size_t offset = 0;
+      for (gc::space::ImageSpace* space : Runtime::Current()->GetHeap()->GetBootImageSpaces()) {
+        const ImageHeader& header = space->GetImageHeader();
+        const ImageSection& section = header.GetMethodsSection();
+        // Because mremap works at page boundaries, we can only handle methods
+        // within a page range. For methods that fall above or below the range,
+        // the child processes will copy their contents to their private mapping
+        // in `child_mapping_methods_`. See `MapBootImageMethods`.
+        uint8_t* page_start = AlignUp(header.GetImageBegin() + section.Offset(), kPageSize);
+        uint8_t* page_end =
+            AlignDown(header.GetImageBegin() + section.Offset() + section.Size(), kPageSize);
+        if (page_end > page_start) {
+          uint64_t capacity = page_end - page_start;
+          memcpy(jit->GetZygoteMappingMethods().Begin() + offset, page_start, capacity);
+          // So the memory is shared, also map the memory into the zygote
+          // process.
+          if (mremap(jit->GetChildMappingMethods().Begin() + offset,
+                     capacity,
+                     capacity,
+                     MREMAP_FIXED | MREMAP_MAYMOVE,
+                     page_start) == MAP_FAILED) {
+            PLOG(WARNING) << "Failed mremap of boot image methods of " << space->GetImageFilename();
+          }
+          offset += capacity;
+        }
+      }
+      // Mark that compilation of boot classpath is done. Other processes will
+      // pick up this boolean.
+      jit->GetCodeCache()->GetZygoteMap()->SetCompilationDone();
+    }
   }
 
  private:
   std::vector<const DexFile*> dex_files_;
 
-  DISALLOW_COPY_AND_ASSIGN(JitMadviseDontNeedTask);
+  DISALLOW_COPY_AND_ASSIGN(JitDoneCompilingProfileTask);
 };
 
 class ZygoteTask final : public Task {
@@ -793,6 +834,116 @@
   DISALLOW_COPY_AND_ASSIGN(JitProfileTask);
 };
 
+void Jit::MapBootImageMethods() {
+  if (!GetChildMappingMethods().IsValid()) {
+    return;
+  }
+  size_t offset = 0;
+  ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
+  for (gc::space::ImageSpace* space : Runtime::Current()->GetHeap()->GetBootImageSpaces()) {
+    const ImageHeader& header = space->GetImageHeader();
+    const ImageSection& section = header.GetMethodsSection();
+    uint8_t* page_start = AlignUp(header.GetImageBegin() + section.Offset(), kPageSize);
+    uint8_t* page_end =
+        AlignDown(header.GetImageBegin() + section.Offset() + section.Size(), kPageSize);
+    if (page_end <= page_start) {
+      // Section doesn't contain one aligned entire page.
+      continue;
+    }
+    uint64_t capacity = page_end - page_start;
+    // Walk over methods in the boot image, and check for ones whose class is
+    // not initialized in this process, but is in the zygote process. For
+    // such methods, we need their entrypoints to be stubs that do the
+    // initialization check.
+    header.VisitPackedArtMethods([&](ArtMethod& method) NO_THREAD_SAFETY_ANALYSIS {
+      if (method.IsRuntimeMethod()) {
+        return;
+      }
+      if (method.GetDeclaringClassUnchecked()->IsVisiblyInitialized() ||
+          !method.IsStatic() ||
+          method.IsConstructor()) {
+        // Method does not need any stub.
+        return;
+      }
+
+      //  We are going to mremap the child mapping into the image:
+      //
+      //                            ImageSection       ChildMappingMethods
+      //
+      //         section start -->  -----------
+      //                            |         |
+      //                            |         |
+      //            page_start -->  |         |   <-----   -----------
+      //                            |         |            |         |
+      //                            |         |            |         |
+      //                            |         |            |         |
+      //                            |         |            |         |
+      //                            |         |            |         |
+      //                            |         |            |         |
+      //                            |         |            |         |
+      //             page_end  -->  |         |   <-----   -----------
+      //                            |         |
+      //         section end   -->  -----------
+
+
+      uint8_t* pointer = reinterpret_cast<uint8_t*>(&method);
+      if (pointer >= page_start && pointer < page_end) {
+        // For all the methods in the mapping, put the entrypoint to the
+        // resolution stub.
+        ArtMethod* new_method = reinterpret_cast<ArtMethod*>(
+            GetChildMappingMethods().Begin() + offset + (pointer - page_start));
+        const void* code = new_method->GetEntryPointFromQuickCompiledCode();
+        if (!class_linker->IsQuickGenericJniStub(code) &&
+            !class_linker->IsQuickToInterpreterBridge(code) &&
+            !class_linker->IsQuickResolutionStub(code)) {
+          LOG(INFO) << "Putting back the resolution stub to an ArtMethod";
+          new_method->SetEntryPointFromQuickCompiledCode(GetQuickResolutionStub());
+        }
+      } else if (pointer < page_start && (pointer + sizeof(ArtMethod)) > page_start) {
+        LOG(INFO) << "Copying parts of the contents of an ArtMethod spanning page_start";
+        // If the method spans `page_start`, copy the contents of the child
+        // into the pages we are going to remap into the image.
+        //
+        //         section start -->  -----------
+        //                            |         |
+        //                            |         |
+        //            page_start -->  |/////////|            -----------
+        //                            |/////////| -> copy -> |/////////|
+        //                            |         |            |         |
+        //
+        memcpy(GetChildMappingMethods().Begin() + offset,
+               page_start,
+               pointer + sizeof(ArtMethod) - page_start);
+      } else if (pointer < page_end && (pointer + sizeof(ArtMethod)) > page_end) {
+        LOG(INFO) << "Copying parts of the contents of an ArtMethod spanning page_end";
+        // If the method spans `page_end`, copy the contents of the child
+        // into the pages we are going to remap into the image.
+        //
+        //                            |         |            |         |
+        //                            |/////////| -> copy -> |/////////|
+        //             page_end  -->  |/////////|            -----------
+        //                            |         |
+        //         section end   -->  -----------
+        //
+        size_t bytes_to_copy = (page_end - pointer);
+        memcpy(GetChildMappingMethods().Begin() + offset + capacity - bytes_to_copy,
+               page_end - bytes_to_copy,
+               bytes_to_copy);
+      }
+    }, space->Begin(), kRuntimePointerSize);
+
+    // Map the memory in the boot image range.
+    if (mremap(GetChildMappingMethods().Begin() + offset,
+               capacity,
+               capacity,
+               MREMAP_FIXED | MREMAP_MAYMOVE,
+               page_start) == MAP_FAILED) {
+      PLOG(WARNING) << "Fail to mremap boot image methods for " << space->GetImageFilename();
+    }
+    offset += capacity;
+  }
+}
+
 void Jit::CreateThreadPool() {
   // There is a DCHECK in the 'AddSamples' method to ensure the tread pool
   // is not null when we instrument.
@@ -804,11 +955,80 @@
   thread_pool_->SetPthreadPriority(options_->GetThreadPoolPthreadPriority());
   Start();
 
-  // If we're not using the default boot image location, request a JIT task to
-  // compile all methods in the boot image profile.
   Runtime* runtime = Runtime::Current();
   if (runtime->IsZygote() && runtime->IsUsingApexBootImageLocation() && UseJitCompilation()) {
+    // If we're not using the default boot image location, request a JIT task to
+    // compile all methods in the boot image profile.
     thread_pool_->AddTask(Thread::Current(), new ZygoteTask());
+
+    // And create mappings to share boot image methods memory from the zygote to
+    // child processes.
+
+    // Compute the total capacity required for the boot image methods.
+    uint64_t total_capacity = 0;
+    for (gc::space::ImageSpace* space : Runtime::Current()->GetHeap()->GetBootImageSpaces()) {
+      const ImageHeader& header = space->GetImageHeader();
+      const ImageSection& section = header.GetMethodsSection();
+      // Mappings need to be at the page level.
+      uint8_t* page_start = AlignUp(header.GetImageBegin() + section.Offset(), kPageSize);
+      uint8_t* page_end =
+          AlignDown(header.GetImageBegin() + section.Offset() + section.Size(), kPageSize);
+      if (page_end > page_start) {
+        total_capacity += (page_end - page_start);
+      }
+    }
+
+    // Create the child and zygote mappings to the boot image methods.
+    if (total_capacity > 0) {
+      // Start with '/boot' and end with '.art' to match the pattern recognized
+      // by android_os_Debug.cpp for boot images.
+      const char* name = "/boot-image-methods.art";
+      unique_fd mem_fd = unique_fd(art::memfd_create(name, /* flags= */ 0));
+      if (mem_fd.get() == -1) {
+        PLOG(WARNING) << "Could not create boot image methods file descriptor";
+        return;
+      }
+      if (ftruncate(mem_fd.get(), total_capacity) != 0) {
+        PLOG(WARNING) << "Failed to truncate boot image methods file to " << total_capacity;
+        return;
+      }
+      std::string error_str;
+      zygote_mapping_methods_ = MemMap::MapFile(
+        total_capacity,
+        PROT_READ | PROT_WRITE,
+        MAP_SHARED,
+        mem_fd,
+        /* start= */ 0,
+        /* low_4gb= */ false,
+        "boot-image-methods",
+        &error_str);
+
+      if (!zygote_mapping_methods_.IsValid()) {
+        LOG(WARNING) << "Failed to create zygote mapping of boot image methods:  " << error_str;
+        return;
+      }
+      if (zygote_mapping_methods_.MadviseDontFork() != 0) {
+        LOG(WARNING) << "Failed to madvise dont fork boot image methods";
+        zygote_mapping_methods_ = MemMap();
+        return;
+      }
+
+      child_mapping_methods_ = MemMap::MapFile(
+        total_capacity,
+        PROT_READ | PROT_WRITE,
+        MAP_PRIVATE,
+        mem_fd,
+        /* start= */ 0,
+        /* low_4gb= */ true,
+        "boot-image-methods",
+        &error_str);
+
+      if (!child_mapping_methods_.IsValid()) {
+        LOG(WARNING) << "Failed to create child mapping of boot image methods: " << error_str;
+        zygote_mapping_methods_ = MemMap();
+        return;
+      }
+    }
   }
 }
 
@@ -981,8 +1201,8 @@
     }
   }
 
-  // Add a madvise task to release dex file pages once all compilation is done.
-  JitMadviseDontNeedTask* task = new JitMadviseDontNeedTask(dex_files);
+  // Add a task to run when all compilation is done.
+  JitDoneCompilingProfileTask* task = new JitDoneCompilingProfileTask(dex_files);
   MutexLock mu(Thread::Current(), boot_completed_lock_);
   if (!boot_completed_) {
     tasks_after_boot_.push_back(task);
@@ -1193,12 +1413,38 @@
   }
 }
 
+static void* RunPollingThread(void* arg) {
+  Jit* jit = reinterpret_cast<Jit*>(arg);
+  do {
+    sleep(10);
+  } while (!jit->GetCodeCache()->GetZygoteMap()->IsCompilationDone());
+  jit->MapBootImageMethods();
+  return nullptr;
+}
+
 void Jit::PostForkChildAction(bool is_system_server, bool is_zygote) {
   // Clear the potential boot tasks inherited from the zygote.
   {
     MutexLock mu(Thread::Current(), boot_completed_lock_);
     tasks_after_boot_.clear();
   }
+
+  if (Runtime::Current()->IsUsingApexBootImageLocation() &&
+      !GetCodeCache()->GetZygoteMap()->IsCompilationDone()) {
+    // Create a thread that will poll the status of zygote compilation, and map
+    // the private mapping of boot image methods.
+    zygote_mapping_methods_.ResetInForkedProcess();
+    pthread_t polling_thread;
+    pthread_attr_t attr;
+    CHECK_PTHREAD_CALL(pthread_attr_init, (&attr), "new thread");
+    CHECK_PTHREAD_CALL(pthread_attr_setdetachstate, (&attr, PTHREAD_CREATE_DETACHED),
+                       "PTHREAD_CREATE_DETACHED");
+    CHECK_PTHREAD_CALL(
+        pthread_create,
+        (&polling_thread, &attr, RunPollingThread, reinterpret_cast<void*>(this)),
+        "Methods maps thread");
+  }
+
   if (is_zygote || Runtime::Current()->IsSafeMode()) {
     // Delete the thread pool, we are not going to JIT.
     thread_pool_.reset(nullptr);
diff --git a/runtime/jit/jit.h b/runtime/jit/jit.h
index 59861c5..68aa1dc 100644
--- a/runtime/jit/jit.h
+++ b/runtime/jit/jit.h
@@ -374,6 +374,17 @@
   bool CanAssumeInitialized(ObjPtr<mirror::Class> cls, bool is_for_shared_region) const
       REQUIRES_SHARED(Locks::mutator_lock_);
 
+  const MemMap& GetZygoteMappingMethods() const {
+    return zygote_mapping_methods_;
+  }
+
+  const MemMap& GetChildMappingMethods() const {
+    return child_mapping_methods_;
+  }
+
+  // Map boot image methods after all compilation in zygote has been done.
+  void MapBootImageMethods();
+
  private:
   Jit(JitCodeCache* code_cache, JitOptions* options);
 
@@ -421,6 +432,19 @@
   Histogram<uint64_t> memory_use_ GUARDED_BY(lock_);
   Mutex lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
 
+  // In the JIT zygote configuration, after all compilation is done, the zygote
+  // will copy its contents of the boot image to the zygote_mapping_methods_,
+  // which will be picked up by processes that will map child_mapping_methods_
+  // in-place within the boot image mapping.
+  //
+  // zygote_mapping_methods_ and child_mapping_methods_ point to the same memory
+  // (backed by a memfd). The difference between the two is that
+  // zygote_mapping_methods_ is shared memory only usable by the zygote and not
+  // inherited by child processes. child_mapping_methods_ is a private mapping
+  // that all processes will map.
+  MemMap zygote_mapping_methods_;
+  MemMap child_mapping_methods_;
+
   DISALLOW_COPY_AND_ASSIGN(Jit);
 };
 
diff --git a/runtime/jit/jit_code_cache.cc b/runtime/jit/jit_code_cache.cc
index 665b8be..70c6822 100644
--- a/runtime/jit/jit_code_cache.cc
+++ b/runtime/jit/jit_code_cache.cc
@@ -1829,6 +1829,9 @@
     region_->FillData(data, capacity, Entry { nullptr, nullptr });
     map_ = ArrayRef(data, capacity);
   }
+  done_ = reinterpret_cast<const bool*>(region_->AllocateData(sizeof(bool)));
+  CHECK(done_ != nullptr) << "Could not allocate a single boolean in the JIT region";
+  region_->WriteData(done_, false);
 }
 
 const void* ZygoteMap::GetCodeFor(ArtMethod* method, uintptr_t pc) const {
diff --git a/runtime/jit/jit_code_cache.h b/runtime/jit/jit_code_cache.h
index 154700f..ea7614b 100644
--- a/runtime/jit/jit_code_cache.h
+++ b/runtime/jit/jit_code_cache.h
@@ -88,7 +88,7 @@
 // This map is writable only by the zygote, and readable by all children.
 class ZygoteMap {
  public:
-  explicit ZygoteMap(JitMemoryRegion* region) : map_(), region_(region) {}
+  explicit ZygoteMap(JitMemoryRegion* region) : map_(), region_(region), done_(nullptr) {}
 
   // Initialize the data structure so it can hold `number_of_methods` mappings.
   // Note that the map is fixed size and never grows.
@@ -106,6 +106,14 @@
     return GetCodeFor(method) != nullptr;
   }
 
+  void SetCompilationDone() {
+    region_->WriteData(done_, true);
+  }
+
+  bool IsCompilationDone() const {
+    return *done_;
+  }
+
  private:
   struct Entry {
     ArtMethod* method;
@@ -121,6 +129,8 @@
   // The region in which the map is allocated.
   JitMemoryRegion* const region_;
 
+  const bool* done_;
+
   DISALLOW_COPY_AND_ASSIGN(ZygoteMap);
 };