[jitzygote] Map the boot image fd after the contents have been written.

To avoid relying on undefined behavior from the kernel, map the
boot image methods fd only after the shared mapping has been updated
with the contents.

Bug: 119800099
Test: BusinessCard
Change-Id: I501b8994f2b0f2b27c4693443827ae1583f9fae4
diff --git a/libartbase/base/memfd.h b/libartbase/base/memfd.h
index 53cfe9c..0bb336d 100644
--- a/libartbase/base/memfd.h
+++ b/libartbase/base/memfd.h
@@ -17,8 +17,46 @@
 #ifndef ART_LIBARTBASE_BASE_MEMFD_H_
 #define ART_LIBARTBASE_BASE_MEMFD_H_
 
+#include <fcntl.h>
+#include <unistd.h>
+
 #if defined(__BIONIC__)
 #include <linux/memfd.h>  // To access memfd flags.
+#else
+
+// If memfd flags don't exist in the current toolchain, define them ourselves.
+#ifndef F_ADD_SEALS
+# define F_ADD_SEALS          (1033)
+#endif
+
+#ifndef F_GET_SEALS
+# define F_GET_SEALS          (1034)
+#endif
+
+#ifndef F_SEAL_SEAL
+# define F_SEAL_SEAL          0x0001
+#endif
+
+#ifndef F_SEAL_SHRINK
+# define F_SEAL_SHRINK        0x0002
+#endif
+
+#ifndef F_SEAL_GROW
+# define F_SEAL_GROW          0x0004
+#endif
+
+#ifndef F_SEAL_WRITE
+# define F_SEAL_WRITE         0x0008
+#endif
+
+#ifndef F_SEAL_FUTURE_WRITE
+# define F_SEAL_FUTURE_WRITE  0x0010
+#endif
+
+#ifndef MFD_ALLOW_SEALING
+# define MFD_ALLOW_SEALING    0x0002U
+#endif
+
 #endif
 
 namespace art {
diff --git a/runtime/jit/jit.cc b/runtime/jit/jit.cc
index 11619c4..25b5be5 100644
--- a/runtime/jit/jit.cc
+++ b/runtime/jit/jit.cc
@@ -215,7 +215,10 @@
       boot_completed_lock_("Jit::boot_completed_lock_"),
       cumulative_timings_("JIT timings"),
       memory_use_("Memory used for compilation", 16),
-      lock_("JIT memory use lock") {}
+      lock_("JIT memory use lock"),
+      zygote_mapping_methods_(),
+      fd_methods_(-1),
+      fd_methods_size_(0) {}
 
 Jit* Jit::Create(JitCodeCache* code_cache, JitOptions* options) {
   if (jit_load_ == nullptr) {
@@ -589,6 +592,133 @@
   memory_use_.AddValue(bytes);
 }
 
+void Jit::NotifyZygoteCompilationDone() {
+  if (fd_methods_ == -1) {
+    return;
+  }
+
+  size_t offset = 0;
+  for (gc::space::ImageSpace* space : Runtime::Current()->GetHeap()->GetBootImageSpaces()) {
+    const ImageHeader& header = space->GetImageHeader();
+    const ImageSection& section = header.GetMethodsSection();
+    // Because mremap works at page boundaries, we can only handle methods
+    // within a page range. For methods that fall above or below the range,
+    // the child processes will copy their contents to their private mapping
+    // in `child_mapping_methods`. See `MapBootImageMethods`.
+    uint8_t* page_start = AlignUp(header.GetImageBegin() + section.Offset(), kPageSize);
+    uint8_t* page_end =
+        AlignDown(header.GetImageBegin() + section.Offset() + section.Size(), kPageSize);
+    if (page_end > page_start) {
+      uint64_t capacity = page_end - page_start;
+      memcpy(zygote_mapping_methods_.Begin() + offset, page_start, capacity);
+      offset += capacity;
+    }
+  }
+
+  // Do an msync to ensure we are not affected by writes still being in caches.
+  if (msync(zygote_mapping_methods_.Begin(), fd_methods_size_, MS_SYNC) != 0) {
+    PLOG(WARNING) << "Failed to sync boot image methods memory";
+    code_cache_->GetZygoteMap()->SetCompilationState(ZygoteCompilationState::kNotifiedFailure);
+    return;
+  }
+
+  // We don't need the shared mapping anymore, and we must drop it in case the
+  // file is not write-sealed yet: F_SEAL_WRITE fails while a writable shared mapping exists.
+  zygote_mapping_methods_ = MemMap::Invalid();
+
+  std::string error_str;
+  MemMap child_mapping_methods = MemMap::MapFile(
+      fd_methods_size_,
+      PROT_READ | PROT_WRITE,
+      MAP_PRIVATE,
+      fd_methods_,
+      /* start= */ 0,
+      /* low_4gb= */ false,
+      "boot-image-methods",
+      &error_str);
+
+  if (!child_mapping_methods.IsValid()) {
+    LOG(WARNING) << "Failed to create child mapping of boot image methods: " << error_str;
+    code_cache_->GetZygoteMap()->SetCompilationState(ZygoteCompilationState::kNotifiedFailure);
+    return;
+  }
+
+  if (!IsSealFutureWriteSupported()) {
+    // If we didn't write seal the fd before, seal it now.
+
+    if (fcntl(fd_methods_, F_ADD_SEALS, F_SEAL_SEAL | F_SEAL_WRITE) == -1) {
+      PLOG(WARNING) << "Failed to seal boot image methods file descriptor";
+      code_cache_->GetZygoteMap()->SetCompilationState(ZygoteCompilationState::kNotifiedFailure);
+      return;
+    }
+
+    // Ensure the contents are the same as before: there was a window between
+    // the memcpy and the sealing where other processes could have changed the
+    // contents.
+    offset = 0;
+    for (gc::space::ImageSpace* space : Runtime::Current()->GetHeap()->GetBootImageSpaces()) {
+      const ImageHeader& header = space->GetImageHeader();
+      const ImageSection& section = header.GetMethodsSection();
+      // Because mremap works at page boundaries, we can only handle methods
+      // within a page range. For methods that fall above or below the range,
+      // the child processes will copy their contents to their private mapping
+      // in `child_mapping_methods`. See `MapBootImageMethods`.
+      uint8_t* page_start = AlignUp(header.GetImageBegin() + section.Offset(), kPageSize);
+      uint8_t* page_end =
+          AlignDown(header.GetImageBegin() + section.Offset() + section.Size(), kPageSize);
+      if (page_end > page_start) {
+        uint64_t capacity = page_end - page_start;
+        if (memcmp(child_mapping_methods.Begin() + offset, page_start, capacity) != 0) {
+          LOG(WARNING) << "Contents differ in boot image methods data";
+          code_cache_->GetZygoteMap()->SetCompilationState(
+              ZygoteCompilationState::kNotifiedFailure);
+          return;
+        }
+        offset += capacity;
+      }
+    }
+  }
+
+  // Future spawned processes don't need the fd anymore.
+  fd_methods_.reset();
+
+  // In order to have the zygote and children share the memory, we also remap
+  // the memory into the zygote process.
+  offset = 0;
+  for (gc::space::ImageSpace* space : Runtime::Current()->GetHeap()->GetBootImageSpaces()) {
+    const ImageHeader& header = space->GetImageHeader();
+    const ImageSection& section = header.GetMethodsSection();
+    // Because mremap works at page boundaries, we can only handle methods
+    // within a page range. For methods that fall above or below the range,
+    // the child processes will copy their contents to their private mapping
+    // in `child_mapping_methods`. See `MapBootImageMethods`.
+    uint8_t* page_start = AlignUp(header.GetImageBegin() + section.Offset(), kPageSize);
+    uint8_t* page_end =
+        AlignDown(header.GetImageBegin() + section.Offset() + section.Size(), kPageSize);
+    if (page_end > page_start) {
+      uint64_t capacity = page_end - page_start;
+      if (mremap(child_mapping_methods.Begin() + offset,
+                 capacity,
+                 capacity,
+                 MREMAP_FIXED | MREMAP_MAYMOVE,
+                 page_start) == MAP_FAILED) {
+        // Failing to remap is safe as the process will just use the old
+        // contents.
+        PLOG(WARNING) << "Failed mremap of boot image methods of " << space->GetImageFilename();
+      }
+      offset += capacity;
+    }
+  }
+
+  // Mark that compilation of boot classpath is done, and memory can now be
+  // shared. Other processes will pick up this information.
+  code_cache_->GetZygoteMap()->SetCompilationState(ZygoteCompilationState::kNotifiedOk);
+
+  // The private mapping created for this process has been mremapped. We can
+  // reset it.
+  child_mapping_methods.Reset();
+}
+
 class JitCompileTask final : public Task {
  public:
   enum class TaskKind {
@@ -693,36 +823,7 @@
     if (Runtime::Current()->IsZygote()) {
       // Copy the boot image methods data to the mappings we created to share
       // with the children.
-      Jit* jit = Runtime::Current()->GetJit();
-      size_t offset = 0;
-      for (gc::space::ImageSpace* space : Runtime::Current()->GetHeap()->GetBootImageSpaces()) {
-        const ImageHeader& header = space->GetImageHeader();
-        const ImageSection& section = header.GetMethodsSection();
-        // Because mremap works at page boundaries, we can only handle methods
-        // within a page range. For methods that falls above or below the range,
-        // the child processes will copy their contents to their private mapping
-        // in `child_mapping_methods_`. See `MapBootImageMethods`.
-        uint8_t* page_start = AlignUp(header.GetImageBegin() + section.Offset(), kPageSize);
-        uint8_t* page_end =
-            AlignDown(header.GetImageBegin() + section.Offset() + section.Size(), kPageSize);
-        if (page_end > page_start) {
-          uint64_t capacity = page_end - page_start;
-          memcpy(jit->GetZygoteMappingMethods().Begin() + offset, page_start, capacity);
-          // So the memory is shared, also map the memory into the zygote
-          // process.
-          if (mremap(jit->GetChildMappingMethods().Begin() + offset,
-                     capacity,
-                     capacity,
-                     MREMAP_FIXED | MREMAP_MAYMOVE,
-                     page_start) == MAP_FAILED) {
-            PLOG(WARNING) << "Failed mremap of boot image methods of " << space->GetImageFilename();
-          }
-          offset += capacity;
-        }
-      }
-      // Mark that compilation of boot classpath is done. Other processes will
-      // pick up this boolean.
-      jit->GetCodeCache()->GetZygoteMap()->SetCompilationDone();
+      Runtime::Current()->GetJit()->NotifyZygoteCompilationDone();
     }
   }
 
@@ -842,7 +943,28 @@
 }
 
 void Jit::MapBootImageMethods() {
-  if (!GetChildMappingMethods().IsValid()) {
+  CHECK_NE(fd_methods_.get(), -1);
+  if (!code_cache_->GetZygoteMap()->CanMapBootImageMethods()) {
+    LOG(WARNING) << "Not mapping boot image methods due to error from zygote";
+    return;
+  }
+
+  std::string error_str;
+  MemMap child_mapping_methods = MemMap::MapFile(
+      fd_methods_size_,
+      PROT_READ | PROT_WRITE,
+      MAP_PRIVATE,
+      fd_methods_,
+      /* start= */ 0,
+      /* low_4gb= */ false,
+      "boot-image-methods",
+      &error_str);
+
+  // We don't need the fd anymore.
+  fd_methods_.reset();
+
+  if (!child_mapping_methods.IsValid()) {
+    LOG(WARNING) << "Failed to create child mapping of boot image methods: " << error_str;
     return;
   }
   size_t offset = 0;
@@ -898,7 +1020,7 @@
         // For all the methods in the mapping, put the entrypoint to the
         // resolution stub.
         ArtMethod* new_method = reinterpret_cast<ArtMethod*>(
-            GetChildMappingMethods().Begin() + offset + (pointer - page_start));
+            child_mapping_methods.Begin() + offset + (pointer - page_start));
         const void* code = new_method->GetEntryPointFromQuickCompiledCode();
         if (!class_linker->IsQuickGenericJniStub(code) &&
             !class_linker->IsQuickToInterpreterBridge(code) &&
@@ -918,7 +1040,7 @@
         //                            |/////////| -> copy -> |/////////|
         //                            |         |            |         |
         //
-        CopyIfDifferent(GetChildMappingMethods().Begin() + offset,
+        CopyIfDifferent(child_mapping_methods.Begin() + offset,
                         page_start,
                         pointer + sizeof(ArtMethod) - page_start);
       } else if (pointer < page_end && (pointer + sizeof(ArtMethod)) > page_end) {
@@ -933,14 +1055,14 @@
         //         section end   -->  -----------
         //
         size_t bytes_to_copy = (page_end - pointer);
-        CopyIfDifferent(GetChildMappingMethods().Begin() + offset + capacity - bytes_to_copy,
+        CopyIfDifferent(child_mapping_methods.Begin() + offset + capacity - bytes_to_copy,
                         page_end - bytes_to_copy,
                         bytes_to_copy);
       }
     }, space->Begin(), kRuntimePointerSize);
 
     // Map the memory in the boot image range.
-    if (mremap(GetChildMappingMethods().Begin() + offset,
+    if (mremap(child_mapping_methods.Begin() + offset,
                capacity,
                capacity,
                MREMAP_FIXED | MREMAP_MAYMOVE,
@@ -949,6 +1071,10 @@
     }
     offset += capacity;
   }
+
+  // The private mapping created for this process has been mremapped. We can
+  // reset it.
+  child_mapping_methods.Reset();
 }
 
 void Jit::CreateThreadPool() {
@@ -990,7 +1116,7 @@
       // Start with '/boot' and end with '.art' to match the pattern recognized
       // by android_os_Debug.cpp for boot images.
       const char* name = "/boot-image-methods.art";
-      unique_fd mem_fd = unique_fd(art::memfd_create(name, /* flags= */ 0));
+      unique_fd mem_fd = unique_fd(art::memfd_create(name, /* flags= */ MFD_ALLOW_SEALING));
       if (mem_fd.get() == -1) {
         PLOG(WARNING) << "Could not create boot image methods file descriptor";
         return;
@@ -1000,6 +1126,9 @@
         return;
       }
       std::string error_str;
+
+      // Create the shared mapping eagerly, as this prevents other processes
+      // from adding the write seal.
       zygote_mapping_methods_ = MemMap::MapFile(
         total_capacity,
         PROT_READ | PROT_WRITE,
@@ -1020,21 +1149,26 @@
         return;
       }
 
-      child_mapping_methods_ = MemMap::MapFile(
-        total_capacity,
-        PROT_READ | PROT_WRITE,
-        MAP_PRIVATE,
-        mem_fd,
-        /* start= */ 0,
-        /* low_4gb= */ true,
-        "boot-image-methods",
-        &error_str);
-
-      if (!child_mapping_methods_.IsValid()) {
-        LOG(WARNING) << "Failed to create child mapping of boot image methods: " << error_str;
-        zygote_mapping_methods_ = MemMap();
-        return;
+      if (IsSealFutureWriteSupported()) {
+        // Seal now.
+        if (fcntl(mem_fd,
+                  F_ADD_SEALS,
+                  F_SEAL_SHRINK | F_SEAL_GROW | F_SEAL_SEAL | F_SEAL_FUTURE_WRITE) == -1) {
+          PLOG(WARNING) << "Failed to seal boot image methods file descriptor";
+          zygote_mapping_methods_ = MemMap();
+          return;
+        }
+      } else {
+        // Only seal the size. We will seal writes once we are done writing
+        // to the shared mapping.
+        if (fcntl(mem_fd, F_ADD_SEALS, F_SEAL_SHRINK | F_SEAL_GROW) == -1) {
+          PLOG(WARNING) << "Failed to seal boot image methods file descriptor";
+          zygote_mapping_methods_ = MemMap();
+          return;
+        }
       }
+      fd_methods_ = unique_fd(mem_fd.release());
+      fd_methods_size_ = total_capacity;
     }
   }
 }
@@ -1425,7 +1559,7 @@
   Jit* jit = reinterpret_cast<Jit*>(arg);
   do {
     sleep(10);
-  } while (!jit->GetCodeCache()->GetZygoteMap()->IsCompilationDone());
+  } while (!jit->GetCodeCache()->GetZygoteMap()->IsCompilationNotified());
   jit->MapBootImageMethods();
   return nullptr;
 }
@@ -1437,8 +1571,7 @@
     tasks_after_boot_.clear();
   }
 
-  if (Runtime::Current()->IsUsingApexBootImageLocation() &&
-      !GetCodeCache()->GetZygoteMap()->IsCompilationDone()) {
+  if (Runtime::Current()->IsUsingApexBootImageLocation() && fd_methods_ != -1) {
     // Create a thread that will poll the status of zygote compilation, and map
     // the private mapping of boot image methods.
     zygote_mapping_methods_.ResetInForkedProcess();
diff --git a/runtime/jit/jit.h b/runtime/jit/jit.h
index 68aa1dc..502fe9f 100644
--- a/runtime/jit/jit.h
+++ b/runtime/jit/jit.h
@@ -17,6 +17,8 @@
 #ifndef ART_RUNTIME_JIT_JIT_H_
 #define ART_RUNTIME_JIT_JIT_H_
 
+#include <android-base/unique_fd.h>
+
 #include "base/histogram-inl.h"
 #include "base/macros.h"
 #include "base/mutex.h"
@@ -374,17 +376,13 @@
   bool CanAssumeInitialized(ObjPtr<mirror::Class> cls, bool is_for_shared_region) const
       REQUIRES_SHARED(Locks::mutator_lock_);
 
-  const MemMap& GetZygoteMappingMethods() const {
-    return zygote_mapping_methods_;
-  }
-
-  const MemMap& GetChildMappingMethods() const {
-    return child_mapping_methods_;
-  }
-
   // Map boot image methods after all compilation in zygote has been done.
   void MapBootImageMethods();
 
+  // Notify other processes that the zygote is done profile-compiling boot
+  // class path methods.
+  void NotifyZygoteCompilationDone();
+
  private:
   Jit(JitCodeCache* code_cache, JitOptions* options);
 
@@ -434,16 +432,23 @@
 
   // In the JIT zygote configuration, after all compilation is done, the zygote
   // will copy its contents of the boot image to the zygote_mapping_methods_,
-  // which will be picked up by processes that will map child_mapping_methods_
+  // which will be picked up by processes that will map the memory
   // in-place within the boot image mapping.
   //
-  // zygote_mapping_methods_ and child_mapping_methods_ point to the same memory
-  // (backed by a memfd). The difference between the two is that
   // zygote_mapping_methods_ is shared memory only usable by the zygote and not
-  // inherited by child processes. child_mapping_methods_ is a private mapping
-  // that all processes will map.
+  // inherited by child processes. We create it eagerly to ensure other
+  // processes cannot write-seal the file.
   MemMap zygote_mapping_methods_;
-  MemMap child_mapping_methods_;
+
+  // The file descriptor created through memfd_create pointing to memory holding
+  // boot image methods. Created by the zygote, and inherited by child
+  // processes. The descriptor will be closed in each process (including the
+  // zygote) once they don't need it.
+  android::base::unique_fd fd_methods_;
+
+  // The size of the memory pointed to by `fd_methods_`. Cached here to avoid
+  // recomputing it.
+  size_t fd_methods_size_;
 
   DISALLOW_COPY_AND_ASSIGN(Jit);
 };
diff --git a/runtime/jit/jit_code_cache.cc b/runtime/jit/jit_code_cache.cc
index 6a13d59..82ca44c 100644
--- a/runtime/jit/jit_code_cache.cc
+++ b/runtime/jit/jit_code_cache.cc
@@ -1838,15 +1838,18 @@
   // Allocate for 40-80% capacity. This will offer OK lookup times, and termination
   // cases.
   size_t capacity = RoundUpToPowerOfTwo(number_of_methods * 100 / 80);
-  const Entry* data =
-      reinterpret_cast<const Entry*>(region_->AllocateData(capacity * sizeof(Entry)));
-  if (data != nullptr) {
-    region_->FillData(data, capacity, Entry { nullptr, nullptr });
-    map_ = ArrayRef(data, capacity);
+  const uint8_t* memory = region_->AllocateData(
+      capacity * sizeof(Entry) + sizeof(ZygoteCompilationState));
+  if (memory == nullptr) {
+    LOG(WARNING) << "Could not allocate data for the zygote map";
+    return;
   }
-  done_ = reinterpret_cast<const bool*>(region_->AllocateData(sizeof(bool)));
-  CHECK(done_ != nullptr) << "Could not allocate a single boolean in the JIT region";
-  region_->WriteData(done_, false);
+  const Entry* data = reinterpret_cast<const Entry*>(memory);
+  region_->FillData(data, capacity, Entry { nullptr, nullptr });
+  map_ = ArrayRef(data, capacity);
+  compilation_state_ = reinterpret_cast<const ZygoteCompilationState*>(
+      memory + capacity * sizeof(Entry));
+  region_->WriteData(compilation_state_, ZygoteCompilationState::kInProgress);
 }
 
 const void* ZygoteMap::GetCodeFor(ArtMethod* method, uintptr_t pc) const {
diff --git a/runtime/jit/jit_code_cache.h b/runtime/jit/jit_code_cache.h
index 12425cf..637d5e3 100644
--- a/runtime/jit/jit_code_cache.h
+++ b/runtime/jit/jit_code_cache.h
@@ -79,6 +79,22 @@
 // of garbage collecting code.
 using CodeCacheBitmap = gc::accounting::MemoryRangeBitmap<kJitCodeAccountingBytes>;
 
+// The state of profile-based compilation in the zygote.
+// - kInProgress:      JIT compilation is happening
+// - kDone:            JIT compilation is finished, and the zygote is preparing to notify
+//                     the other processes.
+// - kNotifiedOk:      the zygote has notified the other processes, which can start
+//                     sharing the boot image method mappings.
+// - kNotifiedFailure: the zygote has notified the other processes, but they
+//                     cannot share the boot image method mappings due to
+//                     unexpected errors
+enum class ZygoteCompilationState : uint8_t {
+  kInProgress = 0,
+  kDone = 1,
+  kNotifiedOk = 2,
+  kNotifiedFailure = 3,
+};
+
 // Class abstraction over a map of ArtMethod -> compiled code, where the
 // ArtMethod are compiled by the zygote, and the map acts as a communication
 // channel between the zygote and the other processes.
@@ -88,7 +104,8 @@
 // This map is writable only by the zygote, and readable by all children.
 class ZygoteMap {
  public:
-  explicit ZygoteMap(JitMemoryRegion* region) : map_(), region_(region), done_(nullptr) {}
+  explicit ZygoteMap(JitMemoryRegion* region)
+      : map_(), region_(region), compilation_state_(nullptr) {}
 
   // Initialize the data structure so it can hold `number_of_methods` mappings.
   // Note that the map is fixed size and never grows.
@@ -106,12 +123,16 @@
     return GetCodeFor(method) != nullptr;
   }
 
-  void SetCompilationDone() {
-    region_->WriteData(done_, true);
+  void SetCompilationState(ZygoteCompilationState state) {
+    region_->WriteData(compilation_state_, state);
   }
 
-  bool IsCompilationDone() const {
-    return *done_;
+  bool IsCompilationNotified() const {
+    return *compilation_state_ > ZygoteCompilationState::kDone;
+  }
+
+  bool CanMapBootImageMethods() const {
+    return *compilation_state_ == ZygoteCompilationState::kNotifiedOk;
   }
 
  private:
@@ -129,7 +150,9 @@
   // The region in which the map is allocated.
   JitMemoryRegion* const region_;
 
-  const bool* done_;
+  // The current state of compilation in the zygote. Starts with kInProgress,
+  // and should end with kNotifiedOk or kNotifiedFailure.
+  const ZygoteCompilationState* compilation_state_;
 
   DISALLOW_COPY_AND_ASSIGN(ZygoteMap);
 };