author     2023-01-31 07:58:23 +0000
committer  2023-03-11 03:13:49 +0000
commit     5e0affb9ad42f617cc0c8c3dd895357ebeaced62 (patch)
tree       e4fed6691d7801d6577f1d3dd3d79f6ad1cfd630
parent     22ec0a49eb93bc8e35d77c4c6d5ec1a40748ee01 (diff)
Use userfaultfd's SIGBUS feature for concurrent compaction
With the threading-based implementation, mutator threads and the
userfaultfd worker threads have to be alternately scheduled whenever a
mutator accesses a missing page. With the SIGBUS feature, on the other
hand, the mutator gets a SIGBUS signal on accessing a missing page and
handles the fault itself in the signal handler. The latter is therefore
expected to give significantly better response times: in a
microbenchmark on host, the SIGBUS approach is up to 10x faster than
the threading-based one.
Bug: 160737021
Test: art/test/testrunner/testrunner.py --host
Change-Id: I6f8d05690e23b70f9517e9e1929af3006b9960bb
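[Annotation, not part of the commit message: the SIGBUS mode that this change
relies on is negotiated with the kernel via the UFFDIO_API ioctl. The
following standalone sketch shows the handshake; it is illustrative only
(OpenUffdWithSigbus is an invented name), with error handling abbreviated.
All other names come from the Linux UAPI header <linux/userfaultfd.h>.]

#include <fcntl.h>
#include <linux/userfaultfd.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <unistd.h>

int OpenUffdWithSigbus() {
  // Create the userfaultfd. ART additionally passes UFFD_USER_MODE_ONLY
  // where available; that detail is elided here.
  int uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
  if (uffd == -1) {
    return -1;
  }
  struct uffdio_api api = {.api = UFFD_API, .features = UFFD_FEATURE_SIGBUS, .ioctls = 0};
  // With UFFD_FEATURE_SIGBUS negotiated, a fault on a registered range
  // raises SIGBUS (si_code == BUS_ADRERR) in the faulting thread itself,
  // instead of queueing a uffd_msg for a reader/poller thread.
  if (ioctl(uffd, UFFDIO_API, &api) == -1 || (api.features & UFFD_FEATURE_SIGBUS) == 0) {
    close(uffd);
    return -1;
  }
  return uffd;
}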
-rw-r--r--  runtime/fault_handler.cc             | 184
-rw-r--r--  runtime/fault_handler.h              |  12
-rw-r--r--  runtime/gc/collector/mark_compact.cc | 509
-rw-r--r--  runtime/gc/collector/mark_compact.h  |  71
-rw-r--r--  runtime/runtime.cc                   |   3
-rw-r--r--  runtime/thread.h                     |  17
6 files changed, 540 insertions(+), 256 deletions(-)
diff --git a/runtime/fault_handler.cc b/runtime/fault_handler.cc
index dd28f3658b..a3c1f3bdf5 100644
--- a/runtime/fault_handler.cc
+++ b/runtime/fault_handler.cc
@@ -16,17 +16,19 @@
 #include "fault_handler.h"
 
-#include <atomic>
 #include <string.h>
 #include <sys/mman.h>
 #include <sys/ucontext.h>
+
+#include <atomic>
+
 #include "art_method-inl.h"
 #include "base/logging.h"  // For VLOG
 #include "base/membarrier.h"
 #include "base/safe_copy.h"
 #include "base/stl_util.h"
 #include "dex/dex_file_types.h"
+#include "gc/heap.h"
 #include "jit/jit.h"
 #include "jit/jit_code_cache.h"
 #include "mirror/class.h"
@@ -49,64 +51,128 @@ extern "C" NO_INLINE __attribute__((visibility("default"))) void art_sigsegv_fau
 }
 
 // Signal handler called on SIGSEGV.
-static bool art_fault_handler(int sig, siginfo_t* info, void* context) {
-  return fault_manager.HandleFault(sig, info, context);
+static bool art_sigsegv_handler(int sig, siginfo_t* info, void* context) {
+  return fault_manager.HandleSigsegvFault(sig, info, context);
+}
+
+// Signal handler called on SIGBUS.
+static bool art_sigbus_handler(int sig, siginfo_t* info, void* context) {
+  return fault_manager.HandleSigbusFault(sig, info, context);
 }
 
 FaultManager::FaultManager()
     : generated_code_ranges_lock_("FaultHandler generated code ranges lock",
                                   LockLevel::kGenericBottomLock),
-      initialized_(false) {
-  sigaction(SIGSEGV, nullptr, &oldaction_);
-}
+      initialized_(false) {}
 
 FaultManager::~FaultManager() {
 }
 
-void FaultManager::Init() {
-  CHECK(!initialized_);
-  sigset_t mask;
-  sigfillset(&mask);
-  sigdelset(&mask, SIGABRT);
-  sigdelset(&mask, SIGBUS);
-  sigdelset(&mask, SIGFPE);
-  sigdelset(&mask, SIGILL);
-  sigdelset(&mask, SIGSEGV);
-
-  SigchainAction sa = {
-    .sc_sigaction = art_fault_handler,
-    .sc_mask = mask,
-    .sc_flags = 0UL,
-  };
-
-  AddSpecialSignalHandlerFn(SIGSEGV, &sa);
-
-  // Notify the kernel that we intend to use a specific `membarrier()` command.
-  int result = art::membarrier(MembarrierCommand::kRegisterPrivateExpedited);
-  if (result != 0) {
-    LOG(WARNING) << "FaultHandler: MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED failed: "
-                 << errno << " " << strerror(errno);
+static const char* SignalCodeName(int sig, int code) {
+  if (sig == SIGSEGV) {
+    switch (code) {
+      case SEGV_MAPERR: return "SEGV_MAPERR";
+      case SEGV_ACCERR: return "SEGV_ACCERR";
+      case 8:           return "SEGV_MTEAERR";
+      case 9:           return "SEGV_MTESERR";
+      default:          return "SEGV_UNKNOWN";
+    }
+  } else if (sig == SIGBUS) {
+    switch (code) {
+      case BUS_ADRALN: return "BUS_ADRALN";
+      case BUS_ADRERR: return "BUS_ADRERR";
+      case BUS_OBJERR: return "BUS_OBJERR";
+      default:         return "BUS_UNKNOWN";
+    }
+  } else {
+    return "UNKNOWN";
   }
+}
 
-  {
-    MutexLock lock(Thread::Current(), generated_code_ranges_lock_);
-    for (size_t i = 0; i != kNumLocalGeneratedCodeRanges; ++i) {
-      GeneratedCodeRange* next = (i + 1u != kNumLocalGeneratedCodeRanges)
-          ? &generated_code_ranges_storage_[i + 1u]
-          : nullptr;
-      generated_code_ranges_storage_[i].next.store(next, std::memory_order_relaxed);
-      generated_code_ranges_storage_[i].start = nullptr;
-      generated_code_ranges_storage_[i].size = 0u;
-    }
-    free_generated_code_ranges_ = generated_code_ranges_storage_;
+static std::ostream& PrintSignalInfo(std::ostream& os, siginfo_t* info) {
+  os << "  si_signo: " << info->si_signo << " (" << strsignal(info->si_signo) << ")\n"
+     << "  si_code: " << info->si_code
+     << " (" << SignalCodeName(info->si_signo, info->si_code) << ")";
+  if (info->si_signo == SIGSEGV || info->si_signo == SIGBUS) {
+    os << "\n" << "  si_addr: " << info->si_addr;
   }
+  return os;
+}
 
-  initialized_ = true;
+static bool InstallSigbusHandler() {
+  return gUseUserfaultfd &&
+         Runtime::Current()->GetHeap()->MarkCompactCollector()->IsUsingSigbusFeature();
+}
+
+void FaultManager::Init(bool use_sig_chain) {
+  CHECK(!initialized_);
+  if (use_sig_chain) {
+    sigset_t mask;
+    sigfillset(&mask);
+    sigdelset(&mask, SIGABRT);
+    sigdelset(&mask, SIGBUS);
+    sigdelset(&mask, SIGFPE);
+    sigdelset(&mask, SIGILL);
+    sigdelset(&mask, SIGSEGV);
+
+    SigchainAction sa = {
+      .sc_sigaction = art_sigsegv_handler,
+      .sc_mask = mask,
+      .sc_flags = 0UL,
+    };
+
+    AddSpecialSignalHandlerFn(SIGSEGV, &sa);
+    if (InstallSigbusHandler()) {
+      sa.sc_sigaction = art_sigbus_handler;
+      AddSpecialSignalHandlerFn(SIGBUS, &sa);
+    }
+
+    // Notify the kernel that we intend to use a specific `membarrier()` command.
+    int result = art::membarrier(MembarrierCommand::kRegisterPrivateExpedited);
+    if (result != 0) {
+      LOG(WARNING) << "FaultHandler: MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED failed: "
                   << errno << " " << strerror(errno);
+    }
+
+    {
+      MutexLock lock(Thread::Current(), generated_code_ranges_lock_);
+      for (size_t i = 0; i != kNumLocalGeneratedCodeRanges; ++i) {
+        GeneratedCodeRange* next = (i + 1u != kNumLocalGeneratedCodeRanges)
+            ? &generated_code_ranges_storage_[i + 1u]
+            : nullptr;
+        generated_code_ranges_storage_[i].next.store(next, std::memory_order_relaxed);
+        generated_code_ranges_storage_[i].start = nullptr;
+        generated_code_ranges_storage_[i].size = 0u;
+      }
+      free_generated_code_ranges_ = generated_code_ranges_storage_;
+    }
+
+    initialized_ = true;
+  } else if (InstallSigbusHandler()) {
+    struct sigaction act;
+    std::memset(&act, '\0', sizeof(act));
+    act.sa_flags = SA_SIGINFO | SA_RESTART;
+    act.sa_sigaction = [](int sig, siginfo_t* info, void* context) {
+      if (!art_sigbus_handler(sig, info, context)) {
+        std::ostringstream oss;
+        PrintSignalInfo(oss, info);
+        LOG(FATAL) << "Couldn't handle SIGBUS fault:"
+                   << "\n"
+                   << oss.str();
+      }
+    };
+    if (sigaction(SIGBUS, &act, nullptr)) {
+      LOG(FATAL) << "Fault handler for SIGBUS couldn't be setup: " << strerror(errno);
+    }
+  }
 }
 
 void FaultManager::Release() {
   if (initialized_) {
-    RemoveSpecialSignalHandlerFn(SIGSEGV, art_fault_handler);
+    RemoveSpecialSignalHandlerFn(SIGSEGV, art_sigsegv_handler);
+    if (InstallSigbusHandler()) {
+      RemoveSpecialSignalHandlerFn(SIGBUS, art_sigbus_handler);
+    }
     initialized_ = false;
   }
 }
@@ -157,32 +223,22 @@ bool FaultManager::HandleFaultByOtherHandlers(int sig, siginfo_t* info, void* co
   return false;
 }
 
-static const char* SignalCodeName(int sig, int code) {
-  if (sig != SIGSEGV) {
-    return "UNKNOWN";
-  } else {
-    switch (code) {
-      case SEGV_MAPERR: return "SEGV_MAPERR";
-      case SEGV_ACCERR: return "SEGV_ACCERR";
-      case 8:           return "SEGV_MTEAERR";
-      case 9:           return "SEGV_MTESERR";
-      default:          return "UNKNOWN";
-    }
-  }
-}
-static std::ostream& PrintSignalInfo(std::ostream& os, siginfo_t* info) {
-  os << "  si_signo: " << info->si_signo << " (" << strsignal(info->si_signo) << ")\n"
-     << "  si_code: " << info->si_code
-     << " (" << SignalCodeName(info->si_signo, info->si_code) << ")";
-  if (info->si_signo == SIGSEGV) {
-    os << "\n" << "  si_addr: " << info->si_addr;
+bool FaultManager::HandleSigbusFault(int sig, siginfo_t* info, void* context ATTRIBUTE_UNUSED) {
+  DCHECK_EQ(sig, SIGBUS);
+  if (VLOG_IS_ON(signals)) {
+    PrintSignalInfo(VLOG_STREAM(signals) << "Handling SIGBUS fault:\n", info);
   }
-  return os;
+
+#ifdef TEST_NESTED_SIGNAL
+  // Simulate a crash in a handler.
+  raise(SIGBUS);
+#endif
+  return Runtime::Current()->GetHeap()->MarkCompactCollector()->SigbusHandler(info);
 }
 
-bool FaultManager::HandleFault(int sig, siginfo_t* info, void* context) {
+bool FaultManager::HandleSigsegvFault(int sig, siginfo_t* info, void* context) {
   if (VLOG_IS_ON(signals)) {
-    PrintSignalInfo(VLOG_STREAM(signals) << "Handling fault:" << "\n", info);
+    PrintSignalInfo(VLOG_STREAM(signals) << "Handling SIGSEGV fault:\n", info);
   }
 
 #ifdef TEST_NESTED_SIGNAL
diff --git a/runtime/fault_handler.h b/runtime/fault_handler.h
index 43f93e42d1..1ed65261b0 100644
--- a/runtime/fault_handler.h
+++ b/runtime/fault_handler.h
@@ -38,7 +38,9 @@ class FaultManager {
   FaultManager();
   ~FaultManager();
 
-  void Init();
+  // Use libsigchain if use_sig_chain is true. Otherwise, set up SIGBUS directly
+  // using sigaction().
+  void Init(bool use_sig_chain);
 
   // Unclaim signals.
   void Release();
@@ -46,8 +48,11 @@ class FaultManager {
   // Unclaim signals and delete registered handlers.
   void Shutdown();
 
-  // Try to handle a fault, returns true if successful.
-  bool HandleFault(int sig, siginfo_t* info, void* context);
+  // Try to handle a SIGSEGV fault, returns true if successful.
+  bool HandleSigsegvFault(int sig, siginfo_t* info, void* context);
+
+  // Try to handle a SIGBUS fault, returns true if successful.
+  bool HandleSigbusFault(int sig, siginfo_t* info, void* context);
 
   // Added handlers are owned by the fault handler and will be freed on Shutdown().
   void AddHandler(FaultHandler* handler, bool generated_code);
@@ -91,7 +96,6 @@ class FaultManager {
   std::vector<FaultHandler*> generated_code_handlers_;
   std::vector<FaultHandler*> other_handlers_;
-  struct sigaction oldaction_;
   bool initialized_;
 
   // We keep a certain number of generated code ranges locally to avoid too many
diff --git a/runtime/gc/collector/mark_compact.cc b/runtime/gc/collector/mark_compact.cc
index 2496b8eb40..380c47a4a7 100644
--- a/runtime/gc/collector/mark_compact.cc
+++ b/runtime/gc/collector/mark_compact.cc
@@ -108,6 +108,17 @@ static uint64_t gUffdFeatures = 0;
 // Both, missing and minor faults on shmem are needed only for minor-fault mode.
 static constexpr uint64_t kUffdFeaturesForMinorFault =
     UFFD_FEATURE_MISSING_SHMEM | UFFD_FEATURE_MINOR_SHMEM;
+static constexpr uint64_t kUffdFeaturesForSigbus = UFFD_FEATURE_SIGBUS;
+// We consider the SIGBUS feature necessary to enable this GC as it's superior
+// to the threading-based implementation for janks. However, since we have the
+// latter already implemented, for testing purposes, we allow choosing either
+// of the two at boot time in the constructor below.
+// Note that having minor-fault feature implies having SIGBUS feature as the
+// latter was introduced earlier than the former. In other words, having
+// minor-fault feature implies having SIGBUS. We still want minor-fault to be
+// available for making jit-code-cache updates concurrent, which uses shmem.
+static constexpr uint64_t kUffdFeaturesRequired =
+    kUffdFeaturesForMinorFault | kUffdFeaturesForSigbus;
 
 bool KernelSupportsUffd() {
 #ifdef __linux__
@@ -126,8 +137,8 @@
     CHECK_EQ(ioctl(fd, UFFDIO_API, &api), 0) << "ioctl_userfaultfd : API:" << strerror(errno);
     gUffdFeatures = api.features;
     close(fd);
-    // Allow this GC to be used only if minor-fault feature is available.
-    return (api.features & kUffdFeaturesForMinorFault) == kUffdFeaturesForMinorFault;
+    // Allow this GC to be used only if the minor-fault and sigbus features are available.
+    return (api.features & kUffdFeaturesRequired) == kUffdFeaturesRequired;
   }
 }
 #endif
@@ -223,6 +234,12 @@ static constexpr bool kCheckLocks = kDebugLocking;
 static constexpr bool kVerifyRootsMarked = kIsDebugBuild;
 // Two threads should suffice on devices.
 static constexpr size_t kMaxNumUffdWorkers = 2;
+// Number of compaction buffers reserved for mutator threads in SIGBUS feature
+// case. It's extremely unlikely that we will ever have more than this many
+// mutator threads trying to access the moving-space during one compaction
+// phase. Using a lower number in debug builds to hopefully catch the issue
+// before it becomes a problem on user builds.
+static constexpr size_t kMutatorCompactionBufferCount = kIsDebugBuild ? 256 : 512;
 // Minimum from-space chunk to be madvised (during concurrent compaction) in one go.
 static constexpr ssize_t kMinFromSpaceMadviseSize = 1 * MB;
 // Concurrent compaction termination logic is different (and slightly more efficient) if the
@@ -268,8 +285,8 @@ bool MarkCompact::CreateUserfaultfd(bool post_fork) {
   } else {
     DCHECK(IsValidFd(uffd_));
     // Initialize uffd with the features which are required and available.
-    struct uffdio_api api = {
-        .api = UFFD_API, .features = gUffdFeatures & kUffdFeaturesForMinorFault, .ioctls = 0};
+    struct uffdio_api api = {.api = UFFD_API, .features = gUffdFeatures, .ioctls = 0};
+    api.features &= use_uffd_sigbus_ ? kUffdFeaturesRequired : kUffdFeaturesForMinorFault;
     CHECK_EQ(ioctl(uffd_, UFFDIO_API, &api), 0)
         << "ioctl_userfaultfd: API: " << strerror(errno);
   }
 }
@@ -284,20 +301,27 @@ MarkCompact::LiveWordsBitmap<kAlignment>* MarkCompact::LiveWordsBitmap<kAlignmen
       MemRangeBitmap::Create("Concurrent Mark Compact live words bitmap", begin, end));
 }
 
+static bool IsSigbusFeatureAvailable() {
+  MarkCompact::GetUffdAndMinorFault();
+  return gUffdFeatures & UFFD_FEATURE_SIGBUS;
+}
+
 MarkCompact::MarkCompact(Heap* heap)
     : GarbageCollector(heap, "concurrent mark compact"),
       gc_barrier_(0),
-      mark_stack_lock_("mark compact mark stack lock", kMarkSweepMarkStackLock),
+      lock_("mark compact lock", kMarkSweepMarkStackLock),
       bump_pointer_space_(heap->GetBumpPointerSpace()),
       moving_space_bitmap_(bump_pointer_space_->GetMarkBitmap()),
       moving_to_space_fd_(kFdUnused),
       moving_from_space_fd_(kFdUnused),
       uffd_(kFdUnused),
-      thread_pool_counter_(0),
+      sigbus_in_progress_count_(kSigbusCounterCompactionDoneMask),
       compaction_in_progress_count_(0),
+      thread_pool_counter_(0),
       compacting_(false),
       uffd_initialized_(false),
       uffd_minor_fault_supported_(false),
+      use_uffd_sigbus_(IsSigbusFeatureAvailable()),
       minor_fault_initialized_(false),
       map_linear_alloc_shared_(false) {
   if (kIsDebugBuild) {
@@ -383,7 +407,9 @@ MarkCompact::MarkCompact(Heap* heap)
       LOG(WARNING) << "Failed to allocate concurrent mark-compact moving-space shadow: " << err_msg;
     }
   }
-  const size_t num_pages = 1 + std::min(heap_->GetParallelGCThreadCount(), kMaxNumUffdWorkers);
+  const size_t num_pages = 1 + (use_uffd_sigbus_ ?
+                                kMutatorCompactionBufferCount :
+                                std::min(heap_->GetParallelGCThreadCount(), kMaxNumUffdWorkers));
   compaction_buffers_map_ = MemMap::MapAnonymous("Concurrent mark-compact compaction buffers",
                                                  kPageSize * num_pages,
                                                  PROT_READ | PROT_WRITE,
@@ -396,7 +422,8 @@ MarkCompact::MarkCompact(Heap* heap)
   conc_compaction_termination_page_ = compaction_buffers_map_.Begin();
   // Touch the page deliberately to avoid userfaults on it. We madvise it in
   // CompactionPhase() before using it to terminate concurrent compaction.
-  CHECK_EQ(*conc_compaction_termination_page_, 0);
+  ForceRead(conc_compaction_termination_page_);
+
   // In most of the cases, we don't expect more than one LinearAlloc space.
   linear_alloc_spaces_data_.reserve(1);
 
@@ -543,6 +570,8 @@ void MarkCompact::InitializePhase() {
   non_moving_first_objs_count_ = 0;
   black_page_count_ = 0;
   freed_objects_ = 0;
+  // The first buffer is used by the gc-thread.
+  compaction_buffer_counter_ = 1;
   from_space_slide_diff_ = from_space_begin_ - bump_pointer_space_->Begin();
   black_allocations_begin_ = bump_pointer_space_->Limit();
   walk_super_class_cache_ = nullptr;
@@ -577,7 +606,7 @@ void MarkCompact::RunPhases() {
     ReclaimPhase();
     PrepareForCompaction();
   }
-  if (uffd_ != kFallbackMode) {
+  if (uffd_ != kFallbackMode && !use_uffd_sigbus_) {
     heap_->GetThreadPool()->WaitForWorkersToBeCreated();
   }
   {
@@ -847,14 +876,15 @@ void MarkCompact::PrepareForCompaction() {
   bool is_zygote = Runtime::Current()->IsZygote();
   if (!uffd_initialized_ && CreateUserfaultfd(/*post_fork*/false)) {
-    // Register the buffer that we use for terminating concurrent compaction
-    struct uffdio_register uffd_register;
-    uffd_register.range.start = reinterpret_cast<uintptr_t>(conc_compaction_termination_page_);
-    uffd_register.range.len = kPageSize;
-    uffd_register.mode = UFFDIO_REGISTER_MODE_MISSING;
-    CHECK_EQ(ioctl(uffd_, UFFDIO_REGISTER, &uffd_register), 0)
-        << "ioctl_userfaultfd: register compaction termination page: " << strerror(errno);
-
+    if (!use_uffd_sigbus_) {
+      // Register the buffer that we use for terminating concurrent compaction
+      struct uffdio_register uffd_register;
+      uffd_register.range.start = reinterpret_cast<uintptr_t>(conc_compaction_termination_page_);
+      uffd_register.range.len = kPageSize;
+      uffd_register.mode = UFFDIO_REGISTER_MODE_MISSING;
+      CHECK_EQ(ioctl(uffd_, UFFDIO_REGISTER, &uffd_register), 0)
+          << "ioctl_userfaultfd: register compaction termination page: " << strerror(errno);
+    }
     if (!uffd_minor_fault_supported_ && shadow_to_space_map_.IsValid()) {
       // A valid shadow-map for moving space is only possible if we
       // were able to map it in the constructor. That also means that its size
@@ -869,20 +899,21 @@ void MarkCompact::PrepareForCompaction() {
     // and get rid of it when finished. This is expected to happen rarely as
     // zygote spends most of the time in native fork loop.
     if (uffd_ != kFallbackMode) {
-      ThreadPool* pool = heap_->GetThreadPool();
-      if (UNLIKELY(pool == nullptr)) {
-        // On devices with 2 cores, GetParallelGCThreadCount() will return 1,
-        // which is the desired number of workers on such devices.
-        heap_->CreateThreadPool(std::min(heap_->GetParallelGCThreadCount(), kMaxNumUffdWorkers));
-        pool = heap_->GetThreadPool();
-      }
-      size_t num_threads = pool->GetThreadCount();
-      thread_pool_counter_ = num_threads;
-      for (size_t i = 0; i < num_threads; i++) {
-        pool->AddTask(thread_running_gc_, new ConcurrentCompactionGcTask(this, i + 1));
+      if (!use_uffd_sigbus_) {
+        ThreadPool* pool = heap_->GetThreadPool();
+        if (UNLIKELY(pool == nullptr)) {
+          // On devices with 2 cores, GetParallelGCThreadCount() will return 1,
+          // which is the desired number of workers on such devices.
+          heap_->CreateThreadPool(std::min(heap_->GetParallelGCThreadCount(), kMaxNumUffdWorkers));
+          pool = heap_->GetThreadPool();
+        }
+        size_t num_threads = pool->GetThreadCount();
+        thread_pool_counter_ = num_threads;
+        for (size_t i = 0; i < num_threads; i++) {
+          pool->AddTask(thread_running_gc_, new ConcurrentCompactionGcTask(this, i + 1));
+        }
+        CHECK_EQ(pool->GetTaskCount(thread_running_gc_), num_threads);
       }
-      CHECK_EQ(pool->GetTaskCount(thread_running_gc_), num_threads);
-
       /*
        * Possible scenarios for mappings:
       * A) All zygote GCs (or if minor-fault feature isn't available): uses
@@ -1765,26 +1796,52 @@ void MarkCompact::MapProcessedPages(uint8_t* to_space_start,
       DCHECK_EQ(uffd_continue.mapped, static_cast<ssize_t>(length));
     }
   }
+  if (use_uffd_sigbus_) {
+    // Nobody else would modify these pages' state simultaneously, so atomic
+    // stores are sufficient.
+    for (; uffd_continue.mapped > 0; uffd_continue.mapped -= kPageSize) {
+      arr_idx--;
+      DCHECK_EQ(state_arr[arr_idx].load(std::memory_order_relaxed),
+                PageState::kProcessedAndMapping);
+      state_arr[arr_idx].store(PageState::kProcessedAndMapped, std::memory_order_release);
+    }
+  }
+}
+
+void MarkCompact::ZeropageIoctl(void* addr, bool tolerate_eexist, bool tolerate_enoent) {
+  struct uffdio_zeropage uffd_zeropage;
+  DCHECK(IsAligned<kPageSize>(addr));
+  uffd_zeropage.range.start = reinterpret_cast<uintptr_t>(addr);
+  uffd_zeropage.range.len = kPageSize;
+  uffd_zeropage.mode = 0;
+  int ret = ioctl(uffd_, UFFDIO_ZEROPAGE, &uffd_zeropage);
+  if (LIKELY(ret == 0)) {
+    DCHECK_EQ(uffd_zeropage.zeropage, static_cast<ssize_t>(kPageSize));
+  } else {
+    CHECK((tolerate_enoent && errno == ENOENT) || (tolerate_eexist && errno == EEXIST))
+        << "ioctl_userfaultfd: zeropage failed: " << strerror(errno) << ". addr:" << addr;
   }
 }
 
+void MarkCompact::CopyIoctl(void* dst, void* buffer) {
+  struct uffdio_copy uffd_copy;
+  uffd_copy.src = reinterpret_cast<uintptr_t>(buffer);
+  uffd_copy.dst = reinterpret_cast<uintptr_t>(dst);
+  uffd_copy.len = kPageSize;
+  uffd_copy.mode = 0;
+  CHECK_EQ(ioctl(uffd_, UFFDIO_COPY, &uffd_copy), 0)
+      << "ioctl_userfaultfd: copy failed: " << strerror(errno) << ". src:" << buffer
+      << " dst:" << dst;
+  DCHECK_EQ(uffd_copy.copy, static_cast<ssize_t>(kPageSize));
+}
+
 template <int kMode, typename CompactionFn>
 void MarkCompact::DoPageCompactionWithStateChange(size_t page_idx,
                                                   size_t status_arr_len,
                                                   uint8_t* to_space_page,
                                                   uint8_t* page,
                                                   CompactionFn func) {
-  auto copy_ioctl = [this] (void* dst, void* buffer) {
-    struct uffdio_copy uffd_copy;
-    uffd_copy.src = reinterpret_cast<uintptr_t>(buffer);
-    uffd_copy.dst = reinterpret_cast<uintptr_t>(dst);
-    uffd_copy.len = kPageSize;
-    uffd_copy.mode = 0;
-    CHECK_EQ(ioctl(uffd_, UFFDIO_COPY, &uffd_copy), 0)
-        << "ioctl_userfaultfd: copy failed: " << strerror(errno)
-        << ". src:" << buffer << " dst:" << dst;
-    DCHECK_EQ(uffd_copy.copy, static_cast<ssize_t>(kPageSize));
-  };
   PageState expected_state = PageState::kUnprocessed;
   PageState desired_state =
       kMode == kCopyMode ? PageState::kProcessingAndMapping : PageState::kProcessing;
@@ -1792,17 +1849,18 @@ void MarkCompact::DoPageCompactionWithStateChange(size_t page_idx,
   // to moving_spaces_status_[page_idx] is released before the contents of the page are
   // made accessible to other threads.
   //
-  // In minor-fault case, we need acquire ordering here to ensure that when the
-  // CAS fails, another thread has completed processing the page, which is guaranteed
-  // by the release below.
-  // Relaxed memory-order is used in copy mode as the subsequent ioctl syscall acts as a fence.
-  std::memory_order order =
-      kMode == kCopyMode ? std::memory_order_relaxed : std::memory_order_acquire;
+  // We need acquire ordering here to ensure that when the CAS fails, another thread
+  // has completed processing the page, which is guaranteed by the release below.
   if (kMode == kFallbackMode || moving_pages_status_[page_idx].compare_exchange_strong(
-                                    expected_state, desired_state, order)) {
+                                    expected_state, desired_state, std::memory_order_acquire)) {
     func();
     if (kMode == kCopyMode) {
-      copy_ioctl(to_space_page, page);
+      CopyIoctl(to_space_page, page);
+      if (use_uffd_sigbus_) {
+        // Store is sufficient as no other thread would modify the status at this point.
+        moving_pages_status_[page_idx].store(PageState::kProcessedAndMapped,
+                                             std::memory_order_release);
+      }
     } else if (kMode == kMinorFaultMode) {
       expected_state = PageState::kProcessing;
       desired_state = PageState::kProcessed;
@@ -2447,9 +2505,9 @@ void MarkCompact::PreCompactionPhase() {
     stack_high_addr_ =
         reinterpret_cast<char*>(stack_low_addr_) + thread_running_gc_->GetStackSize();
   }
-
+  // This store is visible to mutators (or uffd worker threads) as the mutator
+  // lock's unlock guarantees that.
   compacting_ = true;
-
   {
     TimingLogger::ScopedTiming t2("(Paused)UpdateCompactionDataStructures", GetTimings());
     ReaderMutexLock rmu(thread_running_gc_, *Locks::heap_bitmap_lock_);
@@ -2502,6 +2560,7 @@ void MarkCompact::PreCompactionPhase() {
         // checkpoint, or a stop-the-world pause.
         thread->SweepInterpreterCache(this);
         thread->AdjustTlab(black_objs_slide_diff_);
+        thread->SetThreadLocalGcBuffer(nullptr);
       }
     }
     {
@@ -2591,6 +2650,10 @@ void MarkCompact::PreCompactionPhase() {
     }
   }
 
+  if (use_uffd_sigbus_) {
+    // Release order wrt mutator threads' SIGBUS handler load.
+    sigbus_in_progress_count_.store(0, std::memory_order_release);
+  }
   KernelPreparation();
   UpdateNonMovingSpace();
   // fallback mode
@@ -2602,8 +2665,10 @@ void MarkCompact::PreCompactionPhase() {
     RecordFree(ObjectBytePair(freed_objects_, freed_bytes));
   } else {
     DCHECK_EQ(compaction_in_progress_count_.load(std::memory_order_relaxed), 0u);
-    // We must start worker threads before resuming mutators to avoid deadlocks.
-    heap_->GetThreadPool()->StartWorkers(thread_running_gc_);
+    if (!use_uffd_sigbus_) {
+      // We must start worker threads before resuming mutators to avoid deadlocks.
+      heap_->GetThreadPool()->StartWorkers(thread_running_gc_);
+    }
   }
   stack_low_addr_ = nullptr;
 }
@@ -2752,32 +2817,6 @@ template <int kMode>
 void MarkCompact::ConcurrentCompaction(uint8_t* buf) {
   DCHECK_NE(kMode, kFallbackMode);
   DCHECK(kMode != kCopyMode || buf != nullptr);
-  auto zeropage_ioctl = [this](void* addr, bool tolerate_eexist, bool tolerate_enoent) {
-    struct uffdio_zeropage uffd_zeropage;
-    DCHECK(IsAligned<kPageSize>(addr));
-    uffd_zeropage.range.start = reinterpret_cast<uintptr_t>(addr);
-    uffd_zeropage.range.len = kPageSize;
-    uffd_zeropage.mode = 0;
-    int ret = ioctl(uffd_, UFFDIO_ZEROPAGE, &uffd_zeropage);
-    if (LIKELY(ret == 0)) {
-      DCHECK_EQ(uffd_zeropage.zeropage, static_cast<ssize_t>(kPageSize));
-    } else {
-      CHECK((tolerate_enoent && errno == ENOENT) || (tolerate_eexist && errno == EEXIST))
-          << "ioctl_userfaultfd: zeropage failed: " << strerror(errno) << ". addr:" << addr;
-    }
-  };
-
-  auto copy_ioctl = [this] (void* fault_page, void* src) {
-    struct uffdio_copy uffd_copy;
-    uffd_copy.src = reinterpret_cast<uintptr_t>(src);
-    uffd_copy.dst = reinterpret_cast<uintptr_t>(fault_page);
-    uffd_copy.len = kPageSize;
-    uffd_copy.mode = 0;
-    int ret = ioctl(uffd_, UFFDIO_COPY, &uffd_copy);
-    CHECK_EQ(ret, 0) << "ioctl_userfaultfd: copy failed: " << strerror(errno)
-                     << ". src:" << src << " fault_page:" << fault_page;
-    DCHECK_EQ(uffd_copy.copy, static_cast<ssize_t>(kPageSize));
-  };
   size_t nr_moving_space_used_pages = moving_first_objs_count_ + black_page_count_;
   while (true) {
     struct uffd_msg msg;
@@ -2798,7 +2837,7 @@ void MarkCompact::ConcurrentCompaction(uint8_t* buf) {
       // zeropage so that the gc-thread can proceed. Otherwise, each thread does
      // it and the gc-thread will repeat this fault until thread_pool_counter == 0.
       if (!gKernelHasFaultRetry || ret == 1) {
-        zeropage_ioctl(fault_addr, /*tolerate_eexist=*/false, /*tolerate_enoent=*/false);
+        ZeropageIoctl(fault_addr, /*tolerate_eexist=*/false, /*tolerate_enoent=*/false);
       } else {
         struct uffdio_range uffd_range;
         uffd_range.start = msg.arg.pagefault.address;
@@ -2811,28 +2850,123 @@ void MarkCompact::ConcurrentCompaction(uint8_t* buf) {
     }
     uint8_t* fault_page = AlignDown(fault_addr, kPageSize);
     if (bump_pointer_space_->HasAddress(reinterpret_cast<mirror::Object*>(fault_addr))) {
-      ConcurrentlyProcessMovingPage<kMode>(
-          zeropage_ioctl, copy_ioctl, fault_page, buf, nr_moving_space_used_pages);
+      ConcurrentlyProcessMovingPage<kMode>(fault_page, buf, nr_moving_space_used_pages);
     } else if (minor_fault_initialized_) {
       ConcurrentlyProcessLinearAllocPage<kMinorFaultMode>(
-          zeropage_ioctl,
-          copy_ioctl,
-          fault_page,
-          (msg.arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_MINOR) != 0);
+          fault_page, (msg.arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_MINOR) != 0);
     } else {
       ConcurrentlyProcessLinearAllocPage<kCopyMode>(
-          zeropage_ioctl,
-          copy_ioctl,
-          fault_page,
-          (msg.arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_MINOR) != 0);
+          fault_page, (msg.arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_MINOR) != 0);
     }
   }
 }
 
+bool MarkCompact::SigbusHandler(siginfo_t* info) {
+  class ScopedInProgressCount {
+   public:
+    explicit ScopedInProgressCount(MarkCompact* collector) : collector_(collector) {
+      // Increment the count only if compaction is not done yet.
+      SigbusCounterType prev =
+          collector_->sigbus_in_progress_count_.load(std::memory_order_relaxed);
+      while ((prev & kSigbusCounterCompactionDoneMask) == 0) {
+        if (collector_->sigbus_in_progress_count_.compare_exchange_strong(
+                prev, prev + 1, std::memory_order_acquire)) {
+          DCHECK_LT(prev, kSigbusCounterCompactionDoneMask - 1);
+          compaction_done_ = false;
+          return;
+        }
+      }
+      compaction_done_ = true;
+    }
+
+    bool IsCompactionDone() const {
+      return compaction_done_;
+    }
+
+    ~ScopedInProgressCount() {
+      if (!IsCompactionDone()) {
+        collector_->sigbus_in_progress_count_.fetch_sub(1, std::memory_order_release);
+      }
+    }
+
+   private:
+    MarkCompact* const collector_;
+    bool compaction_done_;
+  };
+
+  DCHECK(use_uffd_sigbus_);
+  if (info->si_code != BUS_ADRERR) {
+    // Userfaultfd raises SIGBUS with BUS_ADRERR. All other causes can't be
+    // handled here.
+    return false;
+  }
+
+  ScopedInProgressCount spc(this);
+  uint8_t* fault_page = AlignDown(reinterpret_cast<uint8_t*>(info->si_addr), kPageSize);
+  if (!spc.IsCompactionDone()) {
+    if (bump_pointer_space_->HasAddress(reinterpret_cast<mirror::Object*>(fault_page))) {
+      Thread* self = Thread::Current();
+      Locks::mutator_lock_->AssertSharedHeld(self);
+      size_t nr_moving_space_used_pages = moving_first_objs_count_ + black_page_count_;
+      if (minor_fault_initialized_) {
+        ConcurrentlyProcessMovingPage<kMinorFaultMode>(
+            fault_page, nullptr, nr_moving_space_used_pages);
+      } else {
+        uint8_t* buf = self->GetThreadLocalGcBuffer();
+        if (buf == nullptr) {
+          uint16_t idx = compaction_buffer_counter_.fetch_add(1, std::memory_order_relaxed);
+          // The buffer-map is one page bigger as the first buffer is used by the GC-thread.
+          CHECK_LE(idx, kMutatorCompactionBufferCount);
+          buf = compaction_buffers_map_.Begin() + idx * kPageSize;
+          DCHECK(compaction_buffers_map_.HasAddress(buf));
+          self->SetThreadLocalGcBuffer(buf);
+        }
+        ConcurrentlyProcessMovingPage<kCopyMode>(fault_page, buf, nr_moving_space_used_pages);
+      }
+      return true;
+    } else {
+      // Find the linear-alloc space containing fault-addr
+      for (auto& data : linear_alloc_spaces_data_) {
+        if (data.begin_ <= fault_page && data.end_ > fault_page) {
+          if (minor_fault_initialized_) {
+            ConcurrentlyProcessLinearAllocPage<kMinorFaultMode>(fault_page, false);
+          } else {
+            ConcurrentlyProcessLinearAllocPage<kCopyMode>(fault_page, false);
+          }
+          return true;
+        }
+      }
+      // Fault address doesn't belong to either moving-space or linear-alloc.
+      return false;
+    }
+  } else {
+    // We may spuriously get a SIGBUS fault, which was initiated before the
+    // compaction was finished, but ends up here. In that case, if the fault
+    // address is valid then consider it handled.
+    return bump_pointer_space_->HasAddress(reinterpret_cast<mirror::Object*>(fault_page)) ||
+           linear_alloc_spaces_data_.end() !=
+               std::find_if(linear_alloc_spaces_data_.begin(),
+                            linear_alloc_spaces_data_.end(),
+                            [fault_page](const LinearAllocSpaceData& data) {
+                              return data.begin_ <= fault_page && data.end_ > fault_page;
+                            });
+  }
+}
+
+static void BackOff(uint32_t i) {
+  static constexpr uint32_t kYieldMax = 5;
+  // TODO: Consider adding x86 PAUSE and/or ARM YIELD here.
+  if (i <= kYieldMax) {
+    sched_yield();
+  } else {
+    // nanosleep is not in the async-signal-safe list, but bionic implements it
+    // with a pure system call, so it should be fine.
+    NanoSleep(10000ull * (i - kYieldMax));
+  }
+}
+
+template <int kMode>
+void MarkCompact::ConcurrentlyProcessMovingPage(uint8_t* fault_page,
                                                 uint8_t* buf,
                                                 size_t nr_moving_space_used_pages) {
   class ScopedInProgressCount {
@@ -2842,7 +2976,7 @@ void MarkCompact::ConcurrentlyProcessMovingPage(ZeropageType& zeropage_ioctl,
     }
 
     ~ScopedInProgressCount() {
-      collector_->compaction_in_progress_count_.fetch_add(-1, std::memory_order_relaxed);
+      collector_->compaction_in_progress_count_.fetch_sub(1, std::memory_order_relaxed);
     }
 
    private:
@@ -2857,7 +2991,7 @@ void MarkCompact::ConcurrentlyProcessMovingPage(ZeropageType& zeropage_ioctl,
     // There is a race which allows more than one thread to install a
     // zero-page. But we can tolerate that. So absorb the EEXIST returned by
     // the ioctl and move on.
-    zeropage_ioctl(fault_page, /*tolerate_eexist=*/true, /*tolerate_enoent=*/true);
+    ZeropageIoctl(fault_page, /*tolerate_eexist=*/true, /*tolerate_enoent=*/true);
     return;
   }
   size_t page_idx = (fault_page - bump_pointer_space_->Begin()) / kPageSize;
@@ -2869,14 +3003,16 @@ void MarkCompact::ConcurrentlyProcessMovingPage(ZeropageType& zeropage_ioctl,
     if (moving_pages_status_[page_idx].compare_exchange_strong(
             expected_state, PageState::kProcessedAndMapping, std::memory_order_relaxed)) {
       // Note: ioctl acts as an acquire fence.
-      zeropage_ioctl(fault_page, /*tolerate_eexist=*/false, /*tolerate_enoent=*/true);
+      ZeropageIoctl(fault_page, /*tolerate_eexist=*/false, /*tolerate_enoent=*/true);
     } else {
      DCHECK_EQ(expected_state, PageState::kProcessedAndMapping);
     }
     return;
   }
-  PageState state = moving_pages_status_[page_idx].load(std::memory_order_relaxed);
+  PageState state = moving_pages_status_[page_idx].load(
+      use_uffd_sigbus_ ? std::memory_order_acquire : std::memory_order_relaxed);
+  uint32_t backoff_count = 0;
   while (true) {
     switch (state) {
      case PageState::kUnprocessed: {
@@ -2884,13 +3020,13 @@ void MarkCompact::ConcurrentlyProcessMovingPage(ZeropageType& zeropage_ioctl,
         // the page's state. Otherwise, we will end up leaving a window wherein
         // the GC-thread could observe that no worker is working on compaction
        // and could end up unregistering the moving space from userfaultfd.
-        ScopedInProgressCount in_progress(this);
+        ScopedInProgressCount spc(this);
         // Acquire order to ensure we don't start writing to shadow map, which is
         // shared, before the CAS is successful. Release order to ensure that the
        // increment to moving_compactions_in_progress above is not re-ordered
        // after the CAS.
         if (moving_pages_status_[page_idx].compare_exchange_strong(
-                state, PageState::kMutatorProcessing, std::memory_order_acquire)) {
+                state, PageState::kMutatorProcessing, std::memory_order_acq_rel)) {
           if (kMode == kMinorFaultMode) {
             DCHECK_EQ(buf, nullptr);
             buf = shadow_to_space_map_.Begin() + page_idx * kPageSize;
@@ -2913,7 +3049,12 @@ void MarkCompact::ConcurrentlyProcessMovingPage(ZeropageType& zeropage_ioctl,
         moving_pages_status_[page_idx].store(PageState::kProcessedAndMapping,
                                              std::memory_order_release);
         if (kMode == kCopyMode) {
-          copy_ioctl(fault_page, buf);
+          CopyIoctl(fault_page, buf);
+          if (use_uffd_sigbus_) {
+            // Store is sufficient as no other thread modifies the status at this stage.
+            moving_pages_status_[page_idx].store(PageState::kProcessedAndMapped,
+                                                 std::memory_order_release);
+          }
           return;
         } else {
           break;
@@ -2924,7 +3065,8 @@ void MarkCompact::ConcurrentlyProcessMovingPage(ZeropageType& zeropage_ioctl,
       case PageState::kProcessing:
         DCHECK_EQ(kMode, kMinorFaultMode);
         if (moving_pages_status_[page_idx].compare_exchange_strong(
-                state, PageState::kProcessingAndMapping, std::memory_order_relaxed)) {
+                state, PageState::kProcessingAndMapping, std::memory_order_relaxed) &&
+            !use_uffd_sigbus_) {
           // Somebody else took or will take care of finishing the compaction and
           // then mapping the page.
           return;
@@ -2933,7 +3075,17 @@ void MarkCompact::ConcurrentlyProcessMovingPage(ZeropageType& zeropage_ioctl,
       case PageState::kProcessed:
         // The page is processed but not mapped. We should map it.
         break;
-      default:
+      case PageState::kProcessingAndMapping:
+      case PageState::kMutatorProcessing:
+      case PageState::kProcessedAndMapping:
+        if (use_uffd_sigbus_) {
+          // Wait for the page to be mapped before returning.
+          BackOff(backoff_count++);
+          state = moving_pages_status_[page_idx].load(std::memory_order_acquire);
+          continue;
+        }
+        return;
+      case PageState::kProcessedAndMapped:
        // Somebody else took care of the page.
         return;
     }
@@ -2951,11 +3103,8 @@
   }
 }
 
-template <int kMode, typename ZeropageType, typename CopyType>
-void MarkCompact::ConcurrentlyProcessLinearAllocPage(ZeropageType& zeropage_ioctl,
-                                                     CopyType& copy_ioctl,
-                                                     uint8_t* fault_page,
-                                                     bool is_minor_fault) {
+template <int kMode>
+void MarkCompact::ConcurrentlyProcessLinearAllocPage(uint8_t* fault_page, bool is_minor_fault) {
   DCHECK(!is_minor_fault || kMode == kMinorFaultMode);
   auto arena_iter = linear_alloc_arenas_.end();
   {
@@ -2967,7 +3116,7 @@ void MarkCompact::ConcurrentlyProcessLinearAllocPage(ZeropageType& zeropage_ioct
   if (arena_iter == linear_alloc_arenas_.end() || arena_iter->second <= fault_page) {
     // Fault page isn't in any of the arenas that existed before we started
     // compaction. So map zeropage and return.
-    zeropage_ioctl(fault_page, /*tolerate_eexist=*/true, /*tolerate_enoent=*/false);
+    ZeropageIoctl(fault_page, /*tolerate_eexist=*/true, /*tolerate_enoent=*/false);
   } else {
     // fault_page should always belong to some arena.
     DCHECK(arena_iter != linear_alloc_arenas_.end())
@@ -2985,19 +3134,29 @@ void MarkCompact::ConcurrentlyProcessLinearAllocPage(ZeropageType& zeropage_ioct
     size_t page_idx = (fault_page - space_data->begin_) / kPageSize;
     Atomic<PageState>* state_arr =
         reinterpret_cast<Atomic<PageState>*>(space_data->page_status_map_.Begin());
-    PageState state = state_arr[page_idx].load(std::memory_order_relaxed);
+    PageState state = state_arr[page_idx].load(use_uffd_sigbus_ ? std::memory_order_acquire :
                                                                   std::memory_order_relaxed);
+    uint32_t backoff_count = 0;
     while (true) {
       switch (state) {
-        case PageState::kUnprocessed:
-          if (state_arr[page_idx].compare_exchange_strong(
-                  state, PageState::kProcessingAndMapping, std::memory_order_acquire)) {
+        case PageState::kUnprocessed: {
+          // Acquire order to ensure we don't start writing to the shadow map,
+          // which is shared, before the CAS is successful.
+          if (state_arr[page_idx].compare_exchange_strong(
+                  state, PageState::kProcessingAndMapping, std::memory_order_acquire)) {
            if (kMode == kCopyMode || is_minor_fault) {
              uint8_t* first_obj = arena_iter->first->GetFirstObject(fault_page);
              DCHECK_NE(first_obj, nullptr);
              LinearAllocPageUpdater updater(this);
              updater(fault_page + diff, first_obj + diff);
              if (kMode == kCopyMode) {
-               copy_ioctl(fault_page, fault_page + diff);
+               CopyIoctl(fault_page, fault_page + diff);
+               if (use_uffd_sigbus_) {
+                 // Store is sufficient as no other thread can modify the
+                 // status of this page at this point.
+                 state_arr[page_idx].store(PageState::kProcessedAndMapped,
+                                           std::memory_order_release);
+               }
                return;
              }
            } else {
@@ -3012,23 +3171,36 @@ void MarkCompact::ConcurrentlyProcessLinearAllocPage(ZeropageType& zeropage_ioct
              MapProcessedPages</*kFirstPageMapping=*/true>(
                  fault_page, state_arr, page_idx, space_data->page_status_map_.Size());
              return;
-          }
-          continue;
+            }
+          }
+          continue;
        }
         case PageState::kProcessing:
-          DCHECK_EQ(kMode, kMinorFaultMode);
-          if (state_arr[page_idx].compare_exchange_strong(
-                  state, PageState::kProcessingAndMapping, std::memory_order_relaxed)) {
+          DCHECK_EQ(kMode, kMinorFaultMode);
+          if (state_arr[page_idx].compare_exchange_strong(
+                  state, PageState::kProcessingAndMapping, std::memory_order_relaxed) &&
+              !use_uffd_sigbus_) {
            // Somebody else took or will take care of finishing the updates and
            // then mapping the page.
             return;
-          }
-          continue;
+          }
+          continue;
         case PageState::kProcessed:
-          // The page is processed but not mapped. We should map it.
-          break;
-        default:
-          // Somebody else took care of the page.
-          return;
+          // The page is processed but not mapped. We should map it.
+          break;
+        case PageState::kMutatorProcessing:
+          UNREACHABLE();
+        case PageState::kProcessingAndMapping:
+        case PageState::kProcessedAndMapping:
+          if (use_uffd_sigbus_) {
+            // Wait for the page to be mapped before returning.
+            BackOff(backoff_count++);
+            state = state_arr[page_idx].load(std::memory_order_acquire);
+            continue;
+          }
+          return;
+        case PageState::kProcessedAndMapped:
+          // Somebody else took care of the page.
+          return;
       }
       break;
     }
@@ -3085,15 +3257,12 @@ void MarkCompact::ProcessLinearAlloc() {
        updater(page_begin + diff, first_obj + diff);
        expected_state = PageState::kProcessing;
        if (!minor_fault_initialized_) {
-          struct uffdio_copy uffd_copy;
-          uffd_copy.src = reinterpret_cast<uintptr_t>(page_begin + diff);
-          uffd_copy.dst = reinterpret_cast<uintptr_t>(page_begin);
-          uffd_copy.len = kPageSize;
-          uffd_copy.mode = 0;
-          CHECK_EQ(ioctl(uffd_, UFFDIO_COPY, &uffd_copy), 0)
-              << "ioctl_userfaultfd: linear-alloc copy failed:" << strerror(errno)
-              << ". dst:" << static_cast<void*>(page_begin);
-          DCHECK_EQ(uffd_copy.copy, static_cast<ssize_t>(kPageSize));
+          CopyIoctl(page_begin, page_begin + diff);
+          if (use_uffd_sigbus_) {
+            // Store is sufficient as no other thread could be modifying this page's
+            // status at this point.
+            state_arr[page_idx].store(PageState::kProcessedAndMapped, std::memory_order_release);
+          }
        } else if (!state_arr[page_idx].compare_exchange_strong(
                       expected_state, PageState::kProcessed, std::memory_order_release)) {
          DCHECK_EQ(expected_state, PageState::kProcessingAndMapping);
@@ -3152,10 +3321,15 @@ void MarkCompact::CompactionPhase() {
     CompactMovingSpace<kCopyMode>(compaction_buffers_map_.Begin());
   }
 
-  // TODO: add more sophisticated logic here wherein we sleep after attempting
-  // yield a couple of times.
-  while (compaction_in_progress_count_.load(std::memory_order_relaxed) > 0) {
-    sched_yield();
+  // Make sure no mutator is reading from the from-space before unregistering
+  // userfaultfd from moving-space and then zapping from-space. The mutator
+  // and GC may race to set a page state to processing or further along. The two
+  // attempts are ordered. If the collector wins, then the mutator will see that
+  // and not access the from-space page. If the mutator wins, then the
+  // compaction_in_progress_count_ increment by the mutator happens-before the test
+  // here, and we will not see a zero value until the mutator has completed.
+  for (uint32_t i = 0; compaction_in_progress_count_.load(std::memory_order_acquire) > 0; i++) {
+    BackOff(i);
   }
 
   size_t moving_space_size = bump_pointer_space_->Capacity();
@@ -3204,17 +3378,29 @@ void MarkCompact::CompactionPhase() {
   ProcessLinearAlloc();
 
-  DCHECK(IsAligned<kPageSize>(conc_compaction_termination_page_));
-  // We will only iterate once if gKernelHasFaultRetry is true.
-  do {
-    // madvise the page so that we can get userfaults on it.
-    ZeroAndReleasePages(conc_compaction_termination_page_, kPageSize);
-    // The following load triggers 'special' userfaults. When received by the
-    // thread-pool workers, they will exit out of the compaction task. This fault
-    // happens because we madvised the page.
-    ForceRead(conc_compaction_termination_page_);
-  } while (thread_pool_counter_ > 0);
-
+  if (use_uffd_sigbus_) {
+    // Set the compaction-done bit so that no new mutator threads start the
+    // compaction process in the SIGBUS handler.
+    SigbusCounterType count = sigbus_in_progress_count_.fetch_or(kSigbusCounterCompactionDoneMask,
+                                                                 std::memory_order_acq_rel);
+    // Wait for SIGBUS handlers already in play.
+    for (uint32_t i = 0; count > 0; i++) {
+      BackOff(i);
+      count = sigbus_in_progress_count_.load(std::memory_order_acquire);
+      count &= ~kSigbusCounterCompactionDoneMask;
+    }
+  } else {
+    DCHECK(IsAligned<kPageSize>(conc_compaction_termination_page_));
+    // We will only iterate once if gKernelHasFaultRetry is true.
+    do {
+      // madvise the page so that we can get userfaults on it.
+      ZeroAndReleasePages(conc_compaction_termination_page_, kPageSize);
+      // The following load triggers 'special' userfaults. When received by the
+      // thread-pool workers, they will exit out of the compaction task. This fault
+      // happens because we madvised the page.
+      ForceRead(conc_compaction_termination_page_);
+    } while (thread_pool_counter_ > 0);
+  }
   // Unregister linear-alloc spaces
   for (auto& data : linear_alloc_spaces_data_) {
     DCHECK_EQ(data.end_ - data.begin_, static_cast<ssize_t>(data.shadow_.Size()));
@@ -3232,7 +3418,9 @@ void MarkCompact::CompactionPhase() {
     }
   }
 
-  heap_->GetThreadPool()->StopWorkers(thread_running_gc_);
+  if (!use_uffd_sigbus_) {
+    heap_->GetThreadPool()->StopWorkers(thread_running_gc_);
+  }
 }
 
 template <size_t kBufferSize>
@@ -3275,7 +3463,7 @@ class MarkCompact::ThreadRootsVisitor : public RootVisitor {
     StackReference<mirror::Object>* start;
     StackReference<mirror::Object>* end;
     {
-      MutexLock mu(self_, mark_compact_->mark_stack_lock_);
+      MutexLock mu(self_, mark_compact_->lock_);
      // Loop here because even after expanding once it may not be sufficient to
      // accommodate all references. It's almost impossible, but there is no harm
      // in implementing it this way.
@@ -3841,22 +4029,21 @@ void MarkCompact::DelayReferenceReferent(ObjPtr<mirror::Class> klass,
 void MarkCompact::FinishPhase() {
   bool is_zygote = Runtime::Current()->IsZygote();
   minor_fault_initialized_ = !is_zygote && uffd_minor_fault_supported_;
-  // When poisoning ObjPtr, we are forced to use buffers for page compaction in
-  // lower 4GB. Now that the usage is done, madvise them. But skip the first
-  // page, which is used by the gc-thread for the next iteration. Otherwise, we
-  // get into a deadlock due to userfault on it in the next iteration. This page
-  // is not consuming any physical memory because we already madvised it above
-  // and then we triggered a read userfault, which maps a special zero-page.
-  if (!minor_fault_initialized_ || !shadow_to_space_map_.IsValid() ||
+  // Madvise compaction buffers. When using the threaded implementation, skip the first
+  // page, which is used by the gc-thread for the next iteration. Otherwise, we get into a
+  // deadlock due to userfault on it in the next iteration. This page is not consuming any
+  // physical memory because we already madvised it above and then we triggered a read
+  // userfault, which maps a special zero-page.
+  if (use_uffd_sigbus_ || !minor_fault_initialized_ || !shadow_to_space_map_.IsValid() ||
       shadow_to_space_map_.Size() < (moving_first_objs_count_ + black_page_count_) * kPageSize) {
-    ZeroAndReleasePages(compaction_buffers_map_.Begin() + kPageSize,
-                        compaction_buffers_map_.Size() - kPageSize);
+    size_t adjustment = use_uffd_sigbus_ ? 0 : kPageSize;
+    ZeroAndReleasePages(compaction_buffers_map_.Begin() + adjustment,
+                        compaction_buffers_map_.Size() - adjustment);
   } else if (shadow_to_space_map_.Size() == bump_pointer_space_->Capacity()) {
     // Now that we are going to use minor-faults from next GC cycle, we can
     // unmap the buffers used by worker threads.
     compaction_buffers_map_.SetSize(kPageSize);
   }
-
   info_map_.MadviseDontNeedAndZero();
   live_words_bitmap_->ClearBitmap();
   // TODO: We can clear this bitmap right before compaction pause. But in that
diff --git a/runtime/gc/collector/mark_compact.h b/runtime/gc/collector/mark_compact.h
index a83a5bdd38..86f568a653 100644
--- a/runtime/gc/collector/mark_compact.h
+++ b/runtime/gc/collector/mark_compact.h
@@ -17,6 +17,8 @@
 #ifndef ART_RUNTIME_GC_COLLECTOR_MARK_COMPACT_H_
 #define ART_RUNTIME_GC_COLLECTOR_MARK_COMPACT_H_
 
+#include <signal.h>
+
 #include <map>
 #include <memory>
 #include <unordered_map>
@@ -54,20 +56,25 @@ class BumpPointerSpace;
 namespace collector {
 
 class MarkCompact final : public GarbageCollector {
  public:
+  using SigbusCounterType = uint32_t;
+
   static constexpr size_t kAlignment = kObjectAlignment;
   static constexpr int kCopyMode = -1;
   static constexpr int kMinorFaultMode = -2;
   // Fake file descriptor for fall back mode (when uffd isn't available)
   static constexpr int kFallbackMode = -3;
-
   static constexpr int kFdSharedAnon = -1;
   static constexpr int kFdUnused = -2;
 
+  // Bitmask for the compaction-done bit in the sigbus_in_progress_count_.
+  static constexpr SigbusCounterType kSigbusCounterCompactionDoneMask =
+      1u << (BitSizeOf<SigbusCounterType>() - 1);
+
   explicit MarkCompact(Heap* heap);
 
   ~MarkCompact() {}
 
-  void RunPhases() override REQUIRES(!Locks::mutator_lock_);
+  void RunPhases() override REQUIRES(!Locks::mutator_lock_, !lock_);
 
   // Updated before (or in) pre-compaction pause and is accessed only in the
   // pause or during concurrent compaction. The flag is reset after compaction
@@ -77,6 +84,12 @@ class MarkCompact final : public GarbageCollector {
     return compacting_ && self == thread_running_gc_;
   }
 
+  bool IsUsingSigbusFeature() const { return use_uffd_sigbus_; }
+
+  // Called by SIGBUS handler. NO_THREAD_SAFETY_ANALYSIS for mutator-lock, which
+  // is asserted in the function.
+  bool SigbusHandler(siginfo_t* info) REQUIRES(!lock_) NO_THREAD_SAFETY_ANALYSIS;
+
   GcType GetGcType() const override {
     return kGcTypeFull;
   }
@@ -157,7 +170,8 @@
     kProcessed = 2,             // Processed but not mapped
     kProcessingAndMapping = 3,  // Being processed by GC or mutator and will be mapped
     kMutatorProcessing = 4,     // Being processed by mutator thread
-    kProcessedAndMapping = 5    // Processed and will be mapped
+    kProcessedAndMapping = 5,   // Processed and will be mapped
+    kProcessedAndMapped = 6     // Processed and mapped. For SIGBUS.
   };
 
  private:
@@ -243,7 +257,7 @@
   // mirror::Class.
   bool IsValidObject(mirror::Object* obj) const REQUIRES_SHARED(Locks::mutator_lock_);
   void InitializePhase();
-  void FinishPhase() REQUIRES(!Locks::mutator_lock_, !Locks::heap_bitmap_lock_);
+  void FinishPhase() REQUIRES(!Locks::mutator_lock_, !Locks::heap_bitmap_lock_, !lock_);
   void MarkingPhase() REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(!Locks::heap_bitmap_lock_);
   void CompactionPhase() REQUIRES_SHARED(Locks::mutator_lock_);
@@ -464,20 +478,15 @@ class MarkCompact final : public GarbageCollector {
   void ConcurrentCompaction(uint8_t* buf) REQUIRES_SHARED(Locks::mutator_lock_);
   // Called by thread-pool workers to compact and copy/map the fault page in
   // moving space.
-  template <int kMode, typename ZeropageType, typename CopyType>
-  void ConcurrentlyProcessMovingPage(ZeropageType& zeropage_ioctl,
-                                     CopyType& copy_ioctl,
-                                     uint8_t* fault_page,
+  template <int kMode>
+  void ConcurrentlyProcessMovingPage(uint8_t* fault_page,
                                      uint8_t* buf,
                                      size_t nr_moving_space_used_pages)
       REQUIRES_SHARED(Locks::mutator_lock_);
   // Called by thread-pool workers to process and copy/map the fault page in
   // linear-alloc.
-  template <int kMode, typename ZeropageType, typename CopyType>
-  void ConcurrentlyProcessLinearAllocPage(ZeropageType& zeropage_ioctl,
-                                          CopyType& copy_ioctl,
-                                          uint8_t* fault_page,
-                                          bool is_minor_fault)
+  template <int kMode>
+  void ConcurrentlyProcessLinearAllocPage(uint8_t* fault_page, bool is_minor_fault)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   // Process concurrently all the pages in linear-alloc. Called by gc-thread.
@@ -515,20 +524,16 @@ class MarkCompact final : public GarbageCollector {
   void MarkZygoteLargeObjects() REQUIRES_SHARED(Locks::mutator_lock_)
       REQUIRES(Locks::heap_bitmap_lock_);
 
-  // Buffers, one per worker thread + gc-thread, to be used when
-  // kObjPtrPoisoning == true as in that case we can't have the buffer on the
-  // stack. The first page of the buffer is assigned to
-  // conc_compaction_termination_page_. A read access to this page signals
-  // termination of concurrent compaction by making worker threads terminate the
-  // userfaultfd read loop.
-  MemMap compaction_buffers_map_;
+  void ZeropageIoctl(void* addr, bool tolerate_eexist, bool tolerate_enoent);
+  void CopyIoctl(void* dst, void* buffer);
+
   // For checkpoints
   Barrier gc_barrier_;
   // Every object inside the immune spaces is assumed to be marked.
   ImmuneSpaces immune_spaces_;
   // Required only when mark-stack is accessed in shared mode, which happens
   // when collecting thread-stack roots using checkpoint.
-  Mutex mark_stack_lock_;
+  Mutex lock_;
   accounting::ObjectStack* mark_stack_;
   // Special bitmap wherein all the bits corresponding to an object are set.
   // TODO: make LiveWordsBitmap encapsulated in this class rather than a
@@ -547,6 +552,12 @@ class MarkCompact final : public GarbageCollector {
   // Any array of live-bytes in logical chunks of kOffsetChunkSize size
   // in the 'to-be-compacted' space.
   MemMap info_map_;
+  // Set of page-sized buffers used for compaction. The first page is used by
+  // the GC thread. Subsequent pages are used by mutator threads in case of
+  // SIGBUS feature, and by uffd-worker threads otherwise. In the latter case
+  // the first page is also used for termination of concurrent compaction by
+  // making worker threads terminate the userfaultfd read loop.
+  MemMap compaction_buffers_map_;
 
   class LessByArenaAddr {
@@ -639,7 +650,7 @@ class MarkCompact final : public GarbageCollector {
   accounting::ContinuousSpaceBitmap* const moving_space_bitmap_;
   accounting::ContinuousSpaceBitmap* non_moving_space_bitmap_;
   Thread* thread_running_gc_;
-  // Array of pages' compaction status.
+  // Array of moving-space's pages' compaction status.
   Atomic<PageState>* moving_pages_status_;
   size_t vector_length_;
   size_t live_stack_freeze_size_;
@@ -711,9 +722,20 @@ class MarkCompact final : public GarbageCollector {
   // Userfault file descriptor, accessed only by the GC itself.
   // kFallbackMode value indicates that we are in the fallback mode.
   int uffd_;
+  // Number of mutator-threads currently executing SIGBUS handler. When the
+  // GC-thread is done with compaction, it sets the most significant bit to
+  // indicate that. Mutator threads check for the flag when incrementing in the
+  // handler.
+  std::atomic<SigbusCounterType> sigbus_in_progress_count_;
+  // Number of mutator-threads/uffd-workers working on moving-space page. It
+  // must be 0 before gc-thread can unregister the space after it's done
+  // sequentially compacting all pages of the space.
+  std::atomic<uint16_t> compaction_in_progress_count_;
+  // When using SIGBUS feature, this counter is used by mutators to claim a page
+  // out of compaction buffers to be used for the entire compaction cycle.
+  std::atomic<uint16_t> compaction_buffer_counter_;
   // Used to exit from compaction loop at the end of concurrent compaction
   uint8_t thread_pool_counter_;
-  std::atomic<uint8_t> compaction_in_progress_count_;
   // True while compacting.
   bool compacting_;
   // Flag indicating whether one-time uffd initialization has been done. It will
@@ -725,6 +747,9 @@ class MarkCompact final : public GarbageCollector {
   // Flag indicating if userfaultfd supports minor-faults. Set appropriately in
   // CreateUserfaultfd(), where we get this information from the kernel.
   const bool uffd_minor_fault_supported_;
+  // Flag indicating if we should use sigbus signals instead of threads to
+  // handle userfaults.
+  const bool use_uffd_sigbus_;
   // For non-zygote processes this flag indicates if the spaces are ready to
   // start using userfaultfd's minor-fault feature. This initialization involves
   // starting to use shmem (memfd_create) for the userfaultfd protected spaces.
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index b84eca8e45..70853e7b7f 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -1775,9 +1775,8 @@ bool Runtime::Init(RuntimeArgumentMap&& runtime_options_in) {
       break;
   }
 
+  fault_manager.Init(!no_sig_chain_);
   if (!no_sig_chain_) {
-    fault_manager.Init();
-
     if (HandlesSignalsInCompiledCode()) {
       // These need to be in a specific order. The null point check handler must be
       // after the suspend check and stack overflow check handlers.
diff --git a/runtime/thread.h b/runtime/thread.h
index 8bef83fac1..a9ac3af209 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -402,6 +402,15 @@ class Thread {
     tlsPtr_.thread_local_mark_stack = stack;
   }
 
+  uint8_t* GetThreadLocalGcBuffer() {
+    DCHECK(gUseUserfaultfd);
+    return tlsPtr_.thread_local_gc_buffer;
+  }
+  void SetThreadLocalGcBuffer(uint8_t* buf) {
+    DCHECK(gUseUserfaultfd);
+    tlsPtr_.thread_local_gc_buffer = buf;
+  }
+
   // Called when thread detected that the thread_suspend_count_ was non-zero. Gives up share of
   // mutator_lock_ and waits until it is resumed and thread_suspend_count_ is zero.
   void FullSuspendCheck(bool implicit = false)
@@ -2088,8 +2097,12 @@ class Thread {
     // Current method verifier, used for root marking.
     verifier::MethodVerifier* method_verifier;
 
-    // Thread-local mark stack for the concurrent copying collector.
-    gc::accounting::AtomicStack<mirror::Object>* thread_local_mark_stack;
+    union {
+      // Thread-local mark stack for the concurrent copying collector.
+      gc::accounting::AtomicStack<mirror::Object>* thread_local_mark_stack;
+      // Thread-local page-sized buffer for userfaultfd GC.
+      uint8_t* thread_local_gc_buffer;
+    };
 
     // The pending async-exception or null.
     mirror::Throwable* async_exception;
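
[Annotation: the termination handshake that CompactionPhase() performs against
the SIGBUS handlers via sigbus_in_progress_count_ can be distilled into the
following standalone sketch. It is illustrative only, not ART code; the names
(kDoneMask, TryEnter, Exit, FinishCompaction) are invented for the example.]

#include <atomic>
#include <cstdint>

constexpr uint32_t kDoneMask = 1u << 31;   // MSB = "compaction done" flag
std::atomic<uint32_t> count{kDoneMask};    // no compaction running initially

// GC thread, in the pre-compaction pause: allow mutators to enter.
void StartCompaction() { count.store(0, std::memory_order_release); }

// Mutator, in the SIGBUS handler: enter only while compaction is running.
bool TryEnter() {
  uint32_t prev = count.load(std::memory_order_relaxed);
  while ((prev & kDoneMask) == 0) {
    // Acquire pairs with the GC's release stores, so the handler sees the
    // compaction data structures that were set up before the counter reset.
    if (count.compare_exchange_weak(prev, prev + 1, std::memory_order_acquire)) {
      return true;  // handle the fault, then call Exit()
    }
  }
  return false;  // compaction finished; the faulted page is already mapped
}

void Exit() { count.fetch_sub(1, std::memory_order_release); }

// GC thread, at the end of compaction: forbid new entries, then wait for
// handlers that already incremented the counter.
void FinishCompaction() {
  uint32_t in_flight =
      count.fetch_or(kDoneMask, std::memory_order_acq_rel) & ~kDoneMask;
  while (in_flight != 0) {
    // The real code backs off with sched_yield()/NanoSleep() here.
    in_flight = count.load(std::memory_order_acquire) & ~kDoneMask;
  }
}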