author     2023-10-17 19:15:23 +0000
committer  2023-10-17 21:32:14 +0000
commit     9faffd5c4e062ca45bd6f29a3b6d1b276e6c9839
tree       2c49f685bbe21597b707587859f2bae1f1f92495
parent     66b2595485bdc2a9fbadd7a1058f9c9fd0a0cc37
Revert "Update class-table and intern-table concurrently with uffd GC"
This reverts commit 97a6f7cd191cde0abaaf6323ae2f67d8e42a1236.
Reason for revert: LUCI failure in libcore test
Change-Id: I381b261ae0f67103bf1d096d8f64c84ba3f3e19c
-rw-r--r--  runtime/base/gc_visited_arena_pool.cc | 244
-rw-r--r--  runtime/base/gc_visited_arena_pool.h  | 207
-rw-r--r--  runtime/class_linker.cc               |  20
-rw-r--r--  runtime/class_linker.h                |   5
-rw-r--r--  runtime/class_table-inl.h             |  36
-rw-r--r--  runtime/class_table.h                 |  26
-rw-r--r--  runtime/gc/collector/mark_compact.cc  | 278
-rw-r--r--  runtime/gc/collector/mark_compact.h   |   5
-rw-r--r--  runtime/gc/heap.cc                    |   4
-rw-r--r--  runtime/intern_table.h                |  13
-rw-r--r--  runtime/runtime.cc                    |  16
11 files changed, 241 insertions(+), 613 deletions(-)
diff --git a/runtime/base/gc_visited_arena_pool.cc b/runtime/base/gc_visited_arena_pool.cc index 77cc7ae32d..039157a30c 100644 --- a/runtime/base/gc_visited_arena_pool.cc +++ b/runtime/base/gc_visited_arena_pool.cc @@ -27,67 +27,43 @@ namespace art { -TrackedArena::TrackedArena(uint8_t* start, size_t size, bool pre_zygote_fork, bool single_obj_arena) - : Arena(), - first_obj_array_(nullptr), - pre_zygote_fork_(pre_zygote_fork), - waiting_for_deletion_(false) { +TrackedArena::TrackedArena(uint8_t* start, size_t size, bool pre_zygote_fork) + : Arena(), first_obj_array_(nullptr), pre_zygote_fork_(pre_zygote_fork) { static_assert(ArenaAllocator::kArenaAlignment <= kPageSize, "Arena should not need stronger alignment than kPageSize."); + DCHECK_ALIGNED(size, kPageSize); + DCHECK_ALIGNED(start, kPageSize); memory_ = start; size_ = size; - if (single_obj_arena) { - // We have only one object in this arena and it is expected to consume the - // entire arena. - bytes_allocated_ = size; - } else { - DCHECK_ALIGNED(size, kPageSize); - DCHECK_ALIGNED(start, kPageSize); - size_t arr_size = size / kPageSize; - first_obj_array_.reset(new uint8_t*[arr_size]); - std::fill_n(first_obj_array_.get(), arr_size, nullptr); - } -} - -void TrackedArena::ReleasePages(uint8_t* begin, size_t size, bool pre_zygote_fork) { - DCHECK_ALIGNED(begin, kPageSize); - // Userfaultfd GC uses MAP_SHARED mappings for linear-alloc and therefore - // MADV_DONTNEED will not free the pages from page cache. Therefore use - // MADV_REMOVE instead, which is meant for this purpose. - // Arenas allocated pre-zygote fork are private anonymous and hence must be - // released using MADV_DONTNEED. - if (!gUseUserfaultfd || pre_zygote_fork || - (madvise(begin, size, MADV_REMOVE) == -1 && errno == EINVAL)) { - // MADV_REMOVE fails if invoked on anonymous mapping, which could happen - // if the arena is released before userfaultfd-GC starts using memfd. So - // use MADV_DONTNEED. - ZeroAndReleaseMemory(begin, size); - } + size_t arr_size = size / kPageSize; + first_obj_array_.reset(new uint8_t*[arr_size]); + std::fill_n(first_obj_array_.get(), arr_size, nullptr); } void TrackedArena::Release() { if (bytes_allocated_ > 0) { - ReleasePages(Begin(), Size(), pre_zygote_fork_); - if (first_obj_array_.get() != nullptr) { - std::fill_n(first_obj_array_.get(), Size() / kPageSize, nullptr); + // Userfaultfd GC uses MAP_SHARED mappings for linear-alloc and therefore + // MADV_DONTNEED will not free the pages from page cache. Therefore use + // MADV_REMOVE instead, which is meant for this purpose. + // Arenas allocated pre-zygote fork are private anonymous and hence must be + // released using MADV_DONTNEED. + if (!gUseUserfaultfd || pre_zygote_fork_ || + (madvise(Begin(), Size(), MADV_REMOVE) == -1 && errno == EINVAL)) { + // MADV_REMOVE fails if invoked on anonymous mapping, which could happen + // if the arena is released before userfaultfd-GC starts using memfd. So + // use MADV_DONTNEED. 
+ ZeroAndReleaseMemory(Begin(), Size()); } + std::fill_n(first_obj_array_.get(), Size() / kPageSize, nullptr); bytes_allocated_ = 0; } } void TrackedArena::SetFirstObject(uint8_t* obj_begin, uint8_t* obj_end) { - DCHECK(first_obj_array_.get() != nullptr); DCHECK_LE(static_cast<void*>(Begin()), static_cast<void*>(obj_end)); DCHECK_LT(static_cast<void*>(obj_begin), static_cast<void*>(obj_end)); - GcVisitedArenaPool* arena_pool = - static_cast<GcVisitedArenaPool*>(Runtime::Current()->GetLinearAllocArenaPool()); size_t idx = static_cast<size_t>(obj_begin - Begin()) / kPageSize; size_t last_byte_idx = static_cast<size_t>(obj_end - 1 - Begin()) / kPageSize; - // Do the update below with arena-pool's lock in shared-mode to serialize with - // the compaction-pause wherein we acquire it exclusively. This is to ensure - // that last-byte read there doesn't change after reading it and before - // userfaultfd registration. - ReaderMutexLock rmu(Thread::Current(), arena_pool->GetLock()); // If the addr is at the beginning of a page, then we set it for that page too. if (IsAligned<kPageSize>(obj_begin)) { first_obj_array_[idx] = obj_begin; @@ -130,13 +106,7 @@ uint8_t* GcVisitedArenaPool::AddMap(size_t min_size) { } GcVisitedArenaPool::GcVisitedArenaPool(bool low_4gb, bool is_zygote, const char* name) - : lock_("gc-visited arena-pool", kGenericBottomLock), - bytes_allocated_(0), - unused_arenas_(nullptr), - name_(name), - defer_arena_freeing_(false), - low_4gb_(low_4gb), - pre_zygote_fork_(is_zygote) {} + : bytes_allocated_(0), name_(name), low_4gb_(low_4gb), pre_zygote_fork_(is_zygote) {} GcVisitedArenaPool::~GcVisitedArenaPool() { for (Chunk* chunk : free_chunks_) { @@ -147,12 +117,13 @@ GcVisitedArenaPool::~GcVisitedArenaPool() { } size_t GcVisitedArenaPool::GetBytesAllocated() const { - ReaderMutexLock rmu(Thread::Current(), lock_); + std::lock_guard<std::mutex> lock(lock_); return bytes_allocated_; } uint8_t* GcVisitedArenaPool::AddPreZygoteForkMap(size_t size) { DCHECK(pre_zygote_fork_); + DCHECK(Runtime::Current()->IsZygote()); std::string pre_fork_name = "Pre-zygote-"; pre_fork_name += name_; std::string err_msg; @@ -166,67 +137,18 @@ uint8_t* GcVisitedArenaPool::AddPreZygoteForkMap(size_t size) { return map.Begin(); } -uint8_t* GcVisitedArenaPool::AllocSingleObjArena(size_t size) { - WriterMutexLock wmu(Thread::Current(), lock_); - Arena* arena; - DCHECK(gUseUserfaultfd); - // To minimize private dirty, all class and intern table allocations are - // done outside LinearAlloc range so they are untouched during GC. 
- if (pre_zygote_fork_) { - uint8_t* begin = static_cast<uint8_t*>(malloc(size)); - auto insert_result = allocated_arenas_.insert( - new TrackedArena(begin, size, /*pre_zygote_fork=*/true, /*single_obj_arena=*/true)); - arena = *insert_result.first; - } else { - arena = AllocArena(size, /*single_obj_arena=*/true); - } - return arena->Begin(); -} - -void GcVisitedArenaPool::FreeSingleObjArena(uint8_t* addr) { - Thread* self = Thread::Current(); - size_t size; - bool zygote_arena; - { - TrackedArena temp_arena(addr); - WriterMutexLock wmu(self, lock_); - auto iter = allocated_arenas_.find(&temp_arena); - DCHECK(iter != allocated_arenas_.end()); - TrackedArena* arena = *iter; - size = arena->Size(); - zygote_arena = arena->IsPreZygoteForkArena(); - DCHECK_EQ(arena->Begin(), addr); - DCHECK(arena->IsSingleObjectArena()); - allocated_arenas_.erase(iter); - if (defer_arena_freeing_) { - arena->SetupForDeferredDeletion(unused_arenas_); - unused_arenas_ = arena; - } else { - delete arena; - } - } - // Refer to the comment in FreeArenaChain() for why the pages are released - // after deleting the arena. - if (zygote_arena) { - free(addr); - } else { - TrackedArena::ReleasePages(addr, size, /*pre_zygote_fork=*/false); - WriterMutexLock wmu(self, lock_); - FreeRangeLocked(addr, size); - } -} - -Arena* GcVisitedArenaPool::AllocArena(size_t size, bool single_obj_arena) { +Arena* GcVisitedArenaPool::AllocArena(size_t size) { // Return only page aligned sizes so that madvise can be leveraged. size = RoundUp(size, kPageSize); + std::lock_guard<std::mutex> lock(lock_); + if (pre_zygote_fork_) { // The first fork out of zygote hasn't happened yet. Allocate arena in a // private-anonymous mapping to retain clean pages across fork. + DCHECK(Runtime::Current()->IsZygote()); uint8_t* addr = AddPreZygoteForkMap(size); - auto insert_result = allocated_arenas_.insert( - new TrackedArena(addr, size, /*pre_zygote_fork=*/true, single_obj_arena)); - DCHECK(insert_result.second); - return *insert_result.first; + auto emplace_result = allocated_arenas_.emplace(addr, size, /*pre_zygote_fork=*/true); + return const_cast<TrackedArena*>(&(*emplace_result.first)); } Chunk temp_chunk(nullptr, size); @@ -243,21 +165,19 @@ Arena* GcVisitedArenaPool::AllocArena(size_t size, bool single_obj_arena) { // if the best-fit chunk < 2x the requested size, then give the whole chunk. if (chunk->size_ < 2 * size) { DCHECK_GE(chunk->size_, size); - auto insert_result = allocated_arenas_.insert(new TrackedArena(chunk->addr_, - chunk->size_, - /*pre_zygote_fork=*/false, - single_obj_arena)); - DCHECK(insert_result.second); + auto emplace_result = allocated_arenas_.emplace(chunk->addr_, + chunk->size_, + /*pre_zygote_fork=*/false); + DCHECK(emplace_result.second); free_chunks_.erase(free_chunks_iter); best_fit_allocs_.erase(best_fit_iter); delete chunk; - return *insert_result.first; + return const_cast<TrackedArena*>(&(*emplace_result.first)); } else { - auto insert_result = allocated_arenas_.insert(new TrackedArena(chunk->addr_, - size, - /*pre_zygote_fork=*/false, - single_obj_arena)); - DCHECK(insert_result.second); + auto emplace_result = allocated_arenas_.emplace(chunk->addr_, + size, + /*pre_zygote_fork=*/false); + DCHECK(emplace_result.second); // Compute next iterators for faster insert later. 
auto next_best_fit_iter = best_fit_iter; next_best_fit_iter++; @@ -270,7 +190,7 @@ Arena* GcVisitedArenaPool::AllocArena(size_t size, bool single_obj_arena) { DCHECK_EQ(free_chunks_nh.value()->addr_, chunk->addr_); best_fit_allocs_.insert(next_best_fit_iter, std::move(best_fit_nh)); free_chunks_.insert(next_free_chunks_iter, std::move(free_chunks_nh)); - return *insert_result.first; + return const_cast<TrackedArena*>(&(*emplace_result.first)); } } @@ -346,79 +266,27 @@ void GcVisitedArenaPool::FreeArenaChain(Arena* first) { // TODO: Handle the case when arena_allocator::kArenaAllocatorPreciseTracking // is true. See MemMapArenaPool::FreeArenaChain() for example. CHECK(!arena_allocator::kArenaAllocatorPreciseTracking); - Thread* self = Thread::Current(); - // vector of arena ranges to be freed and whether they are pre-zygote-fork. - std::vector<std::tuple<uint8_t*, size_t, bool>> free_ranges; - - { - WriterMutexLock wmu(self, lock_); - while (first != nullptr) { - TrackedArena* temp = down_cast<TrackedArena*>(first); - DCHECK(!temp->IsSingleObjectArena()); - first = first->Next(); - free_ranges.emplace_back(temp->Begin(), temp->Size(), temp->IsPreZygoteForkArena()); - // In other implementations of ArenaPool this is calculated when asked for, - // thanks to the list of free arenas that is kept around. But in this case, - // we release the freed arena back to the pool and therefore need to - // calculate here. - bytes_allocated_ += temp->GetBytesAllocated(); - auto iter = allocated_arenas_.find(temp); - DCHECK(iter != allocated_arenas_.end()); - allocated_arenas_.erase(iter); - if (defer_arena_freeing_) { - temp->SetupForDeferredDeletion(unused_arenas_); - unused_arenas_ = temp; - } else { - delete temp; - } - } - } - // madvise of arenas must be done after the above loop which serializes with - // MarkCompact::ProcessLinearAlloc() so that if it finds an arena to be not - // 'waiting-for-deletion' then it finishes the arena's processing before - // clearing here. Otherwise, we could have a situation wherein arena-pool - // assumes the memory range of the arena(s) to be zero'ed (by madvise), - // whereas GC maps stale arena pages. - for (auto& iter : free_ranges) { - // No need to madvise pre-zygote-fork arenas as they will munmapped below. - if (!std::get<2>(iter)) { - TrackedArena::ReleasePages(std::get<0>(iter), std::get<1>(iter), /*pre_zygote_fork=*/false); - } + // madvise the arenas before acquiring lock for scalability + for (Arena* temp = first; temp != nullptr; temp = temp->Next()) { + temp->Release(); } - WriterMutexLock wmu(self, lock_); - for (auto& iter : free_ranges) { - if (UNLIKELY(std::get<2>(iter))) { - bool found = false; - for (auto map_iter = maps_.begin(); map_iter != maps_.end(); map_iter++) { - if (map_iter->Begin() == std::get<0>(iter)) { - // erase will destruct the MemMap and thereby munmap. But this happens - // very rarely so it's ok to do it with lock acquired. 
- maps_.erase(map_iter); - found = true; - break; - } - } - CHECK(found); - } else { - FreeRangeLocked(std::get<0>(iter), std::get<1>(iter)); - } - } -} - -void GcVisitedArenaPool::DeleteUnusedArenas() { - TrackedArena* arena; - { - WriterMutexLock wmu(Thread::Current(), lock_); - defer_arena_freeing_ = false; - arena = unused_arenas_; - unused_arenas_ = nullptr; - } - while (arena != nullptr) { - TrackedArena* temp = down_cast<TrackedArena*>(arena->Next()); - delete arena; - arena = temp; + std::lock_guard<std::mutex> lock(lock_); + arenas_freed_ = true; + while (first != nullptr) { + FreeRangeLocked(first->Begin(), first->Size()); + // In other implementations of ArenaPool this is calculated when asked for, + // thanks to the list of free arenas that is kept around. But in this case, + // we release the freed arena back to the pool and therefore need to + // calculate here. + bytes_allocated_ += first->GetBytesAllocated(); + TrackedArena* temp = down_cast<TrackedArena*>(first); + // TODO: Add logic to unmap the maps corresponding to pre-zygote-fork + // arenas, which are expected to be released only during shutdown. + first = first->Next(); + size_t erase_count = allocated_arenas_.erase(*temp); + DCHECK_EQ(erase_count, 1u); } } diff --git a/runtime/base/gc_visited_arena_pool.h b/runtime/base/gc_visited_arena_pool.h index f3c1abcb5a..e307147c9e 100644 --- a/runtime/base/gc_visited_arena_pool.h +++ b/runtime/base/gc_visited_arena_pool.h @@ -17,16 +17,12 @@ #ifndef ART_RUNTIME_BASE_GC_VISITED_ARENA_POOL_H_ #define ART_RUNTIME_BASE_GC_VISITED_ARENA_POOL_H_ -#include <set> - -#include "base/allocator.h" -#include "base/arena_allocator.h" #include "base/casts.h" -#include "base/hash_set.h" +#include "base/arena_allocator.h" #include "base/locks.h" #include "base/mem_map.h" -#include "read_barrier_config.h" -#include "runtime.h" + +#include <set> namespace art { @@ -38,45 +34,27 @@ class TrackedArena final : public Arena { public: // Used for searching in maps. Only arena's starting address is relevant. explicit TrackedArena(uint8_t* addr) : pre_zygote_fork_(false) { memory_ = addr; } - TrackedArena(uint8_t* start, size_t size, bool pre_zygote_fork, bool single_obj_arena); + TrackedArena(uint8_t* start, size_t size, bool pre_zygote_fork); template <typename PageVisitor> void VisitRoots(PageVisitor& visitor) const REQUIRES_SHARED(Locks::mutator_lock_) { + DCHECK_ALIGNED(Size(), kPageSize); + DCHECK_ALIGNED(Begin(), kPageSize); + int nr_pages = Size() / kPageSize; uint8_t* page_begin = Begin(); - if (first_obj_array_.get() != nullptr) { - DCHECK_ALIGNED(Size(), kPageSize); - DCHECK_ALIGNED(Begin(), kPageSize); - for (int i = 0, nr_pages = Size() / kPageSize; i < nr_pages; i++, page_begin += kPageSize) { - uint8_t* first = first_obj_array_[i]; - if (first != nullptr) { - visitor(page_begin, first, kPageSize); - } else { - break; - } - } - } else { - size_t page_size = Size(); - while (page_size > kPageSize) { - visitor(page_begin, nullptr, kPageSize); - page_begin += kPageSize; - page_size -= kPageSize; - } - visitor(page_begin, nullptr, page_size); + for (int i = 0; i < nr_pages && first_obj_array_[i] != nullptr; i++, page_begin += kPageSize) { + visitor(page_begin, first_obj_array_[i]); } } // Return the page addr of the first page with first_obj set to nullptr. 
uint8_t* GetLastUsedByte() const REQUIRES_SHARED(Locks::mutator_lock_) { + DCHECK_ALIGNED(Begin(), kPageSize); + DCHECK_ALIGNED(End(), kPageSize); // Jump past bytes-allocated for arenas which are not currently being used // by arena-allocator. This helps in reducing loop iterations below. uint8_t* last_byte = AlignUp(Begin() + GetBytesAllocated(), kPageSize); - if (first_obj_array_.get() != nullptr) { - DCHECK_ALIGNED(Begin(), kPageSize); - DCHECK_ALIGNED(End(), kPageSize); - DCHECK_LE(last_byte, End()); - } else { - DCHECK_EQ(last_byte, End()); - } + DCHECK_LE(last_byte, End()); for (size_t i = (last_byte - Begin()) / kPageSize; last_byte < End() && first_obj_array_[i] != nullptr; last_byte += kPageSize, i++) { @@ -88,43 +66,21 @@ class TrackedArena final : public Arena { uint8_t* GetFirstObject(uint8_t* addr) const REQUIRES_SHARED(Locks::mutator_lock_) { DCHECK_LE(Begin(), addr); DCHECK_GT(End(), addr); - if (first_obj_array_.get() != nullptr) { - return first_obj_array_[(addr - Begin()) / kPageSize]; - } else { - // The pages of this arena contain array of GC-roots. So we don't need - // first-object of any given page of the arena. - // Returning null helps distinguish which visitor is to be called. - return nullptr; - } + return first_obj_array_[(addr - Begin()) / kPageSize]; } // Set 'obj_begin' in first_obj_array_ in every element for which it's the // first object. void SetFirstObject(uint8_t* obj_begin, uint8_t* obj_end); - // Setup the arena for deferred deletion. - void SetupForDeferredDeletion(TrackedArena* next_arena) { - DCHECK(next_arena->waiting_for_deletion_); - DCHECK(!waiting_for_deletion_); - waiting_for_deletion_ = true; - next_ = next_arena; - } - bool IsWaitingForDeletion() const { return waiting_for_deletion_; } - // Madvise the pages in the given range. 'begin' is expected to be page - // aligned. - // TODO: Remove this once we remove the shmem (minor-fault) code in - // userfaultfd GC and directly use ZeroAndReleaseMemory(). - static void ReleasePages(uint8_t* begin, size_t size, bool pre_zygote_fork); void Release() override; bool IsPreZygoteForkArena() const { return pre_zygote_fork_; } - bool IsSingleObjectArena() const { return first_obj_array_.get() == nullptr; } private: // first_obj_array_[i] is the object that overlaps with the ith page's // beginning, i.e. first_obj_array_[i] <= ith page_begin. std::unique_ptr<uint8_t*[]> first_obj_array_; const bool pre_zygote_fork_; - bool waiting_for_deletion_; }; // An arena-pool wherein allocations can be tracked so that the GC can visit all @@ -145,24 +101,15 @@ class GcVisitedArenaPool final : public ArenaPool { bool is_zygote = false, const char* name = "LinearAlloc"); virtual ~GcVisitedArenaPool(); - - Arena* AllocArena(size_t size, bool need_first_obj_arr) REQUIRES(lock_); - // Use by arena allocator. 
- Arena* AllocArena(size_t size) override REQUIRES(!lock_) { - WriterMutexLock wmu(Thread::Current(), lock_); - return AllocArena(size, /*single_obj_arena=*/false); - } - void FreeArenaChain(Arena* first) override REQUIRES(!lock_); - size_t GetBytesAllocated() const override REQUIRES(!lock_); + Arena* AllocArena(size_t size) override; + void FreeArenaChain(Arena* first) override; + size_t GetBytesAllocated() const override; void ReclaimMemory() override {} void LockReclaimMemory() override {} void TrimMaps() override {} - uint8_t* AllocSingleObjArena(size_t size) REQUIRES(!lock_); - void FreeSingleObjArena(uint8_t* addr) REQUIRES(!lock_); - - bool Contains(void* ptr) REQUIRES(!lock_) { - ReaderMutexLock rmu(Thread::Current(), lock_); + bool Contains(void* ptr) { + std::lock_guard<std::mutex> lock(lock_); for (auto& map : maps_) { if (map.HasAddress(ptr)) { return true; @@ -172,43 +119,51 @@ class GcVisitedArenaPool final : public ArenaPool { } template <typename PageVisitor> - void VisitRoots(PageVisitor& visitor) REQUIRES_SHARED(Locks::mutator_lock_, lock_) { + void VisitRoots(PageVisitor& visitor) REQUIRES_SHARED(Locks::mutator_lock_) { + std::lock_guard<std::mutex> lock(lock_); for (auto& arena : allocated_arenas_) { - arena->VisitRoots(visitor); + arena.VisitRoots(visitor); } } template <typename Callback> - void ForEachAllocatedArena(Callback cb) REQUIRES_SHARED(Locks::mutator_lock_, lock_) { - // We should not have any unused arenas when calling this function. - CHECK(unused_arenas_ == nullptr); + void ForEachAllocatedArena(Callback cb) REQUIRES_SHARED(Locks::mutator_lock_) { + std::lock_guard<std::mutex> lock(lock_); for (auto& arena : allocated_arenas_) { - cb(*arena); + cb(arena); } } // Called in Heap::PreZygoteFork(). All allocations after this are done in // arena-pool which is visited by userfaultfd. - void SetupPostZygoteMode() REQUIRES(!lock_) { - WriterMutexLock wmu(Thread::Current(), lock_); + void SetupPostZygoteMode() { + std::lock_guard<std::mutex> lock(lock_); DCHECK(pre_zygote_fork_); pre_zygote_fork_ = false; } // For userfaultfd GC to be able to acquire the lock to avoid concurrent // release of arenas when it is visiting them. - ReaderWriterMutex& GetLock() const RETURN_CAPABILITY(lock_) { return lock_; } + std::mutex& GetLock() { return lock_; } + + // Find the given arena in allocated_arenas_. The function is called with + // lock_ acquired. + bool FindAllocatedArena(const TrackedArena* arena) const NO_THREAD_SAFETY_ANALYSIS { + for (auto& allocated_arena : allocated_arenas_) { + if (arena == &allocated_arena) { + return true; + } + } + return false; + } - // Called in the compaction pause to indicate that all arenas that will be - // freed until compaction is done shouldn't delete the TrackedArena object to - // avoid ABA problem. Called with lock_ acquired. - void DeferArenaFreeing() REQUIRES(lock_) { - CHECK(unused_arenas_ == nullptr); - defer_arena_freeing_ = true; + void ClearArenasFreed() { + std::lock_guard<std::mutex> lock(lock_); + arenas_freed_ = false; } - // Clear defer_arena_freeing_ and delete all unused arenas. - void DeleteUnusedArenas() REQUIRES(!lock_); + // The function is called with lock_ acquired. 
+ bool AreArenasFreed() const NO_THREAD_SAFETY_ANALYSIS { return arenas_freed_; } private: void FreeRangeLocked(uint8_t* range_begin, size_t range_size) REQUIRES(lock_); @@ -242,40 +197,31 @@ class GcVisitedArenaPool final : public ArenaPool { } }; - class TrackedArenaEquals { + class LessByArenaAddr { public: - bool operator()(const TrackedArena* a, const TrackedArena* b) const { - return std::equal_to<uint8_t*>{}(a->Begin(), b->Begin()); + bool operator()(const TrackedArena& a, const TrackedArena& b) const { + return std::less<uint8_t*>{}(a.Begin(), b.Begin()); } }; - class TrackedArenaHash { - public: - size_t operator()(const TrackedArena* arena) const { - return std::hash<size_t>{}(reinterpret_cast<uintptr_t>(arena->Begin()) / kPageSize); - } - }; - using AllocatedArenaSet = - HashSet<TrackedArena*, DefaultEmptyFn<TrackedArena*>, TrackedArenaHash, TrackedArenaEquals>; - - mutable ReaderWriterMutex lock_; + // Use a std::mutex here as Arenas are second-from-the-bottom when using MemMaps, and MemMap + // itself uses std::mutex scoped to within an allocate/free only. + mutable std::mutex lock_; std::vector<MemMap> maps_ GUARDED_BY(lock_); std::set<Chunk*, LessByChunkSize> best_fit_allocs_ GUARDED_BY(lock_); std::set<Chunk*, LessByChunkAddr> free_chunks_ GUARDED_BY(lock_); // Set of allocated arenas. It's required to be able to find the arena // corresponding to a given address. - AllocatedArenaSet allocated_arenas_ GUARDED_BY(lock_); + // TODO: consider using HashSet, which is more memory efficient. + std::set<TrackedArena, LessByArenaAddr> allocated_arenas_ GUARDED_BY(lock_); // Number of bytes allocated so far. size_t bytes_allocated_ GUARDED_BY(lock_); - // To hold arenas that are freed while GC is happening. These are kept until - // the end of GC to avoid ABA problem. - TrackedArena* unused_arenas_ GUARDED_BY(lock_); const char* name_; // Flag to indicate that some arenas have been freed. This flag is used as an // optimization by GC to know if it needs to find if the arena being visited // has been freed or not. The flag is cleared in the compaction pause and read // when linear-alloc space is concurrently visited updated to update GC roots. - bool defer_arena_freeing_ GUARDED_BY(lock_); + bool arenas_freed_ GUARDED_BY(lock_); const bool low_4gb_; // Set to true in zygote process so that all linear-alloc allocations are in // private-anonymous mappings and not on userfaultfd visited pages. At @@ -286,55 +232,6 @@ class GcVisitedArenaPool final : public ArenaPool { DISALLOW_COPY_AND_ASSIGN(GcVisitedArenaPool); }; -// Allocator for class-table and intern-table hash-sets. It enables updating the -// roots concurrently page-by-page. -template <class T, AllocatorTag kTag> -class GcRootArenaAllocator : public TrackingAllocator<T, kTag> { - public: - using value_type = typename TrackingAllocator<T, kTag>::value_type; - using size_type = typename TrackingAllocator<T, kTag>::size_type; - using difference_type = typename TrackingAllocator<T, kTag>::difference_type; - using pointer = typename TrackingAllocator<T, kTag>::pointer; - using const_pointer = typename TrackingAllocator<T, kTag>::const_pointer; - using reference = typename TrackingAllocator<T, kTag>::reference; - using const_reference = typename TrackingAllocator<T, kTag>::const_reference; - - // Used internally by STL data structures. - template <class U> - explicit GcRootArenaAllocator( - [[maybe_unused]] const GcRootArenaAllocator<U, kTag>& alloc) noexcept {} - // Used internally by STL data structures. 
- GcRootArenaAllocator() noexcept : TrackingAllocator<T, kTag>() {} - - // Enables an allocator for objects of one type to allocate storage for objects of another type. - // Used internally by STL data structures. - template <class U> - struct rebind { - using other = GcRootArenaAllocator<U, kTag>; - }; - - pointer allocate(size_type n, [[maybe_unused]] const_pointer hint = 0) { - if (!gUseUserfaultfd) { - return TrackingAllocator<T, kTag>::allocate(n); - } - size_t size = n * sizeof(T); - GcVisitedArenaPool* pool = - down_cast<GcVisitedArenaPool*>(Runtime::Current()->GetLinearAllocArenaPool()); - return reinterpret_cast<pointer>(pool->AllocSingleObjArena(size)); - } - - template <typename PT> - void deallocate(PT p, size_type n) { - if (!gUseUserfaultfd) { - TrackingAllocator<T, kTag>::deallocate(p, n); - return; - } - GcVisitedArenaPool* pool = - down_cast<GcVisitedArenaPool*>(Runtime::Current()->GetLinearAllocArenaPool()); - pool->FreeSingleObjArena(reinterpret_cast<uint8_t*>(p)); - } -}; - } // namespace art #endif // ART_RUNTIME_BASE_GC_VISITED_ARENA_POOL_H_ diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc index d4666e5f9c..9d175cdba5 100644 --- a/runtime/class_linker.cc +++ b/runtime/class_linker.cc @@ -2426,9 +2426,17 @@ void ClassLinker::VisitClassRoots(RootVisitor* visitor, VisitRootFlags flags) { boot_class_table_->VisitRoots(root_visitor); // If tracing is enabled, then mark all the class loaders to prevent unloading. if ((flags & kVisitRootFlagClassLoader) != 0 || tracing_enabled) { - for (const ClassLoaderData& data : class_loaders_) { - GcRoot<mirror::Object> root(GcRoot<mirror::Object>(self->DecodeJObject(data.weak_root))); - root.VisitRoot(visitor, RootInfo(kRootVMInternal)); + gc::Heap* const heap = Runtime::Current()->GetHeap(); + // Don't visit class-loaders if compacting with userfaultfd GC as these + // weaks are updated using Runtime::SweepSystemWeaks() and the GC doesn't + // tolerate double updates. + if (!heap->IsPerformingUffdCompaction()) { + for (const ClassLoaderData& data : class_loaders_) { + GcRoot<mirror::Object> root(GcRoot<mirror::Object>(self->DecodeJObject(data.weak_root))); + root.VisitRoot(visitor, RootInfo(kRootVMInternal)); + } + } else { + DCHECK_EQ(heap->CurrentCollectorType(), gc::CollectorType::kCollectorTypeCMC); } } } else if (!gUseReadBarrier && (flags & kVisitRootFlagNewRoots) != 0) { @@ -2468,11 +2476,9 @@ void ClassLinker::VisitClassRoots(RootVisitor* visitor, VisitRootFlags flags) { // Keep in sync with InitCallback. Anything we visit, we need to // reinit references to when reinitializing a ClassLinker from a // mapped image. -void ClassLinker::VisitRoots(RootVisitor* visitor, VisitRootFlags flags, bool visit_class_roots) { +void ClassLinker::VisitRoots(RootVisitor* visitor, VisitRootFlags flags) { class_roots_.VisitRootIfNonNull(visitor, RootInfo(kRootVMInternal)); - if (visit_class_roots) { - VisitClassRoots(visitor, flags); - } + VisitClassRoots(visitor, flags); // Instead of visiting the find_array_class_cache_ drop it so that it doesn't prevent class // unloading if we are marking roots. 
DropFindArrayClassCache(); diff --git a/runtime/class_linker.h b/runtime/class_linker.h index c81713b116..3aae0422c1 100644 --- a/runtime/class_linker.h +++ b/runtime/class_linker.h @@ -514,7 +514,7 @@ class ClassLinker { void VisitClassRoots(RootVisitor* visitor, VisitRootFlags flags) REQUIRES(!Locks::classlinker_classes_lock_, !Locks::trace_lock_) REQUIRES_SHARED(Locks::mutator_lock_); - void VisitRoots(RootVisitor* visitor, VisitRootFlags flags, bool visit_class_roots = true) + void VisitRoots(RootVisitor* visitor, VisitRootFlags flags) REQUIRES(!Locks::dex_lock_, !Locks::classlinker_classes_lock_, !Locks::trace_lock_) REQUIRES_SHARED(Locks::mutator_lock_); // Visits all dex-files accessible by any class-loader or the BCP. @@ -866,9 +866,6 @@ class ClassLinker { // Enable or disable public sdk checks. virtual void SetEnablePublicSdkChecks(bool enabled); void RemoveDexFromCaches(const DexFile& dex_file); - ClassTable* GetBootClassTable() REQUIRES_SHARED(Locks::classlinker_classes_lock_) { - return boot_class_table_.get(); - } protected: virtual bool InitializeClass(Thread* self, diff --git a/runtime/class_table-inl.h b/runtime/class_table-inl.h index 4ee59a79f7..ecc8a0a620 100644 --- a/runtime/class_table-inl.h +++ b/runtime/class_table-inl.h @@ -68,14 +68,12 @@ inline bool ClassTable::ClassDescriptorEquals::operator()(const TableSlot& a, return a.Read<kWithoutReadBarrier>()->DescriptorEquals(b.first); } -template <class Visitor> -void ClassTable::VisitRoots(Visitor& visitor, bool skip_classes) { +template<class Visitor> +void ClassTable::VisitRoots(Visitor& visitor) { ReaderMutexLock mu(Thread::Current(), lock_); - if (!skip_classes) { - for (ClassSet& class_set : classes_) { - for (TableSlot& table_slot : class_set) { - table_slot.VisitRoot(visitor); - } + for (ClassSet& class_set : classes_) { + for (TableSlot& table_slot : class_set) { + table_slot.VisitRoot(visitor); } } for (GcRoot<mirror::Object>& root : strong_roots_) { @@ -88,14 +86,12 @@ void ClassTable::VisitRoots(Visitor& visitor, bool skip_classes) { } } -template <class Visitor> -void ClassTable::VisitRoots(const Visitor& visitor, bool skip_classes) { +template<class Visitor> +void ClassTable::VisitRoots(const Visitor& visitor) { ReaderMutexLock mu(Thread::Current(), lock_); - if (!skip_classes) { - for (ClassSet& class_set : classes_) { - for (TableSlot& table_slot : class_set) { - table_slot.VisitRoot(visitor); - } + for (ClassSet& class_set : classes_) { + for (TableSlot& table_slot : class_set) { + table_slot.VisitRoot(visitor); } } for (GcRoot<mirror::Object>& root : strong_roots_) { @@ -108,18 +104,6 @@ void ClassTable::VisitRoots(const Visitor& visitor, bool skip_classes) { } } -template <class Condition, class Visitor> -void ClassTable::VisitClassesIfConditionMet(Condition& cond, Visitor& visitor) { - ReaderMutexLock mu(Thread::Current(), lock_); - for (ClassSet& class_set : classes_) { - if (cond(class_set)) { - for (TableSlot& table_slot : class_set) { - table_slot.VisitRoot(visitor); - } - } - } -} - template <typename Visitor> class ClassTable::TableSlot::ClassAndRootVisitor { public: diff --git a/runtime/class_table.h b/runtime/class_table.h index 54e066a18c..7e263737c3 100644 --- a/runtime/class_table.h +++ b/runtime/class_table.h @@ -21,7 +21,7 @@ #include <utility> #include <vector> -#include "base/gc_visited_arena_pool.h" +#include "base/allocator.h" #include "base/hash_set.h" #include "base/macros.h" #include "base/mutex.h" @@ -151,7 +151,7 @@ class ClassTable { TableSlotEmptyFn, ClassDescriptorHash, 
ClassDescriptorEquals, - GcRootArenaAllocator<TableSlot, kAllocatorTagClassTable>>; + TrackingAllocator<TableSlot, kAllocatorTagClassTable>>; ClassTable(); @@ -181,7 +181,7 @@ class ClassTable { REQUIRES(!lock_) REQUIRES_SHARED(Locks::mutator_lock_); - // Returns the number of class-sets in the class table. + // Returns the number of classes in the class table. size_t Size() const REQUIRES(!lock_) REQUIRES_SHARED(Locks::mutator_lock_); @@ -194,13 +194,17 @@ class ClassTable { REQUIRES_SHARED(Locks::mutator_lock_); // NO_THREAD_SAFETY_ANALYSIS for object marking requiring heap bitmap lock. - template <class Visitor> - void VisitRoots(Visitor& visitor, bool skip_classes = false) NO_THREAD_SAFETY_ANALYSIS - REQUIRES(!lock_) REQUIRES_SHARED(Locks::mutator_lock_); + template<class Visitor> + void VisitRoots(Visitor& visitor) + NO_THREAD_SAFETY_ANALYSIS + REQUIRES(!lock_) + REQUIRES_SHARED(Locks::mutator_lock_); - template <class Visitor> - void VisitRoots(const Visitor& visitor, bool skip_classes = false) NO_THREAD_SAFETY_ANALYSIS - REQUIRES(!lock_) REQUIRES_SHARED(Locks::mutator_lock_); + template<class Visitor> + void VisitRoots(const Visitor& visitor) + NO_THREAD_SAFETY_ANALYSIS + REQUIRES(!lock_) + REQUIRES_SHARED(Locks::mutator_lock_); template<class Visitor> void VisitClassesAndRoots(Visitor& visitor) @@ -208,10 +212,6 @@ class ClassTable { REQUIRES(!lock_) REQUIRES_SHARED(Locks::mutator_lock_); - // Visit classes in those class-sets which satisfy 'cond'. - template <class Condition, class Visitor> - void VisitClassesIfConditionMet(Condition& cond, Visitor& visitor) REQUIRES(!lock_) - REQUIRES_SHARED(Locks::mutator_lock_); // Stops visit if the visitor returns false. template <ReadBarrierOption kReadBarrierOption = kWithReadBarrier, typename Visitor> bool Visit(Visitor& visitor) diff --git a/runtime/gc/collector/mark_compact.cc b/runtime/gc/collector/mark_compact.cc index f49c8cba7a..a8ec6537db 100644 --- a/runtime/gc/collector/mark_compact.cc +++ b/runtime/gc/collector/mark_compact.cc @@ -303,8 +303,9 @@ static constexpr size_t kMaxNumUffdWorkers = 2; // Number of compaction buffers reserved for mutator threads in SIGBUS feature // case. It's extremely unlikely that we will ever have more than these number // of mutator threads trying to access the moving-space during one compaction -// phase. -static constexpr size_t kMutatorCompactionBufferCount = 2048; +// phase. Using a lower number in debug builds to hopefully catch the issue +// before it becomes a problem on user builds. +static constexpr size_t kMutatorCompactionBufferCount = kIsDebugBuild ? 256 : 2048; // Minimum from-space chunk to be madvised (during concurrent compaction) in one go. static constexpr ssize_t kMinFromSpaceMadviseSize = 1 * MB; // Concurrent compaction termination logic is different (and slightly more efficient) if the @@ -2387,7 +2388,7 @@ void MarkCompact::UpdateNonMovingPage(mirror::Object* first, uint8_t* page) { } void MarkCompact::UpdateNonMovingSpace() { - TimingLogger::ScopedTiming t("(Paused)UpdateNonMovingSpace", GetTimings()); + TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings()); // Iterating in reverse ensures that the class pointer in objects which span // across more than one page gets updated in the end. This is necessary for // VisitRefsForCompaction() to work correctly. 
@@ -2604,8 +2605,7 @@ class MarkCompact::ClassLoaderRootsUpdater : public ClassLoaderVisitor { REQUIRES_SHARED(Locks::classlinker_classes_lock_, Locks::mutator_lock_) { ClassTable* const class_table = class_loader->GetClassTable(); if (class_table != nullptr) { - // Classes are updated concurrently. - class_table->VisitRoots(*this, /*skip_classes=*/true); + class_table->VisitRoots(*this); } } @@ -2636,10 +2636,8 @@ class MarkCompact::LinearAllocPageUpdater { moving_space_end_(collector->moving_space_end_), last_page_touched_(false) {} - // Update a page in multi-object arena. - void MultiObjectArena(uint8_t* page_begin, uint8_t* first_obj) + void operator()(uint8_t* page_begin, uint8_t* first_obj) ALWAYS_INLINE REQUIRES_SHARED(Locks::mutator_lock_) { - DCHECK(first_obj != nullptr); DCHECK_ALIGNED(page_begin, kPageSize); uint8_t* page_end = page_begin + kPageSize; uint32_t obj_size; @@ -2669,28 +2667,6 @@ class MarkCompact::LinearAllocPageUpdater { last_page_touched_ = true; } - // This version is only used for cases where the entire page is filled with - // GC-roots. For example, class-table and intern-table. - void SingleObjectArena(uint8_t* page_begin, size_t page_size) - REQUIRES_SHARED(Locks::mutator_lock_) { - static_assert(sizeof(uint32_t) == sizeof(GcRoot<mirror::Object>)); - DCHECK_ALIGNED(page_begin, kAlignment); - // Least significant bits are used by class-table. - static constexpr uint32_t kMask = kObjectAlignment - 1; - size_t num_roots = page_size / sizeof(GcRoot<mirror::Object>); - uint32_t* root_ptr = reinterpret_cast<uint32_t*>(page_begin); - for (size_t i = 0; i < num_roots; root_ptr++, i++) { - uint32_t word = *root_ptr; - if (word != 0) { - uint32_t lsbs = word & kMask; - word &= ~kMask; - VisitRootIfNonNull(reinterpret_cast<mirror::CompressedReference<mirror::Object>*>(&word)); - *root_ptr = word | lsbs; - last_page_touched_ = true; - } - } - } - bool WasLastPageTouched() const { return last_page_touched_; } void VisitRootIfNonNull(mirror::CompressedReference<mirror::Object>* root) const @@ -2709,8 +2685,7 @@ class MarkCompact::LinearAllocPageUpdater { if (reinterpret_cast<uint8_t*>(old_ref) >= collector_->black_allocations_begin_) { new_ref = collector_->PostCompactBlackObjAddr(old_ref); } else if (collector_->live_words_bitmap_->Test(old_ref)) { - DCHECK(collector_->moving_space_bitmap_->Test(old_ref)) - << "ref:" << old_ref << " root:" << root; + DCHECK(collector_->moving_space_bitmap_->Test(old_ref)) << old_ref; new_ref = collector_->PostCompactOldObjAddr(old_ref); } if (old_ref != new_ref) { @@ -2779,34 +2754,9 @@ class MarkCompact::LinearAllocPageUpdater { uint8_t* const moving_space_begin_; uint8_t* const moving_space_end_; // Whether the last page was touched or not. - bool last_page_touched_ = false; + bool last_page_touched_; }; -void MarkCompact::UpdateClassTableClasses(Runtime* runtime, bool immune_class_table_only) { - // If the process is debuggable then redefinition is allowed, which may mean - // pre-zygote-fork class-tables may have pointer to class in moving-space. - // So visit classes from class-sets that are not in linear-alloc arena-pool. - if (UNLIKELY(runtime->IsJavaDebuggableAtInit())) { - ClassLinker* linker = runtime->GetClassLinker(); - ClassLoaderRootsUpdater updater(this); - GcVisitedArenaPool* pool = static_cast<GcVisitedArenaPool*>(runtime->GetLinearAllocArenaPool()); - auto cond = [this, pool, immune_class_table_only](ClassTable::ClassSet& set) -> bool { - if (!set.empty()) { - return immune_class_table_only ? 
- immune_spaces_.ContainsObject(reinterpret_cast<mirror::Object*>(&*set.begin())) : - !pool->Contains(reinterpret_cast<void*>(&*set.begin())); - } - return false; - }; - linker->VisitClassTables([cond, &updater](ClassTable* table) - REQUIRES_SHARED(Locks::mutator_lock_) { - table->VisitClassesIfConditionMet(cond, updater); - }); - ReaderMutexLock rmu(thread_running_gc_, *Locks::classlinker_classes_lock_); - linker->GetBootClassTable()->VisitClassesIfConditionMet(cond, updater); - } -} - void MarkCompact::CompactionPause() { TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings()); Runtime* runtime = Runtime::Current(); @@ -2860,6 +2810,65 @@ void MarkCompact::CompactionPause() { heap_->GetReferenceProcessor()->UpdateRoots(this); } { + TimingLogger::ScopedTiming t2("(Paused)UpdateClassLoaderRoots", GetTimings()); + ReaderMutexLock rmu(thread_running_gc_, *Locks::classlinker_classes_lock_); + { + ClassLoaderRootsUpdater updater(this); + runtime->GetClassLinker()->VisitClassLoaders(&updater); + } + } + + bool has_zygote_space = heap_->HasZygoteSpace(); + // TODO: Find out why it's not sufficient to visit native roots of immune + // spaces, and why all the pre-zygote fork arenas have to be linearly updated. + // Is it possible that some native root starts getting pointed to by some object + // in moving space after fork? Or are we missing a write-barrier somewhere + // when a native root is updated? + GcVisitedArenaPool* arena_pool = + static_cast<GcVisitedArenaPool*>(runtime->GetLinearAllocArenaPool()); + if (uffd_ == kFallbackMode || (!has_zygote_space && runtime->IsZygote())) { + // Besides fallback-mode, visit linear-alloc space in the pause for zygote + // processes prior to first fork (that's when zygote space gets created). + if (kIsDebugBuild && IsValidFd(uffd_)) { + // All arenas allocated so far are expected to be pre-zygote fork. + arena_pool->ForEachAllocatedArena( + [](const TrackedArena& arena) + REQUIRES_SHARED(Locks::mutator_lock_) { CHECK(arena.IsPreZygoteForkArena()); }); + } + LinearAllocPageUpdater updater(this); + arena_pool->VisitRoots(updater); + } else { + // Clear the flag as we care about this only if arenas are freed during + // concurrent compaction. + arena_pool->ClearArenasFreed(); + arena_pool->ForEachAllocatedArena( + [this](const TrackedArena& arena) REQUIRES_SHARED(Locks::mutator_lock_) { + // The pre-zygote fork arenas are not visited concurrently in the + // zygote children processes. The native roots of the dirty objects + // are visited during immune space visit below. + if (!arena.IsPreZygoteForkArena()) { + uint8_t* last_byte = arena.GetLastUsedByte(); + CHECK(linear_alloc_arenas_.insert({&arena, last_byte}).second); + } else { + LinearAllocPageUpdater updater(this); + arena.VisitRoots(updater); + } + }); + } + + SweepSystemWeaks(thread_running_gc_, runtime, /*paused*/ true); + + { + TimingLogger::ScopedTiming t2("(Paused)UpdateConcurrentRoots", GetTimings()); + runtime->VisitConcurrentRoots(this, kVisitRootFlagAllRoots); + } + { + // TODO: don't visit the transaction roots if it's not active. + TimingLogger::ScopedTiming t2("(Paused)UpdateNonThreadRoots", GetTimings()); + runtime->VisitNonThreadRoots(this); + } + + { // TODO: Immune space updation has to happen either before or after // remapping pre-compact pages to from-space. 
And depending on when it's // done, we have to invoke VisitRefsForCompaction() with or without @@ -2890,98 +2899,11 @@ void MarkCompact::CompactionPause() { } } - { - TimingLogger::ScopedTiming t2("(Paused)UpdateRoots", GetTimings()); - runtime->VisitConcurrentRoots(this, kVisitRootFlagAllRoots); - runtime->VisitNonThreadRoots(this); - { - ClassLinker* linker = runtime->GetClassLinker(); - ClassLoaderRootsUpdater updater(this); - ReaderMutexLock rmu(thread_running_gc_, *Locks::classlinker_classes_lock_); - linker->VisitClassLoaders(&updater); - linker->GetBootClassTable()->VisitRoots(updater, /*skip_classes=*/true); - } - SweepSystemWeaks(thread_running_gc_, runtime, /*paused=*/true); - - bool has_zygote_space = heap_->HasZygoteSpace(); - GcVisitedArenaPool* arena_pool = - static_cast<GcVisitedArenaPool*>(runtime->GetLinearAllocArenaPool()); - // Update immune/pre-zygote class-tables in case class redefinition took - // place. pre-zygote class-tables that are not in immune spaces are updated - // below if we are in fallback-mode or if there is no zygote space. So in - // that case only visit class-tables that are there in immune-spaces. - UpdateClassTableClasses(runtime, uffd_ == kFallbackMode || !has_zygote_space); - - // Acquire arena-pool's lock, which should be released after the pool is - // userfaultfd registered. This is to ensure that no new arenas are - // allocated and used in between. Since they will not be captured in - // linear_alloc_arenas_ below, we will miss updating their pages. The same - // reason also applies to new allocations within the existing arena which - // may change last_byte. - // Since we are in a STW pause, this shouldn't happen anyways, but holding - // the lock confirms it. - // TODO (b/305779657): Replace with ExclusiveTryLock() and assert that it - // doesn't fail once it is available for ReaderWriterMutex. - WriterMutexLock pool_wmu(thread_running_gc_, arena_pool->GetLock()); - - // TODO: Find out why it's not sufficient to visit native roots of immune - // spaces, and why all the pre-zygote fork arenas have to be linearly updated. - // Is it possible that some native root starts getting pointed to by some object - // in moving space after fork? Or are we missing a write-barrier somewhere - // when a native root is updated? - auto arena_visitor = [this](uint8_t* page_begin, uint8_t* first_obj, size_t page_size) - REQUIRES_SHARED(Locks::mutator_lock_) { - LinearAllocPageUpdater updater(this); - if (first_obj != nullptr) { - updater.MultiObjectArena(page_begin, first_obj); - } else { - updater.SingleObjectArena(page_begin, page_size); - } - }; - if (uffd_ == kFallbackMode || (!has_zygote_space && runtime->IsZygote())) { - // Besides fallback-mode, visit linear-alloc space in the pause for zygote - // processes prior to first fork (that's when zygote space gets created). - if (kIsDebugBuild && IsValidFd(uffd_)) { - // All arenas allocated so far are expected to be pre-zygote fork. - arena_pool->ForEachAllocatedArena( - [](const TrackedArena& arena) - REQUIRES_SHARED(Locks::mutator_lock_) { CHECK(arena.IsPreZygoteForkArena()); }); - } - arena_pool->VisitRoots(arena_visitor); - } else { - // Inform the arena-pool that compaction is going on. So the TrackedArena - // objects corresponding to the arenas that are freed shouldn't be deleted - // immediately. We will do that in FinishPhase(). This is to avoid ABA - // problem. 
- arena_pool->DeferArenaFreeing(); - arena_pool->ForEachAllocatedArena( - [this, arena_visitor, has_zygote_space](const TrackedArena& arena) - REQUIRES_SHARED(Locks::mutator_lock_) { - // The pre-zygote fork arenas are not visited concurrently in the - // zygote children processes. The native roots of the dirty objects - // are visited during immune space visit below. - if (!arena.IsPreZygoteForkArena()) { - uint8_t* last_byte = arena.GetLastUsedByte(); - auto ret = linear_alloc_arenas_.insert({&arena, last_byte}); - CHECK(ret.second); - } else if (!arena.IsSingleObjectArena() || !has_zygote_space) { - // Pre-zygote class-table and intern-table don't need to be updated. - // TODO: Explore the possibility of using /proc/self/pagemap to - // fetch which pages in these arenas are private-dirty and then only - // visit those pages. To optimize it further, we can keep all - // pre-zygote arenas in a single memory range so that just one read - // from pagemap is sufficient. - arena.VisitRoots(arena_visitor); - } - }); - } - if (use_uffd_sigbus_) { - // Release order wrt to mutator threads' SIGBUS handler load. - sigbus_in_progress_count_.store(0, std::memory_order_release); - } - KernelPreparation(); + if (use_uffd_sigbus_) { + // Release order wrt to mutator threads' SIGBUS handler load. + sigbus_in_progress_count_.store(0, std::memory_order_release); } - + KernelPreparation(); UpdateNonMovingSpace(); // fallback mode if (uffd_ == kFallbackMode) { @@ -3047,7 +2969,7 @@ void MarkCompact::KernelPrepareRangeForUffd(uint8_t* to_addr, } void MarkCompact::KernelPreparation() { - TimingLogger::ScopedTiming t("(Paused)KernelPreparation", GetTimings()); + TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings()); uint8_t* moving_space_begin = bump_pointer_space_->Begin(); size_t moving_space_size = bump_pointer_space_->Capacity(); int mode = kCopyMode; @@ -3474,22 +3396,14 @@ void MarkCompact::ConcurrentlyProcessLinearAllocPage(uint8_t* fault_page, bool i arena_iter = arena_iter != linear_alloc_arenas_.begin() ? std::prev(arena_iter) : linear_alloc_arenas_.end(); } - // Unlike ProcessLinearAlloc(), we don't need to hold arena-pool's lock here - // because a thread trying to access the page and as a result causing this - // userfault confirms that nobody can delete the corresponding arena and - // release its pages. - // NOTE: We may have some memory range be recycled several times during a - // compaction cycle, thereby potentially causing userfault on the same page - // several times. That's not a problem as all of them (except for possibly the - // first one) would require us mapping a zero-page, which we do without updating - // the 'state_arr'. - if (arena_iter == linear_alloc_arenas_.end() || - arena_iter->first->IsWaitingForDeletion() || - arena_iter->second <= fault_page) { + if (arena_iter == linear_alloc_arenas_.end() || arena_iter->second <= fault_page) { // Fault page isn't in any of the arenas that existed before we started // compaction. So map zeropage and return. ZeropageIoctl(fault_page, /*tolerate_eexist=*/true, /*tolerate_enoent=*/false); } else { + // fault_page should always belong to some arena. 
+ DCHECK(arena_iter != linear_alloc_arenas_.end()) + << "fault_page:" << static_cast<void*>(fault_page) << "is_minor_fault:" << is_minor_fault; // Find the linear-alloc space containing fault-page LinearAllocSpaceData* space_data = nullptr; for (auto& data : linear_alloc_spaces_data_) { @@ -3514,15 +3428,10 @@ void MarkCompact::ConcurrentlyProcessLinearAllocPage(uint8_t* fault_page, bool i if (state_arr[page_idx].compare_exchange_strong( state, PageState::kProcessingAndMapping, std::memory_order_acquire)) { if (kMode == kCopyMode || is_minor_fault) { - LinearAllocPageUpdater updater(this); uint8_t* first_obj = arena_iter->first->GetFirstObject(fault_page); - // null first_obj indicates that it's a page from arena for - // intern-table/class-table. So first object isn't required. - if (first_obj != nullptr) { - updater.MultiObjectArena(fault_page + diff, first_obj + diff); - } else { - updater.SingleObjectArena(fault_page + diff, kPageSize); - } + DCHECK_NE(first_obj, nullptr); + LinearAllocPageUpdater updater(this); + updater(fault_page + diff, first_obj + diff); if (kMode == kCopyMode) { MapUpdatedLinearAllocPage(fault_page, fault_page + diff, @@ -3591,7 +3500,6 @@ void MarkCompact::ConcurrentlyProcessLinearAllocPage(uint8_t* fault_page, bool i void MarkCompact::ProcessLinearAlloc() { GcVisitedArenaPool* arena_pool = static_cast<GcVisitedArenaPool*>(Runtime::Current()->GetLinearAllocArenaPool()); - DCHECK_EQ(thread_running_gc_, Thread::Current()); for (auto& pair : linear_alloc_arenas_) { const TrackedArena* arena = pair.first; size_t arena_size; @@ -3599,15 +3507,12 @@ void MarkCompact::ProcessLinearAlloc() { ptrdiff_t diff; bool others_processing; { - // Acquire arena-pool's lock (in shared-mode) so that the arena being updated - // does not get deleted at the same time. If this critical section is too - // long and impacts mutator response time, then we get rid of this lock by - // holding onto memory ranges of all deleted (since compaction pause) - // arenas until completion finishes. - ReaderMutexLock rmu(thread_running_gc_, arena_pool->GetLock()); + // Acquire arena-pool's lock so that the arena being worked cannot be + // deallocated at the same time. + std::lock_guard<std::mutex> lock(arena_pool->GetLock()); // If any arenas were freed since compaction pause then skip them from // visiting. - if (arena->IsWaitingForDeletion()) { + if (arena_pool->AreArenasFreed() && !arena_pool->FindAllocatedArena(arena)) { continue; } uint8_t* last_byte = pair.second; @@ -3623,12 +3528,11 @@ void MarkCompact::ProcessLinearAlloc() { break; } } - CHECK_NE(space_data, nullptr); + DCHECK_NE(space_data, nullptr); diff = space_data->shadow_.Begin() - space_data->begin_; auto visitor = [space_data, last_byte, diff, this, &others_processing]( uint8_t* page_begin, - uint8_t* first_obj, - size_t page_size) REQUIRES_SHARED(Locks::mutator_lock_) { + uint8_t* first_obj) REQUIRES_SHARED(Locks::mutator_lock_) { // No need to process pages past last_byte as they already have updated // gc-roots, if any. if (page_begin >= last_byte) { @@ -3647,14 +3551,7 @@ void MarkCompact::ProcessLinearAlloc() { // reason, we used 'release' order for changing the state to 'processed'. if (state_arr[page_idx].compare_exchange_strong( expected_state, desired_state, std::memory_order_acquire)) { - // null first_obj indicates that it's a page from arena for - // intern-table/class-table. So first object isn't required. 
- if (first_obj != nullptr) { - updater.MultiObjectArena(page_begin + diff, first_obj + diff); - } else { - DCHECK_EQ(page_size, kPageSize); - updater.SingleObjectArena(page_begin + diff, page_size); - } + updater(page_begin + diff, first_obj + diff); expected_state = PageState::kProcessing; if (!minor_fault_initialized_) { MapUpdatedLinearAllocPage( @@ -4457,9 +4354,6 @@ void MarkCompact::FinishPhase() { DCHECK_EQ(fstat(moving_to_space_fd_, &buf), 0) << "fstat failed: " << strerror(errno); DCHECK_EQ(buf.st_blocks, 0u); } - GcVisitedArenaPool* arena_pool = - static_cast<GcVisitedArenaPool*>(Runtime::Current()->GetLinearAllocArenaPool()); - arena_pool->DeleteUnusedArenas(); } } // namespace collector diff --git a/runtime/gc/collector/mark_compact.h b/runtime/gc/collector/mark_compact.h index 1ecb49ae52..3dea40842c 100644 --- a/runtime/gc/collector/mark_compact.h +++ b/runtime/gc/collector/mark_compact.h @@ -560,11 +560,6 @@ class MarkCompact final : public GarbageCollector { // Initialize all the info-map related fields of this GC. Returns total size // of all the structures in info-map. size_t InitializeInfoMap(uint8_t* p, size_t moving_space_sz); - // Update class-table classes in compaction pause if we are running in debuggable - // mode. Only visit class-table in image spaces if 'immune_class_table_only' - // is true. - void UpdateClassTableClasses(Runtime* runtime, bool immune_class_table_only) - REQUIRES_SHARED(Locks::mutator_lock_); // For checkpoints Barrier gc_barrier_; diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc index 329777c7d7..796f8fbf09 100644 --- a/runtime/gc/heap.cc +++ b/runtime/gc/heap.cc @@ -2451,11 +2451,9 @@ void Heap::PreZygoteFork() { return; } Runtime* runtime = Runtime::Current(); - // Setup linear-alloc pool for post-zygote fork allocations before freezing - // snapshots of intern-table and class-table. - runtime->SetupLinearAllocForPostZygoteFork(self); runtime->GetInternTable()->AddNewTable(); runtime->GetClassLinker()->MoveClassTableToPreZygote(); + runtime->SetupLinearAllocForPostZygoteFork(self); VLOG(heap) << "Starting PreZygoteFork"; // The end of the non-moving space may be protected, unprotect it so that we can copy the zygote // there. diff --git a/runtime/intern_table.h b/runtime/intern_table.h index 0fcada1af4..d115f51f7e 100644 --- a/runtime/intern_table.h +++ b/runtime/intern_table.h @@ -17,8 +17,8 @@ #ifndef ART_RUNTIME_INTERN_TABLE_H_ #define ART_RUNTIME_INTERN_TABLE_H_ +#include "base/allocator.h" #include "base/dchecked_vector.h" -#include "base/gc_visited_arena_pool.h" #include "base/hash_set.h" #include "base/mutex.h" #include "gc/weak_root_state.h" @@ -109,12 +109,11 @@ class InternTable { } }; - using UnorderedSet = - HashSet<GcRoot<mirror::String>, - GcRootEmptyFn, - StringHash, - StringEquals, - GcRootArenaAllocator<GcRoot<mirror::String>, kAllocatorTagInternTable>>; + using UnorderedSet = HashSet<GcRoot<mirror::String>, + GcRootEmptyFn, + StringHash, + StringEquals, + TrackingAllocator<GcRoot<mirror::String>, kAllocatorTagInternTable>>; InternTable(); diff --git a/runtime/runtime.cc b/runtime/runtime.cc index 97db5fb0fd..9c3b9de70c 100644 --- a/runtime/runtime.cc +++ b/runtime/runtime.cc @@ -859,11 +859,7 @@ void Runtime::CallExitHook(jint status) { } void Runtime::SweepSystemWeaks(IsMarkedVisitor* visitor) { - // Userfaultfd compaction updates weak intern-table page-by-page via - // LinearAlloc. 
- if (!GetHeap()->IsPerformingUffdCompaction()) { - GetInternTable()->SweepInternTableWeaks(visitor); - } + GetInternTable()->SweepInternTableWeaks(visitor); GetMonitorList()->SweepMonitorList(visitor); GetJavaVM()->SweepJniWeakGlobals(visitor); GetHeap()->SweepAllocationRecords(visitor); @@ -2594,14 +2590,8 @@ void Runtime::VisitConstantRoots(RootVisitor* visitor) { } void Runtime::VisitConcurrentRoots(RootVisitor* visitor, VisitRootFlags flags) { - // Userfaultfd compaction updates intern-tables and class-tables page-by-page - // via LinearAlloc. So don't visit them here. - if (GetHeap()->IsPerformingUffdCompaction()) { - class_linker_->VisitRoots(visitor, flags, /*visit_class_roots=*/false); - } else { - intern_table_->VisitRoots(visitor, flags); - class_linker_->VisitRoots(visitor, flags, /*visit_class_roots=*/true); - } + intern_table_->VisitRoots(visitor, flags); + class_linker_->VisitRoots(visitor, flags); jni_id_manager_->VisitRoots(visitor); heap_->VisitAllocationRecords(visitor); if (jit_ != nullptr) { |