Align uffd compacted spaces for faster mremap during pause
Aligning moving-space related mappings to 2MB and linear-alloc space
mappings to 1GB reduces the latency of the mremap calls performed
during the compaction pause by leveraging faster page-table moves.
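For reference, the 2MB and 1GB figures follow from the page-table
geometry assumed in globals.h: with 4KB pages and 8-byte entries, each
page-table page holds 512 entries, so one PMD entry spans 2MB and one
PUD entry spans 1GB. A minimal sketch of that arithmetic (illustrative
only, not part of this change):

  #include <cstddef>
  #include <cstdint>

  // Assumed page-table geometry: 4KB pages, 64-bit entries.
  constexpr size_t kPageSize = 4096;
  constexpr size_t kEntriesPerTable = kPageSize / sizeof(uint64_t);  // 512
  constexpr size_t kPMDSpan = kEntriesPerTable * kPageSize;          // 2MB
  constexpr size_t kPUDSpan = kEntriesPerTable * kPMDSpan;           // 1GB

  static_assert(kPMDSpan == 2u * 1024 * 1024, "one PMD entry covers 2MB");
  static_assert(kPUDSpan == 1u * 1024 * 1024 * 1024, "one PUD entry covers 1GB");

When the source and destination of an mremap share such an alignment,
the kernel can typically move whole page-table pages instead of copying
individual PTEs, which is where the pause-time saving comes from.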
Bug: 160737021
Test: art/test/testrunner/testrunner.py
Change-Id: I4f7b3b7dde785002d6c41b52eda9f1ac5132c0d7
(cherry picked from commit 5cac217cdcae15890b8260839c5a0f16a2c5a773)
Merged-In: I4f7b3b7dde785002d6c41b52eda9f1ac5132c0d7
diff --git a/libartbase/base/globals.h b/libartbase/base/globals.h
index f4d44b8..4103154 100644
--- a/libartbase/base/globals.h
+++ b/libartbase/base/globals.h
@@ -38,6 +38,17 @@
// compile-time constant so the compiler can generate better code.
static constexpr size_t kPageSize = 4096;
+// TODO: Kernels for arm and x86, in both 32-bit and 64-bit modes, use 512 entries per page-table
+// page. Find a way to confirm that in userspace.
+// Address range covered by 1 Page Middle Directory (PMD) entry in the page table
+static constexpr size_t kPMDSize = (kPageSize / sizeof(uint64_t)) * kPageSize;
+// Address range covered by 1 Page Upper Directory (PUD) entry in the page table
+static constexpr size_t kPUDSize = (kPageSize / sizeof(uint64_t)) * kPMDSize;
+// Returns the ideal alignment corresponding to page-table levels for the
+// given size.
+static constexpr size_t BestPageTableAlignment(size_t size) {
+ return size < kPUDSize ? kPMDSize : kPUDSize;
+}
// Clion, clang analyzer, etc can falsely believe that "if (kIsDebugBuild)" always
// returns the same value. By wrapping into a call to another constexpr function, we force it
// to realize that is not actually always evaluating to the same value.
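As a quick illustration of the boundary in BestPageTableAlignment()
(assuming the MB/GB size helpers already defined in this header), a
hypothetical caller would see:

  // Regions smaller than a PUD span get PMD (2MB) alignment; regions of 1GB
  // or more, e.g. the low-4GB linear-alloc pool, get PUD (1GB) alignment.
  static_assert(BestPageTableAlignment(64 * MB) == kPMDSize, "PMD alignment");
  static_assert(BestPageTableAlignment(2 * GB) == kPUDSize, "PUD alignment");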
diff --git a/libartbase/base/mem_map.cc b/libartbase/base/mem_map.cc
index 688325d..b3e2840 100644
--- a/libartbase/base/mem_map.cc
+++ b/libartbase/base/mem_map.cc
@@ -389,6 +389,32 @@
reuse);
}
+MemMap MemMap::MapAnonymousAligned(const char* name,
+ size_t byte_count,
+ int prot,
+ bool low_4gb,
+ size_t alignment,
+ /*out=*/std::string* error_msg) {
+ DCHECK(IsPowerOfTwo(alignment));
+ DCHECK_GT(alignment, kPageSize);
+ // Allocate extra 'alignment - kPageSize' bytes so that the mapping can be aligned.
+ MemMap ret = MapAnonymous(name,
+ /*addr=*/nullptr,
+ byte_count + alignment - kPageSize,
+ prot,
+ low_4gb,
+ /*reuse=*/false,
+ /*reservation=*/nullptr,
+ error_msg);
+ if (LIKELY(ret.IsValid())) {
+ ret.AlignBy(alignment, /*align_both_ends=*/false);
+ ret.SetSize(byte_count);
+ DCHECK_EQ(ret.Size(), byte_count);
+ DCHECK_ALIGNED_PARAM(ret.Begin(), alignment);
+ }
+ return ret;
+}
+
MemMap MemMap::MapPlaceholder(const char* name, uint8_t* addr, size_t byte_count) {
if (byte_count == 0) {
return Invalid();
@@ -1247,40 +1273,46 @@
}
}
-void MemMap::AlignBy(size_t size) {
+void MemMap::AlignBy(size_t alignment, bool align_both_ends) {
CHECK_EQ(begin_, base_begin_) << "Unsupported";
CHECK_EQ(size_, base_size_) << "Unsupported";
- CHECK_GT(size, static_cast<size_t>(kPageSize));
- CHECK_ALIGNED(size, kPageSize);
+ CHECK_GT(alignment, static_cast<size_t>(kPageSize));
+ CHECK_ALIGNED(alignment, kPageSize);
CHECK(!reuse_);
- if (IsAlignedParam(reinterpret_cast<uintptr_t>(base_begin_), size) &&
- IsAlignedParam(base_size_, size)) {
+ if (IsAlignedParam(reinterpret_cast<uintptr_t>(base_begin_), alignment) &&
+ (!align_both_ends || IsAlignedParam(base_size_, alignment))) {
// Already aligned.
return;
}
uint8_t* base_begin = reinterpret_cast<uint8_t*>(base_begin_);
- uint8_t* base_end = base_begin + base_size_;
- uint8_t* aligned_base_begin = AlignUp(base_begin, size);
- uint8_t* aligned_base_end = AlignDown(base_end, size);
+ uint8_t* aligned_base_begin = AlignUp(base_begin, alignment);
CHECK_LE(base_begin, aligned_base_begin);
- CHECK_LE(aligned_base_end, base_end);
- size_t aligned_base_size = aligned_base_end - aligned_base_begin;
- CHECK_LT(aligned_base_begin, aligned_base_end)
- << "base_begin = " << reinterpret_cast<void*>(base_begin)
- << " base_end = " << reinterpret_cast<void*>(base_end);
- CHECK_GE(aligned_base_size, size);
- // Unmap the unaligned parts.
if (base_begin < aligned_base_begin) {
MEMORY_TOOL_MAKE_UNDEFINED(base_begin, aligned_base_begin - base_begin);
CHECK_EQ(TargetMUnmap(base_begin, aligned_base_begin - base_begin), 0)
<< "base_begin=" << reinterpret_cast<void*>(base_begin)
<< " aligned_base_begin=" << reinterpret_cast<void*>(aligned_base_begin);
}
- if (aligned_base_end < base_end) {
- MEMORY_TOOL_MAKE_UNDEFINED(aligned_base_end, base_end - aligned_base_end);
- CHECK_EQ(TargetMUnmap(aligned_base_end, base_end - aligned_base_end), 0)
- << "base_end=" << reinterpret_cast<void*>(base_end)
- << " aligned_base_end=" << reinterpret_cast<void*>(aligned_base_end);
+ uint8_t* base_end = base_begin + base_size_;
+ size_t aligned_base_size;
+ if (align_both_ends) {
+ uint8_t* aligned_base_end = AlignDown(base_end, alignment);
+ CHECK_LE(aligned_base_end, base_end);
+ CHECK_LT(aligned_base_begin, aligned_base_end)
+ << "base_begin = " << reinterpret_cast<void*>(base_begin)
+ << " base_end = " << reinterpret_cast<void*>(base_end);
+ aligned_base_size = aligned_base_end - aligned_base_begin;
+ CHECK_GE(aligned_base_size, alignment);
+ if (aligned_base_end < base_end) {
+ MEMORY_TOOL_MAKE_UNDEFINED(aligned_base_end, base_end - aligned_base_end);
+ CHECK_EQ(TargetMUnmap(aligned_base_end, base_end - aligned_base_end), 0)
+ << "base_end=" << reinterpret_cast<void*>(base_end)
+ << " aligned_base_end=" << reinterpret_cast<void*>(aligned_base_end);
+ }
+ } else {
+ CHECK_LT(aligned_base_begin, base_end)
+ << "base_begin = " << reinterpret_cast<void*>(base_begin);
+ aligned_base_size = base_end - aligned_base_begin;
}
std::lock_guard<std::mutex> mu(*mem_maps_lock_);
if (base_begin < aligned_base_begin) {
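MapAnonymousAligned() above uses the usual over-allocate-and-trim
approach: reserve byte_count + alignment - kPageSize bytes, round the
start up to 'alignment' via AlignBy(alignment, /*align_both_ends=*/false),
and shrink back to byte_count. A rough standalone sketch of the same idea
with raw mmap/munmap (illustrative only, not the MemMap implementation):

  #include <sys/mman.h>
  #include <cstddef>
  #include <cstdint>

  // Map 'byte_count' anonymous bytes whose start address is 'alignment'-aligned,
  // where 'alignment' is a power of two larger than 'page_size' and 'byte_count'
  // is page-aligned.
  void* MapAligned(size_t byte_count, size_t alignment, size_t page_size) {
    size_t request = byte_count + alignment - page_size;
    void* raw = mmap(nullptr,
                     request,
                     PROT_READ | PROT_WRITE,
                     MAP_PRIVATE | MAP_ANONYMOUS,
                     /*fd=*/-1,
                     /*offset=*/0);
    if (raw == MAP_FAILED) {
      return nullptr;
    }
    uintptr_t addr = reinterpret_cast<uintptr_t>(raw);
    uintptr_t aligned = (addr + alignment - 1) & ~(alignment - 1);
    if (aligned != addr) {
      // Unmap the unaligned head.
      munmap(raw, aligned - addr);
    }
    size_t tail = (addr + request) - (aligned + byte_count);
    if (tail != 0) {
      // Unmap whatever is left beyond the requested size.
      munmap(reinterpret_cast<void*>(aligned + byte_count), tail);
    }
    return reinterpret_cast<void*>(aligned);
  }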
diff --git a/libartbase/base/mem_map.h b/libartbase/base/mem_map.h
index 28d1058..42120a3 100644
--- a/libartbase/base/mem_map.h
+++ b/libartbase/base/mem_map.h
@@ -137,6 +137,17 @@
/*inout*/MemMap* reservation,
/*out*/std::string* error_msg,
bool use_debug_name = true);
+
+ // Request an aligned anonymous region. We can't directly ask for a MAP_SHARED (anonymous or
+ // otherwise) mapping to be aligned, as in that case a file offset is involved and trimming the
+ // mapping could leave that offset out of sync with another mapping of the same file.
+ static MemMap MapAnonymousAligned(const char* name,
+ size_t byte_count,
+ int prot,
+ bool low_4gb,
+ size_t alignment,
+ /*out=*/std::string* error_msg);
+
static MemMap MapAnonymous(const char* name,
size_t byte_count,
int prot,
@@ -310,8 +321,9 @@
// intermittently.
void TryReadable();
- // Align the map by unmapping the unaligned parts at the lower and the higher ends.
- void AlignBy(size_t size);
+ // Align the map by unmapping the unaligned part at the lower end and if 'align_both_ends' is
+ // true, then the higher end as well.
+ void AlignBy(size_t alignment, bool align_both_ends = true);
// For annotation reasons.
static std::mutex* GetMemMapsLock() RETURN_CAPABILITY(mem_maps_lock_) {
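A typical call site for the new API then looks roughly like the sketch
below (names and sizes are made up; see gc_visited_arena_pool.cc and
mark_compact.cc for the real callers):

  std::string error_msg;
  // Reserve a range whose start is PMD/PUD aligned so that a later mremap
  // into or out of it can move whole page-table pages.
  MemMap reservation = MemMap::MapAnonymousAligned("example reservation",
                                                   /*byte_count=*/256 * MB,
                                                   PROT_NONE,
                                                   /*low_4gb=*/false,
                                                   BestPageTableAlignment(256 * MB),
                                                   &error_msg);
  CHECK(reservation.IsValid()) << error_msg;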
diff --git a/runtime/base/gc_visited_arena_pool.cc b/runtime/base/gc_visited_arena_pool.cc
index 8b778c7..6bf52ce 100644
--- a/runtime/base/gc_visited_arena_pool.cc
+++ b/runtime/base/gc_visited_arena_pool.cc
@@ -81,35 +81,23 @@
size = std::max(min_size, kLow4GBLinearAllocPoolSize);
}
#endif
- Runtime* runtime = Runtime::Current();
- gc::collector::MarkCompact* mark_compact = runtime->GetHeap()->MarkCompactCollector();
+ size_t alignment = BestPageTableAlignment(size);
+ DCHECK_GE(size, kPMDSize);
std::string err_msg;
- bool mapped_shared;
- // We use MAP_SHARED on non-zygote processes for leveraging userfaultfd's minor-fault feature.
- if (gUseUserfaultfd && mark_compact->MapLinearAllocShared()) {
- maps_.emplace_back(MemMap::MapFile(size,
- PROT_READ | PROT_WRITE,
- MAP_ANONYMOUS | MAP_SHARED,
- -1,
- /*start=*/0,
- low_4gb_,
- name_,
- &err_msg));
- mapped_shared = true;
- } else {
- maps_.emplace_back(
- MemMap::MapAnonymous(name_, size, PROT_READ | PROT_WRITE, low_4gb_, &err_msg));
- mapped_shared = false;
- }
-
+ maps_.emplace_back(MemMap::MapAnonymousAligned(
+ name_, size, PROT_READ | PROT_WRITE, low_4gb_, alignment, &err_msg));
MemMap& map = maps_.back();
if (!map.IsValid()) {
LOG(FATAL) << "Failed to allocate " << name_ << ": " << err_msg;
UNREACHABLE();
}
+
if (gUseUserfaultfd) {
// Create a shadow-map for the map being added for userfaultfd GC
- mark_compact->AddLinearAllocSpaceData(map.Begin(), map.Size(), mapped_shared);
+ gc::collector::MarkCompact* mark_compact =
+ Runtime::Current()->GetHeap()->MarkCompactCollector();
+ DCHECK_NE(mark_compact, nullptr);
+ mark_compact->AddLinearAllocSpaceData(map.Begin(), map.Size());
}
Chunk* chunk = new Chunk(map.Begin(), map.Size());
best_fit_allocs_.insert(chunk);
diff --git a/runtime/gc/collector/mark_compact.cc b/runtime/gc/collector/mark_compact.cc
index 834078b..4f16ca1 100644
--- a/runtime/gc/collector/mark_compact.cc
+++ b/runtime/gc/collector/mark_compact.cc
@@ -263,8 +263,9 @@
reinterpret_cast<uintptr_t>(bump_pointer_space_->Limit())));
// Create one MemMap for all the data structures
- size_t chunk_info_vec_size = bump_pointer_space_->Capacity() / kOffsetChunkSize;
- size_t nr_moving_pages = bump_pointer_space_->Capacity() / kPageSize;
+ size_t moving_space_size = bump_pointer_space_->Capacity();
+ size_t chunk_info_vec_size = moving_space_size / kOffsetChunkSize;
+ size_t nr_moving_pages = moving_space_size / kPageSize;
size_t nr_non_moving_pages = heap->GetNonMovingSpace()->Capacity() / kPageSize;
std::string err_msg;
@@ -293,13 +294,21 @@
pre_compact_offset_moving_space_ = reinterpret_cast<uint32_t*>(p);
}
+ size_t moving_space_alignment = BestPageTableAlignment(moving_space_size);
+ // The moving space is created at a fixed address, which is expected to be
+ // PMD-size aligned.
+ if (!IsAlignedParam(bump_pointer_space_->Begin(), moving_space_alignment)) {
+ LOG(WARNING) << "Bump pointer space is not aligned to " << PrettySize(moving_space_alignment)
+ << ". This can lead to longer stop-the-world pauses for compaction";
+ }
// NOTE: PROT_NONE is used here as these mappings are for address space reservation
// only and will be used only after appropriately remapping them.
- from_space_map_ = MemMap::MapAnonymous("Concurrent mark-compact from-space",
- bump_pointer_space_->Capacity(),
- PROT_NONE,
- /*low_4gb=*/ kObjPtrPoisoning,
- &err_msg);
+ from_space_map_ = MemMap::MapAnonymousAligned("Concurrent mark-compact from-space",
+ moving_space_size,
+ PROT_NONE,
+ /*low_4gb=*/kObjPtrPoisoning,
+ moving_space_alignment,
+ &err_msg);
if (UNLIKELY(!from_space_map_.IsValid())) {
LOG(FATAL) << "Failed to allocate concurrent mark-compact from-space" << err_msg;
} else {
@@ -313,7 +322,7 @@
//
// This map doesn't have to be aligned to 2MB as we don't mremap on it.
shadow_to_space_map_ = MemMap::MapAnonymous("Concurrent mark-compact moving-space shadow",
- bump_pointer_space_->Capacity(),
+ moving_space_size,
PROT_NONE,
/*low_4gb=*/kObjPtrPoisoning,
&err_msg);
@@ -338,15 +347,30 @@
linear_alloc_spaces_data_.reserve(1);
}
-void MarkCompact::AddLinearAllocSpaceData(uint8_t* begin, size_t len, bool already_shared) {
+void MarkCompact::AddLinearAllocSpaceData(uint8_t* begin, size_t len) {
DCHECK_ALIGNED(begin, kPageSize);
DCHECK_ALIGNED(len, kPageSize);
+ DCHECK_GE(len, kPMDSize);
+ size_t alignment = BestPageTableAlignment(len);
+ bool is_shared = false;
+ // We use MAP_SHARED on non-zygote processes for leveraging userfaultfd's minor-fault feature.
+ if (map_linear_alloc_shared_) {
+ void* ret = mmap(begin,
+ len,
+ PROT_READ | PROT_WRITE,
+ MAP_ANONYMOUS | MAP_SHARED | MAP_FIXED,
+ /*fd=*/-1,
+ /*offset=*/0);
+ CHECK_EQ(ret, begin) << "mmap failed: " << strerror(errno);
+ is_shared = true;
+ }
std::string err_msg;
- MemMap shadow(MemMap::MapAnonymous("linear-alloc shadow map",
- len,
- PROT_NONE,
- /*low_4gb=*/false,
- &err_msg));
+ MemMap shadow(MemMap::MapAnonymousAligned("linear-alloc shadow map",
+ len,
+ PROT_NONE,
+ /*low_4gb=*/false,
+ alignment,
+ &err_msg));
if (!shadow.IsValid()) {
LOG(FATAL) << "Failed to allocate linear-alloc shadow map: " << err_msg;
UNREACHABLE();
@@ -365,7 +389,7 @@
std::forward<MemMap>(page_status_map),
begin,
begin + len,
- already_shared);
+ is_shared);
}
void MarkCompact::BindAndResetBitmaps() {
@@ -2425,8 +2449,6 @@
int fd,
int uffd_mode,
uint8_t* shadow_addr) {
- // TODO: Create mapping's at 2MB aligned addresses to benefit from optimized
- // mremap.
int mremap_flags = MREMAP_MAYMOVE | MREMAP_FIXED;
if (gHaveMremapDontunmap) {
mremap_flags |= MREMAP_DONTUNMAP;
diff --git a/runtime/gc/collector/mark_compact.h b/runtime/gc/collector/mark_compact.h
index fa2c430..8ba3774 100644
--- a/runtime/gc/collector/mark_compact.h
+++ b/runtime/gc/collector/mark_compact.h
@@ -137,14 +137,13 @@
// created or was already done.
bool CreateUserfaultfd(bool post_fork);
- bool MapLinearAllocShared() const { return map_linear_alloc_shared_; }
// Returns a pair indicating if userfaultfd itself is available (first) and if
// so then whether its minor-fault feature is available or not (second).
static std::pair<bool, bool> GetUffdAndMinorFault();
// Add linear-alloc space data when a new space is added to
// GcVisitedArenaPool, which mostly happens only once.
- void AddLinearAllocSpaceData(uint8_t* begin, size_t len, bool already_shared);
+ void AddLinearAllocSpaceData(uint8_t* begin, size_t len);
// In copy-mode of userfaultfd, we don't need to reach a 'processed' state as
// it's given that processing thread also copies the page, thereby mapping it.