Use MADV_FREE to reclaim pages of freed regions
In order to release memory consumed by freed regions back to the kernel,
we use the madvise(MADV_DONTNEED) syscall, which synchronously takes away
all the pages of the given memory range. As a side effect, this also
provides us clean (zeroed-out) pages the next time a region is used by
an application thread as a TLAB. The downside is the overhead of
MADV_DONTNEED (it has to manipulate all the corresponding page-table
entries). Furthermore, the application thread takes a page fault each
time it moves from one page to the next while consuming the TLAB.
MADV_FREE is another madvise option that also advises the kernel to take
away the pages, but only when there is memory pressure. It is a much
more lightweight operation than MADV_DONTNEED, and the application
thread avoids the page-fault overhead as long as the page has not yet
been reclaimed. However, a page which has not been reclaimed by the
kernel will still contain stale data.
This change uses MADV_FREE for the region space's reclamation and ensures
that pages about to be used for new allocations are properly cleared.
Bug: 74447417
Bug: 140130889
Test: Golem, mpts/greenday tests
Change-Id: I1e4a75abed51844d5062685bf77871f609af5a65
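
To make the semantic difference concrete, here is a minimal standalone
sketch (not part of this change) contrasting the two advice values. It
assumes a Linux build where MADV_FREE is defined (kernel headers 4.5+):

#include <sys/mman.h>
#include <cassert>
#include <cstring>

int main() {
  constexpr size_t kLen = 4096;
  char* p = static_cast<char*>(mmap(nullptr, kLen, PROT_READ | PROT_WRITE,
                                    MAP_PRIVATE | MAP_ANONYMOUS, -1, 0));
  memset(p, 0xab, kLen);
  // MADV_DONTNEED drops the pages immediately; the next touch faults in
  // a fresh zero page, so this assert always holds.
  madvise(p, kLen, MADV_DONTNEED);
  assert(p[0] == 0);

#ifdef MADV_FREE
  memset(p, 0xab, kLen);
  // MADV_FREE reclaims lazily, only under memory pressure: p[0] may read
  // back as 0xab (stale) or 0 (reclaimed), so the caller must not rely
  // on the pages being clean.
  madvise(p, kLen, MADV_FREE);
#endif
  munmap(p, kLen);
  return 0;
}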
diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc
index d769543..6922e7f 100644
--- a/dex2oat/dex2oat.cc
+++ b/dex2oat/dex2oat.cc
@@ -3112,11 +3112,7 @@
static void b13564922() {
#if defined(__linux__) && defined(__arm__)
- int major, minor;
- struct utsname uts;
- if (uname(&uts) != -1 &&
- sscanf(uts.release, "%d.%d", &major, &minor) == 2 &&
- ((major < 3) || ((major == 3) && (minor < 4)))) {
+ if (KernelVersionLower(3, 4)) {
// Kernels before 3.4 don't handle the ASLR well and we can run out of address
// space (http://b/13564922). Work around the issue by inhibiting further mmap() randomization.
int old_personality = personality(0xffffffff);
diff --git a/libartbase/base/membarrier.cc b/libartbase/base/membarrier.cc
index 48f47df..d925049 100644
--- a/libartbase/base/membarrier.cc
+++ b/libartbase/base/membarrier.cc
@@ -21,7 +21,6 @@
#if !defined(_WIN32)
#include <sys/syscall.h>
-#include <sys/utsname.h>
#include <unistd.h>
#endif
#include "macros.h"
@@ -29,6 +28,7 @@
#if defined(__BIONIC__)
#include <atomic>
+#include <base/utils.h>
#include <linux/membarrier.h>
#define CHECK_MEMBARRIER_CMD(art_value, membarrier_value) \
@@ -49,14 +49,7 @@
int membarrier(MembarrierCommand command) {
// Check kernel version supports membarrier(2).
- static constexpr int kRequiredMajor = 4;
- static constexpr int kRequiredMinor = 16;
- struct utsname uts;
- int major, minor;
- if (uname(&uts) != 0 ||
- strcmp(uts.sysname, "Linux") != 0 ||
- sscanf(uts.release, "%d.%d", &major, &minor) != 2 ||
- (major < kRequiredMajor || (major == kRequiredMajor && minor < kRequiredMinor))) {
+ if (KernelVersionLower(4, 16)) {
errno = ENOSYS;
return -1;
}
diff --git a/libartbase/base/memfd.cc b/libartbase/base/memfd.cc
index 8512a3a..d031fb6 100644
--- a/libartbase/base/memfd.cc
+++ b/libartbase/base/memfd.cc
@@ -21,7 +21,6 @@
#if !defined(_WIN32)
#include <fcntl.h>
#include <sys/syscall.h>
-#include <sys/utsname.h>
#include <unistd.h>
#endif
#if defined(__BIONIC__)
@@ -30,6 +29,7 @@
#include <android-base/logging.h>
#include <android-base/unique_fd.h>
+#include <base/utils.h>
#include "macros.h"
@@ -51,14 +51,7 @@
int memfd_create(const char* name, unsigned int flags) {
// Check kernel version supports memfd_create(). Some older kernels segfault executing
// memfd_create() rather than returning ENOSYS (b/116769556).
- static constexpr int kRequiredMajor = 3;
- static constexpr int kRequiredMinor = 17;
- struct utsname uts;
- int major, minor;
- if (uname(&uts) != 0 ||
- strcmp(uts.sysname, "Linux") != 0 ||
- sscanf(uts.release, "%d.%d", &major, &minor) != 2 ||
- (major < kRequiredMajor || (major == kRequiredMajor && minor < kRequiredMinor))) {
+ if (KernelVersionLower(3, 17)) {
errno = ENOSYS;
return -1;
}
diff --git a/libartbase/base/utils.cc b/libartbase/base/utils.cc
index 19311b3..4e8d306 100644
--- a/libartbase/base/utils.cc
+++ b/libartbase/base/utils.cc
@@ -156,6 +156,33 @@
#endif
+// On non-Linux builds, assume that the kernel version is lower than required.
+#if defined(__linux__)
+std::pair<int, int> GetKernelVersion() {
+ struct utsname uts;
+ int major, minor;
+ CHECK_EQ(uname(&uts), 0);
+ CHECK_EQ(strcmp(uts.sysname, "Linux"), 0);
+ CHECK_EQ(sscanf(uts.release, "%d.%d", &major, &minor), 2);
+ return std::pair(major, minor);
+}
+
+bool KernelVersionLower(int required_major, int required_minor) {
+  // Cache the (major, minor) pair; the kernel version never changes at runtime.
+  static const std::pair<int, int> kernel_version = GetKernelVersion();
+  return kernel_version.first < required_major
+      || (kernel_version.first == required_major && kernel_version.second < required_minor);
+}
+#else
+bool KernelVersionLower(int required_major ATTRIBUTE_UNUSED, int required_minor ATTRIBUTE_UNUSED) {
+ return true;
+}
+#endif
+
bool CacheOperationsMaySegFault() {
#if defined(__linux__) && defined(__aarch64__)
// Avoid issue on older ARM64 kernels where data cache operations could be classified as writes
@@ -165,14 +192,7 @@
//
// This behaviour means we should avoid the dual view JIT on the device. This is just
// an issue when running tests on devices that have an old kernel.
- static constexpr int kRequiredMajor = 3;
- static constexpr int kRequiredMinor = 12;
- struct utsname uts;
- int major, minor;
- if (uname(&uts) != 0 ||
- strcmp(uts.sysname, "Linux") != 0 ||
- sscanf(uts.release, "%d.%d", &major, &minor) != 2 ||
- (major < kRequiredMajor || (major == kRequiredMajor && minor < kRequiredMinor))) {
+ if (KernelVersionLower(3, 12)) {
return true;
}
#endif
diff --git a/libartbase/base/utils.h b/libartbase/base/utils.h
index 4bcb915..1fe465c 100644
--- a/libartbase/base/utils.h
+++ b/libartbase/base/utils.h
@@ -42,6 +42,9 @@
// Returns a human-readable size string such as "1MB".
std::string PrettySize(int64_t size_in_bytes);
+// Returns true if the kernel's version (based on uname) is lower than required.
+bool KernelVersionLower(int required_major, int required_minor);
+
// Splits a string using the given separator character into a vector of
// strings. Empty strings will be omitted.
void Split(const std::string& s, char separator, std::vector<std::string>* result);
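
With the helper above, call sites gate features on a single kernel-version
check instead of duplicating the uname()/sscanf() dance. A hedged usage
sketch (the wrapper name is illustrative, not from this change):

#include "base/utils.h"
#include <cerrno>

// Hypothetical syscall wrapper following the same fallback pattern as the
// membarrier() and memfd_create() call sites rewritten above.
int my_feature_syscall() {
  if (art::KernelVersionLower(4, 5)) {
    errno = ENOSYS;  // Behave as if the kernel lacks the syscall entirely.
    return -1;
  }
  // ... invoke the real syscall here ...
  return 0;
}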
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index f989360..f18fb53 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -4187,6 +4187,11 @@
return nullptr;
}
*bytes_tl_bulk_allocated = expand_bytes;
+    // Zero the TLAB pages, because the CC collector releases region pages
+    // with MADV_FREE, which doesn't guarantee clean pages.
+ if (allocator_type == kAllocatorTypeRegionTLAB) {
+ region_space_->ZeroAllocRange(self->GetTlabEnd(), expand_bytes);
+ }
self->ExpandTlab(expand_bytes);
DCHECK_LE(alloc_size, self->TlabSize());
} else if (allocator_type == kAllocatorTypeTLAB) {
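
The ordering in the hunk above is the important part: the newly exposed
range must be scrubbed before ExpandTlab() bumps the limit and the fast
path can hand those bytes out. Schematically (a sketch reusing the same
accessors as the hunk, not a drop-in implementation):

// Zero-before-expand invariant for region TLABs under MADV_FREE.
uint8_t* const end = self->GetTlabEnd();            // Current TLAB limit.
region_space_->ZeroAllocRange(end, expand_bytes);   // Scrub [end, end + expand_bytes).
self->ExpandTlab(expand_bytes);                     // Only now publish the new limit.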
diff --git a/runtime/gc/space/region_space-inl.h b/runtime/gc/space/region_space-inl.h
index 901568e..65cd111 100644
--- a/runtime/gc/space/region_space-inl.h
+++ b/runtime/gc/space/region_space-inl.h
@@ -56,26 +56,26 @@
mirror::Object* obj;
if (LIKELY(num_bytes <= kRegionSize)) {
// Non-large object.
- obj = (kForEvac ? evac_region_ : current_region_)->Alloc(num_bytes,
- bytes_allocated,
- usable_size,
- bytes_tl_bulk_allocated);
+ obj = (kForEvac ? evac_region_ : current_region_)->Alloc<kForEvac>(num_bytes,
+ bytes_allocated,
+ usable_size,
+ bytes_tl_bulk_allocated);
if (LIKELY(obj != nullptr)) {
return obj;
}
MutexLock mu(Thread::Current(), region_lock_);
// Retry with current region since another thread may have updated
// current_region_ or evac_region_. TODO: fix race.
- obj = (kForEvac ? evac_region_ : current_region_)->Alloc(num_bytes,
- bytes_allocated,
- usable_size,
- bytes_tl_bulk_allocated);
+ obj = (kForEvac ? evac_region_ : current_region_)->Alloc<kForEvac>(num_bytes,
+ bytes_allocated,
+ usable_size,
+ bytes_tl_bulk_allocated);
if (LIKELY(obj != nullptr)) {
return obj;
}
Region* r = AllocateRegion(kForEvac);
if (LIKELY(r != nullptr)) {
- obj = r->Alloc(num_bytes, bytes_allocated, usable_size, bytes_tl_bulk_allocated);
+ obj = r->Alloc<kForEvac>(num_bytes, bytes_allocated, usable_size, bytes_tl_bulk_allocated);
CHECK(obj != nullptr);
// Do our allocation before setting the region, this makes sure no threads race ahead
// and fill in the region before we allocate the object. b/63153464
@@ -96,6 +96,14 @@
return nullptr;
}
+inline void RegionSpace::ZeroAllocRange(uint8_t* start, size_t length) {
+ if (gPurgeAdvice == MADV_DONTNEED) {
+ return;
+ }
+ std::fill(start, start + length, 0);
+}
+
+template <bool kForEvac>
inline mirror::Object* RegionSpace::Region::Alloc(size_t num_bytes,
/* out */ size_t* bytes_allocated,
/* out */ size_t* usable_size,
@@ -120,6 +128,10 @@
*usable_size = num_bytes;
}
*bytes_tl_bulk_allocated = num_bytes;
+ // We don't need to clean allocations for evacuation as we memcpy in that case.
+ if (!kForEvac) {
+ ZeroAllocRange(old_top, num_bytes);
+ }
return reinterpret_cast<mirror::Object*>(old_top);
}
@@ -283,6 +295,14 @@
reinterpret_cast<uintptr_t>(top),
visitor);
} else {
+      // When using MADV_FREE instead of MADV_DONTNEED to release regions'
+      // pages in ClearFromSpace(), we may have non-zero pages beyond r->Top().
+      // This can happen only with regions which are TLABs, so we can fetch
+      // the right pos from the thread-local TLAB values.
+ if (r->is_a_tlab_) {
+ DCHECK(r->thread_ != nullptr);
+ top = r->thread_->GetTlabPos();
+ }
while (pos < top) {
mirror::Object* obj = reinterpret_cast<mirror::Object*>(pos);
if (obj->GetClass<kDefaultVerifyFlags, kWithoutReadBarrier>() != nullptr) {
@@ -371,8 +391,13 @@
usable_size,
bytes_tl_bulk_allocated);
}
- if (kForEvac && region != nullptr) {
- TraceHeapSize();
+ if (kForEvac) {
+ if (region != nullptr) {
+ TraceHeapSize();
+ }
+  } else if (region != nullptr) {
+    // Non-evac allocations must be cleaned here; evacuation memcpy's over
+    // the range, so it doesn't need pre-zeroed memory.
+    ZeroAllocRange(reinterpret_cast<uint8_t*>(region), *bytes_tl_bulk_allocated);
}
return region;
}
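
ZeroAllocRange() is intentionally a no-op under MADV_DONTNEED, because the
kernel then guarantees a zero page on the next fault; only the MADV_FREE
configuration has to scrub by hand. A standalone model of that decision
(names mirror the change; the advice value is the one chosen at startup in
region_space.cc below):

#include <sys/mman.h>
#include <algorithm>
#include <cstddef>
#include <cstdint>

// Model of RegionSpace::ZeroAllocRange(): clearing is only needed when
// pages are released with MADV_FREE, since reclamation is lazy and the
// old contents may still be mapped.
inline void ZeroAllocRangeModel(int purge_advice, uint8_t* start, size_t length) {
  if (purge_advice == MADV_DONTNEED) {
    return;  // Kernel hands back zeroed pages; nothing to do.
  }
  std::fill(start, start + length, 0);  // MADV_FREE: scrub stale bytes.
}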
diff --git a/runtime/gc/space/region_space.cc b/runtime/gc/space/region_space.cc
index faeeec0..7b3a8f7 100644
--- a/runtime/gc/space/region_space.cc
+++ b/runtime/gc/space/region_space.cc
@@ -24,6 +24,10 @@
#include "mirror/object-inl.h"
#include "thread_list.h"
+#if defined(__linux__)
+#include <sys/mman.h>
+#endif
+
namespace art {
namespace gc {
namespace space {
@@ -47,6 +51,12 @@
// Whether we check a region's live bytes count against the region bitmap.
static constexpr bool kCheckLiveBytesAgainstRegionBitmap = kIsDebugBuild;
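+// Prefer MADV_FREE for reclaiming region pages when the build defines it
+// and the kernel is at least 4.12; otherwise fall back to MADV_DONTNEED.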
+#ifndef MADV_FREE
+const int RegionSpace::gPurgeAdvice = MADV_DONTNEED;
+#else
+const int RegionSpace::gPurgeAdvice = KernelVersionLower(4, 12) ? MADV_DONTNEED : MADV_FREE;
+#endif
+
MemMap RegionSpace::CreateMemMap(const std::string& name,
size_t capacity,
uint8_t* requested_begin) {
@@ -141,7 +151,7 @@
DCHECK(!full_region_.IsFree());
DCHECK(full_region_.IsAllocated());
size_t ignored;
- DCHECK(full_region_.Alloc(kAlignment, &ignored, nullptr, &ignored) == nullptr);
+ DCHECK(full_region_.Alloc</*kForEvac*/true>(kAlignment, &ignored, nullptr, &ignored) == nullptr);
// Protect the whole region space from the start.
Protect();
}
@@ -497,10 +507,9 @@
madvise_list.push_back(std::pair(clear_block_begin, clear_block_end));
}
}
-
// Madvise the memory ranges.
for (const auto &iter : madvise_list) {
- ZeroAndProtectRegion(iter.first, iter.second);
+ PurgePages(iter.first, iter.second - iter.first);
if (clear_bitmap) {
GetLiveBitmap()->ClearRange(
reinterpret_cast<mirror::Object*>(iter.first),
@@ -630,6 +639,19 @@
num_evac_regions_ = 0;
}
+void RegionSpace::PurgePages(void* address, size_t length) {
+ DCHECK(IsAligned<kPageSize>(address));
+ if (length == 0) {
+ return;
+ }
+#ifdef _WIN32
+ // PurgePages does not madvise on Windows.
+#else
+ CHECK_EQ(madvise(address, length, gPurgeAdvice), 0)
+ << "madvise failed: " << strerror(errno);
+#endif
+}
+
void RegionSpace::CheckLiveBytesAgainstRegionBitmap(Region* r) {
if (r->LiveBytes() == static_cast<size_t>(-1)) {
// Live bytes count is undefined for `r`; nothing to check here.
@@ -867,6 +889,9 @@
if (r != nullptr) {
uint8_t* start = pos != nullptr ? pos : r->Begin();
DCHECK_ALIGNED(start, kObjectAlignment);
+      // If we are handing out a partially utilized region as a TLAB, the
+      // range [pos, r->Top()) is already clean; only the bytes from
+      // r->Top() onwards need clearing.
+      ZeroAllocRange(pos != nullptr ? r->Top() : r->Begin(), *bytes_tl_bulk_allocated);
r->is_a_tlab_ = true;
r->thread_ = self;
r->SetTop(r->End());
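
Taken together, the purge path reduces to a small amount of logic. A
standalone model (using the KernelVersionLower() helper introduced above;
error handling simplified to a log rather than the CHECK in the change):

#include <sys/mman.h>
#include <cerrno>
#include <cstdio>
#include <cstring>
#include "base/utils.h"

static int ChoosePurgeAdvice() {
#ifdef MADV_FREE
  // Lazy reclamation on 4.12+ kernels; eager MADV_DONTNEED otherwise.
  return art::KernelVersionLower(4, 12) ? MADV_DONTNEED : MADV_FREE;
#else
  return MADV_DONTNEED;
#endif
}

static void PurgePagesModel(void* address, size_t length) {
  static const int advice = ChoosePurgeAdvice();
  if (length == 0) {
    return;
  }
  if (madvise(address, length, advice) != 0) {
    fprintf(stderr, "madvise failed: %s\n", strerror(errno));
  }
}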
diff --git a/runtime/gc/space/region_space.h b/runtime/gc/space/region_space.h
index f74abfb..d515412 100644
--- a/runtime/gc/space/region_space.h
+++ b/runtime/gc/space/region_space.h
@@ -45,6 +45,11 @@
// A space that consists of equal-sized regions.
class RegionSpace final : public ContinuousMemMapAllocSpace {
+ private:
+  // Constant used to mark non-zero pages before madvise(MADV_FREE)'ing them.
+  static constexpr uint8_t kMadvFreeMagic = 0xdf;
+  static const int gPurgeAdvice;  // Advice passed to madvise() for reclaiming pages.
+
public:
typedef void(*WalkCallback)(void *start, void *end, size_t num_bytes, void* callback_arg);
@@ -366,6 +371,8 @@
}
}
+ ALWAYS_INLINE static void ZeroAllocRange(uint8_t* start, size_t length);
+
// Increment object allocation count for region containing ref.
void RecordAlloc(mirror::Object* ref) REQUIRES(!region_lock_);
@@ -426,6 +433,7 @@
void Clear(bool zero_and_release_pages);
+ template <bool kForEvac>
ALWAYS_INLINE mirror::Object* Alloc(size_t num_bytes,
/* out */ size_t* bytes_allocated,
/* out */ size_t* usable_size,
@@ -645,6 +653,7 @@
return RefToRegionLocked(ref);
}
+ void PurgePages(void* address, size_t length);
void TraceHeapSize() REQUIRES(region_lock_);
Region* RefToRegionUnlocked(mirror::Object* ref) NO_THREAD_SAFETY_ANALYSIS {
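
The header also introduces kMadvFreeMagic, but this excerpt never uses it;
per its comment, it marks non-zero pages before they are MADV_FREE'd.
Presumably that is a debug aid: poisoned pages make a read of stale,
not-yet-reclaimed memory recognizable. A hedged sketch of that idea (the
call site below is an assumption, not shown in this change):

#include <sys/mman.h>
#include <cstdint>
#include <cstring>

static constexpr uint8_t kMadvFreeMagic = 0xdf;

// Hypothetical debug-build purge: poison before advising, so that any later
// read of a page the kernel has not yet reclaimed shows an obvious 0xdf
// pattern instead of plausible-looking stale objects.
void DebugPurgePages(uint8_t* pages, size_t length) {
  memset(pages, kMadvFreeMagic, length);
#ifdef MADV_FREE
  madvise(pages, length, MADV_FREE);
#endif
}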