mm: compaction: cache if a pageblock was scanned and no pages were isolated
When compaction was implemented it was known that scanning could
potentially be excessive. The ideal was that a counter be maintained for
each pageblock but maintaining this information would incur a severe
penalty due to a shared writable cache line. It has reached the point
where the scanning costs are a serious problem, particularly on
long-lived systems where a large process starts and allocates a large
number of THPs at the same time.
Instead of using a shared counter, this patch adds another bit to the
pageblock flags called PG_migrate_skip. If a pageblock is scanned by
either migrate or free scanner and 0 pages were isolated, the pageblock is
marked to be skipped in the future. When scanning, this bit is checked
before any scanning takes place and the block skipped if set.
The main difficulty with a patch like this is "when to ignore the cached
information?" If it's ignored too often, the scanning rates will still be
excessive. If the information is too stale then allocations will fail
that might have otherwise succeeded. In this patch
o CMA always ignores the information
o If the migrate and free scanner meet then the cached information will
be discarded if it's at least 5 seconds since the last time the cache
was discarded
o If there are a large number of allocation failures, discard the cache.
The time-based heuristic is very clumsy but there are few choices for a
better event. Depending solely on multiple allocation failures still
allows excessive scanning when THP allocations are failing in quick
succession due to memory pressure. Waiting until memory pressure is
relieved would cause compaction to continually fail instead of using
reclaim/compaction to try allocate the page. The time-based mechanism is
clumsy but a better option is not obvious.
Signed-off-by: Mel Gorman <mgorman@suse.de>
Acked-by: Rik van Riel <riel@redhat.com>
Cc: Richard Davies <richard@arachsys.com>
Cc: Shaohua Li <shli@kernel.org>
Cc: Avi Kivity <avi@redhat.com>
Acked-by: Rafael Aquini <aquini@redhat.com>
Cc: Fengguang Wu <fengguang.wu@intel.com>
Cc: Michal Nazarewicz <mina86@mina86.com>
Cc: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
Cc: Kyungmin Park <kyungmin.park@samsung.com>
Cc: Mark Brown <broonie@opensource.wolfsonmicro.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 628968c..44c5604 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -5679,7 +5679,8 @@
}
/* [start, end) must belong to a single zone. */
-static int __alloc_contig_migrate_range(unsigned long start, unsigned long end)
+static int __alloc_contig_migrate_range(struct compact_control *cc,
+ unsigned long start, unsigned long end)
{
/* This function is based on compact_zone() from compaction.c. */
@@ -5687,25 +5688,17 @@
unsigned int tries = 0;
int ret = 0;
- struct compact_control cc = {
- .nr_migratepages = 0,
- .order = -1,
- .zone = page_zone(pfn_to_page(start)),
- .sync = true,
- };
- INIT_LIST_HEAD(&cc.migratepages);
-
migrate_prep_local();
- while (pfn < end || !list_empty(&cc.migratepages)) {
+ while (pfn < end || !list_empty(&cc->migratepages)) {
if (fatal_signal_pending(current)) {
ret = -EINTR;
break;
}
- if (list_empty(&cc.migratepages)) {
- cc.nr_migratepages = 0;
- pfn = isolate_migratepages_range(cc.zone, &cc,
+ if (list_empty(&cc->migratepages)) {
+ cc->nr_migratepages = 0;
+ pfn = isolate_migratepages_range(cc->zone, cc,
pfn, end);
if (!pfn) {
ret = -EINTR;
@@ -5717,14 +5710,14 @@
break;
}
- reclaim_clean_pages_from_list(cc.zone, &cc.migratepages);
+ reclaim_clean_pages_from_list(cc->zone, &cc->migratepages);
- ret = migrate_pages(&cc.migratepages,
+ ret = migrate_pages(&cc->migratepages,
__alloc_contig_migrate_alloc,
0, false, MIGRATE_SYNC);
}
- putback_lru_pages(&cc.migratepages);
+ putback_lru_pages(&cc->migratepages);
return ret > 0 ? 0 : ret;
}
@@ -5803,6 +5796,15 @@
unsigned long outer_start, outer_end;
int ret = 0, order;
+ struct compact_control cc = {
+ .nr_migratepages = 0,
+ .order = -1,
+ .zone = page_zone(pfn_to_page(start)),
+ .sync = true,
+ .ignore_skip_hint = true,
+ };
+ INIT_LIST_HEAD(&cc.migratepages);
+
/*
* What we do here is we mark all pageblocks in range as
* MIGRATE_ISOLATE. Because pageblock and max order pages may
@@ -5832,7 +5834,7 @@
if (ret)
goto done;
- ret = __alloc_contig_migrate_range(start, end);
+ ret = __alloc_contig_migrate_range(&cc, start, end);
if (ret)
goto done;
@@ -5881,7 +5883,7 @@
__reclaim_pages(zone, GFP_HIGHUSER_MOVABLE, end-start);
/* Grab isolated pages from freelists. */
- outer_end = isolate_freepages_range(outer_start, end);
+ outer_end = isolate_freepages_range(&cc, outer_start, end);
if (!outer_end) {
ret = -EBUSY;
goto done;