Group short-lived and reclaimable kernel allocations
This patch marks a number of allocations that are either short-lived such as
network buffers or are reclaimable such as inode allocations. When something
like updatedb is called, long-lived and unmovable kernel allocations tend to
be spread throughout the address space which increases fragmentation.
This patch groups these allocations together as much as possible by adding a
new MIGRATE_TYPE. The MIGRATE_RECLAIMABLE type is for allocations that can be
reclaimed on demand, but not moved. i.e. they can be migrated by deleting
them and re-reading the information from elsewhere.
Signed-off-by: Mel Gorman <mel@csn.ul.ie>
Cc: Andy Whitcroft <apw@shadowen.org>
Cc: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
diff --git a/fs/buffer.c b/fs/buffer.c
index a406cfd..faceb5e 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -3169,7 +3169,8 @@
struct buffer_head *alloc_buffer_head(gfp_t gfp_flags)
{
- struct buffer_head *ret = kmem_cache_zalloc(bh_cachep, gfp_flags);
+ struct buffer_head *ret = kmem_cache_zalloc(bh_cachep,
+ set_migrateflags(gfp_flags, __GFP_RECLAIMABLE));
if (ret) {
INIT_LIST_HEAD(&ret->b_assoc_buffers);
get_cpu_var(bh_accounting).nr++;
diff --git a/fs/dcache.c b/fs/dcache.c
index 678d39de..7da0cf5 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -903,7 +903,7 @@
struct dentry *dentry;
char *dname;
- dentry = kmem_cache_alloc(dentry_cache, GFP_KERNEL);
+ dentry = kmem_cache_alloc(dentry_cache, GFP_KERNEL);
if (!dentry)
return NULL;
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index 06ab3c1..a6be78c 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -1710,7 +1710,7 @@
journal_head_cache = kmem_cache_create("journal_head",
sizeof(struct journal_head),
0, /* offset */
- 0, /* flags */
+ SLAB_TEMPORARY, /* flags */
NULL); /* ctor */
retval = 0;
if (journal_head_cache == 0) {
@@ -2006,7 +2006,7 @@
jbd_handle_cache = kmem_cache_create("journal_handle",
sizeof(handle_t),
0, /* offset */
- 0, /* flags */
+ SLAB_TEMPORARY, /* flags */
NULL); /* ctor */
if (jbd_handle_cache == NULL) {
printk(KERN_EMERG "JBD: failed to create handle cache\n");
diff --git a/fs/jbd/revoke.c b/fs/jbd/revoke.c
index 62e13c8..ad2eacf 100644
--- a/fs/jbd/revoke.c
+++ b/fs/jbd/revoke.c
@@ -170,13 +170,15 @@
{
revoke_record_cache = kmem_cache_create("revoke_record",
sizeof(struct jbd_revoke_record_s),
- 0, SLAB_HWCACHE_ALIGN, NULL);
+ 0,
+ SLAB_HWCACHE_ALIGN|SLAB_TEMPORARY,
+ NULL);
if (revoke_record_cache == 0)
return -ENOMEM;
revoke_table_cache = kmem_cache_create("revoke_table",
sizeof(struct jbd_revoke_table_s),
- 0, 0, NULL);
+ 0, SLAB_TEMPORARY, NULL);
if (revoke_table_cache == 0) {
kmem_cache_destroy(revoke_record_cache);
revoke_record_cache = NULL;
diff --git a/fs/proc/base.c b/fs/proc/base.c
index e5d0953..78fdfea 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -492,7 +492,7 @@
count = PROC_BLOCK_SIZE;
length = -ENOMEM;
- if (!(page = __get_free_page(GFP_KERNEL)))
+ if (!(page = __get_free_page(GFP_TEMPORARY)))
goto out;
length = PROC_I(inode)->op.proc_read(task, (char*)page);
@@ -532,7 +532,7 @@
goto out;
ret = -ENOMEM;
- page = (char *)__get_free_page(GFP_USER);
+ page = (char *)__get_free_page(GFP_TEMPORARY);
if (!page)
goto out;
@@ -602,7 +602,7 @@
goto out;
copied = -ENOMEM;
- page = (char *)__get_free_page(GFP_USER);
+ page = (char *)__get_free_page(GFP_TEMPORARY);
if (!page)
goto out;
@@ -788,7 +788,7 @@
/* No partial writes. */
return -EINVAL;
}
- page = (char*)__get_free_page(GFP_USER);
+ page = (char*)__get_free_page(GFP_TEMPORARY);
if (!page)
return -ENOMEM;
length = -EFAULT;
@@ -954,7 +954,8 @@
char __user *buffer, int buflen)
{
struct inode * inode;
- char *tmp = (char*)__get_free_page(GFP_KERNEL), *path;
+ char *tmp = (char*)__get_free_page(GFP_TEMPORARY);
+ char *path;
int len;
if (!tmp)
@@ -1726,7 +1727,7 @@
goto out;
length = -ENOMEM;
- page = (char*)__get_free_page(GFP_USER);
+ page = (char*)__get_free_page(GFP_TEMPORARY);
if (!page)
goto out;
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index b5e7155..1bdb624 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -74,7 +74,7 @@
nbytes = MAX_NON_LFS - pos;
dp = PDE(inode);
- if (!(page = (char*) __get_free_page(GFP_KERNEL)))
+ if (!(page = (char*) __get_free_page(GFP_TEMPORARY)))
return -ENOMEM;
while ((nbytes > 0) && !eof) {
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index da8aa87..f8ffcd4 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -48,9 +48,10 @@
#define __GFP_NOMEMALLOC ((__force gfp_t)0x10000u) /* Don't use emergency reserves */
#define __GFP_HARDWALL ((__force gfp_t)0x20000u) /* Enforce hardwall cpuset memory allocs */
#define __GFP_THISNODE ((__force gfp_t)0x40000u)/* No fallback, no policies */
-#define __GFP_MOVABLE ((__force gfp_t)0x80000u) /* Page is movable */
+#define __GFP_RECLAIMABLE ((__force gfp_t)0x80000u) /* Page is reclaimable */
+#define __GFP_MOVABLE ((__force gfp_t)0x100000u) /* Page is movable */
-#define __GFP_BITS_SHIFT 20 /* Room for 20 __GFP_FOO bits */
+#define __GFP_BITS_SHIFT 21 /* Room for 21 __GFP_FOO bits */
#define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1))
/* This equals 0, but use constants in case they ever change */
@@ -60,6 +61,8 @@
#define GFP_NOIO (__GFP_WAIT)
#define GFP_NOFS (__GFP_WAIT | __GFP_IO)
#define GFP_KERNEL (__GFP_WAIT | __GFP_IO | __GFP_FS)
+#define GFP_TEMPORARY (__GFP_WAIT | __GFP_IO | __GFP_FS | \
+ __GFP_RECLAIMABLE)
#define GFP_USER (__GFP_WAIT | __GFP_IO | __GFP_FS | __GFP_HARDWALL)
#define GFP_HIGHUSER (__GFP_WAIT | __GFP_IO | __GFP_FS | __GFP_HARDWALL | \
__GFP_HIGHMEM)
@@ -80,7 +83,7 @@
#endif
/* This mask makes up all the page movable related flags */
-#define GFP_MOVABLE_MASK (__GFP_MOVABLE)
+#define GFP_MOVABLE_MASK (__GFP_RECLAIMABLE|__GFP_MOVABLE)
/* Control page allocator reclaim behavior */
#define GFP_RECLAIM_MASK (__GFP_WAIT|__GFP_HIGH|__GFP_IO|__GFP_FS|\
@@ -129,6 +132,12 @@
return base + ZONE_NORMAL;
}
+static inline gfp_t set_migrateflags(gfp_t gfp, gfp_t migrate_flags)
+{
+ BUG_ON((gfp & GFP_MOVABLE_MASK) == GFP_MOVABLE_MASK);
+ return (gfp & ~(GFP_MOVABLE_MASK)) | migrate_flags;
+}
+
/*
* There is only one page-allocator function, and two main namespaces to
* it. The alloc_page*() variants return 'struct page *' and as such
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 7d7e4fe..4721e9a 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -35,10 +35,12 @@
#ifdef CONFIG_PAGE_GROUP_BY_MOBILITY
#define MIGRATE_UNMOVABLE 0
-#define MIGRATE_MOVABLE 1
-#define MIGRATE_TYPES 2
+#define MIGRATE_RECLAIMABLE 1
+#define MIGRATE_MOVABLE 2
+#define MIGRATE_TYPES 3
#else
#define MIGRATE_UNMOVABLE 0
+#define MIGRATE_UNRECLAIMABLE 0
#define MIGRATE_MOVABLE 0
#define MIGRATE_TYPES 1
#endif
diff --git a/include/linux/pageblock-flags.h b/include/linux/pageblock-flags.h
index 3619d52..5456da6 100644
--- a/include/linux/pageblock-flags.h
+++ b/include/linux/pageblock-flags.h
@@ -31,7 +31,7 @@
/* Bit indices that affect a whole block of pages */
enum pageblock_bits {
- PB_range(PB_migrate, 1), /* 1 bit required for migrate types */
+ PB_range(PB_migrate, 2), /* 2 bits required for migrate types */
NR_PAGEBLOCK_BITS
};
diff --git a/include/linux/slab.h b/include/linux/slab.h
index d859354..3a5bad3 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -24,12 +24,14 @@
#define SLAB_HWCACHE_ALIGN 0x00002000UL /* Align objs on cache lines */
#define SLAB_CACHE_DMA 0x00004000UL /* Use GFP_DMA memory */
#define SLAB_STORE_USER 0x00010000UL /* DEBUG: Store the last owner for bug hunting */
-#define SLAB_RECLAIM_ACCOUNT 0x00020000UL /* Objects are reclaimable */
#define SLAB_PANIC 0x00040000UL /* Panic if kmem_cache_create() fails */
#define SLAB_DESTROY_BY_RCU 0x00080000UL /* Defer freeing slabs to RCU */
#define SLAB_MEM_SPREAD 0x00100000UL /* Spread some memory over cpuset */
#define SLAB_TRACE 0x00200000UL /* Trace allocations and frees */
+/* The following flags affect the page allocator grouping pages by mobility */
+#define SLAB_RECLAIM_ACCOUNT 0x00020000UL /* Objects are reclaimable */
+#define SLAB_TEMPORARY SLAB_RECLAIM_ACCOUNT /* Objects are short-lived */
/*
* ZERO_SIZE_PTR will be returned for zero sized kmalloc requests.
*
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 8b2daac..e196510 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -1463,7 +1463,7 @@
ssize_t retval = 0;
char *s;
- if (!(page = (char *)__get_free_page(GFP_KERNEL)))
+ if (!(page = (char *)__get_free_page(GFP_TEMPORARY)))
return -ENOMEM;
s = page;
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index 519d3f0..6b26f9d 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -98,7 +98,8 @@
struct radix_tree_node *ret;
gfp_t gfp_mask = root_gfp_mask(root);
- ret = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask);
+ ret = kmem_cache_alloc(radix_tree_node_cachep,
+ set_migrateflags(gfp_mask, __GFP_RECLAIMABLE));
if (ret == NULL && !(gfp_mask & __GFP_WAIT)) {
struct radix_tree_preload *rtp;
@@ -142,7 +143,8 @@
rtp = &__get_cpu_var(radix_tree_preloads);
while (rtp->nr < ARRAY_SIZE(rtp->nodes)) {
preempt_enable();
- node = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask);
+ node = kmem_cache_alloc(radix_tree_node_cachep,
+ set_migrateflags(gfp_mask, __GFP_RECLAIMABLE));
if (node == NULL)
goto out;
preempt_disable();
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index d575a3e..29f4de1 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -172,7 +172,10 @@
static inline int gfpflags_to_migratetype(gfp_t gfp_flags)
{
- return ((gfp_flags & __GFP_MOVABLE) != 0);
+ WARN_ON((gfp_flags & GFP_MOVABLE_MASK) == GFP_MOVABLE_MASK);
+
+ return (((gfp_flags & __GFP_MOVABLE) != 0) << 1) |
+ ((gfp_flags & __GFP_RECLAIMABLE) != 0);
}
#else
@@ -676,8 +679,9 @@
* the free lists for the desirable migrate type are depleted
*/
static int fallbacks[MIGRATE_TYPES][MIGRATE_TYPES-1] = {
- [MIGRATE_UNMOVABLE] = { MIGRATE_MOVABLE },
- [MIGRATE_MOVABLE] = { MIGRATE_UNMOVABLE },
+ [MIGRATE_UNMOVABLE] = { MIGRATE_RECLAIMABLE, MIGRATE_MOVABLE },
+ [MIGRATE_RECLAIMABLE] = { MIGRATE_UNMOVABLE, MIGRATE_MOVABLE },
+ [MIGRATE_MOVABLE] = { MIGRATE_RECLAIMABLE, MIGRATE_UNMOVABLE },
};
/*
diff --git a/mm/shmem.c b/mm/shmem.c
index 855b93b..76ecbac 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -95,9 +95,9 @@
* BLOCKS_PER_PAGE on indirect pages, assume PAGE_CACHE_SIZE:
* might be reconsidered if it ever diverges from PAGE_SIZE.
*
- * __GFP_MOVABLE is masked out as swap vectors cannot move
+ * Mobility flags are masked out as swap vectors cannot move
*/
- return alloc_pages((gfp_mask & ~__GFP_MOVABLE) | __GFP_ZERO,
+ return alloc_pages((gfp_mask & ~GFP_MOVABLE_MASK) | __GFP_ZERO,
PAGE_CACHE_SHIFT-PAGE_SHIFT);
}
diff --git a/mm/slab.c b/mm/slab.c
index 8fb56ae..e34bcb8 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -1643,6 +1643,8 @@
#endif
flags |= cachep->gfpflags;
+ if (cachep->flags & SLAB_RECLAIM_ACCOUNT)
+ flags |= __GFP_RECLAIMABLE;
page = alloc_pages_node(nodeid, flags, cachep->gfporder);
if (!page)
diff --git a/mm/slub.c b/mm/slub.c
index 19d3202..a90c4ff 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1055,6 +1055,9 @@
if (s->flags & SLAB_CACHE_DMA)
flags |= SLUB_DMA;
+ if (s->flags & SLAB_RECLAIM_ACCOUNT)
+ flags |= __GFP_RECLAIMABLE;
+
if (node == -1)
page = alloc_pages(flags, s->order);
else