slab: get_online_mems for kmem_cache_{create,destroy,shrink}
When we create a sl[au]b cache, we allocate kmem_cache_node structures
for each online NUMA node. To handle nodes taken online/offline, we
register memory hotplug notifier and allocate/free kmem_cache_node
corresponding to the node that changes its state for each kmem cache.
To synchronize between the two paths we hold the slab_mutex during both
the cache creationg/destruction path and while tuning per-node parts of
kmem caches in memory hotplug handler, but that's not quite right,
because it does not guarantee that a newly created cache will have all
kmem_cache_nodes initialized in case it races with memory hotplug. For
instance, in case of slub:
CPU0 CPU1
---- ----
kmem_cache_create: online_pages:
__kmem_cache_create: slab_memory_callback:
slab_mem_going_online_callback:
lock slab_mutex
for each slab_caches list entry
allocate kmem_cache node
unlock slab_mutex
lock slab_mutex
init_kmem_cache_nodes:
for_each_node_state(node, N_NORMAL_MEMORY)
allocate kmem_cache node
add kmem_cache to slab_caches list
unlock slab_mutex
online_pages (continued):
node_states_set_node
As a result we'll get a kmem cache with not all kmem_cache_nodes
allocated.
To avoid issues like that we should hold get/put_online_mems() during
the whole kmem cache creation/destruction/shrink paths, just like we
deal with cpu hotplug. This patch does the trick.
Note, that after it's applied, there is no need in taking the slab_mutex
for kmem_cache_shrink any more, so it is removed from there.
Signed-off-by: Vladimir Davydov <vdavydov@parallels.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: Tang Chen <tangchen@cn.fujitsu.com>
Cc: Zhang Yanfei <zhangyanfei@cn.fujitsu.com>
Cc: Toshi Kani <toshi.kani@hp.com>
Cc: Xishi Qiu <qiuxishi@huawei.com>
Cc: Jiang Liu <liuj97@gmail.com>
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Wen Congyang <wency@cn.fujitsu.com>
Cc: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
Cc: Lai Jiangshan <laijs@cn.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 2834bc2..2dd920d 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -205,6 +205,8 @@
int err;
get_online_cpus();
+ get_online_mems();
+
mutex_lock(&slab_mutex);
err = kmem_cache_sanity_check(name, size);
@@ -239,6 +241,8 @@
out_unlock:
mutex_unlock(&slab_mutex);
+
+ put_online_mems();
put_online_cpus();
if (err) {
@@ -272,6 +276,8 @@
char *cache_name;
get_online_cpus();
+ get_online_mems();
+
mutex_lock(&slab_mutex);
/*
@@ -295,6 +301,8 @@
out_unlock:
mutex_unlock(&slab_mutex);
+
+ put_online_mems();
put_online_cpus();
}
@@ -328,6 +336,8 @@
void kmem_cache_destroy(struct kmem_cache *s)
{
get_online_cpus();
+ get_online_mems();
+
mutex_lock(&slab_mutex);
s->refcount--;
@@ -359,15 +369,36 @@
#else
slab_kmem_cache_release(s);
#endif
- goto out_put_cpus;
+ goto out;
out_unlock:
mutex_unlock(&slab_mutex);
-out_put_cpus:
+out:
+ put_online_mems();
put_online_cpus();
}
EXPORT_SYMBOL(kmem_cache_destroy);
+/**
+ * kmem_cache_shrink - Shrink a cache.
+ * @cachep: The cache to shrink.
+ *
+ * Releases as many slabs as possible for a cache.
+ * To help debugging, a zero exit status indicates all slabs were released.
+ */
+int kmem_cache_shrink(struct kmem_cache *cachep)
+{
+ int ret;
+
+ get_online_cpus();
+ get_online_mems();
+ ret = __kmem_cache_shrink(cachep);
+ put_online_mems();
+ put_online_cpus();
+ return ret;
+}
+EXPORT_SYMBOL(kmem_cache_shrink);
+
int slab_is_available(void)
{
return slab_state >= UP;