modules: Use a better scheme for refcounting
Current refcounting for modules (done if CONFIG_MODULE_UNLOAD=y) is
using a lot of memory.
Each 'struct module' contains an [NR_CPUS] array of full cache lines.
This patch uses existing infrastructure (percpu_modalloc() &
percpu_modfree()) to allocate percpu space for the refcount storage.
Instead of wasting NR_CPUS*128 bytes (on i386), we now use
nr_cpu_ids*sizeof(local_t) bytes.
On a typical distro, where NR_CPUS=8, shiping 2000 modules, we reduce
size of module files by about 2 Mbytes. (1Kb per module)
Instead of having all refcounters in the same memory node - with TLB misses
because of vmalloc() - this new implementation permits to have better
NUMA properties, since each CPU will use storage on its preferred node,
thanks to percpu storage.
Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
diff --git a/include/linux/module.h b/include/linux/module.h
index 4f7ea12..f3b8329 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -219,11 +219,6 @@
#endif
-struct module_ref
-{
- local_t count;
-} ____cacheline_aligned;
-
enum module_state
{
MODULE_STATE_LIVE,
@@ -344,8 +339,11 @@
/* Destruction function. */
void (*exit)(void);
- /* Reference counts */
- struct module_ref ref[NR_CPUS];
+#ifdef CONFIG_SMP
+ char *refptr;
+#else
+ local_t ref;
+#endif
#endif
};
#ifndef MODULE_ARCH_INIT
@@ -395,13 +393,22 @@
#define symbol_put(x) __symbol_put(MODULE_SYMBOL_PREFIX #x)
void symbol_put_addr(void *addr);
+static inline local_t *__module_ref_addr(struct module *mod, int cpu)
+{
+#ifdef CONFIG_SMP
+ return (local_t *) (mod->refptr + per_cpu_offset(cpu));
+#else
+ return &mod->ref;
+#endif
+}
+
/* Sometimes we know we already have a refcount, and it's easier not
to handle the error case (which only happens with rmmod --wait). */
static inline void __module_get(struct module *module)
{
if (module) {
BUG_ON(module_refcount(module) == 0);
- local_inc(&module->ref[get_cpu()].count);
+ local_inc(__module_ref_addr(module, get_cpu()));
put_cpu();
}
}
@@ -413,7 +420,7 @@
if (module) {
unsigned int cpu = get_cpu();
if (likely(module_is_live(module)))
- local_inc(&module->ref[cpu].count);
+ local_inc(__module_ref_addr(module, cpu));
else
ret = 0;
put_cpu();