[PATCH] memory hotplug: sysfs and add/remove functions
This adds generic memory add/remove and supporting functions for memory
hotplug into a new file as well as a memory hotplug kernel config option.
Individual architecture patches will follow.
For now, disable memory hotplug when swsusp is enabled. There's a lot of
churn there right now. We'll fix it up properly once it calms down.
Signed-off-by: Matt Tolentino <matthew.e.tolentino@intel.com>
Signed-off-by: Dave Hansen <haveblue@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
diff --git a/include/linux/memory.h b/include/linux/memory.h
new file mode 100644
index 0000000..0def328
--- /dev/null
+++ b/include/linux/memory.h
@@ -0,0 +1,94 @@
+/*
+ * include/linux/memory.h - generic memory definition
+ *
+ * This is mainly for topological representation. We define the
+ * basic "struct memory_block" here, which can be embedded in per-arch
+ * definitions or NUMA information.
+ *
+ * Basic handling of the devices is done in drivers/base/memory.c
+ * and system devices are handled in drivers/base/sys.c.
+ *
+ * Memory blocks are exported via sysfs in the class/memory/devices/
+ * directory.
+ *
+ */
+#ifndef _LINUX_MEMORY_H_
+#define _LINUX_MEMORY_H_
+
+#include <linux/sysdev.h>
+#include <linux/node.h>
+#include <linux/compiler.h>
+
+#include <asm/semaphore.h>
+
+/*
+ * Generic description of one block of memory. It embeds a sys_device
+ * and can itself be embedded in per-arch definitions or NUMA
+ * information.
+ */
+struct memory_block {
+ /* NOTE(review): presumably the physical section index of this block -- confirm */
+ unsigned long phys_index;
+ /* NOTE(review): presumably holds one of the MEM_* values defined below -- confirm */
+ unsigned long state;
+ /*
+ * This serializes all state change requests. It isn't
+ * held during creation because the control files are
+ * created long after the critical areas during
+ * initialization.
+ */
+ struct semaphore state_sem;
+ int phys_device; /* to which fru does this belong? */
+ void *hw; /* optional pointer to fw/hw data */
+ /* Callback that receives a memory_block; when it runs is not visible in this header. */
+ int (*phys_callback)(struct memory_block *);
+ /* Embedded system device; NOTE(review): presumably registered under memory_sysdev_class -- confirm */
+ struct sys_device sysdev;
+};
+
+/*
+ * These states are exposed to userspace as text strings in sysfs.
+ * Every MEM_* value defined in this header occupies its own bit.
+ */
+#define MEM_ONLINE (1<<0) /* exposed to userspace */
+#define MEM_GOING_OFFLINE (1<<1) /* exposed to userspace */
+#define MEM_OFFLINE (1<<2) /* exposed to userspace */
+
+/*
+ * All of these states are currently kernel-internal for notifying
+ * kernel components and architectures.
+ *
+ * For MEM_MAPPING_INVALID, all notifier chains with priority >0
+ * are called before pfn_to_page() becomes invalid. The priority=0
+ * entry is reserved for the function that actually makes
+ * pfn_to_page() stop working. Any notifiers that want to be called
+ * after that should have priority <0.
+ */
+#define MEM_MAPPING_INVALID (1<<3)
+
+/*
+ * Forward declarations. Only pointers to these types are used below,
+ * so the complete definitions are not required. They must come before
+ * BOTH branches of the #ifndef: otherwise the first mention of
+ * "struct notifier_block" inside a parameter list would declare a new
+ * type scoped to that single prototype.
+ */
+struct notifier_block;
+struct mem_section;
+
+#ifndef CONFIG_MEMORY_HOTPLUG
+/*
+ * Memory hotplug is compiled out: provide no-op stubs so callers do
+ * not need their own #ifdefs. Each stub that returns a value returns 0.
+ */
+static inline int memory_dev_init(void)
+{
+ return 0;
+}
+static inline int register_memory_notifier(struct notifier_block *nb)
+{
+ return 0;
+}
+static inline void unregister_memory_notifier(struct notifier_block *nb)
+{
+}
+#else
+/* Memory hotplug is enabled: these are defined outside this header. */
+extern int register_memory(struct memory_block *, struct mem_section *section, struct node *);
+extern int register_new_memory(struct mem_section *);
+extern int unregister_memory_section(struct mem_section *);
+extern int memory_dev_init(void);
+extern int register_memory_notifier(struct notifier_block *nb);
+extern void unregister_memory_notifier(struct notifier_block *nb);
+
+/* PAGES_PER_SECTION scaled by PAGE_SHIFT; presumably the byte size of one section. */
+#define CONFIG_MEM_BLOCK_SIZE (PAGES_PER_SECTION<<PAGE_SHIFT)
+
+extern int invalidate_phys_mapping(unsigned long, unsigned long);
+
+extern struct sysdev_class memory_sysdev_class;
+#endif /* CONFIG_MEMORY_HOTPLUG */
+
+/*
+ * hotplug_memory_notifier(fn, pri) - register fn as a memory notifier.
+ *
+ * @fn:  notifier callback; must be a plain identifier because it is
+ *       token-pasted into the name of the static notifier_block.
+ * @pri: value stored in the notifier_block's .priority field.
+ *
+ * Expands to a single statement, so it is safe as the body of an
+ * unbraced if/else; terminate the invocation with a semicolon. The
+ * return value of register_memory_notifier() is discarded. When
+ * CONFIG_MEMORY_HOTPLUG is not set, register_memory_notifier() is the
+ * inline stub above that just returns 0.
+ */
+#define hotplug_memory_notifier(fn, pri) do { \
+ static struct notifier_block fn##_mem_nb = \
+ { .notifier_call = (fn), .priority = (pri) }; \
+ register_memory_notifier(&fn##_mem_nb); \
+} while (0)
+
+#endif /* _LINUX_MEMORY_H_ */
diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index 4b08bc9..01f03bc 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -3,6 +3,8 @@
#include <linux/mmzone.h>
#include <linux/spinlock.h>
+#include <linux/mmzone.h>
+#include <linux/notifier.h>
#ifdef CONFIG_MEMORY_HOTPLUG
/*
@@ -46,6 +48,19 @@
{
seqlock_init(&zone->span_seqlock);
}
+/*
+ * Declarations visible only in the CONFIG_MEMORY_HOTPLUG branch of this
+ * header; the definitions are not part of this file.
+ */
+extern int zone_grow_free_lists(struct zone *zone, unsigned long new_nr_pages);
+extern int zone_grow_waitqueues(struct zone *zone, unsigned long nr_pages);
+extern int add_one_highpage(struct page *page, int pfn, int bad_ppro);
+/* need some defines for these for archs that don't support it */
+extern void online_page(struct page *page);
+/* VM interface that may be used by firmware interface */
+extern int add_memory(u64 start, u64 size);
+extern int remove_memory(u64 start, u64 size);
+/* NOTE(review): parameters are unnamed; presumably (start pfn, number of pages) -- confirm */
+extern int online_pages(unsigned long, unsigned long);
+
+/*
+ * reasonably generic interface to expand the physical pages in a zone
+ * (the !CONFIG_MEMORY_HOTPLUG branch supplies an inline stub instead)
+ */
+extern int __add_pages(struct zone *zone, unsigned long start_pfn,
+ unsigned long nr_pages);
#else /* ! CONFIG_MEMORY_HOTPLUG */
/*
* Stub functions for when hotplug is off
@@ -65,5 +80,25 @@
static inline void zone_span_writelock(struct zone *zone) {}
static inline void zone_span_writeunlock(struct zone *zone) {}
static inline void zone_seqlock_init(struct zone *zone) {}
+
+/*
+ * Shared tail for stubs built when CONFIG_MEMORY_HOTPLUG is off.
+ *
+ * @func: name of the calling function, printed in the warning.
+ *
+ * Logs a KERN_WARNING naming the caller, dumps the current stack so the
+ * call site can be found, and returns -ENOSYS.
+ */
+static inline int mhp_notimplemented(const char *func)
+{
+ const int err = -ENOSYS;
+
+ printk(KERN_WARNING
+        "%s() called, with CONFIG_MEMORY_HOTPLUG disabled\n",
+        func);
+ dump_stack();
+
+ return err;
+}
+
+/*
+ * Stub used when CONFIG_MEMORY_HOTPLUG is off: the arguments are
+ * ignored, a warning plus stack dump is emitted via
+ * mhp_notimplemented(), and -ENOSYS is returned.
+ */
+static inline int __add_pages(struct zone *zone, unsigned long start_pfn,
+ unsigned long nr_pages)
+{
+ /* __func__ is the standard C99 spelling of GCC's __FUNCTION__ */
+ return mhp_notimplemented(__func__);
+}
#endif /* ! CONFIG_MEMORY_HOTPLUG */
+/*
+ * Page removal placeholder. It sits after the CONFIG_MEMORY_HOTPLUG
+ * #endif, so it is defined for both configurations. The arguments are
+ * ignored; it logs a warning, dumps the stack and returns -ENOSYS.
+ */
+static inline int __remove_pages(struct zone *zone, unsigned long start_pfn,
+ unsigned long nr_pages)
+{
+ /* __func__ is the standard C99 spelling of GCC's __FUNCTION__ */
+ printk(KERN_WARNING "%s() called, not yet supported\n", __func__);
+ dump_stack();
+ return -ENOSYS;
+}
#endif /* __LINUX_MEMORY_HOTPLUG_H */
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 8a514ec..5c1fb0a 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -840,6 +840,7 @@
unsigned long * zones_size, unsigned long zone_start_pfn,
unsigned long *zholes_size);
extern void memmap_init_zone(unsigned long, int, unsigned long, unsigned long);
+extern void setup_per_zone_pages_min(void);
extern void mem_init(void);
extern void show_mem(void);
extern void si_meminfo(struct sysinfo * val);