/*
 * kexec.c - kexec system call
 * Copyright (C) 2002-2004 Eric Biederman <ebiederm@xmission.com>
 *
 * This source code is licensed under the GNU General Public License,
 * Version 2.  See the file COPYING for more details.
 */

#include <linux/mm.h>
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/fs.h>
#include <linux/kexec.h>
#include <linux/spinlock.h>
#include <linux/list.h>
#include <linux/highmem.h>
#include <linux/syscalls.h>
#include <linux/reboot.h>
#include <linux/ioport.h>
#include <linux/hardirq.h>

#include <asm/page.h>
#include <asm/uaccess.h>
#include <asm/io.h>
#include <asm/system.h>
#include <asm/semaphore.h>

/* Location of the reserved area for the crash kernel */
struct resource crashk_res = {
	.name  = "Crash kernel",
	.start = 0,
	.end   = 0,
	.flags = IORESOURCE_BUSY | IORESOURCE_MEM
};

int kexec_should_crash(struct task_struct *p)
{
	if (in_interrupt() || !p->pid || p->pid == 1 || panic_on_oops)
		return 1;
	return 0;
}

/*
 * When kexec transitions to the new kernel there is a one-to-one
 * mapping between physical and virtual addresses.  On processors
 * where you can disable the MMU this is trivial, and easy.  For
 * others it is still a simple, predictable page table to set up.
 *
 * In that environment kexec copies the new kernel to its final
 * resting place.  This means I can only support memory whose
 * physical address fits in an unsigned long.  In particular,
 * addresses where (pfn << PAGE_SHIFT) > ULONG_MAX cannot be handled.
 * If the assembly stub has more restrictive requirements,
 * KEXEC_SOURCE_MEMORY_LIMIT and KEXEC_DEST_MEMORY_LIMIT can be
 * defined more restrictively in <asm/kexec.h>.
 *
 * The code for the transition from the current kernel to the new
 * kernel is placed in the control_code_buffer, whose size is given
 * by KEXEC_CONTROL_CODE_SIZE.  In the best case only a single page
 * of memory is necessary, but some architectures require more.
 * Because this memory must be identity mapped in the transition from
 * virtual to physical addresses, it must live in the range
 * 0 - TASK_SIZE, as only the user space mappings are arbitrarily
 * modifiable.
 *
 * The assembly stub in the control code buffer is passed a linked list
 * of descriptor pages detailing the source pages of the new kernel,
 * and the destination addresses of those source pages.  As this data
 * structure is not used in the context of the current OS, it must
 * be self-contained.
 *
 * The code has been made to work with highmem pages and will use a
 * destination page in its final resting place (if it happens
 * to allocate it).  The end product of this is that most of the
 * physical address space, and most of RAM, can be used.
 *
 * Future directions include:
 *  - allocating a page table with the control code buffer identity
 *    mapped, to simplify machine_kexec and make kexec_on_panic more
 *    reliable.
 */
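
/*
 * For illustration only: the real list walk is done by architecture-
 * specific assembly in the control code buffer, but it is roughly
 * equivalent to the following C sketch, using kimage_entry_t and the
 * IND_* flags from <linux/kexec.h> (head points at the first entry,
 * dest tracks the current destination address):
 *
 *	kimage_entry_t *ptr, entry;
 *	char *dest = NULL;
 *
 *	for (ptr = head; !((entry = *ptr) & IND_DONE); ptr++) {
 *		if (entry & IND_DESTINATION)
 *			dest = phys_to_virt(entry & PAGE_MASK);
 *		else if (entry & IND_INDIRECTION)
 *			ptr = (kimage_entry_t *)
 *				phys_to_virt(entry & PAGE_MASK) - 1;
 *		else if (entry & IND_SOURCE) {
 *			copy_page(dest, phys_to_virt(entry & PAGE_MASK));
 *			dest += PAGE_SIZE;
 *		}
 *	}
 */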

/*
 * KIMAGE_NO_DEST is an impossible destination address, used for
 * allocating pages whose destination address we do not care about.
 */
#define KIMAGE_NO_DEST (-1UL)

static int kimage_is_destination_range(struct kimage *image,
				       unsigned long start, unsigned long end);
static struct page *kimage_alloc_page(struct kimage *image,
				      gfp_t gfp_mask,
				      unsigned long dest);

static int do_kimage_alloc(struct kimage **rimage, unsigned long entry,
			   unsigned long nr_segments,
			   struct kexec_segment __user *segments)
{
	size_t segment_bytes;
	struct kimage *image;
	unsigned long i;
	int result;

	/* Allocate a controlling structure */
	result = -ENOMEM;
	image = kmalloc(sizeof(*image), GFP_KERNEL);
	if (!image)
		goto out;

	memset(image, 0, sizeof(*image));
	image->head = 0;
	image->entry = &image->head;
	image->last_entry = &image->head;
	image->control_page = ~0; /* By default this does not apply */
	image->start = entry;
	image->type = KEXEC_TYPE_DEFAULT;

	/* Initialize the list of control pages */
	INIT_LIST_HEAD(&image->control_pages);

	/* Initialize the list of destination pages */
	INIT_LIST_HEAD(&image->dest_pages);

	/* Initialize the list of unuseable pages */
	INIT_LIST_HEAD(&image->unuseable_pages);

	/* Read in the segments */
	image->nr_segments = nr_segments;
	segment_bytes = nr_segments * sizeof(*segments);
	result = copy_from_user(image->segment, segments, segment_bytes);
	if (result)
		goto out;

	/*
	 * Verify we have good destination addresses.  The caller is
	 * responsible for making certain we don't attempt to load
	 * the new image into invalid or reserved areas of RAM.  This
	 * just verifies it is an address we can use.
	 *
	 * Since the kernel does everything in page size chunks, ensure
	 * the destination addresses are page aligned.  Too many
	 * special cases crop up when we don't do this.  The most
	 * insidious is getting overlapping destination addresses
	 * simply because addresses are changed to page size
	 * granularity.
	 */
	result = -EADDRNOTAVAIL;
	for (i = 0; i < nr_segments; i++) {
		unsigned long mstart, mend;

		mstart = image->segment[i].mem;
		mend = mstart + image->segment[i].memsz;
		if ((mstart & ~PAGE_MASK) || (mend & ~PAGE_MASK))
			goto out;
		if (mend >= KEXEC_DESTINATION_MEMORY_LIMIT)
			goto out;
	}

	/* Verify our destination addresses do not overlap.
	 * If we allowed overlapping destination addresses
	 * through, very weird things can happen with no
	 * easy explanation as one segment stomps on another.
	 */
	result = -EINVAL;
	for (i = 0; i < nr_segments; i++) {
		unsigned long mstart, mend;
		unsigned long j;

		mstart = image->segment[i].mem;
		mend = mstart + image->segment[i].memsz;
		for (j = 0; j < i; j++) {
			unsigned long pstart, pend;
			pstart = image->segment[j].mem;
			pend = pstart + image->segment[j].memsz;
			/* Do the segments overlap ? */
			if ((mend > pstart) && (mstart < pend))
				goto out;
		}
	}

	/* Ensure our buffer sizes do not exceed
	 * our memory sizes.  This should always be the case,
	 * and it is easier to check up front than to be surprised
	 * later on.
	 */
	result = -EINVAL;
	for (i = 0; i < nr_segments; i++) {
		if (image->segment[i].bufsz > image->segment[i].memsz)
			goto out;
	}

	result = 0;
out:
	if (result == 0)
		*rimage = image;
	else
		kfree(image);

	return result;
}

static int kimage_normal_alloc(struct kimage **rimage, unsigned long entry,
			       unsigned long nr_segments,
			       struct kexec_segment __user *segments)
{
	int result;
	struct kimage *image;

	/* Allocate and initialize a controlling structure */
	image = NULL;
	result = do_kimage_alloc(&image, entry, nr_segments, segments);
	if (result)
		goto out;

	*rimage = image;

	/*
	 * Find a location for the control code buffer, and add it to
	 * the vector of segments so that its pages will also be
	 * counted as destination pages.
	 */
	result = -ENOMEM;
	image->control_code_page = kimage_alloc_control_pages(image,
					get_order(KEXEC_CONTROL_CODE_SIZE));
	if (!image->control_code_page) {
		printk(KERN_ERR "Could not allocate control_code_buffer\n");
		goto out;
	}

	result = 0;
out:
	if (result == 0)
		*rimage = image;
	else
		kfree(image);

	return result;
}

static int kimage_crash_alloc(struct kimage **rimage, unsigned long entry,
			      unsigned long nr_segments,
			      struct kexec_segment __user *segments)
{
	int result;
	struct kimage *image;
	unsigned long i;

	image = NULL;
	/* Verify we have a valid entry point */
	if ((entry < crashk_res.start) || (entry > crashk_res.end)) {
		result = -EADDRNOTAVAIL;
		goto out;
	}

	/* Allocate and initialize a controlling structure */
	result = do_kimage_alloc(&image, entry, nr_segments, segments);
	if (result)
		goto out;

	/* Enable the special crash kernel control page
	 * allocation policy.
	 */
	image->control_page = crashk_res.start;
	image->type = KEXEC_TYPE_CRASH;

	/*
	 * Verify we have good destination addresses.  Normally
	 * the caller is responsible for making certain we don't
	 * attempt to load the new image into invalid or reserved
	 * areas of RAM.  But crash kernels are preloaded into a
	 * reserved area of RAM.  We must ensure the addresses
	 * are in the reserved area, otherwise preloading the
	 * kernel could corrupt things.
	 */
	result = -EADDRNOTAVAIL;
	for (i = 0; i < nr_segments; i++) {
		unsigned long mstart, mend;

		mstart = image->segment[i].mem;
		mend = mstart + image->segment[i].memsz - 1;
		/* Ensure we are within the crash kernel limits */
		if ((mstart < crashk_res.start) || (mend > crashk_res.end))
			goto out;
	}

	/*
	 * Find a location for the control code buffer, and add it to
	 * the vector of segments so that its pages will also be
	 * counted as destination pages.
	 */
	result = -ENOMEM;
	image->control_code_page = kimage_alloc_control_pages(image,
					get_order(KEXEC_CONTROL_CODE_SIZE));
	if (!image->control_code_page) {
		printk(KERN_ERR "Could not allocate control_code_buffer\n");
		goto out;
	}

	result = 0;
out:
	if (result == 0)
		*rimage = image;
	else
		kfree(image);

	return result;
}

static int kimage_is_destination_range(struct kimage *image,
				       unsigned long start,
				       unsigned long end)
{
	unsigned long i;

	for (i = 0; i < image->nr_segments; i++) {
		unsigned long mstart, mend;

		mstart = image->segment[i].mem;
		mend = mstart + image->segment[i].memsz;
		if ((end > mstart) && (start < mend))
			return 1;
	}

	return 0;
}

static struct page *kimage_alloc_pages(gfp_t gfp_mask, unsigned int order)
{
	struct page *pages;

	pages = alloc_pages(gfp_mask, order);
	if (pages) {
		unsigned int count, i;
		pages->mapping = NULL;
		set_page_private(pages, order);
		count = 1 << order;
		for (i = 0; i < count; i++)
			SetPageReserved(pages + i);
	}

	return pages;
}

static void kimage_free_pages(struct page *page)
{
	unsigned int order, count, i;

	order = page_private(page);
	count = 1 << order;
	for (i = 0; i < count; i++)
		ClearPageReserved(page + i);
	__free_pages(page, order);
}

static void kimage_free_page_list(struct list_head *list)
{
	struct list_head *pos, *next;

	list_for_each_safe(pos, next, list) {
		struct page *page;

		page = list_entry(pos, struct page, lru);
		list_del(&page->lru);
		kimage_free_pages(page);
	}
}

static struct page *kimage_alloc_normal_control_pages(struct kimage *image,
						      unsigned int order)
{
	/* Control pages are special, they are the intermediaries
	 * that are needed while we copy the rest of the pages
	 * to their final resting place.  As such they must
	 * not conflict with either the destination addresses
	 * or memory the kernel is already using.
	 *
	 * The only case where we really need more than one of
	 * these is for architectures where we cannot disable
	 * the MMU and must instead generate an identity mapped
	 * page table for all of the memory.
	 *
	 * At worst this runs in O(N) of the image size.
	 */
	struct list_head extra_pages;
	struct page *pages;
	unsigned int count;

	count = 1 << order;
	INIT_LIST_HEAD(&extra_pages);

	/* Loop while I can allocate a page and the page allocated
	 * is a destination page.
	 */
	do {
		unsigned long pfn, epfn, addr, eaddr;

		pages = kimage_alloc_pages(GFP_KERNEL, order);
		if (!pages)
			break;
		pfn   = page_to_pfn(pages);
		epfn  = pfn + count;
		addr  = pfn << PAGE_SHIFT;
		eaddr = epfn << PAGE_SHIFT;
		if ((epfn >= (KEXEC_CONTROL_MEMORY_LIMIT >> PAGE_SHIFT)) ||
			      kimage_is_destination_range(image, addr, eaddr)) {
			list_add(&pages->lru, &extra_pages);
			pages = NULL;
		}
	} while (!pages);

	if (pages) {
		/* Remember the allocated page... */
		list_add(&pages->lru, &image->control_pages);

		/* Because the page is already in its destination
		 * location we will never allocate another page at
		 * that address.  Therefore kimage_alloc_pages
		 * will not return it (again) and we don't need
		 * to give it an entry in image->segment[].
		 */
	}
	/* Deal with the destination pages I have inadvertently allocated.
	 *
	 * Ideally I would convert multi-page allocations into single
	 * page allocations, and add everything to image->dest_pages.
	 *
	 * For now it is simpler to just free the pages.
	 */
	kimage_free_page_list(&extra_pages);

	return pages;
}

static struct page *kimage_alloc_crash_control_pages(struct kimage *image,
						     unsigned int order)
{
	/* Control pages are special, they are the intermediaries
	 * that are needed while we copy the rest of the pages
	 * to their final resting place.  As such they must
	 * not conflict with either the destination addresses
	 * or memory the kernel is already using.
	 *
	 * Control pages are also the only pages we must allocate
	 * when loading a crash kernel.  All of the other pages
	 * are specified by the segments and we just memcpy
	 * into them directly.
	 *
	 * The only case where we really need more than one of
	 * these is for architectures where we cannot disable
	 * the MMU and must instead generate an identity mapped
	 * page table for all of the memory.
	 *
	 * Given the low demand this implements a very simple
	 * allocator that finds the first hole of the appropriate
	 * size in the reserved memory region, and allocates all
	 * of the memory up to and including the hole.
	 */
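	/*
	 * A worked example with made-up numbers: for order 1 on 4K
	 * pages, size = 0x2000.  Starting from image->control_page =
	 * 0x1000000, the candidate hole is [0x1000000, 0x1001fff].
	 * If a segment occupies [0x1000000, 0x1003fff], the hole
	 * advances to the next size-aligned address past the segment,
	 * giving [0x1004000, 0x1005fff], and the scan repeats.
	 */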
	unsigned long hole_start, hole_end, size;
	struct page *pages;

	pages = NULL;
	size = (1 << order) << PAGE_SHIFT;
	hole_start = (image->control_page + (size - 1)) & ~(size - 1);
	hole_end   = hole_start + size - 1;
	while (hole_end <= crashk_res.end) {
		unsigned long i;

		if (hole_end > KEXEC_CONTROL_MEMORY_LIMIT)
			break;
		if (hole_end > crashk_res.end)
			break;
		/* See if I overlap any of the segments */
		for (i = 0; i < image->nr_segments; i++) {
			unsigned long mstart, mend;

			mstart = image->segment[i].mem;
			mend   = mstart + image->segment[i].memsz - 1;
			if ((hole_end >= mstart) && (hole_start <= mend)) {
				/* Advance the hole to the end of the segment */
				hole_start = (mend + (size - 1)) & ~(size - 1);
				hole_end   = hole_start + size - 1;
				break;
			}
		}
		/* If I don't overlap any segments I have found my hole! */
		if (i == image->nr_segments) {
			pages = pfn_to_page(hole_start >> PAGE_SHIFT);
			break;
		}
	}
	if (pages)
		image->control_page = hole_end;

	return pages;
}

struct page *kimage_alloc_control_pages(struct kimage *image,
					unsigned int order)
{
	struct page *pages = NULL;

	switch (image->type) {
	case KEXEC_TYPE_DEFAULT:
		pages = kimage_alloc_normal_control_pages(image, order);
		break;
	case KEXEC_TYPE_CRASH:
		pages = kimage_alloc_crash_control_pages(image, order);
		break;
	}

	return pages;
}

static int kimage_add_entry(struct kimage *image, kimage_entry_t entry)
{
	if (*image->entry != 0)
		image->entry++;

	if (image->entry == image->last_entry) {
		kimage_entry_t *ind_page;
		struct page *page;

		page = kimage_alloc_page(image, GFP_KERNEL, KIMAGE_NO_DEST);
		if (!page)
			return -ENOMEM;

		ind_page = page_address(page);
		*image->entry = virt_to_phys(ind_page) | IND_INDIRECTION;
		image->entry = ind_page;
		image->last_entry = ind_page +
				((PAGE_SIZE/sizeof(kimage_entry_t)) - 1);
	}
	*image->entry = entry;
	image->entry++;
	*image->entry = 0;

	return 0;
}

static int kimage_set_destination(struct kimage *image,
				  unsigned long destination)
{
	int result;

	destination &= PAGE_MASK;
	result = kimage_add_entry(image, destination | IND_DESTINATION);
	if (result == 0)
		image->destination = destination;

	return result;
}


static int kimage_add_page(struct kimage *image, unsigned long page)
{
	int result;

	page &= PAGE_MASK;
	result = kimage_add_entry(image, page | IND_SOURCE);
	if (result == 0)
		image->destination += PAGE_SIZE;

	return result;
}


static void kimage_free_extra_pages(struct kimage *image)
{
	/* Walk through and free any extra destination pages I may have */
	kimage_free_page_list(&image->dest_pages);

	/* Walk through and free any unuseable pages I have cached */
	kimage_free_page_list(&image->unuseable_pages);
}

static int kimage_terminate(struct kimage *image)
{
	if (*image->entry != 0)
		image->entry++;

	*image->entry = IND_DONE;

	return 0;
}

#define for_each_kimage_entry(image, ptr, entry) \
	for (ptr = &image->head; (entry = *ptr) && !(entry & IND_DONE); \
		ptr = (entry & IND_INDIRECTION) ? \
			phys_to_virt(entry & PAGE_MASK) : ptr + 1)

static void kimage_free_entry(kimage_entry_t entry)
{
	struct page *page;

	page = pfn_to_page(entry >> PAGE_SHIFT);
	kimage_free_pages(page);
}

static void kimage_free(struct kimage *image)
{
	kimage_entry_t *ptr, entry;
	kimage_entry_t ind = 0;

	if (!image)
		return;

	kimage_free_extra_pages(image);
	for_each_kimage_entry(image, ptr, entry) {
		if (entry & IND_INDIRECTION) {
			/* Free the previous indirection page */
			if (ind & IND_INDIRECTION)
				kimage_free_entry(ind);
			/* Save this indirection page until we are
			 * done with it.
			 */
			ind = entry;
		} else if (entry & IND_SOURCE)
			kimage_free_entry(entry);
	}
	/* Free the final indirection page */
	if (ind & IND_INDIRECTION)
		kimage_free_entry(ind);

	/* Handle any machine specific cleanup */
	machine_kexec_cleanup(image);

	/* Free the kexec control pages... */
	kimage_free_page_list(&image->control_pages);
	kfree(image);
}

static kimage_entry_t *kimage_dst_used(struct kimage *image,
				       unsigned long page)
{
	kimage_entry_t *ptr, entry;
	unsigned long destination = 0;

	for_each_kimage_entry(image, ptr, entry) {
		if (entry & IND_DESTINATION)
			destination = entry & PAGE_MASK;
		else if (entry & IND_SOURCE) {
			if (page == destination)
				return ptr;
			destination += PAGE_SIZE;
		}
	}

	return NULL;
}

static struct page *kimage_alloc_page(struct kimage *image,
				      gfp_t gfp_mask,
				      unsigned long destination)
{
	/*
	 * Here we implement safeguards to ensure that a source page
	 * is not copied to its destination page before the data on
	 * the destination page is no longer useful.
	 *
	 * To do this we maintain the invariant that a source page is
	 * either its own destination page, or it is not a
	 * destination page at all.
	 *
	 * That is slightly stronger than required, but the proof
	 * that no problems will occur is trivial, and the
	 * implementation is simple to verify.
	 *
	 * When allocating all pages normally this algorithm will run
	 * in O(N) time, but in the worst case it will run in O(N^2)
	 * time.  If the runtime is a problem the data structures can
	 * be fixed.
	 */
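	/*
	 * A worked example of the swap case handled below (all
	 * addresses hypothetical): suppose we need the source page
	 * for destination 0x4000 and kimage_alloc_pages() hands us
	 * the page at 0x7000, but 0x7000 is itself some segment's
	 * destination whose source data currently sits at 0x9000.
	 * We copy 0x9000's contents into our new page at 0x7000,
	 * repoint that IND_SOURCE entry at 0x7000, and hand the page
	 * at 0x9000 (now free, and provably not a destination page)
	 * back to the caller for destination 0x4000.
	 */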
	struct page *page;
	unsigned long addr;

	/*
	 * Walk through the list of destination pages, and see if I
	 * have a match.
	 */
	list_for_each_entry(page, &image->dest_pages, lru) {
		addr = page_to_pfn(page) << PAGE_SHIFT;
		if (addr == destination) {
			list_del(&page->lru);
			return page;
		}
	}
	page = NULL;
	while (1) {
		kimage_entry_t *old;

		/* Allocate a page, if we run out of memory give up */
		page = kimage_alloc_pages(gfp_mask, 0);
		if (!page)
			return NULL;
		/* If the page cannot be used, file it away */
		if (page_to_pfn(page) >
				(KEXEC_SOURCE_MEMORY_LIMIT >> PAGE_SHIFT)) {
			list_add(&page->lru, &image->unuseable_pages);
			continue;
		}
		addr = page_to_pfn(page) << PAGE_SHIFT;

		/* If it is the destination page we want, use it */
		if (addr == destination)
			break;

		/* If the page is not a destination page use it */
		if (!kimage_is_destination_range(image, addr,
						 addr + PAGE_SIZE))
			break;

		/*
		 * I know that the page is someone's destination page.
		 * See if there is already a source page for this
		 * destination page.  And if so swap the source pages.
		 */
		old = kimage_dst_used(image, addr);
		if (old) {
			/* If so move it */
			unsigned long old_addr;
			struct page *old_page;

			old_addr = *old & PAGE_MASK;
			old_page = pfn_to_page(old_addr >> PAGE_SHIFT);
			copy_highpage(page, old_page);
			*old = addr | (*old & ~PAGE_MASK);

			/* The old page I have found cannot be a
			 * destination page, so return it.
			 */
			addr = old_addr;
			page = old_page;
			break;
		} else {
			/* Place the page on the destination list; I
			 * will use it later.
			 */
			list_add(&page->lru, &image->dest_pages);
		}
	}

	return page;
}

static int kimage_load_normal_segment(struct kimage *image,
				      struct kexec_segment *segment)
{
	unsigned long maddr;
	unsigned long ubytes, mbytes;
	int result;
	unsigned char __user *buf;

	result = 0;
	buf = segment->buf;
	ubytes = segment->bufsz;
	mbytes = segment->memsz;
	maddr = segment->mem;

	result = kimage_set_destination(image, maddr);
	if (result < 0)
		goto out;

	while (mbytes) {
		struct page *page;
		char *ptr;
		size_t uchunk, mchunk;

		page = kimage_alloc_page(image, GFP_HIGHUSER, maddr);
		if (!page) {
			result = -ENOMEM;
			goto out;
		}
		result = kimage_add_page(image, page_to_pfn(page)
								<< PAGE_SHIFT);
		if (result < 0)
			goto out;

		ptr = kmap(page);
		/* Start with a clear page */
		memset(ptr, 0, PAGE_SIZE);
		ptr += maddr & ~PAGE_MASK;
		mchunk = PAGE_SIZE - (maddr & ~PAGE_MASK);
		if (mchunk > mbytes)
			mchunk = mbytes;

		uchunk = mchunk;
		if (uchunk > ubytes)
			uchunk = ubytes;

		result = copy_from_user(ptr, buf, uchunk);
		kunmap(page);
		if (result) {
			result = (result < 0) ? result : -EIO;
			goto out;
		}
		ubytes -= uchunk;
		maddr  += mchunk;
		buf    += mchunk;
		mbytes -= mchunk;
	}
out:
	return result;
}

static int kimage_load_crash_segment(struct kimage *image,
				     struct kexec_segment *segment)
{
	/* For crash dump kernels we simply copy the data from
	 * user space to its destination.
	 * We do things a page at a time for the sake of kmap.
	 */
	unsigned long maddr;
	unsigned long ubytes, mbytes;
	int result;
	unsigned char __user *buf;

	result = 0;
	buf = segment->buf;
	ubytes = segment->bufsz;
	mbytes = segment->memsz;
	maddr = segment->mem;
	while (mbytes) {
		struct page *page;
		char *ptr;
		size_t uchunk, mchunk;

		page = pfn_to_page(maddr >> PAGE_SHIFT);
		if (!page) {
			result = -ENOMEM;
			goto out;
		}
		ptr = kmap(page);
		ptr += maddr & ~PAGE_MASK;
		mchunk = PAGE_SIZE - (maddr & ~PAGE_MASK);
		if (mchunk > mbytes)
			mchunk = mbytes;

		uchunk = mchunk;
		if (uchunk > ubytes) {
			uchunk = ubytes;
			/* Zero the trailing part of the page */
			memset(ptr + uchunk, 0, mchunk - uchunk);
		}
		result = copy_from_user(ptr, buf, uchunk);
		kunmap(page);
		if (result) {
			result = (result < 0) ? result : -EIO;
			goto out;
		}
		ubytes -= uchunk;
		maddr  += mchunk;
		buf    += mchunk;
		mbytes -= mchunk;
	}
out:
	return result;
}

static int kimage_load_segment(struct kimage *image,
			       struct kexec_segment *segment)
{
	int result = -ENOMEM;

	switch (image->type) {
	case KEXEC_TYPE_DEFAULT:
		result = kimage_load_normal_segment(image, segment);
		break;
	case KEXEC_TYPE_CRASH:
		result = kimage_load_crash_segment(image, segment);
		break;
	}

	return result;
}

/*
 * Exec Kernel system call: for obvious reasons only root may call it.
 *
 * This call breaks up into three pieces.
 * - A generic part which loads the new kernel from the current
 *   address space, and very carefully places the data in the
 *   allocated pages.
 *
 * - A generic part that interacts with the kernel and tells all of
 *   the devices to shut down, preventing on-going DMAs and placing
 *   the devices in a consistent state so a later kernel can
 *   reinitialize them.
 *
 * - A machine specific part that includes the syscall number
 *   and then copies the image to its final destination, and
 *   jumps into the image at entry.
 *
 * kexec does not sync or unmount filesystems, so if you need
 * that to happen you need to do that yourself.
 */
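/*
 * For orientation, a minimal and purely hypothetical user-space
 * invocation might look like the sketch below, via syscall(2) with
 * __NR_kexec_load.  The load address and entry point here are made
 * up; a real loader such as kexec-tools parses the kernel image to
 * compute them and normally loads several segments, not just one.
 *
 *	struct kexec_segment seg = {
 *		.buf   = image_buf,
 *		.bufsz = image_len,
 *		.mem   = 0x100000UL,
 *		.memsz = (image_len + 4095) & ~4095UL,
 *	};
 *	if (syscall(__NR_kexec_load, 0x100000UL, 1UL, &seg,
 *		    KEXEC_ARCH_DEFAULT) < 0)
 *		perror("kexec_load");
 */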
struct kimage *kexec_image = NULL;
static struct kimage *kexec_crash_image = NULL;
/*
 * A home-grown binary mutex.
 * Nothing can wait, so this mutex is safe to use
 * in interrupt context :)
 */
static int kexec_lock = 0;

asmlinkage long sys_kexec_load(unsigned long entry, unsigned long nr_segments,
			       struct kexec_segment __user *segments,
			       unsigned long flags)
{
	struct kimage **dest_image, *image;
	int locked;
	int result;

	/* We only trust the superuser with rebooting the system. */
	if (!capable(CAP_SYS_BOOT))
		return -EPERM;

	/*
	 * Verify we have a legal set of flags.
	 * This leaves us room for future extensions.
	 */
	if ((flags & KEXEC_FLAGS) != (flags & ~KEXEC_ARCH_MASK))
		return -EINVAL;

	/* Verify we are on the appropriate architecture */
	if (((flags & KEXEC_ARCH_MASK) != KEXEC_ARCH) &&
		((flags & KEXEC_ARCH_MASK) != KEXEC_ARCH_DEFAULT))
		return -EINVAL;

	/* Put an artificial cap on the number
	 * of segments passed to kexec_load.
	 */
	if (nr_segments > KEXEC_SEGMENT_MAX)
		return -EINVAL;

	image = NULL;
	result = 0;

	/* Because we write directly to the reserved memory
	 * region when loading crash kernels we need a mutex here to
	 * prevent multiple crash kernels from attempting to load
	 * simultaneously, and to prevent a crash kernel from loading
	 * over the top of an in-use crash kernel.
	 *
	 * KISS: always take the mutex.
	 */
	locked = xchg(&kexec_lock, 1);
	if (locked)
		return -EBUSY;

	dest_image = &kexec_image;
	if (flags & KEXEC_ON_CRASH)
		dest_image = &kexec_crash_image;
	if (nr_segments > 0) {
		unsigned long i;

		/* Loading another kernel to reboot into */
		if ((flags & KEXEC_ON_CRASH) == 0)
			result = kimage_normal_alloc(&image, entry,
						     nr_segments, segments);
		/* Loading another kernel to switch to if this one crashes */
		else if (flags & KEXEC_ON_CRASH) {
			/* Free any current crash dump kernel before
			 * we corrupt it.
			 */
			kimage_free(xchg(&kexec_crash_image, NULL));
			result = kimage_crash_alloc(&image, entry,
						    nr_segments, segments);
		}
		if (result)
			goto out;

		result = machine_kexec_prepare(image);
		if (result)
			goto out;

		for (i = 0; i < nr_segments; i++) {
			result = kimage_load_segment(image, &image->segment[i]);
			if (result)
				goto out;
		}
		result = kimage_terminate(image);
		if (result)
			goto out;
	}
	/* Install the new kernel, and uninstall the old */
	image = xchg(dest_image, image);

out:
	xchg(&kexec_lock, 0); /* Release the mutex */
	kimage_free(image);

	return result;
}

#ifdef CONFIG_COMPAT
asmlinkage long compat_sys_kexec_load(unsigned long entry,
				      unsigned long nr_segments,
				      struct compat_kexec_segment __user *segments,
				      unsigned long flags)
{
	struct compat_kexec_segment in;
	struct kexec_segment out, __user *ksegments;
	unsigned long i, result;

	/* Don't allow clients that don't understand the native
	 * architecture to do anything.
	 */
	if ((flags & KEXEC_ARCH_MASK) == KEXEC_ARCH_DEFAULT)
		return -EINVAL;

	if (nr_segments > KEXEC_SEGMENT_MAX)
		return -EINVAL;

	ksegments = compat_alloc_user_space(nr_segments * sizeof(out));
	for (i = 0; i < nr_segments; i++) {
		result = copy_from_user(&in, &segments[i], sizeof(in));
		if (result)
			return -EFAULT;

		out.buf   = compat_ptr(in.buf);
		out.bufsz = in.bufsz;
		out.mem   = in.mem;
		out.memsz = in.memsz;

		result = copy_to_user(&ksegments[i], &out, sizeof(out));
		if (result)
			return -EFAULT;
	}

	return sys_kexec_load(entry, nr_segments, ksegments, flags);
}
#endif

void crash_kexec(struct pt_regs *regs)
{
	struct kimage *image;
	int locked;

	/* Take the kexec_lock here to prevent sys_kexec_load
	 * running on one cpu from replacing the crash kernel
	 * we are using after a panic on a different cpu.
	 *
	 * If the crash kernel was not located in a fixed area
	 * of memory the xchg(&kexec_crash_image) would be
	 * sufficient.  But since I reuse the memory...
	 */
	locked = xchg(&kexec_lock, 1);
	if (!locked) {
		image = xchg(&kexec_crash_image, NULL);
		if (image) {
			machine_crash_shutdown(regs);
			machine_kexec(image);
		}
		xchg(&kexec_lock, 0);
	}
}