powerpc: make process.c suitable for both 32-bit and 64-bit

Signed-off-by: Paul Mackerras <paulus@samba.org>
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index ae316e9..f09908a 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -36,6 +36,8 @@
 #include <linux/kallsyms.h>
 #include <linux/mqueue.h>
 #include <linux/hardirq.h>
+#include <linux/utsname.h>
+#include <linux/kprobes.h>
 
 #include <asm/pgtable.h>
 #include <asm/uaccess.h>
@@ -44,6 +46,11 @@
 #include <asm/processor.h>
 #include <asm/mmu.h>
 #include <asm/prom.h>
+#ifdef CONFIG_PPC64
+#include <asm/firmware.h>
+#include <asm/plpar_wrappers.h>
+#include <asm/time.h>
+#endif
 
 extern unsigned long _get_SP(void);
 
@@ -53,26 +60,6 @@
 struct task_struct *last_task_used_spe = NULL;
 #endif
 
-static struct fs_struct init_fs = INIT_FS;
-static struct files_struct init_files = INIT_FILES;
-static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
-static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
-struct mm_struct init_mm = INIT_MM(init_mm);
-EXPORT_SYMBOL(init_mm);
-
-/* this is 8kB-aligned so we can get to the thread_info struct
-   at the base of it from the stack pointer with 1 integer instruction. */
-union thread_union init_thread_union
-	__attribute__((__section__(".data.init_task"))) =
-{ INIT_THREAD_INFO(init_task) };
-
-/* initial task structure */
-struct task_struct init_task = INIT_TASK(init_task);
-EXPORT_SYMBOL(init_task);
-
-/* only used to get secondary processor up */
-struct task_struct *current_set[NR_CPUS] = {&init_task, };
-
 /*
  * Make sure the floating-point register state in the
  * the thread_struct is up to date for task tsk.
@@ -237,7 +224,10 @@
 	return ret;
 }
 
+#ifdef CONFIG_PPC64
+DEFINE_PER_CPU(struct cpu_usage, cpu_usage_array);
 static DEFINE_PER_CPU(unsigned long, current_dabr);
+#endif
 
 struct task_struct *__switch_to(struct task_struct *prev,
 	struct task_struct *new)
@@ -308,10 +298,27 @@
 		set_dabr(new->thread.dabr);
 		__get_cpu_var(current_dabr) = new->thread.dabr;
 	}
+
+	flush_tlb_pending();
 #endif
 
 	new_thread = &new->thread;
 	old_thread = &current->thread;
+
+#ifdef CONFIG_PPC64
+	/*
+	 * Collect processor utilization data per process
+	 */
+	if (firmware_has_feature(FW_FEATURE_SPLPAR)) {
+		struct cpu_usage *cu = &__get_cpu_var(cpu_usage_array);
+		long unsigned start_tb, current_tb;
+		start_tb = old_thread->start_tb;
+		cu->current_tb = current_tb = mfspr(SPRN_PURR);
+		old_thread->accum_tb += (current_tb - start_tb);
+		new_thread->start_tb = current_tb;
+	}
+#endif
+
 	local_irq_save(flags);
 	last = _switch(old_thread, new_thread);
 
@@ -320,37 +327,106 @@
 	return last;
 }
 
+static int instructions_to_print = 16;
+
+#ifdef CONFIG_PPC64
+#define BAD_PC(pc)	((REGION_ID(pc) != KERNEL_REGION_ID) && \
+		         (REGION_ID(pc) != VMALLOC_REGION_ID))
+#else
+#define BAD_PC(pc)	((pc) < KERNELBASE)
+#endif
+
+static void show_instructions(struct pt_regs *regs)
+{
+	int i;
+	unsigned long pc = regs->nip - (instructions_to_print * 3 / 4 *
+			sizeof(int));
+
+	printk("Instruction dump:");
+
+	for (i = 0; i < instructions_to_print; i++) {
+		int instr;
+
+		if (!(i % 8))
+			printk("\n");
+
+		if (BAD_PC(pc) || __get_user(instr, (unsigned int *)pc)) {
+			printk("XXXXXXXX ");
+		} else {
+			if (regs->nip == pc)
+				printk("<%08x> ", instr);
+			else
+				printk("%08x ", instr);
+		}
+
+		pc += sizeof(int);
+	}
+
+	printk("\n");
+}
+
+static struct regbit {
+	unsigned long bit;
+	const char *name;
+} msr_bits[] = {
+	{MSR_EE,	"EE"},
+	{MSR_PR,	"PR"},
+	{MSR_FP,	"FP"},
+	{MSR_ME,	"ME"},
+	{MSR_IR,	"IR"},
+	{MSR_DR,	"DR"},
+	{0,		NULL}
+};
+
+static void printbits(unsigned long val, struct regbit *bits)
+{
+	const char *sep = "";
+
+	printk("<");
+	for (; bits->bit; ++bits)
+		if (val & bits->bit) {
+			printk("%s%s", sep, bits->name);
+			sep = ",";
+		}
+	printk(">");
+}
+
+#ifdef CONFIG_PPC64
+#define REG		"%016lX"
+#define REGS_PER_LINE	4
+#define LAST_VOLATILE	13
+#else
+#define REG		"%08lX"
+#define REGS_PER_LINE	8
+#define LAST_VOLATILE	12
+#endif
+
 void show_regs(struct pt_regs * regs)
 {
 	int i, trap;
 
-	printk("NIP: %08lX LR: %08lX SP: %08lX REGS: %p TRAP: %04lx    %s\n",
-	       regs->nip, regs->link, regs->gpr[1], regs, regs->trap,
-	       print_tainted());
-	printk("MSR: %08lx EE: %01x PR: %01x FP: %01x ME: %01x IR/DR: %01x%01x\n",
-	       regs->msr, regs->msr&MSR_EE ? 1 : 0, regs->msr&MSR_PR ? 1 : 0,
-	       regs->msr & MSR_FP ? 1 : 0,regs->msr&MSR_ME ? 1 : 0,
-	       regs->msr&MSR_IR ? 1 : 0,
-	       regs->msr&MSR_DR ? 1 : 0);
+	printk("NIP: "REG" LR: "REG" CTR: "REG"\n",
+	       regs->nip, regs->link, regs->ctr);
+	printk("REGS: %p TRAP: %04lx   %s  (%s)\n",
+	       regs, regs->trap, print_tainted(), system_utsname.release);
+	printk("MSR: "REG" ", regs->msr);
+	printbits(regs->msr, msr_bits);
+	printk("  CR: %08lX  XER: %08lX\n", regs->ccr, regs->xer);
 	trap = TRAP(regs);
 	if (trap == 0x300 || trap == 0x600)
-		printk("DAR: %08lX, DSISR: %08lX\n", regs->dar, regs->dsisr);
-	printk("TASK = %p[%d] '%s' THREAD: %p\n",
+		printk("DAR: "REG", DSISR: "REG"\n", regs->dar, regs->dsisr);
+	printk("TASK = %p[%d] '%s' THREAD: %p",
 	       current, current->pid, current->comm, current->thread_info);
-	printk("Last syscall: %ld ", current->thread.last_syscall);
 
 #ifdef CONFIG_SMP
 	printk(" CPU: %d", smp_processor_id());
 #endif /* CONFIG_SMP */
 
 	for (i = 0;  i < 32;  i++) {
-		long r;
-		if ((i % 8) == 0)
+		if ((i % REGS_PER_LINE) == 0)
 			printk("\n" KERN_INFO "GPR%02d: ", i);
-		if (__get_user(r, &regs->gpr[i]))
-			break;
-		printk("%08lX ", r);
-		if (i == 12 && !FULL_REGS(regs))
+		printk(REG " ", regs->gpr[i]);
+		if (i == LAST_VOLATILE && !FULL_REGS(regs))
 			break;
 	}
 	printk("\n");
@@ -359,16 +435,20 @@
 	 * Lookup NIP late so we have the best change of getting the
 	 * above info out without failing
 	 */
-	printk("NIP [%08lx] ", regs->nip);
+	printk("NIP ["REG"] ", regs->nip);
 	print_symbol("%s\n", regs->nip);
-	printk("LR [%08lx] ", regs->link);
+	printk("LR ["REG"] ", regs->link);
 	print_symbol("%s\n", regs->link);
 #endif
 	show_stack(current, (unsigned long *) regs->gpr[1]);
+	if (!user_mode(regs))
+		show_instructions(regs);
 }
 
 void exit_thread(void)
 {
+	kprobe_flush_task(current);
+
 #ifndef CONFIG_SMP
 	if (last_task_used_math == current)
 		last_task_used_math = NULL;
@@ -385,6 +465,14 @@
 
 void flush_thread(void)
 {
+#ifdef CONFIG_PPC64
+	struct thread_info *t = current_thread_info();
+
+	if (t->flags & _TIF_ABI_PENDING)
+		t->flags ^= (_TIF_ABI_PENDING | _TIF_32BIT);
+#endif
+	kprobe_flush_task(current);
+
 #ifndef CONFIG_SMP
 	if (last_task_used_math == current)
 		last_task_used_math = NULL;
@@ -425,15 +513,13 @@
 /*
  * Copy a thread..
  */
-int
-copy_thread(int nr, unsigned long clone_flags, unsigned long usp,
-	    unsigned long unused,
-	    struct task_struct *p, struct pt_regs *regs)
+int copy_thread(int nr, unsigned long clone_flags, unsigned long usp,
+		unsigned long unused, struct task_struct *p,
+		struct pt_regs *regs)
 {
 	struct pt_regs *childregs, *kregs;
 	extern void ret_from_fork(void);
 	unsigned long sp = (unsigned long)p->thread_info + THREAD_SIZE;
-	unsigned long childframe;
 
 	CHECK_FULL_REGS(regs);
 	/* Copy registers */
@@ -443,17 +529,26 @@
 	if ((childregs->msr & MSR_PR) == 0) {
 		/* for kernel thread, set `current' and stackptr in new task */
 		childregs->gpr[1] = sp + sizeof(struct pt_regs);
+#ifdef CONFIG_PPC32
 		childregs->gpr[2] = (unsigned long) p;
+#else
+		clear_ti_thread_flag(p->thread_info, TIF_32BIT);
+#endif
 		p->thread.regs = NULL;	/* no user register state */
 	} else {
 		childregs->gpr[1] = usp;
 		p->thread.regs = childregs;
-		if (clone_flags & CLONE_SETTLS)
-			childregs->gpr[2] = childregs->gpr[6];
+		if (clone_flags & CLONE_SETTLS) {
+#ifdef CONFIG_PPC64
+			if (!test_thread_flag(TIF_32BIT))
+				childregs->gpr[13] = childregs->gpr[6];
+			else
+#endif
+				childregs->gpr[2] = childregs->gpr[6];
+		}
 	}
 	childregs->gpr[3] = 0;  /* Result from fork() */
 	sp -= STACK_FRAME_OVERHEAD;
-	childframe = sp;
 
 	/*
 	 * The way this works is that at some point in the future
@@ -467,9 +562,30 @@
 	kregs = (struct pt_regs *) sp;
 	sp -= STACK_FRAME_OVERHEAD;
 	p->thread.ksp = sp;
-	kregs->nip = (unsigned long)ret_from_fork;
 
+#ifdef CONFIG_PPC64
+	if (cpu_has_feature(CPU_FTR_SLB)) {
+		unsigned long sp_vsid = get_kernel_vsid(sp);
+
+		sp_vsid <<= SLB_VSID_SHIFT;
+		sp_vsid |= SLB_VSID_KERNEL;
+		if (cpu_has_feature(CPU_FTR_16M_PAGE))
+			sp_vsid |= SLB_VSID_L;
+
+		p->thread.ksp_vsid = sp_vsid;
+	}
+
+	/*
+	 * The PPC64 ABI makes use of a TOC to contain function 
+	 * pointers.  The function (ret_from_except) is actually a pointer
+	 * to the TOC entry.  The first entry is a pointer to the actual
+	 * function.
+ 	 */
+	kregs->nip = *((unsigned long *)ret_from_fork);
+#else
+	kregs->nip = (unsigned long)ret_from_fork;
 	p->thread.last_syscall = -1;
+#endif
 
 	return 0;
 }
@@ -477,18 +593,61 @@
 /*
  * Set up a thread for executing a new program
  */
-void start_thread(struct pt_regs *regs, unsigned long nip, unsigned long sp)
+void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp)
 {
 	set_fs(USER_DS);
+
+	/*
+	 * If we exec out of a kernel thread then thread.regs will not be
+	 * set.  Do it now.
+	 */
+	if (!current->thread.regs) {
+		unsigned long childregs = (unsigned long)current->thread_info +
+						THREAD_SIZE;
+		childregs -= sizeof(struct pt_regs);
+		current->thread.regs = (struct pt_regs *)childregs;
+	}
+
 	memset(regs->gpr, 0, sizeof(regs->gpr));
 	regs->ctr = 0;
 	regs->link = 0;
 	regs->xer = 0;
 	regs->ccr = 0;
-	regs->mq = 0;
-	regs->nip = nip;
 	regs->gpr[1] = sp;
+
+#ifdef CONFIG_PPC32
+	regs->mq = 0;
+	regs->nip = start;
 	regs->msr = MSR_USER;
+#else
+	if (test_thread_flag(TIF_32BIT)) {
+		unsigned long entry, toc, load_addr = regs->gpr[2];
+
+		/* start is a relocated pointer to the function descriptor for
+		 * the elf _start routine.  The first entry in the function
+		 * descriptor is the entry address of _start and the second
+		 * entry is the TOC value we need to use.
+		 */
+		__get_user(entry, (unsigned long __user *)start);
+		__get_user(toc, (unsigned long __user *)start+1);
+
+		/* Check whether the e_entry function descriptor entries
+		 * need to be relocated before we can use them.
+		 */
+		if (load_addr != 0) {
+			entry += load_addr;
+			toc   += load_addr;
+		}
+		regs->nip = entry;
+		regs->gpr[2] = toc;
+		regs->msr = MSR_USER64;
+	} else {
+		regs->nip = start;
+		regs->gpr[2] = 0;
+		regs->msr = MSR_USER32;
+	}
+#endif
+
 #ifndef CONFIG_SMP
 	if (last_task_used_math == current)
 		last_task_used_math = NULL;
@@ -506,6 +665,7 @@
 #ifdef CONFIG_ALTIVEC
 	memset(current->thread.vr, 0, sizeof(current->thread.vr));
 	memset(&current->thread.vscr, 0, sizeof(current->thread.vscr));
+	current->thread.vscr.u[3] = 0x00010000; /* Java mode disabled */
 	current->thread.vrsave = 0;
 	current->thread.used_vr = 0;
 #endif /* CONFIG_ALTIVEC */
@@ -532,22 +692,23 @@
 #ifdef CONFIG_SPE
 		tsk->thread.fpexc_mode = val &
 			(PR_FP_EXC_SW_ENABLE | PR_FP_ALL_EXCEPT);
+		return 0;
 #else
 		return -EINVAL;
 #endif
-	} else {
-		/* on a CONFIG_SPE this does not hurt us.  The bits that
-		 * __pack_fe01 use do not overlap with bits used for
-		 * PR_FP_EXC_SW_ENABLE.  Additionally, the MSR[FE0,FE1] bits
-		 * on CONFIG_SPE implementations are reserved so writing to
-		 * them does not change anything */
-		if (val > PR_FP_EXC_PRECISE)
-			return -EINVAL;
-		tsk->thread.fpexc_mode = __pack_fe01(val);
-		if (regs != NULL && (regs->msr & MSR_FP) != 0)
-			regs->msr = (regs->msr & ~(MSR_FE0|MSR_FE1))
-				| tsk->thread.fpexc_mode;
 	}
+
+	/* on a CONFIG_SPE this does not hurt us.  The bits that
+	 * __pack_fe01 use do not overlap with bits used for
+	 * PR_FP_EXC_SW_ENABLE.  Additionally, the MSR[FE0,FE1] bits
+	 * on CONFIG_SPE implementations are reserved so writing to
+	 * them does not change anything */
+	if (val > PR_FP_EXC_PRECISE)
+		return -EINVAL;
+	tsk->thread.fpexc_mode = __pack_fe01(val);
+	if (regs != NULL && (regs->msr & MSR_FP) != 0)
+		regs->msr = (regs->msr & ~(MSR_FE0|MSR_FE1))
+			| tsk->thread.fpexc_mode;
 	return 0;
 }
 
@@ -566,6 +727,8 @@
 	return put_user(val, (unsigned int __user *) adr);
 }
 
+#define TRUNC_PTR(x)	((typeof(x))(((unsigned long)(x)) & 0xffffffff))
+
 int sys_clone(unsigned long clone_flags, unsigned long usp,
 	      int __user *parent_tidp, void __user *child_threadptr,
 	      int __user *child_tidp, int p6,
@@ -574,6 +737,12 @@
 	CHECK_FULL_REGS(regs);
 	if (usp == 0)
 		usp = regs->gpr[1];	/* stack pointer for child */
+#ifdef CONFIG_PPC64
+	if (test_thread_flag(TIF_32BIT)) {
+		parent_tidp = TRUNC_PTR(parent_tidp);
+		child_tidp = TRUNC_PTR(child_tidp);
+	}
+#endif
  	return do_fork(clone_flags, usp, regs, 0, parent_tidp, child_tidp);
 }
 
@@ -599,7 +768,7 @@
 	       struct pt_regs *regs)
 {
 	int error;
-	char * filename;
+	char *filename;
 
 	filename = getname((char __user *) a0);
 	error = PTR_ERR(filename);
@@ -644,20 +813,53 @@
 	return 0;
 }
 
-void dump_stack(void)
-{
-	show_stack(current, NULL);
-}
+#ifdef CONFIG_PPC64
+#define MIN_STACK_FRAME	112	/* same as STACK_FRAME_OVERHEAD, in fact */
+#define FRAME_LR_SAVE	2
+#define INT_FRAME_SIZE	(sizeof(struct pt_regs) + STACK_FRAME_OVERHEAD + 288)
+#define REGS_MARKER	0x7265677368657265ul
+#define FRAME_MARKER	12
+#else
+#define MIN_STACK_FRAME	16
+#define FRAME_LR_SAVE	1
+#define INT_FRAME_SIZE	(sizeof(struct pt_regs) + STACK_FRAME_OVERHEAD)
+#define REGS_MARKER	0x72656773ul
+#define FRAME_MARKER	2
+#endif
 
-EXPORT_SYMBOL(dump_stack);
+unsigned long get_wchan(struct task_struct *p)
+{
+	unsigned long ip, sp;
+	int count = 0;
+
+	if (!p || p == current || p->state == TASK_RUNNING)
+		return 0;
+
+	sp = p->thread.ksp;
+	if (!validate_sp(sp, p, MIN_STACK_FRAME))
+		return 0;
+
+	do {
+		sp = *(unsigned long *)sp;
+		if (!validate_sp(sp, p, MIN_STACK_FRAME))
+			return 0;
+		if (count > 0) {
+			ip = ((unsigned long *)sp)[FRAME_LR_SAVE];
+			if (!in_sched_functions(ip))
+				return ip;
+		}
+	} while (count++ < 16);
+	return 0;
+}
+EXPORT_SYMBOL(get_wchan);
+
+static int kstack_depth_to_print = 64;
 
 void show_stack(struct task_struct *tsk, unsigned long *stack)
 {
-	unsigned long sp, stack_top, prev_sp, ret;
+	unsigned long sp, ip, lr, newsp;
 	int count = 0;
-	unsigned long next_exc = 0;
-	struct pt_regs *regs;
-	extern char ret_from_except, ret_from_except_full, ret_from_syscall;
+	int firstframe = 1;
 
 	sp = (unsigned long) stack;
 	if (tsk == NULL)
@@ -669,65 +871,45 @@
 			sp = tsk->thread.ksp;
 	}
 
-	prev_sp = (unsigned long) (tsk->thread_info + 1);
-	stack_top = (unsigned long) tsk->thread_info + THREAD_SIZE;
-	while (count < 16 && sp > prev_sp && sp < stack_top && (sp & 3) == 0) {
-		if (count == 0) {
-			printk("Call trace:");
-#ifdef CONFIG_KALLSYMS
-			printk("\n");
-#endif
-		} else {
-			if (next_exc) {
-				ret = next_exc;
-				next_exc = 0;
-			} else
-				ret = *(unsigned long *)(sp + 4);
-			printk(" [%08lx] ", ret);
-#ifdef CONFIG_KALLSYMS
-			print_symbol("%s", ret);
-			printk("\n");
-#endif
-			if (ret == (unsigned long) &ret_from_except
-			    || ret == (unsigned long) &ret_from_except_full
-			    || ret == (unsigned long) &ret_from_syscall) {
-				/* sp + 16 points to an exception frame */
-				regs = (struct pt_regs *) (sp + 16);
-				if (sp + 16 + sizeof(*regs) <= stack_top)
-					next_exc = regs->nip;
-			}
-		}
-		++count;
-		sp = *(unsigned long *)sp;
-	}
-#ifndef CONFIG_KALLSYMS
-	if (count > 0)
-		printk("\n");
-#endif
-}
-
-unsigned long get_wchan(struct task_struct *p)
-{
-	unsigned long ip, sp;
-	int count = 0;
-
-	if (!p || p == current || p->state == TASK_RUNNING)
-		return 0;
-
-	sp = p->thread.ksp;
-	if (!validate_sp(sp, p, 16))
-		return 0;
-
+	lr = 0;
+	printk("Call Trace:\n");
 	do {
-		sp = *(unsigned long *)sp;
-		if (!validate_sp(sp, p, 16))
-			return 0;
-		if (count > 0) {
-			ip = *(unsigned long *)(sp + 4);
-			if (!in_sched_functions(ip))
-				return ip;
+		if (!validate_sp(sp, tsk, MIN_STACK_FRAME))
+			return;
+
+		stack = (unsigned long *) sp;
+		newsp = stack[0];
+		ip = stack[FRAME_LR_SAVE];
+		if (!firstframe || ip != lr) {
+			printk("["REG"] ["REG"] ", sp, ip);
+			print_symbol("%s", ip);
+			if (firstframe)
+				printk(" (unreliable)");
+			printk("\n");
 		}
-	} while (count++ < 16);
-	return 0;
+		firstframe = 0;
+
+		/*
+		 * See if this is an exception frame.
+		 * We look for the "regshere" marker in the current frame.
+		 */
+		if (validate_sp(sp, tsk, INT_FRAME_SIZE)
+		    && stack[FRAME_MARKER] == REGS_MARKER) {
+			struct pt_regs *regs = (struct pt_regs *)
+				(sp + STACK_FRAME_OVERHEAD);
+			printk("--- Exception: %lx", regs->trap);
+			print_symbol(" at %s\n", regs->nip);
+			lr = regs->link;
+			print_symbol("    LR = %s\n", lr);
+			firstframe = 1;
+		}
+
+		sp = newsp;
+	} while (count++ < kstack_depth_to_print);
 }
-EXPORT_SYMBOL(get_wchan);
+
+void dump_stack(void)
+{
+	show_stack(current, NULL);
+}
+EXPORT_SYMBOL(dump_stack);