powerpc: make process.c suitable for both 32-bit and 64-bit
Signed-off-by: Paul Mackerras <paulus@samba.org>
diff --git a/arch/powerpc/kernel/init_task.c b/arch/powerpc/kernel/init_task.c
new file mode 100644
index 0000000..941043a
--- /dev/null
+++ b/arch/powerpc/kernel/init_task.c
@@ -0,0 +1,36 @@
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/init.h>
+#include <linux/init_task.h>
+#include <linux/fs.h>
+#include <linux/mqueue.h>
+#include <asm/uaccess.h>
+
+static struct fs_struct init_fs = INIT_FS;
+static struct files_struct init_files = INIT_FILES;
+static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
+static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
+struct mm_struct init_mm = INIT_MM(init_mm);
+
+EXPORT_SYMBOL(init_mm);
+
+/*
+ * Initial thread structure.
+ *
+ * We need to make sure that this is 16384-byte aligned due to the
+ * way process stacks are handled. This is done by having a special
+ * "init_task" linker map entry.
+ */
+union thread_union init_thread_union
+ __attribute__((__section__(".data.init_task"))) =
+ { INIT_THREAD_INFO(init_task) };
+
+/*
+ * Initial task structure.
+ *
+ * All other task structs will be allocated on slabs in fork.c
+ */
+struct task_struct init_task = INIT_TASK(init_task);
+
+EXPORT_SYMBOL(init_task);
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index ae316e9..f09908a 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -36,6 +36,8 @@
#include <linux/kallsyms.h>
#include <linux/mqueue.h>
#include <linux/hardirq.h>
+#include <linux/utsname.h>
+#include <linux/kprobes.h>
#include <asm/pgtable.h>
#include <asm/uaccess.h>
@@ -44,6 +46,11 @@
#include <asm/processor.h>
#include <asm/mmu.h>
#include <asm/prom.h>
+#ifdef CONFIG_PPC64
+#include <asm/firmware.h>
+#include <asm/plpar_wrappers.h>
+#include <asm/time.h>
+#endif
extern unsigned long _get_SP(void);
@@ -53,26 +60,6 @@
struct task_struct *last_task_used_spe = NULL;
#endif
-static struct fs_struct init_fs = INIT_FS;
-static struct files_struct init_files = INIT_FILES;
-static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
-static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
-struct mm_struct init_mm = INIT_MM(init_mm);
-EXPORT_SYMBOL(init_mm);
-
-/* this is 8kB-aligned so we can get to the thread_info struct
- at the base of it from the stack pointer with 1 integer instruction. */
-union thread_union init_thread_union
- __attribute__((__section__(".data.init_task"))) =
-{ INIT_THREAD_INFO(init_task) };
-
-/* initial task structure */
-struct task_struct init_task = INIT_TASK(init_task);
-EXPORT_SYMBOL(init_task);
-
-/* only used to get secondary processor up */
-struct task_struct *current_set[NR_CPUS] = {&init_task, };
-
/*
* Make sure the floating-point register state in the
* the thread_struct is up to date for task tsk.
@@ -237,7 +224,10 @@
return ret;
}
+#ifdef CONFIG_PPC64
+DEFINE_PER_CPU(struct cpu_usage, cpu_usage_array);
static DEFINE_PER_CPU(unsigned long, current_dabr);
+#endif
struct task_struct *__switch_to(struct task_struct *prev,
struct task_struct *new)
@@ -308,10 +298,27 @@
set_dabr(new->thread.dabr);
__get_cpu_var(current_dabr) = new->thread.dabr;
}
+
+ flush_tlb_pending();
#endif
new_thread = &new->thread;
old_thread = &current->thread;
+
+#ifdef CONFIG_PPC64
+ /*
+ * Collect processor utilization data per process
+ */
+ if (firmware_has_feature(FW_FEATURE_SPLPAR)) {
+ struct cpu_usage *cu = &__get_cpu_var(cpu_usage_array);
+ long unsigned start_tb, current_tb;
+ start_tb = old_thread->start_tb;
+ cu->current_tb = current_tb = mfspr(SPRN_PURR);
+ old_thread->accum_tb += (current_tb - start_tb);
+ new_thread->start_tb = current_tb;
+ }
+#endif
+
local_irq_save(flags);
last = _switch(old_thread, new_thread);
@@ -320,37 +327,106 @@
return last;
}
+static int instructions_to_print = 16;
+
+#ifdef CONFIG_PPC64
+#define BAD_PC(pc) ((REGION_ID(pc) != KERNEL_REGION_ID) && \
+ (REGION_ID(pc) != VMALLOC_REGION_ID))
+#else
+#define BAD_PC(pc) ((pc) < KERNELBASE)
+#endif
+
+static void show_instructions(struct pt_regs *regs)
+{
+ int i;
+ unsigned long pc = regs->nip - (instructions_to_print * 3 / 4 *
+ sizeof(int));
+
+ printk("Instruction dump:");
+
+ for (i = 0; i < instructions_to_print; i++) {
+ int instr;
+
+ if (!(i % 8))
+ printk("\n");
+
+ if (BAD_PC(pc) || __get_user(instr, (unsigned int *)pc)) {
+ printk("XXXXXXXX ");
+ } else {
+ if (regs->nip == pc)
+ printk("<%08x> ", instr);
+ else
+ printk("%08x ", instr);
+ }
+
+ pc += sizeof(int);
+ }
+
+ printk("\n");
+}
+
+static struct regbit {
+ unsigned long bit;
+ const char *name;
+} msr_bits[] = {
+ {MSR_EE, "EE"},
+ {MSR_PR, "PR"},
+ {MSR_FP, "FP"},
+ {MSR_ME, "ME"},
+ {MSR_IR, "IR"},
+ {MSR_DR, "DR"},
+ {0, NULL}
+};
+
+static void printbits(unsigned long val, struct regbit *bits)
+{
+ const char *sep = "";
+
+ printk("<");
+ for (; bits->bit; ++bits)
+ if (val & bits->bit) {
+ printk("%s%s", sep, bits->name);
+ sep = ",";
+ }
+ printk(">");
+}
+
+#ifdef CONFIG_PPC64
+#define REG "%016lX"
+#define REGS_PER_LINE 4
+#define LAST_VOLATILE 13
+#else
+#define REG "%08lX"
+#define REGS_PER_LINE 8
+#define LAST_VOLATILE 12
+#endif
+
void show_regs(struct pt_regs * regs)
{
int i, trap;
- printk("NIP: %08lX LR: %08lX SP: %08lX REGS: %p TRAP: %04lx %s\n",
- regs->nip, regs->link, regs->gpr[1], regs, regs->trap,
- print_tainted());
- printk("MSR: %08lx EE: %01x PR: %01x FP: %01x ME: %01x IR/DR: %01x%01x\n",
- regs->msr, regs->msr&MSR_EE ? 1 : 0, regs->msr&MSR_PR ? 1 : 0,
- regs->msr & MSR_FP ? 1 : 0,regs->msr&MSR_ME ? 1 : 0,
- regs->msr&MSR_IR ? 1 : 0,
- regs->msr&MSR_DR ? 1 : 0);
+ printk("NIP: "REG" LR: "REG" CTR: "REG"\n",
+ regs->nip, regs->link, regs->ctr);
+ printk("REGS: %p TRAP: %04lx %s (%s)\n",
+ regs, regs->trap, print_tainted(), system_utsname.release);
+ printk("MSR: "REG" ", regs->msr);
+ printbits(regs->msr, msr_bits);
+ printk(" CR: %08lX XER: %08lX\n", regs->ccr, regs->xer);
trap = TRAP(regs);
if (trap == 0x300 || trap == 0x600)
- printk("DAR: %08lX, DSISR: %08lX\n", regs->dar, regs->dsisr);
- printk("TASK = %p[%d] '%s' THREAD: %p\n",
+ printk("DAR: "REG", DSISR: "REG"\n", regs->dar, regs->dsisr);
+ printk("TASK = %p[%d] '%s' THREAD: %p",
current, current->pid, current->comm, current->thread_info);
- printk("Last syscall: %ld ", current->thread.last_syscall);
#ifdef CONFIG_SMP
printk(" CPU: %d", smp_processor_id());
#endif /* CONFIG_SMP */
for (i = 0; i < 32; i++) {
- long r;
- if ((i % 8) == 0)
+ if ((i % REGS_PER_LINE) == 0)
printk("\n" KERN_INFO "GPR%02d: ", i);
- if (__get_user(r, &regs->gpr[i]))
- break;
- printk("%08lX ", r);
- if (i == 12 && !FULL_REGS(regs))
+ printk(REG " ", regs->gpr[i]);
+ if (i == LAST_VOLATILE && !FULL_REGS(regs))
break;
}
printk("\n");
@@ -359,16 +435,20 @@
* Lookup NIP late so we have the best change of getting the
* above info out without failing
*/
- printk("NIP [%08lx] ", regs->nip);
+ printk("NIP ["REG"] ", regs->nip);
print_symbol("%s\n", regs->nip);
- printk("LR [%08lx] ", regs->link);
+ printk("LR ["REG"] ", regs->link);
print_symbol("%s\n", regs->link);
#endif
show_stack(current, (unsigned long *) regs->gpr[1]);
+ if (!user_mode(regs))
+ show_instructions(regs);
}
void exit_thread(void)
{
+ kprobe_flush_task(current);
+
#ifndef CONFIG_SMP
if (last_task_used_math == current)
last_task_used_math = NULL;
@@ -385,6 +465,14 @@
void flush_thread(void)
{
+#ifdef CONFIG_PPC64
+ struct thread_info *t = current_thread_info();
+
+ if (t->flags & _TIF_ABI_PENDING)
+ t->flags ^= (_TIF_ABI_PENDING | _TIF_32BIT);
+#endif
+ kprobe_flush_task(current);
+
#ifndef CONFIG_SMP
if (last_task_used_math == current)
last_task_used_math = NULL;
@@ -425,15 +513,13 @@
/*
* Copy a thread..
*/
-int
-copy_thread(int nr, unsigned long clone_flags, unsigned long usp,
- unsigned long unused,
- struct task_struct *p, struct pt_regs *regs)
+int copy_thread(int nr, unsigned long clone_flags, unsigned long usp,
+ unsigned long unused, struct task_struct *p,
+ struct pt_regs *regs)
{
struct pt_regs *childregs, *kregs;
extern void ret_from_fork(void);
unsigned long sp = (unsigned long)p->thread_info + THREAD_SIZE;
- unsigned long childframe;
CHECK_FULL_REGS(regs);
/* Copy registers */
@@ -443,17 +529,26 @@
if ((childregs->msr & MSR_PR) == 0) {
/* for kernel thread, set `current' and stackptr in new task */
childregs->gpr[1] = sp + sizeof(struct pt_regs);
+#ifdef CONFIG_PPC32
childregs->gpr[2] = (unsigned long) p;
+#else
+ clear_ti_thread_flag(p->thread_info, TIF_32BIT);
+#endif
p->thread.regs = NULL; /* no user register state */
} else {
childregs->gpr[1] = usp;
p->thread.regs = childregs;
- if (clone_flags & CLONE_SETTLS)
- childregs->gpr[2] = childregs->gpr[6];
+ if (clone_flags & CLONE_SETTLS) {
+#ifdef CONFIG_PPC64
+ if (!test_thread_flag(TIF_32BIT))
+ childregs->gpr[13] = childregs->gpr[6];
+ else
+#endif
+ childregs->gpr[2] = childregs->gpr[6];
+ }
}
childregs->gpr[3] = 0; /* Result from fork() */
sp -= STACK_FRAME_OVERHEAD;
- childframe = sp;
/*
* The way this works is that at some point in the future
@@ -467,9 +562,30 @@
kregs = (struct pt_regs *) sp;
sp -= STACK_FRAME_OVERHEAD;
p->thread.ksp = sp;
- kregs->nip = (unsigned long)ret_from_fork;
+#ifdef CONFIG_PPC64
+ if (cpu_has_feature(CPU_FTR_SLB)) {
+ unsigned long sp_vsid = get_kernel_vsid(sp);
+
+ sp_vsid <<= SLB_VSID_SHIFT;
+ sp_vsid |= SLB_VSID_KERNEL;
+ if (cpu_has_feature(CPU_FTR_16M_PAGE))
+ sp_vsid |= SLB_VSID_L;
+
+ p->thread.ksp_vsid = sp_vsid;
+ }
+
+ /*
+ * The PPC64 ABI makes use of a TOC to contain function
+ * pointers. The function (ret_from_fork) is actually a pointer
+ * to the TOC entry. The first entry is a pointer to the actual
+ * function.
+ */
+ kregs->nip = *((unsigned long *)ret_from_fork);
+#else
+ kregs->nip = (unsigned long)ret_from_fork;
p->thread.last_syscall = -1;
+#endif
return 0;
}
@@ -477,18 +593,69 @@
/*
* Set up a thread for executing a new program
*/
-void start_thread(struct pt_regs *regs, unsigned long nip, unsigned long sp)
+void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp)
{
+#ifdef CONFIG_PPC64
+ /*
+ * ELF_PLAT_INIT leaves the load address in gpr[2]; read it here,
+ * before the memset() below clears the register file.
+ */
+ unsigned long load_addr = regs->gpr[2];
+#endif
+
set_fs(USER_DS);
+
+ /*
+ * If we exec out of a kernel thread then thread.regs will not be
+ * set. Do it now.
+ */
+ if (!current->thread.regs) {
+ unsigned long childregs = (unsigned long)current->thread_info +
+ THREAD_SIZE;
+ childregs -= sizeof(struct pt_regs);
+ current->thread.regs = (struct pt_regs *)childregs;
+ }
+
memset(regs->gpr, 0, sizeof(regs->gpr));
regs->ctr = 0;
regs->link = 0;
regs->xer = 0;
regs->ccr = 0;
- regs->mq = 0;
- regs->nip = nip;
regs->gpr[1] = sp;
+
+#ifdef CONFIG_PPC32
+ regs->mq = 0;
+ regs->nip = start;
regs->msr = MSR_USER;
+#else
+ if (!test_thread_flag(TIF_32BIT)) {
+ unsigned long entry, toc;
+
+ /* start is a relocated pointer to the function descriptor for
+ * the elf _start routine. The first entry in the function
+ * descriptor is the entry address of _start and the second
+ * entry is the TOC value we need to use.
+ */
+ __get_user(entry, (unsigned long __user *)start);
+ __get_user(toc, (unsigned long __user *)start+1);
+
+ /* Check whether the e_entry function descriptor entries
+ * need to be relocated before we can use them.
+ */
+ if (load_addr != 0) {
+ entry += load_addr;
+ toc += load_addr;
+ }
+ regs->nip = entry;
+ regs->gpr[2] = toc;
+ regs->msr = MSR_USER64;
+ } else {
+ regs->nip = start;
+ regs->gpr[2] = 0;
+ regs->msr = MSR_USER32;
+ }
+#endif
+
#ifndef CONFIG_SMP
if (last_task_used_math == current)
last_task_used_math = NULL;
@@ -506,6 +665,7 @@
#ifdef CONFIG_ALTIVEC
memset(current->thread.vr, 0, sizeof(current->thread.vr));
memset(&current->thread.vscr, 0, sizeof(current->thread.vscr));
+ current->thread.vscr.u[3] = 0x00010000; /* Java mode disabled */
current->thread.vrsave = 0;
current->thread.used_vr = 0;
#endif /* CONFIG_ALTIVEC */
@@ -532,22 +692,23 @@
#ifdef CONFIG_SPE
tsk->thread.fpexc_mode = val &
(PR_FP_EXC_SW_ENABLE | PR_FP_ALL_EXCEPT);
+ return 0;
#else
return -EINVAL;
#endif
- } else {
- /* on a CONFIG_SPE this does not hurt us. The bits that
- * __pack_fe01 use do not overlap with bits used for
- * PR_FP_EXC_SW_ENABLE. Additionally, the MSR[FE0,FE1] bits
- * on CONFIG_SPE implementations are reserved so writing to
- * them does not change anything */
- if (val > PR_FP_EXC_PRECISE)
- return -EINVAL;
- tsk->thread.fpexc_mode = __pack_fe01(val);
- if (regs != NULL && (regs->msr & MSR_FP) != 0)
- regs->msr = (regs->msr & ~(MSR_FE0|MSR_FE1))
- | tsk->thread.fpexc_mode;
}
+
+ /* on a CONFIG_SPE this does not hurt us. The bits that
+ * __pack_fe01 use do not overlap with bits used for
+ * PR_FP_EXC_SW_ENABLE. Additionally, the MSR[FE0,FE1] bits
+ * on CONFIG_SPE implementations are reserved so writing to
+ * them does not change anything */
+ if (val > PR_FP_EXC_PRECISE)
+ return -EINVAL;
+ tsk->thread.fpexc_mode = __pack_fe01(val);
+ if (regs != NULL && (regs->msr & MSR_FP) != 0)
+ regs->msr = (regs->msr & ~(MSR_FE0|MSR_FE1))
+ | tsk->thread.fpexc_mode;
return 0;
}
@@ -566,6 +727,8 @@
return put_user(val, (unsigned int __user *) adr);
}
+#define TRUNC_PTR(x) ((typeof(x))(((unsigned long)(x)) & 0xffffffff))
+
int sys_clone(unsigned long clone_flags, unsigned long usp,
int __user *parent_tidp, void __user *child_threadptr,
int __user *child_tidp, int p6,
@@ -574,6 +737,12 @@
CHECK_FULL_REGS(regs);
if (usp == 0)
usp = regs->gpr[1]; /* stack pointer for child */
+#ifdef CONFIG_PPC64
+ if (test_thread_flag(TIF_32BIT)) {
+ parent_tidp = TRUNC_PTR(parent_tidp);
+ child_tidp = TRUNC_PTR(child_tidp);
+ }
+#endif
return do_fork(clone_flags, usp, regs, 0, parent_tidp, child_tidp);
}
@@ -599,7 +768,7 @@
struct pt_regs *regs)
{
int error;
- char * filename;
+ char *filename;
filename = getname((char __user *) a0);
error = PTR_ERR(filename);
@@ -644,20 +813,53 @@
return 0;
}
-void dump_stack(void)
-{
- show_stack(current, NULL);
-}
+#ifdef CONFIG_PPC64
+#define MIN_STACK_FRAME 112 /* same as STACK_FRAME_OVERHEAD, in fact */
+#define FRAME_LR_SAVE 2
+#define INT_FRAME_SIZE (sizeof(struct pt_regs) + STACK_FRAME_OVERHEAD + 288)
+#define REGS_MARKER 0x7265677368657265ul
+#define FRAME_MARKER 12
+#else
+#define MIN_STACK_FRAME 16
+#define FRAME_LR_SAVE 1
+#define INT_FRAME_SIZE (sizeof(struct pt_regs) + STACK_FRAME_OVERHEAD)
+#define REGS_MARKER 0x72656773ul
+#define FRAME_MARKER 2
+#endif
-EXPORT_SYMBOL(dump_stack);
+unsigned long get_wchan(struct task_struct *p)
+{
+ unsigned long ip, sp;
+ int count = 0;
+
+ if (!p || p == current || p->state == TASK_RUNNING)
+ return 0;
+
+ sp = p->thread.ksp;
+ if (!validate_sp(sp, p, MIN_STACK_FRAME))
+ return 0;
+
+ do {
+ sp = *(unsigned long *)sp;
+ if (!validate_sp(sp, p, MIN_STACK_FRAME))
+ return 0;
+ if (count > 0) {
+ ip = ((unsigned long *)sp)[FRAME_LR_SAVE];
+ if (!in_sched_functions(ip))
+ return ip;
+ }
+ } while (count++ < 16);
+ return 0;
+}
+EXPORT_SYMBOL(get_wchan);
+
+static int kstack_depth_to_print = 64;
void show_stack(struct task_struct *tsk, unsigned long *stack)
{
- unsigned long sp, stack_top, prev_sp, ret;
+ unsigned long sp, ip, lr, newsp;
int count = 0;
- unsigned long next_exc = 0;
- struct pt_regs *regs;
- extern char ret_from_except, ret_from_except_full, ret_from_syscall;
+ int firstframe = 1;
sp = (unsigned long) stack;
if (tsk == NULL)
@@ -669,65 +871,45 @@
sp = tsk->thread.ksp;
}
- prev_sp = (unsigned long) (tsk->thread_info + 1);
- stack_top = (unsigned long) tsk->thread_info + THREAD_SIZE;
- while (count < 16 && sp > prev_sp && sp < stack_top && (sp & 3) == 0) {
- if (count == 0) {
- printk("Call trace:");
-#ifdef CONFIG_KALLSYMS
- printk("\n");
-#endif
- } else {
- if (next_exc) {
- ret = next_exc;
- next_exc = 0;
- } else
- ret = *(unsigned long *)(sp + 4);
- printk(" [%08lx] ", ret);
-#ifdef CONFIG_KALLSYMS
- print_symbol("%s", ret);
- printk("\n");
-#endif
- if (ret == (unsigned long) &ret_from_except
- || ret == (unsigned long) &ret_from_except_full
- || ret == (unsigned long) &ret_from_syscall) {
- /* sp + 16 points to an exception frame */
- regs = (struct pt_regs *) (sp + 16);
- if (sp + 16 + sizeof(*regs) <= stack_top)
- next_exc = regs->nip;
- }
- }
- ++count;
- sp = *(unsigned long *)sp;
- }
-#ifndef CONFIG_KALLSYMS
- if (count > 0)
- printk("\n");
-#endif
-}
-
-unsigned long get_wchan(struct task_struct *p)
-{
- unsigned long ip, sp;
- int count = 0;
-
- if (!p || p == current || p->state == TASK_RUNNING)
- return 0;
-
- sp = p->thread.ksp;
- if (!validate_sp(sp, p, 16))
- return 0;
-
+ lr = 0;
+ printk("Call Trace:\n");
do {
- sp = *(unsigned long *)sp;
- if (!validate_sp(sp, p, 16))
- return 0;
- if (count > 0) {
- ip = *(unsigned long *)(sp + 4);
- if (!in_sched_functions(ip))
- return ip;
+ if (!validate_sp(sp, tsk, MIN_STACK_FRAME))
+ return;
+
+ stack = (unsigned long *) sp;
+ newsp = stack[0];
+ ip = stack[FRAME_LR_SAVE];
+ if (!firstframe || ip != lr) {
+ printk("["REG"] ["REG"] ", sp, ip);
+ print_symbol("%s", ip);
+ if (firstframe)
+ printk(" (unreliable)");
+ printk("\n");
}
- } while (count++ < 16);
- return 0;
+ firstframe = 0;
+
+ /*
+ * See if this is an exception frame.
+ * We look for the "regshere" marker in the current frame.
+ */
+ if (validate_sp(sp, tsk, INT_FRAME_SIZE)
+ && stack[FRAME_MARKER] == REGS_MARKER) {
+ struct pt_regs *regs = (struct pt_regs *)
+ (sp + STACK_FRAME_OVERHEAD);
+ printk("--- Exception: %lx", regs->trap);
+ print_symbol(" at %s\n", regs->nip);
+ lr = regs->link;
+ print_symbol(" LR = %s\n", lr);
+ firstframe = 1;
+ }
+
+ sp = newsp;
+ } while (count++ < kstack_depth_to_print);
}
-EXPORT_SYMBOL(get_wchan);
+
+void dump_stack(void)
+{
+ show_stack(current, NULL);
+}
+EXPORT_SYMBOL(dump_stack);
diff --git a/include/asm-powerpc/elf.h b/include/asm-powerpc/elf.h
index f0a6779..d22b100 100644
--- a/include/asm-powerpc/elf.h
+++ b/include/asm-powerpc/elf.h
@@ -214,10 +214,8 @@
but it's not easy, and we've already done it here. */
# define ELF_HWCAP (cur_cpu_spec->cpu_user_features)
#ifdef __powerpc64__
-# define ELF_PLAT_INIT(_r, load_addr) do { \
- memset(_r->gpr, 0, sizeof(_r->gpr)); \
- _r->ctr = _r->link = _r->xer = _r->ccr = 0; \
- _r->gpr[2] = load_addr; \
+# define ELF_PLAT_INIT(_r, load_addr) do { \
+ _r->gpr[2] = load_addr; \
} while (0)
#endif /* __powerpc64__ */