Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/sparc-2.6

* git://git.kernel.org/pub/scm/linux/kernel/git/davem/sparc-2.6:
  sparc64: Handle stack trace attempts before irqstacks are setup.
  sparc64: Implement IRQ stacks.
  sparc: remove include of linux/of_device.h from asm/of_device.h
  sparc64: Fix recursion in stack overflow detection handling.
  sparc/drivers: use linux/of_device.h instead of asm/of_device.h
  sparc64: Don't MAGIC_SYSRQ ifdef smp_fetch_global_regs and support code.
diff --git a/arch/sparc/include/asm/irq_64.h b/arch/sparc/include/asm/irq_64.h
index 3473e25..e3dd930 100644
--- a/arch/sparc/include/asm/irq_64.h
+++ b/arch/sparc/include/asm/irq_64.h
@@ -93,4 +93,8 @@
 void __trigger_all_cpu_backtrace(void);
 #define trigger_all_cpu_backtrace() __trigger_all_cpu_backtrace()
 
+extern void *hardirq_stack[NR_CPUS];
+extern void *softirq_stack[NR_CPUS];
+#define __ARCH_HAS_DO_SOFTIRQ
+
 #endif
diff --git a/arch/sparc/include/asm/of_device.h b/arch/sparc/include/asm/of_device.h
index e5f5aed..bba777a 100644
--- a/arch/sparc/include/asm/of_device.h
+++ b/arch/sparc/include/asm/of_device.h
@@ -30,8 +30,7 @@
 extern void __iomem *of_ioremap(struct resource *res, unsigned long offset, unsigned long size, char *name);
 extern void of_iounmap(struct resource *res, void __iomem *base, unsigned long size);
 
-/* These are just here during the transition */
-#include <linux/of_device.h>
+/* This is just here during the transition */
 #include <linux/of_platform.h>
 
 #endif /* __KERNEL__ */
diff --git a/arch/sparc64/kernel/irq.c b/arch/sparc64/kernel/irq.c
index ba43d85..9b6689d 100644
--- a/arch/sparc64/kernel/irq.c
+++ b/arch/sparc64/kernel/irq.c
@@ -682,10 +682,32 @@
 	       ino, virt_irq);
 }
 
+void *hardirq_stack[NR_CPUS];
+void *softirq_stack[NR_CPUS];
+
+static __attribute__((always_inline)) void *set_hardirq_stack(void)
+{
+	void *orig_sp, *sp = hardirq_stack[smp_processor_id()];
+
+	__asm__ __volatile__("mov %%sp, %0" : "=r" (orig_sp));
+	if (orig_sp < sp ||
+	    orig_sp > (sp + THREAD_SIZE)) {
+		sp += THREAD_SIZE - 192 - STACK_BIAS;
+		__asm__ __volatile__("mov %0, %%sp" : : "r" (sp));
+	}
+
+	return orig_sp;
+}
+static __attribute__((always_inline)) void restore_hardirq_stack(void *orig_sp)
+{
+	__asm__ __volatile__("mov %0, %%sp" : : "r" (orig_sp));
+}
+
 void handler_irq(int irq, struct pt_regs *regs)
 {
 	unsigned long pstate, bucket_pa;
 	struct pt_regs *old_regs;
+	void *orig_sp;
 
 	clear_softint(1 << irq);
 
@@ -703,6 +725,8 @@
 			       "i" (PSTATE_IE)
 			     : "memory");
 
+	orig_sp = set_hardirq_stack();
+
 	while (bucket_pa) {
 		struct irq_desc *desc;
 		unsigned long next_pa;
@@ -719,10 +743,38 @@
 		bucket_pa = next_pa;
 	}
 
+	restore_hardirq_stack(orig_sp);
+
 	irq_exit();
 	set_irq_regs(old_regs);
 }
 
+void do_softirq(void)
+{
+	unsigned long flags;
+
+	if (in_interrupt())
+		return;
+
+	local_irq_save(flags);
+
+	if (local_softirq_pending()) {
+		void *orig_sp, *sp = softirq_stack[smp_processor_id()];
+
+		sp += THREAD_SIZE - 192 - STACK_BIAS;
+
+		__asm__ __volatile__("mov %%sp, %0\n\t"
+				     "mov %1, %%sp"
+				     : "=&r" (orig_sp)
+				     : "r" (sp));
+		__do_softirq();
+		__asm__ __volatile__("mov %0, %%sp"
+				     : : "r" (orig_sp));
+	}
+
+	local_irq_restore(flags);
+}
+
 #ifdef CONFIG_HOTPLUG_CPU
 void fixup_irqs(void)
 {
diff --git a/arch/sparc64/kernel/kstack.h b/arch/sparc64/kernel/kstack.h
new file mode 100644
index 0000000..4248d96
--- /dev/null
+++ b/arch/sparc64/kernel/kstack.h
@@ -0,0 +1,60 @@
+#ifndef _KSTACK_H
+#define _KSTACK_H
+
+#include <linux/thread_info.h>
+#include <linux/sched.h>
+#include <asm/ptrace.h>
+#include <asm/irq.h>
+
+/* SP must be STACK_BIAS adjusted already.  */
+static inline bool kstack_valid(struct thread_info *tp, unsigned long sp)
+{
+	unsigned long base = (unsigned long) tp;
+
+	if (sp >= (base + sizeof(struct thread_info)) &&
+	    sp <= (base + THREAD_SIZE - sizeof(struct sparc_stackf)))
+		return true;
+
+	if (hardirq_stack[tp->cpu]) {
+		base = (unsigned long) hardirq_stack[tp->cpu];
+		if (sp >= base &&
+		    sp <= (base + THREAD_SIZE - sizeof(struct sparc_stackf)))
+			return true;
+		base = (unsigned long) softirq_stack[tp->cpu];
+		if (sp >= base &&
+		    sp <= (base + THREAD_SIZE - sizeof(struct sparc_stackf)))
+			return true;
+	}
+	return false;
+}
+
+/* Does "regs" point to a valid pt_regs trap frame?  */
+static inline bool kstack_is_trap_frame(struct thread_info *tp, struct pt_regs *regs)
+{
+	unsigned long base = (unsigned long) tp;
+	unsigned long addr = (unsigned long) regs;
+
+	if (addr >= base &&
+	    addr <= (base + THREAD_SIZE - sizeof(*regs)))
+		goto check_magic;
+
+	if (hardirq_stack[tp->cpu]) {
+		base = (unsigned long) hardirq_stack[tp->cpu];
+		if (addr >= base &&
+		    addr <= (base + THREAD_SIZE - sizeof(*regs)))
+			goto check_magic;
+		base = (unsigned long) softirq_stack[tp->cpu];
+		if (addr >= base &&
+		    addr <= (base + THREAD_SIZE - sizeof(*regs)))
+			goto check_magic;
+	}
+	return false;
+
+check_magic:
+	if ((regs->magic & ~0x1ff) == PT_REGS_MAGIC)
+		return true;
+	return false;
+
+}
+
+#endif /* _KSTACK_H */
diff --git a/arch/sparc64/kernel/process.c b/arch/sparc64/kernel/process.c
index 7f5debd..15f4178 100644
--- a/arch/sparc64/kernel/process.c
+++ b/arch/sparc64/kernel/process.c
@@ -52,6 +52,8 @@
 #include <asm/irq_regs.h>
 #include <asm/smp.h>
 
+#include "kstack.h"
+
 static void sparc64_yield(int cpu)
 {
 	if (tlb_type != hypervisor)
@@ -235,19 +237,6 @@
 struct global_reg_snapshot global_reg_snapshot[NR_CPUS];
 static DEFINE_SPINLOCK(global_reg_snapshot_lock);
 
-static bool kstack_valid(struct thread_info *tp, struct reg_window *rw)
-{
-	unsigned long thread_base, fp;
-
-	thread_base = (unsigned long) tp;
-	fp = (unsigned long) rw;
-
-	if (fp < (thread_base + sizeof(struct thread_info)) ||
-	    fp >= (thread_base + THREAD_SIZE))
-		return false;
-	return true;
-}
-
 static void __global_reg_self(struct thread_info *tp, struct pt_regs *regs,
 			      int this_cpu)
 {
@@ -264,11 +253,11 @@
 
 		rw = (struct reg_window *)
 			(regs->u_regs[UREG_FP] + STACK_BIAS);
-		if (kstack_valid(tp, rw)) {
+		if (kstack_valid(tp, (unsigned long) rw)) {
 			global_reg_snapshot[this_cpu].i7 = rw->ins[7];
 			rw = (struct reg_window *)
 				(rw->ins[6] + STACK_BIAS);
-			if (kstack_valid(tp, rw))
+			if (kstack_valid(tp, (unsigned long) rw))
 				global_reg_snapshot[this_cpu].rpc = rw->ins[7];
 		}
 	} else {
@@ -828,7 +817,7 @@
 unsigned long get_wchan(struct task_struct *task)
 {
 	unsigned long pc, fp, bias = 0;
-	unsigned long thread_info_base;
+	struct thread_info *tp;
 	struct reg_window *rw;
         unsigned long ret = 0;
 	int count = 0; 
@@ -837,14 +826,12 @@
             task->state == TASK_RUNNING)
 		goto out;
 
-	thread_info_base = (unsigned long) task_stack_page(task);
+	tp = task_thread_info(task);
 	bias = STACK_BIAS;
 	fp = task_thread_info(task)->ksp + bias;
 
 	do {
-		/* Bogus frame pointer? */
-		if (fp < (thread_info_base + sizeof(struct thread_info)) ||
-		    fp >= (thread_info_base + THREAD_SIZE))
+		if (!kstack_valid(tp, fp))
 			break;
 		rw = (struct reg_window *) fp;
 		pc = rw->ins[7];
diff --git a/arch/sparc64/kernel/smp.c b/arch/sparc64/kernel/smp.c
index 27b8177..743ccad 100644
--- a/arch/sparc64/kernel/smp.c
+++ b/arch/sparc64/kernel/smp.c
@@ -858,9 +858,7 @@
 extern unsigned long xcall_flush_tlb_mm;
 extern unsigned long xcall_flush_tlb_pending;
 extern unsigned long xcall_flush_tlb_kernel_range;
-#ifdef CONFIG_MAGIC_SYSRQ
 extern unsigned long xcall_fetch_glob_regs;
-#endif
 extern unsigned long xcall_receive_signal;
 extern unsigned long xcall_new_mmu_context_version;
 #ifdef CONFIG_KGDB
@@ -1005,12 +1003,10 @@
 }
 #endif
 
-#ifdef CONFIG_MAGIC_SYSRQ
 void smp_fetch_global_regs(void)
 {
 	smp_cross_call(&xcall_fetch_glob_regs, 0, 0, 0);
 }
-#endif
 
 /* We know that the window frames of the user have been flushed
  * to the stack before we get here because all callers of us
diff --git a/arch/sparc64/kernel/stacktrace.c b/arch/sparc64/kernel/stacktrace.c
index e9d7f06..4e21d4a 100644
--- a/arch/sparc64/kernel/stacktrace.c
+++ b/arch/sparc64/kernel/stacktrace.c
@@ -5,10 +5,12 @@
 #include <asm/ptrace.h>
 #include <asm/stacktrace.h>
 
+#include "kstack.h"
+
 void save_stack_trace(struct stack_trace *trace)
 {
-	unsigned long ksp, fp, thread_base;
 	struct thread_info *tp = task_thread_info(current);
+	unsigned long ksp, fp;
 
 	stack_trace_flush();
 
@@ -18,23 +20,18 @@
 	);
 
 	fp = ksp + STACK_BIAS;
-	thread_base = (unsigned long) tp;
 	do {
 		struct sparc_stackf *sf;
 		struct pt_regs *regs;
 		unsigned long pc;
 
-		/* Bogus frame pointer? */
-		if (fp < (thread_base + sizeof(struct thread_info)) ||
-		    fp > (thread_base + THREAD_SIZE - sizeof(struct sparc_stackf)))
+		if (!kstack_valid(tp, fp))
 			break;
 
 		sf = (struct sparc_stackf *) fp;
 		regs = (struct pt_regs *) (sf + 1);
 
-		if (((unsigned long)regs <=
-		     (thread_base + THREAD_SIZE - sizeof(*regs))) &&
-		    (regs->magic & ~0x1ff) == PT_REGS_MAGIC) {
+		if (kstack_is_trap_frame(tp, regs)) {
 			if (!(regs->tstate & TSTATE_PRIV))
 				break;
 			pc = regs->tpc;
diff --git a/arch/sparc64/kernel/traps.c b/arch/sparc64/kernel/traps.c
index 404e856..3d92412 100644
--- a/arch/sparc64/kernel/traps.c
+++ b/arch/sparc64/kernel/traps.c
@@ -39,6 +39,7 @@
 #include <asm/prom.h>
 
 #include "entry.h"
+#include "kstack.h"
 
 /* When an irrecoverable trap occurs at tl > 0, the trap entry
  * code logs the trap state registers at every level in the trap
@@ -2115,14 +2116,12 @@
 		struct pt_regs *regs;
 		unsigned long pc;
 
-		/* Bogus frame pointer? */
-		if (fp < (thread_base + sizeof(struct thread_info)) ||
-		    fp >= (thread_base + THREAD_SIZE))
+		if (!kstack_valid(tp, fp))
 			break;
 		sf = (struct sparc_stackf *) fp;
 		regs = (struct pt_regs *) (sf + 1);
 
-		if ((regs->magic & ~0x1ff) == PT_REGS_MAGIC) {
+		if (kstack_is_trap_frame(tp, regs)) {
 			if (!(regs->tstate & TSTATE_PRIV))
 				break;
 			pc = regs->tpc;
diff --git a/arch/sparc64/lib/mcount.S b/arch/sparc64/lib/mcount.S
index 7735a7a..fad90dd 100644
--- a/arch/sparc64/lib/mcount.S
+++ b/arch/sparc64/lib/mcount.S
@@ -48,12 +48,45 @@
 	sub		%g3, STACK_BIAS, %g3
 	cmp		%sp, %g3
 	bg,pt		%xcc, 1f
-	 sethi		%hi(panicstring), %g3
+	 nop
+	lduh		[%g6 + TI_CPU], %g1
+	sethi		%hi(hardirq_stack), %g3
+	or		%g3, %lo(hardirq_stack), %g3
+	sllx		%g1, 3, %g1
+	ldx		[%g3 + %g1], %g7
+	sub		%g7, STACK_BIAS, %g7
+	cmp		%sp, %g7
+	bleu,pt		%xcc, 2f
+	 sethi		%hi(THREAD_SIZE), %g3
+	add		%g7, %g3, %g7
+	cmp		%sp, %g7
+	blu,pn		%xcc, 1f
+2:	 sethi		%hi(softirq_stack), %g3
+	or		%g3, %lo(softirq_stack), %g3
+	ldx		[%g3 + %g1], %g7
+	cmp		%sp, %g7
+	bleu,pt		%xcc, 2f
+	 sethi		%hi(THREAD_SIZE), %g3
+	add		%g7, %g3, %g7
+	cmp		%sp, %g7
+	blu,pn		%xcc, 1f
+	 nop
+	/* If we are already on ovstack, don't hop onto it
+	 * again, we are already trying to output the stack overflow
+	 * message.
+	 */
 	sethi		%hi(ovstack), %g7		! cant move to panic stack fast enough
 	 or		%g7, %lo(ovstack), %g7
-	add		%g7, OVSTACKSIZE, %g7
+	add		%g7, OVSTACKSIZE, %g3
+	sub		%g3, STACK_BIAS + 192, %g3
 	sub		%g7, STACK_BIAS, %g7
-	mov		%g7, %sp
+	cmp		%sp, %g7
+	blu,pn		%xcc, 2f
+	 cmp		%sp, %g3
+	bleu,pn		%xcc, 1f
+	 nop
+2:	mov		%g3, %sp
+	sethi		%hi(panicstring), %g3
 	call		prom_printf
 	 or		%g3, %lo(panicstring), %o0
 	call		prom_halt
diff --git a/arch/sparc64/mm/init.c b/arch/sparc64/mm/init.c
index 4e821b3..217de3e 100644
--- a/arch/sparc64/mm/init.c
+++ b/arch/sparc64/mm/init.c
@@ -49,6 +49,7 @@
 #include <asm/sstate.h>
 #include <asm/mdesc.h>
 #include <asm/cpudata.h>
+#include <asm/irq.h>
 
 #define MAX_PHYS_ADDRESS	(1UL << 42UL)
 #define KPTE_BITMAP_CHUNK_SZ	(256UL * 1024UL * 1024UL)
@@ -1771,6 +1772,16 @@
 	if (tlb_type == hypervisor)
 		sun4v_mdesc_init();
 
+	/* Once the OF device tree and MDESC have been setup, we know
+	 * the list of possible cpus.  Therefore we can allocate the
+	 * IRQ stacks.
+	 */
+	for_each_possible_cpu(i) {
+		/* XXX Use node local allocations... XXX */
+		softirq_stack[i] = __va(lmb_alloc(THREAD_SIZE, THREAD_SIZE));
+		hardirq_stack[i] = __va(lmb_alloc(THREAD_SIZE, THREAD_SIZE));
+	}
+
 	/* Setup bootmem... */
 	last_valid_pfn = end_pfn = bootmem_init(phys_base);
 
diff --git a/arch/sparc64/mm/ultra.S b/arch/sparc64/mm/ultra.S
index ff1dc44..86773e8 100644
--- a/arch/sparc64/mm/ultra.S
+++ b/arch/sparc64/mm/ultra.S
@@ -480,7 +480,6 @@
 	b		rtrap_xcall
 	 ldx		[%sp + PTREGS_OFF + PT_V9_TSTATE], %l1
 
-#ifdef CONFIG_MAGIC_SYSRQ
 	.globl		xcall_fetch_glob_regs
 xcall_fetch_glob_regs:
 	sethi		%hi(global_reg_snapshot), %g1
@@ -511,7 +510,6 @@
 	membar		#StoreStore
 	stx		%g3, [%g1 + GR_SNAP_THREAD]
 	retry
-#endif /* CONFIG_MAGIC_SYSRQ */
 
 #ifdef DCACHE_ALIASING_POSSIBLE
 	.align		32
diff --git a/drivers/input/serio/i8042-sparcio.h b/drivers/input/serio/i8042-sparcio.h
index 66bafe3..692a79e 100644
--- a/drivers/input/serio/i8042-sparcio.h
+++ b/drivers/input/serio/i8042-sparcio.h
@@ -1,10 +1,11 @@
 #ifndef _I8042_SPARCIO_H
 #define _I8042_SPARCIO_H
 
+#include <linux/of_device.h>
+
 #include <asm/io.h>
 #include <asm/oplib.h>
 #include <asm/prom.h>
-#include <asm/of_device.h>
 
 static int i8042_kbd_irq = -1;
 static int i8042_aux_irq = -1;
diff --git a/drivers/sbus/sbus.c b/drivers/sbus/sbus.c
index 73a86d0..9c12924 100644
--- a/drivers/sbus/sbus.c
+++ b/drivers/sbus/sbus.c
@@ -7,13 +7,13 @@
 #include <linux/slab.h>
 #include <linux/init.h>
 #include <linux/device.h>
+#include <linux/of_device.h>
 
 #include <asm/system.h>
 #include <asm/sbus.h>
 #include <asm/dma.h>
 #include <asm/oplib.h>
 #include <asm/prom.h>
-#include <asm/of_device.h>
 #include <asm/bpp.h>
 #include <asm/irq.h>
 
diff --git a/drivers/serial/sunhv.c b/drivers/serial/sunhv.c
index aeeec55..e41766d 100644
--- a/drivers/serial/sunhv.c
+++ b/drivers/serial/sunhv.c
@@ -17,11 +17,11 @@
 #include <linux/slab.h>
 #include <linux/delay.h>
 #include <linux/init.h>
+#include <linux/of_device.h>
 
 #include <asm/hypervisor.h>
 #include <asm/spitfire.h>
 #include <asm/prom.h>
-#include <asm/of_device.h>
 #include <asm/irq.h>
 
 #if defined(CONFIG_MAGIC_SYSRQ)
diff --git a/drivers/serial/sunsab.c b/drivers/serial/sunsab.c
index 15ee497..29b4458 100644
--- a/drivers/serial/sunsab.c
+++ b/drivers/serial/sunsab.c
@@ -32,11 +32,11 @@
 #include <linux/slab.h>
 #include <linux/delay.h>
 #include <linux/init.h>
+#include <linux/of_device.h>
 
 #include <asm/io.h>
 #include <asm/irq.h>
 #include <asm/prom.h>
-#include <asm/of_device.h>
 
 #if defined(CONFIG_SERIAL_SUNSAB_CONSOLE) && defined(CONFIG_MAGIC_SYSRQ)
 #define SUPPORT_SYSRQ
diff --git a/drivers/serial/sunsu.c b/drivers/serial/sunsu.c
index e24e682..a378464 100644
--- a/drivers/serial/sunsu.c
+++ b/drivers/serial/sunsu.c
@@ -35,11 +35,11 @@
 #include <linux/serial_reg.h>
 #include <linux/init.h>
 #include <linux/delay.h>
+#include <linux/of_device.h>
 
 #include <asm/io.h>
 #include <asm/irq.h>
 #include <asm/prom.h>
-#include <asm/of_device.h>
 
 #if defined(CONFIG_SERIAL_SUNSU_CONSOLE) && defined(CONFIG_MAGIC_SYSRQ)
 #define SUPPORT_SYSRQ
diff --git a/drivers/serial/sunzilog.c b/drivers/serial/sunzilog.c
index 0f3d69b..3cb4c8a 100644
--- a/drivers/serial/sunzilog.c
+++ b/drivers/serial/sunzilog.c
@@ -32,11 +32,11 @@
 #include <linux/serio.h>
 #endif
 #include <linux/init.h>
+#include <linux/of_device.h>
 
 #include <asm/io.h>
 #include <asm/irq.h>
 #include <asm/prom.h>
-#include <asm/of_device.h>
 
 #if defined(CONFIG_SERIAL_SUNZILOG_CONSOLE) && defined(CONFIG_MAGIC_SYSRQ)
 #define SUPPORT_SYSRQ