[AVR32] Follow the rules when dealing with the OCD system

The current debug trap handling code does a number of things that are
illegal according to the AVR32 Architecture manual. Most importantly,
it may try to schedule from Debug Mode, thus clearing the D bit, which
can lead to "undefined behaviour".

It seems like this works in most cases, but several people have
observed somewhat unstable behaviour when debugging programs,
including soft lockups. So there's definitely something which is not
right with the existing code.

The new code will never schedule from Debug mode, it will always exit
Debug mode with a "retd" instruction, and if something not running in
Debug mode needs to do something debug-related (like doing a single
step), it will enter debug mode through a "breakpoint" instruction.
The monitor code will then return directly to user space, bypassing
its own saved registers if necessary (since we don't actually care
about the trapped context, only the one that came before.)

This adds three instructions to the common exception handling code,
including one branch. It does not touch super-hot paths like the TLB
miss handler.

Signed-off-by: Haavard Skinnemoen <hskinnemoen@atmel.com>
diff --git a/arch/avr32/kernel/asm-offsets.c b/arch/avr32/kernel/asm-offsets.c
index 97d8658..078cd33 100644
--- a/arch/avr32/kernel/asm-offsets.c
+++ b/arch/avr32/kernel/asm-offsets.c
@@ -21,5 +21,7 @@
 	OFFSET(TI_flags, thread_info, flags);
 	OFFSET(TI_cpu, thread_info, cpu);
 	OFFSET(TI_preempt_count, thread_info, preempt_count);
+	OFFSET(TI_rar_saved, thread_info, rar_saved);
+	OFFSET(TI_rsr_saved, thread_info, rsr_saved);
 	OFFSET(TI_restart_block, thread_info, restart_block);
 }
diff --git a/arch/avr32/kernel/entry-avr32b.S b/arch/avr32/kernel/entry-avr32b.S
index d7b93f1..df6c747 100644
--- a/arch/avr32/kernel/entry-avr32b.S
+++ b/arch/avr32/kernel/entry-avr32b.S
@@ -264,16 +264,7 @@
 
 3:	bld	r1, TIF_BREAKPOINT
 	brcc	syscall_exit_cont
-	mfsr	r3, SYSREG_TLBEHI
-	lddsp	r2, sp[REG_PC]
-	andl	r3, 0xff, COH
-	lsl	r3, 1
-	sbr	r3, 30
-	sbr	r3, 0
-	mtdr	OCD_BWA2A, r2
-	mtdr	OCD_BWC2A, r3
-	rjmp	syscall_exit_cont
-
+	rjmp	enter_monitor_mode
 
 	/* The slow path of the TLB miss handler */
 page_table_not_present:
@@ -288,11 +279,16 @@
 	rjmp	ret_from_exception
 
 	/* This function expects to find offending PC in SYSREG_RAR_EX */
+	.type	save_full_context_ex, @function
+	.align	2
 save_full_context_ex:
+	mfsr	r11, SYSREG_RAR_EX
+	sub	r9, pc, . - debug_trampoline
 	mfsr	r8, SYSREG_RSR_EX
+	cp.w	r9, r11
+	breq	3f
 	mov	r12, r8
 	andh	r8, (MODE_MASK >> 16), COH
-	mfsr	r11, SYSREG_RAR_EX
 	brne	2f
 
 1:	pushm	r11, r12	/* PC and SR */
@@ -303,6 +299,21 @@
 	stdsp	sp[4], r10	/* replace saved SP */
 	rjmp	1b
 
+	/*
+	 * The debug handler set up a trampoline to make us
+	 * automatically enter monitor mode upon return, but since
+	 * we're saving the full context, we must assume that the
+	 * exception handler might want to alter the return address
+	 * and/or status register. So we need to restore the original
+	 * context and enter monitor mode manually after the exception
+	 * has been handled.
+	 */
+3:	get_thread_info r8
+	ld.w	r11, r8[TI_rar_saved]
+	ld.w	r12, r8[TI_rsr_saved]
+	rjmp	1b
+	.size	save_full_context_ex, . - save_full_context_ex
+
 	/* Low-level exception handlers */
 handle_critical:
 	pushm	r12
@@ -439,6 +450,7 @@
 ret_from_exception:
 	mask_interrupts
 	lddsp	r4, sp[REG_SR]
+
 	andh	r4, (MODE_MASK >> 16), COH
 	brne	fault_resume_kernel
 
@@ -515,34 +527,76 @@
 
 2:	bld	r1, TIF_BREAKPOINT
 	brcc	fault_resume_user
-	mfsr	r3, SYSREG_TLBEHI
-	lddsp	r2, sp[REG_PC]
-	andl	r3, 0xff, COH
-	lsl	r3, 1
-	sbr	r3, 30
-	sbr	r3, 0
-	mtdr	OCD_BWA2A, r2
-	mtdr	OCD_BWC2A, r3
-	rjmp	fault_resume_user
+	rjmp	enter_monitor_mode
 
-	/* If we get a debug trap from privileged context we end up here */
-handle_debug_priv:
-	/* Fix up LR and SP in regs. r1 contains the mode we came from */
-	mfsr	r2, SYSREG_SR
-	mov	r3, r2
-	bfins	r2, r1, SYSREG_MODE_OFFSET, SYSREG_MODE_SIZE
-	mtsr	SYSREG_SR, r2
+	.section .kprobes.text, "ax", @progbits
+	.type	handle_debug, @function
+handle_debug:
+	sub	sp, 4		/* r12_orig */
+	stmts	--sp, r0-lr
+	mfsr	r8, SYSREG_RAR_DBG
+	mfsr	r9, SYSREG_RSR_DBG
+	unmask_exceptions
+	pushm	r8-r9
+	bfextu	r9, r9, SYSREG_MODE_OFFSET, SYSREG_MODE_SIZE
+	brne	debug_fixup_regs
+
+.Ldebug_fixup_cont:
+#ifdef CONFIG_TRACE_IRQFLAGS
+	rcall	trace_hardirqs_off
+#endif
+	mov	r12, sp
+	rcall	do_debug
+	mov	sp, r12
+
+	lddsp	r2, sp[REG_SR]
+	bfextu	r3, r2, SYSREG_MODE_OFFSET, SYSREG_MODE_SIZE
+	brne	debug_resume_kernel
+
+	get_thread_info r0
+	ld.w	r1, r0[TI_flags]
+	mov	r2, _TIF_DBGWORK_MASK
+	tst	r1, r2
+	brne	debug_exit_work
+
+	bld	r1, TIF_SINGLE_STEP
+	brcc	1f
+	mfdr	r4, OCD_DC
+	sbr	r4, OCD_DC_SS_BIT
+	mtdr	OCD_DC, r4
+
+1:	popm	r10,r11
+	mask_exceptions
+	mtsr	SYSREG_RSR_DBG, r11
+	mtsr	SYSREG_RAR_DBG, r10
+#ifdef CONFIG_TRACE_IRQFLAGS
+	rcall	trace_hardirqs_on
+1:
+#endif
+	ldmts	sp++, r0-lr
+	sub	sp, -4
+	retd
+	.size	handle_debug, . - handle_debug
+
+	/* Mode of the trapped context is in r9 */
+	.type	debug_fixup_regs, @function
+debug_fixup_regs:
+	mfsr	r8, SYSREG_SR
+	mov	r10, r8
+	bfins	r8, r9, SYSREG_MODE_OFFSET, SYSREG_MODE_SIZE
+	mtsr	SYSREG_SR, r8
 	sub	pc, -2
 	stdsp	sp[REG_LR], lr
-	mtsr	SYSREG_SR, r3
+	mtsr	SYSREG_SR, r10
 	sub	pc, -2
-	sub	r10, sp, -FRAME_SIZE_FULL
-	stdsp	sp[REG_SP], r10
-	mov	r12, sp
-	rcall	do_debug_priv
+	sub	r8, sp, -FRAME_SIZE_FULL
+	stdsp	sp[REG_SP], r8
+	rjmp	.Ldebug_fixup_cont
+	.size	debug_fixup_regs, . - debug_fixup_regs
 
-	/* Now, put everything back */
-	ssrf	SR_EM_BIT
+	.type	debug_resume_kernel, @function
+debug_resume_kernel:
+	mask_exceptions
 	popm	r10, r11
 	mtsr	SYSREG_RAR_DBG, r10
 	mtsr	SYSREG_RSR_DBG, r11
@@ -553,93 +607,44 @@
 1:
 #endif
 	mfsr	r2, SYSREG_SR
-	mov	r3, r2
-	bfins	r2, r1, SYSREG_MODE_OFFSET, SYSREG_MODE_SIZE
+	mov	r1, r2
+	bfins	r2, r3, SYSREG_MODE_OFFSET, SYSREG_MODE_SIZE
 	mtsr	SYSREG_SR, r2
 	sub	pc, -2
 	popm	lr
-	mtsr	SYSREG_SR, r3
+	mtsr	SYSREG_SR, r1
 	sub	pc, -2
 	sub	sp, -4		/* skip SP */
 	popm	r0-r12
 	sub	sp, -4
 	retd
+	.size	debug_resume_kernel, . - debug_resume_kernel
 
+	.type	debug_exit_work, @function
+debug_exit_work:
 	/*
-	 * At this point, everything is masked, that is, interrupts,
-	 * exceptions and debugging traps. We might get called from
-	 * interrupt or exception context in some rare cases, but this
-	 * will be taken care of by do_debug(), so we're not going to
-	 * do a 100% correct context save here.
+	 * We must return from Monitor Mode using a retd, and we must
+	 * not schedule since that involves the D bit in SR getting
+	 * cleared by something other than the debug hardware. This
+	 * may cause undefined behaviour according to the Architecture
+	 * manual.
+	 *
+	 * So we fix up the return address and status and return to a
+	 * stub below in Exception mode. From there, we can follow the
+	 * normal exception return path.
+	 *
+	 * The real return address and status registers are stored on
+	 * the stack in the way the exception return path understands,
+	 * so no need to fix anything up there.
 	 */
-handle_debug:
-	sub	sp, 4		/* r12_orig */
-	stmts	--sp, r0-lr
-	mfsr	r0, SYSREG_RAR_DBG
-	mfsr	r1, SYSREG_RSR_DBG
-#ifdef CONFIG_TRACE_IRQFLAGS
-	rcall	trace_hardirqs_off
-#endif
-	unmask_exceptions
-	stm	--sp, r0, r1
-	bfextu	r1, r1, SYSREG_MODE_OFFSET, SYSREG_MODE_SIZE
-	brne	handle_debug_priv
-
-	mov	r12, sp
-	rcall	do_debug
-
-	lddsp	r10, sp[REG_SR]
-	andh	r10, (MODE_MASK >> 16), COH
-	breq	debug_resume_user
-
-debug_restore_all:
-	popm	r10,r11
-	mask_exceptions
-	mtsr	SYSREG_RSR_DBG, r11
-	mtsr	SYSREG_RAR_DBG, r10
-#ifdef CONFIG_TRACE_IRQFLAGS
-	bld	r11, SYSREG_GM_OFFSET
-	brcc	1f
-	rcall	trace_hardirqs_on
-1:
-#endif
-	ldmts	sp++, r0-lr
-	sub	sp, -4
+	sub	r8, pc, . - fault_exit_work
+	mtsr	SYSREG_RAR_DBG, r8
+	mov	r9, 0
+	orh	r9, hi(SR_EM | SR_GM | MODE_EXCEPTION)
+	mtsr	SYSREG_RSR_DBG, r9
+	sub	pc, -2
 	retd
-
-debug_resume_user:
-	get_thread_info r0
-	mask_interrupts
-
-	ld.w	r1, r0[TI_flags]
-	andl	r1, _TIF_DBGWORK_MASK, COH
-	breq	debug_restore_all
-
-1:	bld	r1, TIF_NEED_RESCHED
-	brcc	2f
-	unmask_interrupts
-	rcall	schedule
-	mask_interrupts
-	ld.w	r1, r0[TI_flags]
-	rjmp	1b
-
-2:	mov	r2, _TIF_SIGPENDING | _TIF_RESTORE_SIGMASK
-	tst	r1, r2
-	breq	3f
-	unmask_interrupts
-	mov	r12, sp
-	mov	r11, r0
-	rcall	do_notify_resume
-	mask_interrupts
-	ld.w	r1, r0[TI_flags]
-	rjmp	1b
-
-3:	bld	r1, TIF_SINGLE_STEP
-	brcc	debug_restore_all
-	mfdr	r2, OCD_DC
-	sbr	r2, OCD_DC_SS_BIT
-	mtdr	OCD_DC, r2
-	rjmp	debug_restore_all
+	.size	debug_exit_work, . - debug_exit_work
 
 	.set	rsr_int0,	SYSREG_RSR_INT0
 	.set	rsr_int1,	SYSREG_RSR_INT1
@@ -764,3 +769,53 @@
 	IRQ_LEVEL 1
 	IRQ_LEVEL 2
 	IRQ_LEVEL 3
+
+	.section .kprobes.text, "ax", @progbits
+	.type	enter_monitor_mode, @function
+enter_monitor_mode:
+	/*
+	 * We need to enter monitor mode to do a single step. The
+	 * monitor code will alter the return address so that we
+	 * return directly to the user instead of returning here.
+	 */
+	breakpoint
+	rjmp	breakpoint_failed
+
+	.size	enter_monitor_mode, . - enter_monitor_mode
+
+	.type	debug_trampoline, @function
+	.global	debug_trampoline
+debug_trampoline:
+	/*
+	 * Save the registers on the stack so that the monitor code
+	 * can find them easily.
+	 */
+	sub	sp, 4		/* r12_orig */
+	stmts	--sp, r0-lr
+	get_thread_info	r0
+	ld.w	r8, r0[TI_rar_saved]
+	ld.w	r9, r0[TI_rsr_saved]
+	pushm	r8-r9
+
+	/*
+	 * The monitor code will alter the return address so we don't
+	 * return here.
+	 */
+	breakpoint
+	rjmp	breakpoint_failed
+	.size	debug_trampoline, . - debug_trampoline
+
+	.type breakpoint_failed, @function
+breakpoint_failed:
+	/*
+	 * Something went wrong. Perhaps the debug hardware isn't
+	 * enabled?
+	 */
+	lda.w	r12, msg_breakpoint_failed
+	mov	r11, sp
+	mov	r10, 9		/* SIGKILL */
+	call	die
+1:	rjmp	1b
+
+msg_breakpoint_failed:
+	.asciz	"Failed to enter Debug Mode"
diff --git a/arch/avr32/kernel/ptrace.c b/arch/avr32/kernel/ptrace.c
index 0de9a6e..002369e 100644
--- a/arch/avr32/kernel/ptrace.c
+++ b/arch/avr32/kernel/ptrace.c
@@ -30,20 +30,22 @@
 
 static void ptrace_single_step(struct task_struct *tsk)
 {
-	pr_debug("ptrace_single_step: pid=%u, SR=0x%08lx\n",
-		 tsk->pid, tsk->thread.cpu_context.sr);
-	if (!(tsk->thread.cpu_context.sr & SR_D)) {
-		/*
-		 * Set a breakpoint at the current pc to force the
-		 * process into debug mode.  The syscall/exception
-		 * exit code will set a breakpoint at the return
-		 * address when this flag is set.
-		 */
-		pr_debug("ptrace_single_step: Setting TIF_BREAKPOINT\n");
-		set_tsk_thread_flag(tsk, TIF_BREAKPOINT);
-	}
+	pr_debug("ptrace_single_step: pid=%u, PC=0x%08lx, SR=0x%08lx\n",
+		 tsk->pid, task_pt_regs(tsk)->pc, task_pt_regs(tsk)->sr);
 
-	/* The monitor code will do the actual step for us */
+	/*
+	 * We can't schedule in Debug mode, so when TIF_BREAKPOINT is
+	 * set, the system call or exception handler will do a
+	 * breakpoint to enter monitor mode before returning to
+	 * userspace.
+	 *
+	 * The monitor code will then notice that TIF_SINGLE_STEP is
+	 * set and return to userspace with single stepping enabled.
+	 * The CPU will then enter monitor mode again after exactly
+	 * one instruction has been executed, and the monitor code
+	 * will then send a SIGTRAP to the process.
+	 */
+	set_tsk_thread_flag(tsk, TIF_BREAKPOINT);
 	set_tsk_thread_flag(tsk, TIF_SINGLE_STEP);
 }
 
@@ -55,23 +57,7 @@
 void ptrace_disable(struct task_struct *child)
 {
 	clear_tsk_thread_flag(child, TIF_SINGLE_STEP);
-}
-
-/*
- * Handle hitting a breakpoint
- */
-static void ptrace_break(struct task_struct *tsk, struct pt_regs *regs)
-{
-	siginfo_t info;
-
-	info.si_signo = SIGTRAP;
-	info.si_errno = 0;
-	info.si_code  = TRAP_BRKPT;
-	info.si_addr  = (void __user *)instruction_pointer(regs);
-
-	pr_debug("ptrace_break: Sending SIGTRAP to PID %u (pc = 0x%p)\n",
-		 tsk->pid, info.si_addr);
-	force_sig_info(SIGTRAP, &info, tsk);
+	clear_tsk_thread_flag(child, TIF_BREAKPOINT);
 }
 
 /*
@@ -84,9 +70,6 @@
 	unsigned long *regs;
 	unsigned long value;
 
-	pr_debug("ptrace_read_user(%p, %#lx, %p)\n",
-		 tsk, offset, data);
-
 	if (offset & 3 || offset >= sizeof(struct user)) {
 		printk("ptrace_read_user: invalid offset 0x%08lx\n", offset);
 		return -EIO;
@@ -98,6 +81,9 @@
 	if (offset < sizeof(struct pt_regs))
 		value = regs[offset / sizeof(regs[0])];
 
+	pr_debug("ptrace_read_user(%s[%u], %#lx, %p) -> %#lx\n",
+		 tsk->comm, tsk->pid, offset, data, value);
+
 	return put_user(value, data);
 }
 
@@ -111,8 +97,11 @@
 {
 	unsigned long *regs;
 
+	pr_debug("ptrace_write_user(%s[%u], %#lx, %#lx)\n",
+			tsk->comm, tsk->pid, offset, value);
+
 	if (offset & 3 || offset >= sizeof(struct user)) {
-		printk("ptrace_write_user: invalid offset 0x%08lx\n", offset);
+		pr_debug("  invalid offset 0x%08lx\n", offset);
 		return -EIO;
 	}
 
@@ -155,9 +144,6 @@
 {
 	int ret;
 
-	pr_debug("arch_ptrace(%ld, %d, %#lx, %#lx)\n",
-		 request, child->pid, addr, data);
-
 	pr_debug("ptrace: Enabling monitor mode...\n");
 	ocd_write(DC, ocd_read(DC) | (1 << OCD_DC_MM_BIT)
 			| (1 << OCD_DC_DBE_BIT));
@@ -241,20 +227,16 @@
 		break;
 	}
 
-	pr_debug("sys_ptrace returning %d (DC = 0x%08lx)\n",
-			ret, ocd_read(DC));
 	return ret;
 }
 
 asmlinkage void syscall_trace(void)
 {
-	pr_debug("syscall_trace called\n");
 	if (!test_thread_flag(TIF_SYSCALL_TRACE))
 		return;
 	if (!(current->ptrace & PT_PTRACED))
 		return;
 
-	pr_debug("syscall_trace: notifying parent\n");
 	/* The 0x80 provides a way for the tracing parent to
 	 * distinguish between a syscall stop and SIGTRAP delivery */
 	ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD)
@@ -273,86 +255,143 @@
 	}
 }
 
-asmlinkage void do_debug_priv(struct pt_regs *regs)
-{
-	unsigned long dc, ds;
-	unsigned long die_val;
-
-	ds = ocd_read(DS);
-
-	pr_debug("do_debug_priv: pc = %08lx, ds = %08lx\n", regs->pc, ds);
-
-	if (ds & (1 << OCD_DS_SSS_BIT))
-		die_val = DIE_SSTEP;
-	else
-		die_val = DIE_BREAKPOINT;
-
-	if (notify_die(die_val, "ptrace", regs, 0, 0, SIGTRAP) == NOTIFY_STOP)
-		return;
-
-	if (likely(ds & (1 << OCD_DS_SSS_BIT))) {
-		extern void itlb_miss(void);
-		extern void tlb_miss_common(void);
-		struct thread_info *ti;
-
-		dc = ocd_read(DC);
-		dc &= ~(1 << OCD_DC_SS_BIT);
-		ocd_write(DC, dc);
-
-		ti = current_thread_info();
-		set_ti_thread_flag(ti, TIF_BREAKPOINT);
-
-		/* The TLB miss handlers don't check thread flags */
-		if ((regs->pc >= (unsigned long)&itlb_miss)
-		    && (regs->pc <= (unsigned long)&tlb_miss_common)) {
-			ocd_write(BWA2A, sysreg_read(RAR_EX));
-			ocd_write(BWC2A, 0x40000001 | (get_asid() << 1));
-		}
-
-		/*
-		 * If we're running in supervisor mode, the breakpoint
-		 * will take us where we want directly, no need to
-		 * single step.
-		 */
-		if ((regs->sr & MODE_MASK) != MODE_SUPERVISOR)
-			set_ti_thread_flag(ti, TIF_SINGLE_STEP);
-	} else {
-		panic("Unable to handle debug trap at pc = %08lx\n",
-		      regs->pc);
-	}
-}
-
 /*
- * Handle breakpoints, single steps and other debuggy things. To keep
- * things simple initially, we run with interrupts and exceptions
- * disabled all the time.
+ * debug_trampoline() is an assembly stub which will store all user
+ * registers on the stack and execute a breakpoint instruction.
+ *
+ * If we single-step into an exception handler which runs with
+ * interrupts disabled the whole time so it doesn't have to check for
+ * pending work, its return address will be modified so that it ends
+ * up returning to debug_trampoline.
+ *
+ * If the exception handler decides to store the user context and
+ * enable interrupts after all, it will restore the original return
+ * address and status register value. Before it returns, it will
+ * notice that TIF_BREAKPOINT is set and execute a breakpoint
+ * instruction.
  */
-asmlinkage void do_debug(struct pt_regs *regs)
+extern void debug_trampoline(void);
+
+asmlinkage struct pt_regs *do_debug(struct pt_regs *regs)
 {
-	unsigned long dc, ds;
+	struct thread_info	*ti;
+	unsigned long		trampoline_addr;
+	u32			status;
+	u32			ctrl;
+	int			code;
 
-	ds = ocd_read(DS);
-	pr_debug("do_debug: pc = %08lx, ds = %08lx\n", regs->pc, ds);
+	status = ocd_read(DS);
+	ti = current_thread_info();
+	code = TRAP_BRKPT;
 
-	if (test_thread_flag(TIF_BREAKPOINT)) {
-		pr_debug("TIF_BREAKPOINT set\n");
-		/* We're taking care of it */
-		clear_thread_flag(TIF_BREAKPOINT);
-		ocd_write(BWC2A, 0);
-	}
+	pr_debug("do_debug: status=0x%08x PC=0x%08lx SR=0x%08lx tif=0x%08lx\n",
+			status, regs->pc, regs->sr, ti->flags);
 
-	if (test_thread_flag(TIF_SINGLE_STEP)) {
-		pr_debug("TIF_SINGLE_STEP set, ds = 0x%08lx\n", ds);
-		if (ds & (1 << OCD_DS_SSS_BIT)) {
-			dc = ocd_read(DC);
-			dc &= ~(1 << OCD_DC_SS_BIT);
-			ocd_write(DC, dc);
+	if (!user_mode(regs)) {
+		unsigned long	die_val = DIE_BREAKPOINT;
 
-			clear_thread_flag(TIF_SINGLE_STEP);
-			ptrace_break(current, regs);
+		if (status & (1 << OCD_DS_SSS_BIT))
+			die_val = DIE_SSTEP;
+
+		if (notify_die(die_val, "ptrace", regs, 0, 0, SIGTRAP)
+				== NOTIFY_STOP)
+			return regs;
+
+		if ((status & (1 << OCD_DS_SWB_BIT))
+				&& test_and_clear_ti_thread_flag(
+					ti, TIF_BREAKPOINT)) {
+			/*
+			 * Explicit breakpoint from trampoline or
+			 * exception/syscall/interrupt handler.
+			 *
+			 * The real saved regs are on the stack right
+			 * after the ones we saved on entry.
+			 */
+			regs++;
+			pr_debug("  -> TIF_BREAKPOINT done, adjusted regs:"
+					"PC=0x%08lx SR=0x%08lx\n",
+					regs->pc, regs->sr);
+			BUG_ON(!user_mode(regs));
+
+			if (test_thread_flag(TIF_SINGLE_STEP)) {
+				pr_debug("Going to do single step...\n");
+				return regs;
+			}
+
+			/*
+			 * No TIF_SINGLE_STEP means we're done
+			 * stepping over a syscall. Do the trap now.
+			 */
+			code = TRAP_TRACE;
+		} else if ((status & (1 << OCD_DS_SSS_BIT))
+				&& test_ti_thread_flag(ti, TIF_SINGLE_STEP)) {
+
+			pr_debug("Stepped into something, "
+					"setting TIF_BREAKPOINT...\n");
+			set_ti_thread_flag(ti, TIF_BREAKPOINT);
+
+			/*
+			 * We stepped into an exception, interrupt or
+			 * syscall handler. Some exception handlers
+			 * don't check for pending work, so we need to
+			 * set up a trampoline just in case.
+			 *
+			 * The exception entry code will undo the
+			 * trampoline stuff if it does a full context
+			 * save (which also means that it'll check for
+			 * pending work later.)
+			 */
+			if ((regs->sr & MODE_MASK) == MODE_EXCEPTION) {
+				trampoline_addr
+					= (unsigned long)&debug_trampoline;
+
+				pr_debug("Setting up trampoline...\n");
+				ti->rar_saved = sysreg_read(RAR_EX);
+				ti->rsr_saved = sysreg_read(RSR_EX);
+				sysreg_write(RAR_EX, trampoline_addr);
+				sysreg_write(RSR_EX, (MODE_EXCEPTION
+							| SR_EM | SR_GM));
+				BUG_ON(ti->rsr_saved & MODE_MASK);
+			}
+
+			/*
+			 * If we stepped into a system call, we
+			 * shouldn't do a single step after we return
+			 * since the return address is right after the
+			 * "scall" instruction we were told to step
+			 * over.
+			 */
+			if ((regs->sr & MODE_MASK) == MODE_SUPERVISOR) {
+				pr_debug("Supervisor; no single step\n");
+				clear_ti_thread_flag(ti, TIF_SINGLE_STEP);
+			}
+
+			ctrl = ocd_read(DC);
+			ctrl &= ~(1 << OCD_DC_SS_BIT);
+			ocd_write(DC, ctrl);
+
+			return regs;
+		} else {
+			printk(KERN_ERR "Unexpected OCD_DS value: 0x%08x\n",
+					status);
+			printk(KERN_ERR "Thread flags: 0x%08lx\n", ti->flags);
+			die("Unhandled debug trap in kernel mode",
+					regs, SIGTRAP);
 		}
-	} else {
-		/* regular breakpoint */
-		ptrace_break(current, regs);
+	} else if (status & (1 << OCD_DS_SSS_BIT)) {
+		/* Single step in user mode */
+		code = TRAP_TRACE;
+
+		ctrl = ocd_read(DC);
+		ctrl &= ~(1 << OCD_DC_SS_BIT);
+		ocd_write(DC, ctrl);
 	}
+
+	pr_debug("Sending SIGTRAP: code=%d PC=0x%08lx SR=0x%08lx\n",
+			code, regs->pc, regs->sr);
+
+	clear_thread_flag(TIF_SINGLE_STEP);
+	_exception(SIGTRAP, regs, code, instruction_pointer(regs));
+
+	return regs;
 }
diff --git a/arch/avr32/kernel/vmlinux.lds.S b/arch/avr32/kernel/vmlinux.lds.S
index ce9ac96..11f08e3 100644
--- a/arch/avr32/kernel/vmlinux.lds.S
+++ b/arch/avr32/kernel/vmlinux.lds.S
@@ -77,10 +77,10 @@
 		. = 0x100;
 		*(.scall.text)
 		*(.irq.text)
+		KPROBES_TEXT
 		TEXT_TEXT
 		SCHED_TEXT
 		LOCK_TEXT
-		KPROBES_TEXT
 		*(.fixup)
 		*(.gnu.warning)
 		_etext = .;
diff --git a/include/asm-avr32/processor.h b/include/asm-avr32/processor.h
index 6a64833..a52576b 100644
--- a/include/asm-avr32/processor.h
+++ b/include/asm-avr32/processor.h
@@ -139,6 +139,9 @@
 extern void show_stack_log_lvl(struct task_struct *tsk, unsigned long sp,
 			       struct pt_regs *regs, const char *log_lvl);
 
+#define task_pt_regs(p) \
+	((struct pt_regs *)(THREAD_SIZE + task_stack_page(p)) - 1)
+
 #define KSTK_EIP(tsk)	((tsk)->thread.cpu_context.pc)
 #define KSTK_ESP(tsk)	((tsk)->thread.cpu_context.ksp)
 
diff --git a/include/asm-avr32/thread_info.h b/include/asm-avr32/thread_info.h
index 67e7aae..184b574 100644
--- a/include/asm-avr32/thread_info.h
+++ b/include/asm-avr32/thread_info.h
@@ -25,6 +25,11 @@
 	unsigned long		flags;		/* low level flags */
 	__u32			cpu;
 	__s32			preempt_count;	/* 0 => preemptable, <0 => BUG */
+	__u32			rar_saved;	/* return address... */
+	__u32			rsr_saved;	/* ...and status register
+						   saved by debug handler
+						   when setting up
+						   trampoline */
 	struct restart_block	restart_block;
 	__u8			supervisor_stack[0];
 };
@@ -78,8 +83,8 @@
 #define TIF_NEED_RESCHED        2       /* rescheduling necessary */
 #define TIF_POLLING_NRFLAG      3       /* true if poll_idle() is polling
 					   TIF_NEED_RESCHED */
-#define TIF_BREAKPOINT		4	/* true if we should break after return */
-#define TIF_SINGLE_STEP		5	/* single step after next break */
+#define TIF_BREAKPOINT		4	/* enter monitor mode on return */
+#define TIF_SINGLE_STEP		5	/* single step in progress */
 #define TIF_MEMDIE		6
 #define TIF_RESTORE_SIGMASK	7	/* restore signal mask in do_signal */
 #define TIF_CPU_GOING_TO_SLEEP	8	/* CPU is entering sleep 0 mode */
@@ -89,7 +94,6 @@
 #define _TIF_SIGPENDING		(1 << TIF_SIGPENDING)
 #define _TIF_NEED_RESCHED	(1 << TIF_NEED_RESCHED)
 #define _TIF_POLLING_NRFLAG	(1 << TIF_POLLING_NRFLAG)
-#define _TIF_BREAKPOINT		(1 << TIF_BREAKPOINT)
 #define _TIF_SINGLE_STEP	(1 << TIF_SINGLE_STEP)
 #define _TIF_MEMDIE		(1 << TIF_MEMDIE)
 #define _TIF_RESTORE_SIGMASK	(1 << TIF_RESTORE_SIGMASK)
@@ -108,6 +112,6 @@
 /* work to do on any return to userspace */
 #define _TIF_ALLWORK_MASK	(_TIF_WORK_MASK | (1 << TIF_SYSCALL_TRACE))
 /* work to do on return from debug mode */
-#define _TIF_DBGWORK_MASK	(_TIF_WORK_MASK | (1 << TIF_SINGLE_STEP))
+#define _TIF_DBGWORK_MASK	(_TIF_WORK_MASK & ~(1 << TIF_BREAKPOINT))
 
 #endif /* __ASM_AVR32_THREAD_INFO_H */