[S390] ptrace cleanup

Overhaul program event recording and the code dealing with the ptrace
user space interface.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c
index 33982e7..fe03c14 100644
--- a/arch/s390/kernel/asm-offsets.c
+++ b/arch/s390/kernel/asm-offsets.c
@@ -23,14 +23,16 @@
 {
 	DEFINE(__THREAD_info, offsetof(struct task_struct, stack));
 	DEFINE(__THREAD_ksp, offsetof(struct task_struct, thread.ksp));
-	DEFINE(__THREAD_per, offsetof(struct task_struct, thread.per_info));
 	DEFINE(__THREAD_mm_segment, offsetof(struct task_struct, thread.mm_segment));
 	BLANK();
 	DEFINE(__TASK_pid, offsetof(struct task_struct, pid));
 	BLANK();
-	DEFINE(__PER_atmid, offsetof(per_struct, lowcore.words.perc_atmid));
-	DEFINE(__PER_address, offsetof(per_struct, lowcore.words.address));
-	DEFINE(__PER_access_id, offsetof(per_struct, lowcore.words.access_id));
+	DEFINE(__THREAD_per_cause,
+	       offsetof(struct task_struct, thread.per_event.cause));
+	DEFINE(__THREAD_per_address,
+	       offsetof(struct task_struct, thread.per_event.address));
+	DEFINE(__THREAD_per_paid,
+	       offsetof(struct task_struct, thread.per_event.paid));
 	BLANK();
 	DEFINE(__TI_task, offsetof(struct thread_info, task));
 	DEFINE(__TI_domain, offsetof(struct thread_info, exec_domain));
@@ -85,9 +87,9 @@
 	DEFINE(__LC_PGM_ILC, offsetof(struct _lowcore, pgm_ilc));
 	DEFINE(__LC_PGM_INT_CODE, offsetof(struct _lowcore, pgm_code));
 	DEFINE(__LC_TRANS_EXC_CODE, offsetof(struct _lowcore, trans_exc_code));
-	DEFINE(__LC_PER_ATMID, offsetof(struct _lowcore, per_perc_atmid));
+	DEFINE(__LC_PER_CAUSE, offsetof(struct _lowcore, per_perc_atmid));
 	DEFINE(__LC_PER_ADDRESS, offsetof(struct _lowcore, per_address));
-	DEFINE(__LC_PER_ACCESS_ID, offsetof(struct _lowcore, per_access_id));
+	DEFINE(__LC_PER_PAID, offsetof(struct _lowcore, per_access_id));
 	DEFINE(__LC_AR_MODE_ID, offsetof(struct _lowcore, ar_access_id));
 	DEFINE(__LC_SUBCHANNEL_ID, offsetof(struct _lowcore, subchannel_id));
 	DEFINE(__LC_SUBCHANNEL_NR, offsetof(struct _lowcore, subchannel_nr));
diff --git a/arch/s390/kernel/compat_ptrace.h b/arch/s390/kernel/compat_ptrace.h
index 3141025..12b8238 100644
--- a/arch/s390/kernel/compat_ptrace.h
+++ b/arch/s390/kernel/compat_ptrace.h
@@ -4,40 +4,19 @@
 #include <asm/ptrace.h>    /* needed for NUM_CR_WORDS */
 #include "compat_linux.h"  /* needed for psw_compat_t */
 
-typedef struct {
-	__u32 cr[NUM_CR_WORDS];
-} per_cr_words32;
+struct compat_per_struct_kernel {
+	__u32 cr9;		/* PER control bits */
+	__u32 cr10;		/* PER starting address */
+	__u32 cr11;		/* PER ending address */
+	__u32 bits;		/* Obsolete software bits */
+	__u32 starting_addr;	/* User specified start address */
+	__u32 ending_addr;	/* User specified end address */
+	__u16 perc_atmid;	/* PER trap ATMID */
+	__u32 address;		/* PER trap instruction address */
+	__u8  access_id;	/* PER trap access identification */
+};
 
-typedef struct {
-	__u16          perc_atmid;          /* 0x096 */
-	__u32          address;             /* 0x098 */
-	__u8           access_id;           /* 0x0a1 */
-} per_lowcore_words32;
-
-typedef struct {
-	union {
-		per_cr_words32   words;
-	} control_regs;
-	/*
-	 * Use these flags instead of setting em_instruction_fetch
-	 * directly they are used so that single stepping can be
-	 * switched on & off while not affecting other tracing
-	 */
-	unsigned  single_step       : 1;
-	unsigned  instruction_fetch : 1;
-	unsigned                    : 30;
-	/*
-	 * These addresses are copied into cr10 & cr11 if single
-	 * stepping is switched off
-	 */
-	__u32     starting_addr;
-	__u32     ending_addr;
-	union {
-		per_lowcore_words32 words;
-	} lowcore; 
-} per_struct32;
-
-struct user_regs_struct32
+struct compat_user_regs_struct
 {
 	psw_compat_t psw;
 	u32 gprs[NUM_GPRS];
@@ -50,14 +29,14 @@
 	 * itself as there is no "official" ptrace interface for hardware
 	 * watchpoints. This is the way intel does it.
 	 */
-	per_struct32 per_info;
+	struct compat_per_struct_kernel per_info;
 	u32  ieee_instruction_pointer;	/* obsolete, always 0 */
 };
 
-struct user32 {
+struct compat_user {
 	/* We start with the registers, to mimic the way that "memory"
 	   is returned from the ptrace(3,...) function.  */
-	struct user_regs_struct32 regs; /* Where the registers are actually stored */
+	struct compat_user_regs_struct regs;
 	/* The rest of this junk is to help gdb figure out what goes where */
 	u32 u_tsize;		/* Text segment size (pages). */
 	u32 u_dsize;	        /* Data segment size (pages). */
@@ -79,6 +58,6 @@
 	__u32   len;
 	__u32   kernel_addr;
 	__u32   process_addr;
-} ptrace_area_emu31;
+} compat_ptrace_area;
 
 #endif /* _PTRACE32_H */
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index af8bd3b..648f642 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -48,7 +48,7 @@
 SP_SIZE      =	STACK_FRAME_OVERHEAD + __PT_SIZE
 
 _TIF_WORK_SVC = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \
-		 _TIF_MCCK_PENDING | _TIF_RESTART_SVC | _TIF_SINGLE_STEP )
+		 _TIF_MCCK_PENDING | _TIF_RESTART_SVC | _TIF_PER_TRAP )
 _TIF_WORK_INT = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \
 		 _TIF_MCCK_PENDING)
 _TIF_SYSCALL = (_TIF_SYSCALL_TRACE>>8 | _TIF_SYSCALL_AUDIT>>8 | \
@@ -200,31 +200,21 @@
 	.globl	__switch_to
 __switch_to:
 	basr	%r1,0
-__switch_to_base:
-	tm	__THREAD_per(%r3),0xe8		# new process is using per ?
-	bz	__switch_to_noper-__switch_to_base(%r1)	# if not we're fine
-	stctl	%c9,%c11,__SF_EMPTY(%r15)	# We are using per stuff
-	clc	__THREAD_per(12,%r3),__SF_EMPTY(%r15)
-	be	__switch_to_noper-__switch_to_base(%r1)	# we got away w/o bashing TLB's
-	lctl	%c9,%c11,__THREAD_per(%r3)	# Nope we didn't
-__switch_to_noper:
-	l	%r4,__THREAD_info(%r2)		# get thread_info of prev
+0:	l	%r4,__THREAD_info(%r2)		# get thread_info of prev
+	l	%r5,__THREAD_info(%r3)		# get thread_info of next
 	tm	__TI_flags+3(%r4),_TIF_MCCK_PENDING # machine check pending?
-	bz	__switch_to_no_mcck-__switch_to_base(%r1)
-	ni	__TI_flags+3(%r4),255-_TIF_MCCK_PENDING # clear flag in prev
-	l	%r4,__THREAD_info(%r3)		# get thread_info of next
-	oi	__TI_flags+3(%r4),_TIF_MCCK_PENDING # set it in next
-__switch_to_no_mcck:
-	stm	%r6,%r15,__SF_GPRS(%r15)# store __switch_to registers of prev task
-	st	%r15,__THREAD_ksp(%r2)	# store kernel stack to prev->tss.ksp
-	l	%r15,__THREAD_ksp(%r3)	# load kernel stack from next->tss.ksp
-	lm	%r6,%r15,__SF_GPRS(%r15)# load __switch_to registers of next task
-	st	%r3,__LC_CURRENT	# __LC_CURRENT = current task struct
-	lctl	%c4,%c4,__TASK_pid(%r3) # load pid to control reg. 4
-	l	%r3,__THREAD_info(%r3)	# load thread_info from task struct
-	st	%r3,__LC_THREAD_INFO
-	ahi	%r3,STACK_SIZE
-	st	%r3,__LC_KERNEL_STACK	# __LC_KERNEL_STACK = new kernel stack
+	bz	1f-0b(%r1)
+	ni	__TI_flags+3(%r4),255-_TIF_MCCK_PENDING	# clear flag in prev
+	oi	__TI_flags+3(%r5),_TIF_MCCK_PENDING	# set it in next
+1:	stm	%r6,%r15,__SF_GPRS(%r15)	# store gprs of prev task
+	st	%r15,__THREAD_ksp(%r2)		# store kernel stack of prev
+	l	%r15,__THREAD_ksp(%r3)		# load kernel stack of next
+	lctl	%c4,%c4,__TASK_pid(%r3)		# load pid to control reg. 4
+	lm	%r6,%r15,__SF_GPRS(%r15)	# load gprs of next task
+	st	%r3,__LC_CURRENT		# store task struct of next
+	st	%r5,__LC_THREAD_INFO		# store thread info of next
+	ahi	%r5,STACK_SIZE			# end of kernel stack of next
+	st	%r5,__LC_KERNEL_STACK		# store end of kernel stack
 	br	%r14
 
 __critical_start:
@@ -297,7 +287,7 @@
 	bo	BASED(sysc_notify_resume)
 	tm	__TI_flags+3(%r12),_TIF_RESTART_SVC
 	bo	BASED(sysc_restart)
-	tm	__TI_flags+3(%r12),_TIF_SINGLE_STEP
+	tm	__TI_flags+3(%r12),_TIF_PER_TRAP
 	bo	BASED(sysc_singlestep)
 	b	BASED(sysc_return)	# beware of critical section cleanup
 
@@ -321,13 +311,13 @@
 # _TIF_SIGPENDING is set, call do_signal
 #
 sysc_sigpending:
-	ni	__TI_flags+3(%r12),255-_TIF_SINGLE_STEP # clear TIF_SINGLE_STEP
+	ni	__TI_flags+3(%r12),255-_TIF_PER_TRAP # clear TIF_PER_TRAP
 	la	%r2,SP_PTREGS(%r15)	# load pt_regs
 	l	%r1,BASED(.Ldo_signal)
 	basr	%r14,%r1		# call do_signal
 	tm	__TI_flags+3(%r12),_TIF_RESTART_SVC
 	bo	BASED(sysc_restart)
-	tm	__TI_flags+3(%r12),_TIF_SINGLE_STEP
+	tm	__TI_flags+3(%r12),_TIF_PER_TRAP
 	bo	BASED(sysc_singlestep)
 	b	BASED(sysc_return)
 
@@ -353,15 +343,15 @@
 	b	BASED(sysc_nr_ok)	# restart svc
 
 #
-# _TIF_SINGLE_STEP is set, call do_single_step
+# _TIF_PER_TRAP is set, call do_per_trap
 #
 sysc_singlestep:
-	ni	__TI_flags+3(%r12),255-_TIF_SINGLE_STEP # clear TIF_SINGLE_STEP
+	ni	__TI_flags+3(%r12),255-_TIF_PER_TRAP # clear TIF_PER_TRAP
 	xc	SP_SVCNR(2,%r15),SP_SVCNR(%r15)		# clear svc number
 	la	%r2,SP_PTREGS(%r15)	# address of register-save area
 	l	%r1,BASED(.Lhandle_per)	# load adr. of per handler
 	la	%r14,BASED(sysc_return)	# load adr. of system return
-	br	%r1			# branch to do_single_step
+	br	%r1			# branch to do_per_trap
 
 #
 # call tracehook_report_syscall_entry/tracehook_report_syscall_exit before
@@ -520,10 +510,10 @@
 	l	%r1,__TI_task(%r12)
 	tm	SP_PSW+1(%r15),0x01	# kernel per event ?
 	bz	BASED(kernel_per)
-	mvc	__THREAD_per+__PER_atmid(2,%r1),__LC_PER_ATMID
-	mvc	__THREAD_per+__PER_address(4,%r1),__LC_PER_ADDRESS
-	mvc	__THREAD_per+__PER_access_id(1,%r1),__LC_PER_ACCESS_ID
-	oi	__TI_flags+3(%r12),_TIF_SINGLE_STEP # set TIF_SINGLE_STEP
+	mvc	__THREAD_per_cause(2,%r1),__LC_PER_CAUSE
+	mvc	__THREAD_per_address(4,%r1),__LC_PER_ADDRESS
+	mvc	__THREAD_per_paid(1,%r1),__LC_PER_PAID
+	oi	__TI_flags+3(%r12),_TIF_PER_TRAP # set TIF_PER_TRAP
 	l	%r3,__LC_PGM_ILC	# load program interruption code
 	l	%r4,__LC_TRANS_EXC_CODE
 	REENABLE_IRQS
@@ -551,10 +541,10 @@
 	UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER
 	mvc	__LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER
 	l	%r8,__TI_task(%r12)
-	mvc	__THREAD_per+__PER_atmid(2,%r8),__LC_PER_ATMID
-	mvc	__THREAD_per+__PER_address(4,%r8),__LC_PER_ADDRESS
-	mvc	__THREAD_per+__PER_access_id(1,%r8),__LC_PER_ACCESS_ID
-	oi	__TI_flags+3(%r12),_TIF_SINGLE_STEP # set TIF_SINGLE_STEP
+	mvc	__THREAD_per_cause(2,%r8),__LC_PER_CAUSE
+	mvc	__THREAD_per_address(4,%r8),__LC_PER_ADDRESS
+	mvc	__THREAD_per_paid(1,%r8),__LC_PER_PAID
+	oi	__TI_flags+3(%r12),_TIF_PER_TRAP # set TIF_PER_TRAP
 	stosm	__SF_EMPTY(%r15),0x03	# reenable interrupts
 	lm	%r2,%r6,SP_R2(%r15)	# load svc arguments
 	b	BASED(sysc_do_svc)
@@ -1056,7 +1046,7 @@
 .Ldo_signal:	.long	do_signal
 .Ldo_notify_resume:
 		.long	do_notify_resume
-.Lhandle_per:	.long	do_single_step
+.Lhandle_per:	.long	do_per_trap
 .Ldo_execve:	.long	do_execve
 .Lexecve_tail:	.long	execve_tail
 .Ljump_table:	.long	pgm_check_table
diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h
index 95c1dfc..17a6f83 100644
--- a/arch/s390/kernel/entry.h
+++ b/arch/s390/kernel/entry.h
@@ -12,7 +12,7 @@
 
 extern int sysctl_userprocess_debug;
 
-void do_single_step(struct pt_regs *regs);
+void do_per_trap(struct pt_regs *regs);
 void syscall_trace(struct pt_regs *regs, int entryexit);
 void kernel_stack_overflow(struct pt_regs * regs);
 void do_signal(struct pt_regs *regs);
diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S
index 1c0dce5..9d3603d 100644
--- a/arch/s390/kernel/entry64.S
+++ b/arch/s390/kernel/entry64.S
@@ -51,7 +51,7 @@
 STACK_SIZE  = 1 << STACK_SHIFT
 
 _TIF_WORK_SVC = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \
-		 _TIF_MCCK_PENDING | _TIF_RESTART_SVC | _TIF_SINGLE_STEP )
+		 _TIF_MCCK_PENDING | _TIF_RESTART_SVC | _TIF_PER_TRAP )
 _TIF_WORK_INT = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \
 		 _TIF_MCCK_PENDING)
 _TIF_SYSCALL = (_TIF_SYSCALL_TRACE>>8 | _TIF_SYSCALL_AUDIT>>8 | \
@@ -208,30 +208,21 @@
  */
 	.globl	__switch_to
 __switch_to:
-	tm	__THREAD_per+4(%r3),0xe8 # is the new process using per ?
-	jz	__switch_to_noper		# if not we're fine
-	stctg	%c9,%c11,__SF_EMPTY(%r15)# We are using per stuff
-	clc	__THREAD_per(24,%r3),__SF_EMPTY(%r15)
-	je	__switch_to_noper	     # we got away without bashing TLB's
-	lctlg	%c9,%c11,__THREAD_per(%r3)	# Nope we didn't
-__switch_to_noper:
-	lg	%r4,__THREAD_info(%r2)		    # get thread_info of prev
+	lg	%r4,__THREAD_info(%r2)		# get thread_info of prev
+	lg	%r5,__THREAD_info(%r3)		# get thread_info of next
 	tm	__TI_flags+7(%r4),_TIF_MCCK_PENDING # machine check pending?
-	jz	__switch_to_no_mcck
-	ni	__TI_flags+7(%r4),255-_TIF_MCCK_PENDING # clear flag in prev
-	lg	%r4,__THREAD_info(%r3)		    # get thread_info of next
-	oi	__TI_flags+7(%r4),_TIF_MCCK_PENDING # set it in next
-__switch_to_no_mcck:
-	stmg	%r6,%r15,__SF_GPRS(%r15)# store __switch_to registers of prev task
-	stg	%r15,__THREAD_ksp(%r2)	# store kernel stack to prev->tss.ksp
-	lg	%r15,__THREAD_ksp(%r3)	# load kernel stack from next->tss.ksp
-	lmg	%r6,%r15,__SF_GPRS(%r15)# load __switch_to registers of next task
-	stg	%r3,__LC_CURRENT	# __LC_CURRENT = current task struct
-	lctl	%c4,%c4,__TASK_pid(%r3) # load pid to control reg. 4
-	lg	%r3,__THREAD_info(%r3)	# load thread_info from task struct
-	stg	%r3,__LC_THREAD_INFO
-	aghi	%r3,STACK_SIZE
-	stg	%r3,__LC_KERNEL_STACK	# __LC_KERNEL_STACK = new kernel stack
+	jz	0f
+	ni	__TI_flags+7(%r4),255-_TIF_MCCK_PENDING	# clear flag in prev
+	oi	__TI_flags+7(%r5),_TIF_MCCK_PENDING	# set it in next
+0:	stmg	%r6,%r15,__SF_GPRS(%r15)	# store gprs of prev task
+	stg	%r15,__THREAD_ksp(%r2)		# store kernel stack of prev
+	lg	%r15,__THREAD_ksp(%r3)		# load kernel stack of next
+	lctl	%c4,%c4,__TASK_pid(%r3)		# load pid to control reg. 4
+	lmg	%r6,%r15,__SF_GPRS(%r15)	# load gprs of next task
+	stg	%r3,__LC_CURRENT		# store task struct of next
+	stg	%r5,__LC_THREAD_INFO		# store thread info of next
+	aghi	%r5,STACK_SIZE			# end of kernel stack of next
+	stg	%r5,__LC_KERNEL_STACK		# store end of kernel stack
 	br	%r14
 
 __critical_start:
@@ -311,7 +302,7 @@
 	jo	sysc_notify_resume
 	tm	__TI_flags+7(%r12),_TIF_RESTART_SVC
 	jo	sysc_restart
-	tm	__TI_flags+7(%r12),_TIF_SINGLE_STEP
+	tm	__TI_flags+7(%r12),_TIF_PER_TRAP
 	jo	sysc_singlestep
 	j	sysc_return		# beware of critical section cleanup
 
@@ -333,12 +324,12 @@
 # _TIF_SIGPENDING is set, call do_signal
 #
 sysc_sigpending:
-	ni	__TI_flags+7(%r12),255-_TIF_SINGLE_STEP # clear TIF_SINGLE_STEP
+	ni	__TI_flags+7(%r12),255-_TIF_PER_TRAP # clear TIF_PER_TRAP
 	la	%r2,SP_PTREGS(%r15)	# load pt_regs
 	brasl	%r14,do_signal		# call do_signal
 	tm	__TI_flags+7(%r12),_TIF_RESTART_SVC
 	jo	sysc_restart
-	tm	__TI_flags+7(%r12),_TIF_SINGLE_STEP
+	tm	__TI_flags+7(%r12),_TIF_PER_TRAP
 	jo	sysc_singlestep
 	j	sysc_return
 
@@ -363,14 +354,14 @@
 	j	sysc_nr_ok		# restart svc
 
 #
-# _TIF_SINGLE_STEP is set, call do_single_step
+# _TIF_PER_TRAP is set, call do_per_trap
 #
 sysc_singlestep:
-	ni	__TI_flags+7(%r12),255-_TIF_SINGLE_STEP	# clear TIF_SINGLE_STEP
+	ni	__TI_flags+7(%r12),255-_TIF_PER_TRAP	# clear TIF_PER_TRAP
 	xc	SP_SVCNR(2,%r15),SP_SVCNR(%r15)		# clear svc number
 	la	%r2,SP_PTREGS(%r15)	# address of register-save area
 	larl	%r14,sysc_return	# load adr. of system return
-	jg	do_single_step		# branch to do_sigtrap
+	jg	do_per_trap
 
 #
 # call tracehook_report_syscall_entry/tracehook_report_syscall_exit before
@@ -526,10 +517,10 @@
 	lg	%r1,__TI_task(%r12)
 	tm	SP_PSW+1(%r15),0x01	# kernel per event ?
 	jz	kernel_per
-	mvc	__THREAD_per+__PER_atmid(2,%r1),__LC_PER_ATMID
-	mvc	__THREAD_per+__PER_address(8,%r1),__LC_PER_ADDRESS
-	mvc	__THREAD_per+__PER_access_id(1,%r1),__LC_PER_ACCESS_ID
-	oi	__TI_flags+7(%r12),_TIF_SINGLE_STEP # set TIF_SINGLE_STEP
+	mvc	__THREAD_per_cause(2,%r1),__LC_PER_CAUSE
+	mvc	__THREAD_per_address(8,%r1),__LC_PER_ADDRESS
+	mvc	__THREAD_per_paid(1,%r1),__LC_PER_PAID
+	oi	__TI_flags+7(%r12),_TIF_PER_TRAP # set TIF_PER_TRAP
 	lgf	%r3,__LC_PGM_ILC	# load program interruption code
 	lg	%r4,__LC_TRANS_EXC_CODE
 	REENABLE_IRQS
@@ -558,10 +549,10 @@
 	mvc	__LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER
 	LAST_BREAK
 	lg	%r8,__TI_task(%r12)
-	mvc	__THREAD_per+__PER_atmid(2,%r8),__LC_PER_ATMID
-	mvc	__THREAD_per+__PER_address(8,%r8),__LC_PER_ADDRESS
-	mvc	__THREAD_per+__PER_access_id(1,%r8),__LC_PER_ACCESS_ID
-	oi	__TI_flags+7(%r12),_TIF_SINGLE_STEP # set TIF_SINGLE_STEP
+	mvc	__THREAD_per_cause(2,%r8),__LC_PER_CAUSE
+	mvc	__THREAD_per_address(8,%r8),__LC_PER_ADDRESS
+	mvc	__THREAD_per_paid(1,%r8),__LC_PER_PAID
+	oi	__TI_flags+7(%r12),_TIF_PER_TRAP # set TIF_PER_TRAP
 	stosm	__SF_EMPTY(%r15),0x03	# reenable interrupts
 	lmg	%r2,%r6,SP_R2(%r15)	# load svc arguments
 	j	sysc_do_svc
@@ -573,7 +564,7 @@
 	REENABLE_IRQS
 	xc	SP_SVCNR(2,%r15),SP_SVCNR(%r15)	# clear svc number
 	la	%r2,SP_PTREGS(%r15)	# address of register-save area
-	brasl	%r14,do_single_step
+	brasl	%r14,do_per_trap
 	j	pgm_exit
 
 /*
diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c
index f227f52..1d05d66 100644
--- a/arch/s390/kernel/kprobes.c
+++ b/arch/s390/kernel/kprobes.c
@@ -175,13 +175,12 @@
 					struct pt_regs *regs,
 					unsigned long ip)
 {
-	per_cr_bits kprobe_per_regs[1];
+	struct per_regs per_kprobe;
 
-	/* Set up the per control reg info, will pass to lctl */
-	memset(kprobe_per_regs, 0, sizeof(per_cr_bits));
-	kprobe_per_regs[0].em_instruction_fetch = 1;
-	kprobe_per_regs[0].starting_addr = ip;
-	kprobe_per_regs[0].ending_addr = ip;
+	/* Set up the PER control registers %cr9-%cr11 */
+	per_kprobe.control = PER_EVENT_IFETCH;
+	per_kprobe.start = ip;
+	per_kprobe.end = ip;
 
 	/* Save control regs and psw mask */
 	__ctl_store(kcb->kprobe_saved_ctl, 9, 11);
@@ -189,7 +188,7 @@
 		(PSW_MASK_PER | PSW_MASK_IO | PSW_MASK_EXT);
 
 	/* Set PER control regs, turns on single step for the given address */
-	__ctl_load(kprobe_per_regs, 9, 11);
+	__ctl_load(per_kprobe, 9, 11);
 	regs->psw.mask |= PSW_MASK_PER;
 	regs->psw.mask &= ~(PSW_MASK_IO | PSW_MASK_EXT);
 	regs->psw.addr = ip | PSW_ADDR_AMODE;
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index c2fffb5..6ba4222 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -213,8 +213,10 @@
 	/* start new process with ar4 pointing to the correct address space */
 	p->thread.mm_segment = get_fs();
 	/* Don't copy debug registers */
-	memset(&p->thread.per_info, 0, sizeof(p->thread.per_info));
+	memset(&p->thread.per_user, 0, sizeof(p->thread.per_user));
+	memset(&p->thread.per_event, 0, sizeof(p->thread.per_event));
 	clear_tsk_thread_flag(p, TIF_SINGLE_STEP);
+	clear_tsk_thread_flag(p, TIF_PER_TRAP);
 	/* Initialize per thread user and system timer values */
 	ti = task_thread_info(p);
 	ti->user_timer = 0;
diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c
index 019bb71..ef86ad2 100644
--- a/arch/s390/kernel/ptrace.c
+++ b/arch/s390/kernel/ptrace.c
@@ -1,25 +1,9 @@
 /*
- *  arch/s390/kernel/ptrace.c
+ *  Ptrace user space interface.
  *
- *  S390 version
- *    Copyright (C) 1999,2000 IBM Deutschland Entwicklung GmbH, IBM Corporation
- *    Author(s): Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com),
+ *    Copyright IBM Corp. 1999,2010
+ *    Author(s): Denis Joseph Barrow
  *               Martin Schwidefsky (schwidefsky@de.ibm.com)
- *
- *  Based on PowerPC version 
- *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
- *
- *  Derived from "arch/m68k/kernel/ptrace.c"
- *  Copyright (C) 1994 by Hamish Macdonald
- *  Taken from linux/kernel/ptrace.c and modified for M680x0.
- *  linux/kernel/ptrace.c is by Ross Biro 1/23/92, edited by Linus Torvalds
- *
- * Modified by Cort Dougan (cort@cs.nmt.edu) 
- *
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file README.legal in the main directory of
- * this archive for more details.
  */
 
 #include <linux/kernel.h>
@@ -61,76 +45,58 @@
 	REGSET_GENERAL_EXTENDED,
 };
 
-static void
-FixPerRegisters(struct task_struct *task)
+void update_per_regs(struct task_struct *task)
 {
-	struct pt_regs *regs;
-	per_struct *per_info;
-	per_cr_words cr_words;
+	static const struct per_regs per_single_step = {
+		.control = PER_EVENT_IFETCH,
+		.start = 0,
+		.end = PSW_ADDR_INSN,
+	};
+	struct pt_regs *regs = task_pt_regs(task);
+	struct thread_struct *thread = &task->thread;
+	const struct per_regs *new;
+	struct per_regs old;
 
-	regs = task_pt_regs(task);
-	per_info = (per_struct *) &task->thread.per_info;
-	per_info->control_regs.bits.em_instruction_fetch =
-		per_info->single_step | per_info->instruction_fetch;
-	
-	if (per_info->single_step) {
-		per_info->control_regs.bits.starting_addr = 0;
-#ifdef CONFIG_COMPAT
-		if (is_compat_task())
-			per_info->control_regs.bits.ending_addr = 0x7fffffffUL;
-		else
-#endif
-			per_info->control_regs.bits.ending_addr = PSW_ADDR_INSN;
-	} else {
-		per_info->control_regs.bits.starting_addr =
-			per_info->starting_addr;
-		per_info->control_regs.bits.ending_addr =
-			per_info->ending_addr;
-	}
-	/*
-	 * if any of the control reg tracing bits are on 
-	 * we switch on per in the psw
-	 */
-	if (per_info->control_regs.words.cr[0] & PER_EM_MASK)
-		regs->psw.mask |= PSW_MASK_PER;
-	else
+	/* TIF_SINGLE_STEP overrides the user specified PER registers. */
+	new = test_tsk_thread_flag(task, TIF_SINGLE_STEP) ?
+		&per_single_step : &thread->per_user;
+
+	/* Take care of the PER enablement bit in the PSW. */
+	if (!(new->control & PER_EVENT_MASK)) {
 		regs->psw.mask &= ~PSW_MASK_PER;
-
-	if (per_info->control_regs.bits.em_storage_alteration)
-		per_info->control_regs.bits.storage_alt_space_ctl = 1;
-	else
-		per_info->control_regs.bits.storage_alt_space_ctl = 0;
-
-	if (task == current) {
-		__ctl_store(cr_words, 9, 11);
-		if (memcmp(&cr_words, &per_info->control_regs.words,
-			   sizeof(cr_words)) != 0)
-			__ctl_load(per_info->control_regs.words, 9, 11);
+		return;
 	}
+	regs->psw.mask |= PSW_MASK_PER;
+	__ctl_store(old, 9, 11);
+	if (memcmp(new, &old, sizeof(struct per_regs)) != 0)
+		__ctl_load(*new, 9, 11);
 }
 
 void user_enable_single_step(struct task_struct *task)
 {
-	task->thread.per_info.single_step = 1;
-	FixPerRegisters(task);
+	set_tsk_thread_flag(task, TIF_SINGLE_STEP);
+	if (task == current)
+		update_per_regs(task);
 }
 
 void user_disable_single_step(struct task_struct *task)
 {
-	task->thread.per_info.single_step = 0;
-	FixPerRegisters(task);
+	clear_tsk_thread_flag(task, TIF_SINGLE_STEP);
+	if (task == current)
+		update_per_regs(task);
 }
 
 /*
  * Called by kernel/ptrace.c when detaching..
  *
- * Make sure single step bits etc are not set.
+ * Clear all debugging related fields.
  */
-void
-ptrace_disable(struct task_struct *child)
+void ptrace_disable(struct task_struct *task)
 {
-	/* make sure the single step bit is not set. */
-	user_disable_single_step(child);
+	memset(&task->thread.per_user, 0, sizeof(task->thread.per_user));
+	memset(&task->thread.per_event, 0, sizeof(task->thread.per_event));
+	clear_tsk_thread_flag(task, TIF_SINGLE_STEP);
+	clear_tsk_thread_flag(task, TIF_PER_TRAP);
 }
 
 #ifndef CONFIG_64BIT
@@ -139,6 +105,47 @@
 # define __ADDR_MASK 7
 #endif
 
+static inline unsigned long __peek_user_per(struct task_struct *child,
+					    addr_t addr)
+{
+	struct per_struct_kernel *dummy = NULL;
+
+	if (addr == (addr_t) &dummy->cr9)
+		/* Control bits of the active per set. */
+		return test_thread_flag(TIF_SINGLE_STEP) ?
+			PER_EVENT_IFETCH : child->thread.per_user.control;
+	else if (addr == (addr_t) &dummy->cr10)
+		/* Start address of the active per set. */
+		return test_thread_flag(TIF_SINGLE_STEP) ?
+			0 : child->thread.per_user.start;
+	else if (addr == (addr_t) &dummy->cr11)
+		/* End address of the active per set. */
+		return test_thread_flag(TIF_SINGLE_STEP) ?
+			PSW_ADDR_INSN : child->thread.per_user.end;
+	else if (addr == (addr_t) &dummy->bits)
+		/* Single-step bit. */
+		return test_thread_flag(TIF_SINGLE_STEP) ?
+			(1UL << (BITS_PER_LONG - 1)) : 0;
+	else if (addr == (addr_t) &dummy->starting_addr)
+		/* Start address of the user specified per set. */
+		return child->thread.per_user.start;
+	else if (addr == (addr_t) &dummy->ending_addr)
+		/* End address of the user specified per set. */
+		return child->thread.per_user.end;
+	else if (addr == (addr_t) &dummy->perc_atmid)
+		/* PER code, ATMID and AI of the last PER trap */
+		return (unsigned long)
+			child->thread.per_event.cause << (BITS_PER_LONG - 16);
+	else if (addr == (addr_t) &dummy->address)
+		/* Address of the last PER trap */
+		return child->thread.per_event.address;
+	else if (addr == (addr_t) &dummy->access_id)
+		/* Access id of the last PER trap */
+		return (unsigned long)
+			child->thread.per_event.paid << (BITS_PER_LONG - 8);
+	return 0;
+}
+
 /*
  * Read the word at offset addr from the user area of a process. The
  * trouble here is that the information is littered over different
@@ -204,10 +211,10 @@
 
 	} else if (addr < (addr_t) (&dummy->regs.per_info + 1)) {
 		/*
-		 * per_info is found in the thread structure
+		 * Handle access to the per_info structure.
 		 */
-		offset = addr - (addr_t) &dummy->regs.per_info;
-		tmp = *(addr_t *)((addr_t) &child->thread.per_info + offset);
+		addr -= (addr_t) &dummy->regs.per_info;
+		tmp = __peek_user_per(child, addr);
 
 	} else
 		tmp = 0;
@@ -237,6 +244,35 @@
 	return put_user(tmp, (addr_t __user *) data);
 }
 
+static inline void __poke_user_per(struct task_struct *child,
+				   addr_t addr, addr_t data)
+{
+	struct per_struct_kernel *dummy = NULL;
+
+	/*
+	 * There are only three fields in the per_info struct that the
+	 * debugger user can write to.
+	 * 1) cr9: the debugger wants to set a new PER event mask
+	 * 2) starting_addr: the debugger wants to set a new starting
+	 *    address to use with the PER event mask.
+	 * 3) ending_addr: the debugger wants to set a new ending
+	 *    address to use with the PER event mask.
+	 * The user specified PER event mask and the start and end
+	 * addresses are used only if single stepping is not in effect.
+	 * Writes to any other field in per_info are ignored.
+	 */
+	if (addr == (addr_t) &dummy->cr9)
+		/* PER event mask of the user specified per set. */
+		child->thread.per_user.control =
+			data & (PER_EVENT_MASK | PER_CONTROL_MASK);
+	else if (addr == (addr_t) &dummy->starting_addr)
+		/* Starting address of the user specified per set. */
+		child->thread.per_user.start = data;
+	else if (addr == (addr_t) &dummy->ending_addr)
+		/* Ending address of the user specified per set. */
+		child->thread.per_user.end = data;
+}
+
 /*
  * Write a word to the user area of a process at location addr. This
  * operation does have an additional problem compared to peek_user.
@@ -311,19 +347,17 @@
 
 	} else if (addr < (addr_t) (&dummy->regs.per_info + 1)) {
 		/*
-		 * per_info is found in the thread structure 
+		 * Handle access to the per_info structure.
 		 */
-		offset = addr - (addr_t) &dummy->regs.per_info;
-		*(addr_t *)((addr_t) &child->thread.per_info + offset) = data;
+		addr -= (addr_t) &dummy->regs.per_info;
+		__poke_user_per(child, addr, data);
 
 	}
 
-	FixPerRegisters(child);
 	return 0;
 }
 
-static int
-poke_user(struct task_struct *child, addr_t addr, addr_t data)
+static int poke_user(struct task_struct *child, addr_t addr, addr_t data)
 {
 	addr_t mask;
 
@@ -410,12 +444,53 @@
  */
 
 /*
+ * Same as peek_user_per but for a 31 bit program.
+ */
+static inline __u32 __peek_user_per_compat(struct task_struct *child,
+					   addr_t addr)
+{
+	struct compat_per_struct_kernel *dummy32 = NULL;
+
+	if (addr == (addr_t) &dummy32->cr9)
+		/* Control bits of the active per set. */
+		return (__u32) test_thread_flag(TIF_SINGLE_STEP) ?
+			PER_EVENT_IFETCH : child->thread.per_user.control;
+	else if (addr == (addr_t) &dummy32->cr10)
+		/* Start address of the active per set. */
+		return (__u32) test_thread_flag(TIF_SINGLE_STEP) ?
+			0 : child->thread.per_user.start;
+	else if (addr == (addr_t) &dummy32->cr11)
+		/* End address of the active per set. */
+		return test_thread_flag(TIF_SINGLE_STEP) ?
+			PSW32_ADDR_INSN : child->thread.per_user.end;
+	else if (addr == (addr_t) &dummy32->bits)
+		/* Single-step bit. */
+		return (__u32) test_thread_flag(TIF_SINGLE_STEP) ?
+			0x80000000 : 0;
+	else if (addr == (addr_t) &dummy32->starting_addr)
+		/* Start address of the user specified per set. */
+		return (__u32) child->thread.per_user.start;
+	else if (addr == (addr_t) &dummy32->ending_addr)
+		/* End address of the user specified per set. */
+		return (__u32) child->thread.per_user.end;
+	else if (addr == (addr_t) &dummy32->perc_atmid)
+		/* PER code, ATMID and AI of the last PER trap */
+		return (__u32) child->thread.per_event.cause << 16;
+	else if (addr == (addr_t) &dummy32->address)
+		/* Address of the last PER trap */
+		return (__u32) child->thread.per_event.address;
+	else if (addr == (addr_t) &dummy32->access_id)
+		/* Access id of the last PER trap */
+		return (__u32) child->thread.per_event.paid << 24;
+	return 0;
+}
+
+/*
  * Same as peek_user but for a 31 bit program.
  */
 static u32 __peek_user_compat(struct task_struct *child, addr_t addr)
 {
-	struct user32 *dummy32 = NULL;
-	per_struct32 *dummy_per32 = NULL;
+	struct compat_user *dummy32 = NULL;
 	addr_t offset;
 	__u32 tmp;
 
@@ -465,19 +540,10 @@
 
 	} else if (addr < (addr_t) (&dummy32->regs.per_info + 1)) {
 		/*
-		 * per_info is found in the thread structure
+		 * Handle access to the per_info structure.
 		 */
-		offset = addr - (addr_t) &dummy32->regs.per_info;
-		/* This is magic. See per_struct and per_struct32. */
-		if ((offset >= (addr_t) &dummy_per32->control_regs &&
-		     offset < (addr_t) (&dummy_per32->control_regs + 1)) ||
-		    (offset >= (addr_t) &dummy_per32->starting_addr &&
-		     offset <= (addr_t) &dummy_per32->ending_addr) ||
-		    offset == (addr_t) &dummy_per32->lowcore.words.address)
-			offset = offset*2 + 4;
-		else
-			offset = offset*2;
-		tmp = *(__u32 *)((addr_t) &child->thread.per_info + offset);
+		addr -= (addr_t) &dummy32->regs.per_info;
+		tmp = __peek_user_per_compat(child, addr);
 
 	} else
 		tmp = 0;
@@ -498,13 +564,32 @@
 }
 
 /*
+ * Same as poke_user_per but for a 31 bit program.
+ */
+static inline void __poke_user_per_compat(struct task_struct *child,
+					  addr_t addr, __u32 data)
+{
+	struct compat_per_struct_kernel *dummy32 = NULL;
+
+	if (addr == (addr_t) &dummy32->cr9)
+		/* PER event mask of the user specified per set. */
+		child->thread.per_user.control =
+			data & (PER_EVENT_MASK | PER_CONTROL_MASK);
+	else if (addr == (addr_t) &dummy32->starting_addr)
+		/* Starting address of the user specified per set. */
+		child->thread.per_user.start = data;
+	else if (addr == (addr_t) &dummy32->ending_addr)
+		/* Ending address of the user specified per set. */
+		child->thread.per_user.end = data;
+}
+
+/*
  * Same as poke_user but for a 31 bit program.
  */
 static int __poke_user_compat(struct task_struct *child,
 			      addr_t addr, addr_t data)
 {
-	struct user32 *dummy32 = NULL;
-	per_struct32 *dummy_per32 = NULL;
+	struct compat_user *dummy32 = NULL;
 	__u32 tmp = (__u32) data;
 	addr_t offset;
 
@@ -561,37 +646,20 @@
 
 	} else if (addr < (addr_t) (&dummy32->regs.per_info + 1)) {
 		/*
-		 * per_info is found in the thread structure.
+		 * Handle access to the per_info structure.
 		 */
-		offset = addr - (addr_t) &dummy32->regs.per_info;
-		/*
-		 * This is magic. See per_struct and per_struct32.
-		 * By incident the offsets in per_struct are exactly
-		 * twice the offsets in per_struct32 for all fields.
-		 * The 8 byte fields need special handling though,
-		 * because the second half (bytes 4-7) is needed and
-		 * not the first half.
-		 */
-		if ((offset >= (addr_t) &dummy_per32->control_regs &&
-		     offset < (addr_t) (&dummy_per32->control_regs + 1)) ||
-		    (offset >= (addr_t) &dummy_per32->starting_addr &&
-		     offset <= (addr_t) &dummy_per32->ending_addr) ||
-		    offset == (addr_t) &dummy_per32->lowcore.words.address)
-			offset = offset*2 + 4;
-		else
-			offset = offset*2;
-		*(__u32 *)((addr_t) &child->thread.per_info + offset) = tmp;
-
+		addr -= (addr_t) &dummy32->regs.per_info;
+		__poke_user_per_compat(child, addr, data);
 	}
 
-	FixPerRegisters(child);
 	return 0;
 }
 
 static int poke_user_compat(struct task_struct *child,
 			    addr_t addr, addr_t data)
 {
-	if (!is_compat_task() || (addr & 3) || addr > sizeof(struct user32) - 3)
+	if (!is_compat_task() || (addr & 3) ||
+	    addr > sizeof(struct compat_user) - 3)
 		return -EIO;
 
 	return __poke_user_compat(child, addr, data);
@@ -602,7 +670,7 @@
 {
 	unsigned long addr = caddr;
 	unsigned long data = cdata;
-	ptrace_area_emu31 parea; 
+	compat_ptrace_area parea;
 	int copied, ret;
 
 	switch (request) {
diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c
index ee7ac8b..abbb3c3 100644
--- a/arch/s390/kernel/signal.c
+++ b/arch/s390/kernel/signal.c
@@ -505,7 +505,7 @@
 			 * Let tracing know that we've done the handler setup.
 			 */
 			tracehook_signal_handler(signr, &info, &ka, regs,
-					current->thread.per_info.single_step);
+					test_thread_flag(TIF_SINGLE_STEP));
 		}
 		return;
 	}
diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c
index 4f0cecb..5eb78dd 100644
--- a/arch/s390/kernel/traps.c
+++ b/arch/s390/kernel/traps.c
@@ -365,12 +365,10 @@
 		((regs->psw.addr - (pgm_int_code >> 16)) & PSW_ADDR_INSN);
 }
 
-void __kprobes do_single_step(struct pt_regs *regs)
+void __kprobes do_per_trap(struct pt_regs *regs)
 {
-	if (notify_die(DIE_SSTEP, "sstep", regs, 0, 0,
-					SIGTRAP) == NOTIFY_STOP){
+	if (notify_die(DIE_SSTEP, "sstep", regs, 0, 0, SIGTRAP) == NOTIFY_STOP)
 		return;
-	}
 	if (tracehook_consider_fatal_signal(current, SIGTRAP))
 		force_sig(SIGTRAP, current);
 }