Merge with temp tree to get David's gdb inferior calls patch

diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S
index d3f0938..9353adc 100644
--- a/arch/ia64/kernel/entry.S
+++ b/arch/ia64/kernel/entry.S
@@ -470,18 +470,6 @@
 	br.cond.sptk.many b7
 END(load_switch_stack)
 
-GLOBAL_ENTRY(__ia64_syscall)
-	.regstk 6,0,0,0
-	mov r15=in5				// put syscall number in place
-	break __BREAK_SYSCALL
-	movl r2=errno
-	cmp.eq p6,p7=-1,r10
-	;;
-(p6)	st4 [r2]=r8
-(p6)	mov r8=-1
-	br.ret.sptk.many rp
-END(__ia64_syscall)
-
 GLOBAL_ENTRY(execve)
 	mov r15=__NR_execve			// put syscall number in place
 	break __BREAK_SYSCALL
@@ -637,7 +625,7 @@
  *	      r8-r11: restored (syscall return value(s))
  *		 r12: restored (user-level stack pointer)
  *		 r13: restored (user-level thread pointer)
- *		 r14: cleared
+ *		 r14: set to __kernel_syscall_via_epc
  *		 r15: restored (syscall #)
  *	     r16-r17: cleared
  *		 r18: user-level b6
@@ -658,7 +646,7 @@
  *		  pr: restored (user-level pr)
  *		  b0: restored (user-level rp)
  *	          b6: restored
- *		  b7: cleared
+ *		  b7: set to __kernel_syscall_via_epc
  *	     ar.unat: restored (user-level ar.unat)
  *	      ar.pfs: restored (user-level ar.pfs)
  *	      ar.rsc: restored (user-level ar.rsc)
@@ -704,72 +692,79 @@
 	;;
 (p6)	ld4 r31=[r18]				// load current_thread_info()->flags
 	ld8 r19=[r2],PT(B6)-PT(LOADRS)		// load ar.rsc value for "loadrs"
-	mov b7=r0		// clear b7
+	nop.i 0
 	;;
-	ld8 r23=[r3],PT(R11)-PT(AR_BSPSTORE)	// load ar.bspstore (may be garbage)
+	mov r16=ar.bsp				// M2  get existing backing store pointer
 	ld8 r18=[r2],PT(R9)-PT(B6)		// load b6
 (p6)	and r15=TIF_WORK_MASK,r31		// any work other than TIF_SYSCALL_TRACE?
 	;;
-	mov r16=ar.bsp				// M2  get existing backing store pointer
+	ld8 r23=[r3],PT(R11)-PT(AR_BSPSTORE)	// load ar.bspstore (may be garbage)
 (p6)	cmp4.ne.unc p6,p0=r15, r0		// any special work pending?
 (p6)	br.cond.spnt .work_pending_syscall
 	;;
 	// start restoring the state saved on the kernel stack (struct pt_regs):
 	ld8 r9=[r2],PT(CR_IPSR)-PT(R9)
 	ld8 r11=[r3],PT(CR_IIP)-PT(R11)
-	mov f6=f0		// clear f6
+(pNonSys) break 0		//      bug check: we shouldn't be here if pNonSys is TRUE!
 	;;
 	invala			// M0|1 invalidate ALAT
-	rsm psr.i | psr.ic	// M2 initiate turning off of interrupt and interruption collection
-	mov f9=f0		// clear f9
+	rsm psr.i | psr.ic	// M2   turn off interrupts and interruption collection
+	cmp.eq p9,p0=r0,r0	// A    set p9 to indicate that we should restore cr.ifs
 
-	ld8 r29=[r2],16		// load cr.ipsr
-	ld8 r28=[r3],16			// load cr.iip
-	mov f8=f0		// clear f8
+	ld8 r29=[r2],16		// M0|1 load cr.ipsr
+	ld8 r28=[r3],16		// M0|1 load cr.iip
+	mov r22=r0		// A    clear r22
 	;;
 	ld8 r30=[r2],16		// M0|1 load cr.ifs
 	ld8 r25=[r3],16		// M0|1 load ar.unat
-	cmp.eq p9,p0=r0,r0	// set p9 to indicate that we should restore cr.ifs
-	;;
-	ld8 r26=[r2],PT(B0)-PT(AR_PFS)	// M0|1 load ar.pfs
-(pKStk)	mov r22=psr		// M2 read PSR now that interrupts are disabled
-	mov f10=f0		// clear f10
-	;;
-	ld8 r21=[r2],PT(AR_RNAT)-PT(B0) // load b0
-	ld8 r27=[r3],PT(PR)-PT(AR_RSC)	// load ar.rsc
-	mov f11=f0		// clear f11
-	;;
-	ld8 r24=[r2],PT(AR_FPSR)-PT(AR_RNAT)	// load ar.rnat (may be garbage)
-	ld8 r31=[r3],PT(R1)-PT(PR)		// load predicates
 (pUStk) add r14=IA64_TASK_THREAD_ON_USTACK_OFFSET,r13
 	;;
-	ld8 r20=[r2],PT(R12)-PT(AR_FPSR)	// load ar.fpsr
-	ld8.fill r1=[r3],16	// load r1
-(pUStk) mov r17=1
+	ld8 r26=[r2],PT(B0)-PT(AR_PFS)	// M0|1 load ar.pfs
+(pKStk)	mov r22=psr			// M2   read PSR now that interrupts are disabled
+	nop 0
 	;;
-	srlz.d			// M0  ensure interruption collection is off
-	ld8.fill r13=[r3],16
-	mov f7=f0		// clear f7
+	ld8 r21=[r2],PT(AR_RNAT)-PT(B0) // M0|1 load b0
+	ld8 r27=[r3],PT(PR)-PT(AR_RSC)	// M0|1 load ar.rsc
+	mov f6=f0			// F    clear f6
 	;;
-	ld8.fill r12=[r2]	// restore r12 (sp)
-	mov.m ar.ssd=r0		// M2 clear ar.ssd
-	mov r22=r0		// clear r22
+	ld8 r24=[r2],PT(AR_FPSR)-PT(AR_RNAT)	// M0|1 load ar.rnat (may be garbage)
+	ld8 r31=[r3],PT(R1)-PT(PR)		// M0|1 load predicates
+	mov f7=f0				// F    clear f7
+	;;
+	ld8 r20=[r2],PT(R12)-PT(AR_FPSR)	// M0|1 load ar.fpsr
+	ld8.fill r1=[r3],16			// M0|1 load r1
+(pUStk) mov r17=1				// A
+	;;
+(pUStk) st1 [r14]=r17				// M2|3
+	ld8.fill r13=[r3],16			// M0|1
+	mov f8=f0				// F    clear f8
+	;;
+	ld8.fill r12=[r2]			// M0|1 restore r12 (sp)
+	ld8.fill r15=[r3]			// M0|1 restore r15
+	mov b6=r18				// I0   restore b6
 
-	ld8.fill r15=[r3]	// restore r15
-(pUStk) st1 [r14]=r17
-	addl r3=THIS_CPU(ia64_phys_stacked_size_p8),r0
-	;;
-(pUStk)	ld4 r17=[r3]		// r17 = cpu_data->phys_stacked_size_p8
-	mov.m ar.csd=r0		// M2 clear ar.csd
-	mov b6=r18		// I0  restore b6
-	;;
-	mov r14=r0		// clear r14
-	shr.u r18=r19,16	// I0|1 get byte size of existing "dirty" partition
-(pKStk) br.cond.dpnt.many skip_rbs_switch
+	addl r17=THIS_CPU(ia64_phys_stacked_size_p8),r0 // A
+	mov f9=f0					// F    clear f9
+(pKStk) br.cond.dpnt.many skip_rbs_switch		// B
 
-	mov.m ar.ccv=r0		// clear ar.ccv
-(pNonSys) br.cond.dpnt.many dont_preserve_current_frame
-	br.cond.sptk.many rbs_switch
+	srlz.d				// M0   ensure interruption collection is off (for cover)
+	shr.u r18=r19,16		// I0|1 get byte size of existing "dirty" partition
+	cover				// B    add current frame into dirty partition & set cr.ifs
+	;;
+(pUStk) ld4 r17=[r17]			// M0|1 r17 = cpu_data->phys_stacked_size_p8
+	mov r19=ar.bsp			// M2   get new backing store pointer
+	mov f10=f0			// F    clear f10
+
+	nop.m 0
+	movl r14=__kernel_syscall_via_epc // X
+	;;
+	mov.m ar.csd=r0			// M2   clear ar.csd
+	mov.m ar.ccv=r0			// M2   clear ar.ccv
+	mov b7=r14			// I0   overwrite b7 (hint with __kernel_syscall_via_epc)
+
+	mov.m ar.ssd=r0			// M2   clear ar.ssd
+	mov f11=f0			// F    clear f11
+	br.cond.sptk.many rbs_switch	// B
 END(ia64_leave_syscall)
 
 #ifdef CONFIG_IA32_SUPPORT
@@ -885,7 +880,7 @@
 	ldf.fill f7=[r2],PT(F11)-PT(F7)
 	ldf.fill f8=[r3],32
 	;;
-	srlz.i			// ensure interruption collection is off
+	srlz.d	// ensure that inter. collection is off (VHPT is don't care, since text is pinned)
 	mov ar.ccv=r15
 	;;
 	ldf.fill f11=[r2]
@@ -945,11 +940,10 @@
 	 * NOTE: alloc, loadrs, and cover can't be predicated.
 	 */
 (pNonSys) br.cond.dpnt dont_preserve_current_frame
-
-rbs_switch:
 	cover				// add current frame into dirty partition and set cr.ifs
 	;;
 	mov r19=ar.bsp			// get new backing store pointer
+rbs_switch:
 	sub r16=r16,r18			// krbs = old bsp - size of dirty partition
 	cmp.ne p9,p0=r0,r0		// clear p9 to skip restore of cr.ifs
 	;;
@@ -1024,14 +1018,14 @@
 	mov loc5=0
 	mov loc6=0
 	mov loc7=0
-(pRecurse) br.call.sptk.few b0=rse_clear_invalid
+(pRecurse) br.call.dptk.few b0=rse_clear_invalid
 	;;
 	mov loc8=0
 	mov loc9=0
 	cmp.ne pReturn,p0=r0,in1	// if recursion count != 0, we need to do a br.ret
 	mov loc10=0
 	mov loc11=0
-(pReturn) br.ret.sptk.many b0
+(pReturn) br.ret.dptk.many b0
 #endif /* !CONFIG_ITANIUM */
 #	undef pRecurse
 #	undef pReturn
diff --git a/arch/ia64/kernel/fsys.S b/arch/ia64/kernel/fsys.S
index 0d8650f..f566ff4 100644
--- a/arch/ia64/kernel/fsys.S
+++ b/arch/ia64/kernel/fsys.S
@@ -531,91 +531,114 @@
 	.altrp b6
 	.body
 	/*
-	 * We get here for syscalls that don't have a lightweight handler.  For those, we
-	 * need to bubble down into the kernel and that requires setting up a minimal
-	 * pt_regs structure, and initializing the CPU state more or less as if an
-	 * interruption had occurred.  To make syscall-restarts work, we setup pt_regs
-	 * such that cr_iip points to the second instruction in syscall_via_break.
-	 * Decrementing the IP hence will restart the syscall via break and not
-	 * decrementing IP will return us to the caller, as usual.  Note that we preserve
-	 * the value of psr.pp rather than initializing it from dcr.pp.  This makes it
-	 * possible to distinguish fsyscall execution from other privileged execution.
+	 * We get here for syscalls that don't have a lightweight
+	 * handler.  For those, we need to bubble down into the kernel
+	 * and that requires setting up a minimal pt_regs structure,
+	 * and initializing the CPU state more or less as if an
+	 * interruption had occurred.  To make syscall-restarts work,
+	 * we setup pt_regs such that cr_iip points to the second
+	 * instruction in syscall_via_break.  Decrementing the IP
+	 * hence will restart the syscall via break and not
+	 * decrementing IP will return us to the caller, as usual.
+	 * Note that we preserve the value of psr.pp rather than
+	 * initializing it from dcr.pp.  This makes it possible to
+	 * distinguish fsyscall execution from other privileged
+	 * execution.
 	 *
 	 * On entry:
-	 *	- normal fsyscall handler register usage, except that we also have:
+	 *	- normal fsyscall handler register usage, except
+	 *	  that we also have:
 	 *	- r18: address of syscall entry point
 	 *	- r21: ar.fpsr
 	 *	- r26: ar.pfs
 	 *	- r27: ar.rsc
 	 *	- r29: psr
+	 *
+	 * We used to clear some PSR bits here but that requires slow
+	 * serialization.  Fortunately, that isn't really necessary.
+	 * The rationale is as follows: we used to clear bits
+	 * ~PSR_PRESERVED_BITS in PSR.L.  Since
+	 * PSR_PRESERVED_BITS==PSR.{UP,MFL,MFH,PK,DT,PP,SP,RT,IC}, we
+	 * ended up clearing PSR.{BE,AC,I,DFL,DFH,DI,DB,SI,TB}.
+	 * However,
+	 *
+	 * PSR.BE : already is turned off in __kernel_syscall_via_epc()
+	 * PSR.AC : don't care (kernel normally turns PSR.AC on)
+	 * PSR.I  : already turned off by the time fsys_bubble_down gets
+	 *	    invoked
+	 * PSR.DFL: always 0 (kernel never turns it on)
+	 * PSR.DFH: don't care --- kernel never touches f32-f127 on its own
+	 *	    initiative
+	 * PSR.DI : always 0 (kernel never turns it on)
+	 * PSR.SI : always 0 (kernel never turns it on)
+	 * PSR.DB : don't care --- kernel never enables kernel-level
+	 *	    breakpoints
+	 * PSR.TB : must be 0 already; if it wasn't zero on entry to
+	 *          __kernel_syscall_via_epc, the branch to fsys_bubble_down
+	 *          will trigger a taken-branch trap; the trap handler then
+	 *          converts the syscall into a break-based system-call.
 	 */
-#	define PSR_PRESERVED_BITS	(IA64_PSR_UP | IA64_PSR_MFL | IA64_PSR_MFH | IA64_PSR_PK \
-					 | IA64_PSR_DT | IA64_PSR_PP | IA64_PSR_SP | IA64_PSR_RT \
-					 | IA64_PSR_IC)
 	/*
-	 * Reading psr.l gives us only bits 0-31, psr.it, and psr.mc.  The rest we have
-	 * to synthesize.
+	 * Reading psr.l gives us only bits 0-31, psr.it, and psr.mc.
+	 * The rest we have to synthesize.
 	 */
-#	define PSR_ONE_BITS		((3 << IA64_PSR_CPL0_BIT) | (0x1 << IA64_PSR_RI_BIT) \
+#	define PSR_ONE_BITS		((3 << IA64_PSR_CPL0_BIT)	\
+					 | (0x1 << IA64_PSR_RI_BIT)	\
 					 | IA64_PSR_BN | IA64_PSR_I)
 
-	invala
-	movl r8=PSR_ONE_BITS
+	invala					// M0|1
+	movl r14=ia64_ret_from_syscall		// X
 
-	mov r25=ar.unat			// save ar.unat (5 cyc)
-	movl r9=PSR_PRESERVED_BITS
+	nop.m 0
+	movl r28=__kernel_syscall_via_break	// X	create cr.iip
+	;;
 
-	mov ar.rsc=0			// set enforced lazy mode, pl 0, little-endian, loadrs=0
-	movl r28=__kernel_syscall_via_break
+	mov r2=r16				// A    get task addr to addl-addressable register
+	adds r16=IA64_TASK_THREAD_ON_USTACK_OFFSET,r16 // A
+	mov r31=pr				// I0   save pr (2 cyc)
 	;;
-	mov r23=ar.bspstore		// save ar.bspstore (12 cyc)
-	mov r31=pr			// save pr (2 cyc)
-	mov r20=r1			// save caller's gp in r20
+	st1 [r16]=r0				// M2|3 clear current->thread.on_ustack flag
+	addl r22=IA64_RBS_OFFSET,r2		// A    compute base of RBS
+	add r3=TI_FLAGS+IA64_TASK_SIZE,r2	// A
 	;;
-	mov r2=r16			// copy current task addr to addl-addressable register
-	and r9=r9,r29
-	mov r19=b6			// save b6 (2 cyc)
+	ld4 r3=[r3]				// M0|1 r3 = current_thread_info()->flags
+	lfetch.fault.excl.nt1 [r22]		// M0|1 prefetch register backing-store
+	nop.i 0
 	;;
-	mov psr.l=r9			// slam the door (17 cyc to srlz.i)
-	or r29=r8,r29			// construct cr.ipsr value to save
-	addl r22=IA64_RBS_OFFSET,r2	// compute base of RBS
+	mov ar.rsc=0				// M2   set enforced lazy mode, pl 0, LE, loadrs=0
+	nop.m 0
+	nop.i 0
 	;;
-	// GAS reports a spurious RAW hazard on the read of ar.rnat because it thinks
-	// we may be reading ar.itc after writing to psr.l.  Avoid that message with
-	// this directive:
-	dv_serialize_data
-	mov.m r24=ar.rnat		// read ar.rnat (5 cyc lat)
-	lfetch.fault.excl.nt1 [r22]
-	adds r16=IA64_TASK_THREAD_ON_USTACK_OFFSET,r2
+	mov r23=ar.bspstore			// M2 (12 cyc) save ar.bspstore
+	mov.m r24=ar.rnat			// M2 (5 cyc) read ar.rnat (dual-issues!)
+	nop.i 0
+	;;
+	mov ar.bspstore=r22			// M2 (6 cyc) switch to kernel RBS
+	movl r8=PSR_ONE_BITS			// X
+	;;
+	mov r25=ar.unat				// M2 (5 cyc) save ar.unat
+	mov r19=b6				// I0   save b6 (2 cyc)
+	mov r20=r1				// A    save caller's gp in r20
+	;;
+	or r29=r8,r29				// A    construct cr.ipsr value to save
+	mov b6=r18				// I0   copy syscall entry-point to b6 (7 cyc)
+	addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r2 // A compute base of memory stack
 
-	// ensure previous insn group is issued before we stall for srlz.i:
+	mov r18=ar.bsp				// M2   save (kernel) ar.bsp (12 cyc)
+	cmp.ne pKStk,pUStk=r0,r0		// A    set pKStk <- 0, pUStk <- 1
+	br.call.sptk.many b7=ia64_syscall_setup	// B
 	;;
-	srlz.i				// ensure new psr.l has been established
-	/////////////////////////////////////////////////////////////////////////////
-	////////// from this point on, execution is not interruptible anymore
-	/////////////////////////////////////////////////////////////////////////////
-	addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r2	// compute base of memory stack
-	cmp.ne pKStk,pUStk=r0,r0	// set pKStk <- 0, pUStk <- 1
+	mov ar.rsc=0x3				// M2   set eager mode, pl 0, LE, loadrs=0
+	mov rp=r14				// I0   set the real return addr
+	and r3=_TIF_SYSCALL_TRACEAUDIT,r3	// A
 	;;
-	st1 [r16]=r0			// clear current->thread.on_ustack flag
-	mov ar.bspstore=r22		// switch to kernel RBS
-	mov b6=r18			// copy syscall entry-point to b6 (7 cyc)
-	add r3=TI_FLAGS+IA64_TASK_SIZE,r2
-	;;
-	ld4 r3=[r3]				// r2 = current_thread_info()->flags
-	mov r18=ar.bsp			// save (kernel) ar.bsp (12 cyc)
-	mov ar.rsc=0x3			// set eager mode, pl 0, little-endian, loadrs=0
-	br.call.sptk.many b7=ia64_syscall_setup
-	;;
-	ssm psr.i
-	movl r2=ia64_ret_from_syscall
-	;;
-	mov rp=r2				// set the real return addr
-	tbit.z p8,p0=r3,TIF_SYSCALL_TRACE
-	;;
-(p10)	br.cond.spnt.many ia64_ret_from_syscall	// p10==true means out registers are more than 8
-(p8)	br.call.sptk.many b6=b6		// ignore this return addr
-	br.cond.sptk ia64_trace_syscall
+	ssm psr.i				// M2   we're on kernel stacks now, reenable irqs
+	cmp.eq p8,p0=r3,r0			// A
+(p10)	br.cond.spnt.many ia64_ret_from_syscall	// B    return if bad call-frame or r15 is a NaT
+
+	nop.m 0
+(p8)	br.call.sptk.many b6=b6			// B    (ignore return address)
+	br.cond.spnt ia64_trace_syscall		// B
 END(fsys_bubble_down)
 
 	.rodata
diff --git a/arch/ia64/kernel/gate.S b/arch/ia64/kernel/gate.S
index facf75a..86948ce 100644
--- a/arch/ia64/kernel/gate.S
+++ b/arch/ia64/kernel/gate.S
@@ -72,38 +72,40 @@
 	 * bundle get executed.  The remaining code must be safe even if
 	 * they do not get executed.
 	 */
-	adds r17=-1024,r15
-	mov r10=0				// default to successful syscall execution
-	epc
+	adds r17=-1024,r15			// A
+	mov r10=0				// A    default to successful syscall execution
+	epc					// B	causes split-issue
 }
 	;;
-	rsm psr.be // note: on McKinley "rsm psr.be/srlz.d" is slightly faster than "rum psr.be"
-	LOAD_FSYSCALL_TABLE(r14)
+	rsm psr.be | psr.i			// M2 (5 cyc to srlz.d)
+	LOAD_FSYSCALL_TABLE(r14)		// X
+	;;
+	mov r16=IA64_KR(CURRENT)		// M2 (12 cyc)
+	shladd r18=r17,3,r14			// A
+	mov r19=NR_syscalls-1			// A
+	;;
+	lfetch [r18]				// M0|1
+	mov r29=psr				// M2 (12 cyc)
+	// If r17 is a NaT, p6 will be zero
+	cmp.geu p6,p7=r19,r17			// A    (sysnr >= 1024 && sysnr < 1024+NR_syscalls)?
+	;;
+	mov r21=ar.fpsr				// M2 (12 cyc)
+	tnat.nz p10,p9=r15			// I0
+	mov.i r26=ar.pfs			// I0 (would stall anyhow due to srlz.d...)
+	;;
+	srlz.d					// M0 (forces split-issue) ensure PSR.BE==0
+(p6)	ld8 r18=[r18]				// M0|1
+	nop.i 0
+	;;
+	nop.m 0
+(p6)	tbit.z.unc p8,p0=r18,0			// I0 (dual-issues with "mov b7=r18"!)
+	nop.i 0
+	;;
+(p8)	ssm psr.i
+(p6)	mov b7=r18				// I0
+(p8)	br.dptk.many b7				// B
 
-	mov r16=IA64_KR(CURRENT)		// 12 cycle read latency
-	tnat.nz p10,p9=r15
-	mov r19=NR_syscalls-1
-	;;
-	shladd r18=r17,3,r14
-
-	srlz.d
-	cmp.ne p8,p0=r0,r0			// p8 <- FALSE
-	/* Note: if r17 is a NaT, p6 will be set to zero.  */
-	cmp.geu p6,p7=r19,r17			// (syscall > 0 && syscall < 1024+NR_syscalls)?
-	;;
-(p6)	ld8 r18=[r18]
-	mov r21=ar.fpsr
-	add r14=-8,r14				// r14 <- addr of fsys_bubble_down entry
-	;;
-(p6)	mov b7=r18
-(p6)	tbit.z p8,p0=r18,0
-(p8)	br.dptk.many b7
-
-(p6)	rsm psr.i
-	mov r27=ar.rsc
-	mov r26=ar.pfs
-	;;
-	mov r29=psr				// read psr (12 cyc load latency)
+	mov r27=ar.rsc				// M2 (12 cyc)
 /*
  * brl.cond doesn't work as intended because the linker would convert this branch
  * into a branch to a PLT.  Perhaps there will be a way to avoid this with some
@@ -111,6 +113,8 @@
  * instead.
  */
 #ifdef CONFIG_ITANIUM
+(p6)	add r14=-8,r14				// r14 <- addr of fsys_bubble_down entry
+	;;
 (p6)	ld8 r14=[r14]				// r14 <- fsys_bubble_down
 	;;
 (p6)	mov b7=r14
@@ -118,7 +122,7 @@
 #else
 	BRL_COND_FSYS_BUBBLE_DOWN(p6)
 #endif
-
+	ssm psr.i
 	mov r10=-1
 (p10)	mov r8=EINVAL
 (p9)	mov r8=ENOSYS
diff --git a/arch/ia64/kernel/ia64_ksyms.c b/arch/ia64/kernel/ia64_ksyms.c
index 7bbf019..0157281 100644
--- a/arch/ia64/kernel/ia64_ksyms.c
+++ b/arch/ia64/kernel/ia64_ksyms.c
@@ -58,9 +58,6 @@
 EXPORT_SYMBOL(__strncpy_from_user);
 EXPORT_SYMBOL(__strnlen_user);
 
-#include <asm/unistd.h>
-EXPORT_SYMBOL(__ia64_syscall);
-
 /* from arch/ia64/lib */
 extern void __divsi3(void);
 extern void __udivsi3(void);
diff --git a/arch/ia64/kernel/ivt.S b/arch/ia64/kernel/ivt.S
index d9c05d5..386087e 100644
--- a/arch/ia64/kernel/ivt.S
+++ b/arch/ia64/kernel/ivt.S
@@ -1,7 +1,7 @@
 /*
  * arch/ia64/kernel/ivt.S
  *
- * Copyright (C) 1998-2001, 2003 Hewlett-Packard Co
+ * Copyright (C) 1998-2001, 2003, 2005 Hewlett-Packard Co
  *	Stephane Eranian <eranian@hpl.hp.com>
  *	David Mosberger <davidm@hpl.hp.com>
  * Copyright (C) 2000, 2002-2003 Intel Co
@@ -687,82 +687,118 @@
 	 * to prevent leaking bits from kernel to user level.
 	 */
 	DBG_FAULT(11)
-	mov r16=IA64_KR(CURRENT)		// r16 = current task; 12 cycle read lat.
-	mov r17=cr.iim
-	mov r18=__IA64_BREAK_SYSCALL
-	mov r21=ar.fpsr
-	mov r29=cr.ipsr
-	mov r19=b6
-	mov r25=ar.unat
-	mov r27=ar.rsc
-	mov r26=ar.pfs
-	mov r28=cr.iip
-	mov r31=pr				// prepare to save predicates
-	mov r20=r1
+	mov.m r16=IA64_KR(CURRENT)		// M2 r16 <- current task (12 cyc)
+	mov r29=cr.ipsr				// M2 (12 cyc)
+	mov r31=pr				// I0 (2 cyc)
+
+	mov r17=cr.iim				// M2 (2 cyc)
+	mov.m r27=ar.rsc			// M2 (12 cyc)
+	mov r18=__IA64_BREAK_SYSCALL		// A
+
+	mov.m ar.rsc=0				// M2
+	mov.m r21=ar.fpsr			// M2 (12 cyc)
+	mov r19=b6				// I0 (2 cyc)
 	;;
+	mov.m r23=ar.bspstore			// M2 (12 cyc)
+	mov.m r24=ar.rnat			// M2 (5 cyc)
+	mov.i r26=ar.pfs			// I0 (2 cyc)
+
+	invala					// M0|1
+	nop.m 0					// M
+	mov r20=r1				// A			save r1
+
+	nop.m 0
+	movl r30=sys_call_table			// X
+
+	mov r28=cr.iip				// M2 (2 cyc)
+	cmp.eq p0,p7=r18,r17			// I0 is this a system call?
+(p7)	br.cond.spnt non_syscall		// B  no ->
+	//
+	// From this point on, we are definitely on the syscall-path
+	// and we can use (non-banked) scratch registers.
+	//
+///////////////////////////////////////////////////////////////////////
+	mov r1=r16				// A    move task-pointer to "addl"-addressable reg
+	mov r2=r16				// A    setup r2 for ia64_syscall_setup
+	add r9=TI_FLAGS+IA64_TASK_SIZE,r16	// A	r9 = &current_thread_info()->flags
+
 	adds r16=IA64_TASK_THREAD_ON_USTACK_OFFSET,r16
-	cmp.eq p0,p7=r18,r17			// is this a system call? (p7 <- false, if so)
-(p7)	br.cond.spnt non_syscall
-	;;
-	ld1 r17=[r16]				// load current->thread.on_ustack flag
-	st1 [r16]=r0				// clear current->thread.on_ustack flag
-	add r1=-IA64_TASK_THREAD_ON_USTACK_OFFSET,r16	// set r1 for MINSTATE_START_SAVE_MIN_VIRT
-	;;
-	invala
-
-	/* adjust return address so we skip over the break instruction: */
-
-	extr.u r8=r29,41,2			// extract ei field from cr.ipsr
-	;;
-	cmp.eq p6,p7=2,r8			// isr.ei==2?
-	mov r2=r1				// setup r2 for ia64_syscall_setup
-	;;
-(p6)	mov r8=0				// clear ei to 0
-(p6)	adds r28=16,r28				// switch cr.iip to next bundle cr.ipsr.ei wrapped
-(p7)	adds r8=1,r8				// increment ei to next slot
-	;;
-	cmp.eq pKStk,pUStk=r0,r17		// are we in kernel mode already?
-	dep r29=r8,r29,41,2			// insert new ei into cr.ipsr
-	;;
-
-	// switch from user to kernel RBS:
-	MINSTATE_START_SAVE_MIN_VIRT
-	br.call.sptk.many b7=ia64_syscall_setup
-	;;
-	MINSTATE_END_SAVE_MIN_VIRT		// switch to bank 1
-	ssm psr.ic | PSR_DEFAULT_BITS
-	;;
-	srlz.i					// guarantee that interruption collection is on
+	adds r15=-1024,r15			// A    subtract 1024 from syscall number
 	mov r3=NR_syscalls - 1
 	;;
-(p15)	ssm psr.i				// restore psr.i
-	// p10==true means out registers are more than 8 or r15's Nat is true
-(p10)	br.cond.spnt.many ia64_ret_from_syscall
-	;;
-	movl r16=sys_call_table
+	ld1.bias r17=[r16]			// M0|1 r17 = current->thread.on_ustack flag
+	ld4 r9=[r9]				// M0|1 r9 = current_thread_info()->flags
+	extr.u r8=r29,41,2			// I0   extract ei field from cr.ipsr
 
-	adds r15=-1024,r15			// r15 contains the syscall number---subtract 1024
-	movl r2=ia64_ret_from_syscall
+	shladd r30=r15,3,r30			// A    r30 = sys_call_table + 8*(syscall-1024)
+	addl r22=IA64_RBS_OFFSET,r1		// A    compute base of RBS
+	cmp.leu p6,p7=r15,r3			// A    syscall number in range?
 	;;
-	shladd r20=r15,3,r16			// r20 = sys_call_table + 8*(syscall-1024)
-	cmp.leu p6,p7=r15,r3			// (syscall > 0 && syscall < 1024 + NR_syscalls) ?
-	mov rp=r2				// set the real return addr
-	;;
-(p6)	ld8 r20=[r20]				// load address of syscall entry point
-(p7)	movl r20=sys_ni_syscall
 
-	add r2=TI_FLAGS+IA64_TASK_SIZE,r13
+	lfetch.fault.excl.nt1 [r22]		// M0|1 prefetch RBS
+(p6)	ld8 r30=[r30]				// M0|1 load address of syscall entry point
+	tnat.nz.or p7,p0=r15			// I0	is syscall nr a NaT?
+
+	mov.m ar.bspstore=r22			// M2   switch to kernel RBS
+	cmp.eq p8,p9=2,r8			// A    isr.ei==2?
 	;;
-	ld4 r2=[r2]				// r2 = current_thread_info()->flags
+
+(p8)	mov r8=0				// A    clear ei to 0
+(p7)	movl r30=sys_ni_syscall			// X
+
+(p8)	adds r28=16,r28				// A    switch cr.iip to next bundle
+(p9)	adds r8=1,r8				// A    increment ei to next slot
+	nop.i 0
 	;;
-	and r2=_TIF_SYSCALL_TRACEAUDIT,r2	// mask trace or audit
+
+	mov.m r25=ar.unat			// M2 (5 cyc)
+	dep r29=r8,r29,41,2			// I0   insert new ei into cr.ipsr
+	adds r15=1024,r15			// A    restore original syscall number
+	//
+	// If any of the above loads miss in L1D, we'll stall here until
+	// the data arrives.
+	//
+///////////////////////////////////////////////////////////////////////
+	st1 [r16]=r0				// M2|3 clear current->thread.on_ustack flag
+	mov b6=r30				// I0   setup syscall handler branch reg early
+	cmp.eq pKStk,pUStk=r0,r17		// A    were we on kernel stacks already?
+
+	and r9=_TIF_SYSCALL_TRACEAUDIT,r9	// A    mask trace or audit
+	mov r18=ar.bsp				// M2 (12 cyc)
+(pKStk)	br.cond.spnt .break_fixup		// B	we're already in kernel-mode -- fix up RBS
 	;;
-	cmp.eq p8,p0=r2,r0
-	mov b6=r20
+.back_from_break_fixup:
+(pUStk)	addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r1 // A    compute base of memory stack
+	cmp.eq p14,p0=r9,r0			// A    are syscalls being traced/audited?
+	br.call.sptk.many b7=ia64_syscall_setup	// B
+1:
+	mov ar.rsc=0x3				// M2   set eager mode, pl 0, LE, loadrs=0
+	nop 0
+	bsw.1					// B (6 cyc) regs are saved, switch to bank 1
 	;;
-(p8)	br.call.sptk.many b6=b6			// ignore this return addr
-	br.cond.sptk ia64_trace_syscall
+
+	ssm psr.ic | PSR_DEFAULT_BITS		// M2	now it's safe to re-enable intr.-collection
+	movl r3=ia64_ret_from_syscall		// X
+	;;
+
+	srlz.i					// M0   ensure interruption collection is on
+	mov rp=r3				// I0   set the real return addr
+(p10)	br.cond.spnt.many ia64_ret_from_syscall	// B    return if bad call-frame or r15 is a NaT
+
+(p15)	ssm psr.i				// M2   restore psr.i
+(p14)	br.call.sptk.many b6=b6			// B    invoke syscall-handler (ignore return addr)
+	br.cond.spnt.many ia64_trace_syscall	// B	do syscall-tracing thingamagic
 	// NOT REACHED
+///////////////////////////////////////////////////////////////////////
+	// On entry, we optimistically assumed that we're coming from user-space.
+	// For the rare cases where a system-call is done from within the kernel,
+	// we fix things up at this point:
+.break_fixup:
+	add r1=-IA64_PT_REGS_SIZE,sp		// A    allocate space for pt_regs structure
+	mov ar.rnat=r24				// M2	restore kernel's AR.RNAT
+	;;
+	mov ar.bspstore=r23			// M2	restore kernel's AR.BSPSTORE
+	br.cond.sptk .back_from_break_fixup
 END(break_fault)
 
 	.org ia64_ivt+0x3000
@@ -837,8 +873,6 @@
 	 *	- r31: saved pr
 	 *	-  b0: original contents (to be saved)
 	 * On exit:
-	 *	- executing on bank 1 registers
-	 *	- psr.ic enabled, interrupts restored
 	 *	-  p10: TRUE if syscall is invoked with more than 8 out
 	 *		registers or r15's Nat is true
 	 *	-  r1: kernel's gp
@@ -846,8 +880,11 @@
 	 *	-  r8: -EINVAL if p10 is true
 	 *	- r12: points to kernel stack
 	 *	- r13: points to current task
+	 *	- r14: preserved (same as on entry)
+	 *	- p13: preserved
 	 *	- p15: TRUE if interrupts need to be re-enabled
 	 *	- ar.fpsr: set to kernel settings
+	 *	-  b6: preserved (same as on entry)
 	 */
 GLOBAL_ENTRY(ia64_syscall_setup)
 #if PT(B6) != 0
@@ -915,10 +952,10 @@
 (p13)	mov in5=-1
 	;;
 	st8 [r16]=r21,PT(R8)-PT(AR_FPSR)	// save ar.fpsr
-	tnat.nz p14,p0=in6
+	tnat.nz p13,p0=in6
 	cmp.lt p10,p9=r11,r8	// frame size can't be more than local+8
 	;;
-	stf8 [r16]=f1		// ensure pt_regs.r8 != 0 (see handle_syscall_error)
+	mov r8=1
 (p9)	tnat.nz p10,p0=r15
 	adds r12=-16,r1		// switch to kernel memory stack (with 16 bytes of scratch)
 
@@ -929,9 +966,9 @@
 	mov r13=r2				// establish `current'
 	movl r1=__gp				// establish kernel global pointer
 	;;
-(p14)	mov in6=-1
+	st8 [r16]=r8		// ensure pt_regs.r8 != 0 (see handle_syscall_error)
+(p13)	mov in6=-1
 (p8)	mov in7=-1
-	nop.i 0
 
 	cmp.eq pSys,pNonSys=r0,r0		// set pSys=1, pNonSys=0
 	movl r17=FPSR_DEFAULT
@@ -1002,6 +1039,8 @@
 	FAULT(17)
 
 ENTRY(non_syscall)
+	mov ar.rsc=r27			// restore ar.rsc before SAVE_MIN_WITH_COVER
+	;;
 	SAVE_MIN_WITH_COVER
 
 	// There is no particular reason for this code to be here, other than that