Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/signal

Pull big execve/kernel_thread/fork unification series from Al Viro:
 "All architectures are converted to new model.  Quite a bit of that
  stuff is actually shared with architecture trees; in such cases it's
  literally shared branch pulled by both, not a cherry-pick.

  A lot of ugliness and black magic is gone (-3KLoC total in this one):

   - kernel_thread()/kernel_execve()/sys_execve() redesign.

     We don't do syscalls from kernel anymore for either kernel_thread()
     or kernel_execve():

     kernel_thread() is essentially clone(2) with callback run before we
     return to userland, the callbacks either never return or do
     successful do_execve() before returning.

     kernel_execve() is a wrapper for do_execve() - it doesn't need to
     do transition to user mode anymore.

     As a result kernel_thread() and kernel_execve() are
     arch-independent now - they live in kernel/fork.c and fs/exec.c
     resp.  sys_execve() is also in fs/exec.c and it's completely
     architecture-independent.

   - daemonize() is gone, along with its parts in fs/*.c

   - struct pt_regs * is no longer passed to do_fork/copy_process/
     copy_thread/do_execve/search_binary_handler/->load_binary/do_coredump.

   - sys_fork()/sys_vfork()/sys_clone() unified; some architectures
     still need wrappers (ones with callee-saved registers not saved in
     pt_regs on syscall entry), but the main part of those suckers is in
     kernel/fork.c now."

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/signal: (113 commits)
  do_coredump(): get rid of pt_regs argument
  print_fatal_signal(): get rid of pt_regs argument
  ptrace_signal(): get rid of unused arguments
  get rid of ptrace_signal_deliver() arguments
  new helper: signal_pt_regs()
  unify default ptrace_signal_deliver
  flagday: kill pt_regs argument of do_fork()
  death to idle_regs()
  don't pass regs to copy_process()
  flagday: don't pass regs to copy_thread()
  bfin: switch to generic vfork, get rid of pointless wrappers
  xtensa: switch to generic clone()
  openrisc: switch to use of generic fork and clone
  unicore32: switch to generic clone(2)
  score: switch to generic fork/vfork/clone
  c6x: sanitize copy_thread(), get rid of clone(2) wrapper, switch to generic clone()
  take sys_fork/sys_vfork/sys_clone prototypes to linux/syscalls.h
  mn10300: switch to generic fork/vfork/clone
  h8300: switch to generic fork/vfork/clone
  tile: switch to generic clone()
  ...

Conflicts:
	arch/microblaze/include/asm/Kbuild
diff --git a/arch/Kconfig b/arch/Kconfig
index cc74aae..34884fa 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -342,4 +342,18 @@
 	  Modules only use ELF REL relocations.  Modules with ELF RELA
 	  relocations will give an error.
 
+#
+# ABI hall of shame
+#
+config CLONE_BACKWARDS
+	bool
+	help
+	  Architecture has tls passed as the 4th argument of clone(2),
+	  not the 5th one.
+
+config CLONE_BACKWARDS2
+	bool
+	help
+	  Architecture has the first two arguments of clone(2) swapped.
+
 source "kernel/gcov/Kconfig"
diff --git a/arch/alpha/include/asm/ptrace.h b/arch/alpha/include/asm/ptrace.h
index b87755a..b4c5b2f 100644
--- a/arch/alpha/include/asm/ptrace.h
+++ b/arch/alpha/include/asm/ptrace.h
@@ -78,6 +78,7 @@
 
 #define current_pt_regs() \
   ((struct pt_regs *) ((char *)current_thread_info() + 2*PAGE_SIZE) - 1)
+#define signal_pt_regs current_pt_regs
 
 #define force_successful_syscall_return() (current_pt_regs()->r0 = 0)
 
diff --git a/arch/alpha/include/asm/signal.h b/arch/alpha/include/asm/signal.h
index a938830..4555286 100644
--- a/arch/alpha/include/asm/signal.h
+++ b/arch/alpha/include/asm/signal.h
@@ -164,9 +164,6 @@
 
 #ifdef __KERNEL__
 #include <asm/sigcontext.h>
-
-#define ptrace_signal_deliver(regs, cookie) do { } while (0)
-
 #endif
 
 #endif
diff --git a/arch/alpha/include/asm/unistd.h b/arch/alpha/include/asm/unistd.h
index 7826e22..eb3a466 100644
--- a/arch/alpha/include/asm/unistd.h
+++ b/arch/alpha/include/asm/unistd.h
@@ -482,6 +482,9 @@
 #define __ARCH_WANT_SYS_SIGPENDING
 #define __ARCH_WANT_SYS_RT_SIGSUSPEND
 #define __ARCH_WANT_SYS_EXECVE
+#define __ARCH_WANT_SYS_FORK
+#define __ARCH_WANT_SYS_VFORK
+#define __ARCH_WANT_SYS_CLONE
 
 /* "Conditional" syscalls.  What we want is
 
diff --git a/arch/alpha/kernel/binfmt_loader.c b/arch/alpha/kernel/binfmt_loader.c
index d1f474d..9525660 100644
--- a/arch/alpha/kernel/binfmt_loader.c
+++ b/arch/alpha/kernel/binfmt_loader.c
@@ -5,7 +5,7 @@
 #include <linux/binfmts.h>
 #include <linux/a.out.h>
 
-static int load_binary(struct linux_binprm *bprm, struct pt_regs *regs)
+static int load_binary(struct linux_binprm *bprm)
 {
 	struct exec *eh = (struct exec *)bprm->buf;
 	unsigned long loader;
@@ -37,7 +37,7 @@
 	retval = prepare_binprm(bprm);
 	if (retval < 0)
 		return retval;
-	return search_binary_handler(bprm,regs);
+	return search_binary_handler(bprm);
 }
 
 static struct linux_binfmt loader_format = {
diff --git a/arch/alpha/kernel/entry.S b/arch/alpha/kernel/entry.S
index a760783..f62a994 100644
--- a/arch/alpha/kernel/entry.S
+++ b/arch/alpha/kernel/entry.S
@@ -612,47 +612,24 @@
  * Special system calls.  Most of these are special in that they either
  * have to play switch_stack games or in some way use the pt_regs struct.
  */
-	.align	4
-	.globl	sys_fork
-	.ent	sys_fork
-sys_fork:
-	.prologue 0
-	mov	$sp, $21
-	bsr	$1, do_switch_stack
-	bis	$31, SIGCHLD, $16
-	mov	$31, $17
-	mov	$31, $18
-	mov	$31, $19
-	mov	$31, $20
-	jsr	$26, alpha_clone
-	bsr	$1, undo_switch_stack
-	ret
-.end sys_fork
 
+.macro	fork_like name
 	.align	4
-	.globl	sys_clone
-	.ent	sys_clone
-sys_clone:
+	.globl	alpha_\name
+	.ent	alpha_\name
+alpha_\name:
 	.prologue 0
-	mov	$sp, $21
 	bsr	$1, do_switch_stack
-	/* $16, $17, $18, $19, $20 come from the user.  */
-	jsr	$26, alpha_clone
-	bsr	$1, undo_switch_stack
+	jsr	$26, sys_\name
+	ldq	$26, 56($sp)
+	lda	$sp, SWITCH_STACK_SIZE($sp)
 	ret
-.end sys_clone
+.end	alpha_\name
+.endm
 
-	.align	4
-	.globl	sys_vfork
-	.ent	sys_vfork
-sys_vfork:
-	.prologue 0
-	mov	$sp, $16
-	bsr	$1, do_switch_stack
-	jsr	$26, alpha_vfork
-	bsr	$1, undo_switch_stack
-	ret
-.end sys_vfork
+fork_like fork
+fork_like vfork
+fork_like clone
 
 	.align	4
 	.globl	sys_sigreturn
@@ -661,8 +638,6 @@
 	.prologue 0
 	lda	$9, ret_from_straced
 	cmpult	$26, $9, $9
-	mov	$sp, $17
-	lda	$18, -SWITCH_STACK_SIZE($sp)
 	lda	$sp, -SWITCH_STACK_SIZE($sp)
 	jsr	$26, do_sigreturn
 	bne	$9, 1f
@@ -678,8 +653,6 @@
 	.prologue 0
 	lda	$9, ret_from_straced
 	cmpult	$26, $9, $9
-	mov	$sp, $17
-	lda	$18, -SWITCH_STACK_SIZE($sp)
 	lda	$sp, -SWITCH_STACK_SIZE($sp)
 	jsr	$26, do_rt_sigreturn
 	bne	$9, 1f
diff --git a/arch/alpha/kernel/process.c b/arch/alpha/kernel/process.c
index 51987dc..b5d0d09 100644
--- a/arch/alpha/kernel/process.c
+++ b/arch/alpha/kernel/process.c
@@ -235,51 +235,28 @@
 }
 
 /*
- * "alpha_clone()".. By the time we get here, the
- * non-volatile registers have also been saved on the
- * stack. We do some ugly pointer stuff here.. (see
- * also copy_thread)
- *
- * Notice that "fork()" is implemented in terms of clone,
- * with parameters (SIGCHLD, 0).
- */
-int
-alpha_clone(unsigned long clone_flags, unsigned long usp,
-	    int __user *parent_tid, int __user *child_tid,
-	    unsigned long tls_value, struct pt_regs *regs)
-{
-	if (!usp)
-		usp = rdusp();
-
-	return do_fork(clone_flags, usp, regs, 0, parent_tid, child_tid);
-}
-
-int
-alpha_vfork(struct pt_regs *regs)
-{
-	return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, rdusp(),
-		       regs, 0, NULL, NULL);
-}
-
-/*
  * Copy an alpha thread..
  */
 
 int
 copy_thread(unsigned long clone_flags, unsigned long usp,
 	    unsigned long arg,
-	    struct task_struct * p, struct pt_regs * regs)
+	    struct task_struct *p)
 {
 	extern void ret_from_fork(void);
 	extern void ret_from_kernel_thread(void);
 
 	struct thread_info *childti = task_thread_info(p);
 	struct pt_regs *childregs = task_pt_regs(p);
+	struct pt_regs *regs = current_pt_regs();
 	struct switch_stack *childstack, *stack;
 	unsigned long settls;
 
 	childstack = ((struct switch_stack *) childregs) - 1;
-	if (unlikely(!regs)) {
+	childti->pcb.ksp = (unsigned long) childstack;
+	childti->pcb.flags = 1;	/* set FEN, clear everything else */
+
+	if (unlikely(p->flags & PF_KTHREAD)) {
 		/* kernel thread */
 		memset(childstack, 0,
 			sizeof(struct switch_stack) + sizeof(struct pt_regs));
@@ -288,12 +265,17 @@
 		childstack->r10 = arg;
 		childregs->hae = alpha_mv.hae_cache,
 		childti->pcb.usp = 0;
-		childti->pcb.ksp = (unsigned long) childstack;
-		childti->pcb.flags = 1;	/* set FEN, clear everything else */
 		return 0;
 	}
+	/* Note: if CLONE_SETTLS is not set, then we must inherit the
+	   value from the parent, which will have been set by the block
+	   copy in dup_task_struct.  This is non-intuitive, but is
+	   required for proper operation in the case of a threaded
+	   application calling fork.  */
+	if (clone_flags & CLONE_SETTLS)
+		childti->pcb.unique = regs->r20;
+	childti->pcb.usp = usp ?: rdusp();
 	*childregs = *regs;
-	settls = regs->r20;
 	childregs->r0 = 0;
 	childregs->r19 = 0;
 	childregs->r20 = 1;	/* OSF/1 has some strange fork() semantics.  */
@@ -301,22 +283,6 @@
 	stack = ((struct switch_stack *) regs) - 1;
 	*childstack = *stack;
 	childstack->r26 = (unsigned long) ret_from_fork;
-	childti->pcb.usp = usp;
-	childti->pcb.ksp = (unsigned long) childstack;
-	childti->pcb.flags = 1;	/* set FEN, clear everything else */
-
-	/* Set a new TLS for the child thread?  Peek back into the
-	   syscall arguments that we saved on syscall entry.  Oops,
-	   except we'd have clobbered it with the parent/child set
-	   of r20.  Read the saved copy.  */
-	/* Note: if CLONE_SETTLS is not set, then we must inherit the
-	   value from the parent, which will have been set by the block
-	   copy in dup_task_struct.  This is non-intuitive, but is
-	   required for proper operation in the case of a threaded
-	   application calling fork.  */
-	if (clone_flags & CLONE_SETTLS)
-		childti->pcb.unique = settls;
-
 	return 0;
 }
 
diff --git a/arch/alpha/kernel/signal.c b/arch/alpha/kernel/signal.c
index 32575f8..336393c 100644
--- a/arch/alpha/kernel/signal.c
+++ b/arch/alpha/kernel/signal.c
@@ -160,10 +160,10 @@
 #define INSN_CALLSYS		0x00000083
 
 static long
-restore_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs,
-		   struct switch_stack *sw)
+restore_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs)
 {
 	unsigned long usp;
+	struct switch_stack *sw = (struct switch_stack *)regs - 1;
 	long i, err = __get_user(regs->pc, &sc->sc_pc);
 
 	current_thread_info()->restart_block.fn = do_no_restart_syscall;
@@ -215,9 +215,9 @@
    registers and transfer control from userland.  */
 
 asmlinkage void
-do_sigreturn(struct sigcontext __user *sc, struct pt_regs *regs,
-	     struct switch_stack *sw)
+do_sigreturn(struct sigcontext __user *sc)
 {
+	struct pt_regs *regs = current_pt_regs();
 	sigset_t set;
 
 	/* Verify that it's a good sigcontext before using it */
@@ -228,7 +228,7 @@
 
 	set_current_blocked(&set);
 
-	if (restore_sigcontext(sc, regs, sw))
+	if (restore_sigcontext(sc, regs))
 		goto give_sigsegv;
 
 	/* Send SIGTRAP if we're single-stepping: */
@@ -249,9 +249,9 @@
 }
 
 asmlinkage void
-do_rt_sigreturn(struct rt_sigframe __user *frame, struct pt_regs *regs,
-		struct switch_stack *sw)
+do_rt_sigreturn(struct rt_sigframe __user *frame)
 {
+	struct pt_regs *regs = current_pt_regs();
 	sigset_t set;
 
 	/* Verify that it's a good ucontext_t before using it */
@@ -262,7 +262,7 @@
 
 	set_current_blocked(&set);
 
-	if (restore_sigcontext(&frame->uc.uc_mcontext, regs, sw))
+	if (restore_sigcontext(&frame->uc.uc_mcontext, regs))
 		goto give_sigsegv;
 
 	/* Send SIGTRAP if we're single-stepping: */
diff --git a/arch/alpha/kernel/systbls.S b/arch/alpha/kernel/systbls.S
index 2ac6b45..4284ec7 100644
--- a/arch/alpha/kernel/systbls.S
+++ b/arch/alpha/kernel/systbls.S
@@ -12,7 +12,7 @@
 sys_call_table:
 	.quad alpha_ni_syscall			/* 0 */
 	.quad sys_exit
-	.quad sys_fork
+	.quad alpha_fork
 	.quad sys_read
 	.quad sys_write
 	.quad alpha_ni_syscall			/* 5 */
@@ -76,7 +76,7 @@
 	.quad sys_getpgrp
 	.quad sys_getpagesize
 	.quad alpha_ni_syscall			/* 65 */
-	.quad sys_vfork
+	.quad alpha_vfork
 	.quad sys_newstat
 	.quad sys_newlstat
 	.quad alpha_ni_syscall
@@ -330,7 +330,7 @@
 	.quad sys_ni_syscall			/* 309: old get_kernel_syms */
 	.quad sys_syslog			/* 310 */
 	.quad sys_reboot
-	.quad sys_clone
+	.quad alpha_clone
 	.quad sys_uselib
 	.quad sys_mlock
 	.quad sys_munlock			/* 315 */
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index d7d7c2f..08330d9 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -57,6 +57,7 @@
 	select SYS_SUPPORTS_APM_EMULATION
 	select HAVE_MOD_ARCH_SPECIFIC if ARM_UNWIND
 	select MODULES_USE_ELF_REL
+	select CLONE_BACKWARDS
 	help
 	  The ARM series is a line of low-power-consumption RISC chip designs
 	  licensed by ARM Ltd and targeted at embedded applications and
diff --git a/arch/arm/include/asm/signal.h b/arch/arm/include/asm/signal.h
index 5a7963d..9a0ea6a 100644
--- a/arch/arm/include/asm/signal.h
+++ b/arch/arm/include/asm/signal.h
@@ -35,5 +35,4 @@
 };
 
 #include <asm/sigcontext.h>
-#define ptrace_signal_deliver(regs, cookie) do { } while (0)
 #endif
diff --git a/arch/arm/include/asm/unistd.h b/arch/arm/include/asm/unistd.h
index 8f60b6e..7cd13cc 100644
--- a/arch/arm/include/asm/unistd.h
+++ b/arch/arm/include/asm/unistd.h
@@ -42,6 +42,9 @@
 #define __ARCH_WANT_SYS_SOCKETCALL
 #endif
 #define __ARCH_WANT_SYS_EXECVE
+#define __ARCH_WANT_SYS_FORK
+#define __ARCH_WANT_SYS_VFORK
+#define __ARCH_WANT_SYS_CLONE
 
 /*
  * "Conditional" syscalls
diff --git a/arch/arm/kernel/calls.S b/arch/arm/kernel/calls.S
index 831cd38..5935b6a02 100644
--- a/arch/arm/kernel/calls.S
+++ b/arch/arm/kernel/calls.S
@@ -11,7 +11,7 @@
  */
 /* 0 */		CALL(sys_restart_syscall)
 		CALL(sys_exit)
-		CALL(sys_fork_wrapper)
+		CALL(sys_fork)
 		CALL(sys_read)
 		CALL(sys_write)
 /* 5 */		CALL(sys_open)
@@ -129,7 +129,7 @@
 		CALL(OBSOLETE(ABI(sys_ipc, sys_oabi_ipc)))
 		CALL(sys_fsync)
 		CALL(sys_sigreturn_wrapper)
-/* 120 */	CALL(sys_clone_wrapper)
+/* 120 */	CALL(sys_clone)
 		CALL(sys_setdomainname)
 		CALL(sys_newuname)
 		CALL(sys_ni_syscall)		/* modify_ldt */
@@ -199,7 +199,7 @@
 		CALL(sys_sendfile)
 		CALL(sys_ni_syscall)		/* getpmsg */
 		CALL(sys_ni_syscall)		/* putpmsg */
-/* 190 */	CALL(sys_vfork_wrapper)
+/* 190 */	CALL(sys_vfork)
 		CALL(sys_getrlimit)
 		CALL(sys_mmap2)
 		CALL(ABI(sys_truncate64, sys_oabi_truncate64))
diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S
index 804153c..a6c301e 100644
--- a/arch/arm/kernel/entry-common.S
+++ b/arch/arm/kernel/entry-common.S
@@ -502,22 +502,6 @@
 		b	sys_ni_syscall
 ENDPROC(sys_syscall)
 
-sys_fork_wrapper:
-		add	r0, sp, #S_OFF
-		b	sys_fork
-ENDPROC(sys_fork_wrapper)
-
-sys_vfork_wrapper:
-		add	r0, sp, #S_OFF
-		b	sys_vfork
-ENDPROC(sys_vfork_wrapper)
-
-sys_clone_wrapper:
-		add	ip, sp, #S_OFF
-		str	ip, [sp, #4]
-		b	sys_clone
-ENDPROC(sys_clone_wrapper)
-
 sys_sigreturn_wrapper:
 		add	r0, sp, #S_OFF
 		mov	why, #0		@ prevent syscall restart handling
diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
index 44bc0b3..c6dec5f 100644
--- a/arch/arm/kernel/process.c
+++ b/arch/arm/kernel/process.c
@@ -376,17 +376,18 @@
 
 int
 copy_thread(unsigned long clone_flags, unsigned long stack_start,
-	    unsigned long stk_sz, struct task_struct *p, struct pt_regs *regs)
+	    unsigned long stk_sz, struct task_struct *p)
 {
 	struct thread_info *thread = task_thread_info(p);
 	struct pt_regs *childregs = task_pt_regs(p);
 
 	memset(&thread->cpu_context, 0, sizeof(struct cpu_context_save));
 
-	if (likely(regs)) {
-		*childregs = *regs;
+	if (likely(!(p->flags & PF_KTHREAD))) {
+		*childregs = *current_pt_regs();
 		childregs->ARM_r0 = 0;
-		childregs->ARM_sp = stack_start;
+		if (stack_start)
+			childregs->ARM_sp = stack_start;
 	} else {
 		memset(childregs, 0, sizeof(struct pt_regs));
 		thread->cpu_context.r4 = stk_sz;
@@ -399,7 +400,7 @@
 	clear_ptrace_hw_breakpoint(p);
 
 	if (clone_flags & CLONE_SETTLS)
-		thread->tp_value = regs->ARM_r3;
+		thread->tp_value = childregs->ARM_r3;
 
 	thread_notify(THREAD_NOTIFY_COPY, thread);
 
diff --git a/arch/arm/kernel/sys_arm.c b/arch/arm/kernel/sys_arm.c
index c2a898a..3151f56 100644
--- a/arch/arm/kernel/sys_arm.c
+++ b/arch/arm/kernel/sys_arm.c
@@ -28,37 +28,6 @@
 #include <linux/uaccess.h>
 #include <linux/slab.h>
 
-/* Fork a new task - this creates a new program thread.
- * This is called indirectly via a small wrapper
- */
-asmlinkage int sys_fork(struct pt_regs *regs)
-{
-#ifdef CONFIG_MMU
-	return do_fork(SIGCHLD, regs->ARM_sp, regs, 0, NULL, NULL);
-#else
-	/* can not support in nommu mode */
-	return(-EINVAL);
-#endif
-}
-
-/* Clone a task - this clones the calling program thread.
- * This is called indirectly via a small wrapper
- */
-asmlinkage int sys_clone(unsigned long clone_flags, unsigned long newsp,
-			 int __user *parent_tidptr, int tls_val,
-			 int __user *child_tidptr, struct pt_regs *regs)
-{
-	if (!newsp)
-		newsp = regs->ARM_sp;
-
-	return do_fork(clone_flags, newsp, regs, 0, parent_tidptr, child_tidptr);
-}
-
-asmlinkage int sys_vfork(struct pt_regs *regs)
-{
-	return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->ARM_sp, regs, 0, NULL, NULL);
-}
-
 /*
  * Since loff_t is a 64 bit type we avoid a lot of ABI hassle
  * with a different argument ordering.
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 2adf340..f9ccff9 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -33,6 +33,7 @@
 	select RTC_LIB
 	select SPARSE_IRQ
 	select SYSCTL_EXCEPTION_TRACE
+	select CLONE_BACKWARDS
 	help
 	  ARM 64-bit (AArch64) Linux support.
 
diff --git a/arch/arm64/include/asm/syscalls.h b/arch/arm64/include/asm/syscalls.h
index a1b00cd..20d63b2 100644
--- a/arch/arm64/include/asm/syscalls.h
+++ b/arch/arm64/include/asm/syscalls.h
@@ -27,12 +27,6 @@
 asmlinkage long sys_sigaltstack_wrapper(const stack_t __user *uss,
 					stack_t __user *uoss);
 
-/*
- * AArch64 sys_clone implementation has a different prototype than the generic
- * one (additional TLS value argument).
- */
-#define sys_clone	sys_clone
-
 #include <asm-generic/syscalls.h>
 
 #endif	/* __ASM_SYSCALLS_H */
diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h
index 43064a8..d69aeea 100644
--- a/arch/arm64/include/asm/unistd.h
+++ b/arch/arm64/include/asm/unistd.h
@@ -24,6 +24,9 @@
 #define __ARCH_WANT_SYS_SIGPROCMASK
 #define __ARCH_WANT_COMPAT_SYS_RT_SIGSUSPEND
 #define __ARCH_WANT_COMPAT_SYS_SENDFILE
+#define __ARCH_WANT_SYS_FORK
+#define __ARCH_WANT_SYS_VFORK
 #endif
 #define __ARCH_WANT_SYS_EXECVE
+#define __ARCH_WANT_SYS_CLONE
 #include <uapi/asm/unistd.h>
diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h
index 50104e8..5843262 100644
--- a/arch/arm64/include/asm/unistd32.h
+++ b/arch/arm64/include/asm/unistd32.h
@@ -23,7 +23,7 @@
 
 __SYSCALL(0,   sys_restart_syscall)
 __SYSCALL(1,   sys_exit)
-__SYSCALL(2,   compat_sys_fork)
+__SYSCALL(2,   sys_fork)
 __SYSCALL(3,   sys_read)
 __SYSCALL(4,   sys_write)
 __SYSCALL(5,   compat_sys_open)
@@ -211,7 +211,7 @@
 __SYSCALL(187, compat_sys_sendfile)
 __SYSCALL(188, sys_ni_syscall)			/* 188 reserved */
 __SYSCALL(189, sys_ni_syscall)			/* 189 reserved */
-__SYSCALL(190, compat_sys_vfork)
+__SYSCALL(190, sys_vfork)
 __SYSCALL(191, compat_sys_getrlimit)		/* SuS compliant getrlimit */
 __SYSCALL(192, sys_mmap_pgoff)
 __SYSCALL(193, compat_sys_truncate64_wrapper)
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 8a5f334..cb0956b 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -234,16 +234,15 @@
 asmlinkage void ret_from_fork(void) asm("ret_from_fork");
 
 int copy_thread(unsigned long clone_flags, unsigned long stack_start,
-		unsigned long stk_sz, struct task_struct *p,
-		struct pt_regs *regs)
+		unsigned long stk_sz, struct task_struct *p)
 {
 	struct pt_regs *childregs = task_pt_regs(p);
 	unsigned long tls = p->thread.tp_value;
 
 	memset(&p->thread.cpu_context, 0, sizeof(struct cpu_context));
 
-	if (likely(regs)) {
-		*childregs = *regs;
+	if (likely(!(p->flags & PF_KTHREAD))) {
+		*childregs = *current_pt_regs();
 		childregs->regs[0] = 0;
 		if (is_compat_thread(task_thread_info(p))) {
 			if (stack_start)
@@ -266,7 +265,7 @@
 		 * for the new thread.
 		 */
 		if (clone_flags & CLONE_SETTLS)
-			tls = regs->regs[3];
+			tls = childregs->regs[3];
 	} else {
 		memset(childregs, 0, sizeof(struct pt_regs));
 		childregs->pstate = PSR_MODE_EL1h;
diff --git a/arch/arm64/kernel/sys.c b/arch/arm64/kernel/sys.c
index 4364df8..8292a9b 100644
--- a/arch/arm64/kernel/sys.c
+++ b/arch/arm64/kernel/sys.c
@@ -26,17 +26,6 @@
 #include <linux/slab.h>
 #include <linux/syscalls.h>
 
-/*
- * Clone a task - this clones the calling program thread.
- */
-asmlinkage long sys_clone(unsigned long clone_flags, unsigned long newsp,
-			  int __user *parent_tidptr, unsigned long tls_val,
-			  int __user *child_tidptr)
-{
-	return do_fork(clone_flags, newsp, current_pt_regs(), 0,
-			parent_tidptr, child_tidptr);
-}
-
 asmlinkage long sys_mmap(unsigned long addr, unsigned long len,
 			 unsigned long prot, unsigned long flags,
 			 unsigned long fd, off_t off)
diff --git a/arch/arm64/kernel/sys_compat.c b/arch/arm64/kernel/sys_compat.c
index 6fabc19..f7b05ed 100644
--- a/arch/arm64/kernel/sys_compat.c
+++ b/arch/arm64/kernel/sys_compat.c
@@ -28,17 +28,6 @@
 #include <asm/cacheflush.h>
 #include <asm/unistd32.h>
 
-asmlinkage int compat_sys_fork(void)
-{
-	return do_fork(SIGCHLD, 0, current_pt_regs(), 0, NULL, NULL);
-}
-
-asmlinkage int compat_sys_vfork(void)
-{
-	return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, 0,
-		       current_pt_regs(), 0, NULL, NULL);
-}
-
 asmlinkage int compat_sys_sched_rr_get_interval(compat_pid_t pid,
 						struct compat_timespec __user *interval)
 {
diff --git a/arch/avr32/Kconfig b/arch/avr32/Kconfig
index c2bbc9a..202d71a 100644
--- a/arch/avr32/Kconfig
+++ b/arch/avr32/Kconfig
@@ -17,6 +17,8 @@
 	select GENERIC_CLOCKEVENTS
 	select HAVE_MOD_ARCH_SPECIFIC
 	select MODULES_USE_ELF_RELA
+	select GENERIC_KERNEL_THREAD
+	select GENERIC_KERNEL_EXECVE
 	help
 	  AVR32 is a high-performance 32-bit RISC microprocessor core,
 	  designed for cost-sensitive embedded applications, with particular
diff --git a/arch/avr32/include/asm/processor.h b/arch/avr32/include/asm/processor.h
index 87d8bac..48d71c5 100644
--- a/arch/avr32/include/asm/processor.h
+++ b/arch/avr32/include/asm/processor.h
@@ -142,9 +142,6 @@
 /* Free all resources held by a thread */
 extern void release_thread(struct task_struct *);
 
-/* Create a kernel thread without removing it from tasklists */
-extern int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags);
-
 /* Return saved PC of a blocked thread */
 #define thread_saved_pc(tsk)    ((tsk)->thread.cpu_context.pc)
 
diff --git a/arch/avr32/include/asm/signal.h b/arch/avr32/include/asm/signal.h
index 4d502fd..9326d18 100644
--- a/arch/avr32/include/asm/signal.h
+++ b/arch/avr32/include/asm/signal.h
@@ -37,6 +37,4 @@
 #include <asm/sigcontext.h>
 #undef __HAVE_ARCH_SIG_BITOPS
 
-#define ptrace_signal_deliver(regs, cookie) do { } while (0)
-
 #endif
diff --git a/arch/avr32/include/asm/unistd.h b/arch/avr32/include/asm/unistd.h
index 157b4bd..f05a980 100644
--- a/arch/avr32/include/asm/unistd.h
+++ b/arch/avr32/include/asm/unistd.h
@@ -39,6 +39,10 @@
 #define __ARCH_WANT_SYS_GETPGRP
 #define __ARCH_WANT_SYS_RT_SIGACTION
 #define __ARCH_WANT_SYS_RT_SIGSUSPEND
+#define __ARCH_WANT_SYS_EXECVE
+#define __ARCH_WANT_SYS_FORK
+#define __ARCH_WANT_SYS_VFORK
+#define __ARCH_WANT_SYS_CLONE
 
 /*
  * "Conditional" syscalls
diff --git a/arch/avr32/kernel/Makefile b/arch/avr32/kernel/Makefile
index 9e2c465..119a2e4 100644
--- a/arch/avr32/kernel/Makefile
+++ b/arch/avr32/kernel/Makefile
@@ -7,7 +7,7 @@
 obj-$(CONFIG_SUBARCH_AVR32B)	+= entry-avr32b.o
 obj-y				+= syscall_table.o syscall-stubs.o irq.o
 obj-y				+= setup.o traps.o ocd.o ptrace.o
-obj-y				+= signal.o sys_avr32.o process.o time.o
+obj-y				+= signal.o process.o time.o
 obj-y				+= switch_to.o cpu.o
 obj-$(CONFIG_MODULES)		+= module.o avr32_ksyms.o
 obj-$(CONFIG_KPROBES)		+= kprobes.o
diff --git a/arch/avr32/kernel/entry-avr32b.S b/arch/avr32/kernel/entry-avr32b.S
index df28841..9899d3c 100644
--- a/arch/avr32/kernel/entry-avr32b.S
+++ b/arch/avr32/kernel/entry-avr32b.S
@@ -251,13 +251,15 @@
 	.global ret_from_fork
 ret_from_fork:
 	call   schedule_tail
+	mov	r12, 0
+	rjmp	syscall_return
 
-	/* check for syscall tracing */
-	get_thread_info r0
-	ld.w	r1, r0[TI_flags]
-	andl	r1, _TIF_ALLWORK_MASK, COH
-	brne	syscall_exit_work
-	rjmp    syscall_exit_cont
+	.global ret_from_kernel_thread
+ret_from_kernel_thread:
+	call   schedule_tail
+	mov	r12, r0
+	mov	lr, r2	/* syscall_return */
+	mov	pc, r1
 
 syscall_trace_enter:
 	pushm	r8-r12
diff --git a/arch/avr32/kernel/process.c b/arch/avr32/kernel/process.c
index 1bb0a8a..fd78f58 100644
--- a/arch/avr32/kernel/process.c
+++ b/arch/avr32/kernel/process.c
@@ -69,44 +69,6 @@
 }
 
 /*
- * PC is actually discarded when returning from a system call -- the
- * return address must be stored in LR. This function will make sure
- * LR points to do_exit before starting the thread.
- *
- * Also, when returning from fork(), r12 is 0, so we must copy the
- * argument as well.
- *
- *  r0 : The argument to the main thread function
- *  r1 : The address of do_exit
- *  r2 : The address of the main thread function
- */
-asmlinkage extern void kernel_thread_helper(void);
-__asm__("	.type	kernel_thread_helper, @function\n"
-	"kernel_thread_helper:\n"
-	"	mov	r12, r0\n"
-	"	mov	lr, r2\n"
-	"	mov	pc, r1\n"
-	"	.size	kernel_thread_helper, . - kernel_thread_helper");
-
-int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
-{
-	struct pt_regs regs;
-
-	memset(&regs, 0, sizeof(regs));
-
-	regs.r0 = (unsigned long)arg;
-	regs.r1 = (unsigned long)fn;
-	regs.r2 = (unsigned long)do_exit;
-	regs.lr = (unsigned long)kernel_thread_helper;
-	regs.pc = (unsigned long)kernel_thread_helper;
-	regs.sr = MODE_SUPERVISOR;
-
-	return do_fork(flags | CLONE_VM | CLONE_UNTRACED,
-		       0, &regs, 0, NULL, NULL);
-}
-EXPORT_SYMBOL(kernel_thread);
-
-/*
  * Free current thread data structures etc
  */
 void exit_thread(void)
@@ -332,26 +294,32 @@
 }
 
 asmlinkage void ret_from_fork(void);
+asmlinkage void ret_from_kernel_thread(void);
+asmlinkage void syscall_return(void);
 
 int copy_thread(unsigned long clone_flags, unsigned long usp,
-		unsigned long unused,
-		struct task_struct *p, struct pt_regs *regs)
+		unsigned long arg,
+		struct task_struct *p)
 {
-	struct pt_regs *childregs;
+	struct pt_regs *childregs = task_pt_regs(p);
 
-	childregs = ((struct pt_regs *)(THREAD_SIZE + (unsigned long)task_stack_page(p))) - 1;
-	*childregs = *regs;
-
-	if (user_mode(regs))
-		childregs->sp = usp;
-	else
-		childregs->sp = (unsigned long)task_stack_page(p) + THREAD_SIZE;
-
-	childregs->r12 = 0; /* Set return value for child */
+	if (unlikely(p->flags & PF_KTHREAD)) {
+		memset(childregs, 0, sizeof(struct pt_regs));
+		p->thread.cpu_context.r0 = arg;
+		p->thread.cpu_context.r1 = usp; /* fn */
+		p->thread.cpu_context.r2 = syscall_return;
+		p->thread.cpu_context.pc = (unsigned long)ret_from_kernel_thread;
+		childregs->sr = MODE_SUPERVISOR;
+	} else {
+		*childregs = *current_pt_regs();
+		if (usp)
+			childregs->sp = usp;
+		childregs->r12 = 0; /* Set return value for child */
+		p->thread.cpu_context.pc = (unsigned long)ret_from_fork;
+	}
 
 	p->thread.cpu_context.sr = MODE_SUPERVISOR | SR_GM;
 	p->thread.cpu_context.ksp = (unsigned long)childregs;
-	p->thread.cpu_context.pc = (unsigned long)ret_from_fork;
 
 	clear_tsk_thread_flag(p, TIF_DEBUG);
 	if ((clone_flags & CLONE_PTRACE) && test_thread_flag(TIF_DEBUG))
@@ -360,49 +328,6 @@
 	return 0;
 }
 
-/* r12-r8 are dummy parameters to force the compiler to use the stack */
-asmlinkage int sys_fork(struct pt_regs *regs)
-{
-	return do_fork(SIGCHLD, regs->sp, regs, 0, NULL, NULL);
-}
-
-asmlinkage int sys_clone(unsigned long clone_flags, unsigned long newsp,
-		void __user *parent_tidptr, void __user *child_tidptr,
-		struct pt_regs *regs)
-{
-	if (!newsp)
-		newsp = regs->sp;
-	return do_fork(clone_flags, newsp, regs, 0, parent_tidptr,
-			child_tidptr);
-}
-
-asmlinkage int sys_vfork(struct pt_regs *regs)
-{
-	return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->sp, regs,
-		       0, NULL, NULL);
-}
-
-asmlinkage int sys_execve(const char __user *ufilename,
-			  const char __user *const __user *uargv,
-			  const char __user *const __user *uenvp,
-			  struct pt_regs *regs)
-{
-	int error;
-	struct filename *filename;
-
-	filename = getname(ufilename);
-	error = PTR_ERR(filename);
-	if (IS_ERR(filename))
-		goto out;
-
-	error = do_execve(filename->name, uargv, uenvp, regs);
-	putname(filename);
-
-out:
-	return error;
-}
-
-
 /*
  * This function is supposed to answer the question "who called
  * schedule()?"
diff --git a/arch/avr32/kernel/sys_avr32.c b/arch/avr32/kernel/sys_avr32.c
deleted file mode 100644
index 62635a0..0000000
--- a/arch/avr32/kernel/sys_avr32.c
+++ /dev/null
@@ -1,24 +0,0 @@
-/*
- * Copyright (C) 2004-2006 Atmel Corporation
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-#include <linux/unistd.h>
-
-int kernel_execve(const char *file,
-		  const char *const *argv,
-		  const char *const *envp)
-{
-	register long scno asm("r8") = __NR_execve;
-	register long sc1 asm("r12") = (long)file;
-	register long sc2 asm("r11") = (long)argv;
-	register long sc3 asm("r10") = (long)envp;
-
-	asm volatile("scall"
-		     : "=r"(sc1)
-		     : "r"(scno), "0"(sc1), "r"(sc2), "r"(sc3)
-		     : "cc", "memory");
-	return sc1;
-}
diff --git a/arch/avr32/kernel/syscall-stubs.S b/arch/avr32/kernel/syscall-stubs.S
index 0447a3e..275aab9 100644
--- a/arch/avr32/kernel/syscall-stubs.S
+++ b/arch/avr32/kernel/syscall-stubs.S
@@ -32,30 +32,6 @@
 	mov	r12, sp
 	rjmp	sys_rt_sigreturn
 
-	.global	__sys_fork
-	.type	__sys_fork,@function
-__sys_fork:
-	mov	r12, sp
-	rjmp	sys_fork
-
-	.global	__sys_clone
-	.type	__sys_clone,@function
-__sys_clone:
-	mov	r8, sp
-	rjmp	sys_clone
-
-	.global	__sys_vfork
-	.type	__sys_vfork,@function
-__sys_vfork:
-	mov	r12, sp
-	rjmp	sys_vfork
-
-	.global	__sys_execve
-	.type	__sys_execve,@function
-__sys_execve:
-	mov	r9, sp
-	rjmp	sys_execve
-
 	.global	__sys_mmap2
 	.type	__sys_mmap2,@function
 __sys_mmap2:
diff --git a/arch/avr32/kernel/syscall_table.S b/arch/avr32/kernel/syscall_table.S
index 6eba535..f27bb87 100644
--- a/arch/avr32/kernel/syscall_table.S
+++ b/arch/avr32/kernel/syscall_table.S
@@ -15,7 +15,7 @@
 sys_call_table:
 	.long	sys_restart_syscall
 	.long	sys_exit
-	.long	__sys_fork
+	.long	sys_fork
 	.long	sys_read
 	.long	sys_write
 	.long	sys_open		/* 5 */
@@ -24,7 +24,7 @@
 	.long	sys_creat
 	.long	sys_link
 	.long	sys_unlink		/* 10 */
-	.long	__sys_execve
+	.long	sys_execve
 	.long	sys_chdir
 	.long	sys_time
 	.long	sys_mknod
@@ -57,7 +57,7 @@
 	.long	sys_dup
 	.long	sys_pipe
 	.long	sys_times
-	.long	__sys_clone
+	.long	sys_clone
 	.long	sys_brk			/* 45 */
 	.long	sys_setgid
 	.long	sys_getgid
@@ -127,7 +127,7 @@
 	.long	sys_newuname
 	.long	sys_adjtimex
 	.long	sys_mprotect
-	.long	__sys_vfork
+	.long	sys_vfork
 	.long	sys_init_module		/* 115 */
 	.long	sys_delete_module
 	.long	sys_quotactl
diff --git a/arch/blackfin/Kconfig b/arch/blackfin/Kconfig
index b6f3ad5..ab9ff40 100644
--- a/arch/blackfin/Kconfig
+++ b/arch/blackfin/Kconfig
@@ -45,6 +45,8 @@
 	select ARCH_USES_GETTIMEOFFSET if !GENERIC_CLOCKEVENTS
 	select HAVE_MOD_ARCH_SPECIFIC
 	select MODULES_USE_ELF_RELA
+	select GENERIC_KERNEL_THREAD
+	select GENERIC_KERNEL_EXECVE
 
 config GENERIC_CSUM
 	def_bool y
diff --git a/arch/blackfin/include/asm/processor.h b/arch/blackfin/include/asm/processor.h
index 4ef7cfe..d0e72e9 100644
--- a/arch/blackfin/include/asm/processor.h
+++ b/arch/blackfin/include/asm/processor.h
@@ -75,8 +75,6 @@
 {
 }
 
-extern int kernel_thread(int (*fn) (void *), void *arg, unsigned long flags);
-
 /*
  * Free current thread data structures etc..
  */
diff --git a/arch/blackfin/include/asm/unistd.h b/arch/blackfin/include/asm/unistd.h
index 5b2a074..460514a1 100644
--- a/arch/blackfin/include/asm/unistd.h
+++ b/arch/blackfin/include/asm/unistd.h
@@ -446,6 +446,8 @@
 #define __ARCH_WANT_SYS_NICE
 #define __ARCH_WANT_SYS_RT_SIGACTION
 #define __ARCH_WANT_SYS_RT_SIGSUSPEND
+#define __ARCH_WANT_SYS_EXECVE
+#define __ARCH_WANT_SYS_VFORK
 
 /*
  * "Conditional" syscalls
diff --git a/arch/blackfin/kernel/entry.S b/arch/blackfin/kernel/entry.S
index f33792c..4071265 100644
--- a/arch/blackfin/kernel/entry.S
+++ b/arch/blackfin/kernel/entry.S
@@ -46,53 +46,14 @@
 	SP += -12;
 	pseudo_long_call _schedule_tail, p5;
 	SP += 12;
-	r0 = [sp + PT_IPEND];
-	cc = bittst(r0,1);
-	if cc jump .Lin_kernel;
+	p1 = [sp++];
+	r0 = [sp++];
+	cc = p1 == 0;
+	if cc jump .Lfork;
+	sp += -12;
+	call (p1);
+	sp += 12;
+.Lfork:
 	RESTORE_CONTEXT
 	rti;
-.Lin_kernel:
-	bitclr(r0,1);
-	[sp + PT_IPEND] = r0;
-	/* do a 'fake' RTI by jumping to [RETI]
-	 * to avoid clearing supervisor mode in child
-	 */
-	r0 = [sp + PT_PC];
-	[sp + PT_P0] = r0;
-
-	RESTORE_ALL_SYS
-	jump (p0);
 ENDPROC(_ret_from_fork)
-
-ENTRY(_sys_vfork)
-	r0 = sp;
-	r0 += 24;
-	[--sp] = rets;
-	SP += -12;
-	pseudo_long_call _bfin_vfork, p2;
-	SP += 12;
-	rets = [sp++];
-	rts;
-ENDPROC(_sys_vfork)
-
-ENTRY(_sys_clone)
-	r0 = sp;
-	r0 += 24;
-	[--sp] = rets;
-	SP += -12;
-	pseudo_long_call _bfin_clone, p2;
-	SP += 12;
-	rets = [sp++];
-	rts;
-ENDPROC(_sys_clone)
-
-ENTRY(_sys_rt_sigreturn)
-	r0 = sp;
-	r0 += 24;
-	[--sp] = rets;
-	SP += -12;
-	pseudo_long_call _do_rt_sigreturn, p2;
-	SP += 12;
-	rets = [sp++];
-	rts;
-ENDPROC(_sys_rt_sigreturn)
diff --git a/arch/blackfin/kernel/process.c b/arch/blackfin/kernel/process.c
index bb1cc72..3e16ad9 100644
--- a/arch/blackfin/kernel/process.c
+++ b/arch/blackfin/kernel/process.c
@@ -102,40 +102,6 @@
 }
 
 /*
- * This gets run with P1 containing the
- * function to call, and R1 containing
- * the "args".  Note P0 is clobbered on the way here.
- */
-void kernel_thread_helper(void);
-__asm__(".section .text\n"
-	".align 4\n"
-	"_kernel_thread_helper:\n\t"
-	"\tsp += -12;\n\t"
-	"\tr0 = r1;\n\t" "\tcall (p1);\n\t" "\tcall _do_exit;\n" ".previous");
-
-/*
- * Create a kernel thread.
- */
-pid_t kernel_thread(int (*fn) (void *), void *arg, unsigned long flags)
-{
-	struct pt_regs regs;
-
-	memset(&regs, 0, sizeof(regs));
-
-	regs.r1 = (unsigned long)arg;
-	regs.p1 = (unsigned long)fn;
-	regs.pc = (unsigned long)kernel_thread_helper;
-	regs.orig_p0 = -1;
-	/* Set bit 2 to tell ret_from_fork we should be returning to kernel
-	   mode.  */
-	regs.ipend = 0x8002;
-	__asm__ __volatile__("%0 = syscfg;":"=da"(regs.syscfg):);
-	return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, &regs, 0, NULL,
-		       NULL);
-}
-EXPORT_SYMBOL(kernel_thread);
-
-/*
  * Do necessary setup to start up a newly executed thread.
  *
  * pass the data segment into user programs if it exists,
@@ -161,70 +127,48 @@
 {
 }
 
-asmlinkage int bfin_vfork(struct pt_regs *regs)
+asmlinkage int bfin_clone(unsigned long clone_flags, unsigned long newsp)
 {
-	return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, rdusp(), regs, 0, NULL,
-		       NULL);
-}
-
-asmlinkage int bfin_clone(struct pt_regs *regs)
-{
-	unsigned long clone_flags;
-	unsigned long newsp;
-
 #ifdef __ARCH_SYNC_CORE_DCACHE
 	if (current->nr_cpus_allowed == num_possible_cpus())
 		set_cpus_allowed_ptr(current, cpumask_of(smp_processor_id()));
 #endif
-
-	/* syscall2 puts clone_flags in r0 and usp in r1 */
-	clone_flags = regs->r0;
-	newsp = regs->r1;
-	if (!newsp)
-		newsp = rdusp();
-	else
+	if (newsp)
 		newsp -= 12;
-	return do_fork(clone_flags, newsp, regs, 0, NULL, NULL);
+	return do_fork(clone_flags, newsp, 0, NULL, NULL);
 }
 
 int
 copy_thread(unsigned long clone_flags,
 	    unsigned long usp, unsigned long topstk,
-	    struct task_struct *p, struct pt_regs *regs)
+	    struct task_struct *p)
 {
 	struct pt_regs *childregs;
+	unsigned long *v;
 
 	childregs = (struct pt_regs *) (task_stack_page(p) + THREAD_SIZE) - 1;
-	*childregs = *regs;
-	childregs->r0 = 0;
+	v = ((unsigned long *)childregs) - 2;
+	if (unlikely(p->flags & PF_KTHREAD)) {
+		memset(childregs, 0, sizeof(struct pt_regs));
+		v[0] = usp;
+		v[1] = topstk;
+		childregs->orig_p0 = -1;
+		childregs->ipend = 0x8000;
+		__asm__ __volatile__("%0 = syscfg;":"=da"(childregs->syscfg):);
+		p->thread.usp = 0;
+	} else {
+		*childregs = *current_pt_regs();
+		childregs->r0 = 0;
+		p->thread.usp = usp ? : rdusp();
+		v[0] = v[1] = 0;
+	}
 
-	p->thread.usp = usp;
-	p->thread.ksp = (unsigned long)childregs;
+	p->thread.ksp = (unsigned long)v;
 	p->thread.pc = (unsigned long)ret_from_fork;
 
 	return 0;
 }
 
-/*
- * sys_execve() executes a new program.
- */
-asmlinkage int sys_execve(const char __user *name,
-			  const char __user *const __user *argv,
-			  const char __user *const __user *envp)
-{
-	int error;
-	struct filename *filename;
-	struct pt_regs *regs = (struct pt_regs *)((&name) + 6);
-
-	filename = getname(name);
-	error = PTR_ERR(filename);
-	if (IS_ERR(filename))
-		return error;
-	error = do_execve(filename->name, argv, envp, regs);
-	putname(filename);
-	return error;
-}
-
 unsigned long get_wchan(struct task_struct *p)
 {
 	unsigned long fp, pc;
diff --git a/arch/blackfin/kernel/signal.c b/arch/blackfin/kernel/signal.c
index 6ed20a1..84b4be0 100644
--- a/arch/blackfin/kernel/signal.c
+++ b/arch/blackfin/kernel/signal.c
@@ -82,9 +82,9 @@
 	return err;
 }
 
-asmlinkage int do_rt_sigreturn(unsigned long __unused)
+asmlinkage int sys_rt_sigreturn(void)
 {
-	struct pt_regs *regs = (struct pt_regs *)__unused;
+	struct pt_regs *regs = current_pt_regs();
 	unsigned long usp = rdusp();
 	struct rt_sigframe *frame = (struct rt_sigframe *)(usp);
 	sigset_t set;
diff --git a/arch/blackfin/mach-common/entry.S b/arch/blackfin/mach-common/entry.S
index 1c3d2c5..86b5a09 100644
--- a/arch/blackfin/mach-common/entry.S
+++ b/arch/blackfin/mach-common/entry.S
@@ -530,61 +530,6 @@
 	jump .Lsyscall_really_exit;
 ENDPROC(_trap)
 
-ENTRY(_kernel_execve)
-	link SIZEOF_PTREGS;
-	p0 = sp;
-	r3 = SIZEOF_PTREGS / 4;
-	r4 = 0(x);
-.Lclear_regs:
-	[p0++] = r4;
-	r3 += -1;
-	cc = r3 == 0;
-	if !cc jump .Lclear_regs (bp);
-
-	p0 = sp;
-	sp += -16;
-	[sp + 12] = p0;
-	pseudo_long_call _do_execve, p5;
-	SP += 16;
-	cc = r0 == 0;
-	if ! cc jump .Lexecve_failed;
-	/* Success.  Copy our temporary pt_regs to the top of the kernel
-	 * stack and do a normal exception return.
-	 */
-	r1 = sp;
-	r0 = (-KERNEL_STACK_SIZE) (x);
-	r1 = r1 & r0;
-	p2 = r1;
-	p3 = [p2];
-	r0 = KERNEL_STACK_SIZE - 4 (z);
-	p1 = r0;
-	p1 = p1 + p2;
-
-	p0 = fp;
-	r4 = [p0--];
-	r3 = SIZEOF_PTREGS / 4;
-.Lcopy_regs:
-	r4 = [p0--];
-	[p1--] = r4;
-	r3 += -1;
-	cc = r3 == 0;
-	if ! cc jump .Lcopy_regs (bp);
-
-	r0 = (KERNEL_STACK_SIZE - SIZEOF_PTREGS) (z);
-	p1 = r0;
-	p1 = p1 + p2;
-	sp = p1;
-	r0 = syscfg;
-	[SP + PT_SYSCFG] = r0;
-	[p3 + (TASK_THREAD + THREAD_KSP)] = sp;
-
-	RESTORE_CONTEXT;
-	rti;
-.Lexecve_failed:
-	unlink;
-	rts;
-ENDPROC(_kernel_execve)
-
 ENTRY(_system_call)
 	/* Store IPEND */
 	p2.l = lo(IPEND);
@@ -1486,7 +1431,7 @@
 	.long _sys_ni_syscall	/* old sys_ipc */
 	.long _sys_fsync
 	.long _sys_ni_syscall	/* old sys_sigreturn */
-	.long _sys_clone		/* 120 */
+	.long _bfin_clone		/* 120 */
 	.long _sys_setdomainname
 	.long _sys_newuname
 	.long _sys_ni_syscall	/* old sys_modify_ldt */
diff --git a/arch/c6x/Kconfig b/arch/c6x/Kconfig
index aee1b56..66eab37 100644
--- a/arch/c6x/Kconfig
+++ b/arch/c6x/Kconfig
@@ -18,6 +18,7 @@
 	select OF_EARLY_FLATTREE
 	select GENERIC_CLOCKEVENTS
 	select GENERIC_KERNEL_THREAD
+	select GENERIC_KERNEL_EXECVE
 	select MODULES_USE_ELF_RELA
 
 config MMU
diff --git a/arch/c6x/include/asm/syscalls.h b/arch/c6x/include/asm/syscalls.h
index e7b8991..df3d05f 100644
--- a/arch/c6x/include/asm/syscalls.h
+++ b/arch/c6x/include/asm/syscalls.h
@@ -41,10 +41,6 @@
 			      u32 len_lo, u32 len_hi);
 extern int sys_cache_sync(unsigned long s, unsigned long e);
 
-struct pt_regs;
-
-extern asmlinkage long sys_c6x_clone(struct pt_regs *regs);
-
 #include <asm-generic/syscalls.h>
 
 #endif /* __ASM_C6X_SYSCALLS_H */
diff --git a/arch/c6x/include/uapi/asm/unistd.h b/arch/c6x/include/uapi/asm/unistd.h
index 4ff747d..f3987a8 100644
--- a/arch/c6x/include/uapi/asm/unistd.h
+++ b/arch/c6x/include/uapi/asm/unistd.h
@@ -14,8 +14,8 @@
  *   more details.
  */
 
-#define __ARCH_WANT_KERNEL_EXECVE
 #define __ARCH_WANT_SYS_EXECVE
+#define __ARCH_WANT_SYS_CLONE
 
 /* Use the standard ABI for syscalls. */
 #include <asm-generic/unistd.h>
diff --git a/arch/c6x/kernel/entry.S b/arch/c6x/kernel/entry.S
index 0ed6157..5239057 100644
--- a/arch/c6x/kernel/entry.S
+++ b/arch/c6x/kernel/entry.S
@@ -415,19 +415,9 @@
 0:
 	B	.S2	B10		   /* call fn */
 	LDW	.D2T1	*+SP(REGS_A1+8),A4 /* get arg */
-	MVKL	.S2	sys_exit,B11
-	MVKH	.S2	sys_exit,B11
-	ADDKPC	.S2	0f,B3,1
-0:
-	BNOP	.S2	B11,5	/* jump to sys_exit */
+	ADDKPC	.S2	ret_from_fork_2,B3,3
 ENDPROC(ret_from_kernel_thread)
 
-ENTRY(ret_from_kernel_execve)
-	GET_THREAD_INFO A12
-	BNOP	.S2	syscall_exit,4
-	ADD	.D2X	A4,-8,SP
-ENDPROC(ret_from_kernel_execve)
-
 	;;
 	;; These are the interrupt handlers, responsible for calling c6x_do_IRQ()
 	;;
@@ -624,18 +614,6 @@
 	;; Special system calls
 	;; return address is in B3
 	;;
-ENTRY(sys_clone)
-	ADD	.D1X	SP,8,A4
-#ifdef CONFIG_C6X_BIG_KERNEL
- ||	MVKL	.S1	sys_c6x_clone,A0
-	MVKH	.S1	sys_c6x_clone,A0
-	BNOP	.S2X	A0,5
-#else
- ||	B	.S2	sys_c6x_clone
-	NOP	5
-#endif
-ENDPROC(sys_clone)
-
 ENTRY(sys_rt_sigreturn)
 	ADD	.D1X	SP,8,A4
 #ifdef CONFIG_C6X_BIG_KERNEL
diff --git a/arch/c6x/kernel/process.c b/arch/c6x/kernel/process.c
index 2770d9a..6434df4 100644
--- a/arch/c6x/kernel/process.c
+++ b/arch/c6x/kernel/process.c
@@ -112,22 +112,6 @@
 {
 }
 
-SYSCALL_DEFINE1(c6x_clone, struct pt_regs *, regs)
-{
-	unsigned long clone_flags;
-	unsigned long newsp;
-
-	/* syscall puts clone_flags in A4 and usp in B4 */
-	clone_flags = regs->orig_a4;
-	if (regs->b4)
-		newsp = regs->b4;
-	else
-		newsp = regs->sp;
-
-	return do_fork(clone_flags, newsp, regs, 0, (int __user *)regs->a6,
-		       (int __user *)regs->b6);
-}
-
 /*
  * Do necessary setup to start up a newly executed thread.
  */
@@ -155,13 +139,13 @@
  */
 int copy_thread(unsigned long clone_flags, unsigned long usp,
 		unsigned long ustk_size,
-		struct task_struct *p, struct pt_regs *regs)
+		struct task_struct *p)
 {
 	struct pt_regs *childregs;
 
 	childregs = task_pt_regs(p);
 
-	if (!regs) {
+	if (unlikely(p->flags & PF_KTHREAD)) {
 		/* case of  __kernel_thread: we return to supervisor space */
 		memset(childregs, 0, sizeof(struct pt_regs));
 		childregs->sp = (unsigned long)(childregs + 1);
@@ -170,8 +154,9 @@
 		childregs->a1 = ustk_size;	/* argument */
 	} else {
 		/* Otherwise use the given stack */
-		*childregs = *regs;
-		childregs->sp = usp;
+		*childregs = *current_pt_regs();
+		if (usp)
+			childregs->sp = usp;
 		p->thread.pc = (unsigned long) ret_from_fork;
 	}
 
diff --git a/arch/cris/Kconfig b/arch/cris/Kconfig
index a672444..0cac6a4 100644
--- a/arch/cris/Kconfig
+++ b/arch/cris/Kconfig
@@ -49,6 +49,9 @@
 	select GENERIC_SMP_IDLE_THREAD if ETRAX_ARCH_V32
 	select GENERIC_CMOS_UPDATE
 	select MODULES_USE_ELF_RELA
+	select GENERIC_KERNEL_THREAD
+	select GENERIC_KERNEL_EXECVE
+	select CLONE_BACKWARDS2
 
 config HZ
 	int
diff --git a/arch/cris/arch-v10/kernel/entry.S b/arch/cris/arch-v10/kernel/entry.S
index 592fbe9..897bba6 100644
--- a/arch/cris/arch-v10/kernel/entry.S
+++ b/arch/cris/arch-v10/kernel/entry.S
@@ -35,6 +35,7 @@
 	.globl system_call
 	.globl ret_from_intr
 	.globl ret_from_fork
+	.globl ret_from_kernel_thread
 	.globl resume
 	.globl multiple_interrupt
 	.globl hwbreakpoint
@@ -81,7 +82,14 @@
 	jsr schedule_tail
 	ba  ret_from_sys_call
 	nop
-		
+
+ret_from_kernel_thread:
+	jsr schedule_tail
+	move.d	$r2, $r10	; argument is here
+	jsr	$r1		; call the payload
+	moveq	0, $r9		; no syscall restarts, TYVM...
+	ba  ret_from_sys_call
+
 ret_from_intr:
 	;; check for resched if preemptive kernel or if we're going back to user-mode 
 	;; this test matches the user_regs(regs) macro
@@ -586,13 +594,6 @@
 	ba	do_sigtrap		; SIGTRAP the offending process. 
 	pop	$dccr			; Restore dccr in delay slot.
 
-	.global kernel_execve
-kernel_execve:
-	move.d __NR_execve, $r9
-	break 13
-	ret
-	nop
-
 	.data
 
 hw_bp_trigs:
diff --git a/arch/cris/arch-v10/kernel/process.c b/arch/cris/arch-v10/kernel/process.c
index 15ac715..b101875 100644
--- a/arch/cris/arch-v10/kernel/process.c
+++ b/arch/cris/arch-v10/kernel/process.c
@@ -17,6 +17,7 @@
 #include <arch/svinto.h>
 #include <linux/init.h>
 #include <arch/system.h>
+#include <linux/ptrace.h>
 
 #ifdef CONFIG_ETRAX_GPIO
 void etrax_gpio_wake_up_check(void); /* drivers/gpio.c */
@@ -81,31 +82,6 @@
 	return task_pt_regs(t)->irp;
 }
 
-static void kernel_thread_helper(void* dummy, int (*fn)(void *), void * arg)
-{
-  fn(arg);
-  do_exit(-1); /* Should never be called, return bad exit value */
-}
-
-/*
- * Create a kernel thread
- */
-int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
-{
-	struct pt_regs regs;
-
-	memset(&regs, 0, sizeof(regs));
-
-        /* Don't use r10 since that is set to 0 in copy_thread */
-	regs.r11 = (unsigned long)fn;
-	regs.r12 = (unsigned long)arg;
-	regs.irp = (unsigned long)kernel_thread_helper;
-	regs.dccr = 1 << I_DCCR_BITNR;
-
-	/* Ok, create the new process.. */
-        return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, &regs, 0, NULL, NULL);
-}
-
 /* setup the child's kernel stack with a pt_regs and switch_stack on it.
  * it will be un-nested during _resume and _ret_from_sys_call when the
  * new thread is scheduled.
@@ -115,29 +91,34 @@
  *
  */
 asmlinkage void ret_from_fork(void);
+asmlinkage void ret_from_kernel_thread(void);
 
 int copy_thread(unsigned long clone_flags, unsigned long usp,
-		unsigned long unused,
-		struct task_struct *p, struct pt_regs *regs)
+		unsigned long arg, struct task_struct *p)
 {
-	struct pt_regs * childregs;
-	struct switch_stack *swstack;
+	struct pt_regs *childregs = task_pt_regs(p);
+	struct switch_stack *swstack = ((struct switch_stack *)childregs) - 1;
 	
 	/* put the pt_regs structure at the end of the new kernel stack page and fix it up
 	 * remember that the task_struct doubles as the kernel stack for the task
 	 */
 
-	childregs = task_pt_regs(p);
-        
-	*childregs = *regs;  /* struct copy of pt_regs */
-        
-        p->set_child_tid = p->clear_child_tid = NULL;
+	if (unlikely(p->flags & PF_KTHREAD)) {
+		memset(swstack, 0,
+			sizeof(struct switch_stack) + sizeof(struct pt_regs));
+		swstack->r1 = usp;
+		swstack->r2 = arg;
+		childregs->dccr = 1 << I_DCCR_BITNR;
+		swstack->return_ip = (unsigned long) ret_from_kernel_thread;
+		p->thread.ksp = (unsigned long) swstack;
+		p->thread.usp = 0;
+		return 0;
+	}
+	*childregs = *current_pt_regs();  /* struct copy of pt_regs */
 
         childregs->r10 = 0;  /* child returns 0 after a fork/clone */
-	
-	/* put the switch stack right below the pt_regs */
 
-	swstack = ((struct switch_stack *)childregs) - 1;
+	/* put the switch stack right below the pt_regs */
 
 	swstack->r9 = 0; /* parameter to ret_from_sys_call, 0 == dont restart the syscall */
 
@@ -147,7 +128,7 @@
 	
 	/* fix the user-mode stackpointer */
 
-	p->thread.usp = usp;	
+	p->thread.usp = usp ?: rdusp();
 
 	/* and the kernel-mode one */
 
@@ -161,70 +142,6 @@
 	return 0;
 }
 
-/* 
- * Be aware of the "magic" 7th argument in the four system-calls below.
- * They need the latest stackframe, which is put as the 7th argument by
- * entry.S. The previous arguments are dummies or actually used, but need
- * to be defined to reach the 7th argument.
- *
- * N.B.: Another method to get the stackframe is to use current_regs(). But
- * it returns the latest stack-frame stacked when going from _user mode_ and
- * some of these (at least sys_clone) are called from kernel-mode sometimes
- * (for example during kernel_thread, above) and thus cannot use it. Thus,
- * to be sure not to get any surprises, we use the method for the other calls
- * as well.
- */
-
-asmlinkage int sys_fork(long r10, long r11, long r12, long r13, long mof, long srp,
-			struct pt_regs *regs)
-{
-	return do_fork(SIGCHLD, rdusp(), regs, 0, NULL, NULL);
-}
-
-/* if newusp is 0, we just grab the old usp */
-/* FIXME: Is parent_tid/child_tid really third/fourth argument? Update lib? */
-asmlinkage int sys_clone(unsigned long newusp, unsigned long flags,
-			 int* parent_tid, int* child_tid, long mof, long srp,
-			 struct pt_regs *regs)
-{
-	if (!newusp)
-		newusp = rdusp();
-	return do_fork(flags, newusp, regs, 0, parent_tid, child_tid);
-}
-
-/* vfork is a system call in i386 because of register-pressure - maybe
- * we can remove it and handle it in libc but we put it here until then.
- */
-
-asmlinkage int sys_vfork(long r10, long r11, long r12, long r13, long mof, long srp,
-			 struct pt_regs *regs)
-{
-        return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, rdusp(), regs, 0, NULL, NULL);
-}
-
-/*
- * sys_execve() executes a new program.
- */
-asmlinkage int sys_execve(const char *fname,
-			  const char *const *argv,
-			  const char *const *envp,
-			  long r13, long mof, long srp, 
-			  struct pt_regs *regs)
-{
-	int error;
-	struct filename *filename;
-
-	filename = getname(fname);
-	error = PTR_ERR(filename);
-
-	if (IS_ERR(filename))
-	        goto out;
-	error = do_execve(filename->name, argv, envp, regs);
-	putname(filename);
- out:
-	return error;
-}
-
 unsigned long get_wchan(struct task_struct *p)
 {
 #if 0
diff --git a/arch/cris/arch-v32/kernel/entry.S b/arch/cris/arch-v32/kernel/entry.S
index c3ea4694..faa6441 100644
--- a/arch/cris/arch-v32/kernel/entry.S
+++ b/arch/cris/arch-v32/kernel/entry.S
@@ -31,6 +31,7 @@
 	.globl system_call
 	.globl ret_from_intr
 	.globl ret_from_fork
+	.globl ret_from_kernel_thread
 	.globl resume
 	.globl multiple_interrupt
 	.globl nmi_interrupt
@@ -84,6 +85,18 @@
 	nop
 	.size	ret_from_fork, . - ret_from_fork
 
+	.type	ret_from_kernel_thread,@function
+ret_from_kernel_thread:
+	jsr schedule_tail
+	nop
+	move.d	$r2, $r10
+	jsr	$r1
+	nop
+	moveq	0, $r9			; no syscall restarts, TYVM...
+	ba  ret_from_sys_call
+	nop
+	.size	ret_from_kernel_thread, . - ret_from_kernel_thread
+
 	.type	ret_from_intr,@function
 ret_from_intr:
 	;; Check for resched if preemptive kernel, or if we're going back to
@@ -531,15 +544,6 @@
 	ba	do_sigtrap		; SIGTRAP the offending process.
 	move.d	[$sp+], $r0		; Restore R0 in delay slot.
 
-	.global kernel_execve
-	.type	kernel_execve,@function
-kernel_execve:
-	move.d __NR_execve, $r9
-	break 13
-	ret
-	nop
-	.size	kernel_execve, . - kernel_execve
-
 	.data
 
 	.section .rodata,"a"
diff --git a/arch/cris/arch-v32/kernel/process.c b/arch/cris/arch-v32/kernel/process.c
index 4e99922..2b23ef0 100644
--- a/arch/cris/arch-v32/kernel/process.c
+++ b/arch/cris/arch-v32/kernel/process.c
@@ -16,6 +16,7 @@
 #include <hwregs/reg_map.h>
 #include <hwregs/timer_defs.h>
 #include <hwregs/intr_vect_defs.h>
+#include <linux/ptrace.h>
 
 extern void stop_watchdog(void);
 
@@ -94,31 +95,6 @@
 	return task_pt_regs(t)->erp;
 }
 
-static void
-kernel_thread_helper(void* dummy, int (*fn)(void *), void * arg)
-{
-	fn(arg);
-	do_exit(-1); /* Should never be called, return bad exit value. */
-}
-
-/* Create a kernel thread. */
-int
-kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
-{
-	struct pt_regs regs;
-
-	memset(&regs, 0, sizeof(regs));
-
-        /* Don't use r10 since that is set to 0 in copy_thread. */
-	regs.r11 = (unsigned long) fn;
-	regs.r12 = (unsigned long) arg;
-	regs.erp = (unsigned long) kernel_thread_helper;
-	regs.ccs = 1 << (I_CCS_BITNR + CCS_SHIFT);
-
-	/* Create the new process. */
-        return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, &regs, 0, NULL, NULL);
-}
-
 /*
  * Setup the child's kernel stack with a pt_regs and call switch_stack() on it.
  * It will be unnested during _resume and _ret_from_sys_call when the new thread
@@ -129,34 +105,42 @@
  */
 
 extern asmlinkage void ret_from_fork(void);
+extern asmlinkage void ret_from_kernel_thread(void);
 
 int
 copy_thread(unsigned long clone_flags, unsigned long usp,
-	unsigned long unused,
-	struct task_struct *p, struct pt_regs *regs)
+	unsigned long arg, struct task_struct *p)
 {
-	struct pt_regs *childregs;
-	struct switch_stack *swstack;
+	struct pt_regs *childregs = task_pt_regs(p);
+	struct switch_stack *swstack = ((struct switch_stack *) childregs) - 1;
 
 	/*
 	 * Put the pt_regs structure at the end of the new kernel stack page and
 	 * fix it up. Note: the task_struct doubles as the kernel stack for the
 	 * task.
 	 */
-	childregs = task_pt_regs(p);
-	*childregs = *regs;	/* Struct copy of pt_regs. */
-        p->set_child_tid = p->clear_child_tid = NULL;
+	if (unlikely(p->flags & PF_KTHREAD)) {
+		memset(swstack, 0,
+			sizeof(struct switch_stack) + sizeof(struct pt_regs));
+		swstack->r1 = usp;
+		swstack->r2 = arg;
+		childregs->ccs = 1 << (I_CCS_BITNR + CCS_SHIFT);
+		swstack->return_ip = (unsigned long) ret_from_kernel_thread;
+		p->thread.ksp = (unsigned long) swstack;
+		p->thread.usp = 0;
+		return 0;
+	}
+	*childregs = *current_pt_regs();	/* Struct copy of pt_regs. */
         childregs->r10 = 0;	/* Child returns 0 after a fork/clone. */
 
 	/* Set a new TLS ?
 	 * The TLS is in $mof because it is the 5th argument to sys_clone.
 	 */
 	if (p->mm && (clone_flags & CLONE_SETTLS)) {
-		task_thread_info(p)->tls = regs->mof;
+		task_thread_info(p)->tls = childregs->mof;
 	}
 
 	/* Put the switch stack right below the pt_regs. */
-	swstack = ((struct switch_stack *) childregs) - 1;
 
 	/* Parameter to ret_from_sys_call. 0 is don't restart the syscall. */
 	swstack->r9 = 0;
@@ -168,76 +152,12 @@
 	swstack->return_ip = (unsigned long) ret_from_fork;
 
 	/* Fix the user-mode and kernel-mode stackpointer. */
-	p->thread.usp = usp;
+	p->thread.usp = usp ?: rdusp();
 	p->thread.ksp = (unsigned long) swstack;
 
 	return 0;
 }
 
-/*
- * Be aware of the "magic" 7th argument in the four system-calls below.
- * They need the latest stackframe, which is put as the 7th argument by
- * entry.S. The previous arguments are dummies or actually used, but need
- * to be defined to reach the 7th argument.
- *
- * N.B.: Another method to get the stackframe is to use current_regs(). But
- * it returns the latest stack-frame stacked when going from _user mode_ and
- * some of these (at least sys_clone) are called from kernel-mode sometimes
- * (for example during kernel_thread, above) and thus cannot use it. Thus,
- * to be sure not to get any surprises, we use the method for the other calls
- * as well.
- */
-asmlinkage int
-sys_fork(long r10, long r11, long r12, long r13, long mof, long srp,
-	struct pt_regs *regs)
-{
-	return do_fork(SIGCHLD, rdusp(), regs, 0, NULL, NULL);
-}
-
-/* FIXME: Is parent_tid/child_tid really third/fourth argument? Update lib? */
-asmlinkage int
-sys_clone(unsigned long newusp, unsigned long flags, int *parent_tid, int *child_tid,
-	unsigned long tls, long srp, struct pt_regs *regs)
-{
-	if (!newusp)
-		newusp = rdusp();
-
-	return do_fork(flags, newusp, regs, 0, parent_tid, child_tid);
-}
-
-/*
- * vfork is a system call in i386 because of register-pressure - maybe
- * we can remove it and handle it in libc but we put it here until then.
- */
-asmlinkage int
-sys_vfork(long r10, long r11, long r12, long r13, long mof, long srp,
-	struct pt_regs *regs)
-{
-	return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, rdusp(), regs, 0, NULL, NULL);
-}
-
-/* sys_execve() executes a new program. */
-asmlinkage int
-sys_execve(const char *fname,
-	   const char *const *argv,
-	   const char *const *envp, long r13, long mof, long srp,
-	   struct pt_regs *regs)
-{
-	int error;
-	struct filename *filename;
-
-	filename = getname(fname);
-	error = PTR_ERR(filename);
-
-	if (IS_ERR(filename))
-	        goto out;
-
-	error = do_execve(filename->name, argv, envp, regs);
-	putname(filename);
- out:
-	return error;
-}
-
 unsigned long
 get_wchan(struct task_struct *p)
 {
diff --git a/arch/cris/include/asm/processor.h b/arch/cris/include/asm/processor.h
index ef4e1bc..675823f 100644
--- a/arch/cris/include/asm/processor.h
+++ b/arch/cris/include/asm/processor.h
@@ -49,8 +49,6 @@
 #define task_pt_regs(task) user_regs(task_thread_info(task))
 #define current_regs() task_pt_regs(current)
 
-extern int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags);
-
 unsigned long get_wchan(struct task_struct *p);
 
 #define KSTK_ESP(tsk)   ((tsk) == current ? rdusp() : (tsk)->thread.usp)
diff --git a/arch/cris/include/asm/signal.h b/arch/cris/include/asm/signal.h
index ea6af9a..72dbbf5 100644
--- a/arch/cris/include/asm/signal.h
+++ b/arch/cris/include/asm/signal.h
@@ -152,12 +152,6 @@
 
 #ifdef __KERNEL__
 #include <asm/sigcontext.h>
-
-/* here we could define asm-optimized sigaddset, sigdelset etc. operations. 
- * if we don't, generic ones are used from linux/signal.h
- */
-#define ptrace_signal_deliver(regs, cookie) do { } while (0)
-
 #endif /* __KERNEL__ */
 
 #endif
diff --git a/arch/cris/include/asm/unistd.h b/arch/cris/include/asm/unistd.h
index 51873a4..f27b542 100644
--- a/arch/cris/include/asm/unistd.h
+++ b/arch/cris/include/asm/unistd.h
@@ -371,6 +371,10 @@
 #define __ARCH_WANT_SYS_SIGPROCMASK
 #define __ARCH_WANT_SYS_RT_SIGACTION
 #define __ARCH_WANT_SYS_RT_SIGSUSPEND
+#define __ARCH_WANT_SYS_EXECVE
+#define __ARCH_WANT_SYS_FORK
+#define __ARCH_WANT_SYS_VFORK
+#define __ARCH_WANT_SYS_CLONE
 
 /*
  * "Conditional" syscalls
diff --git a/arch/cris/kernel/crisksyms.c b/arch/cris/kernel/crisksyms.c
index 7ac000f..5868cee 100644
--- a/arch/cris/kernel/crisksyms.c
+++ b/arch/cris/kernel/crisksyms.c
@@ -30,7 +30,6 @@
 extern void iounmap(volatile void * __iomem);
 
 /* Platform dependent support */
-EXPORT_SYMBOL(kernel_thread);
 EXPORT_SYMBOL(get_cmos_time);
 EXPORT_SYMBOL(loops_per_usec);
 
diff --git a/arch/frv/include/asm/unistd.h b/arch/frv/include/asm/unistd.h
index 2358634..1807d8e 100644
--- a/arch/frv/include/asm/unistd.h
+++ b/arch/frv/include/asm/unistd.h
@@ -30,6 +30,9 @@
 #define __ARCH_WANT_SYS_RT_SIGACTION
 #define __ARCH_WANT_SYS_RT_SIGSUSPEND
 #define __ARCH_WANT_SYS_EXECVE
+#define __ARCH_WANT_SYS_FORK
+#define __ARCH_WANT_SYS_VFORK
+#define __ARCH_WANT_SYS_CLONE
 
 /*
  * "Conditional" syscalls
diff --git a/arch/frv/kernel/process.c b/arch/frv/kernel/process.c
index 7e33215..23916b2 100644
--- a/arch/frv/kernel/process.c
+++ b/arch/frv/kernel/process.c
@@ -139,42 +139,12 @@
 	return user_mode(regs) ? regs->sp : 0;
 }
 
-asmlinkage int sys_fork(void)
-{
-#ifndef CONFIG_MMU
-	/* fork almost works, enough to trick you into looking elsewhere:-( */
-	return -EINVAL;
-#else
-	return do_fork(SIGCHLD, user_stack(__frame), __frame, 0, NULL, NULL);
-#endif
-}
-
-asmlinkage int sys_vfork(void)
-{
-	return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, user_stack(__frame), __frame, 0,
-		       NULL, NULL);
-}
-
-/*****************************************************************************/
-/*
- * clone a process
- * - tlsptr is retrieved by copy_thread()
- */
-asmlinkage int sys_clone(unsigned long clone_flags, unsigned long newsp,
-			 int __user *parent_tidptr, int __user *child_tidptr,
-			 int __user *tlsptr)
-{
-	if (!newsp)
-		newsp = user_stack(__frame);
-	return do_fork(clone_flags, newsp, __frame, 0, parent_tidptr, child_tidptr);
-} /* end sys_clone() */
-
 /*
  * set up the kernel stack and exception frames for a new process
  */
 int copy_thread(unsigned long clone_flags,
 		unsigned long usp, unsigned long arg,
-		struct task_struct *p, struct pt_regs *regs)
+		struct task_struct *p)
 {
 	struct pt_regs *childregs;
 
@@ -182,9 +152,7 @@
 		(task_stack_page(p) + THREAD_SIZE - FRV_FRAME0_SIZE);
 
 	/* set up the userspace frame (the only place that the USP is stored) */
-	*childregs = *__kernel_frame0_ptr;
-
-	p->set_child_tid = p->clear_child_tid = NULL;
+	*childregs = *current_pt_regs();
 
 	p->thread.frame	 = childregs;
 	p->thread.curr	 = p;
@@ -193,18 +161,15 @@
 	p->thread.lr	 = 0;
 	p->thread.frame0 = childregs;
 
-	if (unlikely(!regs)) {
+	if (unlikely(p->flags & PF_KTHREAD)) {
 		childregs->gr9 = usp; /* function */
 		childregs->gr8 = arg;
 		p->thread.pc = (unsigned long) ret_from_kernel_thread;
 		save_user_regs(p->thread.user);
 		return 0;
 	}
-
-	/* set up the userspace frame (the only place that the USP is stored) */
-	*childregs = *regs;
-
-	childregs->sp		= usp;
+	if (usp)
+		childregs->sp = usp;
 	childregs->next_frame	= NULL;
 
 	p->thread.pc = (unsigned long) ret_from_fork;
diff --git a/arch/h8300/Kconfig b/arch/h8300/Kconfig
index 98fabd1..04bef4d 100644
--- a/arch/h8300/Kconfig
+++ b/arch/h8300/Kconfig
@@ -8,6 +8,8 @@
 	select GENERIC_IRQ_SHOW
 	select GENERIC_CPU_DEVICES
 	select MODULES_USE_ELF_RELA
+	select GENERIC_KERNEL_THREAD
+	select GENERIC_KERNEL_EXECVE
 
 config SYMBOL_PREFIX
 	string
diff --git a/arch/h8300/include/asm/processor.h b/arch/h8300/include/asm/processor.h
index 4c9f6f8..4b0ca49 100644
--- a/arch/h8300/include/asm/processor.h
+++ b/arch/h8300/include/asm/processor.h
@@ -107,8 +107,6 @@
 {
 }
 
-extern int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags);
-
 /*
  * Free current thread data structures etc..
  */
diff --git a/arch/h8300/include/asm/ptrace.h b/arch/h8300/include/asm/ptrace.h
index d09c440..7468589 100644
--- a/arch/h8300/include/asm/ptrace.h
+++ b/arch/h8300/include/asm/ptrace.h
@@ -60,6 +60,9 @@
 #define user_mode(regs) (!((regs)->ccr & PS_S))
 #define instruction_pointer(regs) ((regs)->pc)
 #define profile_pc(regs) instruction_pointer(regs)
+#define current_pt_regs() ((struct pt_regs *) \
+	(THREAD_SIZE + (unsigned long)current_thread_info()) - 1)
+#define signal_pt_regs() ((struct pt_regs *)current->thread.esp0)
 #endif /* __KERNEL__ */
 #endif /* __ASSEMBLY__ */
 #endif /* _H8300_PTRACE_H */
diff --git a/arch/h8300/include/asm/signal.h b/arch/h8300/include/asm/signal.h
index fd8b66e..c43c0a7 100644
--- a/arch/h8300/include/asm/signal.h
+++ b/arch/h8300/include/asm/signal.h
@@ -154,8 +154,6 @@
 #include <asm/sigcontext.h>
 #undef __HAVE_ARCH_SIG_BITOPS
 
-#define ptrace_signal_deliver(regs, cookie) do { } while (0)
-
 #endif /* __KERNEL__ */
 
 #endif /* _H8300_SIGNAL_H */
diff --git a/arch/h8300/include/asm/unistd.h b/arch/h8300/include/asm/unistd.h
index 5cd8828..c2c2f5c7 100644
--- a/arch/h8300/include/asm/unistd.h
+++ b/arch/h8300/include/asm/unistd.h
@@ -356,6 +356,10 @@
 #define __ARCH_WANT_SYS_SIGPROCMASK
 #define __ARCH_WANT_SYS_RT_SIGACTION
 #define __ARCH_WANT_SYS_RT_SIGSUSPEND
+#define __ARCH_WANT_SYS_EXECVE
+#define __ARCH_WANT_SYS_FORK
+#define __ARCH_WANT_SYS_VFORK
+#define __ARCH_WANT_SYS_CLONE
 
 /*
  * "Conditional" syscalls
diff --git a/arch/h8300/kernel/entry.S b/arch/h8300/kernel/entry.S
index ca74316..617a687 100644
--- a/arch/h8300/kernel/entry.S
+++ b/arch/h8300/kernel/entry.S
@@ -158,6 +158,7 @@
 .globl SYMBOL_NAME(system_call)
 .globl SYMBOL_NAME(ret_from_exception)
 .globl SYMBOL_NAME(ret_from_fork)
+.globl SYMBOL_NAME(ret_from_kernel_thread)
 .globl SYMBOL_NAME(ret_from_interrupt)
 .globl SYMBOL_NAME(interrupt_redirect_table)
 .globl SYMBOL_NAME(sw_ksp),SYMBOL_NAME(sw_usp)
@@ -330,6 +331,14 @@
 	jsr	@SYMBOL_NAME(schedule_tail)
 	jmp	@SYMBOL_NAME(ret_from_exception)
 
+SYMBOL_NAME_LABEL(ret_from_kernel_thread)
+	mov.l	er2,er0
+	jsr	@SYMBOL_NAME(schedule_tail)
+	mov.l	@(LER4:16,sp),er0
+	mov.l	@(LER5:16,sp),er1
+	jsr	@er1
+	jmp	@SYMBOL_NAME(ret_from_exception)
+
 SYMBOL_NAME_LABEL(resume)
 	/*
 	 * Beware - when entering resume, offset of tss is in d1,
diff --git a/arch/h8300/kernel/h8300_ksyms.c b/arch/h8300/kernel/h8300_ksyms.c
index 6866bd9..53d7c0e 100644
--- a/arch/h8300/kernel/h8300_ksyms.c
+++ b/arch/h8300/kernel/h8300_ksyms.c
@@ -33,7 +33,6 @@
 
 EXPORT_SYMBOL(ip_fast_csum);
 
-EXPORT_SYMBOL(kernel_thread);
 EXPORT_SYMBOL(enable_irq);
 EXPORT_SYMBOL(disable_irq);
 
diff --git a/arch/h8300/kernel/process.c b/arch/h8300/kernel/process.c
index e8dc139..b609f63 100644
--- a/arch/h8300/kernel/process.c
+++ b/arch/h8300/kernel/process.c
@@ -47,6 +47,7 @@
 EXPORT_SYMBOL(pm_power_off);
 
 asmlinkage void ret_from_fork(void);
+asmlinkage void ret_from_kernel_thread(void);
 
 /*
  * The idle loop on an H8/300..
@@ -122,113 +123,34 @@
 		printk("\n");
 }
 
-/*
- * Create a kernel thread
- */
-int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
-{
-	long retval;
-	long clone_arg;
-	mm_segment_t fs;
-
-	fs = get_fs();
-	set_fs (KERNEL_DS);
-	clone_arg = flags | CLONE_VM;
-	__asm__("mov.l sp,er3\n\t"
-		"sub.l er2,er2\n\t"
-		"mov.l %2,er1\n\t"
-		"mov.l %1,er0\n\t"
-		"trapa #0\n\t"
-		"cmp.l sp,er3\n\t"
-		"beq 1f\n\t"
-		"mov.l %4,er0\n\t"
-		"mov.l %3,er1\n\t"
-		"jsr @er1\n\t"
-		"mov.l %5,er0\n\t"
-		"trapa #0\n"
-		"1:\n\t"
-		"mov.l er0,%0"
-		:"=r"(retval)
-		:"i"(__NR_clone),"g"(clone_arg),"g"(fn),"g"(arg),"i"(__NR_exit)
-		:"er0","er1","er2","er3");
-	set_fs (fs);
-	return retval;
-}
-
 void flush_thread(void)
 {
 }
 
-/*
- * "h8300_fork()".. By the time we get here, the
- * non-volatile registers have also been saved on the
- * stack. We do some ugly pointer stuff here.. (see
- * also copy_thread)
- */
-
-asmlinkage int h8300_fork(struct pt_regs *regs)
-{
-	return -EINVAL;
-}
-
-asmlinkage int h8300_vfork(struct pt_regs *regs)
-{
-	return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, rdusp(), regs, 0, NULL, NULL);
-}
-
-asmlinkage int h8300_clone(struct pt_regs *regs)
-{
-	unsigned long clone_flags;
-	unsigned long newsp;
-
-	/* syscall2 puts clone_flags in er1 and usp in er2 */
-	clone_flags = regs->er1;
-	newsp = regs->er2;
-	if (!newsp)
-		newsp  = rdusp();
-	return do_fork(clone_flags, newsp, regs, 0, NULL, NULL);
-
-}
-
 int copy_thread(unsigned long clone_flags,
                 unsigned long usp, unsigned long topstk,
-		 struct task_struct * p, struct pt_regs * regs)
+		 struct task_struct * p)
 {
 	struct pt_regs * childregs;
 
 	childregs = (struct pt_regs *) (THREAD_SIZE + task_stack_page(p)) - 1;
 
-	*childregs = *regs;
+	if (unlikely(p->flags & PF_KTHREAD)) {
+		memset(childregs, 0, sizeof(struct pt_regs));
+		childregs->retpc = (unsigned long) ret_from_kernel_thread;
+		childregs->er4 = topstk; /* arg */
+		childregs->er5 = usp; /* fn */
+		p->thread.ksp = (unsigned long)childregs;
+	}
+	*childregs = *current_pt_regs();
 	childregs->retpc = (unsigned long) ret_from_fork;
 	childregs->er0 = 0;
-
-	p->thread.usp = usp;
+	p->thread.usp = usp ?: rdusp();
 	p->thread.ksp = (unsigned long)childregs;
 
 	return 0;
 }
 
-/*
- * sys_execve() executes a new program.
- */
-asmlinkage int sys_execve(const char *name,
-			  const char *const *argv,
-			  const char *const *envp,
-			  int dummy, ...)
-{
-	int error;
-	struct filename *filename;
-	struct pt_regs *regs = (struct pt_regs *) ((unsigned char *)&dummy-4);
-
-	filename = getname(name);
-	error = PTR_ERR(filename);
-	if (IS_ERR(filename))
-		return error;
-	error = do_execve(filename->name, argv, envp, regs);
-	putname(filename);
-	return error;
-}
-
 unsigned long thread_saved_pc(struct task_struct *tsk)
 {
 	return ((struct pt_regs *)tsk->thread.esp0)->pc;
diff --git a/arch/h8300/kernel/sys_h8300.c b/arch/h8300/kernel/sys_h8300.c
index 4bdc731..bf350cb 100644
--- a/arch/h8300/kernel/sys_h8300.c
+++ b/arch/h8300/kernel/sys_h8300.c
@@ -46,29 +46,3 @@
                ((regs->pc)&0xffffff)-2,regs->orig_er0,regs->er1,regs->er2,regs->er3,regs->er0);
 }
 #endif
-
-/*
- * Do a system call from kernel instead of calling sys_execve so we
- * end up with proper pt_regs.
- */
-asmlinkage
-int kernel_execve(const char *filename,
-		  const char *const argv[],
-		  const char *const envp[])
-{
-	register long res __asm__("er0");
-	register const char *const *_c __asm__("er3") = envp;
-	register const char *const *_b __asm__("er2") = argv;
-	register const char * _a __asm__("er1") = filename;
-	__asm__ __volatile__ ("mov.l %1,er0\n\t"
-			"trapa	#0\n\t"
-			: "=r" (res)
-			: "g" (__NR_execve),
-			  "g" (_a),
-			  "g" (_b),
-			  "g" (_c)
-			: "cc", "memory");
-	return res;
-}
-
-
diff --git a/arch/h8300/kernel/syscalls.S b/arch/h8300/kernel/syscalls.S
index 9d77e71..b74dd0a 100644
--- a/arch/h8300/kernel/syscalls.S
+++ b/arch/h8300/kernel/syscalls.S
@@ -340,21 +340,12 @@
 	bra	SYMBOL_NAME(syscall_trampoline):8
 	.endm
 
-SYMBOL_NAME_LABEL(sys_clone)	
-	call_sp	h8300_clone
-	
 SYMBOL_NAME_LABEL(sys_sigreturn)
 	call_sp	do_sigreturn
 
 SYMBOL_NAME_LABEL(sys_rt_sigreturn)
 	call_sp	do_rt_sigreturn
 
-SYMBOL_NAME_LABEL(sys_fork)
-	call_sp	h8300_fork
-
-SYMBOL_NAME_LABEL(sys_vfork)
-	call_sp	h8300_vfork
-
 SYMBOL_NAME_LABEL(syscall_trampoline)
 	mov.l	sp,er0
 	jmp	@er6
diff --git a/arch/hexagon/Kconfig b/arch/hexagon/Kconfig
index 0744f7d..e418803 100644
--- a/arch/hexagon/Kconfig
+++ b/arch/hexagon/Kconfig
@@ -31,6 +31,8 @@
 	select GENERIC_CLOCKEVENTS
 	select GENERIC_CLOCKEVENTS_BROADCAST
 	select MODULES_USE_ELF_RELA
+	select GENERIC_KERNEL_THREAD
+	select GENERIC_KERNEL_EXECVE
 	---help---
 	  Qualcomm Hexagon is a processor architecture designed for high
 	  performance and low power across a wide variety of applications.
diff --git a/arch/hexagon/include/asm/processor.h b/arch/hexagon/include/asm/processor.h
index a03323a..6dd5d37 100644
--- a/arch/hexagon/include/asm/processor.h
+++ b/arch/hexagon/include/asm/processor.h
@@ -34,7 +34,6 @@
 struct task_struct;
 
 /*  this is defined in arch/process.c  */
-extern pid_t kernel_thread(int (*fn)(void *), void * arg, unsigned long flags);
 extern unsigned long thread_saved_pc(struct task_struct *tsk);
 
 extern void start_thread(struct pt_regs *, unsigned long, unsigned long);
diff --git a/arch/hexagon/include/asm/syscall.h b/arch/hexagon/include/asm/syscall.h
index fb0e9d4..4af9c7b 100644
--- a/arch/hexagon/include/asm/syscall.h
+++ b/arch/hexagon/include/asm/syscall.h
@@ -25,14 +25,6 @@
 	unsigned long, unsigned long,
 	unsigned long, unsigned long);
 
-asmlinkage int sys_execve(char __user *ufilename, char __user * __user *argv,
-			  char __user * __user *envp);
-asmlinkage int sys_clone(unsigned long clone_flags, unsigned long newsp,
-			 unsigned long parent_tidp, unsigned long child_tidp);
-
-#define sys_execve	sys_execve
-#define sys_clone	sys_clone
-
 #include <asm-generic/syscalls.h>
 
 extern void *sys_call_table[];
diff --git a/arch/hexagon/include/uapi/asm/ptrace.h b/arch/hexagon/include/uapi/asm/ptrace.h
index 8ef7840..1ffce0c 100644
--- a/arch/hexagon/include/uapi/asm/ptrace.h
+++ b/arch/hexagon/include/uapi/asm/ptrace.h
@@ -32,4 +32,8 @@
 extern int regs_query_register_offset(const char *name);
 extern const char *regs_query_register_name(unsigned int offset);
 
+#define current_pt_regs() \
+	((struct pt_regs *) \
+	 ((unsigned long)current_thread_info() + THREAD_SIZE) - 1)
+
 #endif
diff --git a/arch/hexagon/include/uapi/asm/unistd.h b/arch/hexagon/include/uapi/asm/unistd.h
index 81312d6..2af8153 100644
--- a/arch/hexagon/include/uapi/asm/unistd.h
+++ b/arch/hexagon/include/uapi/asm/unistd.h
@@ -27,5 +27,7 @@
  */
 
 #define sys_mmap2 sys_mmap_pgoff
+#define __ARCH_WANT_SYS_EXECVE
+#define __ARCH_WANT_SYS_CLONE
 
 #include <asm-generic/unistd.h>
diff --git a/arch/hexagon/kernel/Makefile b/arch/hexagon/kernel/Makefile
index 536aec0..6c19501 100644
--- a/arch/hexagon/kernel/Makefile
+++ b/arch/hexagon/kernel/Makefile
@@ -3,8 +3,7 @@
 obj-$(CONFIG_SMP) += smp.o topology.o
 
 obj-y += setup.o irq_cpu.o traps.o syscalltab.o signal.o time.o
-obj-y += process.o syscall.o trampoline.o reset.o ptrace.o
-obj-y += vdso.o
+obj-y += process.o trampoline.o reset.o ptrace.o vdso.o
 
 obj-$(CONFIG_KGDB)    += kgdb.o
 obj-$(CONFIG_MODULES) += module.o hexagon_ksyms.o
diff --git a/arch/hexagon/kernel/process.c b/arch/hexagon/kernel/process.c
index 9f6d741..06ae9ff 100644
--- a/arch/hexagon/kernel/process.c
+++ b/arch/hexagon/kernel/process.c
@@ -26,33 +26,6 @@
 #include <linux/slab.h>
 
 /*
- * Kernel thread creation.  The desired kernel function is "wrapped"
- * in the kernel_thread_helper function, which does cleanup
- * afterwards.
- */
-static void __noreturn kernel_thread_helper(void *arg, int (*fn)(void *))
-{
-	do_exit(fn(arg));
-}
-
-int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
-{
-	struct pt_regs regs;
-
-	memset(&regs, 0, sizeof(regs));
-	/*
-	 * Yes, we're exploting illicit knowledge of the ABI here.
-	 */
-	regs.r00 = (unsigned long) arg;
-	regs.r01 = (unsigned long) fn;
-	pt_set_elr(&regs, (unsigned long)kernel_thread_helper);
-	pt_set_kmode(&regs);
-
-	return do_fork(flags|CLONE_VM|CLONE_UNTRACED, 0, &regs, 0, NULL, NULL);
-}
-EXPORT_SYMBOL(kernel_thread);
-
-/*
  * Program thread launch.  Often defined as a macro in processor.h,
  * but we're shooting for a small footprint and it's not an inner-loop
  * performance-critical operation.
@@ -114,8 +87,7 @@
  * Copy architecture-specific thread state
  */
 int copy_thread(unsigned long clone_flags, unsigned long usp,
-		unsigned long unused, struct task_struct *p,
-		struct pt_regs *regs)
+		unsigned long arg, struct task_struct *p)
 {
 	struct thread_info *ti = task_thread_info(p);
 	struct hexagon_switch_stack *ss;
@@ -125,61 +97,51 @@
 	childregs = (struct pt_regs *) (((unsigned long) ti + THREAD_SIZE) -
 					sizeof(*childregs));
 
-	memcpy(childregs, regs, sizeof(*childregs));
 	ti->regs = childregs;
 
 	/*
 	 * Establish kernel stack pointer and initial PC for new thread
+	 * Note that unlike the usual situation, we do not copy the
+	 * parent's callee-saved here; those are in pt_regs and whatever
+	 * we leave here will be overridden on return to userland.
 	 */
 	ss = (struct hexagon_switch_stack *) ((unsigned long) childregs -
 						    sizeof(*ss));
 	ss->lr = (unsigned long)ret_from_fork;
 	p->thread.switch_sp = ss;
+	if (unlikely(p->flags & PF_KTHREAD)) {
+		memset(childregs, 0, sizeof(struct pt_regs));
+		/* r24 <- fn, r25 <- arg */
+		ss->r2524 = usp | ((u64)arg << 32);
+		pt_set_kmode(childregs);
+		return 0;
+	}
+	memcpy(childregs, current_pt_regs(), sizeof(*childregs));
+	ss->r2524 = 0;
 
-	/* If User mode thread, set pt_reg stack pointer as per parameter */
-	if (user_mode(childregs)) {
+	if (usp)
 		pt_set_rte_sp(childregs, usp);
 
-		/* Child sees zero return value */
-		childregs->r00 = 0;
-
-		/*
-		 * The clone syscall has the C signature:
-		 * int [r0] clone(int flags [r0],
-		 *           void *child_frame [r1],
-		 *           void *parent_tid [r2],
-		 *           void *child_tid [r3],
-		 *           void *thread_control_block [r4]);
-		 * ugp is used to provide TLS support.
-		 */
-		if (clone_flags & CLONE_SETTLS)
-			childregs->ugp = childregs->r04;
-
-		/*
-		 * Parent sees new pid -- not necessary, not even possible at
-		 * this point in the fork process
-		 * Might also want to set things like ti->addr_limit
-		 */
-	} else {
-		/*
-		 * If kernel thread, resume stack is kernel stack base.
-		 * Note that this is pointer arithmetic on pt_regs *
-		 */
-		pt_set_rte_sp(childregs, (unsigned long)(childregs + 1));
-		/*
-		 * We need the current thread_info fast path pointer
-		 * set up in pt_regs.  The register to be used is
-		 * parametric for assembler code, but the mechanism
-		 * doesn't drop neatly into C.  Needs to be fixed.
-		 */
-		childregs->THREADINFO_REG = (unsigned long) ti;
-	}
+	/* Child sees zero return value */
+	childregs->r00 = 0;
 
 	/*
-	 * thread_info pointer is pulled out of task_struct "stack"
-	 * field on switch_to.
+	 * The clone syscall has the C signature:
+	 * int [r0] clone(int flags [r0],
+	 *           void *child_frame [r1],
+	 *           void *parent_tid [r2],
+	 *           void *child_tid [r3],
+	 *           void *thread_control_block [r4]);
+	 * ugp is used to provide TLS support.
 	 */
-	p->stack = (void *)ti;
+	if (clone_flags & CLONE_SETTLS)
+		childregs->ugp = childregs->r04;
+
+	/*
+	 * Parent sees new pid -- not necessary, not even possible at
+	 * this point in the fork process
+	 * Might also want to set things like ti->addr_limit
+	 */
 
 	return 0;
 }
diff --git a/arch/hexagon/kernel/signal.c b/arch/hexagon/kernel/signal.c
index 5047b8b..fe0d137 100644
--- a/arch/hexagon/kernel/signal.c
+++ b/arch/hexagon/kernel/signal.c
@@ -249,14 +249,14 @@
  */
 asmlinkage int sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss)
 {
-	struct pt_regs *regs = current_thread_info()->regs;
+	struct pt_regs *regs = current_pt_regs();
 
 	return do_sigaltstack(uss, uoss, regs->r29);
 }
 
 asmlinkage int sys_rt_sigreturn(void)
 {
-	struct pt_regs *regs = current_thread_info()->regs;
+	struct pt_regs *regs = current_pt_regs();
 	struct rt_sigframe __user *frame;
 	sigset_t blocked;
 
diff --git a/arch/hexagon/kernel/syscall.c b/arch/hexagon/kernel/syscall.c
deleted file mode 100644
index 319fa64..0000000
--- a/arch/hexagon/kernel/syscall.c
+++ /dev/null
@@ -1,89 +0,0 @@
-/*
- * Hexagon system calls
- *
- * Copyright (c) 2010-2011, The Linux Foundation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 and
- * only version 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- * 02110-1301, USA.
- */
-
-#include <linux/file.h>
-#include <linux/fs.h>
-#include <linux/linkage.h>
-#include <linux/mm.h>
-#include <linux/module.h>
-#include <linux/sched.h>
-#include <linux/slab.h>
-#include <linux/syscalls.h>
-#include <linux/unistd.h>
-#include <asm/mman.h>
-#include <asm/registers.h>
-
-/*
- * System calls with architecture-specific wrappers.
- * See signal.c for signal-related system call wrappers.
- */
-
-asmlinkage int sys_execve(char __user *ufilename,
-			  const char __user *const __user *argv,
-			  const char __user *const __user *envp)
-{
-	struct pt_regs *pregs = current_thread_info()->regs;
-	struct filename *filename;
-	int retval;
-
-	filename = getname(ufilename);
-	retval = PTR_ERR(filename);
-	if (IS_ERR(filename))
-		return retval;
-
-	retval = do_execve(filename->name, argv, envp, pregs);
-	putname(filename);
-
-	return retval;
-}
-
-asmlinkage int sys_clone(unsigned long clone_flags, unsigned long newsp,
-			 unsigned long parent_tidp, unsigned long child_tidp)
-{
-	struct pt_regs *pregs = current_thread_info()->regs;
-
-	if (!newsp)
-		newsp = pregs->SP;
-	return do_fork(clone_flags, newsp, pregs, 0, (int __user *)parent_tidp,
-		       (int __user *)child_tidp);
-}
-
-/*
- * Do a system call from the kernel, so as to have a proper pt_regs
- * and recycle the sys_execvpe infrustructure.
- */
-int kernel_execve(const char *filename,
-		  const char *const argv[], const char *const envp[])
-{
-	register unsigned long __a0 asm("r0") = (unsigned long) filename;
-	register unsigned long __a1 asm("r1") = (unsigned long) argv;
-	register unsigned long __a2 asm("r2") = (unsigned long) envp;
-	int retval;
-
-	__asm__ volatile(
-		"	R6 = #%4;\n"
-		"	trap0(#1);\n"
-		"	%0 = R0;\n"
-		: "=r" (retval)
-		: "r" (__a0), "r" (__a1), "r" (__a2), "i" (__NR_execve)
-	);
-
-	return retval;
-}
diff --git a/arch/hexagon/kernel/vm_entry.S b/arch/hexagon/kernel/vm_entry.S
index cd71673..425e50c 100644
--- a/arch/hexagon/kernel/vm_entry.S
+++ b/arch/hexagon/kernel/vm_entry.S
@@ -266,4 +266,8 @@
 	.globl ret_from_fork
 ret_from_fork:
 	call schedule_tail
+	P0 = cmp.eq(R24, #0);
+	if P0 jump return_from_syscall
+	R0 = R25;
+	callr R24
 	jump return_from_syscall
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index 3279646..6706004 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -42,6 +42,8 @@
 	select GENERIC_TIME_VSYSCALL_OLD
 	select HAVE_MOD_ARCH_SPECIFIC
 	select MODULES_USE_ELF_RELA
+	select GENERIC_KERNEL_THREAD
+	select GENERIC_KERNEL_EXECVE
 	default y
 	help
 	  The Itanium Processor Family is Intel's 64-bit successor to
diff --git a/arch/ia64/include/asm/processor.h b/arch/ia64/include/asm/processor.h
index 944152a..e0a899a 100644
--- a/arch/ia64/include/asm/processor.h
+++ b/arch/ia64/include/asm/processor.h
@@ -340,22 +340,6 @@
  */
 #define release_thread(dead_task)
 
-/*
- * This is the mechanism for creating a new kernel thread.
- *
- * NOTE 1: Only a kernel-only process (ie the swapper or direct
- * descendants who haven't done an "execve()") should use this: it
- * will work within a system call from a "real" process, but the
- * process memory space will not be free'd until both the parent and
- * the child have exited.
- *
- * NOTE 2: This MUST NOT be an inlined function.  Otherwise, we get
- * into trouble in init/main.c when the child thread returns to
- * do_basic_setup() and the timing is such that free_initmem() has
- * been called already.
- */
-extern pid_t kernel_thread (int (*fn)(void *), void *arg, unsigned long flags);
-
 /* Get wait channel for task P.  */
 extern unsigned long get_wchan (struct task_struct *p);
 
diff --git a/arch/ia64/include/asm/signal.h b/arch/ia64/include/asm/signal.h
index aecda5b..3a1b20e 100644
--- a/arch/ia64/include/asm/signal.h
+++ b/arch/ia64/include/asm/signal.h
@@ -38,7 +38,5 @@
 
 #  include <asm/sigcontext.h>
 
-#define ptrace_signal_deliver(regs, cookie) do { } while (0)
-
 # endif /* !__ASSEMBLY__ */
 #endif /* _ASM_IA64_SIGNAL_H */
diff --git a/arch/ia64/include/asm/unistd.h b/arch/ia64/include/asm/unistd.h
index 8b3ff2f..1574bca 100644
--- a/arch/ia64/include/asm/unistd.h
+++ b/arch/ia64/include/asm/unistd.h
@@ -29,6 +29,7 @@
 
 #define __ARCH_WANT_SYS_RT_SIGACTION
 #define __ARCH_WANT_SYS_RT_SIGSUSPEND
+#define __ARCH_WANT_SYS_EXECVE
 
 #if !defined(__ASSEMBLY__) && !defined(ASSEMBLER)
 
diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S
index 1ccbe12..e25b784 100644
--- a/arch/ia64/kernel/entry.S
+++ b/arch/ia64/kernel/entry.S
@@ -61,14 +61,13 @@
 	 * Allocate 8 input registers since ptrace() may clobber them
 	 */
 	.prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8)
-	alloc loc1=ar.pfs,8,2,4,0
+	alloc loc1=ar.pfs,8,2,3,0
 	mov loc0=rp
 	.body
 	mov out0=in0			// filename
 	;;				// stop bit between alloc and call
 	mov out1=in1			// argv
 	mov out2=in2			// envp
-	add out3=16,sp			// regs
 	br.call.sptk.many rp=sys_execve
 .ret0:
 	cmp4.ge p6,p7=r8,r0
@@ -76,7 +75,6 @@
 	sxt4 r8=r8			// return 64-bit result
 	;;
 	stf.spill [sp]=f0
-(p6)	cmp.ne pKStk,pUStk=r0,r0	// a successful execve() lands us in user-mode...
 	mov rp=loc0
 (p6)	mov ar.pfs=r0			// clear ar.pfs on success
 (p7)	br.ret.sptk.many rp
@@ -118,13 +116,12 @@
 	mov loc1=r16				// save ar.pfs across do_fork
 	.body
 	mov out1=in1
-	mov out3=in2
+	mov out2=in2
 	tbit.nz p6,p0=in0,CLONE_SETTLS_BIT
-	mov out4=in3	// parent_tidptr: valid only w/CLONE_PARENT_SETTID
+	mov out3=in3	// parent_tidptr: valid only w/CLONE_PARENT_SETTID
 	;;
 (p6)	st8 [r2]=in5				// store TLS in r16 for copy_thread()
-	mov out5=in4	// child_tidptr:  valid only w/CLONE_CHILD_SETTID or CLONE_CHILD_CLEARTID
-	adds out2=IA64_SWITCH_STACK_SIZE+16,sp	// out2 = &regs
+	mov out4=in4	// child_tidptr:  valid only w/CLONE_CHILD_SETTID or CLONE_CHILD_CLEARTID
 	mov out0=in0				// out0 = clone_flags
 	br.call.sptk.many rp=do_fork
 .ret1:	.restore sp
@@ -150,13 +147,12 @@
 	mov loc1=r16				// save ar.pfs across do_fork
 	.body
 	mov out1=in1
-	mov out3=16				// stacksize (compensates for 16-byte scratch area)
+	mov out2=16				// stacksize (compensates for 16-byte scratch area)
 	tbit.nz p6,p0=in0,CLONE_SETTLS_BIT
-	mov out4=in2	// parent_tidptr: valid only w/CLONE_PARENT_SETTID
+	mov out3=in2	// parent_tidptr: valid only w/CLONE_PARENT_SETTID
 	;;
 (p6)	st8 [r2]=in4				// store TLS in r13 (tp)
-	mov out5=in3	// child_tidptr:  valid only w/CLONE_CHILD_SETTID or CLONE_CHILD_CLEARTID
-	adds out2=IA64_SWITCH_STACK_SIZE+16,sp	// out2 = &regs
+	mov out4=in3	// child_tidptr:  valid only w/CLONE_CHILD_SETTID or CLONE_CHILD_CLEARTID
 	mov out0=in0				// out0 = clone_flags
 	br.call.sptk.many rp=do_fork
 .ret2:	.restore sp
@@ -484,19 +480,6 @@
 	br.ret.sptk.many rp
 END(prefetch_stack)
 
-GLOBAL_ENTRY(kernel_execve)
-	rum psr.ac
-	mov r15=__NR_execve			// put syscall number in place
-	break __BREAK_SYSCALL
-	br.ret.sptk.many rp
-END(kernel_execve)
-
-GLOBAL_ENTRY(clone)
-	mov r15=__NR_clone			// put syscall number in place
-	break __BREAK_SYSCALL
-	br.ret.sptk.many rp
-END(clone)
-
 	/*
 	 * Invoke a system call, but do some tracing before and after the call.
 	 * We MUST preserve the current register frame throughout this routine
@@ -600,6 +583,27 @@
 .ret4:	br.cond.sptk ia64_leave_kernel
 END(ia64_strace_leave_kernel)
 
+ENTRY(call_payload)
+	.prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(0)
+	/* call the kernel_thread payload; fn is in r4, arg - in r5 */
+	alloc loc1=ar.pfs,0,3,1,0
+	mov loc0=rp
+	mov loc2=gp
+	mov out0=r5		// arg
+	ld8 r14 = [r4], 8	// fn.address
+	;;
+	mov b6 = r14
+	ld8 gp = [r4]		// fn.gp
+	;;
+	br.call.sptk.many rp=b6	// fn(arg)
+.ret12:	mov gp=loc2
+	mov rp=loc0
+	mov ar.pfs=loc1
+	/* ... and if it has returned, we are going to userland */
+	cmp.ne pKStk,pUStk=r0,r0
+	br.ret.sptk.many rp
+END(call_payload)
+
 GLOBAL_ENTRY(ia64_ret_from_clone)
 	PT_REGS_UNWIND_INFO(0)
 {	/*
@@ -616,6 +620,7 @@
 	br.call.sptk.many rp=ia64_invoke_schedule_tail
 }
 .ret8:
+(pKStk)	br.call.sptk.many rp=call_payload
 	adds r2=TI_FLAGS+IA64_TASK_SIZE,r13
 	;;
 	ld4 r2=[r2]
diff --git a/arch/ia64/kernel/head.S b/arch/ia64/kernel/head.S
index 629a250..4738ff7 100644
--- a/arch/ia64/kernel/head.S
+++ b/arch/ia64/kernel/head.S
@@ -1093,19 +1093,6 @@
 END(cycle_to_cputime)
 #endif /* CONFIG_VIRT_CPU_ACCOUNTING */
 
-GLOBAL_ENTRY(start_kernel_thread)
-	.prologue
-	.save rp, r0				// this is the end of the call-chain
-	.body
-	alloc r2 = ar.pfs, 0, 0, 2, 0
-	mov out0 = r9
-	mov out1 = r11;;
-	br.call.sptk.many rp = kernel_thread_helper;;
-	mov out0 = r8
-	br.call.sptk.many rp = sys_exit;;
-1:	br.sptk.few 1b				// not reached
-END(start_kernel_thread)
-
 #ifdef CONFIG_IA64_BRL_EMU
 
 /*
diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c
index 35e106f..31360cb 100644
--- a/arch/ia64/kernel/process.c
+++ b/arch/ia64/kernel/process.c
@@ -393,72 +393,24 @@
 int
 copy_thread(unsigned long clone_flags,
 	     unsigned long user_stack_base, unsigned long user_stack_size,
-	     struct task_struct *p, struct pt_regs *regs)
+	     struct task_struct *p)
 {
 	extern char ia64_ret_from_clone;
 	struct switch_stack *child_stack, *stack;
 	unsigned long rbs, child_rbs, rbs_size;
 	struct pt_regs *child_ptregs;
+	struct pt_regs *regs = current_pt_regs();
 	int retval = 0;
 
-#ifdef CONFIG_SMP
-	/*
-	 * For SMP idle threads, fork_by_hand() calls do_fork with
-	 * NULL regs.
-	 */
-	if (!regs)
-		return 0;
-#endif
-
-	stack = ((struct switch_stack *) regs) - 1;
-
 	child_ptregs = (struct pt_regs *) ((unsigned long) p + IA64_STK_OFFSET) - 1;
 	child_stack = (struct switch_stack *) child_ptregs - 1;
 
-	/* copy parent's switch_stack & pt_regs to child: */
-	memcpy(child_stack, stack, sizeof(*child_ptregs) + sizeof(*child_stack));
-
 	rbs = (unsigned long) current + IA64_RBS_OFFSET;
 	child_rbs = (unsigned long) p + IA64_RBS_OFFSET;
-	rbs_size = stack->ar_bspstore - rbs;
-
-	/* copy the parent's register backing store to the child: */
-	memcpy((void *) child_rbs, (void *) rbs, rbs_size);
-
-	if (likely(user_mode(child_ptregs))) {
-		if (clone_flags & CLONE_SETTLS)
-			child_ptregs->r13 = regs->r16;	/* see sys_clone2() in entry.S */
-		if (user_stack_base) {
-			child_ptregs->r12 = user_stack_base + user_stack_size - 16;
-			child_ptregs->ar_bspstore = user_stack_base;
-			child_ptregs->ar_rnat = 0;
-			child_ptregs->loadrs = 0;
-		}
-	} else {
-		/*
-		 * Note: we simply preserve the relative position of
-		 * the stack pointer here.  There is no need to
-		 * allocate a scratch area here, since that will have
-		 * been taken care of by the caller of sys_clone()
-		 * already.
-		 */
-		child_ptregs->r12 = (unsigned long) child_ptregs - 16; /* kernel sp */
-		child_ptregs->r13 = (unsigned long) p;		/* set `current' pointer */
-	}
-	child_stack->ar_bspstore = child_rbs + rbs_size;
-	child_stack->b0 = (unsigned long) &ia64_ret_from_clone;
 
 	/* copy parts of thread_struct: */
 	p->thread.ksp = (unsigned long) child_stack - 16;
 
-	/* stop some PSR bits from being inherited.
-	 * the psr.up/psr.pp bits must be cleared on fork but inherited on execve()
-	 * therefore we must specify them explicitly here and not include them in
-	 * IA64_PSR_BITS_TO_CLEAR.
-	 */
-	child_ptregs->cr_ipsr = ((child_ptregs->cr_ipsr | IA64_PSR_BITS_TO_SET)
-				 & ~(IA64_PSR_BITS_TO_CLEAR | IA64_PSR_PP | IA64_PSR_UP));
-
 	/*
 	 * NOTE: The calling convention considers all floating point
 	 * registers in the high partition (fph) to be scratch.  Since
@@ -480,8 +432,66 @@
 #	define THREAD_FLAGS_TO_SET	0
 	p->thread.flags = ((current->thread.flags & ~THREAD_FLAGS_TO_CLEAR)
 			   | THREAD_FLAGS_TO_SET);
+
 	ia64_drop_fpu(p);	/* don't pick up stale state from a CPU's fph */
 
+	if (unlikely(p->flags & PF_KTHREAD)) {
+		if (unlikely(!user_stack_base)) {
+			/* fork_idle() called us */
+			return 0;
+		}
+		memset(child_stack, 0, sizeof(*child_ptregs) + sizeof(*child_stack));
+		child_stack->r4 = user_stack_base;	/* payload */
+		child_stack->r5 = user_stack_size;	/* argument */
+		/*
+		 * Preserve PSR bits, except for bits 32-34 and 37-45,
+		 * which we can't read.
+		 */
+		child_ptregs->cr_ipsr = ia64_getreg(_IA64_REG_PSR) | IA64_PSR_BN;
+		/* mark as valid, empty frame */
+		child_ptregs->cr_ifs = 1UL << 63;
+		child_stack->ar_fpsr = child_ptregs->ar_fpsr
+			= ia64_getreg(_IA64_REG_AR_FPSR);
+		child_stack->pr = (1 << PRED_KERNEL_STACK);
+		child_stack->ar_bspstore = child_rbs;
+		child_stack->b0 = (unsigned long) &ia64_ret_from_clone;
+
+		/* stop some PSR bits from being inherited.
+		 * the psr.up/psr.pp bits must be cleared on fork but inherited on execve()
+		 * therefore we must specify them explicitly here and not include them in
+		 * IA64_PSR_BITS_TO_CLEAR.
+		 */
+		child_ptregs->cr_ipsr = ((child_ptregs->cr_ipsr | IA64_PSR_BITS_TO_SET)
+				 & ~(IA64_PSR_BITS_TO_CLEAR | IA64_PSR_PP | IA64_PSR_UP));
+
+		return 0;
+	}
+	stack = ((struct switch_stack *) regs) - 1;
+	/* copy parent's switch_stack & pt_regs to child: */
+	memcpy(child_stack, stack, sizeof(*child_ptregs) + sizeof(*child_stack));
+
+	/* copy the parent's register backing store to the child: */
+	rbs_size = stack->ar_bspstore - rbs;
+	memcpy((void *) child_rbs, (void *) rbs, rbs_size);
+	if (clone_flags & CLONE_SETTLS)
+		child_ptregs->r13 = regs->r16;	/* see sys_clone2() in entry.S */
+	if (user_stack_base) {
+		child_ptregs->r12 = user_stack_base + user_stack_size - 16;
+		child_ptregs->ar_bspstore = user_stack_base;
+		child_ptregs->ar_rnat = 0;
+		child_ptregs->loadrs = 0;
+	}
+	child_stack->ar_bspstore = child_rbs + rbs_size;
+	child_stack->b0 = (unsigned long) &ia64_ret_from_clone;
+
+	/* stop some PSR bits from being inherited.
+	 * the psr.up/psr.pp bits must be cleared on fork but inherited on execve()
+	 * therefore we must specify them explicitly here and not include them in
+	 * IA64_PSR_BITS_TO_CLEAR.
+	 */
+	child_ptregs->cr_ipsr = ((child_ptregs->cr_ipsr | IA64_PSR_BITS_TO_SET)
+				 & ~(IA64_PSR_BITS_TO_CLEAR | IA64_PSR_PP | IA64_PSR_UP));
+
 #ifdef CONFIG_PERFMON
 	if (current->thread.pfm_context)
 		pfm_inherit(p, child_ptregs);
@@ -608,57 +618,6 @@
 	return 1;	/* f0-f31 are always valid so we always return 1 */
 }
 
-long
-sys_execve (const char __user *filename,
-	    const char __user *const __user *argv,
-	    const char __user *const __user *envp,
-	    struct pt_regs *regs)
-{
-	struct filename *fname;
-	int error;
-
-	fname = getname(filename);
-	error = PTR_ERR(fname);
-	if (IS_ERR(fname))
-		goto out;
-	error = do_execve(fname->name, argv, envp, regs);
-	putname(fname);
-out:
-	return error;
-}
-
-pid_t
-kernel_thread (int (*fn)(void *), void *arg, unsigned long flags)
-{
-	extern void start_kernel_thread (void);
-	unsigned long *helper_fptr = (unsigned long *) &start_kernel_thread;
-	struct {
-		struct switch_stack sw;
-		struct pt_regs pt;
-	} regs;
-
-	memset(&regs, 0, sizeof(regs));
-	regs.pt.cr_iip = helper_fptr[0];	/* set entry point (IP) */
-	regs.pt.r1 = helper_fptr[1];		/* set GP */
-	regs.pt.r9 = (unsigned long) fn;	/* 1st argument */
-	regs.pt.r11 = (unsigned long) arg;	/* 2nd argument */
-	/* Preserve PSR bits, except for bits 32-34 and 37-45, which we can't read.  */
-	regs.pt.cr_ipsr = ia64_getreg(_IA64_REG_PSR) | IA64_PSR_BN;
-	regs.pt.cr_ifs = 1UL << 63;		/* mark as valid, empty frame */
-	regs.sw.ar_fpsr = regs.pt.ar_fpsr = ia64_getreg(_IA64_REG_AR_FPSR);
-	regs.sw.ar_bspstore = (unsigned long) current + IA64_RBS_OFFSET;
-	regs.sw.pr = (1 << PRED_KERNEL_STACK);
-	return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, &regs.pt, 0, NULL, NULL);
-}
-EXPORT_SYMBOL(kernel_thread);
-
-/* This gets called from kernel_thread() via ia64_invoke_thread_helper().  */
-int
-kernel_thread_helper (int (*fn)(void *), void *arg)
-{
-	return (*fn)(arg);
-}
-
 /*
  * Flush thread state.  This is called when a thread does an execve().
  */
diff --git a/arch/ia64/kernel/smpboot.c b/arch/ia64/kernel/smpboot.c
index 963d2db..6a368cb 100644
--- a/arch/ia64/kernel/smpboot.c
+++ b/arch/ia64/kernel/smpboot.c
@@ -460,11 +460,6 @@
 	return 0;
 }
 
-struct pt_regs * __cpuinit idle_regs(struct pt_regs *regs)
-{
-	return NULL;
-}
-
 static int __cpuinit
 do_boot_cpu (int sapicid, int cpu, struct task_struct *idle)
 {
diff --git a/arch/m32r/Kconfig b/arch/m32r/Kconfig
index f807721..5183f43 100644
--- a/arch/m32r/Kconfig
+++ b/arch/m32r/Kconfig
@@ -15,6 +15,8 @@
 	select GENERIC_ATOMIC64
 	select ARCH_USES_GETTIMEOFFSET
 	select MODULES_USE_ELF_RELA
+	select GENERIC_KERNEL_THREAD
+	select GENERIC_KERNEL_EXECVE
 
 config SBUS
 	bool
diff --git a/arch/m32r/include/asm/processor.h b/arch/m32r/include/asm/processor.h
index da17253..5767367 100644
--- a/arch/m32r/include/asm/processor.h
+++ b/arch/m32r/include/asm/processor.h
@@ -118,11 +118,6 @@
 /* Free all resources held by a thread. */
 extern void release_thread(struct task_struct *);
 
-/*
- * create a kernel thread without removing it from tasklists
- */
-extern int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags);
-
 /* Copy and release all segment info associated with a VM */
 extern void copy_segments(struct task_struct *p, struct mm_struct * mm);
 extern void release_segments(struct mm_struct * mm);
diff --git a/arch/m32r/include/asm/ptrace.h b/arch/m32r/include/asm/ptrace.h
index 4313aa6..c4432f1 100644
--- a/arch/m32r/include/asm/ptrace.h
+++ b/arch/m32r/include/asm/ptrace.h
@@ -139,6 +139,8 @@
 
 #define task_pt_regs(task) \
         ((struct pt_regs *)(task_stack_page(task) + THREAD_SIZE) - 1)
+#define current_pt_regs() ((struct pt_regs *) \
+	((unsigned long)current_thread_info() + THREAD_SIZE) - 1)
 
 #endif /* __KERNEL */
 
diff --git a/arch/m32r/include/asm/signal.h b/arch/m32r/include/asm/signal.h
index ea5f95e..e4d2e2a 100644
--- a/arch/m32r/include/asm/signal.h
+++ b/arch/m32r/include/asm/signal.h
@@ -149,10 +149,6 @@
 
 #undef __HAVE_ARCH_SIG_BITOPS
 
-struct pt_regs;
-
-#define ptrace_signal_deliver(regs, cookie)	do { } while (0)
-
 #endif /* __KERNEL__ */
 
 #endif  /* _ASM_M32R_SIGNAL_H */
diff --git a/arch/m32r/include/asm/unistd.h b/arch/m32r/include/asm/unistd.h
index d5e66a4..d9e7351 100644
--- a/arch/m32r/include/asm/unistd.h
+++ b/arch/m32r/include/asm/unistd.h
@@ -352,6 +352,10 @@
 #define __ARCH_WANT_SYS_OLDUMOUNT
 #define __ARCH_WANT_SYS_RT_SIGACTION
 #define __ARCH_WANT_SYS_RT_SIGSUSPEND
+#define __ARCH_WANT_SYS_EXECVE
+#define __ARCH_WANT_SYS_CLONE
+#define __ARCH_WANT_SYS_FORK
+#define __ARCH_WANT_SYS_VFORK
 
 #define __IGNORE_lchown
 #define __IGNORE_setuid
diff --git a/arch/m32r/kernel/entry.S b/arch/m32r/kernel/entry.S
index 225412b..0c01543 100644
--- a/arch/m32r/kernel/entry.S
+++ b/arch/m32r/kernel/entry.S
@@ -125,6 +125,15 @@
 	and	\reg, sp
 	.endm
 
+ENTRY(ret_from_kernel_thread)
+	pop	r0
+	bl	schedule_tail
+	GET_THREAD_INFO(r8)
+	ld	r0, R0(r8)
+	ld	r1, R1(r8)
+	jl	r1
+	bra	syscall_exit
+
 ENTRY(ret_from_fork)
 	pop	r0
 	bl	schedule_tail
diff --git a/arch/m32r/kernel/m32r_ksyms.c b/arch/m32r/kernel/m32r_ksyms.c
index 7005707..b727e69 100644
--- a/arch/m32r/kernel/m32r_ksyms.c
+++ b/arch/m32r/kernel/m32r_ksyms.c
@@ -21,7 +21,6 @@
 EXPORT_SYMBOL(dump_fpu);
 EXPORT_SYMBOL(__ioremap);
 EXPORT_SYMBOL(iounmap);
-EXPORT_SYMBOL(kernel_thread);
 
 EXPORT_SYMBOL(strncpy_from_user);
 EXPORT_SYMBOL(__strncpy_from_user);
diff --git a/arch/m32r/kernel/process.c b/arch/m32r/kernel/process.c
index e736627..765d0f5 100644
--- a/arch/m32r/kernel/process.c
+++ b/arch/m32r/kernel/process.c
@@ -165,41 +165,6 @@
 }
 
 /*
- * Create a kernel thread
- */
-
-/*
- * This is the mechanism for creating a new kernel thread.
- *
- * NOTE! Only a kernel-only process(ie the swapper or direct descendants
- * who haven't done an "execve()") should use this: it will work within
- * a system call from a "real" process, but the process memory space will
- * not be free'd until both the parent and the child have exited.
- */
-static void kernel_thread_helper(void *nouse, int (*fn)(void *), void *arg)
-{
-	fn(arg);
-	do_exit(-1);
-}
-
-int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
-{
-	struct pt_regs regs;
-
-	memset(&regs, 0, sizeof (regs));
-	regs.r1 = (unsigned long)fn;
-	regs.r2 = (unsigned long)arg;
-
-	regs.bpc = (unsigned long)kernel_thread_helper;
-
-	regs.psw = M32R_PSW_BIE;
-
-	/* Ok, create the new process. */
-	return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, &regs, 0, NULL,
-		NULL);
-}
-
-/*
  * Free current thread data structures etc..
  */
 void exit_thread(void)
@@ -227,88 +192,31 @@
 }
 
 int copy_thread(unsigned long clone_flags, unsigned long spu,
-	unsigned long unused, struct task_struct *tsk, struct pt_regs *regs)
+	unsigned long arg, struct task_struct *tsk)
 {
 	struct pt_regs *childregs = task_pt_regs(tsk);
 	extern void ret_from_fork(void);
+	extern void ret_from_kernel_thread(void);
 
-	/* Copy registers */
-	*childregs = *regs;
-
-	childregs->spu = spu;
-	childregs->r0 = 0;	/* Child gets zero as return value */
-	regs->r0 = tsk->pid;
+	if (unlikely(tsk->flags & PF_KTHREAD)) {
+		memset(childregs, 0, sizeof(struct pt_regs));
+		childregs->psw = M32R_PSW_BIE;
+		childregs->r1 = spu;	/* fn */
+		childregs->r0 = arg;
+		tsk->thread.lr = (unsigned long)ret_from_kernel_thread;
+	} else {
+		/* Copy registers */
+		*childregs = *current_pt_regs();
+		if (spu)
+			childregs->spu = spu;
+		childregs->r0 = 0;	/* Child gets zero as return value */
+		tsk->thread.lr = (unsigned long)ret_from_fork;
+	}
 	tsk->thread.sp = (unsigned long)childregs;
-	tsk->thread.lr = (unsigned long)ret_from_fork;
 
 	return 0;
 }
 
-asmlinkage int sys_fork(unsigned long r0, unsigned long r1, unsigned long r2,
-	unsigned long r3, unsigned long r4, unsigned long r5, unsigned long r6,
-	struct pt_regs regs)
-{
-#ifdef CONFIG_MMU
-	return do_fork(SIGCHLD, regs.spu, &regs, 0, NULL, NULL);
-#else
-	return -EINVAL;
-#endif /* CONFIG_MMU */
-}
-
-asmlinkage int sys_clone(unsigned long clone_flags, unsigned long newsp,
-			 unsigned long parent_tidptr,
-			 unsigned long child_tidptr,
-			 unsigned long r4, unsigned long r5, unsigned long r6,
-			 struct pt_regs regs)
-{
-	if (!newsp)
-		newsp = regs.spu;
-
-	return do_fork(clone_flags, newsp, &regs, 0,
-		       (int __user *)parent_tidptr, (int __user *)child_tidptr);
-}
-
-/*
- * This is trivial, and on the face of it looks like it
- * could equally well be done in user mode.
- *
- * Not so, for quite unobvious reasons - register pressure.
- * In user mode vfork() cannot have a stack frame, and if
- * done by calling the "clone()" system call directly, you
- * do not have enough call-clobbered registers to hold all
- * the information you need.
- */
-asmlinkage int sys_vfork(unsigned long r0, unsigned long r1, unsigned long r2,
-	unsigned long r3, unsigned long r4, unsigned long r5, unsigned long r6,
-	struct pt_regs regs)
-{
-	return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs.spu, &regs, 0,
-			NULL, NULL);
-}
-
-/*
- * sys_execve() executes a new program.
- */
-asmlinkage int sys_execve(const char __user *ufilename,
-			  const char __user *const __user *uargv,
-			  const char __user *const __user *uenvp,
-			  unsigned long r3, unsigned long r4, unsigned long r5,
-			  unsigned long r6, struct pt_regs regs)
-{
-	int error;
-	struct filename *filename;
-
-	filename = getname(ufilename);
-	error = PTR_ERR(filename);
-	if (IS_ERR(filename))
-		goto out;
-
-	error = do_execve(filename->name, uargv, uenvp, &regs);
-	putname(filename);
-out:
-	return error;
-}
-
 /*
  * These bracket the sleeping functions..
  */
diff --git a/arch/m32r/kernel/sys_m32r.c b/arch/m32r/kernel/sys_m32r.c
index d841fb6..c3fdd63 100644
--- a/arch/m32r/kernel/sys_m32r.c
+++ b/arch/m32r/kernel/sys_m32r.c
@@ -88,24 +88,3 @@
 	/* Not implemented yet. */
 	return -ENOSYS;
 }
-
-/*
- * Do a system call from kernel instead of calling sys_execve so we
- * end up with proper pt_regs.
- */
-int kernel_execve(const char *filename,
-		  const char *const argv[],
-		  const char *const envp[])
-{
-	register long __scno __asm__ ("r7") = __NR_execve;
-	register long __arg3 __asm__ ("r2") = (long)(envp);
-	register long __arg2 __asm__ ("r1") = (long)(argv);
-	register long __res __asm__ ("r0") = (long)(filename);
-	__asm__ __volatile__ (
-		"trap #" SYSCALL_VECTOR "|| nop"
-		: "=r" (__res)
-		: "r" (__scno), "0" (__res), "r" (__arg2),
-			"r" (__arg3)
-		: "memory");
-	return __res;
-}
diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig
index e7c1614..953a7ba 100644
--- a/arch/m68k/Kconfig
+++ b/arch/m68k/Kconfig
@@ -16,6 +16,7 @@
 	select ARCH_WANT_IPC_PARSE_VERSION
 	select ARCH_USES_GETTIMEOFFSET if MMU && !COLDFIRE
 	select GENERIC_KERNEL_THREAD
+	select GENERIC_KERNEL_EXECVE
 	select HAVE_MOD_ARCH_SPECIFIC
 	select MODULES_USE_ELF_REL
 	select MODULES_USE_ELF_RELA
diff --git a/arch/m68k/include/asm/signal.h b/arch/m68k/include/asm/signal.h
index 2df26b5..9c8c46b 100644
--- a/arch/m68k/include/asm/signal.h
+++ b/arch/m68k/include/asm/signal.h
@@ -86,11 +86,9 @@
 
 #endif /* !CONFIG_CPU_HAS_NO_BITFIELDS */
 
-#ifdef __uClinux__
-#define ptrace_signal_deliver(regs, cookie) do { } while (0)
-#else
-struct pt_regs;
-extern void ptrace_signal_deliver(struct pt_regs *regs, void *cookie);
+#ifndef __uClinux__
+extern void ptrace_signal_deliver(void);
+#define ptrace_signal_deliver ptrace_signal_deliver
 #endif /* __uClinux__ */
 
 #endif /* _M68K_SIGNAL_H */
diff --git a/arch/m68k/include/asm/unistd.h b/arch/m68k/include/asm/unistd.h
index 5fc7f7b..a021d67 100644
--- a/arch/m68k/include/asm/unistd.h
+++ b/arch/m68k/include/asm/unistd.h
@@ -32,7 +32,8 @@
 #define __ARCH_WANT_SYS_RT_SIGACTION
 #define __ARCH_WANT_SYS_RT_SIGSUSPEND
 #define __ARCH_WANT_SYS_EXECVE
-#define __ARCH_WANT_KERNEL_EXECVE
+#define __ARCH_WANT_SYS_FORK
+#define __ARCH_WANT_SYS_VFORK
 
 /*
  * "Conditional" syscalls
diff --git a/arch/m68k/kernel/entry.S b/arch/m68k/kernel/entry.S
index 946cb01..a78f564 100644
--- a/arch/m68k/kernel/entry.S
+++ b/arch/m68k/kernel/entry.S
@@ -44,34 +44,29 @@
 
 .globl system_call, buserr, trap, resume
 .globl sys_call_table
-.globl sys_fork, sys_clone, sys_vfork
+.globl __sys_fork, __sys_clone, __sys_vfork
 .globl ret_from_interrupt, bad_interrupt
 .globl auto_irqhandler_fixup
 .globl user_irqvec_fixup
 
 .text
-ENTRY(sys_fork)
+ENTRY(__sys_fork)
 	SAVE_SWITCH_STACK
-	pea	%sp@(SWITCH_STACK_SIZE)
-	jbsr	m68k_fork
-	addql	#4,%sp
-	RESTORE_SWITCH_STACK
+	jbsr	sys_fork
+	lea     %sp@(24),%sp
 	rts
 
-ENTRY(sys_clone)
+ENTRY(__sys_clone)
 	SAVE_SWITCH_STACK
 	pea	%sp@(SWITCH_STACK_SIZE)
 	jbsr	m68k_clone
-	addql	#4,%sp
-	RESTORE_SWITCH_STACK
+	lea     %sp@(28),%sp
 	rts
 
-ENTRY(sys_vfork)
+ENTRY(__sys_vfork)
 	SAVE_SWITCH_STACK
-	pea	%sp@(SWITCH_STACK_SIZE)
-	jbsr	m68k_vfork
-	addql	#4,%sp
-	RESTORE_SWITCH_STACK
+	jbsr	sys_vfork
+	lea     %sp@(24),%sp
 	rts
 
 ENTRY(sys_sigreturn)
@@ -115,16 +110,9 @@
 	| a3 contains the kernel thread payload, d7 - its argument
 	movel	%d1,%sp@-
 	jsr	schedule_tail
-	GET_CURRENT(%d0)
 	movel	%d7,(%sp)
 	jsr	%a3@
 	addql	#4,%sp
-	movel	%d0,(%sp)
-	jra	sys_exit
-
-ENTRY(ret_from_kernel_execve)
-	movel	4(%sp), %sp
-	GET_CURRENT(%d0)
 	jra	ret_from_exception
 
 #if defined(CONFIG_COLDFIRE) || !defined(CONFIG_MMU)
diff --git a/arch/m68k/kernel/process.c b/arch/m68k/kernel/process.c
index c51bb17..d538694 100644
--- a/arch/m68k/kernel/process.c
+++ b/arch/m68k/kernel/process.c
@@ -136,57 +136,35 @@
 }
 
 /*
- * "m68k_fork()".. By the time we get here, the
- * non-volatile registers have also been saved on the
- * stack. We do some ugly pointer stuff here.. (see
- * also copy_thread)
+ * Why not generic sys_clone, you ask?  m68k passes all arguments on stack.
+ * And we need all registers saved, which means a bunch of stuff pushed
+ * on top of pt_regs, which means that sys_clone() arguments would be
+ * buried.  We could, of course, copy them, but it's too costly for no
+ * good reason - generic clone() would have to copy them *again* for
+ * do_fork() anyway.  So in this case it's actually better to pass pt_regs *
+ * and extract arguments for do_fork() from there.  Eventually we might
+ * go for calling do_fork() directly from the wrapper, but only after we
+ * are finished with do_fork() prototype conversion.
  */
-
-asmlinkage int m68k_fork(struct pt_regs *regs)
-{
-#ifdef CONFIG_MMU
-	return do_fork(SIGCHLD, rdusp(), regs, 0, NULL, NULL);
-#else
-	return -EINVAL;
-#endif
-}
-
-asmlinkage int m68k_vfork(struct pt_regs *regs)
-{
-	return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, rdusp(), regs, 0,
-		       NULL, NULL);
-}
-
 asmlinkage int m68k_clone(struct pt_regs *regs)
 {
-	unsigned long clone_flags;
-	unsigned long newsp;
-	int __user *parent_tidptr, *child_tidptr;
-
-	/* syscall2 puts clone_flags in d1 and usp in d2 */
-	clone_flags = regs->d1;
-	newsp = regs->d2;
-	parent_tidptr = (int __user *)regs->d3;
-	child_tidptr = (int __user *)regs->d4;
-	if (!newsp)
-		newsp = rdusp();
-	return do_fork(clone_flags, newsp, regs, 0,
-		       parent_tidptr, child_tidptr);
+	/* regs will be equal to current_pt_regs() */
+	return do_fork(regs->d1, regs->d2, 0,
+		       (int __user *)regs->d3, (int __user *)regs->d4);
 }
 
 int copy_thread(unsigned long clone_flags, unsigned long usp,
-		 unsigned long arg,
-		 struct task_struct * p, struct pt_regs * regs)
+		 unsigned long arg, struct task_struct *p)
 {
-	struct pt_regs * childregs;
-	struct switch_stack *childstack;
+	struct fork_frame {
+		struct switch_stack sw;
+		struct pt_regs regs;
+	} *frame;
 
-	childregs = (struct pt_regs *) (task_stack_page(p) + THREAD_SIZE) - 1;
-	childstack = ((struct switch_stack *) childregs) - 1;
+	frame = (struct fork_frame *) (task_stack_page(p) + THREAD_SIZE) - 1;
 
-	p->thread.usp = usp;
-	p->thread.ksp = (unsigned long)childstack;
-	p->thread.esp0 = (unsigned long)childregs;
+	p->thread.ksp = (unsigned long)frame;
+	p->thread.esp0 = (unsigned long)&frame->regs;
 
 	/*
 	 * Must save the current SFC/DFC value, NOT the value when
@@ -194,25 +172,24 @@
 	 */
 	p->thread.fs = get_fs().seg;
 
-	if (unlikely(!regs)) {
+	if (unlikely(p->flags & PF_KTHREAD)) {
 		/* kernel thread */
-		memset(childstack, 0,
-			sizeof(struct switch_stack) + sizeof(struct pt_regs));
-		childregs->sr = PS_S;
-		childstack->a3 = usp; /* function */
-		childstack->d7 = arg;
-		childstack->retpc = (unsigned long)ret_from_kernel_thread;
+		memset(frame, 0, sizeof(struct fork_frame));
+		frame->regs.sr = PS_S;
+		frame->sw.a3 = usp; /* function */
+		frame->sw.d7 = arg;
+		frame->sw.retpc = (unsigned long)ret_from_kernel_thread;
 		p->thread.usp = 0;
 		return 0;
 	}
-	*childregs = *regs;
-	childregs->d0 = 0;
-
-	*childstack = ((struct switch_stack *) regs)[-1];
-	childstack->retpc = (unsigned long)ret_from_fork;
+	memcpy(frame, container_of(current_pt_regs(), struct fork_frame, regs),
+		sizeof(struct fork_frame));
+	frame->regs.d0 = 0;
+	frame->sw.retpc = (unsigned long)ret_from_fork;
+	p->thread.usp = usp ?: rdusp();
 
 	if (clone_flags & CLONE_SETTLS)
-		task_thread_info(p)->tp_value = regs->d5;
+		task_thread_info(p)->tp_value = frame->regs.d5;
 
 #ifdef CONFIG_FPU
 	if (!FPU_IS_EMU) {
diff --git a/arch/m68k/kernel/signal.c b/arch/m68k/kernel/signal.c
index 710a528..9a396cd 100644
--- a/arch/m68k/kernel/signal.c
+++ b/arch/m68k/kernel/signal.c
@@ -108,8 +108,9 @@
 	return 1;
 }
 
-void ptrace_signal_deliver(struct pt_regs *regs, void *cookie)
+void ptrace_signal_deliver(void)
 {
+	struct pt_regs *regs = signal_pt_regs();
 	if (regs->orig_d0 < 0)
 		return;
 	switch (regs->d0) {
diff --git a/arch/m68k/kernel/syscalltable.S b/arch/m68k/kernel/syscalltable.S
index 4fc2e29..c30da5b 100644
--- a/arch/m68k/kernel/syscalltable.S
+++ b/arch/m68k/kernel/syscalltable.S
@@ -22,7 +22,7 @@
 ENTRY(sys_call_table)
 	.long sys_restart_syscall	/* 0 - old "setup()" system call, used for restarting */
 	.long sys_exit
-	.long sys_fork
+	.long __sys_fork
 	.long sys_read
 	.long sys_write
 	.long sys_open			/* 5 */
@@ -140,7 +140,7 @@
 	.long sys_ipc
 	.long sys_fsync
 	.long sys_sigreturn
-	.long sys_clone			/* 120 */
+	.long __sys_clone		/* 120 */
 	.long sys_setdomainname
 	.long sys_newuname
 	.long sys_cacheflush		/* modify_ldt for i386 */
@@ -210,7 +210,7 @@
 	.long sys_sendfile
 	.long sys_ni_syscall		/* streams1 */
 	.long sys_ni_syscall		/* streams2 */
-	.long sys_vfork			/* 190 */
+	.long __sys_vfork		/* 190 */
 	.long sys_getrlimit
 	.long sys_mmap2
 	.long sys_truncate64
diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig
index 4cba743..4bcf891 100644
--- a/arch/microblaze/Kconfig
+++ b/arch/microblaze/Kconfig
@@ -26,6 +26,9 @@
 	select GENERIC_ATOMIC64
 	select GENERIC_CLOCKEVENTS
 	select MODULES_USE_ELF_RELA
+	select GENERIC_KERNEL_THREAD
+	select GENERIC_KERNEL_EXECVE
+	select CLONE_BACKWARDS
 
 config SWAP
 	def_bool n
diff --git a/arch/microblaze/include/asm/Kbuild b/arch/microblaze/include/asm/Kbuild
index 2957fcc..eb3a46c 100644
--- a/arch/microblaze/include/asm/Kbuild
+++ b/arch/microblaze/include/asm/Kbuild
@@ -4,3 +4,4 @@
 generic-y += clkdev.h
 generic-y += exec.h
 generic-y += trace_clock.h
+generic-y += syscalls.h
diff --git a/arch/microblaze/include/asm/processor.h b/arch/microblaze/include/asm/processor.h
index af2bb96..0759153 100644
--- a/arch/microblaze/include/asm/processor.h
+++ b/arch/microblaze/include/asm/processor.h
@@ -31,6 +31,7 @@
 void start_thread(struct pt_regs *regs, unsigned long pc, unsigned long usp);
 
 extern void ret_from_fork(void);
+extern void ret_from_kernel_thread(void);
 
 # endif /* __ASSEMBLY__ */
 
@@ -78,11 +79,6 @@
 
 extern unsigned long get_wchan(struct task_struct *p);
 
-/*
- * create a kernel thread without removing it from tasklists
- */
-extern int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags);
-
 # define KSTK_EIP(tsk)	(0)
 # define KSTK_ESP(tsk)	(0)
 
@@ -131,8 +127,6 @@
 {
 }
 
-extern int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags);
-
 /* Free current thread data structures etc.  */
 static inline void exit_thread(void)
 {
diff --git a/arch/microblaze/include/asm/syscalls.h b/arch/microblaze/include/asm/syscalls.h
deleted file mode 100644
index 27f2f4c..0000000
--- a/arch/microblaze/include/asm/syscalls.h
+++ /dev/null
@@ -1,16 +0,0 @@
-#ifndef __ASM_MICROBLAZE_SYSCALLS_H
-
-asmlinkage long microblaze_vfork(struct pt_regs *regs);
-asmlinkage long microblaze_clone(int flags, unsigned long stack,
-							struct pt_regs *regs);
-asmlinkage long microblaze_execve(const char __user *filenamei,
-				  const char __user *const __user *argv,
-				  const char __user *const __user *envp,
-				  struct pt_regs *regs);
-
-asmlinkage long sys_clone(int flags, unsigned long stack, struct pt_regs *regs);
-#define sys_clone sys_clone
-
-#include <asm-generic/syscalls.h>
-
-#endif /* __ASM_MICROBLAZE_SYSCALLS_H */
diff --git a/arch/microblaze/include/asm/unistd.h b/arch/microblaze/include/asm/unistd.h
index 6985e6e..94d9789 100644
--- a/arch/microblaze/include/asm/unistd.h
+++ b/arch/microblaze/include/asm/unistd.h
@@ -422,6 +422,12 @@
 #define __ARCH_WANT_SYS_SIGPROCMASK
 #define __ARCH_WANT_SYS_RT_SIGACTION
 #define __ARCH_WANT_SYS_RT_SIGSUSPEND
+#define __ARCH_WANT_SYS_EXECVE
+#define __ARCH_WANT_SYS_CLONE
+#define __ARCH_WANT_SYS_VFORK
+#ifdef CONFIG_MMU
+#define __ARCH_WANT_SYS_FORK
+#endif
 
 /*
  * "Conditional" syscalls
diff --git a/arch/microblaze/kernel/entry-nommu.S b/arch/microblaze/kernel/entry-nommu.S
index 75c3ea1..cb0327f 100644
--- a/arch/microblaze/kernel/entry-nommu.S
+++ b/arch/microblaze/kernel/entry-nommu.S
@@ -474,6 +474,14 @@
 	brid	ret_to_user
 	nop
 
+ENTRY(ret_from_kernel_thread)
+	brlid	r15, schedule_tail
+	addk	r5, r0, r3
+	brald	r15, r20
+	addk	r5, r0, r19
+	brid	ret_to_user
+	addk	r3, r0, r0
+
 work_pending:
 	enable_irq
 
@@ -551,18 +559,6 @@
 	rtid	r14, 0
 	nop
 
-sys_vfork:
-	brid	microblaze_vfork
-	addk	r5, r1, r0
-
-sys_clone:
-	brid	microblaze_clone
-	addk	r7, r1, r0
-
-sys_execve:
-	brid	microblaze_execve
-	addk	r8, r1, r0
-
 sys_rt_sigreturn_wrapper:
 	brid	sys_rt_sigreturn
 	addk	r5, r1, r0
diff --git a/arch/microblaze/kernel/entry.S b/arch/microblaze/kernel/entry.S
index 03f7b8c..c217367 100644
--- a/arch/microblaze/kernel/entry.S
+++ b/arch/microblaze/kernel/entry.S
@@ -293,24 +293,6 @@
 	swi	r1, r0, TOPHYS(PER_CPU(ENTRY_SP)) /* save stack */
 	addi	r14, r14, 4	/* return address is 4 byte after call */
 
-	mfs	r1, rmsr
-	nop
-	andi	r1, r1, MSR_UMS
-	bnei	r1, 1f
-
-/* Kernel-mode state save - kernel execve */
-	lwi	r1, r0, TOPHYS(PER_CPU(ENTRY_SP)); /* Reload kernel stack-ptr*/
-	tophys(r1,r1);
-
-	addik	r1, r1, -PT_SIZE; /* Make room on the stack. */
-	SAVE_REGS
-
-	swi	r1, r1, PT_MODE; /* pt_regs -> kernel mode */
-	brid	2f;
-	nop;				/* Fill delay slot */
-
-/* User-mode state save.  */
-1:
 	lwi	r1, r0, TOPHYS(PER_CPU(CURRENT_SAVE)); /* get saved current */
 	tophys(r1,r1);
 	lwi	r1, r1, TS_THREAD_INFO;	/* get stack from task_struct */
@@ -460,18 +442,6 @@
 	nop;
 
 
-/* These syscalls need access to the struct pt_regs on the stack, so we
-   implement them in assembly (they're basically all wrappers anyway).  */
-
-C_ENTRY(sys_fork_wrapper):
-	addi	r5, r0, SIGCHLD			/* Arg 0: flags */
-	lwi	r6, r1, PT_R1	/* Arg 1: child SP (use parent's) */
-	addik	r7, r1, 0			/* Arg 2: parent context */
-	add	r8, r0, r0			/* Arg 3: (unused) */
-	add	r9, r0, r0;			/* Arg 4: (unused) */
-	brid	do_fork		/* Do real work (tail-call) */
-	add	r10, r0, r0;			/* Arg 5: (unused) */
-
 /* This the initial entry point for a new child thread, with an appropriate
    stack in place that makes it look the the child is in the middle of an
    syscall.  This function is actually `returned to' from switch_thread
@@ -479,28 +449,19 @@
    saved context).  */
 C_ENTRY(ret_from_fork):
 	bralid	r15, schedule_tail; /* ...which is schedule_tail's arg */
-	add	r3, r5, r0;	/* switch_thread returns the prev task */
+	add	r5, r3, r0;	/* switch_thread returns the prev task */
 				/* ( in the delay slot ) */
 	brid	ret_from_trap;	/* Do normal trap return */
 	add	r3, r0, r0;	/* Child's fork call should return 0. */
 
-C_ENTRY(sys_vfork):
-	brid	microblaze_vfork	/* Do real work (tail-call) */
-	addik	r5, r1, 0
-
-C_ENTRY(sys_clone):
-	bnei	r6, 1f;			/* See if child SP arg (arg 1) is 0. */
-	lwi	r6, r1, PT_R1;	/* If so, use paret's stack ptr */
-1:	addik	r7, r1, 0;			/* Arg 2: parent context */
-	lwi     r9, r1, PT_R8;          /* parent tid.  */
-	lwi     r10, r1, PT_R9;         /* child tid.  */
-	/* do_fork will pick up TLS from regs->r10.  */
-	brid	do_fork		/* Do real work (tail-call) */
-	add     r8, r0, r0;             /* Arg 3: (unused) */
-
-C_ENTRY(sys_execve):
-	brid	microblaze_execve;	/* Do real work (tail-call).*/
-	addik	r8, r1, 0;		/* add user context as 4th arg */
+C_ENTRY(ret_from_kernel_thread):
+	bralid	r15, schedule_tail; /* ...which is schedule_tail's arg */
+	add	r5, r3, r0;	/* switch_thread returns the prev task */
+				/* ( in the delay slot ) */
+	brald	r15, r20	/* fn was left in r20 */
+	addk	r5, r0, r19	/* ... and argument - in r19 */
+	brid	ret_from_trap
+	add	r3, r0, r0
 
 C_ENTRY(sys_rt_sigreturn_wrapper):
 	brid	sys_rt_sigreturn	/* Do real work */
diff --git a/arch/microblaze/kernel/process.c b/arch/microblaze/kernel/process.c
index 1944e00..40823fd 100644
--- a/arch/microblaze/kernel/process.c
+++ b/arch/microblaze/kernel/process.c
@@ -13,6 +13,7 @@
 #include <linux/pm.h>
 #include <linux/tick.h>
 #include <linux/bitops.h>
+#include <linux/ptrace.h>
 #include <asm/pgalloc.h>
 #include <asm/uaccess.h> /* for USER_DS macros */
 #include <asm/cacheflush.h>
@@ -119,46 +120,38 @@
 }
 
 int copy_thread(unsigned long clone_flags, unsigned long usp,
-		unsigned long unused,
-		struct task_struct *p, struct pt_regs *regs)
+		unsigned long arg, struct task_struct *p)
 {
 	struct pt_regs *childregs = task_pt_regs(p);
 	struct thread_info *ti = task_thread_info(p);
 
-	*childregs = *regs;
-	if (user_mode(regs))
+	if (unlikely(p->flags & PF_KTHREAD)) {
+		/* if we're creating a new kernel thread then just zeroing all
+		 * the registers. That's OK for a brand new thread.*/
+		memset(childregs, 0, sizeof(struct pt_regs));
+		memset(&ti->cpu_context, 0, sizeof(struct cpu_context));
+		ti->cpu_context.r1  = (unsigned long)childregs;
+		ti->cpu_context.r20 = (unsigned long)usp; /* fn */
+		ti->cpu_context.r19 = (unsigned long)arg;
+		childregs->pt_mode = 1;
+		local_save_flags(childregs->msr);
+#ifdef CONFIG_MMU
+		ti->cpu_context.msr = childregs->msr & ~MSR_IE;
+#endif
+		ti->cpu_context.r15 = (unsigned long)ret_from_kernel_thread - 8;
+		return 0;
+	}
+	*childregs = *current_pt_regs();
+	if (usp)
 		childregs->r1 = usp;
-	else
-		childregs->r1 = ((unsigned long) ti) + THREAD_SIZE;
 
-#ifndef CONFIG_MMU
 	memset(&ti->cpu_context, 0, sizeof(struct cpu_context));
 	ti->cpu_context.r1 = (unsigned long)childregs;
+#ifndef CONFIG_MMU
 	ti->cpu_context.msr = (unsigned long)childregs->msr;
 #else
+	childregs->msr |= MSR_UMS;
 
-	/* if creating a kernel thread then update the current reg (we don't
-	 * want to use the parent's value when restoring by POP_STATE) */
-	if (kernel_mode(regs))
-		/* save new current on stack to use POP_STATE */
-		childregs->CURRENT_TASK = (unsigned long)p;
-	/* if returning to user then use the parent's value of this register */
-
-	/* if we're creating a new kernel thread then just zeroing all
-	 * the registers. That's OK for a brand new thread.*/
-	/* Pls. note that some of them will be restored in POP_STATE */
-	if (kernel_mode(regs))
-		memset(&ti->cpu_context, 0, sizeof(struct cpu_context));
-	/* if this thread is created for fork/vfork/clone, then we want to
-	 * restore all the parent's context */
-	/* in addition to the registers which will be restored by POP_STATE */
-	else {
-		ti->cpu_context = *(struct cpu_context *)regs;
-		childregs->msr |= MSR_UMS;
-	}
-
-	/* FIXME STATE_SAVE_PT_OFFSET; */
-	ti->cpu_context.r1  = (unsigned long)childregs;
 	/* we should consider the fact that childregs is a copy of the parent
 	 * regs which were saved immediately after entering the kernel state
 	 * before enabling VM. This MSR will be restored in switch_to and
@@ -209,29 +202,6 @@
 }
 #endif
 
-static void kernel_thread_helper(int (*fn)(void *), void *arg)
-{
-	fn(arg);
-	do_exit(-1);
-}
-
-int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
-{
-	struct pt_regs regs;
-
-	memset(&regs, 0, sizeof(regs));
-	/* store them in non-volatile registers */
-	regs.r5 = (unsigned long)fn;
-	regs.r6 = (unsigned long)arg;
-	local_save_flags(regs.msr);
-	regs.pc = (unsigned long)kernel_thread_helper;
-	regs.pt_mode = 1;
-
-	return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0,
-			&regs, 0, NULL, NULL);
-}
-EXPORT_SYMBOL_GPL(kernel_thread);
-
 unsigned long get_wchan(struct task_struct *p)
 {
 /* TBD (used by procfs) */
@@ -246,6 +216,7 @@
 	regs->pt_mode = 0;
 #ifdef CONFIG_MMU
 	regs->msr |= MSR_UMS;
+	regs->msr &= ~MSR_VM;
 #endif
 }
 
diff --git a/arch/microblaze/kernel/sys_microblaze.c b/arch/microblaze/kernel/sys_microblaze.c
index 404c0f2..63647c5 100644
--- a/arch/microblaze/kernel/sys_microblaze.c
+++ b/arch/microblaze/kernel/sys_microblaze.c
@@ -34,38 +34,6 @@
 
 #include <asm/syscalls.h>
 
-asmlinkage long microblaze_vfork(struct pt_regs *regs)
-{
-	return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->r1,
-						regs, 0, NULL, NULL);
-}
-
-asmlinkage long microblaze_clone(int flags, unsigned long stack,
-							struct pt_regs *regs)
-{
-	if (!stack)
-		stack = regs->r1;
-	return do_fork(flags, stack, regs, 0, NULL, NULL);
-}
-
-asmlinkage long microblaze_execve(const char __user *filenamei,
-				  const char __user *const __user *argv,
-				  const char __user *const __user *envp,
-				  struct pt_regs *regs)
-{
-	int error;
-	struct filename *filename;
-
-	filename = getname(filenamei);
-	error = PTR_ERR(filename);
-	if (IS_ERR(filename))
-		goto out;
-	error = do_execve(filename->name, argv, envp, regs);
-	putname(filename);
-out:
-	return error;
-}
-
 asmlinkage long sys_mmap(unsigned long addr, unsigned long len,
 			unsigned long prot, unsigned long flags,
 			unsigned long fd, off_t pgoff)
@@ -75,24 +43,3 @@
 
 	return sys_mmap_pgoff(addr, len, prot, flags, fd, pgoff >> PAGE_SHIFT);
 }
-
-/*
- * Do a system call from kernel instead of calling sys_execve so we
- * end up with proper pt_regs.
- */
-int kernel_execve(const char *filename,
-		  const char *const argv[],
-		  const char *const envp[])
-{
-	register const char *__a __asm__("r5") = filename;
-	register const void *__b __asm__("r6") = argv;
-	register const void *__c __asm__("r7") = envp;
-	register unsigned long __syscall __asm__("r12") = __NR_execve;
-	register unsigned long __ret __asm__("r3");
-	__asm__ __volatile__ ("brki r14, 0x8"
-			: "=r" (__ret), "=r" (__syscall)
-			: "1" (__syscall), "r" (__a), "r" (__b), "r" (__c)
-			: "r4", "r8", "r9",
-			"r10", "r11", "r14", "cc", "memory");
-	return __ret;
-}
diff --git a/arch/microblaze/kernel/syscall_table.S b/arch/microblaze/kernel/syscall_table.S
index 6a2b294..ff6431e 100644
--- a/arch/microblaze/kernel/syscall_table.S
+++ b/arch/microblaze/kernel/syscall_table.S
@@ -2,11 +2,7 @@
 	.long sys_restart_syscall	/* 0 - old "setup()" system call,
 					 * used for restarting */
 	.long sys_exit
-#ifdef CONFIG_MMU
-	.long sys_fork_wrapper
-#else
-	.long sys_ni_syscall
-#endif
+	.long sys_fork
 	.long sys_read
 	.long sys_write
 	.long sys_open			/* 5 */
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index dba9390..4183e62 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -40,6 +40,8 @@
 	select HAVE_MOD_ARCH_SPECIFIC
 	select MODULES_USE_ELF_REL
 	select MODULES_USE_ELF_RELA if 64BIT
+	select GENERIC_KERNEL_THREAD
+	select GENERIC_KERNEL_EXECVE
 
 menu "Machine selection"
 
diff --git a/arch/mips/include/asm/processor.h b/arch/mips/include/asm/processor.h
index 5e33fab..d28c41e 100644
--- a/arch/mips/include/asm/processor.h
+++ b/arch/mips/include/asm/processor.h
@@ -310,8 +310,6 @@
 /* Free all resources held by a thread. */
 #define release_thread(thread) do { } while(0)
 
-extern long kernel_thread(int (*fn)(void *), void * arg, unsigned long flags);
-
 extern unsigned long thread_saved_pc(struct task_struct *tsk);
 
 /*
diff --git a/arch/mips/include/asm/ptrace.h b/arch/mips/include/asm/ptrace.h
index 4f5da94..cec5e12 100644
--- a/arch/mips/include/asm/ptrace.h
+++ b/arch/mips/include/asm/ptrace.h
@@ -61,4 +61,10 @@
 		die(str, regs);
 }
 
+#define current_pt_regs()						\
+({									\
+	unsigned long sp = (unsigned long)__builtin_frame_address(0);	\
+	(struct pt_regs *)((sp | (THREAD_SIZE - 1)) + 1 - 32) - 1;	\
+})
+
 #endif /* _ASM_PTRACE_H */
diff --git a/arch/mips/include/asm/signal.h b/arch/mips/include/asm/signal.h
index 880240d..cf4a080 100644
--- a/arch/mips/include/asm/signal.h
+++ b/arch/mips/include/asm/signal.h
@@ -21,6 +21,4 @@
 #include <asm/sigcontext.h>
 #include <asm/siginfo.h>
 
-#define ptrace_signal_deliver(regs, cookie) do { } while (0)
-
 #endif /* _ASM_SIGNAL_H */
diff --git a/arch/mips/include/asm/unistd.h b/arch/mips/include/asm/unistd.h
index 9e47cc1..b306e20 100644
--- a/arch/mips/include/asm/unistd.h
+++ b/arch/mips/include/asm/unistd.h
@@ -20,6 +20,7 @@
 #define __ARCH_OMIT_COMPAT_SYS_GETDENTS64
 #define __ARCH_WANT_OLD_READDIR
 #define __ARCH_WANT_SYS_ALARM
+#define __ARCH_WANT_SYS_EXECVE
 #define __ARCH_WANT_SYS_GETHOSTNAME
 #define __ARCH_WANT_SYS_IPC
 #define __ARCH_WANT_SYS_PAUSE
diff --git a/arch/mips/kernel/entry.S b/arch/mips/kernel/entry.S
index 9b00362..e578685 100644
--- a/arch/mips/kernel/entry.S
+++ b/arch/mips/kernel/entry.S
@@ -70,6 +70,12 @@
 	b	need_resched
 #endif
 
+FEXPORT(ret_from_kernel_thread)
+	jal	schedule_tail		# a0 = struct task_struct *prev
+	move	a0, s1
+	jal	s0
+	j	syscall_exit
+
 FEXPORT(ret_from_fork)
 	jal	schedule_tail		# a0 = struct task_struct *prev
 
diff --git a/arch/mips/kernel/linux32.c b/arch/mips/kernel/linux32.c
index 3a21ace..7adab86 100644
--- a/arch/mips/kernel/linux32.c
+++ b/arch/mips/kernel/linux32.c
@@ -3,7 +3,6 @@
  *
  * Copyright (C) 2000 Silicon Graphics, Inc.
  * Written by Ulf Carlsson (ulfc@engr.sgi.com)
- * sys32_execve from ia64/ia32 code, Feb 2000, Kanoj Sarcar (kanoj@sgi.com)
  */
 #include <linux/compiler.h>
 #include <linux/mm.h>
@@ -77,26 +76,6 @@
 	return error;
 }
 
-/*
- * sys_execve() executes a new program.
- */
-asmlinkage int sys32_execve(nabi_no_regargs struct pt_regs regs)
-{
-	int error;
-	struct filename *filename;
-
-	filename = getname(compat_ptr(regs.regs[4]));
-	error = PTR_ERR(filename);
-	if (IS_ERR(filename))
-		goto out;
-	error = compat_do_execve(filename->name, compat_ptr(regs.regs[5]),
-				 compat_ptr(regs.regs[6]), &regs);
-	putname(filename);
-
-out:
-	return error;
-}
-
 #define RLIM_INFINITY32	0x7fffffff
 #define RESOURCE32(x) ((x > RLIM_INFINITY32) ? RLIM_INFINITY32 : x)
 
@@ -333,7 +312,7 @@
 	/* Use __dummy4 instead of getting it off the stack, so that
 	   syscall() works.  */
 	child_tidptr = (int __user *) __dummy4;
-	return do_fork(clone_flags, newsp, &regs, 0,
+	return do_fork(clone_flags, newsp, 0,
 	               parent_tidptr, child_tidptr);
 }
 
diff --git a/arch/mips/kernel/mips_ksyms.c b/arch/mips/kernel/mips_ksyms.c
index 3fc1691..2d9304c 100644
--- a/arch/mips/kernel/mips_ksyms.c
+++ b/arch/mips/kernel/mips_ksyms.c
@@ -32,8 +32,6 @@
 EXPORT_SYMBOL(memcpy);
 EXPORT_SYMBOL(memmove);
 
-EXPORT_SYMBOL(kernel_thread);
-
 /*
  * Functions that operate on entire pages.  Mostly used by memory management.
  */
diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c
index e9a5fd7..38097652 100644
--- a/arch/mips/kernel/process.c
+++ b/arch/mips/kernel/process.c
@@ -84,6 +84,7 @@
 }
 
 asmlinkage void ret_from_fork(void);
+asmlinkage void ret_from_kernel_thread(void);
 
 void start_thread(struct pt_regs * regs, unsigned long pc, unsigned long sp)
 {
@@ -113,10 +114,10 @@
 }
 
 int copy_thread(unsigned long clone_flags, unsigned long usp,
-	unsigned long unused, struct task_struct *p, struct pt_regs *regs)
+	unsigned long arg, struct task_struct *p)
 {
 	struct thread_info *ti = task_thread_info(p);
-	struct pt_regs *childregs;
+	struct pt_regs *childregs, *regs = current_pt_regs();
 	unsigned long childksp;
 	p->set_child_tid = p->clear_child_tid = NULL;
 
@@ -136,19 +137,30 @@
 	childregs = (struct pt_regs *) childksp - 1;
 	/*  Put the stack after the struct pt_regs.  */
 	childksp = (unsigned long) childregs;
+	p->thread.cp0_status = read_c0_status() & ~(ST0_CU2|ST0_CU1);
+	if (unlikely(p->flags & PF_KTHREAD)) {
+		unsigned long status = p->thread.cp0_status;
+		memset(childregs, 0, sizeof(struct pt_regs));
+		ti->addr_limit = KERNEL_DS;
+		p->thread.reg16 = usp; /* fn */
+		p->thread.reg17 = arg;
+		p->thread.reg29 = childksp;
+		p->thread.reg31 = (unsigned long) ret_from_kernel_thread;
+#if defined(CONFIG_CPU_R3000) || defined(CONFIG_CPU_TX39XX)
+		status = (status & ~(ST0_KUP | ST0_IEP | ST0_IEC)) |
+			 ((status & (ST0_KUC | ST0_IEC)) << 2);
+#else
+		status |= ST0_EXL;
+#endif
+		childregs->cp0_status = status;
+		return 0;
+	}
 	*childregs = *regs;
 	childregs->regs[7] = 0;	/* Clear error flag */
-
 	childregs->regs[2] = 0;	/* Child gets zero as return value */
+	childregs->regs[29] = usp;
+	ti->addr_limit = USER_DS;
 
-	if (childregs->cp0_status & ST0_CU0) {
-		childregs->regs[28] = (unsigned long) ti;
-		childregs->regs[29] = childksp;
-		ti->addr_limit = KERNEL_DS;
-	} else {
-		childregs->regs[29] = usp;
-		ti->addr_limit = USER_DS;
-	}
 	p->thread.reg29 = (unsigned long) childregs;
 	p->thread.reg31 = (unsigned long) ret_from_fork;
 
@@ -156,7 +168,6 @@
 	 * New tasks lose permission to use the fpu. This accelerates context
 	 * switching for most programs since they don't use the fpu.
 	 */
-	p->thread.cp0_status = read_c0_status() & ~(ST0_CU2|ST0_CU1);
 	childregs->cp0_status &= ~(ST0_CU2|ST0_CU1);
 
 #ifdef CONFIG_MIPS_MT_SMTC
@@ -222,35 +233,6 @@
 }
 
 /*
- * Create a kernel thread
- */
-static void __noreturn kernel_thread_helper(void *arg, int (*fn)(void *))
-{
-	do_exit(fn(arg));
-}
-
-long kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
-{
-	struct pt_regs regs;
-
-	memset(&regs, 0, sizeof(regs));
-
-	regs.regs[4] = (unsigned long) arg;
-	regs.regs[5] = (unsigned long) fn;
-	regs.cp0_epc = (unsigned long) kernel_thread_helper;
-	regs.cp0_status = read_c0_status();
-#if defined(CONFIG_CPU_R3000) || defined(CONFIG_CPU_TX39XX)
-	regs.cp0_status = (regs.cp0_status & ~(ST0_KUP | ST0_IEP | ST0_IEC)) |
-			  ((regs.cp0_status & (ST0_KUC | ST0_IEC)) << 2);
-#else
-	regs.cp0_status |= ST0_EXL;
-#endif
-
-	/* Ok, create the new process.. */
-	return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, &regs, 0, NULL, NULL);
-}
-
-/*
  *
  */
 struct mips_frame_info {
diff --git a/arch/mips/kernel/scall64-n32.S b/arch/mips/kernel/scall64-n32.S
index 86ec03f..6297191 100644
--- a/arch/mips/kernel/scall64-n32.S
+++ b/arch/mips/kernel/scall64-n32.S
@@ -167,7 +167,7 @@
 	PTR	sys_getsockopt
 	PTR	sys_clone			/* 6055 */
 	PTR	sys_fork
-	PTR	sys32_execve
+	PTR	compat_sys_execve
 	PTR	sys_exit
 	PTR	compat_sys_wait4
 	PTR	sys_kill			/* 6060 */
diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S
index 53c2d72..9601be6 100644
--- a/arch/mips/kernel/scall64-o32.S
+++ b/arch/mips/kernel/scall64-o32.S
@@ -203,7 +203,7 @@
 	PTR	sys_creat
 	PTR	sys_link
 	PTR	sys_unlink			/* 4010 */
-	PTR	sys32_execve
+	PTR	compat_sys_execve
 	PTR	sys_chdir
 	PTR	compat_sys_time
 	PTR	sys_mknod
diff --git a/arch/mips/kernel/syscall.c b/arch/mips/kernel/syscall.c
index 2bd561b..201cb76 100644
--- a/arch/mips/kernel/syscall.c
+++ b/arch/mips/kernel/syscall.c
@@ -92,7 +92,7 @@
 static int __used noinline
 _sys_fork(nabi_no_regargs struct pt_regs regs)
 {
-	return do_fork(SIGCHLD, regs.regs[29], &regs, 0, NULL, NULL);
+	return do_fork(SIGCHLD, regs.regs[29], 0, NULL, NULL);
 }
 
 save_static_function(sys_clone);
@@ -123,32 +123,10 @@
 #else
 	child_tidptr = (int __user *) regs.regs[8];
 #endif
-	return do_fork(clone_flags, newsp, &regs, 0,
+	return do_fork(clone_flags, newsp, 0,
 	               parent_tidptr, child_tidptr);
 }
 
-/*
- * sys_execve() executes a new program.
- */
-asmlinkage int sys_execve(nabi_no_regargs struct pt_regs regs)
-{
-	int error;
-	struct filename *filename;
-
-	filename = getname((const char __user *) (long)regs.regs[4]);
-	error = PTR_ERR(filename);
-	if (IS_ERR(filename))
-		goto out;
-	error = do_execve(filename->name,
-			  (const char __user *const __user *) (long)regs.regs[5],
-	                  (const char __user *const __user *) (long)regs.regs[6],
-			  &regs);
-	putname(filename);
-
-out:
-	return error;
-}
-
 SYSCALL_DEFINE1(set_thread_area, unsigned long, addr)
 {
 	struct thread_info *ti = task_thread_info(current);
@@ -313,34 +291,3 @@
 {
 	do_exit(SIGSEGV);
 }
-
-/*
- * Do a system call from kernel instead of calling sys_execve so we
- * end up with proper pt_regs.
- */
-int kernel_execve(const char *filename,
-		  const char *const argv[],
-		  const char *const envp[])
-{
-	register unsigned long __a0 asm("$4") = (unsigned long) filename;
-	register unsigned long __a1 asm("$5") = (unsigned long) argv;
-	register unsigned long __a2 asm("$6") = (unsigned long) envp;
-	register unsigned long __a3 asm("$7");
-	unsigned long __v0;
-
-	__asm__ volatile ("					\n"
-	"	.set	noreorder				\n"
-	"	li	$2, %5		# __NR_execve		\n"
-	"	syscall						\n"
-	"	move	%0, $2					\n"
-	"	.set	reorder					\n"
-	: "=&r" (__v0), "=r" (__a3)
-	: "r" (__a0), "r" (__a1), "r" (__a2), "i" (__NR_execve)
-	: "$2", "$8", "$9", "$10", "$11", "$12", "$13", "$14", "$15", "$24",
-	  "memory");
-
-	if (__a3 == 0)
-		return __v0;
-
-	return -__v0;
-}
diff --git a/arch/mn10300/Kconfig b/arch/mn10300/Kconfig
index 04669fa..7247174 100644
--- a/arch/mn10300/Kconfig
+++ b/arch/mn10300/Kconfig
@@ -9,6 +9,7 @@
 	select HAVE_NMI_WATCHDOG if MN10300_WD_TIMER
 	select GENERIC_CLOCKEVENTS
 	select GENERIC_KERNEL_THREAD
+	select GENERIC_KERNEL_EXECVE
 	select MODULES_USE_ELF_RELA
 
 config AM33_2
diff --git a/arch/mn10300/include/asm/signal.h b/arch/mn10300/include/asm/signal.h
index f9668ec..d280e97 100644
--- a/arch/mn10300/include/asm/signal.h
+++ b/arch/mn10300/include/asm/signal.h
@@ -45,8 +45,4 @@
 };
 #include <asm/sigcontext.h>
 
-
-struct pt_regs;
-#define ptrace_signal_deliver(regs, cookie) do { } while (0)
-
 #endif /* _ASM_SIGNAL_H */
diff --git a/arch/mn10300/include/asm/unistd.h b/arch/mn10300/include/asm/unistd.h
index 55bbec1..cabf8ba 100644
--- a/arch/mn10300/include/asm/unistd.h
+++ b/arch/mn10300/include/asm/unistd.h
@@ -44,7 +44,9 @@
 #define __ARCH_WANT_SYS_RT_SIGACTION
 #define __ARCH_WANT_SYS_RT_SIGSUSPEND
 #define __ARCH_WANT_SYS_EXECVE
-#define __ARCH_WANT_KERNEL_EXECVE
+#define __ARCH_WANT_SYS_FORK
+#define __ARCH_WANT_SYS_VFORK
+#define __ARCH_WANT_SYS_CLONE
 
 /*
  * "Conditional" syscalls
diff --git a/arch/mn10300/kernel/entry.S b/arch/mn10300/kernel/entry.S
index 0c631d3..68fcab8 100644
--- a/arch/mn10300/kernel/entry.S
+++ b/arch/mn10300/kernel/entry.S
@@ -60,13 +60,8 @@
 	mov	(REG_D0,fp),d0
 	mov	(REG_A0,fp),a0
 	calls	(a0)
-	jmp	sys_exit
-
-ENTRY(ret_from_kernel_execve)
-	add	-12,d0	/* pt_regs -> frame */
-	mov	d0,sp
-	GET_THREAD_INFO a2
 	clr	d0
+	mov	d0,(REG_D0,fp)
 	jmp	syscall_exit
 
 ###############################################################################
diff --git a/arch/mn10300/kernel/process.c b/arch/mn10300/kernel/process.c
index d0c671b..eb09f5a 100644
--- a/arch/mn10300/kernel/process.c
+++ b/arch/mn10300/kernel/process.c
@@ -206,7 +206,7 @@
  */
 int copy_thread(unsigned long clone_flags,
 		unsigned long c_usp, unsigned long ustk_size,
-		struct task_struct *p, struct pt_regs *kregs)
+		struct task_struct *p)
 {
 	struct thread_info *ti = task_thread_info(p);
 	struct pt_regs *c_regs;
@@ -227,7 +227,7 @@
 	p->thread.wchan	= p->thread.pc;
 	p->thread.usp	= c_usp;
 
-	if (unlikely(!kregs)) {
+	if (unlikely(p->flags & PF_KTHREAD)) {
 		memset(c_regs, 0, sizeof(struct pt_regs));
 		c_regs->a0 = c_usp; /* function */
 		c_regs->d0 = ustk_size; /* argument */
@@ -236,8 +236,9 @@
 		p->thread.pc	= (unsigned long) ret_from_kernel_thread;
 		return 0;
 	}
-	*c_regs = *kregs;
-	c_regs->sp = c_usp;
+	*c_regs = *current_pt_regs();
+	if (c_usp)
+		c_regs->sp = c_usp;
 	c_regs->epsw &= ~EPSW_FE; /* my FPU */
 
 	/* the new TLS pointer is passed in as arg #5 to sys_clone() */
@@ -249,30 +250,6 @@
 	return 0;
 }
 
-/*
- * clone a process
- * - tlsptr is retrieved by copy_thread() from current_frame()->d3
- */
-asmlinkage long sys_clone(unsigned long clone_flags, unsigned long newsp,
-			  int __user *parent_tidptr, int __user *child_tidptr,
-			  int __user *tlsptr)
-{
-	return do_fork(clone_flags, newsp ?: current_frame()->sp,
-		       current_frame(), 0, parent_tidptr, child_tidptr);
-}
-
-asmlinkage long sys_fork(void)
-{
-	return do_fork(SIGCHLD, current_frame()->sp,
-		       current_frame(), 0, NULL, NULL);
-}
-
-asmlinkage long sys_vfork(void)
-{
-	return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, current_frame()->sp,
-		       current_frame(), 0, NULL, NULL);
-}
-
 unsigned long get_wchan(struct task_struct *p)
 {
 	return p->thread.wchan;
diff --git a/arch/openrisc/Kconfig b/arch/openrisc/Kconfig
index 05f2ba4..e7f1a29 100644
--- a/arch/openrisc/Kconfig
+++ b/arch/openrisc/Kconfig
@@ -22,6 +22,8 @@
 	select GENERIC_STRNCPY_FROM_USER
 	select GENERIC_STRNLEN_USER
 	select MODULES_USE_ELF_RELA
+	select GENERIC_KERNEL_THREAD
+	select GENERIC_KERNEL_EXECVE
 
 config MMU
 	def_bool y
diff --git a/arch/openrisc/include/asm/processor.h b/arch/openrisc/include/asm/processor.h
index 43decdb..3369138 100644
--- a/arch/openrisc/include/asm/processor.h
+++ b/arch/openrisc/include/asm/processor.h
@@ -81,8 +81,6 @@
 #define KSTK_ESP(tsk)   (task_pt_regs(tsk)->sp)
 
 
-extern int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags);
-
 void start_thread(struct pt_regs *regs, unsigned long nip, unsigned long sp);
 void release_thread(struct task_struct *);
 unsigned long get_wchan(struct task_struct *p);
diff --git a/arch/openrisc/include/asm/syscalls.h b/arch/openrisc/include/asm/syscalls.h
index 84a978a..8ee8168 100644
--- a/arch/openrisc/include/asm/syscalls.h
+++ b/arch/openrisc/include/asm/syscalls.h
@@ -24,4 +24,11 @@
 
 #include <asm-generic/syscalls.h>
 
+asmlinkage long __sys_clone(unsigned long clone_flags, unsigned long newsp,
+			void __user *parent_tid, void __user *child_tid, int tls);
+asmlinkage long __sys_fork(void);
+
+#define sys_clone __sys_clone
+#define sys_fork __sys_fork
+
 #endif /* __ASM_OPENRISC_SYSCALLS_H */
diff --git a/arch/openrisc/include/uapi/asm/unistd.h b/arch/openrisc/include/uapi/asm/unistd.h
index 437bdbb..5082b80 100644
--- a/arch/openrisc/include/uapi/asm/unistd.h
+++ b/arch/openrisc/include/uapi/asm/unistd.h
@@ -20,6 +20,10 @@
 
 #define sys_mmap2 sys_mmap_pgoff
 
+#define __ARCH_WANT_SYS_EXECVE
+#define __ARCH_WANT_SYS_FORK
+#define __ARCH_WANT_SYS_CLONE
+
 #include <asm-generic/unistd.h>
 
 #define __NR_or1k_atomic __NR_arch_specific_syscall
diff --git a/arch/openrisc/kernel/Makefile b/arch/openrisc/kernel/Makefile
index e1ee0fa..35f92ce 100644
--- a/arch/openrisc/kernel/Makefile
+++ b/arch/openrisc/kernel/Makefile
@@ -5,7 +5,7 @@
 extra-y	:= head.o vmlinux.lds
 
 obj-y	:= setup.o idle.o or32_ksyms.o process.o dma.o \
-	   traps.o time.o irq.o entry.o ptrace.o signal.o sys_or32.o \
+	   traps.o time.o irq.o entry.o ptrace.o signal.o \
 	   sys_call_table.o
 
 obj-$(CONFIG_MODULES)		+= module.o
diff --git a/arch/openrisc/kernel/entry.S b/arch/openrisc/kernel/entry.S
index ddfcaa8..5e5b306 100644
--- a/arch/openrisc/kernel/entry.S
+++ b/arch/openrisc/kernel/entry.S
@@ -894,6 +894,16 @@
 	l.jal	schedule_tail
 	 l.nop
 
+	/* Check if we are a kernel thread */
+	l.sfeqi	r20,0
+	l.bf	1f
+	 l.nop
+
+	/* ...we are a kernel thread so invoke the requested callback */
+	l.jalr	r20
+	 l.or	r3,r22,r0
+
+1:
 	/* _syscall_returns expect r11 to contain return value */
 	l.lwz	r11,PT_GPR11(r1)
 
@@ -915,26 +925,6 @@
 	l.j	_syscall_return
 	 l.nop
 
-/* Since syscalls don't save call-clobbered registers, the args to
- * kernel_thread_helper will need to be passed through callee-saved
- * registers and copied to the parameter registers when the thread
- * begins running.
- *
- * See arch/openrisc/kernel/process.c:
- * The args are passed as follows:
- *   arg1 (r3) : passed in r20
- *   arg2 (r4) : passed in r22
- */
-
-ENTRY(_kernel_thread_helper)
-	l.or	r3,r20,r0
-	l.or	r4,r22,r0
-	l.movhi	r31,hi(kernel_thread_helper)
-	l.ori	r31,r31,lo(kernel_thread_helper)
-	l.jr	r31
-	 l.nop
-
-
 /* ========================================================[ switch ] === */
 
 /*
@@ -1044,8 +1034,13 @@
 	/* Unwind stack to pre-switch state */
 	l.addi  r1,r1,(INT_FRAME_SIZE)
 
-	/* Return via the link-register back to where we 'came from', where that can be
-	 * either schedule() or return_from_fork()... */
+	/* Return via the link-register back to where we 'came from', where
+	 * that may be either schedule(), ret_from_fork(), or
+	 * ret_from_kernel_thread().  If we are returning to a new thread,
+	 * we are expected to have set up the arg to schedule_tail already,
+	 * hence we do so here unconditionally:
+	 */
+	l.lwz   r3,TI_STACK(r3)		/* Load 'prev' as schedule_tail arg */
 	l.jr	r9
 	 l.nop
 
@@ -1076,22 +1071,18 @@
 	l.jr	r29
 	 l.sw    PT_GPR28(r1),r28
 
-ENTRY(sys_clone)
-	l.movhi	r29,hi(_sys_clone)
-	l.ori	r29,r29,lo(_sys_clone)
+ENTRY(__sys_clone)
+	l.movhi	r29,hi(sys_clone)
+	l.ori	r29,r29,lo(sys_clone)
 	l.j	_fork_save_extra_regs_and_call
 	 l.addi	r7,r1,0
 
-ENTRY(sys_fork)
-	l.movhi	r29,hi(_sys_fork)
-	l.ori	r29,r29,lo(_sys_fork)
+ENTRY(__sys_fork)
+	l.movhi	r29,hi(sys_fork)
+	l.ori	r29,r29,lo(sys_fork)
 	l.j	_fork_save_extra_regs_and_call
 	 l.addi	r3,r1,0
 
-ENTRY(sys_execve)
-	l.j	_sys_execve
-	 l.addi	r6,r1,0
-
 ENTRY(sys_sigaltstack)
 	l.j	_sys_sigaltstack
 	 l.addi	r5,r1,0
diff --git a/arch/openrisc/kernel/process.c b/arch/openrisc/kernel/process.c
index c35f3ab..00c233b 100644
--- a/arch/openrisc/kernel/process.c
+++ b/arch/openrisc/kernel/process.c
@@ -109,66 +109,83 @@
  */
 extern asmlinkage void ret_from_fork(void);
 
+/*
+ * copy_thread
+ * @clone_flags: flags
+ * @usp: user stack pointer or fn for kernel thread
+ * @arg: arg to fn for kernel thread; always NULL for userspace thread
+ * @p: the newly created task
+ * @regs: CPU context to copy for userspace thread; always NULL for kthread
+ *
+ * At the top of a newly initialized kernel stack are two stacked pt_reg
+ * structures.  The first (topmost) is the userspace context of the thread.
+ * The second is the kernelspace context of the thread.
+ *
+ * A kernel thread will not be returning to userspace, so the topmost pt_regs
+ * struct can be uninitialized; it _does_ need to exist, though, because
+ * a kernel thread can become a userspace thread by doing a kernel_execve, in
+ * which case the topmost context will be initialized and used for 'returning'
+ * to userspace.
+ *
+ * The second pt_reg struct needs to be initialized to 'return' to
+ * ret_from_fork.  A kernel thread will need to set r20 to the address of
+ * a function to call into (with arg in r22); userspace threads need to set
+ * r20 to NULL in which case ret_from_fork will just continue a return to
+ * userspace.
+ *
+ * A kernel thread 'fn' may return; this is effectively what happens when
+ * kernel_execve is called.  In that case, the userspace pt_regs must have
+ * been initialized (which kernel_execve takes care of, see start_thread
+ * below); ret_from_fork will then continue its execution causing the
+ * 'kernel thread' to return to userspace as a userspace thread.
+ */
+
 int
 copy_thread(unsigned long clone_flags, unsigned long usp,
-	    unsigned long unused, struct task_struct *p, struct pt_regs *regs)
+	    unsigned long arg, struct task_struct *p)
 {
-	struct pt_regs *childregs;
+	struct pt_regs *userregs;
 	struct pt_regs *kregs;
 	unsigned long sp = (unsigned long)task_stack_page(p) + THREAD_SIZE;
-	struct thread_info *ti;
 	unsigned long top_of_kernel_stack;
 
 	top_of_kernel_stack = sp;
 
 	p->set_child_tid = p->clear_child_tid = NULL;
 
-	/* Copy registers */
-	/* redzone */
-	sp -= STACK_FRAME_OVERHEAD;
+	/* Locate userspace context on stack... */
+	sp -= STACK_FRAME_OVERHEAD;	/* redzone */
 	sp -= sizeof(struct pt_regs);
-	childregs = (struct pt_regs *)sp;
+	userregs = (struct pt_regs *) sp;
 
-	/* Copy parent registers */
-	*childregs = *regs;
-
-	if ((childregs->sr & SPR_SR_SM) == 1) {
-		/* for kernel thread, set `current_thread_info'
-		 * and stackptr in new task
-		 */
-		childregs->sp = (unsigned long)task_stack_page(p) + THREAD_SIZE;
-		childregs->gpr[10] = (unsigned long)task_thread_info(p);
-	} else {
-		childregs->sp = usp;
-	}
-
-	childregs->gpr[11] = 0;	/* Result from fork() */
-
-	/*
-	 * The way this works is that at some point in the future
-	 * some task will call _switch to switch to the new task.
-	 * That will pop off the stack frame created below and start
-	 * the new task running at ret_from_fork.  The new task will
-	 * do some house keeping and then return from the fork or clone
-	 * system call, using the stack frame created above.
-	 */
-	/* redzone */
-	sp -= STACK_FRAME_OVERHEAD;
+	/* ...and kernel context */
+	sp -= STACK_FRAME_OVERHEAD;	/* redzone */
 	sp -= sizeof(struct pt_regs);
 	kregs = (struct pt_regs *)sp;
 
-	ti = task_thread_info(p);
-	ti->ksp = sp;
+	if (unlikely(p->flags & PF_KTHREAD)) {
+		memset(kregs, 0, sizeof(struct pt_regs));
+		kregs->gpr[20] = usp; /* fn, kernel thread */
+		kregs->gpr[22] = arg;
+	} else {
+		*userregs = *current_pt_regs();
 
-	/* kregs->sp must store the location of the 'pre-switch' kernel stack
-	 * pointer... for a newly forked process, this is simply the top of
-	 * the kernel stack.
+		if (usp)
+			userregs->sp = usp;
+		userregs->gpr[11] = 0;	/* Result from fork() */
+
+		kregs->gpr[20] = 0;	/* Userspace thread */
+	}
+
+	/*
+	 * _switch wants the kernel stack page in pt_regs->sp so that it
+	 * can restore it to thread_info->ksp... see _switch for details.
 	 */
 	kregs->sp = top_of_kernel_stack;
-	kregs->gpr[3] = (unsigned long)current;	/* arg to schedule_tail */
-	kregs->gpr[10] = (unsigned long)task_thread_info(p);
 	kregs->gpr[9] = (unsigned long)ret_from_fork;
 
+	task_thread_info(p)->ksp = (unsigned long)kregs;
+
 	return 0;
 }
 
@@ -177,16 +194,14 @@
  */
 void start_thread(struct pt_regs *regs, unsigned long pc, unsigned long sp)
 {
-	unsigned long sr = regs->sr & ~SPR_SR_SM;
+	unsigned long sr = mfspr(SPR_SR) & ~SPR_SR_SM;
 
 	set_fs(USER_DS);
-	memset(regs->gpr, 0, sizeof(regs->gpr));
+	memset(regs, 0, sizeof(struct pt_regs));
 
 	regs->pc = pc;
 	regs->sr = sr;
 	regs->sp = sp;
-
-/*	printk("start thread, ksp = %lx\n", current_thread_info()->ksp);*/
 }
 
 /* Fill in the fpu structure for a core dump.  */
@@ -237,74 +252,9 @@
 	dest[35] = 0;
 }
 
-extern void _kernel_thread_helper(void);
-
-void __noreturn kernel_thread_helper(int (*fn) (void *), void *arg)
-{
-	do_exit(fn(arg));
-}
-
-/*
- * Create a kernel thread.
- */
-int kernel_thread(int (*fn) (void *), void *arg, unsigned long flags)
-{
-	struct pt_regs regs;
-
-	memset(&regs, 0, sizeof(regs));
-
-	regs.gpr[20] = (unsigned long)fn;
-	regs.gpr[22] = (unsigned long)arg;
-	regs.sr = mfspr(SPR_SR);
-	regs.pc = (unsigned long)_kernel_thread_helper;
-
-	return do_fork(flags | CLONE_VM | CLONE_UNTRACED,
-		       0, &regs, 0, NULL, NULL);
-}
-
-/*
- * sys_execve() executes a new program.
- */
-asmlinkage long _sys_execve(const char __user *name,
-			    const char __user * const __user *argv,
-			    const char __user * const __user *envp,
-			    struct pt_regs *regs)
-{
-	int error;
-	struct filename *filename;
-
-	filename = getname(name);
-	error = PTR_ERR(filename);
-
-	if (IS_ERR(filename))
-		goto out;
-
-	error = do_execve(filename->name, argv, envp, regs);
-	putname(filename);
-
-out:
-	return error;
-}
-
 unsigned long get_wchan(struct task_struct *p)
 {
 	/* TODO */
 
 	return 0;
 }
-
-int kernel_execve(const char *filename, char *const argv[], char *const envp[])
-{
-	register long __res asm("r11") = __NR_execve;
-	register long __a asm("r3") = (long)(filename);
-	register long __b asm("r4") = (long)(argv);
-	register long __c asm("r5") = (long)(envp);
-	__asm__ volatile ("l.sys 1"
-			  : "=r" (__res), "=r"(__a), "=r"(__b), "=r"(__c)
-			  : "0"(__res), "1"(__a), "2"(__b), "3"(__c)
-			  : "r6", "r7", "r8", "r12", "r13", "r15",
-			    "r17", "r19", "r21", "r23", "r25", "r27",
-			    "r29", "r31");
-	__asm__ volatile ("l.nop");
-	return __res;
-}
diff --git a/arch/openrisc/kernel/sys_or32.c b/arch/openrisc/kernel/sys_or32.c
deleted file mode 100644
index 5706008..0000000
--- a/arch/openrisc/kernel/sys_or32.c
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
- * OpenRISC sys_or32.c
- *
- * Linux architectural port borrowing liberally from similar works of
- * others.  All original copyrights apply as per the original source
- * declaration.
- *
- * Modifications for the OpenRISC architecture:
- * Copyright (C) 2003 Matjaz Breskvar <phoenix@bsemi.com>
- * Copyright (C) 2010-2011 Jonas Bonn <jonas@southpole.se>
- *
- *      This program is free software; you can redistribute it and/or
- *      modify it under the terms of the GNU General Public License
- *      as published by the Free Software Foundation; either version
- *      2 of the License, or (at your option) any later version.
- *
- * This file contains various random system calls that
- * have a non-standard calling sequence on some platforms.
- * Since we don't have to do any backwards compatibility, our
- * versions are done in the most "normal" way possible.
- */
-
-#include <linux/errno.h>
-#include <linux/syscalls.h>
-#include <linux/mm.h>
-
-#include <asm/syscalls.h>
-
-/* These are secondary entry points as the primary entry points are defined in
- * entry.S where we add the 'regs' parameter value
- */
-
-asmlinkage long _sys_clone(unsigned long clone_flags, unsigned long newsp,
-			   int __user *parent_tid, int __user *child_tid,
-			   struct pt_regs *regs)
-{
-	long ret;
-
-	/* FIXME: Is alignment necessary? */
-	/* newsp = ALIGN(newsp, 4); */
-
-	if (!newsp)
-		newsp = regs->sp;
-
-	ret = do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid);
-
-	return ret;
-}
-
-asmlinkage int _sys_fork(struct pt_regs *regs)
-{
-#ifdef CONFIG_MMU
-	return do_fork(SIGCHLD, regs->sp, regs, 0, NULL, NULL);
-#else
-	return -EINVAL;
-#endif
-}
diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig
index 11def45..e688a2b 100644
--- a/arch/parisc/Kconfig
+++ b/arch/parisc/Kconfig
@@ -22,6 +22,9 @@
 	select GENERIC_STRNCPY_FROM_USER
 	select HAVE_MOD_ARCH_SPECIFIC
 	select MODULES_USE_ELF_RELA
+	select GENERIC_KERNEL_THREAD
+	select GENERIC_KERNEL_EXECVE
+	select CLONE_BACKWARDS
 
 	help
 	  The PA-RISC microprocessor is designed by Hewlett-Packard and used
diff --git a/arch/parisc/include/asm/processor.h b/arch/parisc/include/asm/processor.h
index 0e8b7b8..09b54a5 100644
--- a/arch/parisc/include/asm/processor.h
+++ b/arch/parisc/include/asm/processor.h
@@ -326,7 +326,6 @@
 
 /* Free all resources held by a thread. */
 extern void release_thread(struct task_struct *);
-extern int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags);
 
 extern void map_hpux_gateway_page(struct task_struct *tsk, struct mm_struct *mm);
 
diff --git a/arch/parisc/include/asm/signal.h b/arch/parisc/include/asm/signal.h
index 21abf4f..0fdb3c8 100644
--- a/arch/parisc/include/asm/signal.h
+++ b/arch/parisc/include/asm/signal.h
@@ -34,8 +34,6 @@
 	struct sigaction sa;
 };
 
-#define ptrace_signal_deliver(regs, cookie) do { } while (0)
-
 #include <asm/sigcontext.h>
 
 #endif /* !__ASSEMBLY */
diff --git a/arch/parisc/include/asm/unistd.h b/arch/parisc/include/asm/unistd.h
index 541639c..1efef41 100644
--- a/arch/parisc/include/asm/unistd.h
+++ b/arch/parisc/include/asm/unistd.h
@@ -163,6 +163,10 @@
 #define __ARCH_WANT_SYS_RT_SIGACTION
 #define __ARCH_WANT_SYS_RT_SIGSUSPEND
 #define __ARCH_WANT_COMPAT_SYS_RT_SIGSUSPEND
+#define __ARCH_WANT_SYS_EXECVE
+#define __ARCH_WANT_SYS_FORK
+#define __ARCH_WANT_SYS_VFORK
+#define __ARCH_WANT_SYS_CLONE
 
 #endif /* __ASSEMBLY__ */
 
diff --git a/arch/parisc/kernel/entry.S b/arch/parisc/kernel/entry.S
index 18670a0..bfb4424 100644
--- a/arch/parisc/kernel/entry.S
+++ b/arch/parisc/kernel/entry.S
@@ -708,59 +708,9 @@
 	.import		do_cpu_irq_mask,code
 
 	/*
-	 * r26 = function to be called
-	 * r25 = argument to pass in
-	 * r24 = flags for do_fork()
-	 *
-	 * Kernel threads don't ever return, so they don't need
-	 * a true register context. We just save away the arguments
-	 * for copy_thread/ret_ to properly set up the child.
-	 */
-
-#define CLONE_VM 0x100	/* Must agree with <linux/sched.h> */
-#define CLONE_UNTRACED 0x00800000
-
-	.import do_fork
-ENTRY(__kernel_thread)
-	STREG	%r2, -RP_OFFSET(%r30)
-
-	copy	%r30, %r1
-	ldo	PT_SZ_ALGN(%r30),%r30
-#ifdef CONFIG_64BIT
-	/* Yo, function pointers in wide mode are little structs... -PB */
-	ldd	24(%r26), %r2
-	STREG	%r2, PT_GR27(%r1)	/* Store childs %dp */
-	ldd	16(%r26), %r26
-
-	STREG	%r22, PT_GR22(%r1)	/* save r22 (arg5) */
-	copy	%r0, %r22		/* user_tid */
-#endif
-	STREG	%r26, PT_GR26(%r1)  /* Store function & argument for child */
-	STREG	%r25, PT_GR25(%r1)
-	ldil	L%CLONE_UNTRACED, %r26
-	ldo	CLONE_VM(%r26), %r26   /* Force CLONE_VM since only init_mm */
-	or	%r26, %r24, %r26      /* will have kernel mappings.	 */
-	ldi	1, %r25			/* stack_start, signals kernel thread */
-	stw	%r0, -52(%r30)	     	/* user_tid */
-#ifdef CONFIG_64BIT
-	ldo	-16(%r30),%r29		/* Reference param save area */
-#endif
-	BL	do_fork, %r2
-	copy	%r1, %r24		/* pt_regs */
-
-	/* Parent Returns here */
-
-	LDREG	-PT_SZ_ALGN-RP_OFFSET(%r30), %r2
-	ldo	-PT_SZ_ALGN(%r30), %r30
-	bv	%r0(%r2)
-	nop
-ENDPROC(__kernel_thread)
-
-	/*
 	 * Child Returns here
 	 *
-	 * copy_thread moved args from temp save area set up above
-	 * into task save area.
+	 * copy_thread moved args into task save area.
 	 */
 
 ENTRY(ret_from_kernel_thread)
@@ -769,51 +719,17 @@
 	BL	schedule_tail, %r2
 	nop
 
-	LDREG	TI_TASK-THREAD_SZ_ALGN(%r30), %r1
+	LDREG	TI_TASK-THREAD_SZ_ALGN-FRAME_SIZE(%r30), %r1
 	LDREG	TASK_PT_GR25(%r1), %r26
 #ifdef CONFIG_64BIT
 	LDREG	TASK_PT_GR27(%r1), %r27
-	LDREG	TASK_PT_GR22(%r1), %r22
 #endif
 	LDREG	TASK_PT_GR26(%r1), %r1
 	ble	0(%sr7, %r1)
 	copy	%r31, %r2
-
-#ifdef CONFIG_64BIT
-	ldo	-16(%r30),%r29		/* Reference param save area */
-	loadgp				/* Thread could have been in a module */
-#endif
-#ifndef CONFIG_64BIT
-	b	sys_exit
-#else
-	load32	sys_exit, %r1
-	bv	%r0(%r1)
-#endif
-	ldi	0, %r26
-ENDPROC(ret_from_kernel_thread)
-
-	.import	sys_execve, code
-ENTRY(__execve)
-	copy	%r2, %r15
-	copy	%r30, %r16
-	ldo	PT_SZ_ALGN(%r30), %r30
-	STREG	%r26, PT_GR26(%r16)
-	STREG	%r25, PT_GR25(%r16)
-	STREG	%r24, PT_GR24(%r16)
-#ifdef CONFIG_64BIT
-	ldo	-16(%r30),%r29		/* Reference param save area */
-#endif
-	BL	sys_execve, %r2
-	copy	%r16, %r26
-
-	cmpib,=,n 0,%r28,intr_return    /* forward */
-
-	/* yes, this will trap and die. */
-	copy	%r15, %r2
-	copy	%r16, %r30
-	bv	%r0(%r2)
+	b	finish_child_return
 	nop
-ENDPROC(__execve)
+ENDPROC(ret_from_kernel_thread)
 
 
 	/*
@@ -1772,151 +1688,36 @@
 	LDREG   PT_GR18(\regs),%r18
 	.endm
 
-ENTRY(sys_fork_wrapper)
+	.macro	fork_like name
+ENTRY(sys_\name\()_wrapper)
 	LDREG	TI_TASK-THREAD_SZ_ALGN-FRAME_SIZE(%r30), %r1
 	ldo	TASK_REGS(%r1),%r1
 	reg_save %r1
-	mfctl	%cr27, %r3
-	STREG	%r3, PT_CR27(%r1)
+	mfctl	%cr27, %r28
+	b	sys_\name
+	STREG	%r28, PT_CR27(%r1)
+ENDPROC(sys_\name\()_wrapper)
+	.endm
 
-	STREG	%r2,-RP_OFFSET(%r30)
-	ldo	FRAME_SIZE(%r30),%r30
-#ifdef CONFIG_64BIT
-	ldo	-16(%r30),%r29		/* Reference param save area */
-#endif
-
-	/* These are call-clobbered registers and therefore
-	   also syscall-clobbered (we hope). */
-	STREG	%r2,PT_GR19(%r1)	/* save for child */
-	STREG	%r30,PT_GR21(%r1)
-
-	LDREG	PT_GR30(%r1),%r25
-	copy	%r1,%r24
-	BL	sys_clone,%r2
-	ldi	SIGCHLD,%r26
-
-	LDREG	-RP_OFFSET-FRAME_SIZE(%r30),%r2
-wrapper_exit:
-	ldo	-FRAME_SIZE(%r30),%r30		/* get the stackframe */
-	LDREG	TI_TASK-THREAD_SZ_ALGN-FRAME_SIZE(%r30),%r1
-	ldo	TASK_REGS(%r1),%r1	 /* get pt regs */
-
-	LDREG	PT_CR27(%r1), %r3
-	mtctl	%r3, %cr27
-	reg_restore %r1
-
-	/* strace expects syscall # to be preserved in r20 */
-	ldi	__NR_fork,%r20
-	bv %r0(%r2)
-	STREG	%r20,PT_GR20(%r1)
-ENDPROC(sys_fork_wrapper)
+fork_like clone
+fork_like fork
+fork_like vfork
 
 	/* Set the return value for the child */
 ENTRY(child_return)
 	BL	schedule_tail, %r2
 	nop
+finish_child_return:
+	LDREG	TI_TASK-THREAD_SZ_ALGN-FRAME_SIZE(%r30), %r1
+	ldo	TASK_REGS(%r1),%r1	 /* get pt regs */
 
-	LDREG	TI_TASK-THREAD_SZ_ALGN-FRAME_SIZE-FRAME_SIZE(%r30), %r1
-	LDREG	TASK_PT_GR19(%r1),%r2
-	b	wrapper_exit
+	LDREG	PT_CR27(%r1), %r3
+	mtctl	%r3, %cr27
+	reg_restore %r1
+	b	syscall_exit
 	copy	%r0,%r28
 ENDPROC(child_return)
 
-
-ENTRY(sys_clone_wrapper)
-	LDREG	TI_TASK-THREAD_SZ_ALGN-FRAME_SIZE(%r30),%r1
-	ldo	TASK_REGS(%r1),%r1	/* get pt regs */
-	reg_save %r1
-	mfctl	%cr27, %r3
-	STREG	%r3, PT_CR27(%r1)
-
-	STREG	%r2,-RP_OFFSET(%r30)
-	ldo	FRAME_SIZE(%r30),%r30
-#ifdef CONFIG_64BIT
-	ldo	-16(%r30),%r29		/* Reference param save area */
-#endif
-
-	/* WARNING - Clobbers r19 and r21, userspace must save these! */
-	STREG	%r2,PT_GR19(%r1)	/* save for child */
-	STREG	%r30,PT_GR21(%r1)
-	BL	sys_clone,%r2
-	copy	%r1,%r24
-
-	b	wrapper_exit
-	LDREG	-RP_OFFSET-FRAME_SIZE(%r30),%r2
-ENDPROC(sys_clone_wrapper)
-
-
-ENTRY(sys_vfork_wrapper)
-	LDREG	TI_TASK-THREAD_SZ_ALGN-FRAME_SIZE(%r30),%r1
-	ldo	TASK_REGS(%r1),%r1	/* get pt regs */
-	reg_save %r1
-	mfctl	%cr27, %r3
-	STREG	%r3, PT_CR27(%r1)
-
-	STREG	%r2,-RP_OFFSET(%r30)
-	ldo	FRAME_SIZE(%r30),%r30
-#ifdef CONFIG_64BIT
-	ldo	-16(%r30),%r29		/* Reference param save area */
-#endif
-
-	STREG	%r2,PT_GR19(%r1)	/* save for child */
-	STREG	%r30,PT_GR21(%r1)
-
-	BL	sys_vfork,%r2
-	copy	%r1,%r26
-
-	b	wrapper_exit
-	LDREG	-RP_OFFSET-FRAME_SIZE(%r30),%r2
-ENDPROC(sys_vfork_wrapper)
-
-	
-	.macro  execve_wrapper execve
-	LDREG	TI_TASK-THREAD_SZ_ALGN-FRAME_SIZE(%r30),%r1
-	ldo	TASK_REGS(%r1),%r1	/* get pt regs */
-
-	/*
-	 * Do we need to save/restore r3-r18 here?
-	 * I don't think so. why would new thread need old
-	 * threads registers?
-	 */
-
-	/* %arg0 - %arg3 are already saved for us. */
-
-	STREG %r2,-RP_OFFSET(%r30)
-	ldo FRAME_SIZE(%r30),%r30
-#ifdef CONFIG_64BIT
-	ldo	-16(%r30),%r29		/* Reference param save area */
-#endif
-	BL \execve,%r2
-	copy %r1,%arg0
-
-	ldo -FRAME_SIZE(%r30),%r30
-	LDREG -RP_OFFSET(%r30),%r2
-
-	/* If exec succeeded we need to load the args */
-
-	ldo -1024(%r0),%r1
-	cmpb,>>= %r28,%r1,error_\execve
-	copy %r2,%r19
-
-error_\execve:
-	bv %r0(%r19)
-	nop
-	.endm
-
-	.import sys_execve
-ENTRY(sys_execve_wrapper)
-	execve_wrapper sys_execve
-ENDPROC(sys_execve_wrapper)
-
-#ifdef CONFIG_64BIT
-	.import sys32_execve
-ENTRY(sys32_execve_wrapper)
-	execve_wrapper sys32_execve
-ENDPROC(sys32_execve_wrapper)
-#endif
-
 ENTRY(sys_rt_sigreturn_wrapper)
 	LDREG	TI_TASK-THREAD_SZ_ALGN-FRAME_SIZE(%r30),%r26
 	ldo	TASK_REGS(%r26),%r26	/* get pt regs */
diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c
index cbc3721..d135072 100644
--- a/arch/parisc/kernel/process.c
+++ b/arch/parisc/kernel/process.c
@@ -52,6 +52,7 @@
 
 #include <asm/io.h>
 #include <asm/asm-offsets.h>
+#include <asm/assembly.h>
 #include <asm/pdc.h>
 #include <asm/pdc_chassis.h>
 #include <asm/pgalloc.h>
@@ -165,23 +166,6 @@
 EXPORT_SYMBOL(pm_power_off);
 
 /*
- * Create a kernel thread
- */
-
-extern pid_t __kernel_thread(int (*fn)(void *), void *arg, unsigned long flags);
-pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
-{
-
-	/*
-	 * FIXME: Once we are sure we don't need any debug here,
-	 *	  kernel_thread can become a #define.
-	 */
-
-	return __kernel_thread(fn, arg, flags);
-}
-EXPORT_SYMBOL(kernel_thread);
-
-/*
  * Free current thread data structures etc..
  */
 void exit_thread(void)
@@ -218,48 +202,11 @@
 	return 1;
 }
 
-/* Note that "fork()" is implemented in terms of clone, with
-   parameters (SIGCHLD, regs->gr[30], regs). */
-int
-sys_clone(unsigned long clone_flags, unsigned long usp,
-	  struct pt_regs *regs)
-{
-  	/* Arugments from userspace are:
-	   r26 = Clone flags.
-	   r25 = Child stack.
-	   r24 = parent_tidptr.
-	   r23 = Is the TLS storage descriptor 
-	   r22 = child_tidptr 
-	   
-	   However, these last 3 args are only examined
-	   if the proper flags are set. */
-	int __user *parent_tidptr = (int __user *)regs->gr[24];
-	int __user *child_tidptr  = (int __user *)regs->gr[22];
-
-	/* usp must be word aligned.  This also prevents users from
-	 * passing in the value 1 (which is the signal for a special
-	 * return for a kernel thread) */
-	usp = ALIGN(usp, 4);
-
-	/* A zero value for usp means use the current stack */
-	if (usp == 0)
-	  usp = regs->gr[30];
-
-	return do_fork(clone_flags, usp, regs, 0, parent_tidptr, child_tidptr);
-}
-
-int
-sys_vfork(struct pt_regs *regs)
-{
-	return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->gr[30], regs, 0, NULL, NULL);
-}
-
 int
 copy_thread(unsigned long clone_flags, unsigned long usp,
-	    unsigned long unused,	/* in ia64 this is "user_stack_size" */
-	    struct task_struct * p, struct pt_regs * pregs)
+	    unsigned long arg, struct task_struct *p)
 {
-	struct pt_regs * cregs = &(p->thread.regs);
+	struct pt_regs *cregs = &(p->thread.regs);
 	void *stack = task_stack_page(p);
 	
 	/* We have to use void * instead of a function pointer, because
@@ -270,48 +217,39 @@
 #ifdef CONFIG_HPUX
 	extern void * const hpux_child_return;
 #endif
+	if (unlikely(p->flags & PF_KTHREAD)) {
+		memset(cregs, 0, sizeof(struct pt_regs));
+		if (!usp) /* idle thread */
+			return 0;
 
-	*cregs = *pregs;
-
-	/* Set the return value for the child.  Note that this is not
-           actually restored by the syscall exit path, but we put it
-           here for consistency in case of signals. */
-	cregs->gr[28] = 0; /* child */
-
-	/*
-	 * We need to differentiate between a user fork and a
-	 * kernel fork. We can't use user_mode, because the
-	 * the syscall path doesn't save iaoq. Right now
-	 * We rely on the fact that kernel_thread passes
-	 * in zero for usp.
-	 */
-	if (usp == 1) {
 		/* kernel thread */
-		cregs->ksp = (unsigned long)stack + THREAD_SZ_ALGN;
 		/* Must exit via ret_from_kernel_thread in order
 		 * to call schedule_tail()
 		 */
+		cregs->ksp = (unsigned long)stack + THREAD_SZ_ALGN + FRAME_SIZE;
 		cregs->kpc = (unsigned long) &ret_from_kernel_thread;
 		/*
 		 * Copy function and argument to be called from
 		 * ret_from_kernel_thread.
 		 */
 #ifdef CONFIG_64BIT
-		cregs->gr[27] = pregs->gr[27];
+		cregs->gr[27] = ((unsigned long *)usp)[3];
+		cregs->gr[26] = ((unsigned long *)usp)[2];
+#else
+		cregs->gr[26] = usp;
 #endif
-		cregs->gr[26] = pregs->gr[26];
-		cregs->gr[25] = pregs->gr[25];
+		cregs->gr[25] = arg;
 	} else {
 		/* user thread */
-		/*
-		 * Note that the fork wrappers are responsible
-		 * for setting gr[21].
-		 */
-
-		/* Use same stack depth as parent */
-		cregs->ksp = (unsigned long)stack
-			+ (pregs->gr[21] & (THREAD_SIZE - 1));
-		cregs->gr[30] = usp;
+		/* usp must be word aligned.  This also prevents users from
+		 * passing in the value 1 (which is the signal for a special
+		 * return for a kernel thread) */
+		if (usp) {
+			usp = ALIGN(usp, 4);
+			if (likely(usp))
+				cregs->gr[30] = usp;
+		}
+		cregs->ksp = (unsigned long)stack + THREAD_SZ_ALGN + FRAME_SIZE;
 		if (personality(p->personality) == PER_HPUX) {
 #ifdef CONFIG_HPUX
 			cregs->kpc = (unsigned long) &hpux_child_return;
@@ -323,8 +261,7 @@
 		}
 		/* Setup thread TLS area from the 4th parameter in clone */
 		if (clone_flags & CLONE_SETTLS)
-		  cregs->cr27 = pregs->gr[23];
-	
+			cregs->cr27 = cregs->gr[23];
 	}
 
 	return 0;
@@ -335,39 +272,6 @@
 	return t->thread.regs.kpc;
 }
 
-/*
- * sys_execve() executes a new program.
- */
-
-asmlinkage int sys_execve(struct pt_regs *regs)
-{
-	int error;
-	struct filename *filename;
-
-	filename = getname((const char __user *) regs->gr[26]);
-	error = PTR_ERR(filename);
-	if (IS_ERR(filename))
-		goto out;
-	error = do_execve(filename->name,
-			  (const char __user *const __user *) regs->gr[25],
-			  (const char __user *const __user *) regs->gr[24],
-			  regs);
-	putname(filename);
-out:
-
-	return error;
-}
-
-extern int __execve(const char *filename,
-		    const char *const argv[],
-		    const char *const envp[], struct task_struct *task);
-int kernel_execve(const char *filename,
-		  const char *const argv[],
-		  const char *const envp[])
-{
-	return __execve(filename, argv, envp, current);
-}
-
 unsigned long
 get_wchan(struct task_struct *p)
 {
diff --git a/arch/parisc/kernel/sys_parisc32.c b/arch/parisc/kernel/sys_parisc32.c
index bf5b93a..9cfdaa1 100644
--- a/arch/parisc/kernel/sys_parisc32.c
+++ b/arch/parisc/kernel/sys_parisc32.c
@@ -53,28 +53,6 @@
 #define DBG(x)
 #endif
 
-/*
- * sys32_execve() executes a new program.
- */
-
-asmlinkage int sys32_execve(struct pt_regs *regs)
-{
-	int error;
-	struct filename *filename;
-
-	DBG(("sys32_execve(%p) r26 = 0x%lx\n", regs, regs->gr[26]));
-	filename = getname((const char __user *) regs->gr[26]);
-	error = PTR_ERR(filename);
-	if (IS_ERR(filename))
-		goto out;
-	error = compat_do_execve(filename->name, compat_ptr(regs->gr[25]),
-				 compat_ptr(regs->gr[24]), regs);
-	putname(filename);
-out:
-
-	return error;
-}
-
 asmlinkage long sys32_unimplemented(int r26, int r25, int r24, int r23,
 	int r22, int r21, int r20)
 {
diff --git a/arch/parisc/kernel/syscall_table.S b/arch/parisc/kernel/syscall_table.S
index cbf5d59..54d950b 100644
--- a/arch/parisc/kernel/syscall_table.S
+++ b/arch/parisc/kernel/syscall_table.S
@@ -66,7 +66,7 @@
 	ENTRY_SAME(creat)
 	ENTRY_SAME(link)
 	ENTRY_SAME(unlink)		/* 10 */
-	ENTRY_DIFF(execve_wrapper)
+	ENTRY_COMP(execve)
 	ENTRY_SAME(chdir)
 	/* See comments in kernel/time.c!!! Maybe we don't need this? */
 	ENTRY_COMP(time)
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index a902a5c..951a517 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -144,6 +144,8 @@
 	select GENERIC_KERNEL_THREAD
 	select HAVE_MOD_ARCH_SPECIFIC
 	select MODULES_USE_ELF_RELA
+	select GENERIC_KERNEL_EXECVE
+	select CLONE_BACKWARDS
 
 config EARLY_PRINTK
 	bool
diff --git a/arch/powerpc/include/asm/signal.h b/arch/powerpc/include/asm/signal.h
index 189998b..a101637 100644
--- a/arch/powerpc/include/asm/signal.h
+++ b/arch/powerpc/include/asm/signal.h
@@ -3,6 +3,4 @@
 
 #include <uapi/asm/signal.h>
 
-struct pt_regs;
-#define ptrace_signal_deliver(regs, cookie) do { } while (0)
 #endif /* _ASM_POWERPC_SIGNAL_H */
diff --git a/arch/powerpc/include/asm/syscalls.h b/arch/powerpc/include/asm/syscalls.h
index 329db4e..b5308d3 100644
--- a/arch/powerpc/include/asm/syscalls.h
+++ b/arch/powerpc/include/asm/syscalls.h
@@ -17,15 +17,6 @@
 asmlinkage unsigned long sys_mmap2(unsigned long addr, size_t len,
 		unsigned long prot, unsigned long flags,
 		unsigned long fd, unsigned long pgoff);
-asmlinkage int sys_clone(unsigned long clone_flags, unsigned long usp,
-		int __user *parent_tidp, void __user *child_threadptr,
-		int __user *child_tidp, int p6, struct pt_regs *regs);
-asmlinkage int sys_fork(unsigned long p1, unsigned long p2,
-		unsigned long p3, unsigned long p4, unsigned long p5,
-		unsigned long p6, struct pt_regs *regs);
-asmlinkage int sys_vfork(unsigned long p1, unsigned long p2,
-		unsigned long p3, unsigned long p4, unsigned long p5,
-		unsigned long p6, struct pt_regs *regs);
 asmlinkage long sys_pipe(int __user *fildes);
 asmlinkage long sys_pipe2(int __user *fildes, int flags);
 asmlinkage long sys_rt_sigaction(int sig,
diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h
index 921dce6..76fe846 100644
--- a/arch/powerpc/include/asm/unistd.h
+++ b/arch/powerpc/include/asm/unistd.h
@@ -56,7 +56,9 @@
 #define __ARCH_WANT_COMPAT_SYS_SENDFILE
 #endif
 #define __ARCH_WANT_SYS_EXECVE
-#define __ARCH_WANT_KERNEL_EXECVE
+#define __ARCH_WANT_SYS_FORK
+#define __ARCH_WANT_SYS_VFORK
+#define __ARCH_WANT_SYS_CLONE
 
 /*
  * "Conditional" syscalls
diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index 9499385..d22e73e 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -444,11 +444,6 @@
 	PPC440EP_ERR42
 	blrl
 	li	r3,0
-	b	do_exit		# no return
-
-	.globl	__ret_from_kernel_execve
-__ret_from_kernel_execve:
-	addi	r1,r3,-STACK_FRAME_OVERHEAD
 	b	ret_from_syscall
 
 /* Traced system call support */
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 56e0ff0..e9a906c 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -373,17 +373,11 @@
 _GLOBAL(ret_from_kernel_thread)
 	bl	.schedule_tail
 	REST_NVGPRS(r1)
-	REST_GPR(2,r1)
+	ld	r14, 0(r14)
 	mtlr	r14
 	mr	r3,r15
 	blrl
 	li	r3,0
-	b	.do_exit	# no return
-
-_GLOBAL(__ret_from_kernel_execve)
-	addi	r1,r3,-STACK_FRAME_OVERHEAD
-	li	r10,1
-	std	r10,SOFTE(r1)
 	b	syscall_exit
 
 	.section	".toc","aw"
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index ba48233..8143067 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -733,8 +733,7 @@
 extern unsigned long dscr_default; /* defined in arch/powerpc/kernel/sysfs.c */
 
 int copy_thread(unsigned long clone_flags, unsigned long usp,
-		unsigned long arg, struct task_struct *p,
-		struct pt_regs *regs)
+		unsigned long arg, struct task_struct *p)
 {
 	struct pt_regs *childregs, *kregs;
 	extern void ret_from_fork(void);
@@ -745,25 +744,25 @@
 	/* Copy registers */
 	sp -= sizeof(struct pt_regs);
 	childregs = (struct pt_regs *) sp;
-	if (!regs) {
-		/* for kernel thread, set `current' and stackptr in new task */
+	if (unlikely(p->flags & PF_KTHREAD)) {
+		struct thread_info *ti = (void *)task_stack_page(p);
 		memset(childregs, 0, sizeof(struct pt_regs));
 		childregs->gpr[1] = sp + sizeof(struct pt_regs);
-#ifdef CONFIG_PPC64
-		childregs->gpr[14] = *(unsigned long *)usp;
-		childregs->gpr[2] = ((unsigned long *)usp)[1],
-		clear_tsk_thread_flag(p, TIF_32BIT);
-#else
 		childregs->gpr[14] = usp;	/* function */
-		childregs->gpr[2] = (unsigned long) p;
+#ifdef CONFIG_PPC64
+		clear_tsk_thread_flag(p, TIF_32BIT);
+		childregs->softe = 1;
 #endif
 		childregs->gpr[15] = arg;
 		p->thread.regs = NULL;	/* no user register state */
+		ti->flags |= _TIF_RESTOREALL;
 		f = ret_from_kernel_thread;
 	} else {
+		struct pt_regs *regs = current_pt_regs();
 		CHECK_FULL_REGS(regs);
 		*childregs = *regs;
-		childregs->gpr[1] = usp;
+		if (usp)
+			childregs->gpr[1] = usp;
 		p->thread.regs = childregs;
 		childregs->gpr[3] = 0;  /* Result from fork() */
 		if (clone_flags & CLONE_SETTLS) {
@@ -1027,51 +1026,6 @@
 	return put_user(tsk->thread.align_ctl, (unsigned int __user *)adr);
 }
 
-#define TRUNC_PTR(x)	((typeof(x))(((unsigned long)(x)) & 0xffffffff))
-
-int sys_clone(unsigned long clone_flags, unsigned long usp,
-	      int __user *parent_tidp, void __user *child_threadptr,
-	      int __user *child_tidp, int p6,
-	      struct pt_regs *regs)
-{
-	CHECK_FULL_REGS(regs);
-	if (usp == 0)
-		usp = regs->gpr[1];	/* stack pointer for child */
-#ifdef CONFIG_PPC64
-	if (is_32bit_task()) {
-		parent_tidp = TRUNC_PTR(parent_tidp);
-		child_tidp = TRUNC_PTR(child_tidp);
-	}
-#endif
- 	return do_fork(clone_flags, usp, regs, 0, parent_tidp, child_tidp);
-}
-
-int sys_fork(unsigned long p1, unsigned long p2, unsigned long p3,
-	     unsigned long p4, unsigned long p5, unsigned long p6,
-	     struct pt_regs *regs)
-{
-	CHECK_FULL_REGS(regs);
-	return do_fork(SIGCHLD, regs->gpr[1], regs, 0, NULL, NULL);
-}
-
-int sys_vfork(unsigned long p1, unsigned long p2, unsigned long p3,
-	      unsigned long p4, unsigned long p5, unsigned long p6,
-	      struct pt_regs *regs)
-{
-	CHECK_FULL_REGS(regs);
-	return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->gpr[1],
-			regs, 0, NULL, NULL);
-}
-
-void __ret_from_kernel_execve(struct pt_regs *normal)
-__noreturn;
-
-void ret_from_kernel_execve(struct pt_regs *normal)
-{
-	set_thread_flag(TIF_RESTOREALL);
-	__ret_from_kernel_execve(normal);
-}
-
 static inline int valid_irq_stack(unsigned long sp, struct task_struct *p,
 				  unsigned long nbytes)
 {
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index d385f39..3cbb875 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -138,8 +138,10 @@
 	select KTIME_SCALAR if 32BIT
 	select HAVE_ARCH_SECCOMP_FILTER
 	select GENERIC_KERNEL_THREAD
+	select GENERIC_KERNEL_EXECVE
 	select HAVE_MOD_ARCH_SPECIFIC
 	select MODULES_USE_ELF_RELA
+	select CLONE_BACKWARDS2
 
 config SCHED_OMIT_FRAME_POINTER
 	def_bool y
diff --git a/arch/s390/include/asm/signal.h b/arch/s390/include/asm/signal.h
index bffdbdd..db7ddfa 100644
--- a/arch/s390/include/asm/signal.h
+++ b/arch/s390/include/asm/signal.h
@@ -39,6 +39,4 @@
         struct sigaction sa;
 };
 
-#define ptrace_signal_deliver(regs, cookie) do { } while (0)
-
 #endif
diff --git a/arch/s390/include/asm/unistd.h b/arch/s390/include/asm/unistd.h
index bbbae41..086bb8e 100644
--- a/arch/s390/include/asm/unistd.h
+++ b/arch/s390/include/asm/unistd.h
@@ -54,7 +54,9 @@
 #   define __ARCH_WANT_COMPAT_SYS_RT_SIGSUSPEND
 # endif
 #define __ARCH_WANT_SYS_EXECVE
-#define __ARCH_WANT_KERNEL_EXECVE
+#define __ARCH_WANT_SYS_FORK
+#define __ARCH_WANT_SYS_VFORK
+#define __ARCH_WANT_SYS_CLONE
 
 /*
  * "Conditional" syscalls
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index ef46f66..aa8f2ba 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -330,40 +330,18 @@
 	la	%r11,STACK_FRAME_OVERHEAD(%r15)
 	l	%r12,__LC_THREAD_INFO
 	l	%r13,__LC_SVC_NEW_PSW+4
+	l	%r1,BASED(.Lschedule_tail)
+	basr	%r14,%r1		# call schedule_tail
+	TRACE_IRQS_ON
+	ssm	__LC_SVC_NEW_PSW	# reenable interrupts
 	tm	__PT_PSW+1(%r11),0x01	# forking a kernel thread ?
-	je	1f
-	l	%r1,BASED(.Lschedule_tail)
-	basr	%r14,%r1		# call schedule_tail
-	TRACE_IRQS_ON
-	ssm	__LC_SVC_NEW_PSW	# reenable interrupts
-	j	sysc_tracenogo
-
-1:	# it's a kernel thread
-	st	%r15,__PT_R15(%r11)	# store stack pointer for new kthread
-	l	%r1,BASED(.Lschedule_tail)
-	basr	%r14,%r1		# call schedule_tail
-	TRACE_IRQS_ON
-	ssm	__LC_SVC_NEW_PSW	# reenable interrupts
-	lm	%r9,%r11,__PT_R9(%r11)	# load gprs
+	jne	sysc_tracenogo
+	# it's a kernel thread
+	lm	%r9,%r10,__PT_R9(%r11)	# load gprs
 ENTRY(kernel_thread_starter)
 	la	%r2,0(%r10)
 	basr	%r14,%r9
-	la	%r2,0
-	br	%r11			# do_exit
-
-#
-# kernel_execve function needs to deal with pt_regs that is not
-# at the usual place
-#
-ENTRY(ret_from_kernel_execve)
-	ssm	__LC_PGM_NEW_PSW	# disable I/O and ext. interrupts
-	lr	%r15,%r2
-	lr	%r11,%r2
-	ahi	%r15,-STACK_FRAME_OVERHEAD
-	xc	__SF_BACKCHAIN(4,%r15),__SF_BACKCHAIN(%r15)
-	l	%r12,__LC_THREAD_INFO
-	ssm	__LC_SVC_NEW_PSW	# reenable interrupts
-	j	sysc_return
+	j	sysc_tracenogo
 
 /*
  * Program check handler routine
diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h
index d0d3f69..d8251b9 100644
--- a/arch/s390/kernel/entry.h
+++ b/arch/s390/kernel/entry.h
@@ -54,10 +54,6 @@
 long sys_s390_fadvise64_64(struct fadvise64_64_args __user *args);
 long sys_s390_fallocate(int fd, int mode, loff_t offset, u32 len_high,
 			u32 len_low);
-long sys_fork(void);
-long sys_clone(unsigned long newsp, unsigned long clone_flags,
-	       int __user *parent_tidptr, int __user *child_tidptr);
-long sys_vfork(void);
 long sys_sigsuspend(int history0, int history1, old_sigset_t mask);
 long sys_sigaction(int sig, const struct old_sigaction __user *act,
 		   struct old_sigaction __user *oact);
diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S
index 07d8de3..499e95e 100644
--- a/arch/s390/kernel/entry64.S
+++ b/arch/s390/kernel/entry64.S
@@ -352,33 +352,17 @@
 ENTRY(ret_from_fork)
 	la	%r11,STACK_FRAME_OVERHEAD(%r15)
 	lg	%r12,__LC_THREAD_INFO
+	brasl	%r14,schedule_tail
+	TRACE_IRQS_ON
+	ssm	__LC_SVC_NEW_PSW	# reenable interrupts
 	tm	__PT_PSW+1(%r11),0x01	# forking a kernel thread ?
-	je	1f
-	brasl	%r14,schedule_tail
-	TRACE_IRQS_ON
-	ssm	__LC_SVC_NEW_PSW	# reenable interrupts
-	j	sysc_tracenogo
-1:	# it's a kernel thread
-	stg	%r15,__PT_R15(%r11)	# store stack pointer for new kthread
-	brasl	%r14,schedule_tail
-	TRACE_IRQS_ON
-	ssm	__LC_SVC_NEW_PSW	# reenable interrupts
-	lmg	%r9,%r11,__PT_R9(%r11)	# load gprs
+	jne	sysc_tracenogo
+	# it's a kernel thread
+	lmg	%r9,%r10,__PT_R9(%r11)	# load gprs
 ENTRY(kernel_thread_starter)
 	la	%r2,0(%r10)
 	basr	%r14,%r9
-	la	%r2,0
-	br	%r11			# do_exit
-
-ENTRY(ret_from_kernel_execve)
-	ssm	__LC_PGM_NEW_PSW	# disable I/O and ext. interrupts
-	lgr	%r15,%r2
-	lgr	%r11,%r2
-	aghi	%r15,-STACK_FRAME_OVERHEAD
-	xc	__SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
-	lg	%r12,__LC_THREAD_INFO
-	ssm	__LC_SVC_NEW_PSW	# reenable interrupts
-	j	sysc_return
+	j	sysc_tracenogo
 
 /*
  * Program check handler routine
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index cd31ad4..536d645 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -117,8 +117,7 @@
 }
 
 int copy_thread(unsigned long clone_flags, unsigned long new_stackp,
-		unsigned long arg,
-		struct task_struct *p, struct pt_regs *regs)
+		unsigned long arg, struct task_struct *p)
 {
 	struct thread_info *ti;
 	struct fake_frame
@@ -150,7 +149,7 @@
 	frame->sf.gprs[9] = (unsigned long) frame;
 
 	/* Store access registers to kernel stack of new process. */
-	if (unlikely(!regs)) {
+	if (unlikely(p->flags & PF_KTHREAD)) {
 		/* kernel thread */
 		memset(&frame->childregs, 0, sizeof(struct pt_regs));
 		frame->childregs.psw.mask = psw_kernel_bits | PSW_MASK_DAT |
@@ -164,9 +163,10 @@
 
 		return 0;
 	}
-	frame->childregs = *regs;
+	frame->childregs = *current_pt_regs();
 	frame->childregs.gprs[2] = 0;	/* child returns 0 on fork. */
-	frame->childregs.gprs[15] = new_stackp;
+	if (new_stackp)
+		frame->childregs.gprs[15] = new_stackp;
 
 	/* Don't copy runtime instrumentation info */
 	p->thread.ri_cb = NULL;
@@ -183,57 +183,24 @@
 	       sizeof(s390_fp_regs));
 	/* Set a new TLS ?  */
 	if (clone_flags & CLONE_SETTLS)
-		p->thread.acrs[0] = regs->gprs[6];
+		p->thread.acrs[0] = frame->childregs.gprs[6];
 #else /* CONFIG_64BIT */
 	/* Save the fpu registers to new thread structure. */
 	save_fp_regs(&p->thread.fp_regs);
 	/* Set a new TLS ?  */
 	if (clone_flags & CLONE_SETTLS) {
+		unsigned long tls = frame->childregs.gprs[6];
 		if (is_compat_task()) {
-			p->thread.acrs[0] = (unsigned int) regs->gprs[6];
+			p->thread.acrs[0] = (unsigned int)tls;
 		} else {
-			p->thread.acrs[0] = (unsigned int)(regs->gprs[6] >> 32);
-			p->thread.acrs[1] = (unsigned int) regs->gprs[6];
+			p->thread.acrs[0] = (unsigned int)(tls >> 32);
+			p->thread.acrs[1] = (unsigned int)tls;
 		}
 	}
 #endif /* CONFIG_64BIT */
 	return 0;
 }
 
-SYSCALL_DEFINE0(fork)
-{
-	struct pt_regs *regs = task_pt_regs(current);
-	return do_fork(SIGCHLD, regs->gprs[15], regs, 0, NULL, NULL);
-}
-
-SYSCALL_DEFINE4(clone, unsigned long, newsp, unsigned long, clone_flags,
-		int __user *, parent_tidptr, int __user *, child_tidptr)
-{
-	struct pt_regs *regs = task_pt_regs(current);
-
-	if (!newsp)
-		newsp = regs->gprs[15];
-	return do_fork(clone_flags, newsp, regs, 0,
-		       parent_tidptr, child_tidptr);
-}
-
-/*
- * This is trivial, and on the face of it looks like it
- * could equally well be done in user mode.
- *
- * Not so, for quite unobvious reasons - register pressure.
- * In user mode vfork() cannot have a stack frame, and if
- * done by calling the "clone()" system call directly, you
- * do not have enough call-clobbered registers to hold all
- * the information you need.
- */
-SYSCALL_DEFINE0(vfork)
-{
-	struct pt_regs *regs = task_pt_regs(current);
-	return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD,
-		       regs->gprs[15], regs, 0, NULL, NULL);
-}
-
 asmlinkage void execve_tail(void)
 {
 	current->thread.fp_regs.fpc = 0;
diff --git a/arch/score/Kconfig b/arch/score/Kconfig
index 4f93a43..4589339 100644
--- a/arch/score/Kconfig
+++ b/arch/score/Kconfig
@@ -13,6 +13,9 @@
        select GENERIC_CLOCKEVENTS
        select HAVE_MOD_ARCH_SPECIFIC
 	select MODULES_USE_ELF_REL
+	select GENERIC_KERNEL_THREAD
+	select GENERIC_KERNEL_EXECVE
+	select CLONE_BACKWARDS
 
 choice
 	prompt "System type"
diff --git a/arch/score/include/asm/processor.h b/arch/score/include/asm/processor.h
index ab3aceb..d9a922d 100644
--- a/arch/score/include/asm/processor.h
+++ b/arch/score/include/asm/processor.h
@@ -13,7 +13,6 @@
  */
 extern void (*cpu_wait)(void);
 
-extern long kernel_thread(int (*fn)(void *), void *arg, unsigned long flags);
 extern unsigned long thread_saved_pc(struct task_struct *tsk);
 extern void start_thread(struct pt_regs *regs,
 			unsigned long pc, unsigned long sp);
diff --git a/arch/score/include/asm/syscalls.h b/arch/score/include/asm/syscalls.h
index 1dd5e0d..acaeed6 100644
--- a/arch/score/include/asm/syscalls.h
+++ b/arch/score/include/asm/syscalls.h
@@ -1,8 +1,6 @@
 #ifndef _ASM_SCORE_SYSCALLS_H
 #define _ASM_SCORE_SYSCALLS_H
 
-asmlinkage long score_clone(struct pt_regs *regs);
-asmlinkage long score_execve(struct pt_regs *regs);
 asmlinkage long score_sigaltstack(struct pt_regs *regs);
 asmlinkage long score_rt_sigreturn(struct pt_regs *regs);
 
diff --git a/arch/score/include/asm/unistd.h b/arch/score/include/asm/unistd.h
index a862384..56001c9 100644
--- a/arch/score/include/asm/unistd.h
+++ b/arch/score/include/asm/unistd.h
@@ -4,5 +4,9 @@
 #define __ARCH_WANT_SYSCALL_NO_FLAGS
 #define __ARCH_WANT_SYSCALL_OFF_T
 #define __ARCH_WANT_SYSCALL_DEPRECATED
+#define __ARCH_WANT_SYS_EXECVE
+#define __ARCH_WANT_SYS_CLONE
+#define __ARCH_WANT_SYS_FORK
+#define __ARCH_WANT_SYS_VFORK
 
 #include <asm-generic/unistd.h>
diff --git a/arch/score/kernel/entry.S b/arch/score/kernel/entry.S
index 83bb960..1557ca1 100644
--- a/arch/score/kernel/entry.S
+++ b/arch/score/kernel/entry.S
@@ -278,6 +278,13 @@
 	nop
 #endif
 
+ENTRY(ret_from_kernel_thread)
+	bl	schedule_tail			# r4=struct task_struct *prev
+	nop
+	mv	r4, r13
+	brl	r12
+	j	syscall_exit
+
 ENTRY(ret_from_fork)
 	bl	schedule_tail			# r4=struct task_struct *prev
 
@@ -480,16 +487,6 @@
 	sw	r9, [r0, PT_R7]
 	j	syscall_return
 
-ENTRY(sys_execve)
-	mv	r4, r0
-	la	r8, score_execve
-	br	r8
-
-ENTRY(sys_clone)
-	mv	r4, r0
-	la	r8, score_clone
-	br	r8
-
 ENTRY(sys_rt_sigreturn)
 	mv	r4, r0
 	la	r8, score_rt_sigreturn
@@ -499,16 +496,3 @@
 	mv	r4, r0
 	la	r8, score_sigaltstack
 	br	r8
-
-#ifdef __ARCH_WANT_SYSCALL_DEPRECATED
-ENTRY(sys_fork)
-	mv	r4, r0
-	la	r8, score_fork
-	br	r8
-
-ENTRY(sys_vfork)
-	mv	r4, r0
-	la	r8, score_vfork
-	br	r8
-#endif /* __ARCH_WANT_SYSCALL_DEPRECATED */
-
diff --git a/arch/score/kernel/process.c b/arch/score/kernel/process.c
index 637970c..7956846 100644
--- a/arch/score/kernel/process.c
+++ b/arch/score/kernel/process.c
@@ -60,6 +60,7 @@
 }
 
 void ret_from_fork(void);
+void ret_from_kernel_thread(void);
 
 void start_thread(struct pt_regs *regs, unsigned long pc, unsigned long sp)
 {
@@ -86,29 +87,27 @@
  * set up the kernel stack and exception frames for a new process
  */
 int copy_thread(unsigned long clone_flags, unsigned long usp,
-		unsigned long unused,
-		struct task_struct *p, struct pt_regs *regs)
+		unsigned long arg, struct task_struct *p)
 {
 	struct thread_info *ti = task_thread_info(p);
 	struct pt_regs *childregs = task_pt_regs(p);
-
-	p->set_child_tid = NULL;
-	p->clear_child_tid = NULL;
-
-	*childregs = *regs;
-	childregs->regs[7] = 0;		/* Clear error flag */
-	childregs->regs[4] = 0;		/* Child gets zero as return value */
-	regs->regs[4] = p->pid;
-
-	if (childregs->cp0_psr & 0x8) {	/* test kernel fork or user fork */
-		childregs->regs[0] = usp;		/* user fork */
-	} else {
-		childregs->regs[28] = (unsigned long) ti; /* kernel fork */
-		childregs->regs[0] = (unsigned long) childregs;
-	}
+	struct pt_regs *regs = current_pt_regs();
 
 	p->thread.reg0 = (unsigned long) childregs;
-	p->thread.reg3 = (unsigned long) ret_from_fork;
+	if (unlikely(p->flags & PF_KTHREAD)) {
+		memset(childregs, 0, sizeof(struct pt_regs));
+		p->thread->reg12 = usp;
+		p->thread->reg13 = arg;
+		p->thread.reg3 = (unsigned long) ret_from_kernel_thread;
+	} else {
+		*childregs = *current_pt_regs();
+		childregs->regs[7] = 0;		/* Clear error flag */
+		childregs->regs[4] = 0;		/* Child gets zero as return value */
+		if (usp)
+			childregs->regs[0] = usp;	/* user fork */
+		p->thread.reg3 = (unsigned long) ret_from_fork;
+	}
+
 	p->thread.cp0_psr = 0;
 
 	return 0;
@@ -120,32 +119,6 @@
 	return 1;
 }
 
-static void __noreturn
-kernel_thread_helper(void *unused0, int (*fn)(void *),
-		 void *arg, void *unused1)
-{
-	do_exit(fn(arg));
-}
-
-/*
- * Create a kernel thread.
- */
-long kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
-{
-	struct pt_regs regs;
-
-	memset(&regs, 0, sizeof(regs));
-
-	regs.regs[6] = (unsigned long) arg;
-	regs.regs[5] = (unsigned long) fn;
-	regs.cp0_epc = (unsigned long) kernel_thread_helper;
-	regs.cp0_psr = (regs.cp0_psr & ~(0x1|0x4|0x8)) | \
-			((regs.cp0_psr & 0x3) << 2);
-
-	return do_fork(flags | CLONE_VM | CLONE_UNTRACED, \
-			0, &regs, 0, NULL, NULL);
-}
-
 unsigned long thread_saved_pc(struct task_struct *tsk)
 {
 	return task_pt_regs(tsk)->cp0_epc;
diff --git a/arch/score/kernel/sys_score.c b/arch/score/kernel/sys_score.c
index d45cf00..47c20ba 100644
--- a/arch/score/kernel/sys_score.c
+++ b/arch/score/kernel/sys_score.c
@@ -48,92 +48,3 @@
 		return -EINVAL;
 	return sys_mmap_pgoff(addr, len, prot, flags, fd, offset >> PAGE_SHIFT);
 }
-
-asmlinkage long
-score_fork(struct pt_regs *regs)
-{
-	return do_fork(SIGCHLD, regs->regs[0], regs, 0, NULL, NULL);
-}
-
-/*
- * Clone a task - this clones the calling program thread.
- * This is called indirectly via a small wrapper
- */
-asmlinkage long
-score_clone(struct pt_regs *regs)
-{
-	unsigned long clone_flags;
-	unsigned long newsp;
-	int __user *parent_tidptr, *child_tidptr;
-
-	clone_flags = regs->regs[4];
-	newsp = regs->regs[5];
-	if (!newsp)
-		newsp = regs->regs[0];
-	parent_tidptr = (int __user *)regs->regs[6];
-	child_tidptr = (int __user *)regs->regs[8];
-
-	return do_fork(clone_flags, newsp, regs, 0,
-			parent_tidptr, child_tidptr);
-}
-
-asmlinkage long
-score_vfork(struct pt_regs *regs)
-{
-	return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD,
-			regs->regs[0], regs, 0, NULL, NULL);
-}
-
-/*
- * sys_execve() executes a new program.
- * This is called indirectly via a small wrapper
- */
-asmlinkage long
-score_execve(struct pt_regs *regs)
-{
-	int error;
-	struct filename *filename;
-
-	filename = getname((char __user*)regs->regs[4]);
-	error = PTR_ERR(filename);
-	if (IS_ERR(filename))
-		return error;
-
-	error = do_execve(filename->name,
-			  (const char __user *const __user *)regs->regs[5],
-			  (const char __user *const __user *)regs->regs[6],
-			  regs);
-
-	putname(filename);
-	return error;
-}
-
-/*
- * Do a system call from kernel instead of calling sys_execve so we
- * end up with proper pt_regs.
- */
-asmlinkage
-int kernel_execve(const char *filename,
-		  const char *const argv[],
-		  const char *const envp[])
-{
-	register unsigned long __r4 asm("r4") = (unsigned long) filename;
-	register unsigned long __r5 asm("r5") = (unsigned long) argv;
-	register unsigned long __r6 asm("r6") = (unsigned long) envp;
-	register unsigned long __r7 asm("r7");
-
-	__asm__ __volatile__ ("	\n"
-		"ldi	r27, %5		\n"
-		"syscall		\n"
-		"mv	%0, r4		\n"
-		"mv	%1, r7		\n"
-		: "=&r" (__r4), "=r" (__r7)
-		: "r" (__r4), "r" (__r5), "r" (__r6), "i" (__NR_execve)
-		: "r8", "r9", "r10", "r11", "r22", "r23", "r24", "r25",
-		  "r26", "r27", "memory");
-
-	if (__r7 == 0)
-		return __r4;
-
-	return -__r4;
-}
diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index babc2b8..8451317 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -40,6 +40,8 @@
 	select GENERIC_STRNLEN_USER
 	select HAVE_MOD_ARCH_SPECIFIC if DWARF_UNWINDER
 	select MODULES_USE_ELF_RELA
+	select GENERIC_KERNEL_THREAD
+	select GENERIC_KERNEL_EXECVE
 	help
 	  The SuperH is a RISC processor targeted for use in embedded systems
 	  and consumer electronics; it was also used in the Sega Dreamcast
diff --git a/arch/sh/include/asm/processor_32.h b/arch/sh/include/asm/processor_32.h
index b6311fd..b1320d5 100644
--- a/arch/sh/include/asm/processor_32.h
+++ b/arch/sh/include/asm/processor_32.h
@@ -126,11 +126,6 @@
 /* Free all resources held by a thread. */
 extern void release_thread(struct task_struct *);
 
-/*
- * create a kernel thread without removing it from tasklists
- */
-extern int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags);
-
 /* Copy and release all segment info associated with a VM */
 #define copy_segments(p, mm)	do { } while(0)
 #define release_segments(mm)	do { } while(0)
diff --git a/arch/sh/include/asm/processor_64.h b/arch/sh/include/asm/processor_64.h
index cd6029f..1ee8946 100644
--- a/arch/sh/include/asm/processor_64.h
+++ b/arch/sh/include/asm/processor_64.h
@@ -159,11 +159,6 @@
 
 /* Free all resources held by a thread. */
 extern void release_thread(struct task_struct *);
-/*
- * create a kernel thread without removing it from tasklists
- */
-extern int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags);
-
 
 /* Copy and release all segment info associated with a VM */
 #define copy_segments(p, mm)	do { } while (0)
diff --git a/arch/sh/include/asm/syscalls_32.h b/arch/sh/include/asm/syscalls_32.h
index 6c1fa55..cc25485 100644
--- a/arch/sh/include/asm/syscalls_32.h
+++ b/arch/sh/include/asm/syscalls_32.h
@@ -9,20 +9,6 @@
 
 struct pt_regs;
 
-asmlinkage int sys_fork(unsigned long r4, unsigned long r5,
-			unsigned long r6, unsigned long r7,
-			struct pt_regs __regs);
-asmlinkage int sys_clone(unsigned long clone_flags, unsigned long newsp,
-			 unsigned long parent_tidptr,
-			 unsigned long child_tidptr,
-			 struct pt_regs __regs);
-asmlinkage int sys_vfork(unsigned long r4, unsigned long r5,
-			 unsigned long r6, unsigned long r7,
-			 struct pt_regs __regs);
-asmlinkage int sys_execve(const char __user *ufilename,
-			  const char __user *const __user *uargv,
-			  const char __user *const __user *uenvp,
-			  unsigned long r7, struct pt_regs __regs);
 asmlinkage int sys_sigsuspend(old_sigset_t mask);
 asmlinkage int sys_sigaction(int sig, const struct old_sigaction __user *act,
 			     struct old_sigaction __user *oact);
diff --git a/arch/sh/include/asm/syscalls_64.h b/arch/sh/include/asm/syscalls_64.h
index ee519f4..d62e8eb 100644
--- a/arch/sh/include/asm/syscalls_64.h
+++ b/arch/sh/include/asm/syscalls_64.h
@@ -9,23 +9,6 @@
 
 struct pt_regs;
 
-asmlinkage int sys_fork(unsigned long r2, unsigned long r3,
-			unsigned long r4, unsigned long r5,
-			unsigned long r6, unsigned long r7,
-			struct pt_regs *pregs);
-asmlinkage int sys_clone(unsigned long clone_flags, unsigned long newsp,
-			 unsigned long r4, unsigned long r5,
-			 unsigned long r6, unsigned long r7,
-			 struct pt_regs *pregs);
-asmlinkage int sys_vfork(unsigned long r2, unsigned long r3,
-			 unsigned long r4, unsigned long r5,
-			 unsigned long r6, unsigned long r7,
-			 struct pt_regs *pregs);
-asmlinkage int sys_execve(const char *ufilename, char **uargv,
-			  char **uenvp, unsigned long r5,
-			  unsigned long r6, unsigned long r7,
-			  struct pt_regs *pregs);
-
 /* Misc syscall related bits */
 asmlinkage long long do_syscall_trace_enter(struct pt_regs *regs);
 asmlinkage void do_syscall_trace_leave(struct pt_regs *regs);
diff --git a/arch/sh/include/asm/unistd.h b/arch/sh/include/asm/unistd.h
index 38956df..43d3f26b 100644
--- a/arch/sh/include/asm/unistd.h
+++ b/arch/sh/include/asm/unistd.h
@@ -28,6 +28,10 @@
 # define __ARCH_WANT_SYS_SIGPENDING
 # define __ARCH_WANT_SYS_SIGPROCMASK
 # define __ARCH_WANT_SYS_RT_SIGACTION
+# define __ARCH_WANT_SYS_EXECVE
+# define __ARCH_WANT_SYS_FORK
+# define __ARCH_WANT_SYS_VFORK
+# define __ARCH_WANT_SYS_CLONE
 
 /*
  * "Conditional" syscalls
diff --git a/arch/sh/kernel/Makefile b/arch/sh/kernel/Makefile
index 88571ff..f259b37 100644
--- a/arch/sh/kernel/Makefile
+++ b/arch/sh/kernel/Makefile
@@ -16,7 +16,7 @@
 	   machvec.o nmi_debug.o process.o				\
 	   process_$(BITS).o ptrace.o ptrace_$(BITS).o			\
 	   reboot.o return_address.o					\
-	   setup.o signal_$(BITS).o sys_sh.o sys_sh$(BITS).o		\
+	   setup.o signal_$(BITS).o sys_sh.o 				\
 	   syscalls_$(BITS).o time.o topology.o traps.o			\
 	   traps_$(BITS).o unwinder.o
 
@@ -25,6 +25,7 @@
 obj-$(CONFIG_HAS_IOPORT)	+= ioport.o
 endif
 
+obj-$(CONFIG_SUPERH32)		+= sys_sh32.o
 obj-y				+= cpu/
 obj-$(CONFIG_VSYSCALL)		+= vsyscall/
 obj-$(CONFIG_SMP)		+= smp.o
diff --git a/arch/sh/kernel/cpu/sh5/entry.S b/arch/sh/kernel/cpu/sh5/entry.S
index 7e605b9..0c8d037 100644
--- a/arch/sh/kernel/cpu/sh5/entry.S
+++ b/arch/sh/kernel/cpu/sh5/entry.S
@@ -1228,6 +1228,25 @@
 	pta	ret_from_syscall, tr0
 	blink	tr0, ZERO
 
+.global	ret_from_kernel_thread
+ret_from_kernel_thread:
+
+	movi	schedule_tail,r5
+	ori	r5, 1, r5
+	ptabs	r5, tr0
+	blink	tr0, LINK
+
+	ld.q	SP, FRAME_R(2), r2
+	ld.q	SP, FRAME_R(3), r3
+	ptabs	r3, tr0
+	blink	tr0, LINK
+
+	ld.q	SP, FRAME_S(FSPC), r2
+	addi	r2, 4, r2		/* Move PC, being pre-execution event */
+	st.q	SP, FRAME_S(FSPC), r2
+	pta	ret_from_syscall, tr0
+	blink	tr0, ZERO
+
 syscall_allowed:
 	/* Use LINK to deflect the exit point, default is syscall_ret */
 	pta	syscall_ret, tr0
diff --git a/arch/sh/kernel/entry-common.S b/arch/sh/kernel/entry-common.S
index b96489d..9b6e4be 100644
--- a/arch/sh/kernel/entry-common.S
+++ b/arch/sh/kernel/entry-common.S
@@ -297,6 +297,19 @@
 	 mov	r0, r4
 	bra	syscall_exit
 	 nop
+
+	.align	2
+	.globl	ret_from_kernel_thread
+ret_from_kernel_thread:
+	mov.l	1f, r8
+	jsr	@r8
+	 mov	r0, r4
+	mov.l	@(OFF_R5,r15), r5   ! fn
+	jsr	@r5
+	 mov.l	@(OFF_R4,r15), r4   ! arg
+	bra	syscall_exit
+	 nop
+
 	.align	2
 1:	.long	schedule_tail
 
diff --git a/arch/sh/kernel/process_32.c b/arch/sh/kernel/process_32.c
index ba7345f..73eb66f 100644
--- a/arch/sh/kernel/process_32.c
+++ b/arch/sh/kernel/process_32.c
@@ -68,38 +68,6 @@
 	show_code(regs);
 }
 
-/*
- * Create a kernel thread
- */
-__noreturn void kernel_thread_helper(void *arg, int (*fn)(void *))
-{
-	do_exit(fn(arg));
-}
-
-/* Don't use this in BL=1(cli).  Or else, CPU resets! */
-int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
-{
-	struct pt_regs regs;
-	int pid;
-
-	memset(&regs, 0, sizeof(regs));
-	regs.regs[4] = (unsigned long)arg;
-	regs.regs[5] = (unsigned long)fn;
-
-	regs.pc = (unsigned long)kernel_thread_helper;
-	regs.sr = SR_MD;
-#if defined(CONFIG_SH_FPU)
-	regs.sr |= SR_FD;
-#endif
-
-	/* Ok, create the new process.. */
-	pid = do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0,
-		      &regs, 0, NULL, NULL);
-
-	return pid;
-}
-EXPORT_SYMBOL(kernel_thread);
-
 void start_thread(struct pt_regs *regs, unsigned long new_pc,
 		  unsigned long new_sp)
 {
@@ -157,10 +125,10 @@
 EXPORT_SYMBOL(dump_fpu);
 
 asmlinkage void ret_from_fork(void);
+asmlinkage void ret_from_kernel_thread(void);
 
 int copy_thread(unsigned long clone_flags, unsigned long usp,
-		unsigned long unused,
-		struct task_struct *p, struct pt_regs *regs)
+		unsigned long arg, struct task_struct *p)
 {
 	struct thread_info *ti = task_thread_info(p);
 	struct pt_regs *childregs;
@@ -177,29 +145,35 @@
 	}
 #endif
 
-	childregs = task_pt_regs(p);
-	*childregs = *regs;
+	memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
 
-	if (user_mode(regs)) {
-		childregs->regs[15] = usp;
-		ti->addr_limit = USER_DS;
-	} else {
-		childregs->regs[15] = (unsigned long)childregs;
+	childregs = task_pt_regs(p);
+	p->thread.sp = (unsigned long) childregs;
+	if (unlikely(p->flags & PF_KTHREAD)) {
+		memset(childregs, 0, sizeof(struct pt_regs));
+		p->thread.pc = (unsigned long) ret_from_kernel_thread;
+		childregs->regs[4] = arg;
+		childregs->regs[5] = usp;
+		childregs->sr = SR_MD;
+#if defined(CONFIG_SH_FPU)
+		childregs->sr |= SR_FD;
+#endif
 		ti->addr_limit = KERNEL_DS;
 		ti->status &= ~TS_USEDFPU;
 		p->fpu_counter = 0;
+		return 0;
 	}
+	*childregs = *current_pt_regs();
+
+	if (usp)
+		childregs->regs[15] = usp;
+	ti->addr_limit = USER_DS;
 
 	if (clone_flags & CLONE_SETTLS)
 		childregs->gbr = childregs->regs[0];
 
 	childregs->regs[0] = 0; /* Set return value for child */
-
-	p->thread.sp = (unsigned long) childregs;
 	p->thread.pc = (unsigned long) ret_from_fork;
-
-	memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
-
 	return 0;
 }
 
@@ -243,74 +217,6 @@
 	return prev;
 }
 
-asmlinkage int sys_fork(unsigned long r4, unsigned long r5,
-			unsigned long r6, unsigned long r7,
-			struct pt_regs __regs)
-{
-#ifdef CONFIG_MMU
-	struct pt_regs *regs = RELOC_HIDE(&__regs, 0);
-	return do_fork(SIGCHLD, regs->regs[15], regs, 0, NULL, NULL);
-#else
-	/* fork almost works, enough to trick you into looking elsewhere :-( */
-	return -EINVAL;
-#endif
-}
-
-asmlinkage int sys_clone(unsigned long clone_flags, unsigned long newsp,
-			 unsigned long parent_tidptr,
-			 unsigned long child_tidptr,
-			 struct pt_regs __regs)
-{
-	struct pt_regs *regs = RELOC_HIDE(&__regs, 0);
-	if (!newsp)
-		newsp = regs->regs[15];
-	return do_fork(clone_flags, newsp, regs, 0,
-			(int __user *)parent_tidptr,
-			(int __user *)child_tidptr);
-}
-
-/*
- * This is trivial, and on the face of it looks like it
- * could equally well be done in user mode.
- *
- * Not so, for quite unobvious reasons - register pressure.
- * In user mode vfork() cannot have a stack frame, and if
- * done by calling the "clone()" system call directly, you
- * do not have enough call-clobbered registers to hold all
- * the information you need.
- */
-asmlinkage int sys_vfork(unsigned long r4, unsigned long r5,
-			 unsigned long r6, unsigned long r7,
-			 struct pt_regs __regs)
-{
-	struct pt_regs *regs = RELOC_HIDE(&__regs, 0);
-	return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->regs[15], regs,
-		       0, NULL, NULL);
-}
-
-/*
- * sys_execve() executes a new program.
- */
-asmlinkage int sys_execve(const char __user *ufilename,
-			  const char __user *const __user *uargv,
-			  const char __user *const __user *uenvp,
-			  unsigned long r7, struct pt_regs __regs)
-{
-	struct pt_regs *regs = RELOC_HIDE(&__regs, 0);
-	int error;
-	struct filename *filename;
-
-	filename = getname(ufilename);
-	error = PTR_ERR(filename);
-	if (IS_ERR(filename))
-		goto out;
-
-	error = do_execve(filename->name, uargv, uenvp, regs);
-	putname(filename);
-out:
-	return error;
-}
-
 unsigned long get_wchan(struct task_struct *p)
 {
 	unsigned long pc;
diff --git a/arch/sh/kernel/process_64.c b/arch/sh/kernel/process_64.c
index 98a709f..e611c85 100644
--- a/arch/sh/kernel/process_64.c
+++ b/arch/sh/kernel/process_64.c
@@ -285,39 +285,6 @@
 }
 
 /*
- * Create a kernel thread
- */
-__noreturn void kernel_thread_helper(void *arg, int (*fn)(void *))
-{
-	do_exit(fn(arg));
-}
-
-/*
- * This is the mechanism for creating a new kernel thread.
- *
- * NOTE! Only a kernel-only process(ie the swapper or direct descendants
- * who haven't done an "execve()") should use this: it will work within
- * a system call from a "real" process, but the process memory space will
- * not be freed until both the parent and the child have exited.
- */
-int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
-{
-	struct pt_regs regs;
-
-	memset(&regs, 0, sizeof(regs));
-	regs.regs[2] = (unsigned long)arg;
-	regs.regs[3] = (unsigned long)fn;
-
-	regs.pc = (unsigned long)kernel_thread_helper;
-	regs.sr = (1 << 30);
-
-	/* Ok, create the new process.. */
-	return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0,
-		      &regs, 0, NULL, NULL);
-}
-EXPORT_SYMBOL(kernel_thread);
-
-/*
  * Free current thread data structures etc..
  */
 void exit_thread(void)
@@ -401,26 +368,37 @@
 EXPORT_SYMBOL(dump_fpu);
 
 asmlinkage void ret_from_fork(void);
+asmlinkage void ret_from_kernel_thread(void);
 
 int copy_thread(unsigned long clone_flags, unsigned long usp,
-		unsigned long unused,
-		struct task_struct *p, struct pt_regs *regs)
+		unsigned long arg, struct task_struct *p)
 {
-	struct pt_regs *childregs;
+	struct pt_regs *childregs, *regs = current_pt_regs();
 
 #ifdef CONFIG_SH_FPU
-	if(last_task_used_math == current) {
+	/* can't happen for a kernel thread */
+	if (last_task_used_math == current) {
 		enable_fpu();
 		save_fpu(current);
 		disable_fpu();
 		last_task_used_math = NULL;
-		regs->sr |= SR_FD;
+		current_pt_regs()->sr |= SR_FD;
 	}
 #endif
 	/* Copy from sh version */
 	childregs = (struct pt_regs *)(THREAD_SIZE + task_stack_page(p)) - 1;
+	p->thread.sp = (unsigned long) childregs;
 
-	*childregs = *regs;
+	if (unlikely(p->flags & PF_KTHREAD)) {
+		memset(childregs, 0, sizeof(struct pt_regs));
+		childregs->regs[2] = (unsigned long)arg;
+		childregs->regs[3] = (unsigned long)fn;
+		childregs->sr = (1 << 30); /* not user_mode */
+		childregs->sr |= SR_FD; /* Invalidate FPU flag */
+		p->thread.pc = (unsigned long) ret_from_kernel_thread;
+		return 0;
+	}
+	*childregs = *current_pt_regs();
 
 	/*
 	 * Sign extend the edited stack.
@@ -428,85 +406,18 @@
 	 * 32-bit wide and context switch must take care
 	 * of NEFF sign extension.
 	 */
-	if (user_mode(regs)) {
+	if (usp)
 		childregs->regs[15] = neff_sign_extend(usp);
-		p->thread.uregs = childregs;
-	} else {
-		childregs->regs[15] =
-			neff_sign_extend((unsigned long)task_stack_page(p) +
-					 THREAD_SIZE);
-	}
+	p->thread.uregs = childregs;
 
 	childregs->regs[9] = 0; /* Set return value for child */
 	childregs->sr |= SR_FD; /* Invalidate FPU flag */
 
-	p->thread.sp = (unsigned long) childregs;
 	p->thread.pc = (unsigned long) ret_from_fork;
 
 	return 0;
 }
 
-asmlinkage int sys_fork(unsigned long r2, unsigned long r3,
-			unsigned long r4, unsigned long r5,
-			unsigned long r6, unsigned long r7,
-			struct pt_regs *pregs)
-{
-	return do_fork(SIGCHLD, pregs->regs[15], pregs, 0, 0, 0);
-}
-
-asmlinkage int sys_clone(unsigned long clone_flags, unsigned long newsp,
-			 unsigned long r4, unsigned long r5,
-			 unsigned long r6, unsigned long r7,
-			 struct pt_regs *pregs)
-{
-	if (!newsp)
-		newsp = pregs->regs[15];
-	return do_fork(clone_flags, newsp, pregs, 0, 0, 0);
-}
-
-/*
- * This is trivial, and on the face of it looks like it
- * could equally well be done in user mode.
- *
- * Not so, for quite unobvious reasons - register pressure.
- * In user mode vfork() cannot have a stack frame, and if
- * done by calling the "clone()" system call directly, you
- * do not have enough call-clobbered registers to hold all
- * the information you need.
- */
-asmlinkage int sys_vfork(unsigned long r2, unsigned long r3,
-			 unsigned long r4, unsigned long r5,
-			 unsigned long r6, unsigned long r7,
-			 struct pt_regs *pregs)
-{
-	return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, pregs->regs[15], pregs, 0, 0, 0);
-}
-
-/*
- * sys_execve() executes a new program.
- */
-asmlinkage int sys_execve(const char *ufilename, char **uargv,
-			  char **uenvp, unsigned long r5,
-			  unsigned long r6, unsigned long r7,
-			  struct pt_regs *pregs)
-{
-	int error;
-	struct filename *filename;
-
-	filename = getname((char __user *)ufilename);
-	error = PTR_ERR(filename);
-	if (IS_ERR(filename))
-		goto out;
-
-	error = do_execve(filename->name,
-			  (const char __user *const __user *)uargv,
-			  (const char __user *const __user *)uenvp,
-			  pregs);
-	putname(filename);
-out:
-	return error;
-}
-
 #ifdef CONFIG_FRAME_POINTER
 static int in_sh64_switch_to(unsigned long pc)
 {
diff --git a/arch/sh/kernel/sys_sh32.c b/arch/sh/kernel/sys_sh32.c
index f56b6fe5..497bab3 100644
--- a/arch/sh/kernel/sys_sh32.c
+++ b/arch/sh/kernel/sys_sh32.c
@@ -60,27 +60,3 @@
 				(u64)len0 << 32 | len1,	advice);
 #endif
 }
-
-#if defined(CONFIG_CPU_SH2) || defined(CONFIG_CPU_SH2A)
-#define SYSCALL_ARG3	"trapa #0x23"
-#else
-#define SYSCALL_ARG3	"trapa #0x13"
-#endif
-
-/*
- * Do a system call from kernel instead of calling sys_execve so we
- * end up with proper pt_regs.
- */
-int kernel_execve(const char *filename,
-		  const char *const argv[],
-		  const char *const envp[])
-{
-	register long __sc0 __asm__ ("r3") = __NR_execve;
-	register long __sc4 __asm__ ("r4") = (long) filename;
-	register long __sc5 __asm__ ("r5") = (long) argv;
-	register long __sc6 __asm__ ("r6") = (long) envp;
-	__asm__ __volatile__ (SYSCALL_ARG3 : "=z" (__sc0)
-			: "0" (__sc0), "r" (__sc4), "r" (__sc5), "r" (__sc6)
-			: "memory");
-	return __sc0;
-}
diff --git a/arch/sh/kernel/sys_sh64.c b/arch/sh/kernel/sys_sh64.c
deleted file mode 100644
index c5a38c4..0000000
--- a/arch/sh/kernel/sys_sh64.c
+++ /dev/null
@@ -1,50 +0,0 @@
-/*
- * arch/sh/kernel/sys_sh64.c
- *
- * Copyright (C) 2000, 2001  Paolo Alberelli
- *
- * This file contains various random system calls that
- * have a non-standard calling sequence on the Linux/SH5
- * platform.
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- */
-#include <linux/errno.h>
-#include <linux/rwsem.h>
-#include <linux/sched.h>
-#include <linux/mm.h>
-#include <linux/fs.h>
-#include <linux/smp.h>
-#include <linux/sem.h>
-#include <linux/msg.h>
-#include <linux/shm.h>
-#include <linux/stat.h>
-#include <linux/mman.h>
-#include <linux/file.h>
-#include <linux/syscalls.h>
-#include <linux/ipc.h>
-#include <asm/uaccess.h>
-#include <asm/ptrace.h>
-#include <asm/unistd.h>
-
-/*
- * Do a system call from kernel instead of calling sys_execve so we
- * end up with proper pt_regs.
- */
-int kernel_execve(const char *filename,
-		  const char *const argv[],
-		  const char *const envp[])
-{
-	register unsigned long __sc0 __asm__ ("r9") = ((0x13 << 16) | __NR_execve);
-	register unsigned long __sc2 __asm__ ("r2") = (unsigned long) filename;
-	register unsigned long __sc3 __asm__ ("r3") = (unsigned long) argv;
-	register unsigned long __sc4 __asm__ ("r4") = (unsigned long) envp;
-	__asm__ __volatile__ ("trapa	%1 !\t\t\t execve(%2,%3,%4)"
-	: "=r" (__sc0)
-	: "r" (__sc0), "r" (__sc2), "r" (__sc3), "r" (__sc4) );
-	__asm__ __volatile__ ("!dummy	%0 %1 %2 %3"
-	: : "r" (__sc0), "r" (__sc2), "r" (__sc3), "r" (__sc4) : "memory");
-	return __sc0;
-}
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index 9f2edb5..0c7d365 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -41,6 +41,8 @@
 	select GENERIC_STRNCPY_FROM_USER
 	select GENERIC_STRNLEN_USER
 	select MODULES_USE_ELF_RELA
+	select GENERIC_KERNEL_THREAD
+	select GENERIC_KERNEL_EXECVE
 
 config SPARC32
 	def_bool !64BIT
diff --git a/arch/sparc/include/asm/processor_32.h b/arch/sparc/include/asm/processor_32.h
index f74ac9e..c1e0191 100644
--- a/arch/sparc/include/asm/processor_32.h
+++ b/arch/sparc/include/asm/processor_32.h
@@ -106,7 +106,6 @@
 
 /* Free all resources held by a thread. */
 #define release_thread(tsk)		do { } while(0)
-extern pid_t kernel_thread(int (*fn)(void *), void * arg, unsigned long flags);
 
 extern unsigned long get_wchan(struct task_struct *);
 
diff --git a/arch/sparc/include/asm/processor_64.h b/arch/sparc/include/asm/processor_64.h
index 721e25f..cce72ce 100644
--- a/arch/sparc/include/asm/processor_64.h
+++ b/arch/sparc/include/asm/processor_64.h
@@ -94,6 +94,7 @@
 #ifndef __ASSEMBLY__
 
 #include <linux/types.h>
+#include <asm/fpumacro.h>
 
 /* Return saved PC of a blocked thread. */
 struct task_struct;
@@ -143,6 +144,10 @@
 	: \
 	: "r" (regs), "r" (sp - sizeof(struct reg_window) - STACK_BIAS), \
 	  "i" ((const unsigned long)(&((struct pt_regs *)0)->u_regs[0]))); \
+	fprs_write(0);	\
+	current_thread_info()->xfsr[0] = 0;	\
+	current_thread_info()->fpsaved[0] = 0;	\
+	regs->tstate &= ~TSTATE_PEF;	\
 } while (0)
 
 #define start_thread32(regs, pc, sp) \
@@ -183,13 +188,15 @@
 	: \
 	: "r" (regs), "r" (sp - sizeof(struct reg_window32)), \
 	  "i" ((const unsigned long)(&((struct pt_regs *)0)->u_regs[0]))); \
+	fprs_write(0);	\
+	current_thread_info()->xfsr[0] = 0;	\
+	current_thread_info()->fpsaved[0] = 0;	\
+	regs->tstate &= ~TSTATE_PEF;	\
 } while (0)
 
 /* Free all resources held by a thread. */
 #define release_thread(tsk)		do { } while (0)
 
-extern pid_t kernel_thread(int (*fn)(void *), void * arg, unsigned long flags);
-
 extern unsigned long get_wchan(struct task_struct *task);
 
 #define task_pt_regs(tsk) (task_thread_info(tsk)->kregs)
diff --git a/arch/sparc/include/asm/ptrace.h b/arch/sparc/include/asm/ptrace.h
index da43bdc..bdfafd7 100644
--- a/arch/sparc/include/asm/ptrace.h
+++ b/arch/sparc/include/asm/ptrace.h
@@ -32,6 +32,9 @@
 #define arch_ptrace_stop(exit_code, info) \
 	synchronize_user_stack()
 
+#define current_pt_regs() \
+	((struct pt_regs *)((unsigned long)current_thread_info() + THREAD_SIZE) - 1)
+
 struct global_reg_snapshot {
 	unsigned long		tstate;
 	unsigned long		tpc;
@@ -55,9 +58,7 @@
 
 extern union global_cpu_snapshot global_cpu_snapshot[NR_CPUS];
 
-#define force_successful_syscall_return()	    \
-do {	current_thread_info()->syscall_noerror = 1; \
-} while (0)
+#define force_successful_syscall_return() set_thread_noerror(1)
 #define user_mode(regs) (!((regs)->tstate & TSTATE_PRIV))
 #define instruction_pointer(regs) ((regs)->tpc)
 #define instruction_pointer_set(regs, val) ((regs)->tpc = (val))
@@ -100,6 +101,9 @@
 #define arch_ptrace_stop(exit_code, info) \
 	synchronize_user_stack()
 
+#define current_pt_regs() \
+	((struct pt_regs *)((unsigned long)current_thread_info() + THREAD_SIZE) - 1)
+
 #define user_mode(regs) (!((regs)->psr & PSR_PS))
 #define instruction_pointer(regs) ((regs)->pc)
 #define user_stack_pointer(regs) ((regs)->u_regs[UREG_FP])
diff --git a/arch/sparc/include/asm/signal.h b/arch/sparc/include/asm/signal.h
index d243c2a..77b8585 100644
--- a/arch/sparc/include/asm/signal.h
+++ b/arch/sparc/include/asm/signal.h
@@ -26,7 +26,5 @@
 	void			__user *ka_restorer;
 };
 
-#define ptrace_signal_deliver(regs, cookie) do { } while (0)
-
 #endif /* !(__ASSEMBLY__) */
 #endif /* !(__SPARC_SIGNAL_H) */
diff --git a/arch/sparc/include/asm/switch_to_64.h b/arch/sparc/include/asm/switch_to_64.h
index 7923c4a..cad36f5 100644
--- a/arch/sparc/include/asm/switch_to_64.h
+++ b/arch/sparc/include/asm/switch_to_64.h
@@ -23,7 +23,7 @@
 	/* If you are tempted to conditionalize the following */	\
 	/* so that ASI is only written if it changes, think again. */	\
 	__asm__ __volatile__("wr %%g0, %0, %%asi"			\
-	: : "r" (__thread_flag_byte_ptr(task_thread_info(next))[TI_FLAG_BYTE_CURRENT_DS]));\
+	: : "r" (task_thread_info(next)->current_ds));\
 	trap_block[current_thread_info()->cpu].thread =			\
 		task_thread_info(next);					\
 	__asm__ __volatile__(						\
diff --git a/arch/sparc/include/asm/syscalls.h b/arch/sparc/include/asm/syscalls.h
index 45a43f6..bf8972a 100644
--- a/arch/sparc/include/asm/syscalls.h
+++ b/arch/sparc/include/asm/syscalls.h
@@ -8,6 +8,4 @@
 				     struct pt_regs *regs,
 				     unsigned long stack_size);
 
-extern asmlinkage int sparc_execve(struct pt_regs *regs);
-
 #endif /* _SPARC64_SYSCALLS_H */
diff --git a/arch/sparc/include/asm/thread_info_64.h b/arch/sparc/include/asm/thread_info_64.h
index a3fe4dc..269bd92 100644
--- a/arch/sparc/include/asm/thread_info_64.h
+++ b/arch/sparc/include/asm/thread_info_64.h
@@ -14,12 +14,12 @@
 #define TI_FLAG_FAULT_CODE_SHIFT	56
 #define TI_FLAG_BYTE_WSTATE		1
 #define TI_FLAG_WSTATE_SHIFT		48
-#define TI_FLAG_BYTE_CWP		2
-#define TI_FLAG_CWP_SHIFT		40
-#define TI_FLAG_BYTE_CURRENT_DS		3
-#define TI_FLAG_CURRENT_DS_SHIFT	32
-#define TI_FLAG_BYTE_FPDEPTH		4
-#define TI_FLAG_FPDEPTH_SHIFT		24
+#define TI_FLAG_BYTE_NOERROR		2
+#define TI_FLAG_BYTE_NOERROR_SHIFT	40
+#define TI_FLAG_BYTE_FPDEPTH		3
+#define TI_FLAG_FPDEPTH_SHIFT		32
+#define TI_FLAG_BYTE_CWP		4
+#define TI_FLAG_CWP_SHIFT		24
 #define TI_FLAG_BYTE_WSAVED		5
 #define TI_FLAG_WSAVED_SHIFT		16
 
@@ -47,7 +47,7 @@
 	struct exec_domain	*exec_domain;
 	int			preempt_count;	/* 0 => preemptable, <0 => BUG */
 	__u8			new_child;
-	__u8			syscall_noerror;
+	__u8			current_ds;
 	__u16			cpu;
 
 	unsigned long		*utraps;
@@ -74,9 +74,9 @@
 #define TI_FAULT_CODE	(TI_FLAGS + TI_FLAG_BYTE_FAULT_CODE)
 #define TI_WSTATE	(TI_FLAGS + TI_FLAG_BYTE_WSTATE)
 #define TI_CWP		(TI_FLAGS + TI_FLAG_BYTE_CWP)
-#define TI_CURRENT_DS	(TI_FLAGS + TI_FLAG_BYTE_CURRENT_DS)
 #define TI_FPDEPTH	(TI_FLAGS + TI_FLAG_BYTE_FPDEPTH)
 #define TI_WSAVED	(TI_FLAGS + TI_FLAG_BYTE_WSAVED)
+#define TI_SYS_NOERROR	(TI_FLAGS + TI_FLAG_BYTE_NOERROR)
 #define TI_FPSAVED	0x00000010
 #define TI_KSP		0x00000018
 #define TI_FAULT_ADDR	0x00000020
@@ -84,7 +84,7 @@
 #define TI_EXEC_DOMAIN	0x00000030
 #define TI_PRE_COUNT	0x00000038
 #define TI_NEW_CHILD	0x0000003c
-#define TI_SYS_NOERROR	0x0000003d
+#define TI_CURRENT_DS	0x0000003d
 #define TI_CPU		0x0000003e
 #define TI_UTRAPS	0x00000040
 #define TI_REG_WINDOW	0x00000048
@@ -121,7 +121,7 @@
 #define INIT_THREAD_INFO(tsk)				\
 {							\
 	.task		=	&tsk,			\
-	.flags		= ((unsigned long)ASI_P) << TI_FLAG_CURRENT_DS_SHIFT,	\
+	.current_ds	=	ASI_P,			\
 	.exec_domain	=	&default_exec_domain,	\
 	.preempt_count	=	INIT_PREEMPT_COUNT,	\
 	.restart_block	= {				\
@@ -153,13 +153,12 @@
 #define set_thread_wstate(val)		(__cur_thread_flag_byte_ptr[TI_FLAG_BYTE_WSTATE] = (val))
 #define get_thread_cwp()		(__cur_thread_flag_byte_ptr[TI_FLAG_BYTE_CWP])
 #define set_thread_cwp(val)		(__cur_thread_flag_byte_ptr[TI_FLAG_BYTE_CWP] = (val))
-#define get_thread_current_ds()		(__cur_thread_flag_byte_ptr[TI_FLAG_BYTE_CURRENT_DS])
-#define set_thread_current_ds(val)	(__cur_thread_flag_byte_ptr[TI_FLAG_BYTE_CURRENT_DS] = (val))
+#define get_thread_noerror()		(__cur_thread_flag_byte_ptr[TI_FLAG_BYTE_NOERROR])
+#define set_thread_noerror(val)		(__cur_thread_flag_byte_ptr[TI_FLAG_BYTE_NOERROR] = (val))
 #define get_thread_fpdepth()		(__cur_thread_flag_byte_ptr[TI_FLAG_BYTE_FPDEPTH])
 #define set_thread_fpdepth(val)		(__cur_thread_flag_byte_ptr[TI_FLAG_BYTE_FPDEPTH] = (val))
 #define get_thread_wsaved()		(__cur_thread_flag_byte_ptr[TI_FLAG_BYTE_WSAVED])
 #define set_thread_wsaved(val)		(__cur_thread_flag_byte_ptr[TI_FLAG_BYTE_WSAVED] = (val))
-
 #endif /* !(__ASSEMBLY__) */
 
 /*
diff --git a/arch/sparc/include/asm/uaccess_64.h b/arch/sparc/include/asm/uaccess_64.h
index 73083e1..e562d3c 100644
--- a/arch/sparc/include/asm/uaccess_64.h
+++ b/arch/sparc/include/asm/uaccess_64.h
@@ -38,14 +38,14 @@
 #define VERIFY_READ	0
 #define VERIFY_WRITE	1
 
-#define get_fs() ((mm_segment_t) { get_thread_current_ds() })
+#define get_fs() ((mm_segment_t){(current_thread_info()->current_ds)})
 #define get_ds() (KERNEL_DS)
 
 #define segment_eq(a,b)  ((a).seg == (b).seg)
 
 #define set_fs(val)								\
 do {										\
-	set_thread_current_ds((val).seg);					\
+	current_thread_info()->current_ds =(val).seg;				\
 	__asm__ __volatile__ ("wr %%g0, %0, %%asi" : : "r" ((val).seg));	\
 } while(0)
 
diff --git a/arch/sparc/include/asm/unistd.h b/arch/sparc/include/asm/unistd.h
index 0ecea6e..c3e5d8b 100644
--- a/arch/sparc/include/asm/unistd.h
+++ b/arch/sparc/include/asm/unistd.h
@@ -46,6 +46,7 @@
 #define __ARCH_WANT_COMPAT_SYS_RT_SIGSUSPEND
 #define __ARCH_WANT_COMPAT_SYS_SENDFILE
 #endif
+#define __ARCH_WANT_SYS_EXECVE
 
 /*
  * "Conditional" syscalls
diff --git a/arch/sparc/kernel/entry.S b/arch/sparc/kernel/entry.S
index dcaa1cf..21fd1a8 100644
--- a/arch/sparc/kernel/entry.S
+++ b/arch/sparc/kernel/entry.S
@@ -806,23 +806,10 @@
 	call	c_sys_nis_syscall
 	 mov	%l5, %o7
 
-	.align	4
-	.globl	sys_execve
-sys_execve:
-	mov	%o7, %l5
-	add	%sp, STACKFRAME_SZ, %o0		! pt_regs *regs arg
-	call	sparc_execve
-	 mov	%l5, %o7
-
-	.globl	sunos_execv
 sunos_execv:
-	st	%g0, [%sp + STACKFRAME_SZ + PT_I2]
-
-	call	sparc_execve
-	 add	%sp, STACKFRAME_SZ, %o0
-
-	b	ret_sys_call
-	 ld	[%sp + STACKFRAME_SZ + PT_I0], %o0
+	.globl	sunos_execv
+	b	sys_execve
+	 clr	%i2
 
 	.align	4
 	.globl	sys_sparc_pipe
@@ -959,17 +946,9 @@
         .align  4
 linux_sparc_ni_syscall:
 	sethi   %hi(sys_ni_syscall), %l7
-	b       syscall_is_too_hard
+	b       do_syscall
 	 or     %l7, %lo(sys_ni_syscall), %l7
 
-linux_fast_syscall:
-	andn	%l7, 3, %l7
-	mov	%i0, %o0
-	mov	%i1, %o1
-	mov 	%i2, %o2
-	jmpl	%l7 + %g0, %g0
-	 mov	%i3, %o3
-
 linux_syscall_trace:
 	add	%sp, STACKFRAME_SZ, %o0
 	call	syscall_trace
@@ -991,6 +970,23 @@
 	b	ret_sys_call
 	 ld	[%sp + STACKFRAME_SZ + PT_I0], %o0
 
+	.globl	ret_from_kernel_thread
+ret_from_kernel_thread:
+	call	schedule_tail
+	 ld	[%g3 + TI_TASK], %o0
+	ld	[%sp + STACKFRAME_SZ + PT_G1], %l0
+	call	%l0
+	 ld	[%sp + STACKFRAME_SZ + PT_G2], %o0
+	rd	%psr, %l1
+	ld	[%sp + STACKFRAME_SZ + PT_PSR], %l0
+	andn	%l0, PSR_CWP, %l0
+	nop
+	and	%l1, PSR_CWP, %l1
+	or	%l0, %l1, %l0
+	st	%l0, [%sp + STACKFRAME_SZ + PT_PSR]
+	b	ret_sys_call
+	 mov	0, %o0
+
 	/* Linux native system calls enter here... */
 	.align	4
 	.globl	linux_sparc_syscall
@@ -1002,11 +998,8 @@
 	bgeu	linux_sparc_ni_syscall
 	 sll	%g1, 2, %l4
 	ld	[%l7 + %l4], %l7
-	andcc	%l7, 1, %g0
-	bne	linux_fast_syscall
-	 /* Just do first insn from SAVE_ALL in the delay slot */
 
-syscall_is_too_hard:
+do_syscall:
 	SAVE_ALL_HEAD
 	 rd	%wim, %l3
 
diff --git a/arch/sparc/kernel/etrap_64.S b/arch/sparc/kernel/etrap_64.S
index 786b185..1276ca2 100644
--- a/arch/sparc/kernel/etrap_64.S
+++ b/arch/sparc/kernel/etrap_64.S
@@ -92,8 +92,10 @@
 		rdpr	%wstate, %g2
 		wrpr	%g0, 0, %canrestore
 		sll	%g2, 3, %g2
+
+		/* Set TI_SYS_FPDEPTH to 1 and clear TI_SYS_NOERROR.  */
 		mov	1, %l5
-		stb	%l5, [%l6 + TI_FPDEPTH]
+		sth	%l5, [%l6 + TI_SYS_NOERROR]
 
 		wrpr	%g3, 0, %otherwin
 		wrpr	%g2, 0, %wstate
@@ -152,7 +154,9 @@
 		add	%l6, TI_FPSAVED + 1, %l4
 		srl	%l5, 1, %l3
 		add	%l5, 2, %l5
-		stb	%l5, [%l6 + TI_FPDEPTH]
+
+		/* Set TI_SYS_FPDEPTH to %l5 and clear TI_SYS_NOERROR.  */
+		sth	%l5, [%l6 + TI_SYS_NOERROR]
 		ba,pt	%xcc, 2b
 		 stb	%g0, [%l4 + %l3]
 		nop
diff --git a/arch/sparc/kernel/process_32.c b/arch/sparc/kernel/process_32.c
index 487bffb..be8e862 100644
--- a/arch/sparc/kernel/process_32.c
+++ b/arch/sparc/kernel/process_32.c
@@ -286,8 +286,7 @@
 	parent_tid_ptr = regs->u_regs[UREG_I2];
 	child_tid_ptr = regs->u_regs[UREG_I4];
 
-	ret = do_fork(clone_flags, stack_start,
-		      regs, stack_size,
+	ret = do_fork(clone_flags, stack_start, stack_size,
 		      (int __user *) parent_tid_ptr,
 		      (int __user *) child_tid_ptr);
 
@@ -316,13 +315,13 @@
  * XXX See comment above sys_vfork in sparc64. todo.
  */
 extern void ret_from_fork(void);
+extern void ret_from_kernel_thread(void);
 
 int copy_thread(unsigned long clone_flags, unsigned long sp,
-		unsigned long unused,
-		struct task_struct *p, struct pt_regs *regs)
+		unsigned long arg, struct task_struct *p)
 {
 	struct thread_info *ti = task_thread_info(p);
-	struct pt_regs *childregs;
+	struct pt_regs *childregs, *regs = current_pt_regs();
 	char *new_stack;
 
 #ifndef CONFIG_SMP
@@ -336,16 +335,13 @@
 	}
 
 	/*
-	 *  p->thread_info         new_stack   childregs
-	 *  !                      !           !             {if(PSR_PS) }
-	 *  V                      V (stk.fr.) V  (pt_regs)  { (stk.fr.) }
-	 *  +----- - - - - - ------+===========+============={+==========}+
+	 *  p->thread_info         new_stack   childregs stack bottom
+	 *  !                      !           !             !
+	 *  V                      V (stk.fr.) V  (pt_regs)  V
+	 *  +----- - - - - - ------+===========+=============+
 	 */
 	new_stack = task_stack_page(p) + THREAD_SIZE;
-	if (regs->psr & PSR_PS)
-		new_stack -= STACKFRAME_SZ;
 	new_stack -= STACKFRAME_SZ + TRACEREG_SZ;
-	memcpy(new_stack, (char *)regs - STACKFRAME_SZ, STACKFRAME_SZ + TRACEREG_SZ);
 	childregs = (struct pt_regs *) (new_stack + STACKFRAME_SZ);
 
 	/*
@@ -356,55 +352,58 @@
 	 * Thus, kpsr|=PSR_PIL.
 	 */
 	ti->ksp = (unsigned long) new_stack;
+	p->thread.kregs = childregs;
+
+	if (unlikely(p->flags & PF_KTHREAD)) {
+		extern int nwindows;
+		unsigned long psr;
+		memset(new_stack, 0, STACKFRAME_SZ + TRACEREG_SZ);
+		p->thread.flags |= SPARC_FLAG_KTHREAD;
+		p->thread.current_ds = KERNEL_DS;
+		ti->kpc = (((unsigned long) ret_from_kernel_thread) - 0x8);
+		childregs->u_regs[UREG_G1] = sp; /* function */
+		childregs->u_regs[UREG_G2] = arg;
+		psr = childregs->psr = get_psr();
+		ti->kpsr = psr | PSR_PIL;
+		ti->kwim = 1 << (((psr & PSR_CWP) + 1) % nwindows);
+		return 0;
+	}
+	memcpy(new_stack, (char *)regs - STACKFRAME_SZ, STACKFRAME_SZ + TRACEREG_SZ);
+	childregs->u_regs[UREG_FP] = sp;
+	p->thread.flags &= ~SPARC_FLAG_KTHREAD;
+	p->thread.current_ds = USER_DS;
 	ti->kpc = (((unsigned long) ret_from_fork) - 0x8);
 	ti->kpsr = current->thread.fork_kpsr | PSR_PIL;
 	ti->kwim = current->thread.fork_kwim;
 
-	if(regs->psr & PSR_PS) {
-		extern struct pt_regs fake_swapper_regs;
+	if (sp != regs->u_regs[UREG_FP]) {
+		struct sparc_stackf __user *childstack;
+		struct sparc_stackf __user *parentstack;
 
-		p->thread.kregs = &fake_swapper_regs;
-		new_stack += STACKFRAME_SZ + TRACEREG_SZ;
-		childregs->u_regs[UREG_FP] = (unsigned long) new_stack;
-		p->thread.flags |= SPARC_FLAG_KTHREAD;
-		p->thread.current_ds = KERNEL_DS;
-		memcpy(new_stack, (void *)regs->u_regs[UREG_FP], STACKFRAME_SZ);
-		childregs->u_regs[UREG_G6] = (unsigned long) ti;
-	} else {
-		p->thread.kregs = childregs;
-		childregs->u_regs[UREG_FP] = sp;
-		p->thread.flags &= ~SPARC_FLAG_KTHREAD;
-		p->thread.current_ds = USER_DS;
-
-		if (sp != regs->u_regs[UREG_FP]) {
-			struct sparc_stackf __user *childstack;
-			struct sparc_stackf __user *parentstack;
-
-			/*
-			 * This is a clone() call with supplied user stack.
-			 * Set some valid stack frames to give to the child.
-			 */
-			childstack = (struct sparc_stackf __user *)
-				(sp & ~0xfUL);
-			parentstack = (struct sparc_stackf __user *)
-				regs->u_regs[UREG_FP];
+		/*
+		 * This is a clone() call with supplied user stack.
+		 * Set some valid stack frames to give to the child.
+		 */
+		childstack = (struct sparc_stackf __user *)
+			(sp & ~0xfUL);
+		parentstack = (struct sparc_stackf __user *)
+			regs->u_regs[UREG_FP];
 
 #if 0
-			printk("clone: parent stack:\n");
-			show_stackframe(parentstack);
+		printk("clone: parent stack:\n");
+		show_stackframe(parentstack);
 #endif
 
-			childstack = clone_stackframe(childstack, parentstack);
-			if (!childstack)
-				return -EFAULT;
+		childstack = clone_stackframe(childstack, parentstack);
+		if (!childstack)
+			return -EFAULT;
 
 #if 0
-			printk("clone: child stack:\n");
-			show_stackframe(childstack);
+		printk("clone: child stack:\n");
+		show_stackframe(childstack);
 #endif
 
-			childregs->u_regs[UREG_FP] = (unsigned long)childstack;
-		}
+		childregs->u_regs[UREG_FP] = (unsigned long)childstack;
 	}
 
 #ifdef CONFIG_SMP
@@ -475,69 +474,6 @@
 	return 1;
 }
 
-/*
- * sparc_execve() executes a new program after the asm stub has set
- * things up for us.  This should basically do what I want it to.
- */
-asmlinkage int sparc_execve(struct pt_regs *regs)
-{
-	int error, base = 0;
-	struct filename *filename;
-
-	/* Check for indirect call. */
-	if(regs->u_regs[UREG_G1] == 0)
-		base = 1;
-
-	filename = getname((char __user *)regs->u_regs[base + UREG_I0]);
-	error = PTR_ERR(filename);
-	if(IS_ERR(filename))
-		goto out;
-	error = do_execve(filename->name,
-			  (const char __user *const  __user *)
-			  regs->u_regs[base + UREG_I1],
-			  (const char __user *const  __user *)
-			  regs->u_regs[base + UREG_I2],
-			  regs);
-	putname(filename);
-out:
-	return error;
-}
-
-/*
- * This is the mechanism for creating a new kernel thread.
- *
- * NOTE! Only a kernel-only process(ie the swapper or direct descendants
- * who haven't done an "execve()") should use this: it will work within
- * a system call from a "real" process, but the process memory space will
- * not be freed until both the parent and the child have exited.
- */
-pid_t kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
-{
-	long retval;
-
-	__asm__ __volatile__("mov %4, %%g2\n\t"    /* Set aside fn ptr... */
-			     "mov %5, %%g3\n\t"    /* and arg. */
-			     "mov %1, %%g1\n\t"
-			     "mov %2, %%o0\n\t"    /* Clone flags. */
-			     "mov 0, %%o1\n\t"     /* usp arg == 0 */
-			     "t 0x10\n\t"          /* Linux/Sparc clone(). */
-			     "cmp %%o1, 0\n\t"
-			     "be 1f\n\t"           /* The parent, just return. */
-			     " nop\n\t"            /* Delay slot. */
-			     "jmpl %%g2, %%o7\n\t" /* Call the function. */
-			     " mov %%g3, %%o0\n\t" /* Get back the arg in delay. */
-			     "mov %3, %%g1\n\t"
-			     "t 0x10\n\t"          /* Linux/Sparc exit(). */
-			     /* Notreached by child. */
-			     "1: mov %%o0, %0\n\t" :
-			     "=r" (retval) :
-			     "i" (__NR_clone), "r" (flags | CLONE_VM | CLONE_UNTRACED),
-			     "i" (__NR_exit),  "r" (fn), "r" (arg) :
-			     "g1", "g2", "g3", "o0", "o1", "memory", "cc");
-	return retval;
-}
-EXPORT_SYMBOL(kernel_thread);
-
 unsigned long get_wchan(struct task_struct *task)
 {
 	unsigned long pc, fp, bias = 0;
diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c
index c6e0c29..cdb80b2 100644
--- a/arch/sparc/kernel/process_64.c
+++ b/arch/sparc/kernel/process_64.c
@@ -601,8 +601,7 @@
 		child_tid_ptr = (int __user *) regs->u_regs[UREG_I4];
 	}
 
-	ret = do_fork(clone_flags, stack_start,
-		      regs, stack_size,
+	ret = do_fork(clone_flags, stack_start, stack_size,
 		      parent_tid_ptr, child_tid_ptr);
 
 	/* If we get an error and potentially restart the system
@@ -622,65 +621,56 @@
  * Child  -->  %o0 == parents pid, %o1 == 1
  */
 int copy_thread(unsigned long clone_flags, unsigned long sp,
-		unsigned long unused,
-		struct task_struct *p, struct pt_regs *regs)
+		unsigned long arg, struct task_struct *p)
 {
 	struct thread_info *t = task_thread_info(p);
+	struct pt_regs *regs = current_pt_regs();
 	struct sparc_stackf *parent_sf;
 	unsigned long child_stack_sz;
 	char *child_trap_frame;
-	int kernel_thread;
-
-	kernel_thread = (regs->tstate & TSTATE_PRIV) ? 1 : 0;
-	parent_sf = ((struct sparc_stackf *) regs) - 1;
 
 	/* Calculate offset to stack_frame & pt_regs */
-	child_stack_sz = ((STACKFRAME_SZ + TRACEREG_SZ) +
-			  (kernel_thread ? STACKFRAME_SZ : 0));
+	child_stack_sz = (STACKFRAME_SZ + TRACEREG_SZ);
 	child_trap_frame = (task_stack_page(p) +
 			    (THREAD_SIZE - child_stack_sz));
-	memcpy(child_trap_frame, parent_sf, child_stack_sz);
 
-	t->flags = (t->flags & ~((0xffUL << TI_FLAG_CWP_SHIFT) |
-				 (0xffUL << TI_FLAG_CURRENT_DS_SHIFT))) |
-		(((regs->tstate + 1) & TSTATE_CWP) << TI_FLAG_CWP_SHIFT);
 	t->new_child = 1;
 	t->ksp = ((unsigned long) child_trap_frame) - STACK_BIAS;
 	t->kregs = (struct pt_regs *) (child_trap_frame +
 				       sizeof(struct sparc_stackf));
 	t->fpsaved[0] = 0;
 
-	if (kernel_thread) {
-		struct sparc_stackf *child_sf = (struct sparc_stackf *)
-			(child_trap_frame + (STACKFRAME_SZ + TRACEREG_SZ));
-
-		/* Zero terminate the stack backtrace.  */
-		child_sf->fp = NULL;
-		t->kregs->u_regs[UREG_FP] =
-		  ((unsigned long) child_sf) - STACK_BIAS;
-
-		t->flags |= ((long)ASI_P << TI_FLAG_CURRENT_DS_SHIFT);
-		t->kregs->u_regs[UREG_G6] = (unsigned long) t;
-		t->kregs->u_regs[UREG_G4] = (unsigned long) t->task;
-	} else {
-		if (t->flags & _TIF_32BIT) {
-			sp &= 0x00000000ffffffffUL;
-			regs->u_regs[UREG_FP] &= 0x00000000ffffffffUL;
-		}
-		t->kregs->u_regs[UREG_FP] = sp;
-		t->flags |= ((long)ASI_AIUS << TI_FLAG_CURRENT_DS_SHIFT);
-		if (sp != regs->u_regs[UREG_FP]) {
-			unsigned long csp;
-
-			csp = clone_stackframe(sp, regs->u_regs[UREG_FP]);
-			if (!csp)
-				return -EFAULT;
-			t->kregs->u_regs[UREG_FP] = csp;
-		}
-		if (t->utraps)
-			t->utraps[0]++;
+	if (unlikely(p->flags & PF_KTHREAD)) {
+		memset(child_trap_frame, 0, child_stack_sz);
+		__thread_flag_byte_ptr(t)[TI_FLAG_BYTE_CWP] = 
+			(current_pt_regs()->tstate + 1) & TSTATE_CWP;
+		t->current_ds = ASI_P;
+		t->kregs->u_regs[UREG_G1] = sp; /* function */
+		t->kregs->u_regs[UREG_G2] = arg;
+		return 0;
 	}
 
+	parent_sf = ((struct sparc_stackf *) regs) - 1;
+	memcpy(child_trap_frame, parent_sf, child_stack_sz);
+	if (t->flags & _TIF_32BIT) {
+		sp &= 0x00000000ffffffffUL;
+		regs->u_regs[UREG_FP] &= 0x00000000ffffffffUL;
+	}
+	t->kregs->u_regs[UREG_FP] = sp;
+	__thread_flag_byte_ptr(t)[TI_FLAG_BYTE_CWP] = 
+		(regs->tstate + 1) & TSTATE_CWP;
+	t->current_ds = ASI_AIUS;
+	if (sp != regs->u_regs[UREG_FP]) {
+		unsigned long csp;
+
+		csp = clone_stackframe(sp, regs->u_regs[UREG_FP]);
+		if (!csp)
+			return -EFAULT;
+		t->kregs->u_regs[UREG_FP] = csp;
+	}
+	if (t->utraps)
+		t->utraps[0]++;
+
 	/* Set the return value for the child. */
 	t->kregs->u_regs[UREG_I0] = current->pid;
 	t->kregs->u_regs[UREG_I1] = 1;
@@ -694,45 +684,6 @@
 	return 0;
 }
 
-/*
- * This is the mechanism for creating a new kernel thread.
- *
- * NOTE! Only a kernel-only process(ie the swapper or direct descendants
- * who haven't done an "execve()") should use this: it will work within
- * a system call from a "real" process, but the process memory space will
- * not be freed until both the parent and the child have exited.
- */
-pid_t kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
-{
-	long retval;
-
-	/* If the parent runs before fn(arg) is called by the child,
-	 * the input registers of this function can be clobbered.
-	 * So we stash 'fn' and 'arg' into global registers which
-	 * will not be modified by the parent.
-	 */
-	__asm__ __volatile__("mov %4, %%g2\n\t"	   /* Save FN into global */
-			     "mov %5, %%g3\n\t"	   /* Save ARG into global */
-			     "mov %1, %%g1\n\t"	   /* Clone syscall nr. */
-			     "mov %2, %%o0\n\t"	   /* Clone flags. */
-			     "mov 0, %%o1\n\t"	   /* usp arg == 0 */
-			     "t 0x6d\n\t"	   /* Linux/Sparc clone(). */
-			     "brz,a,pn %%o1, 1f\n\t" /* Parent, just return. */
-			     " mov %%o0, %0\n\t"
-			     "jmpl %%g2, %%o7\n\t"   /* Call the function. */
-			     " mov %%g3, %%o0\n\t"   /* Set arg in delay. */
-			     "mov %3, %%g1\n\t"
-			     "t 0x6d\n\t"	   /* Linux/Sparc exit(). */
-			     /* Notreached by child. */
-			     "1:" :
-			     "=r" (retval) :
-			     "i" (__NR_clone), "r" (flags | CLONE_VM | CLONE_UNTRACED),
-			     "i" (__NR_exit),  "r" (fn), "r" (arg) :
-			     "g1", "g2", "g3", "o0", "o1", "memory", "cc");
-	return retval;
-}
-EXPORT_SYMBOL(kernel_thread);
-
 typedef struct {
 	union {
 		unsigned int	pr_regs[32];
@@ -799,41 +750,6 @@
 }
 EXPORT_SYMBOL(dump_fpu);
 
-/*
- * sparc_execve() executes a new program after the asm stub has set
- * things up for us.  This should basically do what I want it to.
- */
-asmlinkage int sparc_execve(struct pt_regs *regs)
-{
-	int error, base = 0;
-	struct filename *filename;
-
-	/* User register window flush is done by entry.S */
-
-	/* Check for indirect call. */
-	if (regs->u_regs[UREG_G1] == 0)
-		base = 1;
-
-	filename = getname((char __user *)regs->u_regs[base + UREG_I0]);
-	error = PTR_ERR(filename);
-	if (IS_ERR(filename))
-		goto out;
-	error = do_execve(filename->name,
-			  (const char __user *const __user *)
-			  regs->u_regs[base + UREG_I1],
-			  (const char __user *const __user *)
-			  regs->u_regs[base + UREG_I2], regs);
-	putname(filename);
-	if (!error) {
-		fprs_write(0);
-		current_thread_info()->xfsr[0] = 0;
-		current_thread_info()->fpsaved[0] = 0;
-		regs->tstate &= ~TSTATE_PEF;
-	}
-out:
-	return error;
-}
-
 unsigned long get_wchan(struct task_struct *task)
 {
 	unsigned long pc, fp, bias = 0;
diff --git a/arch/sparc/kernel/sys_sparc32.c b/arch/sparc/kernel/sys_sparc32.c
index c323981..03c7e92 100644
--- a/arch/sparc/kernel/sys_sparc32.c
+++ b/arch/sparc/kernel/sys_sparc32.c
@@ -396,42 +396,6 @@
         return ret;
 }
 
-/*
- * sparc32_execve() executes a new program after the asm stub has set
- * things up for us.  This should basically do what I want it to.
- */
-asmlinkage long sparc32_execve(struct pt_regs *regs)
-{
-	int error, base = 0;
-	struct filename *filename;
-
-	/* User register window flush is done by entry.S */
-
-	/* Check for indirect call. */
-	if ((u32)regs->u_regs[UREG_G1] == 0)
-		base = 1;
-
-	filename = getname(compat_ptr(regs->u_regs[base + UREG_I0]));
-	error = PTR_ERR(filename);
-	if (IS_ERR(filename))
-		goto out;
-
-	error = compat_do_execve(filename->name,
-				 compat_ptr(regs->u_regs[base + UREG_I1]),
-				 compat_ptr(regs->u_regs[base + UREG_I2]), regs);
-
-	putname(filename);
-
-	if (!error) {
-		fprs_write(0);
-		current_thread_info()->xfsr[0] = 0;
-		current_thread_info()->fpsaved[0] = 0;
-		regs->tstate &= ~TSTATE_PEF;
-	}
-out:
-	return error;
-}
-
 #ifdef CONFIG_MODULES
 
 asmlinkage long sys32_init_module(void __user *umod, u32 len,
diff --git a/arch/sparc/kernel/sys_sparc_32.c b/arch/sparc/kernel/sys_sparc_32.c
index 57277c8..2da0bdc 100644
--- a/arch/sparc/kernel/sys_sparc_32.c
+++ b/arch/sparc/kernel/sys_sparc_32.c
@@ -249,27 +249,3 @@
 	up_read(&uts_sem);
 	return err;
 }
-
-/*
- * Do a system call from kernel instead of calling sys_execve so we
- * end up with proper pt_regs.
- */
-int kernel_execve(const char *filename,
-		  const char *const argv[],
-		  const char *const envp[])
-{
-	long __res;
-	register long __g1 __asm__ ("g1") = __NR_execve;
-	register long __o0 __asm__ ("o0") = (long)(filename);
-	register long __o1 __asm__ ("o1") = (long)(argv);
-	register long __o2 __asm__ ("o2") = (long)(envp);
-	asm volatile ("t 0x10\n\t"
-		      "bcc 1f\n\t"
-		      "mov %%o0, %0\n\t"
-		      "sub %%g0, %%o0, %0\n\t"
-		      "1:\n\t"
-		      : "=r" (__res), "=&r" (__o0)
-		      : "1" (__o0), "r" (__o1), "r" (__o2), "r" (__g1)
-		      : "cc");
-	return __res;
-}
diff --git a/arch/sparc/kernel/sys_sparc_64.c b/arch/sparc/kernel/sys_sparc_64.c
index 97309c0..708bc29 100644
--- a/arch/sparc/kernel/sys_sparc_64.c
+++ b/arch/sparc/kernel/sys_sparc_64.c
@@ -646,28 +646,6 @@
 	return ret;
 }
 
-/*
- * Do a system call from kernel instead of calling sys_execve so we
- * end up with proper pt_regs.
- */
-int kernel_execve(const char *filename,
-		  const char *const argv[],
-		  const char *const envp[])
-{
-	long __res;
-	register long __g1 __asm__ ("g1") = __NR_execve;
-	register long __o0 __asm__ ("o0") = (long)(filename);
-	register long __o1 __asm__ ("o1") = (long)(argv);
-	register long __o2 __asm__ ("o2") = (long)(envp);
-	asm volatile ("t 0x6d\n\t"
-		      "sub %%g0, %%o0, %0\n\t"
-		      "movcc %%xcc, %%o0, %0\n\t"
-		      : "=r" (__res), "=&r" (__o0)
-		      : "1" (__o0), "r" (__o1), "r" (__o2), "r" (__g1)
-		      : "cc");
-	return __res;
-}
-
 asmlinkage long sys_kern_features(void)
 {
 	return KERN_FEATURE_MIXED_MODE_STACK;
diff --git a/arch/sparc/kernel/syscalls.S b/arch/sparc/kernel/syscalls.S
index bf23477..e0fed77 100644
--- a/arch/sparc/kernel/syscalls.S
+++ b/arch/sparc/kernel/syscalls.S
@@ -1,23 +1,19 @@
 	/* SunOS's execv() call only specifies the argv argument, the
 	 * environment settings are the same as the calling processes.
 	 */
-sys_execve:
-	sethi	%hi(sparc_execve), %g1
-	ba,pt	%xcc, execve_merge
-	 or	%g1, %lo(sparc_execve), %g1
+sys64_execve:
+	set	sys_execve, %g1
+	jmpl	%g1, %g0
+	 flushw
 
 #ifdef CONFIG_COMPAT
 sunos_execv:
-	stx	%g0, [%sp + PTREGS_OFF + PT_V9_I2]
+	mov	%g0, %o2
 sys32_execve:
-	sethi	%hi(sparc32_execve), %g1
-	or	%g1, %lo(sparc32_execve), %g1
-#endif
-
-execve_merge:
-	flushw
+	set	compat_sys_execve, %g1
 	jmpl	%g1, %g0
-	 add	%sp, PTREGS_OFF, %o0
+	 flushw
+#endif
 
 	.align	32
 sys_sparc_pipe:
@@ -112,11 +108,16 @@
 ret_from_syscall:
 	/* Clear current_thread_info()->new_child. */
 	stb	%g0, [%g6 + TI_NEW_CHILD]
-	ldx	[%g6 + TI_FLAGS], %l0
 	call	schedule_tail
 	 mov	%g7, %o0
+	ldx	[%sp + PTREGS_OFF + PT_V9_I0], %o0
+	brnz,pt	%o0, ret_sys_call
+	 ldx	[%g6 + TI_FLAGS], %l0
+	ldx	[%sp + PTREGS_OFF + PT_V9_G1], %l1
+	call	%l1
+	 ldx	[%sp + PTREGS_OFF + PT_V9_G2], %o0
 	ba,pt	%xcc, ret_sys_call
-	 ldx	[%sp + PTREGS_OFF + PT_V9_I0], %o0
+	 mov	0, %o0
 
 	.globl	sparc_exit_group
 	.type	sparc_exit_group,#function
@@ -232,7 +233,6 @@
 	ldx	[%sp + PTREGS_OFF + PT_V9_TNPC], %l1 ! pc = npc
 
 2:
-	stb	%g0, [%g6 + TI_SYS_NOERROR]
 	/* System call success, clear Carry condition code. */
 	andn	%g3, %g2, %g3
 3:
diff --git a/arch/sparc/kernel/systbls_64.S b/arch/sparc/kernel/systbls_64.S
index 017b74a..cdbd9b8 100644
--- a/arch/sparc/kernel/systbls_64.S
+++ b/arch/sparc/kernel/systbls_64.S
@@ -107,7 +107,7 @@
 /*40*/	.word sys_newlstat, sys_dup, sys_sparc_pipe, sys_times, sys_nis_syscall
 	.word sys_umount, sys_setgid, sys_getgid, sys_signal, sys_geteuid
 /*50*/	.word sys_getegid, sys_acct, sys_memory_ordering, sys_nis_syscall, sys_ioctl
-	.word sys_reboot, sys_nis_syscall, sys_symlink, sys_readlink, sys_execve
+	.word sys_reboot, sys_nis_syscall, sys_symlink, sys_readlink, sys64_execve
 /*60*/	.word sys_umask, sys_chroot, sys_newfstat, sys_fstat64, sys_getpagesize
 	.word sys_msync, sys_vfork, sys_pread64, sys_pwrite64, sys_nis_syscall
 /*70*/	.word sys_nis_syscall, sys_mmap, sys_nis_syscall, sys_64_munmap, sys_mprotect
diff --git a/arch/sparc/kernel/traps_64.c b/arch/sparc/kernel/traps_64.c
index b66a779..e7ecf15 100644
--- a/arch/sparc/kernel/traps_64.c
+++ b/arch/sparc/kernel/traps_64.c
@@ -2688,8 +2688,8 @@
 		     TI_PRE_COUNT != offsetof(struct thread_info,
 					      preempt_count) ||
 		     TI_NEW_CHILD != offsetof(struct thread_info, new_child) ||
-		     TI_SYS_NOERROR != offsetof(struct thread_info,
-						syscall_noerror) ||
+		     TI_CURRENT_DS != offsetof(struct thread_info,
+						current_ds) ||
 		     TI_RESTART_BLOCK != offsetof(struct thread_info,
 						  restart_block) ||
 		     TI_KUNA_REGS != offsetof(struct thread_info,
diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
index 9e28a11..85be1ca 100644
--- a/arch/sparc/mm/init_64.c
+++ b/arch/sparc/mm/init_64.c
@@ -624,7 +624,7 @@
 void prom_world(int enter)
 {
 	if (!enter)
-		set_fs((mm_segment_t) { get_thread_current_ds() });
+		set_fs(get_fs());
 
 	__asm__ __volatile__("flushw");
 }
diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig
index 875d008..ea7f61e 100644
--- a/arch/tile/Kconfig
+++ b/arch/tile/Kconfig
@@ -21,6 +21,8 @@
 	select ARCH_HAVE_NMI_SAFE_CMPXCHG
 	select GENERIC_CLOCKEVENTS
 	select MODULES_USE_ELF_RELA
+	select GENERIC_KERNEL_THREAD
+	select GENERIC_KERNEL_EXECVE
 
 # FIXME: investigate whether we need/want these options.
 #	select HAVE_IOREMAP_PROT
diff --git a/arch/tile/include/asm/compat.h b/arch/tile/include/asm/compat.h
index 3063e6f..ca61fb4 100644
--- a/arch/tile/include/asm/compat.h
+++ b/arch/tile/include/asm/compat.h
@@ -275,18 +275,14 @@
 struct compat_sigaction;
 struct compat_siginfo;
 struct compat_sigaltstack;
-long compat_sys_execve(const char __user *path,
-		       compat_uptr_t __user *argv,
-		       compat_uptr_t __user *envp, struct pt_regs *);
 long compat_sys_rt_sigaction(int sig, struct compat_sigaction __user *act,
 			     struct compat_sigaction __user *oact,
 			     size_t sigsetsize);
 long compat_sys_rt_sigqueueinfo(int pid, int sig,
 				struct compat_siginfo __user *uinfo);
-long compat_sys_rt_sigreturn(struct pt_regs *);
+long compat_sys_rt_sigreturn(void);
 long compat_sys_sigaltstack(const struct compat_sigaltstack __user *uss_ptr,
-			    struct compat_sigaltstack __user *uoss_ptr,
-			    struct pt_regs *);
+			    struct compat_sigaltstack __user *uoss_ptr);
 long compat_sys_truncate64(char __user *filename, u32 dummy, u32 low, u32 high);
 long compat_sys_ftruncate64(unsigned int fd, u32 dummy, u32 low, u32 high);
 long compat_sys_pread64(unsigned int fd, char __user *ubuf, size_t count,
@@ -303,12 +299,7 @@
 long compat_sys_sched_rr_get_interval(compat_pid_t pid,
 				      struct compat_timespec __user *interval);
 
-/* These are the intvec_64.S trampolines. */
-long _compat_sys_execve(const char __user *path,
-			const compat_uptr_t __user *argv,
-			const compat_uptr_t __user *envp);
-long _compat_sys_sigaltstack(const struct compat_sigaltstack __user *uss_ptr,
-			    struct compat_sigaltstack __user *uoss_ptr);
+/* Assembly trampoline to avoid clobbering r0. */
 long _compat_sys_rt_sigreturn(void);
 
 #endif /* _ASM_TILE_COMPAT_H */
diff --git a/arch/tile/include/asm/elf.h b/arch/tile/include/asm/elf.h
index f8ccf08..b73e103 100644
--- a/arch/tile/include/asm/elf.h
+++ b/arch/tile/include/asm/elf.h
@@ -148,6 +148,7 @@
 #define compat_start_thread(regs, ip, usp) do { \
 		regs->pc = ptr_to_compat_reg((void *)(ip)); \
 		regs->sp = ptr_to_compat_reg((void *)(usp)); \
+		single_step_execve();	\
 	} while (0)
 
 /*
diff --git a/arch/tile/include/asm/processor.h b/arch/tile/include/asm/processor.h
index 8c4dd9f..2b70dfb 100644
--- a/arch/tile/include/asm/processor.h
+++ b/arch/tile/include/asm/processor.h
@@ -211,6 +211,7 @@
 {
 	regs->pc = pc;
 	regs->sp = usp;
+	single_step_execve();
 }
 
 /* Free all resources held by a thread. */
@@ -219,8 +220,6 @@
 	/* Nothing for now */
 }
 
-extern int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags);
-
 extern int do_work_pending(struct pt_regs *regs, u32 flags);
 
 
@@ -239,6 +238,9 @@
 #define KSTK_TOP(task)	(task_ksp0(task) - STACK_TOP_DELTA)
 #define task_pt_regs(task) \
   ((struct pt_regs *)(task_ksp0(task) - KSTK_PTREGS_GAP) - 1)
+#define current_pt_regs()                                   \
+  ((struct pt_regs *)((stack_pointer | (THREAD_SIZE - 1)) - \
+                      (KSTK_PTREGS_GAP - 1)) - 1)
 #define task_sp(task)	(task_pt_regs(task)->sp)
 #define task_pc(task)	(task_pt_regs(task)->pc)
 /* Aliases for pc and sp (used in fs/proc/array.c) */
diff --git a/arch/tile/include/asm/switch_to.h b/arch/tile/include/asm/switch_to.h
index 1d48c5f..b8f888c 100644
--- a/arch/tile/include/asm/switch_to.h
+++ b/arch/tile/include/asm/switch_to.h
@@ -68,7 +68,10 @@
 /* Support function for forking a new task. */
 void ret_from_fork(void);
 
-/* Called from ret_from_fork() when a new process starts up. */
+/* Support function for forking a new kernel thread. */
+void ret_from_kernel_thread(void *fn, void *arg);
+
+/* Called from ret_from_xxx() when a new process starts up. */
 struct task_struct *sim_notify_fork(struct task_struct *prev);
 
 #endif /* !__ASSEMBLY__ */
diff --git a/arch/tile/include/asm/syscalls.h b/arch/tile/include/asm/syscalls.h
index 06f0464..4c8462a 100644
--- a/arch/tile/include/asm/syscalls.h
+++ b/arch/tile/include/asm/syscalls.h
@@ -51,8 +51,7 @@
 
 #ifndef __tilegx__
 /* mm/fault.c */
-long sys_cmpxchg_badaddr(unsigned long address, struct pt_regs *);
-long _sys_cmpxchg_badaddr(unsigned long address);
+long sys_cmpxchg_badaddr(unsigned long address);
 #endif
 
 #ifdef CONFIG_COMPAT
@@ -63,14 +62,16 @@
 long sys_ftruncate64(unsigned int fd, loff_t length);
 #endif
 
+/* Provide versions of standard syscalls that use current_pt_regs(). */
+long sys_rt_sigreturn(void);
+long sys_sigaltstack(const stack_t __user *, stack_t __user *);
+#define sys_rt_sigreturn sys_rt_sigreturn
+#define sys_sigaltstack sys_sigaltstack
+
 /* These are the intvec*.S trampolines. */
-long _sys_sigaltstack(const stack_t __user *, stack_t __user *);
 long _sys_rt_sigreturn(void);
 long _sys_clone(unsigned long clone_flags, unsigned long newsp,
 		void __user *parent_tid, void __user *child_tid);
-long _sys_execve(const char __user *filename,
-		 const char __user *const __user *argv,
-		 const char __user *const __user *envp);
 
 #include <asm-generic/syscalls.h>
 
diff --git a/arch/tile/include/asm/unistd.h b/arch/tile/include/asm/unistd.h
index 6e032a0..b51c6ee 100644
--- a/arch/tile/include/asm/unistd.h
+++ b/arch/tile/include/asm/unistd.h
@@ -16,4 +16,6 @@
 #define __ARCH_WANT_SYS_LLSEEK
 #endif
 #define __ARCH_WANT_SYS_NEWFSTATAT
+#define __ARCH_WANT_SYS_EXECVE
+#define __ARCH_WANT_SYS_CLONE
 #include <uapi/asm/unistd.h>
diff --git a/arch/tile/kernel/compat.c b/arch/tile/kernel/compat.c
index d67459b..9cd7cb6 100644
--- a/arch/tile/kernel/compat.c
+++ b/arch/tile/kernel/compat.c
@@ -102,9 +102,7 @@
 #define compat_sys_fadvise64_64 sys32_fadvise64_64
 #define compat_sys_readahead sys32_readahead
 
-/* Call the trampolines to manage pt_regs where necessary. */
-#define compat_sys_execve _compat_sys_execve
-#define compat_sys_sigaltstack _compat_sys_sigaltstack
+/* Call the assembly trampolines where necessary. */
 #define compat_sys_rt_sigreturn _compat_sys_rt_sigreturn
 #define sys_clone _sys_clone
 
diff --git a/arch/tile/kernel/compat_signal.c b/arch/tile/kernel/compat_signal.c
index 08b4fe1..2e4cc69 100644
--- a/arch/tile/kernel/compat_signal.c
+++ b/arch/tile/kernel/compat_signal.c
@@ -197,8 +197,7 @@
 }
 
 long compat_sys_sigaltstack(const struct compat_sigaltstack __user *uss_ptr,
-			    struct compat_sigaltstack __user *uoss_ptr,
-			    struct pt_regs *regs)
+			    struct compat_sigaltstack __user *uoss_ptr)
 {
 	stack_t uss, uoss;
 	int ret;
@@ -219,7 +218,7 @@
 	set_fs(KERNEL_DS);
 	ret = do_sigaltstack(uss_ptr ? (stack_t __user __force *)&uss : NULL,
 			     (stack_t __user __force *)&uoss,
-			     (unsigned long)compat_ptr(regs->sp));
+			     (unsigned long)compat_ptr(current_pt_regs()->sp));
 	set_fs(seg);
 	if (ret >= 0 && uoss_ptr)  {
 		if (!access_ok(VERIFY_WRITE, uoss_ptr, sizeof(*uoss_ptr)) ||
@@ -232,8 +231,9 @@
 }
 
 /* The assembly shim for this function arranges to ignore the return value. */
-long compat_sys_rt_sigreturn(struct pt_regs *regs)
+long compat_sys_rt_sigreturn(void)
 {
+	struct pt_regs *regs = current_pt_regs();
 	struct compat_rt_sigframe __user *frame =
 		(struct compat_rt_sigframe __user *) compat_ptr(regs->sp);
 	sigset_t set;
@@ -248,7 +248,7 @@
 	if (restore_sigcontext(regs, &frame->uc.uc_mcontext))
 		goto badframe;
 
-	if (compat_sys_sigaltstack(&frame->uc.uc_stack, NULL, regs) != 0)
+	if (compat_sys_sigaltstack(&frame->uc.uc_stack, NULL) == -EFAULT)
 		goto badframe;
 
 	return 0;
diff --git a/arch/tile/kernel/entry.S b/arch/tile/kernel/entry.S
index c31637b..f116cb0 100644
--- a/arch/tile/kernel/entry.S
+++ b/arch/tile/kernel/entry.S
@@ -28,17 +28,6 @@
 	STD_ENDPROC(current_text_addr)
 
 /*
- * Implement execve().  The i386 code has a note that forking from kernel
- * space results in no copy on write until the execve, so we should be
- * careful not to write to the stack here.
- */
-STD_ENTRY(kernel_execve)
-	moveli TREG_SYSCALL_NR_NAME, __NR_execve
-	swint1
-	jrp lr
-	STD_ENDPROC(kernel_execve)
-
-/*
  * We don't run this function directly, but instead copy it to a page
  * we map into every user process.  See vdso_setup().
  *
diff --git a/arch/tile/kernel/intvec_32.S b/arch/tile/kernel/intvec_32.S
index 6943515..f212bf7 100644
--- a/arch/tile/kernel/intvec_32.S
+++ b/arch/tile/kernel/intvec_32.S
@@ -1291,6 +1291,21 @@
 	}
 	STD_ENDPROC(ret_from_fork)
 
+STD_ENTRY(ret_from_kernel_thread)
+	jal     sim_notify_fork
+	jal     schedule_tail
+	FEEDBACK_REENTER(ret_from_fork)
+	{
+	 move   r0, r31
+	 jalr   r30
+	}
+	FEEDBACK_REENTER(ret_from_kernel_thread)
+	{
+	 movei  r30, 0               /* not an NMI */
+	 j      .Lresume_userspace   /* jump into middle of interrupt_return */
+	}
+	STD_ENDPROC(ret_from_kernel_thread)
+
 	/*
 	 * Code for ill interrupt.
 	 */
@@ -1437,15 +1452,6 @@
 	panic   "Unhandled interrupt %#x: PC %#lx"
 	STD_ENDPROC(bad_intr)
 
-/* Put address of pt_regs in reg and jump. */
-#define PTREGS_SYSCALL(x, reg)                          \
-	STD_ENTRY(_##x);                                \
-	{                                               \
-	 PTREGS_PTR(reg, PTREGS_OFFSET_BASE);           \
-	 j      x                                       \
-	};                                              \
-	STD_ENDPROC(_##x)
-
 /*
  * Special-case sigreturn to not write r0 to the stack on return.
  * This is technically more efficient, but it also avoids difficulties
@@ -1461,12 +1467,9 @@
 	};                                              \
 	STD_ENDPROC(_##x)
 
-PTREGS_SYSCALL(sys_execve, r3)
-PTREGS_SYSCALL(sys_sigaltstack, r2)
 PTREGS_SYSCALL_SIGRETURN(sys_rt_sigreturn, r0)
-PTREGS_SYSCALL(sys_cmpxchg_badaddr, r1)
 
-/* Save additional callee-saves to pt_regs, put address in r4 and jump. */
+/* Save additional callee-saves to pt_regs and jump to standard function. */
 STD_ENTRY(_sys_clone)
 	push_extra_callee_saves r4
 	j       sys_clone
diff --git a/arch/tile/kernel/intvec_64.S b/arch/tile/kernel/intvec_64.S
index 7c06d59..54bc9a6 100644
--- a/arch/tile/kernel/intvec_64.S
+++ b/arch/tile/kernel/intvec_64.S
@@ -1150,6 +1150,21 @@
 	}
 	STD_ENDPROC(ret_from_fork)
 
+STD_ENTRY(ret_from_kernel_thread)
+	jal     sim_notify_fork
+	jal     schedule_tail
+	FEEDBACK_REENTER(ret_from_fork)
+	{
+	 move   r0, r31
+	 jalr   r30
+	}
+	FEEDBACK_REENTER(ret_from_kernel_thread)
+	{
+	 movei  r30, 0               /* not an NMI */
+	 j      .Lresume_userspace   /* jump into middle of interrupt_return */
+	}
+	STD_ENDPROC(ret_from_kernel_thread)
+
 /* Various stub interrupt handlers and syscall handlers */
 
 STD_ENTRY_LOCAL(_kernel_double_fault)
@@ -1166,15 +1181,6 @@
 	panic   "Unhandled interrupt %#x: PC %#lx"
 	STD_ENDPROC(bad_intr)
 
-/* Put address of pt_regs in reg and jump. */
-#define PTREGS_SYSCALL(x, reg)                          \
-	STD_ENTRY(_##x);                                \
-	{                                               \
-	 PTREGS_PTR(reg, PTREGS_OFFSET_BASE);           \
-	 j      x                                       \
-	};                                              \
-	STD_ENDPROC(_##x)
-
 /*
  * Special-case sigreturn to not write r0 to the stack on return.
  * This is technically more efficient, but it also avoids difficulties
@@ -1190,16 +1196,12 @@
 	};                                              \
 	STD_ENDPROC(_##x)
 
-PTREGS_SYSCALL(sys_execve, r3)
-PTREGS_SYSCALL(sys_sigaltstack, r2)
 PTREGS_SYSCALL_SIGRETURN(sys_rt_sigreturn, r0)
 #ifdef CONFIG_COMPAT
-PTREGS_SYSCALL(compat_sys_execve, r3)
-PTREGS_SYSCALL(compat_sys_sigaltstack, r2)
 PTREGS_SYSCALL_SIGRETURN(compat_sys_rt_sigreturn, r0)
 #endif
 
-/* Save additional callee-saves to pt_regs, put address in r4 and jump. */
+/* Save additional callee-saves to pt_regs and jump to standard function. */
 STD_ENTRY(_sys_clone)
 	push_extra_callee_saves r4
 	j       sys_clone
diff --git a/arch/tile/kernel/process.c b/arch/tile/kernel/process.c
index 307d010..0e5661e 100644
--- a/arch/tile/kernel/process.c
+++ b/arch/tile/kernel/process.c
@@ -157,18 +157,49 @@
 static void save_arch_state(struct thread_struct *t);
 
 int copy_thread(unsigned long clone_flags, unsigned long sp,
-		unsigned long stack_size,
-		struct task_struct *p, struct pt_regs *regs)
+		unsigned long arg, struct task_struct *p)
 {
-	struct pt_regs *childregs;
+	struct pt_regs *childregs = task_pt_regs(p), *regs = current_pt_regs();
 	unsigned long ksp;
+	unsigned long *callee_regs;
 
 	/*
-	 * When creating a new kernel thread we pass sp as zero.
-	 * Assign it to a reasonable value now that we have the stack.
+	 * Set up the stack and stack pointer appropriately for the
+	 * new child to find itself woken up in __switch_to().
+	 * The callee-saved registers must be on the stack to be read;
+	 * the new task will then jump to assembly support to handle
+	 * calling schedule_tail(), etc., and (for userspace tasks)
+	 * returning to the context set up in the pt_regs.
 	 */
-	if (sp == 0 && regs->ex1 == PL_ICS_EX1(KERNEL_PL, 0))
-		sp = KSTK_TOP(p);
+	ksp = (unsigned long) childregs;
+	ksp -= C_ABI_SAVE_AREA_SIZE;   /* interrupt-entry save area */
+	((long *)ksp)[0] = ((long *)ksp)[1] = 0;
+	ksp -= CALLEE_SAVED_REGS_COUNT * sizeof(unsigned long);
+	callee_regs = (unsigned long *)ksp;
+	ksp -= C_ABI_SAVE_AREA_SIZE;   /* __switch_to() save area */
+	((long *)ksp)[0] = ((long *)ksp)[1] = 0;
+	p->thread.ksp = ksp;
+
+	/* Record the pid of the task that created this one. */
+	p->thread.creator_pid = current->pid;
+
+	if (unlikely(p->flags & PF_KTHREAD)) {
+		/* kernel thread */
+		memset(childregs, 0, sizeof(struct pt_regs));
+		memset(&callee_regs[2], 0,
+		       (CALLEE_SAVED_REGS_COUNT - 2) * sizeof(unsigned long));
+		callee_regs[0] = sp;   /* r30 = function */
+		callee_regs[1] = arg;  /* r31 = arg */
+		childregs->ex1 = PL_ICS_EX1(KERNEL_PL, 0);
+		p->thread.pc = (unsigned long) ret_from_kernel_thread;
+		return 0;
+	}
+
+	/*
+	 * Start new thread in ret_from_fork so it schedules properly
+	 * and then return from interrupt like the parent.
+	 */
+	p->thread.pc = (unsigned long) ret_from_fork;
 
 	/*
 	 * Do not clone step state from the parent; each thread
@@ -177,51 +208,26 @@
 	task_thread_info(p)->step_state = NULL;
 
 	/*
-	 * Start new thread in ret_from_fork so it schedules properly
-	 * and then return from interrupt like the parent.
-	 */
-	p->thread.pc = (unsigned long) ret_from_fork;
-
-	/* Save user stack top pointer so we can ID the stack vm area later. */
-	p->thread.usp0 = sp;
-
-	/* Record the pid of the process that created this one. */
-	p->thread.creator_pid = current->pid;
-
-	/*
 	 * Copy the registers onto the kernel stack so the
 	 * return-from-interrupt code will reload it into registers.
 	 */
-	childregs = task_pt_regs(p);
-	*childregs = *regs;
+	*childregs = *current_pt_regs();
 	childregs->regs[0] = 0;         /* return value is zero */
-	childregs->sp = sp;  /* override with new user stack pointer */
+	if (sp)
+		childregs->sp = sp;  /* override with new user stack pointer */
+	memcpy(callee_regs, &childregs->regs[CALLEE_SAVED_FIRST_REG],
+	       CALLEE_SAVED_REGS_COUNT * sizeof(unsigned long));
+
+	/* Save user stack top pointer so we can ID the stack vm area later. */
+	p->thread.usp0 = childregs->sp;
 
 	/*
 	 * If CLONE_SETTLS is set, set "tp" in the new task to "r4",
 	 * which is passed in as arg #5 to sys_clone().
 	 */
 	if (clone_flags & CLONE_SETTLS)
-		childregs->tp = regs->regs[4];
+		childregs->tp = childregs->regs[4];
 
-	/*
-	 * Copy the callee-saved registers from the passed pt_regs struct
-	 * into the context-switch callee-saved registers area.
-	 * This way when we start the interrupt-return sequence, the
-	 * callee-save registers will be correctly in registers, which
-	 * is how we assume the compiler leaves them as we start doing
-	 * the normal return-from-interrupt path after calling C code.
-	 * Zero out the C ABI save area to mark the top of the stack.
-	 */
-	ksp = (unsigned long) childregs;
-	ksp -= C_ABI_SAVE_AREA_SIZE;   /* interrupt-entry save area */
-	((long *)ksp)[0] = ((long *)ksp)[1] = 0;
-	ksp -= CALLEE_SAVED_REGS_COUNT * sizeof(unsigned long);
-	memcpy((void *)ksp, &regs->regs[CALLEE_SAVED_FIRST_REG],
-	       CALLEE_SAVED_REGS_COUNT * sizeof(unsigned long));
-	ksp -= C_ABI_SAVE_AREA_SIZE;   /* __switch_to() save area */
-	((long *)ksp)[0] = ((long *)ksp)[1] = 0;
-	p->thread.ksp = ksp;
 
 #if CHIP_HAS_TILE_DMA()
 	/*
@@ -577,62 +583,6 @@
 	panic("work_pending: bad flags %#x\n", thread_info_flags);
 }
 
-/* Note there is an implicit fifth argument if (clone_flags & CLONE_SETTLS). */
-SYSCALL_DEFINE5(clone, unsigned long, clone_flags, unsigned long, newsp,
-		void __user *, parent_tidptr, void __user *, child_tidptr,
-		struct pt_regs *, regs)
-{
-	if (!newsp)
-		newsp = regs->sp;
-	return do_fork(clone_flags, newsp, regs, 0,
-		       parent_tidptr, child_tidptr);
-}
-
-/*
- * sys_execve() executes a new program.
- */
-SYSCALL_DEFINE4(execve, const char __user *, path,
-		const char __user *const __user *, argv,
-		const char __user *const __user *, envp,
-		struct pt_regs *, regs)
-{
-	long error;
-	struct filename *filename;
-
-	filename = getname(path);
-	error = PTR_ERR(filename);
-	if (IS_ERR(filename))
-		goto out;
-	error = do_execve(filename->name, argv, envp, regs);
-	putname(filename);
-	if (error == 0)
-		single_step_execve();
-out:
-	return error;
-}
-
-#ifdef CONFIG_COMPAT
-long compat_sys_execve(const char __user *path,
-		       compat_uptr_t __user *argv,
-		       compat_uptr_t __user *envp,
-		       struct pt_regs *regs)
-{
-	long error;
-	struct filename *filename;
-
-	filename = getname(path);
-	error = PTR_ERR(filename);
-	if (IS_ERR(filename))
-		goto out;
-	error = compat_do_execve(filename->name, argv, envp, regs);
-	putname(filename);
-	if (error == 0)
-		single_step_execve();
-out:
-	return error;
-}
-#endif
-
 unsigned long get_wchan(struct task_struct *p)
 {
 	struct KBacktraceIterator kbt;
@@ -650,37 +600,6 @@
 	return 0;
 }
 
-/*
- * We pass in lr as zero (cleared in kernel_thread) and the caller
- * part of the backtrace ABI on the stack also zeroed (in copy_thread)
- * so that backtraces will stop with this function.
- * Note that we don't use r0, since copy_thread() clears it.
- */
-static void start_kernel_thread(int dummy, int (*fn)(int), int arg)
-{
-	do_exit(fn(arg));
-}
-
-/*
- * Create a kernel thread
- */
-int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
-{
-	struct pt_regs regs;
-
-	memset(&regs, 0, sizeof(regs));
-	regs.ex1 = PL_ICS_EX1(KERNEL_PL, 0);  /* run at kernel PL, no ICS */
-	regs.pc = (long) start_kernel_thread;
-	regs.flags = PT_FLAGS_CALLER_SAVES;   /* need to restore r1 and r2 */
-	regs.regs[1] = (long) fn;             /* function pointer */
-	regs.regs[2] = (long) arg;            /* parameter register */
-
-	/* Ok, create the new process.. */
-	return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, &regs,
-		       0, NULL, NULL);
-}
-EXPORT_SYMBOL(kernel_thread);
-
 /* Flush thread state. */
 void flush_thread(void)
 {
diff --git a/arch/tile/kernel/signal.c b/arch/tile/kernel/signal.c
index 67efb65..657a7ac 100644
--- a/arch/tile/kernel/signal.c
+++ b/arch/tile/kernel/signal.c
@@ -37,10 +37,10 @@
 
 #define DEBUG_SIG 0
 
-SYSCALL_DEFINE3(sigaltstack, const stack_t __user *, uss,
-		stack_t __user *, uoss, struct pt_regs *, regs)
+SYSCALL_DEFINE2(sigaltstack, const stack_t __user *, uss,
+		stack_t __user *, uoss)
 {
-	return do_sigaltstack(uss, uoss, regs->sp);
+	return do_sigaltstack(uss, uoss, current_pt_regs()->sp);
 }
 
 
@@ -83,8 +83,9 @@
 }
 
 /* The assembly shim for this function arranges to ignore the return value. */
-SYSCALL_DEFINE1(rt_sigreturn, struct pt_regs *, regs)
+SYSCALL_DEFINE0(rt_sigreturn)
 {
+	struct pt_regs *regs = current_pt_regs();
 	struct rt_sigframe __user *frame =
 		(struct rt_sigframe __user *)(regs->sp);
 	sigset_t set;
diff --git a/arch/tile/kernel/sys.c b/arch/tile/kernel/sys.c
index b08095b..b881a7be 100644
--- a/arch/tile/kernel/sys.c
+++ b/arch/tile/kernel/sys.c
@@ -106,14 +106,10 @@
 #define sys_readahead sys32_readahead
 #endif
 
-/* Call the trampolines to manage pt_regs where necessary. */
-#define sys_execve _sys_execve
-#define sys_sigaltstack _sys_sigaltstack
+/* Call the assembly trampolines where necessary. */
+#undef sys_rt_sigreturn
 #define sys_rt_sigreturn _sys_rt_sigreturn
 #define sys_clone _sys_clone
-#ifndef __tilegx__
-#define sys_cmpxchg_badaddr _sys_cmpxchg_badaddr
-#endif
 
 /*
  * Note that we can't include <linux/unistd.h> here since the header
diff --git a/arch/tile/mm/fault.c b/arch/tile/mm/fault.c
index fe811fa..3d2b81c 100644
--- a/arch/tile/mm/fault.c
+++ b/arch/tile/mm/fault.c
@@ -70,9 +70,10 @@
  * Synthesize the fault a PL0 process would get by doing a word-load of
  * an unaligned address or a high kernel address.
  */
-SYSCALL_DEFINE2(cmpxchg_badaddr, unsigned long, address,
-		struct pt_regs *, regs)
+SYSCALL_DEFINE1(cmpxchg_badaddr, unsigned long, address)
 {
+	struct pt_regs *regs = current_pt_regs();
+
 	if (address >= PAGE_OFFSET)
 		force_sig_info_fault("atomic segfault", SIGSEGV, SEGV_MAPERR,
 				     address, INT_DTLB_MISS, current, regs);
diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c
index b6d699c..b462b13 100644
--- a/arch/um/kernel/process.c
+++ b/arch/um/kernel/process.c
@@ -161,8 +161,7 @@
 }
 
 int copy_thread(unsigned long clone_flags, unsigned long sp,
-		unsigned long arg, struct task_struct * p,
-		struct pt_regs *regs)
+		unsigned long arg, struct task_struct * p)
 {
 	void (*handler)(void);
 	int kthread = current->flags & PF_KTHREAD;
@@ -171,7 +170,7 @@
 	p->thread = (struct thread_struct) INIT_THREAD;
 
 	if (!kthread) {
-	  	memcpy(&p->thread.regs.regs, &regs->regs,
+	  	memcpy(&p->thread.regs.regs, current_pt_regs(),
 		       sizeof(p->thread.regs.regs));
 		PT_REGS_SET_SYSCALL_RETURN(&p->thread.regs, 0);
 		if (sp != 0)
diff --git a/arch/um/kernel/syscall.c b/arch/um/kernel/syscall.c
index a81f370..c1d0ae0 100644
--- a/arch/um/kernel/syscall.c
+++ b/arch/um/kernel/syscall.c
@@ -14,29 +14,6 @@
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
 
-long sys_fork(void)
-{
-	return do_fork(SIGCHLD, UPT_SP(&current->thread.regs.regs),
-		      &current->thread.regs, 0, NULL, NULL);
-}
-
-long sys_vfork(void)
-{
-	return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD,
-		      UPT_SP(&current->thread.regs.regs),
-		      &current->thread.regs, 0, NULL, NULL);
-}
-
-long sys_clone(unsigned long clone_flags, unsigned long newsp,
-	       void __user *parent_tid, void __user *child_tid)
-{
-	if (!newsp)
-		newsp = UPT_SP(&current->thread.regs.regs);
-
-	return do_fork(clone_flags, newsp, &current->thread.regs, 0, parent_tid,
-		      child_tid);
-}
-
 long old_mmap(unsigned long addr, unsigned long len,
 	      unsigned long prot, unsigned long flags,
 	      unsigned long fd, unsigned long offset)
diff --git a/arch/unicore32/include/uapi/asm/unistd.h b/arch/unicore32/include/uapi/asm/unistd.h
index d18a3be8..00cf5e2 100644
--- a/arch/unicore32/include/uapi/asm/unistd.h
+++ b/arch/unicore32/include/uapi/asm/unistd.h
@@ -13,3 +13,4 @@
 /* Use the standard ABI for syscalls. */
 #include <asm-generic/unistd.h>
 #define __ARCH_WANT_SYS_EXECVE
+#define __ARCH_WANT_SYS_CLONE
diff --git a/arch/unicore32/kernel/entry.S b/arch/unicore32/kernel/entry.S
index 7049350..581630d 100644
--- a/arch/unicore32/kernel/entry.S
+++ b/arch/unicore32/kernel/entry.S
@@ -668,12 +668,6 @@
 #endif
 	.ltorg
 
-ENTRY(sys_clone)
-		add	ip, sp, #S_OFF
-		stw	ip, [sp+], #4
-		b	__sys_clone
-ENDPROC(sys_clone)
-
 ENTRY(sys_rt_sigreturn)
 		add	r0, sp, #S_OFF
 		mov	why, #0		@ prevent syscall restart handling
diff --git a/arch/unicore32/kernel/process.c b/arch/unicore32/kernel/process.c
index a8fe265..62bad9f 100644
--- a/arch/unicore32/kernel/process.c
+++ b/arch/unicore32/kernel/process.c
@@ -262,26 +262,27 @@
 
 int
 copy_thread(unsigned long clone_flags, unsigned long stack_start,
-	    unsigned long stk_sz, struct task_struct *p, struct pt_regs *regs)
+	    unsigned long stk_sz, struct task_struct *p)
 {
 	struct thread_info *thread = task_thread_info(p);
 	struct pt_regs *childregs = task_pt_regs(p);
 
 	memset(&thread->cpu_context, 0, sizeof(struct cpu_context_save));
 	thread->cpu_context.sp = (unsigned long)childregs;
-	if (unlikely(!regs)) {
+	if (unlikely(p->flags & PF_KTHREAD)) {
 		thread->cpu_context.pc = (unsigned long)ret_from_kernel_thread;
 		thread->cpu_context.r4 = stack_start;
 		thread->cpu_context.r5 = stk_sz;
 		memset(childregs, 0, sizeof(struct pt_regs));
 	} else {
 		thread->cpu_context.pc = (unsigned long)ret_from_fork;
-		*childregs = *regs;
+		*childregs = *current_pt_regs();
 		childregs->UCreg_00 = 0;
-		childregs->UCreg_sp = stack_start;
+		if (stack_start)
+			childregs->UCreg_sp = stack_start;
 
 		if (clone_flags & CLONE_SETTLS)
-			childregs->UCreg_16 = regs->UCreg_03;
+			childregs->UCreg_16 = childregs->UCreg_03;
 	}
 	return 0;
 }
diff --git a/arch/unicore32/kernel/sys.c b/arch/unicore32/kernel/sys.c
index 9680134..cfe79c9 100644
--- a/arch/unicore32/kernel/sys.c
+++ b/arch/unicore32/kernel/sys.c
@@ -28,20 +28,6 @@
 #include <asm/syscalls.h>
 #include <asm/cacheflush.h>
 
-/* Clone a task - this clones the calling program thread.
- * This is called indirectly via a small wrapper
- */
-asmlinkage long __sys_clone(unsigned long clone_flags, unsigned long newsp,
-			 void __user *parent_tid, void __user *child_tid,
-			 struct pt_regs *regs)
-{
-	if (!newsp)
-		newsp = regs->UCreg_sp;
-
-	return do_fork(clone_flags, newsp, regs, 0,
-			parent_tid, child_tid);
-}
-
 /* Note: used by the compat code even in 64-bit Linux. */
 SYSCALL_DEFINE6(mmap2, unsigned long, addr, unsigned long, len,
 		unsigned long, prot, unsigned long, flags,
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 037c4e3..9195fd8 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -112,6 +112,7 @@
 	select GENERIC_KERNEL_EXECVE
 	select MODULES_USE_ELF_REL if X86_32
 	select MODULES_USE_ELF_RELA if X86_64
+	select CLONE_BACKWARDS if X86_32
 
 config INSTRUCTION_DECODER
 	def_bool y
diff --git a/arch/x86/ia32/ia32_aout.c b/arch/x86/ia32/ia32_aout.c
index 07b3a68..a703af1 100644
--- a/arch/x86/ia32/ia32_aout.c
+++ b/arch/x86/ia32/ia32_aout.c
@@ -35,7 +35,7 @@
 #undef WARN_OLD
 #undef CORE_DUMP /* definitely broken */
 
-static int load_aout_binary(struct linux_binprm *, struct pt_regs *regs);
+static int load_aout_binary(struct linux_binprm *);
 static int load_aout_library(struct file *);
 
 #ifdef CORE_DUMP
@@ -260,9 +260,10 @@
  * These are the functions used to load a.out style executables and shared
  * libraries.  There is no binary dependent code anywhere else.
  */
-static int load_aout_binary(struct linux_binprm *bprm, struct pt_regs *regs)
+static int load_aout_binary(struct linux_binprm *bprm)
 {
 	unsigned long error, fd_offset, rlim;
+	struct pt_regs *regs = current_pt_regs();
 	struct exec ex;
 	int retval;
 
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index 076745f..32e6f05 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -467,11 +467,16 @@
 	PTREGSCALL stub32_sigaltstack, sys32_sigaltstack, %rdx
 	PTREGSCALL stub32_execve, compat_sys_execve, %rcx
 	PTREGSCALL stub32_fork, sys_fork, %rdi
-	PTREGSCALL stub32_clone, sys32_clone, %rdx
 	PTREGSCALL stub32_vfork, sys_vfork, %rdi
 	PTREGSCALL stub32_iopl, sys_iopl, %rsi
 
 	ALIGN
+GLOBAL(stub32_clone)
+	leaq sys_clone(%rip),%rax
+	mov	%r8, %rcx
+	jmp  ia32_ptregs_common	
+
+	ALIGN
 ia32_ptregs_common:
 	popq %r11
 	CFI_ENDPROC
diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c
index 86d68d1..d0b689b 100644
--- a/arch/x86/ia32/sys_ia32.c
+++ b/arch/x86/ia32/sys_ia32.c
@@ -385,17 +385,6 @@
 	return ret;
 }
 
-asmlinkage long sys32_clone(unsigned int clone_flags, unsigned int newsp,
-			    struct pt_regs *regs)
-{
-	void __user *parent_tid = (void __user *)regs->dx;
-	void __user *child_tid = (void __user *)regs->di;
-
-	if (!newsp)
-		newsp = regs->sp;
-	return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid);
-}
-
 /*
  * Some system calls that need sign extended arguments. This could be
  * done by a generic wrapper.
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index e101b389..888184b 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -178,8 +178,6 @@
 
 extern void cpu_detect(struct cpuinfo_x86 *c);
 
-extern struct pt_regs *idle_regs(struct pt_regs *);
-
 extern void early_cpu_init(void);
 extern void identify_boot_cpu(void);
 extern void identify_secondary_cpu(struct cpuinfo_x86 *);
diff --git a/arch/x86/include/asm/signal.h b/arch/x86/include/asm/signal.h
index 323973f..0dba8b7a 100644
--- a/arch/x86/include/asm/signal.h
+++ b/arch/x86/include/asm/signal.h
@@ -260,8 +260,6 @@
 
 #endif /* !__i386__ */
 
-#define ptrace_signal_deliver(regs, cookie) do { } while (0)
-
 #endif /* __KERNEL__ */
 #endif /* __ASSEMBLY__ */
 
diff --git a/arch/x86/include/asm/sys_ia32.h b/arch/x86/include/asm/sys_ia32.h
index a9a8cf3..c76fae4 100644
--- a/arch/x86/include/asm/sys_ia32.h
+++ b/arch/x86/include/asm/sys_ia32.h
@@ -54,8 +54,6 @@
 asmlinkage long sys32_personality(unsigned long);
 asmlinkage long sys32_sendfile(int, int, compat_off_t __user *, s32);
 
-asmlinkage long sys32_clone(unsigned int, unsigned int, struct pt_regs *);
-
 long sys32_lseek(unsigned int, int, unsigned int);
 long sys32_kill(int, int);
 long sys32_fadvise64_64(int, __u32, __u32, __u32, __u32, int);
diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h
index 2be0b88..2f83747 100644
--- a/arch/x86/include/asm/syscalls.h
+++ b/arch/x86/include/asm/syscalls.h
@@ -20,15 +20,6 @@
 asmlinkage long sys_ioperm(unsigned long, unsigned long, int);
 long sys_iopl(unsigned int, struct pt_regs *);
 
-/* kernel/process.c */
-int sys_fork(struct pt_regs *);
-int sys_vfork(struct pt_regs *);
-long sys_execve(const char __user *,
-		const char __user *const __user *,
-		const char __user *const __user *);
-long sys_clone(unsigned long, unsigned long, void __user *,
-	       void __user *, struct pt_regs *);
-
 /* kernel/ldt.c */
 asmlinkage int sys_modify_ldt(int, void __user *, unsigned long);
 
diff --git a/arch/x86/include/asm/unistd.h b/arch/x86/include/asm/unistd.h
index 16f3fc6..0e7dea7 100644
--- a/arch/x86/include/asm/unistd.h
+++ b/arch/x86/include/asm/unistd.h
@@ -51,6 +51,9 @@
 # define __ARCH_WANT_SYS_UTIME
 # define __ARCH_WANT_SYS_WAITPID
 # define __ARCH_WANT_SYS_EXECVE
+# define __ARCH_WANT_SYS_FORK
+# define __ARCH_WANT_SYS_VFORK
+# define __ARCH_WANT_SYS_CLONE
 
 /*
  * "Conditional" syscalls
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index ca165ac..9c3ab43 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1173,15 +1173,6 @@
 DEFINE_PER_CPU_ALIGNED(struct stack_canary, stack_canary);
 #endif
 
-/* Make sure %fs and %gs are initialized properly in idle threads */
-struct pt_regs * __cpuinit idle_regs(struct pt_regs *regs)
-{
-	memset(regs, 0, sizeof(struct pt_regs));
-	regs->fs = __KERNEL_PERCPU;
-	regs->gs = __KERNEL_STACK_CANARY;
-
-	return regs;
-}
 #endif	/* CONFIG_X86_64 */
 
 /*
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 88b725a..c763116 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -739,30 +739,12 @@
 ENDPROC(ptregs_##name)
 
 PTREGSCALL1(iopl)
-PTREGSCALL0(fork)
-PTREGSCALL0(vfork)
 PTREGSCALL2(sigaltstack)
 PTREGSCALL0(sigreturn)
 PTREGSCALL0(rt_sigreturn)
 PTREGSCALL2(vm86)
 PTREGSCALL1(vm86old)
 
-/* Clone is an oddball.  The 4th arg is in %edi */
-ENTRY(ptregs_clone)
-	CFI_STARTPROC
-	leal 4(%esp),%eax
-	pushl_cfi %eax
-	pushl_cfi PT_EDI(%eax)
-	movl PT_EDX(%eax),%ecx
-	movl PT_ECX(%eax),%edx
-	movl PT_EBX(%eax),%eax
-	call sys_clone
-	addl $8,%esp
-	CFI_ADJUST_CFA_OFFSET -8
-	ret
-	CFI_ENDPROC
-ENDPROC(ptregs_clone)
-
 .macro FIXUP_ESPFIX_STACK
 /*
  * Switch back for ESPFIX stack to the normal zerobased stack
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 31b4612..70641af 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -845,9 +845,25 @@
 END(\label)
 	.endm
 
-	PTREGSCALL stub_clone, sys_clone, %r8
-	PTREGSCALL stub_fork, sys_fork, %rdi
-	PTREGSCALL stub_vfork, sys_vfork, %rdi
+	.macro FORK_LIKE func
+ENTRY(stub_\func)
+	CFI_STARTPROC
+	popq	%r11			/* save return address */
+	PARTIAL_FRAME 0
+	SAVE_REST
+	pushq	%r11			/* put it back on stack */
+	FIXUP_TOP_OF_STACK %r11, 8
+	DEFAULT_FRAME 0 8		/* offset 8: return address */
+	call sys_\func
+	RESTORE_TOP_OF_STACK %r11, 8
+	ret $REST_SKIP		/* pop extended registers */
+	CFI_ENDPROC
+END(stub_\func)
+	.endm
+
+	FORK_LIKE  clone
+	FORK_LIKE  fork
+	FORK_LIKE  vfork
 	PTREGSCALL stub_sigaltstack, sys_sigaltstack, %rdx
 	PTREGSCALL stub_iopl, sys_iopl, %rsi
 
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 2f99e31..2ed787f 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -262,36 +262,6 @@
 	propagate_user_return_notify(prev_p, next_p);
 }
 
-int sys_fork(struct pt_regs *regs)
-{
-	return do_fork(SIGCHLD, regs->sp, regs, 0, NULL, NULL);
-}
-
-/*
- * This is trivial, and on the face of it looks like it
- * could equally well be done in user mode.
- *
- * Not so, for quite unobvious reasons - register pressure.
- * In user mode vfork() cannot have a stack frame, and if
- * done by calling the "clone()" system call directly, you
- * do not have enough call-clobbered registers to hold all
- * the information you need.
- */
-int sys_vfork(struct pt_regs *regs)
-{
-	return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->sp, regs, 0,
-		       NULL, NULL);
-}
-
-long
-sys_clone(unsigned long clone_flags, unsigned long newsp,
-	  void __user *parent_tid, void __user *child_tid, struct pt_regs *regs)
-{
-	if (!newsp)
-		newsp = regs->sp;
-	return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid);
-}
-
 /*
  * Idle related variables and functions
  */
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 44e0bff..b5a8905 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -128,8 +128,7 @@
 }
 
 int copy_thread(unsigned long clone_flags, unsigned long sp,
-	unsigned long arg,
-	struct task_struct *p, struct pt_regs *regs)
+	unsigned long arg, struct task_struct *p)
 {
 	struct pt_regs *childregs = task_pt_regs(p);
 	struct task_struct *tsk;
@@ -138,7 +137,7 @@
 	p->thread.sp = (unsigned long) childregs;
 	p->thread.sp0 = (unsigned long) (childregs+1);
 
-	if (unlikely(!regs)) {
+	if (unlikely(p->flags & PF_KTHREAD)) {
 		/* kernel thread */
 		memset(childregs, 0, sizeof(struct pt_regs));
 		p->thread.ip = (unsigned long) ret_from_kernel_thread;
@@ -156,12 +155,13 @@
 		memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
 		return 0;
 	}
-	*childregs = *regs;
+	*childregs = *current_pt_regs();
 	childregs->ax = 0;
-	childregs->sp = sp;
+	if (sp)
+		childregs->sp = sp;
 
 	p->thread.ip = (unsigned long) ret_from_fork;
-	task_user_gs(p) = get_user_gs(regs);
+	task_user_gs(p) = get_user_gs(current_pt_regs());
 
 	p->fpu_counter = 0;
 	p->thread.io_bitmap_ptr = NULL;
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 16c6365..6e68a61 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -146,8 +146,7 @@
 }
 
 int copy_thread(unsigned long clone_flags, unsigned long sp,
-		unsigned long arg,
-	struct task_struct *p, struct pt_regs *regs)
+		unsigned long arg, struct task_struct *p)
 {
 	int err;
 	struct pt_regs *childregs;
@@ -169,7 +168,7 @@
 	savesegment(ds, p->thread.ds);
 	memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
 
-	if (unlikely(!regs)) {
+	if (unlikely(p->flags & PF_KTHREAD)) {
 		/* kernel thread */
 		memset(childregs, 0, sizeof(struct pt_regs));
 		childregs->sp = (unsigned long)childregs;
@@ -181,10 +180,11 @@
 		childregs->flags = X86_EFLAGS_IF | X86_EFLAGS_BIT1;
 		return 0;
 	}
-	*childregs = *regs;
+	*childregs = *current_pt_regs();
 
 	childregs->ax = 0;
-	childregs->sp = sp;
+	if (sp)
+		childregs->sp = sp;
 
 	err = -ENOMEM;
 	memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
diff --git a/arch/x86/syscalls/syscall_32.tbl b/arch/x86/syscalls/syscall_32.tbl
index a47103f..ee3c220 100644
--- a/arch/x86/syscalls/syscall_32.tbl
+++ b/arch/x86/syscalls/syscall_32.tbl
@@ -8,7 +8,7 @@
 #
 0	i386	restart_syscall		sys_restart_syscall
 1	i386	exit			sys_exit
-2	i386	fork			ptregs_fork			stub32_fork
+2	i386	fork			sys_fork			stub32_fork
 3	i386	read			sys_read
 4	i386	write			sys_write
 5	i386	open			sys_open			compat_sys_open
@@ -126,7 +126,7 @@
 117	i386	ipc			sys_ipc				sys32_ipc
 118	i386	fsync			sys_fsync
 119	i386	sigreturn		ptregs_sigreturn		stub32_sigreturn
-120	i386	clone			ptregs_clone			stub32_clone
+120	i386	clone			sys_clone			stub32_clone
 121	i386	setdomainname		sys_setdomainname
 122	i386	uname			sys_newuname
 123	i386	modify_ldt		sys_modify_ldt
@@ -196,7 +196,7 @@
 187	i386	sendfile		sys_sendfile			sys32_sendfile
 188	i386	getpmsg
 189	i386	putpmsg
-190	i386	vfork			ptregs_vfork			stub32_vfork
+190	i386	vfork			sys_vfork			stub32_vfork
 191	i386	ugetrlimit		sys_getrlimit			compat_sys_getrlimit
 192	i386	mmap2			sys_mmap_pgoff
 193	i386	truncate64		sys_truncate64			sys32_truncate64
diff --git a/arch/x86/um/Kconfig b/arch/x86/um/Kconfig
index b0c30da..9839970 100644
--- a/arch/x86/um/Kconfig
+++ b/arch/x86/um/Kconfig
@@ -25,6 +25,7 @@
 	select HAVE_AOUT
 	select ARCH_WANT_IPC_PARSE_VERSION
 	select MODULES_USE_ELF_REL
+	select CLONE_BACKWARDS
 
 config X86_64
 	def_bool 64BIT
diff --git a/arch/x86/um/shared/sysdep/syscalls.h b/arch/x86/um/shared/sysdep/syscalls.h
index ca255a8..bd9a89b 100644
--- a/arch/x86/um/shared/sysdep/syscalls.h
+++ b/arch/x86/um/shared/sysdep/syscalls.h
@@ -1,5 +1,3 @@
-extern long sys_clone(unsigned long clone_flags, unsigned long newsp,
-	       void __user *parent_tid, void __user *child_tid);
 #ifdef __i386__
 #include "syscalls_32.h"
 #else
diff --git a/arch/x86/um/sys_call_table_32.c b/arch/x86/um/sys_call_table_32.c
index 232e605..812e98c 100644
--- a/arch/x86/um/sys_call_table_32.c
+++ b/arch/x86/um/sys_call_table_32.c
@@ -24,13 +24,10 @@
 
 #define old_mmap sys_old_mmap
 
-#define ptregs_fork sys_fork
 #define ptregs_iopl sys_iopl
 #define ptregs_vm86old sys_vm86old
-#define ptregs_clone i386_clone
 #define ptregs_vm86 sys_vm86
 #define ptregs_sigaltstack sys_sigaltstack
-#define ptregs_vfork sys_vfork
 
 #define __SYSCALL_I386(nr, sym, compat) extern asmlinkage void sym(void) ;
 #include <asm/syscalls_32.h>
diff --git a/arch/x86/um/syscalls_32.c b/arch/x86/um/syscalls_32.c
index db444c7..e8bcea9 100644
--- a/arch/x86/um/syscalls_32.c
+++ b/arch/x86/um/syscalls_32.c
@@ -6,21 +6,6 @@
 #include <linux/syscalls.h>
 #include <sysdep/syscalls.h>
 
-/*
- * The prototype on i386 is:
- *
- *     int clone(int flags, void * child_stack, int * parent_tidptr, struct user_desc * newtls
- *
- * and the "newtls" arg. on i386 is read by copy_thread directly from the
- * register saved on the stack.
- */
-long i386_clone(unsigned long clone_flags, unsigned long newsp,
-		int __user *parent_tid, void *newtls, int __user *child_tid)
-{
-	return sys_clone(clone_flags, newsp, parent_tid, child_tid);
-}
-
-
 long sys_sigaction(int sig, const struct old_sigaction __user *act,
 			 struct old_sigaction __user *oact)
 {
diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig
index 0d1f36a..2481f26 100644
--- a/arch/xtensa/Kconfig
+++ b/arch/xtensa/Kconfig
@@ -16,6 +16,7 @@
 	select GENERIC_KERNEL_THREAD
 	select GENERIC_KERNEL_EXECVE
 	select ARCH_WANT_OPTIONAL_GPIOLIB
+	select CLONE_BACKWARDS
 	help
 	  Xtensa processors are 32-bit RISC machines designed by Tensilica
 	  primarily for embedded systems.  These processors are both
diff --git a/arch/xtensa/include/asm/signal.h b/arch/xtensa/include/asm/signal.h
index 72fd44c..6f586bd 100644
--- a/arch/xtensa/include/asm/signal.h
+++ b/arch/xtensa/include/asm/signal.h
@@ -27,7 +27,6 @@
 };
 
 #include <asm/sigcontext.h>
-#define ptrace_signal_deliver(regs, cookie) do { } while (0)
 
 #endif	/* __ASSEMBLY__ */
 #endif	/* _XTENSA_SIGNAL_H */
diff --git a/arch/xtensa/include/asm/syscall.h b/arch/xtensa/include/asm/syscall.h
index 124aeee..b00c928 100644
--- a/arch/xtensa/include/asm/syscall.h
+++ b/arch/xtensa/include/asm/syscall.h
@@ -10,8 +10,6 @@
 
 struct pt_regs;
 struct sigaction;
-asmlinkage long sys_execve(char*, char**, char**, struct pt_regs*);
-asmlinkage long xtensa_clone(unsigned long, unsigned long, struct pt_regs*);
 asmlinkage long xtensa_ptrace(long, long, long, long);
 asmlinkage long xtensa_sigreturn(struct pt_regs*);
 asmlinkage long xtensa_rt_sigreturn(struct pt_regs*);
diff --git a/arch/xtensa/include/asm/unistd.h b/arch/xtensa/include/asm/unistd.h
index f4e6eaa..e002dbc 100644
--- a/arch/xtensa/include/asm/unistd.h
+++ b/arch/xtensa/include/asm/unistd.h
@@ -2,6 +2,7 @@
 #define _XTENSA_UNISTD_H
 
 #define __ARCH_WANT_SYS_EXECVE
+#define __ARCH_WANT_SYS_CLONE
 #include <uapi/asm/unistd.h>
 
 /*
diff --git a/arch/xtensa/include/uapi/asm/unistd.h b/arch/xtensa/include/uapi/asm/unistd.h
index 9f36d0e..5162418 100644
--- a/arch/xtensa/include/uapi/asm/unistd.h
+++ b/arch/xtensa/include/uapi/asm/unistd.h
@@ -260,7 +260,7 @@
 /* Process Operations */
 
 #define __NR_clone 				116
-__SYSCALL(116, xtensa_clone, 5)
+__SYSCALL(116, sys_clone, 5)
 #define __NR_execve 				117
 __SYSCALL(117, sys_execve, 3)
 #define __NR_exit 				118
diff --git a/arch/xtensa/kernel/process.c b/arch/xtensa/kernel/process.c
index 09ae7bf..1accf28 100644
--- a/arch/xtensa/kernel/process.c
+++ b/arch/xtensa/kernel/process.c
@@ -199,8 +199,7 @@
  */
 
 int copy_thread(unsigned long clone_flags, unsigned long usp_thread_fn,
-		unsigned long thread_fn_arg,
-		struct task_struct *p, struct pt_regs *unused)
+		unsigned long thread_fn_arg, struct task_struct *p)
 {
 	struct pt_regs *childregs = task_pt_regs(p);
 
@@ -364,12 +363,3 @@
 {
 	return 0;
 }
-
-asmlinkage
-long xtensa_clone(unsigned long clone_flags, unsigned long newsp,
-                  void __user *parent_tid, void *child_tls,
-                  void __user *child_tid, long a5,
-                  struct pt_regs *regs)
-{
-        return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid);
-}
diff --git a/drivers/staging/gdm72xx/gdm_usb.c b/drivers/staging/gdm72xx/gdm_usb.c
index bce6104..e0cb2ff 100644
--- a/drivers/staging/gdm72xx/gdm_usb.c
+++ b/drivers/staging/gdm72xx/gdm_usb.c
@@ -739,8 +739,6 @@
 	unsigned long flags, flags2, expire;
 	int ret;
 
-	daemonize("k_mode_wimax");
-
 	while (!k_mode_stop) {
 
 		spin_lock_irqsave(&k_lock, flags2);
@@ -806,7 +804,7 @@
 static int __init usb_gdm_wimax_init(void)
 {
 #ifdef CONFIG_WIMAX_GDM72XX_K_MODE
-	kthread_run(k_mode_thread, NULL, "WiMax_thread");
+	kthread_run(k_mode_thread, NULL, "k_mode_wimax");
 #endif /* CONFIG_WIMAX_GDM72XX_K_MODE */
 	return usb_register(&gdm_usb_driver);
 }
diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c
index 0e7a6f8..6043567 100644
--- a/fs/binfmt_aout.c
+++ b/fs/binfmt_aout.c
@@ -30,7 +30,7 @@
 #include <asm/cacheflush.h>
 #include <asm/a.out-core.h>
 
-static int load_aout_binary(struct linux_binprm *, struct pt_regs * regs);
+static int load_aout_binary(struct linux_binprm *);
 static int load_aout_library(struct file*);
 
 #ifdef CONFIG_COREDUMP
@@ -201,8 +201,9 @@
  * libraries.  There is no binary dependent code anywhere else.
  */
 
-static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs)
+static int load_aout_binary(struct linux_binprm * bprm)
 {
+	struct pt_regs *regs = current_pt_regs();
 	struct exec ex;
 	unsigned long error;
 	unsigned long fd_offset;
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index fbd9f60..6d7d164 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -44,7 +44,7 @@
 #define user_siginfo_t siginfo_t
 #endif
 
-static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs);
+static int load_elf_binary(struct linux_binprm *bprm);
 static int load_elf_library(struct file *);
 static unsigned long elf_map(struct file *, unsigned long, struct elf_phdr *,
 				int, int, unsigned long);
@@ -558,7 +558,7 @@
 #endif
 }
 
-static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
+static int load_elf_binary(struct linux_binprm *bprm)
 {
 	struct file *interpreter = NULL; /* to shut gcc up */
  	unsigned long load_addr = 0, load_bias = 0;
@@ -575,6 +575,7 @@
 	unsigned long reloc_func_desc __maybe_unused = 0;
 	int executable_stack = EXSTACK_DEFAULT;
 	unsigned long def_flags = 0;
+	struct pt_regs *regs = current_pt_regs();
 	struct {
 		struct elfhdr elf_ex;
 		struct elfhdr interp_elf_ex;
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index a460491..dc84732 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -56,7 +56,7 @@
 
 MODULE_LICENSE("GPL");
 
-static int load_elf_fdpic_binary(struct linux_binprm *, struct pt_regs *);
+static int load_elf_fdpic_binary(struct linux_binprm *);
 static int elf_fdpic_fetch_phdrs(struct elf_fdpic_params *, struct file *);
 static int elf_fdpic_map_file(struct elf_fdpic_params *, struct file *,
 			      struct mm_struct *, const char *);
@@ -164,10 +164,10 @@
 /*
  * load an fdpic binary into various bits of memory
  */
-static int load_elf_fdpic_binary(struct linux_binprm *bprm,
-				 struct pt_regs *regs)
+static int load_elf_fdpic_binary(struct linux_binprm *bprm)
 {
 	struct elf_fdpic_params exec_params, interp_params;
+	struct pt_regs *regs = current_pt_regs();
 	struct elf_phdr *phdr;
 	unsigned long stack_size, entryaddr;
 #ifdef ELF_FDPIC_PLAT_INIT
diff --git a/fs/binfmt_em86.c b/fs/binfmt_em86.c
index 2790c7e..4e6cce5 100644
--- a/fs/binfmt_em86.c
+++ b/fs/binfmt_em86.c
@@ -22,7 +22,7 @@
 #define EM86_INTERP	"/usr/bin/em86"
 #define EM86_I_NAME	"em86"
 
-static int load_em86(struct linux_binprm *bprm,struct pt_regs *regs)
+static int load_em86(struct linux_binprm *bprm)
 {
 	char *interp, *i_name, *i_arg;
 	struct file * file;
@@ -90,7 +90,7 @@
 	if (retval < 0)
 		return retval;
 
-	return search_binary_handler(bprm, regs);
+	return search_binary_handler(bprm);
 }
 
 static struct linux_binfmt em86_format = {
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index e280352..b563719 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -88,7 +88,7 @@
 static int load_flat_shared_library(int id, struct lib_info *p);
 #endif
 
-static int load_flat_binary(struct linux_binprm *, struct pt_regs * regs);
+static int load_flat_binary(struct linux_binprm *);
 static int flat_core_dump(struct coredump_params *cprm);
 
 static struct linux_binfmt flat_format = {
@@ -858,9 +858,10 @@
  * libraries.  There is no binary dependent code anywhere else.
  */
 
-static int load_flat_binary(struct linux_binprm * bprm, struct pt_regs * regs)
+static int load_flat_binary(struct linux_binprm * bprm)
 {
 	struct lib_info libinfo;
+	struct pt_regs *regs = current_pt_regs();
 	unsigned long p = bprm->p;
 	unsigned long stack_len;
 	unsigned long start_addr;
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index 790b3cd..b0b70fb 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -104,7 +104,7 @@
 /*
  * the loader itself
  */
-static int load_misc_binary(struct linux_binprm *bprm, struct pt_regs *regs)
+static int load_misc_binary(struct linux_binprm *bprm)
 {
 	Node *fmt;
 	struct file * interp_file = NULL;
@@ -199,7 +199,7 @@
 
 	bprm->recursion_depth++;
 
-	retval = search_binary_handler (bprm, regs);
+	retval = search_binary_handler(bprm);
 	if (retval < 0)
 		goto _error;
 
diff --git a/fs/binfmt_script.c b/fs/binfmt_script.c
index d3b8c1f..8c95499 100644
--- a/fs/binfmt_script.c
+++ b/fs/binfmt_script.c
@@ -14,7 +14,7 @@
 #include <linux/err.h>
 #include <linux/fs.h>
 
-static int load_script(struct linux_binprm *bprm,struct pt_regs *regs)
+static int load_script(struct linux_binprm *bprm)
 {
 	const char *i_arg, *i_name;
 	char *cp;
@@ -95,7 +95,7 @@
 	retval = prepare_binprm(bprm);
 	if (retval < 0)
 		return retval;
-	return search_binary_handler(bprm,regs);
+	return search_binary_handler(bprm);
 }
 
 static struct linux_binfmt script_format = {
diff --git a/fs/binfmt_som.c b/fs/binfmt_som.c
index 4517aaf..4e00ed6 100644
--- a/fs/binfmt_som.c
+++ b/fs/binfmt_som.c
@@ -35,7 +35,7 @@
 
 #include <linux/elf.h>
 
-static int load_som_binary(struct linux_binprm * bprm, struct pt_regs * regs);
+static int load_som_binary(struct linux_binprm * bprm);
 static int load_som_library(struct file *);
 
 /*
@@ -180,13 +180,14 @@
  */
 
 static int
-load_som_binary(struct linux_binprm * bprm, struct pt_regs * regs)
+load_som_binary(struct linux_binprm * bprm)
 {
 	int retval;
 	unsigned int size;
 	unsigned long som_entry;
 	struct som_hdr *som_ex;
 	struct som_exec_auxhdr *hpuxhdr;
+	struct pt_regs *regs = current_pt_regs();
 
 	/* Get the exec-header */
 	som_ex = (struct som_hdr *) bprm->buf;
diff --git a/fs/coredump.c b/fs/coredump.c
index ce47379..1774932 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -458,7 +458,7 @@
 	return err;
 }
 
-void do_coredump(siginfo_t *siginfo, struct pt_regs *regs)
+void do_coredump(siginfo_t *siginfo)
 {
 	struct core_state core_state;
 	struct core_name cn;
@@ -474,7 +474,7 @@
 	static atomic_t core_dump_count = ATOMIC_INIT(0);
 	struct coredump_params cprm = {
 		.siginfo = siginfo,
-		.regs = regs,
+		.regs = signal_pt_regs(),
 		.limit = rlimit(RLIMIT_CORE),
 		/*
 		 * We must use the same mm->flags while dumping core to avoid
diff --git a/fs/exec.c b/fs/exec.c
index 0039055..721a299 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1349,7 +1349,7 @@
 /*
  * cycle the list of binary formats handler, until one recognizes the image
  */
-int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs)
+int search_binary_handler(struct linux_binprm *bprm)
 {
 	unsigned int depth = bprm->recursion_depth;
 	int try,retval;
@@ -1374,13 +1374,13 @@
 	for (try=0; try<2; try++) {
 		read_lock(&binfmt_lock);
 		list_for_each_entry(fmt, &formats, lh) {
-			int (*fn)(struct linux_binprm *, struct pt_regs *) = fmt->load_binary;
+			int (*fn)(struct linux_binprm *) = fmt->load_binary;
 			if (!fn)
 				continue;
 			if (!try_module_get(fmt->module))
 				continue;
 			read_unlock(&binfmt_lock);
-			retval = fn(bprm, regs);
+			retval = fn(bprm);
 			/*
 			 * Restore the depth counter to its starting value
 			 * in this call, so we don't have to rely on every
@@ -1439,8 +1439,7 @@
  */
 static int do_execve_common(const char *filename,
 				struct user_arg_ptr argv,
-				struct user_arg_ptr envp,
-				struct pt_regs *regs)
+				struct user_arg_ptr envp)
 {
 	struct linux_binprm *bprm;
 	struct file *file;
@@ -1524,7 +1523,7 @@
 	if (retval < 0)
 		goto out;
 
-	retval = search_binary_handler(bprm,regs);
+	retval = search_binary_handler(bprm);
 	if (retval < 0)
 		goto out;
 
@@ -1566,19 +1565,17 @@
 
 int do_execve(const char *filename,
 	const char __user *const __user *__argv,
-	const char __user *const __user *__envp,
-	struct pt_regs *regs)
+	const char __user *const __user *__envp)
 {
 	struct user_arg_ptr argv = { .ptr.native = __argv };
 	struct user_arg_ptr envp = { .ptr.native = __envp };
-	return do_execve_common(filename, argv, envp, regs);
+	return do_execve_common(filename, argv, envp);
 }
 
 #ifdef CONFIG_COMPAT
-int compat_do_execve(const char *filename,
+static int compat_do_execve(const char *filename,
 	const compat_uptr_t __user *__argv,
-	const compat_uptr_t __user *__envp,
-	struct pt_regs *regs)
+	const compat_uptr_t __user *__envp)
 {
 	struct user_arg_ptr argv = {
 		.is_compat = true,
@@ -1588,7 +1585,7 @@
 		.is_compat = true,
 		.ptr.compat = __envp,
 	};
-	return do_execve_common(filename, argv, envp, regs);
+	return do_execve_common(filename, argv, envp);
 }
 #endif
 
@@ -1669,7 +1666,7 @@
 	struct filename *path = getname(filename);
 	int error = PTR_ERR(path);
 	if (!IS_ERR(path)) {
-		error = do_execve(path->name, argv, envp, current_pt_regs());
+		error = do_execve(path->name, argv, envp);
 		putname(path);
 	}
 	return error;
@@ -1682,8 +1679,7 @@
 	struct filename *path = getname(filename);
 	int error = PTR_ERR(path);
 	if (!IS_ERR(path)) {
-		error = compat_do_execve(path->name, argv, envp,
-							current_pt_regs());
+		error = compat_do_execve(path->name, argv, envp);
 		putname(path);
 	}
 	return error;
@@ -1696,12 +1692,9 @@
 		  const char *const argv[],
 		  const char *const envp[])
 {
-	struct pt_regs *p = current_pt_regs();
-	int ret;
-
-	ret = do_execve(filename,
+	int ret = do_execve(filename,
 			(const char __user *const __user *)argv,
-			(const char __user *const __user *)envp, p);
+			(const char __user *const __user *)envp);
 	if (ret < 0)
 		return ret;
 
@@ -1709,6 +1702,6 @@
 	 * We were successful.  We won't be returning to our caller, but
 	 * instead to user space by manipulating the kernel stack.
 	 */
-	ret_from_kernel_execve(p);
+	ret_from_kernel_execve(current_pt_regs());
 }
 #endif
diff --git a/fs/file.c b/fs/file.c
index eff2316..15cb861 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -519,12 +519,6 @@
 	.file_lock	= __SPIN_LOCK_UNLOCKED(init_task.file_lock),
 };
 
-void daemonize_descriptors(void)
-{
-	atomic_inc(&init_files.count);
-	reset_files_struct(&init_files);
-}
-
 /*
  * allocate a file descriptor, mark it busy.
  */
diff --git a/fs/fs_struct.c b/fs/fs_struct.c
index 5df4775..fe6ca58 100644
--- a/fs/fs_struct.c
+++ b/fs/fs_struct.c
@@ -164,27 +164,3 @@
 	.seq		= SEQCNT_ZERO,
 	.umask		= 0022,
 };
-
-void daemonize_fs_struct(void)
-{
-	struct fs_struct *fs = current->fs;
-
-	if (fs) {
-		int kill;
-
-		task_lock(current);
-
-		spin_lock(&init_fs.lock);
-		init_fs.users++;
-		spin_unlock(&init_fs.lock);
-
-		spin_lock(&fs->lock);
-		current->fs = &init_fs;
-		kill = !--fs->users;
-		spin_unlock(&fs->lock);
-
-		task_unlock(current);
-		if (kill)
-			free_fs_struct(fs);
-	}
-}
diff --git a/include/asm-generic/signal.h b/include/asm-generic/signal.h
index 98caa30..d840c90 100644
--- a/include/asm-generic/signal.h
+++ b/include/asm-generic/signal.h
@@ -10,7 +10,5 @@
 #include <asm/sigcontext.h>
 #undef __HAVE_ARCH_SIG_BITOPS
 
-#define ptrace_signal_deliver(regs, cookie) do { } while (0)
-
 #endif /* __ASSEMBLY__ */
 #endif /* _ASM_GENERIC_SIGNAL_H */
diff --git a/include/asm-generic/syscalls.h b/include/asm-generic/syscalls.h
index d89dec8..58f466f 100644
--- a/include/asm-generic/syscalls.h
+++ b/include/asm-generic/syscalls.h
@@ -8,26 +8,6 @@
  * Calling conventions for these system calls can differ, so
  * it's possible to override them.
  */
-#ifndef sys_clone
-asmlinkage long sys_clone(unsigned long clone_flags, unsigned long newsp,
-			void __user *parent_tid, void __user *child_tid,
-			struct pt_regs *regs);
-#endif
-
-#ifndef sys_fork
-asmlinkage long sys_fork(struct pt_regs *regs);
-#endif
-
-#ifndef sys_vfork
-asmlinkage long sys_vfork(struct pt_regs *regs);
-#endif
-
-#ifndef sys_execve
-asmlinkage long sys_execve(const char __user *filename,
-			   const char __user *const __user *argv,
-			   const char __user *const __user *envp,
-			   struct pt_regs *regs);
-#endif
 
 #ifndef sys_mmap2
 asmlinkage long sys_mmap2(unsigned long addr, unsigned long len,
diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h
index cfcc6bf..2630c9b 100644
--- a/include/linux/binfmts.h
+++ b/include/linux/binfmts.h
@@ -72,7 +72,7 @@
 struct linux_binfmt {
 	struct list_head lh;
 	struct module *module;
-	int (*load_binary)(struct linux_binprm *, struct  pt_regs * regs);
+	int (*load_binary)(struct linux_binprm *);
 	int (*load_shlib)(struct file *);
 	int (*core_dump)(struct coredump_params *cprm);
 	unsigned long min_coredump;	/* minimal dump size */
@@ -95,7 +95,7 @@
 
 extern int prepare_binprm(struct linux_binprm *);
 extern int __must_check remove_arg_zero(struct linux_binprm *);
-extern int search_binary_handler(struct linux_binprm *, struct pt_regs *);
+extern int search_binary_handler(struct linux_binprm *);
 extern int flush_old_exec(struct linux_binprm * bprm);
 extern void setup_new_exec(struct linux_binprm * bprm);
 extern void would_dump(struct linux_binprm *, struct file *);
diff --git a/include/linux/compat.h b/include/linux/compat.h
index d0ced10..784ebfe 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -284,12 +284,8 @@
 		const struct compat_iovec __user *vec,
 		unsigned long vlen, u32 pos_low, u32 pos_high);
 
-int compat_do_execve(const char *filename, const compat_uptr_t __user *argv,
-		     const compat_uptr_t __user *envp, struct pt_regs *regs);
-#ifdef __ARCH_WANT_SYS_EXECVE
 asmlinkage long compat_sys_execve(const char __user *filename, const compat_uptr_t __user *argv,
 		     const compat_uptr_t __user *envp);
-#endif
 
 asmlinkage long compat_sys_select(int n, compat_ulong_t __user *inp,
 		compat_ulong_t __user *outp, compat_ulong_t __user *exp,
diff --git a/include/linux/coredump.h b/include/linux/coredump.h
index 1d73993..a98f1ca 100644
--- a/include/linux/coredump.h
+++ b/include/linux/coredump.h
@@ -13,9 +13,9 @@
 extern int dump_write(struct file *file, const void *addr, int nr);
 extern int dump_seek(struct file *file, loff_t off);
 #ifdef CONFIG_COREDUMP
-extern void do_coredump(siginfo_t *siginfo, struct pt_regs *regs);
+extern void do_coredump(siginfo_t *siginfo);
 #else
-static inline void do_coredump(siginfo_t *siginfo, struct pt_regs *regs) {}
+static inline void do_coredump(siginfo_t *siginfo) {}
 #endif
 
 #endif /* _LINUX_COREDUMP_H */
diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h
index 45052aa..fb7daca 100644
--- a/include/linux/fdtable.h
+++ b/include/linux/fdtable.h
@@ -95,7 +95,6 @@
 struct files_struct *get_files_struct(struct task_struct *);
 void put_files_struct(struct files_struct *fs);
 void reset_files_struct(struct files_struct *);
-void daemonize_descriptors(void);
 int unshare_files(struct files_struct **);
 struct files_struct *dup_fd(struct files_struct *, int *);
 void do_close_on_exec(struct files_struct *);
diff --git a/include/linux/fs_struct.h b/include/linux/fs_struct.h
index 003dc0f..d0ae3a8 100644
--- a/include/linux/fs_struct.h
+++ b/include/linux/fs_struct.h
@@ -21,7 +21,6 @@
 extern void set_fs_pwd(struct fs_struct *, struct path *);
 extern struct fs_struct *copy_fs_struct(struct fs_struct *);
 extern void free_fs_struct(struct fs_struct *);
-extern void daemonize_fs_struct(void);
 extern int unshare_fs_struct(void);
 
 static inline void get_fs_root(struct fs_struct *fs, struct path *root)
diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h
index e0ff468..a89ff04 100644
--- a/include/linux/ptrace.h
+++ b/include/linux/ptrace.h
@@ -329,6 +329,19 @@
 #define current_pt_regs() task_pt_regs(current)
 #endif
 
+#ifndef ptrace_signal_deliver
+#define ptrace_signal_deliver() ((void)0)
+#endif
+
+/*
+ * unlike current_pt_regs(), this one is equal to task_pt_regs(current)
+ * on *all* architectures; the only reason to have a per-arch definition
+ * is optimisation.
+ */
+#ifndef signal_pt_regs
+#define signal_pt_regs() task_pt_regs(current)
+#endif
+
 extern int task_current_syscall(struct task_struct *target, long *callno,
 				unsigned long args[6], unsigned int maxargs,
 				unsigned long *sp, unsigned long *pc);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index b96ff1e..651b51a 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2303,7 +2303,7 @@
 extern struct mm_struct *dup_mm(struct task_struct *tsk);
 
 extern int copy_thread(unsigned long, unsigned long, unsigned long,
-			struct task_struct *, struct pt_regs *);
+			struct task_struct *);
 extern void flush_thread(void);
 extern void exit_thread(void);
 
@@ -2315,14 +2315,13 @@
 
 extern void do_group_exit(int);
 
-extern void daemonize(const char *, ...);
 extern int allow_signal(int);
 extern int disallow_signal(int);
 
 extern int do_execve(const char *,
 		     const char __user * const __user *,
-		     const char __user * const __user *, struct pt_regs *);
-extern long do_fork(unsigned long, unsigned long, struct pt_regs *, unsigned long, int __user *, int __user *);
+		     const char __user * const __user *);
+extern long do_fork(unsigned long, unsigned long, unsigned long, int __user *, int __user *);
 struct task_struct *fork_idle(int);
 #ifdef CONFIG_GENERIC_KERNEL_THREAD
 extern pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags);
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 727f0cd7..91835e7 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -833,10 +833,22 @@
 #define kernel_execve(filename, argv, envp) \
 	do_execve(filename, \
 		(const char __user *const __user *)argv, \
-		(const char __user *const __user *)envp, \
-		current_pt_regs())
+		(const char __user *const __user *)envp)
 #endif
 
+asmlinkage long sys_fork(void);
+asmlinkage long sys_vfork(void);
+#ifdef CONFIG_CLONE_BACKWARDS
+asmlinkage long sys_clone(unsigned long, unsigned long, int __user *, int,
+	       int __user *);
+#else
+asmlinkage long sys_clone(unsigned long, unsigned long, int __user *,
+	       int __user *, int);
+#endif
+
+asmlinkage long sys_execve(const char __user *filename,
+		const char __user *const __user *argv,
+		const char __user *const __user *envp);
 
 asmlinkage long sys_perf_event_open(
 		struct perf_event_attr __user *attr_uptr,
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index fc7376b..e37e6a1 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -200,7 +200,6 @@
 	struct list_head    names_list;	/* anchor for struct audit_names->list */
 	char *		    filterkey;	/* key for rule that triggered record */
 	struct path	    pwd;
-	struct audit_context *previous; /* For nested syscalls */
 	struct audit_aux_data *aux;
 	struct audit_aux_data *aux_pids;
 	struct sockaddr_storage *sockaddr;
@@ -1091,29 +1090,13 @@
 
 static inline void audit_free_context(struct audit_context *context)
 {
-	struct audit_context *previous;
-	int		     count = 0;
-
-	do {
-		previous = context->previous;
-		if (previous || (count &&  count < 10)) {
-			++count;
-			printk(KERN_ERR "audit(:%d): major=%d name_count=%d:"
-			       " freeing multiple contexts (%d)\n",
-			       context->serial, context->major,
-			       context->name_count, count);
-		}
-		audit_free_names(context);
-		unroll_tree_refs(context, NULL, 0);
-		free_tree_refs(context);
-		audit_free_aux(context);
-		kfree(context->filterkey);
-		kfree(context->sockaddr);
-		kfree(context);
-		context  = previous;
-	} while (context);
-	if (count >= 10)
-		printk(KERN_ERR "audit: freed %d contexts\n", count);
+	audit_free_names(context);
+	unroll_tree_refs(context, NULL, 0);
+	free_tree_refs(context);
+	audit_free_aux(context);
+	kfree(context->filterkey);
+	kfree(context->sockaddr);
+	kfree(context);
 }
 
 void audit_log_task_context(struct audit_buffer *ab)
@@ -1783,42 +1766,6 @@
 	if (!context)
 		return;
 
-	/*
-	 * This happens only on certain architectures that make system
-	 * calls in kernel_thread via the entry.S interface, instead of
-	 * with direct calls.  (If you are porting to a new
-	 * architecture, hitting this condition can indicate that you
-	 * got the _exit/_leave calls backward in entry.S.)
-	 *
-	 * i386     no
-	 * x86_64   no
-	 * ppc64    yes (see arch/powerpc/platforms/iseries/misc.S)
-	 *
-	 * This also happens with vm86 emulation in a non-nested manner
-	 * (entries without exits), so this case must be caught.
-	 */
-	if (context->in_syscall) {
-		struct audit_context *newctx;
-
-#if AUDIT_DEBUG
-		printk(KERN_ERR
-		       "audit(:%d) pid=%d in syscall=%d;"
-		       " entering syscall=%d\n",
-		       context->serial, tsk->pid, context->major, major);
-#endif
-		newctx = audit_alloc_context(context->state);
-		if (newctx) {
-			newctx->previous   = context;
-			context		   = newctx;
-			tsk->audit_context = newctx;
-		} else	{
-			/* If we can't alloc a new context, the best we
-			 * can do is to leak memory (any pending putname
-			 * will be lost).  The only other alternative is
-			 * to abandon auditing. */
-			audit_zero_context(context, context->state);
-		}
-	}
 	BUG_ON(context->in_syscall || context->name_count);
 
 	if (!audit_enabled)
@@ -1881,28 +1828,21 @@
 	if (!list_empty(&context->killed_trees))
 		audit_kill_trees(&context->killed_trees);
 
-	if (context->previous) {
-		struct audit_context *new_context = context->previous;
-		context->previous  = NULL;
-		audit_free_context(context);
-		tsk->audit_context = new_context;
-	} else {
-		audit_free_names(context);
-		unroll_tree_refs(context, NULL, 0);
-		audit_free_aux(context);
-		context->aux = NULL;
-		context->aux_pids = NULL;
-		context->target_pid = 0;
-		context->target_sid = 0;
-		context->sockaddr_len = 0;
-		context->type = 0;
-		context->fds[0] = -1;
-		if (context->state != AUDIT_RECORD_CONTEXT) {
-			kfree(context->filterkey);
-			context->filterkey = NULL;
-		}
-		tsk->audit_context = context;
+	audit_free_names(context);
+	unroll_tree_refs(context, NULL, 0);
+	audit_free_aux(context);
+	context->aux = NULL;
+	context->aux_pids = NULL;
+	context->target_pid = 0;
+	context->target_sid = 0;
+	context->sockaddr_len = 0;
+	context->type = 0;
+	context->fds[0] = -1;
+	if (context->state != AUDIT_RECORD_CONTEXT) {
+		kfree(context->filterkey);
+		context->filterkey = NULL;
 	}
+	tsk->audit_context = context;
 }
 
 static inline void handle_one(const struct inode *inode)
diff --git a/kernel/exit.c b/kernel/exit.c
index 618f7ee..50d2e93 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -322,43 +322,6 @@
 	}
 }
 
-/**
- * reparent_to_kthreadd - Reparent the calling kernel thread to kthreadd
- *
- * If a kernel thread is launched as a result of a system call, or if
- * it ever exits, it should generally reparent itself to kthreadd so it
- * isn't in the way of other processes and is correctly cleaned up on exit.
- *
- * The various task state such as scheduling policy and priority may have
- * been inherited from a user process, so we reset them to sane values here.
- *
- * NOTE that reparent_to_kthreadd() gives the caller full capabilities.
- */
-static void reparent_to_kthreadd(void)
-{
-	write_lock_irq(&tasklist_lock);
-
-	ptrace_unlink(current);
-	/* Reparent to init */
-	current->real_parent = current->parent = kthreadd_task;
-	list_move_tail(&current->sibling, &current->real_parent->children);
-
-	/* Set the exit signal to SIGCHLD so we signal init on exit */
-	current->exit_signal = SIGCHLD;
-
-	if (task_nice(current) < 0)
-		set_user_nice(current, 0);
-	/* cpus_allowed? */
-	/* rt_priority? */
-	/* signals? */
-	memcpy(current->signal->rlim, init_task.signal->rlim,
-	       sizeof(current->signal->rlim));
-
-	atomic_inc(&init_cred.usage);
-	commit_creds(&init_cred);
-	write_unlock_irq(&tasklist_lock);
-}
-
 void __set_special_pids(struct pid *pid)
 {
 	struct task_struct *curr = current->group_leader;
@@ -370,13 +333,6 @@
 		change_pid(curr, PIDTYPE_PGID, pid);
 }
 
-static void set_special_pids(struct pid *pid)
-{
-	write_lock_irq(&tasklist_lock);
-	__set_special_pids(pid);
-	write_unlock_irq(&tasklist_lock);
-}
-
 /*
  * Let kernel threads use this to say that they allow a certain signal.
  * Must not be used if kthread was cloned with CLONE_SIGHAND.
@@ -416,54 +372,6 @@
 
 EXPORT_SYMBOL(disallow_signal);
 
-/*
- *	Put all the gunge required to become a kernel thread without
- *	attached user resources in one place where it belongs.
- */
-
-void daemonize(const char *name, ...)
-{
-	va_list args;
-	sigset_t blocked;
-
-	va_start(args, name);
-	vsnprintf(current->comm, sizeof(current->comm), name, args);
-	va_end(args);
-
-	/*
-	 * If we were started as result of loading a module, close all of the
-	 * user space pages.  We don't need them, and if we didn't close them
-	 * they would be locked into memory.
-	 */
-	exit_mm(current);
-	/*
-	 * We don't want to get frozen, in case system-wide hibernation
-	 * or suspend transition begins right now.
-	 */
-	current->flags |= (PF_NOFREEZE | PF_KTHREAD);
-
-	if (current->nsproxy != &init_nsproxy) {
-		get_nsproxy(&init_nsproxy);
-		switch_task_namespaces(current, &init_nsproxy);
-	}
-	set_special_pids(&init_struct_pid);
-	proc_clear_tty(current);
-
-	/* Block and flush all signals */
-	sigfillset(&blocked);
-	sigprocmask(SIG_BLOCK, &blocked, NULL);
-	flush_signals(current);
-
-	/* Become as one with the init task */
-
-	daemonize_fs_struct();
-	daemonize_descriptors();
-
-	reparent_to_kthreadd();
-}
-
-EXPORT_SYMBOL(daemonize);
-
 #ifdef CONFIG_MM_OWNER
 /*
  * A task is exiting.   If it owned this mm, find a new owner for the mm.
diff --git a/kernel/fork.c b/kernel/fork.c
index 79de9f9..3c31e87 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1129,7 +1129,6 @@
  */
 static struct task_struct *copy_process(unsigned long clone_flags,
 					unsigned long stack_start,
-					struct pt_regs *regs,
 					unsigned long stack_size,
 					int __user *child_tidptr,
 					struct pid *pid,
@@ -1321,7 +1320,7 @@
 	retval = copy_io(clone_flags, p);
 	if (retval)
 		goto bad_fork_cleanup_namespaces;
-	retval = copy_thread(clone_flags, stack_start, stack_size, p, regs);
+	retval = copy_thread(clone_flags, stack_start, stack_size, p);
 	if (retval)
 		goto bad_fork_cleanup_io;
 
@@ -1510,12 +1509,6 @@
 	return ERR_PTR(retval);
 }
 
-noinline struct pt_regs * __cpuinit __attribute__((weak)) idle_regs(struct pt_regs *regs)
-{
-	memset(regs, 0, sizeof(struct pt_regs));
-	return regs;
-}
-
 static inline void init_idle_pids(struct pid_link *links)
 {
 	enum pid_type type;
@@ -1529,10 +1522,7 @@
 struct task_struct * __cpuinit fork_idle(int cpu)
 {
 	struct task_struct *task;
-	struct pt_regs regs;
-
-	task = copy_process(CLONE_VM, 0, idle_regs(&regs), 0, NULL,
-			    &init_struct_pid, 0);
+	task = copy_process(CLONE_VM, 0, 0, NULL, &init_struct_pid, 0);
 	if (!IS_ERR(task)) {
 		init_idle_pids(task->pids);
 		init_idle(task, cpu);
@@ -1549,7 +1539,6 @@
  */
 long do_fork(unsigned long clone_flags,
 	      unsigned long stack_start,
-	      struct pt_regs *regs,
 	      unsigned long stack_size,
 	      int __user *parent_tidptr,
 	      int __user *child_tidptr)
@@ -1579,7 +1568,7 @@
 	 * requested, no event is reported; otherwise, report if the event
 	 * for the type of forking is enabled.
 	 */
-	if (!(clone_flags & CLONE_UNTRACED) && likely(user_mode(regs))) {
+	if (!(clone_flags & CLONE_UNTRACED)) {
 		if (clone_flags & CLONE_VFORK)
 			trace = PTRACE_EVENT_VFORK;
 		else if ((clone_flags & CSIGNAL) != SIGCHLD)
@@ -1591,7 +1580,7 @@
 			trace = 0;
 	}
 
-	p = copy_process(clone_flags, stack_start, regs, stack_size,
+	p = copy_process(clone_flags, stack_start, stack_size,
 			 child_tidptr, NULL, trace);
 	/*
 	 * Do this prior waking up the new thread - the thread pointer
@@ -1635,11 +1624,54 @@
  */
 pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
 {
-	return do_fork(flags|CLONE_VM|CLONE_UNTRACED, (unsigned long)fn, NULL,
+	return do_fork(flags|CLONE_VM|CLONE_UNTRACED, (unsigned long)fn,
 		(unsigned long)arg, NULL, NULL);
 }
 #endif
 
+#ifdef __ARCH_WANT_SYS_FORK
+SYSCALL_DEFINE0(fork)
+{
+#ifdef CONFIG_MMU
+	return do_fork(SIGCHLD, 0, 0, NULL, NULL);
+#else
+	/* can not support in nommu mode */
+	return(-EINVAL);
+#endif
+}
+#endif
+
+#ifdef __ARCH_WANT_SYS_VFORK
+SYSCALL_DEFINE0(vfork)
+{
+	return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, 0, 
+			0, NULL, NULL);
+}
+#endif
+
+#ifdef __ARCH_WANT_SYS_CLONE
+#ifdef CONFIG_CLONE_BACKWARDS
+SYSCALL_DEFINE5(clone, unsigned long, clone_flags, unsigned long, newsp,
+		 int __user *, parent_tidptr,
+		 int, tls_val,
+		 int __user *, child_tidptr)
+#elif defined(CONFIG_CLONE_BACKWARDS2)
+SYSCALL_DEFINE5(clone, unsigned long, newsp, unsigned long, clone_flags,
+		 int __user *, parent_tidptr,
+		 int __user *, child_tidptr,
+		 int, tls_val)
+#else
+SYSCALL_DEFINE5(clone, unsigned long, clone_flags, unsigned long, newsp,
+		 int __user *, parent_tidptr,
+		 int __user *, child_tidptr,
+		 int, tls_val)
+#endif
+{
+	return do_fork(clone_flags, newsp, 0,
+		parent_tidptr, child_tidptr);
+}
+#endif
+
 #ifndef ARCH_MIN_MMSTRUCT_ALIGN
 #define ARCH_MIN_MMSTRUCT_ALIGN 0
 #endif
diff --git a/kernel/signal.c b/kernel/signal.c
index 5ffb5626..a49c7f3 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1159,8 +1159,9 @@
 	return __send_signal(sig, info, t, group, from_ancestor_ns);
 }
 
-static void print_fatal_signal(struct pt_regs *regs, int signr)
+static void print_fatal_signal(int signr)
 {
+	struct pt_regs *regs = signal_pt_regs();
 	printk("%s/%d: potentially unexpected fatal signal %d.\n",
 		current->comm, task_pid_nr(current), signr);
 
@@ -2131,10 +2132,9 @@
 	}
 }
 
-static int ptrace_signal(int signr, siginfo_t *info,
-			 struct pt_regs *regs, void *cookie)
+static int ptrace_signal(int signr, siginfo_t *info)
 {
-	ptrace_signal_deliver(regs, cookie);
+	ptrace_signal_deliver();
 	/*
 	 * We do not check sig_kernel_stop(signr) but set this marker
 	 * unconditionally because we do not know whether debugger will
@@ -2257,8 +2257,7 @@
 			break; /* will return 0 */
 
 		if (unlikely(current->ptrace) && signr != SIGKILL) {
-			signr = ptrace_signal(signr, info,
-					      regs, cookie);
+			signr = ptrace_signal(signr, info);
 			if (!signr)
 				continue;
 		}
@@ -2343,7 +2342,7 @@
 
 		if (sig_kernel_coredump(signr)) {
 			if (print_fatal_signals)
-				print_fatal_signal(regs, info->si_signo);
+				print_fatal_signal(info->si_signo);
 			/*
 			 * If it was able to dump core, this kills all
 			 * other threads in the group and synchronizes with
@@ -2352,7 +2351,7 @@
 			 * first and our do_group_exit call below will use
 			 * that value and ignore the one we pass it.
 			 */
-			do_coredump(info, regs);
+			do_coredump(info);
 		}
 
 		/*