Merge gregkh@master.kernel.org:/home/rmk/linux-2.6-arm
diff --git a/arch/arm/common/dmabounce.c b/arch/arm/common/dmabounce.c
index 5b7c263..028bdc9 100644
--- a/arch/arm/common/dmabounce.c
+++ b/arch/arm/common/dmabounce.c
@@ -179,17 +179,19 @@
 static inline struct safe_buffer *
 find_safe_buffer(struct dmabounce_device_info *device_info, dma_addr_t safe_dma_addr)
 {
-	struct safe_buffer *b = NULL;
+	struct safe_buffer *b, *rb = NULL;
 	unsigned long flags;
 
 	read_lock_irqsave(&device_info->lock, flags);
 
 	list_for_each_entry(b, &device_info->safe_buffers, node)
-		if (b->safe_dma_addr == safe_dma_addr)
+		if (b->safe_dma_addr == safe_dma_addr) {
+			rb = b;
 			break;
+		}
 
 	read_unlock_irqrestore(&device_info->lock, flags);
-	return b;
+	return rb;
 }
 
 static inline void
diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
index 7ea5f01..de4e331 100644
--- a/arch/arm/kernel/entry-armv.S
+++ b/arch/arm/kernel/entry-armv.S
@@ -634,6 +634,14 @@
  * purpose.
  */
 
+	.macro	usr_ret, reg
+#ifdef CONFIG_ARM_THUMB
+	bx	\reg
+#else
+	mov	pc, \reg
+#endif
+	.endm
+
 	.align	5
 	.globl	__kuser_helper_start
 __kuser_helper_start:
@@ -675,7 +683,7 @@
 #if __LINUX_ARM_ARCH__ >= 6 && defined(CONFIG_SMP)
 	mcr	p15, 0, r0, c7, c10, 5	@ dmb
 #endif
-	mov	pc, lr
+	usr_ret	lr
 
 	.align	5
 
@@ -778,7 +786,7 @@
 	mov	r0, #-1
 	adds	r0, r0, #0
 #endif
-	mov	pc, lr
+	usr_ret	lr
 
 #else
 
@@ -792,7 +800,7 @@
 #ifdef CONFIG_SMP
 	mcr	p15, 0, r0, c7, c10, 5	@ dmb
 #endif
-	mov	pc, lr
+	usr_ret	lr
 
 #endif
 
@@ -834,16 +842,11 @@
 __kuser_get_tls:				@ 0xffff0fe0
 
 #if !defined(CONFIG_HAS_TLS_REG) && !defined(CONFIG_TLS_REG_EMUL)
-
 	ldr	r0, [pc, #(16 - 8)]		@ TLS stored at 0xffff0ff0
-	mov	pc, lr
-
 #else
-
 	mrc	p15, 0, r0, c13, c0, 3		@ read TLS register
-	mov	pc, lr
-
 #endif
+	usr_ret	lr
 
 	.rep	5
 	.word	0			@ pad up to __kuser_helper_version
diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S
index 4fe386e..5365d4e 100644
--- a/arch/arm/kernel/head.S
+++ b/arch/arm/kernel/head.S
@@ -118,7 +118,7 @@
 	sub	r4, r4, r5			@ mmu has been enabled
 	ldr	r4, [r7, r4]			@ get secondary_data.pgdir
 	adr	lr, __enable_mmu		@ return address
-	add	pc, r10, #12			@ initialise processor
+	add	pc, r10, #PROCINFO_INITFUNC	@ initialise processor
 						@ (return control reg)
 
 	/*
diff --git a/arch/arm/mach-s3c2410/Makefile b/arch/arm/mach-s3c2410/Makefile
index 0c79386..273e05f 100644
--- a/arch/arm/mach-s3c2410/Makefile
+++ b/arch/arm/mach-s3c2410/Makefile
@@ -10,45 +10,47 @@
 obj-n			:=
 obj-			:=
 
+# DMA
+obj-$(CONFIG_S3C2410_DMA)	+= dma.o
+
 # S3C2400 support files
-obj-$(CONFIG_CPU_S3C2400)  += s3c2400-gpio.o
+obj-$(CONFIG_CPU_S3C2400)	+= s3c2400-gpio.o
 
 # S3C2410 support files
 
-obj-$(CONFIG_CPU_S3C2410)  += s3c2410.o
-obj-$(CONFIG_CPU_S3C2410)  += s3c2410-gpio.o
-obj-$(CONFIG_S3C2410_DMA)  += dma.o
+obj-$(CONFIG_CPU_S3C2410)	+= s3c2410.o
+obj-$(CONFIG_CPU_S3C2410)	+= s3c2410-gpio.o
 
 # Power Management support
 
-obj-$(CONFIG_PM)	   += pm.o sleep.o
-obj-$(CONFIG_PM_SIMTEC)	   += pm-simtec.o
+obj-$(CONFIG_PM)		+= pm.o sleep.o
+obj-$(CONFIG_PM_SIMTEC)		+= pm-simtec.o
 
 # S3C2412 support
-obj-$(CONFIG_CPU_S3C2412)  += s3c2412.o
-obj-$(CONFIG_CPU_S3C2412)  += s3c2412-clock.o
+obj-$(CONFIG_CPU_S3C2412)	+= s3c2412.o
+obj-$(CONFIG_CPU_S3C2412)	+= s3c2412-clock.o
 
 #
 # S3C244X support
 
-obj-$(CONFIG_CPU_S3C244X)  += s3c244x.o
-obj-$(CONFIG_CPU_S3C244X)  += s3c244x-irq.o
+obj-$(CONFIG_CPU_S3C244X)	+= s3c244x.o
+obj-$(CONFIG_CPU_S3C244X)	+= s3c244x-irq.o
 
 # Clock control
 
-obj-$(CONFIG_S3C2410_CLOCK) += s3c2410-clock.o
+obj-$(CONFIG_S3C2410_CLOCK)	+= s3c2410-clock.o
 
 # S3C2440 support
 
-obj-$(CONFIG_CPU_S3C2440)  += s3c2440.o s3c2440-dsc.o
-obj-$(CONFIG_CPU_S3C2440)  += s3c2440-irq.o
-obj-$(CONFIG_CPU_S3C2440)  += s3c2440-clock.o
-obj-$(CONFIG_CPU_S3C2440)  += s3c2410-gpio.o
+obj-$(CONFIG_CPU_S3C2440)	+= s3c2440.o s3c2440-dsc.o
+obj-$(CONFIG_CPU_S3C2440)	+= s3c2440-irq.o
+obj-$(CONFIG_CPU_S3C2440)	+= s3c2440-clock.o
+obj-$(CONFIG_CPU_S3C2440)	+= s3c2410-gpio.o
 
 # S3C2442 support
 
-obj-$(CONFIG_CPU_S3C2442)  += s3c2442.o
-obj-$(CONFIG_CPU_S3C2442)  += s3c2442-clock.o
+obj-$(CONFIG_CPU_S3C2442)	+= s3c2442.o
+obj-$(CONFIG_CPU_S3C2442)	+= s3c2442-clock.o
 
 # bast extras
 
diff --git a/arch/arm/mach-s3c2410/dma.c b/arch/arm/mach-s3c2410/dma.c
index 094cc52..2585545 100644
--- a/arch/arm/mach-s3c2410/dma.c
+++ b/arch/arm/mach-s3c2410/dma.c
@@ -112,7 +112,7 @@
 }
 
 static void
-dmadbg_showregs(const char *fname, int line, s3c2410_dma_chan_t *chan,
+dmadbg_dumpregs(const char *fname, int line, s3c2410_dma_chan_t *chan,
 		 struct s3c2410_dma_regstate *regs)
 {
 	printk(KERN_DEBUG "dma%d: %s:%d: DCSRC=%08lx, DISRC=%08lx, DSTAT=%08lx DMT=%02lx, DCON=%08lx\n",
@@ -132,7 +132,16 @@
 	       chan->number, fname, line, chan->load_state,
 	       chan->curr, chan->next, chan->end);
 
-	dmadbg_showregs(fname, line, chan, &state);
+	dmadbg_dumpregs(fname, line, chan, &state);
+}
+
+static void
+dmadbg_showregs(const char *fname, int line, s3c2410_dma_chan_t *chan)
+{
+	struct s3c2410_dma_regstate state;
+
+	dmadbg_capture(chan, &state);
+	dmadbg_dumpregs(fname, line, chan, &state);
 }
 
 #define dbg_showregs(chan) dmadbg_showregs(__FUNCTION__, __LINE__, (chan))
@@ -253,10 +262,14 @@
 			 buf->next);
 		reload = (buf->next == NULL) ? S3C2410_DCON_NORELOAD : 0;
 	} else {
-		pr_debug("load_state is %d => autoreload\n", chan->load_state);
+		//pr_debug("load_state is %d => autoreload\n", chan->load_state);
 		reload = S3C2410_DCON_AUTORELOAD;
 	}
 
+	if ((buf->data & 0xf0000000) != 0x30000000) {
+		dmawarn("dmaload: buffer is %p\n", (void *)buf->data);
+	}
+
 	writel(buf->data, chan->addr_reg);
 
 	dma_wrreg(chan, S3C2410_DMA_DCON,
@@ -370,7 +383,7 @@
 	tmp |= S3C2410_DMASKTRIG_ON;
 	dma_wrreg(chan, S3C2410_DMA_DMASKTRIG, tmp);
 
-	pr_debug("wrote %08lx to DMASKTRIG\n", tmp);
+	pr_debug("dma%d: %08lx to DMASKTRIG\n", chan->number, tmp);
 
 #if 0
 	/* the dma buffer loads should take care of clearing the AUTO
@@ -384,7 +397,30 @@
 
 	dbg_showchan(chan);
 
+	/* if we've only loaded one buffer onto the channel, then check
+	 * to see if we have another, and if so, try and load it so when
+	 * the first buffer is finished, the new one will be loaded onto
+	 * the channel */
+
+	if (chan->next != NULL) {
+		if (chan->load_state == S3C2410_DMALOAD_1LOADED) {
+
+			if (s3c2410_dma_waitforload(chan, __LINE__) == 0) {
+				pr_debug("%s: buff not yet loaded, no more todo\n",
+					 __FUNCTION__);
+			} else {
+				chan->load_state = S3C2410_DMALOAD_1RUNNING;
+				s3c2410_dma_loadbuffer(chan, chan->next);
+			}
+
+		} else if (chan->load_state == S3C2410_DMALOAD_1RUNNING) {
+			s3c2410_dma_loadbuffer(chan, chan->next);
+		}
+	}
+
+
 	local_irq_restore(flags);
+
 	return 0;
 }
 
@@ -436,12 +472,11 @@
 	buf = kmem_cache_alloc(dma_kmem, GFP_ATOMIC);
 	if (buf == NULL) {
 		pr_debug("%s: out of memory (%ld alloc)\n",
-			 __FUNCTION__, sizeof(*buf));
+			 __FUNCTION__, (long)sizeof(*buf));
 		return -ENOMEM;
 	}
 
-	pr_debug("%s: new buffer %p\n", __FUNCTION__, buf);
-
+	//pr_debug("%s: new buffer %p\n", __FUNCTION__, buf);
 	//dbg_showchan(chan);
 
 	buf->next  = NULL;
@@ -537,14 +572,20 @@
 	case S3C2410_DMALOAD_1LOADED:
 		if (s3c2410_dma_waitforload(chan, __LINE__) == 0) {
 				/* flag error? */
-			printk(KERN_ERR "dma%d: timeout waiting for load\n",
-			       chan->number);
+			printk(KERN_ERR "dma%d: timeout waiting for load (%s)\n",
+			       chan->number, __FUNCTION__);
 			return;
 		}
 		break;
 
+	case S3C2410_DMALOAD_1LOADED_1RUNNING:
+		/* I believe in this case we do not have anything to do
+		 * until the next buffer comes along, and we turn off the
+		 * reload */
+		return;
+
 	default:
-		pr_debug("dma%d: lastxfer: unhandled load_state %d with no next",
+		pr_debug("dma%d: lastxfer: unhandled load_state %d with no next\n",
 			 chan->number, chan->load_state);
 		return;
 
@@ -629,7 +670,14 @@
 	} else {
 	}
 
-	if (chan->next != NULL) {
+	/* only reload if the channel is still running... our buffer done
+	 * routine may have altered the state by requesting the dma channel
+	 * to stop or shutdown... */
+
+	/* todo: check that when the channel is shut-down from inside this
+	 * function, we cope with unsetting reload, etc */
+
+	if (chan->next != NULL && chan->state != S3C2410_DMA_IDLE) {
 		unsigned long flags;
 
 		switch (chan->load_state) {
@@ -644,8 +692,8 @@
 		case S3C2410_DMALOAD_1LOADED:
 			if (s3c2410_dma_waitforload(chan, __LINE__) == 0) {
 				/* flag error? */
-				printk(KERN_ERR "dma%d: timeout waiting for load\n",
-				       chan->number);
+				printk(KERN_ERR "dma%d: timeout waiting for load (%s)\n",
+				       chan->number, __FUNCTION__);
 				return IRQ_HANDLED;
 			}
 
@@ -678,8 +726,6 @@
 	return IRQ_HANDLED;
 }
 
-
-
 /* s3c2410_request_dma
  *
  * get control of an dma channel
@@ -718,11 +764,17 @@
 		pr_debug("dma%d: %s : requesting irq %d\n",
 			 channel, __FUNCTION__, chan->irq);
 
+		chan->irq_claimed = 1;
+		local_irq_restore(flags);
+
 		err = request_irq(chan->irq, s3c2410_dma_irq, IRQF_DISABLED,
 				  client->name, (void *)chan);
 
+		local_irq_save(flags);
+
 		if (err) {
 			chan->in_use = 0;
+			chan->irq_claimed = 0;
 			local_irq_restore(flags);
 
 			printk(KERN_ERR "%s: cannot get IRQ %d for DMA %d\n",
@@ -730,7 +782,6 @@
 			return err;
 		}
 
-		chan->irq_claimed = 1;
 		chan->irq_enabled = 1;
 	}
 
@@ -810,6 +861,7 @@
 
 	tmp = dma_rdreg(chan, S3C2410_DMA_DMASKTRIG);
 	tmp |= S3C2410_DMASKTRIG_STOP;
+	//tmp &= ~S3C2410_DMASKTRIG_ON;
 	dma_wrreg(chan, S3C2410_DMA_DMASKTRIG, tmp);
 
 #if 0
@@ -819,6 +871,7 @@
 	dma_wrreg(chan, S3C2410_DMA_DCON, tmp);
 #endif
 
+	/* should stop do this, or should we wait for flush? */
 	chan->state      = S3C2410_DMA_IDLE;
 	chan->load_state = S3C2410_DMALOAD_NONE;
 
@@ -827,6 +880,22 @@
 	return 0;
 }
 
+void s3c2410_dma_waitforstop(s3c2410_dma_chan_t *chan)
+{
+	unsigned long tmp;
+	unsigned int timeout = 0x10000;
+
+	while (timeout-- > 0) {
+		tmp = dma_rdreg(chan, S3C2410_DMA_DMASKTRIG);
+
+		if (!(tmp & S3C2410_DMASKTRIG_ON))
+			return;
+	}
+
+	pr_debug("dma%d: failed to stop?\n", chan->number);
+}
+
+
 /* s3c2410_dma_flush
  *
  * stop the channel, and remove all current and pending transfers
@@ -837,7 +906,9 @@
 	s3c2410_dma_buf_t *buf, *next;
 	unsigned long flags;
 
-	pr_debug("%s:\n", __FUNCTION__);
+	pr_debug("%s: chan %p (%d)\n", __FUNCTION__, chan, chan->number);
+
+	dbg_showchan(chan);
 
 	local_irq_save(flags);
 
@@ -864,11 +935,64 @@
 		}
 	}
 
+	dbg_showregs(chan);
+
+	s3c2410_dma_waitforstop(chan);
+
+#if 0
+	/* should also clear interrupts, according to WinCE BSP */
+	{
+		unsigned long tmp;
+
+		tmp = dma_rdreg(chan, S3C2410_DMA_DCON);
+		tmp |= S3C2410_DCON_NORELOAD;
+		dma_wrreg(chan, S3C2410_DMA_DCON, tmp);
+	}
+#endif
+
+	dbg_showregs(chan);
+
 	local_irq_restore(flags);
 
 	return 0;
 }
 
+int
+s3c2410_dma_started(s3c2410_dma_chan_t *chan)
+{
+	unsigned long flags;
+
+	local_irq_save(flags);
+
+	dbg_showchan(chan);
+
+	/* if we've only loaded one buffer onto the channel, then check
+	 * to see if we have another, and if so, try and load it so when
+	 * the first buffer is finished, the new one will be loaded onto
+	 * the channel */
+
+	if (chan->next != NULL) {
+		if (chan->load_state == S3C2410_DMALOAD_1LOADED) {
+
+			if (s3c2410_dma_waitforload(chan, __LINE__) == 0) {
+				pr_debug("%s: buff not yet loaded, no more todo\n",
+					 __FUNCTION__);
+			} else {
+				chan->load_state = S3C2410_DMALOAD_1RUNNING;
+				s3c2410_dma_loadbuffer(chan, chan->next);
+			}
+
+		} else if (chan->load_state == S3C2410_DMALOAD_1RUNNING) {
+			s3c2410_dma_loadbuffer(chan, chan->next);
+		}
+	}
+
+
+	local_irq_restore(flags);
+
+	return 0;
+
+}
 
 int
 s3c2410_dma_ctrl(dmach_t channel, s3c2410_chan_op_t op)
@@ -885,14 +1009,15 @@
 		return s3c2410_dma_dostop(chan);
 
 	case S3C2410_DMAOP_PAUSE:
-		return -ENOENT;
-
 	case S3C2410_DMAOP_RESUME:
 		return -ENOENT;
 
 	case S3C2410_DMAOP_FLUSH:
 		return s3c2410_dma_flush(chan);
 
+	case S3C2410_DMAOP_STARTED:
+		return s3c2410_dma_started(chan);
+
 	case S3C2410_DMAOP_TIMEOUT:
 		return 0;
 
diff --git a/arch/arm/mach-versatile/core.c b/arch/arm/mach-versatile/core.c
index c4e3f8c..f2bbef0 100644
--- a/arch/arm/mach-versatile/core.c
+++ b/arch/arm/mach-versatile/core.c
@@ -285,7 +285,7 @@
 
 static struct resource versatile_flash_resource = {
 	.start			= VERSATILE_FLASH_BASE,
-	.end			= VERSATILE_FLASH_BASE + VERSATILE_FLASH_SIZE,
+	.end			= VERSATILE_FLASH_BASE + VERSATILE_FLASH_SIZE - 1,
 	.flags			= IORESOURCE_MEM,
 };
 
diff --git a/include/asm-arm/arch-s3c2410/dma.h b/include/asm-arm/arch-s3c2410/dma.h
index 72964f9..7463fd5 100644
--- a/include/asm-arm/arch-s3c2410/dma.h
+++ b/include/asm-arm/arch-s3c2410/dma.h
@@ -104,6 +104,7 @@
 	S3C2410_DMAOP_RESUME,
 	S3C2410_DMAOP_FLUSH,
 	S3C2410_DMAOP_TIMEOUT,           /* internal signal to handler */
+	S3C2410_DMAOP_STARTED,		/* indicate channel started */
 };
 
 typedef enum s3c2410_chan_op_e s3c2410_chan_op_t;
diff --git a/include/asm-arm/procinfo.h b/include/asm-arm/procinfo.h
index edb7b65..91a31ad 100644
--- a/include/asm-arm/procinfo.h
+++ b/include/asm-arm/procinfo.h
@@ -55,5 +55,6 @@
 #define HWCAP_VFP	64
 #define HWCAP_EDSP	128
 #define HWCAP_JAVA	256
+#define HWCAP_IWMMXT	512
 
 #endif