Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/drzeus/mmc

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/drzeus/mmc: (24 commits)
  MMC: Use timeout values from CSR
  MMC: CSD and CID timeout values
  sdhci: 'scratch' may be used uninitialized
  mmc: explicitly mention SDIO support in Kconfig
  mmc: remove redundant "depends on"
  Fix comment in include/linux/mmc/host.h
  sdio: high-speed support
  mmc_block: hard code 512 byte block size
  sdhci: force high speed capability on some controllers
  mmc_block: filter out PC requests
  mmc_block: indicate strict ordering
  mmc_block: inform block layer about sector count restriction
  sdio: give sdio irq thread a host specific name
  sdio: make sleep on error interruptable
  sdhci: reduce card detection delay
  sdhci: let the controller wait for busy state to end
  atmel-mci: Add missing flush_dcache_page() in PIO transfer code
  atmel-mci: Don't overwrite error bits when NOTBUSY is set
  atmel-mci: Add experimental DMA support
  atmel-mci: support multiple mmc slots
  ...
diff --git a/arch/avr32/boards/atngw100/setup.c b/arch/avr32/boards/atngw100/setup.c
index b8286f1ce..f308520 100644
--- a/arch/avr32/boards/atngw100/setup.c
+++ b/arch/avr32/boards/atngw100/setup.c
@@ -53,8 +53,11 @@
 };
 
 static struct mci_platform_data __initdata mci0_data = {
-	.detect_pin	= GPIO_PIN_PC(25),
-	.wp_pin		= GPIO_PIN_PE(0),
+	.slot[0] = {
+		.bus_width	= 4,
+		.detect_pin	= GPIO_PIN_PC(25),
+		.wp_pin		= GPIO_PIN_PE(0),
+	},
 };
 
 /*
diff --git a/arch/avr32/boards/atstk1000/atstk1002.c b/arch/avr32/boards/atstk1000/atstk1002.c
index dfc3443..4fedbc4 100644
--- a/arch/avr32/boards/atstk1000/atstk1002.c
+++ b/arch/avr32/boards/atstk1000/atstk1002.c
@@ -264,16 +264,20 @@
 
 #ifndef CONFIG_BOARD_ATSTK100X_SW2_CUSTOM
 
+static struct mci_platform_data __initdata mci0_data = {
+	.slot[0] = {
+		.bus_width	= 4,
+
 /* MMC card detect requires MACB0 *NOT* be used */
 #ifdef CONFIG_BOARD_ATSTK1002_SW6_CUSTOM
-static struct mci_platform_data __initdata mci0_data = {
-	.detect_pin	= GPIO_PIN_PC(14),	/* gpio30/sdcd */
-	.wp_pin		= GPIO_PIN_PC(15),	/* gpio31/sdwp */
-};
-#define MCI_PDATA	&mci0_data
+		.detect_pin	= GPIO_PIN_PC(14), /* gpio30/sdcd */
+		.wp_pin		= GPIO_PIN_PC(15), /* gpio31/sdwp */
 #else
-#define MCI_PDATA	NULL
+		.detect_pin	= -ENODEV,
+		.wp_pin		= -ENODEV,
 #endif	/* SW6 for sd{cd,wp} routing */
+	},
+};
 
 #endif	/* SW2 for MMC signal routing */
 
@@ -326,7 +330,7 @@
 	at32_add_device_spi(1, spi1_board_info, ARRAY_SIZE(spi1_board_info));
 #endif
 #ifndef CONFIG_BOARD_ATSTK100X_SW2_CUSTOM
-	at32_add_device_mci(0, MCI_PDATA);
+	at32_add_device_mci(0, &mci0_pdata);
 #endif
 #ifdef CONFIG_BOARD_ATSTK1002_SW5_CUSTOM
 	set_hw_addr(at32_add_device_eth(1, &eth_data[1]));
diff --git a/arch/avr32/boards/atstk1000/atstk1003.c b/arch/avr32/boards/atstk1000/atstk1003.c
index 0cf6641..acc6123 100644
--- a/arch/avr32/boards/atstk1000/atstk1003.c
+++ b/arch/avr32/boards/atstk1000/atstk1003.c
@@ -66,6 +66,16 @@
 } };
 #endif
 
+#ifndef CONFIG_BOARD_ATSTK100X_SW2_CUSTOM
+static struct mci_platform_data __initdata mci0_data = {
+	.slot[0] = {
+		.bus_width	= 4,
+		.detect_pin	= -ENODEV,
+		.wp_pin		= -ENODEV,
+	},
+};
+#endif
+
 #ifdef CONFIG_BOARD_ATSTK1000_EXTDAC
 static void __init atstk1003_setup_extdac(void)
 {
@@ -154,7 +164,7 @@
 	at32_add_device_spi(1, spi1_board_info, ARRAY_SIZE(spi1_board_info));
 #endif
 #ifndef CONFIG_BOARD_ATSTK100X_SW2_CUSTOM
-	at32_add_device_mci(0, NULL);
+	at32_add_device_mci(0, &mci0_data);
 #endif
 	at32_add_device_usba(0, NULL);
 #ifndef CONFIG_BOARD_ATSTK100X_SW3_CUSTOM
diff --git a/arch/avr32/boards/atstk1000/atstk1004.c b/arch/avr32/boards/atstk1000/atstk1004.c
index 50a5273..d6a2d02 100644
--- a/arch/avr32/boards/atstk1000/atstk1004.c
+++ b/arch/avr32/boards/atstk1000/atstk1004.c
@@ -71,6 +71,16 @@
 } };
 #endif
 
+#ifndef CONFIG_BOARD_ATSTK100X_SW2_CUSTOM
+static struct mci_platform_data __initdata mci0_data = {
+	.slot[0] = {
+		.bus_width	= 4,
+		.detect_pin	= -ENODEV,
+		.wp_pin		= -ENODEV,
+	},
+};
+#endif
+
 #ifdef CONFIG_BOARD_ATSTK1000_EXTDAC
 static void __init atstk1004_setup_extdac(void)
 {
@@ -137,7 +147,7 @@
 	at32_add_device_spi(1, spi1_board_info, ARRAY_SIZE(spi1_board_info));
 #endif
 #ifndef CONFIG_BOARD_ATSTK100X_SW2_CUSTOM
-	at32_add_device_mci(0, NULL);
+	at32_add_device_mci(0, &mci0_data);
 #endif
 	at32_add_device_lcdc(0, &atstk1000_lcdc_data,
 			     fbmem_start, fbmem_size, 0);
diff --git a/arch/avr32/include/asm/atmel-mci.h b/arch/avr32/include/asm/atmel-mci.h
index c2ea6e1..59f3fad 100644
--- a/arch/avr32/include/asm/atmel-mci.h
+++ b/arch/avr32/include/asm/atmel-mci.h
@@ -1,9 +1,39 @@
 #ifndef __ASM_AVR32_ATMEL_MCI_H
 #define __ASM_AVR32_ATMEL_MCI_H
 
-struct mci_platform_data {
+#define ATMEL_MCI_MAX_NR_SLOTS	2
+
+struct dma_slave;
+
+/**
+ * struct mci_slot_pdata - board-specific per-slot configuration
+ * @bus_width: Number of data lines wired up the slot
+ * @detect_pin: GPIO pin wired to the card detect switch
+ * @wp_pin: GPIO pin wired to the write protect sensor
+ *
+ * If a given slot is not present on the board, @bus_width should be
+ * set to 0. The other fields are ignored in this case.
+ *
+ * Any pins that aren't available should be set to a negative value.
+ *
+ * Note that support for multiple slots is experimental -- some cards
+ * might get upset if we don't get the clock management exactly right.
+ * But in most cases, it should work just fine.
+ */
+struct mci_slot_pdata {
+	unsigned int		bus_width;
 	int			detect_pin;
 	int			wp_pin;
 };
 
+/**
+ * struct mci_platform_data - board-specific MMC/SDcard configuration
+ * @dma_slave: DMA slave interface to use in data transfers, or NULL.
+ * @slot: Per-slot configuration data.
+ */
+struct mci_platform_data {
+	struct dma_slave	*dma_slave;
+	struct mci_slot_pdata	slot[ATMEL_MCI_MAX_NR_SLOTS];
+};
+
 #endif /* __ASM_AVR32_ATMEL_MCI_H */
diff --git a/arch/avr32/mach-at32ap/at32ap700x.c b/arch/avr32/mach-at32ap/at32ap700x.c
index e01dbe4..f1b9a3a 100644
--- a/arch/avr32/mach-at32ap/at32ap700x.c
+++ b/arch/avr32/mach-at32ap/at32ap700x.c
@@ -1272,10 +1272,14 @@
 struct platform_device *__init
 at32_add_device_mci(unsigned int id, struct mci_platform_data *data)
 {
-	struct mci_platform_data	_data;
 	struct platform_device		*pdev;
+	struct dw_dma_slave		*dws;
 
-	if (id != 0)
+	if (id != 0 || !data)
+		return NULL;
+
+	/* Must have at least one usable slot */
+	if (!data->slot[0].bus_width && !data->slot[1].bus_width)
 		return NULL;
 
 	pdev = platform_device_alloc("atmel_mci", id);
@@ -1286,28 +1290,76 @@
 				ARRAY_SIZE(atmel_mci0_resource)))
 		goto fail;
 
-	if (!data) {
-		data = &_data;
-		memset(data, -1, sizeof(struct mci_platform_data));
-		data->detect_pin = GPIO_PIN_NONE;
-		data->wp_pin = GPIO_PIN_NONE;
-	}
+	if (data->dma_slave)
+		dws = kmemdup(to_dw_dma_slave(data->dma_slave),
+				sizeof(struct dw_dma_slave), GFP_KERNEL);
+	else
+		dws = kzalloc(sizeof(struct dw_dma_slave), GFP_KERNEL);
+
+	dws->slave.dev = &pdev->dev;
+	dws->slave.dma_dev = &dw_dmac0_device.dev;
+	dws->slave.reg_width = DMA_SLAVE_WIDTH_32BIT;
+	dws->cfg_hi = (DWC_CFGH_SRC_PER(0)
+				| DWC_CFGH_DST_PER(1));
+	dws->cfg_lo &= ~(DWC_CFGL_HS_DST_POL
+				| DWC_CFGL_HS_SRC_POL);
+
+	data->dma_slave = &dws->slave;
 
 	if (platform_device_add_data(pdev, data,
 				sizeof(struct mci_platform_data)))
 		goto fail;
 
-	select_peripheral(PA(10), PERIPH_A, 0);	/* CLK	 */
-	select_peripheral(PA(11), PERIPH_A, 0);	/* CMD	 */
-	select_peripheral(PA(12), PERIPH_A, 0);	/* DATA0 */
-	select_peripheral(PA(13), PERIPH_A, 0);	/* DATA1 */
-	select_peripheral(PA(14), PERIPH_A, 0);	/* DATA2 */
-	select_peripheral(PA(15), PERIPH_A, 0);	/* DATA3 */
+	/* CLK line is common to both slots */
+	select_peripheral(PA(10), PERIPH_A, 0);
 
-	if (gpio_is_valid(data->detect_pin))
-		at32_select_gpio(data->detect_pin, 0);
-	if (gpio_is_valid(data->wp_pin))
-		at32_select_gpio(data->wp_pin, 0);
+	switch (data->slot[0].bus_width) {
+	case 4:
+		select_peripheral(PA(13), PERIPH_A, 0);	/* DATA1 */
+		select_peripheral(PA(14), PERIPH_A, 0);	/* DATA2 */
+		select_peripheral(PA(15), PERIPH_A, 0);	/* DATA3 */
+		/* fall through */
+	case 1:
+		select_peripheral(PA(11), PERIPH_A, 0);	/* CMD	 */
+		select_peripheral(PA(12), PERIPH_A, 0);	/* DATA0 */
+
+		if (gpio_is_valid(data->slot[0].detect_pin))
+			at32_select_gpio(data->slot[0].detect_pin, 0);
+		if (gpio_is_valid(data->slot[0].wp_pin))
+			at32_select_gpio(data->slot[0].wp_pin, 0);
+		break;
+	case 0:
+		/* Slot is unused */
+		break;
+	default:
+		goto fail;
+	}
+
+	switch (data->slot[1].bus_width) {
+	case 4:
+		select_peripheral(PB(8),  PERIPH_B, 0);	/* DATA1 */
+		select_peripheral(PB(9),  PERIPH_B, 0);	/* DATA2 */
+		select_peripheral(PB(10), PERIPH_B, 0);	/* DATA3 */
+		/* fall through */
+	case 1:
+		select_peripheral(PB(6),  PERIPH_B, 0); /* CMD	 */
+		select_peripheral(PB(7),  PERIPH_B, 0); /* DATA0 */
+
+		if (gpio_is_valid(data->slot[1].detect_pin))
+			at32_select_gpio(data->slot[1].detect_pin, 0);
+		if (gpio_is_valid(data->slot[1].wp_pin))
+			at32_select_gpio(data->slot[1].wp_pin, 0);
+		break;
+	case 0:
+		/* Slot is unused */
+		break;
+	default:
+		if (!data->slot[0].bus_width)
+			goto fail;
+
+		data->slot[1].bus_width = 0;
+		break;
+	}
 
 	atmel_mci0_pclk.dev = &pdev->dev;
 
diff --git a/drivers/mmc/Kconfig b/drivers/mmc/Kconfig
index c0b41e8..f2eeb38 100644
--- a/drivers/mmc/Kconfig
+++ b/drivers/mmc/Kconfig
@@ -3,13 +3,14 @@
 #
 
 menuconfig MMC
-	tristate "MMC/SD card support"
+	tristate "MMC/SD/SDIO card support"
 	depends on HAS_IOMEM
 	help
-	  MMC is the "multi-media card" bus protocol.
+	  This selects MultiMediaCard, Secure Digital and Secure
+	  Digital I/O support.
 
-	  If you want MMC support, you should say Y here and also
-	  to the specific driver for your MMC interface.
+	  If you want MMC/SD/SDIO support, you should say Y here and
+	  also to your specific host controller driver.
 
 config MMC_DEBUG
 	bool "MMC debugging"
diff --git a/drivers/mmc/card/Kconfig b/drivers/mmc/card/Kconfig
index dd0f398..3f2a912 100644
--- a/drivers/mmc/card/Kconfig
+++ b/drivers/mmc/card/Kconfig
@@ -2,7 +2,7 @@
 # MMC/SD card drivers
 #
 
-comment "MMC/SD Card Drivers"
+comment "MMC/SD/SDIO Card Drivers"
 
 config MMC_BLOCK
 	tristate "MMC block device driver"
@@ -34,7 +34,6 @@
 
 config SDIO_UART
 	tristate "SDIO UART/GPS class support"
-	depends on MMC
 	help
 	  SDIO function driver for SDIO cards that implements the UART
 	  class, as well as the GPS class which appears like a UART.
diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c
index efacee0..24c97d3 100644
--- a/drivers/mmc/card/block.c
+++ b/drivers/mmc/card/block.c
@@ -58,7 +58,6 @@
 	struct mmc_queue queue;
 
 	unsigned int	usage;
-	unsigned int	block_bits;
 	unsigned int	read_only;
 };
 
@@ -216,8 +215,7 @@
 	struct mmc_blk_data *md = mq->data;
 	struct mmc_card *card = md->queue.card;
 	struct mmc_blk_request brq;
-	int ret = 1, data_size, i;
-	struct scatterlist *sg;
+	int ret = 1;
 
 	mmc_claim_host(card->host);
 
@@ -233,13 +231,11 @@
 		if (!mmc_card_blockaddr(card))
 			brq.cmd.arg <<= 9;
 		brq.cmd.flags = MMC_RSP_SPI_R1 | MMC_RSP_R1 | MMC_CMD_ADTC;
-		brq.data.blksz = 1 << md->block_bits;
+		brq.data.blksz = 512;
 		brq.stop.opcode = MMC_STOP_TRANSMISSION;
 		brq.stop.arg = 0;
 		brq.stop.flags = MMC_RSP_SPI_R1B | MMC_RSP_R1B | MMC_CMD_AC;
-		brq.data.blocks = req->nr_sectors >> (md->block_bits - 9);
-		if (brq.data.blocks > card->host->max_blk_count)
-			brq.data.blocks = card->host->max_blk_count;
+		brq.data.blocks = req->nr_sectors;
 
 		if (brq.data.blocks > 1) {
 			/* SPI multiblock writes terminate using a special
@@ -271,24 +267,6 @@
 
 		mmc_queue_bounce_pre(mq);
 
-		/*
-		 * Adjust the sg list so it is the same size as the
-		 * request.
-		 */
-		if (brq.data.blocks !=
-		    (req->nr_sectors >> (md->block_bits - 9))) {
-			data_size = brq.data.blocks * brq.data.blksz;
-			for_each_sg(brq.data.sg, sg, brq.data.sg_len, i) {
-				data_size -= sg->length;
-				if (data_size <= 0) {
-					sg->length += data_size;
-					i++;
-					break;
-				}
-			}
-			brq.data.sg_len = i;
-		}
-
 		mmc_wait_for_req(card->host, &brq.mrq);
 
 		mmc_queue_bounce_post(mq);
@@ -373,16 +351,11 @@
 	if (rq_data_dir(req) != READ) {
 		if (mmc_card_sd(card)) {
 			u32 blocks;
-			unsigned int bytes;
 
 			blocks = mmc_sd_num_wr_blocks(card);
 			if (blocks != (u32)-1) {
-				if (card->csd.write_partial)
-					bytes = blocks << md->block_bits;
-				else
-					bytes = blocks << 9;
 				spin_lock_irq(&md->lock);
-				ret = __blk_end_request(req, 0, bytes);
+				ret = __blk_end_request(req, 0, blocks << 9);
 				spin_unlock_irq(&md->lock);
 			}
 		} else {
@@ -432,13 +405,6 @@
 	 */
 	md->read_only = mmc_blk_readonly(card);
 
-	/*
-	 * Both SD and MMC specifications state (although a bit
-	 * unclearly in the MMC case) that a block size of 512
-	 * bytes must always be supported by the card.
-	 */
-	md->block_bits = 9;
-
 	md->disk = alloc_disk(1 << MMC_SHIFT);
 	if (md->disk == NULL) {
 		ret = -ENOMEM;
@@ -476,7 +442,7 @@
 
 	sprintf(md->disk->disk_name, "mmcblk%d", devidx);
 
-	blk_queue_hardsect_size(md->queue.queue, 1 << md->block_bits);
+	blk_queue_hardsect_size(md->queue.queue, 512);
 
 	if (!mmc_card_sd(card) && mmc_card_blockaddr(card)) {
 		/*
@@ -514,7 +480,7 @@
 
 	mmc_claim_host(card->host);
 	cmd.opcode = MMC_SET_BLOCKLEN;
-	cmd.arg = 1 << md->block_bits;
+	cmd.arg = 512;
 	cmd.flags = MMC_RSP_SPI_R1 | MMC_RSP_R1 | MMC_CMD_AC;
 	err = mmc_wait_for_cmd(card->host, &cmd, 5);
 	mmc_release_host(card->host);
diff --git a/drivers/mmc/card/queue.c b/drivers/mmc/card/queue.c
index 3dee97e..406989e 100644
--- a/drivers/mmc/card/queue.c
+++ b/drivers/mmc/card/queue.c
@@ -31,7 +31,7 @@
 	/*
 	 * We only like normal block requests.
 	 */
-	if (!blk_fs_request(req) && !blk_pc_request(req)) {
+	if (!blk_fs_request(req)) {
 		blk_dump_rq_flags(req, "MMC bad request");
 		return BLKPREP_KILL;
 	}
@@ -131,6 +131,7 @@
 	mq->req = NULL;
 
 	blk_queue_prep_rq(mq->queue, mmc_prep_request);
+	blk_queue_ordered(mq->queue, QUEUE_ORDERED_DRAIN, NULL);
 
 #ifdef CONFIG_MMC_BLOCK_BOUNCE
 	if (host->max_hw_segs == 1) {
@@ -142,12 +143,19 @@
 			bouncesz = host->max_req_size;
 		if (bouncesz > host->max_seg_size)
 			bouncesz = host->max_seg_size;
+		if (bouncesz > (host->max_blk_count * 512))
+			bouncesz = host->max_blk_count * 512;
 
-		mq->bounce_buf = kmalloc(bouncesz, GFP_KERNEL);
-		if (!mq->bounce_buf) {
-			printk(KERN_WARNING "%s: unable to allocate "
-				"bounce buffer\n", mmc_card_name(card));
-		} else {
+		if (bouncesz > 512) {
+			mq->bounce_buf = kmalloc(bouncesz, GFP_KERNEL);
+			if (!mq->bounce_buf) {
+				printk(KERN_WARNING "%s: unable to "
+					"allocate bounce buffer\n",
+					mmc_card_name(card));
+			}
+		}
+
+		if (mq->bounce_buf) {
 			blk_queue_bounce_limit(mq->queue, BLK_BOUNCE_ANY);
 			blk_queue_max_sectors(mq->queue, bouncesz / 512);
 			blk_queue_max_phys_segments(mq->queue, bouncesz / 512);
@@ -175,7 +183,8 @@
 
 	if (!mq->bounce_buf) {
 		blk_queue_bounce_limit(mq->queue, limit);
-		blk_queue_max_sectors(mq->queue, host->max_req_size / 512);
+		blk_queue_max_sectors(mq->queue,
+			min(host->max_blk_count, host->max_req_size / 512));
 		blk_queue_max_phys_segments(mq->queue, host->max_phys_segs);
 		blk_queue_max_hw_segments(mq->queue, host->max_hw_segs);
 		blk_queue_max_segment_size(mq->queue, host->max_seg_size);
diff --git a/drivers/mmc/core/mmc_ops.c b/drivers/mmc/core/mmc_ops.c
index 64b05c6..9c50e6f 100644
--- a/drivers/mmc/core/mmc_ops.c
+++ b/drivers/mmc/core/mmc_ops.c
@@ -248,8 +248,12 @@
 
 	sg_init_one(&sg, data_buf, len);
 
-	if (card)
-		mmc_set_data_timeout(&data, card);
+	/*
+	 * The spec states that CSR and CID accesses have a timeout
+	 * of 64 clock cycles.
+	 */
+	data.timeout_ns = 0;
+	data.timeout_clks = 64;
 
 	mmc_wait_for_req(host, &mrq);
 
diff --git a/drivers/mmc/core/sdio.c b/drivers/mmc/core/sdio.c
index 4eab79e..fb99ccf 100644
--- a/drivers/mmc/core/sdio.c
+++ b/drivers/mmc/core/sdio.c
@@ -165,6 +165,36 @@
 }
 
 /*
+ * Test if the card supports high-speed mode and, if so, switch to it.
+ */
+static int sdio_enable_hs(struct mmc_card *card)
+{
+	int ret;
+	u8 speed;
+
+	if (!(card->host->caps & MMC_CAP_SD_HIGHSPEED))
+		return 0;
+
+	if (!card->cccr.high_speed)
+		return 0;
+
+	ret = mmc_io_rw_direct(card, 0, 0, SDIO_CCCR_SPEED, 0, &speed);
+	if (ret)
+		return ret;
+
+	speed |= SDIO_SPEED_EHS;
+
+	ret = mmc_io_rw_direct(card, 1, 0, SDIO_CCCR_SPEED, speed, NULL);
+	if (ret)
+		return ret;
+
+	mmc_card_set_highspeed(card);
+	mmc_set_timing(card->host, MMC_TIMING_SD_HS);
+
+	return 0;
+}
+
+/*
  * Host is being removed. Free up the current card.
  */
 static void mmc_sdio_remove(struct mmc_host *host)
@@ -333,10 +363,26 @@
 		goto remove;
 
 	/*
-	 * No support for high-speed yet, so just set
-	 * the card's maximum speed.
+	 * Switch to high-speed (if supported).
 	 */
-	mmc_set_clock(host, card->cis.max_dtr);
+	err = sdio_enable_hs(card);
+	if (err)
+		goto remove;
+
+	/*
+	 * Change to the card's maximum speed.
+	 */
+	if (mmc_card_highspeed(card)) {
+		/*
+		 * The SDIO specification doesn't mention how
+		 * the CIS transfer speed register relates to
+		 * high-speed, but it seems that 50 MHz is
+		 * mandatory.
+		 */
+		mmc_set_clock(host, 50000000);
+	} else {
+		mmc_set_clock(host, card->cis.max_dtr);
+	}
 
 	/*
 	 * Switch to wider bus (if supported).
diff --git a/drivers/mmc/core/sdio_irq.c b/drivers/mmc/core/sdio_irq.c
index c292e12..bb192f9 100644
--- a/drivers/mmc/core/sdio_irq.c
+++ b/drivers/mmc/core/sdio_irq.c
@@ -5,6 +5,8 @@
  * Created:     June 18, 2007
  * Copyright:   MontaVista Software Inc.
  *
+ * Copyright 2008 Pierre Ossman
+ *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation; either version 2 of the License, or (at
@@ -107,11 +109,14 @@
 
 		/*
 		 * Give other threads a chance to run in the presence of
-		 * errors.  FIXME: determine if due to card removal and
-		 * possibly exit this thread if so.
+		 * errors.
 		 */
-		if (ret < 0)
-			ssleep(1);
+		if (ret < 0) {
+			set_current_state(TASK_INTERRUPTIBLE);
+			if (!kthread_should_stop())
+				schedule_timeout(HZ);
+			set_current_state(TASK_RUNNING);
+		}
 
 		/*
 		 * Adaptive polling frequency based on the assumption
@@ -154,7 +159,8 @@
 	if (!host->sdio_irqs++) {
 		atomic_set(&host->sdio_irq_thread_abort, 0);
 		host->sdio_irq_thread =
-			kthread_run(sdio_irq_thread, host, "ksdiorqd");
+			kthread_run(sdio_irq_thread, host, "ksdioirqd/%s",
+				mmc_hostname(host));
 		if (IS_ERR(host->sdio_irq_thread)) {
 			int err = PTR_ERR(host->sdio_irq_thread);
 			host->sdio_irqs--;
diff --git a/drivers/mmc/host/Kconfig b/drivers/mmc/host/Kconfig
index ea8d7a3..dfa585f 100644
--- a/drivers/mmc/host/Kconfig
+++ b/drivers/mmc/host/Kconfig
@@ -2,7 +2,7 @@
 # MMC/SD host controller drivers
 #
 
-comment "MMC/SD Host Controller Drivers"
+comment "MMC/SD/SDIO Host Controller Drivers"
 
 config MMC_ARMMMCI
 	tristate "ARM AMBA Multimedia Card Interface support"
@@ -114,6 +114,17 @@
 
 	  If unsure, say N.
 
+config MMC_ATMELMCI_DMA
+	bool "Atmel MCI DMA support (EXPERIMENTAL)"
+	depends on MMC_ATMELMCI && DMA_ENGINE && EXPERIMENTAL
+	help
+	  Say Y here to have the Atmel MCI driver use a DMA engine to
+	  do data transfers and thus increase the throughput and
+	  reduce the CPU utilization. Note that this is highly
+	  experimental and may cause the driver to lock up.
+
+	  If unsure, say N.
+
 config MMC_IMX
 	tristate "Motorola i.MX Multimedia Card Interface support"
 	depends on ARCH_IMX
@@ -141,21 +152,22 @@
 	  module will be called tifm_sd.
 
 config MMC_SPI
-	tristate "MMC/SD over SPI"
-	depends on MMC && SPI_MASTER && !HIGHMEM && HAS_DMA
+	tristate "MMC/SD/SDIO over SPI"
+	depends on SPI_MASTER && !HIGHMEM && HAS_DMA
 	select CRC7
 	select CRC_ITU_T
 	help
-	  Some systems accss MMC/SD cards using a SPI controller instead of
-	  using a "native" MMC/SD controller.  This has a disadvantage of
-	  being relatively high overhead, but a compensating advantage of
-	  working on many systems without dedicated MMC/SD controllers.
+	  Some systems accss MMC/SD/SDIO cards using a SPI controller
+	  instead of using a "native" MMC/SD/SDIO controller.  This has a
+	  disadvantage of being relatively high overhead, but a compensating
+	  advantage of working on many systems without dedicated MMC/SD/SDIO
+	  controllers.
 
 	  If unsure, or if your system has no SPI master driver, say N.
 
 config MMC_S3C
 	tristate "Samsung S3C SD/MMC Card Interface support"
-	depends on ARCH_S3C2410 && MMC
+	depends on ARCH_S3C2410
 	help
 	  This selects a driver for the MCI interface found in
           Samsung's S3C2410, S3C2412, S3C2440, S3C2442 CPUs.
@@ -166,7 +178,7 @@
 
 config MMC_SDRICOH_CS
 	tristate "MMC/SD driver for Ricoh Bay1Controllers (EXPERIMENTAL)"
-	depends on EXPERIMENTAL && MMC && PCI && PCMCIA
+	depends on EXPERIMENTAL && PCI && PCMCIA
 	help
 	  Say Y here if your Notebook reports a Ricoh Bay1Controller PCMCIA
 	  card whenever you insert a MMC or SD card into the card slot.
diff --git a/drivers/mmc/host/atmel-mci-regs.h b/drivers/mmc/host/atmel-mci-regs.h
index 26bd80e..b58364e 100644
--- a/drivers/mmc/host/atmel-mci-regs.h
+++ b/drivers/mmc/host/atmel-mci-regs.h
@@ -25,8 +25,10 @@
 #define MCI_SDCR		0x000c	/* SD Card / SDIO */
 # define MCI_SDCSEL_SLOT_A	(  0 <<  0)	/* Select SD slot A */
 # define MCI_SDCSEL_SLOT_B	(  1 <<  0)	/* Select SD slot A */
-# define MCI_SDCBUS_1BIT	(  0 <<  7)	/* 1-bit data bus */
-# define MCI_SDCBUS_4BIT	(  1 <<  7)	/* 4-bit data bus */
+# define MCI_SDCSEL_MASK	(  3 <<  0)
+# define MCI_SDCBUS_1BIT	(  0 <<  6)	/* 1-bit data bus */
+# define MCI_SDCBUS_4BIT	(  2 <<  6)	/* 4-bit data bus */
+# define MCI_SDCBUS_MASK	(  3 <<  6)
 #define MCI_ARGR		0x0010	/* Command Argument */
 #define MCI_CMDR		0x0014	/* Command */
 # define MCI_CMDR_CMDNB(x)	((x) <<  0)	/* Command Opcode */
diff --git a/drivers/mmc/host/atmel-mci.c b/drivers/mmc/host/atmel-mci.c
index 0000896..7a3f243 100644
--- a/drivers/mmc/host/atmel-mci.c
+++ b/drivers/mmc/host/atmel-mci.c
@@ -11,6 +11,8 @@
 #include <linux/clk.h>
 #include <linux/debugfs.h>
 #include <linux/device.h>
+#include <linux/dmaengine.h>
+#include <linux/dma-mapping.h>
 #include <linux/err.h>
 #include <linux/gpio.h>
 #include <linux/init.h>
@@ -33,64 +35,178 @@
 #include "atmel-mci-regs.h"
 
 #define ATMCI_DATA_ERROR_FLAGS	(MCI_DCRCE | MCI_DTOE | MCI_OVRE | MCI_UNRE)
+#define ATMCI_DMA_THRESHOLD	16
 
 enum {
 	EVENT_CMD_COMPLETE = 0,
-	EVENT_DATA_ERROR,
-	EVENT_DATA_COMPLETE,
-	EVENT_STOP_SENT,
-	EVENT_STOP_COMPLETE,
 	EVENT_XFER_COMPLETE,
+	EVENT_DATA_COMPLETE,
+	EVENT_DATA_ERROR,
 };
 
+enum atmel_mci_state {
+	STATE_IDLE = 0,
+	STATE_SENDING_CMD,
+	STATE_SENDING_DATA,
+	STATE_DATA_BUSY,
+	STATE_SENDING_STOP,
+	STATE_DATA_ERROR,
+};
+
+struct atmel_mci_dma {
+#ifdef CONFIG_MMC_ATMELMCI_DMA
+	struct dma_client		client;
+	struct dma_chan			*chan;
+	struct dma_async_tx_descriptor	*data_desc;
+#endif
+};
+
+/**
+ * struct atmel_mci - MMC controller state shared between all slots
+ * @lock: Spinlock protecting the queue and associated data.
+ * @regs: Pointer to MMIO registers.
+ * @sg: Scatterlist entry currently being processed by PIO code, if any.
+ * @pio_offset: Offset into the current scatterlist entry.
+ * @cur_slot: The slot which is currently using the controller.
+ * @mrq: The request currently being processed on @cur_slot,
+ *	or NULL if the controller is idle.
+ * @cmd: The command currently being sent to the card, or NULL.
+ * @data: The data currently being transferred, or NULL if no data
+ *	transfer is in progress.
+ * @dma: DMA client state.
+ * @data_chan: DMA channel being used for the current data transfer.
+ * @cmd_status: Snapshot of SR taken upon completion of the current
+ *	command. Only valid when EVENT_CMD_COMPLETE is pending.
+ * @data_status: Snapshot of SR taken upon completion of the current
+ *	data transfer. Only valid when EVENT_DATA_COMPLETE or
+ *	EVENT_DATA_ERROR is pending.
+ * @stop_cmdr: Value to be loaded into CMDR when the stop command is
+ *	to be sent.
+ * @tasklet: Tasklet running the request state machine.
+ * @pending_events: Bitmask of events flagged by the interrupt handler
+ *	to be processed by the tasklet.
+ * @completed_events: Bitmask of events which the state machine has
+ *	processed.
+ * @state: Tasklet state.
+ * @queue: List of slots waiting for access to the controller.
+ * @need_clock_update: Update the clock rate before the next request.
+ * @need_reset: Reset controller before next request.
+ * @mode_reg: Value of the MR register.
+ * @bus_hz: The rate of @mck in Hz. This forms the basis for MMC bus
+ *	rate and timeout calculations.
+ * @mapbase: Physical address of the MMIO registers.
+ * @mck: The peripheral bus clock hooked up to the MMC controller.
+ * @pdev: Platform device associated with the MMC controller.
+ * @slot: Slots sharing this MMC controller.
+ *
+ * Locking
+ * =======
+ *
+ * @lock is a softirq-safe spinlock protecting @queue as well as
+ * @cur_slot, @mrq and @state. These must always be updated
+ * at the same time while holding @lock.
+ *
+ * @lock also protects mode_reg and need_clock_update since these are
+ * used to synchronize mode register updates with the queue
+ * processing.
+ *
+ * The @mrq field of struct atmel_mci_slot is also protected by @lock,
+ * and must always be written at the same time as the slot is added to
+ * @queue.
+ *
+ * @pending_events and @completed_events are accessed using atomic bit
+ * operations, so they don't need any locking.
+ *
+ * None of the fields touched by the interrupt handler need any
+ * locking. However, ordering is important: Before EVENT_DATA_ERROR or
+ * EVENT_DATA_COMPLETE is set in @pending_events, all data-related
+ * interrupts must be disabled and @data_status updated with a
+ * snapshot of SR. Similarly, before EVENT_CMD_COMPLETE is set, the
+ * CMDRDY interupt must be disabled and @cmd_status updated with a
+ * snapshot of SR, and before EVENT_XFER_COMPLETE can be set, the
+ * bytes_xfered field of @data must be written. This is ensured by
+ * using barriers.
+ */
 struct atmel_mci {
-	struct mmc_host		*mmc;
+	spinlock_t		lock;
 	void __iomem		*regs;
 
 	struct scatterlist	*sg;
 	unsigned int		pio_offset;
 
+	struct atmel_mci_slot	*cur_slot;
 	struct mmc_request	*mrq;
 	struct mmc_command	*cmd;
 	struct mmc_data		*data;
 
+	struct atmel_mci_dma	dma;
+	struct dma_chan		*data_chan;
+
 	u32			cmd_status;
 	u32			data_status;
-	u32			stop_status;
 	u32			stop_cmdr;
 
-	u32			mode_reg;
-	u32			sdc_reg;
-
 	struct tasklet_struct	tasklet;
 	unsigned long		pending_events;
 	unsigned long		completed_events;
+	enum atmel_mci_state	state;
+	struct list_head	queue;
 
-	int			present;
-	int			detect_pin;
-	int			wp_pin;
-
-	/* For detect pin debouncing */
-	struct timer_list	detect_timer;
-
+	bool			need_clock_update;
+	bool			need_reset;
+	u32			mode_reg;
 	unsigned long		bus_hz;
 	unsigned long		mapbase;
 	struct clk		*mck;
 	struct platform_device	*pdev;
+
+	struct atmel_mci_slot	*slot[ATMEL_MCI_MAX_NR_SLOTS];
 };
 
-#define atmci_is_completed(host, event)				\
-	test_bit(event, &host->completed_events)
+/**
+ * struct atmel_mci_slot - MMC slot state
+ * @mmc: The mmc_host representing this slot.
+ * @host: The MMC controller this slot is using.
+ * @sdc_reg: Value of SDCR to be written before using this slot.
+ * @mrq: mmc_request currently being processed or waiting to be
+ *	processed, or NULL when the slot is idle.
+ * @queue_node: List node for placing this node in the @queue list of
+ *	&struct atmel_mci.
+ * @clock: Clock rate configured by set_ios(). Protected by host->lock.
+ * @flags: Random state bits associated with the slot.
+ * @detect_pin: GPIO pin used for card detection, or negative if not
+ *	available.
+ * @wp_pin: GPIO pin used for card write protect sending, or negative
+ *	if not available.
+ * @detect_timer: Timer used for debouncing @detect_pin interrupts.
+ */
+struct atmel_mci_slot {
+	struct mmc_host		*mmc;
+	struct atmel_mci	*host;
+
+	u32			sdc_reg;
+
+	struct mmc_request	*mrq;
+	struct list_head	queue_node;
+
+	unsigned int		clock;
+	unsigned long		flags;
+#define ATMCI_CARD_PRESENT	0
+#define ATMCI_CARD_NEED_INIT	1
+#define ATMCI_SHUTDOWN		2
+
+	int			detect_pin;
+	int			wp_pin;
+
+	struct timer_list	detect_timer;
+};
+
 #define atmci_test_and_clear_pending(host, event)		\
 	test_and_clear_bit(event, &host->pending_events)
-#define atmci_test_and_set_completed(host, event)		\
-	test_and_set_bit(event, &host->completed_events)
 #define atmci_set_completed(host, event)			\
 	set_bit(event, &host->completed_events)
 #define atmci_set_pending(host, event)				\
 	set_bit(event, &host->pending_events)
-#define atmci_clear_pending(host, event)			\
-	clear_bit(event, &host->pending_events)
 
 /*
  * The debugfs stuff below is mostly optimized away when
@@ -98,14 +214,15 @@
  */
 static int atmci_req_show(struct seq_file *s, void *v)
 {
-	struct atmel_mci	*host = s->private;
-	struct mmc_request	*mrq = host->mrq;
+	struct atmel_mci_slot	*slot = s->private;
+	struct mmc_request	*mrq;
 	struct mmc_command	*cmd;
 	struct mmc_command	*stop;
 	struct mmc_data		*data;
 
 	/* Make sure we get a consistent snapshot */
-	spin_lock_irq(&host->mmc->lock);
+	spin_lock_bh(&slot->host->lock);
+	mrq = slot->mrq;
 
 	if (mrq) {
 		cmd = mrq->cmd;
@@ -130,7 +247,7 @@
 				stop->resp[2], stop->error);
 	}
 
-	spin_unlock_irq(&host->mmc->lock);
+	spin_unlock_bh(&slot->host->lock);
 
 	return 0;
 }
@@ -193,12 +310,16 @@
 	if (!buf)
 		return -ENOMEM;
 
-	/* Grab a more or less consistent snapshot */
-	spin_lock_irq(&host->mmc->lock);
+	/*
+	 * Grab a more or less consistent snapshot. Note that we're
+	 * not disabling interrupts, so IMR and SR may not be
+	 * consistent.
+	 */
+	spin_lock_bh(&host->lock);
 	clk_enable(host->mck);
 	memcpy_fromio(buf, host->regs, MCI_REGS_SIZE);
 	clk_disable(host->mck);
-	spin_unlock_irq(&host->mmc->lock);
+	spin_unlock_bh(&host->lock);
 
 	seq_printf(s, "MR:\t0x%08x%s%s CLKDIV=%u\n",
 			buf[MCI_MR / 4],
@@ -236,13 +357,13 @@
 	.release	= single_release,
 };
 
-static void atmci_init_debugfs(struct atmel_mci *host)
+static void atmci_init_debugfs(struct atmel_mci_slot *slot)
 {
-	struct mmc_host	*mmc;
-	struct dentry	*root;
-	struct dentry	*node;
+	struct mmc_host		*mmc = slot->mmc;
+	struct atmel_mci	*host = slot->host;
+	struct dentry		*root;
+	struct dentry		*node;
 
-	mmc = host->mmc;
 	root = mmc->debugfs_root;
 	if (!root)
 		return;
@@ -254,7 +375,11 @@
 	if (!node)
 		goto err;
 
-	node = debugfs_create_file("req", S_IRUSR, root, host, &atmci_req_fops);
+	node = debugfs_create_file("req", S_IRUSR, root, slot, &atmci_req_fops);
+	if (!node)
+		goto err;
+
+	node = debugfs_create_u32("state", S_IRUSR, root, (u32 *)&host->state);
 	if (!node)
 		goto err;
 
@@ -271,25 +396,7 @@
 	return;
 
 err:
-	dev_err(&host->pdev->dev,
-		"failed to initialize debugfs for controller\n");
-}
-
-static void atmci_enable(struct atmel_mci *host)
-{
-	clk_enable(host->mck);
-	mci_writel(host, CR, MCI_CR_MCIEN);
-	mci_writel(host, MR, host->mode_reg);
-	mci_writel(host, SDCR, host->sdc_reg);
-}
-
-static void atmci_disable(struct atmel_mci *host)
-{
-	mci_writel(host, CR, MCI_CR_SWRST);
-
-	/* Stall until write is complete, then disable the bus clock */
-	mci_readl(host, SR);
-	clk_disable(host->mck);
+	dev_err(&mmc->class_dev, "failed to initialize debugfs for slot\n");
 }
 
 static inline unsigned int ns_to_clocks(struct atmel_mci *host,
@@ -299,7 +406,7 @@
 }
 
 static void atmci_set_timeout(struct atmel_mci *host,
-			      struct mmc_data *data)
+		struct atmel_mci_slot *slot, struct mmc_data *data)
 {
 	static unsigned	dtomul_to_shift[] = {
 		0, 4, 7, 8, 10, 12, 16, 20
@@ -322,7 +429,7 @@
 		dtocyc = 15;
 	}
 
-	dev_vdbg(&host->mmc->class_dev, "setting timeout to %u cycles\n",
+	dev_vdbg(&slot->mmc->class_dev, "setting timeout to %u cycles\n",
 			dtocyc << dtomul_to_shift[dtomul]);
 	mci_writel(host, DTOR, (MCI_DTOMUL(dtomul) | MCI_DTOCYC(dtocyc)));
 }
@@ -375,15 +482,12 @@
 }
 
 static void atmci_start_command(struct atmel_mci *host,
-				struct mmc_command *cmd,
-				u32 cmd_flags)
+		struct mmc_command *cmd, u32 cmd_flags)
 {
-	/* Must read host->cmd after testing event flags */
-	smp_rmb();
 	WARN_ON(host->cmd);
 	host->cmd = cmd;
 
-	dev_vdbg(&host->mmc->class_dev,
+	dev_vdbg(&host->pdev->dev,
 			"start command: ARGR=0x%08x CMDR=0x%08x\n",
 			cmd->arg, cmd_flags);
 
@@ -391,34 +495,157 @@
 	mci_writel(host, CMDR, cmd_flags);
 }
 
-static void send_stop_cmd(struct mmc_host *mmc, struct mmc_data *data)
+static void send_stop_cmd(struct atmel_mci *host, struct mmc_data *data)
 {
-	struct atmel_mci *host = mmc_priv(mmc);
-
 	atmci_start_command(host, data->stop, host->stop_cmdr);
 	mci_writel(host, IER, MCI_CMDRDY);
 }
 
-static void atmci_request_end(struct mmc_host *mmc, struct mmc_request *mrq)
+#ifdef CONFIG_MMC_ATMELMCI_DMA
+static void atmci_dma_cleanup(struct atmel_mci *host)
 {
-	struct atmel_mci *host = mmc_priv(mmc);
+	struct mmc_data			*data = host->data;
 
-	WARN_ON(host->cmd || host->data);
-	host->mrq = NULL;
-
-	atmci_disable(host);
-
-	mmc_request_done(mmc, mrq);
+	dma_unmap_sg(&host->pdev->dev, data->sg, data->sg_len,
+		     ((data->flags & MMC_DATA_WRITE)
+		      ? DMA_TO_DEVICE : DMA_FROM_DEVICE));
 }
 
+static void atmci_stop_dma(struct atmel_mci *host)
+{
+	struct dma_chan *chan = host->data_chan;
+
+	if (chan) {
+		chan->device->device_terminate_all(chan);
+		atmci_dma_cleanup(host);
+	} else {
+		/* Data transfer was stopped by the interrupt handler */
+		atmci_set_pending(host, EVENT_XFER_COMPLETE);
+		mci_writel(host, IER, MCI_NOTBUSY);
+	}
+}
+
+/* This function is called by the DMA driver from tasklet context. */
+static void atmci_dma_complete(void *arg)
+{
+	struct atmel_mci	*host = arg;
+	struct mmc_data		*data = host->data;
+
+	dev_vdbg(&host->pdev->dev, "DMA complete\n");
+
+	atmci_dma_cleanup(host);
+
+	/*
+	 * If the card was removed, data will be NULL. No point trying
+	 * to send the stop command or waiting for NBUSY in this case.
+	 */
+	if (data) {
+		atmci_set_pending(host, EVENT_XFER_COMPLETE);
+		tasklet_schedule(&host->tasklet);
+
+		/*
+		 * Regardless of what the documentation says, we have
+		 * to wait for NOTBUSY even after block read
+		 * operations.
+		 *
+		 * When the DMA transfer is complete, the controller
+		 * may still be reading the CRC from the card, i.e.
+		 * the data transfer is still in progress and we
+		 * haven't seen all the potential error bits yet.
+		 *
+		 * The interrupt handler will schedule a different
+		 * tasklet to finish things up when the data transfer
+		 * is completely done.
+		 *
+		 * We may not complete the mmc request here anyway
+		 * because the mmc layer may call back and cause us to
+		 * violate the "don't submit new operations from the
+		 * completion callback" rule of the dma engine
+		 * framework.
+		 */
+		mci_writel(host, IER, MCI_NOTBUSY);
+	}
+}
+
+static int
+atmci_submit_data_dma(struct atmel_mci *host, struct mmc_data *data)
+{
+	struct dma_chan			*chan;
+	struct dma_async_tx_descriptor	*desc;
+	struct scatterlist		*sg;
+	unsigned int			i;
+	enum dma_data_direction		direction;
+
+	/*
+	 * We don't do DMA on "complex" transfers, i.e. with
+	 * non-word-aligned buffers or lengths. Also, we don't bother
+	 * with all the DMA setup overhead for short transfers.
+	 */
+	if (data->blocks * data->blksz < ATMCI_DMA_THRESHOLD)
+		return -EINVAL;
+	if (data->blksz & 3)
+		return -EINVAL;
+
+	for_each_sg(data->sg, sg, data->sg_len, i) {
+		if (sg->offset & 3 || sg->length & 3)
+			return -EINVAL;
+	}
+
+	/* If we don't have a channel, we can't do DMA */
+	chan = host->dma.chan;
+	if (chan) {
+		dma_chan_get(chan);
+		host->data_chan = chan;
+	}
+
+	if (!chan)
+		return -ENODEV;
+
+	if (data->flags & MMC_DATA_READ)
+		direction = DMA_FROM_DEVICE;
+	else
+		direction = DMA_TO_DEVICE;
+
+	desc = chan->device->device_prep_slave_sg(chan,
+			data->sg, data->sg_len, direction,
+			DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
+	if (!desc)
+		return -ENOMEM;
+
+	host->dma.data_desc = desc;
+	desc->callback = atmci_dma_complete;
+	desc->callback_param = host;
+	desc->tx_submit(desc);
+
+	/* Go! */
+	chan->device->device_issue_pending(chan);
+
+	return 0;
+}
+
+#else /* CONFIG_MMC_ATMELMCI_DMA */
+
+static int atmci_submit_data_dma(struct atmel_mci *host, struct mmc_data *data)
+{
+	return -ENOSYS;
+}
+
+static void atmci_stop_dma(struct atmel_mci *host)
+{
+	/* Data transfer was stopped by the interrupt handler */
+	atmci_set_pending(host, EVENT_XFER_COMPLETE);
+	mci_writel(host, IER, MCI_NOTBUSY);
+}
+
+#endif /* CONFIG_MMC_ATMELMCI_DMA */
+
 /*
  * Returns a mask of interrupt flags to be enabled after the whole
  * request has been prepared.
  */
-static u32 atmci_submit_data(struct mmc_host *mmc, struct mmc_data *data)
+static u32 atmci_submit_data(struct atmel_mci *host, struct mmc_data *data)
 {
-	struct atmel_mci	*host = mmc_priv(mmc);
-	u32			iflags;
+	u32 iflags;
 
 	data->error = -EINPROGRESS;
 
@@ -426,77 +653,89 @@
 	host->sg = NULL;
 	host->data = data;
 
-	dev_vdbg(&mmc->class_dev, "BLKR=0x%08x\n",
-			MCI_BCNT(data->blocks) | MCI_BLKLEN(data->blksz));
-
 	iflags = ATMCI_DATA_ERROR_FLAGS;
-	host->sg = data->sg;
-	host->pio_offset = 0;
-	if (data->flags & MMC_DATA_READ)
-		iflags |= MCI_RXRDY;
-	else
-		iflags |= MCI_TXRDY;
+	if (atmci_submit_data_dma(host, data)) {
+		host->data_chan = NULL;
+
+		/*
+		 * Errata: MMC data write operation with less than 12
+		 * bytes is impossible.
+		 *
+		 * Errata: MCI Transmit Data Register (TDR) FIFO
+		 * corruption when length is not multiple of 4.
+		 */
+		if (data->blocks * data->blksz < 12
+				|| (data->blocks * data->blksz) & 3)
+			host->need_reset = true;
+
+		host->sg = data->sg;
+		host->pio_offset = 0;
+		if (data->flags & MMC_DATA_READ)
+			iflags |= MCI_RXRDY;
+		else
+			iflags |= MCI_TXRDY;
+	}
 
 	return iflags;
 }
 
-static void atmci_request(struct mmc_host *mmc, struct mmc_request *mrq)
+static void atmci_start_request(struct atmel_mci *host,
+		struct atmel_mci_slot *slot)
 {
-	struct atmel_mci	*host = mmc_priv(mmc);
-	struct mmc_data		*data;
+	struct mmc_request	*mrq;
 	struct mmc_command	*cmd;
+	struct mmc_data		*data;
 	u32			iflags;
-	u32			cmdflags = 0;
+	u32			cmdflags;
+
+	mrq = slot->mrq;
+	host->cur_slot = slot;
+	host->mrq = mrq;
+
+	host->pending_events = 0;
+	host->completed_events = 0;
+	host->data_status = 0;
+
+	if (host->need_reset) {
+		mci_writel(host, CR, MCI_CR_SWRST);
+		mci_writel(host, CR, MCI_CR_MCIEN);
+		mci_writel(host, MR, host->mode_reg);
+		host->need_reset = false;
+	}
+	mci_writel(host, SDCR, slot->sdc_reg);
 
 	iflags = mci_readl(host, IMR);
 	if (iflags)
-		dev_warn(&mmc->class_dev, "WARNING: IMR=0x%08x\n",
-				mci_readl(host, IMR));
+		dev_warn(&slot->mmc->class_dev, "WARNING: IMR=0x%08x\n",
+				iflags);
 
-	WARN_ON(host->mrq != NULL);
-
-	/*
-	 * We may "know" the card is gone even though there's still an
-	 * electrical connection. If so, we really need to communicate
-	 * this to the MMC core since there won't be any more
-	 * interrupts as the card is completely removed. Otherwise,
-	 * the MMC core might believe the card is still there even
-	 * though the card was just removed very slowly.
-	 */
-	if (!host->present) {
-		mrq->cmd->error = -ENOMEDIUM;
-		mmc_request_done(mmc, mrq);
-		return;
+	if (unlikely(test_and_clear_bit(ATMCI_CARD_NEED_INIT, &slot->flags))) {
+		/* Send init sequence (74 clock cycles) */
+		mci_writel(host, CMDR, MCI_CMDR_SPCMD_INIT);
+		while (!(mci_readl(host, SR) & MCI_CMDRDY))
+			cpu_relax();
 	}
-
-	host->mrq = mrq;
-	host->pending_events = 0;
-	host->completed_events = 0;
-
-	atmci_enable(host);
-
-	/* We don't support multiple blocks of weird lengths. */
 	data = mrq->data;
 	if (data) {
-		if (data->blocks > 1 && data->blksz & 3)
-			goto fail;
-		atmci_set_timeout(host, data);
+		atmci_set_timeout(host, slot, data);
 
 		/* Must set block count/size before sending command */
 		mci_writel(host, BLKR, MCI_BCNT(data->blocks)
 				| MCI_BLKLEN(data->blksz));
+		dev_vdbg(&slot->mmc->class_dev, "BLKR=0x%08x\n",
+			MCI_BCNT(data->blocks) | MCI_BLKLEN(data->blksz));
 	}
 
 	iflags = MCI_CMDRDY;
 	cmd = mrq->cmd;
-	cmdflags = atmci_prepare_command(mmc, cmd);
+	cmdflags = atmci_prepare_command(slot->mmc, cmd);
 	atmci_start_command(host, cmd, cmdflags);
 
 	if (data)
-		iflags |= atmci_submit_data(mmc, data);
+		iflags |= atmci_submit_data(host, data);
 
 	if (mrq->stop) {
-		host->stop_cmdr = atmci_prepare_command(mmc, mrq->stop);
+		host->stop_cmdr = atmci_prepare_command(slot->mmc, mrq->stop);
 		host->stop_cmdr |= MCI_CMDR_STOP_XFER;
 		if (!(data->flags & MMC_DATA_WRITE))
 			host->stop_cmdr |= MCI_CMDR_TRDIR_READ;
@@ -513,59 +752,156 @@
 	 * prepared yet.)
 	 */
 	mci_writel(host, IER, iflags);
+}
 
-	return;
+static void atmci_queue_request(struct atmel_mci *host,
+		struct atmel_mci_slot *slot, struct mmc_request *mrq)
+{
+	dev_vdbg(&slot->mmc->class_dev, "queue request: state=%d\n",
+			host->state);
 
-fail:
-	atmci_disable(host);
-	host->mrq = NULL;
-	mrq->cmd->error = -EINVAL;
-	mmc_request_done(mmc, mrq);
+	spin_lock_bh(&host->lock);
+	slot->mrq = mrq;
+	if (host->state == STATE_IDLE) {
+		host->state = STATE_SENDING_CMD;
+		atmci_start_request(host, slot);
+	} else {
+		list_add_tail(&slot->queue_node, &host->queue);
+	}
+	spin_unlock_bh(&host->lock);
+}
+
+static void atmci_request(struct mmc_host *mmc, struct mmc_request *mrq)
+{
+	struct atmel_mci_slot	*slot = mmc_priv(mmc);
+	struct atmel_mci	*host = slot->host;
+	struct mmc_data		*data;
+
+	WARN_ON(slot->mrq);
+
+	/*
+	 * We may "know" the card is gone even though there's still an
+	 * electrical connection. If so, we really need to communicate
+	 * this to the MMC core since there won't be any more
+	 * interrupts as the card is completely removed. Otherwise,
+	 * the MMC core might believe the card is still there even
+	 * though the card was just removed very slowly.
+	 */
+	if (!test_bit(ATMCI_CARD_PRESENT, &slot->flags)) {
+		mrq->cmd->error = -ENOMEDIUM;
+		mmc_request_done(mmc, mrq);
+		return;
+	}
+
+	/* We don't support multiple blocks of weird lengths. */
+	data = mrq->data;
+	if (data && data->blocks > 1 && data->blksz & 3) {
+		mrq->cmd->error = -EINVAL;
+		mmc_request_done(mmc, mrq);
+	}
+
+	atmci_queue_request(host, slot, mrq);
 }
 
 static void atmci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
 {
-	struct atmel_mci	*host = mmc_priv(mmc);
+	struct atmel_mci_slot	*slot = mmc_priv(mmc);
+	struct atmel_mci	*host = slot->host;
+	unsigned int		i;
+
+	slot->sdc_reg &= ~MCI_SDCBUS_MASK;
+	switch (ios->bus_width) {
+	case MMC_BUS_WIDTH_1:
+		slot->sdc_reg |= MCI_SDCBUS_1BIT;
+		break;
+	case MMC_BUS_WIDTH_4:
+		slot->sdc_reg = MCI_SDCBUS_4BIT;
+		break;
+	}
 
 	if (ios->clock) {
+		unsigned int clock_min = ~0U;
 		u32 clkdiv;
 
-		/* Set clock rate */
-		clkdiv = DIV_ROUND_UP(host->bus_hz, 2 * ios->clock) - 1;
+		spin_lock_bh(&host->lock);
+		if (!host->mode_reg) {
+			clk_enable(host->mck);
+			mci_writel(host, CR, MCI_CR_SWRST);
+			mci_writel(host, CR, MCI_CR_MCIEN);
+		}
+
+		/*
+		 * Use mirror of ios->clock to prevent race with mmc
+		 * core ios update when finding the minimum.
+		 */
+		slot->clock = ios->clock;
+		for (i = 0; i < ATMEL_MCI_MAX_NR_SLOTS; i++) {
+			if (host->slot[i] && host->slot[i]->clock
+					&& host->slot[i]->clock < clock_min)
+				clock_min = host->slot[i]->clock;
+		}
+
+		/* Calculate clock divider */
+		clkdiv = DIV_ROUND_UP(host->bus_hz, 2 * clock_min) - 1;
 		if (clkdiv > 255) {
 			dev_warn(&mmc->class_dev,
 				"clock %u too slow; using %lu\n",
-				ios->clock, host->bus_hz / (2 * 256));
+				clock_min, host->bus_hz / (2 * 256));
 			clkdiv = 255;
 		}
 
+		/*
+		 * WRPROOF and RDPROOF prevent overruns/underruns by
+		 * stopping the clock when the FIFO is full/empty.
+		 * This state is not expected to last for long.
+		 */
 		host->mode_reg = MCI_MR_CLKDIV(clkdiv) | MCI_MR_WRPROOF
 					| MCI_MR_RDPROOF;
-	}
 
-	switch (ios->bus_width) {
-	case MMC_BUS_WIDTH_1:
-		host->sdc_reg = 0;
-		break;
-	case MMC_BUS_WIDTH_4:
-		host->sdc_reg = MCI_SDCBUS_4BIT;
-		break;
+		if (list_empty(&host->queue))
+			mci_writel(host, MR, host->mode_reg);
+		else
+			host->need_clock_update = true;
+
+		spin_unlock_bh(&host->lock);
+	} else {
+		bool any_slot_active = false;
+
+		spin_lock_bh(&host->lock);
+		slot->clock = 0;
+		for (i = 0; i < ATMEL_MCI_MAX_NR_SLOTS; i++) {
+			if (host->slot[i] && host->slot[i]->clock) {
+				any_slot_active = true;
+				break;
+			}
+		}
+		if (!any_slot_active) {
+			mci_writel(host, CR, MCI_CR_MCIDIS);
+			if (host->mode_reg) {
+				mci_readl(host, MR);
+				clk_disable(host->mck);
+			}
+			host->mode_reg = 0;
+		}
+		spin_unlock_bh(&host->lock);
 	}
 
 	switch (ios->power_mode) {
-	case MMC_POWER_ON:
-		/* Send init sequence (74 clock cycles) */
-		atmci_enable(host);
-		mci_writel(host, CMDR, MCI_CMDR_SPCMD_INIT);
-		while (!(mci_readl(host, SR) & MCI_CMDRDY))
-			cpu_relax();
-		atmci_disable(host);
+	case MMC_POWER_UP:
+		set_bit(ATMCI_CARD_NEED_INIT, &slot->flags);
 		break;
 	default:
 		/*
 		 * TODO: None of the currently available AVR32-based
 		 * boards allow MMC power to be turned off. Implement
 		 * power control when this can be tested properly.
+		 *
+		 * We also need to hook this into the clock management
+		 * somehow so that newly inserted cards aren't
+		 * subjected to a fast clock before we have a chance
+		 * to figure out what the maximum rate is. Currently,
+		 * there's no way to avoid this, and there never will
+		 * be for boards that don't support power control.
 		 */
 		break;
 	}
@@ -573,31 +909,82 @@
 
 static int atmci_get_ro(struct mmc_host *mmc)
 {
-	int			read_only = 0;
-	struct atmel_mci	*host = mmc_priv(mmc);
+	int			read_only = -ENOSYS;
+	struct atmel_mci_slot	*slot = mmc_priv(mmc);
 
-	if (gpio_is_valid(host->wp_pin)) {
-		read_only = gpio_get_value(host->wp_pin);
+	if (gpio_is_valid(slot->wp_pin)) {
+		read_only = gpio_get_value(slot->wp_pin);
 		dev_dbg(&mmc->class_dev, "card is %s\n",
 				read_only ? "read-only" : "read-write");
-	} else {
-		dev_dbg(&mmc->class_dev,
-			"no pin for checking read-only switch."
-			" Assuming write-enable.\n");
 	}
 
 	return read_only;
 }
 
-static struct mmc_host_ops atmci_ops = {
+static int atmci_get_cd(struct mmc_host *mmc)
+{
+	int			present = -ENOSYS;
+	struct atmel_mci_slot	*slot = mmc_priv(mmc);
+
+	if (gpio_is_valid(slot->detect_pin)) {
+		present = !gpio_get_value(slot->detect_pin);
+		dev_dbg(&mmc->class_dev, "card is %spresent\n",
+				present ? "" : "not ");
+	}
+
+	return present;
+}
+
+static const struct mmc_host_ops atmci_ops = {
 	.request	= atmci_request,
 	.set_ios	= atmci_set_ios,
 	.get_ro		= atmci_get_ro,
+	.get_cd		= atmci_get_cd,
 };
 
-static void atmci_command_complete(struct atmel_mci *host,
-			struct mmc_command *cmd, u32 status)
+/* Called with host->lock held */
+static void atmci_request_end(struct atmel_mci *host, struct mmc_request *mrq)
+	__releases(&host->lock)
+	__acquires(&host->lock)
 {
+	struct atmel_mci_slot	*slot = NULL;
+	struct mmc_host		*prev_mmc = host->cur_slot->mmc;
+
+	WARN_ON(host->cmd || host->data);
+
+	/*
+	 * Update the MMC clock rate if necessary. This may be
+	 * necessary if set_ios() is called when a different slot is
+	 * busy transfering data.
+	 */
+	if (host->need_clock_update)
+		mci_writel(host, MR, host->mode_reg);
+
+	host->cur_slot->mrq = NULL;
+	host->mrq = NULL;
+	if (!list_empty(&host->queue)) {
+		slot = list_entry(host->queue.next,
+				struct atmel_mci_slot, queue_node);
+		list_del(&slot->queue_node);
+		dev_vdbg(&host->pdev->dev, "list not empty: %s is next\n",
+				mmc_hostname(slot->mmc));
+		host->state = STATE_SENDING_CMD;
+		atmci_start_request(host, slot);
+	} else {
+		dev_vdbg(&host->pdev->dev, "list empty\n");
+		host->state = STATE_IDLE;
+	}
+
+	spin_unlock(&host->lock);
+	mmc_request_done(prev_mmc, mrq);
+	spin_lock(&host->lock);
+}
+
+static void atmci_command_complete(struct atmel_mci *host,
+			struct mmc_command *cmd)
+{
+	u32		status = host->cmd_status;
+
 	/* Read the response from the card (up to 16 bytes) */
 	cmd->resp[0] = mci_readl(host, RSPR);
 	cmd->resp[1] = mci_readl(host, RSPR);
@@ -614,11 +1001,12 @@
 		cmd->error = 0;
 
 	if (cmd->error) {
-		dev_dbg(&host->mmc->class_dev,
+		dev_dbg(&host->pdev->dev,
 			"command error: status=0x%08x\n", status);
 
 		if (cmd->data) {
 			host->data = NULL;
+			atmci_stop_dma(host);
 			mci_writel(host, IDR, MCI_NOTBUSY
 					| MCI_TXRDY | MCI_RXRDY
 					| ATMCI_DATA_ERROR_FLAGS);
@@ -628,146 +1016,222 @@
 
 static void atmci_detect_change(unsigned long data)
 {
-	struct atmel_mci *host = (struct atmel_mci *)data;
-	struct mmc_request *mrq = host->mrq;
-	int present;
+	struct atmel_mci_slot	*slot = (struct atmel_mci_slot *)data;
+	bool			present;
+	bool			present_old;
 
 	/*
-	 * atmci_remove() sets detect_pin to -1 before freeing the
-	 * interrupt. We must not re-enable the interrupt if it has
-	 * been freed.
+	 * atmci_cleanup_slot() sets the ATMCI_SHUTDOWN flag before
+	 * freeing the interrupt. We must not re-enable the interrupt
+	 * if it has been freed, and if we're shutting down, it
+	 * doesn't really matter whether the card is present or not.
 	 */
 	smp_rmb();
-	if (!gpio_is_valid(host->detect_pin))
+	if (test_bit(ATMCI_SHUTDOWN, &slot->flags))
 		return;
 
-	enable_irq(gpio_to_irq(host->detect_pin));
-	present = !gpio_get_value(host->detect_pin);
+	enable_irq(gpio_to_irq(slot->detect_pin));
+	present = !gpio_get_value(slot->detect_pin);
+	present_old = test_bit(ATMCI_CARD_PRESENT, &slot->flags);
 
-	dev_vdbg(&host->pdev->dev, "detect change: %d (was %d)\n",
-			present, host->present);
+	dev_vdbg(&slot->mmc->class_dev, "detect change: %d (was %d)\n",
+			present, present_old);
 
-	if (present != host->present) {
-		dev_dbg(&host->mmc->class_dev, "card %s\n",
+	if (present != present_old) {
+		struct atmel_mci	*host = slot->host;
+		struct mmc_request	*mrq;
+
+		dev_dbg(&slot->mmc->class_dev, "card %s\n",
 			present ? "inserted" : "removed");
-		host->present = present;
 
-		/* Reset controller if card is gone */
-		if (!present) {
-			mci_writel(host, CR, MCI_CR_SWRST);
-			mci_writel(host, IDR, ~0UL);
-			mci_writel(host, CR, MCI_CR_MCIEN);
-		}
+		spin_lock(&host->lock);
+
+		if (!present)
+			clear_bit(ATMCI_CARD_PRESENT, &slot->flags);
+		else
+			set_bit(ATMCI_CARD_PRESENT, &slot->flags);
 
 		/* Clean up queue if present */
+		mrq = slot->mrq;
 		if (mrq) {
-			/*
-			 * Reset controller to terminate any ongoing
-			 * commands or data transfers.
-			 */
-			mci_writel(host, CR, MCI_CR_SWRST);
+			if (mrq == host->mrq) {
+				/*
+				 * Reset controller to terminate any ongoing
+				 * commands or data transfers.
+				 */
+				mci_writel(host, CR, MCI_CR_SWRST);
+				mci_writel(host, CR, MCI_CR_MCIEN);
+				mci_writel(host, MR, host->mode_reg);
 
-			if (!atmci_is_completed(host, EVENT_CMD_COMPLETE))
-				mrq->cmd->error = -ENOMEDIUM;
-
-			if (mrq->data && !atmci_is_completed(host,
-						EVENT_DATA_COMPLETE)) {
 				host->data = NULL;
-				mrq->data->error = -ENOMEDIUM;
+				host->cmd = NULL;
+
+				switch (host->state) {
+				case STATE_IDLE:
+					break;
+				case STATE_SENDING_CMD:
+					mrq->cmd->error = -ENOMEDIUM;
+					if (!mrq->data)
+						break;
+					/* fall through */
+				case STATE_SENDING_DATA:
+					mrq->data->error = -ENOMEDIUM;
+					atmci_stop_dma(host);
+					break;
+				case STATE_DATA_BUSY:
+				case STATE_DATA_ERROR:
+					if (mrq->data->error == -EINPROGRESS)
+						mrq->data->error = -ENOMEDIUM;
+					if (!mrq->stop)
+						break;
+					/* fall through */
+				case STATE_SENDING_STOP:
+					mrq->stop->error = -ENOMEDIUM;
+					break;
+				}
+
+				atmci_request_end(host, mrq);
+			} else {
+				list_del(&slot->queue_node);
+				mrq->cmd->error = -ENOMEDIUM;
+				if (mrq->data)
+					mrq->data->error = -ENOMEDIUM;
+				if (mrq->stop)
+					mrq->stop->error = -ENOMEDIUM;
+
+				spin_unlock(&host->lock);
+				mmc_request_done(slot->mmc, mrq);
+				spin_lock(&host->lock);
 			}
-			if (mrq->stop && !atmci_is_completed(host,
-						EVENT_STOP_COMPLETE))
-				mrq->stop->error = -ENOMEDIUM;
-
-			host->cmd = NULL;
-			atmci_request_end(host->mmc, mrq);
 		}
+		spin_unlock(&host->lock);
 
-		mmc_detect_change(host->mmc, 0);
+		mmc_detect_change(slot->mmc, 0);
 	}
 }
 
 static void atmci_tasklet_func(unsigned long priv)
 {
-	struct mmc_host		*mmc = (struct mmc_host *)priv;
-	struct atmel_mci	*host = mmc_priv(mmc);
+	struct atmel_mci	*host = (struct atmel_mci *)priv;
 	struct mmc_request	*mrq = host->mrq;
 	struct mmc_data		*data = host->data;
+	struct mmc_command	*cmd = host->cmd;
+	enum atmel_mci_state	state = host->state;
+	enum atmel_mci_state	prev_state;
+	u32			status;
 
-	dev_vdbg(&mmc->class_dev,
-		"tasklet: pending/completed/mask %lx/%lx/%x\n",
-		host->pending_events, host->completed_events,
+	spin_lock(&host->lock);
+
+	state = host->state;
+
+	dev_vdbg(&host->pdev->dev,
+		"tasklet: state %u pending/completed/mask %lx/%lx/%x\n",
+		state, host->pending_events, host->completed_events,
 		mci_readl(host, IMR));
 
-	if (atmci_test_and_clear_pending(host, EVENT_CMD_COMPLETE)) {
-		/*
-		 * host->cmd must be set to NULL before the interrupt
-		 * handler sees EVENT_CMD_COMPLETE
-		 */
-		host->cmd = NULL;
-		smp_wmb();
-		atmci_set_completed(host, EVENT_CMD_COMPLETE);
-		atmci_command_complete(host, mrq->cmd, host->cmd_status);
+	do {
+		prev_state = state;
 
-		if (!mrq->cmd->error && mrq->stop
-				&& atmci_is_completed(host, EVENT_XFER_COMPLETE)
-				&& !atmci_test_and_set_completed(host,
-					EVENT_STOP_SENT))
-			send_stop_cmd(host->mmc, mrq->data);
-	}
-	if (atmci_test_and_clear_pending(host, EVENT_STOP_COMPLETE)) {
-		/*
-		 * host->cmd must be set to NULL before the interrupt
-		 * handler sees EVENT_STOP_COMPLETE
-		 */
-		host->cmd = NULL;
-		smp_wmb();
-		atmci_set_completed(host, EVENT_STOP_COMPLETE);
-		atmci_command_complete(host, mrq->stop, host->stop_status);
-	}
-	if (atmci_test_and_clear_pending(host, EVENT_DATA_ERROR)) {
-		u32 status = host->data_status;
+		switch (state) {
+		case STATE_IDLE:
+			break;
 
-		dev_vdbg(&mmc->class_dev, "data error: status=%08x\n", status);
+		case STATE_SENDING_CMD:
+			if (!atmci_test_and_clear_pending(host,
+						EVENT_CMD_COMPLETE))
+				break;
 
-		atmci_set_completed(host, EVENT_DATA_ERROR);
-		atmci_set_completed(host, EVENT_DATA_COMPLETE);
+			host->cmd = NULL;
+			atmci_set_completed(host, EVENT_CMD_COMPLETE);
+			atmci_command_complete(host, mrq->cmd);
+			if (!mrq->data || cmd->error) {
+				atmci_request_end(host, host->mrq);
+				goto unlock;
+			}
 
-		if (status & MCI_DTOE) {
-			dev_dbg(&mmc->class_dev,
-					"data timeout error\n");
-			data->error = -ETIMEDOUT;
-		} else if (status & MCI_DCRCE) {
-			dev_dbg(&mmc->class_dev, "data CRC error\n");
-			data->error = -EILSEQ;
-		} else {
-			dev_dbg(&mmc->class_dev,
-					"data FIFO error (status=%08x)\n",
-					status);
-			data->error = -EIO;
+			prev_state = state = STATE_SENDING_DATA;
+			/* fall through */
+
+		case STATE_SENDING_DATA:
+			if (atmci_test_and_clear_pending(host,
+						EVENT_DATA_ERROR)) {
+				atmci_stop_dma(host);
+				if (data->stop)
+					send_stop_cmd(host, data);
+				state = STATE_DATA_ERROR;
+				break;
+			}
+
+			if (!atmci_test_and_clear_pending(host,
+						EVENT_XFER_COMPLETE))
+				break;
+
+			atmci_set_completed(host, EVENT_XFER_COMPLETE);
+			prev_state = state = STATE_DATA_BUSY;
+			/* fall through */
+
+		case STATE_DATA_BUSY:
+			if (!atmci_test_and_clear_pending(host,
+						EVENT_DATA_COMPLETE))
+				break;
+
+			host->data = NULL;
+			atmci_set_completed(host, EVENT_DATA_COMPLETE);
+			status = host->data_status;
+			if (unlikely(status & ATMCI_DATA_ERROR_FLAGS)) {
+				if (status & MCI_DTOE) {
+					dev_dbg(&host->pdev->dev,
+							"data timeout error\n");
+					data->error = -ETIMEDOUT;
+				} else if (status & MCI_DCRCE) {
+					dev_dbg(&host->pdev->dev,
+							"data CRC error\n");
+					data->error = -EILSEQ;
+				} else {
+					dev_dbg(&host->pdev->dev,
+						"data FIFO error (status=%08x)\n",
+						status);
+					data->error = -EIO;
+				}
+			} else {
+				data->bytes_xfered = data->blocks * data->blksz;
+				data->error = 0;
+			}
+
+			if (!data->stop) {
+				atmci_request_end(host, host->mrq);
+				goto unlock;
+			}
+
+			prev_state = state = STATE_SENDING_STOP;
+			if (!data->error)
+				send_stop_cmd(host, data);
+			/* fall through */
+
+		case STATE_SENDING_STOP:
+			if (!atmci_test_and_clear_pending(host,
+						EVENT_CMD_COMPLETE))
+				break;
+
+			host->cmd = NULL;
+			atmci_command_complete(host, mrq->stop);
+			atmci_request_end(host, host->mrq);
+			goto unlock;
+
+		case STATE_DATA_ERROR:
+			if (!atmci_test_and_clear_pending(host,
+						EVENT_XFER_COMPLETE))
+				break;
+
+			state = STATE_DATA_BUSY;
+			break;
 		}
+	} while (state != prev_state);
 
-		if (host->present && data->stop
-				&& atmci_is_completed(host, EVENT_CMD_COMPLETE)
-				&& !atmci_test_and_set_completed(
-					host, EVENT_STOP_SENT))
-			send_stop_cmd(host->mmc, data);
+	host->state = state;
 
-		host->data = NULL;
-	}
-	if (atmci_test_and_clear_pending(host, EVENT_DATA_COMPLETE)) {
-		atmci_set_completed(host, EVENT_DATA_COMPLETE);
-
-		if (!atmci_is_completed(host, EVENT_DATA_ERROR)) {
-			data->bytes_xfered = data->blocks * data->blksz;
-			data->error = 0;
-		}
-
-		host->data = NULL;
-	}
-
-	if (host->mrq && !host->cmd && !host->data)
-		atmci_request_end(mmc, host->mrq);
+unlock:
+	spin_unlock(&host->lock);
 }
 
 static void atmci_read_data_pio(struct atmel_mci *host)
@@ -789,6 +1253,7 @@
 			nbytes += 4;
 
 			if (offset == sg->length) {
+				flush_dcache_page(sg_page(sg));
 				host->sg = sg = sg_next(sg);
 				if (!sg)
 					goto done;
@@ -817,9 +1282,11 @@
 			mci_writel(host, IDR, (MCI_NOTBUSY | MCI_RXRDY
 						| ATMCI_DATA_ERROR_FLAGS));
 			host->data_status = status;
+			data->bytes_xfered += nbytes;
+			smp_wmb();
 			atmci_set_pending(host, EVENT_DATA_ERROR);
 			tasklet_schedule(&host->tasklet);
-			break;
+			return;
 		}
 	} while (status & MCI_RXRDY);
 
@@ -832,10 +1299,8 @@
 	mci_writel(host, IDR, MCI_RXRDY);
 	mci_writel(host, IER, MCI_NOTBUSY);
 	data->bytes_xfered += nbytes;
-	atmci_set_completed(host, EVENT_XFER_COMPLETE);
-	if (data->stop && atmci_is_completed(host, EVENT_CMD_COMPLETE)
-			&& !atmci_test_and_set_completed(host, EVENT_STOP_SENT))
-		send_stop_cmd(host->mmc, data);
+	smp_wmb();
+	atmci_set_pending(host, EVENT_XFER_COMPLETE);
 }
 
 static void atmci_write_data_pio(struct atmel_mci *host)
@@ -888,9 +1353,11 @@
 			mci_writel(host, IDR, (MCI_NOTBUSY | MCI_TXRDY
 						| ATMCI_DATA_ERROR_FLAGS));
 			host->data_status = status;
+			data->bytes_xfered += nbytes;
+			smp_wmb();
 			atmci_set_pending(host, EVENT_DATA_ERROR);
 			tasklet_schedule(&host->tasklet);
-			break;
+			return;
 		}
 	} while (status & MCI_TXRDY);
 
@@ -903,38 +1370,26 @@
 	mci_writel(host, IDR, MCI_TXRDY);
 	mci_writel(host, IER, MCI_NOTBUSY);
 	data->bytes_xfered += nbytes;
-	atmci_set_completed(host, EVENT_XFER_COMPLETE);
-	if (data->stop && atmci_is_completed(host, EVENT_CMD_COMPLETE)
-			&& !atmci_test_and_set_completed(host, EVENT_STOP_SENT))
-		send_stop_cmd(host->mmc, data);
+	smp_wmb();
+	atmci_set_pending(host, EVENT_XFER_COMPLETE);
 }
 
-static void atmci_cmd_interrupt(struct mmc_host *mmc, u32 status)
+static void atmci_cmd_interrupt(struct atmel_mci *host, u32 status)
 {
-	struct atmel_mci	*host = mmc_priv(mmc);
-
 	mci_writel(host, IDR, MCI_CMDRDY);
 
-	if (atmci_is_completed(host, EVENT_STOP_SENT)) {
-		host->stop_status = status;
-		atmci_set_pending(host, EVENT_STOP_COMPLETE);
-	} else {
-		host->cmd_status = status;
-		atmci_set_pending(host, EVENT_CMD_COMPLETE);
-	}
-
+	host->cmd_status = status;
+	smp_wmb();
+	atmci_set_pending(host, EVENT_CMD_COMPLETE);
 	tasklet_schedule(&host->tasklet);
 }
 
 static irqreturn_t atmci_interrupt(int irq, void *dev_id)
 {
-	struct mmc_host		*mmc = dev_id;
-	struct atmel_mci	*host = mmc_priv(mmc);
+	struct atmel_mci	*host = dev_id;
 	u32			status, mask, pending;
 	unsigned int		pass_count = 0;
 
-	spin_lock(&mmc->lock);
-
 	do {
 		status = mci_readl(host, SR);
 		mask = mci_readl(host, IMR);
@@ -946,13 +1401,18 @@
 			mci_writel(host, IDR, ATMCI_DATA_ERROR_FLAGS
 					| MCI_RXRDY | MCI_TXRDY);
 			pending &= mci_readl(host, IMR);
+
 			host->data_status = status;
+			smp_wmb();
 			atmci_set_pending(host, EVENT_DATA_ERROR);
 			tasklet_schedule(&host->tasklet);
 		}
 		if (pending & MCI_NOTBUSY) {
-			mci_writel(host, IDR, (MCI_NOTBUSY
-					       | ATMCI_DATA_ERROR_FLAGS));
+			mci_writel(host, IDR,
+					ATMCI_DATA_ERROR_FLAGS | MCI_NOTBUSY);
+			if (!host->data_status)
+				host->data_status = status;
+			smp_wmb();
 			atmci_set_pending(host, EVENT_DATA_COMPLETE);
 			tasklet_schedule(&host->tasklet);
 		}
@@ -962,18 +1422,15 @@
 			atmci_write_data_pio(host);
 
 		if (pending & MCI_CMDRDY)
-			atmci_cmd_interrupt(mmc, status);
+			atmci_cmd_interrupt(host, status);
 	} while (pass_count++ < 5);
 
-	spin_unlock(&mmc->lock);
-
 	return pass_count ? IRQ_HANDLED : IRQ_NONE;
 }
 
 static irqreturn_t atmci_detect_interrupt(int irq, void *dev_id)
 {
-	struct mmc_host		*mmc = dev_id;
-	struct atmel_mci	*host = mmc_priv(mmc);
+	struct atmel_mci_slot	*slot = dev_id;
 
 	/*
 	 * Disable interrupts until the pin has stabilized and check
@@ -981,19 +1438,176 @@
 	 * middle of the timer routine when this interrupt triggers.
 	 */
 	disable_irq_nosync(irq);
-	mod_timer(&host->detect_timer, jiffies + msecs_to_jiffies(20));
+	mod_timer(&slot->detect_timer, jiffies + msecs_to_jiffies(20));
 
 	return IRQ_HANDLED;
 }
 
+#ifdef CONFIG_MMC_ATMELMCI_DMA
+
+static inline struct atmel_mci *
+dma_client_to_atmel_mci(struct dma_client *client)
+{
+	return container_of(client, struct atmel_mci, dma.client);
+}
+
+static enum dma_state_client atmci_dma_event(struct dma_client *client,
+		struct dma_chan *chan, enum dma_state state)
+{
+	struct atmel_mci	*host;
+	enum dma_state_client	ret = DMA_NAK;
+
+	host = dma_client_to_atmel_mci(client);
+
+	switch (state) {
+	case DMA_RESOURCE_AVAILABLE:
+		spin_lock_bh(&host->lock);
+		if (!host->dma.chan) {
+			host->dma.chan = chan;
+			ret = DMA_ACK;
+		}
+		spin_unlock_bh(&host->lock);
+
+		if (ret == DMA_ACK)
+			dev_info(&host->pdev->dev,
+					"Using %s for DMA transfers\n",
+					chan->dev.bus_id);
+		break;
+
+	case DMA_RESOURCE_REMOVED:
+		spin_lock_bh(&host->lock);
+		if (host->dma.chan == chan) {
+			host->dma.chan = NULL;
+			ret = DMA_ACK;
+		}
+		spin_unlock_bh(&host->lock);
+
+		if (ret == DMA_ACK)
+			dev_info(&host->pdev->dev,
+					"Lost %s, falling back to PIO\n",
+					chan->dev.bus_id);
+		break;
+
+	default:
+		break;
+	}
+
+
+	return ret;
+}
+#endif /* CONFIG_MMC_ATMELMCI_DMA */
+
+static int __init atmci_init_slot(struct atmel_mci *host,
+		struct mci_slot_pdata *slot_data, unsigned int id,
+		u32 sdc_reg)
+{
+	struct mmc_host			*mmc;
+	struct atmel_mci_slot		*slot;
+
+	mmc = mmc_alloc_host(sizeof(struct atmel_mci_slot), &host->pdev->dev);
+	if (!mmc)
+		return -ENOMEM;
+
+	slot = mmc_priv(mmc);
+	slot->mmc = mmc;
+	slot->host = host;
+	slot->detect_pin = slot_data->detect_pin;
+	slot->wp_pin = slot_data->wp_pin;
+	slot->sdc_reg = sdc_reg;
+
+	mmc->ops = &atmci_ops;
+	mmc->f_min = DIV_ROUND_UP(host->bus_hz, 512);
+	mmc->f_max = host->bus_hz / 2;
+	mmc->ocr_avail	= MMC_VDD_32_33 | MMC_VDD_33_34;
+	if (slot_data->bus_width >= 4)
+		mmc->caps |= MMC_CAP_4_BIT_DATA;
+
+	mmc->max_hw_segs = 64;
+	mmc->max_phys_segs = 64;
+	mmc->max_req_size = 32768 * 512;
+	mmc->max_blk_size = 32768;
+	mmc->max_blk_count = 512;
+
+	/* Assume card is present initially */
+	set_bit(ATMCI_CARD_PRESENT, &slot->flags);
+	if (gpio_is_valid(slot->detect_pin)) {
+		if (gpio_request(slot->detect_pin, "mmc_detect")) {
+			dev_dbg(&mmc->class_dev, "no detect pin available\n");
+			slot->detect_pin = -EBUSY;
+		} else if (gpio_get_value(slot->detect_pin)) {
+			clear_bit(ATMCI_CARD_PRESENT, &slot->flags);
+		}
+	}
+
+	if (!gpio_is_valid(slot->detect_pin))
+		mmc->caps |= MMC_CAP_NEEDS_POLL;
+
+	if (gpio_is_valid(slot->wp_pin)) {
+		if (gpio_request(slot->wp_pin, "mmc_wp")) {
+			dev_dbg(&mmc->class_dev, "no WP pin available\n");
+			slot->wp_pin = -EBUSY;
+		}
+	}
+
+	host->slot[id] = slot;
+	mmc_add_host(mmc);
+
+	if (gpio_is_valid(slot->detect_pin)) {
+		int ret;
+
+		setup_timer(&slot->detect_timer, atmci_detect_change,
+				(unsigned long)slot);
+
+		ret = request_irq(gpio_to_irq(slot->detect_pin),
+				atmci_detect_interrupt,
+				IRQF_TRIGGER_FALLING | IRQF_TRIGGER_RISING,
+				"mmc-detect", slot);
+		if (ret) {
+			dev_dbg(&mmc->class_dev,
+				"could not request IRQ %d for detect pin\n",
+				gpio_to_irq(slot->detect_pin));
+			gpio_free(slot->detect_pin);
+			slot->detect_pin = -EBUSY;
+		}
+	}
+
+	atmci_init_debugfs(slot);
+
+	return 0;
+}
+
+static void __exit atmci_cleanup_slot(struct atmel_mci_slot *slot,
+		unsigned int id)
+{
+	/* Debugfs stuff is cleaned up by mmc core */
+
+	set_bit(ATMCI_SHUTDOWN, &slot->flags);
+	smp_wmb();
+
+	mmc_remove_host(slot->mmc);
+
+	if (gpio_is_valid(slot->detect_pin)) {
+		int pin = slot->detect_pin;
+
+		free_irq(gpio_to_irq(pin), slot);
+		del_timer_sync(&slot->detect_timer);
+		gpio_free(pin);
+	}
+	if (gpio_is_valid(slot->wp_pin))
+		gpio_free(slot->wp_pin);
+
+	slot->host->slot[id] = NULL;
+	mmc_free_host(slot->mmc);
+}
+
 static int __init atmci_probe(struct platform_device *pdev)
 {
 	struct mci_platform_data	*pdata;
-	struct atmel_mci *host;
-	struct mmc_host *mmc;
-	struct resource *regs;
-	int irq;
-	int ret;
+	struct atmel_mci		*host;
+	struct resource			*regs;
+	unsigned int			nr_slots;
+	int				irq;
+	int				ret;
 
 	regs = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	if (!regs)
@@ -1005,15 +1619,13 @@
 	if (irq < 0)
 		return irq;
 
-	mmc = mmc_alloc_host(sizeof(struct atmel_mci), &pdev->dev);
-	if (!mmc)
+	host = kzalloc(sizeof(struct atmel_mci), GFP_KERNEL);
+	if (!host)
 		return -ENOMEM;
 
-	host = mmc_priv(mmc);
 	host->pdev = pdev;
-	host->mmc = mmc;
-	host->detect_pin = pdata->detect_pin;
-	host->wp_pin = pdata->wp_pin;
+	spin_lock_init(&host->lock);
+	INIT_LIST_HEAD(&host->queue);
 
 	host->mck = clk_get(&pdev->dev, "mci_clk");
 	if (IS_ERR(host->mck)) {
@@ -1033,122 +1645,102 @@
 
 	host->mapbase = regs->start;
 
-	mmc->ops = &atmci_ops;
-	mmc->f_min = (host->bus_hz + 511) / 512;
-	mmc->f_max = host->bus_hz / 2;
-	mmc->ocr_avail	= MMC_VDD_32_33 | MMC_VDD_33_34;
-	mmc->caps |= MMC_CAP_4_BIT_DATA;
+	tasklet_init(&host->tasklet, atmci_tasklet_func, (unsigned long)host);
 
-	mmc->max_hw_segs = 64;
-	mmc->max_phys_segs = 64;
-	mmc->max_req_size = 32768 * 512;
-	mmc->max_blk_size = 32768;
-	mmc->max_blk_count = 512;
-
-	tasklet_init(&host->tasklet, atmci_tasklet_func, (unsigned long)mmc);
-
-	ret = request_irq(irq, atmci_interrupt, 0, pdev->dev.bus_id, mmc);
+	ret = request_irq(irq, atmci_interrupt, 0, pdev->dev.bus_id, host);
 	if (ret)
 		goto err_request_irq;
 
-	/* Assume card is present if we don't have a detect pin */
-	host->present = 1;
-	if (gpio_is_valid(host->detect_pin)) {
-		if (gpio_request(host->detect_pin, "mmc_detect")) {
-			dev_dbg(&mmc->class_dev, "no detect pin available\n");
-			host->detect_pin = -1;
-		} else {
-			host->present = !gpio_get_value(host->detect_pin);
-		}
-	}
+#ifdef CONFIG_MMC_ATMELMCI_DMA
+	if (pdata->dma_slave) {
+		struct dma_slave *slave = pdata->dma_slave;
 
-	if (!gpio_is_valid(host->detect_pin))
-		mmc->caps |= MMC_CAP_NEEDS_POLL;
+		slave->tx_reg = regs->start + MCI_TDR;
+		slave->rx_reg = regs->start + MCI_RDR;
 
-	if (gpio_is_valid(host->wp_pin)) {
-		if (gpio_request(host->wp_pin, "mmc_wp")) {
-			dev_dbg(&mmc->class_dev, "no WP pin available\n");
-			host->wp_pin = -1;
-		}
+		/* Try to grab a DMA channel */
+		host->dma.client.event_callback = atmci_dma_event;
+		dma_cap_set(DMA_SLAVE, host->dma.client.cap_mask);
+		host->dma.client.slave = slave;
+
+		dma_async_client_register(&host->dma.client);
+		dma_async_client_chan_request(&host->dma.client);
+	} else {
+		dev_notice(&pdev->dev, "DMA not available, using PIO\n");
 	}
+#endif /* CONFIG_MMC_ATMELMCI_DMA */
 
 	platform_set_drvdata(pdev, host);
 
-	mmc_add_host(mmc);
-
-	if (gpio_is_valid(host->detect_pin)) {
-		setup_timer(&host->detect_timer, atmci_detect_change,
-				(unsigned long)host);
-
-		ret = request_irq(gpio_to_irq(host->detect_pin),
-				atmci_detect_interrupt,
-				IRQF_TRIGGER_FALLING | IRQF_TRIGGER_RISING,
-				"mmc-detect", mmc);
-		if (ret) {
-			dev_dbg(&mmc->class_dev,
-				"could not request IRQ %d for detect pin\n",
-				gpio_to_irq(host->detect_pin));
-			gpio_free(host->detect_pin);
-			host->detect_pin = -1;
-		}
+	/* We need at least one slot to succeed */
+	nr_slots = 0;
+	ret = -ENODEV;
+	if (pdata->slot[0].bus_width) {
+		ret = atmci_init_slot(host, &pdata->slot[0],
+				MCI_SDCSEL_SLOT_A, 0);
+		if (!ret)
+			nr_slots++;
+	}
+	if (pdata->slot[1].bus_width) {
+		ret = atmci_init_slot(host, &pdata->slot[1],
+				MCI_SDCSEL_SLOT_B, 1);
+		if (!ret)
+			nr_slots++;
 	}
 
-	dev_info(&mmc->class_dev,
-			"Atmel MCI controller at 0x%08lx irq %d\n",
-			host->mapbase, irq);
+	if (!nr_slots)
+		goto err_init_slot;
 
-	atmci_init_debugfs(host);
+	dev_info(&pdev->dev,
+			"Atmel MCI controller at 0x%08lx irq %d, %u slots\n",
+			host->mapbase, irq, nr_slots);
 
 	return 0;
 
+err_init_slot:
+#ifdef CONFIG_MMC_ATMELMCI_DMA
+	if (pdata->dma_slave)
+		dma_async_client_unregister(&host->dma.client);
+#endif
+	free_irq(irq, host);
 err_request_irq:
 	iounmap(host->regs);
 err_ioremap:
 	clk_put(host->mck);
 err_clk_get:
-	mmc_free_host(mmc);
+	kfree(host);
 	return ret;
 }
 
 static int __exit atmci_remove(struct platform_device *pdev)
 {
-	struct atmel_mci *host = platform_get_drvdata(pdev);
+	struct atmel_mci	*host = platform_get_drvdata(pdev);
+	unsigned int		i;
 
 	platform_set_drvdata(pdev, NULL);
 
-	if (host) {
-		/* Debugfs stuff is cleaned up by mmc core */
-
-		if (gpio_is_valid(host->detect_pin)) {
-			int pin = host->detect_pin;
-
-			/* Make sure the timer doesn't enable the interrupt */
-			host->detect_pin = -1;
-			smp_wmb();
-
-			free_irq(gpio_to_irq(pin), host->mmc);
-			del_timer_sync(&host->detect_timer);
-			gpio_free(pin);
-		}
-
-		mmc_remove_host(host->mmc);
-
-		clk_enable(host->mck);
-		mci_writel(host, IDR, ~0UL);
-		mci_writel(host, CR, MCI_CR_MCIDIS);
-		mci_readl(host, SR);
-		clk_disable(host->mck);
-
-		if (gpio_is_valid(host->wp_pin))
-			gpio_free(host->wp_pin);
-
-		free_irq(platform_get_irq(pdev, 0), host->mmc);
-		iounmap(host->regs);
-
-		clk_put(host->mck);
-
-		mmc_free_host(host->mmc);
+	for (i = 0; i < ATMEL_MCI_MAX_NR_SLOTS; i++) {
+		if (host->slot[i])
+			atmci_cleanup_slot(host->slot[i], i);
 	}
+
+	clk_enable(host->mck);
+	mci_writel(host, IDR, ~0UL);
+	mci_writel(host, CR, MCI_CR_MCIDIS);
+	mci_readl(host, SR);
+	clk_disable(host->mck);
+
+#ifdef CONFIG_MMC_ATMELMCI_DMA
+	if (host->dma.client.slave)
+		dma_async_client_unregister(&host->dma.client);
+#endif
+
+	free_irq(platform_get_irq(pdev, 0), host);
+	iounmap(host->regs);
+
+	clk_put(host->mck);
+	kfree(host);
+
 	return 0;
 }
 
diff --git a/drivers/mmc/host/mmc_spi.c b/drivers/mmc/host/mmc_spi.c
index 7503b81..07faf54 100644
--- a/drivers/mmc/host/mmc_spi.c
+++ b/drivers/mmc/host/mmc_spi.c
@@ -95,8 +95,6 @@
  * reads which takes nowhere near that long.  Older cards may be able to use
  * shorter timeouts ... but why bother?
  */
-#define readblock_timeout	ktime_set(0, 100 * 1000 * 1000)
-#define writeblock_timeout	ktime_set(0, 250 * 1000 * 1000)
 #define r1b_timeout		ktime_set(3, 0)
 
 
@@ -220,9 +218,9 @@
 	return mmc_spi_skip(host, timeout, sizeof(host->data->status), 0);
 }
 
-static int mmc_spi_readtoken(struct mmc_spi_host *host)
+static int mmc_spi_readtoken(struct mmc_spi_host *host, ktime_t timeout)
 {
-	return mmc_spi_skip(host, readblock_timeout, 1, 0xff);
+	return mmc_spi_skip(host, timeout, 1, 0xff);
 }
 
 
@@ -605,7 +603,8 @@
  * Return negative errno, else success.
  */
 static int
-mmc_spi_writeblock(struct mmc_spi_host *host, struct spi_transfer *t)
+mmc_spi_writeblock(struct mmc_spi_host *host, struct spi_transfer *t,
+	ktime_t timeout)
 {
 	struct spi_device	*spi = host->spi;
 	int			status, i;
@@ -673,7 +672,7 @@
 		if (scratch->status[i] != 0)
 			return 0;
 	}
-	return mmc_spi_wait_unbusy(host, writeblock_timeout);
+	return mmc_spi_wait_unbusy(host, timeout);
 }
 
 /*
@@ -693,7 +692,8 @@
  * STOP_TRANSMISSION command.
  */
 static int
-mmc_spi_readblock(struct mmc_spi_host *host, struct spi_transfer *t)
+mmc_spi_readblock(struct mmc_spi_host *host, struct spi_transfer *t,
+	ktime_t timeout)
 {
 	struct spi_device	*spi = host->spi;
 	int			status;
@@ -707,7 +707,7 @@
 		return status;
 	status = scratch->status[0];
 	if (status == 0xff || status == 0)
-		status = mmc_spi_readtoken(host);
+		status = mmc_spi_readtoken(host, timeout);
 
 	if (status == SPI_TOKEN_SINGLE) {
 		if (host->dma_dev) {
@@ -778,6 +778,8 @@
 	struct scatterlist	*sg;
 	unsigned		n_sg;
 	int			multiple = (data->blocks > 1);
+	u32			clock_rate;
+	ktime_t			timeout;
 
 	if (data->flags & MMC_DATA_READ)
 		direction = DMA_FROM_DEVICE;
@@ -786,6 +788,14 @@
 	mmc_spi_setup_data_message(host, multiple, direction);
 	t = &host->t;
 
+	if (t->speed_hz)
+		clock_rate = t->speed_hz;
+	else
+		clock_rate = spi->max_speed_hz;
+
+	timeout = ktime_add_ns(ktime_set(0, 0), data->timeout_ns +
+			data->timeout_clks * 1000000 / clock_rate);
+
 	/* Handle scatterlist segments one at a time, with synch for
 	 * each 512-byte block
 	 */
@@ -832,9 +842,9 @@
 				t->len);
 
 			if (direction == DMA_TO_DEVICE)
-				status = mmc_spi_writeblock(host, t);
+				status = mmc_spi_writeblock(host, t, timeout);
 			else
-				status = mmc_spi_readblock(host, t);
+				status = mmc_spi_readblock(host, t, timeout);
 			if (status < 0)
 				break;
 
@@ -917,7 +927,7 @@
 			if (scratch->status[tmp] != 0)
 				return;
 		}
-		tmp = mmc_spi_wait_unbusy(host, writeblock_timeout);
+		tmp = mmc_spi_wait_unbusy(host, timeout);
 		if (tmp < 0 && !data->error)
 			data->error = tmp;
 	}
diff --git a/drivers/mmc/host/sdhci-pci.c b/drivers/mmc/host/sdhci-pci.c
index fcb14c2..0a84f10 100644
--- a/drivers/mmc/host/sdhci-pci.c
+++ b/drivers/mmc/host/sdhci-pci.c
@@ -144,7 +144,8 @@
 			  SDHCI_QUIRK_32BIT_DMA_SIZE |
 			  SDHCI_QUIRK_32BIT_ADMA_SIZE |
 			  SDHCI_QUIRK_RESET_AFTER_REQUEST |
-			  SDHCI_QUIRK_BROKEN_SMALL_PIO;
+			  SDHCI_QUIRK_BROKEN_SMALL_PIO |
+			  SDHCI_QUIRK_FORCE_HIGHSPEED;
 	}
 
 	/*
diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
index e3a8133..30f64b1 100644
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c
@@ -177,7 +177,7 @@
 {
 	unsigned long flags;
 	size_t blksize, len, chunk;
-	u32 scratch;
+	u32 uninitialized_var(scratch);
 	u8 *buf;
 
 	DBG("PIO reading\n");
@@ -1154,7 +1154,7 @@
 
 	spin_unlock_irqrestore(&host->lock, flags);
 
-	mmc_detect_change(host->mmc, msecs_to_jiffies(500));
+	mmc_detect_change(host->mmc, msecs_to_jiffies(200));
 }
 
 static void sdhci_tasklet_finish(unsigned long param)
@@ -1266,9 +1266,31 @@
 			SDHCI_INT_INDEX))
 		host->cmd->error = -EILSEQ;
 
-	if (host->cmd->error)
+	if (host->cmd->error) {
 		tasklet_schedule(&host->finish_tasklet);
-	else if (intmask & SDHCI_INT_RESPONSE)
+		return;
+	}
+
+	/*
+	 * The host can send and interrupt when the busy state has
+	 * ended, allowing us to wait without wasting CPU cycles.
+	 * Unfortunately this is overloaded on the "data complete"
+	 * interrupt, so we need to take some care when handling
+	 * it.
+	 *
+	 * Note: The 1.0 specification is a bit ambiguous about this
+	 *       feature so there might be some problems with older
+	 *       controllers.
+	 */
+	if (host->cmd->flags & MMC_RSP_BUSY) {
+		if (host->cmd->data)
+			DBG("Cannot wait for busy signal when also "
+				"doing a data transfer");
+		else
+			return;
+	}
+
+	if (intmask & SDHCI_INT_RESPONSE)
 		sdhci_finish_command(host);
 }
 
@@ -1278,11 +1300,16 @@
 
 	if (!host->data) {
 		/*
-		 * A data end interrupt is sent together with the response
-		 * for the stop command.
+		 * The "data complete" interrupt is also used to
+		 * indicate that a busy state has ended. See comment
+		 * above in sdhci_cmd_irq().
 		 */
-		if (intmask & SDHCI_INT_DATA_END)
-			return;
+		if (host->cmd && (host->cmd->flags & MMC_RSP_BUSY)) {
+			if (intmask & SDHCI_INT_DATA_END) {
+				sdhci_finish_command(host);
+				return;
+			}
+		}
 
 		printk(KERN_ERR "%s: Got data interrupt 0x%08x even "
 			"though no data operation was in progress.\n",
@@ -1604,7 +1631,8 @@
 	mmc->f_max = host->max_clk;
 	mmc->caps = MMC_CAP_4_BIT_DATA | MMC_CAP_SDIO_IRQ;
 
-	if (caps & SDHCI_CAN_DO_HISPD)
+	if ((caps & SDHCI_CAN_DO_HISPD) ||
+		(host->quirks & SDHCI_QUIRK_FORCE_HIGHSPEED))
 		mmc->caps |= MMC_CAP_SD_HIGHSPEED;
 
 	mmc->ocr_avail = 0;
diff --git a/drivers/mmc/host/sdhci.h b/drivers/mmc/host/sdhci.h
index 197d4a0..31f4b15 100644
--- a/drivers/mmc/host/sdhci.h
+++ b/drivers/mmc/host/sdhci.h
@@ -208,6 +208,8 @@
 #define SDHCI_QUIRK_BROKEN_TIMEOUT_VAL			(1<<12)
 /* Controller has an issue with buffer bits for small transfers */
 #define SDHCI_QUIRK_BROKEN_SMALL_PIO			(1<<13)
+/* Controller supports high speed but doesn't have the caps bit set */
+#define SDHCI_QUIRK_FORCE_HIGHSPEED			(1<<14)
 
 	int			irq;		/* Device IRQ */
 	void __iomem *		ioaddr;		/* Mapped address */
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index 9c288c9..bde891f 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -65,7 +65,7 @@
 	 *   -ENOSYS when not supported (equal to NULL callback)
 	 *   or a negative errno value when something bad happened
 	 *
-	 * Return values for the get_ro callback should be:
+	 * Return values for the get_cd callback should be:
 	 *   0 for a absent card
 	 *   1 for a present card
 	 *   -ENOSYS when not supported (equal to NULL callback)