Merge branch 'for-2.6.36' of git://git.kernel.dk/linux-2.6-block

* 'for-2.6.36' of git://git.kernel.dk/linux-2.6-block: (149 commits)
  block: make sure that REQ_* types are seen even with CONFIG_BLOCK=n
  xen-blkfront: fix missing out label
  blkdev: fix blkdev_issue_zeroout return value
  block: update request stacking methods to support discards
  block: fix missing export of blk_types.h
  writeback: fix bad _bh spinlock nesting
  drbd: revert "delay probes", feature is being re-implemented differently
  drbd: Initialize all members of sync_conf to their defaults [Bugz 315]
  drbd: Disable delay probes for the upcomming release
  writeback: cleanup bdi_register
  writeback: add new tracepoints
  writeback: remove unnecessary init_timer call
  writeback: optimize periodic bdi thread wakeups
  writeback: prevent unnecessary bdi threads wakeups
  writeback: move bdi threads exiting logic to the forker thread
  writeback: restructure bdi forker loop a little
  writeback: move last_active to bdi
  writeback: do not remove bdi from bdi_list
  writeback: simplify bdi code a little
  writeback: do not lose wake-ups in bdi threads
  ...

Fixed up pretty trivial conflicts in drivers/block/virtio_blk.c and
drivers/scsi/scsi_error.c as per Jens.

commit: 2f9e825d3e0e2b407ae8f082de5c00afcf7378fb [log] [tgz]
author: Linus Torvalds <torvalds@linux-foundation.org> Tue Aug 10 15:22:42 2010 -0700
committer: Linus Torvalds <torvalds@linux-foundation.org> Tue Aug 10 15:22:42 2010 -0700
tree: f8b3ee40674ce4acd5508a0a0bf52a30904caf6c
parent: 7ae0dea900b027cd90e8a3e14deca9a19e17638b [diff]
parent: de75d60d5ea235e6e09f4962ab22541ce0fe176a [diff]
diff --git a/arch/alpha/include/asm/scatterlist.h b/arch/alpha/include/asm/scatterlist.h
index 5728c52..017d747 100644
--- a/arch/alpha/include/asm/scatterlist.h
+++ b/arch/alpha/include/asm/scatterlist.h

@@ -3,6 +3,4 @@
 
 #include <asm-generic/scatterlist.h>
 
-#define ISA_DMA_THRESHOLD (~0UL)
-
 #endif /* !(_ALPHA_SCATTERLIST_H) */

diff --git a/arch/avr32/include/asm/scatterlist.h b/arch/avr32/include/asm/scatterlist.h
index 06394e5..a5902d98 100644
--- a/arch/avr32/include/asm/scatterlist.h
+++ b/arch/avr32/include/asm/scatterlist.h

@@ -3,6 +3,4 @@
 
 #include <asm-generic/scatterlist.h>
 
-#define ISA_DMA_THRESHOLD (0xffffffff)
-
 #endif /* __ASM_AVR32_SCATTERLIST_H */

diff --git a/arch/blackfin/include/asm/scatterlist.h b/arch/blackfin/include/asm/scatterlist.h
index 64d41d3..d177a15 100644
--- a/arch/blackfin/include/asm/scatterlist.h
+++ b/arch/blackfin/include/asm/scatterlist.h

@@ -3,6 +3,4 @@
 
 #include <asm-generic/scatterlist.h>
 
-#define ISA_DMA_THRESHOLD	(0xffffffff)
-
 #endif				/* !(_BLACKFIN_SCATTERLIST_H) */

diff --git a/arch/cris/include/asm/scatterlist.h b/arch/cris/include/asm/scatterlist.h
index 249a784..f11f8f4 100644
--- a/arch/cris/include/asm/scatterlist.h
+++ b/arch/cris/include/asm/scatterlist.h

@@ -3,6 +3,4 @@
 
 #include <asm-generic/scatterlist.h>
 
-#define ISA_DMA_THRESHOLD (0x1fffffff)
-
 #endif /* !(__ASM_CRIS_SCATTERLIST_H) */

diff --git a/arch/frv/include/asm/scatterlist.h b/arch/frv/include/asm/scatterlist.h
index 1614bfd..0e5eb30 100644
--- a/arch/frv/include/asm/scatterlist.h
+++ b/arch/frv/include/asm/scatterlist.h

@@ -3,6 +3,4 @@
 
 #include <asm-generic/scatterlist.h>
 
-#define ISA_DMA_THRESHOLD (0xffffffffUL)
-
 #endif /* !_ASM_SCATTERLIST_H */

diff --git a/arch/h8300/include/asm/scatterlist.h b/arch/h8300/include/asm/scatterlist.h
index de08a4a..82130ed 100644
--- a/arch/h8300/include/asm/scatterlist.h
+++ b/arch/h8300/include/asm/scatterlist.h

@@ -3,6 +3,4 @@
 
 #include <asm-generic/scatterlist.h>
 
-#define ISA_DMA_THRESHOLD	(0xffffffff)
-
 #endif /* !(_H8300_SCATTERLIST_H) */

diff --git a/arch/ia64/include/asm/scatterlist.h b/arch/ia64/include/asm/scatterlist.h
index f299a4f..08fd93b 100644
--- a/arch/ia64/include/asm/scatterlist.h
+++ b/arch/ia64/include/asm/scatterlist.h

@@ -2,15 +2,6 @@
 #define _ASM_IA64_SCATTERLIST_H
 
 #include <asm-generic/scatterlist.h>
-/*
- * It used to be that ISA_DMA_THRESHOLD had something to do with the
- * DMA-limits of ISA-devices.  Nowadays, its only remaining use (apart
- * from the aha1542.c driver, which isn't 64-bit clean anyhow) is to
- * tell the block-layer (via BLK_BOUNCE_ISA) what the max. physical
- * address of a page is that is allocated with GFP_DMA.  On IA-64,
- * that's 4GB - 1.
- */
-#define ISA_DMA_THRESHOLD	0xffffffff
 #define ARCH_HAS_SG_CHAIN
 
 #endif /* _ASM_IA64_SCATTERLIST_H */

diff --git a/arch/m32r/include/asm/scatterlist.h b/arch/m32r/include/asm/scatterlist.h
index aeeddd8..7370b8b 100644
--- a/arch/m32r/include/asm/scatterlist.h
+++ b/arch/m32r/include/asm/scatterlist.h

@@ -3,6 +3,4 @@
 
 #include <asm-generic/scatterlist.h>
 
-#define ISA_DMA_THRESHOLD (0x1fffffff)
-
 #endif /* _ASM_M32R_SCATTERLIST_H */

diff --git a/arch/m68k/include/asm/scatterlist.h b/arch/m68k/include/asm/scatterlist.h
index 175da06..3125054 100644
--- a/arch/m68k/include/asm/scatterlist.h
+++ b/arch/m68k/include/asm/scatterlist.h

@@ -3,7 +3,4 @@
 
 #include <asm-generic/scatterlist.h>
 
-/* This is bogus and should go away. */
-#define ISA_DMA_THRESHOLD (0x00ffffff)
-
 #endif /* !(_M68K_SCATTERLIST_H) */

diff --git a/arch/microblaze/include/asm/scatterlist.h b/arch/microblaze/include/asm/scatterlist.h
index dc4a890..35d786f 100644
--- a/arch/microblaze/include/asm/scatterlist.h
+++ b/arch/microblaze/include/asm/scatterlist.h

@@ -1,3 +1 @@
 #include <asm-generic/scatterlist.h>
-
-#define ISA_DMA_THRESHOLD	(~0UL)

diff --git a/arch/mips/include/asm/scatterlist.h b/arch/mips/include/asm/scatterlist.h
index 9af65e7..7ee0e64 100644
--- a/arch/mips/include/asm/scatterlist.h
+++ b/arch/mips/include/asm/scatterlist.h

@@ -3,6 +3,4 @@
 
 #include <asm-generic/scatterlist.h>
 
-#define ISA_DMA_THRESHOLD (0x00ffffffUL)
-
 #endif /* __ASM_SCATTERLIST_H */

diff --git a/arch/mn10300/include/asm/scatterlist.h b/arch/mn10300/include/asm/scatterlist.h
index 7bd00b9..7baa400 100644
--- a/arch/mn10300/include/asm/scatterlist.h
+++ b/arch/mn10300/include/asm/scatterlist.h

@@ -13,6 +13,4 @@
 
 #include <asm-generic/scatterlist.h>
 
-#define ISA_DMA_THRESHOLD (0x00ffffff)
-
 #endif /* _ASM_SCATTERLIST_H */

diff --git a/arch/parisc/include/asm/scatterlist.h b/arch/parisc/include/asm/scatterlist.h
index 2c3b79b..8bf1f0d 100644
--- a/arch/parisc/include/asm/scatterlist.h
+++ b/arch/parisc/include/asm/scatterlist.h

@@ -5,7 +5,6 @@
 #include <asm/types.h>
 #include <asm-generic/scatterlist.h>
 
-#define ISA_DMA_THRESHOLD (~0UL)
 #define sg_virt_addr(sg) ((unsigned long)sg_virt(sg))
 
 #endif /* _ASM_PARISC_SCATTERLIST_H */

diff --git a/arch/powerpc/include/asm/scatterlist.h b/arch/powerpc/include/asm/scatterlist.h
index 34cc78f..de1f620 100644
--- a/arch/powerpc/include/asm/scatterlist.h
+++ b/arch/powerpc/include/asm/scatterlist.h

@@ -12,9 +12,6 @@
 #include <asm/dma.h>
 #include <asm-generic/scatterlist.h>
 
-#ifdef __powerpc64__
-#define ISA_DMA_THRESHOLD	(~0UL)
-#endif
 #define ARCH_HAS_SG_CHAIN
 
 #endif /* _ASM_POWERPC_SCATTERLIST_H */

diff --git a/arch/s390/include/asm/scatterlist.h b/arch/s390/include/asm/scatterlist.h
index be44d94..35d786f 100644
--- a/arch/s390/include/asm/scatterlist.h
+++ b/arch/s390/include/asm/scatterlist.h

@@ -1,3 +1 @@
-#define ISA_DMA_THRESHOLD	(~0UL)
-
 #include <asm-generic/scatterlist.h>

diff --git a/arch/score/include/asm/scatterlist.h b/arch/score/include/asm/scatterlist.h
index 4fa1a66..9f533b8 100644
--- a/arch/score/include/asm/scatterlist.h
+++ b/arch/score/include/asm/scatterlist.h

@@ -1,8 +1,6 @@
 #ifndef _ASM_SCORE_SCATTERLIST_H
 #define _ASM_SCORE_SCATTERLIST_H
 
-#define ISA_DMA_THRESHOLD	(~0UL)
-
 #include <asm-generic/scatterlist.h>
 
 #endif /* _ASM_SCORE_SCATTERLIST_H */

diff --git a/arch/sh/include/asm/scatterlist.h b/arch/sh/include/asm/scatterlist.h
index e38d1d4..98dfc35 100644
--- a/arch/sh/include/asm/scatterlist.h
+++ b/arch/sh/include/asm/scatterlist.h

@@ -1,8 +1,6 @@
 #ifndef __ASM_SH_SCATTERLIST_H
 #define __ASM_SH_SCATTERLIST_H
 
-#define ISA_DMA_THRESHOLD	phys_addr_mask()
-
 #include <asm-generic/scatterlist.h>
 
 #endif /* __ASM_SH_SCATTERLIST_H */

diff --git a/arch/sparc/include/asm/scatterlist.h b/arch/sparc/include/asm/scatterlist.h
index 433e45f..92bb638 100644
--- a/arch/sparc/include/asm/scatterlist.h
+++ b/arch/sparc/include/asm/scatterlist.h

@@ -3,7 +3,6 @@
 
 #include <asm-generic/scatterlist.h>
 
-#define ISA_DMA_THRESHOLD	(~0UL)
 #define ARCH_HAS_SG_CHAIN
 
 #endif /* !(_SPARC_SCATTERLIST_H) */

diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
index da992a3..1bcd208 100644
--- a/arch/um/drivers/ubd_kern.c
+++ b/arch/um/drivers/ubd_kern.c

@@ -33,6 +33,7 @@
 #include "linux/mm.h"
 #include "linux/slab.h"
 #include "linux/vmalloc.h"
+#include "linux/smp_lock.h"
 #include "linux/blkpg.h"
 #include "linux/genhd.h"
 #include "linux/spinlock.h"
@@ -1098,6 +1099,7 @@
 	struct ubd *ubd_dev = disk->private_data;
 	int err = 0;
 
+	lock_kernel();
 	if(ubd_dev->count == 0){
 		err = ubd_open_dev(ubd_dev);
 		if(err){
@@ -1115,7 +1117,8 @@
 	        if(--ubd_dev->count == 0) ubd_close_dev(ubd_dev);
 	        err = -EROFS;
 	}*/
- out:
+out:
+	unlock_kernel();
 	return err;
 }
 
@@ -1123,8 +1126,10 @@
 {
 	struct ubd *ubd_dev = disk->private_data;
 
+	lock_kernel();
 	if(--ubd_dev->count == 0)
 		ubd_close_dev(ubd_dev);
+	unlock_kernel();
 	return 0;
 }
 

diff --git a/arch/x86/include/asm/scatterlist.h b/arch/x86/include/asm/scatterlist.h
index fb0b187..4240878 100644
--- a/arch/x86/include/asm/scatterlist.h
+++ b/arch/x86/include/asm/scatterlist.h

@@ -3,7 +3,6 @@
 
 #include <asm-generic/scatterlist.h>
 
-#define ISA_DMA_THRESHOLD (0x00ffffff)
 #define ARCH_HAS_SG_CHAIN
 
 #endif /* _ASM_X86_SCATTERLIST_H */

diff --git a/arch/xtensa/include/asm/scatterlist.h b/arch/xtensa/include/asm/scatterlist.h
index b1f9fdc..a0421a6 100644
--- a/arch/xtensa/include/asm/scatterlist.h
+++ b/arch/xtensa/include/asm/scatterlist.h

@@ -13,6 +13,4 @@
 
 #include <asm-generic/scatterlist.h>
 
-#define ISA_DMA_THRESHOLD (~0UL)
-
 #endif	/* _XTENSA_SCATTERLIST_H */

diff --git a/block/blk-barrier.c b/block/blk-barrier.c
index 0d710c9..f0faefc 100644
--- a/block/blk-barrier.c
+++ b/block/blk-barrier.c

@@ -13,7 +13,6 @@
  * blk_queue_ordered - does this queue support ordered writes
  * @q:        the request queue
  * @ordered:  one of QUEUE_ORDERED_*
- * @prepare_flush_fn: rq setup helper for cache flush ordered writes
  *
  * Description:
  *   For journalled file systems, doing ordered writes on a commit
@@ -22,15 +21,8 @@
  *   feature should call this function and indicate so.
  *
  **/
-int blk_queue_ordered(struct request_queue *q, unsigned ordered,
-		      prepare_flush_fn *prepare_flush_fn)
+int blk_queue_ordered(struct request_queue *q, unsigned ordered)
 {
-	if (!prepare_flush_fn && (ordered & (QUEUE_ORDERED_DO_PREFLUSH |
-					     QUEUE_ORDERED_DO_POSTFLUSH))) {
-		printk(KERN_ERR "%s: prepare_flush_fn required\n", __func__);
-		return -EINVAL;
-	}
-
 	if (ordered != QUEUE_ORDERED_NONE &&
 	    ordered != QUEUE_ORDERED_DRAIN &&
 	    ordered != QUEUE_ORDERED_DRAIN_FLUSH &&
@@ -44,7 +36,6 @@
 
 	q->ordered = ordered;
 	q->next_ordered = ordered;
-	q->prepare_flush_fn = prepare_flush_fn;
 
 	return 0;
 }
@@ -79,7 +70,7 @@
 	 *
 	 * http://thread.gmane.org/gmane.linux.kernel/537473
 	 */
-	if (!blk_fs_request(rq))
+	if (rq->cmd_type != REQ_TYPE_FS)
 		return QUEUE_ORDSEQ_DRAIN;
 
 	if ((rq->cmd_flags & REQ_ORDERED_COLOR) ==
@@ -143,10 +134,10 @@
 	}
 
 	blk_rq_init(q, rq);
-	rq->cmd_flags = REQ_HARDBARRIER;
-	rq->rq_disk = q->bar_rq.rq_disk;
+	rq->cmd_type = REQ_TYPE_FS;
+	rq->cmd_flags = REQ_HARDBARRIER | REQ_FLUSH;
+	rq->rq_disk = q->orig_bar_rq->rq_disk;
 	rq->end_io = end_io;
-	q->prepare_flush_fn(q, rq);
 
 	elv_insert(q, rq, ELEVATOR_INSERT_FRONT);
 }
@@ -203,7 +194,7 @@
 		/* initialize proxy request and queue it */
 		blk_rq_init(q, rq);
 		if (bio_data_dir(q->orig_bar_rq->bio) == WRITE)
-			rq->cmd_flags |= REQ_RW;
+			rq->cmd_flags |= REQ_WRITE;
 		if (q->ordered & QUEUE_ORDERED_DO_FUA)
 			rq->cmd_flags |= REQ_FUA;
 		init_request_from_bio(rq, q->orig_bar_rq->bio);
@@ -236,7 +227,8 @@
 bool blk_do_ordered(struct request_queue *q, struct request **rqp)
 {
 	struct request *rq = *rqp;
-	const int is_barrier = blk_fs_request(rq) && blk_barrier_rq(rq);
+	const int is_barrier = rq->cmd_type == REQ_TYPE_FS &&
+				(rq->cmd_flags & REQ_HARDBARRIER);
 
 	if (!q->ordseq) {
 		if (!is_barrier)
@@ -261,7 +253,7 @@
 	 */
 
 	/* Special requests are not subject to ordering rules. */
-	if (!blk_fs_request(rq) &&
+	if (rq->cmd_type != REQ_TYPE_FS &&
 	    rq != &q->pre_flush_rq && rq != &q->post_flush_rq)
 		return true;
 
@@ -319,6 +311,15 @@
 	if (!q)
 		return -ENXIO;
 
+	/*
+	 * some block devices may not have their queue correctly set up here
+	 * (e.g. loop device without a backing file) and so issuing a flush
+	 * here will panic. Ensure there is a request function before issuing
+	 * the barrier.
+	 */
+	if (!q->make_request_fn)
+		return -ENXIO;
+
 	bio = bio_alloc(gfp_mask, 0);
 	bio->bi_end_io = bio_end_empty_barrier;
 	bio->bi_bdev = bdev;

diff --git a/block/blk-core.c b/block/blk-core.c
index f0640d7..7da630e 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c

@@ -184,7 +184,7 @@
 	printk(KERN_INFO "  bio %p, biotail %p, buffer %p, len %u\n",
 	       rq->bio, rq->biotail, rq->buffer, blk_rq_bytes(rq));
 
-	if (blk_pc_request(rq)) {
+	if (rq->cmd_type == REQ_TYPE_BLOCK_PC) {
 		printk(KERN_INFO "  cdb: ");
 		for (bit = 0; bit < BLK_MAX_CDB; bit++)
 			printk("%02x ", rq->cmd[bit]);
@@ -608,6 +608,7 @@
 
 	q->request_fn		= rfn;
 	q->prep_rq_fn		= NULL;
+	q->unprep_rq_fn		= NULL;
 	q->unplug_fn		= generic_unplug_device;
 	q->queue_flags		= QUEUE_FLAG_DEFAULT;
 	q->queue_lock		= lock;
@@ -1135,30 +1136,46 @@
 }
 EXPORT_SYMBOL(blk_put_request);
 
+/**
+ * blk_add_request_payload - add a payload to a request
+ * @rq: request to update
+ * @page: page backing the payload
+ * @len: length of the payload.
+ *
+ * This allows to later add a payload to an already submitted request by
+ * a block driver.  The driver needs to take care of freeing the payload
+ * itself.
+ *
+ * Note that this is a quite horrible hack and nothing but handling of
+ * discard requests should ever use it.
+ */
+void blk_add_request_payload(struct request *rq, struct page *page,
+		unsigned int len)
+{
+	struct bio *bio = rq->bio;
+
+	bio->bi_io_vec->bv_page = page;
+	bio->bi_io_vec->bv_offset = 0;
+	bio->bi_io_vec->bv_len = len;
+
+	bio->bi_size = len;
+	bio->bi_vcnt = 1;
+	bio->bi_phys_segments = 1;
+
+	rq->__data_len = rq->resid_len = len;
+	rq->nr_phys_segments = 1;
+	rq->buffer = bio_data(bio);
+}
+EXPORT_SYMBOL_GPL(blk_add_request_payload);
+
 void init_request_from_bio(struct request *req, struct bio *bio)
 {
 	req->cpu = bio->bi_comp_cpu;
 	req->cmd_type = REQ_TYPE_FS;
 
-	/*
-	 * Inherit FAILFAST from bio (for read-ahead, and explicit
-	 * FAILFAST).  FAILFAST flags are identical for req and bio.
-	 */
-	if (bio_rw_flagged(bio, BIO_RW_AHEAD))
+	req->cmd_flags |= bio->bi_rw & REQ_COMMON_MASK;
+	if (bio->bi_rw & REQ_RAHEAD)
 		req->cmd_flags |= REQ_FAILFAST_MASK;
-	else
-		req->cmd_flags |= bio->bi_rw & REQ_FAILFAST_MASK;
-
-	if (bio_rw_flagged(bio, BIO_RW_DISCARD))
-		req->cmd_flags |= REQ_DISCARD;
-	if (bio_rw_flagged(bio, BIO_RW_BARRIER))
-		req->cmd_flags |= REQ_HARDBARRIER;
-	if (bio_rw_flagged(bio, BIO_RW_SYNCIO))
-		req->cmd_flags |= REQ_RW_SYNC;
-	if (bio_rw_flagged(bio, BIO_RW_META))
-		req->cmd_flags |= REQ_RW_META;
-	if (bio_rw_flagged(bio, BIO_RW_NOIDLE))
-		req->cmd_flags |= REQ_NOIDLE;
 
 	req->errors = 0;
 	req->__sector = bio->bi_sector;
@@ -1181,12 +1198,12 @@
 	int el_ret;
 	unsigned int bytes = bio->bi_size;
 	const unsigned short prio = bio_prio(bio);
-	const bool sync = bio_rw_flagged(bio, BIO_RW_SYNCIO);
-	const bool unplug = bio_rw_flagged(bio, BIO_RW_UNPLUG);
+	const bool sync = (bio->bi_rw & REQ_SYNC);
+	const bool unplug = (bio->bi_rw & REQ_UNPLUG);
 	const unsigned int ff = bio->bi_rw & REQ_FAILFAST_MASK;
 	int rw_flags;
 
-	if (bio_rw_flagged(bio, BIO_RW_BARRIER) &&
+	if ((bio->bi_rw & REQ_HARDBARRIER) &&
 	    (q->next_ordered == QUEUE_ORDERED_NONE)) {
 		bio_endio(bio, -EOPNOTSUPP);
 		return 0;
@@ -1200,7 +1217,7 @@
 
 	spin_lock_irq(q->queue_lock);
 
-	if (unlikely(bio_rw_flagged(bio, BIO_RW_BARRIER)) || elv_queue_empty(q))
+	if (unlikely((bio->bi_rw & REQ_HARDBARRIER)) || elv_queue_empty(q))
 		goto get_rq;
 
 	el_ret = elv_merge(q, &req, bio);
@@ -1275,7 +1292,7 @@
 	 */
 	rw_flags = bio_data_dir(bio);
 	if (sync)
-		rw_flags |= REQ_RW_SYNC;
+		rw_flags |= REQ_SYNC;
 
 	/*
 	 * Grab a free request. This is might sleep but can not fail.
@@ -1464,7 +1481,7 @@
 			goto end_io;
 		}
 
-		if (unlikely(!bio_rw_flagged(bio, BIO_RW_DISCARD) &&
+		if (unlikely(!(bio->bi_rw & REQ_DISCARD) &&
 			     nr_sectors > queue_max_hw_sectors(q))) {
 			printk(KERN_ERR "bio too big device %s (%u > %u)\n",
 			       bdevname(bio->bi_bdev, b),
@@ -1497,8 +1514,7 @@
 		if (bio_check_eod(bio, nr_sectors))
 			goto end_io;
 
-		if (bio_rw_flagged(bio, BIO_RW_DISCARD) &&
-		    !blk_queue_discard(q)) {
+		if ((bio->bi_rw & REQ_DISCARD) && !blk_queue_discard(q)) {
 			err = -EOPNOTSUPP;
 			goto end_io;
 		}
@@ -1583,7 +1599,7 @@
 	 * If it's a regular read/write or a barrier with data attached,
 	 * go through the normal accounting stuff before submission.
 	 */
-	if (bio_has_data(bio) && !(rw & (1 << BIO_RW_DISCARD))) {
+	if (bio_has_data(bio) && !(rw & REQ_DISCARD)) {
 		if (rw & WRITE) {
 			count_vm_events(PGPGOUT, count);
 		} else {
@@ -1628,6 +1644,9 @@
  */
 int blk_rq_check_limits(struct request_queue *q, struct request *rq)
 {
+	if (rq->cmd_flags & REQ_DISCARD)
+		return 0;
+
 	if (blk_rq_sectors(rq) > queue_max_sectors(q) ||
 	    blk_rq_bytes(rq) > queue_max_hw_sectors(q) << 9) {
 		printk(KERN_ERR "%s: over max size limit.\n", __func__);
@@ -1796,7 +1815,7 @@
 			 * sees this request (possibly after
 			 * requeueing).  Notify IO scheduler.
 			 */
-			if (blk_sorted_rq(rq))
+			if (rq->cmd_flags & REQ_SORTED)
 				elv_activate_rq(q, rq);
 
 			/*
@@ -1984,10 +2003,11 @@
 	 * TODO: tj: This is too subtle.  It would be better to let
 	 * low level drivers do what they see fit.
 	 */
-	if (blk_fs_request(req))
+	if (req->cmd_type == REQ_TYPE_FS)
 		req->errors = 0;
 
-	if (error && (blk_fs_request(req) && !(req->cmd_flags & REQ_QUIET))) {
+	if (error && req->cmd_type == REQ_TYPE_FS &&
+	    !(req->cmd_flags & REQ_QUIET)) {
 		printk(KERN_ERR "end_request: I/O error, dev %s, sector %llu\n",
 				req->rq_disk ? req->rq_disk->disk_name : "?",
 				(unsigned long long)blk_rq_pos(req));
@@ -2074,7 +2094,7 @@
 	req->buffer = bio_data(req->bio);
 
 	/* update sector only for requests with clear definition of sector */
-	if (blk_fs_request(req) || blk_discard_rq(req))
+	if (req->cmd_type == REQ_TYPE_FS || (req->cmd_flags & REQ_DISCARD))
 		req->__sector += total_bytes >> 9;
 
 	/* mixed attributes always follow the first bio */
@@ -2111,11 +2131,32 @@
 	    blk_update_request(rq->next_rq, error, bidi_bytes))
 		return true;
 
-	add_disk_randomness(rq->rq_disk);
+	if (blk_queue_add_random(rq->q))
+		add_disk_randomness(rq->rq_disk);
 
 	return false;
 }
 
+/**
+ * blk_unprep_request - unprepare a request
+ * @req:	the request
+ *
+ * This function makes a request ready for complete resubmission (or
+ * completion).  It happens only after all error handling is complete,
+ * so represents the appropriate moment to deallocate any resources
+ * that were allocated to the request in the prep_rq_fn.  The queue
+ * lock is held when calling this.
+ */
+void blk_unprep_request(struct request *req)
+{
+	struct request_queue *q = req->q;
+
+	req->cmd_flags &= ~REQ_DONTPREP;
+	if (q->unprep_rq_fn)
+		q->unprep_rq_fn(q, req);
+}
+EXPORT_SYMBOL_GPL(blk_unprep_request);
+
 /*
  * queue lock must be held
  */
@@ -2126,11 +2167,15 @@
 
 	BUG_ON(blk_queued_rq(req));
 
-	if (unlikely(laptop_mode) && blk_fs_request(req))
+	if (unlikely(laptop_mode) && req->cmd_type == REQ_TYPE_FS)
 		laptop_io_completion(&req->q->backing_dev_info);
 
 	blk_delete_timer(req);
 
+	if (req->cmd_flags & REQ_DONTPREP)
+		blk_unprep_request(req);
+
+
 	blk_account_io_done(req);
 
 	if (req->end_io)
@@ -2363,7 +2408,7 @@
 		     struct bio *bio)
 {
 	/* Bit 0 (R/W) is identical in rq->cmd_flags and bio->bi_rw */
-	rq->cmd_flags |= bio->bi_rw & REQ_RW;
+	rq->cmd_flags |= bio->bi_rw & REQ_WRITE;
 
 	if (bio_has_data(bio)) {
 		rq->nr_phys_segments = bio_phys_segments(q, bio);
@@ -2450,6 +2495,8 @@
 {
 	dst->cpu = src->cpu;
 	dst->cmd_flags = (rq_data_dir(src) | REQ_NOMERGE);
+	if (src->cmd_flags & REQ_DISCARD)
+		dst->cmd_flags |= REQ_DISCARD;
 	dst->cmd_type = src->cmd_type;
 	dst->__sector = blk_rq_pos(src);
 	dst->__data_len = blk_rq_bytes(src);

diff --git a/block/blk-exec.c b/block/blk-exec.c
index 49557e9..e1672f1 100644
--- a/block/blk-exec.c
+++ b/block/blk-exec.c

@@ -57,7 +57,7 @@
 	__elv_add_request(q, rq, where, 1);
 	__generic_unplug_device(q);
 	/* the queue is stopped so it won't be plugged+unplugged */
-	if (blk_pm_resume_request(rq))
+	if (rq->cmd_type == REQ_TYPE_PM_RESUME)
 		q->request_fn(q);
 	spin_unlock_irq(q->queue_lock);
 }

diff --git a/block/blk-lib.c b/block/blk-lib.c
index d0216b9..c1fc55a 100644
--- a/block/blk-lib.c
+++ b/block/blk-lib.c

@@ -19,7 +19,6 @@
 
 	if (bio->bi_private)
 		complete(bio->bi_private);
-	__free_page(bio_page(bio));
 
 	bio_put(bio);
 }
@@ -42,8 +41,8 @@
 	struct request_queue *q = bdev_get_queue(bdev);
 	int type = flags & BLKDEV_IFL_BARRIER ?
 		DISCARD_BARRIER : DISCARD_NOBARRIER;
+	unsigned int max_discard_sectors;
 	struct bio *bio;
-	struct page *page;
 	int ret = 0;
 
 	if (!q)
@@ -52,36 +51,30 @@
 	if (!blk_queue_discard(q))
 		return -EOPNOTSUPP;
 
-	while (nr_sects && !ret) {
-		unsigned int sector_size = q->limits.logical_block_size;
-		unsigned int max_discard_sectors =
-			min(q->limits.max_discard_sectors, UINT_MAX >> 9);
+	/*
+	 * Ensure that max_discard_sectors is of the proper
+	 * granularity
+	 */
+	max_discard_sectors = min(q->limits.max_discard_sectors, UINT_MAX >> 9);
+	if (q->limits.discard_granularity) {
+		unsigned int disc_sects = q->limits.discard_granularity >> 9;
 
+		max_discard_sectors &= ~(disc_sects - 1);
+	}
+
+	while (nr_sects && !ret) {
 		bio = bio_alloc(gfp_mask, 1);
-		if (!bio)
-			goto out;
+		if (!bio) {
+			ret = -ENOMEM;
+			break;
+		}
+
 		bio->bi_sector = sector;
 		bio->bi_end_io = blkdev_discard_end_io;
 		bio->bi_bdev = bdev;
 		if (flags & BLKDEV_IFL_WAIT)
 			bio->bi_private = &wait;
 
-		/*
-		 * Add a zeroed one-sector payload as that's what
-		 * our current implementations need.  If we'll ever need
-		 * more the interface will need revisiting.
-		 */
-		page = alloc_page(gfp_mask | __GFP_ZERO);
-		if (!page)
-			goto out_free_bio;
-		if (bio_add_pc_page(q, bio, page, sector_size, 0) < sector_size)
-			goto out_free_page;
-
-		/*
-		 * And override the bio size - the way discard works we
-		 * touch many more blocks on disk than the actual payload
-		 * length.
-		 */
 		if (nr_sects > max_discard_sectors) {
 			bio->bi_size = max_discard_sectors << 9;
 			nr_sects -= max_discard_sectors;
@@ -103,13 +96,8 @@
 			ret = -EIO;
 		bio_put(bio);
 	}
+
 	return ret;
-out_free_page:
-	__free_page(page);
-out_free_bio:
-	bio_put(bio);
-out:
-	return -ENOMEM;
 }
 EXPORT_SYMBOL(blkdev_issue_discard);
 
@@ -157,7 +145,7 @@
 int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
 			sector_t nr_sects, gfp_t gfp_mask, unsigned long flags)
 {
-	int ret = 0;
+	int ret;
 	struct bio *bio;
 	struct bio_batch bb;
 	unsigned int sz, issued = 0;
@@ -175,11 +163,14 @@
 			return ret;
 	}
 submit:
+	ret = 0;
 	while (nr_sects != 0) {
 		bio = bio_alloc(gfp_mask,
 				min(nr_sects, (sector_t)BIO_MAX_PAGES));
-		if (!bio)
+		if (!bio) {
+			ret = -ENOMEM;
 			break;
+		}
 
 		bio->bi_sector = sector;
 		bio->bi_bdev   = bdev;
@@ -198,6 +189,7 @@
 			if (ret < (sz << 9))
 				break;
 		}
+		ret = 0;
 		issued++;
 		submit_bio(WRITE, bio);
 	}

diff --git a/block/blk-map.c b/block/blk-map.c
index 9083cf0..c65d759 100644
--- a/block/blk-map.c
+++ b/block/blk-map.c

@@ -307,7 +307,7 @@
 		return PTR_ERR(bio);
 
 	if (rq_data_dir(rq) == WRITE)
-		bio->bi_rw |= (1 << BIO_RW);
+		bio->bi_rw |= (1 << REQ_WRITE);
 
 	if (do_copy)
 		rq->cmd_flags |= REQ_COPY_USER;

diff --git a/block/blk-merge.c b/block/blk-merge.c
index 5e7dc99..3b0cd42 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c

@@ -12,7 +12,6 @@
 static unsigned int __blk_recalc_rq_segments(struct request_queue *q,
 					     struct bio *bio)
 {
-	unsigned int phys_size;
 	struct bio_vec *bv, *bvprv = NULL;
 	int cluster, i, high, highprv = 1;
 	unsigned int seg_size, nr_phys_segs;
@@ -24,7 +23,7 @@
 	fbio = bio;
 	cluster = test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags);
 	seg_size = 0;
-	phys_size = nr_phys_segs = 0;
+	nr_phys_segs = 0;
 	for_each_bio(bio) {
 		bio_for_each_segment(bv, bio, i) {
 			/*
@@ -180,7 +179,7 @@
 	}
 
 	if (q->dma_drain_size && q->dma_drain_needed(rq)) {
-		if (rq->cmd_flags & REQ_RW)
+		if (rq->cmd_flags & REQ_WRITE)
 			memset(q->dma_drain_buffer, 0, q->dma_drain_size);
 
 		sg->page_link &= ~0x02;
@@ -226,7 +225,7 @@
 {
 	unsigned short max_sectors;
 
-	if (unlikely(blk_pc_request(req)))
+	if (unlikely(req->cmd_type == REQ_TYPE_BLOCK_PC))
 		max_sectors = queue_max_hw_sectors(q);
 	else
 		max_sectors = queue_max_sectors(q);
@@ -250,7 +249,7 @@
 {
 	unsigned short max_sectors;
 
-	if (unlikely(blk_pc_request(req)))
+	if (unlikely(req->cmd_type == REQ_TYPE_BLOCK_PC))
 		max_sectors = queue_max_hw_sectors(q);
 	else
 		max_sectors = queue_max_sectors(q);

diff --git a/block/blk-settings.c b/block/blk-settings.c
index f5ed5a1..a234f4b 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c

@@ -37,6 +37,23 @@
 EXPORT_SYMBOL(blk_queue_prep_rq);
 
 /**
+ * blk_queue_unprep_rq - set an unprepare_request function for queue
+ * @q:		queue
+ * @ufn:	unprepare_request function
+ *
+ * It's possible for a queue to register an unprepare_request callback
+ * which is invoked before the request is finally completed. The goal
+ * of the function is to deallocate any data that was allocated in the
+ * prepare_request callback.
+ *
+ */
+void blk_queue_unprep_rq(struct request_queue *q, unprep_rq_fn *ufn)
+{
+	q->unprep_rq_fn = ufn;
+}
+EXPORT_SYMBOL(blk_queue_unprep_rq);
+
+/**
  * blk_queue_merge_bvec - set a merge_bvec function for queue
  * @q:		queue
  * @mbfn:	merge_bvec_fn

diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 306759b..001ab18 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c

@@ -180,26 +180,36 @@
 	return queue_var_show(max_hw_sectors_kb, (page));
 }
 
-static ssize_t queue_nonrot_show(struct request_queue *q, char *page)
-{
-	return queue_var_show(!blk_queue_nonrot(q), page);
+#define QUEUE_SYSFS_BIT_FNS(name, flag, neg)				\
+static ssize_t								\
+queue_show_##name(struct request_queue *q, char *page)			\
+{									\
+	int bit;							\
+	bit = test_bit(QUEUE_FLAG_##flag, &q->queue_flags);		\
+	return queue_var_show(neg ? !bit : bit, page);			\
+}									\
+static ssize_t								\
+queue_store_##name(struct request_queue *q, const char *page, size_t count) \
+{									\
+	unsigned long val;						\
+	ssize_t ret;							\
+	ret = queue_var_store(&val, page, count);			\
+	if (neg)							\
+		val = !val;						\
+									\
+	spin_lock_irq(q->queue_lock);					\
+	if (val)							\
+		queue_flag_set(QUEUE_FLAG_##flag, q);			\
+	else								\
+		queue_flag_clear(QUEUE_FLAG_##flag, q);			\
+	spin_unlock_irq(q->queue_lock);					\
+	return ret;							\
 }
 
-static ssize_t queue_nonrot_store(struct request_queue *q, const char *page,
-				  size_t count)
-{
-	unsigned long nm;
-	ssize_t ret = queue_var_store(&nm, page, count);
-
-	spin_lock_irq(q->queue_lock);
-	if (nm)
-		queue_flag_clear(QUEUE_FLAG_NONROT, q);
-	else
-		queue_flag_set(QUEUE_FLAG_NONROT, q);
-	spin_unlock_irq(q->queue_lock);
-
-	return ret;
-}
+QUEUE_SYSFS_BIT_FNS(nonrot, NONROT, 1);
+QUEUE_SYSFS_BIT_FNS(random, ADD_RANDOM, 0);
+QUEUE_SYSFS_BIT_FNS(iostats, IO_STAT, 0);
+#undef QUEUE_SYSFS_BIT_FNS
 
 static ssize_t queue_nomerges_show(struct request_queue *q, char *page)
 {
@@ -250,27 +260,6 @@
 	return ret;
 }
 
-static ssize_t queue_iostats_show(struct request_queue *q, char *page)
-{
-	return queue_var_show(blk_queue_io_stat(q), page);
-}
-
-static ssize_t queue_iostats_store(struct request_queue *q, const char *page,
-				   size_t count)
-{
-	unsigned long stats;
-	ssize_t ret = queue_var_store(&stats, page, count);
-
-	spin_lock_irq(q->queue_lock);
-	if (stats)
-		queue_flag_set(QUEUE_FLAG_IO_STAT, q);
-	else
-		queue_flag_clear(QUEUE_FLAG_IO_STAT, q);
-	spin_unlock_irq(q->queue_lock);
-
-	return ret;
-}
-
 static struct queue_sysfs_entry queue_requests_entry = {
 	.attr = {.name = "nr_requests", .mode = S_IRUGO | S_IWUSR },
 	.show = queue_requests_show,
@@ -352,8 +341,8 @@
 
 static struct queue_sysfs_entry queue_nonrot_entry = {
 	.attr = {.name = "rotational", .mode = S_IRUGO | S_IWUSR },
-	.show = queue_nonrot_show,
-	.store = queue_nonrot_store,
+	.show = queue_show_nonrot,
+	.store = queue_store_nonrot,
 };
 
 static struct queue_sysfs_entry queue_nomerges_entry = {
@@ -370,8 +359,14 @@
 
 static struct queue_sysfs_entry queue_iostats_entry = {
 	.attr = {.name = "iostats", .mode = S_IRUGO | S_IWUSR },
-	.show = queue_iostats_show,
-	.store = queue_iostats_store,
+	.show = queue_show_iostats,
+	.store = queue_store_iostats,
+};
+
+static struct queue_sysfs_entry queue_random_entry = {
+	.attr = {.name = "add_random", .mode = S_IRUGO | S_IWUSR },
+	.show = queue_show_random,
+	.store = queue_store_random,
 };
 
 static struct attribute *default_attrs[] = {
@@ -394,6 +389,7 @@
 	&queue_nomerges_entry.attr,
 	&queue_rq_affinity_entry.attr,
 	&queue_iostats_entry.attr,
+	&queue_random_entry.attr,
 	NULL,
 };
 

diff --git a/block/blk.h b/block/blk.h
index 5ee3d7e..6e7dc87 100644
--- a/block/blk.h
+++ b/block/blk.h

@@ -161,8 +161,10 @@
  */
 static inline int blk_do_io_stat(struct request *rq)
 {
-	return rq->rq_disk && blk_rq_io_stat(rq) &&
-	       (blk_fs_request(rq) || blk_discard_rq(rq));
+	return rq->rq_disk &&
+	       (rq->cmd_flags & REQ_IO_STAT) &&
+	       (rq->cmd_type == REQ_TYPE_FS ||
+	        (rq->cmd_flags & REQ_DISCARD));
 }
 
 #endif

diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 7982b83..eb4086f 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c

@@ -458,7 +458,7 @@
  */
 static inline bool cfq_bio_sync(struct bio *bio)
 {
-	return bio_data_dir(bio) == READ || bio_rw_flagged(bio, BIO_RW_SYNCIO);
+	return bio_data_dir(bio) == READ || (bio->bi_rw & REQ_SYNC);
 }
 
 /*
@@ -646,9 +646,10 @@
 		return rq1;
 	else if (rq_is_sync(rq2) && !rq_is_sync(rq1))
 		return rq2;
-	if (rq_is_meta(rq1) && !rq_is_meta(rq2))
+	if ((rq1->cmd_flags & REQ_META) && !(rq2->cmd_flags & REQ_META))
 		return rq1;
-	else if (rq_is_meta(rq2) && !rq_is_meta(rq1))
+	else if ((rq2->cmd_flags & REQ_META) &&
+		 !(rq1->cmd_flags & REQ_META))
 		return rq2;
 
 	s1 = blk_rq_pos(rq1);
@@ -1484,7 +1485,7 @@
 	cfqq->cfqd->rq_queued--;
 	cfq_blkiocg_update_io_remove_stats(&(RQ_CFQG(rq))->blkg,
 					rq_data_dir(rq), rq_is_sync(rq));
-	if (rq_is_meta(rq)) {
+	if (rq->cmd_flags & REQ_META) {
 		WARN_ON(!cfqq->meta_pending);
 		cfqq->meta_pending--;
 	}
@@ -3176,7 +3177,7 @@
 	 * So both queues are sync. Let the new request get disk time if
 	 * it's a metadata request and the current queue is doing regular IO.
 	 */
-	if (rq_is_meta(rq) && !cfqq->meta_pending)
+	if ((rq->cmd_flags & REQ_META) && !cfqq->meta_pending)
 		return true;
 
 	/*
@@ -3230,7 +3231,7 @@
 	struct cfq_io_context *cic = RQ_CIC(rq);
 
 	cfqd->rq_queued++;
-	if (rq_is_meta(rq))
+	if (rq->cmd_flags & REQ_META)
 		cfqq->meta_pending++;
 
 	cfq_update_io_thinktime(cfqd, cic);
@@ -3365,7 +3366,8 @@
 	unsigned long now;
 
 	now = jiffies;
-	cfq_log_cfqq(cfqd, cfqq, "complete rqnoidle %d", !!rq_noidle(rq));
+	cfq_log_cfqq(cfqd, cfqq, "complete rqnoidle %d",
+		     !!(rq->cmd_flags & REQ_NOIDLE));
 
 	cfq_update_hw_tag(cfqd);
 
@@ -3419,11 +3421,12 @@
 			cfq_slice_expired(cfqd, 1);
 		else if (sync && cfqq_empty &&
 			 !cfq_close_cooperator(cfqd, cfqq)) {
-			cfqd->noidle_tree_requires_idle |= !rq_noidle(rq);
+			cfqd->noidle_tree_requires_idle |=
+				!(rq->cmd_flags & REQ_NOIDLE);
 			/*
 			 * Idling is enabled for SYNC_WORKLOAD.
 			 * SYNC_NOIDLE_WORKLOAD idles at the end of the tree
-			 * only if we processed at least one !rq_noidle request
+			 * only if we processed at least one !REQ_NOIDLE request
 			 */
 			if (cfqd->serving_type == SYNC_WORKLOAD
 			    || cfqd->noidle_tree_requires_idle

diff --git a/block/compat_ioctl.c b/block/compat_ioctl.c
index f26051f..d530856 100644
--- a/block/compat_ioctl.c
+++ b/block/compat_ioctl.c

@@ -535,56 +535,6 @@
 	return err;
 }
 
-struct compat_blk_user_trace_setup {
-	char name[32];
-	u16 act_mask;
-	u32 buf_size;
-	u32 buf_nr;
-	compat_u64 start_lba;
-	compat_u64 end_lba;
-	u32 pid;
-};
-#define BLKTRACESETUP32 _IOWR(0x12, 115, struct compat_blk_user_trace_setup)
-
-static int compat_blk_trace_setup(struct block_device *bdev, char __user *arg)
-{
-	struct blk_user_trace_setup buts;
-	struct compat_blk_user_trace_setup cbuts;
-	struct request_queue *q;
-	char b[BDEVNAME_SIZE];
-	int ret;
-
-	q = bdev_get_queue(bdev);
-	if (!q)
-		return -ENXIO;
-
-	if (copy_from_user(&cbuts, arg, sizeof(cbuts)))
-		return -EFAULT;
-
-	bdevname(bdev, b);
-
-	buts = (struct blk_user_trace_setup) {
-		.act_mask = cbuts.act_mask,
-		.buf_size = cbuts.buf_size,
-		.buf_nr = cbuts.buf_nr,
-		.start_lba = cbuts.start_lba,
-		.end_lba = cbuts.end_lba,
-		.pid = cbuts.pid,
-	};
-	memcpy(&buts.name, &cbuts.name, 32);
-
-	mutex_lock(&bdev->bd_mutex);
-	ret = do_blk_trace_setup(q, b, bdev->bd_dev, bdev, &buts);
-	mutex_unlock(&bdev->bd_mutex);
-	if (ret)
-		return ret;
-
-	if (copy_to_user(arg, &buts.name, 32))
-		return -EFAULT;
-
-	return 0;
-}
-
 static int compat_blkdev_driver_ioctl(struct block_device *bdev, fmode_t mode,
 			unsigned cmd, unsigned long arg)
 {
@@ -802,16 +752,10 @@
 		return compat_put_u64(arg, bdev->bd_inode->i_size);
 
 	case BLKTRACESETUP32:
-		lock_kernel();
-		ret = compat_blk_trace_setup(bdev, compat_ptr(arg));
-		unlock_kernel();
-		return ret;
 	case BLKTRACESTART: /* compatible */
 	case BLKTRACESTOP:  /* compatible */
 	case BLKTRACETEARDOWN: /* compatible */
-		lock_kernel();
 		ret = blk_trace_ioctl(bdev, cmd, compat_ptr(arg));
-		unlock_kernel();
 		return ret;
 	default:
 		if (disk->fops->compat_ioctl)

diff --git a/block/elevator.c b/block/elevator.c
index 923a913..816a7c8 100644
--- a/block/elevator.c
+++ b/block/elevator.c

@@ -79,8 +79,7 @@
 	/*
 	 * Don't merge file system requests and discard requests
 	 */
-	if (bio_rw_flagged(bio, BIO_RW_DISCARD) !=
-	    bio_rw_flagged(rq->bio, BIO_RW_DISCARD))
+	if ((bio->bi_rw & REQ_DISCARD) != (rq->bio->bi_rw & REQ_DISCARD))
 		return 0;
 
 	/*
@@ -428,7 +427,8 @@
 	list_for_each_prev(entry, &q->queue_head) {
 		struct request *pos = list_entry_rq(entry);
 
-		if (blk_discard_rq(rq) != blk_discard_rq(pos))
+		if ((rq->cmd_flags & REQ_DISCARD) !=
+		    (pos->cmd_flags & REQ_DISCARD))
 			break;
 		if (rq_data_dir(rq) != rq_data_dir(pos))
 			break;
@@ -558,7 +558,7 @@
 	 */
 	if (blk_account_rq(rq)) {
 		q->in_flight[rq_is_sync(rq)]--;
-		if (blk_sorted_rq(rq))
+		if (rq->cmd_flags & REQ_SORTED)
 			elv_deactivate_rq(q, rq);
 	}
 
@@ -644,7 +644,8 @@
 		break;
 
 	case ELEVATOR_INSERT_SORT:
-		BUG_ON(!blk_fs_request(rq) && !blk_discard_rq(rq));
+		BUG_ON(rq->cmd_type != REQ_TYPE_FS &&
+		       !(rq->cmd_flags & REQ_DISCARD));
 		rq->cmd_flags |= REQ_SORTED;
 		q->nr_sorted++;
 		if (rq_mergeable(rq)) {
@@ -716,7 +717,7 @@
 		/*
 		 * toggle ordered color
 		 */
-		if (blk_barrier_rq(rq))
+		if (rq->cmd_flags & REQ_HARDBARRIER)
 			q->ordcolor ^= 1;
 
 		/*
@@ -729,7 +730,8 @@
 		 * this request is scheduling boundary, update
 		 * end_sector
 		 */
-		if (blk_fs_request(rq) || blk_discard_rq(rq)) {
+		if (rq->cmd_type == REQ_TYPE_FS ||
+		    (rq->cmd_flags & REQ_DISCARD)) {
 			q->end_sector = rq_end_sector(rq);
 			q->boundary_rq = rq;
 		}
@@ -843,7 +845,8 @@
 	 */
 	if (blk_account_rq(rq)) {
 		q->in_flight[rq_is_sync(rq)]--;
-		if (blk_sorted_rq(rq) && e->ops->elevator_completed_req_fn)
+		if ((rq->cmd_flags & REQ_SORTED) &&
+		    e->ops->elevator_completed_req_fn)
 			e->ops->elevator_completed_req_fn(q, rq);
 	}
 

diff --git a/block/ioctl.c b/block/ioctl.c
index e8eb679..09fd7f1 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c

@@ -163,18 +163,10 @@
 			unsigned cmd, unsigned long arg)
 {
 	struct gendisk *disk = bdev->bd_disk;
-	int ret;
 
 	if (disk->fops->ioctl)
 		return disk->fops->ioctl(bdev, mode, cmd, arg);
 
-	if (disk->fops->locked_ioctl) {
-		lock_kernel();
-		ret = disk->fops->locked_ioctl(bdev, mode, cmd, arg);
-		unlock_kernel();
-		return ret;
-	}
-
 	return -ENOTTY;
 }
 /*
@@ -185,8 +177,7 @@
 EXPORT_SYMBOL_GPL(__blkdev_driver_ioctl);
 
 /*
- * always keep this in sync with compat_blkdev_ioctl() and
- * compat_blkdev_locked_ioctl()
+ * always keep this in sync with compat_blkdev_ioctl()
  */
 int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
 			unsigned long arg)
@@ -206,10 +197,8 @@
 		if (ret != -EINVAL && ret != -ENOTTY)
 			return ret;
 
-		lock_kernel();
 		fsync_bdev(bdev);
 		invalidate_bdev(bdev);
-		unlock_kernel();
 		return 0;
 
 	case BLKROSET:
@@ -221,9 +210,7 @@
 			return -EACCES;
 		if (get_user(n, (int __user *)(arg)))
 			return -EFAULT;
-		lock_kernel();
 		set_device_ro(bdev, n);
-		unlock_kernel();
 		return 0;
 
 	case BLKDISCARD: {
@@ -309,14 +296,10 @@
 			bd_release(bdev);
 		return ret;
 	case BLKPG:
-		lock_kernel();
 		ret = blkpg_ioctl(bdev, (struct blkpg_ioctl_arg __user *) arg);
-		unlock_kernel();
 		break;
 	case BLKRRPART:
-		lock_kernel();
 		ret = blkdev_reread_part(bdev);
-		unlock_kernel();
 		break;
 	case BLKGETSIZE:
 		size = bdev->bd_inode->i_size;
@@ -329,9 +312,7 @@
 	case BLKTRACESTOP:
 	case BLKTRACESETUP:
 	case BLKTRACETEARDOWN:
-		lock_kernel();
 		ret = blk_trace_ioctl(bdev, cmd, (char __user *) arg);
-		unlock_kernel();
 		break;
 	default:
 		ret = __blkdev_driver_ioctl(bdev, mode, cmd, arg);

diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
index d75c9c4..a89172c 100644
--- a/drivers/ata/libata-scsi.c
+++ b/drivers/ata/libata-scsi.c

@@ -1111,10 +1111,10 @@
  */
 static int atapi_drain_needed(struct request *rq)
 {
-	if (likely(!blk_pc_request(rq)))
+	if (likely(rq->cmd_type != REQ_TYPE_BLOCK_PC))
 		return 0;
 
-	if (!blk_rq_bytes(rq) || (rq->cmd_flags & REQ_RW))
+	if (!blk_rq_bytes(rq) || (rq->cmd_flags & REQ_WRITE))
 		return 0;
 
 	return atapi_cmd_type(rq->cmd[0]) == ATAPI_MISC;

diff --git a/drivers/block/DAC960.c b/drivers/block/DAC960.c
index c5f22bb..4e2c367 100644
--- a/drivers/block/DAC960.c
+++ b/drivers/block/DAC960.c

@@ -79,23 +79,28 @@
 	struct gendisk *disk = bdev->bd_disk;
 	DAC960_Controller_T *p = disk->queue->queuedata;
 	int drive_nr = (long)disk->private_data;
+	int ret = -ENXIO;
 
+	lock_kernel();
 	if (p->FirmwareType == DAC960_V1_Controller) {
 		if (p->V1.LogicalDriveInformation[drive_nr].
 		    LogicalDriveState == DAC960_V1_LogicalDrive_Offline)
-			return -ENXIO;
+			goto out;
 	} else {
 		DAC960_V2_LogicalDeviceInfo_T *i =
 			p->V2.LogicalDeviceInformation[drive_nr];
 		if (!i || i->LogicalDeviceState == DAC960_V2_LogicalDevice_Offline)
-			return -ENXIO;
+			goto out;
 	}
 
 	check_disk_change(bdev);
 
 	if (!get_capacity(p->disks[drive_nr]))
-		return -ENXIO;
-	return 0;
+		goto out;
+	ret = 0;
+out:
+	unlock_kernel();
+	return ret;
 }
 
 static int DAC960_getgeo(struct block_device *bdev, struct hd_geometry *geo)

diff --git a/drivers/block/amiflop.c b/drivers/block/amiflop.c
index 832798a..76f114f 100644
--- a/drivers/block/amiflop.c
+++ b/drivers/block/amiflop.c

@@ -60,6 +60,7 @@
 #include <linux/hdreg.h>
 #include <linux/delay.h>
 #include <linux/init.h>
+#include <linux/smp_lock.h>
 #include <linux/amifdreg.h>
 #include <linux/amifd.h>
 #include <linux/buffer_head.h>
@@ -1423,7 +1424,7 @@
 	return 0;
 }
 
-static int fd_ioctl(struct block_device *bdev, fmode_t mode,
+static int fd_locked_ioctl(struct block_device *bdev, fmode_t mode,
 		    unsigned int cmd, unsigned long param)
 {
 	struct amiga_floppy_struct *p = bdev->bd_disk->private_data;
@@ -1500,6 +1501,18 @@
 	return 0;
 }
 
+static int fd_ioctl(struct block_device *bdev, fmode_t mode,
+			     unsigned int cmd, unsigned long param)
+{
+	int ret;
+
+	lock_kernel();
+	ret = fd_locked_ioctl(bdev, mode, cmd, param);
+	unlock_kernel();
+
+	return ret;
+}
+
 static void fd_probe(int dev)
 {
 	unsigned long code;
@@ -1542,10 +1555,13 @@
 	int old_dev;
 	unsigned long flags;
 
+	lock_kernel();
 	old_dev = fd_device[drive];
 
-	if (fd_ref[drive] && old_dev != system)
+	if (fd_ref[drive] && old_dev != system) {
+		unlock_kernel();
 		return -EBUSY;
+	}
 
 	if (mode & (FMODE_READ|FMODE_WRITE)) {
 		check_disk_change(bdev);
@@ -1558,8 +1574,10 @@
 			fd_deselect (drive);
 			rel_fdc();
 
-			if (wrprot)
+			if (wrprot) {
+				unlock_kernel();
 				return -EROFS;
+			}
 		}
 	}
 
@@ -1576,6 +1594,7 @@
 	printk(KERN_INFO "fd%d: accessing %s-disk with %s-layout\n",drive,
 	       unit[drive].type->name, data_types[system].name);
 
+	unlock_kernel();
 	return 0;
 }
 
@@ -1584,6 +1603,7 @@
 	struct amiga_floppy_struct *p = disk->private_data;
 	int drive = p - unit;
 
+	lock_kernel();
 	if (unit[drive].dirty == 1) {
 		del_timer (flush_track_timer + drive);
 		non_int_flush_track (drive);
@@ -1597,6 +1617,7 @@
 /* the mod_use counter is handled this way */
 	floppy_off (drive | 0x40000000);
 #endif
+	unlock_kernel();
 	return 0;
 }
 
@@ -1638,7 +1659,7 @@
 	.owner		= THIS_MODULE,
 	.open		= floppy_open,
 	.release	= floppy_release,
-	.locked_ioctl	= fd_ioctl,
+	.ioctl		= fd_ioctl,
 	.getgeo		= fd_getgeo,
 	.media_changed	= amiga_floppy_change,
 };

diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c
index 035cefe..a946929 100644
--- a/drivers/block/aoe/aoeblk.c
+++ b/drivers/block/aoe/aoeblk.c

@@ -12,6 +12,7 @@
 #include <linux/slab.h>
 #include <linux/genhd.h>
 #include <linux/netdevice.h>
+#include <linux/smp_lock.h>
 #include "aoe.h"
 
 static struct kmem_cache *buf_pool_cache;
@@ -124,13 +125,16 @@
 	struct aoedev *d = bdev->bd_disk->private_data;
 	ulong flags;
 
+	lock_kernel();
 	spin_lock_irqsave(&d->lock, flags);
 	if (d->flags & DEVFL_UP) {
 		d->nopen++;
 		spin_unlock_irqrestore(&d->lock, flags);
+		unlock_kernel();
 		return 0;
 	}
 	spin_unlock_irqrestore(&d->lock, flags);
+	unlock_kernel();
 	return -ENODEV;
 }
 
@@ -173,7 +177,7 @@
 		BUG();
 		bio_endio(bio, -ENXIO);
 		return 0;
-	} else if (bio_rw_flagged(bio, BIO_RW_BARRIER)) {
+	} else if (bio->bi_rw & REQ_HARDBARRIER) {
 		bio_endio(bio, -EOPNOTSUPP);
 		return 0;
 	} else if (bio->bi_io_vec == NULL) {

diff --git a/drivers/block/ataflop.c b/drivers/block/ataflop.c
index e35cf59..aceb964 100644
--- a/drivers/block/ataflop.c
+++ b/drivers/block/ataflop.c

@@ -67,6 +67,7 @@
 #include <linux/delay.h>
 #include <linux/init.h>
 #include <linux/blkdev.h>
+#include <linux/smp_lock.h>
 
 #include <asm/atafd.h>
 #include <asm/atafdreg.h>
@@ -359,7 +360,7 @@
 static void finish_fdc_done( int dummy );
 static void setup_req_params( int drive );
 static void redo_fd_request( void);
-static int fd_ioctl(struct block_device *bdev, fmode_t mode, unsigned int
+static int fd_locked_ioctl(struct block_device *bdev, fmode_t mode, unsigned int
                      cmd, unsigned long param);
 static void fd_probe( int drive );
 static int fd_test_drive_present( int drive );
@@ -1480,7 +1481,7 @@
 	atari_enable_irq( IRQ_MFP_FDC );
 }
 
-static int fd_ioctl(struct block_device *bdev, fmode_t mode,
+static int fd_locked_ioctl(struct block_device *bdev, fmode_t mode,
 		    unsigned int cmd, unsigned long param)
 {
 	struct gendisk *disk = bdev->bd_disk;
@@ -1665,6 +1666,17 @@
 	}
 }
 
+static int fd_ioctl(struct block_device *bdev, fmode_t mode,
+			     unsigned int cmd, unsigned long arg)
+{
+	int ret;
+
+	lock_kernel();
+	ret = fd_locked_ioctl(bdev, mode, cmd, arg);
+	unlock_kernel();
+
+	return ret;
+}
 
 /* Initialize the 'unit' variable for drive 'drive' */
 
@@ -1838,24 +1850,36 @@
 	return 0;
 }
 
+static int floppy_unlocked_open(struct block_device *bdev, fmode_t mode)
+{
+	int ret;
+
+	lock_kernel();
+	ret = floppy_open(bdev, mode);
+	unlock_kernel();
+
+	return ret;
+}
 
 static int floppy_release(struct gendisk *disk, fmode_t mode)
 {
 	struct atari_floppy_struct *p = disk->private_data;
+	lock_kernel();
 	if (p->ref < 0)
 		p->ref = 0;
 	else if (!p->ref--) {
 		printk(KERN_ERR "floppy_release with fd_ref == 0");
 		p->ref = 0;
 	}
+	unlock_kernel();
 	return 0;
 }
 
 static const struct block_device_operations floppy_fops = {
 	.owner		= THIS_MODULE,
-	.open		= floppy_open,
+	.open		= floppy_unlocked_open,
 	.release	= floppy_release,
-	.locked_ioctl	= fd_ioctl,
+	.ioctl		= fd_ioctl,
 	.media_changed	= check_floppy_change,
 	.revalidate_disk= floppy_revalidate,
 };

diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index f1bf79d..1c7f637 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c

@@ -15,6 +15,7 @@
 #include <linux/blkdev.h>
 #include <linux/bio.h>
 #include <linux/highmem.h>
+#include <linux/smp_lock.h>
 #include <linux/radix-tree.h>
 #include <linux/buffer_head.h> /* invalidate_bh_lrus() */
 #include <linux/slab.h>
@@ -340,7 +341,7 @@
 						get_capacity(bdev->bd_disk))
 		goto out;
 
-	if (unlikely(bio_rw_flagged(bio, BIO_RW_DISCARD))) {
+	if (unlikely(bio->bi_rw & REQ_DISCARD)) {
 		err = 0;
 		discard_from_brd(brd, sector, bio->bi_size);
 		goto out;
@@ -401,6 +402,7 @@
 	 * ram device BLKFLSBUF has special semantics, we want to actually
 	 * release and destroy the ramdisk data.
 	 */
+	lock_kernel();
 	mutex_lock(&bdev->bd_mutex);
 	error = -EBUSY;
 	if (bdev->bd_openers <= 1) {
@@ -417,13 +419,14 @@
 		error = 0;
 	}
 	mutex_unlock(&bdev->bd_mutex);
+	unlock_kernel();
 
 	return error;
 }
 
 static const struct block_device_operations brd_fops = {
 	.owner =		THIS_MODULE,
-	.locked_ioctl =		brd_ioctl,
+	.ioctl =		brd_ioctl,
 #ifdef CONFIG_BLK_DEV_XIP
 	.direct_access =	brd_direct_access,
 #endif
@@ -479,7 +482,7 @@
 	if (!brd->brd_queue)
 		goto out_free_dev;
 	blk_queue_make_request(brd->brd_queue, brd_make_request);
-	blk_queue_ordered(brd->brd_queue, QUEUE_ORDERED_TAG, NULL);
+	blk_queue_ordered(brd->brd_queue, QUEUE_ORDERED_TAG);
 	blk_queue_max_hw_sectors(brd->brd_queue, 1024);
 	blk_queue_bounce_limit(brd->brd_queue, BLK_BOUNCE_ANY);
 

diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index e1e7143..31064df 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c

@@ -56,16 +56,14 @@
 #include <linux/kthread.h>
 
 #define CCISS_DRIVER_VERSION(maj,min,submin) ((maj<<16)|(min<<8)|(submin))
-#define DRIVER_NAME "HP CISS Driver (v 3.6.20)"
-#define DRIVER_VERSION CCISS_DRIVER_VERSION(3, 6, 20)
+#define DRIVER_NAME "HP CISS Driver (v 3.6.26)"
+#define DRIVER_VERSION CCISS_DRIVER_VERSION(3, 6, 26)
 
 /* Embedded module documentation macros - see modules.h */
 MODULE_AUTHOR("Hewlett-Packard Company");
 MODULE_DESCRIPTION("Driver for HP Smart Array Controllers");
-MODULE_SUPPORTED_DEVICE("HP SA5i SA5i+ SA532 SA5300 SA5312 SA641 SA642 SA6400"
-			" SA6i P600 P800 P400 P400i E200 E200i E500 P700m"
-			" Smart Array G2 Series SAS/SATA Controllers");
-MODULE_VERSION("3.6.20");
+MODULE_SUPPORTED_DEVICE("HP Smart Array Controllers");
+MODULE_VERSION("3.6.26");
 MODULE_LICENSE("GPL");
 
 static int cciss_allow_hpsa;
@@ -107,6 +105,11 @@
 	{PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSE,     0x103C, 0x3249},
 	{PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSE,     0x103C, 0x324A},
 	{PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSE,     0x103C, 0x324B},
+	{PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSE,     0x103C, 0x3250},
+	{PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSE,     0x103C, 0x3251},
+	{PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSE,     0x103C, 0x3252},
+	{PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSE,     0x103C, 0x3253},
+	{PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSE,     0x103C, 0x3254},
 	{0,}
 };
 
@@ -146,6 +149,11 @@
 	{0x3249103C, "Smart Array P812", &SA5_access},
 	{0x324A103C, "Smart Array P712m", &SA5_access},
 	{0x324B103C, "Smart Array P711m", &SA5_access},
+	{0x3250103C, "Smart Array", &SA5_access},
+	{0x3251103C, "Smart Array", &SA5_access},
+	{0x3252103C, "Smart Array", &SA5_access},
+	{0x3253103C, "Smart Array", &SA5_access},
+	{0x3254103C, "Smart Array", &SA5_access},
 };
 
 /* How long to wait (in milliseconds) for board to go into simple mode */
@@ -167,9 +175,13 @@
 static LIST_HEAD(scan_q);
 
 static void do_cciss_request(struct request_queue *q);
-static irqreturn_t do_cciss_intr(int irq, void *dev_id);
+static irqreturn_t do_cciss_intx(int irq, void *dev_id);
+static irqreturn_t do_cciss_msix_intr(int irq, void *dev_id);
 static int cciss_open(struct block_device *bdev, fmode_t mode);
+static int cciss_unlocked_open(struct block_device *bdev, fmode_t mode);
 static int cciss_release(struct gendisk *disk, fmode_t mode);
+static int do_ioctl(struct block_device *bdev, fmode_t mode,
+		    unsigned int cmd, unsigned long arg);
 static int cciss_ioctl(struct block_device *bdev, fmode_t mode,
 		       unsigned int cmd, unsigned long arg);
 static int cciss_getgeo(struct block_device *bdev, struct hd_geometry *geo);
@@ -179,25 +191,23 @@
 static int deregister_disk(ctlr_info_t *h, int drv_index,
 			   int clear_all, int via_ioctl);
 
-static void cciss_read_capacity(int ctlr, int logvol,
+static void cciss_read_capacity(ctlr_info_t *h, int logvol,
 			sector_t *total_size, unsigned int *block_size);
-static void cciss_read_capacity_16(int ctlr, int logvol,
+static void cciss_read_capacity_16(ctlr_info_t *h, int logvol,
 			sector_t *total_size, unsigned int *block_size);
-static void cciss_geometry_inquiry(int ctlr, int logvol,
+static void cciss_geometry_inquiry(ctlr_info_t *h, int logvol,
 			sector_t total_size,
 			unsigned int block_size, InquiryData_struct *inq_buff,
 				   drive_info_struct *drv);
-static void __devinit cciss_interrupt_mode(ctlr_info_t *, struct pci_dev *,
-					   __u32);
+static void __devinit cciss_interrupt_mode(ctlr_info_t *);
 static void start_io(ctlr_info_t *h);
-static int sendcmd_withirq(__u8 cmd, int ctlr, void *buff, size_t size,
+static int sendcmd_withirq(ctlr_info_t *h, __u8 cmd, void *buff, size_t size,
 			__u8 page_code, unsigned char scsi3addr[],
 			int cmd_type);
 static int sendcmd_withirq_core(ctlr_info_t *h, CommandList_struct *c,
 	int attempt_retry);
 static int process_sendcmd_error(ctlr_info_t *h, CommandList_struct *c);
 
-static void fail_all_cmds(unsigned long ctlr);
 static int add_to_scan_list(struct ctlr_info *h);
 static int scan_thread(void *data);
 static int check_for_unit_attention(ctlr_info_t *h, CommandList_struct *c);
@@ -205,11 +215,23 @@
 static void cciss_device_release(struct device *dev);
 static void cciss_free_gendisk(ctlr_info_t *h, int drv_index);
 static void cciss_free_drive_info(ctlr_info_t *h, int drv_index);
+static inline u32 next_command(ctlr_info_t *h);
+static int __devinit cciss_find_cfg_addrs(struct pci_dev *pdev,
+	void __iomem *vaddr, u32 *cfg_base_addr, u64 *cfg_base_addr_index,
+	u64 *cfg_offset);
+static int __devinit cciss_pci_find_memory_BAR(struct pci_dev *pdev,
+	unsigned long *memory_bar);
+
+
+/* performant mode helper functions */
+static void  calc_bucket_map(int *bucket, int num_buckets, int nsgs,
+				int *bucket_map);
+static void cciss_put_controller_into_performant_mode(ctlr_info_t *h);
 
 #ifdef CONFIG_PROC_FS
-static void cciss_procinit(int i);
+static void cciss_procinit(ctlr_info_t *h);
 #else
-static void cciss_procinit(int i)
+static void cciss_procinit(ctlr_info_t *h)
 {
 }
 #endif				/* CONFIG_PROC_FS */
@@ -221,9 +243,9 @@
 
 static const struct block_device_operations cciss_fops = {
 	.owner = THIS_MODULE,
-	.open = cciss_open,
+	.open = cciss_unlocked_open,
 	.release = cciss_release,
-	.locked_ioctl = cciss_ioctl,
+	.ioctl = do_ioctl,
 	.getgeo = cciss_getgeo,
 #ifdef CONFIG_COMPAT
 	.compat_ioctl = cciss_compat_ioctl,
@@ -231,6 +253,16 @@
 	.revalidate_disk = cciss_revalidate,
 };
 
+/* set_performant_mode: Modify the tag for cciss performant
+ * set bit 0 for pull model, bits 3-1 for block fetch
+ * register number
+ */
+static void set_performant_mode(ctlr_info_t *h, CommandList_struct *c)
+{
+	if (likely(h->transMethod == CFGTBL_Trans_Performant))
+		c->busaddr |= 1 | (h->blockFetchTable[c->Header.SGList] << 1);
+}
+
 /*
  * Enqueuing and dequeuing functions for cmdlists.
  */
@@ -257,6 +289,18 @@
 	hlist_del_init(&c->list);
 }
 
+static void enqueue_cmd_and_start_io(ctlr_info_t *h,
+	CommandList_struct *c)
+{
+	unsigned long flags;
+	set_performant_mode(h, c);
+	spin_lock_irqsave(&h->lock, flags);
+	addQ(&h->reqQ, c);
+	h->Qdepth++;
+	start_io(h);
+	spin_unlock_irqrestore(&h->lock, flags);
+}
+
 static void cciss_free_sg_chain_blocks(SGDescriptor_struct **cmd_sg_list,
 	int nr_cmds)
 {
@@ -366,32 +410,31 @@
 		h->product_name,
 		(unsigned long)h->board_id,
 		h->firm_ver[0], h->firm_ver[1], h->firm_ver[2],
-		h->firm_ver[3], (unsigned int)h->intr[SIMPLE_MODE_INT],
+		h->firm_ver[3], (unsigned int)h->intr[PERF_MODE_INT],
 		h->num_luns,
 		h->Qdepth, h->commands_outstanding,
 		h->maxQsinceinit, h->max_outstanding, h->maxSG);
 
 #ifdef CONFIG_CISS_SCSI_TAPE
-	cciss_seq_tape_report(seq, h->ctlr);
+	cciss_seq_tape_report(seq, h);
 #endif /* CONFIG_CISS_SCSI_TAPE */
 }
 
 static void *cciss_seq_start(struct seq_file *seq, loff_t *pos)
 {
 	ctlr_info_t *h = seq->private;
-	unsigned ctlr = h->ctlr;
 	unsigned long flags;
 
 	/* prevent displaying bogus info during configuration
 	 * or deconfiguration of a logical volume
 	 */
-	spin_lock_irqsave(CCISS_LOCK(ctlr), flags);
+	spin_lock_irqsave(&h->lock, flags);
 	if (h->busy_configuring) {
-		spin_unlock_irqrestore(CCISS_LOCK(ctlr), flags);
+		spin_unlock_irqrestore(&h->lock, flags);
 		return ERR_PTR(-EBUSY);
 	}
 	h->busy_configuring = 1;
-	spin_unlock_irqrestore(CCISS_LOCK(ctlr), flags);
+	spin_unlock_irqrestore(&h->lock, flags);
 
 	if (*pos == 0)
 		cciss_seq_show_header(seq);
@@ -499,7 +542,7 @@
 		struct seq_file *seq = file->private_data;
 		ctlr_info_t *h = seq->private;
 
-		err = cciss_engage_scsi(h->ctlr);
+		err = cciss_engage_scsi(h);
 		if (err == 0)
 			err = length;
 	} else
@@ -522,7 +565,7 @@
 	.write	 = cciss_proc_write,
 };
 
-static void __devinit cciss_procinit(int i)
+static void __devinit cciss_procinit(ctlr_info_t *h)
 {
 	struct proc_dir_entry *pde;
 
@@ -530,9 +573,9 @@
 		proc_cciss = proc_mkdir("driver/cciss", NULL);
 	if (!proc_cciss)
 		return;
-	pde = proc_create_data(hba[i]->devname, S_IWUSR | S_IRUSR | S_IRGRP |
+	pde = proc_create_data(h->devname, S_IWUSR | S_IRUSR | S_IRGRP |
 					S_IROTH, proc_cciss,
-					&cciss_proc_fops, hba[i]);
+					&cciss_proc_fops, h);
 }
 #endif				/* CONFIG_PROC_FS */
 
@@ -565,12 +608,12 @@
 	unsigned long flags;
 	int ret = 0;
 
-	spin_lock_irqsave(CCISS_LOCK(h->ctlr), flags);
+	spin_lock_irqsave(&h->lock, flags);
 	if (h->busy_configuring)
 		ret = -EBUSY;
 	else
 		memcpy(sn, drv->serial_no, sizeof(sn));
-	spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags);
+	spin_unlock_irqrestore(&h->lock, flags);
 
 	if (ret)
 		return ret;
@@ -595,12 +638,12 @@
 	unsigned long flags;
 	int ret = 0;
 
-	spin_lock_irqsave(CCISS_LOCK(h->ctlr), flags);
+	spin_lock_irqsave(&h->lock, flags);
 	if (h->busy_configuring)
 		ret = -EBUSY;
 	else
 		memcpy(vendor, drv->vendor, VENDOR_LEN + 1);
-	spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags);
+	spin_unlock_irqrestore(&h->lock, flags);
 
 	if (ret)
 		return ret;
@@ -619,12 +662,12 @@
 	unsigned long flags;
 	int ret = 0;
 
-	spin_lock_irqsave(CCISS_LOCK(h->ctlr), flags);
+	spin_lock_irqsave(&h->lock, flags);
 	if (h->busy_configuring)
 		ret = -EBUSY;
 	else
 		memcpy(model, drv->model, MODEL_LEN + 1);
-	spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags);
+	spin_unlock_irqrestore(&h->lock, flags);
 
 	if (ret)
 		return ret;
@@ -643,12 +686,12 @@
 	unsigned long flags;
 	int ret = 0;
 
-	spin_lock_irqsave(CCISS_LOCK(h->ctlr), flags);
+	spin_lock_irqsave(&h->lock, flags);
 	if (h->busy_configuring)
 		ret = -EBUSY;
 	else
 		memcpy(rev, drv->rev, REV_LEN + 1);
-	spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags);
+	spin_unlock_irqrestore(&h->lock, flags);
 
 	if (ret)
 		return ret;
@@ -665,17 +708,17 @@
 	unsigned long flags;
 	unsigned char lunid[8];
 
-	spin_lock_irqsave(CCISS_LOCK(h->ctlr), flags);
+	spin_lock_irqsave(&h->lock, flags);
 	if (h->busy_configuring) {
-		spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags);
+		spin_unlock_irqrestore(&h->lock, flags);
 		return -EBUSY;
 	}
 	if (!drv->heads) {
-		spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags);
+		spin_unlock_irqrestore(&h->lock, flags);
 		return -ENOTTY;
 	}
 	memcpy(lunid, drv->LunID, sizeof(lunid));
-	spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags);
+	spin_unlock_irqrestore(&h->lock, flags);
 	return snprintf(buf, 20, "0x%02x%02x%02x%02x%02x%02x%02x%02x\n",
 		lunid[0], lunid[1], lunid[2], lunid[3],
 		lunid[4], lunid[5], lunid[6], lunid[7]);
@@ -690,13 +733,13 @@
 	int raid;
 	unsigned long flags;
 
-	spin_lock_irqsave(CCISS_LOCK(h->ctlr), flags);
+	spin_lock_irqsave(&h->lock, flags);
 	if (h->busy_configuring) {
-		spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags);
+		spin_unlock_irqrestore(&h->lock, flags);
 		return -EBUSY;
 	}
 	raid = drv->raid_level;
-	spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags);
+	spin_unlock_irqrestore(&h->lock, flags);
 	if (raid < 0 || raid > RAID_UNKNOWN)
 		raid = RAID_UNKNOWN;
 
@@ -713,13 +756,13 @@
 	unsigned long flags;
 	int count;
 
-	spin_lock_irqsave(CCISS_LOCK(h->ctlr), flags);
+	spin_lock_irqsave(&h->lock, flags);
 	if (h->busy_configuring) {
-		spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags);
+		spin_unlock_irqrestore(&h->lock, flags);
 		return -EBUSY;
 	}
 	count = drv->usage_count;
-	spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags);
+	spin_unlock_irqrestore(&h->lock, flags);
 	return snprintf(buf, 20, "%d\n", count);
 }
 static DEVICE_ATTR(usage_count, S_IRUGO, cciss_show_usage_count, NULL);
@@ -864,60 +907,31 @@
 /*
  * For operations that cannot sleep, a command block is allocated at init,
  * and managed by cmd_alloc() and cmd_free() using a simple bitmap to track
- * which ones are free or in use.  For operations that can wait for kmalloc
- * to possible sleep, this routine can be called with get_from_pool set to 0.
- * cmd_free() MUST be called with a got_from_pool set to 0 if cmd_alloc was.
+ * which ones are free or in use.
  */
-static CommandList_struct *cmd_alloc(ctlr_info_t *h, int get_from_pool)
+static CommandList_struct *cmd_alloc(ctlr_info_t *h)
 {
 	CommandList_struct *c;
 	int i;
 	u64bit temp64;
 	dma_addr_t cmd_dma_handle, err_dma_handle;
 
-	if (!get_from_pool) {
-		c = (CommandList_struct *) pci_alloc_consistent(h->pdev,
-			sizeof(CommandList_struct), &cmd_dma_handle);
-		if (c == NULL)
+	do {
+		i = find_first_zero_bit(h->cmd_pool_bits, h->nr_cmds);
+		if (i == h->nr_cmds)
 			return NULL;
-		memset(c, 0, sizeof(CommandList_struct));
+	} while (test_and_set_bit(i & (BITS_PER_LONG - 1),
+		  h->cmd_pool_bits + (i / BITS_PER_LONG)) != 0);
+	c = h->cmd_pool + i;
+	memset(c, 0, sizeof(CommandList_struct));
+	cmd_dma_handle = h->cmd_pool_dhandle + i * sizeof(CommandList_struct);
+	c->err_info = h->errinfo_pool + i;
+	memset(c->err_info, 0, sizeof(ErrorInfo_struct));
+	err_dma_handle = h->errinfo_pool_dhandle
+	    + i * sizeof(ErrorInfo_struct);
+	h->nr_allocs++;
 
-		c->cmdindex = -1;
-
-		c->err_info = (ErrorInfo_struct *)
-		    pci_alloc_consistent(h->pdev, sizeof(ErrorInfo_struct),
-			    &err_dma_handle);
-
-		if (c->err_info == NULL) {
-			pci_free_consistent(h->pdev,
-				sizeof(CommandList_struct), c, cmd_dma_handle);
-			return NULL;
-		}
-		memset(c->err_info, 0, sizeof(ErrorInfo_struct));
-	} else {		/* get it out of the controllers pool */
-
-		do {
-			i = find_first_zero_bit(h->cmd_pool_bits, h->nr_cmds);
-			if (i == h->nr_cmds)
-				return NULL;
-		} while (test_and_set_bit
-			 (i & (BITS_PER_LONG - 1),
-			  h->cmd_pool_bits + (i / BITS_PER_LONG)) != 0);
-#ifdef CCISS_DEBUG
-		printk(KERN_DEBUG "cciss: using command buffer %d\n", i);
-#endif
-		c = h->cmd_pool + i;
-		memset(c, 0, sizeof(CommandList_struct));
-		cmd_dma_handle = h->cmd_pool_dhandle
-		    + i * sizeof(CommandList_struct);
-		c->err_info = h->errinfo_pool + i;
-		memset(c->err_info, 0, sizeof(ErrorInfo_struct));
-		err_dma_handle = h->errinfo_pool_dhandle
-		    + i * sizeof(ErrorInfo_struct);
-		h->nr_allocs++;
-
-		c->cmdindex = i;
-	}
+	c->cmdindex = i;
 
 	INIT_HLIST_NODE(&c->list);
 	c->busaddr = (__u32) cmd_dma_handle;
@@ -930,27 +944,65 @@
 	return c;
 }
 
-/*
- * Frees a command block that was previously allocated with cmd_alloc().
+/* allocate a command using pci_alloc_consistent, used for ioctls,
+ * etc., not for the main i/o path.
  */
-static void cmd_free(ctlr_info_t *h, CommandList_struct *c, int got_from_pool)
+static CommandList_struct *cmd_special_alloc(ctlr_info_t *h)
+{
+	CommandList_struct *c;
+	u64bit temp64;
+	dma_addr_t cmd_dma_handle, err_dma_handle;
+
+	c = (CommandList_struct *) pci_alloc_consistent(h->pdev,
+		sizeof(CommandList_struct), &cmd_dma_handle);
+	if (c == NULL)
+		return NULL;
+	memset(c, 0, sizeof(CommandList_struct));
+
+	c->cmdindex = -1;
+
+	c->err_info = (ErrorInfo_struct *)
+	    pci_alloc_consistent(h->pdev, sizeof(ErrorInfo_struct),
+		    &err_dma_handle);
+
+	if (c->err_info == NULL) {
+		pci_free_consistent(h->pdev,
+			sizeof(CommandList_struct), c, cmd_dma_handle);
+		return NULL;
+	}
+	memset(c->err_info, 0, sizeof(ErrorInfo_struct));
+
+	INIT_HLIST_NODE(&c->list);
+	c->busaddr = (__u32) cmd_dma_handle;
+	temp64.val = (__u64) err_dma_handle;
+	c->ErrDesc.Addr.lower = temp64.val32.lower;
+	c->ErrDesc.Addr.upper = temp64.val32.upper;
+	c->ErrDesc.Len = sizeof(ErrorInfo_struct);
+
+	c->ctlr = h->ctlr;
+	return c;
+}
+
+static void cmd_free(ctlr_info_t *h, CommandList_struct *c)
 {
 	int i;
+
+	i = c - h->cmd_pool;
+	clear_bit(i & (BITS_PER_LONG - 1),
+		  h->cmd_pool_bits + (i / BITS_PER_LONG));
+	h->nr_frees++;
+}
+
+static void cmd_special_free(ctlr_info_t *h, CommandList_struct *c)
+{
 	u64bit temp64;
 
-	if (!got_from_pool) {
-		temp64.val32.lower = c->ErrDesc.Addr.lower;
-		temp64.val32.upper = c->ErrDesc.Addr.upper;
-		pci_free_consistent(h->pdev, sizeof(ErrorInfo_struct),
-				    c->err_info, (dma_addr_t) temp64.val);
-		pci_free_consistent(h->pdev, sizeof(CommandList_struct),
-				    c, (dma_addr_t) c->busaddr);
-	} else {
-		i = c - h->cmd_pool;
-		clear_bit(i & (BITS_PER_LONG - 1),
-			  h->cmd_pool_bits + (i / BITS_PER_LONG));
-		h->nr_frees++;
-	}
+	temp64.val32.lower = c->ErrDesc.Addr.lower;
+	temp64.val32.upper = c->ErrDesc.Addr.upper;
+	pci_free_consistent(h->pdev, sizeof(ErrorInfo_struct),
+			    c->err_info, (dma_addr_t) temp64.val);
+	pci_free_consistent(h->pdev, sizeof(CommandList_struct),
+			    c, (dma_addr_t) c->busaddr);
 }
 
 static inline ctlr_info_t *get_host(struct gendisk *disk)
@@ -968,13 +1020,10 @@
  */
 static int cciss_open(struct block_device *bdev, fmode_t mode)
 {
-	ctlr_info_t *host = get_host(bdev->bd_disk);
+	ctlr_info_t *h = get_host(bdev->bd_disk);
 	drive_info_struct *drv = get_drv(bdev->bd_disk);
 
-#ifdef CCISS_DEBUG
-	printk(KERN_DEBUG "cciss_open %s\n", bdev->bd_disk->disk_name);
-#endif				/* CCISS_DEBUG */
-
+	dev_dbg(&h->pdev->dev, "cciss_open %s\n", bdev->bd_disk->disk_name);
 	if (drv->busy_configuring)
 		return -EBUSY;
 	/*
@@ -1000,29 +1049,39 @@
 			return -EPERM;
 	}
 	drv->usage_count++;
-	host->usage_count++;
+	h->usage_count++;
 	return 0;
 }
 
+static int cciss_unlocked_open(struct block_device *bdev, fmode_t mode)
+{
+	int ret;
+
+	lock_kernel();
+	ret = cciss_open(bdev, mode);
+	unlock_kernel();
+
+	return ret;
+}
+
 /*
  * Close.  Sync first.
  */
 static int cciss_release(struct gendisk *disk, fmode_t mode)
 {
-	ctlr_info_t *host = get_host(disk);
-	drive_info_struct *drv = get_drv(disk);
+	ctlr_info_t *h;
+	drive_info_struct *drv;
 
-#ifdef CCISS_DEBUG
-	printk(KERN_DEBUG "cciss_release %s\n", disk->disk_name);
-#endif				/* CCISS_DEBUG */
-
+	lock_kernel();
+	h = get_host(disk);
+	drv = get_drv(disk);
+	dev_dbg(&h->pdev->dev, "cciss_release %s\n", disk->disk_name);
 	drv->usage_count--;
-	host->usage_count--;
+	h->usage_count--;
+	unlock_kernel();
 	return 0;
 }
 
-#ifdef CONFIG_COMPAT
-
 static int do_ioctl(struct block_device *bdev, fmode_t mode,
 		    unsigned cmd, unsigned long arg)
 {
@@ -1033,6 +1092,8 @@
 	return ret;
 }
 
+#ifdef CONFIG_COMPAT
+
 static int cciss_ioctl32_passthru(struct block_device *bdev, fmode_t mode,
 				  unsigned cmd, unsigned long arg);
 static int cciss_ioctl32_big_passthru(struct block_device *bdev, fmode_t mode,
@@ -1163,11 +1224,11 @@
 	return 0;
 }
 
-static void check_ioctl_unit_attention(ctlr_info_t *host, CommandList_struct *c)
+static void check_ioctl_unit_attention(ctlr_info_t *h, CommandList_struct *c)
 {
 	if (c->err_info->CommandStatus == CMD_TARGET_STATUS &&
 			c->err_info->ScsiStatus != SAM_STAT_CHECK_CONDITION)
-		(void)check_for_unit_attention(host, c);
+		(void)check_for_unit_attention(h, c);
 }
 /*
  * ioctl
@@ -1176,15 +1237,12 @@
 		       unsigned int cmd, unsigned long arg)
 {
 	struct gendisk *disk = bdev->bd_disk;
-	ctlr_info_t *host = get_host(disk);
+	ctlr_info_t *h = get_host(disk);
 	drive_info_struct *drv = get_drv(disk);
-	int ctlr = host->ctlr;
 	void __user *argp = (void __user *)arg;
 
-#ifdef CCISS_DEBUG
-	printk(KERN_DEBUG "cciss_ioctl: Called with cmd=%x %lx\n", cmd, arg);
-#endif				/* CCISS_DEBUG */
-
+	dev_dbg(&h->pdev->dev, "cciss_ioctl: Called with cmd=%x %lx\n",
+		cmd, arg);
 	switch (cmd) {
 	case CCISS_GETPCIINFO:
 		{
@@ -1192,10 +1250,10 @@
 
 			if (!arg)
 				return -EINVAL;
-			pciinfo.domain = pci_domain_nr(host->pdev->bus);
-			pciinfo.bus = host->pdev->bus->number;
-			pciinfo.dev_fn = host->pdev->devfn;
-			pciinfo.board_id = host->board_id;
+			pciinfo.domain = pci_domain_nr(h->pdev->bus);
+			pciinfo.bus = h->pdev->bus->number;
+			pciinfo.dev_fn = h->pdev->devfn;
+			pciinfo.board_id = h->board_id;
 			if (copy_to_user
 			    (argp, &pciinfo, sizeof(cciss_pci_info_struct)))
 				return -EFAULT;
@@ -1207,9 +1265,9 @@
 			if (!arg)
 				return -EINVAL;
 			intinfo.delay =
-			    readl(&host->cfgtable->HostWrite.CoalIntDelay);
+			    readl(&h->cfgtable->HostWrite.CoalIntDelay);
 			intinfo.count =
-			    readl(&host->cfgtable->HostWrite.CoalIntCount);
+			    readl(&h->cfgtable->HostWrite.CoalIntCount);
 			if (copy_to_user
 			    (argp, &intinfo, sizeof(cciss_coalint_struct)))
 				return -EFAULT;
@@ -1229,26 +1287,23 @@
 			    (&intinfo, argp, sizeof(cciss_coalint_struct)))
 				return -EFAULT;
 			if ((intinfo.delay == 0) && (intinfo.count == 0))
-			{
-//                      printk("cciss_ioctl: delay and count cannot be 0\n");
 				return -EINVAL;
-			}
-			spin_lock_irqsave(CCISS_LOCK(ctlr), flags);
+			spin_lock_irqsave(&h->lock, flags);
 			/* Update the field, and then ring the doorbell */
 			writel(intinfo.delay,
-			       &(host->cfgtable->HostWrite.CoalIntDelay));
+			       &(h->cfgtable->HostWrite.CoalIntDelay));
 			writel(intinfo.count,
-			       &(host->cfgtable->HostWrite.CoalIntCount));
-			writel(CFGTBL_ChangeReq, host->vaddr + SA5_DOORBELL);
+			       &(h->cfgtable->HostWrite.CoalIntCount));
+			writel(CFGTBL_ChangeReq, h->vaddr + SA5_DOORBELL);
 
 			for (i = 0; i < MAX_IOCTL_CONFIG_WAIT; i++) {
-				if (!(readl(host->vaddr + SA5_DOORBELL)
+				if (!(readl(h->vaddr + SA5_DOORBELL)
 				      & CFGTBL_ChangeReq))
 					break;
 				/* delay and try again */
 				udelay(1000);
 			}
-			spin_unlock_irqrestore(CCISS_LOCK(ctlr), flags);
+			spin_unlock_irqrestore(&h->lock, flags);
 			if (i >= MAX_IOCTL_CONFIG_WAIT)
 				return -EAGAIN;
 			return 0;
@@ -1262,7 +1317,7 @@
 				return -EINVAL;
 			for (i = 0; i < 16; i++)
 				NodeName[i] =
-				    readb(&host->cfgtable->ServerName[i]);
+				    readb(&h->cfgtable->ServerName[i]);
 			if (copy_to_user(argp, NodeName, sizeof(NodeName_type)))
 				return -EFAULT;
 			return 0;
@@ -1282,23 +1337,23 @@
 			    (NodeName, argp, sizeof(NodeName_type)))
 				return -EFAULT;
 
-			spin_lock_irqsave(CCISS_LOCK(ctlr), flags);
+			spin_lock_irqsave(&h->lock, flags);
 
 			/* Update the field, and then ring the doorbell */
 			for (i = 0; i < 16; i++)
 				writeb(NodeName[i],
-				       &host->cfgtable->ServerName[i]);
+				       &h->cfgtable->ServerName[i]);
 
-			writel(CFGTBL_ChangeReq, host->vaddr + SA5_DOORBELL);
+			writel(CFGTBL_ChangeReq, h->vaddr + SA5_DOORBELL);
 
 			for (i = 0; i < MAX_IOCTL_CONFIG_WAIT; i++) {
-				if (!(readl(host->vaddr + SA5_DOORBELL)
+				if (!(readl(h->vaddr + SA5_DOORBELL)
 				      & CFGTBL_ChangeReq))
 					break;
 				/* delay and try again */
 				udelay(1000);
 			}
-			spin_unlock_irqrestore(CCISS_LOCK(ctlr), flags);
+			spin_unlock_irqrestore(&h->lock, flags);
 			if (i >= MAX_IOCTL_CONFIG_WAIT)
 				return -EAGAIN;
 			return 0;
@@ -1310,7 +1365,7 @@
 
 			if (!arg)
 				return -EINVAL;
-			heartbeat = readl(&host->cfgtable->HeartBeat);
+			heartbeat = readl(&h->cfgtable->HeartBeat);
 			if (copy_to_user
 			    (argp, &heartbeat, sizeof(Heartbeat_type)))
 				return -EFAULT;
@@ -1322,7 +1377,7 @@
 
 			if (!arg)
 				return -EINVAL;
-			BusTypes = readl(&host->cfgtable->BusTypes);
+			BusTypes = readl(&h->cfgtable->BusTypes);
 			if (copy_to_user
 			    (argp, &BusTypes, sizeof(BusTypes_type)))
 				return -EFAULT;
@@ -1334,7 +1389,7 @@
 
 			if (!arg)
 				return -EINVAL;
-			memcpy(firmware, host->firm_ver, 4);
+			memcpy(firmware, h->firm_ver, 4);
 
 			if (copy_to_user
 			    (argp, firmware, sizeof(FirmwareVer_type)))
@@ -1357,7 +1412,7 @@
 	case CCISS_DEREGDISK:
 	case CCISS_REGNEWD:
 	case CCISS_REVALIDVOLS:
-		return rebuild_lun_table(host, 0, 1);
+		return rebuild_lun_table(h, 0, 1);
 
 	case CCISS_GETLUNINFO:{
 			LogvolInfo_struct luninfo;
@@ -1377,7 +1432,6 @@
 			CommandList_struct *c;
 			char *buff = NULL;
 			u64bit temp64;
-			unsigned long flags;
 			DECLARE_COMPLETION_ONSTACK(wait);
 
 			if (!arg)
@@ -1413,7 +1467,8 @@
 			} else {
 				memset(buff, 0, iocommand.buf_size);
 			}
-			if ((c = cmd_alloc(host, 0)) == NULL) {
+			c = cmd_special_alloc(h);
+			if (!c) {
 				kfree(buff);
 				return -ENOMEM;
 			}
@@ -1439,7 +1494,7 @@
 
 			/* Fill in the scatter gather information */
 			if (iocommand.buf_size > 0) {
-				temp64.val = pci_map_single(host->pdev, buff,
+				temp64.val = pci_map_single(h->pdev, buff,
 					iocommand.buf_size,
 					PCI_DMA_BIDIRECTIONAL);
 				c->SG[0].Addr.lower = temp64.val32.lower;
@@ -1449,30 +1504,24 @@
 			}
 			c->waiting = &wait;
 
-			/* Put the request on the tail of the request queue */
-			spin_lock_irqsave(CCISS_LOCK(ctlr), flags);
-			addQ(&host->reqQ, c);
-			host->Qdepth++;
-			start_io(host);
-			spin_unlock_irqrestore(CCISS_LOCK(ctlr), flags);
-
+			enqueue_cmd_and_start_io(h, c);
 			wait_for_completion(&wait);
 
 			/* unlock the buffers from DMA */
 			temp64.val32.lower = c->SG[0].Addr.lower;
 			temp64.val32.upper = c->SG[0].Addr.upper;
-			pci_unmap_single(host->pdev, (dma_addr_t) temp64.val,
+			pci_unmap_single(h->pdev, (dma_addr_t) temp64.val,
 					 iocommand.buf_size,
 					 PCI_DMA_BIDIRECTIONAL);
 
-			check_ioctl_unit_attention(host, c);
+			check_ioctl_unit_attention(h, c);
 
 			/* Copy the error information out */
 			iocommand.error_info = *(c->err_info);
 			if (copy_to_user
 			    (argp, &iocommand, sizeof(IOCTL_Command_struct))) {
 				kfree(buff);
-				cmd_free(host, c, 0);
+				cmd_special_free(h, c);
 				return -EFAULT;
 			}
 
@@ -1481,12 +1530,12 @@
 				if (copy_to_user
 				    (iocommand.buf, buff, iocommand.buf_size)) {
 					kfree(buff);
-					cmd_free(host, c, 0);
+					cmd_special_free(h, c);
 					return -EFAULT;
 				}
 			}
 			kfree(buff);
-			cmd_free(host, c, 0);
+			cmd_special_free(h, c);
 			return 0;
 		}
 	case CCISS_BIG_PASSTHRU:{
@@ -1495,7 +1544,6 @@
 			unsigned char **buff = NULL;
 			int *buff_size = NULL;
 			u64bit temp64;
-			unsigned long flags;
 			BYTE sg_used = 0;
 			int status = 0;
 			int i;
@@ -1569,7 +1617,8 @@
 				data_ptr += sz;
 				sg_used++;
 			}
-			if ((c = cmd_alloc(host, 0)) == NULL) {
+			c = cmd_special_alloc(h);
+			if (!c) {
 				status = -ENOMEM;
 				goto cleanup1;
 			}
@@ -1590,7 +1639,7 @@
 			if (ioc->buf_size > 0) {
 				for (i = 0; i < sg_used; i++) {
 					temp64.val =
-					    pci_map_single(host->pdev, buff[i],
+					    pci_map_single(h->pdev, buff[i],
 						    buff_size[i],
 						    PCI_DMA_BIDIRECTIONAL);
 					c->SG[i].Addr.lower =
@@ -1602,26 +1651,21 @@
 				}
 			}
 			c->waiting = &wait;
-			/* Put the request on the tail of the request queue */
-			spin_lock_irqsave(CCISS_LOCK(ctlr), flags);
-			addQ(&host->reqQ, c);
-			host->Qdepth++;
-			start_io(host);
-			spin_unlock_irqrestore(CCISS_LOCK(ctlr), flags);
+			enqueue_cmd_and_start_io(h, c);
 			wait_for_completion(&wait);
 			/* unlock the buffers from DMA */
 			for (i = 0; i < sg_used; i++) {
 				temp64.val32.lower = c->SG[i].Addr.lower;
 				temp64.val32.upper = c->SG[i].Addr.upper;
-				pci_unmap_single(host->pdev,
+				pci_unmap_single(h->pdev,
 					(dma_addr_t) temp64.val, buff_size[i],
 					PCI_DMA_BIDIRECTIONAL);
 			}
-			check_ioctl_unit_attention(host, c);
+			check_ioctl_unit_attention(h, c);
 			/* Copy the error information out */
 			ioc->error_info = *(c->err_info);
 			if (copy_to_user(argp, ioc, sizeof(*ioc))) {
-				cmd_free(host, c, 0);
+				cmd_special_free(h, c);
 				status = -EFAULT;
 				goto cleanup1;
 			}
@@ -1631,14 +1675,14 @@
 				for (i = 0; i < sg_used; i++) {
 					if (copy_to_user
 					    (ptr, buff[i], buff_size[i])) {
-						cmd_free(host, c, 0);
+						cmd_special_free(h, c);
 						status = -EFAULT;
 						goto cleanup1;
 					}
 					ptr += buff_size[i];
 				}
 			}
-			cmd_free(host, c, 0);
+			cmd_special_free(h, c);
 			status = 0;
 		      cleanup1:
 			if (buff) {
@@ -1726,26 +1770,26 @@
 
 static void cciss_softirq_done(struct request *rq)
 {
-	CommandList_struct *cmd = rq->completion_data;
-	ctlr_info_t *h = hba[cmd->ctlr];
-	SGDescriptor_struct *curr_sg = cmd->SG;
-	unsigned long flags;
+	CommandList_struct *c = rq->completion_data;
+	ctlr_info_t *h = hba[c->ctlr];
+	SGDescriptor_struct *curr_sg = c->SG;
 	u64bit temp64;
+	unsigned long flags;
 	int i, ddir;
 	int sg_index = 0;
 
-	if (cmd->Request.Type.Direction == XFER_READ)
+	if (c->Request.Type.Direction == XFER_READ)
 		ddir = PCI_DMA_FROMDEVICE;
 	else
 		ddir = PCI_DMA_TODEVICE;
 
 	/* command did not need to be retried */
 	/* unmap the DMA mapping for all the scatter gather elements */
-	for (i = 0; i < cmd->Header.SGList; i++) {
+	for (i = 0; i < c->Header.SGList; i++) {
 		if (curr_sg[sg_index].Ext == CCISS_SG_CHAIN) {
-			cciss_unmap_sg_chain_block(h, cmd);
+			cciss_unmap_sg_chain_block(h, c);
 			/* Point to the next block */
-			curr_sg = h->cmd_sg_list[cmd->cmdindex];
+			curr_sg = h->cmd_sg_list[c->cmdindex];
 			sg_index = 0;
 		}
 		temp64.val32.lower = curr_sg[sg_index].Addr.lower;
@@ -1755,18 +1799,16 @@
 		++sg_index;
 	}
 
-#ifdef CCISS_DEBUG
-	printk("Done with %p\n", rq);
-#endif				/* CCISS_DEBUG */
+	dev_dbg(&h->pdev->dev, "Done with %p\n", rq);
 
 	/* set the residual count for pc requests */
-	if (blk_pc_request(rq))
-		rq->resid_len = cmd->err_info->ResidualCnt;
+	if (rq->cmd_type == REQ_TYPE_BLOCK_PC)
+		rq->resid_len = c->err_info->ResidualCnt;
 
 	blk_end_request_all(rq, (rq->errors == 0) ? 0 : -EIO);
 
 	spin_lock_irqsave(&h->lock, flags);
-	cmd_free(h, cmd, 1);
+	cmd_free(h, c);
 	cciss_check_queues(h);
 	spin_unlock_irqrestore(&h->lock, flags);
 }
@@ -1782,7 +1824,7 @@
  * via the inquiry page 0.  Model, vendor, and rev are set to empty strings if
  * they cannot be read.
  */
-static void cciss_get_device_descr(int ctlr, int logvol,
+static void cciss_get_device_descr(ctlr_info_t *h, int logvol,
 				   char *vendor, char *model, char *rev)
 {
 	int rc;
@@ -1797,8 +1839,8 @@
 	if (!inq_buf)
 		return;
 
-	log_unit_to_scsi3addr(hba[ctlr], scsi3addr, logvol);
-	rc = sendcmd_withirq(CISS_INQUIRY, ctlr, inq_buf, sizeof(*inq_buf), 0,
+	log_unit_to_scsi3addr(h, scsi3addr, logvol);
+	rc = sendcmd_withirq(h, CISS_INQUIRY, inq_buf, sizeof(*inq_buf), 0,
 			scsi3addr, TYPE_CMD);
 	if (rc == IO_OK) {
 		memcpy(vendor, &inq_buf->data_byte[8], VENDOR_LEN);
@@ -1818,7 +1860,7 @@
  * number cannot be had, for whatever reason, 16 bytes of 0xff
  * are returned instead.
  */
-static void cciss_get_serial_no(int ctlr, int logvol,
+static void cciss_get_serial_no(ctlr_info_t *h, int logvol,
 				unsigned char *serial_no, int buflen)
 {
 #define PAGE_83_INQ_BYTES 64
@@ -1833,8 +1875,8 @@
 	if (!buf)
 		return;
 	memset(serial_no, 0, buflen);
-	log_unit_to_scsi3addr(hba[ctlr], scsi3addr, logvol);
-	rc = sendcmd_withirq(CISS_INQUIRY, ctlr, buf,
+	log_unit_to_scsi3addr(h, scsi3addr, logvol);
+	rc = sendcmd_withirq(h, CISS_INQUIRY, buf,
 		PAGE_83_INQ_BYTES, 0x83, scsi3addr, TYPE_CMD);
 	if (rc == IO_OK)
 		memcpy(serial_no, &buf[8], buflen);
@@ -1900,10 +1942,9 @@
  * is also the controller node.  Any changes to disk 0 will show up on
  * the next reboot.
  */
-static void cciss_update_drive_info(int ctlr, int drv_index, int first_time,
-	int via_ioctl)
+static void cciss_update_drive_info(ctlr_info_t *h, int drv_index,
+	int first_time, int via_ioctl)
 {
-	ctlr_info_t *h = hba[ctlr];
 	struct gendisk *disk;
 	InquiryData_struct *inq_buff = NULL;
 	unsigned int block_size;
@@ -1920,16 +1961,16 @@
 
 	/* testing to see if 16-byte CDBs are already being used */
 	if (h->cciss_read == CCISS_READ_16) {
-		cciss_read_capacity_16(h->ctlr, drv_index,
+		cciss_read_capacity_16(h, drv_index,
 			&total_size, &block_size);
 
 	} else {
-		cciss_read_capacity(ctlr, drv_index, &total_size, &block_size);
+		cciss_read_capacity(h, drv_index, &total_size, &block_size);
 		/* if read_capacity returns all F's this volume is >2TB */
 		/* in size so we switch to 16-byte CDB's for all */
 		/* read/write ops */
 		if (total_size == 0xFFFFFFFFULL) {
-			cciss_read_capacity_16(ctlr, drv_index,
+			cciss_read_capacity_16(h, drv_index,
 			&total_size, &block_size);
 			h->cciss_read = CCISS_READ_16;
 			h->cciss_write = CCISS_WRITE_16;
@@ -1939,14 +1980,14 @@
 		}
 	}
 
-	cciss_geometry_inquiry(ctlr, drv_index, total_size, block_size,
+	cciss_geometry_inquiry(h, drv_index, total_size, block_size,
 			       inq_buff, drvinfo);
 	drvinfo->block_size = block_size;
 	drvinfo->nr_blocks = total_size + 1;
 
-	cciss_get_device_descr(ctlr, drv_index, drvinfo->vendor,
+	cciss_get_device_descr(h, drv_index, drvinfo->vendor,
 				drvinfo->model, drvinfo->rev);
-	cciss_get_serial_no(ctlr, drv_index, drvinfo->serial_no,
+	cciss_get_serial_no(h, drv_index, drvinfo->serial_no,
 			sizeof(drvinfo->serial_no));
 	/* Save the lunid in case we deregister the disk, below. */
 	memcpy(drvinfo->LunID, h->drv[drv_index]->LunID,
@@ -1971,10 +2012,10 @@
 	 * (unless it's the first disk (for the controller node).
 	 */
 	if (h->drv[drv_index]->raid_level != -1 && drv_index != 0) {
-		printk(KERN_WARNING "disk %d has changed.\n", drv_index);
-		spin_lock_irqsave(CCISS_LOCK(h->ctlr), flags);
+		dev_warn(&h->pdev->dev, "disk %d has changed.\n", drv_index);
+		spin_lock_irqsave(&h->lock, flags);
 		h->drv[drv_index]->busy_configuring = 1;
-		spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags);
+		spin_unlock_irqrestore(&h->lock, flags);
 
 		/* deregister_disk sets h->drv[drv_index]->queue = NULL
 		 * which keeps the interrupt handler from starting
@@ -2024,8 +2065,8 @@
 		if (cciss_add_disk(h, disk, drv_index) != 0) {
 			cciss_free_gendisk(h, drv_index);
 			cciss_free_drive_info(h, drv_index);
-			printk(KERN_WARNING "cciss:%d could not update "
-				"disk %d\n", h->ctlr, drv_index);
+			dev_warn(&h->pdev->dev, "could not update disk %d\n",
+				drv_index);
 			--h->num_luns;
 		}
 	}
@@ -2035,7 +2076,7 @@
 	kfree(drvinfo);
 	return;
 mem_msg:
-	printk(KERN_ERR "cciss: out of memory\n");
+	dev_err(&h->pdev->dev, "out of memory\n");
 	goto freeret;
 }
 
@@ -2127,9 +2168,9 @@
 		h->gendisk[drv_index] =
 			alloc_disk(1 << NWD_SHIFT);
 		if (!h->gendisk[drv_index]) {
-			printk(KERN_ERR "cciss%d: could not "
-				"allocate a new disk %d\n",
-				h->ctlr, drv_index);
+			dev_err(&h->pdev->dev,
+				"could not allocate a new disk %d\n",
+				drv_index);
 			goto err_free_drive_info;
 		}
 	}
@@ -2180,8 +2221,7 @@
 	cciss_free_gendisk(h, drv_index);
 	cciss_free_drive_info(h, drv_index);
 error:
-	printk(KERN_WARNING "cciss%d: could not "
-		"add disk 0.\n", h->ctlr);
+	dev_warn(&h->pdev->dev, "could not add disk 0.\n");
 	return;
 }
 
@@ -2196,7 +2236,6 @@
 static int rebuild_lun_table(ctlr_info_t *h, int first_time,
 	int via_ioctl)
 {
-	int ctlr = h->ctlr;
 	int num_luns;
 	ReportLunData_struct *ld_buff = NULL;
 	int return_code;
@@ -2211,27 +2250,27 @@
 		return -EPERM;
 
 	/* Set busy_configuring flag for this operation */
-	spin_lock_irqsave(CCISS_LOCK(h->ctlr), flags);
+	spin_lock_irqsave(&h->lock, flags);
 	if (h->busy_configuring) {
-		spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags);
+		spin_unlock_irqrestore(&h->lock, flags);
 		return -EBUSY;
 	}
 	h->busy_configuring = 1;
-	spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags);
+	spin_unlock_irqrestore(&h->lock, flags);
 
 	ld_buff = kzalloc(sizeof(ReportLunData_struct), GFP_KERNEL);
 	if (ld_buff == NULL)
 		goto mem_msg;
 
-	return_code = sendcmd_withirq(CISS_REPORT_LOG, ctlr, ld_buff,
+	return_code = sendcmd_withirq(h, CISS_REPORT_LOG, ld_buff,
 				      sizeof(ReportLunData_struct),
 				      0, CTLR_LUNID, TYPE_CMD);
 
 	if (return_code == IO_OK)
 		listlength = be32_to_cpu(*(__be32 *) ld_buff->LUNListLength);
 	else {	/* reading number of logical volumes failed */
-		printk(KERN_WARNING "cciss: report logical volume"
-		       " command failed\n");
+		dev_warn(&h->pdev->dev,
+			"report logical volume command failed\n");
 		listlength = 0;
 		goto freeret;
 	}
@@ -2239,7 +2278,7 @@
 	num_luns = listlength / 8;	/* 8 bytes per entry */
 	if (num_luns > CISS_MAX_LUN) {
 		num_luns = CISS_MAX_LUN;
-		printk(KERN_WARNING "cciss: more luns configured"
+		dev_warn(&h->pdev->dev, "more luns configured"
 		       " on controller than can be handled by"
 		       " this driver.\n");
 	}
@@ -2270,9 +2309,9 @@
 		}
 		if (!drv_found) {
 			/* Deregister it from the OS, it's gone. */
-			spin_lock_irqsave(CCISS_LOCK(h->ctlr), flags);
+			spin_lock_irqsave(&h->lock, flags);
 			h->drv[i]->busy_configuring = 1;
-			spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags);
+			spin_unlock_irqrestore(&h->lock, flags);
 			return_code = deregister_disk(h, i, 1, via_ioctl);
 			if (h->drv[i] != NULL)
 				h->drv[i]->busy_configuring = 0;
@@ -2311,8 +2350,7 @@
 			if (drv_index == -1)
 				goto freeret;
 		}
-		cciss_update_drive_info(ctlr, drv_index, first_time,
-			via_ioctl);
+		cciss_update_drive_info(h, drv_index, first_time, via_ioctl);
 	}		/* end for */
 
 freeret:
@@ -2324,7 +2362,7 @@
 	 */
 	return -1;
 mem_msg:
-	printk(KERN_ERR "cciss: out of memory\n");
+	dev_err(&h->pdev->dev, "out of memory\n");
 	h->busy_configuring = 0;
 	goto freeret;
 }
@@ -2444,11 +2482,10 @@
 	return 0;
 }
 
-static int fill_cmd(CommandList_struct *c, __u8 cmd, int ctlr, void *buff,
+static int fill_cmd(ctlr_info_t *h, CommandList_struct *c, __u8 cmd, void *buff,
 		size_t size, __u8 page_code, unsigned char *scsi3addr,
 		int cmd_type)
 {
-	ctlr_info_t *h = hba[ctlr];
 	u64bit buff_dma_handle;
 	int status = IO_OK;
 
@@ -2532,8 +2569,7 @@
 			c->Request.Timeout = 0;
 			break;
 		default:
-			printk(KERN_WARNING
-			       "cciss%d:  Unknown Command 0x%c\n", ctlr, cmd);
+			dev_warn(&h->pdev->dev, "Unknown Command 0x%c\n", cmd);
 			return IO_ERROR;
 		}
 	} else if (cmd_type == TYPE_MSG) {
@@ -2565,13 +2601,12 @@
 			c->Request.CDB[0] = cmd;
 			break;
 		default:
-			printk(KERN_WARNING
-			       "cciss%d: unknown message type %d\n", ctlr, cmd);
+			dev_warn(&h->pdev->dev,
+				"unknown message type %d\n", cmd);
 			return IO_ERROR;
 		}
 	} else {
-		printk(KERN_WARNING
-		       "cciss%d: unknown command type %d\n", ctlr, cmd_type);
+		dev_warn(&h->pdev->dev, "unknown command type %d\n", cmd_type);
 		return IO_ERROR;
 	}
 	/* Fill in the scatter gather information */
@@ -2599,15 +2634,14 @@
 		default:
 			if (check_for_unit_attention(h, c))
 				return IO_NEEDS_RETRY;
-			printk(KERN_WARNING "cciss%d: cmd 0x%02x "
+			dev_warn(&h->pdev->dev, "cmd 0x%02x "
 				"check condition, sense key = 0x%02x\n",
-				h->ctlr, c->Request.CDB[0],
-				c->err_info->SenseInfo[2]);
+				c->Request.CDB[0], c->err_info->SenseInfo[2]);
 		}
 		break;
 	default:
-		printk(KERN_WARNING "cciss%d: cmd 0x%02x"
-			"scsi status = 0x%02x\n", h->ctlr,
+		dev_warn(&h->pdev->dev, "cmd 0x%02x"
+			"scsi status = 0x%02x\n",
 			c->Request.CDB[0], c->err_info->ScsiStatus);
 		break;
 	}
@@ -2630,43 +2664,42 @@
 		/* expected for inquiry and report lun commands */
 		break;
 	case CMD_INVALID:
-		printk(KERN_WARNING "cciss: cmd 0x%02x is "
+		dev_warn(&h->pdev->dev, "cmd 0x%02x is "
 		       "reported invalid\n", c->Request.CDB[0]);
 		return_status = IO_ERROR;
 		break;
 	case CMD_PROTOCOL_ERR:
-		printk(KERN_WARNING "cciss: cmd 0x%02x has "
-		       "protocol error \n", c->Request.CDB[0]);
+		dev_warn(&h->pdev->dev, "cmd 0x%02x has "
+		       "protocol error\n", c->Request.CDB[0]);
 		return_status = IO_ERROR;
 		break;
 	case CMD_HARDWARE_ERR:
-		printk(KERN_WARNING "cciss: cmd 0x%02x had "
+		dev_warn(&h->pdev->dev, "cmd 0x%02x had "
 		       " hardware error\n", c->Request.CDB[0]);
 		return_status = IO_ERROR;
 		break;
 	case CMD_CONNECTION_LOST:
-		printk(KERN_WARNING "cciss: cmd 0x%02x had "
+		dev_warn(&h->pdev->dev, "cmd 0x%02x had "
 		       "connection lost\n", c->Request.CDB[0]);
 		return_status = IO_ERROR;
 		break;
 	case CMD_ABORTED:
-		printk(KERN_WARNING "cciss: cmd 0x%02x was "
+		dev_warn(&h->pdev->dev, "cmd 0x%02x was "
 		       "aborted\n", c->Request.CDB[0]);
 		return_status = IO_ERROR;
 		break;
 	case CMD_ABORT_FAILED:
-		printk(KERN_WARNING "cciss: cmd 0x%02x reports "
+		dev_warn(&h->pdev->dev, "cmd 0x%02x reports "
 		       "abort failed\n", c->Request.CDB[0]);
 		return_status = IO_ERROR;
 		break;
 	case CMD_UNSOLICITED_ABORT:
-		printk(KERN_WARNING
-		       "cciss%d: unsolicited abort 0x%02x\n", h->ctlr,
+		dev_warn(&h->pdev->dev, "unsolicited abort 0x%02x\n",
 			c->Request.CDB[0]);
 		return_status = IO_NEEDS_RETRY;
 		break;
 	default:
-		printk(KERN_WARNING "cciss: cmd 0x%02x returned "
+		dev_warn(&h->pdev->dev, "cmd 0x%02x returned "
 		       "unknown status %x\n", c->Request.CDB[0],
 		       c->err_info->CommandStatus);
 		return_status = IO_ERROR;
@@ -2679,17 +2712,11 @@
 {
 	DECLARE_COMPLETION_ONSTACK(wait);
 	u64bit buff_dma_handle;
-	unsigned long flags;
 	int return_status = IO_OK;
 
 resend_cmd2:
 	c->waiting = &wait;
-	/* Put the request on the tail of the queue and send it */
-	spin_lock_irqsave(CCISS_LOCK(h->ctlr), flags);
-	addQ(&h->reqQ, c);
-	h->Qdepth++;
-	start_io(h);
-	spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags);
+	enqueue_cmd_and_start_io(h, c);
 
 	wait_for_completion(&wait);
 
@@ -2700,7 +2727,7 @@
 
 	if (return_status == IO_NEEDS_RETRY &&
 		c->retry_count < MAX_CMD_RETRIES) {
-		printk(KERN_WARNING "cciss%d: retrying 0x%02x\n", h->ctlr,
+		dev_warn(&h->pdev->dev, "retrying 0x%02x\n",
 			c->Request.CDB[0]);
 		c->retry_count++;
 		/* erase the old error information */
@@ -2719,27 +2746,26 @@
 	return return_status;
 }
 
-static int sendcmd_withirq(__u8 cmd, int ctlr, void *buff, size_t size,
+static int sendcmd_withirq(ctlr_info_t *h, __u8 cmd, void *buff, size_t size,
 			   __u8 page_code, unsigned char scsi3addr[],
 			int cmd_type)
 {
-	ctlr_info_t *h = hba[ctlr];
 	CommandList_struct *c;
 	int return_status;
 
-	c = cmd_alloc(h, 0);
+	c = cmd_special_alloc(h);
 	if (!c)
 		return -ENOMEM;
-	return_status = fill_cmd(c, cmd, ctlr, buff, size, page_code,
+	return_status = fill_cmd(h, c, cmd, buff, size, page_code,
 		scsi3addr, cmd_type);
 	if (return_status == IO_OK)
 		return_status = sendcmd_withirq_core(h, c, 1);
 
-	cmd_free(h, c, 0);
+	cmd_special_free(h, c);
 	return return_status;
 }
 
-static void cciss_geometry_inquiry(int ctlr, int logvol,
+static void cciss_geometry_inquiry(ctlr_info_t *h, int logvol,
 				   sector_t total_size,
 				   unsigned int block_size,
 				   InquiryData_struct *inq_buff,
@@ -2750,13 +2776,13 @@
 	unsigned char scsi3addr[8];
 
 	memset(inq_buff, 0, sizeof(InquiryData_struct));
-	log_unit_to_scsi3addr(hba[ctlr], scsi3addr, logvol);
-	return_code = sendcmd_withirq(CISS_INQUIRY, ctlr, inq_buff,
+	log_unit_to_scsi3addr(h, scsi3addr, logvol);
+	return_code = sendcmd_withirq(h, CISS_INQUIRY, inq_buff,
 			sizeof(*inq_buff), 0xC1, scsi3addr, TYPE_CMD);
 	if (return_code == IO_OK) {
 		if (inq_buff->data_byte[8] == 0xFF) {
-			printk(KERN_WARNING
-			       "cciss: reading geometry failed, volume "
+			dev_warn(&h->pdev->dev,
+			       "reading geometry failed, volume "
 			       "does not support reading geometry\n");
 			drv->heads = 255;
 			drv->sectors = 32;	/* Sectors per track */
@@ -2780,12 +2806,12 @@
 			drv->cylinders = real_size;
 		}
 	} else {		/* Get geometry failed */
-		printk(KERN_WARNING "cciss: reading geometry failed\n");
+		dev_warn(&h->pdev->dev, "reading geometry failed\n");
 	}
 }
 
 static void
-cciss_read_capacity(int ctlr, int logvol, sector_t *total_size,
+cciss_read_capacity(ctlr_info_t *h, int logvol, sector_t *total_size,
 		    unsigned int *block_size)
 {
 	ReadCapdata_struct *buf;
@@ -2794,25 +2820,25 @@
 
 	buf = kzalloc(sizeof(ReadCapdata_struct), GFP_KERNEL);
 	if (!buf) {
-		printk(KERN_WARNING "cciss: out of memory\n");
+		dev_warn(&h->pdev->dev, "out of memory\n");
 		return;
 	}
 
-	log_unit_to_scsi3addr(hba[ctlr], scsi3addr, logvol);
-	return_code = sendcmd_withirq(CCISS_READ_CAPACITY, ctlr, buf,
+	log_unit_to_scsi3addr(h, scsi3addr, logvol);
+	return_code = sendcmd_withirq(h, CCISS_READ_CAPACITY, buf,
 		sizeof(ReadCapdata_struct), 0, scsi3addr, TYPE_CMD);
 	if (return_code == IO_OK) {
 		*total_size = be32_to_cpu(*(__be32 *) buf->total_size);
 		*block_size = be32_to_cpu(*(__be32 *) buf->block_size);
 	} else {		/* read capacity command failed */
-		printk(KERN_WARNING "cciss: read capacity failed\n");
+		dev_warn(&h->pdev->dev, "read capacity failed\n");
 		*total_size = 0;
 		*block_size = BLOCK_SIZE;
 	}
 	kfree(buf);
 }
 
-static void cciss_read_capacity_16(int ctlr, int logvol,
+static void cciss_read_capacity_16(ctlr_info_t *h, int logvol,
 	sector_t *total_size, unsigned int *block_size)
 {
 	ReadCapdata_struct_16 *buf;
@@ -2821,23 +2847,23 @@
 
 	buf = kzalloc(sizeof(ReadCapdata_struct_16), GFP_KERNEL);
 	if (!buf) {
-		printk(KERN_WARNING "cciss: out of memory\n");
+		dev_warn(&h->pdev->dev, "out of memory\n");
 		return;
 	}
 
-	log_unit_to_scsi3addr(hba[ctlr], scsi3addr, logvol);
-	return_code = sendcmd_withirq(CCISS_READ_CAPACITY_16,
-		ctlr, buf, sizeof(ReadCapdata_struct_16),
+	log_unit_to_scsi3addr(h, scsi3addr, logvol);
+	return_code = sendcmd_withirq(h, CCISS_READ_CAPACITY_16,
+		buf, sizeof(ReadCapdata_struct_16),
 			0, scsi3addr, TYPE_CMD);
 	if (return_code == IO_OK) {
 		*total_size = be64_to_cpu(*(__be64 *) buf->total_size);
 		*block_size = be32_to_cpu(*(__be32 *) buf->block_size);
 	} else {		/* read capacity command failed */
-		printk(KERN_WARNING "cciss: read capacity failed\n");
+		dev_warn(&h->pdev->dev, "read capacity failed\n");
 		*total_size = 0;
 		*block_size = BLOCK_SIZE;
 	}
-	printk(KERN_INFO "      blocks= %llu block_size= %d\n",
+	dev_info(&h->pdev->dev, "      blocks= %llu block_size= %d\n",
 	       (unsigned long long)*total_size+1, *block_size);
 	kfree(buf);
 }
@@ -2865,17 +2891,17 @@
 
 	inq_buff = kmalloc(sizeof(InquiryData_struct), GFP_KERNEL);
 	if (inq_buff == NULL) {
-		printk(KERN_WARNING "cciss: out of memory\n");
+		dev_warn(&h->pdev->dev, "out of memory\n");
 		return 1;
 	}
 	if (h->cciss_read == CCISS_READ_10) {
-		cciss_read_capacity(h->ctlr, logvol,
+		cciss_read_capacity(h, logvol,
 					&total_size, &block_size);
 	} else {
-		cciss_read_capacity_16(h->ctlr, logvol,
+		cciss_read_capacity_16(h, logvol,
 					&total_size, &block_size);
 	}
-	cciss_geometry_inquiry(h->ctlr, logvol, total_size, block_size,
+	cciss_geometry_inquiry(h, logvol, total_size, block_size,
 			       inq_buff, drv);
 
 	blk_queue_logical_block_size(drv->queue, drv->block_size);
@@ -2909,7 +2935,7 @@
 		c = hlist_entry(h->reqQ.first, CommandList_struct, list);
 		/* can't do anything if fifo is full */
 		if ((h->access.fifo_full(h))) {
-			printk(KERN_WARNING "cciss: fifo full\n");
+			dev_warn(&h->pdev->dev, "fifo full\n");
 			break;
 		}
 
@@ -2925,7 +2951,7 @@
 	}
 }
 
-/* Assumes that CCISS_LOCK(h->ctlr) is held. */
+/* Assumes that h->lock is held. */
 /* Zeros out the error record and then resends the command back */
 /* to the controller */
 static inline void resend_cciss_cmd(ctlr_info_t *h, CommandList_struct *c)
@@ -2966,7 +2992,7 @@
 	driver_byte = DRIVER_OK;
 	msg_byte = cmd->err_info->CommandStatus; /* correct?  seems too device specific */
 
-	if (blk_pc_request(cmd->rq))
+	if (cmd->rq->cmd_type == REQ_TYPE_BLOCK_PC)
 		host_byte = DID_PASSTHROUGH;
 	else
 		host_byte = DID_OK;
@@ -2975,8 +3001,8 @@
 		host_byte, driver_byte);
 
 	if (cmd->err_info->ScsiStatus != SAM_STAT_CHECK_CONDITION) {
-		if (!blk_pc_request(cmd->rq))
-			printk(KERN_WARNING "cciss: cmd %p "
+		if (cmd->rq->cmd_type != REQ_TYPE_BLOCK_PC)
+			dev_warn(&h->pdev->dev, "cmd %p "
 			       "has SCSI Status 0x%x\n",
 			       cmd, cmd->err_info->ScsiStatus);
 		return error_value;
@@ -2985,17 +3011,19 @@
 	/* check the sense key */
 	sense_key = 0xf & cmd->err_info->SenseInfo[2];
 	/* no status or recovered error */
-	if (((sense_key == 0x0) || (sense_key == 0x1)) && !blk_pc_request(cmd->rq))
+	if (((sense_key == 0x0) || (sense_key == 0x1)) &&
+	    (cmd->rq->cmd_type != REQ_TYPE_BLOCK_PC))
 		error_value = 0;
 
 	if (check_for_unit_attention(h, cmd)) {
-		*retry_cmd = !blk_pc_request(cmd->rq);
+		*retry_cmd = !(cmd->rq->cmd_type == REQ_TYPE_BLOCK_PC);
 		return 0;
 	}
 
-	if (!blk_pc_request(cmd->rq)) { /* Not SG_IO or similar? */
+	/* Not SG_IO or similar? */
+	if (cmd->rq->cmd_type != REQ_TYPE_BLOCK_PC) {
 		if (error_value != 0)
-			printk(KERN_WARNING "cciss: cmd %p has CHECK CONDITION"
+			dev_warn(&h->pdev->dev, "cmd %p has CHECK CONDITION"
 			       " sense key = 0x%x\n", cmd, sense_key);
 		return error_value;
 	}
@@ -3035,90 +3063,97 @@
 		rq->errors = evaluate_target_status(h, cmd, &retry_cmd);
 		break;
 	case CMD_DATA_UNDERRUN:
-		if (blk_fs_request(cmd->rq)) {
-			printk(KERN_WARNING "cciss: cmd %p has"
+		if (cmd->rq->cmd_type == REQ_TYPE_FS) {
+			dev_warn(&h->pdev->dev, "cmd %p has"
 			       " completed with data underrun "
 			       "reported\n", cmd);
 			cmd->rq->resid_len = cmd->err_info->ResidualCnt;
 		}
 		break;
 	case CMD_DATA_OVERRUN:
-		if (blk_fs_request(cmd->rq))
-			printk(KERN_WARNING "cciss: cmd %p has"
+		if (cmd->rq->cmd_type == REQ_TYPE_FS)
+			dev_warn(&h->pdev->dev, "cciss: cmd %p has"
 			       " completed with data overrun "
 			       "reported\n", cmd);
 		break;
 	case CMD_INVALID:
-		printk(KERN_WARNING "cciss: cmd %p is "
+		dev_warn(&h->pdev->dev, "cciss: cmd %p is "
 		       "reported invalid\n", cmd);
 		rq->errors = make_status_bytes(SAM_STAT_GOOD,
 			cmd->err_info->CommandStatus, DRIVER_OK,
-			blk_pc_request(cmd->rq) ? DID_PASSTHROUGH : DID_ERROR);
+			(cmd->rq->cmd_type == REQ_TYPE_BLOCK_PC) ?
+				DID_PASSTHROUGH : DID_ERROR);
 		break;
 	case CMD_PROTOCOL_ERR:
-		printk(KERN_WARNING "cciss: cmd %p has "
-		       "protocol error \n", cmd);
+		dev_warn(&h->pdev->dev, "cciss: cmd %p has "
+		       "protocol error\n", cmd);
 		rq->errors = make_status_bytes(SAM_STAT_GOOD,
 			cmd->err_info->CommandStatus, DRIVER_OK,
-			blk_pc_request(cmd->rq) ? DID_PASSTHROUGH : DID_ERROR);
+			(cmd->rq->cmd_type == REQ_TYPE_BLOCK_PC) ?
+				DID_PASSTHROUGH : DID_ERROR);
 		break;
 	case CMD_HARDWARE_ERR:
-		printk(KERN_WARNING "cciss: cmd %p had "
+		dev_warn(&h->pdev->dev, "cciss: cmd %p had "
 		       " hardware error\n", cmd);
 		rq->errors = make_status_bytes(SAM_STAT_GOOD,
 			cmd->err_info->CommandStatus, DRIVER_OK,
-			blk_pc_request(cmd->rq) ? DID_PASSTHROUGH : DID_ERROR);
+			(cmd->rq->cmd_type == REQ_TYPE_BLOCK_PC) ?
+				DID_PASSTHROUGH : DID_ERROR);
 		break;
 	case CMD_CONNECTION_LOST:
-		printk(KERN_WARNING "cciss: cmd %p had "
+		dev_warn(&h->pdev->dev, "cciss: cmd %p had "
 		       "connection lost\n", cmd);
 		rq->errors = make_status_bytes(SAM_STAT_GOOD,
 			cmd->err_info->CommandStatus, DRIVER_OK,
-			blk_pc_request(cmd->rq) ? DID_PASSTHROUGH : DID_ERROR);
+			(cmd->rq->cmd_type == REQ_TYPE_BLOCK_PC) ?
+				DID_PASSTHROUGH : DID_ERROR);
 		break;
 	case CMD_ABORTED:
-		printk(KERN_WARNING "cciss: cmd %p was "
+		dev_warn(&h->pdev->dev, "cciss: cmd %p was "
 		       "aborted\n", cmd);
 		rq->errors = make_status_bytes(SAM_STAT_GOOD,
 			cmd->err_info->CommandStatus, DRIVER_OK,
-			blk_pc_request(cmd->rq) ? DID_PASSTHROUGH : DID_ABORT);
+			(cmd->rq->cmd_type == REQ_TYPE_BLOCK_PC) ?
+				DID_PASSTHROUGH : DID_ABORT);
 		break;
 	case CMD_ABORT_FAILED:
-		printk(KERN_WARNING "cciss: cmd %p reports "
+		dev_warn(&h->pdev->dev, "cciss: cmd %p reports "
 		       "abort failed\n", cmd);
 		rq->errors = make_status_bytes(SAM_STAT_GOOD,
 			cmd->err_info->CommandStatus, DRIVER_OK,
-			blk_pc_request(cmd->rq) ? DID_PASSTHROUGH : DID_ERROR);
+			(cmd->rq->cmd_type == REQ_TYPE_BLOCK_PC) ?
+				DID_PASSTHROUGH : DID_ERROR);
 		break;
 	case CMD_UNSOLICITED_ABORT:
-		printk(KERN_WARNING "cciss%d: unsolicited "
+		dev_warn(&h->pdev->dev, "cciss%d: unsolicited "
 		       "abort %p\n", h->ctlr, cmd);
 		if (cmd->retry_count < MAX_CMD_RETRIES) {
 			retry_cmd = 1;
-			printk(KERN_WARNING
-			       "cciss%d: retrying %p\n", h->ctlr, cmd);
+			dev_warn(&h->pdev->dev, "retrying %p\n", cmd);
 			cmd->retry_count++;
 		} else
-			printk(KERN_WARNING
-			       "cciss%d: %p retried too "
-			       "many times\n", h->ctlr, cmd);
+			dev_warn(&h->pdev->dev,
+				"%p retried too many times\n", cmd);
 		rq->errors = make_status_bytes(SAM_STAT_GOOD,
 			cmd->err_info->CommandStatus, DRIVER_OK,
-			blk_pc_request(cmd->rq) ? DID_PASSTHROUGH : DID_ABORT);
+			(cmd->rq->cmd_type == REQ_TYPE_BLOCK_PC) ?
+				DID_PASSTHROUGH : DID_ABORT);
 		break;
 	case CMD_TIMEOUT:
-		printk(KERN_WARNING "cciss: cmd %p timedout\n", cmd);
+		dev_warn(&h->pdev->dev, "cmd %p timedout\n", cmd);
 		rq->errors = make_status_bytes(SAM_STAT_GOOD,
 			cmd->err_info->CommandStatus, DRIVER_OK,
-			blk_pc_request(cmd->rq) ? DID_PASSTHROUGH : DID_ERROR);
+			(cmd->rq->cmd_type == REQ_TYPE_BLOCK_PC) ?
+				DID_PASSTHROUGH : DID_ERROR);
 		break;
 	default:
-		printk(KERN_WARNING "cciss: cmd %p returned "
+		dev_warn(&h->pdev->dev, "cmd %p returned "
 		       "unknown status %x\n", cmd,
 		       cmd->err_info->CommandStatus);
 		rq->errors = make_status_bytes(SAM_STAT_GOOD,
 			cmd->err_info->CommandStatus, DRIVER_OK,
-			blk_pc_request(cmd->rq) ? DID_PASSTHROUGH : DID_ERROR);
+			(cmd->rq->cmd_type == REQ_TYPE_BLOCK_PC) ?
+				DID_PASSTHROUGH : DID_ERROR);
 	}
 
 after_error_processing:
@@ -3132,6 +3167,34 @@
 	blk_complete_request(cmd->rq);
 }
 
+/* Test whether a completion tag has the direct-lookup flag set.  Returns the
+ * masked flag bit: nonzero when DIRECT_LOOKUP_BIT is set in tag, 0 otherwise.
+ */
+#define DIRECT_LOOKUP_BIT 0x10
+static inline u32 cciss_tag_contains_index(u32 tag)
+{
+	u32 lookup_flag = tag & DIRECT_LOOKUP_BIT;
+
+	return lookup_flag;
+}
+
+/* Extract the index held in the bits of tag above DIRECT_LOOKUP_SHIFT. */
+#define DIRECT_LOOKUP_SHIFT 5
+static inline u32 cciss_tag_to_index(u32 tag)
+{
+	u32 index = tag >> DIRECT_LOOKUP_SHIFT;
+
+	return index;
+}
+
+/* Return tag with its two lowest bits (CCISS_ERROR_BITS) cleared. */
+#define CCISS_ERROR_BITS 0x03
+static inline u32 cciss_tag_discard_error_bits(u32 tag)
+{
+	u32 keep_mask = ~CCISS_ERROR_BITS;
+
+	return tag & keep_mask;
+}
+
+/* Set the direct-lookup flag in *tag; all other bits are left untouched. */
+static inline void cciss_mark_tag_indexed(u32 *tag)
+{
+	*tag = *tag | DIRECT_LOOKUP_BIT;
+}
+
+/* OR index, shifted up by DIRECT_LOOKUP_SHIFT, into *tag.  Bits already set
+ * in *tag are not cleared first.
+ */
+static inline void cciss_set_tag_index(u32 *tag, u32 index)
+{
+	u32 shifted = index << DIRECT_LOOKUP_SHIFT;
+
+	*tag = *tag | shifted;
+}
+
 /*
  * Get a request and submit it to the controller.
  */
@@ -3163,7 +3226,8 @@
 
 	BUG_ON(creq->nr_phys_segments > h->maxsgentries);
 
-	if ((c = cmd_alloc(h, 1)) == NULL)
+	c = cmd_alloc(h);
+	if (!c)
 		goto full;
 
 	blk_start_request(creq);
@@ -3180,8 +3244,8 @@
 	/* got command from pool, so use the command block index instead */
 	/* for direct lookups. */
 	/* The first 2 bits are reserved for controller error reporting. */
-	c->Header.Tag.lower = (c->cmdindex << 3);
-	c->Header.Tag.lower |= 0x04;	/* flag for direct lookup. */
+	cciss_set_tag_index(&c->Header.Tag.lower, c->cmdindex);
+	cciss_mark_tag_indexed(&c->Header.Tag.lower);
 	memcpy(&c->Header.LUN, drv->LunID, sizeof(drv->LunID));
 	c->Request.CDBLen = 10;	/* 12 byte commands not in FW yet; */
 	c->Request.Type.Type = TYPE_CMD;	/* It is a command. */
@@ -3192,11 +3256,8 @@
 	c->Request.CDB[0] =
 	    (rq_data_dir(creq) == READ) ? h->cciss_read : h->cciss_write;
 	start_blk = blk_rq_pos(creq);
-#ifdef CCISS_DEBUG
-	printk(KERN_DEBUG "ciss: sector =%d nr_sectors=%d\n",
+	dev_dbg(&h->pdev->dev, "sector =%d nr_sectors=%d\n",
 	       (int)blk_rq_pos(creq), (int)blk_rq_sectors(creq));
-#endif				/* CCISS_DEBUG */
-
 	sg_init_table(tmp_sg, h->maxsgentries);
 	seg = blk_rq_map_sg(q, creq, tmp_sg);
 
@@ -3236,17 +3297,18 @@
 	if (seg > h->maxSG)
 		h->maxSG = seg;
 
-#ifdef CCISS_DEBUG
-	printk(KERN_DEBUG "cciss: Submitting %ld sectors in %d segments "
+	dev_dbg(&h->pdev->dev, "Submitting %u sectors in %d segments "
 			"chained[%d]\n",
 			blk_rq_sectors(creq), seg, chained);
-#endif				/* CCISS_DEBUG */
 
-	c->Header.SGList = c->Header.SGTotal = seg + chained;
-	if (seg > h->max_cmd_sgentries)
+	c->Header.SGTotal = seg + chained;
+	if (seg <= h->max_cmd_sgentries)
+		c->Header.SGList = c->Header.SGTotal;
+	else
 		c->Header.SGList = h->max_cmd_sgentries;
+	set_performant_mode(h, c);
 
-	if (likely(blk_fs_request(creq))) {
+	if (likely(creq->cmd_type == REQ_TYPE_FS)) {
 		if(h->cciss_read == CCISS_READ_10) {
 			c->Request.CDB[1] = 0;
 			c->Request.CDB[2] = (start_blk >> 24) & 0xff; /* MSB */
@@ -3276,11 +3338,12 @@
 			c->Request.CDB[13]= blk_rq_sectors(creq) & 0xff;
 			c->Request.CDB[14] = c->Request.CDB[15] = 0;
 		}
-	} else if (blk_pc_request(creq)) {
+	} else if (creq->cmd_type == REQ_TYPE_BLOCK_PC) {
 		c->Request.CDBLen = creq->cmd_len;
 		memcpy(c->Request.CDB, creq->cmd, BLK_MAX_CDB);
 	} else {
-		printk(KERN_WARNING "cciss%d: bad request type %d\n", h->ctlr, creq->cmd_type);
+		dev_warn(&h->pdev->dev, "bad request type %d\n",
+			creq->cmd_type);
 		BUG();
 	}
 
@@ -3313,72 +3376,131 @@
 
 static inline long interrupt_not_for_us(ctlr_info_t *h)
 {
-	return (((h->access.intr_pending(h) == 0) ||
-		 (h->interrupts_enabled == 0)));
+	return ((h->access.intr_pending(h) == 0) ||
+		(h->interrupts_enabled == 0));
 }
 
-static irqreturn_t do_cciss_intr(int irq, void *dev_id)
+/* Validate a command index taken from a completion tag.  Returns 0 when
+ * tag_index is below h->nr_cmds; otherwise logs raw_tag and returns 1.
+ */
+static inline int bad_tag(ctlr_info_t *h, u32 tag_index,
+			u32 raw_tag)
+{
+	if (likely(tag_index < h->nr_cmds))
+		return 0;
+
+	dev_warn(&h->pdev->dev, "bad tag 0x%08x ignored.\n", raw_tag);
+	return 1;
+}
+
+/* Take a completed command off its queue and run the completion routine
+ * that matches its cmd_type.  Commands of any other type are only dequeued.
+ */
+static inline void finish_cmd(ctlr_info_t *h, CommandList_struct *c,
+				u32 raw_tag)
+{
+	removeQ(c);
+
+	/* ordinary block read/write request: the common case */
+	if (likely(c->cmd_type == CMD_RWREQ)) {
+		complete_command(h, c, 0);
+		return;
+	}
+
+	/* an ioctl submitter is waiting on c->waiting */
+	if (c->cmd_type == CMD_IOCTL_PEND) {
+		complete(c->waiting);
+		return;
+	}
+
+#ifdef CONFIG_CISS_SCSI_TAPE
+	if (c->cmd_type == CMD_SCSI)
+		complete_scsi_command(c, 0, raw_tag);
+#endif
+}
+
+/* Fetch the next completed command tag, or FIFO_EMPTY when there is none.
+ * Outside performant mode this defers to the board's command_completed
+ * access method.  In performant mode it consumes at most one entry from
+ * the reply ring at h->reply_pool_head and decrements
+ * h->commands_outstanding for it.
+ */
+static inline u32 next_command(ctlr_info_t *h)
+{
+	u32 a;
+
+	if (unlikely(h->transMethod != CFGTBL_Trans_Performant))
+		return h->access.command_completed(h);
+
+	/* An entry is taken only when its low bit matches the current
+	 * wraparound flag; the flag is toggled below each time the head
+	 * pointer wraps back to the start of the ring.
+	 */
+	if ((*(h->reply_pool_head) & 1) == (h->reply_pool_wraparound)) {
+		a = *(h->reply_pool_head); /* Next cmd in ring buffer */
+		(h->reply_pool_head)++;
+		h->commands_outstanding--;
+	} else {
+		a = FIFO_EMPTY;
+	}
+	/* Check for wraparound */
+	if (h->reply_pool_head == (h->reply_pool + h->max_commands)) {
+		h->reply_pool_head = h->reply_pool;
+		h->reply_pool_wraparound ^= 1;
+	}
+	return a;
+}
+
+/* Complete an indexed ("direct lookup") command: the tag carries an index
+ * into h->cmd_pool.  An out-of-range index is reported by bad_tag() and
+ * skipped.  Either way, returns the next completion tag.
+ */
+static inline u32 process_indexed_cmd(ctlr_info_t *h, u32 raw_tag)
+{
+	u32 pool_slot = cciss_tag_to_index(raw_tag);
+
+	if (!bad_tag(h, pool_slot, raw_tag))
+		finish_cmd(h, h->cmd_pool + pool_slot, raw_tag);
+
+	return next_command(h);
+}
+
+/* Complete a non-indexed command: search h->cmpQ for the command whose bus
+ * address equals the tag once the error bits of both are masked off.  If no
+ * command matches, the tag is reported as bad.  Either way, returns the next
+ * completion tag.
+ */
+static inline u32 process_nonindexed_cmd(ctlr_info_t *h, u32 raw_tag)
+{
+	CommandList_struct *c = NULL;
+	struct hlist_node *pos;
+	__u32 wanted = cciss_tag_discard_error_bits(raw_tag);
+
+	hlist_for_each_entry(c, pos, &h->cmpQ, list) {
+		if (cciss_tag_discard_error_bits(c->busaddr) != wanted)
+			continue;
+		finish_cmd(h, c, raw_tag);
+		return next_command(h);
+	}
+
+	/* nothing matched: pass an out-of-range index so bad_tag() warns */
+	bad_tag(h, h->nr_cmds + 1, raw_tag);
+	return next_command(h);
+}
+
+static irqreturn_t do_cciss_intx(int irq, void *dev_id)
 {
 	ctlr_info_t *h = dev_id;
-	CommandList_struct *c;
 	unsigned long flags;
-	__u32 a, a1, a2;
+	u32 raw_tag;
 
 	if (interrupt_not_for_us(h))
 		return IRQ_NONE;
-	/*
-	 * If there are completed commands in the completion queue,
-	 * we had better do something about it.
-	 */
-	spin_lock_irqsave(CCISS_LOCK(h->ctlr), flags);
+	spin_lock_irqsave(&h->lock, flags);
 	while (interrupt_pending(h)) {
-		while ((a = get_next_completion(h)) != FIFO_EMPTY) {
-			a1 = a;
-			if ((a & 0x04)) {
-				a2 = (a >> 3);
-				if (a2 >= h->nr_cmds) {
-					printk(KERN_WARNING
-					       "cciss: controller cciss%d failed, stopping.\n",
-					       h->ctlr);
-					spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags);
-					fail_all_cmds(h->ctlr);
-					return IRQ_HANDLED;
-				}
-
-				c = h->cmd_pool + a2;
-				a = c->busaddr;
-
-			} else {
-				struct hlist_node *tmp;
-
-				a &= ~3;
-				c = NULL;
-				hlist_for_each_entry(c, tmp, &h->cmpQ, list) {
-					if (c->busaddr == a)
-						break;
-				}
-			}
-			/*
-			 * If we've found the command, take it off the
-			 * completion Q and free it
-			 */
-			if (c && c->busaddr == a) {
-				removeQ(c);
-				if (c->cmd_type == CMD_RWREQ) {
-					complete_command(h, c, 0);
-				} else if (c->cmd_type == CMD_IOCTL_PEND) {
-					complete(c->waiting);
-				}
-#				ifdef CONFIG_CISS_SCSI_TAPE
-				else if (c->cmd_type == CMD_SCSI)
-					complete_scsi_command(c, 0, a1);
-#				endif
-				continue;
-			}
+		raw_tag = get_next_completion(h);
+		while (raw_tag != FIFO_EMPTY) {
+			if (cciss_tag_contains_index(raw_tag))
+				raw_tag = process_indexed_cmd(h, raw_tag);
+			else
+				raw_tag = process_nonindexed_cmd(h, raw_tag);
 		}
 	}
+	spin_unlock_irqrestore(&h->lock, flags);
+	return IRQ_HANDLED;
+}
 
-	spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags);
+/* Add a second interrupt handler for MSI/MSI-X mode. In this mode we never
+ * check the interrupt pending register because it is not set.
+ *
+ * With h->lock held, completions are drained until FIFO_EMPTY is seen.
+ * Each tag goes to the indexed or the non-indexed handler, and each handler
+ * returns the next tag to process.  Always returns IRQ_HANDLED.
+ */
+static irqreturn_t do_cciss_msix_intr(int irq, void *dev_id)
+{
+	ctlr_info_t *h = dev_id;
+	unsigned long flags;
+	u32 raw_tag;
+
+	spin_lock_irqsave(&h->lock, flags);
+	raw_tag = get_next_completion(h);
+	while (raw_tag != FIFO_EMPTY) {
+		if (cciss_tag_contains_index(raw_tag))
+			raw_tag = process_indexed_cmd(h, raw_tag);
+		else
+			raw_tag = process_nonindexed_cmd(h, raw_tag);
+	}
+	spin_unlock_irqrestore(&h->lock, flags);
 	return IRQ_HANDLED;
 }
 
@@ -3510,18 +3632,17 @@
 
 	switch (c->err_info->SenseInfo[12]) {
 	case STATE_CHANGED:
-		printk(KERN_WARNING "cciss%d: a state change "
-			"detected, command retried\n", h->ctlr);
+		dev_warn(&h->pdev->dev, "a state change "
+			"detected, command retried\n");
 		return 1;
 	break;
 	case LUN_FAILED:
-		printk(KERN_WARNING "cciss%d: LUN failure "
-			"detected, action required\n", h->ctlr);
+		dev_warn(&h->pdev->dev, "LUN failure "
+			"detected, action required\n");
 		return 1;
 	break;
 	case REPORT_LUNS_CHANGED:
-		printk(KERN_WARNING "cciss%d: report LUN data "
-			"changed\n", h->ctlr);
+		dev_warn(&h->pdev->dev, "report LUN data changed\n");
 	/*
 	 * Here, we could call add_to_scan_list and wake up the scan thread,
 	 * except that it's quite likely that we will get more than one
@@ -3541,19 +3662,18 @@
 		return 1;
 	break;
 	case POWER_OR_RESET:
-		printk(KERN_WARNING "cciss%d: a power on "
-			"or device reset detected\n", h->ctlr);
+		dev_warn(&h->pdev->dev,
+			"a power on or device reset detected\n");
 		return 1;
 	break;
 	case UNIT_ATTENTION_CLEARED:
-		printk(KERN_WARNING "cciss%d: unit attention "
-		    "cleared by another initiator\n", h->ctlr);
+		dev_warn(&h->pdev->dev,
+			"unit attention cleared by another initiator\n");
 		return 1;
 	break;
 	default:
-		printk(KERN_WARNING "cciss%d: unknown "
-			"unit attention detected\n", h->ctlr);
-				return 1;
+		dev_warn(&h->pdev->dev, "unknown unit attention detected\n");
+		return 1;
 	}
 }
 
@@ -3562,39 +3682,41 @@
  *   the io functions.
  *   This is for debug only.
  */
-#ifdef CCISS_DEBUG
-static void print_cfg_table(CfgTable_struct *tb)
+static void print_cfg_table(ctlr_info_t *h)
 {
 	int i;
 	char temp_name[17];
+	CfgTable_struct *tb = h->cfgtable;
 
-	printk("Controller Configuration information\n");
-	printk("------------------------------------\n");
+	dev_dbg(&h->pdev->dev, "Controller Configuration information\n");
+	dev_dbg(&h->pdev->dev, "------------------------------------\n");
 	for (i = 0; i < 4; i++)
 		temp_name[i] = readb(&(tb->Signature[i]));
 	temp_name[4] = '\0';
-	printk("   Signature = %s\n", temp_name);
-	printk("   Spec Number = %d\n", readl(&(tb->SpecValence)));
-	printk("   Transport methods supported = 0x%x\n",
+	dev_dbg(&h->pdev->dev, "   Signature = %s\n", temp_name);
+	dev_dbg(&h->pdev->dev, "   Spec Number = %d\n",
+		readl(&(tb->SpecValence)));
+	dev_dbg(&h->pdev->dev, "   Transport methods supported = 0x%x\n",
 	       readl(&(tb->TransportSupport)));
-	printk("   Transport methods active = 0x%x\n",
+	dev_dbg(&h->pdev->dev, "   Transport methods active = 0x%x\n",
 	       readl(&(tb->TransportActive)));
-	printk("   Requested transport Method = 0x%x\n",
+	dev_dbg(&h->pdev->dev, "   Requested transport Method = 0x%x\n",
 	       readl(&(tb->HostWrite.TransportRequest)));
-	printk("   Coalesce Interrupt Delay = 0x%x\n",
+	dev_dbg(&h->pdev->dev, "   Coalesce Interrupt Delay = 0x%x\n",
 	       readl(&(tb->HostWrite.CoalIntDelay)));
-	printk("   Coalesce Interrupt Count = 0x%x\n",
+	dev_dbg(&h->pdev->dev, "   Coalesce Interrupt Count = 0x%x\n",
 	       readl(&(tb->HostWrite.CoalIntCount)));
-	printk("   Max outstanding commands = 0x%d\n",
+	dev_dbg(&h->pdev->dev, "   Max outstanding commands = 0x%d\n",
 	       readl(&(tb->CmdsOutMax)));
-	printk("   Bus Types = 0x%x\n", readl(&(tb->BusTypes)));
+	dev_dbg(&h->pdev->dev, "   Bus Types = 0x%x\n",
+		readl(&(tb->BusTypes)));
 	for (i = 0; i < 16; i++)
 		temp_name[i] = readb(&(tb->ServerName[i]));
 	temp_name[16] = '\0';
-	printk("   Server Name = %s\n", temp_name);
-	printk("   Heartbeat Counter = 0x%x\n\n\n", readl(&(tb->HeartBeat)));
+	dev_dbg(&h->pdev->dev, "   Server Name = %s\n", temp_name);
+	dev_dbg(&h->pdev->dev, "   Heartbeat Counter = 0x%x\n\n\n",
+		readl(&(tb->HeartBeat)));
 }
-#endif				/* CCISS_DEBUG */
 
 static int find_PCI_BAR_index(struct pci_dev *pdev, unsigned long pci_bar_addr)
 {
@@ -3618,7 +3740,7 @@
 				offset += 8;
 				break;
 			default:	/* reserved in PCI 2.2 */
-				printk(KERN_WARNING
+				dev_warn(&pdev->dev,
 				       "Base address is invalid\n");
 				return -1;
 				break;
@@ -3630,12 +3752,182 @@
 	return -1;
 }
 
+/* Fill in bucket_map[], given nsgs (the max number of
+ * scatter gather elements supported) and bucket[],
+ * which is an array of num_buckets integers.  The bucket[] array
+ * contains the different DMA transfer sizes (in 16
+ * byte increments) which the controller uses to fetch
+ * commands.  This function fills in bucket_map[], which
+ * maps a given number of scatter gather elements to one of
+ * those DMA transfer sizes.  The point of it is to allow the
+ * controller to only do as much DMA as needed to fetch the
+ * command, with the DMA transfer size encoded in the lower
+ * bits of the command address.
+ *
+ * bucket_map[i] receives the index of the first bucket whose size is
+ * at least i + MINIMUM_TRANSFER_BLOCKS, or num_buckets when no bucket
+ * is large enough.
+ */
+static void  calc_bucket_map(int bucket[], int num_buckets,
+	int nsgs, int *bucket_map)
+{
+	int i, j, b, size;
+
+	/* even a command with 0 SGs requires 4 blocks */
+#define MINIMUM_TRANSFER_BLOCKS 4
+#define NUM_BUCKETS 8
+	/* Note, bucket_map must have nsgs+1 entries. */
+	for (i = 0; i <= nsgs; i++) {
+		/* Compute size of a command with i SG entries */
+		size = i + MINIMUM_TRANSFER_BLOCKS;
+		b = num_buckets; /* Assume the biggest bucket */
+		/* Find the bucket that is just big enough.  Bound the scan
+		 * by num_buckets rather than a literal 8 so that bucket[]
+		 * is never read past the size the caller passed in.
+		 */
+		for (j = 0; j < num_buckets; j++) {
+			if (bucket[j] >= size) {
+				b = j;
+				break;
+			}
+		}
+		/* for a command with i SG entries, use bucket b. */
+		bucket_map[i] = b;
+	}
+}
+
+/* Poll the doorbell register until the controller clears CFGTBL_ChangeReq,
+ * sleeping 10 ms between polls and giving up silently after
+ * MAX_CONFIG_WAIT polls.
+ */
+static void __devinit cciss_wait_for_mode_change_ack(ctlr_info_t *h)
+{
+	int polls = 0;
+
+	/* under certain very rare conditions, this can take awhile.
+	 * (e.g.: hot replace a failed 144GB drive in a RAID 5 set right
+	 * as we enter this code.) */
+	while (polls < MAX_CONFIG_WAIT) {
+		if (!(readl(h->vaddr + SA5_DOORBELL) & CFGTBL_ChangeReq))
+			return;
+		msleep(10);
+		polls++;
+	}
+}
+
+/* Program the controller for performant mode: reset the reply ring state,
+ * write the eight block-fetch sizes and the reply queue parameters to the
+ * transport table, request the mode change through the doorbell and wait
+ * for it to be acknowledged.  A failure to switch is only logged; nothing
+ * is returned to the caller.
+ */
+static __devinit void cciss_enter_performant_mode(ctlr_info_t *h)
+{
+	/* This is a bit complicated.  There are 8 registers on
+	 * the controller which we write to to tell it 8 different
+	 * sizes of commands which there may be.  It's a way of
+	 * reducing the DMA done to fetch each command.  Encoded into
+	 * each command's tag are 3 bits which communicate to the controller
+	 * which of the eight sizes that command fits within.  The size of
+	 * each command depends on how many scatter gather entries there are.
+	 * Each SG entry requires 16 bytes.  The eight registers are programmed
+	 * with the number of 16-byte blocks a command of that size requires.
+	 * The smallest command possible requires 5 such 16 byte blocks.
+	 * the largest command possible requires MAXSGENTRIES + 4 16-byte
+	 * blocks.  Note, this only extends to the SG entries contained
+	 * within the command block, and does not extend to chained blocks
+	 * of SG elements.   bft[] contains the eight values we write to
+	 * the registers.  They are not evenly distributed, but have more
+	 * sizes for small commands, and fewer sizes for larger commands.
+	 */
+	__u32 trans_offset;
+	int bft[8] = { 5, 6, 8, 10, 12, 20, 28, MAXSGENTRIES + 4};
+			/*
+			 *  5 = 1 s/g entry or 4k
+			 *  6 = 2 s/g entry or 8k
+			 *  8 = 4 s/g entry or 16k
+			 * 10 = 6 s/g entry or 24k
+			 */
+	unsigned long register_value;
+	/* bft[] must stay in ascending order: 28 may not exceed the last entry */
+	BUILD_BUG_ON(28 > MAXSGENTRIES + 4);
+
+	h->reply_pool_wraparound = 1; /* spec: init to 1 */
+
+	/* Controller spec: zero out this buffer. */
+	memset(h->reply_pool, 0, h->max_commands * sizeof(__u64));
+	h->reply_pool_head = h->reply_pool;
+
+	/* NOTE(review): trans_offset is read here but not used again in
+	 * this function.
+	 */
+	trans_offset = readl(&(h->cfgtable->TransMethodOffset));
+	calc_bucket_map(bft, ARRAY_SIZE(bft), h->maxsgentries,
+				h->blockFetchTable);
+	writel(bft[0], &h->transtable->BlockFetch0);
+	writel(bft[1], &h->transtable->BlockFetch1);
+	writel(bft[2], &h->transtable->BlockFetch2);
+	writel(bft[3], &h->transtable->BlockFetch3);
+	writel(bft[4], &h->transtable->BlockFetch4);
+	writel(bft[5], &h->transtable->BlockFetch5);
+	writel(bft[6], &h->transtable->BlockFetch6);
+	writel(bft[7], &h->transtable->BlockFetch7);
+
+	/* size of controller ring buffer */
+	writel(h->max_commands, &h->transtable->RepQSize);
+	writel(1, &h->transtable->RepQCount);
+	writel(0, &h->transtable->RepQCtrAddrLow32);
+	writel(0, &h->transtable->RepQCtrAddrHigh32);
+	/* NOTE(review): the high word of the reply queue address is always
+	 * written as 0 - confirm reply_pool_dhandle always fits in 32 bits.
+	 */
+	writel(h->reply_pool_dhandle, &h->transtable->RepQAddr0Low32);
+	writel(0, &h->transtable->RepQAddr0High32);
+	writel(CFGTBL_Trans_Performant,
+			&(h->cfgtable->HostWrite.TransportRequest));
+
+	/* ring the doorbell, then check which transport actually became active */
+	writel(CFGTBL_ChangeReq, h->vaddr + SA5_DOORBELL);
+	cciss_wait_for_mode_change_ack(h);
+	register_value = readl(&(h->cfgtable->TransportActive));
+	if (!(register_value & CFGTBL_Trans_Performant))
+		dev_warn(&h->pdev->dev, "cciss: unable to get board into"
+					" performant mode\n");
+}
+
+/* Switch the controller to performant mode when the board advertises it.
+ *
+ * Returns without changing anything if the board lacks PERFORMANT_MODE
+ * support or if CommandList_struct is not a multiple of 32 bytes.
+ * Otherwise allocates the reply ring and the block fetch table, programs
+ * the controller and installs the performant access methods.
+ *
+ * If either allocation fails, whatever was allocated is released and the
+ * pointers in h are reset to NULL, so that no stale pointer is left behind
+ * for any later teardown code that frees these fields.
+ */
+static void __devinit cciss_put_controller_into_performant_mode(ctlr_info_t *h)
+{
+	__u32 trans_support;
+
+	dev_dbg(&h->pdev->dev, "Trying to put board into Performant mode\n");
+	/* Attempt to put controller into performant mode if supported */
+	/* Does board support performant mode? */
+	trans_support = readl(&(h->cfgtable->TransportSupport));
+	if (!(trans_support & PERFORMANT_MODE))
+		return;
+
+	dev_dbg(&h->pdev->dev, "Placing controller into performant mode\n");
+	/* Performant mode demands commands on a 32 byte boundary
+	 * pci_alloc_consistent aligns on page boundarys already.
+	 * Just need to check if divisible by 32
+	 */
+	if ((sizeof(CommandList_struct) % 32) != 0) {
+		/* one format string, one trailing newline */
+		dev_warn(&h->pdev->dev,
+			"command size[%d] not divisible by 32, "
+			"no performant mode\n",
+			(int)sizeof(CommandList_struct));
+		return;
+	}
+
+	/* Performant mode ring buffer and supporting data structures */
+	h->reply_pool = pci_alloc_consistent(
+		h->pdev, h->max_commands * sizeof(__u64),
+		&(h->reply_pool_dhandle));
+
+	/* Need a block fetch table for performant mode */
+	h->blockFetchTable = kmalloc(((h->maxsgentries+1) *
+		sizeof(__u32)), GFP_KERNEL);
+
+	if ((h->reply_pool == NULL) || (h->blockFetchTable == NULL))
+		goto clean_up;
+
+	cciss_enter_performant_mode(h);
+
+	/* Change the access methods to the performant access methods */
+	h->access = SA5_performant_access;
+	h->transMethod = CFGTBL_Trans_Performant;
+
+	return;
+clean_up:
+	/* kfree(NULL) is a no-op, so no guard is needed here */
+	kfree(h->blockFetchTable);
+	h->blockFetchTable = NULL;
+	if (h->reply_pool) {
+		pci_free_consistent(h->pdev,
+				h->max_commands * sizeof(__u64),
+				h->reply_pool,
+				h->reply_pool_dhandle);
+		h->reply_pool = NULL;
+	}
+	return;
+
+} /* cciss_put_controller_into_performant_mode */
+
 /* If MSI/MSI-X is supported by the kernel we will try to enable it on
  * controllers that are capable. If not, we use IO-APIC mode.
  */
 
-static void __devinit cciss_interrupt_mode(ctlr_info_t *c,
-					   struct pci_dev *pdev, __u32 board_id)
+static void __devinit cciss_interrupt_mode(ctlr_info_t *h)
 {
 #ifdef CONFIG_PCI_MSI
 	int err;
@@ -3644,268 +3936,283 @@
 	};
 
 	/* Some boards advertise MSI but don't really support it */
-	if ((board_id == 0x40700E11) ||
-	    (board_id == 0x40800E11) ||
-	    (board_id == 0x40820E11) || (board_id == 0x40830E11))
+	if ((h->board_id == 0x40700E11) || (h->board_id == 0x40800E11) ||
+	    (h->board_id == 0x40820E11) || (h->board_id == 0x40830E11))
 		goto default_int_mode;
 
-	if (pci_find_capability(pdev, PCI_CAP_ID_MSIX)) {
-		err = pci_enable_msix(pdev, cciss_msix_entries, 4);
+	if (pci_find_capability(h->pdev, PCI_CAP_ID_MSIX)) {
+		err = pci_enable_msix(h->pdev, cciss_msix_entries, 4);
 		if (!err) {
-			c->intr[0] = cciss_msix_entries[0].vector;
-			c->intr[1] = cciss_msix_entries[1].vector;
-			c->intr[2] = cciss_msix_entries[2].vector;
-			c->intr[3] = cciss_msix_entries[3].vector;
-			c->msix_vector = 1;
+			h->intr[0] = cciss_msix_entries[0].vector;
+			h->intr[1] = cciss_msix_entries[1].vector;
+			h->intr[2] = cciss_msix_entries[2].vector;
+			h->intr[3] = cciss_msix_entries[3].vector;
+			h->msix_vector = 1;
 			return;
 		}
 		if (err > 0) {
-			printk(KERN_WARNING "cciss: only %d MSI-X vectors "
-			       "available\n", err);
+			dev_warn(&h->pdev->dev,
+				"only %d MSI-X vectors available\n", err);
 			goto default_int_mode;
 		} else {
-			printk(KERN_WARNING "cciss: MSI-X init failed %d\n",
-			       err);
+			dev_warn(&h->pdev->dev,
+				"MSI-X init failed %d\n", err);
 			goto default_int_mode;
 		}
 	}
-	if (pci_find_capability(pdev, PCI_CAP_ID_MSI)) {
-		if (!pci_enable_msi(pdev)) {
-			c->msi_vector = 1;
-		} else {
-			printk(KERN_WARNING "cciss: MSI init failed\n");
-		}
+	if (pci_find_capability(h->pdev, PCI_CAP_ID_MSI)) {
+		if (!pci_enable_msi(h->pdev))
+			h->msi_vector = 1;
+		else
+			dev_warn(&h->pdev->dev, "MSI init failed\n");
 	}
 default_int_mode:
 #endif				/* CONFIG_PCI_MSI */
 	/* if we get here we're going to use the default interrupt mode */
-	c->intr[SIMPLE_MODE_INT] = pdev->irq;
+	h->intr[PERF_MODE_INT] = h->pdev->irq;
 	return;
 }
 
-static int __devinit cciss_pci_init(ctlr_info_t *c, struct pci_dev *pdev)
+static int __devinit cciss_lookup_board_id(struct pci_dev *pdev, u32 *board_id)
 {
-	ushort subsystem_vendor_id, subsystem_device_id, command;
-	__u32 board_id, scratchpad = 0;
-	__u64 cfg_offset;
-	__u32 cfg_base_addr;
-	__u64 cfg_base_addr_index;
-	int i, prod_index, err;
+	int i;
+	u32 subsystem_vendor_id, subsystem_device_id;
 
 	subsystem_vendor_id = pdev->subsystem_vendor;
 	subsystem_device_id = pdev->subsystem_device;
-	board_id = (((__u32) (subsystem_device_id << 16) & 0xffff0000) |
-		    subsystem_vendor_id);
+	*board_id = ((subsystem_device_id << 16) & 0xffff0000) |
+			subsystem_vendor_id;
 
 	for (i = 0; i < ARRAY_SIZE(products); i++) {
 		/* Stand aside for hpsa driver on request */
 		if (cciss_allow_hpsa && products[i].board_id == HPSA_BOUNDARY)
 			return -ENODEV;
-		if (board_id == products[i].board_id)
-			break;
+		if (*board_id == products[i].board_id)
+			return i;
 	}
-	prod_index = i;
-	if (prod_index == ARRAY_SIZE(products)) {
-		dev_warn(&pdev->dev,
-			"unrecognized board ID: 0x%08lx, ignoring.\n",
-			(unsigned long) board_id);
+	dev_warn(&pdev->dev, "unrecognized board ID: 0x%08x, ignoring.\n",
+		*board_id);
+	return -ENODEV;
+}
+
+/* Report whether the PCI_COMMAND_MEMORY bit is clear in the board's PCI
+ * command register.  The result of the config read itself is ignored.
+ */
+static inline bool cciss_board_disabled(ctlr_info_t *h)
+{
+	u16 pci_cmd;
+
+	(void) pci_read_config_word(h->pdev, PCI_COMMAND, &pci_cmd);
+	if (pci_cmd & PCI_COMMAND_MEMORY)
+		return false;
+	return true;
+}
+
+/* Store the start address of the first PCI resource flagged IORESOURCE_MEM
+ * in *memory_bar.  Returns 0 on success, or -ENODEV (after a warning) when
+ * the device has no memory resource.
+ */
+static int __devinit cciss_pci_find_memory_BAR(struct pci_dev *pdev,
+	unsigned long *memory_bar)
+{
+	int bar;
+
+	for (bar = 0; bar < DEVICE_COUNT_RESOURCE; bar++) {
+		if (!(pci_resource_flags(pdev, bar) & IORESOURCE_MEM))
+			continue;
+		/* addressing mode bits already removed */
+		*memory_bar = pci_resource_start(pdev, bar);
+		dev_dbg(&pdev->dev, "memory BAR = %lx\n",
+			*memory_bar);
+		return 0;
+	}
+
+	dev_warn(&pdev->dev, "no memory BAR found\n");
+	return -ENODEV;
+}
+
+/* Poll the scratchpad register until it reads CCISS_FIRMWARE_READY.
+ * Sleeps CCISS_BOARD_READY_POLL_INTERVAL_MSECS between polls and gives up
+ * after CCISS_BOARD_READY_ITERATIONS polls.  Returns 0 once the board is
+ * ready, or -ENODEV (after a warning) on timeout.
+ */
+static int __devinit cciss_wait_for_board_ready(ctlr_info_t *h)
+{
+	int polls = 0;
+	u32 scratchpad;
+
+	while (polls < CCISS_BOARD_READY_ITERATIONS) {
+		scratchpad = readl(h->vaddr + SA5_SCRATCHPAD_OFFSET);
+		if (scratchpad == CCISS_FIRMWARE_READY)
+			return 0;
+		msleep(CCISS_BOARD_READY_POLL_INTERVAL_MSECS);
+		polls++;
+	}
+
+	dev_warn(&h->pdev->dev, "board not ready, timed out.\n");
+	return -ENODEV;
+}
+
+/* Read the config table locator registers from the mapped board memory.
+ * On return, *cfg_base_addr holds the low 16 bits of the SA5_CTCFG_OFFSET
+ * register, *cfg_offset the SA5_CTMEM_OFFSET register, and
+ * *cfg_base_addr_index the result of find_PCI_BAR_index() for that base
+ * address.  Returns 0, or -ENODEV when find_PCI_BAR_index() reports -1.
+ */
+static int __devinit cciss_find_cfg_addrs(struct pci_dev *pdev,
+	void __iomem *vaddr, u32 *cfg_base_addr, u64 *cfg_base_addr_index,
+	u64 *cfg_offset)
+{
+	*cfg_base_addr = readl(vaddr + SA5_CTCFG_OFFSET);
+	*cfg_offset = readl(vaddr + SA5_CTMEM_OFFSET);
+	*cfg_base_addr &= (u32) 0x0000ffff;
+	*cfg_base_addr_index = find_PCI_BAR_index(pdev, *cfg_base_addr);
+	if (*cfg_base_addr_index == -1) {
+		dev_warn(&pdev->dev, "cannot find cfg_base_addr_index, "
+			"*cfg_base_addr = 0x%08x\n", *cfg_base_addr);
 		return -ENODEV;
 	}
+	return 0;
+}
 
-	/* check to see if controller has been disabled */
-	/* BEFORE trying to enable it */
-	(void)pci_read_config_word(pdev, PCI_COMMAND, &command);
-	if (!(command & 0x02)) {
-		printk(KERN_WARNING
-		       "cciss: controller appears to be disabled\n");
+/* Locate and map the controller's config table (h->cfgtable) and, using the
+ * TransMethodOffset read from it, the performant mode transport table
+ * (h->transtable).
+ *
+ * Returns 0 on success, the error from cciss_find_cfg_addrs() if the
+ * tables cannot be located, or -ENOMEM if either mapping fails.
+ */
+static int __devinit cciss_find_cfgtables(ctlr_info_t *h)
+{
+	u64 cfg_offset;
+	u32 cfg_base_addr;
+	u64 cfg_base_addr_index;
+	u32 trans_offset;
+	int rc;
+
+	rc = cciss_find_cfg_addrs(h->pdev, h->vaddr, &cfg_base_addr,
+		&cfg_base_addr_index, &cfg_offset);
+	if (rc)
+		return rc;
+	/* Map the whole table: the size of the pointed-to structure, not
+	 * the size of the pointer.
+	 */
+	h->cfgtable = remap_pci_mem(pci_resource_start(h->pdev,
+		cfg_base_addr_index) + cfg_offset, sizeof(*h->cfgtable));
+	if (!h->cfgtable)
+		return -ENOMEM;
+	/* Find performant mode table. */
+	trans_offset = readl(&h->cfgtable->TransMethodOffset);
+	h->transtable = remap_pci_mem(pci_resource_start(h->pdev,
+				cfg_base_addr_index)+cfg_offset+trans_offset,
+				sizeof(*h->transtable));
+	if (!h->transtable)
+		return -ENOMEM;
+	return 0;
+}
+
+/* Read MaxPerformantModeCommands from the config table into
+ * h->max_commands.  A value below 16 is logged and replaced with 16.
+ */
+static void __devinit cciss_get_max_perf_mode_cmds(struct ctlr_info *h)
+{
+	h->max_commands = readl(&(h->cfgtable->MaxPerformantModeCommands));
+	if (h->max_commands >= 16)
+		return;
+
+	dev_warn(&h->pdev->dev, "Controller reports "
+		"max supported commands of %d, an obvious lie. "
+		"Using 16.  Ensure that firmware is up to date.\n",
+		h->max_commands);
+	h->max_commands = 16;
+}
+
+/* Query the hardware for its limits and record them in h: the maximum
+ * number of commands, the maximum number of SG elements with and without
+ * chaining, and the SG chain block size.
+ */
+static void __devinit cciss_find_board_params(ctlr_info_t *h)
+{
+	cciss_get_max_perf_mode_cmds(h);
+	h->nr_cmds = h->max_commands - 4; /* Allow room for some ioctls */
+	h->maxsgentries = readl(&(h->cfgtable->MaxSGElements));
+
+	if (h->maxsgentries <= 512) {
+		/* default to traditional values: 31 in-command entries
+		 * and no chaining
+		 */
+		h->max_cmd_sgentries = 31;
+		h->maxsgentries = 31;
+		h->chainsize = 0;
+		return;
+	}
+
+	/*
+	 * Limit in-command s/g elements to 32 to save dma'able memory;
+	 * the remaining elements go into a chained block.
+	 */
+	h->max_cmd_sgentries = 32;
+	h->chainsize = h->maxsgentries - h->max_cmd_sgentries + 1;
+	h->maxsgentries--; /* save one for chain pointer */
+}
+
+/* Check that the first four Signature bytes of the config table read
+ * "CISS".  Returns true on a match; otherwise warns and returns false.
+ * Reading stops at the first byte that does not match.
+ */
+static inline bool CISS_signature_present(ctlr_info_t *h)
+{
+	static const char expected[4] = { 'C', 'I', 'S', 'S' };
+	int i;
+
+	for (i = 0; i < 4; i++) {
+		if (readb(&h->cfgtable->Signature[i]) == expected[i])
+			continue;
+		dev_warn(&h->pdev->dev, "not a valid CISS config table\n");
+		return false;
+	}
+	return true;
+}
+
+/* On x86 builds, set bit 0x100 in the config table's SCSI_Prefetch register
+ * (the SCSI core prefetch needed for the 6400).  Does nothing on other
+ * architectures.
+ */
+static inline void cciss_enable_scsi_prefetch(ctlr_info_t *h)
+{
+#ifdef CONFIG_X86
+	u32 current_bits = readl(&(h->cfgtable->SCSI_Prefetch));
+
+	writel(current_bits | 0x100, &(h->cfgtable->SCSI_Prefetch));
+#endif
+}
+
+/* P600 (board id 0x3225103C) workaround: turn off DMA prefetch, since an
+ * ASIC bug may otherwise cause a prefetch beyond physical memory.  Sets bit
+ * 0x8000 in I2O_DMA1_CFG and bit 0x1 in the PCI_COMMAND_PARITY config dword.
+ * Other boards are left alone.
+ */
+static inline void cciss_p600_dma_prefetch_quirk(ctlr_info_t *h)
+{
+	u32 dma_cfg;
+	__u32 pci_cfg;
+
+	if (h->board_id == 0x3225103C) {
+		dma_cfg = readl(h->vaddr + I2O_DMA1_CFG);
+		writel(dma_cfg | 0x8000, h->vaddr + I2O_DMA1_CFG);
+
+		pci_read_config_dword(h->pdev, PCI_COMMAND_PARITY, &pci_cfg);
+		pci_cfg |= 0x1;
+		pci_write_config_dword(h->pdev, PCI_COMMAND_PARITY, pci_cfg);
+	}
+}
+
+static int __devinit cciss_pci_init(ctlr_info_t *h)
+{
+	int prod_index, err;
+
+	prod_index = cciss_lookup_board_id(h->pdev, &h->board_id);
+	if (prod_index < 0)
+		return -ENODEV;
+	h->product_name = products[prod_index].product_name;
+	h->access = *(products[prod_index].access);
+
+	if (cciss_board_disabled(h)) {
+		dev_warn(&h->pdev->dev, "controller appears to be disabled\n");
 		return -ENODEV;
 	}
-
-	err = pci_enable_device(pdev);
+	err = pci_enable_device(h->pdev);
 	if (err) {
-		printk(KERN_ERR "cciss: Unable to Enable PCI device\n");
+		dev_warn(&h->pdev->dev, "Unable to Enable PCI device\n");
 		return err;
 	}
 
-	err = pci_request_regions(pdev, "cciss");
+	err = pci_request_regions(h->pdev, "cciss");
 	if (err) {
-		printk(KERN_ERR "cciss: Cannot obtain PCI resources, "
-		       "aborting\n");
+		dev_warn(&h->pdev->dev,
+			"Cannot obtain PCI resources, aborting\n");
 		return err;
 	}
 
-#ifdef CCISS_DEBUG
-	printk("command = %x\n", command);
-	printk("irq = %x\n", pdev->irq);
-	printk("board_id = %x\n", board_id);
-#endif				/* CCISS_DEBUG */
+	dev_dbg(&h->pdev->dev, "irq = %x\n", h->pdev->irq);
+	dev_dbg(&h->pdev->dev, "board_id = %x\n", h->board_id);
 
 /* If the kernel supports MSI/MSI-X we will try to enable that functionality,
  * else we use the IO-APIC interrupt assigned to us by system ROM.
  */
-	cciss_interrupt_mode(c, pdev, board_id);
-
-	/* find the memory BAR */
-	for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
-		if (pci_resource_flags(pdev, i) & IORESOURCE_MEM)
-			break;
+	cciss_interrupt_mode(h);
+	err = cciss_pci_find_memory_BAR(h->pdev, &h->paddr);
+	if (err)
+		goto err_out_free_res;
+	h->vaddr = remap_pci_mem(h->paddr, 0x250);
+	if (!h->vaddr) {
+		err = -ENOMEM;
+		goto err_out_free_res;
 	}
-	if (i == DEVICE_COUNT_RESOURCE) {
-		printk(KERN_WARNING "cciss: No memory BAR found\n");
+	err = cciss_wait_for_board_ready(h);
+	if (err)
+		goto err_out_free_res;
+	err = cciss_find_cfgtables(h);
+	if (err)
+		goto err_out_free_res;
+	print_cfg_table(h);
+	cciss_find_board_params(h);
+
+	if (!CISS_signature_present(h)) {
 		err = -ENODEV;
 		goto err_out_free_res;
 	}
-
-	c->paddr = pci_resource_start(pdev, i); /* addressing mode bits
-						 * already removed
-						 */
-
-#ifdef CCISS_DEBUG
-	printk("address 0 = %lx\n", c->paddr);
-#endif				/* CCISS_DEBUG */
-	c->vaddr = remap_pci_mem(c->paddr, 0x250);
-
-	/* Wait for the board to become ready.  (PCI hotplug needs this.)
-	 * We poll for up to 120 secs, once per 100ms. */
-	for (i = 0; i < 1200; i++) {
-		scratchpad = readl(c->vaddr + SA5_SCRATCHPAD_OFFSET);
-		if (scratchpad == CCISS_FIRMWARE_READY)
-			break;
-		set_current_state(TASK_INTERRUPTIBLE);
-		schedule_timeout(msecs_to_jiffies(100));	/* wait 100ms */
-	}
-	if (scratchpad != CCISS_FIRMWARE_READY) {
-		printk(KERN_WARNING "cciss: Board not ready.  Timed out.\n");
-		err = -ENODEV;
-		goto err_out_free_res;
-	}
-
-	/* get the address index number */
-	cfg_base_addr = readl(c->vaddr + SA5_CTCFG_OFFSET);
-	cfg_base_addr &= (__u32) 0x0000ffff;
-#ifdef CCISS_DEBUG
-	printk("cfg base address = %x\n", cfg_base_addr);
-#endif				/* CCISS_DEBUG */
-	cfg_base_addr_index = find_PCI_BAR_index(pdev, cfg_base_addr);
-#ifdef CCISS_DEBUG
-	printk("cfg base address index = %llx\n",
-		(unsigned long long)cfg_base_addr_index);
-#endif				/* CCISS_DEBUG */
-	if (cfg_base_addr_index == -1) {
-		printk(KERN_WARNING "cciss: Cannot find cfg_base_addr_index\n");
-		err = -ENODEV;
-		goto err_out_free_res;
-	}
-
-	cfg_offset = readl(c->vaddr + SA5_CTMEM_OFFSET);
-#ifdef CCISS_DEBUG
-	printk("cfg offset = %llx\n", (unsigned long long)cfg_offset);
-#endif				/* CCISS_DEBUG */
-	c->cfgtable = remap_pci_mem(pci_resource_start(pdev,
-						       cfg_base_addr_index) +
-				    cfg_offset, sizeof(CfgTable_struct));
-	c->board_id = board_id;
-
-#ifdef CCISS_DEBUG
-	print_cfg_table(c->cfgtable);
-#endif				/* CCISS_DEBUG */
-
-	/* Some controllers support Zero Memory Raid (ZMR).
-	 * When configured in ZMR mode the number of supported
-	 * commands drops to 64. So instead of just setting an
-	 * arbitrary value we make the driver a little smarter.
-	 * We read the config table to tell us how many commands
-	 * are supported on the controller then subtract 4 to
-	 * leave a little room for ioctl calls.
-	 */
-	c->max_commands = readl(&(c->cfgtable->CmdsOutMax));
-	c->maxsgentries = readl(&(c->cfgtable->MaxSGElements));
-
-	/*
-	 * Limit native command to 32 s/g elements to save dma'able memory.
-	 * Howvever spec says if 0, use 31
-	 */
-
-	c->max_cmd_sgentries = 31;
-	if (c->maxsgentries > 512) {
-		c->max_cmd_sgentries = 32;
-		c->chainsize = c->maxsgentries - c->max_cmd_sgentries + 1;
-		c->maxsgentries -= 1;   /* account for chain pointer */
-	} else {
-		c->maxsgentries = 31;   /* Default to traditional value */
-		c->chainsize = 0;       /* traditional */
-	}
-
-	c->product_name = products[prod_index].product_name;
-	c->access = *(products[prod_index].access);
-	c->nr_cmds = c->max_commands - 4;
-	if ((readb(&c->cfgtable->Signature[0]) != 'C') ||
-	    (readb(&c->cfgtable->Signature[1]) != 'I') ||
-	    (readb(&c->cfgtable->Signature[2]) != 'S') ||
-	    (readb(&c->cfgtable->Signature[3]) != 'S')) {
-		printk("Does not appear to be a valid CISS config table\n");
-		err = -ENODEV;
-		goto err_out_free_res;
-	}
-#ifdef CONFIG_X86
-	{
-		/* Need to enable prefetch in the SCSI core for 6400 in x86 */
-		__u32 prefetch;
-		prefetch = readl(&(c->cfgtable->SCSI_Prefetch));
-		prefetch |= 0x100;
-		writel(prefetch, &(c->cfgtable->SCSI_Prefetch));
-	}
-#endif
-
-	/* Disabling DMA prefetch and refetch for the P600.
-	 * An ASIC bug may result in accesses to invalid memory addresses.
-	 * We've disabled prefetch for some time now. Testing with XEN
-	 * kernels revealed a bug in the refetch if dom0 resides on a P600.
-	 */
-	if(board_id == 0x3225103C) {
-		__u32 dma_prefetch;
-		__u32 dma_refetch;
-		dma_prefetch = readl(c->vaddr + I2O_DMA1_CFG);
-		dma_prefetch |= 0x8000;
-		writel(dma_prefetch, c->vaddr + I2O_DMA1_CFG);
-		pci_read_config_dword(pdev, PCI_COMMAND_PARITY, &dma_refetch);
-		dma_refetch |= 0x1;
-		pci_write_config_dword(pdev, PCI_COMMAND_PARITY, dma_refetch);
-	}
-
-#ifdef CCISS_DEBUG
-	printk("Trying to put board into Simple mode\n");
-#endif				/* CCISS_DEBUG */
-	c->max_commands = readl(&(c->cfgtable->CmdsOutMax));
-	/* Update the field, and then ring the doorbell */
-	writel(CFGTBL_Trans_Simple, &(c->cfgtable->HostWrite.TransportRequest));
-	writel(CFGTBL_ChangeReq, c->vaddr + SA5_DOORBELL);
-
-	/* under certain very rare conditions, this can take awhile.
-	 * (e.g.: hot replace a failed 144GB drive in a RAID 5 set right
-	 * as we enter this code.) */
-	for (i = 0; i < MAX_CONFIG_WAIT; i++) {
-		if (!(readl(c->vaddr + SA5_DOORBELL) & CFGTBL_ChangeReq))
-			break;
-		/* delay and try again */
-		set_current_state(TASK_INTERRUPTIBLE);
-		schedule_timeout(msecs_to_jiffies(1));
-	}
-
-#ifdef CCISS_DEBUG
-	printk(KERN_DEBUG "I counter got to %d %x\n", i,
-	       readl(c->vaddr + SA5_DOORBELL));
-#endif				/* CCISS_DEBUG */
-#ifdef CCISS_DEBUG
-	print_cfg_table(c->cfgtable);
-#endif				/* CCISS_DEBUG */
-
-	if (!(readl(&(c->cfgtable->TransportActive)) & CFGTBL_Trans_Simple)) {
-		printk(KERN_WARNING "cciss: unable to get board into"
-		       " simple mode\n");
-		err = -ENODEV;
-		goto err_out_free_res;
-	}
+	cciss_enable_scsi_prefetch(h);
+	cciss_p600_dma_prefetch_quirk(h);
+	cciss_put_controller_into_performant_mode(h);
 	return 0;
 
 err_out_free_res:
@@ -3913,42 +4220,47 @@
 	 * Deliberately omit pci_disable_device(): it does something nasty to
 	 * Smart Array controllers that pci_enable_device does not undo
 	 */
-	pci_release_regions(pdev);
+	if (h->transtable)
+		iounmap(h->transtable);
+	if (h->cfgtable)
+		iounmap(h->cfgtable);
+	if (h->vaddr)
+		iounmap(h->vaddr);
+	pci_release_regions(h->pdev);
 	return err;
 }
 
 /* Function to find the first free pointer into our hba[] array
  * Returns -1 if no free entries are left.
+ *
+ * The free entry is filled with a newly allocated, zeroed ctlr_info_t
+ * and its index is returned.  -1 is also returned when that allocation
+ * fails.  pdev is used only as the device for the warning messages.
  */
-static int alloc_cciss_hba(void)
+static int alloc_cciss_hba(struct pci_dev *pdev)
 {
 	int i;
 
 	for (i = 0; i < MAX_CTLR; i++) {
 		if (!hba[i]) {
-			ctlr_info_t *p;
+			ctlr_info_t *h;
 
-			p = kzalloc(sizeof(ctlr_info_t), GFP_KERNEL);
-			if (!p)
+			h = kzalloc(sizeof(ctlr_info_t), GFP_KERNEL);
+			if (!h)
 				goto Enomem;
-			hba[i] = p;
+			hba[i] = h;
 			return i;
 		}
 	}
-	printk(KERN_WARNING "cciss: This driver supports a maximum"
+	dev_warn(&pdev->dev, "This driver supports a maximum"
 	       " of %d controllers.\n", MAX_CTLR);
 	return -1;
 Enomem:
-	printk(KERN_ERR "cciss: out of memory.\n");
+	dev_warn(&pdev->dev, "out of memory.\n");
 	return -1;
 }
 
-static void free_hba(int n)
+static void free_hba(ctlr_info_t *h)
 {
-	ctlr_info_t *h = hba[n];
 	int i;
 
-	hba[n] = NULL;
+	hba[h->ctlr] = NULL;
 	for (i = 0; i < h->highest_lun + 1; i++)
 		if (h->gendisk[i] != NULL)
 			put_disk(h->gendisk[i]);
@@ -4028,7 +4340,8 @@
 	/* we leak the DMA buffer here ... no choice since the controller could
 	   still complete the command. */
 	if (i == 10) {
-		printk(KERN_ERR "cciss: controller message %02x:%02x timed out\n",
+		dev_err(&pdev->dev,
+			"controller message %02x:%02x timed out\n",
 			opcode, type);
 		return -ETIMEDOUT;
 	}
@@ -4036,12 +4349,12 @@
 	pci_free_consistent(pdev, cmd_sz, cmd, paddr64);
 
 	if (tag & 2) {
-		printk(KERN_ERR "cciss: controller message %02x:%02x failed\n",
+		dev_err(&pdev->dev, "controller message %02x:%02x failed\n",
 			opcode, type);
 		return -EIO;
 	}
 
-	printk(KERN_INFO "cciss: controller message %02x:%02x succeeded\n",
+	dev_info(&pdev->dev, "controller message %02x:%02x succeeded\n",
 		opcode, type);
 	return 0;
 }
@@ -4062,7 +4375,7 @@
 	if (pos) {
 		pci_read_config_word(pdev, msi_control_reg(pos), &control);
 		if (control & PCI_MSI_FLAGS_ENABLE) {
-			printk(KERN_INFO "cciss: resetting MSI\n");
+			dev_info(&pdev->dev, "resetting MSI\n");
 			pci_write_config_word(pdev, msi_control_reg(pos), control & ~PCI_MSI_FLAGS_ENABLE);
 		}
 	}
@@ -4071,7 +4384,7 @@
 	if (pos) {
 		pci_read_config_word(pdev, msi_control_reg(pos), &control);
 		if (control & PCI_MSIX_FLAGS_ENABLE) {
-			printk(KERN_INFO "cciss: resetting MSI-X\n");
+			dev_info(&pdev->dev, "resetting MSI-X\n");
 			pci_write_config_word(pdev, msi_control_reg(pos), control & ~PCI_MSIX_FLAGS_ENABLE);
 		}
 	}
@@ -4079,68 +4392,144 @@
 	return 0;
 }
 
-/* This does a hard reset of the controller using PCI power management
- * states. */
-static __devinit int cciss_hard_reset_controller(struct pci_dev *pdev)
+/*
+ * Issue a hard reset to the controller.
+ *
+ * pdev:         PCI device to reset
+ * vaddr:        mapped controller registers; used for the doorbell write
+ * use_doorbell: true to reset by writing DOORBELL_CTLR_RESET to the
+ *               doorbell register, false to cycle the PCI power state
+ *               from D3hot back to D0
+ *
+ * Returns 0 once the reset has been issued, or -ENODEV when the PCI
+ * power state method is requested but the device has no PM capability.
+ */
+static int cciss_controller_hard_reset(struct pci_dev *pdev,
+	void __iomem *vaddr, bool use_doorbell)
 {
-	u16 pmcsr, saved_config_space[32];
-	int i, pos;
+	u16 pmcsr;
+	int pos;
 
-	printk(KERN_INFO "cciss: using PCI PM to reset controller\n");
+	if (use_doorbell) {
+		/* For everything after the P600, the PCI power state method
+		 * of resetting the controller doesn't work, so we have this
+		 * other way using the doorbell register.
+		 */
+		dev_info(&pdev->dev, "using doorbell to reset controller\n");
+		writel(DOORBELL_CTLR_RESET, vaddr + SA5_DOORBELL);
+		msleep(1000);
+	} else { /* Try to do it the PCI power state way */
 
-	/* This is very nearly the same thing as
+		/* Quoting from the Open CISS Specification: "The Power
+		 * Management Control/Status Register (CSR) controls the power
+		 * state of the device.  The normal operating state is D0,
+		 * CSR=00h.  The software off state is D3, CSR=03h.  To reset
+		 * the controller, place the interface device in D3 then to D0,
+		 * this causes a secondary PCI reset which will reset the
+		 * controller." */
 
-	   pci_save_state(pci_dev);
-	   pci_set_power_state(pci_dev, PCI_D3hot);
-	   pci_set_power_state(pci_dev, PCI_D0);
-	   pci_restore_state(pci_dev);
+		pos = pci_find_capability(pdev, PCI_CAP_ID_PM);
+		if (pos == 0) {
+			dev_err(&pdev->dev,
+				"cciss_controller_hard_reset: "
+				"PCI PM not supported\n");
+			return -ENODEV;
+		}
+		dev_info(&pdev->dev, "using PCI PM to reset controller\n");
+		/* enter the D3hot power management state */
+		pci_read_config_word(pdev, pos + PCI_PM_CTRL, &pmcsr);
+		pmcsr &= ~PCI_PM_CTRL_STATE_MASK;
+		pmcsr |= PCI_D3hot;
+		pci_write_config_word(pdev, pos + PCI_PM_CTRL, pmcsr);
 
-	   but we can't use these nice canned kernel routines on
-	   kexec, because they also check the MSI/MSI-X state in PCI
-	   configuration space and do the wrong thing when it is
-	   set/cleared.  Also, the pci_save/restore_state functions
-	   violate the ordering requirements for restoring the
-	   configuration space from the CCISS document (see the
-	   comment below).  So we roll our own .... */
+		msleep(500);
+
+		/* enter the D0 power management state */
+		pmcsr &= ~PCI_PM_CTRL_STATE_MASK;
+		pmcsr |= PCI_D0;
+		pci_write_config_word(pdev, pos + PCI_PM_CTRL, pmcsr);
+
+		msleep(500);
+	}
+	return 0;
+}
+
+/* This does a hard reset of the controller using PCI power management
+ * states or using the doorbell register. */
+static __devinit int cciss_kdump_hard_reset_controller(struct pci_dev *pdev)
+{
+	u16 saved_config_space[32];
+	u64 cfg_offset;
+	u32 cfg_base_addr;
+	u64 cfg_base_addr_index;
+	void __iomem *vaddr;
+	unsigned long paddr;
+	u32 misc_fw_support, active_transport;
+	int rc, i;
+	CfgTable_struct __iomem *cfgtable;
+	bool use_doorbell;
+	u32 board_id;
+
+	/* For controllers as old as the P600, this is very nearly
+	 * the same thing as
+	 *
+	 * pci_save_state(pci_dev);
+	 * pci_set_power_state(pci_dev, PCI_D3hot);
+	 * pci_set_power_state(pci_dev, PCI_D0);
+	 * pci_restore_state(pci_dev);
+	 *
+	 * but we can't use these nice canned kernel routines on
+	 * kexec, because they also check the MSI/MSI-X state in PCI
+	 * configuration space and do the wrong thing when it is
+	 * set/cleared.  Also, the pci_save/restore_state functions
+	 * violate the ordering requirements for restoring the
+	 * configuration space from the CCISS document (see the
+	 * comment below).  So we roll our own ....
+	 *
+	 * For controllers newer than the P600, the pci power state
+	 * method of resetting doesn't work so we have another way
+	 * using the doorbell register.
+	 */
+
+	/* Exclude 640x boards.  These are two pci devices in one slot
+	 * which share a battery backed cache module.  One controls the
+	 * cache, the other accesses the cache through the one that controls
+	 * it.  If we reset the one controlling the cache, the other will
+	 * likely not be happy.  Just forbid resetting this conjoined mess.
+	 */
+	cciss_lookup_board_id(pdev, &board_id);
+	if (board_id == 0x409C0E11 || board_id == 0x409D0E11) {
+		dev_warn(&pdev->dev, "Cannot reset Smart Array 640x "
+				"due to shared cache module.");
+		return -ENODEV;
+	}
 
 	for (i = 0; i < 32; i++)
 		pci_read_config_word(pdev, 2*i, &saved_config_space[i]);
 
-	pos = pci_find_capability(pdev, PCI_CAP_ID_PM);
-	if (pos == 0) {
-		printk(KERN_ERR "cciss_reset_controller: PCI PM not supported\n");
-		return -ENODEV;
+	/* find the first memory BAR, so we can find the cfg table */
+	rc = cciss_pci_find_memory_BAR(pdev, &paddr);
+	if (rc)
+		return rc;
+	vaddr = remap_pci_mem(paddr, 0x250);
+	if (!vaddr)
+		return -ENOMEM;
+
+	/* find cfgtable in order to check if reset via doorbell is supported */
+	rc = cciss_find_cfg_addrs(pdev, vaddr, &cfg_base_addr,
+					&cfg_base_addr_index, &cfg_offset);
+	if (rc)
+		goto unmap_vaddr;
+	cfgtable = remap_pci_mem(pci_resource_start(pdev,
+		       cfg_base_addr_index) + cfg_offset, sizeof(*cfgtable));
+	if (!cfgtable) {
+		rc = -ENOMEM;
+		goto unmap_vaddr;
 	}
 
-	/* Quoting from the Open CISS Specification: "The Power
-	 * Management Control/Status Register (CSR) controls the power
-	 * state of the device.  The normal operating state is D0,
-	 * CSR=00h.  The software off state is D3, CSR=03h.  To reset
-	 * the controller, place the interface device in D3 then to
-	 * D0, this causes a secondary PCI reset which will reset the
-	 * controller." */
+	/* If reset via doorbell register is supported, use that. */
+	misc_fw_support = readl(&cfgtable->misc_fw_support);
+	use_doorbell = misc_fw_support & MISC_FW_DOORBELL_RESET;
 
-	/* enter the D3hot power management state */
-	pci_read_config_word(pdev, pos + PCI_PM_CTRL, &pmcsr);
-	pmcsr &= ~PCI_PM_CTRL_STATE_MASK;
-	pmcsr |= PCI_D3hot;
-	pci_write_config_word(pdev, pos + PCI_PM_CTRL, pmcsr);
-
-	schedule_timeout_uninterruptible(HZ >> 1);
-
-	/* enter the D0 power management state */
-	pmcsr &= ~PCI_PM_CTRL_STATE_MASK;
-	pmcsr |= PCI_D0;
-	pci_write_config_word(pdev, pos + PCI_PM_CTRL, pmcsr);
-
-	schedule_timeout_uninterruptible(HZ >> 1);
+	rc = cciss_controller_hard_reset(pdev, vaddr, use_doorbell);
+	if (rc)
+		goto unmap_cfgtable;
 
 	/* Restore the PCI configuration space.  The Open CISS
 	 * Specification says, "Restore the PCI Configuration
 	 * Registers, offsets 00h through 60h. It is important to
 	 * restore the command register, 16-bits at offset 04h,
 	 * last. Do not restore the configuration status register,
-	 * 16-bits at offset 06h."  Note that the offset is 2*i. */
+	 * 16-bits at offset 06h."  Note that the offset is 2*i.
+	 */
 	for (i = 0; i < 32; i++) {
 		if (i == 2 || i == 3)
 			continue;
@@ -4149,6 +4538,63 @@
 	wmb();
 	pci_write_config_word(pdev, 4, saved_config_space[2]);
 
+	/* Some devices (notably the HP Smart Array 5i Controller)
+	   need a little pause here */
+	msleep(CCISS_POST_RESET_PAUSE_MSECS);
+
+	/* Controller should be in simple mode at this point.  If it's not,
+	 * It means we're on one of those controllers which doesn't support
+	 * the doorbell reset method and on which the PCI power management reset
+	 * method doesn't work (P800, for example.)
+	 * In those cases, don't try to proceed, as it generally doesn't work.
+	 */
+	active_transport = readl(&cfgtable->TransportActive);
+	if (active_transport & PERFORMANT_MODE) {
+		dev_warn(&pdev->dev, "Unable to successfully reset controller,"
+			" Ignoring controller.\n");
+		rc = -ENODEV;
+	}
+
+unmap_cfgtable:
+	iounmap(cfgtable);
+
+unmap_vaddr:
+	iounmap(vaddr);
+	return rc;
+}
+
+/*
+ * When reset_devices is set, hard-reset the controller, reset its
+ * MSI/MSI-X state and then send it no-op messages until one succeeds or
+ * the retries run out.  Does nothing when reset_devices is clear.
+ *
+ * Returns 0 when initialization may proceed, -ENODEV when the reset or
+ * the MSI reset failed.  Running out of no-op retries still returns 0.
+ */
+static __devinit int cciss_init_reset_devices(struct pci_dev *pdev)
+{
+	int rc, n;
+
+	if (!reset_devices)
+		return 0;
+
+	/* Reset the controller with a PCI power-cycle or via doorbell */
+	rc = cciss_kdump_hard_reset_controller(pdev);
+
+	/*
+	 * -ENOTSUPP here means we cannot reset the controller but it's
+	 * already (and still) up and running in "performant mode".  Or, it
+	 * might be 640x, which can't reset due to concerns about shared
+	 * bbwc between 6402/6404 pair.  Just try to do the kdump anyhow.
+	 *
+	 * NOTE(review): the 640x check in cciss_kdump_hard_reset_controller
+	 * returns -ENODEV, not -ENOTSUPP -- confirm which is intended.
+	 */
+	if (rc == -ENOTSUPP)
+		return 0;
+	if (rc || cciss_reset_msi(pdev))
+		return -ENODEV;
+
+	/* Now try to get the controller to respond to a no-op */
+	for (n = 0; n < CCISS_POST_RESET_NOOP_RETRIES; n++) {
+		const char *suffix;
+
+		if (cciss_noop(pdev) == 0)
+			break;
+		suffix = (n < CCISS_POST_RESET_NOOP_RETRIES - 1) ?
+				"; re-trying" : "";
+		dev_warn(&pdev->dev, "no-op failed%s\n", suffix);
+		msleep(CCISS_POST_RESET_NOOP_INTERVAL_MSECS);
+	}
 	return 0;
 }
 
@@ -4166,46 +4612,31 @@
 	int rc;
 	int dac, return_code;
 	InquiryData_struct *inq_buff;
+	ctlr_info_t *h;
 
-	if (reset_devices) {
-		/* Reset the controller with a PCI power-cycle */
-		if (cciss_hard_reset_controller(pdev) || cciss_reset_msi(pdev))
-			return -ENODEV;
-
-		/* Now try to get the controller to respond to a no-op. Some
-		   devices (notably the HP Smart Array 5i Controller) need
-		   up to 30 seconds to respond. */
-		for (i=0; i<30; i++) {
-			if (cciss_noop(pdev) == 0)
-				break;
-
-			schedule_timeout_uninterruptible(HZ);
-		}
-		if (i == 30) {
-			printk(KERN_ERR "cciss: controller seems dead\n");
-			return -EBUSY;
-		}
-	}
-
-	i = alloc_cciss_hba();
+	rc = cciss_init_reset_devices(pdev);
+	if (rc)
+		return rc;
+	i = alloc_cciss_hba(pdev);
 	if (i < 0)
 		return -1;
 
-	hba[i]->busy_initializing = 1;
-	INIT_HLIST_HEAD(&hba[i]->cmpQ);
-	INIT_HLIST_HEAD(&hba[i]->reqQ);
-	mutex_init(&hba[i]->busy_shutting_down);
+	h = hba[i];
+	h->pdev = pdev;
+	h->busy_initializing = 1;
+	INIT_HLIST_HEAD(&h->cmpQ);
+	INIT_HLIST_HEAD(&h->reqQ);
+	mutex_init(&h->busy_shutting_down);
 
-	if (cciss_pci_init(hba[i], pdev) != 0)
+	if (cciss_pci_init(h) != 0)
 		goto clean_no_release_regions;
 
-	sprintf(hba[i]->devname, "cciss%d", i);
-	hba[i]->ctlr = i;
-	hba[i]->pdev = pdev;
+	sprintf(h->devname, "cciss%d", i);
+	h->ctlr = i;
 
-	init_completion(&hba[i]->scan_wait);
+	init_completion(&h->scan_wait);
 
-	if (cciss_create_hba_sysfs_entry(hba[i]))
+	if (cciss_create_hba_sysfs_entry(h))
 		goto clean0;
 
 	/* configure PCI DMA stuff */
@@ -4214,7 +4645,7 @@
 	else if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(32)))
 		dac = 0;
 	else {
-		printk(KERN_ERR "cciss: no suitable DMA available\n");
+		dev_err(&h->pdev->dev, "no suitable DMA available\n");
 		goto clean1;
 	}
 
@@ -4224,151 +4655,161 @@
 	 * 8 controller support.
 	 */
 	if (i < MAX_CTLR_ORIG)
-		hba[i]->major = COMPAQ_CISS_MAJOR + i;
-	rc = register_blkdev(hba[i]->major, hba[i]->devname);
+		h->major = COMPAQ_CISS_MAJOR + i;
+	rc = register_blkdev(h->major, h->devname);
 	if (rc == -EBUSY || rc == -EINVAL) {
-		printk(KERN_ERR
-		       "cciss:  Unable to get major number %d for %s "
-		       "on hba %d\n", hba[i]->major, hba[i]->devname, i);
+		dev_err(&h->pdev->dev,
+		       "Unable to get major number %d for %s "
+		       "on hba %d\n", h->major, h->devname, i);
 		goto clean1;
 	} else {
 		if (i >= MAX_CTLR_ORIG)
-			hba[i]->major = rc;
+			h->major = rc;
 	}
 
 	/* make sure the board interrupts are off */
-	hba[i]->access.set_intr_mask(hba[i], CCISS_INTR_OFF);
-	if (request_irq(hba[i]->intr[SIMPLE_MODE_INT], do_cciss_intr,
-			IRQF_DISABLED | IRQF_SHARED, hba[i]->devname, hba[i])) {
-		printk(KERN_ERR "cciss: Unable to get irq %d for %s\n",
-		       hba[i]->intr[SIMPLE_MODE_INT], hba[i]->devname);
-		goto clean2;
+	h->access.set_intr_mask(h, CCISS_INTR_OFF);
+	if (h->msi_vector || h->msix_vector) {
+		if (request_irq(h->intr[PERF_MODE_INT],
+				do_cciss_msix_intr,
+				IRQF_DISABLED, h->devname, h)) {
+			dev_err(&h->pdev->dev, "Unable to get irq %d for %s\n",
+			       h->intr[PERF_MODE_INT], h->devname);
+			goto clean2;
+		}
+	} else {
+		if (request_irq(h->intr[PERF_MODE_INT], do_cciss_intx,
+				IRQF_DISABLED, h->devname, h)) {
+			dev_err(&h->pdev->dev, "Unable to get irq %d for %s\n",
+			       h->intr[PERF_MODE_INT], h->devname);
+			goto clean2;
+		}
 	}
 
-	printk(KERN_INFO "%s: <0x%x> at PCI %s IRQ %d%s using DAC\n",
-	       hba[i]->devname, pdev->device, pci_name(pdev),
-	       hba[i]->intr[SIMPLE_MODE_INT], dac ? "" : " not");
+	dev_info(&h->pdev->dev, "%s: <0x%x> at PCI %s IRQ %d%s using DAC\n",
+	       h->devname, pdev->device, pci_name(pdev),
+	       h->intr[PERF_MODE_INT], dac ? "" : " not");
 
-	hba[i]->cmd_pool_bits =
-	    kmalloc(DIV_ROUND_UP(hba[i]->nr_cmds, BITS_PER_LONG)
+	h->cmd_pool_bits =
+	    kmalloc(DIV_ROUND_UP(h->nr_cmds, BITS_PER_LONG)
 			* sizeof(unsigned long), GFP_KERNEL);
-	hba[i]->cmd_pool = (CommandList_struct *)
-	    pci_alloc_consistent(hba[i]->pdev,
-		    hba[i]->nr_cmds * sizeof(CommandList_struct),
-		    &(hba[i]->cmd_pool_dhandle));
-	hba[i]->errinfo_pool = (ErrorInfo_struct *)
-	    pci_alloc_consistent(hba[i]->pdev,
-		    hba[i]->nr_cmds * sizeof(ErrorInfo_struct),
-		    &(hba[i]->errinfo_pool_dhandle));
-	if ((hba[i]->cmd_pool_bits == NULL)
-	    || (hba[i]->cmd_pool == NULL)
-	    || (hba[i]->errinfo_pool == NULL)) {
-		printk(KERN_ERR "cciss: out of memory");
+	h->cmd_pool = (CommandList_struct *)
+	    pci_alloc_consistent(h->pdev,
+		    h->nr_cmds * sizeof(CommandList_struct),
+		    &(h->cmd_pool_dhandle));
+	h->errinfo_pool = (ErrorInfo_struct *)
+	    pci_alloc_consistent(h->pdev,
+		    h->nr_cmds * sizeof(ErrorInfo_struct),
+		    &(h->errinfo_pool_dhandle));
+	if ((h->cmd_pool_bits == NULL)
+	    || (h->cmd_pool == NULL)
+	    || (h->errinfo_pool == NULL)) {
+		dev_err(&h->pdev->dev, "out of memory");
 		goto clean4;
 	}
 
 	/* Need space for temp scatter list */
-	hba[i]->scatter_list = kmalloc(hba[i]->max_commands *
+	h->scatter_list = kmalloc(h->max_commands *
 						sizeof(struct scatterlist *),
 						GFP_KERNEL);
-	for (k = 0; k < hba[i]->nr_cmds; k++) {
-		hba[i]->scatter_list[k] = kmalloc(sizeof(struct scatterlist) *
-							hba[i]->maxsgentries,
+	for (k = 0; k < h->nr_cmds; k++) {
+		h->scatter_list[k] = kmalloc(sizeof(struct scatterlist) *
+							h->maxsgentries,
 							GFP_KERNEL);
-		if (hba[i]->scatter_list[k] == NULL) {
-			printk(KERN_ERR "cciss%d: could not allocate "
-				"s/g lists\n", i);
+		if (h->scatter_list[k] == NULL) {
+			dev_err(&h->pdev->dev,
+				"could not allocate s/g lists\n");
 			goto clean4;
 		}
 	}
-	hba[i]->cmd_sg_list = cciss_allocate_sg_chain_blocks(hba[i],
-		hba[i]->chainsize, hba[i]->nr_cmds);
-	if (!hba[i]->cmd_sg_list && hba[i]->chainsize > 0)
+	h->cmd_sg_list = cciss_allocate_sg_chain_blocks(h,
+		h->chainsize, h->nr_cmds);
+	if (!h->cmd_sg_list && h->chainsize > 0)
 		goto clean4;
 
-	spin_lock_init(&hba[i]->lock);
+	spin_lock_init(&h->lock);
 
 	/* Initialize the pdev driver private data.
-	   have it point to hba[i].  */
-	pci_set_drvdata(pdev, hba[i]);
+	   have it point to h.  */
+	pci_set_drvdata(pdev, h);
 	/* command and error info recs zeroed out before
 	   they are used */
-	memset(hba[i]->cmd_pool_bits, 0,
-	       DIV_ROUND_UP(hba[i]->nr_cmds, BITS_PER_LONG)
+	memset(h->cmd_pool_bits, 0,
+	       DIV_ROUND_UP(h->nr_cmds, BITS_PER_LONG)
 			* sizeof(unsigned long));
 
-	hba[i]->num_luns = 0;
-	hba[i]->highest_lun = -1;
+	h->num_luns = 0;
+	h->highest_lun = -1;
 	for (j = 0; j < CISS_MAX_LUN; j++) {
-		hba[i]->drv[j] = NULL;
-		hba[i]->gendisk[j] = NULL;
+		h->drv[j] = NULL;
+		h->gendisk[j] = NULL;
 	}
 
-	cciss_scsi_setup(i);
+	cciss_scsi_setup(h);
 
 	/* Turn the interrupts on so we can service requests */
-	hba[i]->access.set_intr_mask(hba[i], CCISS_INTR_ON);
+	h->access.set_intr_mask(h, CCISS_INTR_ON);
 
 	/* Get the firmware version */
 	inq_buff = kzalloc(sizeof(InquiryData_struct), GFP_KERNEL);
 	if (inq_buff == NULL) {
-		printk(KERN_ERR "cciss: out of memory\n");
+		dev_err(&h->pdev->dev, "out of memory\n");
 		goto clean4;
 	}
 
-	return_code = sendcmd_withirq(CISS_INQUIRY, i, inq_buff,
+	return_code = sendcmd_withirq(h, CISS_INQUIRY, inq_buff,
 		sizeof(InquiryData_struct), 0, CTLR_LUNID, TYPE_CMD);
 	if (return_code == IO_OK) {
-		hba[i]->firm_ver[0] = inq_buff->data_byte[32];
-		hba[i]->firm_ver[1] = inq_buff->data_byte[33];
-		hba[i]->firm_ver[2] = inq_buff->data_byte[34];
-		hba[i]->firm_ver[3] = inq_buff->data_byte[35];
+		h->firm_ver[0] = inq_buff->data_byte[32];
+		h->firm_ver[1] = inq_buff->data_byte[33];
+		h->firm_ver[2] = inq_buff->data_byte[34];
+		h->firm_ver[3] = inq_buff->data_byte[35];
 	} else {	 /* send command failed */
-		printk(KERN_WARNING "cciss: unable to determine firmware"
+		dev_warn(&h->pdev->dev, "unable to determine firmware"
 			" version of controller\n");
 	}
 	kfree(inq_buff);
 
-	cciss_procinit(i);
+	cciss_procinit(h);
 
-	hba[i]->cciss_max_sectors = 8192;
+	h->cciss_max_sectors = 8192;
 
-	rebuild_lun_table(hba[i], 1, 0);
-	hba[i]->busy_initializing = 0;
+	rebuild_lun_table(h, 1, 0);
+	h->busy_initializing = 0;
 	return 1;
 
 clean4:
-	kfree(hba[i]->cmd_pool_bits);
+	kfree(h->cmd_pool_bits);
 	/* Free up sg elements */
-	for (k = 0; k < hba[i]->nr_cmds; k++)
-		kfree(hba[i]->scatter_list[k]);
-	kfree(hba[i]->scatter_list);
-	cciss_free_sg_chain_blocks(hba[i]->cmd_sg_list, hba[i]->nr_cmds);
-	if (hba[i]->cmd_pool)
-		pci_free_consistent(hba[i]->pdev,
-				    hba[i]->nr_cmds * sizeof(CommandList_struct),
-				    hba[i]->cmd_pool, hba[i]->cmd_pool_dhandle);
-	if (hba[i]->errinfo_pool)
-		pci_free_consistent(hba[i]->pdev,
-				    hba[i]->nr_cmds * sizeof(ErrorInfo_struct),
-				    hba[i]->errinfo_pool,
-				    hba[i]->errinfo_pool_dhandle);
-	free_irq(hba[i]->intr[SIMPLE_MODE_INT], hba[i]);
+	for (k = 0; k < h->nr_cmds; k++)
+		kfree(h->scatter_list[k]);
+	kfree(h->scatter_list);
+	cciss_free_sg_chain_blocks(h->cmd_sg_list, h->nr_cmds);
+	if (h->cmd_pool)
+		pci_free_consistent(h->pdev,
+				    h->nr_cmds * sizeof(CommandList_struct),
+				    h->cmd_pool, h->cmd_pool_dhandle);
+	if (h->errinfo_pool)
+		pci_free_consistent(h->pdev,
+				    h->nr_cmds * sizeof(ErrorInfo_struct),
+				    h->errinfo_pool,
+				    h->errinfo_pool_dhandle);
+	free_irq(h->intr[PERF_MODE_INT], h);
 clean2:
-	unregister_blkdev(hba[i]->major, hba[i]->devname);
+	unregister_blkdev(h->major, h->devname);
 clean1:
-	cciss_destroy_hba_sysfs_entry(hba[i]);
+	cciss_destroy_hba_sysfs_entry(h);
 clean0:
 	pci_release_regions(pdev);
 clean_no_release_regions:
-	hba[i]->busy_initializing = 0;
+	h->busy_initializing = 0;
 
 	/*
 	 * Deliberately omit pci_disable_device(): it does something nasty to
 	 * Smart Array controllers that pci_enable_device does not undo
 	 */
 	pci_set_drvdata(pdev, NULL);
-	free_hba(i);
+	free_hba(h);
 	return -1;
 }
 
@@ -4381,55 +4822,51 @@
 	h = pci_get_drvdata(pdev);
 	flush_buf = kzalloc(4, GFP_KERNEL);
 	if (!flush_buf) {
-		printk(KERN_WARNING
-			"cciss:%d cache not flushed, out of memory.\n",
-			h->ctlr);
+		dev_warn(&h->pdev->dev, "cache not flushed, out of memory.\n");
 		return;
 	}
 	/* write all data in the battery backed cache to disk */
 	memset(flush_buf, 0, 4);
-	return_code = sendcmd_withirq(CCISS_CACHE_FLUSH, h->ctlr, flush_buf,
+	return_code = sendcmd_withirq(h, CCISS_CACHE_FLUSH, flush_buf,
 		4, 0, CTLR_LUNID, TYPE_CMD);
 	kfree(flush_buf);
 	if (return_code != IO_OK)
-		printk(KERN_WARNING "cciss%d: Error flushing cache\n",
-			h->ctlr);
+		dev_warn(&h->pdev->dev, "Error flushing cache\n");
 	h->access.set_intr_mask(h, CCISS_INTR_OFF);
-	free_irq(h->intr[2], h);
+	free_irq(h->intr[PERF_MODE_INT], h);
 }
 
 static void __devexit cciss_remove_one(struct pci_dev *pdev)
 {
-	ctlr_info_t *tmp_ptr;
+	ctlr_info_t *h;
 	int i, j;
 
 	if (pci_get_drvdata(pdev) == NULL) {
-		printk(KERN_ERR "cciss: Unable to remove device \n");
+		dev_err(&pdev->dev, "Unable to remove device\n");
 		return;
 	}
 
-	tmp_ptr = pci_get_drvdata(pdev);
-	i = tmp_ptr->ctlr;
+	h = pci_get_drvdata(pdev);
+	i = h->ctlr;
 	if (hba[i] == NULL) {
-		printk(KERN_ERR "cciss: device appears to "
-		       "already be removed \n");
+		dev_err(&pdev->dev, "device appears to already be removed\n");
 		return;
 	}
 
-	mutex_lock(&hba[i]->busy_shutting_down);
+	mutex_lock(&h->busy_shutting_down);
 
-	remove_from_scan_list(hba[i]);
-	remove_proc_entry(hba[i]->devname, proc_cciss);
-	unregister_blkdev(hba[i]->major, hba[i]->devname);
+	remove_from_scan_list(h);
+	remove_proc_entry(h->devname, proc_cciss);
+	unregister_blkdev(h->major, h->devname);
 
 	/* remove it from the disk list */
 	for (j = 0; j < CISS_MAX_LUN; j++) {
-		struct gendisk *disk = hba[i]->gendisk[j];
+		struct gendisk *disk = h->gendisk[j];
 		if (disk) {
 			struct request_queue *q = disk->queue;
 
 			if (disk->flags & GENHD_FL_UP) {
-				cciss_destroy_ld_sysfs_entry(hba[i], j, 1);
+				cciss_destroy_ld_sysfs_entry(h, j, 1);
 				del_gendisk(disk);
 			}
 			if (q)
@@ -4438,39 +4875,41 @@
 	}
 
 #ifdef CONFIG_CISS_SCSI_TAPE
-	cciss_unregister_scsi(i);	/* unhook from SCSI subsystem */
+	cciss_unregister_scsi(h);	/* unhook from SCSI subsystem */
 #endif
 
 	cciss_shutdown(pdev);
 
 #ifdef CONFIG_PCI_MSI
-	if (hba[i]->msix_vector)
-		pci_disable_msix(hba[i]->pdev);
-	else if (hba[i]->msi_vector)
-		pci_disable_msi(hba[i]->pdev);
+	if (h->msix_vector)
+		pci_disable_msix(h->pdev);
+	else if (h->msi_vector)
+		pci_disable_msi(h->pdev);
 #endif				/* CONFIG_PCI_MSI */
 
-	iounmap(hba[i]->vaddr);
+	iounmap(h->transtable);
+	iounmap(h->cfgtable);
+	iounmap(h->vaddr);
 
-	pci_free_consistent(hba[i]->pdev, hba[i]->nr_cmds * sizeof(CommandList_struct),
-			    hba[i]->cmd_pool, hba[i]->cmd_pool_dhandle);
-	pci_free_consistent(hba[i]->pdev, hba[i]->nr_cmds * sizeof(ErrorInfo_struct),
-			    hba[i]->errinfo_pool, hba[i]->errinfo_pool_dhandle);
-	kfree(hba[i]->cmd_pool_bits);
+	pci_free_consistent(h->pdev, h->nr_cmds * sizeof(CommandList_struct),
+			    h->cmd_pool, h->cmd_pool_dhandle);
+	pci_free_consistent(h->pdev, h->nr_cmds * sizeof(ErrorInfo_struct),
+			    h->errinfo_pool, h->errinfo_pool_dhandle);
+	kfree(h->cmd_pool_bits);
 	/* Free up sg elements */
-	for (j = 0; j < hba[i]->nr_cmds; j++)
-		kfree(hba[i]->scatter_list[j]);
-	kfree(hba[i]->scatter_list);
-	cciss_free_sg_chain_blocks(hba[i]->cmd_sg_list, hba[i]->nr_cmds);
+	for (j = 0; j < h->nr_cmds; j++)
+		kfree(h->scatter_list[j]);
+	kfree(h->scatter_list);
+	cciss_free_sg_chain_blocks(h->cmd_sg_list, h->nr_cmds);
 	/*
 	 * Deliberately omit pci_disable_device(): it does something nasty to
 	 * Smart Array controllers that pci_enable_device does not undo
 	 */
 	pci_release_regions(pdev);
 	pci_set_drvdata(pdev, NULL);
-	cciss_destroy_hba_sysfs_entry(hba[i]);
-	mutex_unlock(&hba[i]->busy_shutting_down);
-	free_hba(i);
+	cciss_destroy_hba_sysfs_entry(h);
+	mutex_unlock(&h->busy_shutting_down);
+	free_hba(h);
 }
 
 static struct pci_driver cciss_pci_driver = {
@@ -4495,7 +4934,6 @@
 	 * array of them, the size must be a multiple of 8 bytes.
 	 */
 	BUILD_BUG_ON(sizeof(CommandList_struct) % COMMANDLIST_ALIGNMENT);
-
 	printk(KERN_INFO DRIVER_NAME "\n");
 
 	err = bus_register(&cciss_bus_type);
@@ -4532,8 +4970,8 @@
 	/* double check that all controller entrys have been removed */
 	for (i = 0; i < MAX_CTLR; i++) {
 		if (hba[i] != NULL) {
-			printk(KERN_WARNING "cciss: had to remove"
-			       " controller %d\n", i);
+			dev_warn(&hba[i]->pdev->dev,
+				"had to remove controller\n");
 			cciss_remove_one(hba[i]->pdev);
 		}
 	}
@@ -4542,46 +4980,5 @@
 	bus_unregister(&cciss_bus_type);
 }
 
-static void fail_all_cmds(unsigned long ctlr)
-{
-	/* If we get here, the board is apparently dead. */
-	ctlr_info_t *h = hba[ctlr];
-	CommandList_struct *c;
-	unsigned long flags;
-
-	printk(KERN_WARNING "cciss%d: controller not responding.\n", h->ctlr);
-	h->alive = 0;		/* the controller apparently died... */
-
-	spin_lock_irqsave(CCISS_LOCK(ctlr), flags);
-
-	pci_disable_device(h->pdev);	/* Make sure it is really dead. */
-
-	/* move everything off the request queue onto the completed queue */
-	while (!hlist_empty(&h->reqQ)) {
-		c = hlist_entry(h->reqQ.first, CommandList_struct, list);
-		removeQ(c);
-		h->Qdepth--;
-		addQ(&h->cmpQ, c);
-	}
-
-	/* Now, fail everything on the completed queue with a HW error */
-	while (!hlist_empty(&h->cmpQ)) {
-		c = hlist_entry(h->cmpQ.first, CommandList_struct, list);
-		removeQ(c);
-		if (c->cmd_type != CMD_MSG_STALE)
-			c->err_info->CommandStatus = CMD_HARDWARE_ERR;
-		if (c->cmd_type == CMD_RWREQ) {
-			complete_command(h, c, 0);
-		} else if (c->cmd_type == CMD_IOCTL_PEND)
-			complete(c->waiting);
-#ifdef CONFIG_CISS_SCSI_TAPE
-		else if (c->cmd_type == CMD_SCSI)
-			complete_scsi_command(c, 0, 0);
-#endif
-	}
-	spin_unlock_irqrestore(CCISS_LOCK(ctlr), flags);
-	return;
-}
-
 module_init(cciss_init);
 module_exit(cciss_cleanup);

diff --git a/drivers/block/cciss.h b/drivers/block/cciss.h
index c5d4111..ae340ff 100644
--- a/drivers/block/cciss.h
+++ b/drivers/block/cciss.h

@@ -25,7 +25,7 @@
 	void (*submit_command)(ctlr_info_t *h, CommandList_struct *c);
 	void (*set_intr_mask)(ctlr_info_t *h, unsigned long val);
 	unsigned long (*fifo_full)(ctlr_info_t *h);
-	unsigned long (*intr_pending)(ctlr_info_t *h);
+	bool (*intr_pending)(ctlr_info_t *h);
 	unsigned long (*command_completed)(ctlr_info_t *h);
 };
 typedef struct _drive_info_struct
@@ -85,8 +85,8 @@
 	int	max_cmd_sgentries;
 	SGDescriptor_struct **cmd_sg_list;
 
-#	define DOORBELL_INT	0
-#	define PERF_MODE_INT	1
+#	define PERF_MODE_INT	0
+#	define DOORBELL_INT	1
 #	define SIMPLE_MODE_INT	2
 #	define MEMQ_MODE_INT	3
 	unsigned int intr[4];
@@ -137,10 +137,27 @@
 	struct list_head scan_list;
 	struct completion scan_wait;
 	struct device dev;
+	/*
+	 * Performant mode tables.
+	 */
+	u32 trans_support;
+	u32 trans_offset;
+	struct TransTable_struct *transtable;
+	unsigned long transMethod;
+
+	/*
+	 * Performant mode completion buffer
+	 */
+	u64 *reply_pool;
+	dma_addr_t reply_pool_dhandle;
+	u64 *reply_pool_head;
+	size_t reply_pool_size;
+	unsigned char reply_pool_wraparound;
+	u32 *blockFetchTable;
 };
 
-/*  Defining the diffent access_menthods */
-/*
+/*  Defining the different access_methods
+ *
  * Memory mapped FIFO interface (SMART 53xx cards)
  */
 #define SA5_DOORBELL	0x20
@@ -159,19 +176,47 @@
 #define SA5B_INTR_PENDING	0x04
 #define FIFO_EMPTY		0xffffffff	
 #define CCISS_FIRMWARE_READY	0xffff0000 /* value in scratchpad register */
+/* Perf. mode flags */
+#define SA5_PERF_INTR_PENDING	0x04
+#define SA5_PERF_INTR_OFF	0x05
+#define SA5_OUTDB_STATUS_PERF_BIT	0x01
+#define SA5_OUTDB_CLEAR_PERF_BIT	0x01
+#define SA5_OUTDB_CLEAR         0xA0
+#define SA5_OUTDB_CLEAR_PERF_BIT        0x01
+#define SA5_OUTDB_STATUS        0x9C
+
 
 #define  CISS_ERROR_BIT		0x02
 
 #define CCISS_INTR_ON 	1 
 #define CCISS_INTR_OFF	0
+
+
+/* CCISS_BOARD_READY_WAIT_SECS is how long to wait for a board
+ * to become ready, in seconds, before giving up on it.
+ * CCISS_BOARD_READY_POLL_INTERVAL_MSECS is how long to wait
+ * between polling the board to see if it is ready, in
+ * milliseconds.  CCISS_BOARD_READY_ITERATIONS is derived
+ * from the above.
+ */
+#define CCISS_BOARD_READY_WAIT_SECS (120)
+#define CCISS_BOARD_READY_POLL_INTERVAL_MSECS (100)
+#define CCISS_BOARD_READY_ITERATIONS \
+	((CCISS_BOARD_READY_WAIT_SECS * 1000) / \
+		CCISS_BOARD_READY_POLL_INTERVAL_MSECS)
+#define CCISS_POST_RESET_PAUSE_MSECS (3000)
+#define CCISS_POST_RESET_NOOP_INTERVAL_MSECS (1000)
+#define CCISS_POST_RESET_NOOP_RETRIES (12)
+
 /* 
 	Send the command to the hardware 
 */
 static void SA5_submit_command( ctlr_info_t *h, CommandList_struct *c) 
 {
 #ifdef CCISS_DEBUG
-	 printk("Sending %x - down to controller\n", c->busaddr );
-#endif /* CCISS_DEBUG */ 
+	printk(KERN_WARNING "cciss%d: Sending %08x - down to controller\n",
+			h->ctlr, c->busaddr);
+#endif /* CCISS_DEBUG */
          writel(c->busaddr, h->vaddr + SA5_REQUEST_PORT_OFFSET);
 	 h->commands_outstanding++;
 	 if ( h->commands_outstanding > h->max_outstanding)
@@ -214,6 +259,20 @@
                         h->vaddr + SA5_REPLY_INTR_MASK_OFFSET);
         }
 }
+
+/* Performant mode intr_mask: val != 0 turns interrupts on, val == 0 turns them off */
+static void SA5_performant_intr_mask(ctlr_info_t *h, unsigned long val)
+{
+	if (val) { /* turn on interrupts: record the state in h, then write 0 to the reply interrupt mask register */
+		h->interrupts_enabled = 1;
+		writel(0, h->vaddr + SA5_REPLY_INTR_MASK_OFFSET);
+	} else { /* turn off interrupts: record the state in h, then write SA5_PERF_INTR_OFF to the same register */
+		h->interrupts_enabled = 0;
+		writel(SA5_PERF_INTR_OFF,
+				h->vaddr + SA5_REPLY_INTR_MASK_OFFSET);
+	}
+}
+
 /*
  *  Returns true if fifo is full.  
  * 
@@ -250,10 +309,44 @@
 	return ( register_value); 
 
 }
+
+/* Performant mode command completed: returns the entry at reply_pool_head, or FIFO_EMPTY if it is not ready */
+static unsigned long SA5_performant_completed(ctlr_info_t *h)
+{
+	unsigned long register_value = FIFO_EMPTY; /* always reassigned below; the readl() results are never returned */
+
+	/* flush the controller write of the reply queue by reading
+	 * outbound doorbell status register.
+	 */
+	register_value = readl(h->vaddr + SA5_OUTDB_STATUS);
+	/* msi auto clears the interrupt pending bit, so only clear it by hand when neither MSI nor MSI-X is in use. */
+	if (!(h->msi_vector || h->msix_vector)) {
+		writel(SA5_OUTDB_CLEAR_PERF_BIT, h->vaddr + SA5_OUTDB_CLEAR);
+		/* Do a read in order to flush the write to the controller
+		 * (as per spec.)
+		 */
+		register_value = readl(h->vaddr + SA5_OUTDB_STATUS);
+	}
+
+	if ((*(h->reply_pool_head) & 1) == (h->reply_pool_wraparound)) { /* low bit of the entry equals the wraparound flag: consume it */
+		register_value = *(h->reply_pool_head); /* NOTE(review): entry is u64, narrowed to unsigned long here - confirm this is fine on 32-bit */
+		(h->reply_pool_head)++;
+		h->commands_outstanding--;
+	} else {
+		register_value = FIFO_EMPTY; /* nothing to consume; head is left where it is */
+	}
+	/* Check for wraparound: once head reaches reply_pool + max_commands, restart at reply_pool and flip the expected low bit */
+	if (h->reply_pool_head == (h->reply_pool + h->max_commands)) {
+		h->reply_pool_head = h->reply_pool;
+		h->reply_pool_wraparound ^= 1;
+	}
+
+	return register_value;
+}
 /*
  *	Returns true if an interrupt is pending.. 
  */
-static unsigned long SA5_intr_pending(ctlr_info_t *h)
+static bool SA5_intr_pending(ctlr_info_t *h)
 {
 	unsigned long register_value  = 
 		readl(h->vaddr + SA5_INTR_STATUS);
@@ -268,7 +361,7 @@
 /*
  *      Returns true if an interrupt is pending..
  */
-static unsigned long SA5B_intr_pending(ctlr_info_t *h)
+static bool SA5B_intr_pending(ctlr_info_t *h)
 {
         unsigned long register_value  =
                 readl(h->vaddr + SA5_INTR_STATUS);
@@ -280,6 +373,20 @@
         return 0 ;
 }
 
+static bool SA5_performant_intr_pending(ctlr_info_t *h)
+{ /* Performant mode intr_pending: returns true if an interrupt is pending for controller h */
+	unsigned long register_value = readl(h->vaddr + SA5_INTR_STATUS);
+
+	if (!register_value) /* interrupt status register reads zero: nothing pending */
+		return false;
+
+	if (h->msi_vector || h->msix_vector) /* with MSI/MSI-X a nonzero status is taken as pending */
+		return true;
+
+	/* Read outbound doorbell to flush; pending only if its performant mode status bit is set */
+	register_value = readl(h->vaddr + SA5_OUTDB_STATUS);
+	return register_value & SA5_OUTDB_STATUS_PERF_BIT;
+}
 
 static struct access_method SA5_access = {
 	SA5_submit_command,
@@ -297,6 +404,14 @@
         SA5_completed,
 };
 
+static struct access_method SA5_performant_access = { /* positional initializer: order follows struct access_method */
+	SA5_submit_command, /* submit_command */
+	SA5_performant_intr_mask, /* set_intr_mask */
+	SA5_fifo_full, /* fifo_full */
+	SA5_performant_intr_pending, /* intr_pending */
+	SA5_performant_completed, /* command_completed */
+};
+
 struct board_type {
 	__u32	board_id;
 	char	*product_name;
@@ -304,6 +419,4 @@
 	int nr_cmds; /* Max cmds this kind of ctlr can handle. */
 };
 
-#define CCISS_LOCK(i)	(&hba[i]->lock)
-
 #endif /* CCISS_H */

diff --git a/drivers/block/cciss_cmd.h b/drivers/block/cciss_cmd.h
index e624ff9..eb060f1 100644
--- a/drivers/block/cciss_cmd.h
+++ b/drivers/block/cciss_cmd.h

@@ -52,8 +52,10 @@
 /* Configuration Table */
 #define CFGTBL_ChangeReq        0x00000001l
 #define CFGTBL_AccCmds          0x00000001l
+#define DOORBELL_CTLR_RESET     0x00000004l
 
 #define CFGTBL_Trans_Simple     0x00000002l
+#define CFGTBL_Trans_Performant 0x00000004l
 
 #define CFGTBL_BusType_Ultra2   0x00000001l
 #define CFGTBL_BusType_Ultra3   0x00000002l
@@ -173,12 +175,15 @@
  * PAD_64 can be adjusted independently as needed for 32-bit
  * and 64-bits systems.
  */
-#define COMMANDLIST_ALIGNMENT (8)
+#define COMMANDLIST_ALIGNMENT (32)
 #define IS_64_BIT ((sizeof(long) - 4)/4)
 #define IS_32_BIT (!IS_64_BIT)
 #define PAD_32 (0)
 #define PAD_64 (4)
 #define PADSIZE (IS_32_BIT * PAD_32 + IS_64_BIT * PAD_64)
+#define DIRECT_LOOKUP_BIT 0x10
+#define DIRECT_LOOKUP_SHIFT 5
+
 typedef struct _CommandList_struct {
   CommandListHeader_struct Header;
   RequestBlock_struct      Request;
@@ -195,7 +200,7 @@
   struct completion *waiting;
   int	 retry_count;
   void * scsi_cmd;
-  char   pad[PADSIZE];
+  char pad[PADSIZE];
 } CommandList_struct;
 
 /* Configuration Table Structure */
@@ -209,12 +214,15 @@
 typedef struct _CfgTable_struct {
   BYTE             Signature[4];
   DWORD            SpecValence;
+#define SIMPLE_MODE	0x02
+#define PERFORMANT_MODE	0x04
+#define MEMQ_MODE	0x08
   DWORD            TransportSupport;
   DWORD            TransportActive;
   HostWrite_struct HostWrite;
   DWORD            CmdsOutMax;
   DWORD            BusTypes;
-  DWORD            Reserved; 
+  DWORD            TransMethodOffset;
   BYTE             ServerName[16];
   DWORD            HeartBeat;
   DWORD            SCSI_Prefetch;
@@ -222,6 +230,28 @@
   DWORD            MaxLogicalUnits;
   DWORD            MaxPhysicalDrives;
   DWORD            MaxPhysicalDrivesPerLogicalUnit;
+  DWORD            MaxPerformantModeCommands;
+  u8		   reserved[0x78 - 0x58];
+  u32		   misc_fw_support; /* offset 0x78 */
+#define MISC_FW_DOORBELL_RESET (0x02)
 } CfgTable_struct;
+
+struct TransTable_struct { /* layout of the table that ctlr_info_t.transtable points to */
+  u32 BlockFetch0; /* BlockFetch0..7: eight consecutive 32-bit entries; meaning not shown here - TODO confirm against the controller spec */
+  u32 BlockFetch1;
+  u32 BlockFetch2;
+  u32 BlockFetch3;
+  u32 BlockFetch4;
+  u32 BlockFetch5;
+  u32 BlockFetch6;
+  u32 BlockFetch7;
+  u32 RepQSize; /* presumably the reply queue size - confirm units against the spec */
+  u32 RepQCount; /* presumably the number of reply queues - confirm */
+  u32 RepQCtrAddrLow32; /* NOTE(review): Low32/High32 pairs look like the two halves of a 64-bit address - confirm */
+  u32 RepQCtrAddrHigh32;
+  u32 RepQAddr0Low32;
+  u32 RepQAddr0High32;
+};
+
 #pragma pack()	 
 #endif /* CCISS_CMD_H */

diff --git a/drivers/block/cciss_scsi.c b/drivers/block/cciss_scsi.c
index 72dae92..575495f 100644
--- a/drivers/block/cciss_scsi.c
+++ b/drivers/block/cciss_scsi.c

@@ -44,13 +44,15 @@
 #define CCISS_ABORT_MSG 0x00
 #define CCISS_RESET_MSG 0x01
 
-static int fill_cmd(CommandList_struct *c, __u8 cmd, int ctlr, void *buff,
+static int fill_cmd(ctlr_info_t *h, CommandList_struct *c, __u8 cmd, void *buff,
 	size_t size,
 	__u8 page_code, unsigned char *scsi3addr,
 	int cmd_type);
 
-static CommandList_struct *cmd_alloc(ctlr_info_t *h, int get_from_pool);
-static void cmd_free(ctlr_info_t *h, CommandList_struct *c, int got_from_pool);
+static CommandList_struct *cmd_alloc(ctlr_info_t *h);
+static CommandList_struct *cmd_special_alloc(ctlr_info_t *h);
+static void cmd_free(ctlr_info_t *h, CommandList_struct *c);
+static void cmd_special_free(ctlr_info_t *h, CommandList_struct *c);
 
 static int cciss_scsi_proc_info(
 		struct Scsi_Host *sh,
@@ -93,8 +95,8 @@
 
 #pragma pack(1)
 
-#define SCSI_PAD_32 0
-#define SCSI_PAD_64 0
+#define SCSI_PAD_32 8
+#define SCSI_PAD_64 8
 
 struct cciss_scsi_cmd_stack_elem_t {
 	CommandList_struct cmd;
@@ -127,16 +129,16 @@
 	spinlock_t lock; // to protect ccissscsi[ctlr]; 
 };
 
-#define CPQ_TAPE_LOCK(ctlr, flags) spin_lock_irqsave( \
-	&hba[ctlr]->scsi_ctlr->lock, flags);
-#define CPQ_TAPE_UNLOCK(ctlr, flags) spin_unlock_irqrestore( \
-	&hba[ctlr]->scsi_ctlr->lock, flags);
+#define CPQ_TAPE_LOCK(h, flags) spin_lock_irqsave( \
+	&h->scsi_ctlr->lock, flags);
+#define CPQ_TAPE_UNLOCK(h, flags) spin_unlock_irqrestore( \
+	&h->scsi_ctlr->lock, flags);
 
 static CommandList_struct *
 scsi_cmd_alloc(ctlr_info_t *h)
 {
 	/* assume only one process in here at a time, locking done by caller. */
-	/* use CCISS_LOCK(ctlr) */
+	/* use h->lock */
 	/* might be better to rewrite how we allocate scsi commands in a way that */
 	/* needs no locking at all. */
 
@@ -177,10 +179,10 @@
 }
 
 static void 
-scsi_cmd_free(ctlr_info_t *h, CommandList_struct *cmd)
+scsi_cmd_free(ctlr_info_t *h, CommandList_struct *c)
 {
 	/* assume only one process in here at a time, locking done by caller. */
-	/* use CCISS_LOCK(ctlr) */
+	/* use h->lock */
 	/* drop the free memory chunk on top of the stack. */
 
 	struct cciss_scsi_adapter_data_t *sa;
@@ -190,22 +192,23 @@
 	stk = &sa->cmd_stack; 
 	stk->top++;
 	if (stk->top >= CMD_STACK_SIZE) {
-		printk("cciss: scsi_cmd_free called too many times.\n");
+		dev_err(&h->pdev->dev,
+			"scsi_cmd_free called too many times.\n");
 		BUG();
 	}
-	stk->elem[stk->top] = (struct cciss_scsi_cmd_stack_elem_t *) cmd;
+	stk->elem[stk->top] = (struct cciss_scsi_cmd_stack_elem_t *) c;
 }
 
 static int
-scsi_cmd_stack_setup(int ctlr, struct cciss_scsi_adapter_data_t *sa)
+scsi_cmd_stack_setup(ctlr_info_t *h, struct cciss_scsi_adapter_data_t *sa)
 {
 	int i;
 	struct cciss_scsi_cmd_stack_t *stk;
 	size_t size;
 
-	sa->cmd_sg_list = cciss_allocate_sg_chain_blocks(hba[ctlr],
-		hba[ctlr]->chainsize, CMD_STACK_SIZE);
-	if (!sa->cmd_sg_list && hba[ctlr]->chainsize > 0)
+	sa->cmd_sg_list = cciss_allocate_sg_chain_blocks(h,
+		h->chainsize, CMD_STACK_SIZE);
+	if (!sa->cmd_sg_list && h->chainsize > 0)
 		return -ENOMEM;
 
 	stk = &sa->cmd_stack; 
@@ -215,7 +218,7 @@
 	BUILD_BUG_ON((sizeof(*stk->pool) % COMMANDLIST_ALIGNMENT) != 0);
 	/* pci_alloc_consistent guarantees 32-bit DMA address will be used */
 	stk->pool = (struct cciss_scsi_cmd_stack_elem_t *)
-		pci_alloc_consistent(hba[ctlr]->pdev, size, &stk->cmd_pool_handle);
+		pci_alloc_consistent(h->pdev, size, &stk->cmd_pool_handle);
 
 	if (stk->pool == NULL) {
 		cciss_free_sg_chain_blocks(sa->cmd_sg_list, CMD_STACK_SIZE);
@@ -234,23 +237,22 @@
 }
 
 static void
-scsi_cmd_stack_free(int ctlr)
+scsi_cmd_stack_free(ctlr_info_t *h)
 {
 	struct cciss_scsi_adapter_data_t *sa;
 	struct cciss_scsi_cmd_stack_t *stk;
 	size_t size;
 
-	sa = hba[ctlr]->scsi_ctlr;
+	sa = h->scsi_ctlr;
 	stk = &sa->cmd_stack; 
 	if (stk->top != CMD_STACK_SIZE-1) {
-		printk( "cciss: %d scsi commands are still outstanding.\n",
+		dev_warn(&h->pdev->dev,
+			"bug: %d scsi commands are still outstanding.\n",
 			CMD_STACK_SIZE - stk->top);
-		// BUG();
-		printk("WE HAVE A BUG HERE!!! stk=0x%p\n", stk);
 	}
 	size = sizeof(struct cciss_scsi_cmd_stack_elem_t) * CMD_STACK_SIZE;
 
-	pci_free_consistent(hba[ctlr]->pdev, size, stk->pool, stk->cmd_pool_handle);
+	pci_free_consistent(h->pdev, size, stk->pool, stk->cmd_pool_handle);
 	stk->pool = NULL;
 	cciss_free_sg_chain_blocks(sa->cmd_sg_list, CMD_STACK_SIZE);
 }
@@ -342,20 +344,20 @@
 #endif
 
 static int 
-find_bus_target_lun(int ctlr, int *bus, int *target, int *lun)
+find_bus_target_lun(ctlr_info_t *h, int *bus, int *target, int *lun)
 {
 	/* finds an unused bus, target, lun for a new device */
-	/* assumes hba[ctlr]->scsi_ctlr->lock is held */ 
+	/* assumes h->scsi_ctlr->lock is held */
 	int i, found=0;
 	unsigned char target_taken[CCISS_MAX_SCSI_DEVS_PER_HBA];
 
 	memset(&target_taken[0], 0, CCISS_MAX_SCSI_DEVS_PER_HBA);
 
 	target_taken[SELF_SCSI_ID] = 1;	
-	for (i=0;i<ccissscsi[ctlr].ndevices;i++)
-		target_taken[ccissscsi[ctlr].dev[i].target] = 1;
+	for (i = 0; i < ccissscsi[h->ctlr].ndevices; i++)
+		target_taken[ccissscsi[h->ctlr].dev[i].target] = 1;
 	
-	for (i=0;i<CCISS_MAX_SCSI_DEVS_PER_HBA;i++) {
+	for (i = 0; i < CCISS_MAX_SCSI_DEVS_PER_HBA; i++) {
 		if (!target_taken[i]) {
 			*bus = 0; *target=i; *lun = 0; found=1;
 			break;
@@ -369,19 +371,19 @@
 };
 
 static int 
-cciss_scsi_add_entry(int ctlr, int hostno, 
+cciss_scsi_add_entry(ctlr_info_t *h, int hostno,
 		struct cciss_scsi_dev_t *device,
 		struct scsi2map *added, int *nadded)
 {
-	/* assumes hba[ctlr]->scsi_ctlr->lock is held */ 
-	int n = ccissscsi[ctlr].ndevices;
+	/* assumes h->scsi_ctlr->lock is held */
+	int n = ccissscsi[h->ctlr].ndevices;
 	struct cciss_scsi_dev_t *sd;
 	int i, bus, target, lun;
 	unsigned char addr1[8], addr2[8];
 
 	if (n >= CCISS_MAX_SCSI_DEVS_PER_HBA) {
-		printk("cciss%d: Too many devices, "
-			"some will be inaccessible.\n", ctlr);
+		dev_warn(&h->pdev->dev, "Too many devices, "
+			"some will be inaccessible.\n");
 		return -1;
 	}
 
@@ -397,7 +399,7 @@
 		memcpy(addr1, device->scsi3addr, 8);
 		addr1[4] = 0;
 		for (i = 0; i < n; i++) {
-			sd = &ccissscsi[ctlr].dev[i];
+			sd = &ccissscsi[h->ctlr].dev[i];
 			memcpy(addr2, sd->scsi3addr, 8);
 			addr2[4] = 0;
 			/* differ only in byte 4? */
@@ -410,9 +412,9 @@
 		}
 	}
 
-	sd = &ccissscsi[ctlr].dev[n];
+	sd = &ccissscsi[h->ctlr].dev[n];
 	if (lun == 0) {
-		if (find_bus_target_lun(ctlr,
+		if (find_bus_target_lun(h,
 			&sd->bus, &sd->target, &sd->lun) != 0)
 			return -1;
 	} else {
@@ -431,37 +433,37 @@
 	memcpy(sd->device_id, device->device_id, sizeof(sd->device_id));
 	sd->devtype = device->devtype;
 
-	ccissscsi[ctlr].ndevices++;
+	ccissscsi[h->ctlr].ndevices++;
 
 	/* initially, (before registering with scsi layer) we don't 
 	   know our hostno and we don't want to print anything first 
 	   time anyway (the scsi layer's inquiries will show that info) */
 	if (hostno != -1)
-		printk("cciss%d: %s device c%db%dt%dl%d added.\n", 
-			ctlr, scsi_device_type(sd->devtype), hostno,
+		dev_info(&h->pdev->dev, "%s device c%db%dt%dl%d added.\n",
+			scsi_device_type(sd->devtype), hostno,
 			sd->bus, sd->target, sd->lun);
 	return 0;
 }
 
 static void
-cciss_scsi_remove_entry(int ctlr, int hostno, int entry,
+cciss_scsi_remove_entry(ctlr_info_t *h, int hostno, int entry,
 	struct scsi2map *removed, int *nremoved)
 {
-	/* assumes hba[ctlr]->scsi_ctlr->lock is held */ 
+	/* assumes h->scsi_ctlr->lock is held */
 	int i;
 	struct cciss_scsi_dev_t sd;
 
 	if (entry < 0 || entry >= CCISS_MAX_SCSI_DEVS_PER_HBA) return;
-	sd = ccissscsi[ctlr].dev[entry];
+	sd = ccissscsi[h->ctlr].dev[entry];
 	removed[*nremoved].bus    = sd.bus;
 	removed[*nremoved].target = sd.target;
 	removed[*nremoved].lun    = sd.lun;
 	(*nremoved)++;
-	for (i=entry;i<ccissscsi[ctlr].ndevices-1;i++)
-		ccissscsi[ctlr].dev[i] = ccissscsi[ctlr].dev[i+1];
-	ccissscsi[ctlr].ndevices--;
-	printk("cciss%d: %s device c%db%dt%dl%d removed.\n",
-		ctlr, scsi_device_type(sd.devtype), hostno,
+	for (i = entry; i < ccissscsi[h->ctlr].ndevices-1; i++)
+		ccissscsi[h->ctlr].dev[i] = ccissscsi[h->ctlr].dev[i+1];
+	ccissscsi[h->ctlr].ndevices--;
+	dev_info(&h->pdev->dev, "%s device c%db%dt%dl%d removed.\n",
+		scsi_device_type(sd.devtype), hostno,
 			sd.bus, sd.target, sd.lun);
 }
 
@@ -476,24 +478,24 @@
 	(a)[1] == (b)[1] && \
 	(a)[0] == (b)[0])
 
-static void fixup_botched_add(int ctlr, char *scsi3addr)
+static void fixup_botched_add(ctlr_info_t *h, char *scsi3addr)
 {
 	/* called when scsi_add_device fails in order to re-adjust */
 	/* ccissscsi[] to match the mid layer's view. */
 	unsigned long flags;
 	int i, j;
-	CPQ_TAPE_LOCK(ctlr, flags);
-	for (i = 0; i < ccissscsi[ctlr].ndevices; i++) {
+	CPQ_TAPE_LOCK(h, flags);
+	for (i = 0; i < ccissscsi[h->ctlr].ndevices; i++) {
 		if (memcmp(scsi3addr,
-				ccissscsi[ctlr].dev[i].scsi3addr, 8) == 0) {
-			for (j = i; j < ccissscsi[ctlr].ndevices-1; j++)
-				ccissscsi[ctlr].dev[j] =
-					ccissscsi[ctlr].dev[j+1];
-			ccissscsi[ctlr].ndevices--;
+				ccissscsi[h->ctlr].dev[i].scsi3addr, 8) == 0) {
+			for (j = i; j < ccissscsi[h->ctlr].ndevices-1; j++)
+				ccissscsi[h->ctlr].dev[j] =
+					ccissscsi[h->ctlr].dev[j+1];
+			ccissscsi[h->ctlr].ndevices--;
 			break;
 		}
 	}
-	CPQ_TAPE_UNLOCK(ctlr, flags);
+	CPQ_TAPE_UNLOCK(h, flags);
 }
 
 static int device_is_the_same(struct cciss_scsi_dev_t *dev1,
@@ -513,7 +515,7 @@
 }
 
 static int
-adjust_cciss_scsi_table(int ctlr, int hostno,
+adjust_cciss_scsi_table(ctlr_info_t *h, int hostno,
 	struct cciss_scsi_dev_t sd[], int nsds)
 {
 	/* sd contains scsi3 addresses and devtypes, but
@@ -534,15 +536,15 @@
 			GFP_KERNEL);
 
 	if (!added || !removed) {
-		printk(KERN_WARNING "cciss%d: Out of memory in "
-			"adjust_cciss_scsi_table\n", ctlr);
+		dev_warn(&h->pdev->dev,
+			"Out of memory in adjust_cciss_scsi_table\n");
 		goto free_and_out;
 	}
 
-	CPQ_TAPE_LOCK(ctlr, flags);
+	CPQ_TAPE_LOCK(h, flags);
 
 	if (hostno != -1)  /* if it's not the first time... */
-		sh = hba[ctlr]->scsi_ctlr->scsi_host;
+		sh = h->scsi_ctlr->scsi_host;
 
 	/* find any devices in ccissscsi[] that are not in 
 	   sd[] and remove them from ccissscsi[] */
@@ -550,8 +552,8 @@
 	i = 0;
 	nremoved = 0;
 	nadded = 0;
-	while(i<ccissscsi[ctlr].ndevices) {
-		csd = &ccissscsi[ctlr].dev[i];
+	while (i < ccissscsi[h->ctlr].ndevices) {
+		csd = &ccissscsi[h->ctlr].dev[i];
 		found=0;
 		for (j=0;j<nsds;j++) {
 			if (SCSI3ADDR_EQ(sd[j].scsi3addr,
@@ -566,20 +568,18 @@
 
 		if (found == 0) { /* device no longer present. */ 
 			changes++;
-			/* printk("cciss%d: %s device c%db%dt%dl%d removed.\n",
-				ctlr, scsi_device_type(csd->devtype), hostno,
-					csd->bus, csd->target, csd->lun); */
-			cciss_scsi_remove_entry(ctlr, hostno, i,
+			cciss_scsi_remove_entry(h, hostno, i,
 				removed, &nremoved);
 			/* remove ^^^, hence i not incremented */
 		} else if (found == 1) { /* device is different in some way */
 			changes++;
-			printk("cciss%d: device c%db%dt%dl%d has changed.\n",
-				ctlr, hostno, csd->bus, csd->target, csd->lun);
-			cciss_scsi_remove_entry(ctlr, hostno, i,
+			dev_info(&h->pdev->dev,
+				"device c%db%dt%dl%d has changed.\n",
+				hostno, csd->bus, csd->target, csd->lun);
+			cciss_scsi_remove_entry(h, hostno, i,
 				removed, &nremoved);
 			/* remove ^^^, hence i not incremented */
-			if (cciss_scsi_add_entry(ctlr, hostno, &sd[j],
+			if (cciss_scsi_add_entry(h, hostno, &sd[j],
 				added, &nadded) != 0)
 				/* we just removed one, so add can't fail. */
 					BUG();
@@ -601,8 +601,8 @@
 
 	for (i=0;i<nsds;i++) {
 		found=0;
-		for (j=0;j<ccissscsi[ctlr].ndevices;j++) {
-			csd = &ccissscsi[ctlr].dev[j];
+		for (j = 0; j < ccissscsi[h->ctlr].ndevices; j++) {
+			csd = &ccissscsi[h->ctlr].dev[j];
 			if (SCSI3ADDR_EQ(sd[i].scsi3addr,
 				csd->scsi3addr)) {
 				if (device_is_the_same(&sd[i], csd))
@@ -614,18 +614,18 @@
 		}
 		if (!found) {
 			changes++;
-			if (cciss_scsi_add_entry(ctlr, hostno, &sd[i],
+			if (cciss_scsi_add_entry(h, hostno, &sd[i],
 				added, &nadded) != 0)
 				break;
 		} else if (found == 1) {
 			/* should never happen... */
 			changes++;
-			printk(KERN_WARNING "cciss%d: device "
-				"unexpectedly changed\n", ctlr);
+			dev_warn(&h->pdev->dev,
+				"device unexpectedly changed\n");
 			/* but if it does happen, we just ignore that device */
 		}
 	}
-	CPQ_TAPE_UNLOCK(ctlr, flags);
+	CPQ_TAPE_UNLOCK(h, flags);
 
 	/* Don't notify scsi mid layer of any changes the first time through */
 	/* (or if there are no changes) scsi_scan_host will do it later the */
@@ -645,9 +645,9 @@
 			/* We don't expect to get here. */
 			/* future cmds to this device will get selection */
 			/* timeout as if the device was gone. */
-			printk(KERN_WARNING "cciss%d: didn't find "
+			dev_warn(&h->pdev->dev, "didn't find "
 				"c%db%dt%dl%d\n for removal.",
-				ctlr, hostno, removed[i].bus,
+				hostno, removed[i].bus,
 				removed[i].target, removed[i].lun);
 		}
 	}
@@ -659,13 +659,12 @@
 			added[i].target, added[i].lun);
 		if (rc == 0)
 			continue;
-		printk(KERN_WARNING "cciss%d: scsi_add_device "
+		dev_warn(&h->pdev->dev, "scsi_add_device "
 			"c%db%dt%dl%d failed, device not added.\n",
-			ctlr, hostno,
-			added[i].bus, added[i].target, added[i].lun);
+			hostno, added[i].bus, added[i].target, added[i].lun);
 		/* now we have to remove it from ccissscsi, */
 		/* since it didn't get added to scsi mid layer */
-		fixup_botched_add(ctlr, added[i].scsi3addr);
+		fixup_botched_add(h, added[i].scsi3addr);
 	}
 
 free_and_out:
@@ -675,33 +674,33 @@
 }
 
 static int
-lookup_scsi3addr(int ctlr, int bus, int target, int lun, char *scsi3addr)
+lookup_scsi3addr(ctlr_info_t *h, int bus, int target, int lun, char *scsi3addr)
 {
 	int i;
 	struct cciss_scsi_dev_t *sd;
 	unsigned long flags;
 
-	CPQ_TAPE_LOCK(ctlr, flags);
-	for (i=0;i<ccissscsi[ctlr].ndevices;i++) {
-		sd = &ccissscsi[ctlr].dev[i];
+	CPQ_TAPE_LOCK(h, flags);
+	for (i = 0; i < ccissscsi[h->ctlr].ndevices; i++) {
+		sd = &ccissscsi[h->ctlr].dev[i];
 		if (sd->bus == bus &&
 		    sd->target == target &&
 		    sd->lun == lun) {
 			memcpy(scsi3addr, &sd->scsi3addr[0], 8);
-			CPQ_TAPE_UNLOCK(ctlr, flags);
+			CPQ_TAPE_UNLOCK(h, flags);
 			return 0;
 		}
 	}
-	CPQ_TAPE_UNLOCK(ctlr, flags);
+	CPQ_TAPE_UNLOCK(h, flags);
 	return -1;
 }
 
 static void 
-cciss_scsi_setup(int cntl_num)
+cciss_scsi_setup(ctlr_info_t *h)
 {
 	struct cciss_scsi_adapter_data_t * shba;
 
-	ccissscsi[cntl_num].ndevices = 0;
+	ccissscsi[h->ctlr].ndevices = 0;
 	shba = (struct cciss_scsi_adapter_data_t *)
 		kmalloc(sizeof(*shba), GFP_KERNEL);	
 	if (shba == NULL)
@@ -709,35 +708,35 @@
 	shba->scsi_host = NULL;
 	spin_lock_init(&shba->lock);
 	shba->registered = 0;
-	if (scsi_cmd_stack_setup(cntl_num, shba) != 0) {
+	if (scsi_cmd_stack_setup(h, shba) != 0) {
 		kfree(shba);
 		shba = NULL;
 	}
-	hba[cntl_num]->scsi_ctlr = shba;
+	h->scsi_ctlr = shba;
 	return;
 }
 
-static void
-complete_scsi_command( CommandList_struct *cp, int timeout, __u32 tag)
+static void complete_scsi_command(CommandList_struct *c, int timeout,
+	__u32 tag)
 {
 	struct scsi_cmnd *cmd;
-	ctlr_info_t *ctlr;
+	ctlr_info_t *h;
 	ErrorInfo_struct *ei;
 
-	ei = cp->err_info;
+	ei = c->err_info;
 
 	/* First, see if it was a message rather than a command */
-	if (cp->Request.Type.Type == TYPE_MSG)  {
-		cp->cmd_type = CMD_MSG_DONE;
+	if (c->Request.Type.Type == TYPE_MSG)  {
+		c->cmd_type = CMD_MSG_DONE;
 		return;
 	}
 
-	cmd = (struct scsi_cmnd *) cp->scsi_cmd;	
-	ctlr = hba[cp->ctlr];
+	cmd = (struct scsi_cmnd *) c->scsi_cmd;
+	h = hba[c->ctlr];
 
 	scsi_dma_unmap(cmd);
-	if (cp->Header.SGTotal > ctlr->max_cmd_sgentries)
-		cciss_unmap_sg_chain_block(ctlr, cp);
+	if (c->Header.SGTotal > h->max_cmd_sgentries)
+		cciss_unmap_sg_chain_block(h, c);
 
 	cmd->result = (DID_OK << 16); 		/* host byte */
 	cmd->result |= (COMMAND_COMPLETE << 8);	/* msg byte */
@@ -764,9 +763,8 @@
                 		{
 #if 0
                     			printk(KERN_WARNING "cciss: cmd %p "
-					"has SCSI Status = %x\n",
-                        			cp,  
-						ei->ScsiStatus); 
+						"has SCSI Status = %x\n",
+						c, ei->ScsiStatus);
 #endif
 					cmd->result |= (ei->ScsiStatus << 1);
                 		}
@@ -786,13 +784,13 @@
 			case CMD_DATA_UNDERRUN: /* let mid layer handle it. */
 			break;
 			case CMD_DATA_OVERRUN:
-				printk(KERN_WARNING "cciss: cp %p has"
+				dev_warn(&h->pdev->dev, "%p has"
 					" completed with data overrun "
-					"reported\n", cp);
+					"reported\n", c);
 			break;
 			case CMD_INVALID: {
-				/* print_bytes(cp, sizeof(*cp), 1, 0);
-				print_cmd(cp); */
+				/* print_bytes(c, sizeof(*c), 1, 0);
+				print_cmd(c); */
      /* We get CMD_INVALID if you address a non-existent tape drive instead
 	of a selection timeout (no response).  You will see this if you yank 
 	out a tape drive, then try to access it. This is kind of a shame
@@ -802,54 +800,50 @@
 				}
 			break;
 			case CMD_PROTOCOL_ERR:
-                                printk(KERN_WARNING "cciss: cp %p has "
-					"protocol error \n", cp);
+				dev_warn(&h->pdev->dev,
+					"%p has protocol error\n", c);
                         break;
 			case CMD_HARDWARE_ERR:
 				cmd->result = DID_ERROR << 16;
-                                printk(KERN_WARNING "cciss: cp %p had " 
-                                        " hardware error\n", cp);
+				dev_warn(&h->pdev->dev,
+					"%p had hardware error\n", c);
                         break;
 			case CMD_CONNECTION_LOST:
 				cmd->result = DID_ERROR << 16;
-				printk(KERN_WARNING "cciss: cp %p had "
-					"connection lost\n", cp);
+				dev_warn(&h->pdev->dev,
+					"%p had connection lost\n", c);
 			break;
 			case CMD_ABORTED:
 				cmd->result = DID_ABORT << 16;
-				printk(KERN_WARNING "cciss: cp %p was "
-					"aborted\n", cp);
+				dev_warn(&h->pdev->dev, "%p was aborted\n", c);
 			break;
 			case CMD_ABORT_FAILED:
 				cmd->result = DID_ERROR << 16;
-				printk(KERN_WARNING "cciss: cp %p reports "
-					"abort failed\n", cp);
+				dev_warn(&h->pdev->dev,
+					"%p reports abort failed\n", c);
 			break;
 			case CMD_UNSOLICITED_ABORT:
 				cmd->result = DID_ABORT << 16;
-				printk(KERN_WARNING "cciss: cp %p aborted "
-					"do to an unsolicited abort\n", cp);
+				dev_warn(&h->pdev->dev, "%p aborted do to an "
+					"unsolicited abort\n", c);
 			break;
 			case CMD_TIMEOUT:
 				cmd->result = DID_TIME_OUT << 16;
-				printk(KERN_WARNING "cciss: cp %p timedout\n",
-					cp);
+				dev_warn(&h->pdev->dev, "%p timedout\n", c);
 			break;
 			default:
 				cmd->result = DID_ERROR << 16;
-				printk(KERN_WARNING "cciss: cp %p returned "
-					"unknown status %x\n", cp, 
+				dev_warn(&h->pdev->dev,
+					"%p returned unknown status %x\n", c,
 						ei->CommandStatus); 
 		}
 	}
-	// printk("c:%p:c%db%dt%dl%d ", cmd, ctlr->ctlr, cmd->channel, 
-	//	cmd->target, cmd->lun);
 	cmd->scsi_done(cmd);
-	scsi_cmd_free(ctlr, cp);
+	scsi_cmd_free(h, c);
 }
 
 static int
-cciss_scsi_detect(int ctlr)
+cciss_scsi_detect(ctlr_info_t *h)
 {
 	struct Scsi_Host *sh;
 	int error;
@@ -860,15 +854,15 @@
 	sh->io_port = 0;	// good enough?  FIXME, 
 	sh->n_io_port = 0;	// I don't think we use these two...
 	sh->this_id = SELF_SCSI_ID;  
-	sh->sg_tablesize = hba[ctlr]->maxsgentries;
+	sh->sg_tablesize = h->maxsgentries;
 	sh->max_cmd_len = MAX_COMMAND_SIZE;
 
 	((struct cciss_scsi_adapter_data_t *) 
-		hba[ctlr]->scsi_ctlr)->scsi_host = sh;
-	sh->hostdata[0] = (unsigned long) hba[ctlr];
-	sh->irq = hba[ctlr]->intr[SIMPLE_MODE_INT];
+		h->scsi_ctlr)->scsi_host = sh;
+	sh->hostdata[0] = (unsigned long) h;
+	sh->irq = h->intr[SIMPLE_MODE_INT];
 	sh->unique_id = sh->irq;
-	error = scsi_add_host(sh, &hba[ctlr]->pdev->dev);
+	error = scsi_add_host(sh, &h->pdev->dev);
 	if (error)
 		goto fail_host_put;
 	scsi_scan_host(sh);
@@ -882,20 +876,20 @@
 
 static void
 cciss_unmap_one(struct pci_dev *pdev,
-		CommandList_struct *cp,
+		CommandList_struct *c,
 		size_t buflen,
 		int data_direction)
 {
 	u64bit addr64;
 
-	addr64.val32.lower = cp->SG[0].Addr.lower;
-	addr64.val32.upper = cp->SG[0].Addr.upper;
+	addr64.val32.lower = c->SG[0].Addr.lower;
+	addr64.val32.upper = c->SG[0].Addr.upper;
 	pci_unmap_single(pdev, (dma_addr_t) addr64.val, buflen, data_direction);
 }
 
 static void
 cciss_map_one(struct pci_dev *pdev,
-		CommandList_struct *cp,
+		CommandList_struct *c,
 		unsigned char *buf,
 		size_t buflen,
 		int data_direction)
@@ -903,164 +897,149 @@
 	__u64 addr64;
 
 	addr64 = (__u64) pci_map_single(pdev, buf, buflen, data_direction);
-	cp->SG[0].Addr.lower = 
+	c->SG[0].Addr.lower =
 	  (__u32) (addr64 & (__u64) 0x00000000FFFFFFFF);
-	cp->SG[0].Addr.upper =
+	c->SG[0].Addr.upper =
 	  (__u32) ((addr64 >> 32) & (__u64) 0x00000000FFFFFFFF);
-	cp->SG[0].Len = buflen;
-	cp->Header.SGList = (__u8) 1;   /* no. SGs contig in this cmd */
-	cp->Header.SGTotal = (__u16) 1; /* total sgs in this cmd list */
+	c->SG[0].Len = buflen;
+	c->Header.SGList = (__u8) 1;   /* no. SGs contig in this cmd */
+	c->Header.SGTotal = (__u16) 1; /* total sgs in this cmd list */
 }
 
 static int
-cciss_scsi_do_simple_cmd(ctlr_info_t *c,
-			CommandList_struct *cp,
+cciss_scsi_do_simple_cmd(ctlr_info_t *h,
+			CommandList_struct *c,
 			unsigned char *scsi3addr, 
 			unsigned char *cdb,
 			unsigned char cdblen,
 			unsigned char *buf, int bufsize,
 			int direction)
 {
-	unsigned long flags;
 	DECLARE_COMPLETION_ONSTACK(wait);
 
-	cp->cmd_type = CMD_IOCTL_PEND;		// treat this like an ioctl 
-	cp->scsi_cmd = NULL;
-	cp->Header.ReplyQueue = 0;  // unused in simple mode
-	memcpy(&cp->Header.LUN, scsi3addr, sizeof(cp->Header.LUN));
-	cp->Header.Tag.lower = cp->busaddr;  // Use k. address of cmd as tag
+	c->cmd_type = CMD_IOCTL_PEND; /* treat this like an ioctl */
+	c->scsi_cmd = NULL;
+	c->Header.ReplyQueue = 0;  /* unused in simple mode */
+	memcpy(&c->Header.LUN, scsi3addr, sizeof(c->Header.LUN));
+	c->Header.Tag.lower = c->busaddr;  /* Use k. address of cmd as tag */
 	// Fill in the request block...
 
 	/* printk("Using scsi3addr 0x%02x%0x2%0x2%0x2%0x2%0x2%0x2%0x2\n", 
 		scsi3addr[0], scsi3addr[1], scsi3addr[2], scsi3addr[3],
 		scsi3addr[4], scsi3addr[5], scsi3addr[6], scsi3addr[7]); */
 
-	memset(cp->Request.CDB, 0, sizeof(cp->Request.CDB));
-	memcpy(cp->Request.CDB, cdb, cdblen);
-	cp->Request.Timeout = 0;
-	cp->Request.CDBLen = cdblen;
-	cp->Request.Type.Type = TYPE_CMD;
-	cp->Request.Type.Attribute = ATTR_SIMPLE;
-	cp->Request.Type.Direction = direction;
+	memset(c->Request.CDB, 0, sizeof(c->Request.CDB));
+	memcpy(c->Request.CDB, cdb, cdblen);
+	c->Request.Timeout = 0;
+	c->Request.CDBLen = cdblen;
+	c->Request.Type.Type = TYPE_CMD;
+	c->Request.Type.Attribute = ATTR_SIMPLE;
+	c->Request.Type.Direction = direction;
 
 	/* Fill in the SG list and do dma mapping */
-	cciss_map_one(c->pdev, cp, (unsigned char *) buf,
+	cciss_map_one(h->pdev, c, (unsigned char *) buf,
 			bufsize, DMA_FROM_DEVICE); 
 
-	cp->waiting = &wait;
-
-	/* Put the request on the tail of the request queue */
-	spin_lock_irqsave(CCISS_LOCK(c->ctlr), flags);
-	addQ(&c->reqQ, cp);
-	c->Qdepth++;
-	start_io(c);
-	spin_unlock_irqrestore(CCISS_LOCK(c->ctlr), flags);
-
+	c->waiting = &wait;
+	enqueue_cmd_and_start_io(h, c);
 	wait_for_completion(&wait);
 
 	/* undo the dma mapping */
-	cciss_unmap_one(c->pdev, cp, bufsize, DMA_FROM_DEVICE);
+	cciss_unmap_one(h->pdev, c, bufsize, DMA_FROM_DEVICE);
 	return(0);
 }
 
 static void 
-cciss_scsi_interpret_error(CommandList_struct *cp)
+cciss_scsi_interpret_error(ctlr_info_t *h, CommandList_struct *c)
 {
 	ErrorInfo_struct *ei;
 
-	ei = cp->err_info; 
+	ei = c->err_info;
 	switch(ei->CommandStatus)
 	{
 		case CMD_TARGET_STATUS:
-			printk(KERN_WARNING "cciss: cmd %p has "
-				"completed with errors\n", cp);
-			printk(KERN_WARNING "cciss: cmd %p "
-				"has SCSI Status = %x\n",
-					cp,  
-					ei->ScsiStatus);
+			dev_warn(&h->pdev->dev,
+				"cmd %p has completed with errors\n", c);
+			dev_warn(&h->pdev->dev,
+				"cmd %p has SCSI Status = %x\n",
+				c, ei->ScsiStatus);
 			if (ei->ScsiStatus == 0)
-				printk(KERN_WARNING 
-				"cciss:SCSI status is abnormally zero.  "
+				dev_warn(&h->pdev->dev,
+				"SCSI status is abnormally zero.  "
 				"(probably indicates selection timeout "
 				"reported incorrectly due to a known "
 				"firmware bug, circa July, 2001.)\n");
 		break;
 		case CMD_DATA_UNDERRUN: /* let mid layer handle it. */
-			printk("UNDERRUN\n");
+			dev_info(&h->pdev->dev, "UNDERRUN\n");
 		break;
 		case CMD_DATA_OVERRUN:
-			printk(KERN_WARNING "cciss: cp %p has"
+			dev_warn(&h->pdev->dev, "%p has"
 				" completed with data overrun "
-				"reported\n", cp);
+				"reported\n", c);
 		break;
 		case CMD_INVALID: {
 			/* controller unfortunately reports SCSI passthru's */
 			/* to non-existent targets as invalid commands. */
-			printk(KERN_WARNING "cciss: cp %p is "
-				"reported invalid (probably means "
-				"target device no longer present)\n", 
-				cp); 
-			/* print_bytes((unsigned char *) cp, sizeof(*cp), 1, 0);
-			print_cmd(cp);  */
+			dev_warn(&h->pdev->dev,
+				"%p is reported invalid (probably means "
+				"target device no longer present)\n", c);
+			/* print_bytes((unsigned char *) c, sizeof(*c), 1, 0);
+			print_cmd(c);  */
 			}
 		break;
 		case CMD_PROTOCOL_ERR:
-			printk(KERN_WARNING "cciss: cp %p has "
-				"protocol error \n", cp);
+			dev_warn(&h->pdev->dev, "%p has protocol error\n", c);
 		break;
 		case CMD_HARDWARE_ERR:
 			/* cmd->result = DID_ERROR << 16; */
-			printk(KERN_WARNING "cciss: cp %p had " 
-				" hardware error\n", cp);
+			dev_warn(&h->pdev->dev, "%p had hardware error\n", c);
 		break;
 		case CMD_CONNECTION_LOST:
-			printk(KERN_WARNING "cciss: cp %p had "
-				"connection lost\n", cp);
+			dev_warn(&h->pdev->dev, "%p had connection lost\n", c);
 		break;
 		case CMD_ABORTED:
-			printk(KERN_WARNING "cciss: cp %p was "
-				"aborted\n", cp);
+			dev_warn(&h->pdev->dev, "%p was aborted\n", c);
 		break;
 		case CMD_ABORT_FAILED:
-			printk(KERN_WARNING "cciss: cp %p reports "
-				"abort failed\n", cp);
+			dev_warn(&h->pdev->dev,
+				"%p reports abort failed\n", c);
 		break;
 		case CMD_UNSOLICITED_ABORT:
-			printk(KERN_WARNING "cciss: cp %p aborted "
-				"do to an unsolicited abort\n", cp);
+			dev_warn(&h->pdev->dev,
+				"%p aborted do to an unsolicited abort\n", c);
 		break;
 		case CMD_TIMEOUT:
-			printk(KERN_WARNING "cciss: cp %p timedout\n",
-				cp);
+			dev_warn(&h->pdev->dev, "%p timedout\n", c);
 		break;
 		default:
-			printk(KERN_WARNING "cciss: cp %p returned "
-				"unknown status %x\n", cp, 
-					ei->CommandStatus); 
+			dev_warn(&h->pdev->dev,
+				"%p returned unknown status %x\n",
+				c, ei->CommandStatus);
 	}
 }
 
 static int
-cciss_scsi_do_inquiry(ctlr_info_t *c, unsigned char *scsi3addr, 
+cciss_scsi_do_inquiry(ctlr_info_t *h, unsigned char *scsi3addr,
 	unsigned char page, unsigned char *buf,
 	unsigned char bufsize)
 {
 	int rc;
-	CommandList_struct *cp;
+	CommandList_struct *c;
 	char cdb[6];
 	ErrorInfo_struct *ei;
 	unsigned long flags;
 
-	spin_lock_irqsave(CCISS_LOCK(c->ctlr), flags);
-	cp = scsi_cmd_alloc(c);
-	spin_unlock_irqrestore(CCISS_LOCK(c->ctlr), flags);
+	spin_lock_irqsave(&h->lock, flags);
+	c = scsi_cmd_alloc(h);
+	spin_unlock_irqrestore(&h->lock, flags);
 
-	if (cp == NULL) {			/* trouble... */
+	if (c == NULL) {			/* trouble... */
 		printk("cmd_alloc returned NULL!\n");
 		return -1;
 	}
 
-	ei = cp->err_info; 
+	ei = c->err_info;
 
 	cdb[0] = CISS_INQUIRY;
 	cdb[1] = (page != 0);
@@ -1068,24 +1047,24 @@
 	cdb[3] = 0;
 	cdb[4] = bufsize;
 	cdb[5] = 0;
-	rc = cciss_scsi_do_simple_cmd(c, cp, scsi3addr, cdb, 
+	rc = cciss_scsi_do_simple_cmd(h, c, scsi3addr, cdb,
 				6, buf, bufsize, XFER_READ);
 
 	if (rc != 0) return rc; /* something went wrong */
 
 	if (ei->CommandStatus != 0 && 
 	    ei->CommandStatus != CMD_DATA_UNDERRUN) {
-		cciss_scsi_interpret_error(cp);
+		cciss_scsi_interpret_error(h, c);
 		rc = -1;
 	}
-	spin_lock_irqsave(CCISS_LOCK(c->ctlr), flags);
-	scsi_cmd_free(c, cp);
-	spin_unlock_irqrestore(CCISS_LOCK(c->ctlr), flags);
+	spin_lock_irqsave(&h->lock, flags);
+	scsi_cmd_free(h, c);
+	spin_unlock_irqrestore(&h->lock, flags);
 	return rc;	
 }
 
 /* Get the device id from inquiry page 0x83 */
-static int cciss_scsi_get_device_id(ctlr_info_t *c, unsigned char *scsi3addr,
+static int cciss_scsi_get_device_id(ctlr_info_t *h, unsigned char *scsi3addr,
 	unsigned char *device_id, int buflen)
 {
 	int rc;
@@ -1096,7 +1075,7 @@
 	buf = kzalloc(64, GFP_KERNEL);
 	if (!buf)
 		return -1;
-	rc = cciss_scsi_do_inquiry(c, scsi3addr, 0x83, buf, 64);
+	rc = cciss_scsi_do_inquiry(h, scsi3addr, 0x83, buf, 64);
 	if (rc == 0)
 		memcpy(device_id, &buf[8], buflen);
 	kfree(buf);
@@ -1104,20 +1083,20 @@
 }
 
 static int
-cciss_scsi_do_report_phys_luns(ctlr_info_t *c, 
+cciss_scsi_do_report_phys_luns(ctlr_info_t *h,
 		ReportLunData_struct *buf, int bufsize)
 {
 	int rc;
-	CommandList_struct *cp;
+	CommandList_struct *c;
 	unsigned char cdb[12];
 	unsigned char scsi3addr[8]; 
 	ErrorInfo_struct *ei;
 	unsigned long flags;
 
-	spin_lock_irqsave(CCISS_LOCK(c->ctlr), flags);
-	cp = scsi_cmd_alloc(c);
-	spin_unlock_irqrestore(CCISS_LOCK(c->ctlr), flags);
-	if (cp == NULL) {			/* trouble... */
+	spin_lock_irqsave(&h->lock, flags);
+	c = scsi_cmd_alloc(h);
+	spin_unlock_irqrestore(&h->lock, flags);
+	if (c == NULL) {			/* trouble... */
 		printk("cmd_alloc returned NULL!\n");
 		return -1;
 	}
@@ -1136,27 +1115,27 @@
 	cdb[10] = 0;
 	cdb[11] = 0;
 
-	rc = cciss_scsi_do_simple_cmd(c, cp, scsi3addr, 
+	rc = cciss_scsi_do_simple_cmd(h, c, scsi3addr,
 				cdb, 12, 
 				(unsigned char *) buf, 
 				bufsize, XFER_READ);
 
 	if (rc != 0) return rc; /* something went wrong */
 
-	ei = cp->err_info; 
+	ei = c->err_info;
 	if (ei->CommandStatus != 0 && 
 	    ei->CommandStatus != CMD_DATA_UNDERRUN) {
-		cciss_scsi_interpret_error(cp);
+		cciss_scsi_interpret_error(h, c);
 		rc = -1;
 	}
-	spin_lock_irqsave(CCISS_LOCK(c->ctlr), flags);
-	scsi_cmd_free(c, cp);
-	spin_unlock_irqrestore(CCISS_LOCK(c->ctlr), flags);
+	spin_lock_irqsave(&h->lock, flags);
+	scsi_cmd_free(h, c);
+	spin_unlock_irqrestore(&h->lock, flags);
 	return rc;	
 }
 
 static void
-cciss_update_non_disk_devices(int cntl_num, int hostno)
+cciss_update_non_disk_devices(ctlr_info_t *h, int hostno)
 {
 	/* the idea here is we could get notified from /proc
 	   that some devices have changed, so we do a report 
@@ -1189,7 +1168,6 @@
 	ReportLunData_struct *ld_buff;
 	unsigned char *inq_buff;
 	unsigned char scsi3addr[8];
-	ctlr_info_t *c;
 	__u32 num_luns=0;
 	unsigned char *ch;
 	struct cciss_scsi_dev_t *currentsd, *this_device;
@@ -1197,7 +1175,6 @@
 	int reportlunsize = sizeof(*ld_buff) + CISS_MAX_PHYS_LUN * 8;
 	int i;
 
-	c = (ctlr_info_t *) hba[cntl_num];	
 	ld_buff = kzalloc(reportlunsize, GFP_KERNEL);
 	inq_buff = kmalloc(OBDR_TAPE_INQ_SIZE, GFP_KERNEL);
 	currentsd = kzalloc(sizeof(*currentsd) *
@@ -1207,7 +1184,7 @@
 		goto out;
 	}
 	this_device = &currentsd[CCISS_MAX_SCSI_DEVS_PER_HBA];
-	if (cciss_scsi_do_report_phys_luns(c, ld_buff, reportlunsize) == 0) {
+	if (cciss_scsi_do_report_phys_luns(h, ld_buff, reportlunsize) == 0) {
 		ch = &ld_buff->LUNListLength[0];
 		num_luns = ((ch[0]<<24) | (ch[1]<<16) | (ch[2]<<8) | ch[3]) / 8;
 		if (num_luns > CISS_MAX_PHYS_LUN) {
@@ -1231,7 +1208,7 @@
 		memset(inq_buff, 0, OBDR_TAPE_INQ_SIZE);
 		memcpy(&scsi3addr[0], &ld_buff->LUN[i][0], 8);
 
-		if (cciss_scsi_do_inquiry(hba[cntl_num], scsi3addr, 0, inq_buff,
+		if (cciss_scsi_do_inquiry(h, scsi3addr, 0, inq_buff,
 			(unsigned char) OBDR_TAPE_INQ_SIZE) != 0)
 			/* Inquiry failed (msg printed already) */
 			continue; /* so we will skip this device. */
@@ -1249,7 +1226,7 @@
 			sizeof(this_device->revision));
 		memset(this_device->device_id, 0,
 			sizeof(this_device->device_id));
-		cciss_scsi_get_device_id(hba[cntl_num], scsi3addr,
+		cciss_scsi_get_device_id(h, scsi3addr,
 			this_device->device_id, sizeof(this_device->device_id));
 
 		switch (this_device->devtype)
@@ -1276,7 +1253,7 @@
 		  case 0x08: /* medium changer */
 			if (ncurrent >= CCISS_MAX_SCSI_DEVS_PER_HBA) {
 				printk(KERN_INFO "cciss%d: %s ignored, "
-					"too many devices.\n", cntl_num,
+					"too many devices.\n", h->ctlr,
 					scsi_device_type(this_device->devtype));
 				break;
 			}
@@ -1288,7 +1265,7 @@
 		}
 	}
 
-	adjust_cciss_scsi_table(cntl_num, hostno, currentsd, ncurrent);
+	adjust_cciss_scsi_table(h, hostno, currentsd, ncurrent);
 out:
 	kfree(inq_buff);
 	kfree(ld_buff);
@@ -1307,12 +1284,12 @@
 }
 
 static int
-cciss_scsi_user_command(int ctlr, int hostno, char *buffer, int length)
+cciss_scsi_user_command(ctlr_info_t *h, int hostno, char *buffer, int length)
 {
 	int arg_len;
 
 	if ((arg_len = is_keyword(buffer, length, "rescan")) != 0)
-		cciss_update_non_disk_devices(ctlr, hostno);
+		cciss_update_non_disk_devices(h, hostno);
 	else
 		return -EINVAL;
 	return length;
@@ -1329,20 +1306,16 @@
 {
 
 	int buflen, datalen;
-	ctlr_info_t *ci;
+	ctlr_info_t *h;
 	int i;
-	int cntl_num;
 
-
-	ci = (ctlr_info_t *) sh->hostdata[0];
-	if (ci == NULL)  /* This really shouldn't ever happen. */
+	h = (ctlr_info_t *) sh->hostdata[0];
+	if (h == NULL)  /* This really shouldn't ever happen. */
 		return -EINVAL;
 
-	cntl_num = ci->ctlr;	/* Get our index into the hba[] array */
-
 	if (func == 0) {	/* User is reading from /proc/scsi/ciss*?/?*  */
 		buflen = sprintf(buffer, "cciss%d: SCSI host: %d\n",
-				cntl_num, sh->host_no);
+				h->ctlr, sh->host_no);
 
 		/* this information is needed by apps to know which cciss
 		   device corresponds to which scsi host number without
@@ -1352,8 +1325,9 @@
 		   this info is for an app to be able to use to know how to
 		   get them back in sync. */
 
-		for (i=0;i<ccissscsi[cntl_num].ndevices;i++) {
-			struct cciss_scsi_dev_t *sd = &ccissscsi[cntl_num].dev[i];
+		for (i = 0; i < ccissscsi[h->ctlr].ndevices; i++) {
+			struct cciss_scsi_dev_t *sd =
+				&ccissscsi[h->ctlr].dev[i];
 			buflen += sprintf(&buffer[buflen], "c%db%dt%dl%d %02d "
 				"0x%02x%02x%02x%02x%02x%02x%02x%02x\n",
 				sh->host_no, sd->bus, sd->target, sd->lun,
@@ -1371,15 +1345,15 @@
 			*start = buffer + offset;
 		return(datalen);
 	} else 	/* User is writing to /proc/scsi/cciss*?/?*  ... */
-		return cciss_scsi_user_command(cntl_num, sh->host_no,
+		return cciss_scsi_user_command(h, sh->host_no,
 			buffer, length);	
 } 
 
 /* cciss_scatter_gather takes a struct scsi_cmnd, (cmd), and does the pci 
    dma mapping  and fills in the scatter gather entries of the 
-   cciss command, cp. */
+   cciss command, c. */
 
-static void cciss_scatter_gather(ctlr_info_t *h, CommandList_struct *cp,
+static void cciss_scatter_gather(ctlr_info_t *h, CommandList_struct *c,
 	struct scsi_cmnd *cmd)
 {
 	unsigned int len;
@@ -1393,7 +1367,7 @@
 
 	chained = 0;
 	sg_index = 0;
-	curr_sg = cp->SG;
+	curr_sg = c->SG;
 	request_nsgs = scsi_dma_map(cmd);
 	if (request_nsgs) {
 		scsi_for_each_sg(cmd, sg, request_nsgs, i) {
@@ -1401,7 +1375,7 @@
 				!chained && request_nsgs - i > 1) {
 				chained = 1;
 				sg_index = 0;
-				curr_sg = sa->cmd_sg_list[cp->cmdindex];
+				curr_sg = sa->cmd_sg_list[c->cmdindex];
 			}
 			addr64 = (__u64) sg_dma_address(sg);
 			len  = sg_dma_len(sg);
@@ -1414,19 +1388,19 @@
 			++sg_index;
 		}
 		if (chained)
-			cciss_map_sg_chain_block(h, cp,
-				sa->cmd_sg_list[cp->cmdindex],
+			cciss_map_sg_chain_block(h, c,
+				sa->cmd_sg_list[c->cmdindex],
 				(request_nsgs - (h->max_cmd_sgentries - 1)) *
 					sizeof(SGDescriptor_struct));
 	}
 	/* track how many SG entries we are using */
 	if (request_nsgs > h->maxSG)
 		h->maxSG = request_nsgs;
-	cp->Header.SGTotal = (__u8) request_nsgs + chained;
+	c->Header.SGTotal = (__u8) request_nsgs + chained;
 	if (request_nsgs > h->max_cmd_sgentries)
-		cp->Header.SGList = h->max_cmd_sgentries;
+		c->Header.SGList = h->max_cmd_sgentries;
 	else
-		cp->Header.SGList = cp->Header.SGTotal;
+		c->Header.SGList = c->Header.SGTotal;
 	return;
 }
 
@@ -1434,18 +1408,17 @@
 static int
 cciss_scsi_queue_command (struct scsi_cmnd *cmd, void (* done)(struct scsi_cmnd *))
 {
-	ctlr_info_t *c;
-	int ctlr, rc;
+	ctlr_info_t *h;
+	int rc;
 	unsigned char scsi3addr[8];
-	CommandList_struct *cp;
+	CommandList_struct *c;
 	unsigned long flags;
 
 	// Get the ptr to our adapter structure (hba[i]) out of cmd->host.
 	// We violate cmd->host privacy here.  (Is there another way?)
-	c = (ctlr_info_t *) cmd->device->host->hostdata[0];
-	ctlr = c->ctlr;
+	h = (ctlr_info_t *) cmd->device->host->hostdata[0];
 
-	rc = lookup_scsi3addr(ctlr, cmd->device->channel, cmd->device->id, 
+	rc = lookup_scsi3addr(h, cmd->device->channel, cmd->device->id,
 			cmd->device->lun, scsi3addr);
 	if (rc != 0) {
 		/* the scsi nexus does not match any that we presented... */
@@ -1457,19 +1430,14 @@
 		return 0;
 	}
 
-	/* printk("cciss_queue_command, p=%p, cmd=0x%02x, c%db%dt%dl%d\n", 
-		cmd, cmd->cmnd[0], ctlr, cmd->channel, cmd->target, cmd->lun);*/
-	// printk("q:%p:c%db%dt%dl%d ", cmd, ctlr, cmd->channel, 
-	//	cmd->target, cmd->lun);
-
 	/* Ok, we have a reasonable scsi nexus, so send the cmd down, and
            see what the device thinks of it. */
 
-	spin_lock_irqsave(CCISS_LOCK(ctlr), flags);
-	cp = scsi_cmd_alloc(c);
-	spin_unlock_irqrestore(CCISS_LOCK(ctlr), flags);
-	if (cp == NULL) {			/* trouble... */
-		printk("scsi_cmd_alloc returned NULL!\n");
+	spin_lock_irqsave(&h->lock, flags);
+	c = scsi_cmd_alloc(h);
+	spin_unlock_irqrestore(&h->lock, flags);
+	if (c == NULL) {			/* trouble... */
+		dev_warn(&h->pdev->dev, "scsi_cmd_alloc returned NULL!\n");
 		/* FIXME: next 3 lines are -> BAD! <- */
 		cmd->result = DID_NO_CONNECT << 16;
 		done(cmd);
@@ -1480,35 +1448,41 @@
 
 	cmd->scsi_done = done;    // save this for use by completion code 
 
-	// save cp in case we have to abort it 
-	cmd->host_scribble = (unsigned char *) cp; 
+	/* save c in case we have to abort it */
+	cmd->host_scribble = (unsigned char *) c;
 
-	cp->cmd_type = CMD_SCSI;
-	cp->scsi_cmd = cmd;
-	cp->Header.ReplyQueue = 0;  // unused in simple mode
-	memcpy(&cp->Header.LUN.LunAddrBytes[0], &scsi3addr[0], 8);
-	cp->Header.Tag.lower = cp->busaddr;  // Use k. address of cmd as tag
+	c->cmd_type = CMD_SCSI;
+	c->scsi_cmd = cmd;
+	c->Header.ReplyQueue = 0;  /* unused in simple mode */
+	memcpy(&c->Header.LUN.LunAddrBytes[0], &scsi3addr[0], 8);
+	c->Header.Tag.lower = c->busaddr;  /* Use k. address of cmd as tag */
 	
 	// Fill in the request block...
 
-	cp->Request.Timeout = 0;
-	memset(cp->Request.CDB, 0, sizeof(cp->Request.CDB));
-	BUG_ON(cmd->cmd_len > sizeof(cp->Request.CDB));
-	cp->Request.CDBLen = cmd->cmd_len;
-	memcpy(cp->Request.CDB, cmd->cmnd, cmd->cmd_len);
-	cp->Request.Type.Type = TYPE_CMD;
-	cp->Request.Type.Attribute = ATTR_SIMPLE;
+	c->Request.Timeout = 0;
+	memset(c->Request.CDB, 0, sizeof(c->Request.CDB));
+	BUG_ON(cmd->cmd_len > sizeof(c->Request.CDB));
+	c->Request.CDBLen = cmd->cmd_len;
+	memcpy(c->Request.CDB, cmd->cmnd, cmd->cmd_len);
+	c->Request.Type.Type = TYPE_CMD;
+	c->Request.Type.Attribute = ATTR_SIMPLE;
 	switch(cmd->sc_data_direction)
 	{
-	  case DMA_TO_DEVICE: cp->Request.Type.Direction = XFER_WRITE; break;
-	  case DMA_FROM_DEVICE: cp->Request.Type.Direction = XFER_READ; break;
-	  case DMA_NONE: cp->Request.Type.Direction = XFER_NONE; break;
+	  case DMA_TO_DEVICE:
+		c->Request.Type.Direction = XFER_WRITE;
+		break;
+	  case DMA_FROM_DEVICE:
+		c->Request.Type.Direction = XFER_READ;
+		break;
+	  case DMA_NONE:
+		c->Request.Type.Direction = XFER_NONE;
+		break;
 	  case DMA_BIDIRECTIONAL:
 		// This can happen if a buggy application does a scsi passthru
 		// and sets both inlen and outlen to non-zero. ( see
 		// ../scsi/scsi_ioctl.c:scsi_ioctl_send_command() )
 
-	  	cp->Request.Type.Direction = XFER_RSVD;
+		c->Request.Type.Direction = XFER_RSVD;
 		// This is technically wrong, and cciss controllers should
 		// reject it with CMD_INVALID, which is the most correct 
 		// response, but non-fibre backends appear to let it 
@@ -1519,27 +1493,18 @@
 		break;
 
 	  default: 
-		printk("cciss: unknown data direction: %d\n", 
+		dev_warn(&h->pdev->dev, "unknown data direction: %d\n",
 			cmd->sc_data_direction);
 		BUG();
 		break;
 	}
-	cciss_scatter_gather(c, cp, cmd);
-
-	/* Put the request on the tail of the request queue */
-
-	spin_lock_irqsave(CCISS_LOCK(ctlr), flags);
-	addQ(&c->reqQ, cp);
-	c->Qdepth++;
-	start_io(c);
-	spin_unlock_irqrestore(CCISS_LOCK(ctlr), flags);
-
+	cciss_scatter_gather(h, c, cmd);
+	enqueue_cmd_and_start_io(h, c);
 	/* the cmd'll come back via intr handler in complete_scsi_command()  */
 	return 0;
 }
 
-static void 
-cciss_unregister_scsi(int ctlr)
+static void cciss_unregister_scsi(ctlr_info_t *h)
 {
 	struct cciss_scsi_adapter_data_t *sa;
 	struct cciss_scsi_cmd_stack_t *stk;
@@ -1547,59 +1512,58 @@
 
 	/* we are being forcibly unloaded, and may not refuse. */
 
-	spin_lock_irqsave(CCISS_LOCK(ctlr), flags);
-	sa = hba[ctlr]->scsi_ctlr;
+	spin_lock_irqsave(&h->lock, flags);
+	sa = h->scsi_ctlr;
 	stk = &sa->cmd_stack; 
 
 	/* if we weren't ever actually registered, don't unregister */ 
 	if (sa->registered) {
-		spin_unlock_irqrestore(CCISS_LOCK(ctlr), flags);
+		spin_unlock_irqrestore(&h->lock, flags);
 		scsi_remove_host(sa->scsi_host);
 		scsi_host_put(sa->scsi_host);
-		spin_lock_irqsave(CCISS_LOCK(ctlr), flags);
+		spin_lock_irqsave(&h->lock, flags);
 	}
 
 	/* set scsi_host to NULL so our detect routine will 
 	   find us on register */
 	sa->scsi_host = NULL;
-	spin_unlock_irqrestore(CCISS_LOCK(ctlr), flags);
-	scsi_cmd_stack_free(ctlr);
+	spin_unlock_irqrestore(&h->lock, flags);
+	scsi_cmd_stack_free(h);
 	kfree(sa);
 }
 
-static int 
-cciss_engage_scsi(int ctlr)
+static int cciss_engage_scsi(ctlr_info_t *h)
 {
 	struct cciss_scsi_adapter_data_t *sa;
 	struct cciss_scsi_cmd_stack_t *stk;
 	unsigned long flags;
 
-	spin_lock_irqsave(CCISS_LOCK(ctlr), flags);
-	sa = hba[ctlr]->scsi_ctlr;
+	spin_lock_irqsave(&h->lock, flags);
+	sa = h->scsi_ctlr;
 	stk = &sa->cmd_stack; 
 
 	if (sa->registered) {
-		printk("cciss%d: SCSI subsystem already engaged.\n", ctlr);
-		spin_unlock_irqrestore(CCISS_LOCK(ctlr), flags);
+		dev_info(&h->pdev->dev, "SCSI subsystem already engaged.\n");
+		spin_unlock_irqrestore(&h->lock, flags);
 		return -ENXIO;
 	}
 	sa->registered = 1;
-	spin_unlock_irqrestore(CCISS_LOCK(ctlr), flags);
-	cciss_update_non_disk_devices(ctlr, -1);
-	cciss_scsi_detect(ctlr);
+	spin_unlock_irqrestore(&h->lock, flags);
+	cciss_update_non_disk_devices(h, -1);
+	cciss_scsi_detect(h);
 	return 0;
 }
 
 static void
-cciss_seq_tape_report(struct seq_file *seq, int ctlr)
+cciss_seq_tape_report(struct seq_file *seq, ctlr_info_t *h)
 {
 	unsigned long flags;
 
-	CPQ_TAPE_LOCK(ctlr, flags);
+	CPQ_TAPE_LOCK(h, flags);
 	seq_printf(seq,
 		"Sequential access devices: %d\n\n",
-			ccissscsi[ctlr].ndevices);
-	CPQ_TAPE_UNLOCK(ctlr, flags);
+			ccissscsi[h->ctlr].ndevices);
+	CPQ_TAPE_UNLOCK(h, flags);
 }
 
 static int wait_for_device_to_become_ready(ctlr_info_t *h,
@@ -1610,10 +1574,10 @@
 	int waittime = HZ;
 	CommandList_struct *c;
 
-	c = cmd_alloc(h, 1);
+	c = cmd_alloc(h);
 	if (!c) {
-		printk(KERN_WARNING "cciss%d: out of memory in "
-			"wait_for_device_to_become_ready.\n", h->ctlr);
+		dev_warn(&h->pdev->dev, "out of memory in "
+			"wait_for_device_to_become_ready.\n");
 		return IO_ERROR;
 	}
 
@@ -1631,7 +1595,7 @@
 			waittime = waittime * 2;
 
 		/* Send the Test Unit Ready */
-		rc = fill_cmd(c, TEST_UNIT_READY, h->ctlr, NULL, 0, 0,
+		rc = fill_cmd(h, c, TEST_UNIT_READY, NULL, 0, 0,
 			lunaddr, TYPE_CMD);
 		if (rc == 0)
 			rc = sendcmd_withirq_core(h, c, 0);
@@ -1657,18 +1621,18 @@
 			}
 		}
 retry_tur:
-		printk(KERN_WARNING "cciss%d: Waiting %d secs "
+		dev_warn(&h->pdev->dev, "Waiting %d secs "
 			"for device to become ready.\n",
-			h->ctlr, waittime / HZ);
+			waittime / HZ);
 		rc = 1; /* device not ready. */
 	}
 
 	if (rc)
-		printk("cciss%d: giving up on device.\n", h->ctlr);
+		dev_warn(&h->pdev->dev, "giving up on device.\n");
 	else
-		printk(KERN_WARNING "cciss%d: device is ready.\n", h->ctlr);
+		dev_warn(&h->pdev->dev, "device is ready.\n");
 
-	cmd_free(h, c, 1);
+	cmd_free(h, c);
 	return rc;
 }
 
@@ -1688,26 +1652,24 @@
 	int rc;
 	CommandList_struct *cmd_in_trouble;
 	unsigned char lunaddr[8];
-	ctlr_info_t *c;
-	int ctlr;
+	ctlr_info_t *h;
 
 	/* find the controller to which the command to be aborted was sent */
-	c = (ctlr_info_t *) scsicmd->device->host->hostdata[0];
-	if (c == NULL) /* paranoia */
+	h = (ctlr_info_t *) scsicmd->device->host->hostdata[0];
+	if (h == NULL) /* paranoia */
 		return FAILED;
-	ctlr = c->ctlr;
-	printk(KERN_WARNING "cciss%d: resetting tape drive or medium changer.\n", ctlr);
+	dev_warn(&h->pdev->dev, "resetting tape drive or medium changer.\n");
 	/* find the command that's giving us trouble */
 	cmd_in_trouble = (CommandList_struct *) scsicmd->host_scribble;
 	if (cmd_in_trouble == NULL) /* paranoia */
 		return FAILED;
 	memcpy(lunaddr, &cmd_in_trouble->Header.LUN.LunAddrBytes[0], 8);
 	/* send a reset to the SCSI LUN which the command was sent to */
-	rc = sendcmd_withirq(CCISS_RESET_MSG, ctlr, NULL, 0, 0, lunaddr,
+	rc = sendcmd_withirq(h, CCISS_RESET_MSG, NULL, 0, 0, lunaddr,
 		TYPE_MSG);
-	if (rc == 0 && wait_for_device_to_become_ready(c, lunaddr) == 0)
+	if (rc == 0 && wait_for_device_to_become_ready(h, lunaddr) == 0)
 		return SUCCESS;
-	printk(KERN_WARNING "cciss%d: resetting device failed.\n", ctlr);
+	dev_warn(&h->pdev->dev, "resetting device failed.\n");
 	return FAILED;
 }
 
@@ -1716,22 +1678,20 @@
 	int rc;
 	CommandList_struct *cmd_to_abort;
 	unsigned char lunaddr[8];
-	ctlr_info_t *c;
-	int ctlr;
+	ctlr_info_t *h;
 
 	/* find the controller to which the command to be aborted was sent */
-	c = (ctlr_info_t *) scsicmd->device->host->hostdata[0];
-	if (c == NULL) /* paranoia */
+	h = (ctlr_info_t *) scsicmd->device->host->hostdata[0];
+	if (h == NULL) /* paranoia */
 		return FAILED;
-	ctlr = c->ctlr;
-	printk(KERN_WARNING "cciss%d: aborting tardy SCSI cmd\n", ctlr);
+	dev_warn(&h->pdev->dev, "aborting tardy SCSI cmd\n");
 
 	/* find the command to be aborted */
 	cmd_to_abort = (CommandList_struct *) scsicmd->host_scribble;
 	if (cmd_to_abort == NULL) /* paranoia */
 		return FAILED;
 	memcpy(lunaddr, &cmd_to_abort->Header.LUN.LunAddrBytes[0], 8);
-	rc = sendcmd_withirq(CCISS_ABORT_MSG, ctlr, &cmd_to_abort->Header.Tag,
+	rc = sendcmd_withirq(h, CCISS_ABORT_MSG, &cmd_to_abort->Header.Tag,
 		0, 0, lunaddr, TYPE_MSG);
 	if (rc == 0)
 		return SUCCESS;

diff --git a/drivers/block/cpqarray.c b/drivers/block/cpqarray.c
index abb4ec6..d53b029 100644
--- a/drivers/block/cpqarray.c
+++ b/drivers/block/cpqarray.c

@@ -35,6 +35,7 @@
 #include <linux/seq_file.h>
 #include <linux/init.h>
 #include <linux/hdreg.h>
+#include <linux/smp_lock.h>
 #include <linux/spinlock.h>
 #include <linux/blkdev.h>
 #include <linux/genhd.h>
@@ -157,7 +158,7 @@
 	unsigned int blkcnt,
 	unsigned int log_unit );
 
-static int ida_open(struct block_device *bdev, fmode_t mode);
+static int ida_unlocked_open(struct block_device *bdev, fmode_t mode);
 static int ida_release(struct gendisk *disk, fmode_t mode);
 static int ida_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, unsigned long arg);
 static int ida_getgeo(struct block_device *bdev, struct hd_geometry *geo);
@@ -195,9 +196,9 @@
 
 static const struct block_device_operations ida_fops  = {
 	.owner		= THIS_MODULE,
-	.open		= ida_open,
+	.open		= ida_unlocked_open,
 	.release	= ida_release,
-	.locked_ioctl	= ida_ioctl,
+	.ioctl		= ida_ioctl,
 	.getgeo		= ida_getgeo,
 	.revalidate_disk= ida_revalidate,
 };
@@ -840,13 +841,29 @@
 	return 0;
 }
 
+static int ida_unlocked_open(struct block_device *bdev, fmode_t mode)
+{
+	int ret;
+
+	lock_kernel();
+	ret = ida_open(bdev, mode);
+	unlock_kernel();
+
+	return ret;
+}
+
 /*
  * Close.  Sync first.
  */
 static int ida_release(struct gendisk *disk, fmode_t mode)
 {
-	ctlr_info_t *host = get_host(disk);
+	ctlr_info_t *host;
+
+	lock_kernel();
+	host = get_host(disk);
 	host->usage_count--;
+	unlock_kernel();
+
 	return 0;
 }
 
@@ -1128,7 +1145,7 @@
  *  ida_ioctl does some miscellaneous stuff like reporting drive geometry,
  *  setting readahead and submitting commands from userspace to the controller.
  */
-static int ida_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, unsigned long arg)
+static int ida_locked_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, unsigned long arg)
 {
 	drv_info_t *drv = get_drv(bdev->bd_disk);
 	ctlr_info_t *host = get_host(bdev->bd_disk);
@@ -1162,7 +1179,8 @@
 		return error;
 	case IDAGETCTLRSIG:
 		if (!arg) return -EINVAL;
-		put_user(host->ctlr_sig, (int __user *)arg);
+		if (put_user(host->ctlr_sig, (int __user *)arg))
+			return -EFAULT;
 		return 0;
 	case IDAREVALIDATEVOLS:
 		if (MINOR(bdev->bd_dev) != 0)
@@ -1170,7 +1188,8 @@
 		return revalidate_allvol(host);
 	case IDADRIVERVERSION:
 		if (!arg) return -EINVAL;
-		put_user(DRIVER_VERSION, (unsigned long __user *)arg);
+		if (put_user(DRIVER_VERSION, (unsigned long __user *)arg))
+			return -EFAULT;
 		return 0;
 	case IDAGETPCIINFO:
 	{
@@ -1192,6 +1211,19 @@
 	}
 		
 }
+
+static int ida_ioctl(struct block_device *bdev, fmode_t mode,
+			     unsigned int cmd, unsigned long param)
+{
+	int ret;
+
+	lock_kernel();
+	ret = ida_locked_ioctl(bdev, mode, cmd, param);
+	unlock_kernel();
+
+	return ret;
+}
+
 /*
  * ida_ctlr_ioctl is for passing commands to the controller from userspace.
  * The command block (io) has already been copied to kernel space for us,
@@ -1225,17 +1257,11 @@
 	/* Pre submit processing */
 	switch(io->cmd) {
 	case PASSTHRU_A:
-		p = kmalloc(io->sg[0].size, GFP_KERNEL);
-		if (!p) 
-		{ 
-			error = -ENOMEM; 
-			cmd_free(h, c, 0); 
-			return(error);
-		}
-		if (copy_from_user(p, io->sg[0].addr, io->sg[0].size)) {
-			kfree(p);
-			cmd_free(h, c, 0); 
-			return -EFAULT;
+		p = memdup_user(io->sg[0].addr, io->sg[0].size);
+		if (IS_ERR(p)) {
+			error = PTR_ERR(p);
+			cmd_free(h, c, 0);
+			return error;
 		}
 		c->req.hdr.blk = pci_map_single(h->pci_dev, &(io->c), 
 				sizeof(ida_ioctl_t), 
@@ -1266,18 +1292,12 @@
 	case DIAG_PASS_THRU:
 	case COLLECT_BUFFER:
 	case WRITE_FLASH_ROM:
-		p = kmalloc(io->sg[0].size, GFP_KERNEL);
-		if (!p) 
- 		{ 
-                        error = -ENOMEM; 
-                        cmd_free(h, c, 0);
-                        return(error);
+		p = memdup_user(io->sg[0].addr, io->sg[0].size);
+		if (IS_ERR(p)) {
+			error = PTR_ERR(p);
+			cmd_free(h, c, 0);
+			return error;
                 }
-		if (copy_from_user(p, io->sg[0].addr, io->sg[0].size)) {
-			kfree(p);
-                        cmd_free(h, c, 0);
-			return -EFAULT;
-		}
 		c->req.sg[0].size = io->sg[0].size;
 		c->req.sg[0].addr = pci_map_single(h->pci_dev, p, 
 			c->req.sg[0].size, PCI_DMA_BIDIRECTIONAL); 

diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c
index df01899..9400845 100644
--- a/drivers/block/drbd/drbd_actlog.c
+++ b/drivers/block/drbd/drbd_actlog.c

@@ -79,8 +79,8 @@
 	md_io.error = 0;
 
 	if ((rw & WRITE) && !test_bit(MD_NO_BARRIER, &mdev->flags))
-		rw |= (1 << BIO_RW_BARRIER);
-	rw |= ((1<<BIO_RW_UNPLUG) | (1<<BIO_RW_SYNCIO));
+		rw |= REQ_HARDBARRIER;
+	rw |= REQ_UNPLUG | REQ_SYNC;
 
  retry:
 	bio = bio_alloc(GFP_NOIO, 1);
@@ -103,11 +103,11 @@
 	/* check for unsupported barrier op.
 	 * would rather check on EOPNOTSUPP, but that is not reliable.
 	 * don't try again for ANY return value != 0 */
-	if (unlikely(bio_rw_flagged(bio, BIO_RW_BARRIER) && !ok)) {
+	if (unlikely((bio->bi_rw & REQ_HARDBARRIER) && !ok)) {
 		/* Try again with no barrier */
 		dev_warn(DEV, "Barriers not supported on meta data device - disabling\n");
 		set_bit(MD_NO_BARRIER, &mdev->flags);
-		rw &= ~(1 << BIO_RW_BARRIER);
+		rw &= ~REQ_HARDBARRIER;
 		bio_put(bio);
 		goto retry;
 	}

diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index 485ed8c..352441b 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h

@@ -550,12 +550,6 @@
 	u32	offset;	 /* usecs the probe got sent after the reference time point */
 } __packed;
 
-struct delay_probe {
-	struct list_head list;
-	unsigned int seq_num;
-	struct timeval time;
-};
-
 /* DCBP: Drbd Compressed Bitmap Packet ... */
 static inline enum drbd_bitmap_code
 DCBP_get_code(struct p_compressed_bm *p)
@@ -942,11 +936,9 @@
 	unsigned int ko_count;
 	struct drbd_work  resync_work,
 			  unplug_work,
-			  md_sync_work,
-			  delay_probe_work;
+			  md_sync_work;
 	struct timer_list resync_timer;
 	struct timer_list md_sync_timer;
-	struct timer_list delay_probe_timer;
 
 	/* Used after attach while negotiating new disk state. */
 	union drbd_state new_state_tmp;
@@ -1062,12 +1054,6 @@
 	u64 ed_uuid; /* UUID of the exposed data */
 	struct mutex state_mutex;
 	char congestion_reason;  /* Why we where congested... */
-	struct list_head delay_probes; /* protected by peer_seq_lock */
-	int data_delay;   /* Delay of packets on the data-sock behind meta-sock */
-	unsigned int delay_seq; /* To generate sequence numbers of delay probes */
-	struct timeval dps_time; /* delay-probes-start-time */
-	unsigned int dp_volume_last;  /* send_cnt of last delay probe */
-	int c_sync_rate; /* current resync rate after delay_probe magic */
 };
 
 static inline struct drbd_conf *minor_to_mdev(unsigned int minor)

diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 7258c95..fa650dd 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c

@@ -2184,43 +2184,6 @@
 	return ok;
 }
 
-static int drbd_send_delay_probe(struct drbd_conf *mdev, struct drbd_socket *ds)
-{
-	struct p_delay_probe dp;
-	int offset, ok = 0;
-	struct timeval now;
-
-	mutex_lock(&ds->mutex);
-	if (likely(ds->socket)) {
-		do_gettimeofday(&now);
-		offset = now.tv_usec - mdev->dps_time.tv_usec +
-			 (now.tv_sec - mdev->dps_time.tv_sec) * 1000000;
-		dp.seq_num  = cpu_to_be32(mdev->delay_seq);
-		dp.offset   = cpu_to_be32(offset);
-
-		ok = _drbd_send_cmd(mdev, ds->socket, P_DELAY_PROBE,
-				    (struct p_header *)&dp, sizeof(dp), 0);
-	}
-	mutex_unlock(&ds->mutex);
-
-	return ok;
-}
-
-static int drbd_send_delay_probes(struct drbd_conf *mdev)
-{
-	int ok;
-
-	mdev->delay_seq++;
-	do_gettimeofday(&mdev->dps_time);
-	ok = drbd_send_delay_probe(mdev, &mdev->meta);
-	ok = ok && drbd_send_delay_probe(mdev, &mdev->data);
-
-	mdev->dp_volume_last = mdev->send_cnt;
-	mod_timer(&mdev->delay_probe_timer, jiffies + mdev->sync_conf.dp_interval * HZ / 10);
-
-	return ok;
-}
-
 /* called on sndtimeo
  * returns FALSE if we should retry,
  * TRUE if we think connection is dead
@@ -2369,31 +2332,6 @@
 	return 1;
 }
 
-static void consider_delay_probes(struct drbd_conf *mdev)
-{
-	if (mdev->state.conn != C_SYNC_SOURCE || mdev->agreed_pro_version < 93)
-		return;
-
-	if (mdev->dp_volume_last + mdev->sync_conf.dp_volume * 2 < mdev->send_cnt)
-		drbd_send_delay_probes(mdev);
-}
-
-static int w_delay_probes(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
-{
-	if (!cancel && mdev->state.conn == C_SYNC_SOURCE)
-		drbd_send_delay_probes(mdev);
-
-	return 1;
-}
-
-static void delay_probe_timer_fn(unsigned long data)
-{
-	struct drbd_conf *mdev = (struct drbd_conf *) data;
-
-	if (list_empty(&mdev->delay_probe_work.list))
-		drbd_queue_work(&mdev->data.work, &mdev->delay_probe_work);
-}
-
 /* Used to send write requests
  * R_PRIMARY -> Peer	(P_DATA)
  */
@@ -2425,15 +2363,15 @@
 	/* NOTE: no need to check if barriers supported here as we would
 	 *       not pass the test in make_request_common in that case
 	 */
-	if (bio_rw_flagged(req->master_bio, BIO_RW_BARRIER)) {
+	if (req->master_bio->bi_rw & REQ_HARDBARRIER) {
 		dev_err(DEV, "ASSERT FAILED would have set DP_HARDBARRIER\n");
 		/* dp_flags |= DP_HARDBARRIER; */
 	}
-	if (bio_rw_flagged(req->master_bio, BIO_RW_SYNCIO))
+	if (req->master_bio->bi_rw & REQ_SYNC)
 		dp_flags |= DP_RW_SYNC;
 	/* for now handle SYNCIO and UNPLUG
 	 * as if they still were one and the same flag */
-	if (bio_rw_flagged(req->master_bio, BIO_RW_UNPLUG))
+	if (req->master_bio->bi_rw & REQ_UNPLUG)
 		dp_flags |= DP_RW_SYNC;
 	if (mdev->state.conn >= C_SYNC_SOURCE &&
 	    mdev->state.conn <= C_PAUSED_SYNC_T)
@@ -2457,9 +2395,6 @@
 
 	drbd_put_data_sock(mdev);
 
-	if (ok)
-		consider_delay_probes(mdev);
-
 	return ok;
 }
 
@@ -2506,9 +2441,6 @@
 
 	drbd_put_data_sock(mdev);
 
-	if (ok)
-		consider_delay_probes(mdev);
-
 	return ok;
 }
 
@@ -2604,6 +2536,7 @@
 	unsigned long flags;
 	int rv = 0;
 
+	lock_kernel();
 	spin_lock_irqsave(&mdev->req_lock, flags);
 	/* to have a stable mdev->state.role
 	 * and no race with updating open_cnt */
@@ -2618,6 +2551,7 @@
 	if (!rv)
 		mdev->open_cnt++;
 	spin_unlock_irqrestore(&mdev->req_lock, flags);
+	unlock_kernel();
 
 	return rv;
 }
@@ -2625,7 +2559,9 @@
 static int drbd_release(struct gendisk *gd, fmode_t mode)
 {
 	struct drbd_conf *mdev = gd->private_data;
+	lock_kernel();
 	mdev->open_cnt--;
+	unlock_kernel();
 	return 0;
 }
 
@@ -2660,9 +2596,20 @@
 
 static void drbd_set_defaults(struct drbd_conf *mdev)
 {
-	mdev->sync_conf.after      = DRBD_AFTER_DEF;
-	mdev->sync_conf.rate       = DRBD_RATE_DEF;
-	mdev->sync_conf.al_extents = DRBD_AL_EXTENTS_DEF;
+	/* This way we get a compile error when sync_conf grows,
+	   and we forgot to initialize it here */
+	mdev->sync_conf = (struct syncer_conf) {
+		/* .rate = */		DRBD_RATE_DEF,
+		/* .after = */		DRBD_AFTER_DEF,
+		/* .al_extents = */	DRBD_AL_EXTENTS_DEF,
+		/* .verify_alg = */	{}, 0,
+		/* .cpu_mask = */	{}, 0,
+		/* .csums_alg = */	{}, 0,
+		/* .use_rle = */	0
+	};
+
+	/* Have to use that way, because the layout differs between
+	   big endian and little endian */
 	mdev->state = (union drbd_state) {
 		{ .role = R_SECONDARY,
 		  .peer = R_UNKNOWN,
@@ -2721,24 +2668,17 @@
 	INIT_LIST_HEAD(&mdev->unplug_work.list);
 	INIT_LIST_HEAD(&mdev->md_sync_work.list);
 	INIT_LIST_HEAD(&mdev->bm_io_work.w.list);
-	INIT_LIST_HEAD(&mdev->delay_probes);
-	INIT_LIST_HEAD(&mdev->delay_probe_work.list);
 
 	mdev->resync_work.cb  = w_resync_inactive;
 	mdev->unplug_work.cb  = w_send_write_hint;
 	mdev->md_sync_work.cb = w_md_sync;
 	mdev->bm_io_work.w.cb = w_bitmap_io;
-	mdev->delay_probe_work.cb = w_delay_probes;
 	init_timer(&mdev->resync_timer);
 	init_timer(&mdev->md_sync_timer);
-	init_timer(&mdev->delay_probe_timer);
 	mdev->resync_timer.function = resync_timer_fn;
 	mdev->resync_timer.data = (unsigned long) mdev;
 	mdev->md_sync_timer.function = md_sync_timer_fn;
 	mdev->md_sync_timer.data = (unsigned long) mdev;
-	mdev->delay_probe_timer.function = delay_probe_timer_fn;
-	mdev->delay_probe_timer.data = (unsigned long) mdev;
-
 
 	init_waitqueue_head(&mdev->misc_wait);
 	init_waitqueue_head(&mdev->state_wait);

diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index 2151f18..73131c5 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c

@@ -1557,10 +1557,6 @@
 		sc.rate       = DRBD_RATE_DEF;
 		sc.after      = DRBD_AFTER_DEF;
 		sc.al_extents = DRBD_AL_EXTENTS_DEF;
-		sc.dp_volume  = DRBD_DP_VOLUME_DEF;
-		sc.dp_interval = DRBD_DP_INTERVAL_DEF;
-		sc.throttle_th = DRBD_RS_THROTTLE_TH_DEF;
-		sc.hold_off_th = DRBD_RS_HOLD_OFF_TH_DEF;
 	} else
 		memcpy(&sc, &mdev->sync_conf, sizeof(struct syncer_conf));
 

diff --git a/drivers/block/drbd/drbd_proc.c b/drivers/block/drbd/drbd_proc.c
index d0f1767..be3374b 100644
--- a/drivers/block/drbd/drbd_proc.c
+++ b/drivers/block/drbd/drbd_proc.c

@@ -73,21 +73,14 @@
 	seq_printf(seq, "sync'ed:%3u.%u%% ", res / 10, res % 10);
 	/* if more than 1 GB display in MB */
 	if (mdev->rs_total > 0x100000L)
-		seq_printf(seq, "(%lu/%lu)M",
+		seq_printf(seq, "(%lu/%lu)M\n\t",
 			    (unsigned long) Bit2KB(rs_left >> 10),
 			    (unsigned long) Bit2KB(mdev->rs_total >> 10));
 	else
-		seq_printf(seq, "(%lu/%lu)K",
+		seq_printf(seq, "(%lu/%lu)K\n\t",
 			    (unsigned long) Bit2KB(rs_left),
 			    (unsigned long) Bit2KB(mdev->rs_total));
 
-	if (mdev->state.conn == C_SYNC_TARGET)
-		seq_printf(seq, " queue_delay: %d.%d ms\n\t",
-			   mdev->data_delay / 1000,
-			   (mdev->data_delay % 1000) / 100);
-	else if (mdev->state.conn == C_SYNC_SOURCE)
-		seq_printf(seq, " delay_probe: %u\n\t", mdev->delay_seq);
-
 	/* see drivers/md/md.c
 	 * We do not want to overflow, so the order of operands and
 	 * the * 100 / 100 trick are important. We do a +1 to be
@@ -135,14 +128,6 @@
 	else
 		seq_printf(seq, " (%ld)", dbdt);
 
-	if (mdev->state.conn == C_SYNC_TARGET) {
-		if (mdev->c_sync_rate > 1000)
-			seq_printf(seq, " want: %d,%03d",
-				   mdev->c_sync_rate / 1000, mdev->c_sync_rate % 1000);
-		else
-			seq_printf(seq, " want: %d", mdev->c_sync_rate);
-	}
-
 	seq_printf(seq, " K/sec\n");
 }
 

diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index ec1711f..081522d 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c

@@ -1180,7 +1180,7 @@
 	bio->bi_sector = sector;
 	bio->bi_bdev = mdev->ldev->backing_bdev;
 	/* we special case some flags in the multi-bio case, see below
-	 * (BIO_RW_UNPLUG, BIO_RW_BARRIER) */
+	 * (REQ_UNPLUG, REQ_HARDBARRIER) */
 	bio->bi_rw = rw;
 	bio->bi_private = e;
 	bio->bi_end_io = drbd_endio_sec;
@@ -1209,16 +1209,16 @@
 		bios = bios->bi_next;
 		bio->bi_next = NULL;
 
-		/* strip off BIO_RW_UNPLUG unless it is the last bio */
+		/* strip off REQ_UNPLUG unless it is the last bio */
 		if (bios)
-			bio->bi_rw &= ~(1<<BIO_RW_UNPLUG);
+			bio->bi_rw &= ~REQ_UNPLUG;
 
 		drbd_generic_make_request(mdev, fault_type, bio);
 
-		/* strip off BIO_RW_BARRIER,
+		/* strip off REQ_HARDBARRIER,
 		 * unless it is the first or last bio */
 		if (bios && bios->bi_next)
-			bios->bi_rw &= ~(1<<BIO_RW_BARRIER);
+			bios->bi_rw &= ~REQ_HARDBARRIER;
 	} while (bios);
 	maybe_kick_lo(mdev);
 	return 0;
@@ -1233,7 +1233,7 @@
 }
 
 /**
- * w_e_reissue() - Worker callback; Resubmit a bio, without BIO_RW_BARRIER set
+ * w_e_reissue() - Worker callback; Resubmit a bio, without REQ_HARDBARRIER set
  * @mdev:	DRBD device.
  * @w:		work object.
  * @cancel:	The connection will be closed anyways (unused in this callback)
@@ -1245,7 +1245,7 @@
 	   (and DE_BARRIER_IN_NEXT_EPOCH_ISSUED in the previous Epoch)
 	   so that we can finish that epoch in drbd_may_finish_epoch().
 	   That is necessary if we already have a long chain of Epochs, before
-	   we realize that BIO_RW_BARRIER is actually not supported */
+	   we realize that REQ_HARDBARRIER is actually not supported */
 
 	/* As long as the -ENOTSUPP on the barrier is reported immediately
 	   that will never trigger. If it is reported late, we will just
@@ -1824,14 +1824,14 @@
 		epoch = list_entry(e->epoch->list.prev, struct drbd_epoch, list);
 		if (epoch == e->epoch) {
 			set_bit(DE_CONTAINS_A_BARRIER, &e->epoch->flags);
-			rw |= (1<<BIO_RW_BARRIER);
+			rw |= REQ_HARDBARRIER;
 			e->flags |= EE_IS_BARRIER;
 		} else {
 			if (atomic_read(&epoch->epoch_size) > 1 ||
 			    !test_bit(DE_CONTAINS_A_BARRIER, &epoch->flags)) {
 				set_bit(DE_BARRIER_IN_NEXT_EPOCH_ISSUED, &epoch->flags);
 				set_bit(DE_CONTAINS_A_BARRIER, &e->epoch->flags);
-				rw |= (1<<BIO_RW_BARRIER);
+				rw |= REQ_HARDBARRIER;
 				e->flags |= EE_IS_BARRIER;
 			}
 		}
@@ -1841,10 +1841,10 @@
 	dp_flags = be32_to_cpu(p->dp_flags);
 	if (dp_flags & DP_HARDBARRIER) {
 		dev_err(DEV, "ASSERT FAILED would have submitted barrier request\n");
-		/* rw |= (1<<BIO_RW_BARRIER); */
+		/* rw |= REQ_HARDBARRIER; */
 	}
 	if (dp_flags & DP_RW_SYNC)
-		rw |= (1<<BIO_RW_SYNCIO) | (1<<BIO_RW_UNPLUG);
+		rw |= REQ_SYNC | REQ_UNPLUG;
 	if (dp_flags & DP_MAY_SET_IN_SYNC)
 		e->flags |= EE_MAY_SET_IN_SYNC;
 
@@ -3555,14 +3555,15 @@
 	return ok;
 }
 
-static int receive_skip(struct drbd_conf *mdev, struct p_header *h)
+static int receive_skip_(struct drbd_conf *mdev, struct p_header *h, int silent)
 {
 	/* TODO zero copy sink :) */
 	static char sink[128];
 	int size, want, r;
 
-	dev_warn(DEV, "skipping unknown optional packet type %d, l: %d!\n",
-	     h->command, h->length);
+	if (!silent)
+		dev_warn(DEV, "skipping unknown optional packet type %d, l: %d!\n",
+		     h->command, h->length);
 
 	size = h->length;
 	while (size > 0) {
@@ -3574,6 +3575,16 @@
 	return size == 0;
 }
 
+static int receive_skip(struct drbd_conf *mdev, struct p_header *h)
+{
+	return receive_skip_(mdev, h, 0);	/* silent == 0: receive_skip_() emits its dev_warn() */
+}
+
+static int receive_skip_silent(struct drbd_conf *mdev, struct p_header *h)
+{
+	return receive_skip_(mdev, h, 1);	/* silent == 1: receive_skip_() omits its dev_warn() */
+}
+
 static int receive_UnplugRemote(struct drbd_conf *mdev, struct p_header *h)
 {
 	if (mdev->state.disk >= D_INCONSISTENT)
@@ -3586,92 +3597,6 @@
 	return TRUE;
 }
 
-static void timeval_sub_us(struct timeval* tv, unsigned int us)
-{
-	tv->tv_sec -= us / 1000000;
-	us = us % 1000000;
-	if (tv->tv_usec > us) {
-		tv->tv_usec += 1000000;
-		tv->tv_sec--;
-	}
-	tv->tv_usec -= us;
-}
-
-static void got_delay_probe(struct drbd_conf *mdev, int from, struct p_delay_probe *p)
-{
-	struct delay_probe *dp;
-	struct list_head *le;
-	struct timeval now;
-	int seq_num;
-	int offset;
-	int data_delay;
-
-	seq_num = be32_to_cpu(p->seq_num);
-	offset  = be32_to_cpu(p->offset);
-
-	spin_lock(&mdev->peer_seq_lock);
-	if (!list_empty(&mdev->delay_probes)) {
-		if (from == USE_DATA_SOCKET)
-			le = mdev->delay_probes.next;
-		else
-			le = mdev->delay_probes.prev;
-
-		dp = list_entry(le, struct delay_probe, list);
-
-		if (dp->seq_num == seq_num) {
-			list_del(le);
-			spin_unlock(&mdev->peer_seq_lock);
-			do_gettimeofday(&now);
-			timeval_sub_us(&now, offset);
-			data_delay =
-				now.tv_usec - dp->time.tv_usec +
-				(now.tv_sec - dp->time.tv_sec) * 1000000;
-
-			if (data_delay > 0)
-				mdev->data_delay = data_delay;
-
-			kfree(dp);
-			return;
-		}
-
-		if (dp->seq_num > seq_num) {
-			spin_unlock(&mdev->peer_seq_lock);
-			dev_warn(DEV, "Previous allocation failure of struct delay_probe?\n");
-			return; /* Do not alloca a struct delay_probe.... */
-		}
-	}
-	spin_unlock(&mdev->peer_seq_lock);
-
-	dp = kmalloc(sizeof(struct delay_probe), GFP_NOIO);
-	if (!dp) {
-		dev_warn(DEV, "Failed to allocate a struct delay_probe, do not worry.\n");
-		return;
-	}
-
-	dp->seq_num = seq_num;
-	do_gettimeofday(&dp->time);
-	timeval_sub_us(&dp->time, offset);
-
-	spin_lock(&mdev->peer_seq_lock);
-	if (from == USE_DATA_SOCKET)
-		list_add(&dp->list, &mdev->delay_probes);
-	else
-		list_add_tail(&dp->list, &mdev->delay_probes);
-	spin_unlock(&mdev->peer_seq_lock);
-}
-
-static int receive_delay_probe(struct drbd_conf *mdev, struct p_header *h)
-{
-	struct p_delay_probe *p = (struct p_delay_probe *)h;
-
-	ERR_IF(h->length != (sizeof(*p)-sizeof(*h))) return FALSE;
-	if (drbd_recv(mdev, h->payload, h->length) != h->length)
-		return FALSE;
-
-	got_delay_probe(mdev, USE_DATA_SOCKET, p);
-	return TRUE;
-}
-
 typedef int (*drbd_cmd_handler_f)(struct drbd_conf *, struct p_header *);
 
 static drbd_cmd_handler_f drbd_default_handler[] = {
@@ -3695,7 +3620,7 @@
 	[P_OV_REQUEST]      = receive_DataRequest,
 	[P_OV_REPLY]        = receive_DataRequest,
 	[P_CSUM_RS_REQUEST]    = receive_DataRequest,
-	[P_DELAY_PROBE]     = receive_delay_probe,
+	[P_DELAY_PROBE]     = receive_skip_silent,
 	/* anything missing from this table is in
 	 * the asender_tbl, see get_asender_cmd */
 	[P_MAX_CMD]	    = NULL,
@@ -4472,11 +4397,9 @@
 	return TRUE;
 }
 
-static int got_delay_probe_m(struct drbd_conf *mdev, struct p_header *h)
+static int got_something_to_ignore_m(struct drbd_conf *mdev, struct p_header *h)
 {
-	struct p_delay_probe *p = (struct p_delay_probe *)h;
-
-	got_delay_probe(mdev, USE_META_SOCKET, p);
+	/* IGNORE */
 	return TRUE;
 }
 
@@ -4504,7 +4427,7 @@
 	[P_BARRIER_ACK]	    = { sizeof(struct p_barrier_ack), got_BarrierAck },
 	[P_STATE_CHG_REPLY] = { sizeof(struct p_req_state_reply), got_RqSReply },
 	[P_RS_IS_IN_SYNC]   = { sizeof(struct p_block_ack), got_IsInSync },
-	[P_DELAY_PROBE]     = { sizeof(struct p_delay_probe), got_delay_probe_m },
+	[P_DELAY_PROBE]     = { sizeof(struct p_delay_probe), got_something_to_ignore_m },
 	[P_MAX_CMD]	    = { 0, NULL },
 	};
 	if (cmd > P_MAX_CMD || asender_tbl[cmd].process == NULL)

diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
index 654f1ef..f761d98 100644
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c

@@ -997,7 +997,7 @@
 	 * because of those XXX, this is not yet enabled,
 	 * i.e. in drbd_init_set_defaults we set the NO_BARRIER_SUPP bit.
 	 */
-	if (unlikely(bio_rw_flagged(bio, BIO_RW_BARRIER) && test_bit(NO_BARRIER_SUPP, &mdev->flags))) {
+	if (unlikely(bio->bi_rw & REQ_HARDBARRIER) && test_bit(NO_BARRIER_SUPP, &mdev->flags)) {
 		/* dev_warn(DEV, "Rejecting barrier request as underlying device does not support\n"); */
 		bio_endio(bio, -EOPNOTSUPP);
 		return 0;

diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c
index b623cee..ca4a16c 100644
--- a/drivers/block/drbd/drbd_worker.c
+++ b/drivers/block/drbd/drbd_worker.c

@@ -424,18 +424,6 @@
 		drbd_queue_work(&mdev->data.work, &mdev->resync_work);
 }
 
-static int calc_resync_rate(struct drbd_conf *mdev)
-{
-	int d = mdev->data_delay / 1000; /* us -> ms */
-	int td = mdev->sync_conf.throttle_th * 100;  /* 0.1s -> ms */
-	int hd = mdev->sync_conf.hold_off_th * 100;  /* 0.1s -> ms */
-	int cr = mdev->sync_conf.rate;
-
-	return d <= td ? cr :
-		d >= hd ? 0 :
-		cr + (cr * (td - d) / (hd - td));
-}
-
 int w_make_resync_request(struct drbd_conf *mdev,
 		struct drbd_work *w, int cancel)
 {
@@ -473,8 +461,7 @@
 	max_segment_size = mdev->agreed_pro_version < 94 ?
 		queue_max_segment_size(mdev->rq_queue) : DRBD_MAX_SEGMENT_SIZE;
 
-	mdev->c_sync_rate = calc_resync_rate(mdev);
-	number = SLEEP_TIME * mdev->c_sync_rate  / ((BM_BLOCK_SIZE / 1024) * HZ);
+	number = SLEEP_TIME * mdev->sync_conf.rate  / ((BM_BLOCK_SIZE / 1024) * HZ);
 	pe = atomic_read(&mdev->rs_pending_cnt);
 
 	mutex_lock(&mdev->data.mutex);

diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
index 90c4038..cf04c1b 100644
--- a/drivers/block/floppy.c
+++ b/drivers/block/floppy.c

@@ -178,6 +178,7 @@
 #include <linux/slab.h>
 #include <linux/mm.h>
 #include <linux/bio.h>
+#include <linux/smp_lock.h>
 #include <linux/string.h>
 #include <linux/jiffies.h>
 #include <linux/fcntl.h>
@@ -514,8 +515,6 @@
 static DECLARE_WAIT_QUEUE_HEAD(fdc_wait);
 static DECLARE_WAIT_QUEUE_HEAD(command_done);
 
-#define NO_SIGNAL (!interruptible || !signal_pending(current))
-
 /* Errors during formatting are counted here. */
 static int format_errors;
 
@@ -539,7 +538,7 @@
 
 static int *errors;
 typedef void (*done_f)(int);
-static struct cont_t {
+static const struct cont_t {
 	void (*interrupt)(void);
 				/* this is called after the interrupt of the
 				 * main command */
@@ -578,7 +577,7 @@
 #define NEED_1_RECAL	-2
 #define NEED_2_RECAL	-3
 
-static int usage_count;
+static atomic_t usage_count = ATOMIC_INIT(0);
 
 /* buffer related variables */
 static int buffer_track = -1;
@@ -858,36 +857,15 @@
 }
 
 /* locks the driver */
-static int _lock_fdc(int drive, bool interruptible, int line)
+static int lock_fdc(int drive, bool interruptible)
 {
-	if (!usage_count) {
-		pr_err("Trying to lock fdc while usage count=0 at line %d\n",
-		       line);
+	if (WARN(atomic_read(&usage_count) == 0,
+		 "Trying to lock fdc while usage count=0\n"))
 		return -1;
-	}
 
-	if (test_and_set_bit(0, &fdc_busy)) {
-		DECLARE_WAITQUEUE(wait, current);
-		add_wait_queue(&fdc_wait, &wait);
+	if (wait_event_interruptible(fdc_wait, !test_and_set_bit(0, &fdc_busy)))
+		return -EINTR;
 
-		for (;;) {
-			set_current_state(TASK_INTERRUPTIBLE);
-
-			if (!test_and_set_bit(0, &fdc_busy))
-				break;
-
-			schedule();
-
-			if (!NO_SIGNAL) {
-				remove_wait_queue(&fdc_wait, &wait);
-				return -EINTR;
-			}
-		}
-
-		set_current_state(TASK_RUNNING);
-		remove_wait_queue(&fdc_wait, &wait);
-		flush_scheduled_work();
-	}
 	command_status = FD_COMMAND_NONE;
 
 	__reschedule_timeout(drive, "lock fdc");
@@ -895,11 +873,8 @@
 	return 0;
 }
 
-#define lock_fdc(drive, interruptible)			\
-	_lock_fdc(drive, interruptible, __LINE__)
-
 /* unlocks the driver */
-static inline void unlock_fdc(void)
+static void unlock_fdc(void)
 {
 	unsigned long flags;
 
@@ -1224,7 +1199,7 @@
 /* Set perpendicular mode as required, based on data rate, if supported.
  * 82077 Now tested. 1Mbps data rate only possible with 82077-1.
  */
-static inline void perpendicular_mode(void)
+static void perpendicular_mode(void)
 {
 	unsigned char perp_mode;
 
@@ -1995,14 +1970,14 @@
 	wake_up(&command_done);
 }
 
-static struct cont_t wakeup_cont = {
+static const struct cont_t wakeup_cont = {
 	.interrupt	= empty,
 	.redo		= do_wakeup,
 	.error		= empty,
 	.done		= (done_f)empty
 };
 
-static struct cont_t intr_cont = {
+static const struct cont_t intr_cont = {
 	.interrupt	= empty,
 	.redo		= process_fd_request,
 	.error		= empty,
@@ -2015,25 +1990,10 @@
 
 	schedule_bh(handler);
 
-	if (command_status < 2 && NO_SIGNAL) {
-		DECLARE_WAITQUEUE(wait, current);
-
-		add_wait_queue(&command_done, &wait);
-		for (;;) {
-			set_current_state(interruptible ?
-					  TASK_INTERRUPTIBLE :
-					  TASK_UNINTERRUPTIBLE);
-
-			if (command_status >= 2 || !NO_SIGNAL)
-				break;
-
-			is_alive(__func__, "");
-			schedule();
-		}
-
-		set_current_state(TASK_RUNNING);
-		remove_wait_queue(&command_done, &wait);
-	}
+	if (interruptible)
+		wait_event_interruptible(command_done, command_status >= 2);
+	else
+		wait_event(command_done, command_status >= 2);
 
 	if (command_status < 2) {
 		cancel_activity();
@@ -2223,7 +2183,7 @@
 	debugt(__func__, "queue format request");
 }
 
-static struct cont_t format_cont = {
+static const struct cont_t format_cont = {
 	.interrupt	= format_interrupt,
 	.redo		= redo_format,
 	.error		= bad_flp_intr,
@@ -2583,10 +2543,8 @@
 	int tracksize;
 	int ssize;
 
-	if (max_buffer_sectors == 0) {
-		pr_info("VFS: Block I/O scheduled on unopened device\n");
+	if (WARN(max_buffer_sectors == 0, "VFS: Block I/O scheduled on unopened device\n"))
 		return 0;
-	}
 
 	set_fdc((long)current_req->rq_disk->private_data);
 
@@ -2921,7 +2879,7 @@
 	return;
 }
 
-static struct cont_t rw_cont = {
+static const struct cont_t rw_cont = {
 	.interrupt	= rw_interrupt,
 	.redo		= redo_fd_request,
 	.error		= bad_flp_intr,
@@ -2936,19 +2894,16 @@
 
 static void do_fd_request(struct request_queue *q)
 {
-	if (max_buffer_sectors == 0) {
-		pr_info("VFS: %s called on non-open device\n", __func__);
+	if (WARN(max_buffer_sectors == 0,
+		 "VFS: %s called on non-open device\n", __func__))
 		return;
-	}
 
-	if (usage_count == 0) {
-		pr_info("warning: usage count=0, current_req=%p exiting\n",
-			current_req);
-		pr_info("sect=%ld type=%x flags=%x\n",
-			(long)blk_rq_pos(current_req), current_req->cmd_type,
-			current_req->cmd_flags);
+	if (WARN(atomic_read(&usage_count) == 0,
+		 "warning: usage count=0, current_req=%p sect=%ld type=%x flags=%x\n",
+		 current_req, (long)blk_rq_pos(current_req), current_req->cmd_type,
+		 current_req->cmd_flags))
 		return;
-	}
+
 	if (test_bit(0, &fdc_busy)) {
 		/* fdc busy, this new request will be treated when the
 		   current one is done */
@@ -2960,7 +2915,7 @@
 	is_alive(__func__, "");
 }
 
-static struct cont_t poll_cont = {
+static const struct cont_t poll_cont = {
 	.interrupt	= success_and_wakeup,
 	.redo		= floppy_ready,
 	.error		= generic_failure,
@@ -2991,7 +2946,7 @@
 	pr_info("weird, reset interrupt called\n");
 }
 
-static struct cont_t reset_cont = {
+static const struct cont_t reset_cont = {
 	.interrupt	= reset_intr,
 	.redo		= success_and_wakeup,
 	.error		= generic_failure,
@@ -3033,7 +2988,7 @@
 	return copy_from_user(address, param, size) ? -EFAULT : 0;
 }
 
-static inline const char *drive_name(int type, int drive)
+static const char *drive_name(int type, int drive)
 {
 	struct floppy_struct *floppy;
 
@@ -3096,14 +3051,14 @@
 	generic_done(flag);
 }
 
-static struct cont_t raw_cmd_cont = {
+static const struct cont_t raw_cmd_cont = {
 	.interrupt	= success_and_wakeup,
 	.redo		= floppy_start,
 	.error		= generic_failure,
 	.done		= raw_cmd_done
 };
 
-static inline int raw_cmd_copyout(int cmd, void __user *param,
+static int raw_cmd_copyout(int cmd, void __user *param,
 				  struct floppy_raw_cmd *ptr)
 {
 	int ret;
@@ -3148,7 +3103,7 @@
 	}
 }
 
-static inline int raw_cmd_copyin(int cmd, void __user *param,
+static int raw_cmd_copyin(int cmd, void __user *param,
 				 struct floppy_raw_cmd **rcmd)
 {
 	struct floppy_raw_cmd *ptr;
@@ -3266,7 +3221,7 @@
 	return 0;
 }
 
-static inline int set_geometry(unsigned int cmd, struct floppy_struct *g,
+static int set_geometry(unsigned int cmd, struct floppy_struct *g,
 			       int drive, int type, struct block_device *bdev)
 {
 	int cnt;
@@ -3337,7 +3292,7 @@
 }
 
 /* handle obsolete ioctl's */
-static int ioctl_table[] = {
+static unsigned int ioctl_table[] = {
 	FDCLRPRM,
 	FDSETPRM,
 	FDDEFPRM,
@@ -3365,7 +3320,7 @@
 	FDTWADDLE
 };
 
-static inline int normalize_ioctl(int *cmd, int *size)
+static int normalize_ioctl(unsigned int *cmd, int *size)
 {
 	int i;
 
@@ -3417,7 +3372,7 @@
 	return 0;
 }
 
-static int fd_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd,
+static int fd_locked_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd,
 		    unsigned long param)
 {
 	int drive = (long)bdev->bd_disk->private_data;
@@ -3593,6 +3548,18 @@
 	return 0;
 }
 
+static int fd_ioctl(struct block_device *bdev, fmode_t mode,
+			     unsigned int cmd, unsigned long param)
+{
+	int ret;
+
+	lock_kernel();	/* run fd_locked_ioctl() under the big kernel lock */
+	ret = fd_locked_ioctl(bdev, mode, cmd, param);
+	unlock_kernel();
+
+	return ret;	/* result of fd_locked_ioctl(), passed back unchanged */
+}
+
 static void __init config_types(void)
 {
 	bool has_drive = false;
@@ -3649,6 +3616,7 @@
 {
 	int drive = (long)disk->private_data;
 
+	lock_kernel();
 	mutex_lock(&open_lock);
 	if (UDRS->fd_ref < 0)
 		UDRS->fd_ref = 0;
@@ -3659,6 +3627,7 @@
 	if (!UDRS->fd_ref)
 		opened_bdev[drive] = NULL;
 	mutex_unlock(&open_lock);
+	unlock_kernel();
 
 	return 0;
 }
@@ -3676,6 +3645,7 @@
 	int res = -EBUSY;
 	char *tmp;
 
+	lock_kernel();
 	mutex_lock(&open_lock);
 	old_dev = UDRS->fd_device;
 	if (opened_bdev[drive] && opened_bdev[drive] != bdev)
@@ -3752,6 +3722,7 @@
 			goto out;
 	}
 	mutex_unlock(&open_lock);
+	unlock_kernel();
 	return 0;
 out:
 	if (UDRS->fd_ref < 0)
@@ -3762,6 +3733,7 @@
 		opened_bdev[drive] = NULL;
 out2:
 	mutex_unlock(&open_lock);
+	unlock_kernel();
 	return res;
 }
 
@@ -3829,6 +3801,7 @@
 	bio.bi_size = size;
 	bio.bi_bdev = bdev;
 	bio.bi_sector = 0;
+	bio.bi_flags = BIO_QUIET;
 	init_completion(&complete);
 	bio.bi_private = &complete;
 	bio.bi_end_io = floppy_rb0_complete;
@@ -3857,10 +3830,10 @@
 	if (test_bit(FD_DISK_CHANGED_BIT, &UDRS->flags) ||
 	    test_bit(FD_VERIFY_BIT, &UDRS->flags) ||
 	    test_bit(drive, &fake_change) || NO_GEOM) {
-		if (usage_count == 0) {
-			pr_info("VFS: revalidate called on non-open device.\n");
+		if (WARN(atomic_read(&usage_count) == 0,
+			 "VFS: revalidate called on non-open device.\n"))
 			return -EFAULT;
-		}
+
 		lock_fdc(drive, false);
 		cf = (test_bit(FD_DISK_CHANGED_BIT, &UDRS->flags) ||
 		      test_bit(FD_VERIFY_BIT, &UDRS->flags));
@@ -3893,7 +3866,7 @@
 	.owner			= THIS_MODULE,
 	.open			= floppy_open,
 	.release		= floppy_release,
-	.locked_ioctl		= fd_ioctl,
+	.ioctl			= fd_ioctl,
 	.getgeo			= fd_getgeo,
 	.media_changed		= check_floppy_change,
 	.revalidate_disk	= floppy_revalidate,
@@ -4126,7 +4099,7 @@
 	return sprintf(buf, "%X\n", UDP->cmos);
 }
 
-DEVICE_ATTR(cmos, S_IRUGO, floppy_cmos_show, NULL);
+static DEVICE_ATTR(cmos, S_IRUGO, floppy_cmos_show, NULL);
 
 static void floppy_device_release(struct device *dev)
 {
@@ -4175,6 +4148,9 @@
 	int i, unit, drive;
 	int err, dr;
 
+	set_debugt();
+	interruptjiffies = resultjiffies = jiffies;
+
 #if defined(CONFIG_PPC)
 	if (check_legacy_ioport(FDC1))
 		return -ENODEV;
@@ -4353,7 +4329,7 @@
 	platform_device_unregister(&floppy_device[drive]);
 out_flush_work:
 	flush_scheduled_work();
-	if (usage_count)
+	if (atomic_read(&usage_count))
 		floppy_release_irq_and_dma();
 out_unreg_region:
 	blk_unregister_region(MKDEV(FLOPPY_MAJOR, 0), 256);
@@ -4370,8 +4346,6 @@
 	return err;
 }
 
-static DEFINE_SPINLOCK(floppy_usage_lock);
-
 static const struct io_region {
 	int offset;
 	int size;
@@ -4417,14 +4391,8 @@
 
 static int floppy_grab_irq_and_dma(void)
 {
-	unsigned long flags;
-
-	spin_lock_irqsave(&floppy_usage_lock, flags);
-	if (usage_count++) {
-		spin_unlock_irqrestore(&floppy_usage_lock, flags);
+	if (atomic_inc_return(&usage_count) > 1)
 		return 0;
-	}
-	spin_unlock_irqrestore(&floppy_usage_lock, flags);
 
 	/*
 	 * We might have scheduled a free_irq(), wait it to
@@ -4435,9 +4403,7 @@
 	if (fd_request_irq()) {
 		DPRINT("Unable to grab IRQ%d for the floppy driver\n",
 		       FLOPPY_IRQ);
-		spin_lock_irqsave(&floppy_usage_lock, flags);
-		usage_count--;
-		spin_unlock_irqrestore(&floppy_usage_lock, flags);
+		atomic_dec(&usage_count);
 		return -1;
 	}
 	if (fd_request_dma()) {
@@ -4447,9 +4413,7 @@
 			use_virtual_dma = can_use_virtual_dma = 1;
 		if (!(can_use_virtual_dma & 1)) {
 			fd_free_irq();
-			spin_lock_irqsave(&floppy_usage_lock, flags);
-			usage_count--;
-			spin_unlock_irqrestore(&floppy_usage_lock, flags);
+			atomic_dec(&usage_count);
 			return -1;
 		}
 	}
@@ -4484,9 +4448,7 @@
 	fd_free_dma();
 	while (--fdc >= 0)
 		floppy_release_regions(fdc);
-	spin_lock_irqsave(&floppy_usage_lock, flags);
-	usage_count--;
-	spin_unlock_irqrestore(&floppy_usage_lock, flags);
+	atomic_dec(&usage_count);
 	return -1;
 }
 
@@ -4498,14 +4460,10 @@
 #endif
 	long tmpsize;
 	unsigned long tmpaddr;
-	unsigned long flags;
 
-	spin_lock_irqsave(&floppy_usage_lock, flags);
-	if (--usage_count) {
-		spin_unlock_irqrestore(&floppy_usage_lock, flags);
+	if (!atomic_dec_and_test(&usage_count))
 		return;
-	}
-	spin_unlock_irqrestore(&floppy_usage_lock, flags);
+
 	if (irqdma_allocated) {
 		fd_disable_dma();
 		fd_free_dma();
@@ -4598,7 +4556,7 @@
 	del_timer_sync(&fd_timer);
 	blk_cleanup_queue(floppy_queue);
 
-	if (usage_count)
+	if (atomic_read(&usage_count))
 		floppy_release_irq_and_dma();
 
 	/* eject disk, if any */

diff --git a/drivers/block/hd.c b/drivers/block/hd.c
index 81c78b3..30ec6b3 100644
--- a/drivers/block/hd.c
+++ b/drivers/block/hd.c

@@ -627,7 +627,7 @@
 		req_data_dir(req) == READ ? "read" : "writ",
 		cyl, head, sec, nsect, req->buffer);
 #endif
-	if (blk_fs_request(req)) {
+	if (req->cmd_type == REQ_TYPE_FS) {
 		switch (rq_data_dir(req)) {
 		case READ:
 			hd_out(disk, nsect, sec, head, cyl, ATA_CMD_PIO_READ,

diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 6120922..f3c636d 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c

@@ -67,6 +67,7 @@
 #include <linux/compat.h>
 #include <linux/suspend.h>
 #include <linux/freezer.h>
+#include <linux/smp_lock.h>
 #include <linux/writeback.h>
 #include <linux/buffer_head.h>		/* for invalidate_bdev() */
 #include <linux/completion.h>
@@ -476,7 +477,7 @@
 	pos = ((loff_t) bio->bi_sector << 9) + lo->lo_offset;
 
 	if (bio_rw(bio) == WRITE) {
-		bool barrier = bio_rw_flagged(bio, BIO_RW_BARRIER);
+		bool barrier = (bio->bi_rw & REQ_HARDBARRIER);
 		struct file *file = lo->lo_backing_file;
 
 		if (barrier) {
@@ -831,7 +832,7 @@
 	lo->lo_queue->unplug_fn = loop_unplug;
 
 	if (!(lo_flags & LO_FLAGS_READ_ONLY) && file->f_op->fsync)
-		blk_queue_ordered(lo->lo_queue, QUEUE_ORDERED_DRAIN, NULL);
+		blk_queue_ordered(lo->lo_queue, QUEUE_ORDERED_DRAIN);
 
 	set_capacity(lo->lo_disk, size);
 	bd_set_size(bdev, size << 9);
@@ -1408,9 +1409,11 @@
 {
 	struct loop_device *lo = bdev->bd_disk->private_data;
 
+	lock_kernel();
 	mutex_lock(&lo->lo_ctl_mutex);
 	lo->lo_refcnt++;
 	mutex_unlock(&lo->lo_ctl_mutex);
+	unlock_kernel();
 
 	return 0;
 }
@@ -1420,6 +1423,7 @@
 	struct loop_device *lo = disk->private_data;
 	int err;
 
+	lock_kernel();
 	mutex_lock(&lo->lo_ctl_mutex);
 
 	if (--lo->lo_refcnt)
@@ -1444,6 +1448,7 @@
 out:
 	mutex_unlock(&lo->lo_ctl_mutex);
 out_unlocked:
+	unlock_kernel();
 	return 0;
 }
 

diff --git a/drivers/block/mg_disk.c b/drivers/block/mg_disk.c
index 28db925..b82c5ce 100644
--- a/drivers/block/mg_disk.c
+++ b/drivers/block/mg_disk.c

@@ -670,7 +670,7 @@
 				break;
 		}
 
-		if (unlikely(!blk_fs_request(host->req))) {
+		if (unlikely(host->req->cmd_type != REQ_TYPE_FS)) {
 			mg_end_request_cur(host, -EIO);
 			continue;
 		}
@@ -756,7 +756,7 @@
 			continue;
 		}
 
-		if (unlikely(!blk_fs_request(req))) {
+		if (unlikely(req->cmd_type != REQ_TYPE_FS)) {
 			mg_end_request_cur(host, -EIO);
 			continue;
 		}

diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 16c3c86..0daa422 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c

@@ -24,6 +24,7 @@
 #include <linux/errno.h>
 #include <linux/file.h>
 #include <linux/ioctl.h>
+#include <linux/smp_lock.h>
 #include <linux/compiler.h>
 #include <linux/err.h>
 #include <linux/kernel.h>
@@ -448,7 +449,7 @@
 
 static void nbd_handle_req(struct nbd_device *lo, struct request *req)
 {
-	if (!blk_fs_request(req))
+	if (req->cmd_type != REQ_TYPE_FS)
 		goto error_out;
 
 	nbd_cmd(req) = NBD_CMD_READ;
@@ -716,9 +717,11 @@
 	dprintk(DBG_IOCTL, "%s: nbd_ioctl cmd=%s(0x%x) arg=%lu\n",
 			lo->disk->disk_name, ioctl_cmd_to_ascii(cmd), cmd, arg);
 
+	lock_kernel();
 	mutex_lock(&lo->tx_lock);
 	error = __nbd_ioctl(bdev, lo, cmd, arg);
 	mutex_unlock(&lo->tx_lock);
+	unlock_kernel();
 
 	return error;
 }
@@ -726,7 +729,7 @@
 static const struct block_device_operations nbd_fops =
 {
 	.owner =	THIS_MODULE,
-	.locked_ioctl =	nbd_ioctl,
+	.ioctl =	nbd_ioctl,
 };
 
 /*

diff --git a/drivers/block/osdblk.c b/drivers/block/osdblk.c
index 6cd8b70..2284b4f 100644
--- a/drivers/block/osdblk.c
+++ b/drivers/block/osdblk.c

@@ -310,7 +310,8 @@
 			break;
 
 		/* filter out block requests we don't understand */
-		if (!blk_fs_request(rq) && !blk_barrier_rq(rq)) {
+		if (rq->cmd_type != REQ_TYPE_FS &&
+		    !(rq->cmd_flags & REQ_HARDBARRIER)) {
 			blk_end_request_all(rq, 0);
 			continue;
 		}
@@ -322,7 +323,7 @@
 		 * driver-specific, etc.
 		 */
 
-		do_flush = (rq->special == (void *) 0xdeadbeefUL);
+		do_flush = rq->cmd_flags & REQ_FLUSH;
 		do_write = (rq_data_dir(rq) == WRITE);
 
 		if (!do_flush) { /* osd_flush does not use a bio */
@@ -379,14 +380,6 @@
 	}
 }
 
-static void osdblk_prepare_flush(struct request_queue *q, struct request *rq)
-{
-	/* add driver-specific marker, to indicate that this request
-	 * is a flush command
-	 */
-	rq->special = (void *) 0xdeadbeefUL;
-}
-
 static void osdblk_free_disk(struct osdblk_device *osdev)
 {
 	struct gendisk *disk = osdev->disk;
@@ -446,7 +439,7 @@
 	blk_queue_stack_limits(q, osd_request_queue(osdev->osd));
 
 	blk_queue_prep_rq(q, blk_queue_start_tag);
-	blk_queue_ordered(q, QUEUE_ORDERED_DRAIN_FLUSH, osdblk_prepare_flush);
+	blk_queue_ordered(q, QUEUE_ORDERED_DRAIN_FLUSH);
 
 	disk->queue = q;
 

diff --git a/drivers/block/paride/pcd.c b/drivers/block/paride/pcd.c
index 71acf4e..76f8565 100644
--- a/drivers/block/paride/pcd.c
+++ b/drivers/block/paride/pcd.c

@@ -138,6 +138,7 @@
 #include <linux/cdrom.h>
 #include <linux/spinlock.h>
 #include <linux/blkdev.h>
+#include <linux/smp_lock.h>
 #include <asm/uaccess.h>
 
 static DEFINE_SPINLOCK(pcd_lock);
@@ -224,13 +225,21 @@
 static int pcd_block_open(struct block_device *bdev, fmode_t mode)
 {
 	struct pcd_unit *cd = bdev->bd_disk->private_data;
-	return cdrom_open(&cd->info, bdev, mode);
+	int ret;
+
+	lock_kernel();
+	ret = cdrom_open(&cd->info, bdev, mode);
+	unlock_kernel();
+
+	return ret;
 }
 
 static int pcd_block_release(struct gendisk *disk, fmode_t mode)
 {
 	struct pcd_unit *cd = disk->private_data;
+	lock_kernel();
 	cdrom_release(&cd->info, mode);
+	unlock_kernel();
 	return 0;
 }
 
@@ -238,7 +247,13 @@
 				unsigned cmd, unsigned long arg)
 {
 	struct pcd_unit *cd = bdev->bd_disk->private_data;
-	return cdrom_ioctl(&cd->info, bdev, mode, cmd, arg);
+	int ret;
+
+	lock_kernel();
+	ret = cdrom_ioctl(&cd->info, bdev, mode, cmd, arg);
+	unlock_kernel();
+
+	return ret;
 }
 
 static int pcd_block_media_changed(struct gendisk *disk)
@@ -251,7 +266,7 @@
 	.owner		= THIS_MODULE,
 	.open		= pcd_block_open,
 	.release	= pcd_block_release,
-	.locked_ioctl	= pcd_block_ioctl,
+	.ioctl		= pcd_block_ioctl,
 	.media_changed	= pcd_block_media_changed,
 };
 

diff --git a/drivers/block/paride/pd.c b/drivers/block/paride/pd.c
index c1e5cd0..985f0d4 100644
--- a/drivers/block/paride/pd.c
+++ b/drivers/block/paride/pd.c

@@ -153,6 +153,7 @@
 #include <linux/blkdev.h>
 #include <linux/blkpg.h>
 #include <linux/kernel.h>
+#include <linux/smp_lock.h>
 #include <asm/uaccess.h>
 #include <linux/workqueue.h>
 
@@ -439,7 +440,7 @@
 
 static enum action do_pd_io_start(void)
 {
-	if (blk_special_request(pd_req)) {
+	if (pd_req->cmd_type == REQ_TYPE_SPECIAL) {
 		phase = pd_special;
 		return pd_special();
 	}
@@ -735,12 +736,14 @@
 {
 	struct pd_unit *disk = bdev->bd_disk->private_data;
 
+	lock_kernel();
 	disk->access++;
 
 	if (disk->removable) {
 		pd_special_command(disk, pd_media_check);
 		pd_special_command(disk, pd_door_lock);
 	}
+	unlock_kernel();
 	return 0;
 }
 
@@ -768,8 +771,10 @@
 
 	switch (cmd) {
 	case CDROMEJECT:
+		lock_kernel();
 		if (disk->access == 1)
 			pd_special_command(disk, pd_eject);
+		unlock_kernel();
 		return 0;
 	default:
 		return -EINVAL;
@@ -780,8 +785,10 @@
 {
 	struct pd_unit *disk = p->private_data;
 
+	lock_kernel();
 	if (!--disk->access && disk->removable)
 		pd_special_command(disk, pd_door_unlock);
+	unlock_kernel();
 
 	return 0;
 }
@@ -812,7 +819,7 @@
 	.owner		= THIS_MODULE,
 	.open		= pd_open,
 	.release	= pd_release,
-	.locked_ioctl	= pd_ioctl,
+	.ioctl		= pd_ioctl,
 	.getgeo		= pd_getgeo,
 	.media_changed	= pd_check_media,
 	.revalidate_disk= pd_revalidate

diff --git a/drivers/block/paride/pf.c b/drivers/block/paride/pf.c
index c059aab..4457b49 100644
--- a/drivers/block/paride/pf.c
+++ b/drivers/block/paride/pf.c

@@ -152,6 +152,7 @@
 #include <linux/spinlock.h>
 #include <linux/blkdev.h>
 #include <linux/blkpg.h>
+#include <linux/smp_lock.h>
 #include <asm/uaccess.h>
 
 static DEFINE_SPINLOCK(pf_spin_lock);
@@ -266,7 +267,7 @@
 	.owner		= THIS_MODULE,
 	.open		= pf_open,
 	.release	= pf_release,
-	.locked_ioctl	= pf_ioctl,
+	.ioctl		= pf_ioctl,
 	.getgeo		= pf_getgeo,
 	.media_changed	= pf_check_media,
 };
@@ -299,20 +300,26 @@
 static int pf_open(struct block_device *bdev, fmode_t mode)
 {
 	struct pf_unit *pf = bdev->bd_disk->private_data;
+	int ret;
 
+	lock_kernel();
 	pf_identify(pf);
 
+	ret = -ENODEV;
 	if (pf->media_status == PF_NM)
-		return -ENODEV;
+		goto out;
 
+	ret = -EROFS;
 	if ((pf->media_status == PF_RO) && (mode & FMODE_WRITE))
-		return -EROFS;
+		goto out;
 
+	ret = 0;
 	pf->access++;
 	if (pf->removable)
 		pf_lock(pf, 1);
-
-	return 0;
+out:
+	unlock_kernel();
+	return ret;
 }
 
 static int pf_getgeo(struct block_device *bdev, struct hd_geometry *geo)
@@ -342,7 +349,10 @@
 
 	if (pf->access != 1)
 		return -EBUSY;
+	lock_kernel();
 	pf_eject(pf);
+	unlock_kernel();
+
 	return 0;
 }
 
@@ -350,14 +360,18 @@
 {
 	struct pf_unit *pf = disk->private_data;
 
-	if (pf->access <= 0)
+	lock_kernel();
+	if (pf->access <= 0) {
+		unlock_kernel();
 		return -EINVAL;
+	}
 
 	pf->access--;
 
 	if (!pf->access && pf->removable)
 		pf_lock(pf, 0);
 
+	unlock_kernel();
 	return 0;
 
 }

diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
index 8a549db..b1cbeb5 100644
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c

@@ -57,6 +57,7 @@
 #include <linux/seq_file.h>
 #include <linux/miscdevice.h>
 #include <linux/freezer.h>
+#include <linux/smp_lock.h>
 #include <linux/mutex.h>
 #include <linux/slab.h>
 #include <scsi/scsi_cmnd.h>
@@ -1221,7 +1222,7 @@
 	pkt->bio->bi_flags = 1 << BIO_UPTODATE;
 	pkt->bio->bi_idx = 0;
 
-	BUG_ON(pkt->bio->bi_rw != (1 << BIO_RW));
+	BUG_ON(pkt->bio->bi_rw != REQ_WRITE);
 	BUG_ON(pkt->bio->bi_vcnt != pkt->frames);
 	BUG_ON(pkt->bio->bi_size != pkt->frames * CD_FRAMESIZE);
 	BUG_ON(pkt->bio->bi_end_io != pkt_end_io_packet_write);
@@ -2382,6 +2383,7 @@
 
 	VPRINTK(DRIVER_NAME": entering open\n");
 
+	lock_kernel();
 	mutex_lock(&ctl_mutex);
 	pd = pkt_find_dev_from_minor(MINOR(bdev->bd_dev));
 	if (!pd) {
@@ -2409,6 +2411,7 @@
 	}
 
 	mutex_unlock(&ctl_mutex);
+	unlock_kernel();
 	return 0;
 
 out_dec:
@@ -2416,6 +2419,7 @@
 out:
 	VPRINTK(DRIVER_NAME": failed open (%d)\n", ret);
 	mutex_unlock(&ctl_mutex);
+	unlock_kernel();
 	return ret;
 }
 
@@ -2424,6 +2428,7 @@
 	struct pktcdvd_device *pd = disk->private_data;
 	int ret = 0;
 
+	lock_kernel();
 	mutex_lock(&ctl_mutex);
 	pd->refcnt--;
 	BUG_ON(pd->refcnt < 0);
@@ -2432,6 +2437,7 @@
 		pkt_release_dev(pd, flush);
 	}
 	mutex_unlock(&ctl_mutex);
+	unlock_kernel();
 	return ret;
 }
 
@@ -2762,10 +2768,12 @@
 static int pkt_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, unsigned long arg)
 {
 	struct pktcdvd_device *pd = bdev->bd_disk->private_data;
+	int ret;
 
 	VPRINTK("pkt_ioctl: cmd %x, dev %d:%d\n", cmd,
 		MAJOR(bdev->bd_dev), MINOR(bdev->bd_dev));
 
+	lock_kernel();
 	switch (cmd) {
 	case CDROMEJECT:
 		/*
@@ -2783,14 +2791,16 @@
 	case CDROM_LAST_WRITTEN:
 	case CDROM_SEND_PACKET:
 	case SCSI_IOCTL_SEND_COMMAND:
-		return __blkdev_driver_ioctl(pd->bdev, mode, cmd, arg);
+		ret = __blkdev_driver_ioctl(pd->bdev, mode, cmd, arg);
+		break;
 
 	default:
 		VPRINTK(DRIVER_NAME": Unknown ioctl for %s (%x)\n", pd->name, cmd);
-		return -ENOTTY;
+		ret = -ENOTTY;
 	}
+	unlock_kernel();
 
-	return 0;
+	return ret;
 }
 
 static int pkt_media_changed(struct gendisk *disk)
@@ -2812,7 +2822,7 @@
 	.owner =		THIS_MODULE,
 	.open =			pkt_open,
 	.release =		pkt_close,
-	.locked_ioctl =		pkt_ioctl,
+	.ioctl =		pkt_ioctl,
 	.media_changed =	pkt_media_changed,
 };
 

diff --git a/drivers/block/ps3disk.c b/drivers/block/ps3disk.c
index 3b419e3..e9da874 100644
--- a/drivers/block/ps3disk.c
+++ b/drivers/block/ps3disk.c

@@ -196,13 +196,12 @@
 	dev_dbg(&dev->sbd.core, "%s:%u\n", __func__, __LINE__);
 
 	while ((req = blk_fetch_request(q))) {
-		if (blk_fs_request(req)) {
-			if (ps3disk_submit_request_sg(dev, req))
-				break;
-		} else if (req->cmd_type == REQ_TYPE_LINUX_BLOCK &&
-			   req->cmd[0] == REQ_LB_OP_FLUSH) {
+		if (req->cmd_flags & REQ_FLUSH) {
 			if (ps3disk_submit_flush_request(dev, req))
 				break;
+		} else if (req->cmd_type == REQ_TYPE_FS) {
+			if (ps3disk_submit_request_sg(dev, req))
+				break;
 		} else {
 			blk_dump_rq_flags(req, DEVICE_NAME " bad request");
 			__blk_end_request_all(req, -EIO);
@@ -257,8 +256,7 @@
 		return IRQ_HANDLED;
 	}
 
-	if (req->cmd_type == REQ_TYPE_LINUX_BLOCK &&
-	    req->cmd[0] == REQ_LB_OP_FLUSH) {
+	if (req->cmd_flags & REQ_FLUSH) {
 		read = 0;
 		op = "flush";
 	} else {
@@ -398,16 +396,6 @@
 	return 0;
 }
 
-static void ps3disk_prepare_flush(struct request_queue *q, struct request *req)
-{
-	struct ps3_storage_device *dev = q->queuedata;
-
-	dev_dbg(&dev->sbd.core, "%s:%u\n", __func__, __LINE__);
-
-	req->cmd_type = REQ_TYPE_LINUX_BLOCK;
-	req->cmd[0] = REQ_LB_OP_FLUSH;
-}
-
 static unsigned long ps3disk_mask;
 
 static DEFINE_MUTEX(ps3disk_mask_mutex);
@@ -480,8 +468,7 @@
 	blk_queue_dma_alignment(queue, dev->blk_size-1);
 	blk_queue_logical_block_size(queue, dev->blk_size);
 
-	blk_queue_ordered(queue, QUEUE_ORDERED_DRAIN_FLUSH,
-			  ps3disk_prepare_flush);
+	blk_queue_ordered(queue, QUEUE_ORDERED_DRAIN_FLUSH);
 
 	blk_queue_max_segments(queue, -1);
 	blk_queue_max_segment_size(queue, dev->bounce_size);

diff --git a/drivers/block/swim.c b/drivers/block/swim.c
index e463657..2e46815 100644
--- a/drivers/block/swim.c
+++ b/drivers/block/swim.c

@@ -20,6 +20,7 @@
 #include <linux/fd.h>
 #include <linux/slab.h>
 #include <linux/blkdev.h>
+#include <linux/smp_lock.h>
 #include <linux/hdreg.h>
 #include <linux/kernel.h>
 #include <linux/delay.h>
@@ -661,11 +662,23 @@
 	return err;
 }
 
+static int floppy_unlocked_open(struct block_device *bdev, fmode_t mode)
+{
+	int ret;
+
+	lock_kernel();
+	ret = floppy_open(bdev, mode);
+	unlock_kernel();
+
+	return ret;
+}
+
 static int floppy_release(struct gendisk *disk, fmode_t mode)
 {
 	struct floppy_state *fs = disk->private_data;
 	struct swim __iomem *base = fs->swd->base;
 
+	lock_kernel();
 	if (fs->ref_count < 0)
 		fs->ref_count = 0;
 	else if (fs->ref_count > 0)
@@ -673,6 +686,7 @@
 
 	if (fs->ref_count == 0)
 		swim_motor(base, OFF);
+	unlock_kernel();
 
 	return 0;
 }
@@ -690,7 +704,9 @@
 	case FDEJECT:
 		if (fs->ref_count != 1)
 			return -EBUSY;
+		lock_kernel();
 		err = floppy_eject(fs);
+		unlock_kernel();
 		return err;
 
 	case FDGETPRM:
@@ -751,9 +767,9 @@
 
 static const struct block_device_operations floppy_fops = {
 	.owner		 = THIS_MODULE,
-	.open		 = floppy_open,
+	.open		 = floppy_unlocked_open,
 	.release	 = floppy_release,
-	.locked_ioctl	 = floppy_ioctl,
+	.ioctl		 = floppy_ioctl,
 	.getgeo		 = floppy_getgeo,
 	.media_changed	 = floppy_check_change,
 	.revalidate_disk = floppy_revalidate,

diff --git a/drivers/block/swim3.c b/drivers/block/swim3.c
index ed6fb91..cc6a386 100644
--- a/drivers/block/swim3.c
+++ b/drivers/block/swim3.c

@@ -25,6 +25,7 @@
 #include <linux/ioctl.h>
 #include <linux/blkdev.h>
 #include <linux/interrupt.h>
+#include <linux/smp_lock.h>
 #include <linux/module.h>
 #include <linux/spinlock.h>
 #include <asm/io.h>
@@ -839,7 +840,7 @@
 static struct floppy_struct floppy_type =
 	{ 2880,18,2,80,0,0x1B,0x00,0xCF,0x6C,NULL };	/*  7 1.44MB 3.5"   */
 
-static int floppy_ioctl(struct block_device *bdev, fmode_t mode,
+static int floppy_locked_ioctl(struct block_device *bdev, fmode_t mode,
 			unsigned int cmd, unsigned long param)
 {
 	struct floppy_state *fs = bdev->bd_disk->private_data;
@@ -867,6 +868,18 @@
 	return -ENOTTY;
 }
 
+static int floppy_ioctl(struct block_device *bdev, fmode_t mode,
+				 unsigned int cmd, unsigned long param)
+{
+	int ret;
+
+	lock_kernel();
+	ret = floppy_locked_ioctl(bdev, mode, cmd, param);
+	unlock_kernel();
+
+	return ret;
+}
+
 static int floppy_open(struct block_device *bdev, fmode_t mode)
 {
 	struct floppy_state *fs = bdev->bd_disk->private_data;
@@ -936,15 +949,28 @@
 	return 0;
 }
 
+static int floppy_unlocked_open(struct block_device *bdev, fmode_t mode)
+{
+	int ret;
+
+	lock_kernel();
+	ret = floppy_open(bdev, mode);
+	unlock_kernel();
+
+	return ret;
+}
+
 static int floppy_release(struct gendisk *disk, fmode_t mode)
 {
 	struct floppy_state *fs = disk->private_data;
 	struct swim3 __iomem *sw = fs->swim3;
+	lock_kernel();
 	if (fs->ref_count > 0 && --fs->ref_count == 0) {
 		swim3_action(fs, MOTOR_OFF);
 		out_8(&sw->control_bic, 0xff);
 		swim3_select(fs, RELAX);
 	}
+	unlock_kernel();
 	return 0;
 }
 
@@ -995,9 +1021,9 @@
 }
 
 static const struct block_device_operations floppy_fops = {
-	.open		= floppy_open,
+	.open		= floppy_unlocked_open,
 	.release	= floppy_release,
-	.locked_ioctl	= floppy_ioctl,
+	.ioctl		= floppy_ioctl,
 	.media_changed	= floppy_check_change,
 	.revalidate_disk= floppy_revalidate,
 };

diff --git a/drivers/block/ub.c b/drivers/block/ub.c
index 0536b5b..c48e148 100644
--- a/drivers/block/ub.c
+++ b/drivers/block/ub.c

@@ -28,6 +28,7 @@
 #include <linux/timer.h>
 #include <linux/scatterlist.h>
 #include <linux/slab.h>
+#include <linux/smp_lock.h>
 #include <scsi/scsi.h>
 
 #define DRV_NAME "ub"
@@ -648,7 +649,7 @@
 		return 0;
 	}
 
-	if (lun->changed && !blk_pc_request(rq)) {
+	if (lun->changed && rq->cmd_type != REQ_TYPE_BLOCK_PC) {
 		blk_start_request(rq);
 		ub_end_rq(rq, SAM_STAT_CHECK_CONDITION);
 		return 0;
@@ -684,7 +685,7 @@
 	}
 	urq->nsg = n_elem;
 
-	if (blk_pc_request(rq)) {
+	if (rq->cmd_type == REQ_TYPE_BLOCK_PC) {
 		ub_cmd_build_packet(sc, lun, cmd, urq);
 	} else {
 		ub_cmd_build_block(sc, lun, cmd, urq);
@@ -781,7 +782,7 @@
 	rq = urq->rq;
 
 	if (cmd->error == 0) {
-		if (blk_pc_request(rq)) {
+		if (rq->cmd_type == REQ_TYPE_BLOCK_PC) {
 			if (cmd->act_len >= rq->resid_len)
 				rq->resid_len = 0;
 			else
@@ -795,7 +796,7 @@
 			}
 		}
 	} else {
-		if (blk_pc_request(rq)) {
+		if (rq->cmd_type == REQ_TYPE_BLOCK_PC) {
 			/* UB_SENSE_SIZE is smaller than SCSI_SENSE_BUFFERSIZE */
 			memcpy(rq->sense, sc->top_sense, UB_SENSE_SIZE);
 			rq->sense_len = UB_SENSE_SIZE;
@@ -1710,6 +1711,18 @@
 	return rc;
 }
 
+static int ub_bd_unlocked_open(struct block_device *bdev, fmode_t mode)
+{
+	int ret;
+
+	lock_kernel();
+	ret = ub_bd_open(bdev, mode);
+	unlock_kernel();
+
+	return ret;
+}
+
+
 /*
  */
 static int ub_bd_release(struct gendisk *disk, fmode_t mode)
@@ -1717,7 +1730,10 @@
 	struct ub_lun *lun = disk->private_data;
 	struct ub_dev *sc = lun->udev;
 
+	lock_kernel();
 	ub_put(sc);
+	unlock_kernel();
+
 	return 0;
 }
 
@@ -1729,8 +1745,13 @@
 {
 	struct gendisk *disk = bdev->bd_disk;
 	void __user *usermem = (void __user *) arg;
+	int ret;
 
-	return scsi_cmd_ioctl(disk->queue, disk, mode, cmd, usermem);
+	lock_kernel();
+	ret = scsi_cmd_ioctl(disk->queue, disk, mode, cmd, usermem);
+	unlock_kernel();
+
+	return ret;
 }
 
 /*
@@ -1792,9 +1813,9 @@
 
 static const struct block_device_operations ub_bd_fops = {
 	.owner		= THIS_MODULE,
-	.open		= ub_bd_open,
+	.open		= ub_bd_unlocked_open,
 	.release	= ub_bd_release,
-	.locked_ioctl	= ub_bd_ioctl,
+	.ioctl		= ub_bd_ioctl,
 	.media_changed	= ub_bd_media_changed,
 	.revalidate_disk = ub_bd_revalidate,
 };

diff --git a/drivers/block/umem.c b/drivers/block/umem.c
index 2f9470f..8be5715 100644
--- a/drivers/block/umem.c
+++ b/drivers/block/umem.c

@@ -478,7 +478,7 @@
 				le32_to_cpu(desc->local_addr)>>9,
 				le32_to_cpu(desc->transfer_size));
 			dump_dmastat(card, control);
-		} else if (test_bit(BIO_RW, &bio->bi_rw) &&
+		} else if ((bio->bi_rw & REQ_WRITE) &&
 			   le32_to_cpu(desc->local_addr) >> 9 ==
 				card->init_size) {
 			card->init_size += le32_to_cpu(desc->transfer_size) >> 9;

diff --git a/drivers/block/viodasd.c b/drivers/block/viodasd.c
index 788d938..f651e51 100644
--- a/drivers/block/viodasd.c
+++ b/drivers/block/viodasd.c

@@ -41,6 +41,7 @@
 #include <linux/errno.h>
 #include <linux/init.h>
 #include <linux/string.h>
+#include <linux/smp_lock.h>
 #include <linux/dma-mapping.h>
 #include <linux/completion.h>
 #include <linux/device.h>
@@ -175,6 +176,18 @@
 	return 0;
 }
 
+static int viodasd_unlocked_open(struct block_device *bdev, fmode_t mode)
+{
+	int ret;
+
+	lock_kernel();
+	ret = viodasd_open(bdev, mode);
+	unlock_kernel();
+
+	return ret;
+}
+
+
 /*
  * External release entry point.
  */
@@ -183,6 +196,7 @@
 	struct viodasd_device *d = disk->private_data;
 	HvLpEvent_Rc hvrc;
 
+	lock_kernel();
 	/* Send the event to OS/400.  We DON'T expect a response */
 	hvrc = HvCallEvent_signalLpEventFast(viopath_hostLp,
 			HvLpEvent_Type_VirtualIo,
@@ -195,6 +209,9 @@
 			0, 0, 0);
 	if (hvrc != 0)
 		pr_warning("HV close call failed %d\n", (int)hvrc);
+
+	unlock_kernel();
+
 	return 0;
 }
 
@@ -219,7 +236,7 @@
  */
 static const struct block_device_operations viodasd_fops = {
 	.owner = THIS_MODULE,
-	.open = viodasd_open,
+	.open = viodasd_unlocked_open,
 	.release = viodasd_release,
 	.getgeo = viodasd_getgeo,
 };
@@ -361,7 +378,7 @@
 		if (req == NULL)
 			return;
 		/* check that request contains a valid command */
-		if (!blk_fs_request(req)) {
+		if (req->cmd_type != REQ_TYPE_FS) {
 			viodasd_end_request(req, -EIO, blk_rq_sectors(req));
 			continue;
 		}

diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 23b7c48..2aafafc 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c

@@ -2,6 +2,7 @@
 #include <linux/spinlock.h>
 #include <linux/slab.h>
 #include <linux/blkdev.h>
+#include <linux/smp_lock.h>
 #include <linux/hdreg.h>
 #include <linux/virtio.h>
 #include <linux/virtio_blk.h>
@@ -65,13 +66,18 @@
 			break;
 		}
 
-		if (blk_pc_request(vbr->req)) {
+		switch (vbr->req->cmd_type) {
+		case REQ_TYPE_BLOCK_PC:
 			vbr->req->resid_len = vbr->in_hdr.residual;
 			vbr->req->sense_len = vbr->in_hdr.sense_len;
 			vbr->req->errors = vbr->in_hdr.errors;
-		}
-		if (blk_special_request(vbr->req))
+			break;
+		case REQ_TYPE_SPECIAL:
 			vbr->req->errors = (error != 0);
+			break;
+		default:
+			break;
+		}
 
 		__blk_end_request_all(vbr->req, error);
 		list_del(&vbr->list);
@@ -94,36 +100,35 @@
 		return false;
 
 	vbr->req = req;
-	switch (req->cmd_type) {
-	case REQ_TYPE_FS:
-		vbr->out_hdr.type = 0;
-		vbr->out_hdr.sector = blk_rq_pos(vbr->req);
-		vbr->out_hdr.ioprio = req_get_ioprio(vbr->req);
-		break;
-	case REQ_TYPE_BLOCK_PC:
-		vbr->out_hdr.type = VIRTIO_BLK_T_SCSI_CMD;
+
+	if (req->cmd_flags & REQ_FLUSH) {
+		vbr->out_hdr.type = VIRTIO_BLK_T_FLUSH;
 		vbr->out_hdr.sector = 0;
 		vbr->out_hdr.ioprio = req_get_ioprio(vbr->req);
-		break;
-	case REQ_TYPE_SPECIAL:
-		vbr->out_hdr.type = VIRTIO_BLK_T_GET_ID;
-		vbr->out_hdr.sector = 0;
-		vbr->out_hdr.ioprio = req_get_ioprio(vbr->req);
-		break;
-	case REQ_TYPE_LINUX_BLOCK:
-		if (req->cmd[0] == REQ_LB_OP_FLUSH) {
-			vbr->out_hdr.type = VIRTIO_BLK_T_FLUSH;
+	} else {
+		switch (req->cmd_type) {
+		case REQ_TYPE_FS:
+			vbr->out_hdr.type = 0;
+			vbr->out_hdr.sector = blk_rq_pos(vbr->req);
+			vbr->out_hdr.ioprio = req_get_ioprio(vbr->req);
+			break;
+		case REQ_TYPE_BLOCK_PC:
+			vbr->out_hdr.type = VIRTIO_BLK_T_SCSI_CMD;
 			vbr->out_hdr.sector = 0;
 			vbr->out_hdr.ioprio = req_get_ioprio(vbr->req);
 			break;
+		case REQ_TYPE_SPECIAL:
+			vbr->out_hdr.type = VIRTIO_BLK_T_GET_ID;
+			vbr->out_hdr.sector = 0;
+			vbr->out_hdr.ioprio = req_get_ioprio(vbr->req);
+			break;
+		default:
+			/* We don't put anything else in the queue. */
+			BUG();
 		}
-		/*FALLTHRU*/
-	default:
-		/* We don't put anything else in the queue. */
-		BUG();
 	}
 
-	if (blk_barrier_rq(vbr->req))
+	if (vbr->req->cmd_flags & REQ_HARDBARRIER)
 		vbr->out_hdr.type |= VIRTIO_BLK_T_BARRIER;
 
 	sg_set_buf(&vblk->sg[out++], &vbr->out_hdr, sizeof(vbr->out_hdr));
@@ -134,12 +139,12 @@
 	 * block, and before the normal inhdr we put the sense data and the
 	 * inhdr with additional status information before the normal inhdr.
 	 */
-	if (blk_pc_request(vbr->req))
+	if (vbr->req->cmd_type == REQ_TYPE_BLOCK_PC)
 		sg_set_buf(&vblk->sg[out++], vbr->req->cmd, vbr->req->cmd_len);
 
 	num = blk_rq_map_sg(q, vbr->req, vblk->sg + out);
 
-	if (blk_pc_request(vbr->req)) {
+	if (vbr->req->cmd_type == REQ_TYPE_BLOCK_PC) {
 		sg_set_buf(&vblk->sg[num + out + in++], vbr->req->sense, 96);
 		sg_set_buf(&vblk->sg[num + out + in++], &vbr->in_hdr,
 			   sizeof(vbr->in_hdr));
@@ -190,12 +195,6 @@
 		virtqueue_kick(vblk->vq);
 }
 
-static void virtblk_prepare_flush(struct request_queue *q, struct request *req)
-{
-	req->cmd_type = REQ_TYPE_LINUX_BLOCK;
-	req->cmd[0] = REQ_LB_OP_FLUSH;
-}
-
 /* return id (s/n) string for *disk to *id_str
  */
 static int virtblk_get_id(struct gendisk *disk, char *id_str)
@@ -219,7 +218,7 @@
 	return blk_execute_rq(vblk->disk->queue, vblk->disk, req, false);
 }
 
-static int virtblk_ioctl(struct block_device *bdev, fmode_t mode,
+static int virtblk_locked_ioctl(struct block_device *bdev, fmode_t mode,
 			 unsigned cmd, unsigned long data)
 {
 	struct gendisk *disk = bdev->bd_disk;
@@ -235,6 +234,18 @@
 			      (void __user *)data);
 }
 
+static int virtblk_ioctl(struct block_device *bdev, fmode_t mode,
+			     unsigned int cmd, unsigned long param)
+{
+	int ret;
+
+	lock_kernel();
+	ret = virtblk_locked_ioctl(bdev, mode, cmd, param);
+	unlock_kernel();
+
+	return ret;
+}
+
 /* We provide getgeo only to please some old bootloader/partitioning tools */
 static int virtblk_getgeo(struct block_device *bd, struct hd_geometry *geo)
 {
@@ -261,7 +272,7 @@
 }
 
 static const struct block_device_operations virtblk_fops = {
-	.locked_ioctl = virtblk_ioctl,
+	.ioctl  = virtblk_ioctl,
 	.owner  = THIS_MODULE,
 	.getgeo = virtblk_getgeo,
 };
@@ -383,8 +394,7 @@
 		 * flushing a volatile write cache on the host.  Use that
 		 * to implement write barrier support.
 		 */
-		blk_queue_ordered(q, QUEUE_ORDERED_DRAIN_FLUSH,
-				  virtblk_prepare_flush);
+		blk_queue_ordered(q, QUEUE_ORDERED_DRAIN_FLUSH);
 	} else if (virtio_has_feature(vdev, VIRTIO_BLK_F_BARRIER)) {
 		/*
 		 * If the BARRIER feature is supported the host expects us
@@ -393,7 +403,7 @@
 		 * never re-orders outstanding I/O.  This feature is not
 		 * useful for real life scenarious and deprecated.
 		 */
-		blk_queue_ordered(q, QUEUE_ORDERED_TAG, NULL);
+		blk_queue_ordered(q, QUEUE_ORDERED_TAG);
 	} else {
 		/*
 		 * If the FLUSH feature is not supported we must assume that
@@ -401,7 +411,7 @@
 		 * caching. We still need to drain the queue to provider
 		 * proper barrier semantics.
 		 */
-		blk_queue_ordered(q, QUEUE_ORDERED_DRAIN, NULL);
+		blk_queue_ordered(q, QUEUE_ORDERED_DRAIN);
 	}
 
 	/* If disk is read-only in the host, the guest should obey */

diff --git a/drivers/block/xd.c b/drivers/block/xd.c
index 18a80ff..d5a3cd7 100644
--- a/drivers/block/xd.c
+++ b/drivers/block/xd.c

@@ -46,6 +46,7 @@
 #include <linux/init.h>
 #include <linux/wait.h>
 #include <linux/blkdev.h>
+#include <linux/smp_lock.h>
 #include <linux/blkpg.h>
 #include <linux/delay.h>
 #include <linux/io.h>
@@ -133,7 +134,7 @@
 
 static const struct block_device_operations xd_fops = {
 	.owner	= THIS_MODULE,
-	.locked_ioctl	= xd_ioctl,
+	.ioctl	= xd_ioctl,
 	.getgeo = xd_getgeo,
 };
 static DECLARE_WAIT_QUEUE_HEAD(xd_wait_int);
@@ -322,7 +323,7 @@
 		int res = -EIO;
 		int retry;
 
-		if (!blk_fs_request(req))
+		if (req->cmd_type != REQ_TYPE_FS)
 			goto done;
 		if (block + count > get_capacity(req->rq_disk))
 			goto done;
@@ -347,7 +348,7 @@
 }
 
 /* xd_ioctl: handle device ioctl's */
-static int xd_ioctl(struct block_device *bdev, fmode_t mode, u_int cmd, u_long arg)
+static int xd_locked_ioctl(struct block_device *bdev, fmode_t mode, u_int cmd, u_long arg)
 {
 	switch (cmd) {
 		case HDIO_SET_DMA:
@@ -375,6 +376,18 @@
 	}
 }
 
+static int xd_ioctl(struct block_device *bdev, fmode_t mode,
+			     unsigned int cmd, unsigned long param)
+{
+	int ret;
+
+	lock_kernel();
+	ret = xd_locked_ioctl(bdev, mode, cmd, param);
+	unlock_kernel();
+
+	return ret;
+}
+
 /* xd_readwrite: handle a read/write request */
 static int xd_readwrite (u_char operation,XD_INFO *p,char *buffer,u_int block,u_int count)
 {

diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index f63ac3d..ac1b682e 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c

@@ -41,6 +41,7 @@
 #include <linux/cdrom.h>
 #include <linux/module.h>
 #include <linux/slab.h>
+#include <linux/smp_lock.h>
 #include <linux/scatterlist.h>
 
 #include <xen/xen.h>
@@ -79,6 +80,7 @@
  */
 struct blkfront_info
 {
+	struct mutex mutex;
 	struct xenbus_device *xbdev;
 	struct gendisk *gd;
 	int vdevice;
@@ -95,16 +97,14 @@
 	unsigned long shadow_free;
 	int feature_barrier;
 	int is_ready;
-
-	/**
-	 * The number of people holding this device open.  We won't allow a
-	 * hot-unplug unless this is 0.
-	 */
-	int users;
 };
 
 static DEFINE_SPINLOCK(blkif_io_lock);
 
+static unsigned int nr_minors;
+static unsigned long *minors;
+static DEFINE_SPINLOCK(minor_lock);
+
 #define MAXIMUM_OUTSTANDING_BLOCK_REQS \
 	(BLKIF_MAX_SEGMENTS_PER_REQUEST * BLK_RING_SIZE)
 #define GRANT_INVALID_REF	0
@@ -139,6 +139,55 @@
 	info->shadow_free = id;
 }
 
+static int xlbd_reserve_minors(unsigned int minor, unsigned int nr)
+{
+	unsigned int end = minor + nr;
+	int rc;
+
+	if (end > nr_minors) {
+		unsigned long *bitmap, *old;
+
+		bitmap = kzalloc(BITS_TO_LONGS(end) * sizeof(*bitmap),
+				 GFP_KERNEL);
+		if (bitmap == NULL)
+			return -ENOMEM;
+
+		spin_lock(&minor_lock);
+		if (end > nr_minors) {
+			old = minors;
+			memcpy(bitmap, minors,
+			       BITS_TO_LONGS(nr_minors) * sizeof(*bitmap));
+			minors = bitmap;
+			nr_minors = BITS_TO_LONGS(end) * BITS_PER_LONG;
+		} else
+			old = bitmap;
+		spin_unlock(&minor_lock);
+		kfree(old);
+	}
+
+	spin_lock(&minor_lock);
+	if (find_next_bit(minors, end, minor) >= end) {
+		for (; minor < end; ++minor)
+			__set_bit(minor, minors);
+		rc = 0;
+	} else
+		rc = -EBUSY;
+	spin_unlock(&minor_lock);
+
+	return rc;
+}
+
+static void xlbd_release_minors(unsigned int minor, unsigned int nr)
+{
+	unsigned int end = minor + nr;
+
+	BUG_ON(end > nr_minors);
+	spin_lock(&minor_lock);
+	for (; minor < end; ++minor)
+		__clear_bit(minor, minors);
+	spin_unlock(&minor_lock);
+}
+
 static void blkif_restart_queue_callback(void *arg)
 {
 	struct blkfront_info *info = (struct blkfront_info *)arg;
@@ -239,7 +288,7 @@
 
 	ring_req->operation = rq_data_dir(req) ?
 		BLKIF_OP_WRITE : BLKIF_OP_READ;
-	if (blk_barrier_rq(req))
+	if (req->cmd_flags & REQ_HARDBARRIER)
 		ring_req->operation = BLKIF_OP_WRITE_BARRIER;
 
 	ring_req->nr_segments = blk_rq_map_sg(req->q, req, info->sg);
@@ -310,7 +359,7 @@
 
 		blk_start_request(req);
 
-		if (!blk_fs_request(req)) {
+		if (req->cmd_type != REQ_TYPE_FS) {
 			__blk_end_request_all(req, -EIO);
 			continue;
 		}
@@ -372,17 +421,22 @@
 static int xlvbd_barrier(struct blkfront_info *info)
 {
 	int err;
+	const char *barrier;
 
-	err = blk_queue_ordered(info->rq,
-				info->feature_barrier ? QUEUE_ORDERED_DRAIN : QUEUE_ORDERED_NONE,
-				NULL);
+	switch (info->feature_barrier) {
+	case QUEUE_ORDERED_DRAIN:	barrier = "enabled (drain)"; break;
+	case QUEUE_ORDERED_TAG:		barrier = "enabled (tag)"; break;
+	case QUEUE_ORDERED_NONE:	barrier = "disabled"; break;
+	default:			return -EINVAL;
+	}
+
+	err = blk_queue_ordered(info->rq, info->feature_barrier);
 
 	if (err)
 		return err;
 
 	printk(KERN_INFO "blkfront: %s: barriers %s\n",
-	       info->gd->disk_name,
-	       info->feature_barrier ? "enabled" : "disabled");
+	       info->gd->disk_name, barrier);
 	return 0;
 }
 
@@ -418,9 +472,14 @@
 	if ((minor % nr_parts) == 0)
 		nr_minors = nr_parts;
 
+	err = xlbd_reserve_minors(minor, nr_minors);
+	if (err)
+		goto out;
+	err = -ENODEV;
+
 	gd = alloc_disk(nr_minors);
 	if (gd == NULL)
-		goto out;
+		goto release;
 
 	offset = minor / nr_parts;
 
@@ -451,14 +510,13 @@
 
 	if (xlvbd_init_blk_queue(gd, sector_size)) {
 		del_gendisk(gd);
-		goto out;
+		goto release;
 	}
 
 	info->rq = gd->queue;
 	info->gd = gd;
 
-	if (info->feature_barrier)
-		xlvbd_barrier(info);
+	xlvbd_barrier(info);
 
 	if (vdisk_info & VDISK_READONLY)
 		set_disk_ro(gd, 1);
@@ -471,10 +529,45 @@
 
 	return 0;
 
+ release:
+	xlbd_release_minors(minor, nr_minors);
  out:
 	return err;
 }
 
+static void xlvbd_release_gendisk(struct blkfront_info *info)
+{
+	unsigned int minor, nr_minors;
+	unsigned long flags;
+
+	if (info->rq == NULL)
+		return;
+
+	spin_lock_irqsave(&blkif_io_lock, flags);
+
+	/* No more blkif_request(). */
+	blk_stop_queue(info->rq);
+
+	/* No more gnttab callback work. */
+	gnttab_cancel_free_callback(&info->callback);
+	spin_unlock_irqrestore(&blkif_io_lock, flags);
+
+	/* Flush gnttab callback work. Must be done with no locks held. */
+	flush_scheduled_work();
+
+	del_gendisk(info->gd);
+
+	minor = info->gd->first_minor;
+	nr_minors = info->gd->minors;
+	xlbd_release_minors(minor, nr_minors);
+
+	blk_cleanup_queue(info->rq);
+	info->rq = NULL;
+
+	put_disk(info->gd);
+	info->gd = NULL;
+}
+
 static void kick_pending_request_queues(struct blkfront_info *info)
 {
 	if (!RING_FULL(&info->ring)) {
@@ -569,7 +662,7 @@
 				printk(KERN_WARNING "blkfront: %s: write barrier op failed\n",
 				       info->gd->disk_name);
 				error = -EOPNOTSUPP;
-				info->feature_barrier = 0;
+				info->feature_barrier = QUEUE_ORDERED_NONE;
 				xlvbd_barrier(info);
 			}
 			/* fall through */
@@ -652,7 +745,7 @@
 
 
 /* Common code used when first setting up, and when resuming. */
-static int talk_to_backend(struct xenbus_device *dev,
+static int talk_to_blkback(struct xenbus_device *dev,
 			   struct blkfront_info *info)
 {
 	const char *message = NULL;
@@ -712,7 +805,6 @@
 	return err;
 }
 
-
 /**
  * Entry point to this code when a new device is created.  Allocate the basic
  * structures and the ring buffer for communication with the backend, and
@@ -773,6 +865,7 @@
 		return -ENOMEM;
 	}
 
+	mutex_init(&info->mutex);
 	info->xbdev = dev;
 	info->vdevice = vdevice;
 	info->connected = BLKIF_STATE_DISCONNECTED;
@@ -786,7 +879,7 @@
 	info->handle = simple_strtoul(strrchr(dev->nodename, '/')+1, NULL, 0);
 	dev_set_drvdata(&dev->dev, info);
 
-	err = talk_to_backend(dev, info);
+	err = talk_to_blkback(dev, info);
 	if (err) {
 		kfree(info);
 		dev_set_drvdata(&dev->dev, NULL);
@@ -881,13 +974,50 @@
 
 	blkif_free(info, info->connected == BLKIF_STATE_CONNECTED);
 
-	err = talk_to_backend(dev, info);
+	err = talk_to_blkback(dev, info);
 	if (info->connected == BLKIF_STATE_SUSPENDED && !err)
 		err = blkif_recover(info);
 
 	return err;
 }
 
+static void
+blkfront_closing(struct blkfront_info *info)
+{	/* Backend went to Closing: close now if the disk is unused, else defer to release. */
+	struct xenbus_device *xbdev = info->xbdev;
+	struct block_device *bdev = NULL;
+
+	mutex_lock(&info->mutex);
+
+	if (xbdev->state == XenbusStateClosing) {	/* frontend already in Closing (set below when busy) */
+		mutex_unlock(&info->mutex);
+		return;
+	}
+
+	if (info->gd)
+		bdev = bdget_disk(info->gd, 0);
+
+	mutex_unlock(&info->mutex);
+
+	if (!bdev) {	/* no block device to tear down */
+		xenbus_frontend_closed(xbdev);
+		return;
+	}
+
+	mutex_lock(&bdev->bd_mutex);
+
+	if (bdev->bd_openers) {
+		xenbus_dev_error(xbdev, -EBUSY,
+				 "Device in use; refusing to close");
+		xenbus_switch_state(xbdev, XenbusStateClosing);	/* blkif_release() checks for this state */
+	} else {
+		xlvbd_release_gendisk(info);
+		xenbus_frontend_closed(xbdev);
+	}
+
+	mutex_unlock(&bdev->bd_mutex);
+	bdput(bdev);
+}
 
 /*
  * Invoked when the backend is finally 'ready' (and has told produced
@@ -899,11 +1029,31 @@
 	unsigned long sector_size;
 	unsigned int binfo;
 	int err;
+	int barrier;
 
-	if ((info->connected == BLKIF_STATE_CONNECTED) ||
-	    (info->connected == BLKIF_STATE_SUSPENDED) )
+	switch (info->connected) {
+	case BLKIF_STATE_CONNECTED:
+		/*
+		 * Potentially, the back-end may be signalling
+		 * a capacity change; update the capacity.
+		 */
+		err = xenbus_scanf(XBT_NIL, info->xbdev->otherend,
+				   "sectors", "%Lu", &sectors);
+		if (XENBUS_EXIST_ERR(err))
+			return;
+		printk(KERN_INFO "Setting capacity to %Lu\n",
+		       sectors);
+		set_capacity(info->gd, sectors);
+		revalidate_disk(info->gd);
+
+		/* fall through */
+	case BLKIF_STATE_SUSPENDED:
 		return;
 
+	default:
+		break;
+	}
+
 	dev_dbg(&info->xbdev->dev, "%s:%s.\n",
 		__func__, info->xbdev->otherend);
 
@@ -920,10 +1070,26 @@
 	}
 
 	err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
-			    "feature-barrier", "%lu", &info->feature_barrier,
+			    "feature-barrier", "%d", &barrier, /* barrier is an int */
 			    NULL);
+
+	/*
+	 * If there's no "feature-barrier" defined, then it means
+	 * we're dealing with a very old backend which writes
+	 * synchronously; draining will do what needs to get done.
+	 *
+	 * If there are barriers, then we can do full queued writes
+	 * with tagged barriers.
+	 *
+	 * If barriers are not supported, then there's not much we can
+	 * do, so just set ordering to NONE.
+	 */
 	if (err)
-		info->feature_barrier = 0;
+		info->feature_barrier = QUEUE_ORDERED_DRAIN;
+	else if (barrier)
+		info->feature_barrier = QUEUE_ORDERED_TAG;
+	else
+		info->feature_barrier = QUEUE_ORDERED_NONE;
 
 	err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size);
 	if (err) {
@@ -946,52 +1112,14 @@
 }
 
 /**
- * Handle the change of state of the backend to Closing.  We must delete our
- * device-layer structures now, to ensure that writes are flushed through to
- * the backend.  Once is this done, we can switch to Closed in
- * acknowledgement.
- */
-static void blkfront_closing(struct xenbus_device *dev)
-{
-	struct blkfront_info *info = dev_get_drvdata(&dev->dev);
-	unsigned long flags;
-
-	dev_dbg(&dev->dev, "blkfront_closing: %s removed\n", dev->nodename);
-
-	if (info->rq == NULL)
-		goto out;
-
-	spin_lock_irqsave(&blkif_io_lock, flags);
-
-	/* No more blkif_request(). */
-	blk_stop_queue(info->rq);
-
-	/* No more gnttab callback work. */
-	gnttab_cancel_free_callback(&info->callback);
-	spin_unlock_irqrestore(&blkif_io_lock, flags);
-
-	/* Flush gnttab callback work. Must be done with no locks held. */
-	flush_scheduled_work();
-
-	blk_cleanup_queue(info->rq);
-	info->rq = NULL;
-
-	del_gendisk(info->gd);
-
- out:
-	xenbus_frontend_closed(dev);
-}
-
-/**
  * Callback received when the backend's state changes.
  */
-static void backend_changed(struct xenbus_device *dev,
+static void blkback_changed(struct xenbus_device *dev,
 			    enum xenbus_state backend_state)
 {
 	struct blkfront_info *info = dev_get_drvdata(&dev->dev);
-	struct block_device *bd;
 
-	dev_dbg(&dev->dev, "blkfront:backend_changed.\n");
+	dev_dbg(&dev->dev, "blkfront:blkback_changed to state %d.\n", backend_state);
 
 	switch (backend_state) {
 	case XenbusStateInitialising:
@@ -1006,35 +1134,56 @@
 		break;
 
 	case XenbusStateClosing:
-		if (info->gd == NULL) {
-			xenbus_frontend_closed(dev);
-			break;
-		}
-		bd = bdget_disk(info->gd, 0);
-		if (bd == NULL)
-			xenbus_dev_fatal(dev, -ENODEV, "bdget failed");
-
-		mutex_lock(&bd->bd_mutex);
-		if (info->users > 0)
-			xenbus_dev_error(dev, -EBUSY,
-					 "Device in use; refusing to close");
-		else
-			blkfront_closing(dev);
-		mutex_unlock(&bd->bd_mutex);
-		bdput(bd);
+		blkfront_closing(info);
 		break;
 	}
 }
 
-static int blkfront_remove(struct xenbus_device *dev)
+static int blkfront_remove(struct xenbus_device *xbdev)	/* xenbus .remove callback */
 {
-	struct blkfront_info *info = dev_get_drvdata(&dev->dev);
+	struct blkfront_info *info = dev_get_drvdata(&xbdev->dev);
+	struct block_device *bdev = NULL;
+	struct gendisk *disk;
 
-	dev_dbg(&dev->dev, "blkfront_remove: %s removed\n", dev->nodename);
+	dev_dbg(&xbdev->dev, "%s removed\n", xbdev->nodename);
 
 	blkif_free(info, 0);
 
-	kfree(info);
+	mutex_lock(&info->mutex);
+
+	disk = info->gd;
+	if (disk)
+		bdev = bdget_disk(disk, 0);
+
+	info->xbdev = NULL;	/* blkif_release() and blkfront_is_ready() test this */
+	mutex_unlock(&info->mutex);
+
+	if (!bdev) {	/* no block device to check: free info now */
+		kfree(info);
+		return 0;
+	}
+
+	/*
+	 * The xbdev was removed before we reached the Closed
+	 * state. See if it's safe to remove the disk. If the bdev
+	 * isn't closed yet, we let release take care of it.
+	 */
+
+	mutex_lock(&bdev->bd_mutex);
+	info = disk->private_data;	/* NULL if blkif_release() already freed it */
+
+	dev_warn(disk_to_dev(disk),
+		 "%s was hot-unplugged, %d stale handles\n",
+		 xbdev->nodename, bdev->bd_openers);
+
+	if (info && !bdev->bd_openers) {	/* nobody has the device open: release now */
+		xlvbd_release_gendisk(info);
+		disk->private_data = NULL;
+		kfree(info);
+	}
+
+	mutex_unlock(&bdev->bd_mutex);
+	bdput(bdev);
 
 	return 0;
 }
@@ -1043,30 +1192,78 @@
 {
 	struct blkfront_info *info = dev_get_drvdata(&dev->dev);
 
-	return info->is_ready;
+	return info->is_ready && info->xbdev;
 }
 
 static int blkif_open(struct block_device *bdev, fmode_t mode)
 {
-	struct blkfront_info *info = bdev->bd_disk->private_data;
-	info->users++;
-	return 0;
+	struct gendisk *disk = bdev->bd_disk;
+	struct blkfront_info *info;
+	int err = 0;
+
+	lock_kernel();
+
+	info = disk->private_data;
+	if (!info) {
+		/* xbdev gone: blkfront_remove()/blkif_release() cleared private_data */
+		err = -ERESTARTSYS;
+		goto out;
+	}
+
+	mutex_lock(&info->mutex);
+
+	if (!info->gd)
+		/* xbdev is closed: xlvbd_release_gendisk() cleared info->gd */
+		err = -ERESTARTSYS;
+
+	mutex_unlock(&info->mutex);
+
+out:
+	unlock_kernel();
+	return err;
 }
 
 static int blkif_release(struct gendisk *disk, fmode_t mode)
 {
 	struct blkfront_info *info = disk->private_data;
-	info->users--;
-	if (info->users == 0) {
-		/* Check whether we have been instructed to close.  We will
-		   have ignored this request initially, as the device was
-		   still mounted. */
-		struct xenbus_device *dev = info->xbdev;
-		enum xenbus_state state = xenbus_read_driver_state(dev->otherend);
+	struct block_device *bdev;
+	struct xenbus_device *xbdev;
 
-		if (state == XenbusStateClosing && info->is_ready)
-			blkfront_closing(dev);
+	lock_kernel();
+
+	bdev = bdget_disk(disk, 0);	/* reference is held until "out" because bdev is used below */
+
+	if (bdev->bd_openers)	/* still open elsewhere: nothing to do yet */
+		goto out;
+
+	/*
+	 * Check if we have been instructed to close. We will have
+	 * deferred this request, because the bdev was still open.
+	 */
+
+	mutex_lock(&info->mutex);
+	xbdev = info->xbdev;	/* NULL once blkfront_remove() has run */
+
+	if (xbdev && xbdev->state == XenbusStateClosing) {
+		/* pending switch to state closed */
+		dev_info(disk_to_dev(bdev->bd_disk), "releasing disk\n");
+		xlvbd_release_gendisk(info);
+		xenbus_frontend_closed(info->xbdev);
+	}
+
+	mutex_unlock(&info->mutex);
+
+	if (!xbdev) {
+		/* sudden device removal */
+		dev_info(disk_to_dev(bdev->bd_disk), "releasing disk\n");
+		xlvbd_release_gendisk(info);
+		disk->private_data = NULL;
+		kfree(info);
 	}
+
+out:
+	bdput(bdev);
+	unlock_kernel();
 	return 0;
 }
 
@@ -1076,7 +1273,7 @@
 	.open = blkif_open,
 	.release = blkif_release,
 	.getgeo = blkif_getgeo,
-	.locked_ioctl = blkif_ioctl,
+	.ioctl = blkif_ioctl,
 };
 
 
@@ -1092,7 +1289,7 @@
 	.probe = blkfront_probe,
 	.remove = blkfront_remove,
 	.resume = blkfront_resume,
-	.otherend_changed = backend_changed,
+	.otherend_changed = blkback_changed,
 	.is_ready = blkfront_is_ready,
 };
 

diff --git a/drivers/block/xsysace.c b/drivers/block/xsysace.c
index a7b83c0..b71888b 100644
--- a/drivers/block/xsysace.c
+++ b/drivers/block/xsysace.c

@@ -89,6 +89,7 @@
 #include <linux/delay.h>
 #include <linux/slab.h>
 #include <linux/blkdev.h>
+#include <linux/smp_lock.h>
 #include <linux/ata.h>
 #include <linux/hdreg.h>
 #include <linux/platform_device.h>
@@ -465,7 +466,7 @@
 	struct request *req;
 
 	while ((req = blk_peek_request(q)) != NULL) {
-		if (blk_fs_request(req))
+		if (req->cmd_type == REQ_TYPE_FS)
 			break;
 		blk_start_request(req);
 		__blk_end_request_all(req, -EIO);
@@ -901,11 +902,14 @@
 
 	dev_dbg(ace->dev, "ace_open() users=%i\n", ace->users + 1);
 
+	lock_kernel();
 	spin_lock_irqsave(&ace->lock, flags);
 	ace->users++;
 	spin_unlock_irqrestore(&ace->lock, flags);
 
 	check_disk_change(bdev);
+	unlock_kernel();
+
 	return 0;
 }
 
@@ -917,6 +921,7 @@
 
 	dev_dbg(ace->dev, "ace_release() users=%i\n", ace->users - 1);
 
+	lock_kernel();
 	spin_lock_irqsave(&ace->lock, flags);
 	ace->users--;
 	if (ace->users == 0) {
@@ -924,6 +929,7 @@
 		ace_out(ace, ACE_CTRL, val & ~ACE_CTRL_LOCKREQ);
 	}
 	spin_unlock_irqrestore(&ace->lock, flags);
+	unlock_kernel();
 	return 0;
 }
 

diff --git a/drivers/block/z2ram.c b/drivers/block/z2ram.c
index 9114654..d75b2bb 100644
--- a/drivers/block/z2ram.c
+++ b/drivers/block/z2ram.c

@@ -33,6 +33,7 @@
 #include <linux/module.h>
 #include <linux/blkdev.h>
 #include <linux/bitops.h>
+#include <linux/smp_lock.h>
 #include <linux/slab.h>
 
 #include <asm/setup.h>
@@ -153,6 +154,7 @@
 
     device = MINOR(bdev->bd_dev);
 
+    lock_kernel();
     if ( current_device != -1 && current_device != device )
     {
 	rc = -EBUSY;
@@ -294,20 +296,25 @@
 	set_capacity(z2ram_gendisk, z2ram_size >> 9);
     }
 
+    unlock_kernel();
     return 0;
 
 err_out_kfree:
     kfree(z2ram_map);
 err_out:
+    unlock_kernel();
     return rc;
 }
 
 static int
 z2_release(struct gendisk *disk, fmode_t mode)
 {
-    if ( current_device == -1 )
-	return 0;     
-
+    lock_kernel();
+    if ( current_device == -1 ) {
+    	unlock_kernel();
+    	return 0;
+    }
+    unlock_kernel();
     /*
      * FIXME: unmap memory
      */

diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c
index e3749d0..af13c62d 100644
--- a/drivers/cdrom/cdrom.c
+++ b/drivers/cdrom/cdrom.c

@@ -242,6 +242,8 @@
 
 -------------------------------------------------------------------------*/
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #define REVISION "Revision: 3.20"
 #define VERSION "Id: cdrom.c 3.20 2003/12/17"
 
@@ -314,11 +316,17 @@
 static const char *mrw_address_space[] = { "DMA", "GAA" };
 
 #if (ERRLOGMASK!=CD_NOTHING)
-#define cdinfo(type, fmt, args...) \
-        if ((ERRLOGMASK & type) || debug==1 ) \
-            printk(KERN_INFO "cdrom: " fmt, ## args)
+#define cdinfo(type, fmt, args...)			\
+do {							\
+	if ((ERRLOGMASK & (type)) || debug == 1)	\
+		pr_info(fmt, ##args);			\
+} while (0)
 #else
-#define cdinfo(type, fmt, args...) 
+#define cdinfo(type, fmt, args...)			\
+do {							\
+	if ((0 && (ERRLOGMASK & (type))) || debug == 1)	\
+		pr_info(fmt, ##args);			\
+} while (0)	/* mask test is constant-false here; only debug == 1 enables output */
 #endif
 
 /* These are used to simplify getting data in from and back to user land */
@@ -395,7 +403,7 @@
 	if (cdo->open == NULL || cdo->release == NULL)
 		return -EINVAL;
 	if (!banner_printed) {
-		printk(KERN_INFO "Uniform CD-ROM driver " REVISION "\n");
+		pr_info("Uniform CD-ROM driver " REVISION "\n");
 		banner_printed = 1;
 		cdrom_sysctl_register();
 	}
@@ -546,7 +554,7 @@
 	unsigned char buffer[12];
 	int ret;
 
-	printk(KERN_INFO "cdrom: %sstarting format\n", cont ? "Re" : "");
+	pr_info("%sstarting format\n", cont ? "Re" : "");
 
 	/*
 	 * FmtData bit set (bit 4), format type is 1
@@ -576,7 +584,7 @@
 
 	ret = cdi->ops->generic_packet(cdi, &cgc);
 	if (ret)
-		printk(KERN_INFO "cdrom: bgformat failed\n");
+		pr_info("bgformat failed\n");
 
 	return ret;
 }
@@ -622,8 +630,7 @@
 
 	ret = 0;
 	if (di.mrw_status == CDM_MRW_BGFORMAT_ACTIVE) {
-		printk(KERN_INFO "cdrom: issuing MRW back ground "
-				"format suspend\n");
+		pr_info("issuing MRW background format suspend\n");
 		ret = cdrom_mrw_bgformat_susp(cdi, 0);
 	}
 
@@ -658,7 +665,8 @@
 	if ((ret = cdrom_mode_select(cdi, &cgc)))
 		return ret;
 
-	printk(KERN_INFO "cdrom: %s: mrw address space %s selected\n", cdi->name, mrw_address_space[space]);
+	pr_info("%s: mrw address space %s selected\n",
+		cdi->name, mrw_address_space[space]);
 	return 0;
 }
 
@@ -762,7 +770,7 @@
 	 * always reset to DMA lba space on open
 	 */
 	if (cdrom_mrw_set_lba_space(cdi, MRW_LBA_DMA)) {
-		printk(KERN_ERR "cdrom: failed setting lba address space\n");
+		pr_err("failed setting lba address space\n");
 		return 1;
 	}
 
@@ -781,8 +789,7 @@
 	 * 3	-	MRW formatting complete
 	 */
 	ret = 0;
-	printk(KERN_INFO "cdrom open: mrw_status '%s'\n",
-			mrw_format_status[di.mrw_status]);
+	pr_info("open: mrw_status '%s'\n", mrw_format_status[di.mrw_status]);
 	if (!di.mrw_status)
 		ret = 1;
 	else if (di.mrw_status == CDM_MRW_BGFORMAT_INACTIVE &&
@@ -932,8 +939,7 @@
 		return;
 	}
 
-	printk(KERN_INFO "cdrom: %s: dirty DVD+RW media, \"finalizing\"\n",
-	       cdi->name);
+	pr_info("%s: dirty DVD+RW media, \"finalizing\"\n", cdi->name);
 
 	init_cdrom_command(&cgc, NULL, 0, CGC_DATA_NONE);
 	cgc.cmd[0] = GPCMD_FLUSH_CACHE;
@@ -2176,7 +2182,7 @@
 	 * frame dma, so drop to single frame dma if we need to
 	 */
 	if (cdi->cdda_method == CDDA_BPC_FULL && nframes > 1) {
-		printk("cdrom: dropping to single frame dma\n");
+		pr_info("dropping to single frame dma\n");
 		cdi->cdda_method = CDDA_BPC_SINGLE;
 		goto retry;
 	}
@@ -2189,7 +2195,7 @@
 	if (cdi->last_sense != 0x04 && cdi->last_sense != 0x0b)
 		return ret;
 
-	printk("cdrom: dropping to old style cdda (sense=%x)\n", cdi->last_sense);
+	pr_info("dropping to old style cdda (sense=%x)\n", cdi->last_sense);
 	cdi->cdda_method = CDDA_OLD;
 	return cdrom_read_cdda_old(cdi, ubuf, lba, nframes);	
 }
@@ -3401,7 +3407,7 @@
 					"\t%d", CDROM_CAN(val) != 0);
 			break;
 		default:
-			printk(KERN_INFO "cdrom: invalid option%d\n", option);
+			pr_info("invalid option%d\n", option);
 			return 1;
 		}
 		if (!ret)
@@ -3491,7 +3497,7 @@
 	mutex_unlock(&cdrom_mutex);
 	return proc_dostring(ctl, write, buffer, lenp, ppos);
 done:
-	printk(KERN_INFO "cdrom: info buffer too small\n");
+	pr_info("info buffer too small\n");
 	goto doit;
 }
 
@@ -3665,7 +3671,7 @@
 
 static void __exit cdrom_exit(void)
 {
-	printk(KERN_INFO "Uniform CD-ROM driver unloaded\n");
+	pr_info("Uniform CD-ROM driver unloaded\n");
 	cdrom_sysctl_unregister();
 }
 

diff --git a/drivers/cdrom/gdrom.c b/drivers/cdrom/gdrom.c
index 03c71f7..261107d 100644
--- a/drivers/cdrom/gdrom.c
+++ b/drivers/cdrom/gdrom.c

@@ -19,6 +19,8 @@
  *
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/fs.h>
@@ -32,6 +34,7 @@
 #include <linux/blkdev.h>
 #include <linux/interrupt.h>
 #include <linux/device.h>
+#include <linux/smp_lock.h>
 #include <linux/wait.h>
 #include <linux/workqueue.h>
 #include <linux/platform_device.h>
@@ -339,8 +342,7 @@
 		tocuse = 0;
 		err = gdrom_readtoc_cmd(gd.toc, 0);
 		if (err) {
-			printk(KERN_INFO "GDROM: Could not get CD "
-				"table of contents\n");
+			pr_info("Could not get CD table of contents\n");
 			return -ENXIO;
 		}
 	}
@@ -357,8 +359,7 @@
 	} while (track >= fentry);
 
 	if ((track > 100) || (track < get_entry_track(gd.toc->first))) {
-		printk(KERN_INFO "GDROM: No data on the last "
-			"session of the CD\n");
+		pr_info("No data on the last session of the CD\n");
 		gdrom_getsense(NULL);
 		return -ENXIO;
 	}
@@ -451,14 +452,14 @@
 		goto cleanup_sense;
 	insw(GDROM_DATA_REG, &sense, sense_command->buflen/2);
 	if (sense[1] & 40) {
-		printk(KERN_INFO "GDROM: Drive not ready - command aborted\n");
+		pr_info("Drive not ready - command aborted\n");
 		goto cleanup_sense;
 	}
 	sense_key = sense[1] & 0x0F;
 	if (sense_key < ARRAY_SIZE(sense_texts))
-		printk(KERN_INFO "GDROM: %s\n", sense_texts[sense_key].text);
+		pr_info("%s\n", sense_texts[sense_key].text);
 	else
-		printk(KERN_ERR "GDROM: Unknown sense key: %d\n", sense_key);
+		pr_err("Unknown sense key: %d\n", sense_key);
 	if (bufstring) /* return addional sense data */
 		memcpy(bufstring, &sense[4], 2);
 	if (sense_key < 2)
@@ -492,12 +493,18 @@
 
 static int gdrom_bdops_open(struct block_device *bdev, fmode_t mode)
 {
-	return cdrom_open(gd.cd_info, bdev, mode);
+	int ret;
+	lock_kernel();	/* held across the cdrom_open() call */
+	ret = cdrom_open(gd.cd_info, bdev, mode);
+	unlock_kernel();
+	return ret;	/* cdrom_open() result is passed through unchanged */
 }
 
 static int gdrom_bdops_release(struct gendisk *disk, fmode_t mode)
 {
+	lock_kernel();	/* held across the cdrom_release() call */
 	cdrom_release(gd.cd_info, mode);
+	unlock_kernel();
 	return 0;
 }
 
@@ -509,7 +516,13 @@
 static int gdrom_bdops_ioctl(struct block_device *bdev, fmode_t mode,
 	unsigned cmd, unsigned long arg)
 {
-	return cdrom_ioctl(gd.cd_info, bdev, mode, cmd, arg);
+	int ret;
+
+	lock_kernel();	/* taken here: the op is now registered as .ioctl, not .locked_ioctl */
+	ret = cdrom_ioctl(gd.cd_info, bdev, mode, cmd, arg);
+	unlock_kernel();
+
+	return ret;	/* cdrom_ioctl() result is passed through unchanged */
 }
 
 static const struct block_device_operations gdrom_bdops = {
@@ -517,7 +530,7 @@
 	.open			= gdrom_bdops_open,
 	.release		= gdrom_bdops_release,
 	.media_changed		= gdrom_bdops_mediachanged,
-	.locked_ioctl		= gdrom_bdops_ioctl,
+	.ioctl			= gdrom_bdops_ioctl,
 };
 
 static irqreturn_t gdrom_command_interrupt(int irq, void *dev_id)
@@ -643,14 +656,13 @@
 	struct request *req;
 
 	while ((req = blk_fetch_request(rq)) != NULL) {
-		if (!blk_fs_request(req)) {
-			printk(KERN_DEBUG "GDROM: Non-fs request ignored\n");
+		if (req->cmd_type != REQ_TYPE_FS) {
+			printk(KERN_DEBUG "gdrom: Non-fs request ignored\n");
 			__blk_end_request_all(req, -EIO);
 			continue;
 		}
 		if (rq_data_dir(req) != READ) {
-			printk(KERN_NOTICE "GDROM: Read only device -");
-			printk(" write request ignored\n");
+			pr_notice("Read only device - write request ignored\n");
 			__blk_end_request_all(req, -EIO);
 			continue;
 		}
@@ -685,7 +697,7 @@
 	firmw_ver = kstrndup(id->firmver, 16, GFP_KERNEL);
 	if (!firmw_ver)
 		goto free_manuf_name;
-	printk(KERN_INFO "GDROM: %s from %s with firmware %s\n",
+	pr_info("%s from %s with firmware %s\n",
 		model_name, manuf_name, firmw_ver);
 	err = 0;
 	kfree(firmw_ver);
@@ -757,7 +769,7 @@
 	int err;
 	/* Start the device */
 	if (gdrom_execute_diagnostic() != 1) {
-		printk(KERN_WARNING "GDROM: ATA Probe for GDROM failed.\n");
+		pr_warning("ATA Probe for GDROM failed\n");
 		return -ENODEV;
 	}
 	/* Print out firmware ID */
@@ -767,7 +779,7 @@
 	gdrom_major = register_blkdev(0, GDROM_DEV_NAME);
 	if (gdrom_major <= 0)
 		return gdrom_major;
-	printk(KERN_INFO "GDROM: Registered with major number %d\n",
+	pr_info("Registered with major number %d\n",
 		gdrom_major);
 	/* Specify basic properties of drive */
 	gd.cd_info = kzalloc(sizeof(struct cdrom_device_info), GFP_KERNEL);
@@ -818,7 +830,7 @@
 	unregister_blkdev(gdrom_major, GDROM_DEV_NAME);
 	gdrom_major = 0;
 probe_fail_no_mem:
-	printk(KERN_WARNING "GDROM: Probe failed - error is 0x%X\n", err);
+	pr_warning("Probe failed - error is 0x%X\n", err);
 	return err;
 }
 

diff --git a/drivers/cdrom/viocd.c b/drivers/cdrom/viocd.c
index 451cd70..56bf9f4 100644
--- a/drivers/cdrom/viocd.c
+++ b/drivers/cdrom/viocd.c

@@ -31,6 +31,8 @@
  * the OS/400 partition.
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/major.h>
 #include <linux/blkdev.h>
 #include <linux/cdrom.h>
@@ -40,6 +42,7 @@
 #include <linux/module.h>
 #include <linux/completion.h>
 #include <linux/proc_fs.h>
+#include <linux/smp_lock.h>
 #include <linux/seq_file.h>
 #include <linux/scatterlist.h>
 
@@ -53,9 +56,6 @@
 
 #define VIOCD_VERS "1.06"
 
-#define VIOCD_KERN_WARNING		KERN_WARNING "viocd: "
-#define VIOCD_KERN_INFO			KERN_INFO "viocd: "
-
 /*
  * Should probably make this a module parameter....sigh
  */
@@ -154,13 +154,21 @@
 static int viocd_blk_open(struct block_device *bdev, fmode_t mode)
 {
 	struct disk_info *di = bdev->bd_disk->private_data;
-	return cdrom_open(&di->viocd_info, bdev, mode);
+	int ret;
+
+	lock_kernel();	/* held across the cdrom_open() call */
+	ret = cdrom_open(&di->viocd_info, bdev, mode);
+	unlock_kernel();
+
+	return ret;	/* cdrom_open() result is passed through unchanged */
 }
 
 static int viocd_blk_release(struct gendisk *disk, fmode_t mode)
 {
 	struct disk_info *di = disk->private_data;
+	lock_kernel();	/* held across the cdrom_release() call */
 	cdrom_release(&di->viocd_info, mode);
+	unlock_kernel();
 	return 0;
 }
 
@@ -168,7 +176,13 @@
 		unsigned cmd, unsigned long arg)
 {
 	struct disk_info *di = bdev->bd_disk->private_data;
-	return cdrom_ioctl(&di->viocd_info, bdev, mode, cmd, arg);
+	int ret;
+
+	lock_kernel();
+	ret = cdrom_ioctl(&di->viocd_info, bdev, mode, cmd, arg);
+	unlock_kernel();
+
+	return ret;
 }
 
 static int viocd_blk_media_changed(struct gendisk *disk)
@@ -181,7 +195,7 @@
 	.owner =		THIS_MODULE,
 	.open =			viocd_blk_open,
 	.release =		viocd_blk_release,
-	.locked_ioctl =		viocd_blk_ioctl,
+	.ioctl =		viocd_blk_ioctl,
 	.media_changed =	viocd_blk_media_changed,
 };
 
@@ -202,9 +216,8 @@
 			(u64)&we, VIOVERSION << 16, ((u64)device_no << 48),
 			0, 0, 0);
 	if (hvrc != 0) {
-		printk(VIOCD_KERN_WARNING
-				"bad rc on HvCallEvent_signalLpEventFast %d\n",
-				(int)hvrc);
+		pr_warning("bad rc on HvCallEvent_signalLpEventFast %d\n",
+			   (int)hvrc);
 		return -EIO;
 	}
 
@@ -213,8 +226,8 @@
 	if (we.rc) {
 		const struct vio_error_entry *err =
 			vio_lookup_rc(viocd_err_table, we.sub_result);
-		printk(VIOCD_KERN_WARNING "bad rc %d:0x%04X on open: %s\n",
-				we.rc, we.sub_result, err->msg);
+		pr_warning("bad rc %d:0x%04X on open: %s\n",
+			   we.rc, we.sub_result, err->msg);
 		return -err->errno;
 	}
 
@@ -234,9 +247,8 @@
 			viopath_targetinst(viopath_hostLp), 0,
 			VIOVERSION << 16, ((u64)device_no << 48), 0, 0, 0);
 	if (hvrc != 0)
-		printk(VIOCD_KERN_WARNING
-				"bad rc on HvCallEvent_signalLpEventFast %d\n",
-				(int)hvrc);
+		pr_warning("bad rc on HvCallEvent_signalLpEventFast %d\n",
+			   (int)hvrc);
 }
 
 /* Send a read or write request to OS/400 */
@@ -262,13 +274,12 @@
 
 	sg_init_table(&sg, 1);
         if (blk_rq_map_sg(req->q, req, &sg) == 0) {
-		printk(VIOCD_KERN_WARNING
-				"error setting up scatter/gather list\n");
+		pr_warning("error setting up scatter/gather list\n");
 		return -1;
 	}
 
 	if (dma_map_sg(diskinfo->dev, &sg, 1, direction) == 0) {
-		printk(VIOCD_KERN_WARNING "error allocating sg tce\n");
+		pr_warning("error allocating sg tce\n");
 		return -1;
 	}
 	dmaaddr = sg_dma_address(&sg);
@@ -284,7 +295,7 @@
 			((u64)DEVICE_NR(diskinfo) << 48) | dmaaddr,
 			(u64)blk_rq_pos(req) * 512, len, 0);
 	if (hvrc != HvLpEvent_Rc_Good) {
-		printk(VIOCD_KERN_WARNING "hv error on op %d\n", (int)hvrc);
+		pr_warning("hv error on op %d\n", (int)hvrc);
 		return -1;
 	}
 
@@ -298,11 +309,10 @@
 	struct request *req;
 
 	while ((rwreq == 0) && ((req = blk_fetch_request(q)) != NULL)) {
-		if (!blk_fs_request(req))
+		if (req->cmd_type != REQ_TYPE_FS)
 			__blk_end_request_all(req, -EIO);
 		else if (send_request(req) < 0) {
-			printk(VIOCD_KERN_WARNING
-					"unable to send message to OS/400!");
+			pr_warning("unable to send message to OS/400!\n");
 			__blk_end_request_all(req, -EIO);
 		} else
 			rwreq++;
@@ -327,8 +337,8 @@
 			(u64)&we, VIOVERSION << 16, ((u64)device_no << 48),
 			0, 0, 0);
 	if (hvrc != 0) {
-		printk(VIOCD_KERN_WARNING "bad rc on HvCallEvent_signalLpEventFast %d\n",
-				(int)hvrc);
+		pr_warning("bad rc on HvCallEvent_signalLpEventFast %d\n",
+			   (int)hvrc);
 		return -EIO;
 	}
 
@@ -338,9 +348,8 @@
 	if (we.rc) {
 		const struct vio_error_entry *err =
 			vio_lookup_rc(viocd_err_table, we.sub_result);
-		printk(VIOCD_KERN_WARNING
-				"bad rc %d:0x%04X on check_change: %s; Assuming no change\n",
-				we.rc, we.sub_result, err->msg);
+		pr_warning("bad rc %d:0x%04X on check_change: %s; Assuming no change\n",
+			   we.rc, we.sub_result, err->msg);
 		return 0;
 	}
 
@@ -367,8 +376,8 @@
 			(u64)&we, VIOVERSION << 16,
 			(device_no << 48) | (flags << 32), 0, 0, 0);
 	if (hvrc != 0) {
-		printk(VIOCD_KERN_WARNING "bad rc on HvCallEvent_signalLpEventFast %d\n",
-				(int)hvrc);
+		pr_warning("bad rc on HvCallEvent_signalLpEventFast %d\n",
+			   (int)hvrc);
 		return -EIO;
 	}
 
@@ -455,8 +464,7 @@
 		return;
 	/* First, we should NEVER get an int here...only acks */
 	if (hvlpevent_is_int(event)) {
-		printk(VIOCD_KERN_WARNING
-				"Yikes! got an int in viocd event handler!\n");
+		pr_warning("Yikes! got an int in viocd event handler!\n");
 		if (hvlpevent_need_ack(event)) {
 			event->xRc = HvLpEvent_Rc_InvalidSubtype;
 			HvCallEvent_ackLpEvent(event);
@@ -510,10 +518,9 @@
 			const struct vio_error_entry *err =
 				vio_lookup_rc(viocd_err_table,
 						bevent->sub_result);
-			printk(VIOCD_KERN_WARNING "request %p failed "
-					"with rc %d:0x%04X: %s\n",
-					req, event->xRc,
-					bevent->sub_result, err->msg);
+			pr_warning("request %p failed with rc %d:0x%04X: %s\n",
+				   req, event->xRc,
+				   bevent->sub_result, err->msg);
 			__blk_end_request_all(req, -EIO);
 		} else
 			__blk_end_request_all(req, 0);
@@ -524,9 +531,8 @@
 		break;
 
 	default:
-		printk(VIOCD_KERN_WARNING
-				"message with invalid subtype %0x04X!\n",
-				event->xSubtype & VIOMINOR_SUBTYPE_MASK);
+		pr_warning("message with invalid subtype 0x%04X!\n", /* was "%0x04X": hex value followed by a literal "04X" */
+			   event->xSubtype & VIOMINOR_SUBTYPE_MASK);
 		if (hvlpevent_need_ack(event)) {
 			event->xRc = HvLpEvent_Rc_InvalidSubtype;
 			HvCallEvent_ackLpEvent(event);
@@ -593,23 +599,19 @@
 	sprintf(c->name, VIOCD_DEVICE "%c", 'a' + deviceno);
 
 	if (register_cdrom(c) != 0) {
-		printk(VIOCD_KERN_WARNING "Cannot register viocd CD-ROM %s!\n",
-				c->name);
+		pr_warning("Cannot register viocd CD-ROM %s!\n", c->name);
 		goto out;
 	}
-	printk(VIOCD_KERN_INFO "cd %s is iSeries resource %10.10s "
-			"type %4.4s, model %3.3s\n",
-			c->name, d->rsrcname, d->type, d->model);
+	pr_info("cd %s is iSeries resource %10.10s type %4.4s, model %3.3s\n",
+		c->name, d->rsrcname, d->type, d->model);
 	q = blk_init_queue(do_viocd_request, &viocd_reqlock);
 	if (q == NULL) {
-		printk(VIOCD_KERN_WARNING "Cannot allocate queue for %s!\n",
-				c->name);
+		pr_warning("Cannot allocate queue for %s!\n", c->name);
 		goto out_unregister_cdrom;
 	}
 	gendisk = alloc_disk(1);
 	if (gendisk == NULL) {
-		printk(VIOCD_KERN_WARNING "Cannot create gendisk for %s!\n",
-				c->name);
+		pr_warning("Cannot create gendisk for %s!\n", c->name);
 		goto out_cleanup_queue;
 	}
 	gendisk->major = VIOCD_MAJOR;
@@ -682,21 +684,19 @@
 			return -ENODEV;
 	}
 
-	printk(VIOCD_KERN_INFO "vers " VIOCD_VERS ", hosting partition %d\n",
-			viopath_hostLp);
+	pr_info("vers " VIOCD_VERS ", hosting partition %d\n", viopath_hostLp);
 
 	if (register_blkdev(VIOCD_MAJOR, VIOCD_DEVICE) != 0) {
-		printk(VIOCD_KERN_WARNING "Unable to get major %d for %s\n",
-				VIOCD_MAJOR, VIOCD_DEVICE);
+		pr_warning("Unable to get major %d for %s\n",
+			   VIOCD_MAJOR, VIOCD_DEVICE);
 		return -EIO;
 	}
 
 	ret = viopath_open(viopath_hostLp, viomajorsubtype_cdio,
 			MAX_CD_REQ + 2);
 	if (ret) {
-		printk(VIOCD_KERN_WARNING
-				"error opening path to host partition %d\n",
-				viopath_hostLp);
+		pr_warning("error opening path to host partition %d\n",
+			   viopath_hostLp);
 		goto out_unregister;
 	}
 

diff --git a/drivers/ide/ide-atapi.c b/drivers/ide/ide-atapi.c
index f9daffd..e88a2cf 100644
--- a/drivers/ide/ide-atapi.c
+++ b/drivers/ide/ide-atapi.c

@@ -190,7 +190,7 @@
 
 	BUG_ON(sense_len > sizeof(*sense));
 
-	if (blk_sense_request(rq) || drive->sense_rq_armed)
+	if (rq->cmd_type == REQ_TYPE_SENSE || drive->sense_rq_armed)
 		return;
 
 	memset(sense, 0, sizeof(*sense));
@@ -307,13 +307,16 @@
 
 int ide_cd_get_xferlen(struct request *rq)
 {
-	if (blk_fs_request(rq))
+	switch (rq->cmd_type) {	/* transfer length depends only on the request type */
+	case REQ_TYPE_FS:	/* filesystem requests: fixed 32768 bytes */
 		return 32768;
-	else if (blk_sense_request(rq) || blk_pc_request(rq) ||
-			 rq->cmd_type == REQ_TYPE_ATA_PC)
+	case REQ_TYPE_SENSE:
+	case REQ_TYPE_BLOCK_PC:
+	case REQ_TYPE_ATA_PC:	/* these three types use the request's own byte count */
 		return blk_rq_bytes(rq);
-	else
+	default:	/* every other request type: 0 */
 		return 0;
+	}
 }
 EXPORT_SYMBOL_GPL(ide_cd_get_xferlen);
 
@@ -474,12 +477,12 @@
 		if (uptodate == 0)
 			drive->failed_pc = NULL;
 
-		if (blk_special_request(rq)) {
+		if (rq->cmd_type == REQ_TYPE_SPECIAL) {
 			rq->errors = 0;
 			error = 0;
 		} else {
 
-			if (blk_fs_request(rq) == 0 && uptodate <= 0) {
+			if (rq->cmd_type != REQ_TYPE_FS && uptodate <= 0) {
 				if (rq->errors == 0)
 					rq->errors = -EIO;
 			}

diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c
index 2de76cc..31fc769 100644
--- a/drivers/ide/ide-cd.c
+++ b/drivers/ide/ide-cd.c

@@ -31,6 +31,7 @@
 #include <linux/delay.h>
 #include <linux/timer.h>
 #include <linux/seq_file.h>
+#include <linux/smp_lock.h>
 #include <linux/slab.h>
 #include <linux/interrupt.h>
 #include <linux/errno.h>
@@ -176,7 +177,7 @@
 			if (!sense->valid)
 				break;
 			if (failed_command == NULL ||
-					!blk_fs_request(failed_command))
+			    failed_command->cmd_type != REQ_TYPE_FS)
 				break;
 			sector = (sense->information[0] << 24) |
 				 (sense->information[1] << 16) |
@@ -292,7 +293,7 @@
 				  "stat 0x%x",
 				  rq->cmd[0], rq->cmd_type, err, stat);
 
-	if (blk_sense_request(rq)) {
+	if (rq->cmd_type == REQ_TYPE_SENSE) {
 		/*
 		 * We got an error trying to get sense info from the drive
 		 * (probably while trying to recover from a former error).
@@ -303,7 +304,7 @@
 	}
 
 	/* if we have an error, pass CHECK_CONDITION as the SCSI status byte */
-	if (blk_pc_request(rq) && !rq->errors)
+	if (rq->cmd_type == REQ_TYPE_BLOCK_PC && !rq->errors)
 		rq->errors = SAM_STAT_CHECK_CONDITION;
 
 	if (blk_noretry_request(rq))
@@ -311,13 +312,14 @@
 
 	switch (sense_key) {
 	case NOT_READY:
-		if (blk_fs_request(rq) && rq_data_dir(rq) == WRITE) {
+		if (rq->cmd_type == REQ_TYPE_FS && rq_data_dir(rq) == WRITE) {
 			if (ide_cd_breathe(drive, rq))
 				return 1;
 		} else {
 			cdrom_saw_media_change(drive);
 
-			if (blk_fs_request(rq) && !blk_rq_quiet(rq))
+			if (rq->cmd_type == REQ_TYPE_FS &&
+			    !(rq->cmd_flags & REQ_QUIET))
 				printk(KERN_ERR PFX "%s: tray open\n",
 					drive->name);
 		}
@@ -326,7 +328,7 @@
 	case UNIT_ATTENTION:
 		cdrom_saw_media_change(drive);
 
-		if (blk_fs_request(rq) == 0)
+		if (rq->cmd_type != REQ_TYPE_FS)
 			return 0;
 
 		/*
@@ -352,7 +354,7 @@
 		 * No point in retrying after an illegal request or data
 		 * protect error.
 		 */
-		if (!blk_rq_quiet(rq))
+		if (!(rq->cmd_flags & REQ_QUIET))
 			ide_dump_status(drive, "command error", stat);
 		do_end_request = 1;
 		break;
@@ -361,20 +363,20 @@
 		 * No point in re-trying a zillion times on a bad sector.
 		 * If we got here the error is not correctable.
 		 */
-		if (!blk_rq_quiet(rq))
+		if (!(rq->cmd_flags & REQ_QUIET))
 			ide_dump_status(drive, "media error "
 					"(bad sector)", stat);
 		do_end_request = 1;
 		break;
 	case BLANK_CHECK:
 		/* disk appears blank? */
-		if (!blk_rq_quiet(rq))
+		if (!(rq->cmd_flags & REQ_QUIET))
 			ide_dump_status(drive, "media error (blank)",
 					stat);
 		do_end_request = 1;
 		break;
 	default:
-		if (blk_fs_request(rq) == 0)
+		if (rq->cmd_type != REQ_TYPE_FS)
 			break;
 		if (err & ~ATA_ABORTED) {
 			/* go to the default handler for other errors */
@@ -385,7 +387,7 @@
 			do_end_request = 1;
 	}
 
-	if (blk_fs_request(rq) == 0) {
+	if (rq->cmd_type != REQ_TYPE_FS) {
 		rq->cmd_flags |= REQ_FAILED;
 		do_end_request = 1;
 	}
@@ -532,7 +534,7 @@
 	ide_expiry_t *expiry = NULL;
 	int dma_error = 0, dma, thislen, uptodate = 0;
 	int write = (rq_data_dir(rq) == WRITE) ? 1 : 0, rc = 0;
-	int sense = blk_sense_request(rq);
+	int sense = (rq->cmd_type == REQ_TYPE_SENSE);
 	unsigned int timeout;
 	u16 len;
 	u8 ireason, stat;
@@ -575,7 +577,7 @@
 
 	ide_read_bcount_and_ireason(drive, &len, &ireason);
 
-	thislen = blk_fs_request(rq) ? len : cmd->nleft;
+	thislen = (rq->cmd_type == REQ_TYPE_FS) ? len : cmd->nleft;
 	if (thislen > len)
 		thislen = len;
 
@@ -584,7 +586,7 @@
 
 	/* If DRQ is clear, the command has completed. */
 	if ((stat & ATA_DRQ) == 0) {
-		if (blk_fs_request(rq)) {
+		if (rq->cmd_type == REQ_TYPE_FS) {
 			/*
 			 * If we're not done reading/writing, complain.
 			 * Otherwise, complete the command normally.
@@ -598,7 +600,7 @@
 					rq->cmd_flags |= REQ_FAILED;
 				uptodate = 0;
 			}
-		} else if (!blk_pc_request(rq)) {
+		} else if (rq->cmd_type != REQ_TYPE_BLOCK_PC) {
 			ide_cd_request_sense_fixup(drive, cmd);
 
 			uptodate = cmd->nleft ? 0 : 1;
@@ -647,7 +649,7 @@
 
 	/* pad, if necessary */
 	if (len > 0) {
-		if (blk_fs_request(rq) == 0 || write == 0)
+		if (rq->cmd_type != REQ_TYPE_FS || write == 0)
 			ide_pad_transfer(drive, write, len);
 		else {
 			printk(KERN_ERR PFX "%s: confused, missing data\n",
@@ -656,11 +658,11 @@
 		}
 	}
 
-	if (blk_pc_request(rq)) {
+	if (rq->cmd_type == REQ_TYPE_BLOCK_PC) {
 		timeout = rq->timeout;
 	} else {
 		timeout = ATAPI_WAIT_PC;
-		if (!blk_fs_request(rq))
+		if (rq->cmd_type != REQ_TYPE_FS)
 			expiry = ide_cd_expiry;
 	}
 
@@ -669,7 +671,7 @@
 	return ide_started;
 
 out_end:
-	if (blk_pc_request(rq) && rc == 0) {
+	if (rq->cmd_type == REQ_TYPE_BLOCK_PC && rc == 0) {
 		rq->resid_len = 0;
 		blk_end_request_all(rq, 0);
 		hwif->rq = NULL;
@@ -677,7 +679,7 @@
 		if (sense && uptodate)
 			ide_cd_complete_failed_rq(drive, rq);
 
-		if (blk_fs_request(rq)) {
+		if (rq->cmd_type == REQ_TYPE_FS) {
 			if (cmd->nleft == 0)
 				uptodate = 1;
 		} else {
@@ -690,7 +692,7 @@
 				return ide_stopped;
 
 		/* make sure it's fully ended */
-		if (blk_fs_request(rq) == 0) {
+		if (rq->cmd_type != REQ_TYPE_FS) {
 			rq->resid_len -= cmd->nbytes - cmd->nleft;
 			if (uptodate == 0 && (cmd->tf_flags & IDE_TFLAG_WRITE))
 				rq->resid_len += cmd->last_xfer_len;
@@ -750,7 +752,7 @@
 	ide_debug_log(IDE_DBG_PC, "rq->cmd[0]: 0x%x, rq->cmd_type: 0x%x",
 				  rq->cmd[0], rq->cmd_type);
 
-	if (blk_pc_request(rq))
+	if (rq->cmd_type == REQ_TYPE_BLOCK_PC)
 		rq->cmd_flags |= REQ_QUIET;
 	else
 		rq->cmd_flags &= ~REQ_FAILED;
@@ -791,21 +793,26 @@
 	if (drive->debug_mask & IDE_DBG_RQ)
 		blk_dump_rq_flags(rq, "ide_cd_do_request");
 
-	if (blk_fs_request(rq)) {
+	switch (rq->cmd_type) {
+	case REQ_TYPE_FS:
 		if (cdrom_start_rw(drive, rq) == ide_stopped)
 			goto out_end;
-	} else if (blk_sense_request(rq) || blk_pc_request(rq) ||
-		   rq->cmd_type == REQ_TYPE_ATA_PC) {
+		break;
+	case REQ_TYPE_SENSE:
+	case REQ_TYPE_BLOCK_PC:
+	case REQ_TYPE_ATA_PC:
 		if (!rq->timeout)
 			rq->timeout = ATAPI_WAIT_PC;
 
 		cdrom_do_block_pc(drive, rq);
-	} else if (blk_special_request(rq)) {
+		break;
+	case REQ_TYPE_SPECIAL:
 		/* right now this can only be a reset... */
 		uptodate = 1;
 		goto out_end;
-	} else
+	default:
 		BUG();
+	}
 
 	/* prepare sense request for this command */
 	ide_prep_sense(drive, rq);
@@ -817,7 +824,7 @@
 
 	cmd.rq = rq;
 
-	if (blk_fs_request(rq) || blk_rq_bytes(rq)) {
+	if (rq->cmd_type == REQ_TYPE_FS || blk_rq_bytes(rq)) {
 		ide_init_sg_cmd(&cmd, blk_rq_bytes(rq));
 		ide_map_sg(drive, &cmd);
 	}
@@ -1373,9 +1380,9 @@
 
 static int ide_cdrom_prep_fn(struct request_queue *q, struct request *rq)
 {
-	if (blk_fs_request(rq))
+	if (rq->cmd_type == REQ_TYPE_FS)
 		return ide_cdrom_prep_fs(q, rq);
-	else if (blk_pc_request(rq))
+	else if (rq->cmd_type == REQ_TYPE_BLOCK_PC)
 		return ide_cdrom_prep_pc(rq);
 
 	return 0;
@@ -1592,17 +1599,19 @@
 
 static int idecd_open(struct block_device *bdev, fmode_t mode)
 {
-	struct cdrom_info *info = ide_cd_get(bdev->bd_disk);
-	int rc = -ENOMEM;
+	struct cdrom_info *info;
+	int rc = -ENXIO;
 
+	lock_kernel();
+	info = ide_cd_get(bdev->bd_disk);
 	if (!info)
-		return -ENXIO;
+		goto out;
 
 	rc = cdrom_open(&info->devinfo, bdev, mode);
-
 	if (rc < 0)
 		ide_cd_put(info);
-
+out:
+	unlock_kernel();
 	return rc;
 }
 
@@ -1610,9 +1619,11 @@
 {
 	struct cdrom_info *info = ide_drv_g(disk, cdrom_info);
 
+	lock_kernel();
 	cdrom_release(&info->devinfo, mode);
 
 	ide_cd_put(info);
+	unlock_kernel();
 
 	return 0;
 }
@@ -1656,7 +1667,7 @@
 	return 0;
 }
 
-static int idecd_ioctl(struct block_device *bdev, fmode_t mode,
+static int idecd_locked_ioctl(struct block_device *bdev, fmode_t mode,
 			unsigned int cmd, unsigned long arg)
 {
 	struct cdrom_info *info = ide_drv_g(bdev->bd_disk, cdrom_info);
@@ -1678,6 +1689,19 @@
 	return err;
 }
 
+static int idecd_ioctl(struct block_device *bdev, fmode_t mode,
+			     unsigned int cmd, unsigned long arg)
+{
+	int ret;
+
+	lock_kernel();
+	ret = idecd_locked_ioctl(bdev, mode, cmd, arg);
+	unlock_kernel();
+
+	return ret;
+}
+
+
 static int idecd_media_changed(struct gendisk *disk)
 {
 	struct cdrom_info *info = ide_drv_g(disk, cdrom_info);
@@ -1698,7 +1722,7 @@
 	.owner			= THIS_MODULE,
 	.open			= idecd_open,
 	.release		= idecd_release,
-	.locked_ioctl		= idecd_ioctl,
+	.ioctl			= idecd_ioctl,
 	.media_changed		= idecd_media_changed,
 	.revalidate_disk	= idecd_revalidate_disk
 };

diff --git a/drivers/ide/ide-cd_ioctl.c b/drivers/ide/ide-cd_ioctl.c
index 02712bf..766b3de 100644
--- a/drivers/ide/ide-cd_ioctl.c
+++ b/drivers/ide/ide-cd_ioctl.c

@@ -454,7 +454,7 @@
 	   touch it at all. */
 
 	if (cgc->data_direction == CGC_DATA_WRITE)
-		flags |= REQ_RW;
+		flags |= REQ_WRITE;
 
 	if (cgc->sense)
 		memset(cgc->sense, 0, sizeof(struct request_sense));

diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c
index 33d6503..7433e07 100644
--- a/drivers/ide/ide-disk.c
+++ b/drivers/ide/ide-disk.c

@@ -184,7 +184,7 @@
 	ide_hwif_t *hwif = drive->hwif;
 
 	BUG_ON(drive->dev_flags & IDE_DFLAG_BLOCKED);
-	BUG_ON(!blk_fs_request(rq));
+	BUG_ON(rq->cmd_type != REQ_TYPE_FS);
 
 	ledtrig_ide_activity();
 
@@ -427,10 +427,15 @@
 		drive->dev_flags |= IDE_DFLAG_NOHPA; /* disable HPA on resume */
 }
 
-static void idedisk_prepare_flush(struct request_queue *q, struct request *rq)
+static int idedisk_prep_fn(struct request_queue *q, struct request *rq)
 {
 	ide_drive_t *drive = q->queuedata;
-	struct ide_cmd *cmd = kmalloc(sizeof(*cmd), GFP_ATOMIC);
+	struct ide_cmd *cmd;
+
+	if (!(rq->cmd_flags & REQ_FLUSH))
+		return BLKPREP_OK;
+
+	cmd = kmalloc(sizeof(*cmd), GFP_ATOMIC);
 
 	/* FIXME: map struct ide_taskfile on rq->cmd[] */
 	BUG_ON(cmd == NULL);
@@ -448,6 +453,8 @@
 	rq->cmd_type = REQ_TYPE_ATA_TASKFILE;
 	rq->special = cmd;
 	cmd->rq = rq;
+
+	return BLKPREP_OK;
 }
 
 ide_devset_get(multcount, mult_count);
@@ -513,7 +520,6 @@
 {
 	u16 *id = drive->id;
 	unsigned ordered = QUEUE_ORDERED_NONE;
-	prepare_flush_fn *prep_fn = NULL;
 
 	if (drive->dev_flags & IDE_DFLAG_WCACHE) {
 		unsigned long long capacity;
@@ -538,12 +544,12 @@
 
 		if (barrier) {
 			ordered = QUEUE_ORDERED_DRAIN_FLUSH;
-			prep_fn = idedisk_prepare_flush;
+			blk_queue_prep_rq(drive->queue, idedisk_prep_fn);
 		}
 	} else
 		ordered = QUEUE_ORDERED_DRAIN;
 
-	blk_queue_ordered(drive->queue, ordered, prep_fn);
+	blk_queue_ordered(drive->queue, ordered);
 }
 
 ide_devset_get_flag(wcache, IDE_DFLAG_WCACHE);

diff --git a/drivers/ide/ide-disk_ioctl.c b/drivers/ide/ide-disk_ioctl.c
index 7b783dd..ec94c66 100644
--- a/drivers/ide/ide-disk_ioctl.c
+++ b/drivers/ide/ide-disk_ioctl.c

@@ -1,6 +1,7 @@
 #include <linux/kernel.h>
 #include <linux/ide.h>
 #include <linux/hdreg.h>
+#include <linux/smp_lock.h>
 
 #include "ide-disk.h"
 
@@ -18,9 +19,13 @@
 {
 	int err;
 
+	lock_kernel();
 	err = ide_setting_ioctl(drive, bdev, cmd, arg, ide_disk_ioctl_settings);
 	if (err != -EOPNOTSUPP)
-		return err;
+		goto out;
 
-	return generic_ide_ioctl(drive, bdev, cmd, arg);
+	err = generic_ide_ioctl(drive, bdev, cmd, arg);
+out:
+	unlock_kernel();
+	return err;
 }

diff --git a/drivers/ide/ide-eh.c b/drivers/ide/ide-eh.c
index e9abf2c..c0aa93f 100644
--- a/drivers/ide/ide-eh.c
+++ b/drivers/ide/ide-eh.c

@@ -122,7 +122,7 @@
 		return ide_stopped;
 
 	/* retry only "normal" I/O: */
-	if (!blk_fs_request(rq)) {
+	if (rq->cmd_type != REQ_TYPE_FS) {
 		if (rq->cmd_type == REQ_TYPE_ATA_TASKFILE) {
 			struct ide_cmd *cmd = rq->special;
 
@@ -146,7 +146,8 @@
 {
 	struct request *rq = drive->hwif->rq;
 
-	if (rq && blk_special_request(rq) && rq->cmd[0] == REQ_DRIVE_RESET) {
+	if (rq && rq->cmd_type == REQ_TYPE_SPECIAL &&
+	    rq->cmd[0] == REQ_DRIVE_RESET) {
 		if (err <= 0 && rq->errors == 0)
 			rq->errors = -EIO;
 		ide_complete_rq(drive, err ? err : 0, blk_rq_bytes(rq));

diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c
index 4713bdc..5406b6e 100644
--- a/drivers/ide/ide-floppy.c
+++ b/drivers/ide/ide-floppy.c

@@ -73,7 +73,7 @@
 		drive->failed_pc = NULL;
 
 	if (pc->c[0] == GPCMD_READ_10 || pc->c[0] == GPCMD_WRITE_10 ||
-	    (rq && blk_pc_request(rq)))
+	    (rq && rq->cmd_type == REQ_TYPE_BLOCK_PC))
 		uptodate = 1; /* FIXME */
 	else if (pc->c[0] == GPCMD_REQUEST_SENSE) {
 
@@ -98,7 +98,7 @@
 			       "Aborting request!\n");
 	}
 
-	if (blk_special_request(rq))
+	if (rq->cmd_type == REQ_TYPE_SPECIAL)
 		rq->errors = uptodate ? 0 : IDE_DRV_ERROR_GENERAL;
 
 	return uptodate;
@@ -207,7 +207,7 @@
 	memcpy(rq->cmd, pc->c, 12);
 
 	pc->rq = rq;
-	if (rq->cmd_flags & REQ_RW)
+	if (rq->cmd_flags & REQ_WRITE)
 		pc->flags |= PC_FLAG_WRITING;
 
 	pc->flags |= PC_FLAG_DMA_OK;
@@ -247,14 +247,16 @@
 		} else
 			printk(KERN_ERR PFX "%s: I/O error\n", drive->name);
 
-		if (blk_special_request(rq)) {
+		if (rq->cmd_type == REQ_TYPE_SPECIAL) {
 			rq->errors = 0;
 			ide_complete_rq(drive, 0, blk_rq_bytes(rq));
 			return ide_stopped;
 		} else
 			goto out_end;
 	}
-	if (blk_fs_request(rq)) {
+
+	switch (rq->cmd_type) {
+	case REQ_TYPE_FS:
 		if (((long)blk_rq_pos(rq) % floppy->bs_factor) ||
 		    (blk_rq_sectors(rq) % floppy->bs_factor)) {
 			printk(KERN_ERR PFX "%s: unsupported r/w rq size\n",
@@ -263,13 +265,18 @@
 		}
 		pc = &floppy->queued_pc;
 		idefloppy_create_rw_cmd(drive, pc, rq, (unsigned long)block);
-	} else if (blk_special_request(rq) || blk_sense_request(rq)) {
+		break;
+	case REQ_TYPE_SPECIAL:
+	case REQ_TYPE_SENSE:
 		pc = (struct ide_atapi_pc *)rq->special;
-	} else if (blk_pc_request(rq)) {
+		break;
+	case REQ_TYPE_BLOCK_PC:
 		pc = &floppy->queued_pc;
 		idefloppy_blockpc_cmd(floppy, pc, rq);
-	} else
+		break;
+	default:
 		BUG();
+	}
 
 	ide_prep_sense(drive, rq);
 
@@ -280,7 +287,7 @@
 
 	cmd.rq = rq;
 
-	if (blk_fs_request(rq) || blk_rq_bytes(rq)) {
+	if (rq->cmd_type == REQ_TYPE_FS || blk_rq_bytes(rq)) {
 		ide_init_sg_cmd(&cmd, blk_rq_bytes(rq));
 		ide_map_sg(drive, &cmd);
 	}
@@ -290,7 +297,7 @@
 	return ide_floppy_issue_pc(drive, &cmd, pc);
 out_end:
 	drive->failed_pc = NULL;
-	if (blk_fs_request(rq) == 0 && rq->errors == 0)
+	if (rq->cmd_type != REQ_TYPE_FS && rq->errors == 0)
 		rq->errors = -EIO;
 	ide_complete_rq(drive, -EIO, blk_rq_bytes(rq));
 	return ide_stopped;

diff --git a/drivers/ide/ide-floppy_ioctl.c b/drivers/ide/ide-floppy_ioctl.c
index 9c22882..fd3d05a 100644
--- a/drivers/ide/ide-floppy_ioctl.c
+++ b/drivers/ide/ide-floppy_ioctl.c

@@ -5,6 +5,7 @@
 #include <linux/kernel.h>
 #include <linux/ide.h>
 #include <linux/cdrom.h>
+#include <linux/smp_lock.h>
 
 #include <asm/unaligned.h>
 
@@ -275,12 +276,15 @@
 	void __user *argp = (void __user *)arg;
 	int err;
 
-	if (cmd == CDROMEJECT || cmd == CDROM_LOCKDOOR)
-		return ide_floppy_lockdoor(drive, &pc, arg, cmd);
+	lock_kernel();
+	if (cmd == CDROMEJECT || cmd == CDROM_LOCKDOOR) {
+		err = ide_floppy_lockdoor(drive, &pc, arg, cmd);
+		goto out;
+	}
 
 	err = ide_floppy_format_ioctl(drive, &pc, mode, cmd, argp);
 	if (err != -ENOTTY)
-		return err;
+		goto out;
 
 	/*
 	 * skip SCSI_IOCTL_SEND_COMMAND (deprecated)
@@ -293,5 +297,7 @@
 	if (err == -ENOTTY)
 		err = generic_ide_ioctl(drive, bdev, cmd, arg);
 
+out:
+	unlock_kernel();
 	return err;
 }

diff --git a/drivers/ide/ide-gd.c b/drivers/ide/ide-gd.c
index 7939953..70aeeb1 100644
--- a/drivers/ide/ide-gd.c
+++ b/drivers/ide/ide-gd.c

@@ -1,3 +1,4 @@
+#include <linux/smp_lock.h>
 #include <linux/module.h>
 #include <linux/types.h>
 #include <linux/string.h>
@@ -237,6 +238,18 @@
 	return ret;
 }
 
+static int ide_gd_unlocked_open(struct block_device *bdev, fmode_t mode)
+{
+	int ret;
+
+	lock_kernel();
+	ret = ide_gd_open(bdev, mode);
+	unlock_kernel();
+
+	return ret;
+}
+
+
 static int ide_gd_release(struct gendisk *disk, fmode_t mode)
 {
 	struct ide_disk_obj *idkp = ide_drv_g(disk, ide_disk_obj);
@@ -244,6 +257,7 @@
 
 	ide_debug_log(IDE_DBG_FUNC, "enter");
 
+	lock_kernel();
 	if (idkp->openers == 1)
 		drive->disk_ops->flush(drive);
 
@@ -255,6 +269,7 @@
 	idkp->openers--;
 
 	ide_disk_put(idkp);
+	unlock_kernel();
 
 	return 0;
 }
@@ -321,9 +336,9 @@
 
 static const struct block_device_operations ide_gd_ops = {
 	.owner			= THIS_MODULE,
-	.open			= ide_gd_open,
+	.open			= ide_gd_unlocked_open,
 	.release		= ide_gd_release,
-	.locked_ioctl		= ide_gd_ioctl,
+	.ioctl			= ide_gd_ioctl,
 	.getgeo			= ide_gd_getgeo,
 	.media_changed		= ide_gd_media_changed,
 	.unlock_native_capacity	= ide_gd_unlock_native_capacity,

diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c
index 172ac92..a381be8 100644
--- a/drivers/ide/ide-io.c
+++ b/drivers/ide/ide-io.c

@@ -135,7 +135,7 @@
 
 void ide_kill_rq(ide_drive_t *drive, struct request *rq)
 {
-	u8 drv_req = blk_special_request(rq) && rq->rq_disk;
+	u8 drv_req = (rq->cmd_type == REQ_TYPE_SPECIAL) && rq->rq_disk;
 	u8 media = drive->media;
 
 	drive->failed_pc = NULL;
@@ -145,7 +145,7 @@
 	} else {
 		if (media == ide_tape)
 			rq->errors = IDE_DRV_ERROR_GENERAL;
-		else if (blk_fs_request(rq) == 0 && rq->errors == 0)
+		else if (rq->cmd_type != REQ_TYPE_FS && rq->errors == 0)
 			rq->errors = -EIO;
 	}
 
@@ -307,7 +307,7 @@
 {
 	ide_startstop_t startstop;
 
-	BUG_ON(!blk_rq_started(rq));
+	BUG_ON(!(rq->cmd_flags & REQ_STARTED));
 
 #ifdef DEBUG
 	printk("%s: start_request: current=0x%08lx\n",
@@ -353,7 +353,7 @@
 			    pm->pm_step == IDE_PM_COMPLETED)
 				ide_complete_pm_rq(drive, rq);
 			return startstop;
-		} else if (!rq->rq_disk && blk_special_request(rq))
+		} else if (!rq->rq_disk && rq->cmd_type == REQ_TYPE_SPECIAL)
 			/*
 			 * TODO: Once all ULDs have been modified to
 			 * check for specific op codes rather than

diff --git a/drivers/ide/ide-pm.c b/drivers/ide/ide-pm.c
index 1c08311..9240609 100644
--- a/drivers/ide/ide-pm.c
+++ b/drivers/ide/ide-pm.c

@@ -191,10 +191,10 @@
 
 #ifdef DEBUG_PM
 	printk("%s: completing PM request, %s\n", drive->name,
-	       blk_pm_suspend_request(rq) ? "suspend" : "resume");
+	       (rq->cmd_type == REQ_TYPE_PM_SUSPEND) ? "suspend" : "resume");
 #endif
 	spin_lock_irqsave(q->queue_lock, flags);
-	if (blk_pm_suspend_request(rq))
+	if (rq->cmd_type == REQ_TYPE_PM_SUSPEND)
 		blk_stop_queue(q);
 	else
 		drive->dev_flags &= ~IDE_DFLAG_BLOCKED;
@@ -210,11 +210,11 @@
 {
 	struct request_pm_state *pm = rq->special;
 
-	if (blk_pm_suspend_request(rq) &&
+	if (rq->cmd_type == REQ_TYPE_PM_SUSPEND &&
 	    pm->pm_step == IDE_PM_START_SUSPEND)
 		/* Mark drive blocked when starting the suspend sequence. */
 		drive->dev_flags |= IDE_DFLAG_BLOCKED;
-	else if (blk_pm_resume_request(rq) &&
+	else if (rq->cmd_type == REQ_TYPE_PM_RESUME &&
 		 pm->pm_step == IDE_PM_START_RESUME) {
 		/*
 		 * The first thing we do on wakeup is to wait for BSY bit to

diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c
index b072328..6d622cb 100644
--- a/drivers/ide/ide-tape.c
+++ b/drivers/ide/ide-tape.c

@@ -32,6 +32,7 @@
 #include <linux/errno.h>
 #include <linux/genhd.h>
 #include <linux/seq_file.h>
+#include <linux/smp_lock.h>
 #include <linux/slab.h>
 #include <linux/pci.h>
 #include <linux/ide.h>
@@ -577,7 +578,8 @@
 		      rq->cmd[0], (unsigned long long)blk_rq_pos(rq),
 		      blk_rq_sectors(rq));
 
-	BUG_ON(!(blk_special_request(rq) || blk_sense_request(rq)));
+	BUG_ON(!(rq->cmd_type == REQ_TYPE_SPECIAL ||
+		 rq->cmd_type == REQ_TYPE_SENSE));
 
 	/* Retry a failed packet command */
 	if (drive->failed_pc && drive->pc->c[0] == REQUEST_SENSE) {
@@ -1905,7 +1907,11 @@
 
 static int idetape_open(struct block_device *bdev, fmode_t mode)
 {
-	struct ide_tape_obj *tape = ide_tape_get(bdev->bd_disk, false, 0);
+	struct ide_tape_obj *tape;
+
+	lock_kernel();
+	tape = ide_tape_get(bdev->bd_disk, false, 0);
+	unlock_kernel();
 
 	if (!tape)
 		return -ENXIO;
@@ -1917,7 +1923,10 @@
 {
 	struct ide_tape_obj *tape = ide_drv_g(disk, ide_tape_obj);
 
+	lock_kernel();
 	ide_tape_put(tape);
+	unlock_kernel();
+
 	return 0;
 }
 
@@ -1926,9 +1935,14 @@
 {
 	struct ide_tape_obj *tape = ide_drv_g(bdev->bd_disk, ide_tape_obj);
 	ide_drive_t *drive = tape->drive;
-	int err = generic_ide_ioctl(drive, bdev, cmd, arg);
+	int err;
+
+	lock_kernel();
+	err = generic_ide_ioctl(drive, bdev, cmd, arg);
 	if (err == -EINVAL)
 		err = idetape_blkdev_ioctl(drive, cmd, arg);
+	unlock_kernel();
+
 	return err;
 }
 
@@ -1936,7 +1950,7 @@
 	.owner		= THIS_MODULE,
 	.open		= idetape_open,
 	.release	= idetape_release,
-	.locked_ioctl	= idetape_ioctl,
+	.ioctl		= idetape_ioctl,
 };
 
 static int ide_tape_probe(ide_drive_t *drive)

diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c
index 10f457c..0590c75 100644
--- a/drivers/md/dm-io.c
+++ b/drivers/md/dm-io.c

@@ -356,7 +356,7 @@
 	BUG_ON(num_regions > DM_IO_MAX_REGIONS);
 
 	if (sync)
-		rw |= (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_UNPLUG);
+		rw |= REQ_SYNC | REQ_UNPLUG;
 
 	/*
 	 * For multiple regions we need to be careful to rewind
@@ -364,7 +364,7 @@
 	 */
 	for (i = 0; i < num_regions; i++) {
 		*dp = old_pages;
-		if (where[i].count || (rw & (1 << BIO_RW_BARRIER)))
+		if (where[i].count || (rw & REQ_HARDBARRIER))
 			do_region(rw, i, where + i, dp, io);
 	}
 
@@ -412,8 +412,8 @@
 	}
 	set_current_state(TASK_RUNNING);
 
-	if (io->eopnotsupp_bits && (rw & (1 << BIO_RW_BARRIER))) {
-		rw &= ~(1 << BIO_RW_BARRIER);
+	if (io->eopnotsupp_bits && (rw & REQ_HARDBARRIER)) {
+		rw &= ~REQ_HARDBARRIER;
 		goto retry;
 	}
 
@@ -479,8 +479,8 @@
  * New collapsed (a)synchronous interface.
  *
  * If the IO is asynchronous (i.e. it has notify.fn), you must either unplug
- * the queue with blk_unplug() some time later or set the BIO_RW_SYNC bit in
- * io_req->bi_rw. If you fail to do one of these, the IO will be submitted to
+ * the queue with blk_unplug() some time later or set REQ_SYNC in
+ * io_req->bi_rw. If you fail to do one of these, the IO will be submitted to
  * the disk after q->unplug_delay, which defaults to 3ms in blk-settings.c.
  */
 int dm_io(struct dm_io_request *io_req, unsigned num_regions,

diff --git a/drivers/md/dm-kcopyd.c b/drivers/md/dm-kcopyd.c
index addf834..d8587ba 100644
--- a/drivers/md/dm-kcopyd.c
+++ b/drivers/md/dm-kcopyd.c

@@ -345,7 +345,7 @@
 {
 	int r;
 	struct dm_io_request io_req = {
-		.bi_rw = job->rw | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_UNPLUG),
+		.bi_rw = job->rw | REQ_SYNC | REQ_UNPLUG,
 		.mem.type = DM_IO_PAGE_LIST,
 		.mem.ptr.pl = job->pages,
 		.mem.offset = job->offset,

diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
index ddda531..7413626 100644
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c

@@ -1211,7 +1211,7 @@
 	if (error == -EOPNOTSUPP)
 		goto out;
 
-	if ((error == -EWOULDBLOCK) && bio_rw_flagged(bio, BIO_RW_AHEAD))
+	if ((error == -EWOULDBLOCK) && (bio->bi_rw & REQ_RAHEAD))
 		goto out;
 
 	if (unlikely(error)) {

diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c
index e610725..d6e28d7 100644
--- a/drivers/md/dm-stripe.c
+++ b/drivers/md/dm-stripe.c

@@ -284,7 +284,7 @@
 	if (!error)
 		return 0; /* I/O complete */
 
-	if ((error == -EWOULDBLOCK) && bio_rw_flagged(bio, BIO_RW_AHEAD))
+	if ((error == -EWOULDBLOCK) && (bio->bi_rw & REQ_RAHEAD))
 		return error;
 
 	if (error == -EOPNOTSUPP)

diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index d21e128..a3f21dc 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c

@@ -15,6 +15,7 @@
 #include <linux/blkpg.h>
 #include <linux/bio.h>
 #include <linux/buffer_head.h>
+#include <linux/smp_lock.h>
 #include <linux/mempool.h>
 #include <linux/slab.h>
 #include <linux/idr.h>
@@ -338,6 +339,7 @@
 {
 	struct mapped_device *md;
 
+	lock_kernel();
 	spin_lock(&_minor_lock);
 
 	md = bdev->bd_disk->private_data;
@@ -355,6 +357,7 @@
 
 out:
 	spin_unlock(&_minor_lock);
+	unlock_kernel();
 
 	return md ? 0 : -ENXIO;
 }
@@ -362,8 +365,12 @@
 static int dm_blk_close(struct gendisk *disk, fmode_t mode)
 {
 	struct mapped_device *md = disk->private_data;
+
+	lock_kernel();
 	atomic_dec(&md->open_count);
 	dm_put(md);
+	unlock_kernel();
+
 	return 0;
 }
 
@@ -614,7 +621,7 @@
 			 */
 			spin_lock_irqsave(&md->deferred_lock, flags);
 			if (__noflush_suspending(md)) {
-				if (!bio_rw_flagged(io->bio, BIO_RW_BARRIER))
+				if (!(io->bio->bi_rw & REQ_HARDBARRIER))
 					bio_list_add_head(&md->deferred,
 							  io->bio);
 			} else
@@ -626,7 +633,7 @@
 		io_error = io->error;
 		bio = io->bio;
 
-		if (bio_rw_flagged(bio, BIO_RW_BARRIER)) {
+		if (bio->bi_rw & REQ_HARDBARRIER) {
 			/*
 			 * There can be just one barrier request so we use
 			 * a per-device variable for error reporting.
@@ -792,12 +799,12 @@
 {
 	int rw = rq_data_dir(clone);
 	int run_queue = 1;
-	bool is_barrier = blk_barrier_rq(clone);
+	bool is_barrier = clone->cmd_flags & REQ_HARDBARRIER;
 	struct dm_rq_target_io *tio = clone->end_io_data;
 	struct mapped_device *md = tio->md;
 	struct request *rq = tio->orig;
 
-	if (blk_pc_request(rq) && !is_barrier) {
+	if (rq->cmd_type == REQ_TYPE_BLOCK_PC && !is_barrier) {
 		rq->errors = clone->errors;
 		rq->resid_len = clone->resid_len;
 
@@ -844,7 +851,7 @@
 	struct request_queue *q = rq->q;
 	unsigned long flags;
 
-	if (unlikely(blk_barrier_rq(clone))) {
+	if (unlikely(clone->cmd_flags & REQ_HARDBARRIER)) {
 		/*
 		 * Barrier clones share an original request.
 		 * Leave it to dm_end_request(), which handles this special
@@ -943,7 +950,7 @@
 	struct dm_rq_target_io *tio = clone->end_io_data;
 	struct request *rq = tio->orig;
 
-	if (unlikely(blk_barrier_rq(clone))) {
+	if (unlikely(clone->cmd_flags & REQ_HARDBARRIER)) {
 		/*
 		 * Barrier clones share an original request.  So can't use
 		 * softirq_done with the original.
@@ -972,7 +979,7 @@
 	struct dm_rq_target_io *tio = clone->end_io_data;
 	struct request *rq = tio->orig;
 
-	if (unlikely(blk_barrier_rq(clone))) {
+	if (unlikely(clone->cmd_flags & REQ_HARDBARRIER)) {
 		/*
 		 * Barrier clones share an original request.
 		 * Leave it to dm_end_request(), which handles this special
@@ -1106,7 +1113,7 @@
 
 	clone->bi_sector = sector;
 	clone->bi_bdev = bio->bi_bdev;
-	clone->bi_rw = bio->bi_rw & ~(1 << BIO_RW_BARRIER);
+	clone->bi_rw = bio->bi_rw & ~REQ_HARDBARRIER;
 	clone->bi_vcnt = 1;
 	clone->bi_size = to_bytes(len);
 	clone->bi_io_vec->bv_offset = offset;
@@ -1133,7 +1140,7 @@
 
 	clone = bio_alloc_bioset(GFP_NOIO, bio->bi_max_vecs, bs);
 	__bio_clone(clone, bio);
-	clone->bi_rw &= ~(1 << BIO_RW_BARRIER);
+	clone->bi_rw &= ~REQ_HARDBARRIER;
 	clone->bi_destructor = dm_bio_destructor;
 	clone->bi_sector = sector;
 	clone->bi_idx = idx;
@@ -1301,7 +1308,7 @@
 
 	ci.map = dm_get_live_table(md);
 	if (unlikely(!ci.map)) {
-		if (!bio_rw_flagged(bio, BIO_RW_BARRIER))
+		if (!(bio->bi_rw & REQ_HARDBARRIER))
 			bio_io_error(bio);
 		else
 			if (!md->barrier_error)
@@ -1414,7 +1421,7 @@
 	 * we have to queue this io for later.
 	 */
 	if (unlikely(test_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags)) ||
-	    unlikely(bio_rw_flagged(bio, BIO_RW_BARRIER))) {
+	    unlikely(bio->bi_rw & REQ_HARDBARRIER)) {
 		up_read(&md->io_lock);
 
 		if (unlikely(test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) &&
@@ -1455,20 +1462,9 @@
 	return _dm_request(q, bio);
 }
 
-/*
- * Mark this request as flush request, so that dm_request_fn() can
- * recognize.
- */
-static void dm_rq_prepare_flush(struct request_queue *q, struct request *rq)
-{
-	rq->cmd_type = REQ_TYPE_LINUX_BLOCK;
-	rq->cmd[0] = REQ_LB_OP_FLUSH;
-}
-
 static bool dm_rq_is_flush_request(struct request *rq)
 {
-	if (rq->cmd_type == REQ_TYPE_LINUX_BLOCK &&
-	    rq->cmd[0] == REQ_LB_OP_FLUSH)
+	if (rq->cmd_flags & REQ_FLUSH)
 		return true;
 	else
 		return false;
@@ -1912,8 +1908,7 @@
 	blk_queue_softirq_done(md->queue, dm_softirq_done);
 	blk_queue_prep_rq(md->queue, dm_prep_fn);
 	blk_queue_lld_busy(md->queue, dm_lld_busy);
-	blk_queue_ordered(md->queue, QUEUE_ORDERED_DRAIN_FLUSH,
-			  dm_rq_prepare_flush);
+	blk_queue_ordered(md->queue, QUEUE_ORDERED_DRAIN_FLUSH);
 
 	md->disk = alloc_disk(1);
 	if (!md->disk)
@@ -2296,7 +2291,7 @@
 		if (dm_request_based(md))
 			generic_make_request(c);
 		else {
-			if (bio_rw_flagged(c, BIO_RW_BARRIER))
+			if (c->bi_rw & REQ_HARDBARRIER)
 				process_barrier(md, c);
 			else
 				__split_and_process_bio(md, c);

diff --git a/drivers/md/linear.c b/drivers/md/linear.c
index 7e0e057..ba19060 100644
--- a/drivers/md/linear.c
+++ b/drivers/md/linear.c

@@ -294,7 +294,7 @@
 	dev_info_t *tmp_dev;
 	sector_t start_sector;
 
-	if (unlikely(bio_rw_flagged(bio, BIO_RW_BARRIER))) {
+	if (unlikely(bio->bi_rw & REQ_HARDBARRIER)) {
 		md_barrier_request(mddev, bio);
 		return 0;
 	}

diff --git a/drivers/md/md.c b/drivers/md/md.c
index cb20d0b..700c96e 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c

@@ -36,6 +36,7 @@
 #include <linux/blkdev.h>
 #include <linux/sysctl.h>
 #include <linux/seq_file.h>
+#include <linux/smp_lock.h>
 #include <linux/buffer_head.h> /* for invalidate_bdev */
 #include <linux/poll.h>
 #include <linux/ctype.h>
@@ -353,7 +354,7 @@
 		/* an empty barrier - all done */
 		bio_endio(bio, 0);
 	else {
-		bio->bi_rw &= ~(1<<BIO_RW_BARRIER);
+		bio->bi_rw &= ~REQ_HARDBARRIER;
 		if (mddev->pers->make_request(mddev, bio))
 			generic_make_request(bio);
 		mddev->barrier = POST_REQUEST_BARRIER;
@@ -675,11 +676,11 @@
 	 * if zero is reached.
 	 * If an error occurred, call md_error
 	 *
-	 * As we might need to resubmit the request if BIO_RW_BARRIER
+	 * As we might need to resubmit the request if REQ_HARDBARRIER
 	 * causes ENOTSUPP, we allocate a spare bio...
 	 */
 	struct bio *bio = bio_alloc(GFP_NOIO, 1);
-	int rw = (1<<BIO_RW) | (1<<BIO_RW_SYNCIO) | (1<<BIO_RW_UNPLUG);
+	int rw = REQ_WRITE | REQ_SYNC | REQ_UNPLUG;
 
 	bio->bi_bdev = rdev->bdev;
 	bio->bi_sector = sector;
@@ -691,7 +692,7 @@
 	atomic_inc(&mddev->pending_writes);
 	if (!test_bit(BarriersNotsupp, &rdev->flags)) {
 		struct bio *rbio;
-		rw |= (1<<BIO_RW_BARRIER);
+		rw |= REQ_HARDBARRIER;
 		rbio = bio_clone(bio, GFP_NOIO);
 		rbio->bi_private = bio;
 		rbio->bi_end_io = super_written_barrier;
@@ -736,7 +737,7 @@
 	struct completion event;
 	int ret;
 
-	rw |= (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_UNPLUG);
+	rw |= REQ_SYNC | REQ_UNPLUG;
 
 	bio->bi_bdev = bdev;
 	bio->bi_sector = sector;
@@ -5902,6 +5903,7 @@
 	mddev_t *mddev = mddev_find(bdev->bd_dev);
 	int err;
 
+	lock_kernel();
 	if (mddev->gendisk != bdev->bd_disk) {
 		/* we are racing with mddev_put which is discarding this
 		 * bd_disk.
@@ -5910,6 +5912,7 @@
 		/* Wait until bdev->bd_disk is definitely gone */
 		flush_scheduled_work();
 		/* Then retry the open from the top */
+		unlock_kernel();
 		return -ERESTARTSYS;
 	}
 	BUG_ON(mddev != bdev->bd_disk->private_data);
@@ -5923,6 +5926,7 @@
 
 	check_disk_size_change(mddev->gendisk, bdev);
  out:
+	unlock_kernel();
 	return err;
 }
 
@@ -5931,8 +5935,10 @@
  	mddev_t *mddev = disk->private_data;
 
 	BUG_ON(!mddev);
+	lock_kernel();
 	atomic_dec(&mddev->openers);
 	mddev_put(mddev);
+	unlock_kernel();
 
 	return 0;
 }

diff --git a/drivers/md/md.h b/drivers/md/md.h
index 10597bf..fc56e0f 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h

@@ -67,7 +67,7 @@
 #define	Faulty		1		/* device is known to have a fault */
 #define	In_sync		2		/* device is in_sync with rest of array */
 #define	WriteMostly	4		/* Avoid reading if at all possible */
-#define	BarriersNotsupp	5		/* BIO_RW_BARRIER is not supported */
+#define	BarriersNotsupp	5		/* REQ_HARDBARRIER is not supported */
 #define	AllReserved	6		/* If whole device is reserved for
 					 * one array */
 #define	AutoDetected	7		/* added by auto-detect */
@@ -254,7 +254,7 @@
 							 * fails.  Only supported
 							 */
 	struct bio			*biolist; 	/* bios that need to be retried
-							 * because BIO_RW_BARRIER is not supported
+							 * because REQ_HARDBARRIER is not supported
 							 */
 
 	atomic_t			recovery_active; /* blocks scheduled, but not written */

diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c
index 410fb60..0307d21 100644
--- a/drivers/md/multipath.c
+++ b/drivers/md/multipath.c

@@ -91,7 +91,7 @@
 
 	if (uptodate)
 		multipath_end_bh_io(mp_bh, 0);
-	else if (!bio_rw_flagged(bio, BIO_RW_AHEAD)) {
+	else if (!(bio->bi_rw & REQ_RAHEAD)) {
 		/*
 		 * oops, IO error:
 		 */
@@ -142,7 +142,7 @@
 	struct multipath_bh * mp_bh;
 	struct multipath_info *multipath;
 
-	if (unlikely(bio_rw_flagged(bio, BIO_RW_BARRIER))) {
+	if (unlikely(bio->bi_rw & REQ_HARDBARRIER)) {
 		md_barrier_request(mddev, bio);
 		return 0;
 	}
@@ -163,7 +163,7 @@
 	mp_bh->bio = *bio;
 	mp_bh->bio.bi_sector += multipath->rdev->data_offset;
 	mp_bh->bio.bi_bdev = multipath->rdev->bdev;
-	mp_bh->bio.bi_rw |= (1 << BIO_RW_FAILFAST_TRANSPORT);
+	mp_bh->bio.bi_rw |= REQ_FAILFAST_TRANSPORT;
 	mp_bh->bio.bi_end_io = multipath_end_request;
 	mp_bh->bio.bi_private = mp_bh;
 	generic_make_request(&mp_bh->bio);
@@ -398,7 +398,7 @@
 			*bio = *(mp_bh->master_bio);
 			bio->bi_sector += conf->multipaths[mp_bh->path].rdev->data_offset;
 			bio->bi_bdev = conf->multipaths[mp_bh->path].rdev->bdev;
-			bio->bi_rw |= (1 << BIO_RW_FAILFAST_TRANSPORT);
+			bio->bi_rw |= REQ_FAILFAST_TRANSPORT;
 			bio->bi_end_io = multipath_end_request;
 			bio->bi_private = mp_bh;
 			generic_make_request(bio);

diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index 563abed..6f7af46 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c

@@ -483,7 +483,7 @@
 	struct strip_zone *zone;
 	mdk_rdev_t *tmp_dev;
 
-	if (unlikely(bio_rw_flagged(bio, BIO_RW_BARRIER))) {
+	if (unlikely(bio->bi_rw & REQ_HARDBARRIER)) {
 		md_barrier_request(mddev, bio);
 		return 0;
 	}

diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index a948da8..73cc74f 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c

@@ -787,7 +787,7 @@
 	struct bio_list bl;
 	struct page **behind_pages = NULL;
 	const int rw = bio_data_dir(bio);
-	const bool do_sync = bio_rw_flagged(bio, BIO_RW_SYNCIO);
+	const bool do_sync = (bio->bi_rw & REQ_SYNC);
 	bool do_barriers;
 	mdk_rdev_t *blocked_rdev;
 
@@ -822,7 +822,7 @@
 		finish_wait(&conf->wait_barrier, &w);
 	}
 	if (unlikely(!mddev->barriers_work &&
-		     bio_rw_flagged(bio, BIO_RW_BARRIER))) {
+		     (bio->bi_rw & REQ_HARDBARRIER))) {
 		if (rw == WRITE)
 			md_write_end(mddev);
 		bio_endio(bio, -EOPNOTSUPP);
@@ -877,7 +877,7 @@
 		read_bio->bi_sector = r1_bio->sector + mirror->rdev->data_offset;
 		read_bio->bi_bdev = mirror->rdev->bdev;
 		read_bio->bi_end_io = raid1_end_read_request;
-		read_bio->bi_rw = READ | (do_sync << BIO_RW_SYNCIO);
+		read_bio->bi_rw = READ | do_sync;
 		read_bio->bi_private = r1_bio;
 
 		generic_make_request(read_bio);
@@ -959,7 +959,7 @@
 	atomic_set(&r1_bio->remaining, 0);
 	atomic_set(&r1_bio->behind_remaining, 0);
 
-	do_barriers = bio_rw_flagged(bio, BIO_RW_BARRIER);
+	do_barriers = bio->bi_rw & REQ_HARDBARRIER;
 	if (do_barriers)
 		set_bit(R1BIO_Barrier, &r1_bio->state);
 
@@ -975,8 +975,7 @@
 		mbio->bi_sector	= r1_bio->sector + conf->mirrors[i].rdev->data_offset;
 		mbio->bi_bdev = conf->mirrors[i].rdev->bdev;
 		mbio->bi_end_io	= raid1_end_write_request;
-		mbio->bi_rw = WRITE | (do_barriers << BIO_RW_BARRIER) |
-			(do_sync << BIO_RW_SYNCIO);
+		mbio->bi_rw = WRITE | do_barriers | do_sync;
 		mbio->bi_private = r1_bio;
 
 		if (behind_pages) {
@@ -1633,7 +1632,7 @@
 			sync_request_write(mddev, r1_bio);
 			unplug = 1;
 		} else if (test_bit(R1BIO_BarrierRetry, &r1_bio->state)) {
-			/* some requests in the r1bio were BIO_RW_BARRIER
+			/* some requests in the r1bio were REQ_HARDBARRIER
 			 * requests which failed with -EOPNOTSUPP.  Hohumm..
 			 * Better resubmit without the barrier.
 			 * We know which devices to resubmit for, because
@@ -1641,7 +1640,7 @@
 			 * We already have a nr_pending reference on these rdevs.
 			 */
 			int i;
-			const bool do_sync = bio_rw_flagged(r1_bio->master_bio, BIO_RW_SYNCIO);
+			const unsigned long do_sync = (r1_bio->master_bio->bi_rw & REQ_SYNC);
 			clear_bit(R1BIO_BarrierRetry, &r1_bio->state);
 			clear_bit(R1BIO_Barrier, &r1_bio->state);
 			for (i=0; i < conf->raid_disks; i++)
@@ -1662,8 +1661,7 @@
 						conf->mirrors[i].rdev->data_offset;
 					bio->bi_bdev = conf->mirrors[i].rdev->bdev;
 					bio->bi_end_io = raid1_end_write_request;
-					bio->bi_rw = WRITE |
-						(do_sync << BIO_RW_SYNCIO);
+					bio->bi_rw = WRITE | do_sync;
 					bio->bi_private = r1_bio;
 					r1_bio->bios[i] = bio;
 					generic_make_request(bio);
@@ -1698,7 +1696,7 @@
 				       (unsigned long long)r1_bio->sector);
 				raid_end_bio_io(r1_bio);
 			} else {
-				const bool do_sync = bio_rw_flagged(r1_bio->master_bio, BIO_RW_SYNCIO);
+				const unsigned long do_sync = r1_bio->master_bio->bi_rw & REQ_SYNC;
 				r1_bio->bios[r1_bio->read_disk] =
 					mddev->ro ? IO_BLOCKED : NULL;
 				r1_bio->read_disk = disk;
@@ -1715,7 +1713,7 @@
 				bio->bi_sector = r1_bio->sector + rdev->data_offset;
 				bio->bi_bdev = rdev->bdev;
 				bio->bi_end_io = raid1_end_read_request;
-				bio->bi_rw = READ | (do_sync << BIO_RW_SYNCIO);
+				bio->bi_rw = READ | do_sync;
 				bio->bi_private = r1_bio;
 				unplug = 1;
 				generic_make_request(bio);

diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 42e64e4..62ecb66 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c

@@ -799,12 +799,12 @@
 	int i;
 	int chunk_sects = conf->chunk_mask + 1;
 	const int rw = bio_data_dir(bio);
-	const bool do_sync = bio_rw_flagged(bio, BIO_RW_SYNCIO);
+	const unsigned long do_sync = (bio->bi_rw & REQ_SYNC);
 	struct bio_list bl;
 	unsigned long flags;
 	mdk_rdev_t *blocked_rdev;
 
-	if (unlikely(bio_rw_flagged(bio, BIO_RW_BARRIER))) {
+	if (unlikely(bio->bi_rw & REQ_HARDBARRIER)) {
 		md_barrier_request(mddev, bio);
 		return 0;
 	}
@@ -879,7 +879,7 @@
 			mirror->rdev->data_offset;
 		read_bio->bi_bdev = mirror->rdev->bdev;
 		read_bio->bi_end_io = raid10_end_read_request;
-		read_bio->bi_rw = READ | (do_sync << BIO_RW_SYNCIO);
+		read_bio->bi_rw = READ | do_sync;
 		read_bio->bi_private = r10_bio;
 
 		generic_make_request(read_bio);
@@ -947,7 +947,7 @@
 			conf->mirrors[d].rdev->data_offset;
 		mbio->bi_bdev = conf->mirrors[d].rdev->bdev;
 		mbio->bi_end_io	= raid10_end_write_request;
-		mbio->bi_rw = WRITE | (do_sync << BIO_RW_SYNCIO);
+		mbio->bi_rw = WRITE | do_sync;
 		mbio->bi_private = r10_bio;
 
 		atomic_inc(&r10_bio->remaining);
@@ -1716,7 +1716,7 @@
 				raid_end_bio_io(r10_bio);
 				bio_put(bio);
 			} else {
-				const bool do_sync = bio_rw_flagged(r10_bio->master_bio, BIO_RW_SYNCIO);
+				const unsigned long do_sync = (r10_bio->master_bio->bi_rw & REQ_SYNC);
 				bio_put(bio);
 				rdev = conf->mirrors[mirror].rdev;
 				if (printk_ratelimit())
@@ -1730,7 +1730,7 @@
 				bio->bi_sector = r10_bio->devs[r10_bio->read_slot].addr
 					+ rdev->data_offset;
 				bio->bi_bdev = rdev->bdev;
-				bio->bi_rw = READ | (do_sync << BIO_RW_SYNCIO);
+				bio->bi_rw = READ | do_sync;
 				bio->bi_private = r10_bio;
 				bio->bi_end_io = raid10_end_read_request;
 				unplug = 1;

diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 96c6902..20ac2f1 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c

@@ -3958,7 +3958,7 @@
 	const int rw = bio_data_dir(bi);
 	int remaining;
 
-	if (unlikely(bio_rw_flagged(bi, BIO_RW_BARRIER))) {
+	if (unlikely(bi->bi_rw & REQ_HARDBARRIER)) {
 		/* Drain all pending writes.  We only really need
 		 * to ensure they have been submitted, but this is
 		 * easier.

diff --git a/drivers/memstick/core/mspro_block.c b/drivers/memstick/core/mspro_block.c
index 8327e24..eef78a0 100644
--- a/drivers/memstick/core/mspro_block.c
+++ b/drivers/memstick/core/mspro_block.c

@@ -18,6 +18,7 @@
 #include <linux/kthread.h>
 #include <linux/delay.h>
 #include <linux/slab.h>
+#include <linux/smp_lock.h>
 #include <linux/memstick.h>
 
 #define DRIVER_NAME "mspro_block"
@@ -179,6 +180,7 @@
 	struct mspro_block_data *msb = disk->private_data;
 	int rc = -ENXIO;
 
+	lock_kernel();
 	mutex_lock(&mspro_block_disk_lock);
 
 	if (msb && msb->card) {
@@ -190,6 +192,7 @@
 	}
 
 	mutex_unlock(&mspro_block_disk_lock);
+	unlock_kernel();
 
 	return rc;
 }
@@ -221,7 +224,11 @@
 
 static int mspro_block_bd_release(struct gendisk *disk, fmode_t mode)
 {
-	return mspro_block_disk_release(disk);
+	int ret;
+	lock_kernel();
+	ret = mspro_block_disk_release(disk);
+	unlock_kernel();
+	return ret;
 }
 
 static int mspro_block_bd_getgeo(struct block_device *bdev,
@@ -805,7 +812,8 @@
 
 static int mspro_block_prepare_req(struct request_queue *q, struct request *req)
 {
-	if (!blk_fs_request(req) && !blk_pc_request(req)) {
+	if (req->cmd_type != REQ_TYPE_FS &&
+	    req->cmd_type != REQ_TYPE_BLOCK_PC) {
 		blk_dump_rq_flags(req, "MSPro unsupported request");
 		return BLKPREP_KILL;
 	}

diff --git a/drivers/message/i2o/i2o_block.c b/drivers/message/i2o/i2o_block.c
index fc593fb..e6733bc 100644
--- a/drivers/message/i2o/i2o_block.c
+++ b/drivers/message/i2o/i2o_block.c

@@ -53,6 +53,7 @@
 #include <linux/module.h>
 #include <linux/slab.h>
 #include <linux/i2o.h>
+#include <linux/smp_lock.h>
 
 #include <linux/mempool.h>
 
@@ -577,6 +578,7 @@
 	if (!dev->i2o_dev)
 		return -ENODEV;
 
+	lock_kernel();
 	if (dev->power > 0x1f)
 		i2o_block_device_power(dev, 0x02);
 
@@ -585,6 +587,7 @@
 	i2o_block_device_lock(dev->i2o_dev, -1);
 
 	osm_debug("Ready.\n");
+	unlock_kernel();
 
 	return 0;
 };
@@ -615,6 +618,7 @@
 	if (!dev->i2o_dev)
 		return 0;
 
+	lock_kernel();
 	i2o_block_device_flush(dev->i2o_dev);
 
 	i2o_block_device_unlock(dev->i2o_dev, -1);
@@ -625,6 +629,7 @@
 		operation = 0x24;
 
 	i2o_block_device_power(dev, operation);
+	unlock_kernel();
 
 	return 0;
 }
@@ -652,30 +657,40 @@
 {
 	struct gendisk *disk = bdev->bd_disk;
 	struct i2o_block_device *dev = disk->private_data;
+	int ret = -ENOTTY;
 
 	/* Anyone capable of this syscall can do *real bad* things */
 
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
 
+	lock_kernel();
 	switch (cmd) {
 	case BLKI2OGRSTRAT:
-		return put_user(dev->rcache, (int __user *)arg);
+		ret = put_user(dev->rcache, (int __user *)arg);
+		break;
 	case BLKI2OGWSTRAT:
-		return put_user(dev->wcache, (int __user *)arg);
+		ret = put_user(dev->wcache, (int __user *)arg);
+		break;
 	case BLKI2OSRSTRAT:
+		ret = -EINVAL;
 		if (arg < 0 || arg > CACHE_SMARTFETCH)
-			return -EINVAL;
+			break;
 		dev->rcache = arg;
+		ret = 0;
 		break;
 	case BLKI2OSWSTRAT:
+		ret = -EINVAL;
 		if (arg != 0
 		    && (arg < CACHE_WRITETHROUGH || arg > CACHE_SMARTBACK))
-			return -EINVAL;
+			break;
 		dev->wcache = arg;
+		ret = 0;
 		break;
 	}
-	return -ENOTTY;
+	unlock_kernel();
+
+	return ret;
 };
 
 /**
@@ -883,7 +898,7 @@
 		if (!req)
 			break;
 
-		if (blk_fs_request(req)) {
+		if (req->cmd_type == REQ_TYPE_FS) {
 			struct i2o_block_delayed_request *dreq;
 			struct i2o_block_request *ireq = req->special;
 			unsigned int queue_depth;
@@ -930,7 +945,8 @@
 	.owner = THIS_MODULE,
 	.open = i2o_block_open,
 	.release = i2o_block_release,
-	.locked_ioctl = i2o_block_ioctl,
+	.ioctl = i2o_block_ioctl,
+	.compat_ioctl = i2o_block_ioctl,
 	.getgeo = i2o_block_getgeo,
 	.media_changed = i2o_block_media_changed
 };

diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c
index cb9fbc8..8433cde 100644
--- a/drivers/mmc/card/block.c
+++ b/drivers/mmc/card/block.c

@@ -29,6 +29,7 @@
 #include <linux/kdev_t.h>
 #include <linux/blkdev.h>
 #include <linux/mutex.h>
+#include <linux/smp_lock.h>
 #include <linux/scatterlist.h>
 #include <linux/string_helpers.h>
 
@@ -107,6 +108,7 @@
 	struct mmc_blk_data *md = mmc_blk_get(bdev->bd_disk);
 	int ret = -ENXIO;
 
+	lock_kernel();
 	if (md) {
 		if (md->usage == 2)
 			check_disk_change(bdev);
@@ -117,6 +119,7 @@
 			ret = -EROFS;
 		}
 	}
+	unlock_kernel();
 
 	return ret;
 }
@@ -125,7 +128,9 @@
 {
 	struct mmc_blk_data *md = disk->private_data;
 
+	lock_kernel();
 	mmc_blk_put(md);
+	unlock_kernel();
 	return 0;
 }
 

diff --git a/drivers/mmc/card/queue.c b/drivers/mmc/card/queue.c
index d6ded24..c77eb49 100644
--- a/drivers/mmc/card/queue.c
+++ b/drivers/mmc/card/queue.c

@@ -32,7 +32,7 @@
 	/*
 	 * We only like normal block requests.
 	 */
-	if (!blk_fs_request(req)) {
+	if (req->cmd_type != REQ_TYPE_FS) {
 		blk_dump_rq_flags(req, "MMC bad request");
 		return BLKPREP_KILL;
 	}
@@ -128,7 +128,7 @@
 	mq->req = NULL;
 
 	blk_queue_prep_rq(mq->queue, mmc_prep_request);
-	blk_queue_ordered(mq->queue, QUEUE_ORDERED_DRAIN, NULL);
+	blk_queue_ordered(mq->queue, QUEUE_ORDERED_DRAIN);
 	queue_flag_set_unlocked(QUEUE_FLAG_NONROT, mq->queue);
 
 #ifdef CONFIG_MMC_BLOCK_BOUNCE

diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c
index 1d2144d..62e6870 100644
--- a/drivers/mtd/mtd_blkdevs.c
+++ b/drivers/mtd/mtd_blkdevs.c

@@ -29,6 +29,7 @@
 #include <linux/blkdev.h>
 #include <linux/blkpg.h>
 #include <linux/spinlock.h>
+#include <linux/smp_lock.h>
 #include <linux/hdreg.h>
 #include <linux/init.h>
 #include <linux/mutex.h>
@@ -87,14 +88,14 @@
 
 	buf = req->buffer;
 
-	if (!blk_fs_request(req))
+	if (req->cmd_type != REQ_TYPE_FS)
 		return -EIO;
 
 	if (blk_rq_pos(req) + blk_rq_cur_sectors(req) >
 	    get_capacity(req->rq_disk))
 		return -EIO;
 
-	if (blk_discard_rq(req))
+	if (req->cmd_flags & REQ_DISCARD)
 		return tr->discard(dev, block, nsect);
 
 	switch(rq_data_dir(req)) {
@@ -178,8 +179,9 @@
 	int ret;
 
 	if (!dev)
-		return -ERESTARTSYS;
+		return -ERESTARTSYS; /* FIXME: busy loop! -arnd*/
 
+	lock_kernel();
 	mutex_lock(&dev->lock);
 
 	if (!dev->mtd) {
@@ -196,6 +198,7 @@
 unlock:
 	mutex_unlock(&dev->lock);
 	blktrans_dev_put(dev);
+	unlock_kernel();
 	return ret;
 }
 
@@ -207,6 +210,7 @@
 	if (!dev)
 		return ret;
 
+	lock_kernel();
 	mutex_lock(&dev->lock);
 
 	/* Release one reference, we sure its not the last one here*/
@@ -219,6 +223,7 @@
 unlock:
 	mutex_unlock(&dev->lock);
 	blktrans_dev_put(dev);
+	unlock_kernel();
 	return ret;
 }
 
@@ -251,6 +256,7 @@
 	if (!dev)
 		return ret;
 
+	lock_kernel();
 	mutex_lock(&dev->lock);
 
 	if (!dev->mtd)
@@ -265,6 +271,7 @@
 	}
 unlock:
 	mutex_unlock(&dev->lock);
+	unlock_kernel();
 	blktrans_dev_put(dev);
 	return ret;
 }
@@ -273,7 +280,7 @@
 	.owner		= THIS_MODULE,
 	.open		= blktrans_open,
 	.release	= blktrans_release,
-	.locked_ioctl	= blktrans_ioctl,
+	.ioctl		= blktrans_ioctl,
 	.getgeo		= blktrans_getgeo,
 };
 

diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c
index 33975e9..1a84fae 100644
--- a/drivers/s390/block/dasd.c
+++ b/drivers/s390/block/dasd.c

@@ -21,6 +21,7 @@
 #include <linux/hdreg.h>
 #include <linux/async.h>
 #include <linux/mutex.h>
+#include <linux/smp_lock.h>
 
 #include <asm/ccwdev.h>
 #include <asm/ebcdic.h>
@@ -2196,7 +2197,7 @@
 	 */
 	blk_queue_max_segment_size(block->request_queue, PAGE_SIZE);
 	blk_queue_segment_boundary(block->request_queue, PAGE_SIZE - 1);
-	blk_queue_ordered(block->request_queue, QUEUE_ORDERED_DRAIN, NULL);
+	blk_queue_ordered(block->request_queue, QUEUE_ORDERED_DRAIN);
 }
 
 /*
@@ -2235,6 +2236,7 @@
 	if (!block)
 		return -ENODEV;
 
+	lock_kernel();
 	base = block->base;
 	atomic_inc(&block->open_count);
 	if (test_bit(DASD_FLAG_OFFLINE, &base->flags)) {
@@ -2269,12 +2271,14 @@
 		goto out;
 	}
 
+	unlock_kernel();
 	return 0;
 
 out:
 	module_put(base->discipline->owner);
 unlock:
 	atomic_dec(&block->open_count);
+	unlock_kernel();
 	return rc;
 }
 
@@ -2282,8 +2286,10 @@
 {
 	struct dasd_block *block = disk->private_data;
 
+	lock_kernel();
 	atomic_dec(&block->open_count);
 	module_put(block->base->discipline->owner);
+	unlock_kernel();
 	return 0;
 }
 

diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c
index 9b43ae9..2bd72aa 100644
--- a/drivers/s390/block/dcssblk.c
+++ b/drivers/s390/block/dcssblk.c

@@ -14,6 +14,7 @@
 #include <linux/init.h>
 #include <linux/slab.h>
 #include <linux/blkdev.h>
+#include <linux/smp_lock.h>
 #include <linux/completion.h>
 #include <linux/interrupt.h>
 #include <linux/platform_device.h>
@@ -775,6 +776,7 @@
 	struct dcssblk_dev_info *dev_info;
 	int rc;
 
+	lock_kernel();
 	dev_info = bdev->bd_disk->private_data;
 	if (NULL == dev_info) {
 		rc = -ENODEV;
@@ -784,6 +786,7 @@
 	bdev->bd_block_size = 4096;
 	rc = 0;
 out:
+	unlock_kernel();
 	return rc;
 }
 
@@ -794,6 +797,7 @@
 	struct segment_info *entry;
 	int rc;
 
+	lock_kernel();
 	if (!dev_info) {
 		rc = -ENODEV;
 		goto out;
@@ -811,6 +815,7 @@
 	up_write(&dcssblk_devices_sem);
 	rc = 0;
 out:
+	unlock_kernel();
 	return rc;
 }
 

diff --git a/drivers/s390/char/tape_block.c b/drivers/s390/char/tape_block.c
index 097da8c..b7de025 100644
--- a/drivers/s390/char/tape_block.c
+++ b/drivers/s390/char/tape_block.c

@@ -16,6 +16,7 @@
 #include <linux/fs.h>
 #include <linux/module.h>
 #include <linux/blkdev.h>
+#include <linux/smp_lock.h>
 #include <linux/interrupt.h>
 #include <linux/buffer_head.h>
 #include <linux/kernel.h>
@@ -361,6 +362,7 @@
 	struct tape_device *	device;
 	int			rc;
 
+	lock_kernel();
 	device = tape_get_device(disk->private_data);
 
 	if (device->required_tapemarks) {
@@ -384,12 +386,14 @@
 	 *       is called.
 	 */
 	tape_state_set(device, TS_BLKUSE);
+	unlock_kernel();
 	return 0;
 
 release:
 	tape_release(device);
  put_device:
 	tape_put_device(device);
+	unlock_kernel();
 	return rc;
 }
 
@@ -403,10 +407,12 @@
 tapeblock_release(struct gendisk *disk, fmode_t mode)
 {
 	struct tape_device *device = disk->private_data;
-
+ 
+	lock_kernel();
 	tape_state_set(device, TS_IN_USE);
 	tape_release(device);
 	tape_put_device(device);
+	unlock_kernel();
 
 	return 0;
 }

diff --git a/drivers/scsi/aha1542.c b/drivers/scsi/aha1542.c
index 2a8cf13..4f785f2 100644
--- a/drivers/scsi/aha1542.c
+++ b/drivers/scsi/aha1542.c

@@ -52,22 +52,6 @@
 #define SCSI_BUF_PA(address)	isa_virt_to_bus(address)
 #define SCSI_SG_PA(sgent)	(isa_page_to_bus(sg_page((sgent))) + (sgent)->offset)
 
-static void BAD_SG_DMA(Scsi_Cmnd * SCpnt,
-		       struct scatterlist *sgp,
-		       int nseg,
-		       int badseg)
-{
-	printk(KERN_CRIT "sgpnt[%d:%d] page %p/0x%llx length %u\n",
-	       badseg, nseg, sg_virt(sgp),
-	       (unsigned long long)SCSI_SG_PA(sgp),
-	       sgp->length);
-
-	/*
-	 * Not safe to continue.
-	 */
-	panic("Buffer at physical address > 16Mb used for aha1542");
-}
-
 #include<linux/stat.h>
 
 #ifdef DEBUG
@@ -691,8 +675,6 @@
 		}
 		scsi_for_each_sg(SCpnt, sg, sg_count, i) {
 			any2scsi(cptr[i].dataptr, SCSI_SG_PA(sg));
-			if (SCSI_SG_PA(sg) + sg->length - 1 > ISA_DMA_THRESHOLD)
-				BAD_SG_DMA(SCpnt, scsi_sglist(SCpnt), sg_count, i);
 			any2scsi(cptr[i].datalen, sg->length);
 		};
 		any2scsi(ccb[mbo].datalen, sg_count * sizeof(struct chain));
@@ -1133,16 +1115,9 @@
 				release_region(bases[indx], 4);
 				continue;
 			}
-			/* For now we do this - until kmalloc is more intelligent
-			   we are resigned to stupid hacks like this */
-			if (SCSI_BUF_PA(shpnt) >= ISA_DMA_THRESHOLD) {
-				printk(KERN_ERR "Invalid address for shpnt with 1542.\n");
-				goto unregister;
-			}
 			if (!aha1542_test_port(bases[indx], shpnt))
 				goto unregister;
 
-
 			base_io = bases[indx];
 
 			/* Set the Bus on/off-times as not to ruin floppy performance */

diff --git a/drivers/scsi/osd/osd_initiator.c b/drivers/scsi/osd/osd_initiator.c
index ee4b691..fda4de3 100644
--- a/drivers/scsi/osd/osd_initiator.c
+++ b/drivers/scsi/osd/osd_initiator.c

@@ -716,7 +716,7 @@
 		return PTR_ERR(bio);
 	}
 
-	bio->bi_rw &= ~(1 << BIO_RW);
+	bio->bi_rw &= ~REQ_WRITE;
 	or->in.bio = bio;
 	or->in.total_bytes = bio->bi_size;
 	return 0;
@@ -814,7 +814,7 @@
 {
 	_osd_req_encode_common(or, OSD_ACT_WRITE, obj, offset, len);
 	WARN_ON(or->out.bio || or->out.total_bytes);
-	WARN_ON(0 ==  bio_rw_flagged(bio, BIO_RW));
+	WARN_ON(0 == (bio->bi_rw & REQ_WRITE));
 	or->out.bio = bio;
 	or->out.total_bytes = len;
 }
@@ -829,7 +829,7 @@
 	if (IS_ERR(bio))
 		return PTR_ERR(bio);
 
-	bio->bi_rw |= (1 << BIO_RW); /* FIXME: bio_set_dir() */
+	bio->bi_rw |= REQ_WRITE; /* FIXME: bio_set_dir() */
 	osd_req_write(or, obj, offset, bio, len);
 	return 0;
 }
@@ -865,7 +865,7 @@
 {
 	_osd_req_encode_common(or, OSD_ACT_READ, obj, offset, len);
 	WARN_ON(or->in.bio || or->in.total_bytes);
-	WARN_ON(1 == bio_rw_flagged(bio, BIO_RW));
+	WARN_ON(bio->bi_rw & REQ_WRITE);
 	or->in.bio = bio;
 	or->in.total_bytes = len;
 }

diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
index 2bf9846..bbbc186 100644
--- a/drivers/scsi/scsi_error.c
+++ b/drivers/scsi/scsi_error.c

@@ -320,7 +320,7 @@
 				    "changed. The Linux SCSI layer does not "
 				    "automatically adjust these parameters.\n");
 
-		if (blk_barrier_rq(scmd->request))
+		if (scmd->request->cmd_flags & REQ_HARDBARRIER)
 			/*
 			 * barrier requests should always retry on UA
 			 * otherwise block will get a spurious error
@@ -1331,16 +1331,16 @@
 	case DID_OK:
 		break;
 	case DID_BUS_BUSY:
-		return blk_failfast_transport(scmd->request);
+		return (scmd->request->cmd_flags & REQ_FAILFAST_TRANSPORT);
 	case DID_PARITY:
-		return blk_failfast_dev(scmd->request);
+		return (scmd->request->cmd_flags & REQ_FAILFAST_DEV);
 	case DID_ERROR:
 		if (msg_byte(scmd->result) == COMMAND_COMPLETE &&
 		    status_byte(scmd->result) == RESERVATION_CONFLICT)
 			return 0;
 		/* fall through */
 	case DID_SOFT_ERROR:
-		return blk_failfast_driver(scmd->request);
+		return (scmd->request->cmd_flags & REQ_FAILFAST_DRIVER);
 	}
 
 	switch (status_byte(scmd->result)) {
@@ -1349,7 +1349,9 @@
 		 * assume caller has checked sense and determinted
 		 * the check condition was retryable.
 		 */
-		return blk_failfast_dev(scmd->request);
+		if (scmd->request->cmd_flags & REQ_FAILFAST_DEV ||
+		    scmd->request->cmd_type == REQ_TYPE_BLOCK_PC)
+			return 1;
 	}
 
 	return 0;

diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 1646fe7..b8de389 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c

@@ -85,7 +85,7 @@
 {
 	struct scsi_cmnd *cmd = req->special;
 
-	req->cmd_flags &= ~REQ_DONTPREP;
+	blk_unprep_request(req);
 	req->special = NULL;
 
 	scsi_put_command(cmd);
@@ -722,7 +722,7 @@
 			sense_deferred = scsi_sense_is_deferred(&sshdr);
 	}
 
-	if (blk_pc_request(req)) { /* SG_IO ioctl from block level */
+	if (req->cmd_type == REQ_TYPE_BLOCK_PC) { /* SG_IO ioctl from block level */
 		req->errors = result;
 		if (result) {
 			if (sense_valid && req->sense) {
@@ -757,7 +757,8 @@
 		}
 	}
 
-	BUG_ON(blk_bidi_rq(req)); /* bidi not support for !blk_pc_request yet */
+	/* no bidi support for !REQ_TYPE_BLOCK_PC yet */
+	BUG_ON(blk_bidi_rq(req));
 
 	/*
 	 * Next deal with any sectors which we were able to correctly
@@ -1010,11 +1011,8 @@
 
 err_exit:
 	scsi_release_buffers(cmd);
-	if (error == BLKPREP_KILL)
-		scsi_put_command(cmd);
-	else /* BLKPREP_DEFER */
-		scsi_unprep_request(cmd->request);
-
+	cmd->request->special = NULL;
+	scsi_put_command(cmd);
 	return error;
 }
 EXPORT_SYMBOL(scsi_init_io);

diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index cc8a1d1..8e2e893 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c

@@ -46,6 +46,7 @@
 #include <linux/blkdev.h>
 #include <linux/blkpg.h>
 #include <linux/delay.h>
+#include <linux/smp_lock.h>
 #include <linux/mutex.h>
 #include <linux/string_helpers.h>
 #include <linux/async.h>
@@ -411,54 +412,85 @@
 }
 
 /**
- * sd_prepare_discard - unmap blocks on thinly provisioned device
+ * scsi_setup_discard_cmnd - unmap blocks on thinly provisioned device
+ * @sdp: scsi device to operate on
  * @rq: Request to prepare
  *
  * Will issue either UNMAP or WRITE SAME(16) depending on preference
  * indicated by target device.
  **/
-static int sd_prepare_discard(struct request *rq)
+static int scsi_setup_discard_cmnd(struct scsi_device *sdp, struct request *rq)
 {
 	struct scsi_disk *sdkp = scsi_disk(rq->rq_disk);
 	struct bio *bio = rq->bio;
 	sector_t sector = bio->bi_sector;
-	unsigned int num = bio_sectors(bio);
+	unsigned int nr_sectors = bio_sectors(bio);
+	unsigned int len;
+	int ret;
+	struct page *page;
 
 	if (sdkp->device->sector_size == 4096) {
 		sector >>= 3;
-		num >>= 3;
+		nr_sectors >>= 3;
 	}
 
-	rq->cmd_type = REQ_TYPE_BLOCK_PC;
 	rq->timeout = SD_TIMEOUT;
 
 	memset(rq->cmd, 0, rq->cmd_len);
 
-	if (sdkp->unmap) {
-		char *buf = kmap_atomic(bio_page(bio), KM_USER0);
+	page = alloc_page(GFP_ATOMIC | __GFP_ZERO);
+	if (!page)
+		return BLKPREP_DEFER;
 
+	if (sdkp->unmap) {
+		char *buf = page_address(page);
+
+		rq->cmd_len = 10;
 		rq->cmd[0] = UNMAP;
 		rq->cmd[8] = 24;
-		rq->cmd_len = 10;
-
-		/* Ensure that data length matches payload */
-		rq->__data_len = bio->bi_size = bio->bi_io_vec->bv_len = 24;
 
 		put_unaligned_be16(6 + 16, &buf[0]);
 		put_unaligned_be16(16, &buf[2]);
 		put_unaligned_be64(sector, &buf[8]);
-		put_unaligned_be32(num, &buf[16]);
+		put_unaligned_be32(nr_sectors, &buf[16]);
 
-		kunmap_atomic(buf, KM_USER0);
+		len = 24;
 	} else {
+		rq->cmd_len = 16;
 		rq->cmd[0] = WRITE_SAME_16;
 		rq->cmd[1] = 0x8; /* UNMAP */
 		put_unaligned_be64(sector, &rq->cmd[2]);
-		put_unaligned_be32(num, &rq->cmd[10]);
-		rq->cmd_len = 16;
+		put_unaligned_be32(nr_sectors, &rq->cmd[10]);
+
+		len = sdkp->device->sector_size;
 	}
 
-	return BLKPREP_OK;
+	blk_add_request_payload(rq, page, len);
+	ret = scsi_setup_blk_pc_cmnd(sdp, rq);
+	rq->buffer = page_address(page);
+	if (ret != BLKPREP_OK) {
+		__free_page(page);
+		rq->buffer = NULL;
+	}
+	return ret;
+}
+
+static int scsi_setup_flush_cmnd(struct scsi_device *sdp, struct request *rq)
+{
+	rq->timeout = SD_TIMEOUT;
+	rq->retries = SD_MAX_RETRIES;
+	rq->cmd[0] = SYNCHRONIZE_CACHE;
+	rq->cmd_len = 10;
+
+	return scsi_setup_blk_pc_cmnd(sdp, rq);
+}
+
+static void sd_unprep_fn(struct request_queue *q, struct request *rq)
+{
+	if (rq->cmd_flags & REQ_DISCARD) {
+		free_page((unsigned long)rq->buffer);
+		rq->buffer = NULL;
+	}
 }
 
 /**
@@ -485,10 +517,13 @@
 	 * Discard request come in as REQ_TYPE_FS but we turn them into
 	 * block PC requests to make life easier.
 	 */
-	if (blk_discard_rq(rq))
-		ret = sd_prepare_discard(rq);
-
-	if (rq->cmd_type == REQ_TYPE_BLOCK_PC) {
+	if (rq->cmd_flags & REQ_DISCARD) {
+		ret = scsi_setup_discard_cmnd(sdp, rq);
+		goto out;
+	} else if (rq->cmd_flags & REQ_FLUSH) {
+		ret = scsi_setup_flush_cmnd(sdp, rq);
+		goto out;
+	} else if (rq->cmd_type == REQ_TYPE_BLOCK_PC) {
 		ret = scsi_setup_blk_pc_cmnd(sdp, rq);
 		goto out;
 	} else if (rq->cmd_type != REQ_TYPE_FS) {
@@ -636,7 +671,7 @@
 		SCpnt->cmnd[0] = VARIABLE_LENGTH_CMD;
 		SCpnt->cmnd[7] = 0x18;
 		SCpnt->cmnd[9] = (rq_data_dir(rq) == READ) ? READ_32 : WRITE_32;
-		SCpnt->cmnd[10] = protect | (blk_fua_rq(rq) ? 0x8 : 0);
+		SCpnt->cmnd[10] = protect | ((rq->cmd_flags & REQ_FUA) ? 0x8 : 0);
 
 		/* LBA */
 		SCpnt->cmnd[12] = sizeof(block) > 4 ? (unsigned char) (block >> 56) & 0xff : 0;
@@ -661,7 +696,7 @@
 		SCpnt->cmnd[31] = (unsigned char) this_count & 0xff;
 	} else if (block > 0xffffffff) {
 		SCpnt->cmnd[0] += READ_16 - READ_6;
-		SCpnt->cmnd[1] = protect | (blk_fua_rq(rq) ? 0x8 : 0);
+		SCpnt->cmnd[1] = protect | ((rq->cmd_flags & REQ_FUA) ? 0x8 : 0);
 		SCpnt->cmnd[2] = sizeof(block) > 4 ? (unsigned char) (block >> 56) & 0xff : 0;
 		SCpnt->cmnd[3] = sizeof(block) > 4 ? (unsigned char) (block >> 48) & 0xff : 0;
 		SCpnt->cmnd[4] = sizeof(block) > 4 ? (unsigned char) (block >> 40) & 0xff : 0;
@@ -682,7 +717,7 @@
 			this_count = 0xffff;
 
 		SCpnt->cmnd[0] += READ_10 - READ_6;
-		SCpnt->cmnd[1] = protect | (blk_fua_rq(rq) ? 0x8 : 0);
+		SCpnt->cmnd[1] = protect | ((rq->cmd_flags & REQ_FUA) ? 0x8 : 0);
 		SCpnt->cmnd[2] = (unsigned char) (block >> 24) & 0xff;
 		SCpnt->cmnd[3] = (unsigned char) (block >> 16) & 0xff;
 		SCpnt->cmnd[4] = (unsigned char) (block >> 8) & 0xff;
@@ -691,7 +726,7 @@
 		SCpnt->cmnd[7] = (unsigned char) (this_count >> 8) & 0xff;
 		SCpnt->cmnd[8] = (unsigned char) this_count & 0xff;
 	} else {
-		if (unlikely(blk_fua_rq(rq))) {
+		if (unlikely(rq->cmd_flags & REQ_FUA)) {
 			/*
 			 * This happens only if this drive failed
 			 * 10byte rw command with ILLEGAL_REQUEST
@@ -745,6 +780,8 @@
  *	or from within the kernel (e.g. as a result of a mount(1) ).
  *	In the latter case @inode and @filp carry an abridged amount
  *	of information as noted above.
+ *
+ *	Locking: called with bdev->bd_mutex held.
  **/
 static int sd_open(struct block_device *bdev, fmode_t mode)
 {
@@ -799,7 +836,7 @@
 	if (!scsi_device_online(sdev))
 		goto error_out;
 
-	if (!sdkp->openers++ && sdev->removable) {
+	if ((atomic_inc_return(&sdkp->openers) == 1) && sdev->removable) {
 		if (scsi_block_when_processing_errors(sdev))
 			scsi_set_medium_removal(sdev, SCSI_REMOVAL_PREVENT);
 	}
@@ -823,6 +860,8 @@
  *
  *	Note: may block (uninterruptible) if error recovery is underway
  *	on this disk.
+ *
+ *	Locking: called with bdev->bd_mutex held.
  **/
 static int sd_release(struct gendisk *disk, fmode_t mode)
 {
@@ -831,7 +870,7 @@
 
 	SCSI_LOG_HLQUEUE(3, sd_printk(KERN_INFO, sdkp, "sd_release\n"));
 
-	if (!--sdkp->openers && sdev->removable) {
+	if (atomic_dec_return(&sdkp->openers) == 0 && sdev->removable) {
 		if (scsi_block_when_processing_errors(sdev))
 			scsi_set_medium_removal(sdev, SCSI_REMOVAL_ALLOW);
 	}
@@ -904,7 +943,7 @@
 	error = scsi_nonblockable_ioctl(sdp, cmd, p,
 					(mode & FMODE_NDELAY) != 0);
 	if (!scsi_block_when_processing_errors(sdp) || !error)
-		return error;
+		goto out;
 
 	/*
 	 * Send SCSI addressing ioctls directly to mid level, send other
@@ -914,13 +953,17 @@
 	switch (cmd) {
 		case SCSI_IOCTL_GET_IDLUN:
 		case SCSI_IOCTL_GET_BUS_NUMBER:
-			return scsi_ioctl(sdp, cmd, p);
+			error = scsi_ioctl(sdp, cmd, p);
+			break;
 		default:
 			error = scsi_cmd_ioctl(disk->queue, disk, mode, cmd, p);
 			if (error != -ENOTTY)
-				return error;
+				break;
+			error = scsi_ioctl(sdp, cmd, p);
+			break;
 	}
-	return scsi_ioctl(sdp, cmd, p);
+out:
+	return error;
 }
 
 static void set_media_not_present(struct scsi_disk *sdkp)
@@ -1045,15 +1088,6 @@
 	return 0;
 }
 
-static void sd_prepare_flush(struct request_queue *q, struct request *rq)
-{
-	rq->cmd_type = REQ_TYPE_BLOCK_PC;
-	rq->timeout = SD_TIMEOUT;
-	rq->retries = SD_MAX_RETRIES;
-	rq->cmd[0] = SYNCHRONIZE_CACHE;
-	rq->cmd_len = 10;
-}
-
 static void sd_rescan(struct device *dev)
 {
 	struct scsi_disk *sdkp = scsi_disk_get_from_dev(dev);
@@ -1103,7 +1137,7 @@
 	.owner			= THIS_MODULE,
 	.open			= sd_open,
 	.release		= sd_release,
-	.locked_ioctl		= sd_ioctl,
+	.ioctl			= sd_ioctl,
 	.getgeo			= sd_getgeo,
 #ifdef CONFIG_COMPAT
 	.compat_ioctl		= sd_compat_ioctl,
@@ -1120,7 +1154,7 @@
 	u64 bad_lba;
 	int info_valid;
 
-	if (!blk_fs_request(scmd->request))
+	if (scmd->request->cmd_type != REQ_TYPE_FS)
 		return 0;
 
 	info_valid = scsi_get_sense_info_fld(scmd->sense_buffer,
@@ -1171,6 +1205,12 @@
 	int sense_valid = 0;
 	int sense_deferred = 0;
 
+	if (SCpnt->request->cmd_flags & REQ_DISCARD) {
+		if (!result)
+			scsi_set_resid(SCpnt, 0);
+		return good_bytes;
+	}
+
 	if (result) {
 		sense_valid = scsi_command_normalize_sense(SCpnt, &sshdr);
 		if (sense_valid)
@@ -2121,7 +2161,7 @@
 	else
 		ordered = QUEUE_ORDERED_DRAIN;
 
-	blk_queue_ordered(sdkp->disk->queue, ordered, sd_prepare_flush);
+	blk_queue_ordered(sdkp->disk->queue, ordered);
 
 	set_capacity(disk, sdkp->capacity);
 	kfree(buffer);
@@ -2234,6 +2274,7 @@
 	sd_revalidate_disk(gd);
 
 	blk_queue_prep_rq(sdp->request_queue, sd_prep_fn);
+	blk_queue_unprep_rq(sdp->request_queue, sd_unprep_fn);
 
 	gd->driverfs_dev = &sdp->sdev_gendev;
 	gd->flags = GENHD_FL_EXT_DEVT;
@@ -2313,7 +2354,7 @@
 	sdkp->driver = &sd_template;
 	sdkp->disk = gd;
 	sdkp->index = index;
-	sdkp->openers = 0;
+	atomic_set(&sdkp->openers, 0);
 	sdkp->previous_state = 1;
 
 	if (!sdp->request_queue->rq_timeout) {
@@ -2372,6 +2413,7 @@
 
 	async_synchronize_full();
 	blk_queue_prep_rq(sdkp->device->request_queue, scsi_prep_fn);
+	blk_queue_unprep_rq(sdkp->device->request_queue, NULL);
 	device_del(&sdkp->dev);
 	del_gendisk(sdkp->disk);
 	sd_shutdown(dev);

diff --git a/drivers/scsi/sd.h b/drivers/scsi/sd.h
index 43d3caf..f81a930 100644
--- a/drivers/scsi/sd.h
+++ b/drivers/scsi/sd.h

@@ -47,7 +47,7 @@
 	struct scsi_device *device;
 	struct device	dev;
 	struct gendisk	*disk;
-	unsigned int	openers;	/* protected by BKL for now, yuck */
+	atomic_t	openers;
 	sector_t	capacity;	/* size in 512-byte sectors */
 	u32		index;
 	unsigned short	hw_sector_size;

diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c
index 0a90abc..ba9c3e0 100644
--- a/drivers/scsi/sr.c
+++ b/drivers/scsi/sr.c

@@ -44,6 +44,7 @@
 #include <linux/init.h>
 #include <linux/blkdev.h>
 #include <linux/mutex.h>
+#include <linux/smp_lock.h>
 #include <linux/slab.h>
 #include <asm/uaccess.h>
 
@@ -466,22 +467,27 @@
 
 static int sr_block_open(struct block_device *bdev, fmode_t mode)
 {
-	struct scsi_cd *cd = scsi_cd_get(bdev->bd_disk);
+	struct scsi_cd *cd;
 	int ret = -ENXIO;
 
+	lock_kernel();
+	cd = scsi_cd_get(bdev->bd_disk);
 	if (cd) {
 		ret = cdrom_open(&cd->cdi, bdev, mode);
 		if (ret)
 			scsi_cd_put(cd);
 	}
+	unlock_kernel();
 	return ret;
 }
 
 static int sr_block_release(struct gendisk *disk, fmode_t mode)
 {
 	struct scsi_cd *cd = scsi_cd(disk);
+	lock_kernel();
 	cdrom_release(&cd->cdi, mode);
 	scsi_cd_put(cd);
+	unlock_kernel();
 	return 0;
 }
 
@@ -493,6 +499,8 @@
 	void __user *argp = (void __user *)arg;
 	int ret;
 
+	lock_kernel();
+
 	/*
 	 * Send SCSI addressing ioctls directly to mid level, send other
 	 * ioctls to cdrom/block level.
@@ -500,12 +508,13 @@
 	switch (cmd) {
 	case SCSI_IOCTL_GET_IDLUN:
 	case SCSI_IOCTL_GET_BUS_NUMBER:
-		return scsi_ioctl(sdev, cmd, argp);
+		ret = scsi_ioctl(sdev, cmd, argp);
+		goto out;
 	}
 
 	ret = cdrom_ioctl(&cd->cdi, bdev, mode, cmd, arg);
 	if (ret != -ENOSYS)
-		return ret;
+		goto out;
 
 	/*
 	 * ENODEV means that we didn't recognise the ioctl, or that we
@@ -516,8 +525,12 @@
 	ret = scsi_nonblockable_ioctl(sdev, cmd, argp,
 					(mode & FMODE_NDELAY) != 0);
 	if (ret != -ENODEV)
-		return ret;
-	return scsi_ioctl(sdev, cmd, argp);
+		goto out;
+	ret = scsi_ioctl(sdev, cmd, argp);
+
+out:
+	unlock_kernel();
+	return ret;
 }
 
 static int sr_block_media_changed(struct gendisk *disk)
@@ -531,7 +544,7 @@
 	.owner		= THIS_MODULE,
 	.open		= sr_block_open,
 	.release	= sr_block_release,
-	.locked_ioctl	= sr_block_ioctl,
+	.ioctl		= sr_block_ioctl,
 	.media_changed	= sr_block_media_changed,
 	/* 
 	 * No compat_ioctl for now because sr_block_ioctl never

diff --git a/drivers/scsi/sun3_NCR5380.c b/drivers/scsi/sun3_NCR5380.c
index b5838d5..713620e 100644
--- a/drivers/scsi/sun3_NCR5380.c
+++ b/drivers/scsi/sun3_NCR5380.c

@@ -2022,7 +2022,7 @@
 		if((count > SUN3_DMA_MINSIZE) && (sun3_dma_setup_done
 						  != cmd))
 		{
-			if(blk_fs_request(cmd->request)) {
+			if (cmd->request->cmd_type == REQ_TYPE_FS) {
 				sun3scsi_dma_setup(d, count,
 						   rq_data_dir(cmd->request));
 				sun3_dma_setup_done = cmd;

diff --git a/drivers/scsi/sun3_scsi.c b/drivers/scsi/sun3_scsi.c
index e606cf0..613f588 100644
--- a/drivers/scsi/sun3_scsi.c
+++ b/drivers/scsi/sun3_scsi.c

@@ -524,7 +524,7 @@
 						  struct scsi_cmnd *cmd,
 						  int write_flag)
 {
-	if(blk_fs_request(cmd->request))
+	if (cmd->request->cmd_type == REQ_TYPE_FS)
  		return wanted;
 	else
 		return 0;

diff --git a/drivers/scsi/sun3_scsi_vme.c b/drivers/scsi/sun3_scsi_vme.c
index aaa4fd0..7c526b8 100644
--- a/drivers/scsi/sun3_scsi_vme.c
+++ b/drivers/scsi/sun3_scsi_vme.c

@@ -458,7 +458,7 @@
 						  struct scsi_cmnd *cmd,
 						  int write_flag)
 {
-	if(blk_fs_request(cmd->request))
+	if (cmd->request->cmd_type == REQ_TYPE_FS)
  		return wanted;
 	else
 		return 0;

diff --git a/drivers/staging/hv/blkvsc_drv.c b/drivers/staging/hv/blkvsc_drv.c
index f7ea2a3..ff1d247 100644
--- a/drivers/staging/hv/blkvsc_drv.c
+++ b/drivers/staging/hv/blkvsc_drv.c

@@ -25,6 +25,7 @@
 #include <linux/major.h>
 #include <linux/delay.h>
 #include <linux/hdreg.h>
+#include <linux/smp_lock.h>
 #include <linux/slab.h>
 #include <scsi/scsi.h>
 #include <scsi/scsi_cmnd.h>
@@ -805,7 +806,8 @@
 			blkvsc_req->cmnd[0] = READ_16;
 		}
 
-		blkvsc_req->cmnd[1] |= blk_fua_rq(blkvsc_req->req) ? 0x8 : 0;
+		blkvsc_req->cmnd[1] |=
+			(blkvsc_req->req->cmd_flags & REQ_FUA) ? 0x8 : 0;
 
 		*(unsigned long long *)&blkvsc_req->cmnd[2] =
 				cpu_to_be64(blkvsc_req->sector_start);
@@ -821,7 +823,8 @@
 			blkvsc_req->cmnd[0] = READ_10;
 		}
 
-		blkvsc_req->cmnd[1] |= blk_fua_rq(blkvsc_req->req) ? 0x8 : 0;
+		blkvsc_req->cmnd[1] |=
+			(blkvsc_req->req->cmd_flags & REQ_FUA) ? 0x8 : 0;
 
 		*(unsigned int *)&blkvsc_req->cmnd[2] =
 				cpu_to_be32(blkvsc_req->sector_start);
@@ -1268,7 +1271,7 @@
 		DPRINT_DBG(BLKVSC_DRV, "- req %p\n", req);
 
 		blkdev = req->rq_disk->private_data;
-		if (blkdev->shutting_down || !blk_fs_request(req) ||
+		if (blkdev->shutting_down || req->cmd_type != REQ_TYPE_FS ||
 		    blkdev->media_not_present) {
 			__blk_end_request_cur(req, 0);
 			continue;
@@ -1306,6 +1309,7 @@
 	DPRINT_DBG(BLKVSC_DRV, "- users %d disk %s\n", blkdev->users,
 		   blkdev->gd->disk_name);
 
+	lock_kernel();
 	spin_lock(&blkdev->lock);
 
 	if (!blkdev->users && blkdev->device_type == DVD_TYPE) {
@@ -1317,6 +1321,7 @@
 	blkdev->users++;
 
 	spin_unlock(&blkdev->lock);
+	unlock_kernel();
 	return 0;
 }
 
@@ -1327,6 +1332,7 @@
 	DPRINT_DBG(BLKVSC_DRV, "- users %d disk %s\n", blkdev->users,
 		   blkdev->gd->disk_name);
 
+	lock_kernel();
 	spin_lock(&blkdev->lock);
 	if (blkdev->users == 1) {
 		spin_unlock(&blkdev->lock);
@@ -1337,6 +1343,7 @@
 	blkdev->users--;
 
 	spin_unlock(&blkdev->lock);
+	unlock_kernel();
 	return 0;
 }
 

diff --git a/drivers/xen/xenbus/xenbus_client.c b/drivers/xen/xenbus/xenbus_client.c
index 7b3e973..7e49527 100644
--- a/drivers/xen/xenbus/xenbus_client.c
+++ b/drivers/xen/xenbus/xenbus_client.c

@@ -133,6 +133,64 @@
 }
 EXPORT_SYMBOL_GPL(xenbus_watch_pathfmt);
 
+static void xenbus_switch_fatal(struct xenbus_device *, int, int,
+				const char *, ...);
+
+static int
+__xenbus_switch_state(struct xenbus_device *dev,
+		      enum xenbus_state state, int depth)
+{
+	/* We check whether the state is currently set to the given value, and
+	   if not, then the state is set.  We don't want to unconditionally
+	   write the given state, because we don't want to fire watches
+	   unnecessarily.  Furthermore, if the node has gone, we don't write
+	   to it, as the device will be tearing down, and we don't want to
+	   resurrect that directory.
+
+	   Note that, because of this cached value of our state, this
+	   function will not take a caller's Xenstore transaction
+	   (something it was trying to do in the past) because dev->state
+	   would not get reset if the transaction was aborted.
+	 */
+
+	struct xenbus_transaction xbt;
+	int current_state;
+	int err, abort;
+
+	if (state == dev->state)
+		return 0;
+
+again:
+	abort = 1;
+
+	err = xenbus_transaction_start(&xbt);
+	if (err) {
+		xenbus_switch_fatal(dev, depth, err, "starting transaction");
+		return 0;
+	}
+
+	err = xenbus_scanf(xbt, dev->nodename, "state", "%d", &current_state);
+	if (err != 1)
+		goto abort;
+
+	err = xenbus_printf(xbt, dev->nodename, "state", "%d", state);
+	if (err) {
+		xenbus_switch_fatal(dev, depth, err, "writing new state");
+		goto abort;
+	}
+
+	abort = 0;
+abort:
+	err = xenbus_transaction_end(xbt, abort);
+	if (err) {
+		if (err == -EAGAIN && !abort)
+			goto again;
+		xenbus_switch_fatal(dev, depth, err, "ending transaction");
+	} else
+		dev->state = state;
+
+	return 0;
+}
 
 /**
  * xenbus_switch_state
@@ -145,42 +203,9 @@
  */
 int xenbus_switch_state(struct xenbus_device *dev, enum xenbus_state state)
 {
-	/* We check whether the state is currently set to the given value, and
-	   if not, then the state is set.  We don't want to unconditionally
-	   write the given state, because we don't want to fire watches
-	   unnecessarily.  Furthermore, if the node has gone, we don't write
-	   to it, as the device will be tearing down, and we don't want to
-	   resurrect that directory.
-
-	   Note that, because of this cached value of our state, this function
-	   will not work inside a Xenstore transaction (something it was
-	   trying to in the past) because dev->state would not get reset if
-	   the transaction was aborted.
-
-	 */
-
-	int current_state;
-	int err;
-
-	if (state == dev->state)
-		return 0;
-
-	err = xenbus_scanf(XBT_NIL, dev->nodename, "state", "%d",
-			   &current_state);
-	if (err != 1)
-		return 0;
-
-	err = xenbus_printf(XBT_NIL, dev->nodename, "state", "%d", state);
-	if (err) {
-		if (state != XenbusStateClosing) /* Avoid looping */
-			xenbus_dev_fatal(dev, err, "writing new state");
-		return err;
-	}
-
-	dev->state = state;
-
-	return 0;
+	return __xenbus_switch_state(dev, state, 0);
 }
+
 EXPORT_SYMBOL_GPL(xenbus_switch_state);
 
 int xenbus_frontend_closed(struct xenbus_device *dev)
@@ -284,6 +309,23 @@
 EXPORT_SYMBOL_GPL(xenbus_dev_fatal);
 
 /**
+ * Equivalent to xenbus_dev_fatal(dev, err, fmt, args), but helps
+ * avoid recursion within xenbus_switch_state.
+ */
+static void xenbus_switch_fatal(struct xenbus_device *dev, int depth, int err,
+				const char *fmt, ...)
+{
+	va_list ap;
+
+	va_start(ap, fmt);
+	xenbus_va_dev_error(dev, err, fmt, ap);
+	va_end(ap);
+
+	if (!depth)
+		__xenbus_switch_state(dev, XenbusStateClosing, 1);
+}
+
+/**
  * xenbus_grant_ring
  * @dev: xenbus device
  * @ring_mfn: mfn of ring to grant

diff --git a/fs/bio.c b/fs/bio.c
index e7bf6ca..8abb2df 100644
--- a/fs/bio.c
+++ b/fs/bio.c

@@ -843,7 +843,8 @@
 	if (!bio)
 		goto out_bmd;
 
-	bio->bi_rw |= (!write_to_vm << BIO_RW);
+	if (!write_to_vm)
+		bio->bi_rw |= REQ_WRITE;
 
 	ret = 0;
 
@@ -1024,7 +1025,7 @@
 	 * set data direction, and check if mapped pages need bouncing
 	 */
 	if (!write_to_vm)
-		bio->bi_rw |= (1 << BIO_RW);
+		bio->bi_rw |= REQ_WRITE;
 
 	bio->bi_bdev = bdev;
 	bio->bi_flags |= (1 << BIO_USER_MAPPED);

diff --git a/fs/block_dev.c b/fs/block_dev.c
index 451afbd..6641146 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c

@@ -1346,13 +1346,12 @@
 		return ret;
 	}
 
-	lock_kernel();
  restart:
 
 	ret = -ENXIO;
 	disk = get_gendisk(bdev->bd_dev, &partno);
 	if (!disk)
-		goto out_unlock_kernel;
+		goto out;
 
 	mutex_lock_nested(&bdev->bd_mutex, for_part);
 	if (!bdev->bd_openers) {
@@ -1432,7 +1431,6 @@
 	if (for_part)
 		bdev->bd_part_count++;
 	mutex_unlock(&bdev->bd_mutex);
-	unlock_kernel();
 	return 0;
 
  out_clear:
@@ -1445,9 +1443,7 @@
 	bdev->bd_contains = NULL;
  out_unlock_bdev:
 	mutex_unlock(&bdev->bd_mutex);
- out_unlock_kernel:
-	unlock_kernel();
-
+ out:
 	if (disk)
 		module_put(disk->fops->owner);
 	put_disk(disk);
@@ -1516,7 +1512,6 @@
 	struct block_device *victim = NULL;
 
 	mutex_lock_nested(&bdev->bd_mutex, for_part);
-	lock_kernel();
 	if (for_part)
 		bdev->bd_part_count--;
 
@@ -1541,7 +1536,6 @@
 			victim = bdev->bd_contains;
 		bdev->bd_contains = NULL;
 	}
-	unlock_kernel();
 	mutex_unlock(&bdev->bd_mutex);
 	bdput(bdev);
 	if (victim)

diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 34f7c37..64f1008 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c

@@ -480,7 +480,7 @@
 	end_io_wq->work.func = end_workqueue_fn;
 	end_io_wq->work.flags = 0;
 
-	if (bio->bi_rw & (1 << BIO_RW)) {
+	if (bio->bi_rw & REQ_WRITE) {
 		if (end_io_wq->metadata)
 			btrfs_queue_worker(&fs_info->endio_meta_write_workers,
 					   &end_io_wq->work);
@@ -604,7 +604,7 @@
 
 	atomic_inc(&fs_info->nr_async_submits);
 
-	if (rw & (1 << BIO_RW_SYNCIO))
+	if (rw & REQ_SYNC)
 		btrfs_set_work_high_prio(&async->work);
 
 	btrfs_queue_worker(&fs_info->workers, &async->work);
@@ -668,7 +668,7 @@
 					  bio, 1);
 	BUG_ON(ret);
 
-	if (!(rw & (1 << BIO_RW))) {
+	if (!(rw & REQ_WRITE)) {
 		/*
 		 * called for a read, do the setup so that checksum validation
 		 * can happen in the async kernel threads
@@ -1427,7 +1427,7 @@
 	 * ram and up to date before trying to verify things.  For
 	 * blocksize <= pagesize, it is basically a noop
 	 */
-	if (!(bio->bi_rw & (1 << BIO_RW)) && end_io_wq->metadata &&
+	if (!(bio->bi_rw & REQ_WRITE) && end_io_wq->metadata &&
 	    !bio_ready_for_csum(bio)) {
 		btrfs_queue_worker(&fs_info->endio_meta_workers,
 				   &end_io_wq->work);

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 8976c33..c038644 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c

@@ -1429,7 +1429,7 @@
 	ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
 	BUG_ON(ret);
 
-	if (!(rw & (1 << BIO_RW))) {
+	if (!(rw & REQ_WRITE)) {
 		if (bio_flags & EXTENT_BIO_COMPRESSED) {
 			return btrfs_submit_compressed_read(inode, bio,
 						    mirror_num, bio_flags);
@@ -1841,7 +1841,7 @@
 	bio->bi_size = 0;
 
 	bio_add_page(bio, page, failrec->len, start - page_offset(page));
-	if (failed_bio->bi_rw & (1 << BIO_RW))
+	if (failed_bio->bi_rw & REQ_WRITE)
 		rw = WRITE;
 	else
 		rw = READ;
@@ -5647,7 +5647,7 @@
 	struct bio_vec *bvec = bio->bi_io_vec;
 	u64 start;
 	int skip_sum;
-	int write = rw & (1 << BIO_RW);
+	int write = rw & REQ_WRITE;
 	int ret = 0;
 
 	skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;

diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index d6e3af8..dd318ff 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c

@@ -258,7 +258,7 @@
 
 		BUG_ON(atomic_read(&cur->bi_cnt) == 0);
 
-		if (bio_rw_flagged(cur, BIO_RW_SYNCIO))
+		if (cur->bi_rw & REQ_SYNC)
 			num_sync_run++;
 
 		submit_bio(cur->bi_rw, cur);
@@ -2651,7 +2651,7 @@
 	int max_errors = 0;
 	struct btrfs_multi_bio *multi = NULL;
 
-	if (multi_ret && !(rw & (1 << BIO_RW)))
+	if (multi_ret && !(rw & REQ_WRITE))
 		stripes_allocated = 1;
 again:
 	if (multi_ret) {
@@ -2687,7 +2687,7 @@
 		mirror_num = 0;
 
 	/* if our multi bio struct is too small, back off and try again */
-	if (rw & (1 << BIO_RW)) {
+	if (rw & REQ_WRITE) {
 		if (map->type & (BTRFS_BLOCK_GROUP_RAID1 |
 				 BTRFS_BLOCK_GROUP_DUP)) {
 			stripes_required = map->num_stripes;
@@ -2697,7 +2697,7 @@
 			max_errors = 1;
 		}
 	}
-	if (multi_ret && (rw & (1 << BIO_RW)) &&
+	if (multi_ret && (rw & REQ_WRITE) &&
 	    stripes_allocated < stripes_required) {
 		stripes_allocated = map->num_stripes;
 		free_extent_map(em);
@@ -2733,7 +2733,7 @@
 	num_stripes = 1;
 	stripe_index = 0;
 	if (map->type & BTRFS_BLOCK_GROUP_RAID1) {
-		if (unplug_page || (rw & (1 << BIO_RW)))
+		if (unplug_page || (rw & REQ_WRITE))
 			num_stripes = map->num_stripes;
 		else if (mirror_num)
 			stripe_index = mirror_num - 1;
@@ -2744,7 +2744,7 @@
 		}
 
 	} else if (map->type & BTRFS_BLOCK_GROUP_DUP) {
-		if (rw & (1 << BIO_RW))
+		if (rw & REQ_WRITE)
 			num_stripes = map->num_stripes;
 		else if (mirror_num)
 			stripe_index = mirror_num - 1;
@@ -2755,7 +2755,7 @@
 		stripe_index = do_div(stripe_nr, factor);
 		stripe_index *= map->sub_stripes;
 
-		if (unplug_page || (rw & (1 << BIO_RW)))
+		if (unplug_page || (rw & REQ_WRITE))
 			num_stripes = map->sub_stripes;
 		else if (mirror_num)
 			stripe_index += mirror_num - 1;
@@ -2945,7 +2945,7 @@
 	struct btrfs_pending_bios *pending_bios;
 
 	/* don't bother with additional async steps for reads, right now */
-	if (!(rw & (1 << BIO_RW))) {
+	if (!(rw & REQ_WRITE)) {
 		bio_get(bio);
 		submit_bio(rw, bio);
 		bio_put(bio);
@@ -2964,7 +2964,7 @@
 	bio->bi_rw |= rw;
 
 	spin_lock(&device->io_lock);
-	if (bio_rw_flagged(bio, BIO_RW_SYNCIO))
+	if (bio->bi_rw & REQ_SYNC)
 		pending_bios = &device->pending_sync_bios;
 	else
 		pending_bios = &device->pending_bios;

diff --git a/fs/coda/psdev.c b/fs/coda/psdev.c
index 66b9cf7..de89645 100644
--- a/fs/coda/psdev.c
+++ b/fs/coda/psdev.c

@@ -177,7 +177,7 @@
 		nbytes = req->uc_outSize; /* don't have more space! */
 	}
         if (copy_from_user(req->uc_data, buf, nbytes)) {
-		req->uc_flags |= REQ_ABORT;
+		req->uc_flags |= CODA_REQ_ABORT;
 		wake_up(&req->uc_sleep);
 		retval = -EFAULT;
 		goto out;
@@ -254,8 +254,8 @@
 	        retval = -EFAULT;
         
 	/* If request was not a signal, enqueue and don't free */
-	if (!(req->uc_flags & REQ_ASYNC)) {
-		req->uc_flags |= REQ_READ;
+	if (!(req->uc_flags & CODA_REQ_ASYNC)) {
+		req->uc_flags |= CODA_REQ_READ;
 		list_add_tail(&(req->uc_chain), &vcp->vc_processing);
 		goto out;
 	}
@@ -315,19 +315,19 @@
 		list_del(&req->uc_chain);
 
 		/* Async requests need to be freed here */
-		if (req->uc_flags & REQ_ASYNC) {
+		if (req->uc_flags & CODA_REQ_ASYNC) {
 			CODA_FREE(req->uc_data, sizeof(struct coda_in_hdr));
 			kfree(req);
 			continue;
 		}
-		req->uc_flags |= REQ_ABORT;
+		req->uc_flags |= CODA_REQ_ABORT;
 		wake_up(&req->uc_sleep);
 	}
 
 	list_for_each_entry_safe(req, tmp, &vcp->vc_processing, uc_chain) {
 		list_del(&req->uc_chain);
 
-		req->uc_flags |= REQ_ABORT;
+		req->uc_flags |= CODA_REQ_ABORT;
 		wake_up(&req->uc_sleep);
 	}
 

diff --git a/fs/coda/upcall.c b/fs/coda/upcall.c
index f09c5ed..b8893ab 100644
--- a/fs/coda/upcall.c
+++ b/fs/coda/upcall.c

@@ -604,7 +604,7 @@
 			       (((r)->uc_opcode != CODA_CLOSE && \
 				 (r)->uc_opcode != CODA_STORE && \
 				 (r)->uc_opcode != CODA_RELEASE) || \
-				(r)->uc_flags & REQ_READ))
+				(r)->uc_flags & CODA_REQ_READ))
 
 static inline void coda_waitfor_upcall(struct upc_req *req)
 {
@@ -624,7 +624,7 @@
 			set_current_state(TASK_UNINTERRUPTIBLE);
 
 		/* got a reply */
-		if (req->uc_flags & (REQ_WRITE | REQ_ABORT))
+		if (req->uc_flags & (CODA_REQ_WRITE | CODA_REQ_ABORT))
 			break;
 
 		if (blocked && time_after(jiffies, timeout) &&
@@ -708,7 +708,7 @@
 	coda_waitfor_upcall(req);
 
 	/* Op went through, interrupt or not... */
-	if (req->uc_flags & REQ_WRITE) {
+	if (req->uc_flags & CODA_REQ_WRITE) {
 		out = (union outputArgs *)req->uc_data;
 		/* here we map positive Venus errors to kernel errors */
 		error = -out->oh.result;
@@ -717,13 +717,13 @@
 	}
 
 	error = -EINTR;
-	if ((req->uc_flags & REQ_ABORT) || !signal_pending(current)) {
+	if ((req->uc_flags & CODA_REQ_ABORT) || !signal_pending(current)) {
 		printk(KERN_WARNING "coda: Unexpected interruption.\n");
 		goto exit;
 	}
 
 	/* Interrupted before venus read it. */
-	if (!(req->uc_flags & REQ_READ))
+	if (!(req->uc_flags & CODA_REQ_READ))
 		goto exit;
 
 	/* Venus saw the upcall, make sure we can send interrupt signal */
@@ -747,7 +747,7 @@
 	sig_inputArgs->ih.opcode = CODA_SIGNAL;
 	sig_inputArgs->ih.unique = req->uc_unique;
 
-	sig_req->uc_flags = REQ_ASYNC;
+	sig_req->uc_flags = CODA_REQ_ASYNC;
 	sig_req->uc_opcode = sig_inputArgs->ih.opcode;
 	sig_req->uc_unique = sig_inputArgs->ih.unique;
 	sig_req->uc_inSize = sizeof(struct coda_in_hdr);

diff --git a/fs/exofs/ios.c b/fs/exofs/ios.c
index 4337cad..e273220 100644
--- a/fs/exofs/ios.c
+++ b/fs/exofs/ios.c

@@ -599,7 +599,7 @@
 			} else {
 				bio = master_dev->bio;
 				/* FIXME: bio_set_dir() */
-				bio->bi_rw |= (1 << BIO_RW);
+				bio->bi_rw |= REQ_WRITE;
 			}
 
 			osd_req_write(or, &ios->obj, per_dev->offset, bio,

diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index b7c7586..2f76c4a 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c

@@ -26,15 +26,9 @@
 #include <linux/blkdev.h>
 #include <linux/backing-dev.h>
 #include <linux/buffer_head.h>
+#include <linux/tracepoint.h>
 #include "internal.h"
 
-#define inode_to_bdi(inode)	((inode)->i_mapping->backing_dev_info)
-
-/*
- * We don't actually have pdflush, but this one is exported though /proc...
- */
-int nr_pdflush_threads;
-
 /*
  * Passed into wb_writeback(), essentially a subset of writeback_control
  */
@@ -50,6 +44,21 @@
 	struct completion *done;	/* set if the caller waits */
 };
 
+/*
+ * Include the creation of the trace points after defining the
+ * wb_writeback_work structure so that the definition remains local to this
+ * file.
+ */
+#define CREATE_TRACE_POINTS
+#include <trace/events/writeback.h>
+
+#define inode_to_bdi(inode)	((inode)->i_mapping->backing_dev_info)
+
+/*
+ * We don't actually have pdflush, but this one is exported through /proc...
+ */
+int nr_pdflush_threads;
+
 /**
  * writeback_in_progress - determine whether there is writeback in progress
  * @bdi: the device's backing_dev_info structure.
@@ -65,22 +74,21 @@
 static void bdi_queue_work(struct backing_dev_info *bdi,
 		struct wb_writeback_work *work)
 {
-	spin_lock(&bdi->wb_lock);
+	trace_writeback_queue(bdi, work);
+
+	spin_lock_bh(&bdi->wb_lock);
 	list_add_tail(&work->list, &bdi->work_list);
-	spin_unlock(&bdi->wb_lock);
-
-	/*
-	 * If the default thread isn't there, make sure we add it. When
-	 * it gets created and wakes up, we'll run this work.
-	 */
-	if (unlikely(list_empty_careful(&bdi->wb_list)))
+	if (bdi->wb.task) {
+		wake_up_process(bdi->wb.task);
+	} else {
+		/*
+		 * The bdi thread isn't there, wake up the forker thread which
+		 * will create and run it.
+		 */
+		trace_writeback_nothread(bdi, work);
 		wake_up_process(default_backing_dev_info.wb.task);
-	else {
-		struct bdi_writeback *wb = &bdi->wb;
-
-		if (wb->task)
-			wake_up_process(wb->task);
 	}
+	spin_unlock_bh(&bdi->wb_lock);
 }
 
 static void
@@ -95,8 +103,10 @@
 	 */
 	work = kzalloc(sizeof(*work), GFP_ATOMIC);
 	if (!work) {
-		if (bdi->wb.task)
+		if (bdi->wb.task) {
+			trace_writeback_nowork(bdi);
 			wake_up_process(bdi->wb.task);
+		}
 		return;
 	}
 
@@ -643,10 +653,14 @@
 		wbc.more_io = 0;
 		wbc.nr_to_write = MAX_WRITEBACK_PAGES;
 		wbc.pages_skipped = 0;
+
+		trace_wbc_writeback_start(&wbc, wb->bdi);
 		if (work->sb)
 			__writeback_inodes_sb(work->sb, wb, &wbc);
 		else
 			writeback_inodes_wb(wb, &wbc);
+		trace_wbc_writeback_written(&wbc, wb->bdi);
+
 		work->nr_pages -= MAX_WRITEBACK_PAGES - wbc.nr_to_write;
 		wrote += MAX_WRITEBACK_PAGES - wbc.nr_to_write;
 
@@ -674,6 +688,7 @@
 		if (!list_empty(&wb->b_more_io))  {
 			inode = list_entry(wb->b_more_io.prev,
 						struct inode, i_list);
+			trace_wbc_writeback_wait(&wbc, wb->bdi);
 			inode_wait_for_writeback(inode);
 		}
 		spin_unlock(&inode_lock);
@@ -686,17 +701,17 @@
  * Return the next wb_writeback_work struct that hasn't been processed yet.
  */
 static struct wb_writeback_work *
-get_next_work_item(struct backing_dev_info *bdi, struct bdi_writeback *wb)
+get_next_work_item(struct backing_dev_info *bdi)
 {
 	struct wb_writeback_work *work = NULL;
 
-	spin_lock(&bdi->wb_lock);
+	spin_lock_bh(&bdi->wb_lock);
 	if (!list_empty(&bdi->work_list)) {
 		work = list_entry(bdi->work_list.next,
 				  struct wb_writeback_work, list);
 		list_del_init(&work->list);
 	}
-	spin_unlock(&bdi->wb_lock);
+	spin_unlock_bh(&bdi->wb_lock);
 	return work;
 }
 
@@ -744,7 +759,7 @@
 	struct wb_writeback_work *work;
 	long wrote = 0;
 
-	while ((work = get_next_work_item(bdi, wb)) != NULL) {
+	while ((work = get_next_work_item(bdi)) != NULL) {
 		/*
 		 * Override sync mode, in case we must wait for completion
 		 * because this thread is exiting now.
@@ -752,6 +767,8 @@
 		if (force_wait)
 			work->sync_mode = WB_SYNC_ALL;
 
+		trace_writeback_exec(bdi, work);
+
 		wrote += wb_writeback(wb, work);
 
 		/*
@@ -776,47 +793,66 @@
  * Handle writeback of dirty data for the device backed by this bdi. Also
  * wakes up periodically and does kupdated style flushing.
  */
-int bdi_writeback_task(struct bdi_writeback *wb)
+int bdi_writeback_thread(void *data)
 {
-	unsigned long last_active = jiffies;
-	unsigned long wait_jiffies = -1UL;
+	struct bdi_writeback *wb = data;
+	struct backing_dev_info *bdi = wb->bdi;
 	long pages_written;
 
+	current->flags |= PF_FLUSHER | PF_SWAPWRITE;
+	set_freezable();
+	wb->last_active = jiffies;
+
+	/*
+	 * Our parent may run at a different priority, just set us to normal
+	 */
+	set_user_nice(current, 0);
+
+	trace_writeback_thread_start(bdi);
+
 	while (!kthread_should_stop()) {
+		/*
+		 * Remove own delayed wake-up timer, since we are already awake
+		 * and we'll take care of the periodic write-back.
+		 */
+		del_timer(&wb->wakeup_timer);
+
 		pages_written = wb_do_writeback(wb, 0);
 
-		if (pages_written)
-			last_active = jiffies;
-		else if (wait_jiffies != -1UL) {
-			unsigned long max_idle;
+		trace_writeback_pages_written(pages_written);
 
-			/*
-			 * Longest period of inactivity that we tolerate. If we
-			 * see dirty data again later, the task will get
-			 * recreated automatically.
-			 */
-			max_idle = max(5UL * 60 * HZ, wait_jiffies);
-			if (time_after(jiffies, max_idle + last_active))
-				break;
+		if (pages_written)
+			wb->last_active = jiffies;
+
+		set_current_state(TASK_INTERRUPTIBLE);
+		if (!list_empty(&bdi->work_list)) {
+			__set_current_state(TASK_RUNNING);
+			continue;
 		}
 
-		if (dirty_writeback_interval) {
-			wait_jiffies = msecs_to_jiffies(dirty_writeback_interval * 10);
-			schedule_timeout_interruptible(wait_jiffies);
-		} else {
-			set_current_state(TASK_INTERRUPTIBLE);
-			if (list_empty_careful(&wb->bdi->work_list) &&
-			    !kthread_should_stop())
-				schedule();
-			__set_current_state(TASK_RUNNING);
+		if (wb_has_dirty_io(wb) && dirty_writeback_interval)
+			schedule_timeout(msecs_to_jiffies(dirty_writeback_interval * 10));
+		else {
+			/*
+			 * We have nothing to do, so we can go to sleep without
+			 * any timeout and save power. When work is queued or
+			 * something is made dirty, we will be woken up.
+			 */
+			schedule();
 		}
 
 		try_to_freeze();
 	}
 
+	/* Flush any work that raced with us exiting */
+	if (!list_empty(&bdi->work_list))
+		wb_do_writeback(wb, 1);
+
+	trace_writeback_thread_stop(bdi);
 	return 0;
 }
 
+
 /*
  * Start writeback of `nr_pages' pages.  If `nr_pages' is zero, write back
  * the whole world.
@@ -891,6 +927,8 @@
 void __mark_inode_dirty(struct inode *inode, int flags)
 {
 	struct super_block *sb = inode->i_sb;
+	struct backing_dev_info *bdi = NULL;
+	bool wakeup_bdi = false;
 
 	/*
 	 * Don't do this for I_DIRTY_PAGES - that doesn't actually
@@ -944,22 +982,31 @@
 		 * reposition it (that would break b_dirty time-ordering).
 		 */
 		if (!was_dirty) {
-			struct bdi_writeback *wb = &inode_to_bdi(inode)->wb;
-			struct backing_dev_info *bdi = wb->bdi;
+			bdi = inode_to_bdi(inode);
 
-			if (bdi_cap_writeback_dirty(bdi) &&
-			    !test_bit(BDI_registered, &bdi->state)) {
-				WARN_ON(1);
-				printk(KERN_ERR "bdi-%s not registered\n",
-								bdi->name);
+			if (bdi_cap_writeback_dirty(bdi)) {
+				WARN(!test_bit(BDI_registered, &bdi->state),
+				     "bdi-%s not registered\n", bdi->name);
+
+				/*
+				 * If this is the first dirty inode for this
+				 * bdi, we have to wake-up the corresponding
+				 * bdi thread to make sure background
+				 * write-back happens later.
+				 */
+				if (!wb_has_dirty_io(&bdi->wb))
+					wakeup_bdi = true;
 			}
 
 			inode->dirtied_when = jiffies;
-			list_move(&inode->i_list, &wb->b_dirty);
+			list_move(&inode->i_list, &bdi->wb.b_dirty);
 		}
 	}
 out:
 	spin_unlock(&inode_lock);
+
+	if (wakeup_bdi)
+		bdi_wakeup_thread_delayed(bdi);
 }
 EXPORT_SYMBOL(__mark_inode_dirty);
 

diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index 6a857e24..cde1248 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c

@@ -595,7 +595,7 @@
 	if (test_bit(SDF_NOBARRIERS, &sdp->sd_flags))
 		goto skip_barrier;
 	get_bh(bh);
-	submit_bh(WRITE_SYNC | (1 << BIO_RW_BARRIER) | (1 << BIO_RW_META), bh);
+	submit_bh(WRITE_BARRIER | REQ_META, bh);
 	wait_on_buffer(bh);
 	if (buffer_eopnotsupp(bh)) {
 		clear_buffer_eopnotsupp(bh);
@@ -605,7 +605,7 @@
 		lock_buffer(bh);
 skip_barrier:
 		get_bh(bh);
-		submit_bh(WRITE_SYNC | (1 << BIO_RW_META), bh);
+		submit_bh(WRITE_SYNC | REQ_META, bh);
 		wait_on_buffer(bh);
 	}
 	if (!buffer_uptodate(bh))

diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index 18176d0..f3b071f 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c

@@ -36,8 +36,8 @@
 {
 	struct buffer_head *bh, *head;
 	int nr_underway = 0;
-	int write_op = (1 << BIO_RW_META) | ((wbc->sync_mode == WB_SYNC_ALL ?
-			WRITE_SYNC_PLUG : WRITE));
+	int write_op = REQ_META |
+		(wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC_PLUG : WRITE);
 
 	BUG_ON(!PageLocked(page));
 	BUG_ON(!page_has_buffers(page));
@@ -225,7 +225,7 @@
 	}
 	bh->b_end_io = end_buffer_read_sync;
 	get_bh(bh);
-	submit_bh(READ_SYNC | (1 << BIO_RW_META), bh);
+	submit_bh(READ_SYNC | REQ_META, bh);
 	if (!(flags & DIO_WAIT))
 		return 0;
 
@@ -432,7 +432,7 @@
 	if (buffer_uptodate(first_bh))
 		goto out;
 	if (!buffer_locked(first_bh))
-		ll_rw_block(READ_SYNC | (1 << BIO_RW_META), 1, &first_bh);
+		ll_rw_block(READ_SYNC | REQ_META, 1, &first_bh);
 
 	dblock++;
 	extlen--;

diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 4f44bde..4d4b1e8 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c

@@ -274,7 +274,7 @@
 
 	bio->bi_end_io = end_bio_io_page;
 	bio->bi_private = page;
-	submit_bio(READ_SYNC | (1 << BIO_RW_META), bio);
+	submit_bio(READ_SYNC | REQ_META, bio);
 	wait_on_page_locked(page);
 	bio_put(bio);
 	if (!PageUptodate(page)) {

diff --git a/fs/nilfs2/segbuf.c b/fs/nilfs2/segbuf.c
index 2e6a272..4588fb9 100644
--- a/fs/nilfs2/segbuf.c
+++ b/fs/nilfs2/segbuf.c

@@ -508,7 +508,7 @@
 		 * Last BIO is always sent through the following
 		 * submission.
 		 */
-		rw |= (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_UNPLUG);
+		rw |= REQ_SYNC | REQ_UNPLUG;
 		res = nilfs_segbuf_submit_bio(segbuf, &wi, rw);
 	}
 

diff --git a/fs/splice.c b/fs/splice.c
index efdbfec..8f1dfae 100644
--- a/fs/splice.c
+++ b/fs/splice.c

@@ -399,17 +399,7 @@
 		 * If the page isn't uptodate, we may need to start io on it
 		 */
 		if (!PageUptodate(page)) {
-			/*
-			 * If in nonblock mode then dont block on waiting
-			 * for an in-flight io page
-			 */
-			if (flags & SPLICE_F_NONBLOCK) {
-				if (!trylock_page(page)) {
-					error = -EAGAIN;
-					break;
-				}
-			} else
-				lock_page(page);
+			lock_page(page);
 
 			/*
 			 * Page was truncated, or invalidated by the
@@ -597,7 +587,6 @@
 	struct page *pages[PIPE_DEF_BUFFERS];
 	struct partial_page partial[PIPE_DEF_BUFFERS];
 	struct iovec *vec, __vec[PIPE_DEF_BUFFERS];
-	pgoff_t index;
 	ssize_t res;
 	size_t this_len;
 	int error;
@@ -621,7 +610,6 @@
 			goto shrink_ret;
 	}
 
-	index = *ppos >> PAGE_CACHE_SHIFT;
 	offset = *ppos & ~PAGE_CACHE_MASK;
 	nr_pages = (len + offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
 

diff --git a/include/linux/Kbuild b/include/linux/Kbuild
index 2547daf..9d65d4d 100644
--- a/include/linux/Kbuild
+++ b/include/linux/Kbuild

@@ -39,6 +39,7 @@
 header-y += b1lli.h
 header-y += baycom.h
 header-y += bfs_fs.h
+header-y += blk_types.h
 header-y += blkpg.h
 header-y += bpqether.h
 header-y += bsg.h

diff --git a/include/linux/audit.h b/include/linux/audit.h
index f391d45..e24afab 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h

@@ -544,7 +544,7 @@
 #define audit_putname(n) do { ; } while (0)
 #define __audit_inode(n,d) do { ; } while (0)
 #define __audit_inode_child(i,p) do { ; } while (0)
-#define audit_inode(n,d) do { ; } while (0)
+#define audit_inode(n,d) do { (void)(d); } while (0)
 #define audit_inode_child(i,p) do { ; } while (0)
 #define audit_core_dumps(i) do { ; } while (0)
 #define auditsc_get_stamp(c,t,s) (0)

diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index e9aec0d..7628219 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h

@@ -45,22 +45,21 @@
 #define BDI_STAT_BATCH (8*(1+ilog2(nr_cpu_ids)))
 
 struct bdi_writeback {
-	struct list_head list;			/* hangs off the bdi */
-
-	struct backing_dev_info *bdi;		/* our parent bdi */
+	struct backing_dev_info *bdi;	/* our parent bdi */
 	unsigned int nr;
 
-	unsigned long last_old_flush;		/* last old data flush */
+	unsigned long last_old_flush;	/* last old data flush */
+	unsigned long last_active;	/* last time bdi thread was active */
 
-	struct task_struct	*task;		/* writeback task */
-	struct list_head	b_dirty;	/* dirty inodes */
-	struct list_head	b_io;		/* parked for writeback */
-	struct list_head	b_more_io;	/* parked for more writeback */
+	struct task_struct *task;	/* writeback thread */
+	struct timer_list wakeup_timer; /* used for delayed bdi thread wakeup */
+	struct list_head b_dirty;	/* dirty inodes */
+	struct list_head b_io;		/* parked for writeback */
+	struct list_head b_more_io;	/* parked for more writeback */
 };
 
 struct backing_dev_info {
 	struct list_head bdi_list;
-	struct rcu_head rcu_head;
 	unsigned long ra_pages;	/* max readahead in PAGE_CACHE_SIZE units */
 	unsigned long state;	/* Always use atomic bitops on this */
 	unsigned int capabilities; /* Device capabilities */
@@ -80,8 +79,7 @@
 	unsigned int max_ratio, max_prop_frac;
 
 	struct bdi_writeback wb;  /* default writeback info for this bdi */
-	spinlock_t wb_lock;	  /* protects update side of wb_list */
-	struct list_head wb_list; /* the flusher threads hanging off this bdi */
+	spinlock_t wb_lock;	  /* protects work_list */
 
 	struct list_head work_list;
 
@@ -105,9 +103,10 @@
 int bdi_setup_and_register(struct backing_dev_info *, char *, unsigned int);
 void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages);
 void bdi_start_background_writeback(struct backing_dev_info *bdi);
-int bdi_writeback_task(struct bdi_writeback *wb);
+int bdi_writeback_thread(void *data);
 int bdi_has_dirty_io(struct backing_dev_info *bdi);
 void bdi_arm_supers_timer(void);
+void bdi_wakeup_thread_delayed(struct backing_dev_info *bdi);
 
 extern spinlock_t bdi_lock;
 extern struct list_head bdi_list;

diff --git a/include/linux/bio.h b/include/linux/bio.h
index 7fc5606..5274103 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h

@@ -9,7 +9,7 @@
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
-
+ *
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU General Public License for more details.
  *
@@ -28,6 +28,9 @@
 
 #include <asm/io.h>
 
+/* struct bio, bio_vec and BIO_* flags are defined in blk_types.h */
+#include <linux/blk_types.h>
+
 #define BIO_DEBUG
 
 #ifdef BIO_DEBUG
@@ -41,154 +44,6 @@
 #define BIO_MAX_SECTORS		(BIO_MAX_SIZE >> 9)
 
 /*
- * was unsigned short, but we might as well be ready for > 64kB I/O pages
- */
-struct bio_vec {
-	struct page	*bv_page;
-	unsigned int	bv_len;
-	unsigned int	bv_offset;
-};
-
-struct bio_set;
-struct bio;
-struct bio_integrity_payload;
-typedef void (bio_end_io_t) (struct bio *, int);
-typedef void (bio_destructor_t) (struct bio *);
-
-/*
- * main unit of I/O for the block layer and lower layers (ie drivers and
- * stacking drivers)
- */
-struct bio {
-	sector_t		bi_sector;	/* device address in 512 byte
-						   sectors */
-	struct bio		*bi_next;	/* request queue link */
-	struct block_device	*bi_bdev;
-	unsigned long		bi_flags;	/* status, command, etc */
-	unsigned long		bi_rw;		/* bottom bits READ/WRITE,
-						 * top bits priority
-						 */
-
-	unsigned short		bi_vcnt;	/* how many bio_vec's */
-	unsigned short		bi_idx;		/* current index into bvl_vec */
-
-	/* Number of segments in this BIO after
-	 * physical address coalescing is performed.
-	 */
-	unsigned int		bi_phys_segments;
-
-	unsigned int		bi_size;	/* residual I/O count */
-
-	/*
-	 * To keep track of the max segment size, we account for the
-	 * sizes of the first and last mergeable segments in this bio.
-	 */
-	unsigned int		bi_seg_front_size;
-	unsigned int		bi_seg_back_size;
-
-	unsigned int		bi_max_vecs;	/* max bvl_vecs we can hold */
-
-	unsigned int		bi_comp_cpu;	/* completion CPU */
-
-	atomic_t		bi_cnt;		/* pin count */
-
-	struct bio_vec		*bi_io_vec;	/* the actual vec list */
-
-	bio_end_io_t		*bi_end_io;
-
-	void			*bi_private;
-#if defined(CONFIG_BLK_DEV_INTEGRITY)
-	struct bio_integrity_payload *bi_integrity;  /* data integrity */
-#endif
-
-	bio_destructor_t	*bi_destructor;	/* destructor */
-
-	/*
-	 * We can inline a number of vecs at the end of the bio, to avoid
-	 * double allocations for a small number of bio_vecs. This member
-	 * MUST obviously be kept at the very end of the bio.
-	 */
-	struct bio_vec		bi_inline_vecs[0];
-};
-
-/*
- * bio flags
- */
-#define BIO_UPTODATE	0	/* ok after I/O completion */
-#define BIO_RW_BLOCK	1	/* RW_AHEAD set, and read/write would block */
-#define BIO_EOF		2	/* out-out-bounds error */
-#define BIO_SEG_VALID	3	/* bi_phys_segments valid */
-#define BIO_CLONED	4	/* doesn't own data */
-#define BIO_BOUNCED	5	/* bio is a bounce bio */
-#define BIO_USER_MAPPED 6	/* contains user pages */
-#define BIO_EOPNOTSUPP	7	/* not supported */
-#define BIO_CPU_AFFINE	8	/* complete bio on same CPU as submitted */
-#define BIO_NULL_MAPPED 9	/* contains invalid user pages */
-#define BIO_FS_INTEGRITY 10	/* fs owns integrity data, not block layer */
-#define BIO_QUIET	11	/* Make BIO Quiet */
-#define bio_flagged(bio, flag)	((bio)->bi_flags & (1 << (flag)))
-
-/*
- * top 4 bits of bio flags indicate the pool this bio came from
- */
-#define BIO_POOL_BITS		(4)
-#define BIO_POOL_NONE		((1UL << BIO_POOL_BITS) - 1)
-#define BIO_POOL_OFFSET		(BITS_PER_LONG - BIO_POOL_BITS)
-#define BIO_POOL_MASK		(1UL << BIO_POOL_OFFSET)
-#define BIO_POOL_IDX(bio)	((bio)->bi_flags >> BIO_POOL_OFFSET)	
-
-/*
- * bio bi_rw flags
- *
- * bit 0 -- data direction
- *	If not set, bio is a read from device. If set, it's a write to device.
- * bit 1 -- fail fast device errors
- * bit 2 -- fail fast transport errors
- * bit 3 -- fail fast driver errors
- * bit 4 -- rw-ahead when set
- * bit 5 -- barrier
- *	Insert a serialization point in the IO queue, forcing previously
- *	submitted IO to be completed before this one is issued.
- * bit 6 -- synchronous I/O hint.
- * bit 7 -- Unplug the device immediately after submitting this bio.
- * bit 8 -- metadata request
- *	Used for tracing to differentiate metadata and data IO. May also
- *	get some preferential treatment in the IO scheduler
- * bit 9 -- discard sectors
- *	Informs the lower level device that this range of sectors is no longer
- *	used by the file system and may thus be freed by the device. Used
- *	for flash based storage.
- *	Don't want driver retries for any fast fail whatever the reason.
- * bit 10 -- Tell the IO scheduler not to wait for more requests after this
-	one has been submitted, even if it is a SYNC request.
- */
-enum bio_rw_flags {
-	BIO_RW,
-	BIO_RW_FAILFAST_DEV,
-	BIO_RW_FAILFAST_TRANSPORT,
-	BIO_RW_FAILFAST_DRIVER,
-	/* above flags must match REQ_* */
-	BIO_RW_AHEAD,
-	BIO_RW_BARRIER,
-	BIO_RW_SYNCIO,
-	BIO_RW_UNPLUG,
-	BIO_RW_META,
-	BIO_RW_DISCARD,
-	BIO_RW_NOIDLE,
-};
-
-/*
- * First four bits must match between bio->bi_rw and rq->cmd_flags, make
- * that explicit here.
- */
-#define BIO_RW_RQ_MASK		0xf
-
-static inline bool bio_rw_flagged(struct bio *bio, enum bio_rw_flags flag)
-{
-	return (bio->bi_rw & (1 << flag)) != 0;
-}
-
-/*
  * upper 16 bits of bi_rw define the io priority of this bio
  */
 #define BIO_PRIO_SHIFT	(8 * sizeof(unsigned long) - IOPRIO_BITS)
@@ -211,7 +66,10 @@
 #define bio_offset(bio)		bio_iovec((bio))->bv_offset
 #define bio_segments(bio)	((bio)->bi_vcnt - (bio)->bi_idx)
 #define bio_sectors(bio)	((bio)->bi_size >> 9)
-#define bio_empty_barrier(bio)	(bio_rw_flagged(bio, BIO_RW_BARRIER) && !bio_has_data(bio) && !bio_rw_flagged(bio, BIO_RW_DISCARD))
+#define bio_empty_barrier(bio) \
+	(((bio)->bi_rw & REQ_HARDBARRIER) && \
+	 !bio_has_data(bio) && \
+	 !((bio)->bi_rw & REQ_DISCARD))	/* data-less, non-discard barrier */
 
 static inline unsigned int bio_cur_bytes(struct bio *bio)
 {

diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
new file mode 100644
index 0000000..5369177
--- /dev/null
+++ b/include/linux/blk_types.h

@@ -0,0 +1,194 @@
+/*
+ * Block data types and constants.  Directly include this file only to
+ * break include dependency loop.
+ */
+#ifndef __LINUX_BLK_TYPES_H
+#define __LINUX_BLK_TYPES_H
+
+#ifdef CONFIG_BLOCK
+
+#include <linux/types.h>
+
+struct bio_set;
+struct bio;
+struct bio_integrity_payload;
+struct page;
+struct block_device;
+typedef void (bio_end_io_t) (struct bio *, int);
+typedef void (bio_destructor_t) (struct bio *);
+
+/*
+ * was unsigned short, but we might as well be ready for > 64kB I/O pages
+ */
+struct bio_vec {
+	struct page	*bv_page;
+	unsigned int	bv_len;
+	unsigned int	bv_offset;
+};
+
+/*
+ * main unit of I/O for the block layer and lower layers (ie drivers and
+ * stacking drivers)
+ */
+struct bio {
+	sector_t		bi_sector;	/* device address in 512 byte
+						   sectors */
+	struct bio		*bi_next;	/* request queue link */
+	struct block_device	*bi_bdev;
+	unsigned long		bi_flags;	/* status, command, etc */
+	unsigned long		bi_rw;		/* bottom bits READ/WRITE,
+						 * top bits priority
+						 */
+
+	unsigned short		bi_vcnt;	/* how many bio_vec's */
+	unsigned short		bi_idx;		/* current index into bvl_vec */
+
+	/* Number of segments in this BIO after
+	 * physical address coalescing is performed.
+	 */
+	unsigned int		bi_phys_segments;
+
+	unsigned int		bi_size;	/* residual I/O count */
+
+	/*
+	 * To keep track of the max segment size, we account for the
+	 * sizes of the first and last mergeable segments in this bio.
+	 */
+	unsigned int		bi_seg_front_size;
+	unsigned int		bi_seg_back_size;
+
+	unsigned int		bi_max_vecs;	/* max bvl_vecs we can hold */
+
+	unsigned int		bi_comp_cpu;	/* completion CPU */
+
+	atomic_t		bi_cnt;		/* pin count */
+
+	struct bio_vec		*bi_io_vec;	/* the actual vec list */
+
+	bio_end_io_t		*bi_end_io;
+
+	void			*bi_private;
+#if defined(CONFIG_BLK_DEV_INTEGRITY)
+	struct bio_integrity_payload *bi_integrity;  /* data integrity */
+#endif
+
+	bio_destructor_t	*bi_destructor;	/* destructor */
+
+	/*
+	 * We can inline a number of vecs at the end of the bio, to avoid
+	 * double allocations for a small number of bio_vecs. This member
+	 * MUST obviously be kept at the very end of the bio.
+	 */
+	struct bio_vec		bi_inline_vecs[0];
+};
+
+/*
+ * bio flags
+ */
+#define BIO_UPTODATE	0	/* ok after I/O completion */
+#define BIO_RW_BLOCK	1	/* RW_AHEAD set, and read/write would block */
+#define BIO_EOF		2	/* out-of-bounds error */
+#define BIO_SEG_VALID	3	/* bi_phys_segments valid */
+#define BIO_CLONED	4	/* doesn't own data */
+#define BIO_BOUNCED	5	/* bio is a bounce bio */
+#define BIO_USER_MAPPED 6	/* contains user pages */
+#define BIO_EOPNOTSUPP	7	/* not supported */
+#define BIO_CPU_AFFINE	8	/* complete bio on same CPU as submitted */
+#define BIO_NULL_MAPPED 9	/* contains invalid user pages */
+#define BIO_FS_INTEGRITY 10	/* fs owns integrity data, not block layer */
+#define BIO_QUIET	11	/* Make BIO Quiet */
+#define bio_flagged(bio, flag)	((bio)->bi_flags & (1 << (flag)))
+
+/*
+ * top 4 bits of bio flags indicate the pool this bio came from
+ */
+#define BIO_POOL_BITS		(4)
+#define BIO_POOL_NONE		((1UL << BIO_POOL_BITS) - 1)
+#define BIO_POOL_OFFSET		(BITS_PER_LONG - BIO_POOL_BITS)
+#define BIO_POOL_MASK		(1UL << BIO_POOL_OFFSET)
+#define BIO_POOL_IDX(bio)	((bio)->bi_flags >> BIO_POOL_OFFSET)
+
+#endif /* CONFIG_BLOCK */
+
+/*
+ * Request flags.  For use in the cmd_flags field of struct request, and in
+ * bi_rw of struct bio.  Note that some flags are only valid in either one.
+ */
+enum rq_flag_bits {
+	/* common flags */
+	__REQ_WRITE,		/* not set, read. set, write */
+	__REQ_FAILFAST_DEV,	/* no driver retries of device errors */
+	__REQ_FAILFAST_TRANSPORT, /* no driver retries of transport errors */
+	__REQ_FAILFAST_DRIVER,	/* no driver retries of driver errors */
+
+	__REQ_HARDBARRIER,	/* may not be passed by drive either */
+	__REQ_SYNC,		/* request is sync (sync write or read) */
+	__REQ_META,		/* metadata io request */
+	__REQ_DISCARD,		/* request to discard sectors */
+	__REQ_NOIDLE,		/* don't anticipate more IO after this one */
+
+	/* bio only flags */
+	__REQ_UNPLUG,		/* unplug immediately after submission */
+	__REQ_RAHEAD,		/* read ahead, can fail anytime */
+
+	/* request only flags */
+	__REQ_SORTED,		/* elevator knows about this request */
+	__REQ_SOFTBARRIER,	/* may not be passed by ioscheduler */
+	__REQ_FUA,		/* forced unit access */
+	__REQ_NOMERGE,		/* don't touch this for merging */
+	__REQ_STARTED,		/* drive already may have started this one */
+	__REQ_DONTPREP,		/* don't call prep for this one */
+	__REQ_QUEUED,		/* uses queueing */
+	__REQ_ELVPRIV,		/* elevator private data attached */
+	__REQ_FAILED,		/* set if the request failed */
+	__REQ_QUIET,		/* don't worry about errors */
+	__REQ_PREEMPT,		/* set for "ide_preempt" requests */
+	__REQ_ORDERED_COLOR,	/* is before or after barrier */
+	__REQ_ALLOCED,		/* request came from our alloc pool */
+	__REQ_COPY_USER,	/* contains copies of user pages */
+	__REQ_INTEGRITY,	/* integrity metadata has been remapped */
+	__REQ_FLUSH,		/* request for cache flush */
+	__REQ_IO_STAT,		/* account I/O stat */
+	__REQ_MIXED_MERGE,	/* merge of different types, fail separately */
+	__REQ_NR_BITS,		/* stops here */
+};
+
+#define REQ_WRITE		(1 << __REQ_WRITE)
+#define REQ_FAILFAST_DEV	(1 << __REQ_FAILFAST_DEV)
+#define REQ_FAILFAST_TRANSPORT	(1 << __REQ_FAILFAST_TRANSPORT)
+#define REQ_FAILFAST_DRIVER	(1 << __REQ_FAILFAST_DRIVER)
+#define REQ_HARDBARRIER		(1 << __REQ_HARDBARRIER)
+#define REQ_SYNC		(1 << __REQ_SYNC)
+#define REQ_META		(1 << __REQ_META)
+#define REQ_DISCARD		(1 << __REQ_DISCARD)
+#define REQ_NOIDLE		(1 << __REQ_NOIDLE)
+
+#define REQ_FAILFAST_MASK \
+	(REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER)
+#define REQ_COMMON_MASK \
+	(REQ_WRITE | REQ_FAILFAST_MASK | REQ_HARDBARRIER | REQ_SYNC | \
+	 REQ_META| REQ_DISCARD | REQ_NOIDLE)
+
+#define REQ_UNPLUG		(1 << __REQ_UNPLUG)
+#define REQ_RAHEAD		(1 << __REQ_RAHEAD)
+
+#define REQ_SORTED		(1 << __REQ_SORTED)
+#define REQ_SOFTBARRIER		(1 << __REQ_SOFTBARRIER)
+#define REQ_FUA			(1 << __REQ_FUA)
+#define REQ_NOMERGE		(1 << __REQ_NOMERGE)
+#define REQ_STARTED		(1 << __REQ_STARTED)
+#define REQ_DONTPREP		(1 << __REQ_DONTPREP)
+#define REQ_QUEUED		(1 << __REQ_QUEUED)
+#define REQ_ELVPRIV		(1 << __REQ_ELVPRIV)
+#define REQ_FAILED		(1 << __REQ_FAILED)
+#define REQ_QUIET		(1 << __REQ_QUIET)
+#define REQ_PREEMPT		(1 << __REQ_PREEMPT)
+#define REQ_ORDERED_COLOR	(1 << __REQ_ORDERED_COLOR)
+#define REQ_ALLOCED		(1 << __REQ_ALLOCED)
+#define REQ_COPY_USER		(1 << __REQ_COPY_USER)
+#define REQ_INTEGRITY		(1 << __REQ_INTEGRITY)
+#define REQ_FLUSH		(1 << __REQ_FLUSH)
+#define REQ_IO_STAT		(1 << __REQ_IO_STAT)
+#define REQ_MIXED_MERGE		(1 << __REQ_MIXED_MERGE)
+
+#endif /* __LINUX_BLK_TYPES_H */

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 09a8402..89c855c 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h

@@ -60,7 +60,6 @@
 	REQ_TYPE_PM_RESUME,		/* resume request */
 	REQ_TYPE_PM_SHUTDOWN,		/* shutdown request */
 	REQ_TYPE_SPECIAL,		/* driver defined type */
-	REQ_TYPE_LINUX_BLOCK,		/* generic block layer message */
 	/*
 	 * for ATA/ATAPI devices. this really doesn't belong here, ide should
 	 * use REQ_TYPE_SPECIAL and use rq->cmd[0] with the range of driver
@@ -70,84 +69,6 @@
 	REQ_TYPE_ATA_PC,
 };
 
-/*
- * For request of type REQ_TYPE_LINUX_BLOCK, rq->cmd[0] is the opcode being
- * sent down (similar to how REQ_TYPE_BLOCK_PC means that ->cmd[] holds a
- * SCSI cdb.
- *
- * 0x00 -> 0x3f are driver private, to be used for whatever purpose they need,
- * typically to differentiate REQ_TYPE_SPECIAL requests.
- *
- */
-enum {
-	REQ_LB_OP_EJECT	= 0x40,		/* eject request */
-	REQ_LB_OP_FLUSH = 0x41,		/* flush request */
-};
-
-/*
- * request type modified bits. first four bits match BIO_RW* bits, important
- */
-enum rq_flag_bits {
-	__REQ_RW,		/* not set, read. set, write */
-	__REQ_FAILFAST_DEV,	/* no driver retries of device errors */
-	__REQ_FAILFAST_TRANSPORT, /* no driver retries of transport errors */
-	__REQ_FAILFAST_DRIVER,	/* no driver retries of driver errors */
-	/* above flags must match BIO_RW_* */
-	__REQ_DISCARD,		/* request to discard sectors */
-	__REQ_SORTED,		/* elevator knows about this request */
-	__REQ_SOFTBARRIER,	/* may not be passed by ioscheduler */
-	__REQ_HARDBARRIER,	/* may not be passed by drive either */
-	__REQ_FUA,		/* forced unit access */
-	__REQ_NOMERGE,		/* don't touch this for merging */
-	__REQ_STARTED,		/* drive already may have started this one */
-	__REQ_DONTPREP,		/* don't call prep for this one */
-	__REQ_QUEUED,		/* uses queueing */
-	__REQ_ELVPRIV,		/* elevator private data attached */
-	__REQ_FAILED,		/* set if the request failed */
-	__REQ_QUIET,		/* don't worry about errors */
-	__REQ_PREEMPT,		/* set for "ide_preempt" requests */
-	__REQ_ORDERED_COLOR,	/* is before or after barrier */
-	__REQ_RW_SYNC,		/* request is sync (sync write or read) */
-	__REQ_ALLOCED,		/* request came from our alloc pool */
-	__REQ_RW_META,		/* metadata io request */
-	__REQ_COPY_USER,	/* contains copies of user pages */
-	__REQ_INTEGRITY,	/* integrity metadata has been remapped */
-	__REQ_NOIDLE,		/* Don't anticipate more IO after this one */
-	__REQ_IO_STAT,		/* account I/O stat */
-	__REQ_MIXED_MERGE,	/* merge of different types, fail separately */
-	__REQ_NR_BITS,		/* stops here */
-};
-
-#define REQ_RW		(1 << __REQ_RW)
-#define REQ_FAILFAST_DEV	(1 << __REQ_FAILFAST_DEV)
-#define REQ_FAILFAST_TRANSPORT	(1 << __REQ_FAILFAST_TRANSPORT)
-#define REQ_FAILFAST_DRIVER	(1 << __REQ_FAILFAST_DRIVER)
-#define REQ_DISCARD	(1 << __REQ_DISCARD)
-#define REQ_SORTED	(1 << __REQ_SORTED)
-#define REQ_SOFTBARRIER	(1 << __REQ_SOFTBARRIER)
-#define REQ_HARDBARRIER	(1 << __REQ_HARDBARRIER)
-#define REQ_FUA		(1 << __REQ_FUA)
-#define REQ_NOMERGE	(1 << __REQ_NOMERGE)
-#define REQ_STARTED	(1 << __REQ_STARTED)
-#define REQ_DONTPREP	(1 << __REQ_DONTPREP)
-#define REQ_QUEUED	(1 << __REQ_QUEUED)
-#define REQ_ELVPRIV	(1 << __REQ_ELVPRIV)
-#define REQ_FAILED	(1 << __REQ_FAILED)
-#define REQ_QUIET	(1 << __REQ_QUIET)
-#define REQ_PREEMPT	(1 << __REQ_PREEMPT)
-#define REQ_ORDERED_COLOR	(1 << __REQ_ORDERED_COLOR)
-#define REQ_RW_SYNC	(1 << __REQ_RW_SYNC)
-#define REQ_ALLOCED	(1 << __REQ_ALLOCED)
-#define REQ_RW_META	(1 << __REQ_RW_META)
-#define REQ_COPY_USER	(1 << __REQ_COPY_USER)
-#define REQ_INTEGRITY	(1 << __REQ_INTEGRITY)
-#define REQ_NOIDLE	(1 << __REQ_NOIDLE)
-#define REQ_IO_STAT	(1 << __REQ_IO_STAT)
-#define REQ_MIXED_MERGE	(1 << __REQ_MIXED_MERGE)
-
-#define REQ_FAILFAST_MASK	(REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | \
-				 REQ_FAILFAST_DRIVER)
-
 #define BLK_MAX_CDB	16
 
 /*
@@ -264,6 +185,7 @@
 typedef void (request_fn_proc) (struct request_queue *q);
 typedef int (make_request_fn) (struct request_queue *q, struct bio *bio);
 typedef int (prep_rq_fn) (struct request_queue *, struct request *);
+typedef void (unprep_rq_fn) (struct request_queue *, struct request *);
 typedef void (unplug_fn) (struct request_queue *);
 
 struct bio_vec;
@@ -275,7 +197,6 @@
 };
 typedef int (merge_bvec_fn) (struct request_queue *, struct bvec_merge_data *,
 			     struct bio_vec *);
-typedef void (prepare_flush_fn) (struct request_queue *, struct request *);
 typedef void (softirq_done_fn)(struct request *);
 typedef int (dma_drain_needed_fn)(struct request *);
 typedef int (lld_busy_fn) (struct request_queue *q);
@@ -346,9 +267,9 @@
 	request_fn_proc		*request_fn;
 	make_request_fn		*make_request_fn;
 	prep_rq_fn		*prep_rq_fn;
+	unprep_rq_fn		*unprep_rq_fn;
 	unplug_fn		*unplug_fn;
 	merge_bvec_fn		*merge_bvec_fn;
-	prepare_flush_fn	*prepare_flush_fn;
 	softirq_done_fn		*softirq_done_fn;
 	rq_timed_out_fn		*rq_timed_out_fn;
 	dma_drain_needed_fn	*dma_drain_needed;
@@ -467,11 +388,13 @@
 #define QUEUE_FLAG_IO_STAT     15	/* do IO stats */
 #define QUEUE_FLAG_DISCARD     16	/* supports DISCARD */
 #define QUEUE_FLAG_NOXMERGES   17	/* No extended merges */
+#define QUEUE_FLAG_ADD_RANDOM  18	/* Contributes to random pool */
 
 #define QUEUE_FLAG_DEFAULT	((1 << QUEUE_FLAG_IO_STAT) |		\
 				 (1 << QUEUE_FLAG_CLUSTER) |		\
 				 (1 << QUEUE_FLAG_STACKABLE)	|	\
-				 (1 << QUEUE_FLAG_SAME_COMP))
+				 (1 << QUEUE_FLAG_SAME_COMP)	|	\
+				 (1 << QUEUE_FLAG_ADD_RANDOM))
 
 static inline int queue_is_locked(struct request_queue *q)
 {
@@ -596,38 +519,26 @@
 	test_bit(QUEUE_FLAG_NOXMERGES, &(q)->queue_flags)
 #define blk_queue_nonrot(q)	test_bit(QUEUE_FLAG_NONROT, &(q)->queue_flags)
 #define blk_queue_io_stat(q)	test_bit(QUEUE_FLAG_IO_STAT, &(q)->queue_flags)
+#define blk_queue_add_random(q)	test_bit(QUEUE_FLAG_ADD_RANDOM, &(q)->queue_flags)
 #define blk_queue_flushing(q)	((q)->ordseq)
 #define blk_queue_stackable(q)	\
 	test_bit(QUEUE_FLAG_STACKABLE, &(q)->queue_flags)
 #define blk_queue_discard(q)	test_bit(QUEUE_FLAG_DISCARD, &(q)->queue_flags)
 
-#define blk_fs_request(rq)	((rq)->cmd_type == REQ_TYPE_FS)
-#define blk_pc_request(rq)	((rq)->cmd_type == REQ_TYPE_BLOCK_PC)
-#define blk_special_request(rq)	((rq)->cmd_type == REQ_TYPE_SPECIAL)
-#define blk_sense_request(rq)	((rq)->cmd_type == REQ_TYPE_SENSE)
+#define blk_noretry_request(rq) \
+	((rq)->cmd_flags & (REQ_FAILFAST_DEV|REQ_FAILFAST_TRANSPORT| \
+			     REQ_FAILFAST_DRIVER))
 
-#define blk_failfast_dev(rq)	((rq)->cmd_flags & REQ_FAILFAST_DEV)
-#define blk_failfast_transport(rq) ((rq)->cmd_flags & REQ_FAILFAST_TRANSPORT)
-#define blk_failfast_driver(rq)	((rq)->cmd_flags & REQ_FAILFAST_DRIVER)
-#define blk_noretry_request(rq)	(blk_failfast_dev(rq) ||	\
-				 blk_failfast_transport(rq) ||	\
-				 blk_failfast_driver(rq))
-#define blk_rq_started(rq)	((rq)->cmd_flags & REQ_STARTED)
-#define blk_rq_io_stat(rq)	((rq)->cmd_flags & REQ_IO_STAT)
-#define blk_rq_quiet(rq)	((rq)->cmd_flags & REQ_QUIET)
+#define blk_account_rq(rq) \
+	(((rq)->cmd_flags & REQ_STARTED) && \
+	 ((rq)->cmd_type == REQ_TYPE_FS || \
+	  ((rq)->cmd_flags & REQ_DISCARD)))
 
-#define blk_account_rq(rq)	(blk_rq_started(rq) && (blk_fs_request(rq) || blk_discard_rq(rq))) 
-
-#define blk_pm_suspend_request(rq)	((rq)->cmd_type == REQ_TYPE_PM_SUSPEND)
-#define blk_pm_resume_request(rq)	((rq)->cmd_type == REQ_TYPE_PM_RESUME)
 #define blk_pm_request(rq)	\
-	(blk_pm_suspend_request(rq) || blk_pm_resume_request(rq))
+	((rq)->cmd_type == REQ_TYPE_PM_SUSPEND || \
+	 (rq)->cmd_type == REQ_TYPE_PM_RESUME)
 
 #define blk_rq_cpu_valid(rq)	((rq)->cpu != -1)
-#define blk_sorted_rq(rq)	((rq)->cmd_flags & REQ_SORTED)
-#define blk_barrier_rq(rq)	((rq)->cmd_flags & REQ_HARDBARRIER)
-#define blk_fua_rq(rq)		((rq)->cmd_flags & REQ_FUA)
-#define blk_discard_rq(rq)	((rq)->cmd_flags & REQ_DISCARD)
 #define blk_bidi_rq(rq)		((rq)->next_rq != NULL)
 /* rq->queuelist of dequeued request must be list_empty() */
 #define blk_queued_rq(rq)	(!list_empty(&(rq)->queuelist))
@@ -641,7 +552,7 @@
  */
 static inline bool rw_is_sync(unsigned int rw_flags)
 {
-	return !(rw_flags & REQ_RW) || (rw_flags & REQ_RW_SYNC);
+	return !(rw_flags & REQ_WRITE) || (rw_flags & REQ_SYNC);
 }
 
 static inline bool rq_is_sync(struct request *rq)
@@ -649,9 +560,6 @@
 	return rw_is_sync(rq->cmd_flags);
 }
 
-#define rq_is_meta(rq)		((rq)->cmd_flags & REQ_RW_META)
-#define rq_noidle(rq)		((rq)->cmd_flags & REQ_NOIDLE)
-
 static inline int blk_queue_full(struct request_queue *q, int sync)
 {
 	if (sync)
@@ -684,7 +592,8 @@
 	(REQ_NOMERGE | REQ_STARTED | REQ_HARDBARRIER | REQ_SOFTBARRIER)
 #define rq_mergeable(rq)	\
 	(!((rq)->cmd_flags & RQ_NOMERGE_FLAGS) && \
-	 (blk_discard_rq(rq) || blk_fs_request((rq))))
+	 (((rq)->cmd_flags & REQ_DISCARD) || \
+	  (rq)->cmd_type == REQ_TYPE_FS))
 
 /*
  * q->prep_rq_fn return values
@@ -709,7 +618,7 @@
 #define BLK_BOUNCE_HIGH		-1ULL
 #endif
 #define BLK_BOUNCE_ANY		(-1ULL)
-#define BLK_BOUNCE_ISA		(ISA_DMA_THRESHOLD)
+#define BLK_BOUNCE_ISA		(DMA_BIT_MASK(24))
 
 /*
  * default timeout for SG_IO if none specified
@@ -781,6 +690,8 @@
 					gfp_t);
 extern void blk_insert_request(struct request_queue *, struct request *, int, void *);
 extern void blk_requeue_request(struct request_queue *, struct request *);
+extern void blk_add_request_payload(struct request *rq, struct page *page,
+		unsigned int len);
 extern int blk_rq_check_limits(struct request_queue *q, struct request *rq);
 extern int blk_lld_busy(struct request_queue *q);
 extern int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
@@ -915,6 +826,7 @@
 extern void __blk_complete_request(struct request *);
 extern void blk_abort_request(struct request *);
 extern void blk_abort_queue(struct request_queue *);
+extern void blk_unprep_request(struct request *);
 
 /*
  * Access functions for manipulating queue properties
@@ -959,6 +871,7 @@
 extern void blk_queue_lld_busy(struct request_queue *q, lld_busy_fn *fn);
 extern void blk_queue_segment_boundary(struct request_queue *, unsigned long);
 extern void blk_queue_prep_rq(struct request_queue *, prep_rq_fn *pfn);
+extern void blk_queue_unprep_rq(struct request_queue *, unprep_rq_fn *ufn);
 extern void blk_queue_merge_bvec(struct request_queue *, merge_bvec_fn *);
 extern void blk_queue_dma_alignment(struct request_queue *, int);
 extern void blk_queue_update_dma_alignment(struct request_queue *, int);
@@ -966,7 +879,7 @@
 extern void blk_queue_rq_timed_out(struct request_queue *, rq_timed_out_fn *);
 extern void blk_queue_rq_timeout(struct request_queue *, unsigned int);
 extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev);
-extern int blk_queue_ordered(struct request_queue *, unsigned, prepare_flush_fn *);
+extern int blk_queue_ordered(struct request_queue *, unsigned);
 extern bool blk_do_ordered(struct request_queue *, struct request **);
 extern unsigned blk_ordered_cur_seq(struct request_queue *);
 extern unsigned blk_ordered_req_seq(struct request *);
@@ -1020,7 +933,7 @@
 {
 	block <<= (sb->s_blocksize_bits - 9);
 	nr_blocks <<= (sb->s_blocksize_bits - 9);
-	return blkdev_issue_discard(sb->s_bdev, block, nr_blocks, GFP_KERNEL,
+	return blkdev_issue_discard(sb->s_bdev, block, nr_blocks, GFP_NOFS,
 				   BLKDEV_IFL_WAIT | BLKDEV_IFL_BARRIER);
 }
 
@@ -1333,7 +1246,6 @@
 struct block_device_operations {
 	int (*open) (struct block_device *, fmode_t);
 	int (*release) (struct gendisk *, fmode_t);
-	int (*locked_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
 	int (*ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
 	int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
 	int (*direct_access) (struct block_device *, sector_t,

diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h
index 416bf62..3395cf7 100644
--- a/include/linux/blktrace_api.h
+++ b/include/linux/blktrace_api.h

@@ -5,6 +5,7 @@
 #ifdef __KERNEL__
 #include <linux/blkdev.h>
 #include <linux/relay.h>
+#include <linux/compat.h>
 #endif
 
 /*
@@ -220,11 +221,26 @@
 
 #endif /* CONFIG_BLK_DEV_IO_TRACE */
 
+#ifdef CONFIG_COMPAT
+
+struct compat_blk_user_trace_setup {
+	char name[32];
+	u16 act_mask;
+	u32 buf_size;
+	u32 buf_nr;
+	compat_u64 start_lba;
+	compat_u64 end_lba;
+	u32 pid;
+};
+#define BLKTRACESETUP32 _IOWR(0x12, 115, struct compat_blk_user_trace_setup)
+
+#endif
+
 #if defined(CONFIG_EVENT_TRACING) && defined(CONFIG_BLOCK)
 
 static inline int blk_cmd_buf_len(struct request *rq)
 {
-	return blk_pc_request(rq) ? rq->cmd_len * 3 : 1;
+	return (rq->cmd_type == REQ_TYPE_BLOCK_PC) ? rq->cmd_len * 3 : 1;
 }
 
 extern void blk_dump_cmd(char *buf, struct request *rq);

diff --git a/include/linux/coda_psdev.h b/include/linux/coda_psdev.h
index 8859e2e..284b520 100644
--- a/include/linux/coda_psdev.h
+++ b/include/linux/coda_psdev.h

@@ -86,9 +86,9 @@
 	wait_queue_head_t   uc_sleep;   /* process' wait queue */
 };
 
-#define REQ_ASYNC  0x1
-#define REQ_READ   0x2
-#define REQ_WRITE  0x4
-#define REQ_ABORT  0x8
+#define CODA_REQ_ASYNC  0x1
+#define CODA_REQ_READ   0x2
+#define CODA_REQ_WRITE  0x4
+#define CODA_REQ_ABORT  0x8
 
 #endif

diff --git a/include/linux/drbd.h b/include/linux/drbd.h
index b8d2516..479ee3a 100644
--- a/include/linux/drbd.h
+++ b/include/linux/drbd.h

@@ -53,7 +53,7 @@
 
 
 extern const char *drbd_buildtag(void);
-#define REL_VERSION "8.3.8"
+#define REL_VERSION "8.3.8.1"
 #define API_VERSION 88
 #define PRO_VERSION_MIN 86
 #define PRO_VERSION_MAX 94

diff --git a/include/linux/drbd_nl.h b/include/linux/drbd_nl.h
index ce77a74..5f04281 100644
--- a/include/linux/drbd_nl.h
+++ b/include/linux/drbd_nl.h

@@ -78,10 +78,11 @@
 	NL_INTEGER(	30,	T_MAY_IGNORE,	rate)
 	NL_INTEGER(	31,	T_MAY_IGNORE,	after)
 	NL_INTEGER(	32,	T_MAY_IGNORE,	al_extents)
-	NL_INTEGER(     71,	T_MAY_IGNORE,	dp_volume)
-	NL_INTEGER(     72,	T_MAY_IGNORE,	dp_interval)
-	NL_INTEGER(     73,	T_MAY_IGNORE,	throttle_th)
-	NL_INTEGER(     74,	T_MAY_IGNORE,	hold_off_th)
+/*	NL_INTEGER(     71,	T_MAY_IGNORE,	dp_volume)
+ *	NL_INTEGER(     72,	T_MAY_IGNORE,	dp_interval)
+ *	NL_INTEGER(     73,	T_MAY_IGNORE,	throttle_th)
+ *	NL_INTEGER(     74,	T_MAY_IGNORE,	hold_off_th)
+ * feature will be reimplemented differently with 8.3.9 */
 	NL_STRING(      52,     T_MAY_IGNORE,   verify_alg,     SHARED_SECRET_MAX)
 	NL_STRING(      51,     T_MAY_IGNORE,   cpu_mask,       32)
 	NL_STRING(	64,	T_MAY_IGNORE,	csums_alg,	SHARED_SECRET_MAX)

diff --git a/include/linux/fs.h b/include/linux/fs.h
index a8ccf85..1542e0e 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h

@@ -8,6 +8,7 @@
 
 #include <linux/limits.h>
 #include <linux/ioctl.h>
+#include <linux/blk_types.h>
 
 /*
  * It's silly to have NR_OPEN bigger than NR_FILE, but you can change
@@ -121,7 +122,7 @@
  *			immediately wait on this read without caring about
  *			unplugging.
  * READA		Used for read-ahead operations. Lower priority, and the
- *			 block layer could (in theory) choose to ignore this
+ *			block layer could (in theory) choose to ignore this
  *			request if it runs into resource problems.
  * WRITE		A normal async write. Device will be plugged.
  * SWRITE		Like WRITE, but a special case for ll_rw_block() that
@@ -140,7 +141,7 @@
  * SWRITE_SYNC
  * SWRITE_SYNC_PLUG	Like WRITE_SYNC/WRITE_SYNC_PLUG, but locks the buffer.
  *			See SWRITE.
- * WRITE_BARRIER	Like WRITE, but tells the block layer that all
+ * WRITE_BARRIER	Like WRITE_SYNC, but tells the block layer that all
  *			previously submitted writes must be safely on storage
  *			before this one is started. Also guarantees that when
  *			this write is complete, it itself is also safely on
@@ -148,29 +149,31 @@
  *			of this IO.
  *
  */
-#define RW_MASK		1
-#define RWA_MASK	2
-#define READ 0
-#define WRITE 1
-#define READA 2		/* read-ahead  - don't block if no resources */
-#define SWRITE 3	/* for ll_rw_block() - wait for buffer lock */
-#define READ_SYNC	(READ | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_UNPLUG))
-#define READ_META	(READ | (1 << BIO_RW_META))
-#define WRITE_SYNC_PLUG	(WRITE | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_NOIDLE))
-#define WRITE_SYNC	(WRITE_SYNC_PLUG | (1 << BIO_RW_UNPLUG))
-#define WRITE_ODIRECT_PLUG	(WRITE | (1 << BIO_RW_SYNCIO))
-#define WRITE_META	(WRITE | (1 << BIO_RW_META))
-#define SWRITE_SYNC_PLUG	\
-			(SWRITE | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_NOIDLE))
-#define SWRITE_SYNC	(SWRITE_SYNC_PLUG | (1 << BIO_RW_UNPLUG))
-#define WRITE_BARRIER	(WRITE | (1 << BIO_RW_BARRIER))
+#define RW_MASK			REQ_WRITE
+#define RWA_MASK		REQ_RAHEAD
+
+#define READ			0
+#define WRITE			RW_MASK
+#define READA			RWA_MASK
+#define SWRITE			(WRITE | READA)
+
+#define READ_SYNC		(READ | REQ_SYNC | REQ_UNPLUG)
+#define READ_META		(READ | REQ_META)
+#define WRITE_SYNC_PLUG		(WRITE | REQ_SYNC | REQ_NOIDLE)
+#define WRITE_SYNC		(WRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG)
+#define WRITE_ODIRECT_PLUG	(WRITE | REQ_SYNC)
+#define WRITE_META		(WRITE | REQ_META)
+#define WRITE_BARRIER		(WRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG | \
+				 REQ_HARDBARRIER)
+#define SWRITE_SYNC_PLUG	(SWRITE | REQ_SYNC | REQ_NOIDLE)
+#define SWRITE_SYNC		(SWRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG)
 
 /*
  * These aren't really reads or writes, they pass down information about
  * parts of device that are now unused by the file system.
  */
-#define DISCARD_NOBARRIER (WRITE | (1 << BIO_RW_DISCARD))
-#define DISCARD_BARRIER (DISCARD_NOBARRIER | (1 << BIO_RW_BARRIER))
+#define DISCARD_NOBARRIER	(WRITE | REQ_DISCARD)
+#define DISCARD_BARRIER		(WRITE | REQ_DISCARD | REQ_HARDBARRIER)
 
 #define SEL_IN		1
 #define SEL_OUT		2
@@ -2196,7 +2199,6 @@
 extern void file_move(struct file *f, struct list_head *list);
 extern void file_kill(struct file *f);
 #ifdef CONFIG_BLOCK
-struct bio;
 extern void submit_bio(int, struct bio *);
 extern int bdev_read_only(struct block_device *);
 #endif
@@ -2263,7 +2265,6 @@
 #endif
 
 #ifdef CONFIG_BLOCK
-struct bio;
 typedef void (dio_submit_t)(int rw, struct bio *bio, struct inode *inode,
 			    loff_t file_offset);
 

diff --git a/include/trace/events/block.h b/include/trace/events/block.h
index d870a918..d8ce278 100644
--- a/include/trace/events/block.h
+++ b/include/trace/events/block.h

@@ -25,8 +25,10 @@
 
 	TP_fast_assign(
 		__entry->dev	   = rq->rq_disk ? disk_devt(rq->rq_disk) : 0;
-		__entry->sector    = blk_pc_request(rq) ? 0 : blk_rq_pos(rq);
-		__entry->nr_sector = blk_pc_request(rq) ? 0 : blk_rq_sectors(rq);
+		__entry->sector    = (rq->cmd_type == REQ_TYPE_BLOCK_PC) ?
+					0 : blk_rq_pos(rq);
+		__entry->nr_sector = (rq->cmd_type == REQ_TYPE_BLOCK_PC) ?
+					0 : blk_rq_sectors(rq);
 		__entry->errors    = rq->errors;
 
 		blk_fill_rwbs_rq(__entry->rwbs, rq);
@@ -109,9 +111,12 @@
 
 	TP_fast_assign(
 		__entry->dev	   = rq->rq_disk ? disk_devt(rq->rq_disk) : 0;
-		__entry->sector    = blk_pc_request(rq) ? 0 : blk_rq_pos(rq);
-		__entry->nr_sector = blk_pc_request(rq) ? 0 : blk_rq_sectors(rq);
-		__entry->bytes     = blk_pc_request(rq) ? blk_rq_bytes(rq) : 0;
+		__entry->sector    = (rq->cmd_type == REQ_TYPE_BLOCK_PC) ?
+					0 : blk_rq_pos(rq);
+		__entry->nr_sector = (rq->cmd_type == REQ_TYPE_BLOCK_PC) ?
+					0 : blk_rq_sectors(rq);
+		__entry->bytes     = (rq->cmd_type == REQ_TYPE_BLOCK_PC) ?
+					blk_rq_bytes(rq) : 0;
 
 		blk_fill_rwbs_rq(__entry->rwbs, rq);
 		blk_dump_cmd(__get_str(cmd), rq);

diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h
new file mode 100644
index 0000000..f345f66
--- /dev/null
+++ b/include/trace/events/writeback.h

@@ -0,0 +1,159 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM writeback
+
+#if !defined(_TRACE_WRITEBACK_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_WRITEBACK_H
+
+#include <linux/backing-dev.h>
+#include <linux/device.h>
+#include <linux/writeback.h>
+
+struct wb_writeback_work;
+
+/*
+ * Event class for tracepoints that describe one wb_writeback_work item
+ * handled on behalf of a bdi.  Each record stores the bdi's device name, the
+ * number of pages requested, the device number of the target super block
+ * (0 when the work item has no super block) and the work item's sync_mode,
+ * for_kupdate, range_cyclic and for_background settings.
+ */
+DECLARE_EVENT_CLASS(writeback_work_class,
+	TP_PROTO(struct backing_dev_info *bdi, struct wb_writeback_work *work),
+	TP_ARGS(bdi, work),
+	TP_STRUCT__entry(
+		__array(char, name, 32)
+		__field(long, nr_pages)
+		__field(dev_t, sb_dev)
+		__field(int, sync_mode)
+		__field(int, for_kupdate)
+		__field(int, range_cyclic)
+		__field(int, for_background)
+	),
+	TP_fast_assign(
+		/*
+		 * strncpy() does not terminate the destination when the
+		 * source fills it completely, and the name is later printed
+		 * with %s.  Copy one byte less and terminate explicitly.
+		 */
+		strncpy(__entry->name, dev_name(bdi->dev),
+			sizeof(__entry->name) - 1);
+		__entry->name[sizeof(__entry->name) - 1] = '\0';
+		__entry->nr_pages = work->nr_pages;
+		__entry->sb_dev = work->sb ? work->sb->s_dev : 0;
+		__entry->sync_mode = work->sync_mode;
+		__entry->for_kupdate = work->for_kupdate;
+		__entry->range_cyclic = work->range_cyclic;
+		__entry->for_background	= work->for_background;
+	),
+	TP_printk("bdi %s: sb_dev %d:%d nr_pages=%ld sync_mode=%d "
+		  "kupdate=%d range_cyclic=%d background=%d",
+		  __entry->name,
+		  MAJOR(__entry->sb_dev), MINOR(__entry->sb_dev),
+		  __entry->nr_pages,
+		  __entry->sync_mode,
+		  __entry->for_kupdate,
+		  __entry->range_cyclic,
+		  __entry->for_background
+	)
+);
+/*
+ * Define a tracepoint called 'name' as a member of writeback_work_class,
+ * taking the same (bdi, work) arguments as the class.
+ */
+#define DEFINE_WRITEBACK_WORK_EVENT(name) \
+DEFINE_EVENT(writeback_work_class, name, \
+	TP_PROTO(struct backing_dev_info *bdi, struct wb_writeback_work *work), \
+	TP_ARGS(bdi, work))
+DEFINE_WRITEBACK_WORK_EVENT(writeback_nothread);
+DEFINE_WRITEBACK_WORK_EVENT(writeback_queue);
+DEFINE_WRITEBACK_WORK_EVENT(writeback_exec);
+
+/*
+ * Stand-alone tracepoint that records a single page count, passed in as
+ * 'pages_written', and prints it as a bare decimal number.
+ */
+TRACE_EVENT(writeback_pages_written,
+	TP_PROTO(long pages_written),
+	TP_ARGS(pages_written),
+	TP_STRUCT__entry(
+		__field(long,		pages)
+	),
+	TP_fast_assign(
+		__entry->pages		= pages_written;
+	),
+	TP_printk("%ld", __entry->pages)
+);
+
+/*
+ * Event class for tracepoints that only need to identify a bdi: the record
+ * holds nothing but the bdi's device name.
+ */
+DECLARE_EVENT_CLASS(writeback_class,
+	TP_PROTO(struct backing_dev_info *bdi),
+	TP_ARGS(bdi),
+	TP_STRUCT__entry(
+		__array(char, name, 32)
+	),
+	TP_fast_assign(
+		/*
+		 * strncpy() does not terminate the destination when the
+		 * source fills it completely, and the name is later printed
+		 * with %s.  Copy one byte less and terminate explicitly.
+		 */
+		strncpy(__entry->name, dev_name(bdi->dev),
+			sizeof(__entry->name) - 1);
+		__entry->name[sizeof(__entry->name) - 1] = '\0';
+	),
+	TP_printk("bdi %s",
+		  __entry->name
+	)
+);
+/*
+ * Define a tracepoint called 'name' as a member of writeback_class, taking
+ * just the bdi as its argument.
+ */
+#define DEFINE_WRITEBACK_EVENT(name) \
+DEFINE_EVENT(writeback_class, name, \
+	TP_PROTO(struct backing_dev_info *bdi), \
+	TP_ARGS(bdi))
+
+DEFINE_WRITEBACK_EVENT(writeback_nowork);
+DEFINE_WRITEBACK_EVENT(writeback_wake_thread);
+DEFINE_WRITEBACK_EVENT(writeback_wake_forker_thread);
+DEFINE_WRITEBACK_EVENT(writeback_bdi_register);
+DEFINE_WRITEBACK_EVENT(writeback_bdi_unregister);
+DEFINE_WRITEBACK_EVENT(writeback_thread_start);
+DEFINE_WRITEBACK_EVENT(writeback_thread_stop);
+
+/*
+ * Event class for tracepoints that snapshot a writeback_control together
+ * with the bdi it is being used against.  The record stores the bdi's device
+ * name plus the wbc's page counters, mode flags, the value older_than_this
+ * points at (0 when the pointer is NULL) and the byte range, the latter cast
+ * to long.
+ */
+DECLARE_EVENT_CLASS(wbc_class,
+	TP_PROTO(struct writeback_control *wbc, struct backing_dev_info *bdi),
+	TP_ARGS(wbc, bdi),
+	TP_STRUCT__entry(
+		__array(char, name, 32)
+		__field(long, nr_to_write)
+		__field(long, pages_skipped)
+		__field(int, sync_mode)
+		/*
+		 * NOTE(review): 'nonblocking' and 'encountered_congestion'
+		 * are reserved in the record but neither assigned in
+		 * TP_fast_assign() nor printed by TP_printk() below, so raw
+		 * readers see whatever was in the buffer.  They are kept here
+		 * so the record layout does not change; confirm whether they
+		 * should be filled in or dropped.
+		 */
+		__field(int, nonblocking)
+		__field(int, encountered_congestion)
+		__field(int, for_kupdate)
+		__field(int, for_background)
+		__field(int, for_reclaim)
+		__field(int, range_cyclic)
+		__field(int, more_io)
+		__field(unsigned long, older_than_this)
+		__field(long, range_start)
+		__field(long, range_end)
+	),
+
+	TP_fast_assign(
+		/*
+		 * strncpy() does not terminate the destination when the
+		 * source fills it completely, and the name is later printed
+		 * with %s.  Copy one byte less and terminate explicitly.
+		 */
+		strncpy(__entry->name, dev_name(bdi->dev),
+			sizeof(__entry->name) - 1);
+		__entry->name[sizeof(__entry->name) - 1] = '\0';
+		__entry->nr_to_write	= wbc->nr_to_write;
+		__entry->pages_skipped	= wbc->pages_skipped;
+		__entry->sync_mode	= wbc->sync_mode;
+		__entry->for_kupdate	= wbc->for_kupdate;
+		__entry->for_background	= wbc->for_background;
+		__entry->for_reclaim	= wbc->for_reclaim;
+		__entry->range_cyclic	= wbc->range_cyclic;
+		__entry->more_io	= wbc->more_io;
+		/* older_than_this is optional; record 0 when it is absent. */
+		__entry->older_than_this = wbc->older_than_this ?
+						*wbc->older_than_this : 0;
+		__entry->range_start	= (long)wbc->range_start;
+		__entry->range_end	= (long)wbc->range_end;
+	),
+
+	TP_printk("bdi %s: towrt=%ld skip=%ld mode=%d kupd=%d "
+		"bgrd=%d reclm=%d cyclic=%d more=%d older=0x%lx "
+		"start=0x%lx end=0x%lx",
+		__entry->name,
+		__entry->nr_to_write,
+		__entry->pages_skipped,
+		__entry->sync_mode,
+		__entry->for_kupdate,
+		__entry->for_background,
+		__entry->for_reclaim,
+		__entry->range_cyclic,
+		__entry->more_io,
+		__entry->older_than_this,
+		__entry->range_start,
+		__entry->range_end)
+);
+
+/*
+ * Define a tracepoint called 'name' as a member of wbc_class, taking the
+ * same (wbc, bdi) arguments as the class.
+ */
+#define DEFINE_WBC_EVENT(name) \
+DEFINE_EVENT(wbc_class, name, \
+	TP_PROTO(struct writeback_control *wbc, struct backing_dev_info *bdi), \
+	TP_ARGS(wbc, bdi))
+DEFINE_WBC_EVENT(wbc_writeback_start);
+DEFINE_WBC_EVENT(wbc_writeback_written);
+DEFINE_WBC_EVENT(wbc_writeback_wait);
+DEFINE_WBC_EVENT(wbc_balance_dirty_start);
+DEFINE_WBC_EVENT(wbc_balance_dirty_written);
+DEFINE_WBC_EVENT(wbc_balance_dirty_wait);
+DEFINE_WBC_EVENT(wbc_writepage);
+
+#endif /* _TRACE_WRITEBACK_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>

diff --git a/kernel/power/block_io.c b/kernel/power/block_io.c
index 97024fd..83bbc7c 100644
--- a/kernel/power/block_io.c
+++ b/kernel/power/block_io.c

@@ -28,7 +28,7 @@
 static int submit(int rw, struct block_device *bdev, sector_t sector,
 		struct page *page, struct bio **bio_chain)
 {
-	const int bio_rw = rw | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_UNPLUG);
+	const int bio_rw = rw | REQ_SYNC | REQ_UNPLUG;
 	struct bio *bio;
 
 	bio = bio_alloc(__GFP_WAIT | __GFP_HIGH, 1);

diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 638711c..82499a5 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c

@@ -169,9 +169,12 @@
 static const u32 ddir_act[2] = { BLK_TC_ACT(BLK_TC_READ),
 				 BLK_TC_ACT(BLK_TC_WRITE) };
 
+#define BLK_TC_HARDBARRIER	BLK_TC_BARRIER
+#define BLK_TC_RAHEAD		BLK_TC_AHEAD
+
 /* The ilog2() calls fall out because they're constant */
-#define MASK_TC_BIT(rw, __name) ((rw & (1 << BIO_RW_ ## __name)) << \
-	  (ilog2(BLK_TC_ ## __name) + BLK_TC_SHIFT - BIO_RW_ ## __name))
+#define MASK_TC_BIT(rw, __name) ((rw & REQ_ ## __name) << \
+	  (ilog2(BLK_TC_ ## __name) + BLK_TC_SHIFT - __REQ_ ## __name))
 
 /*
  * The worker for the various blk_add_trace*() types. Fills out a
@@ -194,9 +197,9 @@
 		return;
 
 	what |= ddir_act[rw & WRITE];
-	what |= MASK_TC_BIT(rw, BARRIER);
-	what |= MASK_TC_BIT(rw, SYNCIO);
-	what |= MASK_TC_BIT(rw, AHEAD);
+	what |= MASK_TC_BIT(rw, HARDBARRIER);
+	what |= MASK_TC_BIT(rw, SYNC);
+	what |= MASK_TC_BIT(rw, RAHEAD);
 	what |= MASK_TC_BIT(rw, META);
 	what |= MASK_TC_BIT(rw, DISCARD);
 
@@ -549,6 +552,41 @@
 }
 EXPORT_SYMBOL_GPL(blk_trace_setup);
 
+#if defined(CONFIG_COMPAT) && defined(CONFIG_X86_64)
+/*
+ * Handler for BLKTRACESETUP32: read the caller's
+ * struct compat_blk_user_trace_setup, convert it to the native
+ * struct blk_user_trace_setup, run the common setup and copy the resulting
+ * name back to user space.
+ *
+ * Returns 0 on success, -EFAULT if either user copy fails, or whatever
+ * do_blk_trace_setup() reported.
+ */
+static int compat_blk_trace_setup(struct request_queue *q, char *name,
+				  dev_t dev, struct block_device *bdev,
+				  char __user *arg)
+{
+	struct compat_blk_user_trace_setup cbuts;
+	struct blk_user_trace_setup buts;
+	int err;
+
+	if (copy_from_user(&cbuts, arg, sizeof(cbuts)))
+		return -EFAULT;
+
+	/* Member-wise conversion; the name is copied separately below. */
+	buts = (struct blk_user_trace_setup) {
+		.pid = cbuts.pid,
+		.start_lba = cbuts.start_lba,
+		.end_lba = cbuts.end_lba,
+		.buf_nr = cbuts.buf_nr,
+		.buf_size = cbuts.buf_size,
+		.act_mask = cbuts.act_mask,
+	};
+	memcpy(&buts.name, &cbuts.name, 32);
+
+	err = do_blk_trace_setup(q, name, dev, bdev, &buts);
+	if (!err && copy_to_user(arg, &buts.name, 32)) {
+		/* Setup worked but the name could not be returned: undo it. */
+		blk_trace_remove(q);
+		err = -EFAULT;
+	}
+
+	return err;
+}
+#endif
+
 int blk_trace_startstop(struct request_queue *q, int start)
 {
 	int ret;
@@ -601,6 +639,7 @@
 	if (!q)
 		return -ENXIO;
 
+	lock_kernel();
 	mutex_lock(&bdev->bd_mutex);
 
 	switch (cmd) {
@@ -608,6 +647,12 @@
 		bdevname(bdev, b);
 		ret = blk_trace_setup(q, b, bdev->bd_dev, bdev, arg);
 		break;
+#if defined(CONFIG_COMPAT) && defined(CONFIG_X86_64)
+	case BLKTRACESETUP32:
+		bdevname(bdev, b);
+		ret = compat_blk_trace_setup(q, b, bdev->bd_dev, bdev, arg);
+		break;
+#endif
 	case BLKTRACESTART:
 		start = 1;
 	case BLKTRACESTOP:
@@ -622,6 +667,7 @@
 	}
 
 	mutex_unlock(&bdev->bd_mutex);
+	unlock_kernel();
 	return ret;
 }
 
@@ -661,10 +707,10 @@
 	if (likely(!bt))
 		return;
 
-	if (blk_discard_rq(rq))
-		rw |= (1 << BIO_RW_DISCARD);
+	if (rq->cmd_flags & REQ_DISCARD)
+		rw |= REQ_DISCARD;
 
-	if (blk_pc_request(rq)) {
+	if (rq->cmd_type == REQ_TYPE_BLOCK_PC) {
 		what |= BLK_TC_ACT(BLK_TC_PC);
 		__blk_add_trace(bt, 0, blk_rq_bytes(rq), rw,
 				what, rq->errors, rq->cmd_len, rq->cmd);
@@ -925,7 +971,7 @@
 	if (likely(!bt))
 		return;
 
-	if (blk_pc_request(rq))
+	if (rq->cmd_type == REQ_TYPE_BLOCK_PC)
 		__blk_add_trace(bt, 0, blk_rq_bytes(rq), 0,
 				BLK_TA_DRV_DATA, rq->errors, len, data);
 	else
@@ -1730,7 +1776,7 @@
 	int len = rq->cmd_len;
 	unsigned char *cmd = rq->cmd;
 
-	if (!blk_pc_request(rq)) {
+	if (rq->cmd_type != REQ_TYPE_BLOCK_PC) {
 		buf[0] = '\0';
 		return;
 	}
@@ -1755,20 +1801,20 @@
 
 	if (rw & WRITE)
 		rwbs[i++] = 'W';
-	else if (rw & 1 << BIO_RW_DISCARD)
+	else if (rw & REQ_DISCARD)
 		rwbs[i++] = 'D';
 	else if (bytes)
 		rwbs[i++] = 'R';
 	else
 		rwbs[i++] = 'N';
 
-	if (rw & 1 << BIO_RW_AHEAD)
+	if (rw & REQ_RAHEAD)
 		rwbs[i++] = 'A';
-	if (rw & 1 << BIO_RW_BARRIER)
+	if (rw & REQ_HARDBARRIER)
 		rwbs[i++] = 'B';
-	if (rw & 1 << BIO_RW_SYNCIO)
+	if (rw & REQ_SYNC)
 		rwbs[i++] = 'S';
-	if (rw & 1 << BIO_RW_META)
+	if (rw & REQ_META)
 		rwbs[i++] = 'M';
 
 	rwbs[i] = '\0';
@@ -1779,8 +1825,8 @@
 	int rw = rq->cmd_flags & 0x03;
 	int bytes;
 
-	if (blk_discard_rq(rq))
-		rw |= (1 << BIO_RW_DISCARD);
+	if (rq->cmd_flags & REQ_DISCARD)
+		rw |= REQ_DISCARD;
 
 	bytes = blk_rq_bytes(rq);
 

diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index f9fd3dd..08d3575 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c

@@ -10,6 +10,7 @@
 #include <linux/module.h>
 #include <linux/writeback.h>
 #include <linux/device.h>
+#include <trace/events/writeback.h>
 
 static atomic_long_t bdi_seq = ATOMIC_LONG_INIT(0);
 
@@ -49,8 +50,6 @@
 static int bdi_sync_supers(void *);
 static void sync_supers_timer_fn(unsigned long);
 
-static void bdi_add_default_flusher_task(struct backing_dev_info *bdi);
-
 #ifdef CONFIG_DEBUG_FS
 #include <linux/debugfs.h>
 #include <linux/seq_file.h>
@@ -65,28 +64,21 @@
 static int bdi_debug_stats_show(struct seq_file *m, void *v)
 {
 	struct backing_dev_info *bdi = m->private;
-	struct bdi_writeback *wb;
+	struct bdi_writeback *wb = &bdi->wb;
 	unsigned long background_thresh;
 	unsigned long dirty_thresh;
 	unsigned long bdi_thresh;
 	unsigned long nr_dirty, nr_io, nr_more_io, nr_wb;
 	struct inode *inode;
 
-	/*
-	 * inode lock is enough here, the bdi->wb_list is protected by
-	 * RCU on the reader side
-	 */
 	nr_wb = nr_dirty = nr_io = nr_more_io = 0;
 	spin_lock(&inode_lock);
-	list_for_each_entry(wb, &bdi->wb_list, list) {
-		nr_wb++;
-		list_for_each_entry(inode, &wb->b_dirty, i_list)
-			nr_dirty++;
-		list_for_each_entry(inode, &wb->b_io, i_list)
-			nr_io++;
-		list_for_each_entry(inode, &wb->b_more_io, i_list)
-			nr_more_io++;
-	}
+	list_for_each_entry(inode, &wb->b_dirty, i_list)
+		nr_dirty++;
+	list_for_each_entry(inode, &wb->b_io, i_list)
+		nr_io++;
+	list_for_each_entry(inode, &wb->b_more_io, i_list)
+		nr_more_io++;
 	spin_unlock(&inode_lock);
 
 	get_dirty_limits(&background_thresh, &dirty_thresh, &bdi_thresh, bdi);
@@ -98,19 +90,16 @@
 		   "BdiDirtyThresh:   %8lu kB\n"
 		   "DirtyThresh:      %8lu kB\n"
 		   "BackgroundThresh: %8lu kB\n"
-		   "WritebackThreads: %8lu\n"
 		   "b_dirty:          %8lu\n"
 		   "b_io:             %8lu\n"
 		   "b_more_io:        %8lu\n"
 		   "bdi_list:         %8u\n"
-		   "state:            %8lx\n"
-		   "wb_list:          %8u\n",
+		   "state:            %8lx\n",
 		   (unsigned long) K(bdi_stat(bdi, BDI_WRITEBACK)),
 		   (unsigned long) K(bdi_stat(bdi, BDI_RECLAIMABLE)),
 		   K(bdi_thresh), K(dirty_thresh),
-		   K(background_thresh), nr_wb, nr_dirty, nr_io, nr_more_io,
-		   !list_empty(&bdi->bdi_list), bdi->state,
-		   !list_empty(&bdi->wb_list));
+		   K(background_thresh), nr_dirty, nr_io, nr_more_io,
+		   !list_empty(&bdi->bdi_list), bdi->state);
 #undef K
 
 	return 0;
@@ -247,7 +236,6 @@
 	sync_supers_tsk = kthread_run(bdi_sync_supers, NULL, "sync_supers");
 	BUG_ON(IS_ERR(sync_supers_tsk));
 
-	init_timer(&sync_supers_timer);
 	setup_timer(&sync_supers_timer, sync_supers_timer_fn, 0);
 	bdi_arm_supers_timer();
 
@@ -259,77 +247,6 @@
 }
 subsys_initcall(default_bdi_init);
 
-static void bdi_wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi)
-{
-	memset(wb, 0, sizeof(*wb));
-
-	wb->bdi = bdi;
-	wb->last_old_flush = jiffies;
-	INIT_LIST_HEAD(&wb->b_dirty);
-	INIT_LIST_HEAD(&wb->b_io);
-	INIT_LIST_HEAD(&wb->b_more_io);
-}
-
-static void bdi_task_init(struct backing_dev_info *bdi,
-			  struct bdi_writeback *wb)
-{
-	struct task_struct *tsk = current;
-
-	spin_lock(&bdi->wb_lock);
-	list_add_tail_rcu(&wb->list, &bdi->wb_list);
-	spin_unlock(&bdi->wb_lock);
-
-	tsk->flags |= PF_FLUSHER | PF_SWAPWRITE;
-	set_freezable();
-
-	/*
-	 * Our parent may run at a different priority, just set us to normal
-	 */
-	set_user_nice(tsk, 0);
-}
-
-static int bdi_start_fn(void *ptr)
-{
-	struct bdi_writeback *wb = ptr;
-	struct backing_dev_info *bdi = wb->bdi;
-	int ret;
-
-	/*
-	 * Add us to the active bdi_list
-	 */
-	spin_lock_bh(&bdi_lock);
-	list_add_rcu(&bdi->bdi_list, &bdi_list);
-	spin_unlock_bh(&bdi_lock);
-
-	bdi_task_init(bdi, wb);
-
-	/*
-	 * Clear pending bit and wakeup anybody waiting to tear us down
-	 */
-	clear_bit(BDI_pending, &bdi->state);
-	smp_mb__after_clear_bit();
-	wake_up_bit(&bdi->state, BDI_pending);
-
-	ret = bdi_writeback_task(wb);
-
-	/*
-	 * Remove us from the list
-	 */
-	spin_lock(&bdi->wb_lock);
-	list_del_rcu(&wb->list);
-	spin_unlock(&bdi->wb_lock);
-
-	/*
-	 * Flush any work that raced with us exiting. No new work
-	 * will be added, since this bdi isn't discoverable anymore.
-	 */
-	if (!list_empty(&bdi->work_list))
-		wb_do_writeback(wb, 1);
-
-	wb->task = NULL;
-	return ret;
-}
-
 int bdi_has_dirty_io(struct backing_dev_info *bdi)
 {
 	return wb_has_dirty_io(&bdi->wb);
@@ -348,10 +265,10 @@
 }
 
 /*
- * kupdated() used to do this. We cannot do it from the bdi_forker_task()
+ * kupdated() used to do this. We cannot do it from the bdi_forker_thread()
  * or we risk deadlocking on ->s_umount. The longer term solution would be
  * to implement sync_supers_bdi() or similar and simply do it from the
- * bdi writeback tasks individually.
+ * bdi writeback thread individually.
  */
 static int bdi_sync_supers(void *unused)
 {
@@ -387,144 +304,198 @@
 	bdi_arm_supers_timer();
 }
 
-static int bdi_forker_task(void *ptr)
+/*
+ * Expiry handler of a bdi's wakeup timer; 'data' is the backing_dev_info
+ * cast to unsigned long.  Wakes the bdi's own thread when it has one and the
+ * forker thread of the default bdi otherwise, all under bdi->wb_lock.
+ */
+static void wakeup_timer_fn(unsigned long data)
+{
+	struct backing_dev_info *bdi = (struct backing_dev_info *)data;
+	struct task_struct *target;
+
+	spin_lock_bh(&bdi->wb_lock);
+	target = bdi->wb.task;
+	if (!target) {
+		/*
+		 * bdi threads that stay inactive for a long time are killed,
+		 * so there may be nobody to wake.  Wake the forker thread
+		 * instead; it should create and run the bdi thread.
+		 */
+		trace_writeback_wake_forker_thread(bdi);
+		target = default_backing_dev_info.wb.task;
+	} else {
+		trace_writeback_wake_thread(bdi);
+	}
+	wake_up_process(target);
+	spin_unlock_bh(&bdi->wb_lock);
+}
+
+/*
+ * Used when the first inode for this bdi is marked dirty.  The bdi thread
+ * has to be woken so that it takes care of the periodic background write-out
+ * of dirty inodes, but that write-out would start only
+ * 'dirty_writeback_interval' centisecs from now anyway, so instead of waking
+ * the thread immediately we (re)arm a timer that wakes it up later.
+ *
+ * We would not bother with a timer at all, except that this function sits on
+ * the fast path (it is used by '__mark_inode_dirty()'), and delaying the
+ * wake-up saves a few context switches.
+ */
+void bdi_wakeup_thread_delayed(struct backing_dev_info *bdi)
+{
+	const unsigned long delay =
+		msecs_to_jiffies(dirty_writeback_interval * 10);
+
+	mod_timer(&bdi->wb.wakeup_timer, jiffies + delay);
+}
+
+/*
+ * Longest time, in jiffies, that a bdi thread may stay inactive: the larger
+ * of 5 * 60 * HZ jiffies and the 'dirty_writeback_interval' period.
+ */
+static unsigned long bdi_longest_inactive(void)
+{
+	unsigned long min_idle = 5UL * 60 * HZ;
+	unsigned long interval = msecs_to_jiffies(dirty_writeback_interval * 10);
+
+	return max(min_idle, interval);
+}
+
+static int bdi_forker_thread(void *ptr)
 {
 	struct bdi_writeback *me = ptr;
 
-	bdi_task_init(me->bdi, me);
+	current->flags |= PF_FLUSHER | PF_SWAPWRITE;
+	set_freezable();
+
+	/*
+	 * Our parent may run at a different priority, just set us to normal
+	 */
+	set_user_nice(current, 0);
 
 	for (;;) {
-		struct backing_dev_info *bdi, *tmp;
-		struct bdi_writeback *wb;
+		struct task_struct *task = NULL;
+		struct backing_dev_info *bdi;
+		enum {
+			NO_ACTION,   /* Nothing to do */
+			FORK_THREAD, /* Fork bdi thread */
+			KILL_THREAD, /* Kill inactive bdi thread */
+		} action = NO_ACTION;
 
 		/*
 		 * Temporary measure, we want to make sure we don't see
 		 * dirty data on the default backing_dev_info
 		 */
-		if (wb_has_dirty_io(me) || !list_empty(&me->bdi->work_list))
+		if (wb_has_dirty_io(me) || !list_empty(&me->bdi->work_list)) {
+			del_timer(&me->wakeup_timer);
 			wb_do_writeback(me, 0);
-
-		spin_lock_bh(&bdi_lock);
-
-		/*
-		 * Check if any existing bdi's have dirty data without
-		 * a thread registered. If so, set that up.
-		 */
-		list_for_each_entry_safe(bdi, tmp, &bdi_list, bdi_list) {
-			if (bdi->wb.task)
-				continue;
-			if (list_empty(&bdi->work_list) &&
-			    !bdi_has_dirty_io(bdi))
-				continue;
-
-			bdi_add_default_flusher_task(bdi);
 		}
 
+		spin_lock_bh(&bdi_lock);
 		set_current_state(TASK_INTERRUPTIBLE);
 
-		if (list_empty(&bdi_pending_list)) {
-			unsigned long wait;
+		list_for_each_entry(bdi, &bdi_list, bdi_list) {
+			bool have_dirty_io;
 
-			spin_unlock_bh(&bdi_lock);
-			wait = msecs_to_jiffies(dirty_writeback_interval * 10);
-			if (wait)
-				schedule_timeout(wait);
+			if (!bdi_cap_writeback_dirty(bdi) ||
+			     bdi_cap_flush_forker(bdi))
+				continue;
+
+			WARN(!test_bit(BDI_registered, &bdi->state),
+			     "bdi %p/%s is not registered!\n", bdi, bdi->name);
+
+			have_dirty_io = !list_empty(&bdi->work_list) ||
+					wb_has_dirty_io(&bdi->wb);
+
+			/*
+			 * If the bdi has work to do, but the thread does not
+			 * exist - create it.
+			 */
+			if (!bdi->wb.task && have_dirty_io) {
+				/*
+				 * Set the pending bit: anyone who tries to
+				 * unregister this bdi will wait on this bit.
+				 */
+				set_bit(BDI_pending, &bdi->state);
+				action = FORK_THREAD;
+				break;
+			}
+
+			spin_lock(&bdi->wb_lock);
+
+			/*
+			 * If there is no work to do and the bdi thread was
+			 * inactive long enough - kill it. The wb_lock is taken
+			 * to make sure no-one adds more work to this bdi and
+			 * wakes the bdi thread up.
+			 */
+			if (bdi->wb.task && !have_dirty_io &&
+			    time_after(jiffies, bdi->wb.last_active +
+						bdi_longest_inactive())) {
+				task = bdi->wb.task;
+				bdi->wb.task = NULL;
+				spin_unlock(&bdi->wb_lock);
+				set_bit(BDI_pending, &bdi->state);
+				action = KILL_THREAD;
+				break;
+			}
+			spin_unlock(&bdi->wb_lock);
+		}
+		spin_unlock_bh(&bdi_lock);
+
+		/* Keep working if default bdi still has things to do */
+		if (!list_empty(&me->bdi->work_list))
+			__set_current_state(TASK_RUNNING);
+
+		switch (action) {
+		case FORK_THREAD:
+			__set_current_state(TASK_RUNNING);
+			task = kthread_run(bdi_writeback_thread, &bdi->wb, "flush-%s",
+					   dev_name(bdi->dev));
+			if (IS_ERR(task)) {
+				/*
+				 * If thread creation fails, force writeout of
+				 * the bdi from this (forker) thread.
+				 */
+				bdi_flush_io(bdi);
+			} else {
+				/*
+				 * The spinlock makes sure we do not lose
+				 * wake-ups when racing with 'bdi_queue_work()'.
+				 */
+				spin_lock_bh(&bdi->wb_lock);
+				bdi->wb.task = task;
+				spin_unlock_bh(&bdi->wb_lock);
+			}
+			break;
+
+		case KILL_THREAD:
+			__set_current_state(TASK_RUNNING);
+			kthread_stop(task);
+			break;
+
+		case NO_ACTION:
+			if (!wb_has_dirty_io(me) || !dirty_writeback_interval)
+				/*
+				 * There is no dirty data, or
+				 * 'dirty_writeback_interval' is zero. The only
+				 * thing we should now care about is checking
+				 * for inactive bdi threads and killing them.
+				 * Thus, let's sleep for a longer time, save
+				 * energy and be friendly to battery-powered
+				 * devices.
+				 */
+				schedule_timeout(bdi_longest_inactive());
 			else
-				schedule();
+				schedule_timeout(msecs_to_jiffies(dirty_writeback_interval * 10));
 			try_to_freeze();
+			/* Back to the main loop */
 			continue;
 		}
 
-		__set_current_state(TASK_RUNNING);
-
 		/*
-		 * This is our real job - check for pending entries in
-		 * bdi_pending_list, and create the tasks that got added
+		 * Clear pending bit and wakeup anybody waiting to tear us down.
 		 */
-		bdi = list_entry(bdi_pending_list.next, struct backing_dev_info,
-				 bdi_list);
-		list_del_init(&bdi->bdi_list);
-		spin_unlock_bh(&bdi_lock);
-
-		wb = &bdi->wb;
-		wb->task = kthread_run(bdi_start_fn, wb, "flush-%s",
-					dev_name(bdi->dev));
-		/*
-		 * If task creation fails, then readd the bdi to
-		 * the pending list and force writeout of the bdi
-		 * from this forker thread. That will free some memory
-		 * and we can try again.
-		 */
-		if (IS_ERR(wb->task)) {
-			wb->task = NULL;
-
-			/*
-			 * Add this 'bdi' to the back, so we get
-			 * a chance to flush other bdi's to free
-			 * memory.
-			 */
-			spin_lock_bh(&bdi_lock);
-			list_add_tail(&bdi->bdi_list, &bdi_pending_list);
-			spin_unlock_bh(&bdi_lock);
-
-			bdi_flush_io(bdi);
-		}
+		clear_bit(BDI_pending, &bdi->state);
+		smp_mb__after_clear_bit();
+		wake_up_bit(&bdi->state, BDI_pending);
 	}
 
 	return 0;
 }
 
-static void bdi_add_to_pending(struct rcu_head *head)
-{
-	struct backing_dev_info *bdi;
-
-	bdi = container_of(head, struct backing_dev_info, rcu_head);
-	INIT_LIST_HEAD(&bdi->bdi_list);
-
-	spin_lock(&bdi_lock);
-	list_add_tail(&bdi->bdi_list, &bdi_pending_list);
-	spin_unlock(&bdi_lock);
-
-	/*
-	 * We are now on the pending list, wake up bdi_forker_task()
-	 * to finish the job and add us back to the active bdi_list
-	 */
-	wake_up_process(default_backing_dev_info.wb.task);
-}
-
-/*
- * Add the default flusher task that gets created for any bdi
- * that has dirty data pending writeout
- */
-void static bdi_add_default_flusher_task(struct backing_dev_info *bdi)
-{
-	if (!bdi_cap_writeback_dirty(bdi))
-		return;
-
-	if (WARN_ON(!test_bit(BDI_registered, &bdi->state))) {
-		printk(KERN_ERR "bdi %p/%s is not registered!\n",
-							bdi, bdi->name);
-		return;
-	}
-
-	/*
-	 * Check with the helper whether to proceed adding a task. Will only
-	 * abort if we two or more simultanous calls to
-	 * bdi_add_default_flusher_task() occured, further additions will block
-	 * waiting for previous additions to finish.
-	 */
-	if (!test_and_set_bit(BDI_pending, &bdi->state)) {
-		list_del_rcu(&bdi->bdi_list);
-
-		/*
-		 * We must wait for the current RCU period to end before
-		 * moving to the pending list. So schedule that operation
-		 * from an RCU callback.
-		 */
-		call_rcu(&bdi->rcu_head, bdi_add_to_pending);
-	}
-}
-
 /*
  * Remove bdi from bdi_list, and ensure that it is no longer visible
  */
@@ -541,23 +512,16 @@
 		const char *fmt, ...)
 {
 	va_list args;
-	int ret = 0;
 	struct device *dev;
 
 	if (bdi->dev)	/* The driver needs to use separate queues per device */
-		goto exit;
+		return 0;
 
 	va_start(args, fmt);
 	dev = device_create_vargs(bdi_class, parent, MKDEV(0, 0), bdi, fmt, args);
 	va_end(args);
-	if (IS_ERR(dev)) {
-		ret = PTR_ERR(dev);
-		goto exit;
-	}
-
-	spin_lock_bh(&bdi_lock);
-	list_add_tail_rcu(&bdi->bdi_list, &bdi_list);
-	spin_unlock_bh(&bdi_lock);
+	if (IS_ERR(dev))
+		return PTR_ERR(dev);
 
 	bdi->dev = dev;
 
@@ -569,21 +533,21 @@
 	if (bdi_cap_flush_forker(bdi)) {
 		struct bdi_writeback *wb = &bdi->wb;
 
-		wb->task = kthread_run(bdi_forker_task, wb, "bdi-%s",
+		wb->task = kthread_run(bdi_forker_thread, wb, "bdi-%s",
 						dev_name(dev));
-		if (IS_ERR(wb->task)) {
-			wb->task = NULL;
-			ret = -ENOMEM;
-
-			bdi_remove_from_list(bdi);
-			goto exit;
-		}
+		if (IS_ERR(wb->task))
+			return PTR_ERR(wb->task);
 	}
 
 	bdi_debug_register(bdi, dev_name(dev));
 	set_bit(BDI_registered, &bdi->state);
-exit:
-	return ret;
+
+	spin_lock_bh(&bdi_lock);
+	list_add_tail_rcu(&bdi->bdi_list, &bdi_list);
+	spin_unlock_bh(&bdi_lock);
+
+	trace_writeback_bdi_register(bdi);
+	return 0;
 }
 EXPORT_SYMBOL(bdi_register);
 
@@ -598,31 +562,29 @@
  */
 static void bdi_wb_shutdown(struct backing_dev_info *bdi)
 {
-	struct bdi_writeback *wb;
-
 	if (!bdi_cap_writeback_dirty(bdi))
 		return;
 
 	/*
+	 * Make sure nobody finds us on the bdi_list anymore
+	 */
+	bdi_remove_from_list(bdi);
+
+	/*
 	 * If setup is pending, wait for that to complete first
 	 */
 	wait_on_bit(&bdi->state, BDI_pending, bdi_sched_wait,
 			TASK_UNINTERRUPTIBLE);
 
 	/*
-	 * Make sure nobody finds us on the bdi_list anymore
-	 */
-	bdi_remove_from_list(bdi);
-
-	/*
-	 * Finally, kill the kernel threads. We don't need to be RCU
+	 * Finally, kill the kernel thread. We don't need to be RCU
 	 * safe anymore, since the bdi is gone from visibility. Force
 	 * unfreeze of the thread before calling kthread_stop(), otherwise
 	 * it would never exet if it is currently stuck in the refrigerator.
 	 */
-	list_for_each_entry(wb, &bdi->wb_list, list) {
-		thaw_process(wb->task);
-		kthread_stop(wb->task);
+	if (bdi->wb.task) {
+		thaw_process(bdi->wb.task);
+		kthread_stop(bdi->wb.task);
 	}
 }
 
@@ -644,7 +606,9 @@
 void bdi_unregister(struct backing_dev_info *bdi)
 {
 	if (bdi->dev) {
+		trace_writeback_bdi_unregister(bdi);
 		bdi_prune_sb(bdi);
+		del_timer_sync(&bdi->wb.wakeup_timer);
 
 		if (!bdi_cap_flush_forker(bdi))
 			bdi_wb_shutdown(bdi);
@@ -655,6 +619,18 @@
 }
 EXPORT_SYMBOL(bdi_unregister);
 
+/*
+ * Put a bdi_writeback into its initial state: zero it, attach it to 'bdi',
+ * stamp last_old_flush with the current jiffies, initialise the b_dirty,
+ * b_io and b_more_io lists and set up the wake-up timer.
+ */
+static void bdi_wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi)
+{
+	/* Must come first: everything below writes into the cleared struct. */
+	memset(wb, 0, sizeof(*wb));
+
+	INIT_LIST_HEAD(&wb->b_more_io);
+	INIT_LIST_HEAD(&wb->b_io);
+	INIT_LIST_HEAD(&wb->b_dirty);
+
+	wb->last_old_flush = jiffies;
+	wb->bdi = bdi;
+
+	/* wakeup_timer_fn() gets the bdi back through its 'data' argument. */
+	setup_timer(&wb->wakeup_timer, wakeup_timer_fn, (unsigned long)bdi);
+}
+
 int bdi_init(struct backing_dev_info *bdi)
 {
 	int i, err;
@@ -666,7 +642,6 @@
 	bdi->max_prop_frac = PROP_FRAC_BASE;
 	spin_lock_init(&bdi->wb_lock);
 	INIT_LIST_HEAD(&bdi->bdi_list);
-	INIT_LIST_HEAD(&bdi->wb_list);
 	INIT_LIST_HEAD(&bdi->work_list);
 
 	bdi_wb_init(&bdi->wb, bdi);

diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index df8202e..0c6258b 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c

@@ -34,6 +34,7 @@
 #include <linux/syscalls.h>
 #include <linux/buffer_head.h>
 #include <linux/pagevec.h>
+#include <trace/events/writeback.h>
 
 /*
  * After a CPU has dirtied this many pages, balance_dirty_pages_ratelimited
@@ -535,11 +536,13 @@
 		 * threshold otherwise wait until the disk writes catch
 		 * up.
 		 */
+		trace_wbc_balance_dirty_start(&wbc, bdi);
 		if (bdi_nr_reclaimable > bdi_thresh) {
 			writeback_inodes_wb(&bdi->wb, &wbc);
 			pages_written += write_chunk - wbc.nr_to_write;
 			get_dirty_limits(&background_thresh, &dirty_thresh,
 				       &bdi_thresh, bdi);
+			trace_wbc_balance_dirty_written(&wbc, bdi);
 		}
 
 		/*
@@ -565,6 +568,7 @@
 		if (pages_written >= write_chunk)
 			break;		/* We've done our duty */
 
+		trace_wbc_balance_dirty_wait(&wbc, bdi);
 		__set_current_state(TASK_INTERRUPTIBLE);
 		io_schedule_timeout(pause);
 
@@ -962,6 +966,7 @@
 			if (!clear_page_dirty_for_io(page))
 				goto continue_unlock;
 
+			trace_wbc_writepage(wbc, mapping->backing_dev_info);
 			ret = (*writepage)(page, wbc, data);
 			if (unlikely(ret)) {
 				if (ret == AOP_WRITEPAGE_ACTIVATE) {

diff --git a/mm/page_io.c b/mm/page_io.c
index 31a3b96..2dee975 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c

@@ -106,7 +106,7 @@
 		goto out;
 	}
 	if (wbc->sync_mode == WB_SYNC_ALL)
-		rw |= (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_UNPLUG);
+		rw |= REQ_SYNC | REQ_UNPLUG;
 	count_vm_event(PSWPOUT);
 	set_page_writeback(page);
 	unlock_page(page);
commit	2f9e825d3e0e2b407ae8f082de5c00afcf7378fb	[log] [tgz]
author	Linus Torvalds <torvalds@linux-foundation.org>	Tue Aug 10 15:22:42 2010 -0700
committer	Linus Torvalds <torvalds@linux-foundation.org>	Tue Aug 10 15:22:42 2010 -0700
tree	f8b3ee40674ce4acd5508a0a0bf52a30904caf6c
parent	7ae0dea900b027cd90e8a3e14deca9a19e17638b [diff]
parent	de75d60d5ea235e6e09f4962ab22541ce0fe176a [diff]