Merge branch 'for-linus' of git://git.kernel.dk/linux-2.6-block

* 'for-linus' of git://git.kernel.dk/linux-2.6-block: (41 commits)
  Revert "Seperate read and write statistics of in_flight requests"
  cfq-iosched: don't delay async queue if it hasn't dispatched at all
  block: Topology ioctls
  cfq-iosched: use assigned slice sync value, not default
  cfq-iosched: rename 'desktop' sysfs entry to 'low_latency'
  cfq-iosched: implement slower async initiate and queue ramp up
  cfq-iosched: delay async IO dispatch, if sync IO was just done
  cfq-iosched: add a knob for desktop interactiveness
  Add a tracepoint for block request remapping
  block: allow large discard requests
  block: use normal I/O path for discard requests
  swapfile: avoid NULL pointer dereference in swapon when s_bdev is NULL
  fs/bio.c: move EXPORT* macros to line after function
  Add missing blk_trace_remove_sysfs to be in pair with blk_trace_init_sysfs
  cciss: fix build when !PROC_FS
  block: Do not clamp max_hw_sectors for stacking devices
  block: Set max_sectors correctly for stacking devices
  cciss: cciss_host_attr_groups should be const
  cciss: Dynamically allocate the drive_info_struct for each logical drive.
  cciss: Add usage_count attribute to each logical drive in /sys
  ...
diff --git a/Documentation/ABI/testing/sysfs-bus-pci-devices-cciss b/Documentation/ABI/testing/sysfs-bus-pci-devices-cciss
index 0a92a7c..4f29e5f1 100644
--- a/Documentation/ABI/testing/sysfs-bus-pci-devices-cciss
+++ b/Documentation/ABI/testing/sysfs-bus-pci-devices-cciss
@@ -31,3 +31,31 @@
 Kernel Version: 2.6.30
 Contact:	iss_storagedev@hp.com
 Description:	A symbolic link to /sys/block/cciss!cXdY
+
+Where:		/sys/bus/pci/devices/<dev>/ccissX/rescan
+Date:		August 2009
+Kernel Version:	2.6.31
+Contact:	iss_storagedev@hp.com
+Description:	Kicks of a rescan of the controller to discover logical
+		drive topology changes.
+
+Where:		/sys/bus/pci/devices/<dev>/ccissX/cXdY/lunid
+Date:		August 2009
+Kernel Version: 2.6.31
+Contact:	iss_storagedev@hp.com
+Description:	Displays the 8-byte LUN ID used to address logical
+		drive Y of controller X.
+
+Where:		/sys/bus/pci/devices/<dev>/ccissX/cXdY/raid_level
+Date:		August 2009
+Kernel Version: 2.6.31
+Contact:	iss_storagedev@hp.com
+Description:	Displays the RAID level of logical drive Y of
+		controller X.
+
+Where:		/sys/bus/pci/devices/<dev>/ccissX/cXdY/usage_count
+Date:		August 2009
+Kernel Version: 2.6.31
+Contact:	iss_storagedev@hp.com
+Description:	Displays the usage count (number of opens) of logical drive Y
+		of controller X.
diff --git a/block/blk-barrier.c b/block/blk-barrier.c
index 6593ab3..8873b9b 100644
--- a/block/blk-barrier.c
+++ b/block/blk-barrier.c
@@ -350,6 +350,7 @@
 
 	if (bio->bi_private)
 		complete(bio->bi_private);
+	__free_page(bio_page(bio));
 
 	bio_put(bio);
 }
@@ -372,30 +373,50 @@
 	struct request_queue *q = bdev_get_queue(bdev);
 	int type = flags & DISCARD_FL_BARRIER ?
 		DISCARD_BARRIER : DISCARD_NOBARRIER;
+	struct bio *bio;
+	struct page *page;
 	int ret = 0;
 
 	if (!q)
 		return -ENXIO;
 
-	if (!q->prepare_discard_fn)
+	if (!blk_queue_discard(q))
 		return -EOPNOTSUPP;
 
 	while (nr_sects && !ret) {
-		struct bio *bio = bio_alloc(gfp_mask, 0);
-		if (!bio)
-			return -ENOMEM;
+		unsigned int sector_size = q->limits.logical_block_size;
+		unsigned int max_discard_sectors =
+			min(q->limits.max_discard_sectors, UINT_MAX >> 9);
 
+		bio = bio_alloc(gfp_mask, 1);
+		if (!bio)
+			goto out;
+		bio->bi_sector = sector;
 		bio->bi_end_io = blkdev_discard_end_io;
 		bio->bi_bdev = bdev;
 		if (flags & DISCARD_FL_WAIT)
 			bio->bi_private = &wait;
 
-		bio->bi_sector = sector;
+		/*
+		 * Add a zeroed one-sector payload as that's what
+		 * our current implementations need.  If we'll ever need
+		 * more the interface will need revisiting.
+		 */
+		page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+		if (!page)
+			goto out_free_bio;
+		if (bio_add_pc_page(q, bio, page, sector_size, 0) < sector_size)
+			goto out_free_page;
 
-		if (nr_sects > queue_max_hw_sectors(q)) {
-			bio->bi_size = queue_max_hw_sectors(q) << 9;
-			nr_sects -= queue_max_hw_sectors(q);
-			sector += queue_max_hw_sectors(q);
+		/*
+		 * And override the bio size - the way discard works we
+		 * touch many more blocks on disk than the actual payload
+		 * length.
+		 */
+		if (nr_sects > max_discard_sectors) {
+			bio->bi_size = max_discard_sectors << 9;
+			nr_sects -= max_discard_sectors;
+			sector += max_discard_sectors;
 		} else {
 			bio->bi_size = nr_sects << 9;
 			nr_sects = 0;
@@ -414,5 +435,11 @@
 		bio_put(bio);
 	}
 	return ret;
+out_free_page:
+	__free_page(page);
+out_free_bio:
+	bio_put(bio);
+out:
+	return -ENOMEM;
 }
 EXPORT_SYMBOL(blkdev_issue_discard);
diff --git a/block/blk-core.c b/block/blk-core.c
index 8135228..81f3431 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -34,6 +34,7 @@
 #include "blk.h"
 
 EXPORT_TRACEPOINT_SYMBOL_GPL(block_remap);
+EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap);
 EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_complete);
 
 static int __make_request(struct request_queue *q, struct bio *bio);
@@ -69,7 +70,7 @@
 		part_stat_inc(cpu, part, merges[rw]);
 	else {
 		part_round_stats(cpu, part);
-		part_inc_in_flight(part, rw);
+		part_inc_in_flight(part);
 	}
 
 	part_stat_unlock();
@@ -1031,7 +1032,7 @@
 
 	if (part->in_flight) {
 		__part_stat_add(cpu, part, time_in_queue,
-				part_in_flight(part) * (now - part->stamp));
+				part->in_flight * (now - part->stamp));
 		__part_stat_add(cpu, part, io_ticks, (now - part->stamp));
 	}
 	part->stamp = now;
@@ -1124,7 +1125,6 @@
 		req->cmd_flags |= REQ_DISCARD;
 		if (bio_rw_flagged(bio, BIO_RW_BARRIER))
 			req->cmd_flags |= REQ_SOFTBARRIER;
-		req->q->prepare_discard_fn(req->q, req);
 	} else if (unlikely(bio_rw_flagged(bio, BIO_RW_BARRIER)))
 		req->cmd_flags |= REQ_HARDBARRIER;
 
@@ -1437,7 +1437,8 @@
 			goto end_io;
 		}
 
-		if (unlikely(nr_sectors > queue_max_hw_sectors(q))) {
+		if (unlikely(!bio_rw_flagged(bio, BIO_RW_DISCARD) &&
+			     nr_sectors > queue_max_hw_sectors(q))) {
 			printk(KERN_ERR "bio too big device %s (%u > %u)\n",
 			       bdevname(bio->bi_bdev, b),
 			       bio_sectors(bio),
@@ -1470,7 +1471,7 @@
 			goto end_io;
 
 		if (bio_rw_flagged(bio, BIO_RW_DISCARD) &&
-		    !q->prepare_discard_fn) {
+		    !blk_queue_discard(q)) {
 			err = -EOPNOTSUPP;
 			goto end_io;
 		}
@@ -1738,7 +1739,7 @@
 		part_stat_inc(cpu, part, ios[rw]);
 		part_stat_add(cpu, part, ticks[rw], duration);
 		part_round_stats(cpu, part);
-		part_dec_in_flight(part, rw);
+		part_dec_in_flight(part);
 
 		part_stat_unlock();
 	}
@@ -2491,6 +2492,14 @@
 }
 EXPORT_SYMBOL(kblockd_schedule_work);
 
+int kblockd_schedule_delayed_work(struct request_queue *q,
+				  struct delayed_work *work,
+				  unsigned long delay)
+{
+	return queue_delayed_work(kblockd_workqueue, work, delay);
+}
+EXPORT_SYMBOL(kblockd_schedule_delayed_work);
+
 int __init blk_dev_init(void)
 {
 	BUILD_BUG_ON(__REQ_NR_BITS > 8 *
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 99cb5cf..b0de857 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -351,7 +351,7 @@
 		part = disk_map_sector_rcu(req->rq_disk, blk_rq_pos(req));
 
 		part_round_stats(cpu, part);
-		part_dec_in_flight(part, rq_data_dir(req));
+		part_dec_in_flight(part);
 
 		part_stat_unlock();
 	}
diff --git a/block/blk-settings.c b/block/blk-settings.c
index 83413ff..e0695bc 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -34,23 +34,6 @@
 EXPORT_SYMBOL(blk_queue_prep_rq);
 
 /**
- * blk_queue_set_discard - set a discard_sectors function for queue
- * @q:		queue
- * @dfn:	prepare_discard function
- *
- * It's possible for a queue to register a discard callback which is used
- * to transform a discard request into the appropriate type for the
- * hardware. If none is registered, then discard requests are failed
- * with %EOPNOTSUPP.
- *
- */
-void blk_queue_set_discard(struct request_queue *q, prepare_discard_fn *dfn)
-{
-	q->prepare_discard_fn = dfn;
-}
-EXPORT_SYMBOL(blk_queue_set_discard);
-
-/**
  * blk_queue_merge_bvec - set a merge_bvec function for queue
  * @q:		queue
  * @mbfn:	merge_bvec_fn
@@ -111,7 +94,9 @@
 	lim->max_hw_segments = MAX_HW_SEGMENTS;
 	lim->seg_boundary_mask = BLK_SEG_BOUNDARY_MASK;
 	lim->max_segment_size = MAX_SEGMENT_SIZE;
-	lim->max_sectors = lim->max_hw_sectors = SAFE_MAX_SECTORS;
+	lim->max_sectors = BLK_DEF_MAX_SECTORS;
+	lim->max_hw_sectors = INT_MAX;
+	lim->max_discard_sectors = SAFE_MAX_SECTORS;
 	lim->logical_block_size = lim->physical_block_size = lim->io_min = 512;
 	lim->bounce_pfn = (unsigned long)(BLK_BOUNCE_ANY >> PAGE_SHIFT);
 	lim->alignment_offset = 0;
@@ -164,6 +149,7 @@
 	q->unplug_timer.data = (unsigned long)q;
 
 	blk_set_default_limits(&q->limits);
+	blk_queue_max_sectors(q, SAFE_MAX_SECTORS);
 
 	/*
 	 * If the caller didn't supply a lock, fall back to our embedded
@@ -254,6 +240,18 @@
 EXPORT_SYMBOL(blk_queue_max_hw_sectors);
 
 /**
+ * blk_queue_max_discard_sectors - set max sectors for a single discard
+ * @q:  the request queue for the device
+ * @max_discard: maximum number of sectors to discard
+ **/
+void blk_queue_max_discard_sectors(struct request_queue *q,
+		unsigned int max_discard_sectors)
+{
+	q->limits.max_discard_sectors = max_discard_sectors;
+}
+EXPORT_SYMBOL(blk_queue_max_discard_sectors);
+
+/**
  * blk_queue_max_phys_segments - set max phys segments for a request for this queue
  * @q:  the request queue for the device
  * @max_segments:  max number of segments
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index b78c9c3..8a6d81a 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -452,6 +452,7 @@
 	if (ret) {
 		kobject_uevent(&q->kobj, KOBJ_REMOVE);
 		kobject_del(&q->kobj);
+		blk_trace_remove_sysfs(disk_to_dev(disk));
 		return ret;
 	}
 
@@ -465,11 +466,11 @@
 	if (WARN_ON(!q))
 		return;
 
-	if (q->request_fn) {
+	if (q->request_fn)
 		elv_unregister_queue(q);
 
-		kobject_uevent(&q->kobj, KOBJ_REMOVE);
-		kobject_del(&q->kobj);
-		kobject_put(&disk_to_dev(disk)->kobj);
-	}
+	kobject_uevent(&q->kobj, KOBJ_REMOVE);
+	kobject_del(&q->kobj);
+	blk_trace_remove_sysfs(disk_to_dev(disk));
+	kobject_put(&disk_to_dev(disk)->kobj);
 }
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 1ca813b..9c4b679 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -150,7 +150,7 @@
 	 * idle window management
 	 */
 	struct timer_list idle_slice_timer;
-	struct work_struct unplug_work;
+	struct delayed_work unplug_work;
 
 	struct cfq_queue *active_queue;
 	struct cfq_io_context *active_cic;
@@ -173,6 +173,7 @@
 	unsigned int cfq_slice[2];
 	unsigned int cfq_slice_async_rq;
 	unsigned int cfq_slice_idle;
+	unsigned int cfq_latency;
 
 	struct list_head cic_list;
 
@@ -180,6 +181,8 @@
 	 * Fallback dummy cfqq for extreme OOM conditions
 	 */
 	struct cfq_queue oom_cfqq;
+
+	unsigned long last_end_sync_rq;
 };
 
 enum cfqq_state_flags {
@@ -265,11 +268,13 @@
  * scheduler run of queue, if there are requests pending and no one in the
  * driver that will restart queueing
  */
-static inline void cfq_schedule_dispatch(struct cfq_data *cfqd)
+static inline void cfq_schedule_dispatch(struct cfq_data *cfqd,
+					 unsigned long delay)
 {
 	if (cfqd->busy_queues) {
 		cfq_log(cfqd, "schedule dispatch");
-		kblockd_schedule_work(cfqd->queue, &cfqd->unplug_work);
+		kblockd_schedule_delayed_work(cfqd->queue, &cfqd->unplug_work,
+						delay);
 	}
 }
 
@@ -1326,13 +1331,31 @@
 			return 0;
 
 		/*
-		 * we are the only queue, allow up to 4 times of 'quantum'
+		 * Sole queue user, allow bigger slice
 		 */
-		if (cfqq->dispatched >= 4 * max_dispatch)
-			return 0;
+		max_dispatch *= 4;
 	}
 
 	/*
+	 * Async queues must wait a bit before being allowed dispatch.
+	 * We also ramp up the dispatch depth gradually for async IO,
+	 * based on the last sync IO we serviced
+	 */
+	if (!cfq_cfqq_sync(cfqq) && cfqd->cfq_latency) {
+		unsigned long last_sync = jiffies - cfqd->last_end_sync_rq;
+		unsigned int depth;
+
+		depth = last_sync / cfqd->cfq_slice[1];
+		if (!depth && !cfqq->dispatched)
+			depth = 1;
+		if (depth < max_dispatch)
+			max_dispatch = depth;
+	}
+
+	if (cfqq->dispatched >= max_dispatch)
+		return 0;
+
+	/*
 	 * Dispatch a request from this cfqq
 	 */
 	cfq_dispatch_request(cfqd, cfqq);
@@ -1376,7 +1399,7 @@
 
 	if (unlikely(cfqd->active_queue == cfqq)) {
 		__cfq_slice_expired(cfqd, cfqq, 0);
-		cfq_schedule_dispatch(cfqd);
+		cfq_schedule_dispatch(cfqd, 0);
 	}
 
 	kmem_cache_free(cfq_pool, cfqq);
@@ -1471,7 +1494,7 @@
 {
 	if (unlikely(cfqq == cfqd->active_queue)) {
 		__cfq_slice_expired(cfqd, cfqq, 0);
-		cfq_schedule_dispatch(cfqd);
+		cfq_schedule_dispatch(cfqd, 0);
 	}
 
 	cfq_put_queue(cfqq);
@@ -1951,7 +1974,7 @@
 	enable_idle = old_idle = cfq_cfqq_idle_window(cfqq);
 
 	if (!atomic_read(&cic->ioc->nr_tasks) || !cfqd->cfq_slice_idle ||
-	    (cfqd->hw_tag && CIC_SEEKY(cic)))
+	    (!cfqd->cfq_latency && cfqd->hw_tag && CIC_SEEKY(cic)))
 		enable_idle = 0;
 	else if (sample_valid(cic->ttime_samples)) {
 		if (cic->ttime_mean > cfqd->cfq_slice_idle)
@@ -2157,8 +2180,10 @@
 	if (cfq_cfqq_sync(cfqq))
 		cfqd->sync_flight--;
 
-	if (sync)
+	if (sync) {
 		RQ_CIC(rq)->last_end_request = now;
+		cfqd->last_end_sync_rq = now;
+	}
 
 	/*
 	 * If this is the active queue, check if it needs to be expired,
@@ -2186,7 +2211,7 @@
 	}
 
 	if (!rq_in_driver(cfqd))
-		cfq_schedule_dispatch(cfqd);
+		cfq_schedule_dispatch(cfqd, 0);
 }
 
 /*
@@ -2316,7 +2341,7 @@
 	if (cic)
 		put_io_context(cic->ioc);
 
-	cfq_schedule_dispatch(cfqd);
+	cfq_schedule_dispatch(cfqd, 0);
 	spin_unlock_irqrestore(q->queue_lock, flags);
 	cfq_log(cfqd, "set_request fail");
 	return 1;
@@ -2325,7 +2350,7 @@
 static void cfq_kick_queue(struct work_struct *work)
 {
 	struct cfq_data *cfqd =
-		container_of(work, struct cfq_data, unplug_work);
+		container_of(work, struct cfq_data, unplug_work.work);
 	struct request_queue *q = cfqd->queue;
 
 	spin_lock_irq(q->queue_lock);
@@ -2379,7 +2404,7 @@
 expire:
 	cfq_slice_expired(cfqd, timed_out);
 out_kick:
-	cfq_schedule_dispatch(cfqd);
+	cfq_schedule_dispatch(cfqd, 0);
 out_cont:
 	spin_unlock_irqrestore(cfqd->queue->queue_lock, flags);
 }
@@ -2387,7 +2412,7 @@
 static void cfq_shutdown_timer_wq(struct cfq_data *cfqd)
 {
 	del_timer_sync(&cfqd->idle_slice_timer);
-	cancel_work_sync(&cfqd->unplug_work);
+	cancel_delayed_work_sync(&cfqd->unplug_work);
 }
 
 static void cfq_put_async_queues(struct cfq_data *cfqd)
@@ -2469,7 +2494,7 @@
 	cfqd->idle_slice_timer.function = cfq_idle_slice_timer;
 	cfqd->idle_slice_timer.data = (unsigned long) cfqd;
 
-	INIT_WORK(&cfqd->unplug_work, cfq_kick_queue);
+	INIT_DELAYED_WORK(&cfqd->unplug_work, cfq_kick_queue);
 
 	cfqd->cfq_quantum = cfq_quantum;
 	cfqd->cfq_fifo_expire[0] = cfq_fifo_expire[0];
@@ -2480,8 +2505,9 @@
 	cfqd->cfq_slice[1] = cfq_slice_sync;
 	cfqd->cfq_slice_async_rq = cfq_slice_async_rq;
 	cfqd->cfq_slice_idle = cfq_slice_idle;
+	cfqd->cfq_latency = 1;
 	cfqd->hw_tag = 1;
-
+	cfqd->last_end_sync_rq = jiffies;
 	return cfqd;
 }
 
@@ -2549,6 +2575,7 @@
 SHOW_FUNCTION(cfq_slice_sync_show, cfqd->cfq_slice[1], 1);
 SHOW_FUNCTION(cfq_slice_async_show, cfqd->cfq_slice[0], 1);
 SHOW_FUNCTION(cfq_slice_async_rq_show, cfqd->cfq_slice_async_rq, 0);
+SHOW_FUNCTION(cfq_low_latency_show, cfqd->cfq_latency, 0);
 #undef SHOW_FUNCTION
 
 #define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV)			\
@@ -2580,6 +2607,7 @@
 STORE_FUNCTION(cfq_slice_async_store, &cfqd->cfq_slice[0], 1, UINT_MAX, 1);
 STORE_FUNCTION(cfq_slice_async_rq_store, &cfqd->cfq_slice_async_rq, 1,
 		UINT_MAX, 0);
+STORE_FUNCTION(cfq_low_latency_store, &cfqd->cfq_latency, 0, 1, 0);
 #undef STORE_FUNCTION
 
 #define CFQ_ATTR(name) \
@@ -2595,6 +2623,7 @@
 	CFQ_ATTR(slice_async),
 	CFQ_ATTR(slice_async_rq),
 	CFQ_ATTR(slice_idle),
+	CFQ_ATTR(low_latency),
 	__ATTR_NULL
 };
 
diff --git a/block/compat_ioctl.c b/block/compat_ioctl.c
index 7865a34..9bd086c 100644
--- a/block/compat_ioctl.c
+++ b/block/compat_ioctl.c
@@ -21,6 +21,11 @@
 	return put_user(val, (compat_int_t __user *)compat_ptr(arg));
 }
 
+static int compat_put_uint(unsigned long arg, unsigned int val)
+{
+	return put_user(val, (compat_uint_t __user *)compat_ptr(arg));
+}
+
 static int compat_put_long(unsigned long arg, long val)
 {
 	return put_user(val, (compat_long_t __user *)compat_ptr(arg));
@@ -734,6 +739,14 @@
 	switch (cmd) {
 	case HDIO_GETGEO:
 		return compat_hdio_getgeo(disk, bdev, compat_ptr(arg));
+	case BLKPBSZGET:
+		return compat_put_uint(arg, bdev_physical_block_size(bdev));
+	case BLKIOMIN:
+		return compat_put_uint(arg, bdev_io_min(bdev));
+	case BLKIOOPT:
+		return compat_put_uint(arg, bdev_io_opt(bdev));
+	case BLKALIGNOFF:
+		return compat_put_int(arg, bdev_alignment_offset(bdev));
 	case BLKFLSBUF:
 	case BLKROSET:
 	case BLKDISCARD:
diff --git a/block/genhd.c b/block/genhd.c
index 517e433..5a0861d 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -869,7 +869,6 @@
 static DEVICE_ATTR(alignment_offset, S_IRUGO, disk_alignment_offset_show, NULL);
 static DEVICE_ATTR(capability, S_IRUGO, disk_capability_show, NULL);
 static DEVICE_ATTR(stat, S_IRUGO, part_stat_show, NULL);
-static DEVICE_ATTR(inflight, S_IRUGO, part_inflight_show, NULL);
 #ifdef CONFIG_FAIL_MAKE_REQUEST
 static struct device_attribute dev_attr_fail =
 	__ATTR(make-it-fail, S_IRUGO|S_IWUSR, part_fail_show, part_fail_store);
@@ -889,7 +888,6 @@
 	&dev_attr_alignment_offset.attr,
 	&dev_attr_capability.attr,
 	&dev_attr_stat.attr,
-	&dev_attr_inflight.attr,
 #ifdef CONFIG_FAIL_MAKE_REQUEST
 	&dev_attr_fail.attr,
 #endif
@@ -1055,7 +1053,7 @@
 			   part_stat_read(hd, merges[1]),
 			   (unsigned long long)part_stat_read(hd, sectors[1]),
 			   jiffies_to_msecs(part_stat_read(hd, ticks[1])),
-			   part_in_flight(hd),
+			   hd->in_flight,
 			   jiffies_to_msecs(part_stat_read(hd, io_ticks)),
 			   jiffies_to_msecs(part_stat_read(hd, time_in_queue))
 			);
diff --git a/block/ioctl.c b/block/ioctl.c
index d3e6b58..1f4d1de 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -138,6 +138,11 @@
 	return put_user(val, (int __user *)arg);
 }
 
+static int put_uint(unsigned long arg, unsigned int val)
+{
+	return put_user(val, (unsigned int __user *)arg);
+}
+
 static int put_long(unsigned long arg, long val)
 {
 	return put_user(val, (long __user *)arg);
@@ -263,10 +268,18 @@
 		return put_long(arg, (bdi->ra_pages * PAGE_CACHE_SIZE) / 512);
 	case BLKROGET:
 		return put_int(arg, bdev_read_only(bdev) != 0);
-	case BLKBSZGET: /* get the logical block size (cf. BLKSSZGET) */
+	case BLKBSZGET: /* get block device soft block size (cf. BLKSSZGET) */
 		return put_int(arg, block_size(bdev));
-	case BLKSSZGET: /* get block device hardware sector size */
+	case BLKSSZGET: /* get block device logical block size */
 		return put_int(arg, bdev_logical_block_size(bdev));
+	case BLKPBSZGET: /* get block device physical block size */
+		return put_uint(arg, bdev_physical_block_size(bdev));
+	case BLKIOMIN:
+		return put_uint(arg, bdev_io_min(bdev));
+	case BLKIOOPT:
+		return put_uint(arg, bdev_io_opt(bdev));
+	case BLKALIGNOFF:
+		return put_int(arg, bdev_alignment_offset(bdev));
 	case BLKSECTGET:
 		return put_ushort(arg, queue_max_sectors(bdev_get_queue(bdev)));
 	case BLKRASET:
diff --git a/drivers/block/DAC960.c b/drivers/block/DAC960.c
index 6fa7b0f..eb4fa19 100644
--- a/drivers/block/DAC960.c
+++ b/drivers/block/DAC960.c
@@ -38,6 +38,7 @@
 #include <linux/slab.h>
 #include <linux/smp_lock.h>
 #include <linux/proc_fs.h>
+#include <linux/seq_file.h>
 #include <linux/reboot.h>
 #include <linux/spinlock.h>
 #include <linux/timer.h>
@@ -6422,16 +6423,10 @@
   return true;
 }
 
-
-/*
-  DAC960_ProcReadStatus implements reading /proc/rd/status.
-*/
-
-static int DAC960_ProcReadStatus(char *Page, char **Start, off_t Offset,
-				 int Count, int *EOF, void *Data)
+static int dac960_proc_show(struct seq_file *m, void *v)
 {
   unsigned char *StatusMessage = "OK\n";
-  int ControllerNumber, BytesAvailable;
+  int ControllerNumber;
   for (ControllerNumber = 0;
        ControllerNumber < DAC960_ControllerCount;
        ControllerNumber++)
@@ -6444,52 +6439,49 @@
 	  break;
 	}
     }
-  BytesAvailable = strlen(StatusMessage) - Offset;
-  if (Count >= BytesAvailable)
-    {
-      Count = BytesAvailable;
-      *EOF = true;
-    }
-  if (Count <= 0) return 0;
-  *Start = Page;
-  memcpy(Page, &StatusMessage[Offset], Count);
-  return Count;
+  seq_puts(m, StatusMessage);
+  return 0;
 }
 
-
-/*
-  DAC960_ProcReadInitialStatus implements reading /proc/rd/cN/initial_status.
-*/
-
-static int DAC960_ProcReadInitialStatus(char *Page, char **Start, off_t Offset,
-					int Count, int *EOF, void *Data)
+static int dac960_proc_open(struct inode *inode, struct file *file)
 {
-  DAC960_Controller_T *Controller = (DAC960_Controller_T *) Data;
-  int BytesAvailable = Controller->InitialStatusLength - Offset;
-  if (Count >= BytesAvailable)
-    {
-      Count = BytesAvailable;
-      *EOF = true;
-    }
-  if (Count <= 0) return 0;
-  *Start = Page;
-  memcpy(Page, &Controller->CombinedStatusBuffer[Offset], Count);
-  return Count;
+	return single_open(file, dac960_proc_show, NULL);
 }
 
+static const struct file_operations dac960_proc_fops = {
+	.owner		= THIS_MODULE,
+	.open		= dac960_proc_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
 
-/*
-  DAC960_ProcReadCurrentStatus implements reading /proc/rd/cN/current_status.
-*/
-
-static int DAC960_ProcReadCurrentStatus(char *Page, char **Start, off_t Offset,
-					int Count, int *EOF, void *Data)
+static int dac960_initial_status_proc_show(struct seq_file *m, void *v)
 {
-  DAC960_Controller_T *Controller = (DAC960_Controller_T *) Data;
+	DAC960_Controller_T *Controller = (DAC960_Controller_T *)m->private;
+	seq_printf(m, "%.*s", Controller->InitialStatusLength, Controller->CombinedStatusBuffer);
+	return 0;
+}
+
+static int dac960_initial_status_proc_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, dac960_initial_status_proc_show, PDE(inode)->data);
+}
+
+static const struct file_operations dac960_initial_status_proc_fops = {
+	.owner		= THIS_MODULE,
+	.open		= dac960_initial_status_proc_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+static int dac960_current_status_proc_show(struct seq_file *m, void *v)
+{
+  DAC960_Controller_T *Controller = (DAC960_Controller_T *) m->private;
   unsigned char *StatusMessage =
     "No Rebuild or Consistency Check in Progress\n";
   int ProgressMessageLength = strlen(StatusMessage);
-  int BytesAvailable;
   if (jiffies != Controller->LastCurrentStatusTime)
     {
       Controller->CurrentStatusLength = 0;
@@ -6513,49 +6505,41 @@
 	}
       Controller->LastCurrentStatusTime = jiffies;
     }
-  BytesAvailable = Controller->CurrentStatusLength - Offset;
-  if (Count >= BytesAvailable)
-    {
-      Count = BytesAvailable;
-      *EOF = true;
-    }
-  if (Count <= 0) return 0;
-  *Start = Page;
-  memcpy(Page, &Controller->CurrentStatusBuffer[Offset], Count);
-  return Count;
+	seq_printf(m, "%.*s", Controller->CurrentStatusLength, Controller->CurrentStatusBuffer);
+	return 0;
 }
 
-
-/*
-  DAC960_ProcReadUserCommand implements reading /proc/rd/cN/user_command.
-*/
-
-static int DAC960_ProcReadUserCommand(char *Page, char **Start, off_t Offset,
-				      int Count, int *EOF, void *Data)
+static int dac960_current_status_proc_open(struct inode *inode, struct file *file)
 {
-  DAC960_Controller_T *Controller = (DAC960_Controller_T *) Data;
-  int BytesAvailable = Controller->UserStatusLength - Offset;
-  if (Count >= BytesAvailable)
-    {
-      Count = BytesAvailable;
-      *EOF = true;
-    }
-  if (Count <= 0) return 0;
-  *Start = Page;
-  memcpy(Page, &Controller->UserStatusBuffer[Offset], Count);
-  return Count;
+	return single_open(file, dac960_current_status_proc_show, PDE(inode)->data);
 }
 
+static const struct file_operations dac960_current_status_proc_fops = {
+	.owner		= THIS_MODULE,
+	.open		= dac960_current_status_proc_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
 
-/*
-  DAC960_ProcWriteUserCommand implements writing /proc/rd/cN/user_command.
-*/
+static int dac960_user_command_proc_show(struct seq_file *m, void *v)
+{
+	DAC960_Controller_T *Controller = (DAC960_Controller_T *)m->private;
 
-static int DAC960_ProcWriteUserCommand(struct file *file,
+	seq_printf(m, "%.*s", Controller->UserStatusLength, Controller->UserStatusBuffer);
+	return 0;
+}
+
+static int dac960_user_command_proc_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, dac960_user_command_proc_show, PDE(inode)->data);
+}
+
+static ssize_t dac960_user_command_proc_write(struct file *file,
 				       const char __user *Buffer,
-				       unsigned long Count, void *Data)
+				       size_t Count, loff_t *pos)
 {
-  DAC960_Controller_T *Controller = (DAC960_Controller_T *) Data;
+  DAC960_Controller_T *Controller = (DAC960_Controller_T *) PDE(file->f_path.dentry->d_inode)->data;
   unsigned char CommandBuffer[80];
   int Length;
   if (Count > sizeof(CommandBuffer)-1) return -EINVAL;
@@ -6572,6 +6556,14 @@
 	    ? Count : -EBUSY);
 }
 
+static const struct file_operations dac960_user_command_proc_fops = {
+	.owner		= THIS_MODULE,
+	.open		= dac960_user_command_proc_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+	.write		= dac960_user_command_proc_write,
+};
 
 /*
   DAC960_CreateProcEntries creates the /proc/rd/... entries for the
@@ -6586,23 +6578,17 @@
 
 	if (DAC960_ProcDirectoryEntry == NULL) {
   		DAC960_ProcDirectoryEntry = proc_mkdir("rd", NULL);
-  		StatusProcEntry = create_proc_read_entry("status", 0,
+  		StatusProcEntry = proc_create("status", 0,
 					   DAC960_ProcDirectoryEntry,
-					   DAC960_ProcReadStatus, NULL);
+					   &dac960_proc_fops);
 	}
 
       sprintf(Controller->ControllerName, "c%d", Controller->ControllerNumber);
       ControllerProcEntry = proc_mkdir(Controller->ControllerName,
 				       DAC960_ProcDirectoryEntry);
-      create_proc_read_entry("initial_status", 0, ControllerProcEntry,
-			     DAC960_ProcReadInitialStatus, Controller);
-      create_proc_read_entry("current_status", 0, ControllerProcEntry,
-			     DAC960_ProcReadCurrentStatus, Controller);
-      UserCommandProcEntry =
-	create_proc_read_entry("user_command", S_IWUSR | S_IRUSR,
-			       ControllerProcEntry, DAC960_ProcReadUserCommand,
-			       Controller);
-      UserCommandProcEntry->write_proc = DAC960_ProcWriteUserCommand;
+      proc_create_data("initial_status", 0, ControllerProcEntry, &dac960_initial_status_proc_fops, Controller);
+      proc_create_data("current_status", 0, ControllerProcEntry, &dac960_current_status_proc_fops, Controller);
+      UserCommandProcEntry = proc_create_data("user_command", S_IWUSR | S_IRUSR, ControllerProcEntry, &dac960_user_command_proc_fops, Controller);
       Controller->ControllerProcEntry = ControllerProcEntry;
 }
 
diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index 1ece0b4..fb5be2d 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -36,9 +36,11 @@
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
 #include <linux/init.h>
+#include <linux/jiffies.h>
 #include <linux/hdreg.h>
 #include <linux/spinlock.h>
 #include <linux/compat.h>
+#include <linux/mutex.h>
 #include <asm/uaccess.h>
 #include <asm/io.h>
 
@@ -155,6 +157,10 @@
 
 static ctlr_info_t *hba[MAX_CTLR];
 
+static struct task_struct *cciss_scan_thread;
+static DEFINE_MUTEX(scan_mutex);
+static LIST_HEAD(scan_q);
+
 static void do_cciss_request(struct request_queue *q);
 static irqreturn_t do_cciss_intr(int irq, void *dev_id);
 static int cciss_open(struct block_device *bdev, fmode_t mode);
@@ -164,9 +170,9 @@
 static int cciss_getgeo(struct block_device *bdev, struct hd_geometry *geo);
 
 static int cciss_revalidate(struct gendisk *disk);
-static int rebuild_lun_table(ctlr_info_t *h, int first_time);
+static int rebuild_lun_table(ctlr_info_t *h, int first_time, int via_ioctl);
 static int deregister_disk(ctlr_info_t *h, int drv_index,
-			   int clear_all);
+			   int clear_all, int via_ioctl);
 
 static void cciss_read_capacity(int ctlr, int logvol, int withirq,
 			sector_t *total_size, unsigned int *block_size);
@@ -189,8 +195,13 @@
 static int process_sendcmd_error(ctlr_info_t *h, CommandList_struct *c);
 
 static void fail_all_cmds(unsigned long ctlr);
+static int add_to_scan_list(struct ctlr_info *h);
 static int scan_thread(void *data);
 static int check_for_unit_attention(ctlr_info_t *h, CommandList_struct *c);
+static void cciss_hba_release(struct device *dev);
+static void cciss_device_release(struct device *dev);
+static void cciss_free_gendisk(ctlr_info_t *h, int drv_index);
+static void cciss_free_drive_info(ctlr_info_t *h, int drv_index);
 
 #ifdef CONFIG_PROC_FS
 static void cciss_procinit(int i);
@@ -245,7 +256,10 @@
 
 #include "cciss_scsi.c"		/* For SCSI tape support */
 
-#define RAID_UNKNOWN 6
+static const char *raid_label[] = { "0", "4", "1(1+0)", "5", "5+1", "ADG",
+	"UNKNOWN"
+};
+#define RAID_UNKNOWN (sizeof(raid_label) / sizeof(raid_label[0])-1)
 
 #ifdef CONFIG_PROC_FS
 
@@ -255,9 +269,6 @@
 #define ENG_GIG 1000000000
 #define ENG_GIG_FACTOR (ENG_GIG/512)
 #define ENGAGE_SCSI	"engage scsi"
-static const char *raid_label[] = { "0", "4", "1(1+0)", "5", "5+1", "ADG",
-	"UNKNOWN"
-};
 
 static struct proc_dir_entry *proc_cciss;
 
@@ -318,7 +329,7 @@
 	ctlr_info_t *h = seq->private;
 	unsigned ctlr = h->ctlr;
 	loff_t *pos = v;
-	drive_info_struct *drv = &h->drv[*pos];
+	drive_info_struct *drv = h->drv[*pos];
 
 	if (*pos > h->highest_lun)
 		return 0;
@@ -331,7 +342,7 @@
 	vol_sz_frac *= 100;
 	sector_div(vol_sz_frac, ENG_GIG_FACTOR);
 
-	if (drv->raid_level > 5)
+	if (drv->raid_level < 0 || drv->raid_level > RAID_UNKNOWN)
 		drv->raid_level = RAID_UNKNOWN;
 	seq_printf(seq, "cciss/c%dd%d:"
 			"\t%4u.%02uGB\tRAID %s\n",
@@ -454,9 +465,19 @@
 #define to_hba(n) container_of(n, struct ctlr_info, dev)
 #define to_drv(n) container_of(n, drive_info_struct, dev)
 
-static struct device_type cciss_host_type = {
-	.name		= "cciss_host",
-};
+static ssize_t host_store_rescan(struct device *dev,
+				 struct device_attribute *attr,
+				 const char *buf, size_t count)
+{
+	struct ctlr_info *h = to_hba(dev);
+
+	add_to_scan_list(h);
+	wake_up_process(cciss_scan_thread);
+	wait_for_completion_interruptible(&h->scan_wait);
+
+	return count;
+}
+DEVICE_ATTR(rescan, S_IWUSR, NULL, host_store_rescan);
 
 static ssize_t dev_show_unique_id(struct device *dev,
 				 struct device_attribute *attr,
@@ -560,11 +581,101 @@
 }
 DEVICE_ATTR(rev, S_IRUGO, dev_show_rev, NULL);
 
+static ssize_t cciss_show_lunid(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	drive_info_struct *drv = to_drv(dev);
+	struct ctlr_info *h = to_hba(drv->dev.parent);
+	unsigned long flags;
+	unsigned char lunid[8];
+
+	spin_lock_irqsave(CCISS_LOCK(h->ctlr), flags);
+	if (h->busy_configuring) {
+		spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags);
+		return -EBUSY;
+	}
+	if (!drv->heads) {
+		spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags);
+		return -ENOTTY;
+	}
+	memcpy(lunid, drv->LunID, sizeof(lunid));
+	spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags);
+	return snprintf(buf, 20, "0x%02x%02x%02x%02x%02x%02x%02x%02x\n",
+		lunid[0], lunid[1], lunid[2], lunid[3],
+		lunid[4], lunid[5], lunid[6], lunid[7]);
+}
+DEVICE_ATTR(lunid, S_IRUGO, cciss_show_lunid, NULL);
+
+static ssize_t cciss_show_raid_level(struct device *dev,
+				     struct device_attribute *attr, char *buf)
+{
+	drive_info_struct *drv = to_drv(dev);
+	struct ctlr_info *h = to_hba(drv->dev.parent);
+	int raid;
+	unsigned long flags;
+
+	spin_lock_irqsave(CCISS_LOCK(h->ctlr), flags);
+	if (h->busy_configuring) {
+		spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags);
+		return -EBUSY;
+	}
+	raid = drv->raid_level;
+	spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags);
+	if (raid < 0 || raid > RAID_UNKNOWN)
+		raid = RAID_UNKNOWN;
+
+	return snprintf(buf, strlen(raid_label[raid]) + 7, "RAID %s\n",
+			raid_label[raid]);
+}
+DEVICE_ATTR(raid_level, S_IRUGO, cciss_show_raid_level, NULL);
+
+static ssize_t cciss_show_usage_count(struct device *dev,
+				      struct device_attribute *attr, char *buf)
+{
+	drive_info_struct *drv = to_drv(dev);
+	struct ctlr_info *h = to_hba(drv->dev.parent);
+	unsigned long flags;
+	int count;
+
+	spin_lock_irqsave(CCISS_LOCK(h->ctlr), flags);
+	if (h->busy_configuring) {
+		spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags);
+		return -EBUSY;
+	}
+	count = drv->usage_count;
+	spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags);
+	return snprintf(buf, 20, "%d\n", count);
+}
+DEVICE_ATTR(usage_count, S_IRUGO, cciss_show_usage_count, NULL);
+
+static struct attribute *cciss_host_attrs[] = {
+	&dev_attr_rescan.attr,
+	NULL
+};
+
+static struct attribute_group cciss_host_attr_group = {
+	.attrs = cciss_host_attrs,
+};
+
+static const struct attribute_group *cciss_host_attr_groups[] = {
+	&cciss_host_attr_group,
+	NULL
+};
+
+static struct device_type cciss_host_type = {
+	.name		= "cciss_host",
+	.groups		= cciss_host_attr_groups,
+	.release	= cciss_hba_release,
+};
+
 static struct attribute *cciss_dev_attrs[] = {
 	&dev_attr_unique_id.attr,
 	&dev_attr_model.attr,
 	&dev_attr_vendor.attr,
 	&dev_attr_rev.attr,
+	&dev_attr_lunid.attr,
+	&dev_attr_raid_level.attr,
+	&dev_attr_usage_count.attr,
 	NULL
 };
 
@@ -580,12 +691,24 @@
 static struct device_type cciss_dev_type = {
 	.name		= "cciss_device",
 	.groups		= cciss_dev_attr_groups,
+	.release	= cciss_device_release,
 };
 
 static struct bus_type cciss_bus_type = {
 	.name		= "cciss",
 };
 
+/*
+ * cciss_hba_release is called when the reference count
+ * of h->dev goes to zero.
+ */
+static void cciss_hba_release(struct device *dev)
+{
+	/*
+	 * nothing to do, but need this to avoid a warning
+	 * about not having a release handler from lib/kref.c.
+	 */
+}
 
 /*
  * Initialize sysfs entry for each controller.  This sets up and registers
@@ -609,6 +732,16 @@
 static void cciss_destroy_hba_sysfs_entry(struct ctlr_info *h)
 {
 	device_del(&h->dev);
+	put_device(&h->dev); /* final put. */
+}
+
+/* cciss_device_release is called when the reference count
+ * of h->drv[x]dev goes to zero.
+ */
+static void cciss_device_release(struct device *dev)
+{
+	drive_info_struct *drv = to_drv(dev);
+	kfree(drv);
 }
 
 /*
@@ -617,24 +750,39 @@
  * /sys/bus/pci/devices/<dev/ccis#/. We also create a link from
  * /sys/block/cciss!c#d# to this entry.
  */
-static int cciss_create_ld_sysfs_entry(struct ctlr_info *h,
-				       drive_info_struct *drv,
+static long cciss_create_ld_sysfs_entry(struct ctlr_info *h,
 				       int drv_index)
 {
-	device_initialize(&drv->dev);
-	drv->dev.type = &cciss_dev_type;
-	drv->dev.bus = &cciss_bus_type;
-	dev_set_name(&drv->dev, "c%dd%d", h->ctlr, drv_index);
-	drv->dev.parent = &h->dev;
-	return device_add(&drv->dev);
+	struct device *dev;
+
+	if (h->drv[drv_index]->device_initialized)
+		return 0;
+
+	dev = &h->drv[drv_index]->dev;
+	device_initialize(dev);
+	dev->type = &cciss_dev_type;
+	dev->bus = &cciss_bus_type;
+	dev_set_name(dev, "c%dd%d", h->ctlr, drv_index);
+	dev->parent = &h->dev;
+	h->drv[drv_index]->device_initialized = 1;
+	return device_add(dev);
 }
 
 /*
  * Remove sysfs entries for a logical drive.
  */
-static void cciss_destroy_ld_sysfs_entry(drive_info_struct *drv)
+static void cciss_destroy_ld_sysfs_entry(struct ctlr_info *h, int drv_index,
+	int ctlr_exiting)
 {
-	device_del(&drv->dev);
+	struct device *dev = &h->drv[drv_index]->dev;
+
+	/* special case for c*d0, we only destroy it on controller exit */
+	if (drv_index == 0 && !ctlr_exiting)
+		return;
+
+	device_del(dev);
+	put_device(dev); /* the "final" put. */
+	h->drv[drv_index] = NULL;
 }
 
 /*
@@ -751,7 +899,7 @@
 	printk(KERN_DEBUG "cciss_open %s\n", bdev->bd_disk->disk_name);
 #endif				/* CCISS_DEBUG */
 
-	if (host->busy_initializing || drv->busy_configuring)
+	if (drv->busy_configuring)
 		return -EBUSY;
 	/*
 	 * Root is allowed to open raw volume zero even if it's not configured
@@ -767,7 +915,8 @@
 			if (MINOR(bdev->bd_dev) & 0x0f) {
 				return -ENXIO;
 				/* if it is, make sure we have a LUN ID */
-			} else if (drv->LunID == 0) {
+			} else if (memcmp(drv->LunID, CTLR_LUNID,
+				sizeof(drv->LunID))) {
 				return -ENXIO;
 			}
 		}
@@ -1132,12 +1281,13 @@
 	case CCISS_DEREGDISK:
 	case CCISS_REGNEWD:
 	case CCISS_REVALIDVOLS:
-		return rebuild_lun_table(host, 0);
+		return rebuild_lun_table(host, 0, 1);
 
 	case CCISS_GETLUNINFO:{
 			LogvolInfo_struct luninfo;
 
-			luninfo.LunID = drv->LunID;
+			memcpy(&luninfo.LunID, drv->LunID,
+				sizeof(luninfo.LunID));
 			luninfo.num_opens = drv->usage_count;
 			luninfo.num_parts = 0;
 			if (copy_to_user(argp, &luninfo,
@@ -1475,7 +1625,10 @@
 		/* make sure the disk has been added and the drive is real
 		 * because this can be called from the middle of init_one.
 		 */
-		if (!(h->drv[curr_queue].queue) || !(h->drv[curr_queue].heads))
+		if (!h->drv[curr_queue])
+			continue;
+		if (!(h->drv[curr_queue]->queue) ||
+			!(h->drv[curr_queue]->heads))
 			continue;
 		blk_start_queue(h->gendisk[curr_queue]->queue);
 
@@ -1532,13 +1685,11 @@
 	spin_unlock_irqrestore(&h->lock, flags);
 }
 
-static void log_unit_to_scsi3addr(ctlr_info_t *h, unsigned char scsi3addr[],
-	uint32_t log_unit)
+static inline void log_unit_to_scsi3addr(ctlr_info_t *h,
+	unsigned char scsi3addr[], uint32_t log_unit)
 {
-	log_unit = h->drv[log_unit].LunID & 0x03fff;
-	memset(&scsi3addr[4], 0, 4);
-	memcpy(&scsi3addr[0], &log_unit, 4);
-	scsi3addr[3] |= 0x40;
+	memcpy(scsi3addr, h->drv[log_unit]->LunID,
+		sizeof(h->drv[log_unit]->LunID));
 }
 
 /* This function gets the SCSI vendor, model, and revision of a logical drive
@@ -1615,16 +1766,23 @@
 	return;
 }
 
-static void cciss_add_disk(ctlr_info_t *h, struct gendisk *disk,
+/*
+ * cciss_add_disk sets up the block device queue for a logical drive
+ */
+static int cciss_add_disk(ctlr_info_t *h, struct gendisk *disk,
 				int drv_index)
 {
 	disk->queue = blk_init_queue(do_cciss_request, &h->lock);
+	if (!disk->queue)
+		goto init_queue_failure;
 	sprintf(disk->disk_name, "cciss/c%dd%d", h->ctlr, drv_index);
 	disk->major = h->major;
 	disk->first_minor = drv_index << NWD_SHIFT;
 	disk->fops = &cciss_fops;
-	disk->private_data = &h->drv[drv_index];
-	disk->driverfs_dev = &h->drv[drv_index].dev;
+	if (cciss_create_ld_sysfs_entry(h, drv_index))
+		goto cleanup_queue;
+	disk->private_data = h->drv[drv_index];
+	disk->driverfs_dev = &h->drv[drv_index]->dev;
 
 	/* Set up queue information */
 	blk_queue_bounce_limit(disk->queue, h->pdev->dma_mask);
@@ -1642,14 +1800,21 @@
 	disk->queue->queuedata = h;
 
 	blk_queue_logical_block_size(disk->queue,
-				     h->drv[drv_index].block_size);
+				     h->drv[drv_index]->block_size);
 
 	/* Make sure all queue data is written out before */
-	/* setting h->drv[drv_index].queue, as setting this */
+	/* setting h->drv[drv_index]->queue, as setting this */
 	/* allows the interrupt handler to start the queue */
 	wmb();
-	h->drv[drv_index].queue = disk->queue;
+	h->drv[drv_index]->queue = disk->queue;
 	add_disk(disk);
+	return 0;
+
+cleanup_queue:
+	blk_cleanup_queue(disk->queue);
+	disk->queue = NULL;
+init_queue_failure:
+	return -1;
 }
 
 /* This function will check the usage_count of the drive to be updated/added.
@@ -1662,7 +1827,8 @@
  * is also the controller node.  Any changes to disk 0 will show up on
  * the next reboot.
  */
-static void cciss_update_drive_info(int ctlr, int drv_index, int first_time)
+static void cciss_update_drive_info(int ctlr, int drv_index, int first_time,
+	int via_ioctl)
 {
 	ctlr_info_t *h = hba[ctlr];
 	struct gendisk *disk;
@@ -1672,21 +1838,13 @@
 	unsigned long flags = 0;
 	int ret = 0;
 	drive_info_struct *drvinfo;
-	int was_only_controller_node;
 
 	/* Get information about the disk and modify the driver structure */
 	inq_buff = kmalloc(sizeof(InquiryData_struct), GFP_KERNEL);
-	drvinfo = kmalloc(sizeof(*drvinfo), GFP_KERNEL);
+	drvinfo = kzalloc(sizeof(*drvinfo), GFP_KERNEL);
 	if (inq_buff == NULL || drvinfo == NULL)
 		goto mem_msg;
 
-	/* See if we're trying to update the "controller node"
-	 * this will happen the when the first logical drive gets
-	 * created by ACU.
-	 */
-	was_only_controller_node = (drv_index == 0 &&
-				h->drv[0].raid_level == -1);
-
 	/* testing to see if 16-byte CDBs are already being used */
 	if (h->cciss_read == CCISS_READ_16) {
 		cciss_read_capacity_16(h->ctlr, drv_index, 1,
@@ -1719,16 +1877,19 @@
 				drvinfo->model, drvinfo->rev);
 	cciss_get_serial_no(ctlr, drv_index, 1, drvinfo->serial_no,
 			sizeof(drvinfo->serial_no));
+	/* Save the lunid in case we deregister the disk, below. */
+	memcpy(drvinfo->LunID, h->drv[drv_index]->LunID,
+		sizeof(drvinfo->LunID));
 
 	/* Is it the same disk we already know, and nothing's changed? */
-	if (h->drv[drv_index].raid_level != -1 &&
+	if (h->drv[drv_index]->raid_level != -1 &&
 		((memcmp(drvinfo->serial_no,
-				h->drv[drv_index].serial_no, 16) == 0) &&
-		drvinfo->block_size == h->drv[drv_index].block_size &&
-		drvinfo->nr_blocks == h->drv[drv_index].nr_blocks &&
-		drvinfo->heads == h->drv[drv_index].heads &&
-		drvinfo->sectors == h->drv[drv_index].sectors &&
-		drvinfo->cylinders == h->drv[drv_index].cylinders))
+				h->drv[drv_index]->serial_no, 16) == 0) &&
+		drvinfo->block_size == h->drv[drv_index]->block_size &&
+		drvinfo->nr_blocks == h->drv[drv_index]->nr_blocks &&
+		drvinfo->heads == h->drv[drv_index]->heads &&
+		drvinfo->sectors == h->drv[drv_index]->sectors &&
+		drvinfo->cylinders == h->drv[drv_index]->cylinders))
 			/* The disk is unchanged, nothing to update */
 			goto freeret;
 
@@ -1738,18 +1899,17 @@
 	 * If the disk already exists then deregister it before proceeding
 	 * (unless it's the first disk (for the controller node).
 	 */
-	if (h->drv[drv_index].raid_level != -1 && drv_index != 0) {
+	if (h->drv[drv_index]->raid_level != -1 && drv_index != 0) {
 		printk(KERN_WARNING "disk %d has changed.\n", drv_index);
 		spin_lock_irqsave(CCISS_LOCK(h->ctlr), flags);
-		h->drv[drv_index].busy_configuring = 1;
+		h->drv[drv_index]->busy_configuring = 1;
 		spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags);
 
-		/* deregister_disk sets h->drv[drv_index].queue = NULL
+		/* deregister_disk sets h->drv[drv_index]->queue = NULL
 		 * which keeps the interrupt handler from starting
 		 * the queue.
 		 */
-		ret = deregister_disk(h, drv_index, 0);
-		h->drv[drv_index].busy_configuring = 0;
+		ret = deregister_disk(h, drv_index, 0, via_ioctl);
 	}
 
 	/* If the disk is in use return */
@@ -1757,22 +1917,31 @@
 		goto freeret;
 
 	/* Save the new information from cciss_geometry_inquiry
-	 * and serial number inquiry.
+	 * and serial number inquiry.  If the disk was deregistered
+	 * above, then h->drv[drv_index] will be NULL.
 	 */
-	h->drv[drv_index].block_size = drvinfo->block_size;
-	h->drv[drv_index].nr_blocks = drvinfo->nr_blocks;
-	h->drv[drv_index].heads = drvinfo->heads;
-	h->drv[drv_index].sectors = drvinfo->sectors;
-	h->drv[drv_index].cylinders = drvinfo->cylinders;
-	h->drv[drv_index].raid_level = drvinfo->raid_level;
-	memcpy(h->drv[drv_index].serial_no, drvinfo->serial_no, 16);
-	memcpy(h->drv[drv_index].vendor, drvinfo->vendor, VENDOR_LEN + 1);
-	memcpy(h->drv[drv_index].model, drvinfo->model, MODEL_LEN + 1);
-	memcpy(h->drv[drv_index].rev, drvinfo->rev, REV_LEN + 1);
+	if (h->drv[drv_index] == NULL) {
+		drvinfo->device_initialized = 0;
+		h->drv[drv_index] = drvinfo;
+		drvinfo = NULL; /* so it won't be freed below. */
+	} else {
+		/* special case for cxd0 */
+		h->drv[drv_index]->block_size = drvinfo->block_size;
+		h->drv[drv_index]->nr_blocks = drvinfo->nr_blocks;
+		h->drv[drv_index]->heads = drvinfo->heads;
+		h->drv[drv_index]->sectors = drvinfo->sectors;
+		h->drv[drv_index]->cylinders = drvinfo->cylinders;
+		h->drv[drv_index]->raid_level = drvinfo->raid_level;
+		memcpy(h->drv[drv_index]->serial_no, drvinfo->serial_no, 16);
+		memcpy(h->drv[drv_index]->vendor, drvinfo->vendor,
+			VENDOR_LEN + 1);
+		memcpy(h->drv[drv_index]->model, drvinfo->model, MODEL_LEN + 1);
+		memcpy(h->drv[drv_index]->rev, drvinfo->rev, REV_LEN + 1);
+	}
 
 	++h->num_luns;
 	disk = h->gendisk[drv_index];
-	set_capacity(disk, h->drv[drv_index].nr_blocks);
+	set_capacity(disk, h->drv[drv_index]->nr_blocks);
 
 	/* If it's not disk 0 (drv_index != 0)
 	 * or if it was disk 0, but there was previously
@@ -1780,8 +1949,15 @@
 	 * (raid_leve == -1) then we want to update the
 	 * logical drive's information.
 	 */
-	if (drv_index || first_time)
-		cciss_add_disk(h, disk, drv_index);
+	if (drv_index || first_time) {
+		if (cciss_add_disk(h, disk, drv_index) != 0) {
+			cciss_free_gendisk(h, drv_index);
+			cciss_free_drive_info(h, drv_index);
+			printk(KERN_WARNING "cciss:%d could not update "
+				"disk %d\n", h->ctlr, drv_index);
+			--h->num_luns;
+		}
+	}
 
 freeret:
 	kfree(inq_buff);
@@ -1793,28 +1969,70 @@
 }
 
 /* This function will find the first index of the controllers drive array
- * that has a -1 for the raid_level and will return that index.  This is
- * where new drives will be added.  If the index to be returned is greater
- * than the highest_lun index for the controller then highest_lun is set
- * to this new index.  If there are no available indexes then -1 is returned.
- * "controller_node" is used to know if this is a real logical drive, or just
- * the controller node, which determines if this counts towards highest_lun.
+ * that has a null drv pointer and allocate the drive info struct and
+ * will return that index   This is where new drives will be added.
+ * If the index to be returned is greater than the highest_lun index for
+ * the controller then highest_lun is set * to this new index.
+ * If there are no available indexes or if tha allocation fails, then -1
+ * is returned.  * "controller_node" is used to know if this is a real
+ * logical drive, or just the controller node, which determines if this
+ * counts towards highest_lun.
  */
-static int cciss_find_free_drive_index(int ctlr, int controller_node)
+static int cciss_alloc_drive_info(ctlr_info_t *h, int controller_node)
 {
 	int i;
+	drive_info_struct *drv;
 
+	/* Search for an empty slot for our drive info */
 	for (i = 0; i < CISS_MAX_LUN; i++) {
-		if (hba[ctlr]->drv[i].raid_level == -1) {
-			if (i > hba[ctlr]->highest_lun)
-				if (!controller_node)
-					hba[ctlr]->highest_lun = i;
+
+		/* if not cxd0 case, and it's occupied, skip it. */
+		if (h->drv[i] && i != 0)
+			continue;
+		/*
+		 * If it's cxd0 case, and drv is alloc'ed already, and a
+		 * disk is configured there, skip it.
+		 */
+		if (i == 0 && h->drv[i] && h->drv[i]->raid_level != -1)
+			continue;
+
+		/*
+		 * We've found an empty slot.  Update highest_lun
+		 * provided this isn't just the fake cxd0 controller node.
+		 */
+		if (i > h->highest_lun && !controller_node)
+			h->highest_lun = i;
+
+		/* If adding a real disk at cxd0, and it's already alloc'ed */
+		if (i == 0 && h->drv[i] != NULL)
 			return i;
-		}
+
+		/*
+		 * Found an empty slot, not already alloc'ed.  Allocate it.
+		 * Mark it with raid_level == -1, so we know it's new later on.
+		 */
+		drv = kzalloc(sizeof(*drv), GFP_KERNEL);
+		if (!drv)
+			return -1;
+		drv->raid_level = -1; /* so we know it's new */
+		h->drv[i] = drv;
+		return i;
 	}
 	return -1;
 }
 
+static void cciss_free_drive_info(ctlr_info_t *h, int drv_index)
+{
+	kfree(h->drv[drv_index]);
+	h->drv[drv_index] = NULL;
+}
+
+static void cciss_free_gendisk(ctlr_info_t *h, int drv_index)
+{
+	put_disk(h->gendisk[drv_index]);
+	h->gendisk[drv_index] = NULL;
+}
+
 /* cciss_add_gendisk finds a free hba[]->drv structure
  * and allocates a gendisk if needed, and sets the lunid
  * in the drvinfo structure.   It returns the index into
@@ -1824,13 +2042,15 @@
  * a means to talk to the controller in case no logical
  * drives have yet been configured.
  */
-static int cciss_add_gendisk(ctlr_info_t *h, __u32 lunid, int controller_node)
+static int cciss_add_gendisk(ctlr_info_t *h, unsigned char lunid[],
+	int controller_node)
 {
 	int drv_index;
 
-	drv_index = cciss_find_free_drive_index(h->ctlr, controller_node);
+	drv_index = cciss_alloc_drive_info(h, controller_node);
 	if (drv_index == -1)
 		return -1;
+
 	/*Check if the gendisk needs to be allocated */
 	if (!h->gendisk[drv_index]) {
 		h->gendisk[drv_index] =
@@ -1839,23 +2059,24 @@
 			printk(KERN_ERR "cciss%d: could not "
 				"allocate a new disk %d\n",
 				h->ctlr, drv_index);
-			return -1;
+			goto err_free_drive_info;
 		}
 	}
-	h->drv[drv_index].LunID = lunid;
-	if (cciss_create_ld_sysfs_entry(h, &h->drv[drv_index], drv_index))
+	memcpy(h->drv[drv_index]->LunID, lunid,
+		sizeof(h->drv[drv_index]->LunID));
+	if (cciss_create_ld_sysfs_entry(h, drv_index))
 		goto err_free_disk;
-
 	/* Don't need to mark this busy because nobody */
 	/* else knows about this disk yet to contend */
 	/* for access to it. */
-	h->drv[drv_index].busy_configuring = 0;
+	h->drv[drv_index]->busy_configuring = 0;
 	wmb();
 	return drv_index;
 
 err_free_disk:
-	put_disk(h->gendisk[drv_index]);
-	h->gendisk[drv_index] = NULL;
+	cciss_free_gendisk(h, drv_index);
+err_free_drive_info:
+	cciss_free_drive_info(h, drv_index);
 	return -1;
 }
 
@@ -1872,21 +2093,25 @@
 	if (h->gendisk[0] != NULL) /* already did this? Then bail. */
 		return;
 
-	drv_index = cciss_add_gendisk(h, 0, 1);
-	if (drv_index == -1) {
-		printk(KERN_WARNING "cciss%d: could not "
-			"add disk 0.\n", h->ctlr);
-		return;
-	}
-	h->drv[drv_index].block_size = 512;
-	h->drv[drv_index].nr_blocks = 0;
-	h->drv[drv_index].heads = 0;
-	h->drv[drv_index].sectors = 0;
-	h->drv[drv_index].cylinders = 0;
-	h->drv[drv_index].raid_level = -1;
-	memset(h->drv[drv_index].serial_no, 0, 16);
+	drv_index = cciss_add_gendisk(h, CTLR_LUNID, 1);
+	if (drv_index == -1)
+		goto error;
+	h->drv[drv_index]->block_size = 512;
+	h->drv[drv_index]->nr_blocks = 0;
+	h->drv[drv_index]->heads = 0;
+	h->drv[drv_index]->sectors = 0;
+	h->drv[drv_index]->cylinders = 0;
+	h->drv[drv_index]->raid_level = -1;
+	memset(h->drv[drv_index]->serial_no, 0, 16);
 	disk = h->gendisk[drv_index];
-	cciss_add_disk(h, disk, drv_index);
+	if (cciss_add_disk(h, disk, drv_index) == 0)
+		return;
+	cciss_free_gendisk(h, drv_index);
+	cciss_free_drive_info(h, drv_index);
+error:
+	printk(KERN_WARNING "cciss%d: could not "
+		"add disk 0.\n", h->ctlr);
+	return;
 }
 
 /* This function will add and remove logical drives from the Logical
@@ -1897,7 +2122,8 @@
  * INPUT
  * h		= The controller to perform the operations on
  */
-static int rebuild_lun_table(ctlr_info_t *h, int first_time)
+static int rebuild_lun_table(ctlr_info_t *h, int first_time,
+	int via_ioctl)
 {
 	int ctlr = h->ctlr;
 	int num_luns;
@@ -1907,7 +2133,7 @@
 	int i;
 	int drv_found;
 	int drv_index = 0;
-	__u32 lunid = 0;
+	unsigned char lunid[8] = CTLR_LUNID;
 	unsigned long flags;
 
 	if (!capable(CAP_SYS_RAWIO))
@@ -1960,13 +2186,13 @@
 		drv_found = 0;
 
 		/* skip holes in the array from already deleted drives */
-		if (h->drv[i].raid_level == -1)
+		if (h->drv[i] == NULL)
 			continue;
 
 		for (j = 0; j < num_luns; j++) {
-			memcpy(&lunid, &ld_buff->LUN[j][0], 4);
-			lunid = le32_to_cpu(lunid);
-			if (h->drv[i].LunID == lunid) {
+			memcpy(lunid, &ld_buff->LUN[j][0], sizeof(lunid));
+			if (memcmp(h->drv[i]->LunID, lunid,
+				sizeof(lunid)) == 0) {
 				drv_found = 1;
 				break;
 			}
@@ -1974,11 +2200,11 @@
 		if (!drv_found) {
 			/* Deregister it from the OS, it's gone. */
 			spin_lock_irqsave(CCISS_LOCK(h->ctlr), flags);
-			h->drv[i].busy_configuring = 1;
+			h->drv[i]->busy_configuring = 1;
 			spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags);
-			return_code = deregister_disk(h, i, 1);
-			cciss_destroy_ld_sysfs_entry(&h->drv[i]);
-			h->drv[i].busy_configuring = 0;
+			return_code = deregister_disk(h, i, 1, via_ioctl);
+			if (h->drv[i] != NULL)
+				h->drv[i]->busy_configuring = 0;
 		}
 	}
 
@@ -1992,17 +2218,16 @@
 
 		drv_found = 0;
 
-		memcpy(&lunid, &ld_buff->LUN[i][0], 4);
-		lunid = le32_to_cpu(lunid);
-
+		memcpy(lunid, &ld_buff->LUN[i][0], sizeof(lunid));
 		/* Find if the LUN is already in the drive array
 		 * of the driver.  If so then update its info
 		 * if not in use.  If it does not exist then find
 		 * the first free index and add it.
 		 */
 		for (j = 0; j <= h->highest_lun; j++) {
-			if (h->drv[j].raid_level != -1 &&
-				h->drv[j].LunID == lunid) {
+			if (h->drv[j] != NULL &&
+				memcmp(h->drv[j]->LunID, lunid,
+					sizeof(h->drv[j]->LunID)) == 0) {
 				drv_index = j;
 				drv_found = 1;
 				break;
@@ -2015,7 +2240,8 @@
 			if (drv_index == -1)
 				goto freeret;
 		}
-		cciss_update_drive_info(ctlr, drv_index, first_time);
+		cciss_update_drive_info(ctlr, drv_index, first_time,
+			via_ioctl);
 	}		/* end for */
 
 freeret:
@@ -2032,6 +2258,25 @@
 	goto freeret;
 }
 
+static void cciss_clear_drive_info(drive_info_struct *drive_info)
+{
+	/* zero out the disk size info */
+	drive_info->nr_blocks = 0;
+	drive_info->block_size = 0;
+	drive_info->heads = 0;
+	drive_info->sectors = 0;
+	drive_info->cylinders = 0;
+	drive_info->raid_level = -1;
+	memset(drive_info->serial_no, 0, sizeof(drive_info->serial_no));
+	memset(drive_info->model, 0, sizeof(drive_info->model));
+	memset(drive_info->rev, 0, sizeof(drive_info->rev));
+	memset(drive_info->vendor, 0, sizeof(drive_info->vendor));
+	/*
+	 * don't clear the LUNID though, we need to remember which
+	 * one this one is.
+	 */
+}
+
 /* This function will deregister the disk and it's queue from the
  * kernel.  It must be called with the controller lock held and the
  * drv structures busy_configuring flag set.  It's parameters are:
@@ -2046,43 +2291,48 @@
  *             the disk in preparation for re-adding it.  In this case
  *             the highest_lun should be left unchanged and the LunID
  *             should not be cleared.
+ * via_ioctl
+ *    This indicates whether we've reached this path via ioctl.
+ *    This affects the maximum usage count allowed for c0d0 to be messed with.
+ *    If this path is reached via ioctl(), then the max_usage_count will
+ *    be 1, as the process calling ioctl() has got to have the device open.
+ *    If we get here via sysfs, then the max usage count will be zero.
 */
 static int deregister_disk(ctlr_info_t *h, int drv_index,
-			   int clear_all)
+			   int clear_all, int via_ioctl)
 {
 	int i;
 	struct gendisk *disk;
 	drive_info_struct *drv;
+	int recalculate_highest_lun;
 
 	if (!capable(CAP_SYS_RAWIO))
 		return -EPERM;
 
-	drv = &h->drv[drv_index];
+	drv = h->drv[drv_index];
 	disk = h->gendisk[drv_index];
 
 	/* make sure logical volume is NOT is use */
 	if (clear_all || (h->gendisk[0] == disk)) {
-		if (drv->usage_count > 1)
+		if (drv->usage_count > via_ioctl)
 			return -EBUSY;
 	} else if (drv->usage_count > 0)
 		return -EBUSY;
 
+	recalculate_highest_lun = (drv == h->drv[h->highest_lun]);
+
 	/* invalidate the devices and deregister the disk.  If it is disk
 	 * zero do not deregister it but just zero out it's values.  This
 	 * allows us to delete disk zero but keep the controller registered.
 	 */
 	if (h->gendisk[0] != disk) {
 		struct request_queue *q = disk->queue;
-		if (disk->flags & GENHD_FL_UP)
+		if (disk->flags & GENHD_FL_UP) {
+			cciss_destroy_ld_sysfs_entry(h, drv_index, 0);
 			del_gendisk(disk);
-		if (q) {
-			blk_cleanup_queue(q);
-			/* Set drv->queue to NULL so that we do not try
-			 * to call blk_start_queue on this queue in the
-			 * interrupt handler
-			 */
-			drv->queue = NULL;
 		}
+		if (q)
+			blk_cleanup_queue(q);
 		/* If clear_all is set then we are deleting the logical
 		 * drive, not just refreshing its info.  For drives
 		 * other than disk 0 we will call put_disk.  We do not
@@ -2105,34 +2355,20 @@
 		}
 	} else {
 		set_capacity(disk, 0);
+		cciss_clear_drive_info(drv);
 	}
 
 	--h->num_luns;
-	/* zero out the disk size info */
-	drv->nr_blocks = 0;
-	drv->block_size = 0;
-	drv->heads = 0;
-	drv->sectors = 0;
-	drv->cylinders = 0;
-	drv->raid_level = -1;	/* This can be used as a flag variable to
-				 * indicate that this element of the drive
-				 * array is free.
-				 */
 
-	if (clear_all) {
-		/* check to see if it was the last disk */
-		if (drv == h->drv + h->highest_lun) {
-			/* if so, find the new hightest lun */
-			int i, newhighest = -1;
-			for (i = 0; i <= h->highest_lun; i++) {
-				/* if the disk has size > 0, it is available */
-				if (h->drv[i].heads)
-					newhighest = i;
-			}
-			h->highest_lun = newhighest;
+	/* if it was the last disk, find the new hightest lun */
+	if (clear_all && recalculate_highest_lun) {
+		int i, newhighest = -1;
+		for (i = 0; i <= h->highest_lun; i++) {
+			/* if the disk has size > 0, it is available */
+			if (h->drv[i] && h->drv[i]->heads)
+				newhighest = i;
 		}
-
-		drv->LunID = 0;
+		h->highest_lun = newhighest;
 	}
 	return 0;
 }
@@ -2479,8 +2715,6 @@
 	} else {		/* Get geometry failed */
 		printk(KERN_WARNING "cciss: reading geometry failed\n");
 	}
-	printk(KERN_INFO "      heads=%d, sectors=%d, cylinders=%d\n\n",
-	       drv->heads, drv->sectors, drv->cylinders);
 }
 
 static void
@@ -2514,9 +2748,6 @@
 		*total_size = 0;
 		*block_size = BLOCK_SIZE;
 	}
-	if (*total_size != 0)
-		printk(KERN_INFO "      blocks= %llu block_size= %d\n",
-		(unsigned long long)*total_size+1, *block_size);
 	kfree(buf);
 }
 
@@ -2568,7 +2799,8 @@
 	InquiryData_struct *inq_buff = NULL;
 
 	for (logvol = 0; logvol < CISS_MAX_LUN; logvol++) {
-		if (h->drv[logvol].LunID == drv->LunID) {
+		if (memcmp(h->drv[logvol]->LunID, drv->LunID,
+			sizeof(drv->LunID)) == 0) {
 			FOUND = 1;
 			break;
 		}
@@ -3053,8 +3285,7 @@
 	/* The first 2 bits are reserved for controller error reporting. */
 	c->Header.Tag.lower = (c->cmdindex << 3);
 	c->Header.Tag.lower |= 0x04;	/* flag for direct lookup. */
-	c->Header.LUN.LogDev.VolId = drv->LunID;
-	c->Header.LUN.LogDev.Mode = 1;
+	memcpy(&c->Header.LUN, drv->LunID, sizeof(drv->LunID));
 	c->Request.CDBLen = 10;	// 12 byte commands not in FW yet;
 	c->Request.Type.Type = TYPE_CMD;	// It is a command.
 	c->Request.Type.Attribute = ATTR_SIMPLE;
@@ -3232,20 +3463,121 @@
 	return IRQ_HANDLED;
 }
 
+/**
+ * add_to_scan_list() - add controller to rescan queue
+ * @h:		      Pointer to the controller.
+ *
+ * Adds the controller to the rescan queue if not already on the queue.
+ *
+ * returns 1 if added to the queue, 0 if skipped (could be on the
+ * queue already, or the controller could be initializing or shutting
+ * down).
+ **/
+static int add_to_scan_list(struct ctlr_info *h)
+{
+	struct ctlr_info *test_h;
+	int found = 0;
+	int ret = 0;
+
+	if (h->busy_initializing)
+		return 0;
+
+	if (!mutex_trylock(&h->busy_shutting_down))
+		return 0;
+
+	mutex_lock(&scan_mutex);
+	list_for_each_entry(test_h, &scan_q, scan_list) {
+		if (test_h == h) {
+			found = 1;
+			break;
+		}
+	}
+	if (!found && !h->busy_scanning) {
+		INIT_COMPLETION(h->scan_wait);
+		list_add_tail(&h->scan_list, &scan_q);
+		ret = 1;
+	}
+	mutex_unlock(&scan_mutex);
+	mutex_unlock(&h->busy_shutting_down);
+
+	return ret;
+}
+
+/**
+ * remove_from_scan_list() - remove controller from rescan queue
+ * @h:			   Pointer to the controller.
+ *
+ * Removes the controller from the rescan queue if present. Blocks if
+ * the controller is currently conducting a rescan.
+ **/
+static void remove_from_scan_list(struct ctlr_info *h)
+{
+	struct ctlr_info *test_h, *tmp_h;
+	int scanning = 0;
+
+	mutex_lock(&scan_mutex);
+	list_for_each_entry_safe(test_h, tmp_h, &scan_q, scan_list) {
+		if (test_h == h) {
+			list_del(&h->scan_list);
+			complete_all(&h->scan_wait);
+			mutex_unlock(&scan_mutex);
+			return;
+		}
+	}
+	if (&h->busy_scanning)
+		scanning = 0;
+	mutex_unlock(&scan_mutex);
+
+	if (scanning)
+		wait_for_completion(&h->scan_wait);
+}
+
+/**
+ * scan_thread() - kernel thread used to rescan controllers
+ * @data:	 Ignored.
+ *
+ * A kernel thread used scan for drive topology changes on
+ * controllers. The thread processes only one controller at a time
+ * using a queue.  Controllers are added to the queue using
+ * add_to_scan_list() and removed from the queue either after done
+ * processing or using remove_from_scan_list().
+ *
+ * returns 0.
+ **/
 static int scan_thread(void *data)
 {
-	ctlr_info_t *h = data;
-	int rc;
-	DECLARE_COMPLETION_ONSTACK(wait);
-	h->rescan_wait = &wait;
+	struct ctlr_info *h;
 
-	for (;;) {
-		rc = wait_for_completion_interruptible(&wait);
+	while (1) {
+		set_current_state(TASK_INTERRUPTIBLE);
+		schedule();
 		if (kthread_should_stop())
 			break;
-		if (!rc)
-			rebuild_lun_table(h, 0);
+
+		while (1) {
+			mutex_lock(&scan_mutex);
+			if (list_empty(&scan_q)) {
+				mutex_unlock(&scan_mutex);
+				break;
+			}
+
+			h = list_entry(scan_q.next,
+				       struct ctlr_info,
+				       scan_list);
+			list_del(&h->scan_list);
+			h->busy_scanning = 1;
+			mutex_unlock(&scan_mutex);
+
+			if (h) {
+				rebuild_lun_table(h, 0, 0);
+				complete_all(&h->scan_wait);
+				mutex_lock(&scan_mutex);
+				h->busy_scanning = 0;
+				mutex_unlock(&scan_mutex);
+			}
+		}
 	}
+
 	return 0;
 }
 
@@ -3268,8 +3600,8 @@
 	case REPORT_LUNS_CHANGED:
 		printk(KERN_WARNING "cciss%d: report LUN data "
 			"changed\n", h->ctlr);
-		if (h->rescan_wait)
-			complete(h->rescan_wait);
+		add_to_scan_list(h);
+		wake_up_process(cciss_scan_thread);
 		return 1;
 	break;
 	case POWER_OR_RESET:
@@ -3489,7 +3821,7 @@
 		if (scratchpad == CCISS_FIRMWARE_READY)
 			break;
 		set_current_state(TASK_INTERRUPTIBLE);
-		schedule_timeout(HZ / 10);	/* wait 100ms */
+		schedule_timeout(msecs_to_jiffies(100));	/* wait 100ms */
 	}
 	if (scratchpad != CCISS_FIRMWARE_READY) {
 		printk(KERN_WARNING "cciss: Board not ready.  Timed out.\n");
@@ -3615,7 +3947,7 @@
 			break;
 		/* delay and try again */
 		set_current_state(TASK_INTERRUPTIBLE);
-		schedule_timeout(10);
+		schedule_timeout(msecs_to_jiffies(1));
 	}
 
 #ifdef CCISS_DEBUG
@@ -3669,15 +4001,16 @@
 	return -1;
 }
 
-static void free_hba(int i)
+static void free_hba(int n)
 {
-	ctlr_info_t *p = hba[i];
-	int n;
+	ctlr_info_t *h = hba[n];
+	int i;
 
-	hba[i] = NULL;
-	for (n = 0; n < CISS_MAX_LUN; n++)
-		put_disk(p->gendisk[n]);
-	kfree(p);
+	hba[n] = NULL;
+	for (i = 0; i < h->highest_lun + 1; i++)
+		if (h->gendisk[i] != NULL)
+			put_disk(h->gendisk[i]);
+	kfree(h);
 }
 
 /* Send a message CDB to the firmware. */
@@ -3918,6 +4251,7 @@
 	hba[i]->busy_initializing = 1;
 	INIT_HLIST_HEAD(&hba[i]->cmpQ);
 	INIT_HLIST_HEAD(&hba[i]->reqQ);
+	mutex_init(&hba[i]->busy_shutting_down);
 
 	if (cciss_pci_init(hba[i], pdev) != 0)
 		goto clean0;
@@ -3926,6 +4260,8 @@
 	hba[i]->ctlr = i;
 	hba[i]->pdev = pdev;
 
+	init_completion(&hba[i]->scan_wait);
+
 	if (cciss_create_hba_sysfs_entry(hba[i]))
 		goto clean0;
 
@@ -4001,8 +4337,7 @@
 	hba[i]->num_luns = 0;
 	hba[i]->highest_lun = -1;
 	for (j = 0; j < CISS_MAX_LUN; j++) {
-		hba[i]->drv[j].raid_level = -1;
-		hba[i]->drv[j].queue = NULL;
+		hba[i]->drv[j] = NULL;
 		hba[i]->gendisk[j] = NULL;
 	}
 
@@ -4035,14 +4370,8 @@
 
 	hba[i]->cciss_max_sectors = 2048;
 
+	rebuild_lun_table(hba[i], 1, 0);
 	hba[i]->busy_initializing = 0;
-
-	rebuild_lun_table(hba[i], 1);
-	hba[i]->cciss_scan_thread = kthread_run(scan_thread, hba[i],
-				"cciss_scan%02d", i);
-	if (IS_ERR(hba[i]->cciss_scan_thread))
-		return PTR_ERR(hba[i]->cciss_scan_thread);
-
 	return 1;
 
 clean4:
@@ -4063,12 +4392,7 @@
 	cciss_destroy_hba_sysfs_entry(hba[i]);
 clean0:
 	hba[i]->busy_initializing = 0;
-	/* cleanup any queues that may have been initialized */
-	for (j=0; j <= hba[i]->highest_lun; j++){
-		drive_info_struct *drv = &(hba[i]->drv[j]);
-		if (drv->queue)
-			blk_cleanup_queue(drv->queue);
-	}
+
 	/*
 	 * Deliberately omit pci_disable_device(): it does something nasty to
 	 * Smart Array controllers that pci_enable_device does not undo
@@ -4125,8 +4449,9 @@
 		return;
 	}
 
-	kthread_stop(hba[i]->cciss_scan_thread);
+	mutex_lock(&hba[i]->busy_shutting_down);
 
+	remove_from_scan_list(hba[i]);
 	remove_proc_entry(hba[i]->devname, proc_cciss);
 	unregister_blkdev(hba[i]->major, hba[i]->devname);
 
@@ -4136,8 +4461,10 @@
 		if (disk) {
 			struct request_queue *q = disk->queue;
 
-			if (disk->flags & GENHD_FL_UP)
+			if (disk->flags & GENHD_FL_UP) {
+				cciss_destroy_ld_sysfs_entry(hba[i], j, 1);
 				del_gendisk(disk);
+			}
 			if (q)
 				blk_cleanup_queue(q);
 		}
@@ -4170,6 +4497,7 @@
 	pci_release_regions(pdev);
 	pci_set_drvdata(pdev, NULL);
 	cciss_destroy_hba_sysfs_entry(hba[i]);
+	mutex_unlock(&hba[i]->busy_shutting_down);
 	free_hba(i);
 }
 
@@ -4202,15 +4530,25 @@
 	if (err)
 		return err;
 
+	/* Start the scan thread */
+	cciss_scan_thread = kthread_run(scan_thread, NULL, "cciss_scan");
+	if (IS_ERR(cciss_scan_thread)) {
+		err = PTR_ERR(cciss_scan_thread);
+		goto err_bus_unregister;
+	}
+
 	/* Register for our PCI devices */
 	err = pci_register_driver(&cciss_pci_driver);
 	if (err)
-		goto err_bus_register;
+		goto err_thread_stop;
 
-	return 0;
+	return err;
 
-err_bus_register:
+err_thread_stop:
+	kthread_stop(cciss_scan_thread);
+err_bus_unregister:
 	bus_unregister(&cciss_bus_type);
+
 	return err;
 }
 
@@ -4227,6 +4565,7 @@
 			cciss_remove_one(hba[i]->pdev);
 		}
 	}
+	kthread_stop(cciss_scan_thread);
 	remove_proc_entry("driver/cciss", NULL);
 	bus_unregister(&cciss_bus_type);
 }
diff --git a/drivers/block/cciss.h b/drivers/block/cciss.h
index 06a5db2..31524cf 100644
--- a/drivers/block/cciss.h
+++ b/drivers/block/cciss.h
@@ -2,6 +2,7 @@
 #define CCISS_H
 
 #include <linux/genhd.h>
+#include <linux/mutex.h>
 
 #include "cciss_cmd.h"
 
@@ -29,7 +30,7 @@
 };
 typedef struct _drive_info_struct
 {
- 	__u32   LunID;	
+	unsigned char LunID[8];
 	int 	usage_count;
 	struct request_queue *queue;
 	sector_t nr_blocks;
@@ -51,6 +52,7 @@
 	char vendor[VENDOR_LEN + 1]; /* SCSI vendor string */
 	char model[MODEL_LEN + 1];   /* SCSI model string */
 	char rev[REV_LEN + 1];       /* SCSI revision string */
+	char device_initialized;     /* indicates whether dev is initialized */
 } drive_info_struct;
 
 struct ctlr_info 
@@ -86,7 +88,7 @@
 	BYTE	cciss_read_capacity;
 
 	// information about each logical volume
-	drive_info_struct drv[CISS_MAX_LUN];
+	drive_info_struct *drv[CISS_MAX_LUN];
 
 	struct access_method access;
 
@@ -108,6 +110,8 @@
 	int			nr_frees; 
 	int			busy_configuring;
 	int			busy_initializing;
+	int			busy_scanning;
+	struct mutex		busy_shutting_down;
 
 	/* This element holds the zero based queue number of the last
 	 * queue to be started.  It is used for fairness.
@@ -122,8 +126,8 @@
 	/* and saved for later processing */
 #endif
 	unsigned char alive;
-	struct completion *rescan_wait;
-	struct task_struct *cciss_scan_thread;
+	struct list_head scan_list;
+	struct completion scan_wait;
 	struct device dev;
 };
 
diff --git a/drivers/block/cpqarray.c b/drivers/block/cpqarray.c
index b82d438..6422651 100644
--- a/drivers/block/cpqarray.c
+++ b/drivers/block/cpqarray.c
@@ -32,6 +32,7 @@
 #include <linux/blkpg.h>
 #include <linux/timer.h>
 #include <linux/proc_fs.h>
+#include <linux/seq_file.h>
 #include <linux/init.h>
 #include <linux/hdreg.h>
 #include <linux/spinlock.h>
@@ -177,7 +178,6 @@
 
 #ifdef CONFIG_PROC_FS
 static void ida_procinit(int i);
-static int ida_proc_get_info(char *buffer, char **start, off_t offset, int length, int *eof, void *data);
 #else
 static void ida_procinit(int i) {}
 #endif
@@ -206,6 +206,7 @@
 #ifdef CONFIG_PROC_FS
 
 static struct proc_dir_entry *proc_array;
+static const struct file_operations ida_proc_fops;
 
 /*
  * Get us a file in /proc/array that says something about each controller.
@@ -218,19 +219,16 @@
 		if (!proc_array) return;
 	}
 
-	create_proc_read_entry(hba[i]->devname, 0, proc_array,
-			       ida_proc_get_info, hba[i]);
+	proc_create_data(hba[i]->devname, 0, proc_array, &ida_proc_fops, hba[i]);
 }
 
 /*
  * Report information about this controller.
  */
-static int ida_proc_get_info(char *buffer, char **start, off_t offset, int length, int *eof, void *data)
+static int ida_proc_show(struct seq_file *m, void *v)
 {
-	off_t pos = 0;
-	off_t len = 0;
-	int size, i, ctlr;
-	ctlr_info_t *h = (ctlr_info_t*)data;
+	int i, ctlr;
+	ctlr_info_t *h = (ctlr_info_t*)m->private;
 	drv_info_t *drv;
 #ifdef CPQ_PROC_PRINT_QUEUES
 	cmdlist_t *c;
@@ -238,7 +236,7 @@
 #endif
 
 	ctlr = h->ctlr;
-	size = sprintf(buffer, "%s:  Compaq %s Controller\n"
+	seq_printf(m, "%s:  Compaq %s Controller\n"
 		"       Board ID: 0x%08lx\n"
 		"       Firmware Revision: %c%c%c%c\n"
 		"       Controller Sig: 0x%08lx\n"
@@ -258,55 +256,54 @@
 		h->log_drives, h->phys_drives,
 		h->Qdepth, h->maxQsinceinit);
 
-	pos += size; len += size;
-	
-	size = sprintf(buffer+len, "Logical Drive Info:\n");
-	pos += size; len += size;
+	seq_puts(m, "Logical Drive Info:\n");
 
 	for(i=0; i<h->log_drives; i++) {
 		drv = &h->drv[i];
-		size = sprintf(buffer+len, "ida/c%dd%d: blksz=%d nr_blks=%d\n",
+		seq_printf(m, "ida/c%dd%d: blksz=%d nr_blks=%d\n",
 				ctlr, i, drv->blk_size, drv->nr_blks);
-		pos += size; len += size;
 	}
 
 #ifdef CPQ_PROC_PRINT_QUEUES
 	spin_lock_irqsave(IDA_LOCK(h->ctlr), flags); 
-	size = sprintf(buffer+len, "\nCurrent Queues:\n");
-	pos += size; len += size;
+	seq_puts(m, "\nCurrent Queues:\n");
 
 	c = h->reqQ;
-	size = sprintf(buffer+len, "reqQ = %p", c); pos += size; len += size;
+	seq_printf(m, "reqQ = %p", c);
 	if (c) c=c->next;
 	while(c && c != h->reqQ) {
-		size = sprintf(buffer+len, "->%p", c);
-		pos += size; len += size;
+		seq_printf(m, "->%p", c);
 		c=c->next;
 	}
 
 	c = h->cmpQ;
-	size = sprintf(buffer+len, "\ncmpQ = %p", c); pos += size; len += size;
+	seq_printf(m, "\ncmpQ = %p", c);
 	if (c) c=c->next;
 	while(c && c != h->cmpQ) {
-		size = sprintf(buffer+len, "->%p", c);
-		pos += size; len += size;
+		seq_printf(m, "->%p", c);
 		c=c->next;
 	}
 
-	size = sprintf(buffer+len, "\n"); pos += size; len += size;
+	seq_putc(m, '\n');
 	spin_unlock_irqrestore(IDA_LOCK(h->ctlr), flags); 
 #endif
-	size = sprintf(buffer+len, "nr_allocs = %d\nnr_frees = %d\n",
+	seq_printf(m, "nr_allocs = %d\nnr_frees = %d\n",
 			h->nr_allocs, h->nr_frees);
-	pos += size; len += size;
-
-	*eof = 1;
-	*start = buffer+offset;
-	len -= offset;
-	if (len>length)
-		len = length;
-	return len;
+	return 0;
 }
+
+static int ida_proc_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, ida_proc_show, PDE(inode)->data);
+}
+
+static const struct file_operations ida_proc_fops = {
+	.owner		= THIS_MODULE,
+	.open		= ida_proc_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
 #endif /* CONFIG_PROC_FS */
 
 module_param_array(eisa, int, NULL, 0);
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 376f1ab..23e76fe 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -130,7 +130,7 @@
 	/*
 	 * A list of ios that arrived while we were suspended.
 	 */
-	atomic_t pending[2];
+	atomic_t pending;
 	wait_queue_head_t wait;
 	struct work_struct work;
 	struct bio_list deferred;
@@ -453,14 +453,13 @@
 {
 	struct mapped_device *md = io->md;
 	int cpu;
-	int rw = bio_data_dir(io->bio);
 
 	io->start_time = jiffies;
 
 	cpu = part_stat_lock();
 	part_round_stats(cpu, &dm_disk(md)->part0);
 	part_stat_unlock();
-	dm_disk(md)->part0.in_flight[rw] = atomic_inc_return(&md->pending[rw]);
+	dm_disk(md)->part0.in_flight = atomic_inc_return(&md->pending);
 }
 
 static void end_io_acct(struct dm_io *io)
@@ -480,9 +479,8 @@
 	 * After this is decremented the bio must not be touched if it is
 	 * a barrier.
 	 */
-	dm_disk(md)->part0.in_flight[rw] = pending =
-		atomic_dec_return(&md->pending[rw]);
-	pending += atomic_read(&md->pending[rw^0x1]);
+	dm_disk(md)->part0.in_flight = pending =
+		atomic_dec_return(&md->pending);
 
 	/* nudge anyone waiting on suspend queue */
 	if (!pending)
@@ -1787,8 +1785,7 @@
 	if (!md->disk)
 		goto bad_disk;
 
-	atomic_set(&md->pending[0], 0);
-	atomic_set(&md->pending[1], 0);
+	atomic_set(&md->pending, 0);
 	init_waitqueue_head(&md->wait);
 	INIT_WORK(&md->work, dm_wq_work);
 	init_waitqueue_head(&md->eventq);
@@ -2091,8 +2088,7 @@
 				break;
 			}
 			spin_unlock_irqrestore(q->queue_lock, flags);
-		} else if (!atomic_read(&md->pending[0]) &&
-					!atomic_read(&md->pending[1]))
+		} else if (!atomic_read(&md->pending))
 			break;
 
 		if (interruptible == TASK_INTERRUPTIBLE &&
diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c
index 0acbf4f..8ca17a3 100644
--- a/drivers/mtd/mtd_blkdevs.c
+++ b/drivers/mtd/mtd_blkdevs.c
@@ -32,14 +32,6 @@
 	spinlock_t queue_lock;
 };
 
-static int blktrans_discard_request(struct request_queue *q,
-				    struct request *req)
-{
-	req->cmd_type = REQ_TYPE_LINUX_BLOCK;
-	req->cmd[0] = REQ_LB_OP_DISCARD;
-	return 0;
-}
-
 static int do_blktrans_request(struct mtd_blktrans_ops *tr,
 			       struct mtd_blktrans_dev *dev,
 			       struct request *req)
@@ -52,10 +44,6 @@
 
 	buf = req->buffer;
 
-	if (req->cmd_type == REQ_TYPE_LINUX_BLOCK &&
-	    req->cmd[0] == REQ_LB_OP_DISCARD)
-		return tr->discard(dev, block, nsect);
-
 	if (!blk_fs_request(req))
 		return -EIO;
 
@@ -63,6 +51,9 @@
 	    get_capacity(req->rq_disk))
 		return -EIO;
 
+	if (blk_discard_rq(req))
+		return tr->discard(dev, block, nsect);
+
 	switch(rq_data_dir(req)) {
 	case READ:
 		for (; nsect > 0; nsect--, block++, buf += tr->blksize)
@@ -380,8 +371,8 @@
 	tr->blkcore_priv->rq->queuedata = tr;
 	blk_queue_logical_block_size(tr->blkcore_priv->rq, tr->blksize);
 	if (tr->discard)
-		blk_queue_set_discard(tr->blkcore_priv->rq,
-				      blktrans_discard_request);
+		queue_flag_set_unlocked(QUEUE_FLAG_DISCARD,
+					tr->blkcore_priv->rq);
 
 	tr->blkshift = ffs(tr->blksize) - 1;
 
diff --git a/drivers/staging/dst/dcore.c b/drivers/staging/dst/dcore.c
index ee16010..c24e4e0 100644
--- a/drivers/staging/dst/dcore.c
+++ b/drivers/staging/dst/dcore.c
@@ -102,7 +102,7 @@
 	struct dst_node *n = q->queuedata;
 	int err = -EIO;
 
-	if (bio_empty_barrier(bio) && !q->prepare_discard_fn) {
+	if (bio_empty_barrier(bio) && !blk_queue_discard(q)) {
 		/*
 		 * This is a dirty^Wnice hack, but if we complete this
 		 * operation with -EOPNOTSUPP like intended, XFS
diff --git a/fs/bio.c b/fs/bio.c
index 7673800..402cb84 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -249,6 +249,7 @@
 
 	mempool_free(p, bs->bio_pool);
 }
+EXPORT_SYMBOL(bio_free);
 
 void bio_init(struct bio *bio)
 {
@@ -257,6 +258,7 @@
 	bio->bi_comp_cpu = -1;
 	atomic_set(&bio->bi_cnt, 1);
 }
+EXPORT_SYMBOL(bio_init);
 
 /**
  * bio_alloc_bioset - allocate a bio for I/O
@@ -311,6 +313,7 @@
 	mempool_free(p, bs->bio_pool);
 	return NULL;
 }
+EXPORT_SYMBOL(bio_alloc_bioset);
 
 static void bio_fs_destructor(struct bio *bio)
 {
@@ -337,6 +340,7 @@
 
 	return bio;
 }
+EXPORT_SYMBOL(bio_alloc);
 
 static void bio_kmalloc_destructor(struct bio *bio)
 {
@@ -380,6 +384,7 @@
 
 	return bio;
 }
+EXPORT_SYMBOL(bio_kmalloc);
 
 void zero_fill_bio(struct bio *bio)
 {
@@ -416,6 +421,7 @@
 		bio->bi_destructor(bio);
 	}
 }
+EXPORT_SYMBOL(bio_put);
 
 inline int bio_phys_segments(struct request_queue *q, struct bio *bio)
 {
@@ -424,6 +430,7 @@
 
 	return bio->bi_phys_segments;
 }
+EXPORT_SYMBOL(bio_phys_segments);
 
 /**
  * 	__bio_clone	-	clone a bio
@@ -451,6 +458,7 @@
 	bio->bi_size = bio_src->bi_size;
 	bio->bi_idx = bio_src->bi_idx;
 }
+EXPORT_SYMBOL(__bio_clone);
 
 /**
  *	bio_clone	-	clone a bio
@@ -482,6 +490,7 @@
 
 	return b;
 }
+EXPORT_SYMBOL(bio_clone);
 
 /**
  *	bio_get_nr_vecs		- return approx number of vecs
@@ -505,6 +514,7 @@
 
 	return nr_pages;
 }
+EXPORT_SYMBOL(bio_get_nr_vecs);
 
 static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page
 			  *page, unsigned int len, unsigned int offset,
@@ -635,6 +645,7 @@
 	return __bio_add_page(q, bio, page, len, offset,
 			      queue_max_hw_sectors(q));
 }
+EXPORT_SYMBOL(bio_add_pc_page);
 
 /**
  *	bio_add_page	-	attempt to add page to bio
@@ -655,6 +666,7 @@
 	struct request_queue *q = bdev_get_queue(bio->bi_bdev);
 	return __bio_add_page(q, bio, page, len, offset, queue_max_sectors(q));
 }
+EXPORT_SYMBOL(bio_add_page);
 
 struct bio_map_data {
 	struct bio_vec *iovecs;
@@ -776,6 +788,7 @@
 	bio_put(bio);
 	return ret;
 }
+EXPORT_SYMBOL(bio_uncopy_user);
 
 /**
  *	bio_copy_user_iov	-	copy user data to bio
@@ -920,6 +933,7 @@
 
 	return bio_copy_user_iov(q, map_data, &iov, 1, write_to_vm, gfp_mask);
 }
+EXPORT_SYMBOL(bio_copy_user);
 
 static struct bio *__bio_map_user_iov(struct request_queue *q,
 				      struct block_device *bdev,
@@ -1050,6 +1064,7 @@
 
 	return bio_map_user_iov(q, bdev, &iov, 1, write_to_vm, gfp_mask);
 }
+EXPORT_SYMBOL(bio_map_user);
 
 /**
  *	bio_map_user_iov - map user sg_iovec table into bio
@@ -1117,13 +1132,13 @@
 	__bio_unmap_user(bio);
 	bio_put(bio);
 }
+EXPORT_SYMBOL(bio_unmap_user);
 
 static void bio_map_kern_endio(struct bio *bio, int err)
 {
 	bio_put(bio);
 }
 
-
 static struct bio *__bio_map_kern(struct request_queue *q, void *data,
 				  unsigned int len, gfp_t gfp_mask)
 {
@@ -1189,6 +1204,7 @@
 	bio_put(bio);
 	return ERR_PTR(-EINVAL);
 }
+EXPORT_SYMBOL(bio_map_kern);
 
 static void bio_copy_kern_endio(struct bio *bio, int err)
 {
@@ -1250,6 +1266,7 @@
 
 	return bio;
 }
+EXPORT_SYMBOL(bio_copy_kern);
 
 /*
  * bio_set_pages_dirty() and bio_check_pages_dirty() are support functions
@@ -1400,6 +1417,7 @@
 	if (bio->bi_end_io)
 		bio->bi_end_io(bio, error);
 }
+EXPORT_SYMBOL(bio_endio);
 
 void bio_pair_release(struct bio_pair *bp)
 {
@@ -1410,6 +1428,7 @@
 		mempool_free(bp, bp->bio2.bi_private);
 	}
 }
+EXPORT_SYMBOL(bio_pair_release);
 
 static void bio_pair_end_1(struct bio *bi, int err)
 {
@@ -1477,6 +1496,7 @@
 
 	return bp;
 }
+EXPORT_SYMBOL(bio_split);
 
 /**
  *      bio_sector_offset - Find hardware sector offset in bio
@@ -1547,6 +1567,7 @@
 
 	kfree(bs);
 }
+EXPORT_SYMBOL(bioset_free);
 
 /**
  * bioset_create  - Create a bio_set
@@ -1592,6 +1613,7 @@
 	bioset_free(bs);
 	return NULL;
 }
+EXPORT_SYMBOL(bioset_create);
 
 static void __init biovec_init_slabs(void)
 {
@@ -1636,29 +1658,4 @@
 
 	return 0;
 }
-
 subsys_initcall(init_bio);
-
-EXPORT_SYMBOL(bio_alloc);
-EXPORT_SYMBOL(bio_kmalloc);
-EXPORT_SYMBOL(bio_put);
-EXPORT_SYMBOL(bio_free);
-EXPORT_SYMBOL(bio_endio);
-EXPORT_SYMBOL(bio_init);
-EXPORT_SYMBOL(__bio_clone);
-EXPORT_SYMBOL(bio_clone);
-EXPORT_SYMBOL(bio_phys_segments);
-EXPORT_SYMBOL(bio_add_page);
-EXPORT_SYMBOL(bio_add_pc_page);
-EXPORT_SYMBOL(bio_get_nr_vecs);
-EXPORT_SYMBOL(bio_map_user);
-EXPORT_SYMBOL(bio_unmap_user);
-EXPORT_SYMBOL(bio_map_kern);
-EXPORT_SYMBOL(bio_copy_kern);
-EXPORT_SYMBOL(bio_pair_release);
-EXPORT_SYMBOL(bio_split);
-EXPORT_SYMBOL(bio_copy_user);
-EXPORT_SYMBOL(bio_uncopy_user);
-EXPORT_SYMBOL(bioset_create);
-EXPORT_SYMBOL(bioset_free);
-EXPORT_SYMBOL(bio_alloc_bioset);
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index 7b685e1..f38fee0 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -248,19 +248,11 @@
 		part_stat_read(p, merges[WRITE]),
 		(unsigned long long)part_stat_read(p, sectors[WRITE]),
 		jiffies_to_msecs(part_stat_read(p, ticks[WRITE])),
-		part_in_flight(p),
+		p->in_flight,
 		jiffies_to_msecs(part_stat_read(p, io_ticks)),
 		jiffies_to_msecs(part_stat_read(p, time_in_queue)));
 }
 
-ssize_t part_inflight_show(struct device *dev,
-			struct device_attribute *attr, char *buf)
-{
-	struct hd_struct *p = dev_to_part(dev);
-
-	return sprintf(buf, "%8u %8u\n", p->in_flight[0], p->in_flight[1]);
-}
-
 #ifdef CONFIG_FAIL_MAKE_REQUEST
 ssize_t part_fail_show(struct device *dev,
 		       struct device_attribute *attr, char *buf)
@@ -289,7 +281,6 @@
 static DEVICE_ATTR(size, S_IRUGO, part_size_show, NULL);
 static DEVICE_ATTR(alignment_offset, S_IRUGO, part_alignment_offset_show, NULL);
 static DEVICE_ATTR(stat, S_IRUGO, part_stat_show, NULL);
-static DEVICE_ATTR(inflight, S_IRUGO, part_inflight_show, NULL);
 #ifdef CONFIG_FAIL_MAKE_REQUEST
 static struct device_attribute dev_attr_fail =
 	__ATTR(make-it-fail, S_IRUGO|S_IWUSR, part_fail_show, part_fail_store);
@@ -301,7 +292,6 @@
 	&dev_attr_size.attr,
 	&dev_attr_alignment_offset.attr,
 	&dev_attr_stat.attr,
-	&dev_attr_inflight.attr,
 #ifdef CONFIG_FAIL_MAKE_REQUEST
 	&dev_attr_fail.attr,
 #endif
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index e23a86c..2511904 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -82,7 +82,6 @@
 enum {
 	REQ_LB_OP_EJECT	= 0x40,		/* eject request */
 	REQ_LB_OP_FLUSH = 0x41,		/* flush request */
-	REQ_LB_OP_DISCARD = 0x42,	/* discard sectors */
 };
 
 /*
@@ -261,7 +260,6 @@
 typedef int (make_request_fn) (struct request_queue *q, struct bio *bio);
 typedef int (prep_rq_fn) (struct request_queue *, struct request *);
 typedef void (unplug_fn) (struct request_queue *);
-typedef int (prepare_discard_fn) (struct request_queue *, struct request *);
 
 struct bio_vec;
 struct bvec_merge_data {
@@ -313,6 +311,7 @@
 	unsigned int		alignment_offset;
 	unsigned int		io_min;
 	unsigned int		io_opt;
+	unsigned int		max_discard_sectors;
 
 	unsigned short		logical_block_size;
 	unsigned short		max_hw_segments;
@@ -340,7 +339,6 @@
 	make_request_fn		*make_request_fn;
 	prep_rq_fn		*prep_rq_fn;
 	unplug_fn		*unplug_fn;
-	prepare_discard_fn	*prepare_discard_fn;
 	merge_bvec_fn		*merge_bvec_fn;
 	prepare_flush_fn	*prepare_flush_fn;
 	softirq_done_fn		*softirq_done_fn;
@@ -460,6 +458,7 @@
 #define QUEUE_FLAG_VIRT        QUEUE_FLAG_NONROT /* paravirt device */
 #define QUEUE_FLAG_IO_STAT     15	/* do IO stats */
 #define QUEUE_FLAG_CQ	       16	/* hardware does queuing */
+#define QUEUE_FLAG_DISCARD     17	/* supports DISCARD */
 
 #define QUEUE_FLAG_DEFAULT	((1 << QUEUE_FLAG_IO_STAT) |		\
 				 (1 << QUEUE_FLAG_CLUSTER) |		\
@@ -591,6 +590,7 @@
 #define blk_queue_flushing(q)	((q)->ordseq)
 #define blk_queue_stackable(q)	\
 	test_bit(QUEUE_FLAG_STACKABLE, &(q)->queue_flags)
+#define blk_queue_discard(q)	test_bit(QUEUE_FLAG_DISCARD, &(q)->queue_flags)
 
 #define blk_fs_request(rq)	((rq)->cmd_type == REQ_TYPE_FS)
 #define blk_pc_request(rq)	((rq)->cmd_type == REQ_TYPE_BLOCK_PC)
@@ -929,6 +929,8 @@
 extern void blk_queue_max_phys_segments(struct request_queue *, unsigned short);
 extern void blk_queue_max_hw_segments(struct request_queue *, unsigned short);
 extern void blk_queue_max_segment_size(struct request_queue *, unsigned int);
+extern void blk_queue_max_discard_sectors(struct request_queue *q,
+		unsigned int max_discard_sectors);
 extern void blk_queue_logical_block_size(struct request_queue *, unsigned short);
 extern void blk_queue_physical_block_size(struct request_queue *, unsigned short);
 extern void blk_queue_alignment_offset(struct request_queue *q,
@@ -955,7 +957,6 @@
 extern void blk_queue_dma_alignment(struct request_queue *, int);
 extern void blk_queue_update_dma_alignment(struct request_queue *, int);
 extern void blk_queue_softirq_done(struct request_queue *, softirq_done_fn *);
-extern void blk_queue_set_discard(struct request_queue *, prepare_discard_fn *);
 extern void blk_queue_rq_timed_out(struct request_queue *, rq_timed_out_fn *);
 extern void blk_queue_rq_timeout(struct request_queue *, unsigned int);
 extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev);
@@ -1080,25 +1081,37 @@
 	return q->limits.physical_block_size;
 }
 
+static inline int bdev_physical_block_size(struct block_device *bdev)
+{
+	return queue_physical_block_size(bdev_get_queue(bdev));
+}
+
 static inline unsigned int queue_io_min(struct request_queue *q)
 {
 	return q->limits.io_min;
 }
 
+static inline int bdev_io_min(struct block_device *bdev)
+{
+	return queue_io_min(bdev_get_queue(bdev));
+}
+
 static inline unsigned int queue_io_opt(struct request_queue *q)
 {
 	return q->limits.io_opt;
 }
 
+static inline int bdev_io_opt(struct block_device *bdev)
+{
+	return queue_io_opt(bdev_get_queue(bdev));
+}
+
 static inline int queue_alignment_offset(struct request_queue *q)
 {
-	if (q && q->limits.misaligned)
+	if (q->limits.misaligned)
 		return -1;
 
-	if (q && q->limits.alignment_offset)
-		return q->limits.alignment_offset;
-
-	return 0;
+	return q->limits.alignment_offset;
 }
 
 static inline int queue_sector_alignment_offset(struct request_queue *q,
@@ -1108,6 +1121,19 @@
 		& (q->limits.io_min - 1);
 }
 
+static inline int bdev_alignment_offset(struct block_device *bdev)
+{
+	struct request_queue *q = bdev_get_queue(bdev);
+
+	if (q->limits.misaligned)
+		return -1;
+
+	if (bdev != bdev->bd_contains)
+		return bdev->bd_part->alignment_offset;
+
+	return q->limits.alignment_offset;
+}
+
 static inline int queue_dma_alignment(struct request_queue *q)
 {
 	return q ? q->dma_alignment : 511;
@@ -1146,7 +1172,11 @@
 }
 
 struct work_struct;
+struct delayed_work;
 int kblockd_schedule_work(struct request_queue *q, struct work_struct *work);
+int kblockd_schedule_delayed_work(struct request_queue *q,
+					struct delayed_work *work,
+				 	unsigned long delay);
 
 #define MODULE_ALIAS_BLOCKDEV(major,minor) \
 	MODULE_ALIAS("block-major-" __stringify(major) "-" __stringify(minor))
diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h
index 7e4350e..3b73b99 100644
--- a/include/linux/blktrace_api.h
+++ b/include/linux/blktrace_api.h
@@ -198,6 +198,7 @@
 			   char __user *arg);
 extern int blk_trace_startstop(struct request_queue *q, int start);
 extern int blk_trace_remove(struct request_queue *q);
+extern void blk_trace_remove_sysfs(struct device *dev);
 extern int blk_trace_init_sysfs(struct device *dev);
 
 extern struct attribute_group blk_trace_attr_group;
@@ -211,6 +212,7 @@
 # define blk_trace_startstop(q, start)			(-ENOTTY)
 # define blk_trace_remove(q)				(-ENOTTY)
 # define blk_add_trace_msg(q, fmt, ...)			do { } while (0)
+# define blk_trace_remove_sysfs(dev)			do { } while (0)
 static inline int blk_trace_init_sysfs(struct device *dev)
 {
 	return 0;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index a1e6899..2620a8c 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -300,6 +300,10 @@
 #define BLKTRACESTOP _IO(0x12,117)
 #define BLKTRACETEARDOWN _IO(0x12,118)
 #define BLKDISCARD _IO(0x12,119)
+#define BLKIOMIN _IO(0x12,120)
+#define BLKIOOPT _IO(0x12,121)
+#define BLKALIGNOFF _IO(0x12,122)
+#define BLKPBSZGET _IO(0x12,123)
 
 #define BMAP_IOCTL 1		/* obsolete - kept for compatibility */
 #define FIBMAP	   _IO(0x00,1)	/* bmap access */
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 297df45..7beaa21 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -98,7 +98,7 @@
 	int make_it_fail;
 #endif
 	unsigned long stamp;
-	int in_flight[2];
+	int in_flight;
 #ifdef	CONFIG_SMP
 	struct disk_stats *dkstats;
 #else
@@ -322,23 +322,18 @@
 #define part_stat_sub(cpu, gendiskp, field, subnd)			\
 	part_stat_add(cpu, gendiskp, field, -subnd)
 
-static inline void part_inc_in_flight(struct hd_struct *part, int rw)
+static inline void part_inc_in_flight(struct hd_struct *part)
 {
-	part->in_flight[rw]++;
+	part->in_flight++;
 	if (part->partno)
-		part_to_disk(part)->part0.in_flight[rw]++;
+		part_to_disk(part)->part0.in_flight++;
 }
 
-static inline void part_dec_in_flight(struct hd_struct *part, int rw)
+static inline void part_dec_in_flight(struct hd_struct *part)
 {
-	part->in_flight[rw]--;
+	part->in_flight--;
 	if (part->partno)
-		part_to_disk(part)->part0.in_flight[rw]--;
-}
-
-static inline int part_in_flight(struct hd_struct *part)
-{
-	return part->in_flight[0] + part->in_flight[1];
+		part_to_disk(part)->part0.in_flight--;
 }
 
 /* block/blk-core.c */
@@ -551,8 +546,6 @@
 			      struct device_attribute *attr, char *buf);
 extern ssize_t part_stat_show(struct device *dev,
 			      struct device_attribute *attr, char *buf);
-extern ssize_t part_inflight_show(struct device *dev,
-			      struct device_attribute *attr, char *buf);
 #ifdef CONFIG_FAIL_MAKE_REQUEST
 extern ssize_t part_fail_show(struct device *dev,
 			      struct device_attribute *attr, char *buf);
diff --git a/include/trace/events/block.h b/include/trace/events/block.h
index d86af94..00405b5 100644
--- a/include/trace/events/block.h
+++ b/include/trace/events/block.h
@@ -488,6 +488,39 @@
 		  (unsigned long long)__entry->old_sector)
 );
 
+TRACE_EVENT(block_rq_remap,
+
+	TP_PROTO(struct request_queue *q, struct request *rq, dev_t dev,
+		 sector_t from),
+
+	TP_ARGS(q, rq, dev, from),
+
+	TP_STRUCT__entry(
+		__field( dev_t,		dev		)
+		__field( sector_t,	sector		)
+		__field( unsigned int,	nr_sector	)
+		__field( dev_t,		old_dev		)
+		__field( sector_t,	old_sector	)
+		__array( char,		rwbs,	6	)
+	),
+
+	TP_fast_assign(
+		__entry->dev		= disk_devt(rq->rq_disk);
+		__entry->sector		= blk_rq_pos(rq);
+		__entry->nr_sector	= blk_rq_sectors(rq);
+		__entry->old_dev	= dev;
+		__entry->old_sector	= from;
+		blk_fill_rwbs_rq(__entry->rwbs, rq);
+	),
+
+	TP_printk("%d,%d %s %llu + %u <- (%d,%d) %llu",
+		  MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs,
+		  (unsigned long long)__entry->sector,
+		  __entry->nr_sector,
+		  MAJOR(__entry->old_dev), MINOR(__entry->old_dev),
+		  (unsigned long long)__entry->old_sector)
+);
+
 #endif /* _TRACE_BLOCK_H */
 
 /* This part must be outside protection */
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 3eb159c..d9d6206 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -856,6 +856,37 @@
 }
 
 /**
+ * blk_add_trace_rq_remap - Add a trace for a request-remap operation
+ * @q:		queue the io is for
+ * @rq:		the source request
+ * @dev:	target device
+ * @from:	source sector
+ *
+ * Description:
+ *     Device mapper remaps request to other devices.
+ *     Add a trace for that action.
+ *
+ **/
+static void blk_add_trace_rq_remap(struct request_queue *q,
+				   struct request *rq, dev_t dev,
+				   sector_t from)
+{
+	struct blk_trace *bt = q->blk_trace;
+	struct blk_io_trace_remap r;
+
+	if (likely(!bt))
+		return;
+
+	r.device_from = cpu_to_be32(dev);
+	r.device_to   = cpu_to_be32(disk_devt(rq->rq_disk));
+	r.sector_from = cpu_to_be64(from);
+
+	__blk_add_trace(bt, blk_rq_pos(rq), blk_rq_bytes(rq),
+			rq_data_dir(rq), BLK_TA_REMAP, !!rq->errors,
+			sizeof(r), &r);
+}
+
+/**
  * blk_add_driver_data - Add binary message with driver-specific data
  * @q:		queue the io is for
  * @rq:		io request
@@ -922,10 +953,13 @@
 	WARN_ON(ret);
 	ret = register_trace_block_remap(blk_add_trace_remap);
 	WARN_ON(ret);
+	ret = register_trace_block_rq_remap(blk_add_trace_rq_remap);
+	WARN_ON(ret);
 }
 
 static void blk_unregister_tracepoints(void)
 {
+	unregister_trace_block_rq_remap(blk_add_trace_rq_remap);
 	unregister_trace_block_remap(blk_add_trace_remap);
 	unregister_trace_block_split(blk_add_trace_split);
 	unregister_trace_block_unplug_io(blk_add_trace_unplug_io);
@@ -1657,6 +1691,11 @@
 	return sysfs_create_group(&dev->kobj, &blk_trace_attr_group);
 }
 
+void blk_trace_remove_sysfs(struct device *dev)
+{
+	sysfs_remove_group(&dev->kobj, &blk_trace_attr_group);
+}
+
 #endif /* CONFIG_BLK_DEV_IO_TRACE */
 
 #ifdef CONFIG_EVENT_TRACING
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 4de7f02..a1bc6b9 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -1974,12 +1974,14 @@
 		goto bad_swap;
 	}
 
-	if (blk_queue_nonrot(bdev_get_queue(p->bdev))) {
-		p->flags |= SWP_SOLIDSTATE;
-		p->cluster_next = 1 + (random32() % p->highest_bit);
+	if (p->bdev) {
+		if (blk_queue_nonrot(bdev_get_queue(p->bdev))) {
+			p->flags |= SWP_SOLIDSTATE;
+			p->cluster_next = 1 + (random32() % p->highest_bit);
+		}
+		if (discard_swap(p) == 0)
+			p->flags |= SWP_DISCARDABLE;
 	}
-	if (discard_swap(p) == 0)
-		p->flags |= SWP_DISCARDABLE;
 
 	mutex_lock(&swapon_mutex);
 	spin_lock(&swap_lock);