[SCSI] relax scsi dma alignment

This patch relaxes the default SCSI DMA alignment from 512 bytes to 4
bytes.  I remember from previous discussions that usb and firewire have
sector size alignment requirements, so I upped their alignments in the
respective slave allocs.

The reason for doing this is so that we don't get such a huge amount of
copy overhead in bio_copy_user() for udev.  (basically all inquiries it
issues can now be directly mapped).

Acked-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>
diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
index 264ae60..4bb268b 100644
--- a/drivers/ata/libata-scsi.c
+++ b/drivers/ata/libata-scsi.c
@@ -824,6 +824,9 @@
 	 * requests.
 	 */
 	sdev->max_device_blocked = 1;
+
+	/* set the min alignment */
+	blk_queue_update_dma_alignment(sdev->request_queue, ATA_DMA_PAD_SZ - 1);
 }
 
 static void ata_scsi_dev_config(struct scsi_device *sdev,
@@ -878,7 +881,7 @@
 	if (dev)
 		ata_scsi_dev_config(sdev, dev);
 
-	return 0;	/* scsi layer doesn't check return value, sigh */
+	return 0;
 }
 
 /**
diff --git a/drivers/firewire/fw-sbp2.c b/drivers/firewire/fw-sbp2.c
index 624ff3e..c2169d2 100644
--- a/drivers/firewire/fw-sbp2.c
+++ b/drivers/firewire/fw-sbp2.c
@@ -1238,6 +1238,12 @@
 
 	sdev->allow_restart = 1;
 
+	/*
+	 * Update the dma alignment (minimum alignment requirements for
+	 * start and end of DMA transfers) to be a sector
+	 */
+	blk_queue_update_dma_alignment(sdev->request_queue, 511);
+
 	if (lu->tgt->workarounds & SBP2_WORKAROUND_INQUIRY_36)
 		sdev->inquiry_len = 36;
 
diff --git a/drivers/ieee1394/sbp2.c b/drivers/ieee1394/sbp2.c
index b83d254..1eda11a 100644
--- a/drivers/ieee1394/sbp2.c
+++ b/drivers/ieee1394/sbp2.c
@@ -1963,6 +1963,12 @@
 	lu->sdev = sdev;
 	sdev->allow_restart = 1;
 
+	/*
+	 * Update the dma alignment (minimum alignment requirements for
+	 * start and end of DMA transfers) to be a sector
+	 */
+	blk_queue_update_dma_alignment(sdev->request_queue, 511);
+
 	if (lu->workarounds & SBP2_WORKAROUND_INQUIRY_36)
 		sdev->inquiry_len = 36;
 	return 0;
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index db522228..de60137 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -1668,6 +1668,14 @@
 
 	if (!shost->use_clustering)
 		clear_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags);
+
+	/*
+	 * set a reasonable default alignment on word boundaries: the
+	 * host and device may alter it using
+	 * blk_queue_update_dma_alignment() later.
+	 */
+	blk_queue_dma_alignment(q, 0x03);
+
 	return q;
 }
 EXPORT_SYMBOL(__scsi_alloc_queue);
diff --git a/drivers/usb/storage/scsiglue.c b/drivers/usb/storage/scsiglue.c
index 7c9593b..dd8b13e 100644
--- a/drivers/usb/storage/scsiglue.c
+++ b/drivers/usb/storage/scsiglue.c
@@ -81,6 +81,16 @@
 	 */
 	sdev->inquiry_len = 36;
 
+	/* Scatter-gather buffers (all but the last) must have a length
+	 * divisible by the bulk maxpacket size.  Otherwise a data packet
+	 * would end up being short, causing a premature end to the data
+	 * transfer.  Since high-speed bulk pipes have a maxpacket size
+	 * of 512, we'll use that as the scsi device queue's DMA alignment
+	 * mask.  Guaranteeing proper alignment of the first buffer will
+	 * have the desired effect because, except at the beginning and
+	 * the end, scatter-gather buffers follow page boundaries. */
+	blk_queue_update_dma_alignment(sdev->request_queue, (512 - 1));
+
 	/*
 	 * The UFI spec treates the Peripheral Qualifier bits in an
 	 * INQUIRY result as reserved and requires devices to set them
@@ -100,16 +110,6 @@
 {
 	struct us_data *us = host_to_us(sdev->host);
 
-	/* Scatter-gather buffers (all but the last) must have a length
-	 * divisible by the bulk maxpacket size.  Otherwise a data packet
-	 * would end up being short, causing a premature end to the data
-	 * transfer.  Since high-speed bulk pipes have a maxpacket size
-	 * of 512, we'll use that as the scsi device queue's DMA alignment
-	 * mask.  Guaranteeing proper alignment of the first buffer will
-	 * have the desired effect because, except at the beginning and
-	 * the end, scatter-gather buffers follow page boundaries. */
-	blk_queue_dma_alignment(sdev->request_queue, (512 - 1));
-
 	/* Many devices have trouble transfering more than 32KB at a time,
 	 * while others have trouble with more than 64K. At this time we
 	 * are limiting both to 32K (64 sectores).