block: Implement support for WRITE SAME
The WRITE SAME command supported on some SCSI devices allows the same
block to be efficiently replicated throughout a block range. Only a
single logical block is transferred from the host and the storage device
writes the same data to all blocks described by the I/O.
This patch implements support for WRITE SAME in the block layer. The
blkdev_issue_write_same() function can be used by filesystems and block
drivers to replicate a buffer across a block range. This can be used to
efficiently initialize software RAID devices, etc.
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Acked-by: Mike Snitzer <snitzer@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
diff --git a/block/blk-core.c b/block/blk-core.c
index 33eded0..3b08054 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1704,6 +1704,11 @@
goto end_io;
}
+ if (bio->bi_rw & REQ_WRITE_SAME && !bdev_write_same(bio->bi_bdev)) {
+ err = -EOPNOTSUPP;
+ goto end_io;
+ }
+
/*
* Various block parts want %current->io_context and lazy ioc
* allocation ends up trading a lot of pain for a small amount of
@@ -1809,8 +1814,6 @@
*/
void submit_bio(int rw, struct bio *bio)
{
- int count = bio_sectors(bio);
-
bio->bi_rw |= rw;
/*
@@ -1818,6 +1821,13 @@
* go through the normal accounting stuff before submission.
*/
if (bio_has_data(bio)) {
+ unsigned int count;
+
+ if (unlikely(rw & REQ_WRITE_SAME))
+ count = bdev_logical_block_size(bio->bi_bdev) >> 9;
+ else
+ count = bio_sectors(bio);
+
if (rw & WRITE) {
count_vm_events(PGPGOUT, count);
} else {
diff --git a/block/blk-lib.c b/block/blk-lib.c
index 19cc761..a062543 100644
--- a/block/blk-lib.c
+++ b/block/blk-lib.c
@@ -130,6 +130,80 @@
EXPORT_SYMBOL(blkdev_issue_discard);
/**
+ * blkdev_issue_write_same - queue a write same operation
+ * @bdev: target blockdev
+ * @sector: start sector
+ * @nr_sects: number of sectors to write
+ * @gfp_mask: memory allocation flags (for bio_alloc)
+ * @page: page containing data to write
+ *
+ * Description:
+ * Issue a write same request for the sectors in question.
+ */
+int blkdev_issue_write_same(struct block_device *bdev, sector_t sector,
+ sector_t nr_sects, gfp_t gfp_mask,
+ struct page *page)
+{
+ DECLARE_COMPLETION_ONSTACK(wait);
+ struct request_queue *q = bdev_get_queue(bdev);
+ unsigned int max_write_same_sectors;
+ struct bio_batch bb;
+ struct bio *bio;
+ int ret = 0;
+
+ if (!q)
+ return -ENXIO;
+
+ max_write_same_sectors = q->limits.max_write_same_sectors;
+
+ if (max_write_same_sectors == 0)
+ return -EOPNOTSUPP;
+
+ atomic_set(&bb.done, 1);
+ bb.flags = 1 << BIO_UPTODATE;
+ bb.wait = &wait;
+
+ while (nr_sects) {
+ bio = bio_alloc(gfp_mask, 1);
+ if (!bio) {
+ ret = -ENOMEM;
+ break;
+ }
+
+ bio->bi_sector = sector;
+ bio->bi_end_io = bio_batch_end_io;
+ bio->bi_bdev = bdev;
+ bio->bi_private = &bb;
+ bio->bi_vcnt = 1;
+ bio->bi_io_vec->bv_page = page;
+ bio->bi_io_vec->bv_offset = 0;
+ bio->bi_io_vec->bv_len = bdev_logical_block_size(bdev);
+
+ if (nr_sects > max_write_same_sectors) {
+ bio->bi_size = max_write_same_sectors << 9;
+ nr_sects -= max_write_same_sectors;
+ sector += max_write_same_sectors;
+ } else {
+ bio->bi_size = nr_sects << 9;
+ nr_sects = 0;
+ }
+
+ atomic_inc(&bb.done);
+ submit_bio(REQ_WRITE | REQ_WRITE_SAME, bio);
+ }
+
+ /* Wait for bios in-flight */
+ if (!atomic_dec_and_test(&bb.done))
+ wait_for_completion(&wait);
+
+ if (!test_bit(BIO_UPTODATE, &bb.flags))
+ ret = -ENOTSUPP;
+
+ return ret;
+}
+EXPORT_SYMBOL(blkdev_issue_write_same);
+
+/**
* blkdev_issue_zeroout - generate number of zero filed write bios
* @bdev: blockdev to issue
* @sector: start sector
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 642b862..936a110 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -419,6 +419,10 @@
|| next->special)
return 0;
+ if (req->cmd_flags & REQ_WRITE_SAME &&
+ !blk_write_same_mergeable(req->bio, next->bio))
+ return 0;
+
/*
* If we are allowed to merge, then append bio list
* from next to rq and release next. merge_requests_fn
@@ -518,6 +522,11 @@
if (bio_integrity(bio) != blk_integrity_rq(rq))
return false;
+ /* must be using the same buffer */
+ if (rq->cmd_flags & REQ_WRITE_SAME &&
+ !blk_write_same_mergeable(rq->bio, bio))
+ return false;
+
return true;
}
diff --git a/block/blk-settings.c b/block/blk-settings.c
index 565a678..779bb76 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -113,6 +113,7 @@
lim->seg_boundary_mask = BLK_SEG_BOUNDARY_MASK;
lim->max_segment_size = BLK_MAX_SEGMENT_SIZE;
lim->max_sectors = lim->max_hw_sectors = BLK_SAFE_MAX_SECTORS;
+ lim->max_write_same_sectors = 0;
lim->max_discard_sectors = 0;
lim->discard_granularity = 0;
lim->discard_alignment = 0;
@@ -144,6 +145,7 @@
lim->max_segments = USHRT_MAX;
lim->max_hw_sectors = UINT_MAX;
lim->max_sectors = UINT_MAX;
+ lim->max_write_same_sectors = UINT_MAX;
}
EXPORT_SYMBOL(blk_set_stacking_limits);
@@ -286,6 +288,18 @@
EXPORT_SYMBOL(blk_queue_max_discard_sectors);
/**
+ * blk_queue_max_write_same_sectors - set max sectors for a single write same
+ * @q: the request queue for the device
+ * @max_write_same_sectors: maximum number of sectors to write per command
+ **/
+void blk_queue_max_write_same_sectors(struct request_queue *q,
+ unsigned int max_write_same_sectors)
+{
+ q->limits.max_write_same_sectors = max_write_same_sectors;
+}
+EXPORT_SYMBOL(blk_queue_max_write_same_sectors);
+
+/**
* blk_queue_max_segments - set max hw segments for a request for this queue
* @q: the request queue for the device
* @max_segments: max number of segments
@@ -510,6 +524,8 @@
t->max_sectors = min_not_zero(t->max_sectors, b->max_sectors);
t->max_hw_sectors = min_not_zero(t->max_hw_sectors, b->max_hw_sectors);
+ t->max_write_same_sectors = min(t->max_write_same_sectors,
+ b->max_write_same_sectors);
t->bounce_pfn = min_not_zero(t->bounce_pfn, b->bounce_pfn);
t->seg_boundary_mask = min_not_zero(t->seg_boundary_mask,
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index ea51d82..247dbfd 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -180,6 +180,13 @@
return queue_var_show(queue_discard_zeroes_data(q), page);
}
+static ssize_t queue_write_same_max_show(struct request_queue *q, char *page)
+{
+ return sprintf(page, "%llu\n",
+ (unsigned long long)q->limits.max_write_same_sectors << 9);
+}
+
+
static ssize_t
queue_max_sectors_store(struct request_queue *q, const char *page, size_t count)
{
@@ -385,6 +392,11 @@
.show = queue_discard_zeroes_data_show,
};
+static struct queue_sysfs_entry queue_write_same_max_entry = {
+ .attr = {.name = "write_same_max_bytes", .mode = S_IRUGO },
+ .show = queue_write_same_max_show,
+};
+
static struct queue_sysfs_entry queue_nonrot_entry = {
.attr = {.name = "rotational", .mode = S_IRUGO | S_IWUSR },
.show = queue_show_nonrot,
@@ -432,6 +444,7 @@
&queue_discard_granularity_entry.attr,
&queue_discard_max_entry.attr,
&queue_discard_zeroes_data_entry.attr,
+ &queue_write_same_max_entry.attr,
&queue_nonrot_entry.attr,
&queue_nomerges_entry.attr,
&queue_rq_affinity_entry.attr,