bsg: add support for tail queuing

Currently inherited from sg.c bsg will submit asynchronous request
 at the head-of-the-queue, (using "at_head" set in the call to
 blk_execute_rq_nowait()). This is bad in situation where the queues
 are full, requests will execute out of order, and can cause
 starvation of the first submitted requests.

The sg_io_v4->flags member is used and a bit is allocated to denote the
Q_AT_TAIL. Zero is to queue at_head as before, to be compatible with old
code at the write/read path. SG_IO code path behavior was changed so to
be the same as write/read behavior. SG_IO was very rarely used and breaking
compatibility with it is OK at this stage.

sg_io_hdr at sg.h also has a flags member and uses 3 bits from the first
nibble and one bit from the last nibble. Even though none of these bits
are supported by bsg, The second nibble is allocated for use by bsg. Just
in case.

Signed-off-by: Boaz Harrosh <bharrosh@panasas.com>
CC: Douglas Gilbert <dgilbert@interlog.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
diff --git a/block/bsg.c b/block/bsg.c
index 0ce8806..0f63b91 100644
--- a/block/bsg.c
+++ b/block/bsg.c
@@ -353,6 +353,8 @@
 static void bsg_add_command(struct bsg_device *bd, struct request_queue *q,
 			    struct bsg_command *bc, struct request *rq)
 {
+	int at_head = (0 == (bc->hdr.flags & BSG_FLAG_Q_AT_TAIL));
+
 	/*
 	 * add bc command to busy queue and submit rq for io
 	 */
@@ -368,7 +370,7 @@
 	dprintk("%s: queueing rq %p, bc %p\n", bd->name, rq, bc);
 
 	rq->end_io_data = bc;
-	blk_execute_rq_nowait(q, NULL, rq, 1, bsg_rq_end_io);
+	blk_execute_rq_nowait(q, NULL, rq, at_head, bsg_rq_end_io);
 }
 
 static struct bsg_command *bsg_next_done_cmd(struct bsg_device *bd)
@@ -924,6 +926,7 @@
 		struct request *rq;
 		struct bio *bio, *bidi_bio = NULL;
 		struct sg_io_v4 hdr;
+		int at_head;
 		u8 sense[SCSI_SENSE_BUFFERSIZE];
 
 		if (copy_from_user(&hdr, uarg, sizeof(hdr)))
@@ -936,7 +939,9 @@
 		bio = rq->bio;
 		if (rq->next_rq)
 			bidi_bio = rq->next_rq->bio;
-		blk_execute_rq(bd->queue, NULL, rq, 0);
+
+		at_head = (0 == (hdr.flags & BSG_FLAG_Q_AT_TAIL));
+		blk_execute_rq(bd->queue, NULL, rq, at_head);
 		ret = blk_complete_sgv4_hdr_rq(rq, &hdr, bio, bidi_bio);
 
 		if (copy_to_user(uarg, &hdr, sizeof(hdr)))
diff --git a/include/linux/bsg.h b/include/linux/bsg.h
index cf0303a6..3f0c64a 100644
--- a/include/linux/bsg.h
+++ b/include/linux/bsg.h
@@ -7,6 +7,14 @@
 #define BSG_SUB_PROTOCOL_SCSI_TMF	1
 #define BSG_SUB_PROTOCOL_SCSI_TRANSPORT	2
 
+/*
+ * For flags member below
+ * sg.h sg_io_hdr also has bits defined for it's flags member. However
+ * none of these bits are implemented/used by bsg. The bits below are
+ * allocated to not conflict with sg.h ones anyway.
+ */
+#define BSG_FLAG_Q_AT_TAIL 0x10 /* default, == 0 at this bit, is Q_AT_HEAD */
+
 struct sg_io_v4 {
 	__s32 guard;		/* [i] 'Q' to differentiate from v3 */
 	__u32 protocol;		/* [i] 0 -> SCSI , .... */