hpsa: optimize cmd_alloc function by remembering last allocation

Empirically, this improves performance slightly (~2% max IOPS) by
allowing cmd_alloc to remember where it left off searching for
free commands between calls instead of always starting its search
at command 0.

Reviewed-by: Scott Teel <scott.teel@pmcs.com>
Signed-off-by: Robert Elliott <elliott@hp.com>
Signed-off-by: Don Brace <don.brace@pmcs.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c
index c95a20c..72abcf3 100644
--- a/drivers/scsi/hpsa.c
+++ b/drivers/scsi/hpsa.c
@@ -4649,9 +4649,10 @@
 	union u64bit temp64;
 	dma_addr_t cmd_dma_handle, err_dma_handle;
 	int refcount;
-	unsigned long offset = 0;
+	unsigned long offset;
 
-	/* There is some *extremely* small but non-zero chance that that
+	/*
+	 * There is some *extremely* small but non-zero chance that that
 	 * multiple threads could get in here, and one thread could
 	 * be scanning through the list of bits looking for a free
 	 * one, but the free ones are always behind him, and other
@@ -4662,6 +4663,7 @@
 	 * infrequently as to be indistinguishable from never.
 	 */
 
+	offset = h->last_allocation; /* benignly racy */
 	for (;;) {
 		i = find_next_zero_bit(h->cmd_pool_bits, h->nr_cmds, offset);
 		if (unlikely(i == h->nr_cmds)) {
@@ -4679,6 +4681,7 @@
 			h->cmd_pool_bits + (i / BITS_PER_LONG));
 		break; /* it's ours now. */
 	}
+	h->last_allocation = i; /* benignly racy */
 
 	/* Zero out all of commandlist except the last field, refcount */
 	memset(c, 0, offsetof(struct CommandList, refcount));
diff --git a/drivers/scsi/hpsa.h b/drivers/scsi/hpsa.h
index 679e4d2..981479a1 100644
--- a/drivers/scsi/hpsa.h
+++ b/drivers/scsi/hpsa.h
@@ -133,6 +133,7 @@
 	struct CfgTable __iomem *cfgtable;
 	int	interrupts_enabled;
 	int 	max_commands;
+	int last_allocation;
 	atomic_t commands_outstanding;
 #	define PERF_MODE_INT	0
 #	define DOORBELL_INT	1