blk-mq: switch ctx pending map to the sparser blk_align_bitmap
Each hardware queue has a bitmap of software queues with pending
requests. When new IO is queued on a software queue, the bit is
set, and when IO is pruned on a hardware queue run, the bit is
cleared. This causes a lot of traffic. Switch this from the regular
BITS_PER_LONG bitmap to a sparser layout, similarly to what was
done for blk-mq tagging.
20% performance increase was observed for single threaded IO, and
about 15% performanc increase on multiple threads driving the
same device.
Signed-off-by: Jens Axboe <axboe@fb.com>
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index f83d15f..952e558 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -11,6 +11,12 @@
void (*notify)(void *data, unsigned long action, unsigned int cpu);
};
+struct blk_mq_ctxmap {
+ unsigned int map_size;
+ unsigned int bits_per_word;
+ struct blk_align_bitmap *map;
+};
+
struct blk_mq_hw_ctx {
struct {
spinlock_t lock;
@@ -31,8 +37,8 @@
void *driver_data;
- unsigned int nr_ctx_map;
- unsigned long *ctx_map;
+ struct blk_mq_ctxmap ctx_map;
+
unsigned int nr_ctx;
struct blk_mq_ctx **ctxs;