bio: skip atomic inc/dec of ->bi_cnt for most use cases
Struct bio has a reference count that controls when it can be freed.
Most uses cases is allocating the bio, which then returns with a
single reference to it, doing IO, and then dropping that single
reference. We can remove this atomic_dec_and_test() in the completion
path, if nobody else is holding a reference to the bio.
If someone does call bio_get() on the bio, then we flag the bio as
now having valid count and that we must properly honor the reference
count when it's being put.
Tested-by: Robert Elliott <elliott@hp.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
diff --git a/block/bio.c b/block/bio.c
index 117da31..c2ff8a8 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -271,7 +271,7 @@
memset(bio, 0, sizeof(*bio));
bio->bi_flags = 1 << BIO_UPTODATE;
atomic_set(&bio->__bi_remaining, 1);
- atomic_set(&bio->bi_cnt, 1);
+ atomic_set(&bio->__bi_cnt, 1);
}
EXPORT_SYMBOL(bio_init);
@@ -524,13 +524,17 @@
**/
void bio_put(struct bio *bio)
{
- BIO_BUG_ON(!atomic_read(&bio->bi_cnt));
-
- /*
- * last put frees it
- */
- if (atomic_dec_and_test(&bio->bi_cnt))
+ if (!bio_flagged(bio, BIO_REFFED))
bio_free(bio);
+ else {
+ BIO_BUG_ON(!atomic_read(&bio->__bi_cnt));
+
+ /*
+ * last put frees it
+ */
+ if (atomic_dec_and_test(&bio->__bi_cnt))
+ bio_free(bio);
+ }
}
EXPORT_SYMBOL(bio_put);
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index ab43fad..1616f66 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -619,7 +619,7 @@
bio->bi_end_io = request_endio;
bio->bi_private = &s->cl;
- atomic_set(&bio->bi_cnt, 3);
+ bio_cnt_set(bio, 3);
}
static void search_free(struct closure *cl)
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 96aebf3..8e8d1d1 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -345,7 +345,7 @@
waitqueue_active(&fs_info->async_submit_wait))
wake_up(&fs_info->async_submit_wait);
- BUG_ON(atomic_read(&cur->bi_cnt) == 0);
+ BUG_ON(atomic_read(&cur->__bi_cnt) == 0);
/*
* if we're doing the sync list, record that our
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index a56960d..095f94c 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -356,7 +356,6 @@
{
xfs_ioend_t *ioend = bio->bi_private;
- ASSERT(atomic_read(&bio->bi_cnt) >= 1);
ioend->io_error = test_bit(BIO_UPTODATE, &bio->bi_flags) ? 0 : error;
/* Toss bio and pass work off to an xfsdatad thread */
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 8bfe9ee..7486ea1 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -290,7 +290,21 @@
* returns. and then bio would be freed memory when if (bio->bi_flags ...)
* runs
*/
-#define bio_get(bio) atomic_inc(&(bio)->bi_cnt)
+static inline void bio_get(struct bio *bio)
+{
+ bio->bi_flags |= (1 << BIO_REFFED);
+ smp_mb__before_atomic();
+ atomic_inc(&bio->__bi_cnt);
+}
+
+static inline void bio_cnt_set(struct bio *bio, unsigned int count)
+{
+ if (count != 1) {
+ bio->bi_flags |= (1 << BIO_REFFED);
+ smp_mb__before_atomic();
+ }
+ atomic_set(&bio->__bi_cnt, count);
+}
enum bip_flags {
BIP_BLOCK_INTEGRITY = 1 << 0, /* block layer owns integrity data */
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 8b07e06..93d2e71 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -92,7 +92,7 @@
unsigned short bi_max_vecs; /* max bvl_vecs we can hold */
- atomic_t bi_cnt; /* pin count */
+ atomic_t __bi_cnt; /* pin count */
struct bio_vec *bi_io_vec; /* the actual vec list */
@@ -123,6 +123,7 @@
#define BIO_QUIET 9 /* Make BIO Quiet */
#define BIO_SNAP_STABLE 10 /* bio data must be snapshotted during write */
#define BIO_CHAIN 11 /* chained bio, ->bi_remaining in effect */
+#define BIO_REFFED 12 /* bio has elevated ->bi_cnt */
/*
* Flags starting here get preserved by bio_reset() - this includes