Merge branch 'cfq-2.6.33' into for-2.6.33
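
* cfq-iosched: unconditionally restore the original ioprio/class when
  unboosting a queue
* drbd: account part0.in_flight per direction (read/write)
* aio: batch requests per address_space in io_submit() and unplug each
  backing device once per batch instead of once per request
* block_dev: make fsync() on a block device issue a cache flush after
  sync_blockdev(), treating -EOPNOTSUPP as success
* direct-io: only unplug before waiting for synchronous dio, and submit
  O_DIRECT writes as WRITE_SYNC_PLUG (WRITE_ODIRECT is removed)
* backing-dev/blkdev: move blk_run_backing_dev() and
  blk_run_address_space() from blkdev.h to backing-dev.h
* bio: drop the __always_inline requirement from bvec_kmap_irq() and
  bvec_kunmap_irq()
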
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index aa00d8f..8b5ba18 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -2625,12 +2625,10 @@
cfqq->ioprio = IOPRIO_NORM;
} else {
/*
- * check if we need to unboost the queue
+ * unboost the queue (if needed)
*/
- if (cfqq->ioprio_class != cfqq->org_ioprio_class)
- cfqq->ioprio_class = cfqq->org_ioprio_class;
- if (cfqq->ioprio != cfqq->org_ioprio)
- cfqq->ioprio = cfqq->org_ioprio;
+ cfqq->ioprio_class = cfqq->org_ioprio_class;
+ cfqq->ioprio = cfqq->org_ioprio;
}
}
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
index d3426ff..3678d3d 100644
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -40,7 +40,7 @@
part_stat_inc(cpu, &mdev->vdisk->part0, ios[rw]);
part_stat_add(cpu, &mdev->vdisk->part0, sectors[rw], bio_sectors(bio));
part_stat_unlock();
- mdev->vdisk->part0.in_flight++;
+ mdev->vdisk->part0.in_flight[rw]++;
}
/* Update disk stats when completing request upwards */
@@ -53,7 +53,7 @@
part_stat_add(cpu, &mdev->vdisk->part0, ticks[rw], duration);
part_round_stats(cpu, &mdev->vdisk->part0);
part_stat_unlock();
- mdev->vdisk->part0.in_flight--;
+ mdev->vdisk->part0.in_flight[rw]--;
}
static void _req_is_done(struct drbd_conf *mdev, struct drbd_request *req, const int rw)
diff --git a/fs/aio.c b/fs/aio.c
index 02a2c93..c30dfc0 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -15,6 +15,7 @@
#include <linux/aio_abi.h>
#include <linux/module.h>
#include <linux/syscalls.h>
+#include <linux/backing-dev.h>
#include <linux/uio.h>
#define DEBUG 0
@@ -32,6 +33,9 @@
#include <linux/workqueue.h>
#include <linux/security.h>
#include <linux/eventfd.h>
+#include <linux/blkdev.h>
+#include <linux/mempool.h>
+#include <linux/hash.h>
#include <asm/kmap_types.h>
#include <asm/uaccess.h>
@@ -60,6 +64,14 @@
static DEFINE_SPINLOCK(fput_lock);
static LIST_HEAD(fput_head);
+#define AIO_BATCH_HASH_BITS 3 /* allocated on-stack, so don't go crazy */
+#define AIO_BATCH_HASH_SIZE (1 << AIO_BATCH_HASH_BITS)
+struct aio_batch_entry {
+ struct hlist_node list;
+ struct address_space *mapping;
+};
+mempool_t *abe_pool;
+
static void aio_kick_handler(struct work_struct *);
static void aio_queue_work(struct kioctx *);
@@ -73,6 +85,8 @@
kioctx_cachep = KMEM_CACHE(kioctx,SLAB_HWCACHE_ALIGN|SLAB_PANIC);
aio_wq = create_workqueue("aio");
+ abe_pool = mempool_create_kmalloc_pool(1, sizeof(struct aio_batch_entry));
+ BUG_ON(!abe_pool);
pr_debug("aio_setup: sizeof(struct page) = %d\n", (int)sizeof(struct page));
@@ -1531,8 +1545,44 @@
return 1;
}
+static void aio_batch_add(struct address_space *mapping,
+ struct hlist_head *batch_hash)
+{
+ struct aio_batch_entry *abe;
+ struct hlist_node *pos;
+ unsigned bucket;
+
+ bucket = hash_ptr(mapping, AIO_BATCH_HASH_BITS);
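+ /* one unplug per mapping per batch: skip mappings already queued */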
+ hlist_for_each_entry(abe, pos, &batch_hash[bucket], list) {
+ if (abe->mapping == mapping)
+ return;
+ }
+
+ abe = mempool_alloc(abe_pool, GFP_KERNEL);
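+ /* pin the inode so the mapping stays valid until aio_batch_free() */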
+ BUG_ON(!igrab(mapping->host));
+ abe->mapping = mapping;
+ hlist_add_head(&abe->list, &batch_hash[bucket]);
+ return;
+}
+
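+/*
+ * Unplug every backing device that had requests queued against it in this
+ * io_submit() batch, then drop the inode references taken in aio_batch_add().
+ */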
+static void aio_batch_free(struct hlist_head *batch_hash)
+{
+ struct aio_batch_entry *abe;
+ struct hlist_node *pos, *n;
+ int i;
+
+ for (i = 0; i < AIO_BATCH_HASH_SIZE; i++) {
+ hlist_for_each_entry_safe(abe, pos, n, &batch_hash[i], list) {
+ blk_run_address_space(abe->mapping);
+ iput(abe->mapping->host);
+ hlist_del(&abe->list);
+ mempool_free(abe, abe_pool);
+ }
+ }
+}
+
static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
- struct iocb *iocb)
+ struct iocb *iocb, struct hlist_head *batch_hash)
{
struct kiocb *req;
struct file *file;
@@ -1608,6 +1658,12 @@
;
}
spin_unlock_irq(&ctx->ctx_lock);
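+ /*
+ * Plain read/write iocbs go to a backing device; remember the mapping so
+ * io_submit() can unplug it once for the whole batch.
+ */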
+ if (req->ki_opcode == IOCB_CMD_PREAD ||
+ req->ki_opcode == IOCB_CMD_PREADV ||
+ req->ki_opcode == IOCB_CMD_PWRITE ||
+ req->ki_opcode == IOCB_CMD_PWRITEV)
+ aio_batch_add(file->f_mapping, batch_hash);
+
aio_put_req(req); /* drop extra ref to req */
return 0;
@@ -1635,6 +1691,7 @@
struct kioctx *ctx;
long ret = 0;
int i;
+ struct hlist_head batch_hash[AIO_BATCH_HASH_SIZE] = { { 0, }, };
if (unlikely(nr < 0))
return -EINVAL;
@@ -1666,10 +1723,11 @@
break;
}
- ret = io_submit_one(ctx, user_iocb, &tmp);
+ ret = io_submit_one(ctx, user_iocb, &tmp, batch_hash);
if (ret)
break;
}
+ aio_batch_free(batch_hash);
put_ioctx(ctx);
return i ? i : ret;
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 9cf4b92..dde91e7 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -405,7 +405,17 @@
static int block_fsync(struct file *filp, struct dentry *dentry, int datasync)
{
- return sync_blockdev(I_BDEV(filp->f_mapping->host));
+ struct block_device *bdev = I_BDEV(filp->f_mapping->host);
+ int error;
+
+ error = sync_blockdev(bdev);
+ if (error)
+ return error;
+
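+ /*
+ * sync_blockdev() only gets the dirty pages to the device; also flush
+ * the device's write cache.  -EOPNOTSUPP just means the device has no
+ * flush support, which is fine for fsync.
+ */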
+ error = blkdev_issue_flush(bdev, NULL);
+ if (error == -EOPNOTSUPP)
+ error = 0;
+ return error;
}
/*
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 8b10b87..3af761c 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -1028,9 +1028,6 @@
if (dio->bio)
dio_bio_submit(dio);
- /* All IO is now issued, send it on its way */
- blk_run_address_space(inode->i_mapping);
-
/*
* It is possible that, we return short IO due to end of file.
* In that case, we need to release all the pages we got hold on.
@@ -1057,8 +1054,11 @@
((rw & READ) || (dio->result == dio->size)))
ret = -EIOCBQUEUED;
- if (ret != -EIOCBQUEUED)
+ if (ret != -EIOCBQUEUED) {
+ /* All IO is now issued, send it on its way */
+ blk_run_address_space(inode->i_mapping);
dio_await_completion(dio);
+ }
/*
* Sync will always be dropping the final ref and completing the
@@ -1124,7 +1124,7 @@
int acquire_i_mutex = 0;
if (rw & WRITE)
- rw = WRITE_ODIRECT;
+ rw = WRITE_SYNC_PLUG;
if (bdev)
bdev_blkbits = blksize_bits(bdev_logical_block_size(bdev));
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index b449e73..fcbc26a 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -331,4 +331,17 @@
return 0;
}
+static inline void blk_run_backing_dev(struct backing_dev_info *bdi,
+ struct page *page)
+{
+ if (bdi && bdi->unplug_io_fn)
+ bdi->unplug_io_fn(bdi, page);
+}
+
+static inline void blk_run_address_space(struct address_space *mapping)
+{
+ if (mapping)
+ blk_run_backing_dev(mapping->backing_dev_info, NULL);
+}
+
#endif /* _LINUX_BACKING_DEV_H */
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 5be93f1..474792b 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -450,11 +450,8 @@
/*
* remember never ever reenable interrupts between a bvec_kmap_irq and
* bvec_kunmap_irq!
- *
- * This function MUST be inlined - it plays with the CPU interrupt flags.
*/
-static __always_inline char *bvec_kmap_irq(struct bio_vec *bvec,
- unsigned long *flags)
+static inline char *bvec_kmap_irq(struct bio_vec *bvec, unsigned long *flags)
{
unsigned long addr;
@@ -470,8 +467,7 @@
return (char *) addr + bvec->bv_offset;
}
-static __always_inline void bvec_kunmap_irq(char *buffer,
- unsigned long *flags)
+static inline void bvec_kunmap_irq(char *buffer, unsigned long *flags)
{
unsigned long ptr = (unsigned long) buffer & PAGE_MASK;
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 221cecd..39c601f 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -823,19 +823,6 @@
return bdev->bd_disk->queue;
}
-static inline void blk_run_backing_dev(struct backing_dev_info *bdi,
- struct page *page)
-{
- if (bdi && bdi->unplug_io_fn)
- bdi->unplug_io_fn(bdi, page);
-}
-
-static inline void blk_run_address_space(struct address_space *mapping)
-{
- if (mapping)
- blk_run_backing_dev(mapping->backing_dev_info, NULL);
-}
-
/*
* blk_rq_pos() : the current sector
* blk_rq_bytes() : bytes left in the entire request
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 2620a8c..2f5fca4 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -129,7 +129,6 @@
* WRITE_SYNC Like WRITE_SYNC_PLUG, but also unplugs the device
* immediately after submission. The write equivalent
* of READ_SYNC.
- * WRITE_ODIRECT Special case write for O_DIRECT only.
* SWRITE_SYNC
* SWRITE_SYNC_PLUG Like WRITE_SYNC/WRITE_SYNC_PLUG, but locks the buffer.
* See SWRITE.
@@ -151,7 +150,6 @@
#define READ_META (READ | (1 << BIO_RW_META))
#define WRITE_SYNC_PLUG (WRITE | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_NOIDLE))
#define WRITE_SYNC (WRITE_SYNC_PLUG | (1 << BIO_RW_UNPLUG))
-#define WRITE_ODIRECT (WRITE | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_UNPLUG))
#define SWRITE_SYNC_PLUG \
(SWRITE | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_NOIDLE))
#define SWRITE_SYNC (SWRITE_SYNC_PLUG | (1 << BIO_RW_UNPLUG))