[PATCH] Kill PF_SYNCWRITE flag
A process flag to indicate whether we are doing sync io is incredibly
ugly. It also causes performance problems when one does a lot of async
io and then proceeds to sync it. Part of the io will go out as async,
and the other part as sync. This causes a disconnect between the
previously submitted io and the synced io. For io schedulers such as CFQ,
this will cause us lost merges and suboptimal behaviour in scheduling.
Remove PF_SYNCWRITE completely from the fsync/msync paths, and let
the O_DIRECT path just directly indicate that the writes are sync
by using WRITE_SYNC instead.
Signed-off-by: Jens Axboe <axboe@suse.de>
diff --git a/fs/direct-io.c b/fs/direct-io.c
index b05d1b2..538fb04 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -162,7 +162,7 @@
NULL); /* vmas */
up_read(¤t->mm->mmap_sem);
- if (ret < 0 && dio->blocks_available && (dio->rw == WRITE)) {
+ if (ret < 0 && dio->blocks_available && (dio->rw & WRITE)) {
struct page *page = ZERO_PAGE(dio->curr_user_address);
/*
* A memory fault, but the filesystem has some outstanding
@@ -535,7 +535,7 @@
map_bh->b_state = 0;
map_bh->b_size = fs_count << dio->inode->i_blkbits;
- create = dio->rw == WRITE;
+ create = dio->rw & WRITE;
if (dio->lock_type == DIO_LOCKING) {
if (dio->block_in_file < (i_size_read(dio->inode) >>
dio->blkbits))
@@ -867,7 +867,7 @@
loff_t i_size_aligned;
/* AKPM: eargh, -ENOTBLK is a hack */
- if (dio->rw == WRITE) {
+ if (dio->rw & WRITE) {
page_cache_release(page);
return -ENOTBLK;
}
@@ -1045,7 +1045,7 @@
}
} /* end iovec loop */
- if (ret == -ENOTBLK && rw == WRITE) {
+ if (ret == -ENOTBLK && (rw & WRITE)) {
/*
* The remaining part of the request will be
* be handled by buffered I/O when we return
@@ -1089,7 +1089,7 @@
if (dio->is_async) {
int should_wait = 0;
- if (dio->result < dio->size && rw == WRITE) {
+ if (dio->result < dio->size && (rw & WRITE)) {
dio->waiter = current;
should_wait = 1;
}
@@ -1142,7 +1142,7 @@
ret = transferred;
/* We could have also come here on an AIO file extend */
- if (!is_sync_kiocb(iocb) && rw == WRITE &&
+ if (!is_sync_kiocb(iocb) && (rw & WRITE) &&
ret >= 0 && dio->result == dio->size)
/*
* For AIO writes where we have completed the
@@ -1194,7 +1194,7 @@
int acquire_i_mutex = 0;
if (rw & WRITE)
- current->flags |= PF_SYNCWRITE;
+ rw = WRITE_SYNC;
if (bdev)
bdev_blkbits = blksize_bits(bdev_hardsect_size(bdev));
@@ -1270,7 +1270,7 @@
* even for AIO, we need to wait for i/o to complete before
* returning in this case.
*/
- dio->is_async = !is_sync_kiocb(iocb) && !((rw == WRITE) &&
+ dio->is_async = !is_sync_kiocb(iocb) && !((rw & WRITE) &&
(end > i_size_read(inode)));
retval = direct_io_worker(rw, iocb, inode, iov, offset,
@@ -1284,8 +1284,6 @@
mutex_unlock(&inode->i_mutex);
else if (acquire_i_mutex)
mutex_lock(&inode->i_mutex);
- if (rw & WRITE)
- current->flags &= ~PF_SYNCWRITE;
return retval;
}
EXPORT_SYMBOL(__blockdev_direct_IO);