[PATCH] fuse: fix async read for legacy filesystems
While asynchronous reads mean a performance improvement in most cases, if
the filesystem assumed that reads are synchronous, then async reads may
degrade performance (filesystem may receive reads out of order, which can
confuse its own readahead logic).
With sshfs a 1.5 to 4 times slowdown can be measured.
There's also a need for userspace filesystems to know whether asynchronous
reads are supported by the kernel or not.
To achieve these, negotiate in the INIT request whether async reads will be
used and the maximum readahead value. Update interface version to 7.6.
If userspace uses a version earlier than 7.6, then disable async reads, and
set maximum readahead value to the maximum read size, as done in previous
versions.
Signed-off-by: Miklos Szeredi <miklos@szeredi.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index a7ef5e7..2963516 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -335,9 +335,14 @@
loff_t pos = page_offset(req->pages[0]);
size_t count = req->num_pages << PAGE_CACHE_SHIFT;
req->out.page_zeroing = 1;
- req->end = fuse_readpages_end;
fuse_read_fill(req, file, inode, pos, count, FUSE_READ);
- request_send_background(fc, req);
+ if (fc->async_read) {
+ req->end = fuse_readpages_end;
+ request_send_background(fc, req);
+ } else {
+ request_send(fc, req);
+ fuse_readpages_end(fc, req);
+ }
}
struct fuse_readpages_data {
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 46cf933..4a83adf 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -272,6 +272,9 @@
reply, before any other request, and never cleared */
unsigned conn_error : 1;
+ /** Do readpages asynchronously? Only set in INIT */
+ unsigned async_read : 1;
+
/*
* The following bitfields are only for optimization purposes
* and hence races in setting them will not cause malfunction
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index c755a04..879e6fb 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -473,6 +473,16 @@
if (req->out.h.error || arg->major != FUSE_KERNEL_VERSION)
fc->conn_error = 1;
else {
+ unsigned long ra_pages;
+
+ if (arg->minor >= 6) {
+ ra_pages = arg->max_readahead / PAGE_CACHE_SIZE;
+ if (arg->flags & FUSE_ASYNC_READ)
+ fc->async_read = 1;
+ } else
+ ra_pages = fc->max_read / PAGE_CACHE_SIZE;
+
+ fc->bdi.ra_pages = min(fc->bdi.ra_pages, ra_pages);
fc->minor = arg->minor;
fc->max_write = arg->minor < 5 ? 4096 : arg->max_write;
}
@@ -496,6 +506,8 @@
arg->major = FUSE_KERNEL_VERSION;
arg->minor = FUSE_KERNEL_MINOR_VERSION;
+ arg->max_readahead = fc->bdi.ra_pages * PAGE_CACHE_SIZE;
+ arg->flags |= FUSE_ASYNC_READ;
req->in.h.opcode = FUSE_INIT;
req->in.numargs = 1;
req->in.args[0].size = sizeof(*arg);
@@ -552,8 +564,6 @@
fc->user_id = d.user_id;
fc->group_id = d.group_id;
fc->max_read = d.max_read;
- if (fc->max_read / PAGE_CACHE_SIZE < fc->bdi.ra_pages)
- fc->bdi.ra_pages = fc->max_read / PAGE_CACHE_SIZE;
/* Used by get_root_inode() */
sb->s_fs_info = fc;