NFS: support large reads and writes on the wire
Most NFS server implementations allow up to 64KB reads and writes on the
wire. The Solaris NFS server allows up to a megabyte, for instance.
Now the Linux NFS client supports transfer sizes up to 1MB, too. This will
help reduce protocol and context switch overhead on read/write intensive NFS
workloads, and support larger atomic read and write operations on servers
that support them.
Test-plan:
Connectathon and iozone on mount point with wsize=rsize>32768 over TCP.
Tests with NFS over UDP to verify the maximum RPC payload size cap.
Signed-off-by: Chuck Lever <cel@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 80bc4ea..1ce0c20 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -89,18 +89,33 @@
static DECLARE_WAIT_QUEUE_HEAD(nfs_write_congestion);
-static inline struct nfs_write_data *nfs_commit_alloc(void)
+static inline struct nfs_write_data *nfs_commit_alloc(unsigned int pagecount)
{
struct nfs_write_data *p = mempool_alloc(nfs_commit_mempool, SLAB_NOFS);
+
if (p) {
memset(p, 0, sizeof(*p));
INIT_LIST_HEAD(&p->pages);
+ if (pagecount < NFS_PAGEVEC_SIZE)
+ p->pagevec = &p->page_array[0];
+ else {
+ size_t size = ++pagecount * sizeof(struct page *);
+ p->pagevec = kmalloc(size, GFP_NOFS);
+ if (p->pagevec) {
+ memset(p->pagevec, 0, size);
+ } else {
+ mempool_free(p, nfs_commit_mempool);
+ p = NULL;
+ }
+ }
}
return p;
}
static inline void nfs_commit_free(struct nfs_write_data *p)
{
+ if (p && (p->pagevec != &p->page_array[0]))
+ kfree(p->pagevec);
mempool_free(p, nfs_commit_mempool);
}
@@ -167,7 +182,7 @@
int result, written = 0;
struct nfs_write_data *wdata;
- wdata = nfs_writedata_alloc();
+ wdata = nfs_writedata_alloc(1);
if (!wdata)
return -ENOMEM;
@@ -909,7 +924,7 @@
nbytes = req->wb_bytes;
for (;;) {
- data = nfs_writedata_alloc();
+ data = nfs_writedata_alloc(1);
if (!data)
goto out_bad;
list_add(&data->pages, &list);
@@ -973,7 +988,7 @@
if (NFS_SERVER(inode)->wsize < PAGE_CACHE_SIZE)
return nfs_flush_multi(head, inode, how);
- data = nfs_writedata_alloc();
+ data = nfs_writedata_alloc(NFS_SERVER(inode)->wpages);
if (!data)
goto out_bad;
@@ -1241,12 +1256,12 @@
* Commit dirty pages
*/
static int
-nfs_commit_list(struct list_head *head, int how)
+nfs_commit_list(struct inode *inode, struct list_head *head, int how)
{
struct nfs_write_data *data;
struct nfs_page *req;
- data = nfs_commit_alloc();
+ data = nfs_commit_alloc(NFS_SERVER(inode)->wpages);
if (!data)
goto out_bad;
@@ -1351,7 +1366,7 @@
res = nfs_scan_commit(inode, &head, 0, 0);
spin_unlock(&nfsi->req_lock);
if (res) {
- error = nfs_commit_list(&head, how);
+ error = nfs_commit_list(inode, &head, how);
if (error < 0)
return error;
}