NFSv4.1: Cleanups and bugfixes for the pNFS read/writeback/commit code
Move more pnfs-isms out of the generic commit code.
Bugfixes:
- filelayout_scan_commit_lists doesn't need to get/put the lseg.
In fact, since it is run under the inode->i_lock, the put_lseg()
can deadlock.
- Ensure that we distinguish between what needs to be done for
commit-to-data server and what needs to be done for commit-to-MDS
using the new flag PG_COMMIT_TO_DS. Otherwise we may end up calling
put_lseg() on a bucket for a struct nfs_page that got written
through the MDS.
- Fix a case where we were using list_del() on an nfs_page->wb_list
instead of list_del_init().
- filelayout_initiate_commit needs to call filelayout_commit_release
on error instead of the mds_ops->rpc_release(). Otherwise it won't
clear the commit lock.
Cleanups:
- Let the files layout manage the commit lists for the pNFS case.
Don't expose stuff like pnfs_choose_commit_list, and the fact
that the commit buckets hold references to the layout segment
in common code.
- Cast out the put_lseg() calls for the struct nfs_read/write_data->lseg
into the pNFS layer from whence they came.
- Let the pNFS layer manage the NFS_INO_PNFS_COMMIT bit.
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Cc: Fred Isaman <iisaman@netapp.com>
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index 379a085..c24e077 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -224,6 +224,7 @@
{
struct nfs_read_data *rdata = (struct nfs_read_data *)data;
+ put_lseg(rdata->lseg);
rdata->mds_ops->rpc_release(data);
}
@@ -310,6 +311,7 @@
{
struct nfs_write_data *wdata = (struct nfs_write_data *)data;
+ put_lseg(wdata->lseg);
wdata->mds_ops->rpc_release(data);
}
@@ -320,6 +322,7 @@
nfs_commit_release_pages(wdata);
if (atomic_dec_and_test(&NFS_I(wdata->inode)->commits_outstanding))
nfs_commit_clear_lock(NFS_I(wdata->inode));
+ put_lseg(wdata->lseg);
nfs_commitdata_release(wdata);
}
@@ -779,11 +782,16 @@
/* The generic layer is about to remove the req from the commit list.
* If this will make the bucket empty, it will need to put the lseg reference.
- * Note inode lock is held, so we can't do the put here.
*/
-static struct pnfs_layout_segment *
-filelayout_remove_commit_req(struct nfs_page *req)
+static void
+filelayout_clear_request_commit(struct nfs_page *req)
{
+ struct pnfs_layout_segment *freeme = NULL;
+ struct inode *inode = req->wb_context->dentry->d_inode;
+
+ spin_lock(&inode->i_lock);
+ if (!test_and_clear_bit(PG_COMMIT_TO_DS, &req->wb_flags))
+ goto out;
if (list_is_singular(&req->wb_list)) {
struct inode *inode = req->wb_context->dentry->d_inode;
struct pnfs_layout_segment *lseg;
@@ -792,11 +800,16 @@
* since there is only one relevant lseg...
*/
list_for_each_entry(lseg, &NFS_I(inode)->layout->plh_segs, pls_list) {
- if (lseg->pls_range.iomode == IOMODE_RW)
- return lseg;
+ if (lseg->pls_range.iomode == IOMODE_RW) {
+ freeme = lseg;
+ break;
+ }
}
}
- return NULL;
+out:
+ nfs_request_remove_commit_list(req);
+ spin_unlock(&inode->i_lock);
+ put_lseg(freeme);
}
static struct list_head *
@@ -829,9 +842,20 @@
*/
get_lseg(lseg);
}
+ set_bit(PG_COMMIT_TO_DS, &req->wb_flags);
return list;
}
+static void
+filelayout_mark_request_commit(struct nfs_page *req,
+ struct pnfs_layout_segment *lseg)
+{
+ struct list_head *list;
+
+ list = filelayout_choose_commit_list(req, lseg);
+ nfs_request_add_commit_list(req, list);
+}
+
static u32 calc_ds_index_from_commit(struct pnfs_layout_segment *lseg, u32 i)
{
struct nfs4_filelayout_segment *flseg = FILELAYOUT_LSEG(lseg);
@@ -872,7 +896,7 @@
set_bit(lo_fail_bit(IOMODE_RW), &lseg->pls_layout->plh_flags);
set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags);
prepare_to_resend_writes(data);
- data->mds_ops->rpc_release(data);
+ filelayout_commit_release(data);
return -EAGAIN;
}
dprintk("%s ino %lu, how %d\n", __func__, data->inode->i_ino, how);
@@ -895,7 +919,7 @@
list_for_each_entry(lseg, &NFS_I(inode)->layout->plh_segs, pls_list)
if (lseg->pls_range.iomode == IOMODE_RW)
- return get_lseg(lseg);
+ return lseg;
return NULL;
}
@@ -905,10 +929,33 @@
spin_lock(&inode->i_lock);
rv = find_only_write_lseg_locked(inode);
+ if (rv)
+ get_lseg(rv);
spin_unlock(&inode->i_lock);
return rv;
}
+static int
+filelayout_scan_ds_commit_list(struct nfs4_fl_commit_bucket *bucket, int max)
+{
+ struct list_head *src = &bucket->written;
+ struct list_head *dst = &bucket->committing;
+ struct nfs_page *req, *tmp;
+ int ret = 0;
+
+ list_for_each_entry_safe(req, tmp, src, wb_list) {
+ if (!nfs_lock_request(req))
+ continue;
+ nfs_request_remove_commit_list(req);
+ clear_bit(PG_COMMIT_TO_DS, &req->wb_flags);
+ nfs_list_add_request(req, dst);
+ ret++;
+ if (ret == max)
+ break;
+ }
+ return ret;
+}
+
/* Move reqs from written to committing lists, returning count of number moved.
* Note called with i_lock held.
*/
@@ -920,21 +967,16 @@
lseg = find_only_write_lseg_locked(inode);
if (!lseg)
- return 0;
+ goto out_done;
fl = FILELAYOUT_LSEG(lseg);
if (fl->commit_through_mds)
- goto out_put;
- for (i = 0; i < fl->number_of_buckets; i++) {
- if (list_empty(&fl->commit_buckets[i].written))
- continue;
- cnt = nfs_scan_commit_list(&fl->commit_buckets[i].written,
- &fl->commit_buckets[i].committing,
- max);
+ goto out_done;
+ for (i = 0; i < fl->number_of_buckets && max != 0; i++) {
+ cnt = filelayout_scan_ds_commit_list(&fl->commit_buckets[i], max);
max -= cnt;
rv += cnt;
}
-out_put:
- put_lseg(lseg);
+out_done:
return rv;
}
@@ -1033,8 +1075,8 @@
.free_lseg = filelayout_free_lseg,
.pg_read_ops = &filelayout_pg_read_ops,
.pg_write_ops = &filelayout_pg_write_ops,
- .choose_commit_list = filelayout_choose_commit_list,
- .remove_commit_req = filelayout_remove_commit_req,
+ .mark_request_commit = filelayout_mark_request_commit,
+ .clear_request_commit = filelayout_clear_request_commit,
.scan_commit_lists = filelayout_scan_commit_lists,
.commit_pagelist = filelayout_commit_pagelist,
.read_pagelist = filelayout_read_pagelist,