Merge branch 'bugfixes' into nfs-for-next
diff --git a/Documentation/filesystems/nfs/nfs.txt b/Documentation/filesystems/nfs/nfs.txt
index f50f26c..f2571c8 100644
--- a/Documentation/filesystems/nfs/nfs.txt
+++ b/Documentation/filesystems/nfs/nfs.txt
@@ -12,9 +12,47 @@
protocol.
The purpose of this document is to provide information on some of the
-upcall interfaces that are used in order to provide the NFS client with
-some of the information that it requires in order to fully comply with
-the NFS spec.
+special features of the NFS client that can be configured by system
+administrators.
+
+
+The nfs4_unique_id parameter
+============================
+
+NFSv4 requires clients to identify themselves to servers with a unique
+string. File open and lock state shared between one client and one server
+is associated with this identity. To support robust NFSv4 state recovery
+and transparent state migration, this identity string must not change
+across client reboots.
+
+Without any other intervention, the Linux client uses a string that contains
+the local system's node name. System administrators, however, often do not
+take care to ensure that node names are fully qualified and do not change
+over the lifetime of a client system. Node names can have other
+administrative requirements that require particular behavior that does not
+work well as part of an nfs_client_id4 string.
+
+The nfs.nfs4_unique_id boot parameter specifies a unique string that can be
+used instead of a system's node name when an NFS client identifies itself to
+a server. Thus, if the system's node name is not unique, or it changes, its
+nfs.nfs4_unique_id stays the same, preventing collision with other clients
+or loss of state during NFS reboot recovery or transparent state migration.
+
+The nfs.nfs4_unique_id string is typically a UUID, though it can contain
+anything that is believed to be unique across all NFS clients. An
+nfs4_unique_id string should be chosen when a client system is installed,
+just as a system's root file system gets a fresh UUID in its label at
+install time.
+
+The string should remain fixed for the lifetime of the client. It can be
+changed safely if care is taken that the client shuts down cleanly and all
+outstanding NFSv4 state has expired, to prevent loss of NFSv4 state.
+
+This string can be stored in an NFS client's grub.conf, or it can be provided
+via a net boot facility such as PXE. It may also be specified as an nfs.ko
+module parameter. Specifying a uniquifier string is not supported for NFS
+clients running in containers.
+
The DNS resolver
================
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index ad7e2e5..396a1e6 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1719,6 +1719,11 @@
will be autodetected by the client, and it will fall
back to using the idmapper.
To turn off this behaviour, set the value to '0'.
+ nfs.nfs4_unique_id=
+ [NFS4] Specify an additional fixed unique
+ identification string that NFSv4 clients can insert
+ into their nfs_client_id4 string. This is typically a
+ UUID that is generated at system install time.
nfs.send_implementation_id =
[NFSv4.1] Send client implementation identification
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index 7ef14b3..e4fb3ba 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -7,7 +7,6 @@
*/
#include <linux/types.h>
-#include <linux/utsname.h>
#include <linux/kernel.h>
#include <linux/ktime.h>
#include <linux/slab.h>
@@ -19,6 +18,8 @@
#include <asm/unaligned.h>
+#include "netns.h"
+
#define NLMDBG_FACILITY NLMDBG_MONITOR
#define NSM_PROGRAM 100024
#define NSM_VERSION 1
@@ -40,6 +41,7 @@
u32 proc;
char *mon_name;
+ char *nodename;
};
struct nsm_res {
@@ -70,7 +72,7 @@
};
struct rpc_create_args args = {
.net = net,
- .protocol = XPRT_TRANSPORT_UDP,
+ .protocol = XPRT_TRANSPORT_TCP,
.address = (struct sockaddr *)&sin,
.addrsize = sizeof(sin),
.servername = "rpc.statd",
@@ -83,10 +85,54 @@
return rpc_create(&args);
}
-static int nsm_mon_unmon(struct nsm_handle *nsm, u32 proc, struct nsm_res *res,
- struct net *net)
+/*
+ * Take a reference on the per-net NSM RPC client, creating the client
+ * on first use.
+ *
+ * Returns the shared client, or the ERR_PTR() value produced by
+ * nsm_create() if the client could not be created.  Every successful
+ * call is expected to be balanced by nsm_client_put().
+ *
+ * ln->nsm_users and ln->nsm_clnt are only modified under
+ * ln->nsm_clnt_lock.  Creation is serialized by nsm_create_mutex, and
+ * the user count is re-checked after taking that mutex so that two
+ * racing first users cannot both create a client (which would leak one
+ * client and undercount the references).
+ */
+static struct rpc_clnt *nsm_client_get(struct net *net)
{
+	static DEFINE_MUTEX(nsm_create_mutex);
	struct rpc_clnt *clnt;
+	struct lockd_net *ln = net_generic(net, lockd_net_id);
+
+	/* Fast path: a client already exists, just take a reference. */
+	spin_lock(&ln->nsm_clnt_lock);
+	if (ln->nsm_users) {
+		ln->nsm_users++;
+		clnt = ln->nsm_clnt;
+		spin_unlock(&ln->nsm_clnt_lock);
+		goto out;
+	}
+	spin_unlock(&ln->nsm_clnt_lock);
+
+	mutex_lock(&nsm_create_mutex);
+
+	/*
+	 * Somebody else may have created the client between the check
+	 * above and the mutex being acquired: look again before creating.
+	 */
+	spin_lock(&ln->nsm_clnt_lock);
+	if (ln->nsm_users) {
+		ln->nsm_users++;
+		clnt = ln->nsm_clnt;
+		spin_unlock(&ln->nsm_clnt_lock);
+		goto out_unlock;
+	}
+	spin_unlock(&ln->nsm_clnt_lock);
+
+	/* Create outside the spinlock; only publication needs it. */
+	clnt = nsm_create(net);
+	if (!IS_ERR(clnt)) {
+		spin_lock(&ln->nsm_clnt_lock);
+		ln->nsm_clnt = clnt;
+		ln->nsm_users = 1;
+		spin_unlock(&ln->nsm_clnt_lock);
+	}
+out_unlock:
+	mutex_unlock(&nsm_create_mutex);
+out:
+	return clnt;
+}
+
+/*
+ * Drop one reference on the per-net NSM RPC client.
+ *
+ * When the last user goes away the client pointer is detached from the
+ * lockd_net under ln->nsm_clnt_lock and the client is shut down after
+ * the lock has been released.  While other users remain, ln->nsm_clnt
+ * is left untouched so they keep a valid client.
+ *
+ * A put with no outstanding users is ignored rather than letting the
+ * unsigned counter wrap.
+ */
+static void nsm_client_put(struct net *net)
+{
+	struct lockd_net *ln = net_generic(net, lockd_net_id);
+	struct rpc_clnt *clnt = NULL;
+
+	spin_lock(&ln->nsm_clnt_lock);
+	if (ln->nsm_users && --ln->nsm_users == 0) {
+		/* Last user: take ownership of the client for shutdown. */
+		clnt = ln->nsm_clnt;
+		ln->nsm_clnt = NULL;
+	}
+	spin_unlock(&ln->nsm_clnt_lock);
+
+	if (clnt != NULL)
+		rpc_shutdown_client(clnt);
+}
+
+static int nsm_mon_unmon(struct nsm_handle *nsm, u32 proc, struct nsm_res *res,
+ struct rpc_clnt *clnt)
+{
int status;
struct nsm_args args = {
.priv = &nsm->sm_priv,
@@ -94,31 +140,24 @@
.vers = 3,
.proc = NLMPROC_NSM_NOTIFY,
.mon_name = nsm->sm_mon_name,
+ .nodename = clnt->cl_nodename,
};
struct rpc_message msg = {
.rpc_argp = &args,
.rpc_resp = res,
};
- clnt = nsm_create(net);
- if (IS_ERR(clnt)) {
- status = PTR_ERR(clnt);
- dprintk("lockd: failed to create NSM upcall transport, "
- "status=%d\n", status);
- goto out;
- }
+ BUG_ON(clnt == NULL);
memset(res, 0, sizeof(*res));
msg.rpc_proc = &clnt->cl_procinfo[proc];
- status = rpc_call_sync(clnt, &msg, 0);
+ status = rpc_call_sync(clnt, &msg, RPC_TASK_SOFTCONN);
if (status < 0)
dprintk("lockd: NSM upcall RPC failed, status=%d\n",
status);
else
status = 0;
- rpc_shutdown_client(clnt);
- out:
return status;
}
@@ -138,6 +177,7 @@
struct nsm_handle *nsm = host->h_nsmhandle;
struct nsm_res res;
int status;
+ struct rpc_clnt *clnt;
dprintk("lockd: nsm_monitor(%s)\n", nsm->sm_name);
@@ -150,7 +190,15 @@
*/
nsm->sm_mon_name = nsm_use_hostnames ? nsm->sm_name : nsm->sm_addrbuf;
- status = nsm_mon_unmon(nsm, NSMPROC_MON, &res, host->net);
+ clnt = nsm_client_get(host->net);
+ if (IS_ERR(clnt)) {
+ status = PTR_ERR(clnt);
+ dprintk("lockd: failed to create NSM upcall transport, "
+ "status=%d, net=%p\n", status, host->net);
+ return status;
+ }
+
+ status = nsm_mon_unmon(nsm, NSMPROC_MON, &res, clnt);
if (unlikely(res.status != 0))
status = -EIO;
if (unlikely(status < 0)) {
@@ -182,9 +230,11 @@
if (atomic_read(&nsm->sm_count) == 1
&& nsm->sm_monitored && !nsm->sm_sticky) {
+ struct lockd_net *ln = net_generic(host->net, lockd_net_id);
+
dprintk("lockd: nsm_unmonitor(%s)\n", nsm->sm_name);
- status = nsm_mon_unmon(nsm, NSMPROC_UNMON, &res, host->net);
+ status = nsm_mon_unmon(nsm, NSMPROC_UNMON, &res, ln->nsm_clnt);
if (res.status != 0)
status = -EIO;
if (status < 0)
@@ -192,6 +242,8 @@
nsm->sm_name);
else
nsm->sm_monitored = 0;
+
+ nsm_client_put(host->net);
}
}
@@ -430,7 +482,7 @@
{
__be32 *p;
- encode_nsm_string(xdr, utsname()->nodename);
+ encode_nsm_string(xdr, argp->nodename);
p = xdr_reserve_space(xdr, 4 + 4 + 4);
*p++ = cpu_to_be32(argp->prog);
*p++ = cpu_to_be32(argp->vers);
diff --git a/fs/lockd/netns.h b/fs/lockd/netns.h
index 4eee248..5010b55 100644
--- a/fs/lockd/netns.h
+++ b/fs/lockd/netns.h
@@ -12,6 +12,10 @@
struct delayed_work grace_period_end;
struct lock_manager lockd_manager;
struct list_head grace_list;
+
+ spinlock_t nsm_clnt_lock;
+ unsigned int nsm_users;
+ struct rpc_clnt *nsm_clnt;
};
extern int lockd_net_id;
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index 31a63f8..7e35587 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -596,6 +596,7 @@
INIT_DELAYED_WORK(&ln->grace_period_end, grace_ender);
INIT_LIST_HEAD(&ln->grace_list);
+ spin_lock_init(&ln->nsm_clnt_lock);
return 0;
}
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index dd392ed..af3ef0e 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -162,25 +162,39 @@
return bio;
}
-static struct bio *bl_add_page_to_bio(struct bio *bio, int npg, int rw,
+static struct bio *do_add_page_to_bio(struct bio *bio, int npg, int rw,
sector_t isect, struct page *page,
struct pnfs_block_extent *be,
void (*end_io)(struct bio *, int err),
- struct parallel_io *par)
+ struct parallel_io *par,
+ unsigned int offset, int len)
{
+ isect = isect + (offset >> SECTOR_SHIFT);
+ dprintk("%s: npg %d rw %d isect %llu offset %u len %d\n", __func__,
+ npg, rw, (unsigned long long)isect, offset, len);
retry:
if (!bio) {
bio = bl_alloc_init_bio(npg, isect, be, end_io, par);
if (!bio)
return ERR_PTR(-ENOMEM);
}
- if (bio_add_page(bio, page, PAGE_CACHE_SIZE, 0) < PAGE_CACHE_SIZE) {
+ if (bio_add_page(bio, page, len, offset) < len) {
bio = bl_submit_bio(rw, bio);
goto retry;
}
return bio;
}
+/*
+ * Whole-page variant of do_add_page_to_bio(): adds the page starting at
+ * offset 0 for PAGE_CACHE_SIZE bytes.
+ */
+static struct bio *bl_add_page_to_bio(struct bio *bio, int npg, int rw,
+				      sector_t isect, struct page *page,
+				      struct pnfs_block_extent *be,
+				      void (*end_io)(struct bio *, int err),
+				      struct parallel_io *par)
+{
+	const unsigned int whole_page_offset = 0;
+	const int whole_page_len = PAGE_CACHE_SIZE;
+
+	return do_add_page_to_bio(bio, npg, rw, isect, page, be, end_io, par,
+				  whole_page_offset, whole_page_len);
+}
+
/* This is basically copied from mpage_end_io_read */
static void bl_end_io_read(struct bio *bio, int err)
{
@@ -228,14 +242,6 @@
schedule_work(&rdata->task.u.tk_work);
}
-static bool
-bl_check_alignment(u64 offset, u32 len, unsigned long blkmask)
-{
- if ((offset & blkmask) || (len & blkmask))
- return false;
- return true;
-}
-
static enum pnfs_try_status
bl_read_pagelist(struct nfs_read_data *rdata)
{
@@ -246,15 +252,15 @@
sector_t isect, extent_length = 0;
struct parallel_io *par;
loff_t f_offset = rdata->args.offset;
+ size_t bytes_left = rdata->args.count;
+ unsigned int pg_offset, pg_len;
struct page **pages = rdata->args.pages;
int pg_index = rdata->args.pgbase >> PAGE_CACHE_SHIFT;
+ const bool is_dio = (header->dreq != NULL);
dprintk("%s enter nr_pages %u offset %lld count %u\n", __func__,
rdata->pages.npages, f_offset, (unsigned int)rdata->args.count);
- if (!bl_check_alignment(f_offset, rdata->args.count, PAGE_CACHE_MASK))
- goto use_mds;
-
par = alloc_parallel(rdata);
if (!par)
goto use_mds;
@@ -284,36 +290,53 @@
extent_length = min(extent_length, cow_length);
}
}
+
+ if (is_dio) {
+ pg_offset = f_offset & ~PAGE_CACHE_MASK;
+ if (pg_offset + bytes_left > PAGE_CACHE_SIZE)
+ pg_len = PAGE_CACHE_SIZE - pg_offset;
+ else
+ pg_len = bytes_left;
+
+ f_offset += pg_len;
+ bytes_left -= pg_len;
+ isect += (pg_offset >> SECTOR_SHIFT);
+ } else {
+ pg_offset = 0;
+ pg_len = PAGE_CACHE_SIZE;
+ }
+
hole = is_hole(be, isect);
if (hole && !cow_read) {
bio = bl_submit_bio(READ, bio);
/* Fill hole w/ zeroes w/o accessing device */
dprintk("%s Zeroing page for hole\n", __func__);
- zero_user_segment(pages[i], 0, PAGE_CACHE_SIZE);
+ zero_user_segment(pages[i], pg_offset, pg_len);
print_page(pages[i]);
SetPageUptodate(pages[i]);
} else {
struct pnfs_block_extent *be_read;
be_read = (hole && cow_read) ? cow_read : be;
- bio = bl_add_page_to_bio(bio, rdata->pages.npages - i,
+ bio = do_add_page_to_bio(bio, rdata->pages.npages - i,
READ,
isect, pages[i], be_read,
- bl_end_io_read, par);
+ bl_end_io_read, par,
+ pg_offset, pg_len);
if (IS_ERR(bio)) {
header->pnfs_error = PTR_ERR(bio);
bio = NULL;
goto out;
}
}
- isect += PAGE_CACHE_SECTORS;
+ isect += (pg_len >> SECTOR_SHIFT);
extent_length -= PAGE_CACHE_SECTORS;
}
if ((isect << SECTOR_SHIFT) >= header->inode->i_size) {
rdata->res.eof = 1;
- rdata->res.count = header->inode->i_size - f_offset;
+ rdata->res.count = header->inode->i_size - rdata->args.offset;
} else {
- rdata->res.count = (isect << SECTOR_SHIFT) - f_offset;
+ rdata->res.count = (isect << SECTOR_SHIFT) - rdata->args.offset;
}
out:
bl_put_extent(be);
@@ -461,6 +484,106 @@
return;
}
+/*
+ * Completion handler for the single-page bio built by
+ * bl_do_readpage_sync(): unlocks the page held in the bio's last
+ * bio_vec entry.  The error argument is not examined here.
+ */
+static void
+bl_read_single_end_io(struct bio *bio, int error)
+{
+	/* Only one page in bvec */
+	unlock_page(bio->bi_io_vec[bio->bi_vcnt - 1].bv_page);
+}
+
+/*
+ * Synchronously read one sector from the extent's device into a
+ * temporary shadow page, then copy @len bytes at @offset from the
+ * shadow page into @page.
+ *
+ * @page:   destination page; page->index selects the file position
+ * @be:     extent supplying the device and the file/volume offsets
+ * @offset: byte offset within the page of the data to fill in
+ * @len:    number of bytes to copy into @page
+ *
+ * Returns 0 on success, -ENOMEM if the shadow page or the bio cannot be
+ * allocated, or -EIO if the page cannot be added to the bio or the read
+ * does not complete successfully.  The shadow page and the bio are
+ * released on every exit path.
+ */
+static int
+bl_do_readpage_sync(struct page *page, struct pnfs_block_extent *be,
+		    unsigned int offset, unsigned int len)
+{
+	struct bio *bio;
+	struct page *shadow_page;
+	sector_t isect;
+	char *kaddr, *kshadow_addr;
+	int ret = 0;
+
+	dprintk("%s: offset %u len %u\n", __func__, offset, len);
+
+	shadow_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
+	if (shadow_page == NULL)
+		return -ENOMEM;
+
+	bio = bio_alloc(GFP_NOIO, 1);
+	if (bio == NULL) {
+		/* Do not leak the shadow page allocated above. */
+		ret = -ENOMEM;
+		goto out_free_page;
+	}
+
+	isect = (page->index << PAGE_CACHE_SECTOR_SHIFT) +
+		(offset / SECTOR_SIZE);
+
+	bio->bi_sector = isect - be->be_f_offset + be->be_v_offset;
+	bio->bi_bdev = be->be_mdev;
+	bio->bi_end_io = bl_read_single_end_io;
+
+	/* The end_io handler unlocks the shadow page when the read is done. */
+	lock_page(shadow_page);
+	if (bio_add_page(bio, shadow_page,
+			 SECTOR_SIZE, round_down(offset, SECTOR_SIZE)) == 0) {
+		unlock_page(shadow_page);
+		ret = -EIO;
+		goto out_put_bio;
+	}
+
+	submit_bio(READ, bio);
+	wait_on_page_locked(shadow_page);
+	if (unlikely(!test_bit(BIO_UPTODATE, &bio->bi_flags))) {
+		ret = -EIO;
+	} else {
+		kaddr = kmap_atomic(page);
+		kshadow_addr = kmap_atomic(shadow_page);
+		memcpy(kaddr + offset, kshadow_addr + offset, len);
+		kunmap_atomic(kshadow_addr);
+		kunmap_atomic(kaddr);
+	}
+
+out_put_bio:
+	bio_put(bio);
+out_free_page:
+	__free_page(shadow_page);
+	return ret;
+}
+
+/*
+ * Fill in the parts of @page that surround the dirty byte range
+ * [dirty_offset, dirty_offset + dirty_len).
+ *
+ * The range to fill is the whole page when @full_page is set, otherwise
+ * the dirty range rounded out to SECTOR_SIZE boundaries.  With no
+ * extent (@be == NULL) the surrounding bytes are zeroed, and the page
+ * is marked up to date if the whole page was covered and the page lock
+ * could be taken.  With an extent, the leading and trailing pieces are
+ * read through bl_do_readpage_sync().
+ *
+ * Returns 0 on success or the error from bl_do_readpage_sync().
+ */
+static int
+bl_read_partial_page_sync(struct page *page, struct pnfs_block_extent *be,
+			  unsigned int dirty_offset, unsigned int dirty_len,
+			  bool full_page)
+{
+	const unsigned int dirty_end = dirty_offset + dirty_len;
+	unsigned int start = 0;
+	unsigned int end = PAGE_CACHE_SIZE;
+	int ret = 0;
+
+	if (!full_page) {
+		start = round_down(dirty_offset, SECTOR_SIZE);
+		end = round_up(dirty_end, SECTOR_SIZE);
+	}
+
+	dprintk("%s: offset %u len %d\n", __func__, dirty_offset, dirty_len);
+
+	if (be == NULL) {
+		/* No backing extent: zero everything around the dirty bytes. */
+		zero_user_segments(page, start, dirty_offset, dirty_end, end);
+		if (start == 0 && end == PAGE_CACHE_SIZE &&
+		    trylock_page(page)) {
+			SetPageUptodate(page);
+			unlock_page(page);
+		}
+		return 0;
+	}
+
+	/* Leading piece, in front of the dirty bytes. */
+	if (start != dirty_offset)
+		ret = bl_do_readpage_sync(page, be, start, dirty_offset - start);
+	if (ret)
+		return ret;
+
+	/* Trailing piece, behind the dirty bytes. */
+	if (dirty_end < end)
+		ret = bl_do_readpage_sync(page, be, dirty_end, end - dirty_end);
+
+	return ret;
+}
+
/* Given an unmapped page, zero it or read in page for COW, page is locked
* by caller.
*/
@@ -494,7 +617,6 @@
SetPageUptodate(page);
cleanup:
- bl_put_extent(cow_read);
if (bh)
free_buffer_head(bh);
if (ret) {
@@ -566,6 +688,7 @@
struct parallel_io *par = NULL;
loff_t offset = wdata->args.offset;
size_t count = wdata->args.count;
+ unsigned int pg_offset, pg_len, saved_len;
struct page **pages = wdata->args.pages;
struct page *page;
pgoff_t index;
@@ -574,10 +697,13 @@
NFS_SERVER(header->inode)->pnfs_blksize >> PAGE_CACHE_SHIFT;
dprintk("%s enter, %Zu@%lld\n", __func__, count, offset);
- /* Check for alignment first */
- if (!bl_check_alignment(offset, count, PAGE_CACHE_MASK))
- goto out_mds;
+ if (header->dreq != NULL &&
+ (!IS_ALIGNED(offset, NFS_SERVER(header->inode)->pnfs_blksize) ||
+ !IS_ALIGNED(count, NFS_SERVER(header->inode)->pnfs_blksize))) {
+ dprintk("pnfsblock nonblock aligned DIO writes. Resend MDS\n");
+ goto out_mds;
+ }
/* At this point, wdata->pages is a (sequential) list of nfs_pages.
* We want to write each, and if there is an error set pnfs_error
* to have it redone using nfs.
@@ -674,10 +800,11 @@
if (!extent_length) {
/* We've used up the previous extent */
bl_put_extent(be);
+ bl_put_extent(cow_read);
bio = bl_submit_bio(WRITE, bio);
/* Get the next one */
be = bl_find_get_extent(BLK_LSEG2EXT(header->lseg),
- isect, NULL);
+ isect, &cow_read);
if (!be || !is_writable(be, isect)) {
header->pnfs_error = -EINVAL;
goto out;
@@ -694,7 +821,26 @@
extent_length = be->be_length -
(isect - be->be_f_offset);
}
- if (be->be_state == PNFS_BLOCK_INVALID_DATA) {
+
+ dprintk("%s offset %lld count %Zu\n", __func__, offset, count);
+ pg_offset = offset & ~PAGE_CACHE_MASK;
+ if (pg_offset + count > PAGE_CACHE_SIZE)
+ pg_len = PAGE_CACHE_SIZE - pg_offset;
+ else
+ pg_len = count;
+
+ saved_len = pg_len;
+ if (be->be_state == PNFS_BLOCK_INVALID_DATA &&
+ !bl_is_sector_init(be->be_inval, isect)) {
+ ret = bl_read_partial_page_sync(pages[i], cow_read,
+ pg_offset, pg_len, true);
+ if (ret) {
+ dprintk("%s bl_read_partial_page_sync fail %d\n",
+ __func__, ret);
+ header->pnfs_error = ret;
+ goto out;
+ }
+
ret = bl_mark_sectors_init(be->be_inval, isect,
PAGE_CACHE_SECTORS);
if (unlikely(ret)) {
@@ -703,15 +849,35 @@
header->pnfs_error = ret;
goto out;
}
+
+ /* Expand to full page write */
+ pg_offset = 0;
+ pg_len = PAGE_CACHE_SIZE;
+ } else if ((pg_offset & (SECTOR_SIZE - 1)) ||
+ (pg_len & (SECTOR_SIZE - 1))){
+ /* ahh, nasty case. We have to do sync full sector
+ * read-modify-write cycles.
+ */
+ unsigned int saved_offset = pg_offset;
+ ret = bl_read_partial_page_sync(pages[i], be, pg_offset,
+ pg_len, false);
+ pg_offset = round_down(pg_offset, SECTOR_SIZE);
+ pg_len = round_up(saved_offset + pg_len, SECTOR_SIZE)
+ - pg_offset;
}
- bio = bl_add_page_to_bio(bio, wdata->pages.npages - i, WRITE,
+
+
+ bio = do_add_page_to_bio(bio, wdata->pages.npages - i, WRITE,
isect, pages[i], be,
- bl_end_io_write, par);
+ bl_end_io_write, par,
+ pg_offset, pg_len);
if (IS_ERR(bio)) {
header->pnfs_error = PTR_ERR(bio);
bio = NULL;
goto out;
}
+ offset += saved_len;
+ count -= saved_len;
isect += PAGE_CACHE_SECTORS;
last_isect = isect;
extent_length -= PAGE_CACHE_SECTORS;
@@ -729,17 +895,16 @@
}
write_done:
- wdata->res.count = (last_isect << SECTOR_SHIFT) - (offset);
- if (count < wdata->res.count) {
- wdata->res.count = count;
- }
+ wdata->res.count = wdata->args.count;
out:
bl_put_extent(be);
+ bl_put_extent(cow_read);
bl_submit_bio(WRITE, bio);
put_parallel(par);
return PNFS_ATTEMPTED;
out_mds:
bl_put_extent(be);
+ bl_put_extent(cow_read);
kfree(par);
return PNFS_NOT_ATTEMPTED;
}
@@ -874,7 +1039,7 @@
}
}
-/* This is mostly copied from the filelayout's get_device_info function.
+/* This is mostly copied from the filelayout_get_device_info function.
* It seems much of this should be at the generic pnfs level.
*/
static struct pnfs_block_dev *
@@ -1011,33 +1176,64 @@
return 0;
}
+/*
+ * True when both the offset (wb_offset) and the length (wb_bytes) of
+ * @req are multiples of @alignment.
+ */
+static bool
+is_aligned_req(struct nfs_page *req, unsigned int alignment)
+{
+	if (!IS_ALIGNED(req->wb_offset, alignment))
+		return false;
+
+	return IS_ALIGNED(req->wb_bytes, alignment);
+}
+
static void
bl_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
{
- if (!bl_check_alignment(req->wb_offset, req->wb_bytes, PAGE_CACHE_MASK))
+ if (pgio->pg_dreq != NULL &&
+ !is_aligned_req(req, SECTOR_SIZE))
nfs_pageio_reset_read_mds(pgio);
else
pnfs_generic_pg_init_read(pgio, req);
}
-static void
+/*
+ * pg_test hook for reads: a request on a descriptor with a direct-I/O
+ * context (pg_dreq set) is rejected unless it is SECTOR_SIZE aligned;
+ * everything else is decided by pnfs_generic_pg_test().
+ */
+static bool
+bl_pg_test_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
+		struct nfs_page *req)
+{
+	const bool has_dreq = (pgio->pg_dreq != NULL);
+
+	if (has_dreq && !is_aligned_req(req, SECTOR_SIZE))
+		return false;
+
+	return pnfs_generic_pg_test(pgio, prev, req);
+}
+
+void
bl_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
{
- if (!bl_check_alignment(req->wb_offset, req->wb_bytes, PAGE_CACHE_MASK))
+ if (pgio->pg_dreq != NULL &&
+ !is_aligned_req(req, PAGE_CACHE_SIZE))
nfs_pageio_reset_write_mds(pgio);
else
pnfs_generic_pg_init_write(pgio, req);
}
+/*
+ * pg_test hook for writes: a request on a descriptor with a direct-I/O
+ * context (pg_dreq set) is rejected unless it is PAGE_CACHE_SIZE
+ * aligned; everything else is decided by pnfs_generic_pg_test().
+ */
+static bool
+bl_pg_test_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
+		 struct nfs_page *req)
+{
+	const bool has_dreq = (pgio->pg_dreq != NULL);
+
+	if (has_dreq && !is_aligned_req(req, PAGE_CACHE_SIZE))
+		return false;
+
+	return pnfs_generic_pg_test(pgio, prev, req);
+}
+
static const struct nfs_pageio_ops bl_pg_read_ops = {
.pg_init = bl_pg_init_read,
- .pg_test = pnfs_generic_pg_test,
+ .pg_test = bl_pg_test_read,
.pg_doio = pnfs_generic_pg_readpages,
};
static const struct nfs_pageio_ops bl_pg_write_ops = {
.pg_init = bl_pg_init_write,
- .pg_test = pnfs_generic_pg_test,
+ .pg_test = bl_pg_test_write,
.pg_doio = pnfs_generic_pg_writepages,
};
diff --git a/fs/nfs/blocklayout/blocklayout.h b/fs/nfs/blocklayout/blocklayout.h
index 0335069..39bb51a 100644
--- a/fs/nfs/blocklayout/blocklayout.h
+++ b/fs/nfs/blocklayout/blocklayout.h
@@ -41,6 +41,7 @@
#define PAGE_CACHE_SECTORS (PAGE_CACHE_SIZE >> SECTOR_SHIFT)
#define PAGE_CACHE_SECTOR_SHIFT (PAGE_CACHE_SHIFT - SECTOR_SHIFT)
+#define SECTOR_SIZE (1 << SECTOR_SHIFT)
struct block_mount_id {
spinlock_t bm_lock; /* protects list */
diff --git a/fs/nfs/blocklayout/extents.c b/fs/nfs/blocklayout/extents.c
index 1f9a603..9c3e117 100644
--- a/fs/nfs/blocklayout/extents.c
+++ b/fs/nfs/blocklayout/extents.c
@@ -683,8 +683,7 @@
p = xdr_encode_hyper(p, lce->bse_length << SECTOR_SHIFT);
p = xdr_encode_hyper(p, 0LL);
*p++ = cpu_to_be32(PNFS_BLOCK_READWRITE_DATA);
- list_del(&lce->bse_node);
- list_add_tail(&lce->bse_node, &bl->bl_committing);
+ list_move_tail(&lce->bse_node, &bl->bl_committing);
bl->bl_count--;
count++;
}
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index 4c8459e..8ed0bc8 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -23,6 +23,7 @@
#include "nfs4_fs.h"
#include "callback.h"
#include "internal.h"
+#include "netns.h"
#define NFSDBG_FACILITY NFSDBG_CALLBACK
@@ -37,7 +38,32 @@
static DEFINE_MUTEX(nfs_callback_mutex);
static struct svc_program nfs4_callback_program;
-unsigned short nfs_callback_tcpport6;
+/*
+ * Create the NFSv4.0 callback listeners for @net on @serv.
+ *
+ * A "tcp" PF_INET transport is required; its port is stored in the
+ * per-net nfs_callback_tcpport.  A PF_INET6 transport is then
+ * attempted: its port is stored in nfs_callback_tcpport6 on success,
+ * and -EAFNOSUPPORT from that attempt is not treated as an error.
+ *
+ * Returns 0 on success, otherwise the negative value returned by
+ * svc_create_xprt() (or -ENOMEM if that value was 0).
+ */
+static int nfs4_callback_up_net(struct svc_serv *serv, struct net *net)
+{
+	struct nfs_net *nn = net_generic(net, nfs_net_id);
+	int port;
+
+	port = svc_create_xprt(serv, "tcp", net, PF_INET,
+				nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS);
+	if (port <= 0)
+		goto out_err;
+	nn->nfs_callback_tcpport = port;
+	dprintk("NFS: Callback listener port = %u (af %u, net %p)\n",
+			nn->nfs_callback_tcpport, PF_INET, net);
+
+	port = svc_create_xprt(serv, "tcp", net, PF_INET6,
+				nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS);
+	if (port > 0) {
+		nn->nfs_callback_tcpport6 = port;
+		dprintk("NFS: Callback listener port = %u (af %u, net %p)\n",
+				nn->nfs_callback_tcpport6, PF_INET6, net);
+		return 0;
+	}
+	if (port == -EAFNOSUPPORT)
+		return 0;
+
+out_err:
+	return port ? port : -ENOMEM;
+}
/*
* This is the NFSv4 callback kernel thread.
@@ -78,38 +104,23 @@
* Prepare to bring up the NFSv4 callback service
*/
static struct svc_rqst *
-nfs4_callback_up(struct svc_serv *serv, struct rpc_xprt *xprt)
+nfs4_callback_up(struct svc_serv *serv)
{
- int ret;
-
- ret = svc_create_xprt(serv, "tcp", &init_net, PF_INET,
- nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS);
- if (ret <= 0)
- goto out_err;
- nfs_callback_tcpport = ret;
- dprintk("NFS: Callback listener port = %u (af %u)\n",
- nfs_callback_tcpport, PF_INET);
-
- ret = svc_create_xprt(serv, "tcp", &init_net, PF_INET6,
- nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS);
- if (ret > 0) {
- nfs_callback_tcpport6 = ret;
- dprintk("NFS: Callback listener port = %u (af %u)\n",
- nfs_callback_tcpport6, PF_INET6);
- } else if (ret == -EAFNOSUPPORT)
- ret = 0;
- else
- goto out_err;
-
return svc_prepare_thread(serv, &serv->sv_pools[0], NUMA_NO_NODE);
-
-out_err:
- if (ret == 0)
- ret = -ENOMEM;
- return ERR_PTR(ret);
}
#if defined(CONFIG_NFS_V4_1)
+/*
+ * Per-net setup for the NFSv4.1 callback service: create the "tcp-bc"
+ * svc_sock for the back channel, which shares the fore channel
+ * connection.
+ *
+ * The result of svc_create_xprt() is passed straight back: the input
+ * port (0) on success, with the svc_serv bc_xprt set.
+ */
+static int nfs41_callback_up_net(struct svc_serv *serv, struct net *net)
+{
+	int ret;
+
+	ret = svc_create_xprt(serv, "tcp-bc", net, PF_INET, 0,
+			      SVC_SOCK_ANONYMOUS);
+	return ret;
+}
+
/*
* The callback service for NFSv4.1 callbacks
*/
@@ -149,28 +160,9 @@
* Bring up the NFSv4.1 callback service
*/
static struct svc_rqst *
-nfs41_callback_up(struct svc_serv *serv, struct rpc_xprt *xprt)
+nfs41_callback_up(struct svc_serv *serv)
{
struct svc_rqst *rqstp;
- int ret;
-
- /*
- * Create an svc_sock for the back channel service that shares the
- * fore channel connection.
- * Returns the input port (0) and sets the svc_serv bc_xprt on success
- */
- ret = svc_create_xprt(serv, "tcp-bc", &init_net, PF_INET, 0,
- SVC_SOCK_ANONYMOUS);
- if (ret < 0) {
- rqstp = ERR_PTR(ret);
- goto out;
- }
-
- /*
- * Save the svc_serv in the transport so that it can
- * be referenced when the session backchannel is initialized
- */
- xprt->bc_serv = serv;
INIT_LIST_HEAD(&serv->sv_cb_list);
spin_lock_init(&serv->sv_cb_lock);
@@ -180,90 +172,76 @@
svc_xprt_put(serv->sv_bc_xprt);
serv->sv_bc_xprt = NULL;
}
-out:
dprintk("--> %s return %ld\n", __func__,
IS_ERR(rqstp) ? PTR_ERR(rqstp) : 0);
return rqstp;
}
static inline int nfs_minorversion_callback_svc_setup(u32 minorversion,
- struct svc_serv *serv, struct rpc_xprt *xprt,
+ struct svc_serv *serv,
struct svc_rqst **rqstpp, int (**callback_svc)(void *vrqstp))
{
if (minorversion) {
- *rqstpp = nfs41_callback_up(serv, xprt);
+ *rqstpp = nfs41_callback_up(serv);
*callback_svc = nfs41_callback_svc;
}
return minorversion;
}
static inline void nfs_callback_bc_serv(u32 minorversion, struct rpc_xprt *xprt,
- struct nfs_callback_data *cb_info)
+ struct svc_serv *serv)
{
if (minorversion)
- xprt->bc_serv = cb_info->serv;
+ /*
+ * Save the svc_serv in the transport so that it can
+ * be referenced when the session backchannel is initialized
+ */
+ xprt->bc_serv = serv;
}
#else
+static int nfs41_callback_up_net(struct svc_serv *serv, struct net *net)
+{
+ return 0;
+}
+
static inline int nfs_minorversion_callback_svc_setup(u32 minorversion,
- struct svc_serv *serv, struct rpc_xprt *xprt,
+ struct svc_serv *serv,
struct svc_rqst **rqstpp, int (**callback_svc)(void *vrqstp))
{
return 0;
}
static inline void nfs_callback_bc_serv(u32 minorversion, struct rpc_xprt *xprt,
- struct nfs_callback_data *cb_info)
+ struct svc_serv *serv)
{
}
#endif /* CONFIG_NFS_V4_1 */
-/*
- * Bring up the callback thread if it is not already up.
- */
-int nfs_callback_up(u32 minorversion, struct rpc_xprt *xprt)
+static int nfs_callback_start_svc(int minorversion, struct rpc_xprt *xprt,
+ struct svc_serv *serv)
{
- struct svc_serv *serv = NULL;
struct svc_rqst *rqstp;
int (*callback_svc)(void *vrqstp);
struct nfs_callback_data *cb_info = &nfs_callback_info[minorversion];
char svc_name[12];
- int ret = 0;
+ int ret;
int minorversion_setup;
- struct net *net = &init_net;
- mutex_lock(&nfs_callback_mutex);
- if (cb_info->users++ || cb_info->task != NULL) {
- nfs_callback_bc_serv(minorversion, xprt, cb_info);
- goto out;
- }
- serv = svc_create(&nfs4_callback_program, NFS4_CALLBACK_BUFSIZE, NULL);
- if (!serv) {
- ret = -ENOMEM;
- goto out_err;
- }
- /* As there is only one thread we need to over-ride the
- * default maximum of 80 connections
- */
- serv->sv_maxconn = 1024;
+ nfs_callback_bc_serv(minorversion, xprt, serv);
- ret = svc_bind(serv, net);
- if (ret < 0) {
- printk(KERN_WARNING "NFS: bind callback service failed\n");
- goto out_err;
- }
+ if (cb_info->task)
+ return 0;
minorversion_setup = nfs_minorversion_callback_svc_setup(minorversion,
- serv, xprt, &rqstp, &callback_svc);
+ serv, &rqstp, &callback_svc);
if (!minorversion_setup) {
/* v4.0 callback setup */
- rqstp = nfs4_callback_up(serv, xprt);
+ rqstp = nfs4_callback_up(serv);
callback_svc = nfs4_callback_svc;
}
- if (IS_ERR(rqstp)) {
- ret = PTR_ERR(rqstp);
- goto out_err;
- }
+ if (IS_ERR(rqstp))
+ return PTR_ERR(rqstp);
svc_sock_update_bufs(serv);
@@ -276,41 +254,165 @@
svc_exit_thread(cb_info->rqst);
cb_info->rqst = NULL;
cb_info->task = NULL;
- goto out_err;
+ return PTR_ERR(cb_info->task);
}
-out:
+ dprintk("nfs_callback_up: service started\n");
+ return 0;
+}
+
+/*
+ * Drop one per-net user of the callback service for @minorversion.
+ * When the per-net count reaches zero, the service's per-net state is
+ * shut down through svc_shutdown_net().
+ */
+static void nfs_callback_down_net(u32 minorversion, struct svc_serv *serv, struct net *net)
+{
+	struct nfs_net *nn = net_generic(net, nfs_net_id);
+
+	if (--nn->cb_users[minorversion] == 0) {
+		dprintk("NFS: destroy per-net callback data; net=%p\n", net);
+		svc_shutdown_net(serv, net);
+	}
+}
+
+/*
+ * Register one per-net user of the callback service for @minorversion
+ * and, for the first user in @net, bind the service and create its
+ * transports.
+ *
+ * Returns 0 on success (including when the per-net state already
+ * exists) or a negative errno.  On failure the per-net user count is
+ * rolled back, so a later call retries the setup instead of seeing a
+ * stale non-zero count and returning 0 without any transports.
+ */
+static int nfs_callback_up_net(int minorversion, struct svc_serv *serv, struct net *net)
+{
+	struct nfs_net *nn = net_generic(net, nfs_net_id);
+	int ret;
+
+	if (nn->cb_users[minorversion]++)
+		return 0;
+
+	dprintk("NFS: create per-net callback data; net=%p\n", net);
+
+	ret = svc_bind(serv, net);
+	if (ret < 0) {
+		printk(KERN_WARNING "NFS: bind callback service failed\n");
+		goto err_bind;
+	}
+
+	switch (minorversion) {
+	case 0:
+		ret = nfs4_callback_up_net(serv, net);
+		break;
+	case 1:
+		ret = nfs41_callback_up_net(serv, net);
+		break;
+	default:
+		printk(KERN_ERR "NFS: unknown callback version: %d\n",
+				minorversion);
+		ret = -EINVAL;
+		break;
+	}
+
+	if (ret < 0) {
+		printk(KERN_ERR "NFS: callback service start failed\n");
+		goto err_socks;
+	}
+	return 0;
+
+err_socks:
+	svc_rpcb_cleanup(serv, net);
+err_bind:
+	/* Undo the increment taken on entry; nothing was set up. */
+	nn->cb_users[minorversion]--;
+	dprintk("NFS: Couldn't create callback socket: err = %d; "
+			"net = %p\n", ret, net);
+	return ret;
+}
+
+/*
+ * Return the svc_serv to use for the @minorversion callback service.
+ *
+ * If the callback thread already exists, the existing service is
+ * returned with an extra reference taken via svc_get(), because the
+ * caller calls svc_destroy() on it afterwards.  Otherwise a new service
+ * is created with sv_maxconn raised to 1024.
+ *
+ * Returns the service, or ERR_PTR(-ENOMEM) if svc_create() fails.
+ */
+static struct svc_serv *nfs_callback_create_svc(int minorversion)
+{
+	struct nfs_callback_data *cb_info = &nfs_callback_info[minorversion];
+	struct svc_serv *serv;
+
+	/* Already up and running: hand out the existing service. */
+	if (cb_info->task != NULL) {
+		svc_get(cb_info->serv);
+		return cb_info->serv;
+	}
+
+	/* Sanity check: with no task, we should be the first user. */
+	if (cb_info->users != 0)
+		printk(KERN_WARNING "nfs_callback_create_svc: no kthread, %d users??\n",
+			cb_info->users);
+
+	serv = svc_create(&nfs4_callback_program, NFS4_CALLBACK_BUFSIZE, NULL);
+	if (serv == NULL) {
+		printk(KERN_ERR "nfs_callback_create_svc: create service failed\n");
+		return ERR_PTR(-ENOMEM);
+	}
+
+	/*
+	 * There is only one thread, so over-ride the default maximum
+	 * of 80 connections.
+	 */
+	serv->sv_maxconn = 1024;
+	dprintk("nfs_callback_create_svc: service created\n");
+	return serv;
+}
+
+/*
+ * Bring up the callback thread if it is not already up.
+ */
+int nfs_callback_up(u32 minorversion, struct rpc_xprt *xprt)
+{
+ struct svc_serv *serv;
+ struct nfs_callback_data *cb_info = &nfs_callback_info[minorversion];
+ int ret;
+ struct net *net = xprt->xprt_net;
+
+ mutex_lock(&nfs_callback_mutex);
+
+ serv = nfs_callback_create_svc(minorversion);
+ if (IS_ERR(serv)) {
+ ret = PTR_ERR(serv);
+ goto err_create;
+ }
+
+ ret = nfs_callback_up_net(minorversion, serv, net);
+ if (ret < 0)
+ goto err_net;
+
+ ret = nfs_callback_start_svc(minorversion, xprt, serv);
+ if (ret < 0)
+ goto err_start;
+
+ cb_info->users++;
/*
* svc_create creates the svc_serv with sv_nrthreads == 1, and then
* svc_prepare_thread increments that. So we need to call svc_destroy
* on both success and failure so that the refcount is 1 when the
* thread exits.
*/
- if (serv)
- svc_destroy(serv);
+err_net:
+ svc_destroy(serv);
+err_create:
mutex_unlock(&nfs_callback_mutex);
return ret;
-out_err:
- dprintk("NFS: Couldn't create callback socket or server thread; "
- "err = %d\n", ret);
- cb_info->users--;
- if (serv)
- svc_shutdown_net(serv, net);
- goto out;
+
+err_start:
+ nfs_callback_down_net(minorversion, serv, net);
+ dprintk("NFS: Couldn't create server thread; err = %d\n", ret);
+ goto err_net;
}
/*
* Kill the callback thread if it's no longer being used.
*/
-void nfs_callback_down(int minorversion)
+void nfs_callback_down(int minorversion, struct net *net)
{
struct nfs_callback_data *cb_info = &nfs_callback_info[minorversion];
mutex_lock(&nfs_callback_mutex);
+ nfs_callback_down_net(minorversion, cb_info->serv, net);
cb_info->users--;
if (cb_info->users == 0 && cb_info->task != NULL) {
kthread_stop(cb_info->task);
- svc_shutdown_net(cb_info->serv, &init_net);
+ dprintk("nfs_callback_down: service stopped\n");
svc_exit_thread(cb_info->rqst);
+ dprintk("nfs_callback_down: service destroyed\n");
cb_info->serv = NULL;
cb_info->rqst = NULL;
cb_info->task = NULL;
diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h
index b44d7b1..c07a8d4 100644
--- a/fs/nfs/callback.h
+++ b/fs/nfs/callback.h
@@ -194,7 +194,7 @@
struct cb_process_state *cps);
#if IS_ENABLED(CONFIG_NFS_V4)
extern int nfs_callback_up(u32 minorversion, struct rpc_xprt *xprt);
-extern void nfs_callback_down(int minorversion);
+extern void nfs_callback_down(int minorversion, struct net *net);
extern int nfs4_validate_delegation_stateid(struct nfs_delegation *delegation,
const nfs4_stateid *stateid);
extern int nfs4_set_callback_sessionid(struct nfs_client *clp);
@@ -208,7 +208,5 @@
#define NFS41_BC_MAX_CALLBACKS 1
extern unsigned int nfs_callback_set_tcpport;
-extern unsigned short nfs_callback_tcpport;
-extern unsigned short nfs_callback_tcpport6;
#endif /* __LINUX_FS_NFS_CALLBACK_H */
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index 1b5d809..76b4a7a 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -122,7 +122,15 @@
ino = igrab(lo->plh_inode);
if (!ino)
continue;
- get_layout_hdr(lo);
+ spin_lock(&ino->i_lock);
+ /* Is this layout in the process of being freed? */
+ if (NFS_I(ino)->layout != lo) {
+ spin_unlock(&ino->i_lock);
+ iput(ino);
+ continue;
+ }
+ pnfs_get_layout_hdr(lo);
+ spin_unlock(&ino->i_lock);
return lo;
}
}
@@ -158,7 +166,7 @@
ino = lo->plh_inode;
spin_lock(&ino->i_lock);
if (test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags) ||
- mark_matching_lsegs_invalid(lo, &free_me_list,
+ pnfs_mark_matching_lsegs_invalid(lo, &free_me_list,
&args->cbl_range))
rv = NFS4ERR_DELAY;
else
@@ -166,7 +174,7 @@
pnfs_set_layout_stateid(lo, &args->cbl_stateid, true);
spin_unlock(&ino->i_lock);
pnfs_free_lseg_list(&free_me_list);
- put_layout_hdr(lo);
+ pnfs_put_layout_hdr(lo);
iput(ino);
return rv;
}
@@ -196,9 +204,18 @@
continue;
list_for_each_entry(lo, &server->layouts, plh_layouts) {
- if (!igrab(lo->plh_inode))
+ ino = igrab(lo->plh_inode);
+ /* No inode reference was obtained: skip this layout */
+ if (!ino)
continue;
- get_layout_hdr(lo);
+ spin_lock(&ino->i_lock);
+ /* Is this layout in the process of being freed? */
+ if (NFS_I(ino)->layout != lo) {
+ spin_unlock(&ino->i_lock);
+ iput(ino);
+ continue;
+ }
+ pnfs_get_layout_hdr(lo);
+ spin_unlock(&ino->i_lock);
BUG_ON(!list_empty(&lo->plh_bulk_recall));
list_add(&lo->plh_bulk_recall, &recall_list);
}
@@ -211,12 +228,12 @@
ino = lo->plh_inode;
spin_lock(&ino->i_lock);
set_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags);
- if (mark_matching_lsegs_invalid(lo, &free_me_list, &range))
+ if (pnfs_mark_matching_lsegs_invalid(lo, &free_me_list, &range))
rv = NFS4ERR_DELAY;
list_del_init(&lo->plh_bulk_recall);
spin_unlock(&ino->i_lock);
pnfs_free_lseg_list(&free_me_list);
- put_layout_hdr(lo);
+ pnfs_put_layout_hdr(lo);
iput(ino);
}
return rv;
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 9969444..8b39a42 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -93,10 +93,10 @@
spin_unlock(&nfs_version_lock);
return nfs;
}
- };
+ }
spin_unlock(&nfs_version_lock);
- return ERR_PTR(-EPROTONOSUPPORT);;
+ return ERR_PTR(-EPROTONOSUPPORT);
}
struct nfs_subversion *get_nfs_version(unsigned int version)
@@ -498,7 +498,8 @@
return nfs_found_client(cl_init, clp);
}
if (new) {
- list_add(&new->cl_share_link, &nn->nfs_client_list);
+ list_add_tail(&new->cl_share_link,
+ &nn->nfs_client_list);
spin_unlock(&nn->nfs_client_lock);
new->cl_flags = cl_init->init_flags;
return rpc_ops->init_client(new, timeparms, ip_addr,
@@ -668,7 +669,8 @@
{
struct nfs_client *clp = server->nfs_client;
- server->client = rpc_clone_client(clp->cl_rpcclient);
+ server->client = rpc_clone_client_set_auth(clp->cl_rpcclient,
+ pseudoflavour);
if (IS_ERR(server->client)) {
dprintk("%s: couldn't create rpc_client!\n", __func__);
return PTR_ERR(server->client);
@@ -678,16 +680,6 @@
timeo,
sizeof(server->client->cl_timeout_default));
server->client->cl_timeout = &server->client->cl_timeout_default;
-
- if (pseudoflavour != clp->cl_rpcclient->cl_auth->au_flavor) {
- struct rpc_auth *auth;
-
- auth = rpcauth_create(pseudoflavour, server->client);
- if (IS_ERR(auth)) {
- dprintk("%s: couldn't create credcache!\n", __func__);
- return PTR_ERR(auth);
- }
- }
server->client->cl_softrtry = 0;
if (server->flags & NFS_MOUNT_SOFT)
server->client->cl_softrtry = 1;
@@ -761,6 +753,8 @@
data->timeo, data->retrans);
if (data->flags & NFS_MOUNT_NORESVPORT)
set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags);
+ if (server->options & NFS_OPTION_MIGRATION)
+ set_bit(NFS_CS_MIGRATION, &cl_init.init_flags);
/* Allocate or find a client reference we can use */
clp = nfs_get_client(&cl_init, &timeparms, NULL, RPC_AUTH_UNIX);
@@ -855,7 +849,6 @@
if (server->wsize > NFS_MAX_FILE_IO_SIZE)
server->wsize = NFS_MAX_FILE_IO_SIZE;
server->wpages = (server->wsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
- server->pnfs_blksize = fsinfo->blksize;
server->wtmult = nfs_block_bits(fsinfo->wtmult, NULL);
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 627f108..ce8cb92 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -2072,7 +2072,7 @@
nfs_access_free_entry(entry);
}
-static void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set)
+void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set)
{
struct nfs_access_entry *cache = kmalloc(sizeof(*cache), GFP_KERNEL);
if (cache == NULL)
@@ -2098,6 +2098,20 @@
spin_unlock(&nfs_access_lru_lock);
}
}
+EXPORT_SYMBOL_GPL(nfs_access_add_cache);
+
+/*
+ * Translate an NFSv4 ACCESS result bitmap into the MAY_READ/MAY_WRITE/
+ * MAY_EXEC bits stored in entry->mask. Any previous mask is overwritten.
+ */
+void nfs_access_set_mask(struct nfs_access_entry *entry, u32 access_result)
+{
+ const u32 write_bits = NFS4_ACCESS_MODIFY | NFS4_ACCESS_EXTEND |
+ NFS4_ACCESS_DELETE;
+ const u32 exec_bits = NFS4_ACCESS_LOOKUP | NFS4_ACCESS_EXECUTE;
+
+ entry->mask = (access_result & NFS4_ACCESS_READ) ? MAY_READ : 0;
+ if (access_result & write_bits)
+ entry->mask |= MAY_WRITE;
+ if (access_result & exec_bits)
+ entry->mask |= MAY_EXEC;
+}
+EXPORT_SYMBOL_GPL(nfs_access_set_mask);
static int nfs_do_access(struct inode *inode, struct rpc_cred *cred, int mask)
{
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 1ba385b..253d397 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -450,6 +450,7 @@
ssize_t result = -ENOMEM;
struct inode *inode = iocb->ki_filp->f_mapping->host;
struct nfs_direct_req *dreq;
+ struct nfs_lock_context *l_ctx;
dreq = nfs_direct_req_alloc();
if (dreq == NULL)
@@ -457,16 +458,19 @@
dreq->inode = inode;
dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp));
- dreq->l_ctx = nfs_get_lock_context(dreq->ctx);
- if (dreq->l_ctx == NULL)
+ l_ctx = nfs_get_lock_context(dreq->ctx);
+ if (IS_ERR(l_ctx)) {
+ result = PTR_ERR(l_ctx);
goto out_release;
+ }
+ dreq->l_ctx = l_ctx;
if (!is_sync_kiocb(iocb))
dreq->iocb = iocb;
+ NFS_I(inode)->read_io += iov_length(iov, nr_segs);
result = nfs_direct_read_schedule_iovec(dreq, iov, nr_segs, pos, uio);
if (!result)
result = nfs_direct_wait(dreq);
- NFS_I(inode)->read_io += result;
out_release:
nfs_direct_req_release(dreq);
out:
@@ -814,6 +818,7 @@
get_dreq(dreq);
atomic_inc(&inode->i_dio_count);
+ NFS_I(dreq->inode)->write_io += iov_length(iov, nr_segs);
for (seg = 0; seg < nr_segs; seg++) {
const struct iovec *vec = &iov[seg];
result = nfs_direct_write_schedule_segment(&desc, vec, pos, uio);
@@ -825,7 +830,6 @@
pos += vec->iov_len;
}
nfs_pageio_complete(&desc);
- NFS_I(dreq->inode)->write_io += desc.pg_bytes_written;
/*
* If no bytes were started, return the error, and let the
@@ -849,6 +853,7 @@
ssize_t result = -ENOMEM;
struct inode *inode = iocb->ki_filp->f_mapping->host;
struct nfs_direct_req *dreq;
+ struct nfs_lock_context *l_ctx;
dreq = nfs_direct_req_alloc();
if (!dreq)
@@ -856,9 +861,12 @@
dreq->inode = inode;
dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp));
- dreq->l_ctx = nfs_get_lock_context(dreq->ctx);
- if (dreq->l_ctx == NULL)
+ l_ctx = nfs_get_lock_context(dreq->ctx);
+ if (IS_ERR(l_ctx)) {
+ result = PTR_ERR(l_ctx);
goto out_release;
+ }
+ dreq->l_ctx = l_ctx;
if (!is_sync_kiocb(iocb))
dreq->iocb = iocb;
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 6a7fcab..c814666 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -259,7 +259,7 @@
struct dentry *dentry = file->f_path.dentry;
struct nfs_open_context *ctx = nfs_file_open_context(file);
struct inode *inode = dentry->d_inode;
- int have_error, status;
+ int have_error, do_resend, status;
int ret = 0;
dprintk("NFS: fsync file(%s/%s) datasync %d\n",
@@ -267,15 +267,23 @@
datasync);
nfs_inc_stats(inode, NFSIOS_VFSFSYNC);
+ do_resend = test_and_clear_bit(NFS_CONTEXT_RESEND_WRITES, &ctx->flags);
have_error = test_and_clear_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags);
status = nfs_commit_inode(inode, FLUSH_SYNC);
- if (status >= 0 && ret < 0)
- status = ret;
have_error |= test_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags);
- if (have_error)
+ if (have_error) {
ret = xchg(&ctx->error, 0);
- if (!ret && status < 0)
+ if (ret)
+ goto out;
+ }
+ if (status < 0) {
ret = status;
+ goto out;
+ }
+ do_resend |= test_bit(NFS_CONTEXT_RESEND_WRITES, &ctx->flags);
+ if (do_resend)
+ ret = -EAGAIN;
+out:
return ret;
}
EXPORT_SYMBOL_GPL(nfs_file_fsync_commit);
@@ -286,13 +294,22 @@
int ret;
struct inode *inode = file->f_path.dentry->d_inode;
- ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
- if (ret != 0)
- goto out;
- mutex_lock(&inode->i_mutex);
- ret = nfs_file_fsync_commit(file, start, end, datasync);
- mutex_unlock(&inode->i_mutex);
-out:
+ do {
+ ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
+ if (ret != 0)
+ break;
+ mutex_lock(&inode->i_mutex);
+ ret = nfs_file_fsync_commit(file, start, end, datasync);
+ mutex_unlock(&inode->i_mutex);
+ /*
+ * If nfs_file_fsync_commit detected a server reboot, then
+ * resend all dirty pages that might have been covered by
+ * the NFS_CONTEXT_RESEND_WRITES flag
+ */
+ start = 0;
+ end = LLONG_MAX;
+ } while (ret == -EAGAIN);
+
return ret;
}
diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c
index 7ac93e0..675b389c 100644
--- a/fs/nfs/idmap.c
+++ b/fs/nfs/idmap.c
@@ -159,7 +159,7 @@
return 0;
memcpy(buf, name, namelen);
buf[namelen] = '\0';
- if (strict_strtoul(buf, 0, &val) != 0)
+ if (kstrtoul(buf, 0, &val) != 0)
return 0;
*res = val;
return 1;
@@ -364,7 +364,7 @@
if (data_size <= 0) {
ret = -EINVAL;
} else {
- ret = strict_strtol(id_str, 10, &id_long);
+ ret = kstrtol(id_str, 10, &id_long);
*id = (__u32)id_long;
}
return ret;
@@ -629,9 +629,6 @@
substring_t substr;
int token, ret;
- memset(im, 0, sizeof(*im));
- memset(msg, 0, sizeof(*msg));
-
im->im_type = IDMAP_TYPE_GROUP;
token = match_token(desc, nfs_idmap_tokens, &substr);
@@ -703,7 +700,7 @@
int ret = -ENOMEM;
/* msg and im are freed in idmap_pipe_destroy_msg */
- data = kmalloc(sizeof(*data), GFP_KERNEL);
+ data = kzalloc(sizeof(*data), GFP_KERNEL);
if (!data)
goto out1;
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 9b47610..126a4cb 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -547,8 +547,8 @@
static void nfs_init_lock_context(struct nfs_lock_context *l_ctx)
{
atomic_set(&l_ctx->count, 1);
- l_ctx->lockowner = current->files;
- l_ctx->pid = current->tgid;
+ l_ctx->lockowner.l_owner = current->files;
+ l_ctx->lockowner.l_pid = current->tgid;
INIT_LIST_HEAD(&l_ctx->list);
}
@@ -557,9 +557,9 @@
struct nfs_lock_context *pos;
list_for_each_entry(pos, &ctx->lock_context.list, list) {
- if (pos->lockowner != current->files)
+ if (pos->lockowner.l_owner != current->files)
continue;
- if (pos->pid != current->tgid)
+ if (pos->lockowner.l_pid != current->tgid)
continue;
atomic_inc(&pos->count);
return pos;
@@ -578,7 +578,7 @@
spin_unlock(&inode->i_lock);
new = kmalloc(sizeof(*new), GFP_KERNEL);
if (new == NULL)
- return NULL;
+ return ERR_PTR(-ENOMEM);
nfs_init_lock_context(new);
spin_lock(&inode->i_lock);
res = __nfs_find_lock_context(ctx);
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 31fdb03..89a795d 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -101,11 +101,11 @@
*/
struct nfs_parsed_mount_data {
int flags;
- int rsize, wsize;
- int timeo, retrans;
- int acregmin, acregmax,
+ unsigned int rsize, wsize;
+ unsigned int timeo, retrans;
+ unsigned int acregmin, acregmax,
acdirmin, acdirmax;
- int namlen;
+ unsigned int namlen;
unsigned int options;
unsigned int bsize;
unsigned int auth_flavor_len;
@@ -483,6 +483,12 @@
struct nfs4_sequence_args *args,
struct nfs4_sequence_res *res,
int cache_reply);
+extern int nfs40_walk_client_list(struct nfs_client *clp,
+ struct nfs_client **result,
+ struct rpc_cred *cred);
+extern int nfs41_walk_client_list(struct nfs_client *clp,
+ struct nfs_client **result,
+ struct rpc_cred *cred);
/*
* Determine the device name as a string
diff --git a/fs/nfs/netns.h b/fs/nfs/netns.h
index 0539de1..b9c7f9b 100644
--- a/fs/nfs/netns.h
+++ b/fs/nfs/netns.h
@@ -22,6 +22,9 @@
struct list_head nfs_volume_list;
#if IS_ENABLED(CONFIG_NFS_V4)
struct idr cb_ident_idr; /* Protected by nfs_client_lock */
+ unsigned short nfs_callback_tcpport;
+ unsigned short nfs_callback_tcpport6;
+ int cb_users[NFS4_MAX_MINOR_VERSION + 1];
#endif
spinlock_t nfs_client_lock;
struct timespec boot_time;
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index da0618a..a525fde 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -132,8 +132,8 @@
struct nfs4_lock_state {
struct list_head ls_locks; /* Other lock stateids */
struct nfs4_state * ls_state; /* Pointer to open state */
-#define NFS_LOCK_INITIALIZED 1
- int ls_flags;
+#define NFS_LOCK_INITIALIZED 0
+ unsigned long ls_flags;
struct nfs_seqid_counter ls_seqid;
nfs4_stateid ls_stateid;
atomic_t ls_count;
@@ -191,6 +191,8 @@
int (*establish_clid)(struct nfs_client *, struct rpc_cred *);
struct rpc_cred * (*get_clid_cred)(struct nfs_client *);
int (*reclaim_complete)(struct nfs_client *);
+ int (*detect_trunking)(struct nfs_client *, struct nfs_client **,
+ struct rpc_cred *);
};
struct nfs4_state_maintenance_ops {
@@ -223,7 +225,7 @@
extern int nfs4_destroy_clientid(struct nfs_client *clp);
extern int nfs4_init_clientid(struct nfs_client *, struct rpc_cred *);
extern int nfs41_init_clientid(struct nfs_client *, struct rpc_cred *);
-extern int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait, bool roc);
+extern int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait);
extern int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle);
extern int nfs4_proc_fs_locations(struct rpc_clnt *, struct inode *, const struct qstr *,
struct nfs4_fs_locations *, struct page *);
@@ -320,9 +322,15 @@
/* nfs4state.c */
struct rpc_cred *nfs4_get_setclientid_cred(struct nfs_client *clp);
struct rpc_cred *nfs4_get_renew_cred_locked(struct nfs_client *clp);
+int nfs4_discover_server_trunking(struct nfs_client *clp,
+ struct nfs_client **);
+int nfs40_discover_server_trunking(struct nfs_client *clp,
+ struct nfs_client **, struct rpc_cred *);
#if defined(CONFIG_NFS_V4_1)
struct rpc_cred *nfs4_get_machine_cred_locked(struct nfs_client *clp);
struct rpc_cred *nfs4_get_exchange_id_cred(struct nfs_client *clp);
+int nfs41_discover_server_trunking(struct nfs_client *clp,
+ struct nfs_client **, struct rpc_cred *);
extern void nfs4_schedule_session_recovery(struct nfs4_session *, int);
#else
static inline void nfs4_schedule_session_recovery(struct nfs4_session *session, int err)
@@ -351,7 +359,7 @@
extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp);
extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl);
extern void nfs4_select_rw_stateid(nfs4_stateid *, struct nfs4_state *,
- fmode_t, fl_owner_t, pid_t);
+ fmode_t, const struct nfs_lockowner *);
extern struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter, gfp_t gfp_mask);
extern int nfs_wait_on_sequence(struct nfs_seqid *seqid, struct rpc_task *task);
@@ -372,6 +380,9 @@
extern unsigned short max_session_slots;
extern unsigned short send_implementation_id;
+#define NFS4_CLIENT_ID_UNIQ_LEN (64)
+extern char nfs4_client_id_uniquifier[NFS4_CLIENT_ID_UNIQ_LEN];
+
/* nfs4sysctl.c */
#ifdef CONFIG_SYSCTL
int nfs4_register_sysctl(void);
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index 24eb663..14ddd4d 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -84,7 +84,7 @@
static void nfs4_destroy_callback(struct nfs_client *clp)
{
if (__test_and_clear_bit(NFS_CS_CALLBACK, &clp->cl_res_state))
- nfs_callback_down(clp->cl_mvops->minor_version);
+ nfs_callback_down(clp->cl_mvops->minor_version, clp->cl_net);
}
static void nfs4_shutdown_client(struct nfs_client *clp)
@@ -185,6 +185,7 @@
rpc_authflavor_t authflavour)
{
char buf[INET6_ADDRSTRLEN + 1];
+ struct nfs_client *old;
int error;
if (clp->cl_cons_state == NFS_CS_READY) {
@@ -230,6 +231,17 @@
if (!nfs4_has_session(clp))
nfs_mark_client_ready(clp, NFS_CS_READY);
+
+ error = nfs4_discover_server_trunking(clp, &old);
+ if (error < 0)
+ goto error;
+ if (clp != old) {
+ clp->cl_preserve_clid = true;
+ nfs_put_client(clp);
+ clp = old;
+ atomic_inc(&clp->cl_count);
+ }
+
return clp;
error:
@@ -239,6 +251,247 @@
return ERR_PTR(error);
}
+/*
+ * Compare the cl_clientid of two nfs_clients, logging the outcome.
+ * Returns true when they are equal, false otherwise.
+ */
+static bool nfs4_match_clientids(struct nfs_client *a, struct nfs_client *b)
+{
+ if (a->cl_clientid == b->cl_clientid) {
+ dprintk("NFS: --> %s client ID %llx matches %llx\n",
+ __func__, a->cl_clientid, b->cl_clientid);
+ return true;
+ }
+
+ dprintk("NFS: --> %s client ID %llx does not match %llx\n",
+ __func__, a->cl_clientid, b->cl_clientid);
+ return false;
+}
+
+/*
+ * SETCLIENTID just did a callback update with the callback ident in
+ * "drop," but server trunking discovery claims "drop" and "keep" are
+ * actually the same server. Swap the callback IDs so that "keep"
+ * will continue to use the callback ident the server now knows about,
+ * and so that "keep"'s original callback ident is destroyed when
+ * "drop" is freed.
+ */
+static void nfs4_swap_callback_idents(struct nfs_client *keep,
+ struct nfs_client *drop)
+{
+ struct nfs_net *nn = net_generic(keep->cl_net, nfs_net_id);
+ /* "keep"'s ident before the swap; handed to "drop" below */
+ unsigned int save = keep->cl_cb_ident;
+
+ /* Both clients already carry the same ident: nothing to swap */
+ if (keep->cl_cb_ident == drop->cl_cb_ident)
+ return;
+
+ dprintk("%s: keeping callback ident %u and dropping ident %u\n",
+ __func__, keep->cl_cb_ident, drop->cl_cb_ident);
+
+ /* The IDR slots and both cl_cb_ident fields change under the lock */
+ spin_lock(&nn->nfs_client_lock);
+
+ /* Point "drop"'s ident at "keep", then record it in "keep" */
+ idr_replace(&nn->cb_ident_idr, keep, drop->cl_cb_ident);
+ keep->cl_cb_ident = drop->cl_cb_ident;
+
+ /* Point "keep"'s old ident at "drop", then record it in "drop" */
+ idr_replace(&nn->cb_ident_idr, drop, save);
+ drop->cl_cb_ident = save;
+
+ spin_unlock(&nn->nfs_client_lock);
+}
+
+/**
+ * nfs40_walk_client_list - Find server that recognizes a client ID
+ *
+ * @new: nfs_client with client ID to test
+ * @result: OUT: found nfs_client, or new
+ * @cred: credential to use for trunking test
+ *
+ * Returns zero, a negative errno, or a negative NFS4ERR status.
+ * If zero is returned, an nfs_client pointer is planted in "result."
+ *
+ * NB: nfs40_walk_client_list() relies on the new nfs_client being
+ * the last nfs_client on the list.
+ */
+int nfs40_walk_client_list(struct nfs_client *new,
+ struct nfs_client **result,
+ struct rpc_cred *cred)
+{
+ struct nfs_net *nn = net_generic(new->cl_net, nfs_net_id);
+ struct nfs_client *pos, *n, *prev = NULL;
+ struct nfs4_setclientid_res clid = {
+ .clientid = new->cl_clientid,
+ .confirm = new->cl_confirm,
+ };
+ int status;
+
+ spin_lock(&nn->nfs_client_lock);
+ list_for_each_entry_safe(pos, n, &nn->nfs_client_list, cl_share_link) {
+ /* If "pos" isn't marked ready, we can't trust the
+ * remaining fields in "pos" */
+ if (pos->cl_cons_state < NFS_CS_READY)
+ continue;
+
+ if (pos->rpc_ops != new->rpc_ops)
+ continue;
+
+ if (pos->cl_proto != new->cl_proto)
+ continue;
+
+ if (pos->cl_minorversion != new->cl_minorversion)
+ continue;
+
+ if (pos->cl_clientid != new->cl_clientid)
+ continue;
+
+ /* Take a reference on "pos" before the list lock is
+ * dropped for the SETCLIENTID_CONFIRM call below */
+ atomic_inc(&pos->cl_count);
+ spin_unlock(&nn->nfs_client_lock);
+
+ if (prev)
+ nfs_put_client(prev);
+
+ status = nfs4_proc_setclientid_confirm(pos, &clid, cred);
+ if (status == 0) {
+ nfs4_swap_callback_idents(pos, new);
+
+ nfs_put_client(pos);
+ *result = pos;
+ dprintk("NFS: <-- %s using nfs_client = %p ({%d})\n",
+ __func__, pos, atomic_read(&pos->cl_count));
+ return 0;
+ }
+ if (status != -NFS4ERR_STALE_CLIENTID) {
+ nfs_put_client(pos);
+ dprintk("NFS: <-- %s status = %d, no result\n",
+ __func__, status);
+ return status;
+ }
+
+ spin_lock(&nn->nfs_client_lock);
+ prev = pos;
+ }
+
+ /*
+ * No matching nfs_client found. This should be impossible,
+ * because the new nfs_client has already been added to
+ * nfs_client_list by nfs_get_client().
+ *
+ * Don't BUG(), since the caller is holding a mutex.
+ *
+ * Release nfs_client_lock before dropping the reference on
+ * "prev", as every other nfs_put_client() call in this function
+ * does: a final put may itself need nfs_client_lock.
+ */
+ spin_unlock(&nn->nfs_client_lock);
+ if (prev)
+ nfs_put_client(prev);
+ pr_err("NFS: %s Error: no matching nfs_client found\n", __func__);
+ return -NFS4ERR_STALE_CLIENTID;
+}
+
+#ifdef CONFIG_NFS_V4_1
+/*
+ * Compare the server owners recorded in two nfs_clients.
+ * Returns true only when the minor ID, the major ID length and the
+ * major ID bytes are all equal; each mismatch is logged.
+ */
+static bool
+nfs4_match_serverowners(struct nfs_client *a, struct nfs_client *b)
+{
+ struct nfs41_server_owner *o1 = a->cl_serverowner;
+ struct nfs41_server_owner *o2 = b->cl_serverowner;
+
+ if (o1->minor_id != o2->minor_id) {
+ dprintk("NFS: --> %s server owner minor IDs do not match\n",
+ __func__);
+ return false;
+ }
+
+ /* The major ID bytes are only compared when the lengths agree */
+ if (o1->major_id_sz != o2->major_id_sz ||
+ memcmp(o1->major_id, o2->major_id, o1->major_id_sz) != 0) {
+ dprintk("NFS: --> %s server owner major IDs do not match\n",
+ __func__);
+ return false;
+ }
+
+ dprintk("NFS: --> %s server owners match\n", __func__);
+ return true;
+}
+
+/**
+ * nfs41_walk_client_list - Find nfs_client that matches a client/server owner
+ *
+ * @new: nfs_client with client ID to test
+ * @result: OUT: found nfs_client, or new
+ * @cred: credential to use for trunking test
+ *
+ * Returns zero, a negative errno, or a negative NFS4ERR status.
+ * If zero is returned, an nfs_client pointer is planted in "result."
+ *
+ * NB: nfs41_walk_client_list() relies on the new nfs_client being
+ * the last nfs_client on the list.
+ */
+int nfs41_walk_client_list(struct nfs_client *new,
+ struct nfs_client **result,
+ struct rpc_cred *cred)
+{
+ struct nfs_net *nn = net_generic(new->cl_net, nfs_net_id);
+ struct nfs_client *pos, *n, *prev = NULL;
+ int error;
+
+ spin_lock(&nn->nfs_client_lock);
+ list_for_each_entry_safe(pos, n, &nn->nfs_client_list, cl_share_link) {
+ /* If "pos" isn't marked ready, we can't trust the
+ * remaining fields in "pos", especially the client
+ * ID and serverowner fields. Wait for CREATE_SESSION
+ * to finish. */
+ if (pos->cl_cons_state < NFS_CS_READY) {
+ atomic_inc(&pos->cl_count);
+ spin_unlock(&nn->nfs_client_lock);
+
+ if (prev)
+ nfs_put_client(prev);
+ prev = pos;
+
+ error = nfs_wait_client_init_complete(pos);
+ if (error < 0) {
+ nfs_put_client(pos);
+ /* The list walk must resume with
+ * nfs_client_lock held, exactly as on
+ * the success path below.
+ * NOTE(review): "prev" still points at
+ * "pos" after this put; confirm a later
+ * put of "prev" cannot drop the same
+ * reference twice. */
+ spin_lock(&nn->nfs_client_lock);
+ continue;
+ }
+
+ spin_lock(&nn->nfs_client_lock);
+ }
+
+ if (pos->rpc_ops != new->rpc_ops)
+ continue;
+
+ if (pos->cl_proto != new->cl_proto)
+ continue;
+
+ if (pos->cl_minorversion != new->cl_minorversion)
+ continue;
+
+ if (!nfs4_match_clientids(pos, new))
+ continue;
+
+ if (!nfs4_match_serverowners(pos, new))
+ continue;
+
+ spin_unlock(&nn->nfs_client_lock);
+ dprintk("NFS: <-- %s using nfs_client = %p ({%d})\n",
+ __func__, pos, atomic_read(&pos->cl_count));
+
+ *result = pos;
+ return 0;
+ }
+
+ /*
+ * No matching nfs_client found. This should be impossible,
+ * because the new nfs_client has already been added to
+ * nfs_client_list by nfs_get_client().
+ *
+ * Don't BUG(), since the caller is holding a mutex.
+ */
+ spin_unlock(&nn->nfs_client_lock);
+ pr_err("NFS: %s Error: no matching nfs_client found\n", __func__);
+ return -NFS4ERR_STALE_CLIENTID;
+}
+#endif /* CONFIG_NFS_V4_1 */
+
static void nfs4_destroy_server(struct nfs_server *server)
{
nfs_server_return_all_delegations(server);
diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c
index eb5eb8e..afddd66 100644
--- a/fs/nfs/nfs4file.c
+++ b/fs/nfs/nfs4file.c
@@ -95,16 +95,25 @@
int ret;
struct inode *inode = file->f_path.dentry->d_inode;
- ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
- if (ret != 0)
- goto out;
- mutex_lock(&inode->i_mutex);
- ret = nfs_file_fsync_commit(file, start, end, datasync);
- if (!ret && !datasync)
- /* application has asked for meta-data sync */
- ret = pnfs_layoutcommit_inode(inode, true);
- mutex_unlock(&inode->i_mutex);
-out:
+ do {
+ ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
+ if (ret != 0)
+ break;
+ mutex_lock(&inode->i_mutex);
+ ret = nfs_file_fsync_commit(file, start, end, datasync);
+ if (!ret && !datasync)
+ /* application has asked for meta-data sync */
+ ret = pnfs_layoutcommit_inode(inode, true);
+ mutex_unlock(&inode->i_mutex);
+ /*
+ * If nfs_file_fsync_commit detected a server reboot, then
+ * resend all dirty pages that might have been covered by
+ * the NFS_CONTEXT_RESEND_WRITES flag
+ */
+ start = 0;
+ end = LLONG_MAX;
+ } while (ret == -EAGAIN);
+
return ret;
}
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index 53f94d9..52d8472 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -190,8 +190,6 @@
* i/o and all i/o waiting on the slot table to the MDS until
* layout is destroyed and a new valid layout is obtained.
*/
- set_bit(NFS_LAYOUT_INVALID,
- &NFS_I(inode)->layout->plh_flags);
pnfs_destroy_layout(NFS_I(inode));
rpc_wake_up(&tbl->slot_tbl_waitq);
goto reset;
@@ -205,7 +203,7 @@
case -EPIPE:
dprintk("%s DS connection error %d\n", __func__,
task->tk_status);
- filelayout_mark_devid_invalid(devid);
+ nfs4_mark_deviceid_unavailable(devid);
clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(inode)->flags);
_pnfs_return_layout(inode);
rpc_wake_up(&tbl->slot_tbl_waitq);
@@ -269,6 +267,21 @@
(unsigned long) NFS_I(hdr->inode)->layout->plh_lwb);
}
+/*
+ * Returns true when the device ID is flagged invalid or when
+ * nfs4_test_deviceid_unavailable() reports it as unavailable.
+ */
+bool
+filelayout_test_devid_unavailable(struct nfs4_deviceid_node *node)
+{
+ if (filelayout_test_devid_invalid(node))
+ return true;
+ return nfs4_test_deviceid_unavailable(node);
+}
+
+/*
+ * Returns true when the device ID node behind this layout segment is
+ * reported unavailable by filelayout_test_devid_unavailable().
+ */
+static bool
+filelayout_reset_to_mds(struct pnfs_layout_segment *lseg)
+{
+ return filelayout_test_devid_unavailable(FILELAYOUT_DEVID_NODE(lseg));
+}
+
/*
* Call ops for the async read/write cases
* In the case of dense layouts, the offset needs to be reset to its
@@ -453,7 +466,7 @@
struct nfs_commit_data *data = calldata;
data->completion_ops->completion(data);
- put_lseg(data->lseg);
+ pnfs_put_lseg(data->lseg);
nfs_put_client(data->ds_clp);
nfs_commitdata_release(data);
}
@@ -608,13 +621,13 @@
d = nfs4_find_get_deviceid(NFS_SERVER(lo->plh_inode)->pnfs_curr_ld,
NFS_SERVER(lo->plh_inode)->nfs_client, id);
if (d == NULL) {
- dsaddr = get_device_info(lo->plh_inode, id, gfp_flags);
+ dsaddr = filelayout_get_device_info(lo->plh_inode, id, gfp_flags);
if (dsaddr == NULL)
goto out;
} else
dsaddr = container_of(d, struct nfs4_file_layout_dsaddr, id_node);
- /* Found deviceid is being reaped */
- if (test_bit(NFS_DEVICEID_INVALID, &dsaddr->id_node.flags))
+ /* Found deviceid is unavailable */
+ if (filelayout_test_devid_unavailable(&dsaddr->id_node))
goto out_put;
fl->dsaddr = dsaddr;
@@ -931,7 +944,7 @@
nfs_init_cinfo(&cinfo, pgio->pg_inode, pgio->pg_dreq);
status = filelayout_alloc_commit_info(pgio->pg_lseg, &cinfo, GFP_NOFS);
if (status < 0) {
- put_lseg(pgio->pg_lseg);
+ pnfs_put_lseg(pgio->pg_lseg);
pgio->pg_lseg = NULL;
goto out_mds;
}
@@ -985,7 +998,7 @@
out:
nfs_request_remove_commit_list(req, cinfo);
spin_unlock(cinfo->lock);
- put_lseg(freeme);
+ pnfs_put_lseg(freeme);
}
static struct list_head *
@@ -1018,7 +1031,7 @@
* off due to a rewrite, in which case it will be done in
* filelayout_clear_request_commit
*/
- buckets[i].wlseg = get_lseg(lseg);
+ buckets[i].wlseg = pnfs_get_lseg(lseg);
}
set_bit(PG_COMMIT_TO_DS, &req->wb_flags);
cinfo->ds->nwritten++;
@@ -1128,7 +1141,7 @@
if (list_empty(src))
bucket->wlseg = NULL;
else
- get_lseg(bucket->clseg);
+ pnfs_get_lseg(bucket->clseg);
}
return ret;
}
@@ -1159,12 +1172,12 @@
/* NOTE cinfo->lock is NOT held, relying on fact that this is
* only called on single thread per dreq.
- * Can't take the lock because need to do put_lseg
+ * Can't take the lock because need to do pnfs_put_lseg
*/
for (i = 0, b = cinfo->ds->buckets; i < cinfo->ds->nbuckets; i++, b++) {
if (transfer_commit_list(&b->written, dst, cinfo, 0)) {
BUG_ON(!list_empty(&b->written));
- put_lseg(b->wlseg);
+ pnfs_put_lseg(b->wlseg);
b->wlseg = NULL;
}
}
@@ -1200,7 +1213,7 @@
if (list_empty(&bucket->committing))
continue;
nfs_retry_commit(&bucket->committing, bucket->clseg, cinfo);
- put_lseg(bucket->clseg);
+ pnfs_put_lseg(bucket->clseg);
bucket->clseg = NULL;
}
/* Caller will clean up entries put on list */
diff --git a/fs/nfs/nfs4filelayout.h b/fs/nfs/nfs4filelayout.h
index 43fe802..dca47d78 100644
--- a/fs/nfs/nfs4filelayout.h
+++ b/fs/nfs/nfs4filelayout.h
@@ -129,23 +129,13 @@
}
static inline bool
-filelayout_test_layout_invalid(struct pnfs_layout_hdr *lo)
-{
- return test_bit(NFS_LAYOUT_INVALID, &lo->plh_flags);
-}
-
-static inline bool
filelayout_test_devid_invalid(struct nfs4_deviceid_node *node)
{
return test_bit(NFS_DEVICEID_INVALID, &node->flags);
}
-static inline bool
-filelayout_reset_to_mds(struct pnfs_layout_segment *lseg)
-{
- return filelayout_test_devid_invalid(FILELAYOUT_DEVID_NODE(lseg)) ||
- filelayout_test_layout_invalid(lseg->pls_layout);
-}
+extern bool
+filelayout_test_devid_unavailable(struct nfs4_deviceid_node *node);
extern struct nfs_fh *
nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, u32 j);
@@ -158,7 +148,7 @@
extern void nfs4_fl_put_deviceid(struct nfs4_file_layout_dsaddr *dsaddr);
extern void nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr);
struct nfs4_file_layout_dsaddr *
-get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id, gfp_t gfp_flags);
+filelayout_get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id, gfp_t gfp_flags);
void nfs4_ds_disconnect(struct nfs_client *clp);
#endif /* FS_NFS_NFS4FILELAYOUT_H */
diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c
index f81231f..3336d5e 100644
--- a/fs/nfs/nfs4filelayoutdev.c
+++ b/fs/nfs/nfs4filelayoutdev.c
@@ -690,7 +690,7 @@
* of available devices, and return it.
*/
struct nfs4_file_layout_dsaddr *
-get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id, gfp_t gfp_flags)
+filelayout_get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id, gfp_t gfp_flags)
{
struct pnfs_device *pdev = NULL;
u32 max_resp_sz;
@@ -804,13 +804,14 @@
struct nfs4_pnfs_ds *ds = dsaddr->ds_list[ds_idx];
struct nfs4_deviceid_node *devid = FILELAYOUT_DEVID_NODE(lseg);
- if (filelayout_test_devid_invalid(devid))
+ if (filelayout_test_devid_unavailable(devid))
return NULL;
if (ds == NULL) {
printk(KERN_ERR "NFS: %s: No data server for offset index %d\n",
__func__, ds_idx);
- goto mark_dev_invalid;
+ filelayout_mark_devid_invalid(devid);
+ return NULL;
}
if (!ds->ds_clp) {
@@ -818,14 +819,12 @@
int err;
err = nfs4_ds_connect(s, ds);
- if (err)
- goto mark_dev_invalid;
+ if (err) {
+ nfs4_mark_deviceid_unavailable(devid);
+ return NULL;
+ }
}
return ds;
-
-mark_dev_invalid:
- filelayout_mark_devid_invalid(devid);
- return NULL;
}
module_param(dataserver_retrans, uint, 0644);
diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c
index 4fdeb1b..79fbb61 100644
--- a/fs/nfs/nfs4namespace.c
+++ b/fs/nfs/nfs4namespace.c
@@ -192,25 +192,13 @@
struct rpc_clnt *nfs4_create_sec_client(struct rpc_clnt *clnt, struct inode *inode,
struct qstr *name)
{
- struct rpc_clnt *clone;
- struct rpc_auth *auth;
rpc_authflavor_t flavor;
flavor = nfs4_negotiate_security(inode, name);
if ((int)flavor < 0)
return ERR_PTR((int)flavor);
- clone = rpc_clone_client(clnt);
- if (IS_ERR(clone))
- return clone;
-
- auth = rpcauth_create(flavor, clone);
- if (IS_ERR(auth)) {
- rpc_shutdown_client(clone);
- clone = ERR_PTR(-EIO);
- }
-
- return clone;
+ return rpc_clone_client_set_auth(clnt, flavor);
}
static struct vfsmount *try_location(struct nfs_clone_mount *mountdata,
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index ddfebb12..e10d66f 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -104,6 +104,8 @@
return -EACCES;
case -NFS4ERR_MINOR_VERS_MISMATCH:
return -EPROTONOSUPPORT;
+ case -NFS4ERR_ACCESS:
+ return -EACCES;
default:
dprintk("%s could not handle NFSv4 error %d\n",
__func__, -err);
@@ -860,6 +862,9 @@
p->o_arg.fh = NFS_FH(dir);
p->o_arg.open_flags = flags;
p->o_arg.fmode = fmode & (FMODE_READ|FMODE_WRITE);
+ /* ask server to check for all possible rights as results are cached */
+ p->o_arg.access = NFS4_ACCESS_READ | NFS4_ACCESS_MODIFY |
+ NFS4_ACCESS_EXTEND | NFS4_ACCESS_EXECUTE;
p->o_arg.clientid = server->nfs_client->cl_clientid;
p->o_arg.id.create_time = ktime_to_ns(sp->so_seqid.create_time);
p->o_arg.id.uniquifier = sp->so_seqid.owner_id;
@@ -1643,6 +1648,39 @@
return status;
}
+/*
+ * nfs4_opendata_access - check the ACCESS result returned alongside an OPEN
+ * @cred: credential stored in the access cache entry
+ * @opendata: OPEN call data; o_res.access_supported and
+ *            o_res.access_result are read
+ * @state: open state whose inode receives the access cache entry
+ * @fmode: open mode that was requested
+ *
+ * Returns 0 when the server reported no supported access bits, or when
+ * no MAY_READ/MAY_WRITE/MAY_EXEC bit implied by @fmode is missing from
+ * the cached mask. Otherwise calls nfs4_close_state() for @fmode and
+ * returns -NFS4ERR_ACCESS.
+ */
+static int nfs4_opendata_access(struct rpc_cred *cred,
+ struct nfs4_opendata *opendata,
+ struct nfs4_state *state, fmode_t fmode)
+{
+ struct nfs_access_entry cache;
+ u32 mask;
+
+ /* access call failed or for some reason the server doesn't
+ * support any access modes -- defer access call until later */
+ if (opendata->o_res.access_supported == 0)
+ return 0;
+
+ /* Translate the requested open mode into MAY_* permission bits */
+ mask = 0;
+ if (fmode & FMODE_READ)
+ mask |= MAY_READ;
+ if (fmode & FMODE_WRITE)
+ mask |= MAY_WRITE;
+ if (fmode & FMODE_EXEC)
+ mask |= MAY_EXEC;
+
+ /* Add the server's answer to the inode's access cache before
+ * deciding, so it is recorded whether or not this open is allowed.
+ * nfs_access_set_mask() is expected to fill cache.mask from
+ * access_result -- it is defined outside this hunk. */
+ cache.cred = cred;
+ cache.jiffies = jiffies;
+ nfs_access_set_mask(&cache, opendata->o_res.access_result);
+ nfs_access_add_cache(state->inode, &cache);
+
+ /* Allowed only if every requested bit is present in cache.mask */
+ if ((mask & ~cache.mask & (MAY_READ | MAY_WRITE | MAY_EXEC)) == 0)
+ return 0;
+
+ /* even though OPEN succeeded, access is denied. Close the file */
+ nfs4_close_state(state, fmode);
+ return -NFS4ERR_ACCESS;
+}
+
/*
* Note: On error, nfs4_proc_open will free the struct nfs4_opendata
*/
@@ -1774,7 +1812,11 @@
* informs us the stateid is unrecognized. */
if (status != -NFS4ERR_BAD_STATEID)
nfs41_free_stateid(server, stateid);
+ nfs_remove_bad_delegation(state->inode);
+ write_seqlock(&state->seqlock);
+ nfs4_stateid_copy(&state->stateid, &state->open_stateid);
+ write_sequnlock(&state->seqlock);
clear_bit(NFS_DELEGATED_STATE, &state->flags);
}
}
@@ -1790,7 +1832,7 @@
static int nfs41_check_open_stateid(struct nfs4_state *state)
{
struct nfs_server *server = NFS_SERVER(state->inode);
- nfs4_stateid *stateid = &state->stateid;
+ nfs4_stateid *stateid = &state->open_stateid;
int status;
/* If a state reset has been done, test_stateid is unneeded */
@@ -1896,6 +1938,10 @@
if (server->caps & NFS_CAP_POSIX_LOCK)
set_bit(NFS_STATE_POSIX_LOCKS, &state->flags);
+ status = nfs4_opendata_access(cred, opendata, state, fmode);
+ if (status != 0)
+ goto err_opendata_put;
+
if (opendata->o_arg.open_flags & O_EXCL) {
nfs4_exclusive_attrset(opendata, sattr);
@@ -1941,7 +1987,7 @@
struct nfs4_state *res;
int status;
- fmode &= FMODE_READ|FMODE_WRITE;
+ fmode &= FMODE_READ|FMODE_WRITE|FMODE_EXEC;
do {
status = _nfs4_do_open(dir, dentry, fmode, flags, sattr, cred,
&res, ctx_th);
@@ -2013,8 +2059,12 @@
nfs_fattr_init(fattr);
if (state != NULL) {
+ struct nfs_lockowner lockowner = {
+ .l_owner = current->files,
+ .l_pid = current->tgid,
+ };
nfs4_select_rw_stateid(&arg.stateid, state, FMODE_WRITE,
- current->files, current->tgid);
+ &lockowner);
} else if (nfs4_copy_delegation_stateid(&arg.stateid, inode,
FMODE_WRITE)) {
/* Use that stateid */
@@ -2133,6 +2183,7 @@
{
struct nfs4_closedata *calldata = data;
struct nfs4_state *state = calldata->state;
+ struct inode *inode = calldata->inode;
int call_close = 0;
dprintk("%s: begin!\n", __func__);
@@ -2166,16 +2217,13 @@
if (calldata->arg.fmode == 0) {
task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CLOSE];
if (calldata->roc &&
- pnfs_roc_drain(calldata->inode, &calldata->roc_barrier)) {
- rpc_sleep_on(&NFS_SERVER(calldata->inode)->roc_rpcwaitq,
- task, NULL);
+ pnfs_roc_drain(inode, &calldata->roc_barrier, task))
goto out;
- }
}
nfs_fattr_init(calldata->res.fattr);
calldata->timestamp = jiffies;
- if (nfs4_setup_sequence(NFS_SERVER(calldata->inode),
+ if (nfs4_setup_sequence(NFS_SERVER(inode),
&calldata->arg.seq_args,
&calldata->res.seq_res,
task))
@@ -2202,7 +2250,7 @@
*
* NOTE: Caller must be holding the sp->so_owner semaphore!
*/
-int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait, bool roc)
+int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait)
{
struct nfs_server *server = NFS_SERVER(state->inode);
struct nfs4_closedata *calldata;
@@ -2238,7 +2286,7 @@
calldata->res.fattr = &calldata->fattr;
calldata->res.seqid = calldata->arg.seqid;
calldata->res.server = server;
- calldata->roc = roc;
+ calldata->roc = pnfs_roc(state->inode);
nfs_sb_active(calldata->inode->i_sb);
msg.rpc_argp = &calldata->arg;
@@ -2255,8 +2303,6 @@
out_free_calldata:
kfree(calldata);
out:
- if (roc)
- pnfs_roc_release(state->inode);
nfs4_put_open_state(state);
nfs4_put_state_owner(sp);
return status;
@@ -2767,13 +2813,7 @@
status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0);
if (!status) {
- entry->mask = 0;
- if (res.access & NFS4_ACCESS_READ)
- entry->mask |= MAY_READ;
- if (res.access & (NFS4_ACCESS_MODIFY | NFS4_ACCESS_EXTEND | NFS4_ACCESS_DELETE))
- entry->mask |= MAY_WRITE;
- if (res.access & (NFS4_ACCESS_LOOKUP|NFS4_ACCESS_EXECUTE))
- entry->mask |= MAY_EXEC;
+ nfs_access_set_mask(entry, res.access);
nfs_refresh_inode(inode, res.fattr);
}
nfs_free_fattr(res.fattr);
@@ -3362,8 +3402,11 @@
nfs_fattr_init(fsinfo->fattr);
error = nfs4_do_fsinfo(server, fhandle, fsinfo);
- if (error == 0)
+ if (error == 0) {
+ /* block layout checks this! */
+ server->pnfs_blksize = fsinfo->blksize;
set_pnfs_layoutdriver(server, fhandle, fsinfo->layouttype);
+ }
return error;
}
@@ -4007,6 +4050,36 @@
memcpy(bootverf->data, verf, sizeof(bootverf->data));
}
+/*
+ * Format the non-uniform client ID string into @buf, writing at most
+ * @len bytes via scnprintf(). The string has the form
+ * "Linux NFSv4.0 <cl_ipaddr>/<peer address> <peer protocol>".
+ *
+ * Returns the value returned by scnprintf().
+ */
+static unsigned int
+nfs4_init_nonuniform_client_string(const struct nfs_client *clp,
+ char *buf, size_t len)
+{
+ unsigned int result;
+
+ /* The rpc_peeraddr2str() calls are made inside an RCU read section.
+ * NOTE(review): presumably the returned strings are only stable
+ * while rcu_read_lock() is held -- confirm against sunrpc. */
+ rcu_read_lock();
+ result = scnprintf(buf, len, "Linux NFSv4.0 %s/%s %s",
+ clp->cl_ipaddr,
+ rpc_peeraddr2str(clp->cl_rpcclient,
+ RPC_DISPLAY_ADDR),
+ rpc_peeraddr2str(clp->cl_rpcclient,
+ RPC_DISPLAY_PROTO));
+ rcu_read_unlock();
+ return result;
+}
+
+/*
+ * Format the uniform client ID string into @buf, writing at most @len
+ * bytes via scnprintf(). The string has the form
+ * "Linux NFSv<version>.<minorversion> <id>", where <id> is
+ * nfs4_client_id_uniquifier when that string is non-empty and the RPC
+ * client's node name otherwise.
+ *
+ * Returns the value returned by scnprintf().
+ */
+static unsigned int
+nfs4_init_uniform_client_string(const struct nfs_client *clp,
+				char *buf, size_t len)
+{
+	const char *id;
+
+	if (nfs4_client_id_uniquifier[0] == '\0')
+		id = clp->cl_rpcclient->cl_nodename;
+	else
+		id = nfs4_client_id_uniquifier;
+
+	return scnprintf(buf, len, "Linux NFSv%u.%u %s",
+			 clp->rpc_ops->version, clp->cl_minorversion, id);
+}
+
/**
* nfs4_proc_setclientid - Negotiate client ID
* @clp: state data structure
@@ -4037,15 +4110,18 @@
/* nfs_client_id4 */
nfs4_init_boot_verifier(clp, &sc_verifier);
- rcu_read_lock();
- setclientid.sc_name_len = scnprintf(setclientid.sc_name,
- sizeof(setclientid.sc_name), "%s/%s %s",
- clp->cl_ipaddr,
- rpc_peeraddr2str(clp->cl_rpcclient,
- RPC_DISPLAY_ADDR),
- rpc_peeraddr2str(clp->cl_rpcclient,
- RPC_DISPLAY_PROTO));
+ if (test_bit(NFS_CS_MIGRATION, &clp->cl_flags))
+ setclientid.sc_name_len =
+ nfs4_init_uniform_client_string(clp,
+ setclientid.sc_name,
+ sizeof(setclientid.sc_name));
+ else
+ setclientid.sc_name_len =
+ nfs4_init_nonuniform_client_string(clp,
+ setclientid.sc_name,
+ sizeof(setclientid.sc_name));
/* cb_client4 */
+ rcu_read_lock();
setclientid.sc_netid_len = scnprintf(setclientid.sc_netid,
sizeof(setclientid.sc_netid),
rpc_peeraddr2str(clp->cl_rpcclient,
@@ -4391,7 +4467,7 @@
if (nfs_wait_on_sequence(calldata->arg.seqid, task) != 0)
return;
- if ((calldata->lsp->ls_flags & NFS_LOCK_INITIALIZED) == 0) {
+ if (test_bit(NFS_LOCK_INITIALIZED, &calldata->lsp->ls_flags) == 0) {
/* Note: exit _without_ running nfs4_locku_done */
task->tk_action = NULL;
return;
@@ -4585,7 +4661,7 @@
}
if (data->rpc_status == 0) {
nfs4_stateid_copy(&data->lsp->ls_stateid, &data->res.stateid);
- data->lsp->ls_flags |= NFS_LOCK_INITIALIZED;
+ set_bit(NFS_LOCK_INITIALIZED, &data->lsp->ls_flags);
renew_lease(NFS_SERVER(data->ctx->dentry->d_inode), data->timestamp);
}
out:
@@ -4632,7 +4708,7 @@
case -NFS4ERR_BAD_STATEID:
lsp->ls_seqid.flags &= ~NFS_SEQID_CONFIRMED;
if (new_lock_owner != 0 ||
- (lsp->ls_flags & NFS_LOCK_INITIALIZED) != 0)
+ test_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags) != 0)
nfs4_schedule_stateid_recovery(server, lsp->ls_state);
break;
case -NFS4ERR_STALE_STATEID:
@@ -4756,7 +4832,7 @@
struct nfs_server *server = NFS_SERVER(state->inode);
list_for_each_entry(lsp, &state->lock_states, ls_locks) {
- if (lsp->ls_flags & NFS_LOCK_INITIALIZED) {
+ if (test_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags)) {
status = nfs41_test_stateid(server, &lsp->ls_stateid);
if (status != NFS_OK) {
/* Free the stateid unless the server
@@ -4764,7 +4840,7 @@
if (status != -NFS4ERR_BAD_STATEID)
nfs41_free_stateid(server,
&lsp->ls_stateid);
- lsp->ls_flags &= ~NFS_LOCK_INITIALIZED;
+ clear_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags);
ret = status;
}
}
@@ -5267,10 +5343,8 @@
};
nfs4_init_boot_verifier(clp, &verifier);
- args.id_len = scnprintf(args.id, sizeof(args.id),
- "%s/%s",
- clp->cl_ipaddr,
- clp->cl_rpcclient->cl_nodename);
+ args.id_len = nfs4_init_uniform_client_string(clp, args.id,
+ sizeof(args.id));
dprintk("NFS call exchange_id auth=%s, '%.*s'\n",
clp->cl_rpcclient->cl_auth->au_ops->au_name,
args.id_len, args.id);
@@ -5391,6 +5465,8 @@
goto out;
if (clp->cl_exchange_flags == 0)
goto out;
+ if (clp->cl_preserve_clid)
+ goto out;
cred = nfs4_get_exchange_id_cred(clp);
ret = nfs4_proc_destroy_clientid(clp, cred);
if (cred)
@@ -6282,7 +6358,8 @@
.rpc_release = nfs4_layoutget_release,
};
-void nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags)
+struct pnfs_layout_segment *
+nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags)
{
struct nfs_server *server = NFS_SERVER(lgp->args.inode);
size_t max_pages = max_response_pages(server);
@@ -6299,6 +6376,7 @@
.callback_data = lgp,
.flags = RPC_TASK_ASYNC,
};
+ struct pnfs_layout_segment *lseg = NULL;
int status = 0;
dprintk("--> %s\n", __func__);
@@ -6306,7 +6384,7 @@
lgp->args.layout.pages = nfs4_alloc_pages(max_pages, gfp_flags);
if (!lgp->args.layout.pages) {
nfs4_layoutget_release(lgp);
- return;
+ return ERR_PTR(-ENOMEM);
}
lgp->args.layout.pglen = max_pages * PAGE_SIZE;
@@ -6315,15 +6393,17 @@
nfs41_init_sequence(&lgp->args.seq_args, &lgp->res.seq_res, 0);
task = rpc_run_task(&task_setup_data);
if (IS_ERR(task))
- return;
+ return ERR_CAST(task);
status = nfs4_wait_for_completion_rpc_task(task);
if (status == 0)
status = task->tk_status;
if (status == 0)
- status = pnfs_layout_process(lgp);
+ lseg = pnfs_layout_process(lgp);
rpc_put_task(task);
dprintk("<-- %s status=%d\n", __func__, status);
- return;
+ if (status)
+ return ERR_PTR(status);
+ return lseg;
}
static void
@@ -6342,7 +6422,6 @@
{
struct nfs4_layoutreturn *lrp = calldata;
struct nfs_server *server;
- struct pnfs_layout_hdr *lo = lrp->args.layout;
dprintk("--> %s\n", __func__);
@@ -6354,20 +6433,21 @@
rpc_restart_call_prepare(task);
return;
}
- spin_lock(&lo->plh_inode->i_lock);
- if (task->tk_status == 0 && lrp->res.lrs_present)
- pnfs_set_layout_stateid(lo, &lrp->res.stateid, true);
- lo->plh_block_lgets--;
- spin_unlock(&lo->plh_inode->i_lock);
dprintk("<-- %s\n", __func__);
}
static void nfs4_layoutreturn_release(void *calldata)
{
struct nfs4_layoutreturn *lrp = calldata;
+ struct pnfs_layout_hdr *lo = lrp->args.layout;
dprintk("--> %s\n", __func__);
- put_layout_hdr(lrp->args.layout);
+ spin_lock(&lo->plh_inode->i_lock);
+ if (lrp->res.lrs_present)
+ pnfs_set_layout_stateid(lo, &lrp->res.stateid, true);
+ lo->plh_block_lgets--;
+ spin_unlock(&lo->plh_inode->i_lock);
+ pnfs_put_layout_hdr(lrp->args.layout);
kfree(calldata);
dprintk("<-- %s\n", __func__);
}
@@ -6541,7 +6621,7 @@
list_del_init(&lseg->pls_lc_list);
if (test_and_clear_bit(NFS_LSEG_LAYOUTCOMMIT,
&lseg->pls_flags))
- put_lseg(lseg);
+ pnfs_put_lseg(lseg);
}
clear_bit_unlock(NFS_INO_LAYOUTCOMMITTING, bitlock);
@@ -6800,6 +6880,7 @@
.recover_lock = nfs4_lock_reclaim,
.establish_clid = nfs4_init_clientid,
.get_clid_cred = nfs4_get_setclientid_cred,
+ .detect_trunking = nfs40_discover_server_trunking,
};
#if defined(CONFIG_NFS_V4_1)
@@ -6811,6 +6892,7 @@
.establish_clid = nfs41_init_clientid,
.get_clid_cred = nfs4_get_exchange_id_cred,
.reclaim_complete = nfs41_proc_reclaim_complete,
+ .detect_trunking = nfs41_discover_server_trunking,
};
#endif /* CONFIG_NFS_V4_1 */
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 55148de..5c42866 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -51,18 +51,21 @@
#include <linux/bitops.h>
#include <linux/jiffies.h>
+#include <linux/sunrpc/clnt.h>
+
#include "nfs4_fs.h"
#include "callback.h"
#include "delegation.h"
#include "internal.h"
#include "pnfs.h"
+#include "netns.h"
#define NFSDBG_FACILITY NFSDBG_STATE
#define OPENOWNER_POOL_SIZE 8
const nfs4_stateid zero_stateid;
-
+static DEFINE_MUTEX(nfs_clid_init_mutex);
static LIST_HEAD(nfs4_clientid_list);
int nfs4_init_clientid(struct nfs_client *clp, struct rpc_cred *cred)
@@ -73,12 +76,13 @@
};
unsigned short port;
int status;
+ struct nfs_net *nn = net_generic(clp->cl_net, nfs_net_id);
if (test_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state))
goto do_confirm;
- port = nfs_callback_tcpport;
+ port = nn->nfs_callback_tcpport;
if (clp->cl_addr.ss_family == AF_INET6)
- port = nfs_callback_tcpport6;
+ port = nn->nfs_callback_tcpport6;
status = nfs4_proc_setclientid(clp, NFS4_CALLBACK, port, cred, &clid);
if (status != 0)
@@ -96,6 +100,55 @@
return status;
}
+/**
+ * nfs40_discover_server_trunking - Detect server IP address trunking (mv0)
+ *
+ * @clp: nfs_client under test
+ * @result: OUT: found nfs_client, or clp
+ * @cred: credential to use for trunking test
+ *
+ * Issues SETCLIENTID via nfs4_proc_setclientid(), stores the returned
+ * clientid and confirm verifier in @clp, then walks the client list
+ * with nfs40_walk_client_list().
+ *
+ * Returns zero, a negative errno, or a negative NFS4ERR status.
+ * If zero is returned, an nfs_client pointer is planted in
+ * "result".
+ *
+ * Note: The returned client may not yet be marked ready.
+ */
+int nfs40_discover_server_trunking(struct nfs_client *clp,
+				   struct nfs_client **result,
+				   struct rpc_cred *cred)
+{
+	struct nfs4_setclientid_res clid = {
+		.clientid = clp->cl_clientid,
+		.confirm = clp->cl_confirm,
+	};
+	struct nfs_net *nn = net_generic(clp->cl_net, nfs_net_id);
+	unsigned short port;
+	int status;
+
+	/* Callback ports are looked up per network namespace, exactly as
+	 * nfs4_init_clientid() does */
+	port = nn->nfs_callback_tcpport;
+	if (clp->cl_addr.ss_family == AF_INET6)
+		port = nn->nfs_callback_tcpport6;
+
+	status = nfs4_proc_setclientid(clp, NFS4_CALLBACK, port, cred, &clid);
+	if (status != 0)
+		goto out;
+	clp->cl_clientid = clid.clientid;
+	clp->cl_confirm = clid.confirm;
+
+	status = nfs40_walk_client_list(clp, result, cred);
+	switch (status) {
+	case -NFS4ERR_STALE_CLIENTID:
+		set_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state);
+		/* NOTE(review): the renewal below dereferences *result on
+		 * this path too; confirm nfs40_walk_client_list() sets it
+		 * when it returns -NFS4ERR_STALE_CLIENTID. */
+		/* fall through */
+	case 0:
+		/* Sustain the lease, even if it's empty.  If the clientid4
+		 * goes stale it's of no use for trunking discovery. */
+		nfs4_schedule_state_renewal(*result);
+		break;
+	}
+
+out:
+	return status;
+}
+
struct rpc_cred *nfs4_get_machine_cred_locked(struct nfs_client *clp)
{
struct rpc_cred *cred = NULL;
@@ -275,6 +328,32 @@
return status;
}
+/**
+ * nfs41_discover_server_trunking - Detect server IP address trunking (mv1)
+ *
+ * @clp: nfs_client under test
+ * @result: OUT: found nfs_client, or clp
+ * @cred: credential to use for trunking test
+ *
+ * Performs EXCHANGE_ID through nfs4_proc_exchange_id() and, only if that
+ * succeeds, hands over to nfs41_walk_client_list().
+ *
+ * Returns NFS4_OK, a negative errno, or a negative NFS4ERR status.
+ * If NFS4_OK is returned, an nfs_client pointer is planted in
+ * "result".
+ *
+ * Note: The returned client may not yet be marked ready.
+ */
+int nfs41_discover_server_trunking(struct nfs_client *clp,
+				   struct nfs_client **result,
+				   struct rpc_cred *cred)
+{
+	int status = nfs4_proc_exchange_id(clp, cred);
+
+	if (status == NFS4_OK)
+		status = nfs41_walk_client_list(clp, result, cred);
+	return status;
+}
+
struct rpc_cred *nfs4_get_exchange_id_cred(struct nfs_client *clp)
{
struct rpc_cred *cred;
@@ -729,11 +808,8 @@
if (!call_close) {
nfs4_put_open_state(state);
nfs4_put_state_owner(owner);
- } else {
- bool roc = pnfs_roc(state->inode);
-
- nfs4_do_close(state, gfp_mask, wait, roc);
- }
+ } else
+ nfs4_do_close(state, gfp_mask, wait);
}
void nfs4_close_state(struct nfs4_state *state, fmode_t fmode)
@@ -865,7 +941,7 @@
if (list_empty(&state->lock_states))
clear_bit(LK_STATE_IN_USE, &state->flags);
spin_unlock(&state->state_lock);
- if (lsp->ls_flags & NFS_LOCK_INITIALIZED) {
+ if (test_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags)) {
if (nfs4_release_lockowner(lsp) == 0)
return;
}
@@ -911,17 +987,25 @@
}
static bool nfs4_copy_lock_stateid(nfs4_stateid *dst, struct nfs4_state *state,
- fl_owner_t fl_owner, pid_t fl_pid)
+ const struct nfs_lockowner *lockowner)
{
struct nfs4_lock_state *lsp;
+ fl_owner_t fl_owner;
+ pid_t fl_pid;
bool ret = false;
+
+ if (lockowner == NULL)
+ goto out;
+
if (test_bit(LK_STATE_IN_USE, &state->flags) == 0)
goto out;
+ fl_owner = lockowner->l_owner;
+ fl_pid = lockowner->l_pid;
spin_lock(&state->state_lock);
lsp = __nfs4_find_lock_state(state, fl_owner, fl_pid, NFS4_ANY_LOCK_TYPE);
- if (lsp != NULL && (lsp->ls_flags & NFS_LOCK_INITIALIZED) != 0) {
+ if (lsp != NULL && test_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags) != 0) {
nfs4_stateid_copy(dst, &lsp->ls_stateid);
ret = true;
}
@@ -946,11 +1030,11 @@
* requests.
*/
void nfs4_select_rw_stateid(nfs4_stateid *dst, struct nfs4_state *state,
- fmode_t fmode, fl_owner_t fl_owner, pid_t fl_pid)
+ fmode_t fmode, const struct nfs_lockowner *lockowner)
{
if (nfs4_copy_delegation_stateid(dst, state->inode, fmode))
return;
- if (nfs4_copy_lock_stateid(dst, state, fl_owner, fl_pid))
+ if (nfs4_copy_lock_stateid(dst, state, lockowner))
return;
nfs4_copy_open_stateid(dst, state);
}
@@ -1289,7 +1373,7 @@
if (status >= 0) {
spin_lock(&state->state_lock);
list_for_each_entry(lock, &state->lock_states, ls_locks) {
- if (!(lock->ls_flags & NFS_LOCK_INITIALIZED))
+ if (!test_bit(NFS_LOCK_INITIALIZED, &lock->ls_flags))
pr_warn_ratelimited("NFS: "
"%s: Lock reclaim "
"failed!\n", __func__);
@@ -1361,7 +1445,7 @@
spin_lock(&state->state_lock);
list_for_each_entry(lock, &state->lock_states, ls_locks) {
lock->ls_seqid.flags = 0;
- lock->ls_flags &= ~NFS_LOCK_INITIALIZED;
+ clear_bit(NFS_LOCK_INITIALIZED, &lock->ls_flags);
}
spin_unlock(&state->state_lock);
}
@@ -1698,6 +1782,109 @@
return 0;
}
+/**
+ * nfs4_discover_server_trunking - Detect server IP address trunking
+ *
+ * @clp: nfs_client under test
+ * @result: OUT: found nfs_client, or clp
+ *
+ * Returns zero or a negative errno.  If zero is returned,
+ * an nfs_client pointer is planted in "result".
+ *
+ * The minor-version specific ->detect_trunking() method is called with
+ * nfs_clid_init_mutex held.  -NFS4ERR_DELAY, -ETIMEDOUT and -EAGAIN (and
+ * -EACCES once the machine credential has been cleared) are retried
+ * after a one second sleep.  -NFS4ERR_CLID_INUSE and -NFS4ERR_WRONGSEC
+ * are retried with the next flavor from rpcauth_list_flavors(), skipping
+ * the flavor the RPC client started with; -EPERM is returned once the
+ * list is exhausted.
+ *
+ * Note: since we are invoked in process context, and
+ * not from inside the state manager, we cannot use
+ * nfs4_handle_reclaim_lease_error().
+ */
+int nfs4_discover_server_trunking(struct nfs_client *clp,
+				  struct nfs_client **result)
+{
+	const struct nfs4_state_recovery_ops *ops =
+				clp->cl_mvops->reboot_recovery_ops;
+	rpc_authflavor_t *flavors, flav, save;
+	struct rpc_clnt *clnt;
+	struct rpc_cred *cred;
+	int i, len, status;
+
+	dprintk("NFS: %s: testing '%s'\n", __func__, clp->cl_hostname);
+
+	len = NFS_MAX_SECFLAVORS;
+	flavors = kcalloc(len, sizeof(*flavors), GFP_KERNEL);
+	if (flavors == NULL) {
+		status = -ENOMEM;
+		goto out;
+	}
+	/* From here on, len is the number of entries actually filled in */
+	len = rpcauth_list_flavors(flavors, len);
+	if (len < 0) {
+		status = len;
+		goto out_free;
+	}
+	clnt = clp->cl_rpcclient;
+	save = clnt->cl_auth->au_flavor;
+	i = 0;
+
+	mutex_lock(&nfs_clid_init_mutex);
+	status = -ENOENT;
+again:
+	cred = ops->get_clid_cred(clp);
+	if (cred == NULL)
+		goto out_unlock;
+
+	status = ops->detect_trunking(clp, result, cred);
+	put_rpccred(cred);
+	switch (status) {
+	case 0:
+		break;
+
+	case -EACCES:
+		if (clp->cl_machine_cred == NULL)
+			break;
+		/* Handle case where the user hasn't set up machine creds */
+		nfs4_clear_machine_cred(clp);
+		/* fall through */
+	case -NFS4ERR_DELAY:
+	case -ETIMEDOUT:
+	case -EAGAIN:
+		ssleep(1);
+		dprintk("NFS: %s after status %d, retrying\n",
+			__func__, status);
+		goto again;
+
+	case -NFS4ERR_CLID_INUSE:
+	case -NFS4ERR_WRONGSEC:
+		status = -EPERM;
+		if (i >= len)
+			break;
+
+		flav = flavors[i++];
+		if (flav == save) {
+			/* Skipping the original flavor must not step past
+			 * the entries rpcauth_list_flavors() filled in */
+			if (i >= len)
+				break;
+			flav = flavors[i++];
+		}
+		/* NOTE(review): the rpc_clnt previously stored in
+		 * clp->cl_rpcclient is not released when it is replaced
+		 * below -- confirm whether it should be shut down. */
+		clnt = rpc_clone_client_set_auth(clnt, flav);
+		if (IS_ERR(clnt)) {
+			status = PTR_ERR(clnt);
+			break;
+		}
+		clp->cl_rpcclient = clnt;
+		goto again;
+
+	case -NFS4ERR_MINOR_VERS_MISMATCH:
+		status = -EPROTONOSUPPORT;
+		break;
+
+	case -EKEYEXPIRED:
+		nfs4_warn_keyexpired(clp->cl_hostname);
+		/* fall through */
+	case -NFS4ERR_NOT_SAME: /* FixMe: implement recovery
+				 * in nfs4_exchange_id */
+		status = -EKEYEXPIRED;
+		break;
+	}
+
+out_unlock:
+	mutex_unlock(&nfs_clid_init_mutex);
+out_free:
+	kfree(flavors);
+out:
+	dprintk("NFS: %s: status = %d\n", __func__, status);
+	return status;
+}
+
#ifdef CONFIG_NFS_V4_1
void nfs4_schedule_session_recovery(struct nfs4_session *session, int err)
{
@@ -2008,6 +2195,7 @@
pr_warn_ratelimited("NFS: state manager%s%s failed on NFSv4 server %s"
" with error %d\n", section_sep, section,
clp->cl_hostname, -status);
+ ssleep(1);
nfs4_end_drain_session(clp);
nfs4_clear_state_manager_bit(clp);
}
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 8dba6bd..657483c 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -447,12 +447,14 @@
encode_sequence_maxsz + \
encode_putfh_maxsz + \
encode_open_maxsz + \
+ encode_access_maxsz + \
encode_getfh_maxsz + \
encode_getattr_maxsz)
#define NFS4_dec_open_sz (compound_decode_hdr_maxsz + \
decode_sequence_maxsz + \
decode_putfh_maxsz + \
decode_open_maxsz + \
+ decode_access_maxsz + \
decode_getfh_maxsz + \
decode_getattr_maxsz)
#define NFS4_enc_open_confirm_sz \
@@ -467,11 +469,13 @@
encode_sequence_maxsz + \
encode_putfh_maxsz + \
encode_open_maxsz + \
+ encode_access_maxsz + \
encode_getattr_maxsz)
#define NFS4_dec_open_noattr_sz (compound_decode_hdr_maxsz + \
decode_sequence_maxsz + \
decode_putfh_maxsz + \
decode_open_maxsz + \
+ decode_access_maxsz + \
decode_getattr_maxsz)
#define NFS4_enc_open_downgrade_sz \
(compound_encode_hdr_maxsz + \
@@ -1509,8 +1513,12 @@
nfs4_stateid stateid;
if (ctx->state != NULL) {
+ const struct nfs_lockowner *lockowner = NULL;
+
+ if (l_ctx != NULL)
+ lockowner = &l_ctx->lockowner;
nfs4_select_rw_stateid(&stateid, ctx->state,
- fmode, l_ctx->lockowner, l_ctx->pid);
+ fmode, lockowner);
if (zero_seqid)
stateid.seqid = 0;
encode_nfs4_stateid(xdr, &stateid);
@@ -2216,6 +2224,7 @@
encode_putfh(xdr, args->fh, &hdr);
encode_open(xdr, args, &hdr);
encode_getfh(xdr, &hdr);
+ encode_access(xdr, args->access, &hdr);
encode_getfattr_open(xdr, args->bitmask, args->open_bitmap, &hdr);
encode_nops(&hdr);
}
@@ -2252,6 +2261,7 @@
encode_sequence(xdr, &args->seq_args, &hdr);
encode_putfh(xdr, args->fh, &hdr);
encode_open(xdr, args, &hdr);
+ encode_access(xdr, args->access, &hdr);
encode_getfattr(xdr, args->bitmask, &hdr);
encode_nops(&hdr);
}
@@ -4095,7 +4105,7 @@
return -EIO;
}
-static int decode_access(struct xdr_stream *xdr, struct nfs4_accessres *access)
+static int decode_access(struct xdr_stream *xdr, u32 *supported, u32 *access)
{
__be32 *p;
uint32_t supp, acc;
@@ -4109,8 +4119,8 @@
goto out_overflow;
supp = be32_to_cpup(p++);
acc = be32_to_cpup(p);
- access->supported = supp;
- access->access = acc;
+ *supported = supp;
+ *access = acc;
return 0;
out_overflow:
print_overflow_msg(__func__, xdr);
@@ -5642,7 +5652,8 @@
* and places the remaining xdr data in xdr_buf->tail
*/
pdev->mincount = be32_to_cpup(p);
- xdr_read_pages(xdr, pdev->mincount); /* include space for the length */
+ if (xdr_read_pages(xdr, pdev->mincount) != pdev->mincount)
+ goto out_overflow;
/* Parse notification bitmap, verifying that it is zero. */
p = xdr_inline_decode(xdr, 4);
@@ -5887,7 +5898,7 @@
status = decode_putfh(xdr);
if (status != 0)
goto out;
- status = decode_access(xdr, res);
+ status = decode_access(xdr, &res->supported, &res->access);
if (status != 0)
goto out;
decode_getfattr(xdr, res->fattr, res->server);
@@ -6228,6 +6239,7 @@
status = decode_getfh(xdr, &res->fh);
if (status)
goto out;
+ decode_access(xdr, &res->access_supported, &res->access_result);
decode_getfattr(xdr, res->f_attr, res->server);
out:
return status;
@@ -6276,6 +6288,7 @@
status = decode_open(xdr, res);
if (status)
goto out;
+ decode_access(xdr, &res->access_supported, &res->access_result);
decode_getfattr(xdr, res->f_attr, res->server);
out:
return status;
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 311a796..e56e846 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -102,6 +102,7 @@
unsigned int offset, unsigned int count)
{
struct nfs_page *req;
+ struct nfs_lock_context *l_ctx;
/* try to allocate the request struct */
req = nfs_page_alloc();
@@ -109,11 +110,12 @@
return ERR_PTR(-ENOMEM);
/* get lock context early so we can deal with alloc failures */
- req->wb_lock_context = nfs_get_lock_context(ctx);
- if (req->wb_lock_context == NULL) {
+ l_ctx = nfs_get_lock_context(ctx);
+ if (IS_ERR(l_ctx)) {
nfs_page_free(req);
- return ERR_PTR(-ENOMEM);
+ return ERR_CAST(l_ctx);
}
+ req->wb_lock_context = l_ctx;
/* Initialize the request struct. Initially, we assume a
* long write-back delay. This will be adjusted in
@@ -290,7 +292,9 @@
{
if (req->wb_context->cred != prev->wb_context->cred)
return false;
- if (req->wb_lock_context->lockowner != prev->wb_lock_context->lockowner)
+ if (req->wb_lock_context->lockowner.l_owner != prev->wb_lock_context->lockowner.l_owner)
+ return false;
+ if (req->wb_lock_context->lockowner.l_pid != prev->wb_lock_context->lockowner.l_pid)
return false;
if (req->wb_context->state != prev->wb_context->state)
return false;
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 2e00fea..d737557 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -35,6 +35,7 @@
#include "iostat.h"
#define NFSDBG_FACILITY NFSDBG_PNFS
+#define PNFS_LAYOUTGET_RETRY_TIMEOUT (120*HZ)
/* Locking:
*
@@ -190,7 +191,7 @@
/* Need to hold i_lock if caller does not already hold reference */
void
-get_layout_hdr(struct pnfs_layout_hdr *lo)
+pnfs_get_layout_hdr(struct pnfs_layout_hdr *lo)
{
atomic_inc(&lo->plh_refcount);
}
@@ -199,45 +200,109 @@
pnfs_alloc_layout_hdr(struct inode *ino, gfp_t gfp_flags)
{
struct pnfs_layoutdriver_type *ld = NFS_SERVER(ino)->pnfs_curr_ld;
- return ld->alloc_layout_hdr ? ld->alloc_layout_hdr(ino, gfp_flags) :
- kzalloc(sizeof(struct pnfs_layout_hdr), gfp_flags);
+ return ld->alloc_layout_hdr(ino, gfp_flags);
}
static void
pnfs_free_layout_hdr(struct pnfs_layout_hdr *lo)
{
- struct pnfs_layoutdriver_type *ld = NFS_SERVER(lo->plh_inode)->pnfs_curr_ld;
+ struct nfs_server *server = NFS_SERVER(lo->plh_inode);
+ struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld;
+
+ if (!list_empty(&lo->plh_layouts)) {
+ struct nfs_client *clp = server->nfs_client;
+
+ spin_lock(&clp->cl_lock);
+ list_del_init(&lo->plh_layouts);
+ spin_unlock(&clp->cl_lock);
+ }
put_rpccred(lo->plh_lc_cred);
- return ld->alloc_layout_hdr ? ld->free_layout_hdr(lo) : kfree(lo);
+ return ld->free_layout_hdr(lo);
}
static void
-destroy_layout_hdr(struct pnfs_layout_hdr *lo)
+pnfs_detach_layout_hdr(struct pnfs_layout_hdr *lo)
{
+ struct nfs_inode *nfsi = NFS_I(lo->plh_inode);
dprintk("%s: freeing layout cache %p\n", __func__, lo);
- BUG_ON(!list_empty(&lo->plh_layouts));
- NFS_I(lo->plh_inode)->layout = NULL;
- pnfs_free_layout_hdr(lo);
-}
-
-static void
-put_layout_hdr_locked(struct pnfs_layout_hdr *lo)
-{
- if (atomic_dec_and_test(&lo->plh_refcount))
- destroy_layout_hdr(lo);
+ nfsi->layout = NULL;
+ /* Reset MDS Threshold I/O counters */
+ nfsi->write_io = 0;
+ nfsi->read_io = 0;
}
void
-put_layout_hdr(struct pnfs_layout_hdr *lo)
+pnfs_put_layout_hdr(struct pnfs_layout_hdr *lo)
{
struct inode *inode = lo->plh_inode;
if (atomic_dec_and_lock(&lo->plh_refcount, &inode->i_lock)) {
- destroy_layout_hdr(lo);
+ pnfs_detach_layout_hdr(lo);
spin_unlock(&inode->i_lock);
+ pnfs_free_layout_hdr(lo);
}
}
+/*
+ * Map a layout iomode to the plh_flags bit that records a failed
+ * layoutget for it: NFS_LAYOUT_RW_FAILED for IOMODE_RW and
+ * NFS_LAYOUT_RO_FAILED for every other value.
+ */
+static int
+pnfs_iomode_to_fail_bit(u32 iomode)
+{
+	if (iomode == IOMODE_RW)
+		return NFS_LAYOUT_RW_FAILED;
+	return NFS_LAYOUT_RO_FAILED;
+}
+
+/*
+ * Record a layoutget failure on @lo: stamp plh_retry_timestamp with the
+ * current jiffies and set @fail_bit in plh_flags.
+ *
+ * A plh_refcount reference is taken only on the clear -> set transition
+ * (test_and_set_bit() returns the previous bit value).  This pairs with
+ * pnfs_layout_clear_fail_bit(), which drops one reference on the
+ * set -> clear transition; taking the reference when the bit was already
+ * set would leave the first set/clear cycle unbalanced.
+ */
+static void
+pnfs_layout_set_fail_bit(struct pnfs_layout_hdr *lo, int fail_bit)
+{
+	lo->plh_retry_timestamp = jiffies;
+	if (!test_and_set_bit(fail_bit, &lo->plh_flags))
+		atomic_inc(&lo->plh_refcount);
+}
+
+/*
+ * Clear @fail_bit in lo->plh_flags.  If the bit was set beforehand, one
+ * plh_refcount reference is dropped with a plain atomic_dec(); if it
+ * was already clear nothing else happens.
+ */
+static void
+pnfs_layout_clear_fail_bit(struct pnfs_layout_hdr *lo, int fail_bit)
+{
+	if (!test_and_clear_bit(fail_bit, &lo->plh_flags))
+		return;
+	atomic_dec(&lo->plh_refcount);
+}
+
+/*
+ * Mark layoutget for @iomode as failed on @lo and invalidate the layout
+ * segments matching that iomode over the whole file range (offset 0,
+ * length NFS4_MAX_UINT64).
+ */
+static void
+pnfs_layout_io_set_failed(struct pnfs_layout_hdr *lo, u32 iomode)
+{
+ struct inode *inode = lo->plh_inode;
+ struct pnfs_layout_range range = {
+ .iomode = iomode,
+ .offset = 0,
+ .length = NFS4_MAX_UINT64,
+ };
+ LIST_HEAD(head);
+
+ /* Set the fail bit and collect matching segments on the local list
+ * "head" while holding the inode's i_lock */
+ spin_lock(&inode->i_lock);
+ pnfs_layout_set_fail_bit(lo, pnfs_iomode_to_fail_bit(iomode));
+ pnfs_mark_matching_lsegs_invalid(lo, &head, &range);
+ spin_unlock(&inode->i_lock);
+ /* The collected segments are freed only after i_lock is dropped */
+ pnfs_free_lseg_list(&head);
+ dprintk("%s Setting layout IOMODE_%s fail bit\n", __func__,
+ iomode == IOMODE_RW ? "RW" : "READ");
+}
+
+/*
+ * Report whether layoutget for @iomode is currently considered failed
+ * on @lo.
+ *
+ * Returns false if the fail bit is not set.  If it is set, returns true
+ * while plh_retry_timestamp lies within the last
+ * PNFS_LAYOUTGET_RETRY_TIMEOUT jiffies; otherwise the fail bit is
+ * cleared and false is returned.
+ */
+static bool
+pnfs_layout_io_test_failed(struct pnfs_layout_hdr *lo, u32 iomode)
+{
+	int fail_bit = pnfs_iomode_to_fail_bit(iomode);
+	unsigned long now, window_start;
+
+	if (!test_bit(fail_bit, &lo->plh_flags))
+		return false;
+
+	now = jiffies;
+	window_start = now - PNFS_LAYOUTGET_RETRY_TIMEOUT;
+	if (time_in_range(lo->plh_retry_timestamp, window_start, now))
+		return true;
+
+	/* It is time to retry the failed layoutgets */
+	pnfs_layout_clear_fail_bit(lo, fail_bit);
+	return false;
+}
+
static void
init_lseg(struct pnfs_layout_hdr *lo, struct pnfs_layout_segment *lseg)
{
@@ -249,33 +314,32 @@
lseg->pls_layout = lo;
}
-static void free_lseg(struct pnfs_layout_segment *lseg)
+static void pnfs_free_lseg(struct pnfs_layout_segment *lseg)
{
struct inode *ino = lseg->pls_layout->plh_inode;
NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg);
- /* Matched by get_layout_hdr in pnfs_insert_layout */
- put_layout_hdr(NFS_I(ino)->layout);
}
static void
-put_lseg_common(struct pnfs_layout_segment *lseg)
+pnfs_layout_remove_lseg(struct pnfs_layout_hdr *lo,
+ struct pnfs_layout_segment *lseg)
{
- struct inode *inode = lseg->pls_layout->plh_inode;
+ struct inode *inode = lo->plh_inode;
WARN_ON(test_bit(NFS_LSEG_VALID, &lseg->pls_flags));
list_del_init(&lseg->pls_list);
- if (list_empty(&lseg->pls_layout->plh_segs)) {
- set_bit(NFS_LAYOUT_DESTROYED, &lseg->pls_layout->plh_flags);
- /* Matched by initial refcount set in alloc_init_layout_hdr */
- put_layout_hdr_locked(lseg->pls_layout);
- }
+ /* Matched by pnfs_get_layout_hdr in pnfs_layout_insert_lseg */
+ atomic_dec(&lo->plh_refcount);
+ if (list_empty(&lo->plh_segs))
+ clear_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags);
rpc_wake_up(&NFS_SERVER(inode)->roc_rpcwaitq);
}
void
-put_lseg(struct pnfs_layout_segment *lseg)
+pnfs_put_lseg(struct pnfs_layout_segment *lseg)
{
+ struct pnfs_layout_hdr *lo;
struct inode *inode;
if (!lseg)
@@ -284,17 +348,17 @@
dprintk("%s: lseg %p ref %d valid %d\n", __func__, lseg,
atomic_read(&lseg->pls_refcount),
test_bit(NFS_LSEG_VALID, &lseg->pls_flags));
- inode = lseg->pls_layout->plh_inode;
+ lo = lseg->pls_layout;
+ inode = lo->plh_inode;
if (atomic_dec_and_lock(&lseg->pls_refcount, &inode->i_lock)) {
- LIST_HEAD(free_me);
-
- put_lseg_common(lseg);
- list_add(&lseg->pls_list, &free_me);
+ pnfs_get_layout_hdr(lo);
+ pnfs_layout_remove_lseg(lo, lseg);
spin_unlock(&inode->i_lock);
- pnfs_free_lseg_list(&free_me);
+ pnfs_free_lseg(lseg);
+ pnfs_put_layout_hdr(lo);
}
}
-EXPORT_SYMBOL_GPL(put_lseg);
+EXPORT_SYMBOL_GPL(pnfs_put_lseg);
static inline u64
end_offset(u64 start, u64 len)
@@ -378,7 +442,7 @@
dprintk("%s: lseg %p ref %d\n", __func__, lseg,
atomic_read(&lseg->pls_refcount));
if (atomic_dec_and_test(&lseg->pls_refcount)) {
- put_lseg_common(lseg);
+ pnfs_layout_remove_lseg(lseg->pls_layout, lseg);
list_add(&lseg->pls_list, tmp_list);
rv = 1;
}
@@ -390,7 +454,7 @@
* after call.
*/
int
-mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo,
+pnfs_mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo,
struct list_head *tmp_list,
struct pnfs_layout_range *recall_range)
{
@@ -399,14 +463,8 @@
dprintk("%s:Begin lo %p\n", __func__, lo);
- if (list_empty(&lo->plh_segs)) {
- /* Reset MDS Threshold I/O counters */
- NFS_I(lo->plh_inode)->write_io = 0;
- NFS_I(lo->plh_inode)->read_io = 0;
- if (!test_and_set_bit(NFS_LAYOUT_DESTROYED, &lo->plh_flags))
- put_layout_hdr_locked(lo);
+ if (list_empty(&lo->plh_segs))
return 0;
- }
list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list)
if (!recall_range ||
should_free_lseg(&lseg->pls_range, recall_range)) {
@@ -426,25 +484,13 @@
pnfs_free_lseg_list(struct list_head *free_me)
{
struct pnfs_layout_segment *lseg, *tmp;
- struct pnfs_layout_hdr *lo;
if (list_empty(free_me))
return;
- lo = list_first_entry(free_me, struct pnfs_layout_segment,
- pls_list)->pls_layout;
-
- if (test_bit(NFS_LAYOUT_DESTROYED, &lo->plh_flags)) {
- struct nfs_client *clp;
-
- clp = NFS_SERVER(lo->plh_inode)->nfs_client;
- spin_lock(&clp->cl_lock);
- list_del_init(&lo->plh_layouts);
- spin_unlock(&clp->cl_lock);
- }
list_for_each_entry_safe(lseg, tmp, free_me, pls_list) {
list_del(&lseg->pls_list);
- free_lseg(lseg);
+ pnfs_free_lseg(lseg);
}
}
@@ -458,10 +504,15 @@
lo = nfsi->layout;
if (lo) {
lo->plh_block_lgets++; /* permanently block new LAYOUTGETs */
- mark_matching_lsegs_invalid(lo, &tmp_list, NULL);
- }
- spin_unlock(&nfsi->vfs_inode.i_lock);
- pnfs_free_lseg_list(&tmp_list);
+ pnfs_mark_matching_lsegs_invalid(lo, &tmp_list, NULL);
+ pnfs_get_layout_hdr(lo);
+ pnfs_layout_clear_fail_bit(lo, NFS_LAYOUT_RO_FAILED);
+ pnfs_layout_clear_fail_bit(lo, NFS_LAYOUT_RW_FAILED);
+ spin_unlock(&nfsi->vfs_inode.i_lock);
+ pnfs_free_lseg_list(&tmp_list);
+ pnfs_put_layout_hdr(lo);
+ } else
+ spin_unlock(&nfsi->vfs_inode.i_lock);
}
EXPORT_SYMBOL_GPL(pnfs_destroy_layout);
@@ -537,7 +588,6 @@
(int)(lo->plh_barrier - be32_to_cpu(stateid->seqid)) >= 0)
return true;
return lo->plh_block_lgets ||
- test_bit(NFS_LAYOUT_DESTROYED, &lo->plh_flags) ||
test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags) ||
(list_empty(&lo->plh_segs) &&
(atomic_read(&lo->plh_outstanding) > lget));
@@ -582,7 +632,7 @@
struct inode *ino = lo->plh_inode;
struct nfs_server *server = NFS_SERVER(ino);
struct nfs4_layoutget *lgp;
- struct pnfs_layout_segment *lseg = NULL;
+ struct pnfs_layout_segment *lseg;
dprintk("--> %s\n", __func__);
@@ -599,16 +649,22 @@
lgp->args.type = server->pnfs_curr_ld->id;
lgp->args.inode = ino;
lgp->args.ctx = get_nfs_open_context(ctx);
- lgp->lsegpp = &lseg;
lgp->gfp_flags = gfp_flags;
/* Synchronously retrieve layout information from server and
* store in lseg.
*/
- nfs4_proc_layoutget(lgp, gfp_flags);
- if (!lseg) {
- /* remember that LAYOUTGET failed and suspend trying */
- set_bit(lo_fail_bit(range->iomode), &lo->plh_flags);
+ lseg = nfs4_proc_layoutget(lgp, gfp_flags);
+ if (IS_ERR(lseg)) {
+ switch (PTR_ERR(lseg)) {
+ case -ENOMEM:
+ case -ERESTARTSYS:
+ break;
+ default:
+ /* remember that LAYOUTGET failed and suspend trying */
+ pnfs_layout_io_set_failed(lo, range->iomode);
+ }
+ return NULL;
}
return lseg;
@@ -636,25 +692,24 @@
spin_lock(&ino->i_lock);
lo = nfsi->layout;
- if (!lo || pnfs_test_layout_returned(lo)) {
+ if (!lo) {
spin_unlock(&ino->i_lock);
dprintk("NFS: %s no layout to return\n", __func__);
goto out;
}
stateid = nfsi->layout->plh_stateid;
/* Reference matched in nfs4_layoutreturn_release */
- get_layout_hdr(lo);
+ pnfs_get_layout_hdr(lo);
empty = list_empty(&lo->plh_segs);
- mark_matching_lsegs_invalid(lo, &tmp_list, NULL);
+ pnfs_mark_matching_lsegs_invalid(lo, &tmp_list, NULL);
/* Don't send a LAYOUTRETURN if list was initially empty */
if (empty) {
spin_unlock(&ino->i_lock);
- put_layout_hdr(lo);
+ pnfs_put_layout_hdr(lo);
dprintk("NFS: %s no layout segments to return\n", __func__);
goto out;
}
lo->plh_block_lgets++;
- pnfs_mark_layout_returned(lo);
spin_unlock(&ino->i_lock);
pnfs_free_lseg_list(&tmp_list);
@@ -663,10 +718,10 @@
lrp = kzalloc(sizeof(*lrp), GFP_KERNEL);
if (unlikely(lrp == NULL)) {
status = -ENOMEM;
- set_bit(NFS_LAYOUT_RW_FAILED, &lo->plh_flags);
- set_bit(NFS_LAYOUT_RO_FAILED, &lo->plh_flags);
- pnfs_clear_layout_returned(lo);
- put_layout_hdr(lo);
+ spin_lock(&ino->i_lock);
+ lo->plh_block_lgets--;
+ spin_unlock(&ino->i_lock);
+ pnfs_put_layout_hdr(lo);
goto out;
}
@@ -703,7 +758,7 @@
if (!found)
goto out_nolayout;
lo->plh_block_lgets++;
- get_layout_hdr(lo); /* matched in pnfs_roc_release */
+ pnfs_get_layout_hdr(lo); /* matched in pnfs_roc_release */
spin_unlock(&ino->i_lock);
pnfs_free_lseg_list(&tmp_list);
return true;
@@ -720,8 +775,12 @@
spin_lock(&ino->i_lock);
lo = NFS_I(ino)->layout;
lo->plh_block_lgets--;
- put_layout_hdr_locked(lo);
- spin_unlock(&ino->i_lock);
+ if (atomic_dec_and_test(&lo->plh_refcount)) {
+ pnfs_detach_layout_hdr(lo);
+ spin_unlock(&ino->i_lock);
+ pnfs_free_layout_hdr(lo);
+ } else
+ spin_unlock(&ino->i_lock);
}
void pnfs_roc_set_barrier(struct inode *ino, u32 barrier)
@@ -735,27 +794,29 @@
spin_unlock(&ino->i_lock);
}
-bool pnfs_roc_drain(struct inode *ino, u32 *barrier)
+bool pnfs_roc_drain(struct inode *ino, u32 *barrier, struct rpc_task *task)
{
struct nfs_inode *nfsi = NFS_I(ino);
+ struct pnfs_layout_hdr *lo;
struct pnfs_layout_segment *lseg;
+ u32 current_seqid;
bool found = false;
spin_lock(&ino->i_lock);
list_for_each_entry(lseg, &nfsi->layout->plh_segs, pls_list)
if (test_bit(NFS_LSEG_ROC, &lseg->pls_flags)) {
+ rpc_sleep_on(&NFS_SERVER(ino)->roc_rpcwaitq, task, NULL); /* queue @task on the server's ROC wait queue while an ROC lseg is still listed */
found = true;
- break;
+ goto out; /* skip the barrier computation; *barrier is not written on this path */
}
- if (!found) {
- struct pnfs_layout_hdr *lo = nfsi->layout;
- u32 current_seqid = be32_to_cpu(lo->plh_stateid.seqid);
+ lo = nfsi->layout;
+ current_seqid = be32_to_cpu(lo->plh_stateid.seqid);
- /* Since close does not return a layout stateid for use as
- * a barrier, we choose the worst-case barrier.
- */
- *barrier = current_seqid + atomic_read(&lo->plh_outstanding);
- }
+ /* Since close does not return a layout stateid for use as
+ * a barrier, we choose the worst-case barrier.
+ */
+ *barrier = current_seqid + atomic_read(&lo->plh_outstanding);
+out: /* both paths release i_lock below and return found */
spin_unlock(&ino->i_lock);
return found;
}
@@ -786,14 +847,13 @@
}
static void
-pnfs_insert_layout(struct pnfs_layout_hdr *lo,
+pnfs_layout_insert_lseg(struct pnfs_layout_hdr *lo,
struct pnfs_layout_segment *lseg)
{
struct pnfs_layout_segment *lp;
dprintk("%s:Begin\n", __func__);
- assert_spin_locked(&lo->plh_inode->i_lock);
list_for_each_entry(lp, &lo->plh_segs, pls_list) {
if (cmp_layout(&lseg->pls_range, &lp->pls_range) > 0)
continue;
@@ -813,7 +873,7 @@
__func__, lseg, lseg->pls_range.iomode,
lseg->pls_range.offset, lseg->pls_range.length);
out:
- get_layout_hdr(lo);
+ pnfs_get_layout_hdr(lo);
dprintk("%s:Return\n", __func__);
}
@@ -847,12 +907,9 @@
dprintk("%s Begin ino=%p layout=%p\n", __func__, ino, nfsi->layout);
- assert_spin_locked(&ino->i_lock);
if (nfsi->layout) {
- if (test_bit(NFS_LAYOUT_DESTROYED, &nfsi->layout->plh_flags))
- return NULL;
- else
- return nfsi->layout;
+ pnfs_get_layout_hdr(nfsi->layout);
+ return nfsi->layout;
}
spin_unlock(&ino->i_lock);
new = alloc_init_layout_hdr(ino, ctx, gfp_flags);
@@ -904,11 +961,10 @@
dprintk("%s:Begin\n", __func__);
- assert_spin_locked(&lo->plh_inode->i_lock);
list_for_each_entry(lseg, &lo->plh_segs, pls_list) {
if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags) &&
is_matching_lseg(&lseg->pls_range, range)) {
- ret = get_lseg(lseg);
+ ret = pnfs_get_lseg(lseg);
break;
}
if (lseg->pls_range.offset > range->offset)
@@ -1013,7 +1069,6 @@
.length = count,
};
unsigned pg_offset;
- struct nfs_inode *nfsi = NFS_I(ino);
struct nfs_server *server = NFS_SERVER(ino);
struct nfs_client *clp = server->nfs_client;
struct pnfs_layout_hdr *lo;
@@ -1021,16 +1076,16 @@
bool first = false;
if (!pnfs_enabled_sb(NFS_SERVER(ino)))
- return NULL;
+ goto out;
if (pnfs_within_mdsthreshold(ctx, ino, iomode))
- return NULL;
+ goto out;
spin_lock(&ino->i_lock);
lo = pnfs_find_alloc_layout(ino, ctx, gfp_flags);
if (lo == NULL) {
- dprintk("%s ERROR: can't get pnfs_layout_hdr\n", __func__);
- goto out_unlock;
+ spin_unlock(&ino->i_lock);
+ goto out;
}
/* Do we even need to bother with this? */
@@ -1040,7 +1095,7 @@
}
/* if LAYOUTGET already failed once we don't try again */
- if (test_bit(lo_fail_bit(iomode), &nfsi->layout->plh_flags))
+ if (pnfs_layout_io_test_failed(lo, iomode))
goto out_unlock;
/* Check to see if the layout for the given range already exists */
@@ -1052,13 +1107,9 @@
goto out_unlock;
atomic_inc(&lo->plh_outstanding);
- get_layout_hdr(lo);
if (list_empty(&lo->plh_segs))
first = true;
- /* Enable LAYOUTRETURNs */
- pnfs_clear_layout_returned(lo);
-
spin_unlock(&ino->i_lock);
if (first) {
/* The lo must be on the clp list if there is any
@@ -1079,24 +1130,26 @@
arg.length = PAGE_CACHE_ALIGN(arg.length);
lseg = send_layoutget(lo, ctx, &arg, gfp_flags);
- if (!lseg && first) {
- spin_lock(&clp->cl_lock);
- list_del_init(&lo->plh_layouts);
- spin_unlock(&clp->cl_lock);
- }
atomic_dec(&lo->plh_outstanding);
- put_layout_hdr(lo);
+out_put_layout_hdr:
+ pnfs_put_layout_hdr(lo);
out:
- dprintk("%s end, state 0x%lx lseg %p\n", __func__,
- nfsi->layout ? nfsi->layout->plh_flags : -1, lseg);
+ dprintk("%s: inode %s/%llu pNFS layout segment %s for "
+ "(%s, offset: %llu, length: %llu)\n",
+ __func__, ino->i_sb->s_id,
+ (unsigned long long)NFS_FILEID(ino),
+ lseg == NULL ? "not found" : "found",
+ iomode==IOMODE_RW ? "read/write" : "read-only",
+ (unsigned long long)pos,
+ (unsigned long long)count);
return lseg;
out_unlock:
spin_unlock(&ino->i_lock);
- goto out;
+ goto out_put_layout_hdr;
}
EXPORT_SYMBOL_GPL(pnfs_update_layout);
-int
+struct pnfs_layout_segment *
pnfs_layout_process(struct nfs4_layoutget *lgp)
{
struct pnfs_layout_hdr *lo = NFS_I(lgp->args.inode)->layout;
@@ -1129,8 +1182,8 @@
}
init_lseg(lo, lseg);
lseg->pls_range = res->range;
- *lgp->lsegpp = get_lseg(lseg);
- pnfs_insert_layout(lo, lseg);
+ pnfs_get_lseg(lseg);
+ pnfs_layout_insert_lseg(lo, lseg);
if (res->return_on_close) {
set_bit(NFS_LSEG_ROC, &lseg->pls_flags);
@@ -1140,8 +1193,9 @@
/* Done processing layoutget. Set the layout stateid */
pnfs_set_layout_stateid(lo, &res->stateid, false);
spin_unlock(&ino->i_lock);
+ return lseg;
out:
- return status;
+ return ERR_PTR(status);
out_forget_reply:
spin_unlock(&ino->i_lock);
@@ -1362,12 +1416,12 @@
if (trypnfs == PNFS_NOT_ATTEMPTED)
pnfs_write_through_mds(desc, data);
}
- put_lseg(lseg);
+ pnfs_put_lseg(lseg);
}
static void pnfs_writehdr_free(struct nfs_pgio_header *hdr)
{
- put_lseg(hdr->lseg);
+ pnfs_put_lseg(hdr->lseg);
nfs_writehdr_free(hdr);
}
EXPORT_SYMBOL_GPL(pnfs_writehdr_free);
@@ -1382,17 +1436,17 @@
whdr = nfs_writehdr_alloc();
if (!whdr) {
desc->pg_completion_ops->error_cleanup(&desc->pg_list);
- put_lseg(desc->pg_lseg);
+ pnfs_put_lseg(desc->pg_lseg);
desc->pg_lseg = NULL;
return -ENOMEM;
}
hdr = &whdr->header;
nfs_pgheader_init(desc, hdr, pnfs_writehdr_free);
- hdr->lseg = get_lseg(desc->pg_lseg);
+ hdr->lseg = pnfs_get_lseg(desc->pg_lseg);
atomic_inc(&hdr->refcnt);
ret = nfs_generic_flush(desc, hdr);
if (ret != 0) {
- put_lseg(desc->pg_lseg);
+ pnfs_put_lseg(desc->pg_lseg);
desc->pg_lseg = NULL;
} else
pnfs_do_multiple_writes(desc, &hdr->rpc_list, desc->pg_ioflags);
@@ -1517,12 +1571,12 @@
if (trypnfs == PNFS_NOT_ATTEMPTED)
pnfs_read_through_mds(desc, data);
}
- put_lseg(lseg);
+ pnfs_put_lseg(lseg);
}
static void pnfs_readhdr_free(struct nfs_pgio_header *hdr)
{
- put_lseg(hdr->lseg);
+ pnfs_put_lseg(hdr->lseg);
nfs_readhdr_free(hdr);
}
EXPORT_SYMBOL_GPL(pnfs_readhdr_free);
@@ -1538,17 +1592,17 @@
if (!rhdr) {
desc->pg_completion_ops->error_cleanup(&desc->pg_list);
ret = -ENOMEM;
- put_lseg(desc->pg_lseg);
+ pnfs_put_lseg(desc->pg_lseg);
desc->pg_lseg = NULL;
return ret;
}
hdr = &rhdr->header;
nfs_pgheader_init(desc, hdr, pnfs_readhdr_free);
- hdr->lseg = get_lseg(desc->pg_lseg);
+ hdr->lseg = pnfs_get_lseg(desc->pg_lseg);
atomic_inc(&hdr->refcnt);
ret = nfs_generic_pagein(desc, hdr);
if (ret != 0) {
- put_lseg(desc->pg_lseg);
+ pnfs_put_lseg(desc->pg_lseg);
desc->pg_lseg = NULL;
} else
pnfs_do_multiple_reads(desc, &hdr->rpc_list);
@@ -1574,13 +1628,7 @@
void pnfs_set_lo_fail(struct pnfs_layout_segment *lseg)
{
- if (lseg->pls_range.iomode == IOMODE_RW) {
- dprintk("%s Setting layout IOMODE_RW fail bit\n", __func__);
- set_bit(lo_fail_bit(IOMODE_RW), &lseg->pls_layout->plh_flags);
- } else {
- dprintk("%s Setting layout IOMODE_READ fail bit\n", __func__);
- set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags);
- }
+ pnfs_layout_io_set_failed(lseg->pls_layout, lseg->pls_range.iomode);
}
EXPORT_SYMBOL_GPL(pnfs_set_lo_fail);
@@ -1601,7 +1649,7 @@
}
if (!test_and_set_bit(NFS_LSEG_LAYOUTCOMMIT, &hdr->lseg->pls_flags)) {
/* references matched in nfs4_layoutcommit_release */
- get_lseg(hdr->lseg);
+ pnfs_get_lseg(hdr->lseg);
}
if (end_pos > nfsi->layout->plh_lwb)
nfsi->layout->plh_lwb = end_pos;
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 745aa1b..6cede2c 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -62,9 +62,6 @@
NFS_LAYOUT_RW_FAILED, /* get rw layout failed stop trying */
NFS_LAYOUT_BULK_RECALL, /* bulk recall affecting layout */
NFS_LAYOUT_ROC, /* some lseg had roc bit set */
- NFS_LAYOUT_DESTROYED, /* no new use of layout allowed */
- NFS_LAYOUT_INVALID, /* layout is being destroyed */
- NFS_LAYOUT_RETURNED, /* layout has already been returned */
};
enum layoutdriver_policy_flags {
@@ -140,6 +137,7 @@
atomic_t plh_outstanding; /* number of RPCs out */
unsigned long plh_block_lgets; /* block LAYOUTGET if >0 */
u32 plh_barrier; /* ignore lower seqids */
+ unsigned long plh_retry_timestamp;
unsigned long plh_flags;
loff_t plh_lwb; /* last write byte for layoutcommit */
struct rpc_cred *plh_lc_cred; /* layoutcommit cred */
@@ -172,12 +170,12 @@
struct pnfs_devicelist *devlist);
extern int nfs4_proc_getdeviceinfo(struct nfs_server *server,
struct pnfs_device *dev);
-extern void nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags);
+extern struct pnfs_layout_segment* nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags);
extern int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp);
/* pnfs.c */
-void get_layout_hdr(struct pnfs_layout_hdr *lo);
-void put_lseg(struct pnfs_layout_segment *lseg);
+void pnfs_get_layout_hdr(struct pnfs_layout_hdr *lo);
+void pnfs_put_lseg(struct pnfs_layout_segment *lseg);
void pnfs_pageio_init_read(struct nfs_pageio_descriptor *, struct inode *,
const struct nfs_pgio_completion_ops *);
@@ -192,24 +190,24 @@
int pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc);
bool pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, struct nfs_page *req);
void pnfs_set_lo_fail(struct pnfs_layout_segment *lseg);
-int pnfs_layout_process(struct nfs4_layoutget *lgp);
+struct pnfs_layout_segment *pnfs_layout_process(struct nfs4_layoutget *lgp);
void pnfs_free_lseg_list(struct list_head *tmp_list);
void pnfs_destroy_layout(struct nfs_inode *);
void pnfs_destroy_all_layouts(struct nfs_client *);
-void put_layout_hdr(struct pnfs_layout_hdr *lo);
+void pnfs_put_layout_hdr(struct pnfs_layout_hdr *lo);
void pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo,
const nfs4_stateid *new,
bool update_barrier);
int pnfs_choose_layoutget_stateid(nfs4_stateid *dst,
struct pnfs_layout_hdr *lo,
struct nfs4_state *open_state);
-int mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo,
+int pnfs_mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo,
struct list_head *tmp_list,
struct pnfs_layout_range *recall_range);
bool pnfs_roc(struct inode *ino);
void pnfs_roc_release(struct inode *ino);
void pnfs_roc_set_barrier(struct inode *ino, u32 barrier);
-bool pnfs_roc_drain(struct inode *ino, u32 *barrier);
+bool pnfs_roc_drain(struct inode *ino, u32 *barrier, struct rpc_task *task);
void pnfs_set_layoutcommit(struct nfs_write_data *wdata);
void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data);
int pnfs_layoutcommit_inode(struct inode *inode, bool sync);
@@ -233,6 +231,7 @@
/* nfs4_deviceid_flags */
enum {
NFS_DEVICEID_INVALID = 0, /* set when MDS clientid recalled */
+ NFS_DEVICEID_UNAVAILABLE, /* device temporarily unavailable */
};
/* pnfs_dev.c */
@@ -242,6 +241,7 @@
const struct pnfs_layoutdriver_type *ld;
const struct nfs_client *nfs_client;
unsigned long flags;
+ unsigned long timestamp_unavailable;
struct nfs4_deviceid deviceid;
atomic_t ref;
};
@@ -254,34 +254,12 @@
const struct nfs4_deviceid *);
struct nfs4_deviceid_node *nfs4_insert_deviceid_node(struct nfs4_deviceid_node *);
bool nfs4_put_deviceid_node(struct nfs4_deviceid_node *);
+void nfs4_mark_deviceid_unavailable(struct nfs4_deviceid_node *node);
+bool nfs4_test_deviceid_unavailable(struct nfs4_deviceid_node *node);
void nfs4_deviceid_purge_client(const struct nfs_client *);
-static inline void
-pnfs_mark_layout_returned(struct pnfs_layout_hdr *lo)
-{
- set_bit(NFS_LAYOUT_RETURNED, &lo->plh_flags);
-}
-
-static inline void
-pnfs_clear_layout_returned(struct pnfs_layout_hdr *lo)
-{
- clear_bit(NFS_LAYOUT_RETURNED, &lo->plh_flags);
-}
-
-static inline bool
-pnfs_test_layout_returned(struct pnfs_layout_hdr *lo)
-{
- return test_bit(NFS_LAYOUT_RETURNED, &lo->plh_flags);
-}
-
-static inline int lo_fail_bit(u32 iomode)
-{
- return iomode == IOMODE_RW ?
- NFS_LAYOUT_RW_FAILED : NFS_LAYOUT_RO_FAILED;
-}
-
static inline struct pnfs_layout_segment *
-get_lseg(struct pnfs_layout_segment *lseg)
+pnfs_get_lseg(struct pnfs_layout_segment *lseg)
{
if (lseg) {
atomic_inc(&lseg->pls_refcount);
@@ -406,12 +384,12 @@
}
static inline struct pnfs_layout_segment *
-get_lseg(struct pnfs_layout_segment *lseg)
+pnfs_get_lseg(struct pnfs_layout_segment *lseg)
{
return NULL;
}
-static inline void put_lseg(struct pnfs_layout_segment *lseg)
+static inline void pnfs_put_lseg(struct pnfs_layout_segment *lseg)
{
}
@@ -443,7 +421,7 @@
}
static inline bool
-pnfs_roc_drain(struct inode *ino, u32 *barrier)
+pnfs_roc_drain(struct inode *ino, u32 *barrier, struct rpc_task *task)
{
return false;
}
diff --git a/fs/nfs/pnfs_dev.c b/fs/nfs/pnfs_dev.c
index 73f701f..d35b62e 100644
--- a/fs/nfs/pnfs_dev.c
+++ b/fs/nfs/pnfs_dev.c
@@ -40,6 +40,8 @@
#define NFS4_DEVICE_ID_HASH_SIZE (1 << NFS4_DEVICE_ID_HASH_BITS)
#define NFS4_DEVICE_ID_HASH_MASK (NFS4_DEVICE_ID_HASH_SIZE - 1)
+#define PNFS_DEVICE_RETRY_TIMEOUT (120*HZ)
+
static struct hlist_head nfs4_deviceid_cache[NFS4_DEVICE_ID_HASH_SIZE];
static DEFINE_SPINLOCK(nfs4_deviceid_lock);
@@ -218,6 +220,30 @@
}
EXPORT_SYMBOL_GPL(nfs4_put_deviceid_node);
+void
+nfs4_mark_deviceid_unavailable(struct nfs4_deviceid_node *node)
+{
+ node->timestamp_unavailable = jiffies; /* remember when the node was flagged; read back by nfs4_test_deviceid_unavailable() */
+ set_bit(NFS_DEVICEID_UNAVAILABLE, &node->flags); /* flag is set after the timestamp is stored */
+}
+EXPORT_SYMBOL_GPL(nfs4_mark_deviceid_unavailable);
+
+bool
+nfs4_test_deviceid_unavailable(struct nfs4_deviceid_node *node)
+{
+ if (test_bit(NFS_DEVICEID_UNAVAILABLE, &node->flags)) {
+ unsigned long start, end;
+
+ end = jiffies;
+ start = end - PNFS_DEVICE_RETRY_TIMEOUT; /* window covers the last PNFS_DEVICE_RETRY_TIMEOUT (120*HZ) jiffies */
+ if (time_in_range(node->timestamp_unavailable, start, end))
+ return true; /* flagged inside the window: still treated as unavailable */
+ clear_bit(NFS_DEVICEID_UNAVAILABLE, &node->flags); /* timestamp fell outside the window: drop the flag */
+ }
+ return false; /* never flagged, or the flag was just cleared above */
+}
+EXPORT_SYMBOL_GPL(nfs4_test_deviceid_unavailable);
+
static void
_deviceid_purge_client(const struct nfs_client *clp, long hash)
{
@@ -276,3 +302,4 @@
}
rcu_read_unlock();
}
+
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index b8eda70..a719bc0 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -88,6 +88,7 @@
Opt_sharecache, Opt_nosharecache,
Opt_resvport, Opt_noresvport,
Opt_fscache, Opt_nofscache,
+ Opt_migration, Opt_nomigration,
/* Mount options that take integer arguments */
Opt_port,
@@ -147,6 +148,8 @@
{ Opt_noresvport, "noresvport" },
{ Opt_fscache, "fsc" },
{ Opt_nofscache, "nofsc" },
+ { Opt_migration, "migration" },
+ { Opt_nomigration, "nomigration" },
{ Opt_port, "port=%s" },
{ Opt_rsize, "rsize=%s" },
@@ -676,6 +679,9 @@
if (nfss->options & NFS_OPTION_FSCACHE)
seq_printf(m, ",fsc");
+ if (nfss->options & NFS_OPTION_MIGRATION)
+ seq_printf(m, ",migration");
+
if (nfss->flags & NFS_MOUNT_LOOKUP_CACHE_NONEG) {
if (nfss->flags & NFS_MOUNT_LOOKUP_CACHE_NONE)
seq_printf(m, ",lookupcache=none");
@@ -1106,7 +1112,7 @@
string = match_strdup(args);
if (string == NULL)
return -ENOMEM;
- rc = strict_strtoul(string, 10, option);
+ rc = kstrtoul(string, 10, option);
kfree(string);
return rc;
@@ -1243,6 +1249,12 @@
kfree(mnt->fscache_uniq);
mnt->fscache_uniq = NULL;
break;
+ case Opt_migration:
+ mnt->options |= NFS_OPTION_MIGRATION;
+ break;
+ case Opt_nomigration:
+ mnt->options &= ~NFS_OPTION_MIGRATION; /* clear only the migration bit; a bare mask would wipe every other option */
+ break;
/*
* options that take numeric values
@@ -1535,6 +1547,10 @@
if (mnt->minorversion && mnt->version != 4)
goto out_minorversion_mismatch;
+ if (mnt->options & NFS_OPTION_MIGRATION &&
+ (mnt->version != 4 || mnt->minorversion != 0)) /* reject anything but vers=4, minorversion=0 */
+ goto out_migration_misuse;
+
/*
* verify that any proto=/mountproto= options match the address
* familiies in the addr=/mountaddr= options.
@@ -1572,6 +1588,10 @@
printk(KERN_INFO "NFS: mount option vers=%u does not support "
"minorversion=%u\n", mnt->version, mnt->minorversion);
return 0;
+out_migration_misuse:
+ printk(KERN_INFO
+ "NFS: 'migration' not supported for this NFS version\n");
+ return 0;
out_nomem:
printk(KERN_INFO "NFS: not enough memory to parse option\n");
return 0;
@@ -2642,6 +2662,7 @@
bool nfs4_disable_idmapping = true;
unsigned short max_session_slots = NFS4_DEF_SLOT_TABLE_SIZE;
unsigned short send_implementation_id = 1;
+char nfs4_client_id_uniquifier[NFS4_CLIENT_ID_UNIQ_LEN] = "";
EXPORT_SYMBOL_GPL(nfs_callback_set_tcpport);
EXPORT_SYMBOL_GPL(nfs_callback_tcpport);
@@ -2649,6 +2670,7 @@
EXPORT_SYMBOL_GPL(nfs4_disable_idmapping);
EXPORT_SYMBOL_GPL(max_session_slots);
EXPORT_SYMBOL_GPL(send_implementation_id);
+EXPORT_SYMBOL_GPL(nfs4_client_id_uniquifier);
#define NFS_CALLBACK_MAXPORTNR (65535U)
@@ -2659,7 +2681,7 @@
if (!val)
return -EINVAL;
- ret = strict_strtoul(val, 0, &num);
- if (ret == -EINVAL || num > NFS_CALLBACK_MAXPORTNR)
+ ret = kstrtoul(val, 0, &num);
+ if (ret != 0 || num > NFS_CALLBACK_MAXPORTNR) /* reject every parse failure, not just -EINVAL, before num is used */
return -EINVAL;
*((unsigned int *)kp->arg) = num;
@@ -2674,6 +2696,8 @@
module_param_named(callback_tcpport, nfs_callback_set_tcpport, portnr, 0644);
module_param(nfs_idmap_cache_timeout, int, 0644);
module_param(nfs4_disable_idmapping, bool, 0644);
+module_param_string(nfs4_unique_id, nfs4_client_id_uniquifier,
+ NFS4_CLIENT_ID_UNIQ_LEN, 0600);
MODULE_PARM_DESC(nfs4_disable_idmapping,
"Turn off NFSv4 idmapping when using 'sec=sys'");
module_param(max_session_slots, ushort, 0644);
@@ -2682,6 +2706,7 @@
module_param(send_implementation_id, ushort, 0644);
MODULE_PARM_DESC(send_implementation_id,
"Send implementation ID with NFSv4.1 exchange_id");
+MODULE_PARM_DESC(nfs4_unique_id, "nfs_client_id4 uniquifier string");
MODULE_ALIAS("nfs4");
#endif /* CONFIG_NFS_V4 */
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index e3b5537..9347ab7 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -846,6 +846,7 @@
int nfs_flush_incompatible(struct file *file, struct page *page)
{
struct nfs_open_context *ctx = nfs_file_open_context(file);
+ struct nfs_lock_context *l_ctx;
struct nfs_page *req;
int do_flush, status;
/*
@@ -860,9 +861,12 @@
req = nfs_page_find_request(page);
if (req == NULL)
return 0;
- do_flush = req->wb_page != page || req->wb_context != ctx ||
- req->wb_lock_context->lockowner != current->files ||
- req->wb_lock_context->pid != current->tgid;
+ l_ctx = req->wb_lock_context;
+ do_flush = req->wb_page != page || req->wb_context != ctx;
+ if (l_ctx) {
+ do_flush |= l_ctx->lockowner.l_owner != current->files
+ || l_ctx->lockowner.l_pid != current->tgid;
+ }
nfs_release_request(req);
if (!do_flush)
return 0;
@@ -1576,6 +1580,7 @@
/* We have a mismatch. Write the page again */
dprintk(" mismatch\n");
nfs_mark_request_dirty(req);
+ set_bit(NFS_CONTEXT_RESEND_WRITES, &req->wb_context->flags);
next:
nfs_unlock_and_release_request(req);
}
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 4b03f56..334a2f5 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -81,12 +81,16 @@
int mask;
};
+struct nfs_lockowner {
+ fl_owner_t l_owner;
+ pid_t l_pid;
+};
+
struct nfs_lock_context {
atomic_t count;
struct list_head list;
struct nfs_open_context *open_context;
- fl_owner_t lockowner;
- pid_t pid;
+ struct nfs_lockowner lockowner;
};
struct nfs4_state;
@@ -99,6 +103,7 @@
unsigned long flags;
#define NFS_CONTEXT_ERROR_WRITE (0)
+#define NFS_CONTEXT_RESEND_WRITES (1)
int error;
struct list_head list;
@@ -355,6 +360,8 @@
extern int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr);
extern int nfs_post_op_update_inode_force_wcc(struct inode *inode, struct nfs_fattr *fattr);
extern int nfs_getattr(struct vfsmount *, struct dentry *, struct kstat *);
+extern void nfs_access_add_cache(struct inode *, struct nfs_access_entry *);
+extern void nfs_access_set_mask(struct nfs_access_entry *, u32);
extern int nfs_permission(struct inode *, int);
extern int nfs_open(struct inode *, struct file *);
extern int nfs_release(struct inode *, struct file *);
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index 310c63c..a9e76ee 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -39,6 +39,7 @@
unsigned long cl_flags; /* behavior switches */
#define NFS_CS_NORESVPORT 0 /* - use ephemeral src port */
#define NFS_CS_DISCRTRY 1 /* - disconnect on RPC retry */
+#define NFS_CS_MIGRATION 2 /* - transparent state migr */
struct sockaddr_storage cl_addr; /* server identifier */
size_t cl_addrlen;
char * cl_hostname; /* hostname of server */
@@ -81,6 +82,7 @@
/* The flags used for obtaining the clientid during EXCHANGE_ID */
u32 cl_exchange_flags;
struct nfs4_session *cl_session; /* shared session */
+ bool cl_preserve_clid;
struct nfs41_server_owner *cl_serverowner;
struct nfs41_server_scope *cl_serverscope;
struct nfs41_impl_id *cl_implid;
@@ -125,6 +127,7 @@
unsigned int namelen;
unsigned int options; /* extra options enabled by mount */
#define NFS_OPTION_FSCACHE 0x00000001 /* - local caching enabled */
+#define NFS_OPTION_MIGRATION 0x00000002 /* - NFSv4 migration enabled */
struct nfs_fsid fsid;
__u64 maxfilesize; /* maximum file size */
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index be9cf3c..655490d 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -251,7 +251,6 @@
struct nfs4_layoutget {
struct nfs4_layoutget_args args;
struct nfs4_layoutget_res res;
- struct pnfs_layout_segment **lsegpp;
gfp_t gfp_flags;
};
@@ -335,6 +334,7 @@
struct nfs_seqid * seqid;
int open_flags;
fmode_t fmode;
+ u32 access;
__u64 clientid;
struct stateowner_id id;
union {
@@ -369,6 +369,8 @@
struct nfs4_string *owner;
struct nfs4_string *group_owner;
struct nfs4_sequence_res seq_res;
+ __u32 access_supported;
+ __u32 access_result;
};
/*
diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
index 523547e..34206b8 100644
--- a/include/linux/sunrpc/clnt.h
+++ b/include/linux/sunrpc/clnt.h
@@ -130,6 +130,8 @@
const struct rpc_program *, u32);
void rpc_task_reset_client(struct rpc_task *task, struct rpc_clnt *clnt);
struct rpc_clnt *rpc_clone_client(struct rpc_clnt *);
+struct rpc_clnt *rpc_clone_client_set_auth(struct rpc_clnt *,
+ rpc_authflavor_t);
void rpc_shutdown_client(struct rpc_clnt *);
void rpc_release_client(struct rpc_clnt *);
void rpc_task_release_client(struct rpc_task *);
diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index bf8c49f..951cb9b 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -173,8 +173,7 @@
unsigned int min_reqs; /* min number of slots */
atomic_t num_reqs; /* total slots */
unsigned long state; /* transport state */
- unsigned char shutdown : 1, /* being shut down */
- resvport : 1; /* use a reserved port */
+ unsigned char resvport : 1; /* use a reserved port */
unsigned int swapper; /* we're swapping over this
transport */
unsigned int bind_index; /* bind function index */
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index 34c5220..909dc0c 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -239,7 +239,7 @@
}
return q;
err:
- dprintk("RPC: gss_fill_context returning %ld\n", -PTR_ERR(p));
+ dprintk("RPC: %s returning %ld\n", __func__, -PTR_ERR(p));
return p;
}
@@ -301,10 +301,10 @@
if (pos->uid != uid)
continue;
atomic_inc(&pos->count);
- dprintk("RPC: gss_find_upcall found msg %p\n", pos);
+ dprintk("RPC: %s found msg %p\n", __func__, pos);
return pos;
}
- dprintk("RPC: gss_find_upcall found nothing\n");
+ dprintk("RPC: %s found nothing\n", __func__);
return NULL;
}
@@ -507,8 +507,8 @@
struct rpc_pipe *pipe;
int err = 0;
- dprintk("RPC: %5u gss_refresh_upcall for uid %u\n", task->tk_pid,
- cred->cr_uid);
+ dprintk("RPC: %5u %s for uid %u\n",
+ task->tk_pid, __func__, cred->cr_uid);
gss_msg = gss_setup_upcall(task->tk_client, gss_auth, cred);
if (PTR_ERR(gss_msg) == -EAGAIN) {
/* XXX: warning on the first, under the assumption we
@@ -539,8 +539,8 @@
spin_unlock(&pipe->lock);
gss_release_msg(gss_msg);
out:
- dprintk("RPC: %5u gss_refresh_upcall for uid %u result %d\n",
- task->tk_pid, cred->cr_uid, err);
+ dprintk("RPC: %5u %s for uid %u result %d\n",
+ task->tk_pid, __func__, cred->cr_uid, err);
return err;
}
@@ -553,7 +553,7 @@
DEFINE_WAIT(wait);
int err = 0;
- dprintk("RPC: gss_upcall for uid %u\n", cred->cr_uid);
+ dprintk("RPC: %s for uid %u\n", __func__, cred->cr_uid);
retry:
gss_msg = gss_setup_upcall(gss_auth->client, gss_auth, cred);
if (PTR_ERR(gss_msg) == -EAGAIN) {
@@ -594,8 +594,8 @@
finish_wait(&gss_msg->waitqueue, &wait);
gss_release_msg(gss_msg);
out:
- dprintk("RPC: gss_create_upcall for uid %u result %d\n",
- cred->cr_uid, err);
+ dprintk("RPC: %s for uid %u result %d\n",
+ __func__, cred->cr_uid, err);
return err;
}
@@ -681,7 +681,7 @@
err:
kfree(buf);
out:
- dprintk("RPC: gss_pipe_downcall returning %Zd\n", err);
+ dprintk("RPC: %s returning %Zd\n", __func__, err);
return err;
}
@@ -747,8 +747,8 @@
struct gss_upcall_msg *gss_msg = container_of(msg, struct gss_upcall_msg, msg);
if (msg->errno < 0) {
- dprintk("RPC: gss_pipe_destroy_msg releasing msg %p\n",
- gss_msg);
+ dprintk("RPC: %s releasing msg %p\n",
+ __func__, gss_msg);
atomic_inc(&gss_msg->count);
gss_unhash_msg(gss_msg);
if (msg->errno == -ETIMEDOUT)
@@ -976,7 +976,7 @@
static void
gss_do_free_ctx(struct gss_cl_ctx *ctx)
{
- dprintk("RPC: gss_free_ctx\n");
+ dprintk("RPC: %s\n", __func__);
gss_delete_sec_context(&ctx->gc_gss_ctx);
kfree(ctx->gc_wire_ctx.data);
@@ -999,7 +999,7 @@
static void
gss_free_cred(struct gss_cred *gss_cred)
{
- dprintk("RPC: gss_free_cred %p\n", gss_cred);
+ dprintk("RPC: %s cred=%p\n", __func__, gss_cred);
kfree(gss_cred);
}
@@ -1049,8 +1049,8 @@
struct gss_cred *cred = NULL;
int err = -ENOMEM;
- dprintk("RPC: gss_create_cred for uid %d, flavor %d\n",
- acred->uid, auth->au_flavor);
+ dprintk("RPC: %s for uid %d, flavor %d\n",
+ __func__, acred->uid, auth->au_flavor);
if (!(cred = kzalloc(sizeof(*cred), GFP_NOFS)))
goto out_err;
@@ -1069,7 +1069,7 @@
return &cred->gc_base;
out_err:
- dprintk("RPC: gss_create_cred failed with error %d\n", err);
+ dprintk("RPC: %s failed with error %d\n", __func__, err);
return ERR_PTR(err);
}
@@ -1127,7 +1127,7 @@
struct kvec iov;
struct xdr_buf verf_buf;
- dprintk("RPC: %5u gss_marshal\n", task->tk_pid);
+ dprintk("RPC: %5u %s\n", task->tk_pid, __func__);
*p++ = htonl(RPC_AUTH_GSS);
cred_len = p++;
@@ -1253,7 +1253,7 @@
u32 flav,len;
u32 maj_stat;
- dprintk("RPC: %5u gss_validate\n", task->tk_pid);
+ dprintk("RPC: %5u %s\n", task->tk_pid, __func__);
flav = ntohl(*p++);
if ((len = ntohl(*p++)) > RPC_MAX_AUTH_SIZE)
@@ -1271,20 +1271,20 @@
if (maj_stat == GSS_S_CONTEXT_EXPIRED)
clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags);
if (maj_stat) {
- dprintk("RPC: %5u gss_validate: gss_verify_mic returned "
- "error 0x%08x\n", task->tk_pid, maj_stat);
+ dprintk("RPC: %5u %s: gss_verify_mic returned error 0x%08x\n",
+ task->tk_pid, __func__, maj_stat);
goto out_bad;
}
/* We leave it to unwrap to calculate au_rslack. For now we just
* calculate the length of the verifier: */
cred->cr_auth->au_verfsize = XDR_QUADLEN(len) + 2;
gss_put_ctx(ctx);
- dprintk("RPC: %5u gss_validate: gss_verify_mic succeeded.\n",
- task->tk_pid);
+ dprintk("RPC: %5u %s: gss_verify_mic succeeded.\n",
+ task->tk_pid, __func__);
return p + XDR_QUADLEN(len);
out_bad:
gss_put_ctx(ctx);
- dprintk("RPC: %5u gss_validate failed.\n", task->tk_pid);
+ dprintk("RPC: %5u %s failed.\n", task->tk_pid, __func__);
return NULL;
}
@@ -1466,7 +1466,7 @@
struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred);
int status = -EIO;
- dprintk("RPC: %5u gss_wrap_req\n", task->tk_pid);
+ dprintk("RPC: %5u %s\n", task->tk_pid, __func__);
if (ctx->gc_proc != RPC_GSS_PROC_DATA) {
/* The spec seems a little ambiguous here, but I think that not
* wrapping context destruction requests makes the most sense.
@@ -1489,7 +1489,7 @@
}
out:
gss_put_ctx(ctx);
- dprintk("RPC: %5u gss_wrap_req returning %d\n", task->tk_pid, status);
+ dprintk("RPC: %5u %s returning %d\n", task->tk_pid, __func__, status);
return status;
}
@@ -1604,8 +1604,8 @@
status = gss_unwrap_req_decode(decode, rqstp, p, obj);
out:
gss_put_ctx(ctx);
- dprintk("RPC: %5u gss_unwrap_resp returning %d\n", task->tk_pid,
- status);
+ dprintk("RPC: %5u %s returning %d\n",
+ task->tk_pid, __func__, status);
return status;
}
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index fa48c60..cdc7564b 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -490,61 +490,86 @@
* same transport while varying parameters such as the authentication
* flavour.
*/
-struct rpc_clnt *
-rpc_clone_client(struct rpc_clnt *clnt)
+static struct rpc_clnt *__rpc_clone_client(struct rpc_create_args *args,
+ struct rpc_clnt *clnt)
{
- struct rpc_clnt *new;
struct rpc_xprt *xprt;
- int err = -ENOMEM;
+ struct rpc_clnt *new;
+ int err;
- new = kmemdup(clnt, sizeof(*new), GFP_KERNEL);
- if (!new)
- goto out_no_clnt;
- new->cl_parent = clnt;
- /* Turn off autobind on clones */
- new->cl_autobind = 0;
- INIT_LIST_HEAD(&new->cl_tasks);
- spin_lock_init(&new->cl_lock);
- rpc_init_rtt(&new->cl_rtt_default, clnt->cl_timeout->to_initval);
- new->cl_metrics = rpc_alloc_iostats(clnt);
- if (new->cl_metrics == NULL)
- goto out_no_stats;
- if (clnt->cl_principal) {
- new->cl_principal = kstrdup(clnt->cl_principal, GFP_KERNEL);
- if (new->cl_principal == NULL)
- goto out_no_principal;
- }
+ err = -ENOMEM;
rcu_read_lock();
xprt = xprt_get(rcu_dereference(clnt->cl_xprt));
rcu_read_unlock();
if (xprt == NULL)
- goto out_no_transport;
- rcu_assign_pointer(new->cl_xprt, xprt);
- atomic_set(&new->cl_count, 1);
- err = rpc_setup_pipedir(new, clnt->cl_program->pipe_dir_name);
- if (err != 0)
- goto out_no_path;
- rpc_clnt_set_nodename(new, utsname()->nodename);
- if (new->cl_auth)
- atomic_inc(&new->cl_auth->au_count);
+ goto out_err;
+ args->servername = xprt->servername;
+
+ new = rpc_new_client(args, xprt);
+ if (IS_ERR(new)) {
+ err = PTR_ERR(new);
+ goto out_put;
+ }
+
atomic_inc(&clnt->cl_count);
- rpc_register_client(new);
- rpciod_up();
+ new->cl_parent = clnt;
+
+ /* Turn off autobind on clones */
+ new->cl_autobind = 0;
+ new->cl_softrtry = clnt->cl_softrtry;
+ new->cl_discrtry = clnt->cl_discrtry;
+ new->cl_chatty = clnt->cl_chatty;
return new;
-out_no_path:
+
+out_put:
xprt_put(xprt);
-out_no_transport:
- kfree(new->cl_principal);
-out_no_principal:
- rpc_free_iostats(new->cl_metrics);
-out_no_stats:
- kfree(new);
-out_no_clnt:
+out_err:
dprintk("RPC: %s: returned error %d\n", __func__, err);
return ERR_PTR(err);
}
+
+/**
+ * rpc_clone_client - Clone an RPC client structure
+ *
+ * @clnt: RPC client whose parameters are copied
+ *
+ * Returns a fresh RPC client or an ERR_PTR.
+ */
+struct rpc_clnt *rpc_clone_client(struct rpc_clnt *clnt)
+{
+ struct rpc_create_args args = {
+ .program = clnt->cl_program,
+ .prognumber = clnt->cl_prog,
+ .version = clnt->cl_vers,
+ .authflavor = clnt->cl_auth->au_flavor,
+ .client_name = clnt->cl_principal,
+ };
+ return __rpc_clone_client(&args, clnt);
+}
EXPORT_SYMBOL_GPL(rpc_clone_client);
+/**
+ * rpc_clone_client_set_auth - Clone an RPC client structure and set its auth
+ *
+ * @clnt: RPC client whose parameters are copied
+ * @flavor: security flavor for new client
+ *
+ * Returns a fresh RPC client or an ERR_PTR.
+ */
+struct rpc_clnt *
+rpc_clone_client_set_auth(struct rpc_clnt *clnt, rpc_authflavor_t flavor)
+{
+ struct rpc_create_args args = {
+ .program = clnt->cl_program,
+ .prognumber = clnt->cl_prog,
+ .version = clnt->cl_vers,
+ .authflavor = flavor,
+ .client_name = clnt->cl_principal,
+ };
+ return __rpc_clone_client(&args, clnt);
+}
+EXPORT_SYMBOL_GPL(rpc_clone_client_set_auth);
+
/*
* Kill all tasks for the given client.
* XXX: kill their descendants as well?
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index 21fde99..80f5dd2 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -1119,8 +1119,8 @@
return -ENOMEM;
if (rpc_populate(root, files, RPCAUTH_lockd, RPCAUTH_RootEOF, NULL))
return -ENOMEM;
- dprintk("RPC: sending pipefs MOUNT notification for net %p%s\n", net,
- NET_NAME(net));
+ dprintk("RPC: sending pipefs MOUNT notification for net %p%s\n",
+ net, NET_NAME(net));
sn->pipefs_sb = sb;
err = blocking_notifier_call_chain(&rpc_pipefs_notifier_list,
RPC_PIPEFS_MOUNT,
@@ -1155,8 +1155,8 @@
sn->pipefs_sb = NULL;
mutex_unlock(&sn->pipefs_sb_lock);
put_net(net);
- dprintk("RPC: sending pipefs UMOUNT notification for net %p%s\n", net,
- NET_NAME(net));
+ dprintk("RPC: sending pipefs UMOUNT notification for net %p%s\n",
+ net, NET_NAME(net));
blocking_notifier_call_chain(&rpc_pipefs_notifier_list,
RPC_PIPEFS_UMOUNT,
sb);
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index 128494ec..6357fcb 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -1022,7 +1022,7 @@
* Create the rpciod thread and wait for it to start.
*/
dprintk("RPC: creating workqueue rpciod\n");
- wq = alloc_workqueue("rpciod", WQ_MEM_RECLAIM, 0);
+ wq = alloc_workqueue("rpciod", WQ_MEM_RECLAIM, 1);
rpciod_workqueue = wq;
return rpciod_workqueue != NULL;
}
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
index 0afba1b..08f50af 100644
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c
@@ -730,19 +730,24 @@
if (xdr->nwords == 0)
return 0;
+ /* Realign pages to current pointer position */
+ iov = buf->head;
+ if (iov->iov_len > cur) {
+ xdr_shrink_bufhead(buf, iov->iov_len - cur);
+ xdr->nwords = XDR_QUADLEN(buf->len - cur);
+ }
+
if (nwords > xdr->nwords) {
nwords = xdr->nwords;
len = nwords << 2;
}
- /* Realign pages to current pointer position */
- iov = buf->head;
- if (iov->iov_len > cur)
- xdr_shrink_bufhead(buf, iov->iov_len - cur);
-
- /* Truncate page data and move it into the tail */
- if (buf->page_len > len)
+ if (buf->page_len <= len)
+ len = buf->page_len;
+ else if (nwords < xdr->nwords) {
+ /* Truncate page data and move it into the tail */
xdr_shrink_pagelen(buf, buf->page_len - len);
- xdr->nwords = XDR_QUADLEN(buf->len - cur);
+ xdr->nwords = XDR_QUADLEN(buf->len - cur);
+ }
return len;
}
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 5d7f61d..bd462a5 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -231,7 +231,7 @@
static void xprt_clear_locked(struct rpc_xprt *xprt)
{
xprt->snd_task = NULL;
- if (!test_bit(XPRT_CLOSE_WAIT, &xprt->state) || xprt->shutdown) {
+ if (!test_bit(XPRT_CLOSE_WAIT, &xprt->state)) {
smp_mb__before_clear_bit();
clear_bit(XPRT_LOCKED, &xprt->state);
smp_mb__after_clear_bit();
@@ -504,9 +504,6 @@
*/
void xprt_write_space(struct rpc_xprt *xprt)
{
- if (unlikely(xprt->shutdown))
- return;
-
spin_lock_bh(&xprt->transport_lock);
if (xprt->snd_task) {
dprintk("RPC: write space: waking waiting task on "
@@ -679,7 +676,7 @@
struct rpc_xprt *xprt = (struct rpc_xprt *)data;
spin_lock(&xprt->transport_lock);
- if (!list_empty(&xprt->recv) || xprt->shutdown)
+ if (!list_empty(&xprt->recv))
goto out_abort;
if (test_and_set_bit(XPRT_LOCKED, &xprt->state))
goto out_abort;
@@ -1262,7 +1259,6 @@
static void xprt_destroy(struct rpc_xprt *xprt)
{
dprintk("RPC: destroying transport %p\n", xprt);
- xprt->shutdown = 1;
del_timer_sync(&xprt->timer);
rpc_destroy_wait_queue(&xprt->binding);
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index 5d9202d..c9aa7a3 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -199,21 +199,15 @@
struct rpc_xprt *xprt = &r_xprt->xprt;
int rc = 0;
- if (!xprt->shutdown) {
- current->flags |= PF_FSTRANS;
- xprt_clear_connected(xprt);
+ current->flags |= PF_FSTRANS;
+ xprt_clear_connected(xprt);
- dprintk("RPC: %s: %sconnect\n", __func__,
- r_xprt->rx_ep.rep_connected != 0 ? "re" : "");
- rc = rpcrdma_ep_connect(&r_xprt->rx_ep, &r_xprt->rx_ia);
- if (rc)
- goto out;
- }
- goto out_clear;
+ dprintk("RPC: %s: %sconnect\n", __func__,
+ r_xprt->rx_ep.rep_connected != 0 ? "re" : "");
+ rc = rpcrdma_ep_connect(&r_xprt->rx_ep, &r_xprt->rx_ia);
+ if (rc)
+ xprt_wake_pending_tasks(xprt, rc);
-out:
- xprt_wake_pending_tasks(xprt, rc);
-out_clear:
dprintk("RPC: %s: exit\n", __func__);
xprt_clear_connecting(xprt);
current->flags &= ~PF_FSTRANS;
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 97f8918..aaaadfb 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -917,9 +917,6 @@
if (skb == NULL)
goto out;
- if (xprt->shutdown)
- goto dropit;
-
repsize = skb->len - sizeof(rpc_fraghdr);
if (repsize < 4) {
dprintk("RPC: impossible RPC reply size %d\n", repsize);
@@ -981,9 +978,6 @@
if ((skb = skb_recv_datagram(sk, 0, 1, &err)) == NULL)
goto out;
- if (xprt->shutdown)
- goto dropit;
-
repsize = skb->len - sizeof(struct udphdr);
if (repsize < 4) {
dprintk("RPC: impossible RPC reply size %d!\n", repsize);
@@ -1412,9 +1406,6 @@
read_lock_bh(&sk->sk_callback_lock);
if (!(xprt = xprt_from_sock(sk)))
goto out;
- if (xprt->shutdown)
- goto out;
-
/* Any data means we had a useful conversation, so
* the we don't need to delay the next reconnect
*/
@@ -1901,9 +1892,6 @@
struct socket *sock;
int status = -EIO;
- if (xprt->shutdown)
- goto out;
-
current->flags |= PF_FSTRANS;
clear_bit(XPRT_CONNECTION_ABORT, &xprt->state);
@@ -2020,9 +2008,6 @@
struct socket *sock = transport->sock;
int status = -EIO;
- if (xprt->shutdown)
- goto out;
-
current->flags |= PF_FSTRANS;
/* Start by resetting any existing state */
@@ -2168,9 +2153,6 @@
struct rpc_xprt *xprt = &transport->xprt;
int status = -EIO;
- if (xprt->shutdown)
- goto out;
-
current->flags |= PF_FSTRANS;
if (!sock) {