Merge branch 'devel'
diff --git a/fs/Kconfig b/fs/Kconfig
index 8b18a87..48ca1e1 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -1664,30 +1664,6 @@
If unsure, say N.
-config NFS_DIRECTIO
- bool "Allow direct I/O on NFS files"
- depends on NFS_FS
- help
- This option enables applications to perform uncached I/O on files
- in NFS file systems using the O_DIRECT open() flag. When O_DIRECT
- is set for a file, its data is not cached in the system's page
- cache. Data is moved to and from user-level application buffers
- directly. Unlike local disk-based file systems, NFS O_DIRECT has
- no alignment restrictions.
-
- Unless your program is designed to use O_DIRECT properly, you are
- much better off allowing the NFS client to manage data caching for
- you. Misusing O_DIRECT can cause poor server performance or network
- storms. This kernel build option defaults OFF to avoid exposing
- system administrators unwittingly to a potentially hazardous
- feature.
-
- For more details on NFS O_DIRECT, see fs/nfs/direct.c.
-
- If unsure, say N. This reduces the size of the NFS client, and
- causes open() to return EINVAL if a file residing in NFS is
- opened with the O_DIRECT flag.
-
config NFSD
tristate "NFS server support"
depends on INET
@@ -1808,15 +1784,33 @@
tristate
depends on SUNRPC && INFINIBAND && EXPERIMENTAL
default SUNRPC && INFINIBAND
+ help
+ This option enables an RPC client transport capability that
+ allows the NFS client to mount servers via an RDMA-enabled
+ transport.
+
+ To compile RPC client RDMA transport support as a module,
+ choose M here: the module will be called xprtrdma.
+
+ If unsure, say N.
config SUNRPC_BIND34
bool "Support for rpcbind versions 3 & 4 (EXPERIMENTAL)"
depends on SUNRPC && EXPERIMENTAL
+ default n
help
- Provides kernel support for querying rpcbind servers via versions 3
- and 4 of the rpcbind protocol. The kernel automatically falls back
- to version 2 if a remote rpcbind service does not support versions
- 3 or 4.
+ RPC requests over IPv6 networks require support for larger
+ addresses when performing an RPC bind. Sun added support for
+ IPv6 addressing by creating two new versions of the rpcbind
+ protocol (RFC 1833).
+
+ This option enables support in the kernel RPC client for
+ querying rpcbind servers via versions 3 and 4 of the rpcbind
+ protocol. The kernel automatically falls back to version 2
+ if a remote rpcbind service does not support versions 3 or 4.
+ By themselves, these new versions do not provide support for
+ RPC over IPv6, but the new protocol versions are necessary to
+ support it.
If unsure, say N to get traditional behavior (version 2 rpcbind
requests only).
@@ -1830,12 +1824,13 @@
select CRYPTO_DES
select CRYPTO_CBC
help
- Provides for secure RPC calls by means of a gss-api
- mechanism based on Kerberos V5. This is required for
- NFSv4.
+ Choose Y here to enable Secure RPC using the Kerberos version 5
+ GSS-API mechanism (RFC 1964).
- Note: Requires an auxiliary userspace daemon which may be found on
- http://www.citi.umich.edu/projects/nfsv4/
+ Secure RPC calls with Kerberos require an auxiliary user-space
+ daemon which may be found in the Linux nfs-utils package
+ available from http://linux-nfs.org/. In addition, user-space
+ Kerberos support should be installed.
If unsure, say N.
@@ -1849,11 +1844,12 @@
select CRYPTO_CAST5
select CRYPTO_CBC
help
- Provides for secure RPC calls by means of a gss-api
- mechanism based on the SPKM3 public-key mechanism.
+ Choose Y here to enable Secure RPC using the SPKM3 public key
+ GSS-API mechansim (RFC 2025).
- Note: Requires an auxiliary userspace daemon which may be found on
- http://www.citi.umich.edu/projects/nfsv4/
+ Secure RPC calls with SPKM3 require an auxiliary userspace
+ daemon which may be found in the Linux nfs-utils package
+ available from http://linux-nfs.org/.
If unsure, say N.
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index b6b74a6..40b16f2 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -155,8 +155,6 @@
int nlmclnt_proc(struct nlm_host *host, int cmd, struct file_lock *fl)
{
struct nlm_rqst *call;
- sigset_t oldset;
- unsigned long flags;
int status;
nlm_get_host(host);
@@ -168,22 +166,6 @@
/* Set up the argument struct */
nlmclnt_setlockargs(call, fl);
- /* Keep the old signal mask */
- spin_lock_irqsave(¤t->sighand->siglock, flags);
- oldset = current->blocked;
-
- /* If we're cleaning up locks because the process is exiting,
- * perform the RPC call asynchronously. */
- if ((IS_SETLK(cmd) || IS_SETLKW(cmd))
- && fl->fl_type == F_UNLCK
- && (current->flags & PF_EXITING)) {
- sigfillset(¤t->blocked); /* Mask all signals */
- recalc_sigpending();
-
- call->a_flags = RPC_TASK_ASYNC;
- }
- spin_unlock_irqrestore(¤t->sighand->siglock, flags);
-
if (IS_SETLK(cmd) || IS_SETLKW(cmd)) {
if (fl->fl_type != F_UNLCK) {
call->a_args.block = IS_SETLKW(cmd) ? 1 : 0;
@@ -198,11 +180,6 @@
fl->fl_ops->fl_release_private(fl);
fl->fl_ops = NULL;
- spin_lock_irqsave(¤t->sighand->siglock, flags);
- current->blocked = oldset;
- recalc_sigpending();
- spin_unlock_irqrestore(¤t->sighand->siglock, flags);
-
dprintk("lockd: clnt proc returns %d\n", status);
return status;
}
@@ -221,6 +198,7 @@
for(;;) {
call = kzalloc(sizeof(*call), GFP_KERNEL);
if (call != NULL) {
+ atomic_set(&call->a_count, 1);
locks_init_lock(&call->a_args.lock.fl);
locks_init_lock(&call->a_res.lock.fl);
call->a_host = host;
@@ -237,6 +215,8 @@
void nlm_release_call(struct nlm_rqst *call)
{
+ if (!atomic_dec_and_test(&call->a_count))
+ return;
nlm_release_host(call->a_host);
nlmclnt_release_lockargs(call);
kfree(call);
@@ -267,7 +247,7 @@
* Generic NLM call
*/
static int
-nlmclnt_call(struct nlm_rqst *req, u32 proc)
+nlmclnt_call(struct rpc_cred *cred, struct nlm_rqst *req, u32 proc)
{
struct nlm_host *host = req->a_host;
struct rpc_clnt *clnt;
@@ -276,6 +256,7 @@
struct rpc_message msg = {
.rpc_argp = argp,
.rpc_resp = resp,
+ .rpc_cred = cred,
};
int status;
@@ -343,10 +324,16 @@
/*
* Generic NLM call, async version.
*/
-static int __nlm_async_call(struct nlm_rqst *req, u32 proc, struct rpc_message *msg, const struct rpc_call_ops *tk_ops)
+static struct rpc_task *__nlm_async_call(struct nlm_rqst *req, u32 proc, struct rpc_message *msg, const struct rpc_call_ops *tk_ops)
{
struct nlm_host *host = req->a_host;
struct rpc_clnt *clnt;
+ struct rpc_task_setup task_setup_data = {
+ .rpc_message = msg,
+ .callback_ops = tk_ops,
+ .callback_data = req,
+ .flags = RPC_TASK_ASYNC,
+ };
dprintk("lockd: call procedure %d on %s (async)\n",
(int)proc, host->h_name);
@@ -356,21 +343,36 @@
if (clnt == NULL)
goto out_err;
msg->rpc_proc = &clnt->cl_procinfo[proc];
+ task_setup_data.rpc_client = clnt;
/* bootstrap and kick off the async RPC call */
- return rpc_call_async(clnt, msg, RPC_TASK_ASYNC, tk_ops, req);
+ return rpc_run_task(&task_setup_data);
out_err:
tk_ops->rpc_release(req);
- return -ENOLCK;
+ return ERR_PTR(-ENOLCK);
}
+static int nlm_do_async_call(struct nlm_rqst *req, u32 proc, struct rpc_message *msg, const struct rpc_call_ops *tk_ops)
+{
+ struct rpc_task *task;
+
+ task = __nlm_async_call(req, proc, msg, tk_ops);
+ if (IS_ERR(task))
+ return PTR_ERR(task);
+ rpc_put_task(task);
+ return 0;
+}
+
+/*
+ * NLM asynchronous call.
+ */
int nlm_async_call(struct nlm_rqst *req, u32 proc, const struct rpc_call_ops *tk_ops)
{
struct rpc_message msg = {
.rpc_argp = &req->a_args,
.rpc_resp = &req->a_res,
};
- return __nlm_async_call(req, proc, &msg, tk_ops);
+ return nlm_do_async_call(req, proc, &msg, tk_ops);
}
int nlm_async_reply(struct nlm_rqst *req, u32 proc, const struct rpc_call_ops *tk_ops)
@@ -378,7 +380,33 @@
struct rpc_message msg = {
.rpc_argp = &req->a_res,
};
- return __nlm_async_call(req, proc, &msg, tk_ops);
+ return nlm_do_async_call(req, proc, &msg, tk_ops);
+}
+
+/*
+ * NLM client asynchronous call.
+ *
+ * Note that although the calls are asynchronous, and are therefore
+ * guaranteed to complete, we still always attempt to wait for
+ * completion in order to be able to correctly track the lock
+ * state.
+ */
+static int nlmclnt_async_call(struct rpc_cred *cred, struct nlm_rqst *req, u32 proc, const struct rpc_call_ops *tk_ops)
+{
+ struct rpc_message msg = {
+ .rpc_argp = &req->a_args,
+ .rpc_resp = &req->a_res,
+ .rpc_cred = cred,
+ };
+ struct rpc_task *task;
+ int err;
+
+ task = __nlm_async_call(req, proc, &msg, tk_ops);
+ if (IS_ERR(task))
+ return PTR_ERR(task);
+ err = rpc_wait_for_completion_task(task);
+ rpc_put_task(task);
+ return err;
}
/*
@@ -389,7 +417,7 @@
{
int status;
- status = nlmclnt_call(req, NLMPROC_TEST);
+ status = nlmclnt_call(nfs_file_cred(fl->fl_file), req, NLMPROC_TEST);
if (status < 0)
goto out;
@@ -480,10 +508,12 @@
static int
nlmclnt_lock(struct nlm_rqst *req, struct file_lock *fl)
{
+ struct rpc_cred *cred = nfs_file_cred(fl->fl_file);
struct nlm_host *host = req->a_host;
struct nlm_res *resp = &req->a_res;
struct nlm_wait *block = NULL;
unsigned char fl_flags = fl->fl_flags;
+ unsigned char fl_type;
int status = -ENOLCK;
if (nsm_monitor(host) < 0) {
@@ -493,18 +523,22 @@
}
fl->fl_flags |= FL_ACCESS;
status = do_vfs_lock(fl);
+ fl->fl_flags = fl_flags;
if (status < 0)
goto out;
block = nlmclnt_prepare_block(host, fl);
again:
+ /*
+ * Initialise resp->status to a valid non-zero value,
+ * since 0 == nlm_lck_granted
+ */
+ resp->status = nlm_lck_blocked;
for(;;) {
/* Reboot protection */
fl->fl_u.nfs_fl.state = host->h_state;
- status = nlmclnt_call(req, NLMPROC_LOCK);
+ status = nlmclnt_call(cred, req, NLMPROC_LOCK);
if (status < 0)
- goto out_unblock;
- if (!req->a_args.block)
break;
/* Did a reclaimer thread notify us of a server reboot? */
if (resp->status == nlm_lck_denied_grace_period)
@@ -513,15 +547,22 @@
break;
/* Wait on an NLM blocking lock */
status = nlmclnt_block(block, req, NLMCLNT_POLL_TIMEOUT);
- /* if we were interrupted. Send a CANCEL request to the server
- * and exit
- */
if (status < 0)
- goto out_unblock;
+ break;
if (resp->status != nlm_lck_blocked)
break;
}
+ /* if we were interrupted while blocking, then cancel the lock request
+ * and exit
+ */
+ if (resp->status == nlm_lck_blocked) {
+ if (!req->a_args.block)
+ goto out_unlock;
+ if (nlmclnt_cancel(host, req->a_args.block, fl) == 0)
+ goto out_unblock;
+ }
+
if (resp->status == nlm_granted) {
down_read(&host->h_rwsem);
/* Check whether or not the server has rebooted */
@@ -530,20 +571,34 @@
goto again;
}
/* Ensure the resulting lock will get added to granted list */
- fl->fl_flags = fl_flags | FL_SLEEP;
+ fl->fl_flags |= FL_SLEEP;
if (do_vfs_lock(fl) < 0)
printk(KERN_WARNING "%s: VFS is out of sync with lock manager!\n", __FUNCTION__);
up_read(&host->h_rwsem);
+ fl->fl_flags = fl_flags;
+ status = 0;
}
+ if (status < 0)
+ goto out_unlock;
status = nlm_stat_to_errno(resp->status);
out_unblock:
nlmclnt_finish_block(block);
- /* Cancel the blocked request if it is still pending */
- if (resp->status == nlm_lck_blocked)
- nlmclnt_cancel(host, req->a_args.block, fl);
out:
nlm_release_call(req);
+ return status;
+out_unlock:
+ /* Fatal error: ensure that we remove the lock altogether */
+ dprintk("lockd: lock attempt ended in fatal error.\n"
+ " Attempting to unlock.\n");
+ nlmclnt_finish_block(block);
+ fl_type = fl->fl_type;
+ fl->fl_type = F_UNLCK;
+ down_read(&host->h_rwsem);
+ do_vfs_lock(fl);
+ up_read(&host->h_rwsem);
+ fl->fl_type = fl_type;
fl->fl_flags = fl_flags;
+ nlmclnt_async_call(cred, req, NLMPROC_UNLOCK, &nlmclnt_unlock_ops);
return status;
}
@@ -567,8 +622,8 @@
nlmclnt_setlockargs(req, fl);
req->a_args.reclaim = 1;
- if ((status = nlmclnt_call(req, NLMPROC_LOCK)) >= 0
- && req->a_res.status == nlm_granted)
+ status = nlmclnt_call(nfs_file_cred(fl->fl_file), req, NLMPROC_LOCK);
+ if (status >= 0 && req->a_res.status == nlm_granted)
return 0;
printk(KERN_WARNING "lockd: failed to reclaim lock for pid %d "
@@ -598,7 +653,8 @@
{
struct nlm_host *host = req->a_host;
struct nlm_res *resp = &req->a_res;
- int status = 0;
+ int status;
+ unsigned char fl_flags = fl->fl_flags;
/*
* Note: the server is supposed to either grant us the unlock
@@ -607,16 +663,17 @@
*/
fl->fl_flags |= FL_EXISTS;
down_read(&host->h_rwsem);
- if (do_vfs_lock(fl) == -ENOENT) {
- up_read(&host->h_rwsem);
+ status = do_vfs_lock(fl);
+ up_read(&host->h_rwsem);
+ fl->fl_flags = fl_flags;
+ if (status == -ENOENT) {
+ status = 0;
goto out;
}
- up_read(&host->h_rwsem);
- if (req->a_flags & RPC_TASK_ASYNC)
- return nlm_async_call(req, NLMPROC_UNLOCK, &nlmclnt_unlock_ops);
-
- status = nlmclnt_call(req, NLMPROC_UNLOCK);
+ atomic_inc(&req->a_count);
+ status = nlmclnt_async_call(nfs_file_cred(fl->fl_file), req,
+ NLMPROC_UNLOCK, &nlmclnt_unlock_ops);
if (status < 0)
goto out;
@@ -671,16 +728,10 @@
static int nlmclnt_cancel(struct nlm_host *host, int block, struct file_lock *fl)
{
struct nlm_rqst *req;
- unsigned long flags;
- sigset_t oldset;
- int status;
+ int status;
- /* Block all signals while setting up call */
- spin_lock_irqsave(¤t->sighand->siglock, flags);
- oldset = current->blocked;
- sigfillset(¤t->blocked);
- recalc_sigpending();
- spin_unlock_irqrestore(¤t->sighand->siglock, flags);
+ dprintk("lockd: blocking lock attempt was interrupted by a signal.\n"
+ " Attempting to cancel lock.\n");
req = nlm_alloc_call(nlm_get_host(host));
if (!req)
@@ -690,13 +741,12 @@
nlmclnt_setlockargs(req, fl);
req->a_args.block = block;
- status = nlm_async_call(req, NLMPROC_CANCEL, &nlmclnt_cancel_ops);
-
- spin_lock_irqsave(¤t->sighand->siglock, flags);
- current->blocked = oldset;
- recalc_sigpending();
- spin_unlock_irqrestore(¤t->sighand->siglock, flags);
-
+ atomic_inc(&req->a_count);
+ status = nlmclnt_async_call(nfs_file_cred(fl->fl_file), req,
+ NLMPROC_CANCEL, &nlmclnt_cancel_ops);
+ if (status == 0 && req->a_res.status == nlm_lck_denied)
+ status = -ENOLCK;
+ nlm_release_call(req);
return status;
}
diff --git a/fs/lockd/host.c b/fs/lockd/host.c
index f1ef49f..f23750d 100644
--- a/fs/lockd/host.c
+++ b/fs/lockd/host.c
@@ -42,11 +42,12 @@
/*
* Common host lookup routine for server & client
*/
-static struct nlm_host *
-nlm_lookup_host(int server, const struct sockaddr_in *sin,
- int proto, int version, const char *hostname,
- unsigned int hostname_len,
- const struct sockaddr_in *ssin)
+static struct nlm_host *nlm_lookup_host(int server,
+ const struct sockaddr_in *sin,
+ int proto, u32 version,
+ const char *hostname,
+ unsigned int hostname_len,
+ const struct sockaddr_in *ssin)
{
struct hlist_head *chain;
struct hlist_node *pos;
@@ -55,7 +56,7 @@
int hash;
dprintk("lockd: nlm_lookup_host("NIPQUAD_FMT"->"NIPQUAD_FMT
- ", p=%d, v=%d, my role=%s, name=%.*s)\n",
+ ", p=%d, v=%u, my role=%s, name=%.*s)\n",
NIPQUAD(ssin->sin_addr.s_addr),
NIPQUAD(sin->sin_addr.s_addr), proto, version,
server? "server" : "client",
@@ -175,9 +176,10 @@
/*
* Find an NLM server handle in the cache. If there is none, create it.
*/
-struct nlm_host *
-nlmclnt_lookup_host(const struct sockaddr_in *sin, int proto, int version,
- const char *hostname, unsigned int hostname_len)
+struct nlm_host *nlmclnt_lookup_host(const struct sockaddr_in *sin,
+ int proto, u32 version,
+ const char *hostname,
+ unsigned int hostname_len)
{
struct sockaddr_in ssin = {0};
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index 908b23f..e4d5635 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -18,6 +18,8 @@
#define NLMDBG_FACILITY NLMDBG_MONITOR
+#define XDR_ADDRBUF_LEN (20)
+
static struct rpc_clnt * nsm_create(void);
static struct rpc_program nsm_program;
@@ -147,28 +149,55 @@
/*
* XDR functions for NSM.
+ *
+ * See http://www.opengroup.org/ for details on the Network
+ * Status Monitor wire protocol.
*/
-static __be32 *
-xdr_encode_common(struct rpc_rqst *rqstp, __be32 *p, struct nsm_args *argp)
+static __be32 *xdr_encode_nsm_string(__be32 *p, char *string)
{
- char buffer[20], *name;
+ size_t len = strlen(string);
- /*
- * Use the dotted-quad IP address of the remote host as
- * identifier. Linux statd always looks up the canonical
- * hostname first for whatever remote hostname it receives,
- * so this works alright.
- */
- if (nsm_use_hostnames) {
- name = argp->mon_name;
- } else {
- sprintf(buffer, "%u.%u.%u.%u", NIPQUAD(argp->addr));
+ if (len > SM_MAXSTRLEN)
+ len = SM_MAXSTRLEN;
+ return xdr_encode_opaque(p, string, len);
+}
+
+/*
+ * "mon_name" specifies the host to be monitored.
+ *
+ * Linux uses a text version of the IP address of the remote
+ * host as the host identifier (the "mon_name" argument).
+ *
+ * Linux statd always looks up the canonical hostname first for
+ * whatever remote hostname it receives, so this works alright.
+ */
+static __be32 *xdr_encode_mon_name(__be32 *p, struct nsm_args *argp)
+{
+ char buffer[XDR_ADDRBUF_LEN + 1];
+ char *name = argp->mon_name;
+
+ if (!nsm_use_hostnames) {
+ snprintf(buffer, XDR_ADDRBUF_LEN,
+ NIPQUAD_FMT, NIPQUAD(argp->addr));
name = buffer;
}
- if (!(p = xdr_encode_string(p, name))
- || !(p = xdr_encode_string(p, utsname()->nodename)))
+
+ return xdr_encode_nsm_string(p, name);
+}
+
+/*
+ * The "my_id" argument specifies the hostname and RPC procedure
+ * to be called when the status manager receives notification
+ * (via the SM_NOTIFY call) that the state of host "mon_name"
+ * has changed.
+ */
+static __be32 *xdr_encode_my_id(__be32 *p, struct nsm_args *argp)
+{
+ p = xdr_encode_nsm_string(p, utsname()->nodename);
+ if (!p)
return ERR_PTR(-EIO);
+
*p++ = htonl(argp->prog);
*p++ = htonl(argp->vers);
*p++ = htonl(argp->proc);
@@ -176,18 +205,48 @@
return p;
}
-static int
-xdr_encode_mon(struct rpc_rqst *rqstp, __be32 *p, struct nsm_args *argp)
+/*
+ * The "mon_id" argument specifies the non-private arguments
+ * of an SM_MON or SM_UNMON call.
+ */
+static __be32 *xdr_encode_mon_id(__be32 *p, struct nsm_args *argp)
{
- p = xdr_encode_common(rqstp, p, argp);
- if (IS_ERR(p))
- return PTR_ERR(p);
+ p = xdr_encode_mon_name(p, argp);
+ if (!p)
+ return ERR_PTR(-EIO);
- /* Surprise - there may even be room for an IPv6 address now */
+ return xdr_encode_my_id(p, argp);
+}
+
+/*
+ * The "priv" argument may contain private information required
+ * by the SM_MON call. This information will be supplied in the
+ * SM_NOTIFY call.
+ *
+ * Linux provides the raw IP address of the monitored host,
+ * left in network byte order.
+ */
+static __be32 *xdr_encode_priv(__be32 *p, struct nsm_args *argp)
+{
*p++ = argp->addr;
*p++ = 0;
*p++ = 0;
*p++ = 0;
+
+ return p;
+}
+
+static int
+xdr_encode_mon(struct rpc_rqst *rqstp, __be32 *p, struct nsm_args *argp)
+{
+ p = xdr_encode_mon_id(p, argp);
+ if (IS_ERR(p))
+ return PTR_ERR(p);
+
+ p = xdr_encode_priv(p, argp);
+ if (IS_ERR(p))
+ return PTR_ERR(p);
+
rqstp->rq_slen = xdr_adjust_iovec(rqstp->rq_svec, p);
return 0;
}
@@ -195,7 +254,7 @@
static int
xdr_encode_unmon(struct rpc_rqst *rqstp, __be32 *p, struct nsm_args *argp)
{
- p = xdr_encode_common(rqstp, p, argp);
+ p = xdr_encode_mon_id(p, argp);
if (IS_ERR(p))
return PTR_ERR(p);
rqstp->rq_slen = xdr_adjust_iovec(rqstp->rq_svec, p);
@@ -220,9 +279,11 @@
}
#define SM_my_name_sz (1+XDR_QUADLEN(SM_MAXSTRLEN))
-#define SM_my_id_sz (3+1+SM_my_name_sz)
-#define SM_mon_id_sz (1+XDR_QUADLEN(20)+SM_my_id_sz)
-#define SM_mon_sz (SM_mon_id_sz+4)
+#define SM_my_id_sz (SM_my_name_sz+3)
+#define SM_mon_name_sz (1+XDR_QUADLEN(SM_MAXSTRLEN))
+#define SM_mon_id_sz (SM_mon_name_sz+SM_my_id_sz)
+#define SM_priv_sz (XDR_QUADLEN(SM_PRIV_SIZE))
+#define SM_mon_sz (SM_mon_id_sz+SM_priv_sz)
#define SM_monres_sz 2
#define SM_unmonres_sz 1
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index 1ed8bd4..38c2f0b 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -74,7 +74,9 @@
static const unsigned long nlm_timeout_max = 20;
static const int nlm_port_min = 0, nlm_port_max = 65535;
+#ifdef CONFIG_SYSCTL
static struct ctl_table_header * nlm_sysctl_table;
+#endif
static unsigned long get_lockd_grace_period(void)
{
@@ -359,6 +361,8 @@
}
EXPORT_SYMBOL(lockd_down);
+#ifdef CONFIG_SYSCTL
+
/*
* Sysctl parameters (same as module parameters, different interface).
*/
@@ -443,6 +447,8 @@
{ .ctl_name = 0 }
};
+#endif /* CONFIG_SYSCTL */
+
/*
* Module (and sysfs) parameters.
*/
@@ -516,15 +522,21 @@
static int __init init_nlm(void)
{
+#ifdef CONFIG_SYSCTL
nlm_sysctl_table = register_sysctl_table(nlm_sysctl_root);
return nlm_sysctl_table ? 0 : -ENOMEM;
+#else
+ return 0;
+#endif
}
static void __exit exit_nlm(void)
{
/* FIXME: delete all NLM clients */
nlm_shutdown_hosts();
+#ifdef CONFIG_SYSCTL
unregister_sysctl_table(nlm_sysctl_table);
+#endif
}
module_init(init_nlm);
diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile
index df0f41e..ac6170c 100644
--- a/fs/nfs/Makefile
+++ b/fs/nfs/Makefile
@@ -5,7 +5,7 @@
obj-$(CONFIG_NFS_FS) += nfs.o
nfs-y := client.o dir.o file.o getroot.o inode.o super.o nfs2xdr.o \
- pagelist.o proc.o read.o symlink.o unlink.o \
+ direct.o pagelist.o proc.o read.o symlink.o unlink.o \
write.o namespace.o mount_clnt.o
nfs-$(CONFIG_ROOT_NFS) += nfsroot.o
nfs-$(CONFIG_NFS_V3) += nfs3proc.o nfs3xdr.o
@@ -14,5 +14,4 @@
delegation.o idmap.o \
callback.o callback_xdr.o callback_proc.o \
nfs4namespace.o
-nfs-$(CONFIG_NFS_DIRECTIO) += direct.o
nfs-$(CONFIG_SYSCTL) += sysctl.o
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index c5c0175..f2f3b28 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -112,6 +112,7 @@
static struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_init)
{
struct nfs_client *clp;
+ struct rpc_cred *cred;
if ((clp = kzalloc(sizeof(*clp), GFP_KERNEL)) == NULL)
goto error_0;
@@ -150,6 +151,9 @@
clp->cl_boot_time = CURRENT_TIME;
clp->cl_state = 1 << NFS4CLNT_LEASE_EXPIRED;
#endif
+ cred = rpc_lookup_machine_cred();
+ if (!IS_ERR(cred))
+ clp->cl_machine_cred = cred;
return clp;
@@ -170,6 +174,8 @@
BUG_ON(!RB_EMPTY_ROOT(&clp->cl_state_owners));
if (__test_and_clear_bit(NFS_CS_IDMAP, &clp->cl_res_state))
nfs_idmap_delete(clp);
+
+ rpc_destroy_wait_queue(&clp->cl_rpcwaitq);
#endif
}
@@ -189,6 +195,9 @@
if (__test_and_clear_bit(NFS_CS_CALLBACK, &clp->cl_res_state))
nfs_callback_down();
+ if (clp->cl_machine_cred != NULL)
+ put_rpccred(clp->cl_machine_cred);
+
kfree(clp->cl_hostname);
kfree(clp);
@@ -680,10 +689,22 @@
if (error < 0)
goto error;
+ server->port = data->nfs_server.port;
+
error = nfs_init_server_rpcclient(server, &timeparms, data->auth_flavors[0]);
if (error < 0)
goto error;
+ /* Preserve the values of mount_server-related mount options */
+ if (data->mount_server.addrlen) {
+ memcpy(&server->mountd_address, &data->mount_server.address,
+ data->mount_server.addrlen);
+ server->mountd_addrlen = data->mount_server.addrlen;
+ }
+ server->mountd_version = data->mount_server.version;
+ server->mountd_port = data->mount_server.port;
+ server->mountd_protocol = data->mount_server.protocol;
+
server->namelen = data->namlen;
/* Create a client RPC handle for the NFSv3 ACL management interface */
nfs_init_server_aclclient(server);
@@ -1062,6 +1083,8 @@
server->acdirmin = data->acdirmin * HZ;
server->acdirmax = data->acdirmax * HZ;
+ server->port = data->nfs_server.port;
+
error = nfs_init_server_rpcclient(server, &timeparms, data->auth_flavors[0]);
error:
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index d9e30ac..f288b3e 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1967,7 +1967,7 @@
if (!NFS_PROTO(inode)->access)
goto out_notsup;
- cred = rpcauth_lookupcred(NFS_CLIENT(inode)->cl_auth, 0);
+ cred = rpc_lookup_cred();
if (!IS_ERR(cred)) {
res = nfs_do_access(inode, cred, mask);
put_rpccred(cred);
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 16844f9..4757a2b 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -229,14 +229,20 @@
static void nfs_direct_read_result(struct rpc_task *task, void *calldata)
{
struct nfs_read_data *data = calldata;
- struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req;
- if (nfs_readpage_result(task, data) != 0)
- return;
+ nfs_readpage_result(task, data);
+}
+
+static void nfs_direct_read_release(void *calldata)
+{
+
+ struct nfs_read_data *data = calldata;
+ struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req;
+ int status = data->task.tk_status;
spin_lock(&dreq->lock);
- if (unlikely(task->tk_status < 0)) {
- dreq->error = task->tk_status;
+ if (unlikely(status < 0)) {
+ dreq->error = status;
spin_unlock(&dreq->lock);
} else {
dreq->count += data->res.count;
@@ -249,11 +255,12 @@
if (put_dreq(dreq))
nfs_direct_complete(dreq);
+ nfs_readdata_release(calldata);
}
static const struct rpc_call_ops nfs_read_direct_ops = {
.rpc_call_done = nfs_direct_read_result,
- .rpc_release = nfs_readdata_release,
+ .rpc_release = nfs_direct_read_release,
};
/*
@@ -280,6 +287,7 @@
.rpc_client = NFS_CLIENT(inode),
.rpc_message = &msg,
.callback_ops = &nfs_read_direct_ops,
+ .workqueue = nfsiod_workqueue,
.flags = RPC_TASK_ASYNC,
};
unsigned int pgbase;
@@ -323,7 +331,7 @@
data->inode = inode;
data->cred = msg.rpc_cred;
data->args.fh = NFS_FH(inode);
- data->args.context = ctx;
+ data->args.context = get_nfs_open_context(ctx);
data->args.offset = pos;
data->args.pgbase = pgbase;
data->args.pages = data->pagevec;
@@ -339,8 +347,9 @@
NFS_PROTO(inode)->read_setup(data, &msg);
task = rpc_run_task(&task_setup_data);
- if (!IS_ERR(task))
- rpc_put_task(task);
+ if (IS_ERR(task))
+ break;
+ rpc_put_task(task);
dprintk("NFS: %5u initiated direct read call "
"(req %s/%Ld, %zu bytes @ offset %Lu)\n",
@@ -446,6 +455,7 @@
struct rpc_task_setup task_setup_data = {
.rpc_client = NFS_CLIENT(inode),
.callback_ops = &nfs_write_direct_ops,
+ .workqueue = nfsiod_workqueue,
.flags = RPC_TASK_ASYNC,
};
@@ -499,27 +509,34 @@
static void nfs_direct_commit_result(struct rpc_task *task, void *calldata)
{
struct nfs_write_data *data = calldata;
- struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req;
/* Call the NFS version-specific code */
- if (NFS_PROTO(data->inode)->commit_done(task, data) != 0)
- return;
- if (unlikely(task->tk_status < 0)) {
+ NFS_PROTO(data->inode)->commit_done(task, data);
+}
+
+static void nfs_direct_commit_release(void *calldata)
+{
+ struct nfs_write_data *data = calldata;
+ struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req;
+ int status = data->task.tk_status;
+
+ if (status < 0) {
dprintk("NFS: %5u commit failed with error %d.\n",
- task->tk_pid, task->tk_status);
+ data->task.tk_pid, status);
dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
} else if (memcmp(&dreq->verf, &data->verf, sizeof(data->verf))) {
- dprintk("NFS: %5u commit verify failed\n", task->tk_pid);
+ dprintk("NFS: %5u commit verify failed\n", data->task.tk_pid);
dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
}
- dprintk("NFS: %5u commit returned %d\n", task->tk_pid, task->tk_status);
+ dprintk("NFS: %5u commit returned %d\n", data->task.tk_pid, status);
nfs_direct_write_complete(dreq, data->inode);
+ nfs_commitdata_release(calldata);
}
static const struct rpc_call_ops nfs_commit_direct_ops = {
.rpc_call_done = nfs_direct_commit_result,
- .rpc_release = nfs_commit_release,
+ .rpc_release = nfs_direct_commit_release,
};
static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq)
@@ -537,6 +554,7 @@
.rpc_message = &msg,
.callback_ops = &nfs_commit_direct_ops,
.callback_data = data,
+ .workqueue = nfsiod_workqueue,
.flags = RPC_TASK_ASYNC,
};
@@ -546,6 +564,7 @@
data->args.fh = NFS_FH(data->inode);
data->args.offset = 0;
data->args.count = 0;
+ data->args.context = get_nfs_open_context(dreq->ctx);
data->res.count = 0;
data->res.fattr = &data->fattr;
data->res.verf = &data->verf;
@@ -585,7 +604,7 @@
static void nfs_alloc_commit_data(struct nfs_direct_req *dreq)
{
- dreq->commit_data = nfs_commit_alloc();
+ dreq->commit_data = nfs_commitdata_alloc();
if (dreq->commit_data != NULL)
dreq->commit_data->req = (struct nfs_page *) dreq;
}
@@ -606,11 +625,20 @@
static void nfs_direct_write_result(struct rpc_task *task, void *calldata)
{
struct nfs_write_data *data = calldata;
- struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req;
- int status = task->tk_status;
if (nfs_writeback_done(task, data) != 0)
return;
+}
+
+/*
+ * NB: Return the value of the first error return code. Subsequent
+ * errors after the first one are ignored.
+ */
+static void nfs_direct_write_release(void *calldata)
+{
+ struct nfs_write_data *data = calldata;
+ struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req;
+ int status = data->task.tk_status;
spin_lock(&dreq->lock);
@@ -632,23 +660,13 @@
break;
case NFS_ODIRECT_DO_COMMIT:
if (memcmp(&dreq->verf, &data->verf, sizeof(dreq->verf))) {
- dprintk("NFS: %5u write verify failed\n", task->tk_pid);
+ dprintk("NFS: %5u write verify failed\n", data->task.tk_pid);
dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
}
}
}
out_unlock:
spin_unlock(&dreq->lock);
-}
-
-/*
- * NB: Return the value of the first error return code. Subsequent
- * errors after the first one are ignored.
- */
-static void nfs_direct_write_release(void *calldata)
-{
- struct nfs_write_data *data = calldata;
- struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req;
if (put_dreq(dreq))
nfs_direct_write_complete(dreq, data->inode);
@@ -682,6 +700,7 @@
.rpc_client = NFS_CLIENT(inode),
.rpc_message = &msg,
.callback_ops = &nfs_write_direct_ops,
+ .workqueue = nfsiod_workqueue,
.flags = RPC_TASK_ASYNC,
};
size_t wsize = NFS_SERVER(inode)->wsize;
@@ -728,7 +747,7 @@
data->inode = inode;
data->cred = msg.rpc_cred;
data->args.fh = NFS_FH(inode);
- data->args.context = ctx;
+ data->args.context = get_nfs_open_context(ctx);
data->args.offset = pos;
data->args.pgbase = pgbase;
data->args.pages = data->pagevec;
@@ -745,8 +764,9 @@
NFS_PROTO(inode)->write_setup(data, &msg);
task = rpc_run_task(&task_setup_data);
- if (!IS_ERR(task))
- rpc_put_task(task);
+ if (IS_ERR(task))
+ break;
+ rpc_put_task(task);
dprintk("NFS: %5u initiated direct write call "
"(req %s/%Ld, %zu bytes @ offset %Lu)\n",
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 5d2e9d9..3536b01 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -238,10 +238,8 @@
ssize_t result;
size_t count = iov_length(iov, nr_segs);
-#ifdef CONFIG_NFS_DIRECTIO
if (iocb->ki_filp->f_flags & O_DIRECT)
return nfs_file_direct_read(iocb, iov, nr_segs, pos);
-#endif
dfprintk(VFS, "nfs: read(%s/%s, %lu@%lu)\n",
dentry->d_parent->d_name.name, dentry->d_name.name,
@@ -387,9 +385,7 @@
.write_end = nfs_write_end,
.invalidatepage = nfs_invalidate_page,
.releasepage = nfs_release_page,
-#ifdef CONFIG_NFS_DIRECTIO
.direct_IO = nfs_direct_IO,
-#endif
.launder_page = nfs_launder_page,
};
@@ -447,10 +443,8 @@
ssize_t result;
size_t count = iov_length(iov, nr_segs);
-#ifdef CONFIG_NFS_DIRECTIO
if (iocb->ki_filp->f_flags & O_DIRECT)
return nfs_file_direct_write(iocb, iov, nr_segs, pos);
-#endif
dfprintk(VFS, "nfs: write(%s/%s(%ld), %lu@%Ld)\n",
dentry->d_parent->d_name.name, dentry->d_name.name,
@@ -576,17 +570,9 @@
lock_kernel();
/* Use local locking if mounted with "-onolock" */
- if (!(NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM)) {
+ if (!(NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM))
status = NFS_PROTO(inode)->lock(filp, cmd, fl);
- /* If we were signalled we still need to ensure that
- * we clean up any state on the server. We therefore
- * record the lock call as having succeeded in order to
- * ensure that locks_remove_posix() cleans it out when
- * the process exits.
- */
- if (status == -EINTR || status == -ERESTARTSYS)
- do_vfs_lock(filp, fl);
- } else
+ else
status = do_vfs_lock(filp, fl);
unlock_kernel();
if (status < 0)
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 6f88d7c..5cb3345 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -523,8 +523,12 @@
static void __put_nfs_open_context(struct nfs_open_context *ctx, int wait)
{
- struct inode *inode = ctx->path.dentry->d_inode;
+ struct inode *inode;
+ if (ctx == NULL)
+ return;
+
+ inode = ctx->path.dentry->d_inode;
if (!atomic_dec_and_lock(&ctx->count, &inode->i_lock))
return;
list_del(&ctx->list);
@@ -610,7 +614,7 @@
struct nfs_open_context *ctx;
struct rpc_cred *cred;
- cred = rpcauth_lookupcred(NFS_CLIENT(inode)->cl_auth, 0);
+ cred = rpc_lookup_cred();
if (IS_ERR(cred))
return PTR_ERR(cred);
ctx = alloc_nfs_open_context(filp->f_path.mnt, filp->f_path.dentry, cred);
@@ -1218,6 +1222,36 @@
kmem_cache_destroy(nfs_inode_cachep);
}
+struct workqueue_struct *nfsiod_workqueue;
+
+/*
+ * start up the nfsiod workqueue
+ */
+static int nfsiod_start(void)
+{
+ struct workqueue_struct *wq;
+ dprintk("RPC: creating workqueue nfsiod\n");
+ wq = create_singlethread_workqueue("nfsiod");
+ if (wq == NULL)
+ return -ENOMEM;
+ nfsiod_workqueue = wq;
+ return 0;
+}
+
+/*
+ * Destroy the nfsiod workqueue
+ */
+static void nfsiod_stop(void)
+{
+ struct workqueue_struct *wq;
+
+ wq = nfsiod_workqueue;
+ if (wq == NULL)
+ return;
+ nfsiod_workqueue = NULL;
+ destroy_workqueue(wq);
+}
+
/*
* Initialize NFS
*/
@@ -1225,6 +1259,10 @@
{
int err;
+ err = nfsiod_start();
+ if (err)
+ goto out6;
+
err = nfs_fs_proc_init();
if (err)
goto out5;
@@ -1271,6 +1309,8 @@
out4:
nfs_fs_proc_exit();
out5:
+ nfsiod_stop();
+out6:
return err;
}
@@ -1286,6 +1326,7 @@
#endif
unregister_nfs_fs();
nfs_fs_proc_exit();
+ nfsiod_stop();
}
/* Not quite true; I just maintain it */
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 9319927..04ae867 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -46,9 +46,9 @@
struct sockaddr_storage address;
size_t addrlen;
char *hostname;
- unsigned int version;
+ u32 version;
unsigned short port;
- int protocol;
+ unsigned short protocol;
} mount_server;
struct {
@@ -56,7 +56,8 @@
size_t addrlen;
char *hostname;
char *export_path;
- int protocol;
+ unsigned short port;
+ unsigned short protocol;
} nfs_server;
struct security_mnt_opts lsm_opts;
@@ -115,13 +116,8 @@
extern int __init nfs_init_writepagecache(void);
extern void nfs_destroy_writepagecache(void);
-#ifdef CONFIG_NFS_DIRECTIO
extern int __init nfs_init_directcache(void);
extern void nfs_destroy_directcache(void);
-#else
-#define nfs_init_directcache() (0)
-#define nfs_destroy_directcache() do {} while(0)
-#endif
/* nfs2xdr.c */
extern int nfs_stat_to_errno(int);
@@ -146,6 +142,7 @@
extern int nfs_access_cache_shrinker(int nr_to_scan, gfp_t gfp_mask);
/* inode.c */
+extern struct workqueue_struct *nfsiod_workqueue;
extern struct inode *nfs_alloc_inode(struct super_block *sb);
extern void nfs_destroy_inode(struct inode *);
extern int nfs_write_inode(struct inode *,int);
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index 607f6eb..af4d0f1 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -20,7 +20,7 @@
static void nfs_expire_automounts(struct work_struct *work);
-LIST_HEAD(nfs_automount_list);
+static LIST_HEAD(nfs_automount_list);
static DECLARE_DELAYED_WORK(nfs_automount_task, nfs_expire_automounts);
int nfs_mountpoint_expiry_timeout = 500 * HZ;
diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
index 1f7ea67..28bab67 100644
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c
@@ -267,7 +267,7 @@
int status;
if ((status = ntohl(*p++)))
- return -nfs_stat_to_errno(status);
+ return nfs_stat_to_errno(status);
p = xdr_decode_fattr(p, res->fattr);
count = ntohl(*p++);
@@ -428,11 +428,11 @@
size_t hdrlen;
unsigned int pglen, recvd;
u32 len;
- int status, nr;
+ int status, nr = 0;
__be32 *end, *entry, *kaddr;
if ((status = ntohl(*p++)))
- return -nfs_stat_to_errno(status);
+ return nfs_stat_to_errno(status);
hdrlen = (u8 *) p - (u8 *) iov->iov_base;
if (iov->iov_len < hdrlen) {
@@ -452,7 +452,12 @@
kaddr = p = kmap_atomic(*page, KM_USER0);
end = (__be32 *)((char *)p + pglen);
entry = p;
- for (nr = 0; *p++; nr++) {
+
+ /* Make sure the packet actually has a value_follows and EOF entry */
+ if ((entry + 1) > end)
+ goto short_pkt;
+
+ for (; *p++; nr++) {
if (p + 2 > end)
goto short_pkt;
p++; /* fileid */
@@ -467,18 +472,32 @@
goto short_pkt;
entry = p;
}
- if (!nr && (entry[0] != 0 || entry[1] == 0))
- goto short_pkt;
+
+ /*
+ * Apparently some server sends responses that are a valid size, but
+ * contain no entries, and have value_follows==0 and EOF==0. For
+ * those, just set the EOF marker.
+ */
+ if (!nr && entry[1] == 0) {
+ dprintk("NFS: readdir reply truncated!\n");
+ entry[1] = 1;
+ }
out:
kunmap_atomic(kaddr, KM_USER0);
return nr;
short_pkt:
+ /*
+ * When we get a short packet there are 2 possibilities. We can
+ * return an error, or fix up the response to look like a valid
+ * response and return what we have so far. If there are no
+ * entries and the packet was short, then return -EIO. If there
+ * are valid entries in the response, return them and pretend that
+ * the call was successful, but incomplete. The caller can retry the
+ * readdir starting at the last cookie.
+ */
entry[0] = entry[1] = 0;
- /* truncate listing ? */
- if (!nr) {
- dprintk("NFS: readdir reply truncated!\n");
- entry[1] = 1;
- }
+ if (!nr)
+ nr = -errno_NFSERR_IO;
goto out;
err_unmap:
nr = -errno_NFSERR_IO;
@@ -518,7 +537,7 @@
int status;
if ((status = ntohl(*p++)) != 0)
- status = -nfs_stat_to_errno(status);
+ status = nfs_stat_to_errno(status);
return status;
}
@@ -532,7 +551,7 @@
int status;
if ((status = ntohl(*p++)))
- return -nfs_stat_to_errno(status);
+ return nfs_stat_to_errno(status);
xdr_decode_fattr(p, fattr);
return 0;
}
@@ -547,7 +566,7 @@
int status;
if ((status = ntohl(*p++)))
- return -nfs_stat_to_errno(status);
+ return nfs_stat_to_errno(status);
p = xdr_decode_fhandle(p, res->fh);
xdr_decode_fattr(p, res->fattr);
return 0;
@@ -585,7 +604,7 @@
int status;
if ((status = ntohl(*p++)))
- return -nfs_stat_to_errno(status);
+ return nfs_stat_to_errno(status);
/* Convert length of symlink */
len = ntohl(*p++);
if (len >= rcvbuf->page_len) {
@@ -634,7 +653,7 @@
int status;
if ((status = ntohl(*p++)))
- return -nfs_stat_to_errno(status);
+ return nfs_stat_to_errno(status);
res->tsize = ntohl(*p++);
res->bsize = ntohl(*p++);
@@ -653,39 +672,39 @@
int errno;
} nfs_errtbl[] = {
{ NFS_OK, 0 },
- { NFSERR_PERM, EPERM },
- { NFSERR_NOENT, ENOENT },
- { NFSERR_IO, errno_NFSERR_IO },
- { NFSERR_NXIO, ENXIO },
-/* { NFSERR_EAGAIN, EAGAIN }, */
- { NFSERR_ACCES, EACCES },
- { NFSERR_EXIST, EEXIST },
- { NFSERR_XDEV, EXDEV },
- { NFSERR_NODEV, ENODEV },
- { NFSERR_NOTDIR, ENOTDIR },
- { NFSERR_ISDIR, EISDIR },
- { NFSERR_INVAL, EINVAL },
- { NFSERR_FBIG, EFBIG },
- { NFSERR_NOSPC, ENOSPC },
- { NFSERR_ROFS, EROFS },
- { NFSERR_MLINK, EMLINK },
- { NFSERR_NAMETOOLONG, ENAMETOOLONG },
- { NFSERR_NOTEMPTY, ENOTEMPTY },
- { NFSERR_DQUOT, EDQUOT },
- { NFSERR_STALE, ESTALE },
- { NFSERR_REMOTE, EREMOTE },
+ { NFSERR_PERM, -EPERM },
+ { NFSERR_NOENT, -ENOENT },
+ { NFSERR_IO, -errno_NFSERR_IO},
+ { NFSERR_NXIO, -ENXIO },
+/* { NFSERR_EAGAIN, -EAGAIN }, */
+ { NFSERR_ACCES, -EACCES },
+ { NFSERR_EXIST, -EEXIST },
+ { NFSERR_XDEV, -EXDEV },
+ { NFSERR_NODEV, -ENODEV },
+ { NFSERR_NOTDIR, -ENOTDIR },
+ { NFSERR_ISDIR, -EISDIR },
+ { NFSERR_INVAL, -EINVAL },
+ { NFSERR_FBIG, -EFBIG },
+ { NFSERR_NOSPC, -ENOSPC },
+ { NFSERR_ROFS, -EROFS },
+ { NFSERR_MLINK, -EMLINK },
+ { NFSERR_NAMETOOLONG, -ENAMETOOLONG },
+ { NFSERR_NOTEMPTY, -ENOTEMPTY },
+ { NFSERR_DQUOT, -EDQUOT },
+ { NFSERR_STALE, -ESTALE },
+ { NFSERR_REMOTE, -EREMOTE },
#ifdef EWFLUSH
- { NFSERR_WFLUSH, EWFLUSH },
+ { NFSERR_WFLUSH, -EWFLUSH },
#endif
- { NFSERR_BADHANDLE, EBADHANDLE },
- { NFSERR_NOT_SYNC, ENOTSYNC },
- { NFSERR_BAD_COOKIE, EBADCOOKIE },
- { NFSERR_NOTSUPP, ENOTSUPP },
- { NFSERR_TOOSMALL, ETOOSMALL },
- { NFSERR_SERVERFAULT, ESERVERFAULT },
- { NFSERR_BADTYPE, EBADTYPE },
- { NFSERR_JUKEBOX, EJUKEBOX },
- { -1, EIO }
+ { NFSERR_BADHANDLE, -EBADHANDLE },
+ { NFSERR_NOT_SYNC, -ENOTSYNC },
+ { NFSERR_BAD_COOKIE, -EBADCOOKIE },
+ { NFSERR_NOTSUPP, -ENOTSUPP },
+ { NFSERR_TOOSMALL, -ETOOSMALL },
+ { NFSERR_SERVERFAULT, -ESERVERFAULT },
+ { NFSERR_BADTYPE, -EBADTYPE },
+ { NFSERR_JUKEBOX, -EJUKEBOX },
+ { -1, -EIO }
};
/*
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index 3917e2f..11cddde 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -508,14 +508,14 @@
struct page **page;
size_t hdrlen;
u32 len, recvd, pglen;
- int status, nr;
+ int status, nr = 0;
__be32 *entry, *end, *kaddr;
status = ntohl(*p++);
/* Decode post_op_attrs */
p = xdr_decode_post_op_attr(p, res->dir_attr);
if (status)
- return -nfs_stat_to_errno(status);
+ return nfs_stat_to_errno(status);
/* Decode verifier cookie */
if (res->verf) {
res->verf[0] = *p++;
@@ -542,7 +542,12 @@
kaddr = p = kmap_atomic(*page, KM_USER0);
end = (__be32 *)((char *)p + pglen);
entry = p;
- for (nr = 0; *p++; nr++) {
+
+ /* Make sure the packet actually has a value_follows and EOF entry */
+ if ((entry + 1) > end)
+ goto short_pkt;
+
+ for (; *p++; nr++) {
if (p + 3 > end)
goto short_pkt;
p += 2; /* inode # */
@@ -581,18 +586,32 @@
goto short_pkt;
entry = p;
}
- if (!nr && (entry[0] != 0 || entry[1] == 0))
- goto short_pkt;
+
+ /*
+ * Apparently some server sends responses that are a valid size, but
+ * contain no entries, and have value_follows==0 and EOF==0. For
+ * those, just set the EOF marker.
+ */
+ if (!nr && entry[1] == 0) {
+ dprintk("NFS: readdir reply truncated!\n");
+ entry[1] = 1;
+ }
out:
kunmap_atomic(kaddr, KM_USER0);
return nr;
short_pkt:
+ /*
+ * When we get a short packet there are 2 possibilities. We can
+ * return an error, or fix up the response to look like a valid
+ * response and return what we have so far. If there are no
+ * entries and the packet was short, then return -EIO. If there
+ * are valid entries in the response, return them and pretend that
+ * the call was successful, but incomplete. The caller can retry the
+ * readdir starting at the last cookie.
+ */
entry[0] = entry[1] = 0;
- /* truncate listing ? */
- if (!nr) {
- dprintk("NFS: readdir reply truncated!\n");
- entry[1] = 1;
- }
+ if (!nr)
+ nr = -errno_NFSERR_IO;
goto out;
err_unmap:
nr = -errno_NFSERR_IO;
@@ -732,7 +751,7 @@
int status;
if ((status = ntohl(*p++)))
- return -nfs_stat_to_errno(status);
+ return nfs_stat_to_errno(status);
xdr_decode_fattr(p, fattr);
return 0;
}
@@ -747,7 +766,7 @@
int status;
if ((status = ntohl(*p++)))
- status = -nfs_stat_to_errno(status);
+ status = nfs_stat_to_errno(status);
xdr_decode_wcc_data(p, fattr);
return status;
}
@@ -767,7 +786,7 @@
int status;
if ((status = ntohl(*p++))) {
- status = -nfs_stat_to_errno(status);
+ status = nfs_stat_to_errno(status);
} else {
if (!(p = xdr_decode_fhandle(p, res->fh)))
return -errno_NFSERR_IO;
@@ -787,7 +806,7 @@
p = xdr_decode_post_op_attr(p, res->fattr);
if (status)
- return -nfs_stat_to_errno(status);
+ return nfs_stat_to_errno(status);
res->access = ntohl(*p++);
return 0;
}
@@ -824,7 +843,7 @@
p = xdr_decode_post_op_attr(p, fattr);
if (status != 0)
- return -nfs_stat_to_errno(status);
+ return nfs_stat_to_errno(status);
/* Convert length of symlink */
len = ntohl(*p++);
@@ -872,7 +891,7 @@
p = xdr_decode_post_op_attr(p, res->fattr);
if (status != 0)
- return -nfs_stat_to_errno(status);
+ return nfs_stat_to_errno(status);
/* Decode reply count and EOF flag. NFSv3 is somewhat redundant
* in that it puts the count both in the res struct and in the
@@ -922,7 +941,7 @@
p = xdr_decode_wcc_data(p, res->fattr);
if (status != 0)
- return -nfs_stat_to_errno(status);
+ return nfs_stat_to_errno(status);
res->count = ntohl(*p++);
res->verf->committed = (enum nfs3_stable_how)ntohl(*p++);
@@ -953,7 +972,7 @@
res->fattr->valid = 0;
}
} else {
- status = -nfs_stat_to_errno(status);
+ status = nfs_stat_to_errno(status);
}
p = xdr_decode_wcc_data(p, res->dir_attr);
return status;
@@ -968,7 +987,7 @@
int status;
if ((status = ntohl(*p++)) != 0)
- status = -nfs_stat_to_errno(status);
+ status = nfs_stat_to_errno(status);
p = xdr_decode_wcc_data(p, res->fromattr);
p = xdr_decode_wcc_data(p, res->toattr);
return status;
@@ -983,7 +1002,7 @@
int status;
if ((status = ntohl(*p++)) != 0)
- status = -nfs_stat_to_errno(status);
+ status = nfs_stat_to_errno(status);
p = xdr_decode_post_op_attr(p, res->fattr);
p = xdr_decode_wcc_data(p, res->dir_attr);
return status;
@@ -1001,7 +1020,7 @@
p = xdr_decode_post_op_attr(p, res->fattr);
if (status != 0)
- return -nfs_stat_to_errno(status);
+ return nfs_stat_to_errno(status);
p = xdr_decode_hyper(p, &res->tbytes);
p = xdr_decode_hyper(p, &res->fbytes);
@@ -1026,7 +1045,7 @@
p = xdr_decode_post_op_attr(p, res->fattr);
if (status != 0)
- return -nfs_stat_to_errno(status);
+ return nfs_stat_to_errno(status);
res->rtmax = ntohl(*p++);
res->rtpref = ntohl(*p++);
@@ -1054,7 +1073,7 @@
p = xdr_decode_post_op_attr(p, res->fattr);
if (status != 0)
- return -nfs_stat_to_errno(status);
+ return nfs_stat_to_errno(status);
res->max_link = ntohl(*p++);
res->max_namelen = ntohl(*p++);
@@ -1073,7 +1092,7 @@
status = ntohl(*p++);
p = xdr_decode_wcc_data(p, res->fattr);
if (status != 0)
- return -nfs_stat_to_errno(status);
+ return nfs_stat_to_errno(status);
res->verf->verifier[0] = *p++;
res->verf->verifier[1] = *p++;
@@ -1095,7 +1114,7 @@
int err, base;
if (status != 0)
- return -nfs_stat_to_errno(status);
+ return nfs_stat_to_errno(status);
p = xdr_decode_post_op_attr(p, res->fattr);
res->mask = ntohl(*p++);
if (res->mask & ~(NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT))
@@ -1122,7 +1141,7 @@
int status = ntohl(*p++);
if (status)
- return -nfs_stat_to_errno(status);
+ return nfs_stat_to_errno(status);
xdr_decode_post_op_attr(p, fattr);
return 0;
}
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 7ce0786..dbc0927 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -51,6 +51,7 @@
#include "nfs4_fs.h"
#include "delegation.h"
+#include "internal.h"
#include "iostat.h"
#define NFSDBG_FACILITY NFSDBG_PROC
@@ -239,6 +240,8 @@
{
p->o_res.f_attr = &p->f_attr;
p->o_res.dir_attr = &p->dir_attr;
+ p->o_res.seqid = p->o_arg.seqid;
+ p->c_res.seqid = p->c_arg.seqid;
p->o_res.server = p->o_arg.server;
nfs_fattr_init(&p->f_attr);
nfs_fattr_init(&p->dir_attr);
@@ -729,7 +732,6 @@
renew_lease(data->o_res.server, data->timestamp);
data->rpc_done = 1;
}
- nfs_increment_open_seqid(data->rpc_status, data->c_arg.seqid);
}
static void nfs4_open_confirm_release(void *calldata)
@@ -773,6 +775,7 @@
.rpc_message = &msg,
.callback_ops = &nfs4_open_confirm_ops,
.callback_data = data,
+ .workqueue = nfsiod_workqueue,
.flags = RPC_TASK_ASYNC,
};
int status;
@@ -858,7 +861,6 @@
if (!(data->o_res.rflags & NFS4_OPEN_RESULT_CONFIRM))
nfs_confirm_seqid(&data->owner->so_seqid, 0);
}
- nfs_increment_open_seqid(data->rpc_status, data->o_arg.seqid);
data->rpc_done = 1;
}
@@ -910,6 +912,7 @@
.rpc_message = &msg,
.callback_ops = &nfs4_open_ops,
.callback_data = data,
+ .workqueue = nfsiod_workqueue,
.flags = RPC_TASK_ASYNC,
};
int status;
@@ -979,11 +982,8 @@
if (IS_ERR(opendata))
return PTR_ERR(opendata);
ret = nfs4_open_recover(opendata, state);
- if (ret == -ESTALE) {
- /* Invalidate the state owner so we don't ever use it again */
- nfs4_drop_state_owner(state->owner);
+ if (ret == -ESTALE)
d_drop(ctx->path.dentry);
- }
nfs4_opendata_put(opendata);
return ret;
}
@@ -1226,7 +1226,6 @@
/* hmm. we are done with the inode, and in the process of freeing
* the state_owner. we keep this around to process errors
*/
- nfs_increment_open_seqid(task->tk_status, calldata->arg.seqid);
switch (task->tk_status) {
case 0:
nfs_set_open_stateid(state, &calldata->res.stateid, 0);
@@ -1315,6 +1314,7 @@
.rpc_client = server->client,
.rpc_message = &msg,
.callback_ops = &nfs4_close_ops,
+ .workqueue = nfsiod_workqueue,
.flags = RPC_TASK_ASYNC,
};
int status = -ENOMEM;
@@ -1332,6 +1332,7 @@
goto out_free_calldata;
calldata->arg.bitmask = server->attr_bitmask;
calldata->res.fattr = &calldata->fattr;
+ calldata->res.seqid = calldata->arg.seqid;
calldata->res.server = server;
calldata->path.mnt = mntget(path->mnt);
calldata->path.dentry = dget(path->dentry);
@@ -1404,7 +1405,7 @@
BUG_ON(nd->intent.open.flags & O_CREAT);
}
- cred = rpcauth_lookupcred(NFS_CLIENT(dir)->cl_auth, 0);
+ cred = rpc_lookup_cred();
if (IS_ERR(cred))
return (struct dentry *)cred;
parent = dentry->d_parent;
@@ -1439,7 +1440,7 @@
struct rpc_cred *cred;
struct nfs4_state *state;
- cred = rpcauth_lookupcred(NFS_CLIENT(dir)->cl_auth, 0);
+ cred = rpc_lookup_cred();
if (IS_ERR(cred))
return PTR_ERR(cred);
state = nfs4_do_open(dir, &path, openflags, NULL, cred);
@@ -1656,7 +1657,7 @@
nfs_fattr_init(fattr);
- cred = rpcauth_lookupcred(NFS_CLIENT(inode)->cl_auth, 0);
+ cred = rpc_lookup_cred();
if (IS_ERR(cred))
return PTR_ERR(cred);
@@ -1892,7 +1893,7 @@
struct rpc_cred *cred;
int status = 0;
- cred = rpcauth_lookupcred(NFS_CLIENT(dir)->cl_auth, 0);
+ cred = rpc_lookup_cred();
if (IS_ERR(cred)) {
status = PTR_ERR(cred);
goto out;
@@ -2761,10 +2762,10 @@
case -NFS4ERR_STALE_CLIENTID:
case -NFS4ERR_STALE_STATEID:
case -NFS4ERR_EXPIRED:
- rpc_sleep_on(&clp->cl_rpcwaitq, task, NULL, NULL);
+ rpc_sleep_on(&clp->cl_rpcwaitq, task, NULL);
nfs4_schedule_state_recovery(clp);
if (test_bit(NFS4CLNT_STATE_RECOVER, &clp->cl_state) == 0)
- rpc_wake_up_task(task);
+ rpc_wake_up_queued_task(&clp->cl_rpcwaitq, task);
task->tk_status = 0;
return -EAGAIN;
case -NFS4ERR_DELAY:
@@ -2884,7 +2885,7 @@
RPC_DISPLAY_ADDR),
rpc_peeraddr2str(clp->cl_rpcclient,
RPC_DISPLAY_PROTO),
- cred->cr_ops->cr_name,
+ clp->cl_rpcclient->cl_auth->au_ops->au_name,
clp->cl_id_uniquifier);
setclientid.sc_netid_len = scnprintf(setclientid.sc_netid,
sizeof(setclientid.sc_netid),
@@ -3158,6 +3159,7 @@
p->arg.fh = NFS_FH(inode);
p->arg.fl = &p->fl;
p->arg.seqid = seqid;
+ p->res.seqid = seqid;
p->arg.stateid = &lsp->ls_stateid;
p->lsp = lsp;
atomic_inc(&lsp->ls_count);
@@ -3183,7 +3185,6 @@
if (RPC_ASSASSINATED(task))
return;
- nfs_increment_lock_seqid(task->tk_status, calldata->arg.seqid);
switch (task->tk_status) {
case 0:
memcpy(calldata->lsp->ls_stateid.data,
@@ -3235,6 +3236,7 @@
.rpc_client = NFS_CLIENT(lsp->ls_state->inode),
.rpc_message = &msg,
.callback_ops = &nfs4_locku_ops,
+ .workqueue = nfsiod_workqueue,
.flags = RPC_TASK_ASYNC,
};
@@ -3261,6 +3263,7 @@
struct nfs4_lock_state *lsp;
struct rpc_task *task;
int status = 0;
+ unsigned char fl_flags = request->fl_flags;
status = nfs4_set_lock_state(state, request);
/* Unlock _before_ we do the RPC call */
@@ -3284,6 +3287,7 @@
status = nfs4_wait_for_completion_rpc_task(task);
rpc_put_task(task);
out:
+ request->fl_flags = fl_flags;
return status;
}
@@ -3320,6 +3324,7 @@
p->arg.lock_stateid = &lsp->ls_stateid;
p->arg.lock_owner.clientid = server->nfs_client->cl_clientid;
p->arg.lock_owner.id = lsp->ls_id.id;
+ p->res.lock_seqid = p->arg.lock_seqid;
p->lsp = lsp;
atomic_inc(&lsp->ls_count);
p->ctx = get_nfs_open_context(ctx);
@@ -3346,6 +3351,7 @@
return;
data->arg.open_stateid = &state->stateid;
data->arg.new_lock_owner = 1;
+ data->res.open_seqid = data->arg.open_seqid;
} else
data->arg.new_lock_owner = 0;
data->timestamp = jiffies;
@@ -3363,7 +3369,6 @@
if (RPC_ASSASSINATED(task))
goto out;
if (data->arg.new_lock_owner != 0) {
- nfs_increment_open_seqid(data->rpc_status, data->arg.open_seqid);
if (data->rpc_status == 0)
nfs_confirm_seqid(&data->lsp->ls_seqid, 0);
else
@@ -3375,7 +3380,6 @@
data->lsp->ls_flags |= NFS_LOCK_INITIALIZED;
renew_lease(NFS_SERVER(data->ctx->path.dentry->d_inode), data->timestamp);
}
- nfs_increment_lock_seqid(data->rpc_status, data->arg.lock_seqid);
out:
dprintk("%s: done, ret = %d!\n", __FUNCTION__, data->rpc_status);
}
@@ -3419,6 +3423,7 @@
.rpc_client = NFS_CLIENT(state->inode),
.rpc_message = &msg,
.callback_ops = &nfs4_lock_ops,
+ .workqueue = nfsiod_workqueue,
.flags = RPC_TASK_ASYNC,
};
int ret;
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index b962397..46eb624 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -71,6 +71,29 @@
return status;
}
+static struct rpc_cred *nfs4_get_machine_cred(struct nfs_client *clp)
+{
+ struct rpc_cred *cred = NULL;
+
+ spin_lock(&clp->cl_lock);
+ if (clp->cl_machine_cred != NULL)
+ cred = get_rpccred(clp->cl_machine_cred);
+ spin_unlock(&clp->cl_lock);
+ return cred;
+}
+
+static void nfs4_clear_machine_cred(struct nfs_client *clp)
+{
+ struct rpc_cred *cred;
+
+ spin_lock(&clp->cl_lock);
+ cred = clp->cl_machine_cred;
+ clp->cl_machine_cred = NULL;
+ spin_unlock(&clp->cl_lock);
+ if (cred != NULL)
+ put_rpccred(cred);
+}
+
struct rpc_cred *nfs4_get_renew_cred(struct nfs_client *clp)
{
struct nfs4_state_owner *sp;
@@ -91,13 +114,18 @@
{
struct nfs4_state_owner *sp;
struct rb_node *pos;
+ struct rpc_cred *cred;
+ cred = nfs4_get_machine_cred(clp);
+ if (cred != NULL)
+ goto out;
pos = rb_first(&clp->cl_state_owners);
if (pos != NULL) {
sp = rb_entry(pos, struct nfs4_state_owner, so_client_node);
- return get_rpccred(sp->so_cred);
+ cred = get_rpccred(sp->so_cred);
}
- return NULL;
+out:
+ return cred;
}
static void nfs_alloc_unique_id(struct rb_root *root, struct nfs_unique_id *new,
@@ -292,8 +320,10 @@
spin_unlock(&clp->cl_lock);
if (sp == new)
get_rpccred(cred);
- else
+ else {
+ rpc_destroy_wait_queue(&new->so_sequence.wait);
kfree(new);
+ }
return sp;
}
@@ -310,6 +340,7 @@
return;
nfs4_remove_state_owner(clp, sp);
spin_unlock(&clp->cl_lock);
+ rpc_destroy_wait_queue(&sp->so_sequence.wait);
put_rpccred(cred);
kfree(sp);
}
@@ -529,6 +560,7 @@
spin_lock(&clp->cl_lock);
nfs_free_unique_id(&clp->cl_lockowner_id, &lsp->ls_id);
spin_unlock(&clp->cl_lock);
+ rpc_destroy_wait_queue(&lsp->ls_sequence.wait);
kfree(lsp);
}
@@ -731,7 +763,7 @@
list_add_tail(&seqid->list, &sequence->list);
if (list_first_entry(&sequence->list, struct nfs_seqid, list) == seqid)
goto unlock;
- rpc_sleep_on(&sequence->wait, task, NULL, NULL);
+ rpc_sleep_on(&sequence->wait, task, NULL);
status = -EAGAIN;
unlock:
spin_unlock(&sequence->lock);
@@ -920,10 +952,10 @@
if (cred != NULL) {
/* Yes there are: try to renew the old lease */
status = nfs4_proc_renew(clp, cred);
+ put_rpccred(cred);
switch (status) {
case 0:
case -NFS4ERR_CB_PATH_DOWN:
- put_rpccred(cred);
goto out;
case -NFS4ERR_STALE_CLIENTID:
case -NFS4ERR_LEASE_MOVED:
@@ -932,14 +964,19 @@
} else {
/* "reboot" to ensure we clear all state on the server */
clp->cl_boot_time = CURRENT_TIME;
- cred = nfs4_get_setclientid_cred(clp);
}
/* We're going to have to re-establish a clientid */
nfs4_state_mark_reclaim(clp);
status = -ENOENT;
+ cred = nfs4_get_setclientid_cred(clp);
if (cred != NULL) {
status = nfs4_init_client(clp, cred);
put_rpccred(cred);
+ /* Handle case where the user hasn't set up machine creds */
+ if (status == -EACCES && cred == clp->cl_machine_cred) {
+ nfs4_clear_machine_cred(clp);
+ goto restart_loop;
+ }
}
if (status)
goto out_error;
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index db1ed9c..5a2d649 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -110,7 +110,7 @@
#define decode_savefh_maxsz (op_decode_hdr_maxsz)
#define encode_restorefh_maxsz (op_encode_hdr_maxsz)
#define decode_restorefh_maxsz (op_decode_hdr_maxsz)
-#define encode_fsinfo_maxsz (op_encode_hdr_maxsz + 2)
+#define encode_fsinfo_maxsz (encode_getattr_maxsz)
#define decode_fsinfo_maxsz (op_decode_hdr_maxsz + 11)
#define encode_renew_maxsz (op_encode_hdr_maxsz + 3)
#define decode_renew_maxsz (op_decode_hdr_maxsz)
@@ -1191,8 +1191,8 @@
attrs[1] &= ~FATTR4_WORD1_MOUNTED_ON_FILEID;
WRITE32(attrs[0] & readdir->bitmask[0]);
WRITE32(attrs[1] & readdir->bitmask[1]);
- dprintk("%s: cookie = %Lu, verifier = 0x%x%x, bitmap = 0x%x%x\n",
- __FUNCTION__,
+ dprintk("%s: cookie = %Lu, verifier = %08x:%08x, bitmap = %08x:%08x\n",
+ __func__,
(unsigned long long)readdir->cookie,
((u32 *)readdir->verifier.data)[0],
((u32 *)readdir->verifier.data)[1],
@@ -2241,7 +2241,7 @@
}
READ32(nfserr);
if (nfserr != NFS_OK)
- return -nfs4_stat_to_errno(nfserr);
+ return nfs4_stat_to_errno(nfserr);
return 0;
}
@@ -2291,7 +2291,7 @@
bitmap[0] &= ~FATTR4_WORD0_SUPPORTED_ATTRS;
} else
bitmask[0] = bitmask[1] = 0;
- dprintk("%s: bitmask=0x%x%x\n", __FUNCTION__, bitmask[0], bitmask[1]);
+ dprintk("%s: bitmask=%08x:%08x\n", __func__, bitmask[0], bitmask[1]);
return 0;
}
@@ -3005,6 +3005,8 @@
int status;
status = decode_op_hdr(xdr, OP_CLOSE);
+ if (status != -EIO)
+ nfs_increment_open_seqid(status, res->seqid);
if (status)
return status;
READ_BUF(NFS4_STATEID_SIZE);
@@ -3296,11 +3298,17 @@
int status;
status = decode_op_hdr(xdr, OP_LOCK);
+ if (status == -EIO)
+ goto out;
if (status == 0) {
READ_BUF(NFS4_STATEID_SIZE);
COPYMEM(res->stateid.data, NFS4_STATEID_SIZE);
} else if (status == -NFS4ERR_DENIED)
- return decode_lock_denied(xdr, NULL);
+ status = decode_lock_denied(xdr, NULL);
+ if (res->open_seqid != NULL)
+ nfs_increment_open_seqid(status, res->open_seqid);
+ nfs_increment_lock_seqid(status, res->lock_seqid);
+out:
return status;
}
@@ -3319,6 +3327,8 @@
int status;
status = decode_op_hdr(xdr, OP_LOCKU);
+ if (status != -EIO)
+ nfs_increment_lock_seqid(status, res->seqid);
if (status == 0) {
READ_BUF(NFS4_STATEID_SIZE);
COPYMEM(res->stateid.data, NFS4_STATEID_SIZE);
@@ -3384,6 +3394,8 @@
int status;
status = decode_op_hdr(xdr, OP_OPEN);
+ if (status != -EIO)
+ nfs_increment_open_seqid(status, res->seqid);
if (status)
return status;
READ_BUF(NFS4_STATEID_SIZE);
@@ -3416,6 +3428,8 @@
int status;
status = decode_op_hdr(xdr, OP_OPEN_CONFIRM);
+ if (status != -EIO)
+ nfs_increment_open_seqid(status, res->seqid);
if (status)
return status;
READ_BUF(NFS4_STATEID_SIZE);
@@ -3429,6 +3443,8 @@
int status;
status = decode_op_hdr(xdr, OP_OPEN_DOWNGRADE);
+ if (status != -EIO)
+ nfs_increment_open_seqid(status, res->seqid);
if (status)
return status;
READ_BUF(NFS4_STATEID_SIZE);
@@ -3481,7 +3497,7 @@
size_t hdrlen;
u32 recvd, pglen = rcvbuf->page_len;
__be32 *end, *entry, *p, *kaddr;
- unsigned int nr;
+ unsigned int nr = 0;
int status;
status = decode_op_hdr(xdr, OP_READDIR);
@@ -3489,8 +3505,8 @@
return status;
READ_BUF(8);
COPYMEM(readdir->verifier.data, 8);
- dprintk("%s: verifier = 0x%x%x\n",
- __FUNCTION__,
+ dprintk("%s: verifier = %08x:%08x\n",
+ __func__,
((u32 *)readdir->verifier.data)[0],
((u32 *)readdir->verifier.data)[1]);
@@ -3505,7 +3521,12 @@
kaddr = p = kmap_atomic(page, KM_USER0);
end = p + ((pglen + readdir->pgbase) >> 2);
entry = p;
- for (nr = 0; *p++; nr++) {
+
+ /* Make sure the packet actually has a value_follows and EOF entry */
+ if ((entry + 1) > end)
+ goto short_pkt;
+
+ for (; *p++; nr++) {
u32 len, attrlen, xlen;
if (end - p < 3)
goto short_pkt;
@@ -3532,20 +3553,32 @@
p += attrlen; /* attributes */
entry = p;
}
- if (!nr && (entry[0] != 0 || entry[1] == 0))
- goto short_pkt;
+ /*
+ * Apparently some server sends responses that are a valid size, but
+ * contain no entries, and have value_follows==0 and EOF==0. For
+ * those, just set the EOF marker.
+ */
+ if (!nr && entry[1] == 0) {
+ dprintk("NFS: readdir reply truncated!\n");
+ entry[1] = 1;
+ }
out:
kunmap_atomic(kaddr, KM_USER0);
return 0;
short_pkt:
+ /*
+ * When we get a short packet there are 2 possibilities. We can
+ * return an error, or fix up the response to look like a valid
+ * response and return what we have so far. If there are no
+ * entries and the packet was short, then return -EIO. If there
+ * are valid entries in the response, return them and pretend that
+ * the call was successful, but incomplete. The caller can retry the
+ * readdir starting at the last cookie.
+ */
dprintk("%s: short packet at entry %d\n", __FUNCTION__, nr);
entry[0] = entry[1] = 0;
- /* truncate listing ? */
- if (!nr) {
- dprintk("NFS: readdir reply truncated!\n");
- entry[1] = 1;
- }
- goto out;
+ if (nr)
+ goto out;
err_unmap:
kunmap_atomic(kaddr, KM_USER0);
return -errno_NFSERR_IO;
@@ -3727,7 +3760,7 @@
READ_BUF(len);
return -NFSERR_CLID_INUSE;
} else
- return -nfs4_stat_to_errno(nfserr);
+ return nfs4_stat_to_errno(nfserr);
return 0;
}
@@ -4389,7 +4422,7 @@
if (!status)
status = decode_fsinfo(&xdr, fsinfo);
if (!status)
- status = -nfs4_stat_to_errno(hdr.status);
+ status = nfs4_stat_to_errno(hdr.status);
return status;
}
@@ -4479,7 +4512,7 @@
if (!status)
status = decode_setclientid(&xdr, clp);
if (!status)
- status = -nfs4_stat_to_errno(hdr.status);
+ status = nfs4_stat_to_errno(hdr.status);
return status;
}
@@ -4501,7 +4534,7 @@
if (!status)
status = decode_fsinfo(&xdr, fsinfo);
if (!status)
- status = -nfs4_stat_to_errno(hdr.status);
+ status = nfs4_stat_to_errno(hdr.status);
return status;
}
@@ -4611,42 +4644,42 @@
int errno;
} nfs_errtbl[] = {
{ NFS4_OK, 0 },
- { NFS4ERR_PERM, EPERM },
- { NFS4ERR_NOENT, ENOENT },
- { NFS4ERR_IO, errno_NFSERR_IO },
- { NFS4ERR_NXIO, ENXIO },
- { NFS4ERR_ACCESS, EACCES },
- { NFS4ERR_EXIST, EEXIST },
- { NFS4ERR_XDEV, EXDEV },
- { NFS4ERR_NOTDIR, ENOTDIR },
- { NFS4ERR_ISDIR, EISDIR },
- { NFS4ERR_INVAL, EINVAL },
- { NFS4ERR_FBIG, EFBIG },
- { NFS4ERR_NOSPC, ENOSPC },
- { NFS4ERR_ROFS, EROFS },
- { NFS4ERR_MLINK, EMLINK },
- { NFS4ERR_NAMETOOLONG, ENAMETOOLONG },
- { NFS4ERR_NOTEMPTY, ENOTEMPTY },
- { NFS4ERR_DQUOT, EDQUOT },
- { NFS4ERR_STALE, ESTALE },
- { NFS4ERR_BADHANDLE, EBADHANDLE },
- { NFS4ERR_BADOWNER, EINVAL },
- { NFS4ERR_BADNAME, EINVAL },
- { NFS4ERR_BAD_COOKIE, EBADCOOKIE },
- { NFS4ERR_NOTSUPP, ENOTSUPP },
- { NFS4ERR_TOOSMALL, ETOOSMALL },
- { NFS4ERR_SERVERFAULT, ESERVERFAULT },
- { NFS4ERR_BADTYPE, EBADTYPE },
- { NFS4ERR_LOCKED, EAGAIN },
- { NFS4ERR_RESOURCE, EREMOTEIO },
- { NFS4ERR_SYMLINK, ELOOP },
- { NFS4ERR_OP_ILLEGAL, EOPNOTSUPP },
- { NFS4ERR_DEADLOCK, EDEADLK },
- { NFS4ERR_WRONGSEC, EPERM }, /* FIXME: this needs
+ { NFS4ERR_PERM, -EPERM },
+ { NFS4ERR_NOENT, -ENOENT },
+ { NFS4ERR_IO, -errno_NFSERR_IO},
+ { NFS4ERR_NXIO, -ENXIO },
+ { NFS4ERR_ACCESS, -EACCES },
+ { NFS4ERR_EXIST, -EEXIST },
+ { NFS4ERR_XDEV, -EXDEV },
+ { NFS4ERR_NOTDIR, -ENOTDIR },
+ { NFS4ERR_ISDIR, -EISDIR },
+ { NFS4ERR_INVAL, -EINVAL },
+ { NFS4ERR_FBIG, -EFBIG },
+ { NFS4ERR_NOSPC, -ENOSPC },
+ { NFS4ERR_ROFS, -EROFS },
+ { NFS4ERR_MLINK, -EMLINK },
+ { NFS4ERR_NAMETOOLONG, -ENAMETOOLONG },
+ { NFS4ERR_NOTEMPTY, -ENOTEMPTY },
+ { NFS4ERR_DQUOT, -EDQUOT },
+ { NFS4ERR_STALE, -ESTALE },
+ { NFS4ERR_BADHANDLE, -EBADHANDLE },
+ { NFS4ERR_BADOWNER, -EINVAL },
+ { NFS4ERR_BADNAME, -EINVAL },
+ { NFS4ERR_BAD_COOKIE, -EBADCOOKIE },
+ { NFS4ERR_NOTSUPP, -ENOTSUPP },
+ { NFS4ERR_TOOSMALL, -ETOOSMALL },
+ { NFS4ERR_SERVERFAULT, -ESERVERFAULT },
+ { NFS4ERR_BADTYPE, -EBADTYPE },
+ { NFS4ERR_LOCKED, -EAGAIN },
+ { NFS4ERR_RESOURCE, -EREMOTEIO },
+ { NFS4ERR_SYMLINK, -ELOOP },
+ { NFS4ERR_OP_ILLEGAL, -EOPNOTSUPP },
+ { NFS4ERR_DEADLOCK, -EDEADLK },
+ { NFS4ERR_WRONGSEC, -EPERM }, /* FIXME: this needs
* to be handled by a
* middle-layer.
*/
- { -1, EIO }
+ { -1, -EIO }
};
/*
@@ -4663,14 +4696,14 @@
}
if (stat <= 10000 || stat > 10100) {
/* The server is looney tunes. */
- return ESERVERFAULT;
+ return -ESERVERFAULT;
}
/* If we cannot translate the error, the recovery routines should
* handle it.
* Note: remaining NFSv4 error codes have values > 10000, so should
* not conflict with native Linux error codes.
*/
- return stat;
+ return -stat;
}
#define PROC(proc, argtype, restype) \
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 5a70be5..16f57e0 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -58,22 +58,19 @@
return p;
}
-static void nfs_readdata_rcu_free(struct rcu_head *head)
+static void nfs_readdata_free(struct nfs_read_data *p)
{
- struct nfs_read_data *p = container_of(head, struct nfs_read_data, task.u.tk_rcu);
if (p && (p->pagevec != &p->page_array[0]))
kfree(p->pagevec);
mempool_free(p, nfs_rdata_mempool);
}
-static void nfs_readdata_free(struct nfs_read_data *rdata)
-{
- call_rcu_bh(&rdata->task.u.tk_rcu, nfs_readdata_rcu_free);
-}
-
void nfs_readdata_release(void *data)
{
- nfs_readdata_free(data);
+ struct nfs_read_data *rdata = data;
+
+ put_nfs_open_context(rdata->args.context);
+ nfs_readdata_free(rdata);
}
static
@@ -156,7 +153,7 @@
/*
* Set up the NFS read request struct
*/
-static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
+static int nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
const struct rpc_call_ops *call_ops,
unsigned int count, unsigned int offset)
{
@@ -174,6 +171,7 @@
.rpc_message = &msg,
.callback_ops = call_ops,
.callback_data = data,
+ .workqueue = nfsiod_workqueue,
.flags = RPC_TASK_ASYNC | swap_flags,
};
@@ -186,7 +184,7 @@
data->args.pgbase = req->wb_pgbase + offset;
data->args.pages = data->pagevec;
data->args.count = count;
- data->args.context = req->wb_context;
+ data->args.context = get_nfs_open_context(req->wb_context);
data->res.fattr = &data->fattr;
data->res.count = count;
@@ -204,8 +202,10 @@
(unsigned long long)data->args.offset);
task = rpc_run_task(&task_setup_data);
- if (!IS_ERR(task))
- rpc_put_task(task);
+ if (IS_ERR(task))
+ return PTR_ERR(task);
+ rpc_put_task(task);
+ return 0;
}
static void
@@ -242,6 +242,7 @@
size_t rsize = NFS_SERVER(inode)->rsize, nbytes;
unsigned int offset;
int requests = 0;
+ int ret = 0;
LIST_HEAD(list);
nfs_list_remove_request(req);
@@ -253,7 +254,6 @@
data = nfs_readdata_alloc(1);
if (!data)
goto out_bad;
- INIT_LIST_HEAD(&data->pages);
list_add(&data->pages, &list);
requests++;
nbytes -= len;
@@ -264,6 +264,8 @@
offset = 0;
nbytes = count;
do {
+ int ret2;
+
data = list_entry(list.next, struct nfs_read_data, pages);
list_del_init(&data->pages);
@@ -271,13 +273,15 @@
if (nbytes < rsize)
rsize = nbytes;
- nfs_read_rpcsetup(req, data, &nfs_read_partial_ops,
+ ret2 = nfs_read_rpcsetup(req, data, &nfs_read_partial_ops,
rsize, offset);
+ if (ret == 0)
+ ret = ret2;
offset += rsize;
nbytes -= rsize;
} while (nbytes != 0);
- return 0;
+ return ret;
out_bad:
while (!list_empty(&list)) {
@@ -295,12 +299,12 @@
struct nfs_page *req;
struct page **pages;
struct nfs_read_data *data;
+ int ret = -ENOMEM;
data = nfs_readdata_alloc(npages);
if (!data)
goto out_bad;
- INIT_LIST_HEAD(&data->pages);
pages = data->pagevec;
while (!list_empty(head)) {
req = nfs_list_entry(head->next);
@@ -311,11 +315,10 @@
}
req = nfs_list_entry(data->pages.next);
- nfs_read_rpcsetup(req, data, &nfs_read_full_ops, count, 0);
- return 0;
+ return nfs_read_rpcsetup(req, data, &nfs_read_full_ops, count, 0);
out_bad:
nfs_async_read_error(head);
- return -ENOMEM;
+ return ret;
}
/*
@@ -342,26 +345,25 @@
return 0;
}
-static int nfs_readpage_retry(struct rpc_task *task, struct nfs_read_data *data)
+static void nfs_readpage_retry(struct rpc_task *task, struct nfs_read_data *data)
{
struct nfs_readargs *argp = &data->args;
struct nfs_readres *resp = &data->res;
if (resp->eof || resp->count == argp->count)
- return 0;
+ return;
/* This is a short read! */
nfs_inc_stats(data->inode, NFSIOS_SHORTREAD);
/* Has the server at least made some progress? */
if (resp->count == 0)
- return 0;
+ return;
/* Yes, so retry the read at the end of the data */
argp->offset += resp->count;
argp->pgbase += resp->count;
argp->count -= resp->count;
rpc_restart_call(task);
- return -EAGAIN;
}
/*
@@ -370,29 +372,37 @@
static void nfs_readpage_result_partial(struct rpc_task *task, void *calldata)
{
struct nfs_read_data *data = calldata;
- struct nfs_page *req = data->req;
- struct page *page = req->wb_page;
if (nfs_readpage_result(task, data) != 0)
return;
+ if (task->tk_status < 0)
+ return;
- if (likely(task->tk_status >= 0)) {
- nfs_readpage_truncate_uninitialised_page(data);
- if (nfs_readpage_retry(task, data) != 0)
- return;
- }
- if (unlikely(task->tk_status < 0))
+ nfs_readpage_truncate_uninitialised_page(data);
+ nfs_readpage_retry(task, data);
+}
+
+static void nfs_readpage_release_partial(void *calldata)
+{
+ struct nfs_read_data *data = calldata;
+ struct nfs_page *req = data->req;
+ struct page *page = req->wb_page;
+ int status = data->task.tk_status;
+
+ if (status < 0)
SetPageError(page);
+
if (atomic_dec_and_test(&req->wb_complete)) {
if (!PageError(page))
SetPageUptodate(page);
nfs_readpage_release(req);
}
+ nfs_readdata_release(calldata);
}
static const struct rpc_call_ops nfs_read_partial_ops = {
.rpc_call_done = nfs_readpage_result_partial,
- .rpc_release = nfs_readdata_release,
+ .rpc_release = nfs_readpage_release_partial,
};
static void nfs_readpage_set_pages_uptodate(struct nfs_read_data *data)
@@ -427,29 +437,35 @@
if (nfs_readpage_result(task, data) != 0)
return;
+ if (task->tk_status < 0)
+ return;
/*
* Note: nfs_readpage_retry may change the values of
* data->args. In the multi-page case, we therefore need
* to ensure that we call nfs_readpage_set_pages_uptodate()
* first.
*/
- if (likely(task->tk_status >= 0)) {
- nfs_readpage_truncate_uninitialised_page(data);
- nfs_readpage_set_pages_uptodate(data);
- if (nfs_readpage_retry(task, data) != 0)
- return;
- }
+ nfs_readpage_truncate_uninitialised_page(data);
+ nfs_readpage_set_pages_uptodate(data);
+ nfs_readpage_retry(task, data);
+}
+
+static void nfs_readpage_release_full(void *calldata)
+{
+ struct nfs_read_data *data = calldata;
+
while (!list_empty(&data->pages)) {
struct nfs_page *req = nfs_list_entry(data->pages.next);
nfs_list_remove_request(req);
nfs_readpage_release(req);
}
+ nfs_readdata_release(calldata);
}
static const struct rpc_call_ops nfs_read_full_ops = {
.rpc_call_done = nfs_readpage_result_full,
- .rpc_release = nfs_readdata_release,
+ .rpc_release = nfs_readpage_release_full,
};
/*
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index f921902..20a1cb1 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -441,10 +441,52 @@
return sec_flavours[i].str;
}
+static void nfs_show_mountd_options(struct seq_file *m, struct nfs_server *nfss,
+ int showdefaults)
+{
+ struct sockaddr *sap = (struct sockaddr *)&nfss->mountd_address;
+
+ switch (sap->sa_family) {
+ case AF_INET: {
+ struct sockaddr_in *sin = (struct sockaddr_in *)sap;
+ seq_printf(m, ",mountaddr=" NIPQUAD_FMT,
+ NIPQUAD(sin->sin_addr.s_addr));
+ break;
+ }
+ case AF_INET6: {
+ struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sap;
+ seq_printf(m, ",mountaddr=" NIP6_FMT,
+ NIP6(sin6->sin6_addr));
+ break;
+ }
+ default:
+ if (showdefaults)
+ seq_printf(m, ",mountaddr=unspecified");
+ }
+
+ if (nfss->mountd_version || showdefaults)
+ seq_printf(m, ",mountvers=%u", nfss->mountd_version);
+ if (nfss->mountd_port || showdefaults)
+ seq_printf(m, ",mountport=%u", nfss->mountd_port);
+
+ switch (nfss->mountd_protocol) {
+ case IPPROTO_UDP:
+ seq_printf(m, ",mountproto=udp");
+ break;
+ case IPPROTO_TCP:
+ seq_printf(m, ",mountproto=tcp");
+ break;
+ default:
+ if (showdefaults)
+ seq_printf(m, ",mountproto=auto");
+ }
+}
+
/*
* Describe the mount options in force on this server representation
*/
-static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss, int showdefaults)
+static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss,
+ int showdefaults)
{
static const struct proc_nfs_info {
int flag;
@@ -452,6 +494,8 @@
const char *nostr;
} nfs_info[] = {
{ NFS_MOUNT_SOFT, ",soft", ",hard" },
+ { NFS_MOUNT_INTR, ",intr", ",nointr" },
+ { NFS_MOUNT_POSIX, ",posix", "" },
{ NFS_MOUNT_NOCTO, ",nocto", "" },
{ NFS_MOUNT_NOAC, ",noac", "" },
{ NFS_MOUNT_NONLM, ",nolock", "" },
@@ -462,18 +506,22 @@
};
const struct proc_nfs_info *nfs_infop;
struct nfs_client *clp = nfss->nfs_client;
+ u32 version = clp->rpc_ops->version;
- seq_printf(m, ",vers=%d", clp->rpc_ops->version);
- seq_printf(m, ",rsize=%d", nfss->rsize);
- seq_printf(m, ",wsize=%d", nfss->wsize);
+ seq_printf(m, ",vers=%u", version);
+ seq_printf(m, ",rsize=%u", nfss->rsize);
+ seq_printf(m, ",wsize=%u", nfss->wsize);
+ if (nfss->bsize != 0)
+ seq_printf(m, ",bsize=%u", nfss->bsize);
+ seq_printf(m, ",namlen=%u", nfss->namelen);
if (nfss->acregmin != 3*HZ || showdefaults)
- seq_printf(m, ",acregmin=%d", nfss->acregmin/HZ);
+ seq_printf(m, ",acregmin=%u", nfss->acregmin/HZ);
if (nfss->acregmax != 60*HZ || showdefaults)
- seq_printf(m, ",acregmax=%d", nfss->acregmax/HZ);
+ seq_printf(m, ",acregmax=%u", nfss->acregmax/HZ);
if (nfss->acdirmin != 30*HZ || showdefaults)
- seq_printf(m, ",acdirmin=%d", nfss->acdirmin/HZ);
+ seq_printf(m, ",acdirmin=%u", nfss->acdirmin/HZ);
if (nfss->acdirmax != 60*HZ || showdefaults)
- seq_printf(m, ",acdirmax=%d", nfss->acdirmax/HZ);
+ seq_printf(m, ",acdirmax=%u", nfss->acdirmax/HZ);
for (nfs_infop = nfs_info; nfs_infop->flag; nfs_infop++) {
if (nfss->flags & nfs_infop->flag)
seq_puts(m, nfs_infop->str);
@@ -482,9 +530,24 @@
}
seq_printf(m, ",proto=%s",
rpc_peeraddr2str(nfss->client, RPC_DISPLAY_PROTO));
+ if (version == 4) {
+ if (nfss->port != NFS_PORT)
+ seq_printf(m, ",port=%u", nfss->port);
+ } else
+ if (nfss->port)
+ seq_printf(m, ",port=%u", nfss->port);
+
seq_printf(m, ",timeo=%lu", 10U * nfss->client->cl_timeout->to_initval / HZ);
seq_printf(m, ",retrans=%u", nfss->client->cl_timeout->to_retries);
seq_printf(m, ",sec=%s", nfs_pseudoflavour_to_name(nfss->client->cl_auth->au_flavor));
+
+ if (version != 4)
+ nfs_show_mountd_options(m, nfss, showdefaults);
+
+#ifdef CONFIG_NFS_V4
+ if (clp->rpc_ops->version == 4)
+ seq_printf(m, ",clientaddr=%s", clp->cl_ipaddr);
+#endif
}
/*
@@ -529,10 +592,10 @@
seq_printf(m, "\n\tcaps:\t");
seq_printf(m, "caps=0x%x", nfss->caps);
- seq_printf(m, ",wtmult=%d", nfss->wtmult);
- seq_printf(m, ",dtsize=%d", nfss->dtsize);
- seq_printf(m, ",bsize=%d", nfss->bsize);
- seq_printf(m, ",namelen=%d", nfss->namelen);
+ seq_printf(m, ",wtmult=%u", nfss->wtmult);
+ seq_printf(m, ",dtsize=%u", nfss->dtsize);
+ seq_printf(m, ",bsize=%u", nfss->bsize);
+ seq_printf(m, ",namlen=%u", nfss->namelen);
#ifdef CONFIG_NFS_V4
if (nfss->nfs_client->rpc_ops->version == 4) {
@@ -546,9 +609,9 @@
/*
* Display security flavor in effect for this mount
*/
- seq_printf(m, "\n\tsec:\tflavor=%d", auth->au_ops->au_flavor);
+ seq_printf(m, "\n\tsec:\tflavor=%u", auth->au_ops->au_flavor);
if (auth->au_flavor)
- seq_printf(m, ",pseudoflavor=%d", auth->au_flavor);
+ seq_printf(m, ",pseudoflavor=%u", auth->au_flavor);
/*
* Display superblock I/O counters
@@ -683,7 +746,6 @@
struct nfs_parsed_mount_data *mnt)
{
char *p, *string, *secdata;
- unsigned short port = 0;
int rc;
if (!raw) {
@@ -798,7 +860,7 @@
return 0;
if (option < 0 || option > 65535)
return 0;
- port = option;
+ mnt->nfs_server.port = option;
break;
case Opt_rsize:
if (match_int(args, &mnt->rsize))
@@ -1048,7 +1110,8 @@
}
}
- nfs_set_port((struct sockaddr *)&mnt->nfs_server.address, port);
+ nfs_set_port((struct sockaddr *)&mnt->nfs_server.address,
+ mnt->nfs_server.port);
return 1;
@@ -1169,7 +1232,9 @@
args->acregmax = 60;
args->acdirmin = 30;
args->acdirmax = 60;
+ args->mount_server.port = 0; /* autobind unless user sets port */
args->mount_server.protocol = XPRT_TRANSPORT_UDP;
+ args->nfs_server.port = 0; /* autobind unless user sets port */
args->nfs_server.protocol = XPRT_TRANSPORT_TCP;
switch (data->version) {
@@ -1208,7 +1273,6 @@
args->flags = data->flags;
args->rsize = data->rsize;
args->wsize = data->wsize;
- args->flags = data->flags;
args->timeo = data->timeo;
args->retrans = data->retrans;
args->acregmin = data->acregmin;
@@ -1230,6 +1294,8 @@
args->namlen = data->namlen;
args->bsize = data->bsize;
args->auth_flavors[0] = data->pseudoflavor;
+ if (!args->nfs_server.hostname)
+ goto out_nomem;
/*
* The legacy version 6 binary mount data from userspace has a
@@ -1276,6 +1342,8 @@
len = c - dev_name;
/* N.B. caller will free nfs_server.hostname in all cases */
args->nfs_server.hostname = kstrndup(dev_name, len, GFP_KERNEL);
+ if (!args->nfs_server.hostname)
+ goto out_nomem;
c++;
if (strlen(c) > NFS_MAXPATHLEN)
@@ -1319,6 +1387,10 @@
return -EPROTONOSUPPORT;
#endif /* !CONFIG_NFS_V3 */
+out_nomem:
+ dfprintk(MOUNT, "NFS: not enough memory to handle mount options\n");
+ return -ENOMEM;
+
out_no_address:
dfprintk(MOUNT, "NFS: mount program didn't pass remote address\n");
return -EINVAL;
@@ -1706,28 +1778,6 @@
}
/*
- * If the user didn't specify a port, set the port number to
- * the NFS version 4 default port.
- */
-static void nfs4_default_port(struct sockaddr *sap)
-{
- switch (sap->sa_family) {
- case AF_INET: {
- struct sockaddr_in *ap = (struct sockaddr_in *)sap;
- if (ap->sin_port == 0)
- ap->sin_port = htons(NFS_PORT);
- break;
- }
- case AF_INET6: {
- struct sockaddr_in6 *ap = (struct sockaddr_in6 *)sap;
- if (ap->sin6_port == 0)
- ap->sin6_port = htons(NFS_PORT);
- break;
- }
- }
-}
-
-/*
* Validate NFSv4 mount options
*/
static int nfs4_validate_mount_data(void *options,
@@ -1751,6 +1801,7 @@
args->acregmax = 60;
args->acdirmin = 30;
args->acdirmax = 60;
+ args->nfs_server.port = NFS_PORT; /* 2049 unless user set port= */
args->nfs_server.protocol = XPRT_TRANSPORT_TCP;
switch (data->version) {
@@ -1767,9 +1818,6 @@
&args->nfs_server.address))
goto out_no_address;
- nfs4_default_port((struct sockaddr *)
- &args->nfs_server.address);
-
switch (data->auth_flavourlen) {
case 0:
args->auth_flavors[0] = RPC_AUTH_UNIX;
@@ -1827,9 +1875,6 @@
&args->nfs_server.address))
return -EINVAL;
- nfs4_default_port((struct sockaddr *)
- &args->nfs_server.address);
-
switch (args->auth_flavor_len) {
case 0:
args->auth_flavors[0] = RPC_AUTH_UNIX;
@@ -1852,12 +1897,16 @@
return -ENAMETOOLONG;
/* N.B. caller will free nfs_server.hostname in all cases */
args->nfs_server.hostname = kstrndup(dev_name, len, GFP_KERNEL);
+ if (!args->nfs_server.hostname)
+ goto out_nomem;
c++; /* step over the ':' */
len = strlen(c);
if (len > NFS4_MAXPATHLEN)
return -ENAMETOOLONG;
args->nfs_server.export_path = kstrndup(c, len, GFP_KERNEL);
+ if (!args->nfs_server.export_path)
+ goto out_nomem;
dprintk("NFS: MNTPATH: '%s'\n", args->nfs_server.export_path);
@@ -1879,6 +1928,10 @@
data->auth_flavourlen);
return -EINVAL;
+out_nomem:
+ dfprintk(MOUNT, "NFS4: not enough memory to handle mount options\n");
+ return -ENOMEM;
+
out_no_address:
dfprintk(MOUNT, "NFS4: mount program didn't pass remote address\n");
return -EINVAL;
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c
index 7574153..3adf8b2 100644
--- a/fs/nfs/unlink.c
+++ b/fs/nfs/unlink.c
@@ -234,7 +234,7 @@
if (data == NULL)
goto out;
- data->cred = rpcauth_lookupcred(NFS_CLIENT(dir)->cl_auth, 0);
+ data->cred = rpc_lookup_cred();
if (IS_ERR(data->cred)) {
status = PTR_ERR(data->cred);
goto out_free;
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index bed6341..1ade11d1 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -48,7 +48,7 @@
static mempool_t *nfs_wdata_mempool;
static mempool_t *nfs_commit_mempool;
-struct nfs_write_data *nfs_commit_alloc(void)
+struct nfs_write_data *nfs_commitdata_alloc(void)
{
struct nfs_write_data *p = mempool_alloc(nfs_commit_mempool, GFP_NOFS);
@@ -59,19 +59,13 @@
return p;
}
-static void nfs_commit_rcu_free(struct rcu_head *head)
+void nfs_commit_free(struct nfs_write_data *p)
{
- struct nfs_write_data *p = container_of(head, struct nfs_write_data, task.u.tk_rcu);
if (p && (p->pagevec != &p->page_array[0]))
kfree(p->pagevec);
mempool_free(p, nfs_commit_mempool);
}
-void nfs_commit_free(struct nfs_write_data *wdata)
-{
- call_rcu_bh(&wdata->task.u.tk_rcu, nfs_commit_rcu_free);
-}
-
struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount)
{
struct nfs_write_data *p = mempool_alloc(nfs_wdata_mempool, GFP_NOFS);
@@ -93,21 +87,18 @@
return p;
}
-static void nfs_writedata_rcu_free(struct rcu_head *head)
+static void nfs_writedata_free(struct nfs_write_data *p)
{
- struct nfs_write_data *p = container_of(head, struct nfs_write_data, task.u.tk_rcu);
if (p && (p->pagevec != &p->page_array[0]))
kfree(p->pagevec);
mempool_free(p, nfs_wdata_mempool);
}
-static void nfs_writedata_free(struct nfs_write_data *wdata)
+void nfs_writedata_release(void *data)
{
- call_rcu_bh(&wdata->task.u.tk_rcu, nfs_writedata_rcu_free);
-}
+ struct nfs_write_data *wdata = data;
-void nfs_writedata_release(void *wdata)
-{
+ put_nfs_open_context(wdata->args.context);
nfs_writedata_free(wdata);
}
@@ -291,8 +282,6 @@
spin_unlock(&inode->i_lock);
if (!nfs_pageio_add_request(pgio, req)) {
nfs_redirty_request(req);
- nfs_end_page_writeback(page);
- nfs_clear_page_tag_locked(req);
return pgio->pg_error;
}
return 0;
@@ -366,15 +355,13 @@
/*
* Insert a write request into an inode
*/
-static int nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
+static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
{
struct nfs_inode *nfsi = NFS_I(inode);
int error;
error = radix_tree_insert(&nfsi->nfs_page_tree, req->wb_index, req);
- BUG_ON(error == -EEXIST);
- if (error)
- return error;
+ BUG_ON(error);
if (!nfsi->npages) {
igrab(inode);
if (nfs_have_delegation(inode, FMODE_WRITE))
@@ -384,8 +371,8 @@
set_page_private(req->wb_page, (unsigned long)req);
nfsi->npages++;
kref_get(&req->wb_kref);
- radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED);
- return 0;
+ radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index,
+ NFS_PAGE_TAG_LOCKED);
}
/*
@@ -413,7 +400,7 @@
}
static void
-nfs_redirty_request(struct nfs_page *req)
+nfs_mark_request_dirty(struct nfs_page *req)
{
__set_page_dirty_nobuffers(req->wb_page);
}
@@ -467,7 +454,7 @@
return 1;
}
if (test_and_clear_bit(PG_NEED_RESCHED, &req->wb_flags)) {
- nfs_redirty_request(req);
+ nfs_mark_request_dirty(req);
return 1;
}
return 0;
@@ -597,6 +584,13 @@
/* Loop over all inode entries and see if we find
* A request for the page we wish to update
*/
+ if (new) {
+ if (radix_tree_preload(GFP_NOFS)) {
+ nfs_release_request(new);
+ return ERR_PTR(-ENOMEM);
+ }
+ }
+
spin_lock(&inode->i_lock);
req = nfs_page_find_request_locked(page);
if (req) {
@@ -607,28 +601,27 @@
error = nfs_wait_on_request(req);
nfs_release_request(req);
if (error < 0) {
- if (new)
+ if (new) {
+ radix_tree_preload_end();
nfs_release_request(new);
+ }
return ERR_PTR(error);
}
continue;
}
spin_unlock(&inode->i_lock);
- if (new)
+ if (new) {
+ radix_tree_preload_end();
nfs_release_request(new);
+ }
break;
}
if (new) {
- int error;
nfs_lock_request_dontget(new);
- error = nfs_inode_add_request(inode, new);
- if (error) {
- spin_unlock(&inode->i_lock);
- nfs_unlock_request(new);
- return ERR_PTR(error);
- }
+ nfs_inode_add_request(inode, new);
spin_unlock(&inode->i_lock);
+ radix_tree_preload_end();
req = new;
goto zero_page;
}
@@ -785,7 +778,7 @@
/*
* Set up the argument/result storage required for the RPC call.
*/
-static void nfs_write_rpcsetup(struct nfs_page *req,
+static int nfs_write_rpcsetup(struct nfs_page *req,
struct nfs_write_data *data,
const struct rpc_call_ops *call_ops,
unsigned int count, unsigned int offset,
@@ -806,6 +799,7 @@
.rpc_message = &msg,
.callback_ops = call_ops,
.callback_data = data,
+ .workqueue = nfsiod_workqueue,
.flags = flags,
.priority = priority,
};
@@ -822,7 +816,7 @@
data->args.pgbase = req->wb_pgbase + offset;
data->args.pages = data->pagevec;
data->args.count = count;
- data->args.context = req->wb_context;
+ data->args.context = get_nfs_open_context(req->wb_context);
data->args.stable = NFS_UNSTABLE;
if (how & FLUSH_STABLE) {
data->args.stable = NFS_DATA_SYNC;
@@ -847,8 +841,21 @@
(unsigned long long)data->args.offset);
task = rpc_run_task(&task_setup_data);
- if (!IS_ERR(task))
- rpc_put_task(task);
+ if (IS_ERR(task))
+ return PTR_ERR(task);
+ rpc_put_task(task);
+ return 0;
+}
+
+/* If a nfs_flush_* function fails, it should remove reqs from @head and
+ * call this on each, which will prepare them to be retried on next
+ * writeback using standard nfs.
+ */
+static void nfs_redirty_request(struct nfs_page *req)
+{
+ nfs_mark_request_dirty(req);
+ nfs_end_page_writeback(req->wb_page);
+ nfs_clear_page_tag_locked(req);
}
/*
@@ -863,6 +870,7 @@
size_t wsize = NFS_SERVER(inode)->wsize, nbytes;
unsigned int offset;
int requests = 0;
+ int ret = 0;
LIST_HEAD(list);
nfs_list_remove_request(req);
@@ -884,6 +892,8 @@
offset = 0;
nbytes = count;
do {
+ int ret2;
+
data = list_entry(list.next, struct nfs_write_data, pages);
list_del_init(&data->pages);
@@ -891,13 +901,15 @@
if (nbytes < wsize)
wsize = nbytes;
- nfs_write_rpcsetup(req, data, &nfs_write_partial_ops,
+ ret2 = nfs_write_rpcsetup(req, data, &nfs_write_partial_ops,
wsize, offset, how);
+ if (ret == 0)
+ ret = ret2;
offset += wsize;
nbytes -= wsize;
} while (nbytes != 0);
- return 0;
+ return ret;
out_bad:
while (!list_empty(&list)) {
@@ -906,8 +918,6 @@
nfs_writedata_release(data);
}
nfs_redirty_request(req);
- nfs_end_page_writeback(req->wb_page);
- nfs_clear_page_tag_locked(req);
return -ENOMEM;
}
@@ -940,16 +950,12 @@
req = nfs_list_entry(data->pages.next);
/* Set up the argument struct */
- nfs_write_rpcsetup(req, data, &nfs_write_full_ops, count, 0, how);
-
- return 0;
+ return nfs_write_rpcsetup(req, data, &nfs_write_full_ops, count, 0, how);
out_bad:
while (!list_empty(head)) {
req = nfs_list_entry(head->next);
nfs_list_remove_request(req);
nfs_redirty_request(req);
- nfs_end_page_writeback(req->wb_page);
- nfs_clear_page_tag_locked(req);
}
return -ENOMEM;
}
@@ -972,7 +978,6 @@
{
struct nfs_write_data *data = calldata;
struct nfs_page *req = data->req;
- struct page *page = req->wb_page;
dprintk("NFS: write (%s/%Ld %d@%Ld)",
req->wb_context->path.dentry->d_inode->i_sb->s_id,
@@ -980,13 +985,20 @@
req->wb_bytes,
(long long)req_offset(req));
- if (nfs_writeback_done(task, data) != 0)
- return;
+ nfs_writeback_done(task, data);
+}
- if (task->tk_status < 0) {
+static void nfs_writeback_release_partial(void *calldata)
+{
+ struct nfs_write_data *data = calldata;
+ struct nfs_page *req = data->req;
+ struct page *page = req->wb_page;
+ int status = data->task.tk_status;
+
+ if (status < 0) {
nfs_set_pageerror(page);
- nfs_context_set_write_error(req->wb_context, task->tk_status);
- dprintk(", error = %d\n", task->tk_status);
+ nfs_context_set_write_error(req->wb_context, status);
+ dprintk(", error = %d\n", status);
goto out;
}
@@ -1011,11 +1023,12 @@
out:
if (atomic_dec_and_test(&req->wb_complete))
nfs_writepage_release(req);
+ nfs_writedata_release(calldata);
}
static const struct rpc_call_ops nfs_write_partial_ops = {
.rpc_call_done = nfs_writeback_done_partial,
- .rpc_release = nfs_writedata_release,
+ .rpc_release = nfs_writeback_release_partial,
};
/*
@@ -1028,17 +1041,21 @@
static void nfs_writeback_done_full(struct rpc_task *task, void *calldata)
{
struct nfs_write_data *data = calldata;
- struct nfs_page *req;
- struct page *page;
- if (nfs_writeback_done(task, data) != 0)
- return;
+ nfs_writeback_done(task, data);
+}
+
+static void nfs_writeback_release_full(void *calldata)
+{
+ struct nfs_write_data *data = calldata;
+ int status = data->task.tk_status;
/* Update attributes as result of writeback. */
while (!list_empty(&data->pages)) {
- req = nfs_list_entry(data->pages.next);
+ struct nfs_page *req = nfs_list_entry(data->pages.next);
+ struct page *page = req->wb_page;
+
nfs_list_remove_request(req);
- page = req->wb_page;
dprintk("NFS: write (%s/%Ld %d@%Ld)",
req->wb_context->path.dentry->d_inode->i_sb->s_id,
@@ -1046,10 +1063,10 @@
req->wb_bytes,
(long long)req_offset(req));
- if (task->tk_status < 0) {
+ if (status < 0) {
nfs_set_pageerror(page);
- nfs_context_set_write_error(req->wb_context, task->tk_status);
- dprintk(", error = %d\n", task->tk_status);
+ nfs_context_set_write_error(req->wb_context, status);
+ dprintk(", error = %d\n", status);
goto remove_request;
}
@@ -1069,11 +1086,12 @@
next:
nfs_clear_page_tag_locked(req);
}
+ nfs_writedata_release(calldata);
}
static const struct rpc_call_ops nfs_write_full_ops = {
.rpc_call_done = nfs_writeback_done_full,
- .rpc_release = nfs_writedata_release,
+ .rpc_release = nfs_writeback_release_full,
};
@@ -1159,15 +1177,18 @@
#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
-void nfs_commit_release(void *wdata)
+void nfs_commitdata_release(void *data)
{
+ struct nfs_write_data *wdata = data;
+
+ put_nfs_open_context(wdata->args.context);
nfs_commit_free(wdata);
}
/*
* Set up the argument/result storage required for the RPC call.
*/
-static void nfs_commit_rpcsetup(struct list_head *head,
+static int nfs_commit_rpcsetup(struct list_head *head,
struct nfs_write_data *data,
int how)
{
@@ -1187,6 +1208,7 @@
.rpc_message = &msg,
.callback_ops = &nfs_commit_ops,
.callback_data = data,
+ .workqueue = nfsiod_workqueue,
.flags = flags,
.priority = priority,
};
@@ -1203,6 +1225,7 @@
/* Note: we always request a commit of the entire inode */
data->args.offset = 0;
data->args.count = 0;
+ data->args.context = get_nfs_open_context(first->wb_context);
data->res.count = 0;
data->res.fattr = &data->fattr;
data->res.verf = &data->verf;
@@ -1214,8 +1237,10 @@
dprintk("NFS: %5u initiated commit call\n", data->task.tk_pid);
task = rpc_run_task(&task_setup_data);
- if (!IS_ERR(task))
- rpc_put_task(task);
+ if (IS_ERR(task))
+ return PTR_ERR(task);
+ rpc_put_task(task);
+ return 0;
}
/*
@@ -1227,15 +1252,13 @@
struct nfs_write_data *data;
struct nfs_page *req;
- data = nfs_commit_alloc();
+ data = nfs_commitdata_alloc();
if (!data)
goto out_bad;
/* Set up the argument struct */
- nfs_commit_rpcsetup(head, data, how);
-
- return 0;
+ return nfs_commit_rpcsetup(head, data, how);
out_bad:
while (!list_empty(head)) {
req = nfs_list_entry(head->next);
@@ -1255,7 +1278,6 @@
static void nfs_commit_done(struct rpc_task *task, void *calldata)
{
struct nfs_write_data *data = calldata;
- struct nfs_page *req;
dprintk("NFS: %5u nfs_commit_done (status %d)\n",
task->tk_pid, task->tk_status);
@@ -1263,6 +1285,13 @@
/* Call the NFS version-specific code */
if (NFS_PROTO(data->inode)->commit_done(task, data) != 0)
return;
+}
+
+static void nfs_commit_release(void *calldata)
+{
+ struct nfs_write_data *data = calldata;
+ struct nfs_page *req;
+ int status = data->task.tk_status;
while (!list_empty(&data->pages)) {
req = nfs_list_entry(data->pages.next);
@@ -1277,10 +1306,10 @@
(long long)NFS_FILEID(req->wb_context->path.dentry->d_inode),
req->wb_bytes,
(long long)req_offset(req));
- if (task->tk_status < 0) {
- nfs_context_set_write_error(req->wb_context, task->tk_status);
+ if (status < 0) {
+ nfs_context_set_write_error(req->wb_context, status);
nfs_inode_remove_request(req);
- dprintk(", error = %d\n", task->tk_status);
+ dprintk(", error = %d\n", status);
goto next;
}
@@ -1297,10 +1326,11 @@
}
/* We have a mismatch. Write the page again */
dprintk(" mismatch\n");
- nfs_redirty_request(req);
+ nfs_mark_request_dirty(req);
next:
nfs_clear_page_tag_locked(req);
}
+ nfs_commitdata_release(calldata);
}
static const struct rpc_call_ops nfs_commit_ops = {
@@ -1487,18 +1517,19 @@
};
int ret;
- BUG_ON(!PageLocked(page));
- if (clear_page_dirty_for_io(page)) {
- ret = nfs_writepage_locked(page, &wbc);
+ do {
+ if (clear_page_dirty_for_io(page)) {
+ ret = nfs_writepage_locked(page, &wbc);
+ if (ret < 0)
+ goto out_error;
+ } else if (!PagePrivate(page))
+ break;
+ ret = nfs_sync_mapping_wait(page->mapping, &wbc, how);
if (ret < 0)
- goto out;
- }
- if (!PagePrivate(page))
- return 0;
- ret = nfs_sync_mapping_wait(page->mapping, &wbc, how);
- if (ret >= 0)
- return 0;
-out:
+ goto out_error;
+ } while (PagePrivate(page));
+ return 0;
+out_error:
__mark_inode_dirty(inode, I_DIRTY_PAGES);
return ret;
}
diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
index 4babb2a..94649a8 100644
--- a/include/linux/lockd/lockd.h
+++ b/include/linux/lockd/lockd.h
@@ -91,6 +91,7 @@
*/
#define NLMCLNT_OHSIZE ((__NEW_UTS_LEN) + 10u)
struct nlm_rqst {
+ atomic_t a_count;
unsigned int a_flags; /* initial RPC task flags */
struct nlm_host * a_host; /* host handle */
struct nlm_args a_args; /* arguments */
@@ -173,8 +174,10 @@
/*
* Host cache
*/
-struct nlm_host *nlmclnt_lookup_host(const struct sockaddr_in *, int, int,
- const char *, unsigned int);
+struct nlm_host *nlmclnt_lookup_host(const struct sockaddr_in *sin,
+ int proto, u32 version,
+ const char *hostname,
+ unsigned int hostname_len);
struct nlm_host *nlmsvc_lookup_host(struct svc_rqst *, const char *,
unsigned int);
struct rpc_clnt * nlm_bind_host(struct nlm_host *);
@@ -217,8 +220,7 @@
void nlmsvc_free_host_resources(struct nlm_host *);
void nlmsvc_invalidate_all(void);
-static __inline__ struct inode *
-nlmsvc_file_inode(struct nlm_file *file)
+static inline struct inode *nlmsvc_file_inode(struct nlm_file *file)
{
return file->f_file->f_path.dentry->d_inode;
}
@@ -226,8 +228,8 @@
/*
* Compare two host addresses (needs modifying for ipv6)
*/
-static __inline__ int
-nlm_cmp_addr(const struct sockaddr_in *sin1, const struct sockaddr_in *sin2)
+static inline int nlm_cmp_addr(const struct sockaddr_in *sin1,
+ const struct sockaddr_in *sin2)
{
return sin1->sin_addr.s_addr == sin2->sin_addr.s_addr;
}
@@ -236,8 +238,8 @@
* Compare two NLM locks.
* When the second lock is of type F_UNLCK, this acts like a wildcard.
*/
-static __inline__ int
-nlm_compare_locks(const struct file_lock *fl1, const struct file_lock *fl2)
+static inline int nlm_compare_locks(const struct file_lock *fl1,
+ const struct file_lock *fl2)
{
return fl1->fl_pid == fl2->fl_pid
&& fl1->fl_owner == fl2->fl_owner
diff --git a/include/linux/lockd/sm_inter.h b/include/linux/lockd/sm_inter.h
index 22a6458..5a5448b 100644
--- a/include/linux/lockd/sm_inter.h
+++ b/include/linux/lockd/sm_inter.h
@@ -19,6 +19,7 @@
#define SM_NOTIFY 6
#define SM_MAXSTRLEN 1024
+#define SM_PRIV_SIZE 16
/*
* Arguments for all calls to statd
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index f4a0e4c..27d6a8d 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -430,7 +430,6 @@
/*
* linux/fs/nfs/namespace.c
*/
-extern struct list_head nfs_automount_list;
extern const struct inode_operations nfs_mountpoint_inode_operations;
extern const struct inode_operations nfs_referral_inode_operations;
extern int nfs_mountpoint_expiry_timeout;
@@ -466,9 +465,9 @@
extern int nfs_wb_page_cancel(struct inode *inode, struct page* page);
#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
extern int nfs_commit_inode(struct inode *, int);
-extern struct nfs_write_data *nfs_commit_alloc(void);
+extern struct nfs_write_data *nfs_commitdata_alloc(void);
extern void nfs_commit_free(struct nfs_write_data *wdata);
-extern void nfs_commit_release(void *wdata);
+extern void nfs_commitdata_release(void *wdata);
#else
static inline int
nfs_commit_inode(struct inode *inode, int how)
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index 3423c67..c9beacd 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -32,6 +32,8 @@
const struct nfs_rpc_ops *rpc_ops; /* NFS protocol vector */
int cl_proto; /* Network transport protocol */
+ struct rpc_cred *cl_machine_cred;
+
#ifdef CONFIG_NFS_V4
u64 cl_clientid; /* constant */
nfs4_verifier cl_confirm;
@@ -93,6 +95,7 @@
unsigned int wpages; /* write size (in pages) */
unsigned int wtmult; /* server disk block size */
unsigned int dtsize; /* readdir size */
+ unsigned short port; /* "port=" setting */
unsigned int bsize; /* server block size */
unsigned int acregmin; /* attr cache timeouts */
unsigned int acregmax;
@@ -117,6 +120,13 @@
atomic_t active; /* Keep trace of any activity to this server */
wait_queue_head_t active_wq; /* Wait for any activity to stop */
+
+ /* mountd-related mount options */
+ struct sockaddr_storage mountd_address;
+ size_t mountd_addrlen;
+ u32 mountd_version;
+ unsigned short mountd_port;
+ unsigned short mountd_protocol;
};
/* Server capabilities */
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index f301d0b..24263bb 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -140,6 +140,7 @@
__u32 rflags;
struct nfs_fattr * f_attr;
struct nfs_fattr * dir_attr;
+ struct nfs_seqid * seqid;
const struct nfs_server *server;
int delegation_type;
nfs4_stateid delegation;
@@ -159,6 +160,7 @@
struct nfs_open_confirmres {
nfs4_stateid stateid;
+ struct nfs_seqid * seqid;
};
/*
@@ -175,6 +177,7 @@
struct nfs_closeres {
nfs4_stateid stateid;
struct nfs_fattr * fattr;
+ struct nfs_seqid * seqid;
const struct nfs_server *server;
};
/*
@@ -199,7 +202,9 @@
};
struct nfs_lock_res {
- nfs4_stateid stateid;
+ nfs4_stateid stateid;
+ struct nfs_seqid * lock_seqid;
+ struct nfs_seqid * open_seqid;
};
struct nfs_locku_args {
@@ -210,7 +215,8 @@
};
struct nfs_locku_res {
- nfs4_stateid stateid;
+ nfs4_stateid stateid;
+ struct nfs_seqid * seqid;
};
struct nfs_lockt_args {
diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h
index 7a69ca3..3f63218 100644
--- a/include/linux/sunrpc/auth.h
+++ b/include/linux/sunrpc/auth.h
@@ -26,6 +26,7 @@
uid_t uid;
gid_t gid;
struct group_info *group_info;
+ unsigned char machine_cred : 1;
};
/*
@@ -59,8 +60,8 @@
/*
* Client authentication handle
*/
-#define RPC_CREDCACHE_NR 8
-#define RPC_CREDCACHE_MASK (RPC_CREDCACHE_NR - 1)
+#define RPC_CREDCACHE_HASHBITS 4
+#define RPC_CREDCACHE_NR (1 << RPC_CREDCACHE_HASHBITS)
struct rpc_cred_cache {
struct hlist_head hashtable[RPC_CREDCACHE_NR];
spinlock_t lock;
@@ -89,7 +90,6 @@
/* Flags for rpcauth_lookupcred() */
#define RPCAUTH_LOOKUP_NEW 0x01 /* Accept an uninitialised cred */
-#define RPCAUTH_LOOKUP_ROOTCREDS 0x02 /* This really ought to go! */
/*
* Client authentication ops
@@ -97,9 +97,7 @@
struct rpc_authops {
struct module *owner;
rpc_authflavor_t au_flavor; /* flavor (RPC_AUTH_*) */
-#ifdef RPC_DEBUG
char * au_name;
-#endif
struct rpc_auth * (*create)(struct rpc_clnt *, rpc_authflavor_t);
void (*destroy)(struct rpc_auth *);
@@ -113,6 +111,7 @@
void (*crdestroy)(struct rpc_cred *);
int (*crmatch)(struct auth_cred *, struct rpc_cred *, int);
+ void (*crbind)(struct rpc_task *, struct rpc_cred *);
__be32 * (*crmarshal)(struct rpc_task *, __be32 *);
int (*crrefresh)(struct rpc_task *);
__be32 * (*crvalidate)(struct rpc_task *, __be32 *);
@@ -126,9 +125,13 @@
extern const struct rpc_authops authnull_ops;
void __init rpc_init_authunix(void);
+void __init rpc_init_generic_auth(void);
void __init rpcauth_init_module(void);
void __exit rpcauth_remove_module(void);
+void __exit rpc_destroy_generic_auth(void);
+struct rpc_cred * rpc_lookup_cred(void);
+struct rpc_cred * rpc_lookup_machine_cred(void);
int rpcauth_register(const struct rpc_authops *);
int rpcauth_unregister(const struct rpc_authops *);
struct rpc_auth * rpcauth_create(rpc_authflavor_t, struct rpc_clnt *);
@@ -136,8 +139,8 @@
struct rpc_cred * rpcauth_lookup_credcache(struct rpc_auth *, struct auth_cred *, int);
void rpcauth_init_cred(struct rpc_cred *, const struct auth_cred *, struct rpc_auth *, const struct rpc_credops *);
struct rpc_cred * rpcauth_lookupcred(struct rpc_auth *, int);
-struct rpc_cred * rpcauth_bindcred(struct rpc_task *);
-void rpcauth_holdcred(struct rpc_task *);
+void rpcauth_bindcred(struct rpc_task *, struct rpc_cred *, int);
+void rpcauth_generic_bind_cred(struct rpc_task *, struct rpc_cred *);
void put_rpccred(struct rpc_cred *);
void rpcauth_unbindcred(struct rpc_task *);
__be32 * rpcauth_marshcred(struct rpc_task *, __be32 *);
diff --git a/include/linux/sunrpc/auth_gss.h b/include/linux/sunrpc/auth_gss.h
index 67658e1..fec6899 100644
--- a/include/linux/sunrpc/auth_gss.h
+++ b/include/linux/sunrpc/auth_gss.h
@@ -84,6 +84,7 @@
enum rpc_gss_svc gc_service;
struct gss_cl_ctx *gc_ctx;
struct gss_upcall_msg *gc_upcall;
+ unsigned char gc_machine_cred : 1;
};
#endif /* __KERNEL__ */
diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
index 129a86e..6fff7f8 100644
--- a/include/linux/sunrpc/clnt.h
+++ b/include/linux/sunrpc/clnt.h
@@ -127,11 +127,12 @@
void rpcb_getport_async(struct rpc_task *);
void rpc_call_start(struct rpc_task *);
-int rpc_call_async(struct rpc_clnt *clnt, struct rpc_message *msg,
- int flags, const struct rpc_call_ops *tk_ops,
+int rpc_call_async(struct rpc_clnt *clnt,
+ const struct rpc_message *msg, int flags,
+ const struct rpc_call_ops *tk_ops,
void *calldata);
-int rpc_call_sync(struct rpc_clnt *clnt, struct rpc_message *msg,
- int flags);
+int rpc_call_sync(struct rpc_clnt *clnt,
+ const struct rpc_message *msg, int flags);
struct rpc_task *rpc_call_null(struct rpc_clnt *clnt, struct rpc_cred *cred,
int flags);
void rpc_restart_call(struct rpc_task *);
diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h
index f689f02..d1a5c8c 100644
--- a/include/linux/sunrpc/sched.h
+++ b/include/linux/sunrpc/sched.h
@@ -11,7 +11,6 @@
#include <linux/timer.h>
#include <linux/sunrpc/types.h>
-#include <linux/rcupdate.h>
#include <linux/spinlock.h>
#include <linux/wait.h>
#include <linux/workqueue.h>
@@ -33,7 +32,8 @@
struct rpc_wait {
struct list_head list; /* wait queue links */
struct list_head links; /* Links to related tasks */
- struct rpc_wait_queue * rpc_waitq; /* RPC wait queue we're on */
+ struct list_head timer_list; /* Timer list */
+ unsigned long expires;
};
/*
@@ -57,33 +57,25 @@
__u8 tk_cred_retry;
/*
- * timeout_fn to be executed by timer bottom half
* callback to be executed after waking up
* action next procedure for async tasks
* tk_ops caller callbacks
*/
- void (*tk_timeout_fn)(struct rpc_task *);
void (*tk_callback)(struct rpc_task *);
void (*tk_action)(struct rpc_task *);
const struct rpc_call_ops *tk_ops;
void * tk_calldata;
- /*
- * tk_timer is used for async processing by the RPC scheduling
- * primitives. You should not access this directly unless
- * you have a pathological interest in kernel oopses.
- */
- struct timer_list tk_timer; /* kernel timer */
unsigned long tk_timeout; /* timeout for rpc_sleep() */
unsigned short tk_flags; /* misc flags */
unsigned long tk_runstate; /* Task run status */
struct workqueue_struct *tk_workqueue; /* Normally rpciod, but could
* be any workqueue
*/
+ struct rpc_wait_queue *tk_waitqueue; /* RPC wait queue we're on */
union {
struct work_struct tk_work; /* Async task work queue */
struct rpc_wait tk_wait; /* RPC wait */
- struct rcu_head tk_rcu; /* for task deletion */
} u;
unsigned short tk_timeouts; /* maj timeouts */
@@ -123,6 +115,7 @@
const struct rpc_message *rpc_message;
const struct rpc_call_ops *callback_ops;
void *callback_data;
+ struct workqueue_struct *workqueue;
unsigned short flags;
signed char priority;
};
@@ -147,9 +140,7 @@
#define RPC_TASK_RUNNING 0
#define RPC_TASK_QUEUED 1
-#define RPC_TASK_WAKEUP 2
-#define RPC_TASK_HAS_TIMER 3
-#define RPC_TASK_ACTIVE 4
+#define RPC_TASK_ACTIVE 2
#define RPC_IS_RUNNING(t) test_bit(RPC_TASK_RUNNING, &(t)->tk_runstate)
#define rpc_set_running(t) set_bit(RPC_TASK_RUNNING, &(t)->tk_runstate)
@@ -171,15 +162,6 @@
smp_mb__after_clear_bit(); \
} while (0)
-#define rpc_start_wakeup(t) \
- (test_and_set_bit(RPC_TASK_WAKEUP, &(t)->tk_runstate) == 0)
-#define rpc_finish_wakeup(t) \
- do { \
- smp_mb__before_clear_bit(); \
- clear_bit(RPC_TASK_WAKEUP, &(t)->tk_runstate); \
- smp_mb__after_clear_bit(); \
- } while (0)
-
#define RPC_IS_ACTIVATED(t) test_bit(RPC_TASK_ACTIVE, &(t)->tk_runstate)
/*
@@ -192,6 +174,12 @@
#define RPC_PRIORITY_HIGH (1)
#define RPC_NR_PRIORITY (1 + RPC_PRIORITY_HIGH - RPC_PRIORITY_LOW)
+struct rpc_timer {
+ struct timer_list timer;
+ struct list_head list;
+ unsigned long expires;
+};
+
/*
* RPC synchronization objects
*/
@@ -204,6 +192,7 @@
unsigned char count; /* # task groups remaining serviced so far */
unsigned char nr; /* # tasks remaining for cookie */
unsigned short qlen; /* total # tasks waiting in queue */
+ struct rpc_timer timer_list;
#ifdef RPC_DEBUG
const char * name;
#endif
@@ -229,9 +218,11 @@
void rpc_execute(struct rpc_task *);
void rpc_init_priority_wait_queue(struct rpc_wait_queue *, const char *);
void rpc_init_wait_queue(struct rpc_wait_queue *, const char *);
+void rpc_destroy_wait_queue(struct rpc_wait_queue *);
void rpc_sleep_on(struct rpc_wait_queue *, struct rpc_task *,
- rpc_action action, rpc_action timer);
-void rpc_wake_up_task(struct rpc_task *);
+ rpc_action action);
+void rpc_wake_up_queued_task(struct rpc_wait_queue *,
+ struct rpc_task *);
void rpc_wake_up(struct rpc_wait_queue *);
struct rpc_task *rpc_wake_up_next(struct rpc_wait_queue *);
void rpc_wake_up_status(struct rpc_wait_queue *, int);
diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index b3ff9a8..4d80a11 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -86,6 +86,10 @@
unsigned long rq_majortimeo; /* major timeout alarm */
unsigned long rq_timeout; /* Current timeout value */
unsigned int rq_retries; /* # of retries */
+ unsigned int rq_connect_cookie;
+ /* A cookie used to track the
+ state of the transport
+ connection */
/*
* Partial send handling
@@ -152,6 +156,9 @@
unsigned long connect_timeout,
bind_timeout,
reestablish_timeout;
+ unsigned int connect_cookie; /* A cookie that gets bumped
+ every time the transport
+ is reconnected */
/*
* Disconnection of idle transports
@@ -232,7 +239,7 @@
void xprt_set_retrans_timeout_def(struct rpc_task *task);
void xprt_set_retrans_timeout_rtt(struct rpc_task *task);
void xprt_wake_pending_tasks(struct rpc_xprt *xprt, int status);
-void xprt_wait_for_buffer_space(struct rpc_task *task);
+void xprt_wait_for_buffer_space(struct rpc_task *task, rpc_action action);
void xprt_write_space(struct rpc_xprt *xprt);
void xprt_update_rtt(struct rpc_task *task);
void xprt_adjust_cwnd(struct rpc_task *task, int result);
@@ -241,6 +248,7 @@
void xprt_release_rqst_cong(struct rpc_task *task);
void xprt_disconnect_done(struct rpc_xprt *xprt);
void xprt_force_disconnect(struct rpc_xprt *xprt);
+void xprt_conditional_disconnect(struct rpc_xprt *xprt, unsigned int cookie);
/*
* Reserved bit positions in xprt->state
diff --git a/net/sunrpc/Makefile b/net/sunrpc/Makefile
index 92e1dbe..5369aa3 100644
--- a/net/sunrpc/Makefile
+++ b/net/sunrpc/Makefile
@@ -8,7 +8,7 @@
obj-$(CONFIG_SUNRPC_XPRT_RDMA) += xprtrdma/
sunrpc-y := clnt.o xprt.o socklib.o xprtsock.o sched.o \
- auth.o auth_null.o auth_unix.o \
+ auth.o auth_null.o auth_unix.o auth_generic.o \
svc.o svcsock.o svcauth.o svcauth_unix.o \
rpcb_clnt.o timer.o xdr.o \
sunrpc_syms.o cache.o rpc_pipe.o \
diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c
index eca941c..6bfea9e 100644
--- a/net/sunrpc/auth.c
+++ b/net/sunrpc/auth.c
@@ -11,6 +11,7 @@
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/errno.h>
+#include <linux/hash.h>
#include <linux/sunrpc/clnt.h>
#include <linux/spinlock.h>
@@ -219,6 +220,9 @@
}
EXPORT_SYMBOL_GPL(rpcauth_destroy_credcache);
+
+#define RPC_AUTH_EXPIRY_MORATORIUM (60 * HZ)
+
/*
* Remove stale credentials. Avoid sleeping inside the loop.
*/
@@ -227,6 +231,7 @@
{
spinlock_t *cache_lock;
struct rpc_cred *cred;
+ unsigned long expired = jiffies - RPC_AUTH_EXPIRY_MORATORIUM;
while (!list_empty(&cred_unused)) {
cred = list_entry(cred_unused.next, struct rpc_cred, cr_lru);
@@ -234,6 +239,10 @@
number_cred_unused--;
if (atomic_read(&cred->cr_count) != 0)
continue;
+ /* Enforce a 5 second garbage collection moratorium */
+ if (time_in_range(cred->cr_expire, expired, jiffies) &&
+ test_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags) != 0)
+ continue;
cache_lock = &cred->cr_auth->au_credcache->lock;
spin_lock(cache_lock);
if (atomic_read(&cred->cr_count) == 0) {
@@ -280,10 +289,9 @@
struct hlist_node *pos;
struct rpc_cred *cred = NULL,
*entry, *new;
- int nr = 0;
+ unsigned int nr;
- if (!(flags & RPCAUTH_LOOKUP_ROOTCREDS))
- nr = acred->uid & RPC_CREDCACHE_MASK;
+ nr = hash_long(acred->uid, RPC_CREDCACHE_HASHBITS);
rcu_read_lock();
hlist_for_each_entry_rcu(entry, pos, &cache->hashtable[nr], cr_hash) {
@@ -356,7 +364,6 @@
put_group_info(acred.group_info);
return ret;
}
-EXPORT_SYMBOL_GPL(rpcauth_lookupcred);
void
rpcauth_init_cred(struct rpc_cred *cred, const struct auth_cred *acred,
@@ -375,41 +382,58 @@
}
EXPORT_SYMBOL_GPL(rpcauth_init_cred);
-struct rpc_cred *
-rpcauth_bindcred(struct rpc_task *task)
+void
+rpcauth_generic_bind_cred(struct rpc_task *task, struct rpc_cred *cred)
+{
+ task->tk_msg.rpc_cred = get_rpccred(cred);
+ dprintk("RPC: %5u holding %s cred %p\n", task->tk_pid,
+ cred->cr_auth->au_ops->au_name, cred);
+}
+EXPORT_SYMBOL_GPL(rpcauth_generic_bind_cred);
+
+static void
+rpcauth_bind_root_cred(struct rpc_task *task)
{
struct rpc_auth *auth = task->tk_client->cl_auth;
struct auth_cred acred = {
- .uid = current->fsuid,
- .gid = current->fsgid,
- .group_info = current->group_info,
+ .uid = 0,
+ .gid = 0,
};
struct rpc_cred *ret;
- int flags = 0;
dprintk("RPC: %5u looking up %s cred\n",
task->tk_pid, task->tk_client->cl_auth->au_ops->au_name);
- get_group_info(acred.group_info);
- if (task->tk_flags & RPC_TASK_ROOTCREDS)
- flags |= RPCAUTH_LOOKUP_ROOTCREDS;
- ret = auth->au_ops->lookup_cred(auth, &acred, flags);
+ ret = auth->au_ops->lookup_cred(auth, &acred, 0);
if (!IS_ERR(ret))
task->tk_msg.rpc_cred = ret;
else
task->tk_status = PTR_ERR(ret);
- put_group_info(acred.group_info);
- return ret;
+}
+
+static void
+rpcauth_bind_new_cred(struct rpc_task *task)
+{
+ struct rpc_auth *auth = task->tk_client->cl_auth;
+ struct rpc_cred *ret;
+
+ dprintk("RPC: %5u looking up %s cred\n",
+ task->tk_pid, auth->au_ops->au_name);
+ ret = rpcauth_lookupcred(auth, 0);
+ if (!IS_ERR(ret))
+ task->tk_msg.rpc_cred = ret;
+ else
+ task->tk_status = PTR_ERR(ret);
}
void
-rpcauth_holdcred(struct rpc_task *task)
+rpcauth_bindcred(struct rpc_task *task, struct rpc_cred *cred, int flags)
{
- struct rpc_cred *cred = task->tk_msg.rpc_cred;
- if (cred != NULL) {
- get_rpccred(cred);
- dprintk("RPC: %5u holding %s cred %p\n", task->tk_pid,
- cred->cr_auth->au_ops->au_name, cred);
- }
+ if (cred != NULL)
+ cred->cr_ops->crbind(task, cred);
+ else if (flags & RPC_TASK_ROOTCREDS)
+ rpcauth_bind_root_cred(task);
+ else
+ rpcauth_bind_new_cred(task);
}
void
@@ -550,6 +574,7 @@
void __init rpcauth_init_module(void)
{
rpc_init_authunix();
+ rpc_init_generic_auth();
register_shrinker(&rpc_cred_shrinker);
}
diff --git a/net/sunrpc/auth_generic.c b/net/sunrpc/auth_generic.c
new file mode 100644
index 0000000..d927d9f
--- /dev/null
+++ b/net/sunrpc/auth_generic.c
@@ -0,0 +1,177 @@
+/*
+ * Generic RPC credential
+ *
+ * Copyright (C) 2008, Trond Myklebust <Trond.Myklebust@netapp.com>
+ */
+
+#include <linux/err.h>
+#include <linux/types.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/sunrpc/auth.h>
+#include <linux/sunrpc/clnt.h>
+#include <linux/sunrpc/debug.h>
+#include <linux/sunrpc/sched.h>
+
+#ifdef RPC_DEBUG
+# define RPCDBG_FACILITY RPCDBG_AUTH
+#endif
+
+#define RPC_ANONYMOUS_USERID ((uid_t)-2)
+#define RPC_ANONYMOUS_GROUPID ((gid_t)-2)
+
+struct generic_cred {
+ struct rpc_cred gc_base;
+ struct auth_cred acred;
+};
+
+static struct rpc_auth generic_auth;
+static struct rpc_cred_cache generic_cred_cache;
+static const struct rpc_credops generic_credops;
+
+/*
+ * Public call interface
+ */
+struct rpc_cred *rpc_lookup_cred(void)
+{
+ return rpcauth_lookupcred(&generic_auth, 0);
+}
+EXPORT_SYMBOL_GPL(rpc_lookup_cred);
+
+/*
+ * Public call interface for looking up machine creds.
+ */
+struct rpc_cred *rpc_lookup_machine_cred(void)
+{
+ struct auth_cred acred = {
+ .uid = RPC_ANONYMOUS_USERID,
+ .gid = RPC_ANONYMOUS_GROUPID,
+ .machine_cred = 1,
+ };
+
+ dprintk("RPC: looking up machine cred\n");
+ return generic_auth.au_ops->lookup_cred(&generic_auth, &acred, 0);
+}
+EXPORT_SYMBOL_GPL(rpc_lookup_machine_cred);
+
+static void
+generic_bind_cred(struct rpc_task *task, struct rpc_cred *cred)
+{
+ struct rpc_auth *auth = task->tk_client->cl_auth;
+ struct auth_cred *acred = &container_of(cred, struct generic_cred, gc_base)->acred;
+ struct rpc_cred *ret;
+
+ ret = auth->au_ops->lookup_cred(auth, acred, 0);
+ if (!IS_ERR(ret))
+ task->tk_msg.rpc_cred = ret;
+ else
+ task->tk_status = PTR_ERR(ret);
+}
+
+/*
+ * Lookup generic creds for current process
+ */
+static struct rpc_cred *
+generic_lookup_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags)
+{
+ return rpcauth_lookup_credcache(&generic_auth, acred, flags);
+}
+
+static struct rpc_cred *
+generic_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags)
+{
+ struct generic_cred *gcred;
+
+ gcred = kmalloc(sizeof(*gcred), GFP_KERNEL);
+ if (gcred == NULL)
+ return ERR_PTR(-ENOMEM);
+
+ rpcauth_init_cred(&gcred->gc_base, acred, &generic_auth, &generic_credops);
+ gcred->gc_base.cr_flags = 1UL << RPCAUTH_CRED_UPTODATE;
+
+ gcred->acred.uid = acred->uid;
+ gcred->acred.gid = acred->gid;
+ gcred->acred.group_info = acred->group_info;
+ if (gcred->acred.group_info != NULL)
+ get_group_info(gcred->acred.group_info);
+ gcred->acred.machine_cred = acred->machine_cred;
+
+ dprintk("RPC: allocated %s cred %p for uid %d gid %d\n",
+ gcred->acred.machine_cred ? "machine" : "generic",
+ gcred, acred->uid, acred->gid);
+ return &gcred->gc_base;
+}
+
+static void
+generic_free_cred(struct rpc_cred *cred)
+{
+ struct generic_cred *gcred = container_of(cred, struct generic_cred, gc_base);
+
+ dprintk("RPC: generic_free_cred %p\n", gcred);
+ if (gcred->acred.group_info != NULL)
+ put_group_info(gcred->acred.group_info);
+ kfree(gcred);
+}
+
+static void
+generic_free_cred_callback(struct rcu_head *head)
+{
+ struct rpc_cred *cred = container_of(head, struct rpc_cred, cr_rcu);
+ generic_free_cred(cred);
+}
+
+static void
+generic_destroy_cred(struct rpc_cred *cred)
+{
+ call_rcu(&cred->cr_rcu, generic_free_cred_callback);
+}
+
+/*
+ * Match credentials against current process creds.
+ */
+static int
+generic_match(struct auth_cred *acred, struct rpc_cred *cred, int flags)
+{
+ struct generic_cred *gcred = container_of(cred, struct generic_cred, gc_base);
+
+ if (gcred->acred.uid != acred->uid ||
+ gcred->acred.gid != acred->gid ||
+ gcred->acred.group_info != acred->group_info ||
+ gcred->acred.machine_cred != acred->machine_cred)
+ return 0;
+ return 1;
+}
+
+void __init rpc_init_generic_auth(void)
+{
+ spin_lock_init(&generic_cred_cache.lock);
+}
+
+void __exit rpc_destroy_generic_auth(void)
+{
+ rpcauth_clear_credcache(&generic_cred_cache);
+}
+
+static struct rpc_cred_cache generic_cred_cache = {
+ {{ NULL, },},
+};
+
+static const struct rpc_authops generic_auth_ops = {
+ .owner = THIS_MODULE,
+ .au_name = "Generic",
+ .lookup_cred = generic_lookup_cred,
+ .crcreate = generic_create_cred,
+};
+
+static struct rpc_auth generic_auth = {
+ .au_ops = &generic_auth_ops,
+ .au_count = ATOMIC_INIT(0),
+ .au_credcache = &generic_cred_cache,
+};
+
+static const struct rpc_credops generic_credops = {
+ .cr_name = "Generic cred",
+ .crdestroy = generic_destroy_cred,
+ .crbind = generic_bind_cred,
+ .crmatch = generic_match,
+};
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index 5828e5c..cc12d5f 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -114,27 +114,14 @@
gss_cred_set_ctx(struct rpc_cred *cred, struct gss_cl_ctx *ctx)
{
struct gss_cred *gss_cred = container_of(cred, struct gss_cred, gc_base);
- struct gss_cl_ctx *old;
- old = gss_cred->gc_ctx;
+ if (!test_bit(RPCAUTH_CRED_NEW, &cred->cr_flags))
+ return;
+ gss_get_ctx(ctx);
rcu_assign_pointer(gss_cred->gc_ctx, ctx);
set_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags);
+ smp_mb__before_clear_bit();
clear_bit(RPCAUTH_CRED_NEW, &cred->cr_flags);
- if (old)
- gss_put_ctx(old);
-}
-
-static int
-gss_cred_is_uptodate_ctx(struct rpc_cred *cred)
-{
- struct gss_cred *gss_cred = container_of(cred, struct gss_cred, gc_base);
- int res = 0;
-
- rcu_read_lock();
- if (test_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags) && gss_cred->gc_ctx)
- res = 1;
- rcu_read_unlock();
- return res;
}
static const void *
@@ -266,6 +253,7 @@
BUG_ON(!list_empty(&gss_msg->list));
if (gss_msg->ctx != NULL)
gss_put_ctx(gss_msg->ctx);
+ rpc_destroy_wait_queue(&gss_msg->rpc_waitqueue);
kfree(gss_msg);
}
@@ -339,7 +327,7 @@
spin_lock(&inode->i_lock);
if (gss_msg->ctx)
- gss_cred_set_ctx(task->tk_msg.rpc_cred, gss_get_ctx(gss_msg->ctx));
+ gss_cred_set_ctx(task->tk_msg.rpc_cred, gss_msg->ctx);
else
task->tk_status = gss_msg->msg.errno;
gss_cred->gc_upcall = NULL;
@@ -370,9 +358,16 @@
static struct gss_upcall_msg *
gss_setup_upcall(struct rpc_clnt *clnt, struct gss_auth *gss_auth, struct rpc_cred *cred)
{
+ struct gss_cred *gss_cred = container_of(cred,
+ struct gss_cred, gc_base);
struct gss_upcall_msg *gss_new, *gss_msg;
+ uid_t uid = cred->cr_uid;
- gss_new = gss_alloc_msg(gss_auth, cred->cr_uid);
+ /* Special case: rpc.gssd assumes that uid == 0 implies machine creds */
+ if (gss_cred->gc_machine_cred != 0)
+ uid = 0;
+
+ gss_new = gss_alloc_msg(gss_auth, uid);
if (gss_new == NULL)
return ERR_PTR(-ENOMEM);
gss_msg = gss_add_msg(gss_auth, gss_new);
@@ -408,13 +403,17 @@
}
spin_lock(&inode->i_lock);
if (gss_cred->gc_upcall != NULL)
- rpc_sleep_on(&gss_cred->gc_upcall->rpc_waitqueue, task, NULL, NULL);
- else if (gss_msg->ctx == NULL && gss_msg->msg.errno >= 0) {
+ rpc_sleep_on(&gss_cred->gc_upcall->rpc_waitqueue, task, NULL);
+ else if (gss_msg->ctx != NULL) {
+ gss_cred_set_ctx(task->tk_msg.rpc_cred, gss_msg->ctx);
+ gss_cred->gc_upcall = NULL;
+ rpc_wake_up_status(&gss_msg->rpc_waitqueue, gss_msg->msg.errno);
+ } else if (gss_msg->msg.errno >= 0) {
task->tk_timeout = 0;
gss_cred->gc_upcall = gss_msg;
/* gss_upcall_callback will release the reference to gss_upcall_msg */
atomic_inc(&gss_msg->count);
- rpc_sleep_on(&gss_msg->rpc_waitqueue, task, gss_upcall_callback, NULL);
+ rpc_sleep_on(&gss_msg->rpc_waitqueue, task, gss_upcall_callback);
} else
err = gss_msg->msg.errno;
spin_unlock(&inode->i_lock);
@@ -454,7 +453,7 @@
schedule();
}
if (gss_msg->ctx)
- gss_cred_set_ctx(cred, gss_get_ctx(gss_msg->ctx));
+ gss_cred_set_ctx(cred, gss_msg->ctx);
else
err = gss_msg->msg.errno;
spin_unlock(&inode->i_lock);
@@ -709,7 +708,7 @@
struct rpc_task *task;
if (gss_cred->gc_ctx == NULL ||
- gss_cred->gc_ctx->gc_proc == RPC_GSS_PROC_DESTROY)
+ test_and_clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags) == 0)
return 0;
gss_cred->gc_ctx->gc_proc = RPC_GSS_PROC_DESTROY;
@@ -719,7 +718,7 @@
* by the RPC call or by the put_rpccred() below */
get_rpccred(cred);
- task = rpc_call_null(gss_auth->client, cred, RPC_TASK_ASYNC);
+ task = rpc_call_null(gss_auth->client, cred, RPC_TASK_ASYNC|RPC_TASK_SOFT);
if (!IS_ERR(task))
rpc_put_task(task);
@@ -817,6 +816,7 @@
*/
cred->gc_base.cr_flags = 1UL << RPCAUTH_CRED_NEW;
cred->gc_service = gss_auth->service;
+ cred->gc_machine_cred = acred->machine_cred;
kref_get(&gss_auth->kref);
return &cred->gc_base;
@@ -843,17 +843,16 @@
{
struct gss_cred *gss_cred = container_of(rc, struct gss_cred, gc_base);
- /*
- * If the searchflags have set RPCAUTH_LOOKUP_NEW, then
- * we don't really care if the credential has expired or not,
- * since the caller should be prepared to reinitialise it.
- */
- if ((flags & RPCAUTH_LOOKUP_NEW) && test_bit(RPCAUTH_CRED_NEW, &rc->cr_flags))
+ if (test_bit(RPCAUTH_CRED_NEW, &rc->cr_flags))
goto out;
/* Don't match with creds that have expired. */
- if (gss_cred->gc_ctx && time_after(jiffies, gss_cred->gc_ctx->gc_expiry))
+ if (time_after(jiffies, gss_cred->gc_ctx->gc_expiry))
+ return 0;
+ if (!test_bit(RPCAUTH_CRED_UPTODATE, &rc->cr_flags))
return 0;
out:
+ if (acred->machine_cred != gss_cred->gc_machine_cred)
+ return 0;
return (rc->cr_uid == acred->uid);
}
@@ -917,16 +916,48 @@
return NULL;
}
+static int gss_renew_cred(struct rpc_task *task)
+{
+ struct rpc_cred *oldcred = task->tk_msg.rpc_cred;
+ struct gss_cred *gss_cred = container_of(oldcred,
+ struct gss_cred,
+ gc_base);
+ struct rpc_auth *auth = oldcred->cr_auth;
+ struct auth_cred acred = {
+ .uid = oldcred->cr_uid,
+ .machine_cred = gss_cred->gc_machine_cred,
+ };
+ struct rpc_cred *new;
+
+ new = gss_lookup_cred(auth, &acred, RPCAUTH_LOOKUP_NEW);
+ if (IS_ERR(new))
+ return PTR_ERR(new);
+ task->tk_msg.rpc_cred = new;
+ put_rpccred(oldcred);
+ return 0;
+}
+
/*
* Refresh credentials. XXX - finish
*/
static int
gss_refresh(struct rpc_task *task)
{
+ struct rpc_cred *cred = task->tk_msg.rpc_cred;
+ int ret = 0;
- if (!gss_cred_is_uptodate_ctx(task->tk_msg.rpc_cred))
- return gss_refresh_upcall(task);
- return 0;
+ if (!test_bit(RPCAUTH_CRED_NEW, &cred->cr_flags) &&
+ !test_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags)) {
+ ret = gss_renew_cred(task);
+ if (ret < 0)
+ goto out;
+ cred = task->tk_msg.rpc_cred;
+ }
+
+ if (test_bit(RPCAUTH_CRED_NEW, &cred->cr_flags))
+ ret = gss_refresh_upcall(task);
+out:
+ return ret;
}
/* Dummy refresh routine: used only when destroying the context */
@@ -1286,9 +1317,7 @@
static const struct rpc_authops authgss_ops = {
.owner = THIS_MODULE,
.au_flavor = RPC_AUTH_GSS,
-#ifdef RPC_DEBUG
.au_name = "RPCSEC_GSS",
-#endif
.create = gss_create,
.destroy = gss_destroy,
.lookup_cred = gss_lookup_cred,
@@ -1299,6 +1328,7 @@
.cr_name = "AUTH_GSS",
.crdestroy = gss_destroy_cred,
.cr_init = gss_cred_init,
+ .crbind = rpcauth_generic_bind_cred,
.crmatch = gss_match,
.crmarshal = gss_marshal,
.crrefresh = gss_refresh,
@@ -1310,6 +1340,7 @@
static const struct rpc_credops gss_nullops = {
.cr_name = "AUTH_GSS",
.crdestroy = gss_destroy_cred,
+ .crbind = rpcauth_generic_bind_cred,
.crmatch = gss_match,
.crmarshal = gss_marshal,
.crrefresh = gss_refresh_null,
diff --git a/net/sunrpc/auth_null.c b/net/sunrpc/auth_null.c
index 537d0e8..c70dd7f 100644
--- a/net/sunrpc/auth_null.c
+++ b/net/sunrpc/auth_null.c
@@ -104,9 +104,7 @@
const struct rpc_authops authnull_ops = {
.owner = THIS_MODULE,
.au_flavor = RPC_AUTH_NULL,
-#ifdef RPC_DEBUG
.au_name = "NULL",
-#endif
.create = nul_create,
.destroy = nul_destroy,
.lookup_cred = nul_lookup_cred,
@@ -125,6 +123,7 @@
const struct rpc_credops null_credops = {
.cr_name = "AUTH_NULL",
.crdestroy = nul_destroy_cred,
+ .crbind = rpcauth_generic_bind_cred,
.crmatch = nul_match,
.crmarshal = nul_marshal,
.crrefresh = nul_refresh,
diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c
index 5ed91e5..44920b9 100644
--- a/net/sunrpc/auth_unix.c
+++ b/net/sunrpc/auth_unix.c
@@ -60,7 +60,8 @@
unx_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags)
{
struct unx_cred *cred;
- int i;
+ unsigned int groups = 0;
+ unsigned int i;
dprintk("RPC: allocating UNIX cred for uid %d gid %d\n",
acred->uid, acred->gid);
@@ -70,21 +71,17 @@
rpcauth_init_cred(&cred->uc_base, acred, auth, &unix_credops);
cred->uc_base.cr_flags = 1UL << RPCAUTH_CRED_UPTODATE;
- if (flags & RPCAUTH_LOOKUP_ROOTCREDS) {
- cred->uc_uid = 0;
- cred->uc_gid = 0;
- cred->uc_gids[0] = NOGROUP;
- } else {
- int groups = acred->group_info->ngroups;
- if (groups > NFS_NGROUPS)
- groups = NFS_NGROUPS;
- cred->uc_gid = acred->gid;
- for (i = 0; i < groups; i++)
- cred->uc_gids[i] = GROUP_AT(acred->group_info, i);
- if (i < NFS_NGROUPS)
- cred->uc_gids[i] = NOGROUP;
- }
+ if (acred->group_info != NULL)
+ groups = acred->group_info->ngroups;
+ if (groups > NFS_NGROUPS)
+ groups = NFS_NGROUPS;
+
+ cred->uc_gid = acred->gid;
+ for (i = 0; i < groups; i++)
+ cred->uc_gids[i] = GROUP_AT(acred->group_info, i);
+ if (i < NFS_NGROUPS)
+ cred->uc_gids[i] = NOGROUP;
return &cred->uc_base;
}
@@ -118,26 +115,21 @@
unx_match(struct auth_cred *acred, struct rpc_cred *rcred, int flags)
{
struct unx_cred *cred = container_of(rcred, struct unx_cred, uc_base);
- int i;
+ unsigned int groups = 0;
+ unsigned int i;
- if (!(flags & RPCAUTH_LOOKUP_ROOTCREDS)) {
- int groups;
- if (cred->uc_uid != acred->uid
- || cred->uc_gid != acred->gid)
- return 0;
+ if (cred->uc_uid != acred->uid || cred->uc_gid != acred->gid)
+ return 0;
+ if (acred->group_info != NULL)
groups = acred->group_info->ngroups;
- if (groups > NFS_NGROUPS)
- groups = NFS_NGROUPS;
- for (i = 0; i < groups ; i++)
- if (cred->uc_gids[i] != GROUP_AT(acred->group_info, i))
- return 0;
- return 1;
- }
- return (cred->uc_uid == 0
- && cred->uc_gid == 0
- && cred->uc_gids[0] == (gid_t) NOGROUP);
+ if (groups > NFS_NGROUPS)
+ groups = NFS_NGROUPS;
+ for (i = 0; i < groups ; i++)
+ if (cred->uc_gids[i] != GROUP_AT(acred->group_info, i))
+ return 0;
+ return 1;
}
/*
@@ -218,9 +210,7 @@
const struct rpc_authops authunix_ops = {
.owner = THIS_MODULE,
.au_flavor = RPC_AUTH_UNIX,
-#ifdef RPC_DEBUG
.au_name = "UNIX",
-#endif
.create = unx_create,
.destroy = unx_destroy,
.lookup_cred = unx_lookup_cred,
@@ -245,6 +235,7 @@
const struct rpc_credops unix_credops = {
.cr_name = "AUTH_UNIX",
.crdestroy = unx_destroy_cred,
+ .crbind = rpcauth_generic_bind_cred,
.crmatch = unx_match,
.crmarshal = unx_marshal,
.crrefresh = unx_refresh,
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 7b96ff3..8945307 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -544,7 +544,7 @@
* @msg: RPC call parameters
* @flags: RPC call flags
*/
-int rpc_call_sync(struct rpc_clnt *clnt, struct rpc_message *msg, int flags)
+int rpc_call_sync(struct rpc_clnt *clnt, const struct rpc_message *msg, int flags)
{
struct rpc_task *task;
struct rpc_task_setup task_setup_data = {
@@ -575,7 +575,7 @@
* @data: user call data
*/
int
-rpc_call_async(struct rpc_clnt *clnt, struct rpc_message *msg, int flags,
+rpc_call_async(struct rpc_clnt *clnt, const struct rpc_message *msg, int flags,
const struct rpc_call_ops *tk_ops, void *data)
{
struct rpc_task *task;
@@ -1062,7 +1062,7 @@
if (task->tk_msg.rpc_proc->p_decode != NULL)
return;
task->tk_action = rpc_exit_task;
- rpc_wake_up_task(task);
+ rpc_wake_up_queued_task(&task->tk_xprt->pending, task);
}
/*
@@ -1116,7 +1116,8 @@
case -ETIMEDOUT:
task->tk_action = call_timeout;
if (task->tk_client->cl_discrtry)
- xprt_force_disconnect(task->tk_xprt);
+ xprt_conditional_disconnect(task->tk_xprt,
+ req->rq_connect_cookie);
break;
case -ECONNREFUSED:
case -ENOTCONN:
@@ -1168,6 +1169,11 @@
clnt->cl_protname, clnt->cl_server);
}
rpc_force_rebind(clnt);
+ /*
+ * Did our request time out due to an RPCSEC_GSS out-of-sequence
+ * event? RFC2203 requires the server to drop all such requests.
+ */
+ rpcauth_invalcred(task);
retry:
clnt->cl_stats->rpcretrans++;
@@ -1195,18 +1201,6 @@
task->tk_flags &= ~RPC_CALL_MAJORSEEN;
}
- if (task->tk_status < 12) {
- if (!RPC_IS_SOFT(task)) {
- task->tk_action = call_bind;
- clnt->cl_stats->rpcretrans++;
- goto out_retry;
- }
- dprintk("RPC: %s: too small RPC reply size (%d bytes)\n",
- clnt->cl_protname, task->tk_status);
- task->tk_action = call_timeout;
- goto out_retry;
- }
-
/*
* Ensure that we see all writes made by xprt_complete_rqst()
* before it changed req->rq_received.
@@ -1218,6 +1212,18 @@
WARN_ON(memcmp(&req->rq_rcv_buf, &req->rq_private_buf,
sizeof(req->rq_rcv_buf)) != 0);
+ if (req->rq_rcv_buf.len < 12) {
+ if (!RPC_IS_SOFT(task)) {
+ task->tk_action = call_bind;
+ clnt->cl_stats->rpcretrans++;
+ goto out_retry;
+ }
+ dprintk("RPC: %s: too small RPC reply size (%d bytes)\n",
+ clnt->cl_protname, task->tk_status);
+ task->tk_action = call_timeout;
+ goto out_retry;
+ }
+
/* Verify the RPC header */
p = call_verify(task);
if (IS_ERR(p)) {
@@ -1236,10 +1242,14 @@
task->tk_status);
return;
out_retry:
- req->rq_received = req->rq_private_buf.len = 0;
task->tk_status = 0;
- if (task->tk_client->cl_discrtry)
- xprt_force_disconnect(task->tk_xprt);
+ /* Note: call_verify() may have freed the RPC slot */
+ if (task->tk_rqstp == req) {
+ req->rq_received = req->rq_rcv_buf.len = 0;
+ if (task->tk_client->cl_discrtry)
+ xprt_conditional_disconnect(task->tk_xprt,
+ req->rq_connect_cookie);
+ }
}
/*
@@ -1531,7 +1541,7 @@
proc = -1;
if (RPC_IS_QUEUED(t))
- rpc_waitq = rpc_qname(t->u.tk_wait.rpc_waitq);
+ rpc_waitq = rpc_qname(t->tk_waitqueue);
printk("%5u %04d %04x %6d %8p %6d %8p %8ld %8s %8p %8p\n",
t->tk_pid, proc,
diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c
index 56aa018..0517967 100644
--- a/net/sunrpc/rpcb_clnt.c
+++ b/net/sunrpc/rpcb_clnt.c
@@ -298,7 +298,7 @@
/* Put self on queue before sending rpcbind request, in case
* rpcb_getport_done completes before we return from rpc_run_task */
- rpc_sleep_on(&xprt->binding, task, NULL, NULL);
+ rpc_sleep_on(&xprt->binding, task, NULL);
/* Someone else may have bound if we slept */
if (xprt_bound(xprt)) {
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index 4c66912..6eab9bf 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -38,9 +38,9 @@
static mempool_t *rpc_task_mempool __read_mostly;
static mempool_t *rpc_buffer_mempool __read_mostly;
-static void __rpc_default_timer(struct rpc_task *task);
static void rpc_async_schedule(struct work_struct *);
static void rpc_release_task(struct rpc_task *task);
+static void __rpc_queue_timer_fn(unsigned long ptr);
/*
* RPC tasks sit here while waiting for conditions to improve.
@@ -57,41 +57,30 @@
* queue->lock and bh_disabled in order to avoid races within
* rpc_run_timer().
*/
-static inline void
-__rpc_disable_timer(struct rpc_task *task)
+static void
+__rpc_disable_timer(struct rpc_wait_queue *queue, struct rpc_task *task)
{
+ if (task->tk_timeout == 0)
+ return;
dprintk("RPC: %5u disabling timer\n", task->tk_pid);
- task->tk_timeout_fn = NULL;
task->tk_timeout = 0;
+ list_del(&task->u.tk_wait.timer_list);
+ if (list_empty(&queue->timer_list.list))
+ del_timer(&queue->timer_list.timer);
}
-/*
- * Run a timeout function.
- * We use the callback in order to allow __rpc_wake_up_task()
- * and friends to disable the timer synchronously on SMP systems
- * without calling del_timer_sync(). The latter could cause a
- * deadlock if called while we're holding spinlocks...
- */
-static void rpc_run_timer(struct rpc_task *task)
+static void
+rpc_set_queue_timer(struct rpc_wait_queue *queue, unsigned long expires)
{
- void (*callback)(struct rpc_task *);
-
- callback = task->tk_timeout_fn;
- task->tk_timeout_fn = NULL;
- if (callback && RPC_IS_QUEUED(task)) {
- dprintk("RPC: %5u running timer\n", task->tk_pid);
- callback(task);
- }
- smp_mb__before_clear_bit();
- clear_bit(RPC_TASK_HAS_TIMER, &task->tk_runstate);
- smp_mb__after_clear_bit();
+ queue->timer_list.expires = expires;
+ mod_timer(&queue->timer_list.timer, expires);
}
/*
* Set up a timer for the current task.
*/
-static inline void
-__rpc_add_timer(struct rpc_task *task, rpc_action timer)
+static void
+__rpc_add_timer(struct rpc_wait_queue *queue, struct rpc_task *task)
{
if (!task->tk_timeout)
return;
@@ -99,27 +88,10 @@
dprintk("RPC: %5u setting alarm for %lu ms\n",
task->tk_pid, task->tk_timeout * 1000 / HZ);
- if (timer)
- task->tk_timeout_fn = timer;
- else
- task->tk_timeout_fn = __rpc_default_timer;
- set_bit(RPC_TASK_HAS_TIMER, &task->tk_runstate);
- mod_timer(&task->tk_timer, jiffies + task->tk_timeout);
-}
-
-/*
- * Delete any timer for the current task. Because we use del_timer_sync(),
- * this function should never be called while holding queue->lock.
- */
-static void
-rpc_delete_timer(struct rpc_task *task)
-{
- if (RPC_IS_QUEUED(task))
- return;
- if (test_and_clear_bit(RPC_TASK_HAS_TIMER, &task->tk_runstate)) {
- del_singleshot_timer_sync(&task->tk_timer);
- dprintk("RPC: %5u deleting timer\n", task->tk_pid);
- }
+ task->u.tk_wait.expires = jiffies + task->tk_timeout;
+ if (list_empty(&queue->timer_list.list) || time_before(task->u.tk_wait.expires, queue->timer_list.expires))
+ rpc_set_queue_timer(queue, task->u.tk_wait.expires);
+ list_add(&task->u.tk_wait.timer_list, &queue->timer_list.list);
}
/*
@@ -161,7 +133,7 @@
list_add(&task->u.tk_wait.list, &queue->tasks[0]);
else
list_add_tail(&task->u.tk_wait.list, &queue->tasks[0]);
- task->u.tk_wait.rpc_waitq = queue;
+ task->tk_waitqueue = queue;
queue->qlen++;
rpc_set_queued(task);
@@ -181,22 +153,18 @@
list_move(&t->u.tk_wait.list, &task->u.tk_wait.list);
list_splice_init(&task->u.tk_wait.links, &t->u.tk_wait.links);
}
- list_del(&task->u.tk_wait.list);
}
/*
* Remove request from queue.
* Note: must be called with spin lock held.
*/
-static void __rpc_remove_wait_queue(struct rpc_task *task)
+static void __rpc_remove_wait_queue(struct rpc_wait_queue *queue, struct rpc_task *task)
{
- struct rpc_wait_queue *queue;
- queue = task->u.tk_wait.rpc_waitq;
-
+ __rpc_disable_timer(queue, task);
if (RPC_IS_PRIORITY(queue))
__rpc_remove_wait_queue_priority(task);
- else
- list_del(&task->u.tk_wait.list);
+ list_del(&task->u.tk_wait.list);
queue->qlen--;
dprintk("RPC: %5u removed from queue %p \"%s\"\n",
task->tk_pid, queue, rpc_qname(queue));
@@ -229,6 +197,9 @@
INIT_LIST_HEAD(&queue->tasks[i]);
queue->maxpriority = nr_queues - 1;
rpc_reset_waitqueue_priority(queue);
+ queue->qlen = 0;
+ setup_timer(&queue->timer_list.timer, __rpc_queue_timer_fn, (unsigned long)queue);
+ INIT_LIST_HEAD(&queue->timer_list.list);
#ifdef RPC_DEBUG
queue->name = qname;
#endif
@@ -245,6 +216,12 @@
}
EXPORT_SYMBOL_GPL(rpc_init_wait_queue);
+void rpc_destroy_wait_queue(struct rpc_wait_queue *queue)
+{
+ del_timer_sync(&queue->timer_list.timer);
+}
+EXPORT_SYMBOL_GPL(rpc_destroy_wait_queue);
+
static int rpc_wait_bit_killable(void *word)
{
if (fatal_signal_pending(current))
@@ -313,7 +290,6 @@
*/
static void rpc_make_runnable(struct rpc_task *task)
{
- BUG_ON(task->tk_timeout_fn);
rpc_clear_queued(task);
if (rpc_test_and_set_running(task))
return;
@@ -326,7 +302,7 @@
int status;
INIT_WORK(&task->u.tk_work, rpc_async_schedule);
- status = queue_work(task->tk_workqueue, &task->u.tk_work);
+ status = queue_work(rpciod_workqueue, &task->u.tk_work);
if (status < 0) {
printk(KERN_WARNING "RPC: failed to add task to queue: error: %d!\n", status);
task->tk_status = status;
@@ -343,7 +319,7 @@
* as it's on a wait queue.
*/
static void __rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task,
- rpc_action action, rpc_action timer)
+ rpc_action action)
{
dprintk("RPC: %5u sleep_on(queue \"%s\" time %lu)\n",
task->tk_pid, rpc_qname(q), jiffies);
@@ -357,11 +333,11 @@
BUG_ON(task->tk_callback != NULL);
task->tk_callback = action;
- __rpc_add_timer(task, timer);
+ __rpc_add_timer(q, task);
}
void rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task,
- rpc_action action, rpc_action timer)
+ rpc_action action)
{
/* Mark the task as being activated if so needed */
rpc_set_active(task);
@@ -370,18 +346,19 @@
* Protect the queue operations.
*/
spin_lock_bh(&q->lock);
- __rpc_sleep_on(q, task, action, timer);
+ __rpc_sleep_on(q, task, action);
spin_unlock_bh(&q->lock);
}
EXPORT_SYMBOL_GPL(rpc_sleep_on);
/**
* __rpc_do_wake_up_task - wake up a single rpc_task
+ * @queue: wait queue
* @task: task to be woken up
*
* Caller must hold queue->lock, and have cleared the task queued flag.
*/
-static void __rpc_do_wake_up_task(struct rpc_task *task)
+static void __rpc_do_wake_up_task(struct rpc_wait_queue *queue, struct rpc_task *task)
{
dprintk("RPC: %5u __rpc_wake_up_task (now %lu)\n",
task->tk_pid, jiffies);
@@ -395,8 +372,7 @@
return;
}
- __rpc_disable_timer(task);
- __rpc_remove_wait_queue(task);
+ __rpc_remove_wait_queue(queue, task);
rpc_make_runnable(task);
@@ -404,48 +380,32 @@
}
/*
- * Wake up the specified task
+ * Wake up a queued task while the queue lock is being held
*/
-static void __rpc_wake_up_task(struct rpc_task *task)
+static void rpc_wake_up_task_queue_locked(struct rpc_wait_queue *queue, struct rpc_task *task)
{
- if (rpc_start_wakeup(task)) {
- if (RPC_IS_QUEUED(task))
- __rpc_do_wake_up_task(task);
- rpc_finish_wakeup(task);
- }
+ if (RPC_IS_QUEUED(task) && task->tk_waitqueue == queue)
+ __rpc_do_wake_up_task(queue, task);
}
/*
- * Default timeout handler if none specified by user
+ * Wake up a task on a specific queue
*/
-static void
-__rpc_default_timer(struct rpc_task *task)
+void rpc_wake_up_queued_task(struct rpc_wait_queue *queue, struct rpc_task *task)
{
- dprintk("RPC: %5u timeout (default timer)\n", task->tk_pid);
- task->tk_status = -ETIMEDOUT;
- rpc_wake_up_task(task);
+ spin_lock_bh(&queue->lock);
+ rpc_wake_up_task_queue_locked(queue, task);
+ spin_unlock_bh(&queue->lock);
}
+EXPORT_SYMBOL_GPL(rpc_wake_up_queued_task);
/*
* Wake up the specified task
*/
-void rpc_wake_up_task(struct rpc_task *task)
+static void rpc_wake_up_task(struct rpc_task *task)
{
- rcu_read_lock_bh();
- if (rpc_start_wakeup(task)) {
- if (RPC_IS_QUEUED(task)) {
- struct rpc_wait_queue *queue = task->u.tk_wait.rpc_waitq;
-
- /* Note: we're already in a bh-safe context */
- spin_lock(&queue->lock);
- __rpc_do_wake_up_task(task);
- spin_unlock(&queue->lock);
- }
- rpc_finish_wakeup(task);
- }
- rcu_read_unlock_bh();
+ rpc_wake_up_queued_task(task->tk_waitqueue, task);
}
-EXPORT_SYMBOL_GPL(rpc_wake_up_task);
/*
* Wake up the next task on a priority queue.
@@ -495,7 +455,7 @@
new_owner:
rpc_set_waitqueue_owner(queue, task->tk_owner);
out:
- __rpc_wake_up_task(task);
+ rpc_wake_up_task_queue_locked(queue, task);
return task;
}
@@ -508,16 +468,14 @@
dprintk("RPC: wake_up_next(%p \"%s\")\n",
queue, rpc_qname(queue));
- rcu_read_lock_bh();
- spin_lock(&queue->lock);
+ spin_lock_bh(&queue->lock);
if (RPC_IS_PRIORITY(queue))
task = __rpc_wake_up_next_priority(queue);
else {
task_for_first(task, &queue->tasks[0])
- __rpc_wake_up_task(task);
+ rpc_wake_up_task_queue_locked(queue, task);
}
- spin_unlock(&queue->lock);
- rcu_read_unlock_bh();
+ spin_unlock_bh(&queue->lock);
return task;
}
@@ -534,18 +492,16 @@
struct rpc_task *task, *next;
struct list_head *head;
- rcu_read_lock_bh();
- spin_lock(&queue->lock);
+ spin_lock_bh(&queue->lock);
head = &queue->tasks[queue->maxpriority];
for (;;) {
list_for_each_entry_safe(task, next, head, u.tk_wait.list)
- __rpc_wake_up_task(task);
+ rpc_wake_up_task_queue_locked(queue, task);
if (head == &queue->tasks[0])
break;
head--;
}
- spin_unlock(&queue->lock);
- rcu_read_unlock_bh();
+ spin_unlock_bh(&queue->lock);
}
EXPORT_SYMBOL_GPL(rpc_wake_up);
@@ -561,26 +517,48 @@
struct rpc_task *task, *next;
struct list_head *head;
- rcu_read_lock_bh();
- spin_lock(&queue->lock);
+ spin_lock_bh(&queue->lock);
head = &queue->tasks[queue->maxpriority];
for (;;) {
list_for_each_entry_safe(task, next, head, u.tk_wait.list) {
task->tk_status = status;
- __rpc_wake_up_task(task);
+ rpc_wake_up_task_queue_locked(queue, task);
}
if (head == &queue->tasks[0])
break;
head--;
}
- spin_unlock(&queue->lock);
- rcu_read_unlock_bh();
+ spin_unlock_bh(&queue->lock);
}
EXPORT_SYMBOL_GPL(rpc_wake_up_status);
+static void __rpc_queue_timer_fn(unsigned long ptr)
+{
+ struct rpc_wait_queue *queue = (struct rpc_wait_queue *)ptr;
+ struct rpc_task *task, *n;
+ unsigned long expires, now, timeo;
+
+ spin_lock(&queue->lock);
+ expires = now = jiffies;
+ list_for_each_entry_safe(task, n, &queue->timer_list.list, u.tk_wait.timer_list) {
+ timeo = task->u.tk_wait.expires;
+ if (time_after_eq(now, timeo)) {
+ dprintk("RPC: %5u timeout\n", task->tk_pid);
+ task->tk_status = -ETIMEDOUT;
+ rpc_wake_up_task_queue_locked(queue, task);
+ continue;
+ }
+ if (expires == now || time_after(expires, timeo))
+ expires = timeo;
+ }
+ if (!list_empty(&queue->timer_list.list))
+ rpc_set_queue_timer(queue, expires);
+ spin_unlock(&queue->lock);
+}
+
static void __rpc_atrun(struct rpc_task *task)
{
- rpc_wake_up_task(task);
+ task->tk_status = 0;
}
/*
@@ -589,7 +567,7 @@
void rpc_delay(struct rpc_task *task, unsigned long delay)
{
task->tk_timeout = delay;
- rpc_sleep_on(&delay_queue, task, NULL, __rpc_atrun);
+ rpc_sleep_on(&delay_queue, task, __rpc_atrun);
}
EXPORT_SYMBOL_GPL(rpc_delay);
@@ -644,10 +622,6 @@
BUG_ON(RPC_IS_QUEUED(task));
for (;;) {
- /*
- * Garbage collection of pending timers...
- */
- rpc_delete_timer(task);
/*
* Execute any pending callback.
@@ -816,8 +790,6 @@
static void rpc_init_task(struct rpc_task *task, const struct rpc_task_setup *task_setup_data)
{
memset(task, 0, sizeof(*task));
- setup_timer(&task->tk_timer, (void (*)(unsigned long))rpc_run_timer,
- (unsigned long)task);
atomic_set(&task->tk_count, 1);
task->tk_flags = task_setup_data->flags;
task->tk_ops = task_setup_data->callback_ops;
@@ -832,7 +804,7 @@
task->tk_owner = current->tgid;
/* Initialize workqueue for async tasks */
- task->tk_workqueue = rpciod_workqueue;
+ task->tk_workqueue = task_setup_data->workqueue;
task->tk_client = task_setup_data->rpc_client;
if (task->tk_client != NULL) {
@@ -845,12 +817,11 @@
task->tk_action = rpc_prepare_task;
if (task_setup_data->rpc_message != NULL) {
- memcpy(&task->tk_msg, task_setup_data->rpc_message, sizeof(task->tk_msg));
+ task->tk_msg.rpc_proc = task_setup_data->rpc_message->rpc_proc;
+ task->tk_msg.rpc_argp = task_setup_data->rpc_message->rpc_argp;
+ task->tk_msg.rpc_resp = task_setup_data->rpc_message->rpc_resp;
/* Bind the user cred */
- if (task->tk_msg.rpc_cred != NULL)
- rpcauth_holdcred(task);
- else
- rpcauth_bindcred(task);
+ rpcauth_bindcred(task, task_setup_data->rpc_message->rpc_cred, task_setup_data->flags);
if (task->tk_action == NULL)
rpc_call_start(task);
}
@@ -868,13 +839,6 @@
return (struct rpc_task *)mempool_alloc(rpc_task_mempool, GFP_NOFS);
}
-static void rpc_free_task(struct rcu_head *rcu)
-{
- struct rpc_task *task = container_of(rcu, struct rpc_task, u.tk_rcu);
- dprintk("RPC: %5u freeing task\n", task->tk_pid);
- mempool_free(task, rpc_task_mempool);
-}
-
/*
* Create a new task for the specified client.
*/
@@ -898,12 +862,25 @@
return task;
}
-
-void rpc_put_task(struct rpc_task *task)
+static void rpc_free_task(struct rpc_task *task)
{
const struct rpc_call_ops *tk_ops = task->tk_ops;
void *calldata = task->tk_calldata;
+ if (task->tk_flags & RPC_TASK_DYNAMIC) {
+ dprintk("RPC: %5u freeing task\n", task->tk_pid);
+ mempool_free(task, rpc_task_mempool);
+ }
+ rpc_release_calldata(tk_ops, calldata);
+}
+
+static void rpc_async_release(struct work_struct *work)
+{
+ rpc_free_task(container_of(work, struct rpc_task, u.tk_work));
+}
+
+void rpc_put_task(struct rpc_task *task)
+{
if (!atomic_dec_and_test(&task->tk_count))
return;
/* Release resources */
@@ -915,9 +892,11 @@
rpc_release_client(task->tk_client);
task->tk_client = NULL;
}
- if (task->tk_flags & RPC_TASK_DYNAMIC)
- call_rcu_bh(&task->u.tk_rcu, rpc_free_task);
- rpc_release_calldata(tk_ops, calldata);
+ if (task->tk_workqueue != NULL) {
+ INIT_WORK(&task->u.tk_work, rpc_async_release);
+ queue_work(task->tk_workqueue, &task->u.tk_work);
+ } else
+ rpc_free_task(task);
}
EXPORT_SYMBOL_GPL(rpc_put_task);
@@ -937,9 +916,6 @@
}
BUG_ON (RPC_IS_QUEUED(task));
- /* Synchronously delete any running timer */
- rpc_delete_timer(task);
-
#ifdef RPC_DEBUG
task->tk_magic = 0;
#endif
@@ -1029,11 +1005,20 @@
kmem_cache_destroy(rpc_task_slabp);
if (rpc_buffer_slabp)
kmem_cache_destroy(rpc_buffer_slabp);
+ rpc_destroy_wait_queue(&delay_queue);
}
int
rpc_init_mempool(void)
{
+ /*
+ * The following is not strictly a mempool initialisation,
+ * but there is no harm in doing it here
+ */
+ rpc_init_wait_queue(&delay_queue, "delayq");
+ if (!rpciod_start())
+ goto err_nomem;
+
rpc_task_slabp = kmem_cache_create("rpc_tasks",
sizeof(struct rpc_task),
0, SLAB_HWCACHE_ALIGN,
@@ -1054,13 +1039,6 @@
rpc_buffer_slabp);
if (!rpc_buffer_mempool)
goto err_nomem;
- if (!rpciod_start())
- goto err_nomem;
- /*
- * The following is not strictly a mempool initialisation,
- * but there is no harm in doing it here
- */
- rpc_init_wait_queue(&delay_queue, "delayq");
return 0;
err_nomem:
rpc_destroy_mempool();
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index d5553b8..75d748e 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -188,9 +188,9 @@
task->tk_timeout = 0;
task->tk_status = -EAGAIN;
if (req && req->rq_ntrans)
- rpc_sleep_on(&xprt->resend, task, NULL, NULL);
+ rpc_sleep_on(&xprt->resend, task, NULL);
else
- rpc_sleep_on(&xprt->sending, task, NULL, NULL);
+ rpc_sleep_on(&xprt->sending, task, NULL);
return 0;
}
EXPORT_SYMBOL_GPL(xprt_reserve_xprt);
@@ -238,9 +238,9 @@
task->tk_timeout = 0;
task->tk_status = -EAGAIN;
if (req && req->rq_ntrans)
- rpc_sleep_on(&xprt->resend, task, NULL, NULL);
+ rpc_sleep_on(&xprt->resend, task, NULL);
else
- rpc_sleep_on(&xprt->sending, task, NULL, NULL);
+ rpc_sleep_on(&xprt->sending, task, NULL);
return 0;
}
EXPORT_SYMBOL_GPL(xprt_reserve_xprt_cong);
@@ -447,13 +447,13 @@
* @task: task to be put to sleep
*
*/
-void xprt_wait_for_buffer_space(struct rpc_task *task)
+void xprt_wait_for_buffer_space(struct rpc_task *task, rpc_action action)
{
struct rpc_rqst *req = task->tk_rqstp;
struct rpc_xprt *xprt = req->rq_xprt;
task->tk_timeout = req->rq_timeout;
- rpc_sleep_on(&xprt->pending, task, NULL, NULL);
+ rpc_sleep_on(&xprt->pending, task, action);
}
EXPORT_SYMBOL_GPL(xprt_wait_for_buffer_space);
@@ -472,7 +472,7 @@
if (xprt->snd_task) {
dprintk("RPC: write space: waking waiting task on "
"xprt %p\n", xprt);
- rpc_wake_up_task(xprt->snd_task);
+ rpc_wake_up_queued_task(&xprt->pending, xprt->snd_task);
}
spin_unlock_bh(&xprt->transport_lock);
}
@@ -602,11 +602,37 @@
/* Try to schedule an autoclose RPC call */
if (test_and_set_bit(XPRT_LOCKED, &xprt->state) == 0)
queue_work(rpciod_workqueue, &xprt->task_cleanup);
- else if (xprt->snd_task != NULL)
- rpc_wake_up_task(xprt->snd_task);
+ xprt_wake_pending_tasks(xprt, -ENOTCONN);
spin_unlock_bh(&xprt->transport_lock);
}
-EXPORT_SYMBOL_GPL(xprt_force_disconnect);
+
+/**
+ * xprt_conditional_disconnect - force a transport to disconnect
+ * @xprt: transport to disconnect
+ * @cookie: 'connection cookie'
+ *
+ * This attempts to break the connection if and only if 'cookie' matches
+ * the current transport 'connection cookie'. It ensures that we don't
+ * try to break the connection more than once when we need to retransmit
+ * a batch of RPC requests.
+ *
+ */
+void xprt_conditional_disconnect(struct rpc_xprt *xprt, unsigned int cookie)
+{
+ /* Don't race with the test_bit() in xprt_clear_locked() */
+ spin_lock_bh(&xprt->transport_lock);
+ if (cookie != xprt->connect_cookie)
+ goto out;
+ if (test_bit(XPRT_CLOSING, &xprt->state) || !xprt_connected(xprt))
+ goto out;
+ set_bit(XPRT_CLOSE_WAIT, &xprt->state);
+ /* Try to schedule an autoclose RPC call */
+ if (test_and_set_bit(XPRT_LOCKED, &xprt->state) == 0)
+ queue_work(rpciod_workqueue, &xprt->task_cleanup);
+ xprt_wake_pending_tasks(xprt, -ENOTCONN);
+out:
+ spin_unlock_bh(&xprt->transport_lock);
+}
static void
xprt_init_autodisconnect(unsigned long data)
@@ -653,7 +679,7 @@
task->tk_rqstp->rq_bytes_sent = 0;
task->tk_timeout = xprt->connect_timeout;
- rpc_sleep_on(&xprt->pending, task, xprt_connect_status, NULL);
+ rpc_sleep_on(&xprt->pending, task, xprt_connect_status);
xprt->stat.connect_start = jiffies;
xprt->ops->connect(task);
}
@@ -749,18 +775,20 @@
void xprt_complete_rqst(struct rpc_task *task, int copied)
{
struct rpc_rqst *req = task->tk_rqstp;
+ struct rpc_xprt *xprt = req->rq_xprt;
dprintk("RPC: %5u xid %08x complete (%d bytes received)\n",
task->tk_pid, ntohl(req->rq_xid), copied);
- task->tk_xprt->stat.recvs++;
+ xprt->stat.recvs++;
task->tk_rtt = (long)jiffies - req->rq_xtime;
list_del_init(&req->rq_list);
+ req->rq_private_buf.len = copied;
/* Ensure all writes are done before we update req->rq_received */
smp_wmb();
- req->rq_received = req->rq_private_buf.len = copied;
- rpc_wake_up_task(task);
+ req->rq_received = copied;
+ rpc_wake_up_queued_task(&xprt->pending, task);
}
EXPORT_SYMBOL_GPL(xprt_complete_rqst);
@@ -769,17 +797,17 @@
struct rpc_rqst *req = task->tk_rqstp;
struct rpc_xprt *xprt = req->rq_xprt;
+ if (task->tk_status != -ETIMEDOUT)
+ return;
dprintk("RPC: %5u xprt_timer\n", task->tk_pid);
- spin_lock(&xprt->transport_lock);
+ spin_lock_bh(&xprt->transport_lock);
if (!req->rq_received) {
if (xprt->ops->timer)
xprt->ops->timer(task);
- task->tk_status = -ETIMEDOUT;
- }
- task->tk_timeout = 0;
- rpc_wake_up_task(task);
- spin_unlock(&xprt->transport_lock);
+ } else
+ task->tk_status = 0;
+ spin_unlock_bh(&xprt->transport_lock);
}
/**
@@ -849,6 +877,7 @@
} else if (!req->rq_bytes_sent)
return;
+ req->rq_connect_cookie = xprt->connect_cookie;
status = xprt->ops->send_request(task);
if (status == 0) {
dprintk("RPC: %5u xmit complete\n", task->tk_pid);
@@ -864,7 +893,7 @@
if (!xprt_connected(xprt))
task->tk_status = -ENOTCONN;
else if (!req->rq_received)
- rpc_sleep_on(&xprt->pending, task, NULL, xprt_timer);
+ rpc_sleep_on(&xprt->pending, task, xprt_timer);
spin_unlock_bh(&xprt->transport_lock);
return;
}
@@ -875,7 +904,7 @@
*/
task->tk_status = status;
if (status == -ECONNREFUSED)
- rpc_sleep_on(&xprt->sending, task, NULL, NULL);
+ rpc_sleep_on(&xprt->sending, task, NULL);
}
static inline void do_xprt_reserve(struct rpc_task *task)
@@ -895,7 +924,7 @@
dprintk("RPC: waiting for request slot\n");
task->tk_status = -EAGAIN;
task->tk_timeout = 0;
- rpc_sleep_on(&xprt->backlog, task, NULL, NULL);
+ rpc_sleep_on(&xprt->backlog, task, NULL);
}
/**
@@ -1052,6 +1081,11 @@
xprt->shutdown = 1;
del_timer_sync(&xprt->timer);
+ rpc_destroy_wait_queue(&xprt->binding);
+ rpc_destroy_wait_queue(&xprt->pending);
+ rpc_destroy_wait_queue(&xprt->sending);
+ rpc_destroy_wait_queue(&xprt->resend);
+ rpc_destroy_wait_queue(&xprt->backlog);
/*
* Tear down transport state and free the rpc_xprt
*/
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 613daf8..ddbe981 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -136,12 +136,6 @@
#endif
/*
- * How many times to try sending a request on a socket before waiting
- * for the socket buffer to clear.
- */
-#define XS_SENDMSG_RETRY (10U)
-
-/*
* Time out for an RPC UDP socket connect. UDP socket connects are
* synchronous, but we set a timeout anyway in case of resource
* exhaustion on the local host.
@@ -516,6 +510,14 @@
return sent;
}
+static void xs_nospace_callback(struct rpc_task *task)
+{
+ struct sock_xprt *transport = container_of(task->tk_rqstp->rq_xprt, struct sock_xprt, xprt);
+
+ transport->inet->sk_write_pending--;
+ clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags);
+}
+
/**
* xs_nospace - place task on wait queue if transmit was incomplete
* @task: task to put to sleep
@@ -531,20 +533,27 @@
task->tk_pid, req->rq_slen - req->rq_bytes_sent,
req->rq_slen);
- if (test_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags)) {
- /* Protect against races with write_space */
- spin_lock_bh(&xprt->transport_lock);
+ /* Protect against races with write_space */
+ spin_lock_bh(&xprt->transport_lock);
- /* Don't race with disconnect */
- if (!xprt_connected(xprt))
- task->tk_status = -ENOTCONN;
- else if (test_bit(SOCK_NOSPACE, &transport->sock->flags))
- xprt_wait_for_buffer_space(task);
+ /* Don't race with disconnect */
+ if (xprt_connected(xprt)) {
+ if (test_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags)) {
+ /*
+ * Notify TCP that we're limited by the application
+ * window size
+ */
+ set_bit(SOCK_NOSPACE, &transport->sock->flags);
+ transport->inet->sk_write_pending++;
+ /* ...and wait for more buffer space */
+ xprt_wait_for_buffer_space(task, xs_nospace_callback);
+ }
+ } else {
+ clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags);
+ task->tk_status = -ENOTCONN;
+ }
- spin_unlock_bh(&xprt->transport_lock);
- } else
- /* Keep holding the socket if it is blocked */
- rpc_delay(task, HZ>>4);
+ spin_unlock_bh(&xprt->transport_lock);
}
/**
@@ -588,19 +597,20 @@
}
switch (status) {
+ case -EAGAIN:
+ xs_nospace(task);
+ break;
case -ENETUNREACH:
case -EPIPE:
case -ECONNREFUSED:
/* When the server has died, an ICMP port unreachable message
* prompts ECONNREFUSED. */
- break;
- case -EAGAIN:
- xs_nospace(task);
+ clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags);
break;
default:
+ clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags);
dprintk("RPC: sendmsg returned unrecognized error %d\n",
-status);
- break;
}
return status;
@@ -650,7 +660,6 @@
struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
struct xdr_buf *xdr = &req->rq_snd_buf;
int status;
- unsigned int retry = 0;
xs_encode_tcp_record_marker(&req->rq_snd_buf);
@@ -681,9 +690,10 @@
return 0;
}
+ if (status != 0)
+ continue;
status = -EAGAIN;
- if (retry++ > XS_SENDMSG_RETRY)
- break;
+ break;
}
switch (status) {
@@ -695,12 +705,13 @@
case -ENOTCONN:
case -EPIPE:
status = -ENOTCONN;
+ clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags);
break;
default:
dprintk("RPC: sendmsg returned unrecognized error %d\n",
-status);
+ clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags);
xs_tcp_shutdown(xprt);
- break;
}
return status;
@@ -1073,6 +1084,7 @@
{
struct rpc_xprt *xprt;
read_descriptor_t rd_desc;
+ int read;
dprintk("RPC: xs_tcp_data_ready...\n");
@@ -1084,8 +1096,10 @@
/* We use rd_desc to pass struct xprt to xs_tcp_data_recv */
rd_desc.arg.data = xprt;
- rd_desc.count = 65536;
- tcp_read_sock(sk, &rd_desc, xs_tcp_data_recv);
+ do {
+ rd_desc.count = 65536;
+ read = tcp_read_sock(sk, &rd_desc, xs_tcp_data_recv);
+ } while (read > 0);
out:
read_unlock(&sk->sk_callback_lock);
}
@@ -1128,6 +1142,7 @@
break;
case TCP_FIN_WAIT1:
/* The client initiated a shutdown of the socket */
+ xprt->connect_cookie++;
xprt->reestablish_timeout = 0;
set_bit(XPRT_CLOSING, &xprt->state);
smp_mb__before_clear_bit();
@@ -1140,6 +1155,7 @@
set_bit(XPRT_CLOSING, &xprt->state);
xprt_force_disconnect(xprt);
case TCP_SYN_SENT:
+ xprt->connect_cookie++;
case TCP_CLOSING:
/*
* If the server closed down the connection, make sure that
@@ -1186,9 +1202,11 @@
if (unlikely(!(sock = sk->sk_socket)))
goto out;
+ clear_bit(SOCK_NOSPACE, &sock->flags);
+
if (unlikely(!(xprt = xprt_from_sock(sk))))
goto out;
- if (unlikely(!test_and_clear_bit(SOCK_NOSPACE, &sock->flags)))
+ if (test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags) == 0)
goto out;
xprt_write_space(xprt);
@@ -1219,9 +1237,11 @@
if (unlikely(!(sock = sk->sk_socket)))
goto out;
+ clear_bit(SOCK_NOSPACE, &sock->flags);
+
if (unlikely(!(xprt = xprt_from_sock(sk))))
goto out;
- if (unlikely(!test_and_clear_bit(SOCK_NOSPACE, &sock->flags)))
+ if (test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags) == 0)
goto out;
xprt_write_space(xprt);