SUNRPC: Fix up xprt_write_space()

The rest of the networking layer uses SOCK_ASYNC_NOSPACE to signal whether
or not we have someone waiting for buffer memory. Convert the SUNRPC layer
to use the same idiom.
Remove the unlikely()s in xs_udp_write_space and xs_tcp_write_space. In
fact, the most common case will be that there is nobody waiting for buffer
space.

SOCK_NOSPACE is there to tell the TCP layer whether or not the cwnd was
limited by the application window. Ensure that we follow the same idiom as
the rest of the networking layer here too.

Finally, ensure that we clear SOCK_ASYNC_NOSPACE once we wake up, so that
write_space() doesn't keep waking things up on xprt->pending.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 85199c6..3ba64f9 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -447,13 +447,13 @@
  * @task: task to be put to sleep
  *
  */
-void xprt_wait_for_buffer_space(struct rpc_task *task)
+void xprt_wait_for_buffer_space(struct rpc_task *task, rpc_action action)
 {
 	struct rpc_rqst *req = task->tk_rqstp;
 	struct rpc_xprt *xprt = req->rq_xprt;
 
 	task->tk_timeout = req->rq_timeout;
-	rpc_sleep_on(&xprt->pending, task, NULL);
+	rpc_sleep_on(&xprt->pending, task, action);
 }
 EXPORT_SYMBOL_GPL(xprt_wait_for_buffer_space);
 
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 8bd3b0f..4c2462e 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -516,6 +516,14 @@
 	return sent;
 }
 
+static void xs_nospace_callback(struct rpc_task *task)
+{
+	struct sock_xprt *transport = container_of(task->tk_rqstp->rq_xprt, struct sock_xprt, xprt);
+
+	transport->inet->sk_write_pending--;
+	clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags);
+}
+
 /**
  * xs_nospace - place task on wait queue if transmit was incomplete
  * @task: task to put to sleep
@@ -531,20 +539,27 @@
 			task->tk_pid, req->rq_slen - req->rq_bytes_sent,
 			req->rq_slen);
 
-	if (test_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags)) {
-		/* Protect against races with write_space */
-		spin_lock_bh(&xprt->transport_lock);
+	/* Protect against races with write_space */
+	spin_lock_bh(&xprt->transport_lock);
 
-		/* Don't race with disconnect */
-		if (!xprt_connected(xprt))
-			task->tk_status = -ENOTCONN;
-		else if (test_bit(SOCK_NOSPACE, &transport->sock->flags))
-			xprt_wait_for_buffer_space(task);
+	/* Don't race with disconnect */
+	if (xprt_connected(xprt)) {
+		if (test_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags)) {
+			/*
+			 * Notify TCP that we're limited by the application
+			 * window size
+			 */
+			set_bit(SOCK_NOSPACE, &transport->sock->flags);
+			transport->inet->sk_write_pending++;
+			/* ...and wait for more buffer space */
+			xprt_wait_for_buffer_space(task, xs_nospace_callback);
+		}
+	} else {
+		clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags);
+		task->tk_status = -ENOTCONN;
+	}
 
-		spin_unlock_bh(&xprt->transport_lock);
-	} else
-		/* Keep holding the socket if it is blocked */
-		rpc_delay(task, HZ>>4);
+	spin_unlock_bh(&xprt->transport_lock);
 }
 
 /**
@@ -588,19 +603,20 @@
 	}
 
 	switch (status) {
+	case -EAGAIN:
+		xs_nospace(task);
+		break;
 	case -ENETUNREACH:
 	case -EPIPE:
 	case -ECONNREFUSED:
 		/* When the server has died, an ICMP port unreachable message
 		 * prompts ECONNREFUSED. */
-		break;
-	case -EAGAIN:
-		xs_nospace(task);
+		clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags);
 		break;
 	default:
+		clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags);
 		dprintk("RPC:       sendmsg returned unrecognized error %d\n",
 			-status);
-		break;
 	}
 
 	return status;
@@ -695,12 +711,13 @@
 	case -ENOTCONN:
 	case -EPIPE:
 		status = -ENOTCONN;
+		clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags);
 		break;
 	default:
 		dprintk("RPC:       sendmsg returned unrecognized error %d\n",
 			-status);
+		clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags);
 		xs_tcp_shutdown(xprt);
-		break;
 	}
 
 	return status;
@@ -1189,9 +1206,11 @@
 
 		if (unlikely(!(sock = sk->sk_socket)))
 			goto out;
+		clear_bit(SOCK_NOSPACE, &sock->flags);
+
 		if (unlikely(!(xprt = xprt_from_sock(sk))))
 			goto out;
-		if (unlikely(!test_and_clear_bit(SOCK_NOSPACE, &sock->flags)))
+		if (test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags) == 0)
 			goto out;
 
 		xprt_write_space(xprt);
@@ -1222,9 +1241,11 @@
 
 		if (unlikely(!(sock = sk->sk_socket)))
 			goto out;
+		clear_bit(SOCK_NOSPACE, &sock->flags);
+
 		if (unlikely(!(xprt = xprt_from_sock(sk))))
 			goto out;
-		if (unlikely(!test_and_clear_bit(SOCK_NOSPACE, &sock->flags)))
+		if (test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags) == 0)
 			goto out;
 
 		xprt_write_space(xprt);