IB/ipath: Change UD to queue work requests like RC & UC

The code to post UD sends processed each work request at the time
ib_post_send() was called, without using a WQE queue.  That only worked
as long as HW resources (PIO buffers) were available to send the packet
immediately.  This patch changes UD to queue work requests the way RC
and UC do, so that the three QP types share more of the send path.
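
As a rough sketch of the resulting flow (illustrative only; the common
post path lives outside this file and post_one_send() is a made-up name
here), posting now just appends an SWQE to the ring and kicks the shared
send routine, much like the removed ipath_post_ruc_send() below did for
RC and UC:

	/* sketch only: SGE/lkey validation omitted; post_one_send() is
	 * a hypothetical name for the common post path */
	static int post_one_send(struct ipath_qp *qp, struct ib_send_wr *wr)
	{
		struct ipath_swqe *wqe;
		unsigned long flags;
		u32 next;

		spin_lock_irqsave(&qp->s_lock, flags);
		next = qp->s_head + 1;
		if (next >= qp->s_size)
			next = 0;
		if (next == qp->s_last) {
			/* SWQE ring is full */
			spin_unlock_irqrestore(&qp->s_lock, flags);
			return -EINVAL;
		}
		wqe = get_swqe_ptr(qp, qp->s_head);
		wqe->wr = *wr;			/* queue the request */
		wqe->ssn = qp->s_ssn++;
		qp->s_head = next;
		spin_unlock_irqrestore(&qp->s_lock, flags);

		/* drain the queue via the shared send routine */
		ipath_do_send((unsigned long) qp);
		return 0;
	}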

Signed-off-by: Ralph Campbell <ralph.campbell@qlogic.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
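
The new ipath_send_complete() helper at the bottom of this patch is the
other shared piece: it retires the SWQE at s_last and posts a CQE only
when the request was signaled or completed in error.  A hedged sketch of
how a per-QP-type request builder could use it on a bad WQE
(example_make_req() and wqe_is_bad() are hypothetical; only
ipath_send_complete() comes from this patch):

	/* sketch only; example_make_req()/wqe_is_bad() are made up */
	static int example_make_req(struct ipath_qp *qp)
	{
		struct ipath_swqe *wqe = get_swqe_ptr(qp, qp->s_last);

		if (wqe_is_bad(wqe)) {
			/* retire the WQE; an error status always posts a CQE */
			ipath_send_complete(qp, wqe, IB_WC_LOC_QP_OP_ERR);
			return 0;	/* nothing to transmit this pass */
		}
		/*
		 * Otherwise build qp->s_hdr, set qp->s_hdrwords, and return 1
		 * so ipath_do_send() transmits it and calls back in for more.
		 */
		return 1;
	}
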
diff --git a/drivers/infiniband/hw/ipath/ipath_ruc.c b/drivers/infiniband/hw/ipath/ipath_ruc.c
index c69c252..4b6b7ee 100644
--- a/drivers/infiniband/hw/ipath/ipath_ruc.c
+++ b/drivers/infiniband/hw/ipath/ipath_ruc.c
@@ -31,6 +31,8 @@
  * SOFTWARE.
  */
 
+#include <linux/spinlock.h>
+
 #include "ipath_verbs.h"
 #include "ipath_kernel.h"
 
@@ -106,27 +108,30 @@
 	spin_unlock_irqrestore(&dev->pending_lock, flags);
 }
 
-static int init_sge(struct ipath_qp *qp, struct ipath_rwqe *wqe)
+/**
+ * ipath_init_sge - Validate a RWQE and fill in the SGE state
+ * @qp: the QP
+ *
+ * Return 1 if OK.
+ */
+int ipath_init_sge(struct ipath_qp *qp, struct ipath_rwqe *wqe,
+		   u32 *lengthp, struct ipath_sge_state *ss)
 {
-	int user = to_ipd(qp->ibqp.pd)->user;
 	int i, j, ret;
 	struct ib_wc wc;
 
-	qp->r_len = 0;
+	*lengthp = 0;
 	for (i = j = 0; i < wqe->num_sge; i++) {
 		if (wqe->sg_list[i].length == 0)
 			continue;
 		/* Check LKEY */
-		if ((user && wqe->sg_list[i].lkey == 0) ||
-		    !ipath_lkey_ok(qp, &qp->r_sg_list[j], &wqe->sg_list[i],
-				   IB_ACCESS_LOCAL_WRITE))
+		if (!ipath_lkey_ok(qp, j ? &ss->sg_list[j - 1] : &ss->sge,
+				   &wqe->sg_list[i], IB_ACCESS_LOCAL_WRITE))
 			goto bad_lkey;
-		qp->r_len += wqe->sg_list[i].length;
+		*lengthp += wqe->sg_list[i].length;
 		j++;
 	}
-	qp->r_sge.sge = qp->r_sg_list[0];
-	qp->r_sge.sg_list = qp->r_sg_list + 1;
-	qp->r_sge.num_sge = j;
+	ss->num_sge = j;
 	ret = 1;
 	goto bail;
 
@@ -172,6 +177,8 @@
 	u32 tail;
 	int ret;
 
+	qp->r_sge.sg_list = qp->r_sg_list;
+
 	if (qp->ibqp.srq) {
 		srq = to_isrq(qp->ibqp.srq);
 		handler = srq->ibsrq.event_handler;
@@ -199,7 +206,8 @@
 		wqe = get_rwqe_ptr(rq, tail);
 		if (++tail >= rq->size)
 			tail = 0;
-	} while (!wr_id_only && !init_sge(qp, wqe));
+	} while (!wr_id_only && !ipath_init_sge(qp, wqe, &qp->r_len,
+						&qp->r_sge));
 	qp->r_wr_id = wqe->wr_id;
 	wq->tail = tail;
 
@@ -239,9 +247,9 @@
 
 /**
 * ipath_ruc_loopback - handle UC and RC loopback requests
- * @sqp: the loopback QP
+ * @sqp: the sending QP
  *
- * This is called from ipath_do_uc_send() or ipath_do_rc_send() to
+ * This is called from ipath_do_send() to
  * forward a WQE addressed to the same HCA.
  * Note that although we are single threaded due to the tasklet, we still
  * have to protect against post_send().  We don't have to worry about
@@ -450,40 +458,18 @@
 	wc.byte_len = wqe->length;
 	wc.qp = &qp->ibqp;
 	wc.src_qp = qp->remote_qpn;
-	/* XXX do we know which pkey matched? Only needed for GSI. */
 	wc.pkey_index = 0;
 	wc.slid = qp->remote_ah_attr.dlid;
 	wc.sl = qp->remote_ah_attr.sl;
 	wc.dlid_path_bits = 0;
+	wc.port_num = 1;
 	/* Signal completion event if the solicited bit is set. */
 	ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
 		       wqe->wr.send_flags & IB_SEND_SOLICITED);
 
 send_comp:
 	sqp->s_rnr_retry = sqp->s_rnr_retry_cnt;
-
-	if (!(sqp->s_flags & IPATH_S_SIGNAL_REQ_WR) ||
-	    (wqe->wr.send_flags & IB_SEND_SIGNALED)) {
-		wc.wr_id = wqe->wr.wr_id;
-		wc.status = IB_WC_SUCCESS;
-		wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
-		wc.vendor_err = 0;
-		wc.byte_len = wqe->length;
-		wc.qp = &sqp->ibqp;
-		wc.src_qp = 0;
-		wc.pkey_index = 0;
-		wc.slid = 0;
-		wc.sl = 0;
-		wc.dlid_path_bits = 0;
-		wc.port_num = 0;
-		ipath_cq_enter(to_icq(sqp->ibqp.send_cq), &wc, 0);
-	}
-
-	/* Update s_last now that we are finished with the SWQE */
-	spin_lock_irqsave(&sqp->s_lock, flags);
-	if (++sqp->s_last >= sqp->s_size)
-		sqp->s_last = 0;
-	spin_unlock_irqrestore(&sqp->s_lock, flags);
+	ipath_send_complete(sqp, wqe, IB_WC_SUCCESS);
 	goto again;
 
 done:
@@ -491,13 +477,11 @@
 		wake_up(&qp->wait);
 }
 
-static int want_buffer(struct ipath_devdata *dd)
+static void want_buffer(struct ipath_devdata *dd)
 {
 	set_bit(IPATH_S_PIOINTBUFAVAIL, &dd->ipath_sendctrl);
 	ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
 			 dd->ipath_sendctrl);
-
-	return 0;
 }
 
 /**
@@ -507,14 +491,11 @@
  *
  * Called when we run out of PIO buffers.
  */
-static void ipath_no_bufs_available(struct ipath_qp *qp, struct ipath_ibdev *dev)
+static void ipath_no_bufs_available(struct ipath_qp *qp,
+				    struct ipath_ibdev *dev)
 {
 	unsigned long flags;
 
-	spin_lock_irqsave(&dev->pending_lock, flags);
-	if (list_empty(&qp->piowait))
-		list_add_tail(&qp->piowait, &dev->piowait);
-	spin_unlock_irqrestore(&dev->pending_lock, flags);
 	/*
 	 * Note that as soon as want_buffer() is called and
 	 * possibly before it returns, ipath_ib_piobufavail()
@@ -524,101 +505,14 @@
 	 * We leave the busy flag set so that another post send doesn't
 	 * try to put the same QP on the piowait list again.
 	 */
+	spin_lock_irqsave(&dev->pending_lock, flags);
+	list_add_tail(&qp->piowait, &dev->piowait);
+	spin_unlock_irqrestore(&dev->pending_lock, flags);
 	want_buffer(dev->dd);
 	dev->n_piowait++;
 }
 
 /**
- * ipath_post_ruc_send - post RC and UC sends
- * @qp: the QP to post on
- * @wr: the work request to send
- */
-int ipath_post_ruc_send(struct ipath_qp *qp, struct ib_send_wr *wr)
-{
-	struct ipath_swqe *wqe;
-	unsigned long flags;
-	u32 next;
-	int i, j;
-	int acc;
-	int ret;
-
-	/*
-	 * Don't allow RDMA reads or atomic operations on UC or
-	 * undefined operations.
-	 * Make sure buffer is large enough to hold the result for atomics.
-	 */
-	if (qp->ibqp.qp_type == IB_QPT_UC) {
-		if ((unsigned) wr->opcode >= IB_WR_RDMA_READ) {
-			ret = -EINVAL;
-			goto bail;
-		}
-	} else if ((unsigned) wr->opcode > IB_WR_ATOMIC_FETCH_AND_ADD) {
-		ret = -EINVAL;
-		goto bail;
-	} else if (wr->opcode >= IB_WR_ATOMIC_CMP_AND_SWP &&
-		   (wr->num_sge == 0 ||
-		    wr->sg_list[0].length < sizeof(u64) ||
-		    wr->sg_list[0].addr & (sizeof(u64) - 1))) {
-		ret = -EINVAL;
-		goto bail;
-	} else if (wr->opcode >= IB_WR_RDMA_READ && !qp->s_max_rd_atomic) {
-		ret = -EINVAL;
-		goto bail;
-	}
-	/* IB spec says that num_sge == 0 is OK. */
-	if (wr->num_sge > qp->s_max_sge) {
-		ret = -ENOMEM;
-		goto bail;
-	}
-	spin_lock_irqsave(&qp->s_lock, flags);
-	next = qp->s_head + 1;
-	if (next >= qp->s_size)
-		next = 0;
-	if (next == qp->s_last) {
-		spin_unlock_irqrestore(&qp->s_lock, flags);
-		ret = -EINVAL;
-		goto bail;
-	}
-
-	wqe = get_swqe_ptr(qp, qp->s_head);
-	wqe->wr = *wr;
-	wqe->ssn = qp->s_ssn++;
-	wqe->sg_list[0].mr = NULL;
-	wqe->sg_list[0].vaddr = NULL;
-	wqe->sg_list[0].length = 0;
-	wqe->sg_list[0].sge_length = 0;
-	wqe->length = 0;
-	acc = wr->opcode >= IB_WR_RDMA_READ ? IB_ACCESS_LOCAL_WRITE : 0;
-	for (i = 0, j = 0; i < wr->num_sge; i++) {
-		if (to_ipd(qp->ibqp.pd)->user && wr->sg_list[i].lkey == 0) {
-			spin_unlock_irqrestore(&qp->s_lock, flags);
-			ret = -EINVAL;
-			goto bail;
-		}
-		if (wr->sg_list[i].length == 0)
-			continue;
-		if (!ipath_lkey_ok(qp, &wqe->sg_list[j], &wr->sg_list[i],
-				   acc)) {
-			spin_unlock_irqrestore(&qp->s_lock, flags);
-			ret = -EINVAL;
-			goto bail;
-		}
-		wqe->length += wr->sg_list[i].length;
-		j++;
-	}
-	wqe->wr.num_sge = j;
-	qp->s_head = next;
-	spin_unlock_irqrestore(&qp->s_lock, flags);
-
-	ipath_do_ruc_send((unsigned long) qp);
-
-	ret = 0;
-
-bail:
-	return ret;
-}
-
-/**
  * ipath_make_grh - construct a GRH header
  * @dev: a pointer to the ipath device
  * @hdr: a pointer to the GRH header being constructed
@@ -648,81 +542,16 @@
 	return sizeof(struct ib_grh) / sizeof(u32);
 }
 
-/**
- * ipath_do_ruc_send - perform a send on an RC or UC QP
- * @data: contains a pointer to the QP
- *
- * Process entries in the send work queue until credit or queue is
- * exhausted.  Only allow one CPU to send a packet per QP (tasklet).
- * Otherwise, after we drop the QP s_lock, two threads could send
- * packets out of order.
- */
-void ipath_do_ruc_send(unsigned long data)
+void ipath_make_ruc_header(struct ipath_ibdev *dev, struct ipath_qp *qp,
+			   struct ipath_other_headers *ohdr,
+			   u32 bth0, u32 bth2)
 {
-	struct ipath_qp *qp = (struct ipath_qp *)data;
-	struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
-	unsigned long flags;
 	u16 lrh0;
 	u32 nwords;
 	u32 extra_bytes;
-	u32 bth0;
-	u32 bth2;
-	u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu);
-	struct ipath_other_headers *ohdr;
-
-	if (test_and_set_bit(IPATH_S_BUSY, &qp->s_busy))
-		goto bail;
-
-	if (unlikely(qp->remote_ah_attr.dlid == dev->dd->ipath_lid)) {
-		ipath_ruc_loopback(qp);
-		goto clear;
-	}
-
-	ohdr = &qp->s_hdr.u.oth;
-	if (qp->remote_ah_attr.ah_flags & IB_AH_GRH)
-		ohdr = &qp->s_hdr.u.l.oth;
-
-again:
-	/* Check for a constructed packet to be sent. */
-	if (qp->s_hdrwords != 0) {
-		/*
-		 * If no PIO bufs are available, return.  An interrupt will
-		 * call ipath_ib_piobufavail() when one is available.
-		 */
-		if (ipath_verbs_send(dev->dd, qp->s_hdrwords,
-				     (u32 *) &qp->s_hdr, qp->s_cur_size,
-				     qp->s_cur_sge)) {
-			ipath_no_bufs_available(qp, dev);
-			goto bail;
-		}
-		dev->n_unicast_xmit++;
-		/* Record that we sent the packet and s_hdr is empty. */
-		qp->s_hdrwords = 0;
-	}
-
-	/*
-	 * The lock is needed to synchronize between setting
-	 * qp->s_ack_state, resend timer, and post_send().
-	 */
-	spin_lock_irqsave(&qp->s_lock, flags);
-
-	if (!((qp->ibqp.qp_type == IB_QPT_RC) ?
-	       ipath_make_rc_req(qp, ohdr, pmtu, &bth0, &bth2) :
-	       ipath_make_uc_req(qp, ohdr, pmtu, &bth0, &bth2))) {
-		/*
-		 * Clear the busy bit before unlocking to avoid races with
-		 * adding new work queue items and then failing to process
-		 * them.
-		 */
-		clear_bit(IPATH_S_BUSY, &qp->s_busy);
-		spin_unlock_irqrestore(&qp->s_lock, flags);
-		goto bail;
-	}
-
-	spin_unlock_irqrestore(&qp->s_lock, flags);
 
 	/* Construct the header. */
-	extra_bytes = (4 - qp->s_cur_size) & 3;
+	extra_bytes = -qp->s_cur_size & 3;
 	nwords = (qp->s_cur_size + extra_bytes) >> 2;
 	lrh0 = IPATH_LRH_BTH;
 	if (unlikely(qp->remote_ah_attr.ah_flags & IB_AH_GRH)) {
@@ -734,20 +563,99 @@
 	lrh0 |= qp->remote_ah_attr.sl << 4;
 	qp->s_hdr.lrh[0] = cpu_to_be16(lrh0);
 	qp->s_hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid);
-	qp->s_hdr.lrh[2] = cpu_to_be16(qp->s_hdrwords + nwords +
-				       SIZE_OF_CRC);
+	qp->s_hdr.lrh[2] = cpu_to_be16(qp->s_hdrwords + nwords + SIZE_OF_CRC);
 	qp->s_hdr.lrh[3] = cpu_to_be16(dev->dd->ipath_lid);
 	bth0 |= ipath_get_pkey(dev->dd, qp->s_pkey_index);
 	bth0 |= extra_bytes << 20;
-	ohdr->bth[0] = cpu_to_be32(bth0);
+	ohdr->bth[0] = cpu_to_be32(bth0 | (1 << 22));
 	ohdr->bth[1] = cpu_to_be32(qp->remote_qpn);
 	ohdr->bth[2] = cpu_to_be32(bth2);
+}
 
-	/* Check for more work to do. */
-	goto again;
+/**
+ * ipath_do_send - perform a send on a QP
+ * @data: contains a pointer to the QP
+ *
+ * Process entries in the send work queue until credit or queue is
+ * exhausted.  Only allow one CPU to send a packet per QP (tasklet).
+ * Otherwise, two threads could send packets out of order.
+ */
+void ipath_do_send(unsigned long data)
+{
+	struct ipath_qp *qp = (struct ipath_qp *)data;
+	struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
+	int (*make_req)(struct ipath_qp *qp);
 
+	if (test_and_set_bit(IPATH_S_BUSY, &qp->s_busy))
+		goto bail;
+
+	if ((qp->ibqp.qp_type == IB_QPT_RC ||
+	     qp->ibqp.qp_type == IB_QPT_UC) &&
+	    qp->remote_ah_attr.dlid == dev->dd->ipath_lid) {
+		ipath_ruc_loopback(qp);
+		goto clear;
+	}
+
+	if (qp->ibqp.qp_type == IB_QPT_RC)
+	       make_req = ipath_make_rc_req;
+	else if (qp->ibqp.qp_type == IB_QPT_UC)
+	       make_req = ipath_make_uc_req;
+	else
+	       make_req = ipath_make_ud_req;
+
+again:
+	/* Check for a constructed packet to be sent. */
+	if (qp->s_hdrwords != 0) {
+		/*
+		 * If no PIO bufs are available, return.  An interrupt will
+		 * call ipath_ib_piobufavail() when one is available.
+		 */
+		if (ipath_verbs_send(qp, &qp->s_hdr, qp->s_hdrwords,
+				     qp->s_cur_sge, qp->s_cur_size)) {
+			ipath_no_bufs_available(qp, dev);
+			goto bail;
+		}
+		dev->n_unicast_xmit++;
+		/* Record that we sent the packet and s_hdr is empty. */
+		qp->s_hdrwords = 0;
+	}
+
+	if (make_req(qp))
+		goto again;
 clear:
 	clear_bit(IPATH_S_BUSY, &qp->s_busy);
-bail:
-	return;
+bail:;
+}
+
+void ipath_send_complete(struct ipath_qp *qp, struct ipath_swqe *wqe,
+			 enum ib_wc_status status)
+{
+	u32 last = qp->s_last;
+
+	if (++last == qp->s_size)
+		last = 0;
+	qp->s_last = last;
+
+	/* See ch. 11.2.4.1 and 10.7.3.1 */
+	if (!(qp->s_flags & IPATH_S_SIGNAL_REQ_WR) ||
+	    (wqe->wr.send_flags & IB_SEND_SIGNALED) ||
+	    status != IB_WC_SUCCESS) {
+		struct ib_wc wc;
+
+		wc.wr_id = wqe->wr.wr_id;
+		wc.status = status;
+		wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
+		wc.vendor_err = 0;
+		wc.byte_len = wqe->length;
+		wc.imm_data = 0;
+		wc.qp = &qp->ibqp;
+		wc.src_qp = 0;
+		wc.wc_flags = 0;
+		wc.pkey_index = 0;
+		wc.slid = 0;
+		wc.sl = 0;
+		wc.dlid_path_bits = 0;
+		wc.port_num = 0;
+		ipath_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 0);
+	}
 }