Merge branches 'cma', 'cxgb3', 'ipoib', 'mlx4' and 'nes' into for-next
diff --git a/Documentation/infiniband/ipoib.txt b/Documentation/infiniband/ipoib.txt
index 864ff32..6d40f00 100644
--- a/Documentation/infiniband/ipoib.txt
+++ b/Documentation/infiniband/ipoib.txt
@@ -24,6 +24,49 @@
   The P_Key for any interface is given by the "pkey" file, and the
   main interface for a subinterface is in "parent."
 
+Datagram vs Connected modes
+
+  The IPoIB driver supports two modes of operation: datagram and
+  connected.  The mode is set and read through an interface's
+  /sys/class/net/<intf name>/mode file.
+
+  In datagram mode, the IB UD (Unreliable Datagram) transport is used
+  and so the interface MTU is equal to the IB L2 MTU minus the
+  IPoIB encapsulation header (4 bytes).  For example, in a typical IB
+  fabric with a 2K MTU, the IPoIB MTU will be 2048 - 4 = 2044 bytes.
+
+  In connected mode, the IB RC (Reliable Connected) transport is used.
+  Connected mode takes advantage of the connected nature of the
+  IB transport and allows an MTU up to the maximal IP packet size of
+  64K, which reduces the number of IP packets needed for handling
+  large UDP datagrams, TCP segments, etc., and increases the performance
+  for large messages.
+
+  In connected mode, the interface's UD QP is still used for multicast
+  and communication with peers that don't support connected mode. In
+  this case, RX emulation of ICMP PMTU packets is used to cause the
+  networking stack to use the smaller UD MTU for these neighbours.
+
+Stateless offloads
+
+  If the IB HW supports IPoIB stateless offloads, IPoIB advertises
+  TCP/IP checksum and/or Large Send (LSO) offloading capability to the
+  network stack.
+
+  Large Receive (LRO) offloading is also implemented and may be turned
+  on/off using ethtool calls.  Currently LRO is supported only for
+  checksum offload capable devices.
+
+  Stateless offloads are supported only in datagram mode.
+
+Interrupt moderation
+
+  If the underlying IB device supports CQ event moderation, one can
+  use ethtool to set interrupt mitigation parameters and thus reduce
+  the overhead incurred by handling interrupts.  The main code path of
+  IPoIB doesn't use events for TX completion signaling so only RX
+  moderation is supported.
+
 Debugging Information
 
   By compiling the IPoIB driver with CONFIG_INFINIBAND_IPOIB_DEBUG set
@@ -55,3 +98,5 @@
     http://ietf.org/rfc/rfc4391.txt 
   IP over InfiniBand (IPoIB) Architecture (RFC 4392)
     http://ietf.org/rfc/rfc4392.txt 
+  IP over InfiniBand: Connected Mode (RFC 4755)
+    http://ietf.org/rfc/rfc4755.txt
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 2a2e508..851de83 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -297,21 +297,25 @@
 	id_priv->cma_dev = NULL;
 }
 
-static int cma_set_qkey(struct ib_device *device, u8 port_num,
-			enum rdma_port_space ps,
-			struct rdma_dev_addr *dev_addr, u32 *qkey)
+static int cma_set_qkey(struct rdma_id_private *id_priv)
 {
 	struct ib_sa_mcmember_rec rec;
 	int ret = 0;
 
-	switch (ps) {
+	if (id_priv->qkey)
+		return 0;
+
+	switch (id_priv->id.ps) {
 	case RDMA_PS_UDP:
-		*qkey = RDMA_UDP_QKEY;
+		id_priv->qkey = RDMA_UDP_QKEY;
 		break;
 	case RDMA_PS_IPOIB:
-		ib_addr_get_mgid(dev_addr, &rec.mgid);
-		ret = ib_sa_get_mcmember_rec(device, port_num, &rec.mgid, &rec);
-		*qkey = be32_to_cpu(rec.qkey);
+		ib_addr_get_mgid(&id_priv->id.route.addr.dev_addr, &rec.mgid);
+		ret = ib_sa_get_mcmember_rec(id_priv->id.device,
+					     id_priv->id.port_num, &rec.mgid,
+					     &rec);
+		if (!ret)
+			id_priv->qkey = be32_to_cpu(rec.qkey);
 		break;
 	default:
 		break;
@@ -341,12 +345,7 @@
 		ret = ib_find_cached_gid(cma_dev->device, &gid,
 					 &id_priv->id.port_num, NULL);
 		if (!ret) {
-			ret = cma_set_qkey(cma_dev->device,
-					   id_priv->id.port_num,
-					   id_priv->id.ps, dev_addr,
-					   &id_priv->qkey);
-			if (!ret)
-				cma_attach_to_dev(id_priv, cma_dev);
+			cma_attach_to_dev(id_priv, cma_dev);
 			break;
 		}
 	}
@@ -578,6 +577,10 @@
 	*qp_attr_mask = IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT;
 
 	if (cma_is_ud_ps(id_priv->id.ps)) {
+		ret = cma_set_qkey(id_priv);
+		if (ret)
+			return ret;
+
 		qp_attr->qkey = id_priv->qkey;
 		*qp_attr_mask |= IB_QP_QKEY;
 	} else {
@@ -2201,6 +2204,12 @@
 			event.status = ib_event->param.sidr_rep_rcvd.status;
 			break;
 		}
+		ret = cma_set_qkey(id_priv);
+		if (ret) {
+			event.event = RDMA_CM_EVENT_ADDR_ERROR;
+			event.status = -EINVAL;
+			break;
+		}
 		if (id_priv->qkey != rep->qkey) {
 			event.event = RDMA_CM_EVENT_UNREACHABLE;
 			event.status = -EINVAL;
@@ -2480,10 +2489,14 @@
 			     const void *private_data, int private_data_len)
 {
 	struct ib_cm_sidr_rep_param rep;
+	int ret;
 
 	memset(&rep, 0, sizeof rep);
 	rep.status = status;
 	if (status == IB_SIDR_SUCCESS) {
+		ret = cma_set_qkey(id_priv);
+		if (ret)
+			return ret;
 		rep.qp_num = id_priv->qp_num;
 		rep.qkey = id_priv->qkey;
 	}
@@ -2713,6 +2726,10 @@
 		    IB_SA_MCMEMBER_REC_FLOW_LABEL |
 		    IB_SA_MCMEMBER_REC_TRAFFIC_CLASS;
 
+	if (id_priv->id.ps == RDMA_PS_IPOIB)
+		comp_mask |= IB_SA_MCMEMBER_REC_RATE |
+			     IB_SA_MCMEMBER_REC_RATE_SELECTOR;
+
 	mc->multicast.ib = ib_sa_join_multicast(&sa_client, id_priv->id.device,
 						id_priv->id.port_num, &rec,
 						comp_mask, GFP_KERNEL,
diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.c b/drivers/infiniband/hw/cxgb3/cxio_hal.c
index a4a82bf..8d71086 100644
--- a/drivers/infiniband/hw/cxgb3/cxio_hal.c
+++ b/drivers/infiniband/hw/cxgb3/cxio_hal.c
@@ -152,7 +152,7 @@
 	sge_cmd = qpid << 8 | 3;
 	wqe->sge_cmd = cpu_to_be64(sge_cmd);
 	skb->priority = CPL_PRIORITY_CONTROL;
-	return (cxgb3_ofld_send(rdev_p->t3cdev_p, skb));
+	return iwch_cxgb3_ofld_send(rdev_p->t3cdev_p, skb);
 }
 
 int cxio_create_cq(struct cxio_rdev *rdev_p, struct t3_cq *cq)
@@ -571,7 +571,7 @@
 	     (unsigned long long) rdev_p->ctrl_qp.dma_addr,
 	     rdev_p->ctrl_qp.workq, 1 << T3_CTRL_QP_SIZE_LOG2);
 	skb->priority = CPL_PRIORITY_CONTROL;
-	return (cxgb3_ofld_send(rdev_p->t3cdev_p, skb));
+	return iwch_cxgb3_ofld_send(rdev_p->t3cdev_p, skb);
 err:
 	kfree_skb(skb);
 	return err;
@@ -701,7 +701,7 @@
 	u32 stag_idx;
 	u32 wptr;
 
-	if (rdev_p->flags)
+	if (cxio_fatal_error(rdev_p))
 		return -EIO;
 
 	stag_state = stag_state > 0;
@@ -858,7 +858,7 @@
 	wqe->qp_dma_size = cpu_to_be32(attr->qp_dma_size);
 	wqe->irs = cpu_to_be32(attr->irs);
 	skb->priority = 0;	/* 0=>ToeQ; 1=>CtrlQ */
-	return (cxgb3_ofld_send(rdev_p->t3cdev_p, skb));
+	return iwch_cxgb3_ofld_send(rdev_p->t3cdev_p, skb);
 }
 
 void cxio_register_ev_cb(cxio_hal_ev_callback_func_t ev_cb)
@@ -1041,9 +1041,9 @@
 		cxio_hal_pblpool_destroy(rdev_p);
 		cxio_hal_rqtpool_destroy(rdev_p);
 		list_del(&rdev_p->entry);
-		rdev_p->t3cdev_p->ulp = NULL;
 		cxio_hal_destroy_ctrl_qp(rdev_p);
 		cxio_hal_destroy_resource(rdev_p->rscp);
+		rdev_p->t3cdev_p->ulp = NULL;
 	}
 }
 
diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.h b/drivers/infiniband/hw/cxgb3/cxio_hal.h
index 094a66d..bfd03bf 100644
--- a/drivers/infiniband/hw/cxgb3/cxio_hal.h
+++ b/drivers/infiniband/hw/cxgb3/cxio_hal.h
@@ -115,6 +115,11 @@
 #define	CXIO_ERROR_FATAL	1
 };
 
+static inline int cxio_fatal_error(struct cxio_rdev *rdev_p)
+{
+	return rdev_p->flags & CXIO_ERROR_FATAL;
+}
+
 static inline int cxio_num_stags(struct cxio_rdev *rdev_p)
 {
 	return min((int)T3_MAX_NUM_STAG, (int)((rdev_p->rnic_info.tpt_top - rdev_p->rnic_info.tpt_base) >> 5));
@@ -188,6 +193,7 @@
 void cxio_flush_hw_cq(struct t3_cq *cq);
 int cxio_poll_cq(struct t3_wq *wq, struct t3_cq *cq, struct t3_cqe *cqe,
 		     u8 *cqe_flushed, u64 *cookie, u32 *credit);
+int iwch_cxgb3_ofld_send(struct t3cdev *tdev, struct sk_buff *skb);
 
 #define MOD "iw_cxgb3: "
 #define PDBG(fmt, args...) pr_debug(MOD fmt, ## args)
diff --git a/drivers/infiniband/hw/cxgb3/iwch.c b/drivers/infiniband/hw/cxgb3/iwch.c
index 37a4fc2..26fc0a4 100644
--- a/drivers/infiniband/hw/cxgb3/iwch.c
+++ b/drivers/infiniband/hw/cxgb3/iwch.c
@@ -165,12 +165,19 @@
 static void iwch_err_handler(struct t3cdev *tdev, u32 status, u32 error)
 {
 	struct cxio_rdev *rdev = tdev->ulp;
+	struct iwch_dev *rnicp = rdev_to_iwch_dev(rdev);
+	struct ib_event event;
 
-	if (status == OFFLOAD_STATUS_DOWN)
+	if (status == OFFLOAD_STATUS_DOWN) {
 		rdev->flags = CXIO_ERROR_FATAL;
 
-	return;
+		event.device = &rnicp->ibdev;
+		event.event  = IB_EVENT_DEVICE_FATAL;
+		event.element.port_num = 0;
+		ib_dispatch_event(&event);
+	}
 
+	return;
 }
 
 static int __init iwch_init_module(void)
diff --git a/drivers/infiniband/hw/cxgb3/iwch.h b/drivers/infiniband/hw/cxgb3/iwch.h
index 3773453..8473550 100644
--- a/drivers/infiniband/hw/cxgb3/iwch.h
+++ b/drivers/infiniband/hw/cxgb3/iwch.h
@@ -117,6 +117,11 @@
 	return container_of(ibdev, struct iwch_dev, ibdev);
 }
 
+static inline struct iwch_dev *rdev_to_iwch_dev(struct cxio_rdev *rdev)
+{
+	return container_of(rdev, struct iwch_dev, rdev);
+}
+
 static inline int t3b_device(const struct iwch_dev *rhp)
 {
 	return rhp->rdev.t3cdev_p->type == T3B;
diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c
index 8699947..fef3f1a 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_cm.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_cm.c
@@ -139,6 +139,38 @@
 	put_ep(&ep->com);
 }
 
+int iwch_l2t_send(struct t3cdev *tdev, struct sk_buff *skb, struct l2t_entry *l2e)
+{
+	int	error = 0;
+	struct cxio_rdev *rdev;
+
+	rdev = (struct cxio_rdev *)tdev->ulp;
+	if (cxio_fatal_error(rdev)) {
+		kfree_skb(skb);
+		return -EIO;
+	}
+	error = l2t_send(tdev, skb, l2e);
+	if (error)
+		kfree_skb(skb);
+	return error;
+}
+
+int iwch_cxgb3_ofld_send(struct t3cdev *tdev, struct sk_buff *skb)
+{
+	int	error = 0;
+	struct cxio_rdev *rdev;
+
+	rdev = (struct cxio_rdev *)tdev->ulp;
+	if (cxio_fatal_error(rdev)) {
+		kfree_skb(skb);
+		return -EIO;
+	}
+	error = cxgb3_ofld_send(tdev, skb);
+	if (error)
+		kfree_skb(skb);
+	return error;
+}
+
 static void release_tid(struct t3cdev *tdev, u32 hwtid, struct sk_buff *skb)
 {
 	struct cpl_tid_release *req;
@@ -150,7 +182,7 @@
 	req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
 	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_TID_RELEASE, hwtid));
 	skb->priority = CPL_PRIORITY_SETUP;
-	cxgb3_ofld_send(tdev, skb);
+	iwch_cxgb3_ofld_send(tdev, skb);
 	return;
 }
 
@@ -172,8 +204,7 @@
 	req->val = cpu_to_be64(1 << S_TCB_RX_QUIESCE);
 
 	skb->priority = CPL_PRIORITY_DATA;
-	cxgb3_ofld_send(ep->com.tdev, skb);
-	return 0;
+	return iwch_cxgb3_ofld_send(ep->com.tdev, skb);
 }
 
 int iwch_resume_tid(struct iwch_ep *ep)
@@ -194,8 +225,7 @@
 	req->val = 0;
 
 	skb->priority = CPL_PRIORITY_DATA;
-	cxgb3_ofld_send(ep->com.tdev, skb);
-	return 0;
+	return iwch_cxgb3_ofld_send(ep->com.tdev, skb);
 }
 
 static void set_emss(struct iwch_ep *ep, u16 opt)
@@ -252,18 +282,22 @@
 
 void __free_ep(struct kref *kref)
 {
-	struct iwch_ep_common *epc;
-	epc = container_of(kref, struct iwch_ep_common, kref);
-	PDBG("%s ep %p state %s\n", __func__, epc, states[state_read(epc)]);
-	kfree(epc);
+	struct iwch_ep *ep;
+	ep = container_of(container_of(kref, struct iwch_ep_common, kref),
+			  struct iwch_ep, com);
+	PDBG("%s ep %p state %s\n", __func__, ep, states[state_read(&ep->com)]);
+	if (ep->com.flags & RELEASE_RESOURCES) {
+		cxgb3_remove_tid(ep->com.tdev, (void *)ep, ep->hwtid);
+		dst_release(ep->dst);
+		l2t_release(L2DATA(ep->com.tdev), ep->l2t);
+	}
+	kfree(ep);
 }
 
 static void release_ep_resources(struct iwch_ep *ep)
 {
 	PDBG("%s ep %p tid %d\n", __func__, ep, ep->hwtid);
-	cxgb3_remove_tid(ep->com.tdev, (void *)ep, ep->hwtid);
-	dst_release(ep->dst);
-	l2t_release(L2DATA(ep->com.tdev), ep->l2t);
+	ep->com.flags |= RELEASE_RESOURCES;
 	put_ep(&ep->com);
 }
 
@@ -382,7 +416,7 @@
 
 	PDBG("%s t3cdev %p\n", __func__, dev);
 	req->cmd = CPL_ABORT_NO_RST;
-	cxgb3_ofld_send(dev, skb);
+	iwch_cxgb3_ofld_send(dev, skb);
 }
 
 static int send_halfclose(struct iwch_ep *ep, gfp_t gfp)
@@ -402,8 +436,7 @@
 	req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_CLOSE_CON));
 	req->wr.wr_lo = htonl(V_WR_TID(ep->hwtid));
 	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_CON_REQ, ep->hwtid));
-	l2t_send(ep->com.tdev, skb, ep->l2t);
-	return 0;
+	return iwch_l2t_send(ep->com.tdev, skb, ep->l2t);
 }
 
 static int send_abort(struct iwch_ep *ep, struct sk_buff *skb, gfp_t gfp)
@@ -424,8 +457,7 @@
 	req->wr.wr_lo = htonl(V_WR_TID(ep->hwtid));
 	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_ABORT_REQ, ep->hwtid));
 	req->cmd = CPL_ABORT_SEND_RST;
-	l2t_send(ep->com.tdev, skb, ep->l2t);
-	return 0;
+	return iwch_l2t_send(ep->com.tdev, skb, ep->l2t);
 }
 
 static int send_connect(struct iwch_ep *ep)
@@ -469,8 +501,7 @@
 	req->opt0l = htonl(opt0l);
 	req->params = 0;
 	req->opt2 = htonl(opt2);
-	l2t_send(ep->com.tdev, skb, ep->l2t);
-	return 0;
+	return iwch_l2t_send(ep->com.tdev, skb, ep->l2t);
 }
 
 static void send_mpa_req(struct iwch_ep *ep, struct sk_buff *skb)
@@ -527,7 +558,7 @@
 	req->sndseq = htonl(ep->snd_seq);
 	BUG_ON(ep->mpa_skb);
 	ep->mpa_skb = skb;
-	l2t_send(ep->com.tdev, skb, ep->l2t);
+	iwch_l2t_send(ep->com.tdev, skb, ep->l2t);
 	start_ep_timer(ep);
 	state_set(&ep->com, MPA_REQ_SENT);
 	return;
@@ -578,8 +609,7 @@
 	req->sndseq = htonl(ep->snd_seq);
 	BUG_ON(ep->mpa_skb);
 	ep->mpa_skb = skb;
-	l2t_send(ep->com.tdev, skb, ep->l2t);
-	return 0;
+	return iwch_l2t_send(ep->com.tdev, skb, ep->l2t);
 }
 
 static int send_mpa_reply(struct iwch_ep *ep, const void *pdata, u8 plen)
@@ -630,8 +660,7 @@
 	req->sndseq = htonl(ep->snd_seq);
 	ep->mpa_skb = skb;
 	state_set(&ep->com, MPA_REP_SENT);
-	l2t_send(ep->com.tdev, skb, ep->l2t);
-	return 0;
+	return iwch_l2t_send(ep->com.tdev, skb, ep->l2t);
 }
 
 static int act_establish(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
@@ -795,7 +824,7 @@
 	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_RX_DATA_ACK, ep->hwtid));
 	req->credit_dack = htonl(V_RX_CREDITS(credits) | V_RX_FORCE_ACK(1));
 	skb->priority = CPL_PRIORITY_ACK;
-	cxgb3_ofld_send(ep->com.tdev, skb);
+	iwch_cxgb3_ofld_send(ep->com.tdev, skb);
 	return credits;
 }
 
@@ -1127,8 +1156,8 @@
 	 * We get 2 abort replies from the HW.  The first one must
 	 * be ignored except for scribbling that we need one more.
 	 */
-	if (!(ep->flags & ABORT_REQ_IN_PROGRESS)) {
-		ep->flags |= ABORT_REQ_IN_PROGRESS;
+	if (!(ep->com.flags & ABORT_REQ_IN_PROGRESS)) {
+		ep->com.flags |= ABORT_REQ_IN_PROGRESS;
 		return CPL_RET_BUF_DONE;
 	}
 
@@ -1203,8 +1232,7 @@
 	req->opt1 = htonl(V_CONN_POLICY(CPL_CONN_POLICY_ASK));
 
 	skb->priority = 1;
-	cxgb3_ofld_send(ep->com.tdev, skb);
-	return 0;
+	return iwch_cxgb3_ofld_send(ep->com.tdev, skb);
 }
 
 static int pass_open_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
@@ -1237,8 +1265,7 @@
 	req->cpu_idx = 0;
 	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_LISTSRV_REQ, ep->stid));
 	skb->priority = 1;
-	cxgb3_ofld_send(ep->com.tdev, skb);
-	return 0;
+	return iwch_cxgb3_ofld_send(ep->com.tdev, skb);
 }
 
 static int close_listsrv_rpl(struct t3cdev *tdev, struct sk_buff *skb,
@@ -1286,7 +1313,7 @@
 	rpl->opt2 = htonl(opt2);
 	rpl->rsvd = rpl->opt2;	/* workaround for HW bug */
 	skb->priority = CPL_PRIORITY_SETUP;
-	l2t_send(ep->com.tdev, skb, ep->l2t);
+	iwch_l2t_send(ep->com.tdev, skb, ep->l2t);
 
 	return;
 }
@@ -1315,7 +1342,7 @@
 		rpl->opt0l_status = htonl(CPL_PASS_OPEN_REJECT);
 		rpl->opt2 = 0;
 		rpl->rsvd = rpl->opt2;
-		cxgb3_ofld_send(tdev, skb);
+		iwch_cxgb3_ofld_send(tdev, skb);
 	}
 }
 
@@ -1534,8 +1561,8 @@
 	 * We get 2 peer aborts from the HW.  The first one must
 	 * be ignored except for scribbling that we need one more.
 	 */
-	if (!(ep->flags & PEER_ABORT_IN_PROGRESS)) {
-		ep->flags |= PEER_ABORT_IN_PROGRESS;
+	if (!(ep->com.flags & PEER_ABORT_IN_PROGRESS)) {
+		ep->com.flags |= PEER_ABORT_IN_PROGRESS;
 		return CPL_RET_BUF_DONE;
 	}
 
@@ -1613,7 +1640,7 @@
 	rpl->wr.wr_lo = htonl(V_WR_TID(ep->hwtid));
 	OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_ABORT_RPL, ep->hwtid));
 	rpl->cmd = CPL_ABORT_NO_RST;
-	cxgb3_ofld_send(ep->com.tdev, rpl_skb);
+	iwch_cxgb3_ofld_send(ep->com.tdev, rpl_skb);
 out:
 	if (release)
 		release_ep_resources(ep);
@@ -2017,8 +2044,11 @@
 	ep->com.rpl_done = 0;
 	ep->com.rpl_err = 0;
 	err = listen_stop(ep);
+	if (err)
+		goto done;
 	wait_event(ep->com.waitq, ep->com.rpl_done);
 	cxgb3_free_stid(ep->com.tdev, ep->stid);
+done:
 	err = ep->com.rpl_err;
 	cm_id->rem_ref(cm_id);
 	put_ep(&ep->com);
@@ -2030,12 +2060,22 @@
 	int ret=0;
 	unsigned long flags;
 	int close = 0;
+	int fatal = 0;
+	struct t3cdev *tdev;
+	struct cxio_rdev *rdev;
 
 	spin_lock_irqsave(&ep->com.lock, flags);
 
 	PDBG("%s ep %p state %s, abrupt %d\n", __func__, ep,
 	     states[ep->com.state], abrupt);
 
+	tdev = (struct t3cdev *)ep->com.tdev;
+	rdev = (struct cxio_rdev *)tdev->ulp;
+	if (cxio_fatal_error(rdev)) {
+		fatal = 1;
+		close_complete_upcall(ep);
+		ep->com.state = DEAD;
+	}
 	switch (ep->com.state) {
 	case MPA_REQ_WAIT:
 	case MPA_REQ_SENT:
@@ -2075,7 +2115,11 @@
 			ret = send_abort(ep, NULL, gfp);
 		else
 			ret = send_halfclose(ep, gfp);
+		if (ret)
+			fatal = 1;
 	}
+	if (fatal)
+		release_ep_resources(ep);
 	return ret;
 }
 
diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.h b/drivers/infiniband/hw/cxgb3/iwch_cm.h
index d7c7e09..43c0aea 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_cm.h
+++ b/drivers/infiniband/hw/cxgb3/iwch_cm.h
@@ -147,6 +147,7 @@
 enum iwch_ep_flags {
 	PEER_ABORT_IN_PROGRESS	= (1 << 0),
 	ABORT_REQ_IN_PROGRESS	= (1 << 1),
+	RELEASE_RESOURCES	= (1 << 2),
 };
 
 struct iwch_ep_common {
@@ -161,6 +162,7 @@
 	wait_queue_head_t waitq;
 	int rpl_done;
 	int rpl_err;
+	u32 flags;
 };
 
 struct iwch_listen_ep {
@@ -188,7 +190,6 @@
 	u16 plen;
 	u32 ird;
 	u32 ord;
-	u32 flags;
 };
 
 static inline struct iwch_ep *to_ep(struct iw_cm_id *cm_id)
diff --git a/drivers/infiniband/hw/cxgb3/iwch_qp.c b/drivers/infiniband/hw/cxgb3/iwch_qp.c
index c758fbd..2f546a6 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_qp.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_qp.c
@@ -751,7 +751,7 @@
 	wqe->send.wrh.gen_tid_len = cpu_to_be32(V_FW_RIWR_TID(qhp->ep->hwtid)|
 						V_FW_RIWR_LEN(flit_cnt));
 	skb->priority = CPL_PRIORITY_DATA;
-	return cxgb3_ofld_send(qhp->rhp->rdev.t3cdev_p, skb);
+	return iwch_cxgb3_ofld_send(qhp->rhp->rdev.t3cdev_p, skb);
 }
 
 /*
@@ -783,7 +783,7 @@
 			 V_FW_RIWR_FLAGS(T3_COMPLETION_FLAG | T3_NOTIFY_FLAG));
 	wqe->send.wrh.gen_tid_len = cpu_to_be32(V_FW_RIWR_TID(qhp->ep->hwtid));
 	skb->priority = CPL_PRIORITY_DATA;
-	return cxgb3_ofld_send(qhp->rhp->rdev.t3cdev_p, skb);
+	return iwch_cxgb3_ofld_send(qhp->rhp->rdev.t3cdev_p, skb);
 }
 
 /*
diff --git a/drivers/infiniband/hw/nes/nes.h b/drivers/infiniband/hw/nes/nes.h
index 04b12ad..17621de 100644
--- a/drivers/infiniband/hw/nes/nes.h
+++ b/drivers/infiniband/hw/nes/nes.h
@@ -289,8 +289,8 @@
 static inline void
 set_wqe_64bit_value(__le32 *wqe_words, u32 index, u64 value)
 {
-	wqe_words[index]     = cpu_to_le32((u32) ((unsigned long)value));
-	wqe_words[index + 1] = cpu_to_le32((u32)(upper_32_bits((unsigned long)value)));
+	wqe_words[index]     = cpu_to_le32((u32) value);
+	wqe_words[index + 1] = cpu_to_le32(upper_32_bits(value));
 }
 
 static inline void
diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c
index 5242515..dbd9a75 100644
--- a/drivers/infiniband/hw/nes/nes_cm.c
+++ b/drivers/infiniband/hw/nes/nes_cm.c
@@ -426,6 +426,7 @@
 	if (type == NES_TIMER_TYPE_CLOSE) {
 		new_send->timetosend += (HZ/10);
 		if (cm_node->recv_entry) {
+			kfree(new_send);
 			WARN_ON(1);
 			return -EINVAL;
 		}
@@ -445,8 +446,8 @@
 		if (ret != NETDEV_TX_OK) {
 			nes_debug(NES_DBG_CM, "Error sending packet %p "
 				"(jiffies = %lu)\n", new_send, jiffies);
-			atomic_dec(&new_send->skb->users);
 			new_send->timetosend = jiffies;
+			ret = NETDEV_TX_OK;
 		} else {
 			cm_packets_sent++;
 			if (!send_retrans) {
@@ -630,7 +631,6 @@
 				nes_debug(NES_DBG_CM, "rexmit failed for "
 					"node=%p\n", cm_node);
 				cm_packets_bounced++;
-				atomic_dec(&send_entry->skb->users);
 				send_entry->retrycount--;
 				nexttimeout = jiffies + NES_SHORT_TIME;
 				settimer = 1;
@@ -666,11 +666,6 @@
 
 		spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags);
 		rem_ref_cm_node(cm_node->cm_core, cm_node);
-		if (ret != NETDEV_TX_OK) {
-			nes_debug(NES_DBG_CM, "rexmit failed for cm_node=%p\n",
-				cm_node);
-			break;
-		}
 	}
 
 	if (settimer) {
@@ -1262,7 +1257,6 @@
 		cm_node->nesqp = NULL;
 	}
 
-	cm_node->freed = 1;
 	kfree(cm_node);
 	return 0;
 }
@@ -1999,13 +1993,17 @@
 		if (loopbackremotelistener == NULL) {
 			create_event(cm_node, NES_CM_EVENT_ABORTED);
 		} else {
-			atomic_inc(&cm_loopbacks);
 			loopback_cm_info = *cm_info;
 			loopback_cm_info.loc_port = cm_info->rem_port;
 			loopback_cm_info.rem_port = cm_info->loc_port;
 			loopback_cm_info.cm_id = loopbackremotelistener->cm_id;
 			loopbackremotenode = make_cm_node(cm_core, nesvnic,
 				&loopback_cm_info, loopbackremotelistener);
+			if (!loopbackremotenode) {
+				rem_ref_cm_node(cm_node->cm_core, cm_node);
+				return NULL;
+			}
+			atomic_inc(&cm_loopbacks);
 			loopbackremotenode->loopbackpartner = cm_node;
 			loopbackremotenode->tcp_cntxt.rcv_wscale =
 				NES_CM_DEFAULT_RCV_WND_SCALE;
@@ -2690,6 +2688,7 @@
 	struct ib_mr *ibmr = NULL;
 	struct ib_phys_buf ibphysbuf;
 	struct nes_pd *nespd;
+	u64 tagged_offset;
 
 
 
@@ -2755,10 +2754,11 @@
 		ibphysbuf.addr = nesqp->ietf_frame_pbase;
 		ibphysbuf.size = conn_param->private_data_len +
 					sizeof(struct ietf_mpa_frame);
+		tagged_offset = (u64)(unsigned long)nesqp->ietf_frame;
 		ibmr = nesibdev->ibdev.reg_phys_mr((struct ib_pd *)nespd,
 						&ibphysbuf, 1,
 						IB_ACCESS_LOCAL_WRITE,
-						(u64 *)&nesqp->ietf_frame);
+						&tagged_offset);
 		if (!ibmr) {
 			nes_debug(NES_DBG_CM, "Unable to register memory region"
 					"for lSMM for cm_node = %p \n",
@@ -2782,7 +2782,7 @@
 			sizeof(struct ietf_mpa_frame));
 		set_wqe_64bit_value(wqe->wqe_words,
 					NES_IWARP_SQ_WQE_FRAG0_LOW_IDX,
-					(u64)nesqp->ietf_frame);
+					(u64)(unsigned long)nesqp->ietf_frame);
 		wqe->wqe_words[NES_IWARP_SQ_WQE_LENGTH0_IDX] =
 			cpu_to_le32(conn_param->private_data_len +
 			sizeof(struct ietf_mpa_frame));
diff --git a/drivers/infiniband/hw/nes/nes_cm.h b/drivers/infiniband/hw/nes/nes_cm.h
index d5f7782..80bba18 100644
--- a/drivers/infiniband/hw/nes/nes_cm.h
+++ b/drivers/infiniband/hw/nes/nes_cm.h
@@ -298,7 +298,6 @@
 	struct nes_vnic           *nesvnic;
 	int                       apbvt_set;
 	int                       accept_pend;
-	int			freed;
 	struct list_head	timer_entry;
 	struct list_head	reset_entry;
 	struct nes_qp		*nesqp;
diff --git a/drivers/infiniband/hw/nes/nes_hw.c b/drivers/infiniband/hw/nes/nes_hw.c
index 52e7340..d6fc9ae 100644
--- a/drivers/infiniband/hw/nes/nes_hw.c
+++ b/drivers/infiniband/hw/nes/nes_hw.c
@@ -46,6 +46,10 @@
 module_param(nes_lro_max_aggr, uint, 0444);
 MODULE_PARM_DESC(nes_lro_max_aggr, "NIC LRO max packet aggregation");
 
+static int wide_ppm_offset;
+module_param(wide_ppm_offset, int, 0644);
+MODULE_PARM_DESC(wide_ppm_offset, "Increase CX4 interface clock ppm offset, 0=100ppm (default), 1=300ppm");
+
 static u32 crit_err_count;
 u32 int_mod_timer_init;
 u32 int_mod_cq_depth_256;
@@ -546,8 +550,11 @@
 			msleep(1);
 		}
 		if (int_cnt > 1) {
+			u32 sds;
 			spin_lock_irqsave(&nesadapter->phy_lock, flags);
-			nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL1, 0x0000F088);
+			sds = nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL1);
+			sds |= 0x00000040;
+			nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL1, sds);
 			mh_detected++;
 			reset_value = nes_read32(nesdev->regs+NES_SOFTWARE_RESET);
 			reset_value |= 0x0000003d;
@@ -736,39 +743,49 @@
 {
 	int i;
 	u32 u32temp;
-	u32 serdes_common_control;
+	u32 sds;
 
 	if (hw_rev != NE020_REV) {
 		/* init serdes 0 */
+		if (wide_ppm_offset && (nesadapter->phy_type[0] == NES_PHY_TYPE_CX4))
+			nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_CDR_CONTROL0, 0x000FFFAA);
+		else
+			nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_CDR_CONTROL0, 0x000000FF);
 
-		nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_CDR_CONTROL0, 0x000000FF);
 		if (nesadapter->phy_type[0] == NES_PHY_TYPE_PUMA_1G) {
-			serdes_common_control = nes_read_indexed(nesdev,
-					NES_IDX_ETH_SERDES_COMMON_CONTROL0);
-			serdes_common_control |= 0x000000100;
-			nes_write_indexed(nesdev,
-					NES_IDX_ETH_SERDES_COMMON_CONTROL0,
-					serdes_common_control);
-		} else if (!OneG_Mode) {
+			sds = nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL0);
+			sds |= 0x00000100;
+			nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL0, sds);
+		}
+		if (!OneG_Mode)
 			nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_TX_HIGHZ_LANE_MODE0, 0x11110000);
+
+		if (port_count < 2)
+			return 0;
+
+		/* init serdes 1 */
+		switch (nesadapter->phy_type[1]) {
+		case NES_PHY_TYPE_ARGUS:
+		case NES_PHY_TYPE_SFP_D:
+			nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_TX_EMP0, 0x00000000);
+			nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_TX_EMP1, 0x00000000);
+			break;
+		case NES_PHY_TYPE_CX4:
+			sds = nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL1);
+			sds &= 0xFFFFFFBF;
+			nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL1, sds);
+			if (wide_ppm_offset)
+				nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_CDR_CONTROL1, 0x000FFFAA);
+			else
+				nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_CDR_CONTROL1, 0x000000FF);
+			break;
+		case NES_PHY_TYPE_PUMA_1G:
+			sds = nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL1);
+			sds |= 0x000000100;
+			nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL1, sds);
 		}
-		if (((port_count > 1) &&
-			(nesadapter->phy_type[0] != NES_PHY_TYPE_PUMA_1G)) ||
-			((port_count > 2) &&
-			(nesadapter->phy_type[0] == NES_PHY_TYPE_PUMA_1G))) {
-			/* init serdes 1 */
-			nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_CDR_CONTROL1, 0x000000FF);
-			if (nesadapter->phy_type[0] == NES_PHY_TYPE_PUMA_1G) {
-				serdes_common_control = nes_read_indexed(nesdev,
-					NES_IDX_ETH_SERDES_COMMON_CONTROL1);
-				serdes_common_control |= 0x000000100;
-				nes_write_indexed(nesdev,
-					NES_IDX_ETH_SERDES_COMMON_CONTROL1,
-					serdes_common_control);
-			} else if (!OneG_Mode) {
-				nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_TX_HIGHZ_LANE_MODE1, 0x11110000);
-			}
-		}
+		if (!OneG_Mode)
+			nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_TX_HIGHZ_LANE_MODE1, 0x11110000);
 	} else {
 		/* init serdes 0 */
 		nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL0, 0x00000008);
@@ -1259,203 +1276,162 @@
 {
 	struct nes_adapter *nesadapter = nesdev->nesadapter;
 	u32 counter = 0;
-	u32 sds_common_control0;
+	u32 sds;
 	u32 mac_index = nesdev->mac_index;
 	u32 tx_config = 0;
 	u16 phy_data;
 	u32 temp_phy_data = 0;
 	u32 temp_phy_data2 = 0;
-	u32 i = 0;
+	u8  phy_type = nesadapter->phy_type[mac_index];
+	u8  phy_index = nesadapter->phy_index[mac_index];
 
 	if ((nesadapter->OneG_Mode) &&
-	    (nesadapter->phy_type[mac_index] != NES_PHY_TYPE_PUMA_1G)) {
+	    (phy_type != NES_PHY_TYPE_PUMA_1G)) {
 		nes_debug(NES_DBG_PHY, "1G PHY, mac_index = %d.\n", mac_index);
-		if (nesadapter->phy_type[mac_index] == NES_PHY_TYPE_1G) {
-			printk(PFX "%s: Programming mdc config for 1G\n", __func__);
+		if (phy_type == NES_PHY_TYPE_1G) {
 			tx_config = nes_read_indexed(nesdev, NES_IDX_MAC_TX_CONFIG);
 			tx_config &= 0xFFFFFFE3;
 			tx_config |= 0x04;
 			nes_write_indexed(nesdev, NES_IDX_MAC_TX_CONFIG, tx_config);
 		}
 
-		nes_read_1G_phy_reg(nesdev, 1, nesadapter->phy_index[mac_index], &phy_data);
-		nes_debug(NES_DBG_PHY, "Phy data from register 1 phy address %u = 0x%X.\n",
-				nesadapter->phy_index[mac_index], phy_data);
-		nes_write_1G_phy_reg(nesdev, 23, nesadapter->phy_index[mac_index], 0xb000);
+		nes_read_1G_phy_reg(nesdev, 1, phy_index, &phy_data);
+		nes_write_1G_phy_reg(nesdev, 23, phy_index, 0xb000);
 
 		/* Reset the PHY */
-		nes_write_1G_phy_reg(nesdev, 0, nesadapter->phy_index[mac_index], 0x8000);
+		nes_write_1G_phy_reg(nesdev, 0, phy_index, 0x8000);
 		udelay(100);
 		counter = 0;
 		do {
-			nes_read_1G_phy_reg(nesdev, 0, nesadapter->phy_index[mac_index], &phy_data);
-			nes_debug(NES_DBG_PHY, "Phy data from register 0 = 0x%X.\n", phy_data);
-			if (counter++ > 100) break;
+			nes_read_1G_phy_reg(nesdev, 0, phy_index, &phy_data);
+			if (counter++ > 100)
+				break;
 		} while (phy_data & 0x8000);
 
 		/* Setting no phy loopback */
 		phy_data &= 0xbfff;
 		phy_data |= 0x1140;
-		nes_write_1G_phy_reg(nesdev, 0, nesadapter->phy_index[mac_index],  phy_data);
-		nes_read_1G_phy_reg(nesdev, 0, nesadapter->phy_index[mac_index], &phy_data);
-		nes_debug(NES_DBG_PHY, "Phy data from register 0 = 0x%X.\n", phy_data);
-
-		nes_read_1G_phy_reg(nesdev, 0x17, nesadapter->phy_index[mac_index], &phy_data);
-		nes_debug(NES_DBG_PHY, "Phy data from register 0x17 = 0x%X.\n", phy_data);
-
-		nes_read_1G_phy_reg(nesdev, 0x1e, nesadapter->phy_index[mac_index], &phy_data);
-		nes_debug(NES_DBG_PHY, "Phy data from register 0x1e = 0x%X.\n", phy_data);
+		nes_write_1G_phy_reg(nesdev, 0, phy_index,  phy_data);
+		nes_read_1G_phy_reg(nesdev, 0, phy_index, &phy_data);
+		nes_read_1G_phy_reg(nesdev, 0x17, phy_index, &phy_data);
+		nes_read_1G_phy_reg(nesdev, 0x1e, phy_index, &phy_data);
 
 		/* Setting the interrupt mask */
-		nes_read_1G_phy_reg(nesdev, 0x19, nesadapter->phy_index[mac_index], &phy_data);
-		nes_debug(NES_DBG_PHY, "Phy data from register 0x19 = 0x%X.\n", phy_data);
-		nes_write_1G_phy_reg(nesdev, 0x19, nesadapter->phy_index[mac_index], 0xffee);
-
-		nes_read_1G_phy_reg(nesdev, 0x19, nesadapter->phy_index[mac_index], &phy_data);
-		nes_debug(NES_DBG_PHY, "Phy data from register 0x19 = 0x%X.\n", phy_data);
+		nes_read_1G_phy_reg(nesdev, 0x19, phy_index, &phy_data);
+		nes_write_1G_phy_reg(nesdev, 0x19, phy_index, 0xffee);
+		nes_read_1G_phy_reg(nesdev, 0x19, phy_index, &phy_data);
 
 		/* turning on flow control */
-		nes_read_1G_phy_reg(nesdev, 4, nesadapter->phy_index[mac_index], &phy_data);
-		nes_debug(NES_DBG_PHY, "Phy data from register 0x4 = 0x%X.\n", phy_data);
-		nes_write_1G_phy_reg(nesdev, 4, nesadapter->phy_index[mac_index],
-				(phy_data & ~(0x03E0)) | 0xc00);
-		/* nes_write_1G_phy_reg(nesdev, 4, nesadapter->phy_index[mac_index],
-				phy_data | 0xc00); */
-		nes_read_1G_phy_reg(nesdev, 4, nesadapter->phy_index[mac_index], &phy_data);
-		nes_debug(NES_DBG_PHY, "Phy data from register 0x4 = 0x%X.\n", phy_data);
+		nes_read_1G_phy_reg(nesdev, 4, phy_index, &phy_data);
+		nes_write_1G_phy_reg(nesdev, 4, phy_index, (phy_data & ~(0x03E0)) | 0xc00);
+		nes_read_1G_phy_reg(nesdev, 4, phy_index, &phy_data);
 
-		nes_read_1G_phy_reg(nesdev, 9, nesadapter->phy_index[mac_index], &phy_data);
-		nes_debug(NES_DBG_PHY, "Phy data from register 0x9 = 0x%X.\n", phy_data);
 		/* Clear Half duplex */
-		nes_write_1G_phy_reg(nesdev, 9, nesadapter->phy_index[mac_index],
-				phy_data & ~(0x0100));
-		nes_read_1G_phy_reg(nesdev, 9, nesadapter->phy_index[mac_index], &phy_data);
-		nes_debug(NES_DBG_PHY, "Phy data from register 0x9 = 0x%X.\n", phy_data);
+		nes_read_1G_phy_reg(nesdev, 9, phy_index, &phy_data);
+		nes_write_1G_phy_reg(nesdev, 9, phy_index, phy_data & ~(0x0100));
+		nes_read_1G_phy_reg(nesdev, 9, phy_index, &phy_data);
 
-		nes_read_1G_phy_reg(nesdev, 0, nesadapter->phy_index[mac_index], &phy_data);
-		nes_write_1G_phy_reg(nesdev, 0, nesadapter->phy_index[mac_index], phy_data | 0x0300);
-	} else {
-		if ((nesadapter->phy_type[mac_index] == NES_PHY_TYPE_IRIS) ||
-		    (nesadapter->phy_type[mac_index] == NES_PHY_TYPE_ARGUS)) {
-			/* setup 10G MDIO operation */
-			tx_config = nes_read_indexed(nesdev, NES_IDX_MAC_TX_CONFIG);
-			tx_config &= 0xFFFFFFE3;
-			tx_config |= 0x15;
-			nes_write_indexed(nesdev, NES_IDX_MAC_TX_CONFIG, tx_config);
+		nes_read_1G_phy_reg(nesdev, 0, phy_index, &phy_data);
+		nes_write_1G_phy_reg(nesdev, 0, phy_index, phy_data | 0x0300);
+
+		return 0;
+	}
+
+	if ((phy_type == NES_PHY_TYPE_IRIS) ||
+	    (phy_type == NES_PHY_TYPE_ARGUS) ||
+	    (phy_type == NES_PHY_TYPE_SFP_D)) {
+		/* setup 10G MDIO operation */
+		tx_config = nes_read_indexed(nesdev, NES_IDX_MAC_TX_CONFIG);
+		tx_config &= 0xFFFFFFE3; /* clear bits 4:2 */
+		tx_config |= 0x15; /* set bits 0, 2 and 4 */
+		nes_write_indexed(nesdev, NES_IDX_MAC_TX_CONFIG, tx_config);
+	}
+	if ((phy_type == NES_PHY_TYPE_ARGUS) ||
+	    (phy_type == NES_PHY_TYPE_SFP_D)) {
+		/* Check firmware heartbeat: issue the 0x3/0xd7ee read twice, 1500 us apart */
+		nes_read_10G_phy_reg(nesdev, phy_index, 0x3, 0xd7ee);
+		temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
+		udelay(1500);
+		nes_read_10G_phy_reg(nesdev, phy_index, 0x3, 0xd7ee);
+		temp_phy_data2 = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
+
+		if (temp_phy_data != temp_phy_data2)
+			return 0; /* samples differ: return without reprogramming the PHY */
+
+		/* no heartbeat, configure the PHY */
+		nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0x0000, 0x8000);
+		nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc300, 0x0000);
+		nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc316, 0x000A);
+		nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc318, 0x0052);
+		if (phy_type == NES_PHY_TYPE_ARGUS) {
+			nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc302, 0x000C);
+			nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc319, 0x0008);
+		} else { /* NES_PHY_TYPE_SFP_D, per the enclosing condition */
+			nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc302, 0x0004);
+			nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc319, 0x0038);
 		}
-		if ((nesadapter->phy_type[mac_index] == NES_PHY_TYPE_ARGUS)) {
-			nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x3, 0xd7ee);
+		nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc31a, 0x0098);
+		nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0x0026, 0x0E00);
+		nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0x0027, 0x0001);
 
-			temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
-			mdelay(10);
-			nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x3, 0xd7ee);
-			temp_phy_data2 = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
+		/* setup LEDs */
+		nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xd006, 0x0007);
+		nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xd007, 0x000A);
+		nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xd008, 0x0009);
 
-			/*
-			 * if firmware is already running (like from a
-			 * driver un-load/load, don't do anything.
-			 */
-			if (temp_phy_data == temp_phy_data2) {
-				/* configure QT2505 AMCC PHY */
-				nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x1, 0x0000, 0x8000);
-				nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x1, 0xc300, 0x0000);
-				nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x1, 0xc302, 0x0044);
-				nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x1, 0xc318, 0x0052);
-				nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x1, 0xc319, 0x0008);
-				nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x1, 0xc31a, 0x0098);
-				nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x3, 0x0026, 0x0E00);
-				nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x3, 0x0027, 0x0001);
-				nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x3, 0x0028, 0xA528);
+		nes_write_10G_phy_reg(nesdev, phy_index, 0x3, 0x0028, 0xA528);
 
-				/*
-				 * remove micro from reset; chip boots from ROM,
-				 * uploads EEPROM f/w image, uC executes f/w
-				 */
-				nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x1, 0xc300, 0x0002);
+		/* Bring PHY out of reset */
+		nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc300, 0x0002);
 
-				/*
-				 * wait for heart beat to start to
-				 * know loading is done
-				 */
-				counter = 0;
-				do {
-					nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x3, 0xd7ee);
-					temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
-					if (counter++ > 1000) {
-						nes_debug(NES_DBG_PHY, "AMCC PHY- breaking from heartbeat check <this is bad!!!> \n");
-						break;
-					}
-					mdelay(100);
-					nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x3, 0xd7ee);
-					temp_phy_data2 = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
-				} while ((temp_phy_data2 == temp_phy_data));
-
-				/*
-				 * wait for tracking to start to know
-				 * f/w is good to go
-				 */
-				counter = 0;
-				do {
-					nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x3, 0xd7fd);
-					temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
-					if (counter++ > 1000) {
-						nes_debug(NES_DBG_PHY, "AMCC PHY- breaking from status check <this is bad!!!> \n");
-						break;
-					}
-					mdelay(1000);
-					/*
-					 * nes_debug(NES_DBG_PHY, "AMCC PHY- phy_status not ready yet = 0x%02X\n",
-					 *			temp_phy_data);
-					 */
-				} while (((temp_phy_data & 0xff) != 0x50) && ((temp_phy_data & 0xff) != 0x70));
-
-				/* set LOS Control invert RXLOSB_I_PADINV */
-				nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x1, 0xd003, 0x0000);
-				/* set LOS Control to mask of RXLOSB_I */
-				nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x1, 0xc314, 0x0042);
-				/* set LED1 to input mode (LED1 and LED2 share same LED) */
-				nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x1, 0xd006, 0x0007);
-				/* set LED2 to RX link_status and activity */
-				nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x1, 0xd007, 0x000A);
-				/* set LED3 to RX link_status */
-				nes_write_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 0x1, 0xd008, 0x0009);
-
-				/*
-				 * reset the res-calibration on t2
-				 * serdes; ensures it is stable after
-				 * the amcc phy is stable
-				 */
-
-				sds_common_control0  = nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL0);
-				sds_common_control0 |= 0x1;
-				nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL0, sds_common_control0);
-
-				/* release the res-calibration reset */
-				sds_common_control0 &= 0xfffffffe;
-				nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL0, sds_common_control0);
-
-				i = 0;
-				while (((nes_read32(nesdev->regs + NES_SOFTWARE_RESET) & 0x00000040) != 0x00000040)
-						&& (i++ < 5000)) {
-					/* mdelay(1); */
-				}
-
-				/*
-				 * wait for link train done before moving on,
-				 * or will get an interupt storm
-				 */
-				counter = 0;
-				do {
-					temp_phy_data = nes_read_indexed(nesdev, NES_IDX_PHY_PCS_CONTROL_STATUS0 +
-								(0x200 * (nesdev->mac_index & 1)));
-					if (counter++ > 1000) {
-						nes_debug(NES_DBG_PHY, "AMCC PHY- breaking from link train wait <this is bad, link didnt train!!!>\n");
-						break;
-					}
-					mdelay(1);
-				} while (((temp_phy_data & 0x0f1f0000) != 0x0f0f0000));
+		/* Check for heartbeat: take a baseline sample, then poll until it changes */
+		counter = 0;
+		mdelay(690); /* fixed 690 ms wait before the baseline sample */
+		nes_read_10G_phy_reg(nesdev, phy_index, 0x3, 0xd7ee);
+		temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
+		do {
+			if (counter++ > 150) {
+				nes_debug(NES_DBG_PHY, "No PHY heartbeat\n");
+				break; /* gave up; the steps below still run */
 			}
-		}
+			mdelay(1);
+			nes_read_10G_phy_reg(nesdev, phy_index, 0x3, 0xd7ee);
+			temp_phy_data2 = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
+		} while ((temp_phy_data2 == temp_phy_data));
+
+		/* wait for tracking: poll 0x3/0xd7fd until the low byte reads 0x50 or 0x70 */
+		counter = 0;
+		do {
+			nes_read_10G_phy_reg(nesdev, phy_index, 0x3, 0xd7fd);
+			temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
+			if (counter++ > 300) {
+				nes_debug(NES_DBG_PHY, "PHY did not track\n");
+				break; /* roughly 300 polls at 10 ms each; the steps below still run */
+			}
+			mdelay(10);
+		} while (((temp_phy_data & 0xff) != 0x50) && ((temp_phy_data & 0xff) != 0x70));
+
+		/* setup signal integrity */
+		nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xd003, 0x0000);
+		nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xF00D, 0x00FE);
+		nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xF00E, 0x0032);
+		nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xF00F, 0x0002);
+		nes_write_10G_phy_reg(nesdev, phy_index, 0x1, 0xc314, 0x0063);
+
+		/* reset serdes: pulse bit 0 of COMMON_CONTROL0 at offset mac_index * 0x200 */
+		sds = nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL0 +
+				       mac_index * 0x200);
+		sds |= 0x1; /* assert bit 0 */
+		nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL0 +
+				  mac_index * 0x200, sds);
+		sds &= 0xfffffffe; /* release bit 0 */
+		nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL0 +
+				  mac_index * 0x200, sds);
+
+		counter = 0; /* bounded spin, no delay, until bit 6 of NES_SOFTWARE_RESET is set */
+		while (((nes_read32(nesdev->regs + NES_SOFTWARE_RESET) & 0x00000040) != 0x00000040)
+				&& (counter++ < 5000))
+			;
 	}
 	return 0;
 }
@@ -2359,6 +2335,7 @@
 	u16 temp_phy_data;
 	u32 pcs_val  = 0x0f0f0000;
 	u32 pcs_mask = 0x0f1f0000;
+	u32 cdr_ctrl;
 
 	spin_lock_irqsave(&nesadapter->phy_lock, flags);
 	if (nesadapter->mac_sw_state[mac_number] != NES_MAC_SW_IDLE) {
@@ -2473,6 +2450,7 @@
 				break;
 
 			case NES_PHY_TYPE_ARGUS:
+			case NES_PHY_TYPE_SFP_D:
 				/* clear the alarms */
 				nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 4, 0x0008);
 				nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 4, 0xc001);
@@ -2483,19 +2461,18 @@
 				nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 1, 0x9004);
 				nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 1, 0x9005);
 				/* check link status */
-				nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 1, 1);
+				nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 1, 0x9003);
 				temp_phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
-				u32temp = 100;
-				do {
-					nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 1, 1);
 
-					phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
-					if ((phy_data == temp_phy_data) || (!(--u32temp)))
-						break;
-					temp_phy_data = phy_data;
-				} while (1);
+				nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 3, 0x0021);
+				nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL); /* first result is discarded */
+				nes_read_10G_phy_reg(nesdev, nesadapter->phy_index[mac_index], 3, 0x0021);
+				phy_data = (u16)nes_read_indexed(nesdev, NES_IDX_MAC_MDIO_CONTROL);
+
+				phy_data = (!temp_phy_data && (phy_data == 0x8000)) ? 0x4 : 0x0; /* 0x4 is what the later (phy_data & 0x0004) link test looks for */
+
 				nes_debug(NES_DBG_PHY, "%s: Phy data = 0x%04X, link was %s.\n",
-					__func__, phy_data, nesadapter->mac_link_down ? "DOWN" : "UP");
+					__func__, phy_data, nesadapter->mac_link_down[mac_index] ? "DOWN" : "UP");
 				break;
 
 			case NES_PHY_TYPE_PUMA_1G:
@@ -2511,6 +2488,17 @@
 		}
 
 		if (phy_data & 0x0004) {
+			if (wide_ppm_offset &&
+			    (nesadapter->phy_type[mac_index] == NES_PHY_TYPE_CX4) &&
+			    (nesadapter->hw_rev != NE020_REV)) {
+				cdr_ctrl = nes_read_indexed(nesdev,
+							    NES_IDX_ETH_SERDES_CDR_CONTROL0 +
+							    mac_index * 0x200);
+				nes_write_indexed(nesdev,
+						  NES_IDX_ETH_SERDES_CDR_CONTROL0 +
+						  mac_index * 0x200,
+						  cdr_ctrl | 0x000F0000); /* link up: set bits 19:16 */
+			}
 			nesadapter->mac_link_down[mac_index] = 0;
 			list_for_each_entry(nesvnic, &nesadapter->nesvnic_list[mac_index], list) {
 				nes_debug(NES_DBG_PHY, "The Link is UP!!.  linkup was %d\n",
@@ -2525,6 +2513,17 @@
 				}
 			}
 		} else {
+			if (wide_ppm_offset &&
+			    (nesadapter->phy_type[mac_index] == NES_PHY_TYPE_CX4) &&
+			    (nesadapter->hw_rev != NE020_REV)) {
+				cdr_ctrl = nes_read_indexed(nesdev,
+							    NES_IDX_ETH_SERDES_CDR_CONTROL0 +
+							    mac_index * 0x200);
+				nes_write_indexed(nesdev,
+						  NES_IDX_ETH_SERDES_CDR_CONTROL0 +
+						  mac_index * 0x200,
+						  cdr_ctrl & 0xFFF0FFFF); /* link down: clear bits 19:16 */
+			}
 			nesadapter->mac_link_down[mac_index] = 1;
 			list_for_each_entry(nesvnic, &nesadapter->nesvnic_list[mac_index], list) {
 				nes_debug(NES_DBG_PHY, "The Link is Down!!. linkup was %d\n",
diff --git a/drivers/infiniband/hw/nes/nes_hw.h b/drivers/infiniband/hw/nes/nes_hw.h
index f41a871..c3654c6 100644
--- a/drivers/infiniband/hw/nes/nes_hw.h
+++ b/drivers/infiniband/hw/nes/nes_hw.h
@@ -35,12 +35,14 @@
 
 #include <linux/inet_lro.h>
 
+#define NES_PHY_TYPE_CX4       1 /* selects the CX4-specific serdes/CDR handling */
 #define NES_PHY_TYPE_1G        2
 #define NES_PHY_TYPE_IRIS      3
 #define NES_PHY_TYPE_ARGUS     4
 #define NES_PHY_TYPE_PUMA_1G   5
 #define NES_PHY_TYPE_PUMA_10G  6
 #define NES_PHY_TYPE_GLADIUS   7
+#define NES_PHY_TYPE_SFP_D     8 /* handled together with ARGUS in PHY init and link check */
 
 #define NES_MULTICAST_PF_MAX 8
 
diff --git a/drivers/infiniband/hw/nes/nes_nic.c b/drivers/infiniband/hw/nes/nes_nic.c
index ecb1f6f..c6e6611 100644
--- a/drivers/infiniband/hw/nes/nes_nic.c
+++ b/drivers/infiniband/hw/nes/nes_nic.c
@@ -1426,49 +1426,55 @@
 	struct nes_vnic *nesvnic = netdev_priv(netdev);
 	struct nes_device *nesdev = nesvnic->nesdev;
 	struct nes_adapter *nesadapter = nesdev->nesadapter;
+	u32 mac_index = nesdev->mac_index;
+	u8 phy_type = nesadapter->phy_type[mac_index];
+	u8 phy_index = nesadapter->phy_index[mac_index];
 	u16 phy_data;
 
 	et_cmd->duplex = DUPLEX_FULL;
 	et_cmd->port   = PORT_MII;
+	et_cmd->maxtxpkt = 511; /* set up front so the early return in the 1G branch still reports it */
+	et_cmd->maxrxpkt = 511;
 
 	if (nesadapter->OneG_Mode) {
 		et_cmd->speed = SPEED_1000;
-		if (nesadapter->phy_type[nesdev->mac_index] == NES_PHY_TYPE_PUMA_1G) {
+		if (phy_type == NES_PHY_TYPE_PUMA_1G) {
 			et_cmd->supported   = SUPPORTED_1000baseT_Full;
 			et_cmd->advertising = ADVERTISED_1000baseT_Full;
 			et_cmd->autoneg     = AUTONEG_DISABLE;
 			et_cmd->transceiver = XCVR_INTERNAL;
-			et_cmd->phy_address = nesdev->mac_index;
+			et_cmd->phy_address = mac_index;
 		} else {
-			et_cmd->supported   = SUPPORTED_1000baseT_Full | SUPPORTED_Autoneg;
-			et_cmd->advertising = ADVERTISED_1000baseT_Full | ADVERTISED_Autoneg;
-			nes_read_1G_phy_reg(nesdev, 0, nesadapter->phy_index[nesdev->mac_index], &phy_data);
+			et_cmd->supported   = SUPPORTED_1000baseT_Full
+					    | SUPPORTED_Autoneg;
+			et_cmd->advertising = ADVERTISED_1000baseT_Full
+					    | ADVERTISED_Autoneg;
+			nes_read_1G_phy_reg(nesdev, 0, phy_index, &phy_data);
 			if (phy_data & 0x1000)
 				et_cmd->autoneg = AUTONEG_ENABLE;
 			else
 				et_cmd->autoneg = AUTONEG_DISABLE;
 			et_cmd->transceiver = XCVR_EXTERNAL;
-			et_cmd->phy_address = nesadapter->phy_index[nesdev->mac_index];
+			et_cmd->phy_address = phy_index;
 		}
-	} else {
-		if ((nesadapter->phy_type[nesdev->mac_index] == NES_PHY_TYPE_IRIS) ||
-		    (nesadapter->phy_type[nesdev->mac_index] == NES_PHY_TYPE_ARGUS)) {
-			et_cmd->transceiver = XCVR_EXTERNAL;
-			et_cmd->port        = PORT_FIBRE;
-			et_cmd->supported   = SUPPORTED_FIBRE;
-			et_cmd->advertising = ADVERTISED_FIBRE;
-			et_cmd->phy_address = nesadapter->phy_index[nesdev->mac_index];
-		} else {
-			et_cmd->transceiver = XCVR_INTERNAL;
-			et_cmd->supported   = SUPPORTED_10000baseT_Full;
-			et_cmd->advertising = ADVERTISED_10000baseT_Full;
-			et_cmd->phy_address = nesdev->mac_index;
-		}
-		et_cmd->speed = SPEED_10000;
-		et_cmd->autoneg = AUTONEG_DISABLE;
+		return 0;
 	}
-	et_cmd->maxtxpkt = 511;
-	et_cmd->maxrxpkt = 511;
+	if ((phy_type == NES_PHY_TYPE_IRIS) ||
+	    (phy_type == NES_PHY_TYPE_ARGUS) ||
+	    (phy_type == NES_PHY_TYPE_SFP_D)) {
+		et_cmd->transceiver = XCVR_EXTERNAL;
+		et_cmd->port        = PORT_FIBRE; /* overrides the PORT_MII default set above */
+		et_cmd->supported   = SUPPORTED_FIBRE;
+		et_cmd->advertising = ADVERTISED_FIBRE;
+		et_cmd->phy_address = phy_index; /* external PHY: report phy_index */
+	} else {
+		et_cmd->transceiver = XCVR_INTERNAL;
+		et_cmd->supported   = SUPPORTED_10000baseT_Full;
+		et_cmd->advertising = ADVERTISED_10000baseT_Full;
+		et_cmd->phy_address = mac_index; /* internal: report the MAC index instead */
+	}
+	et_cmd->speed = SPEED_10000; /* reached only when OneG_Mode is clear */
+	et_cmd->autoneg = AUTONEG_DISABLE;
 	return 0;
 }
 
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
index 5a76a55..4c57f32 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
@@ -70,12 +70,14 @@
 	 */
 	if (ppriv->pkey == pkey) {
 		result = -ENOTUNIQ;
+		priv = NULL; /* nothing allocated on this path: err: must not call free_netdev() */
 		goto err;
 	}
 
 	list_for_each_entry(priv, &ppriv->child_intfs, list) {
 		if (priv->pkey == pkey) {
 			result = -ENOTUNIQ;
+			priv = NULL; /* priv is the list cursor (an existing child): err: must not free it */
 			goto err;
 		}
 	}
@@ -96,7 +98,7 @@
 
 	result = ipoib_set_dev_features(priv, ppriv->ca);
 	if (result)
-		goto device_init_failed;
+		goto err;
 
 	priv->pkey = pkey;
 
@@ -109,7 +111,7 @@
 		ipoib_warn(ppriv, "failed to initialize subinterface: "
 			   "device %s, port %d",
 			   ppriv->ca->name, ppriv->port);
-		goto device_init_failed;
+		goto err;
 	}
 
 	result = register_netdevice(priv->dev);
@@ -146,19 +148,19 @@
 register_failed:
 	ipoib_dev_cleanup(priv->dev);
 
-device_init_failed:
-	free_netdev(priv->dev);
-
 err:
 	mutex_unlock(&ppriv->vlan_mutex);
 	rtnl_unlock();
+	if (priv) /* NULL when the err: path was reached with nothing to free */
+		free_netdev(priv->dev); /* runs after vlan_mutex and rtnl have been released */
+
 	return result;
 }
 
 int ipoib_vlan_delete(struct net_device *pdev, unsigned short pkey)
 {
 	struct ipoib_dev_priv *ppriv, *priv, *tpriv;
-	int ret = -ENOENT;
+	struct net_device *dev = NULL;
 
 	if (!capable(CAP_NET_ADMIN))
 		return -EPERM;
@@ -172,14 +174,17 @@
 			unregister_netdevice(priv->dev);
 			ipoib_dev_cleanup(priv->dev);
 			list_del(&priv->list);
-			free_netdev(priv->dev);
-
-			ret = 0;
+			dev = priv->dev; /* remembered so it can be freed after the locks are dropped */
 			break;
 		}
 	}
 	mutex_unlock(&ppriv->vlan_mutex);
 	rtnl_unlock();
 
-	return ret;
+	if (dev) { /* set only when the loop above removed a child interface */
+		free_netdev(dev);
+		return 0;
+	}
+
+	return -ENODEV; /* NOTE(review): the not-found case returned -ENOENT before this change; confirm callers */
 }