[IB] Fix MAD layer DMA mappings to avoid touching data buffers once mapped

The MAD layer was violating the DMA API by touching data buffers used
for sends after the DMA mapping was done.  This causes problems on
non-cache-coherent architectures, because the device doing DMA won't
see updates to the payload buffers that exist only in the CPU cache.
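For example, under the old scheme ib_create_send_mad() DMA-mapped the
buffer at allocation time, so a consumer that formatted its MAD
afterwards was writing behind the device's back.  A minimal sketch of
the broken ordering (details elided):

	msg = ib_create_send_mad(...);	/* buffer DMA-mapped here */
	cm_format_req(msg->mad, ...);	/* CPU writes after mapping --
					   may sit in the CPU cache and
					   never reach the HCA */
	ib_post_send_mad(...);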

Fix this by having all MAD consumers use ib_create_send_mad() to
allocate their send buffers, and by moving the DMA mapping into the MAD
layer so that it is done just before the send is posted (and therefore
after any modifications of the send buffer by the MAD layer).
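With this scheme a consumer never sees the mapping at all: ib_send_mad()
maps the buffer just before ib_post_send() and unmaps it on completion.
A minimal sketch of the resulting consumer-side pattern (following the
agent_send_response() changes below):

	msg = ib_create_send_mad(agent, wc->src_qp, wc->pkey_index, 0,
				 IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA,
				 GFP_KERNEL);
	if (IS_ERR(msg))
		return PTR_ERR(msg);

	memcpy(msg->mad, mad, sizeof *mad);	/* safe: not yet mapped */
	msg->ah = ah;
	ret = ib_post_send_mad(msg, NULL);	/* mapping done inside */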

Tested on a non-cache-coherent PowerPC 440SPe system.

Signed-off-by: Sean Hefty <sean.hefty@intel.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
diff --git a/drivers/infiniband/core/agent.c b/drivers/infiniband/core/agent.c
index 5ac86f5..0c3c695 100644
--- a/drivers/infiniband/core/agent.c
+++ b/drivers/infiniband/core/agent.c
@@ -37,58 +37,41 @@
  * $Id: agent.c 1389 2004-12-27 22:56:47Z roland $
  */
 
-#include <linux/dma-mapping.h>
-
-#include <asm/bug.h>
-
-#include <rdma/ib_smi.h>
-
-#include "smi.h"
-#include "agent_priv.h"
-#include "mad_priv.h"
 #include "agent.h"
+#include "smi.h"
 
-spinlock_t ib_agent_port_list_lock;
+#define SPFX "ib_agent: "
+
+struct ib_agent_port_private {
+	struct list_head port_list;
+	struct ib_mad_agent *agent[2];
+};
+
+static DEFINE_SPINLOCK(ib_agent_port_list_lock);
 static LIST_HEAD(ib_agent_port_list);
 
-/*
- * Caller must hold ib_agent_port_list_lock
- */
-static inline struct ib_agent_port_private *
-__ib_get_agent_port(struct ib_device *device, int port_num,
-		    struct ib_mad_agent *mad_agent)
+static struct ib_agent_port_private *
+__ib_get_agent_port(struct ib_device *device, int port_num)
 {
 	struct ib_agent_port_private *entry;
 
-	BUG_ON(!(!!device ^ !!mad_agent));  /* Exactly one MUST be (!NULL) */
-
-	if (device) {
-		list_for_each_entry(entry, &ib_agent_port_list, port_list) {
-			if (entry->smp_agent->device == device &&
-			    entry->port_num == port_num)
-				return entry;
-		}
-	} else {
-		list_for_each_entry(entry, &ib_agent_port_list, port_list) {
-			if ((entry->smp_agent == mad_agent) ||
-			    (entry->perf_mgmt_agent == mad_agent))
-				return entry;
-		}
+	list_for_each_entry(entry, &ib_agent_port_list, port_list) {
+		if (entry->agent[0]->device == device &&
+		    entry->agent[0]->port_num == port_num)
+			return entry;
 	}
 	return NULL;
 }
 
-static inline struct ib_agent_port_private *
-ib_get_agent_port(struct ib_device *device, int port_num,
-		  struct ib_mad_agent *mad_agent)
+static struct ib_agent_port_private *
+ib_get_agent_port(struct ib_device *device, int port_num)
 {
 	struct ib_agent_port_private *entry;
 	unsigned long flags;
 
 	spin_lock_irqsave(&ib_agent_port_list_lock, flags);
-	entry = __ib_get_agent_port(device, port_num, mad_agent);
+	entry = __ib_get_agent_port(device, port_num);
 	spin_unlock_irqrestore(&ib_agent_port_list_lock, flags);
-
 	return entry;
 }
 
@@ -100,192 +83,76 @@
 
 	if (smp->mgmt_class != IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
 		return 1;
-	port_priv = ib_get_agent_port(device, port_num, NULL);
+
+	port_priv = ib_get_agent_port(device, port_num);
 	if (!port_priv) {
 		printk(KERN_DEBUG SPFX "smi_check_local_dr_smp %s port %d "
-		       "not open\n",
-		       device->name, port_num);
+		       "not open\n", device->name, port_num);
 		return 1;
 	}
 
-	return smi_check_local_smp(port_priv->smp_agent, smp);
+	return smi_check_local_smp(port_priv->agent[0], smp);
 }
 
-static int agent_mad_send(struct ib_mad_agent *mad_agent,
-			  struct ib_agent_port_private *port_priv,
-			  struct ib_mad_private *mad_priv,
-			  struct ib_grh *grh,
-			  struct ib_wc *wc)
-{
-	struct ib_agent_send_wr *agent_send_wr;
-	struct ib_sge gather_list;
-	struct ib_send_wr send_wr;
-	struct ib_send_wr *bad_send_wr;
-	struct ib_ah_attr ah_attr;
-	unsigned long flags;
-	int ret = 1;
-
-	agent_send_wr = kmalloc(sizeof(*agent_send_wr), GFP_KERNEL);
-	if (!agent_send_wr)
-		goto out;
-	agent_send_wr->mad = mad_priv;
-
-	gather_list.addr = dma_map_single(mad_agent->device->dma_device,
-					  &mad_priv->mad,
-					  sizeof(mad_priv->mad),
-					  DMA_TO_DEVICE);
-	gather_list.length = sizeof(mad_priv->mad);
-	gather_list.lkey = mad_agent->mr->lkey;
-
-	send_wr.next = NULL;
-	send_wr.opcode = IB_WR_SEND;
-	send_wr.sg_list = &gather_list;
-	send_wr.num_sge = 1;
-	send_wr.wr.ud.remote_qpn = wc->src_qp; /* DQPN */
-	send_wr.wr.ud.timeout_ms = 0;
-	send_wr.send_flags = IB_SEND_SIGNALED | IB_SEND_SOLICITED;
-
-	ah_attr.dlid = wc->slid;
-	ah_attr.port_num = mad_agent->port_num;
-	ah_attr.src_path_bits = wc->dlid_path_bits;
-	ah_attr.sl = wc->sl;
-	ah_attr.static_rate = 0;
-	ah_attr.ah_flags = 0; /* No GRH */
-	if (mad_priv->mad.mad.mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT) {
-		if (wc->wc_flags & IB_WC_GRH) {
-			ah_attr.ah_flags = IB_AH_GRH;
-			/* Should sgid be looked up ? */
-			ah_attr.grh.sgid_index = 0;
-			ah_attr.grh.hop_limit = grh->hop_limit;
-			ah_attr.grh.flow_label = be32_to_cpu(
-				grh->version_tclass_flow)  & 0xfffff;
-			ah_attr.grh.traffic_class = (be32_to_cpu(
-				grh->version_tclass_flow) >> 20) & 0xff;
-			memcpy(ah_attr.grh.dgid.raw,
-			       grh->sgid.raw,
-			       sizeof(ah_attr.grh.dgid));
-		}
-	}
-
-	agent_send_wr->ah = ib_create_ah(mad_agent->qp->pd, &ah_attr);
-	if (IS_ERR(agent_send_wr->ah)) {
-		printk(KERN_ERR SPFX "No memory for address handle\n");
-		kfree(agent_send_wr);
-		goto out;
-	}
-
-	send_wr.wr.ud.ah = agent_send_wr->ah;
-	if (mad_priv->mad.mad.mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT) {
-		send_wr.wr.ud.pkey_index = wc->pkey_index;
-		send_wr.wr.ud.remote_qkey = IB_QP1_QKEY;
-	} else { 	/* for SMPs */
-		send_wr.wr.ud.pkey_index = 0;
-		send_wr.wr.ud.remote_qkey = 0;
-	}
-	send_wr.wr.ud.mad_hdr = &mad_priv->mad.mad.mad_hdr;
-	send_wr.wr_id = (unsigned long)agent_send_wr;
-
-	pci_unmap_addr_set(agent_send_wr, mapping, gather_list.addr);
-
-	/* Send */
-	spin_lock_irqsave(&port_priv->send_list_lock, flags);
-	if (ib_post_send_mad(mad_agent, &send_wr, &bad_send_wr)) {
-		spin_unlock_irqrestore(&port_priv->send_list_lock, flags);
-		dma_unmap_single(mad_agent->device->dma_device,
-				 pci_unmap_addr(agent_send_wr, mapping),
-				 sizeof(mad_priv->mad),
-				 DMA_TO_DEVICE);
-		ib_destroy_ah(agent_send_wr->ah);
-		kfree(agent_send_wr);
-	} else {
-		list_add_tail(&agent_send_wr->send_list,
-			      &port_priv->send_posted_list);
-		spin_unlock_irqrestore(&port_priv->send_list_lock, flags);
-		ret = 0;
-	}
-
-out:
-	return ret;
-}
-
-int agent_send(struct ib_mad_private *mad,
-	       struct ib_grh *grh,
-	       struct ib_wc *wc,
-	       struct ib_device *device,
-	       int port_num)
+int agent_send_response(struct ib_mad *mad, struct ib_grh *grh,
+			struct ib_wc *wc, struct ib_device *device,
+			int port_num, int qpn)
 {
 	struct ib_agent_port_private *port_priv;
-	struct ib_mad_agent *mad_agent;
+	struct ib_mad_agent *agent;
+	struct ib_mad_send_buf *send_buf;
+	struct ib_ah *ah;
+	int ret;
 
-	port_priv = ib_get_agent_port(device, port_num, NULL);
+	port_priv = ib_get_agent_port(device, port_num);
 	if (!port_priv) {
-		printk(KERN_DEBUG SPFX "agent_send %s port %d not open\n",
-		       device->name, port_num);
-		return 1;
+		printk(KERN_ERR SPFX "Unable to find port agent\n");
+		return -ENODEV;
 	}
 
-	/* Get mad agent based on mgmt_class in MAD */
-	switch (mad->mad.mad.mad_hdr.mgmt_class) {
-		case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE:
-		case IB_MGMT_CLASS_SUBN_LID_ROUTED:
-			mad_agent = port_priv->smp_agent;
-			break;
-		case IB_MGMT_CLASS_PERF_MGMT:
-			mad_agent = port_priv->perf_mgmt_agent;
-			break;
-		default:
-			return 1;
+	agent = port_priv->agent[qpn];
+	ah = ib_create_ah_from_wc(agent->qp->pd, wc, grh, port_num);
+	if (IS_ERR(ah)) {
+		ret = PTR_ERR(ah);
+		printk(KERN_ERR SPFX "ib_create_ah_from_wc error:%d\n", ret);
+		return ret;
 	}
 
-	return agent_mad_send(mad_agent, port_priv, mad, grh, wc);
+	send_buf = ib_create_send_mad(agent, wc->src_qp, wc->pkey_index, 0,
+				      IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA,
+				      GFP_KERNEL);
+	if (IS_ERR(send_buf)) {
+		ret = PTR_ERR(send_buf);
+		printk(KERN_ERR SPFX "ib_create_send_mad error:%d\n", ret);
+		goto err1;
+	}
+
+	memcpy(send_buf->mad, mad, sizeof *mad);
+	send_buf->ah = ah;
+	if ((ret = ib_post_send_mad(send_buf, NULL))) {
+		printk(KERN_ERR SPFX "ib_post_send_mad error:%d\n", ret);
+		goto err2;
+	}
+	return 0;
+err2:
+	ib_free_send_mad(send_buf);
+err1:
+	ib_destroy_ah(ah);
+	return ret;
 }
 
 static void agent_send_handler(struct ib_mad_agent *mad_agent,
 			       struct ib_mad_send_wc *mad_send_wc)
 {
-	struct ib_agent_port_private	*port_priv;
-	struct ib_agent_send_wr		*agent_send_wr;
-	unsigned long			flags;
-
-	/* Find matching MAD agent */
-	port_priv = ib_get_agent_port(NULL, 0, mad_agent);
-	if (!port_priv) {
-		printk(KERN_ERR SPFX "agent_send_handler: no matching MAD "
-		       "agent %p\n", mad_agent);
-		return;
-	}
-
-	agent_send_wr = (struct ib_agent_send_wr *)(unsigned long)mad_send_wc->wr_id;
-	spin_lock_irqsave(&port_priv->send_list_lock, flags);
-	/* Remove completed send from posted send MAD list */
-	list_del(&agent_send_wr->send_list);
-	spin_unlock_irqrestore(&port_priv->send_list_lock, flags);
-
-	dma_unmap_single(mad_agent->device->dma_device,
-			 pci_unmap_addr(agent_send_wr, mapping),
-			 sizeof(agent_send_wr->mad->mad),
-			 DMA_TO_DEVICE);
-
-	ib_destroy_ah(agent_send_wr->ah);
-
-	/* Release allocated memory */
-	kmem_cache_free(ib_mad_cache, agent_send_wr->mad);
-	kfree(agent_send_wr);
+	ib_destroy_ah(mad_send_wc->send_buf->ah);
+	ib_free_send_mad(mad_send_wc->send_buf);
 }
 
 int ib_agent_port_open(struct ib_device *device, int port_num)
 {
-	int ret;
 	struct ib_agent_port_private *port_priv;
 	unsigned long flags;
-
-	/* First, check if port already open for SMI */
-	port_priv = ib_get_agent_port(device, port_num, NULL);
-	if (port_priv) {
-		printk(KERN_DEBUG SPFX "%s port %d already open\n",
-		       device->name, port_num);
-		return 0;
-	}
+	int ret;
 
 	/* Create new device info */
 	port_priv = kmalloc(sizeof *port_priv, GFP_KERNEL);
@@ -294,32 +161,25 @@
 		ret = -ENOMEM;
 		goto error1;
 	}
-
 	memset(port_priv, 0, sizeof *port_priv);
-	port_priv->port_num = port_num;
-	spin_lock_init(&port_priv->send_list_lock);
-	INIT_LIST_HEAD(&port_priv->send_posted_list);
 
-	/* Obtain send only MAD agent for SM class (SMI QP) */
-	port_priv->smp_agent = ib_register_mad_agent(device, port_num,
-						     IB_QPT_SMI,
-						     NULL, 0,
+	/* Obtain send only MAD agent for SMI QP */
+	port_priv->agent[0] = ib_register_mad_agent(device, port_num,
+						    IB_QPT_SMI, NULL, 0,
 						    &agent_send_handler,
-						     NULL, NULL);
-
-	if (IS_ERR(port_priv->smp_agent)) {
-		ret = PTR_ERR(port_priv->smp_agent);
+						    NULL, NULL);
+	if (IS_ERR(port_priv->agent[0])) {
+		ret = PTR_ERR(port_priv->agent[0]);
 		goto error2;
 	}
 
-	/* Obtain send only MAD agent for PerfMgmt class (GSI QP) */
-	port_priv->perf_mgmt_agent = ib_register_mad_agent(device, port_num,
-							   IB_QPT_GSI,
-							   NULL, 0,
-							  &agent_send_handler,
-							   NULL, NULL);
-	if (IS_ERR(port_priv->perf_mgmt_agent)) {
-		ret = PTR_ERR(port_priv->perf_mgmt_agent);
+	/* Obtain send only MAD agent for GSI QP */
+	port_priv->agent[1] = ib_register_mad_agent(device, port_num,
+						    IB_QPT_GSI, NULL, 0,
+						    &agent_send_handler,
+						    NULL, NULL);
+	if (IS_ERR(port_priv->agent[1])) {
+		ret = PTR_ERR(port_priv->agent[1]);
 		goto error3;
 	}
 
@@ -330,7 +190,7 @@
 	return 0;
 
 error3:
-	ib_unregister_mad_agent(port_priv->smp_agent);
+	ib_unregister_mad_agent(port_priv->agent[0]);
 error2:
 	kfree(port_priv);
 error1:
@@ -343,7 +203,7 @@
 	unsigned long flags;
 
 	spin_lock_irqsave(&ib_agent_port_list_lock, flags);
-	port_priv = __ib_get_agent_port(device, port_num, NULL);
+	port_priv = __ib_get_agent_port(device, port_num);
 	if (port_priv == NULL) {
 		spin_unlock_irqrestore(&ib_agent_port_list_lock, flags);
 		printk(KERN_ERR SPFX "Port %d not found\n", port_num);
@@ -352,9 +212,8 @@
 	list_del(&port_priv->port_list);
 	spin_unlock_irqrestore(&ib_agent_port_list_lock, flags);
 
-	ib_unregister_mad_agent(port_priv->perf_mgmt_agent);
-	ib_unregister_mad_agent(port_priv->smp_agent);
+	ib_unregister_mad_agent(port_priv->agent[1]);
+	ib_unregister_mad_agent(port_priv->agent[0]);
 	kfree(port_priv);
-
 	return 0;
 }
diff --git a/drivers/infiniband/core/agent.h b/drivers/infiniband/core/agent.h
index d942684..c5f3cfe 100644
--- a/drivers/infiniband/core/agent.h
+++ b/drivers/infiniband/core/agent.h
@@ -39,17 +39,14 @@
 #ifndef __AGENT_H_
 #define __AGENT_H_
 
-extern spinlock_t ib_agent_port_list_lock;
+#include <rdma/ib_mad.h>
 
-extern int ib_agent_port_open(struct ib_device *device,
-			      int port_num);
+extern int ib_agent_port_open(struct ib_device *device, int port_num);
 
 extern int ib_agent_port_close(struct ib_device *device, int port_num);
 
-extern int agent_send(struct ib_mad_private *mad,
-		      struct ib_grh *grh,
-		      struct ib_wc *wc,
-		      struct ib_device *device,
-		      int port_num);
+extern int agent_send_response(struct ib_mad *mad, struct ib_grh *grh,
+			       struct ib_wc *wc, struct ib_device *device,
+			       int port_num, int qpn);
 
 #endif	/* __AGENT_H_ */
diff --git a/drivers/infiniband/core/agent_priv.h b/drivers/infiniband/core/agent_priv.h
deleted file mode 100644
index 2ec6d7f..0000000
--- a/drivers/infiniband/core/agent_priv.h
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
- * Copyright (c) 2004, 2005 Mellanox Technologies Ltd.  All rights reserved.
- * Copyright (c) 2004, 2005 Infinicon Corporation.  All rights reserved.
- * Copyright (c) 2004, 2005 Intel Corporation.  All rights reserved.
- * Copyright (c) 2004, 2005 Topspin Corporation.  All rights reserved.
- * Copyright (c) 2004, 2005 Voltaire Corporation.  All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- * $Id: agent_priv.h 1640 2005-01-24 22:39:02Z halr $
- */
-
-#ifndef __IB_AGENT_PRIV_H__
-#define __IB_AGENT_PRIV_H__
-
-#include <linux/pci.h>
-
-#define SPFX "ib_agent: "
-
-struct ib_agent_send_wr {
-	struct list_head send_list;
-	struct ib_ah *ah;
-	struct ib_mad_private *mad;
-	DECLARE_PCI_UNMAP_ADDR(mapping)
-};
-
-struct ib_agent_port_private {
-	struct list_head port_list;
-	struct list_head send_posted_list;
-	spinlock_t send_list_lock;
-	int port_num;
-	struct ib_mad_agent *smp_agent;	      /* SM class */
-	struct ib_mad_agent *perf_mgmt_agent; /* PerfMgmt class */
-};
-
-#endif	/* __IB_AGENT_PRIV_H__ */
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index 389fecba..580c3a2 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -176,8 +176,7 @@
 
 	m = ib_create_send_mad(mad_agent, cm_id_priv->id.remote_cm_qpn, 
 			       cm_id_priv->av.pkey_index,
-			       ah, 0, sizeof(struct ib_mad_hdr),
-			       sizeof(struct ib_mad)-sizeof(struct ib_mad_hdr),
+			       0, IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA,
 			       GFP_ATOMIC);
 	if (IS_ERR(m)) {
 		ib_destroy_ah(ah);
@@ -185,7 +184,8 @@
 	}
 
 	/* Timeout set by caller if response is expected. */
-	m->send_wr.wr.ud.retries = cm_id_priv->max_cm_retries;
+	m->ah = ah;
+	m->retries = cm_id_priv->max_cm_retries;
 
 	atomic_inc(&cm_id_priv->refcount);
 	m->context[0] = cm_id_priv;
@@ -206,20 +206,20 @@
 		return PTR_ERR(ah);
 
 	m = ib_create_send_mad(port->mad_agent, 1, mad_recv_wc->wc->pkey_index,
-			       ah, 0, sizeof(struct ib_mad_hdr),
-			       sizeof(struct ib_mad)-sizeof(struct ib_mad_hdr),
+			       0, IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA,
 			       GFP_ATOMIC);
 	if (IS_ERR(m)) {
 		ib_destroy_ah(ah);
 		return PTR_ERR(m);
 	}
+	m->ah = ah;
 	*msg = m;
 	return 0;
 }
 
 static void cm_free_msg(struct ib_mad_send_buf *msg)
 {
-	ib_destroy_ah(msg->send_wr.wr.ud.ah);
+	ib_destroy_ah(msg->ah);
 	if (msg->context[0])
 		cm_deref_id(msg->context[0]);
 	ib_free_send_mad(msg);
@@ -678,8 +678,7 @@
 		break;
 	case IB_CM_SIDR_REQ_SENT:
 		cm_id->state = IB_CM_IDLE;
-		ib_cancel_mad(cm_id_priv->av.port->mad_agent,
-			      (unsigned long) cm_id_priv->msg);
+		ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
 		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
 		break;
 	case IB_CM_SIDR_REQ_RCVD:
@@ -690,8 +689,7 @@
 	case IB_CM_MRA_REQ_RCVD:
 	case IB_CM_REP_SENT:
 	case IB_CM_MRA_REP_RCVD:
-		ib_cancel_mad(cm_id_priv->av.port->mad_agent,
-			      (unsigned long) cm_id_priv->msg);
+		ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
 		/* Fall through */
 	case IB_CM_REQ_RCVD:
 	case IB_CM_MRA_REQ_SENT:
@@ -708,8 +706,7 @@
 		ib_send_cm_dreq(cm_id, NULL, 0);
 		goto retest;
 	case IB_CM_DREQ_SENT:
-		ib_cancel_mad(cm_id_priv->av.port->mad_agent,
-			      (unsigned long) cm_id_priv->msg);
+		ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
 		cm_enter_timewait(cm_id_priv);
 		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
 		break;
@@ -883,7 +880,6 @@
 		   struct ib_cm_req_param *param)
 {
 	struct cm_id_private *cm_id_priv;
-	struct ib_send_wr *bad_send_wr;
 	struct cm_req_msg *req_msg;
 	unsigned long flags;
 	int ret;
@@ -936,7 +932,7 @@
 	req_msg = (struct cm_req_msg *) cm_id_priv->msg->mad;
 	cm_format_req(req_msg, cm_id_priv, param);
 	cm_id_priv->tid = req_msg->hdr.tid;
-	cm_id_priv->msg->send_wr.wr.ud.timeout_ms = cm_id_priv->timeout_ms;
+	cm_id_priv->msg->timeout_ms = cm_id_priv->timeout_ms;
 	cm_id_priv->msg->context[1] = (void *) (unsigned long) IB_CM_REQ_SENT;
 
 	cm_id_priv->local_qpn = cm_req_get_local_qpn(req_msg);
@@ -945,8 +941,7 @@
 				cm_req_get_primary_local_ack_timeout(req_msg);
 
 	spin_lock_irqsave(&cm_id_priv->lock, flags);
-	ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent,
-				&cm_id_priv->msg->send_wr, &bad_send_wr);
+	ret = ib_post_send_mad(cm_id_priv->msg, NULL);
 	if (ret) {
 		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
 		goto error2;
@@ -969,7 +964,6 @@
 			void *ari, u8 ari_length)
 {
 	struct ib_mad_send_buf *msg = NULL;
-	struct ib_send_wr *bad_send_wr;
 	struct cm_rej_msg *rej_msg, *rcv_msg;
 	int ret;
 
@@ -992,7 +986,7 @@
 		memcpy(rej_msg->ari, ari, ari_length);
 	}
 
-	ret = ib_post_send_mad(port->mad_agent, &msg->send_wr, &bad_send_wr);
+	ret = ib_post_send_mad(msg, NULL);
 	if (ret)
 		cm_free_msg(msg);
 
@@ -1172,7 +1166,6 @@
 			       struct cm_id_private *cm_id_priv)
 {
 	struct ib_mad_send_buf *msg = NULL;
-	struct ib_send_wr *bad_send_wr;
 	unsigned long flags;
 	int ret;
 
@@ -1201,8 +1194,7 @@
 	}
 	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
 
-	ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent, &msg->send_wr,
-			       &bad_send_wr);
+	ret = ib_post_send_mad(msg, NULL);
 	if (ret)
 		goto free;
 	return;
@@ -1367,7 +1359,6 @@
 	struct cm_id_private *cm_id_priv;
 	struct ib_mad_send_buf *msg;
 	struct cm_rep_msg *rep_msg;
-	struct ib_send_wr *bad_send_wr;
 	unsigned long flags;
 	int ret;
 
@@ -1389,11 +1380,10 @@
 
 	rep_msg = (struct cm_rep_msg *) msg->mad;
 	cm_format_rep(rep_msg, cm_id_priv, param);
-	msg->send_wr.wr.ud.timeout_ms = cm_id_priv->timeout_ms;
+	msg->timeout_ms = cm_id_priv->timeout_ms;
 	msg->context[1] = (void *) (unsigned long) IB_CM_REP_SENT;
 
-	ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent,
-			       &msg->send_wr, &bad_send_wr);
+	ret = ib_post_send_mad(msg, NULL);
 	if (ret) {
 		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
 		cm_free_msg(msg);
@@ -1431,7 +1421,6 @@
 {
 	struct cm_id_private *cm_id_priv;
 	struct ib_mad_send_buf *msg;
-	struct ib_send_wr *bad_send_wr;
 	unsigned long flags;
 	void *data;
 	int ret;
@@ -1458,8 +1447,7 @@
 	cm_format_rtu((struct cm_rtu_msg *) msg->mad, cm_id_priv,
 		      private_data, private_data_len);
 
-	ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent,
-			       &msg->send_wr, &bad_send_wr);
+	ret = ib_post_send_mad(msg, NULL);
 	if (ret) {
 		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
 		cm_free_msg(msg);
@@ -1504,7 +1492,6 @@
 	struct cm_id_private *cm_id_priv;
 	struct cm_rep_msg *rep_msg;
 	struct ib_mad_send_buf *msg = NULL;
-	struct ib_send_wr *bad_send_wr;
 	unsigned long flags;
 	int ret;
 
@@ -1532,8 +1519,7 @@
 		goto unlock;
 	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
 
-	ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent, &msg->send_wr,
-			       &bad_send_wr);
+	ret = ib_post_send_mad(msg, NULL);
 	if (ret)
 		goto free;
 	goto deref;
@@ -1601,8 +1587,7 @@
 
 	/* todo: handle peer_to_peer */
 
-	ib_cancel_mad(cm_id_priv->av.port->mad_agent,
-		      (unsigned long) cm_id_priv->msg);
+	ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
 	ret = atomic_inc_and_test(&cm_id_priv->work_count);
 	if (!ret)
 		list_add_tail(&work->list, &cm_id_priv->work_list);
@@ -1636,8 +1621,7 @@
 		goto out;
 	}
 
-	ib_cancel_mad(cm_id_priv->av.port->mad_agent,
-		      (unsigned long) cm_id_priv->msg);
+	ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
 	ret = atomic_inc_and_test(&cm_id_priv->work_count);
 	if (!ret)
 		list_add_tail(&work->list, &cm_id_priv->work_list);
@@ -1676,8 +1660,7 @@
 	}
 	cm_id_priv->id.state = IB_CM_ESTABLISHED;
 
-	ib_cancel_mad(cm_id_priv->av.port->mad_agent,
-		      (unsigned long) cm_id_priv->msg);
+	ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
 	ret = atomic_inc_and_test(&cm_id_priv->work_count);
 	if (!ret)
 		list_add_tail(&work->list, &cm_id_priv->work_list);
@@ -1714,7 +1697,6 @@
 {
 	struct cm_id_private *cm_id_priv;
 	struct ib_mad_send_buf *msg;
-	struct ib_send_wr *bad_send_wr;
 	unsigned long flags;
 	int ret;
 
@@ -1736,11 +1718,10 @@
 
 	cm_format_dreq((struct cm_dreq_msg *) msg->mad, cm_id_priv,
 		       private_data, private_data_len);
-	msg->send_wr.wr.ud.timeout_ms = cm_id_priv->timeout_ms;
+	msg->timeout_ms = cm_id_priv->timeout_ms;
 	msg->context[1] = (void *) (unsigned long) IB_CM_DREQ_SENT;
 
-	ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent,
-			       &msg->send_wr, &bad_send_wr);
+	ret = ib_post_send_mad(msg, NULL);
 	if (ret) {
 		cm_enter_timewait(cm_id_priv);
 		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
@@ -1774,7 +1755,6 @@
 {
 	struct cm_id_private *cm_id_priv;
 	struct ib_mad_send_buf *msg;
-	struct ib_send_wr *bad_send_wr;
 	unsigned long flags;
 	void *data;
 	int ret;
@@ -1804,8 +1784,7 @@
 	cm_format_drep((struct cm_drep_msg *) msg->mad, cm_id_priv,
 		       private_data, private_data_len);
 
-	ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent, &msg->send_wr,
-			       &bad_send_wr);
+	ret = ib_post_send_mad(msg, NULL);
 	if (ret) {
 		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
 		cm_free_msg(msg);
@@ -1822,7 +1801,6 @@
 	struct cm_id_private *cm_id_priv;
 	struct cm_dreq_msg *dreq_msg;
 	struct ib_mad_send_buf *msg = NULL;
-	struct ib_send_wr *bad_send_wr;
 	unsigned long flags;
 	int ret;
 
@@ -1841,8 +1819,7 @@
 	switch (cm_id_priv->id.state) {
 	case IB_CM_REP_SENT:
 	case IB_CM_DREQ_SENT:
-		ib_cancel_mad(cm_id_priv->av.port->mad_agent,
-			      (unsigned long) cm_id_priv->msg);
+		ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
 		break;
 	case IB_CM_ESTABLISHED:
 	case IB_CM_MRA_REP_RCVD:
@@ -1856,8 +1833,7 @@
 			       cm_id_priv->private_data_len);
 		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
 
-		if (ib_post_send_mad(cm_id_priv->av.port->mad_agent,
-				     &msg->send_wr, &bad_send_wr))
+		if (ib_post_send_mad(msg, NULL))
 			cm_free_msg(msg);
 		goto deref;
 	default:
@@ -1904,8 +1880,7 @@
 	}
 	cm_enter_timewait(cm_id_priv);
 
-	ib_cancel_mad(cm_id_priv->av.port->mad_agent,
-		      (unsigned long) cm_id_priv->msg);
+	ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
 	ret = atomic_inc_and_test(&cm_id_priv->work_count);
 	if (!ret)
 		list_add_tail(&work->list, &cm_id_priv->work_list);
@@ -1930,7 +1905,6 @@
 {
 	struct cm_id_private *cm_id_priv;
 	struct ib_mad_send_buf *msg;
-	struct ib_send_wr *bad_send_wr;
 	unsigned long flags;
 	int ret;
 
@@ -1974,8 +1948,7 @@
 	if (ret)
 		goto out;
 
-	ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent,
-			       &msg->send_wr, &bad_send_wr);
+	ret = ib_post_send_mad(msg, NULL);
 	if (ret)
 		cm_free_msg(msg);
 
@@ -2051,8 +2024,7 @@
 	case IB_CM_MRA_REQ_RCVD:
 	case IB_CM_REP_SENT:
 	case IB_CM_MRA_REP_RCVD:
-		ib_cancel_mad(cm_id_priv->av.port->mad_agent,
-			      (unsigned long) cm_id_priv->msg);
+		ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
 		/* fall through */
 	case IB_CM_REQ_RCVD:
 	case IB_CM_MRA_REQ_SENT:
@@ -2062,8 +2034,7 @@
 			cm_reset_to_idle(cm_id_priv);
 		break;
 	case IB_CM_DREQ_SENT:
-		ib_cancel_mad(cm_id_priv->av.port->mad_agent,
-			      (unsigned long) cm_id_priv->msg);
+		ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
 		/* fall through */
 	case IB_CM_REP_RCVD:
 	case IB_CM_MRA_REP_SENT:
@@ -2098,7 +2069,6 @@
 {
 	struct cm_id_private *cm_id_priv;
 	struct ib_mad_send_buf *msg;
-	struct ib_send_wr *bad_send_wr;
 	void *data;
 	unsigned long flags;
 	int ret;
@@ -2122,8 +2092,7 @@
 		cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
 			      CM_MSG_RESPONSE_REQ, service_timeout,
 			      private_data, private_data_len);
-		ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent,
-				       &msg->send_wr, &bad_send_wr);
+		ret = ib_post_send_mad(msg, NULL);
 		if (ret)
 			goto error2;
 		cm_id->state = IB_CM_MRA_REQ_SENT;
@@ -2136,8 +2105,7 @@
 		cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
 			      CM_MSG_RESPONSE_REP, service_timeout,
 			      private_data, private_data_len);
-		ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent,
-				       &msg->send_wr, &bad_send_wr);
+		ret = ib_post_send_mad(msg, NULL);
 		if (ret)
 			goto error2;
 		cm_id->state = IB_CM_MRA_REP_SENT;
@@ -2150,8 +2118,7 @@
 		cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
 			      CM_MSG_RESPONSE_OTHER, service_timeout,
 			      private_data, private_data_len);
-		ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent,
-				       &msg->send_wr, &bad_send_wr);
+		ret = ib_post_send_mad(msg, NULL);
 		if (ret)
 			goto error2;
 		cm_id->lap_state = IB_CM_MRA_LAP_SENT;
@@ -2213,14 +2180,14 @@
 	case IB_CM_REQ_SENT:
 		if (cm_mra_get_msg_mraed(mra_msg) != CM_MSG_RESPONSE_REQ ||
 		    ib_modify_mad(cm_id_priv->av.port->mad_agent,
-				  (unsigned long) cm_id_priv->msg, timeout))
+				  cm_id_priv->msg, timeout))
 			goto out;
 		cm_id_priv->id.state = IB_CM_MRA_REQ_RCVD;
 		break;
 	case IB_CM_REP_SENT:
 		if (cm_mra_get_msg_mraed(mra_msg) != CM_MSG_RESPONSE_REP ||
 		    ib_modify_mad(cm_id_priv->av.port->mad_agent,
-				  (unsigned long) cm_id_priv->msg, timeout))
+				  cm_id_priv->msg, timeout))
 			goto out;
 		cm_id_priv->id.state = IB_CM_MRA_REP_RCVD;
 		break;
@@ -2228,7 +2195,7 @@
 		if (cm_mra_get_msg_mraed(mra_msg) != CM_MSG_RESPONSE_OTHER ||
 		    cm_id_priv->id.lap_state != IB_CM_LAP_SENT ||
 		    ib_modify_mad(cm_id_priv->av.port->mad_agent,
-				  (unsigned long) cm_id_priv->msg, timeout))
+				  cm_id_priv->msg, timeout))
 			goto out;
 		cm_id_priv->id.lap_state = IB_CM_MRA_LAP_RCVD;
 		break;
@@ -2291,7 +2258,6 @@
 {
 	struct cm_id_private *cm_id_priv;
 	struct ib_mad_send_buf *msg;
-	struct ib_send_wr *bad_send_wr;
 	unsigned long flags;
 	int ret;
 
@@ -2312,11 +2278,10 @@
 
 	cm_format_lap((struct cm_lap_msg *) msg->mad, cm_id_priv,
 		      alternate_path, private_data, private_data_len);
-	msg->send_wr.wr.ud.timeout_ms = cm_id_priv->timeout_ms;
+	msg->timeout_ms = cm_id_priv->timeout_ms;
 	msg->context[1] = (void *) (unsigned long) IB_CM_ESTABLISHED;
 
-	ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent,
-			       &msg->send_wr, &bad_send_wr);
+	ret = ib_post_send_mad(msg, NULL);
 	if (ret) {
 		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
 		cm_free_msg(msg);
@@ -2360,7 +2325,6 @@
 	struct cm_lap_msg *lap_msg;
 	struct ib_cm_lap_event_param *param;
 	struct ib_mad_send_buf *msg = NULL;
-	struct ib_send_wr *bad_send_wr;
 	unsigned long flags;
 	int ret;
 
@@ -2394,8 +2358,7 @@
 			      cm_id_priv->private_data_len);
 		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
 
-		if (ib_post_send_mad(cm_id_priv->av.port->mad_agent,
-				     &msg->send_wr, &bad_send_wr))
+		if (ib_post_send_mad(msg, NULL))
 			cm_free_msg(msg);
 		goto deref;
 	default:
@@ -2451,7 +2414,6 @@
 {
 	struct cm_id_private *cm_id_priv;
 	struct ib_mad_send_buf *msg;
-	struct ib_send_wr *bad_send_wr;
 	unsigned long flags;
 	int ret;
 
@@ -2474,8 +2436,7 @@
 
 	cm_format_apr((struct cm_apr_msg *) msg->mad, cm_id_priv, status,
 		      info, info_length, private_data, private_data_len);
-	ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent,
-			       &msg->send_wr, &bad_send_wr);
+	ret = ib_post_send_mad(msg, NULL);
 	if (ret) {
 		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
 		cm_free_msg(msg);
@@ -2514,8 +2475,7 @@
 		goto out;
 	}
 	cm_id_priv->id.lap_state = IB_CM_LAP_IDLE;
-	ib_cancel_mad(cm_id_priv->av.port->mad_agent,
-		      (unsigned long) cm_id_priv->msg);
+	ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
 	cm_id_priv->msg = NULL;
 
 	ret = atomic_inc_and_test(&cm_id_priv->work_count);
@@ -2590,7 +2550,6 @@
 {
 	struct cm_id_private *cm_id_priv;
 	struct ib_mad_send_buf *msg;
-	struct ib_send_wr *bad_send_wr;
 	unsigned long flags;
 	int ret;
 
@@ -2613,13 +2572,12 @@
 
 	cm_format_sidr_req((struct cm_sidr_req_msg *) msg->mad, cm_id_priv,
 			   param);
-	msg->send_wr.wr.ud.timeout_ms = cm_id_priv->timeout_ms;
+	msg->timeout_ms = cm_id_priv->timeout_ms;
 	msg->context[1] = (void *) (unsigned long) IB_CM_SIDR_REQ_SENT;
 
 	spin_lock_irqsave(&cm_id_priv->lock, flags);
 	if (cm_id->state == IB_CM_IDLE)
-		ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent,
-				       &msg->send_wr, &bad_send_wr);
+		ret = ib_post_send_mad(msg, NULL);
 	else
 		ret = -EINVAL;
 
@@ -2733,7 +2691,6 @@
 {
 	struct cm_id_private *cm_id_priv;
 	struct ib_mad_send_buf *msg;
-	struct ib_send_wr *bad_send_wr;
 	unsigned long flags;
 	int ret;
 
@@ -2755,8 +2712,7 @@
 
 	cm_format_sidr_rep((struct cm_sidr_rep_msg *) msg->mad, cm_id_priv,
 			   param);
-	ret = ib_post_send_mad(cm_id_priv->av.port->mad_agent,
-			       &msg->send_wr, &bad_send_wr);
+	ret = ib_post_send_mad(msg, NULL);
 	if (ret) {
 		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
 		cm_free_msg(msg);
@@ -2809,8 +2765,7 @@
 		goto out;
 	}
 	cm_id_priv->id.state = IB_CM_IDLE;
-	ib_cancel_mad(cm_id_priv->av.port->mad_agent,
-		      (unsigned long) cm_id_priv->msg);
+	ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
 	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
 
 	cm_format_sidr_rep_event(work);
@@ -2878,9 +2833,7 @@
 static void cm_send_handler(struct ib_mad_agent *mad_agent,
 			    struct ib_mad_send_wc *mad_send_wc)
 {
-	struct ib_mad_send_buf *msg;
-
-	msg = (struct ib_mad_send_buf *)(unsigned long)mad_send_wc->wr_id;
+	struct ib_mad_send_buf *msg = mad_send_wc->send_buf;
 
 	switch (mad_send_wc->status) {
 	case IB_WC_SUCCESS:
diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
index af302e8..88f9f8c 100644
--- a/drivers/infiniband/core/mad.c
+++ b/drivers/infiniband/core/mad.c
@@ -579,7 +579,7 @@
 }
 
 static void snoop_send(struct ib_mad_qp_info *qp_info,
-		       struct ib_send_wr *send_wr,
+		       struct ib_mad_send_buf *send_buf,
 		       struct ib_mad_send_wc *mad_send_wc,
 		       int mad_snoop_flags)
 {
@@ -597,7 +597,7 @@
 		atomic_inc(&mad_snoop_priv->refcount);
 		spin_unlock_irqrestore(&qp_info->snoop_lock, flags);
 		mad_snoop_priv->agent.snoop_handler(&mad_snoop_priv->agent,
-						    send_wr, mad_send_wc);
+						    send_buf, mad_send_wc);
 		if (atomic_dec_and_test(&mad_snoop_priv->refcount))
 			wake_up(&mad_snoop_priv->wait);
 		spin_lock_irqsave(&qp_info->snoop_lock, flags);
@@ -654,10 +654,10 @@
  * Return < 0 if error
  */
 static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv,
-				  struct ib_smp *smp,
-				  struct ib_send_wr *send_wr)
+				  struct ib_mad_send_wr_private *mad_send_wr)
 {
 	int ret;
+	struct ib_smp *smp = mad_send_wr->send_buf.mad;
 	unsigned long flags;
 	struct ib_mad_local_private *local;
 	struct ib_mad_private *mad_priv;
@@ -666,6 +666,7 @@
 	struct ib_device *device = mad_agent_priv->agent.device;
 	u8 port_num = mad_agent_priv->agent.port_num;
 	struct ib_wc mad_wc;
+	struct ib_send_wr *send_wr = &mad_send_wr->send_wr;
 
 	if (!smi_handle_dr_smp_send(smp, device->node_type, port_num)) {
 		ret = -EINVAL;
@@ -745,13 +746,7 @@
 		goto out;
 	}
 
-	local->send_wr = *send_wr;
-	local->send_wr.sg_list = local->sg_list;
-	memcpy(local->sg_list, send_wr->sg_list,
-	       sizeof *send_wr->sg_list * send_wr->num_sge);
-	local->send_wr.next = NULL;
-	local->tid = send_wr->wr.ud.mad_hdr->tid;
-	local->wr_id = send_wr->wr_id;
+	local->mad_send_wr = mad_send_wr;
 	/* Reference MAD agent until send side of local completion handled */
 	atomic_inc(&mad_agent_priv->refcount);
 	/* Queue local completion to local list */
@@ -781,17 +776,17 @@
 
 struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent,
 					    u32 remote_qpn, u16 pkey_index,
-					    struct ib_ah *ah, int rmpp_active,
+					    int rmpp_active,
 					    int hdr_len, int data_len,
 					    gfp_t gfp_mask)
 {
 	struct ib_mad_agent_private *mad_agent_priv;
-	struct ib_mad_send_buf *send_buf;
+	struct ib_mad_send_wr_private *mad_send_wr;
 	int buf_size;
 	void *buf;
 
-	mad_agent_priv = container_of(mad_agent,
-				      struct ib_mad_agent_private, agent);
+	mad_agent_priv = container_of(mad_agent, struct ib_mad_agent_private,
+				      agent);
 	buf_size = get_buf_length(hdr_len, data_len);
 
 	if ((!mad_agent->rmpp_version &&
@@ -799,45 +794,40 @@
 	    (!rmpp_active && buf_size > sizeof(struct ib_mad)))
 		return ERR_PTR(-EINVAL);
 
-	buf = kmalloc(sizeof *send_buf + buf_size, gfp_mask);
+	buf = kmalloc(sizeof *mad_send_wr + buf_size, gfp_mask);
 	if (!buf)
 		return ERR_PTR(-ENOMEM);
-	memset(buf, 0, sizeof *send_buf + buf_size);
+	memset(buf, 0, sizeof *mad_send_wr + buf_size);
 
-	send_buf = buf + buf_size;
-	send_buf->mad = buf;
+	mad_send_wr = buf + buf_size;
+	mad_send_wr->send_buf.mad = buf;
 
-	send_buf->sge.addr = dma_map_single(mad_agent->device->dma_device,
-					    buf, buf_size, DMA_TO_DEVICE);
-	pci_unmap_addr_set(send_buf, mapping, send_buf->sge.addr);
-	send_buf->sge.length = buf_size;
-	send_buf->sge.lkey = mad_agent->mr->lkey;
+	mad_send_wr->mad_agent_priv = mad_agent_priv;
+	mad_send_wr->sg_list[0].length = buf_size;
+	mad_send_wr->sg_list[0].lkey = mad_agent->mr->lkey;
 
-	send_buf->send_wr.wr_id = (unsigned long) send_buf;
-	send_buf->send_wr.sg_list = &send_buf->sge;
-	send_buf->send_wr.num_sge = 1;
-	send_buf->send_wr.opcode = IB_WR_SEND;
-	send_buf->send_wr.send_flags = IB_SEND_SIGNALED;
-	send_buf->send_wr.wr.ud.ah = ah;
-	send_buf->send_wr.wr.ud.mad_hdr = &send_buf->mad->mad_hdr;
-	send_buf->send_wr.wr.ud.remote_qpn = remote_qpn;
-	send_buf->send_wr.wr.ud.remote_qkey = IB_QP_SET_QKEY;
-	send_buf->send_wr.wr.ud.pkey_index = pkey_index;
+	mad_send_wr->send_wr.wr_id = (unsigned long) mad_send_wr;
+	mad_send_wr->send_wr.sg_list = mad_send_wr->sg_list;
+	mad_send_wr->send_wr.num_sge = 1;
+	mad_send_wr->send_wr.opcode = IB_WR_SEND;
+	mad_send_wr->send_wr.send_flags = IB_SEND_SIGNALED;
+	mad_send_wr->send_wr.wr.ud.remote_qpn = remote_qpn;
+	mad_send_wr->send_wr.wr.ud.remote_qkey = IB_QP_SET_QKEY;
+	mad_send_wr->send_wr.wr.ud.pkey_index = pkey_index;
 
 	if (rmpp_active) {
-		struct ib_rmpp_mad *rmpp_mad;
-		rmpp_mad = (struct ib_rmpp_mad *)send_buf->mad;
+		struct ib_rmpp_mad *rmpp_mad = mad_send_wr->send_buf.mad;
 		rmpp_mad->rmpp_hdr.paylen_newwin = cpu_to_be32(hdr_len -
-			offsetof(struct ib_rmpp_mad, data) + data_len);
+						   IB_MGMT_RMPP_HDR + data_len);
 		rmpp_mad->rmpp_hdr.rmpp_version = mad_agent->rmpp_version;
 		rmpp_mad->rmpp_hdr.rmpp_type = IB_MGMT_RMPP_TYPE_DATA;
 		ib_set_rmpp_flags(&rmpp_mad->rmpp_hdr,
 				  IB_MGMT_RMPP_FLAG_ACTIVE);
 	}
 
-	send_buf->mad_agent = mad_agent;
+	mad_send_wr->send_buf.mad_agent = mad_agent;
 	atomic_inc(&mad_agent_priv->refcount);
-	return send_buf;
+	return &mad_send_wr->send_buf;
 }
 EXPORT_SYMBOL(ib_create_send_mad);
 
@@ -847,10 +837,6 @@
 
 	mad_agent_priv = container_of(send_buf->mad_agent,
 				      struct ib_mad_agent_private, agent);
-
-	dma_unmap_single(send_buf->mad_agent->device->dma_device,
-			 pci_unmap_addr(send_buf, mapping),
-			 send_buf->sge.length, DMA_TO_DEVICE);
 	kfree(send_buf->mad);
 
 	if (atomic_dec_and_test(&mad_agent_priv->refcount))
@@ -861,8 +847,10 @@
 int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr)
 {
 	struct ib_mad_qp_info *qp_info;
-	struct ib_send_wr *bad_send_wr;
 	struct list_head *list;
+	struct ib_send_wr *bad_send_wr;
+	struct ib_mad_agent *mad_agent;
+	struct ib_sge *sge;
 	unsigned long flags;
 	int ret;
 
@@ -871,10 +859,17 @@
 	mad_send_wr->send_wr.wr_id = (unsigned long)&mad_send_wr->mad_list;
 	mad_send_wr->mad_list.mad_queue = &qp_info->send_queue;
 
+	mad_agent = mad_send_wr->send_buf.mad_agent;
+	sge = mad_send_wr->sg_list;
+	sge->addr = dma_map_single(mad_agent->device->dma_device,
+				   mad_send_wr->send_buf.mad, sge->length,
+				   DMA_TO_DEVICE);
+	pci_unmap_addr_set(mad_send_wr, mapping, sge->addr);
+
 	spin_lock_irqsave(&qp_info->send_queue.lock, flags);
 	if (qp_info->send_queue.count < qp_info->send_queue.max_active) {
-		ret = ib_post_send(mad_send_wr->mad_agent_priv->agent.qp,
-				   &mad_send_wr->send_wr, &bad_send_wr);
+		ret = ib_post_send(mad_agent->qp, &mad_send_wr->send_wr,
+				   &bad_send_wr);
 		list = &qp_info->send_queue.list;
 	} else {
 		ret = 0;
@@ -886,6 +881,11 @@
 		list_add_tail(&mad_send_wr->mad_list.list, list);
 	}
 	spin_unlock_irqrestore(&qp_info->send_queue.lock, flags);
+	if (ret)
+		dma_unmap_single(mad_agent->device->dma_device,
+				 pci_unmap_addr(mad_send_wr, mapping),
+				 sge->length, DMA_TO_DEVICE);
+
 	return ret;
 }
 
@@ -893,45 +893,28 @@
  * ib_post_send_mad - Posts MAD(s) to the send queue of the QP associated
  *  with the registered client
  */
-int ib_post_send_mad(struct ib_mad_agent *mad_agent,
-		     struct ib_send_wr *send_wr,
-		     struct ib_send_wr **bad_send_wr)
+int ib_post_send_mad(struct ib_mad_send_buf *send_buf,
+		     struct ib_mad_send_buf **bad_send_buf)
 {
-	int ret = -EINVAL;
 	struct ib_mad_agent_private *mad_agent_priv;
-
-	/* Validate supplied parameters */
-	if (!bad_send_wr)
-		goto error1;
-
-	if (!mad_agent || !send_wr)
-		goto error2;
-
-	if (!mad_agent->send_handler)
-		goto error2;
-
-	mad_agent_priv = container_of(mad_agent,
-				      struct ib_mad_agent_private,
-				      agent);
+	struct ib_mad_send_buf *next_send_buf;
+	struct ib_mad_send_wr_private *mad_send_wr;
+	unsigned long flags;
+	int ret = -EINVAL;
 
 	/* Walk list of send WRs and post each on send list */
-	while (send_wr) {
-		unsigned long			flags;
-		struct ib_send_wr		*next_send_wr;
-		struct ib_mad_send_wr_private	*mad_send_wr;
-		struct ib_smp			*smp;
+	for (; send_buf; send_buf = next_send_buf) {
 
-		/* Validate more parameters */
-		if (send_wr->num_sge > IB_MAD_SEND_REQ_MAX_SG)
-			goto error2;
+		mad_send_wr = container_of(send_buf,
+					   struct ib_mad_send_wr_private,
+					   send_buf);
+		mad_agent_priv = mad_send_wr->mad_agent_priv;
 
-		if (send_wr->wr.ud.timeout_ms && !mad_agent->recv_handler)
-			goto error2;
-
-		if (!send_wr->wr.ud.mad_hdr) {
-			printk(KERN_ERR PFX "MAD header must be supplied "
-			       "in WR %p\n", send_wr);
-			goto error2;
+		if (!send_buf->mad_agent->send_handler ||
+		    (send_buf->timeout_ms &&
+		     !send_buf->mad_agent->recv_handler)) {
+			ret = -EINVAL;
+			goto error;
 		}
 
 		/*
@@ -939,40 +922,24 @@
 		 * current one completes, and the user modifies the work
 		 * request associated with the completion
 		 */
-		next_send_wr = (struct ib_send_wr *)send_wr->next;
+		next_send_buf = send_buf->next;
+		mad_send_wr->send_wr.wr.ud.ah = send_buf->ah;
 
-		smp = (struct ib_smp *)send_wr->wr.ud.mad_hdr;
-		if (smp->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
-			ret = handle_outgoing_dr_smp(mad_agent_priv, smp,
-						     send_wr);
+		if (((struct ib_mad_hdr *) send_buf->mad)->mgmt_class ==
+		    IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
+			ret = handle_outgoing_dr_smp(mad_agent_priv,
+						     mad_send_wr);
 			if (ret < 0)		/* error */
-				goto error2;
+				goto error;
 			else if (ret == 1)	/* locally consumed */
-				goto next;
+				continue;
 		}
 
-		/* Allocate MAD send WR tracking structure */
-		mad_send_wr = kmalloc(sizeof *mad_send_wr, GFP_ATOMIC);
-		if (!mad_send_wr) {
-			printk(KERN_ERR PFX "No memory for "
-			       "ib_mad_send_wr_private\n");
-			ret = -ENOMEM;
-			goto error2;
-		}
-		memset(mad_send_wr, 0, sizeof *mad_send_wr);
-
-		mad_send_wr->send_wr = *send_wr;
-		mad_send_wr->send_wr.sg_list = mad_send_wr->sg_list;
-		memcpy(mad_send_wr->sg_list, send_wr->sg_list,
-		       sizeof *send_wr->sg_list * send_wr->num_sge);
-		mad_send_wr->wr_id = send_wr->wr_id;
-		mad_send_wr->tid = send_wr->wr.ud.mad_hdr->tid;
-		mad_send_wr->mad_agent_priv = mad_agent_priv;
+		mad_send_wr->tid = ((struct ib_mad_hdr *) send_buf->mad)->tid;
 		/* Timeout will be updated after send completes */
-		mad_send_wr->timeout = msecs_to_jiffies(send_wr->wr.
-							ud.timeout_ms);
-		mad_send_wr->retries = mad_send_wr->send_wr.wr.ud.retries;
-		/* One reference for each work request to QP + response */
+		mad_send_wr->timeout = msecs_to_jiffies(send_buf->timeout_ms);
+		mad_send_wr->retries = send_buf->retries;
+		/* Reference for work request to QP + response */
 		mad_send_wr->refcount = 1 + (mad_send_wr->timeout > 0);
 		mad_send_wr->status = IB_WC_SUCCESS;
 
@@ -995,16 +962,13 @@
 			list_del(&mad_send_wr->agent_list);
 			spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
 			atomic_dec(&mad_agent_priv->refcount);
-			goto error2;
+			goto error;
 		}
-next:
-		send_wr = next_send_wr;
 	}
 	return 0;
-
-error2:
-	*bad_send_wr = send_wr;
-error1:
+error:
+	if (bad_send_buf)
+		*bad_send_buf = send_buf;
 	return ret;
 }
 EXPORT_SYMBOL(ib_post_send_mad);
@@ -1447,8 +1411,7 @@
 		 * of MAD.
 		 */
 		hi_tid = be64_to_cpu(mad->mad_hdr.tid) >> 32;
-		list_for_each_entry(entry, &port_priv->agent_list,
-				    agent_list) {
+		list_for_each_entry(entry, &port_priv->agent_list, agent_list) {
 			if (entry->agent.hi_tid == hi_tid) {
 				mad_agent = entry;
 				break;
@@ -1571,8 +1534,7 @@
 	 */
 	list_for_each_entry(mad_send_wr, &mad_agent_priv->send_list,
 			    agent_list) {
-		if (is_data_mad(mad_agent_priv,
-				mad_send_wr->send_wr.wr.ud.mad_hdr) &&
+		if (is_data_mad(mad_agent_priv, mad_send_wr->send_buf.mad) &&
 		    mad_send_wr->tid == tid && mad_send_wr->timeout) {
 			/* Verify request has not been canceled */
 			return (mad_send_wr->status == IB_WC_SUCCESS) ?
@@ -1628,14 +1590,14 @@
 		spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
 
 		/* Defined behavior is to complete response before request */
-		mad_recv_wc->wc->wr_id = mad_send_wr->wr_id;
+		mad_recv_wc->wc->wr_id = (unsigned long) &mad_send_wr->send_buf;
 		mad_agent_priv->agent.recv_handler(&mad_agent_priv->agent,
 						   mad_recv_wc);
 		atomic_dec(&mad_agent_priv->refcount);
 
 		mad_send_wc.status = IB_WC_SUCCESS;
 		mad_send_wc.vendor_err = 0;
-		mad_send_wc.wr_id = mad_send_wr->wr_id;
+		mad_send_wc.send_buf = &mad_send_wr->send_buf;
 		ib_mad_complete_send_wr(mad_send_wr, &mad_send_wc);
 	} else {
 		mad_agent_priv->agent.recv_handler(&mad_agent_priv->agent,
@@ -1728,11 +1690,11 @@
 			if (ret & IB_MAD_RESULT_CONSUMED)
 				goto out;
 			if (ret & IB_MAD_RESULT_REPLY) {
-				/* Send response */
-				if (!agent_send(response, &recv->grh, wc,
-						port_priv->device,
-						port_priv->port_num))
-					response = NULL;
+				agent_send_response(&response->mad.mad,
+						    &recv->grh, wc,
+						    port_priv->device,
+						    port_priv->port_num,
+						    qp_info->qp->qp_num);
 				goto out;
 			}
 		}
@@ -1866,15 +1828,15 @@
 
 	if (mad_send_wr->status != IB_WC_SUCCESS )
 		mad_send_wc->status = mad_send_wr->status;
-	if (ret != IB_RMPP_RESULT_INTERNAL)
+	if (ret == IB_RMPP_RESULT_INTERNAL)
+		ib_rmpp_send_handler(mad_send_wc);
+	else
 		mad_agent_priv->agent.send_handler(&mad_agent_priv->agent,
 						   mad_send_wc);
 
 	/* Release reference on agent taken when sending */
 	if (atomic_dec_and_test(&mad_agent_priv->refcount))
 		wake_up(&mad_agent_priv->wait);
-
-	kfree(mad_send_wr);
 	return;
 done:
 	spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
@@ -1888,6 +1850,7 @@
 	struct ib_mad_qp_info		*qp_info;
 	struct ib_mad_queue		*send_queue;
 	struct ib_send_wr		*bad_send_wr;
+	struct ib_mad_send_wc		mad_send_wc;
 	unsigned long flags;
 	int ret;
 
@@ -1898,6 +1861,9 @@
 	qp_info = send_queue->qp_info;
 
 retry:
+	dma_unmap_single(mad_send_wr->send_buf.mad_agent->device->dma_device,
+			 pci_unmap_addr(mad_send_wr, mapping),
+			 mad_send_wr->sg_list[0].length, DMA_TO_DEVICE);
 	queued_send_wr = NULL;
 	spin_lock_irqsave(&send_queue->lock, flags);
 	list_del(&mad_list->list);
@@ -1914,17 +1880,17 @@
 	}
 	spin_unlock_irqrestore(&send_queue->lock, flags);
 
-	/* Restore client wr_id in WC and complete send */
-	wc->wr_id = mad_send_wr->wr_id;
+	mad_send_wc.send_buf = &mad_send_wr->send_buf;
+	mad_send_wc.status = wc->status;
+	mad_send_wc.vendor_err = wc->vendor_err;
 	if (atomic_read(&qp_info->snoop_count))
-		snoop_send(qp_info, &mad_send_wr->send_wr,
-			   (struct ib_mad_send_wc *)wc,
+		snoop_send(qp_info, &mad_send_wr->send_buf, &mad_send_wc,
 			   IB_MAD_SNOOP_SEND_COMPLETIONS);
-	ib_mad_complete_send_wr(mad_send_wr, (struct ib_mad_send_wc *)wc);
+	ib_mad_complete_send_wr(mad_send_wr, &mad_send_wc);
 
 	if (queued_send_wr) {
 		ret = ib_post_send(qp_info->qp, &queued_send_wr->send_wr,
-				&bad_send_wr);
+				   &bad_send_wr);
 		if (ret) {
 			printk(KERN_ERR PFX "ib_post_send failed: %d\n", ret);
 			mad_send_wr = queued_send_wr;
@@ -2066,38 +2032,37 @@
 
 	list_for_each_entry_safe(mad_send_wr, temp_mad_send_wr,
 				 &cancel_list, agent_list) {
-		mad_send_wc.wr_id = mad_send_wr->wr_id;
+		mad_send_wc.send_buf = &mad_send_wr->send_buf;
+		list_del(&mad_send_wr->agent_list);
 		mad_agent_priv->agent.send_handler(&mad_agent_priv->agent,
 						   &mad_send_wc);
-
-		list_del(&mad_send_wr->agent_list);
-		kfree(mad_send_wr);
 		atomic_dec(&mad_agent_priv->refcount);
 	}
 }
 
 static struct ib_mad_send_wr_private*
-find_send_by_wr_id(struct ib_mad_agent_private *mad_agent_priv, u64 wr_id)
+find_send_wr(struct ib_mad_agent_private *mad_agent_priv,
+	     struct ib_mad_send_buf *send_buf)
 {
 	struct ib_mad_send_wr_private *mad_send_wr;
 
 	list_for_each_entry(mad_send_wr, &mad_agent_priv->wait_list,
 			    agent_list) {
-		if (mad_send_wr->wr_id == wr_id)
+		if (&mad_send_wr->send_buf == send_buf)
 			return mad_send_wr;
 	}
 
 	list_for_each_entry(mad_send_wr, &mad_agent_priv->send_list,
 			    agent_list) {
-		if (is_data_mad(mad_agent_priv,
-				mad_send_wr->send_wr.wr.ud.mad_hdr) &&
-		    mad_send_wr->wr_id == wr_id)
+		if (is_data_mad(mad_agent_priv, mad_send_wr->send_buf.mad) &&
+		    &mad_send_wr->send_buf == send_buf)
 			return mad_send_wr;
 	}
 	return NULL;
 }
 
-int ib_modify_mad(struct ib_mad_agent *mad_agent, u64 wr_id, u32 timeout_ms)
+int ib_modify_mad(struct ib_mad_agent *mad_agent,
+		  struct ib_mad_send_buf *send_buf, u32 timeout_ms)
 {
 	struct ib_mad_agent_private *mad_agent_priv;
 	struct ib_mad_send_wr_private *mad_send_wr;
@@ -2107,7 +2072,7 @@
 	mad_agent_priv = container_of(mad_agent, struct ib_mad_agent_private,
 				      agent);
 	spin_lock_irqsave(&mad_agent_priv->lock, flags);
-	mad_send_wr = find_send_by_wr_id(mad_agent_priv, wr_id);
+	mad_send_wr = find_send_wr(mad_agent_priv, send_buf);
 	if (!mad_send_wr || mad_send_wr->status != IB_WC_SUCCESS) {
 		spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
 		return -EINVAL;
@@ -2119,7 +2084,7 @@
 		mad_send_wr->refcount -= (mad_send_wr->timeout > 0);
 	}
 
-	mad_send_wr->send_wr.wr.ud.timeout_ms = timeout_ms;
+	mad_send_wr->send_buf.timeout_ms = timeout_ms;
 	if (active)
 		mad_send_wr->timeout = msecs_to_jiffies(timeout_ms);
 	else
@@ -2130,9 +2095,10 @@
 }
 EXPORT_SYMBOL(ib_modify_mad);
 
-void ib_cancel_mad(struct ib_mad_agent *mad_agent, u64 wr_id)
+void ib_cancel_mad(struct ib_mad_agent *mad_agent,
+		   struct ib_mad_send_buf *send_buf)
 {
-	ib_modify_mad(mad_agent, wr_id, 0);
+	ib_modify_mad(mad_agent, send_buf, 0);
 }
 EXPORT_SYMBOL(ib_cancel_mad);
 
@@ -2166,10 +2132,9 @@
 			 * Defined behavior is to complete response
 			 * before request
 			 */
-			build_smp_wc(local->wr_id,
+			build_smp_wc((unsigned long) local->mad_send_wr,
 				     be16_to_cpu(IB_LID_PERMISSIVE),
-				     0 /* pkey index */,
-				     recv_mad_agent->agent.port_num, &wc);
+				     0, recv_mad_agent->agent.port_num, &wc);
 
 			local->mad_priv->header.recv_wc.wc = &wc;
 			local->mad_priv->header.recv_wc.mad_len =
@@ -2196,11 +2161,11 @@
 		/* Complete send */
 		mad_send_wc.status = IB_WC_SUCCESS;
 		mad_send_wc.vendor_err = 0;
-		mad_send_wc.wr_id = local->wr_id;
+		mad_send_wc.send_buf = &local->mad_send_wr->send_buf;
 		if (atomic_read(&mad_agent_priv->qp_info->snoop_count))
-			snoop_send(mad_agent_priv->qp_info, &local->send_wr,
-				  &mad_send_wc,
-				   IB_MAD_SNOOP_SEND_COMPLETIONS);
+			snoop_send(mad_agent_priv->qp_info,
+				   &local->mad_send_wr->send_buf,
+				   &mad_send_wc, IB_MAD_SNOOP_SEND_COMPLETIONS);
 		mad_agent_priv->agent.send_handler(&mad_agent_priv->agent,
 						   &mad_send_wc);
 
@@ -2221,8 +2186,7 @@
 	if (!mad_send_wr->retries--)
 		return -ETIMEDOUT;
 
-	mad_send_wr->timeout = msecs_to_jiffies(mad_send_wr->send_wr.
-						wr.ud.timeout_ms);
+	mad_send_wr->timeout = msecs_to_jiffies(mad_send_wr->send_buf.timeout_ms);
 
 	if (mad_send_wr->mad_agent_priv->agent.rmpp_version) {
 		ret = ib_retry_rmpp(mad_send_wr);
@@ -2285,11 +2249,10 @@
 			mad_send_wc.status = IB_WC_RESP_TIMEOUT_ERR;
 		else
 			mad_send_wc.status = mad_send_wr->status;
-		mad_send_wc.wr_id = mad_send_wr->wr_id;
+		mad_send_wc.send_buf = &mad_send_wr->send_buf;
 		mad_agent_priv->agent.send_handler(&mad_agent_priv->agent,
 						   &mad_send_wc);
 
-		kfree(mad_send_wr);
 		atomic_dec(&mad_agent_priv->refcount);
 		spin_lock_irqsave(&mad_agent_priv->lock, flags);
 	}
@@ -2761,7 +2724,6 @@
 	int ret;
 
 	spin_lock_init(&ib_mad_port_list_lock);
-	spin_lock_init(&ib_agent_port_list_lock);
 
 	ib_mad_cache = kmem_cache_create("ib_mad",
 					 sizeof(struct ib_mad_private),
diff --git a/drivers/infiniband/core/mad_priv.h b/drivers/infiniband/core/mad_priv.h
index f1ba794..570f786 100644
--- a/drivers/infiniband/core/mad_priv.h
+++ b/drivers/infiniband/core/mad_priv.h
@@ -118,9 +118,10 @@
 	struct ib_mad_list_head mad_list;
 	struct list_head agent_list;
 	struct ib_mad_agent_private *mad_agent_priv;
+	struct ib_mad_send_buf send_buf;
+	DECLARE_PCI_UNMAP_ADDR(mapping)
 	struct ib_send_wr send_wr;
 	struct ib_sge sg_list[IB_MAD_SEND_REQ_MAX_SG];
-	u64 wr_id;			/* client WR ID */
 	__be64 tid;
 	unsigned long timeout;
 	int retries;
@@ -141,10 +142,7 @@
 	struct list_head completion_list;
 	struct ib_mad_private *mad_priv;
 	struct ib_mad_agent_private *recv_mad_agent;
-	struct ib_send_wr send_wr;
-	struct ib_sge sg_list[IB_MAD_SEND_REQ_MAX_SG];
-	u64 wr_id;			/* client WR ID */
-	__be64 tid;
+	struct ib_mad_send_wr_private *mad_send_wr;
 };
 
 struct ib_mad_mgmt_method_table {
diff --git a/drivers/infiniband/core/mad_rmpp.c b/drivers/infiniband/core/mad_rmpp.c
index e23836d..ba112cd 100644
--- a/drivers/infiniband/core/mad_rmpp.c
+++ b/drivers/infiniband/core/mad_rmpp.c
@@ -103,12 +103,12 @@
 static int data_offset(u8 mgmt_class)
 {
 	if (mgmt_class == IB_MGMT_CLASS_SUBN_ADM)
-		return offsetof(struct ib_sa_mad, data);
+		return IB_MGMT_SA_HDR;
 	else if ((mgmt_class >= IB_MGMT_CLASS_VENDOR_RANGE2_START) &&
 		 (mgmt_class <= IB_MGMT_CLASS_VENDOR_RANGE2_END))
-		return offsetof(struct ib_vendor_mad, data);
+		return IB_MGMT_VENDOR_HDR;
 	else
-		return offsetof(struct ib_rmpp_mad, data);
+		return IB_MGMT_RMPP_HDR;
 }
 
 static void format_ack(struct ib_rmpp_mad *ack,
@@ -135,21 +135,18 @@
 		     struct ib_mad_recv_wc *recv_wc)
 {
 	struct ib_mad_send_buf *msg;
-	struct ib_send_wr *bad_send_wr;
-	int hdr_len, ret;
+	int ret;
 
-	hdr_len = sizeof(struct ib_mad_hdr) + sizeof(struct ib_rmpp_hdr);
 	msg = ib_create_send_mad(&rmpp_recv->agent->agent, recv_wc->wc->src_qp,
-				 recv_wc->wc->pkey_index, rmpp_recv->ah, 1,
-				 hdr_len, sizeof(struct ib_rmpp_mad) - hdr_len,
-				 GFP_KERNEL);
+				 recv_wc->wc->pkey_index, 1, IB_MGMT_RMPP_HDR,
+				 IB_MGMT_RMPP_DATA, GFP_KERNEL);
 	if (!msg)
 		return;
 
-	format_ack((struct ib_rmpp_mad *) msg->mad,
-		   (struct ib_rmpp_mad *) recv_wc->recv_buf.mad, rmpp_recv);
-	ret = ib_post_send_mad(&rmpp_recv->agent->agent, &msg->send_wr,
-			       &bad_send_wr);
+	format_ack(msg->mad, (struct ib_rmpp_mad *) recv_wc->recv_buf.mad,
+		   rmpp_recv);
+	msg->ah = rmpp_recv->ah;
+	ret = ib_post_send_mad(msg, NULL);
 	if (ret)
 		ib_free_send_mad(msg);
 }
@@ -160,30 +157,31 @@
 {
 	struct ib_mad_send_buf *m;
 	struct ib_ah *ah;
-	int hdr_len;
 
 	ah = ib_create_ah_from_wc(agent->qp->pd, recv_wc->wc,
 				  recv_wc->recv_buf.grh, agent->port_num);
 	if (IS_ERR(ah))
 		return PTR_ERR(ah);
 
-	hdr_len = sizeof(struct ib_mad_hdr) + sizeof(struct ib_rmpp_hdr);
 	m = ib_create_send_mad(agent, recv_wc->wc->src_qp,
-			       recv_wc->wc->pkey_index, ah, 1, hdr_len,
-			       sizeof(struct ib_rmpp_mad) - hdr_len,
-			       GFP_KERNEL);
+			       recv_wc->wc->pkey_index, 1,
+			       IB_MGMT_RMPP_HDR, IB_MGMT_RMPP_DATA, GFP_KERNEL);
 	if (IS_ERR(m)) {
 		ib_destroy_ah(ah);
 		return PTR_ERR(m);
 	}
+	m->ah = ah;
 	*msg = m;
 	return 0;
 }
 
-static void free_msg(struct ib_mad_send_buf *msg)
+void ib_rmpp_send_handler(struct ib_mad_send_wc *mad_send_wc)
 {
-	ib_destroy_ah(msg->send_wr.wr.ud.ah);
-	ib_free_send_mad(msg);
+	struct ib_rmpp_mad *rmpp_mad = mad_send_wc->send_buf->mad;
+
+	if (rmpp_mad->rmpp_hdr.rmpp_type != IB_MGMT_RMPP_TYPE_ACK)
+		ib_destroy_ah(mad_send_wc->send_buf->ah);
+	ib_free_send_mad(mad_send_wc->send_buf);
 }
 
 static void nack_recv(struct ib_mad_agent_private *agent,
@@ -191,14 +189,13 @@
 {
 	struct ib_mad_send_buf *msg;
 	struct ib_rmpp_mad *rmpp_mad;
-	struct ib_send_wr *bad_send_wr;
 	int ret;
 
 	ret = alloc_response_msg(&agent->agent, recv_wc, &msg);
 	if (ret)
 		return;
 
-	rmpp_mad = (struct ib_rmpp_mad *) msg->mad;
+	rmpp_mad = msg->mad;
 	memcpy(rmpp_mad, recv_wc->recv_buf.mad,
 	       data_offset(recv_wc->recv_buf.mad->mad_hdr.mgmt_class));
 
@@ -210,9 +207,11 @@
 	rmpp_mad->rmpp_hdr.seg_num = 0;
 	rmpp_mad->rmpp_hdr.paylen_newwin = 0;
 
-	ret = ib_post_send_mad(&agent->agent, &msg->send_wr, &bad_send_wr);
-	if (ret)
-		free_msg(msg);
+	ret = ib_post_send_mad(msg, NULL);
+	if (ret) {
+		ib_destroy_ah(msg->ah);
+		ib_free_send_mad(msg);
+	}
 }
 
 static void recv_timeout_handler(void *data)
@@ -585,7 +584,7 @@
 	int timeout;
 	u32 paylen;
 
-	rmpp_mad = (struct ib_rmpp_mad *)mad_send_wr->send_wr.wr.ud.mad_hdr;
+	rmpp_mad = mad_send_wr->send_buf.mad;
 	ib_set_rmpp_flags(&rmpp_mad->rmpp_hdr, IB_MGMT_RMPP_FLAG_ACTIVE);
 	rmpp_mad->rmpp_hdr.seg_num = cpu_to_be32(mad_send_wr->seg_num);
 
@@ -612,7 +611,7 @@
 	}
 
 	/* 2 seconds for an ACK until we can find the packet lifetime */
-	timeout = mad_send_wr->send_wr.wr.ud.timeout_ms;
+	timeout = mad_send_wr->send_buf.timeout_ms;
 	if (!timeout || timeout > 2000)
 		mad_send_wr->timeout = msecs_to_jiffies(2000);
 	mad_send_wr->seg_num++;
@@ -640,7 +639,7 @@
 
 	wc.status = IB_WC_REM_ABORT_ERR;
 	wc.vendor_err = rmpp_status;
-	wc.wr_id = mad_send_wr->wr_id;
+	wc.send_buf = &mad_send_wr->send_buf;
 	ib_mad_complete_send_wr(mad_send_wr, &wc);
 	return;
 out:
@@ -694,12 +693,12 @@
 
 	if (seg_num > mad_send_wr->last_ack) {
 		mad_send_wr->last_ack = seg_num;
-		mad_send_wr->retries = mad_send_wr->send_wr.wr.ud.retries;
+		mad_send_wr->retries = mad_send_wr->send_buf.retries;
 	}
 	mad_send_wr->newwin = newwin;
 	if (mad_send_wr->last_ack == mad_send_wr->total_seg) {
 		/* If no response is expected, the ACK completes the send */
-		if (!mad_send_wr->send_wr.wr.ud.timeout_ms) {
+		if (!mad_send_wr->send_buf.timeout_ms) {
 			struct ib_mad_send_wc wc;
 
 			ib_mark_mad_done(mad_send_wr);
@@ -707,13 +706,13 @@
 
 			wc.status = IB_WC_SUCCESS;
 			wc.vendor_err = 0;
-			wc.wr_id = mad_send_wr->wr_id;
+			wc.send_buf = &mad_send_wr->send_buf;
 			ib_mad_complete_send_wr(mad_send_wr, &wc);
 			return;
 		}
 		if (mad_send_wr->refcount == 1)
-			ib_reset_mad_timeout(mad_send_wr, mad_send_wr->
-					     send_wr.wr.ud.timeout_ms);
+			ib_reset_mad_timeout(mad_send_wr,
+					     mad_send_wr->send_buf.timeout_ms);
 	} else if (mad_send_wr->refcount == 1 &&
 		   mad_send_wr->seg_num < mad_send_wr->newwin &&
 		   mad_send_wr->seg_num <= mad_send_wr->total_seg) {
@@ -842,7 +841,7 @@
 	struct ib_rmpp_mad *rmpp_mad;
 	int i, total_len, ret;
 
-	rmpp_mad = (struct ib_rmpp_mad *)mad_send_wr->send_wr.wr.ud.mad_hdr;
+	rmpp_mad = mad_send_wr->send_buf.mad;
 	if (!(ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) &
 	      IB_MGMT_RMPP_FLAG_ACTIVE))
 		return IB_RMPP_RESULT_UNHANDLED;
@@ -863,7 +862,7 @@
 
         mad_send_wr->total_seg = (total_len - mad_send_wr->data_offset) /
 			(sizeof(struct ib_rmpp_mad) - mad_send_wr->data_offset);
-	mad_send_wr->pad = total_len - offsetof(struct ib_rmpp_mad, data) -
+	mad_send_wr->pad = total_len - IB_MGMT_RMPP_HDR -
 			   be32_to_cpu(rmpp_mad->rmpp_hdr.paylen_newwin);
 
 	/* We need to wait for the final ACK even if there isn't a response */
@@ -878,23 +877,15 @@
 			    struct ib_mad_send_wc *mad_send_wc)
 {
 	struct ib_rmpp_mad *rmpp_mad;
-	struct ib_mad_send_buf *msg;
 	int ret;
 
-	rmpp_mad = (struct ib_rmpp_mad *)mad_send_wr->send_wr.wr.ud.mad_hdr;
+	rmpp_mad = mad_send_wr->send_buf.mad;
 	if (!(ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) &
 	      IB_MGMT_RMPP_FLAG_ACTIVE))
 		return IB_RMPP_RESULT_UNHANDLED; /* RMPP not active */
 
-	if (rmpp_mad->rmpp_hdr.rmpp_type != IB_MGMT_RMPP_TYPE_DATA) {
-		msg = (struct ib_mad_send_buf *) (unsigned long)
-		      mad_send_wc->wr_id;
-		if (rmpp_mad->rmpp_hdr.rmpp_type == IB_MGMT_RMPP_TYPE_ACK)
-			ib_free_send_mad(msg);
-		else
-			free_msg(msg);
+	if (rmpp_mad->rmpp_hdr.rmpp_type != IB_MGMT_RMPP_TYPE_DATA)
 		return IB_RMPP_RESULT_INTERNAL;	 /* ACK, STOP, or ABORT */
-	}
 
 	if (mad_send_wc->status != IB_WC_SUCCESS ||
 	    mad_send_wr->status != IB_WC_SUCCESS)
@@ -905,7 +896,7 @@
 
 	if (mad_send_wr->last_ack == mad_send_wr->total_seg) {
 		mad_send_wr->timeout =
-			msecs_to_jiffies(mad_send_wr->send_wr.wr.ud.timeout_ms);
+			msecs_to_jiffies(mad_send_wr->send_buf.timeout_ms);
 		return IB_RMPP_RESULT_PROCESSED; /* Send done */
 	}
 
@@ -926,7 +917,7 @@
 	struct ib_rmpp_mad *rmpp_mad;
 	int ret;
 
-	rmpp_mad = (struct ib_rmpp_mad *)mad_send_wr->send_wr.wr.ud.mad_hdr;
+	rmpp_mad = mad_send_wr->send_buf.mad;
 	if (!(ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) &
 	      IB_MGMT_RMPP_FLAG_ACTIVE))
 		return IB_RMPP_RESULT_UNHANDLED; /* RMPP not active */
diff --git a/drivers/infiniband/core/mad_rmpp.h b/drivers/infiniband/core/mad_rmpp.h
index c4924df..f0616fd 100644
--- a/drivers/infiniband/core/mad_rmpp.h
+++ b/drivers/infiniband/core/mad_rmpp.h
@@ -51,6 +51,8 @@
 int ib_process_rmpp_send_wc(struct ib_mad_send_wr_private *mad_send_wr,
 			    struct ib_mad_send_wc *mad_send_wc);
 
+void ib_rmpp_send_handler(struct ib_mad_send_wc *mad_send_wc);
+
 void ib_cancel_rmpp_recvs(struct ib_mad_agent_private *agent);
 
 int ib_retry_rmpp(struct ib_mad_send_wr_private *mad_send_wr);
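
To summarize the address-handle ownership rule the mad_rmpp.c hunks above
implement:

/*
 * ACK:        msg->ah = rmpp_recv->ah -- borrowed from the receive
 *             context, so ib_rmpp_send_handler() must not destroy it.
 * NACK/ABORT: alloc_response_msg() creates a private AH with
 *             ib_create_ah_from_wc(); ib_rmpp_send_handler() destroys
 *             it on completion, and nack_recv() destroys it directly
 *             if ib_post_send_mad() fails.
 */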
diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
index 0e5ef97..89ce9dc 100644
--- a/drivers/infiniband/core/sa_query.c
+++ b/drivers/infiniband/core/sa_query.c
@@ -73,11 +73,10 @@
 struct ib_sa_query {
 	void (*callback)(struct ib_sa_query *, int, struct ib_sa_mad *);
 	void (*release)(struct ib_sa_query *);
-	struct ib_sa_port  *port;
-	struct ib_sa_mad   *mad;
-	struct ib_sa_sm_ah *sm_ah;
-	DECLARE_PCI_UNMAP_ADDR(mapping)
-	int                 id;
+	struct ib_sa_port      *port;
+	struct ib_mad_send_buf *mad_buf;
+	struct ib_sa_sm_ah     *sm_ah;
+	int			id;
 };
 
 struct ib_sa_service_query {
@@ -426,6 +425,7 @@
 {
 	unsigned long flags;
 	struct ib_mad_agent *agent;
+	struct ib_mad_send_buf *mad_buf;
 
 	spin_lock_irqsave(&idr_lock, flags);
 	if (idr_find(&query_idr, id) != query) {
@@ -433,9 +433,10 @@
 		return;
 	}
 	agent = query->port->agent;
+	mad_buf = query->mad_buf;
 	spin_unlock_irqrestore(&idr_lock, flags);
 
-	ib_cancel_mad(agent, id);
+	ib_cancel_mad(agent, mad_buf);
 }
 EXPORT_SYMBOL(ib_sa_cancel_query);
 
@@ -457,71 +458,46 @@
 
 static int send_mad(struct ib_sa_query *query, int timeout_ms)
 {
-	struct ib_sa_port *port = query->port;
 	unsigned long flags;
-	int ret;
-	struct ib_sge      gather_list;
-	struct ib_send_wr *bad_wr, wr = {
-		.opcode      = IB_WR_SEND,
-		.sg_list     = &gather_list,
-		.num_sge     = 1,
-		.send_flags  = IB_SEND_SIGNALED,
-		.wr	     = {
-			 .ud = {
-				 .mad_hdr     = &query->mad->mad_hdr,
-				 .remote_qpn  = 1,
-				 .remote_qkey = IB_QP1_QKEY,
-				 .timeout_ms  = timeout_ms,
-			 }
-		 }
-	};
+	int ret, id;
 
 retry:
 	if (!idr_pre_get(&query_idr, GFP_ATOMIC))
 		return -ENOMEM;
 	spin_lock_irqsave(&idr_lock, flags);
-	ret = idr_get_new(&query_idr, query, &query->id);
+	ret = idr_get_new(&query_idr, query, &id);
 	spin_unlock_irqrestore(&idr_lock, flags);
 	if (ret == -EAGAIN)
 		goto retry;
 	if (ret)
 		return ret;
 
-	wr.wr_id = query->id;
+	query->mad_buf->timeout_ms = timeout_ms;
+	query->mad_buf->context[0] = query;
+	query->id = id;
 
-	spin_lock_irqsave(&port->ah_lock, flags);
-	kref_get(&port->sm_ah->ref);
-	query->sm_ah = port->sm_ah;
-	wr.wr.ud.ah  = port->sm_ah->ah;
-	spin_unlock_irqrestore(&port->ah_lock, flags);
+	spin_lock_irqsave(&query->port->ah_lock, flags);
+	kref_get(&query->port->sm_ah->ref);
+	query->sm_ah = query->port->sm_ah;
+	spin_unlock_irqrestore(&query->port->ah_lock, flags);
 
-	gather_list.addr   = dma_map_single(port->agent->device->dma_device,
-					    query->mad,
-					    sizeof (struct ib_sa_mad),
-					    DMA_TO_DEVICE);
-	gather_list.length = sizeof (struct ib_sa_mad);
-	gather_list.lkey   = port->agent->mr->lkey;
-	pci_unmap_addr_set(query, mapping, gather_list.addr);
+	query->mad_buf->ah = query->sm_ah->ah;
 
-	ret = ib_post_send_mad(port->agent, &wr, &bad_wr);
+	ret = ib_post_send_mad(query->mad_buf, NULL);
 	if (ret) {
-		dma_unmap_single(port->agent->device->dma_device,
-				 pci_unmap_addr(query, mapping),
-				 sizeof (struct ib_sa_mad),
-				 DMA_TO_DEVICE);
-		kref_put(&query->sm_ah->ref, free_sm_ah);
 		spin_lock_irqsave(&idr_lock, flags);
-		idr_remove(&query_idr, query->id);
+		idr_remove(&query_idr, id);
 		spin_unlock_irqrestore(&idr_lock, flags);
+
+		kref_put(&query->sm_ah->ref, free_sm_ah);
 	}
 
 	/*
 	 * It's not safe to dereference query any more, because the
 	 * send may already have completed and freed the query in
-	 * another context.  So use wr.wr_id, which has a copy of the
-	 * query's id.
+	 * another context.
 	 */
-	return ret ? ret : wr.wr_id;
+	return ret ? ret : id;
 }
 
 static void ib_sa_path_rec_callback(struct ib_sa_query *sa_query,
@@ -543,7 +519,6 @@
 
 static void ib_sa_path_rec_release(struct ib_sa_query *sa_query)
 {
-	kfree(sa_query->mad);
 	kfree(container_of(sa_query, struct ib_sa_path_query, sa_query));
 }
 
@@ -585,6 +560,7 @@
 	struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
 	struct ib_sa_port   *port;
 	struct ib_mad_agent *agent;
+	struct ib_sa_mad *mad;
 	int ret;
 
 	if (!sa_dev)
@@ -596,37 +572,45 @@
 	query = kmalloc(sizeof *query, gfp_mask);
 	if (!query)
 		return -ENOMEM;
-	query->sa_query.mad = kmalloc(sizeof *query->sa_query.mad, gfp_mask);
-	if (!query->sa_query.mad) {
-		kfree(query);
-		return -ENOMEM;
+
+	query->sa_query.mad_buf = ib_create_send_mad(agent, 1, 0,
+						     0, IB_MGMT_SA_HDR,
+						     IB_MGMT_SA_DATA, gfp_mask);
+	if (IS_ERR(query->sa_query.mad_buf)) {
+		ret = PTR_ERR(query->sa_query.mad_buf);
+		goto err1;
 	}
 
 	query->callback = callback;
 	query->context  = context;
 
-	init_mad(query->sa_query.mad, agent);
+	mad = query->sa_query.mad_buf->mad;
+	init_mad(mad, agent);
 
-	query->sa_query.callback              = callback ? ib_sa_path_rec_callback : NULL;
-	query->sa_query.release               = ib_sa_path_rec_release;
-	query->sa_query.port                  = port;
-	query->sa_query.mad->mad_hdr.method   = IB_MGMT_METHOD_GET;
-	query->sa_query.mad->mad_hdr.attr_id  = cpu_to_be16(IB_SA_ATTR_PATH_REC);
-	query->sa_query.mad->sa_hdr.comp_mask = comp_mask;
+	query->sa_query.callback = callback ? ib_sa_path_rec_callback : NULL;
+	query->sa_query.release  = ib_sa_path_rec_release;
+	query->sa_query.port     = port;
+	mad->mad_hdr.method	 = IB_MGMT_METHOD_GET;
+	mad->mad_hdr.attr_id	 = cpu_to_be16(IB_SA_ATTR_PATH_REC);
+	mad->sa_hdr.comp_mask	 = comp_mask;
 
-	ib_pack(path_rec_table, ARRAY_SIZE(path_rec_table),
-		rec, query->sa_query.mad->data);
+	ib_pack(path_rec_table, ARRAY_SIZE(path_rec_table), rec, mad->data);
 
 	*sa_query = &query->sa_query;
 
 	ret = send_mad(&query->sa_query, timeout_ms);
-	if (ret < 0) {
-		*sa_query = NULL;
-		kfree(query->sa_query.mad);
-		kfree(query);
-	}
+	if (ret < 0)
+		goto err2;
 
 	return ret;
+
+err2:
+	*sa_query = NULL;
+	ib_free_send_mad(query->sa_query.mad_buf);
+
+err1:
+	kfree(query);
+	return ret;
 }
 EXPORT_SYMBOL(ib_sa_path_rec_get);
 
@@ -649,7 +633,6 @@
 
 static void ib_sa_service_rec_release(struct ib_sa_query *sa_query)
 {
-	kfree(sa_query->mad);
 	kfree(container_of(sa_query, struct ib_sa_service_query, sa_query));
 }
 
@@ -693,6 +676,7 @@
 	struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
 	struct ib_sa_port   *port;
 	struct ib_mad_agent *agent;
+	struct ib_sa_mad *mad;
 	int ret;
 
 	if (!sa_dev)
@@ -709,38 +693,46 @@
 	query = kmalloc(sizeof *query, gfp_mask);
 	if (!query)
 		return -ENOMEM;
-	query->sa_query.mad = kmalloc(sizeof *query->sa_query.mad, gfp_mask);
-	if (!query->sa_query.mad) {
-		kfree(query);
-		return -ENOMEM;
+
+	query->sa_query.mad_buf = ib_create_send_mad(agent, 1, 0,
+						     0, IB_MGMT_SA_HDR,
+						     IB_MGMT_SA_DATA, gfp_mask);
+	if (IS_ERR(query->sa_query.mad_buf)) {
+		ret = PTR_ERR(query->sa_query.mad_buf);
+		goto err1;
 	}
 
 	query->callback = callback;
 	query->context  = context;
 
-	init_mad(query->sa_query.mad, agent);
+	mad = query->sa_query.mad_buf->mad;
+	init_mad(mad, agent);
 
-	query->sa_query.callback              = callback ? ib_sa_service_rec_callback : NULL;
-	query->sa_query.release               = ib_sa_service_rec_release;
-	query->sa_query.port                  = port;
-	query->sa_query.mad->mad_hdr.method   = method;
-	query->sa_query.mad->mad_hdr.attr_id  =
-				cpu_to_be16(IB_SA_ATTR_SERVICE_REC);
-	query->sa_query.mad->sa_hdr.comp_mask = comp_mask;
+	query->sa_query.callback = callback ? ib_sa_service_rec_callback : NULL;
+	query->sa_query.release  = ib_sa_service_rec_release;
+	query->sa_query.port     = port;
+	mad->mad_hdr.method	 = method;
+	mad->mad_hdr.attr_id	 = cpu_to_be16(IB_SA_ATTR_SERVICE_REC);
+	mad->sa_hdr.comp_mask	 = comp_mask;
 
 	ib_pack(service_rec_table, ARRAY_SIZE(service_rec_table),
-		rec, query->sa_query.mad->data);
+		rec, mad->data);
 
 	*sa_query = &query->sa_query;
 
 	ret = send_mad(&query->sa_query, timeout_ms);
-	if (ret < 0) {
-		*sa_query = NULL;
-		kfree(query->sa_query.mad);
-		kfree(query);
-	}
+	if (ret < 0)
+		goto err2;
 
 	return ret;
+
+err2:
+	*sa_query = NULL;
+	ib_free_send_mad(query->sa_query.mad_buf);
+
+err1:
+	kfree(query);
+	return ret;
 }
 EXPORT_SYMBOL(ib_sa_service_rec_query);
 
@@ -763,7 +755,6 @@
 
 static void ib_sa_mcmember_rec_release(struct ib_sa_query *sa_query)
 {
-	kfree(sa_query->mad);
 	kfree(container_of(sa_query, struct ib_sa_mcmember_query, sa_query));
 }
 
@@ -782,6 +773,7 @@
 	struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
 	struct ib_sa_port   *port;
 	struct ib_mad_agent *agent;
+	struct ib_sa_mad *mad;
 	int ret;
 
 	if (!sa_dev)
@@ -793,53 +785,55 @@
 	query = kmalloc(sizeof *query, gfp_mask);
 	if (!query)
 		return -ENOMEM;
-	query->sa_query.mad = kmalloc(sizeof *query->sa_query.mad, gfp_mask);
-	if (!query->sa_query.mad) {
-		kfree(query);
-		return -ENOMEM;
+
+	query->sa_query.mad_buf = ib_create_send_mad(agent, 1, 0,
+						     0, IB_MGMT_SA_HDR,
+						     IB_MGMT_SA_DATA, gfp_mask);
+	if (IS_ERR(query->sa_query.mad_buf)) {
+		ret = PTR_ERR(query->sa_query.mad_buf);
+		goto err1;
 	}
 
 	query->callback = callback;
 	query->context  = context;
 
-	init_mad(query->sa_query.mad, agent);
+	mad = query->sa_query.mad_buf->mad;
+	init_mad(mad, agent);
 
-	query->sa_query.callback              = callback ? ib_sa_mcmember_rec_callback : NULL;
-	query->sa_query.release               = ib_sa_mcmember_rec_release;
-	query->sa_query.port                  = port;
-	query->sa_query.mad->mad_hdr.method   = method;
-	query->sa_query.mad->mad_hdr.attr_id  = cpu_to_be16(IB_SA_ATTR_MC_MEMBER_REC);
-	query->sa_query.mad->sa_hdr.comp_mask = comp_mask;
+	query->sa_query.callback = callback ? ib_sa_mcmember_rec_callback : NULL;
+	query->sa_query.release  = ib_sa_mcmember_rec_release;
+	query->sa_query.port     = port;
+	mad->mad_hdr.method	 = method;
+	mad->mad_hdr.attr_id	 = cpu_to_be16(IB_SA_ATTR_MC_MEMBER_REC);
+	mad->sa_hdr.comp_mask	 = comp_mask;
 
 	ib_pack(mcmember_rec_table, ARRAY_SIZE(mcmember_rec_table),
-		rec, query->sa_query.mad->data);
+		rec, mad->data);
 
 	*sa_query = &query->sa_query;
 
 	ret = send_mad(&query->sa_query, timeout_ms);
-	if (ret < 0) {
-		*sa_query = NULL;
-		kfree(query->sa_query.mad);
-		kfree(query);
-	}
+	if (ret < 0)
+		goto err2;
 
 	return ret;
+
+err2:
+	*sa_query = NULL;
+	ib_free_send_mad(query->sa_query.mad_buf);
+
+err1:
+	kfree(query);
+	return ret;
 }
 EXPORT_SYMBOL(ib_sa_mcmember_rec_query);
 
 static void send_handler(struct ib_mad_agent *agent,
 			 struct ib_mad_send_wc *mad_send_wc)
 {
-	struct ib_sa_query *query;
+	struct ib_sa_query *query = mad_send_wc->send_buf->context[0];
 	unsigned long flags;
 
-	spin_lock_irqsave(&idr_lock, flags);
-	query = idr_find(&query_idr, mad_send_wc->wr_id);
-	spin_unlock_irqrestore(&idr_lock, flags);
-
-	if (!query)
-		return;
-
 	if (query->callback)
 		switch (mad_send_wc->status) {
 		case IB_WC_SUCCESS:
@@ -856,30 +850,25 @@
 			break;
 		}
 
-	dma_unmap_single(agent->device->dma_device,
-			 pci_unmap_addr(query, mapping),
-			 sizeof (struct ib_sa_mad),
-			 DMA_TO_DEVICE);
-	kref_put(&query->sm_ah->ref, free_sm_ah);
-
-	query->release(query);
-
 	spin_lock_irqsave(&idr_lock, flags);
-	idr_remove(&query_idr, mad_send_wc->wr_id);
+	idr_remove(&query_idr, query->id);
 	spin_unlock_irqrestore(&idr_lock, flags);
+
+	ib_free_send_mad(mad_send_wc->send_buf);
+	kref_put(&query->sm_ah->ref, free_sm_ah);
+	query->release(query);
 }
 
 static void recv_handler(struct ib_mad_agent *mad_agent,
 			 struct ib_mad_recv_wc *mad_recv_wc)
 {
 	struct ib_sa_query *query;
-	unsigned long flags;
+	struct ib_mad_send_buf *mad_buf;
 
-	spin_lock_irqsave(&idr_lock, flags);
-	query = idr_find(&query_idr, mad_recv_wc->wc->wr_id);
-	spin_unlock_irqrestore(&idr_lock, flags);
+	mad_buf = (void *) (unsigned long) mad_recv_wc->wc->wr_id;
+	query = mad_buf->context[0];
 
-	if (query && query->callback) {
+	if (query->callback) {
 		if (mad_recv_wc->wc->status == IB_WC_SUCCESS)
 			query->callback(query,
 					mad_recv_wc->recv_buf.mad->mad_hdr.status ?
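
The completion path above illustrates the pattern this patch moves all
consumers to: per-request state rides in send_buf->context[] instead of
being looked up by wr_id.  A minimal sketch of a consumer send handler,
where struct my_state and its status field are illustrative names, not
part of the patch:

static void my_send_handler(struct ib_mad_agent *agent,
			    struct ib_mad_send_wc *mad_send_wc)
{
	/* context[0] was stored before calling ib_post_send_mad() */
	struct my_state *state = mad_send_wc->send_buf->context[0];

	state->status = mad_send_wc->status;
	ib_free_send_mad(mad_send_wc->send_buf);
}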
diff --git a/drivers/infiniband/core/smi.h b/drivers/infiniband/core/smi.h
index db25503..2b3c401 100644
--- a/drivers/infiniband/core/smi.h
+++ b/drivers/infiniband/core/smi.h
@@ -39,6 +39,8 @@
 #ifndef __SMI_H_
 #define __SMI_H_
 
+#include <rdma/ib_smi.h>
+
 int smi_handle_dr_smp_recv(struct ib_smp *smp,
 			   u8 node_type,
 			   int port_num,
diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c
index fd200c0..fc5519a 100644
--- a/drivers/infiniband/core/user_mad.c
+++ b/drivers/infiniband/core/user_mad.c
@@ -96,7 +96,6 @@
 };
 
 struct ib_umad_packet {
-	struct ib_ah      *ah;
 	struct ib_mad_send_buf *msg;
 	struct list_head   list;
 	int		   length;
@@ -139,10 +138,10 @@
 			 struct ib_mad_send_wc *send_wc)
 {
 	struct ib_umad_file *file = agent->context;
-	struct ib_umad_packet *timeout, *packet =
-		(void *) (unsigned long) send_wc->wr_id;
+	struct ib_umad_packet *timeout;
+	struct ib_umad_packet *packet = send_wc->send_buf->context[0];
 
-	ib_destroy_ah(packet->msg->send_wr.wr.ud.ah);
+	ib_destroy_ah(packet->msg->ah);
 	ib_free_send_mad(packet->msg);
 
 	if (send_wc->status == IB_WC_RESP_TIMEOUT_ERR) {
@@ -268,11 +267,11 @@
 	struct ib_umad_packet *packet;
 	struct ib_mad_agent *agent;
 	struct ib_ah_attr ah_attr;
-	struct ib_send_wr *bad_wr;
+	struct ib_ah *ah;
 	struct ib_rmpp_mad *rmpp_mad;
 	u8 method;
 	__be64 *tid;
-	int ret, length, hdr_len, data_len, rmpp_hdr_size;
+	int ret, length, hdr_len, rmpp_hdr_size;
 	int rmpp_active = 0;
 
 	if (count < sizeof (struct ib_user_mad))
@@ -321,9 +320,9 @@
 		ah_attr.grh.traffic_class  = packet->mad.hdr.traffic_class;
 	}
 
-	packet->ah = ib_create_ah(agent->qp->pd, &ah_attr);
-	if (IS_ERR(packet->ah)) {
-		ret = PTR_ERR(packet->ah);
+	ah = ib_create_ah(agent->qp->pd, &ah_attr);
+	if (IS_ERR(ah)) {
+		ret = PTR_ERR(ah);
 		goto err_up;
 	}
 
@@ -337,12 +336,10 @@
 
 		/* Validate that the management class can support RMPP */
 		if (rmpp_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_ADM) {
-			hdr_len = offsetof(struct ib_sa_mad, data);
-			data_len = length - hdr_len;
+			hdr_len = IB_MGMT_SA_HDR;
 		} else if ((rmpp_mad->mad_hdr.mgmt_class >= IB_MGMT_CLASS_VENDOR_RANGE2_START) &&
 			    (rmpp_mad->mad_hdr.mgmt_class <= IB_MGMT_CLASS_VENDOR_RANGE2_END)) {
-				hdr_len = offsetof(struct ib_vendor_mad, data);
-				data_len = length - hdr_len;
+				hdr_len = IB_MGMT_VENDOR_HDR;
 		} else {
 			ret = -EINVAL;
 			goto err_ah;
@@ -353,25 +350,23 @@
 			ret = -EINVAL;
 			goto err_ah;
 		}
-		hdr_len = offsetof(struct ib_mad, data);
-		data_len = length - hdr_len;
+		hdr_len = IB_MGMT_MAD_HDR;
 	}
 
 	packet->msg = ib_create_send_mad(agent,
 					 be32_to_cpu(packet->mad.hdr.qpn),
-					 0, packet->ah, rmpp_active,
-					 hdr_len, data_len,
+					 0, rmpp_active,
+					 hdr_len, length - hdr_len,
 					 GFP_KERNEL);
 	if (IS_ERR(packet->msg)) {
 		ret = PTR_ERR(packet->msg);
 		goto err_ah;
 	}
 
-	packet->msg->send_wr.wr.ud.timeout_ms  = packet->mad.hdr.timeout_ms;
-	packet->msg->send_wr.wr.ud.retries = packet->mad.hdr.retries;
-
-	/* Override send WR WRID initialized in ib_create_send_mad */
-	packet->msg->send_wr.wr_id = (unsigned long) packet;
+	packet->msg->ah         = ah;
+	packet->msg->timeout_ms = packet->mad.hdr.timeout_ms;
+	packet->msg->retries    = packet->mad.hdr.retries;
+	packet->msg->context[0] = packet;
 
 	if (!rmpp_active) {
 		/* Copy message from user into send buffer */
@@ -403,17 +398,17 @@
 	 * transaction ID matches the agent being used to send the
 	 * MAD.
 	 */
-	method = packet->msg->mad->mad_hdr.method;
+	method = ((struct ib_mad_hdr *) packet->msg->mad)->method;
 
 	if (!(method & IB_MGMT_METHOD_RESP)       &&
 	    method != IB_MGMT_METHOD_TRAP_REPRESS &&
 	    method != IB_MGMT_METHOD_SEND) {
-		tid = &packet->msg->mad->mad_hdr.tid;
+		tid = &((struct ib_mad_hdr *) packet->msg->mad)->tid;
 		*tid = cpu_to_be64(((u64) agent->hi_tid) << 32 |
 				   (be64_to_cpup(tid) & 0xffffffff));
 	}
 
-	ret = ib_post_send_mad(agent, &packet->msg->send_wr, &bad_wr);
+	ret = ib_post_send_mad(packet->msg, NULL);
 	if (ret)
 		goto err_msg;
 
@@ -425,7 +420,7 @@
 	ib_free_send_mad(packet->msg);
 
 err_ah:
-	ib_destroy_ah(packet->ah);
+	ib_destroy_ah(ah);
 
 err_up:
 	up_read(&file->agent_mutex);
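
For reference, the TID rewrite above keeps the low 32 bits supplied by
userspace and stamps the agent's hi_tid into the high 32 bits, so the
eventual response can be matched back to this agent.  Worked through on
illustrative values:

	/* agent->hi_tid = 0x00000007, userspace TID = 0x1122334455667788 */
	tid = &((struct ib_mad_hdr *) packet->msg->mad)->tid;
	*tid = cpu_to_be64(((u64) agent->hi_tid) << 32 |
			   (be64_to_cpup(tid) & 0xffffffff));
	/* *tid now holds 0x0000000755667788 in network byte order */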
diff --git a/drivers/infiniband/hw/mthca/mthca_mad.c b/drivers/infiniband/hw/mthca/mthca_mad.c
index 9804174..8561b29 100644
--- a/drivers/infiniband/hw/mthca/mthca_mad.c
+++ b/drivers/infiniband/hw/mthca/mthca_mad.c
@@ -46,11 +46,6 @@
 	MTHCA_VENDOR_CLASS2 = 0xa
 };
 
-struct mthca_trap_mad {
-	struct ib_mad *mad;
-	DECLARE_PCI_UNMAP_ADDR(mapping)
-};
-
 static void update_sm_ah(struct mthca_dev *dev,
 			 u8 port_num, u16 lid, u8 sl)
 {
@@ -116,49 +111,16 @@
 			 struct ib_mad *mad)
 {
 	int qpn = mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_SUBN_LID_ROUTED;
-	struct mthca_trap_mad *tmad;
-	struct ib_sge      gather_list;
-	struct ib_send_wr *bad_wr, wr = {
-		.opcode      = IB_WR_SEND,
-		.sg_list     = &gather_list,
-		.num_sge     = 1,
-		.send_flags  = IB_SEND_SIGNALED,
-		.wr	     = {
-			 .ud = {
-				 .remote_qpn  = qpn,
-				 .remote_qkey = qpn ? IB_QP1_QKEY : 0,
-				 .timeout_ms  = 0
-			 }
-		 }
-	};
+	struct ib_mad_send_buf *send_buf;
 	struct ib_mad_agent *agent = dev->send_agent[port_num - 1][qpn];
 	int ret;
 	unsigned long flags;
 
 	if (agent) {
-		tmad = kmalloc(sizeof *tmad, GFP_KERNEL);
-		if (!tmad)
-			return;
-
-		tmad->mad = kmalloc(sizeof *tmad->mad, GFP_KERNEL);
-		if (!tmad->mad) {
-			kfree(tmad);
-			return;
-		}
-
-		memcpy(tmad->mad, mad, sizeof *mad);
-
-		wr.wr.ud.mad_hdr = &tmad->mad->mad_hdr;
-		wr.wr_id         = (unsigned long) tmad;
-
-		gather_list.addr   = dma_map_single(agent->device->dma_device,
-						    tmad->mad,
-						    sizeof *tmad->mad,
-						    DMA_TO_DEVICE);
-		gather_list.length = sizeof *tmad->mad;
-		gather_list.lkey   = to_mpd(agent->qp->pd)->ntmr.ibmr.lkey;
-		pci_unmap_addr_set(tmad, mapping, gather_list.addr);
-
+		send_buf = ib_create_send_mad(agent, qpn, 0, 0, IB_MGMT_MAD_HDR,
+					      IB_MGMT_MAD_DATA, GFP_ATOMIC);
+		if (IS_ERR(send_buf))
+			return;
 		/*
 		 * We rely here on the fact that MLX QPs don't use the
 		 * address handle after the send is posted (this is
@@ -166,21 +128,15 @@
 		 * it's OK for our devices).
 		 */
 		spin_lock_irqsave(&dev->sm_lock, flags);
-		wr.wr.ud.ah      = dev->sm_ah[port_num - 1];
-		if (wr.wr.ud.ah)
-			ret = ib_post_send_mad(agent, &wr, &bad_wr);
+		memcpy(send_buf->mad, mad, sizeof *mad);
+		if ((send_buf->ah = dev->sm_ah[port_num - 1]))
+			ret = ib_post_send_mad(send_buf, NULL);
 		else
 			ret = -EINVAL;
 		spin_unlock_irqrestore(&dev->sm_lock, flags);
 
-		if (ret) {
-			dma_unmap_single(agent->device->dma_device,
-					 pci_unmap_addr(tmad, mapping),
-					 sizeof *tmad->mad,
-					 DMA_TO_DEVICE);
-			kfree(tmad->mad);
-			kfree(tmad);
-		}
+		if (ret)
+			ib_free_send_mad(send_buf);
 	}
 }
 
@@ -267,15 +221,7 @@
 static void send_handler(struct ib_mad_agent *agent,
 			 struct ib_mad_send_wc *mad_send_wc)
 {
-	struct mthca_trap_mad *tmad =
-		(void *) (unsigned long) mad_send_wc->wr_id;
-
-	dma_unmap_single(agent->device->dma_device,
-			 pci_unmap_addr(tmad, mapping),
-			 sizeof *tmad->mad,
-			 DMA_TO_DEVICE);
-	kfree(tmad->mad);
-	kfree(tmad);
+	ib_free_send_mad(mad_send_wc->send_buf);
 }
 
 int mthca_create_agents(struct mthca_dev *dev)
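
With the trap-forwarding conversion above, every consumer in this patch
follows the same send flow.  A condensed sketch of that flow, assuming a
valid agent, remote_qpn, pkey_index, ah, and a mad to copy (locking
omitted):

	struct ib_mad_send_buf *send_buf;
	int ret;

	send_buf = ib_create_send_mad(agent, remote_qpn, pkey_index, 0,
				      IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA,
				      GFP_ATOMIC);
	if (IS_ERR(send_buf))
		return;

	/*
	 * Fill in the payload only before posting: ib_post_send_mad()
	 * now does the DMA mapping, so the buffer must be complete by
	 * the time it is posted.
	 */
	memcpy(send_buf->mad, mad, sizeof *mad);
	send_buf->ah = ah;

	ret = ib_post_send_mad(send_buf, NULL);
	if (ret)
		ib_free_send_mad(send_buf);	/* never posted */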
diff --git a/include/rdma/ib_mad.h b/include/rdma/ib_mad.h
index 4172e68..2c13350 100644
--- a/include/rdma/ib_mad.h
+++ b/include/rdma/ib_mad.h
@@ -109,10 +109,14 @@
 #define IB_QP_SET_QKEY	0x80000000
 
 enum {
+	IB_MGMT_MAD_HDR = 24,
 	IB_MGMT_MAD_DATA = 232,
+	IB_MGMT_RMPP_HDR = 36,
 	IB_MGMT_RMPP_DATA = 220,
+	IB_MGMT_VENDOR_HDR = 40,
 	IB_MGMT_VENDOR_DATA = 216,
-	IB_MGMT_SA_DATA = 200
+	IB_MGMT_SA_HDR = 56,
+	IB_MGMT_SA_DATA = 200,
 };
 
 struct ib_mad_hdr {
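
The new *_HDR constants pair with the existing *_DATA constants: each
pair sums to the fixed 256-byte MAD size.

/*
 * IB_MGMT_MAD_HDR    (24) + IB_MGMT_MAD_DATA    (232) = 256
 * IB_MGMT_RMPP_HDR   (36) + IB_MGMT_RMPP_DATA   (220) = 256
 * IB_MGMT_VENDOR_HDR (40) + IB_MGMT_VENDOR_DATA (216) = 256
 * IB_MGMT_SA_HDR     (56) + IB_MGMT_SA_DATA     (200) = 256
 */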
@@ -203,26 +207,25 @@
 
 /**
  * ib_mad_send_buf - MAD data buffer and work request for sends.
- * @mad: References an allocated MAD data buffer.  The size of the data
- *   buffer is specified in the @send_wr.length field.
- * @mapping: DMA mapping information.
+ * @next: A pointer used to chain together MADs for posting.
+ * @mad: References an allocated MAD data buffer.
  * @mad_agent: MAD agent that allocated the buffer.
+ * @ah: The address handle to use when sending the MAD.
  * @context: User-controlled context fields.
- * @send_wr: An initialized work request structure used when sending the MAD.
- *   The wr_id field of the work request is initialized to reference this
- *   data structure.
- * @sge: A scatter-gather list referenced by the work request.
+ * @timeout_ms: Time to wait for a response.
+ * @retries: Number of times to retry a request for a response.
  *
  * Users are responsible for initializing the MAD buffer itself, with the
  * exception of specifying the payload length field in any RMPP MAD.
  */
 struct ib_mad_send_buf {
-	struct ib_mad		*mad;
-	DECLARE_PCI_UNMAP_ADDR(mapping)
+	struct ib_mad_send_buf	*next;
+	void			*mad;
 	struct ib_mad_agent	*mad_agent;
+	struct ib_ah		*ah;
 	void			*context[2];
-	struct ib_send_wr	send_wr;
-	struct ib_sge		sge;
+	int			timeout_ms;
+	int			retries;
 };
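
Because @mad is now a void *, class-specific consumers can alias the
buffer without a cast, e.g. from the sa_query.c hunks above:

	struct ib_sa_mad *mad = query->sa_query.mad_buf->mad;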
 
 /**
@@ -287,7 +290,7 @@
  * or @mad_send_wc.
  */
 typedef void (*ib_mad_snoop_handler)(struct ib_mad_agent *mad_agent,
-				     struct ib_send_wr *send_wr,
+				     struct ib_mad_send_buf *send_buf,
 				     struct ib_mad_send_wc *mad_send_wc);
 
 /**
@@ -334,13 +337,13 @@
 
 /**
  * ib_mad_send_wc - MAD send completion information.
- * @wr_id: Work request identifier associated with the send MAD request.
+ * @send_buf: Send MAD data buffer associated with the send MAD request.
  * @status: Completion status.
  * @vendor_err: Optional vendor error information returned with a failed
  *   request.
  */
 struct ib_mad_send_wc {
-	u64			wr_id;
+	struct ib_mad_send_buf	*send_buf;
 	enum ib_wc_status	status;
 	u32			vendor_err;
 };
@@ -366,7 +369,7 @@
  * @rmpp_list: Specifies a list of RMPP reassembled received MAD buffers.
  * @mad_len: The length of the received MAD, without duplicated headers.
  *
- * For received response, the wr_id field of the wc is set to the wr_id
+ * For a received response, the wr_id contains a pointer to the ib_mad_send_buf
  *   for the corresponding send request.
  */
 struct ib_mad_recv_wc {
@@ -463,9 +466,9 @@
 /**
  * ib_post_send_mad - Posts MAD(s) to the send queue of the QP associated
  *   with the registered client.
- * @mad_agent: Specifies the associated registration to post the send to.
- * @send_wr: Specifies the information needed to send the MAD(s).
- * @bad_send_wr: Specifies the MAD on which an error was encountered.
+ * @send_buf: Specifies the information needed to send the MAD(s).
+ * @bad_send_buf: Specifies the MAD on which an error was encountered.  This
+ *   parameter is optional if only a single MAD is posted.
  *
  * Sent MADs are not guaranteed to complete in the order that they were posted.
  *
@@ -479,9 +482,8 @@
  * defined data being transferred.  The paylen_newwin field should be
  * specified in network-byte order.
  */
-int ib_post_send_mad(struct ib_mad_agent *mad_agent,
-		     struct ib_send_wr *send_wr,
-		     struct ib_send_wr **bad_send_wr);
+int ib_post_send_mad(struct ib_mad_send_buf *send_buf,
+		     struct ib_mad_send_buf **bad_send_buf);
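
Posting a chain of MADs uses the new @next field.  A hedged sketch with
two already-initialized buffers (first and second are illustrative):

	struct ib_mad_send_buf *bad_send_buf, *next;
	int ret;

	first->next  = second;
	second->next = NULL;

	ret = ib_post_send_mad(first, &bad_send_buf);
	if (ret) {
		/*
		 * bad_send_buf is the MAD on which the error occurred;
		 * it and anything chained after it were not posted and
		 * still belong to the caller.
		 */
		for (; bad_send_buf; bad_send_buf = next) {
			next = bad_send_buf->next;
			ib_free_send_mad(bad_send_buf);
		}
	}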
 
 /**
  * ib_coalesce_recv_mad - Coalesces received MAD data into a single buffer.
@@ -507,23 +509,25 @@
 /**
  * ib_cancel_mad - Cancels an outstanding send MAD operation.
  * @mad_agent: Specifies the registration associated with sent MAD.
- * @wr_id: Indicates the work request identifier of the MAD to cancel.
+ * @send_buf: Indicates the MAD to cancel.
  *
  * MADs will be returned to the user through the corresponding
  * ib_mad_send_handler.
  */
-void ib_cancel_mad(struct ib_mad_agent *mad_agent, u64 wr_id);
+void ib_cancel_mad(struct ib_mad_agent *mad_agent,
+		   struct ib_mad_send_buf *send_buf);
 
 /**
  * ib_modify_mad - Modifies an outstanding send MAD operation.
  * @mad_agent: Specifies the registration associated with sent MAD.
- * @wr_id: Indicates the work request identifier of the MAD to modify.
+ * @send_buf: Indicates the MAD to modify.
  * @timeout_ms: New timeout value for sent MAD.
  *
  * This call will reset the timeout value for a sent MAD to the specified
  * value.
  */
-int ib_modify_mad(struct ib_mad_agent *mad_agent, u64 wr_id, u32 timeout_ms);
+int ib_modify_mad(struct ib_mad_agent *mad_agent,
+		  struct ib_mad_send_buf *send_buf, u32 timeout_ms);
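
Both calls now take the ib_mad_send_buf returned by ib_create_send_mad()
as the handle.  A brief sketch, assuming send_buf is an outstanding
request:

	ib_modify_mad(mad_agent, send_buf, 5000);	/* reset timeout to 5 s */
	ib_cancel_mad(mad_agent, send_buf);		/* completion is still
							   delivered to the
							   send handler */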
 
 /**
  * ib_redirect_mad_qp - Registers a QP for MAD services.
@@ -572,7 +576,6 @@
  * @remote_qpn: Specifies the QPN of the receiving node.
  * @pkey_index: Specifies which PKey the MAD will be sent using.  This field
  *   is valid only if the remote_qpn is QP 1.
- * @ah: References the address handle used to transfer to the remote node.
  * @rmpp_active: Indicates if the send will enable RMPP.
  * @hdr_len: Indicates the size of the data header of the MAD.  This length
  *   should include the common MAD header, RMPP header, plus any class
@@ -582,11 +585,10 @@
  *   additional padding that may be necessary.
  * @gfp_mask: GFP mask used for the memory allocation.
  *
- * This is a helper routine that may be used to allocate a MAD.  Users are
- * not required to allocate outbound MADs using this call.  The returned
- * MAD send buffer will reference a data buffer usable for sending a MAD, along
+ * This routine allocates a MAD for sending.  The returned MAD send buffer
+ * will reference a data buffer usable for sending a MAD, along
  * with an initialized work request structure.  Users may modify the returned
- * MAD data buffer or work request before posting the send.
+ * MAD data buffer before posting the send.
  *
  * The returned data buffer will be cleared.  Users are responsible for
  * initializing the common MAD and any class specific headers.  If @rmpp_active
@@ -594,7 +596,7 @@
  */
 struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent,
 					    u32 remote_qpn, u16 pkey_index,
-					    struct ib_ah *ah, int rmpp_active,
+					    int rmpp_active,
 					    int hdr_len, int data_len,
 					    gfp_t gfp_mask);
 
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index a5a963c..f72d46d 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -595,11 +595,8 @@
 		} atomic;
 		struct {
 			struct ib_ah *ah;
-			struct ib_mad_hdr *mad_hdr;
 			u32	remote_qpn;
 			u32	remote_qkey;
-			int	timeout_ms; /* valid for MADs only */
-			int	retries;    /* valid for MADs only */
 			u16	pkey_index; /* valid for GSI only */
 			u8	port_num;   /* valid for DR SMPs on switch only */
 		} ud;