Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dledford/rdma
Pull rdma updates from Doug Ledford:
"This is the complete update for the rdma stack for this release cycle.
Most of it is typical driver and core updates, but there is the
entirely new VMWare pvrdma driver. You may have noticed that there
were changes in DaveM's pull request to the bnxt Ethernet driver to
support a RoCE RDMA driver. The bnxt_re driver was tentatively set to
be pulled in this release cycle, but it simply wasn't ready in time
and was dropped (a few review comments still to address, and some
multi-arch build issues like prefetch() not working across all
arches).
Summary:
- shared mlx5 updates with net stack (will drop out on merge if
Dave's tree has already been merged)
- driver updates: cxgb4, hfi1, hns-roce, i40iw, mlx4, mlx5, qedr, rxe
- debug cleanups
- new connection rejection helpers
- SRP updates
- various misc fixes
- new paravirt driver from vmware"
* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dledford/rdma: (210 commits)
IB: Add vmw_pvrdma driver
IB/mlx4: fix improper return value
IB/ocrdma: fix bad initialization
infiniband: nes: return value of skb_linearize should be handled
MAINTAINERS: Update Intel RDMA RNIC driver maintainers
MAINTAINERS: Remove Mitesh Ahuja from emulex maintainers
IB/core: fix unmap_sg argument
qede: fix general protection fault may occur on probe
IB/mthca: Replace pci_pool_alloc by pci_pool_zalloc
mlx5, calc_sq_size(): Make a debug message more informative
mlx5: Remove a set-but-not-used variable
mlx5: Use { } instead of { 0 } to init struct
IB/srp: Make writing the add_target sysfs attr interruptible
IB/srp: Make mapping failures easier to debug
IB/srp: Make login failures easier to debug
IB/srp: Introduce a local variable in srp_add_one()
IB/srp: Fix CONFIG_DYNAMIC_DEBUG=n build
IB/multicast: Check ib_find_pkey() return value
IPoIB: Avoid reading an uninitialized member variable
IB/mad: Fix an array index check
...
diff --git a/MAINTAINERS b/MAINTAINERS
index 3ef3167..9c48147 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -6515,10 +6515,7 @@
INTEL RDMA RNIC DRIVER
M: Faisal Latif <faisal.latif@intel.com>
-R: Chien Tin Tung <chien.tin.tung@intel.com>
-R: Mustafa Ismail <mustafa.ismail@intel.com>
-R: Shiraz Saleem <shiraz.saleem@intel.com>
-R: Tatyana Nikolova <tatyana.e.nikolova@intel.com>
+M: Shiraz Saleem <shiraz.saleem@intel.com>
L: linux-rdma@vger.kernel.org
S: Supported
F: drivers/infiniband/hw/i40iw/
@@ -11078,7 +11075,6 @@
EMULEX ONECONNECT ROCE DRIVER
M: Selvin Xavier <selvin.xavier@avagotech.com>
M: Devesh Sharma <devesh.sharma@avagotech.com>
-M: Mitesh Ahuja <mitesh.ahuja@avagotech.com>
L: linux-rdma@vger.kernel.org
W: http://www.emulex.com
S: Supported
@@ -13129,6 +13125,13 @@
F: drivers/scsi/vmw_pvscsi.c
F: drivers/scsi/vmw_pvscsi.h
+VMWARE PVRDMA DRIVER
+M: Adit Ranadive <aditr@vmware.com>
+M: VMware PV-Drivers <pv-drivers@vmware.com>
+L: linux-rdma@vger.kernel.org
+S: Maintained
+F: drivers/infiniband/hw/vmw_pvrdma/
+
VOLTAGE AND CURRENT REGULATOR FRAMEWORK
M: Liam Girdwood <lgirdwood@gmail.com>
M: Mark Brown <broonie@kernel.org>
diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig
index fb3fb89..6709173 100644
--- a/drivers/infiniband/Kconfig
+++ b/drivers/infiniband/Kconfig
@@ -73,6 +73,7 @@
source "drivers/infiniband/hw/mlx5/Kconfig"
source "drivers/infiniband/hw/nes/Kconfig"
source "drivers/infiniband/hw/ocrdma/Kconfig"
+source "drivers/infiniband/hw/vmw_pvrdma/Kconfig"
source "drivers/infiniband/hw/usnic/Kconfig"
source "drivers/infiniband/hw/hns/Kconfig"
diff --git a/drivers/infiniband/core/agent.c b/drivers/infiniband/core/agent.c
index 4fa524d..11dacd9 100644
--- a/drivers/infiniband/core/agent.c
+++ b/drivers/infiniband/core/agent.c
@@ -156,7 +156,6 @@
/* Create new device info */
port_priv = kzalloc(sizeof *port_priv, GFP_KERNEL);
if (!port_priv) {
- dev_err(&device->dev, "No memory for ib_agent_port_private\n");
ret = -ENOMEM;
goto error1;
}
diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c
index 1a2984c..ae04826 100644
--- a/drivers/infiniband/core/cache.c
+++ b/drivers/infiniband/core/cache.c
@@ -770,12 +770,8 @@
int err = 0;
table = kcalloc(ib_dev->phys_port_cnt, sizeof(*table), GFP_KERNEL);
-
- if (!table) {
- pr_warn("failed to allocate ib gid cache for %s\n",
- ib_dev->name);
+ if (!table)
return -ENOMEM;
- }
for (port = 0; port < ib_dev->phys_port_cnt; port++) {
u8 rdma_port = port + rdma_start_port(ib_dev);
@@ -1170,14 +1166,13 @@
GFP_KERNEL);
if (!device->cache.pkey_cache ||
!device->cache.lmc_cache) {
- pr_warn("Couldn't allocate cache for %s\n", device->name);
- return -ENOMEM;
+ err = -ENOMEM;
+ goto free;
}
err = gid_table_setup_one(device);
if (err)
- /* Allocated memory will be cleaned in the release function */
- return err;
+ goto free;
for (p = 0; p <= rdma_end_port(device) - rdma_start_port(device); ++p)
ib_cache_update(device, p + rdma_start_port(device));
@@ -1192,6 +1187,9 @@
err:
gid_table_cleanup_one(device);
+free:
+ kfree(device->cache.pkey_cache);
+ kfree(device->cache.lmc_cache);
return err;
}
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index 71c7c4c..cf1edfa 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -57,6 +57,54 @@
MODULE_DESCRIPTION("InfiniBand CM");
MODULE_LICENSE("Dual BSD/GPL");
+static const char * const ibcm_rej_reason_strs[] = {
+ [IB_CM_REJ_NO_QP] = "no QP",
+ [IB_CM_REJ_NO_EEC] = "no EEC",
+ [IB_CM_REJ_NO_RESOURCES] = "no resources",
+ [IB_CM_REJ_TIMEOUT] = "timeout",
+ [IB_CM_REJ_UNSUPPORTED] = "unsupported",
+ [IB_CM_REJ_INVALID_COMM_ID] = "invalid comm ID",
+ [IB_CM_REJ_INVALID_COMM_INSTANCE] = "invalid comm instance",
+ [IB_CM_REJ_INVALID_SERVICE_ID] = "invalid service ID",
+ [IB_CM_REJ_INVALID_TRANSPORT_TYPE] = "invalid transport type",
+ [IB_CM_REJ_STALE_CONN] = "stale conn",
+ [IB_CM_REJ_RDC_NOT_EXIST] = "RDC not exist",
+ [IB_CM_REJ_INVALID_GID] = "invalid GID",
+ [IB_CM_REJ_INVALID_LID] = "invalid LID",
+ [IB_CM_REJ_INVALID_SL] = "invalid SL",
+ [IB_CM_REJ_INVALID_TRAFFIC_CLASS] = "invalid traffic class",
+ [IB_CM_REJ_INVALID_HOP_LIMIT] = "invalid hop limit",
+ [IB_CM_REJ_INVALID_PACKET_RATE] = "invalid packet rate",
+ [IB_CM_REJ_INVALID_ALT_GID] = "invalid alt GID",
+ [IB_CM_REJ_INVALID_ALT_LID] = "invalid alt LID",
+ [IB_CM_REJ_INVALID_ALT_SL] = "invalid alt SL",
+ [IB_CM_REJ_INVALID_ALT_TRAFFIC_CLASS] = "invalid alt traffic class",
+ [IB_CM_REJ_INVALID_ALT_HOP_LIMIT] = "invalid alt hop limit",
+ [IB_CM_REJ_INVALID_ALT_PACKET_RATE] = "invalid alt packet rate",
+ [IB_CM_REJ_PORT_CM_REDIRECT] = "port CM redirect",
+ [IB_CM_REJ_PORT_REDIRECT] = "port redirect",
+ [IB_CM_REJ_INVALID_MTU] = "invalid MTU",
+ [IB_CM_REJ_INSUFFICIENT_RESP_RESOURCES] = "insufficient resp resources",
+ [IB_CM_REJ_CONSUMER_DEFINED] = "consumer defined",
+ [IB_CM_REJ_INVALID_RNR_RETRY] = "invalid RNR retry",
+ [IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID] = "duplicate local comm ID",
+ [IB_CM_REJ_INVALID_CLASS_VERSION] = "invalid class version",
+ [IB_CM_REJ_INVALID_FLOW_LABEL] = "invalid flow label",
+ [IB_CM_REJ_INVALID_ALT_FLOW_LABEL] = "invalid alt flow label",
+};
+
+const char *__attribute_const__ ibcm_reject_msg(int reason)
+{
+ size_t index = reason;
+
+ if (index < ARRAY_SIZE(ibcm_rej_reason_strs) &&
+ ibcm_rej_reason_strs[index])
+ return ibcm_rej_reason_strs[index];
+ else
+ return "unrecognized reason";
+}
+EXPORT_SYMBOL(ibcm_reject_msg);
+
static void cm_add_one(struct ib_device *device);
static void cm_remove_one(struct ib_device *device, void *client_data);
@@ -1582,6 +1630,7 @@
struct cm_id_private *listen_cm_id_priv, *cur_cm_id_priv;
struct cm_timewait_info *timewait_info;
struct cm_req_msg *req_msg;
+ struct ib_cm_id *cm_id;
req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad;
@@ -1603,10 +1652,18 @@
timewait_info = cm_insert_remote_qpn(cm_id_priv->timewait_info);
if (timewait_info) {
cm_cleanup_timewait(cm_id_priv->timewait_info);
+ cur_cm_id_priv = cm_get_id(timewait_info->work.local_id,
+ timewait_info->work.remote_id);
+
spin_unlock_irq(&cm.lock);
cm_issue_rej(work->port, work->mad_recv_wc,
IB_CM_REJ_STALE_CONN, CM_MSG_RESPONSE_REQ,
NULL, 0);
+ if (cur_cm_id_priv) {
+ cm_id = &cur_cm_id_priv->id;
+ ib_send_cm_dreq(cm_id, NULL, 0);
+ cm_deref_id(cur_cm_id_priv);
+ }
return NULL;
}
@@ -1984,6 +2041,9 @@
struct cm_id_private *cm_id_priv;
struct cm_rep_msg *rep_msg;
int ret;
+ struct cm_id_private *cur_cm_id_priv;
+ struct ib_cm_id *cm_id;
+ struct cm_timewait_info *timewait_info;
rep_msg = (struct cm_rep_msg *)work->mad_recv_wc->recv_buf.mad;
cm_id_priv = cm_acquire_id(rep_msg->remote_comm_id, 0);
@@ -2018,16 +2078,26 @@
goto error;
}
/* Check for a stale connection. */
- if (cm_insert_remote_qpn(cm_id_priv->timewait_info)) {
+ timewait_info = cm_insert_remote_qpn(cm_id_priv->timewait_info);
+ if (timewait_info) {
rb_erase(&cm_id_priv->timewait_info->remote_id_node,
&cm.remote_id_table);
cm_id_priv->timewait_info->inserted_remote_id = 0;
+ cur_cm_id_priv = cm_get_id(timewait_info->work.local_id,
+ timewait_info->work.remote_id);
+
spin_unlock(&cm.lock);
spin_unlock_irq(&cm_id_priv->lock);
cm_issue_rej(work->port, work->mad_recv_wc,
IB_CM_REJ_STALE_CONN, CM_MSG_RESPONSE_REP,
NULL, 0);
ret = -EINVAL;
+ if (cur_cm_id_priv) {
+ cm_id = &cur_cm_id_priv->id;
+ ib_send_cm_dreq(cm_id, NULL, 0);
+ cm_deref_id(cur_cm_id_priv);
+ }
+
goto error;
}
spin_unlock(&cm.lock);
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 22fcf28..e7dcfac 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -101,6 +101,49 @@
}
EXPORT_SYMBOL(rdma_event_msg);
+const char *__attribute_const__ rdma_reject_msg(struct rdma_cm_id *id,
+ int reason)
+{
+ if (rdma_ib_or_roce(id->device, id->port_num))
+ return ibcm_reject_msg(reason);
+
+ if (rdma_protocol_iwarp(id->device, id->port_num))
+ return iwcm_reject_msg(reason);
+
+ WARN_ON_ONCE(1);
+ return "unrecognized transport";
+}
+EXPORT_SYMBOL(rdma_reject_msg);
+
+bool rdma_is_consumer_reject(struct rdma_cm_id *id, int reason)
+{
+ if (rdma_ib_or_roce(id->device, id->port_num))
+ return reason == IB_CM_REJ_CONSUMER_DEFINED;
+
+ if (rdma_protocol_iwarp(id->device, id->port_num))
+ return reason == -ECONNREFUSED;
+
+ WARN_ON_ONCE(1);
+ return false;
+}
+EXPORT_SYMBOL(rdma_is_consumer_reject);
+
+const void *rdma_consumer_reject_data(struct rdma_cm_id *id,
+ struct rdma_cm_event *ev, u8 *data_len)
+{
+ const void *p;
+
+ if (rdma_is_consumer_reject(id, ev->status)) {
+ *data_len = ev->param.conn.private_data_len;
+ p = ev->param.conn.private_data;
+ } else {
+ *data_len = 0;
+ p = NULL;
+ }
+ return p;
+}
+EXPORT_SYMBOL(rdma_consumer_reject_data);
+
static void cma_add_one(struct ib_device *device);
static void cma_remove_one(struct ib_device *device, void *client_data);
diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h
index 0c0bea0..d293726 100644
--- a/drivers/infiniband/core/core_priv.h
+++ b/drivers/infiniband/core/core_priv.h
@@ -72,9 +72,6 @@
void ib_cache_setup(void);
void ib_cache_cleanup(void);
-int ib_resolve_eth_dmac(struct ib_qp *qp,
- struct ib_qp_attr *qp_attr, int *qp_attr_mask);
-
typedef void (*roce_netdev_callback)(struct ib_device *device, u8 port,
struct net_device *idev, void *cookie);
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index 760ef60..571974c 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -254,11 +254,8 @@
unsigned long flags;
context = kmalloc(sizeof *context, GFP_KERNEL);
- if (!context) {
- pr_warn("Couldn't allocate client context for %s/%s\n",
- device->name, client->name);
+ if (!context)
return -ENOMEM;
- }
context->client = client;
context->data = NULL;
diff --git a/drivers/infiniband/core/fmr_pool.c b/drivers/infiniband/core/fmr_pool.c
index cdbb1f1..cdfad5f 100644
--- a/drivers/infiniband/core/fmr_pool.c
+++ b/drivers/infiniband/core/fmr_pool.c
@@ -247,7 +247,6 @@
kmalloc(IB_FMR_HASH_SIZE * sizeof *pool->cache_bucket,
GFP_KERNEL);
if (!pool->cache_bucket) {
- pr_warn(PFX "Failed to allocate cache in pool\n");
ret = -ENOMEM;
goto out_free_pool;
}
diff --git a/drivers/infiniband/core/iwcm.c b/drivers/infiniband/core/iwcm.c
index 5495e22..31661b5 100644
--- a/drivers/infiniband/core/iwcm.c
+++ b/drivers/infiniband/core/iwcm.c
@@ -59,6 +59,27 @@
MODULE_DESCRIPTION("iWARP CM");
MODULE_LICENSE("Dual BSD/GPL");
+static const char * const iwcm_rej_reason_strs[] = {
+ [ECONNRESET] = "reset by remote host",
+ [ECONNREFUSED] = "refused by remote application",
+ [ETIMEDOUT] = "setup timeout",
+};
+
+const char *__attribute_const__ iwcm_reject_msg(int reason)
+{
+ size_t index;
+
+ /* iWARP uses negative errnos */
+ index = -reason;
+
+ if (index < ARRAY_SIZE(iwcm_rej_reason_strs) &&
+ iwcm_rej_reason_strs[index])
+ return iwcm_rej_reason_strs[index];
+ else
+ return "unrecognized reason";
+}
+EXPORT_SYMBOL(iwcm_reject_msg);
+
static struct ibnl_client_cbs iwcm_nl_cb_table[] = {
[RDMA_NL_IWPM_REG_PID] = {.dump = iwpm_register_pid_cb},
[RDMA_NL_IWPM_ADD_MAPPING] = {.dump = iwpm_add_mapping_cb},
diff --git a/drivers/infiniband/core/iwpm_msg.c b/drivers/infiniband/core/iwpm_msg.c
index 1c41b95..a0e7c16 100644
--- a/drivers/infiniband/core/iwpm_msg.c
+++ b/drivers/infiniband/core/iwpm_msg.c
@@ -604,7 +604,6 @@
}
rem_info = kzalloc(sizeof(struct iwpm_remote_info), GFP_ATOMIC);
if (!rem_info) {
- pr_err("%s: Unable to allocate a remote info\n", __func__);
ret = -ENOMEM;
return ret;
}
diff --git a/drivers/infiniband/core/iwpm_util.c b/drivers/infiniband/core/iwpm_util.c
index ade71e7..3ef51a9 100644
--- a/drivers/infiniband/core/iwpm_util.c
+++ b/drivers/infiniband/core/iwpm_util.c
@@ -62,7 +62,6 @@
sizeof(struct hlist_head), GFP_KERNEL);
if (!iwpm_hash_bucket) {
ret = -ENOMEM;
- pr_err("%s Unable to create mapinfo hash table\n", __func__);
goto init_exit;
}
iwpm_reminfo_bucket = kzalloc(IWPM_REMINFO_HASH_SIZE *
@@ -70,7 +69,6 @@
if (!iwpm_reminfo_bucket) {
kfree(iwpm_hash_bucket);
ret = -ENOMEM;
- pr_err("%s Unable to create reminfo hash table\n", __func__);
goto init_exit;
}
}
@@ -128,10 +126,9 @@
if (!iwpm_valid_client(nl_client))
return ret;
map_info = kzalloc(sizeof(struct iwpm_mapping_info), GFP_KERNEL);
- if (!map_info) {
- pr_err("%s: Unable to allocate a mapping info\n", __func__);
+ if (!map_info)
return -ENOMEM;
- }
+
memcpy(&map_info->local_sockaddr, local_sockaddr,
sizeof(struct sockaddr_storage));
memcpy(&map_info->mapped_sockaddr, mapped_sockaddr,
@@ -309,10 +306,9 @@
unsigned long flags;
nlmsg_request = kzalloc(sizeof(struct iwpm_nlmsg_request), gfp);
- if (!nlmsg_request) {
- pr_err("%s Unable to allocate a nlmsg_request\n", __func__);
+ if (!nlmsg_request)
return NULL;
- }
+
spin_lock_irqsave(&iwpm_nlmsg_req_lock, flags);
list_add_tail(&nlmsg_request->inprocess_list, &iwpm_nlmsg_req_list);
spin_unlock_irqrestore(&iwpm_nlmsg_req_lock, flags);
diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
index 40cbd6b..a009f71 100644
--- a/drivers/infiniband/core/mad.c
+++ b/drivers/infiniband/core/mad.c
@@ -769,7 +769,7 @@
* If we are at the start of the LID routed part, don't update the
* hop_ptr or hop_cnt. See section 14.2.2, Vol 1 IB spec.
*/
- if (opa && smp->class_version == OPA_SMP_CLASS_VERSION) {
+ if (opa && smp->class_version == OPA_SM_CLASS_VERSION) {
u32 opa_drslid;
if ((opa_get_smp_direction(opa_smp)
@@ -816,7 +816,6 @@
local = kmalloc(sizeof *local, GFP_ATOMIC);
if (!local) {
ret = -ENOMEM;
- dev_err(&device->dev, "No memory for ib_mad_local_private\n");
goto out;
}
local->mad_priv = NULL;
@@ -824,7 +823,6 @@
mad_priv = alloc_mad_private(mad_size, GFP_ATOMIC);
if (!mad_priv) {
ret = -ENOMEM;
- dev_err(&device->dev, "No memory for local response MAD\n");
kfree(local);
goto out;
}
@@ -947,9 +945,6 @@
for (left = send_buf->data_len + pad; left > 0; left -= seg_size) {
seg = kmalloc(sizeof (*seg) + seg_size, gfp_mask);
if (!seg) {
- dev_err(&send_buf->mad_agent->device->dev,
- "alloc_send_rmpp_segs: RMPP mem alloc failed for len %zd, gfp %#x\n",
- sizeof (*seg) + seg_size, gfp_mask);
free_send_rmpp_list(send_wr);
return -ENOMEM;
}
@@ -1362,12 +1357,7 @@
{
/* Allocate management method table */
*method = kzalloc(sizeof **method, GFP_ATOMIC);
- if (!*method) {
- pr_err("No memory for ib_mad_mgmt_method_table\n");
- return -ENOMEM;
- }
-
- return 0;
+ return (*method) ? 0 : (-ENOMEM);
}
/*
@@ -1458,8 +1448,6 @@
/* Allocate management class table for "new" class version */
*class = kzalloc(sizeof **class, GFP_ATOMIC);
if (!*class) {
- dev_err(&agent_priv->agent.device->dev,
- "No memory for ib_mad_mgmt_class_table\n");
ret = -ENOMEM;
goto error1;
}
@@ -1524,22 +1512,16 @@
if (!*vendor_table) {
/* Allocate mgmt vendor class table for "new" class version */
vendor = kzalloc(sizeof *vendor, GFP_ATOMIC);
- if (!vendor) {
- dev_err(&agent_priv->agent.device->dev,
- "No memory for ib_mad_mgmt_vendor_class_table\n");
+ if (!vendor)
goto error1;
- }
*vendor_table = vendor;
}
if (!(*vendor_table)->vendor_class[vclass]) {
/* Allocate table for this management vendor class */
vendor_class = kzalloc(sizeof *vendor_class, GFP_ATOMIC);
- if (!vendor_class) {
- dev_err(&agent_priv->agent.device->dev,
- "No memory for ib_mad_mgmt_vendor_class\n");
+ if (!vendor_class)
goto error2;
- }
(*vendor_table)->vendor_class[vclass] = vendor_class;
}
@@ -1746,7 +1728,7 @@
if (!class)
goto out;
if (convert_mgmt_class(mad_hdr->mgmt_class) >=
- IB_MGMT_MAX_METHODS)
+ ARRAY_SIZE(class->method_table))
goto out;
method = class->method_table[convert_mgmt_class(
mad_hdr->mgmt_class)];
@@ -2167,7 +2149,7 @@
struct ib_mad_hdr *mad_hdr = (struct ib_mad_hdr *)recv->mad;
if (opa && mad_hdr->base_version == OPA_MGMT_BASE_VERSION &&
- mad_hdr->class_version == OPA_SMI_CLASS_VERSION)
+ mad_hdr->class_version == OPA_SM_CLASS_VERSION)
return handle_opa_smi(port_priv, qp_info, wc, port_num, recv,
response);
@@ -2238,11 +2220,8 @@
mad_size = recv->mad_size;
response = alloc_mad_private(mad_size, GFP_KERNEL);
- if (!response) {
- dev_err(&port_priv->device->dev,
- "%s: no memory for response buffer\n", __func__);
+ if (!response)
goto out;
- }
if (rdma_cap_ib_switch(port_priv->device))
port_num = wc->port_num;
@@ -2869,8 +2848,6 @@
mad_priv = alloc_mad_private(port_mad_size(qp_info->port_priv),
GFP_ATOMIC);
if (!mad_priv) {
- dev_err(&qp_info->port_priv->device->dev,
- "No memory for receive buffer\n");
ret = -ENOMEM;
break;
}
@@ -2961,11 +2938,8 @@
u16 pkey_index;
attr = kmalloc(sizeof *attr, GFP_KERNEL);
- if (!attr) {
- dev_err(&port_priv->device->dev,
- "Couldn't kmalloc ib_qp_attr\n");
+ if (!attr)
return -ENOMEM;
- }
ret = ib_find_pkey(port_priv->device, port_priv->port_num,
IB_DEFAULT_PKEY_FULL, &pkey_index);
@@ -3135,10 +3109,8 @@
/* Create new device info */
port_priv = kzalloc(sizeof *port_priv, GFP_KERNEL);
- if (!port_priv) {
- dev_err(&device->dev, "No memory for ib_mad_port_private\n");
+ if (!port_priv)
return -ENOMEM;
- }
port_priv->device = device;
port_priv->port_num = port_num;
diff --git a/drivers/infiniband/core/multicast.c b/drivers/infiniband/core/multicast.c
index e51b739..322cb67 100644
--- a/drivers/infiniband/core/multicast.c
+++ b/drivers/infiniband/core/multicast.c
@@ -518,8 +518,11 @@
process_join_error(group, status);
else {
int mgids_changed, is_mgid0;
- ib_find_pkey(group->port->dev->device, group->port->port_num,
- be16_to_cpu(rec->pkey), &pkey_index);
+
+ if (ib_find_pkey(group->port->dev->device,
+ group->port->port_num, be16_to_cpu(rec->pkey),
+ &pkey_index))
+ pkey_index = MCAST_INVALID_PKEY_INDEX;
spin_lock_irq(&group->port->lock);
if (group->state == MCAST_BUSY &&
diff --git a/drivers/infiniband/core/roce_gid_mgmt.c b/drivers/infiniband/core/roce_gid_mgmt.c
index 3a64a08..0621f44 100644
--- a/drivers/infiniband/core/roce_gid_mgmt.c
+++ b/drivers/infiniband/core/roce_gid_mgmt.c
@@ -304,10 +304,9 @@
for_ifa(in_dev) {
struct sin_list *entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
- if (!entry) {
- pr_warn("roce_gid_mgmt: couldn't allocate entry for IPv4 update\n");
+ if (!entry)
continue;
- }
+
entry->ip.sin_family = AF_INET;
entry->ip.sin_addr.s_addr = ifa->ifa_address;
list_add_tail(&entry->list, &sin_list);
@@ -348,10 +347,8 @@
list_for_each_entry(ifp, &in6_dev->addr_list, if_list) {
struct sin6_list *entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
- if (!entry) {
- pr_warn("roce_gid_mgmt: couldn't allocate entry for IPv6 update\n");
+ if (!entry)
continue;
- }
entry->sin6.sin6_family = AF_INET6;
entry->sin6.sin6_addr = ifp->addr;
@@ -447,10 +444,8 @@
struct upper_list *entry = kmalloc(sizeof(*entry), GFP_ATOMIC);
struct list_head *upper_list = data;
- if (!entry) {
- pr_info("roce_gid_mgmt: couldn't allocate entry to delete ndev\n");
+ if (!entry)
return 0;
- }
list_add_tail(&entry->list, upper_list);
dev_hold(upper);
@@ -559,10 +554,8 @@
struct netdev_event_work *ndev_work =
kmalloc(sizeof(*ndev_work), GFP_KERNEL);
- if (!ndev_work) {
- pr_warn("roce_gid_mgmt: can't allocate work for netdevice_event\n");
+ if (!ndev_work)
return NOTIFY_DONE;
- }
memcpy(ndev_work->cmds, cmds, sizeof(ndev_work->cmds));
for (i = 0; i < ARRAY_SIZE(ndev_work->cmds) && ndev_work->cmds[i].cb; i++) {
@@ -696,10 +689,8 @@
}
work = kmalloc(sizeof(*work), GFP_ATOMIC);
- if (!work) {
- pr_warn("roce_gid_mgmt: Couldn't allocate work for addr_event\n");
+ if (!work)
return NOTIFY_DONE;
- }
INIT_WORK(&work->work, update_gid_event_work_handler);
diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c
index 7713ef0..579f9a7 100644
--- a/drivers/infiniband/core/ucm.c
+++ b/drivers/infiniband/core/ucm.c
@@ -1104,8 +1104,11 @@
struct ib_ucm_cmd_hdr hdr;
ssize_t result;
- if (WARN_ON_ONCE(!ib_safe_file_access(filp)))
+ if (!ib_safe_file_access(filp)) {
+ pr_err_once("ucm_write: process %d (%s) changed security contexts after opening file descriptor, this is not allowed.\n",
+ task_tgid_vnr(current), current->comm);
return -EACCES;
+ }
if (len < sizeof(hdr))
return -EINVAL;
diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c
index 9520154..e12f8fa 100644
--- a/drivers/infiniband/core/ucma.c
+++ b/drivers/infiniband/core/ucma.c
@@ -1584,8 +1584,11 @@
struct rdma_ucm_cmd_hdr hdr;
ssize_t ret;
- if (WARN_ON_ONCE(!ib_safe_file_access(filp)))
+ if (!ib_safe_file_access(filp)) {
+ pr_err_once("ucma_write: process %d (%s) changed security contexts after opening file descriptor, this is not allowed.\n",
+ task_tgid_vnr(current), current->comm);
return -EACCES;
+ }
if (len < sizeof(hdr))
return -EINVAL;
diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
index 84b4eff..1e62a5f 100644
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -51,7 +51,7 @@
if (umem->nmap > 0)
ib_dma_unmap_sg(dev, umem->sg_head.sgl,
- umem->nmap,
+ umem->npages,
DMA_BIDIRECTIONAL);
for_each_sg(umem->sg_head.sgl, sg, umem->npages, i) {
diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h
index df26a74..455034a 100644
--- a/drivers/infiniband/core/uverbs.h
+++ b/drivers/infiniband/core/uverbs.h
@@ -289,5 +289,6 @@
IB_UVERBS_DECLARE_EX_CMD(destroy_wq);
IB_UVERBS_DECLARE_EX_CMD(create_rwq_ind_table);
IB_UVERBS_DECLARE_EX_CMD(destroy_rwq_ind_table);
+IB_UVERBS_DECLARE_EX_CMD(modify_qp);
#endif /* UVERBS_H */
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index cb3f515a..09b6491 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -2328,94 +2328,88 @@
}
}
-ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file,
- struct ib_device *ib_dev,
- const char __user *buf, int in_len,
- int out_len)
+static int modify_qp(struct ib_uverbs_file *file,
+ struct ib_uverbs_ex_modify_qp *cmd, struct ib_udata *udata)
{
- struct ib_uverbs_modify_qp cmd;
- struct ib_udata udata;
- struct ib_qp *qp;
- struct ib_qp_attr *attr;
- int ret;
-
- if (copy_from_user(&cmd, buf, sizeof cmd))
- return -EFAULT;
-
- INIT_UDATA(&udata, buf + sizeof cmd, NULL, in_len - sizeof cmd,
- out_len);
+ struct ib_qp_attr *attr;
+ struct ib_qp *qp;
+ int ret;
attr = kmalloc(sizeof *attr, GFP_KERNEL);
if (!attr)
return -ENOMEM;
- qp = idr_read_qp(cmd.qp_handle, file->ucontext);
+ qp = idr_read_qp(cmd->base.qp_handle, file->ucontext);
if (!qp) {
ret = -EINVAL;
goto out;
}
- attr->qp_state = cmd.qp_state;
- attr->cur_qp_state = cmd.cur_qp_state;
- attr->path_mtu = cmd.path_mtu;
- attr->path_mig_state = cmd.path_mig_state;
- attr->qkey = cmd.qkey;
- attr->rq_psn = cmd.rq_psn;
- attr->sq_psn = cmd.sq_psn;
- attr->dest_qp_num = cmd.dest_qp_num;
- attr->qp_access_flags = cmd.qp_access_flags;
- attr->pkey_index = cmd.pkey_index;
- attr->alt_pkey_index = cmd.alt_pkey_index;
- attr->en_sqd_async_notify = cmd.en_sqd_async_notify;
- attr->max_rd_atomic = cmd.max_rd_atomic;
- attr->max_dest_rd_atomic = cmd.max_dest_rd_atomic;
- attr->min_rnr_timer = cmd.min_rnr_timer;
- attr->port_num = cmd.port_num;
- attr->timeout = cmd.timeout;
- attr->retry_cnt = cmd.retry_cnt;
- attr->rnr_retry = cmd.rnr_retry;
- attr->alt_port_num = cmd.alt_port_num;
- attr->alt_timeout = cmd.alt_timeout;
+ attr->qp_state = cmd->base.qp_state;
+ attr->cur_qp_state = cmd->base.cur_qp_state;
+ attr->path_mtu = cmd->base.path_mtu;
+ attr->path_mig_state = cmd->base.path_mig_state;
+ attr->qkey = cmd->base.qkey;
+ attr->rq_psn = cmd->base.rq_psn;
+ attr->sq_psn = cmd->base.sq_psn;
+ attr->dest_qp_num = cmd->base.dest_qp_num;
+ attr->qp_access_flags = cmd->base.qp_access_flags;
+ attr->pkey_index = cmd->base.pkey_index;
+ attr->alt_pkey_index = cmd->base.alt_pkey_index;
+ attr->en_sqd_async_notify = cmd->base.en_sqd_async_notify;
+ attr->max_rd_atomic = cmd->base.max_rd_atomic;
+ attr->max_dest_rd_atomic = cmd->base.max_dest_rd_atomic;
+ attr->min_rnr_timer = cmd->base.min_rnr_timer;
+ attr->port_num = cmd->base.port_num;
+ attr->timeout = cmd->base.timeout;
+ attr->retry_cnt = cmd->base.retry_cnt;
+ attr->rnr_retry = cmd->base.rnr_retry;
+ attr->alt_port_num = cmd->base.alt_port_num;
+ attr->alt_timeout = cmd->base.alt_timeout;
+ attr->rate_limit = cmd->rate_limit;
- memcpy(attr->ah_attr.grh.dgid.raw, cmd.dest.dgid, 16);
- attr->ah_attr.grh.flow_label = cmd.dest.flow_label;
- attr->ah_attr.grh.sgid_index = cmd.dest.sgid_index;
- attr->ah_attr.grh.hop_limit = cmd.dest.hop_limit;
- attr->ah_attr.grh.traffic_class = cmd.dest.traffic_class;
- attr->ah_attr.dlid = cmd.dest.dlid;
- attr->ah_attr.sl = cmd.dest.sl;
- attr->ah_attr.src_path_bits = cmd.dest.src_path_bits;
- attr->ah_attr.static_rate = cmd.dest.static_rate;
- attr->ah_attr.ah_flags = cmd.dest.is_global ? IB_AH_GRH : 0;
- attr->ah_attr.port_num = cmd.dest.port_num;
+ memcpy(attr->ah_attr.grh.dgid.raw, cmd->base.dest.dgid, 16);
+ attr->ah_attr.grh.flow_label = cmd->base.dest.flow_label;
+ attr->ah_attr.grh.sgid_index = cmd->base.dest.sgid_index;
+ attr->ah_attr.grh.hop_limit = cmd->base.dest.hop_limit;
+ attr->ah_attr.grh.traffic_class = cmd->base.dest.traffic_class;
+ attr->ah_attr.dlid = cmd->base.dest.dlid;
+ attr->ah_attr.sl = cmd->base.dest.sl;
+ attr->ah_attr.src_path_bits = cmd->base.dest.src_path_bits;
+ attr->ah_attr.static_rate = cmd->base.dest.static_rate;
+ attr->ah_attr.ah_flags = cmd->base.dest.is_global ?
+ IB_AH_GRH : 0;
+ attr->ah_attr.port_num = cmd->base.dest.port_num;
- memcpy(attr->alt_ah_attr.grh.dgid.raw, cmd.alt_dest.dgid, 16);
- attr->alt_ah_attr.grh.flow_label = cmd.alt_dest.flow_label;
- attr->alt_ah_attr.grh.sgid_index = cmd.alt_dest.sgid_index;
- attr->alt_ah_attr.grh.hop_limit = cmd.alt_dest.hop_limit;
- attr->alt_ah_attr.grh.traffic_class = cmd.alt_dest.traffic_class;
- attr->alt_ah_attr.dlid = cmd.alt_dest.dlid;
- attr->alt_ah_attr.sl = cmd.alt_dest.sl;
- attr->alt_ah_attr.src_path_bits = cmd.alt_dest.src_path_bits;
- attr->alt_ah_attr.static_rate = cmd.alt_dest.static_rate;
- attr->alt_ah_attr.ah_flags = cmd.alt_dest.is_global ? IB_AH_GRH : 0;
- attr->alt_ah_attr.port_num = cmd.alt_dest.port_num;
+ memcpy(attr->alt_ah_attr.grh.dgid.raw, cmd->base.alt_dest.dgid, 16);
+ attr->alt_ah_attr.grh.flow_label = cmd->base.alt_dest.flow_label;
+ attr->alt_ah_attr.grh.sgid_index = cmd->base.alt_dest.sgid_index;
+ attr->alt_ah_attr.grh.hop_limit = cmd->base.alt_dest.hop_limit;
+ attr->alt_ah_attr.grh.traffic_class = cmd->base.alt_dest.traffic_class;
+ attr->alt_ah_attr.dlid = cmd->base.alt_dest.dlid;
+ attr->alt_ah_attr.sl = cmd->base.alt_dest.sl;
+ attr->alt_ah_attr.src_path_bits = cmd->base.alt_dest.src_path_bits;
+ attr->alt_ah_attr.static_rate = cmd->base.alt_dest.static_rate;
+ attr->alt_ah_attr.ah_flags = cmd->base.alt_dest.is_global ?
+ IB_AH_GRH : 0;
+ attr->alt_ah_attr.port_num = cmd->base.alt_dest.port_num;
if (qp->real_qp == qp) {
- ret = ib_resolve_eth_dmac(qp, attr, &cmd.attr_mask);
- if (ret)
- goto release_qp;
+ if (cmd->base.attr_mask & IB_QP_AV) {
+ ret = ib_resolve_eth_dmac(qp->device, &attr->ah_attr);
+ if (ret)
+ goto release_qp;
+ }
ret = qp->device->modify_qp(qp, attr,
- modify_qp_mask(qp->qp_type, cmd.attr_mask), &udata);
+ modify_qp_mask(qp->qp_type,
+ cmd->base.attr_mask),
+ udata);
} else {
- ret = ib_modify_qp(qp, attr, modify_qp_mask(qp->qp_type, cmd.attr_mask));
+ ret = ib_modify_qp(qp, attr,
+ modify_qp_mask(qp->qp_type,
+ cmd->base.attr_mask));
}
- if (ret)
- goto release_qp;
-
- ret = in_len;
-
release_qp:
put_qp_read(qp);
@@ -2425,6 +2419,68 @@
return ret;
}
+ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file,
+ struct ib_device *ib_dev,
+ const char __user *buf, int in_len,
+ int out_len)
+{
+ struct ib_uverbs_ex_modify_qp cmd = {};
+ struct ib_udata udata;
+ int ret;
+
+ if (copy_from_user(&cmd.base, buf, sizeof(cmd.base)))
+ return -EFAULT;
+
+ if (cmd.base.attr_mask &
+ ~((IB_USER_LEGACY_LAST_QP_ATTR_MASK << 1) - 1))
+ return -EOPNOTSUPP;
+
+ INIT_UDATA(&udata, buf + sizeof(cmd.base), NULL,
+ in_len - sizeof(cmd.base), out_len);
+
+ ret = modify_qp(file, &cmd, &udata);
+ if (ret)
+ return ret;
+
+ return in_len;
+}
+
+int ib_uverbs_ex_modify_qp(struct ib_uverbs_file *file,
+ struct ib_device *ib_dev,
+ struct ib_udata *ucore,
+ struct ib_udata *uhw)
+{
+ struct ib_uverbs_ex_modify_qp cmd = {};
+ int ret;
+
+ /*
+ * Last bit is reserved for extending the attr_mask by
+ * using another field.
+ */
+ BUILD_BUG_ON(IB_USER_LAST_QP_ATTR_MASK == (1 << 31));
+
+ if (ucore->inlen < sizeof(cmd.base))
+ return -EINVAL;
+
+ ret = ib_copy_from_udata(&cmd, ucore, min(sizeof(cmd), ucore->inlen));
+ if (ret)
+ return ret;
+
+ if (cmd.base.attr_mask &
+ ~((IB_USER_LAST_QP_ATTR_MASK << 1) - 1))
+ return -EOPNOTSUPP;
+
+ if (ucore->inlen > sizeof(cmd)) {
+ if (ib_is_udata_cleared(ucore, sizeof(cmd),
+ ucore->inlen - sizeof(cmd)))
+ return -EOPNOTSUPP;
+ }
+
+ ret = modify_qp(file, &cmd, uhw);
+
+ return ret;
+}
+
ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file,
struct ib_device *ib_dev,
const char __user *buf, int in_len,
@@ -2875,6 +2931,7 @@
struct ib_ah *ah;
struct ib_ah_attr attr;
int ret;
+ struct ib_udata udata;
if (out_len < sizeof resp)
return -ENOSPC;
@@ -2882,6 +2939,10 @@
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
+ INIT_UDATA(&udata, buf + sizeof(cmd),
+ (unsigned long)cmd.response + sizeof(resp),
+ in_len - sizeof(cmd), out_len - sizeof(resp));
+
uobj = kmalloc(sizeof *uobj, GFP_KERNEL);
if (!uobj)
return -ENOMEM;
@@ -2908,12 +2969,16 @@
memset(&attr.dmac, 0, sizeof(attr.dmac));
memcpy(attr.grh.dgid.raw, cmd.attr.grh.dgid, 16);
- ah = ib_create_ah(pd, &attr);
+ ah = pd->device->create_ah(pd, &attr, &udata);
+
if (IS_ERR(ah)) {
ret = PTR_ERR(ah);
goto err_put;
}
+ ah->device = pd->device;
+ ah->pd = pd;
+ atomic_inc(&pd->usecnt);
ah->uobject = uobj;
uobj->object = ah;
@@ -3124,8 +3189,10 @@
kern_spec_val = (void *)kern_spec +
sizeof(struct ib_uverbs_flow_spec_hdr);
kern_spec_mask = kern_spec_val + kern_filter_sz;
+ if (ib_spec->type == (IB_FLOW_SPEC_INNER | IB_FLOW_SPEC_VXLAN_TUNNEL))
+ return -EINVAL;
- switch (ib_spec->type) {
+ switch (ib_spec->type & ~IB_FLOW_SPEC_INNER) {
case IB_FLOW_SPEC_ETH:
ib_filter_sz = offsetof(struct ib_flow_eth_filter, real_sz);
actual_filter_sz = spec_filter_size(kern_spec_mask,
@@ -3175,6 +3242,21 @@
memcpy(&ib_spec->tcp_udp.val, kern_spec_val, actual_filter_sz);
memcpy(&ib_spec->tcp_udp.mask, kern_spec_mask, actual_filter_sz);
break;
+ case IB_FLOW_SPEC_VXLAN_TUNNEL:
+ ib_filter_sz = offsetof(struct ib_flow_tunnel_filter, real_sz);
+ actual_filter_sz = spec_filter_size(kern_spec_mask,
+ kern_filter_sz,
+ ib_filter_sz);
+ if (actual_filter_sz <= 0)
+ return -EINVAL;
+ ib_spec->tunnel.size = sizeof(struct ib_flow_spec_tunnel);
+ memcpy(&ib_spec->tunnel.val, kern_spec_val, actual_filter_sz);
+ memcpy(&ib_spec->tunnel.mask, kern_spec_mask, actual_filter_sz);
+
+ if ((ntohl(ib_spec->tunnel.mask.tunnel_id)) >= BIT(24) ||
+ (ntohl(ib_spec->tunnel.val.tunnel_id)) >= BIT(24))
+ return -EINVAL;
+ break;
default:
return -EINVAL;
}
@@ -3745,7 +3827,6 @@
err = PTR_ERR(flow_id);
goto err_free;
}
- flow_id->qp = qp;
flow_id->uobject = uobj;
uobj->object = flow_id;
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index 44b1104..8135935 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -137,6 +137,7 @@
[IB_USER_VERBS_EX_CMD_DESTROY_WQ] = ib_uverbs_ex_destroy_wq,
[IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL] = ib_uverbs_ex_create_rwq_ind_table,
[IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL] = ib_uverbs_ex_destroy_rwq_ind_table,
+ [IB_USER_VERBS_EX_CMD_MODIFY_QP] = ib_uverbs_ex_modify_qp,
};
static void ib_uverbs_add_one(struct ib_device *device);
@@ -746,8 +747,11 @@
int srcu_key;
ssize_t ret;
- if (WARN_ON_ONCE(!ib_safe_file_access(filp)))
+ if (!ib_safe_file_access(filp)) {
+ pr_err_once("uverbs_write: process %d (%s) changed security contexts after opening file descriptor, this is not allowed.\n",
+ task_tgid_vnr(current), current->comm);
return -EACCES;
+ }
if (count < sizeof hdr)
return -EINVAL;
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index 8368764..71580cc 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -315,7 +315,7 @@
{
struct ib_ah *ah;
- ah = pd->device->create_ah(pd, ah_attr);
+ ah = pd->device->create_ah(pd, ah_attr, NULL);
if (!IS_ERR(ah)) {
ah->device = pd->device;
@@ -328,7 +328,7 @@
}
EXPORT_SYMBOL(ib_create_ah);
-static int ib_get_header_version(const union rdma_network_hdr *hdr)
+int ib_get_rdma_header_version(const union rdma_network_hdr *hdr)
{
const struct iphdr *ip4h = (struct iphdr *)&hdr->roce4grh;
struct iphdr ip4h_checked;
@@ -359,6 +359,7 @@
return 4;
return 6;
}
+EXPORT_SYMBOL(ib_get_rdma_header_version);
static enum rdma_network_type ib_get_net_type_by_grh(struct ib_device *device,
u8 port_num,
@@ -369,7 +370,7 @@
if (rdma_protocol_ib(device, port_num))
return RDMA_NETWORK_IB;
- grh_version = ib_get_header_version((union rdma_network_hdr *)grh);
+ grh_version = ib_get_rdma_header_version((union rdma_network_hdr *)grh);
if (grh_version == 4)
return RDMA_NETWORK_IPV4;
@@ -415,9 +416,9 @@
&context, gid_index);
}
-static int get_gids_from_rdma_hdr(union rdma_network_hdr *hdr,
- enum rdma_network_type net_type,
- union ib_gid *sgid, union ib_gid *dgid)
+int ib_get_gids_from_rdma_hdr(const union rdma_network_hdr *hdr,
+ enum rdma_network_type net_type,
+ union ib_gid *sgid, union ib_gid *dgid)
{
struct sockaddr_in src_in;
struct sockaddr_in dst_in;
@@ -447,6 +448,7 @@
return -EINVAL;
}
}
+EXPORT_SYMBOL(ib_get_gids_from_rdma_hdr);
int ib_init_ah_from_wc(struct ib_device *device, u8 port_num,
const struct ib_wc *wc, const struct ib_grh *grh,
@@ -469,8 +471,8 @@
net_type = ib_get_net_type_by_grh(device, port_num, grh);
gid_type = ib_network_to_gid_type(net_type);
}
- ret = get_gids_from_rdma_hdr((union rdma_network_hdr *)grh, net_type,
- &sgid, &dgid);
+ ret = ib_get_gids_from_rdma_hdr((union rdma_network_hdr *)grh, net_type,
+ &sgid, &dgid);
if (ret)
return ret;
@@ -1014,6 +1016,7 @@
IB_QP_QKEY),
[IB_QPT_GSI] = (IB_QP_CUR_STATE |
IB_QP_QKEY),
+ [IB_QPT_RAW_PACKET] = IB_QP_RATE_LIMIT,
}
}
},
@@ -1047,6 +1050,7 @@
IB_QP_QKEY),
[IB_QPT_GSI] = (IB_QP_CUR_STATE |
IB_QP_QKEY),
+ [IB_QPT_RAW_PACKET] = IB_QP_RATE_LIMIT,
}
},
[IB_QPS_SQD] = {
@@ -1196,66 +1200,66 @@
}
EXPORT_SYMBOL(ib_modify_qp_is_ok);
-int ib_resolve_eth_dmac(struct ib_qp *qp,
- struct ib_qp_attr *qp_attr, int *qp_attr_mask)
+int ib_resolve_eth_dmac(struct ib_device *device,
+ struct ib_ah_attr *ah_attr)
{
int ret = 0;
- if (*qp_attr_mask & IB_QP_AV) {
- if (qp_attr->ah_attr.port_num < rdma_start_port(qp->device) ||
- qp_attr->ah_attr.port_num > rdma_end_port(qp->device))
- return -EINVAL;
+ if (ah_attr->port_num < rdma_start_port(device) ||
+ ah_attr->port_num > rdma_end_port(device))
+ return -EINVAL;
- if (!rdma_cap_eth_ah(qp->device, qp_attr->ah_attr.port_num))
- return 0;
+ if (!rdma_cap_eth_ah(device, ah_attr->port_num))
+ return 0;
- if (rdma_link_local_addr((struct in6_addr *)qp_attr->ah_attr.grh.dgid.raw)) {
- rdma_get_ll_mac((struct in6_addr *)qp_attr->ah_attr.grh.dgid.raw,
- qp_attr->ah_attr.dmac);
- } else {
- union ib_gid sgid;
- struct ib_gid_attr sgid_attr;
- int ifindex;
- int hop_limit;
+ if (rdma_link_local_addr((struct in6_addr *)ah_attr->grh.dgid.raw)) {
+ rdma_get_ll_mac((struct in6_addr *)ah_attr->grh.dgid.raw,
+ ah_attr->dmac);
+ } else {
+ union ib_gid sgid;
+ struct ib_gid_attr sgid_attr;
+ int ifindex;
+ int hop_limit;
- ret = ib_query_gid(qp->device,
- qp_attr->ah_attr.port_num,
- qp_attr->ah_attr.grh.sgid_index,
- &sgid, &sgid_attr);
+ ret = ib_query_gid(device,
+ ah_attr->port_num,
+ ah_attr->grh.sgid_index,
+ &sgid, &sgid_attr);
- if (ret || !sgid_attr.ndev) {
- if (!ret)
- ret = -ENXIO;
- goto out;
- }
-
- ifindex = sgid_attr.ndev->ifindex;
-
- ret = rdma_addr_find_l2_eth_by_grh(&sgid,
- &qp_attr->ah_attr.grh.dgid,
- qp_attr->ah_attr.dmac,
- NULL, &ifindex, &hop_limit);
-
- dev_put(sgid_attr.ndev);
-
- qp_attr->ah_attr.grh.hop_limit = hop_limit;
+ if (ret || !sgid_attr.ndev) {
+ if (!ret)
+ ret = -ENXIO;
+ goto out;
}
+
+ ifindex = sgid_attr.ndev->ifindex;
+
+ ret = rdma_addr_find_l2_eth_by_grh(&sgid,
+ &ah_attr->grh.dgid,
+ ah_attr->dmac,
+ NULL, &ifindex, &hop_limit);
+
+ dev_put(sgid_attr.ndev);
+
+ ah_attr->grh.hop_limit = hop_limit;
}
out:
return ret;
}
EXPORT_SYMBOL(ib_resolve_eth_dmac);
-
int ib_modify_qp(struct ib_qp *qp,
struct ib_qp_attr *qp_attr,
int qp_attr_mask)
{
- int ret;
- ret = ib_resolve_eth_dmac(qp, qp_attr, &qp_attr_mask);
- if (ret)
- return ret;
+ if (qp_attr_mask & IB_QP_AV) {
+ int ret;
+
+ ret = ib_resolve_eth_dmac(qp->device, &qp_attr->ah_attr);
+ if (ret)
+ return ret;
+ }
return qp->device->modify_qp(qp->real_qp, qp_attr, qp_attr_mask, NULL);
}
@@ -1734,8 +1738,10 @@
return ERR_PTR(-ENOSYS);
flow_id = qp->device->create_flow(qp, flow_attr, domain);
- if (!IS_ERR(flow_id))
+ if (!IS_ERR(flow_id)) {
atomic_inc(&qp->usecnt);
+ flow_id->qp = qp;
+ }
return flow_id;
}
EXPORT_SYMBOL(ib_create_flow);
diff --git a/drivers/infiniband/hw/Makefile b/drivers/infiniband/hw/Makefile
index e7a5ed9..ed553de 100644
--- a/drivers/infiniband/hw/Makefile
+++ b/drivers/infiniband/hw/Makefile
@@ -7,6 +7,7 @@
obj-$(CONFIG_MLX5_INFINIBAND) += mlx5/
obj-$(CONFIG_INFINIBAND_NES) += nes/
obj-$(CONFIG_INFINIBAND_OCRDMA) += ocrdma/
+obj-$(CONFIG_INFINIBAND_VMWARE_PVRDMA) += vmw_pvrdma/
obj-$(CONFIG_INFINIBAND_USNIC) += usnic/
obj-$(CONFIG_INFINIBAND_HFI1) += hfi1/
obj-$(CONFIG_INFINIBAND_HNS) += hns/
diff --git a/drivers/infiniband/hw/cxgb3/cxio_dbg.c b/drivers/infiniband/hw/cxgb3/cxio_dbg.c
index 8bca6b4..445e89e 100644
--- a/drivers/infiniband/hw/cxgb3/cxio_dbg.c
+++ b/drivers/infiniband/hw/cxgb3/cxio_dbg.c
@@ -45,10 +45,9 @@
int size = 32;
m = kmalloc(sizeof(*m) + size, GFP_ATOMIC);
- if (!m) {
- PDBG("%s couldn't allocate memory.\n", __func__);
+ if (!m)
return;
- }
+
m->mem_id = MEM_PMRX;
m->addr = (stag>>8) * 32 + rdev->rnic_info.tpt_base;
m->len = size;
@@ -82,10 +81,9 @@
size = npages * sizeof(u64);
m = kmalloc(sizeof(*m) + size, GFP_ATOMIC);
- if (!m) {
- PDBG("%s couldn't allocate memory.\n", __func__);
+ if (!m)
return;
- }
+
m->mem_id = MEM_PMRX;
m->addr = pbl_addr;
m->len = size;
@@ -144,10 +142,9 @@
int rc;
m = kmalloc(sizeof(*m) + size, GFP_ATOMIC);
- if (!m) {
- PDBG("%s couldn't allocate memory.\n", __func__);
+ if (!m)
return;
- }
+
m->mem_id = MEM_PMRX;
m->addr = ((hwtid)<<10) + rdev->rnic_info.rqt_base;
m->len = size;
@@ -177,10 +174,9 @@
int rc;
m = kmalloc(sizeof(*m) + size, GFP_ATOMIC);
- if (!m) {
- PDBG("%s couldn't allocate memory.\n", __func__);
+ if (!m)
return;
- }
+
m->mem_id = MEM_CM;
m->addr = hwtid * size;
m->len = size;
diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c
index cba57bb..9d5fe18 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_provider.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c
@@ -62,7 +62,8 @@
#include "common.h"
static struct ib_ah *iwch_ah_create(struct ib_pd *pd,
- struct ib_ah_attr *ah_attr)
+ struct ib_ah_attr *ah_attr,
+ struct ib_udata *udata)
{
return ERR_PTR(-ENOSYS);
}
diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c
index 4e5baf4..516b0ae 100644
--- a/drivers/infiniband/hw/cxgb4/device.c
+++ b/drivers/infiniband/hw/cxgb4/device.c
@@ -828,8 +828,10 @@
}
rdev->status_page = (struct t4_dev_status_page *)
__get_free_page(GFP_KERNEL);
- if (!rdev->status_page)
+ if (!rdev->status_page) {
+ err = -ENOMEM;
goto destroy_ocqp_pool;
+ }
rdev->status_page->qp_start = rdev->lldi.vr->qp.start;
rdev->status_page->qp_size = rdev->lldi.vr->qp.size;
rdev->status_page->cq_start = rdev->lldi.vr->cq.start;
@@ -841,8 +843,6 @@
if (rdev->wr_log) {
rdev->wr_log_size = 1 << c4iw_wr_log_size_order;
atomic_set(&rdev->wr_log_idx, 0);
- } else {
- pr_err(MOD "error allocating wr_log. Logging disabled\n");
}
}
@@ -1424,8 +1424,6 @@
qp_list.qps = kzalloc(count * sizeof *qp_list.qps, GFP_ATOMIC);
if (!qp_list.qps) {
- printk(KERN_ERR MOD "%s: Fatal error - DB overflow recovery failed\n",
- pci_name(ctx->lldi.pdev));
spin_unlock_irq(&ctx->dev->lock);
return;
}
diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c
index 645e606..49b51b7 100644
--- a/drivers/infiniband/hw/cxgb4/provider.c
+++ b/drivers/infiniband/hw/cxgb4/provider.c
@@ -59,7 +59,9 @@
MODULE_PARM_DESC(fastreg_support, "Advertise fastreg support (default=1)");
static struct ib_ah *c4iw_ah_create(struct ib_pd *pd,
- struct ib_ah_attr *ah_attr)
+ struct ib_ah_attr *ah_attr,
+ struct ib_udata *udata)
+
{
return ERR_PTR(-ENOSYS);
}
diff --git a/drivers/infiniband/hw/hfi1/affinity.c b/drivers/infiniband/hw/hfi1/affinity.c
index 67ea85a..7a3d906 100644
--- a/drivers/infiniband/hw/hfi1/affinity.c
+++ b/drivers/infiniband/hw/hfi1/affinity.c
@@ -125,6 +125,7 @@
cpumask_weight(topology_sibling_cpumask(
cpumask_first(&node_affinity.proc.mask)
));
+ node_affinity.num_possible_nodes = num_possible_nodes();
node_affinity.num_online_nodes = num_online_nodes();
node_affinity.num_online_cpus = num_online_cpus();
@@ -135,7 +136,7 @@
*/
init_real_cpu_mask();
- hfi1_per_node_cntr = kcalloc(num_possible_nodes(),
+ hfi1_per_node_cntr = kcalloc(node_affinity.num_possible_nodes,
sizeof(*hfi1_per_node_cntr), GFP_KERNEL);
if (!hfi1_per_node_cntr)
return -ENOMEM;
diff --git a/drivers/infiniband/hw/hfi1/affinity.h b/drivers/infiniband/hw/hfi1/affinity.h
index 42e6331..e78c7aa 100644
--- a/drivers/infiniband/hw/hfi1/affinity.h
+++ b/drivers/infiniband/hw/hfi1/affinity.h
@@ -70,14 +70,6 @@
uint gen;
};
-struct hfi1_affinity {
- struct cpu_mask_set def_intr;
- struct cpu_mask_set rcv_intr;
- struct cpumask real_cpu_mask;
- /* spin lock to protect affinity struct */
- spinlock_t lock;
-};
-
struct hfi1_msix_entry;
/* Initialize non-HT cpu cores mask */
@@ -115,6 +107,7 @@
struct cpumask real_cpu_mask;
struct cpu_mask_set proc;
int num_core_siblings;
+ int num_possible_nodes;
int num_online_nodes;
int num_online_cpus;
struct mutex lock; /* protects affinity nodes */
diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c
index 24d0820..ef72bc2 100644
--- a/drivers/infiniband/hw/hfi1/chip.c
+++ b/drivers/infiniband/hw/hfi1/chip.c
@@ -8477,7 +8477,10 @@
*/
if (type == HCMD_WRITE_LCB_CSR) {
in_data |= ((*out_data) & 0xffffffffffull) << 8;
- reg = ((((*out_data) >> 40) & 0xff) <<
+ /* must preserve COMPLETED - it is tied to hardware */
+ reg = read_csr(dd, DC_DC8051_CFG_EXT_DEV_0);
+ reg &= DC_DC8051_CFG_EXT_DEV_0_COMPLETED_SMASK;
+ reg |= ((((*out_data) >> 40) & 0xff) <<
DC_DC8051_CFG_EXT_DEV_0_RETURN_CODE_SHIFT)
| ((((*out_data) >> 48) & 0xffff) <<
DC_DC8051_CFG_EXT_DEV_0_RSP_DATA_SHIFT);
@@ -9556,11 +9559,11 @@
if (HFI1_CAP_IS_KSET(EXTENDED_PSN))
add_rcvctrl(dd, RCV_CTRL_RCV_EXTENDED_PSN_ENABLE_SMASK);
- guid = ppd->guid;
+ guid = ppd->guids[HFI1_PORT_GUID_INDEX];
if (!guid) {
if (dd->base_guid)
guid = dd->base_guid + ppd->port - 1;
- ppd->guid = guid;
+ ppd->guids[HFI1_PORT_GUID_INDEX] = guid;
}
/* Set linkinit_reason on power up per OPA spec */
diff --git a/drivers/infiniband/hw/hfi1/chip_registers.h b/drivers/infiniband/hw/hfi1/chip_registers.h
index 5b99938..5bfa839 100644
--- a/drivers/infiniband/hw/hfi1/chip_registers.h
+++ b/drivers/infiniband/hw/hfi1/chip_registers.h
@@ -415,6 +415,9 @@
#define ASIC_CFG_SBUS_REQUEST_DATA_IN_SHIFT 32
#define ASIC_CFG_SBUS_REQUEST_RECEIVER_ADDR_SHIFT 0
#define ASIC_CFG_SCRATCH (ASIC + 0x000000000020)
+#define ASIC_CFG_SCRATCH_1 (ASIC_CFG_SCRATCH + 0x08)
+#define ASIC_CFG_SCRATCH_2 (ASIC_CFG_SCRATCH + 0x10)
+#define ASIC_CFG_SCRATCH_3 (ASIC_CFG_SCRATCH + 0x18)
#define ASIC_CFG_THERM_POLL_EN (ASIC + 0x000000000050)
#define ASIC_EEP_ADDR_CMD (ASIC + 0x000000000308)
#define ASIC_EEP_ADDR_CMD_EP_ADDR_MASK 0xFFFFFFull
diff --git a/drivers/infiniband/hw/hfi1/debugfs.c b/drivers/infiniband/hw/hfi1/debugfs.c
index 632ba21..8725f4c 100644
--- a/drivers/infiniband/hw/hfi1/debugfs.c
+++ b/drivers/infiniband/hw/hfi1/debugfs.c
@@ -541,6 +541,114 @@
return ret;
}
+/* read the dc8051 memory */
+static ssize_t dc8051_memory_read(struct file *file, char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ struct hfi1_pportdata *ppd = private2ppd(file);
+ ssize_t rval;
+ void *tmp;
+ loff_t start, end;
+
+ /* the checks below expect the position to be positive */
+ if (*ppos < 0)
+ return -EINVAL;
+
+ tmp = kzalloc(DC8051_DATA_MEM_SIZE, GFP_KERNEL);
+ if (!tmp)
+ return -ENOMEM;
+
+ /*
+ * Fill in the requested portion of the temporary buffer from the
+ * 8051 memory. The 8051 memory read is done in terms of 8 bytes.
+ * Adjust start and end to fit. Skip reading anything if out of
+ * range.
+ */
+ start = *ppos & ~0x7; /* round down */
+ if (start < DC8051_DATA_MEM_SIZE) {
+ end = (*ppos + count + 7) & ~0x7; /* round up */
+ if (end > DC8051_DATA_MEM_SIZE)
+ end = DC8051_DATA_MEM_SIZE;
+ rval = read_8051_data(ppd->dd, start, end - start,
+ (u64 *)(tmp + start));
+ if (rval)
+ goto done;
+ }
+
+ rval = simple_read_from_buffer(buf, count, ppos, tmp,
+ DC8051_DATA_MEM_SIZE);
+done:
+ kfree(tmp);
+ return rval;
+}
+
+static ssize_t debugfs_lcb_read(struct file *file, char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ struct hfi1_pportdata *ppd = private2ppd(file);
+ struct hfi1_devdata *dd = ppd->dd;
+ unsigned long total, csr_off;
+ u64 data;
+
+ if (*ppos < 0)
+ return -EINVAL;
+ /* only read 8 byte quantities */
+ if ((count % 8) != 0)
+ return -EINVAL;
+ /* offset must be 8-byte aligned */
+ if ((*ppos % 8) != 0)
+ return -EINVAL;
+ /* do nothing if out of range or zero count */
+ if (*ppos >= (LCB_END - LCB_START) || !count)
+ return 0;
+ /* reduce count if needed */
+ if (*ppos + count > LCB_END - LCB_START)
+ count = (LCB_END - LCB_START) - *ppos;
+
+ csr_off = LCB_START + *ppos;
+ for (total = 0; total < count; total += 8, csr_off += 8) {
+ if (read_lcb_csr(dd, csr_off, (u64 *)&data))
+ break; /* failed */
+ if (put_user(data, (unsigned long __user *)(buf + total)))
+ break;
+ }
+ *ppos += total;
+ return total;
+}
+
+static ssize_t debugfs_lcb_write(struct file *file, const char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ struct hfi1_pportdata *ppd = private2ppd(file);
+ struct hfi1_devdata *dd = ppd->dd;
+ unsigned long total, csr_off, data;
+
+ if (*ppos < 0)
+ return -EINVAL;
+ /* only write 8 byte quantities */
+ if ((count % 8) != 0)
+ return -EINVAL;
+ /* offset must be 8-byte aligned */
+ if ((*ppos % 8) != 0)
+ return -EINVAL;
+ /* do nothing if out of range or zero count */
+ if (*ppos >= (LCB_END - LCB_START) || !count)
+ return 0;
+ /* reduce count if needed */
+ if (*ppos + count > LCB_END - LCB_START)
+ count = (LCB_END - LCB_START) - *ppos;
+
+ csr_off = LCB_START + *ppos;
+ for (total = 0; total < count; total += 8, csr_off += 8) {
+ if (get_user(data, (unsigned long __user *)(buf + total)))
+ break;
+ if (write_lcb_csr(dd, csr_off, data))
+ break; /* failed */
+ }
+ *ppos += total;
+ return total;
+}
+
/*
* read the per-port QSFP data for ppd
*/
@@ -931,6 +1039,8 @@
DEBUGFS_XOPS("qsfp2", qsfp2_debugfs_read, qsfp2_debugfs_write,
qsfp2_debugfs_open, qsfp2_debugfs_release),
DEBUGFS_OPS("asic_flags", asic_flags_read, asic_flags_write),
+ DEBUGFS_OPS("dc8051_memory", dc8051_memory_read, NULL),
+ DEBUGFS_OPS("lcb", debugfs_lcb_read, debugfs_lcb_write),
};
static void *_sdma_cpu_list_seq_start(struct seq_file *s, loff_t *pos)
diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c
index c5efff2..4fbaee6 100644
--- a/drivers/infiniband/hw/hfi1/driver.c
+++ b/drivers/infiniband/hw/hfi1/driver.c
@@ -795,8 +795,7 @@
hfi1_schedule_send(qp);
spin_unlock_irqrestore(&qp->s_lock, flags);
}
- if (atomic_dec_and_test(&qp->refcount))
- wake_up(&qp->wait);
+ rvt_put_qp(qp);
}
}
diff --git a/drivers/infiniband/hw/hfi1/eprom.c b/drivers/infiniband/hw/hfi1/eprom.c
index e70c223..26da124 100644
--- a/drivers/infiniband/hw/hfi1/eprom.c
+++ b/drivers/infiniband/hw/hfi1/eprom.c
@@ -207,6 +207,40 @@
/* magic character sequence that trails an image */
#define IMAGE_TRAIL_MAGIC "egamiAPO"
+/* EPROM file types */
+#define HFI1_EFT_PLATFORM_CONFIG 2
+
+/* segment size - 128 KiB */
+#define SEG_SIZE (128 * 1024)
+
+struct hfi1_eprom_footer {
+ u32 oprom_size; /* size of the oprom, in bytes */
+ u16 num_table_entries;
+ u16 version; /* version of this footer */
+ u32 magic; /* must be last */
+};
+
+struct hfi1_eprom_table_entry {
+ u32 type; /* file type */
+ u32 offset; /* file offset from start of EPROM */
+ u32 size; /* file size, in bytes */
+};
+
+/*
+ * Calculate the max number of table entries that will fit within a directory
+ * buffer of size 'dir_size'.
+ */
+#define MAX_TABLE_ENTRIES(dir_size) \
+ (((dir_size) - sizeof(struct hfi1_eprom_footer)) / \
+ sizeof(struct hfi1_eprom_table_entry))
+
+#define DIRECTORY_SIZE(n) (sizeof(struct hfi1_eprom_footer) + \
+ (sizeof(struct hfi1_eprom_table_entry) * (n)))
+
+#define MAGIC4(a, b, c, d) ((d) << 24 | (c) << 16 | (b) << 8 | (a))
+#define FOOTER_MAGIC MAGIC4('e', 'p', 'r', 'm')
+#define FOOTER_VERSION 1
+
/*
* Read all of partition 1. The actual file is at the front. Adjust
* the returned size if a trailing image magic is found.
@@ -242,6 +276,167 @@
}
/*
+ * The segment magic has been checked. There is a footer and table of
+ * contents present.
+ *
+ * directory is a u32 aligned buffer of size EP_PAGE_SIZE.
+ */
+static int read_segment_platform_config(struct hfi1_devdata *dd,
+ void *directory, void **data, u32 *size)
+{
+ struct hfi1_eprom_footer *footer;
+ struct hfi1_eprom_table_entry *table;
+ struct hfi1_eprom_table_entry *entry;
+ void *buffer = NULL;
+ void *table_buffer = NULL;
+ int ret, i;
+ u32 directory_size;
+ u32 seg_base, seg_offset;
+ u32 bytes_available, ncopied, to_copy;
+
+ /* the footer is at the end of the directory */
+ footer = (struct hfi1_eprom_footer *)
+ (directory + EP_PAGE_SIZE - sizeof(*footer));
+
+ /* make sure the structure version is supported */
+ if (footer->version != FOOTER_VERSION)
+ return -EINVAL;
+
+ /* oprom size cannot be larger than a segment */
+ if (footer->oprom_size >= SEG_SIZE)
+ return -EINVAL;
+
+ /* the file table must fit in a segment with the oprom */
+ if (footer->num_table_entries >
+ MAX_TABLE_ENTRIES(SEG_SIZE - footer->oprom_size))
+ return -EINVAL;
+
+ /* find the file table start, which precedes the footer */
+ directory_size = DIRECTORY_SIZE(footer->num_table_entries);
+ if (directory_size <= EP_PAGE_SIZE) {
+ /* the file table fits into the directory buffer handed in */
+ table = (struct hfi1_eprom_table_entry *)
+ (directory + EP_PAGE_SIZE - directory_size);
+ } else {
+ /* need to allocate and read more */
+ table_buffer = kmalloc(directory_size, GFP_KERNEL);
+ if (!table_buffer)
+ return -ENOMEM;
+ ret = read_length(dd, SEG_SIZE - directory_size,
+ directory_size, table_buffer);
+ if (ret)
+ goto done;
+ table = table_buffer;
+ }
+
+ /* look for the platform configuration file in the table */
+ for (entry = NULL, i = 0; i < footer->num_table_entries; i++) {
+ if (table[i].type == HFI1_EFT_PLATFORM_CONFIG) {
+ entry = &table[i];
+ break;
+ }
+ }
+ if (!entry) {
+ ret = -ENOENT;
+ goto done;
+ }
+
+ /*
+ * Sanity check on the configuration file size - it should never
+ * be larger than 4 KiB.
+ */
+ if (entry->size > (4 * 1024)) {
+ dd_dev_err(dd, "Bad configuration file size 0x%x\n",
+ entry->size);
+ ret = -EINVAL;
+ goto done;
+ }
+
+ /* check for bogus offset and size that wrap when added together */
+ if (entry->offset + entry->size < entry->offset) {
+ dd_dev_err(dd,
+ "Bad configuration file start + size 0x%x+0x%x\n",
+ entry->offset, entry->size);
+ ret = -EINVAL;
+ goto done;
+ }
+
+ /* allocate the buffer to return */
+ buffer = kmalloc(entry->size, GFP_KERNEL);
+ if (!buffer) {
+ ret = -ENOMEM;
+ goto done;
+ }
+
+ /*
+ * Extract the file by looping over segments until it is fully read.
+ */
+ seg_offset = entry->offset % SEG_SIZE;
+ seg_base = entry->offset - seg_offset;
+ ncopied = 0;
+ while (ncopied < entry->size) {
+ /* calculate data bytes available in this segment */
+
+ /* start with the bytes from the current offset to the end */
+ bytes_available = SEG_SIZE - seg_offset;
+ /* subtract off footer and table from segment 0 */
+ if (seg_base == 0) {
+ /*
+ * Sanity check: should not have a starting point
+ * at or within the directory.
+ */
+ if (bytes_available <= directory_size) {
+ dd_dev_err(dd,
+ "Bad configuration file - offset 0x%x within footer+table\n",
+ entry->offset);
+ ret = -EINVAL;
+ goto done;
+ }
+ bytes_available -= directory_size;
+ }
+
+ /* calculate bytes wanted */
+ to_copy = entry->size - ncopied;
+
+ /* max out at the available bytes in this segment */
+ if (to_copy > bytes_available)
+ to_copy = bytes_available;
+
+ /*
+ * Read from the EPROM.
+ *
+ * The sanity check for entry->offset is done in read_length().
+ * The EPROM offset is validated against what the hardware
+ * addressing supports. In addition, if the offset is larger
+ * than the actual EPROM, it silently wraps. It will work
+ * fine, though the reader may not get what they expected
+ * from the EPROM.
+ */
+ ret = read_length(dd, seg_base + seg_offset, to_copy,
+ buffer + ncopied);
+ if (ret)
+ goto done;
+
+ ncopied += to_copy;
+
+ /* set up for next segment */
+ seg_offset = footer->oprom_size;
+ seg_base += SEG_SIZE;
+ }
+
+ /* success */
+ ret = 0;
+ *data = buffer;
+ *size = entry->size;
+
+done:
+ kfree(table_buffer);
+ if (ret)
+ kfree(buffer);
+ return ret;
+}
+
+/*
* Read the platform configuration file from the EPROM.
*
* On success, an allocated buffer containing the data and its size are
@@ -253,6 +448,7 @@
* -EBUSY - not able to acquire access to the EPROM
* -ENOENT - no recognizable file written
* -ENOMEM - buffer could not be allocated
+ * -EINVAL - invalid EPROM contentents found
*/
int eprom_read_platform_config(struct hfi1_devdata *dd, void **data, u32 *size)
{
@@ -266,21 +462,20 @@
if (ret)
return -EBUSY;
- /* read the last page of P0 for the EPROM format magic */
- ret = read_length(dd, P1_START - EP_PAGE_SIZE, EP_PAGE_SIZE, directory);
+ /* read the last page of the segment for the EPROM format magic */
+ ret = read_length(dd, SEG_SIZE - EP_PAGE_SIZE, EP_PAGE_SIZE, directory);
if (ret)
goto done;
- /* last dword of P0 contains a magic indicator */
- if (directory[EP_PAGE_DWORDS - 1] == 0) {
+ /* last dword of the segment contains a magic value */
+ if (directory[EP_PAGE_DWORDS - 1] == FOOTER_MAGIC) {
+ /* segment format */
+ ret = read_segment_platform_config(dd, directory, data, size);
+ } else {
/* partition format */
ret = read_partition_platform_config(dd, data, size);
- goto done;
}
- /* nothing recognized */
- ret = -ENOENT;
-
done:
release_chip_resource(dd, CR_EPROM);
return ret;
diff --git a/drivers/infiniband/hw/hfi1/firmware.c b/drivers/infiniband/hw/hfi1/firmware.c
index 13db8eb..0dd50cd 100644
--- a/drivers/infiniband/hw/hfi1/firmware.c
+++ b/drivers/infiniband/hw/hfi1/firmware.c
@@ -239,6 +239,16 @@
const u8 pcie_serdes_broadcast[2] = { 0xe2, 0xe3 };
static const u8 all_pcie_serdes_broadcast = 0xe0;
+static const u32 platform_config_table_limits[PLATFORM_CONFIG_TABLE_MAX] = {
+ 0,
+ SYSTEM_TABLE_MAX,
+ PORT_TABLE_MAX,
+ RX_PRESET_TABLE_MAX,
+ TX_PRESET_TABLE_MAX,
+ QSFP_ATTEN_TABLE_MAX,
+ VARIABLE_SETTINGS_TABLE_MAX
+};
+
/* forwards */
static void dispose_one_firmware(struct firmware_details *fdet);
static int load_fabric_serdes_firmware(struct hfi1_devdata *dd,
@@ -263,11 +273,13 @@
u64 reg;
int count;
- /* start the read at the given address */
- reg = ((addr & DC_DC8051_CFG_RAM_ACCESS_CTRL_ADDRESS_MASK)
- << DC_DC8051_CFG_RAM_ACCESS_CTRL_ADDRESS_SHIFT)
- | DC_DC8051_CFG_RAM_ACCESS_CTRL_READ_ENA_SMASK;
+ /* step 1: set the address, clear enable */
+ reg = (addr & DC_DC8051_CFG_RAM_ACCESS_CTRL_ADDRESS_MASK)
+ << DC_DC8051_CFG_RAM_ACCESS_CTRL_ADDRESS_SHIFT;
write_csr(dd, DC_DC8051_CFG_RAM_ACCESS_CTRL, reg);
+ /* step 2: enable */
+ write_csr(dd, DC_DC8051_CFG_RAM_ACCESS_CTRL,
+ reg | DC_DC8051_CFG_RAM_ACCESS_CTRL_READ_ENA_SMASK);
/* wait until ACCESS_COMPLETED is set */
count = 0;
@@ -707,6 +719,9 @@
&dd->pcidev->dev);
if (err) {
platform_config = NULL;
+ dd_dev_err(dd,
+ "%s: No default platform config file found\n",
+ __func__);
goto done;
}
dd->platform_config.data = platform_config->data;
@@ -1761,8 +1776,17 @@
u32 record_idx = 0, table_type = 0, table_length_dwords = 0;
int ret = -EINVAL; /* assume failure */
+ /*
+ * For integrated devices that did not fall back to the default file,
+ * the SI tuning information for active channels is acquired from the
+ * scratch register bitmap, thus there is no platform config to parse.
+ * Skip parsing in these situations.
+ */
+ if (is_integrated(dd) && !platform_config_load)
+ return 0;
+
if (!dd->platform_config.data) {
- dd_dev_info(dd, "%s: Missing config file\n", __func__);
+ dd_dev_err(dd, "%s: Missing config file\n", __func__);
goto bail;
}
ptr = (u32 *)dd->platform_config.data;
@@ -1770,7 +1794,7 @@
magic_num = *ptr;
ptr++;
if (magic_num != PLATFORM_CONFIG_MAGIC_NUM) {
- dd_dev_info(dd, "%s: Bad config file\n", __func__);
+ dd_dev_err(dd, "%s: Bad config file\n", __func__);
goto bail;
}
@@ -1797,9 +1821,9 @@
header1 = *ptr;
header2 = *(ptr + 1);
if (header1 != ~header2) {
- dd_dev_info(dd, "%s: Failed validation at offset %ld\n",
- __func__, (ptr - (u32 *)
- dd->platform_config.data));
+ dd_dev_err(dd, "%s: Failed validation at offset %ld\n",
+ __func__, (ptr - (u32 *)
+ dd->platform_config.data));
goto bail;
}
@@ -1841,11 +1865,11 @@
table_length_dwords;
break;
default:
- dd_dev_info(dd,
- "%s: Unknown data table %d, offset %ld\n",
- __func__, table_type,
- (ptr - (u32 *)
- dd->platform_config.data));
+ dd_dev_err(dd,
+ "%s: Unknown data table %d, offset %ld\n",
+ __func__, table_type,
+ (ptr - (u32 *)
+ dd->platform_config.data));
goto bail; /* We don't trust this file now */
}
pcfgcache->config_tables[table_type].table = ptr;
@@ -1865,11 +1889,11 @@
case PLATFORM_CONFIG_VARIABLE_SETTINGS_TABLE:
break;
default:
- dd_dev_info(dd,
- "%s: Unknown meta table %d, offset %ld\n",
- __func__, table_type,
- (ptr -
- (u32 *)dd->platform_config.data));
+ dd_dev_err(dd,
+ "%s: Unknown meta table %d, offset %ld\n",
+ __func__, table_type,
+ (ptr -
+ (u32 *)dd->platform_config.data));
goto bail; /* We don't trust this file now */
}
pcfgcache->config_tables[table_type].table_metadata =
@@ -1884,10 +1908,9 @@
/* Jump the table */
ptr += table_length_dwords;
if (crc != *ptr) {
- dd_dev_info(dd, "%s: Failed CRC check at offset %ld\n",
- __func__, (ptr -
- (u32 *)
- dd->platform_config.data));
+ dd_dev_err(dd, "%s: Failed CRC check at offset %ld\n",
+ __func__, (ptr -
+ (u32 *)dd->platform_config.data));
goto bail;
}
/* Jump the CRC DWORD */
@@ -1901,6 +1924,84 @@
return ret;
}
+static void get_integrated_platform_config_field(
+ struct hfi1_devdata *dd,
+ enum platform_config_table_type_encoding table_type,
+ int field_index, u32 *data)
+{
+ struct hfi1_pportdata *ppd = dd->pport;
+ u8 *cache = ppd->qsfp_info.cache;
+ u32 tx_preset = 0;
+
+ switch (table_type) {
+ case PLATFORM_CONFIG_SYSTEM_TABLE:
+ if (field_index == SYSTEM_TABLE_QSFP_POWER_CLASS_MAX)
+ *data = ppd->max_power_class;
+ else if (field_index == SYSTEM_TABLE_QSFP_ATTENUATION_DEFAULT_25G)
+ *data = ppd->default_atten;
+ break;
+ case PLATFORM_CONFIG_PORT_TABLE:
+ if (field_index == PORT_TABLE_PORT_TYPE)
+ *data = ppd->port_type;
+ else if (field_index == PORT_TABLE_LOCAL_ATTEN_25G)
+ *data = ppd->local_atten;
+ else if (field_index == PORT_TABLE_REMOTE_ATTEN_25G)
+ *data = ppd->remote_atten;
+ break;
+ case PLATFORM_CONFIG_RX_PRESET_TABLE:
+ if (field_index == RX_PRESET_TABLE_QSFP_RX_CDR_APPLY)
+ *data = (ppd->rx_preset & QSFP_RX_CDR_APPLY_SMASK) >>
+ QSFP_RX_CDR_APPLY_SHIFT;
+ else if (field_index == RX_PRESET_TABLE_QSFP_RX_EMP_APPLY)
+ *data = (ppd->rx_preset & QSFP_RX_EMP_APPLY_SMASK) >>
+ QSFP_RX_EMP_APPLY_SHIFT;
+ else if (field_index == RX_PRESET_TABLE_QSFP_RX_AMP_APPLY)
+ *data = (ppd->rx_preset & QSFP_RX_AMP_APPLY_SMASK) >>
+ QSFP_RX_AMP_APPLY_SHIFT;
+ else if (field_index == RX_PRESET_TABLE_QSFP_RX_CDR)
+ *data = (ppd->rx_preset & QSFP_RX_CDR_SMASK) >>
+ QSFP_RX_CDR_SHIFT;
+ else if (field_index == RX_PRESET_TABLE_QSFP_RX_EMP)
+ *data = (ppd->rx_preset & QSFP_RX_EMP_SMASK) >>
+ QSFP_RX_EMP_SHIFT;
+ else if (field_index == RX_PRESET_TABLE_QSFP_RX_AMP)
+ *data = (ppd->rx_preset & QSFP_RX_AMP_SMASK) >>
+ QSFP_RX_AMP_SHIFT;
+ break;
+ case PLATFORM_CONFIG_TX_PRESET_TABLE:
+ if (cache[QSFP_EQ_INFO_OFFS] & 0x4)
+ tx_preset = ppd->tx_preset_eq;
+ else
+ tx_preset = ppd->tx_preset_noeq;
+ if (field_index == TX_PRESET_TABLE_PRECUR)
+ *data = (tx_preset & TX_PRECUR_SMASK) >>
+ TX_PRECUR_SHIFT;
+ else if (field_index == TX_PRESET_TABLE_ATTN)
+ *data = (tx_preset & TX_ATTN_SMASK) >>
+ TX_ATTN_SHIFT;
+ else if (field_index == TX_PRESET_TABLE_POSTCUR)
+ *data = (tx_preset & TX_POSTCUR_SMASK) >>
+ TX_POSTCUR_SHIFT;
+ else if (field_index == TX_PRESET_TABLE_QSFP_TX_CDR_APPLY)
+ *data = (tx_preset & QSFP_TX_CDR_APPLY_SMASK) >>
+ QSFP_TX_CDR_APPLY_SHIFT;
+ else if (field_index == TX_PRESET_TABLE_QSFP_TX_EQ_APPLY)
+ *data = (tx_preset & QSFP_TX_EQ_APPLY_SMASK) >>
+ QSFP_TX_EQ_APPLY_SHIFT;
+ else if (field_index == TX_PRESET_TABLE_QSFP_TX_CDR)
+ *data = (tx_preset & QSFP_TX_CDR_SMASK) >>
+ QSFP_TX_CDR_SHIFT;
+ else if (field_index == TX_PRESET_TABLE_QSFP_TX_EQ)
+ *data = (tx_preset & QSFP_TX_EQ_SMASK) >>
+ QSFP_TX_EQ_SHIFT;
+ break;
+ case PLATFORM_CONFIG_QSFP_ATTEN_TABLE:
+ case PLATFORM_CONFIG_VARIABLE_SETTINGS_TABLE:
+ default:
+ break;
+ }
+}
+
static int get_platform_fw_field_metadata(struct hfi1_devdata *dd, int table,
int field, u32 *field_len_bits,
u32 *field_start_bits)
@@ -1976,6 +2077,15 @@
else
return -EINVAL;
+ if (is_integrated(dd) && !platform_config_load) {
+ /*
+ * Use saved configuration from ppd for integrated platforms
+ */
+ get_integrated_platform_config_field(dd, table_type,
+ field_index, data);
+ return 0;
+ }
+
ret = get_platform_fw_field_metadata(dd, table_type, field_index,
&field_len_bits,
&field_start_bits);
diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h
index cc87fd4..751a0fb 100644
--- a/drivers/infiniband/hw/hfi1/hfi.h
+++ b/drivers/infiniband/hw/hfi1/hfi.h
@@ -492,6 +492,9 @@
#define HFI1_MIN_VLS_SUPPORTED 1
#define HFI1_MAX_VLS_SUPPORTED 8
+#define HFI1_GUIDS_PER_PORT 5
+#define HFI1_PORT_GUID_INDEX 0
+
static inline void incr_cntr64(u64 *cntr)
{
if (*cntr < (u64)-1LL)
@@ -559,11 +562,20 @@
struct kobject vl2mtu_kobj;
/* PHY support */
- u32 port_type;
struct qsfp_data qsfp_info;
+ /* Values for SI tuning of SerDes */
+ u32 port_type;
+ u32 tx_preset_eq;
+ u32 tx_preset_noeq;
+ u32 rx_preset;
+ u8 local_atten;
+ u8 remote_atten;
+ u8 default_atten;
+ u8 max_power_class;
- /* GUID for this interface, in host order */
- u64 guid;
+ /* GUIDs for this interface, in host order, guids[0] is a port guid */
+ u64 guids[HFI1_GUIDS_PER_PORT];
+
/* GUID for peer interface, in host order */
u64 neighbor_guid;
@@ -826,32 +838,29 @@
u8 __iomem *kregend;
/* physical address of chip for io_remap, etc. */
resource_size_t physaddr;
- /* receive context data */
- struct hfi1_ctxtdata **rcd;
+ /* Per VL data. Enough for all VLs but not all elements are set/used. */
+ struct per_vl_data vld[PER_VL_SEND_CONTEXTS];
/* send context data */
struct send_context_info *send_contexts;
/* map hardware send contexts to software index */
u8 *hw_to_sw;
/* spinlock for allocating and releasing send context resources */
spinlock_t sc_lock;
- /* Per VL data. Enough for all VLs but not all elements are set/used. */
- struct per_vl_data vld[PER_VL_SEND_CONTEXTS];
/* lock for pio_map */
spinlock_t pio_map_lock;
+ /* Send Context initialization lock. */
+ spinlock_t sc_init_lock;
+ /* lock for sdma_map */
+ spinlock_t sde_map_lock;
/* array of kernel send contexts */
struct send_context **kernel_send_context;
/* array of vl maps */
struct pio_vl_map __rcu *pio_map;
- /* seqlock for sc2vl */
- seqlock_t sc2vl_lock;
- u64 sc2vl[4];
- /* Send Context initialization lock. */
- spinlock_t sc_init_lock;
+ /* default flags to last descriptor */
+ u64 default_desc1;
/* fields common to all SDMA engines */
- /* default flags to last descriptor */
- u64 default_desc1;
volatile __le64 *sdma_heads_dma; /* DMA'ed by chip */
dma_addr_t sdma_heads_phys;
void *sdma_pad_dma; /* DMA'ed by chip */
@@ -862,8 +871,6 @@
u32 chip_sdma_engines;
/* num used */
u32 num_sdma;
- /* lock for sdma_map */
- spinlock_t sde_map_lock;
/* array of engines sized by num_sdma */
struct sdma_engine *per_sdma;
/* array of vl maps */
@@ -872,14 +879,11 @@
wait_queue_head_t sdma_unfreeze_wq;
atomic_t sdma_unfreeze_count;
+ u32 lcb_access_count; /* count of LCB users */
+
/* common data between shared ASIC HFIs in this OS */
struct hfi1_asic_data *asic_data;
- /* hfi1_pportdata, points to array of (physical) port-specific
- * data structs, indexed by pidx (0..n-1)
- */
- struct hfi1_pportdata *pport;
-
/* mem-mapped pointer to base of PIO buffers */
void __iomem *piobase;
/*
@@ -896,20 +900,13 @@
/* send context numbers and sizes for each type */
struct sc_config_sizes sc_sizes[SC_MAX];
- u32 lcb_access_count; /* count of LCB users */
-
char *boardname; /* human readable board info */
- /* device (not port) flags, basically device capabilities */
- u32 flags;
-
/* reset value */
u64 z_int_counter;
u64 z_rcv_limit;
u64 z_send_schedule;
- /* percpu int_counter */
- u64 __percpu *int_counter;
- u64 __percpu *rcv_limit;
+
u64 __percpu *send_schedule;
/* number of receive contexts in use by the driver */
u32 num_rcv_contexts;
@@ -924,6 +921,7 @@
/* base receive interrupt timeout, in CSR units */
u32 rcv_intr_timeout_csr;
+ u32 freezelen; /* max length of freezemsg */
u64 __iomem *egrtidbase;
spinlock_t sendctrl_lock; /* protect changes to SendCtrl */
spinlock_t rcvctrl_lock; /* protect changes to RcvCtrl */
@@ -945,7 +943,6 @@
* IB link status cheaply
*/
struct hfi1_status *status;
- u32 freezelen; /* max length of freezemsg */
/* revision register shadow */
u64 revision;
@@ -973,6 +970,8 @@
u16 rcvegrbufsize_shift;
/* both sides of the PCIe link are gen3 capable */
u8 link_gen3_capable;
+ /* default link down value (poll/sleep) */
+ u8 link_default;
/* localbus width (1, 2,4,8,16,32) from config space */
u32 lbus_width;
/* localbus speed in MHz */
@@ -1008,8 +1007,6 @@
u8 hfi1_id;
/* implementation code */
u8 icode;
- /* default link down value (poll/sleep) */
- u8 link_default;
/* vAU of this device */
u8 vau;
/* vCU of this device */
@@ -1020,27 +1017,17 @@
u16 vl15_init;
/* Misc small ints */
- /* Number of physical ports available */
- u8 num_pports;
- /* Lowest context number which can be used by user processes */
- u8 first_user_ctxt;
u8 n_krcv_queues;
u8 qos_shift;
- u8 qpn_mask;
- u16 rhf_offset; /* offset of RHF within receive header entry */
u16 irev; /* implementation revision */
u16 dc8051_ver; /* 8051 firmware version */
+ spinlock_t hfi1_diag_trans_lock; /* protect diag observer ops */
struct platform_config platform_config;
struct platform_config_cache pcfg_cache;
struct diag_client *diag_client;
- spinlock_t hfi1_diag_trans_lock; /* protect diag observer ops */
-
- u8 psxmitwait_supported;
- /* cycle length of PS* counters in HW (in picoseconds) */
- u16 psxmitwait_check_rate;
/* MSI-X information */
struct hfi1_msix_entry *msix_entries;
@@ -1055,6 +1042,9 @@
struct rcv_array_data rcv_entries;
+ /* cycle length of PS* counters in HW (in picoseconds) */
+ u16 psxmitwait_check_rate;
+
/*
* 64 bit synthetic counters
*/
@@ -1085,11 +1075,11 @@
struct err_info_rcvport err_info_rcvport;
struct err_info_constraint err_info_rcv_constraint;
struct err_info_constraint err_info_xmit_constraint;
- u8 err_info_uncorrectable;
- u8 err_info_fmconfig;
atomic_t drop_packet;
u8 do_drop;
+ u8 err_info_uncorrectable;
+ u8 err_info_fmconfig;
/*
* Software counters for the status bits defined by the
@@ -1112,40 +1102,60 @@
u64 sw_cce_err_status_aggregate;
/* Software counter that aggregates all bypass packet rcv errors */
u64 sw_rcv_bypass_packet_errors;
- /* receive interrupt functions */
- rhf_rcv_function_ptr *rhf_rcv_function_map;
+ /* receive interrupt function */
rhf_rcv_function_ptr normal_rhf_rcv_functions[8];
+ /* Save the enabled LCB error bits */
+ u64 lcb_err_en;
+
/*
* Capability to have different send engines simply by changing a
* pointer value.
*/
- send_routine process_pio_send;
+ send_routine process_pio_send ____cacheline_aligned_in_smp;
send_routine process_dma_send;
void (*pio_inline_send)(struct hfi1_devdata *dd, struct pio_buf *pbuf,
u64 pbc, const void *from, size_t count);
+ /* hfi1_pportdata, points to array of (physical) port-specific
+ * data structs, indexed by pidx (0..n-1)
+ */
+ struct hfi1_pportdata *pport;
+ /* receive context data */
+ struct hfi1_ctxtdata **rcd;
+ u64 __percpu *int_counter;
+ /* device (not port) flags, basically device capabilities */
+ u16 flags;
+ /* Number of physical ports available */
+ u8 num_pports;
+ /* Lowest context number which can be used by user processes */
+ u8 first_user_ctxt;
+ /* adding a new field here would make it part of this cacheline */
+
+ /* seqlock for sc2vl */
+ seqlock_t sc2vl_lock ____cacheline_aligned_in_smp;
+ u64 sc2vl[4];
+ /* receive interrupt functions */
+ rhf_rcv_function_ptr *rhf_rcv_function_map;
+ u64 __percpu *rcv_limit;
+ u16 rhf_offset; /* offset of RHF within receive header entry */
+ /* adding a new field here would make it part of this cacheline */
/* OUI comes from the HW. Used everywhere as 3 separate bytes. */
u8 oui1;
u8 oui2;
u8 oui3;
+ u8 dc_shutdown;
+
/* Timer and counter used to detect RcvBufOvflCnt changes */
struct timer_list rcverr_timer;
- u32 rcv_ovfl_cnt;
wait_queue_head_t event_queue;
- /* Save the enabled LCB error bits */
- u64 lcb_err_en;
- u8 dc_shutdown;
-
/* receive context tail dummy address */
__le64 *rcvhdrtail_dummy_kvaddr;
dma_addr_t rcvhdrtail_dummy_dma;
- bool eprom_available; /* true if EPROM is available for this device */
- bool aspm_supported; /* Does HW support ASPM */
- bool aspm_enabled; /* ASPM state: enabled/disabled */
+ u32 rcv_ovfl_cnt;
/* Serialize ASPM enable/disable between multiple verbs contexts */
spinlock_t aspm_lock;
/* Number of verbs contexts which have disabled ASPM */
@@ -1155,8 +1165,11 @@
/* Used to wait for outstanding user space clients before dev removal */
struct completion user_comp;
- struct hfi1_affinity *affinity;
+ bool eprom_available; /* true if EPROM is available for this device */
+ bool aspm_supported; /* Does HW support ASPM */
+ bool aspm_enabled; /* ASPM state: enabled/disabled */
struct rhashtable sdma_rht;
+
struct kobject kobj;
};
@@ -1604,6 +1617,17 @@
}
/*
+ * Return the indexed GUID from the port GUIDs table.
+ */
+static inline __be64 get_sguid(struct hfi1_ibport *ibp, unsigned int index)
+{
+ struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
+
+ WARN_ON(index >= HFI1_GUIDS_PER_PORT);
+ return cpu_to_be64(ppd->guids[index]);
+}
+
+/*
* Called by readers of cc_state only, must call under rcu_read_lock().
*/
static inline struct cc_state *get_cc_state(struct hfi1_pportdata *ppd)
@@ -1982,6 +2006,12 @@
return i2c_target(dd->hfi1_id);
}
+/* Is this device integrated or discrete? */
+static inline bool is_integrated(struct hfi1_devdata *dd)
+{
+ return dd->pcidev->device == PCI_DEVICE_ID_INTEL1;
+}
+
int hfi1_tempsense_rd(struct hfi1_devdata *dd, struct hfi1_temp *temp);
#define DD_DEV_ENTRY(dd) __string(dev, dev_name(&(dd)->pcidev->dev))
diff --git a/drivers/infiniband/hw/hfi1/iowait.h b/drivers/infiniband/hw/hfi1/iowait.h
index 2ec6ef3..d9740dd 100644
--- a/drivers/infiniband/hw/hfi1/iowait.h
+++ b/drivers/infiniband/hw/hfi1/iowait.h
@@ -64,6 +64,7 @@
/**
* struct iowait - linkage for delayed progress/waiting
* @list: used to add/insert into QP/PQ wait lists
+ * @lock: uses to record the list head lock
* @tx_head: overflow list of sdma_txreq's
* @sleep: no space callback
* @wakeup: space callback wakeup
@@ -91,6 +92,11 @@
* so sleeping is not allowed.
*
* The wait_dma member along with the iow
+ *
+ * The lock field is used by waiters to record
+ * the seqlock_t that guards the list head.
+ * Waiters explicity know that, but the destroy
+ * code that unwaits QPs does not.
*/
struct iowait {
@@ -103,6 +109,7 @@
unsigned seq);
void (*wakeup)(struct iowait *wait, int reason);
void (*sdma_drained)(struct iowait *wait);
+ seqlock_t *lock;
struct work_struct iowork;
wait_queue_head_t wait_dma;
wait_queue_head_t wait_pio;
@@ -141,6 +148,7 @@
void (*sdma_drained)(struct iowait *wait))
{
wait->count = 0;
+ wait->lock = NULL;
INIT_LIST_HEAD(&wait->list);
INIT_LIST_HEAD(&wait->tx_head);
INIT_WORK(&wait->iowork, func);
diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c
index 9487c9b..6e595af 100644
--- a/drivers/infiniband/hw/hfi1/mad.c
+++ b/drivers/infiniband/hw/hfi1/mad.c
@@ -128,7 +128,7 @@
smp = send_buf->mad;
smp->base_version = OPA_MGMT_BASE_VERSION;
smp->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED;
- smp->class_version = OPA_SMI_CLASS_VERSION;
+ smp->class_version = OPA_SM_CLASS_VERSION;
smp->method = IB_MGMT_METHOD_TRAP;
ibp->rvp.tid++;
smp->tid = cpu_to_be64(ibp->rvp.tid);
@@ -336,20 +336,20 @@
ni = (struct opa_node_info *)data;
/* GUID 0 is illegal */
- if (am || pidx >= dd->num_pports || dd->pport[pidx].guid == 0) {
+ if (am || pidx >= dd->num_pports || ibdev->node_guid == 0 ||
+ get_sguid(to_iport(ibdev, port), HFI1_PORT_GUID_INDEX) == 0) {
smp->status |= IB_SMP_INVALID_FIELD;
return reply((struct ib_mad_hdr *)smp);
}
- ni->port_guid = cpu_to_be64(dd->pport[pidx].guid);
+ ni->port_guid = get_sguid(to_iport(ibdev, port), HFI1_PORT_GUID_INDEX);
ni->base_version = OPA_MGMT_BASE_VERSION;
- ni->class_version = OPA_SMI_CLASS_VERSION;
+ ni->class_version = OPA_SM_CLASS_VERSION;
ni->node_type = 1; /* channel adapter */
ni->num_ports = ibdev->phys_port_cnt;
/* This is already in network order */
ni->system_image_guid = ib_hfi1_sys_image_guid;
- /* Use first-port GUID as node */
- ni->node_guid = cpu_to_be64(dd->pport->guid);
+ ni->node_guid = ibdev->node_guid;
ni->partition_cap = cpu_to_be16(hfi1_get_npkeys(dd));
ni->device_id = cpu_to_be16(dd->pcidev->device);
ni->revision = cpu_to_be32(dd->minrev);
@@ -373,19 +373,20 @@
/* GUID 0 is illegal */
if (smp->attr_mod || pidx >= dd->num_pports ||
- dd->pport[pidx].guid == 0)
+ ibdev->node_guid == 0 ||
+ get_sguid(to_iport(ibdev, port), HFI1_PORT_GUID_INDEX) == 0) {
smp->status |= IB_SMP_INVALID_FIELD;
- else
- nip->port_guid = cpu_to_be64(dd->pport[pidx].guid);
+ return reply((struct ib_mad_hdr *)smp);
+ }
+ nip->port_guid = get_sguid(to_iport(ibdev, port), HFI1_PORT_GUID_INDEX);
nip->base_version = OPA_MGMT_BASE_VERSION;
- nip->class_version = OPA_SMI_CLASS_VERSION;
+ nip->class_version = OPA_SM_CLASS_VERSION;
nip->node_type = 1; /* channel adapter */
nip->num_ports = ibdev->phys_port_cnt;
/* This is already in network order */
nip->sys_guid = ib_hfi1_sys_image_guid;
- /* Use first-port GUID as node */
- nip->node_guid = cpu_to_be64(dd->pport->guid);
+ nip->node_guid = ibdev->node_guid;
nip->partition_cap = cpu_to_be16(hfi1_get_npkeys(dd));
nip->device_id = cpu_to_be16(dd->pcidev->device);
nip->revision = cpu_to_be32(dd->minrev);
@@ -2302,7 +2303,7 @@
pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
p->base_version = OPA_MGMT_BASE_VERSION;
- p->class_version = OPA_SMI_CLASS_VERSION;
+ p->class_version = OPA_SM_CLASS_VERSION;
/*
* Expected response time is 4.096 usec. * 2^18 == 1.073741824 sec.
*/
@@ -4022,7 +4023,7 @@
am = be32_to_cpu(smp->attr_mod);
attr_id = smp->attr_id;
- if (smp->class_version != OPA_SMI_CLASS_VERSION) {
+ if (smp->class_version != OPA_SM_CLASS_VERSION) {
smp->status |= IB_SMP_UNSUP_VERSION;
ret = reply((struct ib_mad_hdr *)smp);
return ret;
@@ -4232,7 +4233,7 @@
*out_mad = *in_mad;
- if (pmp->mad_hdr.class_version != OPA_SMI_CLASS_VERSION) {
+ if (pmp->mad_hdr.class_version != OPA_SM_CLASS_VERSION) {
pmp->mad_hdr.status |= IB_SMP_UNSUP_VERSION;
return reply((struct ib_mad_hdr *)pmp);
}
diff --git a/drivers/infiniband/hw/hfi1/mmu_rb.c b/drivers/infiniband/hw/hfi1/mmu_rb.c
index 7ad3089..ccbf52c 100644
--- a/drivers/infiniband/hw/hfi1/mmu_rb.c
+++ b/drivers/infiniband/hw/hfi1/mmu_rb.c
@@ -81,7 +81,7 @@
struct list_head *del_list);
static void handle_remove(struct work_struct *work);
-static struct mmu_notifier_ops mn_opts = {
+static const struct mmu_notifier_ops mn_opts = {
.invalidate_page = mmu_notifier_page,
.invalidate_range_start = mmu_notifier_range_start,
};
diff --git a/drivers/infiniband/hw/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c
index d89b874..615be68 100644
--- a/drivers/infiniband/hw/hfi1/pio.c
+++ b/drivers/infiniband/hw/hfi1/pio.c
@@ -758,6 +758,7 @@
sc->hw_context = hw_context;
cr_group_addresses(sc, &dma);
sc->credits = sci->credits;
+ sc->size = sc->credits * PIO_BLOCK_SIZE;
/* PIO Send Memory Address details */
#define PIO_ADDR_CONTEXT_MASK 0xfful
@@ -1242,6 +1243,7 @@
sc->free = 0;
sc->alloc_free = 0;
sc->fill = 0;
+ sc->fill_wrap = 0;
sc->sr_head = 0;
sc->sr_tail = 0;
sc->flags = 0;
@@ -1385,7 +1387,7 @@
unsigned long flags;
unsigned long avail;
unsigned long blocks = dwords_to_blocks(dw_len);
- unsigned long start_fill;
+ u32 fill_wrap;
int trycount = 0;
u32 head, next;
@@ -1410,9 +1412,7 @@
(sc->fill - sc->alloc_free);
if (blocks > avail) {
/* still no room, actively update */
- spin_unlock_irqrestore(&sc->alloc_lock, flags);
sc_release_update(sc);
- spin_lock_irqsave(&sc->alloc_lock, flags);
sc->alloc_free = ACCESS_ONCE(sc->free);
trycount++;
goto retry;
@@ -1428,8 +1428,11 @@
head = sc->sr_head;
/* "allocate" the buffer */
- start_fill = sc->fill;
sc->fill += blocks;
+ fill_wrap = sc->fill_wrap;
+ sc->fill_wrap += blocks;
+ if (sc->fill_wrap >= sc->credits)
+ sc->fill_wrap = sc->fill_wrap - sc->credits;
/*
* Fill the parts that the releaser looks at before moving the head.
@@ -1458,11 +1461,8 @@
spin_unlock_irqrestore(&sc->alloc_lock, flags);
/* finish filling in the buffer outside the lock */
- pbuf->start = sc->base_addr + ((start_fill % sc->credits)
- * PIO_BLOCK_SIZE);
- pbuf->size = sc->credits * PIO_BLOCK_SIZE;
- pbuf->end = sc->base_addr + pbuf->size;
- pbuf->block_count = blocks;
+ pbuf->start = sc->base_addr + fill_wrap * PIO_BLOCK_SIZE;
+ pbuf->end = sc->base_addr + sc->size;
pbuf->qw_written = 0;
pbuf->carry_bytes = 0;
pbuf->carry.val64 = 0;
@@ -1573,6 +1573,7 @@
qp = iowait_to_qp(wait);
priv = qp->priv;
list_del_init(&priv->s_iowait.list);
+ priv->s_iowait.lock = NULL;
/* refcount held until actual wake up */
qps[n++] = qp;
}
@@ -2028,29 +2029,17 @@
int init_credit_return(struct hfi1_devdata *dd)
{
int ret;
- int num_numa;
int i;
- num_numa = num_online_nodes();
- /* enforce the expectation that the numas are compact */
- for (i = 0; i < num_numa; i++) {
- if (!node_online(i)) {
- dd_dev_err(dd, "NUMA nodes are not compact\n");
- ret = -EINVAL;
- goto done;
- }
- }
-
dd->cr_base = kcalloc(
- num_numa,
+ node_affinity.num_possible_nodes,
sizeof(struct credit_return_base),
GFP_KERNEL);
if (!dd->cr_base) {
- dd_dev_err(dd, "Unable to allocate credit return base\n");
ret = -ENOMEM;
goto done;
}
- for (i = 0; i < num_numa; i++) {
+ for_each_node_with_cpus(i) {
int bytes = TXE_NUM_CONTEXTS * sizeof(struct credit_return);
set_dev_node(&dd->pcidev->dev, i);
@@ -2077,14 +2066,11 @@
void free_credit_return(struct hfi1_devdata *dd)
{
- int num_numa;
int i;
if (!dd->cr_base)
return;
-
- num_numa = num_online_nodes();
- for (i = 0; i < num_numa; i++) {
+ for (i = 0; i < node_affinity.num_possible_nodes; i++) {
if (dd->cr_base[i].va) {
dma_free_coherent(&dd->pcidev->dev,
TXE_NUM_CONTEXTS *
diff --git a/drivers/infiniband/hw/hfi1/pio.h b/drivers/infiniband/hw/hfi1/pio.h
index e709eaf..867e5ff 100644
--- a/drivers/infiniband/hw/hfi1/pio.h
+++ b/drivers/infiniband/hw/hfi1/pio.h
@@ -83,53 +83,55 @@
void *arg; /* argument for cb */
void __iomem *start; /* buffer start address */
void __iomem *end; /* context end address */
- unsigned long size; /* context size, in bytes */
unsigned long sent_at; /* buffer is sent when <= free */
- u32 block_count; /* size of buffer, in blocks */
- u32 qw_written; /* QW written so far */
- u32 carry_bytes; /* number of valid bytes in carry */
union mix carry; /* pending unwritten bytes */
+ u16 qw_written; /* QW written so far */
+ u8 carry_bytes; /* number of valid bytes in carry */
};
/* cache line aligned pio buffer array */
union pio_shadow_ring {
struct pio_buf pbuf;
- u64 unused[16]; /* cache line spacer */
} ____cacheline_aligned;
/* per-NUMA send context */
struct send_context {
/* read-only after init */
struct hfi1_devdata *dd; /* device */
- void __iomem *base_addr; /* start of PIO memory */
union pio_shadow_ring *sr; /* shadow ring */
+ void __iomem *base_addr; /* start of PIO memory */
+ u32 __percpu *buffers_allocated;/* count of buffers allocated */
+ u32 size; /* context size, in bytes */
- volatile __le64 *hw_free; /* HW free counter */
- struct work_struct halt_work; /* halted context work queue entry */
- unsigned long flags; /* flags */
int node; /* context home node */
- int type; /* context type */
- u32 sw_index; /* software index number */
- u32 hw_context; /* hardware context number */
- u32 credits; /* number of blocks in context */
u32 sr_size; /* size of the shadow ring */
- u32 group; /* credit return group */
+ u16 flags; /* flags */
+ u8 type; /* context type */
+ u8 sw_index; /* software index number */
+ u8 hw_context; /* hardware context number */
+ u8 group; /* credit return group */
+
/* allocator fields */
spinlock_t alloc_lock ____cacheline_aligned_in_smp;
+ u32 sr_head; /* shadow ring head */
unsigned long fill; /* official alloc count */
unsigned long alloc_free; /* copy of free (less cache thrash) */
- u32 sr_head; /* shadow ring head */
+ u32 fill_wrap; /* tracks fill within ring */
+ u32 credits; /* number of blocks in context */
+ /* adding a new field here would make it part of this cacheline */
+
/* releaser fields */
spinlock_t release_lock ____cacheline_aligned_in_smp;
- unsigned long free; /* official free count */
u32 sr_tail; /* shadow ring tail */
+ unsigned long free; /* official free count */
+ volatile __le64 *hw_free; /* HW free counter */
/* list for PIO waiters */
struct list_head piowait ____cacheline_aligned_in_smp;
spinlock_t credit_ctrl_lock ____cacheline_aligned_in_smp;
- u64 credit_ctrl; /* cache for credit control */
u32 credit_intr_count; /* count of credit intr users */
- u32 __percpu *buffers_allocated;/* count of buffers allocated */
+ u64 credit_ctrl; /* cache for credit control */
wait_queue_head_t halt_wait; /* wait until kernel sees interrupt */
+ struct work_struct halt_work; /* halted context work queue entry */
};
/* send context flags */
diff --git a/drivers/infiniband/hw/hfi1/pio_copy.c b/drivers/infiniband/hw/hfi1/pio_copy.c
index aa77736..03024ce 100644
--- a/drivers/infiniband/hw/hfi1/pio_copy.c
+++ b/drivers/infiniband/hw/hfi1/pio_copy.c
@@ -129,8 +129,8 @@
dest += sizeof(u64);
}
- dest -= pbuf->size;
- dend -= pbuf->size;
+ dest -= pbuf->sc->size;
+ dend -= pbuf->sc->size;
}
/* write 8-byte non-SOP, non-wrap chunk data */
@@ -361,8 +361,8 @@
dest += sizeof(u64);
}
- dest -= pbuf->size;
- dend -= pbuf->size;
+ dest -= pbuf->sc->size;
+ dend -= pbuf->sc->size;
}
/* write 8-byte non-SOP, non-wrap chunk data */
@@ -458,8 +458,8 @@
dest += sizeof(u64);
}
- dest -= pbuf->size;
- dend -= pbuf->size;
+ dest -= pbuf->sc->size;
+ dend -= pbuf->sc->size;
}
/* write 8-byte non-SOP, non-wrap chunk data */
@@ -492,7 +492,7 @@
*/
/* adjust if we have wrapped */
if (dest >= pbuf->end)
- dest -= pbuf->size;
+ dest -= pbuf->sc->size;
/* jump to the SOP range if within the first block */
else if (pbuf->qw_written < PIO_BLOCK_QWS)
dest += SOP_DISTANCE;
@@ -584,8 +584,8 @@
dest += sizeof(u64);
}
- dest -= pbuf->size;
- dend -= pbuf->size;
+ dest -= pbuf->sc->size;
+ dend -= pbuf->sc->size;
}
/* write 8-byte non-SOP, non-wrap chunk data */
@@ -666,7 +666,7 @@
*/
/* adjust if we've wrapped */
if (dest >= pbuf->end)
- dest -= pbuf->size;
+ dest -= pbuf->sc->size;
/* jump to SOP range if within the first block */
else if (pbuf->qw_written < PIO_BLOCK_QWS)
dest += SOP_DISTANCE;
@@ -719,7 +719,7 @@
*/
/* adjust if we have wrapped */
if (dest >= pbuf->end)
- dest -= pbuf->size;
+ dest -= pbuf->sc->size;
/* jump to the SOP range if within the first block */
else if (pbuf->qw_written < PIO_BLOCK_QWS)
dest += SOP_DISTANCE;
diff --git a/drivers/infiniband/hw/hfi1/platform.c b/drivers/infiniband/hw/hfi1/platform.c
index 2024331..838fe84 100644
--- a/drivers/infiniband/hw/hfi1/platform.c
+++ b/drivers/infiniband/hw/hfi1/platform.c
@@ -49,6 +49,90 @@
#include "efivar.h"
#include "eprom.h"
+static int validate_scratch_checksum(struct hfi1_devdata *dd)
+{
+ u64 checksum = 0, temp_scratch = 0;
+ int i, j, version;
+
+ temp_scratch = read_csr(dd, ASIC_CFG_SCRATCH);
+ version = (temp_scratch & BITMAP_VERSION_SMASK) >> BITMAP_VERSION_SHIFT;
+
+ /* Prevent power on default of all zeroes from passing checksum */
+ if (!version)
+ return 0;
+
+ /*
+ * ASIC scratch 0 only contains the checksum and bitmap version as
+ * fields of interest, both of which are handled separately from the
+ * loop below, so skip it
+ */
+ checksum += version;
+ for (i = 1; i < ASIC_NUM_SCRATCH; i++) {
+ temp_scratch = read_csr(dd, ASIC_CFG_SCRATCH + (8 * i));
+ for (j = sizeof(u64); j != 0; j -= 2) {
+ checksum += (temp_scratch & 0xFFFF);
+ temp_scratch >>= 16;
+ }
+ }
+
+ while (checksum >> 16)
+ checksum = (checksum & CHECKSUM_MASK) + (checksum >> 16);
+
+ temp_scratch = read_csr(dd, ASIC_CFG_SCRATCH);
+ temp_scratch &= CHECKSUM_SMASK;
+ temp_scratch >>= CHECKSUM_SHIFT;
+
+ if (checksum + temp_scratch == 0xFFFF)
+ return 1;
+ return 0;
+}
+
+static void save_platform_config_fields(struct hfi1_devdata *dd)
+{
+ struct hfi1_pportdata *ppd = dd->pport;
+ u64 temp_scratch = 0, temp_dest = 0;
+
+ temp_scratch = read_csr(dd, ASIC_CFG_SCRATCH_1);
+
+ temp_dest = temp_scratch &
+ (dd->hfi1_id ? PORT1_PORT_TYPE_SMASK :
+ PORT0_PORT_TYPE_SMASK);
+ ppd->port_type = temp_dest >>
+ (dd->hfi1_id ? PORT1_PORT_TYPE_SHIFT :
+ PORT0_PORT_TYPE_SHIFT);
+
+ temp_dest = temp_scratch &
+ (dd->hfi1_id ? PORT1_LOCAL_ATTEN_SMASK :
+ PORT0_LOCAL_ATTEN_SMASK);
+ ppd->local_atten = temp_dest >>
+ (dd->hfi1_id ? PORT1_LOCAL_ATTEN_SHIFT :
+ PORT0_LOCAL_ATTEN_SHIFT);
+
+ temp_dest = temp_scratch &
+ (dd->hfi1_id ? PORT1_REMOTE_ATTEN_SMASK :
+ PORT0_REMOTE_ATTEN_SMASK);
+ ppd->remote_atten = temp_dest >>
+ (dd->hfi1_id ? PORT1_REMOTE_ATTEN_SHIFT :
+ PORT0_REMOTE_ATTEN_SHIFT);
+
+ temp_dest = temp_scratch &
+ (dd->hfi1_id ? PORT1_DEFAULT_ATTEN_SMASK :
+ PORT0_DEFAULT_ATTEN_SMASK);
+ ppd->default_atten = temp_dest >>
+ (dd->hfi1_id ? PORT1_DEFAULT_ATTEN_SHIFT :
+ PORT0_DEFAULT_ATTEN_SHIFT);
+
+ temp_scratch = read_csr(dd, dd->hfi1_id ? ASIC_CFG_SCRATCH_3 :
+ ASIC_CFG_SCRATCH_2);
+
+ ppd->tx_preset_eq = (temp_scratch & TX_EQ_SMASK) >> TX_EQ_SHIFT;
+ ppd->tx_preset_noeq = (temp_scratch & TX_NO_EQ_SMASK) >> TX_NO_EQ_SHIFT;
+ ppd->rx_preset = (temp_scratch & RX_SMASK) >> RX_SHIFT;
+
+ ppd->max_power_class = (temp_scratch & QSFP_MAX_POWER_SMASK) >>
+ QSFP_MAX_POWER_SHIFT;
+}
+
void get_platform_config(struct hfi1_devdata *dd)
{
int ret = 0;
@@ -56,38 +140,49 @@
u8 *temp_platform_config = NULL;
u32 esize;
- ret = eprom_read_platform_config(dd, (void **)&temp_platform_config,
- &esize);
- if (!ret) {
- /* success */
- size = esize;
- goto success;
+ if (is_integrated(dd)) {
+ if (validate_scratch_checksum(dd)) {
+ save_platform_config_fields(dd);
+ return;
+ }
+ dd_dev_err(dd, "%s: Config bitmap corrupted/uninitialized\n",
+ __func__);
+ dd_dev_err(dd,
+ "%s: Please update your BIOS to support active channels\n",
+ __func__);
+ } else {
+ ret = eprom_read_platform_config(dd,
+ (void **)&temp_platform_config,
+ &esize);
+ if (!ret) {
+ /* success */
+ dd->platform_config.data = temp_platform_config;
+ dd->platform_config.size = esize;
+ return;
+ }
+ /* fail, try EFI variable */
+
+ ret = read_hfi1_efi_var(dd, "configuration", &size,
+ (void **)&temp_platform_config);
+ if (!ret) {
+ dd->platform_config.data = temp_platform_config;
+ dd->platform_config.size = size;
+ return;
+ }
}
- /* fail, try EFI variable */
-
- ret = read_hfi1_efi_var(dd, "configuration", &size,
- (void **)&temp_platform_config);
- if (!ret)
- goto success;
-
- dd_dev_info(dd,
- "%s: Failed to get platform config from UEFI, falling back to request firmware\n",
- __func__);
+ dd_dev_err(dd,
+ "%s: Failed to get platform config, falling back to sub-optimal default file\n",
+ __func__);
/* fall back to request firmware */
platform_config_load = 1;
- return;
-
-success:
- dd->platform_config.data = temp_platform_config;
- dd->platform_config.size = size;
}
void free_platform_config(struct hfi1_devdata *dd)
{
if (!platform_config_load) {
/*
- * was loaded from EFI, release memory
- * allocated by read_efi_var
+ * was loaded from EFI or the EPROM, release memory
+ * allocated by read_efi_var/eprom_read_platform_config
*/
kfree(dd->platform_config.data);
}
@@ -100,12 +195,16 @@
void get_port_type(struct hfi1_pportdata *ppd)
{
int ret;
+ u32 temp;
ret = get_platform_config_field(ppd->dd, PLATFORM_CONFIG_PORT_TABLE, 0,
- PORT_TABLE_PORT_TYPE, &ppd->port_type,
+ PORT_TABLE_PORT_TYPE, &temp,
4);
- if (ret)
+ if (ret) {
ppd->port_type = PORT_TYPE_UNKNOWN;
+ return;
+ }
+ ppd->port_type = temp;
}
int set_qsfp_tx(struct hfi1_pportdata *ppd, int on)
@@ -538,6 +637,38 @@
}
}
+/*
+ * Return a special SerDes setting for low power AOC cables. The power class
+ * threshold and setting being used were all found by empirical testing.
+ *
+ * Summary of the logic:
+ *
+ * if (QSFP and QSFP_TYPE == AOC and QSFP_POWER_CLASS < 4)
+ * return 0xe
+ * return 0; // leave at default
+ */
+static u8 aoc_low_power_setting(struct hfi1_pportdata *ppd)
+{
+ u8 *cache = ppd->qsfp_info.cache;
+ int power_class;
+
+ /* QSFP only */
+ if (ppd->port_type != PORT_TYPE_QSFP)
+ return 0; /* leave at default */
+
+ /* active optical cables only */
+ switch ((cache[QSFP_MOD_TECH_OFFS] & 0xF0) >> 4) {
+ case 0x0 ... 0x9: /* fallthrough */
+ case 0xC: /* fallthrough */
+ case 0xE:
+ /* active AOC */
+ power_class = get_qsfp_power_class(cache[QSFP_MOD_PWR_OFFS]);
+ if (power_class < QSFP_POWER_CLASS_4)
+ return 0xe;
+ }
+ return 0; /* leave at default */
+}
+
static void apply_tunings(
struct hfi1_pportdata *ppd, u32 tx_preset_index,
u8 tuning_method, u32 total_atten, u8 limiting_active)
@@ -606,7 +737,17 @@
tx_preset_index, TX_PRESET_TABLE_POSTCUR, &tx_preset, 4);
postcur = tx_preset;
- config_data = precur | (attn << 8) | (postcur << 16);
+ /*
+ * NOTES:
+ * o The aoc_low_power_setting is applied to all lanes even
+ * though only lane 0's value is examined by the firmware.
+ * o A lingering low power setting after a cable swap does
+ * not occur. On cable unplug the 8051 is reset and
+ * restarted on cable insert. This resets all settings to
+ * their default, erasing any previous low power setting.
+ */
+ config_data = precur | (attn << 8) | (postcur << 16) |
+ (aoc_low_power_setting(ppd) << 24);
apply_tx_lanes(ppd, TX_EQ_SETTINGS, config_data,
"Applying TX settings");
diff --git a/drivers/infiniband/hw/hfi1/platform.h b/drivers/infiniband/hw/hfi1/platform.h
index e2c2161..eed0aa9 100644
--- a/drivers/infiniband/hw/hfi1/platform.h
+++ b/drivers/infiniband/hw/hfi1/platform.h
@@ -168,16 +168,6 @@
struct platform_config_data config_tables[PLATFORM_CONFIG_TABLE_MAX];
};
-static const u32 platform_config_table_limits[PLATFORM_CONFIG_TABLE_MAX] = {
- 0,
- SYSTEM_TABLE_MAX,
- PORT_TABLE_MAX,
- RX_PRESET_TABLE_MAX,
- TX_PRESET_TABLE_MAX,
- QSFP_ATTEN_TABLE_MAX,
- VARIABLE_SETTINGS_TABLE_MAX
-};
-
/* This section defines default values and encodings for the
* fields defined for each table above
*/
@@ -295,6 +285,123 @@
OPA_UNKNOWN_TUNING
};
+/*
+ * Shifts and masks for the link SI tuning values stuffed into the ASIC scratch
+ * registers for integrated platforms
+ */
+#define PORT0_PORT_TYPE_SHIFT 0
+#define PORT0_LOCAL_ATTEN_SHIFT 4
+#define PORT0_REMOTE_ATTEN_SHIFT 10
+#define PORT0_DEFAULT_ATTEN_SHIFT 32
+
+#define PORT1_PORT_TYPE_SHIFT 16
+#define PORT1_LOCAL_ATTEN_SHIFT 20
+#define PORT1_REMOTE_ATTEN_SHIFT 26
+#define PORT1_DEFAULT_ATTEN_SHIFT 40
+
+#define PORT0_PORT_TYPE_MASK 0xFUL
+#define PORT0_LOCAL_ATTEN_MASK 0x3FUL
+#define PORT0_REMOTE_ATTEN_MASK 0x3FUL
+#define PORT0_DEFAULT_ATTEN_MASK 0xFFUL
+
+#define PORT1_PORT_TYPE_MASK 0xFUL
+#define PORT1_LOCAL_ATTEN_MASK 0x3FUL
+#define PORT1_REMOTE_ATTEN_MASK 0x3FUL
+#define PORT1_DEFAULT_ATTEN_MASK 0xFFUL
+
+#define PORT0_PORT_TYPE_SMASK (PORT0_PORT_TYPE_MASK << \
+ PORT0_PORT_TYPE_SHIFT)
+#define PORT0_LOCAL_ATTEN_SMASK (PORT0_LOCAL_ATTEN_MASK << \
+ PORT0_LOCAL_ATTEN_SHIFT)
+#define PORT0_REMOTE_ATTEN_SMASK (PORT0_REMOTE_ATTEN_MASK << \
+ PORT0_REMOTE_ATTEN_SHIFT)
+#define PORT0_DEFAULT_ATTEN_SMASK (PORT0_DEFAULT_ATTEN_MASK << \
+ PORT0_DEFAULT_ATTEN_SHIFT)
+
+#define PORT1_PORT_TYPE_SMASK (PORT1_PORT_TYPE_MASK << \
+ PORT1_PORT_TYPE_SHIFT)
+#define PORT1_LOCAL_ATTEN_SMASK (PORT1_LOCAL_ATTEN_MASK << \
+ PORT1_LOCAL_ATTEN_SHIFT)
+#define PORT1_REMOTE_ATTEN_SMASK (PORT1_REMOTE_ATTEN_MASK << \
+ PORT1_REMOTE_ATTEN_SHIFT)
+#define PORT1_DEFAULT_ATTEN_SMASK (PORT1_DEFAULT_ATTEN_MASK << \
+ PORT1_DEFAULT_ATTEN_SHIFT)
+
+#define QSFP_MAX_POWER_SHIFT 0
+#define TX_NO_EQ_SHIFT 4
+#define TX_EQ_SHIFT 25
+#define RX_SHIFT 46
+
+#define QSFP_MAX_POWER_MASK 0xFUL
+#define TX_NO_EQ_MASK 0x1FFFFFUL
+#define TX_EQ_MASK 0x1FFFFFUL
+#define RX_MASK 0xFFFFUL
+
+#define QSFP_MAX_POWER_SMASK (QSFP_MAX_POWER_MASK << \
+ QSFP_MAX_POWER_SHIFT)
+#define TX_NO_EQ_SMASK (TX_NO_EQ_MASK << TX_NO_EQ_SHIFT)
+#define TX_EQ_SMASK (TX_EQ_MASK << TX_EQ_SHIFT)
+#define RX_SMASK (RX_MASK << RX_SHIFT)
+
+#define TX_PRECUR_SHIFT 0
+#define TX_ATTN_SHIFT 4
+#define QSFP_TX_CDR_APPLY_SHIFT 9
+#define QSFP_TX_EQ_APPLY_SHIFT 10
+#define QSFP_TX_CDR_SHIFT 11
+#define QSFP_TX_EQ_SHIFT 12
+#define TX_POSTCUR_SHIFT 16
+
+#define TX_PRECUR_MASK 0xFUL
+#define TX_ATTN_MASK 0x1FUL
+#define QSFP_TX_CDR_APPLY_MASK 0x1UL
+#define QSFP_TX_EQ_APPLY_MASK 0x1UL
+#define QSFP_TX_CDR_MASK 0x1UL
+#define QSFP_TX_EQ_MASK 0xFUL
+#define TX_POSTCUR_MASK 0x1FUL
+
+#define TX_PRECUR_SMASK (TX_PRECUR_MASK << TX_PRECUR_SHIFT)
+#define TX_ATTN_SMASK (TX_ATTN_MASK << TX_ATTN_SHIFT)
+#define QSFP_TX_CDR_APPLY_SMASK (QSFP_TX_CDR_APPLY_MASK << \
+ QSFP_TX_CDR_APPLY_SHIFT)
+#define QSFP_TX_EQ_APPLY_SMASK (QSFP_TX_EQ_APPLY_MASK << \
+ QSFP_TX_EQ_APPLY_SHIFT)
+#define QSFP_TX_CDR_SMASK (QSFP_TX_CDR_MASK << QSFP_TX_CDR_SHIFT)
+#define QSFP_TX_EQ_SMASK (QSFP_TX_EQ_MASK << QSFP_TX_EQ_SHIFT)
+#define TX_POSTCUR_SMASK (TX_POSTCUR_MASK << TX_POSTCUR_SHIFT)
+
+#define QSFP_RX_CDR_APPLY_SHIFT 0
+#define QSFP_RX_EMP_APPLY_SHIFT 1
+#define QSFP_RX_AMP_APPLY_SHIFT 2
+#define QSFP_RX_CDR_SHIFT 3
+#define QSFP_RX_EMP_SHIFT 4
+#define QSFP_RX_AMP_SHIFT 8
+
+#define QSFP_RX_CDR_APPLY_MASK 0x1UL
+#define QSFP_RX_EMP_APPLY_MASK 0x1UL
+#define QSFP_RX_AMP_APPLY_MASK 0x1UL
+#define QSFP_RX_CDR_MASK 0x1UL
+#define QSFP_RX_EMP_MASK 0xFUL
+#define QSFP_RX_AMP_MASK 0x3UL
+
+#define QSFP_RX_CDR_APPLY_SMASK (QSFP_RX_CDR_APPLY_MASK << \
+ QSFP_RX_CDR_APPLY_SHIFT)
+#define QSFP_RX_EMP_APPLY_SMASK (QSFP_RX_EMP_APPLY_MASK << \
+ QSFP_RX_EMP_APPLY_SHIFT)
+#define QSFP_RX_AMP_APPLY_SMASK (QSFP_RX_AMP_APPLY_MASK << \
+ QSFP_RX_AMP_APPLY_SHIFT)
+#define QSFP_RX_CDR_SMASK (QSFP_RX_CDR_MASK << QSFP_RX_CDR_SHIFT)
+#define QSFP_RX_EMP_SMASK (QSFP_RX_EMP_MASK << QSFP_RX_EMP_SHIFT)
+#define QSFP_RX_AMP_SMASK (QSFP_RX_AMP_MASK << QSFP_RX_AMP_SHIFT)
+
+#define BITMAP_VERSION 1
+#define BITMAP_VERSION_SHIFT 44
+#define BITMAP_VERSION_MASK 0xFUL
+#define BITMAP_VERSION_SMASK (BITMAP_VERSION_MASK << \
+ BITMAP_VERSION_SHIFT)
+#define CHECKSUM_SHIFT 48
+#define CHECKSUM_MASK 0xFFFFUL
+#define CHECKSUM_SMASK (CHECKSUM_MASK << CHECKSUM_SHIFT)
+
/* platform.c */
void get_platform_config(struct hfi1_devdata *dd);
void free_platform_config(struct hfi1_devdata *dd);
diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c
index 9fc75e7..d752d67 100644
--- a/drivers/infiniband/hw/hfi1/qp.c
+++ b/drivers/infiniband/hw/hfi1/qp.c
@@ -196,15 +196,18 @@
static void flush_iowait(struct rvt_qp *qp)
{
struct hfi1_qp_priv *priv = qp->priv;
- struct hfi1_ibdev *dev = to_idev(qp->ibqp.device);
unsigned long flags;
+ seqlock_t *lock = priv->s_iowait.lock;
- write_seqlock_irqsave(&dev->iowait_lock, flags);
+ if (!lock)
+ return;
+ write_seqlock_irqsave(lock, flags);
if (!list_empty(&priv->s_iowait.list)) {
list_del_init(&priv->s_iowait.list);
+ priv->s_iowait.lock = NULL;
rvt_put_qp(qp);
}
- write_sequnlock_irqrestore(&dev->iowait_lock, flags);
+ write_sequnlock_irqrestore(lock, flags);
}
static inline int opa_mtu_enum_to_int(int mtu)
@@ -543,6 +546,7 @@
ibp->rvp.n_dmawait++;
qp->s_flags |= RVT_S_WAIT_DMA_DESC;
list_add_tail(&priv->s_iowait.list, &sde->dmawait);
+ priv->s_iowait.lock = &dev->iowait_lock;
trace_hfi1_qpsleep(qp, RVT_S_WAIT_DMA_DESC);
rvt_get_qp(qp);
}
@@ -964,6 +968,7 @@
if (!list_empty(&priv->s_iowait.list) && !(qp->s_flags & RVT_S_BUSY)) {
qp->s_flags &= ~RVT_S_ANY_WAIT_IO;
list_del_init(&priv->s_iowait.list);
+ priv->s_iowait.lock = NULL;
rvt_put_qp(qp);
}
write_sequnlock(&dev->iowait_lock);
diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c
index 83198a8..809b26e 100644
--- a/drivers/infiniband/hw/hfi1/rc.c
+++ b/drivers/infiniband/hw/hfi1/rc.c
@@ -276,7 +276,7 @@
rvt_get_mr(ps->s_txreq->mr);
qp->s_ack_rdma_sge.sge = e->rdma_sge;
qp->s_ack_rdma_sge.num_sge = 1;
- qp->s_cur_sge = &qp->s_ack_rdma_sge;
+ ps->s_txreq->ss = &qp->s_ack_rdma_sge;
if (len > pmtu) {
len = pmtu;
qp->s_ack_state = OP(RDMA_READ_RESPONSE_FIRST);
@@ -290,7 +290,7 @@
bth2 = mask_psn(qp->s_ack_rdma_psn++);
} else {
/* COMPARE_SWAP or FETCH_ADD */
- qp->s_cur_sge = NULL;
+ ps->s_txreq->ss = NULL;
len = 0;
qp->s_ack_state = OP(ATOMIC_ACKNOWLEDGE);
ohdr->u.at.aeth = hfi1_compute_aeth(qp);
@@ -306,7 +306,7 @@
qp->s_ack_state = OP(RDMA_READ_RESPONSE_MIDDLE);
/* FALLTHROUGH */
case OP(RDMA_READ_RESPONSE_MIDDLE):
- qp->s_cur_sge = &qp->s_ack_rdma_sge;
+ ps->s_txreq->ss = &qp->s_ack_rdma_sge;
ps->s_txreq->mr = qp->s_ack_rdma_sge.sge.mr;
if (ps->s_txreq->mr)
rvt_get_mr(ps->s_txreq->mr);
@@ -335,7 +335,7 @@
*/
qp->s_ack_state = OP(SEND_ONLY);
qp->s_flags &= ~RVT_S_ACK_PENDING;
- qp->s_cur_sge = NULL;
+ ps->s_txreq->ss = NULL;
if (qp->s_nak_state)
ohdr->u.aeth =
cpu_to_be32((qp->r_msn & HFI1_MSN_MASK) |
@@ -351,7 +351,7 @@
qp->s_rdma_ack_cnt++;
qp->s_hdrwords = hwords;
ps->s_txreq->sde = priv->s_sde;
- qp->s_cur_size = len;
+ ps->s_txreq->s_cur_size = len;
hfi1_make_ruc_header(qp, ohdr, bth0, bth2, middle, ps);
/* pbc */
ps->s_txreq->hdr_dwords = qp->s_hdrwords + 2;
@@ -801,8 +801,8 @@
qp->s_len -= len;
qp->s_hdrwords = hwords;
ps->s_txreq->sde = priv->s_sde;
- qp->s_cur_sge = ss;
- qp->s_cur_size = len;
+ ps->s_txreq->ss = ss;
+ ps->s_txreq->s_cur_size = len;
hfi1_make_ruc_header(
qp,
ohdr,
@@ -1146,8 +1146,6 @@
{
struct ib_other_headers *ohdr;
struct rvt_swqe *wqe;
- struct ib_wc wc;
- unsigned i;
u32 opcode;
u32 psn;
@@ -1195,22 +1193,8 @@
qp->s_last = s_last;
/* see post_send() */
barrier();
- for (i = 0; i < wqe->wr.num_sge; i++) {
- struct rvt_sge *sge = &wqe->sg_list[i];
-
- rvt_put_mr(sge->mr);
- }
- /* Post a send completion queue entry if requested. */
- if (!(qp->s_flags & RVT_S_SIGNAL_REQ_WR) ||
- (wqe->wr.send_flags & IB_SEND_SIGNALED)) {
- memset(&wc, 0, sizeof(wc));
- wc.wr_id = wqe->wr.wr_id;
- wc.status = IB_WC_SUCCESS;
- wc.opcode = ib_hfi1_wc_opcode[wqe->wr.opcode];
- wc.byte_len = wqe->length;
- wc.qp = &qp->ibqp;
- rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.send_cq), &wc, 0);
- }
+ rvt_put_swqe(wqe);
+ rvt_qp_swqe_complete(qp, wqe, IB_WC_SUCCESS);
}
/*
* If we were waiting for sends to complete before re-sending,
@@ -1240,9 +1224,6 @@
struct rvt_swqe *wqe,
struct hfi1_ibport *ibp)
{
- struct ib_wc wc;
- unsigned i;
-
lockdep_assert_held(&qp->s_lock);
/*
* Don't decrement refcount and don't generate a
@@ -1253,28 +1234,14 @@
cmp_psn(qp->s_sending_psn, qp->s_sending_hpsn) > 0) {
u32 s_last;
- for (i = 0; i < wqe->wr.num_sge; i++) {
- struct rvt_sge *sge = &wqe->sg_list[i];
-
- rvt_put_mr(sge->mr);
- }
+ rvt_put_swqe(wqe);
s_last = qp->s_last;
if (++s_last >= qp->s_size)
s_last = 0;
qp->s_last = s_last;
/* see post_send() */
barrier();
- /* Post a send completion queue entry if requested. */
- if (!(qp->s_flags & RVT_S_SIGNAL_REQ_WR) ||
- (wqe->wr.send_flags & IB_SEND_SIGNALED)) {
- memset(&wc, 0, sizeof(wc));
- wc.wr_id = wqe->wr.wr_id;
- wc.status = IB_WC_SUCCESS;
- wc.opcode = ib_hfi1_wc_opcode[wqe->wr.opcode];
- wc.byte_len = wqe->length;
- wc.qp = &qp->ibqp;
- rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.send_cq), &wc, 0);
- }
+ rvt_qp_swqe_complete(qp, wqe, IB_WC_SUCCESS);
} else {
struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
@@ -2295,7 +2262,7 @@
hfi1_copy_sge(&qp->r_sge, data, tlen, 1, copy_last);
rvt_put_ss(&qp->r_sge);
qp->r_msn++;
- if (!test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags))
+ if (!__test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags))
break;
wc.wr_id = qp->r_wr_id;
wc.status = IB_WC_SUCCESS;
@@ -2410,8 +2377,7 @@
* Update the next expected PSN. We add 1 later
* below, so only add the remainder here.
*/
- if (len > pmtu)
- qp->r_psn += (len - 1) / pmtu;
+ qp->r_psn += rvt_div_mtu(qp, len - 1);
} else {
e->rdma_sge.mr = NULL;
e->rdma_sge.vaddr = NULL;
diff --git a/drivers/infiniband/hw/hfi1/ruc.c b/drivers/infiniband/hw/hfi1/ruc.c
index a1576ae..717ed4b15 100644
--- a/drivers/infiniband/hw/hfi1/ruc.c
+++ b/drivers/infiniband/hw/hfi1/ruc.c
@@ -239,16 +239,6 @@
return ret;
}
-static __be64 get_sguid(struct hfi1_ibport *ibp, unsigned index)
-{
- if (!index) {
- struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
-
- return cpu_to_be64(ppd->guid);
- }
- return ibp->guids[index - 1];
-}
-
static int gid_ok(union ib_gid *gid, __be64 gid_prefix, __be64 id)
{
return (gid->global.interface_id == id &&
@@ -699,9 +689,9 @@
/* The SGID is 32-bit aligned. */
hdr->sgid.global.subnet_prefix = ibp->rvp.gid_prefix;
hdr->sgid.global.interface_id =
- grh->sgid_index && grh->sgid_index < ARRAY_SIZE(ibp->guids) ?
- ibp->guids[grh->sgid_index - 1] :
- cpu_to_be64(ppd_from_ibp(ibp)->guid);
+ grh->sgid_index < HFI1_GUIDS_PER_PORT ?
+ get_sguid(ibp, grh->sgid_index) :
+ get_sguid(ibp, HFI1_PORT_GUID_INDEX);
hdr->dgid = grh->dgid;
/* GRH header size in 32-bit words. */
@@ -777,8 +767,8 @@
u32 bth1;
/* Construct the header. */
- extra_bytes = -qp->s_cur_size & 3;
- nwords = (qp->s_cur_size + extra_bytes) >> 2;
+ extra_bytes = -ps->s_txreq->s_cur_size & 3;
+ nwords = (ps->s_txreq->s_cur_size + extra_bytes) >> 2;
lrh0 = HFI1_LRH_BTH;
if (unlikely(qp->remote_ah_attr.ah_flags & IB_AH_GRH)) {
qp->s_hdrwords += hfi1_make_grh(ibp,
@@ -952,7 +942,6 @@
enum ib_wc_status status)
{
u32 old_last, last;
- unsigned i;
if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_OR_FLUSH_SEND))
return;
@@ -964,32 +953,13 @@
qp->s_last = last;
/* See post_send() */
barrier();
- for (i = 0; i < wqe->wr.num_sge; i++) {
- struct rvt_sge *sge = &wqe->sg_list[i];
-
- rvt_put_mr(sge->mr);
- }
+ rvt_put_swqe(wqe);
if (qp->ibqp.qp_type == IB_QPT_UD ||
qp->ibqp.qp_type == IB_QPT_SMI ||
qp->ibqp.qp_type == IB_QPT_GSI)
atomic_dec(&ibah_to_rvtah(wqe->ud_wr.ah)->refcount);
- /* See ch. 11.2.4.1 and 10.7.3.1 */
- if (!(qp->s_flags & RVT_S_SIGNAL_REQ_WR) ||
- (wqe->wr.send_flags & IB_SEND_SIGNALED) ||
- status != IB_WC_SUCCESS) {
- struct ib_wc wc;
-
- memset(&wc, 0, sizeof(wc));
- wc.wr_id = wqe->wr.wr_id;
- wc.status = status;
- wc.opcode = ib_hfi1_wc_opcode[wqe->wr.opcode];
- wc.qp = &qp->ibqp;
- if (status == IB_WC_SUCCESS)
- wc.byte_len = wqe->length;
- rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.send_cq), &wc,
- status != IB_WC_SUCCESS);
- }
+ rvt_qp_swqe_complete(qp, wqe, status);
if (qp->s_acked == old_last)
qp->s_acked = last;
diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c
index 9cbe52d..1d81cac1 100644
--- a/drivers/infiniband/hw/hfi1/sdma.c
+++ b/drivers/infiniband/hw/hfi1/sdma.c
@@ -375,7 +375,7 @@
sde->head_sn, tx->sn);
sde->head_sn++;
#endif
- sdma_txclean(sde->dd, tx);
+ __sdma_txclean(sde->dd, tx);
if (complete)
(*complete)(tx, res);
if (wait && iowait_sdma_dec(wait))
@@ -1643,7 +1643,7 @@
}
/**
- * sdma_txclean() - clean tx of mappings, descp *kmalloc's
+ * __sdma_txclean() - clean tx of mappings, descp *kmalloc's
* @dd: hfi1_devdata for unmapping
* @tx: tx request to clean
*
@@ -1653,7 +1653,7 @@
* The code can be called multiple times without issue.
*
*/
-void sdma_txclean(
+void __sdma_txclean(
struct hfi1_devdata *dd,
struct sdma_txreq *tx)
{
@@ -3065,7 +3065,7 @@
tx->descp[i] = tx->descs[i];
return 0;
enomem:
- sdma_txclean(dd, tx);
+ __sdma_txclean(dd, tx);
return -ENOMEM;
}
@@ -3094,14 +3094,14 @@
rval = _extend_sdma_tx_descs(dd, tx);
if (rval) {
- sdma_txclean(dd, tx);
+ __sdma_txclean(dd, tx);
return rval;
}
/* If coalesce buffer is allocated, copy data into it */
if (tx->coalesce_buf) {
if (type == SDMA_MAP_NONE) {
- sdma_txclean(dd, tx);
+ __sdma_txclean(dd, tx);
return -EINVAL;
}
@@ -3109,7 +3109,7 @@
kvaddr = kmap(page);
kvaddr += offset;
} else if (WARN_ON(!kvaddr)) {
- sdma_txclean(dd, tx);
+ __sdma_txclean(dd, tx);
return -EINVAL;
}
@@ -3139,7 +3139,7 @@
DMA_TO_DEVICE);
if (unlikely(dma_mapping_error(&dd->pcidev->dev, addr))) {
- sdma_txclean(dd, tx);
+ __sdma_txclean(dd, tx);
return -ENOSPC;
}
@@ -3181,7 +3181,7 @@
if ((unlikely(tx->num_desc == tx->desc_limit))) {
rval = _extend_sdma_tx_descs(dd, tx);
if (rval) {
- sdma_txclean(dd, tx);
+ __sdma_txclean(dd, tx);
return rval;
}
}
diff --git a/drivers/infiniband/hw/hfi1/sdma.h b/drivers/infiniband/hw/hfi1/sdma.h
index 56257ea..21f1e28 100644
--- a/drivers/infiniband/hw/hfi1/sdma.h
+++ b/drivers/infiniband/hw/hfi1/sdma.h
@@ -667,7 +667,13 @@
int type, void *kvaddr, struct page *page,
unsigned long offset, u16 len);
int _pad_sdma_tx_descs(struct hfi1_devdata *, struct sdma_txreq *);
-void sdma_txclean(struct hfi1_devdata *, struct sdma_txreq *);
+void __sdma_txclean(struct hfi1_devdata *, struct sdma_txreq *);
+
+static inline void sdma_txclean(struct hfi1_devdata *dd, struct sdma_txreq *tx)
+{
+ if (tx->num_desc)
+ __sdma_txclean(dd, tx);
+}
/* helpers used by public routines */
static inline void _sdma_close_tx(struct hfi1_devdata *dd,
@@ -753,7 +759,7 @@
DMA_TO_DEVICE);
if (unlikely(dma_mapping_error(&dd->pcidev->dev, addr))) {
- sdma_txclean(dd, tx);
+ __sdma_txclean(dd, tx);
return -ENOSPC;
}
@@ -834,7 +840,7 @@
DMA_TO_DEVICE);
if (unlikely(dma_mapping_error(&dd->pcidev->dev, addr))) {
- sdma_txclean(dd, tx);
+ __sdma_txclean(dd, tx);
return -ENOSPC;
}
diff --git a/drivers/infiniband/hw/hfi1/uc.c b/drivers/infiniband/hw/hfi1/uc.c
index 5e6d1ba..b141a78 100644
--- a/drivers/infiniband/hw/hfi1/uc.c
+++ b/drivers/infiniband/hw/hfi1/uc.c
@@ -258,8 +258,8 @@
qp->s_len -= len;
qp->s_hdrwords = hwords;
ps->s_txreq->sde = priv->s_sde;
- qp->s_cur_sge = &qp->s_sge;
- qp->s_cur_size = len;
+ ps->s_txreq->ss = &qp->s_sge;
+ ps->s_txreq->s_cur_size = len;
hfi1_make_ruc_header(qp, ohdr, bth0 | (qp->s_state << 24),
mask_psn(qp->s_psn++), middle, ps);
/* pbc */
diff --git a/drivers/infiniband/hw/hfi1/ud.c b/drivers/infiniband/hw/hfi1/ud.c
index 97ae24b..c071955 100644
--- a/drivers/infiniband/hw/hfi1/ud.c
+++ b/drivers/infiniband/hw/hfi1/ud.c
@@ -354,8 +354,8 @@
/* header size in 32-bit words LRH+BTH+DETH = (8+12+8)/4. */
qp->s_hdrwords = 7;
- qp->s_cur_size = wqe->length;
- qp->s_cur_sge = &qp->s_sge;
+ ps->s_txreq->s_cur_size = wqe->length;
+ ps->s_txreq->ss = &qp->s_sge;
qp->s_srate = ah_attr->static_rate;
qp->srate_mbps = ib_rate_to_mbps(qp->s_srate);
qp->s_wqe = wqe;
diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c
index 77697d6..7d22f8e 100644
--- a/drivers/infiniband/hw/hfi1/user_sdma.c
+++ b/drivers/infiniband/hw/hfi1/user_sdma.c
@@ -115,6 +115,7 @@
#define KDETH_HCRC_LOWER_MASK 0xff
#define AHG_KDETH_INTR_SHIFT 12
+#define AHG_KDETH_SH_SHIFT 13
#define PBC2LRH(x) ((((x) & 0xfff) << 2) - 4)
#define LRH2PBC(x) ((((x) >> 2) + 1) & 0xfff)
@@ -144,8 +145,9 @@
#define KDETH_OM_LARGE 64
#define KDETH_OM_MAX_SIZE (1 << ((KDETH_OM_LARGE / KDETH_OM_SMALL) + 1))
-/* Last packet in the request */
-#define TXREQ_FLAGS_REQ_LAST_PKT BIT(0)
+/* Tx request flag bits */
+#define TXREQ_FLAGS_REQ_ACK BIT(0) /* Set the ACK bit in the header */
+#define TXREQ_FLAGS_REQ_DISABLE_SH BIT(1) /* Disable header suppression */
/* SDMA request flag bits */
#define SDMA_REQ_FOR_THREAD 1
@@ -943,8 +945,13 @@
tx->busycount = 0;
INIT_LIST_HEAD(&tx->list);
+ /*
+ * For the last packet set the ACK request
+ * and disable header suppression.
+ */
if (req->seqnum == req->info.npkts - 1)
- tx->flags |= TXREQ_FLAGS_REQ_LAST_PKT;
+ tx->flags |= (TXREQ_FLAGS_REQ_ACK |
+ TXREQ_FLAGS_REQ_DISABLE_SH);
/*
* Calculate the payload size - this is min of the fragment
@@ -963,11 +970,22 @@
}
datalen = compute_data_length(req, tx);
+
+ /*
+ * Disable header suppression for the payload <= 8DWS.
+ * If there is an uncorrectable error in the receive
+ * data FIFO when the received payload size is less than
+ * or equal to 8DWS then the RxDmaDataFifoRdUncErr is
+ * not reported.There is set RHF.EccErr if the header
+ * is not suppressed.
+ */
if (!datalen) {
SDMA_DBG(req,
"Request has data but pkt len is 0");
ret = -EFAULT;
goto free_tx;
+ } else if (datalen <= 32) {
+ tx->flags |= TXREQ_FLAGS_REQ_DISABLE_SH;
}
}
@@ -990,6 +1008,10 @@
LRH2PBC(lrhlen);
tx->hdr.pbc[0] = cpu_to_le16(pbclen);
}
+ ret = check_header_template(req, &tx->hdr,
+ lrhlen, datalen);
+ if (ret)
+ goto free_tx;
ret = sdma_txinit_ahg(&tx->txreq,
SDMA_TXREQ_F_AHG_COPY,
sizeof(tx->hdr) + datalen,
@@ -1351,7 +1373,7 @@
req->seqnum));
/* Set ACK request on last packet */
- if (unlikely(tx->flags & TXREQ_FLAGS_REQ_LAST_PKT))
+ if (unlikely(tx->flags & TXREQ_FLAGS_REQ_ACK))
hdr->bth[2] |= cpu_to_be32(1UL << 31);
/* Set the new offset */
@@ -1384,8 +1406,8 @@
/* Set KDETH.TID based on value for this TID */
KDETH_SET(hdr->kdeth.ver_tid_offset, TID,
EXP_TID_GET(tidval, IDX));
- /* Clear KDETH.SH only on the last packet */
- if (unlikely(tx->flags & TXREQ_FLAGS_REQ_LAST_PKT))
+ /* Clear KDETH.SH when DISABLE_SH flag is set */
+ if (unlikely(tx->flags & TXREQ_FLAGS_REQ_DISABLE_SH))
KDETH_SET(hdr->kdeth.ver_tid_offset, SH, 0);
/*
* Set the KDETH.OFFSET and KDETH.OM based on size of
@@ -1429,7 +1451,7 @@
/* BTH.PSN and BTH.A */
val32 = (be32_to_cpu(hdr->bth[2]) + req->seqnum) &
(HFI1_CAP_IS_KSET(EXTENDED_PSN) ? 0x7fffffff : 0xffffff);
- if (unlikely(tx->flags & TXREQ_FLAGS_REQ_LAST_PKT))
+ if (unlikely(tx->flags & TXREQ_FLAGS_REQ_ACK))
val32 |= 1UL << 31;
AHG_HEADER_SET(req->ahg, diff, 6, 0, 16, cpu_to_be16(val32 >> 16));
AHG_HEADER_SET(req->ahg, diff, 6, 16, 16, cpu_to_be16(val32 & 0xffff));
@@ -1468,19 +1490,23 @@
AHG_HEADER_SET(req->ahg, diff, 7, 0, 16,
((!!(req->omfactor - KDETH_OM_SMALL)) << 15 |
((req->tidoffset / req->omfactor) & 0x7fff)));
- /* KDETH.TIDCtrl, KDETH.TID */
+ /* KDETH.TIDCtrl, KDETH.TID, KDETH.Intr, KDETH.SH */
val = cpu_to_le16(((EXP_TID_GET(tidval, CTRL) & 0x3) << 10) |
- (EXP_TID_GET(tidval, IDX) & 0x3ff));
- /* Clear KDETH.SH on last packet */
- if (unlikely(tx->flags & TXREQ_FLAGS_REQ_LAST_PKT)) {
- val |= cpu_to_le16(KDETH_GET(hdr->kdeth.ver_tid_offset,
- INTR) <<
- AHG_KDETH_INTR_SHIFT);
- val &= cpu_to_le16(~(1U << 13));
- AHG_HEADER_SET(req->ahg, diff, 7, 16, 14, val);
+ (EXP_TID_GET(tidval, IDX) & 0x3ff));
+
+ if (unlikely(tx->flags & TXREQ_FLAGS_REQ_DISABLE_SH)) {
+ val |= cpu_to_le16((KDETH_GET(hdr->kdeth.ver_tid_offset,
+ INTR) <<
+ AHG_KDETH_INTR_SHIFT));
} else {
- AHG_HEADER_SET(req->ahg, diff, 7, 16, 12, val);
+ val |= KDETH_GET(hdr->kdeth.ver_tid_offset, SH) ?
+ cpu_to_le16(0x1 << AHG_KDETH_SH_SHIFT) :
+ cpu_to_le16((KDETH_GET(hdr->kdeth.ver_tid_offset,
+ INTR) <<
+ AHG_KDETH_INTR_SHIFT));
}
+
+ AHG_HEADER_SET(req->ahg, diff, 7, 16, 14, val);
}
trace_hfi1_sdma_user_header_ahg(pq->dd, pq->ctxt, pq->subctxt,
diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c
index 4b7a16c..95ed4d6 100644
--- a/drivers/infiniband/hw/hfi1/verbs.c
+++ b/drivers/infiniband/hw/hfi1/verbs.c
@@ -297,22 +297,6 @@
}
/*
- * Translate ib_wr_opcode into ib_wc_opcode.
- */
-const enum ib_wc_opcode ib_hfi1_wc_opcode[] = {
- [IB_WR_RDMA_WRITE] = IB_WC_RDMA_WRITE,
- [IB_WR_RDMA_WRITE_WITH_IMM] = IB_WC_RDMA_WRITE,
- [IB_WR_SEND] = IB_WC_SEND,
- [IB_WR_SEND_WITH_IMM] = IB_WC_SEND,
- [IB_WR_RDMA_READ] = IB_WC_RDMA_READ,
- [IB_WR_ATOMIC_CMP_AND_SWP] = IB_WC_COMP_SWAP,
- [IB_WR_ATOMIC_FETCH_AND_ADD] = IB_WC_FETCH_ADD,
- [IB_WR_SEND_WITH_INV] = IB_WC_SEND,
- [IB_WR_LOCAL_INV] = IB_WC_LOCAL_INV,
- [IB_WR_REG_MR] = IB_WC_REG_MR
-};
-
-/*
* Length of header by opcode, 0 --> not supported
*/
const u8 hdr_len_by_opcode[256] = {
@@ -694,6 +678,7 @@
qp = iowait_to_qp(wait);
priv = qp->priv;
list_del_init(&priv->s_iowait.list);
+ priv->s_iowait.lock = NULL;
/* refcount held until actual wake up */
if (!list_empty(list))
mod_timer(&dev->mem_timer, jiffies + 1);
@@ -769,6 +754,7 @@
mod_timer(&dev->mem_timer, jiffies + 1);
qp->s_flags |= RVT_S_WAIT_KMEM;
list_add_tail(&priv->s_iowait.list, &dev->memwait);
+ priv->s_iowait.lock = &dev->iowait_lock;
trace_hfi1_qpsleep(qp, RVT_S_WAIT_KMEM);
rvt_get_qp(qp);
}
@@ -788,10 +774,10 @@
*/
static noinline int build_verbs_ulp_payload(
struct sdma_engine *sde,
- struct rvt_sge_state *ss,
u32 length,
struct verbs_txreq *tx)
{
+ struct rvt_sge_state *ss = tx->ss;
struct rvt_sge *sg_list = ss->sg_list;
struct rvt_sge sge = ss->sge;
u8 num_sge = ss->num_sge;
@@ -835,7 +821,6 @@
/* New API */
static int build_verbs_tx_desc(
struct sdma_engine *sde,
- struct rvt_sge_state *ss,
u32 length,
struct verbs_txreq *tx,
struct hfi1_ahg_info *ahg_info,
@@ -879,9 +864,9 @@
goto bail_txadd;
}
- /* add the ulp payload - if any. ss can be NULL for acks */
- if (ss)
- ret = build_verbs_ulp_payload(sde, ss, length, tx);
+ /* add the ulp payload - if any. tx->ss can be NULL for acks */
+ if (tx->ss)
+ ret = build_verbs_ulp_payload(sde, length, tx);
bail_txadd:
return ret;
}
@@ -892,8 +877,7 @@
struct hfi1_qp_priv *priv = qp->priv;
struct hfi1_ahg_info *ahg_info = priv->s_ahg;
u32 hdrwords = qp->s_hdrwords;
- struct rvt_sge_state *ss = qp->s_cur_sge;
- u32 len = qp->s_cur_size;
+ u32 len = ps->s_txreq->s_cur_size;
u32 plen = hdrwords + ((len + 3) >> 2) + 2; /* includes pbc */
struct hfi1_ibdev *dev = ps->dev;
struct hfi1_pportdata *ppd = ps->ppd;
@@ -918,7 +902,7 @@
plen);
}
tx->wqe = qp->s_wqe;
- ret = build_verbs_tx_desc(tx->sde, ss, len, tx, ahg_info, pbc);
+ ret = build_verbs_tx_desc(tx->sde, len, tx, ahg_info, pbc);
if (unlikely(ret))
goto bail_build;
}
@@ -980,6 +964,7 @@
qp->s_flags |= flag;
was_empty = list_empty(&sc->piowait);
list_add_tail(&priv->s_iowait.list, &sc->piowait);
+ priv->s_iowait.lock = &dev->iowait_lock;
trace_hfi1_qpsleep(qp, RVT_S_WAIT_PIO);
rvt_get_qp(qp);
/* counting: only call wantpiobuf_intr if first user */
@@ -1008,8 +993,8 @@
{
struct hfi1_qp_priv *priv = qp->priv;
u32 hdrwords = qp->s_hdrwords;
- struct rvt_sge_state *ss = qp->s_cur_sge;
- u32 len = qp->s_cur_size;
+ struct rvt_sge_state *ss = ps->s_txreq->ss;
+ u32 len = ps->s_txreq->s_cur_size;
u32 dwords = (len + 3) >> 2;
u32 plen = hdrwords + dwords + 2; /* includes pbc */
struct hfi1_pportdata *ppd = ps->ppd;
@@ -1237,7 +1222,7 @@
u8 op = get_opcode(h);
if (piothreshold &&
- qp->s_cur_size <= min(piothreshold, qp->pmtu) &&
+ tx->s_cur_size <= min(piothreshold, qp->pmtu) &&
(BIT(op & OPMASK) & pio_opmask[op >> 5]) &&
iowait_sdma_pending(&priv->s_iowait) == 0 &&
!sdma_txreq_built(&tx->txreq))
@@ -1483,15 +1468,11 @@
int guid_index, __be64 *guid)
{
struct hfi1_ibport *ibp = container_of(rvp, struct hfi1_ibport, rvp);
- struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
- if (guid_index == 0)
- *guid = cpu_to_be64(ppd->guid);
- else if (guid_index < HFI1_GUIDS_PER_PORT)
- *guid = ibp->guids[guid_index - 1];
- else
+ if (guid_index >= HFI1_GUIDS_PER_PORT)
return -EINVAL;
+ *guid = get_sguid(ibp, guid_index);
return 0;
}
@@ -1610,6 +1591,154 @@
dc8051_ver_min(ver));
}
+static const char * const driver_cntr_names[] = {
+ /* must be element 0*/
+ "DRIVER_KernIntr",
+ "DRIVER_ErrorIntr",
+ "DRIVER_Tx_Errs",
+ "DRIVER_Rcv_Errs",
+ "DRIVER_HW_Errs",
+ "DRIVER_NoPIOBufs",
+ "DRIVER_CtxtsOpen",
+ "DRIVER_RcvLen_Errs",
+ "DRIVER_EgrBufFull",
+ "DRIVER_EgrHdrFull"
+};
+
+static const char **dev_cntr_names;
+static const char **port_cntr_names;
+static int num_driver_cntrs = ARRAY_SIZE(driver_cntr_names);
+static int num_dev_cntrs;
+static int num_port_cntrs;
+static int cntr_names_initialized;
+
+/*
+ * Convert a list of names separated by '\n' into an array of NULL terminated
+ * strings. Optionally some entries can be reserved in the array to hold extra
+ * external strings.
+ */
+static int init_cntr_names(const char *names_in,
+ const int names_len,
+ int num_extra_names,
+ int *num_cntrs,
+ const char ***cntr_names)
+{
+ char *names_out, *p, **q;
+ int i, n;
+
+ n = 0;
+ for (i = 0; i < names_len; i++)
+ if (names_in[i] == '\n')
+ n++;
+
+ names_out = kmalloc((n + num_extra_names) * sizeof(char *) + names_len,
+ GFP_KERNEL);
+ if (!names_out) {
+ *num_cntrs = 0;
+ *cntr_names = NULL;
+ return -ENOMEM;
+ }
+
+ p = names_out + (n + num_extra_names) * sizeof(char *);
+ memcpy(p, names_in, names_len);
+
+ q = (char **)names_out;
+ for (i = 0; i < n; i++) {
+ q[i] = p;
+ p = strchr(p, '\n');
+ *p++ = '\0';
+ }
+
+ *num_cntrs = n;
+ *cntr_names = (const char **)names_out;
+ return 0;
+}
+
+static struct rdma_hw_stats *alloc_hw_stats(struct ib_device *ibdev,
+ u8 port_num)
+{
+ int i, err;
+
+ if (!cntr_names_initialized) {
+ struct hfi1_devdata *dd = dd_from_ibdev(ibdev);
+
+ err = init_cntr_names(dd->cntrnames,
+ dd->cntrnameslen,
+ num_driver_cntrs,
+ &num_dev_cntrs,
+ &dev_cntr_names);
+ if (err)
+ return NULL;
+
+ for (i = 0; i < num_driver_cntrs; i++)
+ dev_cntr_names[num_dev_cntrs + i] =
+ driver_cntr_names[i];
+
+ err = init_cntr_names(dd->portcntrnames,
+ dd->portcntrnameslen,
+ 0,
+ &num_port_cntrs,
+ &port_cntr_names);
+ if (err) {
+ kfree(dev_cntr_names);
+ dev_cntr_names = NULL;
+ return NULL;
+ }
+ cntr_names_initialized = 1;
+ }
+
+ if (!port_num)
+ return rdma_alloc_hw_stats_struct(
+ dev_cntr_names,
+ num_dev_cntrs + num_driver_cntrs,
+ RDMA_HW_STATS_DEFAULT_LIFESPAN);
+ else
+ return rdma_alloc_hw_stats_struct(
+ port_cntr_names,
+ num_port_cntrs,
+ RDMA_HW_STATS_DEFAULT_LIFESPAN);
+}
+
+static u64 hfi1_sps_ints(void)
+{
+ unsigned long flags;
+ struct hfi1_devdata *dd;
+ u64 sps_ints = 0;
+
+ spin_lock_irqsave(&hfi1_devs_lock, flags);
+ list_for_each_entry(dd, &hfi1_dev_list, list) {
+ sps_ints += get_all_cpu_total(dd->int_counter);
+ }
+ spin_unlock_irqrestore(&hfi1_devs_lock, flags);
+ return sps_ints;
+}
+
+static int get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats,
+ u8 port, int index)
+{
+ u64 *values;
+ int count;
+
+ if (!port) {
+ u64 *stats = (u64 *)&hfi1_stats;
+ int i;
+
+ hfi1_read_cntrs(dd_from_ibdev(ibdev), NULL, &values);
+ values[num_dev_cntrs] = hfi1_sps_ints();
+ for (i = 1; i < num_driver_cntrs; i++)
+ values[num_dev_cntrs + i] = stats[i];
+ count = num_dev_cntrs + num_driver_cntrs;
+ } else {
+ struct hfi1_ibport *ibp = to_iport(ibdev, port);
+
+ hfi1_read_portcntrs(ppd_from_ibp(ibp), NULL, &values);
+ count = num_port_cntrs;
+ }
+
+ memcpy(stats->value, values, count * sizeof(u64));
+ return count;
+}
+
/**
* hfi1_register_ib_device - register our device with the infiniband core
* @dd: the device data structure
@@ -1620,6 +1749,7 @@
struct hfi1_ibdev *dev = &dd->verbs_dev;
struct ib_device *ibdev = &dev->rdi.ibdev;
struct hfi1_pportdata *ppd = dd->pport;
+ struct hfi1_ibport *ibp = &ppd->ibport_data;
unsigned i;
int ret;
size_t lcpysz = IB_DEVICE_NAME_MAX;
@@ -1632,6 +1762,7 @@
setup_timer(&dev->mem_timer, mem_timer, (unsigned long)dev);
seqlock_init(&dev->iowait_lock);
+ seqlock_init(&dev->txwait_lock);
INIT_LIST_HEAD(&dev->txwait);
INIT_LIST_HEAD(&dev->memwait);
@@ -1639,20 +1770,24 @@
if (ret)
goto err_verbs_txreq;
+ /* Use first-port GUID as node guid */
+ ibdev->node_guid = get_sguid(ibp, HFI1_PORT_GUID_INDEX);
+
/*
* The system image GUID is supposed to be the same for all
* HFIs in a single system but since there can be other
* device types in the system, we can't be sure this is unique.
*/
if (!ib_hfi1_sys_image_guid)
- ib_hfi1_sys_image_guid = cpu_to_be64(ppd->guid);
+ ib_hfi1_sys_image_guid = ibdev->node_guid;
lcpysz = strlcpy(ibdev->name, class_name(), lcpysz);
strlcpy(ibdev->name + lcpysz, "_%d", IB_DEVICE_NAME_MAX - lcpysz);
ibdev->owner = THIS_MODULE;
- ibdev->node_guid = cpu_to_be64(ppd->guid);
ibdev->phys_port_cnt = dd->num_pports;
ibdev->dma_device = &dd->pcidev->dev;
ibdev->modify_device = modify_device;
+ ibdev->alloc_hw_stats = alloc_hw_stats;
+ ibdev->get_hw_stats = get_hw_stats;
/* keep process mad in the driver */
ibdev->process_mad = hfi1_process_mad;
@@ -1767,6 +1902,10 @@
del_timer_sync(&dev->mem_timer);
verbs_txreq_exit(dev);
+
+ kfree(dev_cntr_names);
+ kfree(port_cntr_names);
+ cntr_names_initialized = 0;
}
void hfi1_cnp_rcv(struct hfi1_packet *packet)
diff --git a/drivers/infiniband/hw/hfi1/verbs.h b/drivers/infiniband/hw/hfi1/verbs.h
index 1c3815d..e6b8930 100644
--- a/drivers/infiniband/hw/hfi1/verbs.h
+++ b/drivers/infiniband/hw/hfi1/verbs.h
@@ -73,7 +73,6 @@
#include "iowait.h"
#define HFI1_MAX_RDMA_ATOMIC 16
-#define HFI1_GUIDS_PER_PORT 5
/*
* Increment this value if any changes that break userspace ABI
@@ -169,8 +168,6 @@
struct rvt_qp __rcu *qp[2];
struct rvt_ibport rvp;
- __be64 guids[HFI1_GUIDS_PER_PORT - 1]; /* writable GUIDs */
-
/* the first 16 entries are sl_to_vl for !OPA */
u8 sl_to_sc[32];
u8 sc_to_sl[32];
@@ -180,19 +177,20 @@
struct rvt_dev_info rdi; /* Must be first */
/* QP numbers are shared by all IB ports */
- /* protect wait lists */
- seqlock_t iowait_lock;
+ /* protect txwait list */
+ seqlock_t txwait_lock ____cacheline_aligned_in_smp;
struct list_head txwait; /* list for wait verbs_txreq */
struct list_head memwait; /* list for wait kernel memory */
- struct list_head txreq_free;
struct kmem_cache *verbs_txreq_cache;
- struct timer_list mem_timer;
-
- u64 n_piowait;
- u64 n_piodrain;
u64 n_txwait;
u64 n_kmem_wait;
+ /* protect iowait lists */
+ seqlock_t iowait_lock ____cacheline_aligned_in_smp;
+ u64 n_piowait;
+ u64 n_piodrain;
+ struct timer_list mem_timer;
+
#ifdef CONFIG_DEBUG_FS
/* per HFI debugfs */
struct dentry *hfi1_ibdev_dbg;
diff --git a/drivers/infiniband/hw/hfi1/verbs_txreq.c b/drivers/infiniband/hw/hfi1/verbs_txreq.c
index 094ab82..5d23172 100644
--- a/drivers/infiniband/hw/hfi1/verbs_txreq.c
+++ b/drivers/infiniband/hw/hfi1/verbs_txreq.c
@@ -72,22 +72,22 @@
kmem_cache_free(dev->verbs_txreq_cache, tx);
do {
- seq = read_seqbegin(&dev->iowait_lock);
+ seq = read_seqbegin(&dev->txwait_lock);
if (!list_empty(&dev->txwait)) {
struct iowait *wait;
- write_seqlock_irqsave(&dev->iowait_lock, flags);
+ write_seqlock_irqsave(&dev->txwait_lock, flags);
wait = list_first_entry(&dev->txwait, struct iowait,
list);
qp = iowait_to_qp(wait);
priv = qp->priv;
list_del_init(&priv->s_iowait.list);
/* refcount held until actual wake up */
- write_sequnlock_irqrestore(&dev->iowait_lock, flags);
+ write_sequnlock_irqrestore(&dev->txwait_lock, flags);
hfi1_qp_wakeup(qp, RVT_S_WAIT_TX);
break;
}
- } while (read_seqretry(&dev->iowait_lock, seq));
+ } while (read_seqretry(&dev->txwait_lock, seq));
}
struct verbs_txreq *__get_txreq(struct hfi1_ibdev *dev,
@@ -96,7 +96,7 @@
{
struct verbs_txreq *tx = ERR_PTR(-EBUSY);
- write_seqlock(&dev->iowait_lock);
+ write_seqlock(&dev->txwait_lock);
if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) {
struct hfi1_qp_priv *priv;
@@ -108,13 +108,14 @@
dev->n_txwait++;
qp->s_flags |= RVT_S_WAIT_TX;
list_add_tail(&priv->s_iowait.list, &dev->txwait);
+ priv->s_iowait.lock = &dev->txwait_lock;
trace_hfi1_qpsleep(qp, RVT_S_WAIT_TX);
rvt_get_qp(qp);
}
qp->s_flags &= ~RVT_S_BUSY;
}
out:
- write_sequnlock(&dev->iowait_lock);
+ write_sequnlock(&dev->txwait_lock);
return tx;
}
diff --git a/drivers/infiniband/hw/hfi1/verbs_txreq.h b/drivers/infiniband/hw/hfi1/verbs_txreq.h
index 5660897..76216f2 100644
--- a/drivers/infiniband/hw/hfi1/verbs_txreq.h
+++ b/drivers/infiniband/hw/hfi1/verbs_txreq.h
@@ -65,6 +65,7 @@
struct sdma_engine *sde;
struct send_context *psc;
u16 hdr_dwords;
+ u16 s_cur_size;
};
struct hfi1_ibdev;
diff --git a/drivers/infiniband/hw/hns/hns_roce_ah.c b/drivers/infiniband/hw/hns/hns_roce_ah.c
index 24f79ee..0ac294d 100644
--- a/drivers/infiniband/hw/hns/hns_roce_ah.c
+++ b/drivers/infiniband/hw/hns/hns_roce_ah.c
@@ -39,7 +39,8 @@
#define HNS_ROCE_VLAN_SL_BIT_MASK 7
#define HNS_ROCE_VLAN_SL_SHIFT 13
-struct ib_ah *hns_roce_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *ah_attr)
+struct ib_ah *hns_roce_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *ah_attr,
+ struct ib_udata *udata)
{
struct hns_roce_dev *hr_dev = to_hr_dev(ibpd->device);
struct device *dev = &hr_dev->pdev->dev;
diff --git a/drivers/infiniband/hw/hns/hns_roce_alloc.c b/drivers/infiniband/hw/hns/hns_roce_alloc.c
index 863a17a..605962f 100644
--- a/drivers/infiniband/hw/hns/hns_roce_alloc.c
+++ b/drivers/infiniband/hw/hns/hns_roce_alloc.c
@@ -61,9 +61,10 @@
return ret;
}
-void hns_roce_bitmap_free(struct hns_roce_bitmap *bitmap, unsigned long obj)
+void hns_roce_bitmap_free(struct hns_roce_bitmap *bitmap, unsigned long obj,
+ int rr)
{
- hns_roce_bitmap_free_range(bitmap, obj, 1);
+ hns_roce_bitmap_free_range(bitmap, obj, 1, rr);
}
int hns_roce_bitmap_alloc_range(struct hns_roce_bitmap *bitmap, int cnt,
@@ -106,7 +107,8 @@
}
void hns_roce_bitmap_free_range(struct hns_roce_bitmap *bitmap,
- unsigned long obj, int cnt)
+ unsigned long obj, int cnt,
+ int rr)
{
int i;
@@ -116,7 +118,8 @@
for (i = 0; i < cnt; i++)
clear_bit(obj + i, bitmap->table);
- bitmap->last = min(bitmap->last, obj);
+ if (!rr)
+ bitmap->last = min(bitmap->last, obj);
bitmap->top = (bitmap->top + bitmap->max + bitmap->reserved_top)
& bitmap->mask;
spin_unlock(&bitmap->lock);
diff --git a/drivers/infiniband/hw/hns/hns_roce_cmd.c b/drivers/infiniband/hw/hns/hns_roce_cmd.c
index 2a0b6c0..8c1f7a6f 100644
--- a/drivers/infiniband/hw/hns/hns_roce_cmd.c
+++ b/drivers/infiniband/hw/hns/hns_roce_cmd.c
@@ -216,10 +216,10 @@
goto out;
/*
- * It is timeout when wait_for_completion_timeout return 0
- * The return value is the time limit set in advance
- * how many seconds showing
- */
+ * It is timeout when wait_for_completion_timeout return 0
+ * The return value is the time limit set in advance
+ * how many seconds showing
+ */
if (!wait_for_completion_timeout(&context->done,
msecs_to_jiffies(timeout))) {
dev_err(dev, "[cmd]wait_for_completion_timeout timeout\n");
diff --git a/drivers/infiniband/hw/hns/hns_roce_cmd.h b/drivers/infiniband/hw/hns/hns_roce_cmd.h
index e3997d3..f5a9ee2 100644
--- a/drivers/infiniband/hw/hns/hns_roce_cmd.h
+++ b/drivers/infiniband/hw/hns/hns_roce_cmd.h
@@ -34,6 +34,7 @@
#define _HNS_ROCE_CMD_H
#define HNS_ROCE_MAILBOX_SIZE 4096
+#define HNS_ROCE_CMD_TIMEOUT_MSECS 10000
enum {
/* TPT commands */
@@ -57,17 +58,6 @@
HNS_ROCE_CMD_QUERY_QP = 0x22,
};
-enum {
- HNS_ROCE_CMD_TIME_CLASS_A = 10000,
- HNS_ROCE_CMD_TIME_CLASS_B = 10000,
- HNS_ROCE_CMD_TIME_CLASS_C = 10000,
-};
-
-struct hns_roce_cmd_mailbox {
- void *buf;
- dma_addr_t dma;
-};
-
int hns_roce_cmd_mbox(struct hns_roce_dev *hr_dev, u64 in_param, u64 out_param,
unsigned long in_modifier, u8 op_modifier, u16 op,
unsigned long timeout);
diff --git a/drivers/infiniband/hw/hns/hns_roce_common.h b/drivers/infiniband/hw/hns/hns_roce_common.h
index 2970161..4af403e 100644
--- a/drivers/infiniband/hw/hns/hns_roce_common.h
+++ b/drivers/infiniband/hw/hns/hns_roce_common.h
@@ -57,6 +57,32 @@
#define roce_set_bit(origin, shift, val) \
roce_set_field((origin), (1ul << (shift)), (shift), (val))
+/*
+ * roce_hw_index_cmp_lt - Compare two hardware index values in hisilicon
+ * SOC, check if a is less than b.
+ * @a: hardware index value
+ * @b: hardware index value
+ * @bits: the number of bits of a and b, range: 0~31.
+ *
+ * Hardware index increases continuously till max value, and then restart
+ * from zero, again and again. Because the bits of reg field is often
+ * limited, the reg field can only hold the low bits of the hardware index
+ * in hisilicon SOC.
+ * In some scenes we need to compare two values(a,b) getted from two reg
+ * fields in this driver, for example:
+ * If a equals 0xfffe, b equals 0x1 and bits equals 16, we think b has
+ * incresed from 0xffff to 0x1 and a is less than b.
+ * If a equals 0xfffe, b equals 0x0xf001 and bits equals 16, we think a
+ * is bigger than b.
+ *
+ * Return true on a less than b, otherwise false.
+ */
+#define roce_hw_index_mask(bits) ((1ul << (bits)) - 1)
+#define roce_hw_index_shift(bits) (32 - (bits))
+#define roce_hw_index_cmp_lt(a, b, bits) \
+ ((int)((((a) - (b)) & roce_hw_index_mask(bits)) << \
+ roce_hw_index_shift(bits)) < 0)
+
#define ROCEE_GLB_CFG_ROCEE_DB_SQ_MODE_S 3
#define ROCEE_GLB_CFG_ROCEE_DB_OTH_MODE_S 4
@@ -245,16 +271,26 @@
#define ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M \
(((1UL << 28) - 1) << ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S)
+#define ROCEE_SDB_PTR_CMP_BITS 28
+
#define ROCEE_SDB_INV_CNT_SDB_INV_CNT_S 0
#define ROCEE_SDB_INV_CNT_SDB_INV_CNT_M \
(((1UL << 16) - 1) << ROCEE_SDB_INV_CNT_SDB_INV_CNT_S)
+#define ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_S 0
+#define ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_M \
+ (((1UL << 16) - 1) << ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_S)
+
+#define ROCEE_SDB_CNT_CMP_BITS 16
+
+#define ROCEE_TSP_BP_ST_QH_FIFO_ENTRY_S 20
+
+#define ROCEE_CNT_CLR_CE_CNT_CLR_CE_S 0
+
/*************ROCEE_REG DEFINITION****************/
#define ROCEE_VENDOR_ID_REG 0x0
#define ROCEE_VENDOR_PART_ID_REG 0x4
-#define ROCEE_HW_VERSION_REG 0x8
-
#define ROCEE_SYS_IMAGE_GUID_L_REG 0xC
#define ROCEE_SYS_IMAGE_GUID_H_REG 0x10
@@ -318,7 +354,11 @@
#define ROCEE_SDB_ISSUE_PTR_REG 0x758
#define ROCEE_SDB_SEND_PTR_REG 0x75C
+#define ROCEE_CAEP_CQE_WCMD_EMPTY 0x850
+#define ROCEE_SCAEP_WR_CQE_CNT 0x8D0
#define ROCEE_SDB_INV_CNT_REG 0x9A4
+#define ROCEE_SDB_RETRY_CNT_REG 0x9AC
+#define ROCEE_TSP_BP_ST_REG 0x9EC
#define ROCEE_ECC_UCERR_ALM0_REG 0xB34
#define ROCEE_ECC_CERR_ALM0_REG 0xB40
diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c
index 0973659..589496c 100644
--- a/drivers/infiniband/hw/hns/hns_roce_cq.c
+++ b/drivers/infiniband/hw/hns/hns_roce_cq.c
@@ -35,7 +35,7 @@
#include "hns_roce_device.h"
#include "hns_roce_cmd.h"
#include "hns_roce_hem.h"
-#include "hns_roce_user.h"
+#include <rdma/hns-abi.h>
#include "hns_roce_common.h"
static void hns_roce_ib_cq_comp(struct hns_roce_cq *hr_cq)
@@ -77,7 +77,7 @@
unsigned long cq_num)
{
return hns_roce_cmd_mbox(dev, mailbox->dma, 0, cq_num, 0,
- HNS_ROCE_CMD_SW2HW_CQ, HNS_ROCE_CMD_TIME_CLASS_A);
+ HNS_ROCE_CMD_SW2HW_CQ, HNS_ROCE_CMD_TIMEOUT_MSECS);
}
static int hns_roce_cq_alloc(struct hns_roce_dev *hr_dev, int nent,
@@ -166,7 +166,7 @@
hns_roce_table_put(hr_dev, &cq_table->table, hr_cq->cqn);
err_out:
- hns_roce_bitmap_free(&cq_table->bitmap, hr_cq->cqn);
+ hns_roce_bitmap_free(&cq_table->bitmap, hr_cq->cqn, BITMAP_NO_RR);
return ret;
}
@@ -176,11 +176,10 @@
{
return hns_roce_cmd_mbox(dev, 0, mailbox ? mailbox->dma : 0, cq_num,
mailbox ? 0 : 1, HNS_ROCE_CMD_HW2SW_CQ,
- HNS_ROCE_CMD_TIME_CLASS_A);
+ HNS_ROCE_CMD_TIMEOUT_MSECS);
}
-static void hns_roce_free_cq(struct hns_roce_dev *hr_dev,
- struct hns_roce_cq *hr_cq)
+void hns_roce_free_cq(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq)
{
struct hns_roce_cq_table *cq_table = &hr_dev->cq_table;
struct device *dev = &hr_dev->pdev->dev;
@@ -204,7 +203,7 @@
spin_unlock_irq(&cq_table->lock);
hns_roce_table_put(hr_dev, &cq_table->table, hr_cq->cqn);
- hns_roce_bitmap_free(&cq_table->bitmap, hr_cq->cqn);
+ hns_roce_bitmap_free(&cq_table->bitmap, hr_cq->cqn, BITMAP_NO_RR);
}
static int hns_roce_ib_get_cq_umem(struct hns_roce_dev *hr_dev,
@@ -349,6 +348,15 @@
goto err_mtt;
}
+ /*
+ * For the QP created by kernel space, tptr value should be initialized
+ * to zero; For the QP created by user space, it will cause synchronous
+ * problems if tptr is set to zero here, so we initialze it in user
+ * space.
+ */
+ if (!context)
+ *hr_cq->tptr_addr = 0;
+
/* Get created cq handler and carry out event */
hr_cq->comp = hns_roce_ib_cq_comp;
hr_cq->event = hns_roce_ib_cq_event;
@@ -383,19 +391,25 @@
{
struct hns_roce_dev *hr_dev = to_hr_dev(ib_cq->device);
struct hns_roce_cq *hr_cq = to_hr_cq(ib_cq);
+ int ret = 0;
- hns_roce_free_cq(hr_dev, hr_cq);
- hns_roce_mtt_cleanup(hr_dev, &hr_cq->hr_buf.hr_mtt);
+ if (hr_dev->hw->destroy_cq) {
+ ret = hr_dev->hw->destroy_cq(ib_cq);
+ } else {
+ hns_roce_free_cq(hr_dev, hr_cq);
+ hns_roce_mtt_cleanup(hr_dev, &hr_cq->hr_buf.hr_mtt);
- if (ib_cq->uobject)
- ib_umem_release(hr_cq->umem);
- else
- /* Free the buff of stored cq */
- hns_roce_ib_free_cq_buf(hr_dev, &hr_cq->hr_buf, ib_cq->cqe);
+ if (ib_cq->uobject)
+ ib_umem_release(hr_cq->umem);
+ else
+ /* Free the buff of stored cq */
+ hns_roce_ib_free_cq_buf(hr_dev, &hr_cq->hr_buf,
+ ib_cq->cqe);
- kfree(hr_cq);
+ kfree(hr_cq);
+ }
- return 0;
+ return ret;
}
void hns_roce_cq_completion(struct hns_roce_dev *hr_dev, u32 cqn)
diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h
index 3417315..1a6cb5d 100644
--- a/drivers/infiniband/hw/hns/hns_roce_device.h
+++ b/drivers/infiniband/hw/hns/hns_roce_device.h
@@ -37,6 +37,8 @@
#define DRV_NAME "hns_roce"
+#define HNS_ROCE_HW_VER1 ('h' << 24 | 'i' << 16 | '0' << 8 | '6')
+
#define MAC_ADDR_OCTET_NUM 6
#define HNS_ROCE_MAX_MSG_LEN 0x80000000
@@ -54,6 +56,12 @@
#define HNS_ROCE_MAX_INNER_MTPT_NUM 0x7
#define HNS_ROCE_MAX_MTPT_PBL_NUM 0x100000
+#define HNS_ROCE_EACH_FREE_CQ_WAIT_MSECS 20
+#define HNS_ROCE_MAX_FREE_CQ_WAIT_CNT \
+ (5000 / HNS_ROCE_EACH_FREE_CQ_WAIT_MSECS)
+#define HNS_ROCE_CQE_WCMD_EMPTY_BIT 0x2
+#define HNS_ROCE_MIN_CQE_CNT 16
+
#define HNS_ROCE_MAX_IRQ_NUM 34
#define HNS_ROCE_COMP_VEC_NUM 32
@@ -70,6 +78,9 @@
#define HNS_ROCE_MAX_GID_NUM 16
#define HNS_ROCE_GID_SIZE 16
+#define BITMAP_NO_RR 0
+#define BITMAP_RR 1
+
#define MR_TYPE_MR 0x00
#define MR_TYPE_DMA 0x03
@@ -196,9 +207,9 @@
/* Order = 0: bitmap is biggest, order = max bitmap is least (only a bit) */
/* Every bit repesent to a partner free/used status in bitmap */
/*
-* Initial, bits of other bitmap are all 0 except that a bit of max_order is 1
-* Bit = 1 represent to idle and available; bit = 0: not available
-*/
+ * Initial, bits of other bitmap are all 0 except that a bit of max_order is 1
+ * Bit = 1 represent to idle and available; bit = 0: not available
+ */
struct hns_roce_buddy {
/* Members point to every order level bitmap */
unsigned long **bits;
@@ -296,7 +307,7 @@
u32 cq_depth;
u32 cons_index;
void __iomem *cq_db_l;
- void __iomem *tptr_addr;
+ u16 *tptr_addr;
unsigned long cqn;
u32 vector;
atomic_t refcount;
@@ -360,29 +371,34 @@
struct mutex hcr_mutex;
struct semaphore poll_sem;
/*
- * Event mode: cmd register mutex protection,
- * ensure to not exceed max_cmds and user use limit region
- */
+ * Event mode: cmd register mutex protection,
+ * ensure to not exceed max_cmds and user use limit region
+ */
struct semaphore event_sem;
int max_cmds;
spinlock_t context_lock;
int free_head;
struct hns_roce_cmd_context *context;
/*
- * Result of get integer part
- * which max_comds compute according a power of 2
- */
+ * Result of get integer part
+ * which max_comds compute according a power of 2
+ */
u16 token_mask;
/*
- * Process whether use event mode, init default non-zero
- * After the event queue of cmd event ready,
- * can switch into event mode
- * close device, switch into poll mode(non event mode)
- */
+ * Process whether use event mode, init default non-zero
+ * After the event queue of cmd event ready,
+ * can switch into event mode
+ * close device, switch into poll mode(non event mode)
+ */
u8 use_events;
u8 toggle;
};
+struct hns_roce_cmd_mailbox {
+ void *buf;
+ dma_addr_t dma;
+};
+
struct hns_roce_dev;
struct hns_roce_qp {
@@ -424,8 +440,6 @@
struct net_device *netdevs[HNS_ROCE_MAX_PORTS];
struct notifier_block nb;
struct notifier_block nb_inet;
- /* 16 GID is shared by 6 port in v1 engine. */
- union ib_gid gid_table[HNS_ROCE_MAX_GID_NUM];
u8 phy_port[HNS_ROCE_MAX_PORTS];
};
@@ -519,6 +533,8 @@
struct ib_recv_wr **bad_recv_wr);
int (*req_notify_cq)(struct ib_cq *ibcq, enum ib_cq_notify_flags flags);
int (*poll_cq)(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc);
+ int (*dereg_mr)(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr);
+ int (*destroy_cq)(struct ib_cq *ibcq);
void *priv;
};
@@ -553,6 +569,8 @@
int cmd_mod;
int loop_idc;
+ dma_addr_t tptr_dma_addr; /*only for hw v1*/
+ u32 tptr_size; /*only for hw v1*/
struct hns_roce_hw *hw;
};
@@ -657,7 +675,8 @@
void hns_roce_cleanup_qp_table(struct hns_roce_dev *hr_dev);
int hns_roce_bitmap_alloc(struct hns_roce_bitmap *bitmap, unsigned long *obj);
-void hns_roce_bitmap_free(struct hns_roce_bitmap *bitmap, unsigned long obj);
+void hns_roce_bitmap_free(struct hns_roce_bitmap *bitmap, unsigned long obj,
+ int rr);
int hns_roce_bitmap_init(struct hns_roce_bitmap *bitmap, u32 num, u32 mask,
u32 reserved_bot, u32 resetrved_top);
void hns_roce_bitmap_cleanup(struct hns_roce_bitmap *bitmap);
@@ -665,9 +684,11 @@
int hns_roce_bitmap_alloc_range(struct hns_roce_bitmap *bitmap, int cnt,
int align, unsigned long *obj);
void hns_roce_bitmap_free_range(struct hns_roce_bitmap *bitmap,
- unsigned long obj, int cnt);
+ unsigned long obj, int cnt,
+ int rr);
-struct ib_ah *hns_roce_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr);
+struct ib_ah *hns_roce_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr,
+ struct ib_udata *udata);
int hns_roce_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr);
int hns_roce_destroy_ah(struct ib_ah *ah);
@@ -681,6 +702,10 @@
u64 virt_addr, int access_flags,
struct ib_udata *udata);
int hns_roce_dereg_mr(struct ib_mr *ibmr);
+int hns_roce_hw2sw_mpt(struct hns_roce_dev *hr_dev,
+ struct hns_roce_cmd_mailbox *mailbox,
+ unsigned long mpt_index);
+unsigned long key_to_hw_index(u32 key);
void hns_roce_buf_free(struct hns_roce_dev *hr_dev, u32 size,
struct hns_roce_buf *buf);
@@ -717,6 +742,7 @@
struct ib_udata *udata);
int hns_roce_ib_destroy_cq(struct ib_cq *ib_cq);
+void hns_roce_free_cq(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq);
void hns_roce_cq_completion(struct hns_roce_dev *hr_dev, u32 cqn);
void hns_roce_cq_event(struct hns_roce_dev *hr_dev, u32 cqn, int event_type);
diff --git a/drivers/infiniband/hw/hns/hns_roce_eq.c b/drivers/infiniband/hw/hns/hns_roce_eq.c
index 21e21b0..50f8649 100644
--- a/drivers/infiniband/hw/hns/hns_roce_eq.c
+++ b/drivers/infiniband/hw/hns/hns_roce_eq.c
@@ -371,9 +371,9 @@
int i = 0;
/**
- * AEQ overflow ECC mult bit err CEQ overflow alarm
- * must clear interrupt, mask irq, clear irq, cancel mask operation
- */
+ * AEQ overflow ECC mult bit err CEQ overflow alarm
+ * must clear interrupt, mask irq, clear irq, cancel mask operation
+ */
aeshift_val = roce_read(hr_dev, ROCEE_CAEP_AEQC_AEQE_SHIFT_REG);
if (roce_get_bit(aeshift_val,
diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.c b/drivers/infiniband/hw/hns/hns_roce_hem.c
index 250d8f2..c5104e0 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hem.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hem.c
@@ -80,9 +80,9 @@
--order;
/*
- * Alloc memory one time. If failed, don't alloc small block
- * memory, directly return fail.
- */
+ * Alloc memory one time. If failed, don't alloc small block
+ * memory, directly return fail.
+ */
mem = &chunk->mem[chunk->npages];
buf = dma_alloc_coherent(&hr_dev->pdev->dev, PAGE_SIZE << order,
&sg_dma_address(mem), gfp_mask);
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
index 71232e5..b8111b0 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
@@ -32,6 +32,7 @@
#include <linux/platform_device.h>
#include <linux/acpi.h>
+#include <linux/etherdevice.h>
#include <rdma/ib_umem.h>
#include "hns_roce_common.h"
#include "hns_roce_device.h"
@@ -72,6 +73,8 @@
int nreq = 0;
u32 ind = 0;
int ret = 0;
+ u8 *smac;
+ int loopback;
if (unlikely(ibqp->qp_type != IB_QPT_GSI &&
ibqp->qp_type != IB_QPT_RC)) {
@@ -129,6 +132,14 @@
UD_SEND_WQE_U32_8_DMAC_5_M,
UD_SEND_WQE_U32_8_DMAC_5_S,
ah->av.mac[5]);
+
+ smac = (u8 *)hr_dev->dev_addr[qp->port];
+ loopback = ether_addr_equal_unaligned(ah->av.mac,
+ smac) ? 1 : 0;
+ roce_set_bit(ud_sq_wqe->u32_8,
+ UD_SEND_WQE_U32_8_LOOPBACK_INDICATOR_S,
+ loopback);
+
roce_set_field(ud_sq_wqe->u32_8,
UD_SEND_WQE_U32_8_OPERATION_TYPE_M,
UD_SEND_WQE_U32_8_OPERATION_TYPE_S,
@@ -284,6 +295,8 @@
roce_set_field(sq_db.u32_4, SQ_DOORBELL_U32_4_SQ_HEAD_M,
SQ_DOORBELL_U32_4_SQ_HEAD_S,
(qp->sq.head & ((qp->sq.wqe_cnt << 1) - 1)));
+ roce_set_field(sq_db.u32_4, SQ_DOORBELL_U32_4_SL_M,
+ SQ_DOORBELL_U32_4_SL_S, qp->sl);
roce_set_field(sq_db.u32_4, SQ_DOORBELL_U32_4_PORT_M,
SQ_DOORBELL_U32_4_PORT_S, qp->phy_port);
roce_set_field(sq_db.u32_8, SQ_DOORBELL_U32_8_QPN_M,
@@ -611,6 +624,213 @@
return ret;
}
+static struct hns_roce_qp *hns_roce_v1_create_lp_qp(struct hns_roce_dev *hr_dev,
+ struct ib_pd *pd)
+{
+ struct device *dev = &hr_dev->pdev->dev;
+ struct ib_qp_init_attr init_attr;
+ struct ib_qp *qp;
+
+ memset(&init_attr, 0, sizeof(struct ib_qp_init_attr));
+ init_attr.qp_type = IB_QPT_RC;
+ init_attr.sq_sig_type = IB_SIGNAL_ALL_WR;
+ init_attr.cap.max_recv_wr = HNS_ROCE_MIN_WQE_NUM;
+ init_attr.cap.max_send_wr = HNS_ROCE_MIN_WQE_NUM;
+
+ qp = hns_roce_create_qp(pd, &init_attr, NULL);
+ if (IS_ERR(qp)) {
+ dev_err(dev, "Create loop qp for mr free failed!");
+ return NULL;
+ }
+
+ return to_hr_qp(qp);
+}
+
+static int hns_roce_v1_rsv_lp_qp(struct hns_roce_dev *hr_dev)
+{
+ struct hns_roce_caps *caps = &hr_dev->caps;
+ struct device *dev = &hr_dev->pdev->dev;
+ struct ib_cq_init_attr cq_init_attr;
+ struct hns_roce_free_mr *free_mr;
+ struct ib_qp_attr attr = { 0 };
+ struct hns_roce_v1_priv *priv;
+ struct hns_roce_qp *hr_qp;
+ struct ib_cq *cq;
+ struct ib_pd *pd;
+ u64 subnet_prefix;
+ int attr_mask = 0;
+ int i;
+ int ret;
+ u8 phy_port;
+ u8 sl;
+
+ priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv;
+ free_mr = &priv->free_mr;
+
+ /* Reserved cq for loop qp */
+ cq_init_attr.cqe = HNS_ROCE_MIN_WQE_NUM * 2;
+ cq_init_attr.comp_vector = 0;
+ cq = hns_roce_ib_create_cq(&hr_dev->ib_dev, &cq_init_attr, NULL, NULL);
+ if (IS_ERR(cq)) {
+ dev_err(dev, "Create cq for reseved loop qp failed!");
+ return -ENOMEM;
+ }
+ free_mr->mr_free_cq = to_hr_cq(cq);
+ free_mr->mr_free_cq->ib_cq.device = &hr_dev->ib_dev;
+ free_mr->mr_free_cq->ib_cq.uobject = NULL;
+ free_mr->mr_free_cq->ib_cq.comp_handler = NULL;
+ free_mr->mr_free_cq->ib_cq.event_handler = NULL;
+ free_mr->mr_free_cq->ib_cq.cq_context = NULL;
+ atomic_set(&free_mr->mr_free_cq->ib_cq.usecnt, 0);
+
+ pd = hns_roce_alloc_pd(&hr_dev->ib_dev, NULL, NULL);
+ if (IS_ERR(pd)) {
+ dev_err(dev, "Create pd for reseved loop qp failed!");
+ ret = -ENOMEM;
+ goto alloc_pd_failed;
+ }
+ free_mr->mr_free_pd = to_hr_pd(pd);
+ free_mr->mr_free_pd->ibpd.device = &hr_dev->ib_dev;
+ free_mr->mr_free_pd->ibpd.uobject = NULL;
+ atomic_set(&free_mr->mr_free_pd->ibpd.usecnt, 0);
+
+ attr.qp_access_flags = IB_ACCESS_REMOTE_WRITE;
+ attr.pkey_index = 0;
+ attr.min_rnr_timer = 0;
+ /* Disable read ability */
+ attr.max_dest_rd_atomic = 0;
+ attr.max_rd_atomic = 0;
+ /* Use arbitrary values as rq_psn and sq_psn */
+ attr.rq_psn = 0x0808;
+ attr.sq_psn = 0x0808;
+ attr.retry_cnt = 7;
+ attr.rnr_retry = 7;
+ attr.timeout = 0x12;
+ attr.path_mtu = IB_MTU_256;
+ attr.ah_attr.ah_flags = 1;
+ attr.ah_attr.static_rate = 3;
+ attr.ah_attr.grh.sgid_index = 0;
+ attr.ah_attr.grh.hop_limit = 1;
+ attr.ah_attr.grh.flow_label = 0;
+ attr.ah_attr.grh.traffic_class = 0;
+
+ subnet_prefix = cpu_to_be64(0xfe80000000000000LL);
+ for (i = 0; i < HNS_ROCE_V1_RESV_QP; i++) {
+ free_mr->mr_free_qp[i] = hns_roce_v1_create_lp_qp(hr_dev, pd);
+ if (IS_ERR(free_mr->mr_free_qp[i])) {
+ dev_err(dev, "Create loop qp failed!\n");
+ goto create_lp_qp_failed;
+ }
+ hr_qp = free_mr->mr_free_qp[i];
+
+ sl = i / caps->num_ports;
+
+ if (caps->num_ports == HNS_ROCE_MAX_PORTS)
+ phy_port = (i >= HNS_ROCE_MAX_PORTS) ? (i - 2) :
+ (i % caps->num_ports);
+ else
+ phy_port = i % caps->num_ports;
+
+ hr_qp->port = phy_port + 1;
+ hr_qp->phy_port = phy_port;
+ hr_qp->ibqp.qp_type = IB_QPT_RC;
+ hr_qp->ibqp.device = &hr_dev->ib_dev;
+ hr_qp->ibqp.uobject = NULL;
+ atomic_set(&hr_qp->ibqp.usecnt, 0);
+ hr_qp->ibqp.pd = pd;
+ hr_qp->ibqp.recv_cq = cq;
+ hr_qp->ibqp.send_cq = cq;
+
+ attr.ah_attr.port_num = phy_port + 1;
+ attr.ah_attr.sl = sl;
+ attr.port_num = phy_port + 1;
+
+ attr.dest_qp_num = hr_qp->qpn;
+ memcpy(attr.ah_attr.dmac, hr_dev->dev_addr[phy_port],
+ MAC_ADDR_OCTET_NUM);
+
+ memcpy(attr.ah_attr.grh.dgid.raw,
+ &subnet_prefix, sizeof(u64));
+ memcpy(&attr.ah_attr.grh.dgid.raw[8],
+ hr_dev->dev_addr[phy_port], 3);
+ memcpy(&attr.ah_attr.grh.dgid.raw[13],
+ hr_dev->dev_addr[phy_port] + 3, 3);
+ attr.ah_attr.grh.dgid.raw[11] = 0xff;
+ attr.ah_attr.grh.dgid.raw[12] = 0xfe;
+ attr.ah_attr.grh.dgid.raw[8] ^= 2;
+
+ attr_mask |= IB_QP_PORT;
+
+ ret = hr_dev->hw->modify_qp(&hr_qp->ibqp, &attr, attr_mask,
+ IB_QPS_RESET, IB_QPS_INIT);
+ if (ret) {
+ dev_err(dev, "modify qp failed(%d)!\n", ret);
+ goto create_lp_qp_failed;
+ }
+
+ ret = hr_dev->hw->modify_qp(&hr_qp->ibqp, &attr, attr_mask,
+ IB_QPS_INIT, IB_QPS_RTR);
+ if (ret) {
+ dev_err(dev, "modify qp failed(%d)!\n", ret);
+ goto create_lp_qp_failed;
+ }
+
+ ret = hr_dev->hw->modify_qp(&hr_qp->ibqp, &attr, attr_mask,
+ IB_QPS_RTR, IB_QPS_RTS);
+ if (ret) {
+ dev_err(dev, "modify qp failed(%d)!\n", ret);
+ goto create_lp_qp_failed;
+ }
+ }
+
+ return 0;
+
+create_lp_qp_failed:
+ for (i -= 1; i >= 0; i--) {
+ hr_qp = free_mr->mr_free_qp[i];
+ if (hns_roce_v1_destroy_qp(&hr_qp->ibqp))
+ dev_err(dev, "Destroy qp %d for mr free failed!\n", i);
+ }
+
+ if (hns_roce_dealloc_pd(pd))
+ dev_err(dev, "Destroy pd for create_lp_qp failed!\n");
+
+alloc_pd_failed:
+ if (hns_roce_ib_destroy_cq(cq))
+ dev_err(dev, "Destroy cq for create_lp_qp failed!\n");
+
+ return -EINVAL;
+}
+
+static void hns_roce_v1_release_lp_qp(struct hns_roce_dev *hr_dev)
+{
+ struct device *dev = &hr_dev->pdev->dev;
+ struct hns_roce_free_mr *free_mr;
+ struct hns_roce_v1_priv *priv;
+ struct hns_roce_qp *hr_qp;
+ int ret;
+ int i;
+
+ priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv;
+ free_mr = &priv->free_mr;
+
+ for (i = 0; i < HNS_ROCE_V1_RESV_QP; i++) {
+ hr_qp = free_mr->mr_free_qp[i];
+ ret = hns_roce_v1_destroy_qp(&hr_qp->ibqp);
+ if (ret)
+ dev_err(dev, "Destroy qp %d for mr free failed(%d)!\n",
+ i, ret);
+ }
+
+ ret = hns_roce_ib_destroy_cq(&free_mr->mr_free_cq->ib_cq);
+ if (ret)
+ dev_err(dev, "Destroy cq for mr_free failed(%d)!\n", ret);
+
+ ret = hns_roce_dealloc_pd(&free_mr->mr_free_pd->ibpd);
+ if (ret)
+ dev_err(dev, "Destroy pd for mr_free failed(%d)!\n", ret);
+}
+
static int hns_roce_db_init(struct hns_roce_dev *hr_dev)
{
struct device *dev = &hr_dev->pdev->dev;
@@ -648,6 +868,223 @@
return 0;
}
+void hns_roce_v1_recreate_lp_qp_work_fn(struct work_struct *work)
+{
+ struct hns_roce_recreate_lp_qp_work *lp_qp_work;
+ struct hns_roce_dev *hr_dev;
+
+ lp_qp_work = container_of(work, struct hns_roce_recreate_lp_qp_work,
+ work);
+ hr_dev = to_hr_dev(lp_qp_work->ib_dev);
+
+ hns_roce_v1_release_lp_qp(hr_dev);
+
+ if (hns_roce_v1_rsv_lp_qp(hr_dev))
+ dev_err(&hr_dev->pdev->dev, "create reserver qp failed\n");
+
+ if (lp_qp_work->comp_flag)
+ complete(lp_qp_work->comp);
+
+ kfree(lp_qp_work);
+}
+
+static int hns_roce_v1_recreate_lp_qp(struct hns_roce_dev *hr_dev)
+{
+ struct device *dev = &hr_dev->pdev->dev;
+ struct hns_roce_recreate_lp_qp_work *lp_qp_work;
+ struct hns_roce_free_mr *free_mr;
+ struct hns_roce_v1_priv *priv;
+ struct completion comp;
+ unsigned long end =
+ msecs_to_jiffies(HNS_ROCE_V1_RECREATE_LP_QP_TIMEOUT_MSECS) + jiffies;
+
+ priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv;
+ free_mr = &priv->free_mr;
+
+ lp_qp_work = kzalloc(sizeof(struct hns_roce_recreate_lp_qp_work),
+ GFP_KERNEL);
+
+ INIT_WORK(&(lp_qp_work->work), hns_roce_v1_recreate_lp_qp_work_fn);
+
+ lp_qp_work->ib_dev = &(hr_dev->ib_dev);
+ lp_qp_work->comp = ∁
+ lp_qp_work->comp_flag = 1;
+
+ init_completion(lp_qp_work->comp);
+
+ queue_work(free_mr->free_mr_wq, &(lp_qp_work->work));
+
+ while (time_before_eq(jiffies, end)) {
+ if (try_wait_for_completion(&comp))
+ return 0;
+ msleep(HNS_ROCE_V1_RECREATE_LP_QP_WAIT_VALUE);
+ }
+
+ lp_qp_work->comp_flag = 0;
+ if (try_wait_for_completion(&comp))
+ return 0;
+
+ dev_warn(dev, "recreate lp qp failed 20s timeout and return failed!\n");
+ return -ETIMEDOUT;
+}
+
+static int hns_roce_v1_send_lp_wqe(struct hns_roce_qp *hr_qp)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(hr_qp->ibqp.device);
+ struct device *dev = &hr_dev->pdev->dev;
+ struct ib_send_wr send_wr, *bad_wr;
+ int ret;
+
+ memset(&send_wr, 0, sizeof(send_wr));
+ send_wr.next = NULL;
+ send_wr.num_sge = 0;
+ send_wr.send_flags = 0;
+ send_wr.sg_list = NULL;
+ send_wr.wr_id = (unsigned long long)&send_wr;
+ send_wr.opcode = IB_WR_RDMA_WRITE;
+
+ ret = hns_roce_v1_post_send(&hr_qp->ibqp, &send_wr, &bad_wr);
+ if (ret) {
+ dev_err(dev, "Post write wqe for mr free failed(%d)!", ret);
+ return ret;
+ }
+
+ return 0;
+}
+
+static void hns_roce_v1_mr_free_work_fn(struct work_struct *work)
+{
+ struct hns_roce_mr_free_work *mr_work;
+ struct ib_wc wc[HNS_ROCE_V1_RESV_QP];
+ struct hns_roce_free_mr *free_mr;
+ struct hns_roce_cq *mr_free_cq;
+ struct hns_roce_v1_priv *priv;
+ struct hns_roce_dev *hr_dev;
+ struct hns_roce_mr *hr_mr;
+ struct hns_roce_qp *hr_qp;
+ struct device *dev;
+ unsigned long end =
+ msecs_to_jiffies(HNS_ROCE_V1_FREE_MR_TIMEOUT_MSECS) + jiffies;
+ int i;
+ int ret;
+ int ne;
+
+ mr_work = container_of(work, struct hns_roce_mr_free_work, work);
+ hr_mr = (struct hns_roce_mr *)mr_work->mr;
+ hr_dev = to_hr_dev(mr_work->ib_dev);
+ dev = &hr_dev->pdev->dev;
+
+ priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv;
+ free_mr = &priv->free_mr;
+ mr_free_cq = free_mr->mr_free_cq;
+
+ for (i = 0; i < HNS_ROCE_V1_RESV_QP; i++) {
+ hr_qp = free_mr->mr_free_qp[i];
+ ret = hns_roce_v1_send_lp_wqe(hr_qp);
+ if (ret) {
+ dev_err(dev,
+ "Send wqe (qp:0x%lx) for mr free failed(%d)!\n",
+ hr_qp->qpn, ret);
+ goto free_work;
+ }
+ }
+
+ ne = HNS_ROCE_V1_RESV_QP;
+ do {
+ ret = hns_roce_v1_poll_cq(&mr_free_cq->ib_cq, ne, wc);
+ if (ret < 0) {
+ dev_err(dev,
+ "(qp:0x%lx) starts, Poll cqe failed(%d) for mr 0x%x free! Remain %d cqe\n",
+ hr_qp->qpn, ret, hr_mr->key, ne);
+ goto free_work;
+ }
+ ne -= ret;
+ msleep(HNS_ROCE_V1_FREE_MR_WAIT_VALUE);
+ } while (ne && time_before_eq(jiffies, end));
+
+ if (ne != 0)
+ dev_err(dev,
+ "Poll cqe for mr 0x%x free timeout! Remain %d cqe\n",
+ hr_mr->key, ne);
+
+free_work:
+ if (mr_work->comp_flag)
+ complete(mr_work->comp);
+ kfree(mr_work);
+}
+
+int hns_roce_v1_dereg_mr(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr)
+{
+ struct device *dev = &hr_dev->pdev->dev;
+ struct hns_roce_mr_free_work *mr_work;
+ struct hns_roce_free_mr *free_mr;
+ struct hns_roce_v1_priv *priv;
+ struct completion comp;
+ unsigned long end =
+ msecs_to_jiffies(HNS_ROCE_V1_FREE_MR_TIMEOUT_MSECS) + jiffies;
+ unsigned long start = jiffies;
+ int npages;
+ int ret = 0;
+
+ priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv;
+ free_mr = &priv->free_mr;
+
+ if (mr->enabled) {
+ if (hns_roce_hw2sw_mpt(hr_dev, NULL, key_to_hw_index(mr->key)
+ & (hr_dev->caps.num_mtpts - 1)))
+ dev_warn(dev, "HW2SW_MPT failed!\n");
+ }
+
+ mr_work = kzalloc(sizeof(*mr_work), GFP_KERNEL);
+ if (!mr_work) {
+ ret = -ENOMEM;
+ goto free_mr;
+ }
+
+ INIT_WORK(&(mr_work->work), hns_roce_v1_mr_free_work_fn);
+
+ mr_work->ib_dev = &(hr_dev->ib_dev);
+ mr_work->comp = ∁
+ mr_work->comp_flag = 1;
+ mr_work->mr = (void *)mr;
+ init_completion(mr_work->comp);
+
+ queue_work(free_mr->free_mr_wq, &(mr_work->work));
+
+ while (time_before_eq(jiffies, end)) {
+ if (try_wait_for_completion(&comp))
+ goto free_mr;
+ msleep(HNS_ROCE_V1_FREE_MR_WAIT_VALUE);
+ }
+
+ mr_work->comp_flag = 0;
+ if (try_wait_for_completion(&comp))
+ goto free_mr;
+
+ dev_warn(dev, "Free mr work 0x%x over 50s and failed!\n", mr->key);
+ ret = -ETIMEDOUT;
+
+free_mr:
+ dev_dbg(dev, "Free mr 0x%x use 0x%x us.\n",
+ mr->key, jiffies_to_usecs(jiffies) - jiffies_to_usecs(start));
+
+ if (mr->size != ~0ULL) {
+ npages = ib_umem_page_count(mr->umem);
+ dma_free_coherent(dev, npages * 8, mr->pbl_buf,
+ mr->pbl_dma_addr);
+ }
+
+ hns_roce_bitmap_free(&hr_dev->mr_table.mtpt_bitmap,
+ key_to_hw_index(mr->key), 0);
+
+ if (mr->umem)
+ ib_umem_release(mr->umem);
+
+ kfree(mr);
+
+ return ret;
+}
+
static void hns_roce_db_free(struct hns_roce_dev *hr_dev)
{
struct device *dev = &hr_dev->pdev->dev;
@@ -849,6 +1286,85 @@
priv->bt_table.qpc_buf.buf, priv->bt_table.qpc_buf.map);
}
+static int hns_roce_tptr_init(struct hns_roce_dev *hr_dev)
+{
+ struct device *dev = &hr_dev->pdev->dev;
+ struct hns_roce_buf_list *tptr_buf;
+ struct hns_roce_v1_priv *priv;
+
+ priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv;
+ tptr_buf = &priv->tptr_table.tptr_buf;
+
+ /*
+ * This buffer will be used for CQ's tptr(tail pointer), also
+ * named ci(customer index). Every CQ will use 2 bytes to save
+ * cqe ci in hip06. Hardware will read this area to get new ci
+ * when the queue is almost full.
+ */
+ tptr_buf->buf = dma_alloc_coherent(dev, HNS_ROCE_V1_TPTR_BUF_SIZE,
+ &tptr_buf->map, GFP_KERNEL);
+ if (!tptr_buf->buf)
+ return -ENOMEM;
+
+ hr_dev->tptr_dma_addr = tptr_buf->map;
+ hr_dev->tptr_size = HNS_ROCE_V1_TPTR_BUF_SIZE;
+
+ return 0;
+}
+
+static void hns_roce_tptr_free(struct hns_roce_dev *hr_dev)
+{
+ struct device *dev = &hr_dev->pdev->dev;
+ struct hns_roce_buf_list *tptr_buf;
+ struct hns_roce_v1_priv *priv;
+
+ priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv;
+ tptr_buf = &priv->tptr_table.tptr_buf;
+
+ dma_free_coherent(dev, HNS_ROCE_V1_TPTR_BUF_SIZE,
+ tptr_buf->buf, tptr_buf->map);
+}
+
+static int hns_roce_free_mr_init(struct hns_roce_dev *hr_dev)
+{
+ struct device *dev = &hr_dev->pdev->dev;
+ struct hns_roce_free_mr *free_mr;
+ struct hns_roce_v1_priv *priv;
+ int ret = 0;
+
+ priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv;
+ free_mr = &priv->free_mr;
+
+ free_mr->free_mr_wq = create_singlethread_workqueue("hns_roce_free_mr");
+ if (!free_mr->free_mr_wq) {
+ dev_err(dev, "Create free mr workqueue failed!\n");
+ return -ENOMEM;
+ }
+
+ ret = hns_roce_v1_rsv_lp_qp(hr_dev);
+ if (ret) {
+ dev_err(dev, "Reserved loop qp failed(%d)!\n", ret);
+ flush_workqueue(free_mr->free_mr_wq);
+ destroy_workqueue(free_mr->free_mr_wq);
+ }
+
+ return ret;
+}
+
+static void hns_roce_free_mr_free(struct hns_roce_dev *hr_dev)
+{
+ struct hns_roce_free_mr *free_mr;
+ struct hns_roce_v1_priv *priv;
+
+ priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv;
+ free_mr = &priv->free_mr;
+
+ flush_workqueue(free_mr->free_mr_wq);
+ destroy_workqueue(free_mr->free_mr_wq);
+
+ hns_roce_v1_release_lp_qp(hr_dev);
+}
+
/**
* hns_roce_v1_reset - reset RoCE
* @hr_dev: RoCE device struct pointer
@@ -898,6 +1414,38 @@
return ret;
}
+static int hns_roce_des_qp_init(struct hns_roce_dev *hr_dev)
+{
+ struct device *dev = &hr_dev->pdev->dev;
+ struct hns_roce_v1_priv *priv;
+ struct hns_roce_des_qp *des_qp;
+
+ priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv;
+ des_qp = &priv->des_qp;
+
+ des_qp->requeue_flag = 1;
+ des_qp->qp_wq = create_singlethread_workqueue("hns_roce_destroy_qp");
+ if (!des_qp->qp_wq) {
+ dev_err(dev, "Create destroy qp workqueue failed!\n");
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+static void hns_roce_des_qp_free(struct hns_roce_dev *hr_dev)
+{
+ struct hns_roce_v1_priv *priv;
+ struct hns_roce_des_qp *des_qp;
+
+ priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv;
+ des_qp = &priv->des_qp;
+
+ des_qp->requeue_flag = 0;
+ flush_workqueue(des_qp->qp_wq);
+ destroy_workqueue(des_qp->qp_wq);
+}
+
void hns_roce_v1_profile(struct hns_roce_dev *hr_dev)
{
int i = 0;
@@ -906,12 +1454,11 @@
hr_dev->vendor_id = le32_to_cpu(roce_read(hr_dev, ROCEE_VENDOR_ID_REG));
hr_dev->vendor_part_id = le32_to_cpu(roce_read(hr_dev,
ROCEE_VENDOR_PART_ID_REG));
- hr_dev->hw_rev = le32_to_cpu(roce_read(hr_dev, ROCEE_HW_VERSION_REG));
-
hr_dev->sys_image_guid = le32_to_cpu(roce_read(hr_dev,
ROCEE_SYS_IMAGE_GUID_L_REG)) |
((u64)le32_to_cpu(roce_read(hr_dev,
ROCEE_SYS_IMAGE_GUID_H_REG)) << 32);
+ hr_dev->hw_rev = HNS_ROCE_HW_VER1;
caps->num_qps = HNS_ROCE_V1_MAX_QP_NUM;
caps->max_wqes = HNS_ROCE_V1_MAX_WQE_NUM;
@@ -1001,18 +1548,44 @@
goto error_failed_raq_init;
}
- hns_roce_port_enable(hr_dev, HNS_ROCE_PORT_UP);
-
ret = hns_roce_bt_init(hr_dev);
if (ret) {
dev_err(dev, "bt init failed!\n");
goto error_failed_bt_init;
}
+ ret = hns_roce_tptr_init(hr_dev);
+ if (ret) {
+ dev_err(dev, "tptr init failed!\n");
+ goto error_failed_tptr_init;
+ }
+
+ ret = hns_roce_des_qp_init(hr_dev);
+ if (ret) {
+ dev_err(dev, "des qp init failed!\n");
+ goto error_failed_des_qp_init;
+ }
+
+ ret = hns_roce_free_mr_init(hr_dev);
+ if (ret) {
+ dev_err(dev, "free mr init failed!\n");
+ goto error_failed_free_mr_init;
+ }
+
+ hns_roce_port_enable(hr_dev, HNS_ROCE_PORT_UP);
+
return 0;
+error_failed_free_mr_init:
+ hns_roce_des_qp_free(hr_dev);
+
+error_failed_des_qp_init:
+ hns_roce_tptr_free(hr_dev);
+
+error_failed_tptr_init:
+ hns_roce_bt_free(hr_dev);
+
error_failed_bt_init:
- hns_roce_port_enable(hr_dev, HNS_ROCE_PORT_DOWN);
hns_roce_raq_free(hr_dev);
error_failed_raq_init:
@@ -1022,8 +1595,11 @@
void hns_roce_v1_exit(struct hns_roce_dev *hr_dev)
{
- hns_roce_bt_free(hr_dev);
hns_roce_port_enable(hr_dev, HNS_ROCE_PORT_DOWN);
+ hns_roce_free_mr_free(hr_dev);
+ hns_roce_des_qp_free(hr_dev);
+ hns_roce_tptr_free(hr_dev);
+ hns_roce_bt_free(hr_dev);
hns_roce_raq_free(hr_dev);
hns_roce_db_free(hr_dev);
}
@@ -1061,6 +1637,14 @@
u32 *p;
u32 val;
+ /*
+ * When mac changed, loopback may fail
+ * because of smac not equal to dmac.
+ * We Need to release and create reserved qp again.
+ */
+ if (hr_dev->hw->dereg_mr && hns_roce_v1_recreate_lp_qp(hr_dev))
+ dev_warn(&hr_dev->pdev->dev, "recreate lp qp timeout!\n");
+
p = (u32 *)(&addr[0]);
reg_smac_l = *p;
roce_raw_write(reg_smac_l, hr_dev->reg_base + ROCEE_SMAC_L_0_REG +
@@ -1293,9 +1877,9 @@
}
/*
- * Now backwards through the CQ, removing CQ entries
- * that match our QP by overwriting them with next entries.
- */
+ * Now backwards through the CQ, removing CQ entries
+ * that match our QP by overwriting them with next entries.
+ */
while ((int) --prod_index - (int) hr_cq->cons_index >= 0) {
cqe = get_cqe(hr_cq, prod_index & hr_cq->ib_cq.cqe);
if ((roce_get_field(cqe->cqe_byte_16, CQE_BYTE_16_LOCAL_QPN_M,
@@ -1317,9 +1901,9 @@
if (nfreed) {
hr_cq->cons_index += nfreed;
/*
- * Make sure update of buffer contents is done before
- * updating consumer index.
- */
+ * Make sure update of buffer contents is done before
+ * updating consumer index.
+ */
wmb();
hns_roce_v1_cq_set_ci(hr_cq, hr_cq->cons_index);
@@ -1339,14 +1923,21 @@
dma_addr_t dma_handle, int nent, u32 vector)
{
struct hns_roce_cq_context *cq_context = NULL;
- void __iomem *tptr_addr;
+ struct hns_roce_buf_list *tptr_buf;
+ struct hns_roce_v1_priv *priv;
+ dma_addr_t tptr_dma_addr;
+ int offset;
+
+ priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv;
+ tptr_buf = &priv->tptr_table.tptr_buf;
cq_context = mb_buf;
memset(cq_context, 0, sizeof(*cq_context));
- tptr_addr = 0;
- hr_dev->priv_addr = tptr_addr;
- hr_cq->tptr_addr = tptr_addr;
+ /* Get the tptr for this CQ. */
+ offset = hr_cq->cqn * HNS_ROCE_V1_TPTR_ENTRY_SIZE;
+ tptr_dma_addr = tptr_buf->map + offset;
+ hr_cq->tptr_addr = (u16 *)(tptr_buf->buf + offset);
/* Register cq_context members */
roce_set_field(cq_context->cqc_byte_4,
@@ -1390,10 +1981,10 @@
roce_set_field(cq_context->cqc_byte_20,
CQ_CONTEXT_CQC_BYTE_20_CQE_TPTR_ADDR_H_M,
CQ_CONTEXT_CQC_BYTE_20_CQE_TPTR_ADDR_H_S,
- (u64)tptr_addr >> 44);
+ tptr_dma_addr >> 44);
cq_context->cqc_byte_20 = cpu_to_le32(cq_context->cqc_byte_20);
- cq_context->cqe_tptr_addr_l = (u32)((u64)tptr_addr >> 12);
+ cq_context->cqe_tptr_addr_l = (u32)(tptr_dma_addr >> 12);
roce_set_field(cq_context->cqc_byte_32,
CQ_CONTEXT_CQC_BYTE_32_CUR_CQE_BA1_H_M,
@@ -1407,7 +1998,7 @@
roce_set_bit(cq_context->cqc_byte_32,
CQ_CQNTEXT_CQC_BYTE_32_TYPE_OF_COMPLETION_NOTIFICATION_S,
0);
- /*The initial value of cq's ci is 0 */
+ /* The initial value of cq's ci is 0 */
roce_set_field(cq_context->cqc_byte_32,
CQ_CONTEXT_CQC_BYTE_32_CQ_CONS_IDX_M,
CQ_CONTEXT_CQC_BYTE_32_CQ_CONS_IDX_S, 0);
@@ -1424,9 +2015,9 @@
notification_flag = (flags & IB_CQ_SOLICITED_MASK) ==
IB_CQ_SOLICITED ? CQ_DB_REQ_NOT : CQ_DB_REQ_NOT_SOL;
/*
- * flags = 0; Notification Flag = 1, next
- * flags = 1; Notification Flag = 0, solocited
- */
+ * flags = 0; Notification Flag = 1, next
+ * flags = 1; Notification Flag = 0, solocited
+ */
doorbell[0] = hr_cq->cons_index & ((hr_cq->cq_depth << 1) - 1);
roce_set_bit(doorbell[1], ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_HW_SYNS_S, 1);
roce_set_field(doorbell[1], ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_CMD_M,
@@ -1581,10 +2172,10 @@
wq = &(*cur_qp)->sq;
if ((*cur_qp)->sq_signal_bits) {
/*
- * If sg_signal_bit is 1,
- * firstly tail pointer updated to wqe
- * which current cqe correspond to
- */
+ * If sg_signal_bit is 1,
+ * firstly tail pointer updated to wqe
+ * which current cqe correspond to
+ */
wqe_ctr = (u16)roce_get_field(cqe->cqe_byte_4,
CQE_BYTE_4_WQE_INDEX_M,
CQE_BYTE_4_WQE_INDEX_S);
@@ -1659,8 +2250,14 @@
break;
}
- if (npolled)
+ if (npolled) {
+ *hr_cq->tptr_addr = hr_cq->cons_index &
+ ((hr_cq->cq_depth << 1) - 1);
+
+ /* Memroy barrier */
+ wmb();
hns_roce_v1_cq_set_ci(hr_cq, hr_cq->cons_index);
+ }
spin_unlock_irqrestore(&hr_cq->lock, flags);
@@ -1799,12 +2396,12 @@
if (op[cur_state][new_state] == HNS_ROCE_CMD_2RST_QP)
return hns_roce_cmd_mbox(hr_dev, 0, 0, hr_qp->qpn, 2,
HNS_ROCE_CMD_2RST_QP,
- HNS_ROCE_CMD_TIME_CLASS_A);
+ HNS_ROCE_CMD_TIMEOUT_MSECS);
if (op[cur_state][new_state] == HNS_ROCE_CMD_2ERR_QP)
return hns_roce_cmd_mbox(hr_dev, 0, 0, hr_qp->qpn, 2,
HNS_ROCE_CMD_2ERR_QP,
- HNS_ROCE_CMD_TIME_CLASS_A);
+ HNS_ROCE_CMD_TIMEOUT_MSECS);
mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
if (IS_ERR(mailbox))
@@ -1814,7 +2411,7 @@
ret = hns_roce_cmd_mbox(hr_dev, mailbox->dma, 0, hr_qp->qpn, 0,
op[cur_state][new_state],
- HNS_ROCE_CMD_TIME_CLASS_C);
+ HNS_ROCE_CMD_TIMEOUT_MSECS);
hns_roce_free_cmd_mailbox(hr_dev, mailbox);
return ret;
@@ -2000,11 +2597,11 @@
}
/*
- *Reset to init
- * Mandatory param:
- * IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT | IB_QP_ACCESS_FLAGS
- * Optional param: NA
- */
+ * Reset to init
+ * Mandatory param:
+ * IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT | IB_QP_ACCESS_FLAGS
+ * Optional param: NA
+ */
if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) {
roce_set_field(context->qpc_bytes_4,
QP_CONTEXT_QPC_BYTES_4_TRANSPORT_SERVICE_TYPE_M,
@@ -2172,24 +2769,14 @@
QP_CONTEXT_QPC_BYTE_32_SIGNALING_TYPE_S,
hr_qp->sq_signal_bits);
- for (port = 0; port < hr_dev->caps.num_ports; port++) {
- smac = (u8 *)hr_dev->dev_addr[port];
- dev_dbg(dev, "smac: %2x: %2x: %2x: %2x: %2x: %2x\n",
- smac[0], smac[1], smac[2], smac[3], smac[4],
- smac[5]);
- if ((dmac[0] == smac[0]) && (dmac[1] == smac[1]) &&
- (dmac[2] == smac[2]) && (dmac[3] == smac[3]) &&
- (dmac[4] == smac[4]) && (dmac[5] == smac[5])) {
- roce_set_bit(context->qpc_bytes_32,
- QP_CONTEXT_QPC_BYTE_32_LOOPBACK_INDICATOR_S,
- 1);
- break;
- }
- }
-
- if (hr_dev->loop_idc == 0x1)
+ port = (attr_mask & IB_QP_PORT) ? (attr->port_num - 1) :
+ hr_qp->port;
+ smac = (u8 *)hr_dev->dev_addr[port];
+ /* when dmac equals smac or loop_idc is 1, it should loopback */
+ if (ether_addr_equal_unaligned(dmac, smac) ||
+ hr_dev->loop_idc == 0x1)
roce_set_bit(context->qpc_bytes_32,
- QP_CONTEXT_QPC_BYTE_32_LOOPBACK_INDICATOR_S, 1);
+ QP_CONTEXT_QPC_BYTE_32_LOOPBACK_INDICATOR_S, 1);
roce_set_bit(context->qpc_bytes_32,
QP_CONTEXT_QPC_BYTE_32_GLOBAL_HEADER_S,
@@ -2509,7 +3096,7 @@
/* Every status migrate must change state */
roce_set_field(context->qpc_bytes_144,
QP_CONTEXT_QPC_BYTES_144_QP_STATE_M,
- QP_CONTEXT_QPC_BYTES_144_QP_STATE_S, attr->qp_state);
+ QP_CONTEXT_QPC_BYTES_144_QP_STATE_S, new_state);
/* SW pass context to HW */
ret = hns_roce_v1_qp_modify(hr_dev, &hr_qp->mtt,
@@ -2522,9 +3109,9 @@
}
/*
- * Use rst2init to instead of init2init with drv,
- * need to hw to flash RQ HEAD by DB again
- */
+ * Use rst2init to instead of init2init with drv,
+ * need to hw to flash RQ HEAD by DB again
+ */
if (cur_state == IB_QPS_INIT && new_state == IB_QPS_INIT) {
/* Memory barrier */
wmb();
@@ -2619,7 +3206,7 @@
ret = hns_roce_cmd_mbox(hr_dev, 0, mailbox->dma, hr_qp->qpn, 0,
HNS_ROCE_CMD_QUERY_QP,
- HNS_ROCE_CMD_TIME_CLASS_A);
+ HNS_ROCE_CMD_TIMEOUT_MSECS);
if (!ret)
memcpy(hr_context, mailbox->buf, sizeof(*hr_context));
else
@@ -2630,8 +3217,78 @@
return ret;
}
-int hns_roce_v1_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
- int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr)
+static int hns_roce_v1_q_sqp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
+ int qp_attr_mask,
+ struct ib_qp_init_attr *qp_init_attr)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
+ struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
+ struct hns_roce_sqp_context context;
+ u32 addr;
+
+ mutex_lock(&hr_qp->mutex);
+
+ if (hr_qp->state == IB_QPS_RESET) {
+ qp_attr->qp_state = IB_QPS_RESET;
+ goto done;
+ }
+
+ addr = ROCEE_QP1C_CFG0_0_REG +
+ hr_qp->port * sizeof(struct hns_roce_sqp_context);
+ context.qp1c_bytes_4 = roce_read(hr_dev, addr);
+ context.sq_rq_bt_l = roce_read(hr_dev, addr + 1);
+ context.qp1c_bytes_12 = roce_read(hr_dev, addr + 2);
+ context.qp1c_bytes_16 = roce_read(hr_dev, addr + 3);
+ context.qp1c_bytes_20 = roce_read(hr_dev, addr + 4);
+ context.cur_rq_wqe_ba_l = roce_read(hr_dev, addr + 5);
+ context.qp1c_bytes_28 = roce_read(hr_dev, addr + 6);
+ context.qp1c_bytes_32 = roce_read(hr_dev, addr + 7);
+ context.cur_sq_wqe_ba_l = roce_read(hr_dev, addr + 8);
+ context.qp1c_bytes_40 = roce_read(hr_dev, addr + 9);
+
+ hr_qp->state = roce_get_field(context.qp1c_bytes_4,
+ QP1C_BYTES_4_QP_STATE_M,
+ QP1C_BYTES_4_QP_STATE_S);
+ qp_attr->qp_state = hr_qp->state;
+ qp_attr->path_mtu = IB_MTU_256;
+ qp_attr->path_mig_state = IB_MIG_ARMED;
+ qp_attr->qkey = QKEY_VAL;
+ qp_attr->rq_psn = 0;
+ qp_attr->sq_psn = 0;
+ qp_attr->dest_qp_num = 1;
+ qp_attr->qp_access_flags = 6;
+
+ qp_attr->pkey_index = roce_get_field(context.qp1c_bytes_20,
+ QP1C_BYTES_20_PKEY_IDX_M,
+ QP1C_BYTES_20_PKEY_IDX_S);
+ qp_attr->port_num = hr_qp->port + 1;
+ qp_attr->sq_draining = 0;
+ qp_attr->max_rd_atomic = 0;
+ qp_attr->max_dest_rd_atomic = 0;
+ qp_attr->min_rnr_timer = 0;
+ qp_attr->timeout = 0;
+ qp_attr->retry_cnt = 0;
+ qp_attr->rnr_retry = 0;
+ qp_attr->alt_timeout = 0;
+
+done:
+ qp_attr->cur_qp_state = qp_attr->qp_state;
+ qp_attr->cap.max_recv_wr = hr_qp->rq.wqe_cnt;
+ qp_attr->cap.max_recv_sge = hr_qp->rq.max_gs;
+ qp_attr->cap.max_send_wr = hr_qp->sq.wqe_cnt;
+ qp_attr->cap.max_send_sge = hr_qp->sq.max_gs;
+ qp_attr->cap.max_inline_data = 0;
+ qp_init_attr->cap = qp_attr->cap;
+ qp_init_attr->create_flags = 0;
+
+ mutex_unlock(&hr_qp->mutex);
+
+ return 0;
+}
+
+static int hns_roce_v1_q_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
+ int qp_attr_mask,
+ struct ib_qp_init_attr *qp_init_attr)
{
struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
@@ -2725,9 +3382,7 @@
qp_attr->pkey_index = roce_get_field(context->qpc_bytes_12,
QP_CONTEXT_QPC_BYTES_12_P_KEY_INDEX_M,
QP_CONTEXT_QPC_BYTES_12_P_KEY_INDEX_S);
- qp_attr->port_num = (u8)roce_get_field(context->qpc_bytes_156,
- QP_CONTEXT_QPC_BYTES_156_PORT_NUM_M,
- QP_CONTEXT_QPC_BYTES_156_PORT_NUM_S) + 1;
+ qp_attr->port_num = hr_qp->port + 1;
qp_attr->sq_draining = 0;
qp_attr->max_rd_atomic = roce_get_field(context->qpc_bytes_156,
QP_CONTEXT_QPC_BYTES_156_INITIATOR_DEPTH_M,
@@ -2767,136 +3422,399 @@
return ret;
}
-static void hns_roce_v1_destroy_qp_common(struct hns_roce_dev *hr_dev,
- struct hns_roce_qp *hr_qp,
- int is_user)
+int hns_roce_v1_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
+ int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr)
{
- u32 sdbinvcnt;
- unsigned long end = 0;
- u32 sdbinvcnt_val;
- u32 sdbsendptr_val;
- u32 sdbisusepr_val;
- struct hns_roce_cq *send_cq, *recv_cq;
+ struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
+
+ return hr_qp->doorbell_qpn <= 1 ?
+ hns_roce_v1_q_sqp(ibqp, qp_attr, qp_attr_mask, qp_init_attr) :
+ hns_roce_v1_q_qp(ibqp, qp_attr, qp_attr_mask, qp_init_attr);
+}
+
+static int check_qp_db_process_status(struct hns_roce_dev *hr_dev,
+ struct hns_roce_qp *hr_qp,
+ u32 sdb_issue_ptr,
+ u32 *sdb_inv_cnt,
+ u32 *wait_stage)
+{
struct device *dev = &hr_dev->pdev->dev;
+ u32 sdb_retry_cnt, old_retry;
+ u32 sdb_send_ptr, old_send;
+ u32 success_flags = 0;
+ u32 cur_cnt, old_cnt;
+ unsigned long end;
+ u32 send_ptr;
+ u32 inv_cnt;
+ u32 tsp_st;
- if (hr_qp->ibqp.qp_type == IB_QPT_RC) {
- if (hr_qp->state != IB_QPS_RESET) {
- /*
- * Set qp to ERR,
- * waiting for hw complete processing all dbs
- */
- if (hns_roce_v1_qp_modify(hr_dev, NULL,
- to_hns_roce_state(
- (enum ib_qp_state)hr_qp->state),
- HNS_ROCE_QP_STATE_ERR, NULL,
- hr_qp))
- dev_err(dev, "modify QP %06lx to ERR failed.\n",
- hr_qp->qpn);
+ if (*wait_stage > HNS_ROCE_V1_DB_STAGE2 ||
+ *wait_stage < HNS_ROCE_V1_DB_STAGE1) {
+ dev_err(dev, "QP(0x%lx) db status wait stage(%d) error!\n",
+ hr_qp->qpn, *wait_stage);
+ return -EINVAL;
+ }
- /* Record issued doorbell */
- sdbisusepr_val = roce_read(hr_dev,
- ROCEE_SDB_ISSUE_PTR_REG);
- /*
- * Query db process status,
- * until hw process completely
- */
- end = msecs_to_jiffies(
- HNS_ROCE_QP_DESTROY_TIMEOUT_MSECS) + jiffies;
- do {
- sdbsendptr_val = roce_read(hr_dev,
+ /* Calculate the total timeout for the entire verification process */
+ end = msecs_to_jiffies(HNS_ROCE_V1_CHECK_DB_TIMEOUT_MSECS) + jiffies;
+
+ if (*wait_stage == HNS_ROCE_V1_DB_STAGE1) {
+ /* Query db process status, until hw process completely */
+ sdb_send_ptr = roce_read(hr_dev, ROCEE_SDB_SEND_PTR_REG);
+ while (roce_hw_index_cmp_lt(sdb_send_ptr, sdb_issue_ptr,
+ ROCEE_SDB_PTR_CMP_BITS)) {
+ if (!time_before(jiffies, end)) {
+ dev_dbg(dev, "QP(0x%lx) db process stage1 timeout. issue 0x%x send 0x%x.\n",
+ hr_qp->qpn, sdb_issue_ptr,
+ sdb_send_ptr);
+ return 0;
+ }
+
+ msleep(HNS_ROCE_V1_CHECK_DB_SLEEP_MSECS);
+ sdb_send_ptr = roce_read(hr_dev,
ROCEE_SDB_SEND_PTR_REG);
- if (!time_before(jiffies, end)) {
- dev_err(dev, "destroy qp(0x%lx) timeout!!!",
- hr_qp->qpn);
- break;
- }
- } while ((short)(roce_get_field(sdbsendptr_val,
- ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M,
- ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S) -
- roce_get_field(sdbisusepr_val,
- ROCEE_SDB_ISSUE_PTR_SDB_ISSUE_PTR_M,
- ROCEE_SDB_ISSUE_PTR_SDB_ISSUE_PTR_S)
- ) < 0);
+ }
- /* Get list pointer */
- sdbinvcnt = roce_read(hr_dev, ROCEE_SDB_INV_CNT_REG);
+ if (roce_get_field(sdb_issue_ptr,
+ ROCEE_SDB_ISSUE_PTR_SDB_ISSUE_PTR_M,
+ ROCEE_SDB_ISSUE_PTR_SDB_ISSUE_PTR_S) ==
+ roce_get_field(sdb_send_ptr,
+ ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M,
+ ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S)) {
+ old_send = roce_read(hr_dev, ROCEE_SDB_SEND_PTR_REG);
+ old_retry = roce_read(hr_dev, ROCEE_SDB_RETRY_CNT_REG);
- /* Query db's list status, until hw reversal */
do {
- sdbinvcnt_val = roce_read(hr_dev,
- ROCEE_SDB_INV_CNT_REG);
- if (!time_before(jiffies, end)) {
- dev_err(dev, "destroy qp(0x%lx) timeout!!!",
- hr_qp->qpn);
- dev_err(dev, "SdbInvCnt = 0x%x\n",
- sdbinvcnt_val);
- break;
+ tsp_st = roce_read(hr_dev, ROCEE_TSP_BP_ST_REG);
+ if (roce_get_bit(tsp_st,
+ ROCEE_TSP_BP_ST_QH_FIFO_ENTRY_S) == 1) {
+ *wait_stage = HNS_ROCE_V1_DB_WAIT_OK;
+ return 0;
}
- } while ((short)(roce_get_field(sdbinvcnt_val,
- ROCEE_SDB_INV_CNT_SDB_INV_CNT_M,
- ROCEE_SDB_INV_CNT_SDB_INV_CNT_S) -
- (sdbinvcnt + SDB_INV_CNT_OFFSET)) < 0);
- /* Modify qp to reset before destroying qp */
- if (hns_roce_v1_qp_modify(hr_dev, NULL,
- to_hns_roce_state(
- (enum ib_qp_state)hr_qp->state),
- HNS_ROCE_QP_STATE_RST, NULL, hr_qp))
- dev_err(dev, "modify QP %06lx to RESET failed.\n",
- hr_qp->qpn);
+ if (!time_before(jiffies, end)) {
+ dev_dbg(dev, "QP(0x%lx) db process stage1 timeout when send ptr equals issue ptr.\n"
+ "issue 0x%x send 0x%x.\n",
+ hr_qp->qpn, sdb_issue_ptr,
+ sdb_send_ptr);
+ return 0;
+ }
+
+ msleep(HNS_ROCE_V1_CHECK_DB_SLEEP_MSECS);
+
+ sdb_send_ptr = roce_read(hr_dev,
+ ROCEE_SDB_SEND_PTR_REG);
+ sdb_retry_cnt = roce_read(hr_dev,
+ ROCEE_SDB_RETRY_CNT_REG);
+ cur_cnt = roce_get_field(sdb_send_ptr,
+ ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M,
+ ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S) +
+ roce_get_field(sdb_retry_cnt,
+ ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_M,
+ ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_S);
+ if (!roce_get_bit(tsp_st,
+ ROCEE_CNT_CLR_CE_CNT_CLR_CE_S)) {
+ old_cnt = roce_get_field(old_send,
+ ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M,
+ ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S) +
+ roce_get_field(old_retry,
+ ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_M,
+ ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_S);
+ if (cur_cnt - old_cnt > SDB_ST_CMP_VAL)
+ success_flags = 1;
+ } else {
+ old_cnt = roce_get_field(old_send,
+ ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M,
+ ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S);
+ if (cur_cnt - old_cnt > SDB_ST_CMP_VAL)
+ success_flags = 1;
+ else {
+ send_ptr = roce_get_field(old_send,
+ ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M,
+ ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S) +
+ roce_get_field(sdb_retry_cnt,
+ ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_M,
+ ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_S);
+ roce_set_field(old_send,
+ ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M,
+ ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S,
+ send_ptr);
+ }
+ }
+ } while (!success_flags);
+ }
+
+ *wait_stage = HNS_ROCE_V1_DB_STAGE2;
+
+ /* Get list pointer */
+ *sdb_inv_cnt = roce_read(hr_dev, ROCEE_SDB_INV_CNT_REG);
+ dev_dbg(dev, "QP(0x%lx) db process stage2. inv cnt = 0x%x.\n",
+ hr_qp->qpn, *sdb_inv_cnt);
+ }
+
+ if (*wait_stage == HNS_ROCE_V1_DB_STAGE2) {
+ /* Query db's list status, until hw reversal */
+ inv_cnt = roce_read(hr_dev, ROCEE_SDB_INV_CNT_REG);
+ while (roce_hw_index_cmp_lt(inv_cnt,
+ *sdb_inv_cnt + SDB_INV_CNT_OFFSET,
+ ROCEE_SDB_CNT_CMP_BITS)) {
+ if (!time_before(jiffies, end)) {
+ dev_dbg(dev, "QP(0x%lx) db process stage2 timeout. inv cnt 0x%x.\n",
+ hr_qp->qpn, inv_cnt);
+ return 0;
+ }
+
+ msleep(HNS_ROCE_V1_CHECK_DB_SLEEP_MSECS);
+ inv_cnt = roce_read(hr_dev, ROCEE_SDB_INV_CNT_REG);
+ }
+
+ *wait_stage = HNS_ROCE_V1_DB_WAIT_OK;
+ }
+
+ return 0;
+}
+
+static int check_qp_reset_state(struct hns_roce_dev *hr_dev,
+ struct hns_roce_qp *hr_qp,
+ struct hns_roce_qp_work *qp_work_entry,
+ int *is_timeout)
+{
+ struct device *dev = &hr_dev->pdev->dev;
+ u32 sdb_issue_ptr;
+ int ret;
+
+ if (hr_qp->state != IB_QPS_RESET) {
+ /* Set qp to ERR, waiting for hw complete processing all dbs */
+ ret = hns_roce_v1_modify_qp(&hr_qp->ibqp, NULL, 0, hr_qp->state,
+ IB_QPS_ERR);
+ if (ret) {
+ dev_err(dev, "Modify QP(0x%lx) to ERR failed!\n",
+ hr_qp->qpn);
+ return ret;
+ }
+
+ /* Record issued doorbell */
+ sdb_issue_ptr = roce_read(hr_dev, ROCEE_SDB_ISSUE_PTR_REG);
+ qp_work_entry->sdb_issue_ptr = sdb_issue_ptr;
+ qp_work_entry->db_wait_stage = HNS_ROCE_V1_DB_STAGE1;
+
+ /* Query db process status, until hw process completely */
+ ret = check_qp_db_process_status(hr_dev, hr_qp, sdb_issue_ptr,
+ &qp_work_entry->sdb_inv_cnt,
+ &qp_work_entry->db_wait_stage);
+ if (ret) {
+ dev_err(dev, "Check QP(0x%lx) db process status failed!\n",
+ hr_qp->qpn);
+ return ret;
+ }
+
+ if (qp_work_entry->db_wait_stage != HNS_ROCE_V1_DB_WAIT_OK) {
+ qp_work_entry->sche_cnt = 0;
+ *is_timeout = 1;
+ return 0;
+ }
+
+ /* Modify qp to reset before destroying qp */
+ ret = hns_roce_v1_modify_qp(&hr_qp->ibqp, NULL, 0, hr_qp->state,
+ IB_QPS_RESET);
+ if (ret) {
+ dev_err(dev, "Modify QP(0x%lx) to RST failed!\n",
+ hr_qp->qpn);
+ return ret;
}
}
- send_cq = to_hr_cq(hr_qp->ibqp.send_cq);
- recv_cq = to_hr_cq(hr_qp->ibqp.recv_cq);
+ return 0;
+}
- hns_roce_lock_cqs(send_cq, recv_cq);
+static void hns_roce_v1_destroy_qp_work_fn(struct work_struct *work)
+{
+ struct hns_roce_qp_work *qp_work_entry;
+ struct hns_roce_v1_priv *priv;
+ struct hns_roce_dev *hr_dev;
+ struct hns_roce_qp *hr_qp;
+ struct device *dev;
+ int ret;
- if (!is_user) {
- __hns_roce_v1_cq_clean(recv_cq, hr_qp->qpn, hr_qp->ibqp.srq ?
- to_hr_srq(hr_qp->ibqp.srq) : NULL);
- if (send_cq != recv_cq)
- __hns_roce_v1_cq_clean(send_cq, hr_qp->qpn, NULL);
+ qp_work_entry = container_of(work, struct hns_roce_qp_work, work);
+ hr_dev = to_hr_dev(qp_work_entry->ib_dev);
+ dev = &hr_dev->pdev->dev;
+ priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv;
+ hr_qp = qp_work_entry->qp;
+
+ dev_dbg(dev, "Schedule destroy QP(0x%lx) work.\n", hr_qp->qpn);
+
+ qp_work_entry->sche_cnt++;
+
+ /* Query db process status, until hw process completely */
+ ret = check_qp_db_process_status(hr_dev, hr_qp,
+ qp_work_entry->sdb_issue_ptr,
+ &qp_work_entry->sdb_inv_cnt,
+ &qp_work_entry->db_wait_stage);
+ if (ret) {
+ dev_err(dev, "Check QP(0x%lx) db process status failed!\n",
+ hr_qp->qpn);
+ return;
+ }
+
+ if (qp_work_entry->db_wait_stage != HNS_ROCE_V1_DB_WAIT_OK &&
+ priv->des_qp.requeue_flag) {
+ queue_work(priv->des_qp.qp_wq, work);
+ return;
+ }
+
+ /* Modify qp to reset before destroying qp */
+ ret = hns_roce_v1_modify_qp(&hr_qp->ibqp, NULL, 0, hr_qp->state,
+ IB_QPS_RESET);
+ if (ret) {
+ dev_err(dev, "Modify QP(0x%lx) to RST failed!\n", hr_qp->qpn);
+ return;
}
hns_roce_qp_remove(hr_dev, hr_qp);
-
- hns_roce_unlock_cqs(send_cq, recv_cq);
-
hns_roce_qp_free(hr_dev, hr_qp);
- /* Not special_QP, free their QPN */
- if ((hr_qp->ibqp.qp_type == IB_QPT_RC) ||
- (hr_qp->ibqp.qp_type == IB_QPT_UC) ||
- (hr_qp->ibqp.qp_type == IB_QPT_UD))
+ if (hr_qp->ibqp.qp_type == IB_QPT_RC) {
+ /* RC QP, release QPN */
hns_roce_release_range_qp(hr_dev, hr_qp->qpn, 1);
+ kfree(hr_qp);
+ } else
+ kfree(hr_to_hr_sqp(hr_qp));
- hns_roce_mtt_cleanup(hr_dev, &hr_qp->mtt);
+ kfree(qp_work_entry);
- if (is_user) {
- ib_umem_release(hr_qp->umem);
- } else {
- kfree(hr_qp->sq.wrid);
- kfree(hr_qp->rq.wrid);
- hns_roce_buf_free(hr_dev, hr_qp->buff_size, &hr_qp->hr_buf);
- }
+ dev_dbg(dev, "Accomplished destroy QP(0x%lx) work.\n", hr_qp->qpn);
}
int hns_roce_v1_destroy_qp(struct ib_qp *ibqp)
{
struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
+ struct device *dev = &hr_dev->pdev->dev;
+ struct hns_roce_qp_work qp_work_entry;
+ struct hns_roce_qp_work *qp_work;
+ struct hns_roce_v1_priv *priv;
+ struct hns_roce_cq *send_cq, *recv_cq;
+ int is_user = !!ibqp->pd->uobject;
+ int is_timeout = 0;
+ int ret;
- hns_roce_v1_destroy_qp_common(hr_dev, hr_qp, !!ibqp->pd->uobject);
+ ret = check_qp_reset_state(hr_dev, hr_qp, &qp_work_entry, &is_timeout);
+ if (ret) {
+ dev_err(dev, "QP reset state check failed(%d)!\n", ret);
+ return ret;
+ }
- if (hr_qp->ibqp.qp_type == IB_QPT_GSI)
- kfree(hr_to_hr_sqp(hr_qp));
- else
- kfree(hr_qp);
+ send_cq = to_hr_cq(hr_qp->ibqp.send_cq);
+ recv_cq = to_hr_cq(hr_qp->ibqp.recv_cq);
+
+ hns_roce_lock_cqs(send_cq, recv_cq);
+ if (!is_user) {
+ __hns_roce_v1_cq_clean(recv_cq, hr_qp->qpn, hr_qp->ibqp.srq ?
+ to_hr_srq(hr_qp->ibqp.srq) : NULL);
+ if (send_cq != recv_cq)
+ __hns_roce_v1_cq_clean(send_cq, hr_qp->qpn, NULL);
+ }
+ hns_roce_unlock_cqs(send_cq, recv_cq);
+
+ if (!is_timeout) {
+ hns_roce_qp_remove(hr_dev, hr_qp);
+ hns_roce_qp_free(hr_dev, hr_qp);
+
+ /* RC QP, release QPN */
+ if (hr_qp->ibqp.qp_type == IB_QPT_RC)
+ hns_roce_release_range_qp(hr_dev, hr_qp->qpn, 1);
+ }
+
+ hns_roce_mtt_cleanup(hr_dev, &hr_qp->mtt);
+
+ if (is_user)
+ ib_umem_release(hr_qp->umem);
+ else {
+ kfree(hr_qp->sq.wrid);
+ kfree(hr_qp->rq.wrid);
+
+ hns_roce_buf_free(hr_dev, hr_qp->buff_size, &hr_qp->hr_buf);
+ }
+
+ if (!is_timeout) {
+ if (hr_qp->ibqp.qp_type == IB_QPT_RC)
+ kfree(hr_qp);
+ else
+ kfree(hr_to_hr_sqp(hr_qp));
+ } else {
+ qp_work = kzalloc(sizeof(*qp_work), GFP_KERNEL);
+ if (!qp_work)
+ return -ENOMEM;
+
+ INIT_WORK(&qp_work->work, hns_roce_v1_destroy_qp_work_fn);
+ qp_work->ib_dev = &hr_dev->ib_dev;
+ qp_work->qp = hr_qp;
+ qp_work->db_wait_stage = qp_work_entry.db_wait_stage;
+ qp_work->sdb_issue_ptr = qp_work_entry.sdb_issue_ptr;
+ qp_work->sdb_inv_cnt = qp_work_entry.sdb_inv_cnt;
+ qp_work->sche_cnt = qp_work_entry.sche_cnt;
+
+ priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv;
+ queue_work(priv->des_qp.qp_wq, &qp_work->work);
+ dev_dbg(dev, "Begin destroy QP(0x%lx) work.\n", hr_qp->qpn);
+ }
return 0;
}
+int hns_roce_v1_destroy_cq(struct ib_cq *ibcq)
+{
+ struct hns_roce_dev *hr_dev = to_hr_dev(ibcq->device);
+ struct hns_roce_cq *hr_cq = to_hr_cq(ibcq);
+ struct device *dev = &hr_dev->pdev->dev;
+ u32 cqe_cnt_ori;
+ u32 cqe_cnt_cur;
+ u32 cq_buf_size;
+ int wait_time = 0;
+ int ret = 0;
+
+ hns_roce_free_cq(hr_dev, hr_cq);
+
+ /*
+ * Before freeing cq buffer, we need to ensure that the outstanding CQE
+ * have been written by checking the CQE counter.
+ */
+ cqe_cnt_ori = roce_read(hr_dev, ROCEE_SCAEP_WR_CQE_CNT);
+ while (1) {
+ if (roce_read(hr_dev, ROCEE_CAEP_CQE_WCMD_EMPTY) &
+ HNS_ROCE_CQE_WCMD_EMPTY_BIT)
+ break;
+
+ cqe_cnt_cur = roce_read(hr_dev, ROCEE_SCAEP_WR_CQE_CNT);
+ if ((cqe_cnt_cur - cqe_cnt_ori) >= HNS_ROCE_MIN_CQE_CNT)
+ break;
+
+ msleep(HNS_ROCE_EACH_FREE_CQ_WAIT_MSECS);
+ if (wait_time > HNS_ROCE_MAX_FREE_CQ_WAIT_CNT) {
+ dev_warn(dev, "Destroy cq 0x%lx timeout!\n",
+ hr_cq->cqn);
+ ret = -ETIMEDOUT;
+ break;
+ }
+ wait_time++;
+ }
+
+ hns_roce_mtt_cleanup(hr_dev, &hr_cq->hr_buf.hr_mtt);
+
+ if (ibcq->uobject)
+ ib_umem_release(hr_cq->umem);
+ else {
+ /* Free the buff of stored cq */
+ cq_buf_size = (ibcq->cqe + 1) * hr_dev->caps.cq_entry_sz;
+ hns_roce_buf_free(hr_dev, cq_buf_size, &hr_cq->hr_buf.hr_buf);
+ }
+
+ kfree(hr_cq);
+
+ return ret;
+}
+
struct hns_roce_v1_priv hr_v1_priv;
struct hns_roce_hw hns_roce_hw_v1 = {
@@ -2917,5 +3835,7 @@
.post_recv = hns_roce_v1_post_recv,
.req_notify_cq = hns_roce_v1_req_notify_cq,
.poll_cq = hns_roce_v1_poll_cq,
+ .dereg_mr = hns_roce_v1_dereg_mr,
+ .destroy_cq = hns_roce_v1_destroy_cq,
.priv = &hr_v1_priv,
};
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.h b/drivers/infiniband/hw/hns/hns_roce_hw_v1.h
index 539b0a3b..b213b5e 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.h
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.h
@@ -58,6 +58,7 @@
#define HNS_ROCE_V1_PHY_UAR_NUM 8
#define HNS_ROCE_V1_GID_NUM 16
+#define HNS_ROCE_V1_RESV_QP 8
#define HNS_ROCE_V1_NUM_COMP_EQE 0x8000
#define HNS_ROCE_V1_NUM_ASYNC_EQE 0x400
@@ -102,8 +103,22 @@
#define HNS_ROCE_V1_EXT_ODB_ALFUL \
(HNS_ROCE_V1_EXT_ODB_DEPTH - HNS_ROCE_V1_DB_RSVD)
+#define HNS_ROCE_V1_DB_WAIT_OK 0
+#define HNS_ROCE_V1_DB_STAGE1 1
+#define HNS_ROCE_V1_DB_STAGE2 2
+#define HNS_ROCE_V1_CHECK_DB_TIMEOUT_MSECS 10000
+#define HNS_ROCE_V1_CHECK_DB_SLEEP_MSECS 20
+#define HNS_ROCE_V1_FREE_MR_TIMEOUT_MSECS 50000
+#define HNS_ROCE_V1_RECREATE_LP_QP_TIMEOUT_MSECS 10000
+#define HNS_ROCE_V1_FREE_MR_WAIT_VALUE 5
+#define HNS_ROCE_V1_RECREATE_LP_QP_WAIT_VALUE 20
+
#define HNS_ROCE_BT_RSV_BUF_SIZE (1 << 17)
+#define HNS_ROCE_V1_TPTR_ENTRY_SIZE 2
+#define HNS_ROCE_V1_TPTR_BUF_SIZE \
+ (HNS_ROCE_V1_TPTR_ENTRY_SIZE * HNS_ROCE_V1_MAX_CQ_NUM)
+
#define HNS_ROCE_ODB_POLL_MODE 0
#define HNS_ROCE_SDB_NORMAL_MODE 0
@@ -140,6 +155,7 @@
#define SQ_PSN_SHIFT 8
#define QKEY_VAL 0x80010000
#define SDB_INV_CNT_OFFSET 8
+#define SDB_ST_CMP_VAL 8
struct hns_roce_cq_context {
u32 cqc_byte_4;
@@ -436,6 +452,8 @@
#define UD_SEND_WQE_U32_8_DMAC_5_M \
(((1UL << 8) - 1) << UD_SEND_WQE_U32_8_DMAC_5_S)
+#define UD_SEND_WQE_U32_8_LOOPBACK_INDICATOR_S 22
+
#define UD_SEND_WQE_U32_8_OPERATION_TYPE_S 16
#define UD_SEND_WQE_U32_8_OPERATION_TYPE_M \
(((1UL << 4) - 1) << UD_SEND_WQE_U32_8_OPERATION_TYPE_S)
@@ -480,13 +498,17 @@
u32 qp1c_bytes_12;
u32 qp1c_bytes_16;
u32 qp1c_bytes_20;
- u32 qp1c_bytes_28;
u32 cur_rq_wqe_ba_l;
+ u32 qp1c_bytes_28;
u32 qp1c_bytes_32;
u32 cur_sq_wqe_ba_l;
u32 qp1c_bytes_40;
};
+#define QP1C_BYTES_4_QP_STATE_S 0
+#define QP1C_BYTES_4_QP_STATE_M \
+ (((1UL << 3) - 1) << QP1C_BYTES_4_QP_STATE_S)
+
#define QP1C_BYTES_4_SQ_WQE_SHIFT_S 8
#define QP1C_BYTES_4_SQ_WQE_SHIFT_M \
(((1UL << 4) - 1) << QP1C_BYTES_4_SQ_WQE_SHIFT_S)
@@ -952,6 +974,10 @@
#define SQ_DOORBELL_U32_4_SQ_HEAD_M \
(((1UL << 15) - 1) << SQ_DOORBELL_U32_4_SQ_HEAD_S)
+#define SQ_DOORBELL_U32_4_SL_S 16
+#define SQ_DOORBELL_U32_4_SL_M \
+ (((1UL << 2) - 1) << SQ_DOORBELL_U32_4_SL_S)
+
#define SQ_DOORBELL_U32_4_PORT_S 18
#define SQ_DOORBELL_U32_4_PORT_M (((1UL << 3) - 1) << SQ_DOORBELL_U32_4_PORT_S)
@@ -979,12 +1005,58 @@
struct hns_roce_buf_list cqc_buf;
};
+struct hns_roce_tptr_table {
+ struct hns_roce_buf_list tptr_buf;
+};
+
+struct hns_roce_qp_work {
+ struct work_struct work;
+ struct ib_device *ib_dev;
+ struct hns_roce_qp *qp;
+ u32 db_wait_stage;
+ u32 sdb_issue_ptr;
+ u32 sdb_inv_cnt;
+ u32 sche_cnt;
+};
+
+struct hns_roce_des_qp {
+ struct workqueue_struct *qp_wq;
+ int requeue_flag;
+};
+
+struct hns_roce_mr_free_work {
+ struct work_struct work;
+ struct ib_device *ib_dev;
+ struct completion *comp;
+ int comp_flag;
+ void *mr;
+};
+
+struct hns_roce_recreate_lp_qp_work {
+ struct work_struct work;
+ struct ib_device *ib_dev;
+ struct completion *comp;
+ int comp_flag;
+};
+
+struct hns_roce_free_mr {
+ struct workqueue_struct *free_mr_wq;
+ struct hns_roce_qp *mr_free_qp[HNS_ROCE_V1_RESV_QP];
+ struct hns_roce_cq *mr_free_cq;
+ struct hns_roce_pd *mr_free_pd;
+};
+
struct hns_roce_v1_priv {
struct hns_roce_db_table db_table;
struct hns_roce_raq_table raq_table;
struct hns_roce_bt_table bt_table;
+ struct hns_roce_tptr_table tptr_table;
+ struct hns_roce_des_qp des_qp;
+ struct hns_roce_free_mr free_mr;
};
int hns_dsaf_roce_reset(struct fwnode_handle *dsaf_fwnode, bool dereset);
+int hns_roce_v1_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc);
+int hns_roce_v1_destroy_qp(struct ib_qp *ibqp);
#endif
diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c
index 764e35a..4953d9c 100644
--- a/drivers/infiniband/hw/hns/hns_roce_main.c
+++ b/drivers/infiniband/hw/hns/hns_roce_main.c
@@ -35,52 +35,13 @@
#include <rdma/ib_addr.h>
#include <rdma/ib_smi.h>
#include <rdma/ib_user_verbs.h>
+#include <rdma/ib_cache.h>
#include "hns_roce_common.h"
#include "hns_roce_device.h"
-#include "hns_roce_user.h"
+#include <rdma/hns-abi.h>
#include "hns_roce_hem.h"
/**
- * hns_roce_addrconf_ifid_eui48 - Get default gid.
- * @eui: eui.
- * @vlan_id: gid
- * @dev: net device
- * Description:
- * MAC convert to GID
- * gid[0..7] = fe80 0000 0000 0000
- * gid[8] = mac[0] ^ 2
- * gid[9] = mac[1]
- * gid[10] = mac[2]
- * gid[11] = ff (VLAN ID high byte (4 MS bits))
- * gid[12] = fe (VLAN ID low byte)
- * gid[13] = mac[3]
- * gid[14] = mac[4]
- * gid[15] = mac[5]
- */
-static void hns_roce_addrconf_ifid_eui48(u8 *eui, u16 vlan_id,
- struct net_device *dev)
-{
- memcpy(eui, dev->dev_addr, 3);
- memcpy(eui + 5, dev->dev_addr + 3, 3);
- if (vlan_id < 0x1000) {
- eui[3] = vlan_id >> 8;
- eui[4] = vlan_id & 0xff;
- } else {
- eui[3] = 0xff;
- eui[4] = 0xfe;
- }
- eui[0] ^= 2;
-}
-
-static void hns_roce_make_default_gid(struct net_device *dev, union ib_gid *gid)
-{
- memset(gid, 0, sizeof(*gid));
- gid->raw[0] = 0xFE;
- gid->raw[1] = 0x80;
- hns_roce_addrconf_ifid_eui48(&gid->raw[8], 0xffff, dev);
-}
-
-/**
* hns_get_gid_index - Get gid index.
* @hr_dev: pointer to structure hns_roce_dev.
* @port: port, value range: 0 ~ MAX
@@ -96,30 +57,6 @@
return gid_index * hr_dev->caps.num_ports + port;
}
-static int hns_roce_set_gid(struct hns_roce_dev *hr_dev, u8 port, int gid_index,
- union ib_gid *gid)
-{
- struct device *dev = &hr_dev->pdev->dev;
- u8 gid_idx = 0;
-
- if (gid_index >= hr_dev->caps.gid_table_len[port]) {
- dev_err(dev, "gid_index %d illegal, port %d gid range: 0~%d\n",
- gid_index, port, hr_dev->caps.gid_table_len[port] - 1);
- return -EINVAL;
- }
-
- gid_idx = hns_get_gid_index(hr_dev, port, gid_index);
-
- if (!memcmp(gid, &hr_dev->iboe.gid_table[gid_idx], sizeof(*gid)))
- return -EINVAL;
-
- memcpy(&hr_dev->iboe.gid_table[gid_idx], gid, sizeof(*gid));
-
- hr_dev->hw->set_gid(hr_dev, port, gid_index, gid);
-
- return 0;
-}
-
static void hns_roce_set_mac(struct hns_roce_dev *hr_dev, u8 port, u8 *addr)
{
u8 phy_port;
@@ -135,27 +72,44 @@
hr_dev->hw->set_mac(hr_dev, phy_port, addr);
}
-static void hns_roce_set_mtu(struct hns_roce_dev *hr_dev, u8 port, int mtu)
+static int hns_roce_add_gid(struct ib_device *device, u8 port_num,
+ unsigned int index, const union ib_gid *gid,
+ const struct ib_gid_attr *attr, void **context)
{
- u8 phy_port = hr_dev->iboe.phy_port[port];
- enum ib_mtu tmp;
+ struct hns_roce_dev *hr_dev = to_hr_dev(device);
+ u8 port = port_num - 1;
+ unsigned long flags;
- tmp = iboe_get_mtu(mtu);
- if (!tmp)
- tmp = IB_MTU_256;
+ if (port >= hr_dev->caps.num_ports)
+ return -EINVAL;
- hr_dev->hw->set_mtu(hr_dev, phy_port, tmp);
+ spin_lock_irqsave(&hr_dev->iboe.lock, flags);
+
+ hr_dev->hw->set_gid(hr_dev, port, index, (union ib_gid *)gid);
+
+ spin_unlock_irqrestore(&hr_dev->iboe.lock, flags);
+
+ return 0;
}
-static void hns_roce_update_gids(struct hns_roce_dev *hr_dev, int port)
+static int hns_roce_del_gid(struct ib_device *device, u8 port_num,
+ unsigned int index, void **context)
{
- struct ib_event event;
+ struct hns_roce_dev *hr_dev = to_hr_dev(device);
+ union ib_gid zgid = { {0} };
+ u8 port = port_num - 1;
+ unsigned long flags;
- /* Refresh gid in ib_cache */
- event.device = &hr_dev->ib_dev;
- event.element.port_num = port + 1;
- event.event = IB_EVENT_GID_CHANGE;
- ib_dispatch_event(&event);
+ if (port >= hr_dev->caps.num_ports)
+ return -EINVAL;
+
+ spin_lock_irqsave(&hr_dev->iboe.lock, flags);
+
+ hr_dev->hw->set_gid(hr_dev, port, index, &zgid);
+
+ spin_unlock_irqrestore(&hr_dev->iboe.lock, flags);
+
+ return 0;
}
static int handle_en_event(struct hns_roce_dev *hr_dev, u8 port,
@@ -163,9 +117,6 @@
{
struct device *dev = &hr_dev->pdev->dev;
struct net_device *netdev;
- unsigned long flags;
- union ib_gid gid;
- int ret = 0;
netdev = hr_dev->iboe.netdevs[port];
if (!netdev) {
@@ -173,7 +124,7 @@
return -ENODEV;
}
- spin_lock_irqsave(&hr_dev->iboe.lock, flags);
+ spin_lock_bh(&hr_dev->iboe.lock);
switch (event) {
case NETDEV_UP:
@@ -181,23 +132,19 @@
case NETDEV_REGISTER:
case NETDEV_CHANGEADDR:
hns_roce_set_mac(hr_dev, port, netdev->dev_addr);
- hns_roce_make_default_gid(netdev, &gid);
- ret = hns_roce_set_gid(hr_dev, port, 0, &gid);
- if (!ret)
- hns_roce_update_gids(hr_dev, port);
break;
case NETDEV_DOWN:
/*
- * In v1 engine, only support all ports closed together.
- */
+ * In v1 engine, only support all ports closed together.
+ */
break;
default:
dev_dbg(dev, "NETDEV event = 0x%x!\n", (u32)(event));
break;
}
- spin_unlock_irqrestore(&hr_dev->iboe.lock, flags);
- return ret;
+ spin_unlock_bh(&hr_dev->iboe.lock);
+ return 0;
}
static int hns_roce_netdev_event(struct notifier_block *self,
@@ -224,118 +171,17 @@
return NOTIFY_DONE;
}
-static void hns_roce_addr_event(int event, struct net_device *event_netdev,
- struct hns_roce_dev *hr_dev, union ib_gid *gid)
+static int hns_roce_setup_mtu_mac(struct hns_roce_dev *hr_dev)
{
- struct hns_roce_ib_iboe *iboe = NULL;
- int gid_table_len = 0;
- unsigned long flags;
- union ib_gid zgid;
- u8 gid_idx = 0;
- u8 port = 0;
- int i = 0;
- int free;
- struct net_device *real_dev = rdma_vlan_dev_real_dev(event_netdev) ?
- rdma_vlan_dev_real_dev(event_netdev) :
- event_netdev;
-
- if (event != NETDEV_UP && event != NETDEV_DOWN)
- return;
-
- iboe = &hr_dev->iboe;
- while (port < hr_dev->caps.num_ports) {
- if (real_dev == iboe->netdevs[port])
- break;
- port++;
- }
-
- if (port >= hr_dev->caps.num_ports) {
- dev_dbg(&hr_dev->pdev->dev, "can't find netdev\n");
- return;
- }
-
- memset(zgid.raw, 0, sizeof(zgid.raw));
- free = -1;
- gid_table_len = hr_dev->caps.gid_table_len[port];
-
- spin_lock_irqsave(&hr_dev->iboe.lock, flags);
-
- for (i = 0; i < gid_table_len; i++) {
- gid_idx = hns_get_gid_index(hr_dev, port, i);
- if (!memcmp(gid->raw, iboe->gid_table[gid_idx].raw,
- sizeof(gid->raw)))
- break;
- if (free < 0 && !memcmp(zgid.raw,
- iboe->gid_table[gid_idx].raw, sizeof(zgid.raw)))
- free = i;
- }
-
- if (i >= gid_table_len) {
- if (free < 0) {
- spin_unlock_irqrestore(&hr_dev->iboe.lock, flags);
- dev_dbg(&hr_dev->pdev->dev,
- "gid_index overflow, port(%d)\n", port);
- return;
- }
- if (!hns_roce_set_gid(hr_dev, port, free, gid))
- hns_roce_update_gids(hr_dev, port);
- } else if (event == NETDEV_DOWN) {
- if (!hns_roce_set_gid(hr_dev, port, i, &zgid))
- hns_roce_update_gids(hr_dev, port);
- }
-
- spin_unlock_irqrestore(&hr_dev->iboe.lock, flags);
-}
-
-static int hns_roce_inet_event(struct notifier_block *self, unsigned long event,
- void *ptr)
-{
- struct in_ifaddr *ifa = ptr;
- struct hns_roce_dev *hr_dev;
- struct net_device *dev = ifa->ifa_dev->dev;
- union ib_gid gid;
-
- ipv6_addr_set_v4mapped(ifa->ifa_address, (struct in6_addr *)&gid);
-
- hr_dev = container_of(self, struct hns_roce_dev, iboe.nb_inet);
-
- hns_roce_addr_event(event, dev, hr_dev, &gid);
-
- return NOTIFY_DONE;
-}
-
-static int hns_roce_setup_mtu_gids(struct hns_roce_dev *hr_dev)
-{
- struct in_ifaddr *ifa_list = NULL;
- union ib_gid gid = {{0} };
- u32 ipaddr = 0;
- int index = 0;
- int ret = 0;
- u8 i = 0;
+ u8 i;
for (i = 0; i < hr_dev->caps.num_ports; i++) {
- hns_roce_set_mtu(hr_dev, i,
- ib_mtu_enum_to_int(hr_dev->caps.max_mtu));
+ hr_dev->hw->set_mtu(hr_dev, hr_dev->iboe.phy_port[i],
+ hr_dev->caps.max_mtu);
hns_roce_set_mac(hr_dev, i, hr_dev->iboe.netdevs[i]->dev_addr);
-
- if (hr_dev->iboe.netdevs[i]->ip_ptr) {
- ifa_list = hr_dev->iboe.netdevs[i]->ip_ptr->ifa_list;
- index = 1;
- while (ifa_list) {
- ipaddr = ifa_list->ifa_address;
- ipv6_addr_set_v4mapped(ipaddr,
- (struct in6_addr *)&gid);
- ret = hns_roce_set_gid(hr_dev, i, index, &gid);
- if (ret)
- break;
- index++;
- ifa_list = ifa_list->ifa_next;
- }
- hns_roce_update_gids(hr_dev, i);
- }
}
- return ret;
+ return 0;
}
static int hns_roce_query_device(struct ib_device *ib_dev,
@@ -444,31 +290,6 @@
static int hns_roce_query_gid(struct ib_device *ib_dev, u8 port_num, int index,
union ib_gid *gid)
{
- struct hns_roce_dev *hr_dev = to_hr_dev(ib_dev);
- struct device *dev = &hr_dev->pdev->dev;
- u8 gid_idx = 0;
- u8 port;
-
- if (port_num < 1 || port_num > hr_dev->caps.num_ports ||
- index >= hr_dev->caps.gid_table_len[port_num - 1]) {
- dev_err(dev,
- "port_num %d index %d illegal! correct range: port_num 1~%d index 0~%d!\n",
- port_num, index, hr_dev->caps.num_ports,
- hr_dev->caps.gid_table_len[port_num - 1] - 1);
- return -EINVAL;
- }
-
- port = port_num - 1;
- gid_idx = hns_get_gid_index(hr_dev, port, index);
- if (gid_idx >= HNS_ROCE_MAX_GID_NUM) {
- dev_err(dev, "port_num %d index %d illegal! total gid num %d!\n",
- port_num, index, HNS_ROCE_MAX_GID_NUM);
- return -EINVAL;
- }
-
- memcpy(gid->raw, hr_dev->iboe.gid_table[gid_idx].raw,
- HNS_ROCE_GID_SIZE);
-
return 0;
}
@@ -549,6 +370,8 @@
static int hns_roce_mmap(struct ib_ucontext *context,
struct vm_area_struct *vma)
{
+ struct hns_roce_dev *hr_dev = to_hr_dev(context->device);
+
if (((vma->vm_end - vma->vm_start) % PAGE_SIZE) != 0)
return -EINVAL;
@@ -558,10 +381,15 @@
to_hr_ucontext(context)->uar.pfn,
PAGE_SIZE, vma->vm_page_prot))
return -EAGAIN;
-
- } else {
+ } else if (vma->vm_pgoff == 1 && hr_dev->hw_rev == HNS_ROCE_HW_VER1) {
+ /* vm_pgoff: 1 -- TPTR */
+ if (io_remap_pfn_range(vma, vma->vm_start,
+ hr_dev->tptr_dma_addr >> PAGE_SHIFT,
+ hr_dev->tptr_size,
+ vma->vm_page_prot))
+ return -EAGAIN;
+ } else
return -EINVAL;
- }
return 0;
}
@@ -605,7 +433,7 @@
spin_lock_init(&iboe->lock);
ib_dev = &hr_dev->ib_dev;
- strlcpy(ib_dev->name, "hisi_%d", IB_DEVICE_NAME_MAX);
+ strlcpy(ib_dev->name, "hns_%d", IB_DEVICE_NAME_MAX);
ib_dev->owner = THIS_MODULE;
ib_dev->node_type = RDMA_NODE_IB_CA;
@@ -639,6 +467,8 @@
ib_dev->get_link_layer = hns_roce_get_link_layer;
ib_dev->get_netdev = hns_roce_get_netdev;
ib_dev->query_gid = hns_roce_query_gid;
+ ib_dev->add_gid = hns_roce_add_gid;
+ ib_dev->del_gid = hns_roce_del_gid;
ib_dev->query_pkey = hns_roce_query_pkey;
ib_dev->alloc_ucontext = hns_roce_alloc_ucontext;
ib_dev->dealloc_ucontext = hns_roce_dealloc_ucontext;
@@ -681,32 +511,22 @@
return ret;
}
- ret = hns_roce_setup_mtu_gids(hr_dev);
+ ret = hns_roce_setup_mtu_mac(hr_dev);
if (ret) {
- dev_err(dev, "roce_setup_mtu_gids failed!\n");
- goto error_failed_setup_mtu_gids;
+ dev_err(dev, "setup_mtu_mac failed!\n");
+ goto error_failed_setup_mtu_mac;
}
iboe->nb.notifier_call = hns_roce_netdev_event;
ret = register_netdevice_notifier(&iboe->nb);
if (ret) {
dev_err(dev, "register_netdevice_notifier failed!\n");
- goto error_failed_setup_mtu_gids;
- }
-
- iboe->nb_inet.notifier_call = hns_roce_inet_event;
- ret = register_inetaddr_notifier(&iboe->nb_inet);
- if (ret) {
- dev_err(dev, "register inet addr notifier failed!\n");
- goto error_failed_register_inetaddr_notifier;
+ goto error_failed_setup_mtu_mac;
}
return 0;
-error_failed_register_inetaddr_notifier:
- unregister_netdevice_notifier(&iboe->nb);
-
-error_failed_setup_mtu_gids:
+error_failed_setup_mtu_mac:
ib_unregister_device(ib_dev);
return ret;
@@ -940,10 +760,10 @@
}
/**
-* hns_roce_setup_hca - setup host channel adapter
-* @hr_dev: pointer to hns roce device
-* Return : int
-*/
+ * hns_roce_setup_hca - setup host channel adapter
+ * @hr_dev: pointer to hns roce device
+ * Return : int
+ */
static int hns_roce_setup_hca(struct hns_roce_dev *hr_dev)
{
int ret;
@@ -1008,11 +828,11 @@
}
/**
-* hns_roce_probe - RoCE driver entrance
-* @pdev: pointer to platform device
-* Return : int
-*
-*/
+ * hns_roce_probe - RoCE driver entrance
+ * @pdev: pointer to platform device
+ * Return : int
+ *
+ */
static int hns_roce_probe(struct platform_device *pdev)
{
int ret;
@@ -1023,9 +843,6 @@
if (!hr_dev)
return -ENOMEM;
- memset((u8 *)hr_dev + sizeof(struct ib_device), 0,
- sizeof(struct hns_roce_dev) - sizeof(struct ib_device));
-
hr_dev->pdev = pdev;
platform_set_drvdata(pdev, hr_dev);
@@ -1125,9 +942,9 @@
}
/**
-* hns_roce_remove - remove RoCE device
-* @pdev: pointer to platform device
-*/
+ * hns_roce_remove - remove RoCE device
+ * @pdev: pointer to platform device
+ */
static int hns_roce_remove(struct platform_device *pdev)
{
struct hns_roce_dev *hr_dev = platform_get_drvdata(pdev);
diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c
index fb87883..4139abe 100644
--- a/drivers/infiniband/hw/hns/hns_roce_mr.c
+++ b/drivers/infiniband/hw/hns/hns_roce_mr.c
@@ -42,7 +42,7 @@
return (u32)(ind >> 24) | (ind << 8);
}
-static unsigned long key_to_hw_index(u32 key)
+unsigned long key_to_hw_index(u32 key)
{
return (key << 24) | (key >> 8);
}
@@ -53,16 +53,16 @@
{
return hns_roce_cmd_mbox(hr_dev, mailbox->dma, 0, mpt_index, 0,
HNS_ROCE_CMD_SW2HW_MPT,
- HNS_ROCE_CMD_TIME_CLASS_B);
+ HNS_ROCE_CMD_TIMEOUT_MSECS);
}
-static int hns_roce_hw2sw_mpt(struct hns_roce_dev *hr_dev,
+int hns_roce_hw2sw_mpt(struct hns_roce_dev *hr_dev,
struct hns_roce_cmd_mailbox *mailbox,
unsigned long mpt_index)
{
return hns_roce_cmd_mbox(hr_dev, 0, mailbox ? mailbox->dma : 0,
mpt_index, !mailbox, HNS_ROCE_CMD_HW2SW_MPT,
- HNS_ROCE_CMD_TIME_CLASS_B);
+ HNS_ROCE_CMD_TIMEOUT_MSECS);
}
static int hns_roce_buddy_alloc(struct hns_roce_buddy *buddy, int order,
@@ -137,11 +137,13 @@
for (i = 0; i <= buddy->max_order; ++i) {
s = BITS_TO_LONGS(1 << (buddy->max_order - i));
- buddy->bits[i] = kmalloc_array(s, sizeof(long), GFP_KERNEL);
- if (!buddy->bits[i])
- goto err_out_free;
-
- bitmap_zero(buddy->bits[i], 1 << (buddy->max_order - i));
+ buddy->bits[i] = kcalloc(s, sizeof(long), GFP_KERNEL |
+ __GFP_NOWARN);
+ if (!buddy->bits[i]) {
+ buddy->bits[i] = vzalloc(s * sizeof(long));
+ if (!buddy->bits[i])
+ goto err_out_free;
+ }
}
set_bit(0, buddy->bits[buddy->max_order]);
@@ -151,7 +153,7 @@
err_out_free:
for (i = 0; i <= buddy->max_order; ++i)
- kfree(buddy->bits[i]);
+ kvfree(buddy->bits[i]);
err_out:
kfree(buddy->bits);
@@ -164,7 +166,7 @@
int i;
for (i = 0; i <= buddy->max_order; ++i)
- kfree(buddy->bits[i]);
+ kvfree(buddy->bits[i]);
kfree(buddy->bits);
kfree(buddy->num_free);
@@ -287,7 +289,7 @@
}
hns_roce_bitmap_free(&hr_dev->mr_table.mtpt_bitmap,
- key_to_hw_index(mr->key));
+ key_to_hw_index(mr->key), BITMAP_NO_RR);
}
static int hns_roce_mr_enable(struct hns_roce_dev *hr_dev,
@@ -605,13 +607,20 @@
int hns_roce_dereg_mr(struct ib_mr *ibmr)
{
+ struct hns_roce_dev *hr_dev = to_hr_dev(ibmr->device);
struct hns_roce_mr *mr = to_hr_mr(ibmr);
+ int ret = 0;
- hns_roce_mr_free(to_hr_dev(ibmr->device), mr);
- if (mr->umem)
- ib_umem_release(mr->umem);
+ if (hr_dev->hw->dereg_mr) {
+ ret = hr_dev->hw->dereg_mr(hr_dev, mr);
+ } else {
+ hns_roce_mr_free(hr_dev, mr);
- kfree(mr);
+ if (mr->umem)
+ ib_umem_release(mr->umem);
- return 0;
+ kfree(mr);
+ }
+
+ return ret;
}
diff --git a/drivers/infiniband/hw/hns/hns_roce_pd.c b/drivers/infiniband/hw/hns/hns_roce_pd.c
index 05db7d5..a64500f 100644
--- a/drivers/infiniband/hw/hns/hns_roce_pd.c
+++ b/drivers/infiniband/hw/hns/hns_roce_pd.c
@@ -40,7 +40,7 @@
static void hns_roce_pd_free(struct hns_roce_dev *hr_dev, unsigned long pdn)
{
- hns_roce_bitmap_free(&hr_dev->pd_bitmap, pdn);
+ hns_roce_bitmap_free(&hr_dev->pd_bitmap, pdn, BITMAP_NO_RR);
}
int hns_roce_init_pd_table(struct hns_roce_dev *hr_dev)
@@ -121,7 +121,8 @@
void hns_roce_uar_free(struct hns_roce_dev *hr_dev, struct hns_roce_uar *uar)
{
- hns_roce_bitmap_free(&hr_dev->uar_table.bitmap, uar->index);
+ hns_roce_bitmap_free(&hr_dev->uar_table.bitmap, uar->index,
+ BITMAP_NO_RR);
}
int hns_roce_init_uar_table(struct hns_roce_dev *hr_dev)
diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c
index e86dd8d..f036f32 100644
--- a/drivers/infiniband/hw/hns/hns_roce_qp.c
+++ b/drivers/infiniband/hw/hns/hns_roce_qp.c
@@ -37,7 +37,7 @@
#include "hns_roce_common.h"
#include "hns_roce_device.h"
#include "hns_roce_hem.h"
-#include "hns_roce_user.h"
+#include <rdma/hns-abi.h>
#define SQP_NUM (2 * HNS_ROCE_MAX_PORTS)
@@ -250,7 +250,7 @@
if (base_qpn < SQP_NUM)
return;
- hns_roce_bitmap_free_range(&qp_table->bitmap, base_qpn, cnt);
+ hns_roce_bitmap_free_range(&qp_table->bitmap, base_qpn, cnt, BITMAP_RR);
}
static int hns_roce_set_rq_size(struct hns_roce_dev *hr_dev,
diff --git a/drivers/infiniband/hw/i40iw/i40iw.h b/drivers/infiniband/hw/i40iw/i40iw.h
index 8ec09e4..da2eb5a 100644
--- a/drivers/infiniband/hw/i40iw/i40iw.h
+++ b/drivers/infiniband/hw/i40iw/i40iw.h
@@ -112,9 +112,12 @@
#define I40IW_DRV_OPT_MCAST_LOGPORT_MAP 0x00000800
#define IW_HMC_OBJ_TYPE_NUM ARRAY_SIZE(iw_hmc_obj_types)
-#define IW_CFG_FPM_QP_COUNT 32768
-#define I40IW_MAX_PAGES_PER_FMR 512
-#define I40IW_MIN_PAGES_PER_FMR 1
+#define IW_CFG_FPM_QP_COUNT 32768
+#define I40IW_MAX_PAGES_PER_FMR 512
+#define I40IW_MIN_PAGES_PER_FMR 1
+#define I40IW_CQP_COMPL_RQ_WQE_FLUSHED 2
+#define I40IW_CQP_COMPL_SQ_WQE_FLUSHED 3
+#define I40IW_CQP_COMPL_RQ_SQ_WQE_FLUSHED 4
#define I40IW_MTU_TO_MSS 40
#define I40IW_DEFAULT_MSS 1460
@@ -210,6 +213,12 @@
u32 ceq_id;
};
+struct l2params_work {
+ struct work_struct work;
+ struct i40iw_device *iwdev;
+ struct i40iw_l2params l2params;
+};
+
#define I40IW_MSIX_TABLE_SIZE 65
struct virtchnl_work {
@@ -227,6 +236,7 @@
struct net_device *netdev;
wait_queue_head_t vchnl_waitq;
struct i40iw_sc_dev sc_dev;
+ struct i40iw_sc_vsi vsi;
struct i40iw_handler *hdl;
struct i40e_info *ldev;
struct i40e_client *client;
@@ -280,7 +290,6 @@
u32 sd_type;
struct workqueue_struct *param_wq;
atomic_t params_busy;
- u32 mss;
enum init_completion_state init_state;
u16 mac_ip_table_idx;
atomic_t vchnl_msgs;
@@ -297,6 +306,14 @@
u32 mr_stagmask;
u32 mpa_version;
bool dcb;
+ bool closing;
+ bool reset;
+ u32 used_pds;
+ u32 used_cqs;
+ u32 used_mrs;
+ u32 used_qps;
+ wait_queue_head_t close_wq;
+ atomic64_t use_count;
};
struct i40iw_ib_device {
@@ -498,7 +515,7 @@
int i40iw_register_rdma_device(struct i40iw_device *iwdev);
void i40iw_port_ibevent(struct i40iw_device *iwdev);
-int i40iw_cm_disconn(struct i40iw_qp *);
+void i40iw_cm_disconn(struct i40iw_qp *iwqp);
void i40iw_cm_disconn_worker(void *);
int mini_cm_recv_pkt(struct i40iw_cm_core *, struct i40iw_device *,
struct sk_buff *);
@@ -508,20 +525,26 @@
enum i40iw_status_code i40iw_add_mac_addr(struct i40iw_device *iwdev,
u8 *mac_addr, u8 *mac_index);
int i40iw_modify_qp(struct ib_qp *, struct ib_qp_attr *, int, struct ib_udata *);
+void i40iw_cq_wq_destroy(struct i40iw_device *iwdev, struct i40iw_sc_cq *cq);
void i40iw_rem_pdusecount(struct i40iw_pd *iwpd, struct i40iw_device *iwdev);
void i40iw_add_pdusecount(struct i40iw_pd *iwpd);
+void i40iw_rem_devusecount(struct i40iw_device *iwdev);
+void i40iw_add_devusecount(struct i40iw_device *iwdev);
void i40iw_hw_modify_qp(struct i40iw_device *iwdev, struct i40iw_qp *iwqp,
struct i40iw_modify_qp_info *info, bool wait);
+void i40iw_qp_suspend_resume(struct i40iw_sc_dev *dev,
+ struct i40iw_sc_qp *qp,
+ bool suspend);
enum i40iw_status_code i40iw_manage_qhash(struct i40iw_device *iwdev,
struct i40iw_cm_info *cminfo,
enum i40iw_quad_entry_type etype,
enum i40iw_quad_hash_manage_type mtype,
void *cmnode,
bool wait);
-void i40iw_receive_ilq(struct i40iw_sc_dev *dev, struct i40iw_puda_buf *rbuf);
-void i40iw_free_sqbuf(struct i40iw_sc_dev *dev, void *bufp);
+void i40iw_receive_ilq(struct i40iw_sc_vsi *vsi, struct i40iw_puda_buf *rbuf);
+void i40iw_free_sqbuf(struct i40iw_sc_vsi *vsi, void *bufp);
void i40iw_free_qp_resources(struct i40iw_device *iwdev,
struct i40iw_qp *iwqp,
u32 qp_num);
diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.c b/drivers/infiniband/hw/i40iw/i40iw_cm.c
index 8563769..95a0586 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_cm.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_cm.c
@@ -68,13 +68,13 @@
/**
* i40iw_free_sqbuf - put back puda buffer if refcount = 0
- * @dev: FPK device
+ * @vsi: pointer to vsi structure
* @buf: puda buffer to free
*/
-void i40iw_free_sqbuf(struct i40iw_sc_dev *dev, void *bufp)
+void i40iw_free_sqbuf(struct i40iw_sc_vsi *vsi, void *bufp)
{
struct i40iw_puda_buf *buf = (struct i40iw_puda_buf *)bufp;
- struct i40iw_puda_rsrc *ilq = dev->ilq;
+ struct i40iw_puda_rsrc *ilq = vsi->ilq;
if (!atomic_dec_return(&buf->refcount))
i40iw_puda_ret_bufpool(ilq, buf);
@@ -221,6 +221,7 @@
memcpy(cm_info->rem_addr, cm_node->rem_addr, sizeof(cm_info->rem_addr));
cm_info->loc_port = cm_node->loc_port;
cm_info->rem_port = cm_node->rem_port;
+ cm_info->user_pri = cm_node->user_pri;
}
/**
@@ -271,6 +272,7 @@
event.provider_data = (void *)cm_node;
event.private_data = (void *)cm_node->pdata_buf;
event.private_data_len = (u8)cm_node->pdata.size;
+ event.ird = cm_node->ird_size;
break;
case IW_CM_EVENT_CONNECT_REPLY:
i40iw_get_cmevent_info(cm_node, cm_id, &event);
@@ -335,13 +337,13 @@
*/
static void i40iw_free_retrans_entry(struct i40iw_cm_node *cm_node)
{
- struct i40iw_sc_dev *dev = cm_node->dev;
+ struct i40iw_device *iwdev = cm_node->iwdev;
struct i40iw_timer_entry *send_entry;
send_entry = cm_node->send_entry;
if (send_entry) {
cm_node->send_entry = NULL;
- i40iw_free_sqbuf(dev, (void *)send_entry->sqbuf);
+ i40iw_free_sqbuf(&iwdev->vsi, (void *)send_entry->sqbuf);
kfree(send_entry);
atomic_dec(&cm_node->ref_count);
}
@@ -360,15 +362,6 @@
spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags);
}
-static bool is_remote_ne020_or_chelsio(struct i40iw_cm_node *cm_node)
-{
- if ((cm_node->rem_mac[0] == 0x0) &&
- (((cm_node->rem_mac[1] == 0x12) && (cm_node->rem_mac[2] == 0x55)) ||
- ((cm_node->rem_mac[1] == 0x07 && (cm_node->rem_mac[2] == 0x43)))))
- return true;
- return false;
-}
-
/**
* i40iw_form_cm_frame - get a free packet and build frame
* @cm_node: connection's node ionfo to use in frame
@@ -384,7 +377,7 @@
u8 flags)
{
struct i40iw_puda_buf *sqbuf;
- struct i40iw_sc_dev *dev = cm_node->dev;
+ struct i40iw_sc_vsi *vsi = &cm_node->iwdev->vsi;
u8 *buf;
struct tcphdr *tcph;
@@ -396,8 +389,9 @@
u32 opts_len = 0;
u32 pd_len = 0;
u32 hdr_len = 0;
+ u16 vtag;
- sqbuf = i40iw_puda_get_bufpool(dev->ilq);
+ sqbuf = i40iw_puda_get_bufpool(vsi->ilq);
if (!sqbuf)
return NULL;
buf = sqbuf->mem.va;
@@ -408,11 +402,8 @@
if (hdr)
hdr_len = hdr->size;
- if (pdata) {
+ if (pdata)
pd_len = pdata->size;
- if (!is_remote_ne020_or_chelsio(cm_node))
- pd_len += MPA_ZERO_PAD_LEN;
- }
if (cm_node->vlan_id < VLAN_TAG_PRESENT)
eth_hlen += 4;
@@ -445,7 +436,8 @@
ether_addr_copy(ethh->h_source, cm_node->loc_mac);
if (cm_node->vlan_id < VLAN_TAG_PRESENT) {
((struct vlan_ethhdr *)ethh)->h_vlan_proto = htons(ETH_P_8021Q);
- ((struct vlan_ethhdr *)ethh)->h_vlan_TCI = htons(cm_node->vlan_id);
+ vtag = (cm_node->user_pri << VLAN_PRIO_SHIFT) | cm_node->vlan_id;
+ ((struct vlan_ethhdr *)ethh)->h_vlan_TCI = htons(vtag);
((struct vlan_ethhdr *)ethh)->h_vlan_encapsulated_proto = htons(ETH_P_IP);
} else {
@@ -454,7 +446,7 @@
iph->version = IPVERSION;
iph->ihl = 5; /* 5 * 4Byte words, IP headr len */
- iph->tos = 0;
+ iph->tos = cm_node->tos;
iph->tot_len = htons(packetsize);
iph->id = htons(++cm_node->tcp_cntxt.loc_id);
@@ -474,13 +466,15 @@
ether_addr_copy(ethh->h_source, cm_node->loc_mac);
if (cm_node->vlan_id < VLAN_TAG_PRESENT) {
((struct vlan_ethhdr *)ethh)->h_vlan_proto = htons(ETH_P_8021Q);
- ((struct vlan_ethhdr *)ethh)->h_vlan_TCI = htons(cm_node->vlan_id);
+ vtag = (cm_node->user_pri << VLAN_PRIO_SHIFT) | cm_node->vlan_id;
+ ((struct vlan_ethhdr *)ethh)->h_vlan_TCI = htons(vtag);
((struct vlan_ethhdr *)ethh)->h_vlan_encapsulated_proto = htons(ETH_P_IPV6);
} else {
ethh->h_proto = htons(ETH_P_IPV6);
}
ip6h->version = 6;
- ip6h->flow_lbl[0] = 0;
+ ip6h->priority = cm_node->tos >> 4;
+ ip6h->flow_lbl[0] = cm_node->tos << 4;
ip6h->flow_lbl[1] = 0;
ip6h->flow_lbl[2] = 0;
ip6h->payload_len = htons(packetsize - sizeof(*ip6h));
@@ -1065,7 +1059,7 @@
int send_retrans,
int close_when_complete)
{
- struct i40iw_sc_dev *dev = cm_node->dev;
+ struct i40iw_sc_vsi *vsi = &cm_node->iwdev->vsi;
struct i40iw_cm_core *cm_core = cm_node->cm_core;
struct i40iw_timer_entry *new_send;
int ret = 0;
@@ -1074,7 +1068,7 @@
new_send = kzalloc(sizeof(*new_send), GFP_ATOMIC);
if (!new_send) {
- i40iw_free_sqbuf(cm_node->dev, (void *)sqbuf);
+ i40iw_free_sqbuf(vsi, (void *)sqbuf);
return -ENOMEM;
}
new_send->retrycount = I40IW_DEFAULT_RETRYS;
@@ -1089,7 +1083,7 @@
new_send->timetosend += (HZ / 10);
if (cm_node->close_entry) {
kfree(new_send);
- i40iw_free_sqbuf(cm_node->dev, (void *)sqbuf);
+ i40iw_free_sqbuf(vsi, (void *)sqbuf);
i40iw_pr_err("already close entry\n");
return -EINVAL;
}
@@ -1104,7 +1098,7 @@
new_send->timetosend = jiffies + I40IW_RETRY_TIMEOUT;
atomic_inc(&sqbuf->refcount);
- i40iw_puda_send_buf(dev->ilq, sqbuf);
+ i40iw_puda_send_buf(vsi->ilq, sqbuf);
if (!send_retrans) {
i40iw_cleanup_retrans_entry(cm_node);
if (close_when_complete)
@@ -1201,6 +1195,7 @@
struct i40iw_cm_node *cm_node;
struct i40iw_timer_entry *send_entry, *close_entry;
struct list_head *list_core_temp;
+ struct i40iw_sc_vsi *vsi;
struct list_head *list_node;
struct i40iw_cm_core *cm_core = (struct i40iw_cm_core *)pass;
u32 settimer = 0;
@@ -1276,9 +1271,10 @@
cm_node->cm_core->stats_pkt_retrans++;
spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags);
+ vsi = &cm_node->iwdev->vsi;
dev = cm_node->dev;
atomic_inc(&send_entry->sqbuf->refcount);
- i40iw_puda_send_buf(dev->ilq, send_entry->sqbuf);
+ i40iw_puda_send_buf(vsi->ilq, send_entry->sqbuf);
spin_lock_irqsave(&cm_node->retrans_list_lock, flags);
if (send_entry->send_retrans) {
send_entry->retranscount--;
@@ -1379,10 +1375,11 @@
static void i40iw_send_ack(struct i40iw_cm_node *cm_node)
{
struct i40iw_puda_buf *sqbuf;
+ struct i40iw_sc_vsi *vsi = &cm_node->iwdev->vsi;
sqbuf = i40iw_form_cm_frame(cm_node, NULL, NULL, NULL, SET_ACK);
if (sqbuf)
- i40iw_puda_send_buf(cm_node->dev->ilq, sqbuf);
+ i40iw_puda_send_buf(vsi->ilq, sqbuf);
else
i40iw_pr_err("no sqbuf\n");
}
@@ -1564,9 +1561,15 @@
memcpy(cm_info->loc_addr, child_listen_node->loc_addr,
sizeof(cm_info->loc_addr));
cm_info->vlan_id = child_listen_node->vlan_id;
- ret = i40iw_manage_qhash(iwdev, cm_info,
- I40IW_QHASH_TYPE_TCP_SYN,
- I40IW_QHASH_MANAGE_TYPE_DELETE, NULL, false);
+ if (child_listen_node->qhash_set) {
+ ret = i40iw_manage_qhash(iwdev, cm_info,
+ I40IW_QHASH_TYPE_TCP_SYN,
+ I40IW_QHASH_MANAGE_TYPE_DELETE,
+ NULL, false);
+ child_listen_node->qhash_set = false;
+ } else {
+ ret = I40IW_SUCCESS;
+ }
i40iw_debug(&iwdev->sc_dev,
I40IW_DEBUG_CM,
"freed pointer = %p\n",
@@ -1591,9 +1594,10 @@
static struct net_device *i40iw_netdev_vlan_ipv6(u32 *addr, u16 *vlan_id, u8 *mac)
{
struct net_device *ip_dev = NULL;
-#if IS_ENABLED(CONFIG_IPV6)
struct in6_addr laddr6;
+ if (!IS_ENABLED(CONFIG_IPV6))
+ return NULL;
i40iw_copy_ip_htonl(laddr6.in6_u.u6_addr32, addr);
if (vlan_id)
*vlan_id = I40IW_NO_VLAN;
@@ -1610,7 +1614,6 @@
}
}
rcu_read_unlock();
-#endif
return ip_dev;
}
@@ -1646,7 +1649,7 @@
{
struct net_device *ip_dev;
struct inet6_dev *idev;
- struct inet6_ifaddr *ifp;
+ struct inet6_ifaddr *ifp, *tmp;
enum i40iw_status_code ret = 0;
struct i40iw_cm_listener *child_listen_node;
unsigned long flags;
@@ -1661,7 +1664,7 @@
i40iw_pr_err("idev == NULL\n");
break;
}
- list_for_each_entry(ifp, &idev->addr_list, if_list) {
+ list_for_each_entry_safe(ifp, tmp, &idev->addr_list, if_list) {
i40iw_debug(&iwdev->sc_dev,
I40IW_DEBUG_CM,
"IP=%pI6, vlan_id=%d, MAC=%pM\n",
@@ -1675,7 +1678,6 @@
"Allocating child listener %p\n",
child_listen_node);
if (!child_listen_node) {
- i40iw_pr_err("listener memory allocation\n");
ret = I40IW_ERR_NO_MEMORY;
goto exit;
}
@@ -1695,6 +1697,7 @@
I40IW_QHASH_MANAGE_TYPE_ADD,
NULL, true);
if (!ret) {
+ child_listen_node->qhash_set = true;
spin_lock_irqsave(&iwdev->cm_core.listen_list_lock, flags);
list_add(&child_listen_node->child_listen_list,
&cm_parent_listen_node->child_listen_list);
@@ -1751,7 +1754,6 @@
"Allocating child listener %p\n",
child_listen_node);
if (!child_listen_node) {
- i40iw_pr_err("listener memory allocation\n");
in_dev_put(idev);
ret = I40IW_ERR_NO_MEMORY;
goto exit;
@@ -1773,6 +1775,7 @@
NULL,
true);
if (!ret) {
+ child_listen_node->qhash_set = true;
spin_lock_irqsave(&iwdev->cm_core.listen_list_lock, flags);
list_add(&child_listen_node->child_listen_list,
&cm_parent_listen_node->child_listen_list);
@@ -1880,6 +1883,7 @@
nfo.loc_port = listener->loc_port;
nfo.ipv4 = listener->ipv4;
nfo.vlan_id = listener->vlan_id;
+ nfo.user_pri = listener->user_pri;
if (!list_empty(&listener->child_listen_list)) {
i40iw_del_multiple_qhash(listener->iwdev, &nfo, listener);
@@ -2138,6 +2142,20 @@
/* set our node specific transport info */
cm_node->ipv4 = cm_info->ipv4;
cm_node->vlan_id = cm_info->vlan_id;
+ if ((cm_node->vlan_id == I40IW_NO_VLAN) && iwdev->dcb)
+ cm_node->vlan_id = 0;
+ cm_node->tos = cm_info->tos;
+ cm_node->user_pri = cm_info->user_pri;
+ if (listener) {
+ if (listener->tos != cm_info->tos)
+ i40iw_debug(&iwdev->sc_dev, I40IW_DEBUG_DCB,
+ "application TOS[%d] and remote client TOS[%d] mismatch\n",
+ listener->tos, cm_info->tos);
+ cm_node->tos = max(listener->tos, cm_info->tos);
+ cm_node->user_pri = rt_tos2priority(cm_node->tos);
+ i40iw_debug(&iwdev->sc_dev, I40IW_DEBUG_DCB, "listener: TOS:[%d] UP:[%d]\n",
+ cm_node->tos, cm_node->user_pri);
+ }
memcpy(cm_node->loc_addr, cm_info->loc_addr, sizeof(cm_node->loc_addr));
memcpy(cm_node->rem_addr, cm_info->rem_addr, sizeof(cm_node->rem_addr));
cm_node->loc_port = cm_info->loc_port;
@@ -2162,7 +2180,7 @@
I40IW_CM_DEFAULT_RCV_WND_SCALED >> I40IW_CM_DEFAULT_RCV_WND_SCALE;
ts = current_kernel_time();
cm_node->tcp_cntxt.loc_seq_num = ts.tv_nsec;
- cm_node->tcp_cntxt.mss = iwdev->mss;
+ cm_node->tcp_cntxt.mss = iwdev->vsi.mss;
cm_node->iwdev = iwdev;
cm_node->dev = &iwdev->sc_dev;
@@ -2236,7 +2254,7 @@
i40iw_dec_refcnt_listen(cm_core, cm_node->listener, 0, true);
} else {
if (!i40iw_listen_port_in_use(cm_core, cm_node->loc_port) &&
- cm_node->apbvt_set && cm_node->iwdev) {
+ cm_node->apbvt_set) {
i40iw_manage_apbvt(cm_node->iwdev,
cm_node->loc_port,
I40IW_MANAGE_APBVT_DEL);
@@ -2861,7 +2879,7 @@
/* create a CM connection node */
cm_node = i40iw_make_cm_node(cm_core, iwdev, cm_info, NULL);
if (!cm_node)
- return NULL;
+ return ERR_PTR(-ENOMEM);
/* set our node side to client (active) side */
cm_node->tcp_cntxt.client = 1;
cm_node->tcp_cntxt.rcv_wscale = I40IW_CM_DEFAULT_RCV_WND_SCALE;
@@ -2874,7 +2892,8 @@
cm_node->vlan_id,
I40IW_CM_LISTENER_ACTIVE_STATE);
if (!loopback_remotelistener) {
- i40iw_create_event(cm_node, I40IW_CM_EVENT_ABORTED);
+ i40iw_rem_ref_cm_node(cm_node);
+ return ERR_PTR(-ECONNREFUSED);
} else {
loopback_cm_info = *cm_info;
loopback_cm_info.loc_port = cm_info->rem_port;
@@ -2887,7 +2906,7 @@
loopback_remotelistener);
if (!loopback_remotenode) {
i40iw_rem_ref_cm_node(cm_node);
- return NULL;
+ return ERR_PTR(-ENOMEM);
}
cm_core->stats_loopbacks++;
loopback_remotenode->loopbackpartner = cm_node;
@@ -3041,10 +3060,10 @@
/**
* i40iw_receive_ilq - recv an ETHERNET packet, and process it
* through CM
- * @dev: FPK dev struct
+ * @vsi: pointer to the vsi structure
* @rbuf: receive buffer
*/
-void i40iw_receive_ilq(struct i40iw_sc_dev *dev, struct i40iw_puda_buf *rbuf)
+void i40iw_receive_ilq(struct i40iw_sc_vsi *vsi, struct i40iw_puda_buf *rbuf)
{
struct i40iw_cm_node *cm_node;
struct i40iw_cm_listener *listener;
@@ -3052,9 +3071,11 @@
struct ipv6hdr *ip6h;
struct tcphdr *tcph;
struct i40iw_cm_info cm_info;
+ struct i40iw_sc_dev *dev = vsi->dev;
struct i40iw_device *iwdev = (struct i40iw_device *)dev->back_dev;
struct i40iw_cm_core *cm_core = &iwdev->cm_core;
struct vlan_ethhdr *ethh;
+ u16 vtag;
/* if vlan, then maclen = 18 else 14 */
iph = (struct iphdr *)rbuf->iph;
@@ -3068,7 +3089,9 @@
ethh = (struct vlan_ethhdr *)rbuf->mem.va;
if (ethh->h_vlan_proto == htons(ETH_P_8021Q)) {
- cm_info.vlan_id = ntohs(ethh->h_vlan_TCI) & VLAN_VID_MASK;
+ vtag = ntohs(ethh->h_vlan_TCI);
+ cm_info.user_pri = (vtag & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
+ cm_info.vlan_id = vtag & VLAN_VID_MASK;
i40iw_debug(cm_core->dev,
I40IW_DEBUG_CM,
"%s vlan_id=%d\n",
@@ -3083,6 +3106,7 @@
cm_info.loc_addr[0] = ntohl(iph->daddr);
cm_info.rem_addr[0] = ntohl(iph->saddr);
cm_info.ipv4 = true;
+ cm_info.tos = iph->tos;
} else {
ip6h = (struct ipv6hdr *)rbuf->iph;
i40iw_copy_ip_ntohl(cm_info.loc_addr,
@@ -3090,6 +3114,7 @@
i40iw_copy_ip_ntohl(cm_info.rem_addr,
ip6h->saddr.in6_u.u6_addr32);
cm_info.ipv4 = false;
+ cm_info.tos = (ip6h->priority << 4) | (ip6h->flow_lbl[0] >> 4);
}
cm_info.loc_port = ntohs(tcph->dest);
cm_info.rem_port = ntohs(tcph->source);
@@ -3309,6 +3334,8 @@
ctx_info->tcp_info_valid = true;
ctx_info->iwarp_info_valid = true;
+ ctx_info->add_to_qoslist = true;
+ ctx_info->user_pri = cm_node->user_pri;
i40iw_init_tcp_ctx(cm_node, &tcp_info, iwqp);
if (cm_node->snd_mark_en) {
@@ -3320,33 +3347,47 @@
cm_node->state = I40IW_CM_STATE_OFFLOADED;
tcp_info.tcp_state = I40IW_TCP_STATE_ESTABLISHED;
tcp_info.src_mac_addr_idx = iwdev->mac_ip_table_idx;
+ tcp_info.tos = cm_node->tos;
dev->iw_priv_qp_ops->qp_setctx(&iwqp->sc_qp, (u64 *)(iwqp->host_ctx.va), ctx_info);
/* once tcp_info is set, no need to do it again */
ctx_info->tcp_info_valid = false;
ctx_info->iwarp_info_valid = false;
+ ctx_info->add_to_qoslist = false;
}
/**
* i40iw_cm_disconn - when a connection is being closed
* @iwqp: associate qp for the connection
*/
-int i40iw_cm_disconn(struct i40iw_qp *iwqp)
+void i40iw_cm_disconn(struct i40iw_qp *iwqp)
{
struct disconn_work *work;
struct i40iw_device *iwdev = iwqp->iwdev;
struct i40iw_cm_core *cm_core = &iwdev->cm_core;
+ unsigned long flags;
work = kzalloc(sizeof(*work), GFP_ATOMIC);
if (!work)
- return -ENOMEM; /* Timer will clean up */
+ return; /* Timer will clean up */
+ spin_lock_irqsave(&iwdev->qptable_lock, flags);
+ if (!iwdev->qp_table[iwqp->ibqp.qp_num]) {
+ spin_unlock_irqrestore(&iwdev->qptable_lock, flags);
+ i40iw_debug(&iwdev->sc_dev, I40IW_DEBUG_CM,
+ "%s qp_id %d is already freed\n",
+ __func__, iwqp->ibqp.qp_num);
+ kfree(work);
+ return;
+ }
i40iw_add_ref(&iwqp->ibqp);
+ spin_unlock_irqrestore(&iwdev->qptable_lock, flags);
+
work->iwqp = iwqp;
INIT_WORK(&work->work, i40iw_disconnect_worker);
queue_work(cm_core->disconn_wq, &work->work);
- return 0;
+ return;
}
/**
@@ -3432,7 +3473,7 @@
*terminate-handler to issue cm_disconn which can re-free
*a QP even after its refcnt=0.
*/
- del_timer(&iwqp->terminate_timer);
+ i40iw_terminate_del_timer(qp);
if (!iwqp->flush_issued) {
iwqp->flush_issued = 1;
issue_flush = 1;
@@ -3462,7 +3503,7 @@
/* Flush the queues */
i40iw_flush_wqes(iwdev, iwqp);
- if (qp->term_flags) {
+ if (qp->term_flags && iwqp->ibqp.event_handler) {
ibevent.device = iwqp->ibqp.device;
ibevent.event = (qp->eventtype == TERM_EVENT_QP_FATAL) ?
IB_EVENT_QP_FATAL : IB_EVENT_QP_ACCESS_ERR;
@@ -3571,7 +3612,7 @@
iwqp->cm_node = (void *)cm_node;
cm_node->iwqp = iwqp;
- buf_len = conn_param->private_data_len + I40IW_MAX_IETF_SIZE + MPA_ZERO_PAD_LEN;
+ buf_len = conn_param->private_data_len + I40IW_MAX_IETF_SIZE;
status = i40iw_allocate_dma_mem(dev->hw, &iwqp->ietf_mem, buf_len, 1);
@@ -3605,18 +3646,10 @@
iwqp->lsmm_mr = ibmr;
if (iwqp->page)
iwqp->sc_qp.qp_uk.sq_base = kmap(iwqp->page);
- if (is_remote_ne020_or_chelsio(cm_node))
- dev->iw_priv_qp_ops->qp_send_lsmm(
- &iwqp->sc_qp,
+ dev->iw_priv_qp_ops->qp_send_lsmm(&iwqp->sc_qp,
iwqp->ietf_mem.va,
(accept.size + conn_param->private_data_len),
ibmr->lkey);
- else
- dev->iw_priv_qp_ops->qp_send_lsmm(
- &iwqp->sc_qp,
- iwqp->ietf_mem.va,
- (accept.size + conn_param->private_data_len + MPA_ZERO_PAD_LEN),
- ibmr->lkey);
} else {
if (iwqp->page)
@@ -3714,6 +3747,7 @@
struct sockaddr_in6 *raddr6;
bool qhash_set = false;
int apbvt_set = 0;
+ int err = 0;
enum i40iw_status_code status;
ibqp = i40iw_get_qp(cm_id->device, conn_param->qpn);
@@ -3759,6 +3793,10 @@
i40iw_netdev_vlan_ipv6(cm_info.loc_addr, &cm_info.vlan_id, NULL);
}
cm_info.cm_id = cm_id;
+ cm_info.tos = cm_id->tos;
+ cm_info.user_pri = rt_tos2priority(cm_id->tos);
+ i40iw_debug(&iwdev->sc_dev, I40IW_DEBUG_DCB, "%s TOS:[%d] UP:[%d]\n",
+ __func__, cm_id->tos, cm_info.user_pri);
if ((cm_info.ipv4 && (laddr->sin_addr.s_addr != raddr->sin_addr.s_addr)) ||
(!cm_info.ipv4 && memcmp(laddr6->sin6_addr.in6_u.u6_addr32,
raddr6->sin6_addr.in6_u.u6_addr32,
@@ -3790,8 +3828,11 @@
conn_param->private_data_len,
(void *)conn_param->private_data,
&cm_info);
- if (!cm_node)
- goto err;
+
+ if (IS_ERR(cm_node)) {
+ err = PTR_ERR(cm_node);
+ goto err_out;
+ }
i40iw_record_ird_ord(cm_node, (u16)conn_param->ird, (u16)conn_param->ord);
if (cm_node->send_rdma0_op == SEND_RDMA_READ_ZERO &&
@@ -3805,10 +3846,12 @@
iwqp->cm_id = cm_id;
i40iw_add_ref(&iwqp->ibqp);
- if (cm_node->state == I40IW_CM_STATE_SYN_SENT) {
- if (i40iw_send_syn(cm_node, 0)) {
+ if (cm_node->state != I40IW_CM_STATE_OFFLOADED) {
+ cm_node->state = I40IW_CM_STATE_SYN_SENT;
+ err = i40iw_send_syn(cm_node, 0);
+ if (err) {
i40iw_rem_ref_cm_node(cm_node);
- goto err;
+ goto err_out;
}
}
@@ -3820,24 +3863,25 @@
cm_node->cm_id);
return 0;
-err:
- if (cm_node) {
- if (cm_node->ipv4)
- i40iw_debug(cm_node->dev,
- I40IW_DEBUG_CM,
- "Api - connect() FAILED: dest addr=%pI4",
- cm_node->rem_addr);
- else
- i40iw_debug(cm_node->dev, I40IW_DEBUG_CM,
- "Api - connect() FAILED: dest addr=%pI6",
- cm_node->rem_addr);
- }
- i40iw_manage_qhash(iwdev,
- &cm_info,
- I40IW_QHASH_TYPE_TCP_ESTABLISHED,
- I40IW_QHASH_MANAGE_TYPE_DELETE,
- NULL,
- false);
+err_out:
+ if (cm_info.ipv4)
+ i40iw_debug(&iwdev->sc_dev,
+ I40IW_DEBUG_CM,
+ "Api - connect() FAILED: dest addr=%pI4",
+ cm_info.rem_addr);
+ else
+ i40iw_debug(&iwdev->sc_dev,
+ I40IW_DEBUG_CM,
+ "Api - connect() FAILED: dest addr=%pI6",
+ cm_info.rem_addr);
+
+ if (qhash_set)
+ i40iw_manage_qhash(iwdev,
+ &cm_info,
+ I40IW_QHASH_TYPE_TCP_ESTABLISHED,
+ I40IW_QHASH_MANAGE_TYPE_DELETE,
+ NULL,
+ false);
if (apbvt_set && !i40iw_listen_port_in_use(&iwdev->cm_core,
cm_info.loc_port))
@@ -3846,7 +3890,7 @@
I40IW_MANAGE_APBVT_DEL);
cm_id->rem_ref(cm_id);
iwdev->cm_core.stats_connect_errs++;
- return -ENOMEM;
+ return err;
}
/**
@@ -3904,6 +3948,10 @@
cm_id->provider_data = cm_listen_node;
+ cm_listen_node->tos = cm_id->tos;
+ cm_listen_node->user_pri = rt_tos2priority(cm_id->tos);
+ cm_info.user_pri = cm_listen_node->user_pri;
+
if (!cm_listen_node->reused_node) {
if (wildcard) {
if (cm_info.ipv4)
@@ -4124,3 +4172,158 @@
queue_work(event->cm_node->cm_core->event_wq, &event->event_work);
}
+
+/**
+ * i40iw_qhash_ctrl - enable/disable qhash for list
+ * @iwdev: device pointer
+ * @parent_listen_node: parent listen node
+ * @nfo: cm info node
+ * @ipaddr: Pointer to IPv4 or IPv6 address
+ * @ipv4: flag indicating IPv4 when true
+ * @ifup: flag indicating interface up when true
+ *
+ * Enables or disables the qhash for the node in the child
+ * listen list that matches ipaddr. If no matching IP was found
+ * it will allocate and add a new child listen node to the
+ * parent listen node. The listen_list_lock is assumed to be
+ * held when called.
+ */
+static void i40iw_qhash_ctrl(struct i40iw_device *iwdev,
+ struct i40iw_cm_listener *parent_listen_node,
+ struct i40iw_cm_info *nfo,
+ u32 *ipaddr, bool ipv4, bool ifup)
+{
+ struct list_head *child_listen_list = &parent_listen_node->child_listen_list;
+ struct i40iw_cm_listener *child_listen_node;
+ struct list_head *pos, *tpos;
+ enum i40iw_status_code ret;
+ bool node_allocated = false;
+ enum i40iw_quad_hash_manage_type op =
+ ifup ? I40IW_QHASH_MANAGE_TYPE_ADD : I40IW_QHASH_MANAGE_TYPE_DELETE;
+
+ list_for_each_safe(pos, tpos, child_listen_list) {
+ child_listen_node =
+ list_entry(pos,
+ struct i40iw_cm_listener,
+ child_listen_list);
+ if (!memcmp(child_listen_node->loc_addr, ipaddr, ipv4 ? 4 : 16))
+ goto set_qhash;
+ }
+
+ /* if not found then add a child listener if interface is going up */
+ if (!ifup)
+ return;
+ child_listen_node = kzalloc(sizeof(*child_listen_node), GFP_ATOMIC);
+ if (!child_listen_node)
+ return;
+ node_allocated = true;
+ memcpy(child_listen_node, parent_listen_node, sizeof(*child_listen_node));
+
+ memcpy(child_listen_node->loc_addr, ipaddr, ipv4 ? 4 : 16);
+
+set_qhash:
+ memcpy(nfo->loc_addr,
+ child_listen_node->loc_addr,
+ sizeof(nfo->loc_addr));
+ nfo->vlan_id = child_listen_node->vlan_id;
+ ret = i40iw_manage_qhash(iwdev, nfo,
+ I40IW_QHASH_TYPE_TCP_SYN,
+ op,
+ NULL, false);
+ if (!ret) {
+ child_listen_node->qhash_set = ifup;
+ if (node_allocated)
+ list_add(&child_listen_node->child_listen_list,
+ &parent_listen_node->child_listen_list);
+ } else if (node_allocated) {
+ kfree(child_listen_node);
+ }
+}
+
+/**
+ * i40iw_cm_disconnect_all - disconnect all connected qp's
+ * @iwdev: device pointer
+ */
+void i40iw_cm_disconnect_all(struct i40iw_device *iwdev)
+{
+ struct i40iw_cm_core *cm_core = &iwdev->cm_core;
+ struct list_head *list_core_temp;
+ struct list_head *list_node;
+ struct i40iw_cm_node *cm_node;
+ unsigned long flags;
+ struct list_head connected_list;
+ struct ib_qp_attr attr;
+
+ INIT_LIST_HEAD(&connected_list);
+ spin_lock_irqsave(&cm_core->ht_lock, flags);
+ list_for_each_safe(list_node, list_core_temp, &cm_core->connected_nodes) {
+ cm_node = container_of(list_node, struct i40iw_cm_node, list);
+ atomic_inc(&cm_node->ref_count);
+ list_add(&cm_node->connected_entry, &connected_list);
+ }
+ spin_unlock_irqrestore(&cm_core->ht_lock, flags);
+
+ list_for_each_safe(list_node, list_core_temp, &connected_list) {
+ cm_node = container_of(list_node, struct i40iw_cm_node, connected_entry);
+ attr.qp_state = IB_QPS_ERR;
+ i40iw_modify_qp(&cm_node->iwqp->ibqp, &attr, IB_QP_STATE, NULL);
+ i40iw_rem_ref_cm_node(cm_node);
+ }
+}
+
+/**
+ * i40iw_ifdown_notify - process an ifdown on an interface
+ * @iwdev: device pointer
+ * @ipaddr: Pointer to IPv4 or IPv6 address
+ * @ipv4: flag indicating IPv4 when true
+ * @ifup: flag indicating interface up when true
+ */
+void i40iw_if_notify(struct i40iw_device *iwdev, struct net_device *netdev,
+ u32 *ipaddr, bool ipv4, bool ifup)
+{
+ struct i40iw_cm_core *cm_core = &iwdev->cm_core;
+ unsigned long flags;
+ struct i40iw_cm_listener *listen_node;
+ static const u32 ip_zero[4] = { 0, 0, 0, 0 };
+ struct i40iw_cm_info nfo;
+ u16 vlan_id = rdma_vlan_dev_vlan_id(netdev);
+ enum i40iw_status_code ret;
+ enum i40iw_quad_hash_manage_type op =
+ ifup ? I40IW_QHASH_MANAGE_TYPE_ADD : I40IW_QHASH_MANAGE_TYPE_DELETE;
+
+ /* Disable or enable qhash for listeners */
+ spin_lock_irqsave(&cm_core->listen_list_lock, flags);
+ list_for_each_entry(listen_node, &cm_core->listen_nodes, list) {
+ if (vlan_id == listen_node->vlan_id &&
+ (!memcmp(listen_node->loc_addr, ipaddr, ipv4 ? 4 : 16) ||
+ !memcmp(listen_node->loc_addr, ip_zero, ipv4 ? 4 : 16))) {
+ memcpy(nfo.loc_addr, listen_node->loc_addr,
+ sizeof(nfo.loc_addr));
+ nfo.loc_port = listen_node->loc_port;
+ nfo.ipv4 = listen_node->ipv4;
+ nfo.vlan_id = listen_node->vlan_id;
+ nfo.user_pri = listen_node->user_pri;
+ if (!list_empty(&listen_node->child_listen_list)) {
+ i40iw_qhash_ctrl(iwdev,
+ listen_node,
+ &nfo,
+ ipaddr, ipv4, ifup);
+ } else if (memcmp(listen_node->loc_addr, ip_zero,
+ ipv4 ? 4 : 16)) {
+ ret = i40iw_manage_qhash(iwdev,
+ &nfo,
+ I40IW_QHASH_TYPE_TCP_SYN,
+ op,
+ NULL,
+ false);
+ if (!ret)
+ listen_node->qhash_set = ifup;
+ }
+ }
+ }
+ spin_unlock_irqrestore(&cm_core->listen_list_lock, flags);
+
+ /* disconnect any connected qp's on ifdown */
+ if (!ifup)
+ i40iw_cm_disconnect_all(iwdev);
+}
diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.h b/drivers/infiniband/hw/i40iw/i40iw_cm.h
index e9046d9..2e52e38 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_cm.h
+++ b/drivers/infiniband/hw/i40iw/i40iw_cm.h
@@ -56,8 +56,6 @@
#define I40IW_MAX_IETF_SIZE 32
-#define MPA_ZERO_PAD_LEN 4
-
/* IETF RTR MSG Fields */
#define IETF_PEER_TO_PEER 0x8000
#define IETF_FLPDU_ZERO_LEN 0x4000
@@ -299,6 +297,7 @@
enum i40iw_cm_listener_state listener_state;
u32 reused_node;
u8 user_pri;
+ u8 tos;
u16 vlan_id;
bool qhash_set;
bool ipv4;
@@ -341,9 +340,11 @@
int accept_pend;
struct list_head timer_entry;
struct list_head reset_entry;
+ struct list_head connected_entry;
atomic_t passive_state;
bool qhash_set;
u8 user_pri;
+ u8 tos;
bool ipv4;
bool snd_mark_en;
u16 lsmm_size;
@@ -368,7 +369,8 @@
u32 rem_addr[4];
u16 vlan_id;
int backlog;
- u16 user_pri;
+ u8 user_pri;
+ u8 tos;
bool ipv4;
};
@@ -445,4 +447,7 @@
u8 *mac_addr,
u32 action);
+void i40iw_if_notify(struct i40iw_device *iwdev, struct net_device *netdev,
+ u32 *ipaddr, bool ipv4, bool ifup);
+void i40iw_cm_disconnect_all(struct i40iw_device *iwdev);
#endif /* I40IW_CM_H */
diff --git a/drivers/infiniband/hw/i40iw/i40iw_ctrl.c b/drivers/infiniband/hw/i40iw/i40iw_ctrl.c
index 2c4b4d0..392f783 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_ctrl.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_ctrl.c
@@ -103,6 +103,7 @@
if (newtail != tail) {
/* SUCCESS */
I40IW_RING_MOVE_TAIL(cqp->sq_ring);
+ cqp->dev->cqp_cmd_stats[OP_COMPLETED_COMMANDS]++;
return 0;
}
udelay(I40IW_SLEEP_COUNT);
@@ -223,6 +224,136 @@
}
/**
+ * i40iw_fill_qos_list - Change all unknown qs handles to available ones
+ * @qs_list: list of qs_handles to be fixed with valid qs_handles
+ */
+static void i40iw_fill_qos_list(u16 *qs_list)
+{
+ u16 qshandle = qs_list[0];
+ int i;
+
+ for (i = 0; i < I40IW_MAX_USER_PRIORITY; i++) {
+ if (qs_list[i] == QS_HANDLE_UNKNOWN)
+ qs_list[i] = qshandle;
+ else
+ qshandle = qs_list[i];
+ }
+}
+
+/**
+ * i40iw_qp_from_entry - Given entry, get to the qp structure
+ * @entry: Points to list of qp structure
+ */
+static struct i40iw_sc_qp *i40iw_qp_from_entry(struct list_head *entry)
+{
+ if (!entry)
+ return NULL;
+
+ return (struct i40iw_sc_qp *)((char *)entry - offsetof(struct i40iw_sc_qp, list));
+}
+
+/**
+ * i40iw_get_qp - get the next qp from the list given current qp
+ * @head: Listhead of qp's
+ * @qp: current qp
+ */
+static struct i40iw_sc_qp *i40iw_get_qp(struct list_head *head, struct i40iw_sc_qp *qp)
+{
+ struct list_head *entry = NULL;
+ struct list_head *lastentry;
+
+ if (list_empty(head))
+ return NULL;
+
+ if (!qp) {
+ entry = head->next;
+ } else {
+ lastentry = &qp->list;
+ entry = (lastentry != head) ? lastentry->next : NULL;
+ }
+
+ return i40iw_qp_from_entry(entry);
+}
+
+/**
+ * i40iw_change_l2params - given the new l2 parameters, change all qp
+ * @vsi: pointer to the vsi structure
+ * @l2params: New paramaters from l2
+ */
+void i40iw_change_l2params(struct i40iw_sc_vsi *vsi, struct i40iw_l2params *l2params)
+{
+ struct i40iw_sc_dev *dev = vsi->dev;
+ struct i40iw_sc_qp *qp = NULL;
+ bool qs_handle_change = false;
+ bool mss_change = false;
+ unsigned long flags;
+ u16 qs_handle;
+ int i;
+
+ if (vsi->mss != l2params->mss) {
+ mss_change = true;
+ vsi->mss = l2params->mss;
+ }
+
+ i40iw_fill_qos_list(l2params->qs_handle_list);
+ for (i = 0; i < I40IW_MAX_USER_PRIORITY; i++) {
+ qs_handle = l2params->qs_handle_list[i];
+ if (vsi->qos[i].qs_handle != qs_handle)
+ qs_handle_change = true;
+ else if (!mss_change)
+ continue; /* no MSS nor qs handle change */
+ spin_lock_irqsave(&vsi->qos[i].lock, flags);
+ qp = i40iw_get_qp(&vsi->qos[i].qplist, qp);
+ while (qp) {
+ if (mss_change)
+ i40iw_qp_mss_modify(dev, qp);
+ if (qs_handle_change) {
+ qp->qs_handle = qs_handle;
+ /* issue cqp suspend command */
+ i40iw_qp_suspend_resume(dev, qp, true);
+ }
+ qp = i40iw_get_qp(&vsi->qos[i].qplist, qp);
+ }
+ spin_unlock_irqrestore(&vsi->qos[i].lock, flags);
+ vsi->qos[i].qs_handle = qs_handle;
+ }
+}
+
+/**
+ * i40iw_qp_rem_qos - remove qp from qos lists during destroy qp
+ * @qp: qp to be removed from qos
+ */
+static void i40iw_qp_rem_qos(struct i40iw_sc_qp *qp)
+{
+ struct i40iw_sc_vsi *vsi = qp->vsi;
+ unsigned long flags;
+
+ if (!qp->on_qoslist)
+ return;
+ spin_lock_irqsave(&vsi->qos[qp->user_pri].lock, flags);
+ list_del(&qp->list);
+ spin_unlock_irqrestore(&vsi->qos[qp->user_pri].lock, flags);
+}
+
+/**
+ * i40iw_qp_add_qos - called during setctx fot qp to be added to qos
+ * @qp: qp to be added to qos
+ */
+void i40iw_qp_add_qos(struct i40iw_sc_qp *qp)
+{
+ struct i40iw_sc_vsi *vsi = qp->vsi;
+ unsigned long flags;
+
+ if (qp->on_qoslist)
+ return;
+ spin_lock_irqsave(&vsi->qos[qp->user_pri].lock, flags);
+ qp->qs_handle = vsi->qos[qp->user_pri].qs_handle;
+ list_add(&qp->list, &vsi->qos[qp->user_pri].qplist);
+ qp->on_qoslist = true;
+ spin_unlock_irqrestore(&vsi->qos[qp->user_pri].lock, flags);
+}
+
+/**
* i40iw_sc_pd_init - initialize sc pd struct
* @dev: sc device struct
* @pd: sc pd ptr
@@ -292,6 +423,9 @@
info->dev->cqp = cqp;
I40IW_RING_INIT(cqp->sq_ring, cqp->sq_size);
+ cqp->dev->cqp_cmd_stats[OP_REQUESTED_COMMANDS] = 0;
+ cqp->dev->cqp_cmd_stats[OP_COMPLETED_COMMANDS] = 0;
+
i40iw_debug(cqp->dev, I40IW_DEBUG_WQE,
"%s: sq_size[%04d] hw_sq_size[%04d] sq_base[%p] sq_pa[%llxh] cqp[%p] polarity[x%04X]\n",
__func__, cqp->sq_size, cqp->hw_sq_size,
@@ -302,12 +436,10 @@
/**
* i40iw_sc_cqp_create - create cqp during bringup
* @cqp: struct for cqp hw
- * @disable_pfpdus: if pfpdu to be disabled
* @maj_err: If error, major err number
* @min_err: If error, minor err number
*/
static enum i40iw_status_code i40iw_sc_cqp_create(struct i40iw_sc_cqp *cqp,
- bool disable_pfpdus,
u16 *maj_err,
u16 *min_err)
{
@@ -326,9 +458,6 @@
temp = LS_64(cqp->hw_sq_size, I40IW_CQPHC_SQSIZE) |
LS_64(cqp->struct_ver, I40IW_CQPHC_SVER);
- if (disable_pfpdus)
- temp |= LS_64(1, I40IW_CQPHC_DISABLE_PFPDUS);
-
set_64bit_val(cqp->host_ctx, 0, temp);
set_64bit_val(cqp->host_ctx, 8, cqp->sq_pa);
temp = LS_64(cqp->enabled_vf_count, I40IW_CQPHC_ENABLED_VFS) |
@@ -424,6 +553,7 @@
return NULL;
}
I40IW_ATOMIC_RING_MOVE_HEAD(cqp->sq_ring, wqe_idx, ret_code);
+ cqp->dev->cqp_cmd_stats[OP_REQUESTED_COMMANDS]++;
if (ret_code)
return NULL;
if (!wqe_idx)
@@ -559,6 +689,8 @@
I40IW_RING_GETCURRENT_HEAD(ccq->cq_uk.cq_ring));
wmb(); /* write shadow area before tail */
I40IW_RING_MOVE_TAIL(cqp->sq_ring);
+ ccq->dev->cqp_cmd_stats[OP_COMPLETED_COMMANDS]++;
+
return ret_code;
}
@@ -1051,6 +1183,7 @@
u64 qw1 = 0;
u64 qw2 = 0;
u64 temp;
+ struct i40iw_sc_vsi *vsi = info->vsi;
wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
if (!wqe)
@@ -1082,7 +1215,7 @@
LS_64(info->dest_ip[2], I40IW_CQPSQ_QHASH_ADDR2) |
LS_64(info->dest_ip[3], I40IW_CQPSQ_QHASH_ADDR3));
}
- qw2 = LS_64(cqp->dev->qs_handle, I40IW_CQPSQ_QHASH_QS_HANDLE);
+ qw2 = LS_64(vsi->qos[info->user_pri].qs_handle, I40IW_CQPSQ_QHASH_QS_HANDLE);
if (info->vlan_valid)
qw2 |= LS_64(info->vlan_id, I40IW_CQPSQ_QHASH_VLANID);
set_64bit_val(wqe, 16, qw2);
@@ -2103,6 +2236,7 @@
u32 offset;
qp->dev = info->pd->dev;
+ qp->vsi = info->vsi;
qp->sq_pa = info->sq_pa;
qp->rq_pa = info->rq_pa;
qp->hw_host_ctx_pa = info->host_ctx_pa;
@@ -2151,7 +2285,7 @@
qp->rq_tph_en = info->rq_tph_en;
qp->rcv_tph_en = info->rcv_tph_en;
qp->xmit_tph_en = info->xmit_tph_en;
- qp->qs_handle = qp->pd->dev->qs_handle;
+ qp->qs_handle = qp->vsi->qos[qp->user_pri].qs_handle;
qp->exception_lan_queue = qp->pd->dev->exception_lan_queue;
return 0;
@@ -2296,6 +2430,7 @@
struct i40iw_sc_cqp *cqp;
u64 header;
+ i40iw_qp_rem_qos(qp);
cqp = qp->pd->dev->cqp;
wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
if (!wqe)
@@ -2443,10 +2578,20 @@
{
struct i40iwarp_offload_info *iw;
struct i40iw_tcp_offload_info *tcp;
+ struct i40iw_sc_vsi *vsi;
+ struct i40iw_sc_dev *dev;
u64 qw0, qw3, qw7 = 0;
iw = info->iwarp_info;
tcp = info->tcp_info;
+ vsi = qp->vsi;
+ dev = qp->dev;
+ if (info->add_to_qoslist) {
+ qp->user_pri = info->user_pri;
+ i40iw_qp_add_qos(qp);
+ i40iw_debug(qp->dev, I40IW_DEBUG_DCB, "%s qp[%d] UP[%d] qset[%d]\n",
+ __func__, qp->qp_uk.qp_id, qp->user_pri, qp->qs_handle);
+ }
qw0 = LS_64(qp->qp_uk.rq_wqe_size, I40IWQPC_RQWQESIZE) |
LS_64(info->err_rq_idx_valid, I40IWQPC_ERR_RQ_IDX_VALID) |
LS_64(qp->rcv_tph_en, I40IWQPC_RCVTPHEN) |
@@ -2487,16 +2632,14 @@
LS_64(iw->rdmap_ver, I40IWQPC_RDMAP_VER);
qw7 |= LS_64(iw->pd_id, I40IWQPC_PDIDX);
- set_64bit_val(qp_ctx, 144, qp->q2_pa);
+ set_64bit_val(qp_ctx,
+ 144,
+ LS_64(qp->q2_pa, I40IWQPC_Q2ADDR) |
+ LS_64(vsi->fcn_id, I40IWQPC_STAT_INDEX));
set_64bit_val(qp_ctx,
152,
LS_64(iw->last_byte_sent, I40IWQPC_LASTBYTESENT));
- /*
- * Hard-code IRD_SIZE to hw-limit, 128, in qpctx, i.e matching an
- *advertisable IRD of 64
- */
- iw->ird_size = I40IW_QPCTX_ENCD_MAXIRD;
set_64bit_val(qp_ctx,
160,
LS_64(iw->ord_size, I40IWQPC_ORDSIZE) |
@@ -2507,6 +2650,9 @@
LS_64(iw->bind_en, I40IWQPC_BINDEN) |
LS_64(iw->fast_reg_en, I40IWQPC_FASTREGEN) |
LS_64(iw->priv_mode_en, I40IWQPC_PRIVEN) |
+ LS_64((((vsi->stats_fcn_id_alloc) &&
+ (dev->is_pf) && (vsi->fcn_id >= I40IW_FIRST_NON_PF_STAT)) ? 1 : 0),
+ I40IWQPC_USESTATSINSTANCE) |
LS_64(1, I40IWQPC_IWARPMODE) |
LS_64(iw->rcv_mark_en, I40IWQPC_RCVMARKERS) |
LS_64(iw->align_hdrs, I40IWQPC_ALIGNHDRS) |
@@ -2623,7 +2769,9 @@
u64 *wqe;
struct i40iw_sc_cqp *cqp;
u64 header;
+ enum i40iw_page_size page_size;
+ page_size = (info->page_size == 0x200000) ? I40IW_PAGE_SIZE_2M : I40IW_PAGE_SIZE_4K;
cqp = dev->cqp;
wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, scratch);
if (!wqe)
@@ -2643,7 +2791,7 @@
LS_64(1, I40IW_CQPSQ_STAG_MR) |
LS_64(info->access_rights, I40IW_CQPSQ_STAG_ARIGHTS) |
LS_64(info->chunk_size, I40IW_CQPSQ_STAG_LPBLSIZE) |
- LS_64(info->page_size, I40IW_CQPSQ_STAG_HPAGESIZE) |
+ LS_64(page_size, I40IW_CQPSQ_STAG_HPAGESIZE) |
LS_64(info->remote_access, I40IW_CQPSQ_STAG_REMACCENABLED) |
LS_64(info->use_hmc_fcn_index, I40IW_CQPSQ_STAG_USEHMCFNIDX) |
LS_64(info->use_pf_rid, I40IW_CQPSQ_STAG_USEPFRID) |
@@ -2679,7 +2827,9 @@
u32 pble_obj_cnt;
bool remote_access;
u8 addr_type;
+ enum i40iw_page_size page_size;
+ page_size = (info->page_size == 0x200000) ? I40IW_PAGE_SIZE_2M : I40IW_PAGE_SIZE_4K;
if (info->access_rights & (I40IW_ACCESS_FLAGS_REMOTEREAD_ONLY |
I40IW_ACCESS_FLAGS_REMOTEWRITE_ONLY))
remote_access = true;
@@ -2722,7 +2872,7 @@
header = LS_64(I40IW_CQP_OP_REG_MR, I40IW_CQPSQ_OPCODE) |
LS_64(1, I40IW_CQPSQ_STAG_MR) |
LS_64(info->chunk_size, I40IW_CQPSQ_STAG_LPBLSIZE) |
- LS_64(info->page_size, I40IW_CQPSQ_STAG_HPAGESIZE) |
+ LS_64(page_size, I40IW_CQPSQ_STAG_HPAGESIZE) |
LS_64(info->access_rights, I40IW_CQPSQ_STAG_ARIGHTS) |
LS_64(remote_access, I40IW_CQPSQ_STAG_REMACCENABLED) |
LS_64(addr_type, I40IW_CQPSQ_STAG_VABASEDTO) |
@@ -2937,7 +3087,9 @@
u64 temp, header;
u64 *wqe;
u32 wqe_idx;
+ enum i40iw_page_size page_size;
+ page_size = (info->page_size == 0x200000) ? I40IW_PAGE_SIZE_2M : I40IW_PAGE_SIZE_4K;
wqe = i40iw_qp_get_next_send_wqe(&qp->qp_uk, &wqe_idx, I40IW_QP_WQE_MIN_SIZE,
0, info->wr_id);
if (!wqe)
@@ -2964,7 +3116,7 @@
LS_64(info->stag_idx, I40IWQPSQ_STAGINDEX) |
LS_64(I40IWQP_OP_FAST_REGISTER, I40IWQPSQ_OPCODE) |
LS_64(info->chunk_size, I40IWQPSQ_LPBLSIZE) |
- LS_64(info->page_size, I40IWQPSQ_HPAGESIZE) |
+ LS_64(page_size, I40IWQPSQ_HPAGESIZE) |
LS_64(info->access_rights, I40IWQPSQ_STAGRIGHTS) |
LS_64(info->addr_type, I40IWQPSQ_VABASEDTO) |
LS_64(info->read_fence, I40IWQPSQ_READFENCE) |
@@ -3959,7 +4111,7 @@
struct cqp_commands_info *pcmdinfo)
{
enum i40iw_status_code status = 0;
- unsigned long flags;
+ unsigned long flags;
spin_lock_irqsave(&dev->cqp_lock, flags);
if (list_empty(&dev->cqp_cmd_head) && !i40iw_ring_full(dev->cqp))
@@ -3978,7 +4130,7 @@
{
enum i40iw_status_code status = 0;
struct cqp_commands_info *pcmdinfo;
- unsigned long flags;
+ unsigned long flags;
spin_lock_irqsave(&dev->cqp_lock, flags);
while (!list_empty(&dev->cqp_cmd_head) && !i40iw_ring_full(dev->cqp)) {
@@ -4055,7 +4207,6 @@
u16 ddp_seg_len;
int copy_len = 0;
u8 is_tagged = 0;
- enum i40iw_flush_opcode flush_code = FLUSH_INVALID;
u32 opcode;
struct i40iw_terminate_hdr *termhdr;
@@ -4228,9 +4379,6 @@
if (copy_len)
memcpy(termhdr + 1, pkt, copy_len);
- if (flush_code && !info->in_rdrsp_wr)
- qp->sq_flush = (info->sq) ? true : false;
-
return sizeof(struct i40iw_terminate_hdr) + copy_len;
}
@@ -4321,286 +4469,370 @@
}
/**
- * i40iw_hw_stat_init - Initiliaze HW stats table
- * @devstat: pestat struct
+ * i40iw_sc_vsi_init - Initialize virtual device
+ * @vsi: pointer to the vsi structure
+ * @info: parameters to initialize vsi
+ **/
+void i40iw_sc_vsi_init(struct i40iw_sc_vsi *vsi, struct i40iw_vsi_init_info *info)
+{
+ int i;
+
+ vsi->dev = info->dev;
+ vsi->back_vsi = info->back_vsi;
+ vsi->mss = info->params->mss;
+ i40iw_fill_qos_list(info->params->qs_handle_list);
+
+ for (i = 0; i < I40IW_MAX_USER_PRIORITY; i++) {
+ vsi->qos[i].qs_handle =
+ info->params->qs_handle_list[i];
+ i40iw_debug(vsi->dev, I40IW_DEBUG_DCB, "qset[%d]: %d\n", i, vsi->qos[i].qs_handle);
+ spin_lock_init(&vsi->qos[i].lock);
+ INIT_LIST_HEAD(&vsi->qos[i].qplist);
+ }
+}
+
+/**
+ * i40iw_hw_stats_init - Initiliaze HW stats table
+ * @stats: pestat struct
* @fcn_idx: PCI fn id
- * @hw: PF i40iw_hw structure.
* @is_pf: Is it a PF?
*
- * Populate the HW stat table with register offset addr for each
- * stat. And start the perioidic stats timer.
+ * Populate the HW stats table with register offset addr for each
+ * stats. And start the perioidic stats timer.
*/
-static void i40iw_hw_stat_init(struct i40iw_dev_pestat *devstat,
- u8 fcn_idx,
- struct i40iw_hw *hw, bool is_pf)
+void i40iw_hw_stats_init(struct i40iw_vsi_pestat *stats, u8 fcn_idx, bool is_pf)
{
- u32 stat_reg_offset;
- u32 stat_index;
- struct i40iw_dev_hw_stat_offsets *stat_table =
- &devstat->hw_stat_offsets;
- struct i40iw_dev_hw_stats *last_rd_stats = &devstat->last_read_hw_stats;
-
- devstat->hw = hw;
+ u32 stats_reg_offset;
+ u32 stats_index;
+ struct i40iw_dev_hw_stats_offsets *stats_table =
+ &stats->hw_stats_offsets;
+ struct i40iw_dev_hw_stats *last_rd_stats = &stats->last_read_hw_stats;
if (is_pf) {
- stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_IP4RXDISCARD] =
+ stats_table->stats_offset_32[I40IW_HW_STAT_INDEX_IP4RXDISCARD] =
I40E_GLPES_PFIP4RXDISCARD(fcn_idx);
- stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_IP4RXTRUNC] =
+ stats_table->stats_offset_32[I40IW_HW_STAT_INDEX_IP4RXTRUNC] =
I40E_GLPES_PFIP4RXTRUNC(fcn_idx);
- stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_IP4TXNOROUTE] =
+ stats_table->stats_offset_32[I40IW_HW_STAT_INDEX_IP4TXNOROUTE] =
I40E_GLPES_PFIP4TXNOROUTE(fcn_idx);
- stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_IP6RXDISCARD] =
+ stats_table->stats_offset_32[I40IW_HW_STAT_INDEX_IP6RXDISCARD] =
I40E_GLPES_PFIP6RXDISCARD(fcn_idx);
- stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_IP6RXTRUNC] =
+ stats_table->stats_offset_32[I40IW_HW_STAT_INDEX_IP6RXTRUNC] =
I40E_GLPES_PFIP6RXTRUNC(fcn_idx);
- stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_IP6TXNOROUTE] =
+ stats_table->stats_offset_32[I40IW_HW_STAT_INDEX_IP6TXNOROUTE] =
I40E_GLPES_PFIP6TXNOROUTE(fcn_idx);
- stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_TCPRTXSEG] =
+ stats_table->stats_offset_32[I40IW_HW_STAT_INDEX_TCPRTXSEG] =
I40E_GLPES_PFTCPRTXSEG(fcn_idx);
- stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_TCPRXOPTERR] =
+ stats_table->stats_offset_32[I40IW_HW_STAT_INDEX_TCPRXOPTERR] =
I40E_GLPES_PFTCPRXOPTERR(fcn_idx);
- stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_TCPRXPROTOERR] =
+ stats_table->stats_offset_32[I40IW_HW_STAT_INDEX_TCPRXPROTOERR] =
I40E_GLPES_PFTCPRXPROTOERR(fcn_idx);
- stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP4RXOCTS] =
+ stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP4RXOCTS] =
I40E_GLPES_PFIP4RXOCTSLO(fcn_idx);
- stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP4RXPKTS] =
+ stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP4RXPKTS] =
I40E_GLPES_PFIP4RXPKTSLO(fcn_idx);
- stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP4RXFRAGS] =
+ stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP4RXFRAGS] =
I40E_GLPES_PFIP4RXFRAGSLO(fcn_idx);
- stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP4RXMCPKTS] =
+ stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP4RXMCPKTS] =
I40E_GLPES_PFIP4RXMCPKTSLO(fcn_idx);
- stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP4TXOCTS] =
+ stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP4TXOCTS] =
I40E_GLPES_PFIP4TXOCTSLO(fcn_idx);
- stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP4TXPKTS] =
+ stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP4TXPKTS] =
I40E_GLPES_PFIP4TXPKTSLO(fcn_idx);
- stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP4TXFRAGS] =
+ stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP4TXFRAGS] =
I40E_GLPES_PFIP4TXFRAGSLO(fcn_idx);
- stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP4TXMCPKTS] =
+ stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP4TXMCPKTS] =
I40E_GLPES_PFIP4TXMCPKTSLO(fcn_idx);
- stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP6RXOCTS] =
+ stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP6RXOCTS] =
I40E_GLPES_PFIP6RXOCTSLO(fcn_idx);
- stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP6RXPKTS] =
+ stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP6RXPKTS] =
I40E_GLPES_PFIP6RXPKTSLO(fcn_idx);
- stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP6RXFRAGS] =
+ stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP6RXFRAGS] =
I40E_GLPES_PFIP6RXFRAGSLO(fcn_idx);
- stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP6RXMCPKTS] =
+ stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP6RXMCPKTS] =
I40E_GLPES_PFIP6RXMCPKTSLO(fcn_idx);
- stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP6TXOCTS] =
+ stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP6TXOCTS] =
I40E_GLPES_PFIP6TXOCTSLO(fcn_idx);
- stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP6TXPKTS] =
+ stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP6TXPKTS] =
I40E_GLPES_PFIP6TXPKTSLO(fcn_idx);
- stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP6TXPKTS] =
+ stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP6TXPKTS] =
I40E_GLPES_PFIP6TXPKTSLO(fcn_idx);
- stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP6TXFRAGS] =
+ stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP6TXFRAGS] =
I40E_GLPES_PFIP6TXFRAGSLO(fcn_idx);
- stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_TCPRXSEGS] =
+ stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_TCPRXSEGS] =
I40E_GLPES_PFTCPRXSEGSLO(fcn_idx);
- stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_TCPTXSEG] =
+ stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_TCPTXSEG] =
I40E_GLPES_PFTCPTXSEGLO(fcn_idx);
- stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_RDMARXRDS] =
+ stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_RDMARXRDS] =
I40E_GLPES_PFRDMARXRDSLO(fcn_idx);
- stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_RDMARXSNDS] =
+ stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_RDMARXSNDS] =
I40E_GLPES_PFRDMARXSNDSLO(fcn_idx);
- stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_RDMARXWRS] =
+ stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_RDMARXWRS] =
I40E_GLPES_PFRDMARXWRSLO(fcn_idx);
- stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_RDMATXRDS] =
+ stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_RDMATXRDS] =
I40E_GLPES_PFRDMATXRDSLO(fcn_idx);
- stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_RDMATXSNDS] =
+ stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_RDMATXSNDS] =
I40E_GLPES_PFRDMATXSNDSLO(fcn_idx);
- stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_RDMATXWRS] =
+ stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_RDMATXWRS] =
I40E_GLPES_PFRDMATXWRSLO(fcn_idx);
- stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_RDMAVBND] =
+ stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_RDMAVBND] =
I40E_GLPES_PFRDMAVBNDLO(fcn_idx);
- stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_RDMAVINV] =
+ stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_RDMAVINV] =
I40E_GLPES_PFRDMAVINVLO(fcn_idx);
} else {
- stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_IP4RXDISCARD] =
+ stats_table->stats_offset_32[I40IW_HW_STAT_INDEX_IP4RXDISCARD] =
I40E_GLPES_VFIP4RXDISCARD(fcn_idx);
- stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_IP4RXTRUNC] =
+ stats_table->stats_offset_32[I40IW_HW_STAT_INDEX_IP4RXTRUNC] =
I40E_GLPES_VFIP4RXTRUNC(fcn_idx);
- stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_IP4TXNOROUTE] =
+ stats_table->stats_offset_32[I40IW_HW_STAT_INDEX_IP4TXNOROUTE] =
I40E_GLPES_VFIP4TXNOROUTE(fcn_idx);
- stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_IP6RXDISCARD] =
+ stats_table->stats_offset_32[I40IW_HW_STAT_INDEX_IP6RXDISCARD] =
I40E_GLPES_VFIP6RXDISCARD(fcn_idx);
- stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_IP6RXTRUNC] =
+ stats_table->stats_offset_32[I40IW_HW_STAT_INDEX_IP6RXTRUNC] =
I40E_GLPES_VFIP6RXTRUNC(fcn_idx);
- stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_IP6TXNOROUTE] =
+ stats_table->stats_offset_32[I40IW_HW_STAT_INDEX_IP6TXNOROUTE] =
I40E_GLPES_VFIP6TXNOROUTE(fcn_idx);
- stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_TCPRTXSEG] =
+ stats_table->stats_offset_32[I40IW_HW_STAT_INDEX_TCPRTXSEG] =
I40E_GLPES_VFTCPRTXSEG(fcn_idx);
- stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_TCPRXOPTERR] =
+ stats_table->stats_offset_32[I40IW_HW_STAT_INDEX_TCPRXOPTERR] =
I40E_GLPES_VFTCPRXOPTERR(fcn_idx);
- stat_table->stat_offset_32[I40IW_HW_STAT_INDEX_TCPRXPROTOERR] =
+ stats_table->stats_offset_32[I40IW_HW_STAT_INDEX_TCPRXPROTOERR] =
I40E_GLPES_VFTCPRXPROTOERR(fcn_idx);
- stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP4RXOCTS] =
+ stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP4RXOCTS] =
I40E_GLPES_VFIP4RXOCTSLO(fcn_idx);
- stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP4RXPKTS] =
+ stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP4RXPKTS] =
I40E_GLPES_VFIP4RXPKTSLO(fcn_idx);
- stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP4RXFRAGS] =
+ stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP4RXFRAGS] =
I40E_GLPES_VFIP4RXFRAGSLO(fcn_idx);
- stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP4RXMCPKTS] =
+ stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP4RXMCPKTS] =
I40E_GLPES_VFIP4RXMCPKTSLO(fcn_idx);
- stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP4TXOCTS] =
+ stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP4TXOCTS] =
I40E_GLPES_VFIP4TXOCTSLO(fcn_idx);
- stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP4TXPKTS] =
+ stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP4TXPKTS] =
I40E_GLPES_VFIP4TXPKTSLO(fcn_idx);
- stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP4TXFRAGS] =
+ stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP4TXFRAGS] =
I40E_GLPES_VFIP4TXFRAGSLO(fcn_idx);
- stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP4TXMCPKTS] =
+ stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP4TXMCPKTS] =
I40E_GLPES_VFIP4TXMCPKTSLO(fcn_idx);
- stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP6RXOCTS] =
+ stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP6RXOCTS] =
I40E_GLPES_VFIP6RXOCTSLO(fcn_idx);
- stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP6RXPKTS] =
+ stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP6RXPKTS] =
I40E_GLPES_VFIP6RXPKTSLO(fcn_idx);
- stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP6RXFRAGS] =
+ stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP6RXFRAGS] =
I40E_GLPES_VFIP6RXFRAGSLO(fcn_idx);
- stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP6RXMCPKTS] =
+ stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP6RXMCPKTS] =
I40E_GLPES_VFIP6RXMCPKTSLO(fcn_idx);
- stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP6TXOCTS] =
+ stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP6TXOCTS] =
I40E_GLPES_VFIP6TXOCTSLO(fcn_idx);
- stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP6TXPKTS] =
+ stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP6TXPKTS] =
I40E_GLPES_VFIP6TXPKTSLO(fcn_idx);
- stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP6TXPKTS] =
+ stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP6TXPKTS] =
I40E_GLPES_VFIP6TXPKTSLO(fcn_idx);
- stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_IP6TXFRAGS] =
+ stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_IP6TXFRAGS] =
I40E_GLPES_VFIP6TXFRAGSLO(fcn_idx);
- stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_TCPRXSEGS] =
+ stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_TCPRXSEGS] =
I40E_GLPES_VFTCPRXSEGSLO(fcn_idx);
- stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_TCPTXSEG] =
+ stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_TCPTXSEG] =
I40E_GLPES_VFTCPTXSEGLO(fcn_idx);
- stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_RDMARXRDS] =
+ stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_RDMARXRDS] =
I40E_GLPES_VFRDMARXRDSLO(fcn_idx);
- stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_RDMARXSNDS] =
+ stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_RDMARXSNDS] =
I40E_GLPES_VFRDMARXSNDSLO(fcn_idx);
- stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_RDMARXWRS] =
+ stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_RDMARXWRS] =
I40E_GLPES_VFRDMARXWRSLO(fcn_idx);
- stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_RDMATXRDS] =
+ stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_RDMATXRDS] =
I40E_GLPES_VFRDMATXRDSLO(fcn_idx);
- stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_RDMATXSNDS] =
+ stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_RDMATXSNDS] =
I40E_GLPES_VFRDMATXSNDSLO(fcn_idx);
- stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_RDMATXWRS] =
+ stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_RDMATXWRS] =
I40E_GLPES_VFRDMATXWRSLO(fcn_idx);
- stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_RDMAVBND] =
+ stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_RDMAVBND] =
I40E_GLPES_VFRDMAVBNDLO(fcn_idx);
- stat_table->stat_offset_64[I40IW_HW_STAT_INDEX_RDMAVINV] =
+ stats_table->stats_offset_64[I40IW_HW_STAT_INDEX_RDMAVINV] =
I40E_GLPES_VFRDMAVINVLO(fcn_idx);
}
- for (stat_index = 0; stat_index < I40IW_HW_STAT_INDEX_MAX_64;
- stat_index++) {
- stat_reg_offset = stat_table->stat_offset_64[stat_index];
- last_rd_stats->stat_value_64[stat_index] =
- readq(devstat->hw->hw_addr + stat_reg_offset);
+ for (stats_index = 0; stats_index < I40IW_HW_STAT_INDEX_MAX_64;
+ stats_index++) {
+ stats_reg_offset = stats_table->stats_offset_64[stats_index];
+ last_rd_stats->stats_value_64[stats_index] =
+ readq(stats->hw->hw_addr + stats_reg_offset);
}
- for (stat_index = 0; stat_index < I40IW_HW_STAT_INDEX_MAX_32;
- stat_index++) {
- stat_reg_offset = stat_table->stat_offset_32[stat_index];
- last_rd_stats->stat_value_32[stat_index] =
- i40iw_rd32(devstat->hw, stat_reg_offset);
+ for (stats_index = 0; stats_index < I40IW_HW_STAT_INDEX_MAX_32;
+ stats_index++) {
+ stats_reg_offset = stats_table->stats_offset_32[stats_index];
+ last_rd_stats->stats_value_32[stats_index] =
+ i40iw_rd32(stats->hw, stats_reg_offset);
}
}
/**
- * i40iw_hw_stat_read_32 - Read 32-bit HW stat counters and accommodates for roll-overs.
- * @devstat: pestat struct
- * @index: index in HW stat table which contains offset reg-addr
- * @value: hw stat value
+ * i40iw_hw_stats_read_32 - Read 32-bit HW stats counters and accommodates for roll-overs.
+ * @stat: pestat struct
+ * @index: index in HW stats table which contains offset reg-addr
+ * @value: hw stats value
*/
-static void i40iw_hw_stat_read_32(struct i40iw_dev_pestat *devstat,
- enum i40iw_hw_stat_index_32b index,
- u64 *value)
+void i40iw_hw_stats_read_32(struct i40iw_vsi_pestat *stats,
+ enum i40iw_hw_stats_index_32b index,
+ u64 *value)
{
- struct i40iw_dev_hw_stat_offsets *stat_table =
- &devstat->hw_stat_offsets;
- struct i40iw_dev_hw_stats *last_rd_stats = &devstat->last_read_hw_stats;
- struct i40iw_dev_hw_stats *hw_stats = &devstat->hw_stats;
- u64 new_stat_value = 0;
- u32 stat_reg_offset = stat_table->stat_offset_32[index];
+ struct i40iw_dev_hw_stats_offsets *stats_table =
+ &stats->hw_stats_offsets;
+ struct i40iw_dev_hw_stats *last_rd_stats = &stats->last_read_hw_stats;
+ struct i40iw_dev_hw_stats *hw_stats = &stats->hw_stats;
+ u64 new_stats_value = 0;
+ u32 stats_reg_offset = stats_table->stats_offset_32[index];
- new_stat_value = i40iw_rd32(devstat->hw, stat_reg_offset);
+ new_stats_value = i40iw_rd32(stats->hw, stats_reg_offset);
/*roll-over case */
- if (new_stat_value < last_rd_stats->stat_value_32[index])
- hw_stats->stat_value_32[index] += new_stat_value;
+ if (new_stats_value < last_rd_stats->stats_value_32[index])
+ hw_stats->stats_value_32[index] += new_stats_value;
else
- hw_stats->stat_value_32[index] +=
- new_stat_value - last_rd_stats->stat_value_32[index];
- last_rd_stats->stat_value_32[index] = new_stat_value;
- *value = hw_stats->stat_value_32[index];
+ hw_stats->stats_value_32[index] +=
+ new_stats_value - last_rd_stats->stats_value_32[index];
+ last_rd_stats->stats_value_32[index] = new_stats_value;
+ *value = hw_stats->stats_value_32[index];
}
/**
- * i40iw_hw_stat_read_64 - Read HW stat counters (greater than 32-bit) and accommodates for roll-overs.
- * @devstat: pestat struct
- * @index: index in HW stat table which contains offset reg-addr
- * @value: hw stat value
+ * i40iw_hw_stats_read_64 - Read HW stats counters (greater than 32-bit) and accommodates for roll-overs.
+ * @stats: pestat struct
+ * @index: index in HW stats table which contains offset reg-addr
+ * @value: hw stats value
*/
-static void i40iw_hw_stat_read_64(struct i40iw_dev_pestat *devstat,
- enum i40iw_hw_stat_index_64b index,
- u64 *value)
+void i40iw_hw_stats_read_64(struct i40iw_vsi_pestat *stats,
+ enum i40iw_hw_stats_index_64b index,
+ u64 *value)
{
- struct i40iw_dev_hw_stat_offsets *stat_table =
- &devstat->hw_stat_offsets;
- struct i40iw_dev_hw_stats *last_rd_stats = &devstat->last_read_hw_stats;
- struct i40iw_dev_hw_stats *hw_stats = &devstat->hw_stats;
- u64 new_stat_value = 0;
- u32 stat_reg_offset = stat_table->stat_offset_64[index];
+ struct i40iw_dev_hw_stats_offsets *stats_table =
+ &stats->hw_stats_offsets;
+ struct i40iw_dev_hw_stats *last_rd_stats = &stats->last_read_hw_stats;
+ struct i40iw_dev_hw_stats *hw_stats = &stats->hw_stats;
+ u64 new_stats_value = 0;
+ u32 stats_reg_offset = stats_table->stats_offset_64[index];
- new_stat_value = readq(devstat->hw->hw_addr + stat_reg_offset);
+ new_stats_value = readq(stats->hw->hw_addr + stats_reg_offset);
/*roll-over case */
- if (new_stat_value < last_rd_stats->stat_value_64[index])
- hw_stats->stat_value_64[index] += new_stat_value;
+ if (new_stats_value < last_rd_stats->stats_value_64[index])
+ hw_stats->stats_value_64[index] += new_stats_value;
else
- hw_stats->stat_value_64[index] +=
- new_stat_value - last_rd_stats->stat_value_64[index];
- last_rd_stats->stat_value_64[index] = new_stat_value;
- *value = hw_stats->stat_value_64[index];
+ hw_stats->stats_value_64[index] +=
+ new_stats_value - last_rd_stats->stats_value_64[index];
+ last_rd_stats->stats_value_64[index] = new_stats_value;
+ *value = hw_stats->stats_value_64[index];
}
/**
- * i40iw_hw_stat_read_all - read all HW stat counters
- * @devstat: pestat struct
- * @stat_values: hw stats structure
+ * i40iw_hw_stats_read_all - read all HW stat counters
+ * @stats: pestat struct
+ * @stats_values: hw stats structure
*
* Read all the HW stat counters and populates hw_stats structure
- * of passed-in dev's pestat as well as copy created in stat_values.
+ * of passed-in vsi's pestat as well as copy created in stat_values.
*/
-static void i40iw_hw_stat_read_all(struct i40iw_dev_pestat *devstat,
- struct i40iw_dev_hw_stats *stat_values)
+void i40iw_hw_stats_read_all(struct i40iw_vsi_pestat *stats,
+ struct i40iw_dev_hw_stats *stats_values)
{
- u32 stat_index;
+ u32 stats_index;
+ unsigned long flags;
- for (stat_index = 0; stat_index < I40IW_HW_STAT_INDEX_MAX_32;
- stat_index++)
- i40iw_hw_stat_read_32(devstat, stat_index,
- &stat_values->stat_value_32[stat_index]);
- for (stat_index = 0; stat_index < I40IW_HW_STAT_INDEX_MAX_64;
- stat_index++)
- i40iw_hw_stat_read_64(devstat, stat_index,
- &stat_values->stat_value_64[stat_index]);
+ spin_lock_irqsave(&stats->lock, flags);
+
+ for (stats_index = 0; stats_index < I40IW_HW_STAT_INDEX_MAX_32;
+ stats_index++)
+ i40iw_hw_stats_read_32(stats, stats_index,
+ &stats_values->stats_value_32[stats_index]);
+ for (stats_index = 0; stats_index < I40IW_HW_STAT_INDEX_MAX_64;
+ stats_index++)
+ i40iw_hw_stats_read_64(stats, stats_index,
+ &stats_values->stats_value_64[stats_index]);
+ spin_unlock_irqrestore(&stats->lock, flags);
}
/**
- * i40iw_hw_stat_refresh_all - Update all HW stat structs
- * @devstat: pestat struct
- * @stat_values: hw stats structure
+ * i40iw_hw_stats_refresh_all - Update all HW stats structs
+ * @stats: pestat struct
*
- * Read all the HW stat counters to refresh values in hw_stats structure
+ * Read all the HW stats counters to refresh values in hw_stats structure
* of passed-in dev's pestat
*/
-static void i40iw_hw_stat_refresh_all(struct i40iw_dev_pestat *devstat)
+void i40iw_hw_stats_refresh_all(struct i40iw_vsi_pestat *stats)
{
- u64 stat_value;
- u32 stat_index;
+ u64 stats_value;
+ u32 stats_index;
+ unsigned long flags;
- for (stat_index = 0; stat_index < I40IW_HW_STAT_INDEX_MAX_32;
- stat_index++)
- i40iw_hw_stat_read_32(devstat, stat_index, &stat_value);
- for (stat_index = 0; stat_index < I40IW_HW_STAT_INDEX_MAX_64;
- stat_index++)
- i40iw_hw_stat_read_64(devstat, stat_index, &stat_value);
+ spin_lock_irqsave(&stats->lock, flags);
+
+ for (stats_index = 0; stats_index < I40IW_HW_STAT_INDEX_MAX_32;
+ stats_index++)
+ i40iw_hw_stats_read_32(stats, stats_index, &stats_value);
+ for (stats_index = 0; stats_index < I40IW_HW_STAT_INDEX_MAX_64;
+ stats_index++)
+ i40iw_hw_stats_read_64(stats, stats_index, &stats_value);
+ spin_unlock_irqrestore(&stats->lock, flags);
+}
+
+/**
+ * i40iw_get_fcn_id - Return the function id
+ * @dev: pointer to the device
+ */
+static u8 i40iw_get_fcn_id(struct i40iw_sc_dev *dev)
+{
+ u8 fcn_id = I40IW_INVALID_FCN_ID;
+ u8 i;
+
+ for (i = I40IW_FIRST_NON_PF_STAT; i < I40IW_MAX_STATS_COUNT; i++)
+ if (!dev->fcn_id_array[i]) {
+ fcn_id = i;
+ dev->fcn_id_array[i] = true;
+ break;
+ }
+ return fcn_id;
+}
+
+/**
+ * i40iw_vsi_stats_init - Initialize the vsi statistics
+ * @vsi: pointer to the vsi structure
+ * @info: The info structure used for initialization
+ */
+enum i40iw_status_code i40iw_vsi_stats_init(struct i40iw_sc_vsi *vsi, struct i40iw_vsi_stats_info *info)
+{
+ u8 fcn_id = info->fcn_id;
+
+ if (info->alloc_fcn_id)
+ fcn_id = i40iw_get_fcn_id(vsi->dev);
+
+ if (fcn_id == I40IW_INVALID_FCN_ID)
+ return I40IW_ERR_NOT_READY;
+
+ vsi->pestat = info->pestat;
+ vsi->pestat->hw = vsi->dev->hw;
+
+ if (info->stats_initialize) {
+ i40iw_hw_stats_init(vsi->pestat, fcn_id, true);
+ spin_lock_init(&vsi->pestat->lock);
+ i40iw_hw_stats_start_timer(vsi);
+ }
+ vsi->stats_fcn_id_alloc = info->alloc_fcn_id;
+ vsi->fcn_id = fcn_id;
+ return I40IW_SUCCESS;
+}
+
+/**
+ * i40iw_vsi_stats_free - Free the vsi stats
+ * @vsi: pointer to the vsi structure
+ */
+void i40iw_vsi_stats_free(struct i40iw_sc_vsi *vsi)
+{
+ u8 fcn_id = vsi->fcn_id;
+
+ if ((vsi->stats_fcn_id_alloc) && (fcn_id != I40IW_INVALID_FCN_ID))
+ vsi->dev->fcn_id_array[fcn_id] = false;
+ i40iw_hw_stats_stop_timer(vsi);
}
static struct i40iw_cqp_ops iw_cqp_ops = {
@@ -4711,24 +4943,6 @@
NULL
};
-static const struct i40iw_device_pestat_ops iw_device_pestat_ops = {
- i40iw_hw_stat_init,
- i40iw_hw_stat_read_32,
- i40iw_hw_stat_read_64,
- i40iw_hw_stat_read_all,
- i40iw_hw_stat_refresh_all
-};
-
-/**
- * i40iw_device_init_pestat - Initialize the pestat structure
- * @dev: pestat struct
- */
-enum i40iw_status_code i40iw_device_init_pestat(struct i40iw_dev_pestat *devstat)
-{
- devstat->ops = iw_device_pestat_ops;
- return 0;
-}
-
/**
* i40iw_device_init - Initialize IWARP device
* @dev: IWARP device pointer
@@ -4750,14 +4964,7 @@
dev->debug_mask = info->debug_mask;
- ret_code = i40iw_device_init_pestat(&dev->dev_pestat);
- if (ret_code) {
- i40iw_debug(dev, I40IW_DEBUG_DEV,
- "%s: i40iw_device_init_pestat failed\n", __func__);
- return ret_code;
- }
dev->hmc_fn_id = info->hmc_fn_id;
- dev->qs_handle = info->qs_handle;
dev->exception_lan_queue = info->exception_lan_queue;
dev->is_pf = info->is_pf;
@@ -4770,15 +4977,10 @@
dev->hw = info->hw;
dev->hw->hw_addr = info->bar0;
- val = i40iw_rd32(dev->hw, I40E_GLPCI_DREVID);
- dev->hw_rev = (u8)RS_32(val, I40E_GLPCI_DREVID_DEFAULT_REVID);
-
if (dev->is_pf) {
- dev->dev_pestat.ops.iw_hw_stat_init(&dev->dev_pestat,
- dev->hmc_fn_id, dev->hw, true);
- spin_lock_init(&dev->dev_pestat.stats_lock);
- /*start the periodic stats_timer */
- i40iw_hw_stats_start_timer(dev);
+ val = i40iw_rd32(dev->hw, I40E_GLPCI_DREVID);
+ dev->hw_rev = (u8)RS_32(val, I40E_GLPCI_DREVID_DEFAULT_REVID);
+
val = i40iw_rd32(dev->hw, I40E_GLPCI_LBARCTRL);
db_size = (u8)RS_32(val, I40E_GLPCI_LBARCTRL_PE_DB_SIZE);
if ((db_size != I40IW_PE_DB_SIZE_4M) &&
diff --git a/drivers/infiniband/hw/i40iw/i40iw_d.h b/drivers/infiniband/hw/i40iw/i40iw_d.h
index 2fac1db..a39ac12 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_d.h
+++ b/drivers/infiniband/hw/i40iw/i40iw_d.h
@@ -35,6 +35,8 @@
#ifndef I40IW_D_H
#define I40IW_D_H
+#define I40IW_FIRST_USER_QP_ID 2
+
#define I40IW_DB_ADDR_OFFSET (4 * 1024 * 1024 - 64 * 1024)
#define I40IW_VF_DB_ADDR_OFFSET (64 * 1024)
@@ -67,6 +69,9 @@
#define I40IW_STAG_TYPE_NONSHARED 1
#define I40IW_MAX_USER_PRIORITY 8
+#define I40IW_MAX_STATS_COUNT 16
+#define I40IW_FIRST_NON_PF_STAT 4
+
#define LS_64_1(val, bits) ((u64)(uintptr_t)val << bits)
#define RS_64_1(val, bits) ((u64)(uintptr_t)val >> bits)
@@ -74,6 +79,8 @@
#define RS_32_1(val, bits) (u32)(val >> bits)
#define I40E_HI_DWORD(x) ((u32)((((x) >> 16) >> 16) & 0xFFFFFFFF))
+#define QS_HANDLE_UNKNOWN 0xffff
+
#define LS_64(val, field) (((u64)val << field ## _SHIFT) & (field ## _MASK))
#define RS_64(val, field) ((u64)(val & field ## _MASK) >> field ## _SHIFT)
@@ -1199,8 +1206,11 @@
#define I40IWQPC_RXCQNUM_SHIFT 32
#define I40IWQPC_RXCQNUM_MASK (0x1ffffULL << I40IWQPC_RXCQNUM_SHIFT)
-#define I40IWQPC_Q2ADDR_SHIFT I40IW_CQPHC_QPCTX_SHIFT
-#define I40IWQPC_Q2ADDR_MASK I40IW_CQPHC_QPCTX_MASK
+#define I40IWQPC_STAT_INDEX_SHIFT 0
+#define I40IWQPC_STAT_INDEX_MASK (0x1fULL << I40IWQPC_STAT_INDEX_SHIFT)
+
+#define I40IWQPC_Q2ADDR_SHIFT 0
+#define I40IWQPC_Q2ADDR_MASK (0xffffffffffffff00ULL << I40IWQPC_Q2ADDR_SHIFT)
#define I40IWQPC_LASTBYTESENT_SHIFT 0
#define I40IWQPC_LASTBYTESENT_MASK (0xffUL << I40IWQPC_LASTBYTESENT_SHIFT)
@@ -1232,11 +1242,8 @@
#define I40IWQPC_PRIVEN_SHIFT 25
#define I40IWQPC_PRIVEN_MASK (1UL << I40IWQPC_PRIVEN_SHIFT)
-#define I40IWQPC_LSMMPRESENT_SHIFT 26
-#define I40IWQPC_LSMMPRESENT_MASK (1UL << I40IWQPC_LSMMPRESENT_SHIFT)
-
-#define I40IWQPC_ADJUSTFORLSMM_SHIFT 27
-#define I40IWQPC_ADJUSTFORLSMM_MASK (1UL << I40IWQPC_ADJUSTFORLSMM_SHIFT)
+#define I40IWQPC_USESTATSINSTANCE_SHIFT 26
+#define I40IWQPC_USESTATSINSTANCE_MASK (1UL << I40IWQPC_USESTATSINSTANCE_SHIFT)
#define I40IWQPC_IWARPMODE_SHIFT 28
#define I40IWQPC_IWARPMODE_MASK (1UL << I40IWQPC_IWARPMODE_SHIFT)
@@ -1713,6 +1720,8 @@
#define OP_MANAGE_VF_PBLE_BP 28
#define OP_QUERY_FPM_VALUES 29
#define OP_COMMIT_FPM_VALUES 30
-#define OP_SIZE_CQP_STAT_ARRAY 31
+#define OP_REQUESTED_COMMANDS 31
+#define OP_COMPLETED_COMMANDS 32
+#define OP_SIZE_CQP_STAT_ARRAY 33
#endif
diff --git a/drivers/infiniband/hw/i40iw/i40iw_hw.c b/drivers/infiniband/hw/i40iw/i40iw_hw.c
index 0c92a40..476867a 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_hw.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_hw.c
@@ -62,7 +62,7 @@
max_mr = iwdev->sc_dev.hmc_info->hmc_obj[I40IW_HMC_IW_MR].cnt;
arp_table_size = iwdev->sc_dev.hmc_info->hmc_obj[I40IW_HMC_IW_ARP].cnt;
iwdev->max_cqe = 0xFFFFF;
- num_pds = max_qp * 4;
+ num_pds = I40IW_MAX_PDS;
resources_size = sizeof(struct i40iw_arp_entry) * arp_table_size;
resources_size += sizeof(unsigned long) * BITS_TO_LONGS(max_qp);
resources_size += sizeof(unsigned long) * BITS_TO_LONGS(max_mr);
@@ -308,7 +308,9 @@
iwqp = iwdev->qp_table[info->qp_cq_id];
if (!iwqp) {
spin_unlock_irqrestore(&iwdev->qptable_lock, flags);
- i40iw_pr_err("qp_id %d is already freed\n", info->qp_cq_id);
+ i40iw_debug(dev, I40IW_DEBUG_AEQ,
+ "%s qp_id %d is already freed\n",
+ __func__, info->qp_cq_id);
continue;
}
i40iw_add_ref(&iwqp->ibqp);
@@ -359,6 +361,9 @@
continue;
i40iw_cm_disconn(iwqp);
break;
+ case I40IW_AE_QP_SUSPEND_COMPLETE:
+ i40iw_qp_suspend_resume(dev, &iwqp->sc_qp, false);
+ break;
case I40IW_AE_TERMINATE_SENT:
i40iw_terminate_send_fin(qp);
break;
@@ -404,19 +409,18 @@
case I40IW_AE_LCE_CQ_CATASTROPHIC:
case I40IW_AE_UDA_XMIT_DGRAM_TOO_LONG:
case I40IW_AE_UDA_XMIT_IPADDR_MISMATCH:
- case I40IW_AE_QP_SUSPEND_COMPLETE:
ctx_info->err_rq_idx_valid = false;
default:
- if (!info->sq && ctx_info->err_rq_idx_valid) {
- ctx_info->err_rq_idx = info->wqe_idx;
- ctx_info->tcp_info_valid = false;
- ctx_info->iwarp_info_valid = false;
- ret = dev->iw_priv_qp_ops->qp_setctx(&iwqp->sc_qp,
- iwqp->host_ctx.va,
- ctx_info);
- }
- i40iw_terminate_connection(qp, info);
- break;
+ if (!info->sq && ctx_info->err_rq_idx_valid) {
+ ctx_info->err_rq_idx = info->wqe_idx;
+ ctx_info->tcp_info_valid = false;
+ ctx_info->iwarp_info_valid = false;
+ ret = dev->iw_priv_qp_ops->qp_setctx(&iwqp->sc_qp,
+ iwqp->host_ctx.va,
+ ctx_info);
+ }
+ i40iw_terminate_connection(qp, info);
+ break;
}
if (info->qp)
i40iw_rem_ref(&iwqp->ibqp);
@@ -538,6 +542,7 @@
{
struct i40iw_qhash_table_info *info;
struct i40iw_sc_dev *dev = &iwdev->sc_dev;
+ struct i40iw_sc_vsi *vsi = &iwdev->vsi;
enum i40iw_status_code status;
struct i40iw_cqp *iwcqp = &iwdev->cqp;
struct i40iw_cqp_request *cqp_request;
@@ -550,6 +555,7 @@
info = &cqp_info->in.u.manage_qhash_table_entry.info;
memset(info, 0, sizeof(*info));
+ info->vsi = &iwdev->vsi;
info->manage = mtype;
info->entry_type = etype;
if (cminfo->vlan_id != 0xFFFF) {
@@ -560,8 +566,9 @@
}
info->ipv4_valid = cminfo->ipv4;
+ info->user_pri = cminfo->user_pri;
ether_addr_copy(info->mac_addr, iwdev->netdev->dev_addr);
- info->qp_num = cpu_to_le32(dev->ilq->qp_id);
+ info->qp_num = cpu_to_le32(vsi->ilq->qp_id);
info->dest_port = cpu_to_le16(cminfo->loc_port);
info->dest_ip[0] = cpu_to_le32(cminfo->loc_addr[0]);
info->dest_ip[1] = cpu_to_le32(cminfo->loc_addr[1]);
@@ -617,6 +624,7 @@
struct i40iw_qp_flush_info *hw_info;
struct i40iw_cqp_request *cqp_request;
struct cqp_commands_info *cqp_info;
+ struct i40iw_qp *iwqp = (struct i40iw_qp *)qp->back_qp;
cqp_request = i40iw_get_cqp_request(&iwdev->cqp, wait);
if (!cqp_request)
@@ -631,9 +639,30 @@
cqp_info->in.u.qp_flush_wqes.qp = qp;
cqp_info->in.u.qp_flush_wqes.scratch = (uintptr_t)cqp_request;
status = i40iw_handle_cqp_op(iwdev, cqp_request);
- if (status)
+ if (status) {
i40iw_pr_err("CQP-OP Flush WQE's fail");
- return status;
+ complete(&iwqp->sq_drained);
+ complete(&iwqp->rq_drained);
+ return status;
+ }
+ if (!cqp_request->compl_info.maj_err_code) {
+ switch (cqp_request->compl_info.min_err_code) {
+ case I40IW_CQP_COMPL_RQ_WQE_FLUSHED:
+ complete(&iwqp->sq_drained);
+ break;
+ case I40IW_CQP_COMPL_SQ_WQE_FLUSHED:
+ complete(&iwqp->rq_drained);
+ break;
+ case I40IW_CQP_COMPL_RQ_SQ_WQE_FLUSHED:
+ break;
+ default:
+ complete(&iwqp->sq_drained);
+ complete(&iwqp->rq_drained);
+ break;
+ }
+ }
+
+ return 0;
}
/**
diff --git a/drivers/infiniband/hw/i40iw/i40iw_main.c b/drivers/infiniband/hw/i40iw/i40iw_main.c
index ac2f3cd..2728af3 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_main.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_main.c
@@ -237,14 +237,11 @@
*/
static void i40iw_destroy_cqp(struct i40iw_device *iwdev, bool free_hwcqp)
{
- enum i40iw_status_code status = 0;
struct i40iw_sc_dev *dev = &iwdev->sc_dev;
struct i40iw_cqp *cqp = &iwdev->cqp;
- if (free_hwcqp && dev->cqp_ops->cqp_destroy)
- status = dev->cqp_ops->cqp_destroy(dev->cqp);
- if (status)
- i40iw_pr_err("destroy cqp failed");
+ if (free_hwcqp)
+ dev->cqp_ops->cqp_destroy(dev->cqp);
i40iw_free_dma_mem(dev->hw, &cqp->sq);
kfree(cqp->scratch_array);
@@ -270,6 +267,7 @@
i40iw_wr32(dev->hw, I40E_PFINT_DYN_CTLN(msix_vec->idx - 1), 0);
else
i40iw_wr32(dev->hw, I40E_VFINT_DYN_CTLN1(msix_vec->idx - 1), 0);
+ irq_set_affinity_hint(msix_vec->irq, NULL);
free_irq(msix_vec->irq, dev_id);
}
@@ -603,7 +601,7 @@
i40iw_pr_err("cqp init status %d\n", status);
goto exit;
}
- status = dev->cqp_ops->cqp_create(dev->cqp, true, &maj_err, &min_err);
+ status = dev->cqp_ops->cqp_create(dev->cqp, &maj_err, &min_err);
if (status) {
i40iw_pr_err("cqp create status %d maj_err %d min_err %d\n",
status, maj_err, min_err);
@@ -688,6 +686,7 @@
struct i40iw_msix_vector *msix_vec)
{
enum i40iw_status_code status;
+ cpumask_t mask;
if (iwdev->msix_shared && !ceq_id) {
tasklet_init(&iwdev->dpc_tasklet, i40iw_dpc, (unsigned long)iwdev);
@@ -697,12 +696,15 @@
status = request_irq(msix_vec->irq, i40iw_ceq_handler, 0, "CEQ", iwceq);
}
+ cpumask_clear(&mask);
+ cpumask_set_cpu(msix_vec->cpu_affinity, &mask);
+ irq_set_affinity_hint(msix_vec->irq, &mask);
+
if (status) {
i40iw_pr_err("ceq irq config fail\n");
return I40IW_ERR_CONFIG;
}
msix_vec->ceq_id = ceq_id;
- msix_vec->cpu_affinity = 0;
return 0;
}
@@ -930,6 +932,7 @@
struct i40iw_puda_rsrc_info info;
enum i40iw_status_code status;
+ memset(&info, 0, sizeof(info));
info.type = I40IW_PUDA_RSRC_TYPE_ILQ;
info.cq_id = 1;
info.qp_id = 0;
@@ -939,10 +942,9 @@
info.rq_size = 8192;
info.buf_size = 1024;
info.tx_buf_cnt = 16384;
- info.mss = iwdev->mss;
info.receive = i40iw_receive_ilq;
info.xmit_complete = i40iw_free_sqbuf;
- status = i40iw_puda_create_rsrc(&iwdev->sc_dev, &info);
+ status = i40iw_puda_create_rsrc(&iwdev->vsi, &info);
if (status)
i40iw_pr_err("ilq create fail\n");
return status;
@@ -959,6 +961,7 @@
struct i40iw_puda_rsrc_info info;
enum i40iw_status_code status;
+ memset(&info, 0, sizeof(info));
info.type = I40IW_PUDA_RSRC_TYPE_IEQ;
info.cq_id = 2;
info.qp_id = iwdev->sc_dev.exception_lan_queue;
@@ -967,9 +970,8 @@
info.sq_size = 8192;
info.rq_size = 8192;
info.buf_size = 2048;
- info.mss = iwdev->mss;
info.tx_buf_cnt = 16384;
- status = i40iw_puda_create_rsrc(&iwdev->sc_dev, &info);
+ status = i40iw_puda_create_rsrc(&iwdev->vsi, &info);
if (status)
i40iw_pr_err("ieq create fail\n");
return status;
@@ -1159,7 +1161,7 @@
{
struct net_device *ip_dev;
struct inet6_dev *idev;
- struct inet6_ifaddr *ifp;
+ struct inet6_ifaddr *ifp, *tmp;
u32 local_ipaddr6[4];
rcu_read_lock();
@@ -1172,7 +1174,7 @@
i40iw_pr_err("ipv6 inet device not found\n");
break;
}
- list_for_each_entry(ifp, &idev->addr_list, if_list) {
+ list_for_each_entry_safe(ifp, tmp, &idev->addr_list, if_list) {
i40iw_pr_info("IP=%pI6, vlan_id=%d, MAC=%pM\n", &ifp->addr,
rdma_vlan_dev_vlan_id(ip_dev), ip_dev->dev_addr);
i40iw_copy_ip_ntohl(local_ipaddr6,
@@ -1294,17 +1296,23 @@
enum i40iw_status_code status;
struct i40iw_sc_dev *dev = &iwdev->sc_dev;
struct i40iw_device_init_info info;
+ struct i40iw_vsi_init_info vsi_info;
struct i40iw_dma_mem mem;
+ struct i40iw_l2params l2params;
u32 size;
+ struct i40iw_vsi_stats_info stats_info;
+ u16 last_qset = I40IW_NO_QSET;
+ u16 qset;
+ u32 i;
+ memset(&l2params, 0, sizeof(l2params));
memset(&info, 0, sizeof(info));
size = sizeof(struct i40iw_hmc_pble_rsrc) + sizeof(struct i40iw_hmc_info) +
(sizeof(struct i40iw_hmc_obj_info) * I40IW_HMC_IW_MAX);
iwdev->hmc_info_mem = kzalloc(size, GFP_KERNEL);
- if (!iwdev->hmc_info_mem) {
- i40iw_pr_err("memory alloc fail\n");
+ if (!iwdev->hmc_info_mem)
return I40IW_ERR_NO_MEMORY;
- }
+
iwdev->pble_rsrc = (struct i40iw_hmc_pble_rsrc *)iwdev->hmc_info_mem;
dev->hmc_info = &iwdev->hw.hmc;
dev->hmc_info->hmc_obj = (struct i40iw_hmc_obj_info *)(iwdev->pble_rsrc + 1);
@@ -1325,7 +1333,17 @@
info.bar0 = ldev->hw_addr;
info.hw = &iwdev->hw;
info.debug_mask = debug;
- info.qs_handle = ldev->params.qos.prio_qos[0].qs_handle;
+ l2params.mss =
+ (ldev->params.mtu) ? ldev->params.mtu - I40IW_MTU_TO_MSS : I40IW_DEFAULT_MSS;
+ for (i = 0; i < I40E_CLIENT_MAX_USER_PRIORITY; i++) {
+ qset = ldev->params.qos.prio_qos[i].qs_handle;
+ l2params.qs_handle_list[i] = qset;
+ if (last_qset == I40IW_NO_QSET)
+ last_qset = qset;
+ else if ((qset != last_qset) && (qset != I40IW_NO_QSET))
+ iwdev->dcb = true;
+ }
+ i40iw_pr_info("DCB is set/clear = %d\n", iwdev->dcb);
info.exception_lan_queue = 1;
info.vchnl_send = i40iw_virtchnl_send;
status = i40iw_device_init(&iwdev->sc_dev, &info);
@@ -1334,6 +1352,20 @@
kfree(iwdev->hmc_info_mem);
iwdev->hmc_info_mem = NULL;
}
+ memset(&vsi_info, 0, sizeof(vsi_info));
+ vsi_info.dev = &iwdev->sc_dev;
+ vsi_info.back_vsi = (void *)iwdev;
+ vsi_info.params = &l2params;
+ i40iw_sc_vsi_init(&iwdev->vsi, &vsi_info);
+
+ if (dev->is_pf) {
+ memset(&stats_info, 0, sizeof(stats_info));
+ stats_info.fcn_id = ldev->fid;
+ stats_info.pestat = kzalloc(sizeof(*stats_info.pestat), GFP_KERNEL);
+ stats_info.stats_initialize = true;
+ if (stats_info.pestat)
+ i40iw_vsi_stats_init(&iwdev->vsi, &stats_info);
+ }
return status;
}
@@ -1384,6 +1416,7 @@
for (i = 0, ceq_idx = 0; i < iwdev->msix_count; i++, iw_qvinfo++) {
iwdev->iw_msixtbl[i].idx = ldev->msix_entries[i].entry;
iwdev->iw_msixtbl[i].irq = ldev->msix_entries[i].vector;
+ iwdev->iw_msixtbl[i].cpu_affinity = ceq_idx;
if (i == 0) {
iw_qvinfo->aeq_idx = 0;
if (iwdev->msix_shared)
@@ -1404,18 +1437,19 @@
* i40iw_deinit_device - clean up the device resources
* @iwdev: iwarp device
* @reset: true if called before reset
- * @del_hdl: true if delete hdl entry
*
* Destroy the ib device interface, remove the mac ip entry and ipv4/ipv6 addresses,
* destroy the device queues and free the pble and the hmc objects
*/
-static void i40iw_deinit_device(struct i40iw_device *iwdev, bool reset, bool del_hdl)
+static void i40iw_deinit_device(struct i40iw_device *iwdev, bool reset)
{
struct i40e_info *ldev = iwdev->ldev;
struct i40iw_sc_dev *dev = &iwdev->sc_dev;
i40iw_pr_info("state = %d\n", iwdev->init_state);
+ if (iwdev->param_wq)
+ destroy_workqueue(iwdev->param_wq);
switch (iwdev->init_state) {
case RDMA_DEV_REGISTERED:
@@ -1441,10 +1475,10 @@
i40iw_destroy_aeq(iwdev, reset);
/* fallthrough */
case IEQ_CREATED:
- i40iw_puda_dele_resources(dev, I40IW_PUDA_RSRC_TYPE_IEQ, reset);
+ i40iw_puda_dele_resources(&iwdev->vsi, I40IW_PUDA_RSRC_TYPE_IEQ, reset);
/* fallthrough */
case ILQ_CREATED:
- i40iw_puda_dele_resources(dev, I40IW_PUDA_RSRC_TYPE_ILQ, reset);
+ i40iw_puda_dele_resources(&iwdev->vsi, I40IW_PUDA_RSRC_TYPE_ILQ, reset);
/* fallthrough */
case CCQ_CREATED:
i40iw_destroy_ccq(iwdev, reset);
@@ -1456,13 +1490,14 @@
i40iw_del_hmc_objects(dev, dev->hmc_info, true, reset);
/* fallthrough */
case CQP_CREATED:
- i40iw_destroy_cqp(iwdev, !reset);
+ i40iw_destroy_cqp(iwdev, true);
/* fallthrough */
case INITIAL_STATE:
i40iw_cleanup_cm_core(&iwdev->cm_core);
- if (dev->is_pf)
- i40iw_hw_stats_del_timer(dev);
-
+ if (iwdev->vsi.pestat) {
+ i40iw_vsi_stats_free(&iwdev->vsi);
+ kfree(iwdev->vsi.pestat);
+ }
i40iw_del_init_mem(iwdev);
break;
case INVALID_STATE:
@@ -1472,8 +1507,7 @@
break;
}
- if (del_hdl)
- i40iw_del_handler(i40iw_find_i40e_handler(ldev));
+ i40iw_del_handler(i40iw_find_i40e_handler(ldev));
kfree(iwdev->hdl);
}
@@ -1508,7 +1542,6 @@
iwdev->max_enabled_vfs = iwdev->max_rdma_vfs;
iwdev->netdev = ldev->netdev;
hdl->client = client;
- iwdev->mss = (!ldev->params.mtu) ? I40IW_DEFAULT_MSS : ldev->params.mtu - I40IW_MTU_TO_MSS;
if (!ldev->ftype)
iwdev->db_start = pci_resource_start(ldev->pcidev, 0) + I40IW_DB_ADDR_OFFSET;
else
@@ -1528,6 +1561,7 @@
init_waitqueue_head(&iwdev->vchnl_waitq);
init_waitqueue_head(&dev->vf_reqs);
+ init_waitqueue_head(&iwdev->close_wq);
status = i40iw_initialize_dev(iwdev, ldev);
exit:
@@ -1540,6 +1574,20 @@
}
/**
+ * i40iw_get_used_rsrc - determine resources used internally
+ * @iwdev: iwarp device
+ *
+ * Called after internal allocations
+ */
+static void i40iw_get_used_rsrc(struct i40iw_device *iwdev)
+{
+ iwdev->used_pds = find_next_zero_bit(iwdev->allocated_pds, iwdev->max_pd, 0);
+ iwdev->used_qps = find_next_zero_bit(iwdev->allocated_qps, iwdev->max_qp, 0);
+ iwdev->used_cqs = find_next_zero_bit(iwdev->allocated_cqs, iwdev->max_cq, 0);
+ iwdev->used_mrs = find_next_zero_bit(iwdev->allocated_mrs, iwdev->max_mr, 0);
+}
+
+/**
* i40iw_open - client interface operation open for iwarp/uda device
* @ldev: lan device information
* @client: iwarp client information, provided during registration
@@ -1611,6 +1659,7 @@
status = i40iw_initialize_hw_resources(iwdev);
if (status)
break;
+ i40iw_get_used_rsrc(iwdev);
dev->ccq_ops->ccq_arm(dev->ccq);
status = i40iw_hmc_init_pble(&iwdev->sc_dev, iwdev->pble_rsrc);
if (status)
@@ -1630,35 +1679,73 @@
iwdev->init_state = RDMA_DEV_REGISTERED;
iwdev->iw_status = 1;
i40iw_port_ibevent(iwdev);
+ iwdev->param_wq = alloc_ordered_workqueue("l2params", WQ_MEM_RECLAIM);
+ if(iwdev->param_wq == NULL)
+ break;
i40iw_pr_info("i40iw_open completed\n");
return 0;
} while (0);
i40iw_pr_err("status = %d last completion = %d\n", status, iwdev->init_state);
- i40iw_deinit_device(iwdev, false, false);
+ i40iw_deinit_device(iwdev, false);
return -ERESTART;
}
/**
- * i40iw_l2param_change : handle qs handles for qos and mss change
+ * i40iw_l2params_worker - worker for l2 params change
+ * @work: work pointer for l2 params
+ */
+static void i40iw_l2params_worker(struct work_struct *work)
+{
+ struct l2params_work *dwork =
+ container_of(work, struct l2params_work, work);
+ struct i40iw_device *iwdev = dwork->iwdev;
+
+ i40iw_change_l2params(&iwdev->vsi, &dwork->l2params);
+ atomic_dec(&iwdev->params_busy);
+ kfree(work);
+}
+
+/**
+ * i40iw_l2param_change - handle qs handles for qos and mss change
* @ldev: lan device information
* @client: client for paramater change
* @params: new parameters from L2
*/
-static void i40iw_l2param_change(struct i40e_info *ldev,
- struct i40e_client *client,
+static void i40iw_l2param_change(struct i40e_info *ldev, struct i40e_client *client,
struct i40e_params *params)
{
struct i40iw_handler *hdl;
+ struct i40iw_l2params *l2params;
+ struct l2params_work *work;
struct i40iw_device *iwdev;
+ int i;
hdl = i40iw_find_i40e_handler(ldev);
if (!hdl)
return;
iwdev = &hdl->device;
- if (params->mtu)
- iwdev->mss = params->mtu - I40IW_MTU_TO_MSS;
+
+ if (atomic_read(&iwdev->params_busy))
+ return;
+
+
+ work = kzalloc(sizeof(*work), GFP_ATOMIC);
+ if (!work)
+ return;
+
+ atomic_inc(&iwdev->params_busy);
+
+ work->iwdev = iwdev;
+ l2params = &work->l2params;
+ for (i = 0; i < I40E_CLIENT_MAX_USER_PRIORITY; i++)
+ l2params->qs_handle_list[i] = params->qos.prio_qos[i].qs_handle;
+
+ l2params->mss = (params->mtu) ? params->mtu - I40IW_MTU_TO_MSS : iwdev->vsi.mss;
+
+ INIT_WORK(&work->work, i40iw_l2params_worker);
+ queue_work(iwdev->param_wq, &work->work);
}
/**
@@ -1679,8 +1766,11 @@
return;
iwdev = &hdl->device;
+ iwdev->closing = true;
+
+ i40iw_cm_disconnect_all(iwdev);
destroy_workqueue(iwdev->virtchnl_wq);
- i40iw_deinit_device(iwdev, reset, true);
+ i40iw_deinit_device(iwdev, reset);
}
/**
@@ -1701,21 +1791,23 @@
struct i40iw_vfdev *tmp_vfdev;
unsigned int i;
unsigned long flags;
+ struct i40iw_device *iwdev;
hdl = i40iw_find_i40e_handler(ldev);
if (!hdl)
return;
dev = &hdl->device.sc_dev;
+ iwdev = (struct i40iw_device *)dev->back_dev;
for (i = 0; i < I40IW_MAX_PE_ENABLED_VF_COUNT; i++) {
if (!dev->vf_dev[i] || (dev->vf_dev[i]->vf_id != vf_id))
continue;
/* free all resources allocated on behalf of vf */
tmp_vfdev = dev->vf_dev[i];
- spin_lock_irqsave(&dev->dev_pestat.stats_lock, flags);
+ spin_lock_irqsave(&iwdev->vsi.pestat->lock, flags);
dev->vf_dev[i] = NULL;
- spin_unlock_irqrestore(&dev->dev_pestat.stats_lock, flags);
+ spin_unlock_irqrestore(&iwdev->vsi.pestat->lock, flags);
i40iw_del_hmc_objects(dev, &tmp_vfdev->hmc_info, false, false);
/* remove vf hmc function */
memset(&hmc_fcn_info, 0, sizeof(hmc_fcn_info));
diff --git a/drivers/infiniband/hw/i40iw/i40iw_osdep.h b/drivers/infiniband/hw/i40iw/i40iw_osdep.h
index 80f422b..aa66c1c 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_osdep.h
+++ b/drivers/infiniband/hw/i40iw/i40iw_osdep.h
@@ -198,6 +198,8 @@
void i40iw_cqp_spawn_worker(struct i40iw_sc_dev *dev,
struct i40iw_virtchnl_work_info *work_info, u32 iw_vf_idx);
void *i40iw_remove_head(struct list_head *list);
+void i40iw_qp_suspend_resume(struct i40iw_sc_dev *dev, struct i40iw_sc_qp *qp, bool suspend);
+void i40iw_qp_mss_modify(struct i40iw_sc_dev *dev, struct i40iw_sc_qp *qp);
void i40iw_term_modify_qp(struct i40iw_sc_qp *qp, u8 next_state, u8 term, u8 term_len);
void i40iw_terminate_done(struct i40iw_sc_qp *qp, int timeout_occurred);
@@ -207,9 +209,9 @@
enum i40iw_status_code i40iw_hw_manage_vf_pble_bp(struct i40iw_device *iwdev,
struct i40iw_manage_vf_pble_info *info,
bool wait);
-struct i40iw_dev_pestat;
-void i40iw_hw_stats_start_timer(struct i40iw_sc_dev *);
-void i40iw_hw_stats_del_timer(struct i40iw_sc_dev *);
+struct i40iw_sc_vsi;
+void i40iw_hw_stats_start_timer(struct i40iw_sc_vsi *vsi);
+void i40iw_hw_stats_stop_timer(struct i40iw_sc_vsi *vsi);
#define i40iw_mmiowb() mmiowb()
void i40iw_wr32(struct i40iw_hw *hw, u32 reg, u32 value);
u32 i40iw_rd32(struct i40iw_hw *hw, u32 reg);
diff --git a/drivers/infiniband/hw/i40iw/i40iw_p.h b/drivers/infiniband/hw/i40iw/i40iw_p.h
index a0b8ca1..28a92fe 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_p.h
+++ b/drivers/infiniband/hw/i40iw/i40iw_p.h
@@ -47,8 +47,6 @@
enum i40iw_status_code i40iw_device_init(struct i40iw_sc_dev *dev,
struct i40iw_device_init_info *info);
-enum i40iw_status_code i40iw_device_init_pestat(struct i40iw_dev_pestat *);
-
void i40iw_sc_cqp_post_sq(struct i40iw_sc_cqp *cqp);
u64 *i40iw_sc_cqp_get_next_send_wqe(struct i40iw_sc_cqp *cqp, u64 scratch);
@@ -64,7 +62,24 @@
enum i40iw_status_code i40iw_pf_init_vfhmc(struct i40iw_sc_dev *dev, u8 vf_hmc_fn_id,
u32 *vf_cnt_array);
-/* cqp misc functions */
+/* stats functions */
+void i40iw_hw_stats_refresh_all(struct i40iw_vsi_pestat *stats);
+void i40iw_hw_stats_read_all(struct i40iw_vsi_pestat *stats, struct i40iw_dev_hw_stats *stats_values);
+void i40iw_hw_stats_read_32(struct i40iw_vsi_pestat *stats,
+ enum i40iw_hw_stats_index_32b index,
+ u64 *value);
+void i40iw_hw_stats_read_64(struct i40iw_vsi_pestat *stats,
+ enum i40iw_hw_stats_index_64b index,
+ u64 *value);
+void i40iw_hw_stats_init(struct i40iw_vsi_pestat *stats, u8 index, bool is_pf);
+
+/* vsi misc functions */
+enum i40iw_status_code i40iw_vsi_stats_init(struct i40iw_sc_vsi *vsi, struct i40iw_vsi_stats_info *info);
+void i40iw_vsi_stats_free(struct i40iw_sc_vsi *vsi);
+void i40iw_sc_vsi_init(struct i40iw_sc_vsi *vsi, struct i40iw_vsi_init_info *info);
+
+void i40iw_change_l2params(struct i40iw_sc_vsi *vsi, struct i40iw_l2params *l2params);
+void i40iw_qp_add_qos(struct i40iw_sc_qp *qp);
void i40iw_terminate_send_fin(struct i40iw_sc_qp *qp);
diff --git a/drivers/infiniband/hw/i40iw/i40iw_pble.c b/drivers/infiniband/hw/i40iw/i40iw_pble.c
index 85993dc..c87ba16 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_pble.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_pble.c
@@ -353,10 +353,6 @@
pages = (idx->rel_pd_idx) ? (I40IW_HMC_PD_CNT_IN_SD -
idx->rel_pd_idx) : I40IW_HMC_PD_CNT_IN_SD;
pages = min(pages, pble_rsrc->unallocated_pble >> PBLE_512_SHIFT);
- if (!pages) {
- ret_code = I40IW_ERR_NO_PBLCHUNKS_AVAILABLE;
- goto error;
- }
info.chunk = chunk;
info.hmc_info = hmc_info;
info.pages = pages;
diff --git a/drivers/infiniband/hw/i40iw/i40iw_puda.c b/drivers/infiniband/hw/i40iw/i40iw_puda.c
index c62d354..449ba8c 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_puda.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_puda.c
@@ -42,12 +42,13 @@
#include "i40iw_p.h"
#include "i40iw_puda.h"
-static void i40iw_ieq_receive(struct i40iw_sc_dev *dev,
+static void i40iw_ieq_receive(struct i40iw_sc_vsi *vsi,
struct i40iw_puda_buf *buf);
-static void i40iw_ieq_tx_compl(struct i40iw_sc_dev *dev, void *sqwrid);
+static void i40iw_ieq_tx_compl(struct i40iw_sc_vsi *vsi, void *sqwrid);
static void i40iw_ilq_putback_rcvbuf(struct i40iw_sc_qp *qp, u32 wqe_idx);
static enum i40iw_status_code i40iw_puda_replenish_rq(struct i40iw_puda_rsrc
*rsrc, bool initial);
+static void i40iw_ieq_cleanup_qp(struct i40iw_puda_rsrc *ieq, struct i40iw_sc_qp *qp);
/**
* i40iw_puda_get_listbuf - get buffer from puda list
* @list: list to use for buffers (ILQ or IEQ)
@@ -292,7 +293,7 @@
unsigned long flags;
if ((cq_type == I40IW_CQ_TYPE_ILQ) || (cq_type == I40IW_CQ_TYPE_IEQ)) {
- rsrc = (cq_type == I40IW_CQ_TYPE_ILQ) ? dev->ilq : dev->ieq;
+ rsrc = (cq_type == I40IW_CQ_TYPE_ILQ) ? cq->vsi->ilq : cq->vsi->ieq;
} else {
i40iw_debug(dev, I40IW_DEBUG_PUDA, "%s qp_type error\n", __func__);
return I40IW_ERR_BAD_PTR;
@@ -335,7 +336,7 @@
rsrc->stats_pkt_rcvd++;
rsrc->compl_rxwqe_idx = info.wqe_idx;
i40iw_debug(dev, I40IW_DEBUG_PUDA, "%s RQ completion\n", __func__);
- rsrc->receive(rsrc->dev, buf);
+ rsrc->receive(rsrc->vsi, buf);
if (cq_type == I40IW_CQ_TYPE_ILQ)
i40iw_ilq_putback_rcvbuf(&rsrc->qp, info.wqe_idx);
else
@@ -345,12 +346,12 @@
i40iw_debug(dev, I40IW_DEBUG_PUDA, "%s SQ completion\n", __func__);
sqwrid = (void *)(uintptr_t)qp->sq_wrtrk_array[info.wqe_idx].wrid;
I40IW_RING_SET_TAIL(qp->sq_ring, info.wqe_idx);
- rsrc->xmit_complete(rsrc->dev, sqwrid);
+ rsrc->xmit_complete(rsrc->vsi, sqwrid);
spin_lock_irqsave(&rsrc->bufpool_lock, flags);
rsrc->tx_wqe_avail_cnt++;
spin_unlock_irqrestore(&rsrc->bufpool_lock, flags);
- if (!list_empty(&dev->ilq->txpend))
- i40iw_puda_send_buf(dev->ilq, NULL);
+ if (!list_empty(&rsrc->vsi->ilq->txpend))
+ i40iw_puda_send_buf(rsrc->vsi->ilq, NULL);
}
done:
@@ -513,10 +514,8 @@
* i40iw_puda_qp_wqe - setup wqe for qp create
* @rsrc: resource for qp
*/
-static enum i40iw_status_code i40iw_puda_qp_wqe(struct i40iw_puda_rsrc *rsrc)
+static enum i40iw_status_code i40iw_puda_qp_wqe(struct i40iw_sc_dev *dev, struct i40iw_sc_qp *qp)
{
- struct i40iw_sc_qp *qp = &rsrc->qp;
- struct i40iw_sc_dev *dev = rsrc->dev;
struct i40iw_sc_cqp *cqp;
u64 *wqe;
u64 header;
@@ -582,6 +581,7 @@
qp->back_qp = (void *)rsrc;
qp->sq_pa = mem->pa;
qp->rq_pa = qp->sq_pa + sq_size;
+ qp->vsi = rsrc->vsi;
ukqp->sq_base = mem->va;
ukqp->rq_base = &ukqp->sq_base[rsrc->sq_size];
ukqp->shadow_area = ukqp->rq_base[rsrc->rq_size].elem;
@@ -608,15 +608,63 @@
ukqp->wqe_alloc_reg = (u32 __iomem *)(i40iw_get_hw_addr(qp->pd->dev) +
I40E_VFPE_WQEALLOC1);
- qp->qs_handle = qp->dev->qs_handle;
+ qp->user_pri = 0;
+ i40iw_qp_add_qos(qp);
i40iw_puda_qp_setctx(rsrc);
- ret = i40iw_puda_qp_wqe(rsrc);
+ if (rsrc->ceq_valid)
+ ret = i40iw_cqp_qp_create_cmd(rsrc->dev, qp);
+ else
+ ret = i40iw_puda_qp_wqe(rsrc->dev, qp);
if (ret)
i40iw_free_dma_mem(rsrc->dev->hw, &rsrc->qpmem);
return ret;
}
/**
+ * i40iw_puda_cq_wqe - setup wqe for cq create
+ * @rsrc: resource for cq
+ */
+static enum i40iw_status_code i40iw_puda_cq_wqe(struct i40iw_sc_dev *dev, struct i40iw_sc_cq *cq)
+{
+ u64 *wqe;
+ struct i40iw_sc_cqp *cqp;
+ u64 header;
+ struct i40iw_ccq_cqe_info compl_info;
+ enum i40iw_status_code status = 0;
+
+ cqp = dev->cqp;
+ wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, 0);
+ if (!wqe)
+ return I40IW_ERR_RING_FULL;
+
+ set_64bit_val(wqe, 0, cq->cq_uk.cq_size);
+ set_64bit_val(wqe, 8, RS_64_1(cq, 1));
+ set_64bit_val(wqe, 16,
+ LS_64(cq->shadow_read_threshold,
+ I40IW_CQPSQ_CQ_SHADOW_READ_THRESHOLD));
+ set_64bit_val(wqe, 32, cq->cq_pa);
+
+ set_64bit_val(wqe, 40, cq->shadow_area_pa);
+
+ header = cq->cq_uk.cq_id |
+ LS_64(I40IW_CQP_OP_CREATE_CQ, I40IW_CQPSQ_OPCODE) |
+ LS_64(1, I40IW_CQPSQ_CQ_CHKOVERFLOW) |
+ LS_64(1, I40IW_CQPSQ_CQ_ENCEQEMASK) |
+ LS_64(1, I40IW_CQPSQ_CQ_CEQIDVALID) |
+ LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
+ set_64bit_val(wqe, 24, header);
+
+ i40iw_debug_buf(dev, I40IW_DEBUG_PUDA, "PUDA CQE",
+ wqe, I40IW_CQP_WQE_SIZE * 8);
+
+ i40iw_sc_cqp_post_sq(dev->cqp);
+ status = dev->cqp_ops->poll_for_cqp_op_done(dev->cqp,
+ I40IW_CQP_OP_CREATE_CQ,
+ &compl_info);
+ return status;
+}
+
+/**
* i40iw_puda_cq_create - create cq for resource
* @rsrc: resource for which cq to create
*/
@@ -624,18 +672,13 @@
{
struct i40iw_sc_dev *dev = rsrc->dev;
struct i40iw_sc_cq *cq = &rsrc->cq;
- u64 *wqe;
- struct i40iw_sc_cqp *cqp;
- u64 header;
enum i40iw_status_code ret = 0;
u32 tsize, cqsize;
- u32 shadow_read_threshold = 128;
struct i40iw_dma_mem *mem;
- struct i40iw_ccq_cqe_info compl_info;
struct i40iw_cq_init_info info;
struct i40iw_cq_uk_init_info *init_info = &info.cq_uk_init_info;
- cq->back_cq = (void *)rsrc;
+ cq->vsi = rsrc->vsi;
cqsize = rsrc->cq_size * (sizeof(struct i40iw_cqe));
tsize = cqsize + sizeof(struct i40iw_cq_shadow_area);
ret = i40iw_allocate_dma_mem(dev->hw, &rsrc->cqmem, tsize,
@@ -656,39 +699,15 @@
init_info->shadow_area = (u64 *)((u8 *)mem->va + cqsize);
init_info->cq_size = rsrc->cq_size;
init_info->cq_id = rsrc->cq_id;
+ info.ceqe_mask = true;
+ info.ceq_id_valid = true;
ret = dev->iw_priv_cq_ops->cq_init(cq, &info);
if (ret)
goto error;
- cqp = dev->cqp;
- wqe = i40iw_sc_cqp_get_next_send_wqe(cqp, 0);
- if (!wqe) {
- ret = I40IW_ERR_RING_FULL;
- goto error;
- }
-
- set_64bit_val(wqe, 0, rsrc->cq_size);
- set_64bit_val(wqe, 8, RS_64_1(cq, 1));
- set_64bit_val(wqe, 16, LS_64(shadow_read_threshold, I40IW_CQPSQ_CQ_SHADOW_READ_THRESHOLD));
- set_64bit_val(wqe, 32, cq->cq_pa);
-
- set_64bit_val(wqe, 40, cq->shadow_area_pa);
-
- header = rsrc->cq_id |
- LS_64(I40IW_CQP_OP_CREATE_CQ, I40IW_CQPSQ_OPCODE) |
- LS_64(1, I40IW_CQPSQ_CQ_CHKOVERFLOW) |
- LS_64(1, I40IW_CQPSQ_CQ_ENCEQEMASK) |
- LS_64(1, I40IW_CQPSQ_CQ_CEQIDVALID) |
- LS_64(cqp->polarity, I40IW_CQPSQ_WQEVALID);
- set_64bit_val(wqe, 24, header);
-
- i40iw_debug_buf(dev, I40IW_DEBUG_PUDA, "PUDA CQE",
- wqe, I40IW_CQP_WQE_SIZE * 8);
-
- i40iw_sc_cqp_post_sq(dev->cqp);
- ret = dev->cqp_ops->poll_for_cqp_op_done(dev->cqp,
- I40IW_CQP_OP_CREATE_CQ,
- &compl_info);
-
+ if (rsrc->ceq_valid)
+ ret = i40iw_cqp_cq_create_cmd(dev, cq);
+ else
+ ret = i40iw_puda_cq_wqe(dev, cq);
error:
if (ret)
i40iw_free_dma_mem(dev->hw, &rsrc->cqmem);
@@ -696,30 +715,94 @@
}
/**
+ * i40iw_puda_free_qp - free qp for resource
+ * @rsrc: resource for which qp to free
+ */
+static void i40iw_puda_free_qp(struct i40iw_puda_rsrc *rsrc)
+{
+ enum i40iw_status_code ret;
+ struct i40iw_ccq_cqe_info compl_info;
+ struct i40iw_sc_dev *dev = rsrc->dev;
+
+ if (rsrc->ceq_valid) {
+ i40iw_cqp_qp_destroy_cmd(dev, &rsrc->qp);
+ return;
+ }
+
+ ret = dev->iw_priv_qp_ops->qp_destroy(&rsrc->qp,
+ 0, false, true, true);
+ if (ret)
+ i40iw_debug(dev, I40IW_DEBUG_PUDA,
+ "%s error puda qp destroy wqe\n",
+ __func__);
+
+ if (!ret) {
+ ret = dev->cqp_ops->poll_for_cqp_op_done(dev->cqp,
+ I40IW_CQP_OP_DESTROY_QP,
+ &compl_info);
+ if (ret)
+ i40iw_debug(dev, I40IW_DEBUG_PUDA,
+ "%s error puda qp destroy failed\n",
+ __func__);
+ }
+}
+
+/**
+ * i40iw_puda_free_cq - free cq for resource
+ * @rsrc: resource for which cq to free
+ */
+static void i40iw_puda_free_cq(struct i40iw_puda_rsrc *rsrc)
+{
+ enum i40iw_status_code ret;
+ struct i40iw_ccq_cqe_info compl_info;
+ struct i40iw_sc_dev *dev = rsrc->dev;
+
+ if (rsrc->ceq_valid) {
+ i40iw_cqp_cq_destroy_cmd(dev, &rsrc->cq);
+ return;
+ }
+ ret = dev->iw_priv_cq_ops->cq_destroy(&rsrc->cq, 0, true);
+
+ if (ret)
+ i40iw_debug(dev, I40IW_DEBUG_PUDA,
+ "%s error ieq cq destroy\n",
+ __func__);
+
+ if (!ret) {
+ ret = dev->cqp_ops->poll_for_cqp_op_done(dev->cqp,
+ I40IW_CQP_OP_DESTROY_CQ,
+ &compl_info);
+ if (ret)
+ i40iw_debug(dev, I40IW_DEBUG_PUDA,
+ "%s error ieq qp destroy done\n",
+ __func__);
+ }
+}
+
+/**
* i40iw_puda_dele_resources - delete all resources during close
* @dev: iwarp device
* @type: type of resource to dele
* @reset: true if reset chip
*/
-void i40iw_puda_dele_resources(struct i40iw_sc_dev *dev,
+void i40iw_puda_dele_resources(struct i40iw_sc_vsi *vsi,
enum puda_resource_type type,
bool reset)
{
- struct i40iw_ccq_cqe_info compl_info;
+ struct i40iw_sc_dev *dev = vsi->dev;
struct i40iw_puda_rsrc *rsrc;
struct i40iw_puda_buf *buf = NULL;
struct i40iw_puda_buf *nextbuf = NULL;
struct i40iw_virt_mem *vmem;
- enum i40iw_status_code ret;
switch (type) {
case I40IW_PUDA_RSRC_TYPE_ILQ:
- rsrc = dev->ilq;
- vmem = &dev->ilq_mem;
+ rsrc = vsi->ilq;
+ vmem = &vsi->ilq_mem;
break;
case I40IW_PUDA_RSRC_TYPE_IEQ:
- rsrc = dev->ieq;
- vmem = &dev->ieq_mem;
+ rsrc = vsi->ieq;
+ vmem = &vsi->ieq_mem;
break;
default:
i40iw_debug(dev, I40IW_DEBUG_PUDA, "%s: error resource type = 0x%x\n",
@@ -731,45 +814,14 @@
case PUDA_HASH_CRC_COMPLETE:
i40iw_free_hash_desc(rsrc->hash_desc);
case PUDA_QP_CREATED:
- do {
- if (reset)
- break;
- ret = dev->iw_priv_qp_ops->qp_destroy(&rsrc->qp,
- 0, false, true, true);
- if (ret)
- i40iw_debug(rsrc->dev, I40IW_DEBUG_PUDA,
- "%s error ieq qp destroy\n",
- __func__);
-
- ret = dev->cqp_ops->poll_for_cqp_op_done(dev->cqp,
- I40IW_CQP_OP_DESTROY_QP,
- &compl_info);
- if (ret)
- i40iw_debug(rsrc->dev, I40IW_DEBUG_PUDA,
- "%s error ieq qp destroy done\n",
- __func__);
- } while (0);
+ if (!reset)
+ i40iw_puda_free_qp(rsrc);
i40iw_free_dma_mem(dev->hw, &rsrc->qpmem);
/* fallthrough */
case PUDA_CQ_CREATED:
- do {
- if (reset)
- break;
- ret = dev->iw_priv_cq_ops->cq_destroy(&rsrc->cq, 0, true);
- if (ret)
- i40iw_debug(rsrc->dev, I40IW_DEBUG_PUDA,
- "%s error ieq cq destroy\n",
- __func__);
-
- ret = dev->cqp_ops->poll_for_cqp_op_done(dev->cqp,
- I40IW_CQP_OP_DESTROY_CQ,
- &compl_info);
- if (ret)
- i40iw_debug(rsrc->dev, I40IW_DEBUG_PUDA,
- "%s error ieq qp destroy done\n",
- __func__);
- } while (0);
+ if (!reset)
+ i40iw_puda_free_cq(rsrc);
i40iw_free_dma_mem(dev->hw, &rsrc->cqmem);
break;
@@ -825,9 +877,10 @@
* @dev: iwarp device
* @info: resource information
*/
-enum i40iw_status_code i40iw_puda_create_rsrc(struct i40iw_sc_dev *dev,
+enum i40iw_status_code i40iw_puda_create_rsrc(struct i40iw_sc_vsi *vsi,
struct i40iw_puda_rsrc_info *info)
{
+ struct i40iw_sc_dev *dev = vsi->dev;
enum i40iw_status_code ret = 0;
struct i40iw_puda_rsrc *rsrc;
u32 pudasize;
@@ -840,10 +893,10 @@
rqwridsize = info->rq_size * 8;
switch (info->type) {
case I40IW_PUDA_RSRC_TYPE_ILQ:
- vmem = &dev->ilq_mem;
+ vmem = &vsi->ilq_mem;
break;
case I40IW_PUDA_RSRC_TYPE_IEQ:
- vmem = &dev->ieq_mem;
+ vmem = &vsi->ieq_mem;
break;
default:
return I40IW_NOT_SUPPORTED;
@@ -856,22 +909,22 @@
rsrc = (struct i40iw_puda_rsrc *)vmem->va;
spin_lock_init(&rsrc->bufpool_lock);
if (info->type == I40IW_PUDA_RSRC_TYPE_ILQ) {
- dev->ilq = (struct i40iw_puda_rsrc *)vmem->va;
- dev->ilq_count = info->count;
+ vsi->ilq = (struct i40iw_puda_rsrc *)vmem->va;
+ vsi->ilq_count = info->count;
rsrc->receive = info->receive;
rsrc->xmit_complete = info->xmit_complete;
} else {
- vmem = &dev->ieq_mem;
- dev->ieq_count = info->count;
- dev->ieq = (struct i40iw_puda_rsrc *)vmem->va;
+ vmem = &vsi->ieq_mem;
+ vsi->ieq_count = info->count;
+ vsi->ieq = (struct i40iw_puda_rsrc *)vmem->va;
rsrc->receive = i40iw_ieq_receive;
rsrc->xmit_complete = i40iw_ieq_tx_compl;
}
+ rsrc->ceq_valid = info->ceq_valid;
rsrc->type = info->type;
rsrc->sq_wrtrk_array = (struct i40iw_sq_uk_wr_trk_info *)((u8 *)vmem->va + pudasize);
rsrc->rq_wrid_array = (u64 *)((u8 *)vmem->va + pudasize + sqwridsize);
- rsrc->mss = info->mss;
/* Initialize all ieq lists */
INIT_LIST_HEAD(&rsrc->bufpool);
INIT_LIST_HEAD(&rsrc->txpend);
@@ -885,6 +938,7 @@
rsrc->cq_size = info->rq_size + info->sq_size;
rsrc->buf_size = info->buf_size;
rsrc->dev = dev;
+ rsrc->vsi = vsi;
ret = i40iw_puda_cq_create(rsrc);
if (!ret) {
@@ -919,7 +973,7 @@
dev->ccq_ops->ccq_arm(&rsrc->cq);
return ret;
error:
- i40iw_puda_dele_resources(dev, info->type, false);
+ i40iw_puda_dele_resources(vsi, info->type, false);
return ret;
}
@@ -1131,7 +1185,7 @@
list_add(&buf->list, &pbufl);
status = i40iw_ieq_create_pbufl(pfpdu, rxlist, &pbufl, buf, fpdu_len);
- if (!status)
+ if (status)
goto error;
txbuf = i40iw_puda_get_bufpool(ieq);
@@ -1332,7 +1386,7 @@
}
if (pfpdu->mode && (fps != pfpdu->fps)) {
/* clean up qp as it is new partial sequence */
- i40iw_ieq_cleanup_qp(ieq->dev, qp);
+ i40iw_ieq_cleanup_qp(ieq, qp);
i40iw_debug(ieq->dev, I40IW_DEBUG_IEQ,
"%s: restarting new partial\n", __func__);
pfpdu->mode = false;
@@ -1344,7 +1398,7 @@
pfpdu->rcv_nxt = fps;
pfpdu->fps = fps;
pfpdu->mode = true;
- pfpdu->max_fpdu_data = ieq->mss;
+ pfpdu->max_fpdu_data = ieq->vsi->mss;
pfpdu->pmode_count++;
INIT_LIST_HEAD(rxlist);
i40iw_ieq_check_first_buf(buf, fps);
@@ -1379,14 +1433,14 @@
* @dev: iwarp device
* @buf: exception buffer received
*/
-static void i40iw_ieq_receive(struct i40iw_sc_dev *dev,
+static void i40iw_ieq_receive(struct i40iw_sc_vsi *vsi,
struct i40iw_puda_buf *buf)
{
- struct i40iw_puda_rsrc *ieq = dev->ieq;
+ struct i40iw_puda_rsrc *ieq = vsi->ieq;
struct i40iw_sc_qp *qp = NULL;
u32 wqe_idx = ieq->compl_rxwqe_idx;
- qp = i40iw_ieq_get_qp(dev, buf);
+ qp = i40iw_ieq_get_qp(vsi->dev, buf);
if (!qp) {
ieq->stats_bad_qp_id++;
i40iw_puda_ret_bufpool(ieq, buf);
@@ -1404,12 +1458,12 @@
/**
* i40iw_ieq_tx_compl - put back after sending completed exception buffer
- * @dev: iwarp device
+ * @vsi: pointer to the vsi structure
* @sqwrid: pointer to puda buffer
*/
-static void i40iw_ieq_tx_compl(struct i40iw_sc_dev *dev, void *sqwrid)
+static void i40iw_ieq_tx_compl(struct i40iw_sc_vsi *vsi, void *sqwrid)
{
- struct i40iw_puda_rsrc *ieq = dev->ieq;
+ struct i40iw_puda_rsrc *ieq = vsi->ieq;
struct i40iw_puda_buf *buf = (struct i40iw_puda_buf *)sqwrid;
i40iw_puda_ret_bufpool(ieq, buf);
@@ -1421,15 +1475,14 @@
/**
* i40iw_ieq_cleanup_qp - qp is being destroyed
- * @dev: iwarp device
+ * @ieq: ieq resource
* @qp: all pending fpdu buffers
*/
-void i40iw_ieq_cleanup_qp(struct i40iw_sc_dev *dev, struct i40iw_sc_qp *qp)
+static void i40iw_ieq_cleanup_qp(struct i40iw_puda_rsrc *ieq, struct i40iw_sc_qp *qp)
{
struct i40iw_puda_buf *buf;
struct i40iw_pfpdu *pfpdu = &qp->pfpdu;
struct list_head *rxlist = &pfpdu->rxlist;
- struct i40iw_puda_rsrc *ieq = dev->ieq;
if (!pfpdu->mode)
return;
diff --git a/drivers/infiniband/hw/i40iw/i40iw_puda.h b/drivers/infiniband/hw/i40iw/i40iw_puda.h
index 52bf782..dba05ce 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_puda.h
+++ b/drivers/infiniband/hw/i40iw/i40iw_puda.h
@@ -100,6 +100,7 @@
enum puda_resource_type type; /* ILQ or IEQ */
u32 count;
u16 pd_id;
+ bool ceq_valid;
u32 cq_id;
u32 qp_id;
u32 sq_size;
@@ -107,8 +108,8 @@
u16 buf_size;
u16 mss;
u32 tx_buf_cnt; /* total bufs allocated will be rq_size + tx_buf_cnt */
- void (*receive)(struct i40iw_sc_dev *, struct i40iw_puda_buf *);
- void (*xmit_complete)(struct i40iw_sc_dev *, void *);
+ void (*receive)(struct i40iw_sc_vsi *, struct i40iw_puda_buf *);
+ void (*xmit_complete)(struct i40iw_sc_vsi *, void *);
};
struct i40iw_puda_rsrc {
@@ -116,6 +117,7 @@
struct i40iw_sc_qp qp;
struct i40iw_sc_pd sc_pd;
struct i40iw_sc_dev *dev;
+ struct i40iw_sc_vsi *vsi;
struct i40iw_dma_mem cqmem;
struct i40iw_dma_mem qpmem;
struct i40iw_virt_mem ilq_mem;
@@ -123,6 +125,7 @@
enum puda_resource_type type;
u16 buf_size; /*buffer must be max datalen + tcpip hdr + mac */
u16 mss;
+ bool ceq_valid;
u32 cq_id;
u32 qp_id;
u32 sq_size;
@@ -142,8 +145,8 @@
u32 avail_buf_count; /* snapshot of currently available buffers */
spinlock_t bufpool_lock;
struct i40iw_puda_buf *alloclist;
- void (*receive)(struct i40iw_sc_dev *, struct i40iw_puda_buf *);
- void (*xmit_complete)(struct i40iw_sc_dev *, void *);
+ void (*receive)(struct i40iw_sc_vsi *, struct i40iw_puda_buf *);
+ void (*xmit_complete)(struct i40iw_sc_vsi *, void *);
/* puda stats */
u64 stats_buf_alloc_fail;
u64 stats_pkt_rcvd;
@@ -160,14 +163,13 @@
struct i40iw_puda_buf *buf);
enum i40iw_status_code i40iw_puda_send(struct i40iw_sc_qp *qp,
struct i40iw_puda_send_info *info);
-enum i40iw_status_code i40iw_puda_create_rsrc(struct i40iw_sc_dev *dev,
+enum i40iw_status_code i40iw_puda_create_rsrc(struct i40iw_sc_vsi *vsi,
struct i40iw_puda_rsrc_info *info);
-void i40iw_puda_dele_resources(struct i40iw_sc_dev *dev,
+void i40iw_puda_dele_resources(struct i40iw_sc_vsi *vsi,
enum puda_resource_type type,
bool reset);
enum i40iw_status_code i40iw_puda_poll_completion(struct i40iw_sc_dev *dev,
struct i40iw_sc_cq *cq, u32 *compl_err);
-void i40iw_ieq_cleanup_qp(struct i40iw_sc_dev *dev, struct i40iw_sc_qp *qp);
struct i40iw_sc_qp *i40iw_ieq_get_qp(struct i40iw_sc_dev *dev,
struct i40iw_puda_buf *buf);
@@ -180,4 +182,8 @@
void i40iw_free_hash_desc(struct shash_desc *desc);
void i40iw_ieq_update_tcpip_info(struct i40iw_puda_buf *buf, u16 length,
u32 seqnum);
+enum i40iw_status_code i40iw_cqp_qp_create_cmd(struct i40iw_sc_dev *dev, struct i40iw_sc_qp *qp);
+enum i40iw_status_code i40iw_cqp_cq_create_cmd(struct i40iw_sc_dev *dev, struct i40iw_sc_cq *cq);
+void i40iw_cqp_qp_destroy_cmd(struct i40iw_sc_dev *dev, struct i40iw_sc_qp *qp);
+void i40iw_cqp_cq_destroy_cmd(struct i40iw_sc_dev *dev, struct i40iw_sc_cq *cq);
#endif
diff --git a/drivers/infiniband/hw/i40iw/i40iw_type.h b/drivers/infiniband/hw/i40iw/i40iw_type.h
index 2b1a04e..f3f8e9c 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_type.h
+++ b/drivers/infiniband/hw/i40iw/i40iw_type.h
@@ -61,7 +61,7 @@
struct i40iw_sc_dev;
struct i40iw_hmc_info;
-struct i40iw_dev_pestat;
+struct i40iw_vsi_pestat;
struct i40iw_cqp_ops;
struct i40iw_ccq_ops;
@@ -74,6 +74,11 @@
struct i40iw_priv_cq_ops;
struct i40iw_hmc_ops;
+enum i40iw_page_size {
+ I40IW_PAGE_SIZE_4K,
+ I40IW_PAGE_SIZE_2M
+};
+
enum i40iw_resource_indicator_type {
I40IW_RSRC_INDICATOR_TYPE_ADAPTER = 0,
I40IW_RSRC_INDICATOR_TYPE_CQ,
@@ -186,7 +191,7 @@
I40IW_DEBUG_ALL = 0xFFFFFFFF
};
-enum i40iw_hw_stat_index_32b {
+enum i40iw_hw_stats_index_32b {
I40IW_HW_STAT_INDEX_IP4RXDISCARD = 0,
I40IW_HW_STAT_INDEX_IP4RXTRUNC,
I40IW_HW_STAT_INDEX_IP4TXNOROUTE,
@@ -199,7 +204,7 @@
I40IW_HW_STAT_INDEX_MAX_32
};
-enum i40iw_hw_stat_index_64b {
+enum i40iw_hw_stats_index_64b {
I40IW_HW_STAT_INDEX_IP4RXOCTS = 0,
I40IW_HW_STAT_INDEX_IP4RXPKTS,
I40IW_HW_STAT_INDEX_IP4RXFRAGS,
@@ -229,32 +234,23 @@
I40IW_HW_STAT_INDEX_MAX_64
};
-struct i40iw_dev_hw_stat_offsets {
- u32 stat_offset_32[I40IW_HW_STAT_INDEX_MAX_32];
- u32 stat_offset_64[I40IW_HW_STAT_INDEX_MAX_64];
+struct i40iw_dev_hw_stats_offsets {
+ u32 stats_offset_32[I40IW_HW_STAT_INDEX_MAX_32];
+ u32 stats_offset_64[I40IW_HW_STAT_INDEX_MAX_64];
};
struct i40iw_dev_hw_stats {
- u64 stat_value_32[I40IW_HW_STAT_INDEX_MAX_32];
- u64 stat_value_64[I40IW_HW_STAT_INDEX_MAX_64];
+ u64 stats_value_32[I40IW_HW_STAT_INDEX_MAX_32];
+ u64 stats_value_64[I40IW_HW_STAT_INDEX_MAX_64];
};
-struct i40iw_device_pestat_ops {
- void (*iw_hw_stat_init)(struct i40iw_dev_pestat *, u8, struct i40iw_hw *, bool);
- void (*iw_hw_stat_read_32)(struct i40iw_dev_pestat *, enum i40iw_hw_stat_index_32b, u64 *);
- void (*iw_hw_stat_read_64)(struct i40iw_dev_pestat *, enum i40iw_hw_stat_index_64b, u64 *);
- void (*iw_hw_stat_read_all)(struct i40iw_dev_pestat *, struct i40iw_dev_hw_stats *);
- void (*iw_hw_stat_refresh_all)(struct i40iw_dev_pestat *);
-};
-
-struct i40iw_dev_pestat {
+struct i40iw_vsi_pestat {
struct i40iw_hw *hw;
- struct i40iw_device_pestat_ops ops;
struct i40iw_dev_hw_stats hw_stats;
struct i40iw_dev_hw_stats last_read_hw_stats;
- struct i40iw_dev_hw_stat_offsets hw_stat_offsets;
+ struct i40iw_dev_hw_stats_offsets hw_stats_offsets;
struct timer_list stats_timer;
- spinlock_t stats_lock; /* rdma stats lock */
+ spinlock_t lock; /* rdma stats lock */
};
struct i40iw_hw {
@@ -350,6 +346,7 @@
u64 cq_pa;
u64 shadow_area_pa;
struct i40iw_sc_dev *dev;
+ struct i40iw_sc_vsi *vsi;
void *pbl_list;
void *back_cq;
u32 ceq_id;
@@ -373,6 +370,7 @@
u64 shadow_area_pa;
u64 q2_pa;
struct i40iw_sc_dev *dev;
+ struct i40iw_sc_vsi *vsi;
struct i40iw_sc_pd *pd;
u64 *hw_host_ctx;
void *llp_stream_handle;
@@ -397,6 +395,9 @@
bool virtual_map;
bool flush_sq;
bool flush_rq;
+ u8 user_pri;
+ struct list_head list;
+ bool on_qoslist;
bool sq_flush;
enum i40iw_flush_opcode flush_code;
enum i40iw_term_eventtypes eventtype;
@@ -424,10 +425,16 @@
char parm_buffer[I40IW_VCHNL_MAX_VF_MSG_SIZE - 1];
};
+struct i40iw_qos {
+ struct list_head qplist;
+ spinlock_t lock; /* qos list */
+ u16 qs_handle;
+};
+
struct i40iw_vfdev {
struct i40iw_sc_dev *pf_dev;
u8 *hmc_info_mem;
- struct i40iw_dev_pestat dev_pestat;
+ struct i40iw_vsi_pestat pestat;
struct i40iw_hmc_pble_info *pble_info;
struct i40iw_hmc_info hmc_info;
struct i40iw_vchnl_vf_msg_buffer vf_msg_buffer;
@@ -441,11 +448,28 @@
bool stats_initialized;
};
+#define I40IW_INVALID_FCN_ID 0xff
+struct i40iw_sc_vsi {
+ struct i40iw_sc_dev *dev;
+ void *back_vsi; /* Owned by OS */
+ u32 ilq_count;
+ struct i40iw_virt_mem ilq_mem;
+ struct i40iw_puda_rsrc *ilq;
+ u32 ieq_count;
+ struct i40iw_virt_mem ieq_mem;
+ struct i40iw_puda_rsrc *ieq;
+ u16 mss;
+ u8 fcn_id;
+ bool stats_fcn_id_alloc;
+ struct i40iw_qos qos[I40IW_MAX_USER_PRIORITY];
+ struct i40iw_vsi_pestat *pestat;
+};
+
struct i40iw_sc_dev {
struct list_head cqp_cmd_head; /* head of the CQP command list */
spinlock_t cqp_lock; /* cqp list sync */
struct i40iw_dev_uk dev_uk;
- struct i40iw_dev_pestat dev_pestat;
+ bool fcn_id_array[I40IW_MAX_STATS_COUNT];
struct i40iw_dma_mem vf_fpm_query_buf[I40IW_MAX_PE_ENABLED_VF_COUNT];
u64 fpm_query_buf_pa;
u64 fpm_commit_buf_pa;
@@ -472,17 +496,9 @@
struct i40iw_cqp_misc_ops *cqp_misc_ops;
struct i40iw_hmc_ops *hmc_ops;
struct i40iw_vchnl_if vchnl_if;
- u32 ilq_count;
- struct i40iw_virt_mem ilq_mem;
- struct i40iw_puda_rsrc *ilq;
- u32 ieq_count;
- struct i40iw_virt_mem ieq_mem;
- struct i40iw_puda_rsrc *ieq;
-
const struct i40iw_vf_cqp_ops *iw_vf_cqp_ops;
struct i40iw_hmc_fpm_misc hmc_fpm_misc;
- u16 qs_handle;
u32 debug_mask;
u16 exception_lan_queue;
u8 hmc_fn_id;
@@ -556,6 +572,19 @@
u16 mss;
};
+struct i40iw_vsi_init_info {
+ struct i40iw_sc_dev *dev;
+ void *back_vsi;
+ struct i40iw_l2params *params;
+};
+
+struct i40iw_vsi_stats_info {
+ struct i40iw_vsi_pestat *pestat;
+ u8 fcn_id;
+ bool alloc_fcn_id;
+ bool stats_initialize;
+};
+
struct i40iw_device_init_info {
u64 fpm_query_buf_pa;
u64 fpm_commit_buf_pa;
@@ -564,7 +593,6 @@
struct i40iw_hw *hw;
void __iomem *bar0;
enum i40iw_status_code (*vchnl_send)(struct i40iw_sc_dev *, u32, u8 *, u16);
- u16 qs_handle;
u16 exception_lan_queue;
u8 hmc_fn_id;
bool is_pf;
@@ -722,6 +750,8 @@
bool iwarp_info_valid;
bool err_rq_idx_valid;
u16 err_rq_idx;
+ bool add_to_qoslist;
+ u8 user_pri;
};
struct i40iw_aeqe_info {
@@ -814,6 +844,7 @@
struct i40iw_qp_init_info {
struct i40iw_qp_uk_init_info qp_uk_init_info;
struct i40iw_sc_pd *pd;
+ struct i40iw_sc_vsi *vsi;
u64 *host_ctx;
u8 *q2;
u64 sq_pa;
@@ -880,13 +911,14 @@
};
struct i40iw_qhash_table_info {
+ struct i40iw_sc_vsi *vsi;
enum i40iw_quad_hash_manage_type manage;
enum i40iw_quad_entry_type entry_type;
bool vlan_valid;
bool ipv4_valid;
u8 mac_addr[6];
u16 vlan_id;
- u16 qs_handle;
+ u8 user_pri;
u32 qp_num;
u32 dest_ip[4];
u32 src_ip[4];
@@ -976,7 +1008,7 @@
struct i40iw_cqp_ops {
enum i40iw_status_code (*cqp_init)(struct i40iw_sc_cqp *,
struct i40iw_cqp_init_info *);
- enum i40iw_status_code (*cqp_create)(struct i40iw_sc_cqp *, bool, u16 *, u16 *);
+ enum i40iw_status_code (*cqp_create)(struct i40iw_sc_cqp *, u16 *, u16 *);
void (*cqp_post_sq)(struct i40iw_sc_cqp *);
u64 *(*cqp_get_next_send_wqe)(struct i40iw_sc_cqp *, u64 scratch);
enum i40iw_status_code (*cqp_destroy)(struct i40iw_sc_cqp *);
diff --git a/drivers/infiniband/hw/i40iw/i40iw_uk.c b/drivers/infiniband/hw/i40iw/i40iw_uk.c
index 4d28c3c..4376cd6 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_uk.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_uk.c
@@ -175,12 +175,10 @@
if (!*wqe_idx)
qp->swqe_polarity = !qp->swqe_polarity;
}
-
- for (i = 0; i < wqe_size / I40IW_QP_WQE_MIN_SIZE; i++) {
- I40IW_RING_MOVE_HEAD(qp->sq_ring, ret_code);
- if (ret_code)
- return NULL;
- }
+ I40IW_RING_MOVE_HEAD_BY_COUNT(qp->sq_ring,
+ wqe_size / I40IW_QP_WQE_MIN_SIZE, ret_code);
+ if (ret_code)
+ return NULL;
wqe = qp->sq_base[*wqe_idx].elem;
@@ -430,7 +428,7 @@
struct i40iw_inline_rdma_write *op_info;
u64 *push;
u64 header = 0;
- u32 i, wqe_idx;
+ u32 wqe_idx;
enum i40iw_status_code ret_code;
bool read_fence = false;
u8 wqe_size;
@@ -465,14 +463,12 @@
src = (u8 *)(op_info->data);
if (op_info->len <= 16) {
- for (i = 0; i < op_info->len; i++, src++, dest++)
- *dest = *src;
+ memcpy(dest, src, op_info->len);
} else {
- for (i = 0; i < 16; i++, src++, dest++)
- *dest = *src;
+ memcpy(dest, src, 16);
+ src += 16;
dest = (u8 *)wqe + 32;
- for (; i < op_info->len; i++, src++, dest++)
- *dest = *src;
+ memcpy(dest, src, op_info->len - 16);
}
wmb(); /* make sure WQE is populated before valid bit is set */
@@ -507,7 +503,7 @@
u8 *dest, *src;
struct i40iw_post_inline_send *op_info;
u64 header;
- u32 wqe_idx, i;
+ u32 wqe_idx;
enum i40iw_status_code ret_code;
bool read_fence = false;
u8 wqe_size;
@@ -540,14 +536,12 @@
src = (u8 *)(op_info->data);
if (op_info->len <= 16) {
- for (i = 0; i < op_info->len; i++, src++, dest++)
- *dest = *src;
+ memcpy(dest, src, op_info->len);
} else {
- for (i = 0; i < 16; i++, src++, dest++)
- *dest = *src;
+ memcpy(dest, src, 16);
+ src += 16;
dest = (u8 *)wqe + 32;
- for (; i < op_info->len; i++, src++, dest++)
- *dest = *src;
+ memcpy(dest, src, op_info->len - 16);
}
wmb(); /* make sure WQE is populated before valid bit is set */
@@ -1190,12 +1184,8 @@
if (data_size <= 16)
*wqe_size = I40IW_QP_WQE_MIN_SIZE;
- else if (data_size <= 48)
- *wqe_size = 64;
- else if (data_size <= 80)
- *wqe_size = 96;
else
- *wqe_size = 128;
+ *wqe_size = 64;
return 0;
}
diff --git a/drivers/infiniband/hw/i40iw/i40iw_user.h b/drivers/infiniband/hw/i40iw/i40iw_user.h
index 276bcef..80d9f46 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_user.h
+++ b/drivers/infiniband/hw/i40iw/i40iw_user.h
@@ -72,12 +72,12 @@
I40IW_MAX_SQ_PAYLOAD_SIZE = 2145386496,
I40IW_MAX_INLINE_DATA_SIZE = 48,
I40IW_MAX_PUSHMODE_INLINE_DATA_SIZE = 48,
- I40IW_MAX_IRD_SIZE = 32,
- I40IW_QPCTX_ENCD_MAXIRD = 3,
+ I40IW_MAX_IRD_SIZE = 63,
+ I40IW_MAX_ORD_SIZE = 127,
I40IW_MAX_WQ_ENTRIES = 2048,
- I40IW_MAX_ORD_SIZE = 32,
I40IW_Q2_BUFFER_SIZE = (248 + 100),
- I40IW_QP_CTX_SIZE = 248
+ I40IW_QP_CTX_SIZE = 248,
+ I40IW_MAX_PDS = 32768
};
#define i40iw_handle void *
@@ -96,12 +96,6 @@
#define i40iw_physical_fragment u64
#define i40iw_address_list u64 *
-#define I40IW_CREATE_STAG(index, key) (((index) << 8) + (key))
-
-#define I40IW_STAG_KEY_FROM_STAG(stag) ((stag) && 0x000000FF)
-
-#define I40IW_STAG_INDEX_FROM_STAG(stag) (((stag) && 0xFFFFFF00) >> 8)
-
#define I40IW_MAX_MR_SIZE 0x10000000000L
struct i40iw_qp_uk;
diff --git a/drivers/infiniband/hw/i40iw/i40iw_utils.c b/drivers/infiniband/hw/i40iw/i40iw_utils.c
index 6fd043b..0f5d43d 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_utils.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_utils.c
@@ -153,6 +153,7 @@
struct i40iw_device *iwdev;
struct i40iw_handler *hdl;
u32 local_ipaddr;
+ u32 action = I40IW_ARP_ADD;
hdl = i40iw_find_netdev(event_netdev);
if (!hdl)
@@ -164,44 +165,25 @@
if (netdev != event_netdev)
return NOTIFY_DONE;
+ if (upper_dev)
+ local_ipaddr = ntohl(
+ ((struct in_device *)upper_dev->ip_ptr)->ifa_list->ifa_address);
+ else
+ local_ipaddr = ntohl(ifa->ifa_address);
switch (event) {
case NETDEV_DOWN:
- if (upper_dev)
- local_ipaddr = ntohl(
- ((struct in_device *)upper_dev->ip_ptr)->ifa_list->ifa_address);
- else
- local_ipaddr = ntohl(ifa->ifa_address);
- i40iw_manage_arp_cache(iwdev,
- netdev->dev_addr,
- &local_ipaddr,
- true,
- I40IW_ARP_DELETE);
- return NOTIFY_OK;
+ action = I40IW_ARP_DELETE;
+ /* Fall through */
case NETDEV_UP:
- if (upper_dev)
- local_ipaddr = ntohl(
- ((struct in_device *)upper_dev->ip_ptr)->ifa_list->ifa_address);
- else
- local_ipaddr = ntohl(ifa->ifa_address);
- i40iw_manage_arp_cache(iwdev,
- netdev->dev_addr,
- &local_ipaddr,
- true,
- I40IW_ARP_ADD);
- break;
+ /* Fall through */
case NETDEV_CHANGEADDR:
- /* Add the address to the IP table */
- if (upper_dev)
- local_ipaddr = ntohl(
- ((struct in_device *)upper_dev->ip_ptr)->ifa_list->ifa_address);
- else
- local_ipaddr = ntohl(ifa->ifa_address);
-
i40iw_manage_arp_cache(iwdev,
netdev->dev_addr,
&local_ipaddr,
true,
- I40IW_ARP_ADD);
+ action);
+ i40iw_if_notify(iwdev, netdev, &local_ipaddr, true,
+ (action == I40IW_ARP_ADD) ? true : false);
break;
default:
break;
@@ -225,6 +207,7 @@
struct i40iw_device *iwdev;
struct i40iw_handler *hdl;
u32 local_ipaddr6[4];
+ u32 action = I40IW_ARP_ADD;
hdl = i40iw_find_netdev(event_netdev);
if (!hdl)
@@ -235,24 +218,21 @@
if (netdev != event_netdev)
return NOTIFY_DONE;
+ i40iw_copy_ip_ntohl(local_ipaddr6, ifa->addr.in6_u.u6_addr32);
switch (event) {
case NETDEV_DOWN:
- i40iw_copy_ip_ntohl(local_ipaddr6, ifa->addr.in6_u.u6_addr32);
- i40iw_manage_arp_cache(iwdev,
- netdev->dev_addr,
- local_ipaddr6,
- false,
- I40IW_ARP_DELETE);
- return NOTIFY_OK;
+ action = I40IW_ARP_DELETE;
+ /* Fall through */
case NETDEV_UP:
/* Fall through */
case NETDEV_CHANGEADDR:
- i40iw_copy_ip_ntohl(local_ipaddr6, ifa->addr.in6_u.u6_addr32);
i40iw_manage_arp_cache(iwdev,
netdev->dev_addr,
local_ipaddr6,
false,
- I40IW_ARP_ADD);
+ action);
+ i40iw_if_notify(iwdev, netdev, local_ipaddr6, false,
+ (action == I40IW_ARP_ADD) ? true : false);
break;
default:
break;
@@ -392,6 +372,7 @@
i40iw_rem_pdusecount(iwqp->iwpd, iwdev);
i40iw_free_qp_resources(iwdev, iwqp, qp_num);
+ i40iw_rem_devusecount(iwdev);
}
/**
@@ -415,7 +396,10 @@
i40iw_pr_err("error cqp command 0x%x timed out ret = %d\n",
info->cqp_cmd, timeout_ret);
err_code = -ETIME;
- i40iw_request_reset(iwdev);
+ if (!iwdev->reset) {
+ iwdev->reset = true;
+ i40iw_request_reset(iwdev);
+ }
goto done;
}
cqp_error = cqp_request->compl_info.error;
@@ -445,6 +429,11 @@
struct cqp_commands_info *info = &cqp_request->info;
int err_code = 0;
+ if (iwdev->reset) {
+ i40iw_free_cqp_request(&iwdev->cqp, cqp_request);
+ return I40IW_ERR_CQP_COMPL_ERROR;
+ }
+
status = i40iw_process_cqp_cmd(dev, info);
if (status) {
i40iw_pr_err("error cqp command 0x%x failed\n", info->cqp_cmd);
@@ -459,6 +448,26 @@
}
/**
+ * i40iw_add_devusecount - add dev refcount
+ * @iwdev: dev for refcount
+ */
+void i40iw_add_devusecount(struct i40iw_device *iwdev)
+{
+ atomic64_inc(&iwdev->use_count);
+}
+
+/**
+ * i40iw_rem_devusecount - decrement refcount for dev
+ * @iwdev: device
+ */
+void i40iw_rem_devusecount(struct i40iw_device *iwdev)
+{
+ if (!atomic64_dec_and_test(&iwdev->use_count))
+ return;
+ wake_up(&iwdev->close_wq);
+}
+
+/**
* i40iw_add_pdusecount - add pd refcount
* @iwpd: pd for refcount
*/
@@ -712,6 +721,51 @@
}
/**
+ * i40iw_qp_suspend_resume - cqp command for suspend/resume
+ * @dev: hardware control device structure
+ * @qp: hardware control qp
+ * @suspend: flag if suspend or resume
+ */
+void i40iw_qp_suspend_resume(struct i40iw_sc_dev *dev, struct i40iw_sc_qp *qp, bool suspend)
+{
+ struct i40iw_device *iwdev = (struct i40iw_device *)dev->back_dev;
+ struct i40iw_cqp_request *cqp_request;
+ struct i40iw_sc_cqp *cqp = dev->cqp;
+ struct cqp_commands_info *cqp_info;
+ enum i40iw_status_code status;
+
+ cqp_request = i40iw_get_cqp_request(&iwdev->cqp, false);
+ if (!cqp_request)
+ return;
+
+ cqp_info = &cqp_request->info;
+ cqp_info->cqp_cmd = (suspend) ? OP_SUSPEND : OP_RESUME;
+ cqp_info->in.u.suspend_resume.cqp = cqp;
+ cqp_info->in.u.suspend_resume.qp = qp;
+ cqp_info->in.u.suspend_resume.scratch = (uintptr_t)cqp_request;
+ status = i40iw_handle_cqp_op(iwdev, cqp_request);
+ if (status)
+ i40iw_pr_err("CQP-OP QP Suspend/Resume fail");
+}
+
+/**
+ * i40iw_qp_mss_modify - modify mss for qp
+ * @dev: hardware control device structure
+ * @qp: hardware control qp
+ */
+void i40iw_qp_mss_modify(struct i40iw_sc_dev *dev, struct i40iw_sc_qp *qp)
+{
+ struct i40iw_device *iwdev = (struct i40iw_device *)dev->back_dev;
+ struct i40iw_qp *iwqp = (struct i40iw_qp *)qp->back_qp;
+ struct i40iw_modify_qp_info info;
+
+ memset(&info, 0, sizeof(info));
+ info.mss_change = true;
+ info.new_mss = qp->vsi->mss;
+ i40iw_hw_modify_qp(iwdev, iwqp, &info, false);
+}
+
+/**
* i40iw_term_modify_qp - modify qp for term message
* @qp: hardware control qp
* @next_state: qp's next state
@@ -769,6 +823,7 @@
struct i40iw_sc_qp *qp = (struct i40iw_sc_qp *)&iwqp->sc_qp;
i40iw_terminate_done(qp, 1);
+ i40iw_rem_ref(&iwqp->ibqp);
}
/**
@@ -780,6 +835,7 @@
struct i40iw_qp *iwqp;
iwqp = (struct i40iw_qp *)qp->back_qp;
+ i40iw_add_ref(&iwqp->ibqp);
init_timer(&iwqp->terminate_timer);
iwqp->terminate_timer.function = i40iw_terminate_timeout;
iwqp->terminate_timer.expires = jiffies + HZ;
@@ -796,7 +852,8 @@
struct i40iw_qp *iwqp;
iwqp = (struct i40iw_qp *)qp->back_qp;
- del_timer(&iwqp->terminate_timer);
+ if (del_timer(&iwqp->terminate_timer))
+ i40iw_rem_ref(&iwqp->ibqp);
}
/**
@@ -1011,6 +1068,116 @@
}
/**
+ * i40iw_cqp_cq_create_cmd - create a cq for the cqp
+ * @dev: device pointer
+ * @cq: pointer to created cq
+ */
+enum i40iw_status_code i40iw_cqp_cq_create_cmd(struct i40iw_sc_dev *dev,
+ struct i40iw_sc_cq *cq)
+{
+ struct i40iw_device *iwdev = (struct i40iw_device *)dev->back_dev;
+ struct i40iw_cqp *iwcqp = &iwdev->cqp;
+ struct i40iw_cqp_request *cqp_request;
+ struct cqp_commands_info *cqp_info;
+ enum i40iw_status_code status;
+
+ cqp_request = i40iw_get_cqp_request(iwcqp, true);
+ if (!cqp_request)
+ return I40IW_ERR_NO_MEMORY;
+
+ cqp_info = &cqp_request->info;
+ cqp_info->cqp_cmd = OP_CQ_CREATE;
+ cqp_info->post_sq = 1;
+ cqp_info->in.u.cq_create.cq = cq;
+ cqp_info->in.u.cq_create.scratch = (uintptr_t)cqp_request;
+ status = i40iw_handle_cqp_op(iwdev, cqp_request);
+ if (status)
+ i40iw_pr_err("CQP-OP Create QP fail");
+
+ return status;
+}
+
+/**
+ * i40iw_cqp_qp_create_cmd - create a qp for the cqp
+ * @dev: device pointer
+ * @qp: pointer to created qp
+ */
+enum i40iw_status_code i40iw_cqp_qp_create_cmd(struct i40iw_sc_dev *dev,
+ struct i40iw_sc_qp *qp)
+{
+ struct i40iw_device *iwdev = (struct i40iw_device *)dev->back_dev;
+ struct i40iw_cqp *iwcqp = &iwdev->cqp;
+ struct i40iw_cqp_request *cqp_request;
+ struct cqp_commands_info *cqp_info;
+ struct i40iw_create_qp_info *qp_info;
+ enum i40iw_status_code status;
+
+ cqp_request = i40iw_get_cqp_request(iwcqp, true);
+ if (!cqp_request)
+ return I40IW_ERR_NO_MEMORY;
+
+ cqp_info = &cqp_request->info;
+ qp_info = &cqp_request->info.in.u.qp_create.info;
+
+ memset(qp_info, 0, sizeof(*qp_info));
+
+ qp_info->cq_num_valid = true;
+ qp_info->next_iwarp_state = I40IW_QP_STATE_RTS;
+
+ cqp_info->cqp_cmd = OP_QP_CREATE;
+ cqp_info->post_sq = 1;
+ cqp_info->in.u.qp_create.qp = qp;
+ cqp_info->in.u.qp_create.scratch = (uintptr_t)cqp_request;
+ status = i40iw_handle_cqp_op(iwdev, cqp_request);
+ if (status)
+ i40iw_pr_err("CQP-OP QP create fail");
+ return status;
+}
+
+/**
+ * i40iw_cqp_cq_destroy_cmd - destroy the cqp cq
+ * @dev: device pointer
+ * @cq: pointer to cq
+ */
+void i40iw_cqp_cq_destroy_cmd(struct i40iw_sc_dev *dev, struct i40iw_sc_cq *cq)
+{
+ struct i40iw_device *iwdev = (struct i40iw_device *)dev->back_dev;
+
+ i40iw_cq_wq_destroy(iwdev, cq);
+}
+
+/**
+ * i40iw_cqp_qp_destroy_cmd - destroy the cqp
+ * @dev: device pointer
+ * @qp: pointer to qp
+ */
+void i40iw_cqp_qp_destroy_cmd(struct i40iw_sc_dev *dev, struct i40iw_sc_qp *qp)
+{
+ struct i40iw_device *iwdev = (struct i40iw_device *)dev->back_dev;
+ struct i40iw_cqp *iwcqp = &iwdev->cqp;
+ struct i40iw_cqp_request *cqp_request;
+ struct cqp_commands_info *cqp_info;
+ enum i40iw_status_code status;
+
+ cqp_request = i40iw_get_cqp_request(iwcqp, true);
+ if (!cqp_request)
+ return;
+
+ cqp_info = &cqp_request->info;
+ memset(cqp_info, 0, sizeof(*cqp_info));
+
+ cqp_info->cqp_cmd = OP_QP_DESTROY;
+ cqp_info->post_sq = 1;
+ cqp_info->in.u.qp_destroy.qp = qp;
+ cqp_info->in.u.qp_destroy.scratch = (uintptr_t)cqp_request;
+ cqp_info->in.u.qp_destroy.remove_hash_idx = true;
+ status = i40iw_handle_cqp_op(iwdev, cqp_request);
+ if (status)
+ i40iw_pr_err("CQP QP_DESTROY fail");
+}
+
+
+/**
* i40iw_ieq_mpa_crc_ae - generate AE for crc error
* @dev: hardware control device structure
* @qp: hardware control qp
@@ -1208,7 +1375,7 @@
buf->totallen = pkt_len + buf->maclen;
- if (info->payload_len < buf->totallen - 4) {
+ if (info->payload_len < buf->totallen) {
i40iw_pr_err("payload_len = 0x%x totallen expected0x%x\n",
info->payload_len, buf->totallen);
return I40IW_ERR_INVALID_SIZE;
@@ -1224,27 +1391,29 @@
/**
* i40iw_hw_stats_timeout - Stats timer-handler which updates all HW stats
- * @dev: hardware control device structure
+ * @vsi: pointer to the vsi structure
*/
-static void i40iw_hw_stats_timeout(unsigned long dev)
+static void i40iw_hw_stats_timeout(unsigned long vsi)
{
- struct i40iw_sc_dev *pf_dev = (struct i40iw_sc_dev *)dev;
- struct i40iw_dev_pestat *pf_devstat = &pf_dev->dev_pestat;
- struct i40iw_dev_pestat *vf_devstat = NULL;
+ struct i40iw_sc_vsi *sc_vsi = (struct i40iw_sc_vsi *)vsi;
+ struct i40iw_sc_dev *pf_dev = sc_vsi->dev;
+ struct i40iw_vsi_pestat *pf_devstat = sc_vsi->pestat;
+ struct i40iw_vsi_pestat *vf_devstat = NULL;
u16 iw_vf_idx;
unsigned long flags;
/*PF*/
- pf_devstat->ops.iw_hw_stat_read_all(pf_devstat, &pf_devstat->hw_stats);
+ i40iw_hw_stats_read_all(pf_devstat, &pf_devstat->hw_stats);
+
for (iw_vf_idx = 0; iw_vf_idx < I40IW_MAX_PE_ENABLED_VF_COUNT; iw_vf_idx++) {
- spin_lock_irqsave(&pf_devstat->stats_lock, flags);
+ spin_lock_irqsave(&pf_devstat->lock, flags);
if (pf_dev->vf_dev[iw_vf_idx]) {
if (pf_dev->vf_dev[iw_vf_idx]->stats_initialized) {
- vf_devstat = &pf_dev->vf_dev[iw_vf_idx]->dev_pestat;
- vf_devstat->ops.iw_hw_stat_read_all(vf_devstat, &vf_devstat->hw_stats);
+ vf_devstat = &pf_dev->vf_dev[iw_vf_idx]->pestat;
+ i40iw_hw_stats_read_all(vf_devstat, &vf_devstat->hw_stats);
}
}
- spin_unlock_irqrestore(&pf_devstat->stats_lock, flags);
+ spin_unlock_irqrestore(&pf_devstat->lock, flags);
}
mod_timer(&pf_devstat->stats_timer,
@@ -1253,26 +1422,26 @@
/**
* i40iw_hw_stats_start_timer - Start periodic stats timer
- * @dev: hardware control device structure
+ * @vsi: pointer to the vsi structure
*/
-void i40iw_hw_stats_start_timer(struct i40iw_sc_dev *dev)
+void i40iw_hw_stats_start_timer(struct i40iw_sc_vsi *vsi)
{
- struct i40iw_dev_pestat *devstat = &dev->dev_pestat;
+ struct i40iw_vsi_pestat *devstat = vsi->pestat;
init_timer(&devstat->stats_timer);
devstat->stats_timer.function = i40iw_hw_stats_timeout;
- devstat->stats_timer.data = (unsigned long)dev;
+ devstat->stats_timer.data = (unsigned long)vsi;
mod_timer(&devstat->stats_timer,
jiffies + msecs_to_jiffies(STATS_TIMER_DELAY));
}
/**
- * i40iw_hw_stats_del_timer - Delete periodic stats timer
- * @dev: hardware control device structure
+ * i40iw_hw_stats_stop_timer - Delete periodic stats timer
+ * @vsi: pointer to the vsi structure
*/
-void i40iw_hw_stats_del_timer(struct i40iw_sc_dev *dev)
+void i40iw_hw_stats_stop_timer(struct i40iw_sc_vsi *vsi)
{
- struct i40iw_dev_pestat *devstat = &dev->dev_pestat;
+ struct i40iw_vsi_pestat *devstat = vsi->pestat;
del_timer_sync(&devstat->stats_timer);
}
diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
index 6329c97..7368a50 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
@@ -37,6 +37,7 @@
#include <linux/random.h>
#include <linux/highmem.h>
#include <linux/time.h>
+#include <linux/hugetlb.h>
#include <asm/byteorder.h>
#include <net/ip.h>
#include <rdma/ib_verbs.h>
@@ -67,13 +68,13 @@
props->vendor_part_id = iwdev->ldev->pcidev->device;
props->hw_ver = (u32)iwdev->sc_dev.hw_rev;
props->max_mr_size = I40IW_MAX_OUTBOUND_MESSAGE_SIZE;
- props->max_qp = iwdev->max_qp;
+ props->max_qp = iwdev->max_qp - iwdev->used_qps;
props->max_qp_wr = (I40IW_MAX_WQ_ENTRIES >> 2) - 1;
props->max_sge = I40IW_MAX_WQ_FRAGMENT_COUNT;
- props->max_cq = iwdev->max_cq;
+ props->max_cq = iwdev->max_cq - iwdev->used_cqs;
props->max_cqe = iwdev->max_cqe;
- props->max_mr = iwdev->max_mr;
- props->max_pd = iwdev->max_pd;
+ props->max_mr = iwdev->max_mr - iwdev->used_mrs;
+ props->max_pd = iwdev->max_pd - iwdev->used_pds;
props->max_sge_rd = I40IW_MAX_SGE_RD;
props->max_qp_rd_atom = I40IW_MAX_IRD_SIZE;
props->max_qp_init_rd_atom = props->max_qp_rd_atom;
@@ -254,7 +255,6 @@
{
struct i40iw_cqp_request *cqp_request;
struct cqp_commands_info *cqp_info;
- struct i40iw_sc_dev *dev = &iwdev->sc_dev;
enum i40iw_status_code status;
if (qp->push_idx != I40IW_INVALID_PUSH_PAGE_INDEX)
@@ -270,7 +270,7 @@
cqp_info->cqp_cmd = OP_MANAGE_PUSH_PAGE;
cqp_info->post_sq = 1;
- cqp_info->in.u.manage_push_page.info.qs_handle = dev->qs_handle;
+ cqp_info->in.u.manage_push_page.info.qs_handle = qp->qs_handle;
cqp_info->in.u.manage_push_page.info.free_page = 0;
cqp_info->in.u.manage_push_page.cqp = &iwdev->cqp.sc_cqp;
cqp_info->in.u.manage_push_page.scratch = (uintptr_t)cqp_request;
@@ -292,7 +292,6 @@
{
struct i40iw_cqp_request *cqp_request;
struct cqp_commands_info *cqp_info;
- struct i40iw_sc_dev *dev = &iwdev->sc_dev;
enum i40iw_status_code status;
if (qp->push_idx == I40IW_INVALID_PUSH_PAGE_INDEX)
@@ -307,7 +306,7 @@
cqp_info->post_sq = 1;
cqp_info->in.u.manage_push_page.info.push_idx = qp->push_idx;
- cqp_info->in.u.manage_push_page.info.qs_handle = dev->qs_handle;
+ cqp_info->in.u.manage_push_page.info.qs_handle = qp->qs_handle;
cqp_info->in.u.manage_push_page.info.free_page = 1;
cqp_info->in.u.manage_push_page.cqp = &iwdev->cqp.sc_cqp;
cqp_info->in.u.manage_push_page.scratch = (uintptr_t)cqp_request;
@@ -337,6 +336,9 @@
u32 pd_id = 0;
int err;
+ if (iwdev->closing)
+ return ERR_PTR(-ENODEV);
+
err = i40iw_alloc_resource(iwdev, iwdev->allocated_pds,
iwdev->max_pd, &pd_id, &iwdev->next_pd);
if (err) {
@@ -602,6 +604,9 @@
struct i40iwarp_offload_info *iwarp_info;
unsigned long flags;
+ if (iwdev->closing)
+ return ERR_PTR(-ENODEV);
+
if (init_attr->create_flags)
return ERR_PTR(-EINVAL);
if (init_attr->cap.max_inline_data > I40IW_MAX_INLINE_DATA_SIZE)
@@ -610,11 +615,15 @@
if (init_attr->cap.max_send_sge > I40IW_MAX_WQ_FRAGMENT_COUNT)
init_attr->cap.max_send_sge = I40IW_MAX_WQ_FRAGMENT_COUNT;
+ if (init_attr->cap.max_recv_sge > I40IW_MAX_WQ_FRAGMENT_COUNT)
+ init_attr->cap.max_recv_sge = I40IW_MAX_WQ_FRAGMENT_COUNT;
+
memset(&init_info, 0, sizeof(init_info));
sq_size = init_attr->cap.max_send_wr;
rq_size = init_attr->cap.max_recv_wr;
+ init_info.vsi = &iwdev->vsi;
init_info.qp_uk_init_info.sq_size = sq_size;
init_info.qp_uk_init_info.rq_size = rq_size;
init_info.qp_uk_init_info.max_sq_frag_cnt = init_attr->cap.max_send_sge;
@@ -774,6 +783,7 @@
iwqp->sig_all = (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) ? 1 : 0;
iwdev->qp_table[qp_num] = iwqp;
i40iw_add_pdusecount(iwqp->iwpd);
+ i40iw_add_devusecount(iwdev);
if (ibpd->uobject && udata) {
memset(&uresp, 0, sizeof(uresp));
uresp.actual_sq_size = sq_size;
@@ -815,8 +825,9 @@
attr->qp_access_flags = 0;
attr->cap.max_send_wr = qp->qp_uk.sq_size;
attr->cap.max_recv_wr = qp->qp_uk.rq_size;
- attr->cap.max_recv_sge = 1;
attr->cap.max_inline_data = I40IW_MAX_INLINE_DATA_SIZE;
+ attr->cap.max_send_sge = I40IW_MAX_WQ_FRAGMENT_COUNT;
+ attr->cap.max_recv_sge = I40IW_MAX_WQ_FRAGMENT_COUNT;
init_attr->event_handler = iwqp->ibqp.event_handler;
init_attr->qp_context = iwqp->ibqp.qp_context;
init_attr->send_cq = iwqp->ibqp.send_cq;
@@ -884,6 +895,11 @@
spin_lock_irqsave(&iwqp->lock, flags);
if (attr_mask & IB_QP_STATE) {
+ if (iwdev->closing && attr->qp_state != IB_QPS_ERR) {
+ err = -EINVAL;
+ goto exit;
+ }
+
switch (attr->qp_state) {
case IB_QPS_INIT:
case IB_QPS_RTR:
@@ -944,7 +960,7 @@
goto exit;
}
if (iwqp->sc_qp.term_flags)
- del_timer(&iwqp->terminate_timer);
+ i40iw_terminate_del_timer(&iwqp->sc_qp);
info.next_iwarp_state = I40IW_QP_STATE_ERROR;
if ((iwqp->hw_tcp_state > I40IW_TCP_STATE_CLOSED) &&
iwdev->iw_status &&
@@ -1037,11 +1053,11 @@
}
/**
- * cq_wq_destroy - send cq destroy cqp
+ * i40iw_cq_wq_destroy - send cq destroy cqp
* @iwdev: iwarp device
* @cq: hardware control cq
*/
-static void cq_wq_destroy(struct i40iw_device *iwdev, struct i40iw_sc_cq *cq)
+void i40iw_cq_wq_destroy(struct i40iw_device *iwdev, struct i40iw_sc_cq *cq)
{
enum i40iw_status_code status;
struct i40iw_cqp_request *cqp_request;
@@ -1080,9 +1096,10 @@
iwcq = to_iwcq(ib_cq);
iwdev = to_iwdev(ib_cq->device);
cq = &iwcq->sc_cq;
- cq_wq_destroy(iwdev, cq);
+ i40iw_cq_wq_destroy(iwdev, cq);
cq_free_resources(iwdev, iwcq);
kfree(iwcq);
+ i40iw_rem_devusecount(iwdev);
return 0;
}
@@ -1113,6 +1130,9 @@
int err_code;
int entries = attr->cqe;
+ if (iwdev->closing)
+ return ERR_PTR(-ENODEV);
+
if (entries > iwdev->max_cqe)
return ERR_PTR(-EINVAL);
@@ -1137,7 +1157,8 @@
ukinfo->cq_id = cq_num;
iwcq->ibcq.cqe = info.cq_uk_init_info.cq_size;
info.ceqe_mask = 0;
- info.ceq_id = 0;
+ if (attr->comp_vector < iwdev->ceqs_count)
+ info.ceq_id = attr->comp_vector;
info.ceq_id_valid = true;
info.ceqe_mask = 1;
info.type = I40IW_CQ_TYPE_IWARP;
@@ -1229,10 +1250,11 @@
}
}
+ i40iw_add_devusecount(iwdev);
return (struct ib_cq *)iwcq;
cq_destroy:
- cq_wq_destroy(iwdev, cq);
+ i40iw_cq_wq_destroy(iwdev, cq);
cq_free_resources:
cq_free_resources(iwdev, iwcq);
error:
@@ -1266,6 +1288,7 @@
stag_idx = (stag & iwdev->mr_stagmask) >> I40IW_CQPSQ_STAG_IDX_SHIFT;
i40iw_free_resource(iwdev, iwdev->allocated_mrs, stag_idx);
+ i40iw_rem_devusecount(iwdev);
}
/**
@@ -1296,19 +1319,18 @@
stag = stag_index << I40IW_CQPSQ_STAG_IDX_SHIFT;
stag |= driver_key;
stag += (u32)consumer_key;
+ i40iw_add_devusecount(iwdev);
}
return stag;
}
/**
* i40iw_next_pbl_addr - Get next pbl address
- * @palloc: Poiner to allocated pbles
* @pbl: pointer to a pble
* @pinfo: info pointer
* @idx: index
*/
-static inline u64 *i40iw_next_pbl_addr(struct i40iw_pble_alloc *palloc,
- u64 *pbl,
+static inline u64 *i40iw_next_pbl_addr(u64 *pbl,
struct i40iw_pble_info **pinfo,
u32 *idx)
{
@@ -1336,9 +1358,11 @@
struct i40iw_pble_alloc *palloc = &iwpbl->pble_alloc;
struct i40iw_pble_info *pinfo;
struct scatterlist *sg;
+ u64 pg_addr = 0;
u32 idx = 0;
pinfo = (level == I40IW_LEVEL_1) ? NULL : palloc->level2.leaf;
+
pg_shift = ffs(region->page_size) - 1;
for_each_sg(region->sg_head.sgl, sg, region->nmap, entry) {
chunk_pages = sg_dma_len(sg) >> pg_shift;
@@ -1346,17 +1370,96 @@
!iwpbl->qp_mr.sq_page)
iwpbl->qp_mr.sq_page = sg_page(sg);
for (i = 0; i < chunk_pages; i++) {
- *pbl = cpu_to_le64(sg_dma_address(sg) + region->page_size * i);
- pbl = i40iw_next_pbl_addr(palloc, pbl, &pinfo, &idx);
+ pg_addr = sg_dma_address(sg) + region->page_size * i;
+
+ if ((entry + i) == 0)
+ *pbl = cpu_to_le64(pg_addr & iwmr->page_msk);
+ else if (!(pg_addr & ~iwmr->page_msk))
+ *pbl = cpu_to_le64(pg_addr);
+ else
+ continue;
+ pbl = i40iw_next_pbl_addr(pbl, &pinfo, &idx);
}
}
}
/**
+ * i40iw_set_hugetlb_params - set MR pg size and mask to huge pg values.
+ * @addr: virtual address
+ * @iwmr: mr pointer for this memory registration
+ */
+static void i40iw_set_hugetlb_values(u64 addr, struct i40iw_mr *iwmr)
+{
+ struct vm_area_struct *vma;
+ struct hstate *h;
+
+ vma = find_vma(current->mm, addr);
+ if (vma && is_vm_hugetlb_page(vma)) {
+ h = hstate_vma(vma);
+ if (huge_page_size(h) == 0x200000) {
+ iwmr->page_size = huge_page_size(h);
+ iwmr->page_msk = huge_page_mask(h);
+ }
+ }
+}
+
+/**
+ * i40iw_check_mem_contiguous - check if pbls stored in arr are contiguous
+ * @arr: lvl1 pbl array
+ * @npages: page count
+ * pg_size: page size
+ *
+ */
+static bool i40iw_check_mem_contiguous(u64 *arr, u32 npages, u32 pg_size)
+{
+ u32 pg_idx;
+
+ for (pg_idx = 0; pg_idx < npages; pg_idx++) {
+ if ((*arr + (pg_size * pg_idx)) != arr[pg_idx])
+ return false;
+ }
+ return true;
+}
+
+/**
+ * i40iw_check_mr_contiguous - check if MR is physically contiguous
+ * @palloc: pbl allocation struct
+ * pg_size: page size
+ */
+static bool i40iw_check_mr_contiguous(struct i40iw_pble_alloc *palloc, u32 pg_size)
+{
+ struct i40iw_pble_level2 *lvl2 = &palloc->level2;
+ struct i40iw_pble_info *leaf = lvl2->leaf;
+ u64 *arr = NULL;
+ u64 *start_addr = NULL;
+ int i;
+ bool ret;
+
+ if (palloc->level == I40IW_LEVEL_1) {
+ arr = (u64 *)palloc->level1.addr;
+ ret = i40iw_check_mem_contiguous(arr, palloc->total_cnt, pg_size);
+ return ret;
+ }
+
+ start_addr = (u64 *)leaf->addr;
+
+ for (i = 0; i < lvl2->leaf_cnt; i++, leaf++) {
+ arr = (u64 *)leaf->addr;
+ if ((*start_addr + (i * pg_size * PBLE_PER_PAGE)) != *arr)
+ return false;
+ ret = i40iw_check_mem_contiguous(arr, leaf->cnt, pg_size);
+ if (!ret)
+ return false;
+ }
+
+ return true;
+}
+
+/**
* i40iw_setup_pbles - copy user pg address to pble's
* @iwdev: iwarp device
* @iwmr: mr pointer for this memory registration
- * @use_pbles: flag if to use pble's or memory (level 0)
+ * @use_pbles: flag if to use pble's
*/
static int i40iw_setup_pbles(struct i40iw_device *iwdev,
struct i40iw_mr *iwmr,
@@ -1369,9 +1472,6 @@
enum i40iw_status_code status;
enum i40iw_pble_level level = I40IW_LEVEL_1;
- if (!use_pbles && (iwmr->page_cnt > MAX_SAVE_PAGE_ADDRS))
- return -ENOMEM;
-
if (use_pbles) {
mutex_lock(&iwdev->pbl_mutex);
status = i40iw_get_pble(&iwdev->sc_dev, iwdev->pble_rsrc, palloc, iwmr->page_cnt);
@@ -1388,6 +1488,10 @@
}
i40iw_copy_user_pgaddrs(iwmr, pbl, level);
+
+ if (use_pbles)
+ iwmr->pgaddrmem[0] = *pbl;
+
return 0;
}
@@ -1409,14 +1513,18 @@
struct i40iw_cq_mr *cqmr = &iwpbl->cq_mr;
struct i40iw_hmc_pble *hmc_p;
u64 *arr = iwmr->pgaddrmem;
+ u32 pg_size;
int err;
int total;
+ bool ret = true;
total = req->sq_pages + req->rq_pages + req->cq_pages;
+ pg_size = iwmr->page_size;
err = i40iw_setup_pbles(iwdev, iwmr, use_pbles);
if (err)
return err;
+
if (use_pbles && (palloc->level != I40IW_LEVEL_1)) {
i40iw_free_pble(iwdev->pble_rsrc, palloc);
iwpbl->pbl_allocated = false;
@@ -1425,26 +1533,44 @@
if (use_pbles)
arr = (u64 *)palloc->level1.addr;
- if (req->reg_type == IW_MEMREG_TYPE_QP) {
+
+ if (iwmr->type == IW_MEMREG_TYPE_QP) {
hmc_p = &qpmr->sq_pbl;
qpmr->shadow = (dma_addr_t)arr[total];
+
if (use_pbles) {
+ ret = i40iw_check_mem_contiguous(arr, req->sq_pages, pg_size);
+ if (ret)
+ ret = i40iw_check_mem_contiguous(&arr[req->sq_pages], req->rq_pages, pg_size);
+ }
+
+ if (!ret) {
hmc_p->idx = palloc->level1.idx;
hmc_p = &qpmr->rq_pbl;
hmc_p->idx = palloc->level1.idx + req->sq_pages;
} else {
hmc_p->addr = arr[0];
hmc_p = &qpmr->rq_pbl;
- hmc_p->addr = arr[1];
+ hmc_p->addr = arr[req->sq_pages];
}
} else { /* CQ */
hmc_p = &cqmr->cq_pbl;
cqmr->shadow = (dma_addr_t)arr[total];
+
if (use_pbles)
+ ret = i40iw_check_mem_contiguous(arr, req->cq_pages, pg_size);
+
+ if (!ret)
hmc_p->idx = palloc->level1.idx;
else
hmc_p->addr = arr[0];
}
+
+ if (use_pbles && ret) {
+ i40iw_free_pble(iwdev->pble_rsrc, palloc);
+ iwpbl->pbl_allocated = false;
+ }
+
return err;
}
@@ -1642,8 +1768,9 @@
stag_info->access_rights = access;
stag_info->pd_id = iwpd->sc_pd.pd_id;
stag_info->addr_type = I40IW_ADDR_TYPE_VA_BASED;
+ stag_info->page_size = iwmr->page_size;
- if (iwmr->page_cnt > 1) {
+ if (iwpbl->pbl_allocated) {
if (palloc->level == I40IW_LEVEL_1) {
stag_info->first_pm_pbl_index = palloc->level1.idx;
stag_info->chunk_size = 1;
@@ -1699,6 +1826,11 @@
bool use_pbles = false;
unsigned long flags;
int err = -ENOSYS;
+ int ret;
+ int pg_shift;
+
+ if (iwdev->closing)
+ return ERR_PTR(-ENODEV);
if (length > I40IW_MAX_MR_SIZE)
return ERR_PTR(-EINVAL);
@@ -1723,9 +1855,17 @@
iwmr->ibmr.pd = pd;
iwmr->ibmr.device = pd->device;
ucontext = to_ucontext(pd->uobject->context);
- region_length = region->length + (start & 0xfff);
- pbl_depth = region_length >> 12;
- pbl_depth += (region_length & (4096 - 1)) ? 1 : 0;
+
+ iwmr->page_size = region->page_size;
+ iwmr->page_msk = PAGE_MASK;
+
+ if (region->hugetlb && (req.reg_type == IW_MEMREG_TYPE_MEM))
+ i40iw_set_hugetlb_values(start, iwmr);
+
+ region_length = region->length + (start & (iwmr->page_size - 1));
+ pg_shift = ffs(iwmr->page_size) - 1;
+ pbl_depth = region_length >> pg_shift;
+ pbl_depth += (region_length & (iwmr->page_size - 1)) ? 1 : 0;
iwmr->length = region->length;
iwpbl->user_base = virt;
@@ -1755,13 +1895,21 @@
spin_unlock_irqrestore(&ucontext->cq_reg_mem_list_lock, flags);
break;
case IW_MEMREG_TYPE_MEM:
+ use_pbles = (iwmr->page_cnt != 1);
access = I40IW_ACCESS_FLAGS_LOCALREAD;
- use_pbles = (iwmr->page_cnt != 1);
err = i40iw_setup_pbles(iwdev, iwmr, use_pbles);
if (err)
goto error;
+ if (use_pbles) {
+ ret = i40iw_check_mr_contiguous(palloc, iwmr->page_size);
+ if (ret) {
+ i40iw_free_pble(iwdev->pble_rsrc, palloc);
+ iwpbl->pbl_allocated = false;
+ }
+ }
+
access |= i40iw_get_user_access(acc);
stag = i40iw_create_stag(iwdev);
if (!stag) {
@@ -1778,6 +1926,7 @@
i40iw_free_stag(iwdev, stag);
goto error;
}
+
break;
default:
goto error;
@@ -1789,7 +1938,7 @@
return &iwmr->ibmr;
error:
- if (palloc->level != I40IW_LEVEL_0)
+ if (palloc->level != I40IW_LEVEL_0 && iwpbl->pbl_allocated)
i40iw_free_pble(iwdev->pble_rsrc, palloc);
ib_umem_release(region);
kfree(iwmr);
@@ -2142,7 +2291,6 @@
case IB_WR_REG_MR:
{
struct i40iw_mr *iwmr = to_iwmr(reg_wr(ib_wr)->mr);
- int page_shift = ilog2(reg_wr(ib_wr)->mr->page_size);
int flags = reg_wr(ib_wr)->access;
struct i40iw_pble_alloc *palloc = &iwmr->iwpbl.pble_alloc;
struct i40iw_sc_dev *dev = &iwqp->iwdev->sc_dev;
@@ -2153,6 +2301,7 @@
info.access_rights |= i40iw_get_user_access(flags);
info.stag_key = reg_wr(ib_wr)->key & 0xff;
info.stag_idx = reg_wr(ib_wr)->key >> 8;
+ info.page_size = reg_wr(ib_wr)->mr->page_size;
info.wr_id = ib_wr->wr_id;
info.addr_type = I40IW_ADDR_TYPE_VA_BASED;
@@ -2166,9 +2315,6 @@
if (iwmr->npages > I40IW_MIN_PAGES_PER_FMR)
info.chunk_size = 1;
- if (page_shift == 21)
- info.page_size = 1; /* 2M page */
-
ret = dev->iw_priv_qp_ops->iw_mr_fast_register(&iwqp->sc_qp, &info, true);
if (ret)
err = -ENOMEM;
@@ -2487,21 +2633,17 @@
{
struct i40iw_device *iwdev = to_iwdev(ibdev);
struct i40iw_sc_dev *dev = &iwdev->sc_dev;
- struct i40iw_dev_pestat *devstat = &dev->dev_pestat;
+ struct i40iw_vsi_pestat *devstat = iwdev->vsi.pestat;
struct i40iw_dev_hw_stats *hw_stats = &devstat->hw_stats;
- unsigned long flags;
if (dev->is_pf) {
- spin_lock_irqsave(&devstat->stats_lock, flags);
- devstat->ops.iw_hw_stat_read_all(devstat,
- &devstat->hw_stats);
- spin_unlock_irqrestore(&devstat->stats_lock, flags);
+ i40iw_hw_stats_read_all(devstat, &devstat->hw_stats);
} else {
if (i40iw_vchnl_vf_get_pe_stats(dev, &devstat->hw_stats))
return -ENOSYS;
}
- memcpy(&stats->value[0], &hw_stats, sizeof(*hw_stats));
+ memcpy(&stats->value[0], hw_stats, sizeof(*hw_stats));
return stats->num_counters;
}
@@ -2562,7 +2704,9 @@
* @ah_attr: address handle attributes
*/
static struct ib_ah *i40iw_create_ah(struct ib_pd *ibpd,
- struct ib_ah_attr *attr)
+ struct ib_ah_attr *attr,
+ struct ib_udata *udata)
+
{
return ERR_PTR(-ENOSYS);
}
@@ -2621,7 +2765,7 @@
(1ull << IB_USER_VERBS_CMD_POST_RECV) |
(1ull << IB_USER_VERBS_CMD_POST_SEND);
iwibdev->ibdev.phys_port_cnt = 1;
- iwibdev->ibdev.num_comp_vectors = 1;
+ iwibdev->ibdev.num_comp_vectors = iwdev->ceqs_count;
iwibdev->ibdev.dma_device = &pcidev->dev;
iwibdev->ibdev.dev.parent = &pcidev->dev;
iwibdev->ibdev.query_port = i40iw_query_port;
@@ -2654,7 +2798,6 @@
iwibdev->ibdev.iwcm = kzalloc(sizeof(*iwibdev->ibdev.iwcm), GFP_KERNEL);
if (!iwibdev->ibdev.iwcm) {
ib_dealloc_device(&iwibdev->ibdev);
- i40iw_pr_err("iwcm == NULL\n");
return NULL;
}
@@ -2719,6 +2862,9 @@
i40iw_unregister_rdma_device(iwibdev);
kfree(iwibdev->ibdev.iwcm);
iwibdev->ibdev.iwcm = NULL;
+ wait_event_timeout(iwibdev->iwdev->close_wq,
+ !atomic64_read(&iwibdev->iwdev->use_count),
+ I40IW_EVENT_TIMEOUT);
ib_dealloc_device(&iwibdev->ibdev);
}
diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.h b/drivers/infiniband/hw/i40iw/i40iw_verbs.h
index 0069be8..6549c93 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_verbs.h
+++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.h
@@ -92,6 +92,8 @@
struct ib_umem *region;
u16 type;
u32 page_cnt;
+ u32 page_size;
+ u64 page_msk;
u32 npages;
u32 stag;
u64 length;
diff --git a/drivers/infiniband/hw/i40iw/i40iw_virtchnl.c b/drivers/infiniband/hw/i40iw/i40iw_virtchnl.c
index 3041003..f4d1368 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_virtchnl.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_virtchnl.c
@@ -403,6 +403,19 @@
}
/**
+ * i40iw_vf_init_pestat - Initialize stats for VF
+ * @devL pointer to the VF Device
+ * @stats: Statistics structure pointer
+ * @index: Stats index
+ */
+static void i40iw_vf_init_pestat(struct i40iw_sc_dev *dev, struct i40iw_vsi_pestat *stats, u16 index)
+{
+ stats->hw = dev->hw;
+ i40iw_hw_stats_init(stats, (u8)index, false);
+ spin_lock_init(&stats->lock);
+}
+
+/**
* i40iw_vchnl_recv_pf - Receive PF virtual channel messages
* @dev: IWARP device pointer
* @vf_id: Virtual function ID associated with the message
@@ -421,9 +434,8 @@
u16 first_avail_iw_vf = I40IW_MAX_PE_ENABLED_VF_COUNT;
struct i40iw_virt_mem vf_dev_mem;
struct i40iw_virtchnl_work_info work_info;
- struct i40iw_dev_pestat *devstat;
+ struct i40iw_vsi_pestat *stats;
enum i40iw_status_code ret_code;
- unsigned long flags;
if (!dev || !msg || !len)
return I40IW_ERR_PARAM;
@@ -496,14 +508,7 @@
i40iw_debug(dev, I40IW_DEBUG_VIRT,
"VF%u error CQP HMC Function operation.\n",
vf_id);
- ret_code = i40iw_device_init_pestat(&vf_dev->dev_pestat);
- if (ret_code)
- i40iw_debug(dev, I40IW_DEBUG_VIRT,
- "VF%u - i40iw_device_init_pestat failed\n",
- vf_id);
- vf_dev->dev_pestat.ops.iw_hw_stat_init(&vf_dev->dev_pestat,
- (u8)vf_dev->pmf_index,
- dev->hw, false);
+ i40iw_vf_init_pestat(dev, &vf_dev->pestat, vf_dev->pmf_index);
vf_dev->stats_initialized = true;
} else {
if (vf_dev) {
@@ -534,12 +539,10 @@
case I40IW_VCHNL_OP_GET_STATS:
if (!vf_dev)
return I40IW_ERR_BAD_PTR;
- devstat = &vf_dev->dev_pestat;
- spin_lock_irqsave(&dev->dev_pestat.stats_lock, flags);
- devstat->ops.iw_hw_stat_read_all(devstat, &devstat->hw_stats);
- spin_unlock_irqrestore(&dev->dev_pestat.stats_lock, flags);
+ stats = &vf_dev->pestat;
+ i40iw_hw_stats_read_all(stats, &stats->hw_stats);
vf_dev->msg_count--;
- vchnl_pf_send_get_pe_stats_resp(dev, vf_id, vchnl_msg, &devstat->hw_stats);
+ vchnl_pf_send_get_pe_stats_resp(dev, vf_id, vchnl_msg, &stats->hw_stats);
break;
default:
i40iw_debug(dev, I40IW_DEBUG_VIRT,
diff --git a/drivers/infiniband/hw/mlx4/ah.c b/drivers/infiniband/hw/mlx4/ah.c
index b9bf075..077c33d 100644
--- a/drivers/infiniband/hw/mlx4/ah.c
+++ b/drivers/infiniband/hw/mlx4/ah.c
@@ -114,7 +114,9 @@
!(1 << ah->av.eth.stat_rate & dev->caps.stat_rate_support))
--ah->av.eth.stat_rate;
}
-
+ ah->av.eth.sl_tclass_flowlabel |=
+ cpu_to_be32((ah_attr->grh.traffic_class << 20) |
+ ah_attr->grh.flow_label);
/*
* HW requires multicast LID so we just choose one.
*/
@@ -122,12 +124,14 @@
ah->av.ib.dlid = cpu_to_be16(0xc000);
memcpy(ah->av.eth.dgid, ah_attr->grh.dgid.raw, 16);
- ah->av.eth.sl_tclass_flowlabel = cpu_to_be32(ah_attr->sl << 29);
+ ah->av.eth.sl_tclass_flowlabel |= cpu_to_be32(ah_attr->sl << 29);
return &ah->ibah;
}
-struct ib_ah *mlx4_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr)
+struct ib_ah *mlx4_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr,
+ struct ib_udata *udata)
+
{
struct mlx4_ib_ah *ah;
struct ib_ah *ret;
diff --git a/drivers/infiniband/hw/mlx4/alias_GUID.c b/drivers/infiniband/hw/mlx4/alias_GUID.c
index 5e99390..06020c5 100644
--- a/drivers/infiniband/hw/mlx4/alias_GUID.c
+++ b/drivers/infiniband/hw/mlx4/alias_GUID.c
@@ -755,10 +755,8 @@
struct mlx4_ib_dev *dev = container_of(ib_sriov, struct mlx4_ib_dev, sriov);
rec = kzalloc(sizeof *rec, GFP_KERNEL);
- if (!rec) {
- pr_err("alias_guid_work: No Memory\n");
+ if (!rec)
return;
- }
pr_debug("starting [port: %d]...\n", sriov_alias_port->port + 1);
ret = get_next_record_to_update(dev, sriov_alias_port->port, rec);
diff --git a/drivers/infiniband/hw/mlx4/cm.c b/drivers/infiniband/hw/mlx4/cm.c
index 39a4888..d648453 100644
--- a/drivers/infiniband/hw/mlx4/cm.c
+++ b/drivers/infiniband/hw/mlx4/cm.c
@@ -247,10 +247,8 @@
struct mlx4_ib_sriov *sriov = &to_mdev(ibdev)->sriov;
ent = kmalloc(sizeof (struct id_map_entry), GFP_KERNEL);
- if (!ent) {
- mlx4_ib_warn(ibdev, "Couldn't allocate id cache entry - out of memory\n");
+ if (!ent)
return ERR_PTR(-ENOMEM);
- }
ent->sl_cm_id = sl_cm_id;
ent->slave_id = slave_id;
diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c
index 1672907..db564cc 100644
--- a/drivers/infiniband/hw/mlx4/mad.c
+++ b/drivers/infiniband/hw/mlx4/mad.c
@@ -39,6 +39,8 @@
#include <linux/mlx4/cmd.h>
#include <linux/gfp.h>
#include <rdma/ib_pma.h>
+#include <linux/ip.h>
+#include <net/ipv6.h>
#include <linux/mlx4/driver.h>
#include "mlx4_ib.h"
@@ -480,6 +482,23 @@
return -EINVAL;
}
+static int get_gids_from_l3_hdr(struct ib_grh *grh, union ib_gid *sgid,
+ union ib_gid *dgid)
+{
+ int version = ib_get_rdma_header_version((const union rdma_network_hdr *)grh);
+ enum rdma_network_type net_type;
+
+ if (version == 4)
+ net_type = RDMA_NETWORK_IPV4;
+ else if (version == 6)
+ net_type = RDMA_NETWORK_IPV6;
+ else
+ return -EINVAL;
+
+ return ib_get_gids_from_rdma_hdr((union rdma_network_hdr *)grh, net_type,
+ sgid, dgid);
+}
+
int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port,
enum ib_qp_type dest_qpt, struct ib_wc *wc,
struct ib_grh *grh, struct ib_mad *mad)
@@ -538,7 +557,10 @@
memset(&attr, 0, sizeof attr);
attr.port_num = port;
if (is_eth) {
- memcpy(&attr.grh.dgid.raw[0], &grh->dgid.raw[0], 16);
+ union ib_gid sgid;
+
+ if (get_gids_from_l3_hdr(grh, &sgid, &attr.grh.dgid))
+ return -EINVAL;
attr.ah_flags = IB_AH_GRH;
}
ah = ib_create_ah(tun_ctx->pd, &attr);
@@ -651,6 +673,11 @@
is_eth = 1;
if (is_eth) {
+ union ib_gid dgid;
+ union ib_gid sgid;
+
+ if (get_gids_from_l3_hdr(grh, &sgid, &dgid))
+ return -EINVAL;
if (!(wc->wc_flags & IB_WC_GRH)) {
mlx4_ib_warn(ibdev, "RoCE grh not present.\n");
return -EINVAL;
@@ -659,10 +686,10 @@
mlx4_ib_warn(ibdev, "RoCE mgmt class is not CM\n");
return -EINVAL;
}
- err = mlx4_get_slave_from_roce_gid(dev->dev, port, grh->dgid.raw, &slave);
+ err = mlx4_get_slave_from_roce_gid(dev->dev, port, dgid.raw, &slave);
if (err && mlx4_is_mf_bonded(dev->dev)) {
other_port = (port == 1) ? 2 : 1;
- err = mlx4_get_slave_from_roce_gid(dev->dev, other_port, grh->dgid.raw, &slave);
+ err = mlx4_get_slave_from_roce_gid(dev->dev, other_port, dgid.raw, &slave);
if (!err) {
port = other_port;
pr_debug("resolved slave %d from gid %pI6 wire port %d other %d\n",
@@ -702,10 +729,18 @@
/* If a grh is present, we demux according to it */
if (wc->wc_flags & IB_WC_GRH) {
- slave = mlx4_ib_find_real_gid(ibdev, port, grh->dgid.global.interface_id);
- if (slave < 0) {
- mlx4_ib_warn(ibdev, "failed matching grh\n");
- return -ENOENT;
+ if (grh->dgid.global.interface_id ==
+ cpu_to_be64(IB_SA_WELL_KNOWN_GUID) &&
+ grh->dgid.global.subnet_prefix == cpu_to_be64(
+ atomic64_read(&dev->sriov.demux[port - 1].subnet_prefix))) {
+ slave = 0;
+ } else {
+ slave = mlx4_ib_find_real_gid(ibdev, port,
+ grh->dgid.global.interface_id);
+ if (slave < 0) {
+ mlx4_ib_warn(ibdev, "failed matching grh\n");
+ return -ENOENT;
+ }
}
}
/* Class-specific handling */
@@ -1102,10 +1137,8 @@
in_mad = kmalloc(sizeof *in_mad, GFP_KERNEL);
out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
- if (!in_mad || !out_mad) {
- mlx4_ib_warn(&dev->ib_dev, "failed to allocate memory for guid info mads\n");
+ if (!in_mad || !out_mad)
goto out;
- }
guid_tbl_blk_num *= 4;
@@ -1916,11 +1949,8 @@
*ret_ctx = NULL;
ctx = kzalloc(sizeof (struct mlx4_ib_demux_pv_ctx), GFP_KERNEL);
- if (!ctx) {
- pr_err("failed allocating pv resource context "
- "for port %d, slave %d\n", port, slave);
+ if (!ctx)
return -ENOMEM;
- }
ctx->ib_dev = &dev->ib_dev;
ctx->port = port;
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index b597e82..c8413fc 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -430,7 +430,7 @@
struct mlx4_ib_dev *dev = to_mdev(ibdev);
struct ib_smp *in_mad = NULL;
struct ib_smp *out_mad = NULL;
- int err = -ENOMEM;
+ int err;
int have_ib_ports;
struct mlx4_uverbs_ex_query_device cmd;
struct mlx4_uverbs_ex_query_device_resp resp = {.comp_mask = 0};
@@ -455,6 +455,7 @@
sizeof(resp.response_length);
in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL);
out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
+ err = -ENOMEM;
if (!in_mad || !out_mad)
goto out;
@@ -547,6 +548,7 @@
props->max_map_per_fmr = dev->dev->caps.max_fmr_maps;
props->hca_core_clock = dev->dev->caps.hca_core_clock * 1000UL;
props->timestamp_mask = 0xFFFFFFFFFFFFULL;
+ props->max_ah = INT_MAX;
if (!mlx4_is_slave(dev->dev))
err = mlx4_get_internal_clock_params(dev->dev, &clock_params);
@@ -697,9 +699,11 @@
if (err)
goto out;
- props->active_width = (((u8 *)mailbox->buf)[5] == 0x40) ?
- IB_WIDTH_4X : IB_WIDTH_1X;
- props->active_speed = IB_SPEED_QDR;
+ props->active_width = (((u8 *)mailbox->buf)[5] == 0x40) ||
+ (((u8 *)mailbox->buf)[5] == 0x20 /*56Gb*/) ?
+ IB_WIDTH_4X : IB_WIDTH_1X;
+ props->active_speed = (((u8 *)mailbox->buf)[5] == 0x20 /*56Gb*/) ?
+ IB_SPEED_FDR : IB_SPEED_QDR;
props->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_IP_BASED_GIDS;
props->gid_tbl_len = mdev->dev->caps.gid_table_len[port];
props->max_msg_sz = mdev->dev->caps.max_msg_sz;
@@ -2814,20 +2818,22 @@
kmalloc(BITS_TO_LONGS(ibdev->steer_qpn_count) *
sizeof(long),
GFP_KERNEL);
- if (!ibdev->ib_uc_qpns_bitmap) {
- dev_err(&dev->persist->pdev->dev,
- "bit map alloc failed\n");
+ if (!ibdev->ib_uc_qpns_bitmap)
goto err_steer_qp_release;
+
+ if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_DMFS_IPOIB) {
+ bitmap_zero(ibdev->ib_uc_qpns_bitmap,
+ ibdev->steer_qpn_count);
+ err = mlx4_FLOW_STEERING_IB_UC_QP_RANGE(
+ dev, ibdev->steer_qpn_base,
+ ibdev->steer_qpn_base +
+ ibdev->steer_qpn_count - 1);
+ if (err)
+ goto err_steer_free_bitmap;
+ } else {
+ bitmap_fill(ibdev->ib_uc_qpns_bitmap,
+ ibdev->steer_qpn_count);
}
-
- bitmap_zero(ibdev->ib_uc_qpns_bitmap, ibdev->steer_qpn_count);
-
- err = mlx4_FLOW_STEERING_IB_UC_QP_RANGE(
- dev, ibdev->steer_qpn_base,
- ibdev->steer_qpn_base +
- ibdev->steer_qpn_count - 1);
- if (err)
- goto err_steer_free_bitmap;
}
for (j = 1; j <= ibdev->dev->caps.num_ports; j++)
@@ -3055,15 +3061,12 @@
first_port = find_first_bit(actv_ports.ports, dev->caps.num_ports);
dm = kcalloc(ports, sizeof(*dm), GFP_ATOMIC);
- if (!dm) {
- pr_err("failed to allocate memory for tunneling qp update\n");
+ if (!dm)
return;
- }
for (i = 0; i < ports; i++) {
dm[i] = kmalloc(sizeof (struct mlx4_ib_demux_work), GFP_ATOMIC);
if (!dm[i]) {
- pr_err("failed to allocate memory for tunneling qp update work struct\n");
while (--i >= 0)
kfree(dm[i]);
goto out;
@@ -3223,8 +3226,6 @@
ew->port = port;
ew->ib_dev = ibdev;
queue_work(wq, &ew->work);
- } else {
- pr_err("failed to allocate memory for sl2vl update work\n");
}
}
@@ -3284,10 +3285,8 @@
case MLX4_DEV_EVENT_PORT_MGMT_CHANGE:
ew = kmalloc(sizeof *ew, GFP_ATOMIC);
- if (!ew) {
- pr_err("failed to allocate memory for events work\n");
+ if (!ew)
break;
- }
INIT_WORK(&ew->work, handle_port_mgmt_change_event);
memcpy(&ew->ib_eqe, eqe, sizeof *eqe);
diff --git a/drivers/infiniband/hw/mlx4/mcg.c b/drivers/infiniband/hw/mlx4/mcg.c
index a21d37f..e010fe4 100644
--- a/drivers/infiniband/hw/mlx4/mcg.c
+++ b/drivers/infiniband/hw/mlx4/mcg.c
@@ -1142,7 +1142,6 @@
work = kmalloc(sizeof *work, GFP_KERNEL);
if (!work) {
ctx->flushing = 0;
- mcg_warn("failed allocating work for cleanup\n");
return;
}
@@ -1202,10 +1201,8 @@
return 0;
req = kzalloc(sizeof *req, GFP_KERNEL);
- if (!req) {
- mcg_warn_group(group, "failed allocation - may leave stall groups\n");
+ if (!req)
return -ENOMEM;
- }
if (!list_empty(&group->func[slave].pending)) {
pend_req = list_entry(group->func[slave].pending.prev, struct mcast_req, group_list);
diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h
index 35141f4..7f3d976 100644
--- a/drivers/infiniband/hw/mlx4/mlx4_ib.h
+++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h
@@ -742,7 +742,8 @@
void __mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq);
void mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq);
-struct ib_ah *mlx4_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr);
+struct ib_ah *mlx4_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr,
+ struct ib_udata *udata);
int mlx4_ib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr);
int mlx4_ib_destroy_ah(struct ib_ah *ah);
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index 570bc86..c068add 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -644,7 +644,7 @@
int qpn;
int err;
struct ib_qp_cap backup_cap;
- struct mlx4_ib_sqp *sqp;
+ struct mlx4_ib_sqp *sqp = NULL;
struct mlx4_ib_qp *qp;
enum mlx4_ib_qp_type qp_type = (enum mlx4_ib_qp_type) init_attr->qp_type;
struct mlx4_ib_cq *mcq;
@@ -933,7 +933,9 @@
mlx4_db_free(dev->dev, &qp->db);
err:
- if (!*caller_qp)
+ if (sqp)
+ kfree(sqp);
+ else if (!*caller_qp)
kfree(qp);
return err;
}
@@ -1280,7 +1282,8 @@
if (is_qp0(dev, mqp))
mlx4_CLOSE_PORT(dev->dev, mqp->port);
- if (dev->qp1_proxy[mqp->port - 1] == mqp) {
+ if (mqp->mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_GSI &&
+ dev->qp1_proxy[mqp->port - 1] == mqp) {
mutex_lock(&dev->qp1_proxy_lock[mqp->port - 1]);
dev->qp1_proxy[mqp->port - 1] = NULL;
mutex_unlock(&dev->qp1_proxy_lock[mqp->port - 1]);
@@ -1764,14 +1767,14 @@
u8 port_num = mlx4_is_bonded(to_mdev(ibqp->device)->dev) ? 1 :
attr_mask & IB_QP_PORT ? attr->port_num : qp->port;
union ib_gid gid;
- struct ib_gid_attr gid_attr;
+ struct ib_gid_attr gid_attr = {.gid_type = IB_GID_TYPE_IB};
u16 vlan = 0xffff;
u8 smac[ETH_ALEN];
int status = 0;
int is_eth = rdma_cap_eth_ah(&dev->ib_dev, port_num) &&
attr->ah_attr.ah_flags & IB_AH_GRH;
- if (is_eth) {
+ if (is_eth && attr->ah_attr.ah_flags & IB_AH_GRH) {
int index = attr->ah_attr.grh.sgid_index;
status = ib_get_cached_gid(ibqp->device, port_num,
diff --git a/drivers/infiniband/hw/mlx5/ah.c b/drivers/infiniband/hw/mlx5/ah.c
index 745efa4..d090e96 100644
--- a/drivers/infiniband/hw/mlx5/ah.c
+++ b/drivers/infiniband/hw/mlx5/ah.c
@@ -64,7 +64,9 @@
return &ah->ibah;
}
-struct ib_ah *mlx5_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr)
+struct ib_ah *mlx5_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr,
+ struct ib_udata *udata)
+
{
struct mlx5_ib_ah *ah;
struct mlx5_ib_dev *dev = to_mdev(pd->device);
@@ -75,6 +77,27 @@
if (ll == IB_LINK_LAYER_ETHERNET && !(ah_attr->ah_flags & IB_AH_GRH))
return ERR_PTR(-EINVAL);
+ if (ll == IB_LINK_LAYER_ETHERNET && udata) {
+ int err;
+ struct mlx5_ib_create_ah_resp resp = {};
+ u32 min_resp_len = offsetof(typeof(resp), dmac) +
+ sizeof(resp.dmac);
+
+ if (udata->outlen < min_resp_len)
+ return ERR_PTR(-EINVAL);
+
+ resp.response_length = min_resp_len;
+
+ err = ib_resolve_eth_dmac(pd->device, ah_attr);
+ if (err)
+ return ERR_PTR(err);
+
+ memcpy(resp.dmac, ah_attr->dmac, ETH_ALEN);
+ err = ib_copy_to_udata(udata, &resp, resp.response_length);
+ if (err)
+ return ERR_PTR(err);
+ }
+
ah = kzalloc(sizeof(*ah), GFP_ATOMIC);
if (!ah)
return ERR_PTR(-ENOMEM);
diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
index fcd04b8..b3ef47c 100644
--- a/drivers/infiniband/hw/mlx5/cq.c
+++ b/drivers/infiniband/hw/mlx5/cq.c
@@ -731,7 +731,7 @@
int entries, u32 **cqb,
int *cqe_size, int *index, int *inlen)
{
- struct mlx5_ib_create_cq ucmd;
+ struct mlx5_ib_create_cq ucmd = {};
size_t ucmdlen;
int page_shift;
__be64 *pas;
@@ -770,7 +770,7 @@
if (err)
goto err_umem;
- mlx5_ib_cont_pages(cq->buf.umem, ucmd.buf_addr, &npages, &page_shift,
+ mlx5_ib_cont_pages(cq->buf.umem, ucmd.buf_addr, 0, &npages, &page_shift,
&ncont, NULL);
mlx5_ib_dbg(dev, "addr 0x%llx, size %u, npages %d, page_shift %d, ncont %d\n",
ucmd.buf_addr, entries * ucmd.cqe_size, npages, page_shift, ncont);
@@ -792,8 +792,36 @@
*index = to_mucontext(context)->uuari.uars[0].index;
+ if (ucmd.cqe_comp_en == 1) {
+ if (unlikely((*cqe_size != 64) ||
+ !MLX5_CAP_GEN(dev->mdev, cqe_compression))) {
+ err = -EOPNOTSUPP;
+ mlx5_ib_warn(dev, "CQE compression is not supported for size %d!\n",
+ *cqe_size);
+ goto err_cqb;
+ }
+
+ if (unlikely(!ucmd.cqe_comp_res_format ||
+ !(ucmd.cqe_comp_res_format <
+ MLX5_IB_CQE_RES_RESERVED) ||
+ (ucmd.cqe_comp_res_format &
+ (ucmd.cqe_comp_res_format - 1)))) {
+ err = -EOPNOTSUPP;
+ mlx5_ib_warn(dev, "CQE compression res format %d is not supported!\n",
+ ucmd.cqe_comp_res_format);
+ goto err_cqb;
+ }
+
+ MLX5_SET(cqc, cqc, cqe_comp_en, 1);
+ MLX5_SET(cqc, cqc, mini_cqe_res_format,
+ ilog2(ucmd.cqe_comp_res_format));
+ }
+
return 0;
+err_cqb:
+ kfree(cqb);
+
err_db:
mlx5_ib_db_unmap_user(to_mucontext(context), &cq->db);
@@ -1124,7 +1152,7 @@
return err;
}
- mlx5_ib_cont_pages(umem, ucmd.buf_addr, &npages, page_shift,
+ mlx5_ib_cont_pages(umem, ucmd.buf_addr, 0, &npages, page_shift,
npas, NULL);
cq->resize_umem = umem;
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 2be65dd..d566f67 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -127,7 +127,7 @@
if ((upper == ndev || (!upper && ndev == ibdev->roce.netdev))
&& ibdev->ib_active) {
- struct ib_event ibev = {0};
+ struct ib_event ibev = { };
ibev.device = &ibdev->ib_dev;
ibev.event = (event == NETDEV_UP) ?
@@ -496,6 +496,7 @@
struct mlx5_ib_dev *dev = to_mdev(ibdev);
struct mlx5_core_dev *mdev = dev->mdev;
int err = -ENOMEM;
+ int max_sq_desc;
int max_rq_sg;
int max_sq_sg;
u64 min_page_size = 1ull << MLX5_CAP_GEN(mdev, log_pg_sz);
@@ -618,9 +619,10 @@
props->max_qp_wr = 1 << MLX5_CAP_GEN(mdev, log_max_qp_sz);
max_rq_sg = MLX5_CAP_GEN(mdev, max_wqe_sz_rq) /
sizeof(struct mlx5_wqe_data_seg);
- max_sq_sg = (MLX5_CAP_GEN(mdev, max_wqe_sz_sq) -
- sizeof(struct mlx5_wqe_ctrl_seg)) /
- sizeof(struct mlx5_wqe_data_seg);
+ max_sq_desc = min_t(int, MLX5_CAP_GEN(mdev, max_wqe_sz_sq), 512);
+ max_sq_sg = (max_sq_desc - sizeof(struct mlx5_wqe_ctrl_seg) -
+ sizeof(struct mlx5_wqe_raddr_seg)) /
+ sizeof(struct mlx5_wqe_data_seg);
props->max_sge = min(max_rq_sg, max_sq_sg);
props->max_sge_rd = MLX5_MAX_SGE_RD;
props->max_cq = 1 << MLX5_CAP_GEN(mdev, log_max_cq);
@@ -643,6 +645,7 @@
props->max_total_mcast_qp_attach = props->max_mcast_qp_attach *
props->max_mcast_grp;
props->max_map_per_fmr = INT_MAX; /* no limit in ConnectIB */
+ props->max_ah = INT_MAX;
props->hca_core_clock = MLX5_CAP_GEN(mdev, device_frequency_khz);
props->timestamp_mask = 0x7FFFFFFFFFFFFFFFULL;
@@ -669,6 +672,40 @@
1 << MLX5_CAP_GEN(dev->mdev, log_max_rq);
}
+ if (field_avail(typeof(resp), mlx5_ib_support_multi_pkt_send_wqes,
+ uhw->outlen)) {
+ resp.mlx5_ib_support_multi_pkt_send_wqes =
+ MLX5_CAP_ETH(mdev, multi_pkt_send_wqe);
+ resp.response_length +=
+ sizeof(resp.mlx5_ib_support_multi_pkt_send_wqes);
+ }
+
+ if (field_avail(typeof(resp), reserved, uhw->outlen))
+ resp.response_length += sizeof(resp.reserved);
+
+ if (field_avail(typeof(resp), cqe_comp_caps, uhw->outlen)) {
+ resp.cqe_comp_caps.max_num =
+ MLX5_CAP_GEN(dev->mdev, cqe_compression) ?
+ MLX5_CAP_GEN(dev->mdev, cqe_compression_max_num) : 0;
+ resp.cqe_comp_caps.supported_format =
+ MLX5_IB_CQE_RES_FORMAT_HASH |
+ MLX5_IB_CQE_RES_FORMAT_CSUM;
+ resp.response_length += sizeof(resp.cqe_comp_caps);
+ }
+
+ if (field_avail(typeof(resp), packet_pacing_caps, uhw->outlen)) {
+ if (MLX5_CAP_QOS(mdev, packet_pacing) &&
+ MLX5_CAP_GEN(mdev, qos)) {
+ resp.packet_pacing_caps.qp_rate_limit_max =
+ MLX5_CAP_QOS(mdev, packet_pacing_max_rate);
+ resp.packet_pacing_caps.qp_rate_limit_min =
+ MLX5_CAP_QOS(mdev, packet_pacing_min_rate);
+ resp.packet_pacing_caps.supported_qpts |=
+ 1 << IB_QPT_RAW_PACKET;
+ }
+ resp.response_length += sizeof(resp.packet_pacing_caps);
+ }
+
if (uhw->outlen) {
err = ib_copy_to_udata(uhw, &resp, resp.response_length);
@@ -1093,7 +1130,8 @@
resp.response_length += sizeof(resp.cqe_version);
if (field_avail(typeof(resp), cmds_supp_uhw, udata->outlen)) {
- resp.cmds_supp_uhw |= MLX5_USER_CMDS_SUPP_UHW_QUERY_DEVICE;
+ resp.cmds_supp_uhw |= MLX5_USER_CMDS_SUPP_UHW_QUERY_DEVICE |
+ MLX5_USER_CMDS_SUPP_UHW_CREATE_AH;
resp.response_length += sizeof(resp.cmds_supp_uhw);
}
@@ -1502,6 +1540,22 @@
MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_protocol, val);
}
+static void set_flow_label(void *misc_c, void *misc_v, u8 mask, u8 val,
+ bool inner)
+{
+ if (inner) {
+ MLX5_SET(fte_match_set_misc,
+ misc_c, inner_ipv6_flow_label, mask);
+ MLX5_SET(fte_match_set_misc,
+ misc_v, inner_ipv6_flow_label, val);
+ } else {
+ MLX5_SET(fte_match_set_misc,
+ misc_c, outer_ipv6_flow_label, mask);
+ MLX5_SET(fte_match_set_misc,
+ misc_v, outer_ipv6_flow_label, val);
+ }
+}
+
static void set_tos(void *outer_c, void *outer_v, u8 mask, u8 val)
{
MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_ecn, mask);
@@ -1515,6 +1569,7 @@
#define LAST_IPV4_FIELD tos
#define LAST_IPV6_FIELD traffic_class
#define LAST_TCP_UDP_FIELD src_port
+#define LAST_TUNNEL_FIELD tunnel_id
/* Field is the last supported field */
#define FIELDS_NOT_SUPPORTED(filter, field)\
@@ -1527,155 +1582,164 @@
static int parse_flow_attr(u32 *match_c, u32 *match_v,
const union ib_flow_spec *ib_spec)
{
- void *outer_headers_c = MLX5_ADDR_OF(fte_match_param, match_c,
- outer_headers);
- void *outer_headers_v = MLX5_ADDR_OF(fte_match_param, match_v,
- outer_headers);
void *misc_params_c = MLX5_ADDR_OF(fte_match_param, match_c,
misc_parameters);
void *misc_params_v = MLX5_ADDR_OF(fte_match_param, match_v,
misc_parameters);
+ void *headers_c;
+ void *headers_v;
- switch (ib_spec->type) {
+ if (ib_spec->type & IB_FLOW_SPEC_INNER) {
+ headers_c = MLX5_ADDR_OF(fte_match_param, match_c,
+ inner_headers);
+ headers_v = MLX5_ADDR_OF(fte_match_param, match_v,
+ inner_headers);
+ } else {
+ headers_c = MLX5_ADDR_OF(fte_match_param, match_c,
+ outer_headers);
+ headers_v = MLX5_ADDR_OF(fte_match_param, match_v,
+ outer_headers);
+ }
+
+ switch (ib_spec->type & ~IB_FLOW_SPEC_INNER) {
case IB_FLOW_SPEC_ETH:
if (FIELDS_NOT_SUPPORTED(ib_spec->eth.mask, LAST_ETH_FIELD))
return -ENOTSUPP;
- ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c,
+ ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
dmac_47_16),
ib_spec->eth.mask.dst_mac);
- ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v,
+ ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
dmac_47_16),
ib_spec->eth.val.dst_mac);
- ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c,
+ ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
smac_47_16),
ib_spec->eth.mask.src_mac);
- ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v,
+ ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
smac_47_16),
ib_spec->eth.val.src_mac);
if (ib_spec->eth.mask.vlan_tag) {
- MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c,
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c,
vlan_tag, 1);
- MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v,
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v,
vlan_tag, 1);
- MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c,
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c,
first_vid, ntohs(ib_spec->eth.mask.vlan_tag));
- MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v,
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v,
first_vid, ntohs(ib_spec->eth.val.vlan_tag));
- MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c,
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c,
first_cfi,
ntohs(ib_spec->eth.mask.vlan_tag) >> 12);
- MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v,
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v,
first_cfi,
ntohs(ib_spec->eth.val.vlan_tag) >> 12);
- MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c,
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c,
first_prio,
ntohs(ib_spec->eth.mask.vlan_tag) >> 13);
- MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v,
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v,
first_prio,
ntohs(ib_spec->eth.val.vlan_tag) >> 13);
}
- MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c,
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c,
ethertype, ntohs(ib_spec->eth.mask.ether_type));
- MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v,
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v,
ethertype, ntohs(ib_spec->eth.val.ether_type));
break;
case IB_FLOW_SPEC_IPV4:
if (FIELDS_NOT_SUPPORTED(ib_spec->ipv4.mask, LAST_IPV4_FIELD))
return -ENOTSUPP;
- MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c,
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c,
ethertype, 0xffff);
- MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v,
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v,
ethertype, ETH_P_IP);
- memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c,
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
src_ipv4_src_ipv6.ipv4_layout.ipv4),
&ib_spec->ipv4.mask.src_ip,
sizeof(ib_spec->ipv4.mask.src_ip));
- memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v,
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
src_ipv4_src_ipv6.ipv4_layout.ipv4),
&ib_spec->ipv4.val.src_ip,
sizeof(ib_spec->ipv4.val.src_ip));
- memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c,
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
&ib_spec->ipv4.mask.dst_ip,
sizeof(ib_spec->ipv4.mask.dst_ip));
- memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v,
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
&ib_spec->ipv4.val.dst_ip,
sizeof(ib_spec->ipv4.val.dst_ip));
- set_tos(outer_headers_c, outer_headers_v,
+ set_tos(headers_c, headers_v,
ib_spec->ipv4.mask.tos, ib_spec->ipv4.val.tos);
- set_proto(outer_headers_c, outer_headers_v,
+ set_proto(headers_c, headers_v,
ib_spec->ipv4.mask.proto, ib_spec->ipv4.val.proto);
break;
case IB_FLOW_SPEC_IPV6:
if (FIELDS_NOT_SUPPORTED(ib_spec->ipv6.mask, LAST_IPV6_FIELD))
return -ENOTSUPP;
- MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c,
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c,
ethertype, 0xffff);
- MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v,
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v,
ethertype, ETH_P_IPV6);
- memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c,
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
src_ipv4_src_ipv6.ipv6_layout.ipv6),
&ib_spec->ipv6.mask.src_ip,
sizeof(ib_spec->ipv6.mask.src_ip));
- memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v,
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
src_ipv4_src_ipv6.ipv6_layout.ipv6),
&ib_spec->ipv6.val.src_ip,
sizeof(ib_spec->ipv6.val.src_ip));
- memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c,
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
&ib_spec->ipv6.mask.dst_ip,
sizeof(ib_spec->ipv6.mask.dst_ip));
- memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v,
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
&ib_spec->ipv6.val.dst_ip,
sizeof(ib_spec->ipv6.val.dst_ip));
- set_tos(outer_headers_c, outer_headers_v,
+ set_tos(headers_c, headers_v,
ib_spec->ipv6.mask.traffic_class,
ib_spec->ipv6.val.traffic_class);
- set_proto(outer_headers_c, outer_headers_v,
+ set_proto(headers_c, headers_v,
ib_spec->ipv6.mask.next_hdr,
ib_spec->ipv6.val.next_hdr);
- MLX5_SET(fte_match_set_misc, misc_params_c,
- outer_ipv6_flow_label,
- ntohl(ib_spec->ipv6.mask.flow_label));
- MLX5_SET(fte_match_set_misc, misc_params_v,
- outer_ipv6_flow_label,
- ntohl(ib_spec->ipv6.val.flow_label));
+ set_flow_label(misc_params_c, misc_params_v,
+ ntohl(ib_spec->ipv6.mask.flow_label),
+ ntohl(ib_spec->ipv6.val.flow_label),
+ ib_spec->type & IB_FLOW_SPEC_INNER);
+
break;
case IB_FLOW_SPEC_TCP:
if (FIELDS_NOT_SUPPORTED(ib_spec->tcp_udp.mask,
LAST_TCP_UDP_FIELD))
return -ENOTSUPP;
- MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, ip_protocol,
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
0xff);
- MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, ip_protocol,
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol,
IPPROTO_TCP);
- MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, tcp_sport,
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_sport,
ntohs(ib_spec->tcp_udp.mask.src_port));
- MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, tcp_sport,
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_sport,
ntohs(ib_spec->tcp_udp.val.src_port));
- MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, tcp_dport,
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_dport,
ntohs(ib_spec->tcp_udp.mask.dst_port));
- MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, tcp_dport,
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_dport,
ntohs(ib_spec->tcp_udp.val.dst_port));
break;
case IB_FLOW_SPEC_UDP:
@@ -1683,21 +1747,31 @@
LAST_TCP_UDP_FIELD))
return -ENOTSUPP;
- MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, ip_protocol,
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
0xff);
- MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, ip_protocol,
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol,
IPPROTO_UDP);
- MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, udp_sport,
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_sport,
ntohs(ib_spec->tcp_udp.mask.src_port));
- MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, udp_sport,
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_sport,
ntohs(ib_spec->tcp_udp.val.src_port));
- MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, udp_dport,
+ MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_dport,
ntohs(ib_spec->tcp_udp.mask.dst_port));
- MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, udp_dport,
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_dport,
ntohs(ib_spec->tcp_udp.val.dst_port));
break;
+ case IB_FLOW_SPEC_VXLAN_TUNNEL:
+ if (FIELDS_NOT_SUPPORTED(ib_spec->tunnel.mask,
+ LAST_TUNNEL_FIELD))
+ return -ENOTSUPP;
+
+ MLX5_SET(fte_match_set_misc, misc_params_c, vxlan_vni,
+ ntohl(ib_spec->tunnel.mask.tunnel_id));
+ MLX5_SET(fte_match_set_misc, misc_params_v, vxlan_vni,
+ ntohl(ib_spec->tunnel.val.tunnel_id));
+ break;
default:
return -EINVAL;
}
@@ -2721,6 +2795,8 @@
struct ib_port_immutable *immutable)
{
struct ib_port_attr attr;
+ struct mlx5_ib_dev *dev = to_mdev(ibdev);
+ enum rdma_link_layer ll = mlx5_ib_port_link_layer(ibdev, port_num);
int err;
err = mlx5_ib_query_port(ibdev, port_num, &attr);
@@ -2730,7 +2806,8 @@
immutable->pkey_tbl_len = attr.pkey_tbl_len;
immutable->gid_tbl_len = attr.gid_tbl_len;
immutable->core_cap_flags = get_core_cap_flags(ibdev);
- immutable->max_mad_size = IB_MGMT_MAD_SIZE;
+ if ((ll == IB_LINK_LAYER_INFINIBAND) || MLX5_CAP_GEN(dev->mdev, roce))
+ immutable->max_mad_size = IB_MGMT_MAD_SIZE;
return 0;
}
@@ -2744,7 +2821,7 @@
fw_rev_min(dev->mdev), fw_rev_sub(dev->mdev));
}
-static int mlx5_roce_lag_init(struct mlx5_ib_dev *dev)
+static int mlx5_eth_lag_init(struct mlx5_ib_dev *dev)
{
struct mlx5_core_dev *mdev = dev->mdev;
struct mlx5_flow_namespace *ns = mlx5_get_flow_namespace(mdev,
@@ -2773,7 +2850,7 @@
return err;
}
-static void mlx5_roce_lag_cleanup(struct mlx5_ib_dev *dev)
+static void mlx5_eth_lag_cleanup(struct mlx5_ib_dev *dev)
{
struct mlx5_core_dev *mdev = dev->mdev;
@@ -2785,15 +2862,7 @@
}
}
-static void mlx5_remove_roce_notifier(struct mlx5_ib_dev *dev)
-{
- if (dev->roce.nb.notifier_call) {
- unregister_netdevice_notifier(&dev->roce.nb);
- dev->roce.nb.notifier_call = NULL;
- }
-}
-
-static int mlx5_enable_roce(struct mlx5_ib_dev *dev)
+static int mlx5_add_netdev_notifier(struct mlx5_ib_dev *dev)
{
int err;
@@ -2804,28 +2873,51 @@
return err;
}
- err = mlx5_nic_vport_enable_roce(dev->mdev);
- if (err)
- goto err_unregister_netdevice_notifier;
+ return 0;
+}
- err = mlx5_roce_lag_init(dev);
+static void mlx5_remove_netdev_notifier(struct mlx5_ib_dev *dev)
+{
+ if (dev->roce.nb.notifier_call) {
+ unregister_netdevice_notifier(&dev->roce.nb);
+ dev->roce.nb.notifier_call = NULL;
+ }
+}
+
+static int mlx5_enable_eth(struct mlx5_ib_dev *dev)
+{
+ int err;
+
+ err = mlx5_add_netdev_notifier(dev);
+ if (err)
+ return err;
+
+ if (MLX5_CAP_GEN(dev->mdev, roce)) {
+ err = mlx5_nic_vport_enable_roce(dev->mdev);
+ if (err)
+ goto err_unregister_netdevice_notifier;
+ }
+
+ err = mlx5_eth_lag_init(dev);
if (err)
goto err_disable_roce;
return 0;
err_disable_roce:
- mlx5_nic_vport_disable_roce(dev->mdev);
+ if (MLX5_CAP_GEN(dev->mdev, roce))
+ mlx5_nic_vport_disable_roce(dev->mdev);
err_unregister_netdevice_notifier:
- mlx5_remove_roce_notifier(dev);
+ mlx5_remove_netdev_notifier(dev);
return err;
}
-static void mlx5_disable_roce(struct mlx5_ib_dev *dev)
+static void mlx5_disable_eth(struct mlx5_ib_dev *dev)
{
- mlx5_roce_lag_cleanup(dev);
- mlx5_nic_vport_disable_roce(dev->mdev);
+ mlx5_eth_lag_cleanup(dev);
+ if (MLX5_CAP_GEN(dev->mdev, roce))
+ mlx5_nic_vport_disable_roce(dev->mdev);
}
static void mlx5_ib_dealloc_q_counters(struct mlx5_ib_dev *dev)
@@ -2947,9 +3039,6 @@
port_type_cap = MLX5_CAP_GEN(mdev, port_type);
ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap);
- if ((ll == IB_LINK_LAYER_ETHERNET) && !MLX5_CAP_GEN(mdev, roce))
- return NULL;
-
printk_once(KERN_INFO "%s", mlx5_version);
dev = (struct mlx5_ib_dev *)ib_alloc_device(sizeof(*dev));
@@ -2995,6 +3084,8 @@
(1ull << IB_USER_VERBS_CMD_QUERY_PORT) |
(1ull << IB_USER_VERBS_CMD_ALLOC_PD) |
(1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |
+ (1ull << IB_USER_VERBS_CMD_CREATE_AH) |
+ (1ull << IB_USER_VERBS_CMD_DESTROY_AH) |
(1ull << IB_USER_VERBS_CMD_REG_MR) |
(1ull << IB_USER_VERBS_CMD_REREG_MR) |
(1ull << IB_USER_VERBS_CMD_DEREG_MR) |
@@ -3017,7 +3108,8 @@
dev->ib_dev.uverbs_ex_cmd_mask =
(1ull << IB_USER_VERBS_EX_CMD_QUERY_DEVICE) |
(1ull << IB_USER_VERBS_EX_CMD_CREATE_CQ) |
- (1ull << IB_USER_VERBS_EX_CMD_CREATE_QP);
+ (1ull << IB_USER_VERBS_EX_CMD_CREATE_QP) |
+ (1ull << IB_USER_VERBS_EX_CMD_MODIFY_QP);
dev->ib_dev.query_device = mlx5_ib_query_device;
dev->ib_dev.query_port = mlx5_ib_query_port;
@@ -3128,14 +3220,14 @@
spin_lock_init(&dev->reset_flow_resource_lock);
if (ll == IB_LINK_LAYER_ETHERNET) {
- err = mlx5_enable_roce(dev);
+ err = mlx5_enable_eth(dev);
if (err)
goto err_free_port;
}
err = create_dev_resources(&dev->devr);
if (err)
- goto err_disable_roce;
+ goto err_disable_eth;
err = mlx5_ib_odp_init_one(dev);
if (err)
@@ -3179,10 +3271,10 @@
err_rsrc:
destroy_dev_resources(&dev->devr);
-err_disable_roce:
+err_disable_eth:
if (ll == IB_LINK_LAYER_ETHERNET) {
- mlx5_disable_roce(dev);
- mlx5_remove_roce_notifier(dev);
+ mlx5_disable_eth(dev);
+ mlx5_remove_netdev_notifier(dev);
}
err_free_port:
@@ -3199,14 +3291,14 @@
struct mlx5_ib_dev *dev = context;
enum rdma_link_layer ll = mlx5_ib_port_link_layer(&dev->ib_dev, 1);
- mlx5_remove_roce_notifier(dev);
+ mlx5_remove_netdev_notifier(dev);
ib_unregister_device(&dev->ib_dev);
mlx5_ib_dealloc_q_counters(dev);
destroy_umrc_res(dev);
mlx5_ib_odp_remove_one(dev);
destroy_dev_resources(&dev->devr);
if (ll == IB_LINK_LAYER_ETHERNET)
- mlx5_disable_roce(dev);
+ mlx5_disable_eth(dev);
kfree(dev->port);
ib_dealloc_device(&dev->ib_dev);
}
diff --git a/drivers/infiniband/hw/mlx5/mem.c b/drivers/infiniband/hw/mlx5/mem.c
index 996b54e..6851357 100644
--- a/drivers/infiniband/hw/mlx5/mem.c
+++ b/drivers/infiniband/hw/mlx5/mem.c
@@ -37,12 +37,15 @@
/* @umem: umem object to scan
* @addr: ib virtual address requested by the user
+ * @max_page_shift: high limit for page_shift - 0 means no limit
* @count: number of PAGE_SIZE pages covered by umem
* @shift: page shift for the compound pages found in the region
* @ncont: number of compund pages
* @order: log2 of the number of compound pages
*/
-void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift,
+void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr,
+ unsigned long max_page_shift,
+ int *count, int *shift,
int *ncont, int *order)
{
unsigned long tmp;
@@ -72,6 +75,8 @@
addr = addr >> page_shift;
tmp = (unsigned long)addr;
m = find_first_bit(&tmp, BITS_PER_LONG);
+ if (max_page_shift)
+ m = min_t(unsigned long, max_page_shift - page_shift, m);
skip = 1 << m;
mask = skip - 1;
i = 0;
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index 854748b..6c6057e 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -63,6 +63,8 @@
#define MLX5_IB_DEFAULT_UIDX 0xffffff
#define MLX5_USER_ASSIGNED_UIDX_MASK __mlx5_mask(qpc, user_index)
+#define MLX5_MKEY_PAGE_SHIFT_MASK __mlx5_mask(mkc, log_page_size)
+
enum {
MLX5_IB_MMAP_CMD_SHIFT = 8,
MLX5_IB_MMAP_CMD_MASK = 0xff,
@@ -387,6 +389,7 @@
struct list_head qps_list;
struct list_head cq_recv_list;
struct list_head cq_send_list;
+ u32 rate_limit;
};
struct mlx5_ib_cq_buf {
@@ -418,7 +421,7 @@
struct ib_pd *pd;
unsigned int page_shift;
unsigned int npages;
- u32 length;
+ u64 length;
int access_flags;
u32 mkey;
};
@@ -739,7 +742,8 @@
int mlx5_MAD_IFC(struct mlx5_ib_dev *dev, int ignore_mkey, int ignore_bkey,
u8 port, const struct ib_wc *in_wc, const struct ib_grh *in_grh,
const void *in_mad, void *response_mad);
-struct ib_ah *mlx5_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr);
+struct ib_ah *mlx5_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr,
+ struct ib_udata *udata);
int mlx5_ib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr);
int mlx5_ib_destroy_ah(struct ib_ah *ah);
struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd,
@@ -825,7 +829,9 @@
struct ib_port_attr *props);
int mlx5_ib_init_fmr(struct mlx5_ib_dev *dev);
void mlx5_ib_cleanup_fmr(struct mlx5_ib_dev *dev);
-void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift,
+void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr,
+ unsigned long max_page_shift,
+ int *count, int *shift,
int *ncont, int *order);
void __mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
int page_shift, size_t offset, size_t num_pages,
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 4e90124..8f608deb 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -628,7 +628,8 @@
ent->order = i + 2;
ent->dev = dev;
- if (dev->mdev->profile->mask & MLX5_PROF_MASK_MR_CACHE)
+ if ((dev->mdev->profile->mask & MLX5_PROF_MASK_MR_CACHE) &&
+ (mlx5_core_is_pf(dev->mdev)))
limit = dev->mdev->profile->mr_cache[i].limit;
else
limit = 0;
@@ -646,6 +647,33 @@
return 0;
}
+static void wait_for_async_commands(struct mlx5_ib_dev *dev)
+{
+ struct mlx5_mr_cache *cache = &dev->cache;
+ struct mlx5_cache_ent *ent;
+ int total = 0;
+ int i;
+ int j;
+
+ for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
+ ent = &cache->ent[i];
+ for (j = 0 ; j < 1000; j++) {
+ if (!ent->pending)
+ break;
+ msleep(50);
+ }
+ }
+ for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
+ ent = &cache->ent[i];
+ total += ent->pending;
+ }
+
+ if (total)
+ mlx5_ib_warn(dev, "aborted while there are %d pending mr requests\n", total);
+ else
+ mlx5_ib_warn(dev, "done with all pending requests\n");
+}
+
int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev)
{
int i;
@@ -659,6 +687,7 @@
clean_keys(dev, i);
destroy_workqueue(dev->cache.wq);
+ wait_for_async_commands(dev);
del_timer_sync(&dev->delay_timer);
return 0;
@@ -816,29 +845,34 @@
umrwr->mkey = key;
}
-static struct ib_umem *mr_umem_get(struct ib_pd *pd, u64 start, u64 length,
- int access_flags, int *npages,
- int *page_shift, int *ncont, int *order)
+static int mr_umem_get(struct ib_pd *pd, u64 start, u64 length,
+ int access_flags, struct ib_umem **umem,
+ int *npages, int *page_shift, int *ncont,
+ int *order)
{
struct mlx5_ib_dev *dev = to_mdev(pd->device);
- struct ib_umem *umem = ib_umem_get(pd->uobject->context, start, length,
- access_flags, 0);
- if (IS_ERR(umem)) {
+ int err;
+
+ *umem = ib_umem_get(pd->uobject->context, start, length,
+ access_flags, 0);
+ err = PTR_ERR_OR_ZERO(*umem);
+ if (err < 0) {
mlx5_ib_err(dev, "umem get failed (%ld)\n", PTR_ERR(umem));
- return (void *)umem;
+ return err;
}
- mlx5_ib_cont_pages(umem, start, npages, page_shift, ncont, order);
+ mlx5_ib_cont_pages(*umem, start, MLX5_MKEY_PAGE_SHIFT_MASK, npages,
+ page_shift, ncont, order);
if (!*npages) {
mlx5_ib_warn(dev, "avoid zero region\n");
- ib_umem_release(umem);
- return ERR_PTR(-EINVAL);
+ ib_umem_release(*umem);
+ return -EINVAL;
}
mlx5_ib_dbg(dev, "npages %d, ncont %d, order %d, page_shift %d\n",
*npages, *ncont, *order, *page_shift);
- return umem;
+ return 0;
}
static void mlx5_ib_umr_done(struct ib_cq *cq, struct ib_wc *wc)
@@ -1164,11 +1198,11 @@
mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n",
start, virt_addr, length, access_flags);
- umem = mr_umem_get(pd, start, length, access_flags, &npages,
+ err = mr_umem_get(pd, start, length, access_flags, &umem, &npages,
&page_shift, &ncont, &order);
- if (IS_ERR(umem))
- return (void *)umem;
+ if (err < 0)
+ return ERR_PTR(err);
if (use_umr(order)) {
mr = reg_umr(pd, umem, virt_addr, length, ncont, page_shift,
@@ -1345,10 +1379,9 @@
*/
flags |= IB_MR_REREG_TRANS;
ib_umem_release(mr->umem);
- mr->umem = mr_umem_get(pd, addr, len, access_flags, &npages,
- &page_shift, &ncont, &order);
- if (IS_ERR(mr->umem)) {
- err = PTR_ERR(mr->umem);
+ err = mr_umem_get(pd, addr, len, access_flags, &mr->umem,
+ &npages, &page_shift, &ncont, &order);
+ if (err < 0) {
mr->umem = NULL;
return err;
}
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index d1e9218..a1b3125 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -77,12 +77,14 @@
enum raw_qp_set_mask_map {
MLX5_RAW_QP_MOD_SET_RQ_Q_CTR_ID = 1UL << 0,
+ MLX5_RAW_QP_RATE_LIMIT = 1UL << 1,
};
struct mlx5_modify_raw_qp_param {
u16 operation;
u32 set_mask; /* raw_qp_set_mask_map */
+ u32 rate_limit;
u8 rq_q_ctr_id;
};
@@ -351,6 +353,29 @@
return ALIGN(max_t(int, inl_size, size), MLX5_SEND_WQE_BB);
}
+static int get_send_sge(struct ib_qp_init_attr *attr, int wqe_size)
+{
+ int max_sge;
+
+ if (attr->qp_type == IB_QPT_RC)
+ max_sge = (min_t(int, wqe_size, 512) -
+ sizeof(struct mlx5_wqe_ctrl_seg) -
+ sizeof(struct mlx5_wqe_raddr_seg)) /
+ sizeof(struct mlx5_wqe_data_seg);
+ else if (attr->qp_type == IB_QPT_XRC_INI)
+ max_sge = (min_t(int, wqe_size, 512) -
+ sizeof(struct mlx5_wqe_ctrl_seg) -
+ sizeof(struct mlx5_wqe_xrc_seg) -
+ sizeof(struct mlx5_wqe_raddr_seg)) /
+ sizeof(struct mlx5_wqe_data_seg);
+ else
+ max_sge = (wqe_size - sq_overhead(attr)) /
+ sizeof(struct mlx5_wqe_data_seg);
+
+ return min_t(int, max_sge, wqe_size - sq_overhead(attr) /
+ sizeof(struct mlx5_wqe_data_seg));
+}
+
static int calc_sq_size(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *attr,
struct mlx5_ib_qp *qp)
{
@@ -381,13 +406,18 @@
wq_size = roundup_pow_of_two(attr->cap.max_send_wr * wqe_size);
qp->sq.wqe_cnt = wq_size / MLX5_SEND_WQE_BB;
if (qp->sq.wqe_cnt > (1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz))) {
- mlx5_ib_dbg(dev, "wqe count(%d) exceeds limits(%d)\n",
+ mlx5_ib_dbg(dev, "send queue size (%d * %d / %d -> %d) exceeds limits(%d)\n",
+ attr->cap.max_send_wr, wqe_size, MLX5_SEND_WQE_BB,
qp->sq.wqe_cnt,
1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz));
return -ENOMEM;
}
qp->sq.wqe_shift = ilog2(MLX5_SEND_WQE_BB);
- qp->sq.max_gs = attr->cap.max_send_sge;
+ qp->sq.max_gs = get_send_sge(attr, wqe_size);
+ if (qp->sq.max_gs < attr->cap.max_send_sge)
+ return -ENOMEM;
+
+ attr->cap.max_send_sge = qp->sq.max_gs;
qp->sq.max_post = wq_size / wqe_size;
attr->cap.max_send_wr = qp->sq.max_post;
@@ -647,7 +677,7 @@
return PTR_ERR(*umem);
}
- mlx5_ib_cont_pages(*umem, addr, npages, page_shift, ncont, NULL);
+ mlx5_ib_cont_pages(*umem, addr, 0, npages, page_shift, ncont, NULL);
err = mlx5_ib_get_buf_offset(addr, *page_shift, offset);
if (err) {
@@ -700,7 +730,7 @@
return err;
}
- mlx5_ib_cont_pages(rwq->umem, ucmd->buf_addr, &npages, &page_shift,
+ mlx5_ib_cont_pages(rwq->umem, ucmd->buf_addr, 0, &npages, &page_shift,
&ncont, NULL);
err = mlx5_ib_get_buf_offset(ucmd->buf_addr, page_shift,
&rwq->rq_page_offset);
@@ -2442,8 +2472,14 @@
}
static int modify_raw_packet_qp_sq(struct mlx5_core_dev *dev,
- struct mlx5_ib_sq *sq, int new_state)
+ struct mlx5_ib_sq *sq,
+ int new_state,
+ const struct mlx5_modify_raw_qp_param *raw_qp_param)
{
+ struct mlx5_ib_qp *ibqp = sq->base.container_mibqp;
+ u32 old_rate = ibqp->rate_limit;
+ u32 new_rate = old_rate;
+ u16 rl_index = 0;
void *in;
void *sqc;
int inlen;
@@ -2459,10 +2495,44 @@
sqc = MLX5_ADDR_OF(modify_sq_in, in, ctx);
MLX5_SET(sqc, sqc, state, new_state);
- err = mlx5_core_modify_sq(dev, sq->base.mqp.qpn, in, inlen);
- if (err)
- goto out;
+ if (raw_qp_param->set_mask & MLX5_RAW_QP_RATE_LIMIT) {
+ if (new_state != MLX5_SQC_STATE_RDY)
+ pr_warn("%s: Rate limit can only be changed when SQ is moving to RDY\n",
+ __func__);
+ else
+ new_rate = raw_qp_param->rate_limit;
+ }
+ if (old_rate != new_rate) {
+ if (new_rate) {
+ err = mlx5_rl_add_rate(dev, new_rate, &rl_index);
+ if (err) {
+ pr_err("Failed configuring rate %u: %d\n",
+ new_rate, err);
+ goto out;
+ }
+ }
+
+ MLX5_SET64(modify_sq_in, in, modify_bitmask, 1);
+ MLX5_SET(sqc, sqc, packet_pacing_rate_limit_index, rl_index);
+ }
+
+ err = mlx5_core_modify_sq(dev, sq->base.mqp.qpn, in, inlen);
+ if (err) {
+ /* Remove new rate from table if failed */
+ if (new_rate &&
+ old_rate != new_rate)
+ mlx5_rl_remove_rate(dev, new_rate);
+ goto out;
+ }
+
+ /* Only remove the old rate after new rate was set */
+ if ((old_rate &&
+ (old_rate != new_rate)) ||
+ (new_state != MLX5_SQC_STATE_RDY))
+ mlx5_rl_remove_rate(dev, old_rate);
+
+ ibqp->rate_limit = new_rate;
sq->state = new_state;
out:
@@ -2477,6 +2547,8 @@
struct mlx5_ib_raw_packet_qp *raw_packet_qp = &qp->raw_packet_qp;
struct mlx5_ib_rq *rq = &raw_packet_qp->rq;
struct mlx5_ib_sq *sq = &raw_packet_qp->sq;
+ int modify_rq = !!qp->rq.wqe_cnt;
+ int modify_sq = !!qp->sq.wqe_cnt;
int rq_state;
int sq_state;
int err;
@@ -2494,10 +2566,18 @@
rq_state = MLX5_RQC_STATE_RST;
sq_state = MLX5_SQC_STATE_RST;
break;
- case MLX5_CMD_OP_INIT2INIT_QP:
- case MLX5_CMD_OP_INIT2RTR_QP:
case MLX5_CMD_OP_RTR2RTS_QP:
case MLX5_CMD_OP_RTS2RTS_QP:
+ if (raw_qp_param->set_mask ==
+ MLX5_RAW_QP_RATE_LIMIT) {
+ modify_rq = 0;
+ sq_state = sq->state;
+ } else {
+ return raw_qp_param->set_mask ? -EINVAL : 0;
+ }
+ break;
+ case MLX5_CMD_OP_INIT2INIT_QP:
+ case MLX5_CMD_OP_INIT2RTR_QP:
if (raw_qp_param->set_mask)
return -EINVAL;
else
@@ -2507,13 +2587,13 @@
return -EINVAL;
}
- if (qp->rq.wqe_cnt) {
- err = modify_raw_packet_qp_rq(dev, rq, rq_state, raw_qp_param);
+ if (modify_rq) {
+ err = modify_raw_packet_qp_rq(dev, rq, rq_state, raw_qp_param);
if (err)
return err;
}
- if (qp->sq.wqe_cnt) {
+ if (modify_sq) {
if (tx_affinity) {
err = modify_raw_packet_tx_affinity(dev->mdev, sq,
tx_affinity);
@@ -2521,7 +2601,7 @@
return err;
}
- return modify_raw_packet_qp_sq(dev->mdev, sq, sq_state);
+ return modify_raw_packet_qp_sq(dev->mdev, sq, sq_state, raw_qp_param);
}
return 0;
@@ -2577,7 +2657,6 @@
struct mlx5_ib_port *mibport = NULL;
enum mlx5_qp_state mlx5_cur, mlx5_new;
enum mlx5_qp_optpar optpar;
- int sqd_event;
int mlx5_st;
int err;
u16 op;
@@ -2724,12 +2803,6 @@
if (qp->rq.wqe_cnt && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
context->db_rec_addr = cpu_to_be64(qp->db.dma);
- if (cur_state == IB_QPS_RTS && new_state == IB_QPS_SQD &&
- attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY && attr->en_sqd_async_notify)
- sqd_event = 1;
- else
- sqd_event = 0;
-
if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) {
u8 port_num = (attr_mask & IB_QP_PORT ? attr->port_num :
qp->port) - 1;
@@ -2776,6 +2849,12 @@
raw_qp_param.rq_q_ctr_id = mibport->q_cnt_id;
raw_qp_param.set_mask |= MLX5_RAW_QP_MOD_SET_RQ_Q_CTR_ID;
}
+
+ if (attr_mask & IB_QP_RATE_LIMIT) {
+ raw_qp_param.rate_limit = attr->rate_limit;
+ raw_qp_param.set_mask |= MLX5_RAW_QP_RATE_LIMIT;
+ }
+
err = modify_raw_packet_qp(dev, qp, &raw_qp_param, tx_affinity);
} else {
err = mlx5_core_qp_modify(dev->mdev, op, optpar, context,
@@ -3067,10 +3146,10 @@
{
memset(umr, 0, sizeof(*umr));
umr->mkey_mask = cpu_to_be64(MLX5_MKEY_MASK_FREE);
- umr->flags = 1 << 7;
+ umr->flags = MLX5_UMR_INLINE;
}
-static __be64 get_umr_reg_mr_mask(void)
+static __be64 get_umr_reg_mr_mask(int atomic)
{
u64 result;
@@ -3083,9 +3162,11 @@
MLX5_MKEY_MASK_KEY |
MLX5_MKEY_MASK_RR |
MLX5_MKEY_MASK_RW |
- MLX5_MKEY_MASK_A |
MLX5_MKEY_MASK_FREE;
+ if (atomic)
+ result |= MLX5_MKEY_MASK_A;
+
return cpu_to_be64(result);
}
@@ -3146,7 +3227,7 @@
}
static void set_reg_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr,
- struct ib_send_wr *wr)
+ struct ib_send_wr *wr, int atomic)
{
struct mlx5_umr_wr *umrwr = umr_wr(wr);
@@ -3171,7 +3252,7 @@
if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_PD)
umr->mkey_mask |= get_umr_update_pd_mask();
if (!umr->mkey_mask)
- umr->mkey_mask = get_umr_reg_mr_mask();
+ umr->mkey_mask = get_umr_reg_mr_mask(atomic);
} else {
umr->mkey_mask = get_umr_unreg_mr_mask();
}
@@ -4024,7 +4105,7 @@
}
qp->sq.wr_data[idx] = MLX5_IB_WR_UMR;
ctrl->imm = cpu_to_be32(umr_wr(wr)->mkey);
- set_reg_umr_segment(seg, wr);
+ set_reg_umr_segment(seg, wr, !!(MLX5_CAP_GEN(mdev, atomic)));
seg += sizeof(struct mlx5_wqe_umr_ctrl_seg);
size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16;
if (unlikely((seg == qend)))
diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c
index 3857dbd..6f4397e 100644
--- a/drivers/infiniband/hw/mlx5/srq.c
+++ b/drivers/infiniband/hw/mlx5/srq.c
@@ -118,7 +118,7 @@
return err;
}
- mlx5_ib_cont_pages(srq->umem, ucmd.buf_addr, &npages,
+ mlx5_ib_cont_pages(srq->umem, ucmd.buf_addr, 0, &npages,
&page_shift, &ncont, NULL);
err = mlx5_ib_get_buf_offset(ucmd.buf_addr, page_shift,
&offset);
@@ -203,8 +203,6 @@
srq->wrid = kmalloc(srq->msrq.max * sizeof(u64), GFP_KERNEL);
if (!srq->wrid) {
- mlx5_ib_dbg(dev, "kmalloc failed %lu\n",
- (unsigned long)(srq->msrq.max * sizeof(u64)));
err = -ENOMEM;
goto err_in;
}
@@ -282,6 +280,7 @@
mlx5_ib_dbg(dev, "desc_size 0x%x, req wr 0x%x, srq size 0x%x, max_gs 0x%x, max_avail_gather 0x%x\n",
desc_size, init_attr->attr.max_wr, srq->msrq.max, srq->msrq.max_gs,
srq->msrq.max_avail_gather);
+ in.type = init_attr->srq_type;
if (pd->uobject)
err = create_srq_user(pd, srq, &in, udata, buf_size);
@@ -294,7 +293,6 @@
goto err_srq;
}
- in.type = init_attr->srq_type;
in.log_size = ilog2(srq->msrq.max);
in.wqe_shift = srq->msrq.wqe_shift - 4;
if (srq->wq_sig)
diff --git a/drivers/infiniband/hw/mthca/mthca_av.c b/drivers/infiniband/hw/mthca/mthca_av.c
index bcac294..c9f0f36 100644
--- a/drivers/infiniband/hw/mthca/mthca_av.c
+++ b/drivers/infiniband/hw/mthca/mthca_av.c
@@ -186,8 +186,8 @@
on_hca_fail:
if (ah->type == MTHCA_AH_PCI_POOL) {
- ah->av = pci_pool_alloc(dev->av_table.pool,
- GFP_ATOMIC, &ah->avdma);
+ ah->av = pci_pool_zalloc(dev->av_table.pool,
+ GFP_ATOMIC, &ah->avdma);
if (!ah->av)
return -ENOMEM;
@@ -196,8 +196,6 @@
ah->key = pd->ntmr.ibmr.lkey;
- memset(av, 0, MTHCA_AV_SIZE);
-
av->port_pd = cpu_to_be32(pd->pd_num | (ah_attr->port_num << 24));
av->g_slid = ah_attr->src_path_bits;
av->dlid = cpu_to_be16(ah_attr->dlid);
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c
index 358930a4..d317087 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.c
+++ b/drivers/infiniband/hw/mthca/mthca_provider.c
@@ -410,7 +410,9 @@
}
static struct ib_ah *mthca_ah_create(struct ib_pd *pd,
- struct ib_ah_attr *ah_attr)
+ struct ib_ah_attr *ah_attr,
+ struct ib_udata *udata)
+
{
int err;
struct mthca_ah *ah;
diff --git a/drivers/infiniband/hw/mthca/mthca_reset.c b/drivers/infiniband/hw/mthca/mthca_reset.c
index 6727af2..2a6979e 100644
--- a/drivers/infiniband/hw/mthca/mthca_reset.c
+++ b/drivers/infiniband/hw/mthca/mthca_reset.c
@@ -96,8 +96,6 @@
hca_header = kmalloc(256, GFP_KERNEL);
if (!hca_header) {
err = -ENOMEM;
- mthca_err(mdev, "Couldn't allocate memory to save HCA "
- "PCI header, aborting.\n");
goto put_dev;
}
@@ -119,8 +117,6 @@
bridge_header = kmalloc(256, GFP_KERNEL);
if (!bridge_header) {
err = -ENOMEM;
- mthca_err(mdev, "Couldn't allocate memory to save HCA "
- "bridge PCI header, aborting.\n");
goto free_hca;
}
diff --git a/drivers/infiniband/hw/nes/nes.c b/drivers/infiniband/hw/nes/nes.c
index 2baa45a..5b96010 100644
--- a/drivers/infiniband/hw/nes/nes.c
+++ b/drivers/infiniband/hw/nes/nes.c
@@ -515,7 +515,6 @@
/* Allocate hardware structure */
nesdev = kzalloc(sizeof(struct nes_device), GFP_KERNEL);
if (!nesdev) {
- printk(KERN_ERR PFX "%s: Unable to alloc hardware struct\n", pci_name(pcidev));
ret = -ENOMEM;
goto bail2;
}
diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c
index 57db9b3..8e70347 100644
--- a/drivers/infiniband/hw/nes/nes_cm.c
+++ b/drivers/infiniband/hw/nes/nes_cm.c
@@ -2282,10 +2282,8 @@
if (!listener) {
/* create a CM listen node (1/2 node to compare incoming traffic to) */
listener = kzalloc(sizeof(*listener), GFP_ATOMIC);
- if (!listener) {
- nes_debug(NES_DBG_CM, "Not creating listener memory allocation failed\n");
+ if (!listener)
return NULL;
- }
listener->loc_addr = cm_info->loc_addr;
listener->loc_port = cm_info->loc_port;
diff --git a/drivers/infiniband/hw/nes/nes_hw.c b/drivers/infiniband/hw/nes/nes_hw.c
index a1c6481..19acd13 100644
--- a/drivers/infiniband/hw/nes/nes_hw.c
+++ b/drivers/infiniband/hw/nes/nes_hw.c
@@ -351,9 +351,8 @@
/* allocate a new adapter struct */
nesadapter = kzalloc(adapter_size, GFP_KERNEL);
- if (nesadapter == NULL) {
+ if (!nesadapter)
return NULL;
- }
nes_debug(NES_DBG_INIT, "Allocating new nesadapter @ %p, size = %u (actual size = %u).\n",
nesadapter, (u32)sizeof(struct nes_adapter), adapter_size);
@@ -1007,8 +1006,7 @@
/* Allocate a twice the number of CQP requests as the SQ size */
nesdev->nes_cqp_requests = kzalloc(sizeof(struct nes_cqp_request) *
2 * NES_CQP_SQ_SIZE, GFP_KERNEL);
- if (nesdev->nes_cqp_requests == NULL) {
- nes_debug(NES_DBG_INIT, "Unable to allocate memory CQP request entries.\n");
+ if (!nesdev->nes_cqp_requests) {
pci_free_consistent(nesdev->pcidev, nesdev->cqp_mem_size, nesdev->cqp.sq_vbase,
nesdev->cqp.sq_pbase);
return -ENOMEM;
diff --git a/drivers/infiniband/hw/nes/nes_mgt.c b/drivers/infiniband/hw/nes/nes_mgt.c
index 4166452..33624f1 100644
--- a/drivers/infiniband/hw/nes/nes_mgt.c
+++ b/drivers/infiniband/hw/nes/nes_mgt.c
@@ -320,8 +320,7 @@
/* Found one */
fpdu_info = kzalloc(sizeof(*fpdu_info), GFP_ATOMIC);
- if (fpdu_info == NULL) {
- nes_debug(NES_DBG_PAU, "Failed to alloc a fpdu_info.\n");
+ if (!fpdu_info) {
rc = -ENOMEM;
goto out;
}
@@ -729,8 +728,7 @@
}
qh_chg = kmalloc(sizeof *qh_chg, GFP_ATOMIC);
- if (qh_chg == NULL) {
- nes_debug(NES_DBG_PAU, "Failed to get a cqp_request.\n");
+ if (!qh_chg) {
ret = -ENOMEM;
goto chg_qh_err;
}
@@ -880,10 +878,8 @@
/* Allocate space the all mgt QPs once */
mgtvnic = kzalloc(NES_MGT_QP_COUNT * sizeof(struct nes_vnic_mgt), GFP_KERNEL);
- if (mgtvnic == NULL) {
- nes_debug(NES_DBG_INIT, "Unable to allocate memory for mgt structure\n");
+ if (!mgtvnic)
return -ENOMEM;
- }
/* Allocate fragment, RQ, and CQ; Reuse CEQ based on the PCI function */
/* We are not sending from this NIC so sq is not allocated */
diff --git a/drivers/infiniband/hw/nes/nes_nic.c b/drivers/infiniband/hw/nes/nes_nic.c
index 7f8597d..5921ea3 100644
--- a/drivers/infiniband/hw/nes/nes_nic.c
+++ b/drivers/infiniband/hw/nes/nes_nic.c
@@ -662,10 +662,14 @@
nesnic->sq_head &= nesnic->sq_size-1;
}
} else {
- nesvnic->linearized_skbs++;
hoffset = skb_transport_header(skb) - skb->data;
nhoffset = skb_network_header(skb) - skb->data;
- skb_linearize(skb);
+ if (skb_linearize(skb)) {
+ nesvnic->tx_sw_dropped++;
+ kfree_skb(skb);
+ return NETDEV_TX_OK;
+ }
+ nesvnic->linearized_skbs++;
skb_set_transport_header(skb, hoffset);
skb_set_network_header(skb, nhoffset);
if (!nes_nic_send(skb, netdev))
@@ -1461,7 +1465,8 @@
/**
* nes_netdev_get_settings
*/
-static int nes_netdev_get_settings(struct net_device *netdev, struct ethtool_cmd *et_cmd)
+static int nes_netdev_get_link_ksettings(struct net_device *netdev,
+ struct ethtool_link_ksettings *cmd)
{
struct nes_vnic *nesvnic = netdev_priv(netdev);
struct nes_device *nesdev = nesvnic->nesdev;
@@ -1470,54 +1475,59 @@
u8 phy_type = nesadapter->phy_type[mac_index];
u8 phy_index = nesadapter->phy_index[mac_index];
u16 phy_data;
+ u32 supported, advertising;
- et_cmd->duplex = DUPLEX_FULL;
- et_cmd->port = PORT_MII;
- et_cmd->maxtxpkt = 511;
- et_cmd->maxrxpkt = 511;
+ cmd->base.duplex = DUPLEX_FULL;
+ cmd->base.port = PORT_MII;
if (nesadapter->OneG_Mode) {
- ethtool_cmd_speed_set(et_cmd, SPEED_1000);
+ cmd->base.speed = SPEED_1000;
if (phy_type == NES_PHY_TYPE_PUMA_1G) {
- et_cmd->supported = SUPPORTED_1000baseT_Full;
- et_cmd->advertising = ADVERTISED_1000baseT_Full;
- et_cmd->autoneg = AUTONEG_DISABLE;
- et_cmd->transceiver = XCVR_INTERNAL;
- et_cmd->phy_address = mac_index;
+ supported = SUPPORTED_1000baseT_Full;
+ advertising = ADVERTISED_1000baseT_Full;
+ cmd->base.autoneg = AUTONEG_DISABLE;
+ cmd->base.phy_address = mac_index;
} else {
unsigned long flags;
- et_cmd->supported = SUPPORTED_1000baseT_Full
- | SUPPORTED_Autoneg;
- et_cmd->advertising = ADVERTISED_1000baseT_Full
- | ADVERTISED_Autoneg;
+
+ supported = SUPPORTED_1000baseT_Full
+ | SUPPORTED_Autoneg;
+ advertising = ADVERTISED_1000baseT_Full
+ | ADVERTISED_Autoneg;
spin_lock_irqsave(&nesadapter->phy_lock, flags);
nes_read_1G_phy_reg(nesdev, 0, phy_index, &phy_data);
spin_unlock_irqrestore(&nesadapter->phy_lock, flags);
if (phy_data & 0x1000)
- et_cmd->autoneg = AUTONEG_ENABLE;
+ cmd->base.autoneg = AUTONEG_ENABLE;
else
- et_cmd->autoneg = AUTONEG_DISABLE;
- et_cmd->transceiver = XCVR_EXTERNAL;
- et_cmd->phy_address = phy_index;
+ cmd->base.autoneg = AUTONEG_DISABLE;
+ cmd->base.phy_address = phy_index;
}
+ ethtool_convert_legacy_u32_to_link_mode(
+ cmd->link_modes.supported, supported);
+ ethtool_convert_legacy_u32_to_link_mode(
+ cmd->link_modes.advertising, advertising);
return 0;
}
if ((phy_type == NES_PHY_TYPE_ARGUS) ||
(phy_type == NES_PHY_TYPE_SFP_D) ||
(phy_type == NES_PHY_TYPE_KR)) {
- et_cmd->transceiver = XCVR_EXTERNAL;
- et_cmd->port = PORT_FIBRE;
- et_cmd->supported = SUPPORTED_FIBRE;
- et_cmd->advertising = ADVERTISED_FIBRE;
- et_cmd->phy_address = phy_index;
+ cmd->base.port = PORT_FIBRE;
+ supported = SUPPORTED_FIBRE;
+ advertising = ADVERTISED_FIBRE;
+ cmd->base.phy_address = phy_index;
} else {
- et_cmd->transceiver = XCVR_INTERNAL;
- et_cmd->supported = SUPPORTED_10000baseT_Full;
- et_cmd->advertising = ADVERTISED_10000baseT_Full;
- et_cmd->phy_address = mac_index;
+ supported = SUPPORTED_10000baseT_Full;
+ advertising = ADVERTISED_10000baseT_Full;
+ cmd->base.phy_address = mac_index;
}
- ethtool_cmd_speed_set(et_cmd, SPEED_10000);
- et_cmd->autoneg = AUTONEG_DISABLE;
+ cmd->base.speed = SPEED_10000;
+ cmd->base.autoneg = AUTONEG_DISABLE;
+ ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported,
+ supported);
+ ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising,
+ advertising);
+
return 0;
}
@@ -1525,7 +1535,9 @@
/**
* nes_netdev_set_settings
*/
-static int nes_netdev_set_settings(struct net_device *netdev, struct ethtool_cmd *et_cmd)
+static int
+nes_netdev_set_link_ksettings(struct net_device *netdev,
+ const struct ethtool_link_ksettings *cmd)
{
struct nes_vnic *nesvnic = netdev_priv(netdev);
struct nes_device *nesdev = nesvnic->nesdev;
@@ -1539,7 +1551,7 @@
spin_lock_irqsave(&nesadapter->phy_lock, flags);
nes_read_1G_phy_reg(nesdev, 0, phy_index, &phy_data);
- if (et_cmd->autoneg) {
+ if (cmd->base.autoneg) {
/* Turn on Full duplex, Autoneg, and restart autonegotiation */
phy_data |= 0x1300;
} else {
@@ -1556,8 +1568,6 @@
static const struct ethtool_ops nes_ethtool_ops = {
.get_link = ethtool_op_get_link,
- .get_settings = nes_netdev_get_settings,
- .set_settings = nes_netdev_set_settings,
.get_strings = nes_netdev_get_strings,
.get_sset_count = nes_netdev_get_sset_count,
.get_ethtool_stats = nes_netdev_get_ethtool_stats,
@@ -1566,6 +1576,8 @@
.set_coalesce = nes_netdev_set_coalesce,
.get_pauseparam = nes_netdev_get_pauseparam,
.set_pauseparam = nes_netdev_set_pauseparam,
+ .get_link_ksettings = nes_netdev_get_link_ksettings,
+ .set_link_ksettings = nes_netdev_set_link_ksettings,
};
static void nes_vlan_mode(struct net_device *netdev, struct nes_device *nesdev, netdev_features_t features)
diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c
index bd69125..aff9fb1 100644
--- a/drivers/infiniband/hw/nes/nes_verbs.c
+++ b/drivers/infiniband/hw/nes/nes_verbs.c
@@ -771,7 +771,8 @@
/**
* nes_create_ah
*/
-static struct ib_ah *nes_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr)
+static struct ib_ah *nes_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr,
+ struct ib_udata *udata)
{
return ERR_PTR(-ENOSYS);
}
@@ -1075,7 +1076,6 @@
mem = kzalloc(sizeof(*nesqp)+NES_SW_CONTEXT_ALIGN-1, GFP_KERNEL);
if (!mem) {
nes_free_resource(nesadapter, nesadapter->allocated_qps, qp_num);
- nes_debug(NES_DBG_QP, "Unable to allocate QP\n");
return ERR_PTR(-ENOMEM);
}
u64nesqp = (unsigned long)mem;
@@ -1475,7 +1475,6 @@
nescq = kzalloc(sizeof(struct nes_cq), GFP_KERNEL);
if (!nescq) {
nes_free_resource(nesadapter, nesadapter->allocated_cqs, cq_num);
- nes_debug(NES_DBG_CQ, "Unable to allocate nes_cq struct\n");
return ERR_PTR(-ENOMEM);
}
@@ -2408,7 +2407,6 @@
}
nespbl = kzalloc(sizeof(*nespbl), GFP_KERNEL);
if (!nespbl) {
- nes_debug(NES_DBG_MR, "Unable to allocate PBL\n");
ib_umem_release(region);
return ERR_PTR(-ENOMEM);
}
@@ -2416,7 +2414,6 @@
if (!nesmr) {
ib_umem_release(region);
kfree(nespbl);
- nes_debug(NES_DBG_MR, "Unable to allocate nesmr\n");
return ERR_PTR(-ENOMEM);
}
nesmr->region = region;
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c
index 797362a..14d33b0 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c
@@ -154,7 +154,8 @@
return status;
}
-struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr)
+struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr,
+ struct ib_udata *udata)
{
u32 *ahid_addr;
int status;
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.h b/drivers/infiniband/hw/ocrdma/ocrdma_ah.h
index 3856dd4..0704a24 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.h
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.h
@@ -50,7 +50,9 @@
OCRDMA_AH_L3_TYPE_MASK = 0x03,
OCRDMA_AH_L3_TYPE_SHIFT = 0x1D /* 29 bits */
};
-struct ib_ah *ocrdma_create_ah(struct ib_pd *, struct ib_ah_attr *);
+
+struct ib_ah *ocrdma_create_ah(struct ib_pd *, struct ib_ah_attr *,
+ struct ib_udata *);
int ocrdma_destroy_ah(struct ib_ah *);
int ocrdma_query_ah(struct ib_ah *, struct ib_ah_attr *);
int ocrdma_modify_ah(struct ib_ah *, struct ib_ah_attr *);
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
index 67fc0b6..9a30520 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
@@ -1596,10 +1596,9 @@
dev->pd_mgr = kzalloc(sizeof(struct ocrdma_pd_resource_mgr),
GFP_KERNEL);
- if (!dev->pd_mgr) {
- pr_err("%s(%d)Memory allocation failure.\n", __func__, dev->id);
+ if (!dev->pd_mgr)
return;
- }
+
status = ocrdma_mbx_alloc_pd_range(dev);
if (status) {
pr_err("%s(%d) Unable to initialize PD pool, using default.\n",
@@ -1642,7 +1641,7 @@
static int ocrdma_mbx_create_ah_tbl(struct ocrdma_dev *dev)
{
int i;
- int status = 0;
+ int status = -ENOMEM;
int max_ah;
struct ocrdma_create_ah_tbl *cmd;
struct ocrdma_create_ah_tbl_rsp *rsp;
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_stats.c b/drivers/infiniband/hw/ocrdma/ocrdma_stats.c
index 8bef09a..f8e4b0a 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_stats.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_stats.c
@@ -84,10 +84,8 @@
/* Alloc debugfs mem */
mem->debugfs_mem = kzalloc(OCRDMA_MAX_DBGFS_MEM, GFP_KERNEL);
- if (!mem->debugfs_mem) {
- pr_err("%s: stats debugfs mem allocation failed\n", __func__);
+ if (!mem->debugfs_mem)
return false;
- }
return true;
}
diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c
index a615142..302fb05 100644
--- a/drivers/infiniband/hw/qedr/verbs.c
+++ b/drivers/infiniband/hw/qedr/verbs.c
@@ -511,8 +511,10 @@
struct qedr_dev *dev = get_qedr_dev(ibpd->device);
struct qedr_pd *pd = get_qedr_pd(ibpd);
- if (!pd)
+ if (!pd) {
pr_err("Invalid PD received in dealloc_pd\n");
+ return -EINVAL;
+ }
DP_DEBUG(dev, QEDR_MSG_INIT, "Deallocating PD %d\n", pd->pd_id);
dev->ops->rdma_dealloc_pd(dev->rdma_ctx, pd->pd_id);
@@ -1477,6 +1479,7 @@
struct qedr_ucontext *ctx = NULL;
struct qedr_create_qp_ureq ureq;
struct qedr_qp *qp;
+ struct ib_qp *ibqp;
int rc = 0;
DP_DEBUG(dev, QEDR_MSG_QP, "create qp: called from %s, pd=%p\n",
@@ -1486,13 +1489,13 @@
if (rc)
return ERR_PTR(rc);
+ if (attrs->srq)
+ return ERR_PTR(-EINVAL);
+
qp = kzalloc(sizeof(*qp), GFP_KERNEL);
if (!qp)
return ERR_PTR(-ENOMEM);
- if (attrs->srq)
- return ERR_PTR(-EINVAL);
-
DP_DEBUG(dev, QEDR_MSG_QP,
"create qp: sq_cq=%p, sq_icid=%d, rq_cq=%p, rq_icid=%d\n",
get_qedr_cq(attrs->send_cq),
@@ -1508,7 +1511,10 @@
"create qp: unexpected udata when creating GSI QP\n");
goto err0;
}
- return qedr_create_gsi_qp(dev, attrs, qp);
+ ibqp = qedr_create_gsi_qp(dev, attrs, qp);
+ if (IS_ERR(ibqp))
+ kfree(qp);
+ return ibqp;
}
memset(&in_params, 0, sizeof(in_params));
@@ -2094,7 +2100,8 @@
return rc;
}
-struct ib_ah *qedr_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr)
+struct ib_ah *qedr_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr,
+ struct ib_udata *udata)
{
struct qedr_ah *ah;
@@ -2413,8 +2420,7 @@
*/
pbl = list_first_entry(&info->inuse_pbl_list,
struct qedr_pbl, list_entry);
- list_del(&pbl->list_entry);
- list_add_tail(&pbl->list_entry, &info->free_pbl_list);
+ list_move_tail(&pbl->list_entry, &info->free_pbl_list);
info->completed_handled++;
}
}
@@ -2981,11 +2987,6 @@
return -EINVAL;
}
- if (!wr) {
- DP_ERR(dev, "Got an empty post send.\n");
- return -EINVAL;
- }
-
while (wr) {
rc = __qedr_post_send(ibqp, wr, bad_wr);
if (rc)
diff --git a/drivers/infiniband/hw/qedr/verbs.h b/drivers/infiniband/hw/qedr/verbs.h
index a9b5e67..070677c 100644
--- a/drivers/infiniband/hw/qedr/verbs.h
+++ b/drivers/infiniband/hw/qedr/verbs.h
@@ -70,7 +70,8 @@
int qp_attr_mask, struct ib_qp_init_attr *);
int qedr_destroy_qp(struct ib_qp *ibqp);
-struct ib_ah *qedr_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr);
+struct ib_ah *qedr_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr,
+ struct ib_udata *udata);
int qedr_destroy_ah(struct ib_ah *ibah);
int qedr_dereg_mr(struct ib_mr *);
diff --git a/drivers/infiniband/hw/qib/qib_diag.c b/drivers/infiniband/hw/qib/qib_diag.c
index 8c34b23..775018b 100644
--- a/drivers/infiniband/hw/qib/qib_diag.c
+++ b/drivers/infiniband/hw/qib/qib_diag.c
@@ -609,8 +609,6 @@
tmpbuf = vmalloc(plen);
if (!tmpbuf) {
- qib_devinfo(dd->pcidev,
- "Unable to allocate tmp buffer, failing\n");
ret = -ENOMEM;
goto bail;
}
@@ -702,10 +700,8 @@
if (!dd || !op)
return -EINVAL;
olp = vmalloc(sizeof(*olp));
- if (!olp) {
- pr_err("vmalloc for observer failed\n");
+ if (!olp)
return -ENOMEM;
- }
spin_lock_irqsave(&dd->qib_diag_trans_lock, flags);
olp->op = op;
diff --git a/drivers/infiniband/hw/qib/qib_driver.c b/drivers/infiniband/hw/qib/qib_driver.c
index 728e0a0..2b5982f 100644
--- a/drivers/infiniband/hw/qib/qib_driver.c
+++ b/drivers/infiniband/hw/qib/qib_driver.c
@@ -420,8 +420,7 @@
if (list_empty(&qp->rspwait)) {
qp->r_flags |=
RVT_R_RSP_NAK;
- atomic_inc(
- &qp->refcount);
+ rvt_get_qp(qp);
list_add_tail(
&qp->rspwait,
&rcd->qp_wait_list);
diff --git a/drivers/infiniband/hw/qib/qib_eeprom.c b/drivers/infiniband/hw/qib/qib_eeprom.c
index 311ee6c..33a2e74 100644
--- a/drivers/infiniband/hw/qib/qib_eeprom.c
+++ b/drivers/infiniband/hw/qib/qib_eeprom.c
@@ -182,12 +182,8 @@
* */
len = sizeof(struct qib_flash);
buf = vmalloc(len);
- if (!buf) {
- qib_dev_err(dd,
- "Couldn't allocate memory to read %u bytes from eeprom for GUID\n",
- len);
+ if (!buf)
goto bail;
- }
/*
* Use "public" eeprom read function, which does locking and
diff --git a/drivers/infiniband/hw/qib/qib_file_ops.c b/drivers/infiniband/hw/qib/qib_file_ops.c
index 382466a..2d1eacf 100644
--- a/drivers/infiniband/hw/qib/qib_file_ops.c
+++ b/drivers/infiniband/hw/qib/qib_file_ops.c
@@ -2066,8 +2066,11 @@
ssize_t ret = 0;
void *dest;
- if (WARN_ON_ONCE(!ib_safe_file_access(fp)))
+ if (!ib_safe_file_access(fp)) {
+ pr_err_once("qib_write: process %d (%s) changed security contexts after opening file descriptor, this is not allowed.\n",
+ task_tgid_vnr(current), current->comm);
return -EACCES;
+ }
if (count < sizeof(cmd.type)) {
ret = -EINVAL;
diff --git a/drivers/infiniband/hw/qib/qib_iba6120.c b/drivers/infiniband/hw/qib/qib_iba6120.c
index a3733f2..92399d3 100644
--- a/drivers/infiniband/hw/qib/qib_iba6120.c
+++ b/drivers/infiniband/hw/qib/qib_iba6120.c
@@ -1759,9 +1759,7 @@
}
namelen = strlen(n) + 1;
dd->boardname = kmalloc(namelen, GFP_KERNEL);
- if (!dd->boardname)
- qib_dev_err(dd, "Failed allocation for board name: %s\n", n);
- else
+ if (dd->boardname)
snprintf(dd->boardname, namelen, "%s", n);
if (dd->majrev != 4 || !dd->minrev || dd->minrev > 2)
@@ -2533,8 +2531,6 @@
dd->cspec->cntrnamelen = 1 + s - cntr6120names;
dd->cspec->cntrs = kmalloc(dd->cspec->ncntrs
* sizeof(u64), GFP_KERNEL);
- if (!dd->cspec->cntrs)
- qib_dev_err(dd, "Failed allocation for counters\n");
for (i = 0, s = (char *)portcntr6120names; s; i++)
s = strchr(s + 1, '\n');
@@ -2542,8 +2538,6 @@
dd->cspec->portcntrnamelen = sizeof(portcntr6120names) - 1;
dd->cspec->portcntrs = kmalloc(dd->cspec->nportcntrs
* sizeof(u64), GFP_KERNEL);
- if (!dd->cspec->portcntrs)
- qib_dev_err(dd, "Failed allocation for portcounters\n");
}
static u32 qib_read_6120cntrs(struct qib_devdata *dd, loff_t pos, char **namep,
diff --git a/drivers/infiniband/hw/qib/qib_iba7220.c b/drivers/infiniband/hw/qib/qib_iba7220.c
index 00b2af2..e55e31a 100644
--- a/drivers/infiniband/hw/qib/qib_iba7220.c
+++ b/drivers/infiniband/hw/qib/qib_iba7220.c
@@ -2070,9 +2070,7 @@
namelen = strlen(n) + 1;
dd->boardname = kmalloc(namelen, GFP_KERNEL);
- if (!dd->boardname)
- qib_dev_err(dd, "Failed allocation for board name: %s\n", n);
- else
+ if (dd->boardname)
snprintf(dd->boardname, namelen, "%s", n);
if (dd->majrev != 5 || !dd->minrev || dd->minrev > 2)
@@ -3179,8 +3177,6 @@
dd->cspec->cntrnamelen = 1 + s - cntr7220names;
dd->cspec->cntrs = kmalloc(dd->cspec->ncntrs
* sizeof(u64), GFP_KERNEL);
- if (!dd->cspec->cntrs)
- qib_dev_err(dd, "Failed allocation for counters\n");
for (i = 0, s = (char *)portcntr7220names; s; i++)
s = strchr(s + 1, '\n');
@@ -3188,8 +3184,6 @@
dd->cspec->portcntrnamelen = sizeof(portcntr7220names) - 1;
dd->cspec->portcntrs = kmalloc(dd->cspec->nportcntrs
* sizeof(u64), GFP_KERNEL);
- if (!dd->cspec->portcntrs)
- qib_dev_err(dd, "Failed allocation for portcounters\n");
}
static u32 qib_read_7220cntrs(struct qib_devdata *dd, loff_t pos, char **namep,
diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c
index ded2717..c4a36160 100644
--- a/drivers/infiniband/hw/qib/qib_iba7322.c
+++ b/drivers/infiniband/hw/qib/qib_iba7322.c
@@ -3627,9 +3627,7 @@
namelen = strlen(n) + 1;
dd->boardname = kmalloc(namelen, GFP_KERNEL);
- if (!dd->boardname)
- qib_dev_err(dd, "Failed allocation for board name: %s\n", n);
- else
+ if (dd->boardname)
snprintf(dd->boardname, namelen, "%s", n);
snprintf(dd->boardversion, sizeof(dd->boardversion),
@@ -3656,7 +3654,7 @@
static int qib_do_7322_reset(struct qib_devdata *dd)
{
u64 val;
- u64 *msix_vecsave;
+ u64 *msix_vecsave = NULL;
int i, msix_entries, ret = 1;
u16 cmdval;
u8 int_line, clinesz;
@@ -3677,10 +3675,7 @@
/* can be up to 512 bytes, too big for stack */
msix_vecsave = kmalloc(2 * dd->cspec->num_msix_entries *
sizeof(u64), GFP_KERNEL);
- if (!msix_vecsave)
- qib_dev_err(dd, "No mem to save MSIx data\n");
- } else
- msix_vecsave = NULL;
+ }
/*
* Core PCI (as of 2.6.18) doesn't save or rewrite the full vector
@@ -5043,8 +5038,6 @@
dd->cspec->cntrnamelen = 1 + s - cntr7322names;
dd->cspec->cntrs = kmalloc(dd->cspec->ncntrs
* sizeof(u64), GFP_KERNEL);
- if (!dd->cspec->cntrs)
- qib_dev_err(dd, "Failed allocation for counters\n");
for (i = 0, s = (char *)portcntr7322names; s; i++)
s = strchr(s + 1, '\n');
@@ -5053,9 +5046,6 @@
for (i = 0; i < dd->num_pports; ++i) {
dd->pport[i].cpspec->portcntrs = kmalloc(dd->cspec->nportcntrs
* sizeof(u64), GFP_KERNEL);
- if (!dd->pport[i].cpspec->portcntrs)
- qib_dev_err(dd,
- "Failed allocation for portcounters\n");
}
}
@@ -6461,7 +6451,6 @@
sizeof(*dd->cspec->sendibchk), GFP_KERNEL);
if (!dd->cspec->sendchkenable || !dd->cspec->sendgrhchk ||
!dd->cspec->sendibchk) {
- qib_dev_err(dd, "Failed allocation for hdrchk bitmaps\n");
ret = -ENOMEM;
goto bail;
}
@@ -7338,10 +7327,9 @@
tabsize = actual_cnt;
dd->cspec->msix_entries = kzalloc(tabsize *
sizeof(struct qib_msix_entry), GFP_KERNEL);
- if (!dd->cspec->msix_entries) {
- qib_dev_err(dd, "No memory for MSIx table\n");
+ if (!dd->cspec->msix_entries)
tabsize = 0;
- }
+
for (i = 0; i < tabsize; i++)
dd->cspec->msix_entries[i].msix.entry = i;
diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c
index 1730aa8..b50240b 100644
--- a/drivers/infiniband/hw/qib/qib_init.c
+++ b/drivers/infiniband/hw/qib/qib_init.c
@@ -133,11 +133,8 @@
* cleanup iterates across all possible ctxts.
*/
dd->rcd = kcalloc(dd->ctxtcnt, sizeof(*dd->rcd), GFP_KERNEL);
- if (!dd->rcd) {
- qib_dev_err(dd,
- "Unable to allocate ctxtdata array, failing\n");
+ if (!dd->rcd)
return -ENOMEM;
- }
/* create (one or more) kctxt */
for (i = 0; i < dd->first_user_ctxt; ++i) {
@@ -265,39 +262,23 @@
size = IB_CC_TABLE_CAP_DEFAULT * sizeof(struct ib_cc_table_entry)
* IB_CCT_ENTRIES;
ppd->ccti_entries = kzalloc(size, GFP_KERNEL);
- if (!ppd->ccti_entries) {
- qib_dev_err(dd,
- "failed to allocate congestion control table for port %d!\n",
- port);
+ if (!ppd->ccti_entries)
goto bail;
- }
size = IB_CC_CCS_ENTRIES * sizeof(struct ib_cc_congestion_entry);
ppd->congestion_entries = kzalloc(size, GFP_KERNEL);
- if (!ppd->congestion_entries) {
- qib_dev_err(dd,
- "failed to allocate congestion setting list for port %d!\n",
- port);
+ if (!ppd->congestion_entries)
goto bail_1;
- }
size = sizeof(struct cc_table_shadow);
ppd->ccti_entries_shadow = kzalloc(size, GFP_KERNEL);
- if (!ppd->ccti_entries_shadow) {
- qib_dev_err(dd,
- "failed to allocate shadow ccti list for port %d!\n",
- port);
+ if (!ppd->ccti_entries_shadow)
goto bail_2;
- }
size = sizeof(struct ib_cc_congestion_setting_attr);
ppd->congestion_entries_shadow = kzalloc(size, GFP_KERNEL);
- if (!ppd->congestion_entries_shadow) {
- qib_dev_err(dd,
- "failed to allocate shadow congestion setting list for port %d!\n",
- port);
+ if (!ppd->congestion_entries_shadow)
goto bail_3;
- }
return 0;
@@ -391,18 +372,12 @@
dma_addr_t *addrs;
pages = vzalloc(dd->cfgctxts * dd->rcvtidcnt * sizeof(struct page *));
- if (!pages) {
- qib_dev_err(dd,
- "failed to allocate shadow page * array, no expected sends!\n");
+ if (!pages)
goto bail;
- }
addrs = vzalloc(dd->cfgctxts * dd->rcvtidcnt * sizeof(dma_addr_t));
- if (!addrs) {
- qib_dev_err(dd,
- "failed to allocate shadow dma handle array, no expected sends!\n");
+ if (!addrs)
goto bail_free;
- }
dd->pageshadow = pages;
dd->physshadow = addrs;
@@ -1026,11 +1001,8 @@
cnt = 1024;
addr = vmalloc(cnt);
- if (!addr) {
- qib_devinfo(dd->pcidev,
- "Couldn't get memory for checking PIO perf, skipping\n");
+ if (!addr)
goto done;
- }
preempt_disable(); /* we want reasonably accurate elapsed time */
msecs = 1 + jiffies_to_msecs(jiffies);
@@ -1172,9 +1144,6 @@
sizeof(long), GFP_KERNEL);
if (qib_cpulist)
qib_cpulist_count = count;
- else
- qib_early_err(&pdev->dev,
- "Could not alloc cpulist info, cpu affinity might be wrong\n");
}
#ifdef CONFIG_DEBUG_FS
qib_dbg_ibdev_init(&dd->verbs_dev);
diff --git a/drivers/infiniband/hw/qib/qib_rc.c b/drivers/infiniband/hw/qib/qib_rc.c
index 2097512..031433c 100644
--- a/drivers/infiniband/hw/qib/qib_rc.c
+++ b/drivers/infiniband/hw/qib/qib_rc.c
@@ -941,8 +941,6 @@
{
struct ib_other_headers *ohdr;
struct rvt_swqe *wqe;
- struct ib_wc wc;
- unsigned i;
u32 opcode;
u32 psn;
@@ -988,22 +986,8 @@
qp->s_last = s_last;
/* see post_send() */
barrier();
- for (i = 0; i < wqe->wr.num_sge; i++) {
- struct rvt_sge *sge = &wqe->sg_list[i];
-
- rvt_put_mr(sge->mr);
- }
- /* Post a send completion queue entry if requested. */
- if (!(qp->s_flags & RVT_S_SIGNAL_REQ_WR) ||
- (wqe->wr.send_flags & IB_SEND_SIGNALED)) {
- memset(&wc, 0, sizeof(wc));
- wc.wr_id = wqe->wr.wr_id;
- wc.status = IB_WC_SUCCESS;
- wc.opcode = ib_qib_wc_opcode[wqe->wr.opcode];
- wc.byte_len = wqe->length;
- wc.qp = &qp->ibqp;
- rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.send_cq), &wc, 0);
- }
+ rvt_put_swqe(wqe);
+ rvt_qp_swqe_complete(qp, wqe, IB_WC_SUCCESS);
}
/*
* If we were waiting for sends to complete before resending,
@@ -1032,9 +1016,6 @@
struct rvt_swqe *wqe,
struct qib_ibport *ibp)
{
- struct ib_wc wc;
- unsigned i;
-
/*
* Don't decrement refcount and don't generate a
* completion if the SWQE is being resent until the send
@@ -1044,28 +1025,14 @@
qib_cmp24(qp->s_sending_psn, qp->s_sending_hpsn) > 0) {
u32 s_last;
- for (i = 0; i < wqe->wr.num_sge; i++) {
- struct rvt_sge *sge = &wqe->sg_list[i];
-
- rvt_put_mr(sge->mr);
- }
+ rvt_put_swqe(wqe);
s_last = qp->s_last;
if (++s_last >= qp->s_size)
s_last = 0;
qp->s_last = s_last;
/* see post_send() */
barrier();
- /* Post a send completion queue entry if requested. */
- if (!(qp->s_flags & RVT_S_SIGNAL_REQ_WR) ||
- (wqe->wr.send_flags & IB_SEND_SIGNALED)) {
- memset(&wc, 0, sizeof(wc));
- wc.wr_id = wqe->wr.wr_id;
- wc.status = IB_WC_SUCCESS;
- wc.opcode = ib_qib_wc_opcode[wqe->wr.opcode];
- wc.byte_len = wqe->length;
- wc.qp = &qp->ibqp;
- rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.send_cq), &wc, 0);
- }
+ rvt_qp_swqe_complete(qp, wqe, IB_WC_SUCCESS);
} else
this_cpu_inc(*ibp->rvp.rc_delayed_comp);
@@ -2112,8 +2079,7 @@
* Update the next expected PSN. We add 1 later
* below, so only add the remainder here.
*/
- if (len > pmtu)
- qp->r_psn += (len - 1) / pmtu;
+ qp->r_psn += rvt_div_mtu(qp, len - 1);
} else {
e->rdma_sge.mr = NULL;
e->rdma_sge.vaddr = NULL;
diff --git a/drivers/infiniband/hw/qib/qib_ruc.c b/drivers/infiniband/hw/qib/qib_ruc.c
index de1bde5..e54a2fe 100644
--- a/drivers/infiniband/hw/qib/qib_ruc.c
+++ b/drivers/infiniband/hw/qib/qib_ruc.c
@@ -793,7 +793,6 @@
enum ib_wc_status status)
{
u32 old_last, last;
- unsigned i;
if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_OR_FLUSH_SEND))
return;
@@ -805,32 +804,13 @@
qp->s_last = last;
/* See post_send() */
barrier();
- for (i = 0; i < wqe->wr.num_sge; i++) {
- struct rvt_sge *sge = &wqe->sg_list[i];
-
- rvt_put_mr(sge->mr);
- }
+ rvt_put_swqe(wqe);
if (qp->ibqp.qp_type == IB_QPT_UD ||
qp->ibqp.qp_type == IB_QPT_SMI ||
qp->ibqp.qp_type == IB_QPT_GSI)
atomic_dec(&ibah_to_rvtah(wqe->ud_wr.ah)->refcount);
- /* See ch. 11.2.4.1 and 10.7.3.1 */
- if (!(qp->s_flags & RVT_S_SIGNAL_REQ_WR) ||
- (wqe->wr.send_flags & IB_SEND_SIGNALED) ||
- status != IB_WC_SUCCESS) {
- struct ib_wc wc;
-
- memset(&wc, 0, sizeof(wc));
- wc.wr_id = wqe->wr.wr_id;
- wc.status = status;
- wc.opcode = ib_qib_wc_opcode[wqe->wr.opcode];
- wc.qp = &qp->ibqp;
- if (status == IB_WC_SUCCESS)
- wc.byte_len = wqe->length;
- rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.send_cq), &wc,
- status != IB_WC_SUCCESS);
- }
+ rvt_qp_swqe_complete(qp, wqe, status);
if (qp->s_acked == old_last)
qp->s_acked = last;
diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c
index 954f150..4b54c0d 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.c
+++ b/drivers/infiniband/hw/qib/qib_verbs.c
@@ -114,19 +114,6 @@
MODULE_PARM_DESC(disable_sma, "Disable the SMA");
/*
- * Translate ib_wr_opcode into ib_wc_opcode.
- */
-const enum ib_wc_opcode ib_qib_wc_opcode[] = {
- [IB_WR_RDMA_WRITE] = IB_WC_RDMA_WRITE,
- [IB_WR_RDMA_WRITE_WITH_IMM] = IB_WC_RDMA_WRITE,
- [IB_WR_SEND] = IB_WC_SEND,
- [IB_WR_SEND_WITH_IMM] = IB_WC_SEND,
- [IB_WR_RDMA_READ] = IB_WC_RDMA_READ,
- [IB_WR_ATOMIC_CMP_AND_SWP] = IB_WC_COMP_SWAP,
- [IB_WR_ATOMIC_FETCH_AND_ADD] = IB_WC_FETCH_ADD
-};
-
-/*
* System image GUID.
*/
__be64 ib_qib_sys_image_guid;
@@ -464,7 +451,7 @@
priv = list_entry(list->next, struct qib_qp_priv, iowait);
qp = priv->owner;
list_del_init(&priv->iowait);
- atomic_inc(&qp->refcount);
+ rvt_get_qp(qp);
if (!list_empty(list))
mod_timer(&dev->mem_timer, jiffies + 1);
}
@@ -477,8 +464,7 @@
qib_schedule_send(qp);
}
spin_unlock_irqrestore(&qp->s_lock, flags);
- if (atomic_dec_and_test(&qp->refcount))
- wake_up(&qp->wait);
+ rvt_put_qp(qp);
}
}
@@ -762,7 +748,7 @@
iowait);
qp = priv->owner;
list_del_init(&priv->iowait);
- atomic_inc(&qp->refcount);
+ rvt_get_qp(qp);
spin_unlock_irqrestore(&dev->rdi.pending_lock, flags);
spin_lock_irqsave(&qp->s_lock, flags);
@@ -772,8 +758,7 @@
}
spin_unlock_irqrestore(&qp->s_lock, flags);
- if (atomic_dec_and_test(&qp->refcount))
- wake_up(&qp->wait);
+ rvt_put_qp(qp);
} else
spin_unlock_irqrestore(&dev->rdi.pending_lock, flags);
}
@@ -808,7 +793,7 @@
break;
avail -= qpp->s_tx->txreq.sg_count;
list_del_init(&qpp->iowait);
- atomic_inc(&qp->refcount);
+ rvt_get_qp(qp);
qps[n++] = qp;
}
@@ -822,8 +807,7 @@
qib_schedule_send(qp);
}
spin_unlock(&qp->s_lock);
- if (atomic_dec_and_test(&qp->refcount))
- wake_up(&qp->wait);
+ rvt_put_qp(qp);
}
}
@@ -1288,7 +1272,7 @@
priv = list_entry(list->next, struct qib_qp_priv, iowait);
qp = priv->owner;
list_del_init(&priv->iowait);
- atomic_inc(&qp->refcount);
+ rvt_get_qp(qp);
qps[n++] = qp;
}
dd->f_wantpiobuf_intr(dd, 0);
@@ -1306,8 +1290,7 @@
spin_unlock_irqrestore(&qp->s_lock, flags);
/* Notify qib_destroy_qp() if it is waiting. */
- if (atomic_dec_and_test(&qp->refcount))
- wake_up(&qp->wait);
+ rvt_put_qp(qp);
}
}
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c b/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c
index 5b0248a..092d4e1 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c
+++ b/drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c
@@ -117,10 +117,10 @@
vnic_idx = usnic_vnic_get_index(qp_grp->vf->vnic);
res_chunk = get_qp_res_chunk(qp_grp);
- if (IS_ERR_OR_NULL(res_chunk)) {
+ if (IS_ERR(res_chunk)) {
usnic_err("Unable to get qp res with err %ld\n",
PTR_ERR(res_chunk));
- return res_chunk ? PTR_ERR(res_chunk) : -ENOMEM;
+ return PTR_ERR(res_chunk);
}
for (i = 0; i < res_chunk->cnt; i++) {
@@ -158,10 +158,10 @@
vnic_idx = usnic_vnic_get_index(qp_grp->vf->vnic);
res_chunk = get_qp_res_chunk(qp_grp);
- if (IS_ERR_OR_NULL(res_chunk)) {
+ if (IS_ERR(res_chunk)) {
usnic_err("Unable to get qp res with err %ld\n",
PTR_ERR(res_chunk));
- return res_chunk ? PTR_ERR(res_chunk) : -ENOMEM;
+ return PTR_ERR(res_chunk);
}
for (i = 0; i < res_chunk->cnt; i++) {
@@ -186,11 +186,11 @@
struct usnic_vnic_res_chunk *res_chunk;
res_chunk = usnic_ib_qp_grp_get_chunk(qp_grp, USNIC_VNIC_RES_TYPE_RQ);
- if (IS_ERR_OR_NULL(res_chunk)) {
+ if (IS_ERR(res_chunk)) {
usnic_err("Unable to get %s with err %ld\n",
usnic_vnic_res_type_to_str(USNIC_VNIC_RES_TYPE_RQ),
PTR_ERR(res_chunk));
- return res_chunk ? PTR_ERR(res_chunk) : -ENOMEM;
+ return PTR_ERR(res_chunk);
}
uaction->vnic_idx = usnic_vnic_get_index(qp_grp->vf->vnic);
@@ -228,8 +228,6 @@
flow = usnic_fwd_alloc_flow(qp_grp->ufdev, &filter, &uaction);
if (IS_ERR_OR_NULL(flow)) {
- usnic_err("Unable to alloc flow failed with err %ld\n",
- PTR_ERR(flow));
err = flow ? PTR_ERR(flow) : -EFAULT;
goto out_unreserve_port;
}
@@ -303,8 +301,6 @@
flow = usnic_fwd_alloc_flow(qp_grp->ufdev, &filter, &uaction);
if (IS_ERR_OR_NULL(flow)) {
- usnic_err("Unable to alloc flow failed with err %ld\n",
- PTR_ERR(flow));
err = flow ? PTR_ERR(flow) : -EFAULT;
goto out_put_sock;
}
@@ -694,18 +690,14 @@
}
qp_grp = kzalloc(sizeof(*qp_grp), GFP_ATOMIC);
- if (!qp_grp) {
- usnic_err("Unable to alloc qp_grp - Out of memory\n");
+ if (!qp_grp)
return NULL;
- }
qp_grp->res_chunk_list = alloc_res_chunk_list(vf->vnic, res_spec,
qp_grp);
if (IS_ERR_OR_NULL(qp_grp->res_chunk_list)) {
err = qp_grp->res_chunk_list ?
PTR_ERR(qp_grp->res_chunk_list) : -ENOMEM;
- usnic_err("Unable to alloc res for %d with err %d\n",
- qp_grp->grp_id, err);
goto out_free_qp_grp;
}
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c
index a5bfbba..74819a7 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c
+++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c
@@ -87,12 +87,12 @@
resp.bar_len = bar->len;
chunk = usnic_ib_qp_grp_get_chunk(qp_grp, USNIC_VNIC_RES_TYPE_RQ);
- if (IS_ERR_OR_NULL(chunk)) {
+ if (IS_ERR(chunk)) {
usnic_err("Failed to get chunk %s for qp_grp %d with err %ld\n",
usnic_vnic_res_type_to_str(USNIC_VNIC_RES_TYPE_RQ),
qp_grp->grp_id,
PTR_ERR(chunk));
- return chunk ? PTR_ERR(chunk) : -ENOMEM;
+ return PTR_ERR(chunk);
}
WARN_ON(chunk->type != USNIC_VNIC_RES_TYPE_RQ);
@@ -101,12 +101,12 @@
resp.rq_idx[i] = chunk->res[i]->vnic_idx;
chunk = usnic_ib_qp_grp_get_chunk(qp_grp, USNIC_VNIC_RES_TYPE_WQ);
- if (IS_ERR_OR_NULL(chunk)) {
+ if (IS_ERR(chunk)) {
usnic_err("Failed to get chunk %s for qp_grp %d with err %ld\n",
usnic_vnic_res_type_to_str(USNIC_VNIC_RES_TYPE_WQ),
qp_grp->grp_id,
PTR_ERR(chunk));
- return chunk ? PTR_ERR(chunk) : -ENOMEM;
+ return PTR_ERR(chunk);
}
WARN_ON(chunk->type != USNIC_VNIC_RES_TYPE_WQ);
@@ -115,12 +115,12 @@
resp.wq_idx[i] = chunk->res[i]->vnic_idx;
chunk = usnic_ib_qp_grp_get_chunk(qp_grp, USNIC_VNIC_RES_TYPE_CQ);
- if (IS_ERR_OR_NULL(chunk)) {
+ if (IS_ERR(chunk)) {
usnic_err("Failed to get chunk %s for qp_grp %d with err %ld\n",
usnic_vnic_res_type_to_str(USNIC_VNIC_RES_TYPE_CQ),
qp_grp->grp_id,
PTR_ERR(chunk));
- return chunk ? PTR_ERR(chunk) : -ENOMEM;
+ return PTR_ERR(chunk);
}
WARN_ON(chunk->type != USNIC_VNIC_RES_TYPE_CQ);
@@ -738,7 +738,9 @@
/* In ib callbacks section - Start of stub funcs */
struct ib_ah *usnic_ib_create_ah(struct ib_pd *pd,
- struct ib_ah_attr *ah_attr)
+ struct ib_ah_attr *ah_attr,
+ struct ib_udata *udata)
+
{
usnic_dbg("\n");
return ERR_PTR(-EPERM);
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.h b/drivers/infiniband/hw/usnic/usnic_ib_verbs.h
index 0d9d2e6a..0ed8e07 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.h
+++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.h
@@ -75,7 +75,9 @@
int usnic_ib_mmap(struct ib_ucontext *context,
struct vm_area_struct *vma);
struct ib_ah *usnic_ib_create_ah(struct ib_pd *pd,
- struct ib_ah_attr *ah_attr);
+ struct ib_ah_attr *ah_attr,
+ struct ib_udata *udata);
+
int usnic_ib_destroy_ah(struct ib_ah *ah);
int usnic_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
struct ib_send_wr **bad_wr);
diff --git a/drivers/infiniband/hw/usnic/usnic_vnic.c b/drivers/infiniband/hw/usnic/usnic_vnic.c
index 8875107..e7b0030 100644
--- a/drivers/infiniband/hw/usnic/usnic_vnic.c
+++ b/drivers/infiniband/hw/usnic/usnic_vnic.c
@@ -241,17 +241,12 @@
return ERR_PTR(-EINVAL);
ret = kzalloc(sizeof(*ret), GFP_ATOMIC);
- if (!ret) {
- usnic_err("Failed to allocate chunk for %s - Out of memory\n",
- usnic_vnic_pci_name(vnic));
+ if (!ret)
return ERR_PTR(-ENOMEM);
- }
if (cnt > 0) {
ret->res = kcalloc(cnt, sizeof(*(ret->res)), GFP_ATOMIC);
if (!ret->res) {
- usnic_err("Failed to allocate resources for %s. Out of memory\n",
- usnic_vnic_pci_name(vnic));
kfree(ret);
return ERR_PTR(-ENOMEM);
}
@@ -311,8 +306,10 @@
struct usnic_vnic_res *res;
cnt = vnic_dev_get_res_count(vnic->vdev, _to_vnic_res_type(type));
- if (cnt < 1)
+ if (cnt < 1) {
+ usnic_err("Wrong res count with cnt %d\n", cnt);
return -EINVAL;
+ }
chunk->cnt = chunk->free_cnt = cnt;
chunk->res = kzalloc(sizeof(*(chunk->res))*cnt, GFP_KERNEL);
@@ -384,12 +381,8 @@
res_type < USNIC_VNIC_RES_TYPE_MAX; res_type++) {
err = usnic_vnic_alloc_res_chunk(vnic, res_type,
&vnic->chunks[res_type]);
- if (err) {
- usnic_err("Failed to alloc res %s with err %d\n",
- usnic_vnic_res_type_to_str(res_type),
- err);
+ if (err)
goto out_clean_chunks;
- }
}
return 0;
@@ -454,11 +447,8 @@
}
vnic = kzalloc(sizeof(*vnic), GFP_KERNEL);
- if (!vnic) {
- usnic_err("Failed to alloc vnic for %s - out of memory\n",
- pci_name(pdev));
+ if (!vnic)
return ERR_PTR(-ENOMEM);
- }
spin_lock_init(&vnic->res_lock);
diff --git a/drivers/infiniband/hw/vmw_pvrdma/Kconfig b/drivers/infiniband/hw/vmw_pvrdma/Kconfig
new file mode 100644
index 0000000..5a9790a
--- /dev/null
+++ b/drivers/infiniband/hw/vmw_pvrdma/Kconfig
@@ -0,0 +1,7 @@
+config INFINIBAND_VMWARE_PVRDMA
+ tristate "VMware Paravirtualized RDMA Driver"
+ depends on NETDEVICES && ETHERNET && PCI && INET && VMXNET3
+ ---help---
+ This driver provides low-level support for VMware Paravirtual
+ RDMA adapter. It interacts with the VMXNet3 driver to provide
+ Ethernet capabilities.
diff --git a/drivers/infiniband/hw/vmw_pvrdma/Makefile b/drivers/infiniband/hw/vmw_pvrdma/Makefile
new file mode 100644
index 0000000..0194ed1
--- /dev/null
+++ b/drivers/infiniband/hw/vmw_pvrdma/Makefile
@@ -0,0 +1,3 @@
+obj-$(CONFIG_INFINIBAND_VMWARE_PVRDMA) += vmw_pvrdma.o
+
+vmw_pvrdma-y := pvrdma_cmd.o pvrdma_cq.o pvrdma_doorbell.o pvrdma_main.o pvrdma_misc.o pvrdma_mr.o pvrdma_qp.o pvrdma_verbs.o
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h
new file mode 100644
index 0000000..71e1d55
--- /dev/null
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h
@@ -0,0 +1,474 @@
+/*
+ * Copyright (c) 2012-2016 VMware, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of EITHER the GNU General Public License
+ * version 2 as published by the Free Software Foundation or the BSD
+ * 2-Clause License. This program is distributed in the hope that it
+ * will be useful, but WITHOUT ANY WARRANTY; WITHOUT EVEN THE IMPLIED
+ * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU General Public License version 2 for more details at
+ * http://www.gnu.org/licenses/old-licenses/gpl-2.0.en.html.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program available in the file COPYING in the main
+ * directory of this source tree.
+ *
+ * The BSD 2-Clause License
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __PVRDMA_H__
+#define __PVRDMA_H__
+
+#include <linux/compiler.h>
+#include <linux/interrupt.h>
+#include <linux/list.h>
+#include <linux/mutex.h>
+#include <linux/pci.h>
+#include <linux/semaphore.h>
+#include <linux/workqueue.h>
+#include <rdma/ib_umem.h>
+#include <rdma/ib_verbs.h>
+#include <rdma/vmw_pvrdma-abi.h>
+
+#include "pvrdma_ring.h"
+#include "pvrdma_dev_api.h"
+#include "pvrdma_verbs.h"
+
+/* NOT the same as BIT_MASK(). */
+#define PVRDMA_MASK(n) ((n << 1) - 1)
+
+/*
+ * VMware PVRDMA PCI device id.
+ */
+#define PCI_DEVICE_ID_VMWARE_PVRDMA 0x0820
+
+struct pvrdma_dev;
+
+struct pvrdma_page_dir {
+ dma_addr_t dir_dma;
+ u64 *dir;
+ int ntables;
+ u64 **tables;
+ u64 npages;
+ void **pages;
+};
+
+struct pvrdma_cq {
+ struct ib_cq ibcq;
+ int offset;
+ spinlock_t cq_lock; /* Poll lock. */
+ struct pvrdma_uar_map *uar;
+ struct ib_umem *umem;
+ struct pvrdma_ring_state *ring_state;
+ struct pvrdma_page_dir pdir;
+ u32 cq_handle;
+ bool is_kernel;
+ atomic_t refcnt;
+ wait_queue_head_t wait;
+};
+
+struct pvrdma_id_table {
+ u32 last;
+ u32 top;
+ u32 max;
+ u32 mask;
+ spinlock_t lock; /* Table lock. */
+ unsigned long *table;
+};
+
+struct pvrdma_uar_map {
+ unsigned long pfn;
+ void __iomem *map;
+ int index;
+};
+
+struct pvrdma_uar_table {
+ struct pvrdma_id_table tbl;
+ int size;
+};
+
+struct pvrdma_ucontext {
+ struct ib_ucontext ibucontext;
+ struct pvrdma_dev *dev;
+ struct pvrdma_uar_map uar;
+ u64 ctx_handle;
+};
+
+struct pvrdma_pd {
+ struct ib_pd ibpd;
+ u32 pdn;
+ u32 pd_handle;
+ int privileged;
+};
+
+struct pvrdma_mr {
+ u32 mr_handle;
+ u64 iova;
+ u64 size;
+};
+
+struct pvrdma_user_mr {
+ struct ib_mr ibmr;
+ struct ib_umem *umem;
+ struct pvrdma_mr mmr;
+ struct pvrdma_page_dir pdir;
+ u64 *pages;
+ u32 npages;
+ u32 max_pages;
+ u32 page_shift;
+};
+
+struct pvrdma_wq {
+ struct pvrdma_ring *ring;
+ spinlock_t lock; /* Work queue lock. */
+ int wqe_cnt;
+ int wqe_size;
+ int max_sg;
+ int offset;
+};
+
+struct pvrdma_ah {
+ struct ib_ah ibah;
+ struct pvrdma_av av;
+};
+
+struct pvrdma_qp {
+ struct ib_qp ibqp;
+ u32 qp_handle;
+ u32 qkey;
+ struct pvrdma_wq sq;
+ struct pvrdma_wq rq;
+ struct ib_umem *rumem;
+ struct ib_umem *sumem;
+ struct pvrdma_page_dir pdir;
+ int npages;
+ int npages_send;
+ int npages_recv;
+ u32 flags;
+ u8 port;
+ u8 state;
+ bool is_kernel;
+ struct mutex mutex; /* QP state mutex. */
+ atomic_t refcnt;
+ wait_queue_head_t wait;
+};
+
+struct pvrdma_dev {
+ /* PCI device-related information. */
+ struct ib_device ib_dev;
+ struct pci_dev *pdev;
+ void __iomem *regs;
+ struct pvrdma_device_shared_region *dsr; /* Shared region pointer */
+ dma_addr_t dsrbase; /* Shared region base address */
+ void *cmd_slot;
+ void *resp_slot;
+ unsigned long flags;
+ struct list_head device_link;
+
+ /* Locking and interrupt information. */
+ spinlock_t cmd_lock; /* Command lock. */
+ struct semaphore cmd_sema;
+ struct completion cmd_done;
+ struct {
+ enum pvrdma_intr_type type; /* Intr type */
+ struct msix_entry msix_entry[PVRDMA_MAX_INTERRUPTS];
+ irq_handler_t handler[PVRDMA_MAX_INTERRUPTS];
+ u8 enabled[PVRDMA_MAX_INTERRUPTS];
+ u8 size;
+ } intr;
+
+ /* RDMA-related device information. */
+ union ib_gid *sgid_tbl;
+ struct pvrdma_ring_state *async_ring_state;
+ struct pvrdma_page_dir async_pdir;
+ struct pvrdma_ring_state *cq_ring_state;
+ struct pvrdma_page_dir cq_pdir;
+ struct pvrdma_cq **cq_tbl;
+ spinlock_t cq_tbl_lock;
+ struct pvrdma_qp **qp_tbl;
+ spinlock_t qp_tbl_lock;
+ struct pvrdma_uar_table uar_table;
+ struct pvrdma_uar_map driver_uar;
+ __be64 sys_image_guid;
+ spinlock_t desc_lock; /* Device modification lock. */
+ u32 port_cap_mask;
+ struct mutex port_mutex; /* Port modification mutex. */
+ bool ib_active;
+ atomic_t num_qps;
+ atomic_t num_cqs;
+ atomic_t num_pds;
+ atomic_t num_ahs;
+
+ /* Network device information. */
+ struct net_device *netdev;
+ struct notifier_block nb_netdev;
+};
+
+struct pvrdma_netdevice_work {
+ struct work_struct work;
+ struct net_device *event_netdev;
+ unsigned long event;
+};
+
+static inline struct pvrdma_dev *to_vdev(struct ib_device *ibdev)
+{
+ return container_of(ibdev, struct pvrdma_dev, ib_dev);
+}
+
+static inline struct
+pvrdma_ucontext *to_vucontext(struct ib_ucontext *ibucontext)
+{
+ return container_of(ibucontext, struct pvrdma_ucontext, ibucontext);
+}
+
+static inline struct pvrdma_pd *to_vpd(struct ib_pd *ibpd)
+{
+ return container_of(ibpd, struct pvrdma_pd, ibpd);
+}
+
+static inline struct pvrdma_cq *to_vcq(struct ib_cq *ibcq)
+{
+ return container_of(ibcq, struct pvrdma_cq, ibcq);
+}
+
+static inline struct pvrdma_user_mr *to_vmr(struct ib_mr *ibmr)
+{
+ return container_of(ibmr, struct pvrdma_user_mr, ibmr);
+}
+
+static inline struct pvrdma_qp *to_vqp(struct ib_qp *ibqp)
+{
+ return container_of(ibqp, struct pvrdma_qp, ibqp);
+}
+
+static inline struct pvrdma_ah *to_vah(struct ib_ah *ibah)
+{
+ return container_of(ibah, struct pvrdma_ah, ibah);
+}
+
+static inline void pvrdma_write_reg(struct pvrdma_dev *dev, u32 reg, u32 val)
+{
+ writel(cpu_to_le32(val), dev->regs + reg);
+}
+
+static inline u32 pvrdma_read_reg(struct pvrdma_dev *dev, u32 reg)
+{
+ return le32_to_cpu(readl(dev->regs + reg));
+}
+
+static inline void pvrdma_write_uar_cq(struct pvrdma_dev *dev, u32 val)
+{
+ writel(cpu_to_le32(val), dev->driver_uar.map + PVRDMA_UAR_CQ_OFFSET);
+}
+
+static inline void pvrdma_write_uar_qp(struct pvrdma_dev *dev, u32 val)
+{
+ writel(cpu_to_le32(val), dev->driver_uar.map + PVRDMA_UAR_QP_OFFSET);
+}
+
+static inline void *pvrdma_page_dir_get_ptr(struct pvrdma_page_dir *pdir,
+ u64 offset)
+{
+ return pdir->pages[offset / PAGE_SIZE] + (offset % PAGE_SIZE);
+}
+
+static inline enum pvrdma_mtu ib_mtu_to_pvrdma(enum ib_mtu mtu)
+{
+ return (enum pvrdma_mtu)mtu;
+}
+
+static inline enum ib_mtu pvrdma_mtu_to_ib(enum pvrdma_mtu mtu)
+{
+ return (enum ib_mtu)mtu;
+}
+
+static inline enum pvrdma_port_state ib_port_state_to_pvrdma(
+ enum ib_port_state state)
+{
+ return (enum pvrdma_port_state)state;
+}
+
+static inline enum ib_port_state pvrdma_port_state_to_ib(
+ enum pvrdma_port_state state)
+{
+ return (enum ib_port_state)state;
+}
+
+static inline int ib_port_cap_flags_to_pvrdma(int flags)
+{
+ return flags & PVRDMA_MASK(PVRDMA_PORT_CAP_FLAGS_MAX);
+}
+
+static inline int pvrdma_port_cap_flags_to_ib(int flags)
+{
+ return flags;
+}
+
+static inline enum pvrdma_port_width ib_port_width_to_pvrdma(
+ enum ib_port_width width)
+{
+ return (enum pvrdma_port_width)width;
+}
+
+static inline enum ib_port_width pvrdma_port_width_to_ib(
+ enum pvrdma_port_width width)
+{
+ return (enum ib_port_width)width;
+}
+
+static inline enum pvrdma_port_speed ib_port_speed_to_pvrdma(
+ enum ib_port_speed speed)
+{
+ return (enum pvrdma_port_speed)speed;
+}
+
+static inline enum ib_port_speed pvrdma_port_speed_to_ib(
+ enum pvrdma_port_speed speed)
+{
+ return (enum ib_port_speed)speed;
+}
+
+static inline int pvrdma_qp_attr_mask_to_ib(int attr_mask)
+{
+ return attr_mask;
+}
+
+static inline int ib_qp_attr_mask_to_pvrdma(int attr_mask)
+{
+ return attr_mask & PVRDMA_MASK(PVRDMA_QP_ATTR_MASK_MAX);
+}
+
+static inline enum pvrdma_mig_state ib_mig_state_to_pvrdma(
+ enum ib_mig_state state)
+{
+ return (enum pvrdma_mig_state)state;
+}
+
+static inline enum ib_mig_state pvrdma_mig_state_to_ib(
+ enum pvrdma_mig_state state)
+{
+ return (enum ib_mig_state)state;
+}
+
+static inline int ib_access_flags_to_pvrdma(int flags)
+{
+ return flags;
+}
+
+static inline int pvrdma_access_flags_to_ib(int flags)
+{
+ return flags & PVRDMA_MASK(PVRDMA_ACCESS_FLAGS_MAX);
+}
+
+static inline enum pvrdma_qp_type ib_qp_type_to_pvrdma(enum ib_qp_type type)
+{
+ return (enum pvrdma_qp_type)type;
+}
+
+static inline enum ib_qp_type pvrdma_qp_type_to_ib(enum pvrdma_qp_type type)
+{
+ return (enum ib_qp_type)type;
+}
+
+static inline enum pvrdma_qp_state ib_qp_state_to_pvrdma(enum ib_qp_state state)
+{
+ return (enum pvrdma_qp_state)state;
+}
+
+static inline enum ib_qp_state pvrdma_qp_state_to_ib(enum pvrdma_qp_state state)
+{
+ return (enum ib_qp_state)state;
+}
+
+static inline enum pvrdma_wr_opcode ib_wr_opcode_to_pvrdma(enum ib_wr_opcode op)
+{
+ return (enum pvrdma_wr_opcode)op;
+}
+
+static inline enum ib_wc_status pvrdma_wc_status_to_ib(
+ enum pvrdma_wc_status status)
+{
+ return (enum ib_wc_status)status;
+}
+
+static inline int pvrdma_wc_opcode_to_ib(int opcode)
+{
+ return opcode;
+}
+
+static inline int pvrdma_wc_flags_to_ib(int flags)
+{
+ return flags;
+}
+
+static inline int ib_send_flags_to_pvrdma(int flags)
+{
+ return flags & PVRDMA_MASK(PVRDMA_SEND_FLAGS_MAX);
+}
+
+void pvrdma_qp_cap_to_ib(struct ib_qp_cap *dst,
+ const struct pvrdma_qp_cap *src);
+void ib_qp_cap_to_pvrdma(struct pvrdma_qp_cap *dst,
+ const struct ib_qp_cap *src);
+void pvrdma_gid_to_ib(union ib_gid *dst, const union pvrdma_gid *src);
+void ib_gid_to_pvrdma(union pvrdma_gid *dst, const union ib_gid *src);
+void pvrdma_global_route_to_ib(struct ib_global_route *dst,
+ const struct pvrdma_global_route *src);
+void ib_global_route_to_pvrdma(struct pvrdma_global_route *dst,
+ const struct ib_global_route *src);
+void pvrdma_ah_attr_to_ib(struct ib_ah_attr *dst,
+ const struct pvrdma_ah_attr *src);
+void ib_ah_attr_to_pvrdma(struct pvrdma_ah_attr *dst,
+ const struct ib_ah_attr *src);
+
+int pvrdma_uar_table_init(struct pvrdma_dev *dev);
+void pvrdma_uar_table_cleanup(struct pvrdma_dev *dev);
+
+int pvrdma_uar_alloc(struct pvrdma_dev *dev, struct pvrdma_uar_map *uar);
+void pvrdma_uar_free(struct pvrdma_dev *dev, struct pvrdma_uar_map *uar);
+
+void _pvrdma_flush_cqe(struct pvrdma_qp *qp, struct pvrdma_cq *cq);
+
+int pvrdma_page_dir_init(struct pvrdma_dev *dev, struct pvrdma_page_dir *pdir,
+ u64 npages, bool alloc_pages);
+void pvrdma_page_dir_cleanup(struct pvrdma_dev *dev,
+ struct pvrdma_page_dir *pdir);
+int pvrdma_page_dir_insert_dma(struct pvrdma_page_dir *pdir, u64 idx,
+ dma_addr_t daddr);
+int pvrdma_page_dir_insert_umem(struct pvrdma_page_dir *pdir,
+ struct ib_umem *umem, u64 offset);
+dma_addr_t pvrdma_page_dir_get_dma(struct pvrdma_page_dir *pdir, u64 idx);
+int pvrdma_page_dir_insert_page_list(struct pvrdma_page_dir *pdir,
+ u64 *page_list, int num_pages);
+
+int pvrdma_cmd_post(struct pvrdma_dev *dev, union pvrdma_cmd_req *req,
+ union pvrdma_cmd_resp *rsp, unsigned resp_code);
+
+#endif /* __PVRDMA_H__ */
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cmd.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cmd.c
new file mode 100644
index 0000000..4a78c53
--- /dev/null
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cmd.c
@@ -0,0 +1,119 @@
+/*
+ * Copyright (c) 2012-2016 VMware, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of EITHER the GNU General Public License
+ * version 2 as published by the Free Software Foundation or the BSD
+ * 2-Clause License. This program is distributed in the hope that it
+ * will be useful, but WITHOUT ANY WARRANTY; WITHOUT EVEN THE IMPLIED
+ * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU General Public License version 2 for more details at
+ * http://www.gnu.org/licenses/old-licenses/gpl-2.0.en.html.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program available in the file COPYING in the main
+ * directory of this source tree.
+ *
+ * The BSD 2-Clause License
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/list.h>
+
+#include "pvrdma.h"
+
+#define PVRDMA_CMD_TIMEOUT 10000 /* ms */
+
+static inline int pvrdma_cmd_recv(struct pvrdma_dev *dev,
+ union pvrdma_cmd_resp *resp,
+ unsigned resp_code)
+{
+ int err;
+
+ dev_dbg(&dev->pdev->dev, "receive response from device\n");
+
+ err = wait_for_completion_interruptible_timeout(&dev->cmd_done,
+ msecs_to_jiffies(PVRDMA_CMD_TIMEOUT));
+ if (err == 0 || err == -ERESTARTSYS) {
+ dev_warn(&dev->pdev->dev,
+ "completion timeout or interrupted\n");
+ return -ETIMEDOUT;
+ }
+
+ spin_lock(&dev->cmd_lock);
+ memcpy(resp, dev->resp_slot, sizeof(*resp));
+ spin_unlock(&dev->cmd_lock);
+
+ if (resp->hdr.ack != resp_code) {
+ dev_warn(&dev->pdev->dev,
+ "unknown response %#x expected %#x\n",
+ resp->hdr.ack, resp_code);
+ return -EFAULT;
+ }
+
+ return 0;
+}
+
+int
+pvrdma_cmd_post(struct pvrdma_dev *dev, union pvrdma_cmd_req *req,
+ union pvrdma_cmd_resp *resp, unsigned resp_code)
+{
+ int err;
+
+ dev_dbg(&dev->pdev->dev, "post request to device\n");
+
+ /* Serializiation */
+ down(&dev->cmd_sema);
+
+ BUILD_BUG_ON(sizeof(union pvrdma_cmd_req) !=
+ sizeof(struct pvrdma_cmd_modify_qp));
+
+ spin_lock(&dev->cmd_lock);
+ memcpy(dev->cmd_slot, req, sizeof(*req));
+ spin_unlock(&dev->cmd_lock);
+
+ init_completion(&dev->cmd_done);
+ pvrdma_write_reg(dev, PVRDMA_REG_REQUEST, 0);
+
+ /* Make sure the request is written before reading status. */
+ mb();
+
+ err = pvrdma_read_reg(dev, PVRDMA_REG_ERR);
+ if (err == 0) {
+ if (resp != NULL)
+ err = pvrdma_cmd_recv(dev, resp, resp_code);
+ } else {
+ dev_warn(&dev->pdev->dev,
+ "failed to write request error reg: %d\n", err);
+ err = -EFAULT;
+ }
+
+ up(&dev->cmd_sema);
+
+ return err;
+}
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c
new file mode 100644
index 0000000..e429ca5
--- /dev/null
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_cq.c
@@ -0,0 +1,425 @@
+/*
+ * Copyright (c) 2012-2016 VMware, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of EITHER the GNU General Public License
+ * version 2 as published by the Free Software Foundation or the BSD
+ * 2-Clause License. This program is distributed in the hope that it
+ * will be useful, but WITHOUT ANY WARRANTY; WITHOUT EVEN THE IMPLIED
+ * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU General Public License version 2 for more details at
+ * http://www.gnu.org/licenses/old-licenses/gpl-2.0.en.html.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program available in the file COPYING in the main
+ * directory of this source tree.
+ *
+ * The BSD 2-Clause License
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <asm/page.h>
+#include <linux/io.h>
+#include <linux/wait.h>
+#include <rdma/ib_addr.h>
+#include <rdma/ib_smi.h>
+#include <rdma/ib_user_verbs.h>
+
+#include "pvrdma.h"
+
+/**
+ * pvrdma_req_notify_cq - request notification for a completion queue
+ * @ibcq: the completion queue
+ * @notify_flags: notification flags
+ *
+ * @return: 0 for success.
+ */
+int pvrdma_req_notify_cq(struct ib_cq *ibcq,
+ enum ib_cq_notify_flags notify_flags)
+{
+ struct pvrdma_dev *dev = to_vdev(ibcq->device);
+ struct pvrdma_cq *cq = to_vcq(ibcq);
+ u32 val = cq->cq_handle;
+
+ val |= (notify_flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED ?
+ PVRDMA_UAR_CQ_ARM_SOL : PVRDMA_UAR_CQ_ARM;
+
+ pvrdma_write_uar_cq(dev, val);
+
+ return 0;
+}
+
+/**
+ * pvrdma_create_cq - create completion queue
+ * @ibdev: the device
+ * @attr: completion queue attributes
+ * @context: user context
+ * @udata: user data
+ *
+ * @return: ib_cq completion queue pointer on success,
+ * otherwise returns negative errno.
+ */
+struct ib_cq *pvrdma_create_cq(struct ib_device *ibdev,
+ const struct ib_cq_init_attr *attr,
+ struct ib_ucontext *context,
+ struct ib_udata *udata)
+{
+ int entries = attr->cqe;
+ struct pvrdma_dev *dev = to_vdev(ibdev);
+ struct pvrdma_cq *cq;
+ int ret;
+ int npages;
+ unsigned long flags;
+ union pvrdma_cmd_req req;
+ union pvrdma_cmd_resp rsp;
+ struct pvrdma_cmd_create_cq *cmd = &req.create_cq;
+ struct pvrdma_cmd_create_cq_resp *resp = &rsp.create_cq_resp;
+ struct pvrdma_create_cq ucmd;
+
+ BUILD_BUG_ON(sizeof(struct pvrdma_cqe) != 64);
+
+ entries = roundup_pow_of_two(entries);
+ if (entries < 1 || entries > dev->dsr->caps.max_cqe)
+ return ERR_PTR(-EINVAL);
+
+ if (!atomic_add_unless(&dev->num_cqs, 1, dev->dsr->caps.max_cq))
+ return ERR_PTR(-ENOMEM);
+
+ cq = kzalloc(sizeof(*cq), GFP_KERNEL);
+ if (!cq) {
+ atomic_dec(&dev->num_cqs);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ cq->ibcq.cqe = entries;
+
+ if (context) {
+ if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) {
+ ret = -EFAULT;
+ goto err_cq;
+ }
+
+ cq->umem = ib_umem_get(context, ucmd.buf_addr, ucmd.buf_size,
+ IB_ACCESS_LOCAL_WRITE, 1);
+ if (IS_ERR(cq->umem)) {
+ ret = PTR_ERR(cq->umem);
+ goto err_cq;
+ }
+
+ npages = ib_umem_page_count(cq->umem);
+ } else {
+ cq->is_kernel = true;
+
+ /* One extra page for shared ring state */
+ npages = 1 + (entries * sizeof(struct pvrdma_cqe) +
+ PAGE_SIZE - 1) / PAGE_SIZE;
+
+ /* Skip header page. */
+ cq->offset = PAGE_SIZE;
+ }
+
+ if (npages < 0 || npages > PVRDMA_PAGE_DIR_MAX_PAGES) {
+ dev_warn(&dev->pdev->dev,
+ "overflow pages in completion queue\n");
+ ret = -EINVAL;
+ goto err_umem;
+ }
+
+ ret = pvrdma_page_dir_init(dev, &cq->pdir, npages, cq->is_kernel);
+ if (ret) {
+ dev_warn(&dev->pdev->dev,
+ "could not allocate page directory\n");
+ goto err_umem;
+ }
+
+ /* Ring state is always the first page. Set in library for user cq. */
+ if (cq->is_kernel)
+ cq->ring_state = cq->pdir.pages[0];
+ else
+ pvrdma_page_dir_insert_umem(&cq->pdir, cq->umem, 0);
+
+ atomic_set(&cq->refcnt, 1);
+ init_waitqueue_head(&cq->wait);
+ spin_lock_init(&cq->cq_lock);
+
+ memset(cmd, 0, sizeof(*cmd));
+ cmd->hdr.cmd = PVRDMA_CMD_CREATE_CQ;
+ cmd->nchunks = npages;
+ cmd->ctx_handle = (context) ?
+ (u64)to_vucontext(context)->ctx_handle : 0;
+ cmd->cqe = entries;
+ cmd->pdir_dma = cq->pdir.dir_dma;
+ ret = pvrdma_cmd_post(dev, &req, &rsp, PVRDMA_CMD_CREATE_CQ_RESP);
+ if (ret < 0) {
+ dev_warn(&dev->pdev->dev,
+ "could not create completion queue, error: %d\n", ret);
+ goto err_page_dir;
+ }
+
+ cq->ibcq.cqe = resp->cqe;
+ cq->cq_handle = resp->cq_handle;
+ spin_lock_irqsave(&dev->cq_tbl_lock, flags);
+ dev->cq_tbl[cq->cq_handle % dev->dsr->caps.max_cq] = cq;
+ spin_unlock_irqrestore(&dev->cq_tbl_lock, flags);
+
+ if (context) {
+ cq->uar = &(to_vucontext(context)->uar);
+
+ /* Copy udata back. */
+ if (ib_copy_to_udata(udata, &cq->cq_handle, sizeof(__u32))) {
+ dev_warn(&dev->pdev->dev,
+ "failed to copy back udata\n");
+ pvrdma_destroy_cq(&cq->ibcq);
+ return ERR_PTR(-EINVAL);
+ }
+ }
+
+ return &cq->ibcq;
+
+err_page_dir:
+ pvrdma_page_dir_cleanup(dev, &cq->pdir);
+err_umem:
+ if (context)
+ ib_umem_release(cq->umem);
+err_cq:
+ atomic_dec(&dev->num_cqs);
+ kfree(cq);
+
+ return ERR_PTR(ret);
+}
+
+static void pvrdma_free_cq(struct pvrdma_dev *dev, struct pvrdma_cq *cq)
+{
+ atomic_dec(&cq->refcnt);
+ wait_event(cq->wait, !atomic_read(&cq->refcnt));
+
+ if (!cq->is_kernel)
+ ib_umem_release(cq->umem);
+
+ pvrdma_page_dir_cleanup(dev, &cq->pdir);
+ kfree(cq);
+}
+
+/**
+ * pvrdma_destroy_cq - destroy completion queue
+ * @cq: the completion queue to destroy.
+ *
+ * @return: 0 for success.
+ */
+int pvrdma_destroy_cq(struct ib_cq *cq)
+{
+ struct pvrdma_cq *vcq = to_vcq(cq);
+ union pvrdma_cmd_req req;
+ struct pvrdma_cmd_destroy_cq *cmd = &req.destroy_cq;
+ struct pvrdma_dev *dev = to_vdev(cq->device);
+ unsigned long flags;
+ int ret;
+
+ memset(cmd, 0, sizeof(*cmd));
+ cmd->hdr.cmd = PVRDMA_CMD_DESTROY_CQ;
+ cmd->cq_handle = vcq->cq_handle;
+
+ ret = pvrdma_cmd_post(dev, &req, NULL, 0);
+ if (ret < 0)
+ dev_warn(&dev->pdev->dev,
+ "could not destroy completion queue, error: %d\n",
+ ret);
+
+ /* free cq's resources */
+ spin_lock_irqsave(&dev->cq_tbl_lock, flags);
+ dev->cq_tbl[vcq->cq_handle] = NULL;
+ spin_unlock_irqrestore(&dev->cq_tbl_lock, flags);
+
+ pvrdma_free_cq(dev, vcq);
+ atomic_dec(&dev->num_cqs);
+
+ return ret;
+}
+
+/**
+ * pvrdma_modify_cq - modify the CQ moderation parameters
+ * @ibcq: the CQ to modify
+ * @cq_count: number of CQEs that will trigger an event
+ * @cq_period: max period of time in usec before triggering an event
+ *
+ * @return: -EOPNOTSUPP as CQ resize is not supported.
+ */
+int pvrdma_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline struct pvrdma_cqe *get_cqe(struct pvrdma_cq *cq, int i)
+{
+ return (struct pvrdma_cqe *)pvrdma_page_dir_get_ptr(
+ &cq->pdir,
+ cq->offset +
+ sizeof(struct pvrdma_cqe) * i);
+}
+
+void _pvrdma_flush_cqe(struct pvrdma_qp *qp, struct pvrdma_cq *cq)
+{
+ int head;
+ int has_data;
+
+ if (!cq->is_kernel)
+ return;
+
+ /* Lock held */
+ has_data = pvrdma_idx_ring_has_data(&cq->ring_state->rx,
+ cq->ibcq.cqe, &head);
+ if (unlikely(has_data > 0)) {
+ int items;
+ int curr;
+ int tail = pvrdma_idx(&cq->ring_state->rx.prod_tail,
+ cq->ibcq.cqe);
+ struct pvrdma_cqe *cqe;
+ struct pvrdma_cqe *curr_cqe;
+
+ items = (tail > head) ? (tail - head) :
+ (cq->ibcq.cqe - head + tail);
+ curr = --tail;
+ while (items-- > 0) {
+ if (curr < 0)
+ curr = cq->ibcq.cqe - 1;
+ if (tail < 0)
+ tail = cq->ibcq.cqe - 1;
+ curr_cqe = get_cqe(cq, curr);
+ if ((curr_cqe->qp & 0xFFFF) != qp->qp_handle) {
+ if (curr != tail) {
+ cqe = get_cqe(cq, tail);
+ *cqe = *curr_cqe;
+ }
+ tail--;
+ } else {
+ pvrdma_idx_ring_inc(
+ &cq->ring_state->rx.cons_head,
+ cq->ibcq.cqe);
+ }
+ curr--;
+ }
+ }
+}
+
+static int pvrdma_poll_one(struct pvrdma_cq *cq, struct pvrdma_qp **cur_qp,
+ struct ib_wc *wc)
+{
+ struct pvrdma_dev *dev = to_vdev(cq->ibcq.device);
+ int has_data;
+ unsigned int head;
+ bool tried = false;
+ struct pvrdma_cqe *cqe;
+
+retry:
+ has_data = pvrdma_idx_ring_has_data(&cq->ring_state->rx,
+ cq->ibcq.cqe, &head);
+ if (has_data == 0) {
+ if (tried)
+ return -EAGAIN;
+
+ pvrdma_write_uar_cq(dev, cq->cq_handle | PVRDMA_UAR_CQ_POLL);
+
+ tried = true;
+ goto retry;
+ } else if (has_data == PVRDMA_INVALID_IDX) {
+ dev_err(&dev->pdev->dev, "CQ ring state invalid\n");
+ return -EAGAIN;
+ }
+
+ cqe = get_cqe(cq, head);
+
+ /* Ensure cqe is valid. */
+ rmb();
+ if (dev->qp_tbl[cqe->qp & 0xffff])
+ *cur_qp = (struct pvrdma_qp *)dev->qp_tbl[cqe->qp & 0xffff];
+ else
+ return -EAGAIN;
+
+ wc->opcode = pvrdma_wc_opcode_to_ib(cqe->opcode);
+ wc->status = pvrdma_wc_status_to_ib(cqe->status);
+ wc->wr_id = cqe->wr_id;
+ wc->qp = &(*cur_qp)->ibqp;
+ wc->byte_len = cqe->byte_len;
+ wc->ex.imm_data = cqe->imm_data;
+ wc->src_qp = cqe->src_qp;
+ wc->wc_flags = pvrdma_wc_flags_to_ib(cqe->wc_flags);
+ wc->pkey_index = cqe->pkey_index;
+ wc->slid = cqe->slid;
+ wc->sl = cqe->sl;
+ wc->dlid_path_bits = cqe->dlid_path_bits;
+ wc->port_num = cqe->port_num;
+ wc->vendor_err = 0;
+
+ /* Update shared ring state */
+ pvrdma_idx_ring_inc(&cq->ring_state->rx.cons_head, cq->ibcq.cqe);
+
+ return 0;
+}
+
+/**
+ * pvrdma_poll_cq - poll for work completion queue entries
+ * @ibcq: completion queue
+ * @num_entries: the maximum number of entries
+ * @entry: pointer to work completion array
+ *
+ * @return: number of polled completion entries
+ */
+int pvrdma_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
+{
+ struct pvrdma_cq *cq = to_vcq(ibcq);
+ struct pvrdma_qp *cur_qp = NULL;
+ unsigned long flags;
+ int npolled;
+
+ if (num_entries < 1 || wc == NULL)
+ return 0;
+
+ spin_lock_irqsave(&cq->cq_lock, flags);
+ for (npolled = 0; npolled < num_entries; ++npolled) {
+ if (pvrdma_poll_one(cq, &cur_qp, wc + npolled))
+ break;
+ }
+
+ spin_unlock_irqrestore(&cq->cq_lock, flags);
+
+ /* Ensure we do not return errors from poll_cq */
+ return npolled;
+}
+
+/**
+ * pvrdma_resize_cq - resize CQ
+ * @ibcq: the completion queue
+ * @entries: CQ entries
+ * @udata: user data
+ *
+ * @return: -EOPNOTSUPP as CQ resize is not supported.
+ */
+int pvrdma_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)
+{
+ return -EOPNOTSUPP;
+}
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h
new file mode 100644
index 0000000..c067686
--- /dev/null
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_dev_api.h
@@ -0,0 +1,586 @@
+/*
+ * Copyright (c) 2012-2016 VMware, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of EITHER the GNU General Public License
+ * version 2 as published by the Free Software Foundation or the BSD
+ * 2-Clause License. This program is distributed in the hope that it
+ * will be useful, but WITHOUT ANY WARRANTY; WITHOUT EVEN THE IMPLIED
+ * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU General Public License version 2 for more details at
+ * http://www.gnu.org/licenses/old-licenses/gpl-2.0.en.html.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program available in the file COPYING in the main
+ * directory of this source tree.
+ *
+ * The BSD 2-Clause License
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __PVRDMA_DEV_API_H__
+#define __PVRDMA_DEV_API_H__
+
+#include <linux/types.h>
+
+#include "pvrdma_verbs.h"
+
+#define PVRDMA_VERSION 17
+#define PVRDMA_BOARD_ID 1
+#define PVRDMA_REV_ID 1
+
+/*
+ * Masks and accessors for page directory, which is a two-level lookup:
+ * page directory -> page table -> page. Only one directory for now, but we
+ * could expand that easily. 9 bits for tables, 9 bits for pages, gives one
+ * gigabyte for memory regions and so forth.
+ */
+
+#define PVRDMA_PDIR_SHIFT 18
+#define PVRDMA_PTABLE_SHIFT 9
+#define PVRDMA_PAGE_DIR_DIR(x) (((x) >> PVRDMA_PDIR_SHIFT) & 0x1)
+#define PVRDMA_PAGE_DIR_TABLE(x) (((x) >> PVRDMA_PTABLE_SHIFT) & 0x1ff)
+#define PVRDMA_PAGE_DIR_PAGE(x) ((x) & 0x1ff)
+#define PVRDMA_PAGE_DIR_MAX_PAGES (1 * 512 * 512)
+#define PVRDMA_MAX_FAST_REG_PAGES 128
+
+/*
+ * Max MSI-X vectors.
+ */
+
+#define PVRDMA_MAX_INTERRUPTS 3
+
+/* Register offsets within PCI resource on BAR1. */
+#define PVRDMA_REG_VERSION 0x00 /* R: Version of device. */
+#define PVRDMA_REG_DSRLOW 0x04 /* W: Device shared region low PA. */
+#define PVRDMA_REG_DSRHIGH 0x08 /* W: Device shared region high PA. */
+#define PVRDMA_REG_CTL 0x0c /* W: PVRDMA_DEVICE_CTL */
+#define PVRDMA_REG_REQUEST 0x10 /* W: Indicate device request. */
+#define PVRDMA_REG_ERR 0x14 /* R: Device error. */
+#define PVRDMA_REG_ICR 0x18 /* R: Interrupt cause. */
+#define PVRDMA_REG_IMR 0x1c /* R/W: Interrupt mask. */
+#define PVRDMA_REG_MACL 0x20 /* R/W: MAC address low. */
+#define PVRDMA_REG_MACH 0x24 /* R/W: MAC address high. */
+
+/* Object flags. */
+#define PVRDMA_CQ_FLAG_ARMED_SOL BIT(0) /* Armed for solicited-only. */
+#define PVRDMA_CQ_FLAG_ARMED BIT(1) /* Armed. */
+#define PVRDMA_MR_FLAG_DMA BIT(0) /* DMA region. */
+#define PVRDMA_MR_FLAG_FRMR BIT(1) /* Fast reg memory region. */
+
+/*
+ * Atomic operation capability (masked versions are extended atomic
+ * operations.
+ */
+
+#define PVRDMA_ATOMIC_OP_COMP_SWAP BIT(0) /* Compare and swap. */
+#define PVRDMA_ATOMIC_OP_FETCH_ADD BIT(1) /* Fetch and add. */
+#define PVRDMA_ATOMIC_OP_MASK_COMP_SWAP BIT(2) /* Masked compare and swap. */
+#define PVRDMA_ATOMIC_OP_MASK_FETCH_ADD BIT(3) /* Masked fetch and add. */
+
+/*
+ * Base Memory Management Extension flags to support Fast Reg Memory Regions
+ * and Fast Reg Work Requests. Each flag represents a verb operation and we
+ * must support all of them to qualify for the BMME device cap.
+ */
+
+#define PVRDMA_BMME_FLAG_LOCAL_INV BIT(0) /* Local Invalidate. */
+#define PVRDMA_BMME_FLAG_REMOTE_INV BIT(1) /* Remote Invalidate. */
+#define PVRDMA_BMME_FLAG_FAST_REG_WR BIT(2) /* Fast Reg Work Request. */
+
+/*
+ * GID types. The interpretation of the gid_types bit field in the device
+ * capabilities will depend on the device mode. For now, the device only
+ * supports RoCE as mode, so only the different GID types for RoCE are
+ * defined.
+ */
+
+#define PVRDMA_GID_TYPE_FLAG_ROCE_V1 BIT(0)
+#define PVRDMA_GID_TYPE_FLAG_ROCE_V2 BIT(1)
+
+enum pvrdma_pci_resource {
+ PVRDMA_PCI_RESOURCE_MSIX, /* BAR0: MSI-X, MMIO. */
+ PVRDMA_PCI_RESOURCE_REG, /* BAR1: Registers, MMIO. */
+ PVRDMA_PCI_RESOURCE_UAR, /* BAR2: UAR pages, MMIO, 64-bit. */
+ PVRDMA_PCI_RESOURCE_LAST, /* Last. */
+};
+
+enum pvrdma_device_ctl {
+ PVRDMA_DEVICE_CTL_ACTIVATE, /* Activate device. */
+ PVRDMA_DEVICE_CTL_QUIESCE, /* Quiesce device. */
+ PVRDMA_DEVICE_CTL_RESET, /* Reset device. */
+};
+
+enum pvrdma_intr_vector {
+ PVRDMA_INTR_VECTOR_RESPONSE, /* Command response. */
+ PVRDMA_INTR_VECTOR_ASYNC, /* Async events. */
+ PVRDMA_INTR_VECTOR_CQ, /* CQ notification. */
+ /* Additional CQ notification vectors. */
+};
+
+enum pvrdma_intr_cause {
+ PVRDMA_INTR_CAUSE_RESPONSE = (1 << PVRDMA_INTR_VECTOR_RESPONSE),
+ PVRDMA_INTR_CAUSE_ASYNC = (1 << PVRDMA_INTR_VECTOR_ASYNC),
+ PVRDMA_INTR_CAUSE_CQ = (1 << PVRDMA_INTR_VECTOR_CQ),
+};
+
+enum pvrdma_intr_type {
+ PVRDMA_INTR_TYPE_INTX, /* Legacy. */
+ PVRDMA_INTR_TYPE_MSI, /* MSI. */
+ PVRDMA_INTR_TYPE_MSIX, /* MSI-X. */
+};
+
+enum pvrdma_gos_bits {
+ PVRDMA_GOS_BITS_UNK, /* Unknown. */
+ PVRDMA_GOS_BITS_32, /* 32-bit. */
+ PVRDMA_GOS_BITS_64, /* 64-bit. */
+};
+
+enum pvrdma_gos_type {
+ PVRDMA_GOS_TYPE_UNK, /* Unknown. */
+ PVRDMA_GOS_TYPE_LINUX, /* Linux. */
+};
+
+enum pvrdma_device_mode {
+ PVRDMA_DEVICE_MODE_ROCE, /* RoCE. */
+ PVRDMA_DEVICE_MODE_IWARP, /* iWarp. */
+ PVRDMA_DEVICE_MODE_IB, /* InfiniBand. */
+};
+
+struct pvrdma_gos_info {
+ u32 gos_bits:2; /* W: PVRDMA_GOS_BITS_ */
+ u32 gos_type:4; /* W: PVRDMA_GOS_TYPE_ */
+ u32 gos_ver:16; /* W: Guest OS version. */
+ u32 gos_misc:10; /* W: Other. */
+ u32 pad; /* Pad to 8-byte alignment. */
+};
+
+struct pvrdma_device_caps {
+ u64 fw_ver; /* R: Query device. */
+ __be64 node_guid;
+ __be64 sys_image_guid;
+ u64 max_mr_size;
+ u64 page_size_cap;
+ u64 atomic_arg_sizes; /* EX verbs. */
+ u32 ex_comp_mask; /* EX verbs. */
+ u32 device_cap_flags2; /* EX verbs. */
+ u32 max_fa_bit_boundary; /* EX verbs. */
+ u32 log_max_atomic_inline_arg; /* EX verbs. */
+ u32 vendor_id;
+ u32 vendor_part_id;
+ u32 hw_ver;
+ u32 max_qp;
+ u32 max_qp_wr;
+ u32 device_cap_flags;
+ u32 max_sge;
+ u32 max_sge_rd;
+ u32 max_cq;
+ u32 max_cqe;
+ u32 max_mr;
+ u32 max_pd;
+ u32 max_qp_rd_atom;
+ u32 max_ee_rd_atom;
+ u32 max_res_rd_atom;
+ u32 max_qp_init_rd_atom;
+ u32 max_ee_init_rd_atom;
+ u32 max_ee;
+ u32 max_rdd;
+ u32 max_mw;
+ u32 max_raw_ipv6_qp;
+ u32 max_raw_ethy_qp;
+ u32 max_mcast_grp;
+ u32 max_mcast_qp_attach;
+ u32 max_total_mcast_qp_attach;
+ u32 max_ah;
+ u32 max_fmr;
+ u32 max_map_per_fmr;
+ u32 max_srq;
+ u32 max_srq_wr;
+ u32 max_srq_sge;
+ u32 max_uar;
+ u32 gid_tbl_len;
+ u16 max_pkeys;
+ u8 local_ca_ack_delay;
+ u8 phys_port_cnt;
+ u8 mode; /* PVRDMA_DEVICE_MODE_ */
+ u8 atomic_ops; /* PVRDMA_ATOMIC_OP_* bits */
+ u8 bmme_flags; /* FRWR Mem Mgmt Extensions */
+ u8 gid_types; /* PVRDMA_GID_TYPE_FLAG_ */
+ u8 reserved[4];
+};
+
+struct pvrdma_ring_page_info {
+ u32 num_pages; /* Num pages incl. header. */
+ u32 reserved; /* Reserved. */
+ u64 pdir_dma; /* Page directory PA. */
+};
+
+#pragma pack(push, 1)
+
+struct pvrdma_device_shared_region {
+ u32 driver_version; /* W: Driver version. */
+ u32 pad; /* Pad to 8-byte align. */
+ struct pvrdma_gos_info gos_info; /* W: Guest OS information. */
+ u64 cmd_slot_dma; /* W: Command slot address. */
+ u64 resp_slot_dma; /* W: Response slot address. */
+ struct pvrdma_ring_page_info async_ring_pages;
+ /* W: Async ring page info. */
+ struct pvrdma_ring_page_info cq_ring_pages;
+ /* W: CQ ring page info. */
+ u32 uar_pfn; /* W: UAR pageframe. */
+ u32 pad2; /* Pad to 8-byte align. */
+ struct pvrdma_device_caps caps; /* R: Device capabilities. */
+};
+
+#pragma pack(pop)
+
+/* Event types. Currently a 1:1 mapping with enum ib_event. */
+enum pvrdma_eqe_type {
+ PVRDMA_EVENT_CQ_ERR,
+ PVRDMA_EVENT_QP_FATAL,
+ PVRDMA_EVENT_QP_REQ_ERR,
+ PVRDMA_EVENT_QP_ACCESS_ERR,
+ PVRDMA_EVENT_COMM_EST,
+ PVRDMA_EVENT_SQ_DRAINED,
+ PVRDMA_EVENT_PATH_MIG,
+ PVRDMA_EVENT_PATH_MIG_ERR,
+ PVRDMA_EVENT_DEVICE_FATAL,
+ PVRDMA_EVENT_PORT_ACTIVE,
+ PVRDMA_EVENT_PORT_ERR,
+ PVRDMA_EVENT_LID_CHANGE,
+ PVRDMA_EVENT_PKEY_CHANGE,
+ PVRDMA_EVENT_SM_CHANGE,
+ PVRDMA_EVENT_SRQ_ERR,
+ PVRDMA_EVENT_SRQ_LIMIT_REACHED,
+ PVRDMA_EVENT_QP_LAST_WQE_REACHED,
+ PVRDMA_EVENT_CLIENT_REREGISTER,
+ PVRDMA_EVENT_GID_CHANGE,
+};
+
+/* Event queue element. */
+struct pvrdma_eqe {
+ u32 type; /* Event type. */
+ u32 info; /* Handle, other. */
+};
+
+/* CQ notification queue element. */
+struct pvrdma_cqne {
+ u32 info; /* Handle */
+};
+
+enum {
+ PVRDMA_CMD_FIRST,
+ PVRDMA_CMD_QUERY_PORT = PVRDMA_CMD_FIRST,
+ PVRDMA_CMD_QUERY_PKEY,
+ PVRDMA_CMD_CREATE_PD,
+ PVRDMA_CMD_DESTROY_PD,
+ PVRDMA_CMD_CREATE_MR,
+ PVRDMA_CMD_DESTROY_MR,
+ PVRDMA_CMD_CREATE_CQ,
+ PVRDMA_CMD_RESIZE_CQ,
+ PVRDMA_CMD_DESTROY_CQ,
+ PVRDMA_CMD_CREATE_QP,
+ PVRDMA_CMD_MODIFY_QP,
+ PVRDMA_CMD_QUERY_QP,
+ PVRDMA_CMD_DESTROY_QP,
+ PVRDMA_CMD_CREATE_UC,
+ PVRDMA_CMD_DESTROY_UC,
+ PVRDMA_CMD_CREATE_BIND,
+ PVRDMA_CMD_DESTROY_BIND,
+ PVRDMA_CMD_MAX,
+};
+
+enum {
+ PVRDMA_CMD_FIRST_RESP = (1 << 31),
+ PVRDMA_CMD_QUERY_PORT_RESP = PVRDMA_CMD_FIRST_RESP,
+ PVRDMA_CMD_QUERY_PKEY_RESP,
+ PVRDMA_CMD_CREATE_PD_RESP,
+ PVRDMA_CMD_DESTROY_PD_RESP_NOOP,
+ PVRDMA_CMD_CREATE_MR_RESP,
+ PVRDMA_CMD_DESTROY_MR_RESP_NOOP,
+ PVRDMA_CMD_CREATE_CQ_RESP,
+ PVRDMA_CMD_RESIZE_CQ_RESP,
+ PVRDMA_CMD_DESTROY_CQ_RESP_NOOP,
+ PVRDMA_CMD_CREATE_QP_RESP,
+ PVRDMA_CMD_MODIFY_QP_RESP,
+ PVRDMA_CMD_QUERY_QP_RESP,
+ PVRDMA_CMD_DESTROY_QP_RESP,
+ PVRDMA_CMD_CREATE_UC_RESP,
+ PVRDMA_CMD_DESTROY_UC_RESP_NOOP,
+ PVRDMA_CMD_CREATE_BIND_RESP_NOOP,
+ PVRDMA_CMD_DESTROY_BIND_RESP_NOOP,
+ PVRDMA_CMD_MAX_RESP,
+};
+
+struct pvrdma_cmd_hdr {
+ u64 response; /* Key for response lookup. */
+ u32 cmd; /* PVRDMA_CMD_ */
+ u32 reserved; /* Reserved. */
+};
+
+struct pvrdma_cmd_resp_hdr {
+ u64 response; /* From cmd hdr. */
+ u32 ack; /* PVRDMA_CMD_XXX_RESP */
+ u8 err; /* Error. */
+ u8 reserved[3]; /* Reserved. */
+};
+
+struct pvrdma_cmd_query_port {
+ struct pvrdma_cmd_hdr hdr;
+ u8 port_num;
+ u8 reserved[7];
+};
+
+struct pvrdma_cmd_query_port_resp {
+ struct pvrdma_cmd_resp_hdr hdr;
+ struct pvrdma_port_attr attrs;
+};
+
+struct pvrdma_cmd_query_pkey {
+ struct pvrdma_cmd_hdr hdr;
+ u8 port_num;
+ u8 index;
+ u8 reserved[6];
+};
+
+struct pvrdma_cmd_query_pkey_resp {
+ struct pvrdma_cmd_resp_hdr hdr;
+ u16 pkey;
+ u8 reserved[6];
+};
+
+struct pvrdma_cmd_create_uc {
+ struct pvrdma_cmd_hdr hdr;
+ u32 pfn; /* UAR page frame number */
+ u8 reserved[4];
+};
+
+struct pvrdma_cmd_create_uc_resp {
+ struct pvrdma_cmd_resp_hdr hdr;
+ u32 ctx_handle;
+ u8 reserved[4];
+};
+
+struct pvrdma_cmd_destroy_uc {
+ struct pvrdma_cmd_hdr hdr;
+ u32 ctx_handle;
+ u8 reserved[4];
+};
+
+struct pvrdma_cmd_create_pd {
+ struct pvrdma_cmd_hdr hdr;
+ u32 ctx_handle;
+ u8 reserved[4];
+};
+
+struct pvrdma_cmd_create_pd_resp {
+ struct pvrdma_cmd_resp_hdr hdr;
+ u32 pd_handle;
+ u8 reserved[4];
+};
+
+struct pvrdma_cmd_destroy_pd {
+ struct pvrdma_cmd_hdr hdr;
+ u32 pd_handle;
+ u8 reserved[4];
+};
+
+struct pvrdma_cmd_create_mr {
+ struct pvrdma_cmd_hdr hdr;
+ u64 start;
+ u64 length;
+ u64 pdir_dma;
+ u32 pd_handle;
+ u32 access_flags;
+ u32 flags;
+ u32 nchunks;
+};
+
+struct pvrdma_cmd_create_mr_resp {
+ struct pvrdma_cmd_resp_hdr hdr;
+ u32 mr_handle;
+ u32 lkey;
+ u32 rkey;
+ u8 reserved[4];
+};
+
+struct pvrdma_cmd_destroy_mr {
+ struct pvrdma_cmd_hdr hdr;
+ u32 mr_handle;
+ u8 reserved[4];
+};
+
+struct pvrdma_cmd_create_cq {
+ struct pvrdma_cmd_hdr hdr;
+ u64 pdir_dma;
+ u32 ctx_handle;
+ u32 cqe;
+ u32 nchunks;
+ u8 reserved[4];
+};
+
+struct pvrdma_cmd_create_cq_resp {
+ struct pvrdma_cmd_resp_hdr hdr;
+ u32 cq_handle;
+ u32 cqe;
+};
+
+struct pvrdma_cmd_resize_cq {
+ struct pvrdma_cmd_hdr hdr;
+ u32 cq_handle;
+ u32 cqe;
+};
+
+struct pvrdma_cmd_resize_cq_resp {
+ struct pvrdma_cmd_resp_hdr hdr;
+ u32 cqe;
+ u8 reserved[4];
+};
+
+struct pvrdma_cmd_destroy_cq {
+ struct pvrdma_cmd_hdr hdr;
+ u32 cq_handle;
+ u8 reserved[4];
+};
+
+struct pvrdma_cmd_create_qp {
+ struct pvrdma_cmd_hdr hdr;
+ u64 pdir_dma;
+ u32 pd_handle;
+ u32 send_cq_handle;
+ u32 recv_cq_handle;
+ u32 srq_handle;
+ u32 max_send_wr;
+ u32 max_recv_wr;
+ u32 max_send_sge;
+ u32 max_recv_sge;
+ u32 max_inline_data;
+ u32 lkey;
+ u32 access_flags;
+ u16 total_chunks;
+ u16 send_chunks;
+ u16 max_atomic_arg;
+ u8 sq_sig_all;
+ u8 qp_type;
+ u8 is_srq;
+ u8 reserved[3];
+};
+
+struct pvrdma_cmd_create_qp_resp {
+ struct pvrdma_cmd_resp_hdr hdr;
+ u32 qpn;
+ u32 max_send_wr;
+ u32 max_recv_wr;
+ u32 max_send_sge;
+ u32 max_recv_sge;
+ u32 max_inline_data;
+};
+
+struct pvrdma_cmd_modify_qp {
+ struct pvrdma_cmd_hdr hdr;
+ u32 qp_handle;
+ u32 attr_mask;
+ struct pvrdma_qp_attr attrs;
+};
+
+struct pvrdma_cmd_query_qp {
+ struct pvrdma_cmd_hdr hdr;
+ u32 qp_handle;
+ u32 attr_mask;
+};
+
+struct pvrdma_cmd_query_qp_resp {
+ struct pvrdma_cmd_resp_hdr hdr;
+ struct pvrdma_qp_attr attrs;
+};
+
+struct pvrdma_cmd_destroy_qp {
+ struct pvrdma_cmd_hdr hdr;
+ u32 qp_handle;
+ u8 reserved[4];
+};
+
+struct pvrdma_cmd_destroy_qp_resp {
+ struct pvrdma_cmd_resp_hdr hdr;
+ u32 events_reported;
+ u8 reserved[4];
+};
+
+struct pvrdma_cmd_create_bind {
+ struct pvrdma_cmd_hdr hdr;
+ u32 mtu;
+ u32 vlan;
+ u32 index;
+ u8 new_gid[16];
+ u8 gid_type;
+ u8 reserved[3];
+};
+
+struct pvrdma_cmd_destroy_bind {
+ struct pvrdma_cmd_hdr hdr;
+ u32 index;
+ u8 dest_gid[16];
+ u8 reserved[4];
+};
+
+union pvrdma_cmd_req {
+ struct pvrdma_cmd_hdr hdr;
+ struct pvrdma_cmd_query_port query_port;
+ struct pvrdma_cmd_query_pkey query_pkey;
+ struct pvrdma_cmd_create_uc create_uc;
+ struct pvrdma_cmd_destroy_uc destroy_uc;
+ struct pvrdma_cmd_create_pd create_pd;
+ struct pvrdma_cmd_destroy_pd destroy_pd;
+ struct pvrdma_cmd_create_mr create_mr;
+ struct pvrdma_cmd_destroy_mr destroy_mr;
+ struct pvrdma_cmd_create_cq create_cq;
+ struct pvrdma_cmd_resize_cq resize_cq;
+ struct pvrdma_cmd_destroy_cq destroy_cq;
+ struct pvrdma_cmd_create_qp create_qp;
+ struct pvrdma_cmd_modify_qp modify_qp;
+ struct pvrdma_cmd_query_qp query_qp;
+ struct pvrdma_cmd_destroy_qp destroy_qp;
+ struct pvrdma_cmd_create_bind create_bind;
+ struct pvrdma_cmd_destroy_bind destroy_bind;
+};
+
+union pvrdma_cmd_resp {
+ struct pvrdma_cmd_resp_hdr hdr;
+ struct pvrdma_cmd_query_port_resp query_port_resp;
+ struct pvrdma_cmd_query_pkey_resp query_pkey_resp;
+ struct pvrdma_cmd_create_uc_resp create_uc_resp;
+ struct pvrdma_cmd_create_pd_resp create_pd_resp;
+ struct pvrdma_cmd_create_mr_resp create_mr_resp;
+ struct pvrdma_cmd_create_cq_resp create_cq_resp;
+ struct pvrdma_cmd_resize_cq_resp resize_cq_resp;
+ struct pvrdma_cmd_create_qp_resp create_qp_resp;
+ struct pvrdma_cmd_query_qp_resp query_qp_resp;
+ struct pvrdma_cmd_destroy_qp_resp destroy_qp_resp;
+};
+
+#endif /* __PVRDMA_DEV_API_H__ */
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_doorbell.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_doorbell.c
new file mode 100644
index 0000000..bf51357
--- /dev/null
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_doorbell.c
@@ -0,0 +1,127 @@
+/*
+ * Copyright (c) 2012-2016 VMware, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of EITHER the GNU General Public License
+ * version 2 as published by the Free Software Foundation or the BSD
+ * 2-Clause License. This program is distributed in the hope that it
+ * will be useful, but WITHOUT ANY WARRANTY; WITHOUT EVEN THE IMPLIED
+ * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU General Public License version 2 for more details at
+ * http://www.gnu.org/licenses/old-licenses/gpl-2.0.en.html.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program available in the file COPYING in the main
+ * directory of this source tree.
+ *
+ * The BSD 2-Clause License
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/bitmap.h>
+#include <linux/errno.h>
+#include <linux/slab.h>
+
+#include "pvrdma.h"
+
+int pvrdma_uar_table_init(struct pvrdma_dev *dev)
+{
+ u32 num = dev->dsr->caps.max_uar;
+ u32 mask = num - 1;
+ struct pvrdma_id_table *tbl = &dev->uar_table.tbl;
+
+ if (!is_power_of_2(num))
+ return -EINVAL;
+
+ tbl->last = 0;
+ tbl->top = 0;
+ tbl->max = num;
+ tbl->mask = mask;
+ spin_lock_init(&tbl->lock);
+ tbl->table = kcalloc(BITS_TO_LONGS(num), sizeof(long), GFP_KERNEL);
+ if (!tbl->table)
+ return -ENOMEM;
+
+ /* 0th UAR is taken by the device. */
+ set_bit(0, tbl->table);
+
+ return 0;
+}
+
+void pvrdma_uar_table_cleanup(struct pvrdma_dev *dev)
+{
+ struct pvrdma_id_table *tbl = &dev->uar_table.tbl;
+
+ kfree(tbl->table);
+}
+
+int pvrdma_uar_alloc(struct pvrdma_dev *dev, struct pvrdma_uar_map *uar)
+{
+ struct pvrdma_id_table *tbl;
+ unsigned long flags;
+ u32 obj;
+
+ tbl = &dev->uar_table.tbl;
+
+ spin_lock_irqsave(&tbl->lock, flags);
+ obj = find_next_zero_bit(tbl->table, tbl->max, tbl->last);
+ if (obj >= tbl->max) {
+ tbl->top = (tbl->top + tbl->max) & tbl->mask;
+ obj = find_first_zero_bit(tbl->table, tbl->max);
+ }
+
+ if (obj >= tbl->max) {
+ spin_unlock_irqrestore(&tbl->lock, flags);
+ return -ENOMEM;
+ }
+
+ set_bit(obj, tbl->table);
+ obj |= tbl->top;
+
+ spin_unlock_irqrestore(&tbl->lock, flags);
+
+ uar->index = obj;
+ uar->pfn = (pci_resource_start(dev->pdev, PVRDMA_PCI_RESOURCE_UAR) >>
+ PAGE_SHIFT) + uar->index;
+
+ return 0;
+}
+
+void pvrdma_uar_free(struct pvrdma_dev *dev, struct pvrdma_uar_map *uar)
+{
+ struct pvrdma_id_table *tbl = &dev->uar_table.tbl;
+ unsigned long flags;
+ u32 obj;
+
+ obj = uar->index & (tbl->max - 1);
+ spin_lock_irqsave(&tbl->lock, flags);
+ clear_bit(obj, tbl->table);
+ tbl->last = min(tbl->last, obj);
+ tbl->top = (tbl->top + tbl->max) & tbl->mask;
+ spin_unlock_irqrestore(&tbl->lock, flags);
+}
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
new file mode 100644
index 0000000..231a1ce
--- /dev/null
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
@@ -0,0 +1,1211 @@
+/*
+ * Copyright (c) 2012-2016 VMware, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of EITHER the GNU General Public License
+ * version 2 as published by the Free Software Foundation or the BSD
+ * 2-Clause License. This program is distributed in the hope that it
+ * will be useful, but WITHOUT ANY WARRANTY; WITHOUT EVEN THE IMPLIED
+ * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU General Public License version 2 for more details at
+ * http://www.gnu.org/licenses/old-licenses/gpl-2.0.en.html.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program available in the file COPYING in the main
+ * directory of this source tree.
+ *
+ * The BSD 2-Clause License
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/errno.h>
+#include <linux/inetdevice.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <rdma/ib_addr.h>
+#include <rdma/ib_smi.h>
+#include <rdma/ib_user_verbs.h>
+#include <net/addrconf.h>
+
+#include "pvrdma.h"
+
+#define DRV_NAME "vmw_pvrdma"
+#define DRV_VERSION "1.0.0.0-k"
+
+static DEFINE_MUTEX(pvrdma_device_list_lock);
+static LIST_HEAD(pvrdma_device_list);
+static struct workqueue_struct *event_wq;
+
+static int pvrdma_add_gid(struct ib_device *ibdev,
+ u8 port_num,
+ unsigned int index,
+ const union ib_gid *gid,
+ const struct ib_gid_attr *attr,
+ void **context);
+static int pvrdma_del_gid(struct ib_device *ibdev,
+ u8 port_num,
+ unsigned int index,
+ void **context);
+
+
+static ssize_t show_hca(struct device *device, struct device_attribute *attr,
+ char *buf)
+{
+ return sprintf(buf, "VMW_PVRDMA-%s\n", DRV_VERSION);
+}
+
+static ssize_t show_rev(struct device *device, struct device_attribute *attr,
+ char *buf)
+{
+ return sprintf(buf, "%d\n", PVRDMA_REV_ID);
+}
+
+static ssize_t show_board(struct device *device, struct device_attribute *attr,
+ char *buf)
+{
+ return sprintf(buf, "%d\n", PVRDMA_BOARD_ID);
+}
+
+static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
+static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
+static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL);
+
+static struct device_attribute *pvrdma_class_attributes[] = {
+ &dev_attr_hw_rev,
+ &dev_attr_hca_type,
+ &dev_attr_board_id
+};
+
+static void pvrdma_get_fw_ver_str(struct ib_device *device, char *str,
+ size_t str_len)
+{
+ struct pvrdma_dev *dev =
+ container_of(device, struct pvrdma_dev, ib_dev);
+ snprintf(str, str_len, "%d.%d.%d\n",
+ (int) (dev->dsr->caps.fw_ver >> 32),
+ (int) (dev->dsr->caps.fw_ver >> 16) & 0xffff,
+ (int) dev->dsr->caps.fw_ver & 0xffff);
+}
+
+static int pvrdma_init_device(struct pvrdma_dev *dev)
+{
+ /* Initialize some device related stuff */
+ spin_lock_init(&dev->cmd_lock);
+ sema_init(&dev->cmd_sema, 1);
+ atomic_set(&dev->num_qps, 0);
+ atomic_set(&dev->num_cqs, 0);
+ atomic_set(&dev->num_pds, 0);
+ atomic_set(&dev->num_ahs, 0);
+
+ return 0;
+}
+
+static int pvrdma_port_immutable(struct ib_device *ibdev, u8 port_num,
+ struct ib_port_immutable *immutable)
+{
+ struct ib_port_attr attr;
+ int err;
+
+ err = pvrdma_query_port(ibdev, port_num, &attr);
+ if (err)
+ return err;
+
+ immutable->pkey_tbl_len = attr.pkey_tbl_len;
+ immutable->gid_tbl_len = attr.gid_tbl_len;
+ immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE;
+ immutable->max_mad_size = IB_MGMT_MAD_SIZE;
+ return 0;
+}
+
+static struct net_device *pvrdma_get_netdev(struct ib_device *ibdev,
+ u8 port_num)
+{
+ struct net_device *netdev;
+ struct pvrdma_dev *dev = to_vdev(ibdev);
+
+ if (port_num != 1)
+ return NULL;
+
+ rcu_read_lock();
+ netdev = dev->netdev;
+ if (netdev)
+ dev_hold(netdev);
+ rcu_read_unlock();
+
+ return netdev;
+}
+
+static int pvrdma_register_device(struct pvrdma_dev *dev)
+{
+ int ret = -1;
+ int i = 0;
+
+ strlcpy(dev->ib_dev.name, "vmw_pvrdma%d", IB_DEVICE_NAME_MAX);
+ dev->ib_dev.node_guid = dev->dsr->caps.node_guid;
+ dev->sys_image_guid = dev->dsr->caps.sys_image_guid;
+ dev->flags = 0;
+ dev->ib_dev.owner = THIS_MODULE;
+ dev->ib_dev.num_comp_vectors = 1;
+ dev->ib_dev.dma_device = &dev->pdev->dev;
+ dev->ib_dev.uverbs_abi_ver = PVRDMA_UVERBS_ABI_VERSION;
+ dev->ib_dev.uverbs_cmd_mask =
+ (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
+ (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
+ (1ull << IB_USER_VERBS_CMD_QUERY_PORT) |
+ (1ull << IB_USER_VERBS_CMD_ALLOC_PD) |
+ (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |
+ (1ull << IB_USER_VERBS_CMD_REG_MR) |
+ (1ull << IB_USER_VERBS_CMD_DEREG_MR) |
+ (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
+ (1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
+ (1ull << IB_USER_VERBS_CMD_POLL_CQ) |
+ (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) |
+ (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) |
+ (1ull << IB_USER_VERBS_CMD_CREATE_QP) |
+ (1ull << IB_USER_VERBS_CMD_MODIFY_QP) |
+ (1ull << IB_USER_VERBS_CMD_QUERY_QP) |
+ (1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
+ (1ull << IB_USER_VERBS_CMD_POST_SEND) |
+ (1ull << IB_USER_VERBS_CMD_POST_RECV) |
+ (1ull << IB_USER_VERBS_CMD_CREATE_AH) |
+ (1ull << IB_USER_VERBS_CMD_DESTROY_AH);
+
+ dev->ib_dev.node_type = RDMA_NODE_IB_CA;
+ dev->ib_dev.phys_port_cnt = dev->dsr->caps.phys_port_cnt;
+
+ dev->ib_dev.query_device = pvrdma_query_device;
+ dev->ib_dev.query_port = pvrdma_query_port;
+ dev->ib_dev.query_gid = pvrdma_query_gid;
+ dev->ib_dev.query_pkey = pvrdma_query_pkey;
+ dev->ib_dev.modify_port = pvrdma_modify_port;
+ dev->ib_dev.alloc_ucontext = pvrdma_alloc_ucontext;
+ dev->ib_dev.dealloc_ucontext = pvrdma_dealloc_ucontext;
+ dev->ib_dev.mmap = pvrdma_mmap;
+ dev->ib_dev.alloc_pd = pvrdma_alloc_pd;
+ dev->ib_dev.dealloc_pd = pvrdma_dealloc_pd;
+ dev->ib_dev.create_ah = pvrdma_create_ah;
+ dev->ib_dev.destroy_ah = pvrdma_destroy_ah;
+ dev->ib_dev.create_qp = pvrdma_create_qp;
+ dev->ib_dev.modify_qp = pvrdma_modify_qp;
+ dev->ib_dev.query_qp = pvrdma_query_qp;
+ dev->ib_dev.destroy_qp = pvrdma_destroy_qp;
+ dev->ib_dev.post_send = pvrdma_post_send;
+ dev->ib_dev.post_recv = pvrdma_post_recv;
+ dev->ib_dev.create_cq = pvrdma_create_cq;
+ dev->ib_dev.modify_cq = pvrdma_modify_cq;
+ dev->ib_dev.resize_cq = pvrdma_resize_cq;
+ dev->ib_dev.destroy_cq = pvrdma_destroy_cq;
+ dev->ib_dev.poll_cq = pvrdma_poll_cq;
+ dev->ib_dev.req_notify_cq = pvrdma_req_notify_cq;
+ dev->ib_dev.get_dma_mr = pvrdma_get_dma_mr;
+ dev->ib_dev.reg_user_mr = pvrdma_reg_user_mr;
+ dev->ib_dev.dereg_mr = pvrdma_dereg_mr;
+ dev->ib_dev.alloc_mr = pvrdma_alloc_mr;
+ dev->ib_dev.map_mr_sg = pvrdma_map_mr_sg;
+ dev->ib_dev.add_gid = pvrdma_add_gid;
+ dev->ib_dev.del_gid = pvrdma_del_gid;
+ dev->ib_dev.get_netdev = pvrdma_get_netdev;
+ dev->ib_dev.get_port_immutable = pvrdma_port_immutable;
+ dev->ib_dev.get_link_layer = pvrdma_port_link_layer;
+ dev->ib_dev.get_dev_fw_str = pvrdma_get_fw_ver_str;
+
+ mutex_init(&dev->port_mutex);
+ spin_lock_init(&dev->desc_lock);
+
+ dev->cq_tbl = kcalloc(dev->dsr->caps.max_cq, sizeof(void *),
+ GFP_KERNEL);
+ if (!dev->cq_tbl)
+ return ret;
+ spin_lock_init(&dev->cq_tbl_lock);
+
+ dev->qp_tbl = kcalloc(dev->dsr->caps.max_qp, sizeof(void *),
+ GFP_KERNEL);
+ if (!dev->qp_tbl)
+ goto err_cq_free;
+ spin_lock_init(&dev->qp_tbl_lock);
+
+ ret = ib_register_device(&dev->ib_dev, NULL);
+ if (ret)
+ goto err_qp_free;
+
+ for (i = 0; i < ARRAY_SIZE(pvrdma_class_attributes); ++i) {
+ ret = device_create_file(&dev->ib_dev.dev,
+ pvrdma_class_attributes[i]);
+ if (ret)
+ goto err_class;
+ }
+
+ dev->ib_active = true;
+
+ return 0;
+
+err_class:
+ ib_unregister_device(&dev->ib_dev);
+err_qp_free:
+ kfree(dev->qp_tbl);
+err_cq_free:
+ kfree(dev->cq_tbl);
+
+ return ret;
+}
+
+static irqreturn_t pvrdma_intr0_handler(int irq, void *dev_id)
+{
+ u32 icr = PVRDMA_INTR_CAUSE_RESPONSE;
+ struct pvrdma_dev *dev = dev_id;
+
+ dev_dbg(&dev->pdev->dev, "interrupt 0 (response) handler\n");
+
+ if (dev->intr.type != PVRDMA_INTR_TYPE_MSIX) {
+ /* Legacy intr */
+ icr = pvrdma_read_reg(dev, PVRDMA_REG_ICR);
+ if (icr == 0)
+ return IRQ_NONE;
+ }
+
+ if (icr == PVRDMA_INTR_CAUSE_RESPONSE)
+ complete(&dev->cmd_done);
+
+ return IRQ_HANDLED;
+}
+
+static void pvrdma_qp_event(struct pvrdma_dev *dev, u32 qpn, int type)
+{
+ struct pvrdma_qp *qp;
+ unsigned long flags;
+
+ spin_lock_irqsave(&dev->qp_tbl_lock, flags);
+ qp = dev->qp_tbl[qpn % dev->dsr->caps.max_qp];
+ if (qp)
+ atomic_inc(&qp->refcnt);
+ spin_unlock_irqrestore(&dev->qp_tbl_lock, flags);
+
+ if (qp && qp->ibqp.event_handler) {
+ struct ib_qp *ibqp = &qp->ibqp;
+ struct ib_event e;
+
+ e.device = ibqp->device;
+ e.element.qp = ibqp;
+ e.event = type; /* 1:1 mapping for now. */
+ ibqp->event_handler(&e, ibqp->qp_context);
+ }
+ if (qp) {
+ atomic_dec(&qp->refcnt);
+ if (atomic_read(&qp->refcnt) == 0)
+ wake_up(&qp->wait);
+ }
+}
+
+static void pvrdma_cq_event(struct pvrdma_dev *dev, u32 cqn, int type)
+{
+ struct pvrdma_cq *cq;
+ unsigned long flags;
+
+ spin_lock_irqsave(&dev->cq_tbl_lock, flags);
+ cq = dev->cq_tbl[cqn % dev->dsr->caps.max_cq];
+ if (cq)
+ atomic_inc(&cq->refcnt);
+ spin_unlock_irqrestore(&dev->cq_tbl_lock, flags);
+
+ if (cq && cq->ibcq.event_handler) {
+ struct ib_cq *ibcq = &cq->ibcq;
+ struct ib_event e;
+
+ e.device = ibcq->device;
+ e.element.cq = ibcq;
+ e.event = type; /* 1:1 mapping for now. */
+ ibcq->event_handler(&e, ibcq->cq_context);
+ }
+ if (cq) {
+ atomic_dec(&cq->refcnt);
+ if (atomic_read(&cq->refcnt) == 0)
+ wake_up(&cq->wait);
+ }
+}
+
+static void pvrdma_dispatch_event(struct pvrdma_dev *dev, int port,
+ enum ib_event_type event)
+{
+ struct ib_event ib_event;
+
+ memset(&ib_event, 0, sizeof(ib_event));
+ ib_event.device = &dev->ib_dev;
+ ib_event.element.port_num = port;
+ ib_event.event = event;
+ ib_dispatch_event(&ib_event);
+}
+
+static void pvrdma_dev_event(struct pvrdma_dev *dev, u8 port, int type)
+{
+ if (port < 1 || port > dev->dsr->caps.phys_port_cnt) {
+ dev_warn(&dev->pdev->dev, "event on port %d\n", port);
+ return;
+ }
+
+ pvrdma_dispatch_event(dev, port, type);
+}
+
+static inline struct pvrdma_eqe *get_eqe(struct pvrdma_dev *dev, unsigned int i)
+{
+ return (struct pvrdma_eqe *)pvrdma_page_dir_get_ptr(
+ &dev->async_pdir,
+ PAGE_SIZE +
+ sizeof(struct pvrdma_eqe) * i);
+}
+
+static irqreturn_t pvrdma_intr1_handler(int irq, void *dev_id)
+{
+ struct pvrdma_dev *dev = dev_id;
+ struct pvrdma_ring *ring = &dev->async_ring_state->rx;
+ int ring_slots = (dev->dsr->async_ring_pages.num_pages - 1) *
+ PAGE_SIZE / sizeof(struct pvrdma_eqe);
+ unsigned int head;
+
+ dev_dbg(&dev->pdev->dev, "interrupt 1 (async event) handler\n");
+
+ /*
+ * Don't process events until the IB device is registered. Otherwise
+ * we'll try to ib_dispatch_event() on an invalid device.
+ */
+ if (!dev->ib_active)
+ return IRQ_HANDLED;
+
+ while (pvrdma_idx_ring_has_data(ring, ring_slots, &head) > 0) {
+ struct pvrdma_eqe *eqe;
+
+ eqe = get_eqe(dev, head);
+
+ switch (eqe->type) {
+ case PVRDMA_EVENT_QP_FATAL:
+ case PVRDMA_EVENT_QP_REQ_ERR:
+ case PVRDMA_EVENT_QP_ACCESS_ERR:
+ case PVRDMA_EVENT_COMM_EST:
+ case PVRDMA_EVENT_SQ_DRAINED:
+ case PVRDMA_EVENT_PATH_MIG:
+ case PVRDMA_EVENT_PATH_MIG_ERR:
+ case PVRDMA_EVENT_QP_LAST_WQE_REACHED:
+ pvrdma_qp_event(dev, eqe->info, eqe->type);
+ break;
+
+ case PVRDMA_EVENT_CQ_ERR:
+ pvrdma_cq_event(dev, eqe->info, eqe->type);
+ break;
+
+ case PVRDMA_EVENT_SRQ_ERR:
+ case PVRDMA_EVENT_SRQ_LIMIT_REACHED:
+ break;
+
+ case PVRDMA_EVENT_PORT_ACTIVE:
+ case PVRDMA_EVENT_PORT_ERR:
+ case PVRDMA_EVENT_LID_CHANGE:
+ case PVRDMA_EVENT_PKEY_CHANGE:
+ case PVRDMA_EVENT_SM_CHANGE:
+ case PVRDMA_EVENT_CLIENT_REREGISTER:
+ case PVRDMA_EVENT_GID_CHANGE:
+ pvrdma_dev_event(dev, eqe->info, eqe->type);
+ break;
+
+ case PVRDMA_EVENT_DEVICE_FATAL:
+ pvrdma_dev_event(dev, 1, eqe->type);
+ break;
+
+ default:
+ break;
+ }
+
+ pvrdma_idx_ring_inc(&ring->cons_head, ring_slots);
+ }
+
+ return IRQ_HANDLED;
+}
+
+static inline struct pvrdma_cqne *get_cqne(struct pvrdma_dev *dev,
+ unsigned int i)
+{
+ return (struct pvrdma_cqne *)pvrdma_page_dir_get_ptr(
+ &dev->cq_pdir,
+ PAGE_SIZE +
+ sizeof(struct pvrdma_cqne) * i);
+}
+
+static irqreturn_t pvrdma_intrx_handler(int irq, void *dev_id)
+{
+ struct pvrdma_dev *dev = dev_id;
+ struct pvrdma_ring *ring = &dev->cq_ring_state->rx;
+ int ring_slots = (dev->dsr->cq_ring_pages.num_pages - 1) * PAGE_SIZE /
+ sizeof(struct pvrdma_cqne);
+ unsigned int head;
+ unsigned long flags;
+
+ dev_dbg(&dev->pdev->dev, "interrupt x (completion) handler\n");
+
+ while (pvrdma_idx_ring_has_data(ring, ring_slots, &head) > 0) {
+ struct pvrdma_cqne *cqne;
+ struct pvrdma_cq *cq;
+
+ cqne = get_cqne(dev, head);
+ spin_lock_irqsave(&dev->cq_tbl_lock, flags);
+ cq = dev->cq_tbl[cqne->info % dev->dsr->caps.max_cq];
+ if (cq)
+ atomic_inc(&cq->refcnt);
+ spin_unlock_irqrestore(&dev->cq_tbl_lock, flags);
+
+ if (cq && cq->ibcq.comp_handler)
+ cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
+ if (cq) {
+ atomic_dec(&cq->refcnt);
+ if (atomic_read(&cq->refcnt))
+ wake_up(&cq->wait);
+ }
+ pvrdma_idx_ring_inc(&ring->cons_head, ring_slots);
+ }
+
+ return IRQ_HANDLED;
+}
+
+static void pvrdma_disable_msi_all(struct pvrdma_dev *dev)
+{
+ if (dev->intr.type == PVRDMA_INTR_TYPE_MSIX)
+ pci_disable_msix(dev->pdev);
+ else if (dev->intr.type == PVRDMA_INTR_TYPE_MSI)
+ pci_disable_msi(dev->pdev);
+}
+
+static void pvrdma_free_irq(struct pvrdma_dev *dev)
+{
+ int i;
+
+ dev_dbg(&dev->pdev->dev, "freeing interrupts\n");
+
+ if (dev->intr.type == PVRDMA_INTR_TYPE_MSIX) {
+ for (i = 0; i < dev->intr.size; i++) {
+ if (dev->intr.enabled[i]) {
+ free_irq(dev->intr.msix_entry[i].vector, dev);
+ dev->intr.enabled[i] = 0;
+ }
+ }
+ } else if (dev->intr.type == PVRDMA_INTR_TYPE_INTX ||
+ dev->intr.type == PVRDMA_INTR_TYPE_MSI) {
+ free_irq(dev->pdev->irq, dev);
+ }
+}
+
+static void pvrdma_enable_intrs(struct pvrdma_dev *dev)
+{
+ dev_dbg(&dev->pdev->dev, "enable interrupts\n");
+ pvrdma_write_reg(dev, PVRDMA_REG_IMR, 0);
+}
+
+static void pvrdma_disable_intrs(struct pvrdma_dev *dev)
+{
+ dev_dbg(&dev->pdev->dev, "disable interrupts\n");
+ pvrdma_write_reg(dev, PVRDMA_REG_IMR, ~0);
+}
+
+static int pvrdma_enable_msix(struct pci_dev *pdev, struct pvrdma_dev *dev)
+{
+ int i;
+ int ret;
+
+ for (i = 0; i < PVRDMA_MAX_INTERRUPTS; i++) {
+ dev->intr.msix_entry[i].entry = i;
+ dev->intr.msix_entry[i].vector = i;
+
+ switch (i) {
+ case 0:
+ /* CMD ring handler */
+ dev->intr.handler[i] = pvrdma_intr0_handler;
+ break;
+ case 1:
+ /* Async event ring handler */
+ dev->intr.handler[i] = pvrdma_intr1_handler;
+ break;
+ default:
+ /* Completion queue handler */
+ dev->intr.handler[i] = pvrdma_intrx_handler;
+ break;
+ }
+ }
+
+ ret = pci_enable_msix(pdev, dev->intr.msix_entry,
+ PVRDMA_MAX_INTERRUPTS);
+ if (!ret) {
+ dev->intr.type = PVRDMA_INTR_TYPE_MSIX;
+ dev->intr.size = PVRDMA_MAX_INTERRUPTS;
+ } else if (ret > 0) {
+ ret = pci_enable_msix(pdev, dev->intr.msix_entry, ret);
+ if (!ret) {
+ dev->intr.type = PVRDMA_INTR_TYPE_MSIX;
+ dev->intr.size = ret;
+ } else {
+ dev->intr.size = 0;
+ }
+ }
+
+ dev_dbg(&pdev->dev, "using interrupt type %d, size %d\n",
+ dev->intr.type, dev->intr.size);
+
+ return ret;
+}
+
+static int pvrdma_alloc_intrs(struct pvrdma_dev *dev)
+{
+ int ret = 0;
+ int i;
+
+ if (pci_find_capability(dev->pdev, PCI_CAP_ID_MSIX) &&
+ pvrdma_enable_msix(dev->pdev, dev)) {
+ /* Try MSI */
+ ret = pci_enable_msi(dev->pdev);
+ if (!ret) {
+ dev->intr.type = PVRDMA_INTR_TYPE_MSI;
+ } else {
+ /* Legacy INTR */
+ dev->intr.type = PVRDMA_INTR_TYPE_INTX;
+ }
+ }
+
+ /* Request First IRQ */
+ switch (dev->intr.type) {
+ case PVRDMA_INTR_TYPE_INTX:
+ case PVRDMA_INTR_TYPE_MSI:
+ ret = request_irq(dev->pdev->irq, pvrdma_intr0_handler,
+ IRQF_SHARED, DRV_NAME, dev);
+ if (ret) {
+ dev_err(&dev->pdev->dev,
+ "failed to request interrupt\n");
+ goto disable_msi;
+ }
+ break;
+ case PVRDMA_INTR_TYPE_MSIX:
+ ret = request_irq(dev->intr.msix_entry[0].vector,
+ pvrdma_intr0_handler, 0, DRV_NAME, dev);
+ if (ret) {
+ dev_err(&dev->pdev->dev,
+ "failed to request interrupt 0\n");
+ goto disable_msi;
+ }
+ dev->intr.enabled[0] = 1;
+ break;
+ default:
+ /* Not reached */
+ break;
+ }
+
+ /* For MSIX: request intr for each vector */
+ if (dev->intr.size > 1) {
+ ret = request_irq(dev->intr.msix_entry[1].vector,
+ pvrdma_intr1_handler, 0, DRV_NAME, dev);
+ if (ret) {
+ dev_err(&dev->pdev->dev,
+ "failed to request interrupt 1\n");
+ goto free_irq;
+ }
+ dev->intr.enabled[1] = 1;
+
+ for (i = 2; i < dev->intr.size; i++) {
+ ret = request_irq(dev->intr.msix_entry[i].vector,
+ pvrdma_intrx_handler, 0,
+ DRV_NAME, dev);
+ if (ret) {
+ dev_err(&dev->pdev->dev,
+ "failed to request interrupt %d\n", i);
+ goto free_irq;
+ }
+ dev->intr.enabled[i] = 1;
+ }
+ }
+
+ return 0;
+
+free_irq:
+ pvrdma_free_irq(dev);
+disable_msi:
+ pvrdma_disable_msi_all(dev);
+ return ret;
+}
+
+static void pvrdma_free_slots(struct pvrdma_dev *dev)
+{
+ struct pci_dev *pdev = dev->pdev;
+
+ if (dev->resp_slot)
+ dma_free_coherent(&pdev->dev, PAGE_SIZE, dev->resp_slot,
+ dev->dsr->resp_slot_dma);
+ if (dev->cmd_slot)
+ dma_free_coherent(&pdev->dev, PAGE_SIZE, dev->cmd_slot,
+ dev->dsr->cmd_slot_dma);
+}
+
+static int pvrdma_add_gid_at_index(struct pvrdma_dev *dev,
+ const union ib_gid *gid,
+ int index)
+{
+ int ret;
+ union pvrdma_cmd_req req;
+ struct pvrdma_cmd_create_bind *cmd_bind = &req.create_bind;
+
+ if (!dev->sgid_tbl) {
+ dev_warn(&dev->pdev->dev, "sgid table not initialized\n");
+ return -EINVAL;
+ }
+
+ memset(cmd_bind, 0, sizeof(*cmd_bind));
+ cmd_bind->hdr.cmd = PVRDMA_CMD_CREATE_BIND;
+ memcpy(cmd_bind->new_gid, gid->raw, 16);
+ cmd_bind->mtu = ib_mtu_enum_to_int(IB_MTU_1024);
+ cmd_bind->vlan = 0xfff;
+ cmd_bind->index = index;
+ cmd_bind->gid_type = PVRDMA_GID_TYPE_FLAG_ROCE_V1;
+
+ ret = pvrdma_cmd_post(dev, &req, NULL, 0);
+ if (ret < 0) {
+ dev_warn(&dev->pdev->dev,
+ "could not create binding, error: %d\n", ret);
+ return -EFAULT;
+ }
+ memcpy(&dev->sgid_tbl[index], gid, sizeof(*gid));
+ return 0;
+}
+
+static int pvrdma_add_gid(struct ib_device *ibdev,
+ u8 port_num,
+ unsigned int index,
+ const union ib_gid *gid,
+ const struct ib_gid_attr *attr,
+ void **context)
+{
+ struct pvrdma_dev *dev = to_vdev(ibdev);
+
+ return pvrdma_add_gid_at_index(dev, gid, index);
+}
+
+static int pvrdma_del_gid_at_index(struct pvrdma_dev *dev, int index)
+{
+ int ret;
+ union pvrdma_cmd_req req;
+ struct pvrdma_cmd_destroy_bind *cmd_dest = &req.destroy_bind;
+
+ /* Update sgid table. */
+ if (!dev->sgid_tbl) {
+ dev_warn(&dev->pdev->dev, "sgid table not initialized\n");
+ return -EINVAL;
+ }
+
+ memset(cmd_dest, 0, sizeof(*cmd_dest));
+ cmd_dest->hdr.cmd = PVRDMA_CMD_DESTROY_BIND;
+ memcpy(cmd_dest->dest_gid, &dev->sgid_tbl[index], 16);
+ cmd_dest->index = index;
+
+ ret = pvrdma_cmd_post(dev, &req, NULL, 0);
+ if (ret < 0) {
+ dev_warn(&dev->pdev->dev,
+ "could not destroy binding, error: %d\n", ret);
+ return ret;
+ }
+ memset(&dev->sgid_tbl[index], 0, 16);
+ return 0;
+}
+
+static int pvrdma_del_gid(struct ib_device *ibdev,
+ u8 port_num,
+ unsigned int index,
+ void **context)
+{
+ struct pvrdma_dev *dev = to_vdev(ibdev);
+
+ dev_dbg(&dev->pdev->dev, "removing gid at index %u from %s",
+ index, dev->netdev->name);
+
+ return pvrdma_del_gid_at_index(dev, index);
+}
+
+static void pvrdma_netdevice_event_handle(struct pvrdma_dev *dev,
+ unsigned long event)
+{
+ switch (event) {
+ case NETDEV_REBOOT:
+ case NETDEV_DOWN:
+ pvrdma_dispatch_event(dev, 1, IB_EVENT_PORT_ERR);
+ break;
+ case NETDEV_UP:
+ pvrdma_dispatch_event(dev, 1, IB_EVENT_PORT_ACTIVE);
+ break;
+ default:
+ dev_dbg(&dev->pdev->dev, "ignore netdevice event %ld on %s\n",
+ event, dev->ib_dev.name);
+ break;
+ }
+}
+
+static void pvrdma_netdevice_event_work(struct work_struct *work)
+{
+ struct pvrdma_netdevice_work *netdev_work;
+ struct pvrdma_dev *dev;
+
+ netdev_work = container_of(work, struct pvrdma_netdevice_work, work);
+
+ mutex_lock(&pvrdma_device_list_lock);
+ list_for_each_entry(dev, &pvrdma_device_list, device_link) {
+ if (dev->netdev == netdev_work->event_netdev) {
+ pvrdma_netdevice_event_handle(dev, netdev_work->event);
+ break;
+ }
+ }
+ mutex_unlock(&pvrdma_device_list_lock);
+
+ kfree(netdev_work);
+}
+
+static int pvrdma_netdevice_event(struct notifier_block *this,
+ unsigned long event, void *ptr)
+{
+ struct net_device *event_netdev = netdev_notifier_info_to_dev(ptr);
+ struct pvrdma_netdevice_work *netdev_work;
+
+ netdev_work = kmalloc(sizeof(*netdev_work), GFP_ATOMIC);
+ if (!netdev_work)
+ return NOTIFY_BAD;
+
+ INIT_WORK(&netdev_work->work, pvrdma_netdevice_event_work);
+ netdev_work->event_netdev = event_netdev;
+ netdev_work->event = event;
+ queue_work(event_wq, &netdev_work->work);
+
+ return NOTIFY_DONE;
+}
+
+static int pvrdma_pci_probe(struct pci_dev *pdev,
+ const struct pci_device_id *id)
+{
+ struct pci_dev *pdev_net;
+ struct pvrdma_dev *dev;
+ int ret;
+ unsigned long start;
+ unsigned long len;
+ unsigned int version;
+ dma_addr_t slot_dma = 0;
+
+ dev_dbg(&pdev->dev, "initializing driver %s\n", pci_name(pdev));
+
+ /* Allocate zero-out device */
+ dev = (struct pvrdma_dev *)ib_alloc_device(sizeof(*dev));
+ if (!dev) {
+ dev_err(&pdev->dev, "failed to allocate IB device\n");
+ return -ENOMEM;
+ }
+
+ mutex_lock(&pvrdma_device_list_lock);
+ list_add(&dev->device_link, &pvrdma_device_list);
+ mutex_unlock(&pvrdma_device_list_lock);
+
+ ret = pvrdma_init_device(dev);
+ if (ret)
+ goto err_free_device;
+
+ dev->pdev = pdev;
+ pci_set_drvdata(pdev, dev);
+
+ ret = pci_enable_device(pdev);
+ if (ret) {
+ dev_err(&pdev->dev, "cannot enable PCI device\n");
+ goto err_free_device;
+ }
+
+ dev_dbg(&pdev->dev, "PCI resource flags BAR0 %#lx\n",
+ pci_resource_flags(pdev, 0));
+ dev_dbg(&pdev->dev, "PCI resource len %#llx\n",
+ (unsigned long long)pci_resource_len(pdev, 0));
+ dev_dbg(&pdev->dev, "PCI resource start %#llx\n",
+ (unsigned long long)pci_resource_start(pdev, 0));
+ dev_dbg(&pdev->dev, "PCI resource flags BAR1 %#lx\n",
+ pci_resource_flags(pdev, 1));
+ dev_dbg(&pdev->dev, "PCI resource len %#llx\n",
+ (unsigned long long)pci_resource_len(pdev, 1));
+ dev_dbg(&pdev->dev, "PCI resource start %#llx\n",
+ (unsigned long long)pci_resource_start(pdev, 1));
+
+ if (!(pci_resource_flags(pdev, 0) & IORESOURCE_MEM) ||
+ !(pci_resource_flags(pdev, 1) & IORESOURCE_MEM)) {
+ dev_err(&pdev->dev, "PCI BAR region not MMIO\n");
+ ret = -ENOMEM;
+ goto err_free_device;
+ }
+
+ ret = pci_request_regions(pdev, DRV_NAME);
+ if (ret) {
+ dev_err(&pdev->dev, "cannot request PCI resources\n");
+ goto err_disable_pdev;
+ }
+
+ /* Enable 64-Bit DMA */
+ if (pci_set_dma_mask(pdev, DMA_BIT_MASK(64)) == 0) {
+ ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
+ if (ret != 0) {
+ dev_err(&pdev->dev,
+ "pci_set_consistent_dma_mask failed\n");
+ goto err_free_resource;
+ }
+ } else {
+ ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
+ if (ret != 0) {
+ dev_err(&pdev->dev,
+ "pci_set_dma_mask failed\n");
+ goto err_free_resource;
+ }
+ }
+
+ pci_set_master(pdev);
+
+ /* Map register space */
+ start = pci_resource_start(dev->pdev, PVRDMA_PCI_RESOURCE_REG);
+ len = pci_resource_len(dev->pdev, PVRDMA_PCI_RESOURCE_REG);
+ dev->regs = ioremap(start, len);
+ if (!dev->regs) {
+ dev_err(&pdev->dev, "register mapping failed\n");
+ ret = -ENOMEM;
+ goto err_free_resource;
+ }
+
+ /* Setup per-device UAR. */
+ dev->driver_uar.index = 0;
+ dev->driver_uar.pfn =
+ pci_resource_start(dev->pdev, PVRDMA_PCI_RESOURCE_UAR) >>
+ PAGE_SHIFT;
+ dev->driver_uar.map =
+ ioremap(dev->driver_uar.pfn << PAGE_SHIFT, PAGE_SIZE);
+ if (!dev->driver_uar.map) {
+ dev_err(&pdev->dev, "failed to remap UAR pages\n");
+ ret = -ENOMEM;
+ goto err_unmap_regs;
+ }
+
+ version = pvrdma_read_reg(dev, PVRDMA_REG_VERSION);
+ dev_info(&pdev->dev, "device version %d, driver version %d\n",
+ version, PVRDMA_VERSION);
+ if (version < PVRDMA_VERSION) {
+ dev_err(&pdev->dev, "incompatible device version\n");
+ goto err_uar_unmap;
+ }
+
+ dev->dsr = dma_alloc_coherent(&pdev->dev, sizeof(*dev->dsr),
+ &dev->dsrbase, GFP_KERNEL);
+ if (!dev->dsr) {
+ dev_err(&pdev->dev, "failed to allocate shared region\n");
+ ret = -ENOMEM;
+ goto err_uar_unmap;
+ }
+
+ /* Setup the shared region */
+ memset(dev->dsr, 0, sizeof(*dev->dsr));
+ dev->dsr->driver_version = PVRDMA_VERSION;
+ dev->dsr->gos_info.gos_bits = sizeof(void *) == 4 ?
+ PVRDMA_GOS_BITS_32 :
+ PVRDMA_GOS_BITS_64;
+ dev->dsr->gos_info.gos_type = PVRDMA_GOS_TYPE_LINUX;
+ dev->dsr->gos_info.gos_ver = 1;
+ dev->dsr->uar_pfn = dev->driver_uar.pfn;
+
+ /* Command slot. */
+ dev->cmd_slot = dma_alloc_coherent(&pdev->dev, PAGE_SIZE,
+ &slot_dma, GFP_KERNEL);
+ if (!dev->cmd_slot) {
+ ret = -ENOMEM;
+ goto err_free_dsr;
+ }
+
+ dev->dsr->cmd_slot_dma = (u64)slot_dma;
+
+ /* Response slot. */
+ dev->resp_slot = dma_alloc_coherent(&pdev->dev, PAGE_SIZE,
+ &slot_dma, GFP_KERNEL);
+ if (!dev->resp_slot) {
+ ret = -ENOMEM;
+ goto err_free_slots;
+ }
+
+ dev->dsr->resp_slot_dma = (u64)slot_dma;
+
+ /* Async event ring */
+ dev->dsr->async_ring_pages.num_pages = 4;
+ ret = pvrdma_page_dir_init(dev, &dev->async_pdir,
+ dev->dsr->async_ring_pages.num_pages, true);
+ if (ret)
+ goto err_free_slots;
+ dev->async_ring_state = dev->async_pdir.pages[0];
+ dev->dsr->async_ring_pages.pdir_dma = dev->async_pdir.dir_dma;
+
+ /* CQ notification ring */
+ dev->dsr->cq_ring_pages.num_pages = 4;
+ ret = pvrdma_page_dir_init(dev, &dev->cq_pdir,
+ dev->dsr->cq_ring_pages.num_pages, true);
+ if (ret)
+ goto err_free_async_ring;
+ dev->cq_ring_state = dev->cq_pdir.pages[0];
+ dev->dsr->cq_ring_pages.pdir_dma = dev->cq_pdir.dir_dma;
+
+ /*
+ * Write the PA of the shared region to the device. The writes must be
+ * ordered such that the high bits are written last. When the writes
+ * complete, the device will have filled out the capabilities.
+ */
+
+ pvrdma_write_reg(dev, PVRDMA_REG_DSRLOW, (u32)dev->dsrbase);
+ pvrdma_write_reg(dev, PVRDMA_REG_DSRHIGH,
+ (u32)((u64)(dev->dsrbase) >> 32));
+
+ /* Make sure the write is complete before reading status. */
+ mb();
+
+ /* Currently, the driver only supports RoCE mode. */
+ if (dev->dsr->caps.mode != PVRDMA_DEVICE_MODE_ROCE) {
+ dev_err(&pdev->dev, "unsupported transport %d\n",
+ dev->dsr->caps.mode);
+ ret = -EFAULT;
+ goto err_free_cq_ring;
+ }
+
+ /* Currently, the driver only supports RoCE V1. */
+ if (!(dev->dsr->caps.gid_types & PVRDMA_GID_TYPE_FLAG_ROCE_V1)) {
+ dev_err(&pdev->dev, "driver needs RoCE v1 support\n");
+ ret = -EFAULT;
+ goto err_free_cq_ring;
+ }
+
+ /* Paired vmxnet3 will have same bus, slot. But func will be 0 */
+ pdev_net = pci_get_slot(pdev->bus, PCI_DEVFN(PCI_SLOT(pdev->devfn), 0));
+ if (!pdev_net) {
+ dev_err(&pdev->dev, "failed to find paired net device\n");
+ ret = -ENODEV;
+ goto err_free_cq_ring;
+ }
+
+ if (pdev_net->vendor != PCI_VENDOR_ID_VMWARE ||
+ pdev_net->device != PCI_DEVICE_ID_VMWARE_VMXNET3) {
+ dev_err(&pdev->dev, "failed to find paired vmxnet3 device\n");
+ pci_dev_put(pdev_net);
+ ret = -ENODEV;
+ goto err_free_cq_ring;
+ }
+
+ dev->netdev = pci_get_drvdata(pdev_net);
+ pci_dev_put(pdev_net);
+ if (!dev->netdev) {
+ dev_err(&pdev->dev, "failed to get vmxnet3 device\n");
+ ret = -ENODEV;
+ goto err_free_cq_ring;
+ }
+
+ dev_info(&pdev->dev, "paired device to %s\n", dev->netdev->name);
+
+ /* Interrupt setup */
+ ret = pvrdma_alloc_intrs(dev);
+ if (ret) {
+ dev_err(&pdev->dev, "failed to allocate interrupts\n");
+ ret = -ENOMEM;
+ goto err_netdevice;
+ }
+
+ /* Allocate UAR table. */
+ ret = pvrdma_uar_table_init(dev);
+ if (ret) {
+ dev_err(&pdev->dev, "failed to allocate UAR table\n");
+ ret = -ENOMEM;
+ goto err_free_intrs;
+ }
+
+ /* Allocate GID table */
+ dev->sgid_tbl = kcalloc(dev->dsr->caps.gid_tbl_len,
+ sizeof(union ib_gid), GFP_KERNEL);
+ if (!dev->sgid_tbl) {
+ ret = -ENOMEM;
+ goto err_free_uar_table;
+ }
+ dev_dbg(&pdev->dev, "gid table len %d\n", dev->dsr->caps.gid_tbl_len);
+
+ pvrdma_enable_intrs(dev);
+
+ /* Activate pvrdma device */
+ pvrdma_write_reg(dev, PVRDMA_REG_CTL, PVRDMA_DEVICE_CTL_ACTIVATE);
+
+ /* Make sure the write is complete before reading status. */
+ mb();
+
+ /* Check if device was successfully activated */
+ ret = pvrdma_read_reg(dev, PVRDMA_REG_ERR);
+ if (ret != 0) {
+ dev_err(&pdev->dev, "failed to activate device\n");
+ ret = -EFAULT;
+ goto err_disable_intr;
+ }
+
+ /* Register IB device */
+ ret = pvrdma_register_device(dev);
+ if (ret) {
+ dev_err(&pdev->dev, "failed to register IB device\n");
+ goto err_disable_intr;
+ }
+
+ dev->nb_netdev.notifier_call = pvrdma_netdevice_event;
+ ret = register_netdevice_notifier(&dev->nb_netdev);
+ if (ret) {
+ dev_err(&pdev->dev, "failed to register netdevice events\n");
+ goto err_unreg_ibdev;
+ }
+
+ dev_info(&pdev->dev, "attached to device\n");
+ return 0;
+
+err_unreg_ibdev:
+ ib_unregister_device(&dev->ib_dev);
+err_disable_intr:
+ pvrdma_disable_intrs(dev);
+ kfree(dev->sgid_tbl);
+err_free_uar_table:
+ pvrdma_uar_table_cleanup(dev);
+err_free_intrs:
+ pvrdma_free_irq(dev);
+ pvrdma_disable_msi_all(dev);
+err_netdevice:
+ unregister_netdevice_notifier(&dev->nb_netdev);
+err_free_cq_ring:
+ pvrdma_page_dir_cleanup(dev, &dev->cq_pdir);
+err_free_async_ring:
+ pvrdma_page_dir_cleanup(dev, &dev->async_pdir);
+err_free_slots:
+ pvrdma_free_slots(dev);
+err_free_dsr:
+ dma_free_coherent(&pdev->dev, sizeof(*dev->dsr), dev->dsr,
+ dev->dsrbase);
+err_uar_unmap:
+ iounmap(dev->driver_uar.map);
+err_unmap_regs:
+ iounmap(dev->regs);
+err_free_resource:
+ pci_release_regions(pdev);
+err_disable_pdev:
+ pci_disable_device(pdev);
+ pci_set_drvdata(pdev, NULL);
+err_free_device:
+ mutex_lock(&pvrdma_device_list_lock);
+ list_del(&dev->device_link);
+ mutex_unlock(&pvrdma_device_list_lock);
+ ib_dealloc_device(&dev->ib_dev);
+ return ret;
+}
+
+static void pvrdma_pci_remove(struct pci_dev *pdev)
+{
+ struct pvrdma_dev *dev = pci_get_drvdata(pdev);
+
+ if (!dev)
+ return;
+
+ dev_info(&pdev->dev, "detaching from device\n");
+
+ unregister_netdevice_notifier(&dev->nb_netdev);
+ dev->nb_netdev.notifier_call = NULL;
+
+ flush_workqueue(event_wq);
+
+ /* Unregister ib device */
+ ib_unregister_device(&dev->ib_dev);
+
+ mutex_lock(&pvrdma_device_list_lock);
+ list_del(&dev->device_link);
+ mutex_unlock(&pvrdma_device_list_lock);
+
+ pvrdma_disable_intrs(dev);
+ pvrdma_free_irq(dev);
+ pvrdma_disable_msi_all(dev);
+
+ /* Deactivate pvrdma device */
+ pvrdma_write_reg(dev, PVRDMA_REG_CTL, PVRDMA_DEVICE_CTL_RESET);
+ pvrdma_page_dir_cleanup(dev, &dev->cq_pdir);
+ pvrdma_page_dir_cleanup(dev, &dev->async_pdir);
+ pvrdma_free_slots(dev);
+
+ iounmap(dev->regs);
+ kfree(dev->sgid_tbl);
+ kfree(dev->cq_tbl);
+ kfree(dev->qp_tbl);
+ pvrdma_uar_table_cleanup(dev);
+ iounmap(dev->driver_uar.map);
+
+ ib_dealloc_device(&dev->ib_dev);
+
+ /* Free pci resources */
+ pci_release_regions(pdev);
+ pci_disable_device(pdev);
+ pci_set_drvdata(pdev, NULL);
+}
+
+static struct pci_device_id pvrdma_pci_table[] = {
+ { PCI_DEVICE(PCI_VENDOR_ID_VMWARE, PCI_DEVICE_ID_VMWARE_PVRDMA), },
+ { 0 },
+};
+
+MODULE_DEVICE_TABLE(pci, pvrdma_pci_table);
+
+static struct pci_driver pvrdma_driver = {
+ .name = DRV_NAME,
+ .id_table = pvrdma_pci_table,
+ .probe = pvrdma_pci_probe,
+ .remove = pvrdma_pci_remove,
+};
+
+static int __init pvrdma_init(void)
+{
+ int err;
+
+ event_wq = alloc_ordered_workqueue("pvrdma_event_wq", WQ_MEM_RECLAIM);
+ if (!event_wq)
+ return -ENOMEM;
+
+ err = pci_register_driver(&pvrdma_driver);
+ if (err)
+ destroy_workqueue(event_wq);
+
+ return err;
+}
+
+static void __exit pvrdma_cleanup(void)
+{
+ pci_unregister_driver(&pvrdma_driver);
+
+ destroy_workqueue(event_wq);
+}
+
+module_init(pvrdma_init);
+module_exit(pvrdma_cleanup);
+
+MODULE_AUTHOR("VMware, Inc");
+MODULE_DESCRIPTION("VMware Paravirtual RDMA driver");
+MODULE_VERSION(DRV_VERSION);
+MODULE_LICENSE("Dual BSD/GPL");
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_misc.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_misc.c
new file mode 100644
index 0000000..948b5cc
--- /dev/null
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_misc.c
@@ -0,0 +1,304 @@
+/*
+ * Copyright (c) 2012-2016 VMware, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of EITHER the GNU General Public License
+ * version 2 as published by the Free Software Foundation or the BSD
+ * 2-Clause License. This program is distributed in the hope that it
+ * will be useful, but WITHOUT ANY WARRANTY; WITHOUT EVEN THE IMPLIED
+ * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU General Public License version 2 for more details at
+ * http://www.gnu.org/licenses/old-licenses/gpl-2.0.en.html.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program available in the file COPYING in the main
+ * directory of this source tree.
+ *
+ * The BSD 2-Clause License
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/errno.h>
+#include <linux/slab.h>
+#include <linux/bitmap.h>
+
+#include "pvrdma.h"
+
+int pvrdma_page_dir_init(struct pvrdma_dev *dev, struct pvrdma_page_dir *pdir,
+ u64 npages, bool alloc_pages)
+{
+ u64 i;
+
+ if (npages > PVRDMA_PAGE_DIR_MAX_PAGES)
+ return -EINVAL;
+
+ memset(pdir, 0, sizeof(*pdir));
+
+ pdir->dir = dma_alloc_coherent(&dev->pdev->dev, PAGE_SIZE,
+ &pdir->dir_dma, GFP_KERNEL);
+ if (!pdir->dir)
+ goto err;
+
+ pdir->ntables = PVRDMA_PAGE_DIR_TABLE(npages - 1) + 1;
+ pdir->tables = kcalloc(pdir->ntables, sizeof(*pdir->tables),
+ GFP_KERNEL);
+ if (!pdir->tables)
+ goto err;
+
+ for (i = 0; i < pdir->ntables; i++) {
+ pdir->tables[i] = dma_alloc_coherent(&dev->pdev->dev, PAGE_SIZE,
+ (dma_addr_t *)&pdir->dir[i],
+ GFP_KERNEL);
+ if (!pdir->tables[i])
+ goto err;
+ }
+
+ pdir->npages = npages;
+
+ if (alloc_pages) {
+ pdir->pages = kcalloc(npages, sizeof(*pdir->pages),
+ GFP_KERNEL);
+ if (!pdir->pages)
+ goto err;
+
+ for (i = 0; i < pdir->npages; i++) {
+ dma_addr_t page_dma;
+
+ pdir->pages[i] = dma_alloc_coherent(&dev->pdev->dev,
+ PAGE_SIZE,
+ &page_dma,
+ GFP_KERNEL);
+ if (!pdir->pages[i])
+ goto err;
+
+ pvrdma_page_dir_insert_dma(pdir, i, page_dma);
+ }
+ }
+
+ return 0;
+
+err:
+ pvrdma_page_dir_cleanup(dev, pdir);
+
+ return -ENOMEM;
+}
+
+static u64 *pvrdma_page_dir_table(struct pvrdma_page_dir *pdir, u64 idx)
+{
+ return pdir->tables[PVRDMA_PAGE_DIR_TABLE(idx)];
+}
+
+dma_addr_t pvrdma_page_dir_get_dma(struct pvrdma_page_dir *pdir, u64 idx)
+{
+ return pvrdma_page_dir_table(pdir, idx)[PVRDMA_PAGE_DIR_PAGE(idx)];
+}
+
+static void pvrdma_page_dir_cleanup_pages(struct pvrdma_dev *dev,
+ struct pvrdma_page_dir *pdir)
+{
+ if (pdir->pages) {
+ u64 i;
+
+ for (i = 0; i < pdir->npages && pdir->pages[i]; i++) {
+ dma_addr_t page_dma = pvrdma_page_dir_get_dma(pdir, i);
+
+ dma_free_coherent(&dev->pdev->dev, PAGE_SIZE,
+ pdir->pages[i], page_dma);
+ }
+
+ kfree(pdir->pages);
+ }
+}
+
+static void pvrdma_page_dir_cleanup_tables(struct pvrdma_dev *dev,
+ struct pvrdma_page_dir *pdir)
+{
+ if (pdir->tables) {
+ int i;
+
+ pvrdma_page_dir_cleanup_pages(dev, pdir);
+
+ for (i = 0; i < pdir->ntables; i++) {
+ u64 *table = pdir->tables[i];
+
+ if (table)
+ dma_free_coherent(&dev->pdev->dev, PAGE_SIZE,
+ table, pdir->dir[i]);
+ }
+
+ kfree(pdir->tables);
+ }
+}
+
+void pvrdma_page_dir_cleanup(struct pvrdma_dev *dev,
+ struct pvrdma_page_dir *pdir)
+{
+ if (pdir->dir) {
+ pvrdma_page_dir_cleanup_tables(dev, pdir);
+ dma_free_coherent(&dev->pdev->dev, PAGE_SIZE,
+ pdir->dir, pdir->dir_dma);
+ }
+}
+
+int pvrdma_page_dir_insert_dma(struct pvrdma_page_dir *pdir, u64 idx,
+ dma_addr_t daddr)
+{
+ u64 *table;
+
+ if (idx >= pdir->npages)
+ return -EINVAL;
+
+ table = pvrdma_page_dir_table(pdir, idx);
+ table[PVRDMA_PAGE_DIR_PAGE(idx)] = daddr;
+
+ return 0;
+}
+
+int pvrdma_page_dir_insert_umem(struct pvrdma_page_dir *pdir,
+ struct ib_umem *umem, u64 offset)
+{
+ u64 i = offset;
+ int j, entry;
+ int ret = 0, len = 0;
+ struct scatterlist *sg;
+
+ if (offset >= pdir->npages)
+ return -EINVAL;
+
+ for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
+ len = sg_dma_len(sg) >> PAGE_SHIFT;
+ for (j = 0; j < len; j++) {
+ dma_addr_t addr = sg_dma_address(sg) +
+ umem->page_size * j;
+
+ ret = pvrdma_page_dir_insert_dma(pdir, i, addr);
+ if (ret)
+ goto exit;
+
+ i++;
+ }
+ }
+
+exit:
+ return ret;
+}
+
+int pvrdma_page_dir_insert_page_list(struct pvrdma_page_dir *pdir,
+ u64 *page_list,
+ int num_pages)
+{
+ int i;
+ int ret;
+
+ if (num_pages > pdir->npages)
+ return -EINVAL;
+
+ for (i = 0; i < num_pages; i++) {
+ ret = pvrdma_page_dir_insert_dma(pdir, i, page_list[i]);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+void pvrdma_qp_cap_to_ib(struct ib_qp_cap *dst, const struct pvrdma_qp_cap *src)
+{
+ dst->max_send_wr = src->max_send_wr;
+ dst->max_recv_wr = src->max_recv_wr;
+ dst->max_send_sge = src->max_send_sge;
+ dst->max_recv_sge = src->max_recv_sge;
+ dst->max_inline_data = src->max_inline_data;
+}
+
+void ib_qp_cap_to_pvrdma(struct pvrdma_qp_cap *dst, const struct ib_qp_cap *src)
+{
+ dst->max_send_wr = src->max_send_wr;
+ dst->max_recv_wr = src->max_recv_wr;
+ dst->max_send_sge = src->max_send_sge;
+ dst->max_recv_sge = src->max_recv_sge;
+ dst->max_inline_data = src->max_inline_data;
+}
+
+void pvrdma_gid_to_ib(union ib_gid *dst, const union pvrdma_gid *src)
+{
+ BUILD_BUG_ON(sizeof(union pvrdma_gid) != sizeof(union ib_gid));
+ memcpy(dst, src, sizeof(*src));
+}
+
+void ib_gid_to_pvrdma(union pvrdma_gid *dst, const union ib_gid *src)
+{
+ BUILD_BUG_ON(sizeof(union pvrdma_gid) != sizeof(union ib_gid));
+ memcpy(dst, src, sizeof(*src));
+}
+
+void pvrdma_global_route_to_ib(struct ib_global_route *dst,
+ const struct pvrdma_global_route *src)
+{
+ pvrdma_gid_to_ib(&dst->dgid, &src->dgid);
+ dst->flow_label = src->flow_label;
+ dst->sgid_index = src->sgid_index;
+ dst->hop_limit = src->hop_limit;
+ dst->traffic_class = src->traffic_class;
+}
+
+void ib_global_route_to_pvrdma(struct pvrdma_global_route *dst,
+ const struct ib_global_route *src)
+{
+ ib_gid_to_pvrdma(&dst->dgid, &src->dgid);
+ dst->flow_label = src->flow_label;
+ dst->sgid_index = src->sgid_index;
+ dst->hop_limit = src->hop_limit;
+ dst->traffic_class = src->traffic_class;
+}
+
+void pvrdma_ah_attr_to_ib(struct ib_ah_attr *dst,
+ const struct pvrdma_ah_attr *src)
+{
+ pvrdma_global_route_to_ib(&dst->grh, &src->grh);
+ dst->dlid = src->dlid;
+ dst->sl = src->sl;
+ dst->src_path_bits = src->src_path_bits;
+ dst->static_rate = src->static_rate;
+ dst->ah_flags = src->ah_flags;
+ dst->port_num = src->port_num;
+ memcpy(&dst->dmac, &src->dmac, sizeof(dst->dmac));
+}
+
+void ib_ah_attr_to_pvrdma(struct pvrdma_ah_attr *dst,
+ const struct ib_ah_attr *src)
+{
+ ib_global_route_to_pvrdma(&dst->grh, &src->grh);
+ dst->dlid = src->dlid;
+ dst->sl = src->sl;
+ dst->src_path_bits = src->src_path_bits;
+ dst->static_rate = src->static_rate;
+ dst->ah_flags = src->ah_flags;
+ dst->port_num = src->port_num;
+ memcpy(&dst->dmac, &src->dmac, sizeof(dst->dmac));
+}
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c
new file mode 100644
index 0000000..8519f32
--- /dev/null
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_mr.c
@@ -0,0 +1,334 @@
+/*
+ * Copyright (c) 2012-2016 VMware, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of EITHER the GNU General Public License
+ * version 2 as published by the Free Software Foundation or the BSD
+ * 2-Clause License. This program is distributed in the hope that it
+ * will be useful, but WITHOUT ANY WARRANTY; WITHOUT EVEN THE IMPLIED
+ * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU General Public License version 2 for more details at
+ * http://www.gnu.org/licenses/old-licenses/gpl-2.0.en.html.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program available in the file COPYING in the main
+ * directory of this source tree.
+ *
+ * The BSD 2-Clause License
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/list.h>
+#include <linux/slab.h>
+
+#include "pvrdma.h"
+
+/**
+ * pvrdma_get_dma_mr - get a DMA memory region
+ * @pd: protection domain
+ * @acc: access flags
+ *
+ * @return: ib_mr pointer on success, otherwise returns an errno.
+ */
+struct ib_mr *pvrdma_get_dma_mr(struct ib_pd *pd, int acc)
+{
+ struct pvrdma_dev *dev = to_vdev(pd->device);
+ struct pvrdma_user_mr *mr;
+ union pvrdma_cmd_req req;
+ union pvrdma_cmd_resp rsp;
+ struct pvrdma_cmd_create_mr *cmd = &req.create_mr;
+ struct pvrdma_cmd_create_mr_resp *resp = &rsp.create_mr_resp;
+ int ret;
+
+ /* Support only LOCAL_WRITE flag for DMA MRs */
+ if (acc & ~IB_ACCESS_LOCAL_WRITE) {
+ dev_warn(&dev->pdev->dev,
+ "unsupported dma mr access flags %#x\n", acc);
+ return ERR_PTR(-EOPNOTSUPP);
+ }
+
+ mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+ if (!mr)
+ return ERR_PTR(-ENOMEM);
+
+ memset(cmd, 0, sizeof(*cmd));
+ cmd->hdr.cmd = PVRDMA_CMD_CREATE_MR;
+ cmd->pd_handle = to_vpd(pd)->pd_handle;
+ cmd->access_flags = acc;
+ cmd->flags = PVRDMA_MR_FLAG_DMA;
+
+ ret = pvrdma_cmd_post(dev, &req, &rsp, PVRDMA_CMD_CREATE_MR_RESP);
+ if (ret < 0) {
+ dev_warn(&dev->pdev->dev,
+ "could not get DMA mem region, error: %d\n", ret);
+ kfree(mr);
+ return ERR_PTR(ret);
+ }
+
+ mr->mmr.mr_handle = resp->mr_handle;
+ mr->ibmr.lkey = resp->lkey;
+ mr->ibmr.rkey = resp->rkey;
+
+ return &mr->ibmr;
+}
+
+/**
+ * pvrdma_reg_user_mr - register a userspace memory region
+ * @pd: protection domain
+ * @start: starting address
+ * @length: length of region
+ * @virt_addr: I/O virtual address
+ * @access_flags: access flags for memory region
+ * @udata: user data
+ *
+ * @return: ib_mr pointer on success, otherwise returns an errno.
+ */
+struct ib_mr *pvrdma_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
+ u64 virt_addr, int access_flags,
+ struct ib_udata *udata)
+{
+ struct pvrdma_dev *dev = to_vdev(pd->device);
+ struct pvrdma_user_mr *mr = NULL;
+ struct ib_umem *umem;
+ union pvrdma_cmd_req req;
+ union pvrdma_cmd_resp rsp;
+ struct pvrdma_cmd_create_mr *cmd = &req.create_mr;
+ struct pvrdma_cmd_create_mr_resp *resp = &rsp.create_mr_resp;
+ int nchunks;
+ int ret;
+ int entry;
+ struct scatterlist *sg;
+
+ if (length == 0 || length > dev->dsr->caps.max_mr_size) {
+ dev_warn(&dev->pdev->dev, "invalid mem region length\n");
+ return ERR_PTR(-EINVAL);
+ }
+
+ umem = ib_umem_get(pd->uobject->context, start,
+ length, access_flags, 0);
+ if (IS_ERR(umem)) {
+ dev_warn(&dev->pdev->dev,
+ "could not get umem for mem region\n");
+ return ERR_CAST(umem);
+ }
+
+ nchunks = 0;
+ for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry)
+ nchunks += sg_dma_len(sg) >> PAGE_SHIFT;
+
+ if (nchunks < 0 || nchunks > PVRDMA_PAGE_DIR_MAX_PAGES) {
+ dev_warn(&dev->pdev->dev, "overflow %d pages in mem region\n",
+ nchunks);
+ ret = -EINVAL;
+ goto err_umem;
+ }
+
+ mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+ if (!mr) {
+ ret = -ENOMEM;
+ goto err_umem;
+ }
+
+ mr->mmr.iova = virt_addr;
+ mr->mmr.size = length;
+ mr->umem = umem;
+
+ ret = pvrdma_page_dir_init(dev, &mr->pdir, nchunks, false);
+ if (ret) {
+ dev_warn(&dev->pdev->dev,
+ "could not allocate page directory\n");
+ goto err_umem;
+ }
+
+ ret = pvrdma_page_dir_insert_umem(&mr->pdir, mr->umem, 0);
+ if (ret)
+ goto err_pdir;
+
+ memset(cmd, 0, sizeof(*cmd));
+ cmd->hdr.cmd = PVRDMA_CMD_CREATE_MR;
+ cmd->start = start;
+ cmd->length = length;
+ cmd->pd_handle = to_vpd(pd)->pd_handle;
+ cmd->access_flags = access_flags;
+ cmd->nchunks = nchunks;
+ cmd->pdir_dma = mr->pdir.dir_dma;
+
+ ret = pvrdma_cmd_post(dev, &req, &rsp, PVRDMA_CMD_CREATE_MR_RESP);
+ if (ret < 0) {
+ dev_warn(&dev->pdev->dev,
+ "could not register mem region, error: %d\n", ret);
+ goto err_pdir;
+ }
+
+ mr->mmr.mr_handle = resp->mr_handle;
+ mr->ibmr.lkey = resp->lkey;
+ mr->ibmr.rkey = resp->rkey;
+
+ return &mr->ibmr;
+
+err_pdir:
+ pvrdma_page_dir_cleanup(dev, &mr->pdir);
+err_umem:
+ ib_umem_release(umem);
+ kfree(mr);
+
+ return ERR_PTR(ret);
+}
+
+/**
+ * pvrdma_alloc_mr - allocate a memory region
+ * @pd: protection domain
+ * @mr_type: type of memory region
+ * @max_num_sg: maximum number of pages
+ *
+ * @return: ib_mr pointer on success, otherwise returns an errno.
+ */
+struct ib_mr *pvrdma_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
+ u32 max_num_sg)
+{
+ struct pvrdma_dev *dev = to_vdev(pd->device);
+ struct pvrdma_user_mr *mr;
+ union pvrdma_cmd_req req;
+ union pvrdma_cmd_resp rsp;
+ struct pvrdma_cmd_create_mr *cmd = &req.create_mr;
+ struct pvrdma_cmd_create_mr_resp *resp = &rsp.create_mr_resp;
+ int size = max_num_sg * sizeof(u64);
+ int ret;
+
+ if (mr_type != IB_MR_TYPE_MEM_REG ||
+ max_num_sg > PVRDMA_MAX_FAST_REG_PAGES)
+ return ERR_PTR(-EINVAL);
+
+ mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+ if (!mr)
+ return ERR_PTR(-ENOMEM);
+
+ mr->pages = kzalloc(size, GFP_KERNEL);
+ if (!mr->pages) {
+ ret = -ENOMEM;
+ goto freemr;
+ }
+
+ ret = pvrdma_page_dir_init(dev, &mr->pdir, max_num_sg, false);
+ if (ret) {
+ dev_warn(&dev->pdev->dev,
+ "failed to allocate page dir for mr\n");
+ ret = -ENOMEM;
+ goto freepages;
+ }
+
+ memset(cmd, 0, sizeof(*cmd));
+ cmd->hdr.cmd = PVRDMA_CMD_CREATE_MR;
+ cmd->pd_handle = to_vpd(pd)->pd_handle;
+ cmd->access_flags = 0;
+ cmd->flags = PVRDMA_MR_FLAG_FRMR;
+ cmd->nchunks = max_num_sg;
+
+ ret = pvrdma_cmd_post(dev, &req, &rsp, PVRDMA_CMD_CREATE_MR_RESP);
+ if (ret < 0) {
+ dev_warn(&dev->pdev->dev,
+ "could not create FR mem region, error: %d\n", ret);
+ goto freepdir;
+ }
+
+ mr->max_pages = max_num_sg;
+ mr->mmr.mr_handle = resp->mr_handle;
+ mr->ibmr.lkey = resp->lkey;
+ mr->ibmr.rkey = resp->rkey;
+ mr->page_shift = PAGE_SHIFT;
+ mr->umem = NULL;
+
+ return &mr->ibmr;
+
+freepdir:
+ pvrdma_page_dir_cleanup(dev, &mr->pdir);
+freepages:
+ kfree(mr->pages);
+freemr:
+ kfree(mr);
+ return ERR_PTR(ret);
+}
+
+/**
+ * pvrdma_dereg_mr - deregister a memory region
+ * @ibmr: memory region
+ *
+ * @return: 0 on success.
+ */
+int pvrdma_dereg_mr(struct ib_mr *ibmr)
+{
+ struct pvrdma_user_mr *mr = to_vmr(ibmr);
+ struct pvrdma_dev *dev = to_vdev(ibmr->device);
+ union pvrdma_cmd_req req;
+ struct pvrdma_cmd_destroy_mr *cmd = &req.destroy_mr;
+ int ret;
+
+ memset(cmd, 0, sizeof(*cmd));
+ cmd->hdr.cmd = PVRDMA_CMD_DESTROY_MR;
+ cmd->mr_handle = mr->mmr.mr_handle;
+ ret = pvrdma_cmd_post(dev, &req, NULL, 0);
+ if (ret < 0)
+ dev_warn(&dev->pdev->dev,
+ "could not deregister mem region, error: %d\n", ret);
+
+ pvrdma_page_dir_cleanup(dev, &mr->pdir);
+ if (mr->umem)
+ ib_umem_release(mr->umem);
+
+ kfree(mr->pages);
+ kfree(mr);
+
+ return 0;
+}
+
+static int pvrdma_set_page(struct ib_mr *ibmr, u64 addr)
+{
+ struct pvrdma_user_mr *mr = to_vmr(ibmr);
+
+ if (mr->npages == mr->max_pages)
+ return -ENOMEM;
+
+ mr->pages[mr->npages++] = addr;
+ return 0;
+}
+
+int pvrdma_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
+ unsigned int *sg_offset)
+{
+ struct pvrdma_user_mr *mr = to_vmr(ibmr);
+ struct pvrdma_dev *dev = to_vdev(ibmr->device);
+ int ret;
+
+ mr->npages = 0;
+
+ ret = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, pvrdma_set_page);
+ if (ret < 0)
+ dev_warn(&dev->pdev->dev, "could not map sg to pages\n");
+
+ return ret;
+}
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c
new file mode 100644
index 0000000..c8c01e5
--- /dev/null
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_qp.c
@@ -0,0 +1,972 @@
+/*
+ * Copyright (c) 2012-2016 VMware, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of EITHER the GNU General Public License
+ * version 2 as published by the Free Software Foundation or the BSD
+ * 2-Clause License. This program is distributed in the hope that it
+ * will be useful, but WITHOUT ANY WARRANTY; WITHOUT EVEN THE IMPLIED
+ * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU General Public License version 2 for more details at
+ * http://www.gnu.org/licenses/old-licenses/gpl-2.0.en.html.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program available in the file COPYING in the main
+ * directory of this source tree.
+ *
+ * The BSD 2-Clause License
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <asm/page.h>
+#include <linux/io.h>
+#include <linux/wait.h>
+#include <rdma/ib_addr.h>
+#include <rdma/ib_smi.h>
+#include <rdma/ib_user_verbs.h>
+
+#include "pvrdma.h"
+
+static inline void get_cqs(struct pvrdma_qp *qp, struct pvrdma_cq **send_cq,
+ struct pvrdma_cq **recv_cq)
+{
+ *send_cq = to_vcq(qp->ibqp.send_cq);
+ *recv_cq = to_vcq(qp->ibqp.recv_cq);
+}
+
+static void pvrdma_lock_cqs(struct pvrdma_cq *scq, struct pvrdma_cq *rcq,
+ unsigned long *scq_flags,
+ unsigned long *rcq_flags)
+ __acquires(scq->cq_lock) __acquires(rcq->cq_lock)
+{
+ if (scq == rcq) {
+ spin_lock_irqsave(&scq->cq_lock, *scq_flags);
+ __acquire(rcq->cq_lock);
+ } else if (scq->cq_handle < rcq->cq_handle) {
+ spin_lock_irqsave(&scq->cq_lock, *scq_flags);
+ spin_lock_irqsave_nested(&rcq->cq_lock, *rcq_flags,
+ SINGLE_DEPTH_NESTING);
+ } else {
+ spin_lock_irqsave(&rcq->cq_lock, *rcq_flags);
+ spin_lock_irqsave_nested(&scq->cq_lock, *scq_flags,
+ SINGLE_DEPTH_NESTING);
+ }
+}
+
+static void pvrdma_unlock_cqs(struct pvrdma_cq *scq, struct pvrdma_cq *rcq,
+ unsigned long *scq_flags,
+ unsigned long *rcq_flags)
+ __releases(scq->cq_lock) __releases(rcq->cq_lock)
+{
+ if (scq == rcq) {
+ __release(rcq->cq_lock);
+ spin_unlock_irqrestore(&scq->cq_lock, *scq_flags);
+ } else if (scq->cq_handle < rcq->cq_handle) {
+ spin_unlock_irqrestore(&rcq->cq_lock, *rcq_flags);
+ spin_unlock_irqrestore(&scq->cq_lock, *scq_flags);
+ } else {
+ spin_unlock_irqrestore(&scq->cq_lock, *scq_flags);
+ spin_unlock_irqrestore(&rcq->cq_lock, *rcq_flags);
+ }
+}
+
+static void pvrdma_reset_qp(struct pvrdma_qp *qp)
+{
+ struct pvrdma_cq *scq, *rcq;
+ unsigned long scq_flags, rcq_flags;
+
+ /* Clean up cqes */
+ get_cqs(qp, &scq, &rcq);
+ pvrdma_lock_cqs(scq, rcq, &scq_flags, &rcq_flags);
+
+ _pvrdma_flush_cqe(qp, scq);
+ if (scq != rcq)
+ _pvrdma_flush_cqe(qp, rcq);
+
+ pvrdma_unlock_cqs(scq, rcq, &scq_flags, &rcq_flags);
+
+ /*
+ * Reset queuepair. The checks are because usermode queuepairs won't
+ * have kernel ringstates.
+ */
+ if (qp->rq.ring) {
+ atomic_set(&qp->rq.ring->cons_head, 0);
+ atomic_set(&qp->rq.ring->prod_tail, 0);
+ }
+ if (qp->sq.ring) {
+ atomic_set(&qp->sq.ring->cons_head, 0);
+ atomic_set(&qp->sq.ring->prod_tail, 0);
+ }
+}
+
+static int pvrdma_set_rq_size(struct pvrdma_dev *dev,
+ struct ib_qp_cap *req_cap,
+ struct pvrdma_qp *qp)
+{
+ if (req_cap->max_recv_wr > dev->dsr->caps.max_qp_wr ||
+ req_cap->max_recv_sge > dev->dsr->caps.max_sge) {
+ dev_warn(&dev->pdev->dev, "recv queue size invalid\n");
+ return -EINVAL;
+ }
+
+ qp->rq.wqe_cnt = roundup_pow_of_two(max(1U, req_cap->max_recv_wr));
+ qp->rq.max_sg = roundup_pow_of_two(max(1U, req_cap->max_recv_sge));
+
+ /* Write back */
+ req_cap->max_recv_wr = qp->rq.wqe_cnt;
+ req_cap->max_recv_sge = qp->rq.max_sg;
+
+ qp->rq.wqe_size = roundup_pow_of_two(sizeof(struct pvrdma_rq_wqe_hdr) +
+ sizeof(struct pvrdma_sge) *
+ qp->rq.max_sg);
+ qp->npages_recv = (qp->rq.wqe_cnt * qp->rq.wqe_size + PAGE_SIZE - 1) /
+ PAGE_SIZE;
+
+ return 0;
+}
+
+static int pvrdma_set_sq_size(struct pvrdma_dev *dev, struct ib_qp_cap *req_cap,
+ enum ib_qp_type type, struct pvrdma_qp *qp)
+{
+ if (req_cap->max_send_wr > dev->dsr->caps.max_qp_wr ||
+ req_cap->max_send_sge > dev->dsr->caps.max_sge) {
+ dev_warn(&dev->pdev->dev, "send queue size invalid\n");
+ return -EINVAL;
+ }
+
+ qp->sq.wqe_cnt = roundup_pow_of_two(max(1U, req_cap->max_send_wr));
+ qp->sq.max_sg = roundup_pow_of_two(max(1U, req_cap->max_send_sge));
+
+ /* Write back */
+ req_cap->max_send_wr = qp->sq.wqe_cnt;
+ req_cap->max_send_sge = qp->sq.max_sg;
+
+ qp->sq.wqe_size = roundup_pow_of_two(sizeof(struct pvrdma_sq_wqe_hdr) +
+ sizeof(struct pvrdma_sge) *
+ qp->sq.max_sg);
+ /* Note: one extra page for the header. */
+ qp->npages_send = 1 + (qp->sq.wqe_cnt * qp->sq.wqe_size +
+ PAGE_SIZE - 1) / PAGE_SIZE;
+
+ return 0;
+}
+
+/**
+ * pvrdma_create_qp - create queue pair
+ * @pd: protection domain
+ * @init_attr: queue pair attributes
+ * @udata: user data
+ *
+ * @return: the ib_qp pointer on success, otherwise returns an errno.
+ */
+struct ib_qp *pvrdma_create_qp(struct ib_pd *pd,
+ struct ib_qp_init_attr *init_attr,
+ struct ib_udata *udata)
+{
+ struct pvrdma_qp *qp = NULL;
+ struct pvrdma_dev *dev = to_vdev(pd->device);
+ union pvrdma_cmd_req req;
+ union pvrdma_cmd_resp rsp;
+ struct pvrdma_cmd_create_qp *cmd = &req.create_qp;
+ struct pvrdma_cmd_create_qp_resp *resp = &rsp.create_qp_resp;
+ struct pvrdma_create_qp ucmd;
+ unsigned long flags;
+ int ret;
+
+ if (init_attr->create_flags) {
+ dev_warn(&dev->pdev->dev,
+ "invalid create queuepair flags %#x\n",
+ init_attr->create_flags);
+ return ERR_PTR(-EINVAL);
+ }
+
+ if (init_attr->qp_type != IB_QPT_RC &&
+ init_attr->qp_type != IB_QPT_UD &&
+ init_attr->qp_type != IB_QPT_GSI) {
+ dev_warn(&dev->pdev->dev, "queuepair type %d not supported\n",
+ init_attr->qp_type);
+ return ERR_PTR(-EINVAL);
+ }
+
+ if (!atomic_add_unless(&dev->num_qps, 1, dev->dsr->caps.max_qp))
+ return ERR_PTR(-ENOMEM);
+
+ switch (init_attr->qp_type) {
+ case IB_QPT_GSI:
+ if (init_attr->port_num == 0 ||
+ init_attr->port_num > pd->device->phys_port_cnt ||
+ udata) {
+ dev_warn(&dev->pdev->dev, "invalid queuepair attrs\n");
+ ret = -EINVAL;
+ goto err_qp;
+ }
+ /* fall through */
+ case IB_QPT_RC:
+ case IB_QPT_UD:
+ qp = kzalloc(sizeof(*qp), GFP_KERNEL);
+ if (!qp) {
+ ret = -ENOMEM;
+ goto err_qp;
+ }
+
+ spin_lock_init(&qp->sq.lock);
+ spin_lock_init(&qp->rq.lock);
+ mutex_init(&qp->mutex);
+ atomic_set(&qp->refcnt, 1);
+ init_waitqueue_head(&qp->wait);
+
+ qp->state = IB_QPS_RESET;
+
+ if (pd->uobject && udata) {
+ dev_dbg(&dev->pdev->dev,
+ "create queuepair from user space\n");
+
+ if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) {
+ ret = -EFAULT;
+ goto err_qp;
+ }
+
+ /* set qp->sq.wqe_cnt, shift, buf_size.. */
+ qp->rumem = ib_umem_get(pd->uobject->context,
+ ucmd.rbuf_addr,
+ ucmd.rbuf_size, 0, 0);
+ if (IS_ERR(qp->rumem)) {
+ ret = PTR_ERR(qp->rumem);
+ goto err_qp;
+ }
+
+ qp->sumem = ib_umem_get(pd->uobject->context,
+ ucmd.sbuf_addr,
+ ucmd.sbuf_size, 0, 0);
+ if (IS_ERR(qp->sumem)) {
+ ib_umem_release(qp->rumem);
+ ret = PTR_ERR(qp->sumem);
+ goto err_qp;
+ }
+
+ qp->npages_send = ib_umem_page_count(qp->sumem);
+ qp->npages_recv = ib_umem_page_count(qp->rumem);
+ qp->npages = qp->npages_send + qp->npages_recv;
+ } else {
+ qp->is_kernel = true;
+
+ ret = pvrdma_set_sq_size(to_vdev(pd->device),
+ &init_attr->cap,
+ init_attr->qp_type, qp);
+ if (ret)
+ goto err_qp;
+
+ ret = pvrdma_set_rq_size(to_vdev(pd->device),
+ &init_attr->cap, qp);
+ if (ret)
+ goto err_qp;
+
+ qp->npages = qp->npages_send + qp->npages_recv;
+
+ /* Skip header page. */
+ qp->sq.offset = PAGE_SIZE;
+
+ /* Recv queue pages are after send pages. */
+ qp->rq.offset = qp->npages_send * PAGE_SIZE;
+ }
+
+ if (qp->npages < 0 || qp->npages > PVRDMA_PAGE_DIR_MAX_PAGES) {
+ dev_warn(&dev->pdev->dev,
+ "overflow pages in queuepair\n");
+ ret = -EINVAL;
+ goto err_umem;
+ }
+
+ ret = pvrdma_page_dir_init(dev, &qp->pdir, qp->npages,
+ qp->is_kernel);
+ if (ret) {
+ dev_warn(&dev->pdev->dev,
+ "could not allocate page directory\n");
+ goto err_umem;
+ }
+
+ if (!qp->is_kernel) {
+ pvrdma_page_dir_insert_umem(&qp->pdir, qp->sumem, 0);
+ pvrdma_page_dir_insert_umem(&qp->pdir, qp->rumem,
+ qp->npages_send);
+ } else {
+ /* Ring state is always the first page. */
+ qp->sq.ring = qp->pdir.pages[0];
+ qp->rq.ring = &qp->sq.ring[1];
+ }
+ break;
+ default:
+ ret = -EINVAL;
+ goto err_qp;
+ }
+
+ /* Not supported */
+ init_attr->cap.max_inline_data = 0;
+
+ memset(cmd, 0, sizeof(*cmd));
+ cmd->hdr.cmd = PVRDMA_CMD_CREATE_QP;
+ cmd->pd_handle = to_vpd(pd)->pd_handle;
+ cmd->send_cq_handle = to_vcq(init_attr->send_cq)->cq_handle;
+ cmd->recv_cq_handle = to_vcq(init_attr->recv_cq)->cq_handle;
+ cmd->max_send_wr = init_attr->cap.max_send_wr;
+ cmd->max_recv_wr = init_attr->cap.max_recv_wr;
+ cmd->max_send_sge = init_attr->cap.max_send_sge;
+ cmd->max_recv_sge = init_attr->cap.max_recv_sge;
+ cmd->max_inline_data = init_attr->cap.max_inline_data;
+ cmd->sq_sig_all = (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) ? 1 : 0;
+ cmd->qp_type = ib_qp_type_to_pvrdma(init_attr->qp_type);
+ cmd->access_flags = IB_ACCESS_LOCAL_WRITE;
+ cmd->total_chunks = qp->npages;
+ cmd->send_chunks = qp->npages_send - 1;
+ cmd->pdir_dma = qp->pdir.dir_dma;
+
+ dev_dbg(&dev->pdev->dev, "create queuepair with %d, %d, %d, %d\n",
+ cmd->max_send_wr, cmd->max_recv_wr, cmd->max_send_sge,
+ cmd->max_recv_sge);
+
+ ret = pvrdma_cmd_post(dev, &req, &rsp, PVRDMA_CMD_CREATE_QP_RESP);
+ if (ret < 0) {
+ dev_warn(&dev->pdev->dev,
+ "could not create queuepair, error: %d\n", ret);
+ goto err_pdir;
+ }
+
+ /* max_send_wr/_recv_wr/_send_sge/_recv_sge/_inline_data */
+ qp->qp_handle = resp->qpn;
+ qp->port = init_attr->port_num;
+ qp->ibqp.qp_num = resp->qpn;
+ spin_lock_irqsave(&dev->qp_tbl_lock, flags);
+ dev->qp_tbl[qp->qp_handle % dev->dsr->caps.max_qp] = qp;
+ spin_unlock_irqrestore(&dev->qp_tbl_lock, flags);
+
+ return &qp->ibqp;
+
+err_pdir:
+ pvrdma_page_dir_cleanup(dev, &qp->pdir);
+err_umem:
+ if (pd->uobject && udata) {
+ if (qp->rumem)
+ ib_umem_release(qp->rumem);
+ if (qp->sumem)
+ ib_umem_release(qp->sumem);
+ }
+err_qp:
+ kfree(qp);
+ atomic_dec(&dev->num_qps);
+
+ return ERR_PTR(ret);
+}
+
+static void pvrdma_free_qp(struct pvrdma_qp *qp)
+{
+ struct pvrdma_dev *dev = to_vdev(qp->ibqp.device);
+ struct pvrdma_cq *scq;
+ struct pvrdma_cq *rcq;
+ unsigned long flags, scq_flags, rcq_flags;
+
+ /* In case cq is polling */
+ get_cqs(qp, &scq, &rcq);
+ pvrdma_lock_cqs(scq, rcq, &scq_flags, &rcq_flags);
+
+ _pvrdma_flush_cqe(qp, scq);
+ if (scq != rcq)
+ _pvrdma_flush_cqe(qp, rcq);
+
+ spin_lock_irqsave(&dev->qp_tbl_lock, flags);
+ dev->qp_tbl[qp->qp_handle] = NULL;
+ spin_unlock_irqrestore(&dev->qp_tbl_lock, flags);
+
+ pvrdma_unlock_cqs(scq, rcq, &scq_flags, &rcq_flags);
+
+ atomic_dec(&qp->refcnt);
+ wait_event(qp->wait, !atomic_read(&qp->refcnt));
+
+ pvrdma_page_dir_cleanup(dev, &qp->pdir);
+
+ kfree(qp);
+
+ atomic_dec(&dev->num_qps);
+}
+
+/**
+ * pvrdma_destroy_qp - destroy a queue pair
+ * @qp: the queue pair to destroy
+ *
+ * @return: 0 on success.
+ */
+int pvrdma_destroy_qp(struct ib_qp *qp)
+{
+ struct pvrdma_qp *vqp = to_vqp(qp);
+ union pvrdma_cmd_req req;
+ struct pvrdma_cmd_destroy_qp *cmd = &req.destroy_qp;
+ int ret;
+
+ memset(cmd, 0, sizeof(*cmd));
+ cmd->hdr.cmd = PVRDMA_CMD_DESTROY_QP;
+ cmd->qp_handle = vqp->qp_handle;
+
+ ret = pvrdma_cmd_post(to_vdev(qp->device), &req, NULL, 0);
+ if (ret < 0)
+ dev_warn(&to_vdev(qp->device)->pdev->dev,
+ "destroy queuepair failed, error: %d\n", ret);
+
+ pvrdma_free_qp(vqp);
+
+ return 0;
+}
+
+/**
+ * pvrdma_modify_qp - modify queue pair attributes
+ * @ibqp: the queue pair
+ * @attr: the new queue pair's attributes
+ * @attr_mask: attributes mask
+ * @udata: user data
+ *
+ * @returns 0 on success, otherwise returns an errno.
+ */
+int pvrdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+ int attr_mask, struct ib_udata *udata)
+{
+ struct pvrdma_dev *dev = to_vdev(ibqp->device);
+ struct pvrdma_qp *qp = to_vqp(ibqp);
+ union pvrdma_cmd_req req;
+ union pvrdma_cmd_resp rsp;
+ struct pvrdma_cmd_modify_qp *cmd = &req.modify_qp;
+ int cur_state, next_state;
+ int ret;
+
+ /* Sanity checking. Should need lock here */
+ mutex_lock(&qp->mutex);
+ cur_state = (attr_mask & IB_QP_CUR_STATE) ? attr->cur_qp_state :
+ qp->state;
+ next_state = (attr_mask & IB_QP_STATE) ? attr->qp_state : cur_state;
+
+ if (!ib_modify_qp_is_ok(cur_state, next_state, ibqp->qp_type,
+ attr_mask, IB_LINK_LAYER_ETHERNET)) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ if (attr_mask & IB_QP_PORT) {
+ if (attr->port_num == 0 ||
+ attr->port_num > ibqp->device->phys_port_cnt) {
+ ret = -EINVAL;
+ goto out;
+ }
+ }
+
+ if (attr_mask & IB_QP_MIN_RNR_TIMER) {
+ if (attr->min_rnr_timer > 31) {
+ ret = -EINVAL;
+ goto out;
+ }
+ }
+
+ if (attr_mask & IB_QP_PKEY_INDEX) {
+ if (attr->pkey_index >= dev->dsr->caps.max_pkeys) {
+ ret = -EINVAL;
+ goto out;
+ }
+ }
+
+ if (attr_mask & IB_QP_QKEY)
+ qp->qkey = attr->qkey;
+
+ if (cur_state == next_state && cur_state == IB_QPS_RESET) {
+ ret = 0;
+ goto out;
+ }
+
+ qp->state = next_state;
+ memset(cmd, 0, sizeof(*cmd));
+ cmd->hdr.cmd = PVRDMA_CMD_MODIFY_QP;
+ cmd->qp_handle = qp->qp_handle;
+ cmd->attr_mask = ib_qp_attr_mask_to_pvrdma(attr_mask);
+ cmd->attrs.qp_state = ib_qp_state_to_pvrdma(attr->qp_state);
+ cmd->attrs.cur_qp_state =
+ ib_qp_state_to_pvrdma(attr->cur_qp_state);
+ cmd->attrs.path_mtu = ib_mtu_to_pvrdma(attr->path_mtu);
+ cmd->attrs.path_mig_state =
+ ib_mig_state_to_pvrdma(attr->path_mig_state);
+ cmd->attrs.qkey = attr->qkey;
+ cmd->attrs.rq_psn = attr->rq_psn;
+ cmd->attrs.sq_psn = attr->sq_psn;
+ cmd->attrs.dest_qp_num = attr->dest_qp_num;
+ cmd->attrs.qp_access_flags =
+ ib_access_flags_to_pvrdma(attr->qp_access_flags);
+ cmd->attrs.pkey_index = attr->pkey_index;
+ cmd->attrs.alt_pkey_index = attr->alt_pkey_index;
+ cmd->attrs.en_sqd_async_notify = attr->en_sqd_async_notify;
+ cmd->attrs.sq_draining = attr->sq_draining;
+ cmd->attrs.max_rd_atomic = attr->max_rd_atomic;
+ cmd->attrs.max_dest_rd_atomic = attr->max_dest_rd_atomic;
+ cmd->attrs.min_rnr_timer = attr->min_rnr_timer;
+ cmd->attrs.port_num = attr->port_num;
+ cmd->attrs.timeout = attr->timeout;
+ cmd->attrs.retry_cnt = attr->retry_cnt;
+ cmd->attrs.rnr_retry = attr->rnr_retry;
+ cmd->attrs.alt_port_num = attr->alt_port_num;
+ cmd->attrs.alt_timeout = attr->alt_timeout;
+ ib_qp_cap_to_pvrdma(&cmd->attrs.cap, &attr->cap);
+ ib_ah_attr_to_pvrdma(&cmd->attrs.ah_attr, &attr->ah_attr);
+ ib_ah_attr_to_pvrdma(&cmd->attrs.alt_ah_attr, &attr->alt_ah_attr);
+
+ ret = pvrdma_cmd_post(dev, &req, &rsp, PVRDMA_CMD_MODIFY_QP_RESP);
+ if (ret < 0) {
+ dev_warn(&dev->pdev->dev,
+ "could not modify queuepair, error: %d\n", ret);
+ } else if (rsp.hdr.err > 0) {
+ dev_warn(&dev->pdev->dev,
+ "cannot modify queuepair, error: %d\n", rsp.hdr.err);
+ ret = -EINVAL;
+ }
+
+ if (ret == 0 && next_state == IB_QPS_RESET)
+ pvrdma_reset_qp(qp);
+
+out:
+ mutex_unlock(&qp->mutex);
+
+ return ret;
+}
+
+static inline void *get_sq_wqe(struct pvrdma_qp *qp, int n)
+{
+ return pvrdma_page_dir_get_ptr(&qp->pdir,
+ qp->sq.offset + n * qp->sq.wqe_size);
+}
+
+static inline void *get_rq_wqe(struct pvrdma_qp *qp, int n)
+{
+ return pvrdma_page_dir_get_ptr(&qp->pdir,
+ qp->rq.offset + n * qp->rq.wqe_size);
+}
+
+static int set_reg_seg(struct pvrdma_sq_wqe_hdr *wqe_hdr, struct ib_reg_wr *wr)
+{
+ struct pvrdma_user_mr *mr = to_vmr(wr->mr);
+
+ wqe_hdr->wr.fast_reg.iova_start = mr->ibmr.iova;
+ wqe_hdr->wr.fast_reg.pl_pdir_dma = mr->pdir.dir_dma;
+ wqe_hdr->wr.fast_reg.page_shift = mr->page_shift;
+ wqe_hdr->wr.fast_reg.page_list_len = mr->npages;
+ wqe_hdr->wr.fast_reg.length = mr->ibmr.length;
+ wqe_hdr->wr.fast_reg.access_flags = wr->access;
+ wqe_hdr->wr.fast_reg.rkey = wr->key;
+
+ return pvrdma_page_dir_insert_page_list(&mr->pdir, mr->pages,
+ mr->npages);
+}
+
+/**
+ * pvrdma_post_send - post send work request entries on a QP
+ * @ibqp: the QP
+ * @wr: work request list to post
+ * @bad_wr: the first bad WR returned
+ *
+ * @return: 0 on success, otherwise errno returned.
+ */
+int pvrdma_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
+ struct ib_send_wr **bad_wr)
+{
+ struct pvrdma_qp *qp = to_vqp(ibqp);
+ struct pvrdma_dev *dev = to_vdev(ibqp->device);
+ unsigned long flags;
+ struct pvrdma_sq_wqe_hdr *wqe_hdr;
+ struct pvrdma_sge *sge;
+ int i, index;
+ int nreq;
+ int ret;
+
+ /*
+ * In states lower than RTS, we can fail immediately. In other states,
+ * just post and let the device figure it out.
+ */
+ if (qp->state < IB_QPS_RTS) {
+ *bad_wr = wr;
+ return -EINVAL;
+ }
+
+ spin_lock_irqsave(&qp->sq.lock, flags);
+
+ index = pvrdma_idx(&qp->sq.ring->prod_tail, qp->sq.wqe_cnt);
+ for (nreq = 0; wr; nreq++, wr = wr->next) {
+ unsigned int tail;
+
+ if (unlikely(!pvrdma_idx_ring_has_space(
+ qp->sq.ring, qp->sq.wqe_cnt, &tail))) {
+ dev_warn_ratelimited(&dev->pdev->dev,
+ "send queue is full\n");
+ *bad_wr = wr;
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ if (unlikely(wr->num_sge > qp->sq.max_sg || wr->num_sge < 0)) {
+ dev_warn_ratelimited(&dev->pdev->dev,
+ "send SGE overflow\n");
+ *bad_wr = wr;
+ ret = -EINVAL;
+ goto out;
+ }
+
+ if (unlikely(wr->opcode < 0)) {
+ dev_warn_ratelimited(&dev->pdev->dev,
+ "invalid send opcode\n");
+ *bad_wr = wr;
+ ret = -EINVAL;
+ goto out;
+ }
+
+ /*
+ * Only support UD, RC.
+ * Need to check opcode table for thorough checking.
+ * opcode _UD _UC _RC
+ * _SEND x x x
+ * _SEND_WITH_IMM x x x
+ * _RDMA_WRITE x x
+ * _RDMA_WRITE_WITH_IMM x x
+ * _LOCAL_INV x x
+ * _SEND_WITH_INV x x
+ * _RDMA_READ x
+ * _ATOMIC_CMP_AND_SWP x
+ * _ATOMIC_FETCH_AND_ADD x
+ * _MASK_ATOMIC_CMP_AND_SWP x
+ * _MASK_ATOMIC_FETCH_AND_ADD x
+ * _REG_MR x
+ *
+ */
+ if (qp->ibqp.qp_type != IB_QPT_UD &&
+ qp->ibqp.qp_type != IB_QPT_RC &&
+ wr->opcode != IB_WR_SEND) {
+ dev_warn_ratelimited(&dev->pdev->dev,
+ "unsupported queuepair type\n");
+ *bad_wr = wr;
+ ret = -EINVAL;
+ goto out;
+ } else if (qp->ibqp.qp_type == IB_QPT_UD ||
+ qp->ibqp.qp_type == IB_QPT_GSI) {
+ if (wr->opcode != IB_WR_SEND &&
+ wr->opcode != IB_WR_SEND_WITH_IMM) {
+ dev_warn_ratelimited(&dev->pdev->dev,
+ "invalid send opcode\n");
+ *bad_wr = wr;
+ ret = -EINVAL;
+ goto out;
+ }
+ }
+
+ wqe_hdr = (struct pvrdma_sq_wqe_hdr *)get_sq_wqe(qp, index);
+ memset(wqe_hdr, 0, sizeof(*wqe_hdr));
+ wqe_hdr->wr_id = wr->wr_id;
+ wqe_hdr->num_sge = wr->num_sge;
+ wqe_hdr->opcode = ib_wr_opcode_to_pvrdma(wr->opcode);
+ wqe_hdr->send_flags = ib_send_flags_to_pvrdma(wr->send_flags);
+ if (wr->opcode == IB_WR_SEND_WITH_IMM ||
+ wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM)
+ wqe_hdr->ex.imm_data = wr->ex.imm_data;
+
+ switch (qp->ibqp.qp_type) {
+ case IB_QPT_GSI:
+ case IB_QPT_UD:
+ if (unlikely(!ud_wr(wr)->ah)) {
+ dev_warn_ratelimited(&dev->pdev->dev,
+ "invalid address handle\n");
+ *bad_wr = wr;
+ ret = -EINVAL;
+ goto out;
+ }
+
+ /*
+ * Use qkey from qp context if high order bit set,
+ * otherwise from work request.
+ */
+ wqe_hdr->wr.ud.remote_qpn = ud_wr(wr)->remote_qpn;
+ wqe_hdr->wr.ud.remote_qkey =
+ ud_wr(wr)->remote_qkey & 0x80000000 ?
+ qp->qkey : ud_wr(wr)->remote_qkey;
+ wqe_hdr->wr.ud.av = to_vah(ud_wr(wr)->ah)->av;
+
+ break;
+ case IB_QPT_RC:
+ switch (wr->opcode) {
+ case IB_WR_RDMA_READ:
+ case IB_WR_RDMA_WRITE:
+ case IB_WR_RDMA_WRITE_WITH_IMM:
+ wqe_hdr->wr.rdma.remote_addr =
+ rdma_wr(wr)->remote_addr;
+ wqe_hdr->wr.rdma.rkey = rdma_wr(wr)->rkey;
+ break;
+ case IB_WR_LOCAL_INV:
+ case IB_WR_SEND_WITH_INV:
+ wqe_hdr->ex.invalidate_rkey =
+ wr->ex.invalidate_rkey;
+ break;
+ case IB_WR_ATOMIC_CMP_AND_SWP:
+ case IB_WR_ATOMIC_FETCH_AND_ADD:
+ wqe_hdr->wr.atomic.remote_addr =
+ atomic_wr(wr)->remote_addr;
+ wqe_hdr->wr.atomic.rkey = atomic_wr(wr)->rkey;
+ wqe_hdr->wr.atomic.compare_add =
+ atomic_wr(wr)->compare_add;
+ if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP)
+ wqe_hdr->wr.atomic.swap =
+ atomic_wr(wr)->swap;
+ break;
+ case IB_WR_REG_MR:
+ ret = set_reg_seg(wqe_hdr, reg_wr(wr));
+ if (ret < 0) {
+ dev_warn_ratelimited(&dev->pdev->dev,
+ "Failed to set fast register work request\n");
+ *bad_wr = wr;
+ goto out;
+ }
+ break;
+ default:
+ break;
+ }
+
+ break;
+ default:
+ dev_warn_ratelimited(&dev->pdev->dev,
+ "invalid queuepair type\n");
+ ret = -EINVAL;
+ *bad_wr = wr;
+ goto out;
+ }
+
+ sge = (struct pvrdma_sge *)(wqe_hdr + 1);
+ for (i = 0; i < wr->num_sge; i++) {
+ /* Need to check wqe_size 0 or max size */
+ sge->addr = wr->sg_list[i].addr;
+ sge->length = wr->sg_list[i].length;
+ sge->lkey = wr->sg_list[i].lkey;
+ sge++;
+ }
+
+ /* Make sure wqe is written before index update */
+ smp_wmb();
+
+ index++;
+ if (unlikely(index >= qp->sq.wqe_cnt))
+ index = 0;
+ /* Update shared sq ring */
+ pvrdma_idx_ring_inc(&qp->sq.ring->prod_tail,
+ qp->sq.wqe_cnt);
+ }
+
+ ret = 0;
+
+out:
+ spin_unlock_irqrestore(&qp->sq.lock, flags);
+
+ if (!ret)
+ pvrdma_write_uar_qp(dev, PVRDMA_UAR_QP_SEND | qp->qp_handle);
+
+ return ret;
+}
+
+/**
+ * pvrdma_post_receive - post receive work request entries on a QP
+ * @ibqp: the QP
+ * @wr: the work request list to post
+ * @bad_wr: the first bad WR returned
+ *
+ * @return: 0 on success, otherwise errno returned.
+ */
+int pvrdma_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
+ struct ib_recv_wr **bad_wr)
+{
+ struct pvrdma_dev *dev = to_vdev(ibqp->device);
+ unsigned long flags;
+ struct pvrdma_qp *qp = to_vqp(ibqp);
+ struct pvrdma_rq_wqe_hdr *wqe_hdr;
+ struct pvrdma_sge *sge;
+ int index, nreq;
+ int ret = 0;
+ int i;
+
+ /*
+ * In the RESET state, we can fail immediately. For other states,
+ * just post and let the device figure it out.
+ */
+ if (qp->state == IB_QPS_RESET) {
+ *bad_wr = wr;
+ return -EINVAL;
+ }
+
+ spin_lock_irqsave(&qp->rq.lock, flags);
+
+ index = pvrdma_idx(&qp->rq.ring->prod_tail, qp->rq.wqe_cnt);
+ for (nreq = 0; wr; nreq++, wr = wr->next) {
+ unsigned int tail;
+
+ if (unlikely(wr->num_sge > qp->rq.max_sg ||
+ wr->num_sge < 0)) {
+ ret = -EINVAL;
+ *bad_wr = wr;
+ dev_warn_ratelimited(&dev->pdev->dev,
+ "recv SGE overflow\n");
+ goto out;
+ }
+
+ if (unlikely(!pvrdma_idx_ring_has_space(
+ qp->rq.ring, qp->rq.wqe_cnt, &tail))) {
+ ret = -ENOMEM;
+ *bad_wr = wr;
+ dev_warn_ratelimited(&dev->pdev->dev,
+ "recv queue full\n");
+ goto out;
+ }
+
+ wqe_hdr = (struct pvrdma_rq_wqe_hdr *)get_rq_wqe(qp, index);
+ wqe_hdr->wr_id = wr->wr_id;
+ wqe_hdr->num_sge = wr->num_sge;
+ wqe_hdr->total_len = 0;
+
+ sge = (struct pvrdma_sge *)(wqe_hdr + 1);
+ for (i = 0; i < wr->num_sge; i++) {
+ sge->addr = wr->sg_list[i].addr;
+ sge->length = wr->sg_list[i].length;
+ sge->lkey = wr->sg_list[i].lkey;
+ sge++;
+ }
+
+ /* Make sure wqe is written before index update */
+ smp_wmb();
+
+ index++;
+ if (unlikely(index >= qp->rq.wqe_cnt))
+ index = 0;
+ /* Update shared rq ring */
+ pvrdma_idx_ring_inc(&qp->rq.ring->prod_tail,
+ qp->rq.wqe_cnt);
+ }
+
+ spin_unlock_irqrestore(&qp->rq.lock, flags);
+
+ pvrdma_write_uar_qp(dev, PVRDMA_UAR_QP_RECV | qp->qp_handle);
+
+ return ret;
+
+out:
+ spin_unlock_irqrestore(&qp->rq.lock, flags);
+
+ return ret;
+}
+
+/**
+ * pvrdma_query_qp - query a queue pair's attributes
+ * @ibqp: the queue pair to query
+ * @attr: the queue pair's attributes
+ * @attr_mask: attributes mask
+ * @init_attr: initial queue pair attributes
+ *
+ * @returns 0 on success, otherwise returns an errno.
+ */
+int pvrdma_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+ int attr_mask, struct ib_qp_init_attr *init_attr)
+{
+ struct pvrdma_dev *dev = to_vdev(ibqp->device);
+ struct pvrdma_qp *qp = to_vqp(ibqp);
+ union pvrdma_cmd_req req;
+ union pvrdma_cmd_resp rsp;
+ struct pvrdma_cmd_query_qp *cmd = &req.query_qp;
+ struct pvrdma_cmd_query_qp_resp *resp = &rsp.query_qp_resp;
+ int ret = 0;
+
+ mutex_lock(&qp->mutex);
+
+ if (qp->state == IB_QPS_RESET) {
+ attr->qp_state = IB_QPS_RESET;
+ goto out;
+ }
+
+ memset(cmd, 0, sizeof(*cmd));
+ cmd->hdr.cmd = PVRDMA_CMD_QUERY_QP;
+ cmd->qp_handle = qp->qp_handle;
+ cmd->attr_mask = ib_qp_attr_mask_to_pvrdma(attr_mask);
+
+ ret = pvrdma_cmd_post(dev, &req, &rsp, PVRDMA_CMD_QUERY_QP_RESP);
+ if (ret < 0) {
+ dev_warn(&dev->pdev->dev,
+ "could not query queuepair, error: %d\n", ret);
+ goto out;
+ }
+
+ attr->qp_state = pvrdma_qp_state_to_ib(resp->attrs.qp_state);
+ attr->cur_qp_state =
+ pvrdma_qp_state_to_ib(resp->attrs.cur_qp_state);
+ attr->path_mtu = pvrdma_mtu_to_ib(resp->attrs.path_mtu);
+ attr->path_mig_state =
+ pvrdma_mig_state_to_ib(resp->attrs.path_mig_state);
+ attr->qkey = resp->attrs.qkey;
+ attr->rq_psn = resp->attrs.rq_psn;
+ attr->sq_psn = resp->attrs.sq_psn;
+ attr->dest_qp_num = resp->attrs.dest_qp_num;
+ attr->qp_access_flags =
+ pvrdma_access_flags_to_ib(resp->attrs.qp_access_flags);
+ attr->pkey_index = resp->attrs.pkey_index;
+ attr->alt_pkey_index = resp->attrs.alt_pkey_index;
+ attr->en_sqd_async_notify = resp->attrs.en_sqd_async_notify;
+ attr->sq_draining = resp->attrs.sq_draining;
+ attr->max_rd_atomic = resp->attrs.max_rd_atomic;
+ attr->max_dest_rd_atomic = resp->attrs.max_dest_rd_atomic;
+ attr->min_rnr_timer = resp->attrs.min_rnr_timer;
+ attr->port_num = resp->attrs.port_num;
+ attr->timeout = resp->attrs.timeout;
+ attr->retry_cnt = resp->attrs.retry_cnt;
+ attr->rnr_retry = resp->attrs.rnr_retry;
+ attr->alt_port_num = resp->attrs.alt_port_num;
+ attr->alt_timeout = resp->attrs.alt_timeout;
+ pvrdma_qp_cap_to_ib(&attr->cap, &resp->attrs.cap);
+ pvrdma_ah_attr_to_ib(&attr->ah_attr, &resp->attrs.ah_attr);
+ pvrdma_ah_attr_to_ib(&attr->alt_ah_attr, &resp->attrs.alt_ah_attr);
+
+ qp->state = attr->qp_state;
+
+ ret = 0;
+
+out:
+ attr->cur_qp_state = attr->qp_state;
+
+ init_attr->event_handler = qp->ibqp.event_handler;
+ init_attr->qp_context = qp->ibqp.qp_context;
+ init_attr->send_cq = qp->ibqp.send_cq;
+ init_attr->recv_cq = qp->ibqp.recv_cq;
+ init_attr->srq = qp->ibqp.srq;
+ init_attr->xrcd = NULL;
+ init_attr->cap = attr->cap;
+ init_attr->sq_sig_type = 0;
+ init_attr->qp_type = qp->ibqp.qp_type;
+ init_attr->create_flags = 0;
+ init_attr->port_num = qp->port;
+
+ mutex_unlock(&qp->mutex);
+ return ret;
+}
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_ring.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_ring.h
new file mode 100644
index 0000000..ed9022a
--- /dev/null
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_ring.h
@@ -0,0 +1,131 @@
+/*
+ * Copyright (c) 2012-2016 VMware, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of EITHER the GNU General Public License
+ * version 2 as published by the Free Software Foundation or the BSD
+ * 2-Clause License. This program is distributed in the hope that it
+ * will be useful, but WITHOUT ANY WARRANTY; WITHOUT EVEN THE IMPLIED
+ * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU General Public License version 2 for more details at
+ * http://www.gnu.org/licenses/old-licenses/gpl-2.0.en.html.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program available in the file COPYING in the main
+ * directory of this source tree.
+ *
+ * The BSD 2-Clause License
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __PVRDMA_RING_H__
+#define __PVRDMA_RING_H__
+
+#include <linux/types.h>
+
+#define PVRDMA_INVALID_IDX -1 /* Invalid index. */
+
+struct pvrdma_ring {
+ atomic_t prod_tail; /* Producer tail. */
+ atomic_t cons_head; /* Consumer head. */
+};
+
+struct pvrdma_ring_state {
+ struct pvrdma_ring tx; /* Tx ring. */
+ struct pvrdma_ring rx; /* Rx ring. */
+};
+
+static inline int pvrdma_idx_valid(__u32 idx, __u32 max_elems)
+{
+ /* Generates fewer instructions than a less-than. */
+ return (idx & ~((max_elems << 1) - 1)) == 0;
+}
+
+static inline __s32 pvrdma_idx(atomic_t *var, __u32 max_elems)
+{
+ const unsigned int idx = atomic_read(var);
+
+ if (pvrdma_idx_valid(idx, max_elems))
+ return idx & (max_elems - 1);
+ return PVRDMA_INVALID_IDX;
+}
+
+static inline void pvrdma_idx_ring_inc(atomic_t *var, __u32 max_elems)
+{
+ __u32 idx = atomic_read(var) + 1; /* Increment. */
+
+ idx &= (max_elems << 1) - 1; /* Modulo size, flip gen. */
+ atomic_set(var, idx);
+}
+
+static inline __s32 pvrdma_idx_ring_has_space(const struct pvrdma_ring *r,
+ __u32 max_elems, __u32 *out_tail)
+{
+ const __u32 tail = atomic_read(&r->prod_tail);
+ const __u32 head = atomic_read(&r->cons_head);
+
+ if (pvrdma_idx_valid(tail, max_elems) &&
+ pvrdma_idx_valid(head, max_elems)) {
+ *out_tail = tail & (max_elems - 1);
+ return tail != (head ^ max_elems);
+ }
+ return PVRDMA_INVALID_IDX;
+}
+
+static inline __s32 pvrdma_idx_ring_has_data(const struct pvrdma_ring *r,
+ __u32 max_elems, __u32 *out_head)
+{
+ const __u32 tail = atomic_read(&r->prod_tail);
+ const __u32 head = atomic_read(&r->cons_head);
+
+ if (pvrdma_idx_valid(tail, max_elems) &&
+ pvrdma_idx_valid(head, max_elems)) {
+ *out_head = head & (max_elems - 1);
+ return tail != head;
+ }
+ return PVRDMA_INVALID_IDX;
+}
+
+static inline bool pvrdma_idx_ring_is_valid_idx(const struct pvrdma_ring *r,
+ __u32 max_elems, __u32 *idx)
+{
+ const __u32 tail = atomic_read(&r->prod_tail);
+ const __u32 head = atomic_read(&r->cons_head);
+
+ if (pvrdma_idx_valid(tail, max_elems) &&
+ pvrdma_idx_valid(head, max_elems) &&
+ pvrdma_idx_valid(*idx, max_elems)) {
+ if (tail > head && (*idx < tail && *idx >= head))
+ return true;
+ else if (head > tail && (*idx >= head || *idx < tail))
+ return true;
+ }
+ return false;
+}
+
+#endif /* __PVRDMA_RING_H__ */
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c
new file mode 100644
index 0000000..5489137
--- /dev/null
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c
@@ -0,0 +1,579 @@
+/*
+ * Copyright (c) 2012-2016 VMware, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of EITHER the GNU General Public License
+ * version 2 as published by the Free Software Foundation or the BSD
+ * 2-Clause License. This program is distributed in the hope that it
+ * will be useful, but WITHOUT ANY WARRANTY; WITHOUT EVEN THE IMPLIED
+ * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU General Public License version 2 for more details at
+ * http://www.gnu.org/licenses/old-licenses/gpl-2.0.en.html.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program available in the file COPYING in the main
+ * directory of this source tree.
+ *
+ * The BSD 2-Clause License
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <asm/page.h>
+#include <linux/inet.h>
+#include <linux/io.h>
+#include <rdma/ib_addr.h>
+#include <rdma/ib_smi.h>
+#include <rdma/ib_user_verbs.h>
+#include <rdma/vmw_pvrdma-abi.h>
+
+#include "pvrdma.h"
+
+/**
+ * pvrdma_query_device - query device
+ * @ibdev: the device to query
+ * @props: the device properties
+ * @uhw: user data
+ *
+ * @return: 0 on success, otherwise negative errno
+ */
+int pvrdma_query_device(struct ib_device *ibdev,
+ struct ib_device_attr *props,
+ struct ib_udata *uhw)
+{
+ struct pvrdma_dev *dev = to_vdev(ibdev);
+
+ if (uhw->inlen || uhw->outlen)
+ return -EINVAL;
+
+ memset(props, 0, sizeof(*props));
+
+ props->fw_ver = dev->dsr->caps.fw_ver;
+ props->sys_image_guid = dev->dsr->caps.sys_image_guid;
+ props->max_mr_size = dev->dsr->caps.max_mr_size;
+ props->page_size_cap = dev->dsr->caps.page_size_cap;
+ props->vendor_id = dev->dsr->caps.vendor_id;
+ props->vendor_part_id = dev->pdev->device;
+ props->hw_ver = dev->dsr->caps.hw_ver;
+ props->max_qp = dev->dsr->caps.max_qp;
+ props->max_qp_wr = dev->dsr->caps.max_qp_wr;
+ props->device_cap_flags = dev->dsr->caps.device_cap_flags;
+ props->max_sge = dev->dsr->caps.max_sge;
+ props->max_cq = dev->dsr->caps.max_cq;
+ props->max_cqe = dev->dsr->caps.max_cqe;
+ props->max_mr = dev->dsr->caps.max_mr;
+ props->max_pd = dev->dsr->caps.max_pd;
+ props->max_qp_rd_atom = dev->dsr->caps.max_qp_rd_atom;
+ props->max_qp_init_rd_atom = dev->dsr->caps.max_qp_init_rd_atom;
+ props->atomic_cap =
+ dev->dsr->caps.atomic_ops &
+ (PVRDMA_ATOMIC_OP_COMP_SWAP | PVRDMA_ATOMIC_OP_FETCH_ADD) ?
+ IB_ATOMIC_HCA : IB_ATOMIC_NONE;
+ props->masked_atomic_cap = props->atomic_cap;
+ props->max_ah = dev->dsr->caps.max_ah;
+ props->max_pkeys = dev->dsr->caps.max_pkeys;
+ props->local_ca_ack_delay = dev->dsr->caps.local_ca_ack_delay;
+ if ((dev->dsr->caps.bmme_flags & PVRDMA_BMME_FLAG_LOCAL_INV) &&
+ (dev->dsr->caps.bmme_flags & PVRDMA_BMME_FLAG_REMOTE_INV) &&
+ (dev->dsr->caps.bmme_flags & PVRDMA_BMME_FLAG_FAST_REG_WR)) {
+ props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS;
+ }
+
+ return 0;
+}
+
+/**
+ * pvrdma_query_port - query device port attributes
+ * @ibdev: the device to query
+ * @port: the port number
+ * @props: the device properties
+ *
+ * @return: 0 on success, otherwise negative errno
+ */
+int pvrdma_query_port(struct ib_device *ibdev, u8 port,
+ struct ib_port_attr *props)
+{
+ struct pvrdma_dev *dev = to_vdev(ibdev);
+ union pvrdma_cmd_req req;
+ union pvrdma_cmd_resp rsp;
+ struct pvrdma_cmd_query_port *cmd = &req.query_port;
+ struct pvrdma_cmd_query_port_resp *resp = &rsp.query_port_resp;
+ int err;
+
+ memset(cmd, 0, sizeof(*cmd));
+ cmd->hdr.cmd = PVRDMA_CMD_QUERY_PORT;
+ cmd->port_num = port;
+
+ err = pvrdma_cmd_post(dev, &req, &rsp, PVRDMA_CMD_QUERY_PORT_RESP);
+ if (err < 0) {
+ dev_warn(&dev->pdev->dev,
+ "could not query port, error: %d\n", err);
+ return err;
+ }
+
+ memset(props, 0, sizeof(*props));
+
+ props->state = pvrdma_port_state_to_ib(resp->attrs.state);
+ props->max_mtu = pvrdma_mtu_to_ib(resp->attrs.max_mtu);
+ props->active_mtu = pvrdma_mtu_to_ib(resp->attrs.active_mtu);
+ props->gid_tbl_len = resp->attrs.gid_tbl_len;
+ props->port_cap_flags =
+ pvrdma_port_cap_flags_to_ib(resp->attrs.port_cap_flags);
+ props->max_msg_sz = resp->attrs.max_msg_sz;
+ props->bad_pkey_cntr = resp->attrs.bad_pkey_cntr;
+ props->qkey_viol_cntr = resp->attrs.qkey_viol_cntr;
+ props->pkey_tbl_len = resp->attrs.pkey_tbl_len;
+ props->lid = resp->attrs.lid;
+ props->sm_lid = resp->attrs.sm_lid;
+ props->lmc = resp->attrs.lmc;
+ props->max_vl_num = resp->attrs.max_vl_num;
+ props->sm_sl = resp->attrs.sm_sl;
+ props->subnet_timeout = resp->attrs.subnet_timeout;
+ props->init_type_reply = resp->attrs.init_type_reply;
+ props->active_width = pvrdma_port_width_to_ib(resp->attrs.active_width);
+ props->active_speed = pvrdma_port_speed_to_ib(resp->attrs.active_speed);
+ props->phys_state = resp->attrs.phys_state;
+
+ return 0;
+}
+
+/**
+ * pvrdma_query_gid - query device gid
+ * @ibdev: the device to query
+ * @port: the port number
+ * @index: the index
+ * @gid: the device gid value
+ *
+ * @return: 0 on success, otherwise negative errno
+ */
+int pvrdma_query_gid(struct ib_device *ibdev, u8 port, int index,
+ union ib_gid *gid)
+{
+ struct pvrdma_dev *dev = to_vdev(ibdev);
+
+ if (index >= dev->dsr->caps.gid_tbl_len)
+ return -EINVAL;
+
+ memcpy(gid, &dev->sgid_tbl[index], sizeof(union ib_gid));
+
+ return 0;
+}
+
+/**
+ * pvrdma_query_pkey - query device port's P_Key table
+ * @ibdev: the device to query
+ * @port: the port number
+ * @index: the index
+ * @pkey: the device P_Key value
+ *
+ * @return: 0 on success, otherwise negative errno
+ */
+int pvrdma_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
+ u16 *pkey)
+{
+ int err = 0;
+ union pvrdma_cmd_req req;
+ union pvrdma_cmd_resp rsp;
+ struct pvrdma_cmd_query_pkey *cmd = &req.query_pkey;
+
+ memset(cmd, 0, sizeof(*cmd));
+ cmd->hdr.cmd = PVRDMA_CMD_QUERY_PKEY;
+ cmd->port_num = port;
+ cmd->index = index;
+
+ err = pvrdma_cmd_post(to_vdev(ibdev), &req, &rsp,
+ PVRDMA_CMD_QUERY_PKEY_RESP);
+ if (err < 0) {
+ dev_warn(&to_vdev(ibdev)->pdev->dev,
+ "could not query pkey, error: %d\n", err);
+ return err;
+ }
+
+ *pkey = rsp.query_pkey_resp.pkey;
+
+ return 0;
+}
+
+enum rdma_link_layer pvrdma_port_link_layer(struct ib_device *ibdev,
+ u8 port)
+{
+ return IB_LINK_LAYER_ETHERNET;
+}
+
+int pvrdma_modify_device(struct ib_device *ibdev, int mask,
+ struct ib_device_modify *props)
+{
+ unsigned long flags;
+
+ if (mask & ~(IB_DEVICE_MODIFY_SYS_IMAGE_GUID |
+ IB_DEVICE_MODIFY_NODE_DESC)) {
+ dev_warn(&to_vdev(ibdev)->pdev->dev,
+ "unsupported device modify mask %#x\n", mask);
+ return -EOPNOTSUPP;
+ }
+
+ if (mask & IB_DEVICE_MODIFY_NODE_DESC) {
+ spin_lock_irqsave(&to_vdev(ibdev)->desc_lock, flags);
+ memcpy(ibdev->node_desc, props->node_desc, 64);
+ spin_unlock_irqrestore(&to_vdev(ibdev)->desc_lock, flags);
+ }
+
+ if (mask & IB_DEVICE_MODIFY_SYS_IMAGE_GUID) {
+ mutex_lock(&to_vdev(ibdev)->port_mutex);
+ to_vdev(ibdev)->sys_image_guid =
+ cpu_to_be64(props->sys_image_guid);
+ mutex_unlock(&to_vdev(ibdev)->port_mutex);
+ }
+
+ return 0;
+}
+
+/**
+ * pvrdma_modify_port - modify device port attributes
+ * @ibdev: the device to modify
+ * @port: the port number
+ * @mask: attributes to modify
+ * @props: the device properties
+ *
+ * @return: 0 on success, otherwise negative errno
+ */
+int pvrdma_modify_port(struct ib_device *ibdev, u8 port, int mask,
+ struct ib_port_modify *props)
+{
+ struct ib_port_attr attr;
+ struct pvrdma_dev *vdev = to_vdev(ibdev);
+ int ret;
+
+ if (mask & ~IB_PORT_SHUTDOWN) {
+ dev_warn(&vdev->pdev->dev,
+ "unsupported port modify mask %#x\n", mask);
+ return -EOPNOTSUPP;
+ }
+
+ mutex_lock(&vdev->port_mutex);
+ ret = pvrdma_query_port(ibdev, port, &attr);
+ if (ret)
+ goto out;
+
+ vdev->port_cap_mask |= props->set_port_cap_mask;
+ vdev->port_cap_mask &= ~props->clr_port_cap_mask;
+
+ if (mask & IB_PORT_SHUTDOWN)
+ vdev->ib_active = false;
+
+out:
+ mutex_unlock(&vdev->port_mutex);
+ return ret;
+}
+
+/**
+ * pvrdma_alloc_ucontext - allocate ucontext
+ * @ibdev: the IB device
+ * @udata: user data
+ *
+ * @return: the ib_ucontext pointer on success, otherwise errno.
+ */
+struct ib_ucontext *pvrdma_alloc_ucontext(struct ib_device *ibdev,
+ struct ib_udata *udata)
+{
+ struct pvrdma_dev *vdev = to_vdev(ibdev);
+ struct pvrdma_ucontext *context;
+ union pvrdma_cmd_req req;
+ union pvrdma_cmd_resp rsp;
+ struct pvrdma_cmd_create_uc *cmd = &req.create_uc;
+ struct pvrdma_cmd_create_uc_resp *resp = &rsp.create_uc_resp;
+ struct pvrdma_alloc_ucontext_resp uresp;
+ int ret;
+ void *ptr;
+
+ if (!vdev->ib_active)
+ return ERR_PTR(-EAGAIN);
+
+ context = kmalloc(sizeof(*context), GFP_KERNEL);
+ if (!context)
+ return ERR_PTR(-ENOMEM);
+
+ context->dev = vdev;
+ ret = pvrdma_uar_alloc(vdev, &context->uar);
+ if (ret) {
+ kfree(context);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ /* get ctx_handle from host */
+ memset(cmd, 0, sizeof(*cmd));
+ cmd->pfn = context->uar.pfn;
+ cmd->hdr.cmd = PVRDMA_CMD_CREATE_UC;
+ ret = pvrdma_cmd_post(vdev, &req, &rsp, PVRDMA_CMD_CREATE_UC_RESP);
+ if (ret < 0) {
+ dev_warn(&vdev->pdev->dev,
+ "could not create ucontext, error: %d\n", ret);
+ ptr = ERR_PTR(ret);
+ goto err;
+ }
+
+ context->ctx_handle = resp->ctx_handle;
+
+ /* copy back to user */
+ uresp.qp_tab_size = vdev->dsr->caps.max_qp;
+ ret = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
+ if (ret) {
+ pvrdma_uar_free(vdev, &context->uar);
+ context->ibucontext.device = ibdev;
+ pvrdma_dealloc_ucontext(&context->ibucontext);
+ return ERR_PTR(-EFAULT);
+ }
+
+ return &context->ibucontext;
+
+err:
+ pvrdma_uar_free(vdev, &context->uar);
+ kfree(context);
+ return ptr;
+}
+
+/**
+ * pvrdma_dealloc_ucontext - deallocate ucontext
+ * @ibcontext: the ucontext
+ *
+ * @return: 0 on success, otherwise errno.
+ */
+int pvrdma_dealloc_ucontext(struct ib_ucontext *ibcontext)
+{
+ struct pvrdma_ucontext *context = to_vucontext(ibcontext);
+ union pvrdma_cmd_req req;
+ struct pvrdma_cmd_destroy_uc *cmd = &req.destroy_uc;
+ int ret;
+
+ memset(cmd, 0, sizeof(*cmd));
+ cmd->hdr.cmd = PVRDMA_CMD_DESTROY_UC;
+ cmd->ctx_handle = context->ctx_handle;
+
+ ret = pvrdma_cmd_post(context->dev, &req, NULL, 0);
+ if (ret < 0)
+ dev_warn(&context->dev->pdev->dev,
+ "destroy ucontext failed, error: %d\n", ret);
+
+ /* Free the UAR even if the device command failed */
+ pvrdma_uar_free(to_vdev(ibcontext->device), &context->uar);
+ kfree(context);
+
+ return ret;
+}
+
+/**
+ * pvrdma_mmap - create mmap region
+ * @ibcontext: the user context
+ * @vma: the VMA
+ *
+ * @return: 0 on success, otherwise errno.
+ */
+int pvrdma_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma)
+{
+ struct pvrdma_ucontext *context = to_vucontext(ibcontext);
+ unsigned long start = vma->vm_start;
+ unsigned long size = vma->vm_end - vma->vm_start;
+ unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;
+
+ dev_dbg(&context->dev->pdev->dev, "create mmap region\n");
+
+ if ((size != PAGE_SIZE) || (offset & ~PAGE_MASK)) {
+ dev_warn(&context->dev->pdev->dev,
+ "invalid params for mmap region\n");
+ return -EINVAL;
+ }
+
+ /* Map UAR to kernel space, VM_LOCKED? */
+ vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND;
+ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+ if (io_remap_pfn_range(vma, start, context->uar.pfn, size,
+ vma->vm_page_prot))
+ return -EAGAIN;
+
+ return 0;
+}
+
+/**
+ * pvrdma_alloc_pd - allocate protection domain
+ * @ibdev: the IB device
+ * @context: user context
+ * @udata: user data
+ *
+ * @return: the ib_pd protection domain pointer on success, otherwise errno.
+ */
+struct ib_pd *pvrdma_alloc_pd(struct ib_device *ibdev,
+ struct ib_ucontext *context,
+ struct ib_udata *udata)
+{
+ struct pvrdma_pd *pd;
+ struct pvrdma_dev *dev = to_vdev(ibdev);
+ union pvrdma_cmd_req req;
+ union pvrdma_cmd_resp rsp;
+ struct pvrdma_cmd_create_pd *cmd = &req.create_pd;
+ struct pvrdma_cmd_create_pd_resp *resp = &rsp.create_pd_resp;
+ int ret;
+ void *ptr;
+
+ /* Check allowed max pds */
+ if (!atomic_add_unless(&dev->num_pds, 1, dev->dsr->caps.max_pd))
+ return ERR_PTR(-ENOMEM);
+
+ pd = kmalloc(sizeof(*pd), GFP_KERNEL);
+ if (!pd) {
+ ptr = ERR_PTR(-ENOMEM);
+ goto err;
+ }
+
+ memset(cmd, 0, sizeof(*cmd));
+ cmd->hdr.cmd = PVRDMA_CMD_CREATE_PD;
+ cmd->ctx_handle = (context) ? to_vucontext(context)->ctx_handle : 0;
+ ret = pvrdma_cmd_post(dev, &req, &rsp, PVRDMA_CMD_CREATE_PD_RESP);
+ if (ret < 0) {
+ dev_warn(&dev->pdev->dev,
+ "failed to allocate protection domain, error: %d\n",
+ ret);
+ ptr = ERR_PTR(ret);
+ goto freepd;
+ }
+
+ pd->privileged = !context;
+ pd->pd_handle = resp->pd_handle;
+ pd->pdn = resp->pd_handle;
+
+ if (context) {
+ if (ib_copy_to_udata(udata, &pd->pdn, sizeof(__u32))) {
+ dev_warn(&dev->pdev->dev,
+ "failed to copy back protection domain\n");
+ pvrdma_dealloc_pd(&pd->ibpd);
+ return ERR_PTR(-EFAULT);
+ }
+ }
+
+ /* u32 pd handle */
+ return &pd->ibpd;
+
+freepd:
+ kfree(pd);
+err:
+ atomic_dec(&dev->num_pds);
+ return ptr;
+}
+
+/**
+ * pvrdma_dealloc_pd - deallocate protection domain
+ * @pd: the protection domain to be released
+ *
+ * @return: 0 on success, otherwise errno.
+ */
+int pvrdma_dealloc_pd(struct ib_pd *pd)
+{
+ struct pvrdma_dev *dev = to_vdev(pd->device);
+ union pvrdma_cmd_req req;
+ struct pvrdma_cmd_destroy_pd *cmd = &req.destroy_pd;
+ int ret;
+
+ memset(cmd, 0, sizeof(*cmd));
+ cmd->hdr.cmd = PVRDMA_CMD_DESTROY_PD;
+ cmd->pd_handle = to_vpd(pd)->pd_handle;
+
+ ret = pvrdma_cmd_post(dev, &req, NULL, 0);
+ if (ret)
+ dev_warn(&dev->pdev->dev,
+ "could not dealloc protection domain, error: %d\n",
+ ret);
+
+ kfree(to_vpd(pd));
+ atomic_dec(&dev->num_pds);
+
+ return 0;
+}
+
+/**
+ * pvrdma_create_ah - create an address handle
+ * @pd: the protection domain
+ * @ah_attr: the attributes of the AH
+ * @udata: user data blob
+ *
+ * @return: the ib_ah pointer on success, otherwise errno.
+ */
+struct ib_ah *pvrdma_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr,
+ struct ib_udata *udata)
+{
+ struct pvrdma_dev *dev = to_vdev(pd->device);
+ struct pvrdma_ah *ah;
+ enum rdma_link_layer ll;
+
+ if (!(ah_attr->ah_flags & IB_AH_GRH))
+ return ERR_PTR(-EINVAL);
+
+ ll = rdma_port_get_link_layer(pd->device, ah_attr->port_num);
+
+ if (ll != IB_LINK_LAYER_ETHERNET ||
+ rdma_is_multicast_addr((struct in6_addr *)ah_attr->grh.dgid.raw))
+ return ERR_PTR(-EINVAL);
+
+ if (!atomic_add_unless(&dev->num_ahs, 1, dev->dsr->caps.max_ah))
+ return ERR_PTR(-ENOMEM);
+
+ ah = kzalloc(sizeof(*ah), GFP_KERNEL);
+ if (!ah) {
+ atomic_dec(&dev->num_ahs);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ ah->av.port_pd = to_vpd(pd)->pd_handle | (ah_attr->port_num << 24);
+ ah->av.src_path_bits = ah_attr->src_path_bits;
+ ah->av.src_path_bits |= 0x80;
+ ah->av.gid_index = ah_attr->grh.sgid_index;
+ ah->av.hop_limit = ah_attr->grh.hop_limit;
+ ah->av.sl_tclass_flowlabel = (ah_attr->grh.traffic_class << 20) |
+ ah_attr->grh.flow_label;
+ memcpy(ah->av.dgid, ah_attr->grh.dgid.raw, 16);
+ memcpy(ah->av.dmac, ah_attr->dmac, 6);
+
+ ah->ibah.device = pd->device;
+ ah->ibah.pd = pd;
+ ah->ibah.uobject = NULL;
+
+ return &ah->ibah;
+}
+
+/**
+ * pvrdma_destroy_ah - destroy an address handle
+ * @ah: the address handle to destroyed
+ *
+ * @return: 0 on success.
+ */
+int pvrdma_destroy_ah(struct ib_ah *ah)
+{
+ struct pvrdma_dev *dev = to_vdev(ah->device);
+
+ kfree(to_vah(ah));
+ atomic_dec(&dev->num_ahs);
+
+ return 0;
+}
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h
new file mode 100644
index 0000000..bfbe96b
--- /dev/null
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.h
@@ -0,0 +1,436 @@
+/*
+ * Copyright (c) 2012-2016 VMware, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of EITHER the GNU General Public License
+ * version 2 as published by the Free Software Foundation or the BSD
+ * 2-Clause License. This program is distributed in the hope that it
+ * will be useful, but WITHOUT ANY WARRANTY; WITHOUT EVEN THE IMPLIED
+ * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU General Public License version 2 for more details at
+ * http://www.gnu.org/licenses/old-licenses/gpl-2.0.en.html.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program available in the file COPYING in the main
+ * directory of this source tree.
+ *
+ * The BSD 2-Clause License
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __PVRDMA_VERBS_H__
+#define __PVRDMA_VERBS_H__
+
+#include <linux/types.h>
+
+union pvrdma_gid {
+ u8 raw[16];
+ struct {
+ __be64 subnet_prefix;
+ __be64 interface_id;
+ } global;
+};
+
+enum pvrdma_link_layer {
+ PVRDMA_LINK_LAYER_UNSPECIFIED,
+ PVRDMA_LINK_LAYER_INFINIBAND,
+ PVRDMA_LINK_LAYER_ETHERNET,
+};
+
+enum pvrdma_mtu {
+ PVRDMA_MTU_256 = 1,
+ PVRDMA_MTU_512 = 2,
+ PVRDMA_MTU_1024 = 3,
+ PVRDMA_MTU_2048 = 4,
+ PVRDMA_MTU_4096 = 5,
+};
+
+static inline int pvrdma_mtu_enum_to_int(enum pvrdma_mtu mtu)
+{
+ switch (mtu) {
+ case PVRDMA_MTU_256: return 256;
+ case PVRDMA_MTU_512: return 512;
+ case PVRDMA_MTU_1024: return 1024;
+ case PVRDMA_MTU_2048: return 2048;
+ case PVRDMA_MTU_4096: return 4096;
+ default: return -1;
+ }
+}
+
+static inline enum pvrdma_mtu pvrdma_mtu_int_to_enum(int mtu)
+{
+ switch (mtu) {
+ case 256: return PVRDMA_MTU_256;
+ case 512: return PVRDMA_MTU_512;
+ case 1024: return PVRDMA_MTU_1024;
+ case 2048: return PVRDMA_MTU_2048;
+ case 4096:
+ default: return PVRDMA_MTU_4096;
+ }
+}
+
+enum pvrdma_port_state {
+ PVRDMA_PORT_NOP = 0,
+ PVRDMA_PORT_DOWN = 1,
+ PVRDMA_PORT_INIT = 2,
+ PVRDMA_PORT_ARMED = 3,
+ PVRDMA_PORT_ACTIVE = 4,
+ PVRDMA_PORT_ACTIVE_DEFER = 5,
+};
+
+enum pvrdma_port_cap_flags {
+ PVRDMA_PORT_SM = 1 << 1,
+ PVRDMA_PORT_NOTICE_SUP = 1 << 2,
+ PVRDMA_PORT_TRAP_SUP = 1 << 3,
+ PVRDMA_PORT_OPT_IPD_SUP = 1 << 4,
+ PVRDMA_PORT_AUTO_MIGR_SUP = 1 << 5,
+ PVRDMA_PORT_SL_MAP_SUP = 1 << 6,
+ PVRDMA_PORT_MKEY_NVRAM = 1 << 7,
+ PVRDMA_PORT_PKEY_NVRAM = 1 << 8,
+ PVRDMA_PORT_LED_INFO_SUP = 1 << 9,
+ PVRDMA_PORT_SM_DISABLED = 1 << 10,
+ PVRDMA_PORT_SYS_IMAGE_GUID_SUP = 1 << 11,
+ PVRDMA_PORT_PKEY_SW_EXT_PORT_TRAP_SUP = 1 << 12,
+ PVRDMA_PORT_EXTENDED_SPEEDS_SUP = 1 << 14,
+ PVRDMA_PORT_CM_SUP = 1 << 16,
+ PVRDMA_PORT_SNMP_TUNNEL_SUP = 1 << 17,
+ PVRDMA_PORT_REINIT_SUP = 1 << 18,
+ PVRDMA_PORT_DEVICE_MGMT_SUP = 1 << 19,
+ PVRDMA_PORT_VENDOR_CLASS_SUP = 1 << 20,
+ PVRDMA_PORT_DR_NOTICE_SUP = 1 << 21,
+ PVRDMA_PORT_CAP_MASK_NOTICE_SUP = 1 << 22,
+ PVRDMA_PORT_BOOT_MGMT_SUP = 1 << 23,
+ PVRDMA_PORT_LINK_LATENCY_SUP = 1 << 24,
+ PVRDMA_PORT_CLIENT_REG_SUP = 1 << 25,
+ PVRDMA_PORT_IP_BASED_GIDS = 1 << 26,
+ PVRDMA_PORT_CAP_FLAGS_MAX = PVRDMA_PORT_IP_BASED_GIDS,
+};
+
+enum pvrdma_port_width {
+ PVRDMA_WIDTH_1X = 1,
+ PVRDMA_WIDTH_4X = 2,
+ PVRDMA_WIDTH_8X = 4,
+ PVRDMA_WIDTH_12X = 8,
+};
+
+static inline int pvrdma_width_enum_to_int(enum pvrdma_port_width width)
+{
+ switch (width) {
+ case PVRDMA_WIDTH_1X: return 1;
+ case PVRDMA_WIDTH_4X: return 4;
+ case PVRDMA_WIDTH_8X: return 8;
+ case PVRDMA_WIDTH_12X: return 12;
+ default: return -1;
+ }
+}
+
+enum pvrdma_port_speed {
+ PVRDMA_SPEED_SDR = 1,
+ PVRDMA_SPEED_DDR = 2,
+ PVRDMA_SPEED_QDR = 4,
+ PVRDMA_SPEED_FDR10 = 8,
+ PVRDMA_SPEED_FDR = 16,
+ PVRDMA_SPEED_EDR = 32,
+};
+
+struct pvrdma_port_attr {
+ enum pvrdma_port_state state;
+ enum pvrdma_mtu max_mtu;
+ enum pvrdma_mtu active_mtu;
+ u32 gid_tbl_len;
+ u32 port_cap_flags;
+ u32 max_msg_sz;
+ u32 bad_pkey_cntr;
+ u32 qkey_viol_cntr;
+ u16 pkey_tbl_len;
+ u16 lid;
+ u16 sm_lid;
+ u8 lmc;
+ u8 max_vl_num;
+ u8 sm_sl;
+ u8 subnet_timeout;
+ u8 init_type_reply;
+ u8 active_width;
+ u8 active_speed;
+ u8 phys_state;
+ u8 reserved[2];
+};
+
+struct pvrdma_global_route {
+ union pvrdma_gid dgid;
+ u32 flow_label;
+ u8 sgid_index;
+ u8 hop_limit;
+ u8 traffic_class;
+ u8 reserved;
+};
+
+struct pvrdma_grh {
+ __be32 version_tclass_flow;
+ __be16 paylen;
+ u8 next_hdr;
+ u8 hop_limit;
+ union pvrdma_gid sgid;
+ union pvrdma_gid dgid;
+};
+
+enum pvrdma_ah_flags {
+ PVRDMA_AH_GRH = 1,
+};
+
+enum pvrdma_rate {
+ PVRDMA_RATE_PORT_CURRENT = 0,
+ PVRDMA_RATE_2_5_GBPS = 2,
+ PVRDMA_RATE_5_GBPS = 5,
+ PVRDMA_RATE_10_GBPS = 3,
+ PVRDMA_RATE_20_GBPS = 6,
+ PVRDMA_RATE_30_GBPS = 4,
+ PVRDMA_RATE_40_GBPS = 7,
+ PVRDMA_RATE_60_GBPS = 8,
+ PVRDMA_RATE_80_GBPS = 9,
+ PVRDMA_RATE_120_GBPS = 10,
+ PVRDMA_RATE_14_GBPS = 11,
+ PVRDMA_RATE_56_GBPS = 12,
+ PVRDMA_RATE_112_GBPS = 13,
+ PVRDMA_RATE_168_GBPS = 14,
+ PVRDMA_RATE_25_GBPS = 15,
+ PVRDMA_RATE_100_GBPS = 16,
+ PVRDMA_RATE_200_GBPS = 17,
+ PVRDMA_RATE_300_GBPS = 18,
+};
+
+struct pvrdma_ah_attr {
+ struct pvrdma_global_route grh;
+ u16 dlid;
+ u16 vlan_id;
+ u8 sl;
+ u8 src_path_bits;
+ u8 static_rate;
+ u8 ah_flags;
+ u8 port_num;
+ u8 dmac[6];
+ u8 reserved;
+};
+
+enum pvrdma_cq_notify_flags {
+ PVRDMA_CQ_SOLICITED = 1 << 0,
+ PVRDMA_CQ_NEXT_COMP = 1 << 1,
+ PVRDMA_CQ_SOLICITED_MASK = PVRDMA_CQ_SOLICITED |
+ PVRDMA_CQ_NEXT_COMP,
+ PVRDMA_CQ_REPORT_MISSED_EVENTS = 1 << 2,
+};
+
+struct pvrdma_qp_cap {
+ u32 max_send_wr;
+ u32 max_recv_wr;
+ u32 max_send_sge;
+ u32 max_recv_sge;
+ u32 max_inline_data;
+ u32 reserved;
+};
+
+enum pvrdma_sig_type {
+ PVRDMA_SIGNAL_ALL_WR,
+ PVRDMA_SIGNAL_REQ_WR,
+};
+
+enum pvrdma_qp_type {
+ PVRDMA_QPT_SMI,
+ PVRDMA_QPT_GSI,
+ PVRDMA_QPT_RC,
+ PVRDMA_QPT_UC,
+ PVRDMA_QPT_UD,
+ PVRDMA_QPT_RAW_IPV6,
+ PVRDMA_QPT_RAW_ETHERTYPE,
+ PVRDMA_QPT_RAW_PACKET = 8,
+ PVRDMA_QPT_XRC_INI = 9,
+ PVRDMA_QPT_XRC_TGT,
+ PVRDMA_QPT_MAX,
+};
+
+enum pvrdma_qp_create_flags {
+ PVRDMA_QP_CREATE_IPOPVRDMA_UD_LSO = 1 << 0,
+ PVRDMA_QP_CREATE_BLOCK_MULTICAST_LOOPBACK = 1 << 1,
+};
+
+enum pvrdma_qp_attr_mask {
+ PVRDMA_QP_STATE = 1 << 0,
+ PVRDMA_QP_CUR_STATE = 1 << 1,
+ PVRDMA_QP_EN_SQD_ASYNC_NOTIFY = 1 << 2,
+ PVRDMA_QP_ACCESS_FLAGS = 1 << 3,
+ PVRDMA_QP_PKEY_INDEX = 1 << 4,
+ PVRDMA_QP_PORT = 1 << 5,
+ PVRDMA_QP_QKEY = 1 << 6,
+ PVRDMA_QP_AV = 1 << 7,
+ PVRDMA_QP_PATH_MTU = 1 << 8,
+ PVRDMA_QP_TIMEOUT = 1 << 9,
+ PVRDMA_QP_RETRY_CNT = 1 << 10,
+ PVRDMA_QP_RNR_RETRY = 1 << 11,
+ PVRDMA_QP_RQ_PSN = 1 << 12,
+ PVRDMA_QP_MAX_QP_RD_ATOMIC = 1 << 13,
+ PVRDMA_QP_ALT_PATH = 1 << 14,
+ PVRDMA_QP_MIN_RNR_TIMER = 1 << 15,
+ PVRDMA_QP_SQ_PSN = 1 << 16,
+ PVRDMA_QP_MAX_DEST_RD_ATOMIC = 1 << 17,
+ PVRDMA_QP_PATH_MIG_STATE = 1 << 18,
+ PVRDMA_QP_CAP = 1 << 19,
+ PVRDMA_QP_DEST_QPN = 1 << 20,
+ PVRDMA_QP_ATTR_MASK_MAX = PVRDMA_QP_DEST_QPN,
+};
+
+enum pvrdma_qp_state {
+ PVRDMA_QPS_RESET,
+ PVRDMA_QPS_INIT,
+ PVRDMA_QPS_RTR,
+ PVRDMA_QPS_RTS,
+ PVRDMA_QPS_SQD,
+ PVRDMA_QPS_SQE,
+ PVRDMA_QPS_ERR,
+};
+
+enum pvrdma_mig_state {
+ PVRDMA_MIG_MIGRATED,
+ PVRDMA_MIG_REARM,
+ PVRDMA_MIG_ARMED,
+};
+
+enum pvrdma_mw_type {
+ PVRDMA_MW_TYPE_1 = 1,
+ PVRDMA_MW_TYPE_2 = 2,
+};
+
+struct pvrdma_qp_attr {
+ enum pvrdma_qp_state qp_state;
+ enum pvrdma_qp_state cur_qp_state;
+ enum pvrdma_mtu path_mtu;
+ enum pvrdma_mig_state path_mig_state;
+ u32 qkey;
+ u32 rq_psn;
+ u32 sq_psn;
+ u32 dest_qp_num;
+ u32 qp_access_flags;
+ u16 pkey_index;
+ u16 alt_pkey_index;
+ u8 en_sqd_async_notify;
+ u8 sq_draining;
+ u8 max_rd_atomic;
+ u8 max_dest_rd_atomic;
+ u8 min_rnr_timer;
+ u8 port_num;
+ u8 timeout;
+ u8 retry_cnt;
+ u8 rnr_retry;
+ u8 alt_port_num;
+ u8 alt_timeout;
+ u8 reserved[5];
+ struct pvrdma_qp_cap cap;
+ struct pvrdma_ah_attr ah_attr;
+ struct pvrdma_ah_attr alt_ah_attr;
+};
+
+enum pvrdma_send_flags {
+ PVRDMA_SEND_FENCE = 1 << 0,
+ PVRDMA_SEND_SIGNALED = 1 << 1,
+ PVRDMA_SEND_SOLICITED = 1 << 2,
+ PVRDMA_SEND_INLINE = 1 << 3,
+ PVRDMA_SEND_IP_CSUM = 1 << 4,
+ PVRDMA_SEND_FLAGS_MAX = PVRDMA_SEND_IP_CSUM,
+};
+
+enum pvrdma_access_flags {
+ PVRDMA_ACCESS_LOCAL_WRITE = 1 << 0,
+ PVRDMA_ACCESS_REMOTE_WRITE = 1 << 1,
+ PVRDMA_ACCESS_REMOTE_READ = 1 << 2,
+ PVRDMA_ACCESS_REMOTE_ATOMIC = 1 << 3,
+ PVRDMA_ACCESS_MW_BIND = 1 << 4,
+ PVRDMA_ZERO_BASED = 1 << 5,
+ PVRDMA_ACCESS_ON_DEMAND = 1 << 6,
+ PVRDMA_ACCESS_FLAGS_MAX = PVRDMA_ACCESS_ON_DEMAND,
+};
+
+int pvrdma_query_device(struct ib_device *ibdev,
+ struct ib_device_attr *props,
+ struct ib_udata *udata);
+int pvrdma_query_port(struct ib_device *ibdev, u8 port,
+ struct ib_port_attr *props);
+int pvrdma_query_gid(struct ib_device *ibdev, u8 port,
+ int index, union ib_gid *gid);
+int pvrdma_query_pkey(struct ib_device *ibdev, u8 port,
+ u16 index, u16 *pkey);
+enum rdma_link_layer pvrdma_port_link_layer(struct ib_device *ibdev,
+ u8 port);
+int pvrdma_modify_device(struct ib_device *ibdev, int mask,
+ struct ib_device_modify *props);
+int pvrdma_modify_port(struct ib_device *ibdev, u8 port,
+ int mask, struct ib_port_modify *props);
+int pvrdma_mmap(struct ib_ucontext *context, struct vm_area_struct *vma);
+struct ib_ucontext *pvrdma_alloc_ucontext(struct ib_device *ibdev,
+ struct ib_udata *udata);
+int pvrdma_dealloc_ucontext(struct ib_ucontext *context);
+struct ib_pd *pvrdma_alloc_pd(struct ib_device *ibdev,
+ struct ib_ucontext *context,
+ struct ib_udata *udata);
+int pvrdma_dealloc_pd(struct ib_pd *ibpd);
+struct ib_mr *pvrdma_get_dma_mr(struct ib_pd *pd, int acc);
+struct ib_mr *pvrdma_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
+ u64 virt_addr, int access_flags,
+ struct ib_udata *udata);
+int pvrdma_dereg_mr(struct ib_mr *mr);
+struct ib_mr *pvrdma_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
+ u32 max_num_sg);
+int pvrdma_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
+ int sg_nents, unsigned int *sg_offset);
+int pvrdma_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period);
+int pvrdma_resize_cq(struct ib_cq *ibcq, int entries,
+ struct ib_udata *udata);
+struct ib_cq *pvrdma_create_cq(struct ib_device *ibdev,
+ const struct ib_cq_init_attr *attr,
+ struct ib_ucontext *context,
+ struct ib_udata *udata);
+int pvrdma_resize_cq(struct ib_cq *ibcq, int entries,
+ struct ib_udata *udata);
+int pvrdma_destroy_cq(struct ib_cq *cq);
+int pvrdma_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc);
+int pvrdma_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags);
+struct ib_ah *pvrdma_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr,
+ struct ib_udata *udata);
+int pvrdma_destroy_ah(struct ib_ah *ah);
+struct ib_qp *pvrdma_create_qp(struct ib_pd *pd,
+ struct ib_qp_init_attr *init_attr,
+ struct ib_udata *udata);
+int pvrdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+ int attr_mask, struct ib_udata *udata);
+int pvrdma_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
+ int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr);
+int pvrdma_destroy_qp(struct ib_qp *qp);
+int pvrdma_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
+ struct ib_send_wr **bad_wr);
+int pvrdma_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
+ struct ib_recv_wr **bad_wr);
+
+#endif /* __PVRDMA_VERBS_H__ */
diff --git a/drivers/infiniband/sw/rdmavt/cq.c b/drivers/infiniband/sw/rdmavt/cq.c
index 6d9904a..4d0b699 100644
--- a/drivers/infiniband/sw/rdmavt/cq.c
+++ b/drivers/infiniband/sw/rdmavt/cq.c
@@ -119,18 +119,17 @@
if (cq->notify == IB_CQ_NEXT_COMP ||
(cq->notify == IB_CQ_SOLICITED &&
(solicited || entry->status != IB_WC_SUCCESS))) {
- struct kthread_worker *worker;
/*
* This will cause send_complete() to be called in
* another thread.
*/
- smp_read_barrier_depends(); /* see rvt_cq_exit */
- worker = cq->rdi->worker;
- if (likely(worker)) {
+ spin_lock(&cq->rdi->n_cqs_lock);
+ if (likely(cq->rdi->worker)) {
cq->notify = RVT_CQ_NONE;
cq->triggered++;
- kthread_queue_work(worker, &cq->comptask);
+ kthread_queue_work(cq->rdi->worker, &cq->comptask);
}
+ spin_unlock(&cq->rdi->n_cqs_lock);
}
spin_unlock_irqrestore(&cq->lock, flags);
@@ -240,15 +239,15 @@
}
}
- spin_lock(&rdi->n_cqs_lock);
+ spin_lock_irq(&rdi->n_cqs_lock);
if (rdi->n_cqs_allocated == rdi->dparms.props.max_cq) {
- spin_unlock(&rdi->n_cqs_lock);
+ spin_unlock_irq(&rdi->n_cqs_lock);
ret = ERR_PTR(-ENOMEM);
goto bail_ip;
}
rdi->n_cqs_allocated++;
- spin_unlock(&rdi->n_cqs_lock);
+ spin_unlock_irq(&rdi->n_cqs_lock);
if (cq->ip) {
spin_lock_irq(&rdi->pending_lock);
@@ -296,9 +295,9 @@
struct rvt_dev_info *rdi = cq->rdi;
kthread_flush_work(&cq->comptask);
- spin_lock(&rdi->n_cqs_lock);
+ spin_lock_irq(&rdi->n_cqs_lock);
rdi->n_cqs_allocated--;
- spin_unlock(&rdi->n_cqs_lock);
+ spin_unlock_irq(&rdi->n_cqs_lock);
if (cq->ip)
kref_put(&cq->ip->ref, rvt_release_mmap_info);
else
@@ -504,33 +503,23 @@
*/
int rvt_driver_cq_init(struct rvt_dev_info *rdi)
{
- int ret = 0;
int cpu;
- struct task_struct *task;
+ struct kthread_worker *worker;
if (rdi->worker)
return 0;
- spin_lock_init(&rdi->n_cqs_lock);
- rdi->worker = kzalloc(sizeof(*rdi->worker), GFP_KERNEL);
- if (!rdi->worker)
- return -ENOMEM;
- kthread_init_worker(rdi->worker);
- task = kthread_create_on_node(
- kthread_worker_fn,
- rdi->worker,
- rdi->dparms.node,
- "%s", rdi->dparms.cq_name);
- if (IS_ERR(task)) {
- kfree(rdi->worker);
- rdi->worker = NULL;
- return PTR_ERR(task);
- }
- set_user_nice(task, MIN_NICE);
+ spin_lock_init(&rdi->n_cqs_lock);
+
cpu = cpumask_first(cpumask_of_node(rdi->dparms.node));
- kthread_bind(task, cpu);
- wake_up_process(task);
- return ret;
+ worker = kthread_create_worker_on_cpu(cpu, 0,
+ "%s", rdi->dparms.cq_name);
+ if (IS_ERR(worker))
+ return PTR_ERR(worker);
+
+ set_user_nice(worker->task, MIN_NICE);
+ rdi->worker = worker;
+ return 0;
}
/**
@@ -541,13 +530,14 @@
{
struct kthread_worker *worker;
- worker = rdi->worker;
- if (!worker)
+ /* block future queuing from send_complete() */
+ spin_lock_irq(&rdi->n_cqs_lock);
+ if (!rdi->worker) {
+ spin_unlock_irq(&rdi->n_cqs_lock);
return;
- /* blocks future queuing from send_complete() */
+ }
rdi->worker = NULL;
- smp_wmb(); /* See rdi_cq_enter */
- kthread_flush_worker(worker);
- kthread_stop(worker->task);
- kfree(worker);
+ spin_unlock_irq(&rdi->n_cqs_lock);
+
+ kthread_destroy_worker(worker);
}
diff --git a/drivers/infiniband/sw/rdmavt/mcast.c b/drivers/infiniband/sw/rdmavt/mcast.c
index 983d319..05c8c2a 100644
--- a/drivers/infiniband/sw/rdmavt/mcast.c
+++ b/drivers/infiniband/sw/rdmavt/mcast.c
@@ -81,7 +81,7 @@
goto bail;
mqp->qp = qp;
- atomic_inc(&qp->refcount);
+ rvt_get_qp(qp);
bail:
return mqp;
@@ -92,8 +92,7 @@
struct rvt_qp *qp = mqp->qp;
/* Notify hfi1_destroy_qp() if it is waiting. */
- if (atomic_dec_and_test(&qp->refcount))
- wake_up(&qp->wait);
+ rvt_put_qp(qp);
kfree(mqp);
}
diff --git a/drivers/infiniband/sw/rdmavt/mr.c b/drivers/infiniband/sw/rdmavt/mr.c
index 46b6497..52fd152 100644
--- a/drivers/infiniband/sw/rdmavt/mr.c
+++ b/drivers/infiniband/sw/rdmavt/mr.c
@@ -51,6 +51,7 @@
#include <rdma/rdma_vt.h>
#include "vt.h"
#include "mr.h"
+#include "trace.h"
/**
* rvt_driver_mr_init - Init MR resources per driver
@@ -84,6 +85,7 @@
lkey_table_size = rdi->dparms.lkey_table_size;
}
rdi->lkey_table.max = 1 << lkey_table_size;
+ rdi->lkey_table.shift = 32 - lkey_table_size;
lk_tab_size = rdi->lkey_table.max * sizeof(*rdi->lkey_table.table);
rdi->lkey_table.table = (struct rvt_mregion __rcu **)
vmalloc_node(lk_tab_size, rdi->dparms.node);
@@ -402,6 +404,7 @@
}
mr->mr.map[m]->segs[n].vaddr = vaddr;
mr->mr.map[m]->segs[n].length = umem->page_size;
+ trace_rvt_mr_user_seg(&mr->mr, m, n, vaddr, umem->page_size);
n++;
if (n == RVT_SEGSZ) {
m++;
@@ -506,6 +509,7 @@
n = mapped_segs % RVT_SEGSZ;
mr->mr.map[m]->segs[n].vaddr = (void *)addr;
mr->mr.map[m]->segs[n].length = ps;
+ trace_rvt_mr_page_seg(&mr->mr, m, n, (void *)addr, ps);
mr->mr.length += ps;
return 0;
@@ -692,6 +696,7 @@
for (i = 0; i < list_len; i++) {
fmr->mr.map[m]->segs[n].vaddr = (void *)page_list[i];
fmr->mr.map[m]->segs[n].length = ps;
+ trace_rvt_mr_fmr_seg(&fmr->mr, m, n, (void *)page_list[i], ps);
if (++n == RVT_SEGSZ) {
m++;
n = 0;
@@ -774,7 +779,6 @@
struct rvt_mregion *mr;
unsigned n, m;
size_t off;
- struct rvt_dev_info *dev = ib_to_rvt(pd->ibpd.device);
/*
* We use LKEY == zero for kernel virtual addresses
@@ -782,12 +786,14 @@
*/
rcu_read_lock();
if (sge->lkey == 0) {
+ struct rvt_dev_info *dev = ib_to_rvt(pd->ibpd.device);
+
if (pd->user)
goto bail;
mr = rcu_dereference(dev->dma_mr);
if (!mr)
goto bail;
- atomic_inc(&mr->refcount);
+ rvt_get_mr(mr);
rcu_read_unlock();
isge->mr = mr;
@@ -798,8 +804,7 @@
isge->n = 0;
goto ok;
}
- mr = rcu_dereference(
- rkt->table[(sge->lkey >> (32 - dev->dparms.lkey_table_size))]);
+ mr = rcu_dereference(rkt->table[sge->lkey >> rkt->shift]);
if (unlikely(!mr || atomic_read(&mr->lkey_invalid) ||
mr->lkey != sge->lkey || mr->pd != &pd->ibpd))
goto bail;
@@ -809,7 +814,7 @@
off + sge->length > mr->length ||
(mr->access_flags & acc) != acc))
goto bail;
- atomic_inc(&mr->refcount);
+ rvt_get_mr(mr);
rcu_read_unlock();
off += mr->offset;
@@ -887,7 +892,7 @@
mr = rcu_dereference(rdi->dma_mr);
if (!mr)
goto bail;
- atomic_inc(&mr->refcount);
+ rvt_get_mr(mr);
rcu_read_unlock();
sge->mr = mr;
@@ -899,8 +904,7 @@
goto ok;
}
- mr = rcu_dereference(
- rkt->table[(rkey >> (32 - dev->dparms.lkey_table_size))]);
+ mr = rcu_dereference(rkt->table[rkey >> rkt->shift]);
if (unlikely(!mr || atomic_read(&mr->lkey_invalid) ||
mr->lkey != rkey || qp->ibqp.pd != mr->pd))
goto bail;
@@ -909,7 +913,7 @@
if (unlikely(vaddr < mr->iova || off + len > mr->length ||
(mr->access_flags & acc) == 0))
goto bail;
- atomic_inc(&mr->refcount);
+ rvt_get_mr(mr);
rcu_read_unlock();
off += mr->offset;
diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c
index 6500c3b..2a13ac6 100644
--- a/drivers/infiniband/sw/rdmavt/qp.c
+++ b/drivers/infiniband/sw/rdmavt/qp.c
@@ -76,6 +76,23 @@
};
EXPORT_SYMBOL(ib_rvt_state_ops);
+/*
+ * Translate ib_wr_opcode into ib_wc_opcode.
+ */
+const enum ib_wc_opcode ib_rvt_wc_opcode[] = {
+ [IB_WR_RDMA_WRITE] = IB_WC_RDMA_WRITE,
+ [IB_WR_RDMA_WRITE_WITH_IMM] = IB_WC_RDMA_WRITE,
+ [IB_WR_SEND] = IB_WC_SEND,
+ [IB_WR_SEND_WITH_IMM] = IB_WC_SEND,
+ [IB_WR_RDMA_READ] = IB_WC_RDMA_READ,
+ [IB_WR_ATOMIC_CMP_AND_SWP] = IB_WC_COMP_SWAP,
+ [IB_WR_ATOMIC_FETCH_AND_ADD] = IB_WC_FETCH_ADD,
+ [IB_WR_SEND_WITH_INV] = IB_WC_SEND,
+ [IB_WR_LOCAL_INV] = IB_WC_LOCAL_INV,
+ [IB_WR_REG_MR] = IB_WC_REG_MR
+};
+EXPORT_SYMBOL(ib_rvt_wc_opcode);
+
static void get_map_page(struct rvt_qpn_table *qpt,
struct rvt_qpn_map *map,
gfp_t gfp)
@@ -884,7 +901,8 @@
return ret;
bail_ip:
- kref_put(&qp->ip->ref, rvt_release_mmap_info);
+ if (qp->ip)
+ kref_put(&qp->ip->ref, rvt_release_mmap_info);
bail_qpn:
free_qpn(&rdi->qp_dev->qpn_table, qp->ibqp.qp_num);
diff --git a/drivers/infiniband/sw/rdmavt/trace.h b/drivers/infiniband/sw/rdmavt/trace.h
index 6c0457d..e2d23ac 100644
--- a/drivers/infiniband/sw/rdmavt/trace.h
+++ b/drivers/infiniband/sw/rdmavt/trace.h
@@ -45,143 +45,10 @@
*
*/
-#undef TRACE_SYSTEM_VAR
-#define TRACE_SYSTEM_VAR rdmavt
-
-#if !defined(__RDMAVT_TRACE_H) || defined(TRACE_HEADER_MULTI_READ)
-#define __RDMAVT_TRACE_H
-
-#include <linux/tracepoint.h>
-#include <linux/trace_seq.h>
-
-#include <rdma/ib_verbs.h>
-#include <rdma/rdma_vt.h>
-
#define RDI_DEV_ENTRY(rdi) __string(dev, rdi->driver_f.get_card_name(rdi))
#define RDI_DEV_ASSIGN(rdi) __assign_str(dev, rdi->driver_f.get_card_name(rdi))
-#undef TRACE_SYSTEM
-#define TRACE_SYSTEM rdmavt
-
-TRACE_EVENT(rvt_dbg,
- TP_PROTO(struct rvt_dev_info *rdi,
- const char *msg),
- TP_ARGS(rdi, msg),
- TP_STRUCT__entry(
- RDI_DEV_ENTRY(rdi)
- __string(msg, msg)
- ),
- TP_fast_assign(
- RDI_DEV_ASSIGN(rdi);
- __assign_str(msg, msg);
- ),
- TP_printk("[%s]: %s", __get_str(dev), __get_str(msg))
-);
-
-#undef TRACE_SYSTEM
-#define TRACE_SYSTEM rvt_qphash
-DECLARE_EVENT_CLASS(rvt_qphash_template,
- TP_PROTO(struct rvt_qp *qp, u32 bucket),
- TP_ARGS(qp, bucket),
- TP_STRUCT__entry(
- RDI_DEV_ENTRY(ib_to_rvt(qp->ibqp.device))
- __field(u32, qpn)
- __field(u32, bucket)
- ),
- TP_fast_assign(
- RDI_DEV_ASSIGN(ib_to_rvt(qp->ibqp.device))
- __entry->qpn = qp->ibqp.qp_num;
- __entry->bucket = bucket;
- ),
- TP_printk(
- "[%s] qpn 0x%x bucket %u",
- __get_str(dev),
- __entry->qpn,
- __entry->bucket
- )
-);
-
-DEFINE_EVENT(rvt_qphash_template, rvt_qpinsert,
- TP_PROTO(struct rvt_qp *qp, u32 bucket),
- TP_ARGS(qp, bucket));
-
-DEFINE_EVENT(rvt_qphash_template, rvt_qpremove,
- TP_PROTO(struct rvt_qp *qp, u32 bucket),
- TP_ARGS(qp, bucket));
-
-#undef TRACE_SYSTEM
-#define TRACE_SYSTEM rvt_tx
-
-#define wr_opcode_name(opcode) { IB_WR_##opcode, #opcode }
-#define show_wr_opcode(opcode) \
-__print_symbolic(opcode, \
- wr_opcode_name(RDMA_WRITE), \
- wr_opcode_name(RDMA_WRITE_WITH_IMM), \
- wr_opcode_name(SEND), \
- wr_opcode_name(SEND_WITH_IMM), \
- wr_opcode_name(RDMA_READ), \
- wr_opcode_name(ATOMIC_CMP_AND_SWP), \
- wr_opcode_name(ATOMIC_FETCH_AND_ADD), \
- wr_opcode_name(LSO), \
- wr_opcode_name(SEND_WITH_INV), \
- wr_opcode_name(RDMA_READ_WITH_INV), \
- wr_opcode_name(LOCAL_INV), \
- wr_opcode_name(MASKED_ATOMIC_CMP_AND_SWP), \
- wr_opcode_name(MASKED_ATOMIC_FETCH_AND_ADD))
-
-#define POS_PRN \
-"[%s] wr_id %llx qpn %x psn 0x%x lpsn 0x%x length %u opcode 0x%.2x,%s size %u avail %u head %u last %u"
-
-TRACE_EVENT(
- rvt_post_one_wr,
- TP_PROTO(struct rvt_qp *qp, struct rvt_swqe *wqe),
- TP_ARGS(qp, wqe),
- TP_STRUCT__entry(
- RDI_DEV_ENTRY(ib_to_rvt(qp->ibqp.device))
- __field(u64, wr_id)
- __field(u32, qpn)
- __field(u32, psn)
- __field(u32, lpsn)
- __field(u32, length)
- __field(u32, opcode)
- __field(u32, size)
- __field(u32, avail)
- __field(u32, head)
- __field(u32, last)
- ),
- TP_fast_assign(
- RDI_DEV_ASSIGN(ib_to_rvt(qp->ibqp.device))
- __entry->wr_id = wqe->wr.wr_id;
- __entry->qpn = qp->ibqp.qp_num;
- __entry->psn = wqe->psn;
- __entry->lpsn = wqe->lpsn;
- __entry->length = wqe->length;
- __entry->opcode = wqe->wr.opcode;
- __entry->size = qp->s_size;
- __entry->avail = qp->s_avail;
- __entry->head = qp->s_head;
- __entry->last = qp->s_last;
- ),
- TP_printk(
- POS_PRN,
- __get_str(dev),
- __entry->wr_id,
- __entry->qpn,
- __entry->psn,
- __entry->lpsn,
- __entry->length,
- __entry->opcode, show_wr_opcode(__entry->opcode),
- __entry->size,
- __entry->avail,
- __entry->head,
- __entry->last
- )
-);
-
-#endif /* __RDMAVT_TRACE_H */
-
-#undef TRACE_INCLUDE_PATH
-#undef TRACE_INCLUDE_FILE
-#define TRACE_INCLUDE_PATH .
-#define TRACE_INCLUDE_FILE trace
-#include <trace/define_trace.h>
+#include "trace_rvt.h"
+#include "trace_qp.h"
+#include "trace_tx.h"
+#include "trace_mr.h"
diff --git a/drivers/infiniband/sw/rdmavt/trace_mr.h b/drivers/infiniband/sw/rdmavt/trace_mr.h
new file mode 100644
index 0000000..3318a6c
--- /dev/null
+++ b/drivers/infiniband/sw/rdmavt/trace_mr.h
@@ -0,0 +1,112 @@
+/*
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * - Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+#if !defined(__RVT_TRACE_MR_H) || defined(TRACE_HEADER_MULTI_READ)
+#define __RVT_TRACE_MR_H
+
+#include <linux/tracepoint.h>
+#include <linux/trace_seq.h>
+
+#include <rdma/ib_verbs.h>
+#include <rdma/rdma_vt.h>
+#include <rdma/rdmavt_mr.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM rvt_mr
+DECLARE_EVENT_CLASS(
+ rvt_mr_template,
+ TP_PROTO(struct rvt_mregion *mr, u16 m, u16 n, void *v, size_t len),
+ TP_ARGS(mr, m, n, v, len),
+ TP_STRUCT__entry(
+ RDI_DEV_ENTRY(ib_to_rvt(mr->pd->device))
+ __field(void *, vaddr)
+ __field(struct page *, page)
+ __field(size_t, len)
+ __field(u32, lkey)
+ __field(u16, m)
+ __field(u16, n)
+ ),
+ TP_fast_assign(
+ RDI_DEV_ASSIGN(ib_to_rvt(mr->pd->device));
+ __entry->vaddr = v;
+ __entry->page = virt_to_page(v);
+ __entry->m = m;
+ __entry->n = n;
+ __entry->len = len;
+ ),
+ TP_printk(
+ "[%s] vaddr %p page %p m %u n %u len %ld",
+ __get_str(dev),
+ __entry->vaddr,
+ __entry->page,
+ __entry->m,
+ __entry->n,
+ __entry->len
+ )
+);
+
+DEFINE_EVENT(
+ rvt_mr_template, rvt_mr_page_seg,
+ TP_PROTO(struct rvt_mregion *mr, u16 m, u16 n, void *v, size_t len),
+ TP_ARGS(mr, m, n, v, len));
+
+DEFINE_EVENT(
+ rvt_mr_template, rvt_mr_fmr_seg,
+ TP_PROTO(struct rvt_mregion *mr, u16 m, u16 n, void *v, size_t len),
+ TP_ARGS(mr, m, n, v, len));
+
+DEFINE_EVENT(
+ rvt_mr_template, rvt_mr_user_seg,
+ TP_PROTO(struct rvt_mregion *mr, u16 m, u16 n, void *v, size_t len),
+ TP_ARGS(mr, m, n, v, len));
+
+#endif /* __RVT_TRACE_MR_H */
+
+#undef TRACE_INCLUDE_PATH
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE trace_mr
+#include <trace/define_trace.h>
diff --git a/drivers/infiniband/sw/rdmavt/trace_qp.h b/drivers/infiniband/sw/rdmavt/trace_qp.h
new file mode 100644
index 0000000..4c77a31
--- /dev/null
+++ b/drivers/infiniband/sw/rdmavt/trace_qp.h
@@ -0,0 +1,96 @@
+/*
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * - Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+#if !defined(__RVT_TRACE_QP_H) || defined(TRACE_HEADER_MULTI_READ)
+#define __RVT_TRACE_QP_H
+
+#include <linux/tracepoint.h>
+#include <linux/trace_seq.h>
+
+#include <rdma/ib_verbs.h>
+#include <rdma/rdma_vt.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM rvt_qp
+
+DECLARE_EVENT_CLASS(rvt_qphash_template,
+ TP_PROTO(struct rvt_qp *qp, u32 bucket),
+ TP_ARGS(qp, bucket),
+ TP_STRUCT__entry(
+ RDI_DEV_ENTRY(ib_to_rvt(qp->ibqp.device))
+ __field(u32, qpn)
+ __field(u32, bucket)
+ ),
+ TP_fast_assign(
+ RDI_DEV_ASSIGN(ib_to_rvt(qp->ibqp.device))
+ __entry->qpn = qp->ibqp.qp_num;
+ __entry->bucket = bucket;
+ ),
+ TP_printk(
+ "[%s] qpn 0x%x bucket %u",
+ __get_str(dev),
+ __entry->qpn,
+ __entry->bucket
+ )
+);
+
+DEFINE_EVENT(rvt_qphash_template, rvt_qpinsert,
+ TP_PROTO(struct rvt_qp *qp, u32 bucket),
+ TP_ARGS(qp, bucket));
+
+DEFINE_EVENT(rvt_qphash_template, rvt_qpremove,
+ TP_PROTO(struct rvt_qp *qp, u32 bucket),
+ TP_ARGS(qp, bucket));
+
+
+#endif /* __RVT_TRACE_QP_H */
+
+#undef TRACE_INCLUDE_PATH
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE trace_qp
+#include <trace/define_trace.h>
+
diff --git a/drivers/infiniband/sw/rdmavt/trace_rvt.h b/drivers/infiniband/sw/rdmavt/trace_rvt.h
new file mode 100644
index 0000000..746f334
--- /dev/null
+++ b/drivers/infiniband/sw/rdmavt/trace_rvt.h
@@ -0,0 +1,81 @@
+/*
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * - Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+#if !defined(__RVT_TRACE_RVT_H) || defined(TRACE_HEADER_MULTI_READ)
+#define __RVT_TRACE_RVT_H
+
+#include <linux/tracepoint.h>
+#include <linux/trace_seq.h>
+
+#include <rdma/ib_verbs.h>
+#include <rdma/rdma_vt.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM rvt
+
+TRACE_EVENT(rvt_dbg,
+ TP_PROTO(struct rvt_dev_info *rdi,
+ const char *msg),
+ TP_ARGS(rdi, msg),
+ TP_STRUCT__entry(
+ RDI_DEV_ENTRY(rdi)
+ __string(msg, msg)
+ ),
+ TP_fast_assign(
+ RDI_DEV_ASSIGN(rdi);
+ __assign_str(msg, msg);
+ ),
+ TP_printk("[%s]: %s", __get_str(dev), __get_str(msg))
+);
+
+#endif /* __RVT_TRACE_MISC_H */
+
+#undef TRACE_INCLUDE_PATH
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE trace_rvt
+#include <trace/define_trace.h>
+
diff --git a/drivers/infiniband/sw/rdmavt/trace_tx.h b/drivers/infiniband/sw/rdmavt/trace_tx.h
new file mode 100644
index 0000000..0e03173
--- /dev/null
+++ b/drivers/infiniband/sw/rdmavt/trace_tx.h
@@ -0,0 +1,132 @@
+/*
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * - Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+#if !defined(__RVT_TRACE_TX_H) || defined(TRACE_HEADER_MULTI_READ)
+#define __RVT_TRACE_TX_H
+
+#include <linux/tracepoint.h>
+#include <linux/trace_seq.h>
+
+#include <rdma/ib_verbs.h>
+#include <rdma/rdma_vt.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM rvt_tx
+
+#define wr_opcode_name(opcode) { IB_WR_##opcode, #opcode }
+#define show_wr_opcode(opcode) \
+__print_symbolic(opcode, \
+ wr_opcode_name(RDMA_WRITE), \
+ wr_opcode_name(RDMA_WRITE_WITH_IMM), \
+ wr_opcode_name(SEND), \
+ wr_opcode_name(SEND_WITH_IMM), \
+ wr_opcode_name(RDMA_READ), \
+ wr_opcode_name(ATOMIC_CMP_AND_SWP), \
+ wr_opcode_name(ATOMIC_FETCH_AND_ADD), \
+ wr_opcode_name(LSO), \
+ wr_opcode_name(SEND_WITH_INV), \
+ wr_opcode_name(RDMA_READ_WITH_INV), \
+ wr_opcode_name(LOCAL_INV), \
+ wr_opcode_name(MASKED_ATOMIC_CMP_AND_SWP), \
+ wr_opcode_name(MASKED_ATOMIC_FETCH_AND_ADD))
+
+#define POS_PRN \
+"[%s] wr_id %llx qpn %x psn 0x%x lpsn 0x%x length %u opcode 0x%.2x,%s size %u avail %u head %u last %u"
+
+TRACE_EVENT(
+ rvt_post_one_wr,
+ TP_PROTO(struct rvt_qp *qp, struct rvt_swqe *wqe),
+ TP_ARGS(qp, wqe),
+ TP_STRUCT__entry(
+ RDI_DEV_ENTRY(ib_to_rvt(qp->ibqp.device))
+ __field(u64, wr_id)
+ __field(u32, qpn)
+ __field(u32, psn)
+ __field(u32, lpsn)
+ __field(u32, length)
+ __field(u32, opcode)
+ __field(u32, size)
+ __field(u32, avail)
+ __field(u32, head)
+ __field(u32, last)
+ ),
+ TP_fast_assign(
+ RDI_DEV_ASSIGN(ib_to_rvt(qp->ibqp.device))
+ __entry->wr_id = wqe->wr.wr_id;
+ __entry->qpn = qp->ibqp.qp_num;
+ __entry->psn = wqe->psn;
+ __entry->lpsn = wqe->lpsn;
+ __entry->length = wqe->length;
+ __entry->opcode = wqe->wr.opcode;
+ __entry->size = qp->s_size;
+ __entry->avail = qp->s_avail;
+ __entry->head = qp->s_head;
+ __entry->last = qp->s_last;
+ ),
+ TP_printk(
+ POS_PRN,
+ __get_str(dev),
+ __entry->wr_id,
+ __entry->qpn,
+ __entry->psn,
+ __entry->lpsn,
+ __entry->length,
+ __entry->opcode, show_wr_opcode(__entry->opcode),
+ __entry->size,
+ __entry->avail,
+ __entry->head,
+ __entry->last
+ )
+);
+
+#endif /* __RVT_TRACE_TX_H */
+
+#undef TRACE_INCLUDE_PATH
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE trace_tx
+#include <trace/define_trace.h>
+
diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c
index 6c5e29d..cd27cbd 100644
--- a/drivers/infiniband/sw/rxe/rxe_comp.c
+++ b/drivers/infiniband/sw/rxe/rxe_comp.c
@@ -420,11 +420,12 @@
(wqe->wr.send_flags & IB_SEND_SIGNALED) ||
(qp->req.state == QP_STATE_ERROR)) {
make_send_cqe(qp, wqe, &cqe);
+ advance_consumer(qp->sq.queue);
rxe_cq_post(qp->scq, &cqe, 0);
+ } else {
+ advance_consumer(qp->sq.queue);
}
- advance_consumer(qp->sq.queue);
-
/*
* we completed something so let req run again
* if it is trying to fence
@@ -510,6 +511,8 @@
struct rxe_pkt_info *pkt = NULL;
enum comp_state state;
+ rxe_add_ref(qp);
+
if (!qp->valid) {
while ((skb = skb_dequeue(&qp->resp_pkts))) {
rxe_drop_ref(qp);
@@ -739,11 +742,13 @@
/* we come here if we are done with processing and want the task to
* exit from the loop calling us
*/
+ rxe_drop_ref(qp);
return -EAGAIN;
done:
/* we come here if we have processed a packet we want the task to call
* us again to see if there is anything else to do
*/
+ rxe_drop_ref(qp);
return 0;
}
diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h
index 73849a5a..efe4c6a 100644
--- a/drivers/infiniband/sw/rxe/rxe_loc.h
+++ b/drivers/infiniband/sw/rxe/rxe_loc.h
@@ -266,8 +266,6 @@
return err;
}
- atomic_inc(&qp->skb_out);
-
if ((qp_type(qp) != IB_QPT_RC) &&
(pkt->mask & RXE_END_MASK)) {
pkt->wqe->state = wqe_state_done;
diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c
index 1869152..d0faca2 100644
--- a/drivers/infiniband/sw/rxe/rxe_mr.c
+++ b/drivers/infiniband/sw/rxe/rxe_mr.c
@@ -355,6 +355,9 @@
size_t offset;
u32 crc = crcp ? (*crcp) : 0;
+ if (length == 0)
+ return 0;
+
if (mem->type == RXE_MEM_TYPE_DMA) {
u8 *src, *dest;
diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c
index ffff5a5..16967cd 100644
--- a/drivers/infiniband/sw/rxe/rxe_net.c
+++ b/drivers/infiniband/sw/rxe/rxe_net.c
@@ -46,7 +46,7 @@
#include "rxe_loc.h"
static LIST_HEAD(rxe_dev_list);
-static spinlock_t dev_list_lock; /* spinlock for device list */
+static DEFINE_SPINLOCK(dev_list_lock); /* spinlock for device list */
struct rxe_dev *net_to_rxe(struct net_device *ndev)
{
@@ -455,6 +455,8 @@
return -EAGAIN;
}
+ if (pkt->qp)
+ atomic_inc(&pkt->qp->skb_out);
kfree_skb(skb);
return 0;
@@ -659,8 +661,6 @@
int rxe_net_ipv4_init(void)
{
- spin_lock_init(&dev_list_lock);
-
recv_sockets.sk4 = rxe_setup_udp_tunnel(&init_net,
htons(ROCE_V2_UDP_DPORT), false);
if (IS_ERR(recv_sockets.sk4)) {
@@ -676,8 +676,6 @@
{
#if IS_ENABLED(CONFIG_IPV6)
- spin_lock_init(&dev_list_lock);
-
recv_sockets.sk6 = rxe_setup_udp_tunnel(&init_net,
htons(ROCE_V2_UDP_DPORT), true);
if (IS_ERR(recv_sockets.sk6)) {
diff --git a/drivers/infiniband/sw/rxe/rxe_param.h b/drivers/infiniband/sw/rxe/rxe_param.h
index f459c43..13ed2cc 100644
--- a/drivers/infiniband/sw/rxe/rxe_param.h
+++ b/drivers/infiniband/sw/rxe/rxe_param.h
@@ -82,7 +82,7 @@
RXE_MAX_SGE = 32,
RXE_MAX_SGE_RD = 32,
RXE_MAX_CQ = 16384,
- RXE_MAX_LOG_CQE = 13,
+ RXE_MAX_LOG_CQE = 15,
RXE_MAX_MR = 2 * 1024,
RXE_MAX_PD = 0x7ffc,
RXE_MAX_QP_RD_ATOM = 128,
diff --git a/drivers/infiniband/sw/rxe/rxe_pool.c b/drivers/infiniband/sw/rxe/rxe_pool.c
index 6bac071..d723947 100644
--- a/drivers/infiniband/sw/rxe/rxe_pool.c
+++ b/drivers/infiniband/sw/rxe/rxe_pool.c
@@ -180,7 +180,6 @@
size = BITS_TO_LONGS(max - min + 1) * sizeof(long);
pool->table = kmalloc(size, GFP_KERNEL);
if (!pool->table) {
- pr_warn("no memory for bit table\n");
err = -ENOMEM;
goto out;
}
diff --git a/drivers/infiniband/sw/rxe/rxe_recv.c b/drivers/infiniband/sw/rxe/rxe_recv.c
index 46f0628..252b4d6 100644
--- a/drivers/infiniband/sw/rxe/rxe_recv.c
+++ b/drivers/infiniband/sw/rxe/rxe_recv.c
@@ -391,16 +391,15 @@
payload_size(pkt));
calc_icrc = cpu_to_be32(~calc_icrc);
if (unlikely(calc_icrc != pack_icrc)) {
- char saddr[sizeof(struct in6_addr)];
-
if (skb->protocol == htons(ETH_P_IPV6))
- sprintf(saddr, "%pI6", &ipv6_hdr(skb)->saddr);
+ pr_warn_ratelimited("bad ICRC from %pI6c\n",
+ &ipv6_hdr(skb)->saddr);
else if (skb->protocol == htons(ETH_P_IP))
- sprintf(saddr, "%pI4", &ip_hdr(skb)->saddr);
+ pr_warn_ratelimited("bad ICRC from %pI4\n",
+ &ip_hdr(skb)->saddr);
else
- sprintf(saddr, "unknown");
+ pr_warn_ratelimited("bad ICRC from unknown\n");
- pr_warn_ratelimited("bad ICRC from %s\n", saddr);
goto drop;
}
diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c
index 22bd963..73d4a97 100644
--- a/drivers/infiniband/sw/rxe/rxe_req.c
+++ b/drivers/infiniband/sw/rxe/rxe_req.c
@@ -548,23 +548,23 @@
static void save_state(struct rxe_send_wqe *wqe,
struct rxe_qp *qp,
struct rxe_send_wqe *rollback_wqe,
- struct rxe_qp *rollback_qp)
+ u32 *rollback_psn)
{
rollback_wqe->state = wqe->state;
rollback_wqe->first_psn = wqe->first_psn;
rollback_wqe->last_psn = wqe->last_psn;
- rollback_qp->req.psn = qp->req.psn;
+ *rollback_psn = qp->req.psn;
}
static void rollback_state(struct rxe_send_wqe *wqe,
struct rxe_qp *qp,
struct rxe_send_wqe *rollback_wqe,
- struct rxe_qp *rollback_qp)
+ u32 rollback_psn)
{
wqe->state = rollback_wqe->state;
wqe->first_psn = rollback_wqe->first_psn;
wqe->last_psn = rollback_wqe->last_psn;
- qp->req.psn = rollback_qp->req.psn;
+ qp->req.psn = rollback_psn;
}
static void update_state(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
@@ -593,8 +593,10 @@
int mtu;
int opcode;
int ret;
- struct rxe_qp rollback_qp;
struct rxe_send_wqe rollback_wqe;
+ u32 rollback_psn;
+
+ rxe_add_ref(qp);
next_wqe:
if (unlikely(!qp->valid || qp->req.state == QP_STATE_ERROR))
@@ -697,6 +699,7 @@
wqe->state = wqe_state_done;
wqe->status = IB_WC_SUCCESS;
__rxe_do_task(&qp->comp.task);
+ rxe_drop_ref(qp);
return 0;
}
payload = mtu;
@@ -719,7 +722,7 @@
* rxe_xmit_packet().
* Otherwise, completer might initiate an unjustified retry flow.
*/
- save_state(wqe, qp, &rollback_wqe, &rollback_qp);
+ save_state(wqe, qp, &rollback_wqe, &rollback_psn);
update_wqe_state(qp, wqe, &pkt);
update_wqe_psn(qp, wqe, &pkt, payload);
ret = rxe_xmit_packet(to_rdev(qp->ibqp.device), qp, &pkt, skb);
@@ -727,7 +730,7 @@
qp->need_req_skb = 1;
kfree_skb(skb);
- rollback_state(wqe, qp, &rollback_wqe, &rollback_qp);
+ rollback_state(wqe, qp, &rollback_wqe, rollback_psn);
if (ret == -EAGAIN) {
rxe_run_task(&qp->req.task, 1);
@@ -756,8 +759,7 @@
*/
wqe->wr.send_flags |= IB_SEND_SIGNALED;
__rxe_do_task(&qp->comp.task);
- return -EAGAIN;
-
exit:
+ rxe_drop_ref(qp);
return -EAGAIN;
}
diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c
index dd3d88a..7a36ec9 100644
--- a/drivers/infiniband/sw/rxe/rxe_resp.c
+++ b/drivers/infiniband/sw/rxe/rxe_resp.c
@@ -444,6 +444,13 @@
return RESPST_EXECUTE;
}
+ /* A zero-byte op is not required to set an addr or rkey. */
+ if ((pkt->mask & (RXE_READ_MASK | RXE_WRITE_OR_SEND)) &&
+ (pkt->mask & RXE_RETH_MASK) &&
+ reth_len(pkt) == 0) {
+ return RESPST_EXECUTE;
+ }
+
va = qp->resp.va;
rkey = qp->resp.rkey;
resid = qp->resp.resid;
@@ -680,9 +687,14 @@
res->read.va_org = qp->resp.va;
res->first_psn = req_pkt->psn;
- res->last_psn = req_pkt->psn +
- (reth_len(req_pkt) + mtu - 1) /
- mtu - 1;
+
+ if (reth_len(req_pkt)) {
+ res->last_psn = (req_pkt->psn +
+ (reth_len(req_pkt) + mtu - 1) /
+ mtu - 1) & BTH_PSN_MASK;
+ } else {
+ res->last_psn = res->first_psn;
+ }
res->cur_psn = req_pkt->psn;
res->read.resid = qp->resp.resid;
@@ -742,7 +754,8 @@
} else {
qp->resp.res = NULL;
qp->resp.opcode = -1;
- qp->resp.psn = res->cur_psn;
+ if (psn_compare(res->cur_psn, qp->resp.psn) >= 0)
+ qp->resp.psn = res->cur_psn;
state = RESPST_CLEANUP;
}
@@ -1132,6 +1145,7 @@
pkt, skb_copy);
if (rc) {
pr_err("Failed resending result. This flow is not handled - skb ignored\n");
+ rxe_drop_ref(qp);
kfree_skb(skb_copy);
rc = RESPST_CLEANUP;
goto out;
@@ -1198,6 +1212,8 @@
struct rxe_pkt_info *pkt = NULL;
int ret = 0;
+ rxe_add_ref(qp);
+
qp->resp.aeth_syndrome = AETH_ACK_UNLIMITED;
if (!qp->valid) {
@@ -1386,5 +1402,6 @@
exit:
ret = -EAGAIN;
done:
+ rxe_drop_ref(qp);
return ret;
}
diff --git a/drivers/infiniband/sw/rxe/rxe_srq.c b/drivers/infiniband/sw/rxe/rxe_srq.c
index 2a6e3cd..efc832a 100644
--- a/drivers/infiniband/sw/rxe/rxe_srq.c
+++ b/drivers/infiniband/sw/rxe/rxe_srq.c
@@ -169,7 +169,7 @@
}
}
- err = rxe_queue_resize(q, (unsigned int *)&attr->max_wr,
+ err = rxe_queue_resize(q, &attr->max_wr,
rcv_wqe_size(srq->rq.max_sge),
srq->rq.queue->ip ?
srq->rq.queue->ip->context :
diff --git a/drivers/infiniband/sw/rxe/rxe_task.c b/drivers/infiniband/sw/rxe/rxe_task.c
index 1e19bf8..d2a14a1 100644
--- a/drivers/infiniband/sw/rxe/rxe_task.c
+++ b/drivers/infiniband/sw/rxe/rxe_task.c
@@ -121,6 +121,7 @@
task->arg = arg;
task->func = func;
snprintf(task->name, sizeof(task->name), "%s", name);
+ task->destroyed = false;
tasklet_init(&task->tasklet, rxe_do_task, (unsigned long)task);
@@ -132,11 +133,29 @@
void rxe_cleanup_task(struct rxe_task *task)
{
+ unsigned long flags;
+ bool idle;
+
+ /*
+ * Mark the task, then wait for it to finish. It might be
+ * running in a non-tasklet (direct call) context.
+ */
+ task->destroyed = true;
+
+ do {
+ spin_lock_irqsave(&task->state_lock, flags);
+ idle = (task->state == TASK_STATE_START);
+ spin_unlock_irqrestore(&task->state_lock, flags);
+ } while (!idle);
+
tasklet_kill(&task->tasklet);
}
void rxe_run_task(struct rxe_task *task, int sched)
{
+ if (task->destroyed)
+ return;
+
if (sched)
tasklet_schedule(&task->tasklet);
else
diff --git a/drivers/infiniband/sw/rxe/rxe_task.h b/drivers/infiniband/sw/rxe/rxe_task.h
index d14aa6d..08ff42d 100644
--- a/drivers/infiniband/sw/rxe/rxe_task.h
+++ b/drivers/infiniband/sw/rxe/rxe_task.h
@@ -54,6 +54,7 @@
int (*func)(void *arg);
int ret;
char name[16];
+ bool destroyed;
};
/*
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c
index 19841c8..beb7021 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.c
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.c
@@ -316,7 +316,9 @@
return err;
}
-static struct ib_ah *rxe_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr)
+static struct ib_ah *rxe_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr,
+ struct ib_udata *udata)
+
{
int err;
struct rxe_dev *rxe = to_rdev(ibpd->device);
@@ -564,7 +566,7 @@
if (udata) {
if (udata->inlen) {
err = -EINVAL;
- goto err1;
+ goto err2;
}
qp->is_user = 1;
}
@@ -573,12 +575,13 @@
err = rxe_qp_from_init(rxe, qp, pd, init, udata, ibpd);
if (err)
- goto err2;
+ goto err3;
return &qp->ibqp;
-err2:
+err3:
rxe_drop_index(qp);
+err2:
rxe_drop_ref(qp);
err1:
return ERR_PTR(err);
@@ -1007,11 +1010,19 @@
static int rxe_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
{
struct rxe_cq *cq = to_rcq(ibcq);
+ unsigned long irq_flags;
+ int ret = 0;
+ spin_lock_irqsave(&cq->cq_lock, irq_flags);
if (cq->notify != IB_CQ_NEXT_COMP)
cq->notify = flags & IB_CQ_SOLICITED_MASK;
- return 0;
+ if ((flags & IB_CQ_REPORT_MISSED_EVENTS) && !queue_empty(cq->queue))
+ ret = 1;
+
+ spin_unlock_irqrestore(&cq->cq_lock, irq_flags);
+
+ return ret;
}
static struct ib_mr *rxe_get_dma_mr(struct ib_pd *ibpd, int access)
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index 339a1ee..096c4f6 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -357,11 +357,8 @@
int i;
rx->rx_ring = vzalloc(ipoib_recvq_size * sizeof *rx->rx_ring);
- if (!rx->rx_ring) {
- printk(KERN_WARNING "%s: failed to allocate CM non-SRQ ring (%d entries)\n",
- priv->ca->name, ipoib_recvq_size);
+ if (!rx->rx_ring)
return -ENOMEM;
- }
t = kmalloc(sizeof *t, GFP_KERNEL);
if (!t) {
@@ -1054,8 +1051,6 @@
tx_qp = ib_create_qp(priv->pd, &attr);
if (PTR_ERR(tx_qp) == -EINVAL) {
- ipoib_warn(priv, "can't use GFP_NOIO for QPs on device %s, using GFP_KERNEL\n",
- priv->ca->name);
attr.create_flags &= ~IB_QP_CREATE_USE_GFP_NOIO;
tx_qp = ib_create_qp(priv->pd, &attr);
}
@@ -1134,7 +1129,6 @@
p->tx_ring = __vmalloc(ipoib_sendq_size * sizeof *p->tx_ring,
GFP_NOIO, PAGE_KERNEL);
if (!p->tx_ring) {
- ipoib_warn(priv, "failed to allocate tx ring\n");
ret = -ENOMEM;
goto err_tx;
}
@@ -1550,8 +1544,6 @@
priv->cm.srq_ring = vzalloc(ipoib_recvq_size * sizeof *priv->cm.srq_ring);
if (!priv->cm.srq_ring) {
- printk(KERN_WARNING "%s: failed to allocate CM SRQ ring (%d entries)\n",
- priv->ca->name, ipoib_recvq_size);
ib_destroy_srq(priv->cm.srq);
priv->cm.srq = NULL;
return;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index 830fecb..5038f9d 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -416,11 +416,8 @@
"(status=%d, wrid=%d vend_err %x)\n",
wc->status, wr_id, wc->vendor_err);
qp_work = kzalloc(sizeof(*qp_work), GFP_ATOMIC);
- if (!qp_work) {
- ipoib_warn(priv, "%s Failed alloc ipoib_qp_state_validate for qp: 0x%x\n",
- __func__, priv->qp->qp_num);
+ if (!qp_work)
return;
- }
INIT_WORK(&qp_work->work, ipoib_qp_state_validate_work);
qp_work->priv = priv;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index c50794f..3ce0765 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -1619,11 +1619,8 @@
/* Allocate RX/TX "rings" to hold queued skbs */
priv->rx_ring = kzalloc(ipoib_recvq_size * sizeof *priv->rx_ring,
GFP_KERNEL);
- if (!priv->rx_ring) {
- printk(KERN_WARNING "%s: failed to allocate RX ring (%d entries)\n",
- ca->name, ipoib_recvq_size);
+ if (!priv->rx_ring)
goto out;
- }
priv->tx_ring = vzalloc(ipoib_sendq_size * sizeof *priv->tx_ring);
if (!priv->tx_ring) {
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
index 1909dd2..fddff40 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -575,8 +575,11 @@
if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags))
return;
- if (ib_query_port(priv->ca, priv->port, &port_attr) ||
- port_attr.state != IB_PORT_ACTIVE) {
+ if (ib_query_port(priv->ca, priv->port, &port_attr)) {
+ ipoib_dbg(priv, "ib_query_port() failed\n");
+ return;
+ }
+ if (port_attr.state != IB_PORT_ACTIVE) {
ipoib_dbg(priv, "port state is not ACTIVE (state = %d) suspending join task\n",
port_attr.state);
return;
diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c
index a4b791d..8ae7a3b 100644
--- a/drivers/infiniband/ulp/iser/iser_verbs.c
+++ b/drivers/infiniband/ulp/iser/iser_verbs.c
@@ -890,11 +890,14 @@
case RDMA_CM_EVENT_ESTABLISHED:
iser_connected_handler(cma_id, event->param.conn.private_data);
break;
+ case RDMA_CM_EVENT_REJECTED:
+ iser_info("Connection rejected: %s\n",
+ rdma_reject_msg(cma_id, event->status));
+ /* FALLTHROUGH */
case RDMA_CM_EVENT_ADDR_ERROR:
case RDMA_CM_EVENT_ROUTE_ERROR:
case RDMA_CM_EVENT_CONNECT_ERROR:
case RDMA_CM_EVENT_UNREACHABLE:
- case RDMA_CM_EVENT_REJECTED:
iser_connect_error(cma_id);
break;
case RDMA_CM_EVENT_DISCONNECTED:
diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c
index 6dd43f6..314e955 100644
--- a/drivers/infiniband/ulp/isert/ib_isert.c
+++ b/drivers/infiniband/ulp/isert/ib_isert.c
@@ -184,7 +184,7 @@
isert_conn->rx_descs = kzalloc(ISERT_QP_MAX_RECV_DTOS *
sizeof(struct iser_rx_desc), GFP_KERNEL);
if (!isert_conn->rx_descs)
- goto fail;
+ return -ENOMEM;
rx_desc = isert_conn->rx_descs;
@@ -213,9 +213,7 @@
}
kfree(isert_conn->rx_descs);
isert_conn->rx_descs = NULL;
-fail:
isert_err("conn %p failed to allocate rx descriptors\n", isert_conn);
-
return -ENOMEM;
}
@@ -269,10 +267,8 @@
device->comps = kcalloc(device->comps_used, sizeof(struct isert_comp),
GFP_KERNEL);
- if (!device->comps) {
- isert_err("Unable to allocate completion contexts\n");
+ if (!device->comps)
return -ENOMEM;
- }
max_cqe = min(ISER_MAX_CQ_LEN, device->ib_device->attrs.max_cqe);
@@ -432,10 +428,8 @@
isert_conn->login_req_buf = kzalloc(sizeof(*isert_conn->login_req_buf),
GFP_KERNEL);
- if (!isert_conn->login_req_buf) {
- isert_err("Unable to allocate isert_conn->login_buf\n");
+ if (!isert_conn->login_req_buf)
return -ENOMEM;
- }
isert_conn->login_req_dma = ib_dma_map_single(ib_dev,
isert_conn->login_req_buf,
@@ -795,6 +789,8 @@
*/
return 1;
case RDMA_CM_EVENT_REJECTED: /* FALLTHRU */
+ isert_info("Connection rejected: %s\n",
+ rdma_reject_msg(cma_id, event->status));
case RDMA_CM_EVENT_UNREACHABLE: /* FALLTHRU */
case RDMA_CM_EVENT_CONNECT_ERROR:
ret = isert_connect_error(cma_id);
@@ -1276,11 +1272,8 @@
if (payload_length) {
text_in = kzalloc(payload_length, GFP_KERNEL);
- if (!text_in) {
- isert_err("Unable to allocate text_in of payload_length: %u\n",
- payload_length);
+ if (!text_in)
return -ENOMEM;
- }
}
cmd->text_in_ptr = text_in;
@@ -1851,6 +1844,8 @@
isert_cmd->pdu_buf_dma = ib_dma_map_single(ib_dev,
(void *)cmd->sense_buffer, pdu_len,
DMA_TO_DEVICE);
+ if (ib_dma_mapping_error(ib_dev, isert_cmd->pdu_buf_dma))
+ return -ENOMEM;
isert_cmd->pdu_buf_len = pdu_len;
tx_dsg->addr = isert_cmd->pdu_buf_dma;
@@ -1978,6 +1973,8 @@
isert_cmd->pdu_buf_dma = ib_dma_map_single(ib_dev,
(void *)cmd->buf_ptr, ISCSI_HDR_LEN,
DMA_TO_DEVICE);
+ if (ib_dma_mapping_error(ib_dev, isert_cmd->pdu_buf_dma))
+ return -ENOMEM;
isert_cmd->pdu_buf_len = ISCSI_HDR_LEN;
tx_dsg->addr = isert_cmd->pdu_buf_dma;
tx_dsg->length = ISCSI_HDR_LEN;
@@ -2018,6 +2015,8 @@
isert_cmd->pdu_buf_dma = ib_dma_map_single(ib_dev,
txt_rsp_buf, txt_rsp_len, DMA_TO_DEVICE);
+ if (ib_dma_mapping_error(ib_dev, isert_cmd->pdu_buf_dma))
+ return -ENOMEM;
isert_cmd->pdu_buf_len = txt_rsp_len;
tx_dsg->addr = isert_cmd->pdu_buf_dma;
@@ -2307,10 +2306,9 @@
int ret;
isert_np = kzalloc(sizeof(struct isert_np), GFP_KERNEL);
- if (!isert_np) {
- isert_err("Unable to allocate struct isert_np\n");
+ if (!isert_np)
return -ENOMEM;
- }
+
sema_init(&isert_np->sem, 0);
mutex_init(&isert_np->mutex);
INIT_LIST_HEAD(&isert_np->accepted);
@@ -2651,7 +2649,6 @@
WQ_UNBOUND | WQ_HIGHPRI, 0);
if (!isert_comp_wq) {
isert_err("Unable to allocate isert_comp_wq\n");
- ret = -ENOMEM;
return -ENOMEM;
}
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index d980fb4..8ddc071 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -64,6 +64,11 @@
MODULE_VERSION(DRV_VERSION);
MODULE_INFO(release_date, DRV_RELDATE);
+#if !defined(CONFIG_DYNAMIC_DEBUG)
+#define DEFINE_DYNAMIC_DEBUG_METADATA(name, fmt)
+#define DYNAMIC_DEBUG_BRANCH(descriptor) false
+#endif
+
static unsigned int srp_sg_tablesize;
static unsigned int cmd_sg_entries;
static unsigned int indirect_sg_entries;
@@ -384,6 +389,9 @@
max_page_list_len);
if (IS_ERR(mr)) {
ret = PTR_ERR(mr);
+ if (ret == -ENOMEM)
+ pr_info("%s: ib_alloc_mr() failed. Try to reduce max_cmd_per_lun, max_sect or ch_count\n",
+ dev_name(&device->dev));
goto destroy_pool;
}
d->mr = mr;
@@ -1266,8 +1274,12 @@
struct ib_pool_fmr *fmr;
u64 io_addr = 0;
- if (state->fmr.next >= state->fmr.end)
+ if (state->fmr.next >= state->fmr.end) {
+ shost_printk(KERN_ERR, ch->target->scsi_host,
+ PFX "Out of MRs (mr_per_cmd = %d)\n",
+ ch->target->mr_per_cmd);
return -ENOMEM;
+ }
WARN_ON_ONCE(!dev->use_fmr);
@@ -1323,8 +1335,12 @@
u32 rkey;
int n, err;
- if (state->fr.next >= state->fr.end)
+ if (state->fr.next >= state->fr.end) {
+ shost_printk(KERN_ERR, ch->target->scsi_host,
+ PFX "Out of MRs (mr_per_cmd = %d)\n",
+ ch->target->mr_per_cmd);
return -ENOMEM;
+ }
WARN_ON_ONCE(!dev->use_fast_reg);
@@ -1556,7 +1572,6 @@
return 0;
}
-#if defined(DYNAMIC_DATA_DEBUG)
static void srp_check_mapping(struct srp_map_state *state,
struct srp_rdma_ch *ch, struct srp_request *req,
struct scatterlist *scat, int count)
@@ -1580,7 +1595,6 @@
scsi_bufflen(req->scmnd), desc_len, mr_len,
state->ndesc, state->nmdesc);
}
-#endif
/**
* srp_map_data() - map SCSI data buffer onto an SRP request
@@ -1669,14 +1683,12 @@
if (ret < 0)
goto unmap;
-#if defined(DYNAMIC_DEBUG)
{
DEFINE_DYNAMIC_DEBUG_METADATA(ddm,
"Memory mapping consistency check");
- if (unlikely(ddm.flags & _DPRINTK_FLAGS_PRINT))
+ if (DYNAMIC_DEBUG_BRANCH(ddm))
srp_check_mapping(&state, ch, req, scat, count);
}
-#endif
/* We've mapped the request, now pull as much of the indirect
* descriptor table as we can into the command buffer. If this
@@ -3287,7 +3299,9 @@
*/
scsi_host_get(target->scsi_host);
- mutex_lock(&host->add_target_mutex);
+ ret = mutex_lock_interruptible(&host->add_target_mutex);
+ if (ret < 0)
+ goto put;
ret = srp_parse_options(buf, target);
if (ret)
@@ -3443,6 +3457,7 @@
out:
mutex_unlock(&host->add_target_mutex);
+put:
scsi_host_put(target->scsi_host);
if (ret < 0)
scsi_host_put(target->scsi_host);
@@ -3526,6 +3541,7 @@
static void srp_add_one(struct ib_device *device)
{
struct srp_device *srp_dev;
+ struct ib_device_attr *attr = &device->attrs;
struct srp_host *host;
int mr_page_shift, p;
u64 max_pages_per_mr;
@@ -3540,25 +3556,25 @@
* minimum of 4096 bytes. We're unlikely to build large sglists
* out of smaller entries.
*/
- mr_page_shift = max(12, ffs(device->attrs.page_size_cap) - 1);
+ mr_page_shift = max(12, ffs(attr->page_size_cap) - 1);
srp_dev->mr_page_size = 1 << mr_page_shift;
srp_dev->mr_page_mask = ~((u64) srp_dev->mr_page_size - 1);
- max_pages_per_mr = device->attrs.max_mr_size;
+ max_pages_per_mr = attr->max_mr_size;
do_div(max_pages_per_mr, srp_dev->mr_page_size);
pr_debug("%s: %llu / %u = %llu <> %u\n", __func__,
- device->attrs.max_mr_size, srp_dev->mr_page_size,
+ attr->max_mr_size, srp_dev->mr_page_size,
max_pages_per_mr, SRP_MAX_PAGES_PER_MR);
srp_dev->max_pages_per_mr = min_t(u64, SRP_MAX_PAGES_PER_MR,
max_pages_per_mr);
srp_dev->has_fmr = (device->alloc_fmr && device->dealloc_fmr &&
device->map_phys_fmr && device->unmap_fmr);
- srp_dev->has_fr = (device->attrs.device_cap_flags &
+ srp_dev->has_fr = (attr->device_cap_flags &
IB_DEVICE_MEM_MGT_EXTENSIONS);
if (!never_register && !srp_dev->has_fmr && !srp_dev->has_fr) {
dev_warn(&device->dev, "neither FMR nor FR is supported\n");
} else if (!never_register &&
- device->attrs.max_mr_size >= 2 * srp_dev->mr_page_size) {
+ attr->max_mr_size >= 2 * srp_dev->mr_page_size) {
srp_dev->use_fast_reg = (srp_dev->has_fr &&
(!srp_dev->has_fmr || prefer_fr));
srp_dev->use_fmr = !srp_dev->use_fast_reg && srp_dev->has_fmr;
@@ -3571,13 +3587,13 @@
if (srp_dev->use_fast_reg) {
srp_dev->max_pages_per_mr =
min_t(u32, srp_dev->max_pages_per_mr,
- device->attrs.max_fast_reg_page_list_len);
+ attr->max_fast_reg_page_list_len);
}
srp_dev->mr_max_size = srp_dev->mr_page_size *
srp_dev->max_pages_per_mr;
pr_debug("%s: mr_page_shift = %d, device->max_mr_size = %#llx, device->max_fast_reg_page_list_len = %u, max_pages_per_mr = %d, mr_max_size = %#x\n",
- device->name, mr_page_shift, device->attrs.max_mr_size,
- device->attrs.max_fast_reg_page_list_len,
+ device->name, mr_page_shift, attr->max_mr_size,
+ attr->max_fast_reg_page_list_len,
srp_dev->max_pages_per_mr, srp_dev->mr_max_size);
INIT_LIST_HEAD(&srp_dev->dev_list);
diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c
index 0b1f69e..d21ba9d 100644
--- a/drivers/infiniband/ulp/srpt/ib_srpt.c
+++ b/drivers/infiniband/ulp/srpt/ib_srpt.c
@@ -1840,7 +1840,6 @@
struct srpt_rdma_ch *ch, *tmp_ch;
u32 it_iu_len;
int i, ret = 0;
- unsigned char *p;
WARN_ON_ONCE(irqs_disabled());
@@ -1994,21 +1993,18 @@
be64_to_cpu(*(__be64 *)(ch->i_port_id + 8)));
pr_debug("registering session %s\n", ch->sess_name);
- p = &ch->sess_name[0];
-try_again:
ch->sess = target_alloc_session(&sport->port_tpg_1, 0, 0,
- TARGET_PROT_NORMAL, p, ch, NULL);
+ TARGET_PROT_NORMAL, ch->sess_name, ch,
+ NULL);
+ /* Retry without leading "0x" */
+ if (IS_ERR(ch->sess))
+ ch->sess = target_alloc_session(&sport->port_tpg_1, 0, 0,
+ TARGET_PROT_NORMAL,
+ ch->sess_name + 2, ch, NULL);
if (IS_ERR(ch->sess)) {
- pr_info("Rejected login because no ACL has been"
- " configured yet for initiator %s.\n", p);
- /*
- * XXX: Hack to retry of ch->i_port_id without leading '0x'
- */
- if (p == &ch->sess_name[0]) {
- p += 2;
- goto try_again;
- }
+ pr_info("Rejected login because no ACL has been configured yet for initiator %s.\n",
+ ch->sess_name);
rej->reason = cpu_to_be32((PTR_ERR(ch->sess) == -ENOMEM) ?
SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES :
SRP_LOGIN_REJ_CHANNEL_LIMIT_REACHED);
diff --git a/drivers/net/ethernet/qlogic/qede/qede_roce.c b/drivers/net/ethernet/qlogic/qede/qede_roce.c
index 9867f96..4927271 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_roce.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_roce.c
@@ -191,8 +191,8 @@
}
mutex_unlock(&qedr_dev_list_lock);
- DP_INFO(edev, "qedr: discovered and registered %d RoCE funcs\n",
- qedr_counter);
+ pr_notice("qedr: discovered and registered %d RoCE funcs\n",
+ qedr_counter);
return 0;
}
diff --git a/drivers/net/vmxnet3/vmxnet3_int.h b/drivers/net/vmxnet3/vmxnet3_int.h
index 7dc37a0..59e077b 100644
--- a/drivers/net/vmxnet3/vmxnet3_int.h
+++ b/drivers/net/vmxnet3/vmxnet3_int.h
@@ -119,9 +119,8 @@
};
/*
- * PCI vendor and device IDs.
+ * Maximum devices supported.
*/
-#define PCI_DEVICE_ID_VMWARE_VMXNET3 0x07B0
#define MAX_ETHERNET_CARDS 10
#define MAX_PCI_PASSTHRU_DEVICE 6
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index f42ab70..f587af3 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -42,6 +42,28 @@
#define NVME_RDMA_MAX_INLINE_SEGMENTS 1
+static const char *const nvme_rdma_cm_status_strs[] = {
+ [NVME_RDMA_CM_INVALID_LEN] = "invalid length",
+ [NVME_RDMA_CM_INVALID_RECFMT] = "invalid record format",
+ [NVME_RDMA_CM_INVALID_QID] = "invalid queue ID",
+ [NVME_RDMA_CM_INVALID_HSQSIZE] = "invalid host SQ size",
+ [NVME_RDMA_CM_INVALID_HRQSIZE] = "invalid host RQ size",
+ [NVME_RDMA_CM_NO_RSC] = "resource not found",
+ [NVME_RDMA_CM_INVALID_IRD] = "invalid IRD",
+ [NVME_RDMA_CM_INVALID_ORD] = "Invalid ORD",
+};
+
+static const char *nvme_rdma_cm_msg(enum nvme_rdma_cm_status status)
+{
+ size_t index = status;
+
+ if (index < ARRAY_SIZE(nvme_rdma_cm_status_strs) &&
+ nvme_rdma_cm_status_strs[index])
+ return nvme_rdma_cm_status_strs[index];
+ else
+ return "unrecognized reason";
+};
+
/*
* We handle AEN commands ourselves and don't even let the
* block layer know about them.
@@ -1214,16 +1236,24 @@
static int nvme_rdma_conn_rejected(struct nvme_rdma_queue *queue,
struct rdma_cm_event *ev)
{
- if (ev->param.conn.private_data_len) {
- struct nvme_rdma_cm_rej *rej =
- (struct nvme_rdma_cm_rej *)ev->param.conn.private_data;
+ struct rdma_cm_id *cm_id = queue->cm_id;
+ int status = ev->status;
+ const char *rej_msg;
+ const struct nvme_rdma_cm_rej *rej_data;
+ u8 rej_data_len;
+
+ rej_msg = rdma_reject_msg(cm_id, status);
+ rej_data = rdma_consumer_reject_data(cm_id, ev, &rej_data_len);
+
+ if (rej_data && rej_data_len >= sizeof(u16)) {
+ u16 sts = le16_to_cpu(rej_data->sts);
dev_err(queue->ctrl->ctrl.device,
- "Connect rejected, status %d.", le16_to_cpu(rej->sts));
- /* XXX: Think of something clever to do here... */
+ "Connect rejected: status %d (%s) nvme status %d (%s).\n",
+ status, rej_msg, sts, nvme_rdma_cm_msg(sts));
} else {
dev_err(queue->ctrl->ctrl.device,
- "Connect rejected, no private data.\n");
+ "Connect rejected: status %d (%s).\n", status, rej_msg);
}
return -ECONNRESET;
diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c
index 3fbcdb7..8c3760a 100644
--- a/drivers/nvme/target/rdma.c
+++ b/drivers/nvme/target/rdma.c
@@ -1374,6 +1374,9 @@
ret = nvmet_rdma_device_removal(cm_id, queue);
break;
case RDMA_CM_EVENT_REJECTED:
+ pr_debug("Connection rejected: %s\n",
+ rdma_reject_msg(cm_id, event->status));
+ /* FALLTHROUGH */
case RDMA_CM_EVENT_UNREACHABLE:
case RDMA_CM_EVENT_CONNECT_ERROR:
nvmet_rdma_queue_connect_fail(cm_id, queue);
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index a5f0fbe..57bec54 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -577,7 +577,7 @@
u8 self_lb_en_modifiable[0x1];
u8 reserved_at_9[0x2];
u8 max_lso_cap[0x5];
- u8 reserved_at_10[0x2];
+ u8 multi_pkt_send_wqe[0x2];
u8 wqe_inline_mode[0x2];
u8 rss_ind_tbl_cap[0x4];
u8 reg_umr_sq[0x1];
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index a5e6c7b..abf4aa4 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -2253,6 +2253,7 @@
#define PCI_DEVICE_ID_RASTEL_2PORT 0x2000
#define PCI_VENDOR_ID_VMWARE 0x15ad
+#define PCI_DEVICE_ID_VMWARE_VMXNET3 0x07b0
#define PCI_VENDOR_ID_ZOLTRIX 0x15b0
#define PCI_DEVICE_ID_ZOLTRIX_2BD0 0x2bd0
diff --git a/include/rdma/ib_cm.h b/include/rdma/ib_cm.h
index 92a7d85..b49258b 100644
--- a/include/rdma/ib_cm.h
+++ b/include/rdma/ib_cm.h
@@ -603,4 +603,10 @@
int ib_send_cm_sidr_rep(struct ib_cm_id *cm_id,
struct ib_cm_sidr_rep_param *param);
+/**
+ * ibcm_reject_msg - return a pointer to a reject message string.
+ * @reason: Value returned in the REJECT event status field.
+ */
+const char *__attribute_const__ ibcm_reject_msg(int reason);
+
#endif /* IB_CM_H */
diff --git a/include/rdma/ib_mad.h b/include/rdma/ib_mad.h
index c8a773f..981214b 100644
--- a/include/rdma/ib_mad.h
+++ b/include/rdma/ib_mad.h
@@ -46,7 +46,7 @@
#define IB_MGMT_BASE_VERSION 1
#define OPA_MGMT_BASE_VERSION 0x80
-#define OPA_SMP_CLASS_VERSION 0x80
+#define OPA_SM_CLASS_VERSION 0x80
/* Management classes */
#define IB_MGMT_CLASS_SUBN_LID_ROUTED 0x01
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 5ad43a4..8029d2a 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -1102,6 +1102,7 @@
IB_QP_RESERVED2 = (1<<22),
IB_QP_RESERVED3 = (1<<23),
IB_QP_RESERVED4 = (1<<24),
+ IB_QP_RATE_LIMIT = (1<<25),
};
enum ib_qp_state {
@@ -1151,6 +1152,7 @@
u8 rnr_retry;
u8 alt_port_num;
u8 alt_timeout;
+ u32 rate_limit;
};
enum ib_wr_opcode {
@@ -1592,17 +1594,19 @@
/* Supported steering header types */
enum ib_flow_spec_type {
/* L2 headers*/
- IB_FLOW_SPEC_ETH = 0x20,
- IB_FLOW_SPEC_IB = 0x22,
+ IB_FLOW_SPEC_ETH = 0x20,
+ IB_FLOW_SPEC_IB = 0x22,
/* L3 header*/
- IB_FLOW_SPEC_IPV4 = 0x30,
- IB_FLOW_SPEC_IPV6 = 0x31,
+ IB_FLOW_SPEC_IPV4 = 0x30,
+ IB_FLOW_SPEC_IPV6 = 0x31,
/* L4 headers*/
- IB_FLOW_SPEC_TCP = 0x40,
- IB_FLOW_SPEC_UDP = 0x41
+ IB_FLOW_SPEC_TCP = 0x40,
+ IB_FLOW_SPEC_UDP = 0x41,
+ IB_FLOW_SPEC_VXLAN_TUNNEL = 0x50,
+ IB_FLOW_SPEC_INNER = 0x100,
};
#define IB_FLOW_SPEC_LAYER_MASK 0xF0
-#define IB_FLOW_SPEC_SUPPORT_LAYERS 4
+#define IB_FLOW_SPEC_SUPPORT_LAYERS 8
/* Flow steering rule priority is set according to it's domain.
* Lower domain value means higher priority.
@@ -1630,7 +1634,7 @@
};
struct ib_flow_spec_eth {
- enum ib_flow_spec_type type;
+ u32 type;
u16 size;
struct ib_flow_eth_filter val;
struct ib_flow_eth_filter mask;
@@ -1644,7 +1648,7 @@
};
struct ib_flow_spec_ib {
- enum ib_flow_spec_type type;
+ u32 type;
u16 size;
struct ib_flow_ib_filter val;
struct ib_flow_ib_filter mask;
@@ -1669,7 +1673,7 @@
};
struct ib_flow_spec_ipv4 {
- enum ib_flow_spec_type type;
+ u32 type;
u16 size;
struct ib_flow_ipv4_filter val;
struct ib_flow_ipv4_filter mask;
@@ -1687,7 +1691,7 @@
};
struct ib_flow_spec_ipv6 {
- enum ib_flow_spec_type type;
+ u32 type;
u16 size;
struct ib_flow_ipv6_filter val;
struct ib_flow_ipv6_filter mask;
@@ -1701,15 +1705,30 @@
};
struct ib_flow_spec_tcp_udp {
- enum ib_flow_spec_type type;
+ u32 type;
u16 size;
struct ib_flow_tcp_udp_filter val;
struct ib_flow_tcp_udp_filter mask;
};
+struct ib_flow_tunnel_filter {
+ __be32 tunnel_id;
+ u8 real_sz[0];
+};
+
+/* ib_flow_spec_tunnel describes the Vxlan tunnel
+ * the tunnel_id from val has the vni value
+ */
+struct ib_flow_spec_tunnel {
+ u32 type;
+ u16 size;
+ struct ib_flow_tunnel_filter val;
+ struct ib_flow_tunnel_filter mask;
+};
+
union ib_flow_spec {
struct {
- enum ib_flow_spec_type type;
+ u32 type;
u16 size;
};
struct ib_flow_spec_eth eth;
@@ -1717,6 +1736,7 @@
struct ib_flow_spec_ipv4 ipv4;
struct ib_flow_spec_tcp_udp tcp_udp;
struct ib_flow_spec_ipv6 ipv6;
+ struct ib_flow_spec_tunnel tunnel;
};
struct ib_flow_attr {
@@ -1933,7 +1953,8 @@
struct ib_udata *udata);
int (*dealloc_pd)(struct ib_pd *pd);
struct ib_ah * (*create_ah)(struct ib_pd *pd,
- struct ib_ah_attr *ah_attr);
+ struct ib_ah_attr *ah_attr,
+ struct ib_udata *udata);
int (*modify_ah)(struct ib_ah *ah,
struct ib_ah_attr *ah_attr);
int (*query_ah)(struct ib_ah *ah,
@@ -2581,6 +2602,24 @@
struct ib_ah *ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr);
/**
+ * ib_get_gids_from_rdma_hdr - Get sgid and dgid from GRH or IPv4 header
+ * work completion.
+ * @hdr: the L3 header to parse
+ * @net_type: type of header to parse
+ * @sgid: place to store source gid
+ * @dgid: place to store destination gid
+ */
+int ib_get_gids_from_rdma_hdr(const union rdma_network_hdr *hdr,
+ enum rdma_network_type net_type,
+ union ib_gid *sgid, union ib_gid *dgid);
+
+/**
+ * ib_get_rdma_header_version - Get the header version
+ * @hdr: the L3 header to parse
+ */
+int ib_get_rdma_header_version(const union rdma_network_hdr *hdr);
+
+/**
* ib_init_ah_from_wc - Initializes address handle attributes from a
* work completion.
* @device: Device on which the received message arrived.
@@ -3357,4 +3396,7 @@
void ib_drain_rq(struct ib_qp *qp);
void ib_drain_sq(struct ib_qp *qp);
void ib_drain_qp(struct ib_qp *qp);
+
+int ib_resolve_eth_dmac(struct ib_device *device,
+ struct ib_ah_attr *ah_attr);
#endif /* IB_VERBS_H */
diff --git a/include/rdma/iw_cm.h b/include/rdma/iw_cm.h
index 6d0065c..5cd7701 100644
--- a/include/rdma/iw_cm.h
+++ b/include/rdma/iw_cm.h
@@ -253,4 +253,10 @@
int iw_cm_init_qp_attr(struct iw_cm_id *cm_id, struct ib_qp_attr *qp_attr,
int *qp_attr_mask);
+/**
+ * iwcm_reject_msg - return a pointer to a reject message string.
+ * @reason: Value returned in the REJECT event status field.
+ */
+const char *__attribute_const__ iwcm_reject_msg(int reason);
+
#endif /* IW_CM_H */
diff --git a/include/rdma/opa_smi.h b/include/rdma/opa_smi.h
index 4a529ef..f789611 100644
--- a/include/rdma/opa_smi.h
+++ b/include/rdma/opa_smi.h
@@ -44,8 +44,6 @@
#define OPA_MAX_SLS 32
#define OPA_MAX_SCS 32
-#define OPA_SMI_CLASS_VERSION 0x80
-
#define OPA_LID_PERMISSIVE cpu_to_be32(0xFFFFFFFF)
struct opa_smp {
diff --git a/include/rdma/rdma_cm.h b/include/rdma/rdma_cm.h
index 81fb1d1..d3968b5 100644
--- a/include/rdma/rdma_cm.h
+++ b/include/rdma/rdma_cm.h
@@ -388,4 +388,29 @@
*/
__be64 rdma_get_service_id(struct rdma_cm_id *id, struct sockaddr *addr);
+/**
+ * rdma_reject_msg - return a pointer to a reject message string.
+ * @id: Communication identifier that received the REJECT event.
+ * @reason: Value returned in the REJECT event status field.
+ */
+const char *__attribute_const__ rdma_reject_msg(struct rdma_cm_id *id,
+ int reason);
+/**
+ * rdma_is_consumer_reject - return true if the consumer rejected the connect
+ * request.
+ * @id: Communication identifier that received the REJECT event.
+ * @reason: Value returned in the REJECT event status field.
+ */
+bool rdma_is_consumer_reject(struct rdma_cm_id *id, int reason);
+
+/**
+ * rdma_consumer_reject_data - return the consumer reject private data and
+ * length, if any.
+ * @id: Communication identifier that received the REJECT event.
+ * @ev: RDMA CM reject event.
+ * @data_len: Pointer to the resulting length of the consumer data.
+ */
+const void *rdma_consumer_reject_data(struct rdma_cm_id *id,
+ struct rdma_cm_event *ev, u8 *data_len);
+
#endif /* RDMA_CM_H */
diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h
index e315021..861e23e 100644
--- a/include/rdma/rdma_vt.h
+++ b/include/rdma/rdma_vt.h
@@ -185,6 +185,27 @@
* check_support() for details.
*/
+ /* hot path calldowns in a single cacheline */
+
+ /*
+ * Give the driver a notice that there is send work to do. It is up to
+ * the driver to generally push the packets out, this just queues the
+ * work with the driver. There are two variants here. The no_lock
+ * version requires the s_lock not to be held. The other assumes the
+ * s_lock is held.
+ */
+ void (*schedule_send)(struct rvt_qp *qp);
+ void (*schedule_send_no_lock)(struct rvt_qp *qp);
+
+ /* Driver specific work request checking */
+ int (*check_send_wqe)(struct rvt_qp *qp, struct rvt_swqe *wqe);
+
+ /*
+ * Sometimes rdmavt needs to kick the driver's send progress. That is
+ * done by this call back.
+ */
+ void (*do_send)(struct rvt_qp *qp);
+
/* Passed to ib core registration. Callback to create syfs files */
int (*port_callback)(struct ib_device *, u8, struct kobject *);
@@ -223,22 +244,6 @@
void (*notify_qp_reset)(struct rvt_qp *qp);
/*
- * Give the driver a notice that there is send work to do. It is up to
- * the driver to generally push the packets out, this just queues the
- * work with the driver. There are two variants here. The no_lock
- * version requires the s_lock not to be held. The other assumes the
- * s_lock is held.
- */
- void (*schedule_send)(struct rvt_qp *qp);
- void (*schedule_send_no_lock)(struct rvt_qp *qp);
-
- /*
- * Sometimes rdmavt needs to kick the driver's send progress. That is
- * done by this call back.
- */
- void (*do_send)(struct rvt_qp *qp);
-
- /*
* Get a path mtu from the driver based on qp attributes.
*/
int (*get_pmtu_from_attr)(struct rvt_dev_info *rdi, struct rvt_qp *qp,
@@ -324,9 +329,6 @@
void (*modify_qp)(struct rvt_qp *qp, struct ib_qp_attr *attr,
int attr_mask, struct ib_udata *udata);
- /* Driver specific work request checking */
- int (*check_send_wqe)(struct rvt_qp *qp, struct rvt_swqe *wqe);
-
/* Notify driver a mad agent has been created */
void (*notify_create_mad_agent)(struct rvt_dev_info *rdi, int port_idx);
@@ -355,12 +357,12 @@
/* post send table */
const struct rvt_operation_params *post_parms;
- struct rvt_mregion __rcu *dma_mr;
- struct rvt_lkey_table lkey_table;
-
/* Driver specific helper functions */
struct rvt_driver_provided driver_f;
+ struct rvt_mregion __rcu *dma_mr;
+ struct rvt_lkey_table lkey_table;
+
/* Internal use */
int n_pds_allocated;
spinlock_t n_pds_lock; /* Protect pd allocated count */
diff --git a/include/rdma/rdmavt_mr.h b/include/rdma/rdmavt_mr.h
index 6b3c6c8..de59de2 100644
--- a/include/rdma/rdmavt_mr.h
+++ b/include/rdma/rdmavt_mr.h
@@ -90,11 +90,15 @@
#define RVT_MAX_LKEY_TABLE_BITS 23
struct rvt_lkey_table {
- spinlock_t lock; /* protect changes in this struct */
+ /* read mostly fields */
+ u32 max; /* size of the table */
+ u32 shift; /* lkey/rkey shift */
+ struct rvt_mregion __rcu **table;
+ /* writeable fields */
+ /* protect changes in this struct */
+ spinlock_t lock ____cacheline_aligned_in_smp;
u32 next; /* next unused index (speeds search) */
u32 gen; /* generation count */
- u32 max; /* size of the table */
- struct rvt_mregion __rcu **table;
};
/*
diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h
index 2c5183e..f3dbd15 100644
--- a/include/rdma/rdmavt_qp.h
+++ b/include/rdma/rdmavt_qp.h
@@ -51,6 +51,7 @@
#include <rdma/rdma_vt.h>
#include <rdma/ib_pack.h>
#include <rdma/ib_verbs.h>
+#include <rdma/rdmavt_cq.h>
/*
* Atomic bit definitions for r_aflags.
*/
@@ -485,6 +486,23 @@
}
/**
+ * rvt_put_swqe - drop mr refs held by swqe
+ * @wqe - the send wqe
+ *
+ * This drops any mr references held by the swqe
+ */
+static inline void rvt_put_swqe(struct rvt_swqe *wqe)
+{
+ int i;
+
+ for (i = 0; i < wqe->wr.num_sge; i++) {
+ struct rvt_sge *sge = &wqe->sg_list[i];
+
+ rvt_put_mr(sge->mr);
+ }
+}
+
+/**
* rvt_qp_wqe_reserve - reserve operation
* @qp - the rvt qp
* @wqe - the send wqe
@@ -527,6 +545,65 @@
}
}
+extern const enum ib_wc_opcode ib_rvt_wc_opcode[];
+
+/**
+ * rvt_qp_swqe_complete() - insert send completion
+ * @qp - the qp
+ * @wqe - the send wqe
+ * @status - completion status
+ *
+ * Insert a send completion into the completion
+ * queue if the qp indicates it should be done.
+ *
+ * See IBTA 10.7.3.1 for info on completion
+ * control.
+ */
+static inline void rvt_qp_swqe_complete(
+ struct rvt_qp *qp,
+ struct rvt_swqe *wqe,
+ enum ib_wc_status status)
+{
+ if (unlikely(wqe->wr.send_flags & RVT_SEND_RESERVE_USED))
+ return;
+ if (!(qp->s_flags & RVT_S_SIGNAL_REQ_WR) ||
+ (wqe->wr.send_flags & IB_SEND_SIGNALED) ||
+ status != IB_WC_SUCCESS) {
+ struct ib_wc wc;
+
+ memset(&wc, 0, sizeof(wc));
+ wc.wr_id = wqe->wr.wr_id;
+ wc.status = status;
+ wc.opcode = ib_rvt_wc_opcode[wqe->wr.opcode];
+ wc.qp = &qp->ibqp;
+ wc.byte_len = wqe->length;
+ rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.send_cq), &wc,
+ status != IB_WC_SUCCESS);
+ }
+}
+
+/**
+ * @qp - the qp pair
+ * @len - the length
+ *
+ * Perform a shift based mtu round up divide
+ */
+static inline u32 rvt_div_round_up_mtu(struct rvt_qp *qp, u32 len)
+{
+ return (len + qp->pmtu - 1) >> qp->log_pmtu;
+}
+
+/**
+ * @qp - the qp pair
+ * @len - the length
+ *
+ * Perform a shift based mtu divide
+ */
+static inline u32 rvt_div_mtu(struct rvt_qp *qp, u32 len)
+{
+ return len >> qp->log_pmtu;
+}
+
extern const int ib_rvt_state_ops[];
struct rvt_dev_info;
diff --git a/include/uapi/rdma/Kbuild b/include/uapi/rdma/Kbuild
index f14ab7f..82bdf56 100644
--- a/include/uapi/rdma/Kbuild
+++ b/include/uapi/rdma/Kbuild
@@ -14,3 +14,5 @@
header-y += mthca-abi.h
header-y += nes-abi.h
header-y += ocrdma-abi.h
+header-y += hns-abi.h
+header-y += vmw_pvrdma-abi.h
diff --git a/include/uapi/rdma/hfi/hfi1_user.h b/include/uapi/rdma/hfi/hfi1_user.h
index d15e728..587b736 100644
--- a/include/uapi/rdma/hfi/hfi1_user.h
+++ b/include/uapi/rdma/hfi/hfi1_user.h
@@ -75,7 +75,7 @@
* may not be implemented; the user code must deal with this if it
* cares, or it must abort after initialization reports the difference.
*/
-#define HFI1_USER_SWMINOR 2
+#define HFI1_USER_SWMINOR 3
/*
* We will encode the major/minor inside a single 32bit version number.
diff --git a/drivers/infiniband/hw/hns/hns_roce_user.h b/include/uapi/rdma/hns-abi.h
similarity index 93%
rename from drivers/infiniband/hw/hns/hns_roce_user.h
rename to include/uapi/rdma/hns-abi.h
index a28f761..5d74019 100644
--- a/drivers/infiniband/hw/hns/hns_roce_user.h
+++ b/include/uapi/rdma/hns-abi.h
@@ -30,8 +30,10 @@
* SOFTWARE.
*/
-#ifndef _HNS_ROCE_USER_H
-#define _HNS_ROCE_USER_H
+#ifndef HNS_ABI_USER_H
+#define HNS_ABI_USER_H
+
+#include <linux/types.h>
struct hns_roce_ib_create_cq {
__u64 buf_addr;
@@ -49,5 +51,4 @@
struct hns_roce_ib_alloc_ucontext_resp {
__u32 qp_tab_size;
};
-
-#endif /*_HNS_ROCE_USER_H */
+#endif /* HNS_ABI_USER_H */
diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h
index 25225eb..dfdfe4e 100644
--- a/include/uapi/rdma/ib_user_verbs.h
+++ b/include/uapi/rdma/ib_user_verbs.h
@@ -37,6 +37,7 @@
#define IB_USER_VERBS_H
#include <linux/types.h>
+#include <rdma/ib_verbs.h>
/*
* Increment this value if any changes that break userspace ABI
@@ -93,6 +94,7 @@
IB_USER_VERBS_EX_CMD_QUERY_DEVICE = IB_USER_VERBS_CMD_QUERY_DEVICE,
IB_USER_VERBS_EX_CMD_CREATE_CQ = IB_USER_VERBS_CMD_CREATE_CQ,
IB_USER_VERBS_EX_CMD_CREATE_QP = IB_USER_VERBS_CMD_CREATE_QP,
+ IB_USER_VERBS_EX_CMD_MODIFY_QP = IB_USER_VERBS_CMD_MODIFY_QP,
IB_USER_VERBS_EX_CMD_CREATE_FLOW = IB_USER_VERBS_CMD_THRESHOLD,
IB_USER_VERBS_EX_CMD_DESTROY_FLOW,
IB_USER_VERBS_EX_CMD_CREATE_WQ,
@@ -545,6 +547,14 @@
IB_UVERBS_CREATE_QP_SUP_COMP_MASK = IB_UVERBS_CREATE_QP_MASK_IND_TABLE,
};
+enum {
+ IB_USER_LEGACY_LAST_QP_ATTR_MASK = IB_QP_DEST_QPN
+};
+
+enum {
+ IB_USER_LAST_QP_ATTR_MASK = IB_QP_RATE_LIMIT
+};
+
struct ib_uverbs_ex_create_qp {
__u64 user_handle;
__u32 pd_handle;
@@ -684,9 +694,20 @@
__u64 driver_data[0];
};
+struct ib_uverbs_ex_modify_qp {
+ struct ib_uverbs_modify_qp base;
+ __u32 rate_limit;
+ __u32 reserved;
+};
+
struct ib_uverbs_modify_qp_resp {
};
+struct ib_uverbs_ex_modify_qp_resp {
+ __u32 comp_mask;
+ __u32 response_length;
+};
+
struct ib_uverbs_destroy_qp {
__u64 response;
__u32 qp_handle;
@@ -908,6 +929,23 @@
struct ib_uverbs_flow_ipv6_filter mask;
};
+struct ib_uverbs_flow_tunnel_filter {
+ __be32 tunnel_id;
+};
+
+struct ib_uverbs_flow_spec_tunnel {
+ union {
+ struct ib_uverbs_flow_spec_hdr hdr;
+ struct {
+ __u32 type;
+ __u16 size;
+ __u16 reserved;
+ };
+ };
+ struct ib_uverbs_flow_tunnel_filter val;
+ struct ib_uverbs_flow_tunnel_filter mask;
+};
+
struct ib_uverbs_flow_attr {
__u32 type;
__u16 size;
diff --git a/include/uapi/rdma/mlx5-abi.h b/include/uapi/rdma/mlx5-abi.h
index f5d0f4e..fae6cda 100644
--- a/include/uapi/rdma/mlx5-abi.h
+++ b/include/uapi/rdma/mlx5-abi.h
@@ -82,6 +82,7 @@
enum mlx5_user_cmds_supp_uhw {
MLX5_USER_CMDS_SUPP_UHW_QUERY_DEVICE = 1 << 0,
+ MLX5_USER_CMDS_SUPP_UHW_CREATE_AH = 1 << 1,
};
struct mlx5_ib_alloc_ucontext_resp {
@@ -124,18 +125,47 @@
__u8 reserved[7];
};
+enum mlx5_ib_cqe_comp_res_format {
+ MLX5_IB_CQE_RES_FORMAT_HASH = 1 << 0,
+ MLX5_IB_CQE_RES_FORMAT_CSUM = 1 << 1,
+ MLX5_IB_CQE_RES_RESERVED = 1 << 2,
+};
+
+struct mlx5_ib_cqe_comp_caps {
+ __u32 max_num;
+ __u32 supported_format; /* enum mlx5_ib_cqe_comp_res_format */
+};
+
+struct mlx5_packet_pacing_caps {
+ __u32 qp_rate_limit_min;
+ __u32 qp_rate_limit_max; /* In kpbs */
+
+ /* Corresponding bit will be set if qp type from
+ * 'enum ib_qp_type' is supported, e.g.
+ * supported_qpts |= 1 << IB_QPT_RAW_PACKET
+ */
+ __u32 supported_qpts;
+ __u32 reserved;
+};
+
struct mlx5_ib_query_device_resp {
__u32 comp_mask;
__u32 response_length;
struct mlx5_ib_tso_caps tso_caps;
struct mlx5_ib_rss_caps rss_caps;
+ struct mlx5_ib_cqe_comp_caps cqe_comp_caps;
+ struct mlx5_packet_pacing_caps packet_pacing_caps;
+ __u32 mlx5_ib_support_multi_pkt_send_wqes;
+ __u32 reserved;
};
struct mlx5_ib_create_cq {
__u64 buf_addr;
__u64 db_addr;
__u32 cqe_size;
- __u32 reserved; /* explicit padding (optional on i386) */
+ __u8 cqe_comp_en;
+ __u8 cqe_comp_res_format;
+ __u16 reserved; /* explicit padding (optional on i386) */
};
struct mlx5_ib_create_cq_resp {
@@ -232,6 +262,12 @@
__u32 reserved;
};
+struct mlx5_ib_create_ah_resp {
+ __u32 response_length;
+ __u8 dmac[ETH_ALEN];
+ __u8 reserved[6];
+};
+
struct mlx5_ib_create_wq_resp {
__u32 response_length;
__u32 reserved;
diff --git a/include/uapi/rdma/rdma_user_cm.h b/include/uapi/rdma/rdma_user_cm.h
index 01923d4..d71da36 100644
--- a/include/uapi/rdma/rdma_user_cm.h
+++ b/include/uapi/rdma/rdma_user_cm.h
@@ -110,7 +110,7 @@
__u32 id;
__u16 addr_size;
__u16 reserved;
- struct sockaddr_storage addr;
+ struct __kernel_sockaddr_storage addr;
};
struct rdma_ucm_resolve_ip {
@@ -126,8 +126,8 @@
__u16 src_size;
__u16 dst_size;
__u32 reserved;
- struct sockaddr_storage src_addr;
- struct sockaddr_storage dst_addr;
+ struct __kernel_sockaddr_storage src_addr;
+ struct __kernel_sockaddr_storage dst_addr;
};
struct rdma_ucm_resolve_route {
@@ -164,8 +164,8 @@
__u16 pkey;
__u16 src_size;
__u16 dst_size;
- struct sockaddr_storage src_addr;
- struct sockaddr_storage dst_addr;
+ struct __kernel_sockaddr_storage src_addr;
+ struct __kernel_sockaddr_storage dst_addr;
};
struct rdma_ucm_query_path_resp {
@@ -257,7 +257,7 @@
__u32 id;
__u16 addr_size;
__u16 join_flags;
- struct sockaddr_storage addr;
+ struct __kernel_sockaddr_storage addr;
};
struct rdma_ucm_get_event {
diff --git a/include/uapi/rdma/vmw_pvrdma-abi.h b/include/uapi/rdma/vmw_pvrdma-abi.h
new file mode 100644
index 0000000..5016abc
--- /dev/null
+++ b/include/uapi/rdma/vmw_pvrdma-abi.h
@@ -0,0 +1,289 @@
+/*
+ * Copyright (c) 2012-2016 VMware, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of EITHER the GNU General Public License
+ * version 2 as published by the Free Software Foundation or the BSD
+ * 2-Clause License. This program is distributed in the hope that it
+ * will be useful, but WITHOUT ANY WARRANTY; WITHOUT EVEN THE IMPLIED
+ * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU General Public License version 2 for more details at
+ * http://www.gnu.org/licenses/old-licenses/gpl-2.0.en.html.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program available in the file COPYING in the main
+ * directory of this source tree.
+ *
+ * The BSD 2-Clause License
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __VMW_PVRDMA_ABI_H__
+#define __VMW_PVRDMA_ABI_H__
+
+#include <linux/types.h>
+
+#define PVRDMA_UVERBS_ABI_VERSION 3 /* ABI Version. */
+#define PVRDMA_UAR_HANDLE_MASK 0x00FFFFFF /* Bottom 24 bits. */
+#define PVRDMA_UAR_QP_OFFSET 0 /* QP doorbell. */
+#define PVRDMA_UAR_QP_SEND BIT(30) /* Send bit. */
+#define PVRDMA_UAR_QP_RECV BIT(31) /* Recv bit. */
+#define PVRDMA_UAR_CQ_OFFSET 4 /* CQ doorbell. */
+#define PVRDMA_UAR_CQ_ARM_SOL BIT(29) /* Arm solicited bit. */
+#define PVRDMA_UAR_CQ_ARM BIT(30) /* Arm bit. */
+#define PVRDMA_UAR_CQ_POLL BIT(31) /* Poll bit. */
+
+enum pvrdma_wr_opcode {
+ PVRDMA_WR_RDMA_WRITE,
+ PVRDMA_WR_RDMA_WRITE_WITH_IMM,
+ PVRDMA_WR_SEND,
+ PVRDMA_WR_SEND_WITH_IMM,
+ PVRDMA_WR_RDMA_READ,
+ PVRDMA_WR_ATOMIC_CMP_AND_SWP,
+ PVRDMA_WR_ATOMIC_FETCH_AND_ADD,
+ PVRDMA_WR_LSO,
+ PVRDMA_WR_SEND_WITH_INV,
+ PVRDMA_WR_RDMA_READ_WITH_INV,
+ PVRDMA_WR_LOCAL_INV,
+ PVRDMA_WR_FAST_REG_MR,
+ PVRDMA_WR_MASKED_ATOMIC_CMP_AND_SWP,
+ PVRDMA_WR_MASKED_ATOMIC_FETCH_AND_ADD,
+ PVRDMA_WR_BIND_MW,
+ PVRDMA_WR_REG_SIG_MR,
+};
+
+enum pvrdma_wc_status {
+ PVRDMA_WC_SUCCESS,
+ PVRDMA_WC_LOC_LEN_ERR,
+ PVRDMA_WC_LOC_QP_OP_ERR,
+ PVRDMA_WC_LOC_EEC_OP_ERR,
+ PVRDMA_WC_LOC_PROT_ERR,
+ PVRDMA_WC_WR_FLUSH_ERR,
+ PVRDMA_WC_MW_BIND_ERR,
+ PVRDMA_WC_BAD_RESP_ERR,
+ PVRDMA_WC_LOC_ACCESS_ERR,
+ PVRDMA_WC_REM_INV_REQ_ERR,
+ PVRDMA_WC_REM_ACCESS_ERR,
+ PVRDMA_WC_REM_OP_ERR,
+ PVRDMA_WC_RETRY_EXC_ERR,
+ PVRDMA_WC_RNR_RETRY_EXC_ERR,
+ PVRDMA_WC_LOC_RDD_VIOL_ERR,
+ PVRDMA_WC_REM_INV_RD_REQ_ERR,
+ PVRDMA_WC_REM_ABORT_ERR,
+ PVRDMA_WC_INV_EECN_ERR,
+ PVRDMA_WC_INV_EEC_STATE_ERR,
+ PVRDMA_WC_FATAL_ERR,
+ PVRDMA_WC_RESP_TIMEOUT_ERR,
+ PVRDMA_WC_GENERAL_ERR,
+};
+
+enum pvrdma_wc_opcode {
+ PVRDMA_WC_SEND,
+ PVRDMA_WC_RDMA_WRITE,
+ PVRDMA_WC_RDMA_READ,
+ PVRDMA_WC_COMP_SWAP,
+ PVRDMA_WC_FETCH_ADD,
+ PVRDMA_WC_BIND_MW,
+ PVRDMA_WC_LSO,
+ PVRDMA_WC_LOCAL_INV,
+ PVRDMA_WC_FAST_REG_MR,
+ PVRDMA_WC_MASKED_COMP_SWAP,
+ PVRDMA_WC_MASKED_FETCH_ADD,
+ PVRDMA_WC_RECV = 1 << 7,
+ PVRDMA_WC_RECV_RDMA_WITH_IMM,
+};
+
+enum pvrdma_wc_flags {
+ PVRDMA_WC_GRH = 1 << 0,
+ PVRDMA_WC_WITH_IMM = 1 << 1,
+ PVRDMA_WC_WITH_INVALIDATE = 1 << 2,
+ PVRDMA_WC_IP_CSUM_OK = 1 << 3,
+ PVRDMA_WC_WITH_SMAC = 1 << 4,
+ PVRDMA_WC_WITH_VLAN = 1 << 5,
+ PVRDMA_WC_FLAGS_MAX = PVRDMA_WC_WITH_VLAN,
+};
+
+struct pvrdma_alloc_ucontext_resp {
+ __u32 qp_tab_size;
+ __u32 reserved;
+};
+
+struct pvrdma_alloc_pd_resp {
+ __u32 pdn;
+ __u32 reserved;
+};
+
+struct pvrdma_create_cq {
+ __u64 buf_addr;
+ __u32 buf_size;
+ __u32 reserved;
+};
+
+struct pvrdma_create_cq_resp {
+ __u32 cqn;
+ __u32 reserved;
+};
+
+struct pvrdma_resize_cq {
+ __u64 buf_addr;
+ __u32 buf_size;
+ __u32 reserved;
+};
+
+struct pvrdma_create_srq {
+ __u64 buf_addr;
+};
+
+struct pvrdma_create_srq_resp {
+ __u32 srqn;
+ __u32 reserved;
+};
+
+struct pvrdma_create_qp {
+ __u64 rbuf_addr;
+ __u64 sbuf_addr;
+ __u32 rbuf_size;
+ __u32 sbuf_size;
+ __u64 qp_addr;
+};
+
+/* PVRDMA masked atomic compare and swap */
+struct pvrdma_ex_cmp_swap {
+ __u64 swap_val;
+ __u64 compare_val;
+ __u64 swap_mask;
+ __u64 compare_mask;
+};
+
+/* PVRDMA masked atomic fetch and add */
+struct pvrdma_ex_fetch_add {
+ __u64 add_val;
+ __u64 field_boundary;
+};
+
+/* PVRDMA address vector. */
+struct pvrdma_av {
+ __u32 port_pd;
+ __u32 sl_tclass_flowlabel;
+ __u8 dgid[16];
+ __u8 src_path_bits;
+ __u8 gid_index;
+ __u8 stat_rate;
+ __u8 hop_limit;
+ __u8 dmac[6];
+ __u8 reserved[6];
+};
+
+/* PVRDMA scatter/gather entry */
+struct pvrdma_sge {
+ __u64 addr;
+ __u32 length;
+ __u32 lkey;
+};
+
+/* PVRDMA receive queue work request */
+struct pvrdma_rq_wqe_hdr {
+ __u64 wr_id; /* wr id */
+ __u32 num_sge; /* size of s/g array */
+ __u32 total_len; /* reserved */
+};
+/* Use pvrdma_sge (ib_sge) for receive queue s/g array elements. */
+
+/* PVRDMA send queue work request */
+struct pvrdma_sq_wqe_hdr {
+ __u64 wr_id; /* wr id */
+ __u32 num_sge; /* size of s/g array */
+ __u32 total_len; /* reserved */
+ __u32 opcode; /* operation type */
+ __u32 send_flags; /* wr flags */
+ union {
+ __u32 imm_data;
+ __u32 invalidate_rkey;
+ } ex;
+ __u32 reserved;
+ union {
+ struct {
+ __u64 remote_addr;
+ __u32 rkey;
+ __u8 reserved[4];
+ } rdma;
+ struct {
+ __u64 remote_addr;
+ __u64 compare_add;
+ __u64 swap;
+ __u32 rkey;
+ __u32 reserved;
+ } atomic;
+ struct {
+ __u64 remote_addr;
+ __u32 log_arg_sz;
+ __u32 rkey;
+ union {
+ struct pvrdma_ex_cmp_swap cmp_swap;
+ struct pvrdma_ex_fetch_add fetch_add;
+ } wr_data;
+ } masked_atomics;
+ struct {
+ __u64 iova_start;
+ __u64 pl_pdir_dma;
+ __u32 page_shift;
+ __u32 page_list_len;
+ __u32 length;
+ __u32 access_flags;
+ __u32 rkey;
+ } fast_reg;
+ struct {
+ __u32 remote_qpn;
+ __u32 remote_qkey;
+ struct pvrdma_av av;
+ } ud;
+ } wr;
+};
+/* Use pvrdma_sge (ib_sge) for send queue s/g array elements. */
+
+/* Completion queue element. */
+struct pvrdma_cqe {
+ __u64 wr_id;
+ __u64 qp;
+ __u32 opcode;
+ __u32 status;
+ __u32 byte_len;
+ __u32 imm_data;
+ __u32 src_qp;
+ __u32 wc_flags;
+ __u32 vendor_err;
+ __u16 pkey_index;
+ __u16 slid;
+ __u8 sl;
+ __u8 dlid_path_bits;
+ __u8 port_num;
+ __u8 smac[6];
+ __u8 reserved2[7]; /* Pad to next power of 2 (64). */
+};
+
+#endif /* __VMW_PVRDMA_ABI_H__ */
diff --git a/net/rds/rdma_transport.c b/net/rds/rdma_transport.c
index 345f090..d5f3117 100644
--- a/net/rds/rdma_transport.c
+++ b/net/rds/rdma_transport.c
@@ -100,11 +100,14 @@
trans->cm_connect_complete(conn, event);
break;
+ case RDMA_CM_EVENT_REJECTED:
+ rdsdebug("Connection rejected: %s\n",
+ rdma_reject_msg(cm_id, event->status));
+ /* FALLTHROUGH */
case RDMA_CM_EVENT_ADDR_ERROR:
case RDMA_CM_EVENT_ROUTE_ERROR:
case RDMA_CM_EVENT_CONNECT_ERROR:
case RDMA_CM_EVENT_UNREACHABLE:
- case RDMA_CM_EVENT_REJECTED:
case RDMA_CM_EVENT_DEVICE_REMOVAL:
case RDMA_CM_EVENT_ADDR_CHANGE:
if (conn)