mlx4_core: Reset device when internal error is detected

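Reset the device when an internal error is detected.  The reset is
performed from a dedicated single-threaded workqueue and can be
disabled by setting the new internal_err_reset module parameter to 0.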

Also, detect errors by polling the error buffer from a timer (roughly
every five seconds) rather than using interrupts.  This is more robust
and doesn't depend on MSI-X.  Remove the old interrupt handler and its
dedicated EQ entirely, since we don't want to support two mechanisms
for detecting internal errors.

Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
diff --git a/drivers/net/mlx4/catas.c b/drivers/net/mlx4/catas.c
index 1bb088a..6b32ec9 100644
--- a/drivers/net/mlx4/catas.c
+++ b/drivers/net/mlx4/catas.c
@@ -30,41 +30,133 @@
  * SOFTWARE.
  */
 
+#include <linux/workqueue.h>
+
 #include "mlx4.h"
 
-void mlx4_handle_catas_err(struct mlx4_dev *dev)
+enum {
+	MLX4_CATAS_POLL_INTERVAL	= 5 * HZ,	/* delay between error-buffer polls */
+};
+
+static DEFINE_SPINLOCK(catas_lock);	/* held around every catas_list update */
+
+static LIST_HEAD(catas_list);	/* devices queued by poll_catas() for reset */
+static struct workqueue_struct *catas_wq;	/* queue that catas_work is run on */
+static struct work_struct catas_work;	/* bound to catas_reset() */
+
+static int internal_err_reset = 1;	/* checked by poll_catas() before queueing */
+module_param(internal_err_reset, int, 0644);
+MODULE_PARM_DESC(internal_err_reset,
+		 "Reset device on internal errors if non-zero (default 1)");
+
+static void dump_err_buf(struct mlx4_dev *dev)	/* log every word of the error buffer */
 {
 	struct mlx4_priv *priv = mlx4_priv(dev);
 
 	int i;
 
-	mlx4_err(dev, "Catastrophic error detected:\n");
+	mlx4_err(dev, "Internal error detected:\n");	/* banner, then one line per word */
 	for (i = 0; i < priv->fw.catas_size; ++i)
 		mlx4_err(dev, "  buf[%02x]: %08x\n",
 			 i, swab32(readl(priv->catas_err.map + i)));
-
-	mlx4_dispatch_event(dev, MLX4_EVENT_TYPE_LOCAL_CATAS_ERROR, 0, 0);
 }
 
-void mlx4_map_catas_buf(struct mlx4_dev *dev)
+/* Timer callback: report a latched internal error, otherwise poll again. */
+static void poll_catas(unsigned long dev_ptr)
+{
+	struct mlx4_dev *mdev = (struct mlx4_dev *) dev_ptr;
+	struct mlx4_priv *mpriv = mlx4_priv(mdev);
+
+	if (!readl(mpriv->catas_err.map)) {
+		mod_timer(&mpriv->catas_err.timer,
+			  round_jiffies(jiffies + MLX4_CATAS_POLL_INTERVAL));
+		return;
+	}
+
+	dump_err_buf(mdev);
+	mlx4_dispatch_event(mdev, MLX4_EVENT_TYPE_LOCAL_CATAS_ERROR, 0, 0);
+	if (!internal_err_reset)
+		return;		/* reporting only: the timer is not re-armed */
+	spin_lock(&catas_lock);
+	list_add(&mpriv->catas_err.list, &catas_list);
+	spin_unlock(&catas_lock);
+	queue_work(catas_wq, &catas_work);	/* catas_reset() does the restart */
+}
+
+static void catas_reset(struct work_struct *work)	/* catas_work handler */
+{
+	struct mlx4_priv *priv, *tmppriv;
+	struct mlx4_dev *dev;
+	struct pci_dev *pdev;
+	LIST_HEAD(tlist);
+	int ret;
+
+	spin_lock_irq(&catas_lock);
+	list_splice_init(&catas_list, &tlist);	/* claim all queued devices */
+	spin_unlock_irq(&catas_lock);
+	list_for_each_entry_safe(priv, tmppriv, &tlist, catas_err.list) {
+		pdev = priv->dev.pdev;	/* restart removes priv's device: save pdev */
+		ret = mlx4_restart_one(pdev);
+		dev = pci_get_drvdata(pdev);	/* NOTE(review): assumes drvdata is the new mlx4_dev */
+		if (ret)
+			dev_err(&pdev->dev, "Reset failed (%d)\n", ret);
+		else
+			mlx4_dbg(dev, "Reset succeeded\n");
+	}
+}
+
+void mlx4_start_catas_poll(struct mlx4_dev *dev)	/* map error buffer, arm poll timer */
 {
 	struct mlx4_priv *priv = mlx4_priv(dev);
 	unsigned long addr;
 
+	INIT_LIST_HEAD(&priv->catas_err.list);
+	init_timer(&priv->catas_err.timer);
+	priv->catas_err.map = NULL;
+	/* list, timer and map are all initialised before the mapping can fail */
 	addr = pci_resource_start(dev->pdev, priv->fw.catas_bar) +
 		priv->fw.catas_offset;
 
 	priv->catas_err.map = ioremap(addr, priv->fw.catas_size * 4);
-	if (!priv->catas_err.map)
-		mlx4_warn(dev, "Failed to map catastrophic error buffer at 0x%lx\n",
+	if (!priv->catas_err.map) {
+		mlx4_warn(dev, "Failed to map internal error buffer at 0x%lx\n",
 			  addr);
+		return;	/* nothing to poll: the timer is left unarmed */
+	}
 
+	priv->catas_err.timer.data     = (unsigned long) dev;	/* handed to poll_catas() */
+	priv->catas_err.timer.function = poll_catas;
+	priv->catas_err.timer.expires  =
+		round_jiffies(jiffies + MLX4_CATAS_POLL_INTERVAL);
+	add_timer(&priv->catas_err.timer);	/* first poll one interval from now */
 }
 
-void mlx4_unmap_catas_buf(struct mlx4_dev *dev)
+void mlx4_stop_catas_poll(struct mlx4_dev *dev)	/* stop polling, unmap error buffer */
 {
 	struct mlx4_priv *priv = mlx4_priv(dev);
 
+	del_timer_sync(&priv->catas_err.timer);	/* done before the buffer is unmapped */
+
 	if (priv->catas_err.map)
 		iounmap(priv->catas_err.map);
+
+	spin_lock_irq(&catas_lock);
+	list_del(&priv->catas_err.list);	/* unlink this device's reset-list entry */
+	spin_unlock_irq(&catas_lock);
+}
+
+/* One-time module setup for internal-error recovery; returns 0 or -ENOMEM. */
+int __init mlx4_catas_init(void)
+{
+	/* Work item that resets every device queued on catas_list. */
+	INIT_WORK(&catas_work, catas_reset);
+
+	/* Dedicated single-threaded queue the reset work is run on. */
+	catas_wq = create_singlethread_workqueue("mlx4_err");
+	return catas_wq ? 0 : -ENOMEM;
+}
+
+void mlx4_catas_cleanup(void)	/* counterpart of mlx4_catas_init() */
+{
+	destroy_workqueue(catas_wq);	/* the "mlx4_err" queue created at init */
 }
diff --git a/drivers/net/mlx4/eq.c b/drivers/net/mlx4/eq.c
index 27a82ce..2095c84 100644
--- a/drivers/net/mlx4/eq.c
+++ b/drivers/net/mlx4/eq.c
@@ -89,14 +89,12 @@
 			       (1ull << MLX4_EVENT_TYPE_PATH_MIG_FAILED)    | \
 			       (1ull << MLX4_EVENT_TYPE_WQ_INVAL_REQ_ERROR) | \
 			       (1ull << MLX4_EVENT_TYPE_WQ_ACCESS_ERROR)    | \
-			       (1ull << MLX4_EVENT_TYPE_LOCAL_CATAS_ERROR)  | \
 			       (1ull << MLX4_EVENT_TYPE_PORT_CHANGE)	    | \
 			       (1ull << MLX4_EVENT_TYPE_ECC_DETECT)	    | \
 			       (1ull << MLX4_EVENT_TYPE_SRQ_CATAS_ERROR)    | \
 			       (1ull << MLX4_EVENT_TYPE_SRQ_QP_LAST_WQE)    | \
 			       (1ull << MLX4_EVENT_TYPE_SRQ_LIMIT)	    | \
 			       (1ull << MLX4_EVENT_TYPE_CMD))
-#define MLX4_CATAS_EVENT_MASK  (1ull << MLX4_EVENT_TYPE_LOCAL_CATAS_ERROR)
 
 struct mlx4_eqe {
 	u8			reserved1;
@@ -264,7 +262,7 @@
 
 	writel(priv->eq_table.clr_mask, priv->eq_table.clr_int);
 
-	for (i = 0; i < MLX4_EQ_CATAS; ++i)
+	for (i = 0; i < MLX4_NUM_EQ; ++i)
 		work |= mlx4_eq_int(dev, &priv->eq_table.eq[i]);
 
 	return IRQ_RETVAL(work);
@@ -281,14 +279,6 @@
 	return IRQ_HANDLED;
 }
 
-static irqreturn_t mlx4_catas_interrupt(int irq, void *dev_ptr)
-{
-	mlx4_handle_catas_err(dev_ptr);
-
-	/* MSI-X vectors always belong to us */
-	return IRQ_HANDLED;
-}
-
 static int mlx4_MAP_EQ(struct mlx4_dev *dev, u64 event_mask, int unmap,
 			int eq_num)
 {
@@ -490,11 +480,9 @@
 
 	if (eq_table->have_irq)
 		free_irq(dev->pdev->irq, dev);
-	for (i = 0; i < MLX4_EQ_CATAS; ++i)
+	for (i = 0; i < MLX4_NUM_EQ; ++i)
 		if (eq_table->eq[i].have_irq)
 			free_irq(eq_table->eq[i].irq, eq_table->eq + i);
-	if (eq_table->eq[MLX4_EQ_CATAS].have_irq)
-		free_irq(eq_table->eq[MLX4_EQ_CATAS].irq, dev);
 }
 
 static int __devinit mlx4_map_clr_int(struct mlx4_dev *dev)
@@ -598,32 +586,19 @@
 	if (dev->flags & MLX4_FLAG_MSI_X) {
 		static const char *eq_name[] = {
 			[MLX4_EQ_COMP]  = DRV_NAME " (comp)",
-			[MLX4_EQ_ASYNC] = DRV_NAME " (async)",
-			[MLX4_EQ_CATAS] = DRV_NAME " (catas)"
+			[MLX4_EQ_ASYNC] = DRV_NAME " (async)"
 		};
 
-		err = mlx4_create_eq(dev, 1, MLX4_EQ_CATAS,
-				     &priv->eq_table.eq[MLX4_EQ_CATAS]);
-		if (err)
-			goto err_out_async;
-
-		for (i = 0; i < MLX4_EQ_CATAS; ++i) {
+		for (i = 0; i < MLX4_NUM_EQ; ++i) {
 			err = request_irq(priv->eq_table.eq[i].irq,
 					  mlx4_msi_x_interrupt,
 					  0, eq_name[i], priv->eq_table.eq + i);
 			if (err)
-				goto err_out_catas;
+				goto err_out_async;
 
 			priv->eq_table.eq[i].have_irq = 1;
 		}
 
-		err = request_irq(priv->eq_table.eq[MLX4_EQ_CATAS].irq,
-				  mlx4_catas_interrupt, 0,
-				  eq_name[MLX4_EQ_CATAS], dev);
-		if (err)
-			goto err_out_catas;
-
-		priv->eq_table.eq[MLX4_EQ_CATAS].have_irq = 1;
 	} else {
 		err = request_irq(dev->pdev->irq, mlx4_interrupt,
 				  IRQF_SHARED, DRV_NAME, dev);
@@ -639,22 +614,11 @@
 		mlx4_warn(dev, "MAP_EQ for async EQ %d failed (%d)\n",
 			   priv->eq_table.eq[MLX4_EQ_ASYNC].eqn, err);
 
-	for (i = 0; i < MLX4_EQ_CATAS; ++i)
+	for (i = 0; i < MLX4_NUM_EQ; ++i)
 		eq_set_ci(&priv->eq_table.eq[i], 1);
 
-	if (dev->flags & MLX4_FLAG_MSI_X) {
-		err = mlx4_MAP_EQ(dev, MLX4_CATAS_EVENT_MASK, 0,
-				  priv->eq_table.eq[MLX4_EQ_CATAS].eqn);
-		if (err)
-			mlx4_warn(dev, "MAP_EQ for catas EQ %d failed (%d)\n",
-				  priv->eq_table.eq[MLX4_EQ_CATAS].eqn, err);
-	}
-
 	return 0;
 
-err_out_catas:
-	mlx4_free_eq(dev, &priv->eq_table.eq[MLX4_EQ_CATAS]);
-
 err_out_async:
 	mlx4_free_eq(dev, &priv->eq_table.eq[MLX4_EQ_ASYNC]);
 
@@ -675,19 +639,13 @@
 	struct mlx4_priv *priv = mlx4_priv(dev);
 	int i;
 
-	if (dev->flags & MLX4_FLAG_MSI_X)
-		mlx4_MAP_EQ(dev, MLX4_CATAS_EVENT_MASK, 1,
-			    priv->eq_table.eq[MLX4_EQ_CATAS].eqn);
-
 	mlx4_MAP_EQ(dev, MLX4_ASYNC_EVENT_MASK, 1,
 		    priv->eq_table.eq[MLX4_EQ_ASYNC].eqn);
 
 	mlx4_free_irqs(dev);
 
-	for (i = 0; i < MLX4_EQ_CATAS; ++i)
+	for (i = 0; i < MLX4_NUM_EQ; ++i)
 		mlx4_free_eq(dev, &priv->eq_table.eq[i]);
-	if (dev->flags & MLX4_FLAG_MSI_X)
-		mlx4_free_eq(dev, &priv->eq_table.eq[MLX4_EQ_CATAS]);
 
 	mlx4_unmap_clr_int(dev);
 
diff --git a/drivers/net/mlx4/intf.c b/drivers/net/mlx4/intf.c
index 9ae951b..be5d9e9 100644
--- a/drivers/net/mlx4/intf.c
+++ b/drivers/net/mlx4/intf.c
@@ -142,6 +142,7 @@
 		mlx4_add_device(intf, priv);
 
 	mutex_unlock(&intf_mutex);
+	mlx4_start_catas_poll(dev);
 
 	return 0;
 }
@@ -151,6 +152,7 @@
 	struct mlx4_priv *priv = mlx4_priv(dev);
 	struct mlx4_interface *intf;
 
+	mlx4_stop_catas_poll(dev);
 	mutex_lock(&intf_mutex);
 
 	list_for_each_entry(intf, &intf_list, list)
diff --git a/drivers/net/mlx4/main.c b/drivers/net/mlx4/main.c
index a4f2e04..e8f45e6 100644
--- a/drivers/net/mlx4/main.c
+++ b/drivers/net/mlx4/main.c
@@ -583,13 +583,11 @@
 		goto err_pd_table_free;
 	}
 
-	mlx4_map_catas_buf(dev);
-
 	err = mlx4_init_eq_table(dev);
 	if (err) {
 		mlx4_err(dev, "Failed to initialize "
 			 "event queue table, aborting.\n");
-		goto err_catas_buf;
+		goto err_mr_table_free;
 	}
 
 	err = mlx4_cmd_use_events(dev);
@@ -659,8 +657,7 @@
 err_eq_table_free:
 	mlx4_cleanup_eq_table(dev);
 
-err_catas_buf:
-	mlx4_unmap_catas_buf(dev);
+err_mr_table_free:
 	mlx4_cleanup_mr_table(dev);
 
 err_pd_table_free:
@@ -836,9 +833,6 @@
 	mlx4_cleanup_cq_table(dev);
 	mlx4_cmd_use_polling(dev);
 	mlx4_cleanup_eq_table(dev);
-
-	mlx4_unmap_catas_buf(dev);
-
 	mlx4_cleanup_mr_table(dev);
 	mlx4_cleanup_pd_table(dev);
 	mlx4_cleanup_uar_table(dev);
@@ -885,9 +879,6 @@
 		mlx4_cleanup_cq_table(dev);
 		mlx4_cmd_use_polling(dev);
 		mlx4_cleanup_eq_table(dev);
-
-		mlx4_unmap_catas_buf(dev);
-
 		mlx4_cleanup_mr_table(dev);
 		mlx4_cleanup_pd_table(dev);
 
@@ -908,6 +899,12 @@
 	}
 }
 
+int mlx4_restart_one(struct pci_dev *pdev)
+{
+	mlx4_remove_one(pdev);	/* remove the device first ... */
+	return mlx4_init_one(pdev, NULL);	/* ... then initialise it again; its result is returned */
+}
+
 static struct pci_device_id mlx4_pci_table[] = {
 	{ PCI_VDEVICE(MELLANOX, 0x6340) }, /* MT25408 "Hermon" SDR */
 	{ PCI_VDEVICE(MELLANOX, 0x634a) }, /* MT25408 "Hermon" DDR */
@@ -930,6 +927,10 @@
 {
 	int ret;
 
+	ret = mlx4_catas_init();
+	if (ret)
+		return ret;
+
 	ret = pci_register_driver(&mlx4_driver);
 	return ret < 0 ? ret : 0;
 }
@@ -937,6 +938,7 @@
 static void __exit mlx4_cleanup(void)
 {
 	pci_unregister_driver(&mlx4_driver);
+	mlx4_catas_cleanup();
 }
 
 module_init(mlx4_init);
diff --git a/drivers/net/mlx4/mlx4.h b/drivers/net/mlx4/mlx4.h
index d9c91a7..be304a7 100644
--- a/drivers/net/mlx4/mlx4.h
+++ b/drivers/net/mlx4/mlx4.h
@@ -39,6 +39,7 @@
 
 #include <linux/mutex.h>
 #include <linux/radix-tree.h>
+#include <linux/timer.h>
 
 #include <linux/mlx4/device.h>
 #include <linux/mlx4/doorbell.h>
@@ -67,7 +68,6 @@
 enum {
 	MLX4_EQ_ASYNC,
 	MLX4_EQ_COMP,
-	MLX4_EQ_CATAS,
 	MLX4_NUM_EQ
 };
 
@@ -248,7 +248,8 @@
 
 struct mlx4_catas_err {
 	u32 __iomem	       *map;
-	int			size;
+	struct timer_list	timer;	/* periodic poll of the error buffer */
+	struct list_head	list;	/* entry on the list of devices to reset */
 };
 
 struct mlx4_priv {
@@ -311,9 +312,11 @@
 void mlx4_cleanup_srq_table(struct mlx4_dev *dev);
 void mlx4_cleanup_mcg_table(struct mlx4_dev *dev);
 
-void mlx4_map_catas_buf(struct mlx4_dev *dev);
-void mlx4_unmap_catas_buf(struct mlx4_dev *dev);
-
+void mlx4_start_catas_poll(struct mlx4_dev *dev);
+void mlx4_stop_catas_poll(struct mlx4_dev *dev);
+int mlx4_catas_init(void);
+void mlx4_catas_cleanup(void);
+int mlx4_restart_one(struct pci_dev *pdev);
 int mlx4_register_device(struct mlx4_dev *dev);
 void mlx4_unregister_device(struct mlx4_dev *dev);
 void mlx4_dispatch_event(struct mlx4_dev *dev, enum mlx4_event type,