drbd: fix thread stop deadlock

There are races where the receiver may be exiting,
but still need the worker to process some stuff.

Do not wait for the receiver to die from an exiting worker.
The receiver must already be dead in case the worker decides to exit.
If the receiver was still alive, it may still want to queue work, and do
drbd_flush_workqueue() from it's disconnect cleanup code,
which would no longer be processed by an exiting worker.

This also would deadlock,
if the worker was to synchornously wait for the receiver to die.

Do not implicitly stop the worker.
The worker will only be stopped from configuration context, from
conn_reconfig_done(), drbd_adm_down() or drbd_adm_delete_connection(),
after making sure the receiver is already stopped.

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index a5c9b38..427e959 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -503,7 +503,7 @@
 	thi->task = NULL;
 	thi->t_state = NONE;
 	smp_mb();
-	complete(&thi->stop);
+	complete_all(&thi->stop);
 	spin_unlock_irqrestore(&thi->t_lock, flags);
 
 	conn_info(tconn, "Terminating %s\n", current->comm);
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index 9d9b93f..25468e2 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -1050,10 +1050,16 @@
 /* if still unconfigured, stops worker again. */
 static void conn_reconfig_done(struct drbd_tconn *tconn)
 {
+	bool stop_threads;
 	spin_lock_irq(&tconn->req_lock);
-	if (conn_all_vols_unconf(tconn))
-		drbd_thread_stop_nowait(&tconn->worker);
+	stop_threads = conn_all_vols_unconf(tconn);
 	spin_unlock_irq(&tconn->req_lock);
+	if (stop_threads) {
+		/* asender is implicitly stopped by receiver
+		 * in drbd_disconnect() */
+		drbd_thread_stop(&tconn->receiver);
+		drbd_thread_stop(&tconn->worker);
+	}
 }
 
 /* Make sure IO is suspended before calling this function(). */
@@ -3123,7 +3129,6 @@
 
 	/* delete connection */
 	if (conn_lowest_minor(adm_ctx.tconn) < 0) {
-		drbd_thread_stop(&adm_ctx.tconn->worker);
 		list_del(&adm_ctx.tconn->all_tconn);
 		kref_put(&adm_ctx.tconn->kref, &conn_destroy);
 
@@ -3133,7 +3138,6 @@
 		retcode = ERR_CONN_IN_USE;
 		drbd_msg_put_info("failed to delete connection");
 	}
-
 	up_write(&drbd_cfg_rwsem);
 	goto out;
 out_unlock:
@@ -3164,6 +3168,8 @@
 	}
 	up_write(&drbd_cfg_rwsem);
 
+	if (retcode == NO_ERROR)
+		drbd_thread_stop(&adm_ctx.tconn->worker);
 out:
 	drbd_adm_finish(info, retcode);
 	return 0;
diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c
index 0512bbb..523ec09 100644
--- a/drivers/block/drbd/drbd_state.c
+++ b/drivers/block/drbd/drbd_state.c
@@ -40,7 +40,6 @@
 static int w_after_state_ch(struct drbd_work *w, int unused);
 static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
 			   union drbd_state ns, enum chg_state_flags flags);
-static void after_all_state_ch(struct drbd_tconn *tconn);
 static enum drbd_state_rv is_valid_state(struct drbd_conf *, union drbd_state);
 static enum drbd_state_rv is_valid_soft_transition(union drbd_state, union drbd_state);
 static enum drbd_state_rv is_valid_transition(union drbd_state os, union drbd_state ns);
@@ -1380,8 +1379,6 @@
 			resume_next_sg(mdev);
 	}
 
-	after_all_state_ch(mdev->tconn);
-
 	drbd_md_sync(mdev);
 }
 
@@ -1393,12 +1390,6 @@
 	enum chg_state_flags flags;
 };
 
-static void after_all_state_ch(struct drbd_tconn *tconn)
-{
-	if (conn_all_vols_unconf(tconn))
-		drbd_thread_stop_nowait(&tconn->worker);
-}
-
 static int w_after_conn_state_ch(struct drbd_work *w, int unused)
 {
 	struct after_conn_state_chg_work *acscw =
@@ -1461,12 +1452,7 @@
 			spin_unlock_irq(&tconn->req_lock);
 		}
 	}
-
-
-	//conn_err(tconn, STATE_FMT, STATE_ARGS("nms", nms));
-	after_all_state_ch(tconn);
 	kref_put(&tconn->kref, &conn_destroy);
-
 	return 0;
 }
 
diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c
index 7a73bd4..0da1547 100644
--- a/drivers/block/drbd/drbd_worker.c
+++ b/drivers/block/drbd/drbd_worker.c
@@ -1744,10 +1744,6 @@
 	 */
 	spin_unlock_irq(&tconn->data.work.q_lock);
 
-	/* _drbd_set_state only uses stop_nowait.
-	 * wait here for the exiting receiver. */
-	drbd_thread_stop(&tconn->receiver);
-
 	down_read(&drbd_cfg_rwsem);
 	idr_for_each_entry(&tconn->volumes, mdev, vnr) {
 		D_ASSERT(mdev->state.disk == D_DISKLESS && mdev->state.conn == C_STANDALONE);