[PATCH] ocfs2/dlm: recovery and migration fixes

* fix a hang that can occur during shutdown, when a lockres is migrated to a
  node which also goes down (see the sketch after this list)
* do not allow nodes to join the domain during recovery
* when restarting lock mastery, do not ignore nodes that come up
* prevent more than one node from becoming the recovery master
* sleep to allow some time for heartbeat state to catch up to the network
* add extra debug info for bad recovery state problems
* make DLM_RECO_NODE_DATA_DONE a valid state for non-master recovery nodes
* prune all locks held by dead nodes on $RECOVERY lock resources
* do NOT automatically add new nodes to mle nodemaps until they have properly
  joined the domain
* make sure dlm_pick_recovery_master only exits once all nodes have synced
* properly handle dlmunlock errors in dlm_pick_recovery_master
* do not propagate network errors in dlm_send_begin_reco_message
* ensure dead nodes are always put in the recovery map; they were sometimes
  missed
* fix dlmunlock failing to clear the unlock actions on DLM_DENIED
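
As a rough illustration of the first fix, here is a minimal user-space model
of the retry logic added to the migration wait loop in dlmmaster.c (second
hunk below).  node_is_dead() and wait_for_migration() are hypothetical
stand-ins for dlm_is_node_dead() and the dlm's timed wait, not the kernel
API:

  #include <errno.h>
  #include <stdbool.h>
  #include <stdio.h>

  #define ERESTARTSYS 512	/* kernel-internal, not in user errno.h */

  static bool node_is_dead(int node)
  {
  	/* stand-in: the real check tests the dlm node bitmap
  	 * under dlm->spinlock */
  	return node == 3;
  }

  static int wait_for_migration(int target, int timeout_ms)
  {
  	(void)target;
  	(void)timeout_ms;
  	return -ETIMEDOUT;	/* pretend the wait always times out */
  }

  static int migrate_lockres(int target)
  {
  	for (;;) {
  		int ret = wait_for_migration(target, 1000);
  		if (ret == 0)
  			return 0;	/* migration completed */

  		/* timed out: if the target died, stop waiting and
  		 * let the caller restart with a new target instead
  		 * of hanging forever */
  		if (node_is_dead(target)) {
  			fprintf(stderr, "target %d is no longer up, "
  				"restarting\n", target);
  			return -ERESTARTSYS;
  		}
  		/* target is still up: keep waiting */
  	}
  }

  int main(void)
  {
  	return migrate_lockres(3) == -ERESTARTSYS ? 0 : 1;
  }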

Signed-off-by: Kurt Hackel <kurt.hackel@oracle.com>
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index 27e984f..a3194fe 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -1050,17 +1050,10 @@
 	node = dlm_bitmap_diff_iter_next(&bdi, &sc);
 	while (node >= 0) {
 		if (sc == NODE_UP) {
-			/* a node came up.  easy.  might not even need
-			 * to talk to it if its node number is higher
-			 * or if we are already blocked. */
-			mlog(0, "node up! %d\n", node);
-			if (blocked)
-				goto next;
-
-			if (node > dlm->node_num) {
-				mlog(0, "node > this node. skipping.\n");
-				goto next;
-			}
+			/* a node came up.  clear any old vote from
+			 * the response map, set it in the vote map,
+			 * then restart the mastery. */
+			mlog(ML_NOTICE, "node %d up while restarting\n", node);
 
 			/* redo the master request, but only for the new node */
 			mlog(0, "sending request to new node\n");
@@ -2005,6 +1998,15 @@
 				break;
 
 			mlog(0, "timed out during migration\n");
+			/* avoid a hang during shutdown when migrating a
+			 * lockres to a node which also goes down */
+			if (dlm_is_node_dead(dlm, target)) {
+				mlog(0, "%s:%.*s: expected migration target %u "
+				     "is no longer up.  restarting.\n",
+				     dlm->name, res->lockname.len,
+				     res->lockname.name, target);
+				ret = -ERESTARTSYS;
+			}
 		}
 		if (ret == -ERESTARTSYS) {
 			/* migration failed, detach and clean up mle */
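
For reference, a minimal user-space model of the mastery-restart bookkeeping
that the new comment in the first hunk describes; the struct and bit
operations are illustrative stand-ins for the dlm master-list-entry bitmaps,
not the kernel types:

  #include <stdint.h>
  #include <stdio.h>

  struct mle_model {
  	uint64_t vote_map;	/* nodes expected to vote */
  	uint64_t response_map;	/* nodes that have already responded */
  };

  static void node_up_during_restart(struct mle_model *mle, int node)
  {
  	/* clear any old (stale) vote from the response map ... */
  	mle->response_map &= ~(1ULL << node);
  	/* ... set the node in the vote map ... */
  	mle->vote_map |= 1ULL << node;
  	/* ... then redo the master request for the new node only */
  	printf("sending request to new node %d\n", node);
  }

  int main(void)
  {
  	/* node 3 responded before it bounced; its old vote is stale */
  	struct mle_model mle = { .vote_map = 0x5, .response_map = 0xd };

  	node_up_during_restart(&mle, 3);
  	printf("vote=%#llx resp=%#llx\n",
  	       (unsigned long long)mle.vote_map,
  	       (unsigned long long)mle.response_map);
  	return 0;
  }

Re-sending the master request only to the node that came up keeps the restart
cheap: nodes that already answered stay counted in the response map.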