Add btrfs_end_transaction_throttle to force writers to wait for pending commits

The existing throttle mechanism was often not sufficient to prevent
new writers from coming in and making a given transaction run forever.
This adds an explicit wait at the end of most operations so they will
allow the current transaction to close.

There is no wait inside file_write, inode updates, or cow filling, all which
have different deadlock possibilities.

This is a temporary measure until better asynchronous commit support is
added.  This code leads to stalls as it waits for data=ordered
writeback, and it really needs to be fixed.

Signed-off-by: Chris Mason <chris.mason@oracle.com>
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 3cc480b..52569b5 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1672,24 +1672,6 @@
 	set_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree, buf);
 }
 
-void btrfs_throttle(struct btrfs_root *root)
-{
-#if 0
-	struct backing_dev_info *bdi;
-
-	bdi = &root->fs_info->bdi;
-	if (atomic_read(&root->fs_info->throttles) &&
-	    bdi_write_congested(bdi)) {
-#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,18)
-		congestion_wait(WRITE, HZ/20);
-#else
-		blk_congestion_wait(WRITE, HZ/20);
-#endif
-
-	}
-#endif
-}
-
 void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr)
 {
 	/*
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index 2bc64fe..deff6b4 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -70,7 +70,6 @@
 int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid);
 u32 btrfs_csum_data(struct btrfs_root *root, char *data, u32 seed, size_t len);
 void btrfs_csum_final(u32 crc, char *result);
-void btrfs_throttle(struct btrfs_root *root);
 int btrfs_open_device(struct btrfs_device *dev);
 int btrfs_verify_block_csum(struct btrfs_root *root,
 			    struct extent_buffer *buf);
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index b7f8f92..ece221c 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -934,7 +934,6 @@
 		balance_dirty_pages_ratelimited_nr(inode->i_mapping, num_pages);
 		if (num_pages < (root->leafsize >> PAGE_CACHE_SHIFT) + 1)
 			btrfs_btree_balance_dirty(root, 1);
-		btrfs_throttle(root);
 		cond_resched();
 	}
 out:
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index cf27b59..bbba335 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -855,10 +855,9 @@
 		btrfs_del_ordered_inode(inode, 1);
 	}
 
-	btrfs_end_transaction(trans, root);
+	btrfs_end_transaction_throttle(trans, root);
 fail:
 	btrfs_btree_balance_dirty(root, nr);
-	btrfs_throttle(root);
 	return ret;
 }
 
@@ -889,10 +888,9 @@
 	}
 
 	nr = trans->blocks_used;
-	ret = btrfs_end_transaction(trans, root);
+	ret = btrfs_end_transaction_throttle(trans, root);
 fail:
 	btrfs_btree_balance_dirty(root, nr);
-	btrfs_throttle(root);
 
 	if (ret && !err)
 		err = ret;
@@ -1871,14 +1869,13 @@
 	btrfs_update_inode_block_group(trans, dir);
 out_unlock:
 	nr = trans->blocks_used;
-	btrfs_end_transaction(trans, root);
+	btrfs_end_transaction_throttle(trans, root);
 fail:
 	if (drop_inode) {
 		inode_dec_link_count(inode);
 		iput(inode);
 	}
 	btrfs_btree_balance_dirty(root, nr);
-	btrfs_throttle(root);
 	return err;
 }
 
@@ -1936,14 +1933,13 @@
 	btrfs_update_inode_block_group(trans, dir);
 out_unlock:
 	nr = trans->blocks_used;
-	btrfs_end_transaction(trans, root);
+	btrfs_end_transaction_throttle(trans, root);
 fail:
 	if (drop_inode) {
 		inode_dec_link_count(inode);
 		iput(inode);
 	}
 	btrfs_btree_balance_dirty(root, nr);
-	btrfs_throttle(root);
 	return err;
 }
 
@@ -1985,14 +1981,13 @@
 		drop_inode = 1;
 
 	nr = trans->blocks_used;
-	btrfs_end_transaction(trans, root);
+	btrfs_end_transaction_throttle(trans, root);
 fail:
 	if (drop_inode) {
 		inode_dec_link_count(inode);
 		iput(inode);
 	}
 	btrfs_btree_balance_dirty(root, nr);
-	btrfs_throttle(root);
 	return err;
 }
 
@@ -2055,13 +2050,12 @@
 
 out_fail:
 	nr = trans->blocks_used;
-	btrfs_end_transaction(trans, root);
+	btrfs_end_transaction_throttle(trans, root);
 
 out_unlock:
 	if (drop_on_err)
 		iput(inode);
 	btrfs_btree_balance_dirty(root, nr);
-	btrfs_throttle(root);
 	return err;
 }
 
@@ -2587,10 +2581,9 @@
 	btrfs_update_inode(trans, root, inode);
 	nr = trans->blocks_used;
 
-	ret = btrfs_end_transaction(trans, root);
+	ret = btrfs_end_transaction_throttle(trans, root);
 	BUG_ON(ret);
 	btrfs_btree_balance_dirty(root, nr);
-	btrfs_throttle(root);
 }
 
 /*
@@ -2912,14 +2905,13 @@
 
 out_unlock:
 	nr = trans->blocks_used;
-	btrfs_end_transaction(trans, root);
+	btrfs_end_transaction_throttle(trans, root);
 out_fail:
 	if (drop_inode) {
 		inode_dec_link_count(inode);
 		iput(inode);
 	}
 	btrfs_btree_balance_dirty(root, nr);
-	btrfs_throttle(root);
 	return err;
 }
 
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 6002eb6..026039a 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -164,7 +164,6 @@
 		ret = err;
 fail_commit:
 	btrfs_btree_balance_dirty(root, nr);
-	btrfs_throttle(root);
 	return ret;
 }
 
@@ -206,7 +205,6 @@
 
 fail_unlock:
 	btrfs_btree_balance_dirty(root, nr);
-	btrfs_throttle(root);
 	return ret;
 }
 
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 5a1ee06..69ed5f8 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -130,8 +130,27 @@
 	return h;
 }
 
-int btrfs_end_transaction(struct btrfs_trans_handle *trans,
-			  struct btrfs_root *root)
+static noinline int wait_for_commit(struct btrfs_root *root,
+				    struct btrfs_transaction *commit)
+{
+	DEFINE_WAIT(wait);
+	mutex_lock(&root->fs_info->trans_mutex);
+	while(!commit->commit_done) {
+		prepare_to_wait(&commit->commit_wait, &wait,
+				TASK_UNINTERRUPTIBLE);
+		if (commit->commit_done)
+			break;
+		mutex_unlock(&root->fs_info->trans_mutex);
+		schedule();
+		mutex_lock(&root->fs_info->trans_mutex);
+	}
+	mutex_unlock(&root->fs_info->trans_mutex);
+	finish_wait(&commit->commit_wait, &wait);
+	return 0;
+}
+
+static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
+			  struct btrfs_root *root, int throttle)
 {
 	struct btrfs_transaction *cur_trans;
 
@@ -140,8 +159,18 @@
 	WARN_ON(cur_trans != trans->transaction);
 	WARN_ON(cur_trans->num_writers < 1);
 	cur_trans->num_writers--;
+
 	if (waitqueue_active(&cur_trans->writer_wait))
 		wake_up(&cur_trans->writer_wait);
+
+	if (cur_trans->in_commit && throttle) {
+		int ret;
+		mutex_unlock(&root->fs_info->trans_mutex);
+		ret = wait_for_commit(root, cur_trans);
+		BUG_ON(ret);
+		mutex_lock(&root->fs_info->trans_mutex);
+	}
+
 	put_transaction(cur_trans);
 	mutex_unlock(&root->fs_info->trans_mutex);
 	memset(trans, 0, sizeof(*trans));
@@ -149,6 +178,18 @@
 	return 0;
 }
 
+int btrfs_end_transaction(struct btrfs_trans_handle *trans,
+			  struct btrfs_root *root)
+{
+	return __btrfs_end_transaction(trans, root, 0);
+}
+
+int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans,
+				   struct btrfs_root *root)
+{
+	return __btrfs_end_transaction(trans, root, 1);
+}
+
 
 int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans,
 				     struct btrfs_root *root)
@@ -240,25 +281,6 @@
 	return 0;
 }
 
-static noinline int wait_for_commit(struct btrfs_root *root,
-				    struct btrfs_transaction *commit)
-{
-	DEFINE_WAIT(wait);
-	mutex_lock(&root->fs_info->trans_mutex);
-	while(!commit->commit_done) {
-		prepare_to_wait(&commit->commit_wait, &wait,
-				TASK_UNINTERRUPTIBLE);
-		if (commit->commit_done)
-			break;
-		mutex_unlock(&root->fs_info->trans_mutex);
-		schedule();
-		mutex_lock(&root->fs_info->trans_mutex);
-	}
-	mutex_unlock(&root->fs_info->trans_mutex);
-	finish_wait(&commit->commit_wait, &wait);
-	return 0;
-}
-
 struct dirty_root {
 	struct list_head list;
 	struct btrfs_root *root;
@@ -680,6 +702,7 @@
 			     root->fs_info->btree_inode->i_mapping, GFP_NOFS);
 
 	trans->transaction->in_commit = 1;
+printk("trans %Lu in commit\n", trans->transid);
 	cur_trans = trans->transaction;
 	if (cur_trans->list.prev != &root->fs_info->trans_list) {
 		prev_trans = list_entry(cur_trans->list.prev,
@@ -760,6 +783,7 @@
 	kfree(pinned_copy);
 
 	cur_trans->commit_done = 1;
+printk("trans %Lu done in commit\n", cur_trans->transid);
 	root->fs_info->last_trans_committed = cur_trans->transid;
 	wake_up(&cur_trans->commit_wait);
 	put_transaction(cur_trans);
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index c3172dd..52559b5 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -101,4 +101,6 @@
 			     struct btrfs_root *root);
 int btrfs_write_ordered_inodes(struct btrfs_trans_handle *trans,
 				struct btrfs_root *root);
+int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans,
+				   struct btrfs_root *root);
 #endif