Add btrfs_end_transaction_throttle to force writers to wait for pending commits
The existing throttle mechanism was often not sufficient to prevent
new writers from coming in and making a given transaction run forever.
This adds an explicit wait at the end of most operations so they will
allow the current transaction to close.
There is no wait inside file_write, inode updates, or cow filling, all of which
have different deadlock possibilities.
This is a temporary measure until better asynchronous commit support is
added. This code leads to stalls as it waits for data=ordered
writeback, and it really needs to be fixed.
Signed-off-by: Chris Mason <chris.mason@oracle.com>
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 5a1ee06..69ed5f8 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -130,8 +130,27 @@
return h;
}
-int btrfs_end_transaction(struct btrfs_trans_handle *trans,
- struct btrfs_root *root)
+static noinline int wait_for_commit(struct btrfs_root *root,
+ struct btrfs_transaction *commit) /* blocks until commit->commit_done is set */
+{
+ DEFINE_WAIT(wait);
+ mutex_lock(&root->fs_info->trans_mutex); /* commit_done is tested with trans_mutex held */
+ while(!commit->commit_done) {
+ prepare_to_wait(&commit->commit_wait, &wait,
+ TASK_UNINTERRUPTIBLE); /* queue on commit_wait before re-testing the flag */
+ if (commit->commit_done) /* re-test after queueing, before giving up the mutex */
+ break;
+ mutex_unlock(&root->fs_info->trans_mutex); /* do not sleep while holding trans_mutex */
+ schedule(); /* sleep until woken through commit_wait */
+ mutex_lock(&root->fs_info->trans_mutex); /* retake the mutex for the loop test */
+ }
+ mutex_unlock(&root->fs_info->trans_mutex);
+ finish_wait(&commit->commit_wait, &wait); /* remove the wait entry on both exit paths */
+ return 0; /* no failure path: always 0 */
+}
+
+static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, int throttle)
{
struct btrfs_transaction *cur_trans;
@@ -140,8 +159,18 @@
WARN_ON(cur_trans != trans->transaction);
WARN_ON(cur_trans->num_writers < 1);
cur_trans->num_writers--;
+
if (waitqueue_active(&cur_trans->writer_wait))
wake_up(&cur_trans->writer_wait);
+
+ if (cur_trans->in_commit && throttle) {
+ int ret;
+ mutex_unlock(&root->fs_info->trans_mutex);
+ ret = wait_for_commit(root, cur_trans);
+ BUG_ON(ret);
+ mutex_lock(&root->fs_info->trans_mutex);
+ }
+
put_transaction(cur_trans);
mutex_unlock(&root->fs_info->trans_mutex);
memset(trans, 0, sizeof(*trans));
@@ -149,6 +178,18 @@
return 0;
}
+int btrfs_end_transaction(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root)
+{
+ return __btrfs_end_transaction(trans, root, 0); /* throttle == 0: skips the wait_for_commit() step */
+}
+
+int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root)
+{
+ return __btrfs_end_transaction(trans, root, 1); /* throttle == 1: calls wait_for_commit() when the transaction is in_commit */
+}
+
int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans,
struct btrfs_root *root)
@@ -240,25 +281,6 @@
return 0;
}
-static noinline int wait_for_commit(struct btrfs_root *root,
- struct btrfs_transaction *commit)
-{
- DEFINE_WAIT(wait);
- mutex_lock(&root->fs_info->trans_mutex);
- while(!commit->commit_done) {
- prepare_to_wait(&commit->commit_wait, &wait,
- TASK_UNINTERRUPTIBLE);
- if (commit->commit_done)
- break;
- mutex_unlock(&root->fs_info->trans_mutex);
- schedule();
- mutex_lock(&root->fs_info->trans_mutex);
- }
- mutex_unlock(&root->fs_info->trans_mutex);
- finish_wait(&commit->commit_wait, &wait);
- return 0;
-}
-
struct dirty_root {
struct list_head list;
struct btrfs_root *root;
@@ -680,6 +702,7 @@
root->fs_info->btree_inode->i_mapping, GFP_NOFS);
trans->transaction->in_commit = 1;
+printk("trans %Lu in commit\n", trans->transid);
cur_trans = trans->transaction;
if (cur_trans->list.prev != &root->fs_info->trans_list) {
prev_trans = list_entry(cur_trans->list.prev,
@@ -760,6 +783,7 @@
kfree(pinned_copy);
cur_trans->commit_done = 1;
+printk("trans %Lu done in commit\n", cur_trans->transid);
root->fs_info->last_trans_committed = cur_trans->transid;
wake_up(&cur_trans->commit_wait);
put_transaction(cur_trans);