Add btrfs_end_transaction_throttle to force writers to wait for pending commits
The existing throttle mechanism was often not sufficient to prevent
new writers from coming in and making a given transaction run forever.
This adds an explicit wait at the end of most operations: when the current
transaction is already committing, the writer waits for that commit to
finish, which allows the transaction to close.
There is no wait inside file_write, inode updates, or cow filling, all of
which have different deadlock possibilities.
This is a temporary measure until better asynchronous commit support is
added. This code leads to stalls as it waits for data=ordered
writeback, and it really needs to be fixed.
Signed-off-by: Chris Mason <chris.mason@oracle.com>
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 3cc480b..52569b5 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1672,24 +1672,6 @@
set_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree, buf);
}
-void btrfs_throttle(struct btrfs_root *root)
-{
-#if 0
- struct backing_dev_info *bdi;
-
- bdi = &root->fs_info->bdi;
- if (atomic_read(&root->fs_info->throttles) &&
- bdi_write_congested(bdi)) {
-#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,18)
- congestion_wait(WRITE, HZ/20);
-#else
- blk_congestion_wait(WRITE, HZ/20);
-#endif
-
- }
-#endif
-}
-
void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr)
{
/*
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index 2bc64fe..deff6b4 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -70,7 +70,6 @@
int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid);
u32 btrfs_csum_data(struct btrfs_root *root, char *data, u32 seed, size_t len);
void btrfs_csum_final(u32 crc, char *result);
-void btrfs_throttle(struct btrfs_root *root);
int btrfs_open_device(struct btrfs_device *dev);
int btrfs_verify_block_csum(struct btrfs_root *root,
struct extent_buffer *buf);
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index b7f8f92..ece221c 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -934,7 +934,6 @@
balance_dirty_pages_ratelimited_nr(inode->i_mapping, num_pages);
if (num_pages < (root->leafsize >> PAGE_CACHE_SHIFT) + 1)
btrfs_btree_balance_dirty(root, 1);
- btrfs_throttle(root);
cond_resched();
}
out:
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index cf27b59..bbba335 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -855,10 +855,9 @@
btrfs_del_ordered_inode(inode, 1);
}
- btrfs_end_transaction(trans, root);
+ btrfs_end_transaction_throttle(trans, root);
fail:
btrfs_btree_balance_dirty(root, nr);
- btrfs_throttle(root);
return ret;
}
@@ -889,10 +888,9 @@
}
nr = trans->blocks_used;
- ret = btrfs_end_transaction(trans, root);
+ ret = btrfs_end_transaction_throttle(trans, root);
fail:
btrfs_btree_balance_dirty(root, nr);
- btrfs_throttle(root);
if (ret && !err)
err = ret;
@@ -1871,14 +1869,13 @@
btrfs_update_inode_block_group(trans, dir);
out_unlock:
nr = trans->blocks_used;
- btrfs_end_transaction(trans, root);
+ btrfs_end_transaction_throttle(trans, root);
fail:
if (drop_inode) {
inode_dec_link_count(inode);
iput(inode);
}
btrfs_btree_balance_dirty(root, nr);
- btrfs_throttle(root);
return err;
}
@@ -1936,14 +1933,13 @@
btrfs_update_inode_block_group(trans, dir);
out_unlock:
nr = trans->blocks_used;
- btrfs_end_transaction(trans, root);
+ btrfs_end_transaction_throttle(trans, root);
fail:
if (drop_inode) {
inode_dec_link_count(inode);
iput(inode);
}
btrfs_btree_balance_dirty(root, nr);
- btrfs_throttle(root);
return err;
}
@@ -1985,14 +1981,13 @@
drop_inode = 1;
nr = trans->blocks_used;
- btrfs_end_transaction(trans, root);
+ btrfs_end_transaction_throttle(trans, root);
fail:
if (drop_inode) {
inode_dec_link_count(inode);
iput(inode);
}
btrfs_btree_balance_dirty(root, nr);
- btrfs_throttle(root);
return err;
}
@@ -2055,13 +2050,12 @@
out_fail:
nr = trans->blocks_used;
- btrfs_end_transaction(trans, root);
+ btrfs_end_transaction_throttle(trans, root);
out_unlock:
if (drop_on_err)
iput(inode);
btrfs_btree_balance_dirty(root, nr);
- btrfs_throttle(root);
return err;
}
@@ -2587,10 +2581,9 @@
btrfs_update_inode(trans, root, inode);
nr = trans->blocks_used;
- ret = btrfs_end_transaction(trans, root);
+ ret = btrfs_end_transaction_throttle(trans, root);
BUG_ON(ret);
btrfs_btree_balance_dirty(root, nr);
- btrfs_throttle(root);
}
/*
@@ -2912,14 +2905,13 @@
out_unlock:
nr = trans->blocks_used;
- btrfs_end_transaction(trans, root);
+ btrfs_end_transaction_throttle(trans, root);
out_fail:
if (drop_inode) {
inode_dec_link_count(inode);
iput(inode);
}
btrfs_btree_balance_dirty(root, nr);
- btrfs_throttle(root);
return err;
}
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 6002eb6..026039a 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -164,7 +164,6 @@
ret = err;
fail_commit:
btrfs_btree_balance_dirty(root, nr);
- btrfs_throttle(root);
return ret;
}
@@ -206,7 +205,6 @@
fail_unlock:
btrfs_btree_balance_dirty(root, nr);
- btrfs_throttle(root);
return ret;
}
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 5a1ee06..69ed5f8 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -130,8 +130,27 @@
return h;
}
-int btrfs_end_transaction(struct btrfs_trans_handle *trans,
- struct btrfs_root *root)
+static noinline int wait_for_commit(struct btrfs_root *root,
+ struct btrfs_transaction *commit)
+{
+ DEFINE_WAIT(wait);
+ mutex_lock(&root->fs_info->trans_mutex);
+ while(!commit->commit_done) {
+ prepare_to_wait(&commit->commit_wait, &wait,
+ TASK_UNINTERRUPTIBLE);
+ if (commit->commit_done)
+ break;
+ mutex_unlock(&root->fs_info->trans_mutex);
+ schedule();
+ mutex_lock(&root->fs_info->trans_mutex);
+ }
+ mutex_unlock(&root->fs_info->trans_mutex);
+ finish_wait(&commit->commit_wait, &wait);
+ return 0;
+}
+
+static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, int throttle)
{
struct btrfs_transaction *cur_trans;
@@ -140,8 +159,18 @@
WARN_ON(cur_trans != trans->transaction);
WARN_ON(cur_trans->num_writers < 1);
cur_trans->num_writers--;
+
if (waitqueue_active(&cur_trans->writer_wait))
wake_up(&cur_trans->writer_wait);
+
+ if (cur_trans->in_commit && throttle) {
+ int ret;
+ mutex_unlock(&root->fs_info->trans_mutex);
+ ret = wait_for_commit(root, cur_trans);
+ BUG_ON(ret);
+ mutex_lock(&root->fs_info->trans_mutex);
+ }
+
put_transaction(cur_trans);
mutex_unlock(&root->fs_info->trans_mutex);
memset(trans, 0, sizeof(*trans));
@@ -149,6 +178,18 @@
return 0;
}
+int btrfs_end_transaction(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root)
+{
+ return __btrfs_end_transaction(trans, root, 0);
+}
+
+int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root)
+{
+ return __btrfs_end_transaction(trans, root, 1);
+}
+
int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans,
struct btrfs_root *root)
@@ -240,25 +281,6 @@
return 0;
}
-static noinline int wait_for_commit(struct btrfs_root *root,
- struct btrfs_transaction *commit)
-{
- DEFINE_WAIT(wait);
- mutex_lock(&root->fs_info->trans_mutex);
- while(!commit->commit_done) {
- prepare_to_wait(&commit->commit_wait, &wait,
- TASK_UNINTERRUPTIBLE);
- if (commit->commit_done)
- break;
- mutex_unlock(&root->fs_info->trans_mutex);
- schedule();
- mutex_lock(&root->fs_info->trans_mutex);
- }
- mutex_unlock(&root->fs_info->trans_mutex);
- finish_wait(&commit->commit_wait, &wait);
- return 0;
-}
-
struct dirty_root {
struct list_head list;
struct btrfs_root *root;
@@ -680,6 +702,7 @@
root->fs_info->btree_inode->i_mapping, GFP_NOFS);
trans->transaction->in_commit = 1;
+printk("trans %Lu in commit\n", trans->transid);
cur_trans = trans->transaction;
if (cur_trans->list.prev != &root->fs_info->trans_list) {
prev_trans = list_entry(cur_trans->list.prev,
@@ -760,6 +783,7 @@
kfree(pinned_copy);
cur_trans->commit_done = 1;
+printk("trans %Lu done in commit\n", cur_trans->transid);
root->fs_info->last_trans_committed = cur_trans->transid;
wake_up(&cur_trans->commit_wait);
put_transaction(cur_trans);
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index c3172dd..52559b5 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -101,4 +101,6 @@
struct btrfs_root *root);
int btrfs_write_ordered_inodes(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
+int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root);
#endif