btrfs_start_transaction: wait for commits in progress to finish
btrfs_commit_transaction has to loop, waiting for every writer in the
transaction to finish, before it can proceed. btrfs_start_transaction
should be polite and not join a transaction that is in the process
of being finished off; instead, it now waits until the committing
transaction stops blocking new writers.
There are a few places that can't wait, basically the ones doing IO that
might be needed to finish the transaction. For them, btrfs_join_transaction
is added: it joins the running transaction immediately, even while a
commit is in progress.
Signed-off-by: Chris Mason <chris.mason@oracle.com>
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 4ddc8a8..acbce54 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -513,6 +513,7 @@
u64 alloc_start;
struct btrfs_transaction *running_transaction;
wait_queue_head_t transaction_throttle;
+ wait_queue_head_t transaction_wait;
struct btrfs_super_block super_copy;
struct btrfs_super_block super_for_commit;
struct block_device *__bdev;
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 66466d1..99bb385 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1291,6 +1291,7 @@
mutex_init(&fs_info->cleaner_mutex);
mutex_init(&fs_info->volume_mutex);
init_waitqueue_head(&fs_info->transaction_throttle);
+ init_waitqueue_head(&fs_info->transaction_wait);
#if 0
ret = add_hasher(fs_info, "crc32c");
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 3e4e5c2..d650589 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -251,7 +251,7 @@
end_of_last_block = start_pos + num_bytes - 1;
lock_extent(io_tree, start_pos, end_of_last_block, GFP_NOFS);
- trans = btrfs_start_transaction(root, 1);
+ trans = btrfs_join_transaction(root, 1);
if (!trans) {
err = -ENOMEM;
goto out_unlock;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index baf4601..0a68732 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -116,7 +116,7 @@
struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
int ret = 0;
- trans = btrfs_start_transaction(root, 1);
+ trans = btrfs_join_transaction(root, 1);
BUG_ON(!trans);
btrfs_set_trans_block_group(trans, inode);
@@ -502,7 +502,7 @@
return 0;
}
- trans = btrfs_start_transaction(root, 1);
+ trans = btrfs_join_transaction(root, 1);
ordered_extent = btrfs_lookup_ordered_extent(inode, start);
BUG_ON(!ordered_extent);
@@ -1812,7 +1812,7 @@
int ret = 0;
if (wait) {
- trans = btrfs_start_transaction(root, 1);
+ trans = btrfs_join_transaction(root, 1);
btrfs_set_trans_block_group(trans, inode);
ret = btrfs_commit_transaction(trans, root);
}
@@ -1830,7 +1830,7 @@
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_trans_handle *trans;
- trans = btrfs_start_transaction(root, 1);
+ trans = btrfs_join_transaction(root, 1);
btrfs_set_trans_block_group(trans, inode);
btrfs_update_inode(trans, root, inode);
btrfs_end_transaction(trans, root);
@@ -2395,7 +2395,7 @@
free_extent_map(em);
em = NULL;
btrfs_release_path(root, path);
- trans = btrfs_start_transaction(root, 1);
+ trans = btrfs_join_transaction(root, 1);
goto again;
}
write_extent_buffer(leaf, map + pg_offset, ptr,
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 86a5acc..0582390 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -62,6 +62,7 @@
init_waitqueue_head(&cur_trans->writer_wait);
init_waitqueue_head(&cur_trans->commit_wait);
cur_trans->in_commit = 0;
+ cur_trans->blocked = 0;
cur_trans->use_count = 1;
cur_trans->commit_done = 0;
cur_trans->start_time = get_seconds();
@@ -99,14 +100,36 @@
return 0;
}
-struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root,
- int num_blocks)
+struct btrfs_trans_handle *start_transaction(struct btrfs_root *root,
+ int num_blocks, int join)
{
struct btrfs_trans_handle *h =
kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS);
+ struct btrfs_transaction *cur_trans;
int ret;
mutex_lock(&root->fs_info->trans_mutex);
+ cur_trans = root->fs_info->running_transaction;
+ if (cur_trans && cur_trans->blocked && !join) {
+ DEFINE_WAIT(wait);
+ cur_trans->use_count++;
+ while(1) {
+ prepare_to_wait(&root->fs_info->transaction_wait, &wait,
+ TASK_UNINTERRUPTIBLE);
+ if (cur_trans->blocked) {
+ mutex_unlock(&root->fs_info->trans_mutex);
+ schedule();
+ mutex_lock(&root->fs_info->trans_mutex);
+ finish_wait(&root->fs_info->transaction_wait,
+ &wait);
+ } else {
+ finish_wait(&root->fs_info->transaction_wait,
+ &wait);
+ break;
+ }
+ }
+ put_transaction(cur_trans);
+ }
ret = join_transaction(root);
BUG_ON(ret);
@@ -123,6 +146,17 @@
return h;
}
+struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root,
+ int num_blocks)
+{
+ return start_transaction(root, num_blocks, 0);
+}
+struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root,
+ int num_blocks)
+{
+ return start_transaction(root, num_blocks, 1);
+}
+
static noinline int wait_for_commit(struct btrfs_root *root,
struct btrfs_transaction *commit)
{
@@ -156,7 +190,7 @@
if (waitqueue_active(&cur_trans->writer_wait))
wake_up(&cur_trans->writer_wait);
- if (cur_trans->in_commit && throttle) {
+ if (0 && cur_trans->in_commit && throttle) {
DEFINE_WAIT(wait);
mutex_unlock(&root->fs_info->trans_mutex);
prepare_to_wait(&root->fs_info->transaction_throttle, &wait,
@@ -617,6 +651,7 @@
printk("commit trans %Lu\n", trans->transid);
trans->transaction->in_commit = 1;
+ trans->transaction->blocked = 1;
cur_trans = trans->transaction;
if (cur_trans->list.prev != &root->fs_info->trans_list) {
prev_trans = list_entry(cur_trans->list.prev,
@@ -684,7 +719,9 @@
btrfs_copy_pinned(root, pinned_copy);
+ trans->transaction->blocked = 0;
wake_up(&root->fs_info->transaction_throttle);
+ wake_up(&root->fs_info->transaction_wait);
mutex_unlock(&root->fs_info->trans_mutex);
ret = btrfs_write_and_wait_transaction(trans, root);
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index 910350c..11fbdec 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -27,6 +27,7 @@
int in_commit;
int use_count;
int commit_done;
+ int blocked;
struct list_head list;
struct extent_io_tree dirty_pages;
unsigned long start_time;
@@ -75,6 +76,8 @@
struct btrfs_root *root);
struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root,
int num_blocks);
+struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root,
+ int num_blocks);
int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans,